import requests
import time
from bs4 import BeautifulSoup
def get_douban_books(url, timeout=10):
    """Download one book-list page and print each book's title and link.

    Args:
        url: Address of the list page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # A browser-like user agent is sent with the request.
    headers = {
        'user-agent': 'Mozilla/5.0(Macintosh;IntelMacOSX10_13_6)AppleWebKit/537.36(KHTML,likeGecko)Chrome/76.0.3809.132Safari/537.36'
    }
    res = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'html.parser')

    # Entries are matched by the "pl2" class (lowercase L). The previous
    # "p12" (digit one) looks like a typo that matched nothing.
    # NOTE(review): confirm the class name against the live page markup.
    for item in soup.find_all('div', class_='pl2'):
        tag = item.find('a')
        if tag is None:
            # Entry without a link: nothing to report.
            continue
        name = tag.get('title')
        link = tag.get('href')
        if name is None or link is None:
            # Skip anchors lacking either attribute rather than crashing.
            continue
        print(name, link)
# URL template for the list pages; `start` receives the item offset.
url = 'https://book.douban.com/top250?start={}'
# Ten page URLs with offsets 0, 25, ..., 225.
urls = [url.format(num * 25) for num in range(10)]
for page_url in urls:
    get_douban_books(page_url)
    # Wait one second after each page (presumably to go easy on the server).
    time.sleep(1)