在软件工程中,有一条常被提起的原则:“高内聚、低耦合”。意思是:把大模块拆分成一个个小模块来实现,每个模块内部只负责一件紧密相关的事情,模块与模块之间的独立性较高;这样修改某个模块时,对其他模块或整个项目的影响较小。
我们以一个下载图片的爬虫为示例,让大家理解得更清楚。
错误示例
import re
import requests
def Visit(url, regularity, regularity_1):
    """Download every image reachable from the listing page at ``url``.

    The function fetches the listing page, extracts second-level page
    links with ``regularity``, fetches each of those pages, extracts image
    URLs with ``regularity_1`` and writes each image to disk. A failure on
    one image is reported on stdout and the crawl continues with the next.

    It is kept as a single monolithic function on purpose: the surrounding
    article presents it as the tightly coupled "wrong example".

    Args:
        url: Address of the listing page to fetch.
        regularity: Regex applied to the listing page; each match is
            appended to the hard-coded site root to form a second-level
            page URL.
        regularity_1: Regex applied to each second-level page; each match
            is requested as an image URL.
    """
    r = requests.get(url)
    r.encoding = r.apparent_encoding
    web_source = r.text
    regular_ls = re.findall(regularity, web_source)
    for i, yema in enumerate(regular_ls):
        print(yema)
        url_1 = "https://www.236www.com/" + yema  # second-level page URL
        print(url_1)
        html = requests.get(url_1)
        html.encoding = html.apparent_encoding
        web_source_html = html.text
        regular_ls_1 = re.findall(regularity_1, web_source_html)
        for n, picture_url in enumerate(regular_ls_1):
            try:
                picture_html = requests.get(picture_url)
                # Local path for the downloaded image. The backslashes must
                # be escaped: an unescaped trailing backslash would escape
                # the closing quote and break the string literal.
                address = ("D:\\图片\\" + str(yema[17:]) + "--" + str(i)
                           + "--" + str(n) + ".jpg")
                with open(address, "wb") as f:
                    f.write(picture_html.content)
                print(address, '下载完成')
            except Exception:
                # Best-effort per image, but unlike a bare ``except:`` this
                # lets KeyboardInterrupt/SystemExit stop the crawl.
                print(str(i) + str(n), "打印失败")
def web_batch(The_number_of):
regularity = ' |