这是我的课程期末作业,需要爬取一个网址中的数据;其中在测试百度翻译的爬虫时出现了一些问题。
import json
import requests
# Baidu Translate web endpoint that the form below is submitted to.
url = "https://fanyi.baidu.com/v2transapi"

# Form fields of the translation request (Chinese -> English).
# The query text is fixed here; an interactive input() prompt was tried by the
# author and left disabled.
# NOTE: per the author's notes, "sign" is tied to the exact "query" text --
# a mismatched pair is answered with error code 997/998 -- so changing the
# query requires a matching new sign.
query_string = {
    "from": "zh",
    "to": "en",
    "query": "你好世界",
    "transtype": "translang",
    "simple_means_flag": "3",
    "sign": "1265.321472",
    "token": "11c16a562f2de40546a9f27f22f2b17d",
    "domain": "common",
}
# HTTP headers sent with the translation request.
# NOTE(security): the Cookie value includes BDUSS / BDUSS_BFESS entries, which
# look like a logged-in session credential -- presumably copied from a browser;
# confirm, and avoid publishing or committing a live value.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36",
    # The cookie is a single logical header line. It is written as two adjacent
    # string literals (joined at compile time) because a quoted string literal
    # cannot contain a raw line break.
    "Cookie": (
        "BIDUPSID=80E13FA61FB06ABD9052A70B1274F507; PSTM=1591928152; BDUSS=5wU2JtTkVMbVlnUVc5ZTdaeVdkd1Nqa0tWLTAxSGpQWG5VUlNUTURReHpDb0JnRVFBQUFBJCQAAAAAAAAAAAEAAAAUSTesxOPDx8O7ztLLp7ChsKEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAHN9WGBzfVhgNk; BDUSS_BFESS=5wU2JtTkVMbVlnUVc5ZTdaeVdkd1Nqa0tWLTAxSGpQWG5VUlNUTURReHpDb0JnRVFBQUFBJCQAAAAAAAAAAAEAAAAUSTesxOPDx8O7ztLLp7ChsKEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAHN9WGBzfVhgNk; __yjs_duid=1_cae0585cd3a1c5e6e359a009a16146a21621944461667; BAIDUID=30E5DE4616987C67FEFC7B42425BEF85:FG=1; BAIDUID_BFESS=30E5DE4616987C67FEFC7B42425BEF85:FG=1; BDRCVFR[feWj1Vr5u3D]=I67x6TjHwwYf0; delPer=0; PSINO=5; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; SOUND_PREFER_SWITCH=1; SOUND_SPD_SWITCH=1; HISTORY_SWITCH=1; Hm_lvt_afd111fa62852d1f37001d1f980b6800=1624523890; BCLID=11043766726698675042; BDSFRCVID=Dk4OJexroG38EYQec-kRbcMK8_weG7bTDYLELhM_xuxFdV0VJeC6EG0Pts1-dEu-EHtdogKKKgOTHI8F_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF=tR3aQ5rtKRTffjrnhPF3DJKvXP6-hnjy3bAOKxTl5R8VHP3P-Ucveh4Wbttf5q3RymJ42-39LPO2hpRjyxv4y4Ldj4oxJpOJ-bCL0p5aHl51fbbvbURvyP-g3-7AqU5dtjTO2bc_5KnlfMQ_bf--QfbQ0hOhqP-jBRIEoCvDKRrBjjrP-trf5DCShUFs0PPJB2Q-XPoO3K8WsfThbt-VMpDV-PciqpRf5mkf3fbgylRp8bnEBn7h2MKO3G3pBtQmJeTxoUJ2-KDVeh5Gqfo15-0ebPRiB-b9QgbAVpQ7tt5W8ncFbT7l5hKpbt-q0x-jLTnhVn0MBCK0HPonHjLKejcB3e; BCLID_BFESS=11043766726698675042; BDSFRCVID_BFESS=Dk4OJexroG38EYQec-kRbcMK8_weG7bTDYLELhM_xuxFdV0VJeC6EG0Pts1-dEu-EHtdogKKKgOTHI8F_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF_BFESS=tR3aQ5rtKRTffjrnhPF3DJKvXP6-hnjy3bAOKxTl5R8VHP3P-Ucveh4Wbttf5q3RymJ42-39LPO2hpRjyxv4y4Ldj4oxJpOJ-bCL0p5aHl51fbbvbURvyP-g3-7AqU5dtjTO2bc_5KnlfMQ_bf--QfbQ0hOhqP-jBRIEoCvDKRrBjjrP-trf5DCShUFs0PPJB2Q-XPoO3K8WsfThbt-VMpDV-PciqpRf5mkf3fbgylRp8bnEBn7h2MKO3G3pBtQmJeTxoUJ2-KDVeh5Gqfo15-0ebPRiB-b9QgbAVpQ7tt5W8ncFbT7l5hKpbt-q0x-jLTnhVn0MBCK0HPonHjLKejcB3e; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1624524063,1624524067,1624528433,1624528582; "
        "BDRCVFR[rlCQPVAaoab]=mk3SLVN4HKm; H_PS_PSSID=31254_26350; Hm_lpvt_afd111fa62852d1f37001d1f980b6800=1624688438; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1624695135; __yjs_st=2_ZTY3YjZjODFkMmU2ZWM3MjBiOTc1ZGMyMDljYjUxZjU2YjI4ZjdhZTM3ODlmMDVjN2NiZDU2YmE2M2U4ZGEzYThmMzhlN2JkNjU4MmJlMDQ1ZGJjOTMxODEzNmE5NjI3YWMzZDRiNzY1OTAxNGVkOWU0NzkzMDdiMThmMmM2NjJlOWZkYzI5MjJmYjk1Nzc4Njg0MGE3ZjM4ODllOWFmYjUwMTdlODNhM2I0N2M3YWZmZjNiMzkxOTI4NmNhODc2ZmZiMTJhYzc4YjcwOTc3ZDUyOTU1ZGQwM2JmNzUzNGM4YWNiODBkNTJmMGU3ODM0NzhjZjdhMWI3MmQxMTZiY183XzNmYzlmNzYw; ab_sr=1.0.1_NTY1YTVlZjI0ZWJmNTlkODQxOGNjODZlMThiYjdlYzFiNTNiOTcwYTg5OWQwMDA5ODI5YmFjYWU5YzIyNzRlYTA5NDI5MDBjN2JkZmUyMTRhZjM2MDY0MjIwMWVjOTU3NmMzZjJhY2UzZTVlZmNkMjhkZGNjMGNkZGE2MjNhOWUyYjAwMTYzM2EwN2IxNGE2OTZmYWJiYTU2M2VmMWYxZmY3MDk1ZTQzYjhmMWM0NWY5ZWI2Y2ZmYTgzMmJlMDI1"
    ),
}
# Submit the form to the translation endpoint. A timeout is passed because
# requests does not time out on its own when no timeout is given.
response = requests.post(url, data=query_string, headers=headers, timeout=10)
# Fail early on HTTP-level errors instead of trying to parse an error page.
response.raise_for_status()

html_str = response.content.decode()  # response body as JSON text
dict_ret = json.loads(html_str)  # JSON text -> Python objects

# A rejected request (e.g. the errno 997/998 replies described by the author)
# carries no "trans_result" key; report the server's error code and message
# instead of failing with an opaque KeyError on the lookup below.
if "trans_result" not in dict_ret:
    raise SystemExit(
        "翻译失败:errno={} errmsg={}".format(
            dict_ret.get("errno"), dict_ret.get("errmsg")
        )
    )

# First translated segment of the reply.
ret = dict_ret["trans_result"]["data"][0]["dst"]
print("翻译结果是:", ret)
在 2018 年时还不会出现 {'errno': 997, 'errmsg': '未知错误', 'query': '你好世界', 'from': 'zh', 'to': 'en', 'error': 997} 这样的错误。但后来百度加强了反爬,请求参数中多出了一个叫 sign 的字段(也就是下图中的 sign)。现在 query(也就是你输入的内容)不同,对应的 sign 也会随之变化,所以就出现了很让人脑溢血的事情:要想这段代码不返回错误码 997 或 998,你必须每次先把内容在网页上翻译一遍,然后再去修改代码里的 sign 值。***吐血***!!!!
|