Python 爬虫 爬取淘宝店铺数据

您所在的位置:网站首页 > 爬取淘宝网的书包数据 > Python 爬虫 爬取淘宝店铺数据

Python 爬虫 爬取淘宝店铺数据

2024-04-26 22:55| 来源: 网络整理| 查看: 265

import csv
import json
import re
import sys

import requests  # sends the HTTP requests; third-party package, must be installed

# Commented-out snippet that would write the CSV header row:
# with open('taobao.csv', mode='a', encoding='utf-8', newline='') as f:
#     csv_writer = csv.writer(f)
#     csv_writer.writerow(['raw_title', 'pic_url', 'detail_url', 'view_price', 'item_loc', 'view_sales', 'nick'])

# Disguise: request headers copied from a browser session so the request
# looks like it comes from a regular browser.
# NOTE(review): the cookie below is a hard-coded session credential; it should
# not live in source control and has most likely expired -- replace it with a
# fresh value supplied from outside the file before running.
headers = {
    'cookie': 'cna=s/5FG78j/FUCAa8APiecOvNg; lgc=tb668512329; tracknick=tb668512329; thw=cn; enc=5QzxAFeTLCIaj4DdlClUUmCfmppq0mVmYnRM4MnjLLB4RjqMpvuUixwqmjkBvCn0Jgo9mK5a7GX5bTUVvYOjcKlG6Dcyihb49SfHSHh4p5w%3D; t=c1a7661aebc8b0eee31b756f0feeff62; _tb_token_=f17333878dd31; _m_h5_tk=4121cfdc611986d82be69f74d3c29f02_1659536900036; _m_h5_tk_enc=cfe86496d903c8670edb6df8d9008465; xlly_s=1; cookie2=17f624d84070bbd6d85563a647087846; _samesite_flag_=true; sgcookie=E100mMZhcjay7BJ0U6dkbhG6C500Ca%2FFJHGrQDTTkuu7sIBT4Vvt6geS1GV5dolt%2FZ14wi031qNjkp543s5U%2BulN9GYdFqm8S3V%2FxQ%2FyrrbqnGQ%3D; unb=2210627905944; uc3=lg2=Vq8l%2BKCLz3%2F65A%3D%3D&id2=UUpgRsItw%2BrsB7dvyw%3D%3D&nk2=F5RDKmf768KMcHQ%3D&vt3=F8dCv4GzuqQxFlJO5FQ%3D; csg=d213a0bb; cancelledSubSites=empty; cookie17=UUpgRsItw%2BrsB7dvyw%3D%3D; dnk=tb668512329; skt=7f611b532a6d5d3b; existShop=MTY1OTUyOTE2NA%3D%3D; uc4=nk4=0%40FY4I6earzOZXUhcMjuCdOoW0PkQqMw%3D%3D&id4=0%40U2gqyZJ81Yv14cp6ZGKPzfdzKRn7Ce%2F%2F; _cc_=VFC%2FuZ9ajQ%3D%3D; _l_g_=Ug%3D%3D; sg=94f; _nk_=tb668512329; cookie1=WvY2bcMyBjwC2%2FESfKPhqaOXs%2FXPxaxugpcVR2PVSmM%3D; v=0; mt=ci=0_1; uc1=cookie15=W5iHLLyFOGW7aA%3D%3D&cookie14=UoexOzkHHad4ew%3D%3D&cookie16=U%2BGCWk%2F74Mx5tgzv3dWpnhjPaQ%3D%3D&cookie21=U%2BGCWk%2F7pY%2FF&existShop=false&pas=0; alitrackid=www.taobao.com; lastalitrackid=www.taobao.com; JSESSIONID=6C253D0599A8D872843E8F57D3E9FBC4; tfstk=cPacBgTZHoojq0FU_rgfSRGdRUIRZlYZKPz7zsNvv9-Y8zzPixxyY4MOZxXC3h1..; l=eBrY7YtILf1CVXMtBOfwlurza77tJIRfguPzaNbMiOCP_75p5KhGW6xNhxL9CnGVn6rXR35Wn1oBBSYikyUBhJpKPJLCgsDLIdTh.; isg=BDg4Vvj3rnHVaMLWcAwzn0AICebKoZwrBlMZpXKpvHMmjdl3GrJQu5ivRYU93VQD; x5sec=7b227365617263686170703b32223a226236666434373934313735396466356131393239396366306264393430326536434a7a33715a6347454d486e74766657374e507174774561447a49794d5441324d6a63354d4455354e4451374d6a436e68594b652f502f2f2f2f384251414d3d227d',
    'referer': 'https://s.taobao.com/search?q=%E9%BB%91%E4%B8%9D%E6%83%85%E8%B6%A3&suggest=0_4&commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.jianhua.201856-taobao-item.2&ie=utf8&initiative_id=tbindexz_20170306&_input_charset=utf-8&wq=%E9%BB%91%E4%B8%9D&suggest_query=%E9%BB%91%E4%B8%9D&source=suggest',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36',
}

# Keys copied from every auction entry, in CSV column order.
AUCTION_FIELDS = (
    'raw_title',
    'pic_url',
    'detail_url',
    'view_price',
    'item_loc',
    'view_sales',
    'nick',
)

# Matching rule: capture whatever the page assigns to `g_page_config`, up to
# the last `;` on that line. Compiled once because it is reused for every page.
PAGE_CONFIG_RE = re.compile(r'g_page_config = (.*);')


def build_search_url(page):
    """Return the search-result URL for the given page number.

    The page number only affects the trailing ``s`` query parameter, which is
    set to ``page * 44`` (presumably a result offset with 44 items per page --
    TODO confirm against the site).
    """
    return f'https://s.taobao.com/search?ie=utf8&initiative_id=staobaoz_20220803&stats_click=search_radio_all%3A1&js=1&imgfile=&q=%E6%B3%B3%E8%A1%A3%E5%A5%B3%E5%A4%8F%E5%AD%A3&suggest=0_2&_input_charset=utf-8&wq=%E6%B3%B3%E8%A1%A3&suggest_query=%E6%B3%B3%E8%A1%A3&source=suggest&bcoffset=3&ntoffset=3&p4ppushleft=2%2C48&s={page * 44}'


def fetch_search_page(page, timeout=10):
    """Download one search-result page and return its body as text.

    Args:
        page: Page number passed to ``build_search_url``.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole run.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # 1. Send the request.
    response = requests.get(url=build_search_url(page), headers=headers, timeout=timeout)
    # 2. Get the data.
    return response.text


def extract_auctions(html_data):
    """Return the list of auction entries embedded in a search-result page.

    The page carries its data as a JSON object assigned to ``g_page_config``;
    the entries are read from ``mods -> itemlist -> data -> auctions``.

    Args:
        html_data: Text of the downloaded page.

    Raises:
        ValueError: If ``g_page_config`` is absent, is not valid JSON, or does
            not contain the expected auction list.
    """
    # 3. Parse the data (pull out the part we actually want).
    match = PAGE_CONFIG_RE.search(html_data)
    if match is None:
        raise ValueError('g_page_config not found in page')
    # Convert the JSON text into a Python dict; json.JSONDecodeError is a
    # ValueError subclass, so malformed JSON surfaces as the documented type.
    json_dict = json.loads(match.group(1))
    try:
        return json_dict['mods']['itemlist']['data']['auctions']
    except (KeyError, TypeError) as exc:
        raise ValueError(f'auction list missing from g_page_config: {exc!r}') from exc


def auction_row(auction):
    """Return the CSV row (values of ``AUCTION_FIELDS``, in order) for one auction.

    Raises:
        KeyError: If the auction entry lacks one of the fields.
    """
    return [auction[field] for field in AUCTION_FIELDS]


def main(pages=range(61, 71), csv_path='taobao.csv'):
    """Scrape the given result pages and append one CSV row per auction.

    Pages that cannot be downloaded or parsed, and auction entries that lack
    a field, are reported on stderr and skipped so one bad item does not
    abort the run.

    Args:
        pages: Iterable of page numbers to fetch.
        csv_path: File the rows are appended to (UTF-8, no header row).
    """
    # 4. Save the data. The file is opened once for the whole run instead of
    # once per row.
    with open(csv_path, mode='a', encoding='utf-8', newline='') as f:
        csv_writer = csv.writer(f)
        for page in pages:
            print(f'----正在爬取第{page}页----')
            try:
                html_data = fetch_search_page(page)
            except requests.RequestException as exc:
                print(f'page {page}: request failed: {exc}', file=sys.stderr)
                continue
            try:
                auctions = extract_auctions(html_data)
            except ValueError as exc:
                print(f'page {page}: could not parse page: {exc}', file=sys.stderr)
                continue
            for auction in auctions:
                try:
                    row = auction_row(auction)
                except KeyError as exc:
                    print(f'page {page}: skipping item without field {exc}', file=sys.stderr)
                    continue
                try:
                    print(*row)
                except UnicodeEncodeError:
                    # The console encoding cannot display this row; it is
                    # still written to the CSV below.
                    pass
                csv_writer.writerow(row)


if __name__ == '__main__':
    main()



【本文地址】


今日新闻


推荐新闻


CopyRight 2018-2019 办公设备维修网 版权所有 豫ICP备15022753号-3