python 爬虫 xpath实战爬取房价

您所在的位置: 网站首页 > 爬虫违法 > python 爬虫 xpath实战爬取房价

python 爬虫 xpath实战爬取房价

#python 爬虫 xpath实战爬取房价| 来源: 网络整理| 查看: 265

import requests
from lxml import etree

class Sougou_Spider(object):
    """Scrape the Lianjia second-hand housing ("ershoufang") index page.

    The spider downloads the page at ``self.uel`` with a browser-like
    ``User-Agent`` header, then prints one line per listing ``<li>`` found
    under ``<ul class="sellListContent">``.
    """

    # Seconds to wait on the server before giving up. Without a timeout,
    # ``requests.get`` can block indefinitely on an unresponsive host.
    REQUEST_TIMEOUT = 10

    # XPath of each printed field, relative to a listing ``<li>``, in the
    # order the fields appear on the output line.
    _FIELD_XPATHS = (
        "./div/div/a/text()",  # title
        './div[1]/div[2]/div[1]/a/text()',  # location
        './div[1]/div[3]/div[1]/text()',  # layout and floor area
        './div[1]/div[4]/text()',  # number of followers
        './div[1]/div[5]/span/text()',  # selling points
        './div[1]/div[6]/div[1]/span/text()',  # total price
        './div[1]/div[6]/div[2]/span//text()',  # price per square metre
    )

    def __init__(self):
        """Set the target URL and the request headers."""
        # NOTE: the attribute is spelled ``uel`` (sic); the name is kept so
        # that existing code reading it keeps working.
        self.uel = "https://cs.lianjia.com/ershoufang/"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/94.0.4606.71 Safari/537.36 SE 2.X MetaSr 1.0 "
        }

    def get_data_index(self):
        """Download the index page.

        Returns:
            The response body as text when the status code is 200,
            otherwise ``None``.

        Raises:
            requests.RequestException: propagated from ``requests.get``,
                including a timeout after ``REQUEST_TIMEOUT`` seconds.
        """
        response = requests.get(
            url=self.uel,
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        if response.status_code == 200:
            return response.text
        return None

    # Parse data
    def parse_data_index(self, response):
        """Print the fields of every listing found in *response*.

        Args:
            response: HTML text of the index page, or ``None`` / an empty
                string when the download failed.

        Each listing is printed as seven lists of extracted text nodes
        (title, location, layout/area, followers, selling points, total
        price, price per square metre) joined by ``"----"``. Nothing is
        printed when *response* is empty.
        """
        # get_data_index() returns None on a non-200 status; handing that to
        # the HTML parser would raise, so bail out early instead.
        if not response:
            return
        html = etree.HTML(response)
        # Defensive: skip parsing if the parser produced no root element.
        if html is None:
            return
        for data in html.xpath('//ul[@class="sellListContent"]//li'):
            print(*(data.xpath(path) for path in self._FIELD_XPATHS), sep="----")

    def run(self):
        """Fetch the index page and print the parsed listings."""
        response = self.get_data_index()
        self.parse_data_index(response)

# Script entry point: build the spider and run one fetch-and-print pass.
if __name__ == "__main__":
    Sougou_Spider().run()



【本文地址】


今日新闻


推荐新闻


    CopyRight 2018-2019 办公设备维修网 版权所有 豫ICP备15022753号-3