python 爬虫 xpath实战爬取房价 |
您所在的位置:网站首页 › 爬虫违法 › python 爬虫 xpath实战爬取房价 |
import sys
from typing import Optional

import requests
from lxml import etree


class Sougou_Spider(object):
    """Download the Lianjia listing index page and print the fields of each listing."""

    def __init__(self):
        # NOTE: "uel" is a misspelling of "url"; the attribute name is kept so
        # that any existing code reading it keeps working.
        self.uel = "https://cs.lianjia.com/ershoufang/"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/94.0.4606.71 Safari/537.36 SE 2.X MetaSr 1.0 "
        }

    def get_data_index(self, timeout: float = 10) -> Optional[str]:
        """Fetch the index page.

        Args:
            timeout: Seconds to wait for the server before giving up. Without
                a timeout ``requests`` can block forever on a stalled server.

        Returns:
            The response body as text when the status code is 200,
            otherwise ``None``.

        Raises:
            requests.RequestException: Propagated unchanged on connection
                errors or when the timeout expires.
        """
        response = requests.get(url=self.uel, headers=self.headers, timeout=timeout)
        if response.status_code == 200:
            return response.text
        return None

    # Parse the data
    def parse_data_index(self, response: Optional[str]) -> None:
        """Extract every listing from the page HTML and print its fields.

        Args:
            response: HTML text of the index page. ``None`` or an empty
                string is ignored, because ``etree.HTML`` cannot parse it.
        """
        if not response:
            return

        html = etree.HTML(response)
        data_list = html.xpath('//ul[@class="sellListContent"]//li')
        for data in data_list:
            # Each xpath() call returns a list of text nodes (possibly empty).
            title = data.xpath("./div/div/a/text()")  # title
            location = data.xpath('./div[1]/div[2]/div[1]/a/text()')  # location
            area = data.xpath('./div[1]/div[3]/div[1]/text()')  # house layout and floor area
            pay_close = data.xpath('./div[1]/div[4]/text()')  # number of followers
            advantage = data.xpath('./div[1]/div[5]/span/text()')  # advantages
            price = data.xpath('./div[1]/div[6]/div[1]/span/text()')  # total price
            square_metre = data.xpath('./div[1]/div[6]/div[2]/span//text()')  # price per square metre
            print(title, location, area, pay_close, advantage, price, square_metre, sep="----")

    def run(self) -> None:
        """Fetch the index page and print its listings, reporting a failed fetch."""
        response = self.get_data_index()
        if response is None:
            # A non-200 answer yields None; say so instead of crashing in the parser.
            print("Request to %s did not return HTTP 200; nothing to parse." % self.uel,
                  file=sys.stderr)
            return
        self.parse_data_index(response)


if __name__ == '__main__':
    spider = Sougou_Spider()
    spider.run()
CopyRight 2018-2019 办公设备维修网 版权所有 豫ICP备15022753号-3 |