用Python获取网页数据 |
您所在的位置:网站首页 › python自动刷新网页获取数据 › 用Python获取网页数据 |
#coding=utf-8
import re
import time
import urllib.request

# lxml provides a rich API for working with XML/HTML documents.
from lxml import etree
def getHtml(url):
    """Fetch the raw response body for the given URL.

    Args:
        url: Address to open; passed unchanged to ``urllib.request.urlopen``.

    Returns:
        The complete response body as ``bytes``.

    Raises:
        Any exception raised by ``urllib.request.urlopen`` or by reading the
        response (for example ``urllib.error.URLError``) propagates to the
        caller.
    """
    # The context manager closes the response even if read() raises, so the
    # underlying connection is never leaked.
    with urllib.request.urlopen(url) as page:
        return page.read()
def parserHtml(page):
    """Parse a fetched page and return the ball numbers it contains.

    Args:
        page: Raw page body as ``bytes``. It is decoded as UTF-8 and bytes
            that cannot be decoded are dropped.

    Returns:
        A single string made of the text of every ``<li class="ball_red">``
        element in document order followed by the text of the first
        ``<li class="ball_blue">`` element, separated by single spaces.

    Raises:
        IndexError: If the page contains no ``<li class="ball_blue">``
            element.
    """
    doc = etree.HTML(page.decode('utf-8', 'ignore'))
    red_nodes = doc.xpath('//li[@class="ball_red"]')
    blue_nodes = doc.xpath('//li[@class="ball_blue"]')

    # An element's .text is None when it has no direct text; substitute an
    # empty string so the result can still be assembled.
    numbers = [node.text or "" for node in red_nodes]
    numbers.append(blue_nodes[0].text or "")

    # Joining once avoids rebuilding the string on every iteration.
    return " ".join(numbers)
def saveFile(filename, contents):
    """Write text to a file as UTF-8, replacing any existing content.

    Args:
        filename: Path of the file to create or truncate.
        contents: Text (``str``) to encode as UTF-8 and write.

    Returns:
        None.
    """
    # The context manager guarantees the handle is closed even if encoding
    # or writing raises.
    with open(filename, 'wb') as fh:
        fh.write(contents.encode(encoding='utf-8'))
# Read the locally saved index page and collect every anchor in it.
# NOTE(review): the file is read with the platform default encoding, as in
# the original script -- confirm the actual encoding of source.html.
with open('source.html', 'r') as index_file:
    indexHtml = index_file.read()
doc = etree.HTML(indexHtml)
all_href = doc.xpath('//a')

# Open the output file; the context manager closes it even if a fetch or
# parse fails part-way through the loop.
with open('ssq.txt', 'wb') as result:
    for href in all_href:
        link = href.get('href')
        if not link:
            # An anchor without an href has nothing to fetch; skip it rather
            # than passing None to urlopen.
            continue
        page = getHtml(link)
        # href.text is None when the anchor has no direct text.
        line = (href.text or '') + ':' + parserHtml(page) + '\n'
        print(line)
        # Write the result line to ssq.txt.
        result.write(line.encode(encoding='utf-8'))
        # Sleep 2s before the next request.
        time.sleep(2)

# Single-page usage kept from the original for reference:
# page = getHtml("http://kaijiang.500.com/shtml/ssq/17097.shtml")
# saveFile("ssq.txt", strNo)
今日新闻 |
推荐新闻 |
CopyRight 2018-2019 办公设备维修网 版权所有 豫ICP备15022753号-3 |