python 模拟浏览器

您所在的位置:网站首页 python模拟浏览器打开url python 模拟浏览器

python 模拟浏览器

2024-05-17 19:10| 来源: 网络整理| 查看: 265

想用python模拟浏览器访问web的方法测试些东西,有哪几种方法呢?

一类:单纯的访问web,不解析其js,css等。

1. urllib2

# -*- coding: utf-8 -*-
"""Fetch a page through an HTTP proxy using urllib2 (Python 2)."""
import contextlib
import urllib2


def Furllib2(ip, port, url, timeout):
    """Request ``url`` through the HTTP proxy at ``ip:port`` and dump the reply.

    A proxy-aware opener carrying a ``Mozilla/5.0`` User-agent header is built
    and installed as the module-wide urllib2 opener. On success the final URL,
    status code, headers and body are printed.

    Args:
        ip: proxy host, interpolated into ``http://<ip>:<port>``.
        port: proxy port, interpolated the same way.
        url: address to fetch.
        timeout: passed to ``urllib2.urlopen`` as its ``timeout`` argument.

    Returns:
        ``True`` when the request and the printing succeed, ``0`` when any
        exception is raised along the way.
    """
    proxydict = {'http': "http://%s:%s" % (ip, port)}
    print(proxydict)

    opener = urllib2.build_opener(urllib2.ProxyHandler(proxydict))
    opener.addheaders = [('User-agent', 'Mozilla/5.0')]
    # install_opener() is global: later urllib2.urlopen() calls in this
    # process go through the same proxy as well.
    urllib2.install_opener(opener)

    try:
        # closing() releases the response even if printing it fails.
        with contextlib.closing(
                urllib2.urlopen(url, timeout=timeout)) as response:
            print(response.geturl())
            print(response.getcode())
            print(response.info())
            print(response.read())
        return True
    except Exception:
        # Exception rather than a bare except, so that Ctrl-C and
        # SystemExit are not swallowed.
        print('some errors occored' + '-' * 50)
        return 0


def main():
    """Run a single proxied request against a sample site and print the result."""
    proxyip = '14.18.16.69'
    proxyport = '80'
    url = 'http://www.cnblogs.com/'
    timeout = 4
    print(Furllib2(proxyip, proxyport, url, timeout))


if __name__ == "__main__":
    main()

 2. mechanize(与网站的自动化交互)

http://wwwsearch.sourceforge.net/mechanize/doc.html

def Fmechanize(url):
    """Open ``url`` with a cookie-aware mechanize opener and print the outcome.

    Args:
        url: address to request with a GET.

    Returns:
        ``True`` after the final URL and response headers were printed,
        ``0`` if opening or printing raised an exception.
    """
    cookies = mechanize.CookieJar()
    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
    try:
        r = opener.open(url)  # GET
        # A POST would pass a body: opener.open("http://example.com/", data)
        try:
            print(r.geturl())
            print(r.info())
        finally:
            # Release the response whether or not printing succeeded.
            r.close()
        return True
    except Exception as exc:
        # Report the failure instead of hiding it; Exception (not a bare
        # except) lets Ctrl-C and SystemExit propagate.
        print('Fmechanize failed: %r' % (exc,))
        return 0

 二类:模拟浏览器,使用firefox等的浏览器引擎,支持js,css等。

1. selenium 的firefox或者chrome等驱动,但是由于要打开一个浏览器,所以会比较慢(浏览器驱动可以到selenium官网上下载,也可以到firefox插件处搜索)

def Fselenium_firefox(ip, port, url, timeout):
    """Visit ``url`` in a Firefox instance configured to use an HTTP proxy.

    Args:
        ip: proxy host written to ``network.proxy.http``.
        port: proxy port; converted with ``int()`` before being written to
            ``network.proxy.http_port``.
        url: page to load.
        timeout: forwarded to ``webdriver.Firefox`` as ``timeout``.

    Returns:
        ``1`` when the page was loaded and its cookies printed, ``0`` when
        the browser could not be started or the visit raised an exception.
    """
    try:
        profile = webdriver.FirefoxProfile()
        profile.set_preference('network.proxy.type', 1)  # 1 = manual proxy settings
        profile.set_preference('network.proxy.http', ip)
        # The port preference is an integer pref; callers may hand in a
        # string such as '80', so normalise it here.
        profile.set_preference('network.proxy.http_port', int(port))
        profile.update_preferences()
        driver = webdriver.Firefox(profile, timeout=timeout)
    except Exception:
        # print_exc() writes the traceback itself and returns None, so it
        # must not be wrapped in print.
        traceback.print_exc()
        return 0

    try:
        driver.get(url)
        time.sleep(5)  # fixed wait before reading the cookies
        print(driver.get_cookies())
        return 1
    except Exception:
        traceback.print_exc()
        return 0
    finally:
        # Single place that shuts the browser down on both paths.
        driver.quit()

 2. selenium :headless test使用selenium+ phantomjs驱动,无需打开浏览器,但是支持js的模拟浏览器动作,也就是说和你手工打开是没有区别的。

http://selenium.googlecode.com/git/docs/api/py/api.html

def Fselenium_phantomjs(ip, port, url, timeout):
    """Visit ``url`` with a headless PhantomJS driver behind an HTTP proxy.

    Args:
        ip: proxy host, used in the ``--proxy=<ip>:<port>`` service argument.
        port: proxy port, used in the same argument.
        url: page to load.
        timeout: page-load timeout handed to ``set_page_load_timeout``.

    Returns:
        ``True`` when the driver's current URL could be read and printed
        (a failed or timed-out load is only logged), ``0`` when the driver
        could not be started or the current URL could not be read.
    """
    service_args = ['--proxy=%s:%s' % (ip, port), '--proxy-type=http']
    print(service_args)

    try:
        driver = webdriver.PhantomJS(service_args=service_args)
        # To point at a specific phantomjs binary instead:
        # driver = webdriver.PhantomJS(
        #     "/root/phantomjs-1.9.7-linux-x86_64/bin/phantomjs",
        #     service_args=service_args)
    except Exception:
        # Without a driver there is nothing further to try.
        traceback.print_exc()
        return 0

    try:
        try:
            driver.set_page_load_timeout(timeout)
            driver.get(url)
            time.sleep(4)  # fixed wait after the load call
        except Exception:
            # Best effort: log the load problem and still report whatever
            # URL the browser ended up on.
            traceback.print_exc()

        try:
            print(driver.current_url)
            return True
        except Exception:
            traceback.print_exc()
            return 0
    finally:
        # Always terminate the PhantomJS process started above.
        driver.quit()

 3. qt,网上戗来的代码

http://qt-project.org/wiki/PySide#PySide.QtWebKit.PySide.QtWebKit.QWebView.url

from PyQt4 import QtCore, QtGui, QtWebKit, QtNetwork


class cookieJar(QtNetwork.QNetworkCookieJar):
    """Cookie jar that is seeded from the owning window's settings.

    NOTE: the original snippet carried a commented-out ``setCookiesFromUrl``
    override that appended each cookie's raw form to the settings value.
    With it disabled, nothing in this class writes cookies back to the
    settings; they are only read at construction time.
    """

    def __init__(self, cookiesKey, parent=None):
        """Create the jar and restore cookies stored under ``cookiesKey``.

        Args:
            cookiesKey: settings key the cookies are looked up under.
            parent: owning window; its ``settings`` attribute is read when
                present. May be ``None``, in which case nothing is restored.
        """
        super(cookieJar, self).__init__(parent)
        self.mainWindow = parent
        self.cookiesKey = cookiesKey

        # parent defaults to None, so do not assume a settings attribute.
        settings = getattr(parent, 'settings', None)
        if settings is None:
            return

        cookiesValue = settings.value(self.cookiesKey)
        if cookiesValue:
            cookiesList = QtNetwork.QNetworkCookie.parseCookies(cookiesValue)
            self.setAllCookies(cookiesList)

    def deleteCookie(self, cookieList):
        """Drop every cookie in ``cookieList`` from this jar.

        NOTE(review): the original body ignored ``cookieList`` and passed an
        empty list as a settings key; removal from the in-memory jar is the
        presumed intent - confirm. Stored settings are not touched.
        """
        remaining = [c for c in self.allCookies() if c not in cookieList]
        self.setAllCookies(remaining)


class webView(QtWebKit.QWebView):
    """Web view whose network access manager uses a ``cookieJar``."""

    def __init__(self, cookiesKey, url, parent=None):
        """Build the view and attach a cookie jar to its page.

        Args:
            cookiesKey: forwarded to ``cookieJar``.
            url: accepted but not used here; the caller loads the URL itself.
            parent: parent widget, also handed to ``cookieJar``.
        """
        super(webView, self).__init__(parent)
        self.cookieJar = cookieJar(cookiesKey, parent)
        self.page().networkAccessManager().setCookieJar(self.cookieJar)


class myWindow(QtGui.QMainWindow):
    """Main window with a tab widget, a URL field and an "Add Tab" action."""

    def __init__(self, parent=None):
        super(myWindow, self).__init__(parent)

        self.cookiesKey = "cookies"
        # Created first so the attribute exists before any tab (and its
        # cookie jar) can be constructed.
        self.settings = QtCore.QSettings()

        self.centralwidget = QtGui.QWidget(self)

        self.tabWidget = QtGui.QTabWidget(self.centralwidget)
        self.tabWidget.setTabsClosable(True)

        self.verticalLayout = QtGui.QVBoxLayout(self.centralwidget)
        self.verticalLayout.addWidget(self.tabWidget)

        self.actionTabAdd = QtGui.QAction(self)
        self.actionTabAdd.setText("Add Tab")
        self.actionTabAdd.triggered.connect(self.on_actionTabAdd_triggered)

        self.lineEdit = QtGui.QLineEdit(self)
        self.lineEdit.setText("http://www.example.com")

        self.toolBar = QtGui.QToolBar(self)
        self.toolBar.addAction(self.actionTabAdd)
        self.toolBar.addWidget(self.lineEdit)

        self.addToolBar(
            QtCore.Qt.ToolBarArea(QtCore.Qt.TopToolBarArea), self.toolBar)
        self.setCentralWidget(self.tabWidget)

    @QtCore.pyqtSlot()
    def on_actionShowCookies_triggered(self):
        """Print the raw form of every cookie held by the current tab.

        Nothing in this snippet connects a signal to this slot.
        """
        currentView = self.tabWidget.currentWidget()
        jar = currentView.page().networkAccessManager().cookieJar()
        for cookie in jar.allCookies():
            print(cookie.toRawForm())

    @QtCore.pyqtSlot()
    def on_actionTabAdd_triggered(self):
        """Open a new tab for the URL field's text, or about:blank if empty."""
        url = self.lineEdit.text()
        self.addNewTab(url or 'about:blank')

    def addNewTab(self, url):
        """Create a ``webView`` tab, start loading ``url`` and select the tab."""
        tabName = u"Tab {0}".format(str(self.tabWidget.count()))

        view = webView(self.cookiesKey, url, self)
        view.loadFinished.connect(self.on_tabWidget_loadFinished)
        view.load(QtCore.QUrl(url))

        tabIndex = self.tabWidget.addTab(view, tabName)
        self.tabWidget.setCurrentIndex(tabIndex)

    @QtCore.pyqtSlot()
    def on_tabWidget_loadFinished(self):
        """Read the stored cookie value once a tab finished loading.

        The value is looked up as in the original but not used further.
        """
        cookies2 = self.settings.value(self.cookiesKey)


if __name__ == "__main__":
    import sys

    app = QtGui.QApplication(sys.argv)
    app.setApplicationName('myWindow')

    main = myWindow()
    main.resize(666, 333)
    main.show()

    sys.exit(app.exec_())

 

4. qt-headless

http://qt-project.org/wiki/PySide#PySide.QtWebKit.PySide.QtWebKit.QWebView.url

import sys

# Explicit names instead of wildcard imports, so it is clear where each
# class comes from and nothing else leaks into the namespace.
from PyQt4.QtCore import QUrl
from PyQt4.QtGui import QApplication
from PyQt4.QtWebKit import QWebPage


class Render(QWebPage):
    """Load a URL in a QWebPage without showing a window.

    After construction returns, ``frame`` holds the page's main frame as it
    was when the ``loadFinished`` signal fired.
    """

    def __init__(self, url):
        """Start the Qt application, load ``url`` and wait for it to finish.

        Args:
            url: address handed to ``QUrl`` and loaded into the main frame.
        """
        # NOTE(review): the application object is created before the page is
        # initialised, as in the original; presumably required - keep order.
        self.app = QApplication(sys.argv)
        QWebPage.__init__(self)
        self.loadFinished.connect(self._loadFinished)
        self.mainFrame().load(QUrl(url))
        # Runs the event loop; returns once _loadFinished() calls quit().
        self.app.exec_()

    def _loadFinished(self, result):
        """Keep the main frame and stop the event loop.

        ``result`` is the value delivered by ``loadFinished``; it is not
        inspected here.
        """
        self.frame = self.mainFrame()
        self.app.quit()


url = 'http://webscraping.com'
r = Render(url)
html = r.frame.toHtml()
print(html)

 5. splinter :打开浏览器,模拟操作,python的

http://splinter.cobrateam.info/docs/tutorial.html

# Open a browser through splinter and navigate to a page.
from splinter import Browser

url = "http://www.cnblogs.com"

# Browser() with no arguments uses splinter's default driver.
browser = Browser()
browser.visit(url)

 

 

 

具体用哪个要看你有什么具体的需求了



【本文地址】


今日新闻


推荐新闻


CopyRight 2018-2019 办公设备维修网 版权所有 豫ICP备15022753号-3