liuyuqi-dellpc committed 5 years ago
commit af8951f532

3 changed files with 30 additions and 5 deletions:
  1. README.md         +19  -0
  2. crwal122.py       +10  -4
  3. requirements.txt   +1  -1

+ 19 - 0
README.md

@@ -0,0 +1,19 @@
+## car-nalysis
+
+Used-car analysis and driving-test analysis.
+
+### usage
+
+```
+cd my_project_dir
+virtualenv -p /opt/python/bin/python3 venv
+source venv/bin/activate
+pip install -r requirements.txt
+
+python crwal122.py
+```
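Since crwal122.py launches PhantomJS from a hard-coded path, it can help to verify the binary before running the crawler. A minimal sketch, assuming a Linux install under /opt; the check itself is illustrative, not part of this commit:

```
import os
import subprocess

# Assumption: this must match `phantomjspath` in crwal122.py.
PHANTOMJS = "/opt/phantomjs/bin/phantomjs"

if not os.path.isfile(PHANTOMJS):
    raise SystemExit("PhantomJS not found at %s; install it or edit phantomjspath" % PHANTOMJS)

# Print the version to confirm the binary is executable.
print(subprocess.check_output([PHANTOMJS, "--version"]).decode().strip())
```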

+ 10 - 4
crwal122.py

@@ -14,6 +14,7 @@ from selenium import webdriver
 from selenium.common.exceptions import NoSuchElementException
 from selenium.webdriver.common.keys import Keys
 from selenium.webdriver import ActionChains
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
 
 import os
 import re
@@ -22,7 +23,7 @@ import time
 
 base = r"https://sh.122.gov.cn/#/noticeDetail?fzjg=%E6%B2%AAA&tjyf=202007&fwdmgl=6003"
 chormepath = r"D:/Program-Files/browser-driver/chromedriver.exe"
-phantomjspath = r"D:/Program-Files/phantomjs-2.1.1-windows/bin/phantomjs.exe"
+phantomjspath = r"/opt/phantomjs/bin/phantomjs"
 
 link = []
 res = []
@@ -43,8 +44,11 @@ prefs = {
 }
 option.add_experimental_option('prefs', prefs)
 
-# driver = webdriver.PhantomJS(executable_path=phantomjspath)
-driver = webdriver.Chrome(executable_path=chormepath, options=option)
+desired_cap = DesiredCapabilities.PHANTOMJS.copy()
+desired_cap['phantomjs.page.settings.userAgent'] = 'Mozilla/5.0 (Linux; U; Android 8.1.0; zh-cn; BLA-AL00 Build/HUAWEIBLA-AL00) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/8.9 Mobile Safari/537.36'
+driver = webdriver.PhantomJS(
+    executable_path=phantomjspath, desired_capabilities=desired_cap)
+# driver = webdriver.Chrome(executable_path=chormepath, options=option)
 # driver.maximize_window()
 
 
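The hunk above swaps the Chrome driver for PhantomJS and spoofs a mobile user agent via DesiredCapabilities. If the commented-out Chrome path is restored later, the same UA can be passed as a Chrome command-line switch instead; a sketch, assuming the Selenium 3-style API and the script's own chormepath value (not part of this commit):

```
from selenium import webdriver

UA = ("Mozilla/5.0 (Linux; U; Android 8.1.0; zh-cn; BLA-AL00 Build/HUAWEIBLA-AL00) "
      "AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 "
      "Chrome/57.0.2987.132 MQQBrowser/8.9 Mobile Safari/537.36")

option = webdriver.ChromeOptions()
option.add_argument("--headless")          # windowless, like PhantomJS
option.add_argument("--user-agent=" + UA)  # same mobile UA as the PhantomJS capability

driver = webdriver.Chrome(
    executable_path=r"D:/Program-Files/browser-driver/chromedriver.exe", options=option)
```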
@@ -102,7 +106,8 @@ def crawl():
                 text = driver.find_element_by_xpath(
                     '//*[@id="pagination"]/span').text  # may raise NoSuchElementException
                 # e.g. "共2391条记录 1/120页" ("2,391 records in total, page 1/120")
-                pagesize = re.split("[/页]", re.search("/.*页  ", text).group())[1]
+                pagesize = re.split(
+                    "[/页]", re.search("/.*页  ", text).group())[1]
                 reportData = pd.DataFrame(
                     columns=["date", "place", "course1", "course2", "course3", "course4"])
                 for i in range(int(pagesize)):
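The reflowed line above extracts the page count from pagination text like 共2391条记录 1/120页 ("2,391 records in total, page 1/120"). A standalone check of that extraction, with the sample text taken from the script's own comment, plus a simpler capture-group variant:

```
import re

text = "共2391条记录 1/120页  "  # trailing spaces, as the regex in crwal122.py expects

# The script's approach: match "/120页  ", then split on "/" and "页".
pagesize = re.split("[/页]", re.search("/.*页  ", text).group())[1]
print(pagesize)  # -> 120

# Equivalent with a capture group, and tolerant of missing trailing spaces.
print(re.search(r"/(\d+)页", text).group(1))  # -> 120
```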
@@ -127,5 +132,6 @@ def crawl():
             reportData.to_csv("data/report" + month1 + ".csv", header=False)
     driver.close()
 
+
 if __name__ == "__main__":
     crawl()

+ 1 - 1
requirements.txt

@@ -1,4 +1,4 @@
 requests
 pandas
 numpy
-selenium=3.141.0
+selenium==2.48.0
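A note on this pin: `selenium=3.141.0` was not valid requirements syntax (pip needs `==`), and the downgrade to 2.48.0 presumably keeps `webdriver.PhantomJS` usable, since PhantomJS support was deprecated over the course of the Selenium 3.x releases and removed in 4.x.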