liuyuqi-dellpc committed 5 years ago
commit af8951f532

3 changed files with 30 additions and 5 deletions:
  1. README.md         +19  -0
  2. crwal122.py       +10  -4
  3. requirements.txt   +1  -1

+ 19 - 0
README.md

@@ -0,0 +1,19 @@
+## car-nalysis
+
+Used-car analysis and driving-test analysis.
+
+### usage
+
+```
+cd my_project_dir
+virtualenv -p /opt/python/bin/python3 venv
+source venv/bin/activate
+pip install -r requirements.txt
+
+python crwal122.py
+```
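Since crwal122.py launches PhantomJS from a hard-coded path, it can help to verify the binary before running the crawler. A minimal sketch, assuming a Linux install under /opt; the check itself is illustrative, not part of this commit:

```
import os
import subprocess

# Assumption: this must match `phantomjspath` in crwal122.py.
PHANTOMJS = "/opt/phantomjs/bin/phantomjs"

if not os.path.isfile(PHANTOMJS):
    raise SystemExit("PhantomJS not found at %s; install it or edit phantomjspath" % PHANTOMJS)

# Print the version to confirm the binary is executable.
print(subprocess.check_output([PHANTOMJS, "--version"]).decode().strip())
```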

+ 10 - 4
crwal122.py

@@ -14,6 +14,7 @@ from selenium import webdriver
 from selenium.common.exceptions import NoSuchElementException
 from selenium.webdriver.common.keys import Keys
 from selenium.webdriver import ActionChains
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
 
 import os
 import re
@@ -22,7 +23,7 @@ import time
 
 base = r"https://sh.122.gov.cn/#/noticeDetail?fzjg=%E6%B2%AAA&tjyf=202007&fwdmgl=6003"
 chormepath = r"D:/Program-Files/browser-driver/chromedriver.exe"
-phantomjspath = r"D:/Program-Files/phantomjs-2.1.1-windows/bin/phantomjs.exe"
+phantomjspath = r"/opt/phantomjs/bin/phantomjs"
 
 link = []
 res = []
@@ -43,8 +44,11 @@ prefs = {
 }
 option.add_experimental_option('prefs', prefs)
 
-# driver = webdriver.PhantomJS(executable_path=phantomjspath)
-driver = webdriver.Chrome(executable_path=chormepath, options=option)
+desired_cap = DesiredCapabilities.PHANTOMJS.copy()
+desired_cap['phantomjs.page.settings.userAgent'] = 'Mozilla/5.0 (Linux; U; Android 8.1.0; zh-cn; BLA-AL00 Build/HUAWEIBLA-AL00) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/8.9 Mobile Safari/537.36'
+driver = webdriver.PhantomJS(
+    executable_path=phantomjspath, desired_capabilities=desired_cap)
+# driver = webdriver.Chrome(executable_path=chormepath, options=option)
 # driver.maximize_window()
 
 
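The hunk above swaps the Chrome driver for PhantomJS and spoofs a mobile user agent via DesiredCapabilities. If the commented-out Chrome path is restored later, the same UA can be passed as a Chrome command-line switch instead; a sketch, assuming the Selenium 3-style API and the script's own chormepath value (not part of this commit):

```
from selenium import webdriver

UA = ("Mozilla/5.0 (Linux; U; Android 8.1.0; zh-cn; BLA-AL00 Build/HUAWEIBLA-AL00) "
      "AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 "
      "Chrome/57.0.2987.132 MQQBrowser/8.9 Mobile Safari/537.36")

option = webdriver.ChromeOptions()
option.add_argument("--headless")          # windowless, like PhantomJS
option.add_argument("--user-agent=" + UA)  # same mobile UA as the PhantomJS capability

driver = webdriver.Chrome(
    executable_path=r"D:/Program-Files/browser-driver/chromedriver.exe", options=option)
```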
@@ -102,7 +106,8 @@ def crawl():
                 text = driver.find_element_by_xpath(
                     '//*[@id="pagination"]/span').text  # may raise NoSuchElementException
                 # e.g. "共2391条记录 1/120页" ("2,391 records in total, page 1/120")
-                pagesize = re.split("[/页]", re.search("/.*页  ", text).group())[1]
+                pagesize = re.split(
+                    "[/页]", re.search("/.*页  ", text).group())[1]
                 reportData = pd.DataFrame(
                     columns=["date", "place", "course1", "course2", "course3", "course4"])
                 for i in range(int(pagesize)):
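The reflowed line above extracts the page count from pagination text like 共2391条记录 1/120页 ("2,391 records in total, page 1/120"). A standalone check of that extraction, with the sample text taken from the script's own comment, plus a simpler capture-group variant:

```
import re

text = "共2391条记录 1/120页  "  # trailing spaces, as the regex in crwal122.py expects

# The script's approach: match "/120页  ", then split on "/" and "页".
pagesize = re.split("[/页]", re.search("/.*页  ", text).group())[1]
print(pagesize)  # -> 120

# Equivalent with a capture group, and tolerant of missing trailing spaces.
print(re.search(r"/(\d+)页", text).group(1))  # -> 120
```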
@@ -127,5 +132,6 @@ def crawl():
             reportData.to_csv("data/report" + month1 + ".csv", header=False)
     driver.close()
 
+
 if __name__ == "__main__":
     crawl()

+ 1 - 1
requirements.txt

@@ -1,4 +1,4 @@
 requests
 pandas
 numpy
-selenium=3.141.0
+selenium==2.48.0
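A note on this pin: `selenium=3.141.0` was not valid requirements syntax (pip needs `==`), and the downgrade to 2.48.0 presumably keeps `webdriver.PhantomJS` usable, since PhantomJS support was deprecated over the course of the Selenium 3.x releases and removed in 4.x.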