|
@@ -14,6 +14,7 @@ from selenium import webdriver
|
|
|
from selenium.common.exceptions import NoSuchElementException
|
|
|
from selenium.webdriver.common.keys import Keys
|
|
|
from selenium.webdriver import ActionChains
|
|
|
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
|
|
|
|
|
import os
|
|
|
import re
|
|
@@ -22,7 +23,7 @@ import time
|
|
|
|
|
|
base = r"https://sh.122.gov.cn/#/noticeDetail?fzjg=%E6%B2%AAA&tjyf=202007&fwdmgl=6003"
|
|
|
chormepath = r"D:/Program-Files/browser-driver/chromedriver.exe"
|
|
|
-phantomjspath = r"D:/Program-Files/phantomjs-2.1.1-windows/bin/phantomjs.exe"
|
|
|
+phantomjspath = r"/opt/phantomjs/bin/phantomjs"
|
|
|
|
|
|
link = []
|
|
|
res = []
|
|
@@ -43,8 +44,11 @@ prefs = {
|
|
|
}
|
|
|
option.add_experimental_option('prefs', prefs)
|
|
|
|
|
|
-# driver = webdriver.PhantomJS(executable_path=phantomjspath)
|
|
|
-driver = webdriver.Chrome(executable_path=chormepath, options=option)
|
|
|
+desired_cap = DesiredCapabilities.PHANTOMJS.copy()
|
|
|
+desired_cap['phantomjs.page.settings.userAgent'] = 'Mozilla/5.0 (Linux; U; Android 8.1.0; zh-cn; BLA-AL00 Build/HUAWEIBLA-AL00) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/8.9 Mobile Safari/537.36'
|
|
|
+driver = webdriver.PhantomJS(
|
|
|
+ executable_path=phantomjspath, desired_capabilities=desired_cap)
|
|
|
+# driver = webdriver.Chrome(executable_path=chormepath, options=option)
|
|
|
# driver.maximize_window()
|
|
|
|
|
|
|
|
@@ -102,7 +106,8 @@ def crawl():
|
|
|
text = driver.find_element_by_xpath(
|
|
|
'//*[@id="pagination"]/span').text # 有异常
|
|
|
# 共2391条记录 1/120页
|
|
|
- pagesize = re.split("[/页]", re.search("/.*页 ", text).group())[1]
|
|
|
+ pagesize = re.split(
|
|
|
+ "[/页]", re.search("/.*页 ", text).group())[1]
|
|
|
reportData = pd.DataFrame(
|
|
|
columns=["date", "place", "course1", "course2", "course3", "course4"])
|
|
|
for i in range(int(pagesize)):
|
|
@@ -127,5 +132,6 @@ def crawl():
|
|
|
reportData.to_csv("data/report" + month1 + ".csv", header=False)
|
|
|
driver.close()
|
|
|
|
|
|
+
|
|
|
if __name__ == "__main__":
|
|
|
crawl()
|