Browse Source

fix error

fish 9 months ago
parent
commit
5181e34840
6 changed files with 49 additions and 1 deletions
  1. 1 0
      .gitignore
  2. 2 0
      README.md
  3. 1 1
      crawl_sse/options.py
  4. 1 0
      crawl_sse/sse.py
  5. 17 0
      test/install_browser.py
  6. 27 0
      test/se_option.py

+ 1 - 0
.gitignore

@@ -5,3 +5,4 @@
 data/**/*.html
 build/
 dist/
+*.log

+ 2 - 0
README.md

@@ -24,6 +24,8 @@ python main.py download --extractor cninfo
 docker 打包交付运行:
 
 ```
+# docker run -d -p 9515:9515 -v $(pwd):/app mcr.microsoft.com/msedge/msedgedriver
+
 docker run -it --rm -v /data/crawl_sse:/app jianboy/crawl_sse:1.0.1 download --extractor cninfo
 
 ```

+ 1 - 1
crawl_sse/options.py

@@ -17,7 +17,7 @@ def parse_args():
     parse command line params
     """
     parser = argparse.ArgumentParser(description='search domain')
-    parser.add_argument('command',  help='command: generate, search', choices=['generate','search', 'help','version'] , default='help')
+    parser.add_argument('command',  help='command: crawl, download', choices=['crawl','download', 'help','version'] , default='help')
     parser.add_argument('--extractor', help='extractor: cninfo, sse', choices=['cninfo','sse'], default='cninfo')
     args = parser.parse_args()
 

+ 1 - 0
crawl_sse/sse.py

@@ -47,6 +47,7 @@ class Sse(object):
         chrome_option.add_argument('--headless')
         chrome_option.add_experimental_option("detach", True)
         chrome_option.add_experimental_option('excludeSwitches', ['enable-logging'])
+        # service = webdriver.Chrome(ChromeDriverManager().install())
         service = webdriver.EdgeService(EdgeChromiumDriverManager().install())
         self.driver = webdriver.Edge(service = service, options=chrome_option)
     

+ 17 - 0
test/install_browser.py

@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+"""
+@Contact :   liuyuqi.gov@msn.cn
+@Time    :   2024/07/05
+@License :   Copyright © 2017-2022 liuyuqi. All Rights Reserved.
+@Desc    :   install browser with selenium
+"""
+
+from selenium import webdriver
+from webdriver_manager.microsoft import EdgeChromiumDriverManager
+from webdriver_manager.chrome import ChromeDriverManager
+from selenium.webdriver.edge.service import Service
+
+edge = webdriver.Edge(service=Service(EdgeChromiumDriverManager().install()))
+# edge = webdriver.EdgeService(EdgeChromiumDriverManager().install())
+# chrome = webdriver.ChromeService(ChromeDriverManager().install())

+ 27 - 0
test/se_option.py

@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+"""
+@Contact :   liuyuqi.gov@msn.cn
+@Time    :   2024/07/05
+@License :   Copyright © 2017-2022 liuyuqi. All Rights Reserved.
+@Desc    :   
+"""
+
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+import time
+from selenium.webdriver.edge.service import Service
+
+service = Service(verbose = True)
+driver = webdriver.Edge(service=service)
+
+driver.get('https://bing.com')
+
+element = driver.find_element(By.ID, 'sb_form_q')
+element.send_keys('WebDriver')
+element.submit()
+
+time.sleep(5)
+driver.quit()
+
+