liuyuqi-dellpc 9 months ago
parent
commit
0852dc5111
3 changed files with 11 additions and 3 deletions
  1. 5 1
      README.md
  2. 0 1
      crawl_mrdx/crawl_mrdx.py
  3. 6 1
      main.py

+ 5 - 1
README.md

@@ -8,8 +8,12 @@ cd my_project_dir
 virtualenv -p /opt/python/bin/python3 venv
 source venv/bin/activate
 pip install -r requirements.txt
-python main.py --start 20210525 --end 20210525
 
+# method 1: pass the date range on the command line
+python main.py --start 20230822 --end 20230823
+
+# method 2: configure conf/config.json first (先配置 conf/config.json), then run without arguments
+python main.py
 ```
 
 ### 截图

+ 0 - 1
crawl_mrdx/crawl_mrdx.py

@@ -22,7 +22,6 @@ from concurrent.futures import ThreadPoolExecutor
 import random
 import utils.user_agent as user_agent
 
-
 class CrawlMrdx():
 
     def __init__(self):

+ 6 - 1
main.py

@@ -9,5 +9,10 @@
 @Desc    :   按照规则下载pdf文件,直到请求无效停止
 '''
 import crawl_mrdx
+import argparse
 if __name__ == '__main__':
-    crawl_mrdx.main(start = '20210525', end = '20210525')
+    parser = argparse.ArgumentParser(description='Download PDF files by rule for an optional date range.')  # CLI entry point; both flags are optional
+    parser.add_argument('--start', dest='start', type=str, default=None, help='start date, e.g. 20230822')  # dest must be a plain identifier so args.start resolves
+    parser.add_argument('--end', dest='end', type=str, default=None, help='end date, e.g. 20230823')  # dest must be a plain identifier so args.end resolves
+    args = parser.parse_args()
+    crawl_mrdx.main(start=args.start, end=args.end)  # each value is None when its flag is omitted