Browse Source

链接修复

liuyuqi-dellpc 1 year ago
parent
commit
6a257ba7b3
2 changed files with 9 additions and 3 deletions
  1. 1 1
      crawl_mrdx/api.py
  2. 8 2
      crawl_mrdx/crawl_mrdx.py

+ 1 - 1
crawl_mrdx/api.py

@@ -10,6 +10,6 @@
 _host = r"http://mrdx.cn"
 api_host=r"http://xx.com"
 
-pdfUrl = _host + "/PDF/%s/0%s.pdf"
+pdfUrl = _host + "/PDF/%s/%s.pdf"
 get_version = api_host+"/api/get_version"
 

+ 8 - 2
crawl_mrdx/crawl_mrdx.py

@@ -80,11 +80,17 @@ class CrawlMrdx():
             print(current_date.strftime("%Y%m%d"))
             current_date_str = current_date.strftime("%Y%m%d")
             for j in range(1, 17):
-                fileName = r"./data/%s/0%s.pdf" % (current_date_str, j)
+                if j<10:
+                    fileName = r"./data/%s/0%s.pdf" % (current_date_str, j)
+                else:
+                    fileName= r"./data/%s/%s.pdf" % (current_date_str, j)
                 if(os.path.exists(fileName)):
                     print("跳过" + fileName)
                 else:
-                    url = api.pdfUrl % (current_date_str, j)
+                    if j<10:
+                        url = api.pdfUrl % (current_date_str, "0"+str(j))
+                    else:
+                        url = api.pdfUrl % (current_date_str, j)
                     # 检查链接有效性
                     response = requests.head(url)
                     if response.status_code == 200: