|
@@ -80,11 +80,17 @@ class CrawlMrdx():
|
|
|
print(current_date.strftime("%Y%m%d"))
|
|
|
current_date_str = current_date.strftime("%Y%m%d")
|
|
|
for j in range(1, 17):
|
|
|
- fileName = r"./data/%s/0%s.pdf" % (current_date_str, j)
|
|
|
+ if j<10:
|
|
|
+ fileName = r"./data/%s/0%s.pdf" % (current_date_str, j)
|
|
|
+ else:
|
|
|
+ fileName= r"./data/%s/%s.pdf" % (current_date_str, j)
|
|
|
if(os.path.exists(fileName)):
|
|
|
print("跳过" + fileName)
|
|
|
else:
|
|
|
- url = api.pdfUrl % (current_date_str, j)
|
|
|
+ if j<10:
|
|
|
+ url = api.pdfUrl % (current_date_str, "0"+str(j))
|
|
|
+ else:
|
|
|
+ url = api.pdfUrl % (current_date_str, j)
|
|
|
# 检查链接有效性
|
|
|
response = requests.head(url)
|
|
|
if response.status_code == 200:
|