|
@@ -30,7 +30,7 @@ class CrawlMrdx():
|
|
self.conf = self.jsonConf.load()
|
|
self.conf = self.jsonConf.load()
|
|
self.start_date = self.conf.get('startDate')
|
|
self.start_date = self.conf.get('startDate')
|
|
self.end_date = self.conf.get("endDate")
|
|
self.end_date = self.conf.get("endDate")
|
|
-
|
|
|
|
|
|
+
|
|
def update(self):
|
|
def update(self):
|
|
'''update app'''
|
|
'''update app'''
|
|
pass
|
|
pass
|
|
@@ -60,18 +60,21 @@ class CrawlMrdx():
|
|
:param start: start date
|
|
:param start: start date
|
|
:param end: end date
|
|
:param end: end date
|
|
'''
|
|
'''
|
|
- start_time = time.time() # 计算耗时
|
|
|
|
|
|
+ start_time = time.time() # 计算耗时
|
|
if not os.path.exists("data"):
|
|
if not os.path.exists("data"):
|
|
os.makedirs("data")
|
|
os.makedirs("data")
|
|
pool = ThreadPoolExecutor(max_workers=10) # 创建一个最大可容纳10个task的线程池
|
|
pool = ThreadPoolExecutor(max_workers=10) # 创建一个最大可容纳10个task的线程池
|
|
- index = 1
|
|
|
|
|
|
+ if start is None:
|
|
|
|
+ start = self.conf.get('startDate')
|
|
|
|
+ if end is None:
|
|
|
|
+ end = self.conf.get("endDate")
|
|
start_date = datetime.datetime.strptime(start, "%Y%m%d")
|
|
start_date = datetime.datetime.strptime(start, "%Y%m%d")
|
|
end_date = datetime.datetime.strptime(end, "%Y%m%d")
|
|
end_date = datetime.datetime.strptime(end, "%Y%m%d")
|
|
|
|
|
|
current_date = start_date
|
|
current_date = start_date
|
|
while current_date <= end_date:
|
|
while current_date <= end_date:
|
|
print(current_date.strftime("%Y%m%d"))
|
|
print(current_date.strftime("%Y%m%d"))
|
|
- current_date_str=current_date.strftime("%Y%m%d")
|
|
|
|
|
|
+ current_date_str = current_date.strftime("%Y%m%d")
|
|
for j in range(1, 17):
|
|
for j in range(1, 17):
|
|
fileName = r"./data/%s/0%s.pdf" % (current_date_str, j)
|
|
fileName = r"./data/%s/0%s.pdf" % (current_date_str, j)
|
|
if(os.path.exists(fileName)):
|
|
if(os.path.exists(fileName)):
|
|
@@ -88,4 +91,4 @@ class CrawlMrdx():
|
|
if(current_date_str == self.conf.get('endDate')):
|
|
if(current_date_str == self.conf.get('endDate')):
|
|
break
|
|
break
|
|
current_date += datetime.timedelta(days=1)
|
|
current_date += datetime.timedelta(days=1)
|
|
- print("last time: {} s".format(time.time() - start_time))
|
|
|
|
|
|
+ print("last time: {} s".format(time.time() - start_time))
|