liuyuqi-dellpc 1 year ago
parent
commit
3a3fc8cd39
2 changed files with 9 additions and 8 deletions
  1. crawl_mrdx/__init__.py (+1 -3)
  2. crawl_mrdx/crawl_mrdx.py (+8 -5)

+ 1 - 3
crawl_mrdx/__init__.py

@@ -7,10 +7,8 @@
 @Desc    :   main
 '''
 
-import time
 from crawl_mrdx.crawl_mrdx import CrawlMrdx
 
-
-def main(start: str, end: str):
+def main(start: str=None, end: str=None):
     crawl = CrawlMrdx()
     crawl.crawl(start, end)

+ 8 - 5
crawl_mrdx/crawl_mrdx.py

@@ -30,7 +30,7 @@ class CrawlMrdx():
         self.conf = self.jsonConf.load()
         self.start_date = self.conf.get('startDate')
         self.end_date = self.conf.get("endDate")
-        
+
     def update(self):
         '''update app'''
         pass
@@ -60,18 +60,21 @@ class CrawlMrdx():
         :param start: start date
         :param end: end date
         '''
-        start_time = time.time() # 计算耗时
+        start_time = time.time()  # 计算耗时
         if not os.path.exists("data"):
             os.makedirs("data")
         pool = ThreadPoolExecutor(max_workers=10)  # 创建一个最大可容纳10个task的线程池
-        index = 1
+        if start is None:
+            start = self.conf.get('startDate')
+        if end is None:
+            end = self.conf.get("endDate")
         start_date = datetime.datetime.strptime(start, "%Y%m%d")
         end_date = datetime.datetime.strptime(end, "%Y%m%d")
 
         current_date = start_date
         while current_date <= end_date:
             print(current_date.strftime("%Y%m%d"))
-            current_date_str=current_date.strftime("%Y%m%d")
+            current_date_str = current_date.strftime("%Y%m%d")
             for j in range(1, 17):
                 fileName = r"./data/%s/0%s.pdf" % (current_date_str, j)
                 if(os.path.exists(fileName)):
@@ -88,4 +91,4 @@ class CrawlMrdx():
             if(current_date_str == self.conf.get('endDate')):
                 break
             current_date += datetime.timedelta(days=1)
-        print("last time: {} s".format(time.time() - start_time))
+        print("last time: {} s".format(time.time() - start_time))