Browse Source

同时开启10个任务下载 (Run 10 download tasks concurrently)

liuyuqi-dellpc 4 years ago
parent
commit
2458e7b09b
2 changed files with 9 additions and 1 deletion
  1. main.py (+9 −1)
  2. requirements.txt (+0 −0)

+ 9 - 1
main.py

@@ -14,6 +14,10 @@ import requests
 
 import DownloadProgress
 import user_agent
+import threading
+from concurrent.futures import ThreadPoolExecutor
+
+import time
 
 # src = "D:/PycharmProjects/crawl_xuexi/"
 # os.chdir(src)
@@ -52,6 +56,7 @@ def downloadVideo(url, file_name):
 def crawl():
     with open("data/ml.json", "r", encoding="utf8") as f:
         mlData = json.loads(f.read())
+        pool = ThreadPoolExecutor(max_workers=10)  # 创建一个最大可容纳10个task的线程池
         for i in range((len(mlData["fpe1ki18v228w00"]))):
             frst_name = mlData["fpe1ki18v228w00"][i]["frst_name"].replace('\t', ' ')
             static_page_url = mlData["fpe1ki18v228w00"][i]["static_page_url"]
@@ -61,8 +66,11 @@ def crawl():
             pattern = r'src="./data(.*?)"></script>'
             url = "https://www.xuexi.cn/" + preUrl + "/data" + re.findall(pattern, resData, re.I)[0]
             res = get_video_links(url)[0]
-            downloadVideo(res, file_name=frst_name)
+            future1 = pool.submit(downloadVideo,
+                                  res, frst_name)  # 往线程池里面加入一个task
 
 
 if __name__ == '__main__':
+    start_time = time.time()
     crawl()
+    print("last time: {} s".format(time.time() - start_time))

+ 0 - 0
requirements.txt