main.py 1.1 KB

123456789101112131415161718192021222324252627282930313233343536373839
  1. # -*- coding: utf-8 -*-
  2. '''
  3. @Author :liuyuqi.gov@msn.cn
  4. @date :2019/4/8
  5. '''
  6. __author__ = "liuyuqi"
  7. import json
  8. import os
  9. import re
  10. from contextlib import closing
  11. import requests
  12. import DownloadProgress
  13. import user_agent
  14. # src = "D:/PycharmProjects/crawl_xuexi/"
  15. # os.chdir(src)
  16. def crawl():
  17. with open("data/ml.json", "r", encoding="utf8") as f:
  18. mlData = json.loads(f.read())
  19. for i in range((len(mlData["fpe1ki18v228w00"]))):
  20. frst_name = mlData["fpe1ki18v228w00"][i]["frst_name"].replace('\t', ' ')
  21. static_page_url = mlData["fpe1ki18v228w00"][i]["static_page_url"]
  22. # 打开 mp4 视频网页链接
  23. resData = requests.get(static_page_url, headers=user_agent.getheaders()).content.decode("utf8")
  24. preUrl = static_page_url.split("/")[3]
  25. pattern = r'src="./data(.*?)"></script>'
  26. url = "https://www.xuexi.cn/" + preUrl + "/data" + re.findall(pattern, resData, re.I)[0]
  27. res = get_video_links(url)[0]
  28. downloadVideo(res, file_name=frst_name)
  29. if __name__ == '__main__':
  30. crawl()