123456789101112131415161718192021222324252627282930313233343536373839 |
- # -*- coding: utf-8 -*-
- '''
- @Author :liuyuqi.gov@msn.cn
- @date :2019/4/8
- '''
- __author__ = "liuyuqi"
- import json
- import os
- import re
- from contextlib import closing
- import requests
- import DownloadProgress
- import user_agent
- # src = "D:/PycharmProjects/crawl_xuexi/"
- # os.chdir(src)
def crawl():
    """Download the video behind every entry listed in ``data/ml.json``.

    For each entry under the ``"fpe1ki18v228w00"`` key of the catalogue:

    1. fetch the entry's ``static_page_url``;
    2. find the first ``src="./data..."`` script tag in that page;
    3. build the matching ``https://www.xuexi.cn/<segment>/data...`` URL,
       where ``<segment>`` is the fourth ``/``-separated part of the page URL;
    4. pass the first item returned by ``get_video_links`` to
       ``downloadVideo``, using the entry's ``frst_name`` (tabs replaced by
       spaces) as the file name.

    A page without a matching script tag is reported on stdout and skipped,
    so one unexpected page does not abort the remaining downloads.

    NOTE(review): ``get_video_links`` and ``downloadVideo`` are not defined
    in the visible part of this file -- confirm they are in scope at runtime.
    """
    # The pattern is identical for every page, so compile it once.
    script_pattern = re.compile(r'src="./data(.*?)"></script>', re.I)

    with open("data/ml.json", "r", encoding="utf8") as f:
        ml_data = json.load(f)

    for entry in ml_data["fpe1ki18v228w00"]:
        frst_name = entry["frst_name"].replace('\t', ' ')
        static_page_url = entry["static_page_url"]

        # Open the web page that links to the mp4 video.
        response = requests.get(static_page_url, headers=user_agent.getheaders())
        page_html = response.content.decode("utf8")

        pre_url = static_page_url.split("/")[3]
        match = script_pattern.search(page_html)
        if match is None:
            # Previously this surfaced as a bare IndexError that stopped the
            # whole crawl; name the offending page and move on instead.
            print("No data script found in page, skipping: " + static_page_url)
            continue

        url = "https://www.xuexi.cn/" + pre_url + "/data" + match.group(1)
        video_url = get_video_links(url)[0]
        downloadVideo(video_url, file_name=frst_name)
# Script entry point: start the crawl only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    crawl()
|