# -*- coding: utf-8 -*-
'''
Crawl the video pages listed in data/ml.json and download each video.

@Author :liuyuqi.gov@msn.cn
@date :2019/4/8
'''
__author__ = "liuyuqi"

import json
import os
import re
from contextlib import closing

import requests

import DownloadProgress
import user_agent

# src = "D:/PycharmProjects/crawl_xuexi/"
# os.chdir(src)


def crawl():
    '''
    Download the video behind every entry listed in data/ml.json.

    For each entry the function:
      1. fetches the entry's ``static_page_url``;
      2. extracts the first ``src="./data...">`` reference from the HTML;
      3. rebuilds an absolute URL under https://www.xuexi.cn/ using the
         fourth "/"-separated component of the page URL;
      4. passes that URL to get_video_links() and hands the first result
         to downloadVideo(), using the entry's ``frst_name`` (tabs
         replaced by spaces) as the file name.

    Entries whose page contains no matching ``src`` reference are reported
    on stdout and skipped, so one bad page does not abort the whole run.

    The JSON file is opened relative to the current working directory.

    NOTE(review): get_video_links and downloadVideo are neither defined
    nor imported in the visible part of this file -- confirm they are
    provided elsewhere, otherwise the calls below raise NameError.
    '''
    # Top-level key in ml.json that holds the list of entries.
    entries_key = "fpe1ki18v228w00"
    # Compiled once; the same pattern is applied to every page.
    data_src = re.compile(r'src="./data(.*?)">', re.I)

    with open("data/ml.json", "r", encoding="utf8") as f:
        ml_data = json.load(f)

    for entry in ml_data[entries_key]:
        frst_name = entry["frst_name"].replace('\t', ' ')
        static_page_url = entry["static_page_url"]

        # Open the web page that links to the mp4 video.
        response = requests.get(static_page_url, headers=user_agent.getheaders())
        page_html = response.content.decode("utf8")

        # Path component right after the host, e.g. "https://host/<this>/...".
        pre_url = static_page_url.split("/")[3]

        match = data_src.search(page_html)
        if match is None:
            # Previously this case crashed with IndexError on findall(...)[0].
            print("no video data link found, skipping: " + static_page_url)
            continue

        url = "https://www.xuexi.cn/" + pre_url + "/data" + match.group(1)
        res = get_video_links(url)[0]
        downloadVideo(res, file_name=frst_name)


if __name__ == '__main__':
    crawl()