get_links.py 770 B

1234567891011121314151617181920212223242526
  1. import requests
  2. import re
  3. def get_article_links():
  4. article = requests.get(
  5. "https://www.xuexi.cn/c06bf4acc7eef6ef0a560328938b5771/data9a3668c13f6e303932b5e0e100fc248b.js").content.decode(
  6. "utf8")
  7. pattern = r"list\"\:(.+),\"count\"\:"
  8. links = []
  9. list = eval(re.search(pattern, article).group(1))[:20000]
  10. list.reverse()
  11. for i in range(len(list)):
  12. links.append(list[i]["static_page_url"])
  13. return links
  14. def get_video_links():
  15. video = requests.get(
  16. "https://www.xuexi.cn/4426aa87b0b64ac671c96379a3a8bd26/datadb086044562a57b441c24f2af1c8e101.js").content.decode(
  17. "utf8")
  18. pattern = r'https://www.xuexi.cn/[^,"]*html'
  19. link = re.findall(pattern, video, re.I)
  20. link.reverse()
  21. return link