crawl.py 716 B

123456789101112131415161718192021222324252627282930313233343536373839404142
  1. #!/usr/bin/env python
  2. # -*- encoding: utf-8 -*-
  3. '''
  4. @Contact : liuyuqi.gov@msn.cn
  5. @Time : 2021/03/02 01:38:04
  6. @License : Copyright © 2017-2020 liuyuqi. All Rights Reserved.
  7. @Desc : Crawler entry point
  8. '''
  9. import os
  10. import sys
  11. import re
  12. import json
  13. import pandas
  14. url = r"http://ccgp-shaanxi.gov.cn/notice/list.do?noticetype=3&province=province"
  15. def getUrl():
  16. """
  17. param :
  18. return:
  19. """
  20. with open("data/url.txt") as file:
  21. res = file.readlines().decode("utf8")
  22. print(res)
  23. def crwal():
  24. """
  25. param url:
  26. return:
  27. """
  28. pass
  29. # 爬虫数据
  30. # 设置标签,下次从标签处继续爬
  31. # 存储到数据库
  32. if __name__ == "__main__":
  33. crawl()