main.py 1.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. #coding=utf-8
  2. '''
  3. Created on 2017-07-15
  4. @version: python3.6
  5. @author: liuyuqi
  6. '''
  7. import csv
  8. import os
  9. from time import sleep
  10. import random
  11. from urllib import request
  12. project_dir="C:/Users/dell/Desktop/xiaohua-crawl"
  13. img_dir=project_dir+"/images"
  14. data_dir=project_dir+"/data"
  15. def downloadImg(imgUrl,fileName):
  16. try:
  17. headers = {
  18. 'User-Agent' : "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
  19. 'Referer':'http://www.xiaohuar.com'
  20. }
  21. req=request.Request(url=imgUrl)
  22. for i in headers:
  23. req.add_header(i,headers[i])
  24. res=request.urlopen(req)
  25. with open(img_dir+"/"+fileName+imgUrl[-4:], "wb") as code:
  26. code.write(res.read())
  27. # sleep(random.randint(1,5))
  28. except Exception as err:
  29. print(err)
  30. finally:
  31. print("pic:"+ fileName+".jpg")
  32. def __init__():
  33. if(os.path.exists(img_dir)!=True):
  34. os.mkdir(img_dir)
  35. if(os.path.exists(data_dir)!=True):
  36. os.mkdir(data_dir)
  37. def main():
  38. file=data_dir+"/result.csv"
  39. with open(file, 'r') as f:
  40. # data=csv.reader(f, csv.excel_tab)
  41. data=csv.reader(f)
  42. for row in data:
  43. imgUrl=""
  44. fileName=""
  45. for i in range(len(row)):
  46. fileName=row[4]+"-"+row[3]
  47. imgUrl="http://www.xiaohuar.com"+row[2]
  48. downloadImg(imgUrl,fileName)
  49. __init__()
  50. main()