12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455 |
- #coding=utf-8
- '''
- Created on 2017-07-15
- @version: python3.6
- @author: liuyuqi
- '''
- import csv
- import os
- from time import sleep
- import random
- from urllib import request
# Root folder of the crawler project; the two paths below hang off it.
project_dir = "C:/Users/dell/Desktop/xiaohua-crawl"
# Folder that receives the downloaded pictures.
img_dir = f"{project_dir}/images"
# Folder that holds the CSV input (result.csv).
data_dir = f"{project_dir}/data"
def downloadImg(imgUrl, fileName):
    """Download one image and store it under ``img_dir``.

    The request is sent with a browser-like ``User-Agent`` and a ``Referer``
    of http://www.xiaohuar.com. Any failure is printed and swallowed so that
    one bad URL does not abort the caller's loop.

    Args:
        imgUrl: Absolute URL of the image to fetch.
        fileName: Target file name without extension. The extension is taken
            from the path part of ``imgUrl`` (``.jpg`` when the path has
            none).
    """
    # Local import: only this function needs URL parsing.
    from urllib.parse import urlsplit

    headers = {
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
        'Referer': 'http://www.xiaohuar.com',
    }
    try:
        # Use the extension of the URL *path*, so query strings and
        # extensions that are not exactly four characters long (".jpeg")
        # no longer end up mangled in the file name.
        ext = os.path.splitext(urlsplit(imgUrl).path)[1] or ".jpg"
        req = request.Request(url=imgUrl, headers=headers)
        # The context manager closes the HTTP response in every case.
        with request.urlopen(req) as res:
            data = res.read()
        # Open the target only after the body was read completely, so a
        # failed transfer does not leave an empty file behind.
        with open(img_dir + "/" + fileName + ext, "wb") as out:
            out.write(data)
    except Exception as err:
        # Best-effort crawler: report the failure and carry on.
        print("failed:" + str(imgUrl), err)
    else:
        # Announce the picture only when it was really written, using the
        # extension that was actually used on disk.
        print("pic:" + fileName + ext)
def __init__():
    """Create the image and data directories if they do not exist yet.

    Despite its name this is a plain module-level function, not a class
    initializer.

    Raises:
        OSError: If a directory cannot be created (for example because a
            regular file with the same name is in the way).
    """
    for directory in (img_dir, data_dir):
        # makedirs also creates a missing parent (the project folder), and
        # exist_ok=True removes the check-then-create race of
        # os.path.exists() followed by os.mkdir().
        os.makedirs(directory, exist_ok=True)
def main():
    """Download the image referenced by every row of ``data/result.csv``.

    For each CSV row, column 2 is appended to ``http://www.xiaohuar.com`` to
    form the image URL, and columns 4 and 3 are joined with ``-`` to form the
    local file name (without extension). Rows with fewer than five columns
    cannot provide those fields and are skipped.
    """
    csv_path = data_dir + "/result.csv"
    # newline="" is what the csv module asks for, so that quoted fields
    # containing line breaks are parsed correctly.
    with open(csv_path, 'r', newline="") as f:
        for row in csv.reader(f):
            if len(row) < 5:
                # Blank or truncated line: nothing to download.
                continue
            fileName = row[4] + "-" + row[3]
            imgUrl = "http://www.xiaohuar.com" + row[2]
            # Exactly one download per row.
            downloadImg(imgUrl, fileName)
if __name__ == "__main__":
    # Run only when executed as a script, so that importing this module does
    # not create directories or start downloading.
    __init__()
    main()
|