|
@@ -1,4 +1,4 @@
|
|
|
-
|
|
|
+
|
|
|
'''
|
|
|
Created on 2017年7月15日
|
|
|
@version:python3.6
|
|
@@ -10,46 +10,51 @@ from time import sleep
|
|
|
import random
|
|
|
from urllib import request
|
|
|
|
|
|
# Root working directory of the crawler and the two sub-directories it uses.
project_dir = "C:/Users/dell/Desktop/xiaohua-crawl"
img_dir = project_dir + "/images"
data_dir = project_dir + "/data"


def downloadImg(imgUrl, fileName):
    """Download one image and store it inside ``img_dir``.

    The target path is ``img_dir/<fileName><suffix>``, where ``<suffix>`` is
    the last four characters of ``imgUrl`` (for example ``.jpg``).

    The download is best-effort: any exception is printed and swallowed so
    that one bad URL does not abort the caller's loop.  A progress line is
    printed in every case, whether or not the download succeeded.

    Args:
        imgUrl: Absolute URL of the image to fetch.
        fileName: Base name (without extension) of the file to create.
    """
    # Request headers sent with every download.
    headers = {
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
        'Referer': 'http://www.xiaohuar.com'
    }
    try:
        req = request.Request(url=imgUrl, headers=headers)
        # Read the whole body first and close the response deterministically;
        # the output file is only created once the data is actually in hand,
        # so a failed download does not leave an empty file behind.
        with request.urlopen(req) as res:
            payload = res.read()
        with open(img_dir + "/" + fileName + imgUrl[-4:], "wb") as code:
            code.write(payload)
    except Exception as err:
        print(err)
    finally:
        print("pic:" + fileName + ".jpg")
|
|
|
+
|
|
|
|
|
|
def __init__():
    """Create the image and data output directories if they are missing.

    Uses ``os.makedirs(..., exist_ok=True)`` so that missing parent
    directories are created as well and an already existing directory is
    not an error.
    """
    for directory in (img_dir, data_dir):
        os.makedirs(directory, exist_ok=True)
|
|
|
+
|
|
|
|
|
|
def main():
    """Download the image referenced by every row of ``data_dir/result.csv``.

    For each row, column 2 is appended to the site root to form the image
    URL, and columns 4 and 3 are joined with ``-`` to form the output file
    name.  Rows with fewer than five columns cannot provide those fields
    and are skipped instead of aborting the whole run.
    """
    csv_path = data_dir + "/result.csv"
    # newline='' lets the csv module handle line endings itself, as the
    # csv documentation recommends for files passed to csv.reader.
    with open(csv_path, 'r', newline='') as f:
        for row in csv.reader(f):
            if len(row) < 5:
                # Blank or truncated line: nothing to download.
                continue
            fileName = row[4] + "-" + row[3]
            imgUrl = "http://www.xiaohuar.com" + row[2]
            downloadImg(imgUrl, fileName)
|
|
|
+
|
|
|
+
|
|
|
# Script entry: make sure the output directories exist, then download every
# image listed in the CSV.
__init__()
main()
|