|
@@ -18,10 +18,8 @@ from concurrent.futures import ThreadPoolExecutor
|
|
|
|
|
|
class Qinimg(object):
|
|
|
''' '''
|
|
|
- # _host = "https://www.qinimg.com"
|
|
|
- _host="http://localhost:88"
|
|
|
- _url = "https://www.qinimg.com/random"
|
|
|
- _name = "qinimg"
|
|
|
+ _host = "https://www.qinimg.com"
|
|
|
+ # _host="http://localhost:88"
|
|
|
_header = {
|
|
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
|
|
"Referer": "https://www.qinimg.com/",
|
|
@@ -79,10 +77,10 @@ class Qinimg(object):
|
|
|
def get_list(self, next_page=None):
|
|
|
''' get all pic list '''
|
|
|
if next_page is None:
|
|
|
- next_page = self._url
|
|
|
+ next_page = self._host
|
|
|
else:
|
|
|
next_page = self._host + next_page
|
|
|
- res=self.sess.get(self._host, headers=self._header)
|
|
|
+ res=self.sess.get(next_page, headers=self._header)
|
|
|
res.encoding = 'utf-8'
|
|
|
soup = BeautifulSoup(res.text, 'html.parser')
|
|
|
imgs = soup.select('div.list_box ul')
|
|
@@ -93,7 +91,7 @@ class Qinimg(object):
|
|
|
title=img.select('a')[0]['title'].strip()
|
|
|
createtime=img.select('span')[1].text.strip()
|
|
|
self.save_girl(url,title,createtime)
|
|
|
-
|
|
|
+ self.get_pic_detail(url,title,createtime)
|
|
|
except Exception as e:
|
|
|
print(e)
|
|
|
finally:
|
|
@@ -132,21 +130,30 @@ Select * from qinimg_girl where uid = '{}' and isdelete = 0
|
|
|
else:
|
|
|
print(str(uid)+" is exist.")
|
|
|
|
|
|
def get_pic_detail(self, url, name, createtime):
    '''Fetch one gallery page and record every image link found on it.

    url        -- site-relative path of the gallery page; the first run of
                  digits in it is used as the gallery uid
    name       -- gallery title stored alongside every image row
    createtime -- creation time string stored alongside every image row

    Raises IndexError when ``url`` contains no digits (same as before).
    '''
    uid = re.findall(r'(\d+)', url)[0]
    page_url = self._host + url
    res = self.sess.get(page_url, headers=self._header)
    res.encoding = 'utf-8'
    soup = BeautifulSoup(res.text, 'html.parser')
    # every image of the gallery is wrapped in an <a> under div#image
    imgs = soup.select('div#image p a')
    # save to sqlite, qinimg_girl_detail
    # The previous code passed a constant index of 0 and the page URL for
    # every image, so all rows of one gallery were identical. Each row now
    # carries its own position and the link of the image itself.
    for index, img in enumerate(imgs):
        img_url = img['href']
        print(img_url)
        # NOTE(review): href is stored exactly as found in the page;
        # confirm whether it is absolute before it is used for download.
        self.save_girl_detail(uid, name, index, createtime, img_url)
|
|
|
+
|
|
|
def save_girl_detail(self, uid, name, index, createtime, url):
    '''Insert one image row into the qinimg_girl_detail table and commit.

    uid        -- gallery id the image belongs to
    name       -- gallery title
    index      -- position of the image inside the gallery
    createtime -- creation time string of the gallery
    url        -- link stored for this image
    '''
    # Values are bound as parameters instead of being formatted into the
    # statement: scraped titles may contain quotes, which would break (or
    # inject into) a string-built query.
    # "index" is a reserved word in SQLite, so the column name is quoted.
    sql = '''
insert into qinimg_girl_detail (uid, name, "index", createtime, url)
values (?, ?, ?, ?, ?)
    '''
    self.cursor.execute(sql, (uid, name, index, createtime, url))
    self.conn.commit()
|
|
|
|
|
|
def download_all_pic(self):
|
|
|
''' download pic '''
|
|
@@ -169,14 +176,19 @@ Select * from qinimg_girl where uid = '{}' and isdelete = 0
|
|
|
os.mkdir("data/{}".format(name))
|
|
|
with open("data/{}/{}.jpg".format(name, index), "wb") as f:
|
|
|
f.write(self.sess.get(url, headers=self._header).content)
|
|
|
+ sql='''
|
|
|
+ update qinimg_girl_detail set is_download = 1 where url = '{}'
|
|
|
+ '''
|
|
|
+ self.cursor.execute(sql.format(url))
|
|
|
+ self.conn.commit()
|
|
|
|
|
|
def run(self):
    '''Crawl the picture list starting from the configured page.

    Reads the "init_page" config value, passes it to get_list as the
    starting page, and always calls __release__ afterwards.
    '''
    try:
        # get all pic
        init_page = self.get_config("init_page")
        self.get_list(next_page=init_page)
        # download pic -- currently disabled:
        # self.download_all_pic()
    finally:
        # release even when the crawl raises, so nothing held by the
        # instance is leaked on failure
        self.__release__()
|
|
|
|
|
|
def set_config(self, key, value):
|