|
@@ -20,17 +20,25 @@ class Qinimg(object):
|
|
|
''' '''
|
|
|
_host = "https://www.qinimg.com"
|
|
|
# _host="http://localhost:88"
|
|
|
+
|
|
|
_header = {
|
|
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
|
|
"Referer": "https://www.qinimg.com/",
|
|
|
- "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
|
|
|
- "Accept-Encoding": "gzip, deflate, br",
|
|
|
- "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
|
|
|
- "Cache-Control": "no-cache",
|
|
|
- "Connection": "keep-alive",
|
|
|
- "Host": "www.qinimg.com"
|
|
|
+ "authority": "www.qinimg.com",
|
|
|
+ "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
|
|
+ "accept-language": "en-US,en;q=0.9",
|
|
|
+ "cache-control": "max-age=0",
|
|
|
+ "dnt": "1",
|
|
|
+ "sec-ch-ua": '"Chromium";v="116", "Not)A;Brand";v="24", "Microsoft Edge";v="116"',
|
|
|
+ "sec-ch-ua-mobile": "?0",
|
|
|
+ "sec-ch-ua-platform": '"Windows"',
|
|
|
+ "sec-fetch-dest": "document",
|
|
|
+ "sec-fetch-mode": "navigate",
|
|
|
+ "sec-fetch-site": "same-origin",
|
|
|
+ "sec-fetch-user": "?1",
|
|
|
+ "upgrade-insecure-requests": "1"
|
|
|
}
|
|
|
- max_page = 1
|
|
|
+ max_page = 10
|
|
|
|
|
|
def __init__(self):
|
|
|
self.sess = requests.Session()
|
|
@@ -77,16 +85,15 @@ class Qinimg(object):
|
|
|
|
|
|
def get_list(self, next_page=None):
|
|
|
''' get all pic list '''
|
|
|
- print("init page: "+next_page)
|
|
|
+ print("init page: " + next_page)
|
|
|
if next_page is None:
|
|
|
next_page = self._host
|
|
|
else:
|
|
|
- next_page = self._host + next_page
|
|
|
+ next_page = next_page
|
|
|
+
|
|
|
res=self.sess.get(next_page, headers=self._header)
|
|
|
- # res.encoding = 'utf-8'
|
|
|
+ res.encoding = 'utf-8'
|
|
|
soup = BeautifulSoup(res.text, 'html.parser')
|
|
|
- with open("data/list.html", "w", encoding="utf-8") as f:
|
|
|
- f.write(res.text)
|
|
|
imgs = soup.select('div.list_box ul')
|
|
|
# save to sqlite, qinimg_girl
|
|
|
for img in imgs:
|
|
@@ -107,7 +114,8 @@ class Qinimg(object):
|
|
|
for pagenumber in pagenumbers:
|
|
|
if pagenumber.text == 'Next' and index < self.max_page:
|
|
|
next = pagenumber
|
|
|
- self.get_list(next_page = next['href'])
|
|
|
+ next_page = self._host + next['href']
|
|
|
+ self.get_list(next_page=next_page)
|
|
|
break
|
|
|
index=index + 1
|
|
|
self.set_config("init_page", next_page)
|
|
@@ -137,29 +145,36 @@ Select * from qinimg_girl where uid = '{}' and isdelete = 0
|
|
|
|
|
|
def get_pic_detail(self, url, name, createtime):
    '''Fetch one gallery detail page and store every image link it contains.

    url        -- site-relative detail-page path; joined with self._host
    name       -- gallery title copied onto each stored row
    createtime -- gallery timestamp copied onto each stored row

    Network or parse failures are logged and swallowed so a single bad
    page does not abort the whole crawl (best-effort behavior kept from
    the original; the dead ``finally: pass`` block was removed).
    '''
    # numeric gallery id embedded in the path, e.g. "/girl/1234.html" -> "1234"
    uid = re.findall(r'(\d+)', url)[0]
    page_url = self._host + url
    try:
        res = self.sess.get(page_url, headers=self._header)
        res.encoding = 'utf-8'
        soup = BeautifulSoup(res.text, 'html.parser')
        # each <a> under div#image wraps one full-size picture;
        # enumerate replaces the hand-rolled pic_index counter
        for pic_index, img in enumerate(soup.select('div#image p a')):
            # NOTE(review): no existence check yet -- re-running the crawl
            # inserts duplicate rows; confirm before adding a unique index
            self.save_girl_detail(uid, name, pic_index, createtime, img['href'])
    except Exception as e:
        # best-effort: report which page failed, then move on
        print("get_pic_detail failed for {}: {}".format(page_url, e))
|
|
|
|
|
|
def save_girl_detail(self, uid, name, pic_index, createtime, url):
    '''Insert one picture record into the qinimg_girl_detail table.

    uid        -- gallery id the picture belongs to
    name       -- gallery title (scraped from remote HTML)
    pic_index  -- position of the picture inside its gallery; stored as str
                  to match the original str.format behavior
    createtime -- gallery timestamp
    url        -- direct link to the picture (scraped from remote HTML)

    Uses a parameterized query instead of str.format: name/url come from
    untrusted remote pages, and string-built SQL both breaks on quotes
    and is injectable.
    '''
    sql = '''
insert into qinimg_girl_detail (uid,name,pic_index,createtime,url) values (?, ?, ?, ?, ?)
'''
    self.cursor.execute(sql, (uid, name, str(pic_index), createtime, url))
    self.conn.commit()
|
|
|
|
|
|
def download_all_pic(self):
|
|
|
''' download pic '''
|
|
@@ -169,9 +184,14 @@ insert into qinimg_girl_detail (uid,name,index,createtime,url) values ('{}', '{}
|
|
|
# get all pic detail
|
|
|
self.cursor.execute(sql)
|
|
|
details = self.cursor.fetchall()
|
|
|
+ index=1
|
|
|
for detail in details:
|
|
|
pool = ThreadPoolExecutor(max_workers=5)
|
|
|
- future1 = pool.submit(self._download_pic, detail["url"], detail["name"], detail["index"])
|
|
|
+ future1 = pool.submit(self._download_pic, detail[4], detail[2], detail[3])
|
|
|
+ # print(detail)
|
|
|
+ # if index>2:
|
|
|
+ # return
|
|
|
+ # index = index + 1
|
|
|
time.sleep(1)
|
|
|
|
|
|
def _download_pic(self, url, name,index):
|