|
@@ -11,13 +11,15 @@ import requests
|
|
|
import sys
|
|
|
import os
|
|
|
import re
|
|
|
-import json
|
|
|
+import json,time
|
|
|
from bs4 import BeautifulSoup
|
|
|
import sqlite3
|
|
|
+from concurrent.futures import ThreadPoolExecutor
|
|
|
|
|
|
class Qinimg(object):
|
|
|
''' '''
|
|
|
- _host = "https://www.qinimg.com/"
|
|
|
+ # _host = "https://www.qinimg.com"
|
|
|
+ _host="http://localhost:88"
|
|
|
_url = "https://www.qinimg.com/random"
|
|
|
_name = "qinimg"
|
|
|
_header = {
|
|
@@ -34,34 +36,102 @@ class Qinimg(object):
|
|
|
|
|
|
def __init__(self):
    ''' Open the HTTP session and the SQLite database, then ensure the schema exists. '''
    self.sess = requests.Session()
    self.conn = sqlite3.connect('xiaohua.db')
    # Other methods read rows by column name (config["value"], detail["url"]),
    # which plain tuple rows do not support; sqlite3.Row allows both name and
    # positional access.
    self.conn.row_factory = sqlite3.Row
    self.cursor = self.conn.cursor()
    self.init_database()
|
|
|
+
|
|
|
def init_database(self):
    ''' Create the SQLite tables used by the scraper when they are missing.

    Safe to call repeatedly: every statement uses CREATE TABLE IF NOT EXISTS,
    so existing tables and their rows are left untouched.

    Tables:
        qinimg_girl        -- one row per gallery (name, uid, createtime, url).
        qinimg_girl_detail -- one row per picture url with an is_download flag.
        qinimg_config      -- key/value settings read by get_config/set_config.
    '''
    # NOTE(review): download_all_pic reads "name" and "index" from
    # qinimg_girl_detail rows, but this schema defines neither column --
    # confirm which side is intended before relying on downloads.
    statements = (
        '''
        CREATE TABLE IF NOT EXISTS "qinimg_girl" (
            "id" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
            "name" TEXT,
            "uid" INTEGER,
            "createtime" TEXT,
            "isdelete" INTEGER NOT NULL DEFAULT 0,
            "url" TEXT
        );
        ''',
        '''
        CREATE TABLE IF NOT EXISTS "qinimg_girl_detail" (
            "id" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
            "url" TEXT,
            "is_download" INTEGER NOT NULL DEFAULT 0,
            "createtime" TEXT
        );
        ''',
        # Previously never created, so every config read/write failed.
        '''
        CREATE TABLE IF NOT EXISTS "qinimg_config" (
            "key" TEXT PRIMARY KEY NOT NULL,
            "value" TEXT
        );
        ''',
    )
    for statement in statements:
        self.cursor.execute(statement)
    self.conn.commit()
|
|
|
+
|
|
|
def get_list(self, next_page=None):
    ''' Crawl the gallery listing pages and store every gallery found.

    Args:
        next_page: host-relative path of the first page to fetch (as found in
            a pagination link's href). When None, crawling starts at
            ``self._url``.

    Each listing page is fetched, every ``div.list_box ul`` entry is passed to
    ``save_girl``, and the "Next" pagination link is followed until there is
    none. After a page reached through a relative path has been processed,
    that path is stored under the "init_page" config key so a later run can
    resume from it.
    '''
    page_path = next_page
    # NOTE(review): max_page is not defined in the visible part of the class.
    # When present it is treated here as the maximum number of pages fetched
    # per call; when absent, crawling is unbounded. Confirm intended meaning.
    page_limit = getattr(self, "max_page", None)
    pages_done = 0

    # Iterate instead of recursing so long listings cannot exhaust the stack.
    while True:
        page_url = self._url if page_path is None else self._host + page_path
        # Fetch the page that was actually requested, not the site root.
        res = self.sess.get(page_url, headers=self._header)
        res.encoding = 'utf-8'
        soup = BeautifulSoup(res.text, 'html.parser')

        # save to sqlite, qinimg_girl
        for entry in soup.select('div.list_box ul'):
            try:
                link = entry.select('a')[0]
                url = link['href'].strip()
                title = link['title'].strip()
                createtime = entry.select('span')[1].text.strip()
                self.save_girl(url, title, createtime)
            except (IndexError, KeyError, sqlite3.Error) as e:
                # A malformed entry must not abort the rest of the page.
                print(e)

        # Store the relative path (not the full URL): run() hands it back to
        # this method, which prefixes the host again.
        if page_path is not None:
            self.set_config("init_page", page_path)
        pages_done += 1

        # find the next button
        next_link = None
        for anchor in soup.select('div.pages a'):
            if anchor.text == 'Next':
                next_link = anchor
                break

        if next_link is None or not next_link.get('href'):
            return
        if page_limit is not None and pages_done >= page_limit:
            return
        page_path = next_link['href']
|
|
|
+
|
|
|
def save_girl(self, url: str, title: str, createtime: str):
    ''' Insert a gallery into qinimg_girl unless it is already stored.

    Args:
        url: host-relative gallery path; the first run of digits in it is
            used as the gallery uid.
        title: gallery title, stored in the "name" column.
        createtime: creation time text as scraped from the listing.

    Raises:
        IndexError: if ``url`` contains no digits to use as the uid.
    '''
    uid = int(re.findall(r'(\d+)', url)[0])
    full_url = self._host + url

    # Bound parameters instead of str.format: scraped titles may contain
    # quote characters that would otherwise break (or inject into) the SQL.
    self.cursor.execute(
        "SELECT 1 FROM qinimg_girl WHERE uid = ? AND isdelete = 0",
        (uid,),
    )
    if self.cursor.fetchone() is not None:
        print(str(uid) + " is exist.")
        return

    self.cursor.execute(
        'INSERT INTO qinimg_girl ("name", "uid", "createtime", "url") '
        'VALUES (?, ?, ?, ?)',
        (title, uid, createtime, full_url),
    )
    self.conn.commit()
|
|
|
+
|
|
|
def get_pic_detail(self, url):
|
|
|
''' get pic detail '''
|
|
|
res=self.sess.get(url, headers=self._header)
|
|
@@ -76,18 +146,77 @@ class Qinimg(object):
|
|
|
print(img['src'])
|
|
|
# if not exist in sqlite, add to sqlite
|
|
|
# if exist, check whether the pic is downloaded
|
|
|
- self.download(img['src'])
|
|
|
+ self.download_all_pic(img['src'])
|
|
|
|
|
|
- def download(self, url):
|
|
|
- self.sess.get(url, headers=self._header)
|
|
|
- # save to local
|
|
|
def download_all_pic(self, *, delay=1):
    ''' Download every picture in qinimg_girl_detail not yet marked downloaded.

    Args:
        delay: seconds to wait between submitting downloads (keyword-only).
            Defaults to 1, matching the previous fixed pause; pass 0 to
            disable throttling.

    Downloads run on one shared pool of five worker threads. After all of
    them finish, rows whose download succeeded get ``is_download = 1``;
    failures are printed and left pending for the next run.
    '''
    # NOTE(review): rows must support access by column name (sqlite3.Row or
    # similar) and expose "id", "url", "name" and "index" -- the table as
    # created by init_database has no "name"/"index" columns; confirm.
    self.cursor.execute("select * from qinimg_girl_detail where is_download = 0")
    details = self.cursor.fetchall()

    pending = {}
    # One pool for the whole batch; leaving the with-block waits for all
    # workers and releases the threads.
    with ThreadPoolExecutor(max_workers=5) as pool:
        for detail in details:
            future = pool.submit(
                self._download_pic, detail["url"], detail["name"], detail["index"]
            )
            pending[future] = detail["id"]
            if delay:
                time.sleep(delay)

    # Update the database from this thread only: sqlite3 connections must not
    # be shared with the worker threads.
    for future, detail_id in pending.items():
        error = future.exception()
        if error is not None:
            print(error)
            continue
        self.cursor.execute(
            "update qinimg_girl_detail set is_download = 1 where id = ?",
            (detail_id,),
        )
    self.conn.commit()
|
|
|
|
|
|
def _download_pic(self, url, name, index):
    ''' Download one picture to ``data/<name>/<index>.jpg``.

    Args:
        url: absolute URL of the picture.
        name: directory name under ``data`` to store the picture in.
        index: file name (without extension) for the picture.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    '''
    target_dir = os.path.join("data", str(name))
    # exist_ok avoids the check-then-create race when several worker threads
    # store pictures in the same directory at once.
    os.makedirs(target_dir, exist_ok=True)

    # Fetch before opening the file so a failed request does not leave an
    # empty or partial .jpg behind.
    response = self.sess.get(url, headers=self._header)
    response.raise_for_status()

    with open(os.path.join(target_dir, "{}.jpg".format(index)), "wb") as f:
        f.write(response.content)
|
|
|
|
|
|
def run(self):
    ''' Crawl the listing from the saved checkpoint, download pictures, release resources.

    The "init_page" config value (if any) is passed to ``get_list`` as the
    starting page. ``__release__`` is always called, even when crawling or
    downloading raises, so the database handles are not leaked.
    '''
    try:
        # get all pic
        init_page = self.get_config("init_page")
        self.get_list(next_page=init_page)
        # download pic
        self.download_all_pic()
    finally:
        self.__release__()
|
|
|
+
|
|
|
def set_config(self, key, value):
    ''' Store ``value`` under ``key`` in qinimg_config, replacing any previous value.

    Args:
        key: config key.
        value: value to store.

    Database errors are printed rather than raised, so a failed config write
    does not interrupt the caller.
    '''
    try:
        self.cursor.execute("select 1 from qinimg_config where key = ?", (key,))
        if self.cursor.fetchone() is None:
            self.cursor.execute(
                "insert into qinimg_config (key, value) values (?, ?)",
                (key, value),
            )
        else:
            # Parameter order follows the statement: value first, then key.
            self.cursor.execute(
                "update qinimg_config set value = ? where key = ?",
                (value, key),
            )
        self.conn.commit()
    except sqlite3.Error as e:
        print(e)
|
|
|
|
|
|
+
|
|
|
def get_config(self, key):
    ''' Return the value stored under ``key`` in qinimg_config.

    Args:
        key: config key to look up.

    Returns:
        The stored value, or None when the key is absent or the lookup fails
        (for example because the table does not exist).
    '''
    try:
        self.cursor.execute("select value from qinimg_config where key = ?", (key,))
        row = self.cursor.fetchone()
    except sqlite3.Error:
        return None
    # Positional access works for plain tuple rows and sqlite3.Row alike.
    return None if row is None else row[0]
|
|
|
|
|
|
def __release__(self):
    ''' Close the database cursor, the database connection and the HTTP session.

    Each resource is closed even if closing an earlier one raises.
    '''
    try:
        self.cursor.close()
    finally:
        try:
            self.conn.close()
        finally:
            # The session holds pooled connections that were never released.
            self.sess.close()
|
|
|
+
|
|
|
if __name__ == "__main__":
    # Script entry point: build the scraper and run one full crawl.
    Qinimg().run()
|