@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+'''
+@Contact : liuyuqi.gov@msn.cn
+@Time : 2023/09/14 07:14:07
+@License : Copyright © 2017-2022 liuyuqi. All Rights Reserved.
+@Desc :
+https://www.qinimg.com/
+'''
+import os
+import sqlite3
+from urllib.parse import urljoin
+
+import requests
+from bs4 import BeautifulSoup
+
+
+class Qinimg(object):
+    ''' image crawler for https://www.qinimg.com/ '''
+
+    _host = "https://www.qinimg.com/"
+    _url = "https://www.qinimg.com/random"
+    _name = "qinimg"
+    _header = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
+        "Referer": "https://www.qinimg.com/",
+        "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
+        "Accept-Encoding": "gzip, deflate, br",
+        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
+        "Cache-Control": "no-cache",
+        "Connection": "keep-alive",
+        "Host": "www.qinimg.com"
+    }
+    max_page = 1000
+
+    def __init__(self):
+        self.sess = requests.Session()
+        # init sqlite database: qinimg_girl stores list-page urls, qinimg_girl_detail tracks downloads
+        self.conn = sqlite3.connect('%s.db' % self._name)
+        self.conn.execute('CREATE TABLE IF NOT EXISTS qinimg_girl (url TEXT PRIMARY KEY)')
+        self.conn.execute('CREATE TABLE IF NOT EXISTS qinimg_girl_detail (url TEXT PRIMARY KEY, downloaded INTEGER DEFAULT 0)')
+
+    def get_list(self, page=1, url=None):
+        ''' get all pic list, following the "next" link page by page '''
+        res = self.sess.get(url or self._host, headers=self._header)
+        res.encoding = 'utf-8'
+        soup = BeautifulSoup(res.text, 'html.parser')
+        # find all img, e.g.
+        # /html/body/div[3]/div[2]/ul[1]/li[1]/a/img
+        # /html/body/div[3]/div[2]/ul[4]/li[1]/a/img
+        # div.list_box > ul:nth-child(5) > a
+        imgs = soup.find_all('img')
+
+        # save to sqlite, qinimg_girl
+        for img in imgs:
+            src = img.get('src')
+            if not src:
+                continue
+            print(src)
+            # if not exist in sqlite, add to sqlite; if exist, the insert is ignored
+            self.conn.execute('INSERT OR IGNORE INTO qinimg_girl (url) VALUES (?)', (src,))
+        self.conn.commit()
+
+        # find the next button; stop when it disappears or max_page is reached
+        next_link = soup.find('a', {'class': 'next'})
+        if next_link and next_link.get('href') and page < self.max_page:
+            self.get_list(page=page + 1, url=urljoin(self._host, next_link['href']))
+
+    def get_pic_detail(self, url):
+        ''' get pic detail and download every picture on it '''
+        res = self.sess.get(url, headers=self._header)
+        res.encoding = 'utf-8'
+        soup = BeautifulSoup(res.text, 'html.parser')
+        # find all img on the detail page
+        imgs = soup.find_all('img')
+
+        # save to sqlite, qinimg_girl_detail
+        for img in imgs:
+            src = img.get('src')
+            if not src:
+                continue
+            print(src)
+            # if exist in sqlite and already downloaded, skip
+            row = self.conn.execute('SELECT downloaded FROM qinimg_girl_detail WHERE url=?', (src,)).fetchone()
+            if row and row[0]:
+                continue
+            # if not exist in sqlite, add to sqlite, then download and mark as downloaded
+            self.conn.execute('INSERT OR IGNORE INTO qinimg_girl_detail (url) VALUES (?)', (src,))
+            self.download(src)
+            self.conn.execute('UPDATE qinimg_girl_detail SET downloaded=1 WHERE url=?', (src,))
+        self.conn.commit()
+
+    def download(self, url):
+        ''' download one picture and save it to the local data/ directory '''
+        res = self.sess.get(urljoin(self._host, url), headers=self._header)
+        # save to local: data/<basename of the url>
+        os.makedirs('data', exist_ok=True)
+        name = os.path.basename(url.split('?')[0]) or '%s.jpg' % self._name
+        with open(os.path.join('data', name), 'wb') as f:
+            f.write(res.content)
+
+    def run(self):
+        ''' run '''
+        self.get_list()
+
+
+if __name__ == "__main__":
+    qinimg = Qinimg()
+    qinimg.run()