liuyuqi-dellpc 1 year ago
parent
commit
2c6888643b
1 changed file with 11 additions and 5 deletions
  1. 11 5
      crawl_xiaohua/crawl_xiaohua/extractor/qinimg.py

+ 11 - 5
crawl_xiaohua/crawl_xiaohua/extractor/qinimg.py

@@ -30,10 +30,12 @@ class Qinimg(object):
                "Connection": "keep-alive",
                "Host": "www.qinimg.com"
                }
-    max_page = 1000
+    max_page = 1
 
     def __init__(self):
         self.sess = requests.Session()
+        # set proxy
+        self.sess.proxies = {"http": "http://127.0.0.1:2181", "https": "http://127.0.0.1:2181"}
         self.conn = sqlite3.connect('xiaohua.db')
         self.cursor = self.conn.cursor()
         self.init_database()
@@ -57,7 +59,6 @@ class Qinimg(object):
     '''
             self.cursor.execute(sql1)
 
-
         self.cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='qinimg_girl_detail'")
         exists = self.cursor.fetchone()
         if not exists:
@@ -76,13 +77,16 @@ class Qinimg(object):
 
     def get_list(self, next_page=None):
         ''' get all pic list '''
+        print("init page: "+next_page)
         if next_page is None:
             next_page = self._host
         else:
             next_page = self._host + next_page
         res=self.sess.get(next_page, headers=self._header)
-        res.encoding = 'utf-8'
+        # res.encoding = 'utf-8'
         soup = BeautifulSoup(res.text, 'html.parser')
+        with open("data/list.html", "w", encoding="utf-8") as f:
+            f.write(res.text)
         imgs = soup.select('div.list_box ul')
         # save to sqlite, qinimg_girl
         for img in imgs:
@@ -109,6 +113,7 @@ class Qinimg(object):
         self.set_config("init_page", next_page)
 
     def save_girl(self, url:str, title:str, createtime:str):
+        ''' save girl list to sqlite '''
         # if not exist in sqlite, add to sqlite
         # if exist, skip
         uid=re.findall(r'(\d+)', url)[0]
@@ -149,6 +154,7 @@ Select * from qinimg_girl where uid = '{}' and isdelete = 0
             self.save_girl_detail(uid,name,index,createtime,url)
 
     def save_girl_detail(self, uid,name,index,createtime,url):
+        ''' save girl detail to sqlite '''
         sql='''
 insert into qinimg_girl_detail (uid,name,index,createtime,url) values ('{}', '{}', '{}', '{}', '{}')
         '''
@@ -185,7 +191,7 @@ insert into qinimg_girl_detail (uid,name,index,createtime,url) values ('{}', '{}
     def run(self):
         ''' run '''
         # get all pic
-        init_page=self.get_config("init_page")
+        init_page = self.get_config("init_page")
         self.get_list(next_page = init_page)
         # download pic
         # self.download_all_pic()
@@ -218,7 +224,7 @@ select * from qinimg_config where key = '{}'
         try:
             self.cursor.execute(sql.format(key))
             config = self.cursor.fetchone()
-            return config["value"]
+            return config[2]
         except Exception as e:
             return None
         finally: