6 months ago · c7a1609a7c
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1 @@
 
				+*.pyc
			
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 
				 # crawl_emotions
			
 
				 
			
 
				-表情包爬取工具
			
 
				+表情包爬取工具，速度不能快，会触发 Cloudflare 限制。
			
 
				 
			
 
				 ## License
			
 
				 
			
--- a/crawl_emotions/emotions.py
+++ b/crawl_emotions/emotions.py
@@ -6,26 +6,25 @@
 
				 @License :   Copyright © 2017-2022 liuyuqi. All Rights Reserved.
			
 
				 @Desc    :   表情包
			
 
				 '''
			
 
				-import os,sys,re
			
 
				+import os
			
 
				 # import requests
			
 
				 import httpx
			
 
				-import bs4,csv,lxml
			
 
				+import bs4
			
 
				 from concurrent.futures import ThreadPoolExecutor
			
 
				 
			
 
				 class Emotions(object):
			
 
				-    """"""
			
 
				+    """ crawl emotions """
			
 
				     _url = 'https://fabiaoqing.com/biaoqing/lists/page/{page}.html'
			
 
				     header= {
			
 
				         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.142.86 Safari/537.36"
			
 
				     }
			
 
				     def __init__(self):
			
 
				-        self.sess = httpx.Session()
			
 
				-        self.sess.headers.update(self.header)
			
 
				+        self.sess = httpx.Client(headers=self.header)
			
 
				         self.path = os.path.dirname(os.path.abspath(__file__))
			
 
				         if not os.path.exists(self.path+'/data'):
			
 
				             os.mkdir(self.path+'/data')
			
 
				         self.data_path = self.path+'/data/'
			
 
				-        self.pool = ThreadPoolExecutor(10)
			
 
				+        self.pool = ThreadPoolExecutor(2)
			
 
				 
			
 
				     def run(self):
			
 
				         for i in range(1, 4328+1):
			
@@ -42,7 +41,7 @@ class Emotions(object):
 
				             print('下载图片： ', title)
			
 
				             try:
			
 
				                 with open(self.data_path + title + os.path.splitext(image)[-1], 'wb') as f:
			
 
				-                    img = requests.get(image).content
			
 
				+                    img = self.sess.get(image).content
			
 
				                     f.write(img)
			
 
				             except OSError:
			
 
				                 print('length  failed')
			
--- a/main.py
+++ b/main.py
@@ -2,5 +2,5 @@
 
				 from crawl_emotions import Emotions
			
 
				 
			
 
				 if __name__=='__main__':
			
 
				-    emo=Emotions()
			
 
				+    emo= Emotions()
			
 
				     emo.run()