liuyuqi-dellpc committed cebc329cbd 4 months ago
5 changed files with 73 additions and 0 deletions
  1. README.md (+8 -0)
  2. crawl_emotions/__init__.py (+1 -0)
  3. crawl_emotions/emotions.py (+54 -0)
  4. main.py (+6 -0)
  5. requirements.txt (+4 -0)

README.md (+8 -0)

@@ -0,0 +1,8 @@
+# crawl_emotions
+
+A crawler for downloading emoticon (表情包) images.
+
+## License
+
+
+## Reference

crawl_emotions/__init__.py (+1 -0)

@@ -0,0 +1 @@
+from .emotions import Emotions

crawl_emotions/emotions.py (+54 -0)

@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+'''
+@Contact :   liuyuqi.gov@msn.cn
+@Time    :   2024/08/21 18:45:54
+@License :   Copyright © 2017-2022 liuyuqi. All Rights Reserved.
+@Desc    :   emoticon (表情包) image crawler
+'''
+import os
+import bs4
+import httpx
+from concurrent.futures import ThreadPoolExecutor
+
+class Emotions(object):
+    """Crawl emoticon images from fabiaoqing.com, one thread per listing page."""
+    _url = 'https://fabiaoqing.com/biaoqing/lists/page/{page}.html'
+    header = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.142.86 Safari/537.36"
+    }
+
+    def __init__(self):
+        # httpx has no Session class; the requests.Session equivalent is httpx.Client
+        self.sess = httpx.Client()
+        self.sess.headers.update(self.header)
+        self.path = os.path.dirname(os.path.abspath(__file__))
+        if not os.path.exists(self.path + '/data'):
+            os.mkdir(self.path + '/data')
+        self.data_path = self.path + '/data/'
+        self.pool = ThreadPoolExecutor(10)
+
+    def run(self):
+        for i in range(1, 4328 + 1):
+            url = self._url.format(page=i)
+            self.pool.submit(self.get_page, url)
+
+    def get_page(self, url):
+        response = self.sess.get(url)
+        soup = bs4.BeautifulSoup(response.text, 'lxml')
+        img_list = soup.find_all('img', class_='ui image lazy')
+        for img in img_list:
+            image = img.get('data-original')
+            title = img.get('title')
+            print('downloading image: ', title)
+            try:
+                with open(self.data_path + title + os.path.splitext(image)[-1], 'wb') as f:
+                    # download via the shared client; the original called the commented-out requests module
+                    f.write(self.sess.get(image).content)
+            except OSError:
+                print('failed to save: ', title)
+                break
+        print('page finished: ', url)
+
+    def __del__(self):
+        self.pool.shutdown(wait=True)

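The save path in `get_page` is built directly from the scraped `title`, and titles on the listing pages can contain characters that are not legal in file names, which is what the `OSError` handler is catching. A minimal sketch of a filename-sanitizing helper, assuming one wanted to avoid the failure instead of skipping the image (`sanitize_filename` is hypothetical and not part of this commit):

```python
import re

def sanitize_filename(title: str, max_len: int = 100) -> str:
    """Replace characters that are illegal in file names (e.g. \\ / : * ? " < > | on Windows)
    and cap the length so very long titles do not exceed path limits."""
    cleaned = re.sub(r'[\\/:*?"<>|\r\n\t]', '_', title).strip()
    return cleaned[:max_len] or 'untitled'

# usage inside get_page (sketch):
#   with open(self.data_path + sanitize_filename(title) + ext, 'wb') as f:
```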
main.py (+6 -0)

@@ -0,0 +1,6 @@
+
+from crawl_emotions import Emotions
+
+if __name__ == '__main__':
+    emo = Emotions()
+    emo.run()

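`run()` submits the 4,328 page tasks to the thread pool and returns immediately; finishing the queued work is left to the pool, with `__del__` calling `shutdown(wait=True)`. A sketch of an alternative entry point that blocks explicitly until every page has been processed (`crawl_all` is hypothetical and not part of this commit):

```python
from concurrent.futures import wait

from crawl_emotions import Emotions

def crawl_all(pages: int = 4328) -> None:
    emo = Emotions()
    # collect the futures so the caller can block until every page task is done,
    # instead of depending on destructor/shutdown ordering at interpreter exit
    futures = [
        emo.pool.submit(emo.get_page, emo._url.format(page=i))
        for i in range(1, pages + 1)
    ]
    wait(futures)

if __name__ == '__main__':
    crawl_all()
```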
requirements.txt (+4 -0)

@@ -0,0 +1,4 @@
+requests
+lxml
+beautifulsoup4
+httpx