|
@@ -0,0 +1,54 @@
|
|
|
+#!/usr/bin/env python
|
|
|
+# -*- encoding: utf-8 -*-
|
|
|
+'''
|
|
|
+@Contact : liuyuqi.gov@msn.cn
|
|
|
+@Time : 2024/08/21 18:45:54
|
|
|
+@License : Copyright © 2017-2022 liuyuqi. All Rights Reserved.
|
|
|
+@Desc : 表情包
|
|
|
+'''
|
|
|
+import os,sys,re
|
|
|
+# import requests
|
|
|
+import httpx
|
|
|
+import bs4,csv,lxml
|
|
|
+from concurrent.futures import ThreadPoolExecutor
|
|
|
+
|
|
|
class Emotions(object):
    """Download the images listed on the fabiaoqing.com list pages.

    Each list page is fetched on a worker thread, parsed for lazy-loaded
    ``<img>`` tags, and every referenced image is saved into a ``data``
    directory located next to this source file. The file name is built
    from the tag's ``title`` attribute plus the extension of the image URL.
    """

    # URL template of one list page; ``{page}`` is the 1-based page number.
    _url = 'https://fabiaoqing.com/biaoqing/lists/page/{page}.html'
    header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.142.86 Safari/537.36"
    }
    # Path separators, characters reserved on Windows, and control characters.
    _unsafe_chars = re.compile(r'[\\/:*?"<>|\x00-\x1f]')
    # Upper bound on the sanitised title length (in characters) so that very
    # long titles are less likely to exceed the file system's name limit.
    _max_name_len = 80

    def __init__(self):
        """Create the HTTP client, the output directory and the thread pool."""
        # httpx has no ``Session`` class; ``Client`` is its pooled equivalent.
        self.sess = httpx.Client()
        self.sess.headers.update(self.header)
        self.path = os.path.dirname(os.path.abspath(__file__))
        self.data_path = self.path + '/data/'
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(self.data_path, exist_ok=True)
        self.pool = ThreadPoolExecutor(10)

    @classmethod
    def _safe_name(cls, title):
        """Return *title* made usable as a file name (may be empty)."""
        cleaned = cls._unsafe_chars.sub('_', title).strip()
        return cleaned[:cls._max_name_len]

    def run(self, start=1, end=4328):
        """Queue the list pages ``start``..``end`` (inclusive) for download.

        The call only submits the jobs to the thread pool and returns
        immediately; the pool is drained when the object is finalised.

        Args:
            start: First page number to fetch.
            end: Last page number to fetch.
        """
        for page in range(start, end + 1):
            self.pool.submit(self.get_page, self._url.format(page=page))

    def get_page(self, url):
        """Fetch one list page and save every image it references.

        Failures are reported with ``print`` and never propagate: a page
        that cannot be fetched is skipped, and an image that cannot be
        downloaded or written is skipped without aborting the rest of
        the page.

        Args:
            url: Absolute URL of the list page.
        """
        try:
            response = self.sess.get(url)
            response.raise_for_status()
        except httpx.HTTPError as err:
            # This runs inside a pool future, where an uncaught exception
            # would be stored on the future and never shown to anyone.
            print('page failed: ', url, err)
            return

        soup = bs4.BeautifulSoup(response.text, 'lxml')
        for img in soup.find_all('img', class_='ui image lazy'):
            image = img.get('data-original')
            title = img.get('title')
            if not image or not title:
                # Without both attributes there is nothing to fetch or name.
                continue
            name = self._safe_name(title)
            if not name:
                continue
            print('下载图片: ', title)
            # Drop any query string so it does not end up in the extension.
            ext = os.path.splitext(image.split('?', 1)[0])[-1]

            # Download first so a failed request leaves no empty file behind.
            try:
                reply = self.sess.get(image)
                reply.raise_for_status()
            except httpx.HTTPError as err:
                print('download failed: ', image, err)
                continue

            try:
                with open(self.data_path + name + ext, 'wb') as f:
                    f.write(reply.content)
            except OSError:
                print('length failed')
                continue
        print('下载完毕: ', url)

    def __del__(self):
        """Wait for queued downloads, then release the pool and the client."""
        # __init__ may have failed part-way, so either attribute can be absent.
        pool = getattr(self, 'pool', None)
        if pool is not None:
            pool.shutdown(wait=True)
        sess = getattr(self, 'sess', None)
        if sess is not None:
            sess.close()
|
|
|
+
|