|
@@ -6,26 +6,25 @@
|
|
@License : Copyright © 2017-2022 liuyuqi. All Rights Reserved.
|
|
@License : Copyright © 2017-2022 liuyuqi. All Rights Reserved.
|
|
@Desc : 表情包
|
|
@Desc : 表情包
|
|
'''
|
|
'''
|
|
-import os,sys,re
|
|
|
|
|
|
+import os
|
|
# import requests
|
|
# import requests
|
|
import httpx
|
|
import httpx
|
|
-import bs4,csv,lxml
|
|
|
|
|
|
+import bs4
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
|
|
|
|
class Emotions(object):
|
|
class Emotions(object):
|
|
- """"""
|
|
|
|
|
|
+ """ crawl emotions """
|
|
_url = 'https://fabiaoqing.com/biaoqing/lists/page/{page}.html'
|
|
_url = 'https://fabiaoqing.com/biaoqing/lists/page/{page}.html'
|
|
header= {
|
|
header= {
|
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.142.86 Safari/537.36"
|
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.142.86 Safari/537.36"
|
|
}
|
|
}
|
|
def __init__(self):
|
|
def __init__(self):
|
|
- self.sess = httpx.Session()
|
|
|
|
- self.sess.headers.update(self.header)
|
|
|
|
|
|
+ self.sess = httpx.Client(headers=self.header)
|
|
self.path = os.path.dirname(os.path.abspath(__file__))
|
|
self.path = os.path.dirname(os.path.abspath(__file__))
|
|
if not os.path.exists(self.path+'/data'):
|
|
if not os.path.exists(self.path+'/data'):
|
|
os.mkdir(self.path+'/data')
|
|
os.mkdir(self.path+'/data')
|
|
self.data_path = self.path+'/data/'
|
|
self.data_path = self.path+'/data/'
|
|
- self.pool = ThreadPoolExecutor(10)
|
|
|
|
|
|
+ self.pool = ThreadPoolExecutor(2)
|
|
|
|
|
|
def run(self):
|
|
def run(self):
|
|
for i in range(1, 4328+1):
|
|
for i in range(1, 4328+1):
|
|
@@ -42,7 +41,7 @@ class Emotions(object):
|
|
print('下载图片: ', title)
|
|
print('下载图片: ', title)
|
|
try:
|
|
try:
|
|
with open(self.data_path + title + os.path.splitext(image)[-1], 'wb') as f:
|
|
with open(self.data_path + title + os.path.splitext(image)[-1], 'wb') as f:
|
|
- img = requests.get(image).content
|
|
|
|
|
|
+ img = self.sess.get(image).content
|
|
f.write(img)
|
|
f.write(img)
|
|
except OSError:
|
|
except OSError:
|
|
print('length failed')
|
|
print('length failed')
|