123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155 |
- # -*-coding:utf8-*-
- import requests
- import json
- import random
- import pymysql
- import sys
- import datetime
- import time
- from imp import reload
- from multiprocessing.dummy import Pool as ThreadPool
- def datetime_to_timestamp_in_milliseconds(d):
- def current_milli_time(): return int(round(time.time() * 1000))
- return current_milli_time()
- reload(sys)
- def LoadUserAgents(uafile):
- uas = []
- with open(uafile, 'rb') as uaf:
- for ua in uaf.readlines():
- if ua:
- uas.append(ua.strip()[:-1])
- random.shuffle(uas)
- return uas
- uas = LoadUserAgents("user_agents.txt")
- head = {
- 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36',
- 'X-Requested-With': 'XMLHttpRequest',
- 'Referer': 'http://space.bilibili.com/45388',
- 'Origin': 'http://space.bilibili.com',
- 'Host': 'space.bilibili.com',
- 'AlexaToolbar-ALX_NS_PH': 'AlexaToolbar/alx-4.0',
- 'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6,ja;q=0.4',
- 'Accept': 'application/json, text/javascript, */*; q=0.01',
- }
- # Please replace your own proxies.
- proxies = {
- 'http': 'http://120.26.110.59:8080',
- 'http': 'http://120.52.32.46:80',
- 'http': 'http://218.85.133.62:80',
- }
- time1 = time.time()
- urls = []
- # Please change the range data by yourself.
- for m in range(5214, 5215):
- for i in range(m * 100, (m + 1) * 100):
- url = 'https://space.bilibili.com/' + str(i)
- urls.append(url)
- def getsource(url):
- payload = {
- '_': datetime_to_timestamp_in_milliseconds(datetime.datetime.now()),
- 'mid': url.replace('https://space.bilibili.com/', '')
- }
- ua = random.choice(uas)
- head = {
- 'User-Agent': ua,
- 'Referer': 'https://space.bilibili.com/' + str(i) + '?from=search&seid=' + str(random.randint(10000, 50000))
- }
- jscontent = requests \
- .session() \
- .post('http://space.bilibili.com/ajax/member/GetInfo',
- headers=head,
- data=payload,
- proxies=proxies) \
- .text
- time2 = time.time()
- try:
- jsDict = json.loads(jscontent)
- statusJson = jsDict['status'] if 'status' in jsDict.keys() else False
- if statusJson == True:
- if 'data' in jsDict.keys():
- jsData = jsDict['data']
- mid = jsData['mid']
- name = jsData['name']
- sex = jsData['sex']
- rank = jsData['rank']
- face = jsData['face']
- regtimestamp = jsData['regtime']
- regtime_local = time.localtime(regtimestamp)
- regtime = time.strftime("%Y-%m-%d %H:%M:%S",regtime_local)
- spacesta = jsData['spacesta']
- birthday = jsData['birthday'] if 'birthday' in jsData.keys() else 'nobirthday'
- sign = jsData['sign']
- level = jsData['level_info']['current_level']
- OfficialVerifyType = jsData['official_verify']['type']
- OfficialVerifyDesc = jsData['official_verify']['desc']
- vipType = jsData['vip']['vipType']
- vipStatus = jsData['vip']['vipStatus']
- toutu = jsData['toutu']
- toutuId = jsData['toutuId']
- coins = jsData['coins']
- print("Succeed get user info: " + str(mid) + "\t" + str(time2 - time1))
- try:
- res = requests.get(
- 'https://api.bilibili.com/x/relation/stat?vmid=' + str(mid) + '&jsonp=jsonp').text
- viewinfo = requests.get(
- 'https://api.bilibili.com/x/space/upstat?mid=' + str(mid) + '&jsonp=jsonp').text
- js_fans_data = json.loads(res)
- js_viewdata = json.loads(viewinfo)
- following = js_fans_data['data']['following']
- fans = js_fans_data['data']['follower']
- archiveview = js_viewdata['data']['archive']['view']
- article = js_viewdata['data']['article']['view']
- except:
- following = 0
- fans = 0
- archiveview = 0
- article = 0
- else:
- print('no data now')
- try:
- # Please write your MySQL's information.
- conn = pymysql.connect(
- host='localhost', user='root', passwd='123456', db='bilibili', charset='utf8')
- cur = conn.cursor()
- cur.execute('INSERT INTO bilibili_user_info(mid, name, sex, rank, face, regtime, spacesta, \
- birthday, sign, level, OfficialVerifyType, OfficialVerifyDesc, vipType, vipStatus, \
- toutu, toutuId, coins, following, fans ,archiveview, article) \
- VALUES ("%s","%s","%s","%s","%s","%s","%s","%s","%s","%s",\
- "%s","%s","%s","%s","%s", "%s","%s","%s","%s","%s","%s")'
- %
- (mid, name, sex, rank, face, regtime, spacesta, \
- birthday, sign, level, OfficialVerifyType, OfficialVerifyDesc, vipType, vipStatus, \
- toutu, toutuId, coins, following, fans ,archiveview, article))
- conn.commit()
- except Exception as e:
- print(e)
- else:
- print("Error: " + url)
- except Exception as e:
- print(e)
- pass
- if __name__ == "__main__":
- pool = ThreadPool(1)
- try:
- results = pool.map(getsource, urls)
- except Exception as e:
- print(e)
-
- pool.close()
- pool.join()
|