get_user.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. # -*-coding:utf8-*-
  2. import requests
  3. import json
  4. import random
  5. import pymysql
  6. import sys
  7. import datetime
  8. import time
  9. from imp import reload
  10. from multiprocessing.dummy import Pool as ThreadPool
  11. def datetime_to_timestamp_in_milliseconds(d):
  12. def current_milli_time(): return int(round(time.time() * 1000))
  13. return current_milli_time()
# Python 2 leftover: reloading ``sys`` was the usual prelude to calling
# sys.setdefaultencoding(), but no such call follows in this file.
# NOTE(review): ``reload`` is imported from ``imp`` above; confirm that module
# is still available on the interpreter this script targets.
reload(sys)
  15. def LoadUserAgents(uafile):
  16. uas = []
  17. with open(uafile, 'rb') as uaf:
  18. for ua in uaf.readlines():
  19. if ua:
  20. uas.append(ua.strip()[:-1])
  21. random.shuffle(uas)
  22. return uas
  23. uas = LoadUserAgents("user_agents.txt")
  24. head = {
  25. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36',
  26. 'X-Requested-With': 'XMLHttpRequest',
  27. 'Referer': 'http://space.bilibili.com/45388',
  28. 'Origin': 'http://space.bilibili.com',
  29. 'Host': 'space.bilibili.com',
  30. 'AlexaToolbar-ALX_NS_PH': 'AlexaToolbar/alx-4.0',
  31. 'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6,ja;q=0.4',
  32. 'Accept': 'application/json, text/javascript, */*; q=0.01',
  33. }
  34. # Please replace your own proxies.
  35. proxies = {
  36. 'http': 'http://120.26.110.59:8080',
  37. 'http': 'http://120.52.32.46:80',
  38. 'http': 'http://218.85.133.62:80',
  39. }
  40. time1 = time.time()
  41. urls = []
  42. # Please change the range data by yourself.
  43. for m in range(5214, 5215):
  44. for i in range(m * 100, (m + 1) * 100):
  45. url = 'https://space.bilibili.com/' + str(i)
  46. urls.append(url)
  47. def getsource(url):
  48. payload = {
  49. '_': datetime_to_timestamp_in_milliseconds(datetime.datetime.now()),
  50. 'mid': url.replace('https://space.bilibili.com/', '')
  51. }
  52. ua = random.choice(uas)
  53. head = {
  54. 'User-Agent': ua,
  55. 'Referer': 'https://space.bilibili.com/' + str(i) + '?from=search&seid=' + str(random.randint(10000, 50000))
  56. }
  57. jscontent = requests \
  58. .session() \
  59. .post('http://space.bilibili.com/ajax/member/GetInfo',
  60. headers=head,
  61. data=payload,
  62. proxies=proxies) \
  63. .text
  64. time2 = time.time()
  65. try:
  66. jsDict = json.loads(jscontent)
  67. statusJson = jsDict['status'] if 'status' in jsDict.keys() else False
  68. if statusJson == True:
  69. if 'data' in jsDict.keys():
  70. jsData = jsDict['data']
  71. mid = jsData['mid']
  72. name = jsData['name']
  73. sex = jsData['sex']
  74. rank = jsData['rank']
  75. face = jsData['face']
  76. regtimestamp = jsData['regtime']
  77. regtime_local = time.localtime(regtimestamp)
  78. regtime = time.strftime("%Y-%m-%d %H:%M:%S",regtime_local)
  79. spacesta = jsData['spacesta']
  80. birthday = jsData['birthday'] if 'birthday' in jsData.keys() else 'nobirthday'
  81. sign = jsData['sign']
  82. level = jsData['level_info']['current_level']
  83. OfficialVerifyType = jsData['official_verify']['type']
  84. OfficialVerifyDesc = jsData['official_verify']['desc']
  85. vipType = jsData['vip']['vipType']
  86. vipStatus = jsData['vip']['vipStatus']
  87. toutu = jsData['toutu']
  88. toutuId = jsData['toutuId']
  89. coins = jsData['coins']
  90. print("Succeed get user info: " + str(mid) + "\t" + str(time2 - time1))
  91. try:
  92. res = requests.get(
  93. 'https://api.bilibili.com/x/relation/stat?vmid=' + str(mid) + '&jsonp=jsonp').text
  94. viewinfo = requests.get(
  95. 'https://api.bilibili.com/x/space/upstat?mid=' + str(mid) + '&jsonp=jsonp').text
  96. js_fans_data = json.loads(res)
  97. js_viewdata = json.loads(viewinfo)
  98. following = js_fans_data['data']['following']
  99. fans = js_fans_data['data']['follower']
  100. archiveview = js_viewdata['data']['archive']['view']
  101. article = js_viewdata['data']['article']['view']
  102. except:
  103. following = 0
  104. fans = 0
  105. archiveview = 0
  106. article = 0
  107. else:
  108. print('no data now')
  109. try:
  110. # Please write your MySQL's information.
  111. conn = pymysql.connect(
  112. host='localhost', user='root', passwd='123456', db='bilibili', charset='utf8')
  113. cur = conn.cursor()
  114. cur.execute('INSERT INTO bilibili_user_info(mid, name, sex, rank, face, regtime, spacesta, \
  115. birthday, sign, level, OfficialVerifyType, OfficialVerifyDesc, vipType, vipStatus, \
  116. toutu, toutuId, coins, following, fans ,archiveview, article) \
  117. VALUES ("%s","%s","%s","%s","%s","%s","%s","%s","%s","%s",\
  118. "%s","%s","%s","%s","%s", "%s","%s","%s","%s","%s","%s")'
  119. %
  120. (mid, name, sex, rank, face, regtime, spacesta, \
  121. birthday, sign, level, OfficialVerifyType, OfficialVerifyDesc, vipType, vipStatus, \
  122. toutu, toutuId, coins, following, fans ,archiveview, article))
  123. conn.commit()
  124. except Exception as e:
  125. print(e)
  126. else:
  127. print("Error: " + url)
  128. except Exception as e:
  129. print(e)
  130. pass
  131. if __name__ == "__main__":
  132. pool = ThreadPool(1)
  133. try:
  134. results = pool.map(getsource, urls)
  135. except Exception as e:
  136. print(e)
  137. pool.close()
  138. pool.join()