#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
@File    :   get_user.py
@Time    :   2019/05/15 20:28:36
@Author  :   Liuyuqi 
@Version :   1.0
@Contact :   liuyuqi.gov@msn.cn
@License :   (C)Copyright 2019
@Desc    :   Crawl Bilibili user information. Related page / endpoint:
https://space.bilibili.com/521400
http://space.bilibili.com/ajax/member/GetInfo
"""

import requests
import json
import random
import pymysql
import datetime
import time
import os, sys

# Project root as a hard-coded, machine-specific absolute path.
# The working directory is switched to it and it is appended to sys.path
# before the project-local `utils` import below — presumably so that import
# resolves; keep these statements ahead of it.
# NOTE(review): os.chdir() raises at import time if this directory does not exist.
src = "C:/Users/liuyuqi/Desktop/crawl_bilibili"
os.chdir(src)
sys.path.append(src)

from utils.user_agent import getheaders

# Connect to the database (runs at import time).
# NOTE(review): host, user and password are hard-coded in source; consider
# loading them from configuration or environment variables.
# NOTE(review): MySQL's "utf8" charset is the 3-byte utf8mb3 variant; 4-byte
# characters (e.g. emoji in names/signatures) would need utf8mb4 — confirm
# against the table definition.
conn = pymysql.connect(
    host="192.168.99.100", user="root", passwd="123456", db="bilibili", charset="utf8"
)
# Module-level cursor; getsource() uses it to run its INSERT statement.
cur = conn.cursor()
# cur.execute("sql")
# conn.commit()

# Browser-like default request headers.
# NOTE(review): getsource() builds its own per-request headers, so this
# module-level dict is not read by any active code in this file.
head = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36",
    "X-Requested-With": "XMLHttpRequest",
    "Referer": "http://space.bilibili.com/45388",
    "Origin": "http://space.bilibili.com",
    "Host": "space.bilibili.com",
    "AlexaToolbar-ALX_NS_PH": "AlexaToolbar/alx-4.0",
    "Accept-Language": "zh-CN,zh;q=0.8,en;q=0.6,ja;q=0.4",
    "Accept": "application/json, text/javascript, */*; q=0.01",
}

# Proxy mapping in the form accepted by requests' `proxies=` argument.
# Not passed to any request by active (non-commented) code in this file.
proxies = {"http": "http://120.26.110.59:8080"}

# Wall-clock time (seconds since the epoch) captured at module load;
# getsource() prints the elapsed time relative to this value.
time1 = time.time()  # e.g. 1557920724.447739
# Not referenced by any active (non-commented) code in this file.
urls = []
# Values getsource() picks from at random for the User-Agent header
# (presumably a list of User-Agent strings — confirm in utils.user_agent).
# The empty-list placeholder is overwritten immediately by getheaders().
uas = []
uas = getheaders()


def datetime_to_timestamp_in_milliseconds():
    """Return the current Unix time in whole milliseconds.

    The float seconds from ``time.time()`` are scaled to milliseconds and
    rounded to the nearest integer, e.g. ``1557920582757``.
    """
    now_in_seconds = time.time()
    rounded_millis = round(now_in_seconds * 1000)
    return int(rounded_millis)


# Parameterized INSERT for one crawled profile. Values are bound by the
# database driver instead of being spliced into the SQL text, so quotes in
# remote data (names, signatures) can neither break nor alter the statement.
# `rank` is back-quoted because RANK is a reserved word on MySQL 8.0+.
_INSERT_USER_SQL = (
    "INSERT INTO user(mid, name, sex, `rank`, face, regtime, spacesta, "
    "birthday, sign, level, OfficialVerifyType, OfficialVerifyDesc, "
    "vipType, vipStatus, toutu, toutuId, coins, following, fans, "
    "archiveview, article) "
    "VALUES (" + ", ".join(["%s"] * 21) + ")"
)


def _extract_profile(jsData):
    """Return the profile columns from a GetInfo ``data`` object, in INSERT order.

    Raises KeyError / TypeError when an expected field is missing or has an
    unexpected shape; only ``birthday`` has a fallback value.
    """
    # The response carries no registration time (the original code notes the
    # "regtime" key is absent), so a fixed placeholder timestamp is stored.
    regtime = "2018-05-06 12:22:23"
    return (
        jsData["mid"],
        jsData["name"],
        jsData["sex"],
        jsData["rank"],
        jsData["face"],
        regtime,
        jsData["spacesta"],
        jsData.get("birthday", "nobirthday"),
        jsData["sign"],
        jsData["level_info"]["current_level"],
        jsData["official_verify"]["type"],
        jsData["official_verify"]["desc"],
        jsData["vip"]["vipType"],
        jsData["vip"]["vipStatus"],
        jsData["toutu"],
        jsData["toutuId"],
        jsData["coins"],
    )


def _fetch_relation_stats(mid, timeout):
    """Return ``(following, fans, archiveview, article)`` for user *mid*.

    This lookup is best-effort: if either request fails or a response lacks
    the expected keys, all four values fall back to 0 so that the basic
    profile can still be stored.
    """
    try:
        relation_text = requests.get(
            "https://api.bilibili.com/x/relation/stat",
            params={"vmid": mid, "jsonp": "jsonp"},
            timeout=timeout,
        ).text
        upstat_text = requests.get(
            "https://api.bilibili.com/x/space/upstat",
            params={"mid": mid, "jsonp": "jsonp"},
            timeout=timeout,
        ).text
        relation = json.loads(relation_text)["data"]
        upstat = json.loads(upstat_text)["data"]
        return (
            relation["following"],
            relation["follower"],
            upstat["archive"]["view"],
            upstat["article"]["view"],
        )
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        # Report instead of silently swallowing, then use the zero defaults.
        print("Failed to get relation stats: " + str(mid) + "\t" + str(e))
        return (0, 0, 0, 0)


def getsource(url, i, timeout=10):
    """Fetch one user's profile and statistics and insert them into ``user``.

    Errors are reported with ``print`` and the user is skipped; nothing is
    raised for network failures, malformed responses or failed inserts, so a
    crawl loop can continue with the next id.

    param url    : space URL of the user ("https://space.bilibili.com/<mid>");
                   the mid sent to the API is the URL with that prefix removed.
    param i      : user id, used only to build the Referer header.
    param timeout: seconds allowed for each HTTP request (default 10), so a
                   stalled connection cannot hang the crawl indefinitely.
    return       : None
    """
    payload = {
        "_": datetime_to_timestamp_in_milliseconds(),
        "mid": url.replace("https://space.bilibili.com/", ""),
    }
    headers = {
        "User-Agent": random.choice(uas),
        "Referer": "https://space.bilibili.com/"
        + str(i)
        + "?from=search&seid="
        + str(random.randint(10000, 50000)),
    }
    try:
        # requests.post() opens and closes its own session, so no Session
        # object is left unclosed after each call.
        jscontent = requests.post(
            "http://space.bilibili.com/ajax/member/GetInfo",
            headers=headers,
            data=payload,
            timeout=timeout,
            # proxies=proxies,
        ).text
    except requests.RequestException as e:
        print("Error: " + url)
        print(e)
        return
    time2 = time.time()

    try:
        jsDict = json.loads(jscontent)
        if jsDict.get("status") is not True:
            print("Error: " + url)
            return
        if "data" not in jsDict:
            # Nothing to store; return here rather than reaching the INSERT
            # with no profile values.
            print("no data now")
            return
        profile = _extract_profile(jsDict["data"])
    except (ValueError, KeyError, TypeError, AttributeError) as e:
        # Invalid JSON, a non-object payload, or a missing profile field.
        print(e)
        return

    mid = profile[0]
    # Elapsed time is measured from module load (time1), not from this call.
    print("Succeed get user info: " + str(mid) + "\t" + str(time2 - time1))
    stats = _fetch_relation_stats(mid, timeout)

    try:
        cur.execute(_INSERT_USER_SQL, profile + stats)
        conn.commit()
    except Exception as e:
        # Per-user best-effort boundary: report the failure and move on.
        print(e)


def crawlUser(start=521400, count=1):
    """
    Start crawling: fetch and store a consecutive range of user ids.

    param start: first user id to crawl (default 521400).
    param count: number of consecutive ids to crawl (default 1, which
                 reproduces the previously hard-coded single-id range).
    return: None
    """
    for i in range(start, start + count):
        url = "https://space.bilibili.com/" + str(i)
        getsource(url, i)


if __name__ == "__main__":
    # Script entry point: make sure the working directory is the project root,
    # run the crawl, and always release the database resources afterwards.
    src = "C:/Users/liuyuqi/Desktop/crawl_bilibili"
    os.chdir(src)
    try:
        crawlUser()
    finally:
        # Close the module-level cursor and connection even if the crawl
        # aborts with an exception.
        cur.close()
        conn.close()