#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@Author  :   liuyuqi
@Contact :   liuyuqi.gov@msn.cn
@Time    :   2019/08/11 06:41:06
@Version :   1.0
@License :   (C)Copyright 2019
@Desc    :   Fetch the data of all users posting in the matchmaking topic.

https://short-msg-ms.juejin.im/v1/pinList/topic?uid=&device_id=&token=&src=web&topicId=5abcaa67092dcb4620ca335c&page=3&pageSize=20&sortType=rank

447 entries in total, 20 per page, 23 pages altogether.
'''

import json
import threading
import time
import urllib.parse
import urllib.request

# Endpoint that getUser() sends its request to. It is empty in this file, so
# getUser() only reports an "unknown url type" error until it is filled in.
url_seed = ""
# Not referenced by any code in this file.
url_login = ""
url_cache = set()

# Headers sent with every request made by getUser().
# NOTE(review): Host, Referer and Cookie refer to kuaishou.com while the module
# docstring references a juejin.im endpoint -- confirm they match whatever
# url_seed is eventually set to.
headers = {
    'User-Agent': "Mozilla/5.0 (Linux; Android 7.1.1; Nexus 6 Build/N6F26U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Mobile Safari/537.36",
    'Cookie': 'did=web_34abffaccc51410a45a2f09bee712ec6; didv=2; Hm_lvt_86a27b7db2c5c0ae37fee4a8a35033ee=1549878747,1549878930,1549878956; Hm_lpvt_86a27b7db2c5c0ae37fee4a8a35033ee=1549879170',
    'Host': 'id.kuaishou.com',
    'Referer': 'https://www.kuaishou.com/account/login/?redirectURL=https%3A%2F%2Fverify.kuaishou.com%2F%23%2Fverify%2Fpersonal',
    'Upgrade-Insecure-Requests': '1',
}


def crawl(page_count=23):
    '''Print every page number from 1 to ``page_count`` inclusive.

    NOTE(review): nothing is fetched here yet; presumably each page is meant
    to be requested through getUser() -- confirm before wiring it in.

    Args:
        page_count: Number of pages to walk. Defaults to 23, the page total
            stated in the module docstring. Values below 1 print nothing.
    '''
    for page in range(1, page_count + 1):
        print(page)


def getUser():
    '''Request ``url_seed`` and hand every returned user to saveUser().

    The form-encoded ``data`` is sent as the request body together with the
    module-level ``headers``. The decoded response text is printed, parsed as
    JSON, and each element of ``payload["d"]["list"]`` is passed to
    saveUser().

    Any failure (invalid URL, network error, non-JSON body, unexpected JSON
    layout) is printed instead of raised, so the caller never sees an
    exception.

    Returns:
        None.
    '''
    # Placeholder form field that still has to be filled in. The original
    # literal repeated this empty key seven times, which Python collapses to
    # exactly this one entry.
    data = {
        "": "",
    }
    try:
        req = urllib.request.Request(
            url=url_seed,
            data=urllib.parse.urlencode(data).encode(encoding='UTF8'),
            headers=headers)
        with urllib.request.urlopen(req) as res:
            body = res.read().decode('utf-8')
        print(body)
        # The response object is a byte stream, not a mapping: decode the
        # JSON text first, then walk the user list it contains.
        payload = json.loads(body)
        for user in payload["d"]["list"]:
            saveUser(user)
    except Exception as err:
        # Deliberately broad: this is best-effort, report the failure and
        # return normally rather than propagate.
        print(err)


def saveUser(jsonUser):
    '''Save one user record to MongoDB (not implemented yet).

    Args:
        jsonUser: One decoded user entry from the response list.
    '''
    pass


if __name__ == "__main__":
    start_time = time.time()
    crawl()
    print("last time: {} s".format(time.time() - start_time))