123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172 |
- #!/usr/bin/env python
- # -*- encoding: utf-8 -*-
'''
@Author : liuyuqi
@Contact : liuyuqi.gov@msn.cn
@Time : 2019/08/11 06:41:06
@Version : 1.0
@License : (C)Copyright 2019
@Desc : Fetch the data of all matchmaking users.
https://short-msg-ms.juejin.im/v1/pinList/topic?uid=&device_id=&token=&src=web&topicId=5abcaa67092dcb4620ca335c&page=3&pageSize=20&sortType=rank
447 entries in total, 20 per page, 23 pages.
'''
- # import pandas
- # import josn
- # import os,sys,re
- # import requests
import json
import threading
import time
import urllib.parse
import urllib.request
# Request configuration shared by the functions in this module.
url_seed = ""  # URL that getUser() sends its request to; blank in this file
url_login = ""  # not referenced by the code visible in this file
url_cache = set()  # starts empty; presumably meant to track seen URLs (unused here)

# HTTP headers that getUser() attaches to its request.
# NOTE(review): Host/Referer/Cookie target kuaishou.com while the module
# header mentions a juejin.im endpoint — confirm which site is intended.
headers = {
    'User-Agent': "Mozilla/5.0 (Linux; Android 7.1.1; Nexus 6 Build/N6F26U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Mobile Safari/537.36",
    'Cookie': 'did=web_34abffaccc51410a45a2f09bee712ec6; didv=2; Hm_lvt_86a27b7db2c5c0ae37fee4a8a35033ee=1549878747,1549878930,1549878956; Hm_lpvt_86a27b7db2c5c0ae37fee4a8a35033ee=1549879170',
    'Host': 'id.kuaishou.com',
    'Referer': 'https://www.kuaishou.com/account/login/?redirectURL=https%3A%2F%2Fverify.kuaishou.com%2F%23%2Fverify%2Fpersonal',
    'Upgrade-Insecure-Requests': '1',
}
def crawl(total_pages=23):
    """Print each page number from 1 through ``total_pages``, one per line.

    Args:
        total_pages: Number of the last page to visit. Defaults to 23, the
            page count the loop previously hard-coded, so existing
            ``crawl()`` calls behave exactly as before. A value below 1
            prints nothing.
    """
    for page in range(1, total_pages + 1):
        print(page)
def getUser():
    """Request ``url_seed`` and hand every returned user record to ``saveUser``.

    The form fields in ``data`` are URL-encoded and sent as the request body
    together with the module-level ``headers``. The decoded response text is
    printed, parsed as JSON, and each element of ``payload["d"]["list"]`` is
    passed to ``saveUser``.

    Any failure (invalid or empty URL, network error, non-JSON body, missing
    keys) is caught and printed rather than raised, so the function always
    returns ``None``.
    """
    # Placeholder form fields. The literal used to repeat the empty key seven
    # times, which Python collapses to this single entry anyway.
    data = {"": ""}
    body = urllib.parse.urlencode(data).encode(encoding='UTF8')
    try:
        req = urllib.request.Request(url=url_seed, data=body, headers=headers)
        with urllib.request.urlopen(req) as res:
            text = res.read().decode('utf-8')
        print(text)
        # The response object itself is not subscriptable; the body has to be
        # parsed first. Assumes a {"d": {"list": [...]}} layout, taken from
        # the keys the code already indexed — TODO confirm against the API.
        payload = json.loads(text)
        for user in payload["d"]["list"]:
            saveUser(user)
    except Exception as err:
        # Best-effort script: report the problem and carry on.
        print(err)
def saveUser(jsonUser):
    """Save one user record to MongoDB.

    Not implemented yet: ``jsonUser`` is accepted but nothing is stored, and
    the function returns ``None``.
    """
    return None
if __name__ == "__main__":
    # Run the crawl and report how many wall-clock seconds it took.
    began = time.time()
    crawl()
    elapsed = time.time() - began
    print("last time: {} s".format(elapsed))
|