|
@@ -0,0 +1,72 @@
|
|
|
+#!/usr/bin/env python
|
|
|
+# -*- encoding: utf-8 -*-
|
|
|
+'''
|
|
|
+@Author : liuyuqi
|
|
|
+@Contact : liuyuqi.gov@msn.cn
|
|
|
+@Time : 2019/08/11 06:41:06
|
|
|
+@Version : 1.0
|
|
|
+@License : (C)Copyright 2019
|
|
|
+@Desc : Fetch the data of all matchmaking (blind-date) users
|
|
|
+
|
|
|
+https://short-msg-ms.juejin.im/v1/pinList/topic?uid=&device_id=&token=&src=web&topicId=5abcaa67092dcb4620ca335c&page=3&pageSize=20&sortType=rank
|
|
|
+
|
|
|
+447 records in total, 20 per page, 23 pages altogether.
|
|
|
+'''
|
|
|
+# import pandas
|
|
|
+# import json
|
|
|
+# import os,sys,re
|
|
|
+# import requests
|
|
|
+
|
|
|
+import time
|
|
|
+import threading
|
|
|
+import urllib.request
|
|
|
+
|
|
|
# Endpoints used by the crawler. Both are blank in this file and have to be
# filled in before any request can succeed.
url_seed = ""
url_login = ""

# Empty set created at import time; nothing in the visible code reads or
# writes it (presumably meant for de-duplicating visited URLs - TODO confirm).
url_cache = set()

# Request headers sent by getUser().
# NOTE(review): Host and Referer point at kuaishou.com while the module header
# documents a juejin.im endpoint - confirm they match the final url_seed.
# NOTE(review): a literal browser cookie is committed here; consider loading
# it from configuration instead.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Linux; Android 7.1.1; Nexus 6 Build/N6F26U) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/71.0.3578.98 Mobile Safari/537.36"
    ),
    "Cookie": (
        "did=web_34abffaccc51410a45a2f09bee712ec6; "
        "didv=2; "
        "Hm_lvt_86a27b7db2c5c0ae37fee4a8a35033ee=1549878747,1549878930,1549878956; "
        "Hm_lpvt_86a27b7db2c5c0ae37fee4a8a35033ee=1549879170"
    ),
    "Host": "id.kuaishou.com",
    "Referer": (
        "https://www.kuaishou.com/account/login/"
        "?redirectURL=https%3A%2F%2Fverify.kuaishou.com%2F%23%2Fverify%2Fpersonal"
    ),
    "Upgrade-Insecure-Requests": "1",
}
|
|
|
+
|
|
|
+
|
|
|
def crawl():
    """Print the page numbers 1 through 23, one per line.

    This is only the pagination skeleton: no request is issued per page.
    The upper bound corresponds to the 23 pages mentioned in the module
    header.
    """
    page_numbers = range(1, 24)
    # One print call; each number still ends up on its own line.
    print(*page_numbers, sep="\n")
|
|
|
+
|
|
|
+
|
|
|
def getUser():
    """Fetch one page of users from ``url_seed`` and hand each to ``saveUser``.

    The form fields in ``data`` are URL-encoded and sent as the request body
    together with the module-level ``headers``. The response body is decoded
    as UTF-8, printed, parsed as JSON, and every element of
    ``payload["d"]["list"]`` is passed to ``saveUser``.

    Returns:
        None. This is a best-effort call: any failure (invalid URL, network
        error, malformed JSON, missing keys, error inside ``saveUser``) is
        printed and swallowed, as in the original implementation.
    """
    # Imported locally so the block does not rely on urllib.request pulling
    # in urllib.parse as a side effect, and to bring json into scope.
    import json
    import urllib.parse

    # Placeholder form fields. The original literal repeated the key ""
    # seven times, which Python collapses into this single entry.
    # TODO: fill in the real parameter names and values.
    data = {
        "": "",
    }
    try:
        req = urllib.request.Request(
            url=url_seed,
            data=urllib.parse.urlencode(data).encode(encoding='UTF8'),
            headers=headers,
        )
        # Read the body while the connection is still open.
        with urllib.request.urlopen(req) as res:
            body = res.read().decode('utf-8')
        print(body)

        # The response object itself is not subscriptable; decode the JSON
        # text first, then walk the user list element by element.
        payload = json.loads(body)
        for user in payload["d"]["list"]:
            saveUser(user)
    except Exception as err:
        # Deliberately broad: keep the crawl going and report the problem.
        print(err)
|
|
|
+
|
|
|
+
|
|
|
def saveUser(jsonUser):
    """Save one user record to MongoDB.

    Not implemented yet: the function currently ignores ``jsonUser`` and
    returns None without touching any database.
    """
    return None
|
|
|
+
|
|
|
if __name__ == "__main__":
    # Script entry point: run the crawl and report the wall-clock duration.
    start_time = time.time()
    crawl()
    elapsed = time.time() - start_time
    print("last time: {} s".format(elapsed))
|