@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+'''
+@Author : liuyuqi
+@Contact : liuyuqi.gov@msn.cn
+@Time : 2019/11/18 03:19:28
+@Version : 1.0
+@License : (C)Copyright 2019
+@Desc : Ganji second-hand listings scraper
+'''
+
+import requests
+
+from bs4 import BeautifulSoup
+
+
+class GanJi():
+ """docstring for GanJi"""
|
|
|
|
+
+ def __init__(self):
+ super(GanJi, self).__init__()
+
+    def get(self, url):
+        """Scrape every listing page under `url` and print one record per line."""
+ user_agent = 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36'
+        headers = {'User-Agent': user_agent}
+
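+        # Fetch the first results page ('o1') to read the total number of listings.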
+        webData = requests.get(url + 'o1', headers=headers).text
+        soup = BeautifulSoup(webData, 'lxml')
+
+
+        total = soup.find('span', class_="num").text.replace("套", "")  # total listing count
+        ave = int(total) / 32  # 32 listings per page
+        forNum = int(ave)
+
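+        # Round the page count up so the last, partially filled page is not skipped.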
+ if forNum < ave:
+ forNum = forNum + 1
+
+
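+        # Walk every results page; page N is fetched by appending 'oN' to the base URL.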
+ for x in range(forNum):
+            webData = requests.get(url + 'o' + str(x + 1), headers=headers).text
+            soup = BeautifulSoup(webData, 'lxml')
+            find_list = soup.find('div', class_="f-main-list").find_all('div', class_="f-list-item ershoufang-list")
+
+ for dl in find_list:
+
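+                # Print the listing fields on one line, separated by '|'.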
+                print(dl.find('a', class_="js-title value title-font").text, end='|')  # title
+
+                # the five info fields in the middle of the listing
+                tempDD = dl.find('dd', class_="dd-item size").find_all('span')
+ for tempSpan in tempDD:
+                    if tempSpan.text != '':
+                        print(tempSpan.text.replace("\n", ""), end='|')
+
+
+                print(dl.find('span', class_="area").text.replace(" ", "").replace("\n", ""), end='|')  # address
+
+ print(dl.find('div',class_="price").text.replace(" ","").replace("\n",""),end='|') # 价钱
|
|
|
|
+
+ print(dl.find('div',class_="time").text.replace(" ","").replace("\n",""),end="|") # 平均
|
|
|
|
+
+ print("http://chaozhou.ganji.com" + dl['href'],end="|") # 地址
|
|
|
|
+
+                print(str(x + 1))  # page number (ends the record line)
+
+if __name__ == '__main__':
+ temp = GanJi()
+ temp.get("http://chaozhou.ganji.com/fang5/xiangqiao/")