Browse Source

Upload files to ''

天问 5 years ago
parent
commit
1ed41cee3b
2 changed files with 136 additions and 0 deletions
  1. 66 0
      getBaiduMap.py
  2. 70 0
      getGanJiData.py

+ 66 - 0
getBaiduMap.py

@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+'''
+@Author  :   liuyuqi
+@Contact :   liuyuqi.gov@msn.cn
+@Time    :   2019/11/18 03:20:34
+@Version :   1.0
+@License :   (C)Copyright 2019
+@Desc    :   百度地图信息采集
+'''
+
+import requests
+import os
+import re
+import json
+from bs4 import BeautifulSoup
+
+class BaiduMap(object):
+	"""docstring for BaiduMap"""
+	def __init__(self):
+		super(BaiduMap, self).__init__()
+
+	#城市获取数据
+	def getCityData(self,cityName):
+		# http://map.baidu.com/?newmap=1&qt=cur&ie=utf-8&wd=  &oue=1&res=jc
+		try:
+			webData = requests.get("http://map.baidu.com/?newmap=1&qt=cur&ie=utf-8&wd=" + cityName + "&oue=1&res=jc").text
+			jsonData = json.loads(webData)
+			print(jsonData,end="\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
+
+
+			if 'weather' in jsonData: #存在天气预报的情况下
+				weatherData = json.loads(jsonData['weather'])
+				print(weatherData['OriginQuery']," PM2.5:",weatherData['pm25'],weatherData['weather0'],"[",weatherData['temp0'],"][",weatherData['wind0'],"]",end=' ')
+
+			if 'cur_area_id' in jsonData:
+				print("城市id:",jsonData['cur_area_id'])
+				return jsonData['cur_area_id']
+			else:
+				return -1
+
+		except Exception as e:
+			raise
+
+	def getMapData(self,cityId,info_): 
+
+		qt        = "s"
+		rn        = "10"
+		modNum    = "10"
+		loopValue = 1
+
+		if cityId < 0 :
+			return -1
+
+		getUrl   = "http://api.map.baidu.com/?qt=" + qt + "&c=" + str(cityId) + "&wd=" + info_ + "&rn=" + rn + "&pn=1" + "&ie=utf-8&oue=1&fromproduct=jsapi&res=api&callback=BMap._rd._cbk7303&ak=E4805d16520de693a3fe707cdc962045";
+		webData  = requests.get(getUrl).text
+		# print(webData)
+		loopNum = re.search("\"total\":([\\s\\S]*?),",webData).group(1) #数量
+		reJson = re.search("content\":([\\s\\S]*?),\"current_city",webData).group(1)
+		print(loopNum)
+		jsonData = json.loads(reJson)
+		print(jsonData)
+
+if __name__ == '__main__':
+	obj = BaiduMap()
+	obj.getMapData(obj.getCityData("潮州"),"酒店")

+ 70 - 0
getGanJiData.py

@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+'''
+@Author  :   liuyuqi
+@Contact :   liuyuqi.gov@msn.cn
+@Time    :   2019/11/18 03:19:28
+@Version :   1.0
+@License :   (C)Copyright 2019
+@Desc    :   赶集网二手爬虫
+'''
+
+import requests
+import os
+
+from bs4 import BeautifulSoup
+
+
+
+class GanJi():
+    """docstring for GanJi"""
+
+    def __init__(self):
+        super(GanJi, self).__init__()
+
+    def get(self,url):
+
+        user_agent = 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36'
+        headers    = {'User-Agent':user_agent}
+        
+        webData    = requests.get(url + 'o1',headers=headers).text
+        soup       = BeautifulSoup(webData,'lxml')
+        
+        
+        sum        = soup.find('span',class_="num").text.replace("套","")
+        ave        = int(sum) / 32
+        forNum     = int(ave)
+
+        if forNum < ave:
+            forNum = forNum + 1
+
+
+        for x in range(forNum):
+            webData    = requests.get(url + 'o' + str(x + 1),headers=headers).text
+            soup       = BeautifulSoup(webData,'lxml')
+            find_list  = soup.find('div',class_="f-main-list").find_all('div',class_="f-list-item ershoufang-list")
+
+            for dl in find_list:
+                
+                print(dl.find('a',class_="js-title value title-font").text,end='|') # 名称
+
+                # 中间 5 个信息
+                tempDD = dl.find('dd',class_="dd-item size").find_all('span')
+                for tempSpan in tempDD:
+                    if not tempSpan.text == '' : 
+                        print(tempSpan.text.replace("\n", ""),end='|')
+
+                
+                print(dl.find('span',class_="area").text.replace(" ","").replace("\n",""),end='|') # 地址
+                
+                print(dl.find('div',class_="price").text.replace(" ","").replace("\n",""),end='|') # 价钱
+                
+                print(dl.find('div',class_="time").text.replace(" ","").replace("\n",""),end="|") # 平均
+                
+                print("http://chaozhou.ganji.com" + dl['href'],end="|") # 地址
+
+                print(str(x + 1))
+
+if __name__ == '__main__':
+    temp = GanJi()
+    temp.get("http://chaozhou.ganji.com/fang5/xiangqiao/")