
rename crawl_house

liuyuqi-dellpc 7 months ago
parent commit 797c4525c2

+ 2 - 0
.gitignore

@@ -0,0 +1,2 @@
+*.pyc
+__pycache__

+ 5 - 1
README.md

@@ -1,6 +1,10 @@
-# Lianjia crawler
+# New-home, resale, and rental housing crawler
+
+
+
 ### Introduction
     Lianjia housing-listing crawler: crawls all listings via neighborhood (xiaoqu) pages, built on Scrapy
+    
 ### Usage
     Configure MongoDB in settings.py
     run run.py
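A minimal sketch of the MongoDB configuration the usage note points at, assuming the common Scrapy pattern of a connection URI plus an item pipeline (the MONGO_URI / MONGO_DATABASE names and the MongoPipeline class are illustrative assumptions, not taken from this commit):

    # crawl_house/settings.py (sketch)
    MONGO_URI = 'mongodb://localhost:27017'
    MONGO_DATABASE = 'house'
    ITEM_PIPELINES = {
        'crawl_house.pipelines.MongoPipeline': 300,
    }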

+ 0 - 0
house_spider/__init__.py → crawl_house/__init__.py


+ 0 - 0
house_spider/items.py → crawl_house/items.py


+ 0 - 0
house_spider/middlewares.py → crawl_house/middlewares.py


+ 0 - 0
house_spider/pipelines.py → crawl_house/pipelines.py


+ 0 - 0
house_spider/run.py → crawl_house/run.py


+ 0 - 0
house_spider/settings.py → crawl_house/settings.py


+ 0 - 0
house_spider/spiders/__init__.py → crawl_house/spiders/__init__.py


+ 24 - 0
crawl_house/spiders/beike.py

@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+'''
+@Contact :   liuyuqi.gov@msn.cn
+@Time    :   2023/09/22 17:45:53
+@License :   Copyright © 2017-2022 liuyuqi. All Rights Reserved.
+@Desc    :   Beike (ke.com) housing site spider
+'''
+
+import scrapy
+
+class BeikeSpider(scrapy.Spider):
+    name = 'beike'
+    allowed_domains = ['cq.ke.com']
+    start_urls = ['http://cq.ke.com/']
+
+    def start_requests(self):
+        # Begin crawling from the neighborhood (xiaoqu) listing page.
+        url = self.start_urls[0] + 'xiaoqu/'
+        yield scrapy.Request(url=url, callback=self.parse)
+
+    def parse(self, response):
+        # TODO: extract district links and listings from the xiaoqu page.
+        pass
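Once registered in the project, the spider above can be run from the repository root with Scrapy's CLI; the output file name here is only illustrative:

    scrapy crawl beike -o xiaoqu.csv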

+ 0 - 0
house_spider/spiders/lianjia.py → crawl_house/spiders/lianjia.py


+ 31 - 0
main.py

@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+'''
+@Contact :   liuyuqi.gov@msn.cn
+@Time    :   2023/09/22 17:42:53
+@License :   Copyright © 2017-2022 liuyuqi. All Rights Reserved.
+@Desc    :   
+'''
+
+from twisted.internet import reactor, defer
+
+from scrapy.crawler import CrawlerRunner
+from scrapy.utils.log import configure_logging
+from scrapy.utils.project import get_project_settings
+
+# Set up Scrapy logging and a CrawlerRunner that uses the project settings
+# (including the MongoDB pipeline configured in settings.py).
+configure_logging()
+runner = CrawlerRunner(get_project_settings())
+
+@defer.inlineCallbacks
+def crawl():
+    # Run the 'beike' spider defined in crawl_house/spiders/beike.py,
+    # then stop the Twisted reactor so the script exits.
+    yield runner.crawl('beike')
+    reactor.stop()
+
+if __name__ == '__main__':
+    # Schedule the crawl and start the reactor's event loop.
+    crawl()
+    reactor.run()
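main.py drives the crawl through CrawlerRunner and an explicit Twisted reactor rather than CrawlerProcess, which leaves room to chain several spiders inside one crawl() callback; with a single spider, CrawlerProcess would be the simpler choice. Assuming the dependencies are installed, the entry point is simply:

    python main.py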

+ 6 - 0
requirements.txt

@@ -0,0 +1,6 @@
+Scrapy==2.9.0
+PyMySQL==0.9.3
+pymongo==3.10.1
+urllib3==1.23
+requests==2.19.1
+
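Assuming a fresh virtual environment, the pinned dependencies above install with:

    pip install -r requirements.txt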