4 years ago · 91137d58cc
--- a/README.md
+++ b/README.md
@@ -0,0 +1,7 @@
 
				+# 链家爬虫
			
 
				+### 简介 
			
 
				+    链家房源爬虫，通过小区信息爬取所有房源，基于 Scrapy
			
 
				+### 用法
			
 
				+    在 settings.py 中配置 MongoDB
			
 
				+    运行 run.py
			
 
				+
			
--- a/house_spider/__init__.py
+++ b/house_spider/__init__.py
--- a/house_spider/items.py
+++ b/house_spider/items.py
@@ -0,0 +1,60 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+# Define here the models for your scraped items
			
 
				+#
			
 
				+# See documentation in:
			
 
				+# https://doc.scrapy.org/en/latest/topics/items.html
			
 
				+
			
 
				+import scrapy
			
 
				+
			
 
				+
			
 
				+class HouseSpiderItem(scrapy.Item):
			
 
				+    # define the fields for your item here like:
			
 
				+    # name = scrapy.Field()
			
 
				+    pass
			
 
				+
			
 
				+
			
 
				+class LianjiaVillageItem(scrapy.Item):
			
 
				+    # 链家小区
			
 
				+    collection = 'lianjia_village'
			
 
				+    id = scrapy.Field()
			
 
				+    name = scrapy.Field()
			
 
				+    zone = scrapy.Field()
			
 
				+    address = scrapy.Field()
			
 
				+    latitude = scrapy.Field()
			
 
				+    longitude = scrapy.Field()
			
 
				+    year = scrapy.Field()
			
 
				+    build_type = scrapy.Field()
			
 
				+    property_costs = scrapy.Field()
			
 
				+    property_company = scrapy.Field()
			
 
				+    developers = scrapy.Field()
			
 
				+    buildings = scrapy.Field()
			
 
				+    total_house = scrapy.Field()
			
 
				+
			
 
				+class LianjiaHouseItem(scrapy.Item):
			
 
				+    collection = 'lianjia_House'
			
 
				+    房屋Id = scrapy.Field()
			
 
				+    标题 = scrapy.Field()
			
 
				+    售价 = scrapy.Field()
			
 
				+    小区 = scrapy.Field()
			
 
				+    小区ID = scrapy.Field()
			
 
				+    房屋户型 = scrapy.Field()
			
 
				+    所在楼层 = scrapy.Field()
			
 
				+    建筑面积 = scrapy.Field()
			
 
				+    户型结构 = scrapy.Field()
			
 
				+    套内面积 = scrapy.Field()
			
 
				+    建筑类型 = scrapy.Field()
			
 
				+    房屋朝向 = scrapy.Field()
			
 
				+    建筑结构 = scrapy.Field()
			
 
				+    装修情况 = scrapy.Field()
			
 
				+    梯户比例 = scrapy.Field()
			
 
				+    配备电梯 = scrapy.Field()
			
 
				+    产权年限 = scrapy.Field()
			
 
				+    挂牌时间 = scrapy.Field()
			
 
				+    交易权属 = scrapy.Field()
			
 
				+    上次交易 = scrapy.Field()
			
 
				+    房屋用途 = scrapy.Field()
			
 
				+    房屋年限 = scrapy.Field()
			
 
				+    产权所属 = scrapy.Field()
			
 
				+    抵押信息 = scrapy.Field()
			
 
				+    房本备件 = scrapy.Field()
			
--- a/house_spider/middlewares.py
+++ b/house_spider/middlewares.py
@@ -0,0 +1,103 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+# Define here the models for your spider middleware
			
 
				+#
			
 
				+# See documentation in:
			
 
				+# https://doc.scrapy.org/en/latest/topics/spider-middleware.html
			
 
				+
			
 
				+from scrapy import signals
			
 
				+
			
 
				+
			
 
				+class HouseSpiderSpiderMiddleware(object):
			
 
				+    # Not all methods need to be defined. If a method is not defined,
			
 
				+    # scrapy acts as if the spider middleware does not modify the
			
 
				+    # passed objects.
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_crawler(cls, crawler):
			
 
				+        # This method is used by Scrapy to create your spiders.
			
 
				+        s = cls()
			
 
				+        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
			
 
				+        return s
			
 
				+
			
 
				+    def process_spider_input(self, response, spider):
			
 
				+        # Called for each response that goes through the spider
			
 
				+        # middleware and into the spider.
			
 
				+
			
 
				+        # Should return None or raise an exception.
			
 
				+        return None
			
 
				+
			
 
				+    def process_spider_output(self, response, result, spider):
			
 
				+        # Called with the results returned from the Spider, after
			
 
				+        # it has processed the response.
			
 
				+
			
 
				+        # Must return an iterable of Request, dict or Item objects.
			
 
				+        for i in result:
			
 
				+            yield i
			
 
				+
			
 
				+    def process_spider_exception(self, response, exception, spider):
			
 
				+        # Called when a spider or process_spider_input() method
			
 
				+        # (from other spider middleware) raises an exception.
			
 
				+
			
 
				+        # Should return either None or an iterable of Response, dict
			
 
				+        # or Item objects.
			
 
				+        pass
			
 
				+
			
 
				+    def process_start_requests(self, start_requests, spider):
			
 
				+        # Called with the start requests of the spider, and works
			
 
				+        # similarly to the process_spider_output() method, except
			
 
				+        # that it doesn’t have a response associated.
			
 
				+
			
 
				+        # Must return only requests (not items).
			
 
				+        for r in start_requests:
			
 
				+            yield r
			
 
				+
			
 
				+    def spider_opened(self, spider):
			
 
				+        spider.logger.info('Spider opened: %s' % spider.name)
			
 
				+
			
 
				+
			
 
				+class HouseSpiderDownloaderMiddleware(object):
			
 
				+    # Not all methods need to be defined. If a method is not defined,
			
 
				+    # scrapy acts as if the downloader middleware does not modify the
			
 
				+    # passed objects.
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_crawler(cls, crawler):
			
 
				+        # This method is used by Scrapy to create your spiders.
			
 
				+        s = cls()
			
 
				+        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
			
 
				+        return s
			
 
				+
			
 
				+    def process_request(self, request, spider):
			
 
				+        # Called for each request that goes through the downloader
			
 
				+        # middleware.
			
 
				+
			
 
				+        # Must either:
			
 
				+        # - return None: continue processing this request
			
 
				+        # - or return a Response object
			
 
				+        # - or return a Request object
			
 
				+        # - or raise IgnoreRequest: process_exception() methods of
			
 
				+        #   installed downloader middleware will be called
			
 
				+        return None
			
 
				+
			
 
				+    def process_response(self, request, response, spider):
			
 
				+        # Called with the response returned from the downloader.
			
 
				+
			
 
				+        # Must either;
			
 
				+        # - return a Response object
			
 
				+        # - return a Request object
			
 
				+        # - or raise IgnoreRequest
			
 
				+        return response
			
 
				+
			
 
				+    def process_exception(self, request, exception, spider):
			
 
				+        # Called when a download handler or a process_request()
			
 
				+        # (from other downloader middleware) raises an exception.
			
 
				+
			
 
				+        # Must either:
			
 
				+        # - return None: continue processing this exception
			
 
				+        # - return a Response object: stops process_exception() chain
			
 
				+        # - return a Request object: stops process_exception() chain
			
 
				+        pass
			
 
				+
			
 
				+    def spider_opened(self, spider):
			
 
				+        spider.logger.info('Spider opened: %s' % spider.name)
			
--- a/house_spider/pipelines.py
+++ b/house_spider/pipelines.py
@@ -0,0 +1,25 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+# Define your item pipelines here
			
 
				+#
			
 
				+# Don't forget to add your pipeline to the ITEM_PIPELINES setting
			
 
				+# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
			
 
				+import pymongo
			
 
				+from scrapy.conf import settings
			
 
				+
			
 
				+
			
 
				+class HouseSpiderPipeline(object):
			
 
				+    def process_item(self, item, spider):
			
 
				+        return item
			
 
				+
			
 
				+
			
 
				+class LianjiaVillageSavePipeline(object):
			
 
				+    def __init__(self):
			
 
				+        pass
			
 
				+
			
 
				+    def process_item(self, item, spider):
			
 
				+        if spider.name == 'lianjia':
			
 
				+            client = pymongo.MongoClient(host=settings['MONGO_HOST'], port=settings['MONGO_PORT'])
			
 
				+            db = client['house']
			
 
				+            coll = db[item.collection]
			
 
				+            coll.insert(dict(item))
			
--- a/house_spider/run.py
+++ b/house_spider/run.py
@@ -0,0 +1,3 @@
 
				+from scrapy import cmdline
			
 
				+
			
 
				+cmdline.execute("scrapy crawl lianjia".split())
			
--- a/house_spider/settings.py
+++ b/house_spider/settings.py
@@ -0,0 +1,94 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+# Scrapy settings for house_spider project
			
 
				+#
			
 
				+# For simplicity, this file contains only settings considered important or
			
 
				+# commonly used. You can find more settings consulting the documentation:
			
 
				+#
			
 
				+#     https://doc.scrapy.org/en/latest/topics/settings.html
			
 
				+#     https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
			
 
				+#     https://doc.scrapy.org/en/latest/topics/spider-middleware.html
			
 
				+
			
 
				+BOT_NAME = 'house_spider'
			
 
				+
			
 
				+SPIDER_MODULES = ['house_spider.spiders']
			
 
				+NEWSPIDER_MODULE = 'house_spider.spiders'
			
 
				+
			
 
				+
			
 
				+# Crawl responsibly by identifying yourself (and your website) on the user-agent
			
 
				+#USER_AGENT = 'house_spider (+http://www.yourdomain.com)'
			
 
				+
			
 
				+# Obey robots.txt rules
			
 
				+ROBOTSTXT_OBEY = True
			
 
				+
			
 
				+# Configure maximum concurrent requests performed by Scrapy (default: 16)
			
 
				+#CONCURRENT_REQUESTS = 32
			
 
				+
			
 
				+# Configure a delay for requests for the same website (default: 0)
			
 
				+# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
			
 
				+# See also autothrottle settings and docs
			
 
				+#DOWNLOAD_DELAY = 3
			
 
				+# The download delay setting will honor only one of:
			
 
				+#CONCURRENT_REQUESTS_PER_DOMAIN = 16
			
 
				+#CONCURRENT_REQUESTS_PER_IP = 16
			
 
				+
			
 
				+# Disable cookies (enabled by default)
			
 
				+#COOKIES_ENABLED = False
			
 
				+
			
 
				+# Disable Telnet Console (enabled by default)
			
 
				+#TELNETCONSOLE_ENABLED = False
			
 
				+
			
 
				+# Override the default request headers:
			
 
				+#DEFAULT_REQUEST_HEADERS = {
			
 
				+#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
			
 
				+#   'Accept-Language': 'en',
			
 
				+#}
			
 
				+
			
 
				+# Enable or disable spider middlewares
			
 
				+# See https://doc.scrapy.org/en/latest/topics/spider-middleware.html
			
 
				+#SPIDER_MIDDLEWARES = {
			
 
				+#    'house_spider.middlewares.HouseSpiderSpiderMiddleware': 543,
			
 
				+#}
			
 
				+
			
 
				+# Enable or disable downloader middlewares
			
 
				+# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
			
 
				+#DOWNLOADER_MIDDLEWARES = {
			
 
				+#    'house_spider.middlewares.HouseSpiderDownloaderMiddleware': 543,
			
 
				+#}
			
 
				+
			
 
				+# Enable or disable extensions
			
 
				+# See https://doc.scrapy.org/en/latest/topics/extensions.html
			
 
				+#EXTENSIONS = {
			
 
				+#    'scrapy.extensions.telnet.TelnetConsole': None,
			
 
				+#}
			
 
				+
			
 
				+# Configure item pipelines
			
 
				+# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
			
 
				+ITEM_PIPELINES = {
			
 
				+    'house_spider.pipelines.HouseSpiderPipeline': 300,
			
 
				+    'house_spider.pipelines.LianjiaVillageSavePipeline': 301,
			
 
				+}
			
 
				+
			
 
				+# Enable and configure the AutoThrottle extension (disabled by default)
			
 
				+# See https://doc.scrapy.org/en/latest/topics/autothrottle.html
			
 
				+#AUTOTHROTTLE_ENABLED = True
			
 
				+# The initial download delay
			
 
				+#AUTOTHROTTLE_START_DELAY = 5
			
 
				+# The maximum download delay to be set in case of high latencies
			
 
				+#AUTOTHROTTLE_MAX_DELAY = 60
			
 
				+# The average number of requests Scrapy should be sending in parallel to
			
 
				+# each remote server
			
 
				+#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
			
 
				+# Enable showing throttling stats for every response received:
			
 
				+#AUTOTHROTTLE_DEBUG = False
			
 
				+
			
 
				+# Enable and configure HTTP caching (disabled by default)
			
 
				+# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
			
 
				+#HTTPCACHE_ENABLED = True
			
 
				+#HTTPCACHE_EXPIRATION_SECS = 0
			
 
				+#HTTPCACHE_DIR = 'httpcache'
			
 
				+#HTTPCACHE_IGNORE_HTTP_CODES = []
			
 
				+#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
			
 
				+
			
 
				+MONGO_HOST = "127.0.0.1"  # 主机IP
			
 
				+MONGO_PORT = 27017  # 端口号
			
--- a/house_spider/spiders/__init__.py
+++ b/house_spider/spiders/__init__.py
@@ -0,0 +1,4 @@
 
				+# This package will contain the spiders of your Scrapy project
			
 
				+#
			
 
				+# Please refer to the documentation for information on how to create and manage
			
 
				+# your spiders.
			
--- a/house_spider/spiders/lianjia.py
+++ b/house_spider/spiders/lianjia.py
@@ -0,0 +1,142 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+import scrapy
			
 
				+from scrapy import Selector
			
 
				+import json
			
 
				+import re
			
 
				+
			
 
				+from house_spider.items import LianjiaVillageItem, LianjiaHouseItem
			
 
				+
			
 
				+
			
 
				+class LianjiaSpider(scrapy.Spider):
			
 
				+    name = 'lianjia'
			
 
				+    allowed_domains = ['cq.lianjia.com']
			
 
				+    start_urls = ['cq.lianjia.com']
			
 
				+
			
 
				+    def __init__(self, **kwargs):
			
 
				+        super().__init__(**kwargs)
			
 
				+        self.base_url = 'https://cq.lianjia.com'
			
 
				+
			
 
				+    def start_requests(self):
			
 
				+        request_url = 'https://cq.lianjia.com/xiaoqu/'
			
 
				+        yield scrapy.Request(url=request_url, callback=self.parse_district_links)
			
 
				+
			
 
				+    def parse_district_links(self, response):
			
 
				+        """提取地区链接"""
			
 
				+        sel = Selector(response)
			
 
				+        links = sel.css("div[data-role='ershoufang'] div:first-child a::attr(href)").extract()
			
 
				+        for link in links:
			
 
				+            url = self.base_url + link
			
 
				+            yield scrapy.Request(url=url, callback=self.parse_bizcircle_links)
			
 
				+
			
 
				+    def parse_bizcircle_links(self, response):
			
 
				+        """提取商圈链接"""
			
 
				+        sel = Selector(response)
			
 
				+        links = sel.css("div[data-role='ershoufang'] div:nth-child(2) a::attr(href)").extract()
			
 
				+        for link in links:
			
 
				+            url = self.base_url + link
			
 
				+            yield scrapy.Request(url=url, callback=self.parse_village_list, meta={"ref": url})
			
 
				+
			
 
				+    def parse_village_list(self, response):
			
 
				+        """提取小区链接"""
			
 
				+        sel = Selector(response)
			
 
				+        links = sel.css(".listContent .xiaoquListItem .img::attr(href)").extract()
			
 
				+        for link in links:
			
 
				+            yield scrapy.Request(url=link, callback=self.parse_village_detail)
			
 
				+
			
 
				+        # page
			
 
				+        page_data = sel.css(".house-lst-page-box::attr(page-data)").extract_first()
			
 
				+        page_data = json.loads(page_data)
			
 
				+        if page_data['curPage'] < page_data['totalPage']:
			
 
				+            url = response.meta["ref"] + 'pg' + str(page_data['curPage'] + 1)
			
 
				+            yield scrapy.Request(url=url, callback=self.parse_village_list, meta=response.meta)
			
 
				+
			
 
				+    def parse_village_detail(self, response):
			
 
				+        """提取小区详情"""
			
 
				+        village_url = response.url
			
 
				+        sel = Selector(response)
			
 
				+        zone = sel.css('.xiaoquDetailbreadCrumbs .l-txt a::text').extract()
			
 
				+        latitude = 0
			
 
				+        longitude = 0
			
 
				+        try:
			
 
				+            html = response.body.decode().replace('\r', '')
			
 
				+            local = html[html.find('resblockPosition:'):html.find('resblockName') - 1]
			
 
				+            m = re.search('(\d.*\d),(\d.*\d)', local)
			
 
				+            longitude = m.group(1)
			
 
				+            latitude = m.group(2)
			
 
				+        except Exception:
			
 
				+            pass
			
 
				+
			
 
				+        item = LianjiaVillageItem()
			
 
				+        item['id'] = village_url.replace(self.base_url + '/xiaoqu/', '').replace('/', '')
			
 
				+        item['name'] = sel.css('.detailHeader .detailTitle::text').extract_first()
			
 
				+        item['address'] = sel.css('.detailHeader .detailDesc::text').extract_first()
			
 
				+        item['latitude'] = latitude
			
 
				+        item['longitude'] = longitude
			
 
				+        item['zone'] = ','.join(zone)
			
 
				+        item['year'] = sel.css('.xiaoquInfo .xiaoquInfoItem:nth-child(1) .xiaoquInfoContent::text').extract_first()
			
 
				+        item['build_type'] = sel.css('.xiaoquInfo .xiaoquInfoItem:nth-child(2) .xiaoquInfoContent::text').extract_first()
			
 
				+        item['property_costs'] = sel.css('.xiaoquInfo .xiaoquInfoItem:nth-child(3) .xiaoquInfoContent::text').extract_first()
			
 
				+        item['property_company'] = sel.css('.xiaoquInfo .xiaoquInfoItem:nth-child(4) .xiaoquInfoContent::text').extract_first()
			
 
				+        item['developers'] = sel.css('.xiaoquInfo .xiaoquInfoItem:nth-child(5) .xiaoquInfoContent::text').extract_first()
			
 
				+        item['buildings'] = sel.css('.xiaoquInfo .xiaoquInfoItem:nth-child(6) .xiaoquInfoContent::text').extract_first()
			
 
				+        item['total_house'] = sel.css('.xiaoquInfo .xiaoquInfoItem:nth-child(7) .xiaoquInfoContent::text').extract_first()
			
 
				+
			
 
				+        print(item['name'])
			
 
				+        yield item
			
 
				+
			
 
				+        # 小区房源 https://cq.lianjia.com/ershoufang/c3620038190566370/
			
 
				+        url = self.base_url + "/ershoufang/c" + item['id'] + "/"
			
 
				+        yield scrapy.Request(url=url, callback=self.parse_house_list, meta={"ref": url})
			
 
				+
			
 
				+    def parse_house_list(self, response):
			
 
				+        """提取房源链接"""
			
 
				+        sel = Selector(response)
			
 
				+        # 链家有时小区查询不到数据
			
 
				+        total = sel.css('.resultDes .total span::text').extract_first()
			
 
				+        total = int(total)
			
 
				+        if total > 0:
			
 
				+            # 提取房源链接
			
 
				+            links = sel.css(".sellListContent li .info .title a::attr(href)").extract()
			
 
				+            for link in links:
			
 
				+                yield scrapy.Request(url=link, callback=self.parse_house_detail)
			
 
				+            # 链接分页
			
 
				+            page_data = sel.css(".house-lst-page-box::attr(page-data)").extract_first()
			
 
				+            page_data = json.loads(page_data)
			
 
				+            if page_data['curPage'] == 1 and page_data['totalPage'] > 1:
			
 
				+                price = response.url.replace(self.base_url + '/ershoufang/', '')
			
 
				+                for x in range(2, page_data['totalPage'] + 1, 1):
			
 
				+                    url = self.base_url + '/ershoufang/' + 'pg' + str(x) + price
			
 
				+                    yield scrapy.Request(url=url, callback=self.parse_house_list)
			
 
				+
			
 
				+    def parse_house_detail(self, response):
			
 
				+        """提取房源信息"""
			
 
				+        sel = Selector(response)
			
 
				+
			
 
				+        item = LianjiaHouseItem()
			
 
				+        item['房屋Id'] = response.url.replace(self.base_url + '/ershoufang/', '').replace('.html', '')
			
 
				+        item['标题'] = sel.css('.title-wrapper .title .main::text').extract_first()
			
 
				+        item['售价'] = sel.css('.overview .content .price .total::text').extract_first()
			
 
				+        item['小区'] = sel.css('.overview .content .aroundInfo .communityName a.info::text').extract_first()
			
 
				+        item['小区ID'] = sel.css('.overview .content .aroundInfo .communityName a.info::attr(href)').extract_first().replace('/xiaoqu/', '').replace('/', '')
			
 
				+        item['房屋户型'] = sel.css('#introduction .base .content ul li:nth-child(1)::text').extract_first()
			
 
				+        item['所在楼层'] = sel.css('#introduction .base .content ul li:nth-child(2)::text').extract_first()
			
 
				+        item['建筑面积'] = sel.css('#introduction .base .content ul li:nth-child(3)::text').extract_first()
			
 
				+        item['户型结构'] = sel.css('#introduction .base .content ul li:nth-child(4)::text').extract_first()
			
 
				+        item['套内面积'] = sel.css('#introduction .base .content ul li:nth-child(5)::text').extract_first()
			
 
				+        item['建筑类型'] = sel.css('#introduction .base .content ul li:nth-child(6)::text').extract_first()
			
 
				+        item['房屋朝向'] = sel.css('#introduction .base .content ul li:nth-child(7)::text').extract_first()
			
 
				+        item['建筑结构'] = sel.css('#introduction .base .content ul li:nth-child(8)::text').extract_first()
			
 
				+        item['装修情况'] = sel.css('#introduction .base .content ul li:nth-child(9)::text').extract_first()
			
 
				+        item['梯户比例'] = sel.css('#introduction .base .content ul li:nth-child(10)::text').extract_first()
			
 
				+        item['配备电梯'] = sel.css('#introduction .base .content ul li:nth-child(11)::text').extract_first()
			
 
				+        item['产权年限'] = sel.css('#introduction .base .content ul li:nth-child(12)::text').extract_first()
			
 
				+        item['挂牌时间'] = sel.css('#introduction .transaction .content ul li:nth-child(1) span:nth-child(2)::text').extract_first()
			
 
				+        item['交易权属'] = sel.css('#introduction .transaction .content ul li:nth-child(2) span:nth-child(2)::text').extract_first()
			
 
				+        item['上次交易'] = sel.css('#introduction .transaction .content ul li:nth-child(3) span:nth-child(2)::text').extract_first()
			
 
				+        item['房屋用途'] = sel.css('#introduction .transaction .content ul li:nth-child(4) span:nth-child(2)::text').extract_first()
			
 
				+        item['房屋年限'] = sel.css('#introduction .transaction .content ul li:nth-child(5) span:nth-child(2)::text').extract_first()
			
 
				+        item['产权所属'] = sel.css('#introduction .transaction .content ul li:nth-child(6) span:nth-child(2)::text').extract_first()
			
 
				+        item['抵押信息'] = sel.css('#introduction .transaction .content ul li:nth-child(7) span:nth-child(2)::attr(title)').extract_first()
			
 
				+        item['房本备件'] = sel.css('#introduction .transaction .content ul li:nth-child(8) span:nth-child(2)::text').extract_first()
			
 
				+
			
 
				+        yield item
			
--- a/scrapy.cfg
+++ b/scrapy.cfg
@@ -0,0 +1,11 @@
 
				+# Automatically created by: scrapy startproject
			
 
				+#
			
 
				+# For more information about the [deploy] section see:
			
 
				+# https://scrapyd.readthedocs.io/en/latest/deploy.html
			
 
				+
			
 
				+[settings]
			
 
				+default = house_spider.settings
			
 
				+
			
 
				+[deploy]
			
 
				+#url = http://localhost:6800/
			
 
				+project = house_spider