|
@@ -14,13 +14,18 @@ BOT_NAME = 'shlib'
|
|
|
SPIDER_MODULES = ['shlib.spiders']
|
|
|
NEWSPIDER_MODULE = 'shlib.spiders'
|
|
|
|
|
|
+# Database connection parameters — NOTE(review): credentials are hardcoded (root/toor); load from env vars or a local config excluded from VCS
|
|
|
+DBKWARGS={'db':'ippool','user':'root', 'passwd':'toor',
|
|
|
+ 'host':'localhost','use_unicode':True, 'charset':'utf8'}
|
|
|
|
|
|
# Crawl responsibly by identifying yourself (and your website) on the user-agent
|
|
|
-#USER_AGENT = 'shlib (+http://www.yourdomain.com)'
|
|
|
+USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
|
|
|
|
|
|
# Obey robots.txt rules
|
|
|
ROBOTSTXT_OBEY = True
|
|
|
|
|
|
+LOG_FILE = "shlib.log"
|
|
|
+
|
|
|
# Configure maximum concurrent requests performed by Scrapy (default: 16)
|
|
|
#CONCURRENT_REQUESTS = 32
|
|
|
|
|
@@ -64,9 +69,9 @@ ROBOTSTXT_OBEY = True
|
|
|
|
|
|
# Configure item pipelines
|
|
|
# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
|
|
|
-#ITEM_PIPELINES = {
|
|
|
-# 'shlib.pipelines.ShlibPipeline': 300,
|
|
|
-#}
|
|
|
+ITEM_PIPELINES = {
|
|
|
+ 'shlib.pipelines.ShlibPipeline': 300,
|
|
|
+}
|
|
|
|
|
|
# Enable and configure the AutoThrottle extension (disabled by default)
|
|
|
# See http://doc.scrapy.org/en/latest/topics/autothrottle.html
|