123456789101112131415161718192021222324 |
- # -*- coding: utf-8 -*-
- import scrapy
- from boto.beanstalk.response import Response
class DockerSpider(scrapy.Spider):
    """Spider that starts at ipac.library.sh.cn and scrapes page headings.

    The landing page is scanned for elements carrying the ``question`` CSS
    class; every link found on them is followed and the first ``<h1>`` of
    the target page is emitted as the item's ``title``.
    """

    name = 'docker'
    # Scrapy's offsite filtering expects bare domain names here, not URLs.
    allowed_domains = ['ipac.library.sh.cn']
    start_urls = ['http://ipac.library.sh.cn/']

    def parse(self, response):
        """Yield one request per link found on ``.question`` elements.

        Args:
            response: the downloaded page for one of ``start_urls``.

        Yields:
            scrapy.Request: a request for each linked page, handled by
            ``parse_question``.
        """
        # Select the href attribute values rather than the elements
        # themselves: serializing an element yields its HTML markup, which
        # is not something urljoin can turn into a usable URL. Both an
        # element that is itself the link and anchors nested inside it are
        # covered.
        hrefs = response.css(
            '.question::attr(href), .question a::attr(href)'
        ).extract()
        for href in hrefs:
            yield scrapy.Request(
                response.urljoin(href),
                callback=self.parse_question,
            )

    def start_request(self):
        """Do nothing and return ``None``.

        NOTE(review): Scrapy's request-seeding hook is named
        ``start_requests`` (plural), so the framework never calls this
        method. It is kept as a no-op for backward compatibility; confirm
        whether it can be removed.
        """
        return None

    def parse_question(self, response):
        """Yield an item holding the first ``<h1>`` element of the page.

        Args:
            response: the downloaded page of a followed link.

        Yields:
            dict: ``{'title': <serialized first h1 element>}``. Nothing is
            yielded when the page has no ``<h1>``.
        """
        title = response.css('h1').extract_first()
        if title is None:
            # Indexing an empty result list used to raise IndexError here;
            # skip the page explicitly and leave a trace in the log instead.
            self.logger.warning('No <h1> found on %s', response.url)
            return
        yield {
            'title': title,
        }
-
|