docker.py 793 B

123456789101112131415161718192021222324
  1. # -*- coding: utf-8 -*-
  2. import scrapy
  3. from boto.beanstalk.response import Response
  class DockerSpider(scrapy.Spider):
      """Crawl ipac.library.sh.cn, following links found in ``.question`` elements.

      ``parse`` handles the start page and schedules one request per link;
      ``parse_question`` turns each followed page into a single-item dict
      holding the page's first ``<h1>``.
      """

      name = 'docker'
      # Entries must be bare host names. A scheme-prefixed value such as
      # 'http://ipac.library.sh.cn' never matches a request's host, so the
      # offsite filter would drop every follow-up request.
      allowed_domains = ['ipac.library.sh.cn']
      # The scheme appears exactly once; 'http://http://...' is not fetchable.
      start_urls = ['http://ipac.library.sh.cn/']

      def parse(self, response):
          """Yield a request for every link inside a ``.question`` element.

          Args:
              response: the downloaded start page.

          Yields:
              scrapy.Request: one per href found, with ``parse_question`` as
              the callback.
          """
          # Select the href attribute values rather than the serialized
          # element HTML: urljoin needs a URL fragment, not markup. Chaining
          # .css() on the matched elements also picks up the element itself
          # when the ``.question`` node is the anchor.
          hrefs = response.css('.question').css('a::attr(href)').extract()
          for href in hrefs:
              yield scrapy.Request(
                  response.urljoin(href),
                  callback=self.parse_question,
              )

      def start_request(self):
          """Do nothing and return ``None``.

          NOTE(review): Scrapy's hook is named ``start_requests`` (plural), so
          the framework never calls this method and the crawl is seeded from
          ``start_urls``. The name is kept so any existing caller still
          resolves; confirm whether it can be deleted.
          """
          return None

      def parse_question(self, response):
          """Yield one item containing the first ``<h1>`` of the page.

          Args:
              response: a page reached from ``parse``.

          Yields:
              dict: ``{'title': <h1>}`` where the value is the serialized
              ``<h1>`` element (markup included, not only its text). Nothing
              is yielded when the page has no ``<h1>``.
          """
          title = response.css('h1').extract_first()
          if title is None:
              # A page without a heading is skipped instead of raising
              # IndexError from indexing an empty result list.
              self.logger.warning('No <h1> found on %s', response.url)
              return
          # A dict literal keeps only one value per key, so the key is
          # written once.
          yield {'title': title}