我有一个 Scrapy 程序想要运行,但代码始终无法正常执行,总是出现下面的错误.
我仍然可以使用 scrapy shell 命令访问该网站,所以可以确定 URL 等都是正常的.
这是我的代码
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from Malscraper.items import MalItem
class MalSpider(CrawlSpider):
    """Crawl spider that follows selected links and scrapes name/link pairs.

    Starting from ``start_urls``, the single rule follows the links found in
    the page region selected by an XPath expression and hands every followed
    page to :meth:`parse_item`.
    """

    name = 'Mal'
    allowed_domains = ['www.website.net']
    start_urls = ['http://www.website.net/stuff.php?']

    rules = [
        Rule(
            LinkExtractor(
                # ``allow`` expects regular expressions that are matched
                # against the URL, so an XPath there never selects the
                # intended links.  XPath expressions that restrict *where*
                # links are taken from belong in ``restrict_xpaths``.
                restrict_xpaths=[
                    '//*[@id="content"]/div[2]/div[2]/div/span/a[1]',
                ],
            ),
            callback='parse_item',
            follow=True,
        ),
    ]

    def parse_item(self, response):
        """Yield one ``MalItem`` per matching table cell of ``response``.

        Each item carries the text of the first anchor's ``<strong>`` child
        as ``name`` and that anchor's ``href`` as ``link``; either value is
        ``None`` when the cell has no such node.
        """
        # No trailing "/" here: an XPath ending in "/" is not a valid
        # expression and makes the selector raise instead of matching.
        cells = response.xpath('//*[@id="content"]/div[2]/table/tr/td[2]')
        for cell in cells:
            item = MalItem()
            item['name'] = cell.xpath('a[1]/strong/text()').extract_first()
            item['link'] = cell.xpath('a[1]/@href').extract_first()
            yield item
Run Code Online (Sandbox Code Playgroud)
编辑:这是错误的回溯信息(traceback).
Traceback (most recent call last):
File "C:\Users\2015\Anaconda\lib\site-packages\boto\utils.py", line 210, in retry_url
r = opener.open(req, timeout=timeout)
File "C:\Users\2015\Anaconda\lib\urllib2.py", line 431, in …Run Code Online (Sandbox Code Playgroud)