我有这个简单的杂乱代码.但是当我使用response.urljoin(port_homepage_url)这部分代码时会出现此错误.
import re
import scrapy
from vesseltracker.items import VesseltrackerItem
class GetVessel(scrapy.Spider):
name = "getvessel"
allowed_domains = ["marinetraffic.com"]
start_urls = [
'http://www.marinetraffic.com/en/ais/index/ports/all/flag:AE',
]
def parse(self, response):
item = VesseltrackerItem()
for ports in response.xpath('//table/tr[position()>1]'):
item['port_name'] = ports.xpath('td[2]/a/text()').extract()
port_homepage_url = ports.xpath('td[7]/a/@href').extract()
port_homepage_url = response.urljoin(port_homepage_url)
yield scrapy.Request(port_homepage_url, callback=self.parse, meta={'item': item})
Run Code Online (Sandbox Code Playgroud)
可能有什么不对?
这是错误日志.
2016-09-30 17:17:13 [scrapy] DEBUG: Crawled (200) <GET http://www.marinetraffic.com/robots.txt> (referer: None)
2016-09-30 17:17:14 [scrapy] DEBUG: Crawled (200) <GET http://www.marinetraffic.com/en/ais/index/ports/all/flag:AE> (referer: None)
2016-09-30 17:17:14 [scrapy] ERROR: Spider error processing <GET http://www.marinetraffic.com/en/ais/index/ports/all/flag:AE> (referer: …Run Code Online (Sandbox Code Playgroud)