我试图从子站点中抓取详细信息并与通过 site 抓取的详细信息合并。我一直在研究 stackoverflow 以及文档。但是,我仍然无法让我的代码工作。似乎我从子站点提取其他详细信息的功能不起作用。如果有人能看一下,我将不胜感激。
# -*- coding: utf-8 -*-
from scrapy.spiders import Spider
from scrapy.selector import Selector
from scrapeInfo.items import infoItem
import pyodbc
class scrapeInfo(Spider):
name = "info"
allowed_domains = ["http://www.nevermind.com"]
start_urls = []
def start_requests(self):
#Get infoID and Type from database
self.conn = pyodbc.connect('DRIVER={SQL Server};SERVER=server;DATABASE=dbname;UID=user;PWD=password')
self.cursor = self.conn.cursor()
self.cursor.execute("SELECT InfoID, category FROM dbo.StageItem")
rows = self.cursor.fetchall()
for row in rows:
url = 'http://www.nevermind.com/info/'
InfoID = row[0]
category = row[1]
yield self.make_requests_from_url(url+InfoID, InfoID, category, self.parse)
def make_requests_from_url(self, …Run Code Online (Sandbox Code Playgroud)