在 Scrapy 中使用请求有效负载(payload)发送 POST 请求

Pri*_*tln 5 scrapy

我如何抓取这个网站?如何携带有效负载(payload)发送 POST 请求并从响应中获取数据?

如果我使用此代码,则可以抓取第一页,但如何抓取第二页?我需要使用 Selenium,还是只用 Scrapy 就足够了?

import scrapy
from scrapy import log
from scrapy.http import *
import urllib2
class myntra_spider(scrapy.Spider):
    """Spider for the Myntra men's footwear listing.

    ``parse`` handles the listing page from ``start_urls``, prints the
    product links found on it and then requests the ``filteredSearch``
    endpoint; ``nextpages`` prints up to the first ten links found in that
    second response.
    """

    name = "myntra"
    # Scrapy reads ``allowed_domains`` (plural); the previous
    # ``allowed_domain`` spelling was silently ignored. An empty list keeps
    # the off-site filter disabled, exactly as before.
    allowed_domains = []
    start_urls = ["http://www.myntra.com/men-footwear"]

    # Send ERROR-level log records to testlog.log. The file is opened when
    # the class body executes and is not closed anywhere in this class.
    logfile = open('testlog.log', 'w')
    log_observer = log.ScrapyFileLogObserver(logfile, level=log.ERROR)
    log_observer.start()

    def parse(self, response):
        """Print the listing's product links, then request the search endpoint.

        Yields one ``Request`` whose response is handled by ``nextpages``.
        """
        # Single-argument print(...) produces the same output on Python 2
        # and Python 3.
        print("response url  %s" % response.url)

        link = response.xpath("//ul[@class='results small']/li/a/@href").extract()
        # Was ``print links``: an undefined name that raised NameError
        # before the follow-up request could be yielded.
        print(link)
        # NOTE(review): no ``method`` is passed, so Scrapy issues this as a
        # GET with an empty body. If the endpoint expects a POST with a JSON
        # payload, pass method="POST" and the serialized payload as ``body``.
        yield Request(
            'http://www.myntra.com/search-service/searchservice/search/filteredSearch',
            callback=self.nextpages,
            body="",
        )

    def nextpages(self, response):
        """Print at most the first ten product links found in ``response``."""
        link = response.xpath("//ul[@class='results small']/li/a/@href").extract()
        # Slicing instead of ``link[i] for i in range(10)`` avoids an
        # IndexError when the response contains fewer than ten links.
        for href in link[:10]:
            print("link  %s" % href)
Run Code Online (Sandbox Code Playgroud)

diz*_*y54 7

为此,您不需要 Selenium。检查需要与浏览器中的请求一起发送的有效负载,并将其附加到请求中。

我在您的网站上尝试过,以下代码段有效-

def start_requests(self):
    """Yield the initial POST request to the ``filteredSearch`` endpoint.

    The search payload is serialized to JSON and sent as the request body;
    the response is handled by ``self.parse``.
    """
    # Imported locally so the snippet works when pasted into a spider module
    # that does not already import ``json``.
    import json

    url = "http://www.myntra.com/search-service/searchservice/search/filteredSearch"
    # NOTE(review): "start" and "rows" look like the paging window (offset
    # and page size) - presumably raising "start" by "rows" fetches the next
    # page; confirm against the request the browser sends.
    # The "availbale" spelling is kept verbatim: it is presumably the
    # field name the server expects, so do not "correct" it.
    payload = [{
        "query": "(global_attr_age_group:(\"Adults-Unisex\" OR \"Adults-Women\") AND global_attr_master_category:(\"Footwear\"))",
        "start": 0,
        "rows": 96,
        "facetField": [],
        "pivotFacets": [],
        "fq": ["count_options_availbale:[1 TO *]"],
        "sort": [
            {"sort_field": "count_options_availbale", "order_by": "desc"},
            {"sort_field": "score", "order_by": "desc"},
            {"sort_field": "style_store1_female_sort_field", "order_by": "desc"},
            {"sort_field": "potential_revenue_female_sort_field", "order_by": "desc"},
            {"sort_field": "global_attr_catalog_add_date", "order_by": "desc"}
        ],
        "return_docs": True,
        "colour_grouping": True,
        "useCache": True,
        "flatshot": False,
        "outOfStock": False,
        "showInactiveStyles": False,
        "facet": True
    }]
    yield Request(url, self.parse, method="POST", body=json.dumps(payload))

def parse(self, response):
    """Decode the JSON body of ``response`` and print the resulting object."""
    # Imported locally so the snippet works when pasted into a spider module
    # that does not already import ``json``.
    import json

    data = json.loads(response.body)
    # Single-argument print(...) produces the same output on Python 2 and
    # Python 3.
    print(data)
Run Code Online (Sandbox Code Playgroud)