我正在尝试从clutch.co中获取有关不同机构的一些信息。当我在浏览器中查找 url 时,一切都很好,但使用 scrapy 却给出了 403 响应。从我读到的所有相关问题来看,我认为它来自 Cloudflare。我是否可以绕过这些安全措施?这是我的scrapy代码:
class ClutchSpider(scrapy.Spider):
name = "clutch"
allowed_domains = ["clutch.co"]
start_urls = ["http://clutch.co/"]
custom_settings = {
'DOWNLOAD_DELAY': 1,
'CONCURRENT_REQUESTS': 5,
'RETRY_ENABLED': True,
'RETRY_TIMES': 5,
'ROBOTSTXT_OBEY': False,
'FEED_URL': f'output/output{datetime.timestamp(datetime.now())}.json',
'FEED_FORMAT': 'json',
}
def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
self.input_urls = ['https://clutch.co/directory/mobile-application-developers']
self.headers = {
'accept': '*/*',
'accept-encoding': 'gzip, deflate, br',
'accept-language': 'en-US,en;q=0.9,fa;q=0.8',
# 'cookie': 'shortlist_prompts=true; FPID=FPID2.2.iqvavTK2dqTJ7yLsgWqoL8fYmkFoX3pzUlG6mTVjfi0%3D.1673247154; CookieConsent={stamp:%27zejzt8TIN2JRypvuDr+oPX/PjYUsuVCNii4qWhJvCxxtOxEXcb5hMg==%27%2Cnecessary:true%2Cpreferences:true%2Cstatistics:true%2Cmarketing:true%2Cmethod:%27explicit%27%2Cver:1%2Cutc:1673247163647%2Cregion:%27nl%27}; _gcl_au=1.1.1124048711.1676796982; _gid=GA1.2.316079371.1676796983; ab.storage.deviceId.c7739970-c490-4772-aa67-2b5c1403137e=%7B%22g%22%3A%22d2822ae5-4bac-73ae-cfc0-86adeaeb1add%22%2C%22c%22%3A1676797005041%2C%22l%22%3A1676797005041%7D; ln_or=eyIyMTU0NjAyIjoiZCJ9; hubspotutk=f019384cf677064ee212b1891e67181c; FPLC=o62q7Cwf0JP12iF73tjxOelgvID3ocGZrxnLxzHlB%2F9In25%2BL7oYAwvSxOTnaZWDYH7G2iMkQ03VUW%2BJgWsv7i7StDXSdFnQr6Dpj6VC%2F2Ya4ZptNbWzzRcJUv00JA%3D%3D; __hssrc=1; shortlist_prompts=true; __hstc=238368351.f019384cf677064ee212b1891e67181c.1676798584729.1676873409297.1676879456609.3; __cf_bm=Pn4xsZ2pgyFdB0bdi9t0xTpqxVzY9t5vhySYN6uRpAQ-1676881063-0-AT8uJ+ux6Tmu0WU+bsJovJ1CubUhs+C0JBulUr1i2aQLY28rn7T23PVuGWffSrCaNjeeYPzSDN42NJ46j10jKEPjPO3mS4P8uMx9dDmA7wTqz5NCdil5W5uGQJs2pMbcjbQSfNTjQLh5umYER6hhhLx8qrRFHDnTTJ1vkORfc0eSqBe0rjqaHeR4HFINZOp1UQ==; _ga=GA1.2.298895719.1676796981; _gat_gtag_UA_2814589_5=1; __hssc=238368351.3.1676879456609; _ga_D0WFGX8X3V=GS1.1.1676879405.3.1.1676881074.46.0.0',
'referer': …Run Code Online (Sandbox Code Playgroud)