
import scrapy


class ExampleSpider(scrapy.Spider):
    """Search Baidu for a keyword supplied as a spider argument.

    The keyword reaches the spider either from the command line
    (``scrapy crawl baidu.com -a key=<word>``) or programmatically via
    ``CrawlerProcess.crawl(ExampleSpider, key=<word>)``.
    """

    name = 'baidu.com'
    allowed_domains = ['www.baidu.com']
    # start_urls = ['https://www.baidu.com/']

    def __init__(self, key=None, *args, **kwargs):
        # BUG FIX: the original signature was ``__init__(self, key)`` and
        # called ``super().__init__()`` with no arguments, swallowing the
        # name/kwargs Scrapy forwards when it instantiates the spider —
        # which is why passing parameters "didn't seem to work".
        # Forwarding *args/**kwargs (and defaulting key to None) follows
        # the documented Scrapy spider-argument pattern and stays
        # backward-compatible for direct ExampleSpider(key=...) callers.
        super().__init__(*args, **kwargs)
        self.key = key

    def start_requests(self):
        # Build the search URL from the runtime-supplied keyword.
        url = f'https://www.baidu.com/s?wd={self.key}'
        yield scrapy.Request(url=url, callback=self.mparse)

    def mparse(self, response):
        # Emit only the result page's <title> text.
        yield {
            'title': response.xpath('//title/text()').extract_first()
        }

# Original poster's note: passing the parameter this way didn't seem to work.
from scrapy.crawler import CrawlerProcess

from test_spider.spiders.example import ExampleSpider

process = CrawlerProcess()
# BUG FIX: the original called ``process.crawl(ExampleSpider(key='ip'))``,
# passing a spider *instance*. ``CrawlerProcess.crawl()`` expects the spider
# *class* (or a Crawler), with spider arguments given as keyword arguments —
# Scrapy then instantiates the spider itself via from_crawler(). Passing an
# instance is why the parameter never reached the running spider.
process.crawl(ExampleSpider, key='ip')
process.start()

# Reply 1 — IanPeverell, 2019-09-16 18:45:31 +08:00:
# In this situation you can also pass it through the settings:
#     process = CrawlerProcess(settings={"key": "ip"})
# and read it inside the spider with ``self.settings.get("key")``.
2 wersonliu9527 OP @IanPeverell 感谢 |