def get_page_content(self, response):
    """Parse one company-listing page.

    Yields a ``scrapy.Request`` for every company detail link on the page,
    then follows the "next page" anchor if one exists. The site does not
    expose a total page count, so pagination works by recursively following
    the ``ka="page-next"`` link until it disappears — the crawl terminates
    naturally on the last page.

    Parameters
    ----------
    response : scrapy.http.Response
        The listing-page response currently being parsed.

    Yields
    ------
    scrapy.Request
        Detail-page requests (handled by ``self.final_parsre``) and, at most
        once, the next listing-page request (handled by this method again).
    """
    # Relative hrefs to each company's detail page on this listing page.
    detail_links = response.xpath(
        '//div[@class="sub-li"]/a[contains(@class,"company-info")]/@href'
    ).extract()
    for link in detail_links:
        yield scrapy.Request(
            url=response.urljoin(link),
            # NOTE(review): "final_parsre" looks like a typo for "final_parse",
            # but the method is defined elsewhere in this spider — left as-is
            # so the reference stays valid.
            callback=self.final_parsre,
            dont_filter=True,
        )

    # ``.extract()`` returns a list and NEVER returns None, so the original
    # ``if next_page is not None:`` guard was always true and ``next_page[0]``
    # raised IndexError on the last page. Testing list truthiness stops the
    # recursion cleanly when no "next page" anchor exists.
    next_page = response.xpath(
        '//div[@class="page"]/a[contains(@ka,"page-next")]/@href'
    ).extract()
    if next_page:
        yield scrapy.Request(
            url=response.urljoin(next_page[0]),
            callback=self.get_page_content,
            dont_filter=True,
        )

# NOTE(review): the original paste duplicated the function body above and
# embedded the following non-code text as bare lines (a syntax error);
# preserved here as comments.
#
# Example listing URL:
#   https://www.zhipin.com/gongsi/_zzz_c101200100_iy100101_t801_s302/?page=1&ka=page-1
#
# Original question (translated from Chinese): "This site paginates by
# clicking 'next page' and the total number of pages is unknown. I click
# several filter buttons (city, funding stage), then enter each detail page
# to scrape data. Since the page count is unknown and can only be discovered
# by clicking 'next', how should this be written?" — answered by the
# next-link recursion implemented above.
