
最近在做一个亚马逊商家后台产品搜索的爬虫,需要利用 cookie 跳过登录。一开始通过 superagent 成功爬到了 https://sellercentral.amazon.com/product-search/ 页面的信息,但是该页面的商品数据是通过 ajax 向后台请求的,而且接口做了加密。所以我改用模拟浏览器行为的方式去获取,nightmare 就是一个不错的库。
但在实际使用 nightmare 的过程中,出现了无法通过 cookie 跳过登录的问题:访问页面后被 302 重定向到了登录页面。
nightmare 代码如下:
/**
 * Crawler wrapper around a shared `nightmare` instance.
 *
 * Relies on `nightmare`, `user_agents`, `app` and `$` (jQuery, injected into
 * the page below), none of which are defined in this snippet.
 */
class gCreeper {
  /**
   * @param {string} mainPageUrl - URL that getMainPage() navigates to.
   */
  constructor(mainPageUrl) {
    this.mainPageUrl = mainPageUrl
    // Picks one of the entries at index 0..2 at random.
    // NOTE(review): assumes `user_agents` has at least three entries — confirm.
    this.ua = user_agents[Math.floor(Math.random()*3)];
  }
  /**
   * Navigates to the main page with custom request headers, then sets cookies,
   * injects jQuery, waits for `#ap_email`, fills that field in, and finally
   * registers a GET '/' route on `app`.
   *
   * NOTE(review): the cookies are set only AFTER goto() has already been
   * awaited. The follow-up reply in this thread reports that the cookies have
   * to be set before goto() for the login redirect to be avoided.
   */
  async getMainPage() {
    // Second argument: extra request headers sent with the navigation.
    await nightmare.goto(this.mainPageUrl,{
      'user-agent':this.ua,
      'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
      'accept-encoding':'gzip, deflate, br','accept-language':'en-US,en;q=0.9'
    });
    // NOTE(review): this passes a single { cookieName: cookieValue } map.
    // Nightmare's README shows cookies.set(name, value) or cookie objects with
    // `name`/`value` fields — confirm that this map form is actually accepted.
    await nightmare.cookies.set({
      "at-main":"Atza|xxx",
      "i18n-prefs":"USD",
      "mons-lang":"zh_CN",
      "s_sess":"%20s_cc%3xxx",
      "sess-at-main":"\"CLE6xxx\"",
      "session-id":"147-xxx",
      "session-id-time":"2082787201l",
      "session-token":"\"Fw1F/xxx\"",
      "sid":"\"g1vTxxx\"",
      "sst-main":"Sst1|xxx",
      "ubid-main":"130-5348684-1470161",
      "x-main":"v5o0xxx",
      "x-wl-uid":"1rm9xxx",
      "csm-hit":"tb:V1YYxxx"
    }).inject('js', 'jquery.min.js').wait('#ap_email').catch(function(err) {
      // Any failure in the chain above is only logged; execution continues.
      console.log('连接失败:'+err);
    });
    // `myua` receives this.ua but is not used inside the page callback.
    await nightmare.evaluate((myua) => {
      $('#ap_email').val('GanxiaozheTest');
    }, this.ua);
    // NOTE(review): a route is registered on every getMainPage() call — confirm intended.
    app.get('/', async (req, res, next) => {
      res.send("success");
    });
  }
}
估计是 cookies.set 问题,但看官方文档给的示例是这样没错。迷......
1 gxz OP 问题解决了,是需要在 nightmare 执行 goto 前给 cookie 赋值。也许需要这样处理:
/*
 * Registers a custom Nightmare action named `preloadCookies`, so that cookies
 * can be written into the browser session before the first goto().
 *
 * Nightmare.action is given two functions here. NOTE(review): per the
 * Nightmare docs the first presumably runs in the Electron process and the
 * second in the calling (Node) context — confirm against the Nightmare version
 * in use.
 */
Nightmare.action('preloadCookies', function(name, options, parent, win, renderer, done) {
  // Listen for the 'did-start-loading' message that the second function below
  // emits, carrying the target url and the cookie list.
  parent.on('did-start-loading', function(url, sessionCookies) {
    if (sessionCookies) {
      parent.emit('log', 'Preloading cookies');
      for (var i = 0; i < sessionCookies.length; i++) {
        // `url` is only a default: a `url` field on the cookie itself wins,
        // because later Object.assign sources override earlier ones.
        var details = Object.assign({ url: url }, sessionCookies[i]);
        win.webContents.session.cookies.set(details, function (error) {
          // NOTE(review): done() has already been called once when the action
          // was set up (below), so this may be a second invocation — confirm
          // that is safe.
          if (error) done(error);
        });
      }
    }
    // Emitted back with no arguments; the second function below registers
    // `done` for this event via once().
    // NOTE(review): this fires without waiting for the cookies.set callbacks above.
    parent.emit('did-start-loading');
  });
  done();
  return this;
}, function(cookies, url, done) {
  // Subscribe first, then send the url and cookies across via the child.
  this.child.once('did-start-loading', done);
  this.child.emit('did-start-loading', url, cookies);
});
// Usage: load the stored cookies and url, preload the cookies, then navigate.
// `storage` is defined outside this snippet; the storage keys come from
// environment variables.
let cookies = storage.getItemSync(process.env.COOKIES_STORAGE);
let url = storage.getItemSync(process.env.URL_STORAGE);
Nightmare().preloadCookies(cookies, url).goto(url);
|