我使用的是 Python 2 + urllib2 + cookielib 来模拟登录豆瓣。但是很奇怪的是,我获取了验证码之后,再尝试登录时一直显示验证码错误。后来我打印了一下获取到的 cookie,发现只有一行,也就是并不全。这会不会是登录失败的原因?求各位 V 友帮忙看一下,实在不知道错在哪了……谢谢,代码如下。
# -*- coding: utf-8 -*-
# Simulated Douban login for Python 2 (urllib + urllib2 + cookielib).
#
# Flow: fetch the login page, save the captcha image (when the page contains
# one) so the user can read it, then POST the login form through the same
# cookie-aware opener.
#
# NOTE: the encoding declaration above is required by Python 2 because the
# form contains a non-ASCII literal; it must stay within the first two lines
# of the script.
import urllib
import urllib2
import cookielib
import re

login_url = 'https://www.douban.com/accounts/login'

# set opener: every urllib2.urlopen() call below shares this cookie jar, so
# cookies received from any response are replayed automatically.
cookiejar = cookielib.LWPCookieJar()
handler = urllib2.HTTPCookieProcessor(cookiejar)
opener = urllib2.build_opener(handler)
urllib2.install_opener(opener)

text = urllib2.urlopen(login_url).read()

# Debug output only: the cookies held after the first request. This string is
# deliberately NOT sent as a "Cookie" header any more -- cookielib skips
# adding the jar's cookies when the request already carries a Cookie header,
# so a hand-built header would hide cookies set by later responses.
cookie = '; '.join(item.name + '=' + item.value for item in cookiejar)
print(cookie)

# set post_data (captcha fields are added below when a captcha is present)
params = {
    'source': 'None',
    'redir': 'https://www.douban.com',
    'form_email': '[email protected]',
    'form_password': 'xxxxxx',
    'login': '登录',
}

# get captcha image url and captcha id from the login page
image_pattern = re.compile('<img.*?src="(.*?)" alt.*?/>')
picture_url = re.findall(image_pattern, text)
pattern = re.compile('<input.*?captcha-id".*?value=(.*?)/>', re.S)
captcha_id = re.findall(pattern, text)

# Only handle the captcha when both pieces were found; indexing an empty
# match list would otherwise raise IndexError.
if picture_url and captcha_id:
    picture = urllib2.urlopen(str(picture_url[0])).read()

    # save captcha image; "with" guarantees the file is closed
    with open('./image.jpg', 'wb') as captcha_image:
        captcha_image.write(picture)

    # input secret_code
    captcha_code = raw_input('input the code')

    params['captcha-solution'] = captcha_code
    # The pattern captures everything between "value=" and "/>", which
    # includes the surrounding quotes (and any padding) when the attribute is
    # quoted. Strip them so only the bare id is submitted.
    params['captcha-id'] = str(captcha_id[0]).strip().strip('"\'')

postdata = urllib.urlencode(params)

# No "Host" header: urllib2 derives it from the request URL, and the
# previously hard-coded value did not match login_url's host.
# No "Cookie" header: the installed HTTPCookieProcessor supplies it.
headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Referer": "https://www.douban.com/accounts/login",
    "Connection": "keep-alive",
}

# login
req = urllib2.Request(login_url, postdata, headers)
response = urllib2.urlopen(req)
print(response.read())