# Extensible cookies pool: logs in with a headless browser and generates cookies for crawlers to use.
import json
from selenium import webdriver
8 years ago
from selenium.webdriver import DesiredCapabilities
6 years ago
from selenium.webdriver.chrome.options import Options
8 years ago
from cookiespool.config import *
7 years ago
from cookiespool.db import RedisClient
from login.weibo.cookies import WeiboCookies
8 years ago
class CookiesGenerator(object):
    """
    Base class for site-specific cookies generators.

    It owns two ``RedisClient`` stores (accounts and cookies), an optional
    browser instance used for simulated logins, and the driving loop in
    :meth:`run`. Subclasses must override :meth:`new_cookies`.
    """

    def __init__(self, website='default'):
        """
        Initialise the storage clients and the browser.

        :param website: site name, passed to both ``RedisClient`` instances
        """
        # Define the attribute before anything below can fail, so that
        # close() (also reached through __del__) always finds it.
        self.browser = None
        self.website = website
        self.cookies_db = RedisClient('cookies', self.website)
        self.accounts_db = RedisClient('accounts', self.website)
        self.init_browser()

    def __del__(self):
        """Release the browser when the generator is garbage-collected."""
        self.close()

    def init_browser(self):
        """
        Create the browser selected by ``BROWSER_TYPE`` and store it on
        ``self.browser`` for use by the simulated login.

        When ``BROWSER_TYPE`` is neither ``'PhantomJS'`` nor ``'Chrome'`` no
        browser is created and ``self.browser`` is left untouched.

        :return: None
        """
        if BROWSER_TYPE == 'PhantomJS':
            # Work on a copy: DesiredCapabilities.PHANTOMJS is a class-level
            # dict, so editing it in place would leak the user agent into
            # every other user of that constant.
            caps = DesiredCapabilities.PHANTOMJS.copy()
            caps["phantomjs.page.settings.userAgent"] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36'
            self.browser = webdriver.PhantomJS(desired_capabilities=caps)
            self.browser.set_window_size(1400, 500)
        elif BROWSER_TYPE == 'Chrome':
            chrome_options = Options()
            chrome_options.add_argument('--headless')
            # NOTE(review): newer Selenium releases spell this keyword
            # ``options=``; kept as-is to match the version this file targets.
            self.browser = webdriver.Chrome(chrome_options=chrome_options)

    def new_cookies(self, username, password):
        """
        Generate fresh cookies for one account. Subclasses must override this.

        :param username: account user name
        :param password: account password
        :return: a mapping from which :meth:`run` reads ``status`` and ``content``
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def process_cookies(self, cookies):
        """
        Flatten a sequence of cookie records into a name -> value mapping.

        :param cookies: iterable of mappings, each with ``'name'`` and ``'value'`` keys
        :return: dict mapping each cookie name to its value (later duplicates win)
        """
        return {cookie['name']: cookie['value'] for cookie in cookies}

    def run(self):
        """
        Log in every account that has no stored cookies yet.

        For each such account the result of :meth:`new_cookies` is inspected:
        status ``1`` stores the processed cookies, status ``2`` (wrong
        password) removes the account, anything else only prints the content.

        :return: None
        """
        accounts_usernames = self.accounts_db.usernames()
        # Snapshot taken once before the loop; a set makes each membership
        # test O(1).
        cookies_usernames = set(self.cookies_db.usernames())

        for username in accounts_usernames:
            if username in cookies_usernames:
                continue
            password = self.accounts_db.get(username)
            # NOTE(review): this prints the password in clear text.
            print('正在生成Cookies', '账号', username, '密码', password)
            result = self.new_cookies(username, password)
            status = result.get('status')
            if status == 1:
                # Login succeeded: persist the cookies as JSON.
                cookies = self.process_cookies(result.get('content'))
                print('成功获取到Cookies', cookies)
                if self.cookies_db.set(username, json.dumps(cookies)):
                    print('成功保存Cookies')
            elif status == 2:
                # Wrong password: drop the account.
                print(result.get('content'))
                if self.accounts_db.delete(username):
                    print('成功删除账号')
            else:
                print(result.get('content'))

        # The loop never breaks, so this always runs once it has finished.
        print('所有账号都已经成功获取Cookies')

    def close(self):
        """
        Shut the browser down. Safe to call repeatedly and when no browser
        was ever created.

        :return: None
        """
        browser = getattr(self, 'browser', None)
        if browser is None:
            print('Browser not opened')
            return
        # Forget the browser first so a later close() (e.g. from __del__)
        # becomes a no-op even if shutting down fails.
        self.browser = None
        try:
            print('Closing Browser')
            # quit() ends the whole driver session; close() would only close
            # the current window and leave the driver process running.
            browser.quit()
        except (AttributeError, TypeError):
            print('Browser not opened')
class WeiboCookiesGenerator(CookiesGenerator):
    """Cookies generator that logs accounts in through ``WeiboCookies``."""

    def __init__(self, website='weibo'):
        """
        Run the base-class initialisation for the given site.

        :param website: site name, ``'weibo'`` by default
        """
        super().__init__(website)
        self.website = website

    def new_cookies(self, username, password):
        """
        Produce cookies for one account using the shared browser.

        :param username: account user name
        :param password: account password
        :return: whatever ``WeiboCookies(...).main()`` returns
        """
        login = WeiboCookies(username, password, self.browser)
        return login.main()
8 years ago
8 years ago
# Script entry point: build the Weibo generator and process every account.
if __name__ == '__main__':
    weibo_generator = WeiboCookiesGenerator()
    weibo_generator.run()