Browse Source

update generator

pull/4/merge
Germey 8 years ago
parent
commit
5f37313887
  1. 14
      cookiespool/config.py
  2. 11
      cookiespool/db.py
  3. 85
      cookiespool/generator.py
  4. 7
      cookiespool/scheduler.py
  5. 28
      cookiespool/tester.py

14
cookiespool/config.py

@ -1,11 +1,11 @@
# Redis数据库地址 # Redis数据库地址
REDIS_HOST = 'localhost' REDIS_HOST = 'DataCrawl-Pool.redis.cache.chinacloudapi.cn'
# Redis端口 # Redis端口
REDIS_PORT = 6379 REDIS_PORT = 6379
# Redis密码,如无填None # Redis密码,如无填None
REDIS_PASSWORD = 'foobared' REDIS_PASSWORD = 'kk7YBCEHvswKYORLA6FCF3rfpi8mZXlKnAqBZIXqXXE='
# 配置信息,无需修改 # 配置信息,无需修改
REDIS_DOMAIN = '*' REDIS_DOMAIN = '*'
@ -13,7 +13,7 @@ REDIS_NAME = '*'
# 云打码相关配置到yundama.com申请注册 # 云打码相关配置到yundama.com申请注册
YUNDAMA_USERNAME = 'Germey' YUNDAMA_USERNAME = 'Germey'
YUNDAMA_PASSWORD = '940629cqc' YUNDAMA_PASSWORD = 'CQCcqc123'
YUNDAMA_APP_ID = '3372' YUNDAMA_APP_ID = '3372'
YUNDAMA_APP_KEY = '1b586a30bfda5c7fa71c881075ba49d0' YUNDAMA_APP_KEY = '1b586a30bfda5c7fa71c881075ba49d0'
@ -23,16 +23,16 @@ YUNDAMA_API_URL = 'http://api.yundama.com/api.php'
YUNDAMA_MAX_RETRY = 20 YUNDAMA_MAX_RETRY = 20
# 产生器默认使用的浏览器 # 产生器默认使用的浏览器
DEFAULT_BROWSER = 'Chrome' DEFAULT_BROWSER = 'PhantomJS'
# 产生器类,如扩展其他站点,请在此配置 # 产生器类,如扩展其他站点,请在此配置
GENERATOR_MAP = { GENERATOR_MAP = {
'weibo': 'WeiboCookiesGenerator' 'mweibo': 'MWeiboCookiesGenerator'
} }
# 测试类,如扩展其他站点,请在此配置 # 测试类,如扩展其他站点,请在此配置
TESTER_MAP = { TESTER_MAP = {
'weibo': 'WeiboValidTester' 'mweibo': 'MWeiboValidTester'
} }
# 产生器和验证器循环周期 # 产生器和验证器循环周期
@ -44,7 +44,7 @@ API_PORT = 5000
# 进程开关 # 进程开关
# 产生器,模拟登录添加Cookies # 产生器,模拟登录添加Cookies
GENERATOR_PROCESS = False GENERATOR_PROCESS = True
# 验证器,循环检测数据库中Cookies是否可用,不可用删除 # 验证器,循环检测数据库中Cookies是否可用,不可用删除
VALID_PROCESS = False VALID_PROCESS = False
# API接口服务 # API接口服务

11
cookiespool/db.py

@ -201,12 +201,9 @@ if __name__ == '__main__':
""" """
# 测试 # 测试
conn = AccountRedisClient(name='weibo') conn = AccountRedisClient(name='weibo')
conn.set('14760253606', 'gmidy8470') conn2 = AccountRedisClient(name='mweibo')
conn.set('14760253607', 'uoyuic8427')
conn.set('18459749258', 'rktfye8937')
conn.set('18459748505', 'astvar3647')
print(conn.get('18459748505'))
print(conn.keys())
accounts = conn.all() accounts = conn.all()
for account in accounts: for account in accounts:
print(account) conn2.set(account['username'], account['password'])

85
cookiespool/generator.py

@ -1,6 +1,7 @@
import json import json
import requests import requests
import time
from selenium import webdriver from selenium import webdriver
from selenium.common.exceptions import WebDriverException, TimeoutException from selenium.common.exceptions import WebDriverException, TimeoutException
from selenium.webdriver import DesiredCapabilities from selenium.webdriver import DesiredCapabilities
@ -34,8 +35,9 @@ class CookiesGenerator(object):
if browser_type == 'PhantomJS': if browser_type == 'PhantomJS':
caps = DesiredCapabilities.PHANTOMJS caps = DesiredCapabilities.PHANTOMJS
caps[ caps[
"phantomjs.page.settings.userAgent"] = "Mozilla/5.0 (Linux; U; Android 2.3.6; en-us; Nexus S Build/GRK39F) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1" "phantomjs.page.settings.userAgent"] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36'
self.browser = webdriver.PhantomJS(desired_capabilities=caps) self.browser = webdriver.PhantomJS(desired_capabilities=caps)
self.browser.set_window_size(1400, 500)
elif browser_type == 'Chrome': elif browser_type == 'Chrome':
self.browser = webdriver.Chrome() self.browser = webdriver.Chrome()
@ -162,6 +164,87 @@ class WeiboCookiesGenerator(CookiesGenerator):
print(e.args) print(e.args)
class MWeiboCookiesGenerator(CookiesGenerator):
    """Cookies generator that logs in via my.sina.com.cn and collects the
    browser cookies after opening m.weibo.cn."""

    def __init__(self, name='weibo', browser_type=DEFAULT_BROWSER):
        """
        Initialise the generator and create the Yundama captcha client.
        :param name: site name handed to the base generator
        :param browser_type: browser to use
        """
        CookiesGenerator.__init__(self, name, browser_type)
        self.name = name
        self.ydm = Yundama(YUNDAMA_USERNAME, YUNDAMA_PASSWORD, YUNDAMA_APP_ID, YUNDAMA_APP_KEY)

    def _success(self, username):
        """
        Wait for the logged-in marker, then open m.weibo.cn and read the cookies.
        :param username: account name returned alongside the cookies
        :return: (username, cookies as JSON string), or None if the page title
                 check fails
        :raises TimeoutException: if the marker element is not visible within
                 5 seconds (the caller treats this as "captcha required")
        """
        wait = WebDriverWait(self.browser, 5)
        success = wait.until(EC.visibility_of_element_located((By.CLASS_NAME, 'me_portrait_w')))
        if not success:
            return None
        print('登录成功')
        self.browser.get('http://m.weibo.cn/')
        if "微博" not in self.browser.title:
            return None
        # Fetch the cookies once and reuse them for logging and conversion.
        browser_cookies = self.browser.get_cookies()
        print(browser_cookies)
        cookies = {cookie["name"]: cookie["value"] for cookie in browser_cookies}
        print(cookies)
        print('成功获取到Cookies')
        return (username, json.dumps(cookies))

    def new_cookies(self, username, password):
        """
        Generate cookies by driving the login form in the browser.
        :param username: account user name
        :param password: account password
        :return: (username, cookies as JSON string) on success, otherwise None
        """
        print('Generating Cookies of', username)
        self.browser.delete_all_cookies()
        self.browser.get('http://my.sina.com.cn/profile/unlogin')
        wait = WebDriverWait(self.browser, 20)
        try:
            login = wait.until(EC.visibility_of_element_located((By.ID, 'hd_login')))
            login.click()
            user = wait.until(
                EC.visibility_of_element_located((By.CSS_SELECTOR, '.loginformlist input[name="loginname"]')))
            user.send_keys(username)
            psd = wait.until(
                EC.visibility_of_element_located((By.CSS_SELECTOR, '.loginformlist input[name="password"]')))
            psd.send_keys(password)
            submit = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '.login_btn')))
            submit.click()
            try:
                result = self._success(username)
                if result:
                    return result
            except TimeoutException:
                # The logged-in marker never showed up: assume a captcha is
                # being displayed and try to solve it.
                print('出现验证码,开始识别验证码')
                yzm = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '.loginform_yzm .yzm')))
                url = yzm.get_attribute('src')
                # Download the captcha image with the browser session's cookies.
                cookies_dict = {
                    cookie.get('name'): cookie.get('value')
                    for cookie in self.browser.get_cookies()
                }
                response = requests.get(url, cookies=cookies_dict)
                result = self.ydm.identify(stream=response.content)
                if not result:
                    print('验证码识别失败, 跳过识别')
                    return
                door = wait.until(
                    EC.visibility_of_element_located((By.CSS_SELECTOR, '.loginform_yzm input[name="door"]')))
                door.send_keys(result)
                submit.click()
                result = self._success(username)
                if result:
                    return result
        except WebDriverException as e:
            # Best effort: report the Selenium failure (including a timeout
            # after the captcha retry) instead of swallowing it silently, and
            # fall through so the caller receives None.
            print(e.args)
if __name__ == '__main__': if __name__ == '__main__':
generator = WeiboCookiesGenerator() generator = WeiboCookiesGenerator()
generator._init_browser('Chrome') generator._init_browser('Chrome')

7
cookiespool/scheduler.py

@ -13,7 +13,7 @@ class Scheduler(object):
print('Checking Cookies') print('Checking Cookies')
try: try:
for name, cls in TESTER_MAP.items(): for name, cls in TESTER_MAP.items():
tester = eval(cls + '()') tester = eval(cls + '(name="' + name + '")')
tester.run() tester.run()
print('Tester Finished') print('Tester Finished')
del tester del tester
@ -27,7 +27,7 @@ class Scheduler(object):
print('Generating Cookies') print('Generating Cookies')
try: try:
for name, cls in GENERATOR_MAP.items(): for name, cls in GENERATOR_MAP.items():
generator = eval(cls + '()') generator = eval(cls + '(name="' + name + '")')
generator.run() generator.run()
print('Generator Finished') print('Generator Finished')
generator.close() generator.close()
@ -44,12 +44,15 @@ class Scheduler(object):
if GENERATOR_PROCESS: if GENERATOR_PROCESS:
generate_process = Process(target=Scheduler.generate_cookie) generate_process = Process(target=Scheduler.generate_cookie)
generate_process.start() generate_process.start()
generate_process.join()
if VALID_PROCESS: if VALID_PROCESS:
valid_process = Process(target=Scheduler.valid_cookie) valid_process = Process(target=Scheduler.valid_cookie)
valid_process.start() valid_process.start()
valid_process.join()
if API_PROCESS: if API_PROCESS:
api_process = Process(target=Scheduler.api) api_process = Process(target=Scheduler.api)
api_process.start() api_process.start()
api_process.join()

28
cookiespool/tester.py

@ -56,6 +56,34 @@ class WeiboValidTester(ValidTester):
print('Invalid Cookies', account.get('username')) print('Invalid Cookies', account.get('username'))
class MWeiboValidTester(ValidTester):
    """Checks stored cookies against an m.weibo.cn API endpoint and deletes
    the ones that are rejected or malformed."""

    def __init__(self, name='weibo'):
        """
        :param name: site name handed to the base tester
        """
        ValidTester.__init__(self, name)

    def test(self, account, cookies):
        """
        Test one account's cookies; delete them from the cookies store when
        they cannot be parsed or the endpoint does not answer with HTTP 200.
        :param account: mapping with a 'username' entry
        :param cookies: cookies serialised as a JSON string
        :return: None
        """
        username = account.get('username')
        print('Testing Account', username)
        try:
            cookies = json.loads(cookies)
        except (TypeError, ValueError):
            # TypeError: the stored value is not a string (e.g. None).
            # ValueError: the string is not valid JSON (json.JSONDecodeError
            # is a ValueError subclass).
            print('Invalid Cookies Value', username)
            self.cookies_db.delete(username)
            print('Deleted User', username)
            return None
        try:
            test_url = 'http://m.weibo.cn/api/container/getIndex?uid=1804544030&type=uid&page=1&containerid=1076031804544030'
            response = requests.get(test_url, cookies=cookies, timeout=5, allow_redirects=False)
            if response.status_code == 200:
                print('Valid Cookies', username)
            else:
                print(response.status_code, response.headers)
                print('Invalid Cookies', username)
                self.cookies_db.delete(username)
                print('Deleted User', username)
        except (ConnectionError, requests.exceptions.RequestException) as e:
            # RequestException also covers read timeouts caused by timeout=5,
            # which are not connection errors; keep the cookies in that case
            # since the failure is on the network side.
            print('Error', e.args)
            print('Invalid Cookies', username)
if __name__ == '__main__': if __name__ == '__main__':
tester = WeiboValidTester() tester = WeiboValidTester()
tester.run() tester.run()

Loading…
Cancel
Save