Browse Source

update generator

pull/4/merge
Germey 8 years ago
parent
commit
5f37313887
  1. 14
      cookiespool/config.py
  2. 11
      cookiespool/db.py
  3. 85
      cookiespool/generator.py
  4. 7
      cookiespool/scheduler.py
  5. 28
      cookiespool/tester.py

14
cookiespool/config.py

@ -1,11 +1,11 @@
# Redis数据库地址
REDIS_HOST = 'localhost'
REDIS_HOST = 'DataCrawl-Pool.redis.cache.chinacloudapi.cn'
# Redis端口
REDIS_PORT = 6379
# Redis密码,如无填None
REDIS_PASSWORD = 'foobared'
REDIS_PASSWORD = 'kk7YBCEHvswKYORLA6FCF3rfpi8mZXlKnAqBZIXqXXE='
# 配置信息,无需修改
REDIS_DOMAIN = '*'
@ -13,7 +13,7 @@ REDIS_NAME = '*'
# 云打码相关配置到yundama.com申请注册
YUNDAMA_USERNAME = 'Germey'
YUNDAMA_PASSWORD = '940629cqc'
YUNDAMA_PASSWORD = 'CQCcqc123'
YUNDAMA_APP_ID = '3372'
YUNDAMA_APP_KEY = '1b586a30bfda5c7fa71c881075ba49d0'
@ -23,16 +23,16 @@ YUNDAMA_API_URL = 'http://api.yundama.com/api.php'
YUNDAMA_MAX_RETRY = 20
# 产生器默认使用的浏览器
DEFAULT_BROWSER = 'Chrome'
DEFAULT_BROWSER = 'PhantomJS'
# 产生器类,如扩展其他站点,请在此配置
GENERATOR_MAP = {
'weibo': 'WeiboCookiesGenerator'
'mweibo': 'MWeiboCookiesGenerator'
}
# 测试类,如扩展其他站点,请在此配置
TESTER_MAP = {
'weibo': 'WeiboValidTester'
'mweibo': 'MWeiboValidTester'
}
# 产生器和验证器循环周期
@ -44,7 +44,7 @@ API_PORT = 5000
# 进程开关
# 产生器,模拟登录添加Cookies
GENERATOR_PROCESS = False
GENERATOR_PROCESS = True
# 验证器,循环检测数据库中Cookies是否可用,不可用删除
VALID_PROCESS = False
# API接口服务

11
cookiespool/db.py

@ -201,12 +201,9 @@ if __name__ == '__main__':
"""
# 测试
conn = AccountRedisClient(name='weibo')
conn.set('14760253606', 'gmidy8470')
conn.set('14760253607', 'uoyuic8427')
conn.set('18459749258', 'rktfye8937')
conn.set('18459748505', 'astvar3647')
print(conn.get('18459748505'))
print(conn.keys())
conn2 = AccountRedisClient(name='mweibo')
accounts = conn.all()
for account in accounts:
print(account)
conn2.set(account['username'], account['password'])

85
cookiespool/generator.py

@ -1,6 +1,7 @@
import json
import requests
import time
from selenium import webdriver
from selenium.common.exceptions import WebDriverException, TimeoutException
from selenium.webdriver import DesiredCapabilities
@ -34,8 +35,9 @@ class CookiesGenerator(object):
if browser_type == 'PhantomJS':
caps = DesiredCapabilities.PHANTOMJS
caps[
"phantomjs.page.settings.userAgent"] = "Mozilla/5.0 (Linux; U; Android 2.3.6; en-us; Nexus S Build/GRK39F) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1"
"phantomjs.page.settings.userAgent"] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36'
self.browser = webdriver.PhantomJS(desired_capabilities=caps)
self.browser.set_window_size(1400, 500)
elif browser_type == 'Chrome':
self.browser = webdriver.Chrome()
@ -162,6 +164,87 @@ class WeiboCookiesGenerator(CookiesGenerator):
print(e.args)
class MWeiboCookiesGenerator(CookiesGenerator):
    """Cookies generator that logs in through my.sina.com.cn and collects m.weibo.cn cookies."""

    def __init__(self, name='weibo', browser_type=DEFAULT_BROWSER):
        """
        Initialise the base generator and create the Yundama client used for captchas.

        :param name: site name, forwarded to CookiesGenerator and stored on the instance
        :param browser_type: browser type, forwarded to CookiesGenerator
        """
        # NOTE(review): the default is 'weibo' although this class targets
        # m.weibo.cn; left unchanged so existing callers keep their behaviour.
        CookiesGenerator.__init__(self, name, browser_type)
        self.name = name
        self.ydm = Yundama(YUNDAMA_USERNAME, YUNDAMA_PASSWORD, YUNDAMA_APP_ID, YUNDAMA_APP_KEY)

    def _success(self, username):
        """
        Wait for the logged-in marker, then read the cookies from m.weibo.cn.

        :param username: account name, returned together with the cookies
        :return: (username, cookies serialised as a JSON string), or None when the
                 marker is falsy or the m.weibo.cn page title does not contain "微博"
        :raises TimeoutException: when the marker element is not visible within 5 seconds
        """
        wait = WebDriverWait(self.browser, 5)
        logged_in = wait.until(EC.visibility_of_element_located((By.CLASS_NAME, 'me_portrait_w')))
        if not logged_in:
            return None
        print('登录成功')
        self.browser.get('http://m.weibo.cn/')
        if "微博" not in self.browser.title:
            return None
        # Fetch the cookies once and reuse them for both the log line and the result.
        browser_cookies = self.browser.get_cookies()
        print(browser_cookies)
        cookies = {cookie["name"]: cookie["value"] for cookie in browser_cookies}
        print(cookies)
        print('成功获取到Cookies')
        return (username, json.dumps(cookies))

    def new_cookies(self, username, password):
        """
        Generate cookies for one account by driving the Sina login form.

        :param username: account user name
        :param password: account password
        :return: (username, cookies as a JSON string) on success, otherwise None
        """
        print('Generating Cookies of', username)
        self.browser.delete_all_cookies()
        self.browser.get('http://my.sina.com.cn/profile/unlogin')
        wait = WebDriverWait(self.browser, 20)
        try:
            login = wait.until(EC.visibility_of_element_located((By.ID, 'hd_login')))
            login.click()
            user = wait.until(
                EC.visibility_of_element_located((By.CSS_SELECTOR, '.loginformlist input[name="loginname"]')))
            user.send_keys(username)
            psd = wait.until(
                EC.visibility_of_element_located((By.CSS_SELECTOR, '.loginformlist input[name="password"]')))
            psd.send_keys(password)
            submit = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '.login_btn')))
            submit.click()
            try:
                return self._success(username)
            except TimeoutException:
                # The logged-in marker never appeared; the flow treats this as a captcha prompt.
                print('出现验证码,开始识别验证码')
                yzm = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '.loginform_yzm .yzm')))
                url = yzm.get_attribute('src')
                # Download the captcha image with the browser's cookies.
                cookies_dict = {
                    cookie.get('name'): cookie.get('value')
                    for cookie in self.browser.get_cookies()
                }
                try:
                    # requests never times out by default; bound the download so a
                    # stalled captcha server cannot hang the generator forever.
                    response = requests.get(url, cookies=cookies_dict, timeout=10)
                except requests.RequestException as e:
                    print('Error', e.args)
                    return None
                result = self.ydm.identify(stream=response.content)
                if not result:
                    print('验证码识别失败, 跳过识别')
                    return None
                door = wait.until(
                    EC.visibility_of_element_located((By.CSS_SELECTOR, '.loginform_yzm input[name="door"]')))
                door.send_keys(result)
                submit.click()
                # A TimeoutException raised here is a WebDriverException and is
                # reported by the outer handler below.
                return self._success(username)
        except WebDriverException as e:
            # Report the failure instead of swallowing it silently; the account is
            # still skipped by returning None.
            print(e.args)
        return None
if __name__ == '__main__':
generator = WeiboCookiesGenerator()
generator._init_browser('Chrome')

7
cookiespool/scheduler.py

@ -13,7 +13,7 @@ class Scheduler(object):
print('Checking Cookies')
try:
for name, cls in TESTER_MAP.items():
tester = eval(cls + '()')
tester = eval(cls + '(name="' + name + '")')
tester.run()
print('Tester Finished')
del tester
@ -27,7 +27,7 @@ class Scheduler(object):
print('Generating Cookies')
try:
for name, cls in GENERATOR_MAP.items():
generator = eval(cls + '()')
generator = eval(cls + '(name="' + name + '")')
generator.run()
print('Generator Finished')
generator.close()
@ -44,12 +44,15 @@ class Scheduler(object):
if GENERATOR_PROCESS:
generate_process = Process(target=Scheduler.generate_cookie)
generate_process.start()
generate_process.join()
if VALID_PROCESS:
valid_process = Process(target=Scheduler.valid_cookie)
valid_process.start()
valid_process.join()
if API_PROCESS:
api_process = Process(target=Scheduler.api)
api_process.start()
api_process.join()

28
cookiespool/tester.py

@ -56,6 +56,34 @@ class WeiboValidTester(ValidTester):
print('Invalid Cookies', account.get('username'))
class MWeiboValidTester(ValidTester):
    """Validity tester that checks stored cookies against an m.weibo.cn API endpoint."""

    def __init__(self, name='weibo'):
        """
        Initialise the base tester.

        :param name: site name, forwarded to ValidTester
        """
        ValidTester.__init__(self, name)

    def test(self, account, cookies):
        """
        Test one account's cookies and delete them when they are unusable.

        :param account: mapping that provides the 'username' key
        :param cookies: cookies serialised as a JSON string
        :return: None
        """
        username = account.get('username')
        print('Testing Account', username)
        try:
            cookies = json.loads(cookies)
        except (TypeError, ValueError):
            # TypeError: the stored value is not a string (e.g. None).
            # ValueError: json.JSONDecodeError for a malformed string; the original
            # caught only TypeError, so corrupt values crashed the tester.
            print('Invalid Cookies Value', username)
            self.cookies_db.delete(username)
            print('Deleted User', username)
            return None
        test_url = 'http://m.weibo.cn/api/container/getIndex?uid=1804544030&type=uid&page=1&containerid=1076031804544030'
        try:
            response = requests.get(test_url, cookies=cookies, timeout=5, allow_redirects=False)
        except (ConnectionError, requests.exceptions.RequestException) as e:
            # RequestException also covers the read timeout that timeout=5 can
            # raise. ConnectionError is kept so the original handler still applies
            # whichever class that name refers to in this module.
            print('Error', e.args)
            print('Invalid Cookies', username)
            return None
        if response.status_code == 200:
            print('Valid Cookies', username)
        else:
            # Redirects are not followed, so any non-200 status counts as invalid.
            print(response.status_code, response.headers)
            print('Invalid Cookies', username)
            self.cookies_db.delete(username)
            print('Deleted User', username)
        return None
if __name__ == '__main__':
tester = WeiboValidTester()
tester.run()

Loading…
Cancel
Save