From 51931f9cd9c02916655309b36710526e6329785b Mon Sep 17 00:00:00 2001
From: jyhl
Date: Tue, 12 May 2020 10:28:11 +0800
Subject: [PATCH] init

---
 requirements.dev.txt                |   3 +
 requirements.txt                    |  32 ++
 scrapy.cfg                          |  11 ++
 scrapypyppeteer/__init__.py         |   0
 scrapypyppeteer/items.py            |  14 ++
 scrapypyppeteer/middlewares.py      | 161 ++++++++++++++++++
 scrapypyppeteer/pipelines.py        |  11 ++
 scrapypyppeteer/settings.py         |  98 +++++++++++
 scrapypyppeteer/spiders/16yun.py    |  15 ++
 scrapypyppeteer/spiders/__init__.py |   4 +
 scrapypyppeteer/spiders/quotes.py   |  24 ++
 scrapypyppeteer/spiders/taobao.py   |  18 ++
 useragents.txt                      |   6 +
 13 files changed, 397 insertions(+)
 create mode 100644 requirements.dev.txt
 create mode 100644 requirements.txt
 create mode 100644 scrapy.cfg
 create mode 100644 scrapypyppeteer/__init__.py
 create mode 100644 scrapypyppeteer/items.py
 create mode 100644 scrapypyppeteer/middlewares.py
 create mode 100644 scrapypyppeteer/pipelines.py
 create mode 100644 scrapypyppeteer/settings.py
 create mode 100644 scrapypyppeteer/spiders/16yun.py
 create mode 100644 scrapypyppeteer/spiders/__init__.py
 create mode 100644 scrapypyppeteer/spiders/quotes.py
 create mode 100644 scrapypyppeteer/spiders/taobao.py
 create mode 100644 useragents.txt

diff --git a/requirements.dev.txt b/requirements.dev.txt
new file mode 100644
index 0000000..15d8833
--- /dev/null
+++ b/requirements.dev.txt
@@ -0,0 +1,3 @@
+pyppeteer
+scrapy
+scrapy-random-useragent
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..81f8f3a
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,32 @@
+appdirs==1.4.4
+attrs==19.3.0
+Automat==20.2.0
+cffi==1.14.0
+constantly==15.1.0
+cryptography==2.9.2
+cssselect==1.1.0
+hyperlink==19.0.0
+idna==2.9
+incremental==17.5.0
+lxml==4.5.0
+parsel==1.6.0
+Protego==0.1.16
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+pycparser==2.20
+PyDispatcher==2.0.5
+pyee==7.0.1
+PyHamcrest==2.0.2
+pyOpenSSL==19.1.0
+pyppeteer==0.2.2
+queuelib==1.5.0
+Scrapy==2.1.0
+scrapy-random-useragent==0.2
+service-identity==18.1.0
+six==1.14.0
+tqdm==4.46.0
+Twisted==20.3.0
+urllib3==1.25.9
+w3lib==1.21.0
+websockets==8.1
+zope.interface==5.1.0
diff --git a/scrapy.cfg b/scrapy.cfg
new file mode 100644
index 0000000..495ba59
--- /dev/null
+++ b/scrapy.cfg
@@ -0,0 +1,11 @@
+# Automatically created by: scrapy startproject
+#
+# For more information about the [deploy] section see:
+# https://scrapyd.readthedocs.org/en/latest/deploy.html
+
+[settings]
+default = scrapypyppeteer.settings
+
+[deploy]
+#url = http://localhost:6800/
+project = scrapypyppeteer
diff --git a/scrapypyppeteer/__init__.py b/scrapypyppeteer/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/scrapypyppeteer/items.py b/scrapypyppeteer/items.py
new file mode 100644
index 0000000..5c7c909
--- /dev/null
+++ b/scrapypyppeteer/items.py
@@ -0,0 +1,14 @@
+# -*- coding: utf-8 -*-
+
+# Define here the models for your scraped items
+#
+# See documentation in:
+# http://doc.scrapy.org/en/latest/topics/items.html
+
+import scrapy
+
+
+class ScrapypyppeteerItem(scrapy.Item):
+    # define the fields for your item here like:
+    # name = scrapy.Field()
+    pass
diff --git a/scrapypyppeteer/middlewares.py b/scrapypyppeteer/middlewares.py
new file mode 100644
index 0000000..e343d00
--- /dev/null
+++ b/scrapypyppeteer/middlewares.py
@@ -0,0 +1,161 @@
+import websockets
+from scrapy.http import HtmlResponse
+from logging import getLogger
+import asyncio
+import pyppeteer
+import logging
+from pyppeteer.errors import TimeoutError
+import base64
+import sys
+import random
+
+pyppeteer_level = logging.WARNING
+logging.getLogger('websockets.protocol').setLevel(pyppeteer_level)
+logging.getLogger('pyppeteer').setLevel(pyppeteer_level)
+
+PY3 = sys.version_info[0] >= 3
+
+
+def base64ify(bytes_or_str):
+    if PY3 and isinstance(bytes_or_str, str):
+        input_bytes = bytes_or_str.encode('utf8')
+    else:
+        input_bytes = bytes_or_str
+
+    output_bytes = base64.urlsafe_b64encode(input_bytes)
+    if PY3:
+        return output_bytes.decode('ascii')
+    else:
+        return output_bytes
+
+
+class ProxyMiddleware(object):
+    # UA pool loaded once at import time; splitlines() drops the trailing '\n' that readlines() keeps
+    USER_AGENT = open('useragents.txt').read().splitlines()
+
+    def process_request(self, request, spider):
+        # proxy server (16yun tunnel proxy)
+        # proxyHost = "t.16yun.cn"
+        # proxyPort = "31111"
+        proxyHost = "u1.5.tn.16yun.cn"
+        proxyPort = "6441"
+        # proxy tunnel credentials
+        # proxyUser = "username"
+        # proxyPass = "password"
+        proxyUser = "16ZJZYVL"
+        proxyPass = "113813"
+
+        request.meta['proxy'] = "http://{0}:{1}".format(proxyHost, proxyPort)
+
+        # add the authentication header
+        encoded_user_pass = base64ify(proxyUser + ":" + proxyPass)
+        request.headers['Proxy-Authorization'] = 'Basic ' + encoded_user_pass
+
+        # set the IP-switch header (rotates the tunnel IP per request, as needed)
+        tunnel = random.randint(1, 10000)
+        request.headers['Proxy-Tunnel'] = str(tunnel)
+        request.headers['User-Agent'] = random.choice(self.USER_AGENT)
+
+
+class PyppeteerMiddleware(object):
+    def __init__(self, **args):
+        """
+        init logger, loop, browser
+        :param args: keyword arguments forwarded to render()
+        """
+        self.logger = getLogger(__name__)
+        self.loop = asyncio.get_event_loop()
+        self.browser = self.loop.run_until_complete(
+            pyppeteer.launch(headless=True))
+        self.args = args
+
+    def __del__(self):
+        """
+        close loop
+        :return:
+        """
+        self.loop.close()
+
+    def render(self, url, retries=1, script=None, wait=0.3, scrolldown=False, sleep=0,
+               timeout=8.0, keep_page=False):
+        """
+        render page with pyppeteer
+        :param url: page url
+        :param retries: max retry times
+        :param script: js script to evaluate
+        :param wait: number of seconds to wait before loading the page, preventing timeouts
+        :param scrolldown: how many times to page down
+        :param sleep: how long to sleep after the initial render
+        :param timeout: the longest wait time, otherwise raise a timeout error
+        :param keep_page: keep the page open instead of closing it
+        :return: content, result, status
+        """
+
+        # define async render
+        async def async_render(url, script, scrolldown, sleep, wait, timeout, keep_page):
+            page = None
+            try:
+                # basic render
+                page = await self.browser.newPage()
+                await asyncio.sleep(wait)
+                response = await page.goto(url, options={'timeout': int(timeout * 1000)})
+                if response.status != 200:
+                    return None, None, response.status
+                result = None
+                # evaluate with script
+                if script:
+                    result = await page.evaluate(script)
+
+                # scroll down {scrolldown} times
+                if scrolldown:
+                    for _ in range(scrolldown):
+                        await page.keyboard.down('PageDown')
+                        await asyncio.sleep(sleep)
+                else:
+                    await asyncio.sleep(sleep)
+                if scrolldown:
+                    await page.keyboard.up('PageDown')
+
+                # get html of page
+                content = await page.content()
+
+                return content, result, response.status
+            except TimeoutError:
+                return None, None, 500  # report navigation timeouts as a 500-style failure
+            finally:
+                # if keep_page is set, leave the page open
+                if page and not keep_page:
+                    await page.close()
+
+        content, result, status = [None] * 3
+
+        # retry up to {retries} times
+        for i in range(retries):
+            if not content:
+                content, result, status = self.loop.run_until_complete(
+                    async_render(url=url, script=script, sleep=sleep, wait=wait,
+                                 scrolldown=scrolldown, timeout=timeout, keep_page=keep_page))
+            else:
+                break
+
+        # content, js evaluation result and http status
+        return content, result, status
+
+    def process_request(self, request, spider):
+        """
+        :param request: request object
+        :param spider: spider object
+        :return: HtmlResponse
+        """
+        if request.meta.get('render'):
+            try:
+                self.logger.debug('rendering %s', request.url)
+                html, result, status = self.render(request.url, **self.args)
+                return HtmlResponse(url=request.url, body=html, request=request, encoding='utf-8',
+                                    status=status)
+            except websockets.exceptions.ConnectionClosed:
+                pass
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        return cls(**crawler.settings.get('PYPPETEER_ARGS', {}))
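
# ---------------------------------------------------------------------------
# For reference, the Proxy-Authorization value that ProxyMiddleware builds can
# be reproduced in isolation; a minimal sketch with placeholder credentials
# (not working 16yun values). Note also that only plain Scrapy downloads pass
# through ProxyMiddleware -- pages rendered by PyppeteerMiddleware are fetched
# by the headless browser, which is launched here without any proxy settings.
import base64

user, password = 'username', 'password'
token = base64.urlsafe_b64encode('{0}:{1}'.format(user, password).encode('utf8')).decode('ascii')
print('Basic ' + token)  # -> Basic dXNlcm5hbWU6cGFzc3dvcmQ=
# ---------------------------------------------------------------------------
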
diff --git a/scrapypyppeteer/pipelines.py b/scrapypyppeteer/pipelines.py
new file mode 100644
index 0000000..2f9908b
--- /dev/null
+++ b/scrapypyppeteer/pipelines.py
@@ -0,0 +1,11 @@
+# -*- coding: utf-8 -*-
+
+# Define your item pipelines here
+#
+# Don't forget to add your pipeline to the ITEM_PIPELINES setting
+# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
+
+
+class ScrapypyppeteerPipeline(object):
+    def process_item(self, item, spider):
+        return item
diff --git a/scrapypyppeteer/settings.py b/scrapypyppeteer/settings.py
new file mode 100644
index 0000000..0f1fd5d
--- /dev/null
+++ b/scrapypyppeteer/settings.py
@@ -0,0 +1,98 @@
+# -*- coding: utf-8 -*-
+
+# Scrapy settings for scrapypyppeteer project
+#
+# For simplicity, this file contains only settings considered important or
+# commonly used. You can find more settings consulting the documentation:
+#
+#     http://doc.scrapy.org/en/latest/topics/settings.html
+#     http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
+#     http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
+
+BOT_NAME = 'scrapypyppeteer'
+
+SPIDER_MODULES = ['scrapypyppeteer.spiders']
+NEWSPIDER_MODULE = 'scrapypyppeteer.spiders'
+
+# Crawl responsibly by identifying yourself (and your website) on the user-agent
+# USER_AGENT = 'scrapypyppeteer (+http://www.yourdomain.com)'
+
+# Obey robots.txt rules
+ROBOTSTXT_OBEY = False
+
+# Configure maximum concurrent requests performed by Scrapy (default: 16)
+# CONCURRENT_REQUESTS = 32
+
+# Configure a delay for requests for the same website (default: 0)
+# See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay
+# See also autothrottle settings and docs
+# DOWNLOAD_DELAY = 3
+# The download delay setting will honor only one of:
+# CONCURRENT_REQUESTS_PER_DOMAIN = 16
+# CONCURRENT_REQUESTS_PER_IP = 16
+
+# Disable cookies (enabled by default)
+# COOKIES_ENABLED = False
+
+# Disable Telnet Console (enabled by default)
+# TELNETCONSOLE_ENABLED = False
+
+# Override the default request headers:
+# DEFAULT_REQUEST_HEADERS = {
+#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+#   'Accept-Language': 'en',
+# }
+
+# Enable or disable spider middlewares
+# See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
+# SPIDER_MIDDLEWARES = {
+#    'scrapypyppeteer.middlewares.ScrapypyppeteerSpiderMiddleware': 543,
+# }
+
+# Enable or disable downloader middlewares
+# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
+DOWNLOADER_MIDDLEWARES = {
+    'scrapypyppeteer.middlewares.PyppeteerMiddleware': 543,
+    'scrapypyppeteer.middlewares.ProxyMiddleware': 100,
+
+}
+
+# Enable or disable extensions
+# See http://scrapy.readthedocs.org/en/latest/topics/extensions.html
+# EXTENSIONS = {
+#    'scrapy.extensions.telnet.TelnetConsole': None,
+# }
+
+# Configure item pipelines
+# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
+# ITEM_PIPELINES = {
+#    'scrapypyppeteer.pipelines.ScrapypyppeteerPipeline': 300,
+# }
+
+# Enable and configure the AutoThrottle extension (disabled by default)
+# See http://doc.scrapy.org/en/latest/topics/autothrottle.html
+# AUTOTHROTTLE_ENABLED = True
+# The initial download delay
+# AUTOTHROTTLE_START_DELAY = 5
+# The maximum download delay to be set in case of high latencies
+# AUTOTHROTTLE_MAX_DELAY = 60
+# The average number of requests Scrapy should be sending in parallel to
+# each remote server
+# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
+# Enable showing throttling stats for every response received:
+# AUTOTHROTTLE_DEBUG = False
+
+# Enable and configure HTTP caching (disabled by default)
+# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
+# HTTPCACHE_ENABLED = True
+# HTTPCACHE_EXPIRATION_SECS = 0
+# HTTPCACHE_DIR = 'httpcache'
+# HTTPCACHE_IGNORE_HTTP_CODES = []
+# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
+
+
+PYPPETEER_ARGS = {
+    'timeout': 8
+}
+
+LOG_LEVEL = 'INFO'
\ No newline at end of file
diff --git a/scrapypyppeteer/spiders/16yun.py b/scrapypyppeteer/spiders/16yun.py
new file mode 100644
index 0000000..72aa23d
--- /dev/null
+++ b/scrapypyppeteer/spiders/16yun.py
@@ -0,0 +1,15 @@
+# -*- coding: utf-8 -*-
+from scrapy import Spider, Request
+
+
+class YiniuYunSpider(Spider):
+    name = '16yun'
+    allowed_domains = ['current-ip.16yun.cn']
+    start_url = 'http://current-ip.16yun.cn:802/ip'
+
+    def start_requests(self):
+        yield Request(self.start_url, callback=self.parse_list)
+
+    def parse_list(self, response):
+        with open('16yun.html', 'w', encoding='utf-8') as f:
+            f.write(response.text)
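
# ---------------------------------------------------------------------------
# The spider above verifies the tunnel by fetching an IP-echo page through
# Scrapy; the same check can be done with the standard library alone. Host,
# port and credentials mirror ProxyMiddleware (substitute your own account
# values before running).
import urllib.request

proxy = 'http://16ZJZYVL:113813@u1.5.tn.16yun.cn:6441'
opener = urllib.request.build_opener(urllib.request.ProxyHandler({'http': proxy}))
print(opener.open('http://current-ip.16yun.cn:802/ip', timeout=10).read().decode())
# ---------------------------------------------------------------------------
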
diff --git a/scrapypyppeteer/spiders/__init__.py b/scrapypyppeteer/spiders/__init__.py
new file mode 100644
index 0000000..ebd689a
--- /dev/null
+++ b/scrapypyppeteer/spiders/__init__.py
@@ -0,0 +1,4 @@
+# This package will contain the spiders of your Scrapy project
+#
+# Please refer to the documentation for information on how to create and manage
+# your spiders.
diff --git a/scrapypyppeteer/spiders/quotes.py b/scrapypyppeteer/spiders/quotes.py
new file mode 100644
index 0000000..53cba83
--- /dev/null
+++ b/scrapypyppeteer/spiders/quotes.py
@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+import scrapy
+
+
+class QuotesSpider(scrapy.Spider):
+    name = 'quotes'
+    allowed_domains = ['quotes.toscrape.com']
+    start_urls = ['http://quotes.toscrape.com/js/']
+
+    def start_requests(self):
+        # /js/ builds the quotes client-side, so opt into pyppeteer rendering;
+        # PyppeteerMiddleware only fires when meta['render'] is set
+        for url in self.start_urls:
+            yield scrapy.Request(url, meta={'render': True}, callback=self.parse)
+
+    def parse(self, response):
+        for quote in response.css('div.quote'):
+            yield {
+                'text': quote.css('span.text::text').extract_first(),
+                'author': quote.css('small.author::text').extract_first(),
+                'tags': quote.css('div.tags > a.tag::text').extract()
+            }
+        with open('quotes.js.enable.html', 'w', encoding='utf-8') as f:
+            f.write(response.text)
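
# ---------------------------------------------------------------------------
# The quotes spider saves the rendered DOM to quotes.js.enable.html; the same
# selectors can be replayed offline against that file with parsel (already in
# requirements.txt). A sketch, assuming the spider has run at least once:
from parsel import Selector

with open('quotes.js.enable.html', encoding='utf-8') as f:
    sel = Selector(text=f.read())

for quote in sel.css('div.quote'):
    print(quote.css('span.text::text').get())
# ---------------------------------------------------------------------------
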
diff --git a/scrapypyppeteer/spiders/taobao.py b/scrapypyppeteer/spiders/taobao.py
new file mode 100644
index 0000000..4e36763
--- /dev/null
+++ b/scrapypyppeteer/spiders/taobao.py
@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+from scrapy import Spider, Request
+
+
+class TaobaoSpider(Spider):
+    name = 'taobao'
+    allowed_domains = ['s.taobao.com']
+    start_url = 'http://s.taobao.com/search?q={keyword}'
+    keywords = ['ipad']
+
+    def start_requests(self):
+        for keyword in self.keywords:
+            url = self.start_url.format(keyword=keyword)
+            yield Request(url, callback=self.parse_list)
+
+    def parse_list(self, response):
+        with open('taobao.html', 'w', encoding='utf-8') as f:
+            f.write(response.text)
diff --git a/useragents.txt b/useragents.txt
new file mode 100644
index 0000000..74161f0
--- /dev/null
+++ b/useragents.txt
@@ -0,0 +1,6 @@
+Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.16) Gecko/20110319 Firefox/40
+Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; QQBrowser/8.3.4769.400)
+MozillaTest/5.0 (compatible; YodaoBot/1.0; http://www.yodao.com/help/webmaster/spider/; )
+Mozilla/5.0 (Windows NT 6.2; rv:39.0) Gecko/20100101 Firefox/39.0
+Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; QQBrowser/8.3.4769.400)
+Mozilla/5.0 (Windows NT 6.1; rv:39.0) Gecko/20100101 Firefox/39.0
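
# ---------------------------------------------------------------------------
# How ProxyMiddleware consumes useragents.txt: one user agent per line, read
# once at import time and picked at random for every request. A standalone
# sketch of that logic:
import random

with open('useragents.txt') as f:
    user_agents = f.read().splitlines()  # unlike readlines(), no trailing '\n'

print(random.choice(user_agents))
# ---------------------------------------------------------------------------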