forked from yokonsan/IPProxyPool
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
45 lines (39 loc) · 1.2 KB
/
utils.py
File metadata and controls
45 lines (39 loc) · 1.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# coding=utf-8
import asyncio
import aiohttp
import requests
from requests.exceptions import ConnectionError
def parse_url(url, timeout=10):
    """Fetch *url* synchronously and return the response body as text.

    A desktop Firefox User-Agent header is sent with the request.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` waits indefinitely on a stalled server.

    Returns:
        The decoded response body when the status code is 200; ``None`` on
        any other status code, on a connection failure, or on a timeout.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0'
    }
    try:
        resp = requests.get(url, headers=headers, timeout=timeout)
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
        # A read timeout is not a ConnectionError, so it is caught explicitly.
        print('Error.')
        return None
    if resp.status_code == 200:
        print('ok')
        return resp.text
    return None
class Downloader(object):
    """Fetch a collection of URLs concurrently.

    Uses the ``async``/``await`` syntax built into Python 3.5 (the
    equivalent of ``@asyncio.coroutine`` and ``yield from`` in earlier
    versions) to provide asynchronous page fetching.
    """

    def __init__(self, urls):
        """Remember the URLs to fetch; nothing is downloaded yet.

        Args:
            urls: Iterable of page addresses.
        """
        self.urls = urls
        self._htmls = []
        # Set once download() has completed, so that reading ``htmls``
        # repeatedly does not re-fetch and duplicate the pages.
        self._downloaded = False

    async def download_single_page(self, url, session=None):
        """Fetch one page and append its text to the collected results.

        Args:
            url: Address of the page to fetch.
            session: Optional ``aiohttp.ClientSession`` to reuse. When
                omitted, a temporary session is opened for this request.
        """
        if session is None:
            async with aiohttp.ClientSession() as own_session:
                await self.download_single_page(url, own_session)
            return
        async with session.get(url) as resp:
            self._htmls.append(await resp.text())

    async def _download_all(self, urls):
        """Fetch every URL in *urls* over one shared client session."""
        async with aiohttp.ClientSession() as session:
            # return_exceptions=True keeps one failing page from aborting
            # the rest; failures are reported below instead of being lost.
            results = await asyncio.gather(
                *[self.download_single_page(url, session) for url in urls],
                return_exceptions=True
            )
        for url, result in zip(urls, results):
            if isinstance(result, BaseException):
                print('Failed to download {}: {!r}'.format(url, result))

    @staticmethod
    def _get_event_loop():
        """Return a usable event loop, creating one if none is available."""
        try:
            loop = asyncio.get_event_loop()
        except RuntimeError:
            # No current loop (non-main thread, or newer Python versions).
            loop = None
        if loop is None or loop.is_closed():
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
        return loop

    def download(self):
        """Download all pages, replacing any previously collected results.

        Blocks until every request has finished. Pages that fail to
        download are skipped and reported on stdout.
        """
        urls = list(self.urls)
        # Start from a clean list so repeated calls do not pile up duplicates.
        self._htmls = []
        if urls:
            loop = self._get_event_loop()
            loop.run_until_complete(self._download_all(urls))
        self._downloaded = True

    @property
    def htmls(self):
        """List of downloaded page texts, fetched on first access.

        Call download() explicitly to force a fresh fetch.
        """
        if not self._downloaded:
            self.download()
        return self._htmls