-
Notifications
You must be signed in to change notification settings - Fork 17
Open
Description
import json
from collections import namedtuple
from aspider.routeing import get_router
from aspider import aspider
from requests_html import HTML
# Immutable record for one article entry. The field names double as the JSON
# keys looked up in process_page, so their spelling is kept exactly as-is.
Woshipm = namedtuple(
    'Article',
    (
        'cateory',
        'comment',
        'date',
        'id',
        'permalink',
        'snipper',
        'view',
    ),
)
# Router obtained from aspider.routeing; page handlers are registered on it
# with the @router.route decorator.
router = get_router()
# Module-level accumulator: process_page appends Woshipm records to it and
# main reads them back once the download has finished.
woshipm_list = []
@router.route('/page/<no>')
def process_page(text):
    """Parse one stream-list response and collect its articles.

    The body is decoded as JSON and every entry of its top-level
    ``payload`` list is turned into a ``Woshipm`` record, which is then
    appended to the module-level ``woshipm_list``.

    Args:
        text: Body of the fetched page; must be a JSON document whose
            top-level object has a ``payload`` list of objects.

    Raises:
        json.JSONDecodeError: If ``text`` is not valid JSON.
        KeyError: If ``payload`` or one of the per-article keys is missing.
    """
    # Decode the body directly. Running it through an HTML parser first and
    # reading back the extracted text may alter string values that contain
    # markup or character entities.
    payload = json.loads(text)['payload']

    # The record's field names are exactly the JSON keys to read, in the same
    # order as the constructor arguments. Build every record before touching
    # the shared list so a malformed entry does not leave a partial page.
    # NOTE(review): 'cateory' / 'snipper' spellings are taken as-is from the
    # record definition -- confirm they match the keys the API returns.
    articles = [
        Woshipm(*(item[field] for field in Woshipm._fields))
        for item in payload
    ]
    woshipm_list.extend(articles)
def main():
    """Crawl the stream-list API and dump the collected articles.

    Starts the aspider download, prints its report, then writes one line
    per collected article to ``woshipm.txt`` (UTF-8) and echoes the same
    line to stdout.
    """
    # 'roots' is given as a list of start URLs. To crawl more pages, add
    # further '.../stream-list/page/<n>' URLs to this list.
    options = {
        'roots': ['http://www.woshipm.com/__api/v1/stream-list/page/1'],
    }
    stats = aspider.download(extra_args=options)
    stats.report()

    fname = 'woshipm.txt'
    # Order by article id: the record only has the fields cateory, comment,
    # date, id, permalink, snipper and view (no rank/score/title).
    sorted_woshipm = sorted(woshipm_list, key=lambda article: article.id)
    with open(fname, 'w', encoding='utf-8') as f:
        for article in sorted_woshipm:
            # '!s' converts to str first so the width spec works whatever
            # type the API returned for the field.
            line = (
                f'#{article.id!s:<10} {article.date!s:<12} '
                f'- {article.permalink}'
            )
            print(line)
            print(line, file=f)
# Start the crawl only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels