diff --git a/final_task/README.md b/final_task/README.md index 7af281f..2bc2d98 100644 --- a/final_task/README.md +++ b/final_task/README.md @@ -1,3 +1,70 @@ -# Your readme here -Some text. -Checkout how to write this file using *markdown*. +## Iteration 1 +RSS reader is a command-line utility that receives an RSS URL and prints the result in a convenient output format + +Input data has the following interface: + +`rss_reader.py source [-h] [--version] [--verbose] [--json] [--limit LIMIT]` +```` +positional arguments: +source - URL which provides an RSS feed +optional arguments: +-h - prints this help page +--version - prints the current version to stdout +--verbose - prints all logs to stdout +--json - prints news in JSON format +--limit LIMIT - limits the amount of news entries in the output +```` +JSON structure: +``` +[ + { + "title": "A black man was put in handcuffs after a police officer stopped him on a trainplatform because he was eating", + "article": "Bay Area Rapid Transit police said Steve Foster, of Concord, California,violated state law by eating a sandwich on a BART station's platform. ", + "links": [ + "https://news.yahoo.com/black-man-put-handcuffs-police-170516695.html", + "http://l.yimg.com/uu/api/res/1.2/iLcp4eQPeHI64PZ9LpeQcw--/YXBwaWQ9eXRhY2h5b247aD04Njt3PTEzMDs-/https://media.zenfs.com/en-US/insider_articles_922/e4254e78d7432dae4387d72624ee3086" + ], + "link": "https://news.yahoo.com/black-man-put-handcuffs-police-170516695.html", + "date": "Mon, 11 Nov 2019 17:06:55 -0500" + }, + { + ... + }, + ... +] +``` + +## Iteration 2 +To run the RSS parser on your computer you need to: +1) clone the repository from https://github.com/ElizabethUniverse/FinalTaskRssParser +2) `$cd final_task` +3) `$python setup.py sdist bdist_wheel` +4) `$cd dist` +5) `$pip install rss_reader-1.4.tar.gz` +6) run `$rss_reader https://news.yahoo.com/rss --limit 2 --verbose` + + +## Iteration 3 +News is stored in the CSV cache in the following format, with a tab delimiter.
+ +`date title link article list_links` + +Now we are searching for the news in the cache with O(n) complexity. But in the near future we plan to optimize this process. + +If you want to receive news for the 15/11/2019, please enter the following command in the command line + +`$python rss_reader.py https://news.yahoo.com/rss --date 20191115` + +--date argument works without internet connection and with --verbose, --json, --limit LIMIT arguments the same way. + +## Iteration 4 + +News can be converted to pdf or html. + +If you want to convert news to pdf: + +`$python rss_reader.py https://news.yahoo.com/rss --to-pdf path` + +to html: + +`$python rss_reader.py https://news.yahoo.com/rss --to-html path` diff --git a/final_task/rss_reader.egg-info/PKG-INFO b/final_task/rss_reader.egg-info/PKG-INFO new file mode 100644 index 0000000..94cdf45 --- /dev/null +++ b/final_task/rss_reader.egg-info/PKG-INFO @@ -0,0 +1,80 @@ +Metadata-Version: 1.2 +Name: rss-reader +Version: 1.4 +Summary: RSS parser +Home-page: https://github.com/ElizabethUniverse/FinalTaskRssParser +Author: Elizaveta Lapunova +Author-email: liza.lapunova99@gmail.com +License: BSD +Description: ## Iteration 1 + RSS reader is a command utility, which receives RSS URL and prints the result in convenient output format + + Input data has the following interface: + + `rss_reader.py source [-h] [--version] [--verbose] [--json] [--limit LIMIT]` + ```` + positional arguments: + source - URL which provides a RSS feed + optional arguments: + -h - prints this help page + --version - prints in stdout current version + --verbose - prints all logs in stdout + --json - prints news in JSON format + --limit LIMIT - limits the amount of news entries in the output + ```` + JSON structure: + ``` + [ + { + "title": "A black man was put in handcuffs after a police officer stopped him on a trainplatform because he was eating", + "article": "Bay Area Rapid Transit police said Steve Foster, of Concord, California,violated state law by 
eating a sandwich on a BART station's platform. ", + "links": [ + "https://news.yahoo.com/black-man-put-handcuffs-police-170516695.html", + "http://l.yimg.com/uu/api/res/1.2/iLcp4eQPeHI64PZ9LpeQcw--/YXBwaWQ9eXRhY2h5b247aD04Njt3PTEzMDs-/https://media.zenfs.com/en-US/insider_articles_922/e4254e78d7432dae4387d72624ee3086" + ], + "link": "https://news.yahoo.com/black-man-put-handcuffs-police-170516695.html", + "date": "Mon, 11 Nov 2019 17:06:55 -0500" + }, + { + ... + }, + ... + ] + ``` + + ## Iteration 2 + to run rss parser on your computer you need to: + 1) clone repository from https://github.com/ElizabethUniverse/FinalTaskRssParser + 2) `$cd final_task` + 3) `$python setup.py sdist bdist_wheel` + 4) `$cd dist` + 3) `$pip install rss_reader-1.1.tar.gz` + 4) run `$rss_reader https://news.yahoo.com/rss --limit 2 --verbose` + + + ## Iteration 3 + News is stored in the csv cache in following format and with tab delimiter. + + `date title link article list_links` + + Now we are searching for the news in the cache with O(n) complexity. But in the near future we plan to optimize this process. + + If you want to receive news for the 15/11/2019, please enter the following command in the command line + + `$python rss_reader.py https://news.yahoo.com/rss --date 20191115` + + --date argument works without internet connection and with --verbose, --json, --limit LIMIT arguments the same way. + + ##Iteration 4 + + News can be converted to pdf or html. 
+ + If you want to convert news to pdf: + + `$python rss_reader.py https://news.yahoo.com/rss --to-pdf path` + + to html: + + `$python rss_reader.py https://news.yahoo.com/rss --to-html path` +Platform: any +Requires-Python: >=3.7.0 diff --git a/final_task/rss_reader.egg-info/SOURCES.txt b/final_task/rss_reader.egg-info/SOURCES.txt new file mode 100644 index 0000000..182ffe8 --- /dev/null +++ b/final_task/rss_reader.egg-info/SOURCES.txt @@ -0,0 +1,19 @@ +README.md +setup.py +rss_reader/CSVEntities.py +rss_reader/ClassNews.py +rss_reader/ToHTML.py +rss_reader/ToPDF.py +rss_reader/__init__.py +rss_reader/__main__.py +rss_reader/requirements.txt +rss_reader/rss_reader.py +rss_reader.egg-info/PKG-INFO +rss_reader.egg-info/SOURCES.txt +rss_reader.egg-info/dependency_links.txt +rss_reader.egg-info/entry_points.txt +rss_reader.egg-info/not-zip-safe +rss_reader.egg-info/requires.txt +rss_reader.egg-info/top_level.txt +test/RssUnitTest.py +test/__init__.py \ No newline at end of file diff --git a/final_task/rss_reader.egg-info/dependency_links.txt b/final_task/rss_reader.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/final_task/rss_reader.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/final_task/rss_reader.egg-info/entry_points.txt b/final_task/rss_reader.egg-info/entry_points.txt new file mode 100644 index 0000000..644f1cd --- /dev/null +++ b/final_task/rss_reader.egg-info/entry_points.txt @@ -0,0 +1,3 @@ +[console_scripts] +rss_reader=rss_reader.rss_reader:main + diff --git a/final_task/rss_reader.egg-info/not-zip-safe b/final_task/rss_reader.egg-info/not-zip-safe new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/final_task/rss_reader.egg-info/not-zip-safe @@ -0,0 +1 @@ + diff --git a/final_task/rss_reader.egg-info/requires.txt b/final_task/rss_reader.egg-info/requires.txt new file mode 100644 index 0000000..716a618 --- /dev/null +++ b/final_task/rss_reader.egg-info/requires.txt @@ -0,0 +1,4 @@ 
def csv_to_python(articles_list, csv_file):
    """Merge freshly parsed articles into the tab-delimited CSV cache.

    Articles already stored in ``csv_file`` are kept in their original order;
    every entry of ``articles_list`` that is not equal to a stored article is
    appended after them. The cache file is then rewritten in full.

    Args:
        articles_list: iterable of ``ClassNews.Article`` objects to add.
        csv_file: path of the cache file; it is created when missing.

    Returns:
        True once the cache has been rewritten.
    """
    cached_articles = []
    # A missing cache simply means nothing is stored yet; the write below
    # creates the file, so there is no need to create it empty first.
    if os.path.exists(csv_file):
        # newline='' hands newline handling over to the csv module, as its
        # documentation requires for both readers and writers.
        with open(csv_file, "r", encoding='utf-8', newline='') as file:
            reader = csv.DictReader(file, FIELDNAMES, delimiter='\t')
            cached_articles = [ClassNews.Article(**row) for row in reader]

    # "not in" compares with ==, i.e. with the article's own equality rule.
    new_articles = [item for item in articles_list if item not in cached_articles]

    with open(csv_file, "w", encoding='utf-8', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=FIELDNAMES, delimiter='\t')
        writer.writerows(asdict(item) for item in cached_articles + new_articles)
    return True
def xml_arguments_for_class(xml_string, limit):
    """Convert the ``item`` elements of a parsed RSS tree into dictionaries.

    Args:
        xml_string: parsed XML element (not a string, despite the name) whose
            descendant ``item`` elements are the news entries.
        limit: maximum number of entries to return. ``None`` or a value
            below 1 never matches the running count, so all entries are
            returned.

    Returns:
        A list with one dictionary per entry. Each dictionary always has the
        keys ``title``, ``date``, ``link``, ``article`` and ``links``; a key
        whose tag is absent or empty in the feed maps to ``''``. ``links``
        holds the raw description markup with newlines removed.
    """
    converter = html2text.HTML2Text()
    converter.ignore_images = True
    converter.ignore_links = True
    converter.ignore_emphasis = True

    dict_article_list = []
    for counter, xml_news in enumerate(xml_string.iter('item')):
        parser_dictionary = {}
        for xml_news_item in xml_news:
            # An empty element such as <title/> has text None; treat it as ''.
            content = xml_news_item.text or ''

            if xml_news_item.tag == 'title':
                parser_dictionary['title'] = converter.handle(content).replace('\n', "")
            elif xml_news_item.tag == 'pubDate':
                parser_dictionary['date'] = content
            elif xml_news_item.tag == 'link':
                parser_dictionary['link'] = content
            elif xml_news_item.tag == 'description':
                parser_dictionary['article'] = converter.handle(content).replace('\n', '')
                parser_dictionary['links'] = content.replace('\n', '')

        # Fill in whatever the feed left out so every dictionary has the full
        # key set; keys found in the feed keep their original order.
        for key in ('title', 'date', 'link', 'article', 'links'):
            parser_dictionary.setdefault(key, '')

        dict_article_list.append(parser_dictionary)

        if limit == counter + 1:
            break
    return dict_article_list
@dataclass
class Article:
    """A single news entry with title, date, link, article text and links.

    ``links`` is normally passed in as text (description markup, or the
    textual form of a link list read back from the cache) and is replaced in
    ``__post_init__`` by the list of URLs found in that text. A list or tuple
    of URLs is also accepted and stored as a list; ``None`` becomes ``[]``.

    Two articles are equal when title, date, link and article text match;
    ``links`` does not take part in the comparison.
    """
    title: str
    date: str
    link: str
    article: str
    # Given as text by the constructor; holds a list of URLs afterwards.
    links: str

    def __post_init__(self):
        if isinstance(self.links, str):
            self.links = html_text_to_list_links(self.links)
        else:
            # Already extracted (or missing) links: keep them as a fresh list.
            self.links = list(self.links or [])

    def __str__(self):
        header = "\nTitle: %s\nDate: %s\nLink: %s\n\n%s\n\n" % (self.title, self.date, self.link,
                                                                 self.article)
        # Links are numbered from 1 in the printed output.
        numbered_links = "".join("[%d]: %s\n" % (number, link)
                                 for number, link in enumerate(self.links, 1))
        return header + numbered_links + '\n'

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # instead of failing on a missing attribute.
        if not isinstance(other, Article):
            return NotImplemented
        return (self.article, self.title, self.link, self.date) == \
               (other.article, other.title, other.link, other.date)
def conv_str(input_str):
    """Return ``input_str`` reduced to characters that fit in Latin-1.

    The typographic characters ellipsis, curly single/double quotes and
    en dash are deleted. Any other character that cannot be encoded as
    Latin-1 is replaced by ``'?'``.

    NOTE(review): Latin-1 is chosen on the assumption that the fpdf core
    fonts used in this module (Arial) only accept Latin-1 text - confirm
    against the fpdf version in use.

    Args:
        input_str: text to clean.

    Returns:
        The cleaned string; text that is already Latin-1 and contains none of
        the deleted characters is returned unchanged.
    """
    # One pass instead of six chained replace() calls.
    removed = dict.fromkeys(map(ord, '\u2026\u2019\u201c\u201d\u2013\u2018'))
    cleaned = input_str.translate(removed)
    return cleaned.encode('latin-1', 'replace').decode('latin-1')
def args_parser(args):
    """Parse the command-line arguments of the RSS reader.

    Args:
        args: list of argument strings, e.g. ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with the attributes ``source``, ``version``,
        ``json``, ``verbose``, ``limit``, ``date``, ``to_pdf`` and
        ``to_html``. ``date`` is returned as the original YYYYMMDD string.

    Raises:
        SystemExit: raised by argparse for invalid arguments, including a
            ``--date`` value that is not a real calendar date in YYYYMMDD form.
    """
    # Local import: only the --date check needs it.
    from datetime import datetime

    def yyyymmdd(value):
        """Return ``value`` unchanged if it is a calendar date written as YYYYMMDD."""
        try:
            # strptime alone would also accept shorter forms such as 2019111.
            if len(value) != 8 or not value.isdigit():
                raise ValueError(value)
            datetime.strptime(value, '%Y%m%d')
        except ValueError:
            raise argparse.ArgumentTypeError(
                "%r is not a valid date, expected YYYYMMDD" % value) from None
        return value

    parser = argparse.ArgumentParser()
    parser.add_argument('source', nargs='?', help="RSS URL")
    parser.add_argument('--version', action='store_true', help='Print version info')
    parser.add_argument('--json', action='store_true', help='Print result as JSON in stdout')
    parser.add_argument('--verbose', action='store_true', help='Outputs verbose status messages')
    parser.add_argument('--limit', type=int, help='Limit news topics if this parameter provided')
    # Rejecting a malformed date here gives a usage error instead of a later
    # failure when the value is sliced and converted to integers.
    parser.add_argument('--date', type=yyyymmdd, help='Date for selecting topics')
    parser.add_argument('--to-pdf', type=str, help='Convert news to pdf')
    parser.add_argument('--to-html', type=str, help='Convert news to html')

    return parser.parse_args(args)
def main():
    """Entry point of the command-line RSS reader.

    Parses ``sys.argv`` and then:

    * with a ``source`` and no ``--date``: downloads the feed, prints it and
      merges the articles into the ``datecsv.csv`` cache;
    * with ``--date``: reads the matching articles from that cache;
    * with ``--json``: additionally prints the collected entries as JSON;
    * with ``--to-pdf`` / ``--to-html``: writes the collected articles to the
      given directory.

    Expected failures (network, HTTP status, malformed XML, missing path or
    template) are reported through the logger at critical level instead of
    ending in a traceback.
    """
    try:
        args = args_parser(sys.argv[1:])
        res_dict_articles = []
        # Defined up front so the conversion steps below never hit an unbound
        # name when no news was loaded.
        result_articles = []
        logging_level = logging.INFO if args.verbose else logging.CRITICAL

        if args.version:
            print("Current version: " + str(VERSION))
        if args.limit:
            print('News LIMIT: ' + str(args.limit))
        logging.basicConfig(format='%(levelname)s:%(message)s', level=logging_level)

        if args.source and (not args.date):
            # Get request
            rss_request = get_request(args.source)
            content_type = rss_request.headers.get('content-type', '')
            rss_logging(logging, 'Parsing completed successfully', 'info')
            rss_logging(logging, "Content type: %s" % content_type, 'info')

            # Compare only the media type: servers usually append parameters
            # such as "; charset=UTF-8". Every XML flavour is accepted
            # (application/xml, text/xml, application/rss+xml, ...).
            media_type = content_type.split(';', 1)[0].strip().lower()
            if media_type.endswith('xml'):
                res_dict_articles, main_title = get_dict_from_xml(rss_request, args.limit)

                rss_logging(logging, 'Print news:', 'info')
                print("\nFeed: {}".format(main_title))
                result_articles = ClassNews.dicts_to_articles(res_dict_articles)
                print_list(result_articles)
                CSVEntities.csv_to_python(result_articles, "datecsv.csv")
            else:
                rss_logging(logging, content_type, 'info')
                rss_logging(logging, 'We received not an xml file from api, sorry', 'warning')

        if args.date:
            rss_logging(logging, 'Search news by date: ', 'info')
            result_articles = CSVEntities.return_news_to_date(args.date, "datecsv.csv", args.limit)

            res_dict_articles = articles_to_dict_articles(result_articles)
            if result_articles:
                if not (args.to_html or args.to_pdf):
                    rss_logging(logging, 'Print news by date: ', 'info')
                    print_list(result_articles)
            else:
                rss_logging(logging, "We don't have any news in cache %s" % args.date, 'info')

        if args.json and res_dict_articles:
            rss_logging(logging, 'Print result as JSON in stdout', 'info')
            print(convert_articles_to_json(res_dict_articles))

        if args.to_pdf:
            convert_to_pdf(result_articles, args.to_pdf)

        if args.to_html:
            convert_to_html(result_articles, args.to_html)

    except requests.exceptions.InvalidSchema:
        rss_logging(logging, 'It is not http request!', 'critical')
    except requests.exceptions.Timeout:
        rss_logging(logging, 'Time to connect is out', 'critical')
    except requests.exceptions.HTTPError as httpserr:
        # Raised by raise_for_status() for 4xx/5xx answers - not a timeout.
        rss_logging(logging, 'Server returned an error: %s' % httpserr, 'critical')
    except requests.exceptions.InvalidURL:
        rss_logging(logging, "Sorry, that's not valid url", 'critical')
    except requests.exceptions.ConnectionError:
        # A proxy or SSL error occurred.
        rss_logging(logging, 'Sorry, you have an proxy or SSL error', 'critical')
    except ET.ParseError:
        rss_logging(logging, 'Sorry, the response is not a well-formed xml', 'critical')
    except FileNotFoundError:
        rss_logging(logging, "Sorry, path do not exist", 'critical')
    except PermissionError:
        rss_logging(logging, "Sorry, you do not have access to this file.", 'critical')
    except jinja2.exceptions.TemplateNotFound:
        rss_logging(logging, "Sorry, you forgot the template", 'critical')

{{item.title}}

+

{{item.date}}

+

{{item.link}}

+ + {%for link in item.links[1:]%} + + {% endfor%} +

{{item.article}}

+ + {% for links in item.links %} +

{{links}}

import sys
import os
from setuptools import setup, find_packages


def read(fname):
    """Return the text of ``fname``, looked up next to this setup script."""
    # Context manager closes the handle; explicit encoding keeps the result
    # independent of the platform's default locale.
    with open(os.path.join(os.path.dirname(__file__), fname), encoding='utf-8') as file:
        return file.read()


setup(
    # metadata
    name='rss_reader',
    version='1.4',
    author='Elizaveta Lapunova',
    author_email='liza.lapunova99@gmail.com',
    url='https://github.com/ElizabethUniverse/FinalTaskRssParser',

    description='RSS parser',
    long_description=read("README.md"),
    # README.md is Markdown; without this the description is treated as reST.
    long_description_content_type='text/markdown',
    license='BSD',
    platforms='any',

    # options
    packages=find_packages(),
    install_requires=['html2text==2019.9.26', 'python-dateutil==2.8.0', 'jinja2==2.10.1', 'fpdf==1.7.2'],
    package_data={
        # template.html is loaded at runtime for the HTML export, so it has
        # to be shipped together with the modules.
        '': ['*.py', '*.txt', '*.html']
    },
    python_requires='>=3.7.0',
    entry_points={
        "console_scripts": [
            "rss_reader=rss_reader.rss_reader:main",
        ]
    },
    test_suite='test',
    zip_safe=False
)
It\'s aspectacle that hasn\'t been ' + 'seen in recent presidential races, but it\'s part ofa freewheeling strategy has helped bring ' + 'Buttigieg from relative obscurity tothe top of the Democratic primary field. ', + '

On '
+            'an upswing, the Pete Buttigieg show rolls through New Hampshire' + 'Pete Buttigieg traveled more than 100 miles through the Granite State on a bus ' + 'emblazoned with his name and packed with over a dozen journalists. It’s a spectacle ' + 'that hasn’t been seen in recent presidential races, but it’s part of a freewheeling ' + 'strategy has helped bring Buttigieg from relative obscurity to the top of the ' + 'Democratic primary field.


'.replace('\n',"") + ), + ClassNews.Article( + 'NATO ally expels undercover Russian spy ', + 'Sat, 16 Nov 2019 16:11:50 -0500', + 'https://news.yahoo.com/nato-ally-expels-undercover-russian-spy-211150048.html', + 'In a rare move, NATO ally Bulgaria has expelled an undercover spy affiliated ' + 'with the Russian military intelligence service, according to a Westernintelligence source.', + '

NATO ally expels undercover Russian spy In a rare move, NATO ' + 'ally Bulgaria has expelled an undercover spy affiliated with the Russian military ' + 'intelligence service, according to a Western intelligence source.


'.replace('\n', '') + ) +] +TEST_LIST_DICT = [ + { + 'title': 'On an upswing, the Pete Buttigieg show rolls through New Hampshire', + 'date': 'Sat, 17 Nov 2019 09:36:14 -0500', + 'link': 'https://news.yahoo.com/pete-buttigieg-bus-tour-upswing-polls-143614985.html', + 'article': "Pete Buttigieg traveled more than 100 miles through the Granite State on a busembla" + "zoned with his name and packed with over a dozen journalists. It's aspectacle that ha" + "sn't been seen in recent presidential races, but it's part ofa freewheeling strategy " + "has helped bring Buttigieg from relative obscurity tothe top of the Democratic primary" + " field. ", + 'links': ['https://news.yahoo.com/pete-buttigieg-bus-tour-upswing-polls-143614985.html', + 'http://l2.yimg.com/uu/api/res/1.2/cqp8V_ndESsAGfj_ke5adw--/YXBwaWQ9eXRhY2h5b247aD04Nj' + 't3PTEzMDs-/https://media-mbst-pub-ue1.s3.amazonaws.com/creatr-uploaded-images/2019-11/9e' + '842ef0-04eb-11ea-a66f-fec562b3bef1' + ] + }, + { + 'title': 'NATO ally expels undercover Russian spy ', + 'date': 'Sat, 16 Nov 2019 16:11:50 -0500', + 'link': 'https://news.yahoo.com/nato-ally-expels-undercover-russian-spy-211150048.html', + 'article': 'In a rare move, NATO ally Bulgaria has expelled an undercover spy affiliated with the' + ' Russian military intelligence service, according to a Westernintelligence source.', + 'links': ['https://news.yahoo.com/nato-ally-expels-undercover-russian-spy-211150048.html', + 'http://l1.yimg.com/uu/api/res/1.2/IKBjTl0jeU0BCnrjqbCKAw--/YXBwaWQ9eXRhY2h5b247aD04Nj' + 't3PTEzMDs-/https://media-mbst-pub-ue1.s3.amazonaws.com/creatr-uploaded-images/2019-11/' + '440e0010-0714-11ea-9bcb-45ff7f6277b3' + ] + } +] + + +class RssReaderTestCase(unittest.TestCase): + def test_html_to_links(self): + self.assertTrue(ClassNews.html_text_to_list_links( + "

\"Racist,Syracuse suspended a fraternity and halted social " + "activities at all of them for the semester after a series of racist and anti-Semitic incidents.


" + )),[ + 'https://news.yahoo.com/syracuse-suspends-fraternity-activities-string-150512659.html', + 'http://l.yimg.com/uu/api/res/1.2/WtsFIK_rUo0Z_cSM4WlEhA--/YXBwaWQ9eXRhY2h5b247aD04Njt3PTEzMDs-/https://medi' + 'a.zenfs.com/en-us/usa_today_news_641/0303a78836e137c91b44145a8c735262' + ] + + def test_dicts_to_articles(self): + self.assertEqual( + ClassNews.dicts_to_articles([{ + 'date': 'Sat, 17 Nov 2019 09:36:14 -0500', + 'title': 'On an upswing, the Pete Buttigieg show rolls through New Hampshire', + 'link': 'https://news.yahoo.com/pete-buttigieg-bus-tour-upswing-polls-143614985.html', + 'article': 'Pete Buttigieg traveled more than 100 miles through the Granite State on a ' + 'busemblazoned with his name and packed with over a dozen journalists. It\'s ' + 'aspectacle that hasn\'t been seen in recent presidential races, but it\'s part ofa ' + 'freewheeling strategy has helped bring Buttigieg from relative obscurity tothe top ' + 'of the Democratic primary field. ', + 'links': '

On an upswing, the Pete Buttigieg show rolls through New Hampshire Pete Buttigieg traveled more than 100 miles through the Granite ' + 'State on a bus emblazoned with his name and packed with over a dozen journalists.' + ' It’s a spectacle that hasn’t been seen in recent presidential races, but it’s part ' + 'of a freewheeling strategy has helped bring Buttigieg from relative obscurity to the' + ' top of the Democratic primary field.


'.replace('\n', "") + }, + { + 'title': 'NATO ally expels undercover Russian spy ', + 'date': 'Sat, 16 Nov 2019 16:11:50 -0500', + 'link': 'https://news.yahoo.com/nato-ally-expels-undercover-russian-spy-211150048.html', + 'article': 'In a rare move, NATO ally Bulgaria has expelled an undercover spy affiliated ' + 'with the Russian military intelligence service, according to a ' + 'Westernintelligence source.', + 'links': '

NATO ally expels undercover Russian spy In a rare' + ' move, NATO ally Bulgaria has expelled an undercover spy affiliated with the ' + 'Russian military intelligence service, according to a Western intelligence ' + 'source.


'.replace('\n', '') + } + ] + ), TEST_LIST + ) + + def test_write_csv(self): + self.assertEqual(CSVEntities.csv_to_python(TEST_LIST, 'rss_test.csv'), True) + + def test_args_parser(self): + parser = rss_reader.args_parser(['https://news_api.com', '--version', '--json', '--verbose', '--limit', '2', + '--date', '20191119','--to-pdf', 'd:/set', '--to-html', 'd:/set']) + self.assertEqual(parser.source, 'https://news_api.com') + self.assertTrue(parser.version) + self.assertTrue(parser.json) + self.assertTrue(parser.verbose) + self.assertEqual(parser.limit, 2) + self.assertEqual(parser.date, '20191119') + self.assertEqual(parser.to_pdf, 'd:/set') + self.assertEqual(parser.to_html, 'd:/set') + def test_pdf_writing(self): + path = os.path.dirname(__file__) + self.assertEqual(ToPDF.print_article_list_to_pdf(TEST_LIST, path), True) + def test_pdf_writing(self): + path = os.path.dirname(__file__) + self.assertEqual(rss_reader.convert_to_pdf(TEST_LIST, path), True) + + def test_requests(self): + self.assertEqual(rss_reader.get_request('https://news.yahoo.com/rss').status_code, 200) + + def test_requests_exceptions_inv_schema(self): + self.assertRaises(rexc.InvalidSchema, rss_reader.get_request, 'htps://news.yahoo.com') + + def test_requests_exceptions_read_timeout(self): + self.assertRaises(rexc.Timeout, rss_reader.get_request, 'https://news.yahoo.com', timeout=(1, 0.01)) + + def test_requests_exceptions_httperror(self): + self.assertRaises(rexc.HTTPError, rss_reader.get_request, 'https://yahoo.com/rss') + + def test_to_pdf_exceptions(self): + self.assertRaises(FileNotFoundError, rss_reader.convert_to_pdf, TEST_LIST, 'c:/somenonexistdir') + + def test_to_html_exceptions(self): + self.assertRaises(FileNotFoundError, rss_reader.convert_to_html, TEST_LIST, 'c:/somenonexistdir') + + def test_articles_to_dict_articles(self): + self.assertEqual(rss_reader.articles_to_dict_articles(TEST_LIST), + [ + { + 'title': 'On an upswing, the Pete Buttigieg show rolls through New Hampshire', + 
'date': 'Sat, 17 Nov 2019 09:36:14 -0500', + 'link': 'https://news.yahoo.com/pete-buttigieg-bus-tour-upswing-polls-143614985.html', + 'article': "Pete Buttigieg traveled more than 100 miles through the Granite State on a busembla" + "zoned with his name and packed with over a dozen journalists. It's aspectacle that ha" + "sn't been seen in recent presidential races, but it's part ofa freewheeling strategy " + "has helped bring Buttigieg from relative obscurity tothe top of the Democratic primary" + " field. ", + 'links': ['https://news.yahoo.com/pete-buttigieg-bus-tour-upswing-polls-143614985.html', + 'http://l2.yimg.com/uu/api/res/1.2/cqp8V_ndESsAGfj_ke5adw--/YXBwaWQ9eXRhY2h5b247aD04Nj' + 't3PTEzMDs-/https://media-mbst-pub-ue1.s3.amazonaws.com/creatr-uploaded-images/2019-11/9e' + '842ef0-04eb-11ea-a66f-fec562b3bef1' + ] + }, + { + 'title': 'NATO ally expels undercover Russian spy ', + 'date': 'Sat, 16 Nov 2019 16:11:50 -0500', + 'link': 'https://news.yahoo.com/nato-ally-expels-undercover-russian-spy-211150048.html', + 'article': 'In a rare move, NATO ally Bulgaria has expelled an undercover spy affiliated with the' + ' Russian military intelligence service, according to a Westernintelligence source.', + 'links': ['https://news.yahoo.com/nato-ally-expels-undercover-russian-spy-211150048.html', + 'http://l1.yimg.com/uu/api/res/1.2/IKBjTl0jeU0BCnrjqbCKAw--/YXBwaWQ9eXRhY2h5b247aD04Nj' + 't3PTEzMDs-/https://media-mbst-pub-ue1.s3.amazonaws.com/creatr-uploaded-images/2019-11/' + '440e0010-0714-11ea-9bcb-45ff7f6277b3' + ] + } + ] + ) + + def test_logger_critical(self): + logging_level = logging.CRITICAL + logging.basicConfig(format='%(levelname)s:%(message)s', level=logging_level) + self.assertEqual(rss_reader.rss_logging(logging, "message", 'critical'), logging.critical("message")) + self.assertEqual(rss_reader.rss_logging(logging, "message", 'warning'), logging.warning("message")) + self.assertEqual(rss_reader.rss_logging(logging, "message", 'info'), 
logging.info("message")) + + def test_logger_info(self): + logging_level = logging.INFO + logging.basicConfig(format='%(levelname)s:%(message)s', level=logging_level) + self.assertEqual(rss_reader.rss_logging(logging, "message", 'critical'), logging.critical("message")) + self.assertEqual(rss_reader.rss_logging(logging, "message", 'warning'), logging.warning("message")) + self.assertEqual(rss_reader.rss_logging(logging, "message", 'info'), logging.info("message")) + + def test_list_to_json(self): + self.assertEqual( + TEST_LIST_DICT, + [ + { + "title": "On an upswing, the Pete Buttigieg show rolls through New Hampshire", + "date": "Sat, 17 Nov 2019 09:36:14 -0500", + "link": "https://news.yahoo.com/pete-buttigieg-bus-tour-upswing-polls-143614985.html", + "article": "Pete Buttigieg traveled more than 100 miles through the Granite State on a busemblaz" + "oned with his name and packed with over a dozen journalists. It's aspectacle that ha" + "sn't been seen in recent presidential races, but it's part ofa freewheeling strategy ha" + "s helped bring Buttigieg from relative obscurity tothe top of the Democratic primary " + "field. 
", + "links": [ + "https://news.yahoo.com/pete-buttigieg-bus-tour-upswing-polls-143614985.html", + "http://l2.yimg.com/uu/api/res/1.2/cqp8V_ndESsAGfj_ke5adw--/YXBwaWQ9eXRhY2h5b247aD04Njt3PTE" + "zMDs-/https://media-mbst-pub-ue1.s3.amazonaws.com/creatr-uploaded-images/2019-11/9e842ef0-04" + "eb-11ea-a66f-fec562b3bef1" + ] + }, + { + "title": "NATO ally expels undercover Russian spy ", + "date": "Sat, 16 Nov 2019 16:11:50 -0500", + "link": "https://news.yahoo.com/nato-ally-expels-undercover-russian-spy-211150048.html", + "article": "In a rare move, NATO ally Bulgaria has expelled an undercover spy affiliated with the " + "Russian military intelligence service, according to a Westernintelligence source.", + "links": [ + "https://news.yahoo.com/nato-ally-expels-undercover-russian-spy-211150048.html", + "http://l1.yimg.com/uu/api/res/1.2/IKBjTl0jeU0BCnrjqbCKAw--/YXBwaWQ9eXRhY2h5b247aD04Njt3PTE" + "zMDs-/https://media-mbst-pub-ue1.s3.amazonaws.com/creatr-uploaded-images/2019-11/440e0010-071" + "4-11ea-9bcb-45ff7f6277b3" + ] + } + ] + + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/final_task/test/__init__.py b/final_task/test/__init__.py new file mode 100644 index 0000000..e69de29