From 285a90f2947ecfee67f1a9006d035e1f1b04c585 Mon Sep 17 00:00:00 2001 From: Sergey Matyushenok Date: Mon, 18 Nov 2019 23:46:56 +0300 Subject: [PATCH 1/8] first iteration --- final_task/README.md | 16 ++- final_task/rss_reader/requirements.txt | 2 + final_task/rss_reader/rss_reader.py | 57 +++++++++ final_task/rss_reader/scripts/News.py | 58 +++++++++ final_task/rss_reader/scripts/__init__.py | 0 final_task/rss_reader/scripts/pars_args.py | 62 +++++++++ final_task/rss_reader/scripts/parser_rss.py | 134 ++++++++++++++++++++ final_task/tests/__init__.py | 0 final_task/tests/test_News.py | 37 ++++++ final_task/tests/test_parser_rss.py | 56 ++++++++ 10 files changed, 419 insertions(+), 3 deletions(-) create mode 100644 final_task/rss_reader/scripts/News.py create mode 100644 final_task/rss_reader/scripts/__init__.py create mode 100644 final_task/rss_reader/scripts/pars_args.py create mode 100644 final_task/rss_reader/scripts/parser_rss.py create mode 100644 final_task/tests/__init__.py create mode 100644 final_task/tests/test_News.py create mode 100644 final_task/tests/test_parser_rss.py diff --git a/final_task/README.md b/final_task/README.md index 7af281f..e187657 100644 --- a/final_task/README.md +++ b/final_task/README.md @@ -1,3 +1,13 @@ -# Your readme here -Some text. -Checkout how to write this file using *markdown*. + +This program which receives RSS URL and prints results in human-readable +format. 
+ +positional arguments: + source RSS URL + +optional arguments: + -h, --help show this help message and exit + --version Print version info + --json Print result as JSON in stdout + --verbose Outputs verbose status messages + --limit LIMIT Limit news topics if this parameter provided diff --git a/final_task/rss_reader/requirements.txt b/final_task/rss_reader/requirements.txt index e69de29..354aefb 100644 --- a/final_task/rss_reader/requirements.txt +++ b/final_task/rss_reader/requirements.txt @@ -0,0 +1,2 @@ +feedparser == 5.2.1 +python-dateutil == 2.8.1 \ No newline at end of file diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index e69de29..023c90b 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -0,0 +1,57 @@ +from urllib.error import URLError +from final_task.rss_reader.scripts import pars_args +from final_task.rss_reader.scripts import parser_rss +import logging +import sys + + +def main(): + """ + The main entry point of the application + """ + try: + args = pars_args.get_args() + logger = logging.getLogger("rss_reader") + logger.setLevel(logging.INFO) + # create the logging file handler + if not args.verbose: + fh = logging.FileHandler("new_snake.log") + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + fh.setFormatter(formatter) + logger.addHandler(fh) + else: + fh = logging.basicConfig(stream=sys.stdout, + filemode='a', + format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s', + datefmt='%H:%M:%S', + level=logging.DEBUG) + + # add handler to logger object + + logger.info("Program started") + + list_of_news = [] + news_feed = parser_rss.get_news_feed(args.source) + parser_rss.init_list_of_news(list_of_news, news_feed, args.limit) + if args.json: + parser_rss.print_news_in_json(list_of_news) + else: + parser_rss.print_news(list_of_news) + + except ValueError: + raise + except URLError: + raise + except 
parser_rss.TimeOutExeption: + raise + + +if __name__ == '__main__': + try: + main() + except parser_rss.TimeOutExeption as e: + print(e) + except URLError as er: + print(er) + except ValueError as v: + print(v) diff --git a/final_task/rss_reader/scripts/News.py b/final_task/rss_reader/scripts/News.py new file mode 100644 index 0000000..a79f459 --- /dev/null +++ b/final_task/rss_reader/scripts/News.py @@ -0,0 +1,58 @@ +from dataclasses import dataclass +import datetime +import logging + +module_logger = logging.getLogger("rss_reader.scripts.News") + + +@dataclass +class News: + feed: str + title: str + date: datetime.datetime + link: str + info_about_image: str + briefly_about_news: str + links_from_news: list + + def get_json(self): + """ + returns news in json format + """ + logger = logging.getLogger("rss_reader.scripts.News.get_json") + logger.info("return news in json format") + data = { + "Feed": self.feed, + "Title": self.title, + "Date": str(self.date), + "Link": self.link, + "Info about image": self.info_about_image, + "Briefly about news": self.briefly_about_news, + "Links": self.links_from_news + + } + return data + + def __str__(self): + """ + Return a string representation of the news for print in stdout. 
+ """ + logger = logging.getLogger("rss_reader.scripts.News.__str__") + logger.info("return str") + links = "" + for index, link in enumerate(self.links_from_news or []): + links += "[" + str(index) + "] " + link + "\n" + + return "Feed: {feed}\n" \ + "Title: {title} \n" \ + "Date: {date} \n" \ + "Link: {link}\n" \ + "Info about image: {info_about_image}\n" \ + "Briefly about news: {briefly_about_news}\n" \ + "Links: \n{links}".format(feed=self.feed, + title=self.title, + date=self.date, + link=self.link, + info_about_image=self.info_about_image, + briefly_about_news=self.briefly_about_news, + links=links) diff --git a/final_task/rss_reader/scripts/__init__.py b/final_task/rss_reader/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/final_task/rss_reader/scripts/pars_args.py b/final_task/rss_reader/scripts/pars_args.py new file mode 100644 index 0000000..e073a77 --- /dev/null +++ b/final_task/rss_reader/scripts/pars_args.py @@ -0,0 +1,62 @@ +import argparse +import sys +import logging + +module_logger = logging.getLogger("rss_reader.scripts.pars_args") + + +def create_parser(): + """ function to parse the command line """ + logger = logging.getLogger("rss_reader.scripts.create_parser") + logger.info("parse the command line ") + parser = argparse.ArgumentParser( + prog='rss_reader', + description=''' This program which receives RSS URL + and prints results in human-readable format.''', + epilog='''Thank you for using this program''' + + ) + + # add information about the expected parameters + # using the add_argument method one call for each parameter). 
+ + parser.add_argument('source', + type=str, + default="not url", + help='RSS URL') + + parser.add_argument('--version', + action='version', + help='Print version info', + version='%(prog)s {}'.format("1.1")) + + parser.add_argument('--json', + action='store_const', + const=True, + default=False, + help='Print result as JSON in stdout') + + parser.add_argument('--verbose', + action='store_const', + const=True, + default=False, + help='Outputs verbose status messages') + + parser.add_argument('--limit', + type=int, + metavar='LIMIT', + default=None, + help='Limit news topics if this parameter provided') + + return parser + + +def get_args(): + """ + returns command line arguments + """ + logger = logging.getLogger("rss_reader.scripts.get_args") + logger.info("return args command line") + parser = create_parser() + args = parser.parse_args(sys.argv[1:]) + return args diff --git a/final_task/rss_reader/scripts/parser_rss.py b/final_task/rss_reader/scripts/parser_rss.py new file mode 100644 index 0000000..094f5a5 --- /dev/null +++ b/final_task/rss_reader/scripts/parser_rss.py @@ -0,0 +1,134 @@ +import json +import re +import signal +import logging +from contextlib import contextmanager, closing +from urllib.error import URLError +import feedparser +from dateutil import parser +from final_task.rss_reader.scripts import News +import html + + +class TimeOutExeption(Exception): pass + + +module_logger = logging.getLogger("rss_reader.scripts.parser_rss") + + +@contextmanager +def timeout_sec(seconds): + """ + contextmanager to check the expectation of a response + and if the response does not come for a long time, an error + """ + + def signal_handler(signum, frame): + raise TimeOutExeption(Exception('Time out')) + + signal.signal(signal.SIGALRM, signal_handler) + signal.alarm(seconds) + try: + yield + finally: + signal.alarm(0) + + +def clear_text(text: str) -> str: + """ + cleans text from problems that occurred when decoding formats + """ + logger = 
logging.getLogger("rss_reader.scripts.parser_rss.clear_text") + logger.info("clear text from news") + return html.unescape(text) + + +def get_info_about_image(summary: str) -> str: + logger = logging.getLogger("rss_reader.scripts.parser_rss.get_info_about_image") + logger.info("return info about image") + tag = 'alt=' + begin_position_info_about_image = summary.find(tag) + len(tag) + 1 + end_position_info_about_image = summary.find('"', begin_position_info_about_image) + info_about_image = summary[begin_position_info_about_image:end_position_info_about_image] + return clear_text(info_about_image) + + +def get_briefly_about_news(summary: str) -> str: + logger = logging.getLogger("rss_reader.scripts.parser_rss.get_briefly_about_news") + logger.info("return briefly info about news") + p = re.compile(r'<.*?>') + text = p.sub('', summary) + return clear_text(text) + + +def get_news_feed(sourse_url: str) -> feedparser.parse: + logger = logging.getLogger("rss_reader.scripts.parser_rss.get_news_feed") + logger.info("return news Feed") + with timeout_sec(10): + news_feed = feedparser.parse(sourse_url) + if news_feed['bozo'] != 0: + raise URLError(news_feed['bozo_exception'].args[0]) + return news_feed + + +def init_list_of_news( + list_of_news: list, + news_feed: feedparser.parse, + limit: int): + """ + Fills the list with news + """ + logger = logging.getLogger("rss_reader.scripts.parser_rss.init_list_of_news") + logger.info("Fills the list with news") + feed_title = news_feed['feed'].get('title', 'NO TITLE') + feed_title = clear_text(feed_title) + for index, entry in enumerate(news_feed['entries']): + if index == limit: + break + title = entry.get('title', '(NO TITLE') + title = clear_text(title) + summary = entry.get('summary', '(NO SUMMARY)') + date = parser.parse(entry['published']) + link = entry['link'] + info_about_image = get_info_about_image(summary) + briefly_about_news = get_briefly_about_news(summary) + link_on_image = entry.get("media_content")[0]["url"] + 
news = News.News(feed=feed_title, + title=title, + date=date, + link=link, + info_about_image=info_about_image, + briefly_about_news=briefly_about_news, + links_from_news=[link, link_on_image] + ) + + list_of_news.append(news) + + +def print_news(list_of_news: list): + """ + This function print news in the console + :param feed_title: + :param list_of_news: + :return: + """ + logger = logging.getLogger("rss_reader.scripts.parser_rss.print_news") + logger.info("print news in the console") + for number, news in enumerate(list_of_news): + print(number + 1) # because number starts at 0 + print(news) + # print('-' * 100) + + +def print_news_in_json(list_of_news: list): + """ + This function print news in the console in json format + :param list_of_news: + :return: + """ + logger = logging.getLogger("rss_reader.scripts.parser_rss.print_news_in_json") + logger.info("print news in the console in json format") + list_of_news_in_json = [] + for news in list_of_news: + list_of_news_in_json.append(news.get_json()) + print(json.dumps(list_of_news_in_json, indent=4, ensure_ascii=False)) diff --git a/final_task/tests/__init__.py b/final_task/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/final_task/tests/test_News.py b/final_task/tests/test_News.py new file mode 100644 index 0000000..721a889 --- /dev/null +++ b/final_task/tests/test_News.py @@ -0,0 +1,37 @@ +from final_task.rss_reader.scripts.News import * +import unittest +from dateutil import parser + + +class TestNews(unittest.TestCase): + + def setUp(self): + self.item = News(feed="feed", + title="title", + date=parser.parse("2019-11-17 10:44:20-05:00"), + link="link", + info_about_image="info_about_image", + briefly_about_news="briefly_about_news", + links_from_news=["link", "link_on_image"] + ) + + def test_str(self): + self.assertTrue(str(self.item) == "Feed: feed\n" + "Title: title \n" + "Date: 2019-11-17 10:44:20-05:00 \n" + "Link: link\n" + "Info about image: info_about_image\n" + "Briefly 
about news: briefly_about_news\n" + "Links: \n" + "[0] link\n" + "[1] link_on_image\n") + + def test_get_json(self): + data = self.item.get_json() + self.assertEqual(data['Feed'], 'feed') + self.assertEqual(data['Title'], 'title') + self.assertEqual(data['Date'], '2019-11-17 10:44:20-05:00') + self.assertEqual(data['Link'], 'link') + self.assertEqual(data['Info about image'], 'info_about_image') + self.assertEqual(data['Briefly about news'], 'briefly_about_news') + self.assertEqual(data['Links'], ['link', 'link_on_image']) \ No newline at end of file diff --git a/final_task/tests/test_parser_rss.py b/final_task/tests/test_parser_rss.py new file mode 100644 index 0000000..7824f0a --- /dev/null +++ b/final_task/tests/test_parser_rss.py @@ -0,0 +1,56 @@ +from final_task.rss_reader.scripts.parser_rss import * +from final_task.rss_reader.scripts.News import News +import unittest +from io import StringIO +from unittest.mock import patch + + +class TestParserRss(unittest.TestCase): + + def setUp(self): + self.summary = '''

\ +NATO ally expels undercover Russian spy In a rare move,NATO ally Bulgaria has expelled an undercover spy affiliated with \ +the Russian military intelligence service, according to a Western intelligence source.


''' + self.item = News(feed="feed", + title="title", + date=parser.parse("2019-11-17 10:44:20-05:00"), + link="link", + info_about_image="info_about_image", + briefly_about_news="briefly_about_news", + links_from_news=["link", "link_on_image"] + ) + self.result = "1\n" + self.result += "Feed: feed\n" + self.result += "Title: title \n" + self.result += "Date: 2019-11-17 10:44:20-05:00 \n" + self.result += "Link: link\n" + self.result += "Info about image: info_about_image\n" + self.result += "Briefly about news: briefly_about_news\n" + self.result += "Links: \n" + self.result += "[0] link\n" + self.result += "[1] link_on_image" + + def test_clear_text(self): + self.assertEqual(clear_text("'"), "'") + + def test_get_info_about_image(self): + self.assertEqual(get_info_about_image(self.summary), '''NATO ally expels undercover Russian spy ''') + + def test_get_briefly_about_news(self): + self.assertEqual(get_briefly_about_news(self.summary), + '''In a rare move,NATO ally Bulgaria has expelled an undercover ''' + '''spy affiliated with the Russian military intelligence''' + ''' service, according to a Western intelligence source.''') + + def test_print_news(self): + with patch('sys.stdout', new=StringIO()) as fake_out_put: + print_news([self.item, ]) + self.assertEqual(fake_out_put.getvalue().strip(), self.result) + + if __name__ == '__main__': + unittest.main() + + # nosetests --with-coverage --cover-erase From b0aea9e7d1db8d1017a2b8043564846b8bddea53 Mon Sep 17 00:00:00 2001 From: Sergey Matyushenok Date: Wed, 20 Nov 2019 20:44:49 +0300 Subject: [PATCH 2/8] second iteration,fixed some pr comments --- final_task/README.md | 12 +++ final_task/rss_reader/{scripts => }/News.py | 0 .../rss_reader/{scripts => }/__init__.py | 0 final_task/rss_reader/exceptions.py | 3 + .../rss_reader/{scripts => }/pars_args.py | 0 .../rss_reader/{scripts => }/parser_rss.py | 9 +-- final_task/rss_reader/rss_reader.py | 77 ++++++++++--------- final_task/setup.py | 22 ++++++ 
final_task/tests/test_News.py | 9 ++- final_task/tests/test_parser_rss.py | 40 +++++++++- 10 files changed, 124 insertions(+), 48 deletions(-) rename final_task/rss_reader/{scripts => }/News.py (100%) rename final_task/rss_reader/{scripts => }/__init__.py (100%) create mode 100644 final_task/rss_reader/exceptions.py rename final_task/rss_reader/{scripts => }/pars_args.py (100%) rename final_task/rss_reader/{scripts => }/parser_rss.py (96%) diff --git a/final_task/README.md b/final_task/README.md index e187657..3d492be 100644 --- a/final_task/README.md +++ b/final_task/README.md @@ -11,3 +11,15 @@ optional arguments: --json Print result as JSON in stdout --verbose Outputs verbose status messages --limit LIMIT Limit news topics if this parameter provided + + + +Installation recommendation rss-reader: +1)Open terminal +2)Enter "pip install setuptools" or "pip3 install setuptools" +3)Go to the folder final_task +4)Enter "python3 setup.py install" +5)Application installed +6)To run the utility, type in the terminal "rss-reader" then a space and url on news +Example : rss-reader https://news.yahoo.com/rss + diff --git a/final_task/rss_reader/scripts/News.py b/final_task/rss_reader/News.py similarity index 100% rename from final_task/rss_reader/scripts/News.py rename to final_task/rss_reader/News.py diff --git a/final_task/rss_reader/scripts/__init__.py b/final_task/rss_reader/__init__.py similarity index 100% rename from final_task/rss_reader/scripts/__init__.py rename to final_task/rss_reader/__init__.py diff --git a/final_task/rss_reader/exceptions.py b/final_task/rss_reader/exceptions.py new file mode 100644 index 0000000..55ec77a --- /dev/null +++ b/final_task/rss_reader/exceptions.py @@ -0,0 +1,3 @@ +class TimeOutExeption(Exception): + def __init__(self, msg): + super().__init__('Problems with internet connection: ' + str(msg)) diff --git a/final_task/rss_reader/scripts/pars_args.py b/final_task/rss_reader/pars_args.py similarity index 100% rename from 
final_task/rss_reader/scripts/pars_args.py rename to final_task/rss_reader/pars_args.py diff --git a/final_task/rss_reader/scripts/parser_rss.py b/final_task/rss_reader/parser_rss.py similarity index 96% rename from final_task/rss_reader/scripts/parser_rss.py rename to final_task/rss_reader/parser_rss.py index 094f5a5..d495eea 100644 --- a/final_task/rss_reader/scripts/parser_rss.py +++ b/final_task/rss_reader/parser_rss.py @@ -2,16 +2,13 @@ import re import signal import logging -from contextlib import contextmanager, closing +from contextlib import contextmanager from urllib.error import URLError import feedparser from dateutil import parser -from final_task.rss_reader.scripts import News +import News import html - - -class TimeOutExeption(Exception): pass - +from exceptions import * module_logger = logging.getLogger("rss_reader.scripts.parser_rss") diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index 023c90b..2ca2405 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -1,49 +1,50 @@ from urllib.error import URLError -from final_task.rss_reader.scripts import pars_args -from final_task.rss_reader.scripts import parser_rss +import pars_args +import parser_rss import logging import sys - +from News import News +from dateutil import parser def main(): """ The main entry point of the application """ - try: - args = pars_args.get_args() - logger = logging.getLogger("rss_reader") - logger.setLevel(logging.INFO) - # create the logging file handler - if not args.verbose: - fh = logging.FileHandler("new_snake.log") - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') - fh.setFormatter(formatter) - logger.addHandler(fh) - else: - fh = logging.basicConfig(stream=sys.stdout, - filemode='a', - format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s', - datefmt='%H:%M:%S', - level=logging.DEBUG) - - # add handler to logger object - - 
logger.info("Program started") - - list_of_news = [] - news_feed = parser_rss.get_news_feed(args.source) - parser_rss.init_list_of_news(list_of_news, news_feed, args.limit) - if args.json: - parser_rss.print_news_in_json(list_of_news) - else: - parser_rss.print_news(list_of_news) - - except ValueError: - raise - except URLError: - raise - except parser_rss.TimeOutExeption: - raise + args = pars_args.get_args() + logger = logging.getLogger("rss_reader") + logger.setLevel(logging.INFO) + # create the logging file handler + if not args.verbose: + fh = logging.FileHandler("new_snake.log") + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + fh.setFormatter(formatter) + logger.addHandler(fh) + else: + fh = logging.basicConfig(stream=sys.stdout, + filemode='a', + format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s', + datefmt='%H:%M:%S', + level=logging.DEBUG) + + # add handler to logger object + + logger.info("Program started") + + list_of_news = [] + news_feed = parser_rss.get_news_feed(args.source) + parser_rss.init_list_of_news(list_of_news, news_feed, args.limit) + item = News(feed="feed", + title="title", + date=parser.parse("2019-11-17 10:44:20-05:00"), + link="link", + info_about_image="info_about_image", + briefly_about_news="briefly_about_news", + links_from_news=["link", "link_on_image"] + ) + if args.json: + parser_rss.print_news_in_json([item,]) + else: + parser_rss.print_news(list_of_news) if __name__ == '__main__': diff --git a/final_task/setup.py b/final_task/setup.py index e69de29..3678671 100644 --- a/final_task/setup.py +++ b/final_task/setup.py @@ -0,0 +1,22 @@ +from setuptools import find_namespace_packages, setup + +setup( + name='rss_reader', + version='2.0', + description='RSS reader', + author='Matyushenok Sergey', + author_email='matyushenoksergei@yandex.by', + package_dir={'rss_reader': 'rss_reader'}, + scripts=['rss_reader/News.py', + 'rss_reader/pars_args.py', + 'rss_reader/parser_rss.py', + 
'rss_reader/exceptions.py', + 'rss_reader/rss_reader.py'], + entry_points={ + 'console_scripts': ['rss-reader=rss_reader:main'], + }, + packages=find_namespace_packages(), + install_requires=['feedparser', 'python-dateutil'], + license="none", + platforms="Linux, Windows (not tested)", +) diff --git a/final_task/tests/test_News.py b/final_task/tests/test_News.py index 721a889..4968d2b 100644 --- a/final_task/tests/test_News.py +++ b/final_task/tests/test_News.py @@ -1,4 +1,7 @@ -from final_task.rss_reader.scripts.News import * +import sys + +sys.path.insert(1, 'final_task/rss_reader') +from News import News import unittest from dateutil import parser @@ -34,4 +37,6 @@ def test_get_json(self): self.assertEqual(data['Link'], 'link') self.assertEqual(data['Info about image'], 'info_about_image') self.assertEqual(data['Briefly about news'], 'briefly_about_news') - self.assertEqual(data['Links'], ['link', 'link_on_image']) \ No newline at end of file + self.assertEqual(data['Links'], ['link', 'link_on_image']) + + diff --git a/final_task/tests/test_parser_rss.py b/final_task/tests/test_parser_rss.py index 7824f0a..4f9d47a 100644 --- a/final_task/tests/test_parser_rss.py +++ b/final_task/tests/test_parser_rss.py @@ -1,5 +1,8 @@ -from final_task.rss_reader.scripts.parser_rss import * -from final_task.rss_reader.scripts.News import News +import sys + +sys.path.insert(1, 'final_task/rss_reader') +from parser_rss import * +from News import News import unittest from io import StringIO from unittest.mock import patch @@ -50,7 +53,40 @@ def test_print_news(self): print_news([self.item, ]) self.assertEqual(fake_out_put.getvalue().strip(), self.result) + def test_print_news_in_json(self): + self.result = "[\n" + self.result += " {\n" + self.result += ''' "Feed": "feed",\n''' + self.result += ''' "Title": "title",\n''' + self.result += ''' "Date": "2019-11-17 10:44:20-05:00",\n''' + self.result += ''' "Link": "link",\n''' + self.result += ''' "Info about image": 
"info_about_image",\n''' + self.result += ''' "Briefly about news": "briefly_about_news",\n''' + self.result += ''' "Links": [\n''' + self.result += ''' "link",\n''' + self.result += ''' "link_on_image"\n''' + self.result += " ]\n" + self.result += " }\n" + self.result+="]" + with patch('sys.stdout', new=StringIO()) as fake_out_put: + print_news_in_json([self.item, ]) + self.assertEqual(fake_out_put.getvalue().strip(), self.result) + if __name__ == '__main__': unittest.main() # nosetests --with-coverage --cover-erase +# [ +# { +# "Feed": "feed", +# "Title": "title", +# "Date": "2019-11-17 10:44:20-05:00", +# "Link": "link", +# "Info about image": "info_about_image", +# "Briefly about news": "briefly_about_news", +# "Links": [ +# "link", +# "link_on_image" +# ] +# } +# ] From 754ac910f95ed3ef191a8c99d70efde3aafb600c Mon Sep 17 00:00:00 2001 From: Sergey Matyushenok Date: Wed, 20 Nov 2019 21:36:35 +0300 Subject: [PATCH 3/8] second iteration,deleted redundant log message --- final_task/rss_reader/parser_rss.py | 2 +- final_task/rss_reader/rss_reader.py | 10 +--------- final_task/tests/test_parser_rss.py | 18 +----------------- 3 files changed, 3 insertions(+), 27 deletions(-) diff --git a/final_task/rss_reader/parser_rss.py b/final_task/rss_reader/parser_rss.py index d495eea..08e0b1b 100644 --- a/final_task/rss_reader/parser_rss.py +++ b/final_task/rss_reader/parser_rss.py @@ -114,7 +114,7 @@ def print_news(list_of_news: list): for number, news in enumerate(list_of_news): print(number + 1) # because number starts at 0 print(news) - # print('-' * 100) + print('-' * 100) def print_news_in_json(list_of_news: list): diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index 2ca2405..40bb780 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -33,16 +33,8 @@ def main(): list_of_news = [] news_feed = parser_rss.get_news_feed(args.source) parser_rss.init_list_of_news(list_of_news, news_feed, 
args.limit) - item = News(feed="feed", - title="title", - date=parser.parse("2019-11-17 10:44:20-05:00"), - link="link", - info_about_image="info_about_image", - briefly_about_news="briefly_about_news", - links_from_news=["link", "link_on_image"] - ) if args.json: - parser_rss.print_news_in_json([item,]) + parser_rss.print_news_in_json(list_of_news) else: parser_rss.print_news(list_of_news) diff --git a/final_task/tests/test_parser_rss.py b/final_task/tests/test_parser_rss.py index 4f9d47a..8de3f96 100644 --- a/final_task/tests/test_parser_rss.py +++ b/final_task/tests/test_parser_rss.py @@ -73,20 +73,4 @@ def test_print_news_in_json(self): self.assertEqual(fake_out_put.getvalue().strip(), self.result) if __name__ == '__main__': - unittest.main() - - # nosetests --with-coverage --cover-erase -# [ -# { -# "Feed": "feed", -# "Title": "title", -# "Date": "2019-11-17 10:44:20-05:00", -# "Link": "link", -# "Info about image": "info_about_image", -# "Briefly about news": "briefly_about_news", -# "Links": [ -# "link", -# "link_on_image" -# ] -# } -# ] + unittest.main() \ No newline at end of file From bd5831be9455472e5f0e23d488c0d486577dec34 Mon Sep 17 00:00:00 2001 From: Sergey Matyushenok Date: Thu, 21 Nov 2019 21:50:06 +0300 Subject: [PATCH 4/8] third iteration --- final_task/README.md | 34 +++++++ final_task/config.txt | 5 + final_task/rss_reader/News.py | 28 +++--- final_task/rss_reader/database.py | 121 +++++++++++++++++++++++++ final_task/rss_reader/exceptions.py | 4 +- final_task/rss_reader/pars_args.py | 33 ++----- final_task/rss_reader/parser_rss.py | 41 +++++++-- final_task/rss_reader/requirements.txt | 3 +- final_task/rss_reader/rss_reader.py | 85 ++++++++++------- final_task/setup.py | 6 +- final_task/tests/test_pars_args.py | 25 +++++ final_task/tests/test_parser_rss.py | 25 ++++- 12 files changed, 320 insertions(+), 90 deletions(-) create mode 100644 final_task/config.txt create mode 100644 final_task/rss_reader/database.py create mode 100644 
final_task/tests/test_pars_args.py diff --git a/final_task/README.md b/final_task/README.md index 3d492be..17a645e 100644 --- a/final_task/README.md +++ b/final_task/README.md @@ -11,6 +11,7 @@ optional arguments: --json Print result as JSON in stdout --verbose Outputs verbose status messages --limit LIMIT Limit news topics if this parameter provided + --date DATE to search in cache for news by date in the format in YYYYmmdd @@ -23,3 +24,36 @@ Installation recommendation rss-reader: 6)To run the utility, type in the terminal "rss-reader" then a space and url on news Example : rss-reader https://news.yahoo.com/rss + +To use caching you must have a postgresql database on your computer or laptop +with default parameters: + database="postgres", + user="postgres", + password="1", + host="127.0.0.1", + port="5432" +If you do not follow these commands: +1)Open terminal +2)sudo apt-get install postgresql +3)sudo -u postgres psql +4)\password +5)1 +6)1 + +You can change the database connection parameters in the "config.txt" file in the project, +but then you need to update the utility, for this you need: +1) Open a terminal +2) Go to the "final_task" folder +3) Enter "python3 setup.py install" + +How to use the --date parameter: +1)--date works with all other arguments (e.g. 
--limit, --json) +2)If you use --date you do not need an internet connection +3)if you didn’t succeed in correctly downloading the database, + you will be deprived of the caching parameter, but everything else will work correctly +4)Usage example:rss-reader https://news.tut.by/rss/ --date 20191120 + + + + + diff --git a/final_task/config.txt b/final_task/config.txt new file mode 100644 index 0000000..5839b6b --- /dev/null +++ b/final_task/config.txt @@ -0,0 +1,5 @@ +database postgres +user postgres +password 1 +host 127.0.0.1 +port 5432 \ No newline at end of file diff --git a/final_task/rss_reader/News.py b/final_task/rss_reader/News.py index a79f459..ea4d123 100644 --- a/final_task/rss_reader/News.py +++ b/final_task/rss_reader/News.py @@ -2,7 +2,7 @@ import datetime import logging -module_logger = logging.getLogger("rss_reader.scripts.News") +MODULE_LOGGER = logging.getLogger("rss_reader.News") @dataclass @@ -19,7 +19,7 @@ def get_json(self): """ returns news in json format """ - logger = logging.getLogger("rss_reader.scripts.News.get_json") + logger = logging.getLogger("rss_reader.News.get_json") logger.info("return news in json format") data = { "Feed": self.feed, @@ -37,22 +37,18 @@ def __str__(self): """ Return a string representation of the news for print in stdout. 
""" - logger = logging.getLogger("rss_reader.scripts.News.__str__") + logger = logging.getLogger("rss_reader.News.__str__") logger.info("return str") links = "" for index, link in enumerate(self.links_from_news or []): links += "[" + str(index) + "] " + link + "\n" - return "Feed: {feed}\n" \ - "Title: {title} \n" \ - "Date: {date} \n" \ - "Link: {link}\n" \ - "Info about image: {info_about_image}\n" \ - "Briefly about news: {briefly_about_news}\n" \ - "Links: \n{links}".format(feed=self.feed, - title=self.title, - date=self.date, - link=self.link, - info_about_image=self.info_about_image, - briefly_about_news=self.briefly_about_news, - links=links) + return "Feed: %s\n" \ + "Title: %s \n" \ + "Date: %s \n" \ + "Link: %s\n" \ + "Info about image: %s\n" \ + "Briefly about news: %s\n" \ + "Links: \n%s" % (self.feed, self.title, self.date, + self.link, self.info_about_image, + self.briefly_about_news, links) diff --git a/final_task/rss_reader/database.py b/final_task/rss_reader/database.py new file mode 100644 index 0000000..0f0205b --- /dev/null +++ b/final_task/rss_reader/database.py @@ -0,0 +1,121 @@ +import datetime +from contextlib import closing + +import psycopg2 +import News +import os +import logging + +MODULE_LOGGER = logging.getLogger("rss_reader.database") + + +def get_param_for_connect(filename) -> dict: + logger = logging.getLogger("rss_reader.database.get_param_for_connect") + logger.info("get param for connect from config.txt") + dict_parameters = {} + with open(filename, "r") as file: + for line in file: + key, value = line.split() + dict_parameters[key] = value + return dict_parameters + + +def connect_to_database(): + logger = logging.getLogger("rss_reader.database.connect_to_database") + logger.info("connect to database") + if os.path.isfile('final_task/config.txt'): + filename = "final_task/config.txt" + elif os.path.isfile('config.txt'): + filename = 'config.txt' + else: + raise psycopg2.OperationalError("check config") + + parameters = 
get_param_for_connect(filename) + con = psycopg2.connect( + database=parameters['database'], + user=parameters['user'], + password=parameters['password'], + host=parameters['host'], + port=parameters['port'] + ) + return con + + +def is_table(): + logger = logging.getLogger("rss_reader.database.is_table") + logger.info("check exist table") + flag_is_table = True + with closing(connect_to_database()) as con: + cursor = con.cursor() + + try: + cursor.execute("SELECT * FROM NEWS") + except psycopg2.DatabaseError: + flag_is_table = False + finally: + con.close() + return flag_is_table + + +def create_table(con, cursor): + logger = logging.getLogger("rss_reader.database.create_table") + logger.info("create table") + if not is_table(): + cursor.execute('''CREATE TABLE NEWS + (FEED TEXT , + SOURCE_LINK TEXT, + TITLE_OF_NEWS TEXT, + DATA timestamptz, + LINK TEXT , + INFO TEXT, + BRIEFLY TEXT, + LINKS TEXT[]);''') + con.commit() + + +def write_to(list_news: list, source_link: str, cursor): + logger = logging.getLogger("rss_reader.database.write_to") + logger.info("write news") + for news in list_news: + cursor.execute("SELECT * FROM NEWS WHERE LINK = %s", (news.link,)) + if not cursor.fetchall(): + cursor.execute( + "INSERT INTO NEWS (FEED,SOURCE_LINK,TITLE_OF_NEWS,DATA,LINK,INFO,BRIEFLY,LINKS) " + "VALUES (%s,%s, %s,%s, %s, %s, %s,%s)", (news.feed, + source_link, + news.title, + news.date, + news.link, + news.info_about_image, + news.briefly_about_news, + news.links_from_news,) + + ) + + +def read_news(list_of_news: list, limit: int, source_link, date_of_news: datetime, cursor): + logger = logging.getLogger("rss_reader.database.read_news") + logger.info("return cache") + if limit: + cursor.execute( + "SELECT * FROM NEWS WHERE date(DATA) = DATE(%s) AND SOURCE_LINK = %s LIMIT %s", + (date_of_news, source_link, limit,)) + else: + cursor.execute("SELECT * FROM NEWS WHERE date(DATA) = DATE(%s) AND SOURCE_LINK = TEXT(%s)", + (date_of_news, source_link,)) + if not 
bool(cursor.rowcount): + print("Your news story is empty") + for row in cursor: + news = News.News(feed=row[0], + title=row[2], + date=row[3], + link=row[4], + info_about_image=row[5], + briefly_about_news=row[6], + links_from_news=row[7]) + list_of_news.append(news) + + +def clear_the_history(connect, cursor): + cursor.execute('DELETE FROM NEWS') + connect.commit() diff --git a/final_task/rss_reader/exceptions.py b/final_task/rss_reader/exceptions.py index 55ec77a..fe44052 100644 --- a/final_task/rss_reader/exceptions.py +++ b/final_task/rss_reader/exceptions.py @@ -1,3 +1 @@ -class TimeOutExeption(Exception): - def __init__(self, msg): - super().__init__('Problems with internet connection: ' + str(msg)) +class TimeOutExeption(Exception): pass diff --git a/final_task/rss_reader/pars_args.py b/final_task/rss_reader/pars_args.py index e073a77..cf4875e 100644 --- a/final_task/rss_reader/pars_args.py +++ b/final_task/rss_reader/pars_args.py @@ -2,12 +2,12 @@ import sys import logging -module_logger = logging.getLogger("rss_reader.scripts.pars_args") +MODULE_LOGGER = logging.getLogger("rss_reader.pars_args") def create_parser(): """ function to parse the command line """ - logger = logging.getLogger("rss_reader.scripts.create_parser") + logger = logging.getLogger("rss_reader.create_parser") logger.info("parse the command line ") parser = argparse.ArgumentParser( prog='rss_reader', @@ -20,33 +20,20 @@ def create_parser(): # add information about the expected parameters # using the add_argument method one call for each parameter). 
- parser.add_argument('source', - type=str, - default="not url", - help='RSS URL') + parser.add_argument('source', type=str, default="not url", help='RSS URL') - parser.add_argument('--version', - action='version', - help='Print version info', - version='%(prog)s {}'.format("1.1")) + parser.add_argument('--version', action='version', help='Print version info', version='%(prog)s {}'.format("2.0")) - parser.add_argument('--json', - action='store_const', - const=True, - default=False, + parser.add_argument('--json', action='store_const', const=True, default=False, help='Print result as JSON in stdout') - parser.add_argument('--verbose', - action='store_const', - const=True, - default=False, + parser.add_argument('--verbose', action='store_const', const=True, default=False, help='Outputs verbose status messages') - parser.add_argument('--limit', - type=int, - metavar='LIMIT', - default=None, + parser.add_argument('--limit', type=int, metavar='LIMIT', default=None, help='Limit news topics if this parameter provided') + parser.add_argument('--date', type=str, metavar='DATE', + help='to search in cache for news by date in the format in YYYYmmdd') return parser @@ -55,7 +42,7 @@ def get_args(): """ returns command line arguments """ - logger = logging.getLogger("rss_reader.scripts.get_args") + logger = logging.getLogger("rss_reader.get_args") logger.info("return args command line") parser = create_parser() args = parser.parse_args(sys.argv[1:]) diff --git a/final_task/rss_reader/parser_rss.py b/final_task/rss_reader/parser_rss.py index 08e0b1b..c9de59c 100644 --- a/final_task/rss_reader/parser_rss.py +++ b/final_task/rss_reader/parser_rss.py @@ -1,3 +1,4 @@ +import datetime import json import re import signal @@ -8,9 +9,10 @@ from dateutil import parser import News import html -from exceptions import * +from exceptions import TimeOutExeption +from pars_args import get_args -module_logger = logging.getLogger("rss_reader.scripts.parser_rss") +MODULE_LOGGER = 
logging.getLogger("rss_reader.parser_rss") @contextmanager @@ -31,17 +33,25 @@ def signal_handler(signum, frame): signal.alarm(0) +def valid_date(date_text): + try: + a = datetime.datetime.strptime(date_text, '%Y%m%d') + except ValueError: + raise ValueError("Incorrect data format, should be YYYYMMDD") + return a + + def clear_text(text: str) -> str: """ cleans text from problems that occurred when decoding formats """ - logger = logging.getLogger("rss_reader.scripts.parser_rss.clear_text") + logger = logging.getLogger("rss_reader.parser_rss.clear_text") logger.info("clear text from news") return html.unescape(text) def get_info_about_image(summary: str) -> str: - logger = logging.getLogger("rss_reader.scripts.parser_rss.get_info_about_image") + logger = logging.getLogger("rss_reader.parser_rss.get_info_about_image") logger.info("return info about image") tag = 'alt=' begin_position_info_about_image = summary.find(tag) + len(tag) + 1 @@ -51,7 +61,7 @@ def get_info_about_image(summary: str) -> str: def get_briefly_about_news(summary: str) -> str: - logger = logging.getLogger("rss_reader.scripts.parser_rss.get_briefly_about_news") + logger = logging.getLogger("rss_reader.parser_rss.get_briefly_about_news") logger.info("return briefly info about news") p = re.compile(r'<.*?>') text = p.sub('', summary) @@ -59,7 +69,7 @@ def get_briefly_about_news(summary: str) -> str: def get_news_feed(sourse_url: str) -> feedparser.parse: - logger = logging.getLogger("rss_reader.scripts.parser_rss.get_news_feed") + logger = logging.getLogger("rss_reader.parser_rss.get_news_feed") logger.info("return news Feed") with timeout_sec(10): news_feed = feedparser.parse(sourse_url) @@ -75,7 +85,7 @@ def init_list_of_news( """ Fills the list with news """ - logger = logging.getLogger("rss_reader.scripts.parser_rss.init_list_of_news") + logger = logging.getLogger("rss_reader.parser_rss.init_list_of_news") logger.info("Fills the list with news") feed_title = news_feed['feed'].get('title', 'NO 
TITLE') feed_title = clear_text(feed_title) @@ -109,12 +119,23 @@ def print_news(list_of_news: list): :param list_of_news: :return: """ - logger = logging.getLogger("rss_reader.scripts.parser_rss.print_news") + logger = logging.getLogger("rss_reader.parser_rss.print_news") logger.info("print news in the console") for number, news in enumerate(list_of_news): print(number + 1) # because number starts at 0 print(news) - print('-' * 100) + print('-'*100) + + +def print_news_without_cashing(): + args = get_args() + list_of_news = [] + news_feed = get_news_feed(args.source) + init_list_of_news(list_of_news, news_feed, args.limit) + if args.json: + print_news_in_json(list_of_news) + else: + print_news(list_of_news) def print_news_in_json(list_of_news: list): @@ -123,7 +144,7 @@ def print_news_in_json(list_of_news: list): :param list_of_news: :return: """ - logger = logging.getLogger("rss_reader.scripts.parser_rss.print_news_in_json") + logger = logging.getLogger("rss_reader.parser_rss.print_news_in_json") logger.info("print news in the console in json format") list_of_news_in_json = [] for news in list_of_news: diff --git a/final_task/rss_reader/requirements.txt b/final_task/rss_reader/requirements.txt index 354aefb..38db78b 100644 --- a/final_task/rss_reader/requirements.txt +++ b/final_task/rss_reader/requirements.txt @@ -1,2 +1,3 @@ feedparser == 5.2.1 -python-dateutil == 2.8.1 \ No newline at end of file +python-dateutil == 2.8.1 +psycopg2-binary == 2.8.4 \ No newline at end of file diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index 40bb780..45c0d6d 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -1,50 +1,71 @@ +from contextlib import closing from urllib.error import URLError +import psycopg2 +import database import pars_args import parser_rss import logging import sys -from News import News -from dateutil import parser + def main(): """ The main entry point of the application """ 
- args = pars_args.get_args() - logger = logging.getLogger("rss_reader") - logger.setLevel(logging.INFO) - # create the logging file handler - if not args.verbose: - fh = logging.FileHandler("new_snake.log") - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') - fh.setFormatter(formatter) - logger.addHandler(fh) - else: - fh = logging.basicConfig(stream=sys.stdout, - filemode='a', - format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s', - datefmt='%H:%M:%S', - level=logging.DEBUG) - - # add handler to logger object - - logger.info("Program started") - - list_of_news = [] - news_feed = parser_rss.get_news_feed(args.source) - parser_rss.init_list_of_news(list_of_news, news_feed, args.limit) - if args.json: - parser_rss.print_news_in_json(list_of_news) - else: - parser_rss.print_news(list_of_news) + try: + args = pars_args.get_args() + logger = logging.getLogger("rss_reader") + logger.setLevel(logging.INFO) + # create the logging file handler + if not args.verbose: + fh = logging.FileHandler("new_snake.log") + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + fh.setFormatter(formatter) + logger.addHandler(fh) + else: + fh = logging.basicConfig(stream=sys.stdout, + filemode='a', + format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s', + datefmt='%H:%M:%S', + level=logging.DEBUG) + # add handler to logger object -if __name__ == '__main__': - try: - main() + logger.info("Program started") + with closing( database.connect_to_database()) as con: + with con.cursor() as cursor: + database.create_table(con, cursor) + args = pars_args.get_args() + list_of_news = [] + if args.date: + date = parser_rss.valid_date(args.date) + database.read_news(list_of_news, args.limit, args.source, date, cursor) + else: + news_feed = parser_rss.get_news_feed(args.source) + parser_rss.init_list_of_news(list_of_news, news_feed, args.limit) + database.write_to(list_of_news, args.source, cursor) + if 
args.json: + parser_rss.print_news_in_json(list_of_news) + else: + parser_rss.print_news(list_of_news) + + con.commit() + + except psycopg2.OperationalError: + parser_rss.print_news_without_cashing() + print("Check your database," + "news is not saved " + "you cannot use --date\n" + "Please read README.md") except parser_rss.TimeOutExeption as e: print(e) except URLError as er: print(er) except ValueError as v: print(v) + except FileNotFoundError as e: + print(e) + + +if __name__ == '__main__': + main() diff --git a/final_task/setup.py b/final_task/setup.py index 3678671..8e5ab15 100644 --- a/final_task/setup.py +++ b/final_task/setup.py @@ -2,21 +2,23 @@ setup( name='rss_reader', - version='2.0', + version='3.0', description='RSS reader', author='Matyushenok Sergey', author_email='matyushenoksergei@yandex.by', package_dir={'rss_reader': 'rss_reader'}, scripts=['rss_reader/News.py', + 'config.txt', 'rss_reader/pars_args.py', 'rss_reader/parser_rss.py', 'rss_reader/exceptions.py', + 'rss_reader/database.py', 'rss_reader/rss_reader.py'], entry_points={ 'console_scripts': ['rss-reader=rss_reader:main'], }, packages=find_namespace_packages(), - install_requires=['feedparser', 'python-dateutil'], + install_requires=['feedparser', 'python-dateutil', 'psycopg2-binary'], license="none", platforms="Linux, Windows (not tested)", ) diff --git a/final_task/tests/test_pars_args.py b/final_task/tests/test_pars_args.py new file mode 100644 index 0000000..804e109 --- /dev/null +++ b/final_task/tests/test_pars_args.py @@ -0,0 +1,25 @@ +import sys + +sys.path.insert(1, 'final_task/rss_reader') +from pars_args import * +import unittest +import argparse +from unittest import mock # python 3.3+ + + +class TestParsArgs(unittest.TestCase): + @mock.patch('argparse.ArgumentParser.parse_args', + return_value=argparse.Namespace(source='https://news.tut.by/rss/', + version='2.0', + json=False, + verbose = False, + limit=2, + date="20191212")) + def test_command(self, mock_args): + data = 
get_args() + self.assertEqual(data.source, "https://news.tut.by/rss/") + self.assertEqual(data.version, "2.0") + self.assertEqual(data.json, False) + self.assertEqual(data.verbose, False) + self.assertEqual(data.limit, 2) + self.assertEqual(data.date, "20191212") diff --git a/final_task/tests/test_parser_rss.py b/final_task/tests/test_parser_rss.py index 8de3f96..96e76d3 100644 --- a/final_task/tests/test_parser_rss.py +++ b/final_task/tests/test_parser_rss.py @@ -34,7 +34,9 @@ def setUp(self): self.result += "Briefly about news: briefly_about_news\n" self.result += "Links: \n" self.result += "[0] link\n" - self.result += "[1] link_on_image" + self.result += "[1] link_on_image\n" + self.result += '\n' + self.result += '-' * 100 def test_clear_text(self): self.assertEqual(clear_text("'"), "'") @@ -48,6 +50,23 @@ def test_get_briefly_about_news(self): '''spy affiliated with the Russian military intelligence''' ''' service, according to a Western intelligence source.''') + def test_valid_date(self): + self.assertEqual(str(valid_date("20191211")), "2019-12-11 00:00:00") + with self.assertRaises(ValueError) as error: + valid_date("dfgh") + self.assertEqual(str(error.exception), 'Incorrect data format, should be YYYYMMDD') + with self.assertRaises(ValueError) as error: + valid_date("20102111") + self.assertEqual(str(error.exception), 'Incorrect data format, should be YYYYMMDD') + + def test_get_news_feed(self): + with self.assertRaises(URLError) as error: + get_news_feed("wcxqa") + self.assertEqual(str(error.exception), '') + with self.assertRaises(URLError) as error: + get_news_feed(" https://news.tut.by/") + self.assertEqual(str(error.exception), '') + def test_print_news(self): with patch('sys.stdout', new=StringIO()) as fake_out_put: print_news([self.item, ]) @@ -67,10 +86,10 @@ def test_print_news_in_json(self): self.result += ''' "link_on_image"\n''' self.result += " ]\n" self.result += " }\n" - self.result+="]" + self.result += "]" with patch('sys.stdout', 
new=StringIO()) as fake_out_put: print_news_in_json([self.item, ]) self.assertEqual(fake_out_put.getvalue().strip(), self.result) if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() From 5f3abd224fcf91b8cdee0eda65e0d5a55de1c539 Mon Sep 17 00:00:00 2001 From: Sergey Matyushenok Date: Fri, 22 Nov 2019 01:28:35 +0300 Subject: [PATCH 5/8] third iteration fixed README.md --- final_task/README.md | 16 +++++++++++++--- final_task/config.txt | 2 +- final_task/rss_reader/parser_rss.py | 14 ++++++++++++++ 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/final_task/README.md b/final_task/README.md index 17a645e..9832f21 100644 --- a/final_task/README.md +++ b/final_task/README.md @@ -11,7 +11,6 @@ optional arguments: --json Print result as JSON in stdout --verbose Outputs verbose status messages --limit LIMIT Limit news topics if this parameter provided - --date DATE to search in cache for news by date in the format in YYYYmmdd @@ -30,15 +29,26 @@ with default parameters: database="postgres", user="postgres", password="1", - host="127.0.0.1", + host="localhost", port="5432" -If you do not follow these commands: +If you do not, follow these commands: 1)Open terminal 2)sudo apt-get install postgresql 3)sudo -u postgres psql 4)\password 5)1 6)1 +7)\conninfo find out the port and if it is not equal 5432, change the config.txt port to your +If after all you couldn’t connect postgresql, change +localhost in the config.txt file to your IP address, +you can find it using the following commands: +1)Open terminal +2) Enter nslookup localhost +You will need an address in a format similar to this: +"127.0.0.1" in line "Address: 127.0.0.1" + + + You can change the database connection parameters in the "config.txt" file in the project, but then you need to update the utility, for this you need: diff --git a/final_task/config.txt b/final_task/config.txt index 5839b6b..d5e77b9 100644 --- a/final_task/config.txt +++ b/final_task/config.txt 
@@ -1,5 +1,5 @@ database postgres user postgres password 1 -host 127.0.0.1 +host localhost port 5432 \ No newline at end of file diff --git a/final_task/rss_reader/parser_rss.py b/final_task/rss_reader/parser_rss.py index c9de59c..b2bc5ef 100644 --- a/final_task/rss_reader/parser_rss.py +++ b/final_task/rss_reader/parser_rss.py @@ -138,6 +138,20 @@ def print_news_without_cashing(): print_news(list_of_news) +def print_news_without_cashing(): + try: + args = get_args() + list_of_news = [] + news_feed = get_news_feed(args.source) + init_list_of_news(list_of_news, news_feed, args.limit) + if args.json: + print_news_in_json(list_of_news) + else: + print_news(list_of_news) + except URLError as er: + print(er) + + def print_news_in_json(list_of_news: list): """ This function print news in the console in json format From 83fe14fcff27bcb29c62f0cec13098afcc6e59e6 Mon Sep 17 00:00:00 2001 From: Sergey Matyushenok Date: Fri, 22 Nov 2019 22:21:00 +0300 Subject: [PATCH 6/8] fourth iteration and fixed problem with database --- final_task/README.md | 46 ++----- final_task/rss_reader/News.py | 3 +- final_task/rss_reader/converter.py | 166 +++++++++++++++++++++++++ final_task/rss_reader/database.py | 66 +++++----- final_task/rss_reader/exceptions.py | 1 + final_task/rss_reader/pars_args.py | 7 +- final_task/rss_reader/parser_rss.py | 19 +-- final_task/rss_reader/requirements.txt | 6 +- final_task/rss_reader/rss_reader.py | 53 ++++---- final_task/setup.py | 6 +- 10 files changed, 257 insertions(+), 116 deletions(-) create mode 100644 final_task/rss_reader/converter.py diff --git a/final_task/README.md b/final_task/README.md index 9832f21..6ac77fd 100644 --- a/final_task/README.md +++ b/final_task/README.md @@ -11,6 +11,7 @@ optional arguments: --json Print result as JSON in stdout --verbose Outputs verbose status messages --limit LIMIT Limit news topics if this parameter provided + --date DATE to search in cache for news by date in the format in YYYYmmdd @@ -23,46 +24,19 @@ 
Installation recommendation rss-reader: 6)To run the utility, type in the terminal "rss-reader" then a space and url on news Example : rss-reader https://news.yahoo.com/rss +News caching: +In order to see the history you must enter an additional parameter --date +Example: rss-reader https://news.tut.by/rss/ --limit 2 --date 20191122 +Search by date and source -To use caching you must have a postgresql database on your computer or laptop -with default parameters: - database="postgres", - user="postgres", - password="1", - host="localhost", - port="5432" -If you do not, follow these commands: -1)Open terminal -2)sudo apt-get install postgresql -3)sudo -u postgres psql -4)\password -5)1 -6)1 -7)\conninfo find out the port and if it is not equal 5432, change the config.txt port to your -If after all you couldn’t connect postgresql, change -localhost in the config.txt file to your IP address, -you can find it using the following commands: -1)Open terminal -2) Enter nslookup localhost -You will need an address in a format similar to this: -"127.0.0.1" in line "Address: 127.0.0.1" +Format converter: +1)Use --to-pdf to save news in pdf format +2)Use --to-html to save news in html format +3)If no internet connection, get a file without images +4)Enter the full path to the file - -You can change the database connection parameters in the "config.txt" file in the project, -but then you need to update the utility, for this you need: -1) Open a terminal -2) Go to the "final_task" folder -3) Enter "python3 setup.py install" - -How to use the --date parameter: -1)--date works with all other arguments (e.g. 
--limit, --json) -2)If you use --date you do not need an internet connection -3)if you didn’t succeed in correctly downloading the database, - you will be deprived of the caching parameter, but everything else will work correctly -4)Usage example:rss-reader https://news.tut.by/rss/ --date 20191120 - diff --git a/final_task/rss_reader/News.py b/final_task/rss_reader/News.py index ea4d123..0f57010 100644 --- a/final_task/rss_reader/News.py +++ b/final_task/rss_reader/News.py @@ -41,7 +41,8 @@ def __str__(self): logger.info("return str") links = "" for index, link in enumerate(self.links_from_news or []): - links += "[" + str(index) + "] " + link + "\n" + if link: + links += "[" + str(index) + "] " + link + "\n" return "Feed: %s\n" \ "Title: %s \n" \ diff --git a/final_task/rss_reader/converter.py b/final_task/rss_reader/converter.py new file mode 100644 index 0000000..d9104c2 --- /dev/null +++ b/final_task/rss_reader/converter.py @@ -0,0 +1,166 @@ +import fnmatch +import logging +import os +import textwrap +from io import BytesIO + +import dominate +import requests +from PIL import Image +from dominate import tags +from reportlab.lib.pagesizes import A4 +from reportlab.pdfbase import pdfmetrics +from reportlab.pdfbase.ttfonts import TTFont +from reportlab.pdfgen.canvas import Canvas + +MODULE_LOGGER = logging.getLogger("rss_reader.converter") + + +def get_path(path, expansion_file): + logger = logging.getLogger("rss_reader.converter.get_path") + logger.info("return correct path") + if not fnmatch.fnmatch(path, '*%s' % expansion_file): + raise FileNotFoundError("Invalid expansion ") + if not os.path.isdir(path[:path.rfind("/")]): + raise FileNotFoundError("File or directory not found") + result = path + return result + + +def conversion_of_news_in_html(path, list_of_news: list): + logger = logging.getLogger("rss_reader.converter.conversion_of_news_in_html") + logger.info("conversion of news in html") + correct_path = get_path(path, ".html") + with open(correct_path, 
'w') as file: + for news in list_of_news: + doc = dominate.document(title='RSS READER') + with doc.head: + tags.link(rel='stylesheet', href='style.css') + tags.script(type='text/javascript', src='script.js') + tags.style("""\ + body { + background-color: #F9F8F1; + color: #2C232A; + font-family: sans-serif; + font-size: 2.6em; + margin: 3em 1em; + } + + """) + + with doc: + with tags.div(id='header'): + tags.p("Feed: ", news.feed) + tags.p("Title: ", news.title) + tags.p("Date ", str(news.date)) + tags.p("Link: ", tags.a(news.link.title(), href=news.link, target="_blank")) + tags.p("Info about image: ", news.info_about_image) + tags.p("Briefly about news: ", news.briefly_about_news) + tags.p("Links: ", ) + for reference in news.links_from_news: + if reference: + tags.li(tags.a(reference.title(), href=reference, target="_blank")) + if news.links_from_news[1]: + tags.a(tags.img( + src=news.links_from_news[1], + width="200", height="200", alt=news.info_about_image), + href=news.links_from_news[1], target="_blank") + + file.write(doc.render()) + print("news successfully saved to file ", path) + + +def get_img(name, reference): + logger = logging.getLogger("rss_reader.converter.get_img") + logger.info("return img") + is_picture = False + try: + response = requests.get(reference) + img = Image.open(BytesIO(response.content)) + img = img.resize((100, 100)) + img = img.convert('RGB') + img.save(name, 'JPEG') + is_picture = True + except requests.exceptions.ConnectionError: + logger = logging.getLogger("rss_reader.converter.get_img") + logger.error("You do not have an internet connection\n" + "your news will be saved in pdf without pictures") + except requests.exceptions.MissingSchema: + logger = logging.getLogger("rss_reader.converter.get_img") + logger.error("Invalid url picture \n") + except OSError: + logger = logging.getLogger("rss_reader.converter.get_img") + logger.error("cannot identify image\n") + return is_picture + + +def text_separator(text: str, 
break_long_words) -> str: + logger = logging.getLogger("rss_reader.converter.text_separator") + logger.info("return text") + format_text = textwrap.fill(text, width=50, break_long_words=break_long_words) + ls = format_text.split('\n') + return ls + + +def print_text_in_pdf(canvas, text, x, y): + logger = logging.getLogger("rss_reader.converter.print_list_in_pdf") + logger.info("print list in pdf") + ls = text_separator(text, False) + for lines in ls: + if y < 45: + canvas.showPage() + canvas.setFont('FreeSans', 19) + y = 800 + y -= 25 + canvas.drawString(x, y, lines) + return y - 25 + + +def print_line_in_pdf(canvas, line, x, y): + logger = logging.getLogger("rss_reader.converter.print_line_in_pdf") + logger.info("print line in pdf") + if y < 25: + canvas.showPage() + canvas.setFont('FreeSans', 19) + y = 900 + y -= 25 + canvas.drawString(x, y, line) + return y - 25 + + +def conversion_of_news_in_pdf(path, list_of_news): + logger = logging.getLogger("rss_reader.converter.conversion_of_news_in_pdf") + logger.info("conversion_of_news_in_pdf") + correct_path = get_path(path, ".pdf") + canvas = Canvas(correct_path, pagesize=A4) + pdfmetrics.registerFont(TTFont('FreeSans', 'FreeSans.ttf')) + canvas.setFont('FreeSans', 19) + canvas.setTitle("RSS READER") + x = 10 + y = 800 + pdfmetrics.registerFont(TTFont('FreeSans', 'FreeSans.ttf')) + name_buffer_picture_file = "tmp1" + for index, news in enumerate(list_of_news): + name_buffer_picture_file = name_buffer_picture_file[:-1] + str(index) + canvas.setFont('FreeSans', 19) + if get_img(name_buffer_picture_file + '.jpg', news.links_from_news[1]): + y -= 170 + if y < 45: + canvas.showPage() + canvas.setFont('FreeSans', 19) + y = 680 + canvas.drawImage(name_buffer_picture_file + ".jpg", x, y, 150, 150) + os.remove(name_buffer_picture_file + '.jpg') + y -= 40 + y = print_text_in_pdf(canvas, news.feed, x, y) + y = print_text_in_pdf(canvas, news.title, x, y) + y = print_text_in_pdf(canvas, str(news.date), x, y) + y = 
print_text_in_pdf(canvas, news.link, x, y) + y = print_text_in_pdf(canvas, news.info_about_image, x, y) + y = print_text_in_pdf(canvas, news.briefly_about_news, x, y) + if y < 45: + canvas.showPage() + y = 800 + logger.info("save news in pdf") + canvas.save() + print("news successfully saved to file ", path) diff --git a/final_task/rss_reader/database.py b/final_task/rss_reader/database.py index 0f0205b..fd68f98 100644 --- a/final_task/rss_reader/database.py +++ b/final_task/rss_reader/database.py @@ -1,10 +1,10 @@ import datetime +import logging +import sqlite3 from contextlib import closing -import psycopg2 import News -import os -import logging +from exceptions import DataBaseEmpty MODULE_LOGGER = logging.getLogger("rss_reader.database") @@ -23,21 +23,7 @@ def get_param_for_connect(filename) -> dict: def connect_to_database(): logger = logging.getLogger("rss_reader.database.connect_to_database") logger.info("connect to database") - if os.path.isfile('final_task/config.txt'): - filename = "final_task/config.txt" - elif os.path.isfile('config.txt'): - filename = 'config.txt' - else: - raise psycopg2.OperationalError("check config") - - parameters = get_param_for_connect(filename) - con = psycopg2.connect( - database=parameters['database'], - user=parameters['user'], - password=parameters['password'], - host=parameters['host'], - port=parameters['port'] - ) + con = sqlite3.connect("database.db") # или :memory: чтобы сохранить в RAM return con @@ -50,11 +36,10 @@ def is_table(): try: cursor.execute("SELECT * FROM NEWS") - except psycopg2.DatabaseError: + except sqlite3.OperationalError: flag_is_table = False - finally: - con.close() - return flag_is_table + + return flag_is_table def create_table(con, cursor): @@ -69,7 +54,7 @@ def create_table(con, cursor): LINK TEXT , INFO TEXT, BRIEFLY TEXT, - LINKS TEXT[]);''') + LINKS TEXT);''') con.commit() @@ -77,20 +62,25 @@ def write_to(list_news: list, source_link: str, cursor): logger = 
logging.getLogger("rss_reader.database.write_to") logger.info("write news") for news in list_news: - cursor.execute("SELECT * FROM NEWS WHERE LINK = %s", (news.link,)) + cursor.execute(f"SELECT * FROM NEWS WHERE LINK = ?", (news.link,)) if not cursor.fetchall(): + links_in_str = "" + for link in news.links_from_news: + links_in_str += link + "\n" cursor.execute( "INSERT INTO NEWS (FEED,SOURCE_LINK,TITLE_OF_NEWS,DATA,LINK,INFO,BRIEFLY,LINKS) " - "VALUES (%s,%s, %s,%s, %s, %s, %s,%s)", (news.feed, - source_link, - news.title, - news.date, - news.link, - news.info_about_image, - news.briefly_about_news, - news.links_from_news,) + "VALUES (?,?, ?,?, ?, ?, ?,?)", (news.feed, + source_link, + news.title, + news.date, + news.link, + news.info_about_image, + news.briefly_about_news, + links_in_str,) ) + logger = logging.getLogger("rss_reader.database.write_to") + logger.info("end write news") def read_news(list_of_news: list, limit: int, source_link, date_of_news: datetime, cursor): @@ -98,22 +88,24 @@ def read_news(list_of_news: list, limit: int, source_link, date_of_news: datetim logger.info("return cache") if limit: cursor.execute( - "SELECT * FROM NEWS WHERE date(DATA) = DATE(%s) AND SOURCE_LINK = %s LIMIT %s", + "SELECT * FROM NEWS WHERE date(DATA) = DATE(?) AND SOURCE_LINK = ? LIMIT ?", (date_of_news, source_link, limit,)) else: - cursor.execute("SELECT * FROM NEWS WHERE date(DATA) = DATE(%s) AND SOURCE_LINK = TEXT(%s)", + cursor.execute("SELECT * FROM NEWS WHERE date(DATA) = DATE(?) 
AND SOURCE_LINK = ?", (date_of_news, source_link,)) - if not bool(cursor.rowcount): - print("Your news story is empty") + for row in cursor: + links = row[7].split("\n") news = News.News(feed=row[0], title=row[2], date=row[3], link=row[4], info_about_image=row[5], briefly_about_news=row[6], - links_from_news=row[7]) + links_from_news=links[:-1]) list_of_news.append(news) + if not list_of_news: + raise DataBaseEmpty(Exception("Your news story on is empty ")) def clear_the_history(connect, cursor): diff --git a/final_task/rss_reader/exceptions.py b/final_task/rss_reader/exceptions.py index fe44052..f2ee456 100644 --- a/final_task/rss_reader/exceptions.py +++ b/final_task/rss_reader/exceptions.py @@ -1 +1,2 @@ class TimeOutExeption(Exception): pass +class DataBaseEmpty(Exception):pass \ No newline at end of file diff --git a/final_task/rss_reader/pars_args.py b/final_task/rss_reader/pars_args.py index cf4875e..b0084c3 100644 --- a/final_task/rss_reader/pars_args.py +++ b/final_task/rss_reader/pars_args.py @@ -22,7 +22,7 @@ def create_parser(): parser.add_argument('source', type=str, default="not url", help='RSS URL') - parser.add_argument('--version', action='version', help='Print version info', version='%(prog)s {}'.format("2.0")) + parser.add_argument('--version', action='version', help='Print version info', version='%s' % "4.0") parser.add_argument('--json', action='store_const', const=True, default=False, help='Print result as JSON in stdout') @@ -34,7 +34,10 @@ def create_parser(): help='Limit news topics if this parameter provided') parser.add_argument('--date', type=str, metavar='DATE', help='to search in cache for news by date in the format in YYYYmmdd') - + parser.add_argument('--to-html', type=str, metavar='PATH', default=None, + help='the conversion of news in html file') + parser.add_argument('--to-pdf', type=str, metavar='PATH', default=None, + help='the conversion of news in pdf file') return parser diff --git a/final_task/rss_reader/parser_rss.py 
b/final_task/rss_reader/parser_rss.py index b2bc5ef..0a8cf6c 100644 --- a/final_task/rss_reader/parser_rss.py +++ b/final_task/rss_reader/parser_rss.py @@ -35,10 +35,10 @@ def signal_handler(signum, frame): def valid_date(date_text): try: - a = datetime.datetime.strptime(date_text, '%Y%m%d') + date = datetime.datetime.strptime(date_text, '%Y%m%d') except ValueError: raise ValueError("Incorrect data format, should be YYYYMMDD") - return a + return date def clear_text(text: str) -> str: @@ -63,8 +63,8 @@ def get_info_about_image(summary: str) -> str: def get_briefly_about_news(summary: str) -> str: logger = logging.getLogger("rss_reader.parser_rss.get_briefly_about_news") logger.info("return briefly info about news") - p = re.compile(r'<.*?>') - text = p.sub('', summary) + result = re.compile(r'<.*?>') + text = result.sub('', summary) return clear_text(text) @@ -127,17 +127,6 @@ def print_news(list_of_news: list): print('-'*100) -def print_news_without_cashing(): - args = get_args() - list_of_news = [] - news_feed = get_news_feed(args.source) - init_list_of_news(list_of_news, news_feed, args.limit) - if args.json: - print_news_in_json(list_of_news) - else: - print_news(list_of_news) - - def print_news_without_cashing(): try: args = get_args() diff --git a/final_task/rss_reader/requirements.txt b/final_task/rss_reader/requirements.txt index 38db78b..3958d17 100644 --- a/final_task/rss_reader/requirements.txt +++ b/final_task/rss_reader/requirements.txt @@ -1,3 +1,7 @@ feedparser == 5.2.1 python-dateutil == 2.8.1 -psycopg2-binary == 2.8.4 \ No newline at end of file +psycopg2-binary == 2.8.4 +dominate == 2.4.0 +Pillow == 6.2.1 +requests == 2.22.0 +reportlab == 3.5.32 \ No newline at end of file diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index 45c0d6d..fe05c4c 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -1,11 +1,14 @@ +import logging +import sqlite3 +import sys from contextlib 
import closing from urllib.error import URLError -import psycopg2 + +import converter import database import pars_args import parser_rss -import logging -import sys +from exceptions import DataBaseEmpty def main(): @@ -32,35 +35,41 @@ def main(): # add handler to logger object logger.info("Program started") - with closing( database.connect_to_database()) as con: - with con.cursor() as cursor: - database.create_table(con, cursor) - args = pars_args.get_args() - list_of_news = [] - if args.date: - date = parser_rss.valid_date(args.date) - database.read_news(list_of_news, args.limit, args.source, date, cursor) - else: - news_feed = parser_rss.get_news_feed(args.source) - parser_rss.init_list_of_news(list_of_news, news_feed, args.limit) - database.write_to(list_of_news, args.source, cursor) - if args.json: - parser_rss.print_news_in_json(list_of_news) - else: - parser_rss.print_news(list_of_news) - - con.commit() + with closing(database.connect_to_database()) as con: + cursor = con.cursor() + database.create_table(con, cursor) + args = pars_args.get_args() + list_of_news = [] + if args.date: + date = parser_rss.valid_date(args.date) + database.read_news(list_of_news, args.limit, args.source, date, cursor) + else: + news_feed = parser_rss.get_news_feed(args.source) + parser_rss.init_list_of_news(list_of_news, news_feed, args.limit) + database.write_to(list_of_news, args.source, cursor) + if args.json: + parser_rss.print_news_in_json(list_of_news) + else: + parser_rss.print_news(list_of_news) + if args.to_html: + converter.conversion_of_news_in_html(args.to_html, list_of_news) + if args.to_pdf: + converter.conversion_of_news_in_pdf(args.to_pdf, list_of_news) + con.commit() - except psycopg2.OperationalError: + except sqlite3.OperationalError as er: parser_rss.print_news_without_cashing() print("Check your database," "news is not saved " "you cannot use --date\n" "Please read README.md") + print(er) except parser_rss.TimeOutExeption as e: print(e) except URLError as er: 
print(er) + except DataBaseEmpty as d: + print(d) except ValueError as v: print(v) except FileNotFoundError as e: diff --git a/final_task/setup.py b/final_task/setup.py index 8e5ab15..992628e 100644 --- a/final_task/setup.py +++ b/final_task/setup.py @@ -2,7 +2,7 @@ setup( name='rss_reader', - version='3.0', + version='4.0', description='RSS reader', author='Matyushenok Sergey', author_email='matyushenoksergei@yandex.by', @@ -13,12 +13,14 @@ 'rss_reader/parser_rss.py', 'rss_reader/exceptions.py', 'rss_reader/database.py', + 'rss_reader/converter.py', 'rss_reader/rss_reader.py'], entry_points={ 'console_scripts': ['rss-reader=rss_reader:main'], }, packages=find_namespace_packages(), - install_requires=['feedparser', 'python-dateutil', 'psycopg2-binary'], + install_requires=['feedparser', 'python-dateutil', 'psycopg2-binary', 'dominate', 'Pillow', + 'requests','reportlab'], license="none", platforms="Linux, Windows (not tested)", ) From 01e748b21c80e8c05c0148ddfe5130c69d55bb5f Mon Sep 17 00:00:00 2001 From: Sergey Matyushenok Date: Sun, 24 Nov 2019 17:14:16 +0300 Subject: [PATCH 7/8] fifth iteration --- final_task/README.md | 9 +- final_task/rss_reader/News.py | 16 ++-- final_task/rss_reader/converter.py | 104 +++++++++++------------ final_task/rss_reader/database.py | 23 +---- final_task/rss_reader/pars_args.py | 6 +- final_task/rss_reader/parser_rss.py | 68 ++++++--------- final_task/rss_reader/print_functions.py | 95 +++++++++++++++++++++ final_task/rss_reader/requirements.txt | 3 +- final_task/rss_reader/rss_reader.py | 15 +++- final_task/setup.py | 5 +- final_task/tests/README.md | 3 + final_task/tests/news_feed_for_test.xml | 26 ++++++ final_task/tests/test_converter.py | 92 ++++++++++++++++++++ final_task/tests/test_database.py | 24 ++++++ final_task/tests/test_parser_rss.py | 21 ++++- 15 files changed, 372 insertions(+), 138 deletions(-) create mode 100644 final_task/rss_reader/print_functions.py create mode 100644 final_task/tests/README.md create mode 
100644 final_task/tests/news_feed_for_test.xml create mode 100644 final_task/tests/test_converter.py create mode 100644 final_task/tests/test_database.py diff --git a/final_task/README.md b/final_task/README.md index 6ac77fd..7a510c0 100644 --- a/final_task/README.md +++ b/final_task/README.md @@ -6,12 +6,12 @@ positional arguments: source RSS URL optional arguments: - -h, --help show this help message and exit + -h, --help Show this help message and exit --version Print version info --json Print result as JSON in stdout --verbose Outputs verbose status messages --limit LIMIT Limit news topics if this parameter provided - --date DATE to search in cache for news by date in the format in YYYYmmdd + --date DATE To search in cache for news by date in the format in YYYYmmdd @@ -27,13 +27,14 @@ Example : rss-reader https://news.yahoo.com/rss News caching: In order to see the history you must enter an additional parameter --date Example: rss-reader https://news.tut.by/rss/ --limit 2 --date 20191122 -Search by date and source +Searching by date and source Format converter: 1)Use --to-pdf to save news in pdf format 2)Use --to-html to save news in html format 3)If no internet connection, get a file without images -4)Enter the full path to the file +4)Enter the full path to the file +5)If you enter path to directory,news successfully saved to file "your path+News.(pdf or html)" diff --git a/final_task/rss_reader/News.py b/final_task/rss_reader/News.py index 0f57010..f2a8bff 100644 --- a/final_task/rss_reader/News.py +++ b/final_task/rss_reader/News.py @@ -44,12 +44,10 @@ def __str__(self): if link: links += "[" + str(index) + "] " + link + "\n" - return "Feed: %s\n" \ - "Title: %s \n" \ - "Date: %s \n" \ - "Link: %s\n" \ - "Info about image: %s\n" \ - "Briefly about news: %s\n" \ - "Links: \n%s" % (self.feed, self.title, self.date, - self.link, self.info_about_image, - self.briefly_about_news, links) + return f"Feed: {self.feed}\n" \ + f"Title: {self.title} \n" \ + f"Date: 
{self.date} \n" \ + f"Link: {self.link}\n" \ + f"Info about image: {self.info_about_image}\n" \ + f"Briefly about news: {self.briefly_about_news}\n" \ + f"Links: \n{links}" diff --git a/final_task/rss_reader/converter.py b/final_task/rss_reader/converter.py index d9104c2..6c42d02 100644 --- a/final_task/rss_reader/converter.py +++ b/final_task/rss_reader/converter.py @@ -16,28 +16,29 @@ MODULE_LOGGER = logging.getLogger("rss_reader.converter") -def get_path(path, expansion_file): +def get_path(path: str, expansion_file: str) -> str: logger = logging.getLogger("rss_reader.converter.get_path") logger.info("return correct path") - if not fnmatch.fnmatch(path, '*%s' % expansion_file): - raise FileNotFoundError("Invalid expansion ") - if not os.path.isdir(path[:path.rfind("/")]): - raise FileNotFoundError("File or directory not found") - result = path + if os.path.isdir(path): + result = path + 'News' + expansion_file + else: + if not fnmatch.fnmatch(path, '*%s' % expansion_file): + raise FileNotFoundError("Invalid expansion ") + if not os.path.isdir(path[:path.rfind("/")]): + raise FileNotFoundError("File or directory not found") + result = path return result -def conversion_of_news_in_html(path, list_of_news: list): +def get_html(list_of_news: list): logger = logging.getLogger("rss_reader.converter.conversion_of_news_in_html") logger.info("conversion of news in html") - correct_path = get_path(path, ".html") - with open(correct_path, 'w') as file: - for news in list_of_news: - doc = dominate.document(title='RSS READER') - with doc.head: - tags.link(rel='stylesheet', href='style.css') - tags.script(type='text/javascript', src='script.js') - tags.style("""\ + doc = dominate.document(title='RSS READER') + for news in list_of_news: + with doc.head: + tags.link(rel='stylesheet', href='style.css') + tags.script(type='text/javascript', src='script.js') + tags.style("""\ body { background-color: #F9F8F1; color: #2C232A; @@ -48,29 +49,40 @@ def conversion_of_news_in_html(path, 
list_of_news: list): """) - with doc: - with tags.div(id='header'): - tags.p("Feed: ", news.feed) - tags.p("Title: ", news.title) - tags.p("Date ", str(news.date)) - tags.p("Link: ", tags.a(news.link.title(), href=news.link, target="_blank")) - tags.p("Info about image: ", news.info_about_image) - tags.p("Briefly about news: ", news.briefly_about_news) - tags.p("Links: ", ) - for reference in news.links_from_news: - if reference: - tags.li(tags.a(reference.title(), href=reference, target="_blank")) - if news.links_from_news[1]: - tags.a(tags.img( - src=news.links_from_news[1], - width="200", height="200", alt=news.info_about_image), - href=news.links_from_news[1], target="_blank") - - file.write(doc.render()) - print("news successfully saved to file ", path) - - -def get_img(name, reference): + with doc: + with tags.div(id='header'): + tags.p("Feed: ", news.feed) + tags.p("Title: ", news.title) + tags.p("Date ", str(news.date)) + tags.p("Link: ", tags.a(news.link.title(), href=news.link, target="_blank")) + tags.p("Info about image: ", news.info_about_image) + tags.p("Briefly about news: ", news.briefly_about_news) + tags.p("Links: ", ) + for reference in news.links_from_news: + if reference: + tags.li(tags.a(reference.title(), href=reference, target="_blank")) + if news.links_from_news[1]: + tags.a(tags.img( + src=news.links_from_news[1], + width="200", height="200", alt=news.info_about_image), + href=news.links_from_news[1], target="_blank") + + return doc + + +def conversion_of_news_in_html(path, list_of_news): + correct_path = get_path(path, ".html") + html_content = get_html(list_of_news) + save_html(correct_path, html_content) + + +def save_html(path, html_content): + with open(path, 'w') as file: + file.write(html_content.render()) + print("news successfully saved to file ", path) + + +def get_img(image_name, reference): logger = logging.getLogger("rss_reader.converter.get_img") logger.info("return img") is_picture = False @@ -79,7 +91,7 @@ def get_img(name, 
reference): img = Image.open(BytesIO(response.content)) img = img.resize((100, 100)) img = img.convert('RGB') - img.save(name, 'JPEG') + img.save(image_name, 'JPEG') is_picture = True except requests.exceptions.ConnectionError: logger = logging.getLogger("rss_reader.converter.get_img") @@ -116,18 +128,6 @@ def print_text_in_pdf(canvas, text, x, y): return y - 25 -def print_line_in_pdf(canvas, line, x, y): - logger = logging.getLogger("rss_reader.converter.print_line_in_pdf") - logger.info("print line in pdf") - if y < 25: - canvas.showPage() - canvas.setFont('FreeSans', 19) - y = 900 - y -= 25 - canvas.drawString(x, y, line) - return y - 25 - - def conversion_of_news_in_pdf(path, list_of_news): logger = logging.getLogger("rss_reader.converter.conversion_of_news_in_pdf") logger.info("conversion_of_news_in_pdf") @@ -163,4 +163,4 @@ def conversion_of_news_in_pdf(path, list_of_news): y = 800 logger.info("save news in pdf") canvas.save() - print("news successfully saved to file ", path) + print("news successfully saved to file ", correct_path) diff --git a/final_task/rss_reader/database.py b/final_task/rss_reader/database.py index fd68f98..0094d0a 100644 --- a/final_task/rss_reader/database.py +++ b/final_task/rss_reader/database.py @@ -9,25 +9,14 @@ MODULE_LOGGER = logging.getLogger("rss_reader.database") -def get_param_for_connect(filename) -> dict: - logger = logging.getLogger("rss_reader.database.get_param_for_connect") - logger.info("get param for connect from config.txt") - dict_parameters = {} - with open(filename, "r") as file: - for line in file: - key, value = line.split() - dict_parameters[key] = value - return dict_parameters - - def connect_to_database(): logger = logging.getLogger("rss_reader.database.connect_to_database") logger.info("connect to database") - con = sqlite3.connect("database.db") # или :memory: чтобы сохранить в RAM + con = sqlite3.connect("database.db") return con -def is_table(): +def is_table(table_name: str) -> bool: logger = 
logging.getLogger("rss_reader.database.is_table") logger.info("check exist table") flag_is_table = True @@ -35,7 +24,7 @@ def is_table(): cursor = con.cursor() try: - cursor.execute("SELECT * FROM NEWS") + cursor.execute(f"SELECT * FROM {table_name}") except sqlite3.OperationalError: flag_is_table = False @@ -45,7 +34,7 @@ def is_table(): def create_table(con, cursor): logger = logging.getLogger("rss_reader.database.create_table") logger.info("create table") - if not is_table(): + if not is_table("NEWS"): cursor.execute('''CREATE TABLE NEWS (FEED TEXT , SOURCE_LINK TEXT, @@ -107,7 +96,3 @@ def read_news(list_of_news: list, limit: int, source_link, date_of_news: datetim if not list_of_news: raise DataBaseEmpty(Exception("Your news story on is empty ")) - -def clear_the_history(connect, cursor): - cursor.execute('DELETE FROM NEWS') - connect.commit() diff --git a/final_task/rss_reader/pars_args.py b/final_task/rss_reader/pars_args.py index b0084c3..522f709 100644 --- a/final_task/rss_reader/pars_args.py +++ b/final_task/rss_reader/pars_args.py @@ -11,7 +11,7 @@ def create_parser(): logger.info("parse the command line ") parser = argparse.ArgumentParser( prog='rss_reader', - description=''' This program which receives RSS URL + description=''' This program receives RSS URL and prints results in human-readable format.''', epilog='''Thank you for using this program''' @@ -22,7 +22,7 @@ def create_parser(): parser.add_argument('source', type=str, default="not url", help='RSS URL') - parser.add_argument('--version', action='version', help='Print version info', version='%s' % "4.0") + parser.add_argument('--version', action='version', help='Print version info', version=f'{5.0}') parser.add_argument('--json', action='store_const', const=True, default=False, help='Print result as JSON in stdout') @@ -38,6 +38,8 @@ def create_parser(): help='the conversion of news in html file') parser.add_argument('--to-pdf', type=str, metavar='PATH', default=None, help='the conversion of 
news in pdf file') + parser.add_argument('--colorize', action='store_const', const=True, default=False, + help='print news in multi colored format') return parser diff --git a/final_task/rss_reader/parser_rss.py b/final_task/rss_reader/parser_rss.py index 0a8cf6c..790bf92 100644 --- a/final_task/rss_reader/parser_rss.py +++ b/final_task/rss_reader/parser_rss.py @@ -1,16 +1,16 @@ import datetime -import json +import html +import logging import re import signal -import logging from contextlib import contextmanager from urllib.error import URLError + import feedparser from dateutil import parser + import News -import html from exceptions import TimeOutExeption -from pars_args import get_args MODULE_LOGGER = logging.getLogger("rss_reader.parser_rss") @@ -41,6 +41,17 @@ def valid_date(date_text): return date +def get_link_image(summary: str) -> str: + """ + + """ + tag = 'img src=' + begin_position_link_img = summary.find(tag) + len(tag) + 1 + end_position_link_img = summary.find('"', begin_position_link_img) + link = summary[begin_position_link_img:end_position_link_img + 1] + return link + + def clear_text(text: str) -> str: """ cleans text from problems that occurred when decoding formats @@ -99,7 +110,11 @@ def init_list_of_news( link = entry['link'] info_about_image = get_info_about_image(summary) briefly_about_news = get_briefly_about_news(summary) - link_on_image = entry.get("media_content")[0]["url"] + try: + link_on_image = entry.get("media_content")[0]["url"] + except TypeError: + link_on_image = "link not found" + info_about_image = "info about image not found" news = News.News(feed=feed_title, title=title, date=date, @@ -112,44 +127,9 @@ def init_list_of_news( list_of_news.append(news) -def print_news(list_of_news: list): - """ - This function print news in the console - :param feed_title: - :param list_of_news: - :return: - """ - logger = logging.getLogger("rss_reader.parser_rss.print_news") - logger.info("print news in the console") - for number, news in 
enumerate(list_of_news): - print(number + 1) # because number starts at 0 - print(news) - print('-'*100) -def print_news_without_cashing(): - try: - args = get_args() - list_of_news = [] - news_feed = get_news_feed(args.source) - init_list_of_news(list_of_news, news_feed, args.limit) - if args.json: - print_news_in_json(list_of_news) - else: - print_news(list_of_news) - except URLError as er: - print(er) - - -def print_news_in_json(list_of_news: list): - """ - This function print news in the console in json format - :param list_of_news: - :return: - """ - logger = logging.getLogger("rss_reader.parser_rss.print_news_in_json") - logger.info("print news in the console in json format") - list_of_news_in_json = [] - for news in list_of_news: - list_of_news_in_json.append(news.get_json()) - print(json.dumps(list_of_news_in_json, indent=4, ensure_ascii=False)) + + + + diff --git a/final_task/rss_reader/print_functions.py b/final_task/rss_reader/print_functions.py new file mode 100644 index 0000000..2636509 --- /dev/null +++ b/final_task/rss_reader/print_functions.py @@ -0,0 +1,95 @@ +import json +import logging +from urllib.error import URLError +import parser_rss +from pars_args import get_args +import colorama +from colorama import Fore, Back, Style + + +def print_news_in_json(list_of_news: list): + """ + This function print news in the console in json format + :param list_of_news: + :return: + """ + logger = logging.getLogger("rss_reader.parser_rss.print_news_in_json") + logger.info("print news in the console in json format") + list_of_news_in_json = [] + for news in list_of_news: + list_of_news_in_json.append(news.get_json()) + print(json.dumps(list_of_news_in_json, indent=4, ensure_ascii=False)) + + +def print_news_without_cashing(): + try: + args = get_args() + list_of_news = [] + news_feed = parser_rss.get_news_feed(args.source) + parser_rss.init_list_of_news(list_of_news, news_feed, args.limit) + if args.json: + print_news_in_json(list_of_news) + else: + 
print_news(list_of_news) + except URLError as er: + print(er) + except Exception as e: + print(e) + + +def print_news(list_of_news: list): + """ + This function print news in the console + :param feed_title: + :param list_of_news: + :return: + """ + logger = logging.getLogger("rss_reader.parser_rss.print_news") + logger.info("print news in the console") + for number, news in enumerate(list_of_news): + print(number + 1) # because number starts at 0 + print(news) + print('-' * 100) + + +def print_news_in_multi_colored_format(list_of_news: list): + colorama.init() + for number, news in enumerate(list_of_news): + links = "" + for index, link in enumerate(news.links_from_news or []): + links += "[" + str(index) + "] " + link + "\n" + print('\033[1m\033[32m\033[4m' + str(number + 1) + ":") + print(Style.RESET_ALL + Fore.BLUE + f'Feed: {news.feed}') + print(Style.RESET_ALL + Fore.GREEN + f'Title: {news.title}') + print(Style.RESET_ALL + Fore.YELLOW + f'Date: {news.date}') + print(Style.RESET_ALL + Fore.CYAN + f'Link: {news.link}') + print(Style.RESET_ALL + Fore.YELLOW + f'Info about image: {news.info_about_image}') + print(Style.RESET_ALL + Fore.GREEN + f'Briefly about news: {news.briefly_about_news}') + print(Style.RESET_ALL + Fore.CYAN + f'Links: \n{links}') + + +def print_news_in_json_in_multi_colored_format(list_of_news: list): + result = "\033[1m\033[35m[\033[0m\n" + for number, news in enumerate(list_of_news): + + result += " \033[1m\033[31m{\033[0m\n" + result += f''' \033[1m\033[34m"Feed": "{news.feed}",\033[0m\n''' + result += f''' \033[32m"Title": "{news.title}",\033[0m\n''' + result += f''' \033[33m"Date": "{news.date}",\033[0m\n''' + result += f''' \033[36m"Link": "{news.link}",\033[0m\n''' + result += f''' \033[33m"Info about image": "{news.info_about_image}",\033[0m\n''' + result += f''' \033[32m"Briefly about news": "{news.briefly_about_news}",\033[0m\n''' + result += f''' \033[36m"Links": [\n''' + for index_link, link in enumerate(news.links_from_news): + 
result += f''' "{link}"''' + if index_link != len(news.links_from_news) - 1: + result += ',\n' + result += '\n' + result += " ]\033[0m\n" + result += " \033[1m\033[31m}\033[0m" + if len(list_of_news) - 1 != number: + result += ',' + result += '\n' + + result += "\033[1m\033[35m]\033[0m" + print(result) diff --git a/final_task/rss_reader/requirements.txt b/final_task/rss_reader/requirements.txt index 3958d17..c9150e5 100644 --- a/final_task/rss_reader/requirements.txt +++ b/final_task/rss_reader/requirements.txt @@ -4,4 +4,5 @@ psycopg2-binary == 2.8.4 dominate == 2.4.0 Pillow == 6.2.1 requests == 2.22.0 -reportlab == 3.5.32 \ No newline at end of file +reportlab == 3.5.32 +colorama == 0.4.1 \ No newline at end of file diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index fe05c4c..89b21c8 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -9,6 +9,7 @@ import pars_args import parser_rss from exceptions import DataBaseEmpty +import print_functions def main(): @@ -48,9 +49,15 @@ def main(): parser_rss.init_list_of_news(list_of_news, news_feed, args.limit) database.write_to(list_of_news, args.source, cursor) if args.json: - parser_rss.print_news_in_json(list_of_news) + if args.colorize: + print_functions.print_news_in_json_in_multi_colored_format(list_of_news) + else: + print_functions.print_news_in_json(list_of_news) else: - parser_rss.print_news(list_of_news) + if args.colorize: + print_functions.print_news_in_multi_colored_format(list_of_news) + else: + print_functions.print_news(list_of_news) if args.to_html: converter.conversion_of_news_in_html(args.to_html, list_of_news) if args.to_pdf: @@ -58,7 +65,7 @@ def main(): con.commit() except sqlite3.OperationalError as er: - parser_rss.print_news_without_cashing() + print_functions.print_news_without_cashing() print("Check your database," "news is not saved " "you cannot use --date\n" @@ -74,6 +81,8 @@ def main(): print(v) except 
FileNotFoundError as e: print(e) + except Exception as e: + print(e) if __name__ == '__main__': diff --git a/final_task/setup.py b/final_task/setup.py index 992628e..e132adb 100644 --- a/final_task/setup.py +++ b/final_task/setup.py @@ -2,7 +2,7 @@ setup( name='rss_reader', - version='4.0', + version='5.0', description='RSS reader', author='Matyushenok Sergey', author_email='matyushenoksergei@yandex.by', @@ -14,13 +14,14 @@ 'rss_reader/exceptions.py', 'rss_reader/database.py', 'rss_reader/converter.py', + 'rss_reader/print_functions.py', 'rss_reader/rss_reader.py'], entry_points={ 'console_scripts': ['rss-reader=rss_reader:main'], }, packages=find_namespace_packages(), install_requires=['feedparser', 'python-dateutil', 'psycopg2-binary', 'dominate', 'Pillow', - 'requests','reportlab'], + 'requests','reportlab','colorama'], license="none", platforms="Linux, Windows (not tested)", ) diff --git a/final_task/tests/README.md b/final_task/tests/README.md new file mode 100644 index 0000000..5c74b84 --- /dev/null +++ b/final_task/tests/README.md @@ -0,0 +1,3 @@ +Tests were run using: coverage run -m unittest discover -s final_task/tests/ or + coverage run -m unittest discover + diff --git a/final_task/tests/news_feed_for_test.xml b/final_task/tests/news_feed_for_test.xml new file mode 100644 index 0000000..d410183 --- /dev/null +++ b/final_task/tests/news_feed_for_test.xml @@ -0,0 +1,26 @@ + + + + CHANNEL TITLE + + + CHANNEL DESCRIPTION + + CHANNEL LINK + + + ITEM1 TITLE + ITEM1 LINK + 2003-12-31 + + + + ITEM2 TITLE + ITEM2 LINK + 2003-12-31 + + + + + \ No newline at end of file diff --git a/final_task/tests/test_converter.py b/final_task/tests/test_converter.py new file mode 100644 index 0000000..6806436 --- /dev/null +++ b/final_task/tests/test_converter.py @@ -0,0 +1,92 @@ +from dateutil import parser +import sys + +from News import News + +sys.path.insert(1, 'final_task/rss_reader') +import converter +import unittest + + +class TestConverters(unittest.TestCase): + def 
test_get_path(self): + with self.assertRaises(FileNotFoundError) as error: + converter.get_path('path_not_exist.txt', '.pdf') + self.assertEqual(str(error.exception), 'Invalid expansion ') + with self.assertRaises(FileNotFoundError) as error: + converter.get_path('path_not_exist.pdf', '.pdf') + self.assertEqual(str(error.exception), 'File or directory not found') + with self.assertRaises(FileNotFoundError) as error: + converter.get_path('path_not_exist.txt', '.html') + self.assertEqual(str(error.exception), 'Invalid expansion ') + with self.assertRaises(FileNotFoundError) as error: + converter.get_path('path_not_exist.html', '.html') + self.assertEqual(str(error.exception), 'File or directory not found') + + def test_get_img(self): + self.assertEqual(converter.get_img("name", "not link"), False) + self.assertEqual(converter.get_img('name', "https://news.tut.by/rss"), False) + + def test_text_separator(self): + text = "A Utah woman charged with a crime after her stepchildren saw her topless in her own home is " + result = ["A Utah woman charged with a crime after her", + "stepchildren saw her topless in her own home is"] + self.assertEqual(converter.text_separator(text, False), result) + + def test_get_html(self): + dat = parser.parse("2019-11-12 18:21:00+03:00") + link12 = "link" + link_on_image = "link on image" + + links1 = [link12, link_on_image] + news = News(feed="TUT.BY: Новости ТУТ", + title="wcds", + date=dat, + link="link", + info_about_image="uhinjвв", + briefly_about_news="Полпред России в контактной группе Борис Грызлов сообщил", + links_from_news=links1) + + list_of_new = [news, ] + verifiable_info = "\n" \ + "\n" \ + " \n" \ + " RSS READER\n" \ + """ \n""" \ + """ \n""" \ + """ \n""" \ + """ \n""" \ + """ \n""" \ + """

\n""" \ + """ \n""" \ + """""" + + document_of_html = converter.get_html(list_of_new) + self.assertEqual(document_of_html.render(), verifiable_info) diff --git a/final_task/tests/test_database.py b/final_task/tests/test_database.py new file mode 100644 index 0000000..4d6bb3d --- /dev/null +++ b/final_task/tests/test_database.py @@ -0,0 +1,24 @@ +import sys +from contextlib import closing +from dateutil import parser + +sys.path.insert(1, 'final_task/rss_reader') +import database +import unittest +import exceptions + + +class TestNews(unittest.TestCase): + def test_is_table(self): + self.assertEqual(database.is_table("NEWSFA"), False) + + def test_read_news(self): + with closing(database.connect_to_database())as con: + cursor = con.cursor() + database.create_table(con, cursor) + date = parser.parse("10011001") + list_of_news = [] + with self.assertRaises(exceptions.DataBaseEmpty) as error: + database.read_news(list_of_news, 2, "not link", date, cursor) + self.assertEqual(str(error.exception), 'Your news story on is empty ') + diff --git a/final_task/tests/test_parser_rss.py b/final_task/tests/test_parser_rss.py index 96e76d3..a67143c 100644 --- a/final_task/tests/test_parser_rss.py +++ b/final_task/tests/test_parser_rss.py @@ -1,4 +1,5 @@ import sys +import os sys.path.insert(1, 'final_task/rss_reader') from parser_rss import * @@ -6,6 +7,8 @@ import unittest from io import StringIO from unittest.mock import patch +import feedparser +import print_functions class TestParserRss(unittest.TestCase): @@ -38,6 +41,13 @@ def setUp(self): self.result += '\n' self.result += '-' * 100 + if os.path.isfile('final_task/tests/news_feed_for_test.xml'): + self.url = 'final_task/tests/news_feed_for_test.xml' + else: + self.url = 'news_feed_for_test.xml' + + self.news_feed = feedparser.parse(self.url) + def test_clear_text(self): self.assertEqual(clear_text("'"), "'") @@ -66,12 +76,19 @@ def test_get_news_feed(self): with self.assertRaises(URLError) as error: get_news_feed(" 
https://news.tut.by/") self.assertEqual(str(error.exception), '') + self.assertEqual(get_news_feed(self.url), self.news_feed) def test_print_news(self): with patch('sys.stdout', new=StringIO()) as fake_out_put: - print_news([self.item, ]) + print_functions.print_news([self.item, ]) self.assertEqual(fake_out_put.getvalue().strip(), self.result) + def test_init_list_of_news(self): + + list_of_news = [] + init_list_of_news(list_of_news, self.news_feed, 2) + self.assertEqual(len(list_of_news), 2) + def test_print_news_in_json(self): self.result = "[\n" self.result += " {\n" @@ -88,7 +105,7 @@ def test_print_news_in_json(self): self.result += " }\n" self.result += "]" with patch('sys.stdout', new=StringIO()) as fake_out_put: - print_news_in_json([self.item, ]) + print_functions.print_news_in_json([self.item, ]) self.assertEqual(fake_out_put.getvalue().strip(), self.result) if __name__ == '__main__': From 7cd5544e46d08289a294c591c14207e9e67d8358 Mon Sep 17 00:00:00 2001 From: Sergey Matyushenok Date: Mon, 25 Nov 2019 21:47:08 +0300 Subject: [PATCH 8/8] final commit --- final_task/README.md | 84 +++++++------- final_task/rss_reader/converter.py | 71 +++++++++--- final_task/rss_reader/database.py | 137 ++++++++++++++++------- final_task/rss_reader/exceptions.py | 9 +- final_task/rss_reader/pars_args.py | 4 +- final_task/rss_reader/parser_rss.py | 44 +++++--- final_task/rss_reader/print_functions.py | 50 ++++++++- final_task/rss_reader/rss_reader.py | 69 ++++++------ final_task/setup.py | 1 - final_task/tests/test_News.py | 2 + final_task/tests/test_converter.py | 12 +- final_task/tests/test_database.py | 45 +++++++- final_task/tests/test_pars_args.py | 6 +- final_task/tests/test_parser_rss.py | 32 +----- final_task/tests/test_print_functions.py | 106 ++++++++++++++++++ 15 files changed, 478 insertions(+), 194 deletions(-) create mode 100644 final_task/tests/test_print_functions.py diff --git a/final_task/README.md b/final_task/README.md index 7a510c0..0850de7 100644 --- 
a/final_task/README.md +++ b/final_task/README.md @@ -1,44 +1,48 @@ -This program which receives RSS URL and prints results in human-readable -format. - -positional arguments: - source RSS URL - -optional arguments: - -h, --help Show this help message and exit - --version Print version info - --json Print result as JSON in stdout - --verbose Outputs verbose status messages - --limit LIMIT Limit news topics if this parameter provided - --date DATE To search in cache for news by date in the format in YYYYmmdd - - - -Installation recommendation rss-reader: -1)Open terminal -2)Enter "pip install setuptools" or "pip3 install setuptools" -3)Go to the folder final_task -4)Enter "python3 setup.py install" -5)Application installed -6)To run the utility, type in the terminal "rss-reader" then a space and url on news -Example : rss-reader https://news.yahoo.com/rss - -News caching: -In order to see the history you must enter an additional parameter --date -Example: rss-reader https://news.tut.by/rss/ --limit 2 --date 20191122 -Searching by date and source - -Format converter: -1)Use --to-pdf to save news in pdf format -2)Use --to-html to save news in html format -3)If no internet connection, get a file without images -4)Enter the full path to the file -5)If you enter path to directory,news successfully saved to file "your path+News.(pdf or html)" - - - - +#### This program receives RSS URL and prints results in human-readable format. 
+ +- positional arguments: ++ source RSS URL + +- optional arguments: ++ -h, --help show this help message and exit ++ --version Print version info ++ --json Print result as JSON in stdout ++ --verbose Outputs verbose status messages ++ --limit LIMIT Limit news topics if this parameter provided ++ --date DATE to search in cache for news by date in the format in YYYYmmdd ++ --to-html PATH the conversion of news in html file ++ --to-pdf PATH the conversion of news in pdf file ++ --colorize print news in multi colored format ++ --clear Clears news story + + + +- Installation instructions for rss-reader: +1. Open a terminal +2. Enter "pip install setuptools" or "pip3 install setuptools" +3. Go to the folder final_task +4. Enter "python3 setup.py install" +5. The application is now installed +6. To run the utility, type "rss-reader" in the terminal, followed by a space and the URL of the news feed +- Example: rss-reader https://news.yahoo.com/rss + +- News caching: ++ To see the cached history, pass the additional parameter --date ++ Example: rss-reader https://news.tut.by/rss/ --limit 2 --date 20191122 ++ The cache is searched by date and source, or only by date + +- Format converter: +1. Use --to-pdf to save news in pdf format +2. Use --to-html to save news in html format +3. If there is no internet connection, the file is saved without images +4. Enter the full path to the file +5. If you enter a path to a directory, the news is saved to the file "your path/News.(pdf or html)" + +- If you enter --colorize, the utility prints its result in colorized mode. +- If you enter --colorize with --json, the utility prints its result as JSON in colorized mode. 
+ +- If you enter --clear this will delete all cached news diff --git a/final_task/rss_reader/converter.py b/final_task/rss_reader/converter.py index 6c42d02..888ef3c 100644 --- a/final_task/rss_reader/converter.py +++ b/final_task/rss_reader/converter.py @@ -17,22 +17,39 @@ def get_path(path: str, expansion_file: str) -> str: + """ + Checks the correctness of the entered path + if received path to directory check her on exist + if directory exist add News and expansion file + if received path to file check his on exist and check correctness expansion file + :param path: + :param expansion_file: + :return: + """ logger = logging.getLogger("rss_reader.converter.get_path") - logger.info("return correct path") + logger.info("check path") if os.path.isdir(path): - result = path + 'News' + expansion_file + logger.info("path specified to directory") + result = path + '/News' + expansion_file else: if not fnmatch.fnmatch(path, '*%s' % expansion_file): - raise FileNotFoundError("Invalid expansion ") - if not os.path.isdir(path[:path.rfind("/")]): - raise FileNotFoundError("File or directory not found") + logger.error("Invalid expansion ") + raise FileNotFoundError(f"Invalid expansion {path}") + if not os.path.isdir(path[:path.rfind("/") + 1]): + logger.error("File or directory not found") + raise FileNotFoundError(f"File or directory not found {path}") result = path return result def get_html(list_of_news: list): - logger = logging.getLogger("rss_reader.converter.conversion_of_news_in_html") - logger.info("conversion of news in html") + """ + Forms html content + :param list_of_news: + :return: + """ + logger = logging.getLogger("rss_reader.converter.get_html") + logger.info("getting html content") doc = dominate.document(title='RSS READER') for news in list_of_news: with doc.head: @@ -66,23 +83,44 @@ def get_html(list_of_news: list): src=news.links_from_news[1], width="200", height="200", alt=news.info_about_image), href=news.links_from_news[1], target="_blank") - + 
logger.info("html content received") return doc def conversion_of_news_in_html(path, list_of_news): + logger = logging.getLogger("rss_reader.converter.conversion_of_news_in_html") + logger.info("conversion of news in html") correct_path = get_path(path, ".html") html_content = get_html(list_of_news) save_html(correct_path, html_content) + logger.info("conversion of news in html successful completed") def save_html(path, html_content): - with open(path, 'w') as file: - file.write(html_content.render()) - print("news successfully saved to file ", path) + """ + Save news in file + :param path: + :param html_content: + :return: + """ + logger = logging.getLogger("rss_reader.converter.save_html") + try: + with open(path, 'w') as file: + file.write(html_content.render()) + print("news successfully saved to file ", path) + logger.info("news successfully saved to file ") + except MemoryError: + logger.error("not enough memory to save html file") + print("You do not have enough memory to save html file") def get_img(image_name, reference): + """ + Download image in file + :param image_name: + :param reference: + :return: True if image successfully downloaded + """ logger = logging.getLogger("rss_reader.converter.get_img") logger.info("return img") is_picture = False @@ -106,11 +144,17 @@ def get_img(image_name, reference): return is_picture -def text_separator(text: str, break_long_words) -> str: +def text_separator(text: str, break_long_words: bool) -> list: + """ + Breaks text into lines of 50 characters + :param text: + :param break_long_words: + :return: + """ logger = logging.getLogger("rss_reader.converter.text_separator") - logger.info("return text") format_text = textwrap.fill(text, width=50, break_long_words=break_long_words) ls = format_text.split('\n') + logger.info("text successfully broken") return ls @@ -130,7 +174,6 @@ def print_text_in_pdf(canvas, text, x, y): def conversion_of_news_in_pdf(path, list_of_news): logger = 
logging.getLogger("rss_reader.converter.conversion_of_news_in_pdf") - logger.info("conversion_of_news_in_pdf") correct_path = get_path(path, ".pdf") canvas = Canvas(correct_path, pagesize=A4) pdfmetrics.registerFont(TTFont('FreeSans', 'FreeSans.ttf')) diff --git a/final_task/rss_reader/database.py b/final_task/rss_reader/database.py index 0094d0a..95b157f 100644 --- a/final_task/rss_reader/database.py +++ b/final_task/rss_reader/database.py @@ -1,7 +1,6 @@ import datetime import logging import sqlite3 -from contextlib import closing import News from exceptions import DataBaseEmpty @@ -9,32 +8,45 @@ MODULE_LOGGER = logging.getLogger("rss_reader.database") -def connect_to_database(): +def connect_to_database(name_database: str): logger = logging.getLogger("rss_reader.database.connect_to_database") - logger.info("connect to database") - con = sqlite3.connect("database.db") + logger.info("connecting to database") + con = sqlite3.connect(f"{name_database}") + logger.info("connected to database") return con -def is_table(table_name: str) -> bool: +def is_table(connect, table_name: str, name_database: str) -> bool: + """ + Checks table existence + :param connect: + :param name_database: + :param table_name: + :return: True or False + """ logger = logging.getLogger("rss_reader.database.is_table") logger.info("check exist table") flag_is_table = True - with closing(connect_to_database()) as con: - cursor = con.cursor() - try: - cursor.execute(f"SELECT * FROM {table_name}") - except sqlite3.OperationalError: - flag_is_table = False + cursor = connect.cursor() + + try: + cursor.execute(f"SELECT * FROM {table_name}") + logger.info(" table exist") + except sqlite3.OperationalError: + flag_is_table = False + logger.error("table does not exist") return flag_is_table -def create_table(con, cursor): +def create_table(con, cursor, name_database_str): + """ + Creates a table NEWS + """ logger = logging.getLogger("rss_reader.database.create_table") - logger.info("create table") - if 
not is_table("NEWS"): + logger.info("creating table") + if not is_table(con, "NEWS", name_database_str): cursor.execute('''CREATE TABLE NEWS (FEED TEXT , SOURCE_LINK TEXT, @@ -45,43 +57,77 @@ def create_table(con, cursor): BRIEFLY TEXT, LINKS TEXT);''') con.commit() + logger.info("created table") def write_to(list_news: list, source_link: str, cursor): - logger = logging.getLogger("rss_reader.database.write_to") - logger.info("write news") - for news in list_news: - cursor.execute(f"SELECT * FROM NEWS WHERE LINK = ?", (news.link,)) - if not cursor.fetchall(): - links_in_str = "" - for link in news.links_from_news: - links_in_str += link + "\n" - cursor.execute( - "INSERT INTO NEWS (FEED,SOURCE_LINK,TITLE_OF_NEWS,DATA,LINK,INFO,BRIEFLY,LINKS) " - "VALUES (?,?, ?,?, ?, ?, ?,?)", (news.feed, - source_link, - news.title, - news.date, - news.link, - news.info_about_image, - news.briefly_about_news, - links_in_str,) - - ) - logger = logging.getLogger("rss_reader.database.write_to") - logger.info("end write news") + """ + Writes news to database + :param list_news: + :param source_link: + :param cursor: + :return: + """ + try: + logger = logging.getLogger("rss_reader.database.write_to") + logger.info("write news") + for news in list_news: + cursor.execute(f"SELECT * FROM NEWS WHERE LINK = ?", (news.link,)) + if not cursor.fetchall(): + # links_in_str = "" + # for link in news.links_from_news: + links_in_str = "\n".join(news.links_from_news) + cursor.execute( + "INSERT INTO NEWS (FEED,SOURCE_LINK,TITLE_OF_NEWS,DATA,LINK,INFO,BRIEFLY,LINKS) " + "VALUES (?,?, ?,?, ?, ?, ?,?)", (news.feed, + source_link, + news.title, + news.date, + news.link, + news.info_about_image, + news.briefly_about_news, + links_in_str,) + + ) + logger = logging.getLogger("rss_reader.database.write_to") + logger.info("end news recording") + except MemoryError: + logger = logging.getLogger("rss_reader.database.write_to") + logger.error("not enough memory") + raise MemoryError("You do not have enough 
memory to cache") def read_news(list_of_news: list, limit: int, source_link, date_of_news: datetime, cursor): + """ + Read news from database + :param list_of_news: + :param limit: + :param source_link: + :param date_of_news: + :param cursor: + :return: + """ logger = logging.getLogger("rss_reader.database.read_news") - logger.info("return cache") - if limit: + # the user enter "source_link" + if limit and source_link: + logger.info("reading new from cache with limit") cursor.execute( "SELECT * FROM NEWS WHERE date(DATA) = DATE(?) AND SOURCE_LINK = ? LIMIT ?", (date_of_news, source_link, limit,)) - else: + elif not limit and source_link: + logger.info("reading new from cache without limit") cursor.execute("SELECT * FROM NEWS WHERE date(DATA) = DATE(?) AND SOURCE_LINK = ?", (date_of_news, source_link,)) + # the user did not enter "source_link" + if limit and not source_link: + logger.info("reading all news from cache with limit ") + cursor.execute( + "SELECT * FROM NEWS WHERE date(DATA) = DATE(?) 
LIMIT ?", + (date_of_news, limit,)) + elif not limit and not source_link: + logger.info("reading all news from cache without limit") + cursor.execute("SELECT * FROM NEWS WHERE date(DATA) = DATE(?)", + (date_of_news,)) for row in cursor: links = row[7].split("\n") @@ -91,8 +137,19 @@ def read_news(list_of_news: list, limit: int, source_link, date_of_news: datetim link=row[4], info_about_image=row[5], briefly_about_news=row[6], - links_from_news=links[:-1]) + links_from_news=links) list_of_news.append(news) if not list_of_news: + logger.error("story on is empty") raise DataBaseEmpty(Exception("Your news story on is empty ")) + logger.error("news read successfully") + +def clear_the_history(connect, name_database, name_table): + logger = logging.getLogger("rss_reader.database.clear_the_history") + if is_table(connect, name_table, name_database): + cursor = connect.cursor() + cursor.execute(f'DELETE FROM {name_table}') + connect.commit() + print('The story is cleared') + logger.info('The story is cleared') diff --git a/final_task/rss_reader/exceptions.py b/final_task/rss_reader/exceptions.py index f2ee456..761eeb0 100644 --- a/final_task/rss_reader/exceptions.py +++ b/final_task/rss_reader/exceptions.py @@ -1,2 +1,7 @@ -class TimeOutExeption(Exception): pass -class DataBaseEmpty(Exception):pass \ No newline at end of file +class TimeOutExeption(Exception): + pass + + +class DataBaseEmpty(Exception): + pass + diff --git a/final_task/rss_reader/pars_args.py b/final_task/rss_reader/pars_args.py index 522f709..77e03f2 100644 --- a/final_task/rss_reader/pars_args.py +++ b/final_task/rss_reader/pars_args.py @@ -20,7 +20,7 @@ def create_parser(): # add information about the expected parameters # using the add_argument method one call for each parameter). 
- parser.add_argument('source', type=str, default="not url", help='RSS URL') + parser.add_argument('source', type=str, nargs='?', default="", help='RSS URL') parser.add_argument('--version', action='version', help='Print version info', version=f'{5.0}') @@ -40,6 +40,8 @@ def create_parser(): help='the conversion of news in pdf file') parser.add_argument('--colorize', action='store_const', const=True, default=False, help='print news in multi colored format') + parser.add_argument('--clear', action='store_const', const=True, default=False, + help='Clears news story') return parser diff --git a/final_task/rss_reader/parser_rss.py b/final_task/rss_reader/parser_rss.py index 790bf92..4a81f24 100644 --- a/final_task/rss_reader/parser_rss.py +++ b/final_task/rss_reader/parser_rss.py @@ -18,7 +18,7 @@ @contextmanager def timeout_sec(seconds): """ - contextmanager to check the expectation of a response + Contextmanager to check the expectation of a response and if the response does not come for a long time, an error """ @@ -33,7 +33,13 @@ def signal_handler(signum, frame): signal.alarm(0) -def valid_date(date_text): +def valid_date(date_text: str) -> datetime.datetime: + """ + Checks the entered date and and throws an exception + if the date does not match the format + :param date_text: + :return: + """ try: date = datetime.datetime.strptime(date_text, '%Y%m%d') except ValueError: @@ -43,7 +49,9 @@ def valid_date(date_text): def get_link_image(summary: str) -> str: """ - + Selects a photo link from html + :param summary: + :return: """ tag = 'img src=' begin_position_link_img = summary.find(tag) + len(tag) + 1 @@ -54,7 +62,7 @@ def get_link_image(summary: str) -> str: def clear_text(text: str) -> str: """ - cleans text from problems that occurred when decoding formats + Cleans text from problems that occurred when decoding formats """ logger = logging.getLogger("rss_reader.parser_rss.clear_text") logger.info("clear text from news") @@ -62,6 +70,11 @@ def clear_text(text: 
str) -> str: def get_info_about_image(summary: str) -> str: + """ + Selects a info about image from html + :param summary: + :return: + """ logger = logging.getLogger("rss_reader.parser_rss.get_info_about_image") logger.info("return info about image") tag = 'alt=' @@ -72,6 +85,11 @@ def get_info_about_image(summary: str) -> str: def get_briefly_about_news(summary: str) -> str: + """ + Selects a info about news from html + :param summary: + :return: + """ logger = logging.getLogger("rss_reader.parser_rss.get_briefly_about_news") logger.info("return briefly info about news") result = re.compile(r'<.*?>') @@ -81,18 +99,17 @@ def get_briefly_about_news(summary: str) -> str: def get_news_feed(sourse_url: str) -> feedparser.parse: logger = logging.getLogger("rss_reader.parser_rss.get_news_feed") - logger.info("return news Feed") + with timeout_sec(10): news_feed = feedparser.parse(sourse_url) if news_feed['bozo'] != 0: + logger.error(news_feed['bozo_exception'].args[0]) raise URLError(news_feed['bozo_exception'].args[0]) + logger.info("return news Feed") return news_feed -def init_list_of_news( - list_of_news: list, - news_feed: feedparser.parse, - limit: int): +def init_list_of_news(list_of_news: list, news_feed: feedparser.parse, limit: int): """ Fills the list with news """ @@ -125,11 +142,4 @@ def init_list_of_news( ) list_of_news.append(news) - - - - - - - - + logger.info("list completed successfully") diff --git a/final_task/rss_reader/print_functions.py b/final_task/rss_reader/print_functions.py index 2636509..830cb02 100644 --- a/final_task/rss_reader/print_functions.py +++ b/final_task/rss_reader/print_functions.py @@ -5,15 +5,18 @@ from pars_args import get_args import colorama from colorama import Fore, Back, Style +import converter + +MODULE_LOGGER = logging.getLogger("rss_reader.print_functions") def print_news_in_json(list_of_news: list): """ - This function print news in the console in json format + Print news in the console in json format :param 
list_of_news: :return: """ - logger = logging.getLogger("rss_reader.parser_rss.print_news_in_json") + logger = logging.getLogger("rss_reader.print_functions.print_news_in_json") logger.info("print news in the console in json format") list_of_news_in_json = [] for news in list_of_news: @@ -22,24 +25,46 @@ def print_news_in_json(list_of_news: list): def print_news_without_cashing(): + """ + If you have problems with the database + user can use the program without caching + :return: + """ try: + logger = logging.getLogger("rss_reader.print_functions.print_news_without_cashing") + logger.info("print news without cashing") args = get_args() list_of_news = [] news_feed = parser_rss.get_news_feed(args.source) parser_rss.init_list_of_news(list_of_news, news_feed, args.limit) if args.json: - print_news_in_json(list_of_news) + if args.colorize: + print_news_in_json_in_multi_colored_format(list_of_news) + else: + print_news_in_json(list_of_news) else: - print_news(list_of_news) + if args.colorize: + print_news_in_multi_colored_format(list_of_news) + else: + print_news(list_of_news) + if args.to_html: + converter.conversion_of_news_in_html(args.to_html, list_of_news) + if args.to_pdf: + converter.conversion_of_news_in_pdf(args.to_pdf, list_of_news) + logger.info("print news without cashing completed successfully") except URLError as er: + logger = logging.getLogger("rss_reader.print_functions.print_news_without_cashing") + logger.error(er) print(er) except Exception as e: + logger = logging.getLogger("rss_reader.print_functions.print_news_without_cashing") + logger.error(e) print(e) def print_news(list_of_news: list): """ - This function print news in the console + Print news in the console :param feed_title: :param list_of_news: :return: @@ -53,6 +78,12 @@ def print_news(list_of_news: list): def print_news_in_multi_colored_format(list_of_news: list): + """ + Print news in the console in colorized mode + :param list_of_news: + :return: + """ + logger = 
logging.getLogger("rss_reader.parser_rss.print_news_in_multi_colored_format") colorama.init() for number, news in enumerate(list_of_news): links = "" @@ -66,9 +97,17 @@ def print_news_in_multi_colored_format(list_of_news: list): print(Style.RESET_ALL + Fore.YELLOW + f'Info about image: {news.info_about_image}') print(Style.RESET_ALL + Fore.GREEN + f'Briefly about news: {news.briefly_about_news}') print(Style.RESET_ALL + Fore.CYAN + f'Links: \n{links}') + logger.info("print completed successfully") def print_news_in_json_in_multi_colored_format(list_of_news: list): + """ + Print news in json format in the console in colorized mode + + :param list_of_news: + :return: + """ + logger = logging.getLogger("rss_reader.parser_rss.print_news_in_json_in_multi_colored_format") result = "\033[1m\033[35m[\033[0m\n" for number, news in enumerate(list_of_news): @@ -93,3 +132,4 @@ def print_news_in_json_in_multi_colored_format(list_of_news: list): result += "\033[1m\033[35m]\033[0m" print(result) + logger.info("print completed successfully") diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index 89b21c8..c292f92 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -2,13 +2,11 @@ import sqlite3 import sys from contextlib import closing -from urllib.error import URLError import converter import database import pars_args import parser_rss -from exceptions import DataBaseEmpty import print_functions @@ -36,53 +34,50 @@ def main(): # add handler to logger object logger.info("Program started") - with closing(database.connect_to_database()) as con: - cursor = con.cursor() - database.create_table(con, cursor) - args = pars_args.get_args() - list_of_news = [] - if args.date: - date = parser_rss.valid_date(args.date) - database.read_news(list_of_news, args.limit, args.source, date, cursor) + + with closing(database.connect_to_database('database.db')) as con: + if args.clear: + database.clear_the_history(con, 
'database.db', 'NEWS') else: - news_feed = parser_rss.get_news_feed(args.source) - parser_rss.init_list_of_news(list_of_news, news_feed, args.limit) - database.write_to(list_of_news, args.source, cursor) - if args.json: - if args.colorize: - print_functions.print_news_in_json_in_multi_colored_format(list_of_news) + cursor = con.cursor() + database.create_table(con, cursor, 'database.db') + args = pars_args.get_args() + list_of_news = [] + if args.date: + date = parser_rss.valid_date(args.date) + database.read_news(list_of_news, args.limit, args.source, date, cursor) else: - print_functions.print_news_in_json(list_of_news) - else: - if args.colorize: - print_functions.print_news_in_multi_colored_format(list_of_news) + news_feed = parser_rss.get_news_feed(args.source) + parser_rss.init_list_of_news(list_of_news, news_feed, args.limit) + database.write_to(list_of_news, args.source, cursor) + if args.json: + if args.colorize: + print_functions.print_news_in_json_in_multi_colored_format(list_of_news) + else: + print_functions.print_news_in_json(list_of_news) else: - print_functions.print_news(list_of_news) - if args.to_html: - converter.conversion_of_news_in_html(args.to_html, list_of_news) - if args.to_pdf: - converter.conversion_of_news_in_pdf(args.to_pdf, list_of_news) - con.commit() - - except sqlite3.OperationalError as er: + if args.colorize: + print_functions.print_news_in_multi_colored_format(list_of_news) + else: + print_functions.print_news(list_of_news) + if args.to_html: + converter.conversion_of_news_in_html(args.to_html, list_of_news) + if args.to_pdf: + converter.conversion_of_news_in_pdf(args.to_pdf, list_of_news) + con.commit() + except (sqlite3.OperationalError, MemoryError)as er: print_functions.print_news_without_cashing() print("Check your database," "news is not saved " "you cannot use --date\n" - "Please read README.md") + ) print(er) except parser_rss.TimeOutExeption as e: print(e) - except URLError as er: - print(er) - except DataBaseEmpty as d: 
- print(d) - except ValueError as v: - print(v) - except FileNotFoundError as e: - print(e) except Exception as e: print(e) + except KeyboardInterrupt as key_error: + print("The program is interrupted " + str(key_error)) if __name__ == '__main__': diff --git a/final_task/setup.py b/final_task/setup.py index e132adb..d210cd9 100644 --- a/final_task/setup.py +++ b/final_task/setup.py @@ -8,7 +8,6 @@ author_email='matyushenoksergei@yandex.by', package_dir={'rss_reader': 'rss_reader'}, scripts=['rss_reader/News.py', - 'config.txt', 'rss_reader/pars_args.py', 'rss_reader/parser_rss.py', 'rss_reader/exceptions.py', diff --git a/final_task/tests/test_News.py b/final_task/tests/test_News.py index 4968d2b..f76b545 100644 --- a/final_task/tests/test_News.py +++ b/final_task/tests/test_News.py @@ -40,3 +40,5 @@ def test_get_json(self): self.assertEqual(data['Links'], ['link', 'link_on_image']) +if __name__ == '__main__': + unittest.main() diff --git a/final_task/tests/test_converter.py b/final_task/tests/test_converter.py index 6806436..e57ede4 100644 --- a/final_task/tests/test_converter.py +++ b/final_task/tests/test_converter.py @@ -12,16 +12,16 @@ class TestConverters(unittest.TestCase): def test_get_path(self): with self.assertRaises(FileNotFoundError) as error: converter.get_path('path_not_exist.txt', '.pdf') - self.assertEqual(str(error.exception), 'Invalid expansion ') + self.assertEqual(str(error.exception), 'Invalid expansion path_not_exist.txt') with self.assertRaises(FileNotFoundError) as error: converter.get_path('path_not_exist.pdf', '.pdf') - self.assertEqual(str(error.exception), 'File or directory not found') + self.assertEqual(str(error.exception), 'File or directory not found path_not_exist.pdf') with self.assertRaises(FileNotFoundError) as error: converter.get_path('path_not_exist.txt', '.html') - self.assertEqual(str(error.exception), 'Invalid expansion ') + self.assertEqual(str(error.exception), 'Invalid expansion path_not_exist.txt') with 
self.assertRaises(FileNotFoundError) as error: converter.get_path('path_not_exist.html', '.html') - self.assertEqual(str(error.exception), 'File or directory not found') + self.assertEqual(str(error.exception), 'File or directory not found path_not_exist.html') def test_get_img(self): self.assertEqual(converter.get_img("name", "not link"), False) @@ -90,3 +90,7 @@ def test_get_html(self): document_of_html = converter.get_html(list_of_new) self.assertEqual(document_of_html.render(), verifiable_info) + + +if __name__ == '__main__': + unittest.main() diff --git a/final_task/tests/test_database.py b/final_task/tests/test_database.py index 4d6bb3d..f066e03 100644 --- a/final_task/tests/test_database.py +++ b/final_task/tests/test_database.py @@ -1,24 +1,63 @@ import sys from contextlib import closing from dateutil import parser +import os sys.path.insert(1, 'final_task/rss_reader') import database import unittest import exceptions +from News import News class TestNews(unittest.TestCase): + @staticmethod + def delete_database(database_name): + if os.path.isfile(database_name): + os.remove(database_name) + + def setUp(self): + self.item = News(feed="feed", + title="title", + date=parser.parse("2019-11-17 10:44:20-05:00"), + link="link", + info_about_image="info_about_image", + briefly_about_news="briefly_about_news", + links_from_news=["link", "link_on_image"] + ) + def test_is_table(self): - self.assertEqual(database.is_table("NEWSFA"), False) + with closing(database.connect_to_database('database_fail.db'))as con: + self.assertEqual(database.is_table(con, "NEWSFA", 'database_test.db'), False) + if os.path.isfile('database_test.db'): + os.remove('database_test.db') def test_read_news(self): - with closing(database.connect_to_database())as con: + with closing(database.connect_to_database('database_fail.db'))as con: cursor = con.cursor() - database.create_table(con, cursor) + database.create_table(con, cursor, 'database_fail.db') date = parser.parse("10011001") list_of_news 
= [] with self.assertRaises(exceptions.DataBaseEmpty) as error: database.read_news(list_of_news, 2, "not link", date, cursor) self.assertEqual(str(error.exception), 'Your news story on is empty ') + self.delete_database('database_fail.db') + + def test_create_table(self): + with closing(database.connect_to_database('databasefail.db'))as con: + database.create_table(con, con.cursor(), 'databasefail.db') + self.assertTrue(database.is_table(con, "NEWS", 'databasefail.db')) + self.delete_database('databasefail.db') + + def test_write_to(self): + with closing(database.connect_to_database('databasefail_wr.db'))as con: + database.create_table(con, con.cursor(), 'databasefail_wr.db') + database.write_to([self.item, ], self.item.link, con.cursor()) + list_of_news = [] + database.read_news(list_of_news, 1, self.item.link, self.item.date, con.cursor()) + self.assertEqual(len(list_of_news), 1) + self.delete_database('databasefail_wr.db') + +if __name__ == '__main__': + unittest.main() diff --git a/final_task/tests/test_pars_args.py b/final_task/tests/test_pars_args.py index 804e109..e84b89a 100644 --- a/final_task/tests/test_pars_args.py +++ b/final_task/tests/test_pars_args.py @@ -12,7 +12,7 @@ class TestParsArgs(unittest.TestCase): return_value=argparse.Namespace(source='https://news.tut.by/rss/', version='2.0', json=False, - verbose = False, + verbose=False, limit=2, date="20191212")) def test_command(self, mock_args): @@ -23,3 +23,7 @@ def test_command(self, mock_args): self.assertEqual(data.verbose, False) self.assertEqual(data.limit, 2) self.assertEqual(data.date, "20191212") + + +if __name__ == '__main__': + unittest.main() diff --git a/final_task/tests/test_parser_rss.py b/final_task/tests/test_parser_rss.py index a67143c..9ed3ca5 100644 --- a/final_task/tests/test_parser_rss.py +++ b/final_task/tests/test_parser_rss.py @@ -1,14 +1,11 @@ -import sys import os +import sys sys.path.insert(1, 'final_task/rss_reader') from parser_rss import * from News import News import 
unittest -from io import StringIO -from unittest.mock import patch import feedparser -import print_functions class TestParserRss(unittest.TestCase): @@ -78,35 +75,12 @@ def test_get_news_feed(self): self.assertEqual(str(error.exception), '') self.assertEqual(get_news_feed(self.url), self.news_feed) - def test_print_news(self): - with patch('sys.stdout', new=StringIO()) as fake_out_put: - print_functions.print_news([self.item, ]) - self.assertEqual(fake_out_put.getvalue().strip(), self.result) - def test_init_list_of_news(self): list_of_news = [] init_list_of_news(list_of_news, self.news_feed, 2) self.assertEqual(len(list_of_news), 2) - def test_print_news_in_json(self): - self.result = "[\n" - self.result += " {\n" - self.result += ''' "Feed": "feed",\n''' - self.result += ''' "Title": "title",\n''' - self.result += ''' "Date": "2019-11-17 10:44:20-05:00",\n''' - self.result += ''' "Link": "link",\n''' - self.result += ''' "Info about image": "info_about_image",\n''' - self.result += ''' "Briefly about news": "briefly_about_news",\n''' - self.result += ''' "Links": [\n''' - self.result += ''' "link",\n''' - self.result += ''' "link_on_image"\n''' - self.result += " ]\n" - self.result += " }\n" - self.result += "]" - with patch('sys.stdout', new=StringIO()) as fake_out_put: - print_functions.print_news_in_json([self.item, ]) - self.assertEqual(fake_out_put.getvalue().strip(), self.result) - if __name__ == '__main__': - unittest.main() +if __name__ == '__main__': + unittest.main() diff --git a/final_task/tests/test_print_functions.py b/final_task/tests/test_print_functions.py new file mode 100644 index 0000000..2174cbe --- /dev/null +++ b/final_task/tests/test_print_functions.py @@ -0,0 +1,106 @@ +import sys +import os + +sys.path.insert(1, 'final_task/rss_reader') +from parser_rss import * +from News import News + +from io import StringIO +from unittest.mock import patch +import feedparser +import print_functions +import unittest + + +class 
TestPrintFunctios(unittest.TestCase): + + def setUp(self): + self.item = News(feed="feed", + title="title", + date=parser.parse("2019-11-17 10:44:20-05:00"), + link="link", + info_about_image="info_about_image", + briefly_about_news="briefly_about_news", + links_from_news=["link", "link_on_image"] + ) + self.result = "1\n" + self.result += "Feed: feed\n" + self.result += "Title: title \n" + self.result += "Date: 2019-11-17 10:44:20-05:00 \n" + self.result += "Link: link\n" + self.result += "Info about image: info_about_image\n" + self.result += "Briefly about news: briefly_about_news\n" + self.result += "Links: \n" + self.result += "[0] link\n" + self.result += "[1] link_on_image\n" + self.result += '\n' + self.result += '-' * 100 + + if os.path.isfile('final_task/tests/news_feed_for_test.xml'): + self.url = 'final_task/tests/news_feed_for_test.xml' + else: + self.url = 'news_feed_for_test.xml' + + self.news_feed = feedparser.parse(self.url) + + def test_print_news(self): + with patch('sys.stdout', new=StringIO()) as fake_out_put: + print_functions.print_news([self.item, ]) + self.assertEqual(fake_out_put.getvalue().strip(), self.result) + + def test_print_news_in_json(self): + self.result = "[\n" + self.result += " {\n" + self.result += ''' "Feed": "feed",\n''' + self.result += ''' "Title": "title",\n''' + self.result += ''' "Date": "2019-11-17 10:44:20-05:00",\n''' + self.result += ''' "Link": "link",\n''' + self.result += ''' "Info about image": "info_about_image",\n''' + self.result += ''' "Briefly about news": "briefly_about_news",\n''' + self.result += ''' "Links": [\n''' + self.result += ''' "link",\n''' + self.result += ''' "link_on_image"\n''' + self.result += " ]\n" + self.result += " }\n" + self.result += "]" + with patch('sys.stdout', new=StringIO()) as fake_out_put: + print_functions.print_news_in_json([self.item, ]) + self.assertEqual(fake_out_put.getvalue().strip(), self.result) + + def test_print_news_in_json_in_multi_colored_format(self): + 
self.result = "\033[1m\033[35m[\033[0m\n" + self.result += " \033[1m\033[31m{\033[0m\n" + self.result += ''' \033[1m\033[34m"Feed": "feed",\033[0m\n''' + self.result += ''' \033[32m"Title": "title",\033[0m\n''' + self.result += ''' \033[33m"Date": "2019-11-17 10:44:20-05:00",\033[0m\n''' + self.result += ''' \033[36m"Link": "link",\033[0m\n''' + self.result += ''' \033[33m"Info about image": "info_about_image",\033[0m\n''' + self.result += ''' \033[32m"Briefly about news": "briefly_about_news",\033[0m\n''' + self.result += ''' \033[36m"Links": [\n''' + self.result += ''' "link",\n''' + self.result += ''' "link_on_image"\n''' + self.result += " ]\033[0m\n" + self.result += " \033[1m\033[31m}\033[0m\n" + self.result += "\033[1m\033[35m]\033[0m" + with patch('sys.stdout', new=StringIO()) as fake_out_put: + print_functions.print_news_in_json_in_multi_colored_format([self.item, ]) + self.assertEqual(fake_out_put.getvalue().strip(), self.result) + + def test_print_news_in_multi_colored_format(self): + self.result = "1:\n" + self.result += "Feed: feed\n" + self.result += "Title: title\n" + self.result += "Date: 2019-11-17 10:44:20-05:00\n" + self.result += "Link: link\n" + self.result += "Info about image: info_about_image\n" + self.result += "Briefly about news: briefly_about_news\n" + self.result += "Links: \n" + self.result += "[0] link\n" + self.result += "[1] link_on_image" + with patch('sys.stdout', new=StringIO()) as fake_out_put: + print_functions.print_news_in_multi_colored_format([self.item, ]) + self.assertEqual(fake_out_put.getvalue().strip(), self.result) + + +if __name__ == '__main__': + unittest.main()