diff --git a/final_task/README.md b/final_task/README.md
index 7af281f..8149a1e 100644
--- a/final_task/README.md
+++ b/final_task/README.md
@@ -1,3 +1,193 @@
-# Your readme here
-Some text.
-Checkout how to write this file using *markdown*.
+# Python RSS-reader
+Python RSS-reader is a command-line utility which receives an RSS URL and prints the results in a human-readable format.
+
+REQUIREMENTS:
+-- feedparser 5.2.1
+-- fpdf 1.7.2
+-- dominate 2.4.0
+
+5 main files of the project:
+* rss_reader.py - the file which runs the application
+* ConsoleParse.py - contains code which parses arguments from console
+* Entry.py - contains class Entry which represents an article
+* Handler.py - contains class Handler which performs the processing of Entry objects
+* Logging.py - contains a decorator for printing logs to stdout
+
+To start Python RSS-reader run one of the following commands
+in command line:
+```shell
+$ python rss_reader.py "https://news.yahoo.com/rss/" --limit 1
+```
+```shell
+$ python rss_reader.py "https://timesofindia.indiatimes.com/rssfeedstopstories.cms" --json --limit 1
+```
+
+Structure of output when `--json` is selected:
+```
+{
+ "Feed": "Yahoo News - Latest News & Headlines",
+ "Title": "PHOTOS: #MenToo: The hidden tragedy of male sexual abuse in the military",
+ "DateInt": "20200102",
+ "Date": "Tue, 1 Dec 2019 ",
+ "Link": "https://news.yahoo.com/photos-men-too-the-hidden-tragedy-of-male-sexual-abuse-in-the-military-005342483.html",
+ "Summary": "[image 1: PHOTOS: #MenToo: The hidden tragedy of male sexual abuse in the military][1] Award-winning photojournalist Mary F. Calvert has spent six years documenting the prevalence of rape in the military and the effects on victims. She began with a focus on female victims but more recently has examined the underreported incidence of sexual assaults on men and the lifelong trauma it can inflict.",
+ "Links": [
+ "https://news.yahoo.com/photos-men-too-the-hidden-tragedy-of-male-sexual-abuse-in-the-military-005342483.html",
+ "http://l1.yimg.com/uu/api/res/1.2/LR4Vdg0MD6osVIDtZW75aA--/YXBwaWQ9eXRhY2h5b247aD04Njt3PTEzMDs-/https://media-mbst-pub-ue1.s3.amazonaws.com/creatr-uploaded-images/2019-12/316fa7e0-2c23-11ea-bed7-1ebe74b8c372"
+ ],
+ "Source": "https://news.yahoo.com/rss/"
+}
+```
+## Iteration 2
+If you have Python installed, follow these steps to install the rss-reader CLI utility:
+1. Clone this repository
+```
+$ git clone https://github.com/IlyaTorch/FinalTaskRssParser.git
+```
+2. Go to the directory FinalTaskRssParser\final_task
+3. run ```$ python setup.py sdist```
+4. Go to the directory dist
+```
+$ cd dist
+```
+5. Install CLI utility rss-reader:
+```
+$ pip install rss-reader-4.0.tar.gz
+```
+And we can use CLI utility rss-reader:
+```
+rss-reader "https://news.yahoo.com/rss/" --limit 1
+```
+```
+Feed: Yahoo News - Latest News & Headlines
+
+Title: Graham now says Trump's Ukraine policy was too 'incoherent' for quid pro quo
+Date: Wed, 06 Nov 2019 14:22:10 -0500
+Link: https://news.yahoo.com/graham-trump-ukraine-incoherent-quid-pro-quo-192210175.html
+
+
+[image 1: Graham now says Trump's Ukraine policy was too 'incoherent' for quid pro quo][1] A day after saying he wouldn’t bother to read the testimony, Sen. Lindsey Graham now says he did read it, and his conclusion is that the Trump administration’s Ukraine policy was too "incoherent" for it to have orchestrated the quid pro quo at the heart of the impeachment inquiry.
+
+
+Links:
+[0] https://news.yahoo.com/graham-trump-ukraine-incoherent-quid-pro-quo-192210175.html (link)
+[1] http://l2.yimg.com/uu/api/res/1.2/aWhGys7_IW5qIjKaiJpPfg--/YXBwaWQ9eXRhY2h5b247aD04Njt3PTEzMDs-/https://media-mbst-pub-ue1.s3.amazonaws.com/creatr-uploaded-images/2019-11/5527ffe0-00ca-11ea-9f7d-d1e736c1315d (image)
+```
+If you don't have Python installed, follow these steps:
+1. Download and install python from https://www.python.org/downloads/
+2. Clone this repository
+```
+$ git clone https://github.com/IlyaTorch/FinalTaskRssParser.git
+```
+3. Go to the directory FinalTaskRssParser\final_task
+4. run ```$ python setup.py sdist```
+5. Go to the directory dist
+```
+$ cd dist
+```
+6. Install CLI utility rss-reader:
+```
+$ pip install rss-reader-1.0.tar.gz
+```
+And we can use it:
+```
+rss-reader "https://news.yahoo.com/rss/" --limit 1
+```
+```
+Feed: Yahoo News - Latest News & Headlines
+
+Title: Graham now says Trump's Ukraine policy was too 'incoherent' for quid pro quo
+Date: Wed, 06 Nov 2019 14:22:10 -0500
+Link: https://news.yahoo.com/graham-trump-ukraine-incoherent-quid-pro-quo-192210175.html
+
+
+[image 1: Graham now says Trump's Ukraine policy was too 'incoherent' for quid pro quo][1] A day after saying he wouldn’t bother to read the testimony, Sen. Lindsey Graham now says he did read it, and his conclusion is that the Trump administration’s Ukraine policy was too "incoherent" for it to have orchestrated the quid pro quo at the heart of the impeachment inquiry.
+
+
+Links:
+[0] https://news.yahoo.com/graham-trump-ukraine-incoherent-quid-pro-quo-192210175.html (link)
+[1] http://l2.yimg.com/uu/api/res/1.2/aWhGys7_IW5qIjKaiJpPfg--/YXBwaWQ9eXRhY2h5b247aD04Njt3PTEzMDs-/https://media-mbst-pub-ue1.s3.amazonaws.com/creatr-uploaded-images/2019-11/5527ffe0-00ca-11ea-9f7d-d1e736c1315d (image)
+```
+## Iteration 3
+News is stored in the local file cache.json as a list of JSON objects.
+The rss-reader app accepts an optional argument --date:
+```
+$ python rss_reader.py "https://news.tut.by/rss/" --date 20200102
+```
+```
+Feed: TUT.BY: Новости ТУТ
+
+Title: Кристин Килер, любовница британского министра и советского шпиона: кем она была на самом деле?
+Date: Fri, 2 Jan 2020
+Link: https://news.tut.by/culture/667279.html?utm_campaign=news-feed&utm_medium=rss&utm_source=rss-news
+
+[image 2: Фото: bbc.com][2] Кристин Килер было всего 19, когда она оказалась в центре секс-скандала, приведшего к отставке британского кабинета министров. Ее выставили злодейкой, и затем всю оставшуюся жизнь эта история преследовала ее. Впервые ее трактовка событий была воплощена в сериале, созданном Би-би-си.
+
+Links:
+[0] https://news.tut.by/culture/667279.html?utm_campaign=news-feed&utm_medium=rss&utm_source=rss-news (link)
+[1] https://img.tyt.by/n/kultura/0c/9/kristin_killer3.jpg (image)
+[2] https://img.tyt.by/thumbnails/n/kultura/0c/9/kristin_killer3.jpg (image)
+```
+```
+$ python rss_reader.py --date 20200102
+```
+```
+Feed: Yahoo News - Latest News & Headlines
+
+Title: PHOTOS: #MenToo: The hidden tragedy of male sexual abuse in the military
+Date: Tue, 1 Dec 2019
+Link: https://news.yahoo.com/photos-men-too-the-hidden-tragedy-of-male-sexual-abuse-in-the-military-005342483.html
+
+[image 1: PHOTOS: #MenToo: The hidden tragedy of male sexual abuse in the military][1] Award-winning photojournalist Mary F. Calvert has spent six years documenting the prevalence of rape in the military and the effects on victims. She began with a focus on female victims but more recently has examined the underreported incidence of sexual assaults on men and the lifelong trauma it can inflict.
+
+Links:
+[0] https://news.yahoo.com/photos-men-too-the-hidden-tragedy-of-male-sexual-abuse-in-the-military-005342483.html (link)
+[1] http://l1.yimg.com/uu/api/res/1.2/LR4Vdg0MD6osVIDtZW75aA--/YXBwaWQ9eXRhY2h5b247aD04Njt3PTEzMDs-/https://media-mbst-pub-ue1.s3.amazonaws.com/creatr-uploaded-images/2019-12/316fa7e0-2c23-11ea-bed7-1ebe74b8c372 (image)
+
+
+Feed: TUT.BY: Новости ТУТ
+
+Title: Кристин Килер, любовница британского министра и советского шпиона: кем она была на самом деле?
+Date: Fri, 2 Jan 2020
+Link: https://news.tut.by/culture/667279.html?utm_campaign=news-feed&utm_medium=rss&utm_source=rss-news
+
+[image 2: Фото: bbc.com][2] Кристин Килер было всего 19, когда она оказалась в центре секс-скандала, приведшего к отставке британского кабинета министров. Ее выставили злодейкой, и затем всю оставшуюся жизнь эта история преследовала ее. Впервые ее трактовка событий была воплощена в сериале, созданном Би-би-си.
+
+Links:
+[0] https://news.tut.by/culture/667279.html?utm_campaign=news-feed&utm_medium=rss&utm_source=rss-news (link)
+[1] https://img.tyt.by/n/kultura/0c/9/kristin_killer3.jpg (image)
+[2] https://img.tyt.by/thumbnails/n/kultura/0c/9/kristin_killer3.jpg (image)
+```
+The --date argument works with all the other arguments
+```
+$ python rss_reader.py --date 20191113 --json --verbose
+```
+## Iteration 4
+Conversion of news to HTML and PDF formats is available.
+Example:
+```
+$ python rss_reader.py "https://news.yahoo.com/rss/" --to-html "F:/Path/to/your/folder" --to-pdf "F:/Path/to/your/folder"
+```
+These options work with all the other arguments.
+```
+$ python rss_reader.py --date 20191118 --to-html "F:/Path/to/your/folder" --limit 1
+```
+## Iteration 5
+A new optional argument `--colorize` is available. It prints the news in colorized mode.
+The option works with all the other arguments except `--to-html` and `--to-pdf`.
+```
+$ python rss_reader.py --date 20200102 --colorize
+```
+```diff
++ Feed: Yahoo News - Latest News & Headlines
+
++ Title: PHOTOS: #MenToo: The hidden tragedy of male sexual abuse in the military
++ Date: Tue, 1 Dec 2019
++ Link: https://news.yahoo.com/photos-men-too-the-hidden-tragedy-of-male-sexual-abuse-in-the-military-005342483.html
+
++[image 1: PHOTOS: #MenToo: The hidden tragedy of male sexual abuse in the military][1] Award-winning photojournalist Mary F. Calvert has spent six years documenting the prevalence of rape in the military and the effects on victims. She began with a focus on female victims but more recently has examined the underreported incidence of sexual assaults on men and the lifelong trauma it can inflict.
+
++ Links:
++ [0] https://news.yahoo.com/photos-men-too-the-hidden-tragedy-of-male-sexual-abuse-in-the-military-005342483.html (link)
++ [1] http://l1.yimg.com/uu/api/res/1.2/LR4Vdg0MD6osVIDtZW75aA--/YXBwaWQ9eXRhY2h5b247aD04Njt3PTEzMDs-/https://media-mbst-pub-ue1.s3.amazonaws.com/creatr-uploaded-images/2019-12/316fa7e0-2c23-11ea-bed7-1ebe74b8c372 (image)
+```
diff --git a/final_task/rss_reader/ConsoleParse.py b/final_task/rss_reader/ConsoleParse.py
new file mode 100644
index 0000000..3c7c7d4
--- /dev/null
+++ b/final_task/rss_reader/ConsoleParse.py
@@ -0,0 +1,17 @@
+import argparse
+
+
+def get_arguments_from_console():
+ """Reading command line arguments"""
+ arg_parser = argparse.ArgumentParser(description="Pure Python command-line RSS reader.")
+ arg_parser.add_argument("source", nargs='?', type=str, default="", help="RSS URL")
+ arg_parser.add_argument("--version", action="store_true", help="Print version info")
+ arg_parser.add_argument("--json", action="store_true", help="Print result as JSON in stdout")
+ arg_parser.add_argument("--verbose", action="store_true", help="Outputs verbose status messages")
+ arg_parser.add_argument("--limit", type=int, help="Limit news topics if this parameter provided")
+ arg_parser.add_argument("--to-html", type=str, help="Output to html format")
+ arg_parser.add_argument("--to-pdf", type=str, help="Output to pdf format")
+ arg_parser.add_argument("--date", type=int, help="The new from the specified day will be printed out")
+ arg_parser.add_argument("--colorize", action="store_true", help="Print news in colorized mode")
+
+ return arg_parser.parse_args()
diff --git a/final_task/rss_reader/DejaVuSans.ttf b/final_task/rss_reader/DejaVuSans.ttf
new file mode 100644
index 0000000..e5f7eec
Binary files /dev/null and b/final_task/rss_reader/DejaVuSans.ttf differ
diff --git a/final_task/rss_reader/Entry.py b/final_task/rss_reader/Entry.py
new file mode 100644
index 0000000..391f7d2
--- /dev/null
+++ b/final_task/rss_reader/Entry.py
@@ -0,0 +1,120 @@
+import logging
+import time
+from Logging import logging_decorator
+import html
+from html import parser
+
+
+class Entry:
+ """class for every article from http:link...link.rss"""
+ @logging_decorator
+ def __init__(self, feed: str = "", title: str = "", date: str = "", article_link: str = "",
+ summary: str = "", links: tuple = (), published_parsed: time.struct_time = ()):
+ self.__feed = feed
+ self.__title = self.parse_html(title)
+ self.__article_link = article_link
+ self.__links = links
+ self.__summary = self.parse_html(summary)
+ if published_parsed:
+ self.__publish_year = published_parsed.tm_year
+ self.__publish_month = published_parsed.tm_mon
+ self.__publish_day = published_parsed.tm_mday
+ # sometimes there is a problem when in the attribute published entries have day that is wrong
+ # then code below corrects it and truncates date-string
+ self.__date = (date[:date.find(",")+2] + str(self.__publish_day) + date[date[5:].find(' ') + 5:]
+ )[:len("Fri, 22 Nov 2019")]
+ else:
+ self.__date = date[:len("Fri, 22 Nov 2019")]
+ logging.info("Entry object created")
+
+ @logging_decorator
+ def get_feed(self) -> str:
+ """Return the feed title this article belongs to."""
+ return self.__feed
+
+ @logging_decorator
+ def get_title(self) -> str:
+ """Return the article title as stored after parse_html."""
+ return self.__title
+
+ @logging_decorator
+ def get_article_link(self) -> str:
+ """Return the link to the article."""
+ return self.__article_link
+
+ @logging_decorator
+ def get_links(self) -> tuple:
+ """Return the links passed to the constructor (print_links treats index 0 as the article link)."""
+ return self.__links
+
+ @logging_decorator
+ def get_summary(self) -> str:
+ """Return the article summary as stored after parse_html."""
+ return self.__summary
+
+ @logging_decorator
+ def get_publish_year(self):
+ """Return the publication year taken from published_parsed.tm_year."""
+ return self.__publish_year
+
+ @logging_decorator
+ def get_publish_month(self):
+ """Return the publication month taken from published_parsed.tm_mon."""
+ return self.__publish_month
+
+ @logging_decorator
+ def get_publish_day(self):
+ """Return the publication day of month taken from published_parsed.tm_mday."""
+ return self.__publish_day
+
+ @logging_decorator
+ def get_date(self) -> str:
+ """Return the shortened date string built by the constructor."""
+ return self.__date
+
+ @logging_decorator
+ def print_feed(self) -> None:
+ """Print the "Feed: ..." line followed by an empty line."""
+ print(f"Feed: {self.__feed}\n")
+
+ @logging_decorator
+ def print_title(self) -> None:
+ """Print the "Title: ..." line."""
+ print(f"Title: {self.__title}")
+
+ @logging_decorator
+ def print_date(self) -> None:
+ """Print the "Date: ..." line."""
+ print(f"Date: {self.__date}")
+
+ @logging_decorator
+ def print_link(self) -> None:
+ """Print the "Link: ..." line with the article link."""
+ print(f"Link: {self.__article_link}")
+
+ @logging_decorator
+ def print_summary(self) -> None:
+ """Print the summary surrounded by empty lines."""
+ print(f"\n{self.__summary}\n")
+
+ @logging_decorator
+ def print_links(self) -> None:
+ print("Links:")
+ for num_link, link in enumerate(self.__links):
+ if num_link == 0:
+ print(f'[{num_link}] {link} (link)')
+ else:
+ print(f'[{num_link}] {link} (image)')
+ print('\n')
+
+ @logging_decorator
+ def parse_html(self, summary: str) -> str:
+ """selects alt and src attributes from and removes all the html tags from the entry.summary"""
+ while summary.count('
", start_cut + len("
') + 1:]
+
+ summary = html.parser.unescape(summary)
+ return summary
diff --git a/final_task/rss_reader/Handler.py b/final_task/rss_reader/Handler.py
new file mode 100644
index 0000000..595e54d
--- /dev/null
+++ b/final_task/rss_reader/Handler.py
@@ -0,0 +1,353 @@
+import json
+import logging
+import os
+import sys
+
+this_dir = os.path.abspath(os.path.dirname(__file__))
+sys.path.append(this_dir)
+import feedparser
+from Entry import Entry
+from Logging import logging_decorator
+from RSSReaderException import RSSReaderException
+from RSS_PDF import PDF
+import urllib.request
+from dominate.tags import html, head, meta, body, div, img, p, b, br, h1, a
+
+
+class Handler:
+ """class for handling different options: --version, --json, --limit Limit, --date, --to-html, --to-pdf"""
+
+ @logging_decorator
+ def __init__(self, source: str, limit: int, version: float):
+ self.__source = source
+ self.__limit = limit
+ self.__version = version
+ self.__parsed = feedparser.parse(self.__source)
+ self.__entries = []
+ for ent in self.__parsed.entries[:self.__limit]:
+ self.__entries.append(Entry(
+ self.__parsed.feed.title, ent.title, ent.published, ent.link, ent.summary,
+ tuple([link["href"] for link in ent.links]), ent.published_parsed)
+ )
+ if not len(self.__entries) and self.__source:
+ raise RSSReaderException("Error, no news. Check correctness of the url or your internet-connection")
+ logging.info("Handler object created")
+
+ @logging_decorator
+ def set_source(self, source: str) -> None:
+ """Replace the stored source URL (the feed is not parsed again)."""
+ self.__source = source
+
+ @logging_decorator
+ def get_source(self) -> str:
+ """Return the stored source URL."""
+ return self.__source
+
+ @logging_decorator
+ def set_limit(self, limit: int) -> None:
+ """Replace the stored news limit."""
+ self.__limit = limit
+
+ @logging_decorator
+ def get_limit(self) -> int:
+ """Return the stored news limit."""
+ return self.__limit
+
+ @logging_decorator
+ def set_version(self, version: float) -> None:
+ """Replace the stored application version."""
+ self.__version = version
+
+ @logging_decorator
+ def get_version(self) -> float:
+ """Return the stored application version."""
+ return self.__version
+
+ @logging_decorator
+ def set_parsed(self, parsed: dict) -> None:
+ """Replace the stored feedparser result (the Entry list is not rebuilt)."""
+ self.__parsed = parsed
+
+ @logging_decorator
+ def get_parsed(self) -> dict:
+ """Return the stored result of feedparser.parse for the source."""
+ return self.__parsed
+
+ @logging_decorator
+ def set_entries(self, entries: list) -> None:
+ """Replace the stored list of Entry objects."""
+ self.__entries = entries
+
+ @logging_decorator
+ def get_entries(self) -> list:
+ """Return the stored list of Entry objects."""
+ return self.__entries
+
+ # options of command line:
+ @logging_decorator
+ def option_version(self) -> None:
+ """Handle --version: print "version <number>" using the stored version."""
+ print(f"version {self.__version}")
+
+ @logging_decorator
+ def option_html(self, path: str) -> None:
+ """case when command line argument --to-html is selected"""
+ for entry in self.__entries:
+ self.write_cache(self.convert_entry_to_dict(entry))
+ self.write_entries_to_html(path)
+ print("HTML document created successfully")
+
+ @logging_decorator
+ def option_pdf(self, path: str) -> None:
+ """case when command line argument --to-pdf is selected"""
+ for entry in self.__entries:
+ self.write_cache(self.convert_entry_to_dict(entry))
+ self.write_entries_to_pdf(path)
+ print("PDF document created successfully")
+
+ @logging_decorator
+ def option_json(self) -> None:
+ """case when command line argument --json is selected"""
+ for entry in self.__entries:
+ self.write_cache(self.convert_entry_to_dict(entry))
+ self.print_to_json(self.convert_entry_to_dict(entry))
+
+ @logging_decorator
+ def option_default(self) -> None:
+ """case when no one of optional command line attributes is selected"""
+ for entry in self.__entries:
+ self.write_cache(self.convert_entry_to_dict(entry))
+ self.print_entry(entry)
+
+ @logging_decorator
+ def correct_title(self, title: str) -> str:
+ return title.replace('"', "_").replace("?", "_").replace(":", "_").replace("'", "_").replace(" ", "_") \
+ .replace("%", "_").replace("#", "_")[:15]
+
+ @logging_decorator
+ def write_cache(self, entry_dict: dict) -> None:
+ """write new news to cache in json format"""
+ if os.path.exists("cache.json"):
+ try:
+ with open("cache.json") as cache:
+ entries = json.load(cache)
+ except json.JSONDecodeError:
+ entries = []
+ else:
+ entries = []
+ # writing entries and saving images of entries to cache if list of entries with this title is empty
+ # else this entry already is the cache
+ if not [entry for entry in entries if entry["Title"] == entry_dict["Title"]]:
+ entries.append(entry_dict)
+ if len(entry_dict["Links"]) > 1:
+ for num_img_l, img_l in enumerate(entry_dict["Links"][1:]):
+ self.save_image(img_l, self.correct_title(entry_dict["Title"]) + str(num_img_l))
+
+ with open("cache.json", "w") as cache:
+ json.dump(entries, cache, indent=2)
+
+ @logging_decorator
+ def save_image(self, img_url: str, img_name: str) -> None:
+ """saving images to cache"""
+ if os.path.exists("images"):
+ if img_name.find(".jpg") == -1:
+ urllib.request.urlretrieve(img_url, f"images/{img_name}.jpg")
+ else:
+ urllib.request.urlretrieve(img_url, f"images/{img_name}")
+ else:
+ os.makedirs("images")
+ urllib.request.urlretrieve(img_url, f"images/{img_name}.jpg")
+
+ @logging_decorator
+ def option_date(self, date: str, do_json: bool, html_path: str = "", pdf_path: str = "", source: str = ""):
+ """case when command line argument --date is selected add entries from cache.json into daily_news: list
+ if they have date that is equal to user's --date DATE and then raise an exception in case when such articles
+ don't exist or print to console or to files in html or pdf formats"""
+ if os.path.exists("cache.json"):
+ try:
+ with open("cache.json") as cache:
+ entries = json.load(cache)
+ except json.JSONDecodeError:
+ entries = []
+ else:
+ raise RSSReaderException("Error. You have no cache. Try to run app with internet-connection")
+
+ # list of entries with the same date as user's --date DATE
+ if source:
+ daily_news = [entry for entry in entries if entry["DateInt"] == date and entry["Source"] == source]
+ else:
+ daily_news = [entry for entry in entries if entry["DateInt"] == date]
+ if not self.__limit:
+ self.__limit = len(daily_news)
+ if not daily_news:
+ raise RSSReaderException("Error. News aren't found")
+ # different cases of command line arguments
+ if pdf_path:
+ self.write_entries_to_pdf(pdf_path, daily_news[:self.__limit])
+ print("PDF document created successfully")
+ if html_path:
+ self.write_entries_to_html(html_path, daily_news[:self.__limit])
+ print("HTML document created successfully")
+ if do_json:
+ for news_json in daily_news[:self.__limit]:
+ self.print_to_json(news_json)
+ # default case
+ if not (pdf_path or html_path or do_json):
+ for news in daily_news[:self.__limit]:
+ entry = self.get_entry_from_dict(news)
+ self.print_entry(entry)
+
+ @logging_decorator
+ def print_entry(self, entry: Entry) -> None:
+ """Print one Entry to the console: feed, title, date, link, summary, then the links list."""
+ # the call order below is the order of the sections in the console output
+ entry.print_feed()
+ entry.print_title()
+ entry.print_date()
+ entry.print_link()
+ entry.print_summary()
+ entry.print_links()
+
+ @logging_decorator
+ def get_entry_from_dict(self, entry_dict: dict) -> Entry:
+ return Entry(entry_dict["Feed"], entry_dict["Title"], entry_dict["Date"], entry_dict["Link"],
+ entry_dict["Summary"], entry_dict["Links"])
+
+ @logging_decorator
+ def print_to_json(self, obj: dict) -> None:
+ try:
+ print(json.dumps(obj, indent=2, ensure_ascii=False))
+ except UnicodeEncodeError:
+ print(json.dumps(obj, indent=2))
+
+ @logging_decorator
+ def convert_entry_to_dict(self, entry: Entry) -> dict:
+ return {
+ "Feed": entry.get_feed(),
+ "Title": entry.get_title(),
+ "DateInt": str(entry.get_publish_year())
+ +
+ (
+ str(entry.get_publish_month()) if len(str(entry.get_publish_month())) == 2 else '0' + str(
+ entry.get_publish_month())
+ )
+ +
+ (
+ str(entry.get_publish_day()) if len(str(entry.get_publish_day())) == 2 else '0' + str(
+ entry.get_publish_day())
+ ),
+ "Date": entry.get_date(),
+ "Link": entry.get_article_link(),
+ "Summary": entry.get_summary(),
+ "Links": entry.get_links(),
+ "Source": self.__source
+ }
+
+ @logging_decorator
+ def write_entries_to_html(self, path: str, entries: tuple=()) -> None:
+ """writing of text view of instances of Entry class to html file"""
+ # in case of reading news from cache list of entries are got as dict
+ # and in case of online reading news only the path is passed to the method without list of entries
+ if os.path.exists(path) is False:
+ raise RSSReaderException('Error. No such folder. Check the correctness of the entered path \n')
+ if os.path.isdir(path) is False:
+ raise RSSReaderException("Error. It isn't a folder")
+
+ if not entries:
+ entries = self.__entries
+ # in case of reading news from cache, entries is the list of dicts and they are converted to Entry-object
+ # in case of absence of internet connection list of entries is empty
+
+ if isinstance(entries[0], dict):
+ entries = [self.get_entry_from_dict(entry) for entry in entries]
+
+ _html = html()
+ _html.add(head(meta(charset='utf-8')))
+ _body = _html.add(body())
+ with _body:
+ for entry in entries:
+ _div = _body.add(div())
+ _div += h1(entry.get_title())
+ _div += p(b("Feed: "), a(entry.get_feed()))
+ _div += p(b("Date: "), a(entry.get_date()))
+
+ text = entry.get_summary()
+ # adding of an image if the entry has image
+ if len(entry.get_links()) > 1:
+ while text.count('['):
+ text = text[:text.find('[')] + text[text.find(']') + 1:]
+ if text[0] == ' ':
+ text = text[1:]
+ for num_img_l in range(len(entry.get_links()[1:])):
+ _div += img(src=f"file:///{this_dir}/images/"
+ f"{self.correct_title(entry.get_title()) + str(num_img_l)}.jpg")
+ _div += br(), br()
+ else:
+ # formatting news to more readable format: deleting extra spaces and brackets
+ while text.count('['):
+ text = text[:text.find('[')] + text[text.find(']') + 1:]
+ if text[0] == ' ':
+ text = text[1:]
+
+ import html.parser as html_parser
+ _div += p(html_parser.unescape(text), br(), br())
+
+ if os.path.exists(path):
+ path = os.path.join(path, "RSS_News.html")
+ with open(path, 'w', encoding='utf-8') as rss_html:
+ rss_html.write(str(_html))
+ else:
+ raise RSSReaderException('Error. No such folder. Check the correctness of the entered path \n')
+
+ @logging_decorator
+ def write_entries_to_pdf(self, path: str, entries=()) -> None:
+ """writing of text view of instances of Entry class to pdf file"""
+ # in case of reading news from cache list of entries are got as dict
+ # and in case of online reading news only the path is passed to the method without list of entries
+ if os.path.exists(path) is False:
+ raise RSSReaderException('Error. No such folder. Check the correctness of the entered path \n')
+ if os.path.isdir(path) is False:
+ raise RSSReaderException("Error. It isn't a folder")
+
+ if not entries:
+ entries = self.__entries
+
+ pdf = PDF()
+ try:
+ pdf.add_font('DejaVuSans', '', 'DejaVuSans.ttf', uni=True)
+ pdf.set_font("DejaVuSans")
+ except RuntimeError:
+ raise RSSReaderException("Error. File with fonts not found")
+ pdf.alias_nb_pages()
+ pdf.add_page()
+ # in case of reading news from cache, entries is the list of dicts and they are converted to Entry-object
+ # in case of absence of internet connection list of entries is empty
+
+ if isinstance(entries[0], dict):
+ entries = [self.get_entry_from_dict(entry) for entry in entries]
+
+ for entry in entries:
+ text = entry.get_summary()
+ # adding of an image if the entry has image
+ if len(entry.get_links()) > 1:
+ while text.count('['):
+ text = text[:text.find('[')] + text[text.find(']') + 1:]
+ if text[0] == ' ':
+ text = text[1:]
+ else:
+ # formatting news to more readable format: deleting extra spaces and brackets
+ while text.count('['):
+ text = text[:text.find('[')] + text[text.find(']') + 1:]
+ if text[0] == ' ':
+ text = text[1:]
+
+ pdf.set_font_size(24)
+ pdf.write(10, entry.get_title() + '\n\n')
+ pdf.set_font_size(14)
+ pdf.write(10, f"Feed: {entry.get_feed()}\n")
+ pdf.write(10, f"Date: {entry.get_date()}\n")
+ if len(entry.get_links()) > 1:
+ try:
+ for num_img_l in range(len(entry.get_links()[1:])):
+ pdf.image(
+ f'{this_dir}/images/{self.correct_title(entry.get_title()) + str(num_img_l)}.jpg', w=60, h=50)
+ pdf.write(10, "\n")
+ except RuntimeError:
+ pass
+ pdf.write(10, "\n")
+ pdf.write(10, text + "\n\n\n\n")
+
+ if os.path.exists(path):
+ path = os.path.join(path, "RSS_News.pdf")
+ pdf.output(path, 'F')
+ else:
+ raise RSSReaderException('Error. No such folder. Check the correctness of the entered path \n')
diff --git a/final_task/rss_reader/Logging.py b/final_task/rss_reader/Logging.py
new file mode 100644
index 0000000..a538ffc
--- /dev/null
+++ b/final_task/rss_reader/Logging.py
@@ -0,0 +1,11 @@
+import logging
+
+
+def logging_decorator(func):
+ """decorator for print logs in stdout"""
+ def wrapper(*args, **kwargs):
+ logging.info(f"function \"{func.__name__}\" started")
+ res = func(*args, **kwargs)
+ logging.info(f"function \"{func.__name__}\" finished")
+ return res
+ return wrapper
diff --git a/final_task/rss_reader/RSSReaderException.py b/final_task/rss_reader/RSSReaderException.py
new file mode 100644
index 0000000..b90df8b
--- /dev/null
+++ b/final_task/rss_reader/RSSReaderException.py
@@ -0,0 +1,3 @@
+class RSSReaderException(Exception):
+ """Exception class for output information about the mistake in human-readable error explanation"""
+ pass
diff --git a/final_task/rss_reader/RSS_PDF.py b/final_task/rss_reader/RSS_PDF.py
new file mode 100644
index 0000000..4d7bbbe
--- /dev/null
+++ b/final_task/rss_reader/RSS_PDF.py
@@ -0,0 +1,9 @@
+from fpdf import FPDF
+
+
+class PDF(FPDF):
+ def footer(self):
+ """writing number of the page at the bottom of the page"""
+ self.set_y(-20)
+ self.set_font('Arial', 'I', 10)
+ self.cell(0, 10, 'Page ' + str(self.page_no()) + '/{nb}', 0, 0, 'C')
diff --git a/final_task/rss_reader/__init__.py b/final_task/rss_reader/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/final_task/rss_reader/requirements.txt b/final_task/rss_reader/requirements.txt
index e69de29..2b83351 100644
--- a/final_task/rss_reader/requirements.txt
+++ b/final_task/rss_reader/requirements.txt
@@ -0,0 +1,3 @@
+feedparser==5.2.1
+fpdf==1.7.2
+dominate==2.4.0
\ No newline at end of file
diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py
index e69de29..6f16651 100644
--- a/final_task/rss_reader/rss_reader.py
+++ b/final_task/rss_reader/rss_reader.py
@@ -0,0 +1,63 @@
+import os
+import sys
+import logging
+this_dir = os.path.abspath(os.path.dirname(__file__))
+sys.path.append(this_dir)
+from Handler import Handler
+from Logging import logging_decorator
+from ConsoleParse import get_arguments_from_console
+from RSSReaderException import RSSReaderException
+
+
+@logging_decorator
+def main():
+ try:
+ arg_parser_args = get_arguments_from_console()
+ version = 4.0
+ if not (arg_parser_args.source or arg_parser_args.version or arg_parser_args.date or arg_parser_args.json
+ or arg_parser_args.to_html or arg_parser_args.to_pdf):
+ return
+ if arg_parser_args.version:
+ print(f"version {version}")
+ return
+ handler = Handler(arg_parser_args.source, arg_parser_args.limit, version)
+ if not arg_parser_args.limit:
+ arg_parser_args.limit = len(handler.get_parsed().entries)
+ handler.set_limit(arg_parser_args.limit)
+ elif arg_parser_args.limit <= 0:
+ raise RSSReaderException("Error. Incorrect value of argument limit")
+
+ if arg_parser_args.verbose:
+ logging.getLogger().setLevel(logging.INFO)
+
+ if arg_parser_args.colorize:
+ from colorama import init, Fore
+ init()
+ print(Fore.YELLOW)
+
+ if arg_parser_args.date:
+ handler.option_date(str(arg_parser_args.date), arg_parser_args.json,
+ arg_parser_args.to_html, arg_parser_args.to_pdf, arg_parser_args.source
+ )
+ return
+ if arg_parser_args.json:
+ handler.option_json()
+ if arg_parser_args.to_html:
+ handler.option_html(arg_parser_args.to_html)
+ if arg_parser_args.to_pdf:
+ handler.option_pdf(arg_parser_args.to_pdf)
+ if not (arg_parser_args.version or arg_parser_args.date or arg_parser_args.json
+ or arg_parser_args.to_html or arg_parser_args.to_pdf):
+ handler.option_default()
+ except AttributeError:
+ print("Error, failed to get an attribute.")
+ except PermissionError:
+ print("Error, close the file for output of news")
+ except RSSReaderException as rss_exc:
+ print(rss_exc)
+ except TimeoutError:
+ print("Error, check your internet-connection")
+
+
+# run the reader only when the file is executed as a script, not when it is imported
+if __name__ == "__main__":
+ main()
diff --git a/final_task/rss_reader/tests.py b/final_task/rss_reader/tests.py
new file mode 100644
index 0000000..0487316
--- /dev/null
+++ b/final_task/rss_reader/tests.py
@@ -0,0 +1,300 @@
+import os
+import unittest
+from Handler import Handler
+from Entry import Entry
+from RSSReaderException import RSSReaderException
+from io import StringIO
+from unittest.mock import patch, mock_open, MagicMock
+
+
+class TestHandler(unittest.TestCase):
+ """Class for testing Handler methods"""
+
+    def test__init__(self):
+        """A constructed Handler is a Handler instance, while a plain string is not."""
+        created = Handler("https://news.yahoo.com/rss/", 1, 1.0)
+        self.assertIsInstance(created, Handler)
+        self.assertNotIsInstance("handler", Handler)
+
+    def setUp(self):
+        """Create the Handler fixture (Yahoo feed URL, limit 1, version 4.0) used by the tests."""
+        feed_url, limit, version = "https://news.yahoo.com/rss/", 1, 4.0
+        self.handler = Handler(feed_url, limit, version)
+
+    def test_convert_to_dict(self):
+        """convert_Entry_to_dict returns the expected dictionary for a known Entry."""
+        article = Entry("Yahoo News - Latest News & Headlines", "Title 1", "Wed, 06 Nov 2019 14:22:10 +0500",
+                        "https://link1.com", "summary", ("https://link1.com",))
+        # Presumably the publish_* fields feed the "DateInt" value - verify against Handler.
+        article.publish_year, article.publish_month, article.publish_day = 2019, 12, 11
+        expected = dict(
+            Feed="Yahoo News - Latest News & Headlines",
+            Title="Title 1",
+            DateInt="20191211",
+            Date="Wed, 06 Nov 2019",
+            Link="https://link1.com",
+            Summary="summary",
+            Links=("https://link1.com",),
+        )
+        self.assertEqual(expected, self.handler.convert_Entry_to_dict(article))
+
+    def test_convert_to_dict_fail(self):
+        """convert_Entry_to_dict output differs from a dictionary that carries another title."""
+        article = Entry("Yahoo News - Latest News & Headlines", "Title 15", "Wed, 06 Nov 2019 14:22:10 +0500",
+                        "https://link1.com", "summary", ("https://link1.com",))
+        article.publish_year, article.publish_month, article.publish_day = 2019, 12, 11
+        # Same as the entry above except for the title ("Title 1" instead of "Title 15").
+        mismatching = dict(
+            Feed="Yahoo News - Latest News & Headlines",
+            Title="Title 1",
+            DateInt="20191211",
+            Date="Wed, 06 Nov 2019",
+            Link="https://link1.com",
+            Summary="summary",
+            Links=("https://link1.com",),
+        )
+        self.assertNotEqual(self.handler.convert_Entry_to_dict(article), mismatching)
+
+    def test_get_entry_from_dict_instance(self):
+        """get_entry_from_dict returns an Entry object."""
+        source_dict = dict(
+            Feed="Yahoo News - Latest News & Headlines",
+            Title="Title 1",
+            DateInt="20191211",
+            Date="Wed, 06 Nov 2019 14:22:10 +0500",
+            Link="https://link1.com",
+            Summary="summary",
+            Links=("https://link1.com",),
+        )
+        restored = self.handler.get_entry_from_dict(source_dict)
+        self.assertIsInstance(restored, Entry)
+
+    def test_get_entry_from_dict_not_instance(self):
+        """get_entry_from_dict does not return a Handler object."""
+        source_dict = dict(
+            Feed="Yahoo News - Latest News & Headlines",
+            Title="Title 1",
+            DateInt="20191211",
+            Date="Wed, 06 Nov 2019 14:22:10 +0500",
+            Link="https://link1.com",
+            Summary="summary",
+            Links=("https://link1.com",),
+        )
+        restored = self.handler.get_entry_from_dict(source_dict)
+        self.assertNotIsInstance(restored, Handler)
+
+    def test_option_date_exc(self):
+        """option_date raises RSSReaderException for the date "19950514"."""
+        with self.assertRaises(RSSReaderException):
+            self.handler.option_date("19950514", False, "", "")
+
+    def test_print_entry(self):
+        """print_entry writes feed, title, date, link, summary and the numbered links to stdout."""
+        expected_output = ('Feed: feed\n\nTitle: title\nDate: Wed, 20 Nov 2019\nLink: link\n\n'
+                           'some_text\n\nLinks:\n[0] article_link (link)\n[1] img_link (image)\n\n\n')
+        with patch('sys.stdout', new=StringIO()) as captured:
+            article = Entry(feed="feed", title="title", date="Wed, 20 Nov 2019", article_link="link",
+                            summary="some_text", links=("article_link", "img_link"))
+            self.handler.print_entry(article)
+        # The StringIO stays readable after the patch is undone.
+        self.assertEqual(expected_output, captured.getvalue())
+
+    def test_print_to_json(self):
+        """print_to_json writes the given dictionary to stdout as indented JSON."""
+        expected_output = ('{\n "Feed": "Yahoo News - Latest News & Headlines",\n "Title": "Title 1",'
+                           '\n "DateInt": "20191211",\n "Date": "Wed, 06 Nov 2019",'
+                           '\n "Link": "https://link1.com",\n "Summary": "summary",\n "Links": '
+                           '[\n "https://link1.com"\n ]\n}\n')
+        with patch('sys.stdout', new=StringIO()) as captured:
+            payload = {
+                "Feed": "Yahoo News - Latest News & Headlines",
+                "Title": "Title 1",
+                "DateInt": "20191211",
+                "Date": "Wed, 06 Nov 2019",
+                "Link": "https://link1.com",
+                "Summary": "summary",
+                "Links": ("https://link1.com",),
+            }
+            self.handler.print_to_json(payload)
+        self.assertEqual(expected_output, captured.getvalue())
+
+    def test_correct_title(self):
+        """correct_title converts the raw title into the expected corrected form."""
+        raw_title = 'title?of:article correct"ed'
+        corrected = self.handler.correct_title(raw_title)
+        self.assertEqual("title_of_articl", corrected)
+
+    def test_write_cache(self):
+        """write_cache opens cache.json for writing and its last write is the closing ']'."""
+        fake_open = mock_open()
+        cached_item = dict(
+            Feed="Yahoo News - Latest News & Headlines",
+            Title="Title 1",
+            DateInt="20191211",
+            Date="Wed, 06 Nov 2019 14:22:10 +0500",
+            Link="https://link1.com",
+            Summary="summary",
+            Links=("https://link1.com",),
+        )
+        # Replace open() as seen from the Handler module so nothing touches the disk.
+        with patch("Handler.open", fake_open, create=True):
+            self.handler.write_cache(cached_item)
+
+        fake_open.assert_called_with("cache.json", "w")
+        # assert_called_with only inspects the most recent write() call.
+        fake_open.return_value.write.assert_called_with(']')
+
+    def test_option_version(self):
+        """option_version prints "version 4.0" followed by a newline."""
+        with patch('sys.stdout', new=StringIO()) as captured:
+            self.handler.option_version()
+        self.assertEqual('version 4.0\n', captured.getvalue())
+
+    def test_option_json(self):
+        """option_json prints the stubbed entry dictionary as JSON; write_cache is replaced by a mock."""
+        self.handler.write_cache = MagicMock(return_value=True)
+        self.assertEqual(True, self.handler.write_cache())
+        self.handler.entries = [
+            Entry(feed="feed", title="title", date="Wed, 20 Nov 2019", article_link="link",
+                  summary="some_text", links=("article_link", "img_link")),
+        ]
+        # The conversion is stubbed, so the printed JSON comes from this dictionary, not from the Entry.
+        stubbed_dict = dict(
+            Feed="Yahoo News - Latest News & Headlines",
+            Title="Title 1",
+            DateInt="20191211",
+            Date="Wed, 06 Nov 2019",
+            Link="https://link1.com",
+            Summary="summary",
+            Links=("https://link1.com",),
+        )
+        self.handler.convert_Entry_to_dict = MagicMock(return_value=stubbed_dict)
+        expected_output = ('{\n "Feed": "Yahoo News - Latest News & Headlines",\n "Title": "Title 1",'
+                           '\n "DateInt": "20191211",\n "Date": "Wed, 06 Nov 2019",'
+                           '\n "Link": "https://link1.com",\n "Summary": "summary",\n "Links": [\n '
+                           ' "https://link1.com"\n ]\n}\n')
+        with patch('sys.stdout', new=StringIO()) as captured:
+            self.handler.option_json()
+        self.assertEqual(expected_output, captured.getvalue())
+
+ def test_write_entries_to_html(self):
+ open_mock = mock_open()
+ entry = Entry(feed="feed", title="title", date="Wed, 20 Nov 2019", article_link="link",
+ summary=" []some_text", links=("article_link", "img_link"))
+ with patch("Handler.open", open_mock, create=True):
+ self.handler.write_entries_to_html(os.path.abspath(os.path.dirname(__file__)), (entry,))
+
+ open_mock.assert_called_with(
+ f"{os.path.abspath(os.path.dirname(__file__))}\\RSS_News.html", "w", encoding='utf-8')
+ open_mock.return_value.write.assert_called_once()
+ open_mock.return_value.write.assert_called_once_with(
+ '\n
'
+ 'A day after sayi'
+ 'ng he wouldn’t bother to read the testimony, Sen. Lindsey Graham now says he did read it, and hi'
+ 's conclusion is that the Trump administration’s Ukraine policy was too "incoherent" fo'
+ 'r it to have orchestrated the quid pro quo at the heart of the impeachment inquiry.
'
+ ),
+ "[image 2: Graham now says Trump's Ukraine policy was too 'incoherent' for quid pro quo][2] A day after "
+ "saying he wouldn’t bother to read the testimony, Sen. Lindsey Graham now says he did read it, and his "
+ "conclusion is that the Trump administration’s Ukraine policy was too \"incoherent\" for it to have"
+ " orchestrated the quid pro quo at the heart of the impeachment inquiry."
+ )
+ self.assertNotEqual(self.entry.parse_html(
+ "Graham now says Trump's Ukraine policy was too 'incoherent' for quid pro quo"
+ ),
+ "Ukraine policy was too 'incoherent' for quid pro quo"
+ )
+ self.assertNotEqual(self.entry.parse_html(
+ '
'
+ 'A day after sayi'
+ 'ng he wouldn’t bother to read the testimony, Sen. Lindsey Graham now says he did read it, and hi'
+ 's conclusion is that the Trump administration’s Ukraine policy was too "incoherent" fo'
+ 'r it to have orchestrated the quid pro quo at the heart of the impeachment inquiry.
'
+ ),
+ "[image 5: Graham now says Trump's Ukraine policy was too 'incoherent' for quid pro quo][5] A day after "
+ "saying he wouldn’t bother to read the testimony, Sen. Lindsey Graham now says he did read it, and his "
+ "conclusion is that the Trump administration’s Ukraine policy was too \"incoherent\" for it to have"
+ " orchestrated the quid pro quo at the heart of the impeachment inquiry."
+ )
+
+    def test_print_feed(self):
+        """print_feed writes "Feed: feed" and a blank line to stdout (self.entry is set up outside this view)."""
+        with patch('sys.stdout', new=StringIO()) as captured:
+            self.entry.print_feed()
+        self.assertEqual('Feed: feed\n\n', captured.getvalue())
+
+    def test_print_title(self):
+        """print_title writes the "Title: title" line to stdout (self.entry is set up outside this view)."""
+        with patch('sys.stdout', new=StringIO()) as captured:
+            self.entry.print_title()
+        self.assertEqual('Title: title\n', captured.getvalue())
+
+    def test_print_date(self):
+        """print_date writes the "Date: Wed, 20 Nov 2019" line to stdout (self.entry is set up outside this view)."""
+        with patch('sys.stdout', new=StringIO()) as captured:
+            self.entry.print_date()
+        self.assertEqual('Date: Wed, 20 Nov 2019\n', captured.getvalue())
+
+    def test_print_link(self):
+        """print_link writes the "Link: link" line to stdout (self.entry is set up outside this view)."""
+        with patch('sys.stdout', new=StringIO()) as captured:
+            self.entry.print_link()
+        self.assertEqual('Link: link\n', captured.getvalue())
+
+    def test_print_summary(self):
+        """print_summary writes the summary surrounded by blank lines (self.entry is set up outside this view)."""
+        with patch('sys.stdout', new=StringIO()) as captured:
+            self.entry.print_summary()
+        self.assertEqual('\nsome_text\n\n', captured.getvalue())
+
+    def test_print_links(self):
+        """print_links writes the numbered link list to stdout (self.entry is set up outside this view)."""
+        with patch('sys.stdout', new=StringIO()) as captured:
+            self.entry.print_links()
+        self.assertEqual('Links:\n[0] article_link (link)\n[1] img_link (image)\n\n\n', captured.getvalue())
+
+
+if __name__ == '__main__':  # run the unittest runner only when this file is executed as a script
+ unittest.main()
diff --git a/final_task/setup.py b/final_task/setup.py
index e69de29..0a9cefd 100644
--- a/final_task/setup.py
+++ b/final_task/setup.py
@@ -0,0 +1,26 @@
+from setuptools import setup, find_packages
+from os import path
+
+# Packaging script: README.md next to this file is read and passed to setup() as the long description.
+here = path.abspath(path.dirname(__file__))
+with open(path.join(here, 'README.md'), encoding='utf-8') as f:
+    long_description = f.read()
+
+setup(
+    name='rss-reader',
+    version='4.0',
+    packages=find_packages(),
+    description='Python RSS reader',
+    long_description=long_description,
+    long_description_content_type='text/markdown',
+    url='https://github.com/IlyaTorch/FinalTaskRssParser',
+    author='Ilya Torch',
+    author_email='itorch2001@gmail.com',
+    package_data={'rss_reader': ['DejaVuSans.ttf']},
+    # colorama is imported by rss_reader's main() for --colorize, so it has to be installed as well.
+    install_requires=['feedparser==5.2.1', 'fpdf==1.7.2', 'dominate==2.4.0', 'colorama'],
+    entry_points={
+        'console_scripts': ['rss-reader = rss_reader.rss_reader:main'],
+    },
+    test_suite='rss_reader.tests',
+)