From 8b5be99ac4aa9dd53ede0f973cc5b11511db37c5 Mon Sep 17 00:00:00 2001 From: AnnaPotter Date: Sat, 9 Nov 2019 21:59:13 +0300 Subject: [PATCH 1/7] Implementation of the first iteration without tests --- .gitignore | 10 ++ final_task/rss_reader/action_functions.py | 118 ++++++++++++++++++ final_task/rss_reader/json_structure.txt | 20 +++ final_task/rss_reader/models.py | 48 +++++++ final_task/rss_reader/requirements.txt | 2 + final_task/rss_reader/rss_reader.py | 19 +++ final_task/rss_reader/validation_functions.py | 82 ++++++++++++ 7 files changed, 299 insertions(+) create mode 100644 .gitignore create mode 100644 final_task/rss_reader/action_functions.py create mode 100644 final_task/rss_reader/json_structure.txt create mode 100644 final_task/rss_reader/models.py create mode 100644 final_task/rss_reader/validation_functions.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..338735d --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +# Editors +.idea/ + +# Environments +venv/ + +# Byte-compiled / optimized +__pycache__/ +*.py[cod] + diff --git a/final_task/rss_reader/action_functions.py b/final_task/rss_reader/action_functions.py new file mode 100644 index 0000000..6bd053f --- /dev/null +++ b/final_task/rss_reader/action_functions.py @@ -0,0 +1,118 @@ +""" Module of creation functions and action functions. + + Functions: + create_logger(com_line_args) -> logger + get_news(command_line_args, logger) -> news_collection + print_news_stdout(news_collection) -> None + print_news_json(news_collection) -> None + print_news(news_collection, com_line_args, logger) -> None """ + +import feedparser +import json +import logging +from models import NewsEntry +from validation_functions import check_limit_arg + + +def create_logger(com_line_args): + """Create logger function. + + Creates a logger considering the --verbose argument. """ + # Create a logger + logger = logging.getLogger("rss_reader_logger") + logger.setLevel(logging.DEBUG) + + # Create handlers + c_handler = logging.StreamHandler() + f_handler = logging.FileHandler("file.log") + + # Check --verbose argument + if com_line_args.verbose: + c_handler.setLevel(logging.DEBUG) + else: + c_handler.setLevel(logging.ERROR) + + # Create formatters and add it to handlers + c_format = logging.Formatter('%(name)s - %(levelname)s - %(message)s') + f_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + c_handler.setFormatter(c_format) + f_handler.setFormatter(f_format) + + # Add handlers to the logger + logger.addHandler(f_handler) + logger.addHandler(c_handler) + + return logger + + +def get_news(command_line_args, logger): + """ Get news function. + + Uses feedparser library to receive news. """ + logger.info("Getting news.") + news_feed = feedparser.parse(command_line_args.source) + news_collection = {} + feed = {"title": news_feed.feed.get("title", ""), + "date": news_feed.feed.get("published", ""), + "language": news_feed.feed.get("language", "")} + + news_collection["feed"] = feed + news_collection["entries"] = [] + + for entry in news_feed.entries: + news_entry = NewsEntry() + news_entry.title = entry.get("title", "") + news_entry.date = entry.get("published", "") + news_entry.link = entry.get("link", "") + news_entry.summary = entry.get("summary", "") + news_collection["entries"].append(news_entry) + + return news_collection + + +def print_news_stdout(news_collection): + """ Function for print news to stdout in text format. """ + print("###############################################################") + print() + print("Feed: ", news_collection["feed"]["title"]) + print("Publication date: ", news_collection["feed"]["date"]) + print("Language: ", news_collection["feed"]["language"]) + print() + + for entry in news_collection["entries"]: + entry.print_entry() + + +def print_news_json(news_collection): + """ Function for print news to stdout in json format. """ + news_collection_for_json = {"feed": news_collection["feed"], + "entries": []} + + for entry in news_collection["entries"]: + entry_for_json = {"title": entry.title, + "summary": entry.summary, + "date": entry.date, + "link": entry.link} + news_collection_for_json["entries"].append(entry_for_json) + + print(json.dumps(news_collection_for_json, indent=4)) + + +def print_news(news_collection, com_line_args, logger): + """ Function for print news to stdout + that take account of limit and json arguments. """ + + # get valid limit argument + limit = check_limit_arg(news_collection, com_line_args, logger) + if len(news_collection["entries"]) < limit: + logger.warning("The number of news is less than the value of the argument limit.") + new_news_collection = news_collection + else: + new_news_collection = {"feed": news_collection["feed"], + "entries": news_collection["entries"][:limit]} + + logger.info("Printing news.") + if com_line_args.json: + print_news_json(new_news_collection) + else: + print_news_stdout(new_news_collection) diff --git a/final_task/rss_reader/json_structure.txt b/final_task/rss_reader/json_structure.txt new file mode 100644 index 0000000..bde9678 --- /dev/null +++ b/final_task/rss_reader/json_structure.txt @@ -0,0 +1,20 @@ +JSON structure: + +{ + "feed": { "title": feed title, + "date": feed publication date, + "language": feed language } + "entries": [ + { "title": news title, + "summary": news content, + "date": news publication date, + "link": news link }, + + { "title": news title, + "summary": news content, + "date": news publication date, + "link": news link }, + + ... + ] +} \ No newline at end of file diff --git a/final_task/rss_reader/models.py b/final_task/rss_reader/models.py new file mode 100644 index 0000000..f8aa28a --- /dev/null +++ b/final_task/rss_reader/models.py @@ -0,0 +1,48 @@ +""" Data models module """ +import argparse + + +class ArgReader: + """ Class for receiving command line arguments. + + Class stores the arguments of an instance + of the ArgumentParser class in its fields """ + version: bool + json: bool + limit: int + source: str + verbose: bool + + def __init__(self): + parser = argparse.ArgumentParser(description="Pure Python command-line RSS reader.", add_help=True) + parser.add_argument("source", type=str, help="RSS URL") + parser.add_argument("--version", action="store_true", help="Print version info") + parser.add_argument("--json", action="store_true", help="Print result as JSON in stdout") + parser.add_argument("--verbose", action="store_true", help="Outputs verbose status messages") + parser.add_argument("--limit", type=int, help="Limit news topics if this parameter provided") + + parser.parse_args(namespace=self) + + +class NewsEntry: + """ Class representing a news article(entry). + + Methods: + print_entry(self) - print entry in stdout """ + + title: str + summary: str + date: str + link: str + + def print_entry(self): + print() + print("-------------------------------------------------------------") + print() + print("Title: " + self.title) + print() + print("Summary: " + self.summary) + print() + print("Publication date: " + self.date) + print() + print("Link: " + self.link) diff --git a/final_task/rss_reader/requirements.txt b/final_task/rss_reader/requirements.txt index e69de29..a1f0ba6 100644 --- a/final_task/rss_reader/requirements.txt +++ b/final_task/rss_reader/requirements.txt @@ -0,0 +1,2 @@ +requests +feedparser diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index e69de29..4230f37 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -0,0 +1,19 @@ +""" Main module """ +import models +import action_functions +import validation_functions + +# get command line arguments +com_line_args = models.ArgReader() + +logger = action_functions.create_logger(com_line_args) + +validation_functions.check_version_arg(com_line_args, logger) +validation_functions.check_internet_connection(com_line_args, logger) +validation_functions.check_url(com_line_args, logger) + +news_collection = action_functions.get_news(com_line_args, logger) +validation_functions.check_emptiness(news_collection, logger) + +action_functions.print_news(news_collection, com_line_args, logger) + diff --git a/final_task/rss_reader/validation_functions.py b/final_task/rss_reader/validation_functions.py new file mode 100644 index 0000000..e54db7f --- /dev/null +++ b/final_task/rss_reader/validation_functions.py @@ -0,0 +1,82 @@ +""" Module of validation functions. + + Functions: + check_url(com_line_args, logger) -> True + check_internet_connection(com_line_args, logger) -> True + check_emptiness(news_collection, logger) -> True + check_version_arg(com_line_args, logger) -> None + check_limit_arg(news_collection, com_line_args, logger) -> limit (int) """ + +import requests +from urllib.request import Request, urlopen +from urllib.error import URLError + + +def check_url(com_line_args, logger): + """ Check URL function. """ + req = Request(com_line_args.source) + try: + logger.info("Checking url.") + response = urlopen(req) + except URLError as e: + if hasattr(e, "reason"): + logger.error(f"Failed to reach a server. ") + print("Please, check your URL.") + exit() + elif hasattr(e, 'code'): + print("The server couldn\'t fulfill the request.") + print('Error code: ', e.code) + else: + return True + + +def check_internet_connection(com_line_args, logger): + """ Check internet connection function. """ + try: + logger.info("Checking internet connection.") + response = requests.get("http://google.com", timeout=5) + return True + except requests.exceptions.ConnectionError: + logger.error("No internet connection. " + "Check your internet connection") + answer = input("Would you like to try again? (Y/n): ") + if answer.lower() == 'y': + check_internet_connection(com_line_args, logger) + else: + exit() + + +def check_emptiness(news_collection, logger): + """ Function for checking news availability in news collection. """ + logger.info("Checking news collection emptiness.") + if not (news_collection["feed"] and news_collection["entries"]): + logger.error("Empty RSS-feed. Please, check URL.") + exit() + else: + return True + + +def check_version_arg(com_line_args, logger): + """ Check --version argument function. """ + if com_line_args.version: + logger.info("View program version.") + print("rss_reader.py 1.0") + exit() + + +def check_limit_arg(news_collection, com_line_args, logger): + """ Check --limit argument function. + + Analyzes the received value and makes it valid for correct program work. """ + if com_line_args.limit or com_line_args.limit == 0: + limit = com_line_args.limit + else: + limit = len(news_collection["entries"]) + if limit < 0: + logger.warning("Command line argument limit is invalid.") + print("Command line argument limit is invalid. " + "It should not be negative.") + com_line_args.limit = int(input("Please, enter a valid value:")) + check_limit_arg(news_collection, com_line_args, logger) + + return limit From 1e18f4ffb9ffab587ff5c3c0ad40f2b9cde61af8 Mon Sep 17 00:00:00 2001 From: AnnaPotter Date: Sun, 10 Nov 2019 22:49:17 +0300 Subject: [PATCH 2/7] Some recommended fixes made --- .../{json_structure.txt => README.md} | 24 ++++++----- final_task/rss_reader/action_functions.py | 26 ++++++++--- final_task/rss_reader/exceptions.py | 10 +++++ final_task/rss_reader/models.py | 43 ++++++------------- final_task/rss_reader/rss_reader.py | 29 +++++++------ final_task/rss_reader/validation_functions.py | 42 ++++++++---------- 6 files changed, 90 insertions(+), 84 deletions(-) rename final_task/rss_reader/{json_structure.txt => README.md} (57%) create mode 100644 final_task/rss_reader/exceptions.py diff --git a/final_task/rss_reader/json_structure.txt b/final_task/rss_reader/README.md similarity index 57% rename from final_task/rss_reader/json_structure.txt rename to final_task/rss_reader/README.md index bde9678..65696a3 100644 --- a/final_task/rss_reader/json_structure.txt +++ b/final_task/rss_reader/README.md @@ -1,20 +1,24 @@ -JSON structure: - -{ - "feed": { "title": feed title, - "date": feed publication date, - "language": feed language } - "entries": [ +
JSON structure:
+ + { + + "feed": { "title": feed title, + "date": feed publication date, + "language": feed language } + + "entries": [ + { "title": news title, "summary": news content, "date": news publication date, "link": news link }, - + { "title": news title, "summary": news content, "date": news publication date, "link": news link }, ... - ] -} \ No newline at end of file + + ] + } diff --git a/final_task/rss_reader/action_functions.py b/final_task/rss_reader/action_functions.py index 6bd053f..9909e97 100644 --- a/final_task/rss_reader/action_functions.py +++ b/final_task/rss_reader/action_functions.py @@ -2,15 +2,18 @@ Functions: create_logger(com_line_args) -> logger + get_com_line_args() -> com_line_args get_news(command_line_args, logger) -> news_collection print_news_stdout(news_collection) -> None print_news_json(news_collection) -> None print_news(news_collection, com_line_args, logger) -> None """ import feedparser +import argparse import json import logging from models import NewsEntry +from dataclasses import asdict from validation_functions import check_limit_arg @@ -45,6 +48,18 @@ def create_logger(com_line_args): return logger +def get_com_line_args(): + """ Function to get command line arguments. """ + parser = argparse.ArgumentParser(description="Pure Python command-line RSS reader.", add_help=True) + parser.add_argument("source", type=str, nargs="?", help="RSS URL") + parser.add_argument("--version", action="store_true", help="Print version info") + parser.add_argument("--json", action="store_true", help="Print result as JSON in stdout") + parser.add_argument("--verbose", action="store_true", help="Outputs verbose status messages") + parser.add_argument("--limit", type=int, help="Limit news topics if this parameter provided") + + return parser.parse_args() + + def get_news(command_line_args, logger): """ Get news function. @@ -72,12 +87,12 @@ def get_news(command_line_args, logger): def print_news_stdout(news_collection): """ Function for print news to stdout in text format. """ - print("###############################################################") - print() + print("################################################################################") + print("") print("Feed: ", news_collection["feed"]["title"]) print("Publication date: ", news_collection["feed"]["date"]) print("Language: ", news_collection["feed"]["language"]) - print() + print("") for entry in news_collection["entries"]: entry.print_entry() @@ -89,10 +104,7 @@ def print_news_json(news_collection): "entries": []} for entry in news_collection["entries"]: - entry_for_json = {"title": entry.title, - "summary": entry.summary, - "date": entry.date, - "link": entry.link} + entry_for_json = asdict(entry) news_collection_for_json["entries"].append(entry_for_json) print(json.dumps(news_collection_for_json, indent=4)) diff --git a/final_task/rss_reader/exceptions.py b/final_task/rss_reader/exceptions.py new file mode 100644 index 0000000..42c4fb6 --- /dev/null +++ b/final_task/rss_reader/exceptions.py @@ -0,0 +1,10 @@ +""" Module for non-standard exceptions. """ + + +class Error(Exception): + """ Class to raising exceptions. """ + def __init__(self, message): + super().__init__(message) + + + diff --git a/final_task/rss_reader/models.py b/final_task/rss_reader/models.py index f8aa28a..6bf3b7e 100644 --- a/final_task/rss_reader/models.py +++ b/final_task/rss_reader/models.py @@ -1,48 +1,29 @@ """ Data models module """ -import argparse - -class ArgReader: - """ Class for receiving command line arguments. - - Class stores the arguments of an instance - of the ArgumentParser class in its fields """ - version: bool - json: bool - limit: int - source: str - verbose: bool - - def __init__(self): - parser = argparse.ArgumentParser(description="Pure Python command-line RSS reader.", add_help=True) - parser.add_argument("source", type=str, help="RSS URL") - parser.add_argument("--version", action="store_true", help="Print version info") - parser.add_argument("--json", action="store_true", help="Print result as JSON in stdout") - parser.add_argument("--verbose", action="store_true", help="Outputs verbose status messages") - parser.add_argument("--limit", type=int, help="Limit news topics if this parameter provided") - - parser.parse_args(namespace=self) +from dataclasses import dataclass +@dataclass class NewsEntry: """ Class representing a news article(entry). Methods: print_entry(self) - print entry in stdout """ - title: str - summary: str - date: str - link: str + title: str = "" + summary: str = "" + date: str = "" + link: str = "" def print_entry(self): - print() + print("") print("-------------------------------------------------------------") - print() + print("") print("Title: " + self.title) - print() + print("") print("Summary: " + self.summary) - print() + print("") print("Publication date: " + self.date) - print() + print("") print("Link: " + self.link) + print("") diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index 4230f37..b01e6dc 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -1,19 +1,24 @@ """ Main module """ -import models -import action_functions -import validation_functions -# get command line arguments -com_line_args = models.ArgReader() -logger = action_functions.create_logger(com_line_args) +if __name__ == "__main__": + import action_functions + import validation_functions + import exceptions -validation_functions.check_version_arg(com_line_args, logger) -validation_functions.check_internet_connection(com_line_args, logger) -validation_functions.check_url(com_line_args, logger) + # get command line arguments + com_line_args = action_functions.get_com_line_args() -news_collection = action_functions.get_news(com_line_args, logger) -validation_functions.check_emptiness(news_collection, logger) + logger = action_functions.create_logger(com_line_args) + try: + validation_functions.check_version_arg(com_line_args, logger) + validation_functions.check_internet_connection(com_line_args, logger) + validation_functions.check_url(com_line_args, logger) -action_functions.print_news(news_collection, com_line_args, logger) + news_collection = action_functions.get_news(com_line_args, logger) + validation_functions.check_emptiness(news_collection, logger) + action_functions.print_news(news_collection, com_line_args, logger) + except exceptions.Error as e: + print(e) + exit() diff --git a/final_task/rss_reader/validation_functions.py b/final_task/rss_reader/validation_functions.py index e54db7f..78da994 100644 --- a/final_task/rss_reader/validation_functions.py +++ b/final_task/rss_reader/validation_functions.py @@ -10,22 +10,26 @@ import requests from urllib.request import Request, urlopen from urllib.error import URLError +from exceptions import Error def check_url(com_line_args, logger): """ Check URL function. """ - req = Request(com_line_args.source) try: + req = Request(com_line_args.source) logger.info("Checking url.") response = urlopen(req) + except ValueError: + logger.error("Invalid URL.") + raise Error("Please, check your URL.") except URLError as e: if hasattr(e, "reason"): - logger.error(f"Failed to reach a server. ") - print("Please, check your URL.") - exit() + logger.error(f"Failed to reach a server. Reason: {e.reason}.") + raise Error("Please, check your URL.") elif hasattr(e, 'code'): - print("The server couldn\'t fulfill the request.") - print('Error code: ', e.code) + logger.error("The server couldn\'t fulfill the request. " + f"Error code: {e.code}") + raise Error("Service problem.") else: return True @@ -37,21 +41,16 @@ def check_internet_connection(com_line_args, logger): response = requests.get("http://google.com", timeout=5) return True except requests.exceptions.ConnectionError: - logger.error("No internet connection. " - "Check your internet connection") - answer = input("Would you like to try again? (Y/n): ") - if answer.lower() == 'y': - check_internet_connection(com_line_args, logger) - else: - exit() + logger.error("No internet connection.") + raise Error("Please, check your internet connection.") def check_emptiness(news_collection, logger): """ Function for checking news availability in news collection. """ logger.info("Checking news collection emptiness.") if not (news_collection["feed"] and news_collection["entries"]): - logger.error("Empty RSS-feed. Please, check URL.") - exit() + logger.error("Empty RSS-feed.") + raise Error("Please, check URL.") else: return True @@ -61,22 +60,17 @@ def check_version_arg(com_line_args, logger): if com_line_args.version: logger.info("View program version.") print("rss_reader.py 1.0") - exit() + raise Error("") def check_limit_arg(news_collection, com_line_args, logger): - """ Check --limit argument function. - - Analyzes the received value and makes it valid for correct program work. """ + """ Check --limit argument function. """ if com_line_args.limit or com_line_args.limit == 0: limit = com_line_args.limit else: limit = len(news_collection["entries"]) if limit < 0: - logger.warning("Command line argument limit is invalid.") - print("Command line argument limit is invalid. " - "It should not be negative.") - com_line_args.limit = int(input("Please, enter a valid value:")) - check_limit_arg(news_collection, com_line_args, logger) + logger.error("Command line argument limit is invalid.") + raise Error("Command line argument limit should not be negative.") return limit From f57768b989286e638c37c9ea735f2e8e458da671 Mon Sep 17 00:00:00 2001 From: AnnaPotter Date: Sun, 17 Nov 2019 01:41:19 +0300 Subject: [PATCH 3/7] Defects of the first iteration are fixed --- final_task/rss_reader/action_functions.py | 17 +++++++++++++---- final_task/rss_reader/requirements.txt | 1 + 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/final_task/rss_reader/action_functions.py b/final_task/rss_reader/action_functions.py index 9909e97..8817a1a 100644 --- a/final_task/rss_reader/action_functions.py +++ b/final_task/rss_reader/action_functions.py @@ -9,6 +9,8 @@ print_news(news_collection, com_line_args, logger) -> None """ import feedparser +from bs4 import BeautifulSoup +import html import argparse import json import logging @@ -63,11 +65,14 @@ def get_com_line_args(): def get_news(command_line_args, logger): """ Get news function. - Uses feedparser library to receive news. """ + Uses feedparser library to receive news, + and BeautifulSoup library to converting news in readable format. """ logger.info("Getting news.") news_feed = feedparser.parse(command_line_args.source) news_collection = {} - feed = {"title": news_feed.feed.get("title", ""), + + # convert title string to unicode + feed = {"title": html.unescape(news_feed.feed.get("title", "")), "date": news_feed.feed.get("published", ""), "language": news_feed.feed.get("language", "")} @@ -76,10 +81,14 @@ def get_news(command_line_args, logger): for entry in news_feed.entries: news_entry = NewsEntry() - news_entry.title = entry.get("title", "") + news_entry.title = html.unescape(entry.get("title", "")) news_entry.date = entry.get("published", "") news_entry.link = entry.get("link", "") - news_entry.summary = entry.get("summary", "") + + # get rid of html tags + soup = BeautifulSoup(entry.get("summary", ""), "html.parser") + news_entry.summary = html.unescape(soup.text) + news_collection["entries"].append(news_entry) return news_collection diff --git a/final_task/rss_reader/requirements.txt b/final_task/rss_reader/requirements.txt index a1f0ba6..a4632b8 100644 --- a/final_task/rss_reader/requirements.txt +++ b/final_task/rss_reader/requirements.txt @@ -1,2 +1,3 @@ +beautifulsoup4 requests feedparser From 2f84f71100db1432ac34854f2a3bcd9c715cb70d Mon Sep 17 00:00:00 2001 From: AnnaPotter Date: Tue, 19 Nov 2019 01:13:26 +0300 Subject: [PATCH 4/7] Second iteration --- final_task/README.md | 12 ++++-- final_task/rss_reader/__init__.py | 1 + final_task/rss_reader/action_functions.py | 15 ++++---- final_task/rss_reader/models.py | 17 +++------ final_task/rss_reader/rss_reader.py | 38 ++++++++++--------- final_task/rss_reader/validation_functions.py | 12 +++--- final_task/setup.py | 28 ++++++++++++++ 7 files changed, 79 insertions(+), 44 deletions(-) create mode 100644 final_task/rss_reader/__init__.py diff --git a/final_task/README.md b/final_task/README.md index 7af281f..87f3907 100644 --- a/final_task/README.md +++ b/final_task/README.md @@ -1,3 +1,9 @@ -# Your readme here -Some text. -Checkout how to write this file using *markdown*. +# RSS reader +RSS reader is a command-line utility which receives RSS URL and prints results in human-readable +format. + +[The source for this project is available here](https://github.com/AnnaPotter/FinalTaskRssParser). + + +### Installation +$ pip install rss-reader-Anna-Gonchar \ No newline at end of file diff --git a/final_task/rss_reader/__init__.py b/final_task/rss_reader/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/final_task/rss_reader/__init__.py @@ -0,0 +1 @@ + diff --git a/final_task/rss_reader/action_functions.py b/final_task/rss_reader/action_functions.py index 8817a1a..c98e207 100644 --- a/final_task/rss_reader/action_functions.py +++ b/final_task/rss_reader/action_functions.py @@ -14,9 +14,9 @@ import argparse import json import logging -from models import NewsEntry +from rss_reader.models import NewsEntry from dataclasses import asdict -from validation_functions import check_limit_arg +from rss_reader.validation_functions import check_limit_arg def create_logger(com_line_args): @@ -96,12 +96,11 @@ def get_news(command_line_args, logger): def print_news_stdout(news_collection): """ Function for print news to stdout in text format. """ - print("################################################################################") - print("") - print("Feed: ", news_collection["feed"]["title"]) - print("Publication date: ", news_collection["feed"]["date"]) - print("Language: ", news_collection["feed"]["language"]) - print("") + print("################################################################################\n", + "Feed: " + news_collection["feed"]["title"], + "Publication date: " + news_collection["feed"]["date"], + "Language: " + news_collection["feed"]["language"] + '\n', + sep='\n') for entry in news_collection["entries"]: entry.print_entry() diff --git a/final_task/rss_reader/models.py b/final_task/rss_reader/models.py index 6bf3b7e..4d087ba 100644 --- a/final_task/rss_reader/models.py +++ b/final_task/rss_reader/models.py @@ -16,14 +16,9 @@ class NewsEntry: link: str = "" def print_entry(self): - print("") - print("-------------------------------------------------------------") - print("") - print("Title: " + self.title) - print("") - print("Summary: " + self.summary) - print("") - print("Publication date: " + self.date) - print("") - print("Link: " + self.link) - print("") + print("-------------------------------------------------------------", + "Title: " + self.title + '\n', + "Summary: " + self.summary + '\n', + "Publication date: " + self.date + '\n', + "Link: " + self.link + '\n', + sep='\n') diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index b01e6dc..df41283 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -1,24 +1,28 @@ """ Main module """ +from rss_reader import action_functions +from rss_reader import validation_functions +from rss_reader import exceptions -if __name__ == "__main__": - import action_functions - import validation_functions - import exceptions - +def main(): # get command line arguments com_line_args = action_functions.get_com_line_args() logger = action_functions.create_logger(com_line_args) - try: - validation_functions.check_version_arg(com_line_args, logger) - validation_functions.check_internet_connection(com_line_args, logger) - validation_functions.check_url(com_line_args, logger) - - news_collection = action_functions.get_news(com_line_args, logger) - validation_functions.check_emptiness(news_collection, logger) - - action_functions.print_news(news_collection, com_line_args, logger) - except exceptions.Error as e: - print(e) - exit() + + if not validation_functions.check_version_arg(com_line_args, logger): + try: + validation_functions.check_internet_connection(logger) + validation_functions.check_url(com_line_args, logger) + + news_collection = action_functions.get_news(com_line_args, logger) + validation_functions.check_emptiness(news_collection, logger) + + action_functions.print_news(news_collection, com_line_args, logger) + except exceptions.Error as e: + print(e) + + +if __name__ == "__main__": + main() + diff --git a/final_task/rss_reader/validation_functions.py b/final_task/rss_reader/validation_functions.py index 78da994..c1b50c0 100644 --- a/final_task/rss_reader/validation_functions.py +++ b/final_task/rss_reader/validation_functions.py @@ -2,15 +2,15 @@ Functions: check_url(com_line_args, logger) -> True - check_internet_connection(com_line_args, logger) -> True + check_internet_connection(logger) -> True check_emptiness(news_collection, logger) -> True - check_version_arg(com_line_args, logger) -> None + check_version_arg(com_line_args, logger) -> True check_limit_arg(news_collection, com_line_args, logger) -> limit (int) """ import requests from urllib.request import Request, urlopen from urllib.error import URLError -from exceptions import Error +from rss_reader.exceptions import Error def check_url(com_line_args, logger): @@ -34,7 +34,7 @@ def check_url(com_line_args, logger): return True -def check_internet_connection(com_line_args, logger): +def check_internet_connection(logger): """ Check internet connection function. """ try: logger.info("Checking internet connection.") @@ -60,7 +60,9 @@ def check_version_arg(com_line_args, logger): if com_line_args.version: logger.info("View program version.") print("rss_reader.py 1.0") - raise Error("") + return True + else: + return False def check_limit_arg(news_collection, com_line_args, logger): diff --git a/final_task/setup.py b/final_task/setup.py index e69de29..c45d232 100644 --- a/final_task/setup.py +++ b/final_task/setup.py @@ -0,0 +1,28 @@ +from setuptools import setup, find_packages +from os import path + +directory = path.abspath(path.dirname(__file__)) +with open(path.join(directory, 'README.md'), encoding='utf-8') as f: + long_description = f.read() + +setup( + name="rss_reader_Anna_Gonchar", + version="2.4", + description="RSS reader - simple command-line utility.", + long_description=long_description, + long_description_content_type="text/markdown", + url="https://github.com/AnnaPotter/FinalTaskRssParser", + author="Anna Gonchar", + author_email="raphaelkyzy@gmail.com", + packages=find_packages(), + classifiers=[ + "Programming Language :: Python :: 3.8", + "Operating System :: OS Independent", + ], + python_requires='>=3.8', + install_requires=['feedparser', 'requests', 'beautifulsoup4==4.8.1'], + entry_points={ + 'console_scripts': + ['rss-reader = rss_reader.rss_reader:main'] + }, +) From a70d1eb82ecf05e1c1cebdbc6939d8bea0fbee38 Mon Sep 17 00:00:00 2001 From: AnnaPotter Date: Sat, 23 Nov 2019 00:43:10 +0300 Subject: [PATCH 5/7] Third iteration. --- .gitignore | 7 ++ final_task/README.md | 8 +- final_task/rss_reader/README.md | 46 ++++++++--- final_task/rss_reader/action_functions.py | 79 ++++++++++++++++--- final_task/rss_reader/caching_functions.py | 60 ++++++++++++++ final_task/rss_reader/exceptions.py | 11 ++- final_task/rss_reader/models.py | 31 +++++++- final_task/rss_reader/requirements.txt | 2 +- final_task/rss_reader/rss_reader.py | 38 +++++---- final_task/rss_reader/validation_functions.py | 28 ++++--- final_task/setup.py | 2 +- 11 files changed, 254 insertions(+), 58 deletions(-) create mode 100644 final_task/rss_reader/caching_functions.py diff --git a/.gitignore b/.gitignore index 338735d..3781a3c 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,10 @@ venv/ __pycache__/ *.py[cod] +*.log + +# Distribution / packaging +*.egg-info/ +build/ +develop-eggs/ +dist/ \ No newline at end of file diff --git a/final_task/README.md b/final_task/README.md index 87f3907..3ec38ea 100644 --- a/final_task/README.md +++ b/final_task/README.md @@ -6,4 +6,10 @@ format. ### Installation -$ pip install rss-reader-Anna-Gonchar \ No newline at end of file +$ pip install rss-reader-Anna-Gonchar + +### Storage +All the pieces of news received from the source are saved to the binary file. +Shelve module is used for this. It saves object with the specific key to the file. +The key is the rss news publication date, the value is the news. + diff --git a/final_task/rss_reader/README.md b/final_task/rss_reader/README.md index 65696a3..8a59401 100644 --- a/final_task/rss_reader/README.md +++ b/final_task/rss_reader/README.md @@ -1,24 +1,48 @@ -
JSON structure:
- +#JSON structure: +####1) for news from the internet: { + "feed": { + "feed_title": feed title, + "feed_language": feed language + } - "feed": { "title": feed title, - "date": feed publication date, - "language": feed language } - - "entries": [ - - { "title": news title, + "entries": + [ + { + "title": news title, "summary": news content, "date": news publication date, "link": news link }, - { "title": news title, + { + "title": news title, "summary": news content, "date": news publication date, "link": news link }, ... - ] + ] } + +####2) for news from the local storage: + [ + { + "feed_title": feed title, + "feed_language": feed language, + "title": news title, + "summary": news content, + "date": news publication date, + "link": news link }, + + { + "feed_title": feed title, + "feed_language": feed language, + "title": news title, + "summary": news content, + "date": news publication date, + "link": news link }, + + ... + + ] diff --git a/final_task/rss_reader/action_functions.py b/final_task/rss_reader/action_functions.py index c98e207..918a9e3 100644 --- a/final_task/rss_reader/action_functions.py +++ b/final_task/rss_reader/action_functions.py @@ -6,7 +6,10 @@ get_news(command_line_args, logger) -> news_collection print_news_stdout(news_collection) -> None print_news_json(news_collection) -> None - print_news(news_collection, com_line_args, logger) -> None """ + print_news(news_collection, com_line_args, logger) -> None + print_cache_news(news_collection, logger) -> None + print_cache_news_json(news_collection, logger) -> None + convert_date(date_str, logger) -> str_date """ import feedparser from bs4 import BeautifulSoup @@ -14,9 +17,12 @@ import argparse import json import logging -from rss_reader.models import NewsEntry + +from datetime import datetime +from exceptions import Error +from models import NewsEntry from dataclasses import asdict -from rss_reader.validation_functions import check_limit_arg +from validation_functions import check_limit_arg def create_logger(com_line_args): @@ -54,6 +60,10 @@ def get_com_line_args(): """ Function to get command line arguments. """ parser = argparse.ArgumentParser(description="Pure Python command-line RSS reader.", add_help=True) parser.add_argument("source", type=str, nargs="?", help="RSS URL") + parser.add_argument("--date", type=convert_date, + help="Take a date in %Y%m%d format. Print news from the specified date.") + # --to-epub + # --to-pdf parser.add_argument("--version", action="store_true", help="Print version info") parser.add_argument("--json", action="store_true", help="Print result as JSON in stdout") parser.add_argument("--verbose", action="store_true", help="Outputs verbose status messages") @@ -69,18 +79,20 @@ def get_news(command_line_args, logger): and BeautifulSoup library to converting news in readable format. """ logger.info("Getting news.") news_feed = feedparser.parse(command_line_args.source) - news_collection = {} # convert title string to unicode feed = {"title": html.unescape(news_feed.feed.get("title", "")), - "date": news_feed.feed.get("published", ""), "language": news_feed.feed.get("language", "")} - - news_collection["feed"] = feed - news_collection["entries"] = [] + news_collection = {"feed": feed, + "entries": []} + source = command_line_args.source for entry in news_feed.entries: news_entry = NewsEntry() + news_entry.feed_title = feed["title"] + news_entry.feed_language = feed["language"] + + news_entry.source = source news_entry.title = html.unescape(entry.get("title", "")) news_entry.date = entry.get("published", "") news_entry.link = entry.get("link", "") @@ -88,6 +100,11 @@ def get_news(command_line_args, logger): # get rid of html tags soup = BeautifulSoup(entry.get("summary", ""), "html.parser") news_entry.summary = html.unescape(soup.text) + # get images links + images = soup.findAll("img") + #mayby add images_alt text in future + for img in images: + news_entry.image_links.append(img["src"]) news_collection["entries"].append(news_entry) @@ -98,7 +115,6 @@ def print_news_stdout(news_collection): """ Function for print news to stdout in text format. """ print("################################################################################\n", "Feed: " + news_collection["feed"]["title"], - "Publication date: " + news_collection["feed"]["date"], "Language: " + news_collection["feed"]["language"] + '\n', sep='\n') @@ -119,11 +135,16 @@ def print_news_json(news_collection): def print_news(news_collection, com_line_args, logger): - """ Function for print news to stdout + """ Function for print news to stdout, that take account of limit and json arguments. """ # get valid limit argument - limit = check_limit_arg(news_collection, com_line_args, logger) + # if not initialize limit argument + if not check_limit_arg(com_line_args, logger): + limit = len(news_collection["entries"]) + else: + limit = com_line_args.limit + if len(news_collection["entries"]) < limit: logger.warning("The number of news is less than the value of the argument limit.") new_news_collection = news_collection @@ -133,6 +154,42 @@ def print_news(news_collection, com_line_args, logger): logger.info("Printing news.") if com_line_args.json: + logger.info("Printing news in json format.") print_news_json(new_news_collection) else: + logger.info("Printing news stdout.") print_news_stdout(new_news_collection) + + +def print_cache_news(cached_news_collection, com_line_args, logger): + """ Function for print cached news to stdout, + that take account of json argument. """ + if cached_news_collection: # for the case when limit = 0 + if com_line_args.json: + print_cache_news_json(cached_news_collection, logger) + else: + logger.info("Printing cache news.") + for entry in cached_news_collection: + entry.print_cache_entry() + + +def print_cache_news_json(cached_news_collection, logger): + """ Function for print cached news to stdout in json format.""" + if cached_news_collection: # for the case when limit = 0 + logger.info("Printing cache news in json format.") + news_collection_for_json = [] + for entry in cached_news_collection: + entry_for_json = asdict(entry) + news_collection_for_json.append(entry_for_json) + + print(json.dumps(news_collection_for_json, indent=4)) + + +def convert_date(date_str): + """ Converting date function. """ + try: + datetime_obj = datetime.strptime(date_str, '%Y%m%d') + str_date = datetime_obj.strftime("%d %b %Y") + return str_date + except ValueError as e: + raise Error("Invalid date argument. Please, check your input.") diff --git a/final_task/rss_reader/caching_functions.py b/final_task/rss_reader/caching_functions.py new file mode 100644 index 0000000..3def15b --- /dev/null +++ b/final_task/rss_reader/caching_functions.py @@ -0,0 +1,60 @@ +""" Module of caching functions. + + Functions: + cache_news(news_collection, logger) -> None + get_cached_news(com_line_args, logger) -> cached_news_collection """ + +import shelve +from os import path + +from validation_functions import check_limit_arg +from exceptions import EmptyFileError, EmptyCollectionError + +DIRECTORY = path.abspath(path.dirname(__file__)) + + +def cache_news(news_collection, logger): + """ Caching news function. """ + logger.info("Collecting news to cache file.") + with shelve.open(path.join(DIRECTORY, '.cache_rss_news')) as news_dict: + for news in news_collection["entries"]: + hash_date = news.date + news_dict[hash_date] = news + logger.info("News was cached successfully.") + + +def get_cached_news(com_line_args, logger): + logger.info("Getting cache news.") + date = com_line_args.date + source = com_line_args.source + check_limit_arg(com_line_args, logger) + + cached_news_collection = [] + + with shelve.open(path.join(DIRECTORY, '.cache_rss_news')) as news_dict: + if not news_dict: + raise EmptyFileError("Cache file is empty. Please, retrieve news from internet. ") + + limit = min(com_line_args.limit, len(news_dict)) + + if source: + for hash_date_key in news_dict: + if date in hash_date_key: + news = news_dict[hash_date_key] + if source == news.source: + cached_news_collection.append(news) + else: + for hash_date_key in news_dict: + if date in hash_date_key: + news = news_dict[hash_date_key] + cached_news_collection.append(news) + + if not cached_news_collection: + if source: + raise EmptyCollectionError("There are no news in cache file on specified date and source.") + else: + raise EmptyCollectionError("There are no news in cache file on specified date.") + + else: + logger.info("Successfully get news from cache.") + return cached_news_collection[:limit] diff --git a/final_task/rss_reader/exceptions.py b/final_task/rss_reader/exceptions.py index 42c4fb6..ef64edf 100644 --- a/final_task/rss_reader/exceptions.py +++ b/final_task/rss_reader/exceptions.py @@ -3,8 +3,15 @@ class Error(Exception): """ Class to raising exceptions. """ - def __init__(self, message): - super().__init__(message) + pass + + +class EmptyFileError(Error): + pass + + +class EmptyCollectionError(Error): + pass diff --git a/final_task/rss_reader/models.py b/final_task/rss_reader/models.py index 4d087ba..6f1824e 100644 --- a/final_task/rss_reader/models.py +++ b/final_task/rss_reader/models.py @@ -1,6 +1,5 @@ """ Data models module """ - -from dataclasses import dataclass +from dataclasses import dataclass, field @dataclass @@ -10,15 +9,41 @@ class NewsEntry: Methods: print_entry(self) - print entry in stdout """ + feed_title: str = "" + feed_language: str = "" + title: str = "" summary: str = "" date: str = "" link: str = "" + image_links: list = field(default_factory=list) + source: str = "" + id: str = "" def print_entry(self): print("-------------------------------------------------------------", - "Title: " + self.title + '\n', + "News title: " + self.title + '\n', + "Summary: " + self.summary + '\n', + "Publication date: " + self.date + '\n', + "Source: " + self.source + '\n', + "Link: " + self.link + '\n', + sep='\n') + if self.image_links: + print("Images links: ") + for num, img_link in enumerate(self.image_links): + print(f"[{num+1}] {img_link}") + + def print_cache_entry(self): + print("-------------------------------------------------------------", + "Feed title: " + self.feed_title + '\n', + "Feed language: " + self.feed_language + '\n' + '\n', + "News title: " + self.title + '\n', "Summary: " + self.summary + '\n', "Publication date: " + self.date + '\n', + "Source: " + self.source + '\n', "Link: " + self.link + '\n', sep='\n') + if self.image_links: + print("Images links: ") + for num, img_link in enumerate(self.image_links): + print(f"[{num+1}] {img_link}") \ No newline at end of file diff --git a/final_task/rss_reader/requirements.txt b/final_task/rss_reader/requirements.txt index a4632b8..84936f1 100644 --- a/final_task/rss_reader/requirements.txt +++ b/final_task/rss_reader/requirements.txt @@ -1,3 +1,3 @@ -beautifulsoup4 +beautifulsoup4==4.8.1 requests feedparser diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index df41283..df9bc69 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -1,28 +1,32 @@ """ Main module """ -from rss_reader import action_functions -from rss_reader import validation_functions -from rss_reader import exceptions +import action_functions +import validation_functions +import exceptions +import caching_functions def main(): - # get command line arguments - com_line_args = action_functions.get_com_line_args() + try: + # get command line arguments + com_line_args = action_functions.get_com_line_args() - logger = action_functions.create_logger(com_line_args) + logger = action_functions.create_logger(com_line_args) - if not validation_functions.check_version_arg(com_line_args, logger): - try: - validation_functions.check_internet_connection(logger) - validation_functions.check_url(com_line_args, logger) + if not validation_functions.check_version_arg(com_line_args, logger): + if com_line_args.date: + news_collection = caching_functions.get_cached_news(com_line_args, logger) + action_functions.print_cache_news(news_collection, com_line_args, logger) + else: + validation_functions.check_internet_connection(logger) + validation_functions.check_url(com_line_args, logger) - news_collection = action_functions.get_news(com_line_args, logger) - validation_functions.check_emptiness(news_collection, logger) - - action_functions.print_news(news_collection, com_line_args, logger) - except exceptions.Error as e: - print(e) + news_collection = action_functions.get_news(com_line_args, logger) + validation_functions.check_emptiness(news_collection, logger) + caching_functions.cache_news(news_collection, logger) + action_functions.print_news(news_collection, com_line_args, logger) + except exceptions.Error as e: + print(e) if __name__ == "__main__": main() - diff --git a/final_task/rss_reader/validation_functions.py b/final_task/rss_reader/validation_functions.py index c1b50c0..aa4d284 100644 --- a/final_task/rss_reader/validation_functions.py +++ b/final_task/rss_reader/validation_functions.py @@ -4,13 +4,14 @@ check_url(com_line_args, logger) -> True check_internet_connection(logger) -> True check_emptiness(news_collection, logger) -> True - check_version_arg(com_line_args, logger) -> True - check_limit_arg(news_collection, com_line_args, logger) -> limit (int) """ + check_version_arg(com_line_args, logger) -> True/False + check_limit_arg(news_collection, com_line_args, logger) -> limit (int) + check_date_arg(com_line_args, logger) -> True/False """ import requests from urllib.request import Request, urlopen from urllib.error import URLError -from rss_reader.exceptions import Error +from exceptions import Error, EmptyCollectionError def check_url(com_line_args, logger): @@ -25,7 +26,7 @@ def check_url(com_line_args, logger): except URLError as e: if hasattr(e, "reason"): logger.error(f"Failed to reach a server. Reason: {e.reason}.") - raise Error("Please, check your URL.") + raise Error("Please, check your internet connection and your URL.") elif hasattr(e, 'code'): logger.error("The server couldn\'t fulfill the request. " f"Error code: {e.code}") @@ -65,14 +66,19 @@ def check_version_arg(com_line_args, logger): return False -def check_limit_arg(news_collection, com_line_args, logger): +def check_limit_arg(com_line_args, logger): """ Check --limit argument function. """ if com_line_args.limit or com_line_args.limit == 0: - limit = com_line_args.limit - else: - limit = len(news_collection["entries"]) - if limit < 0: + return True + if com_line_args.limit < 0: logger.error("Command line argument limit is invalid.") - raise Error("Command line argument limit should not be negative.") + raise EmptyCollectionError("Command line argument limit should not be negative.") + - return limit +def check_date_arg(com_line_args, logger): + """ Check --date argument function. """ + if com_line_args.date: + logger.info("Checking date argument.") + return True + else: + return False diff --git a/final_task/setup.py b/final_task/setup.py index c45d232..0bedd6c 100644 --- a/final_task/setup.py +++ b/final_task/setup.py @@ -7,7 +7,7 @@ setup( name="rss_reader_Anna_Gonchar", - version="2.4", + version="3.0", description="RSS reader - simple command-line utility.", long_description=long_description, long_description_content_type="text/markdown", From 4e1ab2549ef5ab1ce8e402218d4473f225b606b1 Mon Sep 17 00:00:00 2001 From: AnnaPotter Date: Mon, 25 Nov 2019 03:59:14 +0300 Subject: [PATCH 6/7] Fourth iteration --- final_task/README.md | 38 +++++ final_task/rss_reader/README.md | 34 +---- final_task/rss_reader/action_functions.py | 110 +++++++-------- final_task/rss_reader/caching_functions.py | 27 ++-- final_task/rss_reader/conversion_functions.py | 130 ++++++++++++++++++ final_task/rss_reader/exceptions.py | 4 + final_task/rss_reader/models.py | 15 +- final_task/rss_reader/requirements.txt | 2 + final_task/rss_reader/rss_reader.py | 56 +++++--- final_task/rss_reader/tests/__init__.py | 0 .../rss_reader/tests/test_print_functions.py | 3 + final_task/rss_reader/validation_functions.py | 24 +++- final_task/setup.py | 4 +- 13 files changed, 307 insertions(+), 140 deletions(-) create mode 100644 final_task/rss_reader/conversion_functions.py create mode 100644 final_task/rss_reader/tests/__init__.py create mode 100644 final_task/rss_reader/tests/test_print_functions.py diff --git a/final_task/README.md b/final_task/README.md index 3ec38ea..fa4973e 100644 --- a/final_task/README.md +++ b/final_task/README.md @@ -8,6 +8,44 @@ format. ### Installation $ pip install rss-reader-Anna-Gonchar +### Usage +$ rss-reader (-h | --help) + + Show help message and exit + +$ rss-reader + + Print rss feeds in human-readable format + +$ rss-reader --version + + Print version info + +$ rss-reader --json + + Print result as JSON in stdout + +$ rss-reader.py --verbose + + Outputs verbose status messages + +$ rss-reader.py --limit LIMIT + + Limit news topics, if this parameter provided + +$ rss-reader.py --date DATE + + Gets a date in %Y%m%d format. Print news from the specified date + and source (), if it specified + +$ rss-reader.py --to-pdf PATH_TO_PDF + + Gets file path. Convert news to pdf and save them to pdf file on the specified path + +$ rss-reader.py --to-html PATH_TO_HTML + + Gets file path. Convert news to html and save them to html file on the specified path + ### Storage All the pieces of news received from the source are saved to the binary file. Shelve module is used for this. It saves object with the specific key to the file. diff --git a/final_task/rss_reader/README.md b/final_task/rss_reader/README.md index 8a59401..35ebe9b 100644 --- a/final_task/rss_reader/README.md +++ b/final_task/rss_reader/README.md @@ -1,32 +1,9 @@ #JSON structure: -####1) for news from the internet: - { - "feed": { - "feed_title": feed title, - "feed_language": feed language - } - - "entries": - [ - { - "title": news title, - "summary": news content, - "date": news publication date, - "link": news link }, - - { - "title": news title, - "summary": news content, - "date": news publication date, - "link": news link }, - ... - - ] - } - -####2) for news from the local storage: +{ + [ + { "feed_title": feed title, "feed_language": feed language, @@ -44,5 +21,6 @@ "link": news link }, ... - - ] + + ] +} \ No newline at end of file diff --git a/final_task/rss_reader/action_functions.py b/final_task/rss_reader/action_functions.py index 918a9e3..ba91062 100644 --- a/final_task/rss_reader/action_functions.py +++ b/final_task/rss_reader/action_functions.py @@ -3,6 +3,7 @@ Functions: create_logger(com_line_args) -> logger get_com_line_args() -> com_line_args + clean_str(string) -> clean_string get_news(command_line_args, logger) -> news_collection print_news_stdout(news_collection) -> None print_news_json(news_collection) -> None @@ -12,6 +13,7 @@ convert_date(date_str, logger) -> str_date """ import feedparser +import re from bs4 import BeautifulSoup import html import argparse @@ -61,9 +63,11 @@ def get_com_line_args(): parser = argparse.ArgumentParser(description="Pure Python command-line RSS reader.", add_help=True) parser.add_argument("source", type=str, nargs="?", help="RSS URL") parser.add_argument("--date", type=convert_date, - help="Take a date in %Y%m%d format. Print news from the specified date.") - # --to-epub - # --to-pdf + help="Gets a date in %Y%m%d format. Print news from the specified date.") + parser.add_argument("--to-html", type=str, + help="Gets file path. Convert news to html and save them to html file.") + parser.add_argument("--to-pdf", type=str, + help="Gets file path. Convert news to pdf and save them to pdf file.") parser.add_argument("--version", action="store_true", help="Print version info") parser.add_argument("--json", action="store_true", help="Print result as JSON in stdout") parser.add_argument("--verbose", action="store_true", help="Outputs verbose status messages") @@ -72,6 +76,24 @@ def get_com_line_args(): return parser.parse_args() +def get_limit_news_collection(news_collection, com_line_args, logger): + if not check_limit_arg(com_line_args, logger): + limit = len(news_collection) + else: + limit = com_line_args.limit + + if len(news_collection) < limit: + logger.warning("The number of news is less than the value of the argument limit.") + return news_collection + else: + return news_collection[:limit] + + +def clean_str(string): + clean_string = re.sub(u"(\u2018|\u2019|\u2014|\u2013)", "'", html.unescape(string)) + return clean_string + + def get_news(command_line_args, logger): """ Get news function. @@ -80,11 +102,9 @@ def get_news(command_line_args, logger): logger.info("Getting news.") news_feed = feedparser.parse(command_line_args.source) - # convert title string to unicode - feed = {"title": html.unescape(news_feed.feed.get("title", "")), + feed = {"title": clean_str(news_feed.feed.get("title", "")), "language": news_feed.feed.get("language", "")} - news_collection = {"feed": feed, - "entries": []} + news_collection = [] source = command_line_args.source for entry in news_feed.entries: @@ -93,96 +113,59 @@ def get_news(command_line_args, logger): news_entry.feed_language = feed["language"] news_entry.source = source - news_entry.title = html.unescape(entry.get("title", "")) + news_entry.title = clean_str(entry.get("title", "")) news_entry.date = entry.get("published", "") news_entry.link = entry.get("link", "") # get rid of html tags soup = BeautifulSoup(entry.get("summary", ""), "html.parser") - news_entry.summary = html.unescape(soup.text) + news_entry.summary = clean_str(soup.text) # get images links images = soup.findAll("img") - #mayby add images_alt text in future + for img in images: news_entry.image_links.append(img["src"]) - news_collection["entries"].append(news_entry) + news_collection.append(news_entry) return news_collection def print_news_stdout(news_collection): """ Function for print news to stdout in text format. """ - print("################################################################################\n", - "Feed: " + news_collection["feed"]["title"], - "Language: " + news_collection["feed"]["language"] + '\n', - sep='\n') - - for entry in news_collection["entries"]: - entry.print_entry() + if news_collection: + for entry in news_collection: + entry.print_entry() def print_news_json(news_collection): """ Function for print news to stdout in json format. """ - news_collection_for_json = {"feed": news_collection["feed"], - "entries": []} + news_collection_for_json = [] - for entry in news_collection["entries"]: + for entry in news_collection: entry_for_json = asdict(entry) - news_collection_for_json["entries"].append(entry_for_json) + news_collection_for_json.append(entry_for_json) print(json.dumps(news_collection_for_json, indent=4)) def print_news(news_collection, com_line_args, logger): """ Function for print news to stdout, - that take account of limit and json arguments. """ - - # get valid limit argument - # if not initialize limit argument - if not check_limit_arg(com_line_args, logger): - limit = len(news_collection["entries"]) - else: - limit = com_line_args.limit + that take account of json argument. """ - if len(news_collection["entries"]) < limit: - logger.warning("The number of news is less than the value of the argument limit.") - new_news_collection = news_collection + # news_collection already get valid limit argument + logger.info("Printing news.") + if com_line_args.date: + logger.info("Printing cache news.") else: - new_news_collection = {"feed": news_collection["feed"], - "entries": news_collection["entries"][:limit]} + logger.info("Printing news.") - logger.info("Printing news.") if com_line_args.json: logger.info("Printing news in json format.") - print_news_json(new_news_collection) + print_news_json(news_collection) else: logger.info("Printing news stdout.") - print_news_stdout(new_news_collection) - - -def print_cache_news(cached_news_collection, com_line_args, logger): - """ Function for print cached news to stdout, - that take account of json argument. """ - if cached_news_collection: # for the case when limit = 0 - if com_line_args.json: - print_cache_news_json(cached_news_collection, logger) - else: - logger.info("Printing cache news.") - for entry in cached_news_collection: - entry.print_cache_entry() - - -def print_cache_news_json(cached_news_collection, logger): - """ Function for print cached news to stdout in json format.""" - if cached_news_collection: # for the case when limit = 0 - logger.info("Printing cache news in json format.") - news_collection_for_json = [] - for entry in cached_news_collection: - entry_for_json = asdict(entry) - news_collection_for_json.append(entry_for_json) - - print(json.dumps(news_collection_for_json, indent=4)) + print_news_stdout(news_collection) def convert_date(date_str): @@ -190,6 +173,9 @@ def convert_date(date_str): try: datetime_obj = datetime.strptime(date_str, '%Y%m%d') str_date = datetime_obj.strftime("%d %b %Y") + if str_date[0] == '0': + str_date = str_date[1:] return str_date except ValueError as e: raise Error("Invalid date argument. Please, check your input.") + diff --git a/final_task/rss_reader/caching_functions.py b/final_task/rss_reader/caching_functions.py index 3def15b..adf29b3 100644 --- a/final_task/rss_reader/caching_functions.py +++ b/final_task/rss_reader/caching_functions.py @@ -17,7 +17,7 @@ def cache_news(news_collection, logger): """ Caching news function. """ logger.info("Collecting news to cache file.") with shelve.open(path.join(DIRECTORY, '.cache_rss_news')) as news_dict: - for news in news_collection["entries"]: + for news in news_collection: hash_date = news.date news_dict[hash_date] = news logger.info("News was cached successfully.") @@ -27,29 +27,34 @@ def get_cached_news(com_line_args, logger): logger.info("Getting cache news.") date = com_line_args.date source = com_line_args.source - check_limit_arg(com_line_args, logger) - cached_news_collection = [] + news_collection = [] with shelve.open(path.join(DIRECTORY, '.cache_rss_news')) as news_dict: if not news_dict: raise EmptyFileError("Cache file is empty. Please, retrieve news from internet. ") - limit = min(com_line_args.limit, len(news_dict)) + if not check_limit_arg(com_line_args, logger): + limit = len(news_dict) + else: + limit = min(com_line_args.limit, len(news_dict)) if source: for hash_date_key in news_dict: if date in hash_date_key: - news = news_dict[hash_date_key] - if source == news.source: - cached_news_collection.append(news) + if hash_date_key.split()[1] == date.partition(' ')[0]: + news = news_dict[hash_date_key] + if source == news.source: + news_collection.append(news) + else: for hash_date_key in news_dict: if date in hash_date_key: - news = news_dict[hash_date_key] - cached_news_collection.append(news) + if hash_date_key.split()[1] == date.partition(' ')[0]: + news = news_dict[hash_date_key] + news_collection.append(news) - if not cached_news_collection: + if not news_collection: if source: raise EmptyCollectionError("There are no news in cache file on specified date and source.") else: @@ -57,4 +62,4 @@ def get_cached_news(com_line_args, logger): else: logger.info("Successfully get news from cache.") - return cached_news_collection[:limit] + return news_collection[:limit] diff --git a/final_task/rss_reader/conversion_functions.py b/final_task/rss_reader/conversion_functions.py new file mode 100644 index 0000000..838136d --- /dev/null +++ b/final_task/rss_reader/conversion_functions.py @@ -0,0 +1,130 @@ +""" Module of functions for converting and saving news to pdf and html files. + + Functions: + create_and_fill_pdf_file(news_collection, com_line_args, logger) -> None + add_news_to_pdf_file(news, pdf, com_line_args, logger) -> None + add_image(num, link, pdf, logger) -> None + add_news_to_html_file(news, html_file, com_line_args, logger) -> None + add_news_to_html_file(news, html_file, com_line_args, logger) -> html_file """ + +import os +import urllib.request +import urllib.error +from dominate.tags import * +from fpdf import FPDF, set_global +from validation_functions import check_path_to_directory +from exceptions import FilePathError + + +def create_and_fill_pdf_file(news_collection, com_line_args, logger): + """ Function for creating and filling in the pdf file with news. """ + path_to_directory = com_line_args.to_pdf + + check_path_to_directory(path_to_directory, logger) + + pdf = FPDF(orientation='P', unit='mm', format='A4') + pdf.set_margins(5, 13.5, 5) + pdf.add_page() + + pdf.set_font('Arial', size=16) + pdf.set_text_color(255, 0, 0) + if com_line_args.date: + pdf.cell(200, 10, txt="RSS news from the local storage", ln=1, align="C") + else: + pdf.cell(200, 10, txt="RSS news from the internet", ln=1, align="C") + + pdf.ln(5) + pdf.set_text_color(0, 0, 0) + for news in news_collection: + add_news_to_pdf_file(news, pdf, com_line_args, logger) + logger.info("Creating pdf file with news.") + path_to_pdf_file = os.path.join(path_to_directory, "rss_news.pdf") + pdf.output(path_to_pdf_file, 'F') + logger.info("PDF file is created.") + + +def add_news_to_pdf_file(news, pdf, com_line_args, logger): + """ Function that add news to pdf file. """ + pdf.set_font('Arial', size=12) + pdf.set_text_color(0, 255, 0) + pdf.ln(10) + pdf.multi_cell(0, 10, align="C", txt="News") + pdf.set_text_color(0, 0, 0) + pdf.multi_cell(0, 10, txt=f"Feed title: {news.feed_title}") + pdf.multi_cell(0, 10, txt=f"News title: {news.title}") + pdf.multi_cell(0, 10, txt=f"News publication date: {news.date}") + pdf.multi_cell(0, 10, txt=f"Summary: {news.summary}") + + pdf.write(10, f"News link: {news.link}") + + if news.image_links: + if com_line_args.date: + pdf.write(10, "Images links: ") + for num, image_link in enumerate(news.image_links): + pdf.write(10, f"[{num + 1}]: {image_link}" + '\n') + else: + pdf.write(10, "Images to summary: ") + for num, img_link in enumerate(news.image_links): + add_image(num + 1, img_link, pdf, logger) + + +def add_image(num, image_link, pdf, logger): + """ Function for getting image from image url and adding it to pdf file. """ + logger.info(f"Download image from {image_link}.") + + (filename, headers) = urllib.request.urlretrieve(image_link) + image_format = headers['content-type'].replace('image/', '') + + if image_format not in ('jpeg', 'jpg', 'png'): + logger.info(f"Image from {image_link} is not in an appropriate format.") + pdf.write(10, f"[{num}]: {image_link}" + '\n') + else: + pdf.image(filename, x=50, y=pdf.get_y(), h=50, type=image_format, link=image_link) + pdf.ln(50) + os.remove(filename) + + +def create_and_fill_html_file(news_collection, com_line_args, logger): + """ Function for creating and filling in the html file with news. """ + path_to_directory = com_line_args.to_html + + check_path_to_directory(path_to_directory, logger) + + html_file = html(title="RSS news") + html_file.add(head(meta(charset='utf-8'))) + + for news in news_collection: + add_news_to_html_file(news, html_file, com_line_args) + + path = os.path.join(path_to_directory, "rss_news.html") + try: + logger.info("Creating html file with news.") + with open(path, 'w', encoding='utf-8') as rss_html: + rss_html.write(str(html_file)) + logger.info("HTML file is created.") + except FileNotFoundError: + logger.error("No html file directory.") + raise FilePathError("No html file directory. Please, checked path.") + + +def add_news_to_html_file(news, html_file, com_line_args): + """ Function that add news to html file. """ + with html_file: + h1(news.title) + p(b("Feed title: "), news.feed_title) + p(b("Publication date: "), news.date) + p(b("Summary: "), news.summary) + p(a("Link for this news.", href=news.link)) + with p(): + if news.image_links: + if com_line_args.date: + b("Images links: ") + for image_link in news.image_links: + a("Image link", href=image_link) + else: + b("Images to summary: ") + for img_link in news.image_links: + img(src=img_link) + br() + br() + return html_file diff --git a/final_task/rss_reader/exceptions.py b/final_task/rss_reader/exceptions.py index ef64edf..03eada7 100644 --- a/final_task/rss_reader/exceptions.py +++ b/final_task/rss_reader/exceptions.py @@ -14,4 +14,8 @@ class EmptyCollectionError(Error): pass +class FilePathError(Error): + pass + + diff --git a/final_task/rss_reader/models.py b/final_task/rss_reader/models.py index 6f1824e..35c8e12 100644 --- a/final_task/rss_reader/models.py +++ b/final_task/rss_reader/models.py @@ -21,19 +21,6 @@ class NewsEntry: id: str = "" def print_entry(self): - print("-------------------------------------------------------------", - "News title: " + self.title + '\n', - "Summary: " + self.summary + '\n', - "Publication date: " + self.date + '\n', - "Source: " + self.source + '\n', - "Link: " + self.link + '\n', - sep='\n') - if self.image_links: - print("Images links: ") - for num, img_link in enumerate(self.image_links): - print(f"[{num+1}] {img_link}") - - def print_cache_entry(self): print("-------------------------------------------------------------", "Feed title: " + self.feed_title + '\n', "Feed language: " + self.feed_language + '\n' + '\n', @@ -46,4 +33,4 @@ def print_cache_entry(self): if self.image_links: print("Images links: ") for num, img_link in enumerate(self.image_links): - print(f"[{num+1}] {img_link}") \ No newline at end of file + print(f"[{num+1}] {img_link}") diff --git a/final_task/rss_reader/requirements.txt b/final_task/rss_reader/requirements.txt index 84936f1..fe0ebf8 100644 --- a/final_task/rss_reader/requirements.txt +++ b/final_task/rss_reader/requirements.txt @@ -1,3 +1,5 @@ beautifulsoup4==4.8.1 requests feedparser +fpdf +dominate diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index df9bc69..574f4c5 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -1,29 +1,47 @@ """ Main module """ -import action_functions -import validation_functions +from action_functions import get_limit_news_collection, get_news, get_com_line_args, \ + create_logger, print_news +from validation_functions import check_url, check_version_arg, check_internet_connection, \ + check_emptiness import exceptions -import caching_functions +from caching_functions import get_cached_news, cache_news +from conversion_functions import create_and_fill_pdf_file, create_and_fill_html_file def main(): try: # get command line arguments - com_line_args = action_functions.get_com_line_args() - - logger = action_functions.create_logger(com_line_args) - - if not validation_functions.check_version_arg(com_line_args, logger): - if com_line_args.date: - news_collection = caching_functions.get_cached_news(com_line_args, logger) - action_functions.print_cache_news(news_collection, com_line_args, logger) - else: - validation_functions.check_internet_connection(logger) - validation_functions.check_url(com_line_args, logger) - - news_collection = action_functions.get_news(com_line_args, logger) - validation_functions.check_emptiness(news_collection, logger) - caching_functions.cache_news(news_collection, logger) - action_functions.print_news(news_collection, com_line_args, logger) + com_line_args = get_com_line_args() + + logger = create_logger(com_line_args) + + if not check_version_arg(com_line_args, logger): + if com_line_args.date: # getting news from local storage + news_collection = get_cached_news(com_line_args, logger) + if com_line_args.to_pdf: + create_and_fill_pdf_file(news_collection, com_line_args, logger) + elif com_line_args.to_html: + create_and_fill_html_file(news_collection, com_line_args, logger) + else: + # account of --json argument + print_news(news_collection, com_line_args, logger) + else: # getting news from the internet + check_internet_connection(logger) + check_url(com_line_args, logger) + + news_collection = get_news(com_line_args, logger) + check_emptiness(news_collection, logger) + cache_news(news_collection, logger) + # account of --limit argument + news_collection = get_limit_news_collection(news_collection, com_line_args, logger) + + if com_line_args.to_pdf: + create_and_fill_pdf_file(news_collection, com_line_args, logger) + elif com_line_args.to_html: + create_and_fill_html_file(news_collection, com_line_args, logger) + else: + # account of --json argument + print_news(news_collection, com_line_args, logger) except exceptions.Error as e: print(e) diff --git a/final_task/rss_reader/tests/__init__.py b/final_task/rss_reader/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/final_task/rss_reader/tests/test_print_functions.py b/final_task/rss_reader/tests/test_print_functions.py new file mode 100644 index 0000000..0b43f88 --- /dev/null +++ b/final_task/rss_reader/tests/test_print_functions.py @@ -0,0 +1,3 @@ +import unittest + +class TestPrintFunctions(unittest.TestCase): diff --git a/final_task/rss_reader/validation_functions.py b/final_task/rss_reader/validation_functions.py index aa4d284..15a6313 100644 --- a/final_task/rss_reader/validation_functions.py +++ b/final_task/rss_reader/validation_functions.py @@ -6,12 +6,14 @@ check_emptiness(news_collection, logger) -> True check_version_arg(com_line_args, logger) -> True/False check_limit_arg(news_collection, com_line_args, logger) -> limit (int) - check_date_arg(com_line_args, logger) -> True/False """ + check_date_arg(com_line_args, logger) -> True/False + check_path_to_directory(path_to_directory, logger) -> True """ import requests +import os from urllib.request import Request, urlopen from urllib.error import URLError -from exceptions import Error, EmptyCollectionError +from exceptions import Error, EmptyCollectionError, FilePathError def check_url(com_line_args, logger): @@ -49,7 +51,7 @@ def check_internet_connection(logger): def check_emptiness(news_collection, logger): """ Function for checking news availability in news collection. """ logger.info("Checking news collection emptiness.") - if not (news_collection["feed"] and news_collection["entries"]): + if not news_collection: logger.error("Empty RSS-feed.") raise Error("Please, check URL.") else: @@ -60,7 +62,7 @@ def check_version_arg(com_line_args, logger): """ Check --version argument function. """ if com_line_args.version: logger.info("View program version.") - print("rss_reader.py 1.0") + print("rss_reader.py 4.0") return True else: return False @@ -70,6 +72,8 @@ def check_limit_arg(com_line_args, logger): """ Check --limit argument function. """ if com_line_args.limit or com_line_args.limit == 0: return True + if not com_line_args.limit: + return False if com_line_args.limit < 0: logger.error("Command line argument limit is invalid.") raise EmptyCollectionError("Command line argument limit should not be negative.") @@ -82,3 +86,15 @@ def check_date_arg(com_line_args, logger): return True else: return False + + +def check_path_to_directory(path_to_directory, logger): + logger.info("Checking path to file directory.") + if os.path.isdir(path_to_directory) is False: + logger.error("Path to directory is invalid. Path not to folder.") + raise FilePathError("Path to directory is invalid. Path not to folder.") + elif not os.path.exists(path_to_directory): + logger.error("Path to file is invalid.") + raise FilePathError("Received path to file is invalid.") + else: + return True diff --git a/final_task/setup.py b/final_task/setup.py index 0bedd6c..f5b7c4a 100644 --- a/final_task/setup.py +++ b/final_task/setup.py @@ -7,7 +7,7 @@ setup( name="rss_reader_Anna_Gonchar", - version="3.0", + version="3.4", description="RSS reader - simple command-line utility.", long_description=long_description, long_description_content_type="text/markdown", @@ -20,7 +20,7 @@ "Operating System :: OS Independent", ], python_requires='>=3.8', - install_requires=['feedparser', 'requests', 'beautifulsoup4==4.8.1'], + install_requires=['feedparser', 'requests', 'beautifulsoup4==4.8.1', 'fpdf', 'dominate'], entry_points={ 'console_scripts': ['rss-reader = rss_reader.rss_reader:main'] From 941b772a086b66bacd0f1732e8b8382ef8cd9cfc Mon Sep 17 00:00:00 2001 From: AnnaPotter Date: Mon, 25 Nov 2019 23:40:37 +0300 Subject: [PATCH 7/7] Add tests and fixed some bugs --- final_task/rss_reader/action_functions.py | 22 +++-- final_task/rss_reader/conversion_functions.py | 32 +++++-- final_task/rss_reader/exceptions.py | 4 + final_task/rss_reader/models.py | 7 +- final_task/rss_reader/rss_reader.py | 4 +- final_task/rss_reader/tests/__init__.py | 0 .../rss_reader/tests/test_action_functions.py | 43 ++++++++++ .../tests/test_caching_functions.py | 86 +++++++++++++++++++ .../rss_reader/tests/test_print_functions.py | 3 - .../tests/test_validation_functions.py | 63 ++++++++++++++ final_task/rss_reader/validation_functions.py | 18 ++-- final_task/setup.py | 2 +- 12 files changed, 249 insertions(+), 35 deletions(-) delete mode 100644 final_task/rss_reader/tests/__init__.py create mode 100644 final_task/rss_reader/tests/test_action_functions.py create mode 100644 final_task/rss_reader/tests/test_caching_functions.py delete mode 100644 final_task/rss_reader/tests/test_print_functions.py create mode 100644 final_task/rss_reader/tests/test_validation_functions.py diff --git a/final_task/rss_reader/action_functions.py b/final_task/rss_reader/action_functions.py index ba91062..71d102f 100644 --- a/final_task/rss_reader/action_functions.py +++ b/final_task/rss_reader/action_functions.py @@ -3,23 +3,21 @@ Functions: create_logger(com_line_args) -> logger get_com_line_args() -> com_line_args - clean_str(string) -> clean_string get_news(command_line_args, logger) -> news_collection print_news_stdout(news_collection) -> None print_news_json(news_collection) -> None print_news(news_collection, com_line_args, logger) -> None print_cache_news(news_collection, logger) -> None print_cache_news_json(news_collection, logger) -> None - convert_date(date_str, logger) -> str_date """ + convert_date(date_str, logger) -> str_date + clean_str(string) -> clean_string """ import feedparser -import re from bs4 import BeautifulSoup import html import argparse import json import logging - from datetime import datetime from exceptions import Error from models import NewsEntry @@ -61,9 +59,8 @@ def create_logger(com_line_args): def get_com_line_args(): """ Function to get command line arguments. """ parser = argparse.ArgumentParser(description="Pure Python command-line RSS reader.", add_help=True) - parser.add_argument("source", type=str, nargs="?", help="RSS URL") parser.add_argument("--date", type=convert_date, - help="Gets a date in %Y%m%d format. Print news from the specified date.") + help="Gets a date in %%Y%%m%%d format. Print news from the specified date.") parser.add_argument("--to-html", type=str, help="Gets file path. Convert news to html and save them to html file.") parser.add_argument("--to-pdf", type=str, @@ -72,6 +69,7 @@ def get_com_line_args(): parser.add_argument("--json", action="store_true", help="Print result as JSON in stdout") parser.add_argument("--verbose", action="store_true", help="Outputs verbose status messages") parser.add_argument("--limit", type=int, help="Limit news topics if this parameter provided") + parser.add_argument("source", type=str, nargs="?", help="RSS URL") return parser.parse_args() @@ -89,11 +87,6 @@ def get_limit_news_collection(news_collection, com_line_args, logger): return news_collection[:limit] -def clean_str(string): - clean_string = re.sub(u"(\u2018|\u2019|\u2014|\u2013)", "'", html.unescape(string)) - return clean_string - - def get_news(command_line_args, logger): """ Get news function. @@ -124,7 +117,8 @@ def get_news(command_line_args, logger): images = soup.findAll("img") for img in images: - news_entry.image_links.append(img["src"]) + if img["src"]: + news_entry.image_links.append(img["src"]) news_collection.append(news_entry) @@ -179,3 +173,7 @@ def convert_date(date_str): except ValueError as e: raise Error("Invalid date argument. Please, check your input.") + +def clean_str(string): + clean_string = html.unescape(string).encode('ascii', 'ignore').decode("utf-8") + return clean_string diff --git a/final_task/rss_reader/conversion_functions.py b/final_task/rss_reader/conversion_functions.py index 838136d..88d8f31 100644 --- a/final_task/rss_reader/conversion_functions.py +++ b/final_task/rss_reader/conversion_functions.py @@ -49,21 +49,39 @@ def add_news_to_pdf_file(news, pdf, com_line_args, logger): pdf.set_text_color(0, 255, 0) pdf.ln(10) pdf.multi_cell(0, 10, align="C", txt="News") + pdf.set_text_color(0, 0, 255) + pdf.write(10, "Feed title: ") + pdf.set_text_color(0, 0, 0) + pdf.multi_cell(0, 10, txt=f" {news.feed_title}") + pdf.set_text_color(0, 0, 255) + pdf.write(10, "News title: ") + pdf.set_text_color(0, 0, 0) + pdf.multi_cell(0, 10, txt=f"{news.title}") + pdf.set_text_color(0, 0, 255) + pdf.write(10, "News publication date: ") + pdf.set_text_color(0, 0, 0) + pdf.multi_cell(0, 10, txt=f"{news.date}") + pdf.set_text_color(0, 0, 255) + pdf.write(10, "Summary: ") + pdf.set_text_color(0, 0, 0) + pdf.multi_cell(0, 10, txt=f"{news.summary}") + pdf.set_text_color(0, 0, 255) + pdf.write(10, "News link: ") pdf.set_text_color(0, 0, 0) - pdf.multi_cell(0, 10, txt=f"Feed title: {news.feed_title}") - pdf.multi_cell(0, 10, txt=f"News title: {news.title}") - pdf.multi_cell(0, 10, txt=f"News publication date: {news.date}") - pdf.multi_cell(0, 10, txt=f"Summary: {news.summary}") - pdf.write(10, f"News link: {news.link}") + pdf.multi_cell(0, 10, txt=f"{news.link}") if news.image_links: if com_line_args.date: + pdf.set_text_color(0, 0, 255) pdf.write(10, "Images links: ") + pdf.set_text_color(0, 0, 0) for num, image_link in enumerate(news.image_links): pdf.write(10, f"[{num + 1}]: {image_link}" + '\n') else: + pdf.set_text_color(0, 0, 255) pdf.write(10, "Images to summary: ") + pdf.set_text_color(0, 0, 0) for num, img_link in enumerate(news.image_links): add_image(num + 1, img_link, pdf, logger) @@ -79,7 +97,7 @@ def add_image(num, image_link, pdf, logger): logger.info(f"Image from {image_link} is not in an appropriate format.") pdf.write(10, f"[{num}]: {image_link}" + '\n') else: - pdf.image(filename, x=50, y=pdf.get_y(), h=50, type=image_format, link=image_link) + pdf.image(filename, x=50, y=pdf.get_y(), h=30, type=image_format, link=image_link) pdf.ln(50) os.remove(filename) @@ -120,7 +138,7 @@ def add_news_to_html_file(news, html_file, com_line_args): if com_line_args.date: b("Images links: ") for image_link in news.image_links: - a("Image link", href=image_link) + a("Link to image", href=image_link) else: b("Images to summary: ") for img_link in news.image_links: diff --git a/final_task/rss_reader/exceptions.py b/final_task/rss_reader/exceptions.py index 03eada7..c46b762 100644 --- a/final_task/rss_reader/exceptions.py +++ b/final_task/rss_reader/exceptions.py @@ -14,6 +14,10 @@ class EmptyCollectionError(Error): pass +class ComLineArgError(Error): + pass + + class FilePathError(Error): pass diff --git a/final_task/rss_reader/models.py b/final_task/rss_reader/models.py index 35c8e12..9657b4f 100644 --- a/final_task/rss_reader/models.py +++ b/final_task/rss_reader/models.py @@ -16,9 +16,8 @@ class NewsEntry: summary: str = "" date: str = "" link: str = "" - image_links: list = field(default_factory=list) source: str = "" - id: str = "" + image_links: list = field(default_factory=list) def print_entry(self): print("-------------------------------------------------------------", @@ -30,7 +29,9 @@ def print_entry(self): "Source: " + self.source + '\n', "Link: " + self.link + '\n', sep='\n') + if self.image_links: print("Images links: ") for num, img_link in enumerate(self.image_links): - print(f"[{num+1}] {img_link}") + if img_link: + print(f"[{num+1}] {img_link}") diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index 574f4c5..a70f752 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -3,7 +3,7 @@ create_logger, print_news from validation_functions import check_url, check_version_arg, check_internet_connection, \ check_emptiness -import exceptions +from exceptions import Error from caching_functions import get_cached_news, cache_news from conversion_functions import create_and_fill_pdf_file, create_and_fill_html_file @@ -42,7 +42,7 @@ def main(): else: # account of --json argument print_news(news_collection, com_line_args, logger) - except exceptions.Error as e: + except Error as e: print(e) diff --git a/final_task/rss_reader/tests/__init__.py b/final_task/rss_reader/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/final_task/rss_reader/tests/test_action_functions.py b/final_task/rss_reader/tests/test_action_functions.py new file mode 100644 index 0000000..33610ff --- /dev/null +++ b/final_task/rss_reader/tests/test_action_functions.py @@ -0,0 +1,43 @@ +""" Testing module for action functions. """ +import unittest +from unittest import mock +import action_functions as act_func +import exceptions as exc + + +class TestActionFunctions(unittest.TestCase): + """ Class for testing some of action functions. """ + def setUp(self): + self.logger = mock.Mock() + self.com_line_args = mock.Mock() + + def test_get_limit_news_collection(self): + news_collection = [num for num in range(10)] + + with mock.patch('validation_functions.check_limit_arg') as check_limit_mock: + check_limit_mock.return_value = True + self.com_line_args.limit = 4 + self.assertEqual(len(act_func.get_limit_news_collection + (news_collection, self.com_line_args, self.logger)), 4) + self.com_line_args.limit = 11 + self.assertEqual(len(act_func.get_limit_news_collection + (news_collection, self.com_line_args, self.logger)), 10) + + check_limit_mock.return_value = False + self.assertEqual(len(act_func.get_limit_news_collection + (news_collection, self.com_line_args, self.logger)), 10) + + def test_clean_str(self): + test_str = "Netanyahu \u2014rival seeks support from PM's party to form government." + expect_str = "Netanyahu rival seeks support from PM's party to form government." + self.assertEqual(act_func.clean_str(test_str), expect_str) + + def test_convert_date(self): + self.assertEqual(act_func.convert_date("20190207"), "7 Feb 2019") + self.assertEqual(act_func.convert_date("20190410"), "10 Apr 2019") + with self.assertRaises(exc.Error): + act_func.convert_date("2000") + + +if __name__ == '__main__': + unittest.main() diff --git a/final_task/rss_reader/tests/test_caching_functions.py b/final_task/rss_reader/tests/test_caching_functions.py new file mode 100644 index 0000000..01c3ce8 --- /dev/null +++ b/final_task/rss_reader/tests/test_caching_functions.py @@ -0,0 +1,86 @@ +""" Testing module for caching functions. """ +import unittest +from unittest import mock + +import os +from models import NewsEntry +import caching_functions as cache_func +import exceptions as exc + +"""Create news instances""" +title = "Stars Are Being Born in the Depths of a Black Hole" +date = "Tue, 19 Nov 2019 15:47 EST" +link = "http://www.nasa.gov/image-feature/stars-are-being-born-in-the-depths-of-a-black-hole" +summary = """ In the Phoenix Constellation, astronomers have confirmed the first example + of a galaxy cluster where large numbers of stars are being born at its core. """ +feed_title = "NASA Image of the Day" +feed_language = "en-us" +source = "file:///tmp/mozilla_anna0/lg_image_of_the_day.rss" + +news1 = NewsEntry(feed_title, feed_language, title, summary, date, link, source) + +title = "Stars Are Being Born in the Depths of a Black Hole - 2" +date = "Tue, 19 Nov 2018 15:47 EST" +link = "http://www.nasa./new/gov/image-feature/stars-are-being-born-in-the-depths-of-a-black-hole" +summary = """ In the Phoenix Constellation, astronomers have confirmed the first example + of a galaxy cluster where large numbers of stars are being born at its core. """ +feed_title = "NASA Image of the Day" +feed_language = "en-us" +source = "file:///tmp/mozilla_anna0/lg_image_of_the_day.rss" +image_links = ["link1, link2"] +news2 = NewsEntry(feed_title, feed_language, title, summary, date, link, source, image_links) + +news_collection = [news1, news2] + +DIRECTORY = os.path.abspath(os.path.dirname(__file__)) + + +class TestCachingFunctions(unittest.TestCase): + def setUp(self): + """Initialize collections of news""" + self.news1 = news1 + self.news2 = news2 + self.news_collection = news_collection + self.logger = mock.Mock() + self.command_line_args = mock.Mock() + + self.home_dir = os.path.expanduser('~') + self.test_file_path = os.path.join(DIRECTORY, '.test_cache_rss_news') + if os.path.exists(self.test_file_path): + os.remove(self.test_file_path) + + @mock.patch("os.path.join") + def test_cache_news_and_get_cache_news(self, path): + path.return_value = self.test_file_path + cache_func.cache_news(self.news_collection, self.logger) + + self.command_line_args.limit = 3 + self.command_line_args.date = "11 Nov 2018" + self.command_line_args.source = '' + with self.assertRaises(exc.EmptyCollectionError): + cache_func.get_cached_news(self.command_line_args, self.logger) + + self.command_line_args.date = "19 Nov 2019" + self.command_line_args.source = 'Source' + with self.assertRaises(exc.EmptyCollectionError): + cache_func.get_cached_news(self.command_line_args, self.logger) + + self.command_line_args.date = "19 Nov 2019" + self.command_line_args.source = '' + get_news_collection = cache_func. \ + get_cached_news(self.command_line_args, self.logger) + num_of_news = len(get_news_collection) + self.assertEqual(num_of_news, 1) + + self.command_line_args.date = "19 Nov 2019" + self.command_line_args.source =\ + "file:///tmp/mozilla_anna0/lg_image_of_the_day.rss" + get_news_collection = cache_func.get_cached_news(self.command_line_args, self.logger) + num_of_news = len(get_news_collection) + self.assertEqual(num_of_news, 1) + news_title = "Stars Are Being Born in the Depths of a Black Hole" + self.assertEqual(news_collection[0].title, news_title) + + +if __name__ == '__main__': + unittest.main() diff --git a/final_task/rss_reader/tests/test_print_functions.py b/final_task/rss_reader/tests/test_print_functions.py deleted file mode 100644 index 0b43f88..0000000 --- a/final_task/rss_reader/tests/test_print_functions.py +++ /dev/null @@ -1,3 +0,0 @@ -import unittest - -class TestPrintFunctions(unittest.TestCase): diff --git a/final_task/rss_reader/tests/test_validation_functions.py b/final_task/rss_reader/tests/test_validation_functions.py new file mode 100644 index 0000000..b2a3d3e --- /dev/null +++ b/final_task/rss_reader/tests/test_validation_functions.py @@ -0,0 +1,63 @@ +""" Testing module for validation functions. """ +import unittest +from unittest import mock + +import requests +import validation_functions as val_func +import exceptions as exc + + +class TestValidationFunctions(unittest.TestCase): + """ Class for testing some validation functions. """ + + def setUp(self): + self.logger = mock.Mock() + self.com_line_args = mock.Mock() + + def test_check_internet_connection(self): + with mock.patch('requests.get'): + self.assertTrue(val_func.check_internet_connection(self.logger)) + + with self.assertRaises(exc.Error): + with mock.patch('requests.get', side_effect=requests.ConnectionError): + val_func.check_internet_connection(self.logger) + + def test_check_emptiness(self): + with self.assertRaises(exc.Error): + news_collection = [] + val_func.check_emptiness(news_collection, self.logger) + + news_collection = ["Smile"] + self.assertTrue(val_func.check_emptiness(news_collection, self.logger)) + + @mock.patch('urllib.request.Request', side_effect=ValueError) + def test_check_url_Request(self, req): + with self.assertRaises(exc.Error): + val_func.check_url(self.com_line_args, self.logger) + + def test_check_limit_arg(self): + self.com_line_args.limit = 0 + self.assertTrue(val_func.check_limit_arg(self.com_line_args, self.logger)) + + self.com_line_args.limit = 5 + self.assertTrue(val_func.check_limit_arg(self.com_line_args, self.logger)) + + self.com_line_args.limit = None + self.assertFalse(val_func.check_limit_arg(self.com_line_args, self.logger)) + + with self.assertRaises(exc.ComLineArgError): + self.com_line_args.limit = -5 + val_func.check_limit_arg(self.com_line_args, self.logger) + + def test_check_date_arg(self): + self.com_line_args.date = "20190111" + self.assertTrue(val_func.check_date_arg(self.com_line_args, self.logger)) + self.com_line_args.date = "" + self.assertFalse(val_func.check_date_arg(self.com_line_args, self.logger)) + + +if __name__ == '__main__': + unittest.main() + + + diff --git a/final_task/rss_reader/validation_functions.py b/final_task/rss_reader/validation_functions.py index 15a6313..c0e95d6 100644 --- a/final_task/rss_reader/validation_functions.py +++ b/final_task/rss_reader/validation_functions.py @@ -13,7 +13,7 @@ import os from urllib.request import Request, urlopen from urllib.error import URLError -from exceptions import Error, EmptyCollectionError, FilePathError +from exceptions import Error, EmptyCollectionError, FilePathError, ComLineArgError def check_url(com_line_args, logger): @@ -70,13 +70,17 @@ def check_version_arg(com_line_args, logger): def check_limit_arg(com_line_args, logger): """ Check --limit argument function. """ - if com_line_args.limit or com_line_args.limit == 0: - return True - if not com_line_args.limit: - return False - if com_line_args.limit < 0: + limit = com_line_args.limit + if not limit: + if limit == 0: + return True + else: + return False + elif com_line_args.limit < 0: logger.error("Command line argument limit is invalid.") - raise EmptyCollectionError("Command line argument limit should not be negative.") + raise ComLineArgError("Command line argument limit should not be negative.") + else: + return True def check_date_arg(com_line_args, logger): diff --git a/final_task/setup.py b/final_task/setup.py index f5b7c4a..44f19c6 100644 --- a/final_task/setup.py +++ b/final_task/setup.py @@ -7,7 +7,7 @@ setup( name="rss_reader_Anna_Gonchar", - version="3.4", + version="4.1", description="RSS reader - simple command-line utility.", long_description=long_description, long_description_content_type="text/markdown",