diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3781a3c --- /dev/null +++ b/.gitignore @@ -0,0 +1,17 @@ +# Editors +.idea/ + +# Environments +venv/ + +# Byte-compiled / optimized +__pycache__/ +*.py[cod] + +*.log + +# Distribution / packaging +*.egg-info/ +build/ +develop-eggs/ +dist/ \ No newline at end of file diff --git a/final_task/README.md b/final_task/README.md index 7af281f..fa4973e 100644 --- a/final_task/README.md +++ b/final_task/README.md @@ -1,3 +1,53 @@ -# Your readme here -Some text. -Checkout how to write this file using *markdown*. +# RSS reader +RSS reader is a command-line utility that receives an RSS URL and prints the feed in a human-readable +format. + +[The source for this project is available here](https://github.com/AnnaPotter/FinalTaskRssParser). + + +### Installation +$ pip install rss-reader-Anna-Gonchar + +### Usage +$ rss-reader (-h | --help) + + Show help message and exit + +$ rss-reader SOURCE + + Print RSS feeds in human-readable format + +$ rss-reader --version + + Print version info + +$ rss-reader --json + + Print result as JSON in stdout + +$ rss-reader --verbose + + Output verbose status messages + +$ rss-reader --limit LIMIT + + Limit the number of news topics if this parameter is provided + +$ rss-reader --date DATE + + Takes a date in %Y%m%d format. Prints cached news from the specified date + and source, if a source is specified + +$ rss-reader --to-pdf PATH_TO_PDF + + Takes a directory path. Converts news to PDF and saves them as rss_news.pdf in that directory + +$ rss-reader --to-html PATH_TO_HTML + + Takes a directory path. Converts news to HTML and saves them as rss_news.html in that directory + +### Storage +All news items received from the source are saved to a binary file. +The shelve module is used for this: it stores each object in the file under a specific key. +The key is the news publication date; the value is the news item. 
+ diff --git a/final_task/rss_reader/README.md b/final_task/rss_reader/README.md new file mode 100644 index 0000000..35ebe9b --- /dev/null +++ b/final_task/rss_reader/README.md @@ -0,0 +1,26 @@ +# JSON structure: + +{ + + [ + + { + "feed_title": feed title, + "feed_language": feed language, + "title": news title, + "summary": news content, + "date": news publication date, + "link": news link }, + + { + "feed_title": feed title, + "feed_language": feed language, + "title": news title, + "summary": news content, + "date": news publication date, + "link": news link }, + + ... + + ] +} \ No newline at end of file diff --git a/final_task/rss_reader/__init__.py b/final_task/rss_reader/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/final_task/rss_reader/__init__.py @@ -0,0 +1 @@ + diff --git a/final_task/rss_reader/action_functions.py b/final_task/rss_reader/action_functions.py new file mode 100644 index 0000000..71d102f --- /dev/null +++ b/final_task/rss_reader/action_functions.py @@ -0,0 +1,179 @@ +""" Module of creation functions and action functions. + + Functions: + create_logger(com_line_args) -> logger + get_com_line_args() -> com_line_args + get_limit_news_collection(news_collection, com_line_args, logger) -> news_collection + get_news(command_line_args, logger) -> news_collection + print_news_stdout(news_collection) -> None + print_news_json(news_collection) -> None + print_news(news_collection, com_line_args, logger) -> None + convert_date(date_str) -> str_date + clean_str(string) -> clean_string + """ + +import feedparser +from bs4 import BeautifulSoup +import html +import argparse +import json +import logging +from datetime import datetime +from exceptions import Error +from models import NewsEntry +from dataclasses import asdict +from validation_functions import check_limit_arg + + +def create_logger(com_line_args): + """Create logger function. + + Creates a logger considering the --verbose argument. 
""" + # Create the logger; it accepts every level, the handlers do the filtering + logger = logging.getLogger("rss_reader_logger") + logger.setLevel(logging.DEBUG) + + # Create handlers: one for the console, one for the log file + c_handler = logging.StreamHandler() + f_handler = logging.FileHandler("file.log") + + # With --verbose the console shows every message, otherwise only errors + if com_line_args.verbose: + c_handler.setLevel(logging.DEBUG) + else: + c_handler.setLevel(logging.ERROR) + + # Create formatters and attach them to the handlers + c_format = logging.Formatter('%(name)s - %(levelname)s - %(message)s') + f_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + c_handler.setFormatter(c_format) + f_handler.setFormatter(f_format) + + # Add handlers to the logger + logger.addHandler(f_handler) + logger.addHandler(c_handler) + + return logger + + +def get_com_line_args(): + """ Build the argument parser and return the parsed command line arguments. """ + parser = argparse.ArgumentParser(description="Pure Python command-line RSS reader.", add_help=True) + parser.add_argument("--date", type=convert_date, + help="Gets a date in %%Y%%m%%d format. Print news from the specified date.") + parser.add_argument("--to-html", type=str, + help="Gets file path. Convert news to html and save them to html file.") + parser.add_argument("--to-pdf", type=str, + help="Gets file path. 
Convert news to pdf and save them to pdf file.") + parser.add_argument("--version", action="store_true", help="Print version info") + parser.add_argument("--json", action="store_true", help="Print result as JSON in stdout") + parser.add_argument("--verbose", action="store_true", help="Outputs verbose status messages") + parser.add_argument("--limit", type=int, help="Limit news topics if this parameter provided") + parser.add_argument("source", type=str, nargs="?", help="RSS URL") + + return parser.parse_args() + + +def get_limit_news_collection(news_collection, com_line_args, logger):  # trim the collection to --limit entries + if not check_limit_arg(com_line_args, logger): + limit = len(news_collection) + else: + limit = com_line_args.limit + + if len(news_collection) < limit: + logger.warning("The number of news is less than the value of the argument limit.") + return news_collection + else: + return news_collection[:limit] + + +def get_news(command_line_args, logger): + """ Get news function. + + Uses the feedparser library to receive news + and the BeautifulSoup library to convert it to a readable format. 
""" + logger.info("Getting news.") + news_feed = feedparser.parse(command_line_args.source) + + feed = {"title": clean_str(news_feed.feed.get("title", "")), + "language": news_feed.feed.get("language", "")} + news_collection = [] + source = command_line_args.source + + for entry in news_feed.entries: + news_entry = NewsEntry() + news_entry.feed_title = feed["title"] + news_entry.feed_language = feed["language"] + + news_entry.source = source + news_entry.title = clean_str(entry.get("title", "")) + news_entry.date = entry.get("published", "") + news_entry.link = entry.get("link", "") + + # strip html tags from the summary + soup = BeautifulSoup(entry.get("summary", ""), "html.parser") + news_entry.summary = clean_str(soup.text) + # collect image links + images = soup.findAll("img") + + for img in images: + if img.get("src"):  # .get() so an <img> without a src attribute is skipped instead of raising KeyError + news_entry.image_links.append(img["src"]) + + news_collection.append(news_entry) + + return news_collection + + +def print_news_stdout(news_collection): + """ Print every news entry to stdout in text format. """ + if news_collection: + for entry in news_collection: + entry.print_entry() + + +def print_news_json(news_collection): + """ Print the news collection to stdout as a JSON list. """ + news_collection_for_json = [] + + for entry in news_collection: + entry_for_json = asdict(entry) + news_collection_for_json.append(entry_for_json) + + print(json.dumps(news_collection_for_json, indent=4)) + + +def print_news(news_collection, com_line_args, logger): + """ Print news to stdout, + taking the --json argument into account. 
""" + + # news_collection has already been trimmed to the --limit argument + # log once, naming where the news came from + if com_line_args.date: + logger.info("Printing cache news.") + else: + logger.info("Printing news.") + + if com_line_args.json: + logger.info("Printing news in json format.") + print_news_json(news_collection) + else: + logger.info("Printing news stdout.") + print_news_stdout(news_collection) + + +def convert_date(date_str): + """ Convert a %Y%m%d string to "D Mon YYYY" without a leading zero; raise Error if it does not parse. """ + try: + datetime_obj = datetime.strptime(date_str, '%Y%m%d') + str_date = datetime_obj.strftime("%d %b %Y") + if str_date[0] == '0': + str_date = str_date[1:] + return str_date + except ValueError as e: + raise Error("Invalid date argument. Please, check your input.") from e + + +def clean_str(string):  # unescape html entities and drop every non-ASCII character + clean_string = html.unescape(string).encode('ascii', 'ignore').decode("utf-8") + return clean_string diff --git a/final_task/rss_reader/caching_functions.py b/final_task/rss_reader/caching_functions.py new file mode 100644 index 0000000..adf29b3 --- /dev/null +++ b/final_task/rss_reader/caching_functions.py @@ -0,0 +1,65 @@ +""" Module of caching functions. + + Functions: + cache_news(news_collection, logger) -> None + get_cached_news(com_line_args, logger) -> cached_news_collection """ + +import shelve +from os import path + +from validation_functions import check_limit_arg +from exceptions import EmptyFileError, EmptyCollectionError + +DIRECTORY = path.abspath(path.dirname(__file__)) + + +def cache_news(news_collection, logger): + """ Caching news function. 
""" + logger.info("Collecting news to cache file.") + with shelve.open(path.join(DIRECTORY, '.cache_rss_news')) as news_dict: + for news in news_collection: + hash_date = news.date  # NOTE(review): entries sharing a publication date overwrite each other - confirm this is acceptable + news_dict[hash_date] = news + logger.info("News was cached successfully.") + + +def get_cached_news(com_line_args, logger):  # return cached news for --date, optionally filtered by source and cut to --limit + logger.info("Getting cache news.") + date = com_line_args.date + source = com_line_args.source + + news_collection = [] + + with shelve.open(path.join(DIRECTORY, '.cache_rss_news')) as news_dict: + if not news_dict: + raise EmptyFileError("Cache file is empty. Please, retrieve news from internet. ") + + if not check_limit_arg(com_line_args, logger): + limit = len(news_dict) + else: + limit = min(com_line_args.limit, len(news_dict)) + + if source: + for hash_date_key in news_dict: + if date in hash_date_key: + if hash_date_key.split()[1] == date.partition(' ')[0]:  # presumably keys look like "Tue, 19 Nov 2019 15:47 EST"; comparing the day token keeps "9 Nov" from matching "19 Nov" + news = news_dict[hash_date_key] + if source == news.source: + news_collection.append(news) + + else: + for hash_date_key in news_dict: + if date in hash_date_key: + if hash_date_key.split()[1] == date.partition(' ')[0]: + news = news_dict[hash_date_key] + news_collection.append(news) + + if not news_collection: + if source: + raise EmptyCollectionError("There are no news in cache file on specified date and source.") + else: + raise EmptyCollectionError("There are no news in cache file on specified date.") + + else: + logger.info("Successfully get news from cache.") + return news_collection[:limit] diff --git a/final_task/rss_reader/conversion_functions.py b/final_task/rss_reader/conversion_functions.py new file mode 100644 index 0000000..88d8f31 --- /dev/null +++ b/final_task/rss_reader/conversion_functions.py @@ -0,0 +1,148 @@ +""" Module of functions for converting and saving news to pdf and html files. 
+ + Functions: + create_and_fill_pdf_file(news_collection, com_line_args, logger) -> None + add_news_to_pdf_file(news, pdf, com_line_args, logger) -> None + add_image(num, image_link, pdf, logger) -> None + create_and_fill_html_file(news_collection, com_line_args, logger) -> None + add_news_to_html_file(news, html_file, com_line_args) -> html_file """ + +import os +import urllib.request +import urllib.error +from dominate.tags import * +from fpdf import FPDF, set_global +from validation_functions import check_path_to_directory +from exceptions import FilePathError + + +def create_and_fill_pdf_file(news_collection, com_line_args, logger): + """ Build a pdf document from the news and save it as rss_news.pdf in the --to-pdf directory. """ + path_to_directory = com_line_args.to_pdf + + check_path_to_directory(path_to_directory, logger) + + pdf = FPDF(orientation='P', unit='mm', format='A4') + pdf.set_margins(5, 13.5, 5) + pdf.add_page() + + pdf.set_font('Arial', size=16) + pdf.set_text_color(255, 0, 0) + if com_line_args.date: + pdf.cell(200, 10, txt="RSS news from the local storage", ln=1, align="C") + else: + pdf.cell(200, 10, txt="RSS news from the internet", ln=1, align="C") + + pdf.ln(5) + pdf.set_text_color(0, 0, 0) + for news in news_collection: + add_news_to_pdf_file(news, pdf, com_line_args, logger) + logger.info("Creating pdf file with news.") + path_to_pdf_file = os.path.join(path_to_directory, "rss_news.pdf") + pdf.output(path_to_pdf_file, 'F') + logger.info("PDF file is created.") + + +def add_news_to_pdf_file(news, pdf, com_line_args, logger): + """ Append one news entry to the pdf document. 
""" + pdf.set_font('Arial', size=12) + pdf.set_text_color(0, 255, 0) + pdf.ln(10) + pdf.multi_cell(0, 10, align="C", txt="News") + pdf.set_text_color(0, 0, 255) + pdf.write(10, "Feed title: ") + pdf.set_text_color(0, 0, 0) + pdf.multi_cell(0, 10, txt=f" {news.feed_title}") + pdf.set_text_color(0, 0, 255) + pdf.write(10, "News title: ") + pdf.set_text_color(0, 0, 0) + pdf.multi_cell(0, 10, txt=f"{news.title}") + pdf.set_text_color(0, 0, 255) + pdf.write(10, "News publication date: ") + pdf.set_text_color(0, 0, 0) + pdf.multi_cell(0, 10, txt=f"{news.date}") + pdf.set_text_color(0, 0, 255) + pdf.write(10, "Summary: ") + pdf.set_text_color(0, 0, 0) + pdf.multi_cell(0, 10, txt=f"{news.summary}") + pdf.set_text_color(0, 0, 255) + pdf.write(10, "News link: ") + pdf.set_text_color(0, 0, 0) + + pdf.multi_cell(0, 10, txt=f"{news.link}") + + if news.image_links: + if com_line_args.date:  # news read with --date: write the image links as text + pdf.set_text_color(0, 0, 255) + pdf.write(10, "Images links: ") + pdf.set_text_color(0, 0, 0) + for num, image_link in enumerate(news.image_links): + pdf.write(10, f"[{num + 1}]: {image_link}" + '\n') + else:  # otherwise hand every link to add_image, numbered from 1 + pdf.set_text_color(0, 0, 255) + pdf.write(10, "Images to summary: ") + pdf.set_text_color(0, 0, 0) + for num, img_link in enumerate(news.image_links): + add_image(num + 1, img_link, pdf, logger) + + +def add_image(num, image_link, pdf, logger): + """ Function for getting image from image url and adding it to pdf file. 
""" + logger.info(f"Download image from {image_link}.") + + (filename, headers) = urllib.request.urlretrieve(image_link) + # use the subtype only for image/* responses; a missing or parameterised Content-Type no longer breaks the check + image_format = headers.get_content_subtype() if headers.get_content_maintype() == 'image' else '' + if image_format not in ('jpeg', 'jpg', 'png'): + logger.info(f"Image from {image_link} is not in an appropriate format.") + pdf.write(10, f"[{num}]: {image_link}" + '\n') + else: + pdf.image(filename, x=50, y=pdf.get_y(), h=30, type=image_format, link=image_link) + pdf.ln(50) + os.remove(filename) + + +def create_and_fill_html_file(news_collection, com_line_args, logger): + """ Build an html document from the news and save it as rss_news.html in the --to-html directory. """ + path_to_directory = com_line_args.to_html + + check_path_to_directory(path_to_directory, logger) + + html_file = html(title="RSS news") + html_file.add(head(meta(charset='utf-8'))) + + for news in news_collection: + add_news_to_html_file(news, html_file, com_line_args) + + path = os.path.join(path_to_directory, "rss_news.html") + try: + logger.info("Creating html file with news.") + with open(path, 'w', encoding='utf-8') as rss_html: + rss_html.write(str(html_file)) + logger.info("HTML file is created.") + except FileNotFoundError: + logger.error("No html file directory.") + raise FilePathError("No html file directory. Please, checked path.") + + +def add_news_to_html_file(news, html_file, com_line_args): + """ Function that add news to html file. 
""" + with html_file: + h1(news.title) + p(b("Feed title: "), news.feed_title) + p(b("Publication date: "), news.date) + p(b("Summary: "), news.summary) + p(a("Link for this news.", href=news.link)) + with p(): + if news.image_links: + if com_line_args.date:  # news read with --date: emit plain links to the images + b("Images links: ") + for image_link in news.image_links: + a("Link to image", href=image_link) + else:  # otherwise embed the images themselves + b("Images to summary: ") + for img_link in news.image_links: + img(src=img_link) + br() + br() + return html_file diff --git a/final_task/rss_reader/exceptions.py b/final_task/rss_reader/exceptions.py new file mode 100644 index 0000000..c46b762 --- /dev/null +++ b/final_task/rss_reader/exceptions.py @@ -0,0 +1,25 @@ +""" Module of application-specific exceptions. """ + + +class Error(Exception): + """ Base class for all exceptions defined in this module. """ + pass + + +class EmptyFileError(Error):  # presumably: the cache file holds no news + pass + + +class EmptyCollectionError(Error):  # presumably: no news matched the request + pass + + +class ComLineArgError(Error):  # presumably: a command line argument is invalid + pass + + +class FilePathError(Error):  # presumably: an output path is invalid + pass + + + diff --git a/final_task/rss_reader/models.py b/final_task/rss_reader/models.py new file mode 100644 index 0000000..9657b4f --- /dev/null +++ b/final_task/rss_reader/models.py @@ -0,0 +1,37 @@ +""" Data models module """ +from dataclasses import dataclass, field + + +@dataclass +class NewsEntry: + """ Class representing a news article (entry). 
+ + Methods: + print_entry(self) - print the entry to stdout """ + + feed_title: str = "" + feed_language: str = "" + + title: str = "" + summary: str = "" + date: str = "" + link: str = "" + source: str = "" + image_links: list = field(default_factory=list) + + def print_entry(self):  # print the labelled fields, then the numbered image links + print("-------------------------------------------------------------", + "Feed title: " + self.feed_title + '\n', + "Feed language: " + self.feed_language + '\n' + '\n', + "News title: " + self.title + '\n', + "Summary: " + self.summary + '\n', + "Publication date: " + self.date + '\n', + "Source: " + self.source + '\n', + "Link: " + self.link + '\n', + sep='\n') + + if self.image_links: + print("Images links: ") + for num, img_link in enumerate(self.image_links): + if img_link: + print(f"[{num+1}] {img_link}") diff --git a/final_task/rss_reader/requirements.txt b/final_task/rss_reader/requirements.txt index e69de29..fe0ebf8 100644 --- a/final_task/rss_reader/requirements.txt +++ b/final_task/rss_reader/requirements.txt @@ -0,0 +1,5 @@ +beautifulsoup4==4.8.1 +requests +feedparser +fpdf +dominate diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index e69de29..a70f752 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -0,0 +1,50 @@ +""" Main module """ +from action_functions import get_limit_news_collection, get_news, get_com_line_args, \ + create_logger, print_news +from validation_functions import check_url, check_version_arg, check_internet_connection, \ + check_emptiness +from exceptions import Error +from caching_functions import get_cached_news, cache_news +from conversion_functions import create_and_fill_pdf_file, create_and_fill_html_file + + +def main():  # entry point: serve news from the cache when --date is given, otherwise from the internet + try: + # get command line arguments + com_line_args = get_com_line_args() + + logger = create_logger(com_line_args) + + if not check_version_arg(com_line_args, logger): + if com_line_args.date: # getting news from local storage + news_collection = 
get_cached_news(com_line_args, logger) + if com_line_args.to_pdf: + create_and_fill_pdf_file(news_collection, com_line_args, logger) + elif com_line_args.to_html: + create_and_fill_html_file(news_collection, com_line_args, logger) + else: + # print to stdout; print_news takes the --json argument into account + print_news(news_collection, com_line_args, logger) + else: # getting news from the internet + check_internet_connection(logger) + check_url(com_line_args, logger) + + news_collection = get_news(com_line_args, logger) + check_emptiness(news_collection, logger) + cache_news(news_collection, logger) + # apply the --limit argument after caching, so the whole feed is cached + news_collection = get_limit_news_collection(news_collection, com_line_args, logger) + + if com_line_args.to_pdf: + create_and_fill_pdf_file(news_collection, com_line_args, logger) + elif com_line_args.to_html: + create_and_fill_html_file(news_collection, com_line_args, logger) + else: + # print to stdout; print_news takes the --json argument into account + print_news(news_collection, com_line_args, logger) + except Error as e: + print(e) + + +if __name__ == "__main__": + main() diff --git a/final_task/rss_reader/tests/test_action_functions.py b/final_task/rss_reader/tests/test_action_functions.py new file mode 100644 index 0000000..33610ff --- /dev/null +++ b/final_task/rss_reader/tests/test_action_functions.py @@ -0,0 +1,43 @@ +""" Testing module for action functions. """ +import unittest +from unittest import mock +import action_functions as act_func +import exceptions as exc + + +class TestActionFunctions(unittest.TestCase): + """ Class for testing some of action functions. 
""" + def setUp(self): + self.logger = mock.Mock() + self.com_line_args = mock.Mock() + + def test_get_limit_news_collection(self): + news_collection = list(range(10)) + + with mock.patch('action_functions.check_limit_arg') as check_limit_mock:  # patch the name where it is looked up, otherwise the mock is never called + check_limit_mock.return_value = True + self.com_line_args.limit = 4 + self.assertEqual(len(act_func.get_limit_news_collection + (news_collection, self.com_line_args, self.logger)), 4) + self.com_line_args.limit = 11 + self.assertEqual(len(act_func.get_limit_news_collection + (news_collection, self.com_line_args, self.logger)), 10) + + check_limit_mock.return_value = False + self.assertEqual(len(act_func.get_limit_news_collection + (news_collection, self.com_line_args, self.logger)), 10) + + def test_clean_str(self): + test_str = "Netanyahu \u2014rival seeks support from PM's party to form government." + expect_str = "Netanyahu rival seeks support from PM's party to form government." + self.assertEqual(act_func.clean_str(test_str), expect_str) + + def test_convert_date(self): + self.assertEqual(act_func.convert_date("20190207"), "7 Feb 2019") + self.assertEqual(act_func.convert_date("20190410"), "10 Apr 2019") + with self.assertRaises(exc.Error): + act_func.convert_date("2000") + + +if __name__ == '__main__': + unittest.main() diff --git a/final_task/rss_reader/tests/test_caching_functions.py b/final_task/rss_reader/tests/test_caching_functions.py new file mode 100644 index 0000000..01c3ce8 --- /dev/null +++ b/final_task/rss_reader/tests/test_caching_functions.py @@ -0,0 +1,86 @@ +""" Testing module for caching functions. 
""" +import unittest +from unittest import mock + +import os +from models import NewsEntry +import caching_functions as cache_func +import exceptions as exc + +# Create news instances +title = "Stars Are Being Born in the Depths of a Black Hole" +date = "Tue, 19 Nov 2019 15:47 EST" +link = "http://www.nasa.gov/image-feature/stars-are-being-born-in-the-depths-of-a-black-hole" +summary = """ In the Phoenix Constellation, astronomers have confirmed the first example + of a galaxy cluster where large numbers of stars are being born at its core. """ +feed_title = "NASA Image of the Day" +feed_language = "en-us" +source = "file:///tmp/mozilla_anna0/lg_image_of_the_day.rss" + +news1 = NewsEntry(feed_title, feed_language, title, summary, date, link, source) + +title = "Stars Are Being Born in the Depths of a Black Hole - 2" +date = "Tue, 19 Nov 2018 15:47 EST" +link = "http://www.nasa./new/gov/image-feature/stars-are-being-born-in-the-depths-of-a-black-hole" +summary = """ In the Phoenix Constellation, astronomers have confirmed the first example + of a galaxy cluster where large numbers of stars are being born at its core. 
""" +feed_title = "NASA Image of the Day" +feed_language = "en-us" +source = "file:///tmp/mozilla_anna0/lg_image_of_the_day.rss" +image_links = ["link1", "link2"] +news2 = NewsEntry(feed_title, feed_language, title, summary, date, link, source, image_links) + +news_collection = [news1, news2] + +DIRECTORY = os.path.abspath(os.path.dirname(__file__)) + + +class TestCachingFunctions(unittest.TestCase): + def setUp(self): + """Initialize collections of news""" + self.news1 = news1 + self.news2 = news2 + self.news_collection = news_collection + self.logger = mock.Mock() + self.command_line_args = mock.Mock() + + self.home_dir = os.path.expanduser('~') + self.test_file_path = os.path.join(DIRECTORY, '.test_cache_rss_news') + if os.path.exists(self.test_file_path): + os.remove(self.test_file_path) + + @mock.patch("os.path.join") + def test_cache_news_and_get_cache_news(self, path): + path.return_value = self.test_file_path + cache_func.cache_news(self.news_collection, self.logger) + + self.command_line_args.limit = 3 + self.command_line_args.date = "11 Nov 2018" + self.command_line_args.source = '' + with self.assertRaises(exc.EmptyCollectionError): + cache_func.get_cached_news(self.command_line_args, self.logger) + + self.command_line_args.date = "19 Nov 2019" + self.command_line_args.source = 'Source' + with self.assertRaises(exc.EmptyCollectionError): + cache_func.get_cached_news(self.command_line_args, self.logger) + + self.command_line_args.date = "19 Nov 2019" + self.command_line_args.source = '' + get_news_collection = cache_func. 
\ + get_cached_news(self.command_line_args, self.logger) + num_of_news = len(get_news_collection) + self.assertEqual(num_of_news, 1) + + self.command_line_args.date = "19 Nov 2019" + self.command_line_args.source =\ + "file:///tmp/mozilla_anna0/lg_image_of_the_day.rss" + get_news_collection = cache_func.get_cached_news(self.command_line_args, self.logger) + num_of_news = len(get_news_collection) + self.assertEqual(num_of_news, 1) + news_title = "Stars Are Being Born in the Depths of a Black Hole" + self.assertEqual(get_news_collection[0].title, news_title)  # check what came back from the cache, not the module-level fixture + + +if __name__ == '__main__': + unittest.main() diff --git a/final_task/rss_reader/tests/test_validation_functions.py b/final_task/rss_reader/tests/test_validation_functions.py new file mode 100644 index 0000000..b2a3d3e --- /dev/null +++ b/final_task/rss_reader/tests/test_validation_functions.py @@ -0,0 +1,63 @@ +""" Testing module for validation functions. """ +import unittest +from unittest import mock + +import requests +import validation_functions as val_func +import exceptions as exc + + +class TestValidationFunctions(unittest.TestCase): + """ Class for testing some validation functions. 
""" + + def setUp(self): + self.logger = mock.Mock() + self.com_line_args = mock.Mock() + + def test_check_internet_connection(self): + with mock.patch('requests.get'): + self.assertTrue(val_func.check_internet_connection(self.logger)) + + with self.assertRaises(exc.Error): + with mock.patch('requests.get', side_effect=requests.ConnectionError): + val_func.check_internet_connection(self.logger) + + def test_check_emptiness(self): + with self.assertRaises(exc.Error): + news_collection = [] + val_func.check_emptiness(news_collection, self.logger) + + news_collection = ["Smile"] + self.assertTrue(val_func.check_emptiness(news_collection, self.logger)) + + @mock.patch('validation_functions.Request', side_effect=ValueError)  # patch the name imported into validation_functions so the mock is the one called + def test_check_url_Request(self, req): + with self.assertRaises(exc.Error): + val_func.check_url(self.com_line_args, self.logger) + + def test_check_limit_arg(self): + self.com_line_args.limit = 0 + self.assertTrue(val_func.check_limit_arg(self.com_line_args, self.logger)) + + self.com_line_args.limit = 5 + self.assertTrue(val_func.check_limit_arg(self.com_line_args, self.logger)) + + self.com_line_args.limit = None + self.assertFalse(val_func.check_limit_arg(self.com_line_args, self.logger)) + + with self.assertRaises(exc.ComLineArgError): + self.com_line_args.limit = -5 + val_func.check_limit_arg(self.com_line_args, self.logger) + + def test_check_date_arg(self): + self.com_line_args.date = "20190111" + self.assertTrue(val_func.check_date_arg(self.com_line_args, self.logger)) + self.com_line_args.date = "" + self.assertFalse(val_func.check_date_arg(self.com_line_args, self.logger)) + + +if __name__ == '__main__': + unittest.main() + + + diff --git a/final_task/rss_reader/validation_functions.py b/final_task/rss_reader/validation_functions.py new file mode 100644 index 0000000..c0e95d6 --- /dev/null +++ b/final_task/rss_reader/validation_functions.py @@ -0,0 +1,104 @@ +""" Module of validation functions. 
+ + Functions: + check_url(com_line_args, logger) -> True + check_internet_connection(logger) -> True + check_emptiness(news_collection, logger) -> True + check_version_arg(com_line_args, logger) -> True/False + check_limit_arg(com_line_args, logger) -> True/False + check_date_arg(com_line_args, logger) -> True/False + check_path_to_directory(path_to_directory, logger) -> True """ + +import requests +import os +from urllib.request import Request, urlopen +from urllib.error import URLError +from exceptions import Error, EmptyCollectionError, FilePathError, ComLineArgError + + +def check_url(com_line_args, logger): + """ Return True when the source URL can be opened; raise Error when it is malformed or unreachable. """ + try: + req = Request(com_line_args.source) + logger.info("Checking url.") + urlopen(req).close()  # only reachability matters here, so release the connection straight away + except ValueError: + logger.error("Invalid URL.") + raise Error("Please, check your URL.") + except URLError as e: + if hasattr(e, 'code'):  # HTTPError also carries "reason", so the status code has to be tested first + logger.error("The server couldn\'t fulfill the request. " + f"Error code: {e.code}") + raise Error("Service problem.") + elif hasattr(e, "reason"): + logger.error(f"Failed to reach a server. Reason: {e.reason}.") + raise Error("Please, check your internet connection and your URL.") + else: + return True + + +def check_internet_connection(logger): + """ Return True when a test request succeeds; raise Error on any request failure, timeouts included. """ + try: + logger.info("Checking internet connection.") + requests.get("http://google.com", timeout=5) + return True + except requests.exceptions.RequestException: + logger.error("No internet connection.") + raise Error("Please, check your internet connection.") + + +def check_emptiness(news_collection, logger): + """ Return True when the news collection holds at least one entry; raise Error when it is empty. """ + logger.info("Checking news collection emptiness.") + if not news_collection: + logger.error("Empty RSS-feed.") + raise Error("Please, check URL.") + else: + return True + + +def check_version_arg(com_line_args, logger): + """ Check --version argument function. 
""" + if com_line_args.version: + logger.info("View program version.") + print("rss_reader.py 4.1") + return True + else: + return False + + +def check_limit_arg(com_line_args, logger): + """ Return True when --limit is zero or positive, False when it is absent; raise ComLineArgError when negative. """ + limit = com_line_args.limit + if not limit: + if limit == 0: + return True + else: + return False + elif com_line_args.limit < 0: + logger.error("Command line argument limit is invalid.") + raise ComLineArgError("Command line argument limit should not be negative.") + else: + return True + + +def check_date_arg(com_line_args, logger): + """ Return True when the --date argument is set, False otherwise. """ + if com_line_args.date: + logger.info("Checking date argument.") + return True + else: + return False + + +def check_path_to_directory(path_to_directory, logger):  # existence is tested first; testing isdir first made that branch unreachable + logger.info("Checking path to file directory.") + if not os.path.exists(path_to_directory): + logger.error("Path to file is invalid.") + raise FilePathError("Received path to file is invalid.") + elif not os.path.isdir(path_to_directory): + logger.error("Path to directory is invalid. Path not to folder.") + raise FilePathError("Path to directory is invalid. Path not to folder.") + else: + return True diff --git a/final_task/setup.py b/final_task/setup.py index e69de29..44f19c6 100644 --- a/final_task/setup.py +++ b/final_task/setup.py @@ -0,0 +1,28 @@ +from setuptools import setup, find_packages +from os import path + +directory = path.abspath(path.dirname(__file__)) +with open(path.join(directory, 'README.md'), encoding='utf-8') as f: + long_description = f.read() + +setup( + name="rss_reader_Anna_Gonchar", + version="4.1", + description="RSS reader - simple command-line utility.", + long_description=long_description, + long_description_content_type="text/markdown", + url="https://github.com/AnnaPotter/FinalTaskRssParser", + author="Anna Gonchar", + author_email="raphaelkyzy@gmail.com", + packages=find_packages(), + classifiers=[ + "Programming Language :: Python :: 3.8", + "Operating System :: OS 
Independent", + ], + python_requires='>=3.8', + install_requires=['feedparser', 'requests', 'beautifulsoup4==4.8.1', 'fpdf', 'dominate'], + entry_points={ + 'console_scripts': + ['rss-reader = rss_reader.rss_reader:main']  # NOTE(review): rss_reader.py imports its siblings as top-level modules - confirm they resolve once installed + }, +)