diff --git a/final_task/.gitignore b/final_task/.gitignore new file mode 100644 index 0000000..b060ea3 --- /dev/null +++ b/final_task/.gitignore @@ -0,0 +1,5 @@ +__init__.py +[__pycache__]/ +*.pyc +.test_cache +event_tracker.log \ No newline at end of file diff --git a/final_task/FinalTask.md b/final_task/FinalTask.md deleted file mode 100644 index 1515169..0000000 --- a/final_task/FinalTask.md +++ /dev/null @@ -1,136 +0,0 @@ -# Introduction to Python. Hometask -You are proposed to implement Python RSS-reader using **python 3.8**. - -The task consists of few iterations. Do not start new iteration if the previous one is not implemented yet. - -## Common requirements -* It is mandatory to use `argparse` module. -* Codebase must be covered with unit tests with at least 50% coverage. -* In case of any mistakes utility should print human-readable -error explanation. Exception tracebacks in stdout are prohibited in final version of application. -* Docstrings are mandatory for all methods, classes, functions and modules. -* Code must correspond to `pep8` (use `pycodestyle` utility for self-check). - * You can set line length up to 120 symbols. -* Commit messages should provide correct and helpful information about changes in commit. Messages like `Fix bug`, -`Tried to make workable`, `Temp commit` and `Finally works` are prohibited. -* Usage of external APIs is prohibited (except of APIs for receiving RSS) - -## [Iteration 1] One-shot command-line RSS reader. -RSS reader should be a command-line utility which receives [RSS](wikipedia.org/wiki/RSS) URL and prints results in human-readable format. - -You are free to choose format of the news console output. 
The textbox below provides an example of how it can be implemented: - -```shell -$ rss_reader.py "https://news.yahoo.com/rss/" --limit 1 - -Feed: Yahoo News - Latest News & Headlines - -Title: Nestor heads into Georgia after tornados damage Florida -Date: Sun, 20 Oct 2019 04:21:44 +0300 -Link: https://news.yahoo.com/wet-weekend-tropical-storm-warnings-131131925.html - -[image 2: Nestor heads into Georgia after tornados damage Florida][2]Nestor raced across Georgia as a post-tropical cyclone late Saturday, hours after the former tropical storm spawned a tornado that damaged -homes and a school in central Florida while sparing areas of the Florida Panhandle devastated one year earlier by Hurricane Michael. The storm made landfall Saturday on St. Vincent Island, a nature preserve -off Florida's northern Gulf Coast in a lightly populated area of the state, the National Hurricane Center said. Nestor was expected to bring 1 to 3 inches of rain to drought-stricken inland areas on its -march across a swath of the U.S. Southeast. - - -Links: -[1]: https://news.yahoo.com/wet-weekend-tropical-storm-warnings-131131925.html (link) -[2]: http://l2.yimg.com/uu/api/res/1.2/Liyq2kH4HqlYHaS5BmZWpw--/YXBwaWQ9eXRhY2h5b247aD04Njt3PTEzMDs-/https://media.zenfs.com/en/ap.org/5ecc06358726cabef94585f99050f4f0 (image) - -``` - -Utility should provide the following interface: -```shell -usage: rss_reader.py [-h] [--version] [--json] [--verbose] [--limit LIMIT] - source - -Pure Python command-line RSS reader. - -positional arguments: - source RSS URL - -optional arguments: - -h, --help show this help message and exit - --version Print version info - --json Print result as JSON in stdout - --verbose Outputs verbose status messages - --limit LIMIT Limit news topics if this parameter provided - -``` - -In case of using `--json` argument your utility should convert the news into [JSON](https://en.wikipedia.org/wiki/JSON) format. 
-You should come up with the JSON structure on you own and describe it in the README.md file for your repository or in a separate documentation file. - -The `--limit` argument should also affect JSON generation. - -With the argument `--verbose` your program should print all logs in stdout. - -Withe the argument `--version` your program should print in stdout it's current version and complete it's work. The version supposed to change with every iteration. - - -## [Iteration 2] Distribution - -* Utility should be wrapped into distribution package with `setuptools`. -* This package should export CLI utility named `rss-reader`. - -> Note: Double-check, that your utility works correctly after its new package was installed on a clean machine. - -## [Iteration 3] News caching -The RSS news should be stored in a local storage while reading. The way and format of this storage you can choose yourself. -Please describe it in a separate section of README.md or in the documentation. - -New optional argument `--date` must be added to your utility. It should take a date in `%Y%m%d` format. -For example: `--date 20191020` - -The cashed news can be read with it. The new from the specified day will be printed out. -If the news are not found return an error. - -If the `--date` argument is not provided, the utility should work like in the previous iterations. - -## [Iteration 4] Format converter - -You should implement the conversion of news in at least two of the suggested format: `.mobi`, `.epub`, `.fb2`, `.html`, `.pdf` - -New optional argument must be added to your utility. This argument receives the path where new file will be saved. The arguments should represents which format will be generated. - -For example: `--to-mobi` or `--to-fb2` or `--to-epub` - - -You can choose yourself the way in which the news will be displayed, but the final text result should contain pictures and links, if they exist in the original article and if the format permits to store this type of data. 
- -## * [Iteration 5] Output colorization -> Note: An optional iteration, it is not necessary to implement it. You can move on with it only if all the previous iterations (from 1 to 4) are completely implemented. - -You should add new optional argument `--colorize`, that will print the result of the utility in colorized mode. - -If the argument is not provided, the utility should work like in the previous iterations. - -> Note: Take a look at the [colorize](https://pypi.org/project/colorize/) library - -## * [Iteration 6] Web-server -> Note: An optional iteration, it is not necessary to implement it. You can move on with it only if all the previous iterations (from 1 to 4) are completely implemented. Introduction to Python course does not cover the topics that are needed for the implementation of this part. - -There are several mandatory requirements in this iteration: -* `Docker` + `docker-compose` usage (at least 2 containers: one for web-application, one for DB) -* Web application should provide all the implemented in the previous parts of the task functionality, using the REST API: - - One-shot conversion from RSS to Human readable format - - Server-side news caching - - Conversion in epub, mobi, fb2 or other formats - -Feel free to choose the way of implementation, libraries and frameworks. (We suggest you `Django Rest Framework` + `PostgreSQL` combination) - -You can implement any functionality that you want. The only requirement is to add the description into README file or update project documentation, for example: -* authorization/authentication -* automatic scheduled news update -* adding new RSS sources using API - - - ---- -Implementations will be checked with the latest cPython interpreter of 3.8 branch. ---- - - -> Always code as if the guy who ends up maintaining your code will be a violent psychopath who knows where you live. Code for readability. **John F. 
Woods** diff --git a/final_task/MANIFEST.in b/final_task/MANIFEST.in new file mode 100644 index 0000000..355ca2c --- /dev/null +++ b/final_task/MANIFEST.in @@ -0,0 +1,3 @@ +recursive-include rss_reader/templates * +recursive-include rss_reader/tests/files * +recursive-include rss_reader/fonts * diff --git a/final_task/README.md b/final_task/README.md index 7af281f..61d2e51 100644 --- a/final_task/README.md +++ b/final_task/README.md @@ -1,3 +1,109 @@ -# Your readme here -Some text. -Checkout how to write this file using *markdown*. +# Python RSS-reader +RSS reader is a command-line utility that receives an RSS URL and prints the results in a human-readable format. + +**The utility provides the following interface:** + +``` +usage: rss_reader.py [-h] [--version] [--json] [--verbose] [--limit LIMIT] + [--date DATE] [--to-html TO_HTML] [--to-pdf TO_PDF] + [--colorize] + [source] + +Performs a variety of operations on a file. + +positional arguments: + source RSS URL + +optional arguments: + -h, --help show this help message and exit + --version Print version info + --json Print result as JSON in stdout + --verbose Outputs verbose status messages + --limit LIMIT Limits the number of displayed news + --date DATE Displays news for the specified day. It takes a date in %Y%m%d format. + --to-html TO_HTML Converts news in html format. Receives the path for file + saving + --to-pdf TO_PDF Converts news in pdf format. Receives the path for file + saving + --colorize Make stdout in colour +``` + +In addition to the --verbose argument, the utility also records logging events in an event_tracker.log file, which retains all messages from earlier runs. +If --limit is 0 or greater than the number of received news items, all available news is displayed. + + + +## [Requirements](https://github.com/brechka/FinalTaskRssParser/blob/master/final_task/rss_reader/requirements.txt) + +The utility was created using Python 3.8. 
To run the application, install with pip the packages listed in [requirements.txt](https://github.com/brechka/FinalTaskRssParser/blob/master/final_task/rss_reader/requirements.txt) +(preferably inside a virtual environment): + +``` +pip install -r requirements.txt +``` + + + +## Usage of RSS-reader + +To use the utility, proceed as follows: + +Clone the current repository and install the requirements (see the description above); it is better to use an isolated environment created with virtualenv. +The application is launched from the rss_reader.py file. To run the utility, use the command-line arguments listed above. For example: + +``` +rss-reader https://news.yahoo.com/rss/ --limit 1 +``` + +The output has the following structure: + +``` +Feed: Yahoo News - Latest News & Headlines + +Title: GOP claim that Trump cares about corruption takes a hit at impeachment hearing + +Date: Wed, 20 Nov 2019 20:26:09 -0500 +Link: https://news.yahoo.com/gop-claim-that-trump-cares-about-corruption-takes-a-hit-at-impeachment-heaing-012609516.html + +[image 2: GOP claim that Trump cares about corruption takes a hit at impeachment hearing] +Rep. Jim Himes, D-Conn., took issue with a defense of President Trump floated by Rep. John Ratcliffe, R-Texas. + +Links: +[1]: https://news.yahoo.com/gop-claim-that-trump-cares-about-corruption-takes-a-hit-at-impeachment-heaing-012609516.html (link to the article) + +[2]: http://l.yimg.com/uu/api/res/1.2/cpj4jzp35ZTzQ6ds8B1M0w--/YXBwaWQ9eXRhY2h5b247aD04Njt3PTEzMDs-/https://media-mbst-pub-ue1.s3.amazonaws.com/creatr-uploaded-images/2019-11/64948b40-0bee-11ea-ad7c-1300326b62d1 +``` + + + +## Description + +###### [rss_reader/rss_reader.py](https://github.com/brechka/FinalTaskRssParser/blob/master/final_task/rss_reader/rss_reader.py) + +In rss_reader.py the RSS-reader utility is initialized and configured. The utility provides the following features: + +##### 1) *Data caching:* + +The RSS news is stored in local storage while it is being read. 
For this purpose [shelve](https://docs.python.org/3/library/shelve.html), a dictionary-like object, is used. +With the optional argument ```--date %Y%m%d``` (for example: ```--date 20191120```) the cached news can be read. If a URL is also given, only the cached news from that feed is displayed for the specified date. +If no news is found, an error is reported. + +##### 2) *Data conversion to .pdf and .html:* + +The RSS-reader utility can convert news to the .pdf and .html formats. This is done with the following optional arguments: ```--to-pdf``` and ```--to-html```. +Both arguments receive the path where the new file will be saved. The name of the file is generated automatically. +There are two options for news conversion: + +- *convert from cache* + +In this case news conversion does not depend on an Internet connection. With ```rss_reader.py --date 20191120 --limit 1 --to-html ~/finaltask/rss-reader``` one news item for the specified day is converted (the same applies to ```--to-pdf```), and a file containing it is generated in the specified PATH. +The file contains clickable links to the full article and to the images. + +- *convert fresh news from the Internet* + +In this case an Internet connection is required. With ```rss_reader.py https://news.yahoo.com/rss/ --limit 1 --to-html ~/finaltask/rss-reader``` one news item from the given link is converted (the same applies to ```--to-pdf```), and a file containing it is generated in the specified PATH. +In this case all pictures are displayed and clickable, if the format permits. Otherwise, there are clickable links to the images. The result also contains links to the full articles. + +##### 3) *Output in colour:* + +To colorize the output, use the ```--colorize``` optional argument. The utility provides colorization of the logs and of the RSS news output (JSON included). 
\ No newline at end of file diff --git a/final_task/__init__.py b/final_task/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/final_task/rss_reader/cache.py b/final_task/rss_reader/cache.py new file mode 100644 index 0000000..036ed99 --- /dev/null +++ b/final_task/rss_reader/cache.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3.8 +"""Module for caching news in .feeds_cache file""" + +import itertools +import shelve +from os import path + +from logger import LOGGER +from rss_exceptions import SpecifiedDayNewsError, EmptyCacheError + + +DIRECTORY = path.abspath(path.dirname(__file__)) + + +def cache_news(parsed_news): + """ + Cache news in 'feeds_cache' file. + """ + LOGGER.info('Trying to cache news into a file') + with shelve.open(path.join(DIRECTORY, '.feeds_cache')) as cache: + for new in parsed_news: + date = new['date'] + cache[date] = new + + LOGGER.info('News was cached successfully.') + + +def get_cached_news(cmd_args): + """ + Extract news from the cache for a specified day. + """ + LOGGER.info("Get started fetching cached news if exists.") + + with shelve.open(path.join(DIRECTORY, '.feeds_cache')) as cache: + if not cache: + raise EmptyCacheError('Cache is empty. Please, retrieve data from internet.') + + limit = cmd_args.limit or len(cache) + all_news = list(itertools.islice(make_news_item(cmd_args, cache), 0, limit)) + + if not all_news: + raise SpecifiedDayNewsError('On the specified day there are no entries in the cache.') + + LOGGER.info('News was extracted from the cache successfully.') + + return all_news + + +def make_news_item(cmd_args, cache): + """ + Iterate the cache with required key 'data' and 'url', if specified. 
+ """ + for key_date, news_date in cache.items(): + if cmd_args.date in key_date: + if cmd_args.source == news_date['feed_url']: + yield news_date + elif not cmd_args.source: + yield news_date diff --git a/final_task/rss_reader/cmd_line_parser.py b/final_task/rss_reader/cmd_line_parser.py new file mode 100644 index 0000000..2b33835 --- /dev/null +++ b/final_task/rss_reader/cmd_line_parser.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python3.8 +"""Module for parsing command line arguments and processing two of them: --json and --verbose""" + +import argparse +import datetime +import json +import logging +import sys + +import coloredlogs +from termcolor import cprint + +from logger import LOGGER +from rss_exceptions import FormatDateError + + +def make_arg_parser(): + """ + Make a parser for parsing exact arguments out of sys.argv + :return: parser + """ + parser = argparse.ArgumentParser(description="Performs a variety of operations on a file.") + + parser.add_argument('source', help='RSS URL', nargs='?', default='') + parser.add_argument('--version', action='version', version=f'RSS-reader 5.0', help='Print version info') + parser.add_argument('--json', action='store_true', help='Print result as JSON in stdout') + parser.add_argument('--verbose', action='store_true', help='Outputs verbose status messages') + parser.add_argument('--limit', type=int, default=None, help='Limits the number of displayed news') + parser.add_argument('--date', type=convert_date, help='Displays news for the specified day') + parser.add_argument('--to-html', type=str, default='', + help='Converts news in html format. Receives the path for file saving') + parser.add_argument('--to-pdf', type=str, default='', + help='Converts news in pdf format. 
Receives the path for file saving.') + parser.add_argument('--colorize', action='store_true', help='Make stdout in colour.') + return parser + + +def convert_date(date): + """ + Converts an argument in %%Y%%m%%d format to %d%m%Y format + """ + try: + date = datetime.datetime.strptime(date, '%Y%m%d') + reformed_date = date.strftime("%d %b %Y") + return reformed_date + except ValueError: + raise FormatDateError("Invalid date format. Date format should be like '%Y%m%d' -> 20191120.") + + +def output_json(all_news, cmd_args): + """ + If the 'json' argument was passed - converts data in json format and prints it + """ + if cmd_args.json: + LOGGER.info('Convert RSS data in JSON format') + news_in_json = json.dumps(all_news, indent=4, ensure_ascii=False) + if cmd_args.colorize: + LOGGER.info('Output result of parsing RSS in colorized JSON format') + cprint(news_in_json, 'cyan') + else: + LOGGER.info('Output result of parsing RSS in JSON format') + print(news_in_json) + + +def output_verbose(cmd_args): + """ + If the 'verbose' argument was passed, func reports events + that occur during normal operation of a program + """ + if cmd_args.verbose: + handler = logging.StreamHandler(sys.stdout) + handler.setLevel(logging.DEBUG) + formatter = logging.Formatter(u'%(levelname)-8s [%(asctime)s] %(message)s') + handler.setFormatter(formatter) + logging.getLogger().addHandler(handler) + + if cmd_args.colorize: + coloredlogs.install() + diff --git a/final_task/rss_reader/fonts/DejaVuSans.cw127.pkl b/final_task/rss_reader/fonts/DejaVuSans.cw127.pkl new file mode 100644 index 0000000..143d8be Binary files /dev/null and b/final_task/rss_reader/fonts/DejaVuSans.cw127.pkl differ diff --git a/final_task/rss_reader/fonts/DejaVuSans.pkl b/final_task/rss_reader/fonts/DejaVuSans.pkl new file mode 100644 index 0000000..d9dddd2 Binary files /dev/null and b/final_task/rss_reader/fonts/DejaVuSans.pkl differ diff --git a/final_task/rss_reader/fonts/DejaVuSans.ttf 
b/final_task/rss_reader/fonts/DejaVuSans.ttf new file mode 100644 index 0000000..e5f7eec Binary files /dev/null and b/final_task/rss_reader/fonts/DejaVuSans.ttf differ diff --git a/final_task/rss_reader/html_converter.py b/final_task/rss_reader/html_converter.py new file mode 100644 index 0000000..1ee0506 --- /dev/null +++ b/final_task/rss_reader/html_converter.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3.8 +"""Module for creating and filling unique HTML file with required news""" + +import dominate +import dominate.tags as tag + +from logger import LOGGER +from rss_exceptions import PATHError +from utils import create_path_to_file + + +def convert_news_to_html(cmd_args, all_news): + """ + If the 'to-html' argument was passed - creates HTML file and push data in it. + """ + if cmd_args.to_html: + LOGGER.info("Call function for creation HTML file") + create_html_file(cmd_args, all_news) + + +def create_html_file(cmd_args, all_news): + """ + Creates and fills in the HTML file with the required data + """ + path_to_html = create_path_to_file(cmd_args.to_html, 'RSS_NEWS.html') + + rss_html_doc = dominate.document(title='RSS NEWS') + + with rss_html_doc: + tag.h1("RSS News") + + for num, new in enumerate(all_news, 1): + LOGGER.info(f'Add new № {num} in HTML file.') + rss_html_doc = convert_new_in_html(cmd_args, new, rss_html_doc) + + try: + LOGGER.info('Create HTML file in the specified directory.') + + with open(path_to_html, 'w') as file: + file.write(str(rss_html_doc)) + + LOGGER.info("HTML file was created and filled in successfully") + + except FileNotFoundError: + raise PATHError('Setted PATH is invalid') + + +def convert_new_in_html(cmd_args, new, html_file): + """ + Convert one new to HTML format + """ + + with html_file: + with tag.div(): + tag.h2(new.get('title')) + tag.p(new.get('date')) + tag.br() + tag.a("Read the full article", href=new.get('link')) + tag.br() + tag.br() + + if cmd_args.date: + for num, link in enumerate(new.get('img_link'), 1): + 
tag.a(f"Image link № {num}", href=link) + tag.br() + else: + for num, link in enumerate(new.get('img_link')): + tag.img(src=link, alt=new.get('img_title')[num]) + tag.br() + + tag.p(new.get('text')) + tag.br() + + return html_file diff --git a/final_task/rss_reader/logger.py b/final_task/rss_reader/logger.py new file mode 100644 index 0000000..90aec7c --- /dev/null +++ b/final_task/rss_reader/logger.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python3.8 +"""Module for creating logging system for the utility: LOGGER with RSS_reader events tracker name""" + +import logging + + +# Create a logger for tracking events that happen when program runs +fileHandler = logging.FileHandler("event_tracker.log", "a", encoding="utf-8") + +logging.basicConfig(format=u'%(levelname)-8s [%(asctime)s] %(message)s', + level=logging.DEBUG, + handlers=[fileHandler]) + +LOGGER = logging.getLogger("RSS_reader events tracker") diff --git a/final_task/rss_reader/pdf_converter.py b/final_task/rss_reader/pdf_converter.py new file mode 100644 index 0000000..64e2f2f --- /dev/null +++ b/final_task/rss_reader/pdf_converter.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python3.8 +"""Module for creating and filling unique PDF file with required news""" + +import os +import urllib.request + +from fpdf import FPDF, set_global + +from logger import LOGGER +from rss_exceptions import PATHError +from utils import create_path_to_file + + +CUR_DIRECTORY = os.path.abspath(os.path.dirname(__file__)) +FONT_PATH = os.path.join(CUR_DIRECTORY, 'fonts') + + +def convert_news_to_pdf(cmd_args, all_news): + """ + If the 'to-pdf' argument was passed - creates PDF file and push data in it. 
+ """ + if cmd_args.to_pdf: + LOGGER.info("Call function for creation PDF file") + create_pdf_file(cmd_args, all_news) + + +def create_pdf_file(cmd_args, all_news): + """ + Creates and fills in the PDF file with the required data + """ + path_to_pdf = create_path_to_file(cmd_args.to_pdf, 'RSS_NEWS.pdf') + + # --- normal cache mode --- + set_global("FPDF_CACHE_MODE", 0) + + pdf = FPDF(orientation='P', unit='mm', format='A4') + pdf.set_margins(5, 10, 5) + pdf.add_page() + + # use downloaded unicode font + pdf.add_font('dejavu', '', os.path.join(FONT_PATH, 'DejaVuSans.ttf'), uni=True) + + pdf.set_font('dejavu', size=20) + pdf.set_text_color(5, 14, 110) + pdf.cell(200, 10, txt=f'RSS News', ln=1, align="C") + pdf.ln(10) + pdf.set_text_color(0, 0, 0) + + for new in all_news: + add_new_to_pdf(cmd_args, new, pdf) + + LOGGER.info(f'Download PDF file with required news to the {path_to_pdf}') + + try: + pdf.output(path_to_pdf, 'F') + except FileNotFoundError: + raise PATHError('Setted PATH is invalid') + + +def add_new_to_pdf(cmd_args, new, pdf): + """ + Add one new to PDF file + """ + pdf.set_font_size(16) + pdf.set_text_color(84, 10, 10) + pdf.multi_cell(200, 8, txt=new.get('title')) + pdf.ln(5) + pdf.set_font_size(10) + pdf.set_text_color(0, 0, 0) + pdf.cell(200, 5, txt=new.get('date')) + pdf.ln(10) + pdf.set_font_size(11) + pdf.set_text_color(0, 0, 128) + pdf.write(6, 'Link to the full article', new.get('link')) + pdf.set_text_color(0, 0, 0) + pdf.set_font_size(12) + pdf.ln(10) + + if not cmd_args.date: + LOGGER.info('Add image(s) from the received links to the file') + + for num, link in enumerate(new.get('img_link'), 1): + if link: + add_downloaded_image(num, link, pdf) + else: + pdf.cell(200, 8, txt='Links to the image(s): ') + pdf.ln(8) + for num, link in enumerate(new.get('img_link'), 1): + add_image_link(num, link, pdf) + + pdf.ln(5) + pdf.set_font_size(12) + pdf.set_text_color(0, 0, 0) + pdf.multi_cell(200, 8, txt=new.get('text')) + pdf.ln(10) + + +def 
add_downloaded_image(num, link, pdf): + """ + Download image, add it to PDF file and delete it + """ + LOGGER.info(f'Download image № {num} from {link} from received URL.') + + filename, headers = urllib.request.urlretrieve(link) + image_format = headers['content-type'].replace('image/', '') + + if image_format not in ('jpeg', 'png'): + LOGGER.info(f"Image № {num} from {link} is not in an appropriate format.") + add_image_link(num, link, pdf) + else: + LOGGER.info(f"Format of image № {num} from {link} is appropriate.") + + pdf.image(filename, x=70, y=pdf.get_y(), h=40, type=image_format, link=link) + pdf.ln(40) + + LOGGER.info(f'Delete downloaded image № {num} from {link}.') + os.remove(filename) + + +def add_image_link(num, link, pdf): + """ + Add image link to the PDF file + """ + pdf.set_text_color(0, 0, 128) + pdf.set_font_size(11) + pdf.write(8, f'Link to the image № {num}', link) + pdf.ln(6) diff --git a/final_task/rss_reader/requirements.txt b/final_task/rss_reader/requirements.txt index e69de29..4c922a8 100644 --- a/final_task/rss_reader/requirements.txt +++ b/final_task/rss_reader/requirements.txt @@ -0,0 +1,9 @@ +argparse +beautifulsoup4==4.8.1 +coloredlogs==10.0 +dominate==2.4.0 +feedparser==5.2.1 +fpdf==1.7.2 +Jinja2==2.10.3 +requests==2.22.0 +termcolor==1.1.0 diff --git a/final_task/rss_reader/rss_exceptions.py b/final_task/rss_reader/rss_exceptions.py new file mode 100644 index 0000000..b211b07 --- /dev/null +++ b/final_task/rss_reader/rss_exceptions.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3.8 +"""Module with custom exceptions""" + + +class Error(Exception): + """Base class for exceptions in this module""" + pass + + +class LimitSignError(Error): + """Exception is raised negative limit value""" + pass + + +class FeedError(Error): + """Exception is raised for link without news""" + pass + + +class UndefinedURL(Error): + """Exception is raised if URL is not define""" + pass + + +class InternetConnectionError(Error): + """Exception is raised if no 
Internet connection.""" + pass + + +class UnreachableURLError(Error): + """Exception is raised if URL is unreachable""" + pass + + +class URLResponseError(Error): + """Exception occurs while retrieving status code from the URL other than 200""" + pass + + +class FormatDateError(Error): + """Exception is raised if date was setted in invalid format""" + pass + + +class SpecifiedDayNewsError(Error): + """Exception is raised if on the specified day there are no entries in DB.""" + pass + + +class EmptyCacheError(Error): + """Exception is raised if retrieving data from empty cache.""" + pass + + +class PATHError(Error): + """Exception is raised if the wrong PATH was specified.""" + pass diff --git a/final_task/rss_reader/rss_parser.py b/final_task/rss_reader/rss_parser.py new file mode 100644 index 0000000..ce8b81e --- /dev/null +++ b/final_task/rss_reader/rss_parser.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3.8 +""" +Module for parsing news from the given link, extracting required info from it +and filling the list with dictionaries containing received data +""" + +import html + +from bs4 import BeautifulSoup +import feedparser + +from logger import LOGGER +from validator import check_limit_value, check_news_collection + + +class RSSparser: + """ + Parsed the RSS news from received link, extracts required amount of news + + Parameters: + cmd_args: dict: parsed arguments out of sys.argv + + Returns: + all_news: list of dictionaries with extracted info of required amount of parsed news + """ + + def __init__(self, cmd_args): + self.url = cmd_args.source + self.limit = cmd_args.limit + check_limit_value(self.limit) + + LOGGER.info(f'Get RSS_url {self.url} and value = {self.limit} (limits amount of output news)') + + self.response = self.get_the_response() + + # Extract the news-site name converting it to the unicode + self.feed_name = html.unescape(self.response.feed.get('title', '')) + + LOGGER.info('Trying to separate news from the URL.') + + # Extract all news 
separately in one list + self.news = self.response.entries + check_news_collection(self.news) + + def get_the_response(self): + """ + Get the response from the url. + """ + response = feedparser.parse(self.url) + LOGGER.info(f'Getting the response from the URL: {self.url}.') + return response + + def parse_feed(self): + """ + Parse set amount of news from URL and write required news to the list of dictionaries + :return: list of dictionaries with appropriate info + """ + all_news = [] + + LOGGER.info('Extract the required data from the separated news and fill the dictionaries with required data.') + + limit = self.limit or len(self.news) + + for info in self.news[:limit]: + img_link, img_title = [], [] + info_title = html.unescape(info.title) + info_link = info.link + info_date = info.published + info_description = info.description + + # Pulling data out of HTML part + soup = BeautifulSoup(info_description, features="html.parser") + + img_tag_list = soup.find_all('img') + if img_tag_list: + for link in img_tag_list: + img_link.append(link.get('src')) + img_title.append(html.unescape(link.get('title', ''))) + + info_text = html.unescape(soup.text).rstrip() + + all_news.append({ + "feed_title": self.feed_name, + "feed_url": self.url, + "title": info_title, + "link": info_link, + "date": info_date, + "img_title": img_title, + "img_link": img_link, + "text": info_text, + }) + return all_news diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py old mode 100644 new mode 100755 index e69de29..e213ba4 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3.8 +"""The rss_reader.py file launches the entire application""" + +import os +import sys + +current_dir = os.path.abspath(os.path.dirname(__file__)) +sys.path.append(current_dir) + +from cache import cache_news, get_cached_news +from cmd_line_parser import make_arg_parser, output_json, output_verbose +from 
html_converter import convert_news_to_html +from logger import LOGGER +from pdf_converter import convert_news_to_pdf +import rss_exceptions as er +from rss_parser import RSSparser +from utils import output_txt_news +import validator as valid + + +def main(): + # parse arguments received from the command line + parser = make_arg_parser() + command_line_args = parser.parse_args() + output_verbose(command_line_args) + + if command_line_args.date: + # retrieve data from the cache + valid.check_limit_value(command_line_args.limit) + all_news = get_cached_news(command_line_args) + else: + # retrieve data from the internet + valid.check_internet_connection() + valid.check_url_availability(command_line_args) + valid.check_response_status_code(command_line_args) + news_parser = RSSparser(command_line_args) + all_news = news_parser.parse_feed() + cache_news(all_news) + + convert_news_to_html(command_line_args, all_news) + convert_news_to_pdf(command_line_args, all_news) + + if not command_line_args.json: + output_txt_news(command_line_args, all_news) + + output_json(all_news, command_line_args) + + +if __name__ == "__main__": + try: + main() + except ( + er.EmptyCacheError, + er.FeedError, + er.FormatDateError, + er.LimitSignError, + er.PATHError, + er.SpecifiedDayNewsError, + er.UnreachableURLError, + er.UndefinedURL, + er.URLResponseError + ) as error: + LOGGER.error(str(error)) + print('Error: ', error) + + except er.InternetConnectionError as error: + LOGGER.error("ConnectionError: " + str(error)) + print("ConnectionError: ", error) \ No newline at end of file diff --git a/final_task/rss_reader/templates/colorized_template.txt b/final_task/rss_reader/templates/colorized_template.txt new file mode 100644 index 0000000..8e57256 --- /dev/null +++ b/final_task/rss_reader/templates/colorized_template.txt @@ -0,0 +1,24 @@ + +{% for item in all_news -%} + +Feed: {{ item['feed_title'] | colorizetext('red') }} + +Title: {{item['title'] | colorizetext('yellow')}} + +Date: 
{{item['date'] | colorizetext('magenta')}} +Link: {{item['link'] | colorizetext('blue')}} + +{% if item['img_title'] %}{% for title in item['img_title'] -%} +{% if title %}[image {{loop.index+1}}: {{title | colorizetext('cyan')}}] +{% endif -%}{% endfor %}{% endif -%} +{{item['text'] | colorizetext('green')}} + +Links: +[1]: {{item['link'] | colorizetext('blue')}} (link to the article) + +{% if item['img_link'] %}{% for link in item['img_link'] -%} +{% if link|length %}[{{loop.index+1}}]: {{link | colorizetext('blue')}}{% else %}[{{loop.index+1}}]: Missed link +{% endif %} +{% endfor %} +{% endif -%} +{% endfor %} diff --git a/final_task/rss_reader/templates/template.txt b/final_task/rss_reader/templates/template.txt new file mode 100644 index 0000000..78ef4ad --- /dev/null +++ b/final_task/rss_reader/templates/template.txt @@ -0,0 +1,23 @@ + +{% for item in all_news -%} +Feed: {{ item['feed_title']}} + +Title: {{item['title']}} + +Date: {{item['date']}} +Link: {{item['link']}} + +{% if item['img_title'] %}{% for title in item['img_title'] -%} +{% if title %}[image {{loop.index+1}}: {{title}}] +{% endif -%}{% endfor %}{% endif -%} +{{item['text']}} + +Links: +[1]: {{item['link']}} (link to the article) + +{% if item['img_link'] %}{% for link in item['img_link'] -%} +{% if link|length %}[{{loop.index+1}}]: {{link}}{% else %}[{{loop.index+1}}]: Missed link +{% endif %} +{% endfor %} +{% endif -%} +{% endfor %} \ No newline at end of file diff --git a/final_task/rss_reader/tests/files/example_html.html b/final_task/rss_reader/tests/files/example_html.html new file mode 100644 index 0000000..2624391 --- /dev/null +++ b/final_task/rss_reader/tests/files/example_html.html @@ -0,0 +1,15 @@ + + + + RSS News + + +
+

4 reasons Democrats have an uphill climb on Donald Trump impeachment and removal

+

Fri, 22 Nov 2019 05:00:09 -0500


+ Read the full article

+ 4 reasons Democrats have an uphill climb on Donald Trump impeachment and removal
+

If the ending of this movie is inevitable, the undecided public may move toward the Republican claim that the whole thing is a waste of time and money.


+
+ + \ No newline at end of file diff --git a/final_task/rss_reader/tests/files/rss_xml_template.xml b/final_task/rss_reader/tests/files/rss_xml_template.xml new file mode 100644 index 0000000..0ad67de --- /dev/null +++ b/final_task/rss_reader/tests/files/rss_xml_template.xml @@ -0,0 +1,64 @@ + + + + + + CNN.com - Science and Space + http://edition.cnn.com/TECH/space/?eref=edition_space + CNN.com delivers up-to-the-minute news and information on the latest top stories, weather, entertainment, politics and more. + en-us + � 2008 Cable News Network LP, LLLP. + Wed, 31 Dec 2008 10:36:24 EST + 10 + + CNN.com - Science and Space + http://edition.cnn.com/TECH/space/?eref=edition_space + http://i2.cdn.turner.com/cnn/.element/img/1.0/logo/cnn.logo.rss.gif + 144 + 33 + CNN.com delivers up-to-the-minute news and information on the latest top stories, weather, entertainment, politics and more. + + + Mars Science Lab launch delayed two years + http://edition.cnn.com/2008/TECH/space/12/04/nasa.mars.delay/index.html?eref=edition_space + http://rss.cnn.com/~r/rss/edition_space/~3/OkAPP_GA5mE/index.html + NASA's launch of the Mars Science Laboratory -- hampered by technical difficulties and cost overruns -- has been delayed until the fall of 2011, NASA officials said at a news conference Thursday in Washington.<img src="http://feeds.feedburner.com/~r/rss/edition_space/~4/OkAPP_GA5mE" height="1" width="1" alt=""/> + Fri, 05 Dec 2008 10:34:54 EST + http://edition.cnn.com/2008/TECH/space/12/04/nasa.mars.delay/index.html?eref=edition_space + + + Shuttle lands at California air base + http://edition.cnn.com/2008/TECH/space/11/30/space.shuttle/index.html?eref=edition_space + http://rss.cnn.com/~r/rss/edition_space/~3/YzjKXjuxFKY/index.html + NASA officials Sunday waved off the first opportunity for space shuttle Endeavour to return to Earth, citing poor weather conditions.<img src="http://feeds.feedburner.com/~r/rss/edition_space/~4/YzjKXjuxFKY" height="1" width="1" alt=""/> + Mon, 01 
Dec 2008 00:17:51 EST + http://edition.cnn.com/2008/TECH/space/11/30/space.shuttle/index.html?eref=edition_space + + + iReporters watch planets, moon align + http://edition.cnn.com/2008/TECH/space/12/02/planetsalign.irpt/index.html?eref=edition_space + http://rss.cnn.com/~r/rss/edition_space/~3/lNpVH39eh3U/index.html + <img src="http://feeds.feedburner.com/~r/rss/edition_space/~4/lNpVH39eh3U" height="1" width="1" alt=""/> + Tue, 02 Dec 2008 12:52:24 EST + http://edition.cnn.com/2008/TECH/space/12/02/planetsalign.irpt/index.html?eref=edition_space + + + Inspiration for 'Contact' still listening + http://edition.cnn.com/2008/TECH/space/11/26/aliens.tarter/index.html?eref=edition_space + http://rss.cnn.com/~r/rss/edition_space/~3/84C0ckrT5D4/index.html + From a remote valley in Northern California, Jill Tarter is listening to the universe.<img src="http://feeds.feedburner.com/~r/rss/edition_space/~4/84C0ckrT5D4" height="1" width="1" alt=""/> + Wed, 26 Nov 2008 12:02:33 EST + http://edition.cnn.com/2008/TECH/space/11/26/aliens.tarter/index.html?eref=edition_space + + + Indian lunar orbiter hit by heat rise + http://edition.cnn.com/2008/TECH/space/11/26/india.moon.probe/index.html?eref=edition_space + http://rss.cnn.com/~r/rss/edition_space/~3/utoHWQBr1hc/index.html + Scientists have switched off several on-board instruments to halt rising temperatures inside India's first unmanned lunar spacecraft.<img src="http://feeds.feedburner.com/~r/rss/edition_space/~4/utoHWQBr1hc" height="1" width="1" alt=""/> + Wed, 26 Nov 2008 07:26:44 EST + http://edition.cnn.com/2008/TECH/space/11/26/india.moon.probe/index.html?eref=edition_space + + + diff --git a/final_task/rss_reader/tests/test_cache.py b/final_task/rss_reader/tests/test_cache.py new file mode 100644 index 0000000..a9dd145 --- /dev/null +++ b/final_task/rss_reader/tests/test_cache.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3.8 +"""Module for testing cache.py""" + +import os +import unittest +from unittest.mock import 
class TestCheckFunctions(unittest.TestCase):
    """
    Tests functions from cache.py
    """

    def setUp(self):
        """
        Prepare command-line arguments, sample news and an empty test storage.
        """
        self.cmd_args = Mock()
        self.parsed_news = [
            {'feed_title': 'Yahoo News - Latest News & Headlines',
             'feed_url': 'https://news.yahoo.com/rss/',
             'title': 'First news name',
             'link': 'Link № 1.1',
             'date': 'Fri, 22 Nov 2019 10:36:29 -0500',
             'img_title': ['Title for image № 1'],
             'img_link': ['Link № 1.2'],
             'text': 'some text № 1'},
            {'feed_title': 'Yahoo News - Latest News & Headlines',
             'feed_url': 'https://news.yahoo.com/rss/',
             'title': 'Second news name',
             'link': 'Link № 2.1',
             'date': 'Fri, 21 Nov 2019 21:36:29 -0500',
             'img_title': ['Title for image № 2'],
             'img_link': ['Link № 2.2'],
             'text': 'some text № 2'}
        ]
        self.directory = os.path.abspath(os.path.dirname(__file__))
        self.test_db_path = os.path.join(self.directory, '.test_cache')

        # Start from a missing storage and do not leave one behind after the test.
        self._remove_test_storage()
        self.addCleanup(self._remove_test_storage)

    def _remove_test_storage(self):
        """
        Delete the '.test_cache' file if it exists.
        """
        if os.path.exists(self.test_db_path):
            os.remove(self.test_db_path)

    def _get_cached(self, date, limit, source):
        """
        Set the command-line arguments and return the result of cache.get_cached_news.
        """
        self.cmd_args.date = date
        self.cmd_args.limit = limit
        self.cmd_args.source = source
        return cache.get_cached_news(self.cmd_args)

    def test_get_cached_news(self):
        """
        Test get_cached_news(cmd_args)
        """
        # Every os.path.join call made while the patch is active yields the test storage path.
        with patch("os.path.join", return_value=self.test_db_path):
            # If storage doesn't exist, raise an exception
            with self.assertRaises(ex.EmptyCacheError):
                cache.get_cached_news(self.cmd_args)

            # Create storage
            cache.cache_news(self.parsed_news)

            # one cached item matches both the day and the feed URL
            extracted = self._get_cached("21 Nov 2019", 10, 'https://news.yahoo.com/rss/')
            self.assertEqual(len(extracted), 1)

            # the day is cached, but for another feed URL
            with self.assertRaises(ex.SpecifiedDayNewsError):
                self._get_cached("22 Nov 2019", 10, 'wrong_url')

            # nothing is cached for this day
            with self.assertRaises(ex.SpecifiedDayNewsError):
                self._get_cached("23 Nov 2019", 10, '')

            # zero limit and empty source
            extracted = self._get_cached("21 Nov 2019", 0, '')
            self.assertEqual(len(extracted), 1)
class TestRSSReader(unittest.TestCase):
    """
    Tests functions from cmd_line_parser.py
    """

    def test_convert_date(self):
        """
        Test convert_date(date)
        """
        # a date in the '%Y%m%d' form is converted to 'day month year'
        self.assertEqual(cml_parser.convert_date('20191121'), '21 Nov 2019')

        # any other form is rejected
        with self.assertRaises(er.FormatDateError):
            cml_parser.convert_date('21 Nov 2019')
class TestCheckFunctions(unittest.TestCase):
    """
    Tests functions from html_converter.py
    """

    def setUp(self):
        """
        Prepare one news item, command-line arguments and an empty HTML document.
        """
        self.all_news = [
            {'feed_title': 'Yahoo News - Latest News & Headlines',
             'feed_url': 'https://news.yahoo.com/rss/',
             'title': '4 reasons Democrats have an uphill climb on Donald Trump impeachment and removal',
             'link': 'https://news.yahoo.com/4-reasons-democrats-uphill-climb-100009122.html',
             'date': 'Fri, 22 Nov 2019 05:00:09 -0500',
             'img_title':
                 ['4 reasons Democrats have an uphill climb on Donald Trump impeachment and removal'],
             'img_link':
                 ['http://l1.yimg.com/uu/api/res/1.2/o5MXQ3QgenGRlHhxIdEvWg--/YXBwaWQ9eXRhY2h5b247aD04Njt3PTEzMDs'
                  '-/https://media.zenfs.com/en-us/usa_today_opinion_532/e76829b608aaa4e825a37860afc0e4aa'],
             'text': 'If the ending of this movie is inevitable, the undecided public may move toward the '
                     'Republican claim that the whole thing is a waste of time and money.'}
        ]

        self.cmd_args = Mock()
        self.cmd_args.date = ''
        self.rss_html_doc = dominate.document(title='RSS News')

    def test_convert_new_in_html(self):
        """
        Test convert_new_in_html(cmd_args, new, rss_html_doc)
        """
        # Local import: this module does not import os at the top.
        import os

        # Resolve the fixture relative to this file so the test passes from any working directory.
        tests_dir = os.path.dirname(os.path.abspath(__file__))
        example_path = os.path.join(tests_dir, 'files', 'example_html.html')

        html_from_func = conv.convert_new_in_html(self.cmd_args, self.all_news[0], self.rss_html_doc)
        with open(example_path, "r") as file:
            html_draft = file.read()
        self.assertEqual(str(html_from_func), html_draft)
class TestAddDownloaderImage(unittest.TestCase):
    """
    Tests add_downloaded_image from pdf_converter.py
    """

    @patch('pdf_converter.add_image_link')
    @patch('os.remove')
    @patch('urllib.request.urlretrieve')
    def test_add_downloaded_image_not_jpeg_png(self, urlretrieve, remove, add_image_link):
        """
        If the downloaded file is neither jpeg nor png, only the image link is added to the pdf
        """
        pdf = MagicMock()
        number = 1
        url = 'link'
        urlretrieve.return_value = ('filename', {'content-type': 'image/'})

        result = pdf_converter.add_downloaded_image(number, url, pdf)

        self.assertEqual(None, result)
        # nothing is drawn and nothing is deleted, the link is written instead
        pdf.image.assert_not_called()
        pdf.ln.assert_not_called()
        add_image_link.assert_called_once_with(number, url, pdf)
        remove.assert_not_called()

    @patch('pdf_converter.add_image_link')
    @patch('os.remove')
    @patch('urllib.request.urlretrieve')
    def test_add_downloaded_image_jpeg_or_png(self, urlretrieve, remove, add_img_link):
        """
        If the downloaded file is jpeg or png, it is drawn in the pdf and then removed
        """
        pdf = MagicMock()
        number = 1
        url = 'link'
        urlretrieve.return_value = ('filename', {'content-type': 'image/jpeg'})

        result = pdf_converter.add_downloaded_image(number, url, pdf)

        self.assertEqual(None, result)
        # the picture is drawn, the downloaded file is deleted, no link is written
        pdf.image.assert_called_once()
        pdf.ln.assert_called_once_with(40)
        remove.assert_called_once_with('filename')
        add_img_link.assert_not_called()
class TestRSSparser(unittest.TestCase):
    """
    Tests class RSSparser from rss_parser.py
    """

    def setUp(self):
        """
        Point the parser at the local RSS fixture instead of a real URL.
        """
        tests_dir = os.path.abspath(os.path.dirname(__file__))
        self.cmd_args = Mock()
        self.cmd_args.source = os.path.join(tests_dir, 'files/rss_xml_template.xml')
        self.cmd_args.limit = 100

    def test_rss_parser(self):
        """
        Test class RSSparser(self.cmd_args)
        """
        all_news = RSSparser(self.cmd_args).parse_feed()

        # the fixture holds five items, all below the limit of 100
        self.assertEqual(len(all_news), 5)
        self.assertEqual(all_news[0]['title'], 'Mars Science Lab launch delayed two years')
class TestOutPutTxtNews(unittest.TestCase):
    """
    Tests output_txt_news from utils.py
    """

    def _check_template_loading(self, colorize, env, file_sys_load, abspath):
        """
        Call output_txt_news with the given colorize flag and check how the loader is created.
        """
        fake_dir = 'path_name'
        abspath.return_value = fake_dir
        cmd_args = MagicMock()
        cmd_args.colorize = colorize

        result = utils.output_txt_news(cmd_args, {'test': 'news'})

        self.assertEqual(None, result)
        file_sys_load.assert_called_once_with((fake_dir + '/templates/'), followlinks=True)
        env.assert_called_once()

    @patch('builtins.print', MagicMock())
    @patch('os.path.abspath')
    @patch('utils.FileSystemLoader')
    @patch('utils.Environment')
    def test_output_txt_news_not_color(self, env, file_sys_load, abspath):
        """
        Test output_txt_news(cmd_args, all_news) if cmd_args.colorize is not set
        """
        self._check_template_loading(False, env, file_sys_load, abspath)

    @patch('builtins.print', MagicMock())
    @patch('os.path.abspath')
    @patch('utils.FileSystemLoader')
    @patch('utils.Environment')
    def test_output_txt_news_set_color(self, env, file_sys_load, abspath):
        """
        Test output_txt_news(cmd_args, all_news) if cmd_args.colorize is set
        """
        self._check_template_loading(True, env, file_sys_load, abspath)
class TestCheckFunctions(unittest.TestCase):
    """
    Tests functions from validator.py
    """

    def setUp(self):
        """
        Prepare fake command-line arguments.
        """
        self.cmd_args = Mock()

    def test_check_internet_connection(self):
        """
        Test check_internet_connection()
        """
        # If internet is available, return True
        with patch('requests.get'):
            self.assertTrue(validator.check_internet_connection())

        # If internet is unavailable - raise InternetConnectionError
        with patch('requests.get', side_effect=requests.exceptions.ConnectionError):
            with self.assertRaises(er.InternetConnectionError):
                validator.check_internet_connection()

    def test_check_url_availability(self):
        """
        Test check_url_availability(cmd_args)
        """
        # if URL is not defined
        self.cmd_args.source = False
        with self.assertRaises(er.UndefinedURL):
            validator.check_url_availability(self.cmd_args)

        # if URL is defined
        self.cmd_args.source = True
        # if URL is available, return True
        with patch('requests.get'):
            self.assertTrue(validator.check_url_availability(self.cmd_args))

        # if URL is unavailable, raise UnreachableURLError
        with patch('requests.get', side_effect=Exception):
            with self.assertRaises(er.UnreachableURLError):
                validator.check_url_availability(self.cmd_args)

    def mock_status_code_200(self, *args, **kwargs):
        """
        Imitate a successful Response; accepts any arguments passed to requests.get.
        """
        response_mock = Mock()
        response_mock.status_code = 200
        return response_mock

    def mock_status_code_404(self, *args, **kwargs):
        """
        Imitate a failed Response; accepts any arguments passed to requests.get.
        """
        response_mock = Mock()
        response_mock.status_code = 404
        response_mock.raise_for_status.side_effect = er.URLResponseError
        return response_mock

    @patch('requests.get')
    def test_check_response_status_code(self, get_mock):
        """
        Test check_response_status_code(cmd_args)
        """
        # if status code is 200: OK, return True
        get_mock.side_effect = self.mock_status_code_200
        self.assertTrue(validator.check_response_status_code(self.cmd_args))

        # if raise_for_status fails - raise URLResponseError
        get_mock.side_effect = self.mock_status_code_404
        with self.assertRaises(er.URLResponseError):
            validator.check_response_status_code(self.cmd_args)

    def test_check_limit_value(self):
        """
        Test check_limit_value(limit)
        """
        # if limit value is a positive - return True
        self.assertTrue(validator.check_limit_value(10))

        # if limit value is None - return True
        self.assertTrue(validator.check_limit_value(None))

        # if limit value is a negative - raise LimitSignError
        with self.assertRaises(er.LimitSignError):
            validator.check_limit_value(-2)

    def test_check_news_collection(self):
        """
        Test check_news_collection(news_collection)
        """
        # if news_collection is empty, raise FeedError
        with self.assertRaises(er.FeedError):
            validator.check_news_collection([])

        # if news_collection is not empty, return True
        self.assertTrue(validator.check_news_collection([1]))
def output_txt_news(cmd_args, all_news):
    """
    Print the news in a human-readable format by filling a prepared text template.

    :param cmd_args: parsed command-line arguments; only ``colorize`` is read here
    :param all_news: list of news dictionaries passed to the template as ``all_news``
    """
    LOGGER.info('Load the template.')

    directory = path.abspath(path.dirname(__file__))
    file_loader = FileSystemLoader((directory + '/templates/'), followlinks=True)
    env = Environment(loader=file_loader)

    if cmd_args.colorize:
        # the colorized template pipes every value through this filter
        env.filters['colorizetext'] = colored
        template_name = 'colorized_template.txt'
    else:
        template_name = 'template.txt'
    template = env.get_template(template_name)

    LOGGER.info('Fill the template with relevant data.')

    print(template.render(all_news=all_news))


def create_path_to_file(file_path, file_name):
    """
    Create a PATH to the file with it's unique name: 'date_time_name.format'

    :param file_path: directory in which the file will be placed
    :param file_name: base name of the file, appended to the timestamp
    :return: ``file_path`` joined with the timestamped file name
    """
    LOGGER.info("Create a PATH to the file with it's unique name: 'date_time_name.format'")
    # ':' is not allowed in Windows file names, so the time fields are separated with '-'.
    cur_time = datetime.now().strftime('%Y-%m-%d_%H-%M-%S_')

    return path.join(file_path, cur_time + file_name)
def check_internet_connection():
    """
    Check the internet connection

    :return: True if https://www.google.com/ answered within one second
    :raises er.InternetConnectionError: if the connection failed or timed out
    """
    try:
        LOGGER.info("Check the Internet connection")
        requests.get('https://www.google.com/', timeout=1)
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as error:
        # A read timeout is not a ConnectionError; without it a slow network ended in a traceback.
        raise er.InternetConnectionError("No connection to the Internet.") from error
    return True


def check_url_availability(cmd_args):
    """
    Check the URL availability

    :param cmd_args: parsed command-line arguments; only ``source`` is read here
    :return: True if a GET request to the URL did not raise
    :raises er.UndefinedURL: if ``cmd_args.source`` is empty
    :raises er.UnreachableURLError: if the request raised any exception
    """
    if not cmd_args.source:
        raise er.UndefinedURL('URL is required')

    LOGGER.info('Check the URL availability.')
    try:
        requests.get(cmd_args.source)
    except Exception as error:
        # Deliberately broad: any failure to request the URL is reported the same way.
        raise er.UnreachableURLError("URL is invalid.") from error

    LOGGER.info('URL is valid. Connection established.')
    return True


def check_response_status_code(cmd_args):
    """
    Check that the response status code does not signal an error

    :param cmd_args: parsed command-line arguments; only ``source`` is read here
    :return: True if ``raise_for_status()`` did not raise
    :raises er.URLResponseError: if the request failed or the status code signals an error
    """
    try:
        response = requests.get(cmd_args.source)
        response.raise_for_status()
    except requests.exceptions.RequestException as error:
        # RequestException also covers timeouts and redirect loops, not only
        # connection and HTTP status errors.
        raise er.URLResponseError('Bad response status code. Use another URL.') from error

    LOGGER.info(f'The response status code is {response.status_code}.')
    return True


def check_limit_value(limit):
    """
    Check if received limit value is valid

    :param limit: requested amount of news or None
    :return: True if the limit is None or not negative
    :raises er.LimitSignError: if the limit is negative
    """
    if limit is not None and limit < 0:
        raise er.LimitSignError('Limit value must be positive.')

    if limit is None:
        LOGGER.info("Limit set to maximum news")
    else:
        LOGGER.info(f"Limit set to {limit}.")

    return True


def check_news_collection(news_collection):
    """
    Check news_collection is not empty

    :param news_collection: collection of news received from the feed
    :return: True if the collection is not empty
    :raises er.FeedError: if the collection is empty
    """
    if not news_collection:
        raise er.FeedError("Link doesn't contain any news.")

    LOGGER.info("News collected successfully.")
    return True
'feedparser==5.2.1', + 'requests==2.22.0', + 'dominate==2.4.0', + 'fpdf==1.7.2', + 'termcolor==1.1.0', + 'coloredlogs==10.0', + ], + url='https://github.com/brechka/FinalTaskRssParser', + author='Yuliya Brechka', + author_email='juliabrechka@gmail.com', + description='RSS reader', + long_description=long_description, + long_description_content_type="text/markdown", + zip_safe=False, + entry_points={ + 'console_scripts': [ + 'rss-reader = rss_reader.rss_reader:main', + ], + }, + test_suite='rss_reader.tests', + python_requires='>=3.8', +) \ No newline at end of file