diff --git a/final_task/README.md b/final_task/README.md
index 7af281f..ce111e7 100644
--- a/final_task/README.md
+++ b/final_task/README.md
@@ -1,3 +1,68 @@
-# Your readme here
-Some text.
-Checkout how to write this file using *markdown*.
+RSS_READER
+RSS reader is a command-line utility.
+
+Usage
+usage: rss_reader.py [-h] [-v] [--json] [--version] [--limit LIMIT] [--date DATE]
+[--to-html TO_HTML] [--to-fb2 TO_FB2] [--colorize] [source]
+
+Pure Python command-line RSS reader.
+
+optional arguments:
+
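+-h, --help show this help message and exit
+source RSS URL (positional argument)
+--version Print version info
+--json Print result as JSON in stdout
+-v, --verbose Outputs verbose status messages
+--limit LIMIT Limit news topics if this parameter provided
+--date DATE News from the specified day will be printed out. Format: YYYYMMDD
+--to-html TO_HTML Save news in html format; the value is the path of an existing directory
+--to-fb2 TO_FB2 Save news in fb2 format; the value is the path of an existing directory
+--colorize Print news in colorized mode
+It is mandatory to specify a date and/or a source.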
+If both are specified, then news will be searched by date and by source.
+
+Json structure
+[
+ {
+ "Feed": [feed],
+ "Title": [title],
+ "Date": [date],
+ "Link": [link],
+ "Image description": [image description],
+ "New description": [news description],
+ "Image links": [
+ [link1],
+ [link2],
+ ...
+ ]
+ },
+ ...
+]
+
+Local storage
+All read news is saved in a MySQL database. You should have a database named final_task_database with a table named news_cache.
+news_cache structure:
+ feed:longtext
+ title:longtext
+ date:date
+ link:longtext
+ image_description:longtext
+ new_description:longtext
+ image_links:longtext
+When the --date argument is used, news is searched by date in the database.
+
+Saving in format feature
+You can save the fetched news in 2 formats: html and fb2.
+If the news is fetched from the Internet and the Internet connection is on, the news images are downloaded from the website
+and converted to base64 strings, so the saved html or fb2 files can show them without connecting
+to the Internet. If the Internet connection is off, the images aren't downloaded; in html the utility writes the image links instead of the images.
+When the --date argument is used, news is taken from the database. Images are downloaded the same way, depending on whether
+the Internet connection is on.
+
+Colorize mode
+When the --colorize argument is used, the news printed to the console will be colorized. If --json is used at the same time,
+the news will be printed in colorized JSON format.
+
+How to install application
+To install the application you need setuptools. Open cmd and enter 'pip install -U setuptools'.
+Run 'python setup.py install' in cmd to install the application.
+Install the requirements with 'pip install -r requirements.txt'.
+You are now ready to run the application. Use 'rss-reader [arguments]' to run it.
+Warning: If the path to rss-reader is not in the PATH variable, use the full path to the file when running it.
\ No newline at end of file
diff --git a/final_task/rss_reader/arguments_functions.py b/final_task/rss_reader/arguments_functions.py
new file mode 100644
index 0000000..86639b1
--- /dev/null
+++ b/final_task/rss_reader/arguments_functions.py
@@ -0,0 +1,63 @@
+import argparse
+import os
+import re
+import logging
+from custom_exceptions import IncorrectFilePath
+
+
+def get_arguments():
+    """
+    Build the command-line parser and parse the arguments of application
+
+    :return: Arguments of application (argparse namespace)
+    """
+    parser = argparse.ArgumentParser()
+    # (flags, keyword options) in the order they are registered; the order
+    # determines how the options are listed in the --help output
+    option_specs = (
+        (('-v', '--verbose'), {'action': 'store_true', 'help': 'increase output verbosity'}),
+        (('--json',), {'action': 'store_true', 'help': 'print result as JSON in stdout'}),
+        (('--version',), {'action': 'store_true', 'help': 'print version info'}),
+        (('--limit',), {'help': 'limit news topics if this parameter provided'}),
+        (('--date',), {'help': 'represent news from local storage by date'}),
+        (('--to-html',), {'help': 'save news in html format'}),
+        (('--to-fb2',), {'help': 'save news in fb2 format'}),
+        (('--colorize',), {'action': 'store_true', 'help': 'print news in colorized mode'}),
+        # The RSS URL is positional and optional
+        (('source',), {'nargs': '?'}),
+    )
+    for flags, options in option_specs:
+        parser.add_argument(*flags, **options)
+    arguments = parser.parse_args()
+    return arguments
+
+
+def check_html_argument(html_argument):
+    """
+    Check that the path given for saving html exists
+
+    :param html_argument: html directory path
+    :raises IncorrectFilePath: if the path does not exist
+    """
+    if not os.path.exists(html_argument):
+        # Spelling of the message fixed ('Inrorrect' -> 'Incorrect')
+        message = 'Incorrect html filepath'
+        logging.error(message)
+        raise IncorrectFilePath(message)
+
+
+def check_fb2_argument(fb2_argument):
+    """
+    Check that the path given for saving fb2 exists
+
+    :param fb2_argument: fb2 directory path
+    :raises IncorrectFilePath: if the path does not exist
+    """
+    if not os.path.exists(fb2_argument):
+        # Spelling of the message fixed ('Inrorrect' -> 'Incorrect')
+        message = 'Incorrect fb2 filepath'
+        logging.error(message)
+        raise IncorrectFilePath(message)
+
+
+def check_limit_argument(limit_argument):
+    """
+    Check that the value of --limit is a non-negative whole number
+
+    :param limit_argument: limit of news, as the string entered on the command line
+    :raises ValueError: if the value does not consist of digits only
+    """
+    # fullmatch instead of match: match only anchors the beginning of the
+    # string, so a value such as '5abc' used to pass this check
+    if not re.fullmatch(r'\d+', limit_argument):
+        message = 'Input value of --limit is incorrect'
+        logging.error(message)
+        raise ValueError(message)
+
+
+def check_date_argument(date_argument):
+    """
+    Check that the value of --date is a real calendar date written as YYYYMMDD
+
+    :param date_argument: Date of news in database, as the string entered on the command line
+    :raises ValueError: if the value is not exactly eight digits or is not an existing date
+    """
+    from datetime import datetime  # local import: only this check needs it
+
+    message = 'Input value of --date is incorrect'
+    # fullmatch instead of match: match only anchors the beginning of the
+    # string, so a value such as '2019abcd' used to pass this check
+    if not re.fullmatch(r'[0-9]{8}', date_argument):
+        logging.error(message)
+        raise ValueError(message)
+    try:
+        # Rejects impossible dates such as 20191345
+        datetime.strptime(date_argument, '%Y%m%d')
+    except ValueError:
+        logging.error(message)
+        raise ValueError(message) from None
diff --git a/final_task/rss_reader/custom_exceptions.py b/final_task/rss_reader/custom_exceptions.py
new file mode 100644
index 0000000..50c1cf9
--- /dev/null
+++ b/final_task/rss_reader/custom_exceptions.py
@@ -0,0 +1,14 @@
+class IncorrectURL(Exception):
+    """Error for an RSS source URL that cannot be used"""
+
+
+class NoInternet(Exception):
+    """Error for a missing Internet connection"""
+
+
+class IncorrectFilePath(Exception):
+    """Error for an output path that does not exist"""
+
+
+class DatabaseConnectionError(Exception):
+    """Error for a failed connection to the news database"""
diff --git a/final_task/rss_reader/database_functions.py b/final_task/rss_reader/database_functions.py
new file mode 100644
index 0000000..c06416a
--- /dev/null
+++ b/final_task/rss_reader/database_functions.py
@@ -0,0 +1,74 @@
+from contextlib import closing
+import logging
+import pymysql
+from custom_exceptions import DatabaseConnectionError
+
+
+def get_news_list_by_date(date, limit):
+    """
+    Returns list of news by date from database
+
+    :param date: Date of publication of news
+    :param limit: Maximum number of news to return; a falsy value (None or 0) means no limit
+    :return: List with news published on the given date; every item is a dict with the keys
+        'Feed', 'Title', 'Date', 'Link', 'Image description', 'New description', 'Image links'
+    :raises ValueError: if the database rejects the date value
+    :raises DatabaseConnectionError: if the database cannot be reached
+    """
+    logging.info('Connecting to database')
+    try:
+        # NOTE(review): the credentials are hard-coded here; consider reading them from configuration
+        with closing(pymysql.connect(host='localhost', user='root', password='Password12345',
+                                     database='final_task_database')) as connection:
+            with closing(connection.cursor()) as cursor:
+                logging.info('Connected to database')
+                logging.info('Giving request')
+                try:
+                    # Parameterized query: the driver escapes the value, so the text
+                    # typed after --date can no longer change the SQL statement
+                    cursor.execute('select * from news_cache where date=%s', (date,))
+                except pymysql.err.InternalError as error:
+                    logging.error('Input value of --date is incorrect')
+                    raise ValueError('Input value of --date is incorrect') from error
+                logging.info('Getting response')
+                database_response = cursor.fetchall()
+                rows = database_response[:limit] if limit else database_response
+                logging.info('Response was got')
+                # Columns are read by position; image links are stored as one '|||'-separated string
+                return [{'Feed': row[0],
+                         'Title': row[1],
+                         'Date': str(row[2]),
+                         'Link': row[3],
+                         'Image description': row[4],
+                         'New description': row[5],
+                         'Image links': row[6].split('|||')}
+                        for row in rows]
+    except pymysql.err.OperationalError as error:
+        logging.error('Not connected to database')
+        # A space was missing between the two halves of this message
+        raise DatabaseConnectionError("Can't connect to database, check if you have installed Mysql, and necessary "
+                                      "database with table described in README") from error
+
+
+def write_news_to_database(news_list):
+    """
+    Writes news to database, skipping the news whose link is already stored
+
+    :param news_list: List of news; every item is a dict with the keys 'Feed', 'Title', 'Date', 'Link',
+        'Image description', 'New description' and 'Image links' (a list of strings)
+    :raises DatabaseConnectionError: if the database cannot be reached
+    """
+    # Explicit order of the inserted values instead of relying on the order of the dict
+    scalar_keys = ('Feed', 'Title', 'Date', 'Link', 'Image description', 'New description')
+    logging.info('Connecting to database')
+    try:
+        # NOTE(review): the credentials are hard-coded here; consider reading them from configuration
+        with closing(pymysql.connect(host='localhost', user='root', password='Password12345',
+                                     database='final_task_database')) as connection:
+            with closing(connection.cursor()) as cursor:
+                logging.info('Connected to database')
+                for new in news_list:
+                    # Try to find new in database by link, if exists - skip it.
+                    # Parameterized: the link comes from the feed and must not be pasted into the SQL text
+                    cursor.execute('select * from news_cache where link = %s', (new['Link'],))
+                    if cursor.fetchall():
+                        continue
+                    insert_values = [new[key] for key in scalar_keys]
+                    # converting list of image links into string to store in database
+                    insert_values.append('|||'.join(new['Image links']))
+                    cursor.execute('Insert into news_cache values(%s,%s,%s,%s,%s,%s,%s)', insert_values)
+                connection.commit()
+                logging.info('Data write successful')
+    except pymysql.err.OperationalError as error:
+        logging.error('Not connected to database')
+        # A space was missing between the two halves of this message
+        raise DatabaseConnectionError("Can't connect to database, check if you have installed Mysql, and necessary "
+                                      "database with table described in README") from error
diff --git a/final_task/rss_reader/parse_rss_functions.py b/final_task/rss_reader/parse_rss_functions.py
new file mode 100644
index 0000000..72a793c
--- /dev/null
+++ b/final_task/rss_reader/parse_rss_functions.py
@@ -0,0 +1,81 @@
+import re
+import feedparser
+import socket
+import logging
+from dateutil import parser as date_parser
+import html
+from custom_exceptions import IncorrectURL, NoInternet
+
+
+def ckeck_internet():
+    """
+    Checks Internet connection by resolving www.google.com and opening a TCP connection to its port 80
+
+    :return: True if the connection was opened, False otherwise
+    """
+    logging.info("checking Internet connection")
+    # NOTE: this sets the default timeout for every socket created afterwards in the process
+    socket.setdefaulttimeout(5)
+    try:
+        host = socket.gethostbyname("www.google.com")
+        # The socket is a context manager, so it is closed even if something fails after connecting
+        with socket.create_connection((host, 80), 2):
+            pass
+    except OSError:
+        # Resolution failures and timeouts are OSError subclasses; other errors are not hidden any more
+        logging.error("Internet off.")
+        return False
+    logging.info('Internet on.')
+    return True
+
+
+def get_new_description(summary_str):
+    """
+    Extract new description from summary string by removing every '<...>' fragment
+
+    :param summary_str: Summary string from parsing RSS
+    :return: New description
+    """
+    # Non-greedy pattern: each tag is removed on its own and the text between tags is kept
+    return re.sub(r'<.*?>', '', summary_str)
+
+
+def get_image_description(summary_str):
+    """
+    Extract image description (value of the first quoted alt attribute) from summary string
+
+    :param summary_str: Summary string from parsing RSS
+    :return: Image description, or an empty string if the summary has no alt attribute
+    """
+    # The former find('alt') + 5 slicing returned unrelated text when the summary had
+    # no alt attribute, because find() gives -1 and the slice then started at offset 4
+    found = re.search(r'''\balt\s*=\s*(["'])(.*?)\1''', summary_str, re.DOTALL)
+    return found.group(2) if found else ''
+
+
+def get_news_list(source, limit):
+    """
+    Function parsing the rss received from source
+    into a list of news which will then be used for printing or parsing into JSON
+
+    :param source: RSS URL
+    :param limit: Limit of viewing news; a falsy value (None or 0) means no limit
+    :return: RSS display list; every item is a dict with the keys 'Feed', 'Title', 'Date', 'Link',
+        'Image description', 'New description' and 'Image links'
+    :raises NoInternet: if the Internet connection check fails
+    :raises IncorrectURL: if the source could not be parsed into any news
+    """
+    logging.info('Creating news list')
+    if not ckeck_internet():
+        raise NoInternet("Internet off, please check your connection")
+    logging.info('Getting and parsing RSS')
+    parsed_rss = feedparser.parse(source)
+    entries = parsed_rss['entries']
+    # bozo is also set for feeds that are not well-formed but were still parsed,
+    # so the source is rejected only when nothing could be extracted from it
+    if parsed_rss['bozo'] and not entries:
+        raise IncorrectURL('The entered URL is incorrect')
+    if limit:
+        entries = entries[:limit]
+    feed_title = html.unescape(parsed_rss['feed'].get('title', ''))
+    news_list = []
+    for entry in entries:
+        # Not every entry carries a summary, media content or a publication date
+        summary = entry.get('summary', '')
+        published = entry.get('published') or entry.get('updated')
+        news_list.append({
+            'Feed': feed_title,
+            'Title': html.unescape(entry.get('title', '')),
+            # Keep only the date part of the parsed timestamp
+            'Date': str(date_parser.parse(published)).split(" ")[0] if published else '',
+            'Link': entry.get('link', ''),
+            'Image description': html.unescape(get_image_description(summary)),
+            'New description': html.unescape(get_new_description(summary)),
+            'Image links': [content['url'] for content in entry.get('media_content', []) if 'url' in content],
+        })
+    return news_list
diff --git a/final_task/rss_reader/print_functions.py b/final_task/rss_reader/print_functions.py
new file mode 100644
index 0000000..7fdd059
--- /dev/null
+++ b/final_task/rss_reader/print_functions.py
@@ -0,0 +1,84 @@
+import logging
+import json
+from colorama import Fore, Back, Style
+
+
+def print_news(news_list):
+    """
+    Prints news in readable format: a numbered header, then every field name
+    followed by its tab-indented value, then the image links one per line
+
+    :param news_list: The list of news
+    """
+    logging.info('Printing news')
+    labels = ('Feed', 'Title', 'Date', 'Link', 'Image description', 'New description')
+    for number, new in enumerate(news_list, start=1):
+        lines = [f'New {number}\n']
+        lines.extend(f'{label}:\n\t{new[label]}' for label in labels)
+        lines.append('Image links:')
+        lines.extend(f'\t{image_link}' for image_link in new['Image links'])
+        # Trailing blank lines separate one news item from the next
+        lines.append('\n')
+        print('\n'.join(lines))
+
+
+def print_news_colorize(news_list):
+    """
+    Prints news in readable colorize format: every field name on a coloured
+    background, its value in the matching foreground colour
+
+    :param news_list: The list of news
+    """
+    logging.info('Printing news colorize')
+    reset = Style.RESET_ALL
+    # (field name, background of the name, foreground of the value)
+    sections = (
+        ('Feed', Back.BLUE, Fore.BLUE),
+        ('Title', Back.GREEN, Fore.GREEN),
+        ('Date', Back.CYAN, Fore.CYAN),
+        ('Link', Back.RED, Fore.RED),
+        ('Image description', Back.YELLOW, Fore.YELLOW),
+        ('New description', Back.LIGHTBLUE_EX, Fore.LIGHTBLUE_EX),
+    )
+    for number, new in enumerate(news_list, start=1):
+        print(f'{reset}{Fore.WHITE}{Back.MAGENTA}New {number}\n')
+        for label, label_back, value_fore in sections:
+            print(f'{reset}{Fore.WHITE}{label_back}{label}:\n{reset}{value_fore}\t{new[label]}')
+        print(f'{reset}{Fore.BLACK}{Back.LIGHTGREEN_EX}Image links:')
+        for image_link in new['Image links']:
+            print(f'{reset}{Fore.LIGHTGREEN_EX}\t{image_link}')
+        print('\n')
+
+
+def print_news_JSON(news_list):
+    """
+    Prints news in readable JSON format (4-space indent, non-ASCII characters kept as they are)
+
+    :param news_list: The list of news
+    """
+    logging.info('Printing news as JSON')
+    serialized = json.dumps(news_list, ensure_ascii=False, indent=4)
+    print(serialized)
+
+
+def print_news_JSON_colorize(news_list):
+    """
+    Prints news in readable colorize JSON format: JSON-like text in which keys
+    and values are wrapped in ANSI colour escape sequences
+
+    :param news_list: The list of news
+    """
+    logging.info('Printing news as JSON')
+    # (key, ANSI background code of the key, ANSI foreground code of the value)
+    fields = (('Feed', 41, 31), ('Title', 42, 32), ('Date', 43, 33), ('Link', 44, 34),
+              ('Image description', 45, 35), ('New description', 46, 36))
+    items = []
+    for new in news_list:
+        pairs = ''.join(f'\n\t\t"\033[{back}m{key}\033[0m": "\033[{fore}m{new[key]}\033[0m",'
+                        for key, back, fore in fields)
+        links = ','.join(f'\n\t\t\t"\033[34m{link}\033[0m"' for link in new['Image links'])
+        # The opening brace of every object was missing, leaving the closing one unmatched
+        items.append('\n\t{' + pairs + '\n\t\t"\033[44mImage links\033[0m": [' + links + '\n\t\t]\n\t}')
+    print('[' + ','.join(items) + '\n]')
diff --git a/final_task/rss_reader/requirements.txt b/final_task/rss_reader/requirements.txt
index e69de29..5bab75d 100644
--- a/final_task/rss_reader/requirements.txt
+++ b/final_task/rss_reader/requirements.txt
@@ -0,0 +1,8 @@
+feedparser
+termcolor
+pymysql
+colorama
+mysql-connector-python
+py-dateutil
+requests
+Pillow
\ No newline at end of file
diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py
index e69de29..cca9100 100644
--- a/final_task/rss_reader/rss_reader.py
+++ b/final_task/rss_reader/rss_reader.py
@@ -0,0 +1,101 @@
+import argparse
+import re
+from termcolor import colored
+import os
+import colorama
+import logging
+from database_functions import get_news_list_by_date, write_news_to_database
+from parse_rss_functions import get_news_list
+from custom_exceptions import NoInternet, IncorrectURL, IncorrectFilePath, DatabaseConnectionError
+from print_functions import print_news_colorize, print_news_JSON_colorize, print_news, print_news_JSON
+from save_in_format_functions import save_in_fb2, save_in_html
+from arguments_functions import check_date_argument, check_fb2_argument, check_html_argument, \
+ check_limit_argument, get_arguments
+
+# Version number of the application, printed by the --version option
+VERSION = 5
+
+
+def _print_news_list(arguments, news_list):
+    """Print the news to the console in the format selected by --colorize and --json."""
+    # (colorize, json) -> printing function
+    printers = {
+        (True, True): print_news_JSON_colorize,
+        (True, False): print_news_colorize,
+        (False, True): print_news_JSON,
+        (False, False): print_news,
+    }
+    printers[(arguments.colorize, arguments.json)](news_list)
+
+
+def _save_news_list(arguments, news_list, file_stem):
+    """Save the news as html and/or fb2, depending on which of --to-html / --to-fb2 was given."""
+    if arguments.to_html:
+        save_in_html(arguments.to_html, news_list, f"{file_stem}.html")
+    if arguments.to_fb2:
+        save_in_fb2(arguments.to_fb2, news_list, f"{file_stem}.fb2")
+
+
+def main():
+    """
+    The main entry point of the application
+
+    Reads and checks the arguments, then takes the news either from the database (--date)
+    or from the RSS source. The news is saved to files when --to-html / --to-fb2 is given,
+    otherwise it is printed. News taken from the RSS source is written to the database.
+
+    :raises ValueError: if the value of --limit or --date is incorrect
+    :raises IncorrectFilePath: if the path given with --to-html or --to-fb2 does not exist
+    :raises IncorrectURL: if neither --date nor a source is given
+    """
+    colorama.init()
+    arguments = get_arguments()
+    if arguments.version:
+        print(f'Program version - {VERSION}')
+        return
+    if arguments.verbose:
+        logging.basicConfig(level=logging.INFO)
+    else:
+        # Without --verbose the log goes to a file that is overwritten on every run
+        logging.basicConfig(filename='sample.log', filemode='w', level=logging.INFO)
+    logging.info('Program started')
+    if arguments.to_html:
+        check_html_argument(arguments.to_html)
+    if arguments.to_fb2:
+        check_fb2_argument(arguments.to_fb2)
+    if arguments.limit:
+        check_limit_argument(arguments.limit)
+        arguments.limit = int(arguments.limit)
+    save_requested = bool(arguments.to_html or arguments.to_fb2)
+    if arguments.date:
+        # With --date the news comes from the database and the source is not read
+        check_date_argument(arguments.date)
+        news_list = get_news_list_by_date(arguments.date, arguments.limit)
+        if save_requested:
+            _save_news_list(arguments, news_list, f"news_by_date-{arguments.date}")
+        elif news_list:
+            _print_news_list(arguments, news_list)
+        else:
+            print('No news by this date')
+        return
+    if not arguments.source:
+        # The source is optional for the parser, so its absence has to be reported here
+        raise IncorrectURL('RSS source URL is required when --date is not specified')
+    news_list = get_news_list(arguments.source, arguments.limit)
+    if save_requested:
+        # NOTE(review): [8:-4] cuts 8 leading and 4 trailing characters of the URL and may
+        # leave '/' in the file name - confirm that the saving functions cope with it
+        _save_news_list(arguments, news_list, f"news_from-{arguments.source[8:-4]}")
+    else:
+        _print_news_list(arguments, news_list)
+    write_news_to_database(news_list)
+
+
+if __name__ == '__main__':
+    # Every expected failure is reported the same way: the message in red
+    # on the console and an error record in the log
+    handled_errors = (IncorrectURL, NoInternet, ValueError, IncorrectFilePath, DatabaseConnectionError)
+    try:
+        main()
+    except handled_errors as e:
+        print(colored(e, 'red'))
+        logging.error(e)
+    finally:
+        logging.info('Program ended')
diff --git a/final_task/rss_reader/save_in_format_functions.py b/final_task/rss_reader/save_in_format_functions.py
new file mode 100644
index 0000000..7d8a62b
--- /dev/null
+++ b/final_task/rss_reader/save_in_format_functions.py
@@ -0,0 +1,117 @@
+from PIL import Image
+import requests
+import base64
+from io import BytesIO
+import os
+import logging
+from parse_rss_functions import ckeck_internet
+
+
+def get_new_content_html(new):
+ """
+ :param new: The new
+ :return: string representation of new in html
+ Converts new into string which will be used in html format
+ """
+ images_content = ""
+ if not ckeck_internet():
+ for image_link in new['Image links']:
+ images_content += f"{image_link}"
+ else:
+ for image_link in new['Image links']:
+ if image_link == "":
+ continue
+ response = requests.get(image_link)
+ encoded_string = str(base64.b64encode(response.content))
+ images_content += "\n"
+ return f"""
+
{new['Feed']}
+{new['Title']}
+{new['Date']}
+ +{images_content}
+{new['New description']}
+{new['Feed'].replace('&', 'and')}
+{new['Title'].replace('&', 'and')}
+{new['Date']}
+{images_content}
+{new['New description'].replace('&', 'and')}
+Ocasio-Cortez discussed the issue with Yahoo News on ' \
+ 'Capitol Hill on Tuesday as the third day of public hearings was being conducted in ' \
+ 'the Democrats’ ongoing impeachment inquiry.
'
+ correct_result='Ocasio-Cortez discussed the issue with Yahoo News on Capitol Hill on Tuesday as ' \
+ 'the third day of public hearings was being conducted in the Democrats’ ' \
+ 'ongoing impeachment inquiry.'
+ self.assertEqual(get_new_description(summary_str),correct_result)
+
+ def test_get_image_description(self):
+ summary_str = '
Ocasio-Cortez discussed the issue with Yahoo News on ' \
+ 'Capitol Hill on Tuesday as the third day of public hearings was being conducted in ' \
+ 'the Democrats’ ongoing impeachment inquiry.
'
+ correct_result="Ocasio-Cortez: Trump was 'clearly engaged in extortion and bribery';"
+ self.assertEqual(get_image_description(summary_str), correct_result)
+
+
+if __name__ == '__main__':
+ unittest.main()
\ No newline at end of file