epam-python-courses-7-bsu · paxalos · Nov 20, 2019 · Nov 20, 2019 · Nov 20, 2019 · Nov 20, 2019
diff --git a/final_task/README.md b/final_task/README.md
@@ -1,3 +1,68 @@
-# Your readme here
-Some text.
-Checkout how to write this file using *markdown*.
+RSS_READER
+RSS reader is a command-line utility.
+
+Usage
+usage: rss_reader.py [-h] [--source SOURCE] [--version] [--json] [--verbose]
+[--limit LIMIT] [--date DATE]
+
+Pure Python command-line RSS reader.
+
+optional arguments:
+
+-h, --help            show this help message and exit
+--source SOURCE            RSS URL
+--version            Print version info
+--json            Print result as JSON in stdout
+--verbose            Outputs verbose status messages
+--limit LIMIT            Limit news topics if this parameter provided
+--date DATE            News from the specified day will be printed out. Format: YYYYMMDD
+It is mandatory to specify a date and/or a source.
+If both are specified, the news will be searched by date and by source.
+
+Json structure
+[
+    {
+            "feed": [feed],
+            "title": [title],
+            "date": [date],
+            "link": [link],
+            "text": [text],
+            "image links": [
+                [link1]
+                [link2]
+                ...
+            ]
+    },
+        ...
+]
+
+Local storage
+All fetched news is saved in a MySQL database. You must have a database named final_task_database with a table named news_cache.
+news_cache structure:
+    feed:longtext
+    title:longtext
+    date:date
+    link:longtext
+    image_description:longtext
+    new_description:longtext
+    image_links:longtext
+When the --date argument is used, the news is looked up by date in the database.
+
+Saving in format feature
+You can save the retrieved news in 2 formats: html, fb2.
+If the news is fetched from the Internet and the connection is available, the news images are downloaded from the website
+and converted to base64 strings, so the saved html or fb2 files can show them without connecting
+to the Internet. If the connection is unavailable, the images aren't downloaded and the utility writes the image links into the html instead.
+When the --date argument is used, the news is read from the database. Images are downloaded the same way, depending on whether
+the Internet connection is available.
+
+Colorize mode
+When the --colorize argument is used, the news printed to the console is colorized. If --json is used at the same time,
+the news is printed in colorized JSON format.
+
+How to install application
+To install the application you need setuptools. Open cmd and enter 'pip install -U setuptools'.
+Run 'python setup.py install' in cmd to install the application.
+Install the requirements with 'pip install -r requirements.txt'.
+You are now ready to run the application. Use 'rss-reader [arguments]' to run it.
+Warning: If the path to rss-reader is not in the PATH variable, use the full path to the file when running it.
diff --git a/final_task/rss_reader/arguments_functions.py b/final_task/rss_reader/arguments_functions.py
@@ -0,0 +1,63 @@
+import argparse
+import os
+import re
+import logging
+from custom_exceptions import IncorrectFilePath
+
+
+def get_arguments():
+    """
+    Build the command-line parser and parse the arguments of the application.
+
+    :return: Namespace with the parsed arguments of the application
+    """
+    # Options are registered in this order, which is also the order shown by --help.
+    option_table = (
+        (('-v', '--verbose'), {'action': 'store_true', 'help': 'increase output verbosity'}),
+        (('--json',), {'action': 'store_true', 'help': 'print result as JSON in stdout'}),
+        (('--version',), {'action': 'store_true', 'help': 'print version info'}),
+        (('--limit',), {'help': 'limit news topics if this parameter provided'}),
+        (('--date',), {'help': 'represent news from local storage by date'}),
+        (('--to-html',), {'help': 'save news in html format'}),
+        (('--to-fb2',), {'help': 'save news in fb2 format'}),
+        (('--colorize',), {'action': 'store_true', 'help': 'print news in colorized mode'}),
+        (('source',), {'nargs': '?'}),
+    )
+    cli_parser = argparse.ArgumentParser()
+    for names, settings in option_table:
+        cli_parser.add_argument(*names, **settings)
+    return cli_parser.parse_args()
+
+
+def check_html_argument(html_argument):
+    """
+    Validate the html output path.
+
+    :param html_argument: html directory path
+    :raises IncorrectFilePath: if the path does not exist
+    """
+    if not os.path.exists(html_argument):
+        # The message used to be misspelled as 'Inrorrect'.
+        message = 'Incorrect html filepath'
+        logging.error(message)
+        raise IncorrectFilePath(message)
+
+
+def check_fb2_argument(fb2_argument):
+    """
+    Validate the fb2 output path.
+
+    :param fb2_argument: fb2 directory path
+    :raises IncorrectFilePath: if the path does not exist
+    """
+    if not os.path.exists(fb2_argument):
+        # The message used to be misspelled as 'Inrorrect'.
+        message = 'Incorrect fb2 filepath'
+        logging.error(message)
+        raise IncorrectFilePath(message)
+
+
+def check_limit_argument(limit_argument):
+    """
+    Validate the value of --limit.
+
+    :param limit_argument: limit of news, as the string read from the command line
+    :raises ValueError: if the value is not made of digits only
+    """
+    # fullmatch, not match: match only anchors the start, so '5abc' used to be accepted.
+    if not re.fullmatch(r'\d+', limit_argument):
+        logging.error('Input value of --limit is incorrect')
+        raise ValueError('Input value of --limit is incorrect')
+
+
+def check_date_argument(date_argument):
+    """
+    Validate the value of --date.
+
+    :param date_argument: Date of news in database, expected as YYYYMMDD
+    :raises ValueError: if the value is not eight digits forming a real calendar date
+    """
+    from datetime import datetime
+
+    # fullmatch, not match: match only anchors the start, so '2019abcd' used to be accepted.
+    is_valid = re.fullmatch(r'\d{8}', date_argument) is not None
+    if is_valid:
+        try:
+            # Reject impossible dates such as 20191340 before they reach the database.
+            datetime.strptime(date_argument, '%Y%m%d')
+        except ValueError:
+            is_valid = False
+    if not is_valid:
+        logging.error('Input value of --date is incorrect')
+        raise ValueError('Input value of --date is incorrect')
diff --git a/final_task/rss_reader/custom_exceptions.py b/final_task/rss_reader/custom_exceptions.py
@@ -0,0 +1,14 @@
+class IncorrectURL(Exception):
+    """Error signalling that the given RSS URL is incorrect."""
+
+
+class NoInternet(Exception):
+    """Error signalling that there is no Internet connection."""
+
+
+class IncorrectFilePath(Exception):
+    """Error signalling that a given file path is incorrect."""
+
+
+class DatabaseConnectionError(Exception):
+    """Error signalling that the database connection failed."""
diff --git a/final_task/rss_reader/database_functions.py b/final_task/rss_reader/database_functions.py
@@ -0,0 +1,74 @@
+from contextlib import closing
+import logging
+import pymysql
+from custom_exceptions import DatabaseConnectionError
+
+
+def get_news_list_by_date(date, limit):
+    """
+    Return the news stored in the database for the given publication date.
+
+    :param date: Date of publication of news
+    :param limit: Maximum number of news to return; a falsy value means no limit
+    :return: List of news dicts published on that date
+    :raises ValueError: if the database rejects the date value
+    :raises DatabaseConnectionError: if the database connection fails
+    """
+    logging.info('Connecting to database')
+    try:
+        # NOTE(review): the credentials are hard-coded; consider moving them to configuration.
+        with closing(pymysql.connect(host='localhost', user='root', password='Password12345',
+                                     database='final_task_database')) as connection:
+            with closing(connection.cursor()) as cursor:
+                logging.info('Connected to database')
+                logging.info('Giving request')
+                try:
+                    # Parameterized query: the date is user input (the --date value) and must
+                    # never be interpolated into the SQL text.
+                    cursor.execute('select * from news_cache where date=%s', (date,))
+                except pymysql.err.InternalError as error:
+                    logging.error('Input value of --date is incorrect')
+                    raise ValueError('Input value of --date is incorrect') from error
+                logging.info('Getting response')
+                rows = cursor.fetchall()
+                if limit:
+                    # max() keeps a negative limit equivalent to "no rows", as range() did before.
+                    rows = rows[:max(limit, 0)]
+                logging.info('Response was got')
+                return [{'Feed': row[0],
+                         'Title': row[1],
+                         'Date': str(row[2]),
+                         'Link': row[3],
+                         'Image description': row[4],
+                         'New description': row[5],
+                         # An empty stored string means "no images", not one empty link.
+                         'Image links': row[6].split('|||') if row[6] else []}
+                        for row in rows]
+    except pymysql.err.OperationalError as error:
+        logging.error('Not connected to database')
+        # A space was missing between the two string parts ("necessarydatabase").
+        raise DatabaseConnectionError("Can't connect to database, check if you have installed Mysql, and necessary "
+                                      "database with table described in README") from error
+
+
+def write_news_to_database(news_list):
+    """
+    Write the news that are not stored yet to the database.
+
+    A news item whose link is already present in news_cache is skipped.
+
+    :param news_list: List of news
+    :raises DatabaseConnectionError: if the database connection fails
+    """
+    logging.info('Connecting to database')
+    try:
+        # NOTE(review): the credentials are hard-coded; consider moving them to configuration.
+        with closing(pymysql.connect(host='localhost', user='root', password='Password12345',
+                                     database='final_task_database')) as connection:
+            with closing(connection.cursor()) as cursor:
+                logging.info('Connected to database')
+                for new in news_list:
+                    # Parameterized lookup: links come from external feeds and may contain quotes.
+                    cursor.execute('select 1 from news_cache where link = %s limit 1', (new['Link'],))
+                    if cursor.fetchone():
+                        continue
+                    # Values are picked by key so the insert does not depend on dict ordering.
+                    insert_values = (new['Feed'],
+                                     new['Title'],
+                                     new['Date'],
+                                     new['Link'],
+                                     new['Image description'],
+                                     new['New description'],
+                                     # the list of image links is stored as one '|||'-separated string
+                                     '|||'.join(new['Image links']))
+                    cursor.execute('Insert into news_cache values(%s,%s,%s,%s,%s,%s,%s)', insert_values)
+                connection.commit()
+        logging.info('Data write successful')
+    except pymysql.err.OperationalError as error:
+        logging.error('Not connected to database')
+        # A space was missing between the two string parts ("necessarydatabase").
+        raise DatabaseConnectionError("Can't connect to database, check if you have installed Mysql, and necessary "
+                                      "database with table described in README") from error
diff --git a/final_task/rss_reader/parse_rss_functions.py b/final_task/rss_reader/parse_rss_functions.py
@@ -0,0 +1,81 @@
+import re
+import feedparser
+import socket
+import logging
+from dateutil import parser as date_parser
+import html
+from custom_exceptions import IncorrectURL, NoInternet
+
+
+def ckeck_internet():
+    """
+    Check whether an Internet connection is available.
+
+    Resolves www.google.com and opens a TCP connection to port 80.
+
+    :return: True if the connection attempt succeeds, False otherwise
+    """
+    logging.info("checking Internet connection")
+    # NOTE: this sets the process-wide default timeout for sockets created afterwards;
+    # it is kept for backward compatibility.
+    socket.setdefaulttimeout(5)
+    try:
+        host = socket.gethostbyname("www.google.com")
+        # The context manager guarantees the probe socket is closed.
+        with socket.create_connection((host, 80), 2):
+            pass
+    except OSError:
+        # Name-resolution failures and timeouts are OSError subclasses.
+        logging.error("Internet off.")
+        return False
+    logging.info('Internet on.')
+    return True
+
+
+def get_new_description(summary_str):
+    """
+    Strip the markup tags from the summary and return the remaining text.
+
+    :param summary_str: Summary string from parsing RSS
+    :return: New description
+    """
+    tag_free_text = re.sub(r'<.*?>', '', summary_str)
+    return tag_free_text
+
+
+def get_image_description(summary_str):
+    """
+    Extract the image description (the value of the first alt attribute) from the summary.
+
+    :param summary_str: Summary string from parsing RSS
+    :return: Image description, or an empty string when the summary has no alt attribute
+    """
+    # Previously a summary without 'alt' returned an arbitrary slice of the text,
+    # because str.find() yields -1 when nothing is found.
+    alt_match = re.search(r'''\balt\s*=\s*(["'])(.*?)\1''', summary_str, re.DOTALL)
+    return alt_match.group(2) if alt_match else ''
+
+
+def get_news_list(source, limit):
+    """
+    Parse the RSS received from source into a list of news.
+
+    The resulting list is then used for printing or for converting into JSON.
+
+    :param source: RSS URL
+    :param limit: Limit of viewing news; a falsy value means no limit
+    :return: RSS display list (one dict per news item)
+    :raises NoInternet: if the Internet connection check fails
+    :raises IncorrectURL: if feedparser flags the parsed feed as bozo
+    """
+    logging.info('Creating news list')
+    if not ckeck_internet():
+        raise NoInternet("Internet off, please check your connection")
+    logging.info('Getting and parsing RSS')
+    parsed_rss = feedparser.parse(source)
+    if parsed_rss['bozo']:
+        raise IncorrectURL('The entered URL is incorrect')
+    entries = parsed_rss['entries']
+    if limit:
+        # max() keeps a negative limit equivalent to "no entries", as range() did before.
+        entries = entries[:max(limit, 0)]
+    feed_title = html.unescape(parsed_rss['feed'].get('title', ''))
+    news_list = []
+    for entry in entries:
+        # Not every feed provides a summary or media content; treat them as optional.
+        summary = entry.get('summary', '')
+        news_list.append({'Feed': feed_title,
+                          'Title': html.unescape(entry['title']),
+                          'Date': str(date_parser.parse(entry['published'])).split(" ")[0],
+                          'Link': entry['link'],
+                          'Image description': html.unescape(get_image_description(summary)),
+                          'New description': html.unescape(get_new_description(summary)),
+                          'Image links': [content['url']
+                                          for content in entry.get('media_content', [])
+                                          if 'url' in content]})
+    return news_list
diff --git a/final_task/rss_reader/print_functions.py b/final_task/rss_reader/print_functions.py
@@ -0,0 +1,84 @@
+import logging
+import json
+from colorama import Fore, Back, Style
+
+
+def print_news(news_list):
+    """
+    Print every news item section by section in a readable format.
+
+    :param news_list: The list of news
+    """
+    logging.info('Printing news')
+    single_value_sections = ('Feed', 'Title', 'Date', 'Link', 'Image description', 'New description')
+    for number, item in enumerate(news_list, start=1):
+        print(f'New {number}\n')
+        for section in single_value_sections:
+            print(f'{section}:')
+            print(f'\t{item[section]}')
+        print('Image links:')
+        for image_link in item['Image links']:
+            print(f'\t{image_link}')
+        print('\n')
+
+
+def print_news_colorize(news_list):
+    """
+    Print every news item section by section, colouring each section with colorama codes.
+
+    :param news_list: The list of news
+    """
+    logging.info('Printing news colorize')
+    # (section name, background of the heading, foreground of the value)
+    section_palette = (
+        ('Feed', Back.BLUE, Fore.BLUE),
+        ('Title', Back.GREEN, Fore.GREEN),
+        ('Date', Back.CYAN, Fore.CYAN),
+        ('Link', Back.RED, Fore.RED),
+        ('Image description', Back.YELLOW, Fore.YELLOW),
+        ('New description', Back.LIGHTBLUE_EX, Fore.LIGHTBLUE_EX),
+    )
+    heading_prefix = Style.RESET_ALL + Fore.WHITE
+    for number, item in enumerate(news_list, start=1):
+        print(f'{heading_prefix}{Back.MAGENTA}New {number}\n')
+        for section, background, foreground in section_palette:
+            print(f'{heading_prefix}{background}{section}:\n{Style.RESET_ALL}{foreground}\t{item[section]}')
+        print(f'{Style.RESET_ALL}{Fore.BLACK}{Back.LIGHTGREEN_EX}Image links:')
+        for image_link in item['Image links']:
+            print(f'{Style.RESET_ALL}{Fore.LIGHTGREEN_EX}\t{image_link}')
+        print('\n')
+
+
+def print_news_JSON(news_list):
+    """
+    Serialize the news to indented JSON and print it to stdout.
+
+    :param news_list: The list of news
+    """
+    logging.info('Printing news as JSON')
+    serialized_news = json.dumps(news_list, indent=4, ensure_ascii=False)
+    print(serialized_news)
+
+
+def print_news_JSON_colorize(news_list):
+    """
+    Print the news as JSON whose keys and values are wrapped in ANSI colour codes.
+
+    :param news_list: The list of news
+    """
+    logging.info('Printing news as JSON')
+
+    def escape(value):
+        # Escape quotes, backslashes and control characters the way JSON requires,
+        # then drop the surrounding quotes that json.dumps adds.
+        return json.dumps(str(value), ensure_ascii=False)[1:-1]
+
+    # (key, ANSI background code of the key, ANSI foreground code of the value)
+    field_colors = (
+        ('Feed', 41, 31),
+        ('Title', 42, 32),
+        ('Date', 43, 33),
+        ('Link', 44, 34),
+        ('Image description', 45, 35),
+        ('New description', 46, 36),
+    )
+    rendered_news = []
+    for new in news_list:
+        fields = ''.join(
+            f'\n\t\t"\033[{back}m{key}\033[0m": "\033[{fore}m{escape(new[key])}\033[0m",'
+            for key, back, fore in field_colors)
+        links = ','.join(f'\n\t\t\t"\033[34m{escape(link)}\033[0m"' for link in new['Image links'])
+        # The opening brace of every object was missing from the previous output.
+        rendered_news.append('\n\t{' + fields + '\n\t\t"\033[44mImage links\033[0m": [' + links + '\n\t\t]\n\t}')
+    print('[' + ','.join(rendered_news) + '\n]')