From 03a00a2bc79624c67cc6b81ff1e28ffffbba5f23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=BE=D1=81=D1=8C=20=D0=9F=D0=B0=D0=B2=D0=B5=D0=BB?= Date: Thu, 21 Nov 2019 00:18:02 +0300 Subject: [PATCH 1/8] Complete first iteration with tests --- final_task/rss_reader/parse_rss_functions.py | 81 ++++++++++++++++++++ final_task/rss_reader/personal_exceptions.py | 8 ++ final_task/rss_reader/print_functions.py | 32 ++++++++ final_task/rss_reader/requirements.txt | 4 + final_task/rss_reader/rss_reader.py | 57 ++++++++++++++ final_task/tests/test.py | 34 ++++++++ 6 files changed, 216 insertions(+) create mode 100644 final_task/rss_reader/parse_rss_functions.py create mode 100644 final_task/rss_reader/personal_exceptions.py create mode 100644 final_task/rss_reader/print_functions.py create mode 100644 final_task/tests/test.py diff --git a/final_task/rss_reader/parse_rss_functions.py b/final_task/rss_reader/parse_rss_functions.py new file mode 100644 index 0000000..f453028 --- /dev/null +++ b/final_task/rss_reader/parse_rss_functions.py @@ -0,0 +1,81 @@ +import re +import feedparser +import socket +import logging +from dateutil import parser as date_parser +import html +from personal_exceptions import * + + +def ckeck_internet(): + """ + Checks Internet connetction + """ + try: + logging.info("checking Internet connection") + socket.setdefaulttimeout(5) + host = socket.gethostbyname("www.google.com") + s = socket.create_connection((host, 80), 2) + s.close() + logging.info('Internet on.') + return True + except Exception as e: + logging.error("Internet off.") + return False + + +def get_new_description(summary_str): + """ + :param summary_str: Summary string from parsing RSS + :return: New description + Extract new description from summary string + """ + pattern = re.compile(r'<.*?>') + return pattern.sub('', summary_str) + + +def get_image_description(summary_str): + """ + :param summary_str: Summary string from parsing RSS + :return: Image description + Extract image 
description from summary string + """ + return summary_str[summary_str.find('alt') + 5::].split('"')[0] + + +def get_news_list(source, limit): + """ + :param source - RSS URL: + :param limit - Limit of viewing news: + :return - RSS display list: + Function parsing the rss received from source + into a list of news which will then be used for printing or parsing into JSON + """ + logging.info('Creating news list') + if not ckeck_internet(): + raise NoInternet + logging.info('Getting and parsing RSS') + parsed_rss = feedparser.parse(source) + if parsed_rss['bozo'] == 1: + raise IncorrectURL + if limit: + limit = min(limit, len(parsed_rss['entries'])) + else: + limit = len(parsed_rss['entries']) + news_list = [] + for index in range(limit): + news_list.append({'Feed': + html.unescape(parsed_rss['feed']['title']), + 'Title': + html.unescape(parsed_rss['entries'][index]['title']), + 'Date': + str(date_parser.parse(parsed_rss['entries'][index]['published'])), + 'Link': + parsed_rss['entries'][index]['link'], + 'Image description': + html.unescape(get_image_description(parsed_rss['entries'][index]['summary'])), + 'New description': + html.unescape(get_new_description(parsed_rss['entries'][index]['summary'])), + 'Image links': + [content['url'] for content in parsed_rss['entries'][index]['media_content']]}) + return news_list diff --git a/final_task/rss_reader/personal_exceptions.py b/final_task/rss_reader/personal_exceptions.py new file mode 100644 index 0000000..958ce2e --- /dev/null +++ b/final_task/rss_reader/personal_exceptions.py @@ -0,0 +1,8 @@ +class IncorrectURL(Exception): + def __str__(self) -> str: + return 'The entered URL is incorrect' + + +class NoInternet(Exception): + def __str__(self) -> str: + return "Internet off, please check your connection" diff --git a/final_task/rss_reader/print_functions.py b/final_task/rss_reader/print_functions.py new file mode 100644 index 0000000..a1f0f7f --- /dev/null +++ b/final_task/rss_reader/print_functions.py @@ -0,0 
+1,32 @@ +import logging +import json + + +def print_news(news_list): + """ + :param news_list: The list of news + Prints news in readable format + """ + logging.info('Printing news') + for index, new in enumerate(news_list): + print(f'New {index + 1}\n') + print(f'Feed:\n\t{new["Feed"]}') + print('Title:') + print(f'\t{new["Title"]}') + print(f'Date:\n\t{new["Date"]}') + print(f'Link:\n\t{new["Link"]}') + print(f'Image description:\n\t{new["Image description"]}') + print(f'New description:\n\t{new["New description"]}') + print('Image links:') + for image_link in new['Image links']: + print(f'\t{image_link}') + print('\n') + + +def print_news_JSON(news_list): + """ + :param news_list: The list of news + Prints news in readable JSON format + """ + logging.info('Printing news as JSON') + print(json.dumps(news_list, ensure_ascii=False, indent=4)) diff --git a/final_task/rss_reader/requirements.txt b/final_task/rss_reader/requirements.txt index e69de29..4a30224 100644 --- a/final_task/rss_reader/requirements.txt +++ b/final_task/rss_reader/requirements.txt @@ -0,0 +1,4 @@ +feedparser +termcolor +colorama +py-dateutil \ No newline at end of file diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index e69de29..3208e0d 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -0,0 +1,57 @@ +import argparse +import re +from termcolor import colored +import os +import colorama +from parse_rss_functions import get_news_list +from personal_exceptions import * +from print_functions import * + +VERSION = 1 + + +def main(): + """ + The main entry point of the application + """ + colorama.init() + argument_parser = argparse.ArgumentParser() + argument_parser.add_argument('-v', '--verbose', action='store_true', help='increase output verbosity') + argument_parser.add_argument('--json', action='store_true', help='print result as JSON in stdout') + argument_parser.add_argument('--version', action='store_true', 
help='print version info') + argument_parser.add_argument('--limit', help='limit news topics if this parameter provided') + argument_parser.add_argument('source') + arguments = argument_parser.parse_args() + if arguments.verbose: + logging.basicConfig(level=logging.INFO) + else: + logging.basicConfig(filename='sample.log', filemode='w', level=logging.INFO) + logging.info('Program started') + if arguments.limit: + if not re.match('\\d+', arguments.limit): + logging.error('Input value of --limit is incorrect') + raise ValueError('Input value of --limit is incorrect') + arguments.limit = int(arguments.limit) + if arguments.version: + print(f'Program version - {VERSION}') + news_list = get_news_list(arguments.source, arguments.limit) + if arguments.json: + print_news_JSON(news_list) + else: + print_news(news_list) + + +if __name__ == '__main__': + try: + main() + except IncorrectURL as e: + print(colored(e, 'red')) + logging.error(e) + except NoInternet as e: + print(colored(e, 'red')) + logging.error(e) + except ValueError as e: + print(colored(e, 'red')) + logging.error(e) + finally: + logging.info('Program ended') diff --git a/final_task/tests/test.py b/final_task/tests/test.py new file mode 100644 index 0000000..6b841c7 --- /dev/null +++ b/final_task/tests/test.py @@ -0,0 +1,34 @@ +import unittest +from final_task.rss_reader.parse_rss_functions import get_new_description, get_image_description +class TestMethods(unittest.TestCase): + def test_get_new_description(self): + summary_str='

Ocasio-Cortez: Trump was 'clearly engaged in extortion and bribery\';Ocasio-Cortez discussed the issue with Yahoo News on ' \ + 'Capitol Hill on Tuesday as the third day of public hearings was being conducted in ' \ + 'the Democrats’ ongoing impeachment inquiry.


' + correct_result='Ocasio-Cortez discussed the issue with Yahoo News on Capitol Hill on Tuesday as ' \ + 'the third day of public hearings was being conducted in the Democrats’ ' \ + 'ongoing impeachment inquiry.' + self.assertEqual(get_new_description(summary_str),correct_result) + + def test_get_image_description(self): + summary_str = '

Ocasio-Cortez: Trump was 'clearly engaged in extortion and bribery\';Ocasio-Cortez discussed the issue with Yahoo News on ' \ + 'Capitol Hill on Tuesday as the third day of public hearings was being conducted in ' \ + 'the Democrats’ ongoing impeachment inquiry.


' + correct_result="Ocasio-Cortez: Trump was 'clearly engaged in extortion and bribery';" + self.assertEqual(get_image_description(summary_str), correct_result) + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file From 3c71841d28f0a991a0bfeaa8f3d92363f095f80d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=BE=D1=81=D1=8C=20=D0=9F=D0=B0=D0=B2=D0=B5=D0=BB?= Date: Thu, 21 Nov 2019 00:18:44 +0300 Subject: [PATCH 2/8] Complete second iteration --- final_task/rss_reader/rss_reader.py | 2 +- final_task/setup.py | 36 +++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index 3208e0d..020f741 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -7,7 +7,7 @@ from personal_exceptions import * from print_functions import * -VERSION = 1 +VERSION = 2 def main(): diff --git a/final_task/setup.py b/final_task/setup.py index e69de29..9d25541 100644 --- a/final_task/setup.py +++ b/final_task/setup.py @@ -0,0 +1,36 @@ +from setuptools import setup +from os import path + +here = path.abspath(path.dirname(__file__)) + +with open(path.join(here, 'README.md'), encoding='utf-8') as f: + long_description = f.read() + +setup( + name='rss-reader', + version='5.0', + description='Pure Python command-line RSS reader', + long_description=long_description, + long_description_content_type='text/markdown', + url='https://github.com/paxalos/FinalTaskRssParser/tree/master/final_task', + author='Pavel Los', + author_email='Lospawel@yandex.ru', + license='MIT', + zip_safe=False, + scripts=['rss_reader/personal_exceptions.py', + 'rss_reader/database_functions.py', + 'rss_reader/parse_rss_functions.py', + 'rss_reader/print_functions.py', + 'rss_reader/rss_reader.py', + 'rss_reader/save_in_format_functions.py'], + install_requires=['feedparser', + 'termcolor', + 'pymysql', + 'colorama', + 'mysql-connector-python', + 
'py-dateutil', + 'requests'], + entry_points={ + 'console_scripts': ['rss-reader=rss_reader:main'], + } +) From 5fb38b577f6d4b3ed2582198aefed351e2b1955b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=BE=D1=81=D1=8C=20=D0=9F=D0=B0=D0=B2=D0=B5=D0=BB?= Date: Thu, 21 Nov 2019 00:25:42 +0300 Subject: [PATCH 3/8] Complete third iteration --- final_task/rss_reader/database_functions.py | 60 +++++++++++++++++++++ final_task/rss_reader/rss_reader.py | 18 ++++++- 2 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 final_task/rss_reader/database_functions.py diff --git a/final_task/rss_reader/database_functions.py b/final_task/rss_reader/database_functions.py new file mode 100644 index 0000000..aa5b2b3 --- /dev/null +++ b/final_task/rss_reader/database_functions.py @@ -0,0 +1,60 @@ +from contextlib import closing +import logging +import pymysql +from dateutil import parser as date_parser + + +def get_news_list_by_date(date, limit): + """ + :param date: Date of publication of news + :return: : List with news publicated by date + Returns list of news by date from database + """ + logging.info('Connecting to database') + with closing(pymysql.connect(host='localhost', user='root', password='Password12345', + database='final_task_database')) as connection: + with closing(connection.cursor()) as cursor: + logging.info('Connected to database') + logging.info('Giving request') + cursor.execute(f'select * from news_cache where date="{date}"') + logging.info('Getting response') + database_response = cursor.fetchall() + if limit: + limit = min(len(database_response), limit) + else: + limit = len(database_response) + logging.info('Response was got') + news_list = [] + for index in range(limit): + news_list.append({'Feed': database_response[index][0], + 'Title': database_response[index][1], + 'Date': database_response[index][2], + 'Link': database_response[index][3], + 'Image description': database_response[index][4], + 'New description': database_response[index][5], + 
'Image links': database_response[index][6].split('|||')}) + return news_list + + +def write_news_to_database(news_list): + """ + :param news_list: List of news + Writes news to database + """ + logging.info('Connecting to database') + with closing(pymysql.connect(host='localhost', user='root', password='Password12345', + database='final_task_database')) as connection: + with closing(connection.cursor()) as cursor: + logging.info('Connected to database') + for new in news_list: + # Try to find new in database by link, if exists + cursor.execute(f'select * from news_cache where link = "{new["Link"]}"') + if cursor.fetchall(): + continue + insert_values = [value for value in new.values()] + insert_values[2] = date_parser.parse(insert_values[2]) + insert_values[6] = '|||'.join(insert_values[6]) + insert_values = [tuple(insert_values), ] + cursor.executemany('Insert into news_cache values(%s,%s,%s,%s,%s,%s,%s)', insert_values) + connection.commit() + logging.info('Data write successful') diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index 020f741..b541ca9 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -3,11 +3,12 @@ from termcolor import colored import os import colorama +from database_functions import * from parse_rss_functions import get_news_list from personal_exceptions import * from print_functions import * -VERSION = 2 +VERSION = 3 def main(): @@ -20,6 +21,7 @@ def main(): argument_parser.add_argument('--json', action='store_true', help='print result as JSON in stdout') argument_parser.add_argument('--version', action='store_true', help='print version info') argument_parser.add_argument('--limit', help='limit news topics if this parameter provided') + argument_parser.add_argument('--date', help='represent news from local storage by date') argument_parser.add_argument('source') arguments = argument_parser.parse_args() if arguments.verbose: @@ -32,6 +34,19 @@ def main(): 
logging.error('Input value of --limit is incorrect') raise ValueError('Input value of --limit is incorrect') arguments.limit = int(arguments.limit) + if arguments.date: + if not re.match('\\d+', arguments.date) or len(arguments.date) != 8: + logging.error('Input value of --date is incorrect') + raise ValueError('Input value of --date is incorrect') + news_list = get_news_list_by_date(arguments.date, arguments.limit) + if news_list: + if arguments.json: + print_news_JSON(news_list) + else: + print_news(news_list) + else: + print('No news by this date') + return if arguments.version: print(f'Program version - {VERSION}') news_list = get_news_list(arguments.source, arguments.limit) @@ -39,6 +54,7 @@ def main(): print_news_JSON(news_list) else: print_news(news_list) + write_news_to_database(news_list) if __name__ == '__main__': From 63c31356350416d97adf29a09eb20464e619e556 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=BE=D1=81=D1=8C=20=D0=9F=D0=B0=D0=B2=D0=B5=D0=BB?= Date: Thu, 21 Nov 2019 00:47:53 +0300 Subject: [PATCH 4/8] Complete fourth iteration with README --- final_task/README.md | 59 ++++++++- final_task/rss_reader/personal_exceptions.py | 7 ++ final_task/rss_reader/rss_reader.py | 46 +++++-- .../rss_reader/save_in_format_functions.py | 118 ++++++++++++++++++ 4 files changed, 217 insertions(+), 13 deletions(-) create mode 100644 final_task/rss_reader/save_in_format_functions.py diff --git a/final_task/README.md b/final_task/README.md index 7af281f..45807e6 100644 --- a/final_task/README.md +++ b/final_task/README.md @@ -1,3 +1,56 @@ -# Your readme here -Some text. -Checkout how to write this file using *markdown*. +RSS_READER +RSS reader is a command-line utility. + +Usage +usage: rss_reader.py [-h] [--source SOURCE] [--version] [--json] [--verbose] +[--limit LIMIT] [--date DATE] + +Pure Python command-line RSS reader. 
+ +optional arguments: + +-h, --help show this help message and exit +--source SOURCE RSS URL +--version Print version info +--json Print result as JSON in stdout +--verbose Outputs verbose status messages +--limit LIMIT Limit news topics if this parameter provided +--date DATE News from the specified day will be printed out. Format: YYYYMMDD +It is mandatory to specify date or/and time. +If both are specified, then news will be searched by date and by source. + +Json structure +[ + { + "feed": [feed], + "title": [title], + "date": [date], + "link": [link], + "text": [text], + "image links": [ + [link1] + [link2] + ... + ] + }, + ... +] + +Local storage +All read news is saved in database by using Mysql. You should have database final_task_database with table news_cache +new-cache structure: + feed:longtext + title:longtext + date:date + link:longtext + image_description:longtext + new_description:longtext + image_links:longtext +When using the --date argument, news is searched by date in database + +How to install application +To install application you should have setuptools. Open cmd and enter 'pip install -U setuptools'. +Using 'pyhton setup.py install' in cmd install application. +Install requirements 'pip install -r requirements.txt' +You are now ready to run the application. Use 'rss-reader [arguments]' to run it. +Warning: If path to rss-reader is not in Path variable, use full path to file at running. 
\ No newline at end of file diff --git a/final_task/rss_reader/personal_exceptions.py b/final_task/rss_reader/personal_exceptions.py index 958ce2e..9b82aba 100644 --- a/final_task/rss_reader/personal_exceptions.py +++ b/final_task/rss_reader/personal_exceptions.py @@ -6,3 +6,10 @@ def __str__(self) -> str: class NoInternet(Exception): def __str__(self) -> str: return "Internet off, please check your connection" + +class IncorrectFilePath(Exception): + def __init__(self, message): + self.message=message + + def __str__(self) -> str: + return self.message \ No newline at end of file diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index b541ca9..8a107d7 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -7,8 +7,9 @@ from parse_rss_functions import get_news_list from personal_exceptions import * from print_functions import * +from save_in_format_functions import save_in_fb2, save_in_html -VERSION = 3 +VERSION = 4 def main(): @@ -22,6 +23,8 @@ def main(): argument_parser.add_argument('--version', action='store_true', help='print version info') argument_parser.add_argument('--limit', help='limit news topics if this parameter provided') argument_parser.add_argument('--date', help='represent news from local storage by date') + argument_parser.add_argument('--to-html', help='save news in html format') + argument_parser.add_argument('--to-fb2', help='save news in fb2 format') argument_parser.add_argument('source') arguments = argument_parser.parse_args() if arguments.verbose: @@ -29,6 +32,14 @@ def main(): else: logging.basicConfig(filename='sample.log', filemode='w', level=logging.INFO) logging.info('Program started') + if arguments.to_html: + if not os.path.exists(arguments.to_html): + logging.error('Inrorrect html filepath') + raise IncorrectFilePath('Inrorrect html filepath') + if arguments.to_fb2: + if not os.path.exists(arguments.to_html): + logging.error('Inrorrect fb2 filepath') + raise 
IncorrectFilePath('Inrorrect fb2 filepath') if arguments.limit: if not re.match('\\d+', arguments.limit): logging.error('Input value of --limit is incorrect') @@ -39,21 +50,33 @@ def main(): logging.error('Input value of --date is incorrect') raise ValueError('Input value of --date is incorrect') news_list = get_news_list_by_date(arguments.date, arguments.limit) - if news_list: - if arguments.json: - print_news_JSON(news_list) - else: - print_news(news_list) + if arguments.to_html or arguments.to_fb2: + if arguments.to_html: + save_in_html(arguments.to_html, news_list) + if arguments.to_fb2: + save_in_fb2(arguments.to_fb2, news_list) else: - print('No news by this date') + if news_list: + if arguments.json: + print_news_JSON(news_list) + else: + print_news(news_list) + else: + print('No news by this date') return if arguments.version: print(f'Program version - {VERSION}') news_list = get_news_list(arguments.source, arguments.limit) - if arguments.json: - print_news_JSON(news_list) + if arguments.to_html or arguments.to_fb2: + if arguments.to_html: + save_in_html(arguments.to_html, news_list) + if arguments.to_fb2: + save_in_fb2(arguments.to_fb2, news_list) else: - print_news(news_list) + if arguments.json: + print_news_JSON(news_list) + else: + print_news(news_list) write_news_to_database(news_list) @@ -69,5 +92,8 @@ def main(): except ValueError as e: print(colored(e, 'red')) logging.error(e) + except IncorrectFilePath as e: + print(colored(e, 'red')) + logging.error(e) finally: logging.info('Program ended') diff --git a/final_task/rss_reader/save_in_format_functions.py b/final_task/rss_reader/save_in_format_functions.py new file mode 100644 index 0000000..4b46a86 --- /dev/null +++ b/final_task/rss_reader/save_in_format_functions.py @@ -0,0 +1,118 @@ +from PIL import Image +import requests +import base64 +from io import BytesIO +import os +import logging +from parse_rss_functions import ckeck_internet + + +def get_new_content_html(new): + """ + :param new: The new 
+ :return: string representation of new in html + Converts new into string which will be used in html format + """ + images_content = "" + if not ckeck_internet(): + for image_link in new['Image links']: + images_content+=f"{image_link}" + else: + for image_link in new['Image links']: + if image_link=="": + continue + response = requests.get(image_link) + encoded_string = str(base64.b64encode(response.content)) + images_content += "\n" + return f""" +

{new['Feed']}

+

{new['Title']}

+

{new['Date']}

+

{new['Link']}

+

{images_content}

+

{new['New description']}

+

+ """ + + +def save_in_html(path, news_list): + """ + :param path: The path of html format file + :param news_list: The list of news + Saves news in html format by path + """ + logging.info('Creating html format file') + html_content = "\n\n" + for new in news_list: + html_content += get_new_content_html(new) + html_content += "\n" + with open(path, 'w', encoding="utf-8") as html_file: + html_file.write(html_content) + logging.info('Html format file created') + + +def get_new_content_fb2(new): + """ + :param new: The new + :return: string representation of new in fb2 + Converts new into string which will be used in fb2 format + """ + images_content = "" + for image_link in new['Image links']: + images_content += f"" + return f""" +
+

{new['Feed'].replace('&','and')}

+

{new['Title'].replace('&','and')}

+

{new['Date']}

+

{images_content}

+

{new['New description'].replace('&','and')}

+
+ """ + + +def get_images_content(news_list): + """ + :param news_list: The list of news + :return: string representation of images + Transforms images into string by using base64 + """ + if not ckeck_internet(): + return "" + images_content = "" + for new in news_list: + for image_link in new['Image links']: + if image_link=="": + continue + response = requests.get(image_link) + img = Image.open(BytesIO(response.content)) + img = img.resize((100, 100)) + img=img.convert('RGB') + img.save('tmp.jpg', 'JPEG') + with open('tmp.jpg', 'rb') as f: + encoded_string = str(base64.b64encode(f.read())) + images_content += f"\n" + encoded_string[2:len( + encoded_string) - 1] + "\n\n" + os.remove('tmp.jpg') + return images_content + + +def save_in_fb2(path, news_list): + """ + :param path: The path of fb2 format file + :param news_list: The list of news + Saves news in fb2 format by path + """ + logging.info('Creating fb2 format file') + fb2_content = """ + + """ + for new in news_list: + fb2_content += get_new_content_fb2(new) + fb2_content += "\n\n" + fb2_content += get_images_content(news_list) + "" + with open(path, 'w', encoding="utf-8") as fb2_file: + fb2_file.write(fb2_content) + logging.info('Fb2 format file created') + From 0747d20a57dfd46d3cb71b6bdd135ccc600aa826 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=BE=D1=81=D1=8C=20=D0=9F=D0=B0=D0=B2=D0=B5=D0=BB?= Date: Thu, 21 Nov 2019 00:50:42 +0300 Subject: [PATCH 5/8] Complete fifth iteration --- final_task/rss_reader/print_functions.py | 53 ++++++++++++++++++++++++ final_task/rss_reader/requirements.txt | 6 ++- final_task/rss_reader/rss_reader.py | 27 ++++++++---- final_task/setup.py | 2 +- 4 files changed, 79 insertions(+), 9 deletions(-) diff --git a/final_task/rss_reader/print_functions.py b/final_task/rss_reader/print_functions.py index a1f0f7f..a2d6b51 100644 --- a/final_task/rss_reader/print_functions.py +++ b/final_task/rss_reader/print_functions.py @@ -1,5 +1,7 @@ import logging import json +import colorama 
+from colorama import Fore, Back, Style def print_news(news_list): @@ -23,6 +25,31 @@ def print_news(news_list): print('\n') +def print_news_colorize(news_list): + """ + :param news_list: The list of news + Prints news in readable colorize format + """ + logging.info('Printing news colorize') + for index, new in enumerate(news_list): + print(Style.RESET_ALL + Fore.WHITE + Back.MAGENTA + f'New {index + 1}\n') + print(Style.RESET_ALL + Fore.WHITE + Back.BLUE + 'Feed:\n' + Style.RESET_ALL + Fore.BLUE + f'\t{new["Feed"]}') + print( + Style.RESET_ALL + Fore.WHITE + Back.GREEN + 'Title:\n' + Style.RESET_ALL + Fore.GREEN + f'\t{new["Title"]}') + print(Style.RESET_ALL + Fore.WHITE + Back.CYAN + 'Date:\n' + Style.RESET_ALL + Fore.CYAN + f'\t{new["Date"]}') + print(Style.RESET_ALL + Fore.WHITE + Back.RED + 'Link:\n' + Style.RESET_ALL + Fore.RED + f'\t{new["Link"]}') + print( + Style.RESET_ALL + Fore.WHITE + Back.YELLOW + 'Image description:\n' + Style.RESET_ALL +\ + Fore.YELLOW + f'\t{new["Image description"]}') + print( + Style.RESET_ALL + Fore.WHITE + Back.LIGHTBLUE_EX + 'New description:\n' + Style.RESET_ALL +\ + Fore.LIGHTBLUE_EX + f'\t{new["New description"]}') + print(Style.RESET_ALL + Fore.BLACK + Back.LIGHTGREEN_EX + 'Image links:') + for image_link in new['Image links']: + print(Style.RESET_ALL + Fore.LIGHTGREEN_EX + f'\t{image_link}') + print('\n') + + def print_news_JSON(news_list): """ :param news_list: The list of news @@ -30,3 +57,29 @@ def print_news_JSON(news_list): """ logging.info('Printing news as JSON') print(json.dumps(news_list, ensure_ascii=False, indent=4)) + + +def print_news_JSON_colorize(news_list): + """ + :param news_list: The list of news + Prints news in readable colorize JSON format + """ + logging.info('Printing news as JSON') + result_str = "[" + for new_index, new in enumerate(news_list): + result_str += f"\n\t\n\t\t\"\033[41mFeed\033[0m\": \"\033[31m{new['Feed']}\033[0m\","\ + f"\n\t\t\"\033[42mTitle\033[0m\": 
\"\033[32m{new['Title']}\033[0m\","\ + f"\n\t\t\"\033[43mDate\033[0m\": \"\033[33m{new['Date']}\033[0m\","\ + f"\n\t\t\"\033[44mLink\033[0m\": \"\033[34m{new['Link']}\033[0m\","\ + f"\n\t\t\"\033[45mImage description\033[0m\": \"\033[35m{new['Image description']}\033[0m\","\ + f"\n\t\t\"\033[46mNew description\033[0m\": \"\033[36m{new['New description']}\033[0m\","\ + f"\n\t\t\"\033[44mImage links\033[0m\": [" + for link_index, link in enumerate(new['Image links']): + result_str += f"\n\t\t\t\"\033[34m{link}\033[0m\"" + if link_index+1!=len(new['Image links']): + result_str+=',' + result_str += "\n\t\t]\n\t}" + if new_index + 1 != len(news_list): + result_str += ',' + result_str += '\n]' + print(result_str) diff --git a/final_task/rss_reader/requirements.txt b/final_task/rss_reader/requirements.txt index 4a30224..5bab75d 100644 --- a/final_task/rss_reader/requirements.txt +++ b/final_task/rss_reader/requirements.txt @@ -1,4 +1,8 @@ feedparser termcolor +pymysql colorama -py-dateutil \ No newline at end of file +mysql-connector-python +py-dateutil +requests +Pillow \ No newline at end of file diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index 8a107d7..41a0350 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -9,7 +9,7 @@ from print_functions import * from save_in_format_functions import save_in_fb2, save_in_html -VERSION = 4 +VERSION = 5 def main(): @@ -25,6 +25,7 @@ def main(): argument_parser.add_argument('--date', help='represent news from local storage by date') argument_parser.add_argument('--to-html', help='save news in html format') argument_parser.add_argument('--to-fb2', help='save news in fb2 format') + argument_parser.add_argument('--colorize', action='store_true', help='print news in colorized mode') argument_parser.add_argument('source') arguments = argument_parser.parse_args() if arguments.verbose: @@ -57,10 +58,16 @@ def main(): save_in_fb2(arguments.to_fb2, 
news_list) else: if news_list: - if arguments.json: - print_news_JSON(news_list) + if arguments.colorize: + if arguments.json: + print_news_JSON_colorize(news_list) + else: + print_news_colorize(news_list) else: - print_news(news_list) + if arguments.json: + print_news_JSON(news_list) + else: + print_news(news_list) else: print('No news by this date') return @@ -73,10 +80,16 @@ def main(): if arguments.to_fb2: save_in_fb2(arguments.to_fb2, news_list) else: - if arguments.json: - print_news_JSON(news_list) + if arguments.colorize: + if arguments.json: + print_news_JSON_colorize(news_list) + else: + print_news_colorize(news_list) else: - print_news(news_list) + if arguments.json: + print_news_JSON(news_list) + else: + print_news(news_list) write_news_to_database(news_list) diff --git a/final_task/setup.py b/final_task/setup.py index 9d25541..032cc9b 100644 --- a/final_task/setup.py +++ b/final_task/setup.py @@ -33,4 +33,4 @@ entry_points={ 'console_scripts': ['rss-reader=rss_reader:main'], } -) +) \ No newline at end of file From aa9295c9bccd41aa06ff50766c2e54034852d610 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=BE=D1=81=D1=8C=20=D0=9F=D0=B0=D0=B2=D0=B5=D0=BB?= Date: Fri, 22 Nov 2019 01:54:22 +0300 Subject: [PATCH 6/8] Add info in README about 4 and 5 iteration, remove wildcard imports, move parsing arguments in separate function, --date and --version can be used without source, fix bugs with writing and reading database. 
Some other little changes --- final_task/README.md | 14 ++- final_task/rss_reader/database_functions.py | 93 +++++++++++-------- final_task/rss_reader/parse_rss_functions.py | 14 +-- final_task/rss_reader/personal_exceptions.py | 15 ++- final_task/rss_reader/print_functions.py | 21 ++--- final_task/rss_reader/rss_reader.py | 39 +++++--- .../rss_reader/save_in_format_functions.py | 15 ++- 7 files changed, 122 insertions(+), 89 deletions(-) diff --git a/final_task/README.md b/final_task/README.md index 45807e6..ce111e7 100644 --- a/final_task/README.md +++ b/final_task/README.md @@ -38,7 +38,7 @@ Json structure Local storage All read news is saved in database by using Mysql. You should have database final_task_database with table news_cache -new-cache structure: +news_cache structure: feed:longtext title:longtext date:date @@ -48,6 +48,18 @@ new-cache structure: image_links:longtext When using the --date argument, news is searched by date in database +Saving in format feature +You can save getted news in 2 formats: html, fb2 +If news are got from Internet and Internet on news images are downloaded from website +and converted in base64 string. After saved html or fb2 format files can show them without connecting +to Internet. If Internet off images aren't downloaded, in html instead of images utility writes links of images. +When using the --date argument, news are got from database. Image are downloaded the same way depending on whether +the Internet is on + +Colorize mode +When using the --colorize argument the output news in console will be colorized. If using --json at the same time +the output news will be printed in colorized json format + How to install application To install application you should have setuptools. Open cmd and enter 'pip install -U setuptools'. Using 'pyhton setup.py install' in cmd install application. 
diff --git a/final_task/rss_reader/database_functions.py b/final_task/rss_reader/database_functions.py index aa5b2b3..265cc22 100644 --- a/final_task/rss_reader/database_functions.py +++ b/final_task/rss_reader/database_functions.py @@ -1,7 +1,7 @@ from contextlib import closing import logging import pymysql -from dateutil import parser as date_parser +from personal_exceptions import DatabaseConnectionError def get_news_list_by_date(date, limit): @@ -11,29 +11,38 @@ def get_news_list_by_date(date, limit): Returns list of news by date from database """ logging.info('Connecting to database') - with closing(pymysql.connect(host='localhost', user='root', password='Password12345', - database='final_task_database')) as connection: - with closing(connection.cursor()) as cursor: - logging.info('Connected to database') - logging.info('Giving request') - cursor.execute(f'select * from news_cache where date="{date}"') - logging.info('Getting response') - database_response = cursor.fetchall() - if limit: - limit = min(len(database_response), limit) - else: - limit = len(database_response) - logging.info('Response was got') - news_list = [] - for index in range(limit): - news_list.append({'Feed': database_response[index][0], - 'Title': database_response[index][1], - 'Date': database_response[index][2], - 'Link': database_response[index][3], - 'Image description': database_response[index][4], - 'New description': database_response[index][5], - 'Image links': database_response[index][6].split('|||')}) - return news_list + try: + with closing(pymysql.connect(host='localhost', user='root', password='Password12345', + database='final_task_database')) as connection: + with closing(connection.cursor()) as cursor: + logging.info('Connected to database') + logging.info('Giving request') + try: + cursor.execute(f'select * from news_cache where date="{date}"') + except pymysql.err.InternalError: + logging.error('Input value of --date is incorrect') + raise ValueError('Input value of 
--date is incorrect') + logging.info('Getting response') + database_response = cursor.fetchall() + if limit: + limit = min(len(database_response), limit) + else: + limit = len(database_response) + logging.info('Response was got') + news_list = [] + for index in range(limit): + news_list.append({'Feed': database_response[index][0], + 'Title': database_response[index][1], + 'Date': database_response[index][2], + 'Link': database_response[index][3], + 'Image description': database_response[index][4], + 'New description': database_response[index][5], + 'Image links': database_response[index][6].split('|||')}) + return news_list + except pymysql.err.OperationalError: + logging.error('Not connected to database') + raise DatabaseConnectionError("Can't connect to database, check if you have installed Mysql, and necessary" + "database with table described in README") def write_news_to_database(news_list): @@ -42,19 +51,23 @@ def write_news_to_database(news_list): Writes news to database """ logging.info('Connecting to database') - with closing(pymysql.connect(host='localhost', user='root', password='Password12345', - database='final_task_database')) as connection: - with closing(connection.cursor()) as cursor: - logging.info('Connected to database') - for new in news_list: - # Try to find new in database by link, if exists - cursor.execute(f'select * from news_cache where link = "{new["Link"]}"') - if cursor.fetchall(): - continue - insert_values = [value for value in new.values()] - insert_values[2] = date_parser.parse(insert_values[2]) - insert_values[6] = '|||'.join(insert_values[6]) - insert_values = [tuple(insert_values), ] - cursor.executemany('Insert into news_cache values(%s,%s,%s,%s,%s,%s,%s)', insert_values) - connection.commit() - logging.info('Data write successful') + try: + with closing(pymysql.connect(host='localhost', user='root', password='Password12345', + database='final_task_database')) as connection: + with closing(connection.cursor()) as cursor: + 
logging.info('Connected to database') + for new in news_list: + # Try to find new in database by link, if exists + cursor.execute(f'select * from news_cache where link = "{new["Link"]}"') + if cursor.fetchall(): + continue + insert_values = [value for value in new.values()] + insert_values[6] = '|||'.join(insert_values[6]) + insert_values = [tuple(insert_values), ] + cursor.executemany('Insert into news_cache values(%s,%s,%s,%s,%s,%s,%s)', insert_values) + connection.commit() + logging.info('Data write successful') + except pymysql.err.OperationalError: + logging.error('Not connected to database') + raise DatabaseConnectionError("Can't connect to database, check if you have installed Mysql, and necessary" + "database with table described in README") diff --git a/final_task/rss_reader/parse_rss_functions.py b/final_task/rss_reader/parse_rss_functions.py index f453028..21f54df 100644 --- a/final_task/rss_reader/parse_rss_functions.py +++ b/final_task/rss_reader/parse_rss_functions.py @@ -4,7 +4,7 @@ import logging from dateutil import parser as date_parser import html -from personal_exceptions import * +from personal_exceptions import IncorrectURL, NoInternet def ckeck_internet(): @@ -15,8 +15,8 @@ def ckeck_internet(): logging.info("checking Internet connection") socket.setdefaulttimeout(5) host = socket.gethostbyname("www.google.com") - s = socket.create_connection((host, 80), 2) - s.close() + sock = socket.create_connection((host, 80), 2) + sock.close() logging.info('Internet on.') return True except Exception as e: @@ -53,11 +53,11 @@ def get_news_list(source, limit): """ logging.info('Creating news list') if not ckeck_internet(): - raise NoInternet + raise NoInternet("Internet off, please check your connection") logging.info('Getting and parsing RSS') parsed_rss = feedparser.parse(source) - if parsed_rss['bozo'] == 1: - raise IncorrectURL + if parsed_rss['bozo']: + raise IncorrectURL('The entered URL is incorrect') if limit: limit = min(limit, 
len(parsed_rss['entries'])) else: @@ -69,7 +69,7 @@ def get_news_list(source, limit): 'Title': html.unescape(parsed_rss['entries'][index]['title']), 'Date': - str(date_parser.parse(parsed_rss['entries'][index]['published'])), + str(date_parser.parse(parsed_rss['entries'][index]['published'])).split(" ")[0], 'Link': parsed_rss['entries'][index]['link'], 'Image description': diff --git a/final_task/rss_reader/personal_exceptions.py b/final_task/rss_reader/personal_exceptions.py index 9b82aba..50c1cf9 100644 --- a/final_task/rss_reader/personal_exceptions.py +++ b/final_task/rss_reader/personal_exceptions.py @@ -1,15 +1,14 @@ class IncorrectURL(Exception): - def __str__(self) -> str: - return 'The entered URL is incorrect' + pass class NoInternet(Exception): - def __str__(self) -> str: - return "Internet off, please check your connection" + pass + class IncorrectFilePath(Exception): - def __init__(self, message): - self.message=message + pass + - def __str__(self) -> str: - return self.message \ No newline at end of file +class DatabaseConnectionError(Exception): + pass diff --git a/final_task/rss_reader/print_functions.py b/final_task/rss_reader/print_functions.py index a2d6b51..7fdd059 100644 --- a/final_task/rss_reader/print_functions.py +++ b/final_task/rss_reader/print_functions.py @@ -1,6 +1,5 @@ import logging import json -import colorama from colorama import Fore, Back, Style @@ -39,10 +38,10 @@ def print_news_colorize(news_list): print(Style.RESET_ALL + Fore.WHITE + Back.CYAN + 'Date:\n' + Style.RESET_ALL + Fore.CYAN + f'\t{new["Date"]}') print(Style.RESET_ALL + Fore.WHITE + Back.RED + 'Link:\n' + Style.RESET_ALL + Fore.RED + f'\t{new["Link"]}') print( - Style.RESET_ALL + Fore.WHITE + Back.YELLOW + 'Image description:\n' + Style.RESET_ALL +\ + Style.RESET_ALL + Fore.WHITE + Back.YELLOW + 'Image description:\n' + Style.RESET_ALL + \ Fore.YELLOW + f'\t{new["Image description"]}') print( - Style.RESET_ALL + Fore.WHITE + Back.LIGHTBLUE_EX + 'New description:\n' + 
Style.RESET_ALL +\ + Style.RESET_ALL + Fore.WHITE + Back.LIGHTBLUE_EX + 'New description:\n' + Style.RESET_ALL + \ Fore.LIGHTBLUE_EX + f'\t{new["New description"]}') print(Style.RESET_ALL + Fore.BLACK + Back.LIGHTGREEN_EX + 'Image links:') for image_link in new['Image links']: @@ -67,17 +66,17 @@ def print_news_JSON_colorize(news_list): logging.info('Printing news as JSON') result_str = "[" for new_index, new in enumerate(news_list): - result_str += f"\n\t\n\t\t\"\033[41mFeed\033[0m\": \"\033[31m{new['Feed']}\033[0m\","\ - f"\n\t\t\"\033[42mTitle\033[0m\": \"\033[32m{new['Title']}\033[0m\","\ - f"\n\t\t\"\033[43mDate\033[0m\": \"\033[33m{new['Date']}\033[0m\","\ - f"\n\t\t\"\033[44mLink\033[0m\": \"\033[34m{new['Link']}\033[0m\","\ - f"\n\t\t\"\033[45mImage description\033[0m\": \"\033[35m{new['Image description']}\033[0m\","\ - f"\n\t\t\"\033[46mNew description\033[0m\": \"\033[36m{new['New description']}\033[0m\","\ + result_str += f"\n\t\n\t\t\"\033[41mFeed\033[0m\": \"\033[31m{new['Feed']}\033[0m\"," \ + f"\n\t\t\"\033[42mTitle\033[0m\": \"\033[32m{new['Title']}\033[0m\"," \ + f"\n\t\t\"\033[43mDate\033[0m\": \"\033[33m{new['Date']}\033[0m\"," \ + f"\n\t\t\"\033[44mLink\033[0m\": \"\033[34m{new['Link']}\033[0m\"," \ + f"\n\t\t\"\033[45mImage description\033[0m\": \"\033[35m{new['Image description']}\033[0m\"," \ + f"\n\t\t\"\033[46mNew description\033[0m\": \"\033[36m{new['New description']}\033[0m\"," \ f"\n\t\t\"\033[44mImage links\033[0m\": [" for link_index, link in enumerate(new['Image links']): result_str += f"\n\t\t\t\"\033[34m{link}\033[0m\"" - if link_index+1!=len(new['Image links']): - result_str+=',' + if link_index + 1 != len(new['Image links']): + result_str += ',' result_str += "\n\t\t]\n\t}" if new_index + 1 != len(news_list): result_str += ',' diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index 41a0350..d157bbb 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ 
-3,20 +3,17 @@ from termcolor import colored import os import colorama -from database_functions import * +import logging +from database_functions import get_news_list_by_date, write_news_to_database from parse_rss_functions import get_news_list -from personal_exceptions import * -from print_functions import * +from personal_exceptions import NoInternet, IncorrectURL, IncorrectFilePath, DatabaseConnectionError +from print_functions import print_news_colorize, print_news_JSON_colorize, print_news, print_news_JSON from save_in_format_functions import save_in_fb2, save_in_html VERSION = 5 -def main(): - """ - The main entry point of the application - """ - colorama.init() +def get_arguments(): argument_parser = argparse.ArgumentParser() argument_parser.add_argument('-v', '--verbose', action='store_true', help='increase output verbosity') argument_parser.add_argument('--json', action='store_true', help='print result as JSON in stdout') @@ -26,19 +23,32 @@ def main(): argument_parser.add_argument('--to-html', help='save news in html format') argument_parser.add_argument('--to-fb2', help='save news in fb2 format') argument_parser.add_argument('--colorize', action='store_true', help='print news in colorized mode') - argument_parser.add_argument('source') - arguments = argument_parser.parse_args() + argument_parser.add_argument('source', nargs='?') + return argument_parser.parse_args() + + +def main(): + """ + The main entry point of the application + """ + colorama.init() + arguments = get_arguments() + if arguments.version: + print(f'Program version - {VERSION}') + return if arguments.verbose: logging.basicConfig(level=logging.INFO) else: logging.basicConfig(filename='sample.log', filemode='w', level=logging.INFO) logging.info('Program started') if arguments.to_html: - if not os.path.exists(arguments.to_html): + if not '.html' in arguments.to_html or \ + not os.access(os.path.dirname(arguments.to_html), os.W_OK) and '\\' in arguments.to_html: logging.error('Inrorrect html 
filepath') raise IncorrectFilePath('Inrorrect html filepath') if arguments.to_fb2: - if not os.path.exists(arguments.to_html): + if not '.fb2' in arguments.to_fb2 or \ + not os.access(os.path.dirname(arguments.to_fb2), os.W_OK) and '\\' in arguments.to_fb2: logging.error('Inrorrect fb2 filepath') raise IncorrectFilePath('Inrorrect fb2 filepath') if arguments.limit: @@ -71,8 +81,6 @@ def main(): else: print('No news by this date') return - if arguments.version: - print(f'Program version - {VERSION}') news_list = get_news_list(arguments.source, arguments.limit) if arguments.to_html or arguments.to_fb2: if arguments.to_html: @@ -108,5 +116,8 @@ def main(): except IncorrectFilePath as e: print(colored(e, 'red')) logging.error(e) + except DatabaseConnectionError as e: + print(colored(e, 'red')) + logging.error(e) finally: logging.info('Program ended') diff --git a/final_task/rss_reader/save_in_format_functions.py b/final_task/rss_reader/save_in_format_functions.py index 4b46a86..fadc482 100644 --- a/final_task/rss_reader/save_in_format_functions.py +++ b/final_task/rss_reader/save_in_format_functions.py @@ -16,10 +16,10 @@ def get_new_content_html(new): images_content = "" if not ckeck_internet(): for image_link in new['Image links']: - images_content+=f"{image_link}" + images_content += f"{image_link}" else: for image_link in new['Image links']: - if image_link=="": + if image_link == "": continue response = requests.get(image_link) encoded_string = str(base64.b64encode(response.content)) @@ -63,11 +63,11 @@ def get_new_content_fb2(new): images_content += f"" return f"""
-

{new['Feed'].replace('&','and')}

-

{new['Title'].replace('&','and')}

+

{new['Feed'].replace('&', 'and')}

+

{new['Title'].replace('&', 'and')}

{new['Date']}

{images_content}

-

{new['New description'].replace('&','and')}

+

{new['New description'].replace('&', 'and')}

""" @@ -83,12 +83,12 @@ def get_images_content(news_list): images_content = "" for new in news_list: for image_link in new['Image links']: - if image_link=="": + if image_link == "": continue response = requests.get(image_link) img = Image.open(BytesIO(response.content)) img = img.resize((100, 100)) - img=img.convert('RGB') + img = img.convert('RGB') img.save('tmp.jpg', 'JPEG') with open('tmp.jpg', 'rb') as f: encoded_string = str(base64.b64encode(f.read())) @@ -115,4 +115,3 @@ def save_in_fb2(path, news_list): with open(path, 'w', encoding="utf-8") as fb2_file: fb2_file.write(fb2_content) logging.info('Fb2 format file created') - From 2cb55da6abacf31bfc32989317a0093b7f5f51b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=BE=D1=81=D1=8C=20=D0=9F=D0=B0=D0=B2=D0=B5=D0=BB?= Date: Fri, 22 Nov 2019 14:04:12 +0300 Subject: [PATCH 7/8] Rename personal_exceprions to custom_exceptions, fix bug with --json and --date, redo --to-fb2 and --to-html: now the argument is directory and file news.fb2 or news.html are created in this directory. 
Some other little changes --- .../{personal_exceptions.py => custom_exceptions.py} | 0 final_task/rss_reader/database_functions.py | 5 +++-- final_task/rss_reader/parse_rss_functions.py | 2 +- final_task/rss_reader/rss_reader.py | 8 +++----- final_task/rss_reader/save_in_format_functions.py | 4 ++-- final_task/setup.py | 2 +- 6 files changed, 10 insertions(+), 11 deletions(-) rename final_task/rss_reader/{personal_exceptions.py => custom_exceptions.py} (100%) diff --git a/final_task/rss_reader/personal_exceptions.py b/final_task/rss_reader/custom_exceptions.py similarity index 100% rename from final_task/rss_reader/personal_exceptions.py rename to final_task/rss_reader/custom_exceptions.py diff --git a/final_task/rss_reader/database_functions.py b/final_task/rss_reader/database_functions.py index 265cc22..c06416a 100644 --- a/final_task/rss_reader/database_functions.py +++ b/final_task/rss_reader/database_functions.py @@ -1,7 +1,7 @@ from contextlib import closing import logging import pymysql -from personal_exceptions import DatabaseConnectionError +from custom_exceptions import DatabaseConnectionError def get_news_list_by_date(date, limit): @@ -33,7 +33,7 @@ def get_news_list_by_date(date, limit): for index in range(limit): news_list.append({'Feed': database_response[index][0], 'Title': database_response[index][1], - 'Date': database_response[index][2], + 'Date': str(database_response[index][2]), 'Link': database_response[index][3], 'Image description': database_response[index][4], 'New description': database_response[index][5], @@ -62,6 +62,7 @@ def write_news_to_database(news_list): if cursor.fetchall(): continue insert_values = [value for value in new.values()] + #converting list of image links into string to store in database insert_values[6] = '|||'.join(insert_values[6]) insert_values = [tuple(insert_values), ] cursor.executemany('Insert into news_cache values(%s,%s,%s,%s,%s,%s,%s)', insert_values) diff --git a/final_task/rss_reader/parse_rss_functions.py 
b/final_task/rss_reader/parse_rss_functions.py index 21f54df..72a793c 100644 --- a/final_task/rss_reader/parse_rss_functions.py +++ b/final_task/rss_reader/parse_rss_functions.py @@ -4,7 +4,7 @@ import logging from dateutil import parser as date_parser import html -from personal_exceptions import IncorrectURL, NoInternet +from custom_exceptions import IncorrectURL, NoInternet def ckeck_internet(): diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index d157bbb..1a96f1a 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -6,7 +6,7 @@ import logging from database_functions import get_news_list_by_date, write_news_to_database from parse_rss_functions import get_news_list -from personal_exceptions import NoInternet, IncorrectURL, IncorrectFilePath, DatabaseConnectionError +from custom_exceptions import NoInternet, IncorrectURL, IncorrectFilePath, DatabaseConnectionError from print_functions import print_news_colorize, print_news_JSON_colorize, print_news, print_news_JSON from save_in_format_functions import save_in_fb2, save_in_html @@ -42,13 +42,11 @@ def main(): logging.basicConfig(filename='sample.log', filemode='w', level=logging.INFO) logging.info('Program started') if arguments.to_html: - if not '.html' in arguments.to_html or \ - not os.access(os.path.dirname(arguments.to_html), os.W_OK) and '\\' in arguments.to_html: + if not os.path.exists(arguments.to_html): logging.error('Inrorrect html filepath') raise IncorrectFilePath('Inrorrect html filepath') if arguments.to_fb2: - if not '.fb2' in arguments.to_fb2 or \ - not os.access(os.path.dirname(arguments.to_fb2), os.W_OK) and '\\' in arguments.to_fb2: + if not os.path.exists(arguments.to_fb2): logging.error('Inrorrect fb2 filepath') raise IncorrectFilePath('Inrorrect fb2 filepath') if arguments.limit: diff --git a/final_task/rss_reader/save_in_format_functions.py b/final_task/rss_reader/save_in_format_functions.py index 
fadc482..b6dc78f 100644 --- a/final_task/rss_reader/save_in_format_functions.py +++ b/final_task/rss_reader/save_in_format_functions.py @@ -47,7 +47,7 @@ def save_in_html(path, news_list): for new in news_list: html_content += get_new_content_html(new) html_content += "\n" - with open(path, 'w', encoding="utf-8") as html_file: + with open(os.path.join(path, "news.html"), 'w', encoding="utf-8") as html_file: html_file.write(html_content) logging.info('Html format file created') @@ -112,6 +112,6 @@ def save_in_fb2(path, news_list): fb2_content += get_new_content_fb2(new) fb2_content += "\n\n" fb2_content += get_images_content(news_list) + "" - with open(path, 'w', encoding="utf-8") as fb2_file: + with open(os.path.join(path, "news.fb2"), 'w', encoding="utf-8") as fb2_file: fb2_file.write(fb2_content) logging.info('Fb2 format file created') diff --git a/final_task/setup.py b/final_task/setup.py index 032cc9b..6126060 100644 --- a/final_task/setup.py +++ b/final_task/setup.py @@ -17,7 +17,7 @@ author_email='Lospawel@yandex.ru', license='MIT', zip_safe=False, - scripts=['rss_reader/personal_exceptions.py', + scripts=['rss_reader/custom_exceptions.py', 'rss_reader/database_functions.py', 'rss_reader/parse_rss_functions.py', 'rss_reader/print_functions.py', From 010e4ca7c3ff26fc54dccd043d68b1500f676680 Mon Sep 17 00:00:00 2001 From: Los Pavel Date: Fri, 22 Nov 2019 17:55:11 +0300 Subject: [PATCH 8/8] move arguments functions to another module, change filenames of fb2 and html formats --- final_task/rss_reader/arguments_functions.py | 63 +++++++++++++++++++ final_task/rss_reader/rss_reader.py | 40 +++--------- .../rss_reader/save_in_format_functions.py | 8 +-- final_task/setup.py | 3 +- 4 files changed, 79 insertions(+), 35 deletions(-) create mode 100644 final_task/rss_reader/arguments_functions.py diff --git a/final_task/rss_reader/arguments_functions.py b/final_task/rss_reader/arguments_functions.py new file mode 100644 index 0000000..86639b1 --- /dev/null +++ 
b/final_task/rss_reader/arguments_functions.py @@ -0,0 +1,63 @@ +import argparse +import os +import re +import logging +from custom_exceptions import IncorrectFilePath + + +def get_arguments(): + """ + :return: Arguments of application + Read and returns arguments of application + """ + argument_parser = argparse.ArgumentParser() + argument_parser.add_argument('-v', '--verbose', action='store_true', help='increase output verbosity') + argument_parser.add_argument('--json', action='store_true', help='print result as JSON in stdout') + argument_parser.add_argument('--version', action='store_true', help='print version info') + argument_parser.add_argument('--limit', help='limit news topics if this parameter provided') + argument_parser.add_argument('--date', help='represent news from local storage by date') + argument_parser.add_argument('--to-html', help='save news in html format') + argument_parser.add_argument('--to-fb2', help='save news in fb2 format') + argument_parser.add_argument('--colorize', action='store_true', help='print news in colorized mode') + argument_parser.add_argument('source', nargs='?') + return argument_parser.parse_args() + + +def check_html_argument(html_argument): + """ + :param html_argument: html directory path + If argument wrong raises exception + """ + if not os.path.exists(html_argument): + logging.error('Inrorrect html filepath') + raise IncorrectFilePath('Inrorrect html filepath') + + +def check_fb2_argument(fb2_argument): + """ + :param fb2_argument: fb2 directory path + If argument wrong raises exception + """ + if not os.path.exists(fb2_argument): + logging.error('Inrorrect fb2 filepath') + raise IncorrectFilePath('Inrorrect fb2 filepath') + + +def check_limit_argument(limit_argument): + """ + :param limit_argument: limit of news + If argument wrong raises exception + """ + if not re.match('\\d+', limit_argument): + logging.error('Input value of --limit is incorrect') + raise ValueError('Input value of --limit is incorrect') + + 
+def check_date_argument(date_argument): + """ + :param date_argument: Date of news in database + If argument wrong raises exception + """ + if not re.match('\\d+', date_argument) or len(date_argument) != 8: + logging.error('Input value of --date is incorrect') + raise ValueError('Input value of --date is incorrect') diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index 1a96f1a..cca9100 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -9,24 +9,12 @@ from custom_exceptions import NoInternet, IncorrectURL, IncorrectFilePath, DatabaseConnectionError from print_functions import print_news_colorize, print_news_JSON_colorize, print_news, print_news_JSON from save_in_format_functions import save_in_fb2, save_in_html +from arguments_functions import check_date_argument, check_fb2_argument, check_html_argument, \ + check_limit_argument, get_arguments VERSION = 5 -def get_arguments(): - argument_parser = argparse.ArgumentParser() - argument_parser.add_argument('-v', '--verbose', action='store_true', help='increase output verbosity') - argument_parser.add_argument('--json', action='store_true', help='print result as JSON in stdout') - argument_parser.add_argument('--version', action='store_true', help='print version info') - argument_parser.add_argument('--limit', help='limit news topics if this parameter provided') - argument_parser.add_argument('--date', help='represent news from local storage by date') - argument_parser.add_argument('--to-html', help='save news in html format') - argument_parser.add_argument('--to-fb2', help='save news in fb2 format') - argument_parser.add_argument('--colorize', action='store_true', help='print news in colorized mode') - argument_parser.add_argument('source', nargs='?') - return argument_parser.parse_args() - - def main(): """ The main entry point of the application @@ -42,28 +30,20 @@ def main(): logging.basicConfig(filename='sample.log', filemode='w', 
level=logging.INFO) logging.info('Program started') if arguments.to_html: - if not os.path.exists(arguments.to_html): - logging.error('Inrorrect html filepath') - raise IncorrectFilePath('Inrorrect html filepath') + check_html_argument(arguments.to_html) if arguments.to_fb2: - if not os.path.exists(arguments.to_fb2): - logging.error('Inrorrect fb2 filepath') - raise IncorrectFilePath('Inrorrect fb2 filepath') + check_fb2_argument(arguments.to_fb2) if arguments.limit: - if not re.match('\\d+', arguments.limit): - logging.error('Input value of --limit is incorrect') - raise ValueError('Input value of --limit is incorrect') + check_limit_argument(arguments.limit) arguments.limit = int(arguments.limit) if arguments.date: - if not re.match('\\d+', arguments.date) or len(arguments.date) != 8: - logging.error('Input value of --date is incorrect') - raise ValueError('Input value of --date is incorrect') + check_date_argument(arguments.date) news_list = get_news_list_by_date(arguments.date, arguments.limit) if arguments.to_html or arguments.to_fb2: if arguments.to_html: - save_in_html(arguments.to_html, news_list) + save_in_html(arguments.to_html, news_list, f"news_by_date-{arguments.date}.html") if arguments.to_fb2: - save_in_fb2(arguments.to_fb2, news_list) + save_in_fb2(arguments.to_fb2, news_list, f"news_by_date-{arguments.date}.fb2") else: if news_list: if arguments.colorize: @@ -82,9 +62,9 @@ def main(): news_list = get_news_list(arguments.source, arguments.limit) if arguments.to_html or arguments.to_fb2: if arguments.to_html: - save_in_html(arguments.to_html, news_list) + save_in_html(arguments.to_html, news_list, f"news_from-{arguments.source[8:-4]}.html") if arguments.to_fb2: - save_in_fb2(arguments.to_fb2, news_list) + save_in_fb2(arguments.to_fb2, news_list, f"news_from-{arguments.source[8:-4]}.fb2") else: if arguments.colorize: if arguments.json: diff --git a/final_task/rss_reader/save_in_format_functions.py b/final_task/rss_reader/save_in_format_functions.py 
index b6dc78f..7d8a62b 100644 --- a/final_task/rss_reader/save_in_format_functions.py +++ b/final_task/rss_reader/save_in_format_functions.py @@ -36,7 +36,7 @@ def get_new_content_html(new): """ -def save_in_html(path, news_list): +def save_in_html(path, news_list, filename): """ :param path: The path of html format file :param news_list: The list of news @@ -47,7 +47,7 @@ def save_in_html(path, news_list): for new in news_list: html_content += get_new_content_html(new) html_content += "\n" - with open(os.path.join(path, "news.html"), 'w', encoding="utf-8") as html_file: + with open(os.path.join(path, filename), 'w', encoding="utf-8") as html_file: html_file.write(html_content) logging.info('Html format file created') @@ -98,7 +98,7 @@ def get_images_content(news_list): return images_content -def save_in_fb2(path, news_list): +def save_in_fb2(path, news_list, filename): """ :param path: The path of fb2 format file :param news_list: The list of news @@ -112,6 +112,6 @@ def save_in_fb2(path, news_list): fb2_content += get_new_content_fb2(new) fb2_content += "\n\n" fb2_content += get_images_content(news_list) + "" - with open(os.path.join(path, "news.fb2"), 'w', encoding="utf-8") as fb2_file: + with open(os.path.join(path, filename), 'w', encoding="utf-8") as fb2_file: fb2_file.write(fb2_content) logging.info('Fb2 format file created') diff --git a/final_task/setup.py b/final_task/setup.py index 6126060..2f9e7f6 100644 --- a/final_task/setup.py +++ b/final_task/setup.py @@ -22,7 +22,8 @@ 'rss_reader/parse_rss_functions.py', 'rss_reader/print_functions.py', 'rss_reader/rss_reader.py', - 'rss_reader/save_in_format_functions.py'], + 'rss_reader/save_in_format_functions.py', + 'rss_reader/arguments_functions.py'], install_requires=['feedparser', 'termcolor', 'pymysql',