Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 68 additions & 3 deletions final_task/README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,68 @@
# Your readme here
Some text.
Checkout how to write this file using *markdown*.
RSS_READER
RSS reader is a command-line utility.

Usage
usage: rss_reader.py [-h] [--source SOURCE] [--version] [--json] [--verbose]
[--limit LIMIT] [--date DATE]

Pure Python command-line RSS reader.

optional arguments:

-h, --help show this help message and exit
--source SOURCE RSS URL
--version Print version info
--json Print result as JSON in stdout
--verbose Outputs verbose status messages
--limit LIMIT Limit news topics if this parameter provided
--date DATE News from the specified day will be printed out. Format: YYYYMMDD
It is mandatory to specify the source and/or the date.
If both are specified, then news will be searched by date and by source.
Comment on lines +5 to +20

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And what about Iteration 4 options?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Forgot it


Json structure
[
{
"feed": [feed],
"title": [title],
"date": [date],
"link": [link],
"text": [text],
"image links": [
[link1]
[link2]
...
]
},
...
]

Local storage
All read news is saved in a MySQL database. You should have a database named final_task_database with a table named news_cache
news_cache structure:
feed:longtext
title:longtext
date:date
link:longtext
image_description:longtext
new_description:longtext
image_links:longtext
Comment thread
HenadziStantchik marked this conversation as resolved.
When using the --date argument, news is searched by date in database

Saving in format feature
You can save the retrieved news in 2 formats: html, fb2
If the news is fetched from the Internet and the Internet connection is on, news images are downloaded from the website
and converted to base64 strings. The saved html or fb2 files can then show them without connecting
to the Internet. If the Internet is off, images aren't downloaded; in html the utility writes the image links instead of the images.
When using the --date argument, news is taken from the database. Images are downloaded the same way, depending on whether
the Internet is on

Colorize mode
When using the --colorize argument the output news in console will be colorized. If using --json at the same time
the output news will be printed in colorized json format

How to install application
To install the application you should have setuptools. Open cmd and enter 'pip install -U setuptools'.
Run 'python setup.py install' in cmd to install the application.
Install requirements 'pip install -r requirements.txt'
You are now ready to run the application. Use 'rss-reader [arguments]' to run it.
Warning: If the path to rss-reader is not in the PATH variable, use the full path to the file when running it.
Comment thread
HenadziStantchik marked this conversation as resolved.
63 changes: 63 additions & 0 deletions final_task/rss_reader/arguments_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import argparse
import os
import re
import logging
from custom_exceptions import IncorrectFilePath


def get_arguments():
    """
    Declare the command-line interface and parse the arguments given to the application.

    :return: Namespace with the parsed arguments of application
    """
    # (names, settings) pairs, kept in the order they are registered with the parser
    argument_specs = (
        (('-v', '--verbose'), {'action': 'store_true', 'help': 'increase output verbosity'}),
        (('--json',), {'action': 'store_true', 'help': 'print result as JSON in stdout'}),
        (('--version',), {'action': 'store_true', 'help': 'print version info'}),
        (('--limit',), {'help': 'limit news topics if this parameter provided'}),
        (('--date',), {'help': 'represent news from local storage by date'}),
        (('--to-html',), {'help': 'save news in html format'}),
        (('--to-fb2',), {'help': 'save news in fb2 format'}),
        (('--colorize',), {'action': 'store_true', 'help': 'print news in colorized mode'}),
        (('source',), {'nargs': '?'}),
    )
    cli_parser = argparse.ArgumentParser()
    for names, settings in argument_specs:
        cli_parser.add_argument(*names, **settings)
    return cli_parser.parse_args()


def check_html_argument(html_argument):
    """
    Validate the path given for saving news in html format.

    :param html_argument: html directory path
    :raises IncorrectFilePath: if the path does not exist
    """
    if not os.path.exists(html_argument):
        logging.error('Incorrect html filepath')
        raise IncorrectFilePath('Incorrect html filepath')


def check_fb2_argument(fb2_argument):
    """
    Validate the path given for saving news in fb2 format.

    :param fb2_argument: fb2 directory path
    :raises IncorrectFilePath: if the path does not exist
    """
    if not os.path.exists(fb2_argument):
        logging.error('Incorrect fb2 filepath')
        raise IncorrectFilePath('Incorrect fb2 filepath')


def check_limit_argument(limit_argument):
    """
    Validate the value given with --limit.

    :param limit_argument: limit of news, as the string read from the command line
    :raises ValueError: if the value does not consist of digits only
    """
    # fullmatch is required here: match() anchors only at the start of the string,
    # so a value such as '12abc' would pass the check.
    if not re.fullmatch(r'\d+', limit_argument):
        logging.error('Input value of --limit is incorrect')
        raise ValueError('Input value of --limit is incorrect')


def check_date_argument(date_argument):
    """
    Validate the value given with --date.

    :param date_argument: Date of news in database, expected as 8 digits (YYYYMMDD)
    :raises ValueError: if the value is not exactly 8 digits
    """
    # fullmatch is required here: match() anchors only at the start of the string,
    # so an 8-character value such as '2019abcd' would pass the check.
    if not re.fullmatch(r'\d{8}', date_argument):
        logging.error('Input value of --date is incorrect')
        raise ValueError('Input value of --date is incorrect')
14 changes: 14 additions & 0 deletions final_task/rss_reader/custom_exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
class IncorrectURL(Exception):
    """Raised when the RSS source cannot be parsed as a valid feed."""


class NoInternet(Exception):
    """Raised when the Internet connection check fails."""


class IncorrectFilePath(Exception):
    """Raised when a path given for saving news does not exist."""


class DatabaseConnectionError(Exception):
    """Raised when the connection to the news database cannot be established."""
74 changes: 74 additions & 0 deletions final_task/rss_reader/database_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
from contextlib import closing
import logging
import pymysql
from custom_exceptions import DatabaseConnectionError


def get_news_list_by_date(date, limit):
    """
    Return the news published on the given date from the database.

    :param date: Date of publication of news
    :param limit: Maximum number of news to return; a falsy value means no limit
    :return: List of news dicts with the keys 'Feed', 'Title', 'Date', 'Link',
             'Image description', 'New description' and 'Image links'
    :raises ValueError: if the database rejects the date value
    :raises DatabaseConnectionError: if the connection to the database fails
    """
    logging.info('Connecting to database')
    try:
        # NOTE(review): the credentials are hard-coded; consider moving them to configuration.
        with closing(pymysql.connect(host='localhost', user='root', password='Password12345',
                                     database='final_task_database')) as connection:
            with closing(connection.cursor()) as cursor:
                logging.info('Connected to database')
                logging.info('Giving request')
                try:
                    # The date is passed as a query parameter so the driver escapes it;
                    # a crafted --date value cannot inject SQL.
                    cursor.execute('select * from news_cache where date=%s', (date,))
                except pymysql.err.InternalError as error:
                    logging.error('Input value of --date is incorrect')
                    raise ValueError('Input value of --date is incorrect') from error
                logging.info('Getting response')
                database_response = cursor.fetchall()
                if limit:
                    # max() keeps a negative limit from turning into a "drop the last N" slice
                    database_response = database_response[:max(limit, 0)]
                logging.info('Response was got')
                return [
                    {
                        'Feed': row[0],
                        'Title': row[1],
                        'Date': str(row[2]),
                        'Link': row[3],
                        'Image description': row[4],
                        'New description': row[5],
                        # image links are stored as one string joined with '|||'
                        'Image links': row[6].split('|||'),
                    }
                    for row in database_response
                ]
    except pymysql.err.OperationalError as error:
        logging.error('Not connected to database')
        raise DatabaseConnectionError("Can't connect to database, check if you have installed Mysql, and necessary "
                                      "database with table described in README") from error


def write_news_to_database(news_list):
    """
    Write news to the database, skipping news whose link is already stored.

    :param news_list: List of news dicts; the values of each dict are inserted in their
                      own order, and the 7th value must be the list of image links
    :raises DatabaseConnectionError: if the connection to the database fails
    """
    logging.info('Connecting to database')
    try:
        # NOTE(review): the credentials are hard-coded; consider moving them to configuration.
        with closing(pymysql.connect(host='localhost', user='root', password='Password12345',
                                     database='final_task_database')) as connection:
            with closing(connection.cursor()) as cursor:
                logging.info('Connected to database')
                for new in news_list:
                    # Try to find new in database by link, if exists - skip it.
                    # The link is passed as a query parameter so quotes inside it can
                    # neither break the statement nor inject SQL.
                    cursor.execute('select * from news_cache where link = %s', (new['Link'],))
                    if cursor.fetchall():
                        continue
                    insert_values = list(new.values())
                    # converting list of image links into string to store in database
                    insert_values[6] = '|||'.join(insert_values[6])
                    cursor.execute('Insert into news_cache values(%s,%s,%s,%s,%s,%s,%s)', insert_values)
                connection.commit()
                logging.info('Data write successful')
    except pymysql.err.OperationalError as error:
        logging.error('Not connected to database')
        raise DatabaseConnectionError("Can't connect to database, check if you have installed Mysql, and necessary "
                                      "database with table described in README") from error
81 changes: 81 additions & 0 deletions final_task/rss_reader/parse_rss_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import re
import feedparser
import socket
import logging
from dateutil import parser as date_parser
import html
from custom_exceptions import IncorrectURL, NoInternet


def ckeck_internet():
    """
    Check the Internet connection by resolving www.google.com and opening a TCP connection to it.

    :return: True if the connection was opened, False if any step failed
    """
    try:
        logging.info("checking Internet connection")
        socket.setdefaulttimeout(5)
        google_address = socket.gethostbyname("www.google.com")
        # the connection is only a probe, so it is closed right away
        socket.create_connection((google_address, 80), 2).close()
    except Exception:
        logging.error("Internet off.")
        return False
    else:
        logging.info('Internet on.')
        return True


def get_new_description(summary_str):
    """
    Build the new description by removing every <...> tag from the summary string.

    :param summary_str: Summary string from parsing RSS
    :return: New description
    """
    return re.sub(r'<.*?>', '', summary_str)


def get_image_description(summary_str):
    """
    Extract the image description (the value of the first alt attribute) from the summary string.

    :param summary_str: Summary string from parsing RSS
    :return: Image description, or an empty string if the summary has no alt attribute
    """
    # Match a real alt="..." attribute instead of searching for the bare substring 'alt'.
    # A plain find() returns -1 when nothing is found and also hits words such as
    # "although"; both cases end up returning a slice of unrelated summary text.
    alt_attribute = re.search(r'''\balt\s*=\s*(["'])(.*?)\1''', summary_str, re.DOTALL)
    return alt_attribute.group(2) if alt_attribute else ''


def get_news_list(source, limit):
    """
    Download the RSS received from source and parse it into a list of news
    which will then be used for printing or parsing into JSON.

    :param source: RSS URL
    :param limit: Limit of viewing news; a falsy value means all entries
    :return: List of news dicts with the keys 'Feed', 'Title', 'Date', 'Link',
             'Image description', 'New description' and 'Image links'
    :raises NoInternet: if the Internet connection check fails
    :raises IncorrectURL: if feedparser sets the bozo flag for the source
    """
    logging.info('Creating news list')
    if not ckeck_internet():
        raise NoInternet("Internet off, please check your connection")
    logging.info('Getting and parsing RSS')
    parsed_rss = feedparser.parse(source)
    if parsed_rss['bozo']:
        raise IncorrectURL('The entered URL is incorrect')
    entries = parsed_rss['entries']
    if limit:
        # max() keeps a negative limit from turning into a "drop the last N" slice
        entries = entries[:max(limit, 0)]
    feed_title = html.unescape(parsed_rss['feed'].get('title', ''))
    news_list = []
    for entry in entries:
        # Entries may come without a summary or media attachments; use empty
        # values for them instead of failing with KeyError.
        summary = entry.get('summary', '')
        news_list.append({
            'Feed': feed_title,
            'Title': html.unescape(entry['title']),
            # keep only the date part of the parsed publication timestamp
            'Date': str(date_parser.parse(entry['published'])).split(" ")[0],
            'Link': entry['link'],
            'Image description': html.unescape(get_image_description(summary)),
            'New description': html.unescape(get_new_description(summary)),
            'Image links': [content['url'] for content in entry.get('media_content', []) if 'url' in content],
        })
    return news_list
84 changes: 84 additions & 0 deletions final_task/rss_reader/print_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import logging
import json
from colorama import Fore, Back, Style


def print_news(news_list):
    """
    Print every news from the list to stdout in readable format.

    :param news_list: The list of news
    """
    logging.info('Printing news')
    # fields holding a single value, in the order they are printed
    single_value_fields = ('Feed', 'Title', 'Date', 'Link', 'Image description', 'New description')
    for number, new in enumerate(news_list, start=1):
        lines = [f'New {number}', '']
        for field in single_value_fields:
            lines += [f'{field}:', f'\t{new[field]}']
        lines.append('Image links:')
        lines.extend(f'\t{image_link}' for image_link in new['Image links'])
        # two empty lines separate one news from the next one
        lines += ['', '']
        print('\n'.join(lines))


def print_news_colorize(news_list):
    """
    Print every news from the list to stdout in readable colorize format.

    :param news_list: The list of news
    """
    logging.info('Printing news colorize')
    # (caption, caption background, value foreground, news key) of every single-value field
    field_styles = (
        ('Feed:\n', Back.BLUE, Fore.BLUE, 'Feed'),
        ('Title:\n', Back.GREEN, Fore.GREEN, 'Title'),
        ('Date:\n', Back.CYAN, Fore.CYAN, 'Date'),
        ('Link:\n', Back.RED, Fore.RED, 'Link'),
        ('Image description:\n', Back.YELLOW, Fore.YELLOW, 'Image description'),
        ('New description:\n', Back.LIGHTBLUE_EX, Fore.LIGHTBLUE_EX, 'New description'),
    )
    for number, new in enumerate(news_list, start=1):
        print(f'{Style.RESET_ALL}{Fore.WHITE}{Back.MAGENTA}New {number}\n')
        for caption, caption_back, value_fore, key in field_styles:
            print(f'{Style.RESET_ALL}{Fore.WHITE}{caption_back}{caption}'
                  f'{Style.RESET_ALL}{value_fore}\t{new[key]}')
        print(f'{Style.RESET_ALL}{Fore.BLACK}{Back.LIGHTGREEN_EX}Image links:')
        for image_link in new['Image links']:
            print(f'{Style.RESET_ALL}{Fore.LIGHTGREEN_EX}\t{image_link}')
        print('\n')


def print_news_JSON(news_list):
    """
    Serialize the list of news to JSON and print it to stdout.

    :param news_list: The list of news
    """
    logging.info('Printing news as JSON')
    # non-ASCII characters are written as-is, nested levels are indented by 4 spaces
    serialized_news = json.dumps(news_list, indent=4, ensure_ascii=False)
    print(serialized_news)


def print_news_JSON_colorize(news_list):
    """
    Print the list of news to stdout in a JSON-like layout colorized with ANSI escape codes.

    Keys are printed on a colored background and values in the matching
    foreground color. Values are inserted as-is, without JSON escaping.

    :param news_list: The list of news
    """
    logging.info('Printing news as JSON')
    # (news key, ANSI background code of the key, ANSI foreground code of the value)
    field_colors = (
        ('Feed', 41, 31),
        ('Title', 42, 32),
        ('Date', 43, 33),
        ('Link', 44, 34),
        ('Image description', 45, 35),
        ('New description', 46, 36),
    )
    rendered_news = []
    for new in news_list:
        fields = ''.join(
            f'\n\t\t"\033[{back}m{key}\033[0m": "\033[{fore}m{new[key]}\033[0m",'
            for key, back, fore in field_colors
        )
        links = ','.join(f'\n\t\t\t"\033[34m{link}\033[0m"' for link in new['Image links'])
        # Every object opens with '{' so that it pairs with the closing '}' below.
        rendered_news.append(
            '\n\t{' + fields + '\n\t\t"\033[44mImage links\033[0m": [' + links + '\n\t\t]\n\t}'
        )
    print('[' + ','.join(rendered_news) + '\n]')
Loading