Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions final_task/MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
include rss_reader/TimesNewRoman.ttf
include rss_reader/VERSION.txt
Comment on lines +1 to +2

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because Linux and Windows have different filesystem separators, your setup installation will not work on Linux.

162 changes: 159 additions & 3 deletions final_task/README.md

Large diffs are not rendered by default.

Empty file removed final_task/__init__.py
Empty file.
16 changes: 16 additions & 0 deletions final_task/rss_reader/RssReaderException.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
class RssReaderException(Exception):
    """Base class for every error raised by the RSS reader.

    Attributes:
        expression: prefix identifying the exception kind; callers print it
            before the message.
        message: human-readable description of the problem.
    """

    # Defined per class so subclasses only override the prefix instead of
    # copy-pasting the whole initializer.
    expression = 'RssReaderException: '

    def __init__(self, message):
        # Initialise the base Exception explicitly so ``args`` and ``str(exc)``
        # are populated the standard way.
        super().__init__(message)
        self.message = message


class FileException(RssReaderException):
    """Raised for problems with the cache or with output files."""

    expression = 'RssReaderException.FileException: '


class ConnectException(RssReaderException):
    """Raised when a feed cannot be fetched or is not a valid RSS feed."""

    expression = 'RssReaderException.ConnectException: '
Binary file added final_task/rss_reader/TimesNewRoman.ttf
Binary file not shown.
1 change: 1 addition & 0 deletions final_task/rss_reader/VERSION.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
5.3
1 change: 1 addition & 0 deletions final_task/rss_reader/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
VERSION = '5.2'
11 changes: 11 additions & 0 deletions final_task/rss_reader/decorators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import logging


def functions_log(function):
    """Decorator that logs the start and the end of every call to *function*.

    The wrapped function's return value is passed through unchanged. The
    "end" record is only written when the call returns normally.
    """
    # Local import keeps this block self-contained.
    import functools

    # functools.wraps preserves __name__, __doc__ and friends, so decorated
    # functions stay recognisable in tracebacks, help() and further logging.
    @functools.wraps(function)
    def wrapper(*args, **kwargs):
        logging.info(f'start function: {function.__name__}')
        result = function(*args, **kwargs)
        logging.info(f'end function: {function.__name__}')
        return result

    return wrapper
4 changes: 4 additions & 0 deletions final_task/rss_reader/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
feedparser==5.2.1
colored==1.4.0
fpdf==1.7.2
requests==2.22.0
89 changes: 89 additions & 0 deletions final_task/rss_reader/rss_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import argparse
from typing import Union
import feedparser
import logging
import json
import sys
import os

sys.path.append(os.path.abspath(os.path.dirname(__file__)))


import work_with_file
import work_with_text
import work_with_dict
import work_with_html
import work_with_pdf
import work_with_colorize
import RssReaderException
import work_with_feedparser
import __init__


def set_start_setting():
    """Parse the command-line arguments and configure logging.

    Returns:
        The ``argparse.Namespace`` with the parsed options. ``limit`` is set
        to ``-1`` when it was omitted or given as ``0``.

    Logging goes to stdout when ``--verbose`` is given, otherwise to the file
    ``sample.log``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("source", help="RSS URL", nargs='?', default='', type=str)
    parser.add_argument("--version", help="Print version info", action="store_true")
    parser.add_argument("--json", help="Print result as JSON in stdout", action="store_true")
    parser.add_argument("--verbose", help="Outputs verbose status messages", action="store_true")
    parser.add_argument("--limit", help="Limit news topics if this parameter provided", type=int)
    parser.add_argument("--date", help="Obtaining the cached news without the Internet", type=str)
    parser.add_argument("--to-html", help="The argument gets the path where the HTML news will be saved", type=str)
    parser.add_argument("--to-pdf", help="The argument gets the path where the PDF news will be saved", type=str)
    parser.add_argument("--colorize", help="Colorize text", action="store_true")
    args = parser.parse_args()

    # -1 is the "no limit" sentinel the rest of the program receives.
    if not args.limit:
        args.limit = -1

    # The two configurations differ only in the log destination, so build
    # that single keyword argument and share everything else.
    if args.verbose:
        log_destination = {'stream': sys.stdout}
    else:
        log_destination = {'filename': "sample.log"}
    logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%I:%M:%S %p',
                        level=logging.DEBUG, **log_destination)
    return args


def run():
    """Run the RSS reader: load the news, render it and print the result.

    The news comes from the local cache when ``--date`` is given, otherwise
    from the feed at ``source`` (which is then added to the cache). The
    output format is chosen by ``--json``, ``--to-html``, ``--to-pdf`` or
    ``--colorize``, falling back to plain text. ``--version`` prints the
    version and skips everything else.

    Errors derived from ``RssReaderException`` are printed, not propagated.
    """
    # Work from the package directory so relative file names such as
    # VERSION.txt resolve regardless of where the app was launched.
    os.chdir(os.path.abspath(os.path.dirname(__file__)))
    args = set_start_setting()
    logging.info('the application is running')
    logging.debug('args: ' + str(args))
    logging.info(os.getcwd())
    try:
        if args.version:
            # Context manager so the file handle is closed deterministically.
            with open("VERSION.txt") as version_file:
                result = f'RSS reader version {version_file.readline()}'
        else:
            if args.date:
                data = work_with_file.read_feed_form_file(args.date)
            elif args.source:
                data = work_with_feedparser.get_object_feed(args.source)
                data = work_with_dict.to_dict(data)
                work_with_file.add_feed_to_file(data)
            else:
                raise RssReaderException.RssReaderException(
                    'How work with application?\nEnter in command line: rss-reader -h')

            # set_start_setting uses -1 for "no limit"; slicing with a negative
            # limit would silently drop the last news item, so only limit on
            # a positive value.
            if args.limit and args.limit > 0:
                data = work_with_dict.limited_dict(data, args.limit)

            if args.json:
                result = json.dumps(data, ensure_ascii=False, indent=4)
            elif args.to_html:
                result = work_with_html.write_to_html_file(data, args.to_html)
            elif args.to_pdf:
                result = work_with_pdf.write_to_pdf_file(data, args.to_pdf)
            elif args.colorize:
                result = work_with_colorize.colorize_text(data)
            else:
                result = work_with_text.get_string_with_result(data, args.limit)
        print(result)
    except RssReaderException.RssReaderException as exc:
        print(exc.expression)
        print(exc)
    logging.info('the application is finished')


if __name__ == '__main__':
    run()
21 changes: 21 additions & 0 deletions final_task/rss_reader/work_with_colorize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import colored


def colorize_text(data: dict):
    """Render the news dictionary as text with terminal colour codes.

    Args:
        data: dictionary with a ``'title'`` string and an ``'items'`` list;
            each item is read for ``'title'``, ``'published'``, ``'link'``,
            ``'summary'`` and, when ``'contain_image'`` is truthy,
            ``'link_on_image'``.

    Returns:
        The coloured text, one block per news item, blocks separated by a
        blank line.
    """
    yellow = colored.fg(11)
    red = colored.fg(9)
    green = colored.fg(82)
    pink = colored.fg(200)
    blue = colored.fg(20)
    description_color = colored.fg(14)
    default = colored.fg(230)

    parts = ['\n', f"{yellow} {data['title']} {default}\n\n"]
    for news in data['items']:
        parts.append(f"{green}Title: {red}{news['title']}\n")
        parts.append(f"{green}Date: {pink}{news['published']}\n")
        parts.append(f"{green}Link: {blue}{news['link']}\n")
        parts.append(f"{green}Description: {description_color}{news['summary']}\n")
        # .get(): an item without the flag is treated as having no image.
        if news.get('contain_image'):
            parts.append(f"{green}Link on image: {blue}{news['link_on_image']}{default}\n\n")
        else:
            # Items without an image still need the colour reset and the blank
            # separator line; otherwise they run into the next item.
            parts.append(f"{default}\n")
    return ''.join(parts)
50 changes: 50 additions & 0 deletions final_task/rss_reader/work_with_dict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import json
from feedparser import FeedParserDict
import decorators
import work_with_text


@decorators.functions_log
def to_dict(data) -> dict:
    """Convert a parsed feed into a plain, JSON-serialisable dictionary.

    Args:
        data: the parsed feed; anything that is not a ``FeedParserDict``
            yields an empty result.

    Returns:
        ``{'title': str, 'items': [...]}`` where every item holds ``'title'``,
        ``'published'``, ``'link'``, ``'summary'``, ``'contain_image'`` and,
        when an image link was found, ``'link_on_image'``.
    """
    result = {'title': '', 'items': []}
    if not isinstance(data, FeedParserDict):
        return result

    result['title'] = work_with_text.text_processing(data['feed']['title'])

    # NOTE(review): text_processing presumably appends one entry per summary to
    # this list (an image link or an empty value) - confirm in work_with_text.
    links_on_image = []
    for item in data['entries']:
        result['items'].append({
            'title': work_with_text.text_processing(item['title']),
            'published': work_with_text.text_processing(item['published']),
            'link': work_with_text.text_processing(item['link']),
            'summary': work_with_text.text_processing(item['summary'], links_on_image),
            # Default so renderers can always read the flag, even if no entry
            # was collected for this item.
            'contain_image': False,
        })

    for index_link, link in enumerate(links_on_image):
        if link:
            result['items'][index_link]['contain_image'] = True
            result['items'][index_link]['link_on_image'] = link
    return result


@decorators.functions_log
def limited_dict(data: dict, limit: int) -> dict:
    """Return a copy of *data* holding at most *limit* news items.

    Args:
        data: dictionary with ``'title'`` and ``'items'`` keys.
        limit: maximum number of items to keep. ``None`` or a negative value
            means "no limit" and keeps every item.

    Returns:
        A new dictionary with the same title and a new, possibly shortened,
        list of items.
    """
    items = data['items']
    # A negative limit used as a slice bound would cut items off the END of
    # the list (e.g. -1 drops the last news item), which is never what a
    # "no limit" caller wants.
    if limit is None or limit < 0:
        limited_items = items[:]
    else:
        limited_items = items[:limit]
    return {
        'title': data['title'],
        'items': limited_items,
    }
30 changes: 30 additions & 0 deletions final_task/rss_reader/work_with_feedparser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import decorators
import RssReaderException
import feedparser
import os


@decorators.functions_log
def get_object_feed(url: str) -> feedparser.FeedParserDict:
    """Fetch and parse the RSS feed at *url* (a URL or a local file path).

    Args:
        url: address of the feed, or path to a local feed file.

    Returns:
        The parsed feed.

    Raises:
        RssReaderException.ConnectException: when parsing fails, the HTTP
            status is not 200, *url* is neither reachable nor an existing
            file, or the content is not recognised as a feed.
    """
    try:
        data = feedparser.parse(url)
    except Exception as exc:
        # Only the parsing call is guarded, so the ConnectExceptions raised
        # below are no longer caught and re-wrapped into another one.
        raise RssReaderException.ConnectException(exc) from exc

    # 'status' is a dictionary key that exists only for HTTP responses; read it
    # as a key instead of relying on attribute-style access.
    status = data.get('status')
    if status is None:
        # No HTTP response: the source is only acceptable as a local file.
        if not os.path.isfile(url):
            raise RssReaderException.ConnectException(
                f'{url} - is not url(example url "https://google.com")')
    elif status != 200:
        raise RssReaderException.ConnectException(f'HTTP Status Code {status}')

    # An empty or missing 'version' is treated as "not a feed".
    if data.get('version'):
        return data
    raise RssReaderException.ConnectException(f'There is no rss feed at this url: {url}')
50 changes: 50 additions & 0 deletions final_task/rss_reader/work_with_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import datetime
import os
import json
import decorators
import RssReaderException


@decorators.functions_log
def add_feed_to_file(dict_data: dict):
    """Merge the news items of *dict_data* into the ``cache.json`` file.

    The cache maps a date key (as produced by ``get_date``) to a dictionary
    with a ``'title'`` and a list of ``'items'``. An item is appended only if
    no cached item for that date has the same ``'link'``.

    Args:
        dict_data: dictionary whose ``'items'`` list holds news dictionaries
            with at least ``'published'`` and ``'link'`` keys.
    """
    if os.path.exists('cache.json'):
        with open('cache.json', 'r') as read_file:
            dict_with_date = json.load(read_file)
    else:
        dict_with_date = {}

    for news_dict in dict_data['items']:
        published = news_dict['published']
        date = get_date(published)
        if date in dict_with_date:
            cached_items = dict_with_date[date]['items']
            # Skip duplicates: the link identifies a news item.
            if all(cached['link'] != news_dict['link'] for cached in cached_items):
                cached_items.append(news_dict)
        else:
            dict_with_date[date] = {
                # Title is the publication string cut just before the time part.
                'title': f"News by {published[:published.find(':') - 2]}",
                'items': [news_dict],
            }

    with open('cache.json', 'w') as file:
        json.dump(dict_with_date, file)


@decorators.functions_log
def read_feed_form_file(date_str: str):
    """Return the cached news stored in ``cache.json`` under *date_str*.

    Args:
        date_str: date key to look up in the cache.

    Returns:
        The cached dictionary for that date.

    Raises:
        RssReaderException.FileException: when the cache file does not exist
            or holds no entry for *date_str*.
    """
    if not os.path.exists('cache.json'):
        raise RssReaderException.FileException(
            'Cache is empty. Please launch the app from the URL to the news site.\n'
            + 'EXAMPLE: rss-reader https://news.yahoo.com/rss')

    with open('cache.json', 'r') as cache_file:
        cached_news = json.load(cache_file)

    if date_str not in cached_news:
        raise RssReaderException.FileException('Date ' + date_str + ' not found in cache.')
    return cached_news[date_str]


def get_date(input_str: str) -> str:
    """Convert a feed publication date into a ``YYYYMMDD`` string.

    Args:
        input_str: publication date, normally in RFC 2822 form such as
            ``'Mon, 02 Dec 2019 10:00:00 +0300'``.

    Returns:
        The calendar date as written in *input_str* (no time-zone
        conversion), formatted as ``'%Y%m%d'``.

    Raises:
        ValueError: when the date can be parsed neither as RFC 2822 nor by
            the ``'<weekday>, DD Mon YYYY HH:...'`` fallback.
    """
    # Local import keeps this block self-contained.
    from email.utils import parsedate_to_datetime

    try:
        # The standard parser copes with single-digit hours, two-digit years
        # and a missing weekday, all of which break fixed-offset slicing.
        parsed = parsedate_to_datetime(input_str)
    except (TypeError, ValueError, IndexError):
        # Fallback: take the text between the weekday comma and the hour.
        day_text = input_str[input_str.find(',') + 2: input_str.find(':') - 2].strip(' ')
        parsed = datetime.datetime.strptime(day_text, '%d %b %Y')
    return parsed.strftime('%Y%m%d')
35 changes: 35 additions & 0 deletions final_task/rss_reader/work_with_html.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import os
import datetime
import RssReaderException


def write_to_html_file(data: dict, path: str):
    """Write the news in *data* as an HTML file inside the directory *path*.

    The file is named ``<YYYYMMDD>_<first word of the title>.html``, with
    ``':'`` and ``'.'`` removed from that word.

    Args:
        data: news dictionary with ``'title'`` and ``'items'`` keys.
        path: existing directory in which the file is created.

    Returns:
        A message containing the path of the written file.

    Raises:
        RssReaderException.FileException: when *path* is not a directory.
    """
    if not os.path.isdir(path):
        raise RssReaderException.FileException(f'{path} is not found')

    date_str = datetime.datetime.now().date().strftime('%Y%m%d')
    # partition() returns the whole title when it has no space; slicing with
    # find(' ') == -1 would instead chop off the title's last character.
    first_word = data['title'].partition(' ')[0]
    first_word = first_word.replace(':', '').replace('.', '')
    filename = os.path.join(path, date_str + '_' + first_word + '.html')

    with open(filename, 'w', encoding="utf-8") as file:
        file.write(text_processing_for_html(data))
    return f'the recording has been completed in the file:\n{filename}'


def text_processing_for_html(data: dict):
    """Render the news dictionary as a complete HTML document.

    Args:
        data: dictionary with a ``'title'`` string and an ``'items'`` list;
            each item is read for ``'title'``, ``'published'``, ``'link'``,
            ``'summary'`` and, when ``'contain_image'`` is truthy,
            ``'link_on_image'``.

    Returns:
        The HTML text. All feed-provided text is HTML-escaped.
    """
    # Local import keeps this block self-contained.
    import html

    style = ('<style type="text/css">body{text-align: center; font-size: 120%;'
             'font-family: Verdana, Arial, Helvetica, sans-serif;'
             'color: #333366; } </style>')

    # Feed content is untrusted: escape it so titles or summaries cannot
    # inject markup or break out of attribute values.
    page_title = html.escape(data['title'])
    parts = [
        '<!DOCTYPE html><html><head><meta charset="utf-8">',
        '<title>' + page_title + '</title>' + style + '</head>',
        '<body><center><h1>' + page_title + '</h1></center><br>',
    ]

    for news in data['items']:
        summary = news['summary']
        link = html.escape(news['link'], quote=True)
        parts.append('<h3><center><a href="' + link + '">'
                     + html.escape(news['title']) + '</a></center></h3>')
        parts.append(html.escape(news['published']) + '<br>')
        # .get(): an item without the flag is treated as having no image.
        if news.get('contain_image'):
            # The alt text is whatever sits between the first ': ' and the
            # first ']' of the summary.
            alt_text = summary[summary.find(': ') + 1:summary.find(']')]
            parts.append('<img src="' + html.escape(news['link_on_image'], quote=True)
                         + '" alt="' + html.escape(alt_text, quote=True) + '"><br>')
        # Everything after the last ']' is the description; with no ']' in
        # the summary this is the whole summary.
        parts.append(html.escape(summary[summary.rfind(']') + 1:]))
        parts.append('<br><br>')

    parts.append('</body></html>')
    return ''.join(parts)


Loading