Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 57 additions & 3 deletions final_task/README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,57 @@
# Your readme here
Some text.
Checkout how to write this file using *markdown*.
# RSS_READER
---------------------------------------------------------------------------
RSS reader is a command-line utility.

### Usage
---------------------------------------------------------------------------
usage: rss_reader.py [source] [-h] [--version] [--json] [--verbose] [--limit LIMIT] [--date DATE] [--to-pdf PATH] [--to-html PATH] [--colorize]

Pure Python command-line RSS reader.

positional arguments:
- source            *RSS URL*

optional arguments:
- -h, --help            *show this help message and exit*
- --version            *Print version info*
- --json            *Print result as JSON in stdout*
- --verbose            *Output verbose status messages*
- --limit LIMIT            *Limit news topics if this parameter provided*
- --date DATE            *News from the specified day will be printed out. Format: YYYYMMDD*
- --to-pdf PATH            *Create PDF file with news*
- --to-html PATH            *Create HTML file with news*
- --colorize            *Print news in colorized mode (not for json mode)*

You must specify a date, a source, or both.
If both are specified, news is filtered by both date and source.

### Json structure
---------------------------------------------------------------------------
{
            "feed": [feed],
            "items": [
                        {
                                    "title": [title],
                                    "date": [date],
                                    "link": [link],
                                    "text": [text],
                                    "img_links": [
                                                [link1], [link2], ...
                                    ]
                        },
                        ...
            ]
}

### Local storage
---------------------------------------------------------------------------
All news that has been read is saved in the storage file *news.data*.
When the --date argument is used, news for the specified date is looked up in *news.data*.

### How to install application
---------------------------------------------------------------------------
- To install the application you need setuptools. Open cmd and run 'pip install -U setuptools'.
- Run 'python setup.py install' in cmd to install the application.
- You are now ready to run the application. Use 'rss-reader [arguments]' to run it.

Warning: If the directory containing rss-reader is not in your PATH variable, use the full path to the file when running it.
File renamed without changes.
20 changes: 20 additions & 0 deletions final_task/rss_reader/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from datetime import datetime


class GettingRSSException(Exception):
    """ Error related to getting RSS data

    NOTE(review): the raise sites are outside this module; presumably raised
    when a feed cannot be fetched or parsed -- confirm against the callers.
    """


class StorageNotFoundError(Exception):
    """ Error signalling that the news storage could not be found

    NOTE(review): the raise sites are outside this module; presumably raised
    when the local storage file is missing -- confirm against the callers.
    """


class NewsNotFoundError(Exception):
    """ Error whose message says which date (and optionally source) had no news in a storage """

    def __init__(self, date, storage_name, source=None):
        """ Build the error message from the search criteria

        :type date: 'datetime.datetime'
        :type storage_name: str
        :param source: source the search was restricted to; omitted from the message when falsy
        :type source: str or None
        """
        message_parts = ['News by date ', datetime.strftime(date, '%Y.%m.%d')]

        if source:
            message_parts.extend((' and by source ', source))

        message_parts.extend((' not found in storage ', storage_name))

        super().__init__(''.join(message_parts))
Binary file not shown.
Binary file not shown.
Binary file not shown.
40 changes: 40 additions & 0 deletions final_task/rss_reader/item.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from dataclasses import dataclass
from typing import List
from colorama import Fore, Style
import tools


@dataclass
class Item:
    """ Single news entry with a console-oriented text representation """
    title: str
    date: str
    link: str
    text: str
    # Links of the images that belong to this entry
    img_links: List[str]

    def __repr__(self):
        """ Return the printable form of the item

        The layout is the same in both modes; when 'tools.colorize' is truthy
        every label and value is prefixed with colorama style/colour codes.

        :rtype: str
        """
        labelled = (
            ('Title: ', self.title),
            ('\nDate: ', self.date),
            ('\nLink: ', self.link),
            ('\nText: ', self.text),
        )

        if tools.colorize:
            palette = (Fore.LIGHTBLUE_EX, Fore.LIGHTMAGENTA_EX, Fore.RED, Fore.LIGHTCYAN_EX)
            # Bright label followed by normal-weight value, both in the field's colour
            pieces = [Style.BRIGHT + color + label + Style.NORMAL + color + value
                      for color, (label, value) in zip(palette, labelled)]
            images_header = Style.BRIGHT + Fore.LIGHTRED_EX + 'Image links:\n' + Style.NORMAL + Fore.LIGHTRED_EX
        else:
            pieces = [f'{label}{value}' for label, value in labelled]
            images_header = 'Image links:\n'

        pieces.append('\n')

        if self.img_links:
            pieces.append(images_header)
            # Image links are numbered starting from 1
            pieces.extend(f'\t[{num}]: [{link}]\n' for num, link in enumerate(self.img_links, start=1))

        return ''.join(pieces)
55 changes: 55 additions & 0 deletions final_task/rss_reader/item_group.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import logging
import tools
from dataclasses import dataclass
from typing import List
from item import Item
from parser_rss import format_description
from html import unescape as html_unescape
from colorama import Style, Fore


@dataclass
class ItemGroup:
    """ Feed title together with the news items that belong to it """
    feed: str
    items: List[Item]

    def __repr__(self):
        """ Return the printable form: a 'Feed:' header followed by every item

        The header carries colorama codes when 'tools.colorize' is truthy.

        :rtype: str
        """
        if tools.colorize:
            label = Style.BRIGHT + Fore.GREEN + 'Feed: ' + Style.NORMAL + Fore.GREEN
        else:
            label = 'Feed: '

        header = label + self.feed + '\n\n'

        # Each item is followed by one extra line break
        return header + ''.join(str(entry) + '\n' for entry in self.items)


def get_item_group_from_feedparser(parser):
    """ Build an item group from the entries of a parsed feed.

    Entries whose description cannot be read (AttributeError) or whose
    formatted text is empty are left out.

    :type parser: 'feedparser.FeedParserDict'

    :rtype: ItemGroup
    """
    collected = []

    logging.info('Loop for retrieving items.')
    for entry in parser.entries:
        try:
            body, images = format_description(entry.description)
        except AttributeError:
            continue

        if not body:
            continue

        collected.append(
            Item(
                title=html_unescape(entry.title),
                date=entry.published,
                link=entry.link,
                text=body,
                img_links=images
            )
        )

    return ItemGroup(feed=parser.feed.title, items=collected)
13 changes: 13 additions & 0 deletions final_task/rss_reader/log.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import logging


def turn_on_logging(logger):
    """ Switch the logger to DEBUG level with a single formatted stream handler

    Handlers that were attached before the call are dropped.

    :type logger: 'logging.Logger'
    """
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(
        logging.Formatter('%(asctime)s - %(name)s - %(levelname)s: %(message)s')
    )

    logger.setLevel(logging.DEBUG)

    # Start from an empty handler list so repeated calls do not duplicate output
    logger.handlers = []
    logger.addHandler(stream_handler)
201 changes: 201 additions & 0 deletions final_task/rss_reader/news_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
from json import dumps as jdumps
from dataclasses import asdict
from fpdf import FPDF
from os import remove, path
import imghdr
import requests


def news_as_json_str(item_group):
    """ Serialize one item group to a JSON string

    The output is indented by 4 spaces and non-ASCII characters are kept as is.

    :type item_group: 'item_group.ItemGroup'
    :rtype: str
    """
    return jdumps(asdict(item_group), ensure_ascii=False, indent=4)


def news_as_json_str_from_list(item_groups):
    """ Serialize several item groups to one JSON array string

    The output is indented by 4 spaces and non-ASCII characters are kept as is.

    :type item_groups: list of 'item_group.ItemGroup'
    :rtype: str
    """
    groups_as_dicts = list(map(asdict, item_groups))

    return jdumps(groups_as_dicts, ensure_ascii=False, indent=4)


def news2html(item_groups):
    """ Convert news to HTML code

    Every feed becomes a block with its title between two green rules,
    followed by the HTML of its items. The feed title is HTML-escaped so
    characters such as '&' or '<' cannot break the markup.

    :type item_groups: list of 'item_group.ItemGroup'
    :return: HTML code
    :rtype: str
    """
    from html import escape  # local import keeps the module's import block untouched

    green_line = '<hr align=center size=3 width=70% color=green>'
    font = '../fonts/DejaVuSansCondensed.ttf'

    page_head = '<html><head><title>News</title><meta content="text/html; charset=utf-8" http-equiv="Content-Type">' \
                '<style>@font-face {font-family: DejaVuSans;src: url("' + font + '");}' \
                'body {font-family: DejaVuSans;}</style></head><body>'
    page_tail = '</body></html>'

    content = ''.join(
        '<div>' + green_line + '<h1 align=center>' + escape(item_gr.feed, quote=False) + '</h1>' + green_line +
        '<div>' + items2html(item_gr.items) + '</div></div>'
        for item_gr in item_groups
    )

    return page_head + content + page_tail


def items2html(items):
    """ Convert items to HTML code

    Items are separated by a black rule (none after the last one). Title and
    date are HTML-escaped, and the source link is escaped and placed in a
    quoted 'href' so links with spaces, '&' or '>' cannot break the tag.

    :type items: list of 'item.Item'
    :return: HTML code, empty string for no items
    :rtype: str
    """
    from html import escape  # local import keeps the module's import block untouched

    black_line = '<hr align=center size=1 width=70% color=black>'
    source_link_text = 'Go to source..'

    rendered_items = []
    for item in items:
        rendered_items.append(
            '<div style="margin: 60px 15% 20px 15%;"><h3 align=center>' + escape(item.title, quote=False) + '</h3>' +
            '<p align="justify">' + item_text_with_imgs2html(item.text, item.img_links) + '</p>' +
            '<br><small><i><a href="' + escape(item.link) + '" color=blue>' + source_link_text + '</a><br>' +
            '<span style="float:right; margin-right:90">' + escape(str(item.date), quote=False) +
            '</span></i></small><br></div>'
        )

    # Joining puts the separator only between items, never after the last one
    return black_line.join(rendered_items)


def item_text_with_imgs2html(text, img_links):
    """ Convert text with images to HTML code

    For the image with 1-based number N the text is split with
    'parse_item_text(text, N)'; the part before the marker is emitted, then an
    '<img>' tag for the link, and processing continues with the remainder.
    Text fragments, image links and alternative texts are HTML-escaped so
    characters such as '<', '&' or '"' cannot break the generated markup.

    :type text: str
    :type img_links: list of str

    :return: HTML code
    :rtype: str
    """
    from html import escape  # local import keeps the module's import block untouched

    html_parts = []

    for img_num, link in enumerate(img_links, start=1):
        alt, before_picture, text = parse_item_text(text, img_num)

        if before_picture:
            html_parts.append(escape(before_picture, quote=False))

        # Attribute values need quotes escaped as well, hence the default quote=True
        html_parts.append(
            '<p style="text-align: center;">'
            '<img src="' + escape(link) + '" alt="' + escape(alt) + '" style="margin-bottom: 30px;"></p>'
        )

    # Whatever is left after the last image marker closes the item text
    html_parts.append(escape(text, quote=False))
    return ''.join(html_parts)


def _download_jpeg(link, timeout):
    """ Download an image and return its bytes only when it is a usable JPEG

    :type link: str
    :param timeout: seconds to wait for the server, passed to 'requests.get'
    :type timeout: float

    :return: image content, or None when the request fails, the status code
             is not 200 or the payload is not recognised as JPEG
    :rtype: bytes or None
    """
    try:
        response = requests.get(link, timeout=timeout)
    except requests.exceptions.RequestException:
        # Invalid URL, timeout, connection problem... all mean "no picture available"
        return None

    if response.status_code != 200 or imghdr.what(None, response.content) != 'jpeg':
        return None

    return response.content


def news2pdf(item_groups, file_path, image_timeout=10):
    """ Write news in PDF file

    The feed title is printed first; every item then starts on a new page
    with its title, its text interleaved with its images, a link to the
    source and the date. An image that cannot be downloaded as JPEG is
    replaced by a '[image: alt][link]' note.

    :type item_groups: list of 'item_group.ItemGroup'
    :type file_path: str
    :param image_timeout: seconds to wait for each image download
    :type image_timeout: float
    """
    width = 180

    pdf = FPDF()
    pdf.add_page()

    current_dir = path.dirname(path.abspath(__file__))
    egg_info_pos = current_dir.find('EGG-INFO')
    if egg_info_pos != -1:
        fonts_dir = current_dir[:egg_info_pos] + path.join('rss_reader', 'fonts')
    else:
        # str.find returned -1; slicing with it would silently drop the last
        # character of the directory and produce a broken path.
        # NOTE(review): assumes the fonts directory sits next to this module
        # when not running from an installed egg -- confirm the project layout.
        fonts_dir = path.join(current_dir, 'fonts')

    pdf.add_font('DejaVu', '', path.join(fonts_dir, 'DejaVuSansCondensed.ttf'), uni=True)
    pdf.add_font('DejaVuBold', '', path.join(fonts_dir, 'DejaVuSansCondensed-Bold.ttf'), uni=True)
    pdf.add_font('DejaVuOblique', '', path.join(fonts_dir, 'DejaVuSansCondensed-Oblique.ttf'), uni=True)

    # Counter used to give every temporary image file its own name
    num = 0

    for item_gr in item_groups:
        pdf.set_font('DejaVuBold', size=24)
        pdf.set_text_color(0, 10, 180)
        pdf.multi_cell(width, 260, item_gr.feed, align='C')

        for item in item_gr.items:
            pdf.add_page()

            pdf.set_font('DejaVuBold', size=18)
            pdf.set_text_color(0, 0, 0)
            pdf.multi_cell(width, 16, item.title, align='C')

            pdf.set_font('DejaVu', size=16)

            text = item.text
            for ind, link in enumerate(item.img_links):
                alt, before_picture, text = parse_item_text(text, ind + 1)

                if before_picture:
                    pdf.multi_cell(width, 16, before_picture)

                img_content = _download_jpeg(link, image_timeout)

                if img_content is None:
                    # No usable picture: print a grey italic placeholder instead
                    pdf.set_font('DejaVuOblique', size=14)
                    pdf.set_text_color(80, 80, 80)

                    pdf.multi_cell(width, 14, f'[image: {alt}][{link}]')

                    pdf.set_font('DejaVu', size=16)
                    pdf.set_text_color(0, 0, 0)
                else:
                    # The image is passed to FPDF through a temporary file
                    file_image_name = str(num) + 'tmp_img.jpg'

                    with open(file_image_name, 'wb') as img_file:
                        img_file.write(img_content)

                    try:
                        pdf.multi_cell(width, 16, '')
                        pdf.image(file_image_name, x=75)
                        pdf.multi_cell(width, 16, '')
                    finally:
                        # Remove the temporary file even if FPDF rejects the image
                        remove(file_image_name)

                    num += 1

            pdf.multi_cell(width, 16, text)

            pdf.set_font('DejaVuOblique', size=11)
            pdf.set_text_color(0, 0, 255)
            pdf.multi_cell(width, 11, '')
            pdf.cell(width, 11, 'Go to source...', link=item.link)

            pdf.set_text_color(0, 0, 0)
            pdf.multi_cell(width, 11, '')
            pdf.multi_cell(width, 11, str(item.date))

        # NOTE(review): the nesting level of this spacer could not be recovered
        # from the reviewed copy; it is emitted once after each feed's items.
        pdf.multi_cell(width, 16, '')

    pdf.output(file_path)


def parse_item_text(text, img_num):
    """ Return alternative text of image, text before image and text after image

    The image is expected in the text as '[image N: alt][N]' where N is
    'img_num'. When that marker (or its closing '[N]' part) is missing, the
    text is returned untouched as the "after image" part with an empty
    alternative text and an empty "before image" part.

    :type text: str
    :type img_num: int

    :return: (alt, text before the marker, text after the marker)
    :rtype: tuple of str
    """
    opening = f'[image {img_num}:'
    closing = f'[{img_num}]'

    img_begin = text.find(opening)
    if img_begin == -1:
        return '', '', text

    closing_begin = text.find(closing, img_begin)
    if closing_begin == -1:
        return '', '', text

    img_end = closing_begin + len(closing)

    # Skip the space after the colon on the left and the ']' that precedes '[N]' on the right
    alt = text[img_begin + len(opening) + 1:closing_begin - 1]

    before_picture = text[:img_begin]
    after_picture = text[img_end:]

    return alt, before_picture, after_picture
Loading