epam-python-courses-7-bsu · Kirill-Ulich · Nov 11, 2019 · Nov 13, 2019 · Nov 14, 2019 · Nov 17, 2019
diff --git a/final_task/README.md b/final_task/README.md
@@ -1,3 +1,57 @@
-# Your readme here
-Some text.
-Checkout how to write this file using *markdown*.
+# RSS_READER
+---------------------------------------------------------------------------
+RSS reader is a command-line utility.  
+
+### Usage
+---------------------------------------------------------------------------
+usage: rss_reader.py [source] [-h] [--version] [--json] [--verbose] [--limit LIMIT] [--date DATE] [--to-pdf PATH] [--to-html PATH] [--colorize]  
+
+Pure Python command-line RSS reader.  
+
+positional arguments:  
+ - source            *RSS URL*  
+
+optional arguments:  
+ - -h, --help            *show this help message and exit*  
+ - --version            *Print version info*  
+ - --json            *Print result as JSON in stdout*  
+ - --verbose            *Output verbose status messages*  
+ - --limit LIMIT            *Limit news topics if this parameter provided*  
+ - --date DATE            *News from the specified day will be printed out. Format: YYYYMMDD*  
+ - --to-pdf PATH            *Create PDF file with news*  
+ - --to-html PATH            *Create HTML file with news*  
+ - --colorize            *Print news in colorized mode (not for json mode)*  
+
+You must specify a date, a source, or both.  
+If both are specified, news is filtered by both date and source.  
+
+### JSON structure
+---------------------------------------------------------------------------
+{  
+            "feed": [feed],  
+            "items": [  
+                        {  
+                                    "title": [title],  
+                                    "date": [date],  
+                                    "link": [link],  
+                                    "text": [text],  
+                                    "img_links": [  
+                                                [link1], [link2], ...  
+                                    ]  
+                        },  
+                        ...  
+            ]  
+}
+
+### Local storage
+---------------------------------------------------------------------------
+All news that has been read is saved in the storage file *news.data*.  
+When the --date argument is used, news for the specified date is looked up in *news.data*.  
+
+### How to install application
+---------------------------------------------------------------------------
+ - To install the application you need setuptools. Open a command prompt and run 'pip install -U setuptools'.  
+ - Run 'python setup.py install' in the command prompt to install the application.  
+ - You are now ready to run the application. Use 'rss-reader [arguments]' to run it.  
+
+Warning: If the directory containing rss-reader is not in your PATH environment variable, use the full path to the executable when running it.  
diff --git a/final_task/__init__.py → final_task/rss_reader/__init__.py b/final_task/__init__.py → final_task/rss_reader/__init__.py
diff --git a/final_task/rss_reader/exceptions.py b/final_task/rss_reader/exceptions.py
@@ -0,0 +1,20 @@
+from datetime import datetime
+
+
+class GettingRSSException(Exception):
+    """ Exception type for RSS retrieval problems (presumably raised while fetching a feed; raise sites are elsewhere) """
+
+
+class StorageNotFoundError(Exception):
+    """ Exception type for a missing news storage (presumably the storage file; raise sites are elsewhere) """
+
+
+class NewsNotFoundError(Exception):
+    """ Error whose message says that no news was found in a storage for a date (and source)
+
+    :type date: 'datetime.datetime'
+    :type storage_name: str
+    :type source: str or None
+    """
+
+    def __init__(self, date, storage_name, source=None):
+        day = datetime.strftime(date, '%Y.%m.%d')
+        # The source fragment is only mentioned when a source was given.
+        source_part = ' and by source ' + source if source else ''
+
+        super().__init__('News by date ' + day + source_part + ' not found in storage ' + storage_name)
diff --git a/final_task/rss_reader/fonts/DejaVuSansCondensed-Bold.ttf b/final_task/rss_reader/fonts/DejaVuSansCondensed-Bold.ttf
diff --git a/final_task/rss_reader/fonts/DejaVuSansCondensed-Oblique.ttf b/final_task/rss_reader/fonts/DejaVuSansCondensed-Oblique.ttf
diff --git a/final_task/rss_reader/fonts/DejaVuSansCondensed.ttf b/final_task/rss_reader/fonts/DejaVuSansCondensed.ttf
diff --git a/final_task/rss_reader/item.py b/final_task/rss_reader/item.py
@@ -0,0 +1,40 @@
+from dataclasses import dataclass
+from typing import List
+from colorama import Fore, Style
+import tools
+
+
+@dataclass
+class Item:
+    """ Single news entry: title, date, link, text and image links """
+    title: str
+    date: str
+    link: str
+    text: str
+    img_links: List[str]
+
+    def __repr__(self):
+        """ Return the printable form of the item
+
+        Colour codes are added when 'tools.colorize' is truthy.
+
+        :rtype: str
+        """
+        # (label, value, colour) for every text field, in output order.
+        fields = (
+            ('Title: ', self.title, Fore.LIGHTBLUE_EX),
+            ('\nDate: ', self.date, Fore.LIGHTMAGENTA_EX),
+            ('\nLink: ', self.link, Fore.RED),
+            ('\nText: ', self.text, Fore.LIGHTCYAN_EX),
+        )
+
+        if tools.colorize:
+            # Label is bright, value is normal intensity, both in the same colour.
+            pieces = [Style.BRIGHT + color + label + Style.NORMAL + color + value
+                      for label, value, color in fields]
+            img_header = Style.BRIGHT + Fore.LIGHTRED_EX + 'Image links:\n' + Style.NORMAL + Fore.LIGHTRED_EX
+        else:
+            pieces = [f'{label}{value}' for label, value, _ in fields]
+            img_header = 'Image links:\n'
+
+        pieces.append('\n')
+
+        if self.img_links:
+            pieces.append(img_header)
+            pieces.extend(f'\t[{num}]: [{link}]\n' for num, link in enumerate(self.img_links, start=1))
+
+        return ''.join(pieces)
diff --git a/final_task/rss_reader/item_group.py b/final_task/rss_reader/item_group.py
@@ -0,0 +1,55 @@
+import logging
+import tools
+from dataclasses import dataclass
+from typing import List
+from item import Item
+from parser_rss import format_description
+from html import unescape as html_unescape
+from colorama import Style, Fore
+
+
+@dataclass
+class ItemGroup:
+    """ Feed title together with the items read from that feed """
+    feed: str
+    items: List[Item]
+
+    def __repr__(self):
+        """ Return the feed header followed by every item, one blank line between items
+
+        Colour codes are added to the header when 'tools.colorize' is truthy.
+
+        :rtype: str
+        """
+        label = 'Feed: '
+
+        if tools.colorize:
+            header = Style.BRIGHT + Fore.GREEN + label + Style.NORMAL + Fore.GREEN + self.feed
+        else:
+            header = label + self.feed
+
+        body = ''.join(str(item) + '\n' for item in self.items)
+
+        return header + '\n\n' + body
+
+
+def get_item_group_from_feedparser(parser):
+    """ Retrieve all items from feedparser and return item group.
+
+    Entries that lack a description, title, publication date or link are
+    skipped, as are entries whose formatted description text is empty.
+
+    :type parser: 'feedparser.FeedParserDict'
+
+    :rtype: ItemGroup
+    """
+    items = []
+
+    logging.info('Loop for retrieving items.')
+    for entry in parser.entries:
+        try:
+            text, img_links = format_description(entry.description)
+
+            if not text:
+                continue
+
+            # Attribute access stays inside the try: a missing field raises
+            # AttributeError and must skip this entry, not abort the whole feed.
+            new_item = Item(
+                title=html_unescape(entry.title),
+                date=entry.published,
+                link=entry.link,
+                text=text,
+                img_links=img_links
+            )
+        except AttributeError:
+            logging.info('Entry skipped: required field is missing.')
+            continue
+
+        items.append(new_item)
+
+    return ItemGroup(feed=parser.feed.title, items=items)
diff --git a/final_task/rss_reader/log.py b/final_task/rss_reader/log.py
@@ -0,0 +1,13 @@
+import logging
+
+
+def turn_on_logging(logger):
+    """ Switch logger to DEBUG level and leave it with a single formatted stream handler """
+    log_format = '%(asctime)s - %(name)s - %(levelname)s: %(message)s'
+
+    stream_handler = logging.StreamHandler()
+    stream_handler.setFormatter(logging.Formatter(log_format))
+
+    # Previously attached handlers are dropped; only the new one remains.
+    logger.handlers = []
+    logger.addHandler(stream_handler)
+
+    logger.setLevel(logging.DEBUG)
diff --git a/final_task/rss_reader/news_converter.py b/final_task/rss_reader/news_converter.py
@@ -0,0 +1,201 @@
+from json import dumps as jdumps
+from dataclasses import asdict
+from fpdf import FPDF
+from os import remove, path
+import imghdr
+import requests
+
+
+def news_as_json_str(item_group):
+    """ Serialize one item group to a JSON string (4-space indent, non-ASCII kept as is)
+
+    :type item_group: 'item_group.ItemGroup'
+    :rtype: str
+    """
+    return jdumps(asdict(item_group), indent=4, ensure_ascii=False)
+
+
+def news_as_json_str_from_list(item_groups):
+    """ Serialize several item groups to one JSON array string (4-space indent, non-ASCII kept as is)
+
+    :type item_groups: list of 'item_group.ItemGroup'
+    :rtype: str
+    """
+    groups_as_dicts = list(map(asdict, item_groups))
+
+    return jdumps(groups_as_dicts, indent=4, ensure_ascii=False)
+
+
+def news2html(item_groups):
+    """ Convert news to HTML code
+
+    Feed titles come from the RSS source, so they are HTML-escaped before
+    being placed into the page.
+
+    :type item_groups: list of 'item_group.ItemGroup'
+    :return: HTML code
+    :rtype: str
+    """
+    from html import escape  # local import keeps the module import block untouched
+
+    green_line = '<hr align=center size=3 width=70% color=green>'
+    font = '../fonts/DejaVuSansCondensed.ttf'
+
+    # One <div> per feed: escaped title between two green rules, then the feed items.
+    content = ''.join(
+        '<div>' + green_line + '<h1 align=center>' + escape(item_gr.feed) + '</h1>' + green_line +
+        '<div>' + items2html(item_gr.items) + '</div></div>'
+        for item_gr in item_groups
+    )
+
+    return (
+        '<html><head><title>News</title><meta content="text/html; charset=utf-8" http-equiv="Content-Type">'
+        '<style>@font-face {font-family: DejaVuSans;src: url("' + font + '");}'
+        'body {font-family: DejaVuSans;}</style></head><body>' + content + '</body></html>'
+    )
+
+
+def items2html(items):
+    """ Convert items to HTML code
+
+    Title, link and date are HTML-escaped and the link is placed in a quoted
+    'href' attribute, so characters such as '<', '&', '"' or spaces coming
+    from the feed cannot break the markup. Items are separated by a thin
+    black rule; there is no rule after the last item.
+
+    :type items: list of 'item.Item'
+    :return: HTML code
+    :rtype: str
+    """
+    from html import escape  # local import keeps the module import block untouched
+
+    black_line = '<hr align=center size=1 width=70% color=black>'
+    source_link_text = 'Go to source..'
+
+    item_blocks = []
+
+    for item in items:
+        item_blocks.append(
+            '<div style="margin: 60px 15% 20px 15%;"><h3 align=center>' + escape(item.title) + '</h3>'
+            '<p align="justify">' + item_text_with_imgs2html(item.text, item.img_links) + '</p>'
+            '<br><small><i><a href="' + escape(item.link, quote=True) + '" color=blue>' +
+            source_link_text + '</a><br>'
+            '<span style="float:right; margin-right:90">' + escape(str(item.date)) + '</span></i></small><br></div>'
+        )
+
+    # join() puts the rule only between items, never after the last one.
+    return black_line.join(item_blocks)
+
+
+def item_text_with_imgs2html(text, img_links):
+    """ Convert text with images to HTML code
+
+    For every image link the text is split around that image's marker with
+    'parse_item_text'; the text before the marker is emitted first, then a
+    centered <img> tag. Text, image URL and alternative text are
+    HTML-escaped so feed content cannot break the markup.
+
+    :type text: str
+    :type img_links: list of str
+
+    :return: HTML code
+    :rtype: str
+    """
+    from html import escape  # local import keeps the module import block untouched
+
+    parts = []
+
+    for img_num, link in enumerate(img_links, start=1):
+        # 'text' is rebound to the remainder after the current image marker.
+        alt, before_picture, text = parse_item_text(text, img_num)
+
+        if before_picture:
+            parts.append(escape(before_picture, quote=False))
+
+        parts.append('<p style="text-align: center;">'
+                     '<img src="' + escape(link, quote=True) + '" alt="' + escape(alt, quote=True) +
+                     '" style="margin-bottom: 30px;"></p>')
+
+    parts.append(escape(text, quote=False))
+    return ''.join(parts)
+
+
+def _get_fonts_dir():
+    """ Return the directory that holds the bundled DejaVu fonts """
+    current_dir = path.dirname(path.abspath(__file__))
+    egg_pos = current_dir.find('EGG-INFO')
+
+    if egg_pos == -1:
+        # No 'EGG-INFO' in the path: the fonts directory sits next to this module.
+        # (Slicing with -1 here would silently drop the last path character.)
+        return path.join(current_dir, 'fonts')
+
+    return current_dir[:egg_pos] + path.join('rss_reader', 'fonts')
+
+
+def _download_jpeg(link):
+    """ Return JPEG bytes downloaded from link, or None if unavailable or not a JPEG """
+    try:
+        # Timeout keeps one unresponsive image host from hanging the whole export.
+        response = requests.get(link, timeout=10)
+    except requests.exceptions.RequestException:
+        return None
+
+    if response.status_code != 200 or imghdr.what(None, response.content) != 'jpeg':
+        return None
+
+    return response.content
+
+
+def news2pdf(item_groups, file_path):
+    """ Write news in PDF file
+
+    Every feed title is printed first and each item goes on its own page.
+    JPEG images are downloaded and embedded; any image that cannot be
+    downloaded or is not a JPEG is replaced by a '[image: alt][link]' text
+    placeholder.
+
+    :type item_groups: list of 'item_group.ItemGroup'
+    :type file_path: str
+    """
+    width = 180
+
+    pdf = FPDF()
+    pdf.add_page()
+
+    fonts_dir = _get_fonts_dir()
+
+    pdf.add_font('DejaVu', '', path.join(fonts_dir, 'DejaVuSansCondensed.ttf'), uni=True)
+    pdf.add_font('DejaVuBold', '', path.join(fonts_dir, 'DejaVuSansCondensed-Bold.ttf'), uni=True)
+    pdf.add_font('DejaVuOblique', '', path.join(fonts_dir, 'DejaVuSansCondensed-Oblique.ttf'), uni=True)
+
+    # Counter used to give every temporary image file a distinct name.
+    num = 0
+
+    for item_gr in item_groups:
+        pdf.set_font('DejaVuBold', size=24)
+        pdf.set_text_color(0, 10, 180)
+        pdf.multi_cell(width, 260, item_gr.feed, align='C')
+
+        for item in item_gr.items:
+            pdf.add_page()
+
+            pdf.set_font('DejaVuBold', size=18)
+            pdf.set_text_color(0, 0, 0)
+            pdf.multi_cell(width, 16, item.title, align='C')
+
+            pdf.set_font('DejaVu', size=16)
+
+            text = item.text
+            for ind, link in enumerate(item.img_links):
+                # 'text' is rebound to the remainder after the current image marker.
+                alt, before_picture, text = parse_item_text(text, ind + 1)
+
+                if before_picture:
+                    pdf.multi_cell(width, 16, before_picture)
+
+                img_content = _download_jpeg(link)
+
+                if img_content is None:
+                    pdf.set_font('DejaVuOblique', size=14)
+                    pdf.set_text_color(80, 80, 80)
+
+                    pdf.multi_cell(width, 14, f'[image: {alt}][{link}]')
+
+                    pdf.set_font('DejaVu', size=16)
+                    pdf.set_text_color(0, 0, 0)
+                else:
+                    file_image_name = str(num) + 'tmp_img.jpg'
+
+                    with open(file_image_name, 'wb') as img_file:
+                        img_file.write(img_content)
+
+                    try:
+                        pdf.multi_cell(width, 16, '')
+                        pdf.image(file_image_name, x=75)
+                        pdf.multi_cell(width, 16, '')
+                    finally:
+                        # Remove the temporary file even if embedding the image fails.
+                        remove(file_image_name)
+
+                    num += 1
+
+            pdf.multi_cell(width, 16, text)
+
+            pdf.set_font('DejaVuOblique', size=11)
+            pdf.set_text_color(0, 0, 255)
+            pdf.multi_cell(width, 11, '')
+            pdf.cell(width, 11, 'Go to source...', link=item.link)
+
+            pdf.set_text_color(0, 0, 0)
+            pdf.multi_cell(width, 11, '')
+            pdf.multi_cell(width, 11, str(item.date))
+
+        pdf.multi_cell(width, 16, '')
+
+    pdf.output(file_path)
+
+
+def parse_item_text(text, img_num):
+    """ Return alternative text of image, text before image and text after image
+
+    The image marker is expected to look like '[image N: alt][N]'. When the
+    marker for 'img_num' is not present, the text is left untouched and
+    returned as the "after" part with empty alternative text and empty
+    "before" part.
+
+    :type text: str
+    :type img_num: int
+
+    :rtype: tuple of str
+    """
+    num_str = str(img_num)
+
+    img_begin = text.find(f'[image {num_str}:')
+    if img_begin == -1:
+        return '', '', text
+
+    closing_begin = text.find(f'[{num_str}]', img_begin)
+    if closing_begin == -1:
+        # Without this guard the -1 from find() would produce bogus slices.
+        return '', '', text
+
+    # Position right after the closing '[N]'.
+    img_end = closing_begin + len(num_str) + 2
+
+    # Skip '[image N: ' (9 fixed characters plus the digits of N) ...
+    alt_begin = img_begin + len(num_str) + 9
+    # ... and stop before the trailing '][N]' (3 fixed characters plus the digits).
+    alt_end = img_end - len(num_str) - 3
+
+    alt = text[alt_begin:alt_end]
+    before_picture = text[:img_begin]
+    after_picture = text[img_end:]
+
+    return alt, before_picture, after_picture