diff --git a/final_task/README.md b/final_task/README.md index 7af281f..a587c33 100644 --- a/final_task/README.md +++ b/final_task/README.md @@ -1,3 +1,57 @@ -# Your readme here -Some text. -Checkout how to write this file using *markdown*. +# RSS_READER +--------------------------------------------------------------------------- +RSS reader is a command-line utility. + +### Usage +--------------------------------------------------------------------------- +usage: rss_reader.py [source] [-h] [--version] [--json] [--verbose] [--limit LIMIT] [--date DATE] [--to-pdf PATH] [--to-html PATH] [--colorize] + +Pure Python command-line RSS reader. + +positional arguments: + - source            *RSS URL* + +optional arguments: + - -h, --help            *show this help message and exit* + - --version            *Print version info* + - --json            *Print result as JSON in stdout* + - --verbose            *Output verbose status messages* + - --limit LIMIT            *Limit news topics if this parameter provided* + - --date DATE            *News from the specified day will be printed out. Format: YYYYMMDD* + - --to-pdf PATH            *Create PDF file with news* + - --to-html PATH            *Create HTML file with news* + - --colorize            *Print news in colorized mode (not for json mode)* + +You must specify a source, a date, or both. +If both are specified, stored news is filtered by both the date and the source. + +### Json structure +--------------------------------------------------------------------------- +{ +            "feed": [feed], +            "items": [ +                        { +                                    "title": [title], +                                    "date": [date], +                                    "link": [link], +                                    "text": [text], +                                    "img_links": [ +                                                [link1], [link2], ... 
class NewsNotFoundError(Exception):
    """Raised when the storage holds no news for the requested date.

    The message names the date (formatted as ``YYYY.MM.DD``), the source
    when one was given, and the storage that was searched.
    """

    def __init__(self, date, storage_name, source=None):
        pieces = ['News by date ', datetime.strftime(date, '%Y.%m.%d')]

        # The source part is optional: it only appears for a truthy source.
        if source:
            pieces.extend((' and by source ', source))

        pieces.extend((' not found in storage ', storage_name))
        super().__init__(''.join(pieces))
@dataclass
class Item:
    """A single news entry.

    ``__repr__`` renders the entry as the multi-line text printed to the
    user; colour codes are included when ``tools.colorize`` is truthy.
    """

    title: str
    date: str
    link: str
    text: str
    img_links: List[str]

    def __repr__(self):
        if tools.colorize:
            # Each label is bright, each value is normal weight in the
            # same colour.
            parts = [
                Style.BRIGHT + Fore.LIGHTBLUE_EX + 'Title: ',
                Style.NORMAL + Fore.LIGHTBLUE_EX + self.title,
                Style.BRIGHT + Fore.LIGHTMAGENTA_EX + '\nDate: ',
                Style.NORMAL + Fore.LIGHTMAGENTA_EX + self.date,
                Style.BRIGHT + Fore.RED + '\nLink: ',
                Style.NORMAL + Fore.RED + self.link,
                Style.BRIGHT + Fore.LIGHTCYAN_EX + '\nText: ',
                Style.NORMAL + Fore.LIGHTCYAN_EX + self.text + '\n',
            ]
            img_header = Style.BRIGHT + Fore.LIGHTRED_EX + 'Image links:\n' + Style.NORMAL + Fore.LIGHTRED_EX
        else:
            parts = [
                f'Title: {self.title}',
                f'\nDate: {self.date}',
                f'\nLink: {self.link}',
                f'\nText: {self.text}\n',
            ]
            img_header = 'Image links:\n'

        # The image section is omitted entirely when there are no links.
        if self.img_links:
            parts.append(img_header)
            parts.extend(f'\t[{pos}]: [{url}]\n' for pos, url in enumerate(self.img_links, start=1))

        return ''.join(parts)
def get_item_group_from_feedparser(parser):
    """ Retrieve all items from feedparser and return item group.

    An entry is skipped when it lacks a description, title, publication
    date or link, or when its formatted description has no text.

    :type parser: 'feedparser.FeedParserDict'

    :rtype: ItemGroup
    """
    items = list()

    logging.info('Loop for retrieving items.')
    for entry in parser.entries:
        try:
            text, img_links = format_description(entry.description)

            # Read every required field inside the guard: a missing field
            # raises AttributeError, which used to escape this function
            # when it happened on title/published/link.
            title = html_unescape(entry.title)
            published = entry.published
            link = entry.link
        except AttributeError:
            continue

        if not text:
            continue

        items.append(Item(
            title=title,
            date=published,
            link=link,
            text=text,
            img_links=img_links
        ))

    return ItemGroup(feed=parser.feed.title, items=items)
def news_as_json_str_from_list(item_groups):
    """ Convert list of news in json format

    Every group is turned into a plain dict with ``asdict`` and the list
    of dicts is dumped with a 4-space indent, keeping non-ASCII characters.

    :type item_groups: list of 'item_group.ItemGroup'
    :rtype: str
    """
    serializable = list(map(asdict, item_groups))

    return jdumps(serializable, indent=4, ensure_ascii=False)
' + font = '../fonts/DejaVuSansCondensed.ttf' + + html_code = 'News' \ + '_content_' + + content = '' + + for item_gr in item_groups: + item_gr_html = '
' + green_line + '

' + item_gr.feed + '

' + green_line + \ + '
' + items2html(item_gr.items) + '
' + content += item_gr_html + + html_code = html_code.replace('_content_', content) + + return html_code + + +def items2html(items): + """ Convert items to HTML code + + :type items: list of 'item.Item' + :return: HTML code + :rtype: str + """ + black_line = '
' + source_link_text = 'Go to source..' + items_html = '' + + for item in items: + itm_html = '

' + item.title + '

' + \ + '

' + item_text_with_imgs2html(item.text, item.img_links) + '

' + \ + '
' + source_link_text + '
' + \ + '' + str(item.date) + '

' + + items_html += itm_html + black_line + + items_html = items_html[:-len(black_line)] + return items_html + + +def item_text_with_imgs2html(text, img_links): + """ Convert text with images to HTML code + + :type text: str + :type img_links: list of str + + :return: HTML code + :rtype: str + """ + text_and_imgs = '' + + for ind, link in enumerate(img_links): + alt, before_picture, text = parse_item_text(text, ind + 1) + + if before_picture: + text_and_imgs += before_picture + + text_and_imgs += '

' \ + '' + alt + '

' + + text_and_imgs += text + return text_and_imgs + + +def news2pdf(item_groups, file_path): + """ Write news in PDF file + + :type item_groups: list of 'item_group.ItemGroup' + :type file_path: str + """ + width = 180 + + pdf = FPDF() + pdf.add_page() + + current_dir = path.dirname(path.abspath(__file__)) + fonts_dir = current_dir[:current_dir.find('EGG-INFO')] + path.join('rss_reader', 'fonts') + + pdf.add_font('DejaVu', '', path.join(fonts_dir, 'DejaVuSansCondensed.ttf'), uni=True) + pdf.add_font('DejaVuBold', '', path.join(fonts_dir, 'DejaVuSansCondensed-Bold.ttf'), uni=True) + pdf.add_font('DejaVuOblique', '', path.join(fonts_dir, 'DejaVuSansCondensed-Oblique.ttf'), uni=True) + + num = 0 + + for item_gr in item_groups: + pdf.set_font('DejaVuBold', size=24) + pdf.set_text_color(0, 10, 180) + pdf.multi_cell(width, 260, item_gr.feed, align='C') + + for item in item_gr.items: + pdf.add_page() + + pdf.set_font('DejaVuBold', size=18) + pdf.set_text_color(0, 0, 0) + pdf.multi_cell(width, 16, item.title, align='C') + + pdf.set_font('DejaVu', size=16) + + text = item.text + for ind, link in enumerate(item.img_links): + alt, before_picture, text = parse_item_text(text, ind+1) + + if before_picture: + pdf.multi_cell(width, 16, before_picture) + + try: + img = requests.get(link) + if img.status_code != 200 or imghdr.what(None, img.content) != 'jpeg': + raise requests.exceptions.ConnectionError() + + except requests.exceptions.ConnectionError: + pdf.set_font('DejaVuOblique', size=14) + pdf.set_text_color(80, 80, 80) + + pdf.multi_cell(width, 14, f'[image: {alt}][{link}]') + + pdf.set_font('DejaVu', size=16) + pdf.set_text_color(0, 0, 0) + else: + file_image_name = str(num) + 'tmp_img.jpg' + + with open(file_image_name, 'wb') as img_file: + img_file.write(img.content) + + pdf.multi_cell(width, 16, '') + pdf.image(file_image_name, x=75) + pdf.multi_cell(width, 16, '') + + remove(file_image_name) + num += 1 + + pdf.multi_cell(width, 16, text) + + 
def parse_item_text(text, img_num):
    """ Return alternative text of image, text before image and text after image

    The image placeholder has the form ``[image N: alt][N]`` where ``N`` is
    *img_num*.

    :param text: item text that may contain the placeholder
    :type text: str

    :param img_num: number of the placeholder to look for
    :type img_num: int

    :return: alternative text, text before the placeholder and text after
             it; ``('', '', text)`` when the placeholder is missing or
             incomplete, so that no text is lost
    :rtype: tuple of str
    """
    opening = f'[image {img_num}:'
    closing = f'][{img_num}]'

    img_begin = text.find(opening)
    if img_begin == -1:
        return '', '', text

    # Look for the full '][N]' closing sequence instead of a bare '[N]',
    # so an alt text that itself contains '[N]' does not end the
    # placeholder too early.
    body_begin = img_begin + len(opening)
    alt_end = text.find(closing, body_begin)
    if alt_end == -1:
        return '', '', text

    # One character after the colon (the separating space) is not part of
    # the alternative text.
    alt = text[body_begin + 1:alt_end]

    before_picture = text[:img_begin]
    after_picture = text[alt_end + len(closing):]

    return alt, before_picture, after_picture
+ + :param news_source: url of RSS + :type news_source: str + + :type items_group: 'item_group.ItemGroup' + + :param file_name: storage file + :type file_name: str + """ + logging.info('Reading news from file named ' + file_name) + try: + with open(file_name, 'rb') as file: + news = dict(pickle.load(file)) + except FileNotFoundError: + news = dict() + + logging.info('Adding current news to news from file.') + if news_source in news.keys(): + unique_news = merge_lists(news[news_source].items, items_group.items) + news[news_source].items = unique_news + else: + news[news_source] = items_group + + logging.info('Writing news in file named ' + file_name) + with open(file_name, 'wb') as file: + pickle.dump(news, file) + + +def get_news_by_date(date, file_name, source=None, limit=None): + """ Retrieve and return news by date from storage + + If source is specified return news from this source. + If limit is specified return limited count of news. + + :param date: date of news publishing + :type date: 'datetime.datetime' + + :param file_name: storage file + :type file_name: str + + :param source: url of RSS + :type source: str + + :param limit: limited count of returned news + :type limit: int + + :rtype: list of 'item_group.ItemGroup' + """ + logging.info('Reading news from file named ' + file_name) + try: + with open(file_name, 'rb') as file: + news = dict(pickle.load(file)) + except FileNotFoundError: + raise StorageNotFoundError('Storage ' + file_name + ' not found.') + + logging.debug('source = ' + (source or 'None')) + logging.debug('limit = ' + (str(limit) or 'None')) + logging.info('Retrieving news by date ' + str(date)) + + list_of_news = list() + if source and source in news.keys(): + item_group_by_date = retrieve_news_by_date(date, news[source], limit) + + if item_group_by_date.items: + list_of_news.append(ItemGroup(feed=item_group_by_date.feed, items=item_group_by_date.items)) + + elif not source: + list_of_news = retrieve_news_by_date_from_list(date, 
def retrieve_news_by_date(date, item_group, limit=None):
    """ Retrieve and return news by date from item group

    Item dates are parsed with ``email.utils.parsedate_to_datetime``, so
    numeric offsets (``-0500``) as well as zone names such as ``GMT`` are
    accepted. An item matches when the calendar day of its date, as
    written in the item, equals the day of *date*. Items whose date cannot
    be parsed are skipped.

    If limit is specified return limited count of news.

    :param date: date of news publishing
    :type date: 'datetime.datetime'

    :type item_group: 'item_group.ItemGroup'

    :param limit: limited count of returned news
    :type limit: int

    :rtype: 'item_group.ItemGroup'
    """
    # Imported here so the block does not depend on a new file-level import.
    from email.utils import parsedate_to_datetime

    wanted_day = date.date()
    items_by_date = list()

    for item in item_group.items:
        try:
            item_day = parsedate_to_datetime(item.date).date()
        except (TypeError, ValueError):
            # Malformed date (the exception type depends on the Python
            # version): one bad item must not abort the whole lookup.
            continue

        if item_day == wanted_day:
            items_by_date.append(item)

    if limit:
        items_by_date = items_by_date[:limit]

    return ItemGroup(feed=item_group.feed, items=items_by_date)
def format_description(description):
    """ Format 'description' tag.

    Collects the links of all images that have a non-empty 'src' and
    replaces each of those 'img' tags with the construction
    [image N: alt][N], where N is the 1-based position of the link in the
    returned list. Images without 'src' are left out of the numbering, so
    N always matches the list of links. A missing 'alt' is treated as an
    empty string.

    :param description: description tag with his content
    :type description: str

    :return: content of formatted description and list of image links
    :rtype: tuple of: str and list of str
    """

    logging.info('Creating description soup.')
    description_soup = BeautifulSoup(description, 'html.parser')
    links = list()

    logging.info('Replacing img tags and extracting links.')
    for img in description_soup.find_all('img'):
        # .get() instead of indexing: an <img> without 'src' or 'alt'
        # would otherwise raise KeyError.
        src = img.get('src')
        if not src:
            continue

        links.append(src)
        num = len(links)
        img.replace_with(f'[image {num}: {img.get("alt", "")}][{num}]')

    return description_soup.text, links
def print_news(json_arg, item_group):
    """ Print news in stdout

    :param json_arg: if True news print as json
    :type json_arg: bool
    :type item_group: 'item_group.ItemGroup'
    """
    if not json_arg:
        # Plain mode: rely on the string form of the item group itself.
        logging.info('Printing news.')
        print(item_group)
        return

    logging.info('Converting item group to json string.')
    rendered = news_converter.news_as_json_str(item_group)

    logging.info('Printing news as json.')
    print(rendered)
def write_in_file(html_path, pdf_path, item_groups):
    """ Write news as HTML or/and PDF in file

    A falsy path disables the corresponding output. The '.html' / '.pdf'
    extension is appended when the given path does not end with it.

    :param html_path: path to HTML file for writing
    :type html_path: str
    :param pdf_path: path to PDF file for writing
    :type pdf_path: str

    :param item_groups: news for writing
    :type item_groups: list of 'item_group.ItemGroup'
    """
    if html_path:
        target_html = html_path if html_path.endswith('.html') else html_path + '.html'

        logging.info('Getting HTML code.')
        markup = news_converter.news2html(item_groups)

        logging.info('Writing news in ' + target_html)
        with open(target_html, 'w', encoding='utf-8') as out:
            out.write(markup)

    if pdf_path:
        target_pdf = pdf_path if pdf_path.endswith('.pdf') else pdf_path + '.pdf'

        logging.info('Writing news in ' + target_pdf)
        news_converter.news2pdf(item_groups, target_pdf)
def merge_lists(list1, list2):
    """ Merge two lists

    Returns a deep copy of list1 followed by those elements of list2 that
    are not equal to anything already in the result; the inputs are left
    untouched.
    """
    logging.info('Merging lists')
    merged = deepcopy(list1)

    for candidate in list2:
        if candidate in merged:
            continue
        merged.append(candidate)

    return merged
class TestItem(unittest.TestCase):
    """Checks the plain-text rendering of an Item."""

    def test_item_as_str(self):
        itm = Item('title', 'date', 'link', 'text', ['img1', 'img2'])

        # One entry per output line; every line, including the last one,
        # is terminated by a newline.
        expected_lines = [
            'Title: title',
            'Date: date',
            'Link: link',
            'Text: text',
            'Image links:',
            '\t[1]: [img1]',
            '\t[2]: [img2]',
        ]
        expected_str = '\n'.join(expected_lines) + '\n'

        self.assertEqual(str(itm), expected_str)
test_get_item_group_from_feedparser(self): + text_for_parsing = '' \ + 'title0link0descr' \ + 'title2

text2

' \ + 'link2date2
' \ + 'title3

text3

link3' \ + 'date3
' + + parser = parse(text_for_parsing) + self.item_gr.items = self.item_gr.items[1:] + + self.assertEqual(item_group.get_item_group_from_feedparser(parser), self.item_gr) + + def test_item_group_as_str(self): + expected_str = 'Feed: title0\n' \ + '\nTitle: title1' \ + '\nDate: date1' \ + '\nLink: link1' \ + '\nText: [image 1: alt1][1]text1' \ + '\nImage links:\n\t[1]: [img1]\n' \ + '\nTitle: title2' \ + '\nDate: date2' \ + '\nLink: link2' \ + '\nText: text2\n\n' + self.item_gr.items = self.item_gr.items[:2] + + self.assertEqual(str(self.item_gr), expected_str) + + +if __name__ == '__main__': + unittest.main() diff --git a/final_task/tests/test_news_converter.py b/final_task/tests/test_news_converter.py new file mode 100644 index 0000000..83bab4f --- /dev/null +++ b/final_task/tests/test_news_converter.py @@ -0,0 +1,111 @@ +import unittest +import news_converter +from item import Item +from item_group import ItemGroup + + +class TestNewsConverter(unittest.TestCase): + def setUp(self): + self.items_json = [Item('title1', 'date1', 'link1', 'text1', ['src1', 'src2']), + Item('title2', 'date2', 'link2', 'text2', [])] + self.expected_json_str = ' "items": [\n' + \ + ' {\n' + \ + ' "title": "title1",\n' + \ + ' "date": "date1",\n' + \ + ' "link": "link1",\n' + \ + ' "text": "text1",\n' + \ + ' "img_links": [\n' + \ + ' "src1",\n' + \ + ' "src2"\n' + \ + ' ]\n' + \ + ' },\n' + \ + ' {\n' + \ + ' "title": "title2",\n' + \ + ' "date": "date2",\n' + \ + ' "link": "link2",\n' + \ + ' "text": "text2",\n' + \ + ' "img_links": []\n' + \ + ' }\n' + \ + ' ]' + + self.item1 = Item('title', 'date', 'link', 'before[image 1: alt1][1]middle[image 2: alt2][2]after', + ['img1', 'img2']) + self.expected_html1 = 'before

' \ + 'alt1' \ + '

middle

' \ + 'alt2

after' + + self.item2 = Item('title', 'date', 'link', 'text', []) + self.expected_html2 = 'text' + + self.expected_items_html = '

title

' \ + '

' + self.expected_html1 + '


' \ + 'Go to source..
' \ + 'date

' \ + '
' \ + '

title

' \ + '

' + self.expected_html2 + '


' \ + 'Go to source..
' \ + 'date

' + + self.item_gr = ItemGroup('feed', [self.item1, self.item2]) + + def test_news_as_json_str(self): + item_group = ItemGroup('feed title', self.items_json) + + expected_result = '{\n' + \ + ' "feed": "feed title",\n' + \ + self.expected_json_str + '\n' + \ + '}' + + self.assertEqual(news_converter.news_as_json_str(item_group), expected_result) + + def test_news_as_json_str_from_list(self): + item_group1 = ItemGroup('feed title 1', self.items_json) + item_group2 = ItemGroup('feed title 2', self.items_json) + + expected_result = '[\n' + \ + ' {\n' + \ + ' "feed": "feed title 1",\n' + \ + ' ' + self.expected_json_str.replace('\n', '\n ') + '\n' + \ + ' },\n' + \ + ' {\n' + \ + ' "feed": "feed title 2",\n' + \ + ' ' + self.expected_json_str.replace('\n', '\n ') + '\n' + \ + ' }\n' + \ + ']' + + item_groups = [item_group1, item_group2] + self.assertEqual(news_converter.news_as_json_str_from_list(item_groups), expected_result) + + def test_item_text_with_imgs2html(self): + resulting_str1 = news_converter.item_text_with_imgs2html(self.item1.text, self.item1.img_links) + self.assertEqual(resulting_str1, self.expected_html1) + + resulting_str2 = news_converter.item_text_with_imgs2html(self.item2.text, self.item2.img_links) + self.assertEqual(resulting_str2, self.expected_html2) + + def test_items2html(self): + resulting_str = news_converter.items2html([self.item1, self.item2]) + self.assertEqual(resulting_str, self.expected_items_html) + + def test_news2html(self): + item_groups = [ItemGroup('feed1', [self.item1, self.item2]), ItemGroup('feed2', [self.item1])] + expected_str = 'News' \ + '
' \ + '

feed1

' \ + '
' + self.expected_items_html + \ + '

feed2

' \ + '
' \ + '

title

' + self.expected_html1 + '


' \ + 'Go to source..
' \ + 'date

class TestNewsStorage(unittest.TestCase):
    """Tests for looking up stored news by date."""

    def setUp(self):
        # Items 1 and 3 fall on the reference day; 2 and 4 do not.
        stamps = (
            'Sat, 16 Nov 2019 19:45:37 -0500',
            'Sun, 17 Nov 2019 00:30:00 -0500',
            'Sat, 16 Nov 2019 06:13:25 -0500',
            'Fri, 15 Nov 2019 10:18:08 -0500',
        )
        self.item1, self.item2, self.item3, self.item4 = (
            Item(f'title{pos}', stamp, f'link{pos}', f'text{pos}', [])
            for pos, stamp in enumerate(stamps, start=1)
        )
        self.date = datetime(2019, 11, 16)

    def test_get_news_by_date(self):
        missing_storage = 'nonexistent____________________file_.data'

        with self.assertRaises(StorageNotFoundError):
            news_storage.get_news_by_date(None, missing_storage)

    def test_retrieve_news_by_date(self):
        feed = 'feed'
        source_group = ItemGroup(feed, [self.item1, self.item2, self.item3, self.item4])

        unlimited = news_storage.retrieve_news_by_date(self.date, source_group)
        self.assertEqual(unlimited, ItemGroup(feed, [self.item1, self.item3]))

        limited = news_storage.retrieve_news_by_date(self.date, source_group, 1)
        self.assertEqual(limited, ItemGroup(feed, [self.item1]))

    def test_retrieve_news_by_date_from_list(self):
        feed1 = 'feed1'
        feed2 = 'feed2'
        groups = [ItemGroup(feed1, [self.item1, self.item2]),
                  ItemGroup(feed2, [self.item3, self.item4])]

        expected_group1 = ItemGroup(feed1, [self.item1])
        expected_group2 = ItemGroup(feed2, [self.item3])

        unlimited = news_storage.retrieve_news_by_date_from_list(self.date, groups)
        self.assertEqual(unlimited, [expected_group1, expected_group2])

        limited = news_storage.retrieve_news_by_date_from_list(self.date, groups, 1)
        self.assertEqual(limited, [expected_group1])

name of image' \ + 'text


class TestTools(unittest.TestCase):
    """Tests for the helpers in the tools module."""

    def test_merge_lists(self):
        base = [1, 3, 11, 27]

        # (second list, expected merge result), checked in order.
        cases = (
            ([100, 3, 2, 11, 77], [1, 3, 11, 27, 100, 2, 77]),
            ([3, 11, 1, 27], [1, 3, 11, 27]),
            ([], [1, 3, 11, 27]),
            ([7, 8, 9], [1, 3, 11, 27, 7, 8, 9]),
        )

        for other, expected in cases:
            self.assertEqual(tools.merge_lists(base, other), expected)