diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b448b50 --- /dev/null +++ b/.gitignore @@ -0,0 +1,57 @@ +.pkl +__pycache__/ +.vscode/ +.idea/ +.idea + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*. +.log +local_settings.py +db.sqlite3 + + +# Environments +.env +.venv +env/ +venv/ diff --git a/README.md b/README.md index f3171ab..836ffef 100644 --- a/README.md +++ b/README.md @@ -1,36 +1,51 @@ -# FinalTaskRssReader -For final task pull requests. +# **Rss-reader** +Command-line utility to receive RSS feeds, save it and convert to common formats. + + +## **Example** + +Input: +``` +python3 rss_reader.py "https://news.yahoo.com/rss" -lim 1 +``` +Output: +``` +Title : Rep. Justin Amash turned on Trump. Will his Michigan district follow him — or turn on him? + +Date : 17/11/2019 + +Link : https://news.yahoo.com/rep-justin-amash-turned-on-trump-will-his-michigan-district-follow-him-or-turn-on-him-110017880.html + +Description : If you want to understand how impeachment is being seen by actual Americans, +there may be no better place to go than Grand Rapids, Mich. In part that’s because +the area around Grand Rapids, comprising Michigan’s Third Congressional District, +is one of only about two dozen districts in the nation to vote for Barack Obama and for Donald Trump. 
+``` +Help: +``` +positional arguments: + URL RSS URL + +optional arguments: + -h, --help show this help message and exit + -version Print version info + -json Print result as JSON in stdout + -verbose Outputs verbose status messages + -lim LIMIT Limit news topics if this parameter provided + -date DATE Print news from the specified day + -html Convert news in HTML format + -pdf Convert news in PDF format + ``` + +## Installation +``` +git clone https://github.com/Clonder/FinalTaskRssParser.git +cd FinalTaskRssParser/final_task/rss_reader +pip install -r requirements.txt +pip install rss-reader +``` -## How to create a pull request - -1. Create github account. *Preferrably using email you used when registerer on this course* -2. Fork this repository. ('Fork' button at the top right of this repository page) -3. Open the page of your *new repository* that was created when you forked this repo. -4. Press button clone or download at the middle right of the page and CTRL-C the url. -5. On your machine go to the directory you want. -6. Depending on the OS you are working with, open GitBash(Windows)/Command Line or Terminal(Linux) there -7. Use command `git clone ` - -Congrats! You have successfully forked our repository. - - -## Additional project structure requirements - -1. `setup.py` file for setuptools *must* be in the root of `final_task` folder. Use `setup.py` that is already there. (that means path to this file must end with `final_task/setup.py` ) -2. Entry point to your application, aka its main module *must* be named as `rss_reader.py` . Use `rss_reader.py` that is already in `rss_reader` folder. -3. You should describe how does your project work, how to launch it and etc in README.md in the `final_task/README.md` file. -4. If you used any non-standart libraries they must be listed in `rss_reader/requirements.txt` file. -5. All unit test files should be in separate folder called `tests`. - - -## Pull request requirements(!!!) - -1. 
When creating pull request make sure that `target branch` is `master` on OUR repo, not yours. -2. Pull request name *MUST* be in format: `YourFirstName_YourLastName_EmailYouUsedWhileRegisteringOnThisCourse` -3. Pull request which have any other name format, or invalid e-mail *will be ignored completely until you fix it*. So make sure you specified correct e-mail. -4. In pull request description specify your current iteration. You also can add there any other info you want us to know before we start code review. -5. *Pull request must NOT contain any .pyc files, any virtual environment files/folders, any IDE technical files*. diff --git a/final_task/README.md b/final_task/README.md index 7af281f..88545bf 100644 --- a/final_task/README.md +++ b/final_task/README.md @@ -1,3 +1,49 @@ -# Your readme here -Some text. -Checkout how to write this file using *markdown*. +# **Rss-reader** +Command-line utility to receive RSS feeds, save it and convert to common formats. + + +## **Example** + +Input: +``` +python3 rss_reader.py -lim 1 "https://news.yahoo.com/rss" +``` +Output: +``` +Title : Rep. Justin Amash turned on Trump. Will his Michigan district follow him — or turn on him? + +Date : Sun, 17 Nov 2019 06:00:35 -0500 + +Link : https://news.yahoo.com/rep-justin-amash-turned-on-trump-will-his-michigan-district-follow-him-or-turn-on-him-110017880.html + +Description : If you want to understand how impeachment is being seen by actual Americans, +there may be no better place to go than Grand Rapids, Mich. In part that’s because +the area around Grand Rapids, comprising Michigan’s Third Congressional District, +is one of only about two dozen districts in the nation to vote for Barack Obama and for Donald Trump. 
+``` +Help: +``` +positional arguments: + URL RSS URL + +optional arguments: + -h, --help show this help message and exit + -version Print version info + -json Print result as JSON in stdout + -verbose Outputs verbose status messages + -lim LIMIT Limit news topics if this parameter provided + -date DATE Print news from the specified day + -html Convert news in HTML format + -pdf Convert news in PDF format + ``` + +## Installation +``` +pip install -r requirements.txt +pip install rss-reader +``` + + + + + diff --git a/final_task/rss_reader/DejaVuSans.ttf b/final_task/rss_reader/DejaVuSans.ttf new file mode 100644 index 0000000..39a60f4 Binary files /dev/null and b/final_task/rss_reader/DejaVuSans.ttf differ diff --git a/final_task/rss_reader/__init__.py b/final_task/rss_reader/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/final_task/rss_reader/_database/data b/final_task/rss_reader/_database/data new file mode 100644 index 0000000..e69de29 diff --git a/final_task/rss_reader/_database/dates.txt b/final_task/rss_reader/_database/dates.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/final_task/rss_reader/_database/dates.txt @@ -0,0 +1 @@ + diff --git a/final_task/rss_reader/arguments.py b/final_task/rss_reader/arguments.py new file mode 100644 index 0000000..6dee555 --- /dev/null +++ b/final_task/rss_reader/arguments.py @@ -0,0 +1,55 @@ +import argparse + +version = '5.0' + +""" Add argument commands for script """ + + +def command_line(): + parser = argparse.ArgumentParser( + description='Pure Python command-line RSS reader.', + prog='rss-reader') + + parser.add_argument( + 'URL', + action="store", + help='rss url') + + parser.add_argument( + '-version', + action='version', + help='info about version', + version='rss-reader {}'.format(version)) + + parser.add_argument( + '-json', + help='print result as json in stdout', + action='store_true') + + parser.add_argument( + '-verbose', + help="print lots of debugging 
statements", + action="store_true") + + parser.add_argument( + '-lim', + dest='limit', + type=int, + help='Limit news topics if this parameter provided') + + parser.add_argument( + '-date', + type=str, + help='print news from the specified day') + + parser.add_argument( + '-html', + action='store_true', + help='convert news in html format') + + parser.add_argument( + '-pdf', + action='store_true', + help='convert news in pdf format') + + return parser.parse_args() diff --git a/final_task/rss_reader/check.py b/final_task/rss_reader/check.py new file mode 100644 index 0000000..d409556 --- /dev/null +++ b/final_task/rss_reader/check.py @@ -0,0 +1,10 @@ +import urllib.error +import urllib.request + + +def internet_on(): + try: + urllib.request.urlopen("http://google.com", timeout=5) + return True + except (urllib.error.URLError, urllib.error.HTTPError): + return False diff --git a/final_task/rss_reader/converting.py b/final_task/rss_reader/converting.py new file mode 100644 index 0000000..7ee915a --- /dev/null +++ b/final_task/rss_reader/converting.py @@ -0,0 +1,146 @@ +import logging as log +import requests as r +import check +import os +import datetime +import sys + +from fpdf import FPDF +from dominate import document +from dominate.tags import div, h2, img, p + +import printers + +now = datetime.datetime.now + + +def create_html(items: list) -> document: + """ + convert article data in html format + """ + html_document = document(title='Dominate your HTML') + log.info('Start make html format') + for item in items: + item = printers.prepare_one_item(item) + with html_document: + with div(): + h2("Title: " + item['Title:']) + p("Link: " + item['Link: ']) + img(src=item['Media content:\n']) + p("Description: " + item['Description: ']) + p("Date: " + item['Date:']) + return html_document + + +def write_to_file(items: list) -> None: + result = create_html(items) + data = now().strftime("%d-%m-%Y") + time = now().strftime("%X") + name_of_html = str(data) + '_' + time + 
with open("{}".format(name_of_html), "w") as f: + f.write(str(result)) + log.info("Successful converting into html") + + +def create_pdf(items: list) -> None: + log.info("Start creating pdf") + date = now().strftime("%d-%m-%Y") + time = now().strftime("%X") + pdf = FPDF() + element = str + img_path = str + try: + pdf.add_page() + pdf.add_font("DejaVu", "", "DejaVuSans.ttf", uni=True) + pdf.set_font("DejaVu", "", 10) + except RuntimeError: + log.info("There isn't DejaVuSans.ttf in your rep") + print("Something go wrong, check DejaVuSans.ttf in your rep ") + sys.exit() + pdf.write(8, "RSS feed") + pdf.ln(20) + + for item in items: + item = printers.prepare_one_item(item) + pdf.write(8, "===Wow! News!===") + pdf.ln(10) + pdf.write(8, "Title: " + str(item['Title:'])) + pdf.ln(10) + pdf.write(8, "Link: " + str(item['Link: '])) + pdf.ln(15) + pdf.write(8, "Date: " + str(item["Date:"])) + pdf.ln(10) + img_url = str(item['Media content:\n']) + url_list = img_url.split("\n") + + # There's a difficult moment, cause 'fpdf' lib can't + # get images from the Internet + # So we should use temporary files + + if check.internet_on() and img_url != '': + log.info("There is connection") + try: + for element in url_list: + # Try to understand format of image file + log.info("Try to understand format of image file") + if element.endswith(".png") or element.endswith(".jpg"): + image = r.get(element) + try: + img_path = str(len(element)) + \ + str(img_url[-4:]) + with open(img_path, 'wb') as file: + file.write(image.content) + pdf.image(img_path, w=70, h=50) + pdf.ln(10) + # Some files have .jpg format, but have .jpeg ends + except RuntimeError: + log.info("Some files have .jpg format, but have .jpeg ends") + os.remove(img_path) + img_path = str(len(element)) + '_' + '.jpeg' + with open(img_path, 'wb') as file: + file.write(image.content) + pdf.image(img_path, w=70, h=50) + pdf.ln(10) + # We can't work with .gif, but we can show link of image + elif element.endswith(".gif"): + 
log.info("We can't work with .gif, but we can show link of image") + pdf.write(8, "I can't display images," + " but there's your links") + pdf.write(8, element) + # Some sites have duplicated links with double addresses, + # so we try to parse + else: + index_new_element = element.rfind('http') + new_element = element[index_new_element:] + image = r.get(new_element) + # They may have a format + if new_element.endswith(".jpg"): + img_path = str(len(new_element)) + \ + str(img_url[-4:]) + # They may not have a format + else: + img_path = str(len(new_element)) + \ + '.jpg' + with open(img_path, 'wb') as file: + file.write(image.content) + pdf.image(img_path, w=70, h=50) + pdf.ln(10) + os.remove(img_path) + # There are times when we can't insert pictures + except (RuntimeError, ConnectionError): + os.remove(img_path) + pdf.write(8, "I can't display this image, " + "but there's your link") + pdf.ln(10) + pdf.write(8, str(element)) + # Cases where we don't have the Internet + else: + log.info("There isn't connection") + pdf.write(8, str(item['Media content:\n'])) + pdf.ln(15) + pdf.write(8, "Description: " + str(item['Description: '])) + pdf.ln(10) + pdf.write(8, "===End, news!===") + pdf.ln(10) + name_of_pdf = date + '_' + time + '.pdf' + pdf.output(name_of_pdf, "F") diff --git a/final_task/rss_reader/databaseEmulation.py b/final_task/rss_reader/databaseEmulation.py new file mode 100644 index 0000000..a341e1f --- /dev/null +++ b/final_task/rss_reader/databaseEmulation.py @@ -0,0 +1,94 @@ +import pickle +import logging as log +import os +import datetime + +from typing import Any + + +now = datetime.datetime.now + + +class Singleton(type): + _instances = {} + + def __call__(cls, *args, **kwargs): + if cls not in cls._instances: + cls._instances[cls] = super(Singleton, cls).__call__( + *args, + **kwargs + ) + return cls._instances[cls] + + +class DatabaseEmulation(metaclass=Singleton): + def __init__(self, path_to_dates, path_to_data): + self.path_to_dates = path_to_dates 
+ self.path_to_data = path_to_data + self.dates = {} + with open(path_to_dates, "r") as data_file: + for line in data_file: + if not line: + continue + date, row_number = line.strip().split() + self.dates[date] = int(row_number) + self.dates_file = open(path_to_dates, "a") + self.write_data_stream = open(self.path_to_data, "ab") + self.read_data_stream = open(self.path_to_data, "rb") + + def __del__(self): + self.dates_file.close() + self.write_data_stream.close() + self.read_data_stream.close() + + def check_date(self, date): + return date in self.dates + + def get_id(self, date): + return self.dates[date] + + def get_items(self, dump_idx) -> Any: + for line_id in range(dump_idx): + pickle.load(self.read_data_stream) + result = pickle.load(self.read_data_stream) + self.read_data_stream.seek(0, os.SEEK_SET) + return result + + def write_items(self, rss_items: dict) -> None: + idx = len(self.dates) + date = now().strftime("%d/%m/%Y") + self.write_data_stream.seek(0, os.SEEK_END) + pickle.dump(rss_items, self.write_data_stream) + self.dates[date] = idx + self.dates_file.write("{} {}\n".format(date, idx)) + + +def get_txt_date(data) -> None: + date_id = {} + with open("dates.txt", "r") as f: + for line in f: + key, value = line.strip().split() + date_id[key] = value + + if data == date_id[key]: + date_id[key] = value + with open("{}.txt".format(value), "r") as f: + print(f.read()) + else: + print("There isn't any news from this day") + + +def cash_news() -> None: + log.info("Try to cash news from stdout") + data_id = {} + data = now().strftime("%d/%m/%Y") + + with open("dates.txt", "r") as f: + for line in f: + key, *value = line.split() + data_id[key] = value + len_txt_date = len(data_id) + 1 + remembered_data = {} + with open("dates.txt", "w") as f: + remembered_data[data] = len_txt_date + f.write(str(remembered_data)) diff --git a/final_task/rss_reader/printers.py b/final_task/rss_reader/printers.py new file mode 100644 index 0000000..7f346fb --- /dev/null +++ 
b/final_task/rss_reader/printers.py @@ -0,0 +1,118 @@ +import json +import pprint +import logging as log + +from collections import OrderedDict +from collections import defaultdict +from colorama import Fore +from colorama import Style + +import rss_get_items as filters + + +def split_string_by_lines(input_string: str, word_number: int) -> str: + input_string = input_string.strip().split() + result = '' + for idx in range(len(input_string) // word_number + 1): + start = idx * word_number + end = (idx + 1) * word_number + result += ' '.join(input_string[start: end]) + '\n' + return result.strip() + + +def prepare_one_item(item_xml: defaultdict) -> OrderedDict: + """" + + Take one rss item as dictionary and make ordered dict + with title, date, description and media content + + """ + title = filter_title(item_xml['title']) + date = (item_xml['pubDate']) + date = data_split(date) + news_link = item_xml['link'] + description = split_string_by_lines( + filters.description(item_xml['description']), + 11 + ) + media_content = split_string_by_lines( + ' '.join(item_xml['content']), + 1 + ) + prepared_news = OrderedDict() + + prepared_news["Title:"] = title + prepared_news["Date:"] = date + prepared_news["Link: "] = news_link + prepared_news["Description: "] = description + prepared_news["Media content:\n"] = media_content + return prepared_news + + +def filter_title(title: str) -> str: + new_title = title.replace("'", '') + return new_title + + +def print_one_item(news_item: OrderedDict) -> None: + print(Fore.LIGHTRED_EX + "===Wow! 
News!===") + for key, value in news_item.items(): + print(Fore.LIGHTGREEN_EX + key, value) + print(Fore.RED + "===End, news!===") + print(Style.RESET_ALL) + + +def print_news(items: list) -> None: + """" + Take list of rss items and print all this news + """ + log.info("Start print news") + for item in items: + item = prepare_one_item(item) + print_one_item(item) + log.info("End print news") + + +def make_json(items: list) -> dict: + """ + Convert article data in json format + """ + log.info('Start make json format') + json_format = {} + for idx, item in enumerate(items): + item = prepare_one_item(item) + json_item = { + 'images': item['Media content:\n'], + 'link': item['Link: '], + 'description': item['Description: '], + 'date': item['Date:'], + 'title': item['Title:'], + } + json_format[idx] = json_item + log.info("End make json format") + return json_format + + +def print_json(items: list) -> None: + json_representation = make_json(items) + json_representation = json.dumps(json_representation) + parsed = json.loads(json_representation) + log.info("Start print json") + pp = pprint.PrettyPrinter(indent=4) + pp.pprint(parsed) + log.info("End print json") + + +def data_split(date: str) -> str: + log.info("Converting date") + month = { + 'Dec': '12', 'Jan': '01', 'Feb': '02', + 'Mar': '03', 'Apr': '04', 'May': '05', + 'Jun': '06', 'Jul': '07', 'Aug': '08', + 'Sep': '09', 'Oct': '10', 'Nov': '11' + } + new_date = date + new_date = (new_date.split(' ')[1:4]) + new_date[1] = month[new_date[1]] + new_date = '/'.join(new_date) + return new_date diff --git a/final_task/rss_reader/requirements.txt b/final_task/rss_reader/requirements.txt index e69de29..71c8eff 100644 --- a/final_task/rss_reader/requirements.txt +++ b/final_task/rss_reader/requirements.txt @@ -0,0 +1,28 @@ +arguments==76 +beautifulsoup4==4.8.1 +bs4==0.0.1 +certifi==2019.9.11 +cffi==1.13.2 +chardet==3.0.4 +colorama==0.4.1 +colorize==1.1.0 +cycler==0.10.0 +dominate==2.4.0 +entrypoints==0.3 +feedparser==5.2.1 
+flake8==3.7.9 +fpdf==1.7.2 +idna==2.8 +kiwisolver==1.1.0 +lxml==4.4.1 +mccabe==0.6.1 +numpy==1.17.4 +pycodestyle==2.5.0 +pycparser==2.19 +pyflakes==2.1.1 +pyparsing==2.4.5 +python-dateutil==2.8.1 +requests==2.22.0 +six==1.13.0 +soupsieve==1.9.5 +urllib3==1.25.7 diff --git a/final_task/rss_reader/rss_get_items.py b/final_task/rss_reader/rss_get_items.py new file mode 100644 index 0000000..a3c7db1 --- /dev/null +++ b/final_task/rss_reader/rss_get_items.py @@ -0,0 +1,47 @@ +import logging as log +import urllib.error +import urllib.request +from bs4 import BeautifulSoup +from collections import defaultdict + +import check + + +def get_items(url: str) -> list: + if check.internet_on: + log.info("Start get items from url") + request = urllib.request.Request(url) + try: + soup_xml = BeautifulSoup( + urllib.request.urlopen(request), + "xml" + ) + except (urllib.error.URLError, KeyError): + log.info("URLError, pls try other one") + else: + log.info("Connect to website is fine") + items = [] + xml_items = list(soup_xml.find_all("item")) + for rss_node_xml in xml_items: + item_xml = defaultdict(list) + for node in rss_node_xml.findChildren(): + key = node.name + value = node.text + if key == "content": + item_xml[key].append(node.attrs['url']) + else: + item_xml[key] = value + items.append(item_xml) + log.info("Successful parsing") + return items + else: + log.info("There isn't internet connection") + print("Check your internet connection or use database") + + +def description(description_element: str) -> str: + """ + Try to delete all html in description + """ + log.info("Try to delete all html in description") + return BeautifulSoup(description_element, "html.parser").getText() diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index e69de29..11ddbca 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +import logging as log +import 
sys +import datetime + +from copy import deepcopy + +import converting +import databaseEmulation +import arguments +import printers +import rss_get_items +import check + +now = datetime.datetime.now() + +""" Set basic configs for logging """ +stdoutHandler = log.StreamHandler(sys.stdout) +fileHandler = log.FileHandler("logging.log", "a") +log.basicConfig(format='%(levelname)-8s [%(asctime)s] %(message)s', + level=log.DEBUG, + handlers=[fileHandler]) + + +def print_verbose() -> None: + log.info('try to read log file') + try: + with open('logging.log', 'r') as f: + print(f.read()) + except FileExistsError: + print("There isn't any log") + sys.exit() + + +if __name__ == '__main__': + log.info("Start script") + +try: + database = databaseEmulation.DatabaseEmulation( + '_database/dates.txt', '_database/data' + ) + + received_args = arguments.command_line() + link = received_args.URL + limit = received_args.limit + date = received_args.date + all_items = None + + if received_args.verbose: + log.info("User choose verbose") + print_verbose() + sys.exit() + + if date is not None: + if database.check_date(date): + idx = database.get_id(date) + all_items = database.get_items(idx) + else: + print("Nothing by this date") + sys.exit() + else: + all_items = rss_get_items.get_items(link) + + items = deepcopy(all_items) + + if limit is not None: + log.info("User choose some limits") + items = all_items[:limit] + + if received_args.json: + log.info("User choose json format") + printers.print_json(items) + else: + printers.print_news(items) + database.write_items(all_items) + + if received_args.html: + log.info("User choose html") + converting.write_to_file(items) + + if received_args.pdf: + log.info("User choose pdf") + converting.create_pdf(items) + +except (TypeError, RuntimeError): + if check.internet_on(): + print('Something go wrong, check arguments and database file') + else: + print("Something go wrong, If you don't have internet," + " use the database" + "and check 
arguments") diff --git a/final_task/rss_reader/test/__init__.py b/final_task/rss_reader/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/final_task/rss_reader/test/tests.py b/final_task/rss_reader/test/tests.py new file mode 100644 index 0000000..febcbd2 --- /dev/null +++ b/final_task/rss_reader/test/tests.py @@ -0,0 +1,105 @@ +import unittest +from dominate import document + +import rss_get_items +import converting +import printers + + +class RSSReaderTests(unittest.TestCase): + + def test_link_determine(self): + link_examples = [ + "https://news.google.com/rss?hl=en-US&gl=US&ceid=US:en", + "https://news.yahoo.com/rss/", + "https://news.tut.by/rss/all.rss", + ] + for link in link_examples: + self.assertEqual(type(rss_get_items.get_items(link)), list) + + def test_delete_html_in_description(self): + self.assertEqual(rss_get_items.description('Times Company


'), + 'Times Company') + self.assertEqual(rss_get_items.description('

America grapples with

'), + 'America grapples with') + self.assertEqual(rss_get_items.description('

https://news.yahoo.com/duterte-fires-vice-president-post' + '-125402618.html'), + 'https://news.yahoo.com/duterte-fires-vice-president-post-125402618.html') + + def test_converting_to_html(self): + item = [{'title': '«Ленинград» снова собрал забитую «Минск-Арену». И вот как это было', + 'content': ['https://img.tyt.by/n/sport/07/f/maradona_brest_2018_1_6.jpg'], + 'pubDate': 'Sun, 24 Nov 2019 21:53:00 +0300', + 'description': 'Фото: Евгений Ерчак, TUT.BYВ воскресенье в Минске отгремел концерт группы «Ленинград».' + ' Отгремел, как всегда, с аншлагом - «Минск-Арена» была забита до отказу. ' + 'Как фанаты отрывались в этот вечер под известные хиты - в фоторепортаже TUT.BY.
', + 'link': 'https://sport.tut.by/news/football/662512.html?' + 'utm_campaign=news-feed&utm_medium=rss&utm_source=rss-news'}, + {'title': '«Ленинград» снова собрал забитую «Минск-Арену». И вот как это было', + 'content': ['https://img.tyt.by/n/sport/07/f/maradona_brest_2018_1_6.jpg'], + 'pubDate': 'Sun, 24 Nov 2019 21:53:00 +0300', + 'description': 'Фото: Евгений Ерчак, TUT.BYВ воскресенье в Минске отгремел концерт группы «Ленинград».' + ' Отгремел, как всегда, с аншлагом - «Минск-Арена» была забита до отказу. ' + 'Как фанаты отрывались в этот вечер под известные хиты - в фоторепортаже TUT.BY.
', + 'link': 'https://sport.tut.by/news/football/662512.html?' + 'utm_campaign=news-feed&utm_medium=rss&utm_source=rss-news'}] + self.assertEqual(type(converting.create_html(item)), document) + + def test_make_json_format(self): + item = [{'title': '«Ленинград» снова собрал забитую «Минск-Арену». И вот как это было', + 'content': ['https://img.tyt.by/n/sport/07/f/maradona_brest_2018_1_6.jpg'], + 'pubDate': 'Sun, 24 Nov 2019 21:53:00 +0300', + 'description': 'Фото: Евгений Ерчак, TUT.BYВ воскресенье в Минске отгремел концерт группы «Ленинград».' + ' Отгремел, как всегда, с аншлагом - «Минск-Арена» была забита до отказу. ' + 'Как фанаты отрывались в этот вечер под известные хиты - в фоторепортаже TUT.BY.
', + 'link': 'https://sport.tut.by/news/football/662512.html?' + 'utm_campaign=news-feed&utm_medium=rss&utm_source=rss-news'}, + {'title': '«Ленинград» снова собрал забитую «Минск-Арену». И вот как это было', + 'content': ['https://img.tyt.by/n/sport/07/f/maradona_brest_2018_1_6.jpg'], + 'pubDate': 'Sun, 24 Nov 2019 21:53:00 +0300', + 'description': 'Фото: Евгений Ерчак, TUT.BYВ воскресенье в Минске отгремел концерт группы «Ленинград».' + ' Отгремел, как всегда, с аншлагом - «Минск-Арена» была забита до отказу. ' + 'Как фанаты отрывались в этот вечер под известные хиты - в фоторепортаже TUT.BY.
', + 'link': 'https://sport.tut.by/news/football/662512.html?' + 'utm_campaign=news-feed&utm_medium=rss&utm_source=rss-news'}] + self.assertEqual(type(printers.make_json(item)), dict) + + def test_data_split(self): + self.assertEqual(printers.data_split("Sun, 24 Nov 2019 11:48: 25 -0500"), "24/11/2019") + self.assertEqual(printers.data_split("Sat, 25 Dec 2019 12:12: 15 - 0540"), "25/12/2019") + self.assertEqual(printers.data_split("Mon, 23 Jun 2000 14:34: 12 - 0988"), "23/06/2000") + + def test_filter_title(self): + self.assertEqual(printers.filter_title("Indian fox killing pe'ople in Belarus"), + "Indian fox killing people in Belarus") + self.assertEqual(printers.filter_title("Somet'imes he goes' for a' walk"), + "Sometimes he goes for a walk") + self.assertEqual(printers.filter_title("This is 'some tests for 'this to'ol"), + "This is some tests for this tool") + + def test_split_by_lines(self): + self.assertEqual(printers.split_string_by_lines("Indian fox killing people in Belarus", 1), + "Indian\nfox\nkilling\npeople\nin\nBelarus") + + self.assertEqual(printers.split_string_by_lines("Sometimes he goes for a walk", 2), + "Sometimes he\ngoes for\na walk") + + +if __name__ == "__main__": + unittest.main() diff --git a/final_task/setup.py b/final_task/setup.py index e69de29..3fd1a0d 100644 --- a/final_task/setup.py +++ b/final_task/setup.py @@ -0,0 +1,26 @@ +import os + +from setuptools import setup, find_packages + + +this_directory = os.path.abspath(os.path.dirname(__file__)) +with open(os.path.join(this_directory, 'README.md')) as foo: + long_description = foo.read() + + +setup( + name="rss_reader", + version="5.0", + packages=find_packages(), + author="Stacy Merkushova", + author_email="st.merkush@gmail.com", + url="https://github.com/Clonder/FinalTaskRssParser.git", + description="This is rss_reader", + long_description=long_description, + classifiers=[ + "Programming Language :: Python :: 3", + "Operating System :: OS Independent", + ], + keywords="rss 
reader", + python_requires='>=3.8', +)