Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
/venv/
/.idea/
23 changes: 23 additions & 0 deletions final_task/rss_reader/Tests/rss_reader_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import unittest
import final_task.rss_reader.rss_reader as reader


class MyTestCase(unittest.TestCase):
def test_totext(self):
self.assertEqual(reader.to_text('<div class="test">Test</div>'), 'Test')

def test_checkurl(self):
self.assertTrue(reader.check_url(url='https://news.yahoo.com/rss/', verbose=False), True)
self.assertFalse(reader.check_url(url='https://google.com/', verbose=False), False)

def test_getfeed(self):
self.assertIsNotNone(reader.get_feed(url='https://news.yahoo.com/rss/', verbose=False))

def test_formatfeed(self):
self.assertIsInstance(reader.format_feed
(reader.get_feed(url='https://news.yahoo.com/rss/', verbose=False),
verbose=False, limit=1), cls=tuple)


if __name__ == '__main__':
unittest.main()
Empty file.
7 changes: 7 additions & 0 deletions final_task/rss_reader/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
certifi==2019.9.11
chardet==3.0.4
feedparser==5.2.1
html2text==2019.9.26
idna==2.8
requests==2.22.0
urllib3==1.25.6
168 changes: 168 additions & 0 deletions final_task/rss_reader/rss_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import feedparser
import argparse
import html2text
import json
from requests import get
from datetime import datetime


VERSION = '[RSS Reader v0.02]'


def to_text(html, rehtml=False):
"""
This function creates instance of html2text.HTML2text
and configuring it.
We will use this to cleanup all HTML markup in our feed
:param html: Text to format
:param rehtml: True or False
:return: formatted text
"""
formatter = html2text.HTML2Text()
formatter.wrap_links = False
formatter.skip_internal_links = True
formatter.inline_links = True
formatter.ignore_anchors = True
formatter.ignore_images = True
formatter.ignore_emphasis = True
formatter.ignore_links = True
text = formatter.handle(html)
text = text.strip(' \t\n\r')
if rehtml:
text = text.replace('\n', '<br/>')
text = text.replace('\\', '')
return text


def check_url(url: str, verbose: bool):
"""
This function checks url provided by user
Function will check if Content-Type of document on url
is xml
:param url: url provided by user
:param verbose: True or False
:return: True in case of xml,
False and closing program in all other cases
"""
if verbose:
print('Wait a second, we will check if there is any RSS in URL you have given')
print('-' * 50)
response = get(url)
r = response.headers['Content-Type'] # We will check only headers

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is better to avoid one-letter variable names

if 'xml' not in r:
print('Something went wrong. \n'
'Looks like URL you\'ve entered doesn\'t content any RSS feed')
return False
else:
return True


def get_feed(url: str, verbose: bool):
"""
Caching feed by creating a feedparser instance.
Returns non-formatted feed
:param url: url provided by user
:param verbose: True of False
:return: non-formatted feed
"""
if verbose:

@HenadziStantchik HenadziStantchik Nov 8, 2019

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here and below. For such logging purposes it is bettert to use logging standart library. Check it out

print('Establishing connection')
print('-' * 50)
feed = feedparser.parse(url)
if feed is None:
print('STOP')
if verbose:
print('Connection established')
print('-' * 50)
return feed


def to_json(feed):
jsoned = json.dumps(feed, ensure_ascii=False, indent=4)
return jsoned

def format_feed(feed, verbose: bool, limit: int):
"""
This function recieves feed and format all entries
by using them as to_text() arguments to remove
all HTML markup
:param feed: Cached feed returned by get_feed()
:param verbose: True or False
:param limit: Standart limit is 1 article
:return: title, entry_title(Article title), description(Text of article), date
"""
try:
entry_title = to_text(feed.entries[limit].title)
description = to_text(feed.entries[limit].description)
link = feed.entries[limit].link
date = to_text(feed.entries[limit].published)
return entry_title, description, link, date
except AttributeError as error: # in case of AttributeError exception
with open('log' + str(datetime.now()) + '.txt', 'w') as log: # we will create a .txt log-file
log.write('[URL]: ' + str(args.source) + '\n') # providing info about feed that raised an exception
log.write('[ERROR]: ' + str(error) + '\n') # and text of error
print('An error has occured \n'
'Log file was created in program folder \n'
'To help us debug a program, please send this file to h4j0rx@gmail.com')
except IndexError as indexerror:
print("You've specified too many articles to print\n"
"Feed doesn't have specified number of articles\n")
print('Number of articles in feed: ' + str(len(feed.entries)))
print('Number of articles printed: ' + str(len(feed.entries)))
exit()


def feed_info():
print('[URL]: ' + args.source)
print('[Feed]: ' + feed.feed.title)
return None


def main(entry_title, description, link, date):
"""
This function just prints all parameters given by format_feed()
:param title: Title of whole feed returned by format_feed()
:param entry_title: Title of one article returned by format_feed()
:param description: Description of one article returned by format_feed()
:param date: Publishing date of one article returned by format_feed()
:return: None
"""
print('-' * 50)
print('[Title]: ' + entry_title)
print('[Text]: ' + description)
print('[Link]: ' + link)
print('[Date]: ' + date)
return None


def arg_parse():
parser = argparse.ArgumentParser()
parser.add_argument('source', type=str, help='Please enter a valid URL for RSS Feed')
parser.add_argument('--limit', type=int, default=50, help='You can set a limit for news')
parser.add_argument('--verbose', action='store_true', help='If you want to know '
'what\'s happening, set this to True')
parser.add_argument('--version', action='store_true', help='Print version of program')
parser.add_argument('--json', action='store_true', help='Prints feed in JSON format')
args = parser.parse_args()
if args.limit == 0:
Comment thread
HenadziStantchik marked this conversation as resolved.
print('You have entered zero values to print, please specify another limit value')
if args.version:
print(VERSION)
return args


if __name__ == '__main__':
args = arg_parse()
feed = get_feed(args.source, args.verbose)
feed_info()
if args.json:
print(to_json(feed))
else:
if args.verbose:
print('Formatting your feed')
print('-' * 50)
limit = [index for index in range(0, args.limit+1)]
del limit[-1]
for limit in limit:
entry_title, description, link, date = format_feed(feed, args.verbose, limit)
main(entry_title, description, link, date)
12 changes: 12 additions & 0 deletions final_task/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from setuptools import setup

setup(
name='rss-reader',
version='0.01',
packages=['final_task', 'final_task.rss_reader'],
url='',
license='',
author='Vitali Kozlou',
author_email='h4j0rx@gmail.com',
description='One shot command-line RSS Parser and reader [Iteration 2]'
)