Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 63 additions & 3 deletions final_task/README.md
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,3 +1,63 @@
# Your readme here
Some text.
Checkout how to write this file using *markdown*.
# Python RSS reader

## How to install:
**1st way**
* You need to have git installed. Run:
> $ git clone https://github.com/kirill-stp/FinalTaskRssParser.git
* when you are in your workspace folder. Then run:
> $ python setup.py install
* when you are in **final task** folder
**2nd way:**
* To install this package, you must have Python added to your user environment.
* Download the distribution archive
* run $ pip install ./python_rss_reader-1.0.tar.gz

## Usage:
This app provides the following interface:
```shell
usage: rss_reader.py [-h] [--version] [--json] [--verbose] [--limit LIMIT]
[--date DATE]
source

Pure Python command-line RSS reader

positional arguments:
source RSS URL

optional arguments:
-h, --help show this help message and exit
--version Print version info
--json Print result as JSON in stdout
--verbose Outputs verbose
--limit LIMIT Limit news topics
--date DATE Read news from given date (YMD)
```
for example:
> $ python3 rss_reader.py https://news.yahoo.com/rss --date 20191120 --limit 2 --verbose --json

## JSON structure:
```shell
{'Article 1': {'date': time.struct_time,
'images': {'image description': 'url'},
'link': '',
'summary': '',
'title': ''},
'Article 2': {'date': time.struct_time,
'images': {'image description': 'url'},
'link': '',
'summary': '',
'title': ''},
'Feed': 'Feeds from 'url'',
'Link': 'rss link'}

```
## Local news storage:
When **--date** argument is not provided, the news that you received will be saved to the database, if it wasn’t there yet.
Cached data is stored in the rss_reader/cashed_feeds.db file using **shelve**. The database stores a dictionary-like object, where the key is the publication date and the value is an instance of the **Article** class.

## HTML and PDF converting:
You can use **--to-html** and **--to-pdf** to save the feed in the given format. If there is no internet connection, the program inserts image links instead of images (clickable in the PDF). If there is an internet connection, the program downloads the images and embeds them in the file. Titles in the PDF are also clickable.

## Colorizing
This program can colorize normal and json output, using **termcolor**. To add some color to your life, use **--colorize** argument

Binary file not shown.
Binary file not shown.
Binary file added final_task/rss_reader/DejaVuSansCondensed.ttf
Binary file not shown.
Empty file.
17 changes: 17 additions & 0 deletions final_task/rss_reader/args_creater.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import argparse


def arguments(argv=None):
    """Build the command-line parser and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, which is what the
            function did before this parameter existed.

    Returns:
        argparse.Namespace with the attributes ``source``, ``json``,
        ``verbose``, ``limit``, ``date``, ``to_html``, ``to_pdf`` and
        ``colorize``.
    """
    parser = argparse.ArgumentParser(description='Pure Python command-line RSS reader')
    parser.add_argument('source', type=str, help='RSS URL')
    parser.add_argument('--version', action='version', version='%(prog)s 1.0', help='Print version info')

    # Plain on/off switches: each one defaults to False and becomes True when given.
    parser.add_argument('--json', action='store_true', help='Print result as JSON in stdout')
    parser.add_argument('--verbose', action='store_true', help='Outputs verbose')

    # Options that carry a value; both are None when omitted.
    parser.add_argument('--limit', type=int, help='Limit news topics')
    parser.add_argument('--date', type=str, help='Read news from given date')

    parser.add_argument('--to-html', action='store_true', help='Save feed in html format')
    parser.add_argument('--to-pdf', action='store_true', help='Save feed in pdf format')
    parser.add_argument('--colorize', action='store_true', help='Print result in colorize mode')

    # Passing argv through lets callers (and tests) parse an explicit list.
    return parser.parse_args(argv)
87 changes: 87 additions & 0 deletions final_task/rss_reader/article.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
from string_operations import *

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is better to not use wildcard import at all.

import logging
import datetime
from termcolor import colored



class Article:
    """Single article: fields extracted from a parsed entry plus renderers."""

    # Horizontal rule printed above and below every article.
    _RULE = "_" * 79

    def __init__(self, parsed, source):
        """Receive a parsed article and extract data from it.

        Args:
            parsed: Parsed entry object; this constructor reads its
                ``title``, ``link``, ``summary`` and ``media_content``
                attributes and passes the object to ``extract_date``.
            source: Stored unchanged as ``feed_link``.
        """
        self.title = make_string_readable(parsed.title)
        self.link = parsed.link
        self.feed_link = source
        self.published = extract_date(parsed)
        summary_ = extract_topic_info_from_summary(parsed.summary)
        self.summary = make_string_readable(summary_)
        # Iterated later as a sequence of mappings that have a 'url' key.
        self.media = parsed.media_content

        description_ = extract_image_info_from_summary(parsed.summary)
        self.media_description = make_string_readable(description_)

    @staticmethod
    def _emit(text, color, is_colored):
        """Print ``text``, wrapped in ``color`` via termcolor when ``is_colored`` is truthy."""
        print(colored(text, color) if is_colored else text)

    @staticmethod
    def _format_date(date):
        """Return ``date`` as 'YYYY/MM/DD, HH:MM:SS' plus a trailing newline.

        ``date`` must expose the ``tm_*`` integer attributes of
        ``time.struct_time``. Fields are zero-padded so that e.g. five past
        nine reads '09:05:00' instead of '9:5:0'.
        """
        return (
            f'{date.tm_year}/{date.tm_mon:02d}/{date.tm_mday:02d}, '
            f'{date.tm_hour:02d}:{date.tm_min:02d}:{date.tm_sec:02d}\n'
        )

    @staticmethod
    def _image_reference_range(image_count):
        """Return the link-number range used for images, e.g. '[2] - [5]'.

        Link [1] is the article itself, so images start at [2]. A single
        image yields '[2]' and no images yield an empty string.
        """
        if image_count <= 0:
            return ''
        if image_count == 1:
            return '[2]'
        return f'[2] - [{image_count + 1}]'

    def _print_title(self, is_colored):
        """Print the wrapped title, tagging its last line with link number [1]."""
        title_lines = cut_string_to_length_with_space(self.title, 77)
        last_index = len(title_lines) - 1
        for index, line in enumerate(title_lines):
            text = line + '[1]' if index == last_index else line
            self._emit(text, 'red', is_colored)

    def _print_images_header(self, is_colored):
        """Print the image description followed by the image link numbers."""
        references = self._image_reference_range(len(self.media))
        description = self.media_description
        if references:
            description = f'{description} {references}'
        self._emit(f'\n\nImages:\n{description}\n', 'blue', is_colored)

    def _print_links(self, is_colored):
        """Print the numbered article link and one numbered line per image URL."""
        # Same "[n] url" layout in both colour modes.
        self._emit(f'\n\nLinks:\n[1] {self.link}', 'green', is_colored)
        for number, img in enumerate(self.media, start=2):
            url = img['url']
            self._emit(f'[{number}] {url}', 'green', is_colored)

    def print_readable_article(self, is_colored):
        """Print article to stdout in human-readable format.

        Args:
            is_colored: When truthy, every section is coloured with
                termcolor; otherwise plain text is printed.
        """
        print(self._RULE)
        self._emit(self._format_date(self.published), 'red', is_colored)
        self._print_title(is_colored)
        self._print_images_header(is_colored)

        for line in cut_string_to_length_with_space(self.summary, 79):
            self._emit(line, 'cyan', is_colored)

        self._print_links(is_colored)
        print(self._RULE)

    def make_article_json(self):
        """Convert article data to a JSON-style dictionary.

        Returns:
            dict with the keys ``images``, ``link``, ``summary``, ``date``
            and ``title``.
        """
        # NOTE(review): every image is stored under the same description key,
        # so with several images only the last URL survives. Left as is
        # because changing the keys would alter the emitted JSON shape.
        article_json = {
            'images': {self.media_description: img['url'] for img in self.media},
            'link': self.link,
            'summary': self.summary,
            'date': self.published,
            'title': self.title,
        }
        return article_json
Binary file added final_task/rss_reader/cashed_feeds.db
Binary file not shown.
14 changes: 14 additions & 0 deletions final_task/rss_reader/check_func.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import requests
import exceptions as ex


def internet_connection_check():
    """Report whether an HTTP GET probe to a fixed URL succeeds.

    Returns:
        bool: True when the request completes, False when ``requests``
        raises any ``RequestException`` (connection failure, timeout, ...).
    """
    url = 'http://www.google.com/'
    timeout = 5
    try:
        requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Catching the base class covers read timeouts as well. The previous
        # ``return`` inside ``finally`` swallowed those and reported True.
        return False
    return True
17 changes: 17 additions & 0 deletions final_task/rss_reader/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
class InvalidURLAddress(Exception):
    """Exception type named for an invalid URL address; adds nothing to Exception."""


class NoInternetConnection(Exception):
    """Exception type named for a missing internet connection; adds nothing to Exception."""


class EmptyDataBase(Exception):
    """Exception type named for an empty database; adds nothing to Exception."""


class DateNotInDatabase(Exception):
    """Exception type named for a date absent from the database; adds nothing to Exception."""

class PathError(Exception):
    """Exception type named for a path-related error; adds nothing to Exception."""
Loading