Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Editors
.idea/

# Environments
venv/

# Byte-compiled / optimized
__pycache__/
*.py[cod]

*.log

# Distribution / packaging
*.egg-info/
build/
develop-eggs/
dist/
56 changes: 53 additions & 3 deletions final_task/README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,53 @@
# Your readme here
Some text.
Checkout how to write this file using *markdown*.
# RSS reader
RSS reader is a command-line utility that receives an RSS URL and prints the results in a human-readable
format.

[The source for this project is available here](https://github.com/AnnaPotter/FinalTaskRssParser).


### Installation
$ pip install rss-reader-Anna-Gonchar

### Usage
$ rss-reader (-h | --help)

Show help message and exit

$ rss-reader <RSS-SOURCE-LINK>

Print rss feeds in human-readable format

$ rss-reader --version

Print version info

$ rss-reader --json

Print result as JSON in stdout

$ rss-reader.py --verbose

Outputs verbose status messages

$ rss-reader.py --limit LIMIT

Limit the number of news topics if this parameter is provided

$ rss-reader.py --date DATE

Takes a date in %Y%m%d format. Prints the news from the specified date
and source (<RSS-SOURCE-LINK>), if it is specified

$ rss-reader.py --to-pdf PATH_TO_PDF

Gets file path. Convert news to pdf and save them to pdf file on the specified path

$ rss-reader.py --to-html PATH_TO_HTML

Gets file path. Convert news to html and save them to html file on the specified path

### Storage
All the pieces of news received from the source are saved to a binary file.
The shelve module is used for this: it saves an object to the file under a specific key.
The key is the news publication date, the value is the news item.

26 changes: 26 additions & 0 deletions final_task/rss_reader/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# JSON structure:

{

[

{
"feed_title": feed title,
"feed_language": feed language,
"title": news title,
"summary": news content,
"date": news publication date,
"link": news link },

{
"feed_title": feed title,
"feed_language": feed language,
"title": news title,
"summary": news content,
"date": news publication date,
"link": news link },

...

]
}
1 change: 1 addition & 0 deletions final_task/rss_reader/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

179 changes: 179 additions & 0 deletions final_task/rss_reader/action_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
""" Module of creation functions and action functions.

Functions:
    create_logger(com_line_args) -> logger
    get_com_line_args() -> com_line_args
    get_limit_news_collection(news_collection, com_line_args, logger) -> news_collection
    get_news(command_line_args, logger) -> news_collection
    print_news_stdout(news_collection) -> None
    print_news_json(news_collection) -> None
    print_news(news_collection, com_line_args, logger) -> None
    convert_date(date_str) -> str_date
    clean_str(string) -> clean_string """
Comment on lines +3 to +13

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here and below: there is actually no need to write function list in module docstring.


import feedparser
from bs4 import BeautifulSoup
import html
import argparse
import json
import logging
from datetime import datetime
from exceptions import Error
from models import NewsEntry
from dataclasses import asdict
from validation_functions import check_limit_arg


def create_logger(com_line_args):
    """Create and configure the application logger.

    The logger named "rss_reader_logger" writes DEBUG and higher records to
    "file.log" (relative to the current working directory). Console output
    is DEBUG and higher when ``com_line_args.verbose`` is truthy, otherwise
    ERROR and higher only.

    Calling the function again reuses the handlers that are already attached
    and only refreshes the console level, so records are never duplicated.

    Args:
        com_line_args: object exposing a ``verbose`` attribute.

    Returns:
        The configured ``logging.Logger`` instance.
    """
    logger = logging.getLogger("rss_reader_logger")
    logger.setLevel(logging.DEBUG)

    # Check --verbose argument
    console_level = logging.DEBUG if com_line_args.verbose else logging.ERROR

    if logger.handlers:
        # logging.getLogger returns the same object for the same name, so a
        # repeated call must not attach a second pair of handlers.
        for handler in logger.handlers:
            # FileHandler subclasses StreamHandler, hence the explicit exclusion.
            if isinstance(handler, logging.StreamHandler) and not isinstance(handler, logging.FileHandler):
                handler.setLevel(console_level)
        return logger

    # Create handlers
    c_handler = logging.StreamHandler()
    f_handler = logging.FileHandler("file.log")
    c_handler.setLevel(console_level)

    # Create formatters and add them to the handlers
    c_handler.setFormatter(logging.Formatter('%(name)s - %(levelname)s - %(message)s'))
    f_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))

    # Add handlers to the logger
    logger.addHandler(f_handler)
    logger.addHandler(c_handler)

    return logger


def get_com_line_args():
    """ Build the argument parser and return the parsed command line.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()`` with the
        attributes ``date``, ``to_html``, ``to_pdf``, ``version``, ``json``,
        ``verbose``, ``limit`` and ``source``. """
    arg_parser = argparse.ArgumentParser(
        description="Pure Python command-line RSS reader.",
        add_help=True,
    )

    # Options that take a value: (flag, converter, help text).
    valued_options = (
        ("--date", convert_date,
         "Gets a date in %%Y%%m%%d format. Print news from the specified date."),
        ("--to-html", str,
         "Gets file path. Convert news to html and save them to html file."),
        ("--to-pdf", str,
         "Gets file path. Convert news to pdf and save them to pdf file."),
    )
    for flag, converter, help_text in valued_options:
        arg_parser.add_argument(flag, type=converter, help=help_text)

    # Boolean switches: (flag, help text).
    switches = (
        ("--version", "Print version info"),
        ("--json", "Print result as JSON in stdout"),
        ("--verbose", "Outputs verbose status messages"),
    )
    for flag, help_text in switches:
        arg_parser.add_argument(flag, action="store_true", help=help_text)

    arg_parser.add_argument("--limit", type=int, help="Limit news topics if this parameter provided")
    # The feed URL is optional so that cached news can be read without it.
    arg_parser.add_argument("source", type=str, nargs="?", help="RSS URL")

    return arg_parser.parse_args()


def get_limit_news_collection(news_collection, com_line_args, logger):
    """ Trim the news collection according to the --limit argument.

    When ``check_limit_arg`` returns a falsy value the whole collection is
    kept (presumably meaning that no usable limit was supplied; verify in
    validation_functions). When the limit exceeds the number of news a
    warning is logged and the collection is returned unchanged. """
    if check_limit_arg(com_line_args, logger):
        limit = com_line_args.limit
    else:
        limit = len(news_collection)

    if len(news_collection) >= limit:
        return news_collection[:limit]

    logger.warning("The number of news is less than the value of the argument limit.")
    return news_collection


def get_news(command_line_args, logger):
    """ Get news function.

    Parses the feed at ``command_line_args.source`` with feedparser and
    builds one ``NewsEntry`` per feed entry. BeautifulSoup strips the HTML
    markup from each summary and collects the image links found in it.

    Args:
        command_line_args: object exposing a ``source`` attribute (feed URL).
        logger: logger used for progress messages.

    Returns:
        List of ``NewsEntry`` objects in feed order (empty when the feed has
        no entries). """
    logger.info("Getting news.")
    source = command_line_args.source
    news_feed = feedparser.parse(source)

    feed_title = clean_str(news_feed.feed.get("title", ""))
    feed_language = news_feed.feed.get("language", "")
    news_collection = []

    for entry in news_feed.entries:
        news_entry = NewsEntry()
        news_entry.feed_title = feed_title
        news_entry.feed_language = feed_language

        news_entry.source = source
        news_entry.title = clean_str(entry.get("title", ""))
        news_entry.date = entry.get("published", "")
        news_entry.link = entry.get("link", "")

        # get rid of html tags
        soup = BeautifulSoup(entry.get("summary", ""), "html.parser")
        news_entry.summary = clean_str(soup.text)

        # get images links; Tag.get() is used because indexing a tag with
        # ["src"] raises KeyError for an <img> that has no src attribute
        for img in soup.find_all("img"):
            src = img.get("src")
            if src:
                news_entry.image_links.append(src)

        news_collection.append(news_entry)

    return news_collection


def print_news_stdout(news_collection):
    """ Print every news entry to stdout in text format.

    Delegates to each entry's ``print_entry()``; a falsy collection
    (empty or ``None``) prints nothing. """
    for news_entry in news_collection or ():
        news_entry.print_entry()


def print_news_json(news_collection):
    """ Print the news to stdout as a JSON array.

    Every entry is expected to be a dataclass instance; it is converted
    with ``dataclasses.asdict`` and the list is dumped with a 4-space indent. """
    serializable_entries = [asdict(news_entry) for news_entry in news_collection]
    json_text = json.dumps(serializable_entries, indent=4)
    print(json_text)


def print_news(news_collection, com_line_args, logger):
    """ Print the news to stdout, honouring the --json argument.

    Args:
        news_collection: news entries to print; the caller is expected to
            have applied the --limit argument already.
        com_line_args: object exposing ``date`` and ``json`` attributes.
        logger: logger used for progress messages. """
    # A truthy --date means the collection came from the cache. Exactly one
    # start message is logged per call.
    if com_line_args.date:
        logger.info("Printing cache news.")
    else:
        logger.info("Printing news.")

    if com_line_args.json:
        logger.info("Printing news in json format.")
        print_news_json(news_collection)
    else:
        logger.info("Printing news stdout.")
        print_news_stdout(news_collection)


def convert_date(date_str):
    """ Convert a %Y%m%d date string to the "D Mon YYYY" form.

    The day of month is written without a leading zero, e.g.
    "20191203" -> "3 Dec 2019".

    Args:
        date_str: date in %Y%m%d format.

    Returns:
        The date as "<day> <abbreviated month> <year>".

    Raises:
        Error: if ``date_str`` cannot be parsed as %Y%m%d; the original
            ValueError is attached as the cause. """
    # Only the parsing can fail, so nothing else belongs in the try body.
    try:
        datetime_obj = datetime.strptime(date_str, '%Y%m%d')
    except ValueError as e:
        raise Error("Invalid date argument. Please, check your input.") from e

    # datetime.day is an int, which gives the unpadded day directly.
    return f"{datetime_obj.day} {datetime_obj.strftime('%b %Y')}"


def clean_str(string):
    """ Unescape HTML entities and drop every non-ASCII character. """
    unescaped = html.unescape(string)
    # 'ignore' silently discards characters that cannot be encoded as ASCII.
    ascii_bytes = unescaped.encode('ascii', 'ignore')
    return ascii_bytes.decode("utf-8")
65 changes: 65 additions & 0 deletions final_task/rss_reader/caching_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
""" Module of caching functions.

Functions:
cache_news(news_collection, logger) -> None
get_cached_news(com_line_args, logger) -> cached_news_collection """

import shelve
from os import path

from validation_functions import check_limit_arg
from exceptions import EmptyFileError, EmptyCollectionError

DIRECTORY = path.abspath(path.dirname(__file__))


def cache_news(news_collection, logger):
    """ Store the news in the shelve cache file.

    The cache file '.cache_rss_news' lives in DIRECTORY. Each item is stored
    under its ``date`` attribute, so an item whose date string equals an
    already stored key replaces that record. """
    logger.info("Collecting news to cache file.")
    cache_path = path.join(DIRECTORY, '.cache_rss_news')
    with shelve.open(cache_path) as cache:
        for item in news_collection:
            cache[item.date] = item
    logger.info("News was cached successfully.")


def _key_matches_date(hash_date_key, date):
    """ Tell whether a cache key (a publication date string) falls on `date`.

    `date` is expected in the "D Mon YYYY" form (e.g. "3 Dec 2019") and the
    key in a form like "Tue, 03 Dec 2019 07:11:16 -0500". """
    # Cheap substring pre-filter; on its own it would also accept
    # "13 Dec 2019" for "3 Dec 2019", hence the day comparison below.
    if date not in hash_date_key:
        return False

    key_parts = hash_date_key.split()
    if len(key_parts) < 2:
        return False

    # The key may carry a zero-padded day ("03") while the requested date is
    # unpadded ("3"), so compare both with leading zeros stripped.
    return key_parts[1].lstrip("0") == date.partition(" ")[0].lstrip("0")


def get_cached_news(com_line_args, logger):
    """ Read news published on the requested date from the shelve cache.

    Args:
        com_line_args: object exposing ``date`` ("D Mon YYYY" string),
            ``source`` (optional feed URL used as an extra filter) and
            ``limit`` attributes.
        logger: logger used for progress messages.

    Returns:
        List of cached news for the date (and source, if given), cut to the
        limit when ``check_limit_arg`` accepts it.

    Raises:
        EmptyFileError: if the cache file holds no records.
        EmptyCollectionError: if no cached news matches the request. """
    logger.info("Getting cache news.")
    date = com_line_args.date
    source = com_line_args.source

    news_collection = []

    with shelve.open(path.join(DIRECTORY, '.cache_rss_news')) as news_dict:
        if not news_dict:
            raise EmptyFileError("Cache file is empty. Please, retrieve news from internet. ")

        if not check_limit_arg(com_line_args, logger):
            limit = len(news_dict)
        else:
            limit = min(com_line_args.limit, len(news_dict))

        for hash_date_key in news_dict:
            if not _key_matches_date(hash_date_key, date):
                continue
            news = news_dict[hash_date_key]
            # The source filter applies only when a source was supplied.
            if source and source != news.source:
                continue
            news_collection.append(news)

    if not news_collection:
        if source:
            raise EmptyCollectionError("There are no news in cache file on specified date and source.")
        raise EmptyCollectionError("There are no news in cache file on specified date.")

    logger.info("Successfully get news from cache.")
    return news_collection[:limit]
Loading