Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
.idea/
final_task/dist
*.pyc
*.json
*.idea'.idea'
final_task/rss_reader/.coverage
*.html
*.pdf
*.pkl
*.idea'.idea'
final_task/FinalTaskRssParser.egg-info
final_task/rss_reader/images/
.idea/
52 changes: 49 additions & 3 deletions final_task/README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,49 @@
# Your readme here
Some text.
Checkout how to write this file using *markdown*.

# Python RSS-reader
Python RSS-reader is a command-line utility which receives an RSS URL and prints the results in a human-readable format.

To start Python RSS-reader run one of the following commands in command line:

``python rss_reader.py "https://news.yahoo.com/rss/" --limit 1``
``python rss_reader.py "https://timesofindia.indiatimes.com/rssfeedstopstories.cms" --json --limit 1``
The project consists of 7 files:
- ConsoleArgument.py - handles the command-line arguments
- ConsoleOut.py - functions that print the news to the console
- Handler.py - handles the request and parses the feed
- Log.py - logging decorator
- rss_reader.py - main file of the project
- RssException.py - contains the project exception
- WorkWithCache.py - functions that work with the cache (read and write JSON)
Comment on lines +9 to +16

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not actual information, it would be nice if you kept it up to date.


### JSON structure:
```
{
"news": "news text",
"title": "Title of news",
"date": "Wed, 20 Nov 2019 02:47:47 -0500",
"links": [
"http://l1.yimg.com/uu/api/res/1.2/1KHP4ztUcOL6a98.vsEHQA--/YXBwaWQ9eXRhY2h5b247aD04Njt3PTEzMDs-/http://media.zenfs.com/en_us/News/afp.com/0dca2dadd67f7128eb881f0333640fce05a84084.jpg"
],
"strDate": "20191120"

}
```
### Functional
```
positional arguments:
source RSS URL
optional arguments:
-h, --help show this help message and exit
--version Print version info
--json Print result as JSON in stdout
--verbose Outputs verbose status messages
--limit LIMIT Limit news topics if this parameter provided
--date DATE View news from the cache for the specified date (e.g. 20191122)
```

### Iteration 4
``python rss_reader.py --date 20191122 --to_html "D:\EpamFINAL\FinalTaskRssParser\final_task\rss_reader"``

``python rss_reader.py "https://news.yahoo.com/rss/" --to_pdf "D:\EpamFINAL\FinalTaskRssParser\final_task\rss_reader"``


Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nice to have some info on how to install your app using setuptools.

15 changes: 15 additions & 0 deletions final_task/rss_reader/ConsoleArgument.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import argparse


def get_console_argument():
    """Parse the command-line arguments of the RSS reader.

    Returns:
        argparse.Namespace: parsed arguments with the attributes ``link``,
        ``limit``, ``verbose``, ``json``, ``version``, ``date``, ``to_html``
        and ``to_pdf``.
    """
    arg = argparse.ArgumentParser(description="read command of command-line")
    arg.add_argument("link", nargs='?', type=str, default="", help="Rss URL")
    arg.add_argument('--limit', help="limit news topics if this parameter provided", type=int)
    arg.add_argument('--verbose', help="verbose", action='store_true')
    arg.add_argument('--json', help="print result as json in stdout", action='store_true')
    arg.add_argument('--version', help="print version info", action='store_true')
    arg.add_argument('--date', help="print news from cache for your date", type=int)
    # The README documents the underscore spelling (--to_html / --to_pdf), so
    # both spellings are accepted; the attribute names stay to_html / to_pdf.
    arg.add_argument('--to-html', '--to_html', dest='to_html', help="create html file with news")
    arg.add_argument('--to-pdf', '--to_pdf', dest='to_pdf', help="create pdf file with news")

    return arg.parse_args()
33 changes: 33 additions & 0 deletions final_task/rss_reader/ConsoleOut.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from pprint import pprint

from Log import log_decore


@log_decore
def print_json(list_json):
    """Print every item of *list_json* to stdout as a JSON document.

    Args:
        list_json: iterable of JSON-serializable items (presumably the news
            dictionaries; verify against the caller).
    """
    # Local import so that this function does not depend on a module-level
    # import being present.
    import json

    for item_list in list_json:
        # json.dumps emits real JSON (double quotes, true/false/null), unlike
        # pprint, which prints the Python repr of the object.
        print(json.dumps(item_list, ensure_ascii=False, indent=4))


@log_decore
def print_array_of_news(news):
    """Print news objects (attribute access) to stdout in a readable form."""
    for entry in news:
        summary_lines = (
            "Title: " + entry.title,
            "Date: " + entry.date,
            "Link: " + entry.link,
            "\n" + entry.news + '\n',
            "Links: ",
        )
        print("\n".join(summary_lines))
        for image_url in entry.links:
            print(image_url + "\n")


@log_decore
def print_array_of_dict(news):
    """Print news dictionaries (key access) to stdout in a readable form."""
    for entry in news:
        summary_lines = (
            "Title: " + entry["title"],
            "Date: " + entry["date"],
            "Link: " + entry["link"],
            "\n" + entry["news"] + '\n',
            "Links: ",
        )
        print("\n".join(summary_lines))
        for image_url in entry["links"]:
            print(image_url + "\n")
112 changes: 112 additions & 0 deletions final_task/rss_reader/ConvertToHtmlAndPdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import os
import sys
from fpdf import FPDF
from News import News
from dominate.tags import html, head, meta, body, div, img, p, b, br, h1, a

from WorkWithCache import correct_title
from Log import log_decore
from RssException import RssException

@log_decore
def convert_Dict_to_News(arr_news_dict):
    """Build a list of ``News`` objects from a sequence of news dictionaries.

    Every dictionary must provide the keys ``links``, ``link``, ``news``,
    ``title``, ``date`` and ``strDate``.
    """
    return [
        News(
            links=record["links"],
            link=record["link"],
            news=record["news"],
            title=record["title"],
            date=record["date"],
            strDate=record["strDate"],
        )
        for record in arr_news_dict
    ]



@log_decore
def create_html_news(path, News):
    """Create the file ``News.html`` in the folder *path* and fill it with news.

    Args:
        path: existing folder in which the HTML file is created.
        News: iterable of news items exposing the attributes ``title``,
            ``date``, ``news`` and ``links``.

    Raises:
        RssException: if *path* is not a folder or the file cannot be
            created in it.
    """
    if not os.path.isdir(path):
        raise RssException("Error. It isn't a folder")

    # NOTE(review): a reviewer asked for a more informative file name than
    # "News.html"; the name is kept because it is user-visible behavior.
    path = os.path.join(path, "News.html")
    # Images are referenced from the "images" folder next to this module.
    this_dir = os.path.abspath(os.path.dirname(__file__))

    news_html = html()
    news_html.add(head(meta(charset='utf-8')))
    news_body = news_html.add(body())
    for item_news in News:
        # Each item gets its own <div> directly under <body>; re-binding
        # news_body to the new div would nest every item inside the previous.
        news_block = news_body.add(div())
        news_block += h1(item_news.title)
        news_block += p(b("Date: "), a(item_news.date))

        # Cut everything up to and including the first ']' (the leading image
        # marker); when there is no ']' find() returns -1 and the text is
        # kept unchanged.
        text = item_news.news
        text = text[text.find(']') + 1:]

        if item_news.links:
            news_block += img(src=f"file:///{this_dir}/images/{correct_title(item_news.title)}.jpg")

        news_block += p(text, br(), br())

    try:
        with open(path, 'w', encoding='utf-8') as rss_html:
            rss_html.write(str(news_html))
    except FileNotFoundError as err:
        raise RssException('Error. No such folder\n') from err
    print("file News.html created")



@log_decore
def create_pdf_news(path, News):
    """Create the file ``News.pdf`` in the folder *path* and fill it with news.

    Args:
        path: existing folder in which the PDF file is created.
        News: iterable of news items exposing the attributes ``title``,
            ``date``, ``news`` and ``links``.

    Raises:
        RssException: if *path* is not a folder, the font file cannot be
            loaded, or the file cannot be created in the folder.
    """
    if not os.path.isdir(path):
        raise RssException("Error. It isn't a folder")
    path = os.path.join(path, "News.pdf")
    this_dir = os.path.abspath(os.path.dirname(__file__))

    # Prefer the font located next to this module so the tool works from any
    # working directory; fall back to the bare name used previously.
    font_path = os.path.join(this_dir, 'DejaVuSans.ttf')
    if not os.path.exists(font_path):
        font_path = 'DejaVuSans.ttf'

    pdf = FPDF()
    try:
        pdf.add_font('DejaVuSans', '', font_path, uni=True)
        pdf.set_font("DejaVuSans")
    except RuntimeError as err:
        raise RssException("fonts file not found") from err
    pdf.alias_nb_pages()
    pdf.add_page()

    for item_news in News:
        # Cut everything up to and including the first ']' (the leading image
        # marker); without a ']' the text is kept unchanged.
        text = item_news.news
        text = text[text.find(']') + 1:]

        pdf.set_font_size(26)
        pdf.write(11, item_news.title + '\n\n')
        pdf.set_font_size(14)
        pdf.write(11, f"Date: {item_news.date}\n")

        if item_news.links:
            try:
                pdf.image(f'{this_dir}/images/{correct_title(item_news.title)}.jpg', w=75, h=75)
            except RuntimeError:
                # Best effort: an image that cannot be embedded is skipped and
                # the news text is still written.
                pass
        pdf.write(10, "\n")
        pdf.write(10, text + "\n\n\n\n")

    # Write the document exactly once, inside the try, so that a missing
    # folder is reported as RssException.
    try:
        pdf.output(path, 'F')
    except FileNotFoundError as err:
        raise RssException("Error. No such folder") from err
    print("file News.pdf created")
Binary file added final_task/rss_reader/DejaVuSans.ttf
Binary file not shown.
124 changes: 124 additions & 0 deletions final_task/rss_reader/Handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import feedparser
from dataclasses import asdict
import html

from Log import log_decore
from WorkWithCache import write_json_to_cache
from News import News
''' convert from News class to json'''


@log_decore
def parse_to_json(news):
    """Convert a news dataclass instance into a plain dictionary."""
    news_as_dict = asdict(news)
    return news_as_dict



''' In this class, we do all the news processing.
Here we translate the format required by the user'''


class Handler:
    """Download an RSS feed and convert its entries into ``News`` objects.

    Attributes:
        article: result of ``feedparser.parse(url)``.
        numb_news: number of feed entries converted so far.
        parsers: list of the ``News`` objects that were created.
    """

    @log_decore
    # limit - count elements which user want see
    def __init__(self, url, limit):
        """Parse the feed at *url* and build at most *limit* news items.

        Args:
            url: address of the RSS feed.
            limit: number of news items the user wants to see; ``-1`` or
                ``None`` means "every entry of the feed".
        """
        self.article = feedparser.parse(url)
        self.numb_news = 0
        self.parsers = []
        # Without an explicit limit all news of the feed are taken.
        if limit is None or limit == -1:
            limit = len(self.article.entries)
        self.create_news(url, limit)

    @log_decore
    def create_news(self, url, limit):
        """Create ``News`` objects for feed entries until *limit* is reached.

        Args:
            url: address of the feed (not used here; kept for callers).
            limit: index of the first entry that is NOT converted.
        """
        # Never read past the end of the feed when the user asks for more
        # news than the feed contains.
        limit = min(limit, len(self.article.entries))
        while self.numb_news < limit:
            index = self.numb_news
            summary = self.get_news(index)
            item_of_list_news = News(
                self.parse_html(summary),
                self.get_link(index),
                self.get_title(index),
                self.get_date(index),
                self.get_img_links(summary),
                self.get_str_date(index),
            )
            self.parsers.append(item_of_list_news)
            self.numb_news += 1

    @log_decore
    def get_news(self, index):
        """Return the summary of entry *index*.

        An empty string is returned when the entry does not exist or has no
        summary, so callers can always treat the result as text.
        """
        try:
            return getattr(self.article.entries[index], "summary", "")
        except IndexError:
            return ""

    @log_decore
    def get_link(self, index):
        """Return the link of entry *index*."""
        return self.article.entries[index].link

    @log_decore
    def get_title(self, index):
        """Return the title of entry *index* with HTML entities unescaped."""
        return html.unescape(self.article.entries[index].title)

    @log_decore
    def get_date(self, index):
        """Return the publication date string of entry *index*."""
        return self.article.entries[index].published

    @log_decore
    def get_str_date(self, index):
        """Return the publication date of entry *index* as ``YYYYMMDD``.

        Month and day are zero-padded so that, for example, 5 November 2019
        becomes ``20191105`` and not the ambiguous ``2019115``.
        """
        published = self.article.entries[index]['published_parsed']
        return f"{published.tm_year:04d}{published.tm_mon:02d}{published.tm_mday:02d}"

    @staticmethod
    def _attribute_values(text, attribute):
        """Return the value of every ``attribute="..."`` occurrence in *text*."""
        if not text:
            return []
        marker = attribute + '="'
        values = []
        search_from = 0
        while True:
            start = text.find(marker, search_from)
            if start == -1:
                break
            value_start = start + len(marker)
            end = text.find('"', value_start)
            if end == -1:
                # Unterminated attribute value: nothing more to extract.
                break
            values.append(text[value_start:end])
            search_from = value_start
        return values

    @staticmethod
    def _strip_tags(text):
        """Return *text* with every ``<...>`` span removed.

        A ``<`` without a following ``>`` is kept as ordinary text, so the
        scan always terminates.
        """
        pieces = []
        position = 0
        while True:
            tag_start = text.find('<', position)
            if tag_start == -1:
                break
            tag_end = text.find('>', tag_start)
            if tag_end == -1:
                break
            pieces.append(text[position:tag_start])
            position = tag_end + 1
        pieces.append(text[position:])
        return "".join(pieces)

    @log_decore
    def get_img_links(self, text):
        """Return the values of all ``src="..."`` attributes found in *text*."""
        return self._attribute_values(text, 'src')

    @log_decore
    def get_img_alt(self, text):
        """Return the values of all ``alt="..."`` attributes found in *text*."""
        return self._attribute_values(text, 'alt')

    @log_decore
    def parse_html(self, text):
        """Convert the HTML summary *text* into plain text.

        The result starts with one ``[img N alt]`` marker per ``alt``
        attribute found, followed by the text without tags and with HTML
        entities unescaped.
        """
        if not text:
            return ""
        markers = "".join(
            "[img " + str(number) + " " + alt + "]"
            for number, alt in enumerate(self.get_img_alt(text))
        )
        return html.unescape(markers + self._strip_tags(text))

    @log_decore
    def get_all(self):
        """Write every created news item to the cache and return the list."""
        for item_news in self.parsers:
            write_json_to_cache(parse_to_json(item_news))

        return self.parsers
11 changes: 11 additions & 0 deletions final_task/rss_reader/Log.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import logging


def log_decore(fn):
    """Decorate *fn* so that an info record is logged before and after a call.

    Args:
        fn: the callable to wrap.

    Returns:
        A wrapper that forwards all arguments to *fn* and returns its result.
        The wrapper keeps the name and docstring of *fn*. If *fn* raises, the
        exception propagates and the "Stop" record is not written.
    """
    # Local import so that this function does not depend on a module-level
    # import being present.
    from functools import wraps

    @wraps(fn)
    def wrapper(*args, **kwargs):
        logging.info(f"function \"{fn.__name__}\"Run function")
        res = fn(*args, **kwargs)
        logging.info(f"function \"{fn.__name__}\"Stop function")
        return res

    return wrapper
17 changes: 17 additions & 0 deletions final_task/rss_reader/News.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from dataclasses import dataclass
from typing import List

'''
in this class, we store news in the required format and
implement all the logic for processing news in the class Handler
'''


@dataclass
class News:
    """A single news item; the processing logic lives in the Handler class."""

    news: str  # text of the news
    link: str  # link of the news
    title: str  # title of the news
    date: str  # publication date as given by the feed
    links: List[str]  # image links found in the news
    strDate: str  # publication date as a compact year-month-day string
2 changes: 2 additions & 0 deletions final_task/rss_reader/RssException.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
class RssException(Exception):
    """Project-specific exception, raised e.g. when an output folder is invalid."""
    pass
Loading