Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions final_task/MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include rss_reader/ARIALUNI.ttf

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will not work on Windows OS, because it has different filesystem path separators (\)

104 changes: 101 additions & 3 deletions final_task/README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,101 @@
# Your readme here
Some text.
Checkout how to write this file using *markdown*.
RSS-reader
=============================
Pure Python command-line RSS reader.

INSTALLATION
------------
First of all, open a command line and install setuptools by running 'pip install -U setuptools'.
After that you can install the app: run 'python setup.py install' in a command line opened as an administrator,
from the final_task folder, where setup.py is located.
Once it is installed, launch the application with 'rss-reader [arguments]'.

For conversion to work, the rss_reader folder must contain ARIALUNI.ttf.

USING
------------
rss_reader.py [--help] [--version] [--json] [--verbose] [--limit LIMIT] [--date DATE] [--to-html TO_HTML]
[--to-pdf TO_PDF] source


positional arguments:

source RSS url

optional arguments:

-h, --help show this help message and exit

--limit LIMIT Limit news topics if this parameter provided

--version Print version info

--json Print result as JSON in stdout

--verbose Outputs verbose status messages

--date DATE Print the news from the specified day, YYYYMMDD format

--to-html TO_HTML Convert news in html format, need path, where the file
will be saved

--to-pdf TO_PDF Convert news in pdf format, need path, where the file
will be saved

FORMAT OF PRESENTING NEWS
------------
Feed: [feed]
__________________________________________________________________
Title: [title]

Date: [date of publishing]

Link: [link of news]

[[image: alt of image][2]description of news] # if the image exists

[description of news] # if doesn't

Links:

[1]: [link of news]

[2]: [link of image, if it exists]
__________________________________________________________________
...

JSON FORMAT
------------
If the '--json' argument is provided, news will be presented in JSON format with the following structure:

[

{

"Feed": [feed],

"Title": [title],

"Date": [date of publishing],

"Description": [description],

"Link [1]": [link of news],

"Link [2]": [link of image, if it exists]

},

...

]

Description will be in format [[image: alt of image][2]description of news], if the image exists,
or in format [description of news], if it doesn't.

CONVERTING TO HTML OR PDF FORMAT
------------
You can save news in either of these formats; the news can be read from the website or from the cache.
To do that, pass the path to an existing directory; if it is valid, the news will be converted and you will see a message
telling you whether the conversion was successful or not.
The resulting file (named news.pdf or news.html) will contain all the information about the news; if there is no connection
to the Internet, the URL of each image will be shown instead of the image itself.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add some info about the way you store files

Binary file added final_task/rss_reader/ARIALUNI.TTF
Binary file not shown.
54 changes: 54 additions & 0 deletions final_task/rss_reader/console_interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import argparse
import logging


def parse_args(argv=None):
    """Parse the command-line arguments of the RSS reader.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default) argparse reads ``sys.argv[1:]``, so existing callers
            that pass nothing keep working unchanged.

    Returns:
        argparse.Namespace with the attributes ``source``, ``limit``,
        ``json``, ``verbose``, ``date``, ``to_html`` and ``to_pdf``.
    """
    parser = argparse.ArgumentParser(description="Pure Python command-line RSS reader.")
    parser.add_argument("source", help="RSS url")
    parser.add_argument("--limit", type=int, help="Limit news topics if this parameter provided")
    parser.add_argument("--version", action="version", version="Version 1.4.2", help="Print version info")
    parser.add_argument("--json", action="store_true", help="Print result as JSON in stdout")
    parser.add_argument("--verbose", action="store_true", help="Outputs verbose status messages")
    parser.add_argument("--date", type=str, help="Print the news from the specified day, YYYYMMDD format")
    parser.add_argument("--to-html", type=str,
                        help="Convert news in html format, need path, where the file will be saved")
    parser.add_argument("--to-pdf", type=str,
                        help="Convert news in pdf format, need path, where the file will be saved")
    return parser.parse_args(argv)


def get_args(args):
    """Configure logging and turn parsed arguments into a settings dictionary.

    Side effect: calls ``logging.basicConfig``. With ``--verbose`` the log
    goes to the console, otherwise it is written to ``app.log`` (overwritten
    on every run).

    Args:
        args: Namespace produced by ``parse_args`` (attributes ``source``,
            ``limit``, ``json``, ``verbose``, ``date``, ``to_html``,
            ``to_pdf``).

    Returns:
        dict with the keys:
            "url"  - the RSS source;
            "lim"  - the news limit, -1 when no (or a zero) limit was given;
            "json" - True when JSON output was requested;
            "date" - the requested date string, or None;
            "path" - the output directory for conversion, or None; when both
                     conversions are requested the ``--to-pdf`` path wins;
            "html" - True when ``--to-html`` was given;
            "pdf"  - True when ``--to-pdf`` was given.
    """
    if args.verbose:
        logging.basicConfig(level=logging.DEBUG, format='%(process)d-%(levelname)s-%(message)s')
    else:
        logging.basicConfig(filename='app.log', level=logging.DEBUG, filemode='w',
                            format='%(name)s - %(levelname)s - %(message)s')

    if args.date:
        logging.debug("Check in interface")

    # Values are read straight from the namespace; "or" supplies the same
    # defaults the previous temporaries had for missing/empty options.
    return {
        "url": args.source,
        "lim": args.limit or -1,
        "json": bool(args.json),
        "date": args.date or None,
        "path": args.to_pdf or args.to_html or None,
        "html": bool(args.to_html),
        "pdf": bool(args.to_pdf),
    }
Comment on lines +48 to +54

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be better just to use

{
    "url": args.source,
    ...
}

Instead of creating redundant variables.

154 changes: 154 additions & 0 deletions final_task/rss_reader/conversion_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
import os
import logging
import requests
from pkg_resources import resource_filename
from yattag import Doc
from fpdf import FPDF
from database_functions import put_into_db
from information_about_news import taking_information_from_feedparser, InfoAboutNews


def get_path(path_to_file, expansion):
    """Build the name of the output file inside an existing directory.

    Args:
        path_to_file: Directory where the converted news should be saved.
        expansion: File extension without the leading dot (the callers pass
            "html" or "pdf").
            NOTE(review): a reviewer suggested calling this parameter "type"
            or "format" instead of "expansion"; the name is kept so that
            existing keyword callers do not break.

    Returns:
        The path ``<path_to_file>/news.<expansion>``, or None when
        ``path_to_file`` is missing or is not an existing directory (in that
        case "Invalid directory" is printed and an error is logged).
    """
    # isdir rather than exists: a path to a regular file would pass an
    # "exists" check and only fail later, when the output file is opened.
    if not path_to_file or not os.path.isdir(path_to_file):
        print("Invalid directory")
        logging.error("Directory doesn't exist")
        return None
    logging.debug("Directory exists")
    return os.path.join(path_to_file, "news." + expansion)


def convert_into_html_format(feed, dict_of_args):
    """Write the news to ``news.html`` in the requested directory.

    News that were read from the website (no "date" in ``dict_of_args``) are
    also stored in the database cache.

    Args:
        feed: Iterable of news entries: feedparser entries when reading from
            the website, or cached entries when a date was requested.
        dict_of_args: Settings dictionary; the keys "path", "url" and "date"
            are read here.

    Returns:
        The generated HTML as a string (even when writing the file failed),
        or None when the target directory is invalid.
    """
    filename = get_path(dict_of_args.get("path"), "html")
    if not filename:
        return None

    # With a date the entries come from the cache: they are already in the
    # form InfoAboutNews expects and must not be written to the cache again.
    from_cache = dict_of_args.get("date") is not None

    doc, tag, text, line = Doc().ttl()
    line('h1', 'News from ' + dict_of_args.get("url"))

    for feed_entry in feed:
        if from_cache:
            list_of_tags = feed_entry
        else:
            list_of_tags = taking_information_from_feedparser(feed_entry, dict_of_args)
        news_info = InfoAboutNews(list_of_tags)

        with tag('item'):
            with tag('h2'):
                text(news_info.title)
            with tag('link'):
                text(news_info.link)
            with tag('p'):
                text(news_info.date)
            with tag('h3'):
                if news_info.link_of_img:
                    with tag('img', src=news_info.link_of_img, alt=news_info.link_of_img,
                             border="0", align="left", hspace="5"):
                        pass
                text(news_info.description)
            with tag('br'):
                with tag('br'):
                    pass

        # putting news in database
        if not from_cache:
            put_into_db(news_info.feed, news_info.title,
                        feed_entry.get("published_parsed", feed_entry.published_parsed),
                        news_info.description, news_info.link, news_info.link_of_img)

    html = doc.getvalue()
    try:
        # The file is opened only once the document is complete and inside a
        # context manager, so it is always closed; utf-8 keeps non-ASCII news
        # text from failing on platforms with a narrower default encoding.
        with open(filename, 'w', encoding='utf-8') as file:
            logging.debug("File was opened successfully")
            file.write(html)
    except (OSError, UnicodeError) as error:
        logging.error(error)
        print("Can't write information in html file")
    else:
        print("News were written in html file")
    return html


def get_image(link_of_img, count, pdf):
    """Download an image and place it in the pdf document.

    The image is saved in the current working directory as ``img<count>.jpg``
    and then inserted with ``pdf.image``. When the download or the insertion
    fails, the image URL is written to the pdf instead and the error is
    logged; no exception is propagated to the caller.

    Args:
        link_of_img: URL of the image.
        count: Number used to build the temporary file name.
        pdf: The pdf document object (``image`` and ``multi_cell`` are used).
    """
    image_name = "img" + str(count) + ".jpg"
    try:
        # A timeout keeps one unresponsive server from hanging the conversion.
        img = requests.get(link_of_img, timeout=10)
        with open(image_name, "wb") as out:
            out.write(img.content)
        pdf.image(image_name, x=100, w=20)
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as error:
        print("Can't download image, because of connection to the Internet")
        pdf.multi_cell(0, 8, txt=link_of_img, align="C")
        logging.error(error)
    except Exception as e:
        # Deliberately broad: any other failure (e.g. an image the pdf
        # library cannot handle) must not abort the whole conversion.
        logging.error("Something wrong with format of image")
        pdf.multi_cell(0, 8, txt=link_of_img, align="C")
        logging.error(e)


def adding_text_in_pdf(pdf, text, size_of_font, indent):
    """Append a block of text to the pdf in the 'FreeSans' font.

    Args:
        pdf: The pdf document object (``set_font`` and ``multi_cell`` are used).
        text: Text to write.
        size_of_font: Font size passed to ``set_font``.
        indent: Second positional argument of ``multi_cell``.
    """
    cell_width = 200
    pdf.set_font('FreeSans', size=size_of_font)
    pdf.multi_cell(cell_width, indent, txt=text, align="C")


def get_font(pdf) -> bool:
    """Register the bundled Unicode font in the pdf under the name 'FreeSans'.

    Args:
        pdf: The pdf document object (``add_font`` is used).

    Returns:
        True when the font was added, False when the font file could not be
        found or loaded (the reason is logged), so the caller can stop the
        conversion instead of crashing.
    """
    # The font is looked up next to this module. Both spellings of the
    # extension are tried because the name must match exactly on
    # case-sensitive file systems.
    for font_name in ("ARIALUNI.ttf", "ARIALUNI.TTF"):
        font_path = resource_filename(__name__, font_name)
        if os.path.isfile(font_path):
            break
    else:
        logging.error("Font file ARIALUNI.ttf was not found")
        return False

    try:
        pdf.add_font('FreeSans', '', font_path, True)
    except (OSError, RuntimeError) as error:
        logging.error(error)
        return False
    return True


def convert_into_pdf_format(feed, dict_of_args):
    """Write the news to ``news.pdf`` in the requested directory.

    News that were read from the website (no "date" in ``dict_of_args``) are
    also stored in the database cache. Images are downloaded to temporary
    ``img<N>.jpg`` files in the current working directory and removed once
    the pdf has been produced (or has failed to be produced).

    Args:
        feed: Iterable of news entries: feedparser entries when reading from
            the website, or cached entries when a date was requested.
        dict_of_args: Settings dictionary; the keys "path" and "date" are
            read here.

    Returns:
        None. Success or failure is reported on stdout.
    """
    filename = get_path(dict_of_args.get("path"), "pdf")
    if not filename:
        return None
    logging.debug("File was opened successfully")

    # Portrait A4, stated explicitly on the document instead of passing a
    # tuple as the orientation argument of add_page.
    pdf = FPDF(orientation='P', unit='mm', format='A4')
    pdf.add_page()
    if not get_font(pdf):
        return None
    pdf.set_margins(10, 10, 10)

    # With a date the entries come from the cache and are not cached again.
    from_cache = dict_of_args.get("date") is not None

    count = 0  # number of temporary image files requested so far
    for feed_entry in feed:
        if from_cache:
            list_of_tags = feed_entry
        else:
            list_of_tags = taking_information_from_feedparser(feed_entry, dict_of_args)
        news_info = InfoAboutNews(list_of_tags)

        adding_text_in_pdf(pdf, news_info.title + "\n", 14, 10)
        adding_text_in_pdf(pdf, news_info.date, 10, 8)
        adding_text_in_pdf(pdf, news_info.link, 8, 8)
        if news_info.link_of_img:
            get_image(news_info.link_of_img, count, pdf)
            count += 1
        adding_text_in_pdf(pdf, news_info.description, 10, 6)
        adding_text_in_pdf(pdf, "_________________\n", 10, 10)

        # putting news in database
        if not from_cache:
            put_into_db(news_info.feed, news_info.title,
                        feed_entry.get("published_parsed", feed_entry.published_parsed),
                        news_info.description, news_info.link, news_info.link_of_img)

    try:
        pdf.output(filename)
        print("News were written in pdf file")
    except Exception as error:
        # Deliberately broad: whatever the pdf library raises, report it to
        # the user instead of ending with a traceback.
        logging.error(error)
        print("Can't write information in pdf file")
    finally:
        # Clean up in every case. A file is missing when its download
        # failed; that must not be reported as a pdf write error.
        for i in range(count):
            image_name = "img" + str(i) + ".jpg"
            try:
                os.remove(image_name)
            except FileNotFoundError:
                continue
            except OSError as error:
                logging.error(error)
73 changes: 73 additions & 0 deletions final_task/rss_reader/database_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import logging
import sqlite3
import json
from contextlib import closing
from information_about_news import InfoAboutNews


def check_existance(name) -> bool:
    """Tell whether a table (or view) called ``name`` exists in the database.

    Args:
        name: Name of the table to look for.

    Returns:
        True when it exists; False when it does not, when the database could
        not be opened, or when the lookup failed (the error is logged).
    """
    con = open_database()
    if con is None:
        # closing(None) would fail with AttributeError on exit.
        return False

    with closing(con):
        try:
            cur = con.cursor()
            # Ask the schema table instead of selecting from the table
            # itself: the name is bound as a parameter (no SQL built from a
            # string) and no rows of the table are read.
            cur.execute("SELECT 1 FROM sqlite_master "
                        "WHERE type IN ('table', 'view') AND name = ? COLLATE NOCASE",
                        (name,))
            exists = cur.fetchone() is not None
        except sqlite3.Error as error:
            logging.error(error)
            return False

    if exists:
        logging.debug("Table exists")
    return exists


def create_table():
    """Create the ``cache`` table used to store news, if it is missing.

    The table has the columns feed, title, date, description, link1 and
    link2; a row whose (title, date) pair is already present is ignored on
    insert. Nothing is done when the database cannot be opened.
    """
    con = open_database()
    if con is None:
        return

    # closing() releases the connection even when the statement fails;
    # IF NOT EXISTS makes a repeated call harmless.
    with closing(con):
        cur = con.cursor()
        cur.execute("CREATE TABLE IF NOT EXISTS cache(feed TEXT, title TEXT, date TEXT, description TEXT, "
                    "link1 TEXT, link2 TEXT,"
                    "UNIQUE (title, date) ON CONFLICT IGNORE)")
        con.commit()


def open_database(path="mydatabase.db"):
    """Open a connection to the SQLite database that holds the news cache.

    Args:
        path: Database file to open. Defaults to "mydatabase.db", which is
            resolved against the current working directory.

    Returns:
        An open ``sqlite3.Connection`` (the caller must close it), or None
        when the database could not be opened (the error is logged).
    """
    try:
        con = sqlite3.connect(path)
    except sqlite3.DatabaseError as error:
        logging.error(error)
        return None
    logging.debug("Database opened successfully")
    return con


def put_into_db(url, title, date, text, link, link_of_img):
    """Store one news item in the ``cache`` table.

    The table is created first when it does not exist yet. Nothing is stored
    when the database cannot be opened.

    Args:
        url: Value for the ``feed`` column.
        title: Title of the news item.
        date: Publication date as a ``time.struct_time``-like object; only
            ``tm_year``, ``tm_mon`` and ``tm_mday`` are read.
        text: Description of the news item.
        link: Link to the news item.
        link_of_img: Link to the image of the news item (may be empty).
    """
    if not check_existance("cache"):
        create_table()

    con = open_database()
    if con is None:
        # closing(None) would fail with AttributeError on exit.
        return

    # Zero-padded YYYYMMDD. Without padding 2019-01-12 and 2019-11-02 both
    # become "2019112", and neither equals the YYYYMMDD value given by --date.
    # NOTE(review): rows stored before this change keep the unpadded form.
    date_of_publishing = "{:04d}{:02d}{:02d}".format(date.tm_year, date.tm_mon, date.tm_mday)

    with closing(con):
        cur = con.cursor()
        cur.execute("INSERT INTO cache VALUES (?, ?, ?, ?, ?, ?)",
                    (url, title, date_of_publishing, text, link, link_of_img))
        con.commit()


def json_from_cashe(rows):
    """Print news taken from the cache to stdout as a JSON array.

    Each row is wrapped in ``InfoAboutNews`` and becomes an object with the
    keys "Title", "Date", "Description" and "Link [1]", plus "Link [2]" when
    the news item has an image link.

    Args:
        rows: Iterable of cached news rows.
    """
    def _to_entry(row):
        info = InfoAboutNews(row)
        entry = {"Title": info.title,
                 "Date": info.date,
                 "Description": info.description,
                 "Link [1]": info.link}
        if info.link_of_img:
            entry["Link [2]"] = info.link_of_img
        return entry

    entries = [_to_entry(row) for row in rows]
    print(json.dumps(entries, indent=5, ensure_ascii=False))
Loading