epam-python-courses-7-bsu · scarydzik · Nov 23, 2019 · Nov 23, 2019 · Nov 23, 2019 · Nov 23, 2019
diff --git a/final_task/LICENSE b/final_task/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2019 Denis Marfonov
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/final_task/README.md b/final_task/README.md
@@ -1,3 +1,104 @@
-# Your readme here
-Some text.
-Checkout how to write this file using *markdown*.
+# RSS Reader
+CLI-based RSS reader built with Python 3.8. 
+Supports all RSS standards, can handle incorrect RSS. 
+Also partially supports Atom feeds. 
+
+## Getting Started
+### Prerequisites
+- Python 3.8
+- feedparser, lxml, beautifulsoup4
+
+```
+pip install feedparser lxml beautifulsoup4
+```
+### Installation
+```
+pip install -i https://test.pypi.org/simple/ rss-reader-scarzdz
+```
+Alternatively, you can download the source code and install it with: 
+```
+$ python final_task/setup.py install
+```
+### Running
+After installation, `rss-reader` command is added to PATH.
+
+Alternatively, the application can be run from the source file:
+``` 
+$ cd final_task
+$ python -m rss_reader ...
+```
+### Usage
+```
+usage: rss-reader [-h] [--json | --html PATH | --epub PATH] [--version] [-v]
+                  [--limit LIMIT] [--date DATE]
+                  source
+
+Pure Python command-line RSS reader.
+
+positional arguments:
+  source         RSS URL
+
+optional arguments:
+  -h, --help     show this help message and exit
+  --json         Print result as JSON in stdout
+  --html PATH    Generate html book on path
+  --epub PATH    Generate epub book on path
+  --version      Print version info
+  -v, --verbose  Outputs verbose status messages
+  --limit LIMIT  Limit news topics if this parameter provided
+  --date DATE    Load news with date (%Y%m%d) from cache, if this parameter
+                 provided
+```
+## Behavior
+RSS Reader can work in online or offline mode. 
+
+In **online** mode, when the `--date` argument is not provided, the application loads and parses the RSS feed from the `source` argument. 
+This is done using the `feedparser` library. 
+Parsed news is saved in an **_SQLite database_**, which is located at `rss_reader/data/rss.sqlite`. 
+If an item contains _HTML_ markup, it is converted to plain text.
+
+In **offline** mode, when the `--date` argument is provided, 
+the application loads news with the specified feed link and date from the database.
+
+News is printed to stdout in the following format:
+
+```
+Feed: *RSS feed title*
+
+
+Title: *item 1 title*
+Date: *%a, %d %b %Y %H:%M:%S +0000* 
+Link: https://example.com/link_to_item
+
+*Item description*
+
+Links:
+[1]: *first link is always link to item*
+[2]: Others can be links parsed from  <a> or <img> tags
+
+
+Title: *item 2 title*
+Date: ...
+```
+
+News is converted to JSON like this:
+```
+{
+  "title": "*Feed title*",
+  "link": "*link to feed*",
+  "items": [
+    {
+      "title": "*item 1 title*",
+      "date": *time.struct_time tuple*,
+      "link": "*link to item*",
+      "enclosure": *null* or *link to enclosure*,
+      "description": "*item description*",
+      "description_parsed": "*description parsed to plain text*"  or *null* if description is text
+    },
+    ...
+  ]
+}
+```
+
+## License
+This project is licensed under the MIT License - see the LICENSE file for details.
diff --git a/final_task/__init__.py → final_task/rss_reader/__init__.py b/final_task/__init__.py → final_task/rss_reader/__init__.py
diff --git a/final_task/rss_reader/__main__.py b/final_task/rss_reader/__main__.py
@@ -0,0 +1,3 @@
+# Executed when the package is run as a module (``python -m rss_reader``):
+# import the command-line entry point and call it immediately.
+from .rss_reader import main
+
+main()
diff --git a/final_task/rss_reader/book_gen.py b/final_task/rss_reader/book_gen.py
@@ -0,0 +1,110 @@
+import time
+import os.path
+import html
+
+from ebooklib import epub
+
+DATE_FORMAT = "%Y%m%d"
+
+
+def _render_document(title, items, date_str):
+    """
+    render the whole feed as one html document
+
+    :param title: text placed in the <h1> heading (must be a string)
+    :param items: iterable of item dicts, each one rendered by _render_item
+    :param date_str: text placed in the document <title> element
+    :return: complete html document as a single string
+    """
+    # Fragments are collected in a list and joined once at the end. The list
+    # is deliberately not called ``html`` so it does not shadow the imported
+    # ``html`` module.
+    parts = [
+        "<html><head><title>",
+        f"{date_str}",
+        "</title></head><body>",
+        "<h1>",
+        title,
+        "</h1>",
+    ]
+    parts.extend(_render_item(item) for item in items)
+    # Close the elements opened above so the result is a well-formed document.
+    parts.append("</body></html>")
+    return "".join(parts)
+
+
+def _render_item(item):
+    """
+    render one news item as an html fragment
+
+    :param item: mapping with "title", "date", "link" and "description" keys;
+        "date" is handed to time.strftime, a missing title or description
+        is replaced by a placeholder text and a None link by "no link"
+    :return: html fragment that starts with an <hr> separator
+    """
+    title = item["title"] or "No Headline"
+    date = time.strftime("%a, %d %b %Y %H:%M:%S", item["date"])
+    parts = ["<hr>", f"<h2>{title}</h2>", f"<p><i>{date}</i></p>"]
+    if item["link"] is not None:
+        # The link is placed inside a single-quoted attribute, so quotes and
+        # ampersands in the URL must be escaped or they break the markup.
+        link = html.escape(item["link"], quote=True)
+        parts.append(f"<p><a href='{link}'>{link}</a></p>")
+    else:
+        parts.append("<p><a>no link</a></p>")
+    # Title and description are inserted unchanged, exactly as before.
+    description = item["description"] or "No description"
+    parts.append(f"<p>{description}</p>")
+    return "".join(parts)
+
+
+def _gen_id(title: str, date: str) -> str:
+    """
+    generate string for book id and file name
+
+    The title and date are joined with "_", lower-cased, and every character
+    that is not allowed in file names (\\ | / : * ? " < >) is replaced by "_".
+    Strings longer than 122 characters are cut to their first 121 characters
+    followed by "...".
+
+    :param title: feed title
+    :param date: date string appended after the title
+    :return: generated string
+    """
+    string = (title + "_" + date).lower()
+    # str.replace returns a new string, so calling it without keeping the
+    # result changes nothing; translate performs all replacements in one pass.
+    string = string.translate(str.maketrans(dict.fromkeys(r'\|/:*?"<>', "_")))
+    if len(string) > 122:
+        string = string[:121] + "..."
+    return string
+
+
+def _create_html(book_id, bookpath, text):
+    """
+    write the rendered document to an html file
+
+    If bookpath is an existing directory the file is created inside it as
+    "<book_id>.html"; if bookpath does not exist yet, or is a regular file or
+    a link, it is used as the file path itself. Any other kind of path is
+    skipped with a warning.
+
+    Writing stays best-effort: a failure while writing is logged instead of
+    raised. An error while opening the file still propagates to the caller.
+
+    :param book_id: file name (without extension) used when bookpath is a directory
+    :param bookpath: target directory or target file path
+    :param text: document text to write, encoded as utf-8
+    """
+    # Local import: the logging module is only needed for these warnings.
+    import logging
+
+    if os.path.isdir(bookpath):
+        target = os.path.join(bookpath, book_id + ".html")
+    elif not os.path.exists(bookpath) or os.path.isfile(bookpath) or os.path.islink(bookpath):
+        target = bookpath
+    else:
+        logging.warning("Cannot write html book to %s: unsupported path", bookpath)
+        return
+
+    file = open(target, "w", encoding="utf-8")
+    try:
+        # The context manager closes the handle even when the write fails.
+        with file:
+            file.write(text)
+    except OSError as e:
+        logging.warning("Failed to write html book to %s: %s", target, e)
+
+def _create_epub(book_title, book_id, bookpath, text):
+    """
+    build an epub book with a single "News" chapter and write it to disk
+
+    If bookpath is an existing directory the book is written inside it as
+    "<book_id>.epub"; if bookpath does not exist yet, or is a regular file or
+    a link, it is used as the file path itself. For any other kind of path
+    nothing is written.
+
+    :param book_title: title stored in the book
+    :param book_id: book identifier, also the file name when bookpath is a directory
+    :param bookpath: target directory or target file path
+    :param text: html content of the chapter
+    """
+    chapter = epub.EpubHtml(title='News', file_name='content.xhtml')
+    chapter.set_content(text)
+
+    book = epub.EpubBook()
+    book.set_language('en')
+    book.set_identifier(book_id)
+    book.set_title(book_title)
+    book.add_item(chapter)
+    book.toc = (chapter,)
+    book.spine = [chapter]
+    book.add_item(epub.EpubNcx())
+    book.add_item(epub.EpubNav())
+
+    if os.path.isdir(bookpath):
+        destination = os.path.join(bookpath, book_id+".epub")
+    elif not os.path.exists(bookpath) or os.path.isfile(bookpath) or os.path.islink(bookpath):
+        destination = bookpath
+    else:
+        # Existing path that is neither directory, file nor link: skip writing.
+        return
+    epub.write_epub(destination, book)
+
+
+def create_book(title, items, bookpath, date=None, *, html=False):
+    """
+    render the news items and save them as an html or epub book
+
+    :param title: feed title; "No Title" is used when it is None
+    :param items: item dicts passed on to the document renderer
+    :param bookpath: target directory or target file path
+    :param date: value formatted with DATE_FORMAT via time.strftime, or None
+        to label the book "Latest"
+    :param html: write an html file when True, an epub book otherwise
+    """
+    date = "Latest" if date is None else time.strftime(DATE_FORMAT, date)
+    title = "No Title" if title is None else title
+
+    book_title = title + " - " + date
+    text = _render_document(book_title, items, date)
+    book_id = _gen_id(title, date)
+
+    if not html:
+        _create_epub(book_title, book_id, bookpath, text)
+        return
+    _create_html(book_id, bookpath, text)
diff --git a/final_task/rss_reader/database.py b/final_task/rss_reader/database.py
@@ -0,0 +1,149 @@
+import sqlite3
+import time
+import pkg_resources
+import logging
+
+DATE_FORMAT = "%Y-%m-%d"
+DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S"
+
+DATA_FILE = "data/rss.sqlite"
+
+
+class DBError(Exception):
+    """Raised when an operation on the local sqlite cache fails."""
+
+
+class DB:
+    """
+    Local sqlite cache of RSS feeds and their items.
+
+    The database file path is resolved with pkg_resources relative to this
+    module (DATA_FILE). The "feeds" and "items" tables are (re)created when
+    they are not both present. sqlite failures are reported as DBError with
+    the original sqlite exception attached as the cause.
+    """
+
+    def __init__(self):
+        """
+        Open the cache database and create its tables when they are missing.
+
+        :raises DBError: if sqlite cannot open the database file
+        """
+        path = pkg_resources.resource_filename(__name__, DATA_FILE)
+        try:
+            self.conn = sqlite3.connect(path)
+        except sqlite3.OperationalError as e:
+            raise DBError(str(e)) from e
+
+        self.cursor = self.conn.cursor()
+        # id of the feed whose items _store_items inserts; set by store_feed
+        self.feed_id = None
+
+        if not self._is_db_exist():
+            self._create_db()
+
+    def _is_db_exist(self):
+        """
+        Check whether both the "feeds" and "items" tables exist.
+
+        :return: True if both tables are present, False otherwise (also when
+            the lookup itself fails with a sqlite DatabaseError)
+        """
+        try:
+            self.cursor.execute('''SELECT count(name) FROM sqlite_master 
+                                       WHERE type='table' AND (name='items' or name='feeds')''')
+        except sqlite3.DatabaseError:
+            return False
+        # if the count is 2, then tables exists
+        return self.cursor.fetchone()[0] == 2
+
+    def _create_db(self):
+        """
+        Drop any existing "feeds"/"items" tables and create them from scratch.
+
+        All previously cached data in those tables is lost.
+        """
+        logging.info("Creating DB for local cache")
+        self.cursor.execute("DROP TABLE IF EXISTS feeds")
+        self.cursor.execute("DROP TABLE IF EXISTS items")
+        self.cursor.execute('''
+            CREATE TABLE feeds(
+                id INTEGER PRIMARY KEY,
+                title TEXT,
+                link TEXT UNIQUE NOT NULL 
+            )
+        ''')
+        self.cursor.execute('''
+            CREATE TABLE items(
+                id INTEGER PRIMARY KEY,
+                feed_id INTEGER NOT NULL, 
+                published TEXT NOT NULL,
+                title TEXT,
+                link TEXT,
+                enclosure TEXT,
+                description TEXT,
+                description_parsed TEXT,
+                FOREIGN KEY (feed_id) REFERENCES feeds(id)
+            )
+        ''')
+        # The unique index is what lets "insert or ignore" in _store_items
+        # skip items that are already cached.
+        self.cursor.execute('''
+            CREATE UNIQUE INDEX un_items ON items(published, title, link)
+        ''')
+        self.conn.commit()
+
+    def get_feed(self, feed_link, req_date, limit=-1):
+        """
+        Load the cached items of a feed that were published on a given date.
+
+        :param feed_link: link of the feed, as stored by store_feed
+        :param req_date: requested date; formatted with DATE_FORMAT through
+            time.strftime
+        :param limit: value bound to the SQL LIMIT clause; the default -1 is
+            treated by sqlite as "no limit"
+        :return: dict with "title" (feed title) and "items" (list of item
+            dicts), or None if the feed is not in the cache
+        :raises DBError: if a query fails
+        """
+        feed_info = self._get_feed_info_if_exists(feed_link)
+        if feed_info is None:
+            return None
+
+        req_date = time.strftime(DATE_FORMAT, req_date)
+        try:
+            self.cursor.execute('''
+                select i.title, i.published, i.link, i.enclosure, i.description, i.description_parsed 
+                    from items i join feeds f on i.feed_id = f.id 
+                        where f.id=(?) and date(i.published)=date(?) limit (?)
+            ''', (feed_info[0], req_date, limit)
+            )
+        except sqlite3.Error as e:
+            raise DBError("Error getting items from db") from e
+
+        items = [
+            dict(title=row[0], date=time.strptime(row[1], DATETIME_FORMAT), link=row[2], enclosure=row[3],
+                 description=row[4], description_parsed=row[5])
+            for row in self.cursor.fetchall()
+        ]
+        return dict(title=feed_info[1], items=items)
+
+    def store_feed(self, link, title, items):
+        """
+        Store a feed and its items, adding the feed row if it is new.
+
+        :param link: feed link (unique key of the "feeds" table)
+        :param title: feed title, stored only when the feed row is created
+        :param items: item dicts, see _store_items for the keys read
+        :raises DBError: if inserting the feed or one of the items fails
+        """
+        feed_id = self._get_feed_id_if_exists(link)
+        if feed_id is None:
+            try:
+                self.cursor.execute("insert into feeds(title, link) values (?, ?)", (title, link))
+                feed_id = self.cursor.lastrowid
+            except sqlite3.Error as e:
+                self.conn.rollback()
+                raise DBError("Error adding feed to db") from e
+            self.conn.commit()
+        self.feed_id = feed_id
+        self._store_items(items)
+
+    def close(self):
+        """Close the database connection."""
+        self.conn.close()
+
+    def _store_items(self, items):
+        """
+        Insert items for the feed identified by self.feed_id.
+
+        Items that collide with the unique index are silently skipped
+        ("insert or ignore"). All inserts are committed together at the end;
+        on a sqlite error the pending inserts are rolled back.
+
+        :param items: dicts with "date", "title", "link", "enclosure",
+            "description" and "description_parsed" keys; "date" is formatted
+            with DATETIME_FORMAT through time.strftime
+        :raises DBError: if an insert fails
+        """
+        for i in items:
+            item_dict = dict(
+                feed_id=self.feed_id,
+                published=time.strftime(DATETIME_FORMAT, i["date"]),
+                title=i["title"],
+                link=i["link"],
+                enclosure=i["enclosure"],
+                description=i["description"],
+                description_parsed=i["description_parsed"]
+            )
+            try:
+                self.cursor.execute('''
+                insert or ignore into items(feed_id, published, title, link, enclosure, description, description_parsed) 
+                    values (:feed_id, :published, :title, :link, :enclosure, :description, :description_parsed)
+                ''', item_dict)
+            except sqlite3.Error as e:
+                self.conn.rollback()
+                raise DBError("Error adding item to db") from e
+        self.conn.commit()
+
+    def _get_feed_id_if_exists(self, feed_link):
+        """
+        Look up the id of a cached feed.
+
+        :param feed_link: feed link to search for
+        :return: feed id, or None if the feed is not cached
+        :raises DBError: if the query fails
+        """
+        feed_info = self._get_feed_info_if_exists(feed_link)
+        if feed_info is not None:
+            return feed_info[0]
+        return None
+
+    def _get_feed_info_if_exists(self, feed_link):
+        """
+        Look up the id and title of a cached feed.
+
+        :param feed_link: feed link to search for
+        :return: (id, title) tuple, or None if the feed is not cached
+        :raises DBError: if the query fails
+        """
+        try:
+            self.cursor.execute("select feeds.id, feeds.title from feeds where feeds.link=?", (feed_link,))
+        except sqlite3.Error as e:
+            raise DBError("Error checking Feed in db") from e
+        feed_info = self.cursor.fetchone()
+        if feed_info is not None:
+            return feed_info[0], feed_info[1]
+        return None
+
+
+if __name__ == '__main__':
+    # Running this module directly resets the cache: opening the DB and
+    # calling _create_db drops and recreates the feeds/items tables.
+    DB()._create_db()
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from .rss_reader import main
HenadziStantchik Dec 3, 2019 Copy link Copy Markdown Collaborator Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. It is better to use simple `import` in this case

		main()