epam-python-courses-7-bsu · Useftro · Nov 1, 2019 · Nov 11, 2019 · Nov 13, 2019 · Nov 13, 2019
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,10 @@
+*.pyc
+.idea/
+*.db
+*.pkl
+*.cw127.pkl
+*.egg-info
+final_task/FinalTaskRssParser.egg-info
+final_task/dist
+*.log
+
diff --git a/Final_Task/FinalTask.md b/Final_Task/FinalTask.md
diff --git a/Final_Task/README.md b/Final_Task/README.md
diff --git a/README.md b/README.md
@@ -1,2 +1,32 @@
-# FinalTaskRssParser
-For final task pull requests.
+# That's how it works
+
+* An `RSSParser` object (the class defined in `rss_read.py`) is created
+* `feedparser.parse` is used to get the page
+* The output functions then extract the info from the page
+* The info (source link, image link, etc.) for every news item is packed into a `Novelty` object
+* A pack of news is built from these `Novelty` objects
+* When the pack of news is ready, control returns to rss_reader.py
+* There we prepare to output the info according to the console arguments and write the information to the DB
+* If '--to-pdf' or '--to-html' (or both) is passed in the console, we use functions 
+from pdf_and_html_converting to:
+        1. Get the images (to avoid accumulating copies of pictures, we first delete the images 
+                            folder if it exists)
+        2. Add them to the PDF and/or HTML file
+        3. Add all the other information 
+
+* If '--date %Y%m%d' is passed together with '--to-pdf' and/or '--to-html', we write the news 
+for that date to the pdf and/or html file(s).  
+* If '--date %Y%m%d' is passed, we take the news with that date from our DB. If '--limit N' 
+is also passed, we take N news from our DB. 
+* If '--limit N' is passed in addition to '--to-pdf' and/or '--to-html' and '--date %Y%m%d', we write 
+N news with that date to the pdf and/or html file(s)
+* If '--colorize' is among the console args, we print the news in random colors. Without '--colorize'
+we use the usual color (grey-white)
+## Important!
+When converting to pdf or html, pass the path like this: "C:\\Test\\" or "C:\\Test"
+
+When passing arguments to parse a page, put the link first, EXAMPLE: 
+python rss_reader.py https://bla-bla-bla.by --limit 1 
+
+If you don't want to pass a link and want to get the news stored in local storage, run it for EXAMPLE
+ like this: python rss_reader.py --colorize --limit 15
diff --git a/rss_task/__init__.py b/rss_task/__init__.py
diff --git a/rss_task/rss_reader/Classes/novelty.py b/rss_task/rss_reader/Classes/novelty.py
@@ -0,0 +1,14 @@
+from dataclasses import dataclass
+
+
+# Plain data record describing a single news item ("novelty").
+# Field order matters: the generated __init__ takes the fields positionally
+# in exactly this order.
+@dataclass
+class Novelty:
+    # Sequence number of the item; RSSParser.news_for_date renumbers it
+    # starting from 1 when no feed URL was given.
+    number_of_novelty: int
+    # Presumably the item's title as published by the feed - TODO confirm.
+    title_of_novelty: str
+    # Presumably the publication time as given by the feed - TODO confirm format.
+    time_of_novelty: str
+    # Presumably the link to the item itself - TODO confirm.
+    source_link: str
+    # Presumably the item's text/summary - TODO confirm.
+    description: str
+    # NOTE(review): annotated as str although the name is plural - confirm
+    # whether this holds one link, a joined string, or a list of links.
+    images_links: str
+    # Presumably the alternative text of the image(s) - TODO confirm.
+    alt_text: str
+    # Date that RSSParser.news_for_date compares (after str()) with the value
+    # passed after '--date'; the expected console format is %Y%m%d.
+    date_corrected: str
+    # Compared with RSSParser.feed_url to select the news of one feed.
+    main_source: str
diff --git a/rss_task/rss_reader/Classes/rss_read.py b/rss_task/rss_reader/Classes/rss_read.py
@@ -0,0 +1,163 @@
+import feedparser
+from output_functions import getting_full_info, getting_pack_of_news, converting_to_json, \
+    writing_to_cache, getting_from_database_to_pack
+from pdf_and_html_converting import converting_to_pdf, converting_to_html, pdf_path, html_path
+import logging
+import re
+
+
+class RSSParser:
+    """
+    class RSSParser has 3 parameters and it calls function parse when created
+    """
+
+    def __init__(self, param_url, num_of_news=None, list_of_args=None):
+        self.feed_url = param_url
+        self.number = num_of_news
+        self.list_of_args = list_of_args
+
+    def parse(self):
+        """
+        1. Use feedparser to get the page
+        2. If we have some problems with connection - raise ConnectionError
+        3. Handle Exception without showing a traceback
+        4. Do parse method
+        5. If there are some arguments from console - work with them
+        """
+        try:
+            logging.info("Trying to get page from feedparser!")
+            the_feed = feedparser.parse(self.feed_url)
+            logging.info("Got it (the page)!")
+            if the_feed.get('bozo'):
+                if '--date' in self.list_of_args:
+                    if '--to-pdf' in self.list_of_args:
+                        path_pdf = pdf_path(self.list_of_args)
+                        pack_news = self.news_for_date()
+                        converting_to_pdf(path_pdf, pack_news)
+                    elif '--to-html' in self.list_of_args:
+                        path_html = html_path(self.list_of_args)
+                        pack_news = self.news_for_date()
+                        converting_to_pdf(path_html, pack_news)
+                    else:
+                        logging.info("Getting news for date!")
+                        news = self.news_for_date()
+                        if '--json' not in self.list_of_args:
+                            getting_full_info(the_feed, news, self.list_of_args)
+                            print("\nJSON VIEW OF NEWS:", converting_to_json(news, the_feed))
+                        logging.info("Got news for date!")
+                else:
+                    logging.info("Got some problems due to connection!")
+        except ConnectionError:
+            logging.critical("CONNECTION ERROR, HELP!")
+            print("You have some connection problems!")
+            if '--date' in self.list_of_args:
+                if '--to-pdf' in self.list_of_args:
+                    path_pdf = pdf_path(self.list_of_args)
+                    pack_news = self.news_for_date()
+                    converting_to_pdf(path_pdf, pack_news)
+                elif '--to-html' in self.list_of_args:
+                    path_html = html_path(self.list_of_args)
+                    pack_news = self.news_for_date()
+                    converting_to_html(path_html, pack_news)
+                else:
+                    logging.info("Getting news for date!")
+                    news = self.news_for_date()
+                    if '--json' not in self.list_of_args:
+                        getting_full_info(the_feed, news, self.list_of_args)
+                        print("\nJSON VIEW OF NEWS:", converting_to_json(news, the_feed))
+                    logging.info("Got news for date!")
+
+        logging.info("Getting pack of news!")
+        pack_of_news, pack_of_news_for_db = getting_pack_of_news(the_feed, self.feed_url,
+                                                                 self.list_of_args, self.number)
+        logging.info("Got pack of news!")
+        logging.info("Writing news from source and DB to file!")
+        writing_to_cache(pack_of_news, pack_of_news_for_db, 'news_cache.txt')
+        logging.info("News are in the file!")
+        if '--to-html' in self.list_of_args:
+            path_html = html_path(self.list_of_args)
+            if '--date' in self.list_of_args:
+                pack = self.news_for_date()
+                converting_to_html(path_html, pack)
+            else:
+                converting_to_html(path_html, pack_of_news)
+        if '--to-pdf' in self.list_of_args:
+            path_pdf = pdf_path(self.list_of_args)
+            if '--date' in self.list_of_args:
+                pack = self.news_for_date()
+                converting_to_pdf(path_pdf, pack)
+            else:
+                converting_to_pdf(path_pdf, pack_of_news)
+        if '--to-pdf' not in self.list_of_args and '--to-html' not in self.list_of_args:
+            if '--date' in self.list_of_args and '--json' not in self.list_of_args:
+                logging.info("Getting full info!")
+                getting_full_info(the_feed, self.news_for_date(), self.list_of_args)
+                logging.info("Got full info!")
+            else:
+                logging.info("Getting full info!")
+                if not the_feed.get('bozo') and '--json' not in self.list_of_args:
+                    getting_full_info(the_feed, pack_of_news, self.list_of_args)
+                logging.info("Got full info!")
+
+        if '--json' in self.list_of_args and '--date' not in self.list_of_args:
+            print("\nJSON VIEW OF NEWS:", converting_to_json(pack_of_news, the_feed))
+        elif '--json' in self.list_of_args and '--date' in self.list_of_args:
+            print("\nJSON VIEW OF NEWS:", converting_to_json(self.news_for_date(), the_feed))
+
+    def news_if_not_source(self, the_feed):
+        # Looking for url address: if it is => doing all the thing; if it is not => printing all the news
+        chk_pat = '(?:{})'.format('|'.join(self.list_of_args))
+        s = 'http'
+        if not bool(re.search(s, chk_pat, flags=re.I)):
+            pack_of, pack_db = getting_pack_of_news(the_feed, self.feed_url, self.list_of_args, self.number)
+            getting_full_info(the_feed, pack_db, self.list_of_args)
+
+    def news_for_date(self):
+        """
+        Finding news by date and rss
+        If your rss and date are correct we append the novelty to the pack_of_news_needed
+        If not we continue our searching
+        """
+        try:
+            news_for_date_needed = []
+            date_needed = self.list_of_args[self.list_of_args.index('--date') + 1]
+            pack_of_db_news = getting_from_database_to_pack()
+            if '--limit' in self.list_of_args:
+                cycle_counter = 0
+                number_of_news_found = 0
+                while cycle_counter != len(pack_of_db_news):
+                    if str(pack_of_db_news[cycle_counter].date_corrected) == date_needed and \
+                            self.feed_url == pack_of_db_news[cycle_counter].main_source:
+                        news_for_date_needed.append(pack_of_db_news[cycle_counter])
+                        number_of_news_found += 1
+                    if number_of_news_found == self.number:
+                        break
+                    cycle_counter += 1
+            else:
+                for item in pack_of_db_news:
+                    if str(item.date_corrected) == date_needed and \
+                            self.feed_url == item.main_source:
+                        news_for_date_needed.append(item)
+            if self.feed_url is None:
+                counter = 0
+                number_of_news_f = 0
+                while counter != len(pack_of_db_news):
+                    if str(pack_of_db_news[counter].date_corrected) == date_needed:
+                        pack_of_db_news[counter].number_of_novelty = number_of_news_f + 1
+                        news_for_date_needed.append(pack_of_db_news[counter])
+                        number_of_news_f += 1
+                    counter += 1
+                    if '--limit' in self.list_of_args:
+                        if number_of_news_f == self.number:
+                            break
+            if not news_for_date_needed:
+                if '--limit' in self.list_of_args:
+                    print("No news have been found for this date with your limits!")
+                elif 'source' in self.list_of_args:
+                    print("No news have been found for your source")
+                else:
+                    print("No news have been found for this date!")
+            return news_for_date_needed
+        except IndexError:
+            print("You forgot to enter date in format %Y%m%d")
+
diff --git a/rss_task/rss_reader/DejaVuSans.ttf b/rss_task/rss_reader/DejaVuSans.ttf