-
Notifications
You must be signed in to change notification settings - Fork 24
Usevalad_Trafimau_Unioltered@gmail.com #18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
a6dc7da
8a26e18
00e5980
0e85949
d923240
42e9b5a
d449935
8add54c
e12a08e
bbad5fb
acb2b16
bb45870
3f7d015
71d9e58
67e93e6
5f35c6b
8c31a03
40d1d40
e4ac5bf
0c9f69b
0f8eb25
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| *.pyc | ||
| *.idea'.idea' | ||
| *.db | ||
| *.pkl | ||
| *.cw127.pkl | ||
| *.egg-info | ||
| final_task/FinalTaskRssParser.egg-info | ||
| final_task/dist | ||
| *.log | ||
|
|
||
This file was deleted.
This file was deleted.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,2 +1,32 @@ | ||
| # FinalTaskRssParser | ||
| For final task pull requests. | ||
| # That's how it works | ||
|
|
||
| * Creating rss_read class object | ||
| * Using feedparser to get a page with function parse | ||
| * Then using output functions get info from the page | ||
| * Info (source link, image link, etc.) for every novelty is packed into an object of class Novelty | ||
| * Create a pack of news filled with novelty class objects | ||
| * When a pack of news is done come back to rss_reader.py | ||
| * Here we prepare to output info according to arguments from console and write down information into DB | ||
| * If '--to-pdf' or '--to-html' (or both) is passed in the console, we use functions | ||
| from pdf_and_html_converting to: | ||
| 1. Get some images (to avoid many copies of pictures we first of all delete images | ||
| folder if it exists) | ||
| 2. Add them into PDF or/and html file | ||
| 3. Add all other information | ||
|
|
||
| * If there is also '--date %Y%m%d' in console with '--to-pdf' or/and '--to-html' we write down into the | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be nice to have some info about how to install your app, and also more info about the way you store news entries locally. |
||
| pdf or/and html file(s) news for that date. | ||
| * If there is '--date %Y%m%d' in console we take news with that date from our DB. If there is also | ||
| a '--limit N' argument, we take at most N news from our DB. | ||
| * If in addition to '--to-pdf' or/and '--to-html' and '--date %Y%m%d' there is '--limit N' we write down | ||
| N news with that date to the pdf or/and html file(s) | ||
| * If '--colorize' is in console args then we colorize our news in random colors. If there is no '--colorize' | ||
| we use usual color (grey-white) | ||
| ## Important! | ||
| When converting to pdf or html, enter your path in a form like this: "C:\\Test\\" or "C:\\Test" | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What about Linux OS? |
||
|
|
||
| When passing arguments to parse a page, put the link first. EXAMPLE: | ||
| python rss_reader.py https://bla-bla-bla.by --limit 1 | ||
|
|
||
| If you don't want to pass a link and want to get the news kept in local storage, run, for EXAMPLE: | ||
| python rss_reader.py --colorize --limit 15 | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| from dataclasses import dataclass | ||
|
|
||
|
|
||
@dataclass
class Novelty:
    """
    One news item (a "novelty") with everything needed to display or store it.

    Field order is part of the interface: instances may be built positionally.
    """

    # Position of the item in the pack it belongs to; RSSParser.news_for_date
    # renumbers it starting from 1 when news are taken from any source.
    number_of_novelty: int
    # Headline and publication time, both kept as text.
    title_of_novelty: str
    time_of_novelty: str
    # Link to the item itself.
    source_link: str
    description: str
    # Image link(s) and their alternative text.
    # NOTE(review): annotated as str although the name is plural - confirm
    # against the code that fills these fields.
    images_links: str
    alt_text: str
    # Date that is compared, as a string, with the value given after '--date'.
    date_corrected: str
    # Feed the item came from; compared with the parser's feed URL.
    main_source: str
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,163 @@ | ||
| import feedparser | ||
| from output_functions import getting_full_info, getting_pack_of_news, converting_to_json, \ | ||
| writing_to_cache, getting_from_database_to_pack | ||
| from pdf_and_html_converting import converting_to_pdf, converting_to_html, pdf_path, html_path | ||
| import logging | ||
| import re | ||
|
|
||
|
|
||
class RSSParser:
    """
    Fetch an RSS feed and output its news according to the console arguments.

    Creating the object only stores its three parameters; call parse() to
    download the feed and produce the output.
    """

    def __init__(self, param_url, num_of_news=None, list_of_args=None):
        """
        :param param_url: feed URL; news_for_date() treats None as "any source"
        :param num_of_news: number of news to output; used as the cap when
            '--limit' is among the arguments
        :param list_of_args: console arguments as a list of strings
        """
        self.feed_url = param_url
        self.number = num_of_news
        # Every method runs membership tests on the arguments, so the None
        # default is normalised to an empty list instead of failing later.
        self.list_of_args = [] if list_of_args is None else list_of_args

    def parse(self):
        """
        1. Use feedparser to get the page
        2. If the feed is broken (bozo) or the connection fails and '--date'
           was given - serve that request from the locally stored news
        3. Build the pack of news and write it to the cache file
        4. Output the news as html / pdf / plain text / JSON according to
           the console arguments
        """
        args = self.list_of_args
        # Bound before the try so the except branch can tell whether the
        # download itself failed.
        the_feed = None
        try:
            logging.info("Trying to get page from feedparser!")
            the_feed = feedparser.parse(self.feed_url)
            logging.info("Got it (the page)!")
            if the_feed.get('bozo'):
                if '--date' in args:
                    self._serve_stored_news_for_date(the_feed)
                else:
                    logging.info("Got some problems due to connection!")
        except ConnectionError:
            logging.critical("CONNECTION ERROR, HELP!")
            print("You have some connection problems!")
            if the_feed is None:
                # The download raised before a result existed. Continue with
                # an empty feed flagged as bozo so the code below does not
                # fail on an unbound name.
                # NOTE(review): assumes the output helpers accept an empty
                # bozo feed the same way they accept a failed download.
                the_feed = feedparser.FeedParserDict(
                    bozo=True, entries=[], feed=feedparser.FeedParserDict())
            if '--date' in args:
                self._serve_stored_news_for_date(the_feed)

        logging.info("Getting pack of news!")
        pack_of_news, pack_of_news_for_db = getting_pack_of_news(the_feed, self.feed_url,
                                                                 args, self.number)
        logging.info("Got pack of news!")
        logging.info("Writing news from source and DB to file!")
        writing_to_cache(pack_of_news, pack_of_news_for_db, 'news_cache.txt')
        logging.info("News are in the file!")

        by_date = '--date' in args
        as_json = '--json' in args
        to_html = '--to-html' in args
        to_pdf = '--to-pdf' in args

        # With '--date' every output below shows the stored news for that
        # date, otherwise the fresh pack. Picking it once avoids repeated
        # storage reads and repeated "No news ..." messages.
        news = self.news_for_date() if by_date else pack_of_news

        if to_html:
            converting_to_html(html_path(args), news)
        if to_pdf:
            converting_to_pdf(pdf_path(args), news)
        if not to_html and not to_pdf:
            logging.info("Getting full info!")
            # Plain output is skipped for JSON requests; fresh news are also
            # skipped when the feed is flagged as bozo.
            if not as_json and (by_date or not the_feed.get('bozo')):
                getting_full_info(the_feed, news, args)
            logging.info("Got full info!")

        if as_json:
            print("\nJSON VIEW OF NEWS:", converting_to_json(news, the_feed))

    def _serve_stored_news_for_date(self, the_feed):
        """
        Answer a '--date' request from local storage when the feed is not
        available: convert to pdf, or to html, or print the news.
        """
        args = self.list_of_args
        if '--to-pdf' in args:
            path_pdf = pdf_path(args)
            converting_to_pdf(path_pdf, self.news_for_date())
        elif '--to-html' in args:
            path_html = html_path(args)
            # The html request must go to the html converter (it used to be
            # routed to the pdf one).
            converting_to_html(path_html, self.news_for_date())
        else:
            logging.info("Getting news for date!")
            news = self.news_for_date()
            # Same rule as in parse(): JSON view only when '--json' is given.
            if '--json' in args:
                print("\nJSON VIEW OF NEWS:", converting_to_json(news, the_feed))
            else:
                getting_full_info(the_feed, news, args)
            logging.info("Got news for date!")

    def news_if_not_source(self, the_feed):
        """
        Print the news prepared for the DB when no url was given in the
        console arguments; do nothing when one of the arguments contains
        'http' (case-insensitive).
        """
        # '|' never occurs in 'http', so searching the joined string is the
        # same as checking every argument on its own.
        if re.search('http', '|'.join(self.list_of_args), flags=re.I):
            return
        _, pack_db = getting_pack_of_news(the_feed, self.feed_url, self.list_of_args, self.number)
        getting_full_info(the_feed, pack_db, self.list_of_args)

    def news_for_date(self):
        """
        Find stored news for the date given after '--date'.

        With a feed URL only the news of that source are taken. When the URL
        is None news of every source are taken and renumbered from 1.
        If '--limit' is among the arguments the search stops as soon as
        self.number news have been found.

        :return: list of matching news; empty when nothing matches or when
            the date value is missing after '--date'
        """
        args = self.list_of_args
        try:
            date_needed = args[args.index('--date') + 1]
        except IndexError:
            print("You forgot to enter date in format %Y%m%d")
            return []

        limit = self.number if '--limit' in args else None
        any_source = self.feed_url is None
        news_for_date_needed = []
        for item in getting_from_database_to_pack():
            if str(item.date_corrected) != date_needed:
                continue
            if any_source:
                # News from mixed sources get a fresh continuous numbering.
                item.number_of_novelty = len(news_for_date_needed) + 1
            elif item.main_source != self.feed_url:
                continue
            news_for_date_needed.append(item)
            if limit is not None and len(news_for_date_needed) == limit:
                break

        if not news_for_date_needed:
            if '--limit' in args:
                print("No news have been found for this date with your limits!")
            elif 'source' in args:
                print("No news have been found for your source")
            else:
                print("No news have been found for this date!")
        return news_for_date_needed
|
|
Uh oh!
There was an error while loading. Please reload this page.