From 7a10e17846489b39bf73fe7c62678bdb58cca019 Mon Sep 17 00:00:00 2001 From: Oleg Slavashevich Date: Sat, 9 Nov 2019 11:26:06 +0300 Subject: [PATCH 01/11] Gets source information --- final_task/rss_reader/rss_reader.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index e69de29..fe1dae3 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -0,0 +1,19 @@ +import feedparser + + +def parse(url): + return feedparser.parse(url) + +def get_sourse(parsed): + ''' Gets source information ''' + feed = parsed['feed'] + return { + 'link': feed['link'], + 'title': feed['title'], + 'subtitle': feed['subtitle'], + } + +if __name__ == '__main__': + parsed = parse("https://news.yahoo.com/rss/") + feed = get_sourse(parsed) + print('Feed: ', feed['link'], '\n') From 60df8e803651dbade67e4bed511afaff574c58fb Mon Sep 17 00:00:00 2001 From: Oleg Slavashevich Date: Sat, 9 Nov 2019 11:41:32 +0300 Subject: [PATCH 02/11] Gets entries information --- final_task/rss_reader/rss_reader.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index fe1dae3..70cdcf9 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -1,4 +1,5 @@ import feedparser +from bs4 import BeautifulSoup def parse(url): @@ -13,7 +14,33 @@ def get_sourse(parsed): 'subtitle': feed['subtitle'], } +def get_news(parsed): + """ Gets entries information """ + articles = [] + entries = parsed['entries'] + for entry in entries: + soup = BeautifulSoup(entry['summary'], 'lxml') + article_img = soup.find('img')['src'] + articles.append({ + 'link': entry['link'], + 'title': entry['title'], + 'img': article_img, + 'summary': entry['summary'], + 'published': entry['published'], + }) + return articles + if __name__ == '__main__': parsed = parse("https://news.yahoo.com/rss/") feed = get_sourse(parsed) + articles = get_news(parsed) print('Feed: ', feed['link'], '\n') + + value = articles[0] + + print("Title: ", value['title']) + print("Date: ", value['published']) + print("Link: ", value['link']) + print("\nSummary: ", value['summary']) + print("\nImage: ", value['img']) + print('\n') From 8e1078b898697294f6976e3214d4238562eefe4a Mon Sep 17 00:00:00 2001 From: Oleg Slavashevich Date: Sat, 9 Nov 2019 11:45:07 +0300 Subject: [PATCH 03/11] Add argparse --- final_task/rss_reader/myargparse.py | 20 ++++++++++++++++++++ final_task/rss_reader/rss_reader.py | 29 ++++++++++++++++++++++------- 2 files changed, 42 insertions(+), 7 deletions(-) create mode 100644 final_task/rss_reader/myargparse.py diff --git a/final_task/rss_reader/myargparse.py b/final_task/rss_reader/myargparse.py new file mode 100644 index 0000000..f85e12f --- /dev/null +++ b/final_task/rss_reader/myargparse.py @@ -0,0 +1,20 @@ +import argparse + +parser = argparse.ArgumentParser(description='Getting info from sites') + +parser.add_argument('source', type=str, help='RSS URL') + +parser.add_argument( + '--limit', + type=int, + default=1, + help='Limit news topics if this parameter provided' +) + +parser.add_argument( + '--json', + action='store_true', + help='Print result as JSON in stdout' +) + +args = parser.parse_args() \ No newline at end of file diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index 70cdcf9..181e98c 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -1,5 +1,8 @@ import feedparser +from pprint import pprint +from myargparse import * from bs4 import BeautifulSoup +import json def parse(url): @@ -18,6 +21,7 @@ def get_news(parsed): """ Gets entries information """ articles = [] entries = parsed['entries'] + entries = entries[:args.limit] # Get the right amount from the array for entry in entries: soup = BeautifulSoup(entry['summary'], 'lxml') article_img = soup.find('img')['src'] @@ -30,17 +34,28 @@ def get_news(parsed): }) return articles -if __name__ == '__main__': - parsed = parse("https://news.yahoo.com/rss/") - feed = get_sourse(parsed) - articles = get_news(parsed) - print('Feed: ', feed['link'], '\n') +def to_json(articles): + """ Convert to json """ + return json.dumps(articles) - value = articles[0] - +def output(articles): print("Title: ", value['title']) print("Date: ", value['published']) print("Link: ", value['link']) print("\nSummary: ", value['summary']) print("\nImage: ", value['img']) print('\n') + +if __name__ == '__main__': + parsed = parse(args.source) + feed = get_sourse(parsed) + articles = get_news(parsed) + print('Feed: ', feed['link'], '\n') + if args.json: + for value in articles: + j = to_json(articles) + print(j, '\n') + else: + for value in articles: + output(articles) + From 88d7ac5c1c79c711f41b4e09d2ee6b597acf1b6a Mon Sep 17 00:00:00 2001 From: Oleg Slavashevich Date: Sat, 9 Nov 2019 16:42:10 +0300 Subject: [PATCH 04/11] Added argparse logic to function and --- final_task/rss_reader/myargparse.py | 31 ++++++++++++++++------------- final_task/rss_reader/rss_reader.py | 7 +++---- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/final_task/rss_reader/myargparse.py b/final_task/rss_reader/myargparse.py index f85e12f..12f3656 100644 --- a/final_task/rss_reader/myargparse.py +++ b/final_task/rss_reader/myargparse.py @@ -1,20 +1,23 @@ import argparse -parser = argparse.ArgumentParser(description='Getting info from sites') -parser.add_argument('source', type=str, help='RSS URL') +def parsargs(): + parser = argparse.ArgumentParser(description='Getting info from sites') + parser.add_argument('source', type=str, help='RSS URL') -parser.add_argument( - '--limit', - type=int, - default=1, - help='Limit news topics if this parameter provided' -) + parser.add_argument( + '--limit', + type=int, + default=1, + help='Limit news topics if this parameter provided' + ) -parser.add_argument( - '--json', - action='store_true', - help='Print result as JSON in stdout' -) + parser.add_argument( + '--json', + action='store_true', + help='Print result as JSON in stdout' + ) -args = parser.parse_args() \ No newline at end of file + args = parser.parse_args() + + return args \ No newline at end of file diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index 181e98c..41f6b5d 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -1,12 +1,12 @@ import feedparser from pprint import pprint -from myargparse import * +from myargparse import parsargs from bs4 import BeautifulSoup import json +args = parsargs() -def parse(url): - return feedparser.parse(url) +parsed = feedparser.parse(args.source) def get_sourse(parsed): ''' Gets source information ''' @@ -47,7 +47,6 @@ def output(articles): print('\n') if __name__ == '__main__': - parsed = parse(args.source) feed = get_sourse(parsed) articles = get_news(parsed) print('Feed: ', feed['link'], '\n') From a5a583de86e46cfb7c20fac2e3ccb7a13f90e346 Mon Sep 17 00:00:00 2001 From: Oleg Slavashevich Date: Tue, 12 Nov 2019 15:50:04 +0300 Subject: [PATCH 05/11] ignore this commit) --- final_task/rss_reader/arg.py | 23 +++++++++++++++++++++++ final_task/rss_reader/rss_reader.py | 13 +++++-------- 2 files changed, 28 insertions(+), 8 deletions(-) create mode 100644 final_task/rss_reader/arg.py diff --git a/final_task/rss_reader/arg.py b/final_task/rss_reader/arg.py new file mode 100644 index 0000000..12f3656 --- /dev/null +++ b/final_task/rss_reader/arg.py @@ -0,0 +1,23 @@ +import argparse + + +def parsargs(): + parser = argparse.ArgumentParser(description='Getting info from sites') + parser.add_argument('source', type=str, help='RSS URL') + + parser.add_argument( + '--limit', + type=int, + default=1, + help='Limit news topics if this parameter provided' + ) + + parser.add_argument( + '--json', + action='store_true', + help='Print result as JSON in stdout' + ) + + args = parser.parse_args() + + return args \ No newline at end of file diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index 41f6b5d..eafd898 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -1,13 +1,13 @@ import feedparser from pprint import pprint -from myargparse import parsargs +from arg import parsargs from bs4 import BeautifulSoup import json args = parsargs() parsed = feedparser.parse(args.source) - +print("gello") def get_sourse(parsed): ''' Gets source information ''' feed = parsed['feed'] @@ -34,10 +34,6 @@ def get_news(parsed): }) return articles -def to_json(articles): - """ Convert to json """ - return json.dumps(articles) - def output(articles): print("Title: ", value['title']) print("Date: ", value['published']) @@ -52,8 +48,9 @@ def output(articles): print('Feed: ', feed['link'], '\n') if args.json: for value in articles: - j = to_json(articles) - print(j, '\n') + """ Convert to json """ + json_format = json.dumps(articles) + print(json_format, '\n') else: for value in articles: output(articles) From d2d342e7464d82515fc7ccc00682bcba25a3aec7 Mon Sep 17 00:00:00 2001 From: Oleg Slavashevich Date: Tue, 12 Nov 2019 15:55:07 +0300 Subject: [PATCH 06/11] delete myargparse.py --- final_task/rss_reader/myargparse.py | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 final_task/rss_reader/myargparse.py diff --git a/final_task/rss_reader/myargparse.py b/final_task/rss_reader/myargparse.py deleted file mode 100644 index 12f3656..0000000 --- a/final_task/rss_reader/myargparse.py +++ /dev/null @@ -1,23 +0,0 @@ -import argparse - - -def parsargs(): - parser = argparse.ArgumentParser(description='Getting info from sites') - parser.add_argument('source', type=str, help='RSS URL') - - parser.add_argument( - '--limit', - type=int, - default=1, - help='Limit news topics if this parameter provided' - ) - - parser.add_argument( - '--json', - action='store_true', - help='Print result as JSON in stdout' - ) - - args = parser.parse_args() - - return args \ No newline at end of file From 22f7706137627a0e01f3998a23bac1fd32d9fc49 Mon Sep 17 00:00:00 2001 From: Oleg Slavashevich Date: Thu, 14 Nov 2019 17:55:29 +0300 Subject: [PATCH 07/11] Add --verbose and --version --- final_task/rss_reader/arg.py | 17 ++++++++++-- final_task/rss_reader/clean_output.py | 14 ++++++++++ final_task/rss_reader/loggs.log | 22 +++++++++++++++ final_task/rss_reader/loggs.py | 16 +++++++++++ final_task/rss_reader/rss_reader.py | 40 ++++++++++++++++++--------- 5 files changed, 94 insertions(+), 15 deletions(-) create mode 100644 final_task/rss_reader/clean_output.py create mode 100644 final_task/rss_reader/loggs.log create mode 100644 final_task/rss_reader/loggs.py diff --git a/final_task/rss_reader/arg.py b/final_task/rss_reader/arg.py index 12f3656..89ae123 100644 --- a/final_task/rss_reader/arg.py +++ b/final_task/rss_reader/arg.py @@ -8,7 +8,6 @@ def parsargs(): parser.add_argument( '--limit', type=int, - default=1, help='Limit news topics if this parameter provided' ) @@ -18,6 +17,20 @@ def parsargs(): help='Print result as JSON in stdout' ) + parser.add_argument( + '--version', + action='store_true', + help='Print version of the application' + ) + + parser.add_argument( + '--verbose', + action="store_true", + help='Outputs verbose status messages' + ) + args = parser.parse_args() - return args \ No newline at end of file + return args + +vers = 1.0 \ No newline at end of file diff --git a/final_task/rss_reader/clean_output.py b/final_task/rss_reader/clean_output.py new file mode 100644 index 0000000..458ca48 --- /dev/null +++ b/final_task/rss_reader/clean_output.py @@ -0,0 +1,14 @@ +from string import ascii_letters, whitespace +import re + +def delete_html(summary): + "Delete html, take description" + clean_summary_list = re.findall('(.+)

', summary) + clean_summary_str = ' '.join(clean_summary_list) + return clean_summary_str + +def clean(text): + "Delete unnecessary symbols" + good_chars = (ascii_letters + whitespace).encode() + junk_chars = bytearray(set(range(0x100)) - set(good_chars)) + return text.encode('ascii', 'ignore').translate(None, junk_chars).decode() \ No newline at end of file diff --git a/final_task/rss_reader/loggs.log b/final_task/rss_reader/loggs.log new file mode 100644 index 0000000..7602de9 --- /dev/null +++ b/final_task/rss_reader/loggs.log @@ -0,0 +1,22 @@ +INFO:root:Website is working +DEBUG:root:Title: Rep Connolly says Democrats already have smoking gun to impeach Trump +DEBUG:root:Date: Tue, 12 Nov 2019 22:12:58 -0500 +DEBUG:root:Link: https://news.yahoo.com/connolly-says-democrats-already-have-smoking-gun-to-impeach-trump-031258672.html +DEBUG:root:Description: A Democratic lawmaker who attended most of the closeddoor depositions over the past month said that while public hearings will be instructive for Americans President Trump has already given Congress all the evidence it needs to impeach him + +INFO:root:Website is working +DEBUG:root:Title: Rep Connolly says Democrats already have smoking gun to impeach Trump +DEBUG:root:Date: Tue, 12 Nov 2019 22:12:58 -0500 +DEBUG:root:Link: https://news.yahoo.com/connolly-says-democrats-already-have-smoking-gun-to-impeach-trump-031258672.html +DEBUG:root:Description: A Democratic lawmaker who attended most of the closeddoor depositions over the past month said that while public hearings will be instructive for Americans President Trump has already given Congress all the evidence it needs to impeach him + +DEBUG:root:Title: Venezuela exintel chief missing in Spain ahead of US extradition police +DEBUG:root:Date: Wed, 13 Nov 2019 04:46:30 -0500 +DEBUG:root:Link: https://news.yahoo.com/venezuela-ex-intel-chief-missing-spain-ahead-us-094630755.html +DEBUG:root:Description: Venezuelas former military intelligence chief has gone missing in Spain just days after a court approved a request for his extradition to the United States on drug trafficking charges police said Wednesday They are currently looking for him said a spokeswoman for Spains national police referring to General Hugo Armando Carvajal Judicial sources said police had gone to his house in Madrid after Fridays court decision but could not find him + +DEBUG:root:Title: Turkey deports American IS suspect stuck at Greek border +DEBUG:root:Date: Thu, 14 Nov 2019 05:20:35 -0500 +DEBUG:root:Link: https://news.yahoo.com/turkey-deport-suspect-stuck-greek-102035932.html +DEBUG:root:Description: An American man suspected of being a member of the Islamic State group is being repatriated to the United States after spending three days in a no mans land between Turkey and Greece Turkeys Interior Ministry said Thursday The United States agreed to take him in and will provide him with travel documents the ministry said adding that the repatriation was underway The move comes a day after Turkish President Recep Tayyip Erdogan met with US President Donald Trump in Washington + diff --git a/final_task/rss_reader/loggs.py b/final_task/rss_reader/loggs.py new file mode 100644 index 0000000..61456ca --- /dev/null +++ b/final_task/rss_reader/loggs.py @@ -0,0 +1,16 @@ + +import logging +from clean_output import delete_html, clean + + +logging.basicConfig(filename="loggs.log", level=logging.DEBUG) + + +def logg(article): + logging.debug("Title: " + clean(article['title'])) + logging.debug("Date: " + article['published']) + logging.debug("Link: " + article['link']) + logging.debug("Description: " + clean(delete_html(article['summary'])) + '\n') + +def logg_json(json_format): + logging.debug("Json: " + json_format) \ No newline at end of file diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index eafd898..88ffa77 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -1,13 +1,16 @@ import feedparser from pprint import pprint -from arg import parsargs +from arg import parsargs, vers +from clean_output import delete_html, clean from bs4 import BeautifulSoup import json +from loggs import logg, logg_json +import logging args = parsargs() parsed = feedparser.parse(args.source) -print("gello") + def get_sourse(parsed): ''' Gets source information ''' feed = parsed['feed'] @@ -21,7 +24,9 @@ def get_news(parsed): """ Gets entries information """ articles = [] entries = parsed['entries'] - entries = entries[:args.limit] # Get the right amount from the array + if args.limit is not None: + ''' Get right amount from the array ''' + entries = entries[:args.limit] for entry in entries: soup = BeautifulSoup(entry['summary'], 'lxml') article_img = soup.find('img')['src'] @@ -34,24 +39,33 @@ def get_news(parsed): }) return articles -def output(articles): - print("Title: ", value['title']) +def output(value): + print("Title: ", clean(value['title'])) print("Date: ", value['published']) print("Link: ", value['link']) - print("\nSummary: ", value['summary']) + print("\nSummary: ", clean(delete_html(value['summary']))) print("\nImage: ", value['img']) print('\n') if __name__ == '__main__': feed = get_sourse(parsed) articles = get_news(parsed) + if args.verbose: + logging.info('Website is working') + print('Feed: ', feed['link'], '\n') - if args.json: - for value in articles: + + for value in articles: + if args.json: """ Convert to json """ - json_format = json.dumps(articles) + json_format = json.dumps(value) print(json_format, '\n') - else: - for value in articles: - output(articles) - + if args.verbose: + logg_json(json_format) + else: + output(value) + if args.verbose: + logg(value) + + if args.version: + print("Version: ",vers) From c0095cc355c33f229d6e081db1756778a06f4615 Mon Sep 17 00:00:00 2001 From: Oleg Slavashevich Date: Thu, 14 Nov 2019 19:19:47 +0300 Subject: [PATCH 08/11] Add setup.py --- final_task/setup.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/final_task/setup.py b/final_task/setup.py index e69de29..f5b2ee1 100644 --- a/final_task/setup.py +++ b/final_task/setup.py @@ -0,0 +1,11 @@ +from setuptools import setup, find_packages +from os.path import join, dirname + +setup( + name='RSS-reader', + version='1.0', + packages=find_packages(), + long_description=open(join(dirname(__file__), 'README.md')).read(), + author="Oleg Slavashevich", + author_email="oslavashevish@gmail.com" +) \ No newline at end of file From 822b2eb21508a013ea086b713f47224f11892d49 Mon Sep 17 00:00:00 2001 From: Oleg Slavashevich Date: Thu, 14 Nov 2019 20:10:22 +0300 Subject: [PATCH 09/11] Fixed problems with the Title --- final_task/rss_reader/clean_output.py | 8 +------- final_task/rss_reader/loggs.py | 7 +++---- final_task/rss_reader/rss_reader.py | 23 ++++++++++++++--------- 3 files changed, 18 insertions(+), 20 deletions(-) diff --git a/final_task/rss_reader/clean_output.py b/final_task/rss_reader/clean_output.py index 458ca48..81bbf57 100644 --- a/final_task/rss_reader/clean_output.py +++ b/final_task/rss_reader/clean_output.py @@ -1,13 +1,7 @@ from string import ascii_letters, whitespace import re -def delete_html(summary): - "Delete html, take description" - clean_summary_list = re.findall('(.+)

', summary) - clean_summary_str = ' '.join(clean_summary_list) - return clean_summary_str - -def clean(text): +def clean_title(text): "Delete unnecessary symbols" good_chars = (ascii_letters + whitespace).encode() junk_chars = bytearray(set(range(0x100)) - set(good_chars)) diff --git a/final_task/rss_reader/loggs.py b/final_task/rss_reader/loggs.py index 61456ca..72df72d 100644 --- a/final_task/rss_reader/loggs.py +++ b/final_task/rss_reader/loggs.py @@ -1,16 +1,15 @@ - import logging -from clean_output import delete_html, clean +from clean_output import clean_title logging.basicConfig(filename="loggs.log", level=logging.DEBUG) def logg(article): - logging.debug("Title: " + clean(article['title'])) + logging.debug("Title: " + clear_title(article['title'])) logging.debug("Date: " + article['published']) logging.debug("Link: " + article['link']) - logging.debug("Description: " + clean(delete_html(article['summary'])) + '\n') + logging.debug("Description: " + article['summary'] + '\n') def logg_json(json_format): logging.debug("Json: " + json_format) \ No newline at end of file diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index 88ffa77..3b194d3 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -1,7 +1,7 @@ import feedparser from pprint import pprint from arg import parsargs, vers -from clean_output import delete_html, clean +from clean_output import clean_title from bs4 import BeautifulSoup import json from loggs import logg, logg_json @@ -29,25 +29,26 @@ def get_news(parsed): entries = entries[:args.limit] for entry in entries: soup = BeautifulSoup(entry['summary'], 'lxml') + summary = BeautifulSoup(entry.summary, features='html.parser').text article_img = soup.find('img')['src'] articles.append({ 'link': entry['link'], 'title': entry['title'], 'img': article_img, - 'summary': entry['summary'], + 'summary': summary, 'published': entry['published'], }) return articles -def output(value): - print("Title: ", clean(value['title'])) - print("Date: ", value['published']) - print("Link: ", value['link']) - print("\nSummary: ", clean(delete_html(value['summary']))) - print("\nImage: ", value['img']) +def output(article): + print("Title: ", clean_title(article['title'])) + print("Date: ", article['published']) + print("Link: ", article['link']) + print("\nSummary: ", article['summary']) + print("\nImage: ", article['img']) print('\n') -if __name__ == '__main__': +def main(): feed = get_sourse(parsed) articles = get_news(parsed) if args.verbose: @@ -58,6 +59,7 @@ def output(value): for value in articles: if args.json: """ Convert to json """ + value['title'] = clean_title(value['title']) json_format = json.dumps(value) print(json_format, '\n') if args.verbose: @@ -69,3 +71,6 @@ def output(value): if args.version: print("Version: ",vers) + +if __name__ == '__main__': + main() \ No newline at end of file From eb546337f5045875fceaa73a139ecb027c6cd218 Mon Sep 17 00:00:00 2001 From: Oleg Slavashevich Date: Thu, 14 Nov 2019 20:16:27 +0300 Subject: [PATCH 10/11] super small change --- final_task/rss_reader/rss_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index 3b194d3..d42def5 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -59,9 +59,9 @@ def main(): for value in articles: if args.json: """ Convert to json """ - value['title'] = clean_title(value['title']) json_format = json.dumps(value) print(json_format, '\n') + value['title'] = clean_title(value['title']) if args.verbose: logg_json(json_format) else: From eb04e6b5798e2c495d6536cb4f151bb2a79294c1 Mon Sep 17 00:00:00 2001 From: Oleg Slavashevich Date: Mon, 25 Nov 2019 18:29:00 +0300 Subject: [PATCH 11/11] Iteration 3 --- final_task/rss_reader/add_to_csv.py | 17 ++++++ final_task/rss_reader/arg.py | 10 +++- final_task/rss_reader/clean_output.py | 2 +- final_task/rss_reader/converter.py | 19 ++++++ final_task/rss_reader/loggs.py | 4 +- final_task/rss_reader/rss_reader.py | 83 ++++++++++++++++++--------- final_task/setup.py | 11 +++- 7 files changed, 113 insertions(+), 33 deletions(-) create mode 100644 final_task/rss_reader/add_to_csv.py create mode 100644 final_task/rss_reader/converter.py diff --git a/final_task/rss_reader/add_to_csv.py b/final_task/rss_reader/add_to_csv.py new file mode 100644 index 0000000..bb64aed --- /dev/null +++ b/final_task/rss_reader/add_to_csv.py @@ -0,0 +1,17 @@ +import csv + +def addcsv(articles): + with open('news.csv', 'a', newline='') as csvfile: + fieldnames = ['link','title', 'img', 'summary', 'published'] + + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + + writer.writerow(articles) + +def out(): + a = [] + with open('news.csv', 'r') as csvFile: + reader = csv.reader(csvFile) + for row in reader: + a.append(row) + return a \ No newline at end of file diff --git a/final_task/rss_reader/arg.py b/final_task/rss_reader/arg.py index 89ae123..6d21493 100644 --- a/final_task/rss_reader/arg.py +++ b/final_task/rss_reader/arg.py @@ -1,8 +1,8 @@ import argparse - def parsargs(): parser = argparse.ArgumentParser(description='Getting info from sites') + parser.add_argument('source', type=str, help='RSS URL') parser.add_argument( @@ -29,8 +29,14 @@ def parsargs(): help='Outputs verbose status messages' ) + parser.add_argument( + '--date', + type=int, + help='Take date in format %Y%m%d' + ) + args = parser.parse_args() return args -vers = 1.0 \ No newline at end of file +VERSION = 1.0 \ No newline at end of file diff --git a/final_task/rss_reader/clean_output.py b/final_task/rss_reader/clean_output.py index 81bbf57..75cae30 100644 --- a/final_task/rss_reader/clean_output.py +++ b/final_task/rss_reader/clean_output.py @@ -1,7 +1,7 @@ from string import ascii_letters, whitespace import re -def clean_title(text): +def delete_unnecessary_symbols(text): "Delete unnecessary symbols" good_chars = (ascii_letters + whitespace).encode() junk_chars = bytearray(set(range(0x100)) - set(good_chars)) diff --git a/final_task/rss_reader/converter.py b/final_task/rss_reader/converter.py new file mode 100644 index 0000000..d96f85b --- /dev/null +++ b/final_task/rss_reader/converter.py @@ -0,0 +1,19 @@ +def convert_date(date): + """This function converts date""" + month = {'Jan': '1', + 'Feb': '2', + 'Mar': '3', + 'Apr': '4', + 'May': '5', + 'Jun': '6', + 'Jul': '7', + 'Aug': '8', + 'Sep': '9', + 'Oct': '10', + 'Nov': '11', + 'Dec': '12'} + day = date[5:7] + month_int = month[date[8:11]] + year = date[12:16] + + return year+month_int+day \ No newline at end of file diff --git a/final_task/rss_reader/loggs.py b/final_task/rss_reader/loggs.py index 72df72d..157774d 100644 --- a/final_task/rss_reader/loggs.py +++ b/final_task/rss_reader/loggs.py @@ -1,12 +1,12 @@ import logging -from clean_output import clean_title +from clean_output import delete_unnecessary_symbols logging.basicConfig(filename="loggs.log", level=logging.DEBUG) def logg(article): - logging.debug("Title: " + clear_title(article['title'])) + logging.debug("Title: " + delete_unnecessary_symbols(article['title'])) logging.debug("Date: " + article['published']) logging.debug("Link: " + article['link']) logging.debug("Description: " + article['summary'] + '\n') diff --git a/final_task/rss_reader/rss_reader.py b/final_task/rss_reader/rss_reader.py index d42def5..8c6577e 100644 --- a/final_task/rss_reader/rss_reader.py +++ b/final_task/rss_reader/rss_reader.py @@ -1,15 +1,15 @@ import feedparser -from pprint import pprint -from arg import parsargs, vers -from clean_output import clean_title +#from arg import parsargs, VERSION +from arg import parsargs, VERSION +import clean_output from bs4 import BeautifulSoup import json from loggs import logg, logg_json import logging - -args = parsargs() - -parsed = feedparser.parse(args.source) +import sys +from datetime import datetime +from add_to_csv import addcsv, out +from converter import convert_date def get_sourse(parsed): ''' Gets source information ''' @@ -20,17 +20,17 @@ def get_sourse(parsed): 'subtitle': feed['subtitle'], } -def get_news(parsed): +def get_news(parsed, console_args): """ Gets entries information """ articles = [] entries = parsed['entries'] - if args.limit is not None: + if console_args.limit is not None: ''' Get right amount from the array ''' - entries = entries[:args.limit] + entries = entries[:console_args.limit] for entry in entries: - soup = BeautifulSoup(entry['summary'], 'lxml') + img = BeautifulSoup(entry.summary, features="html.parser") summary = BeautifulSoup(entry.summary, features='html.parser').text - article_img = soup.find('img')['src'] + article_img = img.find('img')['src'] articles.append({ 'link': entry['link'], 'title': entry['title'], @@ -41,36 +41,67 @@ def get_news(parsed): return articles def output(article): - print("Title: ", clean_title(article['title'])) + print("Title: ", + clean_output.delete_unnecessary_symbols(article['title'])) #clean_output.print("Date: ", article['published']) print("Date: ", article['published']) print("Link: ", article['link']) print("\nSummary: ", article['summary']) print("\nImage: ", article['img']) print('\n') +def test_to_add(news_csv, articles): + for i in articles: + var = True + for j in news_csv: + if i['published'] != j[4]: + var = True + else: + var = False + break + if var: + addcsv(i) + def main(): + console_args = arg.parsargs() + if console_args.version: + print("Version: ", VERSION) + parsed = feedparser.parse(console_args.source) feed = get_sourse(parsed) - articles = get_news(parsed) - if args.verbose: + articles = get_news(parsed, console_args) + if console_args.verbose: logging.info('Website is working') print('Feed: ', feed['link'], '\n') - for value in articles: - if args.json: + + news_csv = out() + test_to_add(news_csv, articles) + + if console_args.date is not None: + for news in news_csv: + if int(convert_date(news[4])) == console_args.date: + print("Title: ", + clean_output.delete_unnecessary_symbols(news[1])) #clean_output.print("Date: ", article['published']) + print("Date: ", news[4]) + print("Link: ", news[0]) + print("\nSummary: ", news[3]) + print("\nImage: ", news[2]) + print('\n') + + + for article in articles: + if console_args.json: """ Convert to json """ - json_format = json.dumps(value) + json_format = json.dumps(article) print(json_format, '\n') - value['title'] = clean_title(value['title']) - if args.verbose: + article['title'] = clean_output.delete_unnecessary_symbols(article['title']) + if console_args.verbose: logg_json(json_format) else: - output(value) - if args.verbose: - logg(value) - - if args.version: - print("Version: ",vers) + output(article) + if console_args.verbose: + logg(article) + if __name__ == '__main__': main() \ No newline at end of file diff --git a/final_task/setup.py b/final_task/setup.py index f5b2ee1..79511a0 100644 --- a/final_task/setup.py +++ b/final_task/setup.py @@ -2,10 +2,17 @@ from os.path import join, dirname setup( - name='RSS-reader', + name='rss-reader', version='1.0', packages=find_packages(), + py_modules=['rss_reader.py'], + install_requires = ['feedparser', 'bs4'], long_description=open(join(dirname(__file__), 'README.md')).read(), author="Oleg Slavashevich", - author_email="oslavashevish@gmail.com" + author_email="oslavashevish@gmail.com", + entry_points = { + 'console_scripts': [ + 'rss_reader = rss_reader.rss_reader:main' + ] + } ) \ No newline at end of file