def parsargs(argv=None):
    """Build the command-line parser and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) makes argparse read the process arguments, which is
            what existing callers rely on.

    Returns:
        argparse.Namespace with the attributes ``source``, ``limit``,
        ``json``, ``version``, ``verbose`` and ``date``.
    """
    parser = argparse.ArgumentParser(description='Getting info from sites')

    parser.add_argument('source', type=str, help='RSS URL')

    parser.add_argument(
        '--limit',
        type=int,
        help='Limit news topics if this parameter provided'
    )

    parser.add_argument(
        '--json',
        action='store_true',
        help='Print result as JSON in stdout'
    )

    parser.add_argument(
        '--version',
        action='store_true',
        help='Print version of the application'
    )

    parser.add_argument(
        '--verbose',
        action='store_true',
        help='Outputs verbose status messages'
    )

    parser.add_argument(
        '--date',
        type=int,
        # argparse %-formats help strings, so a literal percent sign has to
        # be doubled; a bare "%Y" makes help rendering raise ValueError.
        help='Take date in format %%Y%%m%%d'
    )

    return parser.parse_args(argv)
def convert_date(date):
    """Convert an RFC 2822 date string to a ``YYYYMMDD`` string.

    Example: ``'Tue, 12 Nov 2019 22:12:58 -0500'`` -> ``'20191112'``.

    The month and day are always zero-padded, so the result can be compared
    (after ``int()``) with a date given in ``%Y%m%d`` form. The date is
    taken as written in the string; no time-zone conversion is applied.

    Args:
        date: Date string in the RFC 2822 format, as in the example above.

    Returns:
        Eight-character string: year, month, day.

    Raises:
        ValueError: If the string cannot be parsed (older Python versions
            may raise TypeError instead).
    """
    # Imported locally so the function stays self-contained in this module.
    from email.utils import parsedate_to_datetime

    # The stdlib parser copes with one-digit days and an optional weekday,
    # which fixed-offset slicing of the string does not.
    return parsedate_to_datetime(date).strftime('%Y%m%d')
evidence it needs to impeach him + +DEBUG:root:Title: Venezuela exintel chief missing in Spain ahead of US extradition police +DEBUG:root:Date: Wed, 13 Nov 2019 04:46:30 -0500 +DEBUG:root:Link: https://news.yahoo.com/venezuela-ex-intel-chief-missing-spain-ahead-us-094630755.html +DEBUG:root:Description: Venezuelas former military intelligence chief has gone missing in Spain just days after a court approved a request for his extradition to the United States on drug trafficking charges police said Wednesday They are currently looking for him said a spokeswoman for Spains national police referring to General Hugo Armando Carvajal Judicial sources said police had gone to his house in Madrid after Fridays court decision but could not find him + +DEBUG:root:Title: Turkey deports American IS suspect stuck at Greek border +DEBUG:root:Date: Thu, 14 Nov 2019 05:20:35 -0500 +DEBUG:root:Link: https://news.yahoo.com/turkey-deport-suspect-stuck-greek-102035932.html +DEBUG:root:Description: An American man suspected of being a member of the Islamic State group is being repatriated to the United States after spending three days in a no mans land between Turkey and Greece Turkeys Interior Ministry said Thursday The United States agreed to take him in and will provide him with travel documents the ministry said adding that the repatriation was underway The move comes a day after Turkish President Recep Tayyip Erdogan met with US President Donald Trump in Washington + diff --git a/final_task/rss_reader/loggs.py b/final_task/rss_reader/loggs.py new file mode 100644 index 0000000..157774d --- /dev/null +++ b/final_task/rss_reader/loggs.py @@ -0,0 +1,15 @@ +import logging +from clean_output import delete_unnecessary_symbols + + +logging.basicConfig(filename="loggs.log", level=logging.DEBUG) + + +def logg(article): + logging.debug("Title: " + delete_unnecessary_symbols(article['title'])) + logging.debug("Date: " + article['published']) + logging.debug("Link: " + article['link']) + 
def get_news(parsed, console_args):
    """Collect the article fields from a parsed feed.

    Args:
        parsed: Parsed feed; its ``'entries'`` item is iterated.
        console_args: Parsed command-line arguments; ``limit`` (int or
            ``None``) caps how many entries are taken from the front.

    Returns:
        List of dicts with the keys ``link``, ``title``, ``img``,
        ``summary`` and ``published``. ``img`` is an empty string when the
        entry summary contains no ``<img>`` tag or the tag has no ``src``.
    """
    entries = parsed['entries']
    if console_args.limit is not None:
        # Keep only the requested number of entries.
        entries = entries[:console_args.limit]

    articles = []
    for entry in entries:
        # Parse the summary HTML once; it yields both the text and the image.
        soup = BeautifulSoup(entry.summary, features='html.parser')
        img_tag = soup.find('img')
        # find() returns None when there is no <img>; subscripting that
        # result crashed on every text-only entry.
        article_img = img_tag.get('src', '') if img_tag is not None else ''
        articles.append({
            'link': entry['link'],
            'title': entry['title'],
            'img': article_img,
            'summary': soup.text,
            'published': entry['published'],
        })
    return articles
def main():
    """Command-line entry point of the RSS reader.

    Parses the arguments, downloads and parses the feed, appends articles
    that are not yet stored to the CSV cache, optionally prints cached
    articles for the requested ``--date``, and finally prints every fetched
    article as text or, with ``--json``, as JSON. With ``--verbose`` the
    printed data is also written to the log.
    """
    # The file imports the function by name (``from arg import parsargs``);
    # the module name ``arg`` is never bound, so ``arg.parsargs()`` raised
    # NameError before anything else could run.
    console_args = parsargs()
    if console_args.version:
        print("Version: ", VERSION)

    parsed = feedparser.parse(console_args.source)
    feed = get_sourse(parsed)
    articles = get_news(parsed, console_args)
    if console_args.verbose:
        logging.info('Website is working')

    print('Feed: ', feed['link'], '\n')

    # Read the cache first, then add only the articles it does not hold yet.
    news_csv = out()
    test_to_add(news_csv, articles)

    if console_args.date is not None:
        for news in news_csv:
            # Cached rows are lists in CSV column order:
            # link, title, img, summary, published.
            if int(convert_date(news[4])) == console_args.date:
                output({
                    'link': news[0],
                    'title': news[1],
                    'img': news[2],
                    'summary': news[3],
                    'published': news[4],
                })

    for article in articles:
        if console_args.json:
            # Convert to JSON
            json_format = json.dumps(article)
            print(json_format, '\n')
            if console_args.verbose:
                logg_json(json_format)
        else:
            output(article)
            if console_args.verbose:
                logg(article)