Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions final_task/rss_reader/add_to_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import csv

def addcsv(articles):
    """Append a single article to the ``news.csv`` cache file.

    Despite the plural name, ``articles`` is one mapping keyed by ``link``,
    ``title``, ``img``, ``summary`` and ``published``. Its values are written
    as one CSV row in exactly that column order. No header row is written,
    and the file is opened in append mode, so earlier rows are kept.
    """
    columns = ['link', 'title', 'img', 'summary', 'published']
    with open('news.csv', 'a', newline='') as csvfile:
        csv.DictWriter(csvfile, fieldnames=columns).writerow(articles)
Comment on lines +4 to +9

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I understand, if you fetch news 5 times from the same RSS source, the same news items will be repeated 5 times in your file.


def out():
a = []
with open('news.csv', 'r') as csvFile:
reader = csv.reader(csvFile)
for row in reader:
a.append(row)
Comment on lines +12 to +16

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If there is no such file (and the first time you launch the program on a new machine there won't be one), this line will crash your application with a FileNotFoundError.

return a
42 changes: 42 additions & 0 deletions final_task/rss_reader/arg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import argparse

def parsargs():
parser = argparse.ArgumentParser(description='Getting info from sites')

parser.add_argument('source', type=str, help='RSS URL')

parser.add_argument(
'--limit',
type=int,
help='Limit news topics if this parameter provided'
)
Comment on lines +8 to +12

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here and below: it is OK to put each of these calls on a single line. It will still be readable.


parser.add_argument(
'--json',
action='store_true',
help='Print result as JSON in stdout'
)

parser.add_argument(
'--version',
action='store_true',
help='Print version of the application'
)

parser.add_argument(
'--verbose',
action="store_true",
help='Outputs verbose status messages'
)

parser.add_argument(
'--date',
type=int,
help='Take date in format %Y%m%d'
)

args = parser.parse_args()

return args

# Application version; rss_reader.py prints it when --version is passed.
VERSION = 1.0
8 changes: 8 additions & 0 deletions final_task/rss_reader/clean_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from string import ascii_letters, whitespace
import re

def delete_unnecessary_symbols(text):
"Delete unnecessary symbols"

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here and below: please make your docstrings a bit more specific

good_chars = (ascii_letters + whitespace).encode()
junk_chars = bytearray(set(range(0x100)) - set(good_chars))
return text.encode('ascii', 'ignore').translate(None, junk_chars).decode()
19 changes: 19 additions & 0 deletions final_task/rss_reader/converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
def convert_date(date):
"""This function converts date"""
month = {'Jan': '1',
'Feb': '2',
'Mar': '3',
'Apr': '4',
'May': '5',
'Jun': '6',
'Jul': '7',
'Aug': '8',
'Sep': '9',
'Oct': '10',
'Nov': '11',
'Dec': '12'}
day = date[5:7]
month_int = month[date[8:11]]
year = date[12:16]
Comment on lines +3 to +17

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is better to use datetime lib for this.


return year+month_int+day
22 changes: 22 additions & 0 deletions final_task/rss_reader/loggs.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
INFO:root:Website is working

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remove your local .log file from the repository.

DEBUG:root:Title: Rep Connolly says Democrats already have smoking gun to impeach Trump
DEBUG:root:Date: Tue, 12 Nov 2019 22:12:58 -0500
DEBUG:root:Link: https://news.yahoo.com/connolly-says-democrats-already-have-smoking-gun-to-impeach-trump-031258672.html
DEBUG:root:Description: A Democratic lawmaker who attended most of the closeddoor depositions over the past month said that while public hearings will be instructive for Americans President Trump has already given Congress all the evidence it needs to impeach him

INFO:root:Website is working
DEBUG:root:Title: Rep Connolly says Democrats already have smoking gun to impeach Trump
DEBUG:root:Date: Tue, 12 Nov 2019 22:12:58 -0500
DEBUG:root:Link: https://news.yahoo.com/connolly-says-democrats-already-have-smoking-gun-to-impeach-trump-031258672.html
DEBUG:root:Description: A Democratic lawmaker who attended most of the closeddoor depositions over the past month said that while public hearings will be instructive for Americans President Trump has already given Congress all the evidence it needs to impeach him

DEBUG:root:Title: Venezuela exintel chief missing in Spain ahead of US extradition police
DEBUG:root:Date: Wed, 13 Nov 2019 04:46:30 -0500
DEBUG:root:Link: https://news.yahoo.com/venezuela-ex-intel-chief-missing-spain-ahead-us-094630755.html
DEBUG:root:Description: Venezuelas former military intelligence chief has gone missing in Spain just days after a court approved a request for his extradition to the United States on drug trafficking charges police said Wednesday They are currently looking for him said a spokeswoman for Spains national police referring to General Hugo Armando Carvajal Judicial sources said police had gone to his house in Madrid after Fridays court decision but could not find him

DEBUG:root:Title: Turkey deports American IS suspect stuck at Greek border
DEBUG:root:Date: Thu, 14 Nov 2019 05:20:35 -0500
DEBUG:root:Link: https://news.yahoo.com/turkey-deport-suspect-stuck-greek-102035932.html
DEBUG:root:Description: An American man suspected of being a member of the Islamic State group is being repatriated to the United States after spending three days in a no mans land between Turkey and Greece Turkeys Interior Ministry said Thursday The United States agreed to take him in and will provide him with travel documents the ministry said adding that the repatriation was underway The move comes a day after Turkish President Recep Tayyip Erdogan met with US President Donald Trump in Washington

15 changes: 15 additions & 0 deletions final_task/rss_reader/loggs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import logging
from clean_output import delete_unnecessary_symbols


# Configure logging as soon as this module is imported: records at DEBUG
# level and above are written to the file "loggs.log".
logging.basicConfig(filename="loggs.log", level=logging.DEBUG)


def logg(article):
    """Write one article to the debug log, one field per record.

    Four DEBUG records are emitted in order: the cleaned title, the
    publication date, the link, and the summary (with a trailing newline so
    consecutive articles are visually separated in the log).
    """
    title_line = "Title: " + delete_unnecessary_symbols(article['title'])
    logging.debug(title_line)

    # (prefix, article key, suffix) for the remaining fields, in log order.
    remaining_fields = (
        ("Date: ", 'published', ''),
        ("Link: ", 'link', ''),
        ("Description: ", 'summary', '\n'),
    )
    for prefix, key, suffix in remaining_fields:
        logging.debug(prefix + article[key] + suffix)

def logg_json(json_format):
    """Record an already-serialised JSON string in the debug log."""
    message = "Json: " + json_format
    logging.debug(message)
107 changes: 107 additions & 0 deletions final_task/rss_reader/rss_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import feedparser
#from arg import parsargs, VERSION

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please avoid commented-out code in pushed commits.

from arg import parsargs, VERSION
import clean_output
from bs4 import BeautifulSoup
import json
from loggs import logg, logg_json
import logging
import sys
from datetime import datetime
from add_to_csv import addcsv, out
from converter import convert_date

def get_sourse(parsed):
    """Extract the feed-level metadata from a parsed feed.

    Args:
        parsed: Mapping with a ``'feed'`` entry (the result of
            ``feedparser.parse``) holding the channel metadata.

    Returns:
        A dict with the keys ``link``, ``title`` and ``subtitle``.
        ``subtitle`` is an empty string when the feed does not provide one.

    Raises:
        KeyError: If ``'feed'``, ``link`` or ``title`` is missing.
    """
    feed = parsed['feed']
    return {
        'link': feed['link'],
        'title': feed['title'],
        # Not every feed carries a subtitle; its absence must not abort the run.
        'subtitle': feed.get('subtitle', ''),
    }

def get_news(parsed, console_args):
    """Collect the article fields needed for output from a parsed feed.

    Args:
        parsed: Mapping with an ``'entries'`` list (the feedparser result).
        console_args: Parsed command-line arguments; only ``limit`` is read.
            When it is not ``None`` the entries are cut to
            ``entries[:limit]`` before processing.

    Returns:
        A list of dicts with the keys ``link``, ``title``, ``img``,
        ``summary`` and ``published``. ``summary`` is the entry summary with
        markup stripped; ``img`` is the ``src`` of the first ``<img>`` found
        in the summary, or an empty string when the summary has no image.
    """
    entries = parsed['entries']
    if console_args.limit is not None:
        entries = entries[:console_args.limit]

    articles = []
    for entry in entries:
        # Parse the summary once and reuse the tree for both text and image.
        soup = BeautifulSoup(entry.summary, features='html.parser')
        image_tag = soup.find('img')
        # Summaries without an <img> (or with one lacking src) are common;
        # fall back to an empty string so the article is still reported.
        article_img = image_tag.get('src', '') if image_tag is not None else ''
        articles.append({
            'link': entry['link'],
            'title': entry['title'],
            'img': article_img,
            'summary': soup.text,
            'published': entry['published'],
        })
    return articles

def output(article):
    """Print one article to stdout in the human-readable layout.

    The title is passed through ``clean_output.delete_unnecessary_symbols``
    first; date, link, summary and image URL are printed as stored, followed
    by an empty separator line.
    """
    cleaned_title = clean_output.delete_unnecessary_symbols(article['title'])
    print("Title: ", cleaned_title)

    # (label, article key) for the remaining fields, in display order.
    labelled_fields = (
        ("Date: ", 'published'),
        ("Link: ", 'link'),
        ("\nSummary: ", 'summary'),
        ("\nImage: ", 'img'),
    )
    for label, key in labelled_fields:
        print(label, article[key])
    print('\n')

def test_to_add(news_csv, articles):
for i in articles:
var = True
for j in news_csv:
if i['published'] != j[4]:
Comment on lines +53 to +56

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is better to avoid one-character variable names.

var = True
else:
var = False
break
if var:
addcsv(i)

def main():
console_args = arg.parsargs()

@HenadziStantchik HenadziStantchik Dec 3, 2019

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you import a specific function using `from`, then you should use it as is: `parsargs`, without the `arg.` prefix.

Right now this will break your application.

if console_args.version:
print("Version: ", VERSION)
parsed = feedparser.parse(console_args.source)
feed = get_sourse(parsed)
articles = get_news(parsed, console_args)
if console_args.verbose:
logging.info('Website is working')

print('Feed: ', feed['link'], '\n')


news_csv = out()
test_to_add(news_csv, articles)

if console_args.date is not None:
for news in news_csv:
if int(convert_date(news[4])) == console_args.date:
print("Title: ",
clean_output.delete_unnecessary_symbols(news[1])) #clean_output.print("Date: ", article['published'])
print("Date: ", news[4])
print("Link: ", news[0])
print("\nSummary: ", news[3])
print("\nImage: ", news[2])
print('\n')


for article in articles:
if console_args.json:
""" Convert to json """
json_format = json.dumps(article)
print(json_format, '\n')
article['title'] = clean_output.delete_unnecessary_symbols(article['title'])
if console_args.verbose:
logg_json(json_format)
else:
output(article)
if console_args.verbose:
logg(article)


if __name__ == '__main__':
main()
18 changes: 18 additions & 0 deletions final_task/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from setuptools import setup, find_packages
from os.path import join, dirname

# Read the long description up front so the file handle is closed promptly
# and decoding does not depend on the platform's default encoding.
with open(join(dirname(__file__), 'README.md'), encoding='utf-8') as readme:
    long_description = readme.read()

setup(
    name='rss-reader',
    version='1.0',
    packages=find_packages(),
    # NOTE(review): py_modules expects importable module names, not file
    # names; 'rss_reader.py' is probably meant to be 'rss_reader' -- confirm
    # against the package layout before changing.
    py_modules=['rss_reader.py'],
    install_requires=['feedparser', 'bs4'],
    long_description=long_description,
    author="Oleg Slavashevich",
    author_email="oslavashevish@gmail.com",
    entry_points={
        'console_scripts': [
            'rss_reader = rss_reader.rss_reader:main',
        ],
    },
)