From dc82836dfad111e2f5662ab4930928da459d2912 Mon Sep 17 00:00:00 2001 From: wpower12 Date: Fri, 16 Sep 2022 14:59:56 -0400 Subject: [PATCH 1/2] Returning reddit user IDs for submissions and comments. If applied, will update the returned item of the _api_obj_to_item method of the reddit scraper to include the author's full user ID in addition to their username. This is useful to users who track reddit users by these unique IDs across sources. --- snscrape/modules/reddit.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/snscrape/modules/reddit.py b/snscrape/modules/reddit.py index 55af939c..609583c6 100644 --- a/snscrape/modules/reddit.py +++ b/snscrape/modules/reddit.py @@ -20,6 +20,7 @@ @dataclasses.dataclass class Submission(snscrape.base.Item): author: typing.Optional[str] # E.g. submission hf7k6 + author_id: typing.Optional[str] date: datetime.datetime id: str link: typing.Optional[str] @@ -37,6 +38,7 @@ def __str__(self): @dataclasses.dataclass class Comment(snscrape.base.Item): author: typing.Optional[str] + author_id: typing.Optional[str] body: str date: datetime.datetime id: str @@ -115,6 +117,7 @@ def _api_obj_to_item(self, d): kwargs = { 'author': d.get('author'), + 'author_id': d.get('author_fullname'), 'date': datetime.datetime.fromtimestamp(d['created_utc'], datetime.timezone.utc), 'url': f'https://old.reddit.com{permalink}', 'subreddit': d.get('subreddit'), From d6621b4b3d5d9b9ccae6d2dc7c066293e1acd4e1 Mon Sep 17 00:00:00 2001 From: wpower12 Date: Mon, 19 Sep 2022 13:00:56 -0400 Subject: [PATCH 2/2] Adding subreddit_ids and scores as well. --- snscrape/modules/reddit.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/snscrape/modules/reddit.py b/snscrape/modules/reddit.py index 609583c6..17d1b512 100644 --- a/snscrape/modules/reddit.py +++ b/snscrape/modules/reddit.py @@ -26,6 +26,8 @@ class Submission(snscrape.base.Item): link: typing.Optional[str] selftext: typing.Optional[str] subreddit: typing.Optional[str] # E.g. 
submission 617p51 + subreddit_id: typing.Optional[str] + score: int title: str url: str @@ -44,6 +46,8 @@ class Comment(snscrape.base.Item): id: str parentId: typing.Optional[str] subreddit: typing.Optional[str] + subreddit_id: typing.Optional[str] + score: int url: str created = snscrape.base._DeprecatedProperty('created', lambda self: self.date, 'date') @@ -121,6 +125,8 @@ def _api_obj_to_item(self, d): 'date': datetime.datetime.fromtimestamp(d['created_utc'], datetime.timezone.utc), 'url': f'https://old.reddit.com{permalink}', 'subreddit': d.get('subreddit'), + 'subreddit_id': d.get('subreddit_id'), + 'score': int(d.get('score')) } if cls is Submission: kwargs['selftext'] = d.get('selftext') or None