Ghost Dog<\\/em>, by Jim Jarmusch.<\\/div>",
- "warnings": {
- "graphic": [
- "Suicide",
- "Violence"
- ],
- "moderate": [
- "Suicide",
- "Suicide attempt",
- "War"
- ],
- "minor": [
- "Gore"
- ]
- }
-}
-```
+ ```dotenv
+ _STORYGRAPH_SESSION=your_session_cookie_value
+ REMEMBER_USER_TOKEN=your_remember_token_value
+ STORYGRAPH_USERNAME=your_storygraph_username
+ ```
+## Usage
-### User List:
+Here's a basic example of how to use the `Book` and `User` clients.
```python
-# User Client
-# works only for public profiles
-# fetch user's currently reading list
-
-from storygraph_api import User
+import os
+import json
from dotenv import load_dotenv
+from storygraph_api import Book, User
+
+# Load environment variables from .env file
load_dotenv()
-cookie = os.getenv('COOKIE') # retrieve cookie from .env file
-uname = 'sampleuname' #some username
-user = User()
-result = user.currently_reading(uname,cookie=cookie)
-print(result)
-```
+# --- Authentication ---
+username = os.getenv("STORYGRAPH_USERNAME")
+session_cookie = os.getenv("_STORYGRAPH_SESSION")
+remember_token = os.getenv("REMEMBER_USER_TOKEN")
-#### Result:
-
- ```json
- [
- {
- "title": "The Murder After the Night Before",
- "book_id": "38cb5b56-23f1-48fd-b4b3-a80e07a19775"
- },
- {
- "title": "The Graces",
- "book_id": "653b54b3-a79d-4c2e-ae40-eae281a91315"
- }
-]
+auth_cookies = {
+ "_storygraph_session": session_cookie,
+ "remember_user_token": remember_token
+}
+
+# --- Initialize Clients ---
+book_client = Book()
+user_client = User()
+
+# --- User Client Examples ---
+
+# Get user ID
+user_id_json = user_client.get_user_id(username)
+user_id = json.loads(user_id_json).get("user_id")
+print(f"User ID: {user_id}")
+
+# Get 'Currently Reading' list
+currently_reading = user_client.currently_reading(username, auth_cookies)
+print(currently_reading)
+
+# Get 'To-Read' list
+to_read = user_client.to_read(username, auth_cookies)
+print(to_read)
- ```
+# Get 'Read' list
+books_read = user_client.books_read(username, auth_cookies)
+print(books_read)
-## Further Information
-* Refer to [books_client.py](https://github.com/ym496/storygraph-api/tree/main/storygraph_api/books_client.py) and [users_client.py](https://github.com/ym496/storygraph-api/tree/main/storygraph_api/users_client.py) files to know more functionalities.
-* All the user related tasks require the `remember_user_token` cookie. It can be found in the `Application` section of your browser’s developer tools for the StoryGraph website.
+# --- Book Client Examples ---
-## Contributing
-Contributions are welcome! Fork the repository, make your changes, and submit a pull request.
+book_id = "1c023e31-637b-41d9-ba64-260c3c1b0f3d" # Example book ID
-For bugs or feature requests, please open an issue on [GitHub](https://github.com/ym496/storygraph-api/issues).
+# Search for a book
+search_results = book_client.search("Dune Frank Herbert")
+print(search_results)
+
+# Get book info
+book_info = book_client.book_info(book_id)
+print(book_info)
+
+# Get your reading progress for a book
+progress = book_client.reading_progress(book_id, auth_cookies)
+print(progress)
+
+# Get your read dates for a book
+read_dates = book_client.get_read_dates(book_id, auth_cookies)
+print(read_dates)
+
+# Get your journal entries for a book
+journal_entries = book_client.get_journal_entries(book_id, auth_cookies)
+print(journal_entries)
+
+# Get the AI summary for a book
+if user_id:
+ ai_summary = book_client.get_ai_summary(book_id, user_id)
+ print(ai_summary)
+```
-## License
+## Disclaimer
-This project is licensed under the MIT License.
+This is an unofficial wrapper. It is not affiliated with or endorsed by The StoryGraph. Use it at your own risk. The StoryGraph's website structure could change at any time, which might break this wrapper.
diff --git a/manual_tests.py b/manual_tests.py
deleted file mode 100644
index 94f6dc9..0000000
--- a/manual_tests.py
+++ /dev/null
@@ -1,39 +0,0 @@
-"""
-Basic Manual Testing of Components.
-"""
-import os
-from dotenv import load_dotenv
-# from storygraph_api.request.books_request import BooksScraper
-# from storygraph_api.parse.books_parser import BooksParser
-# from storygraph_api.request.user_request import UserScraper
-# from storygraph_api.parse.user_parser import UserParser
-# from storygraph_api.users_client import User
-load_dotenv()
-
-id = "a5da6127-beb2-44b9-aba6-f63de432777"
-query = "pride and prejudice"
-# testing book page info
-# print(BooksScraper.main(id))
-# print(BooksParser.book_page(id))
-
-id = "e5a59ed0-31f0-46af-849e-cd8e624b68ff"
-from storygraph_api import Book
-book = Book()
-print(book.book_info(id))
-# print(book.search(query))
-
-# cookie = os.getenv('COOKIE')
-# # print(UserScraper.currently_reading(uname,session_cookie=cookie))
-# # print(UserParser.books_read(uname,cookie=cookie))
-# user = User()
-# print(user.books_read(uname,cookie=cookie))
-
-#
-# from storygraph_api.users_client import User
-# from dotenv import load_dotenv
-# load_dotenv()
-# cookie = os.getenv('COOKIE')
-# uname = 'clyrmze'
-# user = User()
-# result = user.books_read(uname,cookie=cookie)
-# print(result)
diff --git a/requirements.txt b/requirements.txt
index 8dea80e..6ac3d6a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,20 +1,9 @@
-attrs==24.2.0
beautifulsoup4==4.12.3
certifi==2024.7.4
charset-normalizer==3.3.2
-exceptiongroup==1.2.2
-h11==0.14.0
idna==3.7
-outcome==1.3.0.post0
-PySocks==1.7.1
+python-dotenv==1.1.1
requests==2.32.3
-selenium==4.23.1
-sniffio==1.3.1
-sortedcontainers==2.4.0
soupsieve==2.6
-trio==0.26.2
-trio-websocket==0.11.1
typing_extensions==4.12.2
urllib3==2.2.2
-websocket-client==1.8.0
-wsproto==1.2.0
diff --git a/storygraph_api/books_client.py b/storygraph_api/books_client.py
index 01b5a36..f3da804 100644
--- a/storygraph_api/books_client.py
+++ b/storygraph_api/books_client.py
@@ -1,14 +1,36 @@
from storygraph_api.parse.books_parser import BooksParser
from storygraph_api.exception_handler import handle_exceptions
import json
+from typing import Dict
class Book:
@handle_exceptions
- def book_info(self,book_id):
+ def book_info(self, book_id: str) -> str:
data = BooksParser.book_page(book_id)
- return json.dumps(data,indent=4)
+ return json.dumps(data, indent=4)
@handle_exceptions
- def search(self,query):
+ def reading_progress(self, book_id: str, cookies: Dict[str, str]) -> str:
+ progress = BooksParser.reading_progress(book_id, cookies)
+ data = {"progress": progress}
+ return json.dumps(data, indent=4)
+
+ @handle_exceptions
+ def get_read_dates(self, book_id: str, cookies: Dict[str, str]) -> str:
+ data = BooksParser.get_read_dates(book_id, cookies)
+ return json.dumps(data, indent=4)
+
+ @handle_exceptions
+ def get_ai_summary(self, book_id: str, user_id: str) -> str:
+ data = BooksParser.get_ai_summary(book_id, user_id)
+ return json.dumps(data, indent=4)
+
+ @handle_exceptions
+ def get_journal_entries(self, book_id: str, cookies: Dict[str, str]) -> str:
+ data = BooksParser.journal_entries(book_id, cookies)
+ return json.dumps(data, indent=4)
+
+ @handle_exceptions
+ def search(self, query: str) -> str:
data = BooksParser.search(query)
- return json.dumps(data,indent=4)
+ return json.dumps(data, indent=4)
diff --git a/storygraph_api/exception_handler.py b/storygraph_api/exception_handler.py
index c84f9a8..df90bba 100644
--- a/storygraph_api/exception_handler.py
+++ b/storygraph_api/exception_handler.py
@@ -2,18 +2,18 @@
import requests
from functools import wraps
from storygraph_api.exceptions import RequestError, ParsingError, UnexpectedError
+from selenium.common.exceptions import WebDriverException
def handle_exceptions(func):
@wraps(func)
def wrapper(*args, **kwargs):
try:
return func(*args, **kwargs)
- except RequestError as e:
- return json.dumps({"error": e.message}, indent=4)
- except ParsingError as e:
+ except (RequestError, ParsingError) as e:
return json.dumps({"error": e.message}, indent=4)
except Exception as e:
- raise UnexpectedError(f"Unexpected error: {str(e)}")
+ unexpected_error = UnexpectedError(f"An unexpected error occurred: {str(e)}")
+ return json.dumps({"error": unexpected_error.message}, indent=4)
return wrapper
def request_exception(func):
@@ -22,9 +22,9 @@ def wrapper(*args, **kwargs):
try:
return func(*args, **kwargs)
except requests.RequestException as e:
- return json.dumps({"error": f"Scraping Error: {str(e)}"}, indent=4)
- except Exception as e:
- return json.dumps({"error": f"Scraping Error: {str(e)}"}, indent=4)
+ raise RequestError(f"A network error occurred: {str(e)}") from e
+ except WebDriverException as e:
+ raise RequestError(f"A browser automation error occurred: {str(e)}") from e
return wrapper
def parsing_exception(func):
@@ -32,8 +32,6 @@ def parsing_exception(func):
def wrapper(*args, **kwargs):
try:
return func(*args, **kwargs)
- except ParsingError as e:
- return json.dumps({"error": e.message}, indent=4)
- except Exception as e:
- return json.dumps({"error": f"Parsing Error: {str(e)}"}, indent=4)
+ except (AttributeError, IndexError, TypeError, ValueError) as e:
+ raise ParsingError(f"Failed to parse page content. The website structure may have changed. Details: {str(e)}") from e
return wrapper
diff --git a/storygraph_api/exceptions.py b/storygraph_api/exceptions.py
index 3a6f046..ce548bd 100644
--- a/storygraph_api/exceptions.py
+++ b/storygraph_api/exceptions.py
@@ -1,21 +1,17 @@
class StoryGraphAPIError(Exception):
- """Base class for exceptions in StoryGraphAPI"""
pass
class RequestError(StoryGraphAPIError):
- """Exception raised for errors during the request."""
def __init__(self, message="An error occurred during the request."):
self.message = message
super().__init__(self.message)
class ParsingError(StoryGraphAPIError):
- """Exception raised for errors during parsing responses."""
def __init__(self, message="An error occurred while parsing the response."):
self.message = message
super().__init__(self.message)
class UnexpectedError(StoryGraphAPIError):
- """Exception raised for unexpected errors."""
def __init__(self, message="An unexpected error occurred."):
self.message = message
diff --git a/storygraph_api/parse/books_parser.py b/storygraph_api/parse/books_parser.py
index 20014eb..8df03bb 100644
--- a/storygraph_api/parse/books_parser.py
+++ b/storygraph_api/parse/books_parser.py
@@ -1,95 +1,365 @@
from storygraph_api.request.books_request import BooksScraper
from storygraph_api.exception_handler import parsing_exception
-from bs4 import BeautifulSoup
+from bs4 import BeautifulSoup, Tag, NavigableString
import re
+from typing import Dict, Any, List
+from urllib.parse import parse_qs, urlparse
class BooksParser:
@staticmethod
@parsing_exception
- def book_page(book_id):
+ def book_page(book_id: str) -> Dict[str, Any]:
content = BooksScraper.main(book_id)
soup = BeautifulSoup(content, 'html.parser')
- h3_tag = soup.find('h3',class_="font-serif font-bold text-2xl md:w-11/12")
- title = h3_tag.contents[0].strip()
+
+ h3_tag = soup.find('h3', class_="font-serif font-bold text-2xl md:w-11/12")
+ if not isinstance(h3_tag, Tag):
+ raise Exception("Could not find the main title header.")
+
+ title = ""
+ if h3_tag.contents and isinstance(h3_tag.contents[0], NavigableString):
+ title = h3_tag.contents[0].strip()
+
authors = []
for a in h3_tag.find_all('a'):
- if a["href"].startswith("/authors"):
- authors.append(a.text)
- p_tag = soup.find('p',class_="text-sm font-light text-darkestGrey dark:text-grey mt-1")
- pages = p_tag.contents[0].strip().split()[0]
- first_pub = p_tag.contents[1].find_all('span')[1].text.split()[2]
- tags = []
- tag_div = soup.find('div',class_="book-page-tag-section").find_all('span')
- for tag in tag_div:
- tags.append(tag.text)
- desc = soup.find_all('script')[5].text
- pattern = re.compile(r"Description<\/h4>(.*?)<\/div>", re.DOTALL)
- match = pattern.search(desc)
- description = match.group(1).strip()
+ if isinstance(a, Tag):
+ href = a.get("href")
+ if isinstance(href, str) and href.startswith("/authors"):
+ authors.append(a.text)
+
+ p_tag = soup.find('p', class_="text-sm font-light text-darkestGrey dark:text-grey mt-1")
+ if not isinstance(p_tag, Tag) or not p_tag.contents:
+ raise Exception("Could not find book metadata paragraph.")
+
+ pages_text = p_tag.contents[0]
+ pages = pages_text.strip().split()[0] if isinstance(pages_text, NavigableString) else "N/A"
+
+ pub_info_span = p_tag.find('span', string=re.compile(r'first pub'))
+ first_pub = pub_info_span.text.split()[-1] if pub_info_span else "N/A"
+
+ tag_div = soup.find('div', class_="book-page-tag-section")
+ tags = [tag.text for tag in tag_div.find_all('span')] if isinstance(tag_div, Tag) else []
+
+ cover_url = None
+ cover_div = soup.find('div', class_="book-cover")
+ if isinstance(cover_div, Tag):
+ img_tag = cover_div.find('img')
+ if isinstance(img_tag, Tag):
+ cover_url = img_tag.get('src')
+
+ description = "Description not found."
+ script_tag = soup.find('script', string=re.compile(r"\$\('\.read-more-btn'\)"))
+ if isinstance(script_tag, Tag):
+ script_content = script_tag.string
+ if script_content:
+ pattern = re.compile(r"\.html\('(.*)'\)", re.DOTALL)
+ match = pattern.search(str(script_content))
+ if match:
+ html_str = match.group(1).replace(r'\/', r'/')
+ desc_soup = BeautifulSoup(html_str, 'html.parser')
+ desc_div = desc_soup.find('div', class_='trix-content')
+ if desc_div:
+ description = desc_div.get_text(separator="\n", strip=True)
+
review_content = BooksScraper.community_reviews(book_id)
- rev_soup = BeautifulSoup(review_content,'html.parser')
- avg_rating = rev_soup.find('span',class_="average-star-rating").text.strip()
+ rev_soup = BeautifulSoup(review_content, 'html.parser')
+ avg_rating_span = rev_soup.find('span', class_="average-star-rating")
+ avg_rating = avg_rating_span.text.strip() if avg_rating_span else "N/A"
+
warnings = BooksParser.content_warnings(book_id)
+
data = {
- 'title':title,
- 'authors': authors,
- 'pages': pages,
- 'first_pub': first_pub,
- 'tags': tags,
- 'average_rating': avg_rating,
- 'description':description,
- 'warnings': warnings
- }
+ 'title': title, 'authors': authors, 'pages': pages,
+ 'first_pub': first_pub, 'tags': tags, 'average_rating': avg_rating,
+ 'description': description, 'warnings': warnings,
+ 'cover_url': cover_url
+ }
return data
@staticmethod
@parsing_exception
- def content_warnings(book_id):
+ def reading_progress(book_id: str, cookies: Dict[str, str]) -> str:
+ content = BooksScraper.book_page_authenticated(book_id, cookies)
+ soup = BeautifulSoup(content, 'html.parser')
+
+ status_label = soup.find('button', class_='read-status-label')
+ if isinstance(status_label, Tag) and status_label.text.strip() == 'read':
+ return "100%"
+
+ progress_bar_div = soup.find('div', class_='progress-bar')
+ if isinstance(progress_bar_div, Tag):
+ progress_span = progress_bar_div.find('span')
+ if isinstance(progress_span, Tag) and progress_span.string:
+ return progress_span.string.strip()
+
+ inner_div = progress_bar_div.find('div', style=lambda v: 'width: 0%' in v if v else False)
+ if inner_div is not None:
+ return "0%"
+
+ to_read_button = soup.find('button', string=re.compile(r'\s*to read\s*'))
+ if isinstance(to_read_button, Tag):
+ return "0%"
+
+ raise Exception("Could not determine reading status from the page.")
+
+ @staticmethod
+ @parsing_exception
+ def get_read_dates(book_id: str, cookies: Dict[str, str]) -> Dict[str, Any]:
+ try:
+ from storygraph_api.parse.user_parser import UserParser
+ from storygraph_api.request.user_request import UserScraper
+
+ all_entries = []
+ page = 1
+ while True:
+ content = UserScraper.all_journal_entries(cookies, page)
+ entries = UserParser.all_journal_entries(content)
+ if not entries:
+ break
+ all_entries.extend(entries)
+ page += 1
+
+ start_date = None
+ finish_date = None
+
+ for entry in all_entries:
+ if entry.get('book_id') == book_id:
+ if entry.get('status') == 'Started reading':
+ date_str = entry.get('date', '')
+ if date_str:
+ try:
+ from datetime import datetime
+ parsed_date = datetime.strptime(date_str, '%d %B %Y')
+ start_date = parsed_date.strftime('%Y-%m-%d')
+ except:
+ pass
+ elif entry.get('status') == 'Finished':
+ date_str = entry.get('date', '')
+ if date_str:
+ try:
+ from datetime import datetime
+ parsed_date = datetime.strptime(date_str, '%d %B %Y')
+ finish_date = parsed_date.strftime('%Y-%m-%d')
+ except:
+ pass
+
+ return {'start_date': start_date, 'finish_date': finish_date}
+
+ except Exception:
+ pass
+
+ content = BooksScraper.book_page_authenticated(book_id, cookies)
+ soup = BeautifulSoup(content, 'html.parser')
+
+ edit_link = soup.find('a', href=re.compile(r'/edit-(read-instance|journal-entry)-from-book'))
+ if not (isinstance(edit_link, Tag) and edit_link.get('href')):
+ return {'start_date': None, 'finish_date': None}
+
+ href = edit_link['href']
+ if not isinstance(href, str):
+ raise Exception("Could not find a valid edit link href.")
+
+ parsed_url = urlparse(href)
+ query_params = parse_qs(parsed_url.query)
+
+ id_val = None
+ form_content = b''
+ id_type = ''
+
+ if 'read_instance_id' in query_params:
+ id_val = query_params.get('read_instance_id', [None])[0]
+ id_type = 'read_instance'
+ if not id_val:
+ raise Exception("Could not extract read_instance_id from edit link.")
+ try:
+ form_content = BooksScraper.get_read_dates_form(book_id, id_val, cookies)
+ except:
+ return {'start_date': None, 'finish_date': None}
+
+ elif 'journal_entry_id' in query_params:
+ id_val = query_params.get('journal_entry_id', [None])[0]
+ id_type = 'journal_entry'
+ if not id_val:
+ raise Exception("Could not extract journal_entry_id from edit link.")
+ try:
+ form_content = BooksScraper.get_journal_entry_form(book_id, id_val, cookies)
+ except:
+ return {'start_date': None, 'finish_date': None}
+
+ if not form_content:
+ return {'start_date': None, 'finish_date': None}
+
+ form_soup = BeautifulSoup(form_content, 'html.parser')
+
+ def get_date(date_prefix: str) -> str | None:
+ day_select = form_soup.find('select', id=f'{id_type}_{date_prefix}day')
+ month_select = form_soup.find('select', id=f'{id_type}_{date_prefix}month')
+ year_select = form_soup.find('select', id=f'{id_type}_{date_prefix}year')
+
+ if not (isinstance(day_select, Tag) and isinstance(month_select, Tag) and isinstance(year_select, Tag)):
+ return None
+
+ day_option = day_select.find('option', selected=True)
+ month_option = month_select.find('option', selected=True)
+ year_option = year_select.find('option', selected=True)
+
+ if isinstance(day_option, Tag) and isinstance(month_option, Tag) and isinstance(year_option, Tag):
+ day = day_option.get('value')
+ month = month_option.get('value')
+ year = year_option.get('value')
+ if day and month and year:
+ return f"{year}-{str(month).zfill(2)}-{str(day).zfill(2)}"
+ return None
+
+ start_date_prefix = 'started_at_' if id_type == 'journal_entry' else 'start_'
+ start_date = get_date(start_date_prefix)
+ finish_date = get_date('finished_at_') if id_type == 'journal_entry' else get_date('')
+
+ return {'start_date': start_date, 'finish_date': finish_date}
+
+ @staticmethod
+ @parsing_exception
+ def get_ai_summary(book_id: str, user_id: str) -> Dict[str, str]:
+ content = BooksScraper.get_ai_summary(book_id, user_id)
+ soup = BeautifulSoup(content, 'html.parser')
+
+ template = soup.find('template')
+ if isinstance(template, Tag):
+ p_tag = template.find('p')
+ if isinstance(p_tag, Tag) and p_tag.string:
+ return {'summary': p_tag.string.strip()}
+
+ raise Exception("Could not parse AI summary.")
+
+ @staticmethod
+ @parsing_exception
+ def content_warnings(book_id: str) -> Dict[str, List[str]]:
warnings_content = BooksScraper.content_warnings(book_id)
- warnings_soup = BeautifulSoup(warnings_content,'html.parser')
- user_warnings_pane = warnings_soup.find_all('div',class_='standard-pane')[1]
- warnings_graphic = []
- warnings_moderate = []
- warnings_minor = []
- warnings_list = warnings_graphic
+ warnings_soup = BeautifulSoup(warnings_content, 'html.parser')
+
+ standard_panes = warnings_soup.find_all('div', class_='standard-pane')
+ if len(standard_panes) < 2:
+ return {'graphic': [], 'moderate': [], 'minor': []}
+
+ user_warnings_pane = standard_panes[1]
+ warnings: Dict[str, List[str]] = {'graphic': [], 'moderate': [], 'minor': []}
+ current_list_key = 'graphic'
tag_re = re.compile(r'^(.*) \((\d+)\)$')
+
for tag in user_warnings_pane.children:
- if tag == '\n':
- continue
+ if not isinstance(tag, Tag): continue
+
if tag.name == 'p':
- if tag.text == 'Graphic':
- warnings_list = warnings_graphic
- elif tag.text == 'Moderate':
- warnings_list = warnings_moderate
- elif tag.text == 'Minor':
- warnings_list = warnings_minor
+ if tag.text == 'Graphic': current_list_key = 'graphic'
+ elif tag.text == 'Moderate': current_list_key = 'moderate'
+ elif tag.text == 'Minor': current_list_key = 'minor'
elif tag.name == 'div':
match = tag_re.match(tag.text)
- warnings_list.append(match[1])
- warnings = {
- 'graphic': warnings_graphic,
- 'moderate': warnings_moderate,
- 'minor': warnings_minor
- }
+ if match: warnings[current_list_key].append(match.group(1))
return warnings
@staticmethod
@parsing_exception
- def search(query):
+ def search(query: str) -> List[Dict[str, str]]:
content = BooksScraper.search(query)
soup = BeautifulSoup(content, 'html.parser')
- search_results = []
+ search_results: List[Dict[str, str]] = []
+
books = soup.find_all('div', class_="book-title-author-and-series w-11/12")
for book in books:
- title = book.find('a').text.strip()
- for a in book.find_all('a'):
- if a["href"].startswith('/author'):
- author = a.text.strip()
- break
- book_id = book.find('a')['href'].split('/')[-1]
- search_results.append({
- 'title': title,
- 'author': author,
- 'book_id': book_id
- })
+ if not isinstance(book, Tag): continue
+
+ title_tag = book.find('a')
+ title = title_tag.text.strip() if isinstance(title_tag, Tag) else "N/A"
+
+ href_val = title_tag.get('href') if isinstance(title_tag, Tag) else None
+
+ href = href_val[0] if isinstance(href_val, list) else href_val
+ book_id = href.split('/')[-1] if isinstance(href, str) else "N/A"
+
+ author = "N/A"
+ for a_tag in book.find_all('a'):
+ if isinstance(a_tag, Tag):
+ href = a_tag.get("href")
+ if isinstance(href, str) and href.startswith('/author'):
+ author = a_tag.text.strip()
+ break
+
+ search_results.append({'title': title, 'author': author, 'book_id': book_id})
+
return search_results
+
+ @staticmethod
+ @parsing_exception
+ def journal_entries(book_id: str, cookies: Dict[str, str]) -> List[Dict[str, Any]]:
+ content = BooksScraper.get_journal_page(book_id, cookies)
+ soup = BeautifulSoup(content, 'html.parser')
+
+ journal_entries: List[Dict[str, Any]] = []
+
+ entry_panes = soup.find_all('span', class_="journal-entry-panes")
+ if not entry_panes:
+ return journal_entries
+
+ for entry in entry_panes[0].find_all(lambda tag: tag.name == 'div' and 'grid-cols-4' in tag.get('class', [])):
+ date_tag = entry.find('p', class_="font-semibold")
+ date = date_tag.text.strip().split('\n')[0] if date_tag else "N/A"
+
+ progress_percent_tag = entry.find('div', class_="text-teal-500")
+ progress_percent = int(progress_percent_tag.text.strip().replace('%', '')) if progress_percent_tag else None
+
+ pages_read_tag = entry.find('p', class_="clear-both")
+ pages_read_this_session = None
+ total_pages_read = None
+ total_pages = None
+
+ if pages_read_tag:
+ pages_text = pages_read_tag.text
+ session_match = re.search(r'(\d+) pages read', pages_text)
+ if session_match:
+ pages_read_this_session = int(session_match.group(1))
+
+ total_match = re.search(r'\((\d+) pages out of (\d+)\)', pages_text)
+ if total_match:
+ total_pages_read = int(total_match.group(1))
+ total_pages = int(total_match.group(2))
+
+ note_tag = entry.find('div', class_="trix-content")
+ note = note_tag.text.strip() if note_tag else None
+
+ status_tag = entry.find('span', class_=lambda x: x and 'inline-flex' in x)
+ status = status_tag.text.strip() if status_tag else None
+
+ if status == "Started reading":
+ if progress_percent is None:
+ progress_percent = 0
+ if pages_read_this_session is None:
+ pages_read_this_session = 0
+ if total_pages_read is None:
+ total_pages_read = 0
+ elif status == "Finished":
+ if progress_percent is None:
+ progress_percent = 100
+
+ journal_entries.append({
+ 'date': date,
+ 'status': status,
+ 'progress_percent': progress_percent,
+ 'pages_read_this_session': pages_read_this_session,
+ 'total_pages_read': total_pages_read,
+ 'total_pages': total_pages,
+ 'note': note
+ })
+
+ book_total_pages = None
+ for entry in journal_entries:
+ if entry.get('total_pages') is not None:
+ book_total_pages = entry['total_pages']
+ break
+
+ if book_total_pages is not None:
+ for entry in journal_entries:
+ if entry.get('total_pages') is None:
+ entry['total_pages'] = book_total_pages
+
+ return journal_entries
diff --git a/storygraph_api/parse/user_parser.py b/storygraph_api/parse/user_parser.py
index b4f30c4..2ca80f5 100644
--- a/storygraph_api/parse/user_parser.py
+++ b/storygraph_api/parse/user_parser.py
@@ -1,10 +1,24 @@
from storygraph_api.request.user_request import UserScraper
from storygraph_api.exception_handler import parsing_exception
-from bs4 import BeautifulSoup
+from bs4 import BeautifulSoup, Tag
+from typing import Dict
+import re
class UserParser:
@staticmethod
- @parsing_exception
+ @parsing_exception
+ def get_user_id(username: str) -> Dict[str, str]:
+ content = UserScraper.get_profile_page(username)
+ soup = BeautifulSoup(content, 'html.parser')
+ profile_pane = soup.find('div', id='profile-heading-pane')
+ if isinstance(profile_pane, Tag):
+ user_id = profile_pane.get('data-user-id')
+ if user_id and isinstance(user_id, str):
+ return {'user_id': user_id}
+ raise Exception(f"Could not find user_id for username '{username}'.")
+
+ @staticmethod
+ @parsing_exception
def parse_html(html):
soup = BeautifulSoup(html, 'html.parser')
books_list = []
@@ -21,15 +35,96 @@ def parse_html(html):
@staticmethod
def currently_reading(uname, cookie):
- content = UserScraper.currently_reading(uname,cookie)
+ content = UserScraper.currently_reading(uname, cookie)
return UserParser.parse_html(content)
@staticmethod
def to_read(uname, cookie):
- content = UserScraper.to_read(uname,cookie)
+ content = UserScraper.to_read(uname, cookie)
return UserParser.parse_html(content)
@staticmethod
def books_read(uname, cookie):
- content = UserScraper.books_read(uname,cookie)
+ content = UserScraper.books_read(uname, cookie)
return UserParser.parse_html(content)
+
+ @staticmethod
+ @parsing_exception
+ def all_journal_entries(html_content):
+ soup = BeautifulSoup(html_content, 'html.parser')
+
+ journal_entries = []
+
+ for entry in soup.find_all('div', class_="mb-7"):
+ try:
+ book_title_tag = entry.find('p', class_="font-semibold text-sm md:text-base font-semibold").find('a')
+ book_title = book_title_tag.text.strip() if book_title_tag else "N/A"
+
+ book_id = book_title_tag['href'].split('/')[-1] if book_title_tag else "N/A"
+
+ date_tag = entry.find('p', class_="font-semibold text-xs md:text-sm")
+ date = date_tag.text.strip().split('\n')[0] if date_tag else "N/A"
+
+ progress_percent_tag = entry.find('div', class_="text-teal-500")
+ progress_percent = int(progress_percent_tag.text.strip().replace('%', '')) if progress_percent_tag else None
+
+ pages_read_tag = entry.find('p', class_=re.compile(r'clear-both.*'))
+ pages_read_this_session = None
+ total_pages_read = None
+ total_pages = None
+
+ if pages_read_tag:
+ pages_text = pages_read_tag.text
+ session_match = re.search(r'(\d+) pages read', pages_text)
+ if session_match:
+ pages_read_this_session = int(session_match.group(1))
+
+ total_match = re.search(r'\((\d+) pages out of (\d+)\)', pages_text)
+ if total_match:
+ total_pages_read = int(total_match.group(1))
+ total_pages = int(total_match.group(2))
+
+ note_tag = entry.find('div', class_="trix-content")
+ note = note_tag.text.strip() if note_tag else None
+
+ status_tag = entry.find('span', class_=lambda x: x and 'inline-flex' in x)
+ status = status_tag.text.strip() if status_tag else None
+
+ if status == "Started reading":
+ if progress_percent is None:
+ progress_percent = 0
+ if pages_read_this_session is None:
+ pages_read_this_session = 0
+ if total_pages_read is None:
+ total_pages_read = 0
+ elif status == "Finished":
+ if progress_percent is None:
+ progress_percent = 100
+
+ journal_entries.append({
+ 'book_title': book_title,
+ 'book_id': book_id,
+ 'date': date,
+ 'status': status,
+ 'progress_percent': progress_percent,
+ 'pages_read_this_session': pages_read_this_session,
+ 'total_pages_read': total_pages_read,
+ 'total_pages': total_pages,
+ 'note': note
+ })
+ except Exception:
+ continue
+
+ book_total_pages = {}
+
+ for entry in journal_entries:
+ book_id = entry.get('book_id')
+ if book_id and entry.get('total_pages') is not None:
+ book_total_pages[book_id] = entry['total_pages']
+
+ for entry in journal_entries:
+ book_id = entry.get('book_id')
+ if book_id and entry.get('total_pages') is None and book_id in book_total_pages:
+ entry['total_pages'] = book_total_pages[book_id]
+
+ return journal_entries
diff --git a/storygraph_api/request/books_request.py b/storygraph_api/request/books_request.py
index b6f22f8..9cb2433 100644
--- a/storygraph_api/request/books_request.py
+++ b/storygraph_api/request/books_request.py
@@ -1,31 +1,70 @@
import requests
-from storygraph_api.exception_handler import request_exception
+from typing import Dict
class BooksScraper:
@staticmethod
- @request_exception
- def fetch_url(url):
- response = requests.get(url)
+ def fetch_url(url: str, cookies: Dict[str, str] | None = None, params: Dict[str, str] | None = None) -> bytes:
+ headers = {
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+ }
+ response = requests.get(url, cookies=cookies, headers=headers, params=params)
response.raise_for_status()
return response.content
@staticmethod
- def main(book_id):
+ def post_url(url: str, cookies: Dict[str, str] | None = None, data: Dict[str, str] | None = None) -> bytes:
+ headers = {
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ }
+ response = requests.post(url, cookies=cookies, headers=headers, data=data)
+ response.raise_for_status()
+ return response.content
+
+ @staticmethod
+ def main(book_id: str) -> bytes:
url = f"https://app.thestorygraph.com/books/{book_id}"
return BooksScraper.fetch_url(url)
@staticmethod
- def community_reviews(book_id):
+ def book_page_authenticated(book_id: str, cookies: Dict[str, str]) -> bytes:
+ url = f"https://app.thestorygraph.com/books/{book_id}"
+ return BooksScraper.fetch_url(url, cookies=cookies)
+
+ @staticmethod
+ def community_reviews(book_id: str) -> bytes:
url = f"https://app.thestorygraph.com/books/{book_id}/community_reviews"
return BooksScraper.fetch_url(url)
@staticmethod
- def content_warnings(book_id):
+ def content_warnings(book_id: str) -> bytes:
url = f"https://app.thestorygraph.com/books/{book_id}/content_warnings"
return BooksScraper.fetch_url(url)
+
+ @staticmethod
+ def get_read_dates_form(book_id: str, read_instance_id: str, cookies: Dict[str, str]) -> bytes:
+ url = f"https://app.thestorygraph.com/edit-read-instance-from-book?book_id={book_id}&read_instance_id={read_instance_id}"
+ return BooksScraper.post_url(url, cookies=cookies)
@staticmethod
- def search(query):
- formatted_query = query.replace(' ', '%20')
- url = f"https://app.thestorygraph.com/browse?search_term={formatted_query}"
- return BooksScraper.fetch_url(url)
+ def get_journal_entry_form(book_id: str, journal_entry_id: str, cookies: Dict[str, str]) -> bytes:
+ """POST to the ``edit-journal-entry-from-book`` endpoint and return the raw response body.
+
+ Both ids travel in the URL query string; no form data is sent with the POST.
+ """
+ # NOTE(review): the ids are interpolated into the URL unescaped - presumably plain UUIDs; confirm.
+ url = f"https://app.thestorygraph.com/edit-journal-entry-from-book?book_id={book_id}&journal_entry_id={journal_entry_id}"
+ return BooksScraper.post_url(url, cookies=cookies)
+
+ @staticmethod
+ def get_ai_summary(book_id: str, user_id: str) -> bytes:
+ """Fetch the ``personalized-preview.turbo_stream`` response for a book and return the raw body.
+
+ ``book_id`` and ``user_id`` are sent as query parameters, with
+ ``personalized`` fixed to the string ``'false'``. No cookies are sent.
+ """
+ # Plain string literal: the URL has no placeholders, so no f-prefix is needed.
+ url = "https://app.thestorygraph.com/personalized-preview.turbo_stream"
+ params = {'book_id': book_id, 'personalized': 'false', 'user_id': user_id}
+ return BooksScraper.fetch_url(url, params=params)
+
+ @staticmethod
+ def get_journal_page(book_id: str, cookies: Dict[str, str]) -> bytes:
+ """Fetch ``/journal`` filtered by ``book_id`` (sent as a query parameter) and return the raw response body."""
+ url = "https://app.thestorygraph.com/journal"
+ params = {'book_id': book_id}
+ return BooksScraper.fetch_url(url, cookies=cookies, params=params)
+
+ @staticmethod
+ def search(query: str) -> bytes:
+ """Fetch ``/browse`` with ``query`` as the ``search_term`` parameter and return the raw response body."""
+ url = "https://app.thestorygraph.com/browse"
+ # The query is handed over as a request parameter rather than spliced into the URL by hand.
+ params = {'search_term': query}
+ return BooksScraper.fetch_url(url, params=params)
\ No newline at end of file
diff --git a/storygraph_api/request/user_request.py b/storygraph_api/request/user_request.py
index c1b24e3..00bfdb5 100644
--- a/storygraph_api/request/user_request.py
+++ b/storygraph_api/request/user_request.py
@@ -1,46 +1,42 @@
-from storygraph_api.exception_handler import request_exception
-from selenium import webdriver
-from selenium.webdriver.chrome.options import Options
-import time
+import requests
+from typing import Dict
class UserScraper:
@staticmethod
- @request_exception
- def fetch_url(url,cookie):
- options = Options()
- options.add_argument("--headless")
- driver = webdriver.Chrome(options=options)
- driver.get(url)
- if cookie:
- driver.add_cookie({
- 'name': 'remember_user_token',
- 'value': cookie,
- })
- driver.refresh()
- SCROLL_PAUSE_TIME = 2
- last_height = driver.execute_script("return document.body.scrollHeight")
- while True:
- driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
- time.sleep(SCROLL_PAUSE_TIME)
- new_height = driver.execute_script("return document.body.scrollHeight")
- if new_height == last_height:
- break
- last_height = new_height
- html_content = driver.page_source
- driver.quit()
- return html_content
+ def get_profile_page(username: str, timeout: float = 30.0) -> bytes:
+ """Fetch ``/profile/{username}`` without cookies and return the raw response body.
+
+ ``timeout`` (seconds) bounds the wait so a stalled connection cannot hang
+ the caller indefinitely. Raises whatever ``response.raise_for_status()``
+ raises for an error status, and a requests timeout error on timeout.
+ """
+ url = f"https://app.thestorygraph.com/profile/{username}"
+ headers = {
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+ }
+ response = requests.get(url, headers=headers, timeout=timeout)
+ response.raise_for_status()
+ return response.content
@staticmethod
- def currently_reading(uname, cookie):
- url = f"https://app.thestorygraph.com/currently-reading/{uname}"
- return UserScraper.fetch_url(url,cookie)
+ def fetch_paginated_url(url: str, cookies: Dict[str, str], timeout: float = 30.0) -> bytes:
+ """Send a GET request to ``url`` with ``cookies`` attached and return the raw response body.
+
+ ``timeout`` (seconds) bounds the wait so a stalled connection cannot hang
+ the caller indefinitely. Raises whatever ``response.raise_for_status()``
+ raises for an error status, and a requests timeout error on timeout.
+ """
+ # Same header spelling and User-Agent value as the other request helpers in this change.
+ headers = {
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+ }
+ response = requests.get(url, cookies=cookies, headers=headers, timeout=timeout)
+ response.raise_for_status()
+ return response.content
@staticmethod
- def to_read(uname, cookie):
- url = f"https://app.thestorygraph.com/to-read/{uname}"
- return UserScraper.fetch_url(url,cookie)
+ def currently_reading(uname: str, cookies: Dict[str, str], page: int) -> bytes:
+ """Fetch page ``page`` of ``/currently-reading/{uname}`` with ``cookies`` and return the raw response body."""
+ url = f"https://app.thestorygraph.com/currently-reading/{uname}?page={page}"
+ return UserScraper.fetch_paginated_url(url, cookies)
@staticmethod
- def books_read(uname, cookie):
- url = f"https://app.thestorygraph.com/books-read/{uname}"
- return UserScraper.fetch_url(url,cookie)
+ def to_read(uname: str, cookies: Dict[str, str], page: int) -> bytes:
+ """Fetch page ``page`` of ``/to-read/{uname}`` with ``cookies`` and return the raw response body."""
+ url = f"https://app.thestorygraph.com/to-read/{uname}?page={page}"
+ return UserScraper.fetch_paginated_url(url, cookies)
+
+ @staticmethod
+ def books_read(uname: str, cookies: Dict[str, str], page: int) -> bytes:
+ """Fetch page ``page`` of ``/books-read/{uname}`` with ``cookies`` and return the raw response body."""
+ url = f"https://app.thestorygraph.com/books-read/{uname}?page={page}"
+ return UserScraper.fetch_paginated_url(url, cookies)
+
+ @staticmethod
+ def all_journal_entries(cookies: Dict[str, str], page: int) -> bytes:
+ """Fetch page ``page`` of ``/journal`` with ``cookies`` and return the raw response body."""
+ # No username in the URL: the request is identified only by the cookies that are sent.
+ url = f"https://app.thestorygraph.com/journal?page={page}"
+ return UserScraper.fetch_paginated_url(url, cookies)
diff --git a/storygraph_api/users_client.py b/storygraph_api/users_client.py
index 0b4c41e..759c46f 100644
--- a/storygraph_api/users_client.py
+++ b/storygraph_api/users_client.py
@@ -1,19 +1,50 @@
from storygraph_api.parse.user_parser import UserParser
+from storygraph_api.request.user_request import UserScraper
from storygraph_api.exception_handler import handle_exceptions
import json
class User:
@handle_exceptions
- def currently_reading(self,uname,cookie):
- data = UserParser.currently_reading(uname,cookie)
- return json.dumps(data,indent=4)
+ def get_user_id(self, username: str) -> str:
+ """Return the result of ``UserParser.get_user_id(username)`` serialized as indented JSON."""
+ data = UserParser.get_user_id(username)
+ return json.dumps(data, indent=4)
+
+ def _fetch_paginated_books(self, fetch_function, uname, cookies):
+ """Collect the parsed books from every page of a paginated list.
+
+ ``fetch_function(uname, cookies, page)`` is called for page 1, 2, ... and each
+ response is parsed with ``UserParser.parse_html``. Collection stops at the
+ first page that parses to nothing. It also stops if a page parses to exactly
+ the same books as the previous one, so a server that keeps re-serving its
+ last page cannot cause an endless loop.
+
+ Returns the concatenated list of parsed books.
+ """
+ all_books = []
+ previous_books = None
+ page = 1
+ while True:
+ content = fetch_function(uname, cookies, page)
+ books = UserParser.parse_html(content)
+ # Empty page = past the end; identical page = server repeated itself.
+ if not books or books == previous_books:
+ break
+ all_books.extend(books)
+ previous_books = books
+ page += 1
+ return all_books
+
+ @handle_exceptions
+ def currently_reading(self, uname, cookies):
+ """Return every page of the user's currently-reading list as an indented JSON string."""
+ data = self._fetch_paginated_books(UserScraper.currently_reading, uname, cookies)
+ return json.dumps(data, indent=4)
+
+ @handle_exceptions
+ def to_read(self, uname, cookies):
+ """Return every page of the user's to-read list as an indented JSON string."""
+ data = self._fetch_paginated_books(UserScraper.to_read, uname, cookies)
+ return json.dumps(data, indent=4)
@handle_exceptions
- def to_read(self,uname,cookie):
- data = UserParser.to_read(uname,cookie)
- return json.dumps(data,indent=4)
+ def books_read(self, uname, cookies):
+ """Return every page of the user's books-read list as an indented JSON string."""
+ data = self._fetch_paginated_books(UserScraper.books_read, uname, cookies)
+ return json.dumps(data, indent=4)
@handle_exceptions
- def books_read(self,uname,cookie):
- data = UserParser.books_read(uname,cookie)
- return json.dumps(data,indent=4)
+ def get_all_journal_entries(self, cookies):
+ """Collect journal entries from every page and return them as an indented JSON string.
+
+ Pages are requested with ``UserScraper.all_journal_entries`` starting at 1 and
+ parsed with ``UserParser.all_journal_entries``. Collection stops at the first
+ page that parses to nothing. It also stops if a page parses to exactly the
+ same entries as the previous one, so a server that keeps re-serving its last
+ page cannot cause an endless loop.
+ """
+ all_entries = []
+ previous_entries = None
+ page = 1
+ while True:
+ content = UserScraper.all_journal_entries(cookies, page)
+ entries = UserParser.all_journal_entries(content)
+ # Empty page = past the end; identical page = server repeated itself.
+ if not entries or entries == previous_entries:
+ break
+ all_entries.extend(entries)
+ previous_entries = entries
+ page += 1
+ return json.dumps(all_entries, indent=4)
\ No newline at end of file
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..2f3bf78
--- /dev/null
+++ b/test.py
@@ -0,0 +1,169 @@
+import os
+import json
+from dotenv import load_dotenv
+
+from storygraph_api import Book, User
+
+def run_tests():
+ print("π Starting StoryGraph API tests...")
+
+ print("\n--- 1. Loading Environment & Initializing Clients ---")
+ load_dotenv()
+
+ username = os.getenv("STORYGRAPH_USERNAME")
+ session_cookie = os.getenv("_STORYGRAPH_SESSION")
+ remember_token = os.getenv("REMEMBER_USER_TOKEN")
+
+ if not all([username, session_cookie, remember_token]):
+ print("π Error: Missing one or more required environment variables.")
+ print("Please ensure _STORYGRAPH_SESSION, REMEMBER_USER_TOKEN, and STORYGRAPH_USERNAME are in your .env file.")
+ else:
+ auth_cookies = {
+ "_storygraph_session": session_cookie,
+ "remember_user_token": remember_token
+ }
+
+ book_client = Book()
+ user_client = User()
+ print("✅ Setup complete.")
+
+ print("\n--- 2. Testing User Client ---")
+
+ user_id = None
+ try:
+ print("\nFetching User ID for:", username)
+ user_id_json = user_client.get_user_id(username)
+ user_id_data = json.loads(user_id_json)
+ if "user_id" in user_id_data:
+ user_id = user_id_data["user_id"]
+ print(f"✅ Success! User ID found: {user_id}")
+ else:
+ print(f"⚠️ Could not extract user_id from response: {user_id_json}")
+ except Exception as e:
+ print(f"π Error fetching user ID: {e}")
+
+ try:
+ print("\nFetching 'Currently Reading' list...")
+ currently_reading = user_client.currently_reading(username, auth_cookies)
+ print("✅ Success! Result:")
+ print(currently_reading)
+ except Exception as e:
+ print(f"π Error fetching 'Currently Reading' list: {e}")
+
+ try:
+ print("\nFetching 'To-Read' list...")
+ to_read = user_client.to_read(username, auth_cookies)
+ print("✅ Success! Result:")
+ print(to_read)
+ except Exception as e:
+ print(f"π Error fetching 'To-Read' list: {e}")
+
+ try:
+ print("\nFetching 'Read' list...")
+ books_read = user_client.books_read(username, auth_cookies)
+ print("✅ Success! Result:")
+ print(books_read)
+ except Exception as e:
+ print(f"π Error fetching 'Read' list: {e}")
+
+ try:
+ print("\nFetching all journal entries...")
+ all_journal_entries = user_client.get_all_journal_entries(auth_cookies)
+ all_journal_data = json.loads(all_journal_entries)
+ if isinstance(all_journal_data, list):
+ print(f"✅ All Journal Entries: {len(all_journal_data)} entries found.")
+ print(all_journal_entries)
+ else:
+ print(f"π Error: All journal entries not returned as a list.")
+ except Exception as e:
+ print(f"π Error fetching all journal entries: {e}")
+
+ print("\n--- 3. Testing Book Client ---")
+
+ finished_book_id = "1c023e31-637b-41d9-ba64-260c3c1b0f3d"
+ reading_book_id = "87ca0994-06fb-4360-a0bf-660918a7fbc4"
+ unread_book_id = "c89e808f-39db-49f0-98af-6028d98097f9"
+ search_query = "Dune Frank Herbert"
+
+ try:
+ print(f"\nSearching for '{search_query}'...")
+ search_results = book_client.search(search_query)
+ print("✅ Success! Result:")
+ print(search_results)
+ except Exception as e:
+ print(f"π Error searching for book: {e}")
+
+ try:
+ print(f"\nFetching info for book ID: {finished_book_id}")
+ book_info = book_client.book_info(finished_book_id)
+ print("✅ Success! Result:")
+ print(book_info)
+ info = json.loads(book_info)
+ cover_url = info.get('cover_url')
+ if cover_url and isinstance(cover_url, str) and cover_url.startswith("https://cdn.thestorygraph.com/"):
+ print(f"✅ cover_url present and valid: {cover_url}")
+ else:
+ print(f"π cover_url missing or invalid: {cover_url}")
+ except Exception as e:
+ print(f"π Error fetching book info: {e}")
+
+ print("\nFetching reading progress for all 3 test cases...")
+ try:
+ progress_finished = book_client.reading_progress(finished_book_id, auth_cookies)
+ print(f"✅ Finished Book Progress: {json.loads(progress_finished).get('progress')}")
+
+ progress_reading = book_client.reading_progress(reading_book_id, auth_cookies)
+ print(f"✅ Currently Reading Book Progress: {json.loads(progress_reading).get('progress')}")
+
+ progress_unread = book_client.reading_progress(unread_book_id, auth_cookies)
+ print(f"✅ Unread Book Progress: {json.loads(progress_unread).get('progress')}")
+ except Exception as e:
+ print(f"π Error fetching reading progress: {e}")
+
+ print("\nFetching read dates for all 3 test cases...")
+ try:
+ dates_finished = book_client.get_read_dates(finished_book_id, auth_cookies)
+ print(f"✅ Finished Book Dates: {dates_finished}")
+
+ dates_reading = book_client.get_read_dates(reading_book_id, auth_cookies)
+ print(f"✅ Currently Reading Book Dates: {dates_reading}")
+
+ dates_unread = book_client.get_read_dates(unread_book_id, auth_cookies)
+ print(f"✅ Unread Book Dates: {dates_unread}")
+ except Exception as e:
+ print(f"π Error fetching read dates: {e}")
+
+ print("\nFetching journal entries for a finished book and an unread book...")
+ try:
+ journal_entries_finished = book_client.get_journal_entries(finished_book_id, auth_cookies)
+ journal_data = json.loads(journal_entries_finished)
+ if isinstance(journal_data, list):
+ print(f"✅ Finished Book Journal Entries: {len(journal_data)} entries found.")
+ print(journal_entries_finished)
+ else:
+ print(f"π Error: Journal entries for finished book not returned as a list.")
+
+ journal_entries_unread = book_client.get_journal_entries(unread_book_id, auth_cookies)
+ journal_data_unread = json.loads(journal_entries_unread)
+ if isinstance(journal_data_unread, list) and len(journal_data_unread) == 0:
+ print(f"✅ Unread Book Journal Entries: Correctly returned an empty list.")
+ else:
+ print(f"π Error: Journal entries for unread book did not return an empty list.")
+ except Exception as e:
+ print(f"π Error fetching journal entries: {e}")
+
+ if user_id:
+ try:
+ print(f"\nFetching AI summary for book ID: {finished_book_id}")
+ ai_summary = book_client.get_ai_summary(finished_book_id, user_id)
+ print("✅ Success! Result:")
+ print(ai_summary)
+ except Exception as e:
+ print(f"π Error fetching AI summary: {e}")
+ else:
+ print("\n⚠️ Skipping AI Summary test because user_id could not be retrieved.")
+
+ print("\nπ All tests complete!")
+
+if __name__ == "__main__":
+ run_tests()