From 4bdedeb3bfe56948b217c2af40573912c4f4f7d9 Mon Sep 17 00:00:00 2001 From: Caio Mello <68293475+caiocmello@users.noreply.github.com> Date: Mon, 20 Apr 2026 14:04:44 +0200 Subject: [PATCH 1/4] add a glance to library index --- docs/index.md | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index 9076076..3efdad1 100644 --- a/docs/index.md +++ b/docs/index.md @@ -16,9 +16,58 @@ pip install impresso The library requires Python version `3.10` or higher. It also depends on several packages commonly found in Jupyter environments, such as `matplotlib` and `pandas`. +# A Glance: + ## Create a session -::: impresso.connect +``` +from impresso import connect +client = connect() +``` + +## Search +``` +results = client.search.find(term="moon landing") +results +``` +'results' will be displayed as preview of pandas data frame. To see full data frame, run: +``` +results.df +``` +## Pagination +'results' are paginated. This data frame just displays the first 100 results. To navigate through pages, use: +``` +# Define the total amount of items you want to retrieve +total_results = 2000 +limit = 1000 + +# This creates a list called 'all_results' to save your items +all_results = [] + +# Now you loop through pages of 1000 outputs until you have collected all the items you defined in 'total_results' +# Results are saved in the list 'all_results' +for offset in range(0, total_results, limit): + results = client.search.find( + term="Titanic", + order_by="-date", + limit=limit, + offset=offset + ) + all_results.append(results.df) + +# To conclude, you transform your list into a Pandas Dataframe and visualise it by running 'full_results_df' +full_results_df = pd.concat(all_results, ignore_index=True) +full_results_df +``` +## Accessing transcripts +Transcripts, text data from content items, can accessed by sending a request using the content item id. 
See example below: +``` +results = client.content_items.get("NZG-1877-10-20-a-i0024") +# transcript data is shown in column 'text.content' +``` +## See content item on Web App (shortcut) +To see a specific content item on the Web App, just add the content item id on the URL +https://impresso-project.ch/app/article/{id} ## About Impresso From d6670d329b645315e9b481e1f837a7ddfd89b32e Mon Sep 17 00:00:00 2001 From: Roman Kalyakin Date: Wed, 22 Apr 2026 09:22:21 +0200 Subject: [PATCH 2/4] updated changes --- docs/index.md | 88 ++++++++++++++++++++++++++++----------------------- 1 file changed, 49 insertions(+), 39 deletions(-) diff --git a/docs/index.md b/docs/index.md index 3efdad1..7d210f3 100644 --- a/docs/index.md +++ b/docs/index.md @@ -16,59 +16,69 @@ pip install impresso The library requires Python version `3.10` or higher. It also depends on several packages commonly found in Jupyter environments, such as `matplotlib` and `pandas`. -# A Glance: +## At a glance -## Create a session +### Create a session -``` +```python from impresso import connect client = connect() ``` -## Search -``` +### Search + +```python results = client.search.find(term="moon landing") results ``` -'results' will be displayed as preview of pandas data frame. To see full data frame, run: -``` + +`results` will display a summary of the result including a preview of a pandas data frame with the result data. Use the `df` property to access the full data frame: + +```python results.df ``` -## Pagination -'results' are paginated. This data frame just displays the first 100 results. 
To navigate through pages, use: -``` -# Define the total amount of items you want to retrieve -total_results = 2000 -limit = 1000 - -# This creates a list called 'all_results' to save your items -all_results = [] - -# Now you loop through pages of 1000 outputs until you have collected all the items you defined in 'total_results' -# Results are saved in the list 'all_results' -for offset in range(0, total_results, limit): - results = client.search.find( - term="Titanic", - order_by="-date", - limit=limit, - offset=offset - ) - all_results.append(results.df) - -# To conclude, you transform your list into a Pandas Dataframe and visualise it by running 'full_results_df' -full_results_df = pd.concat(all_results, ignore_index=True) -full_results_df -``` -## Accessing transcripts -Transcripts, text data from content items, can accessed by sending a request using the content item id. See example below: +### Pagination + +!!! warning "Monthly Quota" + Every Impresso user has a monthly quota of the content items they can access. + The quota is currently set at 200,000 content items. Paginating through a + large result set may see you hitting the quota limit fairly soon. + Make sure to check the size of the full result set before fetching all pages. + +By default every result object is the first page of the full result set. Use the following code to go through the rest of the pages: + +```python +import pandas as pd +# Get first page with 100 items per page +results = client.search.find(term="landing", limit=100) +print(f"Full result contains {results.total} items.") + +full_df = results.df + +# Iterate through all pages +for page in results.pages(): + full_df = pd.concat([full_df, page.df]) + +full_df ``` -results = client.content_items.get("NZG-1877-10-20-a-i0024") -# transcript data is shown in column 'text.content' + +### Accessing transcripts + +Content item transcripts can be large and are not returned by default. 
+To access a transcript, request it by content item ID: + +```python +result = client.content_items.get("NZG-1877-10-20-a-i0024") +result.df['text.content'][0] ``` -## See content item on Web App (shortcut) -To see a specific content item on the Web App, just add the content item id on the URL -https://impresso-project.ch/app/article/{id} +### See content item on Web App (shortcut) +To see a specific content item in the Web App, look for the link "See this result in the Impresso App" in the rendered result summary: + +```python +result = client.content_items.get("NZG-1877-10-20-a-i0024") +result +``` ## About Impresso From a3b9006439162cb8ac3e57b3b389694cf0eed6ef Mon Sep 17 00:00:00 2001 From: Roman Kalyakin Date: Wed, 22 Apr 2026 09:31:10 +0200 Subject: [PATCH 3/4] added links to necessary resources in documentation --- docs/resources.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/resources.md b/docs/resources.md index e914fd8..74ba8f1 100644 --- a/docs/resources.md +++ b/docs/resources.md @@ -29,6 +29,9 @@ impresso.search.facet(facet='newspaper', term='war') ::: impresso.resources.search.SearchResource ::: impresso.api_client.models.search_order_by.SearchOrderByLiteral +::: impresso.api_client.models.content_item_access_rights_copyright.ContentItemAccessRightsCopyrightLiteral +::: impresso.resources.tools.Embedding + ::: impresso.resources.search.SearchDataContainer ## Entities @@ -72,6 +75,7 @@ impresso.media_sources.find( ::: impresso.resources.media_sources.MediaSourcesResource +::: impresso.api_client.models.find_media_sources_type.FindMediaSourcesTypeLiteral ::: impresso.api_client.models.find_media_sources_order_by.FindMediaSourcesOrderByLiteral ::: impresso.resources.media_sources.FindMediaSourcesContainer From 3011008f25839b2340634323a99fea222f298e68 Mon Sep 17 00:00:00 2001 From: Roman Kalyakin Date: Wed, 22 Apr 2026 09:33:55 +0200 Subject: [PATCH 4/4] brought back the deleted connect section --- docs/index.md | 4 ++++ 1 file changed, 4 
insertions(+) diff --git a/docs/index.md b/docs/index.md index 7d210f3..191fe0f 100644 --- a/docs/index.md +++ b/docs/index.md @@ -80,6 +80,10 @@ result = client.content_items.get("NZG-1877-10-20-a-i0024") result ``` +## Create a session + +::: impresso.connect + ## About Impresso ### Impresso project