From 7e2b6f942f174aa1942e189d45be07f16a20e616 Mon Sep 17 00:00:00 2001
From: AndrewVFranco <129307231+AndrewVFranco@users.noreply.github.com>
Date: Sat, 4 Apr 2026 02:36:36 -0700
Subject: [PATCH] Add pubmed data retrieval function

---
Review note: the src/retrieval/pubmed.py hunk below was revised in review
(timeouts, empty-result guard, narrowed exception handling, corrected return
annotation, docstring). Its "index" blob id still names the pre-review content.

 .idea/.gitignore        | 10 ------
 requirements.txt        |  6 +++-
 src/core/config.py      |  4 ++-
 src/retrieval/pubmed.py | 73 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 81 insertions(+), 12 deletions(-)
 delete mode 100644 .idea/.gitignore
 create mode 100644 src/retrieval/pubmed.py

diff --git a/.idea/.gitignore b/.idea/.gitignore
deleted file mode 100644
index ab1f416..0000000
--- a/.idea/.gitignore
+++ /dev/null
@@ -1,10 +0,0 @@
-# Default ignored files
-/shelf/
-/workspace.xml
-# Ignored default folder with query files
-/queries/
-# Datasource local storage ignored files
-/dataSources/
-/dataSources.local.xml
-# Editor-based HTTP Client requests
-/httpRequests/
diff --git a/requirements.txt b/requirements.txt
index f1fa448..109cd41 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,4 +9,8 @@ python-json-logger>=2.0.0
 ruff>=0.4.0
 
 # Pytest
-pytest>=9.0.0
\ No newline at end of file
+pytest>=9.0.0
+
+# Pubmed requests
+requests>=2.33.0
+lxml>=6.0.2
\ No newline at end of file
diff --git a/src/core/config.py b/src/core/config.py
index 98fc734..1a1084f 100644
--- a/src/core/config.py
+++ b/src/core/config.py
@@ -27,4 +27,6 @@ class Settings(BaseSettings):
     DEBUG: bool = True
 
     class Config:
-        env_file = ".env"
\ No newline at end of file
+        env_file = ".env"
+
+settings = Settings()
\ No newline at end of file
diff --git a/src/retrieval/pubmed.py b/src/retrieval/pubmed.py
new file mode 100644
index 0000000..ed418c1
--- /dev/null
+++ b/src/retrieval/pubmed.py
@@ -0,0 +1,73 @@
+import logging
+from typing import Optional
+
+import requests
+
+from src.core.config import settings
+
+logger = logging.getLogger(__name__)
+
+# Seconds to wait on each E-utilities call; requests has no default timeout.
+_REQUEST_TIMEOUT_SECONDS = 10
+
+
+def search_pubmed(query: str, max_results: int = 10) -> Optional[str]:
+    """Search PubMed and return the matching records as raw XML text.
+
+    Runs ESearch to collect up to ``max_results`` PMIDs for ``query``, then
+    EFetch to download those records.
+
+    Args:
+        query: Search term sent to ESearch as ``term``.
+        max_results: Maximum number of PMIDs to request (ESearch ``retmax``).
+
+    Returns:
+        The EFetch response body, or ``None`` when the search matched nothing
+        or a request failed or returned an unexpected payload. Failures are
+        logged, not raised.
+    """
+    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
+
+    # Shared by both calls. The key is sent only when configured so an unset
+    # key is never transmitted as an empty ``api_key=`` value.
+    common_params = {"db": "pubmed"}
+    if settings.NCBI_API_KEY:
+        common_params["api_key"] = settings.NCBI_API_KEY
+
+    esearch_params = {
+        **common_params,
+        "term": query,
+        "retmax": max_results,
+        "retmode": "json",
+    }
+
+    try:
+        search_response = requests.get(
+            f"{base_url}esearch.fcgi",
+            params=esearch_params,
+            timeout=_REQUEST_TIMEOUT_SECONDS,
+        )
+        search_response.raise_for_status()
+        pmids = search_response.json()["esearchresult"]["idlist"]
+
+        if not pmids:
+            # Nothing matched, so there is nothing for EFetch to retrieve.
+            logger.info("PubMed search matched no records for %r", query)
+            return None
+
+        efetch_params = {
+            **common_params,
+            "id": ",".join(pmids),
+            "retmode": "XML",
+        }
+        fetch_response = requests.get(
+            f"{base_url}efetch.fcgi",
+            params=efetch_params,
+            timeout=_REQUEST_TIMEOUT_SECONDS,
+        )
+        fetch_response.raise_for_status()
+        return fetch_response.text
+    except (requests.RequestException, KeyError, ValueError):
+        # Best-effort lookup: record the failure and signal "no data".
+        logger.exception("PubMed retrieval failed for %r", query)
+        return None