Skip to content

Commit 2de362e

Browse files
feat: gemini citations
1 parent e456c79 commit 2de362e

7 files changed

Lines changed: 569 additions & 299 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ __pycache__
1818
.pyenv
1919
.env
2020
.envrc
21+
credentials/
2122

2223
# data
2324
data/articles/

data/annotations/PMC11730665.json

Lines changed: 22 additions & 21 deletions
Large diffs are not rendered by default.

pixi.lock

Lines changed: 129 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pixi.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,3 +37,4 @@ datasets = ">=3.6.0,<4"
3737
litellm = ">=1.72.2,<2"
3838
termcolor = ">=3.1.0,<4"
3939
gdown = ">=5.2.0,<6"
40+
google-auth = ">=2.40.3,<3"

src/components/annotation_pipeline.py

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -7,11 +7,11 @@
77
import os
88

99
class AnnotationPipeline:
10-
def __init__(self, pmcid: str, citation_approach: str = "lm"):
10+
def __init__(self, pmcid: str, citation_model: str = "local"):
1111
if not is_pmcid(pmcid):
1212
logger.error(f"Invalid PMCID: {pmcid}")
1313
self.pmcid = pmcid
14-
self.citation_approach = citation_approach
14+
self.citation_model = citation_model
1515
self.article_text = get_article_text(pmcid)
1616
self.title = get_title(self.article_text)
1717
self.study_parameters = {}
@@ -41,11 +41,11 @@ def run(self, save_path: str = "data/annotations"):
4141
self.annotations = annotation_generator.generate_table_json()
4242

4343
# Generate citations for annotations and study parameters
44-
citation_generator = CitationGenerator(self.pmcid, approach=self.citation_approach)
45-
logger.info(f"Adding Citations to Annotations using {self.citation_approach} approach")
44+
citation_generator = CitationGenerator(self.pmcid, model=self.citation_model)
45+
logger.info(f"Adding Citations to Annotations using model {self.citation_model}")
4646
self.annotations = citation_generator.add_citations_to_annotations(self.annotations)
4747

48-
logger.info(f"Adding Citations to Study Parameters using {self.citation_approach} approach")
48+
logger.info(f"Adding Citations to Study Parameters using model {self.citation_model}")
4949
self.study_parameters = citation_generator.add_citations_to_study_parameters(self.study_parameters)
5050

5151
self.print_info()
@@ -79,15 +79,15 @@ def copy_markdown(pmcid: str):
7979

8080
if __name__ == "__main__":
8181
pmcids = [
82-
"PMC5728534",
82+
# "PMC5728534",
8383
"PMC11730665",
84-
"PMC5712579",
85-
"PMC4737107",
86-
"PMC5749368"
84+
# "PMC5712579",
85+
# "PMC4737107",
86+
# "PMC5749368"
8787
]
8888
for pmcid in pmcids:
8989
logger.info(f"Processing {pmcid}")
90-
pipeline = AnnotationPipeline(pmcid, citation_approach="local")
90+
pipeline = AnnotationPipeline(pmcid, citation_model="gemini/gemini-2.0-flash")
9191
pipeline.run()
9292
for pmcid in pmcids:
9393
copy_markdown(pmcid)

0 commit comments

Comments (0)