|
7 | 7 | import os |
8 | 8 |
|
9 | 9 | class AnnotationPipeline: |
10 | | - def __init__(self, pmcid: str, citation_approach: str = "lm"): |
| 10 | + def __init__(self, pmcid: str, citation_model: str = "local"): |
11 | 11 | if not is_pmcid(pmcid): |
12 | 12 | logger.error(f"Invalid PMCID: {pmcid}") |
13 | 13 | self.pmcid = pmcid |
14 | | - self.citation_approach = citation_approach |
| 14 | + self.citation_model = citation_model |
15 | 15 | self.article_text = get_article_text(pmcid) |
16 | 16 | self.title = get_title(self.article_text) |
17 | 17 | self.study_parameters = {} |
@@ -41,11 +41,11 @@ def run(self, save_path: str = "data/annotations"): |
41 | 41 | self.annotations = annotation_generator.generate_table_json() |
42 | 42 |
|
43 | 43 | # Generate citations for annotations and study parameters |
44 | | - citation_generator = CitationGenerator(self.pmcid, approach=self.citation_approach) |
45 | | - logger.info(f"Adding Citations to Annotations using {self.citation_approach} approach") |
| 44 | + citation_generator = CitationGenerator(self.pmcid, model=self.citation_model) |
| 45 | + logger.info(f"Adding Citations to Annotations using model {self.citation_model}") |
46 | 46 | self.annotations = citation_generator.add_citations_to_annotations(self.annotations) |
47 | 47 |
|
48 | | - logger.info(f"Adding Citations to Study Parameters using {self.citation_approach} approach") |
| 48 | + logger.info(f"Adding Citations to Study Parameters using model {self.citation_model}") |
49 | 49 | self.study_parameters = citation_generator.add_citations_to_study_parameters(self.study_parameters) |
50 | 50 |
|
51 | 51 | self.print_info() |
@@ -79,15 +79,15 @@ def copy_markdown(pmcid: str): |
79 | 79 |
|
80 | 80 | if __name__ == "__main__": |
81 | 81 | pmcids = [ |
82 | | - "PMC5728534", |
| 82 | + # "PMC5728534", |
83 | 83 | "PMC11730665", |
84 | | - "PMC5712579", |
85 | | - "PMC4737107", |
86 | | - "PMC5749368" |
| 84 | + # "PMC5712579", |
| 85 | + # "PMC4737107", |
| 86 | + # "PMC5749368" |
87 | 87 | ] |
88 | 88 | for pmcid in pmcids: |
89 | 89 | logger.info(f"Processing {pmcid}") |
90 | | - pipeline = AnnotationPipeline(pmcid, citation_approach="local") |
| 90 | + pipeline = AnnotationPipeline(pmcid, citation_model="gemini/gemini-2.0-flash") |
91 | 91 | pipeline.run() |
92 | 92 | for pmcid in pmcids: |
93 | 93 | copy_markdown(pmcid) |
0 commit comments