Skip to content

Commit 5d63a72

Browse files
feat: copy markdown utility
Merge pull request #17 from shloknatarajan/main
2 parents 39eea8f + 8f6033f commit 5d63a72

2 files changed

Lines changed: 35 additions & 0 deletions

File tree

pixi.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ update-download-map = "python -c 'from src.fetch_articles.article_downloader imp
1717
download-articles = "python -m src.fetch_articles.article_downloader"
1818
download-data = "gdown --fuzzy https://drive.google.com/file/d/1qtQWvi0x_k5_JofgrfsgkWzlIdb6isr9/view && unzip autogkb-data.zip && rm autogkb-data.zip"
1919
setup-repo = "pixi install && pixi run download-data"
20+
copy-markdown = "python -m src.copy_markdown"
2021

2122
[dependencies]
2223
seaborn = ">=0.13.2,<0.14"

src/copy_markdown.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
from typing import List
2+
import os
3+
import shutil
4+
from loguru import logger
5+
from pathlib import Path
6+
7+
"""
8+
Copy the markdown files for a given list of PMCIDs from data/articles
into data/extractions/markdown.
9+
"""
10+
11+
12+
def copy_markdown(
    pmcids: List[str],
    source_dir: Path = Path("data") / "articles",
    destination_dir: Path = Path("data") / "extractions" / "markdown",
) -> None:
    """Copy the markdown file of each PMCID into a destination folder.

    For every ID, ``<source_dir>/<pmcid>.md`` is copied (with metadata, via
    ``shutil.copy2``) to ``<destination_dir>/<pmcid>.md``. The operation is
    best-effort: a file that cannot be copied is logged and skipped, and a
    summary with the number of successful copies is logged at the end.

    Args:
        pmcids: PMCIDs whose markdown files should be copied.
        source_dir: Folder holding the ``<pmcid>.md`` files. Defaults to
            ``data/articles``.
        destination_dir: Folder receiving the copies; created (including
            parents) if it does not exist. Defaults to
            ``data/extractions/markdown``.

    Returns:
        None. Progress and failures are reported through ``logger``.
    """
    # Tolerate plain strings being passed for either directory.
    source_dir = Path(source_dir)
    destination_dir = Path(destination_dir)

    copied = 0
    try:
        # Loop-invariant: create the destination once, not once per file.
        destination_dir.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        # Stay best-effort: report the problem and fall through to the summary.
        logger.error(
            f"Could not create destination {destination_dir.as_posix()}: {e}"
        )
    else:
        for pmcid in pmcids:
            file_name = f"{pmcid}.md"
            try:
                shutil.copy2(source_dir / file_name, destination_dir / file_name)
            except (OSError, ValueError) as e:
                # OSError covers missing/unreadable files and shutil errors;
                # ValueError covers malformed names (e.g. embedded NUL bytes).
                logger.error(f"Failed to copy markdown for {pmcid}: {e}")
            else:
                copied += 1

    logger.info(
        f"Copied {copied}/{len(pmcids)} markdown to {destination_dir.as_posix()}"
    )
26+
27+
28+
def main():
    """Copy a fixed set of PMC article markdown files via ``copy_markdown``."""
    selected_ids = [
        "PMC4737107",
        "PMC5712579",
        "PMC5728534",
        "PMC5749368",
        "PMC11730665",
    ]
    copy_markdown(pmcids=selected_ids)
31+
32+
33+
# Run the copy only when executed as a script (e.g. `python -m src.copy_markdown`),
# not when this module is imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)