From 681fa996e5ede1d60936e503db8d65e0d8b90e8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Al=C3=A1n=20F=2E=20Mu=C3=B1oz?= Date: Thu, 14 Aug 2025 11:22:04 -0400 Subject: [PATCH] fix: Update data sources issue #11 --- .../analysis/Data_filtering/get_features.py | 20 +++---------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/workspace/analysis/Data_filtering/get_features.py b/workspace/analysis/Data_filtering/get_features.py index 1e98270..86fd8b3 100755 --- a/workspace/analysis/Data_filtering/get_features.py +++ b/workspace/analysis/Data_filtering/get_features.py @@ -2,24 +2,10 @@ from pyarrow.dataset import dataset from s3fs import S3FileSystem - -_PREFIX = ( - "s3://cellpainting-gallery/cpg0016-jump-assembled/source_all/workspace/profiles" -) -_RECIPE = "jump-profiling-recipe_2024_a917fa7" - -transforms = ( - ( - "CRISPR", - "profiles_wellpos_cc_var_mad_outlier_featselect_sphering_harmony_PCA_corrected", - ), - ("ORF", "profiles_wellpos_cc_var_mad_outlier_featselect_sphering_harmony"), - ("COMPOUND", "profiles_var_mad_int_featselect_harmony"), -) - filepaths = { - dset: f"{_PREFIX}/{_RECIPE}/{dset}/{transform}/profiles.parquet" - for dset, transform in transforms + "CRISPR": "s3://cellpainting-gallery/cpg0016-jump-assembled/source_all/workspace/profiles_assembled/CRISPR/v1.0a/profiles.parquet", + "ORF": "s3://cellpainting-gallery/cpg0016-jump-assembled/source_all/workspace/profiles_assembled/ORF/v1.0a/profiles.parquet", + "COMPOUND": "s3://cellpainting-gallery/cpg0016-jump-assembled/source_all/workspace/profiles_assembled/COMPOUND/v1.0/profiles.parquet", }