From ccb164de5f7762d81132149fdd71f5cdd4c745c1 Mon Sep 17 00:00:00 2001
From: Aidan Borkan <134334100+aidanborkan@users.noreply.github.com>
Date: Fri, 15 Mar 2024 16:34:14 -0600
Subject: [PATCH] Update train_model.py

Either we need to import * from params, or simply define our minimum number of proteins locally.
---
 predict_protein/train_model.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/predict_protein/train_model.py b/predict_protein/train_model.py
index 4f88316..1f733ca 100644
--- a/predict_protein/train_model.py
+++ b/predict_protein/train_model.py
@@ -139,7 +139,11 @@ def get_train_test(self,
         # skip proteins with fewer than 20 samples
         # 2021-11-12 this should be filtered at the protein step (y_df) rather than the
         # transcript-protein joined set (xy_df). We will do a simple impute for the transcripts instead
-        if len(y_df) < params.min_proteins:
+
+        # AB 03.15.2024: we can either modify this script to import * from the params script within protein_prediction, or simply define the threshold here
+        # Minimum number of protein observations required to attempt to train a model
+        min_proteins = 50  
+        if len(y_df) < min_proteins:
             tqdm.tqdm.write('Not enough proteomics observations. Skipping protein.')
             return None