andrewSharo · nvnieuwk · Apr 23, 2026
diff --git a/README.md b/README.md
@@ -121,6 +121,20 @@ chr2  345678  456789  DUP
 ```
 If you have any questions that aren't answered here, please [raise a new issue](https://github.com/andrewSharo/StrVCTVRE/issues)
 
+## Install via conda
+
+An alternative way to install StrVCTVRE is through conda. StrVCTVRE is available on bioconda, so it can be installed by running:
+
+```
+conda install -c bioconda strvctvre
+```
+
+When using the tool via conda, you will need to install the following additional files in order to run StrVCTVRE:
+
+1. The phyloP conservation scores as described in the relevant [chapter](#3-download-phylop-conservation-scores-for-human-genome-38) above.
+2. The random forest scores joblib file located [here](data/rfTrainedAllChromsPy3.joblib) in the StrVCTVRE repository. You can use `--scores /path/to/rfTrainedAllChromsPy3.joblib` to indicate the path to this file when running StrVCTVRE.
+3. The exon transcript file located [here](data/exons_Appris_featurized_transcript_Chr1-Y_loeuf.sorted.bed) in the StrVCTVRE repository. You can use `--exon_transcripts /path/to/exons_Appris_featurized_transcript_Chr1-Y_loeuf.sorted.bed` to indicate the path to this file when running StrVCTVRE.
+
 ## Citation
 If you use StrVCTRE in your work, please cite:
 

diff --git a/StrVCTVRE.py b/StrVCTVRE.py
@@ -33,6 +33,8 @@
                     metavar = 'path/to/hg38.phyloP100way.bw',dest='phylopPath')
 parser.add_argument('-a','--assembly',help='Genome assembly of input, either GRCh38 or GRCh37',choices=['GRCh37','GRCh38'],default='GRCh38',dest='assembly')
 parser.add_argument('-l','--liftover',help='Liftover executable path, required if assembly is GRCh37',required=False,metavar='/path/to/liftover',dest='pathLiftover')
+parser.add_argument('-t','--exon_transcripts',help='Exon transcript file path, defaults to \'data/exons_Appris_featurized_transcript_Chr1-Y_loeuf.sorted.bed\' when not provided',default='data/exons_Appris_featurized_transcript_Chr1-Y_loeuf.sorted.bed', type=str)
+parser.add_argument('-s','--scores',help='Path to the joblib file containing random forest scores, defaults to \'data/rfTrainedAllChromsPy3.joblib\' when not provided',default='data/rfTrainedAllChromsPy3.joblib', type=str)
 # for testing
 # args = parser.parse_args(['-i','/test/path/sept','-o','/test/output/sept'])
 
@@ -42,6 +44,11 @@
 if args.assembly == 'GRCh37' and args.pathLiftover is None:
     parser.error("--assembly GRCh37 requires --liftover")
 
+if not os.path.isfile(args.exon_transcripts):  # isfile('') is False, so an empty path is rejected here too
+    parser.error('exon transcript file not found at ' + args.exon_transcripts + '. Provide an existing file using the -t or --exon_transcripts argument.')
+
+if not os.path.isfile(args.scores):  # checked up front; the file itself is only loaded later in the script
+    parser.error('random forest scores file not found at ' + args.scores + '. Provide an existing file using the -s or --scores argument.')
 
 # Create temporary directory to store files created, deleted after finished running
 
@@ -196,7 +203,7 @@
 
 print('\nidentifying exonic deletions and duplications...\n')
 
-exons = pybedtools.BedTool('data/exons_Appris_featurized_transcript_Chr1-Y_loeuf.sorted.bed')
+exons = pybedtools.BedTool(args.exon_transcripts)
 df[['chrom','start','end','OldID']].to_csv(os.path.join(td,'svs.bed'),sep='\t', index=False,header=False)
 a = pybedtools.BedTool(os.path.join(td,'svs.bed'))
 b = a.intersect(exons, wa=True, wb=True).saveas(os.path.join(td,'svsExonOverlap.bed'))
@@ -251,7 +258,7 @@
 #         X = an[an['chrom'] == chrm][['DEL','numExonsFinal','phyloP', 'lowestExonRank', 'allSkippable','lowestExonsInGene', 'anyConstExon','pLIMax','loeufMin', 'cdsFracStartMin', 'cdsFracEndMax', 'cdsFracMax', 'pLI_max25_ID', 'loeuf_min25_ID','topExp','topUsage','maxStrength']].copy()
 #         an.loc[an['chrom'] == chrm,'path'] = rf.predict_proba(X)[:,1]
 
-    rf = load('data/rfTrainedAllChromsPy3.joblib')
+    rf = load(args.scores)
     X = an[['DEL','numExonsFinal','phyloP', 'lowestExonRank', 'allSkippable','lowestExonsInGene', 'anyConstExon','pLIMax','loeufMin', 'cdsFracStartMin', 'cdsFracEndMax', 'cdsFracMax', 'pLI_max25_ID', 'loeuf_min25_ID','topExp','topUsage','maxStrength']].copy()
     an['path'] = rf.predict_proba(X)[:,1]
     an.set_index('OldID', inplace=True)