diff --git a/.DS_Store b/.DS_Store
deleted file mode 100644
index bc93ed10..00000000
Binary files a/.DS_Store and /dev/null differ
diff --git a/README.md b/README.md
index 3440b850..1ea0ec90 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 # Reinforcement Learning Complex Detection
-This reinforcement learning algorithm is a machine learning method for complex detection in networks. Using known communities, it is trained and learns to find new complexes in the network.
+This is a reinforcement learning algorithm for community detection in networks. Trained on known communities, it learns to find new communities in a network.
 
 # Installation:
 Required python3                                  
@@ -10,38 +10,45 @@ Requirements installation:
 1. For a toy network use input_toy.yaml
 2. For hu.MAP - use input file input_humap.yaml
 
-
 # Instructions:
 To run this pipeline on a new network, construct an input file similar to input_toy.yaml specifying where to find the required inputs.
-1. Specify input options relating to network: Set options dir_nm (directory containing the network) and netf_nm (file name of the network)
-2. Specify input options relating to known communities in network: If you already have sepearated known communities into train and test communitites, specify their paths in the options comf_nm and comf_test_nm (relative to the directory specified in the option:dir_nm) Otherwise, Split complex list into train and test: Set option split_flag = 1 Verify that train test size distributions in figure are the similar. Also check that number of training complexes is not too low by looking at the res_metrics.out file. Set options comf_nm and comf_test_nm with these two files. All the above paths are set relative to the directory specified in the option:dir_nm Make sure to change the option split_flag back to 0 after this step
+1. Specify the network input file: Set options dir_nm (directory containing the network) and netf_nm (file name of the network)
+2. Specify the paths to the train and test communities in the options comf_nm and comf_test_nm (relative to the directory specified in the option dir_nm)
 
-An example bash script to run the RL pipeline after the above steps is shown below: This is for hu.MAP complexes
+An example bash script to run the RL pipeline after the above steps is shown below. This example is for complexes learned on the human PPI network, hu.MAP 1.0:
 ```
 #!/bin/bash
-mtype = humap
-input_file_name = input_$mtype.yaml
-graph_file = hu.MAP_network_experiments/input_data/humap_network_weighted_edge_lists.txt
-input_training_file = hu.MAP_network_experiments/intermediate_output_results_data/training_CORUM_complexes_node_lists.txt
-input_testing_file = hu.MAP_network_experiments/intermediate_output_results_data/testing_CORUM_complexes_node_lists.txt
-out_dir_name = /results_$mtype
-train_results = $out_dir_name/train_results
-pred_results = $out_dir_name/pred_results
-id_map_path = convert_ids/humap_gene_id_name_map.txt
+
+mtype=humap
+input_file_name=input_$mtype.yaml
+graph_file=hu.MAP_network/input_data/humap_network_weighted_edge_lists.txt
+input_training_file=hu.MAP_network/intermediate_data/training_CORUM_complexes_node_lists.txt
+input_testing_file=hu.MAP_network/intermediate_data/testing_CORUM_complexes_node_lists.txt
+mkdir results_$mtype
+out_dir_name=./results_$mtype
+train_results=$out_dir_name/train_results
+pred_results=$out_dir_name/pred_results
+id_map_path=convert_ids/humap_gene_id_name_map.txt
+
 echo Training Algorithm....
-python3 functions/main_training.py --input_training_file $input_training_file --graph_file $graph_file --train_results $train_results
+python3 main_training.py --input_training_file $input_training_file --graph_file $graph_file --train_results $train_results
 
 echo Predicting new complexes from known communities...
-python3 functions/main_prediction.py --graph_file $graph_file --train_results $train_results --out_dir_name $out_dir_name --pred_results $pred_results
+python3 main_prediction.py --graph_file $graph_file --train_results $train_results --out_dir_name $out_dir_name --pred_results $pred_results
 
 echo Merging similar communities...
-python3 functions/postprocessing.py --input_file_name $input_file_name --graph_file $graph_file --out_dir_name $out_dir_name --pred_results $pred_results --train_results $train_results --input_training_file $input_training_file --input_testing_file $input_testing_file --id_map_path $id_map_path
+python3 postprocessing.py --input_file_name $input_file_name --graph_file $graph_file --out_dir_name $out_dir_name --pred_results $pred_results --train_results $train_results --input_training_file $input_training_file --input_testing_file $input_testing_file --id_map_path $id_map_path
 
 echo Comparing predicted and known communitites...
-python3 functions/eval_complex_RL --input_file_name $input_file_name  --input_training_file $input_training_file --input_testing_file $input_testing_file --out_dir_name $out_dir_name
+python3 eval_complex_RL.py --input_file_name $input_file_name  --input_training_file $input_training_file --input_testing_file $input_testing_file --out_dir_name $out_dir_name --id_name_path $id_map_path
 
 ```
 
-# Additional tips:
+## Additional tips:
 For each of the scripts, optional arguments can be viewed by running: python3 script_name.py --help
 For each command, add the desired argument directly on the terminal.
+
+# References:
+M. V. Palukuri, R. S. Patil, and E. M. Marcotte, “Molecular complex detection in protein interaction networks through reinforcement learning.” bioRxiv, p. 2022.06.20.496772. doi: [10.1101/2022.06.20.496772](https://www.biorxiv.org/content/10.1101/2022.06.20.496772v1).
+
+Interactive visualizations of complexes learned by the RL algorithm on two human PPI networks, hu.MAP 1.0 and hu.MAP 2.0, are available here: [https://marcottelab.github.io/RL_humap_prediction/](https://marcottelab.github.io/RL_humap_prediction/)
diff --git a/convert_humap_ids2names.py b/convert_humap_ids2names.py
index d68a3e18..7b398331 100644
--- a/convert_humap_ids2names.py
+++ b/convert_humap_ids2names.py
@@ -184,6 +184,6 @@ def convert2names_wscores(complexes, filename, G, filename_edges, ids_map):
     convert_edges_wscore(lines, G, filename_edges, id_name_map)
 
 
-def convert2names_wscores_matches(complex_matches, filename):
-    id_name_map = read_gene_id_map()
+def convert2names_wscores_matches(complex_matches, filename, id_name_map_path):
+    id_name_map = read_gene_id_map(id_name_map_path)
     convert_nodes_matches_wscore(complex_matches, filename, id_name_map)    
diff --git a/eval_cmplx_sc.py b/eval_cmplx_sc.py
index e2287b7d..0a2c5ef7 100644
--- a/eval_cmplx_sc.py
+++ b/eval_cmplx_sc.py
@@ -16,11 +16,11 @@
 
 
 
-def write_best_matches(best_matches_for_known,out_comp_nm,dir_nm,suffix):
+def write_best_matches(best_matches_for_known,out_comp_nm,dir_nm,suffix,id_name_map):
        
     sorted_matches = sorted(best_matches_for_known,key=lambda x: x[2],reverse=True)
     if dir_nm == "humap":
-        convert2names_wscores_matches(sorted_matches, out_comp_nm + suffix + '_known_pred_matches_names.out')
+        convert2names_wscores_matches(sorted_matches, out_comp_nm + suffix + '_known_pred_matches_names.out',id_name_map)
  
     with open(out_comp_nm + suffix + '_known_pred_matches.out', "w") as fn:
         fn_write = fn.write
@@ -127,7 +127,7 @@ def f1_similarity(P,T):
     return F1_score, C 
 
 
-def one2one_matches(known_complex_nodes_list, fin_list_graphs, N_pred_comp, N_test_comp,out_comp_nm,suffix,dir_nm):
+def one2one_matches(known_complex_nodes_list, fin_list_graphs, N_pred_comp, N_test_comp,out_comp_nm,suffix,dir_nm, id_name_map):
 
     Metric = np_zeros((N_test_comp, N_pred_comp))
     Common_nodes = np_zeros((N_test_comp, N_pred_comp))
@@ -174,8 +174,8 @@ def one2one_matches(known_complex_nodes_list, fin_list_graphs, N_pred_comp, N_te
     avg_f1_score = (avged_f1_score4known + avged_f1_score4pred)/2
     net_f1_score = 2 * avged_f1_score4known * avged_f1_score4pred / (avged_f1_score4known + avged_f1_score4pred)
     
-    write_best_matches(best_matches_4known,out_comp_nm,dir_nm,'_best4known' + suffix)
-    write_best_matches(best_matches_4predicted,out_comp_nm,dir_nm,'_best4predicted' + suffix)
+    write_best_matches(best_matches_4known,out_comp_nm,dir_nm,'_best4known' + suffix, id_name_map)
+    write_best_matches(best_matches_4predicted,out_comp_nm,dir_nm,'_best4predicted' + suffix, id_name_map)
 
     prec_MMR, recall_MMR, f1_MMR, max_matching_edges = f1_mmr(Metric)
     
@@ -295,28 +295,28 @@ def remove_unknown_prots(fin_list_graphs_orig, prot_list):
     return fin_list_graphs
 
 
-def compute_metrics(known_complex_nodes_list, fin_list_graphs,out_comp_nm,N_test_comp,N_pred_comp,inputs,suffix):
+def compute_metrics(known_complex_nodes_list, fin_list_graphs,out_comp_nm,N_test_comp,N_pred_comp,inputs,suffix, id_name_map):
 
     if N_test_comp != 0 and N_pred_comp != 0:
         Precision, Recall, F1_score = node_comparison_prec_recall(known_complex_nodes_list,fin_list_graphs, N_pred_comp, N_test_comp, inputs["eval_p"],out_comp_nm+suffix)
         
-        avg_f1_score, net_f1_score,PPV,Sn,acc_unbiased,prec_MMR, recall_MMR, f1_MMR,n_matches = one2one_matches(known_complex_nodes_list, fin_list_graphs, N_pred_comp, N_test_comp,out_comp_nm,suffix,inputs['dir_nm'])
+        avg_f1_score, net_f1_score,PPV,Sn,acc_unbiased,prec_MMR, recall_MMR, f1_MMR,n_matches = one2one_matches(known_complex_nodes_list, fin_list_graphs, N_pred_comp, N_test_comp,out_comp_nm,suffix,inputs['dir_nm'], id_name_map)
         
         with open(out_comp_nm + '_metrics.out', "a") as fid:
             print("No. of matches in MMR = ", n_matches, file=fid)            
-            print("MMR Precision = %.3f" % prec_MMR, file=fid)
-            print("MMR Recall = %.3f" % recall_MMR, file=fid)
-            print("MMR F1 score = %.3f" % f1_MMR, file=fid)               
-            print("Net F1 score = %.3f" % net_f1_score, file=fid)   
+            print("FMM Precision = %.3f" % prec_MMR, file=fid)
+            print("FMM Recall = %.3f" % recall_MMR, file=fid)
+            print("FMM F1 score = %.3f" % f1_MMR, file=fid)               
+            print("CMMF = %.3f" % net_f1_score, file=fid)   
             print("Unbiased PPV = %.3f" % PPV, file=fid)
             print("Unbiased Sn = %.3f" % Sn, file=fid)
-            print("Unbiased accuracy= %.3f" % acc_unbiased, file=fid)             
+            print("Unbiased accuracy (UnSPA)= %.3f" % acc_unbiased, file=fid)             
             print("Net Averaged F1 score (Average of Precision and Recall based on F1 score) = %.3f" % avg_f1_score, file=fid)
-            print("Prediction Precision = %.3f" % Precision, file=fid)
-            print("Prediction Recall = %.3f" % Recall, file=fid)
-            print("Prediction F1 score = %.3f" % F1_score, file=fid)    
+            print("Qi et al Precision = %.3f" % Precision, file=fid)
+            print("Qi et al Recall = %.3f" % Recall, file=fid)
+            print("Qi et al F1 score = %.3f" % F1_score, file=fid)    
     
-def eval_complex(rf=0, rf_nm=0, inputs={}, known_complex_nodes_list=[], prot_list=[], fin_list_graphs=[], out_comp_nm = '',suffix="both"):
+def eval_complex(rf=0, rf_nm=0, inputs={}, known_complex_nodes_list=[], prot_list=[], fin_list_graphs=[], out_comp_nm = '',suffix="both", id_name_map = ""):
     # rf - read flag to read complexes from file
     logging_info("Evaluating complexes..." + suffix)
     if rf == 1:
@@ -338,7 +338,7 @@ def eval_complex(rf=0, rf_nm=0, inputs={}, known_complex_nodes_list=[], prot_lis
         print("No. of Predicted complexes = ", N_pred_comp, file=fid)
         print("\n -- Metrics on complexes with all proteins -- ", file=fid)       
     print(out_comp_nm)
-    compute_metrics(known_complex_nodes_list, fin_list_graphs, out_comp_nm,N_test_comp,N_pred_comp,inputs,suffix+'_all_prots')            
+    compute_metrics(known_complex_nodes_list, fin_list_graphs, out_comp_nm,N_test_comp,N_pred_comp,inputs,suffix+'_all_prots',id_name_map)            
     
     fin_list_graphs = remove_unknown_prots(fin_list_graphs, prot_list)
     plot_size_dists(known_complex_nodes_list, fin_list_graphs, sizes_orig, out_comp_nm)
@@ -348,8 +348,8 @@ def eval_complex(rf=0, rf_nm=0, inputs={}, known_complex_nodes_list=[], prot_lis
         print("No. of Predicted complexes after removing non-gold std proteins = ", N_pred_comp, file=fid)
         print("\n -- Metrics on complexes with only gold std proteins -- ", file=fid)   
     
-    compute_metrics(known_complex_nodes_list, fin_list_graphs, out_comp_nm,N_test_comp,N_pred_comp,inputs,suffix+'_gold_std_prots')            
+    compute_metrics(known_complex_nodes_list, fin_list_graphs, out_comp_nm,N_test_comp,N_pred_comp,inputs,suffix+'_gold_std_prots', id_name_map)            
     with open(out_comp_nm + '_metrics.out', "a") as fid:
         print("-- Finished writing main metrics -- \n", file=fid)   
 
-    logging_info("Finished evaluating basic metrics for complexes " + suffix)
\ No newline at end of file
+    logging_info("Finished evaluating basic metrics for complexes " + suffix)
diff --git a/eval_complex_RL.py b/eval_complex_RL.py
index 19f15ace..5e09a0ad 100644
--- a/eval_complex_RL.py
+++ b/eval_complex_RL.py
@@ -1,8 +1,7 @@
 from argparse import ArgumentParser as argparse_ArgumentParser, ArgumentParser
 from pickle import load as pickle_load
 from yaml import load as yaml_load, dump as yaml_dump, Loader as yaml_Loader
-from eval_cmplx_sc import eval_complex
-from eval_cmplx_sc import remove_unknown_prots
+from eval_cmplx_sc import eval_complex, remove_unknown_prots
 from main6_eval import run_metrics
 import os
 def main():
@@ -13,6 +12,7 @@ def main():
     parser.add_argument("--input_testing_file", default="", help="Testing Graph file path")
     parser.add_argument("--out_dir_name", default="", help="Output directory name")
     parser.add_argument("--evaluate_additional_metrics", default=1, help="complexes file name")
+    parser.add_argument("--id_name_path", default="", help="Path for id to gene name file")
     args = parser.parse_args()
     print(args.input_file_name)
     with open(args.input_file_name, 'r') as f:
@@ -52,7 +52,7 @@ def main():
     # Remove all proteins in Predicted complexes that are not present in known complex protein list
     fin_list_graphs = remove_unknown_prots(fin_list_graphs_orig, prot_list)
     suffix = ''
-    eval_complex(0, 0, inputs, known_complex_nodes_list, prot_list, fin_list_graphs, out_comp_nm, suffix="_train")
+    eval_complex(0, 0, inputs, known_complex_nodes_list, prot_list, fin_list_graphs, out_comp_nm, suffix="_train", id_name_map = args.id_name_path)
     if args.evaluate_additional_metrics:
         try:
             run_metrics(known_complex_nodes_list, fin_list_graphs, out_comp_nm, "_train")
@@ -75,7 +75,7 @@ def main():
     # Remove all proteins in Predicted complexes that are not present in known complex protein list
     fin_list_graphs = remove_unknown_prots(fin_list_graphs_orig, prot_list)
     suffix = ''
-    eval_complex(0, 0, inputs, known_complex_nodes_list, prot_list, fin_list_graphs, out_comp_nm,suffix="_train")
+    eval_complex(0, 0, inputs, known_complex_nodes_list, prot_list, fin_list_graphs, out_comp_nm,suffix="_train", id_name_map = args.id_name_path)
 
     if args.evaluate_additional_metrics:
         try:
@@ -109,7 +109,7 @@ def main():
     N_pred_comp = len(fin_list_graphs)
     suffix = ''
 
-    eval_complex(0, 0, inputs, known_complex_nodes_list, prot_list, fin_list_graphs, out_comp_nm,suffix="_train")
+    eval_complex(0, 0, inputs, known_complex_nodes_list, prot_list, fin_list_graphs, out_comp_nm,suffix="_train", id_name_map = args.id_name_path)
 
     if args.evaluate_additional_metrics:
         try:
diff --git a/hu.MAP_network/.DS_Store b/hu.MAP_network/.DS_Store
deleted file mode 100644
index 15d62dd4..00000000
Binary files a/hu.MAP_network/.DS_Store and /dev/null differ
diff --git a/humap_steps.sh b/humap_steps.sh
new file mode 100755
index 00000000..bccb6f6f
--- /dev/null
+++ b/humap_steps.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+mtype=humap
+input_file_name=input_$mtype.yaml
+graph_file=hu.MAP_network/input_data/humap_network_weighted_edge_lists.txt
+input_training_file=hu.MAP_network/intermediate_data/training_CORUM_complexes_node_lists.txt
+input_testing_file=hu.MAP_network/intermediate_data/testing_CORUM_complexes_node_lists.txt
+mkdir results_$mtype
+out_dir_name=./results_$mtype
+train_results=$out_dir_name/train_results
+pred_results=$out_dir_name/pred_results
+id_map_path=convert_ids/humap_gene_id_name_map.txt
+
+echo Training Algorithm....
+python3 main_training.py --input_training_file $input_training_file --graph_file $graph_file --train_results $train_results
+
+echo Predicting new complexes from known communities...
+python3 main_prediction.py --graph_file $graph_file --train_results $train_results --out_dir_name $out_dir_name --pred_results $pred_results
+
+echo Merging similar communities...
+python3 postprocessing.py --input_file_name $input_file_name --graph_file $graph_file --out_dir_name $out_dir_name --pred_results $pred_results --train_results $train_results --input_training_file $input_training_file --input_testing_file $input_testing_file --id_map_path $id_map_path
+
+echo Comparing predicted and known communitites...
+python3 eval_complex_RL.py --input_file_name $input_file_name  --input_training_file $input_training_file --input_testing_file $input_testing_file --out_dir_name $out_dir_name --id_name_path $id_map_path
diff --git a/input_humap.yaml b/input_humap.yaml
index 525b6826..d75a259f 100644
--- a/input_humap.yaml
+++ b/input_humap.yaml
@@ -6,43 +6,11 @@ comf_nm: "/res_train_complexes_new_73_more.txt"
 comf_test_nm: "/res_test_complexes_new_73_more.txt" # Make sure no extra rows are present
 comf_nm_all: "/all_complexes.txt"
 out_comp_nm: "/results_qi0.325/res"
-scale_factor: 10  # Number of times negatives should be higher than positives 
-use_full: 1
 split_flag: 0
-fact: 0.99
-perc_transfer: 0.275
-mode: non_gen # gen means only feature extraction, non_gen is all 
 # -------------------Training parameters--------------------------------
-feats: 6
 
-classifier_file: "humap/results_73_neg_unif_10x/res_classifiers_new.txt" # or remove new - CHECK
-model_type: "tpot" # Options: tpot, NN
-train_feat_mat: "humap/results_73_neg_unif_10x/res_train_dat.csv"
-test_feat_mat: "humap/results_73_neg_unif_10x/res_test_dat.csv"
-model_name: "tpot_select" #Options: FF_1hidden, log_reg, SVM, rand_forest, extra_trees, estimator_SVM
 model_dir: "/results_73_neg_unif_10x/res"
 # --------------------Search parameters ------------------------------
-seed_mode: "all_nodes" # Options:all_nodes_known_comp, all_nodes, n_nodes,cliques
-num_comp: 5 # Options: 10, 7778, 1500 - only for n_nodes mode
-classi_thresh: 0.5
-
-run_mode: "parallel" # Options: serial, parallel
-max_size_thres: 11 
-search_method: "isa" # isa, metropolis, search_top_neigs, search_max_neig 
-
-# All methods except max_neig    
-use_all_neigs: 1
-thres_neig: 30 # Maximum number of neighbors sampled for checking 
-min_thres_neig_sorted: 100 # Threshold above which only a percentage of neigs are considered as per sorted weights
-perc: 0.7 # Percentage of neighbors to check for adding new node
-explore_prob: 0.01 # use 0.1 for top_neigs 
-
-# Metropolis algorithm params
-prob_metropolis: 0.1
-
-# ISA params
-T0: 1.75
-alpha: 0.005
 
 over_t: 0.325 # Overlap threshold = 0.7/0.9
 overlap_method: "qi" # testing_qi_0.3 or 1
diff --git a/input_toy.yaml b/input_toy.yaml
index 3dd71693..2678ae56 100644
--- a/input_toy.yaml
+++ b/input_toy.yaml
@@ -3,44 +3,10 @@ dir_nm: "toy_network" # Options: toy_network, toy_network_old, humap, humap2
 sep: " " # Options: " ", "\t"
 out_comp_nm: "/results/res" 
 split_flag: 0
-fact: 0.7
-perc_transfer: 0.2
-use_full: 1
-scale_factor: 1.1  # Number of times negatives should be higher than positives 
-mode: non_gen # gen means only feature extraction, non_gen is all
 # -------------------Training parameters--------------------------------
-feats: 6
-
-model_type: "tpot" # Options: tpot, NN
-train_feat_mat: "toy_network/results_train_dat.csv"
-test_feat_mat: "toy_network/results_train_dat.csv"
-model_name: "SVM" #Options: FF_1hidden, log_reg, SVM, rand_forest, extra_trees
-# humap with separted train and test sets - tpot result - extra_trees
  
 model_dir: "/results/res"
 # --------------------Search parameters ------------------------------
-seed_mode: "all_nodes" # Options:all_nodes_known_comp, all_nodes, n_nodes, cliques
-num_comp: 40 # Options: 10, 7778, 1500 -  only for n_nodes mode
-
-run_mode: "parallel" # Options: serial, parallel
-max_size_thres: 50 
-
-search_method: "search_top_neigs" # isa, metropolis, search_top_neigs, search_max_neig
-    
-# All methods except search_max_neig    
-# No. of neighbors considered params
-use_all_neigs: 1
-thres_neig: 30 # Maximum number of neighbors sampled for checking
-min_thres_neig_sorted: 30 # Threshold above which only a percentage of neigs are considered as per sorted weights
-perc: 0.7 # Percentage of neighbors to check for adding new node
-
-explore_prob: 0.01 # use 0.1 for top_neigs
-# Metropolis algorithm params
-prob_metropolis: 0.1
-
-# ISA params
-T0: 0.88
-alpha: 1.8
 
 over_t: 0.1 # Overlap threshold = 0.7/0.9
 infer_overlap_threshold: "y"
diff --git a/main6_eval.py b/main6_eval.py
index baf11886..39ae2f5a 100644
--- a/main6_eval.py
+++ b/main6_eval.py
@@ -21,7 +21,7 @@
 sys_path.insert(1, 'functions_py3/')
 from yaml import load as yaml_load, dump as yaml_dump, Loader as yaml_Loader
 from argparse import ArgumentParser as argparse_ArgumentParser
-from humap.functions.eval_cmplx_sc import eval_complex
+from eval_cmplx_sc import eval_complex
 # from random_walk_control import control
 
 from logging import basicConfig as logging_basicConfig, INFO as logging_INFO
@@ -93,10 +93,7 @@ def main():
     parser = argparse_ArgumentParser("Input parameters")
     parser.add_argument("--input_file_name", default="input_toy.yaml", help="Input parameters file name")
     parser.add_argument("--out_dir_name", default="/results", help="Output directory name, by default - /results")
-    parser.add_argument("--seed_mode", help="Seed mode - specify 'cliques' for the cliques algo")
     parser.add_argument("--train_test_files_dir", default="", help="Train test file path")
-    
-    parser.add_argument("--search_method", help="Sampling algorithm")
     parser.add_argument("--model_dir", help="Directory containing model")
     parser.add_argument("--python_command", default="python", help="python / python3")
     parser.add_argument("--read_flag", default=0, help="1 when you want to read from file for evaluation")
diff --git a/main_prediction.py b/main_prediction.py
index cb811709..dc6c7c32 100644
--- a/main_prediction.py
+++ b/main_prediction.py
@@ -113,14 +113,21 @@ def pred_complex(n, nodes_list, G, gg, value_functions, intervals,args):
 
    # args.pred_results = "../results/pred_results"
     file = args.pred_results + '/nodes_complexes/'
+
     with open(file + str(n), 'wb') as f:
         pickle_dump(tup_cmplx, f)
     with open(file + str(n), 'rb') as f:
         pickle_load(f)
 
+import os
 
 def network(G, gg, nodes, intervals, value_functions,args):
     ## input data
+
+    fol = args.pred_results + '/nodes_complexes/'
+    if not os.path.exists(fol):
+        os.mkdir(fol)
+
     nodes_list = list(nodes)
     # make sure all intervals are accounted for
     for i in intervals:
@@ -128,6 +135,8 @@ def network(G, gg, nodes, intervals, value_functions,args):
             val_fn = interpolate(value_functions, i)
             value_functions[i] = val_fn
     filename = args.pred_results + '/value_fns_pred.pkl'
+    if not os.path.exists(args.pred_results):
+        os.mkdir(args.pred_results)
     with open(filename, 'wb') as f:
         pickle.dump(value_functions, f)
     fname = args.pred_results + '/value_fns_interp.txt'
@@ -135,7 +144,11 @@ def network(G, gg, nodes, intervals, value_functions,args):
         f.write(str(value_functions))
 
     # parallel running
-    num_cores = mul_cpu_count()
+    if args.n_cores == "all":
+        num_cores = mul_cpu_count()
+    else:
+        num_cores = int(args.n_cores)
+    print("No. of cores used = ",num_cores)
     Parallel(n_jobs=num_cores, backend='loky')(
         delayed(pred_complex)(node, nodes_list, G, gg, value_functions, intervals,args) for node in tqdm(nodes_list))
 
@@ -162,6 +175,7 @@ def main():
     parser.add_argument("--train_results", default="", help="Directory for training results")
     parser.add_argument("--pred_results", default="", help="Directory for prediction results")
     parser.add_argument("--out_dir_name", default = "", help = 'Main output directory')
+    parser.add_argument("--n_cores", default = "all", help = 'No. of cores to use for parallel processing')
     args = parser.parse_args()
     #os.makedirs(args.pred_results + '/nodes_complexes', exist_ok=True)
 
diff --git a/main_training.py b/main_training.py
index 9266506c..25845b2e 100755
--- a/main_training.py
+++ b/main_training.py
@@ -131,12 +131,16 @@ def network(G, gg, value_dict, dens_counter, valuefn_update, intervals, subgraph
     return gg
     # e += 1
 
+import os
 
 def main():
     start_time = time.time()
     matplotlib.use('Agg')
     logging.basicConfig(level=logging.WARNING)
-    matplotlib.use('tkagg')
+    try:
+        matplotlib.use('tkagg')
+    except:
+        print("Can't use tkagg backend")
     # input data
     parser = argparse_ArgumentParser("Input parameters")
     parser.add_argument("--input_training_file", default="", help="Training Complexes file path")
@@ -181,6 +185,8 @@ def main():
     network(G, gg, value_dict, dens_counter, valuefn_update, intervals, subgraphs)
     # save value function scores in dictionary
     #args.train_results = "../results/train_results"
+    if not os.path.exists(args.train_results):
+        os.mkdir(args.train_results)
     fname = args.train_results + "/value_fn_dens_dict.txt"
     file = open(fname, "w")
     value_dict_sorted = sorted(value_dict.items())
diff --git a/postprocessing.py b/postprocessing.py
index 3ee52121..bd5c577c 100644
--- a/postprocessing.py
+++ b/postprocessing.py
@@ -61,15 +61,22 @@ def main():
     file = ''
     if inputs['dir_nm'] == 'toy_network':
         file = args.out_dir_name + '/qi_results'
+
+        if not os.path.exists(args.out_dir_name + '/qi_results'):
+            os.mkdir(args.out_dir_name + '/qi_results')
         filename = file + '/res'
     else:
         if inputs['overlap_method'] == 'qi':
            file = args.out_dir_name + '/qi_results'
+           if not os.path.exists(args.out_dir_name + '/qi_results'):
+               os.mkdir(args.out_dir_name + '/qi_results')
            #os.makedirs(args.out_dir_name + '/qi_results', exist_ok=True)
            filename = file + '/res'  # inputs['out_comp_nm']
            #os.makedirs(file + '/results_qi', exist_ok=True)
         elif inputs["overlap_method"] == '1':  # jaccard coeff
            file = args.out_dir_name + '/jacc_results'
+           if not os.path.exists(file):
+               os.mkdir(file)
            #os.makedirs(args.out_dir_name + '/jacc_results', exist_ok=True)
            filename = file + '/res'  # inputs['out_comp_nm']
            #os.makedirs(file + '/results_jacc', exist_ok=True)
diff --git a/read_complexes.py b/read_complexes.py
index af10f6cc..4cfc3a7e 100644
--- a/read_complexes.py
+++ b/read_complexes.py
@@ -9,7 +9,7 @@
 from jaccard_coeff import jaccard_coeff
 from numpy import mean as np_mean, argmax as np_argmax, var as np_var, sqrt as sqrt
 from numpy.random import permutation as rand_perm, choice as rand_choice
-from logging import info as logging_info
+from logging import info as logging_info, debug as debug_info
 from networkx import write_weighted_edgelist as nx_write_weighted_edgelist, is_connected as nx_is_connected
 from scipy.stats import norm as norm_dist
 from convert_humap_ids2names import convert2names
@@ -199,55 +199,6 @@ def split_ratio(perm_lines, ratio):
     return train_list, test_list
 
 
-def split_meth_orig(perm_lines, inputs):
-    fact = inputs['fact']  # 0.99
-    split_pt = int(round(len(perm_lines) * fact))
-    train_list = [line for line in perm_lines[0:split_pt]]
-    test_list = [line for line in perm_lines[split_pt:]]
-    # Start with something that has a biased size distribution !!
-
-    sizes = [len(line) for line in train_list]
-    train_mean = np_mean(sizes)
-
-    # Transferring some of the smaller complexes to the test list
-    train_list_lower_mean = [line for line in train_list if len(line) < train_mean]
-    perc_transfer = inputs['perc_transfer']  # 0.3 # You can optimize these parameters !
-    to_transfer = train_list_lower_mean[:int(round(len(train_list_lower_mean) * perc_transfer))]
-    test_list = test_list + to_transfer
-
-    # Now remove from train set
-    for line in to_transfer:
-        train_list.remove(line)
-
-    # Finding complexes in train that share an edge with a complex in test
-    com_comp = 10
-    while com_comp != 0:  # Do until train and test sets are completely separated
-
-        # Removing super huge complexes also (nodes >30 ) from test set
-        test_list = [line for line in test_list if len(line) < 30]
-
-        # REMOVE OVERLAP B/W TRAIN AND TEST DATA
-        # Remove complexes from train set sharing two proteins with test set
-        train_rem = []
-        train_rem_append = train_rem.append
-        com_comp = 0
-        for train_line in train_list:
-            pres = 0
-            for test_line in test_list:
-                common = len(set(train_line.edges()).intersection(set(test_line.edges)))
-                if common >= 1:
-                    pres = 1
-                    break
-            if pres == 1:
-                train_rem_append(train_line)
-                com_comp += 1
-
-        logging_info("No. of train complexes transferred = %s", str(com_comp))
-        test_list = test_list + train_rem
-        for t_line in train_rem:
-            train_list.remove(t_line)
-    return train_list, test_list
-
 
 def merge_overlapped(list_comp,overlap_thres = 0.6):
     logging_info("Merging complexes...")
@@ -354,7 +305,6 @@ def split_train_test_complexes(inputs, G):
 
     perm_lines = rand_perm(complexes)
     ratio = (70, 30)
-    # train_list, test_list = split_meth_orig(perm_lines, inputs)
     train_list, test_list = split_ratio(perm_lines, ratio)
     plot_size_dists(train_list, test_list, out_comp_nm)
     with open(out_comp_nm + "_train_complexes_new.txt", "w") as f:
@@ -366,8 +316,6 @@ def split_train_test_complexes(inputs, G):
             f.write(sep.join(line) + "\n")
     with open(out_comp_nm + '_metrics.out', "a") as fid:
         print("Split ratio = %.3f" % str(float(len(train_list)) / len(test_list)), file=fid)
-        # print("Initial train_test split = ", fact, file=fid)
-        # print("Percentage of low sizes transferred from train to test = ", perc_transfer, file=fid)
     return train_list, test_list
 
 
diff --git a/requirements_py3.txt b/requirements_py3.txt
index 78c0bc1c..c6d28759 100644
--- a/requirements_py3.txt
+++ b/requirements_py3.txt
@@ -8,13 +8,5 @@ joblib==1.1.0
 tqdm==4.63.0
 numpy==1.22.0
 pandas==1.4.1
-scikit-MDR==0.4.4
-deap==1.3.1
-update-checker==0.18.0
-stopit==1.1.2
-TPOT==0.11.7
 seaborn==0.11.2
-xgboost
-tensorflow
-pytest
 pypiwin32
\ No newline at end of file
diff --git a/toy_network/.DS_Store b/toy_network/.DS_Store
deleted file mode 100644
index 5008ddfc..00000000
Binary files a/toy_network/.DS_Store and /dev/null differ