From 6751bccf3672f8c12aaf9d77fffdff49ca64736b Mon Sep 17 00:00:00 2001 From: KingsRoad Date: Thu, 21 Nov 2019 14:42:06 +0100 Subject: [PATCH] Misleading Parameter Typo and default value line 15: this is not a bug, but the default value of this parameter differs from the corresponding default in SGVF_PerFile.py, which may mislead users and then cause an error (users tend to rely on default parameters); users can also set --x_coverage=0.01 explicitly to avoid the error when running SGVF_cmd.py. line 39: the parameter name should correspond to the one defined in the preceding parameter list. --- src/SGVF_cmd.py | 4 ++-- src/SGVFinder.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/SGVF_cmd.py b/src/SGVF_cmd.py index f24e1d8..07d319a 100644 --- a/src/SGVF_cmd.py +++ b/src/SGVF_cmd.py @@ -12,7 +12,7 @@ def _addargs(parser): parser.add_argument('input_glob', help = 'A glob string that includes all the files processed with the PerFile_cmd') parser.add_argument('output_dsgv', help = 'Output path for the deletion-sgv dataframe. By default a pickled pandas dataframe') parser.add_argument('output_vsgv', help = 'Output path for the variable-sgv dataframe. By default a pickled pandas dataframe') - parser.add_argument('--x_coverage', help = 'The desired coverage across the genome in units of 100bp reads. This parameter is used to determine bin size: bin_size = rate_param/x_coverage (Default = 0.1)', type=float, default = 0.1) + parser.add_argument('--x_coverage', help = 'The desired coverage across the genome in units of 100bp reads. This parameter is used to determine bin size: bin_size = rate_param/x_coverage (Default = 0.1)', type=float, default = 0.01) parser.add_argument('--rate_param', help = 'The lower limit for the median number of reads per genomic bin. 
Genomes with coverage lower than rate_param will be discarded from the analysis (Default = 10)', type = int, default = 10) parser.add_argument('--byorig', help = 'Calculate SGVs according to the regions published by Zeevi et al, XXX, 201Y.', action = 'store_true') parser.add_argument('--min_samp_cutoff', help = 'Minimum number of samples in which a microbe exists with sufficient coverage to be considered in the analysis (Default=75)', type=int, default = 75) @@ -36,7 +36,7 @@ def _load_ujsn(fname): _addargs(parser) args = parser.parse_args() samp_to_map = {splitext(basename(f))[0]: _load_ujsn(f) for f in glob(args.input_glob)} - if args.byother: + if args.byorig: vsgv, dsgv = calculate_by_other(join(split(realpath(__file__))[0], '../DataFiles/orig_dsgv.df'), join(split(realpath(__file__))[0], '../DataFiles/orig_vsgv.df'), join(split(realpath(__file__))[0], '../DataFiles/orig_frames'), diff --git a/src/SGVFinder.py b/src/SGVFinder.py index 2f1857a..9b0693b 100644 --- a/src/SGVFinder.py +++ b/src/SGVFinder.py @@ -29,10 +29,10 @@ def get_sample_map(delta_fname, x_coverage, average_read_length, rate_param): for dest_id, pos1, pos2, used_koef, _ in mapngs: if dest_id not in bacid_maps: bacid_maps[dest_id] = np.zeros(int(lengthdb[dest_id] / bin_size) + 1) - ind1 = int((pos1 + (average_read_length / 2)) / bin_size) + ind1 = int((int(pos1) + (int(average_read_length) / 2)) / bin_size) if pos2 >= 0: used_koef = used_koef / 2.0 - ind2 = int((pos2 + (average_read_length / 2)) / bin_size) + ind2 = int((int(pos2) + (int(average_read_length) / 2)) / bin_size) bacid_maps[dest_id][ind2] += used_koef bacid_maps[dest_id][ind1] += used_koef return {dest_id:cov_map for dest_id, cov_map in bacid_maps.iteritems()\