Updates to ONT project summary reports #175
Updates to ONT project summary reports #175 — aanil merged 20 commits into NationalGenomicsInfrastructure:master from
Conversation
Added sample read counts and samples sequenced for FCs
Added the average read length per sample and fixed the units for total read counts
Set option --skip_fastq to True for ONT runs
Calculate avg. read length by fetching FC sequenced for each sample. Also add a separate table for FC-sample info, and add Lib prep to the library info table
Change prep.label to prep ID to display it in all cases, and remove the space from "Flow cell" in the headers to make the collapse/expand function work.
Change where the stats are taken from when splitting by barcode
Added seq. method to flowcell info table, and remove the FC-sample table from report to keep only in a separate file
Add a warning for cases where there is no way to assign reads to samples due to barcode naming.
Changed the barcode search to look only for the argument --split_files_by_barcode
Update version number and versionlog
ngi_reports/utils/entities.py
Outdated
| f"Flowcell {self.run_name} has no LIMS information, please check and amend report manually" | ||
| ) | ||
| self.exclude = True | ||
| else: |
There was a problem hiding this comment.
If you have a return instead of else, you could avoid one level of indentation below
ngi_reports/utils/entities.py
Outdated
| .get("sample_data", []) | ||
| ) | ||
| self.fc_sample_barcodes = {} | ||
| for lims_sample in lims_samples: |
There was a problem hiding this comment.
We loop through lims_samples 3 times here and below. It's better to minimise the number of times we loop through the same variable.
There was a problem hiding this comment.
We only ever seem to use the sample name, we could just have a list of sample names instead.
ngi_reports/utils/entities.py
Outdated
| "user_specified_flow_cell_id" | ||
| ) | ||
| run_arguments = fc_runparameters.get("args") | ||
| for arg in run_arguments: |
There was a problem hiding this comment.
If run_arguments is a list, we don't need to loop through it, we can just check "min_qscore" in run_arguments
ngi_reports/utils/entities.py
Outdated
| for arg in run_arguments: | ||
| if "--split_files_by_barcode=on" in arg: |
There was a problem hiding this comment.
Same here, you don't need to go through every argument if run_arguments is a list
ngi_reports/utils/entities.py
Outdated
| self.sample_reads[sample_id] = float( | ||
| final_acquisition.get("acquisition_run_info") | ||
| .get("yield_summary") | ||
| .get("basecalled_pass_read_count") |
There was a problem hiding this comment.
This calculation does not need to be in the loop
ngi_reports/utils/entities.py
Outdated
| self.sample_reads[sample_id] = float( | ||
| barcode.get("snapshots")[-1] | ||
| .get("yield_summary") | ||
| .get("basecalled_pass_read_count") | ||
| ) |
There was a problem hiding this comment.
This calculation does not need to be in the loop
There was a problem hiding this comment.
Oh wait, strike that, you use the barcode here
ngi_reports/utils/entities.py
Outdated
| fcObj.populate_ont_flowcell(log) | ||
| if fcObj.exclude: | ||
| continue | ||
| else: |
There was a problem hiding this comment.
You don't really need the else here
Small changes in indentations and looping
Co-authored-by: Anandashankar Anil <aanil@users.noreply.github.com>
Co-authored-by: Anandashankar Anil <aanil@users.noreply.github.com>
Co-authored-by: Anandashankar Anil <aanil@users.noreply.github.com>
ngi_reports/utils/entities.py
Outdated
| self.qual_threshold = float(arg.split("=")[-1]) | ||
| if "split_files_by_barcode" in arg: | ||
| split_files_by_barcode = arg.split("=")[-1] | ||
| self.qual_threshold = float(arg.split("=")[-1]) |
There was a problem hiding this comment.
| self.qual_threshold = float(arg.split("=")[-1]) |
ngi_reports/utils/entities.py
Outdated
| sample_id = lims_sample.get("sample_name", "") | ||
| self.fc_sample_barcodes[sample_id] = lims_sample.get( | ||
| "ont_barcode", "NoIndex" | ||
| ) | ||
| self.samples_run = [] | ||
| for sample in self.fc_sample_barcodes.keys(): | ||
| self.samples_run.append(f"{sample}") | ||
| self.samples_run = ", ".join(self.samples_run) | ||
|
|
||
| self.sample_reads = {} | ||
| self.average_read_length_passed = {} | ||
| fc_barcode_info = final_acquisition.get("acquisition_output")[1] | ||
|
|
||
| if "--split_files_by_barcode=on" in run_arguments: | ||
| for barcode in fc_barcode_info.get("plot")[0].get("snapshots"): | ||
| barcode_name = barcode.get("filtering")[0].get("barcode_name") | ||
| barcode_alias = barcode.get("filtering")[0].get("barcode_alias") | ||
| if barcode_name != barcode_alias: | ||
| for lims_sample in lims_samples: | ||
| sample_id = lims_sample.get("sample_name", "") | ||
| if sample_id == barcode_alias: | ||
| self.sample_reads[sample_id] = float( | ||
| barcode.get("snapshots")[-1] | ||
| .get("yield_summary") | ||
| .get("basecalled_pass_read_count") | ||
| ) | ||
| self.average_read_length_passed[sample_id] = ( | ||
| float( | ||
| barcode.get("snapshots")[-1] | ||
| .get("yield_summary") | ||
| .get("basecalled_pass_bases") | ||
| ) | ||
| / self.sample_reads[sample_id] | ||
| ) | ||
| if self.sample_reads == {}: | ||
| log.warning( | ||
| f"Flowcell {self.run_name} has no barcode aliases corresponding to sample IDs." | ||
| ) | ||
|
|
||
| elif "--split_files_by_barcode=off" in run_arguments: | ||
| for lims_sample in lims_samples: | ||
| sample_id = lims_sample.get("sample_name", "") | ||
| self.sample_reads[sample_id] = float( | ||
| final_acquisition.get("acquisition_run_info") | ||
| .get("yield_summary") | ||
| .get("basecalled_pass_read_count") | ||
| ) | ||
| self.average_read_length_passed[sample_id] = round( | ||
| float( | ||
| final_acquisition.get("acquisition_run_info") | ||
| .get("yield_summary") | ||
| .get("basecalled_pass_bases") | ||
| ) | ||
| / self.sample_reads[sample_id] | ||
| ) |
There was a problem hiding this comment.
| sample_id = lims_sample.get("sample_name", "") | |
| self.fc_sample_barcodes[sample_id] = lims_sample.get( | |
| "ont_barcode", "NoIndex" | |
| ) | |
| self.samples_run = [] | |
| for sample in self.fc_sample_barcodes.keys(): | |
| self.samples_run.append(f"{sample}") | |
| self.samples_run = ", ".join(self.samples_run) | |
| self.sample_reads = {} | |
| self.average_read_length_passed = {} | |
| fc_barcode_info = final_acquisition.get("acquisition_output")[1] | |
| if "--split_files_by_barcode=on" in run_arguments: | |
| for barcode in fc_barcode_info.get("plot")[0].get("snapshots"): | |
| barcode_name = barcode.get("filtering")[0].get("barcode_name") | |
| barcode_alias = barcode.get("filtering")[0].get("barcode_alias") | |
| if barcode_name != barcode_alias: | |
| for lims_sample in lims_samples: | |
| sample_id = lims_sample.get("sample_name", "") | |
| if sample_id == barcode_alias: | |
| self.sample_reads[sample_id] = float( | |
| barcode.get("snapshots")[-1] | |
| .get("yield_summary") | |
| .get("basecalled_pass_read_count") | |
| ) | |
| self.average_read_length_passed[sample_id] = ( | |
| float( | |
| barcode.get("snapshots")[-1] | |
| .get("yield_summary") | |
| .get("basecalled_pass_bases") | |
| ) | |
| / self.sample_reads[sample_id] | |
| ) | |
| if self.sample_reads == {}: | |
| log.warning( | |
| f"Flowcell {self.run_name} has no barcode aliases corresponding to sample IDs." | |
| ) | |
| elif "--split_files_by_barcode=off" in run_arguments: | |
| for lims_sample in lims_samples: | |
| sample_id = lims_sample.get("sample_name", "") | |
| self.sample_reads[sample_id] = float( | |
| final_acquisition.get("acquisition_run_info") | |
| .get("yield_summary") | |
| .get("basecalled_pass_read_count") | |
| ) | |
| self.average_read_length_passed[sample_id] = round( | |
| float( | |
| final_acquisition.get("acquisition_run_info") | |
| .get("yield_summary") | |
| .get("basecalled_pass_bases") | |
| ) | |
| / self.sample_reads[sample_id] | |
| ) |
ngi_reports/utils/entities.py
Outdated
|
|
||
| self.lanes = OrderedDict() | ||
| self.fc_sample_qvalues = defaultdict(dict) | ||
| self.exclude = False |
There was a problem hiding this comment.
| self.exclude = False |
ngi_reports/utils/entities.py
Outdated
| log.warning( | ||
| f"Flowcell {self.run_name} has no LIMS information, please check and amend report manually" | ||
| ) | ||
| self.exclude = True |
There was a problem hiding this comment.
| self.exclude = True |
ngi_reports/utils/entities.py
Outdated
| fcObj = Flowcell(fc, self.ngi_name, ontcon) | ||
| fcObj.populate_ont_flowcell() | ||
| fcObj.populate_ont_flowcell(log) | ||
| if fcObj.exclude: |
There was a problem hiding this comment.
| if fcObj.exclude: | |
| if not fcObj: |
change from FC.exclude to return values for FC without LIMS information
Co-authored-by: Anandashankar Anil <aanil@users.noreply.github.com>
Added read counts and avg. read length per sample
Added sequencing method to FC table
Added separate file showing samples sequenced per FC