Mercurial > repos > greg > pima_report
changeset 18:e948214a9e3c draft
Uploaded
author | greg |
---|---|
date | Wed, 22 Mar 2023 13:07:22 +0000 |
parents | b4ed9f55de13 |
children | c509e6819795 |
files | pima_report.py pima_report.xml |
diffstat | 2 files changed, 33 insertions(+), 29 deletions(-) [+] |
line wrap: on
line diff
--- a/pima_report.py Fri Mar 17 19:50:39 2023 +0000 +++ b/pima_report.py Wed Mar 22 13:07:22 2023 +0000 @@ -20,11 +20,12 @@ def __init__(self, analysis_name=None, amr_deletions_file=None, amr_matrix_files=None, assembly_fasta_file=None, assembly_name=None, bedtools_version=None, blastn_version=None, circos_files=None, compute_sequence_length_file=None, contig_coverage_file=None, dbkey=None, dnadiff_snps_file=None, - dnadiff_version=None, feature_bed_files=None, feature_png_files=None, flye_assembly_info_file=None, - flye_version=None, genome_insertions_file=None, gzipped=None, kraken2_report_file=None, - kraken2_version=None, minimap2_version=None, mutation_regions_bed_file=None, mutation_regions_tsv_files=None, - ont_fastq_file=None, pima_css=None, plasmids_file=None, quast_report_file=None, reference_insertions_file=None, - samtools_version=None, varscan_version=None): + dnadiff_version=None, errors_file=None, feature_bed_files=None, feature_png_files=None, + flye_assembly_info_file=None, flye_version=None, genome_insertions_file=None, gzipped=None, + kraken2_report_file=None, kraken2_version=None, minimap2_version=None, mutation_regions_bed_file=None, + mutation_regions_tsv_files=None, ont_fastq_file=None, pima_css=None, plasmids_file=None, + quast_report_file=None, read_type=None, reference_insertions_file=None, samtools_version=None, + varscan_version=None): self.ofh = open("process_log.txt", "w") self.ofh.write("amr_deletions_file: %s\n" % str(amr_deletions_file)) @@ -40,6 +41,7 @@ self.ofh.write("dbkey: %s\n" % str(dbkey)) self.ofh.write("dnadiff_snps_file: %s\n" % str(dnadiff_snps_file)) self.ofh.write("dnadiff_version: %s\n" % str(dnadiff_version)) + self.ofh.write("errors_file: %s\n" % str(errors_file)) self.ofh.write("feature_bed_files: %s\n" % str(feature_bed_files)) self.ofh.write("feature_png_files: %s\n" % str(feature_png_files)) self.ofh.write("flye_assembly_info_file: %s\n" % str(flye_assembly_info_file)) @@ -55,6 +57,7 @@ self.ofh.write("pima_css: %s\n" % str(pima_css)) self.ofh.write("plasmids_file: %s\n" % str(plasmids_file)) self.ofh.write("quast_report_file: %s\n" % str(quast_report_file)) + self.ofh.write("read_type: %s\n" % str(read_type)) self.ofh.write("reference_insertions_file: %s\n" % str(reference_insertions_file)) self.ofh.write("samtools_version: %s\n" % str(samtools_version)) self.ofh.write("varscan_version: %s\n" % str(varscan_version)) @@ -87,6 +90,7 @@ self.dnadiff_version = 'dnadiff (version unknown)' else: self.dnadiff_version = re.sub('_', '.', dnadiff_version.rstrip(' _snps_')) + self.errors_file = errors_file self.feature_bed_files = feature_bed_files self.feature_png_files = feature_png_files self.flye_assembly_info_file = flye_assembly_info_file @@ -110,7 +114,7 @@ self.pima_css = pima_css self.plasmids_file = plasmids_file self.quast_report_file = quast_report_file - self.read_type = 'ONT' + self.read_type = read_type.upper() self.reference_insertions_file = reference_insertions_file self.reference_insertions_file = reference_insertions_file if samtools_version is None: @@ -137,6 +141,7 @@ self.feature_plot_title = 'Feature annotation plots' self.large_indel_title = 'Large insertions & deletions' self.methods_title = 'Methods' + self.mutation_errors_title = 'Errors finding mutations in the sample' self.mutation_title = 'Mutations found in the sample' self.mutation_methods_title = 'Mutation screening' self.plasmid_methods_title = 'Plasmid annotation' @@ -599,29 +604,6 @@ except Exception: # Likely an empty file. return - # TODO: this is the only place where reference_genome is used, - # so I'm commenting it out for now. We need to confirm if these - # errors that require the reference genmoe being passed are necessary. - # If so, we'll need to implement data tables in this tool. - # Make sure that the positions in the BED file fall within - # the chromosomes provided in the reference sequence. - """ - for mutation_region in range(mutation_regions.shape[0]): - mutation_region = mutation_regions.iloc[mutation_region, :] - if not (mutation_region[0] in self.reference_genome): - self.ofh.write("\nMutation region: %s not found in reference genome.\n" % ' '.join(mutation_region.astype(str))) - continue - if not isinstance(mutation_region[1], int): - self.ofh.write("\nNon-integer found in mutation region start (column 2): %s.\n" % str(mutation_region[1])) - break - elif not isinstance(mutation_region[2], int): - self.ofh.write("\nNon-integer found in mutation region start (column 3): %s.\n" % str(mutation_region[2])) - break - if mutation_region[1] <= 0 or mutation_region[2] <= 0: - self.ofh.write("\nMutation region %s starts before the reference sequence.\n" % ' '.join(mutation_region.astype(str))) - if mutation_region[1] > len(self.reference_genome[mutation_region[0]].seq) or mutation_region[2] > len(self.reference_genome[mutation_region[0]].seq): - self.ofh.write("\nMutation region %s ends after the reference sequence.\n" % ' '.join(mutation_region.astype(str))) - """ amr_mutations = pandas.Series(dtype=object) for region_i in range(mutation_regions.shape[0]): region = mutation_regions.iloc[region_i, :] @@ -662,6 +644,16 @@ Table_List = Table_List + region_mutations.iloc[i, [0, 1, 3, 4, 5, 6]].values.tolist() row_count = int(len(Table_List) / 6) self.doc.new_table(columns=6, rows=row_count, text=Table_List, text_align='left') + if os.path.getsize(self.errors_file) > 0: + # Report the errors encountered when attempting + # to find mutations in the sample. + self.doc.new_line() + self.doc.new_header(level=2, title=self.mutation_errors_title) + with open(self.errors_file, 'r') as efh: + for i, line in enumerate(efh): + line = line.strip() + if line: + self.doc.new_line('* %s' % line) method = '%s reads were mapped to the reference sequence using %s.' % (self.read_type, self.minimap2_version) self.methods[self.mutation_methods_title] = self.methods[self.mutation_methods_title].append(pandas.Series(method)) method = 'Mutations were identified using %s and %s.' % (self.samtools_version, self.varscan_version) @@ -703,6 +695,7 @@ amr_deletions = amr_deletions.loc[amr_deletions['type'].isin(['large-deletion', 'any']), :] self.doc.new_line() self.doc.new_header(level=2, title=self.large_indel_title) + self.doc.new_line('This section is informative only when your idolates were identified as *Bacillus anthracis* strains') for genome in ['Reference insertions', 'Query insertions']: genome_indels = large_indels[genome].copy() self.doc.new_line() @@ -852,6 +845,7 @@ parser.add_argument('--dbkey', action='store', dest='dbkey', help='Reference genome identifier') parser.add_argument('--dnadiff_snps_file', action='store', dest='dnadiff_snps_file', help='DNAdiff snps tabular file') parser.add_argument('--dnadiff_version', action='store', dest='dnadiff_version', default=None, help='DNAdiff version string') +parser.add_argument('--errors_file', action='store', dest='errors_file', default=None, help='AMR mutations errors encountered txt file') parser.add_argument('--feature_bed_dir', action='store', dest='feature_bed_dir', help='Directory of best feature hits bed files') parser.add_argument('--feature_png_dir', action='store', dest='feature_png_dir', help='Directory of best feature hits png files') parser.add_argument('--flye_assembly_info_file', action='store', dest='flye_assembly_info_file', default=None, help='Flye assembly info tabular file') @@ -867,6 +861,7 @@ parser.add_argument('--pima_css', action='store', dest='pima_css', help='PIMA css stypesheet') parser.add_argument('--plasmids_file', action='store', dest='plasmids_file', help='pChunks plasmids TSV file') parser.add_argument('--quast_report_file', action='store', dest='quast_report_file', help='Quast report tabular file') +parser.add_argument('--read_type', action='store', dest='read_type', help='Sample read type (ONT or Illumina)') parser.add_argument('--reference_insertions_file', action='store', dest='reference_insertions_file', help='Reference insertions BED file') parser.add_argument('--samtools_version', action='store', dest='samtools_version', default=None, help='Samtools version string') parser.add_argument('--varscan_version', action='store', dest='varscan_version', default=None, help='Varscan version string') @@ -912,6 +907,7 @@ args.dbkey, args.dnadiff_snps_file, args.dnadiff_version, + args.errors_file, feature_bed_files, feature_png_files, args.flye_assembly_info_file, @@ -927,6 +923,7 @@ args.pima_css, args.plasmids_file, args.quast_report_file, + args.read_type, args.reference_insertions_file, args.samtools_version, args.varscan_version)
--- a/pima_report.xml Fri Mar 17 19:50:39 2023 +0000 +++ b/pima_report.xml Wed Mar 22 13:07:22 2023 +0000 @@ -88,6 +88,7 @@ #if str($dnadiff_snps_file) not in ['None', '']: --dnadiff_version '$dnadiff_version' #end if +--errors_file '$errors_file' --feature_bed_dir 'feature_bed_dir' --feature_png_dir 'feature_png_dir' #if str($flye_assembly_info_file) not in ['None', '']: @@ -111,6 +112,7 @@ --pima_css '${__tool_directory__}/pima.css' --plasmids_file '$plasmids_file' --quast_report_file '$quast_report_file' +--read_type '$read_type' --reference_insertions_file '$reference_insertions_file' #if str($samtools_pileup_file) not in ['None', '']: --samtools_version '$samtools_version' @@ -131,6 +133,7 @@ <param name="compute_sequence_length_file" type="data" format="tabular,tsv" label="Compute sequence length tabular file"/> <param name="contig_coverage_file" type="data" format="tabular,tsv" label="Contig coverage tabular file"/> <param name="dnadiff_snps_file" type="data" format="tabular" label="DNAdiff snps tabular file"/> + <param name="errors_file" type="data" format="txt" label="AMR mutation regions error txt file"/> <param name="features_bed" format="bed" type="data_collection" collection_type="list" label="Collection of best feature hits BED files"/> <param name="features_png" format="png" type="data_collection" collection_type="list" label="Collection of best feature hits PNG files"/> <param name="flye_assembly_info_file" type="data" format="tabular,tsv" optional="true" label="Flye assembly info tabular file" help="Optional, ignored if not selected"/> @@ -141,6 +144,10 @@ <param name="mutation_regions" format="tabular,tsv" type="data_collection" collection_type="list" label="Collection of mutation regions tabular files"/> <param name="mutation_regions_bed_file" type="data" format="mutations_regions,bed" label="Mutation regions BED file"/> <param name="quast_report_file" type="data" format="tabular" label="Quast report tabular file"/> + <param argument="--read_type" type="select" label="Specify the read type"> + <option value="ont" selected="true">Long reads - Oxford Nanopore Technologies (ONT)</option> + <option value="illumina">Short reads - Illumina</option> + </param> <param name="reference_insertions_file" type="data" format="bed" label="Reference insertions BED file"/> <param name="plasmids_file" type="data" format="tsv" label="pChunks plasmids TSV file"/> <param name="samtools_pileup_file" type="data" format="pileup" label="Samtools pileup file"/>