changeset 86:97bd9c7a1b44 draft

planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8-dirty
author mheinzl
date Sat, 22 Apr 2023 09:28:36 +0000
parents d1cd4cd9f18d
children 49b4a9ca7400
files mut2read.xml mut2sscs.xml read2mut.py read2mut.xml test-data/Variant_Analyzer_allele_frequencies_test.xlsx test-data/Variant_Analyzer_summary_test.xlsx test-data/Variant_Analyzer_tiers_test.xlsx
diffstat 7 files changed, 28 insertions(+), 20 deletions(-) [+]
line wrap: on
line diff
--- a/mut2read.xml	Wed Aug 24 09:47:08 2022 +0000
+++ b/mut2read.xml	Sat Apr 22 09:28:36 2023 +0000
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<tool id="mut2read" name="DCS mutations to tags/reads:" version="3.1.0" profile="19.01">
+<tool id="mut2read" name="DCS mutations to tags/reads:" version="3.2.0" profile="19.01">
     <description>Extracts all tags that carry a mutation in the duplex consensus sequence (DCS)</description>
     <macros>
         <import>va_macros.xml</import>
--- a/mut2sscs.xml	Wed Aug 24 09:47:08 2022 +0000
+++ b/mut2sscs.xml	Sat Apr 22 09:28:36 2023 +0000
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<tool id="mut2sscs" name="DCS mutations to SSCS stats:" version="3.1.0" profile="19.01">
+<tool id="mut2sscs" name="DCS mutations to SSCS stats:" version="3.2.0" profile="19.01">
     <description>Extracts all tags from the single-stranded consensus sequence (SSCS) bam file that carries a mutation at the same position a mutation is called in the duplex consensus sequence (DCS) and calculates their frequencies</description>
     <macros>
         <import>va_macros.xml</import>
--- a/read2mut.py	Wed Aug 24 09:47:08 2022 +0000
+++ b/read2mut.py	Sat Apr 22 09:28:36 2023 +0000
@@ -377,9 +377,12 @@
     workbook = xlsxwriter.Workbook(outfile)
     workbook2 = xlsxwriter.Workbook(outfile2)
     workbook3 = xlsxwriter.Workbook(outfile3)
-    ws1 = workbook.add_worksheet("Results")
+    count_sheet = 1
+    ws1 = workbook.add_worksheet("Results" + str(count_sheet))
     ws2 = workbook2.add_worksheet("Allele frequencies")
     ws3 = workbook3.add_worksheet("Tiers")
+    current_result_sheet = ws1
+
 
     format1 = workbook.add_format({'bg_color': '#BCF5A9'})  # green
     format2 = workbook.add_format({'bg_color': '#FFC7CE'})  # red
@@ -690,6 +693,11 @@
                     counts_mut += 1
 
                     if (variant_type == "alt" and ((alt1f + alt2f + alt3f + alt4f) > 0.5)) or (variant_type == "ref" and ((ref1f + ref2f + ref3f + ref4f) > 0.5)):
+                        if row > 1000000:
+                            count_sheet += 1
+                            ws_new = workbook.add_worksheet("Results" + str(count_sheet))
+                            current_result_sheet = ws_new
+
                         if variant_type == "alt":
                             tier1ff, tier2ff, tier3ff, tier4ff = alt1f, alt2f, alt3f, alt4f
                             tier1ff_trim, tier2ff_trim, tier3ff_trim, tier4ff_trim = alt1f, alt2f, alt3f, alt4f
@@ -1338,18 +1346,18 @@
                         line = (var_id, tier, variant_type, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera)
                         line2 = ("", "", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera)
                         if tier != "4":
-                            ws1.write_row(row, 0, line)
+                            current_result_sheet.write_row(row, 0, line)
                             csv_writer.writerow(line)
-                            ws1.write_row(row + 1, 0, line2)
+                            current_result_sheet.write_row(row + 1, 0, line2)
                             csv_writer.writerow(line2)
                             if variant_type == "alt":
-                                ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 'format': format1, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
-                                ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), 'format': format3, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
-                                ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row + 1), 'format': format2, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                                current_result_sheet.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 'format': format1, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                                current_result_sheet.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), 'format': format3, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                                current_result_sheet.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row + 1), 'format': format2, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
                             elif variant_type == "ref":
-                                ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 'format': format1, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
-                                ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), 'format': format3, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
-                                ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row + 1), 'format': format2, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                                current_result_sheet.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 'format': format1, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                                current_result_sheet.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), 'format': format3, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                                current_result_sheet.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row + 1), 'format': format2, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
                             row += 3
                         else:
                             change_tier_after_print.append((line, line2, trimmed_actual_high_tier))    
@@ -1405,18 +1413,18 @@
                         counter_tier4 -= 1
                         tier_dict_ref[key1]["tier 2.5"] += 1
                         tier_dict_ref[key1]["tier 4"] -= 1
-                    ws1.write_row(row, 0, line1)
+                    current_result_sheet.write_row(row, 0, line1)
                     csv_writer.writerow(line1)
-                    ws1.write_row(row + 1, 0, line2)
+                    current_result_sheet.write_row(row + 1, 0, line2)
                     csv_writer.writerow(line2)
                     if line1[2] == "alt":
-                        ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 'format': format1, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
-                        ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), 'format': format3, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
-                        ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row + 1), 'format': format2, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                        current_result_sheet.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 'format': format1, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                        current_result_sheet.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), 'format': format3, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                        current_result_sheet.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row + 1), 'format': format2, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
                     elif line1[2] == "ref":
-                        ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 'format': format1, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
-                        ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), 'format': format3, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
-                        ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row + 1), 'format': format2, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                        current_result_sheet.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 'format': format1, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                        current_result_sheet.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), 'format': format3, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                        current_result_sheet.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row + 1), 'format': format2, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
                     row += 3
 
     # sheet 2
--- a/read2mut.xml	Wed Aug 24 09:47:08 2022 +0000
+++ b/read2mut.xml	Sat Apr 22 09:28:36 2023 +0000
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<tool id="read2mut" name="Call specific mutations in reads:" version="3.1.0" profile="19.01">
+<tool id="read2mut" name="Call specific mutations in reads:" version="3.2.0" profile="19.01">
     <description>Looks for reads with a mutation at known positions and calculates frequencies and stats.</description>
     <macros>
         <import>va_macros.xml</import>
Binary file test-data/Variant_Analyzer_allele_frequencies_test.xlsx has changed
Binary file test-data/Variant_Analyzer_summary_test.xlsx has changed
Binary file test-data/Variant_Analyzer_tiers_test.xlsx has changed