changeset 55:8fbe6aba07e5 draft

planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author mheinzl
date Fri, 12 Mar 2021 14:18:45 +0000
parents 95c27bcb1b7a
children 371c09d4050b
files read2mut.py read2mut.xml
diffstat 2 files changed, 104 insertions(+), 78 deletions(-) [+]
line wrap: on
line diff
--- a/read2mut.py	Fri Mar 12 08:00:31 2021 +0000
+++ b/read2mut.py	Fri Mar 12 14:18:45 2021 +0000
@@ -23,6 +23,7 @@
 from __future__ import division
 
 import argparse
+import csv
 import itertools
 import json
 import operator
@@ -48,6 +49,8 @@
                         help='JSON file with SSCS counts collected by mut2sscs.py.')
     parser.add_argument('--outputFile',
                         help='Output xlsx file with summary of mutations.')
+    parser.add_argument('--outputFile_csv',
+                        help='Output csv file with summary of mutations.')
     parser.add_argument('--outputFile2',
                         help='Output xlsx file with allele frequencies of mutations.')
     parser.add_argument('--outputFile3',
@@ -83,6 +86,7 @@
     outfile = args.outputFile
     outfile2 = args.outputFile2
     outfile3 = args.outputFile3
+    outputFile_csv = args.outputFile_csv
     thresh = args.thresh
     phred_score = args.phred
     trim = args.trim
@@ -258,6 +262,9 @@
     #        else:
     #            whole_array.append(keys[0])
 
+    csv_data = open(outputFile_csv, "wb")
+    csv_writer = csv.writer(csv_data, delimiter=",")
+
     # output summary with threshold
     workbook = xlsxwriter.Workbook(outfile)
     workbook2 = xlsxwriter.Workbook(outfile2)
@@ -286,7 +293,7 @@
                    'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba',
                    'in phase', 'chimeric tag')
     ws1.write_row(0, 0, header_line)
-
+    csv_writer.writerow(header_line)
     counter_tier11 = 0
     counter_tier12 = 0
     counter_tier21 = 0
@@ -1031,32 +1038,35 @@
                         if (read_pos3 == -1):
                             read_pos3 = read_len_median3 = None
                         line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera)
-                        ws1.write_row(row, 0, line)
+                        #ws1.write_row(row, 0, line)
+                        #csv_writer.writerow(line)
                         line2 = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera)
-                        ws1.write_row(row + 1, 0, line2)
+                        #ws1.write_row(row + 1, 0, line2)
+                        #csv_writer.writerow(line2)
 
-                        ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
-                                               {'type': 'formula',
-                                                'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1),
-                                                'format': format1,
-                                                'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
-                        ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
-                                               {'type': 'formula',
-                                                'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1),
-                                                'format': format3,
-                                                'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
-                        ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
-                                               {'type': 'formula',
-                                                'criteria': '=$B${}>="3"'.format(row + 1),
-                                                'format': format2,
-                                                'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
-                        if trimmed:
-                            if key1 not in list(change_tier_after_print.keys()):
-                                change_tier_after_print[key1] = [((row, line), (row, line2))]
-                            else:
-                                change_tier_after_print[key1].append(((row, line), (row, line2)))
+                        #ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
+                        #                       {'type': 'formula',
+                        #                        'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1),
+                        #                        'format': format1,
+                        #                        'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                        #ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
+                        #                       {'type': 'formula',
+                        #                        'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1),
+                        #                        'format': format3,
+                        #                        'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                        #ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
+                        #                       {'type': 'formula',
+                        #                        'criteria': '=$B${}>="3"'.format(row + 1),
+                        #                        'format': format2,
+                        #                        'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                        #if trimmed:
+                        if key1 not in list(change_tier_after_print.keys()):
+                            change_tier_after_print[key1] = [((row, line, line2))]
+                        else:
+                            change_tier_after_print[key1].append(((row, line, line2)))
 
                         row += 3
+
             if chimera_correction:
                 chimeric_dcs_high_tiers = 0
                 chimeric_dcs = 0
@@ -1070,56 +1080,60 @@
                         chimeric_dcs_high_tiers += high_tiers
                 chimera_dict[key1] = (chimeric_dcs, chimeric_dcs_high_tiers)
 
+            # write to file
+            
             # move tier 4 counts to tier 2.5 if there are other mutations with tier <= 2.4
-            print(list(sorted(tier_dict[key1].keys())))
-            print(list(sorted(tier_dict[key1].keys()))[:6])
             sum_highTiers = sum([tier_dict[key1][ij] for ij in list(sorted(tier_dict[key1].keys()))[:6]])
-            print(sum_highTiers)
+
+            correct_tier = False
+
             if tier_dict[key1]["tier 4"] > 0 and sum_highTiers > 0:
                 tier_dict[key1]["tier 2.5"] = tier_dict[key1]["tier 4"]
                 tier_dict[key1]["tier 4"] = 0
-                lines = change_tier_after_print[key1]
-                
-                for sample in lines:
-                	l_i = 0
-                	for li in sample:
-                	    row = li[0]
-                	    new_line = li[1]
-                	    if l_i == 0:
-                	        new_line[1] = "2.5"
-                	        ws1.write_row(row, 0, new_line)
-                	    else:
-                	        ws1.write_row(row + 1, 0, new_line)
-	
-                	    ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
-                	                               {'type': 'formula',
-                	                                'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1),
-                	                                'format': format1,
-                	                                'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
-                	    ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
-                	                               {'type': 'formula',
-                	                                'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1),
-                	                                'format': format3,
-                	                                'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
-                	    ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
-                	                               {'type': 'formula',
-                	                                'criteria': '=$B${}>="3"'.format(row + 1),
-                	                                'format': format2,
-                	                                'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
-	
-                	    l_i += 1
+                correct_tier = True
+
+            lines = change_tier_after_print[key1]
+            for sample in lines:
+                row = sample[0]
+                line1 = sample[1]
+                line2 = sample[2]
+
+                if correct_tier:
+                    line1 = list(line1)
+                    line1[1] = "2.5"
+                    line1 = tuple(line1)
+                ws1.write_row(row, 0, line1)
+                csv_writer.writerow(line1)
+                ws1.write_row(row + 1, 0, line2)
+                csv_writer.writerow(line2)
+
+                ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
+                                           {'type': 'formula',
+                                            'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1),
+                                            'format': format1,
+                                            'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
+                                           {'type': 'formula',
+                                            'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1),
+                                            'format': format3,
+                                            'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
+                                           {'type': 'formula',
+                                            'criteria': '=$B${}>="3"'.format(row + 1),
+                                            'format': format2,
+                                            'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})                
 	
     # sheet 2
     if chimera_correction:
-        header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'chimeras in AC alt (all tiers)', 'chimera-corrected cvrg', 'chimera-corrected AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'chimeras in AC alt (tiers 1.1-2.4)', 'chimera-corrected cvrg (tiers 1.1-2.4)', 'chimera-corrected AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)',
+        header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'chimeras in AC alt (all tiers)', 'chimera-corrected cvrg', 'chimera-corrected AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'chimeras in AC alt (tiers 1.1-2.5)', 'chimera-corrected cvrg (tiers 1.1-2.5)', 'chimera-corrected AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)',
                     'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5',
                     'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2',
-                    'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6')
+                    'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-2.5', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6', 'AF 1.1-7')
     else:
-        header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)',
+        header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)',
                         'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5',
                         'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2',
-                        'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6')
+                        'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-2.5', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6', 'AF 1.1-7')
 
     ws2.write_row(0, 0, header_line2)
     row = 0
@@ -1211,82 +1225,89 @@
                          ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"),
                          ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"),
                          ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"),
+                         ("Tier 2.5", "variants at the start or end of the read and recurring mutation on this position in tier 1.1-2.4")
                          ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"),
                          ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"),
-                         ("Tier 4.1", "variants at the start or end of the reads"), ("Tier 4.2", "mates with contradictory information"),
+                         ("Tier 4", "variants at the start or end of the reads"),
                          ("Tier 5.1", "variant is close to softclipping in both mates"),
                          ("Tier 5.2", "variant is close to softclipping in one of the mates"),
                          ("Tier 5.3", "variant is close to softclipping in one of the SSCS of both mates"),
                          ("Tier 5.4", "variant is close to softclipping in one mate (no information of second mate"),
                          ("Tier 5.5", "variant is close to softclipping in one of the SSCS (no information of the second mate"),
-                         ("Tier 6", "remaining variants")]
-    examples_tiers = [[("Chr5:5-20000-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289",
+                         ("Tier 6", "mates with contradictory information"),
+                         ("Tier 7", "remaining variants")]
+    examples_tiers = [[("chr5-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289",
                         "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0",
                         "4081", "4098", "5", "10", "", ""),
                        ("", "", "AAAAAGATGCCGACTACCTT", "ab2.ba1", None, None, None, None,
                         "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None,
                         "0", "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")],
-                      [("Chr5:5-20000-11068-C-G", "1.1", "AAAAATGCGTAGAAATATGC", "ab1.ba2", "254", "228", "287", "288", "289",
+                      [("chr5-11068-C-G", "1.1", "AAAAATGCGTAGAAATATGC", "ab1.ba2", "254", "228", "287", "288", "289",
                         "33", "43", "33", "43", "0", "0", "33", "43", "0", "0", "1", "1", "0", "0", "0", "0", "0",
                         "0", "4081", "4098", "5", "10", "", ""),
                        ("", "", "AAAAATGCGTAGAAATATGC", "ab2.ba1", "268", "268", "270", "288", "289",
                         "11", "34", "10", "27", "0", "0", "10", "27", "0", "0", "1", "1", "0", "0", "1",
                         "7", "0", "0", "4081", "4098", "5", "10", "", "")],
-                      [("Chr5:5-20000-10776-G-T", "1.2", "CTATGACCCGTGAGCCCATG", "ab1.ba2", "132", "132", "287", "288", "290",
+                      [("chr5-10776-G-T", "1.2", "CTATGACCCGTGAGCCCATG", "ab1.ba2", "132", "132", "287", "288", "290",
                         "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0",
                         "0", "0", "1", "6", "47170", "41149", "", ""),
                        ("", "", "CTATGACCCGTGAGCCCATG", "ab2.ba1", "77", "132", "233", "200", "290",
                         "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0",
                         "0", "0", "1", "6", "47170", "41149", "", "")],
-                      [("Chr5:5-20000-11068-C-G", "2.1", "AAAAAAACATCATACACCCA", "ab1.ba2", "246", "244", "287", "288", "289",
+                      [("chr5-11068-C-G", "2.1", "AAAAAAACATCATACACCCA", "ab1.ba2", "246", "244", "287", "288", "289",
                         "2", "8", "2", "8", "0", "0", "2", "8", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0",
                         "4081", "4098", "5", "10", "", ""),
                        ("", "", "AAAAAAACATCATACACCCA", "ab2.ba1", None, None, None, None,
                         "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", "0",
                         "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")],
-                      [("Chr5:5-20000-11068-C-G", "2.2", "ATCAGCCATGGCTATTATTG", "ab1.ba2", "72", "72", "217", "288", "289",
+                      [("chr5-11068-C-G", "2.2", "ATCAGCCATGGCTATTATTG", "ab1.ba2", "72", "72", "217", "288", "289",
                         "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0",
                         "4081", "4098", "5", "10", "", ""),
                        ("", "", "ATCAGCCATGGCTATTATTG", "ab2.ba1", "153", "164", "217", "260", "289",
                         "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0",
                         "4081", "4098", "5", "10", "", "")],
-                      [("Chr5:5-20000-11068-C-G", "2.3", "ATCAATATGGCCTCGCCACG", "ab1.ba2", None, None, None, None,
+                      [("chr5-11068-C-G", "2.3", "ATCAATATGGCCTCGCCACG", "ab1.ba2", None, None, None, None,
                         "289", "0", "5", "0", "5", "0", "0", "0", "5", None, None, None, "1", "0",
                         "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", ""),
                        ("", "", "ATCAATATGGCCTCGCCACG", "ab2.ba1", "202", "255", "277", "290", "289",
                         "1", "3", "1", "3", "0", "0", "1", "3", "0", "0", "1", "1", "0", "0", "0", "0",
                         "0", "0", "4081", "4098", "5", "10", "", "")],
-                      [("Chr5:5-20000-11068-C-G", "2.4", "ATCAGCCATGGCTATTTTTT", "ab1.ba2", "72", "72", "217", "288", "289",
+                      [("chr5-11068-C-G", "2.4", "ATCAGCCATGGCTATTTTTT", "ab1.ba2", "72", "72", "217", "288", "289",
                         "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "4081",
                         "4098", "5", "10", "", ""),
                        ("", "", "ATCAGCCATGGCTATTTTTT", "ab2.ba1", "153", "164", "217", "260", "289",
                         "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "4081",
                         "4098", "5", "10", "", "")],
-                      [("Chr5:5-20000-10776-G-T", "3.1", "ATGCCTACCTCATTTGTCGT", "ab1.ba2", "46", "15", "287", "288", "290",
+                      [("chr5-11068-C-G", "2.5", "ATTGAAAGAATAACCCACAC", "ab1.ba2", "1", "100", "255", "276", "269",
+                        "5", "6", "0", "6", "0", "0", "5", "6", "0", "0", "0", "1", "0", "0", "0", "0", "5", "0", "1", "1", "5348", "5350", "", ""),
+                       ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None,
+                        "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0",
+                        "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")],
+                      [("chr5-10776-G-T", "3.1", "ATGCCTACCTCATTTGTCGT", "ab1.ba2", "46", "15", "287", "288", "290",
                         "3", "3", "3", "2", "3", "1", "0", "1", "1", "0.5", "0", "0.5", "0", "0", "0", "1",
                         "0", "0", "3", "3", "47170", "41149", "", ""),
                        ("", "", "ATGCCTACCTCATTTGTCGT", "ab2.ba1", None, "274", None,
                         "288", "290", "0", "3", "0", "2", "0", "1", "0", "1", None, "0.5", None, "0.5",
                         "0", "0", "0", "1", "0", "0", "3", "3", "47170", "41149", "", "")],
-                      [("Chr5:5-20000-11315-C-T", "3.2", "ACAACATCACGTATTCAGGT", "ab1.ba2", "197", "197", "240", "255", "271",
+                      [("chr5-11315-C-T", "3.2", "ACAACATCACGTATTCAGGT", "ab1.ba2", "197", "197", "240", "255", "271",
                         "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1",
                         "0.666666666666667", "0", "0", "0", "0", "0", "0", "1", "1", "6584", "6482", "", ""),
                        ("", "", "ACAACATCACGTATTCAGGT", "ab2.ba1", "35", "35", "240", "258", "271",
                         "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1",
                         "0.666666666666667", "0", "0", "0", "0", "0", "0", "1", "1", "6584", "6482", "", "")],
-                      [("Chr5:5-20000-13983-G-C", "4.1", "AAAAAAAGAATAACCCACAC", "ab1.ba2", "0", "100", "255", "276", "269",
+                      [("chr5-13983-G-C", "4", "AAAAAAAGAATAACCCACAC", "ab1.ba2", "1", "100", "255", "276", "269",
                         "5", "6", "0", "6", "0", "0", "5", "6", "0", "0", "0", "1", "0", "0", "0", "0", "5", "0", "1", "1", "5348", "5350", "", ""),
                        ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None,
                         "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0",
                         "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")],
-                      [("Chr5:5-20000-13963-T-C", "4.2", "TTTTTAAGAATAACCCACAC", "ab1.ba2", "38", "38", "240", "283", "263",
+                      [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)],
+                      [("chr5-13963-T-C", "6", "TTTTTAAGAATAACCCACAC", "ab1.ba2", "38", "38", "240", "283", "263",
                         "110", "54", "110", "54", "0", "0", "110", "54", "0", "0", "1", "1", "0", "0", "0",
                         "0", "0", "0", "1", "1", "5348", "5350", "", ""),
                        ("", "", "TTTTTAAGAATAACCCACAC", "ab2.ba1", "100", "112", "140", "145", "263",
                         "7", "12", "7", "12", "7", "12", "0", "0", "1", "1", "0",
                         "0", "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")],
-                      [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)],
-                      [("Chr5:5-20000-13983-G-C", "6", "ATGTTGTGAATAACCCACAC", "ab1.ba2", None, "186", None, "276", "269",
+                      [("chr5-13983-G-C", "7", "ATGTTGTGAATAACCCACAC", "ab1.ba2", None, "186", None, "276", "269",
                         "0", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "0",
                         "0", "1", "1", "5348", "5350", "", ""),
                        ("", "", "ATGTTGTGAATAACCCACAC", "ab2.ba1", None, None, None, None,
@@ -1316,6 +1337,7 @@
     workbook.close()
     workbook2.close()
     workbook3.close()
+    csv_data.close()
 
 
 if __name__ == '__main__':
--- a/read2mut.xml	Fri Mar 12 08:00:31 2021 +0000
+++ b/read2mut.xml	Fri Mar 12 14:18:45 2021 +0000
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<tool id="read2mut" name="Call specific mutations in reads:" version="2.1.0" profile="19.01">
+<tool id="read2mut" name="Call specific mutations in reads:" version="2.1.1" profile="19.01">
     <description>Looks for reads with mutation at known positions and calculates frequencies and stats.</description>
     <macros>
         <import>va_macros.xml</import>
@@ -26,6 +26,7 @@
         --softclipping_dist '$softclipping_dist'
         --reads_threshold '$reads_threshold'
         --outputFile '$output_xlsx'
+        --outputFile_csv '$outputFile_csv'
         --outputFile2 '$output_xlsx2'
         --outputFile3 '$output_xlsx3'
     ]]>
@@ -44,6 +45,7 @@
     </inputs>
     <outputs>
         <data name="output_xlsx" format="xlsx" label="${tool.name} on ${on_string}: XLSX summary"/>
+        <data name="outputFile_csv" format="csv" label="${tool.name} on ${on_string}: CSV summary"/>
         <data name="output_xlsx2" format="xlsx" label="${tool.name} on ${on_string}: XLSX allele frequencies"/>
         <data name="output_xlsx3" format="xlsx" label="${tool.name} on ${on_string}: XLSX tiers"/>
     </outputs>
@@ -60,6 +62,7 @@
             <param name="softclipping_dist" value="15"/>
             <param name="reads_threshold" value="1.0"/>
             <output name="output_xlsx" file="Variant_Analyzer_summary_test.xlsx" decompress="true" lines_diff="10"/>
+            <output name="outputFile_csv" file="Variant_Analyzer_summary_test.csv" decompress="true" lines_diff="10"/>
             <output name="output_xlsx2" file="Variant_Analyzer_allele_frequencies_test.xlsx" decompress="true" lines_diff="10"/>
             <output name="output_xlsx3" file="Variant_Analyzer_tiers_test.xlsx" decompress="true" lines_diff="10"/>
         </test>
@@ -75,7 +78,7 @@
 **Input** 
 
 **Dataset 1:** VCF file with duplex consensus sequence (DCS) mutations. E.g. 
-generated by the `FreeBayes variant caller <https://arxiv.org/abs/1207.3907>`_.
+generated by the `FreeBayes <https://arxiv.org/abs/1207.3907>`_ or `LoFreq <https://academic.oup.com/nar/article/40/22/11189/1152727>`_ variant caller.
 
 **Dataset 2:** BAM file of aligned raw reads. This file can be obtained by the 
 tool `Map with BWA-MEM <https://arxiv.org/abs/1303.3997>`_.
@@ -91,9 +94,10 @@
 **Output**
 
 The outputs are three XLSX files containing frequency statistics for DCS mutations based 
-on information from the raw reads. In addition to that a tier based 
+on information from the raw reads and a CSV file containing the summary information without color-coding. In addition to that a tier based 
 classification is provided based on the amount of support for a true variant call.
 
+
     ]]> 
     </help>
     <expand macro="citation" />