changeset 79:d7aea14291e8 draft

planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8-dirty
author mheinzl
date Mon, 25 Jul 2022 13:24:04 +0000
parents fdfe9a919ff7
children 8336a4f2b647
files read2mut.py read2mut.xml
diffstat 2 files changed, 121 insertions(+), 115 deletions(-) [+]
line wrap: on
line diff
--- a/read2mut.py	Fri Jul 22 09:19:44 2022 +0000
+++ b/read2mut.py	Mon Jul 25 13:24:04 2022 +0000
@@ -313,6 +313,8 @@
                             count_other += 1
                     else:
                         count_indel += 1
+                print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, low quality = %s\n" %
+                      (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, count_lowq))
 
     mut_array = np.array(mut_array)
     for read in bam.fetch(until_eof=True):
@@ -425,6 +427,10 @@
         chimeric_tag = {}
         if (key1 in pure_tags_dict_short.keys()) or (key1 in pure_tags_dict_ref.keys()):  # ref or alt
 
+            if key1 not in np.array(['#'.join(str(i) for i in z)
+                                   for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]):
+                continue
+
             change_tier_after_print = []
             i = np.where(np.array(['#'.join(str(i) for i in z)
                                    for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0]
@@ -443,6 +449,7 @@
                 tier_dict_ref[key1][k] = v
 
             used_keys = []
+            # used_keys_ref = []
             if 'ab' in mut_pos_dict[key1].keys():
                 sscs_mut_ab = mut_pos_dict[key1]['ab']
             else:
@@ -465,11 +472,10 @@
                 if key2[:-5] not in tag_dict.keys() and key2[:-5] not in tag_dict_ref.keys():  # skip reads that have not alt or ref
                     continue
 
-                if ((key2[:-5] in tag_dict.keys() and key2[:-5] in pure_tags_dict_short[key1].keys() and key1 in tag_dict[key2[:-5]].keys()) or (key2[:-5] in tag_dict_ref.keys() and key2[:-5] in pure_tags_dict_ref[key1].keys() and key1 in tag_dict_ref[key2[:-5]].keys())) and (key2[:-5] not in used_keys):
-
-                    if key2[:-5] in pure_tags_dict_short[key1].keys():
+                if (((key2[:-5] in tag_dict.keys()) and (key2[:-5] in pure_tags_dict_short[key1].keys()) and (key1 in tag_dict[key2[:-5]].keys()) and (key2[:-5] not in used_keys)) or ((key2[:-5] in tag_dict_ref.keys()) and (key2[:-5] in pure_tags_dict_ref[key1].keys()) and (key1 in tag_dict_ref[key2[:-5]].keys()) and (key2[:-5] not in used_keys))):
+                    if key2[:-5] in tag_dict.keys():
                         variant_type = "alt"
-                    elif key2[:-5] in pure_tags_dict_ref[key1].keys():
+                    elif key2[:-5] in tag_dict_ref.keys():
                         variant_type = "ref"
 
                     if key2[:-5] + '.ab.1' in mut_dict[key1].keys():
@@ -670,7 +676,10 @@
                         end_read4 = reads_dict[key1][key2[:-5] + '.ba.2']
                         ref_positions4 = real_start_end[key1][key2[:-5] + '.ba.2']
 
+                    # if variant_type == "alt":
                     used_keys.append(key2[:-5])
+                    # elif variant_type == "ref":
+                    #     used_keys_ref.append(key2[:-5])
                     counts_mut += 1
                     if (variant_type == "alt" and ((alt1f + alt2f + alt3f + alt4f) > 0.5)) or (variant_type == "ref" and ((ref1f + ref2f + ref3f + ref4f) > 0.5)):
                         if variant_type == "alt":
@@ -1240,61 +1249,64 @@
 
                         chrom, pos, ref_a, alt_a = re.split(r'\#', key1)
                         var_id = '-'.join([chrom, str(int(pos)+1), ref, alt])
-                        sample_tag = key2[:-5]
-                        array2 = np.unique(whole_array)  # remove duplicate sequences to decrease running time
-                        # exclude identical tag from array2, to prevent comparison to itself
-                        same_tag = np.where(array2 == sample_tag)
-                        index_array2 = np.arange(0, len(array2), 1)
-                        index_withoutSame = np.delete(index_array2, same_tag)  # delete identical tag from the data
-                        array2 = array2[index_withoutSame]
-                        if len(array2) != 0:  # only perform chimera analysis if there is more than 1 variant
-                            array1_half = sample_tag[0:int(len(sample_tag) / 2)]  # mate1 part1
-                            array1_half2 = sample_tag[int(len(sample_tag) / 2):int(len(sample_tag))]  # mate1 part 2
-                            array2_half = np.array([ii[0:int(len(ii) / 2)] for ii in array2])  # mate2 part1
-                            array2_half2 = np.array([ii[int(len(ii) / 2):int(len(ii))] for ii in array2])  # mate2 part2
 
-                            min_tags_list_zeros = []
-                            chimera_tags = []
-                            for mate_b in [False, True]:
-                                i = 0  # counter, only used to see how many HDs of tags were already calculated
-                                if mate_b is False:  # HD calculation for all a's
-                                    half1_mate1 = array1_half
-                                    half2_mate1 = array1_half2
-                                    half1_mate2 = array2_half
-                                    half2_mate2 = array2_half2
-                                elif mate_b is True:  # HD calculation for all b's
-                                    half1_mate1 = array1_half2
-                                    half2_mate1 = array1_half
-                                    half1_mate2 = array2_half2
-                                    half2_mate2 = array2_half
-                                # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's"
-                                dist = np.array([sum(itertools.imap(operator.ne, half1_mate1, c)) for c in half1_mate2])
-                                min_index = np.where(dist == dist.min())  # get index of min HD
-                                # get all "b's" of the tag or all "a's" of the tag with minimum HD
-                                min_tag_half2 = half2_mate2[min_index]
-                                min_tag_array2 = array2[min_index]  # get whole tag with min HD
-                                min_value = dist.min()
-                                # calculate HD of "b" to all "b's" or "a" to all "a's"
-                                dist_second_half = np.array([sum(itertools.imap(operator.ne, half2_mate1, e))
-                                                             for e in min_tag_half2])
-                                dist2 = dist_second_half.max()
-                                max_index = np.where(dist_second_half == dist_second_half.max())[0]  # get index of max HD
-                                max_tag = min_tag_array2[max_index]
-
-                                # tags which have identical parts:
-                                if min_value == 0 or dist2 == 0:
-                                    min_tags_list_zeros.append(tag)
-                                    chimera_tags.append(max_tag)
-                                i += 1
-                            chimera_tags = [x for x in chimera_tags if x != []]
-                            chimera_tags_new = []
-                            for i in chimera_tags:
-                                if len(i) > 1:
-                                    for t in i:
-                                        chimera_tags_new.append(t)
-                                else:
-                                    chimera_tags_new.extend(i)
-                            chimera = ", ".join(chimera_tags_new)
+                        if variant_type == "alt":
+                            sample_tag = key2[:-5]
+                            array2 = np.unique(whole_array)  # remove duplicate sequences to decrease running time
+                            # exclude identical tag from array2, to prevent comparison to itself
+                            same_tag = np.where(array2 == sample_tag)
+                            index_array2 = np.arange(0, len(array2), 1)
+                            index_withoutSame = np.delete(index_array2, same_tag)  # delete identical tag from the data
+                            array2 = array2[index_withoutSame]
+                            if len(array2) != 0:  # only perform chimera analysis if there is more than 1 variant
+                                array1_half = sample_tag[0:int(len(sample_tag) / 2)]  # mate1 part1
+                                array1_half2 = sample_tag[int(len(sample_tag) / 2):int(len(sample_tag))]  # mate1 part 2
+                                array2_half = np.array([ii[0:int(len(ii) / 2)] for ii in array2])  # mate2 part1
+                                array2_half2 = np.array([ii[int(len(ii) / 2):int(len(ii))] for ii in array2])  # mate2 part2
+                                min_tags_list_zeros = []
+                                chimera_tags = []
+                                for mate_b in [False, True]:
+                                    i = 0  # counter, only used to see how many HDs of tags were already calculated
+                                    if mate_b is False:  # HD calculation for all a's
+                                        half1_mate1 = array1_half
+                                        half2_mate1 = array1_half2
+                                        half1_mate2 = array2_half
+                                        half2_mate2 = array2_half2
+                                    elif mate_b is True:  # HD calculation for all b's
+                                        half1_mate1 = array1_half2
+                                        half2_mate1 = array1_half
+                                        half1_mate2 = array2_half2
+                                        half2_mate2 = array2_half
+                                    # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's"
+                                    dist = np.array([sum(itertools.imap(operator.ne, half1_mate1, c)) for c in half1_mate2])
+                                    min_index = np.where(dist == dist.min())  # get index of min HD
+                                    # get all "b's" of the tag or all "a's" of the tag with minimum HD
+                                    min_tag_half2 = half2_mate2[min_index]
+                                    min_tag_array2 = array2[min_index]  # get whole tag with min HD
+                                    min_value = dist.min()
+                                    # calculate HD of "b" to all "b's" or "a" to all "a's"
+                                    dist_second_half = np.array([sum(itertools.imap(operator.ne, half2_mate1, e))
+                                                                 for e in min_tag_half2])
+                                    dist2 = dist_second_half.max()
+                                    max_index = np.where(dist_second_half == dist_second_half.max())[0]  # get index of max HD
+                                    max_tag = min_tag_array2[max_index]
+                                    # tags which have identical parts:
+                                    if min_value == 0 or dist2 == 0:
+                                        min_tags_list_zeros.append(tag)
+                                        chimera_tags.append(max_tag)
+                                    i += 1
+                                chimera_tags = [x for x in chimera_tags if x != []]
+                                chimera_tags_new = []
+                                for i in chimera_tags:
+                                    if len(i) > 1:
+                                        for t in i:
+                                            chimera_tags_new.append(t)
+                                    else:
+                                        chimera_tags_new.extend(i)
+                                chimera = ", ".join(chimera_tags_new)
+                            else:
+                                chimera_tags_new = []
+                                chimera = ""
                         else:
                             chimera_tags_new = []
                             chimera = ""
@@ -1316,27 +1328,23 @@
                         if (read_pos3 == -1):
                             read_pos3 = read_len_median3 = None
                         line = (var_id, tier, variant_type, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera)
-                        # ws1.write_row(row, 0, line)
-                        # csv_writer.writerow(line)
                         line2 = ("", "", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera)
-                        # ws1.write_row(row + 1, 0, line2)
-                        # csv_writer.writerow(line2)
-                        # ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
-                        #                       {'type': 'formula',
-                        #                        'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1),
-                        #                        'format': format1,
-                        #                        'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
-                        # ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
-                        #                       {'type': 'formula',
-                        #                        'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1),
-                        #                        'format': format3,
-                        #                        'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
-                        # ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
-                        #                       {'type': 'formula',
-                        #                        'criteria': '=$B${}>="3"'.format(row + 1),
-                        #                        'format': format2,
-                        #                        'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
-                        change_tier_after_print.append((row, line, line2, trimmed_actual_high_tier))
+                        if tier != "4":
+                            ws1.write_row(row, 0, line)
+                            csv_writer.writerow(line)
+                            ws1.write_row(row + 1, 0, line2)
+                            csv_writer.writerow(line2)
+                            if variant_type == "alt":
+                                ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 'format': format1, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                                ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), 'format': format3, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                                ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row + 1), 'format': format2, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                            elif variant_type == "ref":
+                                ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 'format': format1, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                                ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), 'format': format3, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                                ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row + 1), 'format': format2, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                        else:
+                            change_tier_after_print.append((line, line2, trimmed_actual_high_tier))
+
                         row += 3
 
             if chimera_correction:
@@ -1367,39 +1375,39 @@
                 tier_dict_ref[key1]["tier 2.5"] = tier_dict_ref[key1]["tier 4"]
                 tier_dict_ref[key1]["tier 4"] = 0
                 correct_tier_ref = True
-
-            for sample in change_tier_after_print:
-                row_number = sample[0]
-                line1 = sample[1]
-                line2 = sample[2]
-                actual_high_tier = sample[3]
-                current_tier = list(line1)[1]
-
-                if line1[2] == "alt" and correct_tier and (current_tier == "4") and actual_high_tier:
-                    line1 = list(line1)
-                    line1[1] = "2.5"
-                    line1 = tuple(line1)
-                    counter_tier25 += 1
-                    counter_tier4 -= 1
-                if line1[2] == "ref" and correct_tier_ref and (current_tier == "4") and actual_high_tier:
-                    line1 = list(line1)
-                    line1[1] = "2.5"
-                    line1 = tuple(line1)
-                    counter_tier25 += 1
-                    counter_tier4 -= 1
-
-                ws1.write_row(row_number, 0, line1)
-                csv_writer.writerow(line1)
-                ws1.write_row(row_number + 1, 0, line2)
-                csv_writer.writerow(line2)
-                if line1[2] == "alt":
-                    ws1.conditional_format('M{}:N{}'.format(row_number + 1, row_number + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row_number + 1, row_number + 1), 'format': format1, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)})
-                    ws1.conditional_format('M{}:N{}'.format(row_number + 1, row_number + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row_number + 1, row_number + 1, row_number + 1, row_number + 1, row_number + 1), 'format': format3, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)})
-                    ws1.conditional_format('M{}:N{}'.format(row_number + 1, row_number + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row_number + 1), 'format': format2, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)})
-                elif line1[2] == "ref":
-                    ws1.conditional_format('M{}:N{}'.format(row_number + 1, row_number + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row_number + 1, row_number + 1), 'format': format1, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)})
-                    ws1.conditional_format('M{}:N{}'.format(row_number + 1, row_number + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row_number + 1, row_number + 1, row_number + 1, row_number + 1, row_number + 1), 'format': format3, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)})
-                    ws1.conditional_format('M{}:N{}'.format(row_number + 1, row_number + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row_number + 1), 'format': format2, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)})
+            # print(key1, "change tiers from tier 4 to tier 2.5 for {} DCS ...".format(len(change_tier_after_print)))
+            if len(change_tier_after_print) > 0:
+                for sample in change_tier_after_print:
+                    # row_number = sample[0]
+                    line1 = sample[0]
+                    line2 = sample[1]
+                    actual_high_tier = sample[2]
+                    current_tier = list(line1)[1]
+                    if line1[2] == "alt" and correct_tier and (current_tier == "4") and actual_high_tier:
+                        line1 = list(line1)
+                        line1[1] = "2.5"
+                        line1 = tuple(line1)
+                        counter_tier25 += 1
+                        counter_tier4 -= 1
+                    if line1[2] == "ref" and correct_tier_ref and (current_tier == "4") and actual_high_tier:
+                        line1 = list(line1)
+                        line1[1] = "2.5"
+                        line1 = tuple(line1)
+                        counter_tier25 += 1
+                        counter_tier4 -= 1
+                    ws1.write_row(row, 0, line1)
+                    csv_writer.writerow(line1)
+                    ws1.write_row(row + 1, 0, line2)
+                    csv_writer.writerow(line2)
+                    if line1[2] == "alt":
+                        ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 'format': format1, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                        ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), 'format': format3, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                        ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row + 1), 'format': format2, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                    elif line1[2] == "ref":
+                        ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 'format': format1, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                        ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), 'format': format3, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                        ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row + 1), 'format': format2, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                    row += 3
 
     # sheet 2
     if chimera_correction:
@@ -1429,9 +1437,7 @@
             ref_count = cvrg_dict[key1][0]
             alt_count = cvrg_dict[key1][1]
             cvrg = ref_count + alt_count
-
             ref_tiers = tier_dict_ref[key1]
-
             var_id = '-'.join([chrom, str(int(pos)+1), ref, alt])
             lst = [var_id, cvrg]
             used_tiers = []
@@ -1453,7 +1459,7 @@
                 fraction_chimeras = safe_div(chimeras_all, float(sum(used_tiers[0:7])))
                 if fraction_chimeras is None:
                     fraction_chimeras = 0.
-                new_cvrg = (sum(used_tiers_ref[0:7]) + sum(used_tiers[0:7])) * (1. - fraction_chimeras)
+                new_cvrg = (cvrg - sum(used_tiers[-10:])) * (1. - fraction_chimeras)
                 lst.extend([new_cvrg, chimeras_all, safe_div(new_alt, new_cvrg)])
             lst.extend([alt_count, safe_div(alt_count, cvrg)])
             lst.extend(used_tiers)
--- a/read2mut.xml	Fri Jul 22 09:19:44 2022 +0000
+++ b/read2mut.xml	Mon Jul 25 13:24:04 2022 +0000
@@ -35,7 +35,7 @@
         <param name="thresh" type="integer" label="Tag count threshold" value="0" help="Integer threshold for displaying mutations. Only mutations occurring in DCS of less than thresh tags are displayed. Default of 0 displays all."/>
         <param name="phred" type="integer" label="Phred quality score threshold" min="0" max="41" value="20" help="Integer threshold for Phred quality score. Only reads higher than this threshold is considered. Default = 20."/>
         <param name="trim" type="integer" label="Trimming threshold" value="10" help="Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10."/>
-        <param name="chimera_correction" type="boolean" label="Apply chimera correction?" truevalue="--chimera_correction" falsevalue="" checked="False" help="Count chimeric variants and correct the variant frequencies."/>
+        <param name="chimera_correction" type="boolean" label="Apply chimera correction?" truevalue="--chimera_correction" falsevalue="" checked="False" help="Count chimeric variants (not for the reference allele) and correct the variant frequencies."/>
         <param name="softclipping_dist" type="integer" label="Distance between artifact and softclipping of the reads" min="1" value="15" help="Count mutation as an artifact if mutation lies within this parameter away from the softclipping part of the reads. Default = 20"/>
 <param name="reads_threshold" type="float" label="Minimum percentage of softclipped reads in a family" min="0.0" max="1.0" value="1.0" help="Float number which specifies the minimum percentage of softclipped reads in a family to be considered in the softclipping tiers. Default: 1.0, means all reads of a family have to be softclipped."/>
     </inputs>