changeset 61:3722268ffac5 draft

planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author mheinzl
date Wed, 17 Mar 2021 13:14:40 +0000
parents 9ce53bf0931c
children 66c1245436b9
files read2mut.py
diffstat 1 files changed, 147 insertions(+), 62 deletions(-) [+]
line wrap: on
line diff
--- a/read2mut.py	Mon Mar 15 11:43:41 2021 +0000
+++ b/read2mut.py	Wed Mar 17 13:14:40 2021 +0000
@@ -126,6 +126,7 @@
     mut_read_dict = {}
     reads_dict = {}
     mut_read_cigar_dict = {}
+    real_start_end = {}
     i = 0
     mut_array = []
 
@@ -149,6 +150,7 @@
             mut_read_pos_dict[chrom_stop_pos] = {}
             reads_dict[chrom_stop_pos] = {}
             mut_read_cigar_dict[chrom_stop_pos] = {}
+            real_start_end[chrom_stop_pos] = {}
 
             for pileupcolumn in bam.pileup(chrom, stop_pos - 1, stop_pos + 1, max_depth=100000000):
                 if pileupcolumn.reference_pos == stop_pos:
@@ -167,6 +169,21 @@
                             tag = pileupread.alignment.query_name
                             nuc = pileupread.alignment.query_sequence[pileupread.query_position]
                             phred = ord(pileupread.alignment.qual[pileupread.query_position]) - 33
+
+                            # if read is softclipped, store real position in reference
+                            if "S" in pileupread.alignment.cigarstring: 
+                                # spftclipped at start
+                                if re.search(r"^[0-9]+S", pileupread.alignment.cigarstring):
+                                    start = pileupread.alignment.reference_start - int(pileupread.alignment.cigarstring.split("S")[0])
+                                    end = pileupread.alignment.reference_end
+                                # softclipped at end
+                                elif re.search(r"S$", pileupread.alignment.cigarstring):
+                                    end = pileupread.alignment.reference_end + int(re.split("[A-Z]", str(pileupread.alignment.cigarstring))[-2])
+                                    start = pileupread.alignment.reference_start
+                            else:
+                                end = pileupread.alignment.reference_end
+                                start = pileupread.alignment.reference_start
+
                             if phred < phred_score:
                                 nuc = "lowQ"
                             if tag not in mut_dict[chrom_stop_pos]:
@@ -179,12 +196,12 @@
                                 mut_read_pos_dict[chrom_stop_pos][tag] = [pileupread.query_position + 1]
                                 reads_dict[chrom_stop_pos][tag] = [len(pileupread.alignment.query_sequence)]
                                 mut_read_cigar_dict[chrom_stop_pos][tag] = [pileupread.alignment.cigarstring]
-
-                                #alignedRefPositions = pileupread.get_reference_positions()[0]
+                                real_start_end[chrom_stop_pos][tag] = [(start, end)]
                             else:
                             	mut_read_pos_dict[chrom_stop_pos][tag].append(pileupread.query_position + 1)
                             	reads_dict[chrom_stop_pos][tag].append(len(pileupread.alignment.query_sequence))
                             	mut_read_cigar_dict[chrom_stop_pos][tag].append(pileupread.alignment.cigarstring)
+                                real_start_end[chrom_stop_pos][tag].append((start, end))
                             if nuc == alt:
                                 count_alt += 1
                                 if tag not in mut_read_dict:
@@ -533,55 +550,63 @@
                         read_pos1 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ab.1']))
                         read_len_median1 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ab.1']))
                         cigars_dcs1 = mut_read_cigar_dict[key1][key2[:-5] + '.ab.1']
-                        #print(mut_read_cigar_dict[key1][key2[:-5] + '.ab.1'])
                         pos_read1 = mut_read_pos_dict[key1][key2[:-5] + '.ab.1']
-                        #print(cigars_dcs1)
                         end_read1 = reads_dict[key1][key2[:-5] + '.ab.1']
+                        ref_positions1 = real_start_end[key1][key2[:-5] + '.ab.1']
                     if key2[:-5] + '.ab.2' in mut_read_pos_dict[key1].keys():
                         read_pos2 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ab.2']))
                         read_len_median2 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ab.2']))
                         cigars_dcs2 = mut_read_cigar_dict[key1][key2[:-5] + '.ab.2']
                         pos_read2 = mut_read_pos_dict[key1][key2[:-5] + '.ab.2']
                         end_read2 = reads_dict[key1][key2[:-5] + '.ab.2']
+                        ref_positions2 = real_start_end[key1][key2[:-5] + '.ab.2']
                     if key2[:-5] + '.ba.1' in mut_read_pos_dict[key1].keys():
                         read_pos3 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ba.1']))
                         read_len_median3 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ba.1']))
                         cigars_dcs3 = mut_read_cigar_dict[key1][key2[:-5] + '.ba.1']
                         pos_read3 = mut_read_pos_dict[key1][key2[:-5] + '.ba.1']
                         end_read3 = reads_dict[key1][key2[:-5] + '.ba.1']
+                        ref_positions3 = real_start_end[key1][key2[:-5] + '.ba.1']
                     if key2[:-5] + '.ba.2' in mut_read_pos_dict[key1].keys():
                         read_pos4 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ba.2']))
                         read_len_median4 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ba.2']))
-                        #print(mut_read_cigar_dict[key1][key2[:-5] + '.ba.2'])
                         cigars_dcs4 = mut_read_cigar_dict[key1][key2[:-5] + '.ba.2']
-
                         pos_read4 = mut_read_pos_dict[key1][key2[:-5] + '.ba.2']
-                        #print(cigars_dcs4)
                         end_read4 = reads_dict[key1][key2[:-5] + '.ba.2']
+                        ref_positions4 = real_start_end[key1][key2[:-5] + '.ba.2']
 
                     used_keys.append(key2[:-5])
                     counts_mut += 1
                     if (alt1f + alt2f + alt3f + alt4f) > 0.5:
+                    	total1new_trim, total2new_trim, total3new_trim, total4new_trim = total1new, total2new, total3new, total4new
                         if total1new == 0:
                             ref1f = alt1f = None
                             alt1ff = -1
+                            alt1ff_trim = -1
                         else:
                             alt1ff = alt1f
+                            alt1ff_trim = alt1f
                         if total2new == 0:
                             ref2f = alt2f = None
                             alt2ff = -1
+                            alt2ff_trim = -1
                         else:
                             alt2ff = alt2f
+                            alt2ff_trim = alt2f
                         if total3new == 0:
                             ref3f = alt3f = None
                             alt3ff = -1
+                            alt3ff_trim = -1
                         else:
                             alt3ff = alt3f
+                            alt3ff_trim = alt3f
                         if total4new == 0:
                             ref4f = alt4f = None
                             alt4ff = -1
+                            alt4ff_trim = alt4f
                         else:
                             alt4ff = alt4f
+                            alt4ff_trim = alt4f
 
                         beg1 = beg4 = beg2 = beg3 = 0
 
@@ -596,8 +621,9 @@
                         softclipped_mutation_oneOfTwoSSCS_diffMates = False
                         softclipped_mutation_oneMate = False
                         softclipped_mutation_oneMateOneSSCS = False
-                        print()
-                        print(key1, cigars_dcs1, cigars_dcs4, cigars_dcs2, cigars_dcs3)
+
+                        trimmed_actual_high_tier = False
+
                         dist_start_read1 = dist_start_read2 = dist_start_read3 = dist_start_read4 = []
                         dist_end_read1 = dist_end_read2 = dist_end_read3 = dist_end_read4 = []
                         ratio_dist_start1 = ratio_dist_start2 = ratio_dist_start3 = ratio_dist_start4 = False
@@ -606,14 +632,12 @@
                         # mate 1 - SSCS ab
                         softclipped_idx1 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs1]
                         ratio1 = safe_div(sum(softclipped_idx1), float(len(softclipped_idx1))) >= threshold_reads
-
                         if any(ij is True for ij in softclipped_idx1):
                         	softclipped_both_ends_idx1 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs1]
                         	softclipped_start1 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs1]
                         	softclipped_end1 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs1]
                         	dist_start_read1 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start1, pos_read1)]
                         	dist_end_read1 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end1, pos_read1, end_read1)]
-                        	
                         	# if read at both ends softclipped --> select end with smallest distance between mut position and softclipping
                         	if any(ij is True for ij in softclipped_both_ends_idx1):
                         		print(softclipped_both_ends_idx1)
@@ -625,7 +649,6 @@
                         					dist_start_read1[nr] = thr + 1000 # use dist of end and set start to very large number
                         	ratio_dist_start1 = safe_div(sum([True if x <= thr else False for x in dist_start_read1]), float(sum(softclipped_idx1))) >= threshold_reads
                         	ratio_dist_end1 = safe_div(sum([True if x <= thr else False for x in dist_end_read1]), float(sum(softclipped_idx1))) >= threshold_reads
-                        print(key1, "mate1 ab", dist_start_read1, dist_end_read1, cigars_dcs1, ratio1, ratio_dist_start1, ratio_dist_end1)
 
                         # mate 1 - SSCS ba
                         softclipped_idx4 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs4]
@@ -636,7 +659,6 @@
                         	softclipped_end4 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs4]
                         	dist_start_read4 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start4, pos_read4)]
                         	dist_end_read4 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end4, pos_read4, end_read4)]
-                        	
                         	# if read at both ends softclipped --> select end with smallest distance between mut position and softclipping
                         	if any(ij is True for ij in softclipped_both_ends_idx4):
                         		print(softclipped_both_ends_idx4)
@@ -648,11 +670,9 @@
                         					dist_start_read4[nr] = thr + 1000 # use dist of end and set start to very large number
                        		ratio_dist_start4 = safe_div(sum([True if x <= thr else False for x in dist_start_read4]), float(sum(softclipped_idx4))) >= threshold_reads
                         	ratio_dist_end4 = safe_div(sum([True if x <= thr else False for x in dist_end_read4]), float(sum(softclipped_idx4))) >= threshold_reads
-                        print(key1, "mate1 ba", dist_start_read4, dist_end_read4,cigars_dcs4, ratio4, ratio_dist_start4, ratio_dist_end4)
 
                         # mate 2 - SSCS ab
                         softclipped_idx2 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs2]
-                        #print(sum(softclipped_idx2))
                         ratio2 = safe_div(sum(softclipped_idx2), float(len(softclipped_idx2))) >= threshold_reads
                         if any(ij is True for ij in softclipped_idx2):
                             softclipped_both_ends_idx2 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs2]
@@ -660,7 +680,6 @@
                             softclipped_end2 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs2]
                             dist_start_read2 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start2, pos_read2)]
                             dist_end_read2 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end2, pos_read2, end_read2)]
-                            	
                         	# if read at both ends softclipped --> select end with smallest distance between mut position and softclipping
                             if any(ij is True for ij in softclipped_both_ends_idx2):
                             	print(softclipped_both_ends_idx2)
@@ -671,10 +690,7 @@
                                         else:
                                             dist_start_read2[nr] = thr + 1000 # use dist of end and set start to very large number
                             ratio_dist_start2 = safe_div(sum([True if x <= thr else False for x in dist_start_read2]), float(sum(softclipped_idx2))) >= threshold_reads
-                            #print(ratio_dist_end2)
-                            #print([True if x <= thr else False for x in ratio_dist_end2])
                             ratio_dist_end2 = safe_div(sum([True if x <= thr else False for x in dist_end_read2]), float(sum(softclipped_idx2))) >= threshold_reads
-                        print(key1, "mate2 ab", dist_start_read2, dist_end_read2,cigars_dcs2, ratio2, ratio_dist_start2, ratio_dist_end2)
 
                         # mate 2 - SSCS ba
                         softclipped_idx3 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs3]
@@ -685,7 +701,6 @@
                             softclipped_end3 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs3]
                             dist_start_read3 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start3, pos_read3)]
                             dist_end_read3 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end3, pos_read3, end_read3)]
-                            
                         	# if read at both ends softclipped --> select end with smallest distance between mut position and softclipping
                             if any(ij is True for ij in softclipped_both_ends_idx3):
                             	print(softclipped_both_ends_idx3)
@@ -695,10 +710,8 @@
                                             dist_end_read3[nr] = thr + 1000 # use dist of start and set start to a larger number than thresh
                                         else:
                                             dist_start_read3[nr] = thr + 1000 # use dist of end and set start to very large number
-                            #print([True if x <= thr else False for x in dist_start_read3])
                             ratio_dist_start3 = safe_div(sum([True if x <= thr else False for x in dist_start_read3]), float(sum(softclipped_idx3))) >= threshold_reads
                             ratio_dist_end3 = safe_div(sum([True if x <= thr else False for x in dist_end_read3]), float(sum(softclipped_idx3))) >= threshold_reads
-                        print(key1, "mate2 ba", dist_start_read3, dist_end_read3,cigars_dcs3, ratio3, ratio_dist_start3, ratio_dist_end3)
 
                         if ((all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) &  # contradictory variant
                             all(float(ij) == 0. for ij in [alt2ff, alt3ff])) |
@@ -728,25 +741,85 @@
                             alt3ff = 0
                             trimmed = False
                             contradictory = False
-                            print(key1, "softclipped_mutation_allMates", softclipped_mutation_allMates)
                         # information of both mates available --> only one mate softclipped
                         elif (((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4)) |
                                (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3))) &
                                all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available
                         	# if distance between softclipping and mutation is at start or end of the read smaller than threshold
+                            min_start1 = min(min([ij[0] for ij in ref_positions1]), min([ij[0] for ij in ref_positions4])) # red
+                            min_start2 = min(min([ij[0] for ij in ref_positions2]), min([ij[0] for ij in ref_positions3])) # blue
+                            max_end1 = max(max([ij[1] for ij in ref_positions1]), max([ij[1] for ij in ref_positions4])) # red
+                            max_end2 = max(max([ij[1] for ij in ref_positions2]), max([ij[1] for ij in ref_positions3])) # blue
+                            if (min_start1 > min_start2) or (max_end1 > max_end2): # if mate1 is red and mate2 is blue
+                                softclipped_mutation_oneOfTwoMates = False
+                                # blue mate at beginning softclipped
+                                if min_start1 > min_start2:
+                                    n_spacer_barcode = min_start1 - min_start2
+                                    read_pos2 = read_pos2 - n_spacer_barcode
+                                    read_pos3 = read_pos3 - n_spacer_barcode
+                                    read_len_median2 = read_len_median2 - n_spacer_barcode
+                                    read_len_median3 = read_len_median3 - n_spacer_barcode
+                                # red mate at end softclipped
+                                if max_end1 > max_end2: 
+                                    n_spacer_barcode = max_end1 - max_end2
+                                    read_len_median1 = read_len_median1 - n_spacer_barcode
+                                    read_len_median4 = read_len_median4 - n_spacer_barcode
+                            elif (min_start1 < min_start2) or (max_end1 < max_end2): # if mate1 is blue and mate2 is red
+                                softclipped_mutation_oneOfTwoMates = False
+                                if min_start1 < min_start2: 
+                                    n_spacer_barcode = min_start2 - min_start1
+                                    read_pos1 = read_pos1 - n_spacer_barcode
+                                    read_pos4 = read_pos4 - n_spacer_barcode
+                                    read_len_median1 = read_len_median1 - n_spacer_barcode
+                                    read_len_median4 = read_len_median4 - n_spacer_barcode
+                                if max_end1 < max_end2: # if mate1 ends after mate 2 starts
+                                    n_spacer_barcode = max_end2 - max_end1
+                                    read_len_median2 = read_len_median2 - n_spacer_barcode
+                                    read_len_median3 = read_len_median3 - n_spacer_barcode
+                            else:
+                                softclipped_mutation_oneOfTwoMates = True
+                                alt1ff = 0
+                                alt4ff = 0
+                                alt2ff = 0
+                                alt3ff = 0
+                                trimmed = False
+                                contradictory = False
                             softclipped_mutation_allMates = False
-                            softclipped_mutation_oneOfTwoMates = True
                             softclipped_mutation_oneOfTwoSSCS = False
-                            softclipped_mutation_oneOfTwoSSCS_diffMates = False
                             softclipped_mutation_oneMate = False
                             softclipped_mutation_oneMateOneSSCS = False
-                            alt1ff = 0
-                            alt4ff = 0
-                            alt2ff = 0
-                            alt3ff = 0
-                            trimmed = False
-                            contradictory = False
-                            print(key1, "softclipped_mutation_oneOfTwoMates", softclipped_mutation_oneOfTwoMates)
+                            
+                            if softclipped_mutation_oneOfTwoMates is False: # check trimming tier
+                                if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))):
+                                    beg1 = total1new
+                                    total1new = 0
+                                    alt1ff = 0
+                                    alt1f = 0
+                                    trimmed = True
+
+                                if ((read_pos4 >= 0) and ((read_pos4 <= trim) | (abs(read_len_median4 - read_pos4) <= trim))):
+                                    beg4 = total4new
+                                    total4new = 0
+                                    alt4ff = 0
+                                    alt4f = 0
+                                    trimmed = True
+
+                                if ((read_pos2 >= 0) and ((read_pos2 <= trim) | (abs(read_len_median2 - read_pos2) <= trim))):
+                                    beg2 = total2new
+                                    total2new = 0
+                                    alt2ff = 0
+                                    alt2f = 0
+                                    trimmed = True
+
+                                if ((read_pos3 >= 0) and ((read_pos3 <= trim) | (abs(read_len_median3 - read_pos3) <= trim))):
+                                    beg3 = total3new
+                                    total3new = 0
+                                    alt3ff = 0
+                                    alt3f = 0
+                                    trimmed = True
+                                details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4)
+                                details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3)
+
                         # information of both mates available --> only one mate softclipped
                         elif (((ratio1 & (ratio_dist_start1 | ratio_dist_end1)) | (ratio4 & (ratio_dist_start4 | ratio_dist_end4))) &
                               ((ratio2 & (ratio_dist_start2 | ratio_dist_end2)) | (ratio3 & (ratio_dist_start3 | ratio_dist_end3))) &
@@ -764,7 +837,6 @@
                             alt3ff = 0
                             trimmed = False
                             contradictory = False
-                            print(key1, "softclipped_mutation_oneOfTwoSSCS", softclipped_mutation_oneOfTwoSSCS, [alt1ff, alt2ff, alt3ff, alt4ff])
 
                         # information of one mate available --> all reads of one mate are softclipped
                         elif ((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) &
@@ -772,10 +844,6 @@
                         	  (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) &
                         	  all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) > 0. for ij in [alt2ff, alt3ff]))): # all mates available
                         	# if distance between softclipping and mutation is at start or end of the read smaller than threshold
-                            #if ((((len(dist_start_read1) > 0 | len(dist_end_read1) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read1, dist_end_read1))) &
-                        #		((len(dist_start_read4) > 0 | len(dist_end_read4) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read4, dist_end_read4)))) |
-                        #		(((len(dist_start_read2) > 0 | len(dist_end_read2) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read2, dist_end_read2))) &
-                       # 		((len(dist_start_read3) > 0 | len(dist_end_read3) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read3, dist_end_read3))))):
                             softclipped_mutation_allMates = False
                             softclipped_mutation_oneOfTwoMates = False
                             softclipped_mutation_oneOfTwoSSCS = False
@@ -788,17 +856,12 @@
                             alt3ff = 0
                             trimmed = False
                             contradictory = False
-                            print(key1, "softclipped_mutation_oneMate", softclipped_mutation_oneMate)
                         # information of one mate available --> only one SSCS is softclipped
                         elif ((((ratio1 & (ratio_dist_start1 | ratio_dist_end1)) | (ratio4 & (ratio_dist_start4 | ratio_dist_end4))) &
                               (all(float(ij) < 0. for ij in [alt2ff, alt3ff]) & all(float(ij) > 0. for ij in [alt1ff, alt4ff]))) |
                         	  (((ratio2 & (ratio_dist_start2 | ratio_dist_end2)) | (ratio3 & (ratio_dist_start3 | ratio_dist_end3))) &
                         	  (all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) < 0. for ij in [alt2ff, alt3ff])))): # all mates available
                         	# if distance between softclipping and mutation is at start or end of the read smaller than threshold
-                            #if ((all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read1, dist_end_read1)) |
-                        	#	all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read4, dist_end_read4))) |
-                        #		(all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read2, dist_end_read2)) |
-                       # 		all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read3, dist_end_read3)))):
                             softclipped_mutation_allMates = False
                             softclipped_mutation_oneOfTwoMates = False
                             softclipped_mutation_oneOfTwoSSCS = False
@@ -811,7 +874,6 @@
                             alt3ff = 0
                             trimmed = False
                             contradictory = False
-                            print(key1, "softclipped_mutation_oneMateOneSSCS", softclipped_mutation_oneMateOneSSCS)
 
                         else:
                             if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))):
@@ -844,9 +906,6 @@
                             details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4)
                             details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3)
 
-
-                        #sum_highTiers = sum([tier_dict[key1][ij] for ij in tier_dict[key1].keys()[:6]])
-
                         # assign tiers
                         if ((all(int(ij) >= 3 for ij in [total1new, total4new]) &
                              all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) |
@@ -915,16 +974,49 @@
                             counter_tier32 += 1
                             tier_dict[key1]["tier 3.2"] += 1
 
-                        #elif (trimmed) and (sum_highTiers > 1):
-                        #    tier = "2.5"
-                        #    counter_tier25 += 1
-                        #    tier_dict[key1]["tier 2.5"] += 1
-
                         elif (trimmed):
                             tier = "4"
                             counter_tier4 += 1
                             tier_dict[key1]["tier 4"] += 1
 
+                        	# assign tiers
+                        	if ((all(int(ij) >= 3 for ij in [total1new_trim, total4new_trim]) &
+                        	     all(float(ij) >= 0.75 for ij in [alt1ff_trim, alt4ff_trim])) |
+                        	    (all(int(ij) >= 3 for ij in [total2new_trim, total3new_trim]) &
+                        	     all(float(ij) >= 0.75 for ij in [alt2ff_trim, alt3ff_trim]))):
+                        	    trimmed_actual_high_tier = True
+                        	elif (all(int(ij) >= 1 for ij in [total1new_trim, total2new_trim, total3new_trim, total4new_trim]) &
+                        	      any(int(ij) >= 3 for ij in [total1new_trim, total4new_trim]) &
+                        	      any(int(ij) >= 3 for ij in [total2new_trim, total3new_trim]) &
+                        	      all(float(ij) >= 0.75 for ij in [alt1ff, alt2ff, alt3ff, alt4ff])):
+                        	    trimmed_actual_high_tier = True
+                        	elif ((all(int(ij) >= 1 for ij in [total1new_trim, total4new_trim]) &
+                        	       any(int(ij) >= 3 for ij in [total1new_trim, total4new_trim]) &
+                        	       all(float(ij) >= 0.75 for ij in [alt1ff_trim, alt4ff_trim])) |
+                        	      (all(int(ij) >= 1 for ij in [total2new_trim, total3new_trim]) &
+                        	       any(int(ij) >= 3 for ij in [total2new_trim, total3new_trim]) &
+                        	       all(float(ij) >= 0.75 for ij in [alt2ff_trim, alt3ff_trim]))):
+                        	    trimmed_actual_high_tier = True
+                        	elif (all(int(ij) >= 1 for ij in [total1new_trim, total2new_trim, total3new_trim, total4new_trim]) &
+                        	      all(float(ij) >= 0.75 for ij in [alt1ff_trim, alt2ff_trim, alt3ff_trim, alt4ff_trim])):
+                        	    trimmed_actual_high_tier = True
+                        	elif ((all(int(ij) >= 1 for ij in [total1new_trim, total4new_trim]) &
+                        	       any(int(ij) >= 3 for ij in [total2new_trim, total3new_trim]) &
+                        	       all(float(ij) >= 0.75 for ij in [alt1ff_trim, alt4ff_trim]) &
+                        	       any(float(ij) >= 0.75 for ij in [alt2ff_trim, alt3ff_trim])) |
+                        	      (all(int(ij) >= 1 for ij in [total2new_trim, total3new_trim]) &
+                        	       any(int(ij) >= 3 for ij in [total1new_trim, total4new_trim]) &
+                        	       all(float(ij) >= 0.75 for ij in [alt2ff_trim, alt3ff_trim]) &
+                        	       any(float(ij) >= 0.75 for ij in [alt1ff_trim, alt4ff_trim]))):
+                        	    trimmed_actual_high_tier = True
+                        	elif ((all(int(ij) >= 1 for ij in [total1new_trim, total4new_trim]) &
+                        	       all(float(ij) >= 0.75 for ij in [alt1ff_trim, alt4ff_trim])) |
+                        	      (all(int(ij) >= 1 for ij in [total2new_trim, total3new_trim]) &
+                        	       all(float(ij) >= 0.75 for ij in [alt2ff_trim, alt3ff_trim]))):
+                        	    trimmed_actual_high_tier = True
+                        	else:
+                        		trimmed_actual_high_tier = False
+
                         elif softclipped_mutation_allMates:
                             tier = "5.1"
                             counter_tier51 += 1
@@ -1008,14 +1100,6 @@
                                 if min_value == 0 or dist2 == 0:
                                     min_tags_list_zeros.append(tag)
                                     chimera_tags.append(max_tag)
-                                    # chimeric = True
-                                # else:
-                                    # chimeric = False
-
-                                # if mate_b is False:
-                                #    text = "pos {}: sample tag: {}; HD a = {}; HD b' = {}; similar tag(s): {}; chimeric = {}".format(pos, sample_tag, min_value, dist2, list(max_tag), chimeric)
-                                # else:
-                                #     text = "pos {}: sample tag: {}; HD a' = {}; HD b = {}; similar tag(s): {}; chimeric = {}".format(pos, sample_tag, dist2, min_value, list(max_tag), chimeric)
                                 i += 1
                             chimera_tags = [x for x in chimera_tags if x != []]
                             chimera_tags_new = []
@@ -1072,7 +1156,7 @@
                         #if key1 not in list(change_tier_after_print.keys()):
                         #    change_tier_after_print[key1] = [((row, line, line2))]
                         #else:
-                        change_tier_after_print.append((row, line, line2))
+                        change_tier_after_print.append((row, line, line2, trimmed_actual_high_tier))
 
                         row += 3
 
@@ -1101,13 +1185,14 @@
                 tier_dict[key1]["tier 4"] = 0
                 correct_tier = True
 
-            #lines = change_tier_after_print[key1]
             for sample in change_tier_after_print:
                 row_number = sample[0]
                 line1 = sample[1]
                 line2 = sample[2]
+                actual_high_tier = sample[3]
+                current_tier = list(line1)[1]
 
-                if correct_tier and list(line1)[1] == "4":
+                if correct_tier and (current_tier == "4") and actual_high_tier:
                     line1 = list(line1)
                     line1[1] = "2.5"
                     line1 = tuple(line1)