comparison read2mut.py @ 59:0b3df6ea1434 draft

planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author mheinzl
date Fri, 12 Mar 2021 18:45:03 +0000
parents 04741369fc07
children 9ce53bf0931c
comparison
equal deleted inserted replaced
58:04741369fc07 59:0b3df6ea1434
591 trimmed = False 591 trimmed = False
592 contradictory = False 592 contradictory = False
593 softclipped_mutation_allMates = False 593 softclipped_mutation_allMates = False
594 softclipped_mutation_oneOfTwoMates = False 594 softclipped_mutation_oneOfTwoMates = False
595 softclipped_mutation_oneOfTwoSSCS = False 595 softclipped_mutation_oneOfTwoSSCS = False
596 softclipped_mutation_oneOfTwoSSCS_diffMates = False
596 softclipped_mutation_oneMate = False 597 softclipped_mutation_oneMate = False
597 softclipped_mutation_oneMateOneSSCS = False 598 softclipped_mutation_oneMateOneSSCS = False
598 print() 599 print()
599 print(key1, cigars_dcs1, cigars_dcs4, cigars_dcs2, cigars_dcs3) 600 print(key1, cigars_dcs1, cigars_dcs4, cigars_dcs2, cigars_dcs3)
600 dist_start_read1 = dist_start_read2 = dist_start_read3 = dist_start_read4 = [] 601 dist_start_read1 = dist_start_read2 = dist_start_read3 = dist_start_read4 = []
716 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available 717 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available
717 # if distance between softclipping and mutation is at start or end of the read smaller than threshold 718 # if distance between softclipping and mutation is at start or end of the read smaller than threshold
718 softclipped_mutation_allMates = True 719 softclipped_mutation_allMates = True
719 softclipped_mutation_oneOfTwoMates = False 720 softclipped_mutation_oneOfTwoMates = False
720 softclipped_mutation_oneOfTwoSSCS = False 721 softclipped_mutation_oneOfTwoSSCS = False
722 softclipped_mutation_oneOfTwoSSCS_diffMates = False
721 softclipped_mutation_oneMate = False 723 softclipped_mutation_oneMate = False
722 softclipped_mutation_oneMateOneSSCS = False 724 softclipped_mutation_oneMateOneSSCS = False
723 alt1ff = 0 725 alt1ff = 0
724 alt4ff = 0 726 alt4ff = 0
725 alt2ff = 0 727 alt2ff = 0
733 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available 735 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available
734 # if distance between softclipping and mutation is at start or end of the read smaller than threshold 736 # if distance between softclipping and mutation is at start or end of the read smaller than threshold
735 softclipped_mutation_allMates = False 737 softclipped_mutation_allMates = False
736 softclipped_mutation_oneOfTwoMates = True 738 softclipped_mutation_oneOfTwoMates = True
737 softclipped_mutation_oneOfTwoSSCS = False 739 softclipped_mutation_oneOfTwoSSCS = False
740 softclipped_mutation_oneOfTwoSSCS_diffMates = False
738 softclipped_mutation_oneMate = False 741 softclipped_mutation_oneMate = False
739 softclipped_mutation_oneMateOneSSCS = False 742 softclipped_mutation_oneMateOneSSCS = False
740 alt1ff = 0 743 alt1ff = 0
741 alt4ff = 0 744 alt4ff = 0
742 alt2ff = 0 745 alt2ff = 0
750 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available 753 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available
751 # if distance between softclipping and mutation is at start or end of the read smaller than threshold 754 # if distance between softclipping and mutation is at start or end of the read smaller than threshold
752 softclipped_mutation_allMates = False 755 softclipped_mutation_allMates = False
753 softclipped_mutation_oneOfTwoMates = False 756 softclipped_mutation_oneOfTwoMates = False
754 softclipped_mutation_oneOfTwoSSCS = True 757 softclipped_mutation_oneOfTwoSSCS = True
758 softclipped_mutation_oneOfTwoSSCS_diffMates = False
755 softclipped_mutation_oneMate = False 759 softclipped_mutation_oneMate = False
756 softclipped_mutation_oneMateOneSSCS = False 760 softclipped_mutation_oneMateOneSSCS = False
757 alt1ff = 0 761 alt1ff = 0
758 alt4ff = 0 762 alt4ff = 0
759 alt2ff = 0 763 alt2ff = 0
760 alt3ff = 0 764 alt3ff = 0
761 trimmed = False 765 trimmed = False
762 contradictory = False 766 contradictory = False
763 print(key1, "softclipped_mutation_oneOfTwoSSCS", softclipped_mutation_oneOfTwoSSCS, [alt1ff, alt2ff, alt3ff, alt4ff]) 767 print(key1, "softclipped_mutation_oneOfTwoSSCS", softclipped_mutation_oneOfTwoSSCS, [alt1ff, alt2ff, alt3ff, alt4ff])
768
764 # information of one mate available --> all reads of one mate are softclipped 769 # information of one mate available --> all reads of one mate are softclipped
765 elif ((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) & 770 elif ((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) &
766 all(float(ij) < 0. for ij in [alt2ff, alt3ff]) & all(float(ij) > 0. for ij in [alt1ff, alt4ff])) | 771 all(float(ij) < 0. for ij in [alt2ff, alt3ff]) & all(float(ij) > 0. for ij in [alt1ff, alt4ff])) |
767 (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) & 772 (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) &
768 all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) > 0. for ij in [alt2ff, alt3ff]))): # all mates available 773 all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) > 0. for ij in [alt2ff, alt3ff]))): # all mates available
772 # (((len(dist_start_read2) > 0 | len(dist_end_read2) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read2, dist_end_read2))) & 777 # (((len(dist_start_read2) > 0 | len(dist_end_read2) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read2, dist_end_read2))) &
773 # ((len(dist_start_read3) > 0 | len(dist_end_read3) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read3, dist_end_read3))))): 778 # ((len(dist_start_read3) > 0 | len(dist_end_read3) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read3, dist_end_read3))))):
774 softclipped_mutation_allMates = False 779 softclipped_mutation_allMates = False
775 softclipped_mutation_oneOfTwoMates = False 780 softclipped_mutation_oneOfTwoMates = False
776 softclipped_mutation_oneOfTwoSSCS = False 781 softclipped_mutation_oneOfTwoSSCS = False
782 softclipped_mutation_oneOfTwoSSCS_diffMates = False
777 softclipped_mutation_oneMate = True 783 softclipped_mutation_oneMate = True
778 softclipped_mutation_oneMateOneSSCS = False 784 softclipped_mutation_oneMateOneSSCS = False
779 alt1ff = 0 785 alt1ff = 0
780 alt4ff = 0 786 alt4ff = 0
781 alt2ff = 0 787 alt2ff = 0
794 # (all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read2, dist_end_read2)) | 800 # (all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read2, dist_end_read2)) |
795 # all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read3, dist_end_read3)))): 801 # all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read3, dist_end_read3)))):
796 softclipped_mutation_allMates = False 802 softclipped_mutation_allMates = False
797 softclipped_mutation_oneOfTwoMates = False 803 softclipped_mutation_oneOfTwoMates = False
798 softclipped_mutation_oneOfTwoSSCS = False 804 softclipped_mutation_oneOfTwoSSCS = False
805 softclipped_mutation_oneOfTwoSSCS_diffMates = False
799 softclipped_mutation_oneMate = False 806 softclipped_mutation_oneMate = False
800 softclipped_mutation_oneMateOneSSCS = True 807 softclipped_mutation_oneMateOneSSCS = True
801 alt1ff = 0 808 alt1ff = 0
802 alt4ff = 0 809 alt4ff = 0
803 alt2ff = 0 810 alt2ff = 0
1102 1109
1103 if correct_tier: 1110 if correct_tier:
1104 line1 = list(line1) 1111 line1 = list(line1)
1105 line1[1] = "2.5" 1112 line1[1] = "2.5"
1106 line1 = tuple(line1) 1113 line1 = tuple(line1)
1114 counter_tier25 += 1
1115 counter_tier4 -= 1
1107 ws1.write_row(row_number, 0, line1) 1116 ws1.write_row(row_number, 0, line1)
1108 csv_writer.writerow(line1) 1117 csv_writer.writerow(line1)
1109 ws1.write_row(row_number + 1, 0, line2) 1118 ws1.write_row(row_number + 1, 0, line2)
1110 csv_writer.writerow(line2) 1119 csv_writer.writerow(line2)
1111 1120
1229 ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), 1238 ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"),
1230 ("Tier 2.5", "variants at the start or end of the read and recurring mutation on this position in tier 1.1-2.4"), 1239 ("Tier 2.5", "variants at the start or end of the read and recurring mutation on this position in tier 1.1-2.4"),
1231 ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), 1240 ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"),
1232 ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), 1241 ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"),
1233 ("Tier 4", "variants at the start or end of the reads"), 1242 ("Tier 4", "variants at the start or end of the reads"),
1234 ("Tier 5.1", "variant is close to softclipping in both mates"), 1243 ("Tier 5.1", "variant is close to softclipping in both mates and SSCS"),
1235 ("Tier 5.2", "variant is close to softclipping in one of the mates"), 1244 ("Tier 5.2", "variant is close to softclipping in one of the mates but both SSCS"),
1236 ("Tier 5.3", "variant is close to softclipping in one of the SSCS of both mates"), 1245 ("Tier 5.3", "variant is close to softclipping in one of the SSCS of both mates"),
1237 ("Tier 5.4", "variant is close to softclipping in one mate (no information of second mate"), 1246 ("Tier 5.4", "variant is close to softclipping in one mate and both SSCS (no information of second mate)"),
1238 ("Tier 5.5", "variant is close to softclipping in one of the SSCS (no information of the second mate"), 1247 ("Tier 5.5", "variant is close to softclipping in one of the SSCS (no information of the second mate)"),
1239 ("Tier 6", "mates with contradictory information"), 1248 ("Tier 6", "mates with contradictory information"),
1240 ("Tier 7", "remaining variants")] 1249 ("Tier 7", "remaining variants")]
1241 examples_tiers = [[("chr5-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289", 1250 examples_tiers = [[("chr5-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289",
1242 "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", 1251 "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0",
1243 "4081", "4098", "5", "10", "", ""), 1252 "4081", "4098", "5", "10", "", ""),