annotate annotateVCF.py @ 4:5c959a2dde5f draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit c02f74da66f51b431b7cd8bd0d1cffde267e3332"
author iuc
date Tue, 13 Oct 2020 15:59:54 +0000
parents 148f32860445
children efca385ce679
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
1 #!/usr/bin/env python3
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
2
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
3 # Takes in VCF file and a samtools mpileup output file
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
4 # Fills in annotation for the VCF file including AF, DP
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
5 # SB, and DP4
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
6 #
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
7 # Usage statement:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
8 # python annotateVCF.py in_vcf.vcf in_mpileup.txt out_vcf.vcf
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
9 #
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
10 # Can generate in_mileup.txt with samtools mpileup (and can restrict which sites to generate pileups for with in_vcf.vcf)
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
11
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
12 # 08/24/2020 - Nathan P. Roach, natproach@gmail.com
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
13
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
14 import sys
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
15 from math import isnan, log10
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
16
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
17 from scipy.stats import fisher_exact
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
18
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
19
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
20 def pval_to_phredqual(pval):
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
21 return int(round(-10. * log10(pval)))
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
22
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
23
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
24 def parseSimpleSNPpileup(fields, ref_base, alt_base):
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
25 base_to_idx = {
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
26 'A': 0,
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
27 'a': 0,
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
28 'T': 1,
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
29 't': 1,
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
30 'C': 2,
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
31 'c': 2,
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
32 'G': 3,
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
33 'g': 3
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
34 }
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
35
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
36 base_to_idx_stranded = {
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
37 'A': 0,
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
38 'T': 1,
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
39 'C': 2,
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
40 'G': 3,
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
41 'a': 4,
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
42 't': 5,
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
43 'c': 6,
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
44 'g': 7
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
45 }
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
46 ref_base2 = fields[2]
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
47 counts = [0, 0, 0, 0]
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
48 stranded_counts = [0, 0, 0, 0, 0, 0, 0, 0]
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
49 ref_idx = base_to_idx[fields[2]]
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
50 dp = int(fields[3])
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
51 carrot_flag = False
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
52 ins_flag = False
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
53 ins_str = ""
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
54 ins_len = 0
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
55 insertion = ""
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
56 del_flag = False
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
57 del_str = ""
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
58 del_len = 0
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
59 deletion = ""
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
60 # dollar_flag = False
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
61 for base in fields[4]:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
62 if carrot_flag:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
63 carrot_flag = False
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
64 continue
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
65 if ins_len > 0:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
66 insertion += base
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
67 ins_len -= 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
68 continue
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
69 if del_len > 0:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
70 deletion += base
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
71 del_len -= 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
72 continue
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
73 if ins_flag:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
74 if base.isdigit():
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
75 ins_str += base
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
76 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
77 ins_len = int(ins_str) - 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
78 insertion = base
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
79 ins_flag = False
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
80 elif del_flag:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
81 if base.isdigit():
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
82 del_str += base
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
83 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
84 del_len = int(del_str) - 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
85 deletion = base
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
86 del_flag = False
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
87 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
88 if base == '^':
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
89 carrot_flag = True
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
90 continue
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
91 elif base == '$':
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
92 continue
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
93 elif base == '+':
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
94 ins_flag = True
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
95 elif base == '-':
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
96 del_flag = True
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
97 elif base == '.':
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
98 counts[ref_idx] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
99 stranded_counts[base_to_idx_stranded[ref_base2]] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
100 elif base == ',':
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
101 counts[ref_idx] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
102 stranded_counts[base_to_idx_stranded[ref_base2.lower()]] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
103 elif base == 'N' or base == 'n':
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
104 continue
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
105 elif base == '*':
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
106 continue
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
107 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
108 counts[base_to_idx[base]] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
109 stranded_counts[base_to_idx_stranded[base]] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
110 af = float(counts[base_to_idx[alt_base]]) / float(sum(counts))
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
111 if float(sum(stranded_counts[0:4])) == 0:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
112 faf = float("nan")
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
113 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
114 faf = float(stranded_counts[base_to_idx_stranded[alt_base]]) / float(sum(stranded_counts[0:4]))
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
115 if float(sum(stranded_counts[4:])) == 0:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
116 raf = float("nan")
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
117 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
118 raf = float(stranded_counts[base_to_idx_stranded[alt_base.lower()]]) / float(sum(stranded_counts[4:]))
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
119 dp4 = [stranded_counts[base_to_idx_stranded[ref_base]],
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
120 stranded_counts[base_to_idx_stranded[ref_base.lower()]],
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
121 stranded_counts[base_to_idx_stranded[alt_base]],
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
122 stranded_counts[base_to_idx_stranded[alt_base.lower()]]]
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
123 return (dp, af, faf, raf, dp4)
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
124
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
125
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
126 def parseIndelPileup(fields, ref_base, alt_base):
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
127 counts = [0, 0, 0, 0, 0, 0, 0, 0, 0] # indel ref match, indel fwd ref match, indel rev ref match, indel alt match, indel fwd alt match, indel rev alt match, other, other fwd, other rev
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
128 ref_base2 = fields[2]
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
129
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
130 carrot_flag = False
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
131 ins_flag = False
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
132 ins_str = ""
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
133 ins_len = 0
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
134 del_flag = False
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
135 del_str = ""
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
136 del_len = 0
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
137 first_iter = True
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
138 forward_flag = False
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
139 last_seq = ""
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
140 last_seq_code = 'b'
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
141 for base in fields[4]:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
142 if ins_flag:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
143 if base.isdigit():
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
144 ins_str += base
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
145 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
146 ins_len = int(ins_str)
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
147 ins_flag = False
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
148 if del_flag:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
149 if base.isdigit():
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
150 del_str += base
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
151 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
152 del_len = int(del_str)
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
153 del_flag = False
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
154 if ins_len > 0:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
155 last_seq += base
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
156 last_seq_code = 'i'
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
157 ins_len -= 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
158 continue
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
159 if del_len > 0:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
160 last_seq += base
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
161 last_seq_code = 'd'
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
162 del_len -= 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
163 continue
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
164 if carrot_flag:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
165 carrot_flag = False
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
166 continue
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
167 if base == '.' or base == ','\
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
168 or base == 'A' or base == 'a'\
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
169 or base == 'C' or base == 'c'\
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
170 or base == 'G' or base == 'g'\
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
171 or base == 'T' or base == 't'\
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
172 or base == 'N' or base == 'n'\
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
173 or base == '>' or base == '<'\
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
174 or base == '*' or base == '#':
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
175 if first_iter:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
176 first_iter = False
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
177 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
178 if last_seq_code == 'i':
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
179 if last_seq.upper() == alt_base.upper():
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
180 counts[3] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
181 if forward_flag:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
182 counts[4] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
183 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
184 counts[5] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
185 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
186 counts[6] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
187 if forward_flag:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
188 counts[7] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
189 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
190 counts[8] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
191 elif last_seq_code == 'd':
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
192 if last_seq.upper() == ref_base.upper():
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
193 counts[3] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
194 if forward_flag:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
195 counts[4] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
196 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
197 counts[5] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
198 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
199 counts[6] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
200 if forward_flag:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
201 counts[7] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
202 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
203 counts[8] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
204 elif last_seq_code == 'b':
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
205 if last_seq.upper() == ref_base.upper():
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
206 counts[0] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
207 if forward_flag:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
208 counts[1] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
209 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
210 counts[2] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
211 elif last_seq.upper() == alt_base.upper():
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
212 counts[3] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
213 if forward_flag:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
214 counts[4] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
215 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
216 counts[5] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
217 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
218 counts[6] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
219 if forward_flag:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
220 counts[7] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
221 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
222 counts[8] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
223 if base == '.':
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
224 last_seq = ref_base2
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
225 forward_flag = True
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
226 last_seq_code = 'b'
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
227 elif base == ',':
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
228 last_seq = ref_base2
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
229 forward_flag = False
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
230 last_seq_code = 'b'
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
231 elif base == '>' or base == '<' or base == '*' or base == '#':
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
232 continue
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
233 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
234 forward_flag = base.isupper()
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
235 last_seq = base.upper()
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
236 last_seq_code = 'b'
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
237 elif base == '+':
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
238 ins_flag = True
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
239 ins_str = ""
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
240 elif base == '-':
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
241 del_flag = True
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
242 del_str = ""
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
243 elif base == '^':
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
244 carrot_flag = True
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
245 elif base == '$':
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
246 continue
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
247 if first_iter:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
248 first_iter = False
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
249
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
250 if last_seq_code == 'i':
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
251 if last_seq.upper() == alt_base.upper():
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
252 counts[3] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
253 if forward_flag:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
254 counts[4] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
255 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
256 counts[5] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
257 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
258 counts[6] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
259 if forward_flag:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
260 counts[7] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
261 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
262 counts[8] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
263 elif last_seq_code == 'd':
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
264 if last_seq.upper() == ref_base.upper():
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
265 counts[3] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
266 if forward_flag:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
267 counts[4] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
268 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
269 counts[5] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
270 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
271 counts[6] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
272 if forward_flag:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
273 counts[7] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
274 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
275 counts[8] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
276 elif last_seq_code == 'b':
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
277 if last_seq.upper() == ref_base.upper():
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
278 counts[0] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
279 if forward_flag:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
280 counts[1] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
281 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
282 counts[2] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
283 elif last_seq.upper() == alt_base.upper():
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
284 counts[3] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
285 if forward_flag:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
286 counts[4] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
287 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
288 counts[5] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
289 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
290 counts[6] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
291 if forward_flag:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
292 counts[7] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
293 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
294 counts[8] += 1
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
295 dp = int(fields[3])
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
296 af = float(counts[3]) / float(sum([counts[0], counts[3], counts[6]]))
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
297 if sum([counts[1], counts[4], counts[7]]) == 0:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
298 faf = float("nan")
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
299 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
300 faf = float(counts[4]) / float(sum([counts[1], counts[4], counts[7]]))
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
301 if sum([counts[2], counts[5], counts[8]]) == 0:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
302 raf = float("nan")
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
303 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
304 raf = float(counts[5]) / float(sum([counts[2], counts[5], counts[8]]))
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
305 dp4 = [counts[1], counts[2], counts[4], counts[5]]
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
306 return (dp, af, faf, raf, dp4)
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
307
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
308
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
309 def annotateVCF(in_vcf_filepath, in_mpileup_filepath, out_vcf_filepath):
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
310 in_vcf = open(in_vcf_filepath, 'r')
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
311 in_mpileup = open(in_mpileup_filepath, 'r')
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
312 out_vcf = open(out_vcf_filepath, 'w')
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
313
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
314 # First pass parsing of input vcf, output headerlines + new headerlines, add VCF sites we care about to to_examine (limits memory usage for sites that don't need annotation)
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
315 to_examine = {}
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
316 for line in in_vcf:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
317 if line[0:2] == "##":
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
318 out_vcf.write(line)
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
319 elif line[0] == "#":
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
320 out_vcf.write("##annotateVCFVersion=0.1\n")
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
321 out_vcf.write("##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Raw Depth\">\n")
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
322 out_vcf.write("##INFO=<ID=AF,Number=1,Type=Float,Description=\"Allele Frequency\">\n")
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
323 out_vcf.write("##INFO=<ID=FAF,Number=1,Type=Float,Description=\"Forward Allele Frequency\">\n")
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
324 out_vcf.write("##INFO=<ID=RAF,Number=1,Type=Float,Description=\"Reverse Allele Frequency\">\n")
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
325 out_vcf.write("##INFO=<ID=SB,Number=1,Type=Integer,Description=\"Phred-scaled strand bias at this position\">\n")
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
326 out_vcf.write("##INFO=<ID=DP4,Number=4,Type=Integer,Description=\"Counts for ref-forward bases, ref-reverse, alt-forward and alt-reverse bases\">\n")
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
327 out_vcf.write(line)
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
328 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
329 fields = line.strip().split()
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
330 if fields[0] in to_examine:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
331 to_examine[fields[0]][int(fields[1])] = (fields[3], fields[4])
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
332 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
333 to_examine[fields[0]] = {int(fields[1]): (fields[3], fields[4])}
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
334 in_vcf.close()
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
335 data = {}
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
336
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
337 # Populate data dictionary, which relates chromosome and position to the following:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
338 # depth of coverage
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
339 # allele frequency
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
340 # forward strand allele frequency
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
341 # reverse strand allele frequency
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
342 # dp4 - depth of coverage of ref allele fwd strand, DOC of ref allele rev strand, DOC of alt allele fwd strand, DOC of alt allele rev strand
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
343 for line in in_mpileup:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
344 fields = line.strip().split()
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
345 if fields[0] not in to_examine:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
346 continue
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
347 if int(fields[1]) not in to_examine[fields[0]]:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
348 continue
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
349 (ref_base, alt_base) = to_examine[fields[0]][int(fields[1])]
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
350 if len(ref_base.split(',')) > 1: # Can't handle multiple ref alleles
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
351 continue
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
352 if len(alt_base.split(',')) > 1: # Can't handle multiple alt alleles
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
353 continue
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
354 if len(ref_base) > 1 or len(alt_base) > 1:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
355 if len(ref_base) > 1 and len(alt_base) > 1: # Can't handle complex indels
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
356 continue
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
357 data[(fields[0], int(fields[1]))] = parseIndelPileup(fields, ref_base, alt_base)
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
358 if len(ref_base) == 1 and len(alt_base) == 1:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
359 data[(fields[0], int(fields[1]))] = parseSimpleSNPpileup(fields, ref_base, alt_base)
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
360 in_mpileup.close()
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
361 # Reopen vcf, this time, skip header, annotate all the sites for which there is an entry in data dictionary
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
362 # (Sites without entries have either multiple ref or alt bases, or have complex indels. Not supported (for now), and not reported as a result)
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
363 in_vcf = open(in_vcf_filepath, 'r')
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
364 for line in in_vcf:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
365 if line[0] == '#':
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
366 continue
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
367 fields = line.strip().split('\t')
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
368 if (fields[0], int(fields[1])) not in data:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
369 continue
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
370 (dp, af, faf, raf, dp4) = data[(fields[0], int(fields[1]))]
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
371 dp2x2 = [[dp4[0], dp4[1]], [dp4[2], dp4[3]]]
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
372 _, p_val = fisher_exact(dp2x2)
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
373 sb = pval_to_phredqual(p_val)
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
374 if fields[7] == "":
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
375 info = []
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
376 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
377 info = fields[7].split(';')
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
378 info.append("DP=%d" % (dp))
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
379 info.append("AF=%.6f" % (af))
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
380 if isnan(faf):
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
381 info.append("FAF=NaN")
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
382 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
383 info.append("FAF=%.6f" % (faf))
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
384 if isnan(raf):
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
385 info.append("RAF=NaN")
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
386 else:
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
387 info.append("RAF=%.6f" % (raf))
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
388 info.append("SB=%d" % (sb))
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
389 info.append("DP4=%s" % (','.join([str(x) for x in dp4])))
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
390 new_info = ';'.join(info)
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
391 fields[7] = new_info
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
392 out_vcf.write("%s\n" % ("\t".join(fields)))
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
393 in_vcf.close()
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
394 out_vcf.close()
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
395
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
396
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
397 if __name__ == "__main__":
148f32860445 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff changeset
398 annotateVCF(sys.argv[1], sys.argv[2], sys.argv[3])