Mercurial > repos > iuc > medaka_variant
annotate annotateVCF.py @ 8:edf6d4003fad draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e0684fe95538cf97b1199ad1072d3da6d1619443"
author | iuc |
---|---|
date | Tue, 23 Feb 2021 20:13:39 +0000 |
parents | 19f3b583a9b3 |
children |
rev | line source |
---|---|
3
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
2 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
3 # Takes in VCF file and a samtools mpileup output file |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
4 # Fills in annotation for the VCF file including AF, DP |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
5 # SB, and DP4 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
6 # |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
7 # Usage statement: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
8 # python annotateVCF.py in_vcf.vcf in_mpileup.txt out_vcf.vcf |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
9 # |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
10 # Can generate in_mpileup.txt with samtools mpileup (and can restrict which sites to generate pileups for with in_vcf.vcf) |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
11 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
12 # 08/24/2020 - Nathan P. Roach, natproach@gmail.com |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
13 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
14 import sys |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
15 from math import isnan, log10 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
16 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
17 from scipy.stats import fisher_exact |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
18 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
19 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
def pval_to_phredqual(pval):
    """Convert a p-value to a Phred-scaled quality, rounded to an int.

    The Phred score is ``-10 * log10(pval)``.

    Args:
        pval: p-value, expected to lie in [0, 1].

    Returns:
        The rounded Phred score as an int. A p-value of exactly 0 (e.g. a
        test result that underflowed) has no finite Phred score, so the
        largest signed 32-bit integer (2147483647) is returned instead.

    Raises:
        ValueError: if ``pval`` is negative or NaN.
    """
    if pval == 0:
        # log10(0) is a math domain error; report the maximum score instead
        # of aborting the whole annotation run.
        return 2147483647
    return int(round(-10. * log10(pval)))
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
22 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
23 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
def parseSimpleSNPpileup(fields, ref_base, alt_base):
    """Tally base calls at one pileup position for a single-base variant.

    Args:
        fields: split columns of one samtools mpileup line. Only
            ``fields[2]`` (reference base), ``fields[3]`` (depth) and
            ``fields[4]`` (read-base string) are read.
        ref_base: reference allele of the variant (single A/C/G/T, any case).
        alt_base: alternate allele of the variant (single A/C/G/T, any case).

    Returns:
        A tuple ``(dp, af, faf, raf, dp4)`` where
        ``dp`` is ``int(fields[3])`` (taken as reported, not recounted),
        ``af`` is the fraction of counted A/C/G/T calls matching ``alt_base``,
        ``faf`` / ``raf`` are the same fraction restricted to forward
        (upper-case) / reverse (lower-case) calls, and
        ``dp4`` is ``[ref fwd, ref rev, alt fwd, alt rev]`` counts.
        A fraction is NaN when its denominator is zero.

    Raises:
        KeyError: if the pileup reference base, ``ref_base``, ``alt_base`` or
            a base call in the read string is not one of A/C/G/T (any case).
        ValueError: if the depth column is not an integer or an indel marker
            is not followed by a length.
    """
    base_to_idx = {
        'A': 0, 'a': 0,
        'T': 1, 't': 1,
        'C': 2, 'c': 2,
        'G': 3, 'g': 3,
    }
    # Upper-case (forward strand) calls occupy slots 0-3, lower-case
    # (reverse strand) calls occupy slots 4-7.
    base_to_idx_stranded = {
        'A': 0, 'T': 1, 'C': 2, 'G': 3,
        'a': 4, 't': 5, 'c': 6, 'g': 7,
    }
    # Symbols that carry no base call: end-of-read marker, deleted-base
    # placeholders on either strand, reference skips, and ambiguous calls.
    no_call = {'$', '*', '#', '<', '>', 'N', 'n'}

    pileup_ref = fields[2]
    ref_idx = base_to_idx[pileup_ref]
    # Normalise case explicitly: a soft-masked (lower-case) reference base
    # must not push forward '.' matches into the reverse-strand slot.
    fwd_ref_slot = base_to_idx_stranded[pileup_ref.upper()]
    rev_ref_slot = base_to_idx_stranded[pileup_ref.lower()]
    dp = int(fields[3])

    counts = [0] * 4
    stranded_counts = [0] * 8

    skip_next = False         # previous symbol was '^'; this one is a MAPQ char
    reading_length = False    # inside the digits following '+' or '-'
    length_digits = ""
    indel_remaining = 0       # indel sequence characters still to be skipped

    for base in fields[4]:
        if skip_next:
            skip_next = False
            continue
        if indel_remaining > 0:
            indel_remaining -= 1
            continue
        if reading_length:
            if base.isdigit():
                length_digits += base
            else:
                # This first non-digit is already the first indel base,
                # hence one fewer character remains to be skipped.
                indel_remaining = int(length_digits) - 1
                length_digits = ""
                reading_length = False
            continue

        if base == '^':
            skip_next = True
        elif base == '+' or base == '-':
            reading_length = True
        elif base == '.':
            counts[ref_idx] += 1
            stranded_counts[fwd_ref_slot] += 1
        elif base == ',':
            counts[ref_idx] += 1
            stranded_counts[rev_ref_slot] += 1
        elif base in no_call:
            continue
        else:
            counts[base_to_idx[base]] += 1
            stranded_counts[base_to_idx_stranded[base]] += 1

    nan = float("nan")
    total = sum(counts)
    fwd_total = sum(stranded_counts[0:4])
    rev_total = sum(stranded_counts[4:])
    alt_fwd = stranded_counts[base_to_idx_stranded[alt_base.upper()]]
    alt_rev = stranded_counts[base_to_idx_stranded[alt_base.lower()]]

    af = float(counts[base_to_idx[alt_base]]) / float(total) if total else nan
    faf = float(alt_fwd) / float(fwd_total) if fwd_total else nan
    raf = float(alt_rev) / float(rev_total) if rev_total else nan

    dp4 = [stranded_counts[base_to_idx_stranded[ref_base.upper()]],
           stranded_counts[base_to_idx_stranded[ref_base.lower()]],
           alt_fwd,
           alt_rev]
    return (dp, af, faf, raf, dp4)
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
129 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
130 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
131 def parseIndelPileup(fields, ref_base, alt_base): |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
132 counts = [0, 0, 0, 0, 0, 0, 0, 0, 0] # indel ref match, indel fwd ref match, indel rev ref match, indel alt match, indel fwd alt match, indel rev alt match, other, other fwd, other rev |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
133 ref_base2 = fields[2] |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
134 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
135 carrot_flag = False |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
136 ins_flag = False |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
137 ins_str = "" |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
138 ins_len = 0 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
139 del_flag = False |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
140 del_str = "" |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
141 del_len = 0 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
142 first_iter = True |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
143 forward_flag = False |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
144 last_seq = "" |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
145 last_seq_code = 'b' |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
146 for base in fields[4]: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
147 if ins_flag: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
148 if base.isdigit(): |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
149 ins_str += base |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
150 else: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
151 ins_len = int(ins_str) |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
152 ins_flag = False |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
153 if del_flag: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
154 if base.isdigit(): |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
155 del_str += base |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
156 else: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
157 del_len = int(del_str) |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
158 del_flag = False |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
159 if ins_len > 0: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
160 last_seq += base |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
161 last_seq_code = 'i' |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
162 ins_len -= 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
163 continue |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
164 if del_len > 0: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
165 last_seq += base |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
166 last_seq_code = 'd' |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
167 del_len -= 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
168 continue |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
169 if carrot_flag: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
170 carrot_flag = False |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
171 continue |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
172 if base == '.' or base == ','\ |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
173 or base == 'A' or base == 'a'\ |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
174 or base == 'C' or base == 'c'\ |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
175 or base == 'G' or base == 'g'\ |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
176 or base == 'T' or base == 't'\ |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
177 or base == 'N' or base == 'n'\ |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
178 or base == '>' or base == '<'\ |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
179 or base == '*' or base == '#': |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
180 if first_iter: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
181 first_iter = False |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
182 else: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
183 if last_seq_code == 'i': |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
184 if last_seq.upper() == alt_base.upper(): |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
185 counts[3] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
186 if forward_flag: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
187 counts[4] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
188 else: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
189 counts[5] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
190 else: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
191 counts[6] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
192 if forward_flag: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
193 counts[7] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
194 else: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
195 counts[8] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
196 elif last_seq_code == 'd': |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
197 if last_seq.upper() == ref_base.upper(): |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
198 counts[3] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
199 if forward_flag: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
200 counts[4] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
201 else: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
202 counts[5] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
203 else: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
204 counts[6] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
205 if forward_flag: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
206 counts[7] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
207 else: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
208 counts[8] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
209 elif last_seq_code == 'b': |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
210 if last_seq.upper() == ref_base.upper(): |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
211 counts[0] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
212 if forward_flag: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
213 counts[1] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
214 else: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
215 counts[2] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
216 elif last_seq.upper() == alt_base.upper(): |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
217 counts[3] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
218 if forward_flag: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
219 counts[4] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
220 else: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
221 counts[5] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
222 else: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
223 counts[6] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
224 if forward_flag: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
225 counts[7] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
226 else: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
227 counts[8] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
228 if base == '.': |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
229 last_seq = ref_base2 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
230 forward_flag = True |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
231 last_seq_code = 'b' |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
232 elif base == ',': |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
233 last_seq = ref_base2 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
234 forward_flag = False |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
235 last_seq_code = 'b' |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
236 elif base == '>' or base == '<' or base == '*' or base == '#': |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
237 continue |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
238 else: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
239 forward_flag = base.isupper() |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
240 last_seq = base.upper() |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
241 last_seq_code = 'b' |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
242 elif base == '+': |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
243 ins_flag = True |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
244 ins_str = "" |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
245 elif base == '-': |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
246 del_flag = True |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
247 del_str = "" |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
248 elif base == '^': |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
249 carrot_flag = True |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
250 elif base == '$': |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
251 continue |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
252 if first_iter: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
253 first_iter = False |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
254 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
255 if last_seq_code == 'i': |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
256 if last_seq.upper() == alt_base.upper(): |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
257 counts[3] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
258 if forward_flag: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
259 counts[4] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
260 else: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
261 counts[5] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
262 else: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
263 counts[6] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
264 if forward_flag: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
265 counts[7] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
266 else: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
267 counts[8] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
268 elif last_seq_code == 'd': |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
269 if last_seq.upper() == ref_base.upper(): |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
270 counts[3] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
271 if forward_flag: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
272 counts[4] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
273 else: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
274 counts[5] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
275 else: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
276 counts[6] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
277 if forward_flag: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
278 counts[7] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
279 else: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
280 counts[8] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
281 elif last_seq_code == 'b': |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
282 if last_seq.upper() == ref_base.upper(): |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
283 counts[0] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
284 if forward_flag: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
285 counts[1] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
286 else: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
287 counts[2] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
288 elif last_seq.upper() == alt_base.upper(): |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
289 counts[3] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
290 if forward_flag: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
291 counts[4] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
292 else: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
293 counts[5] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
294 else: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
295 counts[6] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
296 if forward_flag: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
297 counts[7] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
298 else: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
299 counts[8] += 1 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
300 dp = int(fields[3]) |
5
19f3b583a9b3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit 13769e7d51b30a1d15eb62a9ba89ee2064f3ddc3"
iuc
parents:
3
diff
changeset
|
301 if sum([counts[0], counts[3], counts[6]]) == 0: |
19f3b583a9b3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit 13769e7d51b30a1d15eb62a9ba89ee2064f3ddc3"
iuc
parents:
3
diff
changeset
|
302 af = float("nan") |
19f3b583a9b3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit 13769e7d51b30a1d15eb62a9ba89ee2064f3ddc3"
iuc
parents:
3
diff
changeset
|
303 else: |
19f3b583a9b3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit 13769e7d51b30a1d15eb62a9ba89ee2064f3ddc3"
iuc
parents:
3
diff
changeset
|
304 af = float(counts[3]) / float(sum([counts[0], counts[3], counts[6]])) |
3
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
305 if sum([counts[1], counts[4], counts[7]]) == 0: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
306 faf = float("nan") |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
307 else: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
308 faf = float(counts[4]) / float(sum([counts[1], counts[4], counts[7]])) |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
309 if sum([counts[2], counts[5], counts[8]]) == 0: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
310 raf = float("nan") |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
311 else: |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
312 raf = float(counts[5]) / float(sum([counts[2], counts[5], counts[8]])) |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
313 dp4 = [counts[1], counts[2], counts[4], counts[5]] |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
314 return (dp, af, faf, raf, dp4) |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
315 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
316 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
def annotateVCF(in_vcf_filepath, in_mpileup_filepath, out_vcf_filepath):
    """Annotate VCF records with DP, AF, FAF, RAF, SB and DP4 from a pileup.

    The input VCF is read twice: once to copy the header and collect the
    sites that need annotation, and once to write the annotated records.
    In between, the mpileup file is scanned and only the collected sites
    are parsed.

    Records that are multi-allelic (comma in REF or ALT), that are complex
    indels (both REF and ALT longer than one base), or that have no line in
    the mpileup file are not written to the output at all.

    Args:
        in_vcf_filepath: Path of the VCF file to annotate.
        in_mpileup_filepath: Path of the samtools mpileup output.
        out_vcf_filepath: Path the annotated VCF is written to.
    """
    with open(in_vcf_filepath, 'r') as in_vcf, \
            open(in_mpileup_filepath, 'r') as in_mpileup, \
            open(out_vcf_filepath, 'w') as out_vcf:

        # First pass over the VCF: copy the "##" meta lines, insert the new
        # INFO definitions just before the "#CHROM" column header, and record
        # chrom -> {pos: (ref, alt)} so that only those pileup lines are
        # parsed later (limits memory usage).
        to_examine = {}
        for line in in_vcf:
            if not line.strip():
                continue  # tolerate blank lines instead of raising IndexError
            if line[0:2] == "##":
                out_vcf.write(line)
            elif line[0] == "#":
                out_vcf.write("##annotateVCFVersion=0.2\n")
                out_vcf.write("##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Raw Depth\">\n")
                out_vcf.write("##INFO=<ID=AF,Number=1,Type=Float,Description=\"Allele Frequency\">\n")
                out_vcf.write("##INFO=<ID=FAF,Number=1,Type=Float,Description=\"Forward Allele Frequency\">\n")
                out_vcf.write("##INFO=<ID=RAF,Number=1,Type=Float,Description=\"Reverse Allele Frequency\">\n")
                out_vcf.write("##INFO=<ID=SB,Number=1,Type=Integer,Description=\"Phred-scaled strand bias at this position\">\n")
                out_vcf.write("##INFO=<ID=DP4,Number=4,Type=Integer,Description=\"Counts for ref-forward bases, ref-reverse, alt-forward and alt-reverse bases\">\n")
                out_vcf.write(line)
            else:
                fields = line.strip().split()
                to_examine.setdefault(fields[0], {})[int(fields[1])] = (fields[3], fields[4])

        # Scan the pileup and build (chrom, pos) -> (dp, af, faf, raf, dp4)
        # for every supported site.
        data = {}
        for line in in_mpileup:
            fields = line.strip().split()
            if not fields:
                continue  # blank line
            chrom = fields[0]
            if chrom not in to_examine:
                continue
            pos = int(fields[1])
            if pos not in to_examine[chrom]:
                continue
            (ref_base, alt_base) = to_examine[chrom][pos]
            if ',' in ref_base or ',' in alt_base:  # Can't handle multiple ref/alt alleles
                continue
            if len(ref_base) > 1 and len(alt_base) > 1:  # Can't handle complex indels
                continue
            if len(ref_base) > 1 or len(alt_base) > 1:
                data[(chrom, pos)] = parseIndelPileup(fields, ref_base, alt_base)
            elif len(ref_base) == 1 and len(alt_base) == 1:
                data[(chrom, pos)] = parseSimpleSNPpileup(fields, ref_base, alt_base)

        # Second pass over the VCF: skip the header (already written) and
        # annotate every record that has an entry in data. Records without
        # an entry are unsupported (see docstring) and are not reported.
        in_vcf.seek(0)
        for line in in_vcf:
            if not line.strip() or line[0] == '#':
                continue
            fields = line.strip().split('\t')
            site = (fields[0], int(fields[1]))
            if site not in data:
                continue
            (dp, af, faf, raf, dp4) = data[site]
            # Strand bias: Fisher's exact test on the 2x2 table
            # [[ref-fwd, ref-rev], [alt-fwd, alt-rev]], reported Phred-scaled.
            _, p_val = fisher_exact([[dp4[0], dp4[1]], [dp4[2], dp4[3]]])
            sb = pval_to_phredqual(p_val)
            # "." is the VCF missing-value marker; keeping it would produce
            # the malformed INFO string ".;DP=...".
            if fields[7] in ("", "."):
                info = []
            else:
                info = fields[7].split(';')
            info.append("DP=%d" % (dp))
            info.append("AF=NaN" if isnan(af) else "AF=%.6f" % (af))
            info.append("FAF=NaN" if isnan(faf) else "FAF=%.6f" % (faf))
            info.append("RAF=NaN" if isnan(raf) else "RAF=%.6f" % (raf))
            info.append("SB=%d" % (sb))
            info.append("DP4=%s" % (','.join([str(x) for x in dp4])))
            fields[7] = ';'.join(info)
            out_vcf.write("%s\n" % ("\t".join(fields)))
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
406 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
407 |
e86fcef8ed91
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
iuc
parents:
diff
changeset
|
if __name__ == "__main__":
    # Command-line entry point. Expects three positional arguments: the input
    # VCF, the samtools mpileup output, and the path of the annotated VCF to
    # write. Extra arguments are ignored, as before.
    if len(sys.argv) < 4:
        sys.exit("Usage: %s <in.vcf> <in.mpileup> <out.vcf>" % sys.argv[0])
    annotateVCF(sys.argv[1], sys.argv[2], sys.argv[3])