annotate microsats_mutability.py @ 1:372b2f5668f3 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/microsats_mutability commit a1517c9d22029095120643bbe2c8fa53754dd2b7
author devteam
date Wed, 11 Nov 2015 12:21:34 -0500
parents 4aa1ee5d8510
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/env python
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
2 #Guruprasad Ananda
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
3 """
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
4 This tool computes microsatellite mutability for the orthologous microsatellites fetched from 'Extract Orthologous Microsatellites from pair-wise alignments' tool.
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
5 """
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
6 import fileinput
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
7 import string
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
8 import sys
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
9 import tempfile
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
10 from galaxy.tools.util.galaxyops import *
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
11 from bx.intervals.io import *
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
12 from bx.intervals.operations import quicksect
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
13
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
14 fout = open(sys.argv[2],'w')
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
15 p_group = int(sys.argv[3]) #primary "group-by" feature
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
16 p_bin_size = int(sys.argv[4])
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
17 s_group = int(sys.argv[5]) #sub-group by feature
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
18 s_bin_size = int(sys.argv[6])
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
19 mono_threshold = 9
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
20 non_mono_threshold = 4
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
21 p_group_cols = [p_group, p_group+7]
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
22 s_group_cols = [s_group, s_group+7]
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
23 num_generations = int(sys.argv[7])
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
24 region = sys.argv[8]
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
25 int_file = sys.argv[9]
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
26 if int_file != "None": #User has specified an interval file
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
27 try:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
28 fint = open(int_file, 'r')
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
29 dbkey_i = sys.argv[10]
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
30 chr_col_i, start_col_i, end_col_i, strand_col_i = parse_cols_arg( sys.argv[11] )
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
31 except:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
32 stop_err("Unable to open input Interval file")
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
33
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
34
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
35 def stop_err(msg):
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
36 sys.stderr.write(msg)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
37 sys.exit()
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
38
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
39
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
40 def reverse_complement(text):
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
41 DNA_COMP = string.maketrans( "ACGTacgt", "TGCAtgca" )
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
42 comp = [ch for ch in text.translate(DNA_COMP)]
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
43 comp.reverse()
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
44 return "".join(comp)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
45
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
46
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
47 def get_unique_elems(elems):
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
48 seen = set()
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
49 return[x for x in elems if x not in seen and not seen.add(x)]
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
50
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
51
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
52 def get_binned_lists(uniqlist, binsize):
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
53 binnedlist = []
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
54 uniqlist.sort()
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
55 start = int(uniqlist[0])
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
56 bin_ind = 0
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
57 l_ind = 0
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
58 binnedlist.append([])
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
59 while l_ind < len(uniqlist):
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
60 elem = int(uniqlist[l_ind])
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
61 if elem in range(start, start+binsize):
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
62 binnedlist[bin_ind].append(elem)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
63 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
64 start += binsize
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
65 bin_ind += 1
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
66 binnedlist.append([])
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
67 binnedlist[bin_ind].append(elem)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
68 l_ind += 1
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
69 return binnedlist
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
70
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
71
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
72 def fetch_weight(H, C, t):
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
73 if (H-(C-H)) < t:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
74 return 2.0
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
75 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
76 return 1.0
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
77
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
78
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
79 def mutabilityEstimator(repeats1, repeats2, thresholds):
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
80 mut_num = 0.0 #Mutability Numerator
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
81 mut_den = 0.0 #Mutability denominator
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
82 for ind, H in enumerate(repeats1):
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
83 C = repeats2[ind]
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
84 t = thresholds[ind]
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
85 w = fetch_weight(H, C, t)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
86 mut_num += ((H-C)*(H-C)*w)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
87 mut_den += w
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
88 return [mut_num, mut_den]
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
89
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
90
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
91 def output_writer(blk, blk_lines):
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
92 global winspecies, speciesind
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
93 all_elems_1 = []
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
94 all_elems_2 = []
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
95 all_s_elems_1 = []
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
96 all_s_elems_2 = []
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
97 for bline in blk_lines:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
98 if not(bline):
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
99 continue
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
100 items = bline.split('\t')
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
101 seq1 = items[1]
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
102 seq2 = items[8]
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
103 if p_group_cols[0] == 6:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
104 items[p_group_cols[0]] = int(items[p_group_cols[0]])
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
105 items[p_group_cols[1]] = int(items[p_group_cols[1]])
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
106 if s_group_cols[0] == 6:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
107 items[s_group_cols[0]] = int(items[s_group_cols[0]])
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
108 items[s_group_cols[1]] = int(items[s_group_cols[1]])
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
109 all_elems_1.append(items[p_group_cols[0]]) #primary col elements for species 1
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
110 all_elems_2.append(items[p_group_cols[1]]) #primary col elements for species 2
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
111 if s_group_cols[0] != -1: #sub-group is not None
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
112 all_s_elems_1.append(items[s_group_cols[0]]) #secondary col elements for species 1
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
113 all_s_elems_2.append(items[s_group_cols[1]]) #secondary col elements for species 2
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
114 uniq_elems_1 = get_unique_elems(all_elems_1)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
115 uniq_elems_2 = get_unique_elems(all_elems_2)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
116 if s_group_cols[0] != -1:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
117 uniq_s_elems_1 = get_unique_elems(all_s_elems_1)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
118 uniq_s_elems_2 = get_unique_elems(all_s_elems_2)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
119 mut1 = {}
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
120 mut2 = {}
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
121 count1 = {}
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
122 count2 = {}
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
123 """
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
124 if p_group_cols[0] == 7: #i.e. the option chosen is group-by unit(AG, GTC, etc)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
125 uniq_elems_1 = get_unique_units(j.sort(lambda x, y: len(x)-len(y)))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
126 """
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
127 if p_group_cols[0] == 6: #i.e. the option chosen is group-by repeat number.
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
128 uniq_elems_1 = get_binned_lists( uniq_elems_1, p_bin_size )
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
129 uniq_elems_2 = get_binned_lists( uniq_elems_2, p_bin_size )
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
130
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
131 if s_group_cols[0] == 6: #i.e. the option chosen is subgroup-by repeat number.
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
132 uniq_s_elems_1 = get_binned_lists( uniq_s_elems_1, s_bin_size )
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
133 uniq_s_elems_2 = get_binned_lists( uniq_s_elems_2, s_bin_size )
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
134
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
135 for pitem1 in uniq_elems_1:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
136 #repeats1 = []
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
137 #repeats2 = []
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
138 thresholds = []
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
139 if s_group_cols[0] != -1: #Sub-group by feature is not None
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
140 for sitem1 in uniq_s_elems_1:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
141 repeats1 = []
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
142 repeats2 = []
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
143 if type(sitem1) == type(''):
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
144 sitem1 = sitem1.strip()
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
145 for bline in blk_lines:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
146 belems = bline.split('\t')
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
147 if type(pitem1) == list:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
148 if p_group_cols[0] == 6:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
149 belems[p_group_cols[0]] = int(belems[p_group_cols[0]])
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
150 if belems[p_group_cols[0]] in pitem1:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
151 if belems[s_group_cols[0]] == sitem1:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
152 repeats1.append(int(belems[6]))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
153 repeats2.append(int(belems[13]))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
154 if belems[4] == 'mononucleotide':
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
155 thresholds.append(mono_threshold)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
156 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
157 thresholds.append(non_mono_threshold)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
158 mut1[str(pitem1)+'\t'+str(sitem1)] = mutabilityEstimator( repeats1, repeats2, thresholds )
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
159 if region == 'align':
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
160 count1[str(pitem1)+'\t'+str(sitem1)] = min( sum(repeats1), sum(repeats2) )
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
161 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
162 if winspecies == 1:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
163 count1["%s\t%s" % ( pitem1, sitem1 )] = sum(repeats1)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
164 elif winspecies == 2:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
165 count1["%s\t%s" % ( pitem1, sitem1 )] = sum(repeats2)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
166 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
167 if type(sitem1) == list:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
168 if s_group_cols[0] == 6:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
169 belems[s_group_cols[0]] = int(belems[s_group_cols[0]])
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
170 if belems[p_group_cols[0]] == pitem1 and belems[s_group_cols[0]] in sitem1:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
171 repeats1.append(int(belems[6]))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
172 repeats2.append(int(belems[13]))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
173 if belems[4] == 'mononucleotide':
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
174 thresholds.append(mono_threshold)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
175 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
176 thresholds.append(non_mono_threshold)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
177 mut1["%s\t%s" % ( pitem1, sitem1 )] = mutabilityEstimator( repeats1, repeats2, thresholds )
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
178 if region == 'align':
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
179 count1[str(pitem1)+'\t'+str(sitem1)] = min( sum(repeats1), sum(repeats2) )
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
180 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
181 if winspecies == 1:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
182 count1[str(pitem1)+'\t'+str(sitem1)] = sum(repeats1)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
183 elif winspecies == 2:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
184 count1[str(pitem1)+'\t'+str(sitem1)] = sum(repeats2)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
185 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
186 if belems[p_group_cols[0]] == pitem1 and belems[s_group_cols[0]] == sitem1:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
187 repeats1.append(int(belems[6]))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
188 repeats2.append(int(belems[13]))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
189 if belems[4] == 'mononucleotide':
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
190 thresholds.append(mono_threshold)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
191 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
192 thresholds.append(non_mono_threshold)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
193 mut1["%s\t%s" % ( pitem1, sitem1 )] = mutabilityEstimator( repeats1, repeats2, thresholds )
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
194 if region == 'align':
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
195 count1[str(pitem1)+'\t'+str(sitem1)] = min( sum(repeats1), sum(repeats2) )
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
196 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
197 if winspecies == 1:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
198 count1["%s\t%s" % ( pitem1, sitem1 )] = sum(repeats1)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
199 elif winspecies == 2:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
200 count1["%s\t%s" % ( pitem1, sitem1 )] = sum(repeats2)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
201 else: #Sub-group by feature is None
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
202 for bline in blk_lines:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
203 belems = bline.split('\t')
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
204 if type(pitem1) == list:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
205 #print >> sys.stderr, "item: " + str(item1)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
206 if p_group_cols[0] == 6:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
207 belems[p_group_cols[0]] = int(belems[p_group_cols[0]])
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
208 if belems[p_group_cols[0]] in pitem1:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
209 repeats1.append(int(belems[6]))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
210 repeats2.append(int(belems[13]))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
211 if belems[4] == 'mononucleotide':
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
212 thresholds.append(mono_threshold)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
213 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
214 thresholds.append(non_mono_threshold)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
215 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
216 if belems[p_group_cols[0]] == pitem1:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
217 repeats1.append(int(belems[6]))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
218 repeats2.append(int(belems[13]))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
219 if belems[4] == 'mononucleotide':
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
220 thresholds.append(mono_threshold)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
221 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
222 thresholds.append(non_mono_threshold)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
223 mut1["%s" % (pitem1)] = mutabilityEstimator( repeats1, repeats2, thresholds )
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
224 if region == 'align':
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
225 count1["%s" % (pitem1)] = min( sum(repeats1), sum(repeats2) )
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
226 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
227 if winspecies == 1:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
228 count1[str(pitem1)] = sum(repeats1)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
229 elif winspecies == 2:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
230 count1[str(pitem1)] = sum(repeats2)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
231
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
232 for pitem2 in uniq_elems_2:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
233 #repeats1 = []
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
234 #repeats2 = []
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
235 thresholds = []
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
236 if s_group_cols[0] != -1: #Sub-group by feature is not None
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
237 for sitem2 in uniq_s_elems_2:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
238 repeats1 = []
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
239 repeats2 = []
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
240 if type(sitem2)==type(''):
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
241 sitem2 = sitem2.strip()
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
242 for bline in blk_lines:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
243 belems = bline.split('\t')
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
244 if type(pitem2) == list:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
245 if p_group_cols[0] == 6:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
246 belems[p_group_cols[1]] = int(belems[p_group_cols[1]])
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
247 if belems[p_group_cols[1]] in pitem2 and belems[s_group_cols[1]] == sitem2:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
248 repeats2.append(int(belems[13]))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
249 repeats1.append(int(belems[6]))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
250 if belems[4] == 'mononucleotide':
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
251 thresholds.append(mono_threshold)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
252 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
253 thresholds.append(non_mono_threshold)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
254 mut2["%s\t%s" % ( pitem2, sitem2 )] = mutabilityEstimator( repeats2, repeats1, thresholds )
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
255 #count2[str(pitem2)+'\t'+str(sitem2)]=len(repeats2)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
256 if region == 'align':
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
257 count2["%s\t%s" % ( pitem2, sitem2 )] = min( sum(repeats1), sum(repeats2) )
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
258 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
259 if winspecies == 1:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
260 count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats2)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
261 elif winspecies == 2:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
262 count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats1)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
263 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
264 if type(sitem2) == list:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
265 if s_group_cols[0] == 6:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
266 belems[s_group_cols[1]] = int(belems[s_group_cols[1]])
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
267 if belems[p_group_cols[1]] == pitem2 and belems[s_group_cols[1]] in sitem2:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
268 repeats2.append(int(belems[13]))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
269 repeats1.append(int(belems[6]))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
270 if belems[4] == 'mononucleotide':
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
271 thresholds.append(mono_threshold)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
272 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
273 thresholds.append(non_mono_threshold)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
274 mut2["%s\t%s" % ( pitem2, sitem2 )] = mutabilityEstimator( repeats2, repeats1, thresholds )
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
275 if region == 'align':
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
276 count2["%s\t%s" % ( pitem2, sitem2 )] = min( sum(repeats1), sum(repeats2) )
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
277 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
278 if winspecies == 1:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
279 count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats2)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
280 elif winspecies == 2:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
281 count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats1)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
282 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
283 if belems[p_group_cols[1]] == pitem2 and belems[s_group_cols[1]] == sitem2:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
284 repeats1.append(int(belems[13]))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
285 repeats2.append(int(belems[6]))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
286 if belems[4] == 'mononucleotide':
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
287 thresholds.append(mono_threshold)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
288 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
289 thresholds.append(non_mono_threshold)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
290 mut2["%s\t%s" % ( pitem2, sitem2 )] = mutabilityEstimator( repeats2, repeats1, thresholds )
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
291 if region == 'align':
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
292 count2["%s\t%s" % ( pitem2, sitem2 )] = min( sum(repeats1), sum(repeats2) )
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
293 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
294 if winspecies == 1:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
295 count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats2)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
296 elif winspecies == 2:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
297 count2["%s\t%s" % ( pitem2, sitem2 )] = len(repeats1)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
298 else: #Sub-group by feature is None
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
299 for bline in blk_lines:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
300 belems = bline.split('\t')
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
301 if type(pitem2) == list:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
302 if p_group_cols[0] == 6:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
303 belems[p_group_cols[1]] = int(belems[p_group_cols[1]])
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
304 if belems[p_group_cols[1]] in pitem2:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
305 repeats2.append(int(belems[13]))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
306 repeats1.append(int(belems[6]))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
307 if belems[4] == 'mononucleotide':
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
308 thresholds.append(mono_threshold)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
309 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
310 thresholds.append(non_mono_threshold)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
311 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
312 if belems[p_group_cols[1]] == pitem2:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
313 repeats2.append(int(belems[13]))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
314 repeats1.append(int(belems[6]))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
315 if belems[4] == 'mononucleotide':
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
316 thresholds.append(mono_threshold)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
317 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
318 thresholds.append(non_mono_threshold)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
319 mut2["%s" % (pitem2)] = mutabilityEstimator( repeats2, repeats1, thresholds )
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
320 if region == 'align':
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
321 count2["%s" % (pitem2)] = min( sum(repeats1), sum(repeats2) )
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
322 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
323 if winspecies == 1:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
324 count2["%s" % (pitem2)] = sum(repeats2)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
325 elif winspecies == 2:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
326 count2["%s" % (pitem2)] = sum(repeats1)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
327 for key in mut1.keys():
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
328 if key in mut2.keys():
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
329 mut = (mut1[key][0]+mut2[key][0])/(mut1[key][1]+mut2[key][1])
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
330 count = count1[key]
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
331 del mut2[key]
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
332 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
333 unit_found = False
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
334 if p_group_cols[0] == 7 or s_group_cols[0] == 7: #if it is Repeat Unit (AG, GCT etc.) check for reverse-complements too
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
335 if p_group_cols[0] == 7:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
336 this, other = 0, 1
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
337 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
338 this, other = 1, 0
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
339 groups1 = key.split('\t')
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
340 mutn = mut1[key][0]
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
341 mutd = mut1[key][1]
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
342 count = 0
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
343 for key2 in mut2.keys():
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
344 groups2 = key2.split('\t')
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
345 if groups1[other] == groups2[other]:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
346 if groups1[this] in groups2[this]*2 or reverse_complement(groups1[this]) in groups2[this]*2:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
347 #mut = (mut1[key][0]+mut2[key2][0])/(mut1[key][1]+mut2[key2][1])
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
348 mutn += mut2[key2][0]
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
349 mutd += mut2[key2][1]
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
350 count += int(count2[key2])
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
351 unit_found = True
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
352 del mut2[key2]
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
353 #break
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
354 if unit_found:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
355 mut = mutn/mutd
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
356 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
357 mut = mut1[key][0]/mut1[key][1]
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
358 count = count1[key]
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
359 mut = "%.2e" % (mut/num_generations)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
360 if region == 'align':
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
361 print >> fout, str(blk) + '\t'+seq1 + '\t' + seq2 + '\t' +key.strip()+ '\t'+str(mut) + '\t'+ str(count)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
362 elif region == 'win':
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
363 fout.write("%s\t%s\t%s\t%s\n" % ( blk, key.strip(), mut, count ))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
364 fout.flush()
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
365
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
366 #catch any remaining repeats, for instance if the orthologous position contained different repeat units
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
367 for remaining_key in mut2.keys():
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
368 mut = mut2[remaining_key][0]/mut2[remaining_key][1]
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
369 mut = "%.2e" % (mut/num_generations)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
370 count = count2[remaining_key]
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
371 if region == 'align':
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
372 print >> fout, str(blk) + '\t'+seq1 + '\t'+seq2 + '\t'+remaining_key.strip()+ '\t'+str(mut)+ '\t'+ str(count)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
373 elif region == 'win':
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
374 fout.write("%s\t%s\t%s\t%s\n" % ( blk, remaining_key.strip(), mut, count ))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
375 fout.flush()
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
376 #print >> fout, blk + '\t'+remaining_key.strip()+ '\t'+str(mut)+ '\t'+ str(count)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
377
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
378
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
379 def counter(node, start, end, report_func):
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
380 if start <= node.start < end and start < node.end <= end:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
381 report_func(node)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
382 if node.right:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
383 counter(node.right, start, end, report_func)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
384 if node.left:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
385 counter(node.left, start, end, report_func)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
386 elif node.start < start and node.right:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
387 counter(node.right, start, end, report_func)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
388 elif node.start >= end and node.left and node.left.maxend > start:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
389 counter(node.left, start, end, report_func)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
390
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
391
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
392 def main():
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
393 infile = sys.argv[1]
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
394
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
395 for i, line in enumerate( file ( infile )):
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
396 line = line.rstrip('\r\n')
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
397 if len( line )>0 and not line.startswith( '#' ):
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
398 elems = line.split( '\t' )
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
399 break
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
400 if i == 30:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
401 break # Hopefully we'll never get here...
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
402
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
403 if len( elems ) != 15:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
404 stop_err( "This tool only works on tabular data output by 'Extract Orthologous Microsatellites from pair-wise alignments' tool. The data in your input dataset is either missing or not formatted properly." )
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
405 global winspecies, speciesind
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
406 if region == 'win':
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
407 if dbkey_i in elems[1]:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
408 winspecies = 1
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
409 speciesind = 1
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
410 elif dbkey_i in elems[8]:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
411 winspecies = 2
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
412 speciesind = 8
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
413 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
414 stop_err("The species build corresponding to your interval file is not present in the Microsatellite file.")
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
415
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
416 fin = open(infile, 'r')
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
417 skipped = 0
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
418 linestr = ""
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
419
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
420 if region == 'win':
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
421 msats = NiceReaderWrapper( fileinput.FileInput( infile ),
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
422 chrom_col = speciesind,
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
423 start_col = speciesind+1,
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
424 end_col = speciesind+2,
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
425 strand_col = -1,
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
426 fix_strand = True)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
427 msatTree = quicksect.IntervalTree()
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
428 for item in msats:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
429 if type( item ) is GenomicInterval:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
430 msatTree.insert( item, msats.linenum, item.fields )
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
431
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
432 for iline in fint:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
433 try:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
434 iline = iline.rstrip('\r\n')
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
435 if not(iline) or iline == "":
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
436 continue
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
437 ielems = iline.strip("\r\n").split('\t')
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
438 ichr = ielems[chr_col_i]
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
439 istart = int(ielems[start_col_i])
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
440 iend = int(ielems[end_col_i])
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
441 isrc = "%s.%s" % ( dbkey_i, ichr )
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
442 if isrc not in msatTree.chroms:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
443 continue
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
444 result = []
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
445 root = msatTree.chroms[isrc] #root node for the chrom
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
446 counter(root, istart, iend, lambda node: result.append( node ))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
447 if not(result):
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
448 continue
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
449 tmpfile1 = tempfile.NamedTemporaryFile('wb+')
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
450 for node in result:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
451 tmpfile1.write("%s\n" % "\t".join( node.other ))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
452
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
453 tmpfile1.seek(0)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
454 output_writer(iline, tmpfile1.readlines())
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
455 except:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
456 skipped += 1
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
457 if skipped:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
458 print "Skipped %d intervals as invalid." % (skipped)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
459 elif region == 'align':
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
460 if s_group_cols[0] != -1:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
461 print >> fout, "#Window\tSpecies_1\tSpecies_2\tGroupby_Feature\tSubGroupby_Feature\tMutability\tCount"
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
462 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
463 print >> fout, "#Window\tSpecies_1\tWindow_Start\tWindow_End\tSpecies_2\tGroupby_Feature\tMutability\tCount"
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
464 prev_bnum = -1
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
465 try:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
466 for line in fin:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
467 line = line.strip("\r\n")
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
468 if not(line) or line == "":
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
469 continue
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
470 elems = line.split('\t')
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
471 try:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
472 assert int(elems[0])
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
473 assert len(elems) == 15
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
474 except:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
475 continue
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
476 new_bnum = int(elems[0])
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
477 if new_bnum != prev_bnum:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
478 if prev_bnum != -1:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
479 output_writer(prev_bnum, linestr.strip().replace('\r','\n').split('\n'))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
480 linestr = line + "\n"
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
481 else:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
482 linestr += line
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
483 linestr += "\n"
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
484 prev_bnum = new_bnum
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
485 output_writer(prev_bnum, linestr.strip().replace('\r','\n').split('\n'))
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
486 except Exception, ea:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
487 print >> sys.stderr, ea
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
488 skipped += 1
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
489 if skipped:
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
490 print "Skipped %d lines as invalid." % (skipped)
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
491
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
492
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
493 if __name__ == "__main__":
4aa1ee5d8510 Imported from capsule None
devteam
parents:
diff changeset
494 main()