annotate quality_filter.py @ 0:8d65bbc52dfe draft

Imported from capsule None
author devteam
date Tue, 01 Apr 2014 10:52:42 -0400
parents
children 6c6f15373f96
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/env python
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
2 #Guruprasad Ananda
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
3 """
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
4 Filter based on nucleotide quality (PHRED score).
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
5
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
6 usage: %prog input out_file primary_species mask_species score mask_char mask_region mask_region_length
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
7 """
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
8
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
9
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
10 from __future__ import division
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
11 from galaxy import eggs
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
12 import pkg_resources
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
13 pkg_resources.require( "lrucache" )
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
14 import numpy
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
15
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
16 import sys
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
17 import os, os.path
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
18 from UserDict import DictMixin
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
19 from bx.binned_array import FileBinnedArray
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
20 from bx.bitset import *
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
21 from bx.bitset_builders import *
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
22 from bx.cookbook import doc_optparse
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
23 from galaxy.tools.exception_handling import *
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
24 import bx.align.maf
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
25
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
26 class FileBinnedArrayDir( DictMixin ):
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
27 """
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
28 Adapter that makes a directory of FileBinnedArray files look like
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
29 a regular dict of BinnedArray objects.
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
30 """
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
31 def __init__( self, dir ):
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
32 self.dir = dir
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
33 self.cache = dict()
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
34 def __getitem__( self, key ):
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
35 value = None
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
36 if key in self.cache:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
37 value = self.cache[key]
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
38 else:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
39 fname = os.path.join( self.dir, "%s.qa.bqv" % key )
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
40 if os.path.exists( fname ):
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
41 value = FileBinnedArray( open( fname ) )
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
42 self.cache[key] = value
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
43 if value is None:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
44 raise KeyError( "File does not exist: " + fname )
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
45 return value
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
46
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
47 def stop_err(msg):
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
48 sys.stderr.write(msg)
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
49 sys.exit()
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
50
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
51 def load_scores_ba_dir( dir ):
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
52 """
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
53 Return a dict-like object (keyed by chromosome) that returns
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
54 FileBinnedArray objects created from "key.ba" files in `dir`
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
55 """
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
56 return FileBinnedArrayDir( dir )
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
57
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
58 def bitwise_and ( string1, string2, maskch ):
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
59 result = []
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
60 for i, ch in enumerate(string1):
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
61 try:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
62 ch = int(ch)
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
63 except:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
64 pass
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
65 if string2[i] == '-':
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
66 ch = 1
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
67 if ch and string2[i]:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
68 result.append(string2[i])
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
69 else:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
70 result.append(maskch)
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
71 return ''.join(result)
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
72
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
73 def main():
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
74 # Parsing Command Line here
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
75 options, args = doc_optparse.parse( __doc__ )
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
76
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
77 try:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
78 #chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols )
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
79 inp_file, out_file, pri_species, mask_species, qual_cutoff, mask_chr, mask_region, mask_length, loc_file = args
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
80 qual_cutoff = int(qual_cutoff)
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
81 mask_chr = int(mask_chr)
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
82 mask_region = int(mask_region)
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
83 if mask_region != 3:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
84 mask_length = int(mask_length)
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
85 else:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
86 mask_length_r = int(mask_length.split(',')[0])
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
87 mask_length_l = int(mask_length.split(',')[1])
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
88 except:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
89 stop_err( "Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset." )
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
90
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
91 if pri_species == 'None':
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
92 stop_err( "No primary species selected, try again by selecting at least one primary species." )
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
93 if mask_species == 'None':
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
94 stop_err( "No mask species selected, try again by selecting at least one species to mask." )
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
95
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
96 mask_chr_count = 0
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
97 mask_chr_dict = {0:'#', 1:'$', 2:'^', 3:'*', 4:'?', 5:'N'}
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
98 mask_reg_dict = {0:'Current pos', 1:'Current+Downstream', 2:'Current+Upstream', 3:'Current+Both sides'}
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
99
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
100 #ensure dbkey is present in the twobit loc file
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
101 try:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
102 pspecies_all = pri_species.split(',')
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
103 pspecies_all2 = pri_species.split(',')
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
104 pspecies = []
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
105 filepaths = []
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
106 for line in open(loc_file):
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
107 if pspecies_all2 == []:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
108 break
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
109 if line[0:1] == "#":
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
110 continue
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
111 fields = line.split('\t')
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
112 try:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
113 build = fields[0]
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
114 for i, dbkey in enumerate(pspecies_all2):
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
115 if dbkey == build:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
116 pspecies.append(build)
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
117 filepaths.append(fields[1])
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
118 del pspecies_all2[i]
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
119 else:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
120 continue
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
121 except:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
122 pass
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
123 except Exception, exc:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
124 stop_err( 'Initialization errorL %s' % str( exc ) )
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
125
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
126 if len(pspecies) == 0:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
127 stop_err( "Quality scores are not available for the following genome builds: %s" % ( pspecies_all2 ) )
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
128 if len(pspecies) < len(pspecies_all):
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
129 print "Quality scores are not available for the following genome builds: %s" % (pspecies_all2)
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
130
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
131 scores_by_chrom = []
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
132 #Get scores for all the primary species
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
133 for file in filepaths:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
134 scores_by_chrom.append(load_scores_ba_dir( file.strip() ))
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
135
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
136 try:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
137 maf_reader = bx.align.maf.Reader( open(inp_file, 'r') )
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
138 maf_writer = bx.align.maf.Writer( open(out_file,'w') )
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
139 except Exception, e:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
140 stop_err( "Your MAF file appears to be malformed: %s" % str( e ) )
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
141
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
142 maf_count = 0
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
143 for block in maf_reader:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
144 status_strings = []
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
145 for seq in range (len(block.components)):
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
146 src = block.components[seq].src
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
147 dbkey = src.split('.')[0]
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
148 chr = src.split('.')[1]
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
149 if not (dbkey in pspecies):
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
150 continue
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
151 else: #enter if the species is a primary species
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
152 index = pspecies.index(dbkey)
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
153 sequence = block.components[seq].text
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
154 s_start = block.components[seq].start
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
155 size = len(sequence) #this includes the gaps too
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
156 status_str = '1'*size
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
157 status_list = list(status_str)
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
158 if status_strings == []:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
159 status_strings.append(status_str)
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
160 ind = 0
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
161 s_end = block.components[seq].end
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
162 #Get scores for the entire sequence
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
163 try:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
164 scores = scores_by_chrom[index][chr][s_start:s_end]
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
165 except:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
166 continue
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
167 pos = 0
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
168 while pos < (s_end-s_start):
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
169 if sequence[ind] == '-': #No score for GAPS
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
170 ind += 1
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
171 continue
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
172 score = scores[pos]
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
173 if score < qual_cutoff:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
174 score = 0
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
175
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
176 if not(score):
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
177 if mask_region == 0: #Mask Corresponding position only
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
178 status_list[ind] = '0'
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
179 ind += 1
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
180 pos += 1
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
181 elif mask_region == 1: #Mask Corresponding position + downstream neighbors
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
182 for n in range(mask_length+1):
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
183 try:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
184 status_list[ind+n] = '0'
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
185 except:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
186 pass
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
187 ind = ind + mask_length + 1
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
188 pos = pos + mask_length + 1
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
189 elif mask_region == 2: #Mask Corresponding position + upstream neighbors
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
190 for n in range(mask_length+1):
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
191 try:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
192 status_list[ind-n] = '0'
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
193 except:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
194 pass
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
195 ind += 1
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
196 pos += 1
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
197 elif mask_region == 3: #Mask Corresponding position + neighbors on both sides
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
198 for n in range(-mask_length_l, mask_length_r+1):
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
199 try:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
200 status_list[ind+n] = '0'
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
201 except:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
202 pass
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
203 ind = ind + mask_length_r + 1
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
204 pos = pos + mask_length_r + 1
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
205 else:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
206 pos += 1
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
207 ind += 1
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
208
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
209 status_strings.append(''.join(status_list))
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
210
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
211 if status_strings == []: #this block has no primary species
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
212 continue
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
213 output_status_str = status_strings[0]
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
214 for stat in status_strings[1:]:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
215 try:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
216 output_status_str = bitwise_and (status_strings[0], stat, '0')
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
217 except Exception, e:
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
218 break
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
219
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
220 for seq in range (len(block.components)):
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
221 src = block.components[seq].src
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
222 dbkey = src.split('.')[0]
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
223 if dbkey not in mask_species.split(','):
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
224 continue
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
225 sequence = block.components[seq].text
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
226 sequence = bitwise_and (output_status_str, sequence, mask_chr_dict[mask_chr])
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
227 block.components[seq].text = sequence
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
228 mask_chr_count += output_status_str.count('0')
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
229 maf_writer.write(block)
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
230 maf_count += 1
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
231
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
232 maf_reader.close()
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
233 maf_writer.close()
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
234 print "No. of blocks = %d; No. of masked nucleotides = %s; Mask character = %s; Mask region = %s; Cutoff used = %d" % (maf_count, mask_chr_count, mask_chr_dict[mask_chr], mask_reg_dict[mask_region], qual_cutoff)
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
235
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
236
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
237 if __name__ == "__main__":
8d65bbc52dfe Imported from capsule None
devteam
parents:
diff changeset
238 main()