annotate tools/regVariation/maf_cpg_filter.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 #Guruprasad Ananda
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 #Adapted from bx/scripts/maf_mask_cpg.py
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 Mask out potential CpG sites from a maf. Restricted or inclusive definition
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 of CpG sites can be used. The total fraction masked is printed to stderr.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 usage: %prog input_maf output_maf sitetype definition [options]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 -m, --mask=N: Index of the mask character to use (0='#', 1='$', 2='^', 3='*', 4='?', 5='N'; 0 is default)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 from galaxy import eggs
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 import pkg_resources
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 pkg_resources.require( "bx-python" )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 pkg_resources.require( "numpy" )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 pass
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 import bx.align
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20 import bx.align.maf
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 from bx.cookbook import doc_optparse
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 import sys
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 import bx.align.sitemask.cpg
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 assert sys.version_info[:2] >= ( 2, 4 )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def main():
    """Mask CpG or non-CpG sites in a MAF file and report the masked fraction.

    Command-line options are parsed from the module docstring by
    ``doc_optparse``. Four positional arguments are required:

    * input MAF path
    * output MAF path
    * site type -- ``"CpG"`` selects a CpG filter, any other value selects
      the non-CpG filter
    * definition -- only read for site type ``"CpG"``: ``1`` selects the
      restricted filter, any other integer the inclusive filter

    The optional ``-m/--mask`` value is an integer index into the
    mask-character table below; index 0 is used when it is absent.

    A one-line summary is written to stdout. When the arguments are
    invalid, "Tool initialization error." is written to stderr and the
    process exits with a non-zero status.
    """
    mask_chr_dict = {0: '#', 1: '$', 2: '^', 3: '*', 4: '?', 5: 'N'}

    options, args = doc_optparse.parse( __doc__ )

    # Validate every argument before touching the filesystem so that a bad
    # invocation never truncates the output file.
    try:
        inp_file, out_file, sitetype, definition = args
        if options.mask:
            mask_index = int(options.mask)
        else:
            mask_index = 0
        mask = mask_chr_dict[mask_index]
        # The definition is only meaningful (and only parsed) for CpG sites;
        # non-CpG invocations may pass a non-numeric placeholder here.
        restricted = sitetype == "CpG" and int(definition) == 1
    except (ValueError, TypeError, KeyError):
        sys.stderr.write("Tool initialization error.\n")
        # Non-zero status so callers can detect the failure.
        sys.exit(1)

    if sitetype == "CpG":
        if restricted:
            cpgfilter = bx.align.sitemask.cpg.Restricted( mask=mask )
            defn = "CpG-Restricted"
        else:
            cpgfilter = bx.align.sitemask.cpg.Inclusive( mask=mask )
            defn = "CpG-Inclusive"
    else:
        cpgfilter = bx.align.sitemask.cpg.nonCpG( mask=mask )
        defn = "non-CpG"

    # try/finally (rather than "with") keeps the script usable on the old
    # interpreters permitted by the module-level version assert.
    in_fh = open(inp_file, 'r')
    try:
        out_fh = open(out_file, 'w')
        try:
            reader = bx.align.maf.Reader( in_fh )
            writer = bx.align.maf.Writer( out_fh )
            cpgfilter.run( reader, writer.write )
        finally:
            out_fh.close()
    finally:
        in_fh.close()

    # Guard against an input with no counted bases, which would otherwise
    # raise ZeroDivisionError.
    total = float(cpgfilter.total)
    if total:
        percent = float(cpgfilter.masked) / total * 100
    else:
        percent = 0.0
    sys.stdout.write("%2.2f percent bases masked; Mask character = %s, Definition = %s\n" % (percent, mask, defn))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 if __name__ == "__main__":
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 main()