0
|
1 #!/usr/bin/env python
|
|
2 #Guruprasad Ananda
|
|
3 #Adapted from bx/scripts/maf_mask_cpg.py
|
|
4 """
|
|
5 Mask out potential CpG sites from a maf. Restricted or inclusive definition
|
|
6 of CpG sites can be used. The total fraction masked is printed to stderr.
|
|
7
|
|
8 usage: %prog < input > output restricted
|
|
9 -m, --mask=N: Character to use as mask ('?' is default)
|
|
10 """
|
|
11
|
|
12 import bx.align
|
|
13 import bx.align.maf
|
|
14 from bx.cookbook import doc_optparse
|
|
15 import sys
|
|
16 import bx.align.sitemask.cpg
|
|
17
|
|
# Refuse to run on interpreters older than Python 2.4.
assert (2, 4) <= sys.version_info[:2]
|
|
19
|
|
def main():
    """Mask CpG or non-CpG sites in a MAF file and report the masked fraction.

    Options are parsed from the module docstring by doc_optparse. Four
    positional arguments are required, in this order:

        inp_file   -- path of the MAF file to read
        out_file   -- path of the masked MAF file to write
        sitetype   -- "CpG" selects one of the CpG filters; any other value
                      selects the non-CpG filter
        definition -- only consulted when sitetype is "CpG": an integer,
                      where 1 selects the restricted definition and any
                      other value selects the inclusive one

    The -m/--mask option is an integer index into mask_chr_dict below, not
    the mask character itself. When it is absent, index 0 ('#') is used.
    NOTE(review): the module docstring advertises '?' as the default mask,
    which does not match this behaviour.

    On malformed arguments (wrong argument count, non-integer or unknown
    mask index, non-integer CpG definition) "Tool initialization error." is
    written to stderr and the process exits with status 1.
    """
    options, args = doc_optparse.parse( __doc__ )

    # Index -> mask character table selected through -m/--mask.
    mask_chr_dict = {0:'#', 1:'$', 2:'^', 3:'*', 4:'?', 5:'N'}

    # Validate every user-supplied value up front so that all argument
    # problems are reported the same way, before any file is touched.
    try:
        inp_file, out_file, sitetype, definition = args
        if options.mask:
            mask_index = int(options.mask)
        else:
            mask_index = 0
        mask = mask_chr_dict[mask_index]
        restricted = False
        if sitetype == "CpG":
            restricted = int(definition) == 1
    except (ValueError, TypeError, KeyError):
        sys.stderr.write("Tool initialization error.\n")
        # Non-zero status so callers can detect the failure.
        sys.exit(1)

    if sitetype == "CpG":
        if restricted:
            cpgfilter = bx.align.sitemask.cpg.Restricted( mask=mask )
            defn = "CpG-Restricted"
        else:
            cpgfilter = bx.align.sitemask.cpg.Inclusive( mask=mask )
            defn = "CpG-Inclusive"
    else:
        cpgfilter = bx.align.sitemask.cpg.nonCpG( mask=mask )
        defn = "non-CpG"

    # Keep our own references to the file objects so they are always closed
    # (and the output flushed), even if filtering fails part-way through.
    # try/finally is used instead of `with` to stay within the Python
    # version this script declares support for.
    inp = open(inp_file, 'r')
    try:
        out = open(out_file, 'w')
        try:
            reader = bx.align.maf.Reader( inp )
            writer = bx.align.maf.Writer( out )
            cpgfilter.run( reader, writer.write )
        finally:
            out.close()
    finally:
        inp.close()

    # An input with no counted bases would otherwise divide by zero.
    total = cpgfilter.total
    if total:
        percent = float(cpgfilter.masked) / float(total) * 100
    else:
        percent = 0.0

    # Single-argument print() produces the same output on Python 2 and 3.
    print("%2.2f percent bases masked; Mask character = %s, Definition = %s" % ( percent, mask, defn ))
|
|
51
|
|
# Script entry point: run the tool only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|