annotate maf_cpg_filter.py @ 0:7f2a12cb047d draft

Imported from capsule None
author devteam
date Tue, 01 Apr 2014 10:51:18 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7f2a12cb047d Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/env python
7f2a12cb047d Imported from capsule None
devteam
parents:
diff changeset
2 #Guruprasad Ananda
7f2a12cb047d Imported from capsule None
devteam
parents:
diff changeset
3 #Adapted from bx/scripts/maf_mask_cpg.py
7f2a12cb047d Imported from capsule None
devteam
parents:
diff changeset
4 """
7f2a12cb047d Imported from capsule None
devteam
parents:
diff changeset
5 Mask out potential CpG sites from a maf. Restricted or inclusive definition
7f2a12cb047d Imported from capsule None
devteam
parents:
diff changeset
6 of CpG sites can be used. The percentage of bases masked is printed to stdout.
7f2a12cb047d Imported from capsule None
devteam
parents:
diff changeset
7
7f2a12cb047d Imported from capsule None
devteam
parents:
diff changeset
8 usage: %prog input_maf output_maf sitetype definition
7f2a12cb047d Imported from capsule None
devteam
parents:
diff changeset
9 -m, --mask=N: Index of the mask character, 0-5 selecting # $ ^ * ? N (0, i.e. '#', is default)
7f2a12cb047d Imported from capsule None
devteam
parents:
diff changeset
10 """
7f2a12cb047d Imported from capsule None
devteam
parents:
diff changeset
11
7f2a12cb047d Imported from capsule None
devteam
parents:
diff changeset
12 import bx.align
7f2a12cb047d Imported from capsule None
devteam
parents:
diff changeset
13 import bx.align.maf
7f2a12cb047d Imported from capsule None
devteam
parents:
diff changeset
14 from bx.cookbook import doc_optparse
7f2a12cb047d Imported from capsule None
devteam
parents:
diff changeset
15 import sys
7f2a12cb047d Imported from capsule None
devteam
parents:
diff changeset
16 import bx.align.sitemask.cpg
7f2a12cb047d Imported from capsule None
devteam
parents:
diff changeset
17
7f2a12cb047d Imported from capsule None
devteam
parents:
diff changeset
18 assert sys.version_info[:2] >= ( 2, 4 )
7f2a12cb047d Imported from capsule None
devteam
parents:
diff changeset
19
7f2a12cb047d Imported from capsule None
devteam
parents:
diff changeset
def main():
    """Mask CpG or non-CpG sites in a MAF file and report the masked fraction.

    Command-line arguments are obtained through ``doc_optparse.parse`` on the
    module docstring.  Four positional arguments are required:

    * input MAF path
    * output MAF path
    * site type -- ``"CpG"`` selects one of the CpG filters, any other value
      selects the non-CpG filter
    * definition -- only read when the site type is ``"CpG"``; ``1`` selects
      the restricted filter, any other integer the inclusive one

    The ``-m/--mask`` option is an integer index into the table of mask
    characters below; index 0 is used when the option is absent.

    A one-line summary (percentage masked, mask character, definition) is
    written to stdout.  Malformed arguments produce a message on stderr and
    a non-zero exit status.
    """
    mask_chr_dict = {0: '#', 1: '$', 2: '^', 3: '*', 4: '?', 5: 'N'}

    options, args = doc_optparse.parse(__doc__)
    try:
        inp_file, out_file, sitetype, definition = args
        if options.mask:
            mask_index = int(options.mask)
        else:
            mask_index = 0
        # An unknown index is an argument error, not an unhandled KeyError.
        mask = mask_chr_dict[mask_index]
        # `definition` is only parsed for CpG runs; callers may pass a
        # non-numeric placeholder when the site type is not "CpG".
        restricted = sitetype == "CpG" and int(definition) == 1
    except (ValueError, KeyError):
        sys.stderr.write("Tool initialization error.\n")
        # Exit non-zero so the failure is visible to the calling process.
        sys.exit(1)

    if sitetype == "CpG":
        if restricted:
            cpgfilter = bx.align.sitemask.cpg.Restricted(mask=mask)
            defn = "CpG-Restricted"
        else:
            cpgfilter = bx.align.sitemask.cpg.Inclusive(mask=mask)
            defn = "CpG-Inclusive"
    else:
        cpgfilter = bx.align.sitemask.cpg.nonCpG(mask=mask)
        defn = "non-CpG"

    # try/finally (rather than `with`) keeps both handles closed on every
    # path while staying within the interpreter versions the file asserts.
    in_handle = open(inp_file, 'r')
    try:
        out_handle = open(out_file, 'w')
        try:
            reader = bx.align.maf.Reader(in_handle)
            writer = bx.align.maf.Writer(out_handle)
            cpgfilter.run(reader, writer.write)
        finally:
            out_handle.close()
    finally:
        in_handle.close()

    # An empty alignment leaves the total at zero; report 0% instead of
    # dividing by zero.
    total = float(cpgfilter.total)
    if total:
        percent = float(cpgfilter.masked) / total * 100
    else:
        percent = 0.0
    sys.stdout.write(
        "%2.2f percent bases masked; Mask character = %s, Definition = %s\n"
        % (percent, mask, defn)
    )
7f2a12cb047d Imported from capsule None
devteam
parents:
diff changeset
51
7f2a12cb047d Imported from capsule None
devteam
parents:
diff changeset
# Run the filter only when this file is executed as a script, so importing
# the module has no side effects.
if __name__ == "__main__":
    main()