diff tools/regVariation/maf_cpg_filter.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/regVariation/maf_cpg_filter.py	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+#Guruprasad Ananda
+#Adapted from bx/scripts/maf_mask_cpg.py
+"""
+Mask out potential CpG sites from a maf. Restricted or inclusive definition
+of CpG sites can be used. The total fraction masked is printed to stderr.
+
+usage: %prog < input > output restricted
+    -m, --mask=N: Character to use as mask ('?' is default)
+"""
+
+from galaxy import eggs
+import pkg_resources 
+pkg_resources.require( "bx-python" )
+try:
+    pkg_resources.require( "numpy" )
+except:
+    pass
+import bx.align
+import bx.align.maf
+from bx.cookbook import doc_optparse
+import sys
+import bx.align.sitemask.cpg
+
+assert sys.version_info[:2] >= ( 2, 4 )
+
+def main():
+    options, args = doc_optparse.parse( __doc__ )
+    try:
+        inp_file, out_file, sitetype, definition = args
+        if options.mask:
+            mask = int(options.mask)
+        else:
+            mask = 0
+    except:
+        print >> sys.stderr, "Tool initialization error."
+        sys.exit()
+
+    reader = bx.align.maf.Reader( open(inp_file, 'r') )
+    writer = bx.align.maf.Writer( open(out_file,'w') )
+    
+    mask_chr_dict = {0:'#', 1:'$', 2:'^', 3:'*', 4:'?', 5:'N'}
+    mask = mask_chr_dict[mask]
+    
+    if sitetype == "CpG":
+        if int(definition) == 1:
+            cpgfilter = bx.align.sitemask.cpg.Restricted( mask=mask )
+            defn = "CpG-Restricted"
+        else:
+            cpgfilter = bx.align.sitemask.cpg.Inclusive( mask=mask )
+            defn = "CpG-Inclusive"
+    else:
+        cpgfilter = bx.align.sitemask.cpg.nonCpG( mask=mask )
+        defn = "non-CpG"
+    cpgfilter.run( reader, writer.write )
+    
+    print "%2.2f percent bases masked; Mask character = %s, Definition = %s" %(float(cpgfilter.masked)/float(cpgfilter.total) * 100, mask, defn)
+
+if __name__ == "__main__":
+    main()