view tools/regVariation/maf_cpg_filter.py @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
line wrap: on
line source

#!/usr/bin/env python
#Guruprasad Ananda
#Adapted from bx/scripts/maf_mask_cpg.py
"""
Mask out potential CpG sites from a maf. Restricted or inclusive definition
of CpG sites can be used. The total fraction masked is printed to stderr.

usage: %prog < input > output restricted
    -m, --mask=N: Character to use as mask ('?' is default)
"""

from galaxy import eggs
import pkg_resources 
pkg_resources.require( "bx-python" )
try:
    pkg_resources.require( "numpy" )
except:
    pass
import bx.align
import bx.align.maf
from bx.cookbook import doc_optparse
import sys
import bx.align.sitemask.cpg

assert sys.version_info[:2] >= ( 2, 4 )

def main():
    options, args = doc_optparse.parse( __doc__ )
    try:
        inp_file, out_file, sitetype, definition = args
        if options.mask:
            mask = int(options.mask)
        else:
            mask = 0
    except:
        print >> sys.stderr, "Tool initialization error."
        sys.exit()

    reader = bx.align.maf.Reader( open(inp_file, 'r') )
    writer = bx.align.maf.Writer( open(out_file,'w') )
    
    mask_chr_dict = {0:'#', 1:'$', 2:'^', 3:'*', 4:'?', 5:'N'}
    mask = mask_chr_dict[mask]
    
    if sitetype == "CpG":
        if int(definition) == 1:
            cpgfilter = bx.align.sitemask.cpg.Restricted( mask=mask )
            defn = "CpG-Restricted"
        else:
            cpgfilter = bx.align.sitemask.cpg.Inclusive( mask=mask )
            defn = "CpG-Inclusive"
    else:
        cpgfilter = bx.align.sitemask.cpg.nonCpG( mask=mask )
        defn = "non-CpG"
    cpgfilter.run( reader, writer.write )
    
    print "%2.2f percent bases masked; Mask character = %s, Definition = %s" %(float(cpgfilter.masked)/float(cpgfilter.total) * 100, mask, defn)

if __name__ == "__main__":
    main()