diff tools/maf/maf_limit_to_species.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/maf/maf_limit_to_species.py	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+
+"""
+Read a maf file and write out a new maf with only blocks having the 
+required species, after dropping any other species and removing
+columns containing only gaps.
+
+usage: %prog species,species2,... input_maf output_maf allow_partial min_species_per_block
+"""
+#Dan Blankenberg
+from galaxy import eggs
+import pkg_resources; pkg_resources.require( "bx-python" )
+import bx.align.maf
+from galaxy.tools.util import maf_utilities
+import sys
+
+assert sys.version_info[:2] >= ( 2, 4 )
+
+def main():
+
+    species = maf_utilities.parse_species_option( sys.argv[1] )
+    if species:
+        spec_len = len( species )
+    else:
+        spec_len = 0
+    try:
+        maf_reader = bx.align.maf.Reader( open( sys.argv[2],'r' ) )
+        maf_writer = bx.align.maf.Writer( open( sys.argv[3],'w' ) )
+    except:
+        print >>sys.stderr, "Your MAF file appears to be malformed."
+        sys.exit()
+    allow_partial = False
+    if int( sys.argv[4] ): allow_partial = True
+    min_species_per_block = int( sys.argv[5] )
+    
+    maf_blocks_kept = 0
+    for m in maf_reader:
+        if species:
+            m = m.limit_to_species( species )
+        m.remove_all_gap_columns()
+        spec_in_block_len = len( maf_utilities.get_species_in_block( m ) )
+        if ( not species or allow_partial or spec_in_block_len == spec_len ) and spec_in_block_len > min_species_per_block:
+            maf_writer.write( m )
+            maf_blocks_kept += 1
+    
+    maf_reader.close()
+    maf_writer.close()
+    
+    print "Restricted to species: %s." % ", ".join( species )
+    print "%i MAF blocks have been kept." % maf_blocks_kept
+
+if __name__ == "__main__": 
+    main()