annotate tools/maf/maf_filter.py @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #Dan Blankenberg
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 #Filters a MAF file according to the provided code file, which is generated in maf_filter.xml <configfiles>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 #Also allows filtering by number of columns in a block, and limiting output species
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 import sys, os, shutil
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 from galaxy import eggs
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 import pkg_resources; pkg_resources.require( "bx-python" )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 import bx.align.maf
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 from galaxy.tools.util import maf_utilities
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def _load_filter_code( script_file ):
    """Read the filter script and compile it once so it can be run for every block."""
    handle = open( script_file, 'r' )
    try:
        source = handle.read()
    finally:
        handle.close()
    #A trailing newline is appended because older interpreters reject compiled source that lacks one
    return compile( source + '\n', script_file, 'exec' )

def _block_passes_filter( filter_code, maf_block ):
    """Run the compiled filter script on one block and return the value it left in 'ret_val'.

    The script sees 'maf_block' and 'ret_val' as local names and fresh, empty globals.
    NOTE: the script is executed as arbitrary Python code; per the file header it is
    generated from the tool's <configfiles>, so it must never come from an untrusted source.
    """
    local = { 'maf_block': maf_block, 'ret_val': False }
    exec( filter_code, {}, local )
    return local['ret_val']

def main():
    """Filter a MAF file block by block and write the blocks that are kept.

    Positional command line arguments, consumed from sys.argv in this order:
      1. script_file: Python code file run against every block (sets 'ret_val')
      2. maf_file: input MAF path
      3. out_file: output MAF path
      4. additional_files_path: directory that receives a copy of the script as 'debug.txt'
      5. species option string, parsed by maf_utilities.parse_species_option
      6. min_size: minimum block text_size (inclusive)
      7. max_size: maximum block text_size (inclusive); values below 1 mean no upper limit
      8. min_species_per_block: minimum number of components a kept block must have
      9. exclude_incomplete_blocks: integer flag; non-zero drops blocks with fewer
         components than the number of species
     10. comma-separated species list; only consumed (to count species) when the
         parsed species option is empty

    Prints a summary to stdout. On argument or file-opening errors a message is
    written to stderr and the process exits with status 1.
    """
    #Read command line arguments
    try:
        script_file = sys.argv.pop( 1 )
        maf_file = sys.argv.pop( 1 )
        out_file = sys.argv.pop( 1 )
        additional_files_path = sys.argv.pop( 1 )
        species = maf_utilities.parse_species_option( sys.argv.pop( 1 ) )
        min_size = int( sys.argv.pop( 1 ) )
        max_size = int( sys.argv.pop( 1 ) )
        if max_size < 1: max_size = sys.maxint
        min_species_per_block = int( sys.argv.pop( 1 ) )
        exclude_incomplete_blocks = int( sys.argv.pop( 1 ) )
        if species:
            num_species = len( species )
        else:
            num_species = len( sys.argv.pop( 1 ).split( ',' ) )
    except Exception:
        #'except Exception' (not a bare except) so that Ctrl-C and SystemExit are not swallowed
        sys.stderr.write( "One or more arguments is missing.\nUsage: maf_filter.py maf_filter_file input_maf output_maf path_to_save_debug species_to_keep\n" )
        #Exit non-zero so callers that check the status see the failure
        sys.exit( 1 )

    #Open input and output MAF files
    try:
        maf_reader = bx.align.maf.Reader( open( maf_file,'r' ) )
        maf_writer = bx.align.maf.Writer( open( out_file,'w' ) )
    except Exception:
        sys.stderr.write( "Your MAF file appears to be malformed.\n" )
        sys.exit( 1 )

    #Save script file for debugging/verification info later
    #The directory may already exist, in which case creating it again would raise
    if not os.path.isdir( additional_files_path ):
        os.makedirs( additional_files_path )
    shutil.copy( script_file, os.path.join( additional_files_path, 'debug.txt' ) )

    #Compile the filter once instead of re-reading the script file for every block
    filter_code = _load_filter_code( script_file )

    #Loop through blocks, running filter on each
    #'maf_block' and 'ret_val' are used/shared in the provided code file
    #'ret_val' should be set to True if the block is to be kept
    #Blocks are counted explicitly: an enumerate index of 0 cannot tell "no blocks" from "one block"
    total_blocks = 0
    blocks_kept = 0
    try:
        for maf_block in maf_reader:
            total_blocks += 1
            if not ( min_size <= maf_block.text_size <= max_size ):
                continue
            if not _block_passes_filter( filter_code, maf_block ):
                continue
            #Species limiting must be done after filters as filters could be run on non-requested output species
            if species:
                maf_block = maf_block.limit_to_species( species )
            num_components = len( maf_block.components )
            if num_components >= min_species_per_block and ( not exclude_incomplete_blocks or num_components >= num_species ):
                maf_writer.write( maf_block )
                blocks_kept += 1
    finally:
        #Close both files even if a block or the filter script raises
        maf_writer.close()
        maf_reader.close()

    if total_blocks == 0:
        print( "Your file contains no valid maf_blocks." )
    else:
        print( 'Kept %s of %s blocks (%.2f%%).' % ( blocks_kept, total_blocks, float( blocks_kept ) / float( total_blocks ) * 100.0 ) )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
#Run the filter only when this file is executed as a script, not when it is imported
if __name__ == '__main__':
    main()