Mercurial > repos > xuebing > sharplabtool
comparison tools/maf/maf_filter.py @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9071e359b9a3 |
---|---|
1 #Dan Blankenberg | |
2 #Filters a MAF file according to the provided code file, which is generated in maf_filter.xml <configfiles> | |
3 #Also allows filtering by number of columns in a block, and limiting output species | |
4 import sys, os, shutil | |
5 from galaxy import eggs | |
6 import pkg_resources; pkg_resources.require( "bx-python" ) | |
7 import bx.align.maf | |
8 from galaxy.tools.util import maf_utilities | |
9 | |
def main():
    """Filter the blocks of a MAF file using a provided filter code file.

    Positional command-line arguments, consumed in this order:
      1. script_file: Python code file run once per block; it sees the
         block as 'maf_block' and sets 'ret_val' to True to keep it.
      2. maf_file: input MAF file.
      3. out_file: output MAF file.
      4. additional_files_path: directory where a copy of the script is
         saved as 'debug.txt'.
      5. species: species option, parsed by
         maf_utilities.parse_species_option; when it yields a value the
         kept blocks are limited to those species.
      6. min_size: minimum block text_size (inclusive).
      7. max_size: maximum block text_size (inclusive); a value below 1
         means "no upper limit".
      8. min_species_per_block: minimum number of components a block
         must have (after species limiting) to be written.
      9. exclude_incomplete_blocks: non-zero to drop blocks that have
         fewer components than the number of species.
     10. comma-separated species list; only read when argument 5 yields
         no species, and only used to count the species.

    Prints a summary of kept/total blocks to stdout. On bad arguments or
    unreadable MAF input a message is written to stderr and the process
    exits with status 1.
    """
    #Read command line arguments
    try:
        script_file = sys.argv.pop(1)
        maf_file = sys.argv.pop(1)
        out_file = sys.argv.pop(1)
        additional_files_path = sys.argv.pop(1)
        species = maf_utilities.parse_species_option(sys.argv.pop(1))
        min_size = int(sys.argv.pop(1))
        max_size = int(sys.argv.pop(1))
        if max_size < 1:
            #sys.maxsize exists on both Python 2.6+ and 3 (sys.maxint does not)
            max_size = sys.maxsize
        min_species_per_block = int(sys.argv.pop(1))
        exclude_incomplete_blocks = int(sys.argv.pop(1))
        if species:
            num_species = len(species)
        else:
            num_species = len(sys.argv.pop(1).split(','))
    except (IndexError, ValueError):
        #IndexError: too few arguments; ValueError: non-integer numeric argument
        sys.stderr.write(
            "One or more arguments is missing or invalid.\n"
            "Usage: maf_filter.py maf_filter_file input_maf output_maf "
            "path_to_save_debug species_to_keep min_size max_size "
            "min_species_per_block exclude_incomplete_blocks [all_species]\n"
        )
        sys.exit(1)

    #Open input and output MAF files
    try:
        maf_reader = bx.align.maf.Reader(open(maf_file, 'r'))
        maf_writer = bx.align.maf.Writer(open(out_file, 'w'))
    except Exception:
        sys.stderr.write("Your MAF file appears to be malformed.\n")
        sys.exit(1)

    try:
        #Save script file for debugging/verification info later
        if not os.path.isdir(additional_files_path):
            os.makedirs(additional_files_path)
        shutil.copy(script_file, os.path.join(additional_files_path, 'debug.txt'))

        #Compile the filter once instead of re-reading it for every block.
        #NOTE(review): this executes arbitrary code from script_file; it is
        #presumably generated by the tool wrapper - confirm it is trusted.
        with open(script_file) as script_handle:
            filter_code = compile(script_handle.read(), script_file, 'exec')

        #Loop through blocks, running filter on each
        #'maf_block' and 'ret_val' are used/shared in the provided code file
        #'ret_val' should be set to True if the block is to be kept
        blocks_seen = 0
        blocks_kept = 0
        for blocks_seen, maf_block in enumerate(maf_reader, 1):
            if not (min_size <= maf_block.text_size <= max_size):
                continue
            local = {'maf_block': maf_block, 'ret_val': False}
            exec(filter_code, {}, local)
            if not local['ret_val']:
                continue
            #Species limiting must be done after filters as filters could be run on non-requested output species
            if species:
                maf_block = maf_block.limit_to_species(species)
            num_components = len(maf_block.components)
            if num_components < min_species_per_block:
                continue
            if exclude_incomplete_blocks and num_components < num_species:
                continue
            maf_writer.write(maf_block)
            blocks_kept += 1
    finally:
        #Always release the file handles, even if the filter script raised
        maf_writer.close()
        maf_reader.close()

    #blocks_seen is a true count (enumerate starts at 1), so a file holding
    #exactly one block is no longer reported as empty
    if blocks_seen == 0:
        print("Your file contains no valid maf_blocks.")
    else:
        print('Kept %s of %s blocks (%.2f%%).' % (blocks_kept, blocks_seen, float(blocks_kept) / float(blocks_seen) * 100.0))


if __name__ == "__main__":
    main()