# HG changeset patch # User Richard Burhans # Date 1334004688 14400 # Node ID f810c756a5d62f137c722be96e22db50d8fe303c # Parent 41ef7e57c2fa7286a92e193484d3834cd7f53de0 removed duplicate tool diff -r 41ef7e57c2fa -r f810c756a5d6 select_restriction_enzymes.py --- a/select_restriction_enzymes.py Mon Apr 09 15:27:28 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,103 +0,0 @@ -#!/usr/bin/env python - -import os -import sys -from optparse import OptionParser -import genome_diversity as gd - -def main_function( parse_arguments=None ): - if parse_arguments is None: - parse_arguments = lambda arguments: ( None, arguments ) - def main_decorator( to_decorate ): - def decorated_main( arguments=None ): - if arguments is None: - arguments = sys.argv - options, arguments = parse_arguments( arguments ) - rc = 1 - try: - rc = to_decorate( options, arguments ) - except Exception, err: - sys.stderr.write( 'ERROR: %s\n' % str( err ) ) - traceback.print_exc() - finally: - sys.exit( rc ) - return decorated_main - return main_decorator - -def parse_arguments( arguments ): - parser = OptionParser() - parser.add_option('--input', - type='string', dest='input', - help='file of selected SNPs') - parser.add_option('--output', - type='string', dest='output', - help='output file') - parser.add_option('--primers_loc', - type='string', dest='primers_loc', - help='primers .loc file') - parser.add_option('--scaffold_col', - type="int", dest='scaffold_col', - help='scaffold column in the input file') - parser.add_option('--pos_col', - type="int", dest='pos_col', - help='position column in the input file') - parser.add_option('--enzyme_list', - type="string", dest='enzyme_list_string', - help='comma separated list of enzymes') - parser.add_option('--species', - type="string", dest='species', - help='species') - return parser.parse_args( arguments[1:] ) - - -@main_function( parse_arguments ) -def main( options, arguments ): - if not options.input: - raise RuntimeError( 'missing --input option' ) - if not options.output: - raise RuntimeError( 'missing --output option' ) - if not options.primers_loc: - raise RuntimeError( 'missing --primers_loc option' ) - if not options.scaffold_col: - raise RuntimeError( 'missing --scaffold_col option' ) - if not options.pos_col: - raise RuntimeError( 'missing --pos_col option' ) - if not options.enzyme_list_string: - raise RuntimeError( 'missing --enzyme_list option' ) - if not options.species: - raise RuntimeError( 'missing --species option' ) - - snps = gd.SnpFile( filename=options.input, seq_col=int( options.scaffold_col ), pos_col=int( options.pos_col ) ) - - out_fh = gd._openfile( options.output, 'w' ) - - enzyme_dict = {} - for enzyme in options.enzyme_list_string.split( ',' ): - enzyme = enzyme.strip() - if enzyme: - enzyme_dict[enzyme] = 1 - - primer_data_file = gd.get_filename_from_loc( options.species, options.primers_loc ) - file_root, file_ext = os.path.splitext( primer_data_file ) - primer_index_file = file_root + ".cdb" - primers = gd.PrimersFile( data_file=primer_data_file, index_file=primer_index_file ) - - comments_printed = False - - while snps.next(): - seq, pos = snps.get_seq_pos() - enzyme_list = primers.get_enzymes( seq, pos ) - for enzyme in enzyme_list: - if enzyme in enzyme_dict: - if not comments_printed: - for comment in snps.comments: - out_fh.write( "%s\n" % comment ) - comments_printed = True - out_fh.write( "%s\n" % snps.line ) - break - - out_fh.close() - -if __name__ == "__main__": - main() - diff -r 41ef7e57c2fa -r f810c756a5d6 select_restriction_enzymes.xml --- a/select_restriction_enzymes.xml Mon Apr 09 15:27:28 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,90 +0,0 @@ - - a set of restriction enzymes - - - select_restriction_enzymes.py "--input=$input" "--output=$output" "--primers_loc=${GALAXY_DATA_INDEX_DIR}/gd.primers.loc" - #if $override_metadata.choice == "0": - "--scaffold_col=${input.metadata.scaffold}" "--pos_col=${input.metadata.pos}" "--species=${input.metadata.species}" - #else - "--scaffold_col=$scaf_col" "--pos_col=$pos_col" "--species=$species" - #end if - "--enzyme_list=$enzymes" - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -**What it does** - - It selects the SNPs that are differentially cut by at least one of the - specified restriction enzymes. The enzymes are required to cut the amplified - segment (for the specified PCR primers) only at the SNP. - ------ - -**Example** - -- input file:: - - chr2_75111355_75112576 314 A C L F chr2 75111676 C F 15 4 53 2 9 48 Y 96 0.369 0.355 0.396 0 - chr8_93901796_93905612 2471 A C A A chr8 93904264 A A 8 0 51 10 2 14 Y 961 0.016 0.534 0.114 2 - chr10_7434473_7435447 524 T C S S chr10 7435005 T S 11 5 90 14 0 69 Y 626 0.066 0.406 0.727 0 - chr14_80021455_80022064 138 G A H H chr14 80021593 G H 14 0 69 9 6 124 Y 377 0.118 0.997 0.195 1 - chr15_64470252_64471048 89 G A Y Y chr15 64470341 G Y 5 6 109 14 0 69 Y 312 0.247 0.998 0.393 0 - chr18_48070585_48071386 514 C T E K chr18 48071100 T K 7 7 46 14 0 69 Y 2 0.200 0.032 0.163 0 - chr18_50154905_50155664 304 A G Y C chr18 50155208 A Y 4 2 17 5 1 22 Y 8 0.022 0.996 0.128 0 - chr18_57379354_57380496 315 C T V V chr18 57379669 G V 11 0 60 9 6 62 Y 726 0.118 0.048 0.014 1 - chr19_14240610_14242055 232 C T A V chr19 14240840 C A 18 8 56 15 5 42 Y 73 0.003 0.153 0.835 0 - chr19_39866997_39874915 3117 C T P P chr19 39870110 C P 3 7 65 14 2 32 Y 6 0.321 0.911 0.462 4 - etc. - -- output file:: - - chr8_93901796_93905612 2471 A C A A chr8 93904264 A A 8 0 51 10 2 14 Y 961 0.016 0.534 0.114 2 - chr14_80021455_80022064 138 G A H H chr14 80021593 G H 14 0 69 9 6 124 Y 377 0.118 0.997 0.195 1 - chr18_57379354_57380496 315 C T V V chr18 57379669 G V 11 0 60 9 6 62 Y 726 0.118 0.048 0.014 1 - chr19_39866997_39874915 3117 C T P P chr19 39870110 C P 3 7 65 14 2 32 Y 6 0.321 0.911 0.462 4 - etc. - -