# HG changeset patch # User devteam # Date 1583065482 18000 # Node ID 8cacfcf96a5212ae030e163a08ec479dd21d0482 # Parent e626b3ff9922e8b18c60ab199d0d61c0b3cc8e40 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_filter_by_length commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf" diff -r e626b3ff9922 -r 8cacfcf96a52 fasta_filter_by_length.py --- a/fasta_filter_by_length.py Tue Dec 17 13:13:32 2019 -0500 +++ b/fasta_filter_by_length.py Sun Mar 01 07:24:42 2020 -0500 @@ -5,48 +5,50 @@ Return sequences whose lengths are within the range. """ -import sys, os +import sys -assert sys.version_info[:2] >= ( 2, 4 ) +assert sys.version_info[:2] >= (2, 4) -def stop_err( msg ): - sys.stderr.write( msg ) - sys.exit() + +def stop_err(msg): + sys.exit(msg) + def __main__(): input_filename = sys.argv[1] try: - min_length = int( sys.argv[2] ) - except: - stop_err( "Minimal length of the return sequence requires a numerical value." ) + min_length = int(sys.argv[2]) + except Exception: + stop_err("Minimal length of the return sequence requires a numerical value.") try: - max_length = int( sys.argv[3] ) - except: - stop_err( "Maximum length of the return sequence requires a numerical value." ) + max_length = int(sys.argv[3]) + except Exception: + stop_err("Maximum length of the return sequence requires a numerical value.") output_filename = sys.argv[4] - output_handle = open( output_filename, 'w' ) - tmp_size = 0 #-1 + tmp_size = 0 # -1 tmp_buf = '' at_least_one = 0 - for line in file(input_filename): - if not line or line.startswith('#'): - continue - if line[0] == '>': - if min_length <= tmp_size <= max_length or (min_length <= tmp_size and max_length == 0): - output_handle.write(tmp_buf) - at_least_one = 1 - tmp_buf = line - tmp_size = 0 - else: - if max_length == 0 or tmp_size <= max_length: - tmp_size += len(line.rstrip('\r\n')) - tmp_buf += line - # final flush of buffer - if min_length <= tmp_size <= max_length or (min_length <= tmp_size and max_length == 0): - output_handle.write(tmp_buf.rstrip('\r\n')) - at_least_one = 1 - output_handle.close() + with open(output_filename, 'w') as output_handle, open(input_filename, 'r') as input_handle: + for line in input_handle: + if not line or line.startswith('#'): + continue + if line[0] == '>': + if min_length <= tmp_size <= max_length or (min_length <= tmp_size and max_length == 0): + output_handle.write(tmp_buf) + at_least_one = 1 + tmp_buf = line + tmp_size = 0 + else: + if max_length == 0 or tmp_size <= max_length: + tmp_size += len(line.rstrip('\r\n')) + tmp_buf += line + # final flush of buffer + if min_length <= tmp_size <= max_length or (min_length <= tmp_size and max_length == 0): + output_handle.write(tmp_buf.rstrip('\r\n')) + at_least_one = 1 if at_least_one == 0: - print "There is no sequence that falls within your range." + print("There is no sequence that falls within your range.") + -if __name__ == "__main__" : __main__() +if __name__ == "__main__": + __main__() diff -r e626b3ff9922 -r 8cacfcf96a52 fasta_filter_by_length.xml --- a/fasta_filter_by_length.xml Tue Dec 17 13:13:32 2019 -0500 +++ b/fasta_filter_by_length.xml Sun Mar 01 07:24:42 2020 -0500 @@ -1,35 +1,34 @@ - - - - python $__tool_directory__/fasta_filter_by_length.py - '$input' - $min_length - $max_length - '$output' - - - - - - - - - - - - - - - - - - - - - - - - + + + + python + + +python '$__tool_directory__/fasta_filter_by_length.py' '$input' $min_length $max_length '$output' + + + + + + + + + + + + + + + + + + + + + + + + seq1 + TCATTTAATGAC + >seq2 + ATGGC + >seq3 + TCACATGATGCCG + >seq4 + ATGGAAGC Setting the **Minimal length** to **10**, and the **Maximum length** to **0** will return all sequences longer than 10 bp:: - >seq1 - TCATTTAATGAC - >seq3 - TCACATGATGCCG + >seq1 + TCATTTAATGAC + >seq3 + TCACATGATGCCG - + ]]>