Mercurial > repos > devteam > fasta_filter_by_length
view fasta_filter_by_length.py @ 3:e626b3ff9922 draft
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_filter_by_length commit 66d6c3cbb835114dc80e61216756e90da156f6d6"
author | devteam |
---|---|
date | Tue, 17 Dec 2019 13:13:32 -0500 |
parents | 16679a7f554a |
children | 8cacfcf96a52 |
line wrap: on
line source
#!/usr/bin/env python """ Input: fasta, minimal length, maximal length Output: fasta Return sequences whose lengths are within the range. """ import sys, os assert sys.version_info[:2] >= ( 2, 4 ) def stop_err( msg ): sys.stderr.write( msg ) sys.exit() def __main__(): input_filename = sys.argv[1] try: min_length = int( sys.argv[2] ) except: stop_err( "Minimal length of the return sequence requires a numerical value." ) try: max_length = int( sys.argv[3] ) except: stop_err( "Maximum length of the return sequence requires a numerical value." ) output_filename = sys.argv[4] output_handle = open( output_filename, 'w' ) tmp_size = 0 #-1 tmp_buf = '' at_least_one = 0 for line in file(input_filename): if not line or line.startswith('#'): continue if line[0] == '>': if min_length <= tmp_size <= max_length or (min_length <= tmp_size and max_length == 0): output_handle.write(tmp_buf) at_least_one = 1 tmp_buf = line tmp_size = 0 else: if max_length == 0 or tmp_size <= max_length: tmp_size += len(line.rstrip('\r\n')) tmp_buf += line # final flush of buffer if min_length <= tmp_size <= max_length or (min_length <= tmp_size and max_length == 0): output_handle.write(tmp_buf.rstrip('\r\n')) at_least_one = 1 output_handle.close() if at_least_one == 0: print "There is no sequence that falls within your range." if __name__ == "__main__" : __main__()