Mercurial > repos > devteam > fasta_compute_length
diff utils/fasta_to_len.py @ 4:e12f68d2cc4e draft
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_compute_length commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
author | devteam |
---|---|
date | Sun, 01 Mar 2020 07:24:10 -0500 |
parents | ece409f6573c |
children | 7d37cfda8e00 |
line wrap: on
line diff
--- a/utils/fasta_to_len.py	Wed Sep 11 09:41:59 2019 -0400
+++ b/utils/fasta_to_len.py	Sun Mar 01 07:24:10 2020 -0500
@@ -5,16 +5,13 @@
 Return titles with lengths of corresponding seq
 """
 
-import sys, os
+import sys
 
-assert sys.version_info[:2] >= ( 2, 4 )
+assert sys.version_info[:2] >= (2, 4)
 
 
-def compute_fasta_length( fasta_file, out_file, keep_first_char, keep_first_word=False ):
-    infile = fasta_file
-    out = open( out_file, 'w')
-    keep_first_char = int( keep_first_char )
-
+def compute_fasta_length(fasta_file, out_file, keep_first_char, keep_first_word=False):
+    keep_first_char = int(keep_first_char)
     fasta_title = ''
     seq_len = 0
 
@@ -25,28 +22,28 @@
         keep_first_char += 1
 
     first_entry = True
-
-    for line in open( infile ):
-        line = line.strip()
-        if not line or line.startswith( '#' ):
-            continue
-        if line[0] == '>':
-            if first_entry == False:
-                if keep_first_word:
-                    fasta_title = fasta_title.split()[0]
-                out.write( "%s\t%d\n" % ( fasta_title[ 1:keep_first_char ], seq_len ) )
+    with open(fasta_file) as in_fh, open(out_file, 'w') as out_fh:
+        for line in in_fh:
+            line = line.strip()
+            if not line or line.startswith('#'):
+                continue
+            if line[0] == '>':
+                if first_entry is False:
+                    if keep_first_word:
+                        fasta_title = fasta_title.split()[0]
+                    out_fh.write("%s\t%d\n" % (fasta_title[1:keep_first_char], seq_len))
+                else:
+                    first_entry = False
+                fasta_title = line
+                seq_len = 0
             else:
-                first_entry = False
-            fasta_title = line
-            seq_len = 0
-        else:
-            seq_len += len(line)
+                seq_len += len(line)
 
-    # last fasta-entry
-    if keep_first_word:
-        fasta_title = fasta_title.split()[0]
-    out.write( "%s\t%d\n" % ( fasta_title[ 1:keep_first_char ], seq_len ) )
-    out.close()
+        # last fasta-entry
+        if keep_first_word:
+            fasta_title = fasta_title.split()[0]
+        out_fh.write("%s\t%d\n" % (fasta_title[1:keep_first_char], seq_len))
 
-if __name__ == "__main__" :
-    compute_fasta_length( sys.argv[1], sys.argv[2], sys.argv[3], True )
\ No newline at end of file
+
+if __name__ == "__main__":
+    compute_fasta_length(sys.argv[1], sys.argv[2], sys.argv[3], True)