| 0 | 1 #!/usr/bin/env python | 
|  | 2 """ | 
Input: FASTA file, int (number of title characters to keep; 0 keeps the whole title)
Output: tabular file
Write each sequence title together with the length of the corresponding sequence
|  | 6 """ | 
|  | 7 | 
|  | 8 import sys, os | 
|  | 9 | 
# Refuse to run on interpreters older than Python 2.4.
assert sys.version_info[:2] >= ( 2, 4 )
|  | 11 | 
def _format_title( header, title_end, keep_first_word ):
    """Return the reported title for a '>' header line, without the leading '>'."""
    if keep_first_word:
        # A header always starts with '>', so split() yields at least one word.
        header = header.split()[0]
    return header[ 1:title_end ]


def compute_fasta_length( fasta_file, out_file, keep_first_char, keep_first_word=False ):
    """
    Write one "title<TAB>length" line to out_file for every entry in fasta_file.

    fasta_file      -- path of the FASTA file to read
    out_file        -- path of the tabular file to write (overwritten)
    keep_first_char -- number of title characters to keep (int or numeric
                       string); 0 keeps the whole title
    keep_first_word -- if true, cut the title at its first whitespace before
                       applying keep_first_char

    Blank lines and lines starting with '#' are ignored. An entry begins at a
    line starting with '>'; sequence text found before the first such line is
    ignored. If the input holds no '>' line at all, the output file is left
    empty. Raises ValueError if keep_first_char is not an integer value.
    """
    # Validate the argument before the output file is truncated.
    keep_first_char = int( keep_first_char )

    # End index of the title slice: the slice starts at 1 to skip '>', so
    # keeping N characters means stopping at N + 1. None keeps everything.
    if keep_first_char == 0:
        title_end = None
    else:
        title_end = keep_first_char + 1

    fasta_title = None  # None until the first '>' header has been read
    seq_len = 0

    out = open( out_file, 'w' )
    try:
        infile = open( fasta_file )
        try:
            for line in infile:
                line = line.strip()
                if not line or line.startswith( '#' ):
                    continue
                if line[0] == '>':
                    # A new header closes the previous entry, if there is one.
                    if fasta_title is not None:
                        title = _format_title( fasta_title, title_end, keep_first_word )
                        out.write( "%s\t%d\n" % ( title, seq_len ) )
                    fasta_title = line
                    seq_len = 0
                else:
                    seq_len += len( line )
        finally:
            infile.close()

        # last fasta-entry (skipped when the input had no header at all)
        if fasta_title is not None:
            title = _format_title( fasta_title, title_end, keep_first_word )
            out.write( "%s\t%d\n" % ( title, seq_len ) )
    finally:
        out.close()
|  | 50 | 
if __name__ == "__main__":
    # Usage: <script> <fasta_file> <out_file> <keep_first_char>
    # The command-line entry point always reduces titles to their first word.
    cli_args = sys.argv
    compute_fasta_length( cli_args[1], cli_args[2], cli_args[3], keep_first_word=True )