| 
0
 | 
     1 #!/usr/bin/env python
 | 
| 
 | 
     2 """
 | 
| 
 | 
     3 Input: fasta, int
 | 
| 
 | 
     4 Output: tabular
 | 
| 
 | 
     5 Return titles with lengths of corresponding seq
 | 
| 
 | 
     6 """
 | 
| 
 | 
     7 
 | 
| 
 | 
     8 import sys, os
 | 
| 
 | 
     9 
 | 
| 
 | 
    10 assert sys.version_info[:2] >= ( 2, 4 )
 | 
| 
 | 
    11 
 | 
| 
 | 
    12 def compute_fasta_length( fasta_file, out_file, keep_first_char, keep_first_word=False ):
 | 
| 
 | 
    13 
 | 
| 
 | 
    14     infile = fasta_file
 | 
| 
 | 
    15     out = open( out_file, 'w')
 | 
| 
 | 
    16     keep_first_char = int( keep_first_char )
 | 
| 
 | 
    17 
 | 
| 
 | 
    18     fasta_title = ''
 | 
| 
 | 
    19     seq_len = 0
 | 
| 
 | 
    20 
 | 
| 
 | 
    21     # number of char to keep in the title
 | 
| 
 | 
    22     if keep_first_char == 0:
 | 
| 
 | 
    23         keep_first_char = None
 | 
| 
 | 
    24     else:
 | 
| 
 | 
    25         keep_first_char += 1
 | 
| 
 | 
    26 
 | 
| 
 | 
    27     first_entry = True
 | 
| 
 | 
    28 
 | 
| 
 | 
    29     for line in open( infile ):
 | 
| 
 | 
    30         line = line.strip()
 | 
| 
 | 
    31         if not line or line.startswith( '#' ):
 | 
| 
 | 
    32             continue
 | 
| 
 | 
    33         if line[0] == '>':
 | 
| 
 | 
    34             if first_entry == False:
 | 
| 
 | 
    35                 if keep_first_word:
 | 
| 
 | 
    36                     fasta_title = fasta_title.split()[0]
 | 
| 
 | 
    37                 out.write( "%s\t%d\n" % ( fasta_title[ 1:keep_first_char ], seq_len ) )
 | 
| 
 | 
    38             else:
 | 
| 
 | 
    39                 first_entry = False
 | 
| 
 | 
    40             fasta_title = line
 | 
| 
 | 
    41             seq_len = 0
 | 
| 
 | 
    42         else:
 | 
| 
 | 
    43             seq_len += len(line)
 | 
| 
 | 
    44 
 | 
| 
 | 
    45     # last fasta-entry
 | 
| 
 | 
    46     if keep_first_word:
 | 
| 
 | 
    47         fasta_title = fasta_title.split()[0]
 | 
| 
 | 
    48     out.write( "%s\t%d\n" % ( fasta_title[ 1:keep_first_char ], seq_len ) )
 | 
| 
 | 
    49     out.close()
 | 
| 
 | 
    50 
 | 
| 
 | 
    51 if __name__ == "__main__" :
 | 
| 
 | 
    52     compute_fasta_length( sys.argv[1], sys.argv[2], sys.argv[3], True ) |