Mercurial > repos > devteam > fasta_to_tabular
comparison fasta_to_tabular.py @ 0:9d189d08f2ad draft
Imported from capsule None
| author | devteam |
|---|---|
| date | Mon, 19 May 2014 12:34:27 -0400 |
| parents | |
| children | 091edad7622f |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:9d189d08f2ad |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 # This code exists in 2 places: ~/datatypes/converters and ~/tools/fasta_tools | |
| 3 """ | |
| 4 Input: fasta (input file), tabular (output file), int (truncation of id), int (columns from description) | |
| 5 Output: tabular | |
| 6 format convert: fasta to tabular | |
| 7 """ | |
| 8 | |
| 9 import sys, os | |
| 10 | |
def stop_err(msg):
    """Write *msg* to standard error and terminate the script.

    The message is written exactly as given (no newline is appended).
    Raises SystemExit with status 1 so that the calling process can tell
    that the run failed; a bare ``sys.exit()`` would report success (0).
    """
    sys.stderr.write(msg)
    sys.exit(1)
| 14 | |
def __main__():
    """Convert a FASTA file into tab-separated text, one record per line.

    Command line arguments (read from ``sys.argv``):
        1. path of the FASTA input file
        2. path of the tabular output file
        3. truncation length as an integer (0 disables truncation)
        4. number of columns to split the title line into (1 or more)

    Every title line (starting with '>') begins a new output row: the title
    column(s), a tab, then all following sequence lines joined together.
    Blank lines and lines starting with '#' are skipped. With a single title
    column the truncation applies to the whole title; with several columns
    it applies to the first word only, and missing columns are padded with
    empty strings.

    Exits through stop_err() when the argument count is wrong or the column
    count is below 1. int() raises ValueError for non-numeric arguments.
    """
    if len(sys.argv) != 5:
        stop_err("Wrong number of argument. Expect four (fasta, tabular, truncation, columns)")
    infile = sys.argv[1]
    outfile = sys.argv[2]
    keep_first = int(sys.argv[3])
    descr_split = int(sys.argv[4])
    if keep_first == 0:
        # A slice bound of None keeps the complete text.
        keep_first = None
    elif descr_split == 1:
        # Added one for the ">" character
        # (which is removed if using descr_split > 1)
        keep_first += 1
    if descr_split < 1:
        stop_err("Bad description split value (should be 1 or more)")

    # Tracks whether anything has been written yet. The input line index is
    # not a reliable substitute: leading comment/blank lines would produce an
    # empty first row, and an empty input would leave the index undefined.
    wrote_output = False
    # Context managers close both files even if a line fails to convert.
    with open(outfile, 'w') as out, open(infile) as fasta:
        for line in fasta:
            line = line.rstrip('\r\n')
            if not line or line.startswith('#'):
                continue
            if line.startswith('>'):
                # Don't want any existing tabs to trigger extra columns:
                line = line.replace('\t', ' ')
                if wrote_output:
                    # Terminate the previous record's row.
                    out.write('\n')
                if descr_split == 1:
                    out.write(line[1:keep_first])
                else:
                    words = line[1:].split(None, descr_split - 1)
                    if not words:
                        # Bare ">" title: keep an empty identifier column
                        # instead of failing on words[0].
                        words = [""]
                    # apply any truncation to first word (the id)
                    words[0] = words[0][0:keep_first]
                    # pad with empty columns if required
                    words += [""] * (descr_split - len(words))
                    out.write("\t".join(words))
                out.write('\t')
            else:
                out.write(line)
            wrote_output = True
        if wrote_output:
            # Terminate the final row.
            out.write('\n')
| 56 | |
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()
