annotate fasta_to_tabular.py @ 1:7e801ab2b70e draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_to_tabular commit a1517c9d22029095120643bbe2c8fa53754dd2b7
author devteam
date Wed, 11 Nov 2015 12:14:09 -0500
parents 9d189d08f2ad
children 091edad7622f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/env python
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
2 # This code exists in 2 places: ~/datatypes/converters and ~/tools/fasta_tools
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
3 """
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
4 Input: fasta (input file), tabular (output file), int (truncation of id), int (columns from description)
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
5 Output: tabular
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
6 format convert: fasta to tabular
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
7 """
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
8
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
9 import sys, os
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
10
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
def stop_err(msg):
    """Report a fatal error and terminate the script.

    Writes ``msg`` to standard error unchanged, then raises ``SystemExit``
    with status 1 so that callers (shell scripts, job runners) can tell the
    run failed from the exit code as well as from the stderr output.
    """
    sys.stderr.write(msg)
    # A bare sys.exit() would report success (status 0) despite the error.
    sys.exit(1)
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
14
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
def _title_columns(line, keep_first, descr_split):
    """Return the tab-joined title column(s) for one FASTA header line.

    ``line`` is the header including its leading ">", with tabs already
    replaced. ``keep_first`` is the slice end used to truncate the id (None
    for no truncation); when ``descr_split`` is 1 it is expected to already
    include the +1 offset for the ">" character.
    """
    if descr_split == 1:
        return line[1:keep_first]
    words = line[1:].split(None, descr_split - 1)
    if not words:
        # A bare ">" has no id; use an empty one instead of failing on words[0]
        words = [""]
    # apply any truncation to first word (the id)
    words[0] = words[0][0:keep_first]
    # pad with empty columns if required
    words += [""] * (descr_split - len(words))
    return "\t".join(words)


def __main__():
    """Convert a FASTA file into a tabular file, one row per record.

    Arguments are read from ``sys.argv``: the FASTA input path, the tabular
    output path, the id truncation length (0 keeps the whole id) and the
    number of columns to split the title line into (1 or more).

    Each row holds the title column(s), a tab, and the record's sequence
    lines concatenated. Blank lines and lines starting with "#" are skipped.
    An empty input yields an empty output file.
    """
    if len(sys.argv) != 5:
        stop_err("Wrong number of argument. Expect four (fasta, tabular, truncation, columns)")
    infile = sys.argv[1]
    outfile = sys.argv[2]
    keep_first = int(sys.argv[3])
    descr_split = int(sys.argv[4])
    if keep_first == 0:
        keep_first = None
    elif descr_split == 1:
        # Added one for the ">" character
        # (which is removed if using descr_split > 1)
        keep_first += 1
    if descr_split < 1:
        stop_err("Bad description split value (should be 1 or more)")

    # Context managers guarantee both handles are closed, even on error.
    with open(outfile, 'w') as out:
        with open(infile) as fasta:
            # Track actual output rather than the input line index, so that
            # leading comments/blank lines do not produce an empty first row
            # and an empty input does not break the final newline check.
            wrote_any = False
            for line in fasta:
                line = line.rstrip('\r\n')
                if not line or line.startswith('#'):
                    continue
                if line.startswith('>'):
                    # Don't want any existing tabs to trigger extra columns:
                    line = line.replace('\t', ' ')
                    if wrote_any:
                        # Terminate the previous record's row
                        out.write('\n')
                    out.write(_title_columns(line, keep_first, descr_split))
                    out.write('\t')
                else:
                    out.write(line)
                wrote_any = True
            if wrote_any:
                # Terminate the last row
                out.write('\n')
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
56
9d189d08f2ad Imported from capsule None
devteam
parents:
diff changeset
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()