annotate t_coffee_to_cigar.py @ 1:b3833e5b50d4 draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
author earlhaminst
date Mon, 19 Dec 2016 17:47:31 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
b3833e5b50d4 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff changeset
1 #!/usr/bin/env python
b3833e5b50d4 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff changeset
2 """ Convert a gapped FASTA alignment into one "header<TAB>CIGAR" line per sequence """
b3833e5b50d4 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff changeset
3 from __future__ import print_function
b3833e5b50d4 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff changeset
4
b3833e5b50d4 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff changeset
5 import re
b3833e5b50d4 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff changeset
6 import sys
b3833e5b50d4 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff changeset
7
b3833e5b50d4 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff changeset
8
b3833e5b50d4 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff changeset
# Matches any single character other than the gap symbol '-', i.e. every
# alignment column that is not a gap.
FASTA_MATCH_RE = re.compile(r'[^-]')
b3833e5b50d4 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff changeset
10
b3833e5b50d4 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff changeset
11
b3833e5b50d4 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff changeset
def convert_and_print(header, sequence):
    """Print ``header``, a tab, and the CIGAR string of an aligned sequence.

    Every maximal run of gap characters ('-') becomes a ``D`` operation and
    every maximal run of any other character becomes an ``M`` operation.
    A run length of 1 is left implicit, so ``AC--GT-A`` is printed as
    ``2M2D2MDM``.

    An empty ``sequence`` yields an empty CIGAR string (the line is just the
    header followed by a tab) rather than raising IndexError.

    :param header: identifier written in the first output column
    :param sequence: aligned sequence in which '-' marks a gap
    """
    ops = []
    # Each regex match is one maximal run of gaps or of non-gap characters,
    # so no intermediate "M"/"D" string or comma splitting is needed.
    for run in re.finditer(r'-+|[^-]+', sequence):
        text = run.group()
        # A count of 1 is omitted from the output.
        if len(text) > 1:
            ops.append(str(len(text)))
        ops.append('D' if text[0] == '-' else 'M')
    print("%s\t%s" % (header, ''.join(ops)))
b3833e5b50d4 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff changeset
27
b3833e5b50d4 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff changeset
28
b3833e5b50d4 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff changeset
def _iter_fasta_records(lines):
    """Yield ``(header, sequence)`` pairs from an iterable of FASTA lines.

    Surrounding whitespace is stripped from every line and blank lines are
    skipped.  A line starting with '>' opens a new record whose header is
    the rest of that line; all following lines up to the next header are
    concatenated into the record's sequence.

    :raises ValueError: if non-blank data appears before the first header
    """
    header = None
    chunks = []
    for raw in lines:
        line = raw.strip()
        if not line:
            continue
        if line[0] == '>':
            # Compare against None so a bare '>' (empty name) still counts
            # as a record instead of being silently dropped.
            if header is not None:
                yield header, ''.join(chunks)
            header = line[1:]
            chunks = []
        elif header is None:
            raise ValueError(
                "sequence data found before the first FASTA header: %r" % line)
        else:
            chunks.append(line)
    if header is not None:
        yield header, ''.join(chunks)


def main():
    """Read the FASTA file named by ``sys.argv[1]`` and print its CIGARs.

    Each record in the file is passed to ``convert_and_print`` in file
    order.  Exits with a usage message when no input path is given.
    """
    if len(sys.argv) < 2:
        sys.exit("Usage: %s ALIGNMENT_FASTA" % sys.argv[0])
    with open(sys.argv[1]) as fh:
        for header, sequence in _iter_fasta_records(fh):
            convert_and_print(header, sequence)
b3833e5b50d4 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff changeset
44
b3833e5b50d4 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff changeset
45
b3833e5b50d4 planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff changeset
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()