Mercurial > repos > earlhaminst > t_coffee
annotate t_coffee_to_cigar.py @ 2:df6527887a18 draft
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 2773f69afb943d6f6a64c2060180af8bd8941e16
author | earlhaminst |
---|---|
date | Mon, 09 Jan 2017 14:26:41 -0500 |
parents | b3833e5b50d4 |
children |
rev | line source |
---|---|
1
b3833e5b50d4
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff
changeset
|
1 #!/usr/bin/env python |
b3833e5b50d4
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff
changeset
|
2 """ A script to convert a gapped (aligned) FASTA file into tab-separated header/CIGAR lines """ |
b3833e5b50d4
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff
changeset
|
3 from __future__ import print_function |
b3833e5b50d4
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff
changeset
|
4 |
b3833e5b50d4
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff
changeset
|
5 import re |
b3833e5b50d4
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff
changeset
|
6 import sys |
b3833e5b50d4
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff
changeset
|
7 |
b3833e5b50d4
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff
changeset
|
8 |
b3833e5b50d4
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff
changeset
|
# Matches any single character other than the gap symbol '-'.
FASTA_MATCH_RE = re.compile(r'[^-]')
b3833e5b50d4
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff
changeset
|
10 |
b3833e5b50d4
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff
changeset
|
11 |
b3833e5b50d4
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff
changeset
|
def convert_and_print(header, sequence):
    """Print ``header`` and the CIGAR string of ``sequence``, tab-separated.

    Every character of ``sequence`` other than ``-`` is treated as a match
    (``M``) and every ``-`` as a deletion (``D``). Runs of the same operation
    are condensed to ``<count><op>``, with the count omitted for runs of
    length one, e.g. ``AC--G`` becomes ``2M2DM``.

    An empty ``sequence`` (a record with no residue lines) produces an empty
    CIGAR string instead of raising IndexError.

    Args:
        header: Text printed in the first column.
        sequence: Aligned sequence string, using ``-`` for gaps.
    """
    # Local import keeps this function usable regardless of the module's
    # top-level import block.
    from itertools import groupby

    parts = []
    # groupby yields one (is_gap, run) pair per maximal run of gap or
    # non-gap characters, which is exactly one CIGAR operation.
    for is_gap, run in groupby(sequence, key=lambda residue: residue == '-'):
        length = sum(1 for _ in run)
        if length > 1:
            parts.append(str(length))
        parts.append('D' if is_gap else 'M')
    print("%s\t%s" % (header, ''.join(parts)))
b3833e5b50d4
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff
changeset
|
27 |
b3833e5b50d4
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff
changeset
|
28 |
b3833e5b50d4
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff
changeset
|
def main():
    """Convert the aligned FASTA file named by ``sys.argv[1]`` to CIGAR lines.

    The file is read line by line. A line starting with ``>`` begins a new
    record whose header is the rest of that line; the following lines, with
    surrounding whitespace stripped, are concatenated into the record's
    sequence. Each completed record is passed to ``convert_and_print``.

    Lines that appear before the first header (for example leading blank
    lines) are ignored rather than raising TypeError. Records whose header
    text is empty (a bare ``>``) are still emitted.

    Exits with a usage message if no input path was given.
    """
    if len(sys.argv) < 2:
        sys.exit("Usage: %s <aligned_fasta>" % sys.argv[0])

    header = None
    chunks = []
    with open(sys.argv[1]) as fh:
        for line in fh:
            line = line.strip()
            if line.startswith('>'):
                # A new header closes the previous record, if there was one.
                if header is not None:
                    convert_and_print(header, ''.join(chunks))
                header = line[1:]
                chunks = []
            elif header is not None:
                # Collect pieces and join once, avoiding repeated string
                # concatenation on long alignments.
                chunks.append(line)
    # Flush the final record, which has no following header to trigger it.
    if header is not None:
        convert_and_print(header, ''.join(chunks))
b3833e5b50d4
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff
changeset
|
44 |
b3833e5b50d4
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff
changeset
|
45 |
b3833e5b50d4
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
earlhaminst
parents:
diff
changeset
|
# Run the conversion only when this file is executed as a script.
if __name__ == '__main__':
    main()