Mercurial > repos > earlhaminst > t_coffee
comparison t_coffee_to_cigar.py @ 1:b3833e5b50d4 draft
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
author | earlhaminst |
---|---|
date | Mon, 19 Dec 2016 17:47:31 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:794a6e864a96 | 1:b3833e5b50d4 |
---|---|
1 #!/usr/bin/env python | |
2 """ A script to convert a gapped FASTA alignment into tab-separated header and CIGAR lines """ | |
3 from __future__ import print_function | |
4 | |
5 import re | |
6 import sys | |
7 | |
8 | |
# Matches any single character that is not a gap ('-').
FASTA_MATCH_RE = re.compile(r'[^-]')
# Matches one maximal run of identical operations in an M/D string.
_CIGAR_RUN_RE = re.compile(r'M+|D+')


def convert_and_print(header, sequence):
    """Print ``header`` and the CIGAR encoding of ``sequence``, tab-separated.

    Every gap character ('-') in ``sequence`` counts as a 'D' and every other
    character as an 'M'. Consecutive identical operations are collapsed into
    ``<count><op>``, with the count omitted when it is 1, so ``'AC--G'`` is
    written as ``'2M2DM'``. An empty ``sequence`` produces an empty CIGAR
    instead of raising ``IndexError``.

    Args:
        header: Text written before the tab.
        sequence: Aligned sequence string; gaps are '-'.
    """
    # Reduce the alignment row to a string made only of 'M' and 'D'.
    operations = FASTA_MATCH_RE.sub('M', sequence).replace('-', 'D')
    cigar_parts = []
    # finditer yields nothing for an empty string, so no special case is
    # needed for records without sequence data.
    for run in _CIGAR_RUN_RE.finditer(operations):
        run_length = run.end() - run.start()
        if run_length > 1:
            cigar_parts.append(str(run_length))
        cigar_parts.append(run.group()[0])
    print("%s\t%s" % (header, ''.join(cigar_parts)))
27 | |
28 | |
def _iter_fasta_records(lines):
    """Yield ``(header, sequence)`` for each record in FASTA-formatted lines.

    A record starts at a line beginning with '>'; the header is the rest of
    that line and the sequence is the concatenation of the following lines
    (each stripped of surrounding whitespace) up to the next header. Blank
    lines are ignored wherever they appear.

    Raises:
        ValueError: if non-blank text appears before the first header line.
    """
    header = None
    chunks = []
    for line_number, raw_line in enumerate(lines, 1):
        line = raw_line.strip()
        if not line:
            continue
        if line[0] == '>':
            # Compare against None so a record with an empty header (a bare
            # '>') is still emitted rather than silently dropped.
            if header is not None:
                yield header, ''.join(chunks)
            header = line[1:]
            chunks = []
        elif header is None:
            raise ValueError(
                "line %d: sequence data found before the first '>' header"
                % line_number)
        else:
            chunks.append(line)
    if header is not None:
        yield header, ''.join(chunks)


def main():
    """Read the FASTA file named by ``sys.argv[1]`` and print one line per record.

    Each record is passed to ``convert_and_print`` as soon as it has been
    read. Exits with a usage message when no input path is given.
    """
    if len(sys.argv) < 2:
        sys.exit("Usage: %s <alignment.fasta>" % sys.argv[0])
    with open(sys.argv[1]) as fh:
        for header, sequence in _iter_fasta_records(fh):
            convert_and_print(header, sequence)


if __name__ == "__main__":
    main()