annotate xmfa2gff3.py @ 2:bdb752f3c6bb draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 7e386c24c2f9f0293abd4877eff708c1fa285751
author iuc
date Thu, 07 Feb 2019 05:24:24 -0500
parents bca52822843e
children 97a43bcbf44d
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
1 #!/usr/bin/env python
1
bca52822843e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 0
diff changeset
2 import argparse
bca52822843e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 0
diff changeset
3 import logging
0
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
4 import sys
1
bca52822843e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 0
diff changeset
5
bca52822843e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 0
diff changeset
6 from BCBio import GFF
0
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
7 from Bio import SeqIO
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
8 from Bio.Seq import Seq
1
bca52822843e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 0
diff changeset
9 from Bio.SeqFeature import (
bca52822843e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 0
diff changeset
10 FeatureLocation,
bca52822843e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 0
diff changeset
11 SeqFeature
bca52822843e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 0
diff changeset
12 )
0
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
13 from Bio.SeqRecord import SeqRecord
1
bca52822843e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 0
diff changeset
14
0
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
15 logging.basicConfig(level=logging.INFO)
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
16 log = logging.getLogger(__name__)
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
17
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
18
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
def parse_xmfa(xmfa):
    """Yield the locally collinear blocks (LCBs) of an XMFA alignment.

    Simple stand-in parser until Biopython provides one, see
    https://github.com/biopython/biopython/pull/544

    :param xmfa: open, text-mode, file-like object (iterated line by line).
    :returns: generator of LCBs. Each LCB is a list of dicts, one per
        ``>`` entry, with the keys ``rid`` (header fields after ``>``
        joined with ``_``), ``id``, ``start``, ``end``, ``strand`` (1 when
        the strand field is ``+``, otherwise -1) and ``seq`` (the entry's
        sequence lines concatenated).
    :raises KeyError: if sequence text appears while no entry is open.
    :raises ValueError, IndexError: if a ``>`` header is not of the form
        ``> id:start-end strand ...``.

    An LCB is only emitted when its terminating ``=`` line is read; entries
    after the last ``=`` are not yielded.
    """
    current_lcb = []
    current_seq = {}
    for raw_line in xmfa:
        # Comment lines are recognised on the raw (unstripped) line.
        if raw_line.startswith('#'):
            continue

        line = raw_line.strip()
        if not line:
            # Blank lines carry no data; skipping them avoids a KeyError
            # when one occurs while no entry is open (e.g. right after '=').
            continue

        is_separator = line == '='
        if is_separator or line.startswith('>'):
            # Both '=' and a new header close the entry being built.
            if 'id' in current_seq:
                current_seq['seq'] = ''.join(current_seq['seq'])
                current_lcb.append(current_seq)
                current_seq = {}

            if is_separator:
                yield current_lcb
                current_lcb = []
                continue

            fields = line.split()
            seq_id, loc = fields[1].split(':')
            start, end = loc.split('-')
            current_seq = {
                'rid': '_'.join(fields[1:]),
                'id': seq_id,
                'start': int(start),
                'end': int(end),
                'strand': 1 if fields[2] == '+' else -1,
                # Collected as chunks and joined once the entry is closed.
                'seq': [],
            }
        else:
            current_seq['seq'].append(line)
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
53
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
54
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
55 def _percent_identity(a, b):
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
56 """Calculate % identity, ignoring gaps in the host sequence
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
57 """
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
58 match = 0
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
59 mismatch = 0
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
60 for char_a, char_b in zip(list(a), list(b)):
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
61 if char_a == '-':
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
62 continue
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
63 if char_a == char_b:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
64 match += 1
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
65 else:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
66 mismatch += 1
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
67
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
68 if match + mismatch == 0:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
69 return 0
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
70 return 100 * float(match) / (match + mismatch)
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
71
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
72
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
73 def _id_tn_dict(sequences):
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
74 """Figure out sequence IDs
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
75 """
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
76 label_convert = {}
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
77 if sequences is not None:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
78 if len(sequences) == 1:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
79 for i, record in enumerate(SeqIO.parse(sequences[0], 'fasta')):
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
80 label_convert[str(i + 1)] = record.id
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
81 else:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
82 for i, sequence in enumerate(sequences):
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
83 for record in SeqIO.parse(sequence, 'fasta'):
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
84 label_convert[str(i + 1)] = record.id
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
85 continue
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
86 return label_convert
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
87
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
88
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
89 def convert_xmfa_to_gff3(xmfa_file, relative_to='1', sequences=None, window_size=1000):
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
90 label_convert = _id_tn_dict(sequences)
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
91
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
92 lcbs = parse_xmfa(xmfa_file)
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
93
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
94 records = [SeqRecord(Seq("A"), id=label_convert.get(relative_to, relative_to))]
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
95 for lcb in lcbs:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
96 ids = [seq['id'] for seq in lcb]
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
97
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
98 # Doesn't match part of our sequence
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
99 if relative_to not in ids:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
100 continue
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
101
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
102 # Skip sequences that are JUST our "relative_to" genome
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
103 if len(ids) == 1:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
104 continue
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
105
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
106 parent = [seq for seq in lcb if seq['id'] == relative_to][0]
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
107 others = [seq for seq in lcb if seq['id'] != relative_to]
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
108
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
109 for other in others:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
110 other['feature'] = SeqFeature(
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
111 FeatureLocation(parent['start'], parent['end'] + 1),
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
112 type="match", strand=parent['strand'],
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
113 qualifiers={
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
114 "source": "progressiveMauve",
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
115 "target": label_convert.get(other['id'], other['id']),
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
116 "ID": label_convert.get(other['id'], 'xmfa_' + other['rid'])
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
117 }
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
118 )
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
119
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
120 for i in range(0, len(lcb[0]['seq']), window_size):
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
121 block_seq = parent['seq'][i:i + window_size]
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
122 real_window_size = len(block_seq)
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
123 real_start = abs(parent['start']) - parent['seq'][0:i].count('-') + i
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
124 real_end = real_start + real_window_size - block_seq.count('-')
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
125
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
126 if (real_end - real_start) < 10:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
127 continue
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
128
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
129 if parent['start'] < 0:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
130 strand = -1
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
131 else:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
132 strand = 1
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
133
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
134 for other in others:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
135 pid = _percent_identity(block_seq, other['seq'][i:i + real_window_size])
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
136 # Ignore 0% identity sequences
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
137 if pid == 0:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
138 continue
1
bca52822843e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 0
diff changeset
139
bca52822843e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 0
diff changeset
140 # Support for Biopython 1.68 and above, which removed sub_features
bca52822843e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 0
diff changeset
141 if not hasattr(other['feature'], "sub_features"):
bca52822843e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 0
diff changeset
142 other['feature'].sub_features = []
0
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
143 other['feature'].sub_features.append(
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
144 SeqFeature(
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
145 FeatureLocation(real_start, real_end),
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
146 type="match_part", strand=strand,
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
147 qualifiers={
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
148 "source": "progressiveMauve",
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
149 'score': pid
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
150 }
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
151 )
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
152 )
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
153
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
154 for other in others:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
155 records[0].features.append(other['feature'])
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
156 return records
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
157
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
158
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: parse the arguments, run the XMFA -> GFF3
    # conversion, and write the resulting records to stdout.
    parser = argparse.ArgumentParser(description='Convert XMFA alignments to gff3', prog='xmfa2gff3')
    # The `file` builtin only exists on Python 2; argparse.FileType('r') opens
    # the given path for reading on both Python 2 and Python 3.
    parser.add_argument('xmfa_file', type=argparse.FileType('r'), help='XMFA File')
    parser.add_argument('--window_size', type=int, help='Window size for analysis', default=1000)
    parser.add_argument('--relative_to', type=str, help='Index of the parent sequence in the MSA', default='1')
    parser.add_argument('--sequences', type=argparse.FileType('r'), nargs='+',
                        help='Fasta files (in same order) passed to parent for reconstructing proper IDs')
    parser.add_argument('--version', action='version', version='%(prog)s 1.0')

    args = parser.parse_args()

    try:
        # The argparse destination names are forwarded unchanged as keyword
        # arguments to the converter.
        result = convert_xmfa_to_gff3(**vars(args))
        GFF.write(result, sys.stdout)
    finally:
        # Release the input handles argparse opened; --sequences is None when
        # the option was not supplied.
        for handle in [args.xmfa_file] + list(args.sequences or []):
            handle.close()