annotate xmfa2gff3.py @ 4:4d869208bd52 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 525bd8ae422c82ea727907e4d7ce7461d196611f"
author iuc
date Wed, 13 Oct 2021 11:56:19 +0000
parents 97a43bcbf44d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
1 #!/usr/bin/env python
1
bca52822843e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 0
diff changeset
2 import argparse
bca52822843e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 0
diff changeset
3 import logging
0
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
4 import sys
1
bca52822843e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 0
diff changeset
5
bca52822843e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 0
diff changeset
6 from BCBio import GFF
0
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
7 from Bio import SeqIO
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
8 from Bio.Seq import Seq
1
bca52822843e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 0
diff changeset
9 from Bio.SeqFeature import (
bca52822843e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 0
diff changeset
10 FeatureLocation,
bca52822843e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 0
diff changeset
11 SeqFeature
bca52822843e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 0
diff changeset
12 )
0
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
13 from Bio.SeqRecord import SeqRecord
1
bca52822843e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 0
diff changeset
14
0
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
# Configure the root logger to emit INFO and above, then create this module's logger.
15 logging.basicConfig(level=logging.INFO)
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
16 log = logging.getLogger(__name__)
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
17
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
18
3
97a43bcbf44d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit fc61c9d4850614a6580d25f92e3032dc8edbc10d"
iuc
parents: 1
diff changeset
19 # Patch bcbio gff to work around url encoding issue. This is clearly
97a43bcbf44d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit fc61c9d4850614a6580d25f92e3032dc8edbc10d"
iuc
parents: 1
diff changeset
20 # sub-optimal but we should transition to the newer library.
97a43bcbf44d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit fc61c9d4850614a6580d25f92e3032dc8edbc10d"
iuc
parents: 1
diff changeset
21 def _new_format_keyvals(self, keyvals):
97a43bcbf44d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit fc61c9d4850614a6580d25f92e3032dc8edbc10d"
iuc
parents: 1
diff changeset
22 return ";".join(["%s=%s" % (k, ",".join(v)) for (k, v) in sorted(keyvals.items())])
97a43bcbf44d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit fc61c9d4850614a6580d25f92e3032dc8edbc10d"
iuc
parents: 1
diff changeset
23
97a43bcbf44d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit fc61c9d4850614a6580d25f92e3032dc8edbc10d"
iuc
parents: 1
diff changeset
24
97a43bcbf44d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit fc61c9d4850614a6580d25f92e3032dc8edbc10d"
iuc
parents: 1
diff changeset
# Monkey-patch: rebind the class attribute so GFF3Writer uses _new_format_keyvals.
25 GFF.GFFOutput.GFF3Writer._format_keyvals = _new_format_keyvals
97a43bcbf44d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit fc61c9d4850614a6580d25f92e3032dc8edbc10d"
iuc
parents: 1
diff changeset
26
97a43bcbf44d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit fc61c9d4850614a6580d25f92e3032dc8edbc10d"
iuc
parents: 1
diff changeset
27
0
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
def parse_xmfa(xmfa):
    """Yield the alignment blocks (LCBs) of an XMFA file, one list per block.

    Simple stand-in parser until
    https://github.com/biopython/biopython/pull/544 is available.

    Args:
        xmfa: an iterable of text lines, e.g. an open XMFA file handle. It
            is consumed lazily, line by line.

    Yields:
        A list of dicts for every block. A block ends at a line containing
        only ``=``; a final block without that terminator is yielded too.
        Each dict has the keys ``rid`` (all header fields after ``>`` joined
        with ``_``), ``id``, ``start``, ``end``, ``strand`` (``1`` when the
        header's strand field is ``+``, otherwise ``-1``) and ``seq`` (the
        concatenated sequence lines).

    Raises:
        KeyError: if sequence data appears before any ``>`` header.
        ValueError, IndexError: if a header does not look like
            ``> id:start-end strand ...``.
    """
    current_lcb = []
    current_seq = {}
    for raw_line in xmfa:
        # Comment/metadata lines start with '#' in the first column.
        if raw_line.startswith('#'):
            continue

        line = raw_line.strip()
        # Blank lines carry no data; skipping them avoids a KeyError when
        # one appears while no sequence record is open (e.g. after '=').
        if not line:
            continue

        if line == '=':
            # End of block: flush the record being built, then emit.
            if 'id' in current_seq:
                current_lcb.append(current_seq)
                current_seq = {}
            yield current_lcb
            current_lcb = []
        elif line.startswith('>'):
            # A new header closes the previous record of the same block.
            if 'id' in current_seq:
                current_lcb.append(current_seq)
            data = line.split()
            seq_id, loc = data[1].split(':')
            start, end = loc.split('-')
            current_seq = {
                'rid': '_'.join(data[1:]),
                'id': seq_id,
                'start': int(start),
                'end': int(end),
                'strand': 1 if data[2] == '+' else -1,
                'seq': '',
            }
        else:
            current_seq['seq'] += line

    # Do not silently drop a trailing block that lacks its '=' terminator.
    if 'id' in current_seq:
        current_lcb.append(current_seq)
    if current_lcb:
        yield current_lcb
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
62
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
63
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
64 def _percent_identity(a, b):
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
65 """Calculate % identity, ignoring gaps in the host sequence
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
66 """
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
67 match = 0
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
68 mismatch = 0
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
69 for char_a, char_b in zip(list(a), list(b)):
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
70 if char_a == '-':
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
71 continue
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
72 if char_a == char_b:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
73 match += 1
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
74 else:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
75 mismatch += 1
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
76
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
77 if match + mismatch == 0:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
78 return 0
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
79 return 100 * float(match) / (match + mismatch)
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
80
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
81
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
82 def _id_tn_dict(sequences):
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
83 """Figure out sequence IDs
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
84 """
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
85 label_convert = {}
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
86 if sequences is not None:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
87 if len(sequences) == 1:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
88 for i, record in enumerate(SeqIO.parse(sequences[0], 'fasta')):
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
89 label_convert[str(i + 1)] = record.id
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
90 else:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
91 for i, sequence in enumerate(sequences):
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
92 for record in SeqIO.parse(sequence, 'fasta'):
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
93 label_convert[str(i + 1)] = record.id
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
94 continue
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
95 return label_convert
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
96
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
97
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
98 def convert_xmfa_to_gff3(xmfa_file, relative_to='1', sequences=None, window_size=1000):
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
99 label_convert = _id_tn_dict(sequences)
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
100
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
101 lcbs = parse_xmfa(xmfa_file)
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
102
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
103 records = [SeqRecord(Seq("A"), id=label_convert.get(relative_to, relative_to))]
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
104 for lcb in lcbs:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
105 ids = [seq['id'] for seq in lcb]
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
106
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
107 # Doesn't match part of our sequence
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
108 if relative_to not in ids:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
109 continue
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
110
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
111 # Skip sequences that are JUST our "relative_to" genome
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
112 if len(ids) == 1:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
113 continue
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
114
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
115 parent = [seq for seq in lcb if seq['id'] == relative_to][0]
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
116 others = [seq for seq in lcb if seq['id'] != relative_to]
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
117
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
118 for other in others:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
119 other['feature'] = SeqFeature(
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
120 FeatureLocation(parent['start'], parent['end'] + 1),
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
121 type="match", strand=parent['strand'],
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
122 qualifiers={
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
123 "source": "progressiveMauve",
3
97a43bcbf44d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit fc61c9d4850614a6580d25f92e3032dc8edbc10d"
iuc
parents: 1
diff changeset
124 "Target": " ".join(map(str, [label_convert.get(other['id'], other['id']), other['start'], other['end'], '+' if other['strand'] > 0 else '-'])),
0
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
125 "ID": label_convert.get(other['id'], 'xmfa_' + other['rid'])
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
126 }
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
127 )
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
128
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
129 for i in range(0, len(lcb[0]['seq']), window_size):
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
130 block_seq = parent['seq'][i:i + window_size]
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
131 real_window_size = len(block_seq)
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
132 real_start = abs(parent['start']) - parent['seq'][0:i].count('-') + i
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
133 real_end = real_start + real_window_size - block_seq.count('-')
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
134
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
135 if (real_end - real_start) < 10:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
136 continue
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
137
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
138 if parent['start'] < 0:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
139 strand = -1
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
140 else:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
141 strand = 1
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
142
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
143 for other in others:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
144 pid = _percent_identity(block_seq, other['seq'][i:i + real_window_size])
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
145 # Ignore 0% identity sequences
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
146 if pid == 0:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
147 continue
1
bca52822843e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 0
diff changeset
148
bca52822843e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 0
diff changeset
149 # Support for Biopython 1.68 and above, which removed sub_features
bca52822843e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 0
diff changeset
150 if not hasattr(other['feature'], "sub_features"):
bca52822843e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents: 0
diff changeset
151 other['feature'].sub_features = []
0
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
152 other['feature'].sub_features.append(
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
153 SeqFeature(
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
154 FeatureLocation(real_start, real_end),
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
155 type="match_part", strand=strand,
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
156 qualifiers={
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
157 "source": "progressiveMauve",
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
158 'score': pid
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
159 }
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
160 )
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
161 )
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
162
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
163 for other in others:
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
164 records[0].features.append(other['feature'])
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
165 return records
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
166
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
167
74093fb62bdf planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/progressivemauve commit 2645abbd04dd68266f995b8259e991c31388cda8
iuc
parents:
diff changeset
if __name__ == '__main__':
    # Script entry point: build the command-line interface, run the
    # conversion, and write the returned records to stdout via GFF.write.
    parser = argparse.ArgumentParser(
        prog='xmfa2gff3',
        description='Convert XMFA alignments to gff3',
    )

    # Positional input: the alignment file, opened for reading by argparse.
    parser.add_argument(
        'xmfa_file',
        type=argparse.FileType('r'),
        help='XMFA File',
    )

    # Optional tuning parameters.
    parser.add_argument(
        '--window_size',
        type=int,
        default=1000,
        help='Window size for analysis',
    )
    parser.add_argument(
        '--relative_to',
        type=str,
        default='1',
        help='Index of the parent sequence in the MSA',
    )
    parser.add_argument(
        '--sequences',
        type=argparse.FileType('r'),
        nargs='+',
        help='Fasta files (in same order) passed to parent for reconstructing proper IDs',
    )
    parser.add_argument(
        '--version',
        action='version',
        version='%(prog)s 1.0',
    )

    args = parser.parse_args()

    # Every parsed option is forwarded as a keyword argument, so the option
    # names above must match the parameter names of convert_xmfa_to_gff3.
    result = convert_xmfa_to_gff3(**vars(args))
    GFF.write(result, sys.stdout)