annotate transtermhp.py @ 4:9a90656a1af9 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit a4b0969b33a68a0ea9ba12291f6694aec24f13ed
author iuc
date Tue, 30 Oct 2018 20:25:05 -0400
parents 1a1ec22a7e28
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c28817831a24 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff changeset
1 #!/usr/bin/env python
c28817831a24 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff changeset
2 import re
c28817831a24 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff changeset
3 import subprocess
3
1a1ec22a7e28 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 581d451609b919893ba53d104a5bcf2e9e565d1d
iuc
parents: 0
diff changeset
4 import sys
1a1ec22a7e28 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 581d451609b919893ba53d104a5bcf2e9e565d1d
iuc
parents: 0
diff changeset
5
0
c28817831a24 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff changeset
6 from BCBio import GFF
3
1a1ec22a7e28 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 581d451609b919893ba53d104a5bcf2e9e565d1d
iuc
parents: 0
diff changeset
7 from Bio import SeqIO
1a1ec22a7e28 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 581d451609b919893ba53d104a5bcf2e9e565d1d
iuc
parents: 0
diff changeset
8 from Bio.SeqFeature import (
1a1ec22a7e28 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 581d451609b919893ba53d104a5bcf2e9e565d1d
iuc
parents: 0
diff changeset
9 FeatureLocation,
1a1ec22a7e28 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 581d451609b919893ba53d104a5bcf2e9e565d1d
iuc
parents: 0
diff changeset
10 SeqFeature
1a1ec22a7e28 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 581d451609b919893ba53d104a5bcf2e9e565d1d
iuc
parents: 0
diff changeset
11 )
0
c28817831a24 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff changeset
12
c28817831a24 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff changeset
13
c28817831a24 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff changeset
def main(expterm, fasta, gff3):
    """Run TransTermHP on each GFF3 record and yield it with terminator features.

    For every record parsed from ``gff3`` (with sequences attached from
    ``fasta``) this writes the record's ``gene`` features to ``tmp.coords``
    and its sequence to ``tmp.fasta`` in the current working directory, runs
    the ``transterm`` executable on those two files, and replaces the
    record's features with one ``terminator`` feature per parsed hit.

    Args:
        expterm: Path passed to ``transterm -p``.
        fasta: Path to the FASTA file holding the sequences.
        gff3: Path to the GFF3 file holding the gene annotations.

    Yields:
        Each record from ``gff3``, its ``features`` list replaced by the
        predicted terminators.

    Raises:
        subprocess.CalledProcessError: If ``transterm`` exits non-zero.
    """
    with open(fasta, 'r') as handle:
        seq_dict = SeqIO.to_dict(SeqIO.parse(handle, "fasta"))

    # Compiled once: the pattern does not depend on the record being processed.
    # TERM 1 4342 - 4366 + F 93 -11.5 -3.22878 | opp_overlap 4342, overlap 4340 4357
    ttre = re.compile(
        r'^ (?P<name>.*) (?P<start>\d+) - (?P<end>\d+)\s+'
        r'(?P<strand>[-+])\s+(?P<loc>[GFRTHNgfr]+)\s+'
        r'(?P<conf>\d+)\s+(?P<hp>[0-9.-]+)\s+(?P<tail>[0-9.-]+)'
    )

    with open(gff3, 'r') as handle:
        for rec in GFF.parse(handle, base_dict=seq_dict):
            # Build coords file (1-based inclusive start, hence the +1).
            with open('tmp.coords', 'w') as coords:
                for feat in rec.features:
                    if feat.type == 'gene':
                        coords.write('\t'.join([
                            feat.id,
                            str(feat.location.start + 1),
                            str(feat.location.end),
                            rec.id,
                        ]) + '\n')
            with open('tmp.fasta', 'w') as fasta_handle:
                SeqIO.write(rec, fasta_handle, 'fasta')

            # Run on the single-record FASTA written above, not on the whole
            # input file; otherwise terminators found on every sequence would
            # be attached to the current record.
            cmd = ['transterm', '-p', expterm, 'tmp.fasta', 'tmp.coords']
            # universal_newlines=True makes check_output return text on both
            # Python 2 and 3, so the str-based splitting below works.
            output = subprocess.check_output(cmd, universal_newlines=True)

            rec.features = []
            for batch in output.split('SEQUENCE ')[1:]:
                # Strip the header
                interesting = batch.split('\n')[2:]
                # Only every second space-prefixed line is parsed; presumably
                # the skipped lines are the per-terminator detail lines --
                # verify against the TransTermHP output format.
                unformatted = [x for x in interesting if x.startswith(' ')][0::2]
                for terminator in unformatted:
                    m = ttre.match(terminator)
                    if not m:
                        continue

                    # Convert the 1-based inclusive start to 0-based.
                    start = int(m.group('start')) - 1
                    end = int(m.group('end'))
                    # Reverse strand is -1 in Biopython; 0 would not be
                    # written out as '-'.
                    strand = 1 if m.group('strand') == '+' else -1

                    feature = SeqFeature(
                        # Strand is carried by the location; newer Biopython
                        # releases no longer accept it as a SeqFeature keyword.
                        FeatureLocation(start, end, strand=strand),
                        type="terminator",
                        qualifiers={
                            "source": "TransTermHP_2.09",
                            "score": m.group('conf'),
                            "ID": m.group('name'),
                        }
                    )
                    rec.features.append(feature)
            yield rec
c28817831a24 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff changeset
71
3
1a1ec22a7e28 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 581d451609b919893ba53d104a5bcf2e9e565d1d
iuc
parents: 0
diff changeset
72
0
c28817831a24 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff changeset
if __name__ == '__main__':
    # The first three positional arguments are forwarded to main() as
    # (expterm, fasta, gff3); anything after them is ignored.
    cli_args = sys.argv[1:4]
    for annotated in main(*cli_args):
        # Each record is written to stdout on its own as soon as it is yielded.
        GFF.write([annotated], sys.stdout)