annotate transtermhp.py @ 0:c28817831a24 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
author iuc
date Fri, 09 Oct 2015 09:22:42 -0400
parents
children 1a1ec22a7e28
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c28817831a24 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff changeset
1 #!/usr/bin/env python
c28817831a24 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff changeset
2 import sys
c28817831a24 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff changeset
3 import re
c28817831a24 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff changeset
4 import subprocess
c28817831a24 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff changeset
5 from Bio import SeqIO
c28817831a24 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff changeset
6 from BCBio import GFF
c28817831a24 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff changeset
7 from Bio.SeqFeature import SeqFeature, FeatureLocation
c28817831a24 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff changeset
8
c28817831a24 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff changeset
9
c28817831a24 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff changeset
# Matches one terminator line of TransTermHP output, e.g.
#   TERM 1 4342 - 4366 + F 93 -11.5 -3.22878 | opp_overlap 4342, overlap 4340 4357
# Columns are matched with ``\s+`` and the name lazily, so column padding
# never ends up inside the captured name.
_TERM_LINE_RE = re.compile(
    r'^\s+(?P<name>.*?)\s+(?P<start>\d+)\s+-\s+(?P<end>\d+)\s+'
    r'(?P<strand>[-+])\s+(?P<loc>[GFRTHNgfr]+)\s+'
    r'(?P<conf>\d+)\s+(?P<hp>[0-9.-]+)\s+(?P<tail>[0-9.-]+)'
)


def _parse_terminators(output):
    """Yield ``(name, start, end, strand, conf)`` for each terminator line.

    ``output`` is the text printed by ``transterm``. ``start`` is converted
    to a 0-based offset (reported value minus one) and ``end`` is kept as
    reported, giving a half-open interval. ``strand`` is ``1`` for ``+`` and
    ``-1`` for ``-``. ``conf`` is returned as the matched string.
    """
    # Everything before the first 'SEQUENCE ' marker is preamble.
    for batch in output.split('SEQUENCE ')[1:]:
        # Strip the header
        body_lines = batch.split('\n')[2:]
        indented = [line for line in body_lines if line.startswith(' ')]
        # Only every other indented line is examined.
        # NOTE(review): presumably each terminator line is followed by an
        # indented hairpin-sequence line -- confirm against transterm output.
        for line in indented[0::2]:
            match = _TERM_LINE_RE.match(line)
            if match is None:
                continue
            yield (
                match.group('name'),
                int(match.group('start')) - 1,
                int(match.group('end')),
                1 if match.group('strand') == '+' else -1,
                match.group('conf'),
            )


def main(expterm, fasta, gff3):
    """Run TransTermHP on each GFF3 record and yield it with terminator features.

    Args:
        expterm: value passed to ``transterm`` through its ``-p`` option.
        fasta: path to a FASTA file holding the sequences referenced by
            ``gff3``.
        gff3: path to a GFF3 file; its ``gene`` features are written to the
            coordinates file handed to ``transterm``.

    Yields:
        Each record parsed from ``gff3``, with ``features`` replaced by the
        ``terminator`` features parsed from the ``transterm`` output.

    Raises:
        subprocess.CalledProcessError: if ``transterm`` exits non-zero.

    Side effects:
        Writes ``tmp.coords`` and ``tmp.fasta`` in the current working
        directory, overwriting them for every record.
    """
    with open(fasta, 'r') as handle:
        seq_dict = SeqIO.to_dict(SeqIO.parse(handle, "fasta"))

    with open(gff3, 'r') as handle:
        for rec in GFF.parse(handle, base_dict=seq_dict):
            # Build coords file: name, 1-based start, end, sequence id.
            with open('tmp.coords', 'w') as coords:
                for feat in rec.features:
                    if feat.type != 'gene':
                        continue
                    coords.write('\t'.join([
                        feat.id,
                        str(feat.location.start + 1),
                        str(feat.location.end),
                        rec.id,
                    ]) + '\n')
            with open('tmp.fasta', 'w') as fasta_handle:
                SeqIO.write(rec, fasta_handle, 'fasta')

            # Run on the single-record FASTA written above, so the output
            # only describes ``rec`` and not every sequence in ``fasta``.
            cmd = ['transterm', '-p', expterm, 'tmp.fasta', 'tmp.coords']
            # universal_newlines=True yields text rather than bytes, which
            # the str-based parsing below requires on Python 3.
            output = subprocess.check_output(cmd, universal_newlines=True)

            # The strand lives on the location; -1 marks the reverse strand.
            rec.features = [
                SeqFeature(
                    FeatureLocation(start, end, strand=strand),
                    type="terminator",
                    qualifiers={
                        "source": "TransTermHP_2.09",
                        "score": conf,
                        "ID": name,
                    },
                )
                for name, start, end, strand, conf in _parse_terminators(output)
            ]
            yield rec
c28817831a24 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff changeset
67
c28817831a24 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff changeset
if __name__ == '__main__':
    # Usage: transtermhp.py <expterm> <fasta> <gff3>
    # Only the first three command-line arguments are forwarded to main().
    cli_args = sys.argv[1:4]
    for annotated in main(*cli_args):
        # Each record is written to stdout as soon as it is produced.
        GFF.write([annotated], sys.stdout)