comparison transtermhp.py @ 0:c28817831a24 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
author iuc
date Fri, 09 Oct 2015 09:22:42 -0400
parents
children 1a1ec22a7e28
comparison
equal deleted inserted replaced
-1:000000000000 0:c28817831a24
1 #!/usr/bin/env python
2 import sys
3 import re
4 import subprocess
5 from Bio import SeqIO
6 from BCBio import GFF
7 from Bio.SeqFeature import SeqFeature, FeatureLocation
8
9
10 def main(expterm, fasta, gff3):
11 with open(fasta, 'r') as handle:
12 seq_dict = SeqIO.to_dict(SeqIO.parse(handle, "fasta"))
13
14 # Build coords file
15 with open(gff3, 'r') as handle:
16 for rec in GFF.parse(handle, base_dict=seq_dict):
17 with open('tmp.coords', 'w') as coords:
18 for feat in rec.features:
19 if feat.type == 'gene':
20 coords.write('\t'.join([
21 feat.id,
22 str(feat.location.start + 1),
23 str(feat.location.end),
24 rec.id,
25 ]) + '\n')
26 with open('tmp.fasta', 'w') as fasta_handle:
27 SeqIO.write(rec, fasta_handle, 'fasta')
28
29 cmd = ['transterm', '-p', expterm, fasta, 'tmp.coords']
30 output = subprocess.check_output(cmd)
31 # TERM 1 4342 - 4366 + F 93 -11.5 -3.22878 | opp_overlap 4342, overlap 4340 4357
32 ttre = re.compile(
33 '^ (?P<name>.*) (?P<start>\d+) - (?P<end>\d+)\s+'
34 '(?P<strand>[-+])\s+(?P<loc>[GFRTHNgfr]+)\s+'
35 '(?P<conf>\d+)\s+(?P<hp>[0-9.-]+)\s+(?P<tail>[0-9.-]+)'
36 )
37
38 rec.features = []
39 batches = output.split('SEQUENCE ')
40 for batch in batches[1:]:
41 batch_lines = batch.split('\n')
42 # Strip the header
43 interesting = batch_lines[2:]
44 unformatted = [x for x in interesting if x.startswith(' ')][0::2]
45 for terminator in unformatted:
46 m = ttre.match(terminator)
47 if m:
48 start = int(m.group('start')) - 1
49 end = int(m.group('end'))
50 if m.group('strand') == '+':
51 strand = 1
52 else:
53 strand = 0
54
55 feature = SeqFeature(
56 FeatureLocation(start, end),
57 type="terminator",
58 strand=strand,
59 qualifiers={
60 "source": "TransTermHP_2.09",
61 "score": m.group('conf'),
62 "ID": m.group('name'),
63 }
64 )
65 rec.features.append(feature)
66 yield rec
67
68 if __name__ == '__main__':
69 for record in main(*sys.argv[1:4]):
70 GFF.write([record], sys.stdout)