Mercurial > repos > bgruening > glimmer_glimmer_to_gff
comparison glimmer2seq.py @ 0:650106d2da39 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
author | bgruening |
---|---|
date | Tue, 28 Nov 2017 10:09:27 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:650106d2da39 |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 Input: DNA FASTA file + Glimmer ORF file | |
4 Output: ORF sequences as FASTA file | |
5 Author: Bjoern Gruening | |
6 """ | |
7 import sys | |
8 | |
9 from Bio import SeqIO | |
10 from Bio.SeqRecord import SeqRecord | |
11 | |
12 | |
def glimmer2seq(glimmer_prediction=None, genome_sequence=None, outfile=None):
    """Write the sequence of every ORF listed in a Glimmer prediction file.

    The genome FASTA file is indexed by each record's full description line.
    Every ``>`` header in the prediction file selects the matching FASTA
    record; every following ORF line (``name start end ...``, coordinates
    1-based and inclusive) is cut out of that record and collected. When
    ``start > end`` the ORF is read as lying on the reverse strand and its
    reverse complement is emitted.

    Args:
        glimmer_prediction: Path to the Glimmer ORF prediction file.
            Falls back to ``sys.argv[1]`` when omitted.
        genome_sequence: Path to the DNA FASTA file. Falls back to
            ``sys.argv[2]`` when omitted.
        outfile: Path of the FASTA file to write. Falls back to
            ``sys.argv[3]`` when omitted.

    Raises:
        SystemExit: If an argument is omitted and the command line does not
            supply it (non-zero exit status, message on stderr).
        KeyError: If a prediction header has no matching FASTA record.
        ValueError: If an ORF line is malformed or precedes any header.
    """
    # Resolve the command-line fallbacks at call time. Using sys.argv[N] as
    # the parameter defaults evaluated them when the function was defined,
    # which raised IndexError before the missing-argument check could run.
    supplied = (glimmer_prediction, genome_sequence, outfile)
    resolved = []
    for position, value in enumerate(supplied, 1):
        if value is None and len(sys.argv) > position:
            value = sys.argv[position]
        resolved.append(value)
    if any(value is None for value in resolved):
        # A string argument makes sys.exit print to stderr and exit with 1,
        # so calling tools can detect the failure.
        sys.exit("Missing input values.")
    glimmer_prediction, genome_sequence, outfile = resolved

    with open(genome_sequence) as fasta_handle:
        sequences = dict(
            (record.description, record)
            for record in SeqIO.parse(fasta_handle, "fasta")
        )

    seq_records = []
    current = None  # FASTA record selected by the most recent '>' header
    with open(glimmer_prediction) as glimmer_handle:
        for line_number, line in enumerate(glimmer_handle, 1):
            if not line.strip():
                continue

            if line.startswith('>'):
                header = line[1:].strip()
                if header not in sequences:
                    raise KeyError(
                        "Sequence %r (prediction line %d) not found in %s"
                        % (header, line_number, genome_sequence)
                    )
                current = sequences[header]
                continue

            if current is None:
                raise ValueError(
                    "ORF on line %d appears before any '>' sequence header"
                    % line_number
                )

            # Split on whitespace instead of slicing fixed columns so that
            # long ORF ids or large coordinates cannot shift the fields.
            fields = line.split()
            if len(fields) < 3:
                raise ValueError(
                    "Malformed ORF on line %d: %r" % (line_number, line)
                )
            orf_name = fields[0]
            orf_start = int(fields[1])
            orf_end = int(fields[2])

            # NOTE(review): start > end is taken to mean "reverse strand";
            # genes wrapping the origin of a circular genome are not handled
            # here - confirm whether that case needs support.
            if orf_start <= orf_end:
                orf_seq = current.seq[orf_start - 1:orf_end]
            else:
                orf_seq = current.seq[orf_end - 1:orf_start].reverse_complement()
            seq_records.append(
                SeqRecord(orf_seq, id=orf_name, description=current.description)
            )

    SeqIO.write(seq_records, outfile, "fasta")


if __name__ == "__main__":
    # No arguments: the three paths are taken from the command line.
    glimmer2seq()