Mercurial > repos > bgruening > glimmer_hmm
comparison glimmerHMM/glimmerhmm_gff_to_sequence.py @ 0:c9699375fcf6 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/glimmer_hmm commit 0dc67759bcbdf5a8a285ded9ba751340d741fe63
author | bgruening |
---|---|
date | Wed, 13 Jul 2016 10:55:48 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c9699375fcf6 |
---|---|
1 #!/usr/bin/env python | |
2 """Convert GlimmerHMM GFF3 gene predictions into protein sequences. | |
3 | |
4 This works with the GlimmerHMM GFF3 output format: | |
5 | |
6 ##gff-version 3 | |
7 ##sequence-region Contig5.15 1 47390 | |
8 Contig5.15 GlimmerHMM mRNA 323 325 . + . ID=Contig5.15.path1.gene1;Name=Contig5.15.path1.gene1 | |
9 Contig5.15 GlimmerHMM CDS 323 325 . + 0 ID=Contig5.15.cds1.1;Parent=Contig5.15.path1.gene1;Name=Contig5.15.path1.gene1;Note=final-exon | |
10 | |
11 http://www.cbcb.umd.edu/software/GlimmerHMM/ | |
12 | |
13 Modified version of the converter from Brad Chapman: https://github.com/chapmanb/bcbb/blob/master/biopython/glimmergff_to_proteins.py | |
14 | |
15 Usage: | |
16 glimmer_to_proteins.py <glimmer output> <ref fasta> <output file> <convert to protein ... False|True> | |
17 """ | |
18 import sys | |
19 import os | |
20 import operator | |
21 | |
22 from Bio import SeqIO | |
23 from Bio.SeqRecord import SeqRecord | |
24 | |
25 from BCBio import GFF | |
26 | |
27 def main(glimmer_file, ref_file, out_file, to_protein): | |
28 with open(ref_file) as in_handle: | |
29 ref_recs = SeqIO.to_dict(SeqIO.parse(in_handle, "fasta")) | |
30 | |
31 base, ext = os.path.splitext(glimmer_file) | |
32 | |
33 with open(out_file, "w") as out_handle: | |
34 SeqIO.write(protein_recs(glimmer_file, ref_recs, to_protein), out_handle, "fasta") | |
35 | |
36 def protein_recs(glimmer_file, ref_recs, to_protein): | |
37 """Generate protein records from GlimmerHMM gene predictions. | |
38 """ | |
39 with open(glimmer_file) as in_handle: | |
40 for rec in glimmer_predictions(in_handle, ref_recs): | |
41 for feature in rec.features: | |
42 seq_exons = [] | |
43 for cds in feature.sub_features: | |
44 seq_exons.append(rec.seq[ | |
45 cds.location.nofuzzy_start: | |
46 cds.location.nofuzzy_end]) | |
47 gene_seq = reduce(operator.add, seq_exons) | |
48 if feature.strand == -1: | |
49 gene_seq = gene_seq.reverse_complement() | |
50 | |
51 if to_protein: | |
52 yield SeqRecord(gene_seq.translate(), feature.qualifiers["ID"][0], "", "") | |
53 else: | |
54 yield SeqRecord(gene_seq, feature.qualifiers["ID"][0], "", "") | |
55 | |
56 def glimmer_predictions(in_handle, ref_recs): | |
57 """Parse Glimmer output, generating SeqRecord and SeqFeatures for predictions | |
58 """ | |
59 for rec in GFF.parse(in_handle, target_lines=1000, base_dict=ref_recs): | |
60 yield rec | |
61 | |
62 if __name__ == "__main__": | |
63 if len(sys.argv) != 3: | |
64 print __doc__ | |
65 sys.exit() | |
66 main(*sys.argv[1:]) |