Mercurial > repos > bgruening > glimmer_gbk_to_orf
view glimmer2seq.py @ 3:44dc09edd97b draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit a80e3e4aa3a40970af507bf9119cf7f1c2ffb336
author | iuc |
---|---|
date | Mon, 16 Dec 2024 14:42:47 +0000 |
parents | 6351903666da |
children |
line wrap: on
line source
#!/usr/bin/env python
"""
Input: DNA FASTA file + Glimmer ORF file
Output: ORF sequences as FASTA file
Author: Bjoern Gruening
"""
import sys

from Bio import SeqIO
from Bio.SeqRecord import SeqRecord


def glimmer2seq(glimmer_prediction=None, genome_sequence=None, outfile=None):
    """Write the ORFs listed in a Glimmer prediction file as FASTA records.

    The prediction file is read line by line. A line starting with ``>``
    selects the genome record whose FASTA description equals the rest of that
    line; every other non-blank line is read as whitespace-separated fields
    ``<orf id> <start> <end> ...``. Coordinates are treated as 1-based and
    inclusive. When ``start <= end`` the subsequence is taken as is, otherwise
    the span ``end..start`` is reverse-complemented.

    NOTE(review): ORFs that wrap around the origin of a circular genome are
    not treated specially here -- confirm whether the upstream Glimmer run
    can emit them.

    Args:
        glimmer_prediction: Path to the Glimmer ORF prediction file.
            Defaults to ``sys.argv[1]``.
        genome_sequence: Path to the genome FASTA file.
            Defaults to ``sys.argv[2]``.
        outfile: Path (or handle) the ORF FASTA records are written to.
            Defaults to ``sys.argv[3]``.

    Raises:
        SystemExit: An argument was omitted and the command line does not
            supply three values.
        KeyError: A ``>`` header names a sequence missing from the FASTA file.
        ValueError: An ORF line is malformed, or it precedes any ``>`` header
            while the FASTA file does not hold exactly one sequence.
    """
    # Resolve omitted arguments from the command line at call time. Putting
    # sys.argv[...] in the signature would evaluate it when the module is
    # loaded and raise IndexError before any friendly message could be shown.
    if glimmer_prediction is None or genome_sequence is None or outfile is None:
        if len(sys.argv) < 4:
            # A string argument makes the process exit with a non-zero status.
            sys.exit("Missing input values.")
        if glimmer_prediction is None:
            glimmer_prediction = sys.argv[1]
        if genome_sequence is None:
            genome_sequence = sys.argv[2]
        if outfile is None:
            outfile = sys.argv[3]

    seq_records = []
    # Context managers close both inputs even if parsing fails part-way.
    with open(glimmer_prediction, "r") as glimmerfile, \
            open(genome_sequence) as sequence:
        sequences = {}
        for record in SeqIO.parse(sequence, "fasta"):
            sequences[record.description] = record

        current = None  # genome record the following ORF lines refer to
        for line in glimmerfile:
            if not line.strip():
                continue
            if line.startswith('>'):
                current = sequences[line[1:].strip()]
                continue

            if current is None:
                # ORF line without a preceding header: only unambiguous when
                # the genome file holds a single sequence.
                if len(sequences) != 1:
                    raise ValueError(
                        "ORF line found before any '>' header: {!r}".format(line)
                    )
                current = next(iter(sequences.values()))

            # Split on whitespace rather than fixed columns so long ORF ids
            # and coordinates wider than eight digits are still parsed.
            fields = line.split()
            if len(fields) < 3:
                raise ValueError(
                    "Malformed Glimmer ORF line: {!r}".format(line)
                )
            orf_name = fields[0]
            orf_start = int(fields[1])
            orf_end = int(fields[2])

            if orf_start <= orf_end:
                orf_seq = current.seq[orf_start - 1:orf_end]
            else:
                # start > end: take the span and reverse-complement it.
                orf_seq = current.seq[orf_end - 1:orf_start].reverse_complement()
            seq_records.append(
                SeqRecord(orf_seq, id=orf_name, description=current.description)
            )

    SeqIO.write(seq_records, outfile, "fasta")


if __name__ == "__main__":
    glimmer2seq()