view glimmer2seq.py @ 2:b1ad88bbc5fa draft default tip

Uploaded
author bgruening
date Mon, 12 Aug 2013 11:55:07 -0400
parents 841357e0acbf
children
line wrap: on
line source

#!/usr/bin/env python
"""
Input: DNA FASTA file + Glimmer ORF file
Output: ORF sequences as FASTA file
Author: Bjoern Gruening
"""
import sys, os
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord

def glimmer2seq( glimmer_prediction = sys.argv [1], genome_sequence = sys.argv[2], outfile = sys.argv[3] ):
    if len(sys.argv) >= 4:
        glimmerfile = open( glimmer_prediction, "r")
        sequence = open( genome_sequence )
    else:
        print "Missing input values."
        sys.exit()

    fastafile = SeqIO.parse(sequence, "fasta")

    sequences = dict()
    seq_records = list()
    for entry in fastafile:
        sequences[entry.description] = entry

    for line in glimmerfile:
        if line.startswith('>'):
            entry = sequences[ line[1:].strip() ]
        else:
            orf_start = int(line[8:17])
            orf_end = int(line[18:26])

            orf_name = line[0:8]
            if orf_start <= orf_end:
                seq_records.append( SeqRecord( entry.seq[ orf_start-1 : orf_end ], id = orf_name, description = entry.description ) )
            else:
                seq_records.append( SeqRecord( entry.seq[ orf_end-1 : orf_start ].reverse_complement(), id = orf_name, description = entry.description ) )

    SeqIO.write( seq_records, outfile, "fasta" )
    glimmerfile.close()
    sequence.close()

if __name__ == "__main__" :
    glimmer2seq()