Mercurial > repos > iuc > exonerate
view exonerategff_to_gff3.py @ 3:a03dead1bede draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/exonerate commit a141c63903d1a598569985e400125d4e7de42801"
author | iuc |
---|---|
date | Sun, 01 Mar 2020 04:48:34 -0500 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python3 """ Converts a GFF produced by exonerate into a more standard GFF3 (e.g. usable in JBrowse) """ import argparse import sys from BCBio import GFF from Bio.SeqFeature import FeatureLocation, SeqFeature parser = argparse.ArgumentParser() parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin) parser.add_argument('outfile', nargs='?', type=argparse.FileType('a'), default=sys.stdout) args = parser.parse_args() scaffs = [] gene_number = 0 for scaff in GFF.parse(args.infile): scaff.annotations = {} scaff.seq = "" kept_features = [] current_gene = None exon_number = 0 last_utr = None for feature in scaff.features: if feature.type == "gene": gene_number += 1 mrna_feature = SeqFeature(FeatureLocation(feature.location.start, feature.location.end), type="mRNA", strand=feature.location.strand) mrna_feature.sub_features = [] mrna_feature.qualifiers['source'] = feature.qualifiers['source'] mrna_id = "mRNA_" + str(gene_number) mrna_feature.qualifiers['ID'] = mrna_id feature.sub_features = [mrna_feature] feature.qualifiers['ID'] = "gene_" + str(gene_number) if 'gene_orientation' in feature.qualifiers: del feature.qualifiers['gene_orientation'] if current_gene: kept_features.append(current_gene) current_gene = feature exon_number = 0 last_utr = None elif feature.type == 'utr5': feature.type = 'five_prime_UTR' feature.qualifiers['ID'] = '%s_five_prime_UTR' % (mrna_id) mrna_feature.sub_features.append(feature) last_utr = {'start': feature.location.start, 'end': feature.location.end} elif feature.type == 'utr3': feature.type = 'three_prime_UTR' feature.qualifiers['ID'] = '%s_three_prime_UTR' % (mrna_id) mrna_feature.sub_features.append(feature) last_utr = {'start': feature.location.start, 'end': feature.location.end} elif feature.type == 'exon': exon_number += 1 feature.qualifiers['ID'] = '%s_exon_%s' % (mrna_id, exon_number) mrna_feature.sub_features.append(feature) if last_utr is None: cds_feature = SeqFeature(FeatureLocation(feature.location.start, feature.location.end), type="CDS", strand=feature.location.strand) cds_feature.sub_features = [] cds_feature.qualifiers['source'] = feature.qualifiers['source'] cds_feature.qualifiers['ID'] = mrna_id + "_CDS" mrna_feature.sub_features.append(cds_feature) elif feature.location.start != last_utr['start'] or feature.location.end != last_utr['end']: if feature.location.start > last_utr['start']: cds_feature = SeqFeature(FeatureLocation(feature.location.start, last_utr['start']), type="CDS", strand=feature.location.strand) cds_feature.sub_features = [] cds_feature.qualifiers['source'] = feature.qualifiers['source'] cds_feature.qualifiers['ID'] = mrna_id + "_CDS" mrna_feature.sub_features.append(cds_feature) if feature.location.end < last_utr['end']: cds_feature = SeqFeature(FeatureLocation(feature.location.end, last_utr['end']), type="CDS", strand=feature.location.strand) cds_feature.sub_features = [] cds_feature.qualifiers['source'] = feature.qualifiers['source'] cds_feature.qualifiers['ID'] = mrna_id + "_CDS" mrna_feature.sub_features.append(cds_feature) last_utr = None elif feature.type == 'similarity': if current_gene is None: # We haven't seen any gene, just convert similarity to match feature.type = 'match' kept_features.append(feature) last_utr = None elif feature.type not in ['splice3', 'splice5', 'similarity', 'intron']: mrna_feature.sub_features.append(feature) last_utr = None # For the last one if current_gene: kept_features.append(current_gene) scaff.features = kept_features if len(kept_features): GFF.write([scaff], args.outfile)