Mercurial > repos > cpt > cpt_gbk_to_5col
view gbk_to_five_col.py @ 1:1bdd481d5c25 draft
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author | cpt |
---|---|
date | Mon, 05 Jun 2023 02:42:57 +0000 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python
import BIO_FIX_TOPO  # NOQA
import argparse
import logging
from Bio import SeqIO

logging.basicConfig(level=logging.INFO)
log = logging.getLogger()

# Read in Genbank file and parse features
# Output features into Five Column format

"""
>Feature SeqID
Line 1
    Column 1: Start location (first nucleotide) of a feature
    Column 2: Stop location (last nucleotide) of a feature
    Column 3: Feature name (for example, 'CDS' or 'mRNA' or 'rRNA' or 'gene' or 'exon')
Line2:
    Column 4: Qualifier name (for example, 'product' or 'number' or 'gene' or 'note')
    Column 5: Qualifier value

Repeat for each feature in a seq
Repeat Line 2 for each qualifier in a feature
"""


def _part_bounds(part):
    """Return the (start, stop) pair to print for one location part.

    The part's ``start`` is shifted by one and its ``end`` is used as-is,
    matching the "first nucleotide" / "last nucleotide" columns described
    in the format notes above. For a reverse-strand part the two values are
    swapped, so the first column holds the larger coordinate.

    Only an explicit ``-1`` strand is treated as reverse. A strand of
    ``None`` (no strand recorded on the location) is printed in forward
    orientation; comparing ``None > 0`` would raise ``TypeError`` on
    Python 3.
    """
    low = int(part.start) + 1
    high = int(part.end)
    if part.strand == -1:
        return high, low
    return low, high


def gbk_to_5col(genbank):
    """Converts genbank to BankIt five column format

    Args:
        genbank: file handle (or path) passed straight to ``SeqIO.parse``
            with the ``"genbank"`` format.

    Prints one ``>Feature <record id>`` header per record, then for every
    feature other than ``source``: one line per location part (the feature
    type is printed on the first part only), followed by one line per
    qualifier value. Output goes to standard output; nothing is returned.
    """
    for record in SeqIO.parse(genbank, "genbank"):
        print(">Feature %s" % record.id)
        for feature in record.features:
            # "source" features are not emitted in the table.
            if feature.type == "source":
                continue

            for index, part in enumerate(feature.location.parts):
                start, end = _part_bounds(part)
                if index == 0:
                    # Only the first interval carries the feature name.
                    print("%d\t%d\t%s" % (start, end, feature.type))
                else:
                    print("%d\t%d" % (start, end))

            # Qualifier lines leave the first three columns empty.
            for qualifier, values in feature.qualifiers.items():
                for value in values:
                    print("\t\t\t%s\t%s" % (qualifier, value))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Convert a Genbank file into five column format"
    )
    parser.add_argument("genbank", type=argparse.FileType("r"), help="Genbank file")

    args = vars(parser.parse_args())
    gbk_to_5col(**args)