Mercurial > repos > curtisross > remove_fasta_description
changeset 0:2b42545705fa draft
Uploaded Python Script
author | curtisross |
---|---|
date | Thu, 23 Sep 2021 16:24:09 +0000 |
parents | |
children | d85af06ab3db |
files | fasta_remove_id.py |
diffstat | 1 files changed, 28 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
"""Strip the description from every FASTA header, keeping only the record ID.

Reads a FASTA file and writes the cleaned records to standard output in
FASTA format.
"""
import sys
import argparse
import logging
from Bio import SeqIO

logging.basicConfig(level=logging.INFO)
log = logging.getLogger()


def drop_id(fasta_file=None):
    """Yield each record of *fasta_file* with its description cleared.

    Args:
        fasta_file: A path or an open text handle accepted by
            ``SeqIO.parse(..., "fasta")``.

    Yields:
        The parsed records, in file order, with ``description`` set to an
        empty string and the sequence cut at the first ``"##"`` if one occurs.
    """
    for rec in SeqIO.parse(fasta_file, "fasta"):
        rec.description = ""
        # Mid-file comment lines (such as those in Apollo exports) end up
        # appended to the end of the previous sequence by the parser, so
        # everything from the first "##" onwards is dropped.
        marker = str(rec.seq).find("##")
        if marker != -1:
            rec.seq = rec.seq[0:marker]
        yield rec


def main():
    """Parse the command line and write the cleaned records to stdout."""
    parser = argparse.ArgumentParser(
        description="Remove description text from FASTA headers, "
        "keeping only the record ID"
    )
    parser.add_argument("fasta_file", type=argparse.FileType("r"), help="FASTA file")
    args = parser.parse_args()

    # SeqIO.write accepts an iterator of records, so the generator is
    # streamed straight to stdout without building a list.
    SeqIO.write(drop_id(fasta_file=args.fasta_file), sys.stdout, "fasta")


if __name__ == "__main__":
    main()