changeset 0:2b42545705fa draft

Uploaded Python Script
author curtisross
date Thu, 23 Sep 2021 16:24:09 +0000
parents
children d85af06ab3db
files fasta_remove_id.py
diffstat 1 files changed, 28 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fasta_remove_id.py	Thu Sep 23 16:24:09 2021 +0000
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+import sys
+import argparse
+import logging
+from Bio import SeqIO
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger()
+
+
+def drop_id(fasta_file=None):
+    """Yield each FASTA record with its description cleared and any "##" tail removed.
+
+    fasta_file is handed unchanged to SeqIO.parse using the "fasta" format.
+    Each parsed record is modified in place and then yielded, one at a time.
+    """
+    for record in SeqIO.parse(fasta_file, "fasta"):
+        # This parsing method causes mid-file comments (such as from Apollo
+        # sequences) to be appended to the end of the previous sequence, so the
+        # sequence is cut at the first "##" whenever one is present.
+        marker_pos = str(record.seq).find("##")
+        if marker_pos >= 0:
+            record.seq = record.seq[:marker_pos]
+        record.description = ""
+        yield record
+
+
+if __name__ == "__main__":
+    # Command-line entry point: read FASTA records from the given file, clean
+    # them with drop_id, and print the result to stdout in FASTA format.
+    parser = argparse.ArgumentParser(
+        description="Remove descriptions and trailing '##' comments from FASTA records"
+    )
+    parser.add_argument(
+        "fasta_file", type=argparse.FileType("r"), help="Input FASTA file"
+    )
+
+    args = parser.parse_args()
+    # drop_id is a generator, so SeqIO.write can stream every record in one call.
+    SeqIO.write(drop_id(fasta_file=args.fasta_file), sys.stdout, "fasta")