diff gbk2fa.py @ 25:5c7b70713fb5 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit a4c17efb8ec4b3a315766f5b5602effa275fade3
author iuc
date Wed, 03 Aug 2022 16:33:45 +0000
parents cfcf33df7fc0
children
line wrap: on
line diff
--- a/gbk2fa.py	Wed Oct 13 23:30:29 2021 +0000
+++ b/gbk2fa.py	Wed Aug 03 16:33:45 2022 +0000
@@ -1,43 +1,47 @@
 import argparse
 import bz2
-import contextlib
 import gzip
-import sys
 
-import magic
 from Bio import SeqIO
 
-parser = argparse.ArgumentParser()
-parser.add_argument("genbank_file", help="GenBank input file. Can be compressed with gzip or bzip2")
-parser.add_argument("fasta_file", help="FASTA output datset")
-parser.add_argument("--remove_version", dest="remove_version", action="store_true", help="Remove version number from NCBI form formatted accession numbers. For example, this would convert 'B000657.2' to 'B000657'")
-args = parser.parse_args()
 
-gbk_filename = args.genbank_file
-fa_filename = args.fasta_file
+def get_opener(gbk_filename):
+    """Return bz2.open or gzip.open if the file reads as such, else open."""
+    for opener in (bz2.open, gzip.open):
+        try:
+            # Probe one byte; the with-block closes the handle either way.
+            with opener(gbk_filename) as probe:
+                probe.read(1)
+            return opener
+        except OSError:
+            continue  # probe failed for this opener; try the next one
+    return open
 
 
-@contextlib.contextmanager
-def get_file_handle(gbk_filename):
-    f_type = magic.from_file(args.genbank_file, mime=True)
-    if f_type == 'text/plain':
-        input_handle = open(gbk_filename, "r")
-    elif f_type == 'application/gzip' or f_type == 'application/x-gzip':
-        input_handle = gzip.open(gbk_filename, "rt")
-    elif f_type == 'application/x-bzip2':
-        input_handle = bz2.open(gbk_filename, "rt")
-    else:
-        sys.exit("Cannot process file of type {}. Only plain, gzip'ed, and bzip2'ed genbank files are accepted ".format(f_type))
-    yield input_handle
-    input_handle.close()
+# Command line: two positional paths plus an optional ID-trimming flag.
+parser = argparse.ArgumentParser()
+parser.add_argument(
+    "genbank_file",
+    help="GenBank input file. Can be compressed with gzip or bzip2"
+)
+parser.add_argument("fasta_file", help="FASTA output dataset")
+# When set, the record ID is cut at its first '.' before being written.
+parser.add_argument(
+    "--remove_version", action="store_true",
+    help="Remove version number from NCBI-formatted accession numbers. "
+         "For example, this would convert 'B000657.2' to 'B000657'"
+)
+args = parser.parse_args()
 
 
-with get_file_handle(gbk_filename) as input_handle, open(fa_filename, "w") as output_handle:
-
-    for seq_record in SeqIO.parse(input_handle, "genbank"):
+gbk_open = get_opener(args.genbank_file)
+with gbk_open(args.genbank_file, 'rt') as input_handle, \
+     open(args.fasta_file, 'w') as output_handle:
+    for seq_record in SeqIO.parse(input_handle, 'genbank'):
         if args.remove_version:
             seq_id = seq_record.id.split('.')[0]
         else:
             seq_id = seq_record.id
         print('Writing FASTA record: {}'.format(seq_id))
-        output_handle.write(">{}\n{}\n".format(seq_id, seq_record.seq))
+        print('>' + seq_id, file=output_handle)
+        print(seq_record.seq, file=output_handle)