Repository 'snpeff'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/snpeff

Changeset 25:5c7b70713fb5 (2022-08-03)
Previous changeset 24:cfcf33df7fc0 (2021-10-13) Next changeset 26:5b80f544c67f (2022-11-25)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit a4c17efb8ec4b3a315766f5b5602effa275fade3
modified:
gbk2fa.py
snpEff_create_db.xml
snpEff_macros.xml
b
diff -r cfcf33df7fc0 -r 5c7b70713fb5 gbk2fa.py
--- a/gbk2fa.py Wed Oct 13 23:30:29 2021 +0000
+++ b/gbk2fa.py Wed Aug 03 16:33:45 2022 +0000
[
@@ -1,43 +1,47 @@
 import argparse
 import bz2
-import contextlib
 import gzip
-import sys
 
-import magic
 from Bio import SeqIO
 
-parser = argparse.ArgumentParser()
-parser.add_argument("genbank_file", help="GenBank input file. Can be compressed with gzip or bzip2")
-parser.add_argument("fasta_file", help="FASTA output datset")
-parser.add_argument("--remove_version", dest="remove_version", action="store_true", help="Remove version number from NCBI form formatted accession numbers. For example, this would convert 'B000657.2' to 'B000657'")
-args = parser.parse_args()
 
-gbk_filename = args.genbank_file
-fa_filename = args.fasta_file
+def get_opener(gbk_filename):
+    try:
+        bz2.open(gbk_filename).read(1)
+        return bz2.open
+    except OSError:
+        pass
+    try:
+        gzip.open(gbk_filename).read(1)
+        return gzip.open
+    except OSError:
+        return open
 
 
-@contextlib.contextmanager
-def get_file_handle(gbk_filename):
-    f_type = magic.from_file(args.genbank_file, mime=True)
-    if f_type == 'text/plain':
-        input_handle = open(gbk_filename, "r")
-    elif f_type == 'application/gzip' or f_type == 'application/x-gzip':
-        input_handle = gzip.open(gbk_filename, "rt")
-    elif f_type == 'application/x-bzip2':
-        input_handle = bz2.open(gbk_filename, "rt")
-    else:
-        sys.exit("Cannot process file of type {}. Only plain, gzip'ed, and bzip2'ed genbank files are accepted ".format(f_type))
-    yield input_handle
-    input_handle.close()
+parser = argparse.ArgumentParser()
+parser.add_argument(
+    "genbank_file",
+    help="GenBank input file. Can be compressed with gzip or bzip2"
+)
+parser.add_argument(
+    "fasta_file", help="FASTA output datset"
+)
+parser.add_argument(
+    "--remove_version", action="store_true",
+    help="Remove version number from NCBI form formatted accession numbers. "
+         "For example, this would convert 'B000657.2' to 'B000657'"
+)
+args = parser.parse_args()
 
 
-with get_file_handle(gbk_filename) as input_handle, open(fa_filename, "w") as output_handle:
-
-    for seq_record in SeqIO.parse(input_handle, "genbank"):
+gbk_open = get_opener(args.genbank_file)
+with gbk_open(args.genbank_file, 'rt') as input_handle, \
+     open(args.fasta_file, 'w') as output_handle:
+    for seq_record in SeqIO.parse(input_handle, 'genbank'):
         if args.remove_version:
             seq_id = seq_record.id.split('.')[0]
         else:
             seq_id = seq_record.id
         print('Writing FASTA record: {}'.format(seq_id))
-        output_handle.write(">{}\n{}\n".format(seq_id, seq_record.seq))
+        print('>' + seq_id, file=output_handle)
+        print(seq_record.seq, file=output_handle)
b
diff -r cfcf33df7fc0 -r 5c7b70713fb5 snpEff_create_db.xml
--- a/snpEff_create_db.xml Wed Oct 13 23:30:29 2021 +0000
+++ b/snpEff_create_db.xml Wed Aug 03 16:33:45 2022 +0000
[
b'@@ -1,77 +1,66 @@\n-<tool id="snpEff_build_gb" name="SnpEff build:" version="@WRAPPER_VERSION@.galaxy4">\n+<tool id="snpEff_build_gb" name="SnpEff build:" version="@WRAPPER_VERSION@.galaxy5">\n     <description> database from Genbank or GFF record</description>\n     <macros>\n         <import>snpEff_macros.xml</import>\n     </macros>\n     <requirements>\n         <expand macro="requirement" />\n-        <requirement type="package" version="3.6">python</requirement>\n-        <requirement type="package" version="1.70">biopython</requirement>\n-        <requirement type="package" version="0.4.15">python-magic</requirement>\n-        <requirement type="package" version="5.32">libmagic</requirement>\n+        <requirement type="package" version="1.79">biopython</requirement>\n     </requirements>\n     <expand macro="stdio" />\n     <expand macro="version_command" />\n     <command><![CDATA[\n-        #if str( $input_type.input_type_selector ) == "gb":\n-            #if str( $input_type.fasta ) == "yes":\n-                python3 \'$__tool_directory__/gbk2fa.py\' \'${input_type.input_gbk}\' \'${output_fasta}\'\n-                #if $input_type.remove_version:\n-                    \'${input_type.remove_version}\'\n-                #end if\n-                &&\n-            #end if\n+        #if str($input_type.input_type_selector) == "gb" and str($input_type.fasta) == "yes":\n+            python3 \'$__tool_directory__/gbk2fa.py\' \'${input_type.input}\' \'${output_fasta}\' ${input_type.remove_version} &&\n         #end if\n \n         mkdir -p \'${snpeff_output.files_path}\'/\'${genome_version}\' &&\n \n-        #if str( $input_type.input_type_selector ) == "gb":\n-            #if $input_type.input_gbk.is_of_type("genbank"):\n-                ln -s \'${input_type.input_gbk}\' \'${snpeff_output.files_path}\'/\'${genome_version}\'/genes.gbk &&\n-            #elif $input_type.input_gbk.is_of_type("genbank.gz"):\n-                ln -s \'${input_type.input_gbk}\' \'${snpeff_output.files_path}\'/\'${genome_version}\'/genes.gbk.gz &&\n+        #if str($input_type.input_type_selector) == "gb":\n+            #if $input_type.input.is_of_type("genbank"):\n+                ln -s \'${input_type.input}\' \'${snpeff_output.files_path}/${genome_version}/genes.gbk\' &&\n+            #elif $input_type.input.is_of_type("genbank.gz"):\n+                ln -s \'${input_type.input}\' \'${snpeff_output.files_path}/${genome_version}/genes.gbk.gz\' &&\n             #end if\n-        #elif str( $input_type.input_type_selector ) == "gff":\n-\t        #if $input_type.reference_source.reference_source_selector == "history":\n-\t\t\t  #if $input_type.reference_source.input_fasta.is_of_type("fasta"):\n-                ln -s \'${input_type.reference_source.input_fasta}\' \'${snpeff_output.files_path}\'/\'${genome_version}\'/sequences.fa &&\n-\t            #elif $input_type.reference_source.input_fasta.is_of_type("fasta.gz"):\n-\t                ln -s \'${input_type.reference_source.input_fasta}\' \'${snpeff_output.files_path}\'/\'${genome_version}\'/sequences.fa.gz &&\n-\t            #end if\n-\t\t\t#elif $input_type.reference_source.reference_source_selector == "cached":\n-\t\t\t  ln -s \'${input_type.reference_source.ref_file.fields.path}\' \'${snpeff_output.files_path}\'/\'${genome_version}\'/sequences.fa &&\n-\t\t\t#end if\n-            ln -s \'${input_type.input_gff}\' \'${snpeff_output.files_path}\'/\'${genome_version}\'/genes.gff &&\n-        #elif str( $input_type.input_type_selector ) == "gtf":\n-\t        #if $input_type.reference_source.reference_source_selector == "history":\n-\t\t\t  #if $input_type.reference_source.input_fasta.is_of_type("fasta"):\n-                ln -s \'${input_type.reference_source.input_fasta}\' \'${snpeff_output.files_path}\'/\'${genome_version}\'/sequences.fa &&\n-\t            #elif $input_type.reference_source.input_fasta.is_of_type("fasta.gz"):\n-\t                ln -s \'${input_type.reference_source.input_fasta}\' \'${snpeff_output.files_path}\'/\'${genome_version}\'/sequences.fa.gz &&\n-\t            #end if\n-\t\t\t#elif $input_type.reference_source.reference_source_sele'..b'</when>\n-\t\t        </conditional>\n+                <param name="input" type="data" format="gff3" label="GFF dataset to build database from" help="This GFF file will be used to generate snpEff database"/>\n+                <expand macro="ref_select" />\n             </when>\n             <when value="gtf">\n-                <param name="input_gtf" type="data" format="gtf" label="GTF dataset to build database from" help="This GTF file will be used to generate snpEff database"/>\n-                <conditional name="reference_source">\n-\t\t            <param name="reference_source_selector" type="select" label="Choose the source for the reference genome">\n-\t\t                <option value="cached">Locally cached</option>\n-\t\t                <option value="history">History</option>\n-\t\t            </param>\n-\t\t            <when value="cached">\n-\t\t                <param name="ref_file" type="select" label="Select reference genome">\n-\t\t                    <options from_data_table="fasta_indexes"/>\n-\t\t                </param>\n-\t\t            </when>\n-\t\t            <when value="history"> \n-\t\t                <param name="input_fasta" type="data" format="fasta,fasta.gz" label="Genome in FASTA format" help="This dataset is required for generating SnpEff database. See help section below."/>\n-\t\t            </when>\n-\t\t        </conditional>\n+                <param name="input" type="data" format="gtf" label="GTF dataset to build database from" help="This GTF file will be used to generate snpEff database"/>\n+                <expand macro="ref_select" />\n             </when>\n         </conditional>\n         <param name="codon_table" type="select" label="Select genetic code for this sequence" help="If this sequence uses non-standard genetic code, select one from these options">\n@@ -162,7 +125,7 @@\n         <test>\n             <param name="genome_version" value="pBR322"/>\n             <param name="input_type_selector" value="gb"/>\n-            <param name="input_gbk" value="pBR322.gbk" />\n+            <param name="input" value="pBR322.gbk" />\n             <output name="snpeff_output">\n                 <assert_contents>\n                     <has_text text="pBR322" />\n@@ -173,7 +136,7 @@\n         <test>\n             <param name="genome_version" value="pBR322"/>\n             <param name="input_type_selector" value="gb"/>\n-            <param name="input_gbk" value="pBR322.gbk.gz" />\n+            <param name="input" value="pBR322.gbk.gz" />\n             <output name="snpeff_output">\n                 <assert_contents>\n                     <has_text text="pBR322" />\n@@ -186,7 +149,7 @@\n             <param name="input_type_selector" value="gff"/>\n             <param name="reference_source_selector" value="history"/>\n             <param name="input_fasta" value="pBR322_test2.fna" />\n-            <param name="input_gff" value="pBR322.gff3"/>\n+            <param name="input" value="pBR322.gff3"/>\n             <output name="snpeff_output">\n                 <assert_contents>\n                     <has_text text="pBR322" />\n@@ -198,7 +161,7 @@\n             <param name="input_type_selector" value="gff"/>\n             <param name="reference_source_selector" value="history"/>\n             <param name="input_fasta" value="pBR322_test2.fna.gz" />\n-            <param name="input_gff" value="pBR322.gff3"/>\n+            <param name="input" value="pBR322.gff3"/>\n             <output name="snpeff_output">\n                 <assert_contents>\n                     <has_text text="pBR322" />\n@@ -210,7 +173,7 @@\n             <param name="input_type_selector" value="gtf"/>\n             <param name="reference_source_selector" value="history"/>\n             <param name="input_fasta" value="Saccharomyces_mito.fa.gz" />\n-            <param name="input_gtf" value="Saccharomyces_mito.gtf" />\n+            <param name="input" value="Saccharomyces_mito.gtf" />\n             <output name="snpeff_output">\n                 <assert_contents>\n                     <has_text text="Saccharomyces_mito" />\n'
b
diff -r cfcf33df7fc0 -r 5c7b70713fb5 snpEff_macros.xml
--- a/snpEff_macros.xml Wed Oct 13 23:30:29 2021 +0000
+++ b/snpEff_macros.xml Wed Aug 03 16:33:45 2022 +0000
b
@@ -18,6 +18,22 @@
   <token name="@SNPEFF_VERSION@">SnpEff4.3</token>
   <token name="@SNPEFF_DATABASE_URL@">https://sourceforge.net/projects/snpeff/files/databases/v4_3/</token>
   <token name="@JAVA_OPTIONS@">-Xmx\${GALAXY_MEMORY_MB:-8192}m</token>
+  <xml name="ref_select">
+    <conditional name="reference_source">
+        <param name="reference_source_selector" type="select" label="Choose the source for the reference genome">
+            <option value="cached">Locally cached</option>
+            <option value="history">History</option>
+        </param>
+        <when value="cached">
+            <param name="ref_file" type="select" label="Select reference genome">
+                <options from_data_table="fasta_indexes"/>
+            </param>
+        </when>
+        <when value="history">
+            <param name="input_fasta" type="data" format="fasta,fasta.gz" label="Genome in FASTA format" help="This dataset is required for generating SnpEff database. See help section below."/>
+        </when>
+    </conditional>
+  </xml>
   <token name="@EXTERNAL_DOCUMENTATION@">