diff datasets_genome.xml @ 12:d78faac2c6ef draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_datasets commit affffdbe7237a2c0ba5793c0e7dd11cebb8413a9
author iuc
date Sat, 03 Dec 2022 13:29:32 +0000
parents ac24fff14f23
children 1e188c9610c3
line wrap: on
line diff
--- a/datasets_genome.xml	Fri Dec 02 10:52:48 2022 +0000
+++ b/datasets_genome.xml	Sat Dec 03 13:29:32 2022 +0000
@@ -1,9 +1,11 @@
 <tool id="datasets_download_genome" name="NCBI Datasets Genomes" profile="@PROFILE@" license="@LICENSE@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
     <description>download genome sequence, annotation and metadata</description>
+    <expand macro="bio_tools"/>
     <macros>
         <import>macros.xml</import>
     </macros>
     <expand macro="requirements"></expand>
+    <expand macro="version_command"/>
     <command><![CDATA[
 #import re
 @SETUP_CERTIFICATES@
@@ -63,8 +65,10 @@
     && find ncbi_dataset \( -name "*.faa" -o -name "*.fna" -o -name "*.faa.gz" -o -name "*.fna.gz" \) -exec sh -c 'mv {} \$(echo {} | sed "s/.f[an]a\(.gz\)\?\$/.fasta\1/")' \;
 
     ## unzip all compressed (non-fasta) files (jsonl files are just named .gz)
+    ## note "not decompress" means that the datasets are provided uncompressed (datasets rehydrate is called with --gzip)
+    ##      in this case we need to decompress all datasets that don't have a Galaxy datatype allowing for compression
     && find ncbi_dataset -name "*.jsonl.gz" -exec sh -c 'mv {} \$(dirname {})/\$(basename {} .gz)' \;
-    #if $file_choices.decompress
+    #if not $file_choices.decompress
         && find ncbi_dataset \( -name "*.gz" ! -name "*fasta.gz" \) -exec gunzip {} \;
     #end if
 
@@ -166,7 +170,7 @@
             So with decompress="true" more powerful assertions are possible.
             A single test checks the default, ie decompress="false".
         -->
-        <test expect_num_outputs="1">
+        <test expect_num_outputs="3">
             <conditional name="query|subcommand">
                 <param name="download_by" value="taxon"/>
                 <param name="taxon_positional" value="human"/>
@@ -174,7 +178,9 @@
             <param name="chromosomes" value="21"/>
             <param name="released_before" value="01/01/2018"/>
             <section name="file_choices">
-                <param name="include" value=""/>
+                <!-- include a sequence (which should be downloaded as fasta.gz)
+                     and one non-sequence (which should be decompressed) output -->
+                <param name="include" value="rna,gff3"/>
             </section>
             <output name="genome_data_report">
                 <assert_contents>
@@ -183,6 +189,26 @@
                     <has_n_columns n="4"/>
                 </assert_contents>
             </output>
+            <output_collection name="rna_fasta" type="list" count="1">
+                <element name="GCF_000306695.2" decompress="true">
+                    <assert_contents>
+                        <has_text text=">"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="genomic_gff" type="list">
+                <element name="GCF_000306695.2">
+                    <assert_contents>
+                        <has_n_lines min="1000000"/>
+                        <has_line line="##gff-version 3"/>
+                        <!-- TODO this will only work when the galaxy python packages for 22.05 have been released
+                            <has_n_columns n="9" comment="#"/> -->
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <assert_command>
+                <has_text text="gunzip"/>
+            </assert_command>
         </test>
         <test expect_num_outputs="2">
             <conditional name="query|subcommand">