comparison datasets_genome.xml @ 12:d78faac2c6ef draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_datasets commit affffdbe7237a2c0ba5793c0e7dd11cebb8413a9
author iuc
date Sat, 03 Dec 2022 13:29:32 +0000
parents ac24fff14f23
children 1e188c9610c3
comparison
equal deleted inserted replaced
11:ac24fff14f23 12:d78faac2c6ef
1 <tool id="datasets_download_genome" name="NCBI Datasets Genomes" profile="@PROFILE@" license="@LICENSE@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> 1 <tool id="datasets_download_genome" name="NCBI Datasets Genomes" profile="@PROFILE@" license="@LICENSE@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
2 <description>download genome sequence, annotation and metadata</description> 2 <description>download genome sequence, annotation and metadata</description>
3 <expand macro="bio_tools"/>
3 <macros> 4 <macros>
4 <import>macros.xml</import> 5 <import>macros.xml</import>
5 </macros> 6 </macros>
6 <expand macro="requirements"></expand> 7 <expand macro="requirements"></expand>
8 <expand macro="version_command"/>
7 <command><![CDATA[ 9 <command><![CDATA[
8 #import re 10 #import re
9 @SETUP_CERTIFICATES@ 11 @SETUP_CERTIFICATES@
10 datasets download genome $query.subcommand.download_by 12 datasets download genome $query.subcommand.download_by
11 #if $query.subcommand.download_by == 'accession': 13 #if $query.subcommand.download_by == 'accession':
61 63
62 ## rename all faa, fna (resp faa.gz, fna.gz) to fasta (resp fasta.gz) to allow discovery 64 ## rename all faa, fna (resp faa.gz, fna.gz) to fasta (resp fasta.gz) to allow discovery
63 && find ncbi_dataset \( -name "*.faa" -o -name "*.fna" -o -name "*.faa.gz" -o -name "*.fna.gz" \) -exec sh -c 'mv {} \$(echo {} | sed "s/.f[an]a\(.gz\)\?\$/.fasta\1/")' \; 65 && find ncbi_dataset \( -name "*.faa" -o -name "*.fna" -o -name "*.faa.gz" -o -name "*.fna.gz" \) -exec sh -c 'mv {} \$(echo {} | sed "s/.f[an]a\(.gz\)\?\$/.fasta\1/")' \;
64 66
65 ## unzip all compressed (non-fasta) files (jsonl files are just named .gz) 67 ## unzip all compressed (non-fasta) files (jsonl files are just named .gz)
68 ## note "not decompress" means that the datasets are provided uncompressed (datasets rehydrate is called with --gzip)
69 ## in this case we need to decompress all datasets that don't have a Galaxy datatype allowing for compression
66 && find ncbi_dataset -name "*.jsonl.gz" -exec sh -c 'mv {} \$(dirname {})/\$(basename {} .gz)' \; 70 && find ncbi_dataset -name "*.jsonl.gz" -exec sh -c 'mv {} \$(dirname {})/\$(basename {} .gz)' \;
67 #if $file_choices.decompress 71 #if not $file_choices.decompress
68 && find ncbi_dataset \( -name "*.gz" ! -name "*fasta.gz" \) -exec gunzip {} \; 72 && find ncbi_dataset \( -name "*.gz" ! -name "*fasta.gz" \) -exec gunzip {} \;
69 #end if 73 #end if
70 74
71 #if "seq-report" in $file_choices.include 75 #if "seq-report" in $file_choices.include
72 && find ncbi_dataset -name sequence_report.jsonl -exec sh -c 'dataformat tsv genome-seq --inputfile {} > \$(dirname {})/\$(basename {} .jsonl).tsv' \; 76 && find ncbi_dataset -name sequence_report.jsonl -exec sh -c 'dataformat tsv genome-seq --inputfile {} > \$(dirname {})/\$(basename {} .jsonl).tsv' \;
164 of compressed files https://github.com/galaxyproject/galaxy/pull/15085 168 of compressed files https://github.com/galaxyproject/galaxy/pull/15085
165 169
166 So with decompress="true" more powerful assertions are possible. 170 So with decompress="true" more powerful assertions are possible.
167 A single test checks the default, ie decompress="false". 171 A single test checks the default, ie decompress="false".
168 --> 172 -->
169 <test expect_num_outputs="1"> 173 <test expect_num_outputs="3">
170 <conditional name="query|subcommand"> 174 <conditional name="query|subcommand">
171 <param name="download_by" value="taxon"/> 175 <param name="download_by" value="taxon"/>
172 <param name="taxon_positional" value="human"/> 176 <param name="taxon_positional" value="human"/>
173 </conditional> 177 </conditional>
174 <param name="chromosomes" value="21"/> 178 <param name="chromosomes" value="21"/>
175 <param name="released_before" value="01/01/2018"/> 179 <param name="released_before" value="01/01/2018"/>
176 <section name="file_choices"> 180 <section name="file_choices">
177 <param name="include" value=""/> 181 <!-- include a sequence (which should be downloaded as fasta.gz)
182 and one non-sequence (which should be decompressed) output -->
183 <param name="include" value="rna,gff3"/>
178 </section> 184 </section>
179 <output name="genome_data_report"> 185 <output name="genome_data_report">
180 <assert_contents> 186 <assert_contents>
181 <has_text text="Assembly Accession&#009;Assembly Name&#009;Assembly Submitter&#009;Organism Name"/> 187 <has_text text="Assembly Accession&#009;Assembly Name&#009;Assembly Submitter&#009;Organism Name"/>
182 <has_n_lines n="144"/> 188 <has_n_lines n="144"/>
183 <has_n_columns n="4"/> 189 <has_n_columns n="4"/>
184 </assert_contents> 190 </assert_contents>
185 </output> 191 </output>
192 <output_collection name="rna_fasta" type="list" count="1">
193 <element name="GCF_000306695.2" decompress="true">
194 <assert_contents>
195 <has_text text=">"/>
196 </assert_contents>
197 </element>
198 </output_collection>
199 <output_collection name="genomic_gff" type="list">
200 <element name="GCF_000306695.2">
201 <assert_contents>
202 <has_n_lines min="1000000"/>
203 <has_line line="##gff-version 3"/>
204 <!-- TODO this will only work when the galaxy python packages for 22.05 have been released
205 <has_n_columns n="9" comment="#"/> -->
206 </assert_contents>
207 </element>
208 </output_collection>
209 <assert_command>
210 <has_text text="gunzip"/>
211 </assert_command>
186 </test> 212 </test>
187 <test expect_num_outputs="2"> 213 <test expect_num_outputs="2">
188 <conditional name="query|subcommand"> 214 <conditional name="query|subcommand">
189 <param name="download_by" value="taxon"/> 215 <param name="download_by" value="taxon"/>
190 <param name="taxon_positional" value="human"/> 216 <param name="taxon_positional" value="human"/>