Repository 'ncbi_datasets'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/ncbi_datasets

Changeset 10:a3395b1d871b (2022-11-21)
Previous changeset 9:18eed8fa7f23 (2022-08-24) Next changeset 11:ac24fff14f23 (2022-12-02)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_datasets commit 3d012f702b54172f30a49543bf7e5fff2dd71f30
modified:
datasets_genome.xml
macros.xml
b
diff -r 18eed8fa7f23 -r a3395b1d871b datasets_genome.xml
--- a/datasets_genome.xml Wed Aug 24 13:00:58 2022 +0000
+++ b/datasets_genome.xml Mon Nov 21 11:40:05 2022 +0000
[
b'@@ -15,25 +15,28 @@\n     #end if\n #else:\n     \'$query.subcommand.taxon\'\n+    $query.subcommand.tax_exact_match\n #end if\n $filters.reference\n $filters.annotated\n #if $filters.assembly_level:\n---assembly-level $filters.assembly_level\n+    --assembly-level $filters.assembly_level\n #end if\n+--assembly-version $filters.assembly_version\n #if $filters.assembly_source:\n---assembly-source $filters.assembly_source\n+    --assembly-source $filters.assembly_source\n #end if\n #if $filters.chromosomes:\n---chromosomes \'$filters.chromosomes\'\n+    --chromosomes \'$filters.chromosomes\'\n #end if\n-@EXCLUDES_GENOME@\n-@INCLUDES_GENOME@\n+$filters.exclude_atypical\n+@INCLUDE@\n @RELEASED_BEFORE@\n-@RELEASED_SINCE@\n+@RELEASED_AFTER@\n #for search_term in $filters.search:\n     --search \'$filters.search_term\'\n #end for\n+--no-progressbar\n #if $uncompressed\n && 7z x -y ncbi_dataset.zip\n #else\n@@ -51,26 +54,32 @@\n                     <expand macro="text_or_file"/>\n                 </when>\n                 <when value="taxon">\n-                    <param name="taxon" type="text" label="Enter taxon" help="e.g. human, mouse, bos taurus, etc."></param>\n+                    <param name="taxon" type="text" label="Enter taxon" help="e.g. human, mouse, bos taurus, etc."/>\n+                    <param argument="--tax-exact-match" type="boolean" truevalue="--tax-exact-match" falsevalue="" label="Exclude sub-species when a species-level taxon is specified"/>\n                 </when>\n             </conditional>\n         </section>\n         <section name="filters" title="Filters and Limit">\n-            <param argument="--reference" type="boolean" truevalue="--reference" falsevalue="" label="Limit to reference and representative (GCF_ and GCA_) assemblies"/>            \n-            <expand macro="annotation"></expand>\n-            <expand macro="assembly_level"></expand>\n-            <expand macro="assembly_source"></expand>\n-            <expand macro="chromosomes"></expand>\n-            <expand macro="released_options"></expand>\n-            <expand macro="released_options" before_or_after="since"></expand>\n+            <param argument="--reference" type="boolean" truevalue="--reference" falsevalue="" label="Limit to reference and representative (GCF_ and GCA_) assemblies"/>\n+            <expand macro="annotation"/>\n+            <expand macro="assembly_level"/>\n+            <param argument="--assembly-version" type="select" label="Assembly version(s)">\n+                <option value="latest">Latest</option>\n+                <option value="all">All</option>\n+            </param>\n+            <!-- TODO add test for assembly source: according to CLI doc args are RefSeq, GenBank, All and not refseq / genbank-->\n+            <expand macro="assembly_source"/>\n+            <expand macro="chromosomes"/>\n+            <param argument="--exclude-atypical" type="boolean" truevalue="--exclude-atypical" falsevalue="" label="Exclude atypical assemblies"/>\n+            <expand macro="released_options"/>\n+            <expand macro="released_options" before_or_after="after"/>\n \n             <repeat name="search" title="Add search terms">\n                 <param argument="--search" type="text" label="Only include genomes that have the specified text in the searchable fields" help="Searchable fields are species and infraspecies, assembly name and submitter"/>\n             </repeat>\n         </section>\n         <section name="file_choices" title="File Choices" expanded="true">\n-            <expand macro="excludes_genome"></expand>\n-            <expand macro="includes_genome"></expand>\n+            <expand macro="include"/>\n         </section>\n         <param name="uncompressed" type="boolean" label="Uncompress the dataset archive" checked="true"/>\n     </inputs>\n@@ -85,53 +94,49 @@\n             <filter>uncompressed</filter>\n         </data>\n         <collection name="sequence_report" label="NCBI Genome Datasets: Sequence Data Report" type="list">\n-            <discover_datasets pattern="(?P&lt;'..b'<param name="include" value="seq-report,genome,rna,cds"/>\n             <param name="uncompressed" value="true"/>\n             <output_collection name="genome_fasta" type="list:list" count="1">\n-                <element name="GCF_000146045.2">\n-                    <element name="chrI">\n-                        <assert_contents>\n-                            <has_text text=">NC_001133.9"/>\n-                        </assert_contents>\n-                    </element>\n-                </element>\n+                <expand macro="genome_fasta_assert" el1="GCF_000146045.2" el2="GCF_000146045.2_R64" expression=">NC_[0-9]+\\.[0-9]+ Saccharomyces cerevisiae S288[Cc] (mitochondrion|chromosome .*), complete (sequence|genome)" expression_n="17"/>\n             </output_collection>\n         </test>\n         <test expect_num_outputs="3">\n             <conditional name="query|subcommand">\n-                <param name="download_by" value="accession"></param>\n+                <param name="download_by" value="accession"/>\n                 <conditional name="text_or_file">\n-                    <param name="text_or_file" value="text"></param>\n-                    <param name="accession" value="GCF_000146045.2 GCF_000002945.1"></param>\n+                    <param name="text_or_file" value="text"/>\n+                    <param name="accession" value="GCF_000146045.2 GCF_000002945.1"/>\n                 </conditional>\n             </conditional>\n-            <param name="exclude_protein" value="true"/>\n-            <param name="exclude_rna" value="true"/>\n-            <param name="exclude_seq" value="false"/>\n-            <param name="exclude_genomic_cds" value="true"/>\n-            <param name="exclude_gff3" value="true"/>\n+            <param name="include" value="seq-report,genome"/>\n             <param name="uncompressed" value="true"/>\n             <output_collection name="genome_fasta" type="list:list" count="2">\n-                <element name="GCF_000002945.1">\n-                    <element name="chrI">\n-                        <assert_contents>\n-                            <has_text text=">NC_003424.3"/>\n-                        </assert_contents>\n-                    </element>\n-                </element>\n-                <element name="GCF_000146045.2">\n-                    <element name="chrI">\n-                        <assert_contents>\n-                            <has_text text=">NC_001133.9"/>\n-                        </assert_contents>\n-                    </element>\n-                </element>\n+                <expand macro="genome_fasta_assert" el1="GCF_000002945.1" el2="GCF_000002945.1_ASM294v2" expression=">NC_[0-9]+\\.[0-9]+ Schizosaccharomyces pombe (mitochondrion|chromosome .*), complete (sequence|genome)" expression_n="4"/>\n+                <expand macro="genome_fasta_assert" el1="GCF_000146045.2" el2="GCF_000146045.2_R64" expression=">NC_[0-9]+\\.[0-9]+ Saccharomyces cerevisiae S288[Cc] (mitochondrion|chromosome .*), complete (sequence|genome)" expression_n="17"/>\n             </output_collection>\n         </test>\n+        <!-- tax_exact_match seems not able to filter out strains\n+             https://github.com/ncbi/datasets/issues/187\n+             hence we set  expect_test_failure="true"-->\n+        <test expect_num_outputs="1" expect_test_failure="true">\n+            <conditional name="query|subcommand">\n+                <param name="download_by" value="taxon"/>\n+                <param name="text_or_file" value="text"/>\n+                <param name="taxon" value="4932"/>\n+                <param name="tax_exact_match" value="true"/>\n+            </conditional>\n+            <param name="include" value=""/>\n+            <param name="uncompressed" value="true"/>\n+            <output name="genome_data_report">\n+                <assert_contents>\n+                   <has_text text="Saccharomyces cerevisiae ZTW1" negate="true"/>\n+                </assert_contents>\n+            </output>\n+        </test>\n     </tests>\n     <help>\n <![CDATA[\n'
b
diff -r 18eed8fa7f23 -r a3395b1d871b macros.xml
--- a/macros.xml Wed Aug 24 13:00:58 2022 +0000
+++ b/macros.xml Mon Nov 21 11:40:05 2022 +0000
[
b'@@ -1,5 +1,5 @@\n <macros>\n-    <token name="@TOOL_VERSION@">13.35.0</token>\n+    <token name="@TOOL_VERSION@">14.3</token>\n     <token name="@VERSION_SUFFIX@">0</token>\n     <token name="@PROFILE@">21.01</token>\n     <token name="@LICENSE@">MIT</token>\n@@ -11,28 +11,25 @@\n     <xml name="requirements">\n         <requirements>\n             <requirement type="package" version="@TOOL_VERSION@">ncbi-datasets-cli</requirement>\n-            <requirement type="package" version="2022.6.15">ca-certificates</requirement>\n+            <requirement type="package" version="2022.9.24">ca-certificates</requirement>\n             <requirement type="package" version="16.02">p7zip</requirement>\n         </requirements>\n     </xml>\n     <xml name="annotation">\n         <param argument="--annotated" type="boolean" truevalue="--annotated" falsevalue="" label="Only include genomes with annotation ?"/>\n     </xml>\n-    <xml name="dehydrated">\n-        <param argument="--dehydrated" type="boolean" truevalue="--dehydrated" falsevalue="" label="Download a dehydrated zip archive including the data report and locations of data files ?" help="Use the rehydrate tools to retrieve data files"/>\n-    </xml>\n     <xml name="assembly_level">\n         <param argument="--assembly-level" type="select" label="Restrict assemblies to a comma-separated list of one or more of these" multiple="true" optional="true">\n             <option value="chromosome">Chromosome</option>\n-            <option value="complete_genome">Complete Genome</option>\n+            <option value="complete">Complete Genome</option>\n             <option value="contig">Contig</option>\n             <option value="scaffold">Scaffold</option>\n         </param>\n     </xml>\n     <xml name="assembly_source">\n-        <param argument="--assembly-source" type="select" optional="true">\n+        <param argument="--assembly-source" type="select" optional="true" label="Assembly source" help="Default (nothing selected) is both " >\n             <option value="refseq">RefSeq</option>\n-            <option value="genabnk">GenBank</option>\n+            <option value="genbank">GenBank</option>\n         </param>\n     </xml>\n     <xml name="text_or_file" token_what="accession" token_what_extended="NCBI Assembly accession" token_help="Can be NCBI Assembly or BioProject accession">\n@@ -62,58 +59,47 @@\n             </sanitizer>\n         </param>\n     </xml>\n-    <xml name="include" token_include_what="gbff" token_include_label="Include GenBank flat file sequence and annotation, if available">\n-        <param argument="--include-@INCLUDE_WHAT@" type="boolean" truevalue="--include-@INCLUDE_WHAT@" falsevalue="" label="@INCLUDE_LABEL@" />\n-    </xml>\n-    <xml name="includes_genome">\n-        <expand macro="include" include_what="gbff" include_label="Include GenBank flat file sequence and annotation, if available"/>\n-        <expand macro="include" include_what="gtf" include_label="Include gtf annotation file, if available"/>\n-    </xml>\n-    <xml name="exclude" token_exclude_what="gff3" token_exclude_label="Exclude gff3 annotation file" token_checked="false">\n-        <param argument="--exclude-@EXCLUDE_WHAT@" type="boolean" truevalue="--exclude-@EXCLUDE_WHAT@" falsevalue="" label="@EXCLUDE_LABEL@" checked="@CHECKED@"/>\n-    </xml>\n-    <xml name="excludes_genome">\n-        <expand macro="exclude" exclude_what="seq" exclude_label="Exclude genomic sequence file"/>\n-        <expand macro="exclude" exclude_what="gff3" exclude_label="Exclude gff3 annotation file"/>\n-        <expand macro="exclude" exclude_what="genomic-cds" exclude_label="Exclude cds from genomic sequence file"/>\n-        <expand macro="exclude" exclude_what="protein" exclude_label="Exclude protein sequence file"/>\n-        <expand macro="exclude" exclude_what="rna" exclude_label="Exclude transcript sequence file"/>\n-    </xml>\n-    <xml name="excludes_gene">\n-        <expand macro="exclude" exclude_what="gene" exclude_label="Exclude gene sequence file"/>\n-     '..b'le"/>\n-        <expand macro="exclude" exclude_what="rna" exclude_label="Exclude transcript sequence file"/>\n+    <xml name="include">\n+        <param argument="--include" type="select" multiple="true" optional="true">\n+            <option value="genome" selected="true">genomic sequence (genome)</option>\n+            <option value="rna">transcript (rna)</option>\n+            <option value="protein">amnio acid sequences (protein)</option>\n+            <option value="cds">nucleotide coding sequences (cds)</option>\n+            <option value="gff3">general feature file (gff3)</option>\n+            <option value="gtf">gene transfer format (gtf)</option>\n+            <option value="gbff">GenBank flat file (gbff)</option>\n+            <option value="seq-report">sequence report file (seq-report)</option>\n+        </param>\n     </xml>\n-    <xml name="excludes_virus_protein">\n-        <yield/>\n-        <expand macro="exclude" exclude_what="protein" exclude_label="Exclude protein sequence file"/>\n-        <expand macro="exclude" exclude_what="pdb" exclude_label="Exclude protein structure files (pdb)"/>\n-        <expand macro="exclude" exclude_what="gpff" exclude_label="Exclude protein sequence and annotation in GenPept flat file"/>\n-        <expand macro="exclude" exclude_what="cds" exclude_label="Exclude CDS sequence file"/>\n-    </xml>\n-    <xml name="excludes_virus_genome">\n-        <expand macro="excludes_virus_protein">\n-            <expand macro="exclude" exclude_what="seq" exclude_label="Exclude genomic sequence file"/>\n-        </expand>\n-    </xml>\n-    <token name="@EXCLUDES_GENOME@">$file_choices.exclude_gff3 $file_choices.exclude_genomic_cds $file_choices.exclude_protein $file_choices.exclude_rna $file_choices.exclude_seq</token>\n-    <token name="@EXCLUDES_GENE@">$exclude_gene $exclude_protein $exclude_rna</token>\n-    <token name="@EXCLUDES_VIRUS_PROTEIN@">$exclude_protein $exclude_pdb $exclude_gpff $exclude_cds</token>\n-    <token name="@EXCLUDES_VIRUS_GENOME@">$exclude_seq @EXCLUDES_VIRUS_PROTEIN@</token>\n-    <xml name="includes_virus_genome">\n-        <expand macro="include" include_what="gbff" include_label="Include GenBank flat file sequence and annotation"/>\n-    </xml>\n-    <token name="@INCLUDES_GENOME@">$file_choices.include_gbff $file_choices.include_gtf</token>\n-    <token name="@INCLUDES_VIRUS_GENOME@">$include_gbff</token>\n+    <token name="@INCLUDE@"><![CDATA[\n+        --include\n+        #if $file_choices.include\n+            #echo ",".join($file_choices.include)\n+        #else\n+            none\n+        #end if\n+    ]]></token>\n     <xml name="released_options" token_released_what="genomes" token_before_or_after="before">\n-        <param argument="--released-@BEFORE_OR_AFTER@" type="text" optional="true" label="Only include @RELEASED_WHAT@ that have been released @BEFORE_OR_AFTER@ a specified date (MM/DD/YYYY)"></param>\n+        <param argument="--released-@BEFORE_OR_AFTER@" type="text" optional="true" label="Only include @RELEASED_WHAT@ that have been released @BEFORE_OR_AFTER@ a specified date (MM/DD/YYYY)">\n+            <validator type="regex" message="enter a date in the form MM/DD/YYYY">[0-9]{2}/[0-9]{2}/[0-9]{4}</validator>\n+        </param>\n     </xml>\n     <token name="@RELEASED_BEFORE@">#if $filters.released_before:\n --released-before \'$filters.released_before\'\n #end if\n     </token>\n-    <token name="@RELEASED_SINCE@">#if $filters.released_since:\n---released-since \'$filters.released_since\'\n+    <token name="@RELEASED_AFTER@">#if $filters.released_after:\n+--released-after \'$filters.released_after\'\n #end if\n     </token>\n+\n+    <xml name="genome_fasta_assert" tokens="el1,el2,expression" token_expression_n="1">\n+        <element name="@EL1@">\n+            <element name="@EL2@">\n+                <assert_contents>\n+                    <has_text_matching expression="@EXPRESSION@" n="@EXPRESSION_N@"/>\n+                </assert_contents>\n+            </element>\n+        </element>\n+    </xml>\n </macros>\n'