Galaxy |

Changeset 21:0f3b3813b6ae (2025-07-21)

Previous changeset 20:7b925f7c50b2 (2025-03-17) Next changeset 22:03a801ded645 (2025-07-29)

Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_datasets commit c1c3f90e4aa7dc258aa61d98ec4eac0b97eef426

modified:
datasets_gene.xml
datasets_genome.xml
macros.xml
test-data/GCF_000007445.1.genomic.gtf
test-data/genome.2.GCF_000013305.1.genomic.gtf

diff -r 7b925f7c50b2 -r 0f3b3813b6ae datasets_gene.xml
--- a/datasets_gene.xml Mon Mar 17 11:05:45 2025 +0000
+++ b/datasets_gene.xml Mon Jul 21 19:28:16 2025 +0000

[

b'@@ -4,7 +4,7 @@\n <import>macros.xml</import>\n </macros>\n <expand macro="bio_tools"/>\n- <expand macro="requirements"></expand>\n+ <expand macro="requirements"/>\n <expand macro="version_command"/>\n <command><![CDATA[\n #import re\n@@ -41,7 +41,7 @@\n \n #if $filters.fasta_filter_cond.fasta_filter_select\n #if $filters.fasta_filter_cond.fasta_filter_select == \'text\'\n- --fasta-filter #echo ",".join(f"\'{x}\'" for x in $filters.fasta_filter_cond.fasta_filter.split(\',\') if x)\n+ --fasta-filter #echo ",".join(f"\'{x}\'" for x in str($filters.fasta_filter_cond.fasta_filter).split(\',\') if x)\n #else\n --fasta-filter-file \'$filters.fasta_filter_cond.fasta_filter_file\'\n #end if\n@@ -97,8 +97,8 @@\n <param argument="--taxon" type="text" value="human" label="Species for gene symbol" help="NCBI taxid, common or scientific name">\n <sanitizer invalid_char="">\n <valid initial="string.letters">\n- <add value=" " />\n- <add value="-" />\n+ <add value=" "/>\n+ <add value="-"/>\n </valid>\n </sanitizer>\n </param>\n@@ -109,8 +109,8 @@\n <param argument="--taxon-filter" type="text" value="" label="Limit gene sequences and annotation report file to specified taxon" help="any rank, only available for WP accessions">\n <sanitizer invalid_char="">\n <valid initial="string.letters">\n- <add value=" " />\n- <add value="-" />\n+ <add value=" "/>\n+ <add value="-"/>\n </valid>\n </sanitizer>\n </param>\n@@ -133,7 +133,7 @@\n <param argument="--fasta-filter" type="text" label="RefSeq nucleotide and protein accessions" help="Comma separated">\n <sanitizer invalid_char="">\n <valid initial="string.letters,string.digits">\n- <add value="," />\n+ <add value=","/>\n </valid>\n </sanitizer>\n </param>\n@@ -235,12 +235,12 @@\n </output>\n <output name="rna_fasta">\n <assert_contents>\n- <has_text text=">"/>\n+ <has_text text=">"/>\n </assert_contents>\n </output>\n <output name="protein_fasta">\n <assert_contents>\n- <has_text text=">"/>\n+ <has_text text=">"/>\n </assert_contents>\n </output>\n </test>\n@@ -263,12 +263,12 @@\n </output>\n <output name="rna_fasta">\n <assert_contents>\n- <has_text text=">"/>\n+ <has_text text=">"/>\n </assert_contents>\n </output>\n <output name="protein_fasta">\n <assert_contents>\n- <has_text text=">"/>\n+ <has_text text=">"/>\n </assert_contents>\n </output>\n </test>\n@@ -284,6 +284,7 @@\n </conditional>\n <section name="file_choices">\n <conditional name="kingdom_cond">\n+ <param name="kingdom_sel" value="gene"/>\n <param name="include" value="gene,cds"/>\n </conditional>\n </section>\n@@ -297,17 +298,17 @@\n </output>\n <output name="gene_fasta">\n <assert_contents>\n- <has_text text=">"/>\n+ <has_text text=">"/>\n'..b'fivep_utr_fasta">\n <assert_contents>\n- <has_text text=">"/>\n+ <has_text text=">"/>\n </assert_contents>\n </output>\n </test>\n \n- <test expect_num_outputs="1">\n+ <test expect_num_outputs="3">\n <conditional name="query|subcommand">\n <param name="download_by" value="symbol"/>\n <conditional name="text_or_file">\n@@ -380,11 +376,6 @@\n </conditional>\n <param name="ortholog" value="rodentia"/>\n </conditional>\n- <section name="file_choices">\n- <conditional name="kingdom_cond">\n- <param name="include" value=""/>\n- </conditional>\n- </section>\n <output name="gene_data_report">\n <assert_contents>\n <has_text text="rat"/>\n@@ -395,7 +386,7 @@\n </output>\n </test>\n \n- <test expect_num_outputs="1">\n+ <test expect_num_outputs="3">\n <conditional name="query|subcommand">\n <param name="download_by" value="accession"/>\n <conditional name="text_or_file">\n@@ -403,11 +394,6 @@\n <param name="accession" value="NP_000483.3"/>\n </conditional>\n </conditional>\n- <section name="file_choices">\n- <conditional name="kingdom_cond">\n- <param name="include" value=""/>\n- </conditional>\n- </section>\n <output name="gene_data_report">\n <assert_contents>\n <has_text text="human"/>\n@@ -417,7 +403,7 @@\n </output>\n </test>\n \n- <test expect_num_outputs="1">\n+ <test expect_num_outputs="3">\n <conditional name="query|subcommand">\n <param name="download_by" value="accession"/>\n <conditional name="text_or_file">\n@@ -426,11 +412,6 @@\n </conditional>\n <param name="ortholog" value="all"/>\n </conditional>\n- <section name="file_choices">\n- <conditional name="kingdom_cond">\n- <param name="include" value=""/>\n- </conditional>\n- </section>\n <output name="gene_data_report">\n <assert_contents>\n <has_text text="human"/>\n@@ -439,7 +420,6 @@\n </assert_contents>\n </output>\n </test>\n-\n \n <test expect_num_outputs="4">\n <conditional name="query|subcommand">\n@@ -466,24 +446,23 @@\n </output>\n <output name="gene_fasta">\n <assert_contents>\n- <has_text text=">"/>\n+ <has_text text=">"/>\n </assert_contents>\n </output>\n <output name="gene_flanks">\n <assert_contents>\n- <has_text text=">"/>\n+ <has_text text=">"/>\n </assert_contents>\n </output>\n <output name="protein_fasta">\n <assert_contents>\n- <has_text text=">"/>\n+ <has_text text=">"/>\n </assert_contents>\n </output>\n <assert_command>\n <has_text text="include-flanks-bp 100"/>\n </assert_command>\n- </test> \n-\n+ </test>\n \n <!-- <test expect_num_outputs="1">\n <conditional name="query|subcommand">\n'

diff -r 7b925f7c50b2 -r 0f3b3813b6ae datasets_genome.xml
--- a/datasets_genome.xml Mon Mar 17 11:05:45 2025 +0000
+++ b/datasets_genome.xml Mon Jul 21 19:28:16 2025 +0000

[

b'@@ -4,9 +4,14 @@\n <import>macros.xml</import>\n </macros>\n <expand macro="bio_tools"/>\n- <expand macro="requirements"></expand>\n+ <expand macro="requirements"/>\n <expand macro="version_command"/>\n- <command><![CDATA[\n+ <stdio>\n+ <regex match="Warning" source="stderr" level="warning" description=""/>\n+ <regex match="skipping" source="stderr" level="warning" description=""/>\n+ <regex match="ERROR" level="fatal"/>\n+ </stdio>\n+ <command detect_errors="exit_code"><![CDATA[\n #import re\n @SETUP_CERTIFICATES@\n datasets download genome $query.subcommand.download_by\n@@ -116,7 +121,6 @@\n </param>\n <expand macro="released_options"/>\n <expand macro="released_options" before_or_after="after"/>\n-\n <repeat name="search" title="Add search terms">\n <param argument="--search" type="text" label="Only include genomes that have the specified text in the searchable fields" help="Searchable fields are species and infraspecies, assembly name and submitter"/>\n </repeat>\n@@ -137,35 +141,35 @@\n <outputs>\n <data name="genome_data_report" format="tabular" label="NCBI Genome Datasets: Data Report" from_work_dir="genome_data_report.tsv"/>\n <collection name="sequence_report" label="NCBI Genome Datasets: Sequence Data Report" type="list">\n- <discover_datasets pattern="(?P<identifier_0>.*?)\\/sequence_report.tsv" ext="tabular" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/>\n+ <discover_datasets pattern="(?P<identifier_0>.*?)\\/sequence_report.tsv" ext="tabular" directory="ncbi_dataset/data/" recurse="true" match_relative_path="true"/>\n <filter>file_choices[\'include\'] and "seq-report" in file_choices[\'include\']</filter>\n </collection>\n <collection name="genome_fasta" label="NCBI Genome Datasets: genome fasta" type="list:list">\n- <discover_datasets pattern="(?P<identifier_0>.*?)/(?!rna|cds_from)(?P<identifier_1>.*?)(_genomic)?\\.(?P<ext>fasta(\\.gz)?)" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/>\n+ <discover_datasets pattern="(?P<identifier_0>.*?)/(?!rna|cds_from)(?P<identifier_1>.*?)(_genomic)?\\.(?P<ext>fasta(\\.gz)?)" directory="ncbi_dataset/data/" recurse="true" match_relative_path="true"/>\n <filter>file_choices[\'include\'] and "genome" in file_choices[\'include\']</filter>\n </collection>\n <collection name="rna_fasta" label="NCBI Genome Datasets: RNA fasta" type="list">\n- <discover_datasets pattern="(?P<identifier_0>.*?)\\/rna\\.(?P<ext>fasta(\\.gz)?)$" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/>\n+ <discover_datasets pattern="(?P<identifier_0>.*?)\\/rna\\.(?P<ext>fasta(\\.gz)?)$" directory="ncbi_dataset/data/" recurse="true" match_relative_path="true"/>\n <filter>file_choices[\'include\'] and "rna" in file_choices[\'include\']</filter>\n </collection>\n <collection name="protein_fasta" label="NCBI Genome Datasets: protein fasta" type="list">\n- <discover_datasets pattern="(?P<identifier_0>.*?)\\/protein\\.(?P<ext>fasta(\\.gz)?)$" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/>\n+ <discover_datasets pattern="(?P<identifier_0>.*?)\\/protein\\.(?P<ext>fasta(\\.gz)?)$" directory="ncbi_dataset/data/" recurse="true" match_relative_path="true"/>\n <filter>file_choices[\'include\'] and "protein" in file_choices[\'include\']</filter>\n </collection>\n <collection name="genomic_cds" label="NCBI Genome Datasets: genomic cds fasta" type="list">\n- <discover_datasets pattern="(?P<identifier_0>.*?)\\/cds_from_genomic\\.(?P<ext>fasta(\\.gz)?)$" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/>\n+ <discover'..b'ame="released_before" value="01/01/2015"/>\n+ <param name="assembly_version" value="all"/>\n+ </section>\n <section name="file_choices">\n <param name="include" value="seq-report"/>\n </section>\n@@ -395,19 +409,19 @@\n <param name="decompress" value="true"/>\n </section>\n <output_collection name="genome_fasta" type="list:list" count="1">\n- <expand macro="genome_fasta_assert" el1="GCF_000146045.2" el2="GCF_000146045.2_R64" expression=">NC_[0-9]+\\.[0-9]+ Saccharomyces cerevisiae S288[Cc] (mitochondrion|chromosome .*), complete (sequence|genome)" expression_n="17"/>\n+ <expand macro="genome_fasta_assert" el1="GCF_000146045.2" el2="GCF_000146045.2_R64" expression=">NC_[0-9]+\\.[0-9]+ Saccharomyces cerevisiae S288[Cc] (mitochondrion|chromosome .*), complete (sequence|genome)" expression_n="17"/>\n </output_collection>\n <output_collection name="protein_fasta" type="list" count="1">\n <element name="GCF_000146045.2" decompress="true">\n <assert_contents>\n- <has_text text=">"/>\n+ <has_text text=">"/>\n </assert_contents>\n </element>\n </output_collection>\n <output_collection name="rna_fasta" type="list" count="1">\n <element name="GCF_000146045.2" decompress="true">\n <assert_contents>\n- <has_text text=">"/>\n+ <has_text text=">"/>\n </assert_contents>\n </element>\n </output_collection>\n@@ -437,7 +451,7 @@\n <output_collection name="protein_fasta" type="list" count="1">\n <element name="GCF_000146045.2" ftype="fasta.gz">\n <assert_contents>\n- <has_size value="1845038" delta="2000"/>\n+ <has_size value="1847862" delta="2000"/>\n </assert_contents>\n </element>\n </output_collection>\n@@ -463,22 +477,21 @@\n </section>\n <output_collection name="sequence_report" type="list" count="2"/>\n <output_collection name="genome_fasta" type="list:list" count="2">\n- <expand macro="genome_fasta_assert" el1="GCF_000002945.2" el2="GCF_000002945.2_ASM294v3" expression=">NC_[0-9]+\\.[0-9]+ Schizosaccharomyces pombe.*" expression_n="4"/>\n- <expand macro="genome_fasta_assert" el1="GCF_000146045.2" el2="GCF_000146045.2_R64" expression=">NC_[0-9]+\\.[0-9]+ Saccharomyces cerevisiae S288[Cc].*" expression_n="17"/>\n+ <expand macro="genome_fasta_assert" el1="GCF_000002945.2" el2="GCF_000002945.2_ASM294v3" expression=">NC_[0-9]+\\.[0-9]+ Schizosaccharomyces pombe.*" expression_n="4"/>\n+ <expand macro="genome_fasta_assert" el1="GCF_000146045.2" el2="GCF_000146045.2_R64" expression=">NC_[0-9]+\\.[0-9]+ Saccharomyces cerevisiae S288[Cc].*" expression_n="17"/>\n </output_collection>\n </test>\n \n- <test expect_num_outputs="1">\n+ <test expect_num_outputs="2">\n <conditional name="query|subcommand">\n <param name="download_by" value="taxon"/>\n <param name="taxon_positional" value="4932"/>\n <param name="tax_exact_match" value="true"/>\n </conditional>\n- <param name="include" value=""/>\n <output name="genome_data_report">\n <assert_contents>\n- <has_text text="Saccharomyces cerevisiae ZTW1" negate="true"/>\n+ <has_text text="Saccharomyces cerevisiae ZTW1" negate="true"/>\n </assert_contents>\n </output>\n </test>\n'

diff -r 7b925f7c50b2 -r 0f3b3813b6ae macros.xml
--- a/macros.xml Mon Mar 17 11:05:45 2025 +0000
+++ b/macros.xml Mon Jul 21 19:28:16 2025 +0000

[

@@ -1,5 +1,5 @@
<macros>
-    <token name="@TOOL_VERSION@">17.1.0</token>
+    <token name="@TOOL_VERSION@">18.4.1</token>
     <token name="@VERSION_SUFFIX@">0</token>
     <token name="@PROFILE@">23.0</token>
     <token name="@LICENSE@">MIT</token>
@@ -12,7 +12,8 @@
         <requirements>
             <requirement type="package" version="@TOOL_VERSION@">ncbi-datasets-cli</requirement>
             <requirement type="package" version="2025.1.31">ca-certificates</requirement>
-            <requirement type="package" version="6.0">unzip</requirement>
+             
+             
         </requirements>
     </xml>
     <xml name="bio_tools">

diff -r 7b925f7c50b2 -r 0f3b3813b6ae test-data/GCF_000007445.1.genomic.gtf
--- a/test-data/GCF_000007445.1.genomic.gtf Mon Mar 17 11:05:45 2025 +0000
+++ b/test-data/GCF_000007445.1.genomic.gtf Mon Jul 21 19:28:16 2025 +0000

@@ -1,6 +1,5 @@
-#!annotation-source NCBI RefSeq
NC_004431.1 RefSeq gene 190 255 . + . gene_id "C_RS00005"; transcript_id ""; gbkey "Gene"; gene "thrL"; gene_biotype "protein_coding"; locus_tag "C_RS00005"; old_locus_tag "c5491";
-NC_004431.1 Protein Homology CDS 190 252 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11";
-NC_004431.1 Protein Homology start_codon 190 192 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11";
-NC_004431.1 Protein Homology stop_codon 253 255 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11";
+NC_004431.1 Protein Homology CDS 190 252 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1";
+NC_004431.1 Protein Homology start_codon 190 192 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1";
+NC_004431.1 Protein Homology stop_codon 253 255 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1";
NC_004431.1 RefSeq gene 453 911 . + . gene_id "C_RS00010"; transcript_id ""; gbkey "Gene"; gene "tnpA"; gene_biotype "protein_coding"; locus_tag "C_RS00010"; old_locus_tag "c0002";

diff -r 7b925f7c50b2 -r 0f3b3813b6ae test-data/genome.2.GCF_000013305.1.genomic.gtf
--- a/test-data/genome.2.GCF_000013305.1.genomic.gtf Mon Mar 17 11:05:45 2025 +0000
+++ b/test-data/genome.2.GCF_000013305.1.genomic.gtf Mon Jul 21 19:28:16 2025 +0000

@@ -1,6 +1,5 @@
-#!annotation-source NCBI RefSeq
NC_008253.1 RefSeq gene 190 255 . + . gene_id "ECP_RS00005"; transcript_id ""; gbkey "Gene"; gene "thrL"; gene_biotype "protein_coding"; locus_tag "ECP_RS00005"; old_locus_tag "ECP_0001";
-NC_008253.1 Protein Homology CDS 190 252 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11";
-NC_008253.1 Protein Homology start_codon 190 192 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11";
-NC_008253.1 Protein Homology stop_codon 253 255 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11";
+NC_008253.1 Protein Homology CDS 190 252 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1";
+NC_008253.1 Protein Homology start_codon 190 192 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1";
+NC_008253.1 Protein Homology stop_codon 253 255 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1";
NC_008253.1 RefSeq gene 336 2798 . + . gene_id "ECP_RS00010"; transcript_id ""; gbkey "Gene"; gene "thrA"; gene_biotype "protein_coding"; locus_tag "ECP_RS00010"; old_locus_tag "ECP_0002";