| Previous changeset 20:7b925f7c50b2 (2025-03-17) Next changeset 22:03a801ded645 (2025-07-29) |
|
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_datasets commit c1c3f90e4aa7dc258aa61d98ec4eac0b97eef426 |
|
modified:
datasets_gene.xml datasets_genome.xml macros.xml test-data/GCF_000007445.1.genomic.gtf test-data/genome.2.GCF_000013305.1.genomic.gtf |
| b |
| diff -r 7b925f7c50b2 -r 0f3b3813b6ae datasets_gene.xml --- a/datasets_gene.xml Mon Mar 17 11:05:45 2025 +0000 +++ b/datasets_gene.xml Mon Jul 21 19:28:16 2025 +0000 |
| [ |
| b'@@ -4,7 +4,7 @@\n <import>macros.xml</import>\n </macros>\n <expand macro="bio_tools"/>\n- <expand macro="requirements"></expand>\n+ <expand macro="requirements"/>\n <expand macro="version_command"/>\n <command><![CDATA[\n #import re\n@@ -41,7 +41,7 @@\n \n #if $filters.fasta_filter_cond.fasta_filter_select\n #if $filters.fasta_filter_cond.fasta_filter_select == \'text\'\n- --fasta-filter #echo ",".join(f"\'{x}\'" for x in $filters.fasta_filter_cond.fasta_filter.split(\',\') if x)\n+ --fasta-filter #echo ",".join(f"\'{x}\'" for x in str($filters.fasta_filter_cond.fasta_filter).split(\',\') if x)\n #else\n --fasta-filter-file \'$filters.fasta_filter_cond.fasta_filter_file\'\n #end if\n@@ -97,8 +97,8 @@\n <param argument="--taxon" type="text" value="human" label="Species for gene symbol" help="NCBI taxid, common or scientific name">\n <sanitizer invalid_char="">\n <valid initial="string.letters">\n- <add value=" " />\n- <add value="-" />\n+ <add value=" "/>\n+ <add value="-"/>\n </valid>\n </sanitizer>\n </param>\n@@ -109,8 +109,8 @@\n <param argument="--taxon-filter" type="text" value="" label="Limit gene sequences and annotation report file to specified taxon" help="any rank, only available for WP accessions">\n <sanitizer invalid_char="">\n <valid initial="string.letters">\n- <add value=" " />\n- <add value="-" />\n+ <add value=" "/>\n+ <add value="-"/>\n </valid>\n </sanitizer>\n </param>\n@@ -133,7 +133,7 @@\n <param argument="--fasta-filter" type="text" label="RefSeq nucleotide and protein accessions" help="Comma separated">\n <sanitizer invalid_char="">\n <valid initial="string.letters,string.digits">\n- <add value="," />\n+ <add value=","/>\n </valid>\n </sanitizer>\n </param>\n@@ -235,12 +235,12 @@\n </output>\n <output name="rna_fasta">\n <assert_contents>\n- <has_text text=">"/>\n+ <has_text text=">"/>\n </assert_contents>\n </output>\n <output name="protein_fasta">\n <assert_contents>\n- <has_text text=">"/>\n+ <has_text text=">"/>\n </assert_contents>\n </output>\n </test>\n@@ -263,12 +263,12 @@\n </output>\n <output name="rna_fasta">\n <assert_contents>\n- <has_text text=">"/>\n+ <has_text text=">"/>\n </assert_contents>\n </output>\n <output name="protein_fasta">\n <assert_contents>\n- <has_text text=">"/>\n+ <has_text text=">"/>\n </assert_contents>\n </output>\n </test>\n@@ -284,6 +284,7 @@\n </conditional>\n <section name="file_choices">\n <conditional name="kingdom_cond">\n+ <param name="kingdom_sel" value="gene"/>\n <param name="include" value="gene,cds"/>\n </conditional>\n </section>\n@@ -297,17 +298,17 @@\n </output>\n <output name="gene_fasta">\n <assert_contents>\n- <has_text text=">"/>\n+ <has_text text=">"/>\n'..b'fivep_utr_fasta">\n <assert_contents>\n- <has_text text=">"/>\n+ <has_text text=">"/>\n </assert_contents>\n </output>\n </test>\n <!-- 6: datasets download gene symbol brca1 \\-\\-ortholog -->\n- <test expect_num_outputs="1">\n+ <test expect_num_outputs="3">\n <conditional name="query|subcommand">\n <param name="download_by" value="symbol"/>\n <conditional name="text_or_file">\n@@ -380,11 +376,6 @@\n </conditional>\n <param name="ortholog" value="rodentia"/>\n </conditional>\n- <section name="file_choices">\n- <conditional name="kingdom_cond">\n- <param name="include" value=""/>\n- </conditional>\n- </section>\n <output name="gene_data_report">\n <assert_contents>\n <has_text text="rat"/>\n@@ -395,7 +386,7 @@\n </output>\n </test>\n <!-- 7: datasets download gene accession NP_000483.3 -->\n- <test expect_num_outputs="1">\n+ <test expect_num_outputs="3">\n <conditional name="query|subcommand">\n <param name="download_by" value="accession"/>\n <conditional name="text_or_file">\n@@ -403,11 +394,6 @@\n <param name="accession" value="NP_000483.3"/>\n </conditional>\n </conditional>\n- <section name="file_choices">\n- <conditional name="kingdom_cond">\n- <param name="include" value=""/>\n- </conditional>\n- </section>\n <output name="gene_data_report">\n <assert_contents>\n <has_text text="human"/>\n@@ -417,7 +403,7 @@\n </output>\n </test>\n <!-- 8: datasets download gene accession NM_000546.6 NM_000492.4 + ortholog-->\n- <test expect_num_outputs="1">\n+ <test expect_num_outputs="3">\n <conditional name="query|subcommand">\n <param name="download_by" value="accession"/>\n <conditional name="text_or_file">\n@@ -426,11 +412,6 @@\n </conditional>\n <param name="ortholog" value="all"/>\n </conditional>\n- <section name="file_choices">\n- <conditional name="kingdom_cond">\n- <param name="include" value=""/>\n- </conditional>\n- </section>\n <output name="gene_data_report">\n <assert_contents>\n <has_text text="human"/>\n@@ -439,7 +420,6 @@\n </assert_contents>\n </output>\n </test>\n-\n <!-- 9: datasets download gene accession WP_003249567.1 + include_flanks_bp -->\n <test expect_num_outputs="4">\n <conditional name="query|subcommand">\n@@ -466,24 +446,23 @@\n </output>\n <output name="gene_fasta">\n <assert_contents>\n- <has_text text=">"/>\n+ <has_text text=">"/>\n </assert_contents>\n </output>\n <output name="gene_flanks">\n <assert_contents>\n- <has_text text=">"/>\n+ <has_text text=">"/>\n </assert_contents>\n </output>\n <output name="protein_fasta">\n <assert_contents>\n- <has_text text=">"/>\n+ <has_text text=">"/>\n </assert_contents>\n </output>\n <assert_command>\n <has_text text="include-flanks-bp 100"/>\n </assert_command>\n- </test> \n-\n+ </test>\n <!-- 10: datasets download gene taxon human -->\n <!-- <test expect_num_outputs="1">\n <conditional name="query|subcommand">\n' |
| b |
| diff -r 7b925f7c50b2 -r 0f3b3813b6ae datasets_genome.xml --- a/datasets_genome.xml Mon Mar 17 11:05:45 2025 +0000 +++ b/datasets_genome.xml Mon Jul 21 19:28:16 2025 +0000 |
| [ |
| b'@@ -4,9 +4,14 @@\n <import>macros.xml</import>\n </macros>\n <expand macro="bio_tools"/>\n- <expand macro="requirements"></expand>\n+ <expand macro="requirements"/>\n <expand macro="version_command"/>\n- <command><![CDATA[\n+ <stdio>\n+ <regex match="Warning" source="stderr" level="warning" description=""/>\n+ <regex match="skipping" source="stderr" level="warning" description=""/>\n+ <regex match="ERROR" level="fatal"/>\n+ </stdio>\n+ <command detect_errors="exit_code"><![CDATA[\n #import re\n @SETUP_CERTIFICATES@\n datasets download genome $query.subcommand.download_by\n@@ -116,7 +121,6 @@\n </param>\n <expand macro="released_options"/>\n <expand macro="released_options" before_or_after="after"/>\n-\n <repeat name="search" title="Add search terms">\n <param argument="--search" type="text" label="Only include genomes that have the specified text in the searchable fields" help="Searchable fields are species and infraspecies, assembly name and submitter"/>\n </repeat>\n@@ -137,35 +141,35 @@\n <outputs>\n <data name="genome_data_report" format="tabular" label="NCBI Genome Datasets: Data Report" from_work_dir="genome_data_report.tsv"/>\n <collection name="sequence_report" label="NCBI Genome Datasets: Sequence Data Report" type="list">\n- <discover_datasets pattern="(?P<identifier_0>.*?)\\/sequence_report.tsv" ext="tabular" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/>\n+ <discover_datasets pattern="(?P<identifier_0>.*?)\\/sequence_report.tsv" ext="tabular" directory="ncbi_dataset/data/" recurse="true" match_relative_path="true"/>\n <filter>file_choices[\'include\'] and "seq-report" in file_choices[\'include\']</filter>\n </collection>\n <collection name="genome_fasta" label="NCBI Genome Datasets: genome fasta" type="list:list">\n- <discover_datasets pattern="(?P<identifier_0>.*?)/(?!rna|cds_from)(?P<identifier_1>.*?)(_genomic)?\\.(?P<ext>fasta(\\.gz)?)" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/>\n+ <discover_datasets pattern="(?P<identifier_0>.*?)/(?!rna|cds_from)(?P<identifier_1>.*?)(_genomic)?\\.(?P<ext>fasta(\\.gz)?)" directory="ncbi_dataset/data/" recurse="true" match_relative_path="true"/>\n <filter>file_choices[\'include\'] and "genome" in file_choices[\'include\']</filter>\n </collection>\n <collection name="rna_fasta" label="NCBI Genome Datasets: RNA fasta" type="list">\n- <discover_datasets pattern="(?P<identifier_0>.*?)\\/rna\\.(?P<ext>fasta(\\.gz)?)$" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/>\n+ <discover_datasets pattern="(?P<identifier_0>.*?)\\/rna\\.(?P<ext>fasta(\\.gz)?)$" directory="ncbi_dataset/data/" recurse="true" match_relative_path="true"/>\n <filter>file_choices[\'include\'] and "rna" in file_choices[\'include\']</filter>\n </collection>\n <collection name="protein_fasta" label="NCBI Genome Datasets: protein fasta" type="list">\n- <discover_datasets pattern="(?P<identifier_0>.*?)\\/protein\\.(?P<ext>fasta(\\.gz)?)$" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/>\n+ <discover_datasets pattern="(?P<identifier_0>.*?)\\/protein\\.(?P<ext>fasta(\\.gz)?)$" directory="ncbi_dataset/data/" recurse="true" match_relative_path="true"/>\n <filter>file_choices[\'include\'] and "protein" in file_choices[\'include\']</filter>\n </collection>\n <collection name="genomic_cds" label="NCBI Genome Datasets: genomic cds fasta" type="list">\n- <discover_datasets pattern="(?P<identifier_0>.*?)\\/cds_from_genomic\\.(?P<ext>fasta(\\.gz)?)$" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/>\n+ <discover'..b'ame="released_before" value="01/01/2015"/>\n+ <param name="assembly_version" value="all"/>\n+ </section>\n <section name="file_choices">\n <param name="include" value="seq-report"/>\n </section>\n@@ -395,19 +409,19 @@\n <param name="decompress" value="true"/>\n </section>\n <output_collection name="genome_fasta" type="list:list" count="1">\n- <expand macro="genome_fasta_assert" el1="GCF_000146045.2" el2="GCF_000146045.2_R64" expression=">NC_[0-9]+\\.[0-9]+ Saccharomyces cerevisiae S288[Cc] (mitochondrion|chromosome .*), complete (sequence|genome)" expression_n="17"/>\n+ <expand macro="genome_fasta_assert" el1="GCF_000146045.2" el2="GCF_000146045.2_R64" expression=">NC_[0-9]+\\.[0-9]+ Saccharomyces cerevisiae S288[Cc] (mitochondrion|chromosome .*), complete (sequence|genome)" expression_n="17"/>\n </output_collection>\n <output_collection name="protein_fasta" type="list" count="1">\n <element name="GCF_000146045.2" decompress="true">\n <assert_contents>\n- <has_text text=">"/>\n+ <has_text text=">"/>\n </assert_contents>\n </element>\n </output_collection>\n <output_collection name="rna_fasta" type="list" count="1">\n <element name="GCF_000146045.2" decompress="true">\n <assert_contents>\n- <has_text text=">"/>\n+ <has_text text=">"/>\n </assert_contents>\n </element>\n </output_collection>\n@@ -437,7 +451,7 @@\n <output_collection name="protein_fasta" type="list" count="1">\n <element name="GCF_000146045.2" ftype="fasta.gz">\n <assert_contents>\n- <has_size value="1845038" delta="2000"/>\n+ <has_size value="1847862" delta="2000"/>\n </assert_contents>\n </element>\n </output_collection>\n@@ -463,22 +477,21 @@\n </section>\n <output_collection name="sequence_report" type="list" count="2"/>\n <output_collection name="genome_fasta" type="list:list" count="2">\n- <expand macro="genome_fasta_assert" el1="GCF_000002945.2" el2="GCF_000002945.2_ASM294v3" expression=">NC_[0-9]+\\.[0-9]+ Schizosaccharomyces pombe.*" expression_n="4"/>\n- <expand macro="genome_fasta_assert" el1="GCF_000146045.2" el2="GCF_000146045.2_R64" expression=">NC_[0-9]+\\.[0-9]+ Saccharomyces cerevisiae S288[Cc].*" expression_n="17"/>\n+ <expand macro="genome_fasta_assert" el1="GCF_000002945.2" el2="GCF_000002945.2_ASM294v3" expression=">NC_[0-9]+\\.[0-9]+ Schizosaccharomyces pombe.*" expression_n="4"/>\n+ <expand macro="genome_fasta_assert" el1="GCF_000146045.2" el2="GCF_000146045.2_R64" expression=">NC_[0-9]+\\.[0-9]+ Saccharomyces cerevisiae S288[Cc].*" expression_n="17"/>\n </output_collection>\n </test>\n <!-- tax_exact_match should filter out strains\n https://github.com/ncbi/datasets/issues/187 -->\n- <test expect_num_outputs="1">\n+ <test expect_num_outputs="2">\n <conditional name="query|subcommand">\n <param name="download_by" value="taxon"/>\n <param name="taxon_positional" value="4932"/>\n <param name="tax_exact_match" value="true"/>\n </conditional>\n- <param name="include" value=""/>\n <output name="genome_data_report">\n <assert_contents>\n- <has_text text="Saccharomyces cerevisiae ZTW1" negate="true"/>\n+ <has_text text="Saccharomyces cerevisiae ZTW1" negate="true"/>\n </assert_contents>\n </output>\n </test>\n' |
| b |
| diff -r 7b925f7c50b2 -r 0f3b3813b6ae macros.xml --- a/macros.xml Mon Mar 17 11:05:45 2025 +0000 +++ b/macros.xml Mon Jul 21 19:28:16 2025 +0000 |
| [ |
| @@ -1,5 +1,5 @@ <macros> - <token name="@TOOL_VERSION@">17.1.0</token> + <token name="@TOOL_VERSION@">18.4.1</token> <token name="@VERSION_SUFFIX@">0</token> <token name="@PROFILE@">23.0</token> <token name="@LICENSE@">MIT</token> @@ -12,7 +12,8 @@ <requirements> <requirement type="package" version="@TOOL_VERSION@">ncbi-datasets-cli</requirement> <requirement type="package" version="2025.1.31">ca-certificates</requirement> - <requirement type="package" version="6.0">unzip</requirement> + <!-- Removed line below because it was causing "skipping: [..] need PK compat. v4.5 (can do v2.1)" --> + <!-- <requirement type="package" version="6.0">unzip</requirement> --> </requirements> </xml> <xml name="bio_tools"> |
| b |
| diff -r 7b925f7c50b2 -r 0f3b3813b6ae test-data/GCF_000007445.1.genomic.gtf --- a/test-data/GCF_000007445.1.genomic.gtf Mon Mar 17 11:05:45 2025 +0000 +++ b/test-data/GCF_000007445.1.genomic.gtf Mon Jul 21 19:28:16 2025 +0000 |
| b |
| @@ -1,6 +1,5 @@ -#!annotation-source NCBI RefSeq NC_004431.1 RefSeq gene 190 255 . + . gene_id "C_RS00005"; transcript_id ""; gbkey "Gene"; gene "thrL"; gene_biotype "protein_coding"; locus_tag "C_RS00005"; old_locus_tag "c5491"; -NC_004431.1 Protein Homology CDS 190 252 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; -NC_004431.1 Protein Homology start_codon 190 192 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; -NC_004431.1 Protein Homology stop_codon 253 255 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; +NC_004431.1 Protein Homology CDS 190 252 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1"; +NC_004431.1 Protein Homology start_codon 190 192 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1"; +NC_004431.1 Protein Homology stop_codon 253 255 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1"; NC_004431.1 RefSeq gene 453 911 . + . gene_id "C_RS00010"; transcript_id ""; gbkey "Gene"; gene "tnpA"; gene_biotype "protein_coding"; locus_tag "C_RS00010"; old_locus_tag "c0002"; |
| b |
| diff -r 7b925f7c50b2 -r 0f3b3813b6ae test-data/genome.2.GCF_000013305.1.genomic.gtf --- a/test-data/genome.2.GCF_000013305.1.genomic.gtf Mon Mar 17 11:05:45 2025 +0000 +++ b/test-data/genome.2.GCF_000013305.1.genomic.gtf Mon Jul 21 19:28:16 2025 +0000 |
| b |
| @@ -1,6 +1,5 @@ -#!annotation-source NCBI RefSeq NC_008253.1 RefSeq gene 190 255 . + . gene_id "ECP_RS00005"; transcript_id ""; gbkey "Gene"; gene "thrL"; gene_biotype "protein_coding"; locus_tag "ECP_RS00005"; old_locus_tag "ECP_0001"; -NC_008253.1 Protein Homology CDS 190 252 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; -NC_008253.1 Protein Homology start_codon 190 192 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; -NC_008253.1 Protein Homology stop_codon 253 255 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; +NC_008253.1 Protein Homology CDS 190 252 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1"; +NC_008253.1 Protein Homology start_codon 190 192 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1"; +NC_008253.1 Protein Homology stop_codon 253 255 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1"; NC_008253.1 RefSeq gene 336 2798 . + . gene_id "ECP_RS00010"; transcript_id ""; gbkey "Gene"; gene "thrA"; gene_biotype "protein_coding"; locus_tag "ECP_RS00010"; old_locus_tag "ECP_0002"; |