Mercurial > repos > iuc > ncbi_datasets
changeset 21:0f3b3813b6ae draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_datasets commit c1c3f90e4aa7dc258aa61d98ec4eac0b97eef426
author | iuc |
---|---|
date | Mon, 21 Jul 2025 19:28:16 +0000 (23 hours ago) |
parents | 7b925f7c50b2 |
children | |
files | datasets_gene.xml datasets_genome.xml macros.xml test-data/GCF_000007445.1.genomic.gtf test-data/genome.2.GCF_000013305.1.genomic.gtf |
diffstat | 5 files changed, 99 insertions(+), 108 deletions(-) [+] |
line wrap: on
line diff
--- a/datasets_gene.xml Mon Mar 17 11:05:45 2025 +0000 +++ b/datasets_gene.xml Mon Jul 21 19:28:16 2025 +0000 @@ -4,7 +4,7 @@ <import>macros.xml</import> </macros> <expand macro="bio_tools"/> - <expand macro="requirements"></expand> + <expand macro="requirements"/> <expand macro="version_command"/> <command><![CDATA[ #import re @@ -41,7 +41,7 @@ #if $filters.fasta_filter_cond.fasta_filter_select #if $filters.fasta_filter_cond.fasta_filter_select == 'text' - --fasta-filter #echo ",".join(f"'{x}'" for x in $filters.fasta_filter_cond.fasta_filter.split(',') if x) + --fasta-filter #echo ",".join(f"'{x}'" for x in str($filters.fasta_filter_cond.fasta_filter).split(',') if x) #else --fasta-filter-file '$filters.fasta_filter_cond.fasta_filter_file' #end if @@ -97,8 +97,8 @@ <param argument="--taxon" type="text" value="human" label="Species for gene symbol" help="NCBI taxid, common or scientific name"> <sanitizer invalid_char=""> <valid initial="string.letters"> - <add value=" " /> - <add value="-" /> + <add value=" "/> + <add value="-"/> </valid> </sanitizer> </param> @@ -109,8 +109,8 @@ <param argument="--taxon-filter" type="text" value="" label="Limit gene sequences and annotation report file to specified taxon" help="any rank, only available for WP accessions"> <sanitizer invalid_char=""> <valid initial="string.letters"> - <add value=" " /> - <add value="-" /> + <add value=" "/> + <add value="-"/> </valid> </sanitizer> </param> @@ -133,7 +133,7 @@ <param argument="--fasta-filter" type="text" label="RefSeq nucleotide and protein accessions" help="Comma separated"> <sanitizer invalid_char=""> <valid initial="string.letters,string.digits"> - <add value="," /> + <add value=","/> </valid> </sanitizer> </param> @@ -235,12 +235,12 @@ </output> <output name="rna_fasta"> <assert_contents> - <has_text text=">"/> + <has_text text=">"/> </assert_contents> </output> <output name="protein_fasta"> <assert_contents> - <has_text text=">"/> + <has_text text=">"/> </assert_contents> </output> </test> @@ -263,12 +263,12 @@ </output> <output name="rna_fasta"> <assert_contents> - <has_text text=">"/> + <has_text text=">"/> </assert_contents> </output> <output name="protein_fasta"> <assert_contents> - <has_text text=">"/> + <has_text text=">"/> </assert_contents> </output> </test> @@ -284,6 +284,7 @@ </conditional> <section name="file_choices"> <conditional name="kingdom_cond"> + <param name="kingdom_sel" value="gene"/> <param name="include" value="gene,cds"/> </conditional> </section> @@ -297,17 +298,17 @@ </output> <output name="gene_fasta"> <assert_contents> - <has_text text=">"/> + <has_text text=">"/> </assert_contents> </output> <output name="cds_fasta"> <assert_contents> - <has_text text=">"/> + <has_text text=">"/> </assert_contents> </output> </test> <!-- 4: datasets download gene symbol tp53 --> - <test expect_num_outputs="1"> + <test expect_num_outputs="3"> <conditional name="query|subcommand"> <param name="download_by" value="symbol"/> <conditional name="text_or_file"> @@ -315,11 +316,6 @@ <param name="accession" value="tp53"/> </conditional> </conditional> - <section name="file_choices"> - <conditional name="kingdom_cond"> - <param name="include" value=""/> - </conditional> - </section> <output name="gene_data_report"> <assert_contents> <has_text text="human"/> @@ -361,17 +357,17 @@ </output> <output name="threep_utr_fasta"> <assert_contents> - <has_text text=">"/> + <has_text text=">"/> </assert_contents> </output> <output name="fivep_utr_fasta"> <assert_contents> - <has_text text=">"/> + <has_text text=">"/> </assert_contents> </output> </test> <!-- 6: datasets download gene symbol brca1 \-\-ortholog --> - <test expect_num_outputs="1"> + <test expect_num_outputs="3"> <conditional name="query|subcommand"> <param name="download_by" value="symbol"/> <conditional name="text_or_file"> @@ -380,11 +376,6 @@ </conditional> <param name="ortholog" value="rodentia"/> </conditional> - <section name="file_choices"> - <conditional name="kingdom_cond"> - <param name="include" value=""/> - </conditional> - </section> <output name="gene_data_report"> <assert_contents> <has_text text="rat"/> @@ -395,7 +386,7 @@ </output> </test> <!-- 7: datasets download gene accession NP_000483.3 --> - <test expect_num_outputs="1"> + <test expect_num_outputs="3"> <conditional name="query|subcommand"> <param name="download_by" value="accession"/> <conditional name="text_or_file"> @@ -403,11 +394,6 @@ <param name="accession" value="NP_000483.3"/> </conditional> </conditional> - <section name="file_choices"> - <conditional name="kingdom_cond"> - <param name="include" value=""/> - </conditional> - </section> <output name="gene_data_report"> <assert_contents> <has_text text="human"/> @@ -417,7 +403,7 @@ </output> </test> <!-- 8: datasets download gene accession NM_000546.6 NM_000492.4 + ortholog--> - <test expect_num_outputs="1"> + <test expect_num_outputs="3"> <conditional name="query|subcommand"> <param name="download_by" value="accession"/> <conditional name="text_or_file"> @@ -426,11 +412,6 @@ </conditional> <param name="ortholog" value="all"/> </conditional> - <section name="file_choices"> - <conditional name="kingdom_cond"> - <param name="include" value=""/> - </conditional> - </section> <output name="gene_data_report"> <assert_contents> <has_text text="human"/> @@ -439,7 +420,6 @@ </assert_contents> </output> </test> - <!-- 9: datasets download gene accession WP_003249567.1 + include_flanks_bp --> <test expect_num_outputs="4"> <conditional name="query|subcommand"> @@ -466,24 +446,23 @@ </output> <output name="gene_fasta"> <assert_contents> - <has_text text=">"/> + <has_text text=">"/> </assert_contents> </output> <output name="gene_flanks"> <assert_contents> - <has_text text=">"/> + <has_text text=">"/> </assert_contents> </output> <output name="protein_fasta"> <assert_contents> - <has_text text=">"/> + <has_text text=">"/> </assert_contents> </output> <assert_command> <has_text text="include-flanks-bp 100"/> </assert_command> - </test> - + </test> <!-- 10: datasets download gene taxon human --> <!-- <test expect_num_outputs="1"> <conditional name="query|subcommand">
--- a/datasets_genome.xml Mon Mar 17 11:05:45 2025 +0000 +++ b/datasets_genome.xml Mon Jul 21 19:28:16 2025 +0000 @@ -4,9 +4,14 @@ <import>macros.xml</import> </macros> <expand macro="bio_tools"/> - <expand macro="requirements"></expand> + <expand macro="requirements"/> <expand macro="version_command"/> - <command><![CDATA[ + <stdio> + <regex match="Warning" source="stderr" level="warning" description=""/> + <regex match="skipping" source="stderr" level="warning" description=""/> + <regex match="ERROR" level="fatal"/> + </stdio> + <command detect_errors="exit_code"><![CDATA[ #import re @SETUP_CERTIFICATES@ datasets download genome $query.subcommand.download_by @@ -116,7 +121,6 @@ </param> <expand macro="released_options"/> <expand macro="released_options" before_or_after="after"/> - <repeat name="search" title="Add search terms"> <param argument="--search" type="text" label="Only include genomes that have the specified text in the searchable fields" help="Searchable fields are species and infraspecies, assembly name and submitter"/> </repeat> @@ -137,35 +141,35 @@ <outputs> <data name="genome_data_report" format="tabular" label="NCBI Genome Datasets: Data Report" from_work_dir="genome_data_report.tsv"/> <collection name="sequence_report" label="NCBI Genome Datasets: Sequence Data Report" type="list"> - <discover_datasets pattern="(?P<identifier_0>.*?)\/sequence_report.tsv" ext="tabular" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/> + <discover_datasets pattern="(?P<identifier_0>.*?)\/sequence_report.tsv" ext="tabular" directory="ncbi_dataset/data/" recurse="true" match_relative_path="true"/> <filter>file_choices['include'] and "seq-report" in file_choices['include']</filter> </collection> <collection name="genome_fasta" label="NCBI Genome Datasets: genome fasta" type="list:list"> - <discover_datasets pattern="(?P<identifier_0>.*?)/(?!rna|cds_from)(?P<identifier_1>.*?)(_genomic)?\.(?P<ext>fasta(\.gz)?)" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/> + <discover_datasets pattern="(?P<identifier_0>.*?)/(?!rna|cds_from)(?P<identifier_1>.*?)(_genomic)?\.(?P<ext>fasta(\.gz)?)" directory="ncbi_dataset/data/" recurse="true" match_relative_path="true"/> <filter>file_choices['include'] and "genome" in file_choices['include']</filter> </collection> <collection name="rna_fasta" label="NCBI Genome Datasets: RNA fasta" type="list"> - <discover_datasets pattern="(?P<identifier_0>.*?)\/rna\.(?P<ext>fasta(\.gz)?)$" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/> + <discover_datasets pattern="(?P<identifier_0>.*?)\/rna\.(?P<ext>fasta(\.gz)?)$" directory="ncbi_dataset/data/" recurse="true" match_relative_path="true"/> <filter>file_choices['include'] and "rna" in file_choices['include']</filter> </collection> <collection name="protein_fasta" label="NCBI Genome Datasets: protein fasta" type="list"> - <discover_datasets pattern="(?P<identifier_0>.*?)\/protein\.(?P<ext>fasta(\.gz)?)$" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/> + <discover_datasets pattern="(?P<identifier_0>.*?)\/protein\.(?P<ext>fasta(\.gz)?)$" directory="ncbi_dataset/data/" recurse="true" match_relative_path="true"/> <filter>file_choices['include'] and "protein" in file_choices['include']</filter> </collection> <collection name="genomic_cds" label="NCBI Genome Datasets: genomic cds fasta" type="list"> - <discover_datasets pattern="(?P<identifier_0>.*?)\/cds_from_genomic\.(?P<ext>fasta(\.gz)?)$" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/> + <discover_datasets pattern="(?P<identifier_0>.*?)\/cds_from_genomic\.(?P<ext>fasta(\.gz)?)$" directory="ncbi_dataset/data/" recurse="true" match_relative_path="true"/> <filter>file_choices['include'] and "cds" in file_choices['include']</filter> </collection> <collection name="genomic_gff" label="NCBI Genome Datasets: genomic gff3" type="list"> - <discover_datasets pattern="(?P<identifier_0>.*?)\/genomic\.gff" ext="gff3" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/> + <discover_datasets pattern="(?P<identifier_0>.*?)\/genomic\.gff" ext="gff3" directory="ncbi_dataset/data/" recurse="true" match_relative_path="true"/> <filter>file_choices['include'] and "gff3" in file_choices['include']</filter> </collection> <collection name="genomic_gtf" label="NCBI Genome Datasets: gtf" type="list"> - <discover_datasets pattern="(?P<identifier_0>.*?)\/genomic\.gtf" ext="gtf" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/> + <discover_datasets pattern="(?P<identifier_0>.*?)\/genomic\.gtf" ext="gtf" directory="ncbi_dataset/data/" recurse="true" match_relative_path="true"/> <filter>file_choices['include'] and "gtf" in file_choices['include']</filter> </collection> <collection name="genomic_gbff" label="NCBI Genome Datasets: GenBank flatfile" type="list"> - <discover_datasets pattern="(?P<identifier_0>.*?)\/genomic\.gbff" ext="txt" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/> + <discover_datasets pattern="(?P<identifier_0>.*?)\/genomic\.gbff" ext="txt" directory="ncbi_dataset/data/" recurse="true" match_relative_path="true"/> <filter>file_choices['include'] and "gbff" in file_choices['include']</filter> </collection> </outputs> @@ -175,8 +179,10 @@ <param name="download_by" value="taxon"/> <param name="taxon_positional" value="human"/> </conditional> - <param name="chromosomes" value="21"/> - <param name="released_before" value="01/01/2018"/> + <section name="filters"> + <param name="chromosomes" value="21"/> + <param name="released_before" value="01/01/2018"/> + </section> <section name="file_choices"> <!-- include a sequence (which should be downloaded as fasta.gz) and one non-sequence (which should be decompressed) output --> @@ -184,19 +190,19 @@ </section> <output name="genome_data_report"> <assert_contents> - <has_text text="Assembly Accession	Assembly Name	Assembly Submitter	Organism Name"/> + <has_text text="Assembly Accession	Assembly Name	Assembly Submitter	Organism Name"/> <has_n_lines n="142"/> <has_n_columns n="4"/> </assert_contents> </output> - <output_collection name="rna_fasta" type="list" count="1"> + <output_collection name="rna_fasta" type="list" count="2"> <element name="GCF_000306695.2" decompress="true"> <assert_contents> - <has_text text=">"/> + <has_text text=">"/> </assert_contents> </element> </output_collection> - <output_collection name="genomic_gff" type="list"> + <output_collection name="genomic_gff" type="list" count="12"> <element name="GCF_000306695.2"> <assert_contents> <has_n_lines min="1000000"/> @@ -214,26 +220,27 @@ <param name="download_by" value="taxon"/> <param name="taxon_positional" value="human"/> </conditional> - <param name="chromosomes" value="21"/> - <param name="assembly_level" value="chromosome,complete"/> - <param name="released_before" value="01/01/2018"/> + <section name="filters"> + <param name="chromosomes" value="21"/> + <param name="assembly_level" value="chromosome,complete"/> + <param name="released_before" value="01/01/2018"/> + </section> <section name="file_choices"> <param name="include" value="genome"/> <param name="decompress" value="true"/> </section> - <output_collection name="genome_fasta" type="list:list" count="12"> - <expand macro="genome_fasta_assert" el1="GCA_000002115.2" el2="chr21" expression=">"/> - <expand macro="genome_fasta_assert" el1="GCA_000002125.2" el2="chr21" expression=">"/> - <expand macro="genome_fasta_assert" el1="GCA_000212995.1" el2="chr21" expression=">"/> - <expand macro="genome_fasta_assert" el1="GCA_000252825.1" el2="chr21" expression=">"/> - <expand macro="genome_fasta_assert" el1="GCA_000306695.2" el2="chr21" expression=">"/> - <expand macro="genome_fasta_assert" el1="GCA_000365445.1" el2="chr21" expression=">"/> - <expand macro="genome_fasta_assert" el1="GCA_001292825.2" el2="chr21" expression=">"/> - <expand macro="genome_fasta_assert" el1="GCA_001524155.4" el2="chr21" expression=">"/> - <expand macro="genome_fasta_assert" el1="GCA_001712695.1" el2="chr21" expression=">"/> - <expand macro="genome_fasta_assert" el1="GCA_022833125.2" el2="chr21" expression=">"/> - <expand macro="genome_fasta_assert" el1="GCF_000002125.1" el2="chr21" expression=">"/> - <expand macro="genome_fasta_assert" el1="GCF_000306695.2" el2="chr21" expression=">"/> + <output_collection name="genome_fasta" type="list:list" count="11"> + <expand macro="genome_fasta_assert" el1="GCA_000002115.2" el2="chr21" expression=">"/> + <expand macro="genome_fasta_assert" el1="GCA_000002125.2" el2="chr21" expression=">"/> + <expand macro="genome_fasta_assert" el1="GCA_000212995.1" el2="chr21" expression=">"/> + <expand macro="genome_fasta_assert" el1="GCA_000306695.2" el2="chr21" expression=">"/> + <expand macro="genome_fasta_assert" el1="GCA_000365445.1" el2="chr21" expression=">"/> + <expand macro="genome_fasta_assert" el1="GCA_001292825.2" el2="chr21" expression=">"/> + <expand macro="genome_fasta_assert" el1="GCA_001524155.4" el2="chr21" expression=">"/> + <expand macro="genome_fasta_assert" el1="GCA_001712695.1" el2="chr21" expression=">"/> + <expand macro="genome_fasta_assert" el1="GCA_022833125.2" el2="chr21" expression=">"/> + <expand macro="genome_fasta_assert" el1="GCF_000002125.1" el2="chr21" expression=">"/> + <expand macro="genome_fasta_assert" el1="GCF_000306695.2" el2="chr21" expression=">"/> <!-- According to https://github.com/ncbi/datasets/issues/188, the following should not be included among the returned results anymore 09/2023 --> <!-- <expand macro="genome_fasta_assert" el1="GCA_000442335.2" el2="GCA_000442335.2_LinearCen1.1_normalized" expression=">" expression_n="25"/> @@ -253,10 +260,12 @@ <param name="download_by" value="taxon"/> <param name="taxon_positional" value="human"/> </conditional> - <param name="chromosomes" value="21"/> - <param name="assembly_level" value="chromosome,complete"/> - <param name="assembly_source" value="refseq"/> - <param name="released_before" value="01/01/2018"/> + <section name="filters"> + <param name="chromosomes" value="21"/> + <param name="assembly_level" value="chromosome,complete"/> + <param name="assembly_source" value="refseq"/> + <param name="released_before" value="01/01/2018"/> + </section> <section name="file_choices"> <param name="include" value="genome"/> <param name="decompress" value="true"/> @@ -288,7 +297,9 @@ <param name="accession" value="GCF_000013305.1 GCF_000007445.1"/> </conditional> </conditional> - <param name="released_before" value="01/01/2007"/> + <section name="filters"> + <param name="released_before" value="01/01/2007"/> + </section> <section name="file_choices"> <param name="include" value="seq-report,gtf,cds"/> <param name="decompress" value="true"/> @@ -300,7 +311,7 @@ <has_n_columns n="4"/> </assert_contents> </output> - <output_collection name="sequence_report" type="list" count="2" > + <output_collection name="sequence_report" type="list" count="2"> <element name="GCF_000007445.1"> <assert_contents> <has_text text="GCF_000007445.1"/> @@ -316,7 +327,7 @@ </assert_contents> </element> </output_collection> - <output_collection name="genomic_gtf" type="list"> + <output_collection name="genomic_gtf" type="list" count="2"> <element name="GCF_000007445.1" file="GCF_000007445.1.genomic.gtf" compare="contains"/> <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.genomic.gtf" compare="contains"/> </output_collection> @@ -333,7 +344,9 @@ <param name="inputfile" value="accessions.txt"/> </conditional> </conditional> - <param name="released_before" value="01/01/2007"/> + <section name="filters"> + <param name="released_before" value="01/01/2007"/> + </section> <section name="file_choices"> <param name="include" value="seq-report,gff3,gbff"/> <param name="decompress" value="true"/> @@ -355,7 +368,6 @@ <element name="GCF_000013305.1" file="genome.3.GCF_000013305.1.genomic.gbff" compare="contains"/> </output_collection> </test> - <!-- should not fail https://github.com/ncbi/datasets/issues/194 --> <test expect_num_outputs="2"> <conditional name="query|subcommand"> @@ -365,8 +377,10 @@ <param name="accession" value="GCF_000001405"/> </conditional> </conditional> - <param name="released_before" value="01/01/2015"/> - <param name="assembly_version" value="all"/> + <section name="filters"> + <param name="released_before" value="01/01/2015"/> + <param name="assembly_version" value="all"/> + </section> <section name="file_choices"> <param name="include" value="seq-report"/> </section> @@ -395,19 +409,19 @@ <param name="decompress" value="true"/> </section> <output_collection name="genome_fasta" type="list:list" count="1"> - <expand macro="genome_fasta_assert" el1="GCF_000146045.2" el2="GCF_000146045.2_R64" expression=">NC_[0-9]+\.[0-9]+ Saccharomyces cerevisiae S288[Cc] (mitochondrion|chromosome .*), complete (sequence|genome)" expression_n="17"/> + <expand macro="genome_fasta_assert" el1="GCF_000146045.2" el2="GCF_000146045.2_R64" expression=">NC_[0-9]+\.[0-9]+ Saccharomyces cerevisiae S288[Cc] (mitochondrion|chromosome .*), complete (sequence|genome)" expression_n="17"/> </output_collection> <output_collection name="protein_fasta" type="list" count="1"> <element name="GCF_000146045.2" decompress="true"> <assert_contents> - <has_text text=">"/> + <has_text text=">"/> </assert_contents> </element> </output_collection> <output_collection name="rna_fasta" type="list" count="1"> <element name="GCF_000146045.2" decompress="true"> <assert_contents> - <has_text text=">"/> + <has_text text=">"/> </assert_contents> </element> </output_collection> @@ -437,7 +451,7 @@ <output_collection name="protein_fasta" type="list" count="1"> <element name="GCF_000146045.2" ftype="fasta.gz"> <assert_contents> - <has_size value="1845038" delta="2000"/> + <has_size value="1847862" delta="2000"/> </assert_contents> </element> </output_collection> @@ -463,22 +477,21 @@ </section> <output_collection name="sequence_report" type="list" count="2"/> <output_collection name="genome_fasta" type="list:list" count="2"> - <expand macro="genome_fasta_assert" el1="GCF_000002945.2" el2="GCF_000002945.2_ASM294v3" expression=">NC_[0-9]+\.[0-9]+ Schizosaccharomyces pombe.*" expression_n="4"/> - <expand macro="genome_fasta_assert" el1="GCF_000146045.2" el2="GCF_000146045.2_R64" expression=">NC_[0-9]+\.[0-9]+ Saccharomyces cerevisiae S288[Cc].*" expression_n="17"/> + <expand macro="genome_fasta_assert" el1="GCF_000002945.2" el2="GCF_000002945.2_ASM294v3" expression=">NC_[0-9]+\.[0-9]+ Schizosaccharomyces pombe.*" expression_n="4"/> + <expand macro="genome_fasta_assert" el1="GCF_000146045.2" el2="GCF_000146045.2_R64" expression=">NC_[0-9]+\.[0-9]+ Saccharomyces cerevisiae S288[Cc].*" expression_n="17"/> </output_collection> </test> <!-- tax_exact_match should filter out strains https://github.com/ncbi/datasets/issues/187 --> - <test expect_num_outputs="1"> + <test expect_num_outputs="2"> <conditional name="query|subcommand"> <param name="download_by" value="taxon"/> <param name="taxon_positional" value="4932"/> <param name="tax_exact_match" value="true"/> </conditional> - <param name="include" value=""/> <output name="genome_data_report"> <assert_contents> - <has_text text="Saccharomyces cerevisiae ZTW1" negate="true"/> + <has_text text="Saccharomyces cerevisiae ZTW1" negate="true"/> </assert_contents> </output> </test>
--- a/macros.xml Mon Mar 17 11:05:45 2025 +0000 +++ b/macros.xml Mon Jul 21 19:28:16 2025 +0000 @@ -1,5 +1,5 @@ <macros> - <token name="@TOOL_VERSION@">17.1.0</token> + <token name="@TOOL_VERSION@">18.4.1</token> <token name="@VERSION_SUFFIX@">0</token> <token name="@PROFILE@">23.0</token> <token name="@LICENSE@">MIT</token> @@ -12,7 +12,8 @@ <requirements> <requirement type="package" version="@TOOL_VERSION@">ncbi-datasets-cli</requirement> <requirement type="package" version="2025.1.31">ca-certificates</requirement> - <requirement type="package" version="6.0">unzip</requirement> + <!-- Removed line below because it was causing "skipping: [..] need PK compat. v4.5 (can do v2.1)" --> + <!-- <requirement type="package" version="6.0">unzip</requirement> --> </requirements> </xml> <xml name="bio_tools">
--- a/test-data/GCF_000007445.1.genomic.gtf Mon Mar 17 11:05:45 2025 +0000 +++ b/test-data/GCF_000007445.1.genomic.gtf Mon Jul 21 19:28:16 2025 +0000 @@ -1,6 +1,5 @@ -#!annotation-source NCBI RefSeq NC_004431.1 RefSeq gene 190 255 . + . gene_id "C_RS00005"; transcript_id ""; gbkey "Gene"; gene "thrL"; gene_biotype "protein_coding"; locus_tag "C_RS00005"; old_locus_tag "c5491"; -NC_004431.1 Protein Homology CDS 190 252 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; -NC_004431.1 Protein Homology start_codon 190 192 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; -NC_004431.1 Protein Homology stop_codon 253 255 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; +NC_004431.1 Protein Homology CDS 190 252 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1"; +NC_004431.1 Protein Homology start_codon 190 192 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1"; +NC_004431.1 Protein Homology stop_codon 253 255 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1"; NC_004431.1 RefSeq gene 453 911 . + . gene_id "C_RS00010"; transcript_id ""; gbkey "Gene"; gene "tnpA"; gene_biotype "protein_coding"; locus_tag "C_RS00010"; old_locus_tag "c0002";
--- a/test-data/genome.2.GCF_000013305.1.genomic.gtf Mon Mar 17 11:05:45 2025 +0000 +++ b/test-data/genome.2.GCF_000013305.1.genomic.gtf Mon Jul 21 19:28:16 2025 +0000 @@ -1,6 +1,5 @@ -#!annotation-source NCBI RefSeq NC_008253.1 RefSeq gene 190 255 . + . gene_id "ECP_RS00005"; transcript_id ""; gbkey "Gene"; gene "thrL"; gene_biotype "protein_coding"; locus_tag "ECP_RS00005"; old_locus_tag "ECP_0001"; -NC_008253.1 Protein Homology CDS 190 252 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; -NC_008253.1 Protein Homology start_codon 190 192 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; -NC_008253.1 Protein Homology stop_codon 253 255 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; +NC_008253.1 Protein Homology CDS 190 252 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1"; +NC_008253.1 Protein Homology start_codon 190 192 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1"; +NC_008253.1 Protein Homology stop_codon 253 255 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1"; NC_008253.1 RefSeq gene 336 2798 . + . gene_id "ECP_RS00010"; transcript_id ""; gbkey "Gene"; gene "thrA"; gene_biotype "protein_coding"; locus_tag "ECP_RS00010"; old_locus_tag "ECP_0002";