Repository 'ncbi_datasets'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/ncbi_datasets

Changeset 11:ac24fff14f23 (2022-12-02)
Previous changeset 10:a3395b1d871b (2022-11-21) Next changeset 12:d78faac2c6ef (2022-12-03)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_datasets commit 4d7d3a56084e140f4fa63fb0e04a08b732f247f2
modified:
datasets_genome.xml
macros.xml
added:
datasets_gene.xml
test-data/geneids.txt
b
diff -r a3395b1d871b -r ac24fff14f23 datasets_gene.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datasets_gene.xml Fri Dec 02 10:52:48 2022 +0000
[
b'@@ -0,0 +1,536 @@\n+<tool id="datasets_download_gene" name="NCBI Datasets Gene" profile="@PROFILE@" license="@LICENSE@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">\n+    <description>download gene sequences and metadata</description>\n+    <macros>\n+        <import>macros.xml</import>\n+    </macros>\n+    <expand macro="requirements"></expand>\n+    <command><![CDATA[\n+#import re\n+@SETUP_CERTIFICATES@\n+datasets download gene $query.subcommand.download_by\n+#if $query.subcommand.download_by == \'taxon\':\n+    \'$query.subcommand.taxon_positional\'\n+#else:\n+    #if $query.subcommand.text_or_file.text_or_file == \'text\':\n+        #echo " ".join(f"\'{x}\'" for x in re.split(" |,", str($query.subcommand.text_or_file.accession)) if x)\n+    #else\n+        --inputfile \'$query.subcommand.text_or_file.inputfile\'\n+    #end if\n+#end if\n+\n+#if $query.subcommand.download_by != \'taxon\' and $query.subcommand.ortholog:\n+    --ortholog \'$query.subcommand.ortholog\'\n+#end if\n+\n+#if $query.subcommand.download_by == \'symbol\':\n+    #if $query.subcommand.taxon\n+        --taxon \'$query.subcommand.taxon\'\n+    #end if\n+#end if\n+\n+#if $query.subcommand.download_by == \'accession\':\n+    #if $query.subcommand.taxon_filter\n+        --taxon-filter \'$query.subcommand.taxon_filter\'\n+    #end if\n+    #if str($query.subcommand.include_flanks_bp)\n+        --include-flanks-bp $query.subcommand.include_flanks_bp\n+    #end if\n+#end if\n+\n+#if $filters.fasta_filter_cond.fasta_filter_select\n+    #if $filters.fasta_filter_cond.fasta_filter_select == \'text\'\n+        --fasta-filter #echo ",".join(f"\'{x}\'" for x in $filters.fasta_filter_cond.fasta_filter.split(\',\') if x)\n+    #else\n+        --fasta-filter-file \'$filters.fasta_filter_cond.fasta_filter_file\'\n+    #end if\n+#end if\n+\n+--include\n+#if $file_choices.kingdom_cond.include\n+    #echo ",".join($file_choices.kingdom_cond.include)\n+#else\n+    none\n+#end if\n+\n+--no-progressbar\n+\n+## produce TSV report file (either gene or prok-gene)\n+&& \n+dataformat\n+    tsv\n+    $file_choices.kingdom_cond.kingdom_sel\n+    --package ncbi_dataset.zip\n+    --fields #echo ",".join($file_choices.kingdom_cond.report_columns)\n+    > gene_data_report.tsv\n+## if ! dataformat tsv gene --package ncbi_dataset.zip > gene_data_report.tsv 2> dataformat.log; then\n+##     dataformat tsv prok-gene --package ncbi_dataset.zip > gene_data_report.tsv 2>> dataformat.log;\n+## fi\n+\n+#if $file_choices.kingdom_cond.include and "product-report" in $file_choices.kingdom_cond.include\n+    && dataformat tsv gene-product --package ncbi_dataset.zip > gene_product_report.tsv\n+#end if\n+\n+## unzip and rehydrate if any data is to be downloaded (include is not None)\n+#if $file_choices.kingdom_cond.include\n+    ## unzip\n+    && 7z x -y ncbi_dataset.zip > 7z.log\n+#end if\n+]]></command>\n+    <inputs>\n+        <section name="query" title="Query" expanded="true">\n+            <conditional name="subcommand">\n+                <param name="download_by" type="select" label="Choose how to find genes to download">\n+                    <option value="gene-id">By NCBI Gene ID</option>\n+                    <option value="symbol">By Gene symbol</option>\n+                    <option value="accession">By RefSeq nucleotide or protein accession</option>\n+                    <option value="taxon">By taxon (NCBI Taxonomy ID, scientific or common name at any tax rank)</option>\n+                </param>\n+                <when value="gene-id">\n+                    <expand macro="text_or_file" what="Gene ID" what_extended="NCBI Gene ID" help=""/>\n+                    <expand macro="ortholog"/>\n+                </when>\n+                <when value="symbol">\n+                    <expand macro="text_or_file" what="Gene Symbol" what_extended="NCBI Gene Symbol" help=""/>\n+                    <expand macro="ortholog"/>\n+                    <param argument="--taxon" type="text" value="human" label="Species for gene symbol" help="NCBI taxid, common or scientific name">\n+                        <sanitizer invalid_'..b' datasets download gene accession WP_004675351.1 + include_flanks_bp -->\n+        <test expect_num_outputs="3">\n+            <conditional name="query|subcommand">\n+                <param name="download_by" value="accession"/>\n+                <conditional name="text_or_file">\n+                    <param name="text_or_file" value="text"/>\n+                    <param name="accession" value="WP_004675351.1"/>\n+                </conditional>\n+                <param name="include_flanks_bp" value="100"/>\n+            </conditional>\n+            <section name="file_choices">\n+                <conditional name="kingdom_cond">\n+                    <param name="kingdom_sel" value="prok-gene"/>\n+                    <param name="include" value="gene,protein"/>\n+                </conditional>\n+            </section>\n+            <output name="gene_data_report">\n+                <assert_contents>\n+                    <has_text text="glcE"/>\n+                    <has_n_lines n="2"/>\n+                    <has_n_columns n="7"/>\n+                </assert_contents>\n+            </output>\n+            <output name="gene_fasta">\n+                <assert_contents>\n+                    <has_text text=">"/>\n+                </assert_contents>\n+            </output>\n+            <output name="protein_fasta">\n+                <assert_contents>\n+                    <has_text text=">"/>\n+                </assert_contents>\n+            </output>\n+            <assert_command>\n+                <has_text text="include-flanks-bp 100"/>\n+            </assert_command>\n+        </test> \n+\n+        <!-- 10: datasets download gene taxon human   -->\n+        <test expect_num_outputs="1">\n+            <conditional name="query|subcommand">\n+                <param name="download_by" value="taxon"/>\n+                <param name="taxon_positional" value="human"/>\n+            </conditional>\n+            <section name="file_choices">\n+                <conditional name="kingdom_cond">\n+                    <param name="include" value=""/>\n+                </conditional>\n+            </section>\n+            <output name="gene_data_report">\n+                <assert_contents>\n+                    <has_text text="human"/>\n+                    <has_n_lines n="72533"/>\n+                    <has_n_columns n="8"/>\n+                </assert_contents>\n+            </output>\n+        </test> \n+        <!-- 11: datasets download gene taxon human + \\-\\-fasta-filter  -->\n+        <test expect_num_outputs="2">\n+            <conditional name="query|subcommand">\n+                <param name="download_by" value="taxon"/>\n+                <param name="taxon_positional" value="human"/>\n+            </conditional>\n+            <section name="file_choices">\n+                <conditional name="kingdom_cond">\n+                    <param name="include" value="protein"/>\n+                </conditional>\n+            </section>\n+            <section name="filters">\n+                <conditional name="fasta_filter_cond">\n+                    <param name="fasta_filter_select" value="text"/>\n+                    <param name="fasta_filter" value="NP_542432.2"/>\n+                </conditional>\n+            </section>\n+            <output name="gene_data_report">\n+                <assert_contents>\n+                    <has_text text="human"/>\n+                    <has_n_lines n="72533"/>\n+                    <has_n_columns n="8"/>\n+                </assert_contents>\n+            </output>\n+            <output name="protein_fasta">\n+                <assert_contents>\n+                    <has_text text=">" n="1" />\n+                </assert_contents>\n+            </output></test>\n+    </tests>\n+    <help>\n+<![CDATA[\n+**Download Gene Datasets from NCBI**\n+\n+Download a gene dataset (gene sequence, transcipt, amino acid sequences, \n+nucleotide coding sequences, 5\'-UTR, 3\'-UTR) as well as gene and gene\n+product reports. Genes can be referred by gene id, symbol, accession,\n+or taxon.\n+]]>\n+    </help>\n+</tool>\n'
b
diff -r a3395b1d871b -r ac24fff14f23 datasets_genome.xml
--- a/datasets_genome.xml Mon Nov 21 11:40:05 2022 +0000
+++ b/datasets_genome.xml Fri Dec 02 10:52:48 2022 +0000
[
b'@@ -5,16 +5,17 @@\n     </macros>\n     <expand macro="requirements"></expand>\n     <command><![CDATA[\n+#import re\n @SETUP_CERTIFICATES@\n datasets download genome $query.subcommand.download_by\n #if $query.subcommand.download_by == \'accession\':\n     #if $query.subcommand.text_or_file.text_or_file == \'text\':\n-        #echo " ".join(f"\'{x}\'" for x in $query.subcommand.text_or_file.accession.split(\' \') if x)\n+        #echo " ".join(f"\'{x}\'" for x in re.split(" |,", str($query.subcommand.text_or_file.accession)) if x)\n     #else\n         --inputfile \'$query.subcommand.text_or_file.inputfile\'\n     #end if\n #else:\n-    \'$query.subcommand.taxon\'\n+    \'$query.subcommand.taxon_positional\'\n     $query.subcommand.tax_exact_match\n #end if\n $filters.reference\n@@ -37,24 +38,55 @@\n     --search \'$filters.search_term\'\n #end for\n --no-progressbar\n-#if $uncompressed\n-&& 7z x -y ncbi_dataset.zip\n-#else\n-&& 7z l ncbi_dataset.zip > ncbi_dataset.txt\n+--dehydrated\n+\n+## produce TSV report file\n+&& dataformat tsv genome \n+    --package ncbi_dataset.zip\n+    --fields #echo ",".join($file_choices.report_columns) \n+    > genome_data_report.tsv\n+\n+## unzip and rehydrate if any data is to be downloaded (include is not None)\n+#if $file_choices.include\n+    ## unzip\n+    && 7z x -y ncbi_dataset.zip > 7z.log\n+\n+    ## rehydrate\n+    && datasets rehydrate\n+        --directory ./\n+        #if not $file_choices.decompress\n+            --gzip\n+        #end if\n+        --max-workers \\${NCBI_DATASETS_MAX_WORKERS:-10}\n+\n+    ## rename all faa, fna (resp faa.gz, fna.gz) to fasta (resp fasta.gz) to allow discovery\n+    && find ncbi_dataset \\( -name "*.faa" -o -name "*.fna" -o -name "*.faa.gz" -o -name "*.fna.gz" \\) -exec sh -c \'mv {} \\$(echo {} | sed "s/.f[an]a\\(.gz\\)\\?\\$/.fasta\\1/")\' \\;\n+\n+    ## unzip all compressed (non-fasta) files (jsonl files are just named .gz)\n+    && find ncbi_dataset -name "*.jsonl.gz" -exec sh -c \'mv {} \\$(dirname {})/\\$(basename {} .gz)\' \\;\n+    #if $file_choices.decompress\n+        && find ncbi_dataset \\( -name "*.gz" ! -name "*fasta.gz" \\) -exec gunzip {} \\;\n+    #end if\n+\n+    #if "seq-report" in $file_choices.include\n+        && find ncbi_dataset -name sequence_report.jsonl -exec sh -c \'dataformat tsv genome-seq --inputfile {} > \\$(dirname {})/\\$(basename {} .jsonl).tsv\' \\;\n+    #end if\n+    \n+    && true  ## because Galaxy removes trailing ; from command\n #end if\n ]]></command>\n     <inputs>\n         <section name="query" title="Query" expanded="true">\n             <conditional name="subcommand">\n                 <param name="download_by" type="select" label="Choose how to find genomes to download">\n-                    <option value="accession">Download by NCBI assembly or BioProject accession</option>\n-                    <option value="taxon">Download by taxon</option>\n+                    <option value="accession">By NCBI assembly or BioProject accession</option>\n+                    <option value="taxon">By taxon (NCBI Taxonomy ID, scientific or common name at any tax rank)</option>\n                 </param>\n                 <when value="accession">\n                     <expand macro="text_or_file"/>\n                 </when>\n                 <when value="taxon">\n-                    <param name="taxon" type="text" label="Enter taxon" help="e.g. human, mouse, bos taurus, etc."/>\n+                    <expand macro="taxon_positional"/>\n                     <param argument="--tax-exact-match" type="boolean" truevalue="--tax-exact-match" falsevalue="" label="Exclude sub-species when a species-level taxon is specified"/>\n                 </when>\n             </conditional>\n@@ -67,7 +99,6 @@\n                 <option value="latest">Latest</option>\n                 <option value="all">All</option>\n             </param>\n-            <!-- TODO add test for assembly source: according to CLI doc args are RefSeq, GenBank, All and not refseq / genbank-->\n             <expand macro="assembly_source"/>\n             <expand macro="chromosomes"/>\n  '..b'ion name="protein_fasta" type="list" count="1">\n+                <element name="GCF_000146045.2" ftype="fasta.gz">\n+                    <assert_contents>\n+                        <has_size value="1844838"/>\n+                    </assert_contents>\n+                </element>\n+            </output_collection>\n+            <output_collection name="rna_fasta" type="list" count="1">\n+                <element name="GCF_000146045.2" ftype="fasta.gz">\n+                    <assert_contents>\n+                        <has_size value="2784534"/>\n+                    </assert_contents>\n+                </element>\n+            </output_collection>\n         </test>\n         <test expect_num_outputs="3">\n             <conditional name="query|subcommand">\n@@ -307,8 +415,11 @@\n                     <param name="accession" value="GCF_000146045.2 GCF_000002945.1"/>\n                 </conditional>\n             </conditional>\n-            <param name="include" value="seq-report,genome"/>\n-            <param name="uncompressed" value="true"/>\n+            <section name="file_choices">\n+                <param name="include" value="seq-report,genome"/>\n+                <param name="decompress" value="true"/>\n+            </section>\n+            <output_collection name="sequence_report" type="list" count="2"/>\n             <output_collection name="genome_fasta" type="list:list" count="2">\n                 <expand macro="genome_fasta_assert" el1="GCF_000002945.1" el2="GCF_000002945.1_ASM294v2" expression=">NC_[0-9]+\\.[0-9]+ Schizosaccharomyces pombe (mitochondrion|chromosome .*), complete (sequence|genome)" expression_n="4"/>\n                 <expand macro="genome_fasta_assert" el1="GCF_000146045.2" el2="GCF_000146045.2_R64" expression=">NC_[0-9]+\\.[0-9]+ Saccharomyces cerevisiae S288[Cc] (mitochondrion|chromosome .*), complete (sequence|genome)" expression_n="17"/>\n@@ -320,12 +431,10 @@\n         <test expect_num_outputs="1" expect_test_failure="true">\n             <conditional name="query|subcommand">\n                 <param name="download_by" value="taxon"/>\n-                <param name="text_or_file" value="text"/>\n-                <param name="taxon" value="4932"/>\n+                <param name="taxon_positional" value="4932"/>\n                 <param name="tax_exact_match" value="true"/>\n             </conditional>\n             <param name="include" value=""/>\n-            <param name="uncompressed" value="true"/>\n             <output name="genome_data_report">\n                 <assert_contents>\n                    <has_text text="Saccharomyces cerevisiae ZTW1" negate="true"/>\n@@ -338,15 +447,18 @@\n **Download Genome Datasets from NCBI**\n \n Download a genome dataset including genome, transcript and protein sequence, annotation and a detailed data report.\n-Genome datasets can be specified by NCBI Assembly or BioProject accession or taxon. Datasets are downloaded as a zip file.\n+Genome datasets can be specified by NCBI Assembly or BioProject accession(s) or by taxon.\n+\n+The download is a three step process:\n \n-Tthe default genome dataset includes the following files (if available):\n- * data_report.jsonl (genome assembly and annotation metadata, not always available)\n- * genomic.fna (genomic sequences)\n- * rna.fna (transcript sequences)\n- * protein.faa (protein sequences)\n- * genomic.gff (genome annotation in gff3 format)\n- * dataset_catalog.json (a list of files and file types included in the dataset)\n+1. A "dehydrated" zip file is downloaded which includes the metadata and the download URL)\n+2. The metadata is transformed into a tabular (TSV) file\n+3. The data is hydrated (the actual data is downloaded)\n+\n+The 3rd step can be skipped by unselecting all output types in the `Include` parameter.\n+Thereby its possible to inspect the metadata prior to the actual data download. Also this\n+allows to use the tool for querying data sets (and their accessions) of interest which\n+can then be downloaded in a second call using the accessions.\n ]]>\n     </help>\n \n'
b
diff -r a3395b1d871b -r ac24fff14f23 macros.xml
--- a/macros.xml Mon Nov 21 11:40:05 2022 +0000
+++ b/macros.xml Fri Dec 02 10:52:48 2022 +0000
[
b'@@ -1,5 +1,5 @@\n <macros>\n-    <token name="@TOOL_VERSION@">14.3</token>\n+    <token name="@TOOL_VERSION@">14.4</token>\n     <token name="@VERSION_SUFFIX@">0</token>\n     <token name="@PROFILE@">21.01</token>\n     <token name="@LICENSE@">MIT</token>\n@@ -39,8 +39,10 @@\n                 <option value="file">Read a list of @WHAT_EXTENDED@s from a dataset</option>\n             </param>\n             <when value="text">\n-                <param name="accession" type="text" label="Enter space separated list of @WHAT@s" help="@HELP@">\n-                    <yield/>\n+                <!-- command section also allows spaces as separator for backward compatibility\n+                     prefer comma because this is used also in other text params-->\n+                <param name="accession" type="text" label="Enter comma separated list of @WHAT@s" help="@HELP@">\n+                    <validator type="length" min="1" message="Provide at least one @WHAT@"/>\n                 </param>\n             </when>\n             <when value="file">\n@@ -59,18 +61,45 @@\n             </sanitizer>\n         </param>\n     </xml>\n-    <xml name="include">\n-        <param argument="--include" type="select" multiple="true" optional="true">\n-            <option value="genome" selected="true">genomic sequence (genome)</option>\n-            <option value="rna">transcript (rna)</option>\n-            <option value="protein">amnio acid sequences (protein)</option>\n-            <option value="cds">nucleotide coding sequences (cds)</option>\n-            <option value="gff3">general feature file (gff3)</option>\n-            <option value="gtf">gene transfer format (gtf)</option>\n-            <option value="gbff">GenBank flat file (gbff)</option>\n-            <option value="seq-report">sequence report file (seq-report)</option>\n+\n+    <xml name="taxon_positional">\n+        <param name="taxon_positional" type="text" label="Enter taxon" help="e.g. human, mouse, bos taurus, etc."/>\n+    </xml>\n+\n+    <xml name="ortholog">\n+        <param argument="--ortholog" type="text" label="Retrieve orthologs for taxa" help="Retrieve data for an ortholog set. Provide one or more comma separated taxa (any rank) to filter results or \'all\' for the complete set.">\n+            <sanitizer invalid_char="">\n+                <valid initial="string.letters,string.digits">\n+                    <add value=" " />\n+                    <add value="," />\n+                    <add value="-" />\n+                </valid>\n+            </sanitizer>\n         </param>\n     </xml>\n+\n+    <xml name="include">\n+        <param argument="--include" type="select" multiple="true" optional="true" label="Include" help="Download the following datasets (if available)">\n+            <yield/>\n+        </param>\n+    </xml>\n+    <xml name="genome_includes">\n+        <option value="genome" selected="true">genomic sequence (genome)</option>\n+        <option value="rna">transcript (rna)</option>\n+        <option value="protein">amnio acid sequences (protein)</option>\n+        <option value="cds">nucleotide coding sequences (cds)</option>\n+        <option value="gff3">general feature file (gff3)</option>\n+        <option value="gtf">gene transfer format (gtf)</option>\n+        <option value="gbff">GenBank flat file (gbff)</option>\n+        <option value="seq-report">sequence report file (seq-report)</option>\n+        <yield/>\n+    </xml>\n+    <xml name="gene_includes">\n+        <option value="gene">gene sequence (gene)</option>\n+        <option value="protein" selected="true">amnio acid sequences (protein)</option>\n+        <yield/>\n+    </xml>\n+\n     <token name="@INCLUDE@"><![CDATA[\n         --include\n         #if $file_choices.include\n@@ -79,6 +108,187 @@\n             none\n         #end if\n     ]]></token>\n+    <xml name="tsv_report_columns">\n+        <param name="report_columns" type="select" multiple="true" optional="false" label="Columns in the report">\n+            <option value="accession">Assembly Accession</option>\n+      '..b'         <option value="genomic-region-gene-range-range-order">Genomic Region Gene Range Order</option>\n+            <option value="genomic-region-gene-range-range-orientation">Genomic Region Gene Range Orientation</option>\n+            <option value="genomic-region-gene-range-range-start">Genomic Region Gene Range Start</option>\n+            <option value="genomic-region-gene-range-range-stop">Genomic Region Gene Range Stop</option>\n+            <option value="genomic-region-genomic-region-type">Genomic Region Genomic Region Type</option>\n+            <option value="group-id">Gene Group Identifier</option>\n+            <option value="group-method">Gene Group Method</option>\n+            <option value="name-authority">Nomenclature Authority</option>\n+            <option value="name-id">Nomenclature ID</option>\n+            <option value="omim-ids">OMIM IDs</option>\n+            <option value="orientation">Orientation</option>\n+            <option value="ref-standard-gene-range-accession">Reference Standard Gene Range Sequence Accession</option>\n+            <option value="ref-standard-gene-range-range-order">Reference Standard Gene Range Order</option>\n+            <option value="ref-standard-gene-range-range-orientation">Reference Standard Gene Range Orientation</option>\n+            <option value="ref-standard-gene-range-range-start">Reference Standard Gene Range Start</option>\n+            <option value="ref-standard-gene-range-range-stop">Reference Standard Gene Range Stop</option>\n+            <option value="ref-standard-genomic-region-type">Reference Standard Genomic Region Type</option>\n+            <option value="replaced-gene-id">Replaced NCBI GeneID</option>\n+            <option value="rna-type">RNA Type</option>\n+            <option value="swissprot-accessions">SwissProt Accessions</option>\n+            <option value="symbol">Symbol</option>\n+            <option value="synonyms">Synonyms</option>\n+            <option value="tax-id">Taxonomic ID</option>\n+            <option value="tax-name">Taxonomic Name</option>\n+            <yield/>\n+        </param>\n+    </xml>\n+    <xml name="prok_gene_tsv_report_columns">\n+        <param name="report_columns" type="select" multiple="true" optional="false" label="Columns in the report">\n+            <option value="accession">Accession</option>\n+            <option value="description">Description</option>\n+            <option value="ec-number">EC Number</option>\n+            <option value="gene-symbol">Gene Symbol</option>\n+            <option value="mapping-count">Number of Genome Mappings</option>\n+            <option value="name-evidence-accession">Protein Name EvidenceAccession</option>\n+            <option value="name-evidence-category">Protein Name EvidenceCategory</option>\n+            <option value="name-evidence-source">Protein Name EvidenceSource</option>\n+            <option value="protein-length">Protein Length</option>\n+            <option value="protein-name">Protein Name</option>\n+            <yield/>\n+        </param>\n+    </xml>\n     <xml name="released_options" token_released_what="genomes" token_before_or_after="before">\n         <param argument="--released-@BEFORE_OR_AFTER@" type="text" optional="true" label="Only include @RELEASED_WHAT@ that have been released @BEFORE_OR_AFTER@ a specified date (MM/DD/YYYY)">\n             <validator type="regex" message="enter a date in the form MM/DD/YYYY">[0-9]{2}/[0-9]{2}/[0-9]{4}</validator>\n@@ -93,9 +303,9 @@\n #end if\n     </token>\n \n-    <xml name="genome_fasta_assert" tokens="el1,el2,expression" token_expression_n="1">\n+    <xml name="genome_fasta_assert" tokens="el1,el2,expression" token_ftype="fasta" token_expression_n="1">\n         <element name="@EL1@">\n-            <element name="@EL2@">\n+            <element name="@EL2@" ftype="@FTYPE@" decompress="true">\n                 <assert_contents>\n                     <has_text_matching expression="@EXPRESSION@" n="@EXPRESSION_N@"/>\n                 </assert_contents>\n'
b
diff -r a3395b1d871b -r ac24fff14f23 test-data/geneids.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/geneids.txt Fri Dec 02 10:52:48 2022 +0000
b
@@ -0,0 +1,2 @@
+2597
+14433