Repository 'metaphlan'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/metaphlan

Changeset 0:f5df500fcc3c (2021-04-19)
Next changeset 1:b89b0765695d (2021-05-17)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
added:
customizemetadata.py
generate_test_data.sh
macros.xml
metaphlan.xml
test-data/SRS014464-Anterior_nares-abundances.tabular
test-data/SRS014464-Anterior_nares-bowtie2out.tabular
test-data/SRS014464-Anterior_nares-ignore-marker-bowtie2out.tabular
test-data/SRS014464-Anterior_nares-legacy-abundances.tabular
test-data/SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular
test-data/SRS014464-Anterior_nares-two-inputs.sam
test-data/SRS014464-Anterior_nares.biom
test-data/SRS014464-Anterior_nares.fasta
test-data/SRS014464-Anterior_nares.fasta.gz
test-data/SRS014464-Anterior_nares.sam
test-data/marker.txt
test-data/marker_sequence.fasta
test-data/test-db-with-one-marker.fasta
test-data/test-db-with-one-marker.json
test-data/test-db-without-one-marker.fasta
test-data/test-db-without-one-marker.json
test-data/test-db.fasta
test-data/test-db.json
test-data/test-db/test-db.1.bt2
test-data/test-db/test-db.2.bt2
test-data/test-db/test-db.3.bt2
test-data/test-db/test-db.4.bt2
test-data/test-db/test-db.pkl
test-data/test-db/test-db.rev.1.bt2
test-data/test-db/test-db.rev.2.bt2
test-data/test_database.loc
tool-data/metaphlan_database.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 000000000000 -r f5df500fcc3c customizemetadata.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/customizemetadata.py Mon Apr 19 20:56:20 2021 +0000
[
b"@@ -0,0 +1,480 @@\n+#!/usr/bin/env python\n+# -*- coding: utf-8 -*-\n+\n+import argparse\n+import bz2\n+import json\n+import pickle\n+import re\n+from pathlib import Path\n+\n+\n+def load_from_json(json_fp):\n+    '''\n+    Read JSON file with marker metadata\n+\n+    :param json_fp: Path to JSON file\n+    '''\n+    with open(json_fp, 'r') as json_f:\n+        data = json.load(json_f)\n+\n+    for m in data['markers']:\n+        data['markers'][m]['ext'] = set(data['markers'][m]['ext'])\n+\n+    for t in data['taxonomy']:\n+        if isinstance(data['taxonomy'][t], list):\n+            data['taxonomy'][t] = tuple(data['taxonomy'][t])\n+    return data\n+\n+\n+def dump_to_json(data, json_fp):\n+    '''\n+    Dump marker metadata to JSON file\n+\n+    :param json_fp: Path to JSON file\n+    '''\n+    for m in data['markers']:\n+        data['markers'][m]['ext'] = list(data['markers'][m]['ext'])\n+\n+    with open(json_fp, 'w') as json_f:\n+        json.dump(data, json_f)\n+\n+\n+def transform_pkl_to_json(pkl_fp, json_fp):\n+    '''\n+    Read Pickle file and drop it to a JSON file\n+\n+    :param pkl_fp: Path to input Pickle file\n+    :param json_fp: Path to output JSON file\n+    '''\n+    # load metadata from Pickle file\n+    with bz2.BZ2File(pkl_fp, 'r') as pkl_f:\n+        in_metadata = pickle.load(pkl_f)\n+\n+    out_metadata = {\n+        'markers': in_metadata['markers'],\n+        'taxonomy': in_metadata['taxonomy'],\n+        'merged_taxon': {}\n+    }\n+    # transform merged_taxons tuple keys to string\n+    for k in in_metadata['merged_taxon']:\n+        n = ' , '.join(k)\n+        out_metadata[n] = in_metadata['merged_taxon'][k]\n+\n+    # dump metadata to JSON file\n+    dump_to_json(out_metadata, json_fp)\n+\n+\n+def transform_json_to_pkl(json_fp, pkl_fp):\n+    '''\n+    Read JSON file and drop it to a Pickle file\n+\n+    :param json_fp: Path to input JSON file\n+    :param pkl_fp: Path to output Pickle file\n+    '''\n+    # load metadata from 
JSON file\n+    in_metadata = load_from_json(json_fp)\n+\n+    out_metadata = {\n+        'markers': in_metadata['markers'],\n+        'taxonomy': in_metadata['taxonomy'],\n+        'merged_taxon': {}\n+    }\n+    # transform merged_taxons keys to tuple\n+    for k in in_metadata['merged_taxon']:\n+        n = ' , '.split(k)\n+        out_metadata[n] = in_metadata['merged_taxon'][k]\n+\n+    # dump metadata to Pickle file\n+    with bz2.BZ2File(pkl_fp, 'w') as pkl_f:\n+        pickle.dump(out_metadata, pkl_f)\n+\n+\n+def add_marker(in_json_fp, out_json_fp, name, m_length, g_length, gca, k_name, k_id, p_name, p_id, c_name, c_id, o_name, o_id, f_name, f_id, g_name, g_id, s_name, s_id, t_name):\n+    '''\n+    Add marker to JSON file\n+\n+    :param in_json_fp: Path to input JSON file\n+    :param out_json_fp: Path to output JSON file\n+    :param name: Name of new marker\n+    :param m_length: Length of new marker\n+    :param g_length: List with lengths of genomes from which the new marker has been extracted\n+    :param gca: List with GCA of genomes from which the new marker has been extracted\n+    :param k_name: List with Name of Kingdom for genomes from which the new marker has been extracted\n+    :param k_id: List with NCBI id of Kingdom for genomes from which the new marker has been extracted\n+    :param p_name: List with Name of Phylum for genomes from which the new marker has been extracted\n+    :param p_id: List with NCBI id of Phylum for genomes from which the new marker has been extracted\n+    :param c_name: List with Name of Class for genomes from which the new marker has been extracted\n+    :param c_id: List with NCBI id of Class for genomes from which the new marker has been extracted\n+    :param o_name: List with Name of Order for genomes from which the new marker has been extracted\n+    :param o_id: List with NCBI id of Order for genomes from which the new marker has been extracted\n+    :param f_name: List with Name of Family for genomes 
from which the new marker has been extracted\n+    :param f_id: List with NCBI id of Family for genomes from which th"..b'for genome from which the new marker has been extracted", action="append")\n+    add_marker_parser.add_argument(\'--p_name\', help="Name of Phylum for genome from which the new marker has been extracted", action="append")\n+    add_marker_parser.add_argument(\'--p_id\', help="NCBI id of Phylum for genome from which the new marker has been extracted", action="append")\n+    add_marker_parser.add_argument(\'--c_name\', help="Name of Class for genome from which the new marker has been extracted", action="append")\n+    add_marker_parser.add_argument(\'--c_id\', help="NCBI id of Class for genome from which the new marker has been extracted", action="append")\n+    add_marker_parser.add_argument(\'--o_name\', help="Name of Order for genome from which the new marker has been extracted", action="append")\n+    add_marker_parser.add_argument(\'--o_id\', help="NCBI id of Order for genome from which the new marker has been extracted", action="append")\n+    add_marker_parser.add_argument(\'--f_name\', help="Name of Family for genome from which the new marker has been extracted", action="append")\n+    add_marker_parser.add_argument(\'--f_id\', help="NCBI id of Family for genome from which the new marker has been extracted", action="append")\n+    add_marker_parser.add_argument(\'--g_name\', help="Name of Genus for genome from which the new marker has been extracted", action="append")\n+    add_marker_parser.add_argument(\'--g_id\', help="NCBI id of Genus for genome from which the new marker has been extracted", action="append")\n+    add_marker_parser.add_argument(\'--s_name\', help="Name of Species for genome from which the new marker has been extracted", action="append")\n+    add_marker_parser.add_argument(\'--s_id\', help="NCBI id of Species for genome from which the new marker has been extracted", action="append")\n+    
add_marker_parser.add_argument(\'--t_name\', help="Name of Strain for genome from which the new marker has been extracted", action="append")\n+    # remove_markers subcommand\n+    remove_markers_parser = subparsers.add_parser(\'remove_markers\', help=\'Remove markers from JSON file\')\n+    remove_markers_parser.add_argument(\'--in_json\', help="Path to input JSON file")\n+    remove_markers_parser.add_argument(\'--markers\', help="Path to file with markers to remove (1 per line)")\n+    remove_markers_parser.add_argument(\'--out_json\', help="Path to output JSON file")\n+    remove_markers_parser.add_argument(\'--kept_markers\', help="Path to file with kept markers")\n+    # keep_markers subcommand\n+    keep_markers_parser = subparsers.add_parser(\'keep_markers\', help=\'Keep markers from JSON file, others will be removed\')\n+    keep_markers_parser.add_argument(\'--in_json\', help="Path to input JSON file")\n+    keep_markers_parser.add_argument(\'--markers\', help="Path to file with markers to keep (1 per line)")\n+    keep_markers_parser.add_argument(\'--out_json\', help="Path to output JSON file")\n+\n+    args = parser.parse_args()\n+\n+    if args.function == \'transform_pkl_to_json\':\n+        transform_pkl_to_json(Path(args.pkl), Path(args.json))\n+    elif args.function == \'transform_json_to_pkl\':\n+        transform_json_to_pkl(Path(args.json), Path(args.pkl))\n+    elif args.function == \'add_marker\':\n+        add_marker(\n+            args.in_json,\n+            args.out_json,\n+            args.name,\n+            args.m_length,\n+            args.g_length,\n+            args.gca,\n+            args.k_name,\n+            args.k_id,\n+            args.p_name,\n+            args.p_id,\n+            args.c_name,\n+            args.c_id,\n+            args.o_name,\n+            args.o_id,\n+            args.f_name,\n+            args.f_id,\n+            args.g_name,\n+            args.g_id,\n+            args.s_name,\n+            args.s_id,\n+    
        args.t_name)\n+    elif args.function == \'remove_markers\':\n+        remove_markers(args.in_json, args.markers, args.out_json, args.kept_markers)\n+    elif args.function == \'keep_markers\':\n+        keep_markers(args.in_json, args.markers, args.out_json)\n'
b
diff -r 000000000000 -r f5df500fcc3c generate_test_data.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/generate_test_data.sh Mon Apr 19 20:56:20 2021 +0000
b
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+
+metaphlan_hclust_heatmap.py \
+    --in test-data/merged_community_profile.tabular \
+    --out test-data/heatmap.png \
+    -m 'average' \
+    -d 'braycurtis' \
+    -f 'correlation' \
+    --minv '0' \
+    --tax_lev 'a' \
+    --sdend_h '0.1' \
+    --fdend_w '0.1' \
+    --cm_h '0.03' \
+    --font_size '7' \
+    --clust_line_w '1' \
+    --perc '90' \
+    -c 'jet'
+
+metaphlan_hclust_heatmap.py \
+    --in test-data/merged_community_profile.tabular \
+    --out test-data/heatmap.pdf \
+    -m 'ward' \
+    -d 'euclidean' \
+    -f 'euclidean' \
+    --minv '0' \
+    --tax_lev 'a' \
+    --sdend_h '0.1' \
+    --fdend_w '0.1' \
+    --cm_h '0.03' \
+    --font_size '7' \
+    --clust_line_w '1' \
+    --perc '90' \
+    -c 'pink'
+
+metaphlan_hclust_heatmap.py \
+    --in test-data/merged_community_profile.tabular \
+    --out test-data/heatmap.svg \
+    -m 'complete' \
+    -d 'hamming' \
+    -f 'matching' \
+    --minv '0' \
+    --tax_lev 'a' \
+    --sdend_h '0.1' \
+    --fdend_w '0.1' \
+    --cm_h '0.03' \
+    --font_size '7' \
+    --clust_line_w '1' \
+    --perc '90' \
+    -c 'pink'
\ No newline at end of file
b
diff -r 000000000000 -r f5df500fcc3c macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Apr 19 20:56:20 2021 +0000
b
@@ -0,0 +1,40 @@
+<?xml version="1.0"?>
+<macros>
+    <token name="@TOOL_VERSION@">3.0.7</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">20.01</token>
+    <xml name="edam_ontology">
+        <edam_topics>
+            <edam_topic>topic_3174</edam_topic>
+            <edam_topic>topic_0194</edam_topic>
+        </edam_topics>
+        <edam_operations>
+            <edam_operation>operation_2478</edam_operation>
+            <edam_operation>operation_0324</edam_operation>
+        </edam_operations>
+    </xml>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">metaphlan</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1101/2020.11.19.388223</citation>
+        </citations>
+    </xml>
+    <token name="@FILE_FORMATS@">fastq,fastq.gz,fastq.bz2,fasta,fasta.gz,fasta.bz2</token>
+    <xml name="tax_lev">
+        <param argument="--tax_lev" type="select" label="Taxonomic level for the relative abundance output">
+            <option value="a" selected="true">All taxonomic levels</option>
+            <option value="k">Kingdoms only</option>
+            <option value="p">Phyla only</option>
+            <option value="c">Classes only</option>
+            <option value="o">Orders only</option>
+            <option value="f">Families only</option>
+            <option value="g">Genera only</option>
+            <option value="s">Species only</option>
+        </param>
+    </xml>
+</macros>
b
diff -r 000000000000 -r f5df500fcc3c metaphlan.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/metaphlan.xml Mon Apr 19 20:56:20 2021 +0000
[
b'@@ -0,0 +1,716 @@\n+<tool id="metaphlan" name="MetaPhlAn" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">\n+    <description>to profile the composition of microbial communities</description>\n+    <macros>\n+        <import>macros.xml</import>\n+    </macros>\n+    <expand macro="edam_ontology"/>\n+    <expand macro="requirements"/>\n+    <version_command>metaphlan -v</version_command>\n+    <command detect_errors="aggressive"><![CDATA[\n+#if $inputs.in.selector == "raw"\n+    #if $inputs.in.raw_in.selector == "single"\n+        #set full_ext=$inputs.in.raw_in.in.datatype.file_ext\n+        #if $full_ext.endswith("gz")\n+            #set $file_path="in"     \n+zcat \'$inputs.in.raw_in.in\' > \'$file_path\'\n+&&\n+        #else if $full_ext.endswith("bz2")\n+            #set $file_path="in"\n+bzcat \'$inputs.in.raw_in.in\' > \'$file_path\'\n+&&\n+        #else\n+            #set $file_path=$inputs.in.raw_in.in\n+        #end if\n+    #else if $inputs.in.raw_in.selector == "multiple"\n+        #set full_ext=$inputs.in.raw_in.in[0].datatype.file_ext\n+        #set file_path=""\n+        #set sep=""\n+        #for $i, $f in enumerate($inputs.in.raw_in.in)\n+            #if $f.datatype.file_ext != $full_ext\n+echo "Different datatypes for input files"\n+&&\n+exit 1\n+            #end if          \n+            #if $full_ext.endswith("gz")\n+                #set fp="input_%s" % ($i)\n+zcat \'$f\' > \'$fp\'\n+&&\n+            #else if $full_ext.endswith("bz2")\n+                #set fp="input_%s" % ($i)\n+bzcat \'$f\' > \'$fp\'\n+&&\n+            #else\n+                #set fp=$f\n+            #end if\n+            #set $file_path+="%s%s" % ($sep, $fp)\n+            #set $sep=","\n+        #end for\n+    #else if $inputs.in.raw_in.selector == "paired"\n+        #set full_ext=$inputs.in.raw_in.in_f.datatype.file_ext\n+        #if $full_ext != $inputs.in.raw_in.in_r.datatype.file_ext\n+echo "Different datatypes for input paired-end files"\n+&&\n+exit 
1\n+        #end if\n+        #if $full_ext.endswith("gz")\n+zcat \'$inputs.in.raw_in.in_f\' > \'in_f\'\n+&&\n+zcat \'$inputs.in.raw_in.in_r\' > \'in_r\'\n+&&\n+            #set file_path="in_f,in_r"\n+        #else if $full_ext.endswith("bz2")\n+bzcat \'$inputs.in.raw_in.in_f\' > \'in_f\'\n+&&\n+bzcat \'$inputs.in.raw_in.in_r\' > \'in_r\'\n+&&\n+            #set file_path="in_f,in_r"\n+        #else\n+            #set file_path="%s,%s" % ($inputs.in.raw_in.in_f,$inputs.in.raw_in.in_r)\n+        #end if\n+    #end if\n+\n+    #if $full_ext.startswith("fastq")\n+        #set ext=\'fastq\'\n+    #else if $full_ext.startswith("fasta") and $full_ext.endswith(("gz","bz2"))\n+        #set ext=\'fasta\'\n+    #else\n+        #set ext=$full_ext\n+    #end if\n+#end if\n+\n+#if $inputs.db.db_selector == "history"\n+mkdir \'ref_db\'\n+&&\n+bowtie2-build \'$inputs.db.bowtie2db\' \'ref_db/custom_db\'\n+&&\n+python \'$__tool_directory__/customizemetadata.py\'\n+    transform_json_to_pkl\n+    --json \'$inputs.db.mpa_pkl\'\n+    --pkl \'ref_db/custom_db.pkl\'\n+&&\n+#end if\n+\n+metaphlan\n+#if $inputs.in.selector == "raw"\n+    \'$file_path\'\n+    --input_type \'$ext\'\n+    --read_min_len $inputs.in.read_min_len\n+    --bt2_ps \'$inputs.in.mapping.bt2_ps\'\n+    --min_mapq_val $inputs.in.mapping.min_mapq_val\n+#else\n+    \'$inputs.in.in\'\n+    --input_type \'$inputs.in.selector\'\n+#end if\n+#if $inputs.db.db_selector == "cached"\n+    --bowtie2db \'$inputs.db.cached_db.fields.path\'\n+    --index \'$inputs.db.cached_db.fields.dbkey\'\n+#else\n+    --bowtie2db \'ref_db/\'\n+    --index \'custom_db\'\n+#end if\n+    -t \'$analysis.analysis_type.t\'\n+#if $analysis.analysis_type.t == "rel_ab" or $analysis.analysis_type.t == "rel_ab_w_read_stats"\n+    --tax_lev \'$analysis.analysis_type.tax_lev\'\n+#else if $analysis.analysis_type.t == "clade_specific_strain_tracker"\n+    --clade \'$analysis.analysis_type.clade\'\n+    #if str($analysis.analysis_type.min_ab) != \'\'\n+    
--min_ab $analysis.analysis_type.min_ab\n+    #end if\n+#else if $analysis.analysis_type.t == "marker_ab_table" and str($analysis.analysis_type.nreads) != \'\'\n+    --nreads $$analysis.analysis_type.'..b'bacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>\n+                    <has_text text="SampleID"/>\n+                    <has_text text="Metaphlan_Analysis"/>\n+                </assert_contents>\n+            </output>\n+            <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-bowtie2out.tabular" compare="sim_size">\n+                <assert_contents>\n+                    <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>\n+                    <has_text text="37637__U2I1U8__N579_01580"/>\n+                </assert_contents>\n+            </output>\n+            <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares.sam" compare="sim_size">\n+                <assert_contents>\n+                    <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>\n+                </assert_contents>\n+            </output>\n+            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">\n+                <assert_contents>\n+                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>\n+                </assert_contents>\n+            </output>\n+        </test>\n+    </tests>\n+    <help><![CDATA[\n+What it does\n+============\n+\n+MetaPhlAn is a computational tool for profiling the composition of microbial communities (Bacteria, \n+Archaea and Eukaryotes) from metagenomic shotgun sequencing data (i.e. not 16S) with species-level. 
\n+\n+MetaPhlAn relies on ~1.1M unique clade-specific marker genes identified from ~100,000 reference genomes \n+(~99,500 bacterial and archaeal and ~500 eukaryotic), allowing:\n+\n+- unambiguous taxonomic assignments;\n+- accurate estimation of organismal relative abundance;\n+- species-level resolution for bacteria, archaea, eukaryotes and viruses;\n+- strain identification and tracking\n+- orders of magnitude speedups compared to existing methods.\n+- metagenomic strain-level population genomics\n+\n+MetaPhlAn clade-abundance estimation\n+------------------------------------\n+\n+The basic usage of MetaPhlAn consists in the identification of the clades (from phyla to species and \n+strains in particular cases) present in the metagenome obtained from a microbiome sample and their \n+relative abundance.\n+\n+Marker level analysis\n+---------------------\n+\n+MetaPhlAn introduces the capability of characterizing organisms at the strain level using non\n+aggregated marker information. Such capability comes with several slightly different flavours and \n+is a way to perform strain tracking and comparison across multiple samples.\n+\n+Usually, MetaPhlAn is first run with the default parameter for the type of analysis to profile the \n+species present in the community, and then a strain-level profiling can be performed to zoom in on \n+specific species of interest. 
This operation can be performed quickly as it exploits the bowtie2out \n+intermediate file saved during the execution of the default analysis type.\n+\n+Inputs\n+======\n+\n+MetaPhlAn takes as input either:\n+\n+- one or several sequence files in Fasta, FastQ (compressed or not)\n+- a BowTie2 produced SAM file\n+- an intermediary mapping file of the metagenome generated by a previous MetaPhlAn run\n+\n+It also needs the reference database, which can be locally installed or customized using the dedicated tools.\n+\n+Outputs\n+=======\n+\n+The main output file is a tab-separated file with the predicted taxon relative abundances.\n+\n+It also generates a BIOM file and some intermediate files (SAM and BowTie2out) if sequence files are given as inputs.\n+\n+\n+More help and use cases\n+=======================\n+\n+To get more information about MetaPhlAn usage and use cases, please refer to the `Metaphlan documentation`_.\n+\n+.. _Metaphlan documentation: https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-3.0#basic-usage\n+\n+    ]]></help>\n+    <expand macro="citations"/>\n+</tool>\n'
b
diff -r 000000000000 -r f5df500fcc3c test-data/SRS014464-Anterior_nares-abundances.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SRS014464-Anterior_nares-abundances.tabular Mon Apr 19 20:56:20 2021 +0000
b
@@ -0,0 +1,30 @@
+#custom_db
+#/home/bebatut/miniconda3/envs/mulled-v1-b4915a464cf72db4053ec566c65e3f5f431691323f08f9b6b1c9ecfc4f8b9c88/bin/metaphlan in --input_type fasta --read_min_len 70 --bowtie2db ref_db/ --index custom_db --bt2_ps very-sensitive --min_mapq_val 5 -t rel_ab --tax_lev a --min_cu_len 2000 --add_viruses --stat_q 0.2 --perc_nonzero 0.33 --avoid_disqm --sample_id_key SampleID --sample_id Metaphlan_Analysis -o /tmp/tmptu3575j7/files/000/dataset_19.dat --bowtie2out bowtie2out -s /tmp/tmptu3575j7/files/000/dataset_21.dat --biom /tmp/tmptu3575j7/files/000/dataset_22.dat --nproc 1
+#SampleID Metaphlan_Analysis
+#clade_name NCBI_tax_id relative_abundance additional_species
+k__Bacteria 2 100.0
+k__Bacteria|p__Proteobacteria 2|1224 52.20019
+k__Bacteria|p__Actinobacteria 2|201174 34.37371
+k__Bacteria|p__Firmicutes 2|1239 13.4261
+k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria 2|1224|1236 52.20019
+k__Bacteria|p__Actinobacteria|c__Actinobacteria 2|201174|1760 34.37371
+k__Bacteria|p__Firmicutes|c__Bacilli 2|1239|91061 13.4261
+k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales 2|1224|1236|72274 52.20019
+k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales 2|201174|1760|85007 24.7409
+k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales 2|1239|91061|186826 13.4261
+k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Propionibacteriales 2|201174|1760|85009 9.63281
+k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae 2|1224|1236|72274|468 52.20019
+k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae 2|201174|1760|85007|1653 24.7409
+k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae 2|1239|91061|186826|186828 13.4261
+k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Propionibacteriales|f__Propionibacteriaceae 2|201174|1760|85009|31957 9.63281
+k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella 2|1224|1236|72274|468|475 52.20019
+k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium 2|201174|1760|85007|1653|1716 24.7409
+k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum 2|1239|91061|186826|186828|29393 13.4261
+k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Propionibacteriales|f__Propionibacteriaceae|g__Cutibacterium 2|201174|1760|85009|31957|1912216 9.63281
+k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata 2|1224|1236|72274|468|475|477 22.57968
+k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_pseudodiphtheriticum 2|201174|1760|85007|1653|1716|37637 18.75365
+k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_nonliquefaciens 2|1224|1236|72274|468|475|478 15.88652
+k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_equi 2|1224|1236|72274|468|475|60442 13.73399
+k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum 2|1239|91061|186826|186828|29393|29394 13.4261
+k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Propionibacteriales|f__Propionibacteriaceae|g__Cutibacterium|s__Cutibacterium_acnes 2|201174|1760|85009|31957|1912216|1747 9.63281
+k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens 2|201174|1760|85007|1653|1716|38284 5.98726
b
diff -r 000000000000 -r f5df500fcc3c test-data/SRS014464-Anterior_nares-bowtie2out.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SRS014464-Anterior_nares-bowtie2out.tabular Mon Apr 19 20:56:20 2021 +0000
b
b'@@ -0,0 +1,140 @@\n+HWI-EAS109_102883399:3:104:7342:14360/1\t37637__U2I1U8__N579_01580\n+HWI-EAS109_102883399:3:107:9938:7093/1\t90240__A0A378QWM4__NCTC12877_00123\n+HWI-EAS109_102883399:3:108:10698:12367/1\t38284__A0A376GA42__HMPREF0276_2319\n+HWI-EAS109_102883399:3:114:6282:15170/1\t477__A0A1V4GM23__A9Z63_04930\n+HWI-EAS109_102883399:3:116:11192:12957/1\t1747__D4HCW6__PAGK_1340\n+HWI-EAS109_102883399:3:118:12526:20595/1\t29394__H3NDE2__B8A45_00965\n+HWI-EAS109_102883399:3:17:12574:2133/1\t478__A0A1B8QSP6__A9Z60_08260\n+HWI-EAS109_102883399:3:21:9846:16342/1\t13076__A0A2I1PE66__CYJ72_10760\n+HWI-EAS109_102883399:3:24:9356:13561/1\t478__A0A1B8PJN3__A9Z65_07575\n+HWI-EAS109_102883399:3:31:18846:14786/1\t478__A0A1B8PLP8__A9Z60_06460\n+HWI-EAS109_102883399:3:31:9294:14763/1\t29394__H3NCV0__B8A46_05195\n+HWI-EAS109_102883399:3:32:7234:7840/1\t478__A0A1B8PMF6__A9Z65_02265\n+HWI-EAS109_102883399:3:34:10553:20876/1\t1747__D4HAL2__PAJL_1002\n+HWI-EAS109_102883399:3:35:14701:14421/1\t478__A0A1B8PMT8__A9Z60_01500\n+HWI-EAS109_102883399:3:35:16839:12173/1\t37637__U2I1H6__N579_01890\n+HWI-EAS109_102883399:3:43:14863:11630/1\t478__A0A1B8PKZ1__A9Z65_08945\n+HWI-EAS109_102883399:3:43:7420:17273/1\t29394__H3NE38__B8A42_05570\n+HWI-EAS109_102883399:3:50:11989:13662/1\t29394__H3NET0__B8A33_05820\n+HWI-EAS109_102883399:3:61:16396:12608/1\t477__A0A1V4GM23__A9Z63_04930\n+HWI-EAS109_102883399:3:63:2385:6258/1\t29394__H3NDH9__B8A44_07175\n+HWI-EAS109_102883399:3:63:7222:18482/1\t478__A0A1B8PJH3__A9Z60_02875\n+HWI-EAS109_102883399:3:67:10236:17129/1\t29394__H3NG09__B8A42_04750\n+HWI-EAS109_102883399:3:6:4450:19117/1\t478__A0A1B8PI70__A9Z60_04770\n+HWI-EAS109_102883399:3:6:8517:20263/1\t478__A0A1B8PM91__A9Z60_01205\n+HWI-EAS109_102883399:3:72:6658:15119/1\t29394__H3ND14__B8A42_03745\n+HWI-EAS109_102883399:3:74:12507:6850/1\t90240__A0A378QWM4__NCTC12877_00123\n+HWI-EAS109_102883399:3:77:13385:16207/1\t29394__H3NGF9__B8A33_01850\n+HWI-EAS109_102883399:3:7:15731:5483/1\t480__A0A3A9R6N2__ccmA
\n+HWI-EAS109_102883399:3:82:6561:17915/1\t29394__H3NE72__B8A39_01430\n+HWI-EAS109_102883399:3:83:7057:17139/1\t478__A0A1B8PHZ1__A9Z60_04570\n+HWI-EAS109_102883399:3:87:7599:5581/1\t60442__A0A378QWP5__cya_3\n+HWI-EAS109_102883399:3:88:12835:16222/1\t478__A0A1B8PM86__A9Z65_01295\n+HWI-EAS109_102883399:3:91:5341:14940/1\t480__A0A3A9R6N2__ccmA\n+HWI-EAS109_102883399:3:92:10442:16461/1\t478__A0A1B8PHZ1__A9Z60_04570\n+HWI-EAS109_102883399:3:96:16052:11956/1\t478__A0A1B8QQ64__A9Z65_06575\n+HWI-EAS109_102883399:3:96:7655:6922/1\t29394__H3NGF9__B8A33_01850\n+HWI-EAS109_102883399:3:99:16677:13714/1\t60442__A0A378QWP5__cya_3\n+HWI-EAS109_102883399:4:101:15092:11595/1\t29394__H3NF31__B8A40_05690\n+HWI-EAS109_102883399:4:107:17626:14917/1\t37637__V7ZTI2__N579_0112885\n+HWI-EAS109_102883399:4:110:4574:6161/1\t470453__A0A1T0CCU0__B0680_10530\n+HWI-EAS109_102883399:4:114:5708:8547/1\t37637__U2HYJ1__N579_05255\n+HWI-EAS109_102883399:4:116:15438:1531/1\t478__A0A1B8PMF6__A9Z65_02265\n+HWI-EAS109_102883399:4:116:19658:5948/1\t29394__H3NFS3__B8A31_08415\n+HWI-EAS109_102883399:4:117:16471:20857/1\t1747__U7MBR0__BBJ67_02950\n+HWI-EAS109_102883399:4:17:18391:6883/1\t29394__H3NCU9__B8A45_08665\n+HWI-EAS109_102883399:4:1:17744:12898/1\t477__A0A1V4GM23__A9Z63_04930\n+HWI-EAS109_102883399:4:21:15793:10115/1\t478__A0A1B8PLP8__A9Z60_06460\n+HWI-EAS109_102883399:4:22:11354:20243/1\t29394__H3NDY2__B8A44_05015\n+HWI-EAS109_102883399:4:28:3117:17715/1\t29394__H3NDD0__B8A46_01590\n+HWI-EAS109_102883399:4:2:15391:8448/1\t29394__H3NC06__B8A41_08715\n+HWI-EAS109_102883399:4:30:14174:14723/1\t38284__C0WFM5__HMPREF0276_0261\n+HWI-EAS109_102883399:4:31:12400:9410/1\t478__A0A1B8QJ77__A9Z65_03345\n+HWI-EAS109_102883399:4:46:7502:15348/1\t1747__W4TWS7__COH18_07470\n+HWI-EAS109_102883399:4:49:19733:3881/1\t60442__A0A378QTS8__B5J93_12645\n+HWI-EAS109_102883399:4:49:7719:20475/1\t478__A0A1B8PJZ8__A9Z65_05485\n+HWI-EAS109_102883399:4:4:17929:20737/1\t37637__U2GT22__N579_02835\n+HWI-EAS109_102883399:4:51:2231:137
78/1\t1747__D4HCY1__CP884_07625\n+HWI-EAS109_102883399:4:58:7856:5386/1\t478__A0A1B8PK82__A9Z60_08695\n+HWI-EAS109_102'..b'20/2\t29394__H3NE37__B8A45_05170\n+HWI-EAS109_102883399:3:54:2342:11205/2\t478__A0A1B8PM91__A9Z60_01205\n+HWI-EAS109_102883399:3:56:12531:13850/2\t478__A0A1B8PHZ1__A9Z60_04570\n+HWI-EAS109_102883399:3:61:2756:15703/2\t29394__H3NDD1__B8A45_02325\n+HWI-EAS109_102883399:3:63:15642:18096/2\t478__A0A1B8PJP5__A7456_08645\n+HWI-EAS109_102883399:3:74:3661:6609/2\t37637__U2HXD5__N579_07285\n+HWI-EAS109_102883399:3:74:9927:4336/2\t477__A0A1V4GM23__A9Z63_04930\n+HWI-EAS109_102883399:3:81:13447:6432/2\t478__A0A1B8PJ82__A7456_08640\n+HWI-EAS109_102883399:3:84:9550:17402/2\t60442__A0A378QNQ9__B5J93_05455\n+HWI-EAS109_102883399:3:84:9622:5528/2\t478__A0A1B8PK82__A9Z60_08695\n+HWI-EAS109_102883399:3:88:16675:15992/2\t29394__H3NEK5__B8A42_06490\n+HWI-EAS109_102883399:4:100:4233:13414/2\t29394__H3ND14__B8A42_03745\n+HWI-EAS109_102883399:4:102:17753:15275/2\t478__A0A1B8PK17__A9Z60_02920\n+HWI-EAS109_102883399:4:103:12056:11798/2\t29433__A0A378PKN6__MOVS_06365\n+HWI-EAS109_102883399:4:105:15511:1280/2\t478__A0A1B8PJP5__A7456_08645\n+HWI-EAS109_102883399:4:113:9655:1613/2\t29394__H3NCD6__B8A33_07975\n+HWI-EAS109_102883399:4:116:8110:19393/2\t29394__H3NFS3__B8A31_08415\n+HWI-EAS109_102883399:4:14:14739:6774/2\t478__A0A1B8PKZ1__A9Z65_08945\n+HWI-EAS109_102883399:4:18:6289:20196/2\t478__A0A1B8PK12__A9Z60_07995\n+HWI-EAS109_102883399:4:1:15928:5893/2\t60442__A0A378QTS8__B5J93_12645\n+HWI-EAS109_102883399:4:1:1929:4508/2\t29394__H3NC21__HMPREF9703_00102\n+HWI-EAS109_102883399:4:1:8293:4832/2\t60442__A0A378QTS8__B5J93_12645\n+HWI-EAS109_102883399:4:22:11354:20243/2\t29394__H3NDY2__B8A44_05015\n+HWI-EAS109_102883399:4:25:11205:5663/2\t29394__H3NGD0__B8A39_02610\n+HWI-EAS109_102883399:4:26:13606:15012/2\t37637__U2HPE4__N579_11550\n+HWI-EAS109_102883399:4:2:5506:17168/2\t29394__H3NFP2__B8A45_07105\n+HWI-EAS109_102883399:4:36:3448:17267/2\t37637__U2I1H6__N579_01890\n+HWI-EAS1
09_102883399:4:39:1605:12384/2\t29394__H3NC33__B8A40_05755\n+HWI-EAS109_102883399:4:39:2895:3590/2\t478__A0A1B8PM86__A9Z65_01295\n+HWI-EAS109_102883399:4:44:18384:3597/2\t478__A0A1B8PKC6__A9Z65_00240\n+HWI-EAS109_102883399:4:49:10481:12646/2\t386414__D1VYE0__HMPREF9019_1663\n+HWI-EAS109_102883399:4:49:8020:14379/2\t29394__H3NDY2__B8A44_05015\n+HWI-EAS109_102883399:4:53:5625:18502/2\t29394__H3NE98__B8A46_08570\n+HWI-EAS109_102883399:4:55:2034:11890/2\t478__A0A1B8PK17__A9Z60_02920\n+HWI-EAS109_102883399:4:55:4630:1592/2\t29394__H3NFY1__B8A41_07655\n+HWI-EAS109_102883399:4:56:12559:15511/2\t29394__H3NCD6__B8A33_07975\n+HWI-EAS109_102883399:4:56:19427:4005/2\t478__A0A1B8PKC6__A9Z65_00240\n+HWI-EAS109_102883399:4:68:7946:14742/2\t478__A0A1B8PI70__A9Z60_04770\n+HWI-EAS109_102883399:4:7:7569:9747/2\t29394__H3NFL8__B8A45_06985\n+HWI-EAS109_102883399:4:84:1158:10675/2\t29394__H3NCJ1__B8A42_01505\n+HWI-EAS109_102883399:4:93:16478:8198/2\t478__A0A1B8PK12__A9Z60_07995\n+HWI-EAS109_102883399:4:94:16955:7520/2\t29394__H3NGF9__B8A33_01850\n+HWI-EAS109_102883399:4:95:2036:2688/2\t478__A0A1B8PJ21__A9Z60_08925\n+HWI-EAS109_102883399:4:9:11256:16883/2\t29394__H3NCD6__B8A33_07975\n+HWI-EAS109_102883399:4:9:9586:20055/2\t477__A0A1B8Q5X7__A9Z63_04935\n+HWI-EAS109_102883399:3:10:19654:7883/2\t1747__A0A1N4YDI5__CP875_06750\n+HWI-EAS109_102883399:3:3:9498:7494/2\t504553__GeneID:10498608\n+HWI-EAS109_102883399:3:69:13778:8249/2\t1747__A0A1N5AYP4__B1B09_10705\n+HWI-EAS109_102883399:4:43:7696:5439/2\t38284__C0WL80__HMPREF0276_2216\n+HWI-EAS109_102883399:4:86:2354:13206/2\t38303__E2S5B9__HMPREF0305_11721\n+HWI-EAS109_102883399:3:118:12526:20595/1\t29394__H3NDE2__B8A45_00965\n+HWI-EAS109_102883399:3:35:14701:14421/1\t478__A0A1B8PMT8__A9Z60_01500\n+HWI-EAS109_102883399:4:51:2231:13778/1\t1747__D4HCY1__CP884_07625\n+HWI-EAS109_102883399:4:65:18885:8678/1\t37637__V7ZTH7__N579_0112775\n+HWI-EAS109_102883399:3:54:17385:9920/2\t29394__H3NE37__B8A45_05170\n+HWI-EAS109_102883399:4:2:5506:17168/2\t29394_
_H3NFP2__B8A45_07105\n+HWI-EAS109_102883399:4:95:2036:2688/2\t478__A0A1B8PJ21__A9Z60_08925\n+HWI-EAS109_102883399:4:51:2231:13778/1\t1747__D4HCY1__CP884_07625\n+#nreads\t17695\n+#avg_read_length\t96.3479513987002\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r f5df500fcc3c test-data/SRS014464-Anterior_nares-ignore-marker-bowtie2out.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SRS014464-Anterior_nares-ignore-marker-bowtie2out.tabular Mon Apr 19 20:56:20 2021 +0000
b
b'@@ -0,0 +1,140 @@\n+HWI-EAS109_102883399:3:104:7342:14360/1\t37637__U2I1U8__N579_01580\n+HWI-EAS109_102883399:3:107:9938:7093/1\t90240__A0A378QWM4__NCTC12877_00123\n+HWI-EAS109_102883399:3:108:10698:12367/1\t38284__A0A376GA42__HMPREF0276_2319\n+HWI-EAS109_102883399:3:114:6282:15170/1\t477__A0A1V4GM23__A9Z63_04930\n+HWI-EAS109_102883399:3:116:11192:12957/1\t1747__D4HCW6__PAGK_1340\n+HWI-EAS109_102883399:3:118:12526:20595/1\t29394__H3NDE2__B8A45_00965\n+HWI-EAS109_102883399:3:17:12574:2133/1\t478__A0A1B8QSP6__A9Z60_08260\n+HWI-EAS109_102883399:3:21:9846:16342/1\t13076__A0A2I1PE66__CYJ72_10760\n+HWI-EAS109_102883399:3:24:9356:13561/1\t478__A0A1B8PJN3__A9Z65_07575\n+HWI-EAS109_102883399:3:31:18846:14786/1\t478__A0A1B8PLP8__A9Z60_06460\n+HWI-EAS109_102883399:3:31:9294:14763/1\t29394__H3NCV0__B8A46_05195\n+HWI-EAS109_102883399:3:32:7234:7840/1\t478__A0A1B8PMF6__A9Z65_02265\n+HWI-EAS109_102883399:3:34:10553:20876/1\t1747__D4HAL2__PAJL_1002\n+HWI-EAS109_102883399:3:35:14701:14421/1\t478__A0A1B8PMT8__A9Z60_01500\n+HWI-EAS109_102883399:3:35:16839:12173/1\t37637__U2I1H6__N579_01890\n+HWI-EAS109_102883399:3:43:14863:11630/1\t478__A0A1B8PKZ1__A9Z65_08945\n+HWI-EAS109_102883399:3:43:7420:17273/1\t29394__H3NE38__B8A42_05570\n+HWI-EAS109_102883399:3:50:11989:13662/1\t29394__H3NET0__B8A33_05820\n+HWI-EAS109_102883399:3:61:16396:12608/1\t477__A0A1V4GM23__A9Z63_04930\n+HWI-EAS109_102883399:3:63:2385:6258/1\t29394__H3NDH9__B8A44_07175\n+HWI-EAS109_102883399:3:63:7222:18482/1\t478__A0A1B8PJH3__A9Z60_02875\n+HWI-EAS109_102883399:3:67:10236:17129/1\t29394__H3NG09__B8A42_04750\n+HWI-EAS109_102883399:3:6:4450:19117/1\t478__A0A1B8PI70__A9Z60_04770\n+HWI-EAS109_102883399:3:6:8517:20263/1\t478__A0A1B8PM91__A9Z60_01205\n+HWI-EAS109_102883399:3:72:6658:15119/1\t29394__H3ND14__B8A42_03745\n+HWI-EAS109_102883399:3:74:12507:6850/1\t90240__A0A378QWM4__NCTC12877_00123\n+HWI-EAS109_102883399:3:77:13385:16207/1\t29394__H3NGF9__B8A33_01850\n+HWI-EAS109_102883399:3:7:15731:5483/1\t480__A0A3A9R6N2__ccmA
\n+HWI-EAS109_102883399:3:82:6561:17915/1\t29394__H3NE72__B8A39_01430\n+HWI-EAS109_102883399:3:83:7057:17139/1\t478__A0A1B8PHZ1__A9Z60_04570\n+HWI-EAS109_102883399:3:87:7599:5581/1\t60442__A0A378QWP5__cya_3\n+HWI-EAS109_102883399:3:88:12835:16222/1\t478__A0A1B8PM86__A9Z65_01295\n+HWI-EAS109_102883399:3:91:5341:14940/1\t480__A0A3A9R6N2__ccmA\n+HWI-EAS109_102883399:3:92:10442:16461/1\t478__A0A1B8PHZ1__A9Z60_04570\n+HWI-EAS109_102883399:3:96:16052:11956/1\t478__A0A1B8QQ64__A9Z65_06575\n+HWI-EAS109_102883399:3:96:7655:6922/1\t29394__H3NGF9__B8A33_01850\n+HWI-EAS109_102883399:3:99:16677:13714/1\t60442__A0A378QWP5__cya_3\n+HWI-EAS109_102883399:4:101:15092:11595/1\t29394__H3NF31__B8A40_05690\n+HWI-EAS109_102883399:4:107:17626:14917/1\t37637__V7ZTI2__N579_0112885\n+HWI-EAS109_102883399:4:110:4574:6161/1\t470453__A0A1T0CCU0__B0680_10530\n+HWI-EAS109_102883399:4:114:5708:8547/1\t37637__U2HYJ1__N579_05255\n+HWI-EAS109_102883399:4:116:15438:1531/1\t478__A0A1B8PMF6__A9Z65_02265\n+HWI-EAS109_102883399:4:116:19658:5948/1\t29394__H3NFS3__B8A31_08415\n+HWI-EAS109_102883399:4:117:16471:20857/1\t1747__U7MBR0__BBJ67_02950\n+HWI-EAS109_102883399:4:17:18391:6883/1\t29394__H3NCU9__B8A45_08665\n+HWI-EAS109_102883399:4:1:17744:12898/1\t477__A0A1V4GM23__A9Z63_04930\n+HWI-EAS109_102883399:4:21:15793:10115/1\t478__A0A1B8PLP8__A9Z60_06460\n+HWI-EAS109_102883399:4:22:11354:20243/1\t29394__H3NDY2__B8A44_05015\n+HWI-EAS109_102883399:4:28:3117:17715/1\t29394__H3NDD0__B8A46_01590\n+HWI-EAS109_102883399:4:2:15391:8448/1\t29394__H3NC06__B8A41_08715\n+HWI-EAS109_102883399:4:30:14174:14723/1\t38284__C0WFM5__HMPREF0276_0261\n+HWI-EAS109_102883399:4:31:12400:9410/1\t478__A0A1B8QJ77__A9Z65_03345\n+HWI-EAS109_102883399:4:46:7502:15348/1\t1747__W4TWS7__COH18_07470\n+HWI-EAS109_102883399:4:49:19733:3881/1\t60442__A0A378QTS8__B5J93_12645\n+HWI-EAS109_102883399:4:49:7719:20475/1\t478__A0A1B8PJZ8__A9Z65_05485\n+HWI-EAS109_102883399:4:4:17929:20737/1\t37637__U2GT22__N579_02835\n+HWI-EAS109_102883399:4:51:2231:137
78/1\t1747__D4HCY1__CP884_07625\n+HWI-EAS109_102883399:4:58:7856:5386/1\t478__A0A1B8PK82__A9Z60_08695\n+HWI-EAS109_102'..b'20/2\t29394__H3NE37__B8A45_05170\n+HWI-EAS109_102883399:3:54:2342:11205/2\t478__A0A1B8PM91__A9Z60_01205\n+HWI-EAS109_102883399:3:56:12531:13850/2\t478__A0A1B8PHZ1__A9Z60_04570\n+HWI-EAS109_102883399:3:61:2756:15703/2\t29394__H3NDD1__B8A45_02325\n+HWI-EAS109_102883399:3:63:15642:18096/2\t478__A0A1B8PJP5__A7456_08645\n+HWI-EAS109_102883399:3:74:3661:6609/2\t37637__U2HXD5__N579_07285\n+HWI-EAS109_102883399:3:74:9927:4336/2\t477__A0A1V4GM23__A9Z63_04930\n+HWI-EAS109_102883399:3:81:13447:6432/2\t478__A0A1B8PJ82__A7456_08640\n+HWI-EAS109_102883399:3:84:9550:17402/2\t60442__A0A378QNQ9__B5J93_05455\n+HWI-EAS109_102883399:3:84:9622:5528/2\t478__A0A1B8PK82__A9Z60_08695\n+HWI-EAS109_102883399:3:88:16675:15992/2\t29394__H3NEK5__B8A42_06490\n+HWI-EAS109_102883399:4:100:4233:13414/2\t29394__H3ND14__B8A42_03745\n+HWI-EAS109_102883399:4:102:17753:15275/2\t478__A0A1B8PK17__A9Z60_02920\n+HWI-EAS109_102883399:4:103:12056:11798/2\t29433__A0A378PKN6__MOVS_06365\n+HWI-EAS109_102883399:4:105:15511:1280/2\t478__A0A1B8PJP5__A7456_08645\n+HWI-EAS109_102883399:4:113:9655:1613/2\t29394__H3NCD6__B8A33_07975\n+HWI-EAS109_102883399:4:116:8110:19393/2\t29394__H3NFS3__B8A31_08415\n+HWI-EAS109_102883399:4:14:14739:6774/2\t478__A0A1B8PKZ1__A9Z65_08945\n+HWI-EAS109_102883399:4:18:6289:20196/2\t478__A0A1B8PK12__A9Z60_07995\n+HWI-EAS109_102883399:4:1:15928:5893/2\t60442__A0A378QTS8__B5J93_12645\n+HWI-EAS109_102883399:4:1:1929:4508/2\t29394__H3NC21__HMPREF9703_00102\n+HWI-EAS109_102883399:4:1:8293:4832/2\t60442__A0A378QTS8__B5J93_12645\n+HWI-EAS109_102883399:4:22:11354:20243/2\t29394__H3NDY2__B8A44_05015\n+HWI-EAS109_102883399:4:25:11205:5663/2\t29394__H3NGD0__B8A39_02610\n+HWI-EAS109_102883399:4:26:13606:15012/2\t37637__U2HPE4__N579_11550\n+HWI-EAS109_102883399:4:2:5506:17168/2\t29394__H3NFP2__B8A45_07105\n+HWI-EAS109_102883399:4:36:3448:17267/2\t37637__U2I1H6__N579_01890\n+HWI-EAS1
09_102883399:4:39:1605:12384/2\t29394__H3NC33__B8A40_05755\n+HWI-EAS109_102883399:4:39:2895:3590/2\t478__A0A1B8PM86__A9Z65_01295\n+HWI-EAS109_102883399:4:44:18384:3597/2\t478__A0A1B8PKC6__A9Z65_00240\n+HWI-EAS109_102883399:4:49:10481:12646/2\t386414__D1VYE0__HMPREF9019_1663\n+HWI-EAS109_102883399:4:49:8020:14379/2\t29394__H3NDY2__B8A44_05015\n+HWI-EAS109_102883399:4:53:5625:18502/2\t29394__H3NE98__B8A46_08570\n+HWI-EAS109_102883399:4:55:2034:11890/2\t478__A0A1B8PK17__A9Z60_02920\n+HWI-EAS109_102883399:4:55:4630:1592/2\t29394__H3NFY1__B8A41_07655\n+HWI-EAS109_102883399:4:56:12559:15511/2\t29394__H3NCD6__B8A33_07975\n+HWI-EAS109_102883399:4:56:19427:4005/2\t478__A0A1B8PKC6__A9Z65_00240\n+HWI-EAS109_102883399:4:68:7946:14742/2\t478__A0A1B8PI70__A9Z60_04770\n+HWI-EAS109_102883399:4:7:7569:9747/2\t29394__H3NFL8__B8A45_06985\n+HWI-EAS109_102883399:4:84:1158:10675/2\t29394__H3NCJ1__B8A42_01505\n+HWI-EAS109_102883399:4:93:16478:8198/2\t478__A0A1B8PK12__A9Z60_07995\n+HWI-EAS109_102883399:4:94:16955:7520/2\t29394__H3NGF9__B8A33_01850\n+HWI-EAS109_102883399:4:95:2036:2688/2\t478__A0A1B8PJ21__A9Z60_08925\n+HWI-EAS109_102883399:4:9:11256:16883/2\t29394__H3NCD6__B8A33_07975\n+HWI-EAS109_102883399:4:9:9586:20055/2\t477__A0A1B8Q5X7__A9Z63_04935\n+HWI-EAS109_102883399:3:10:19654:7883/2\t1747__A0A1N4YDI5__CP875_06750\n+HWI-EAS109_102883399:3:3:9498:7494/2\t504553__GeneID:10498608\n+HWI-EAS109_102883399:3:69:13778:8249/2\t1747__A0A1N5AYP4__B1B09_10705\n+HWI-EAS109_102883399:4:43:7696:5439/2\t38284__C0WL80__HMPREF0276_2216\n+HWI-EAS109_102883399:4:86:2354:13206/2\t38303__E2S5B9__HMPREF0305_11721\n+HWI-EAS109_102883399:3:118:12526:20595/1\t29394__H3NDE2__B8A45_00965\n+HWI-EAS109_102883399:3:35:14701:14421/1\t478__A0A1B8PMT8__A9Z60_01500\n+HWI-EAS109_102883399:4:51:2231:13778/1\t1747__D4HCY1__CP884_07625\n+HWI-EAS109_102883399:4:65:18885:8678/1\t37637__V7ZTH7__N579_0112775\n+HWI-EAS109_102883399:3:54:17385:9920/2\t29394__H3NE37__B8A45_05170\n+HWI-EAS109_102883399:4:2:5506:17168/2\t29394_
_H3NFP2__B8A45_07105\n+HWI-EAS109_102883399:4:95:2036:2688/2\t478__A0A1B8PJ21__A9Z60_08925\n+HWI-EAS109_102883399:4:51:2231:13778/1\t1747__D4HCY1__CP884_07625\n+#nreads\t17695\n+#avg_read_length\t96.3479513987002\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r f5df500fcc3c test-data/SRS014464-Anterior_nares-legacy-abundances.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SRS014464-Anterior_nares-legacy-abundances.tabular Mon Apr 19 20:56:20 2021 +0000
b
@@ -0,0 +1,27 @@
+#SampleID Metaphlan_Analysis
+k__Bacteria 100.0
+k__Bacteria|p__Proteobacteria 52.20019
+k__Bacteria|p__Actinobacteria 34.37371
+k__Bacteria|p__Firmicutes 13.4261
+k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria 52.20019
+k__Bacteria|p__Actinobacteria|c__Actinobacteria 34.37371
+k__Bacteria|p__Firmicutes|c__Bacilli 13.4261
+k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales 52.20019
+k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales 24.7409
+k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales 13.4261
+k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Propionibacteriales 9.63281
+k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae 52.20019
+k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae 24.7409
+k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae 13.4261
+k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Propionibacteriales|f__Propionibacteriaceae 9.63281
+k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella 52.20019
+k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium 24.7409
+k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum 13.4261
+k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Propionibacteriales|f__Propionibacteriaceae|g__Cutibacterium 9.63281
+k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata 22.57968
+k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_pseudodiphtheriticum 18.75365
+k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_nonliquefaciens 15.88652
+k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_equi 13.73399
+k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum 13.4261
+k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Propionibacteriales|f__Propionibacteriaceae|g__Cutibacterium|s__Cutibacterium_acnes 9.63281
+k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens 5.98726
b
diff -r 000000000000 -r f5df500fcc3c test-data/SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular Mon Apr 19 20:56:20 2021 +0000
b
b'@@ -0,0 +1,278 @@\n+HWI-EAS109_102883399:3:104:7342:14360/1\t37637__U2I1U8__N579_01580\n+HWI-EAS109_102883399:3:107:9938:7093/1\t90240__A0A378QWM4__NCTC12877_00123\n+HWI-EAS109_102883399:3:108:10698:12367/1\t38284__A0A376GA42__HMPREF0276_2319\n+HWI-EAS109_102883399:3:114:6282:15170/1\t477__A0A1V4GM23__A9Z63_04930\n+HWI-EAS109_102883399:3:116:11192:12957/1\t1747__D4HCW6__PAGK_1340\n+HWI-EAS109_102883399:3:118:12526:20595/1\t29394__H3NDE2__B8A45_00965\n+HWI-EAS109_102883399:3:17:12574:2133/1\t478__A0A1B8QSP6__A9Z60_08260\n+HWI-EAS109_102883399:3:21:9846:16342/1\t13076__A0A2I1PE66__CYJ72_10760\n+HWI-EAS109_102883399:3:24:9356:13561/1\t478__A0A1B8PJN3__A9Z65_07575\n+HWI-EAS109_102883399:3:31:18846:14786/1\t478__A0A1B8PLP8__A9Z60_06460\n+HWI-EAS109_102883399:3:31:9294:14763/1\t29394__H3NCV0__B8A46_05195\n+HWI-EAS109_102883399:3:32:7234:7840/1\t478__A0A1B8PMF6__A9Z65_02265\n+HWI-EAS109_102883399:3:34:10553:20876/1\t1747__D4HAL2__PAJL_1002\n+HWI-EAS109_102883399:3:35:14701:14421/1\t478__A0A1B8PMT8__A9Z60_01500\n+HWI-EAS109_102883399:3:35:16839:12173/1\t37637__U2I1H6__N579_01890\n+HWI-EAS109_102883399:3:43:14863:11630/1\t478__A0A1B8PKZ1__A9Z65_08945\n+HWI-EAS109_102883399:3:43:7420:17273/1\t29394__H3NE38__B8A42_05570\n+HWI-EAS109_102883399:3:50:11989:13662/1\t29394__H3NET0__B8A33_05820\n+HWI-EAS109_102883399:3:61:16396:12608/1\t477__A0A1V4GM23__A9Z63_04930\n+HWI-EAS109_102883399:3:63:2385:6258/1\t29394__H3NDH9__B8A44_07175\n+HWI-EAS109_102883399:3:63:7222:18482/1\t478__A0A1B8PJH3__A9Z60_02875\n+HWI-EAS109_102883399:3:67:10236:17129/1\t29394__H3NG09__B8A42_04750\n+HWI-EAS109_102883399:3:6:4450:19117/1\t478__A0A1B8PI70__A9Z60_04770\n+HWI-EAS109_102883399:3:6:8517:20263/1\t478__A0A1B8PM91__A9Z60_01205\n+HWI-EAS109_102883399:3:72:6658:15119/1\t29394__H3ND14__B8A42_03745\n+HWI-EAS109_102883399:3:74:12507:6850/1\t90240__A0A378QWM4__NCTC12877_00123\n+HWI-EAS109_102883399:3:77:13385:16207/1\t29394__H3NGF9__B8A33_01850\n+HWI-EAS109_102883399:3:7:15731:5483/1\t480__A0A3A9R6N2__ccmA
\n+HWI-EAS109_102883399:3:82:6561:17915/1\t29394__H3NE72__B8A39_01430\n+HWI-EAS109_102883399:3:83:7057:17139/1\t478__A0A1B8PHZ1__A9Z60_04570\n+HWI-EAS109_102883399:3:87:7599:5581/1\t60442__A0A378QWP5__cya_3\n+HWI-EAS109_102883399:3:88:12835:16222/1\t478__A0A1B8PM86__A9Z65_01295\n+HWI-EAS109_102883399:3:91:5341:14940/1\t480__A0A3A9R6N2__ccmA\n+HWI-EAS109_102883399:3:92:10442:16461/1\t478__A0A1B8PHZ1__A9Z60_04570\n+HWI-EAS109_102883399:3:96:16052:11956/1\t478__A0A1B8QQ64__A9Z65_06575\n+HWI-EAS109_102883399:3:96:7655:6922/1\t29394__H3NGF9__B8A33_01850\n+HWI-EAS109_102883399:3:99:16677:13714/1\t60442__A0A378QWP5__cya_3\n+HWI-EAS109_102883399:4:101:15092:11595/1\t29394__H3NF31__B8A40_05690\n+HWI-EAS109_102883399:4:107:17626:14917/1\t37637__V7ZTI2__N579_0112885\n+HWI-EAS109_102883399:4:110:4574:6161/1\t470453__A0A1T0CCU0__B0680_10530\n+HWI-EAS109_102883399:4:114:5708:8547/1\t37637__U2HYJ1__N579_05255\n+HWI-EAS109_102883399:4:116:15438:1531/1\t478__A0A1B8PMF6__A9Z65_02265\n+HWI-EAS109_102883399:4:116:19658:5948/1\t29394__H3NFS3__B8A31_08415\n+HWI-EAS109_102883399:4:117:16471:20857/1\t1747__U7MBR0__BBJ67_02950\n+HWI-EAS109_102883399:4:17:18391:6883/1\t29394__H3NCU9__B8A45_08665\n+HWI-EAS109_102883399:4:1:17744:12898/1\t477__A0A1V4GM23__A9Z63_04930\n+HWI-EAS109_102883399:4:21:15793:10115/1\t478__A0A1B8PLP8__A9Z60_06460\n+HWI-EAS109_102883399:4:22:11354:20243/1\t29394__H3NDY2__B8A44_05015\n+HWI-EAS109_102883399:4:28:3117:17715/1\t29394__H3NDD0__B8A46_01590\n+HWI-EAS109_102883399:4:2:15391:8448/1\t29394__H3NC06__B8A41_08715\n+HWI-EAS109_102883399:4:30:14174:14723/1\t38284__C0WFM5__HMPREF0276_0261\n+HWI-EAS109_102883399:4:31:12400:9410/1\t478__A0A1B8QJ77__A9Z65_03345\n+HWI-EAS109_102883399:4:46:7502:15348/1\t1747__W4TWS7__COH18_07470\n+HWI-EAS109_102883399:4:49:19733:3881/1\t60442__A0A378QTS8__B5J93_12645\n+HWI-EAS109_102883399:4:49:7719:20475/1\t478__A0A1B8PJZ8__A9Z65_05485\n+HWI-EAS109_102883399:4:4:17929:20737/1\t37637__U2GT22__N579_02835\n+HWI-EAS109_102883399:4:51:2231:137
78/1\t1747__D4HCY1__CP884_07625\n+HWI-EAS109_102883399:4:58:7856:5386/1\t478__A0A1B8PK82__A9Z60_08695\n+HWI-EAS109_102'..b'20/2\t29394__H3NE37__B8A45_05170\n+HWI-EAS109_102883399:3:54:2342:11205/2\t478__A0A1B8PM91__A9Z60_01205\n+HWI-EAS109_102883399:3:56:12531:13850/2\t478__A0A1B8PHZ1__A9Z60_04570\n+HWI-EAS109_102883399:3:61:2756:15703/2\t29394__H3NDD1__B8A45_02325\n+HWI-EAS109_102883399:3:63:15642:18096/2\t478__A0A1B8PJP5__A7456_08645\n+HWI-EAS109_102883399:3:74:3661:6609/2\t37637__U2HXD5__N579_07285\n+HWI-EAS109_102883399:3:74:9927:4336/2\t477__A0A1V4GM23__A9Z63_04930\n+HWI-EAS109_102883399:3:81:13447:6432/2\t478__A0A1B8PJ82__A7456_08640\n+HWI-EAS109_102883399:3:84:9550:17402/2\t60442__A0A378QNQ9__B5J93_05455\n+HWI-EAS109_102883399:3:84:9622:5528/2\t478__A0A1B8PK82__A9Z60_08695\n+HWI-EAS109_102883399:3:88:16675:15992/2\t29394__H3NEK5__B8A42_06490\n+HWI-EAS109_102883399:4:100:4233:13414/2\t29394__H3ND14__B8A42_03745\n+HWI-EAS109_102883399:4:102:17753:15275/2\t478__A0A1B8PK17__A9Z60_02920\n+HWI-EAS109_102883399:4:103:12056:11798/2\t29433__A0A378PKN6__MOVS_06365\n+HWI-EAS109_102883399:4:105:15511:1280/2\t478__A0A1B8PJP5__A7456_08645\n+HWI-EAS109_102883399:4:113:9655:1613/2\t29394__H3NCD6__B8A33_07975\n+HWI-EAS109_102883399:4:116:8110:19393/2\t29394__H3NFS3__B8A31_08415\n+HWI-EAS109_102883399:4:14:14739:6774/2\t478__A0A1B8PKZ1__A9Z65_08945\n+HWI-EAS109_102883399:4:18:6289:20196/2\t478__A0A1B8PK12__A9Z60_07995\n+HWI-EAS109_102883399:4:1:15928:5893/2\t60442__A0A378QTS8__B5J93_12645\n+HWI-EAS109_102883399:4:1:1929:4508/2\t29394__H3NC21__HMPREF9703_00102\n+HWI-EAS109_102883399:4:1:8293:4832/2\t60442__A0A378QTS8__B5J93_12645\n+HWI-EAS109_102883399:4:22:11354:20243/2\t29394__H3NDY2__B8A44_05015\n+HWI-EAS109_102883399:4:25:11205:5663/2\t29394__H3NGD0__B8A39_02610\n+HWI-EAS109_102883399:4:26:13606:15012/2\t37637__U2HPE4__N579_11550\n+HWI-EAS109_102883399:4:2:5506:17168/2\t29394__H3NFP2__B8A45_07105\n+HWI-EAS109_102883399:4:36:3448:17267/2\t37637__U2I1H6__N579_01890\n+HWI-EAS1
09_102883399:4:39:1605:12384/2\t29394__H3NC33__B8A40_05755\n+HWI-EAS109_102883399:4:39:2895:3590/2\t478__A0A1B8PM86__A9Z65_01295\n+HWI-EAS109_102883399:4:44:18384:3597/2\t478__A0A1B8PKC6__A9Z65_00240\n+HWI-EAS109_102883399:4:49:10481:12646/2\t386414__D1VYE0__HMPREF9019_1663\n+HWI-EAS109_102883399:4:49:8020:14379/2\t29394__H3NDY2__B8A44_05015\n+HWI-EAS109_102883399:4:53:5625:18502/2\t29394__H3NE98__B8A46_08570\n+HWI-EAS109_102883399:4:55:2034:11890/2\t478__A0A1B8PK17__A9Z60_02920\n+HWI-EAS109_102883399:4:55:4630:1592/2\t29394__H3NFY1__B8A41_07655\n+HWI-EAS109_102883399:4:56:12559:15511/2\t29394__H3NCD6__B8A33_07975\n+HWI-EAS109_102883399:4:56:19427:4005/2\t478__A0A1B8PKC6__A9Z65_00240\n+HWI-EAS109_102883399:4:68:7946:14742/2\t478__A0A1B8PI70__A9Z60_04770\n+HWI-EAS109_102883399:4:7:7569:9747/2\t29394__H3NFL8__B8A45_06985\n+HWI-EAS109_102883399:4:84:1158:10675/2\t29394__H3NCJ1__B8A42_01505\n+HWI-EAS109_102883399:4:93:16478:8198/2\t478__A0A1B8PK12__A9Z60_07995\n+HWI-EAS109_102883399:4:94:16955:7520/2\t29394__H3NGF9__B8A33_01850\n+HWI-EAS109_102883399:4:95:2036:2688/2\t478__A0A1B8PJ21__A9Z60_08925\n+HWI-EAS109_102883399:4:9:11256:16883/2\t29394__H3NCD6__B8A33_07975\n+HWI-EAS109_102883399:4:9:9586:20055/2\t477__A0A1B8Q5X7__A9Z63_04935\n+HWI-EAS109_102883399:3:10:19654:7883/2\t1747__A0A1N4YDI5__CP875_06750\n+HWI-EAS109_102883399:3:3:9498:7494/2\t504553__GeneID:10498608\n+HWI-EAS109_102883399:3:69:13778:8249/2\t1747__A0A1N5AYP4__B1B09_10705\n+HWI-EAS109_102883399:4:43:7696:5439/2\t38284__C0WL80__HMPREF0276_2216\n+HWI-EAS109_102883399:4:86:2354:13206/2\t38303__E2S5B9__HMPREF0305_11721\n+HWI-EAS109_102883399:3:118:12526:20595/1\t29394__H3NDE2__B8A45_00965\n+HWI-EAS109_102883399:3:35:14701:14421/1\t478__A0A1B8PMT8__A9Z60_01500\n+HWI-EAS109_102883399:4:51:2231:13778/1\t1747__D4HCY1__CP884_07625\n+HWI-EAS109_102883399:4:65:18885:8678/1\t37637__V7ZTH7__N579_0112775\n+HWI-EAS109_102883399:3:54:17385:9920/2\t29394__H3NE37__B8A45_05170\n+HWI-EAS109_102883399:4:2:5506:17168/2\t29394_
_H3NFP2__B8A45_07105\n+HWI-EAS109_102883399:4:95:2036:2688/2\t478__A0A1B8PJ21__A9Z60_08925\n+HWI-EAS109_102883399:4:51:2231:13778/1\t1747__D4HCY1__CP884_07625\n+#nreads\t35390\n+#avg_read_length\t96.3479513987002\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r f5df500fcc3c test-data/SRS014464-Anterior_nares-two-inputs.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SRS014464-Anterior_nares-two-inputs.sam Mon Apr 19 20:56:20 2021 +0000
b
b'@@ -0,0 +1,377 @@\n+@HD\tVN:1.0\tSO:unsorted\n+@SQ\tSN:13076__A0A2I1PE66__CYJ72_10760\tLN:540\n+@SQ\tSN:1747__A0A1N4YDI5__CP875_06750\tLN:1164\n+@SQ\tSN:1747__A0A1N5AYP4__B1B09_10705\tLN:1476\n+@SQ\tSN:1747__A0A3D8ZS35__CP884_00215\tLN:1011\n+@SQ\tSN:1747__D4HAL2__PAJL_1002\tLN:705\n+@SQ\tSN:1747__D4HCW6__PAGK_1340\tLN:3834\n+@SQ\tSN:1747__D4HCY1__CP884_07625\tLN:1650\n+@SQ\tSN:1747__U7MBR0__BBJ67_02950\tLN:1779\n+@SQ\tSN:1747__W4TU52__HMPREF9593_02327\tLN:975\n+@SQ\tSN:1747__W4TWS7__COH18_07470\tLN:477\n+@SQ\tSN:29394__H3NBW7__B8A40_04000\tLN:1101\n+@SQ\tSN:29394__H3NC06__B8A41_08715\tLN:1317\n+@SQ\tSN:29394__H3NC21__HMPREF9703_00102\tLN:1617\n+@SQ\tSN:29394__H3NC33__B8A40_05755\tLN:3063\n+@SQ\tSN:29394__H3NCD6__B8A33_07975\tLN:1560\n+@SQ\tSN:29394__H3NCJ1__B8A42_01505\tLN:930\n+@SQ\tSN:29394__H3NCU9__B8A45_08665\tLN:765\n+@SQ\tSN:29394__H3NCV0__B8A46_05195\tLN:888\n+@SQ\tSN:29394__H3ND14__B8A42_03745\tLN:3069\n+@SQ\tSN:29394__H3NDD0__B8A46_01590\tLN:1173\n+@SQ\tSN:29394__H3NDD1__B8A45_02325\tLN:480\n+@SQ\tSN:29394__H3NDE2__B8A45_00965\tLN:699\n+@SQ\tSN:29394__H3NDH9__B8A44_07175\tLN:870\n+@SQ\tSN:29394__H3NDY2__B8A44_05015\tLN:3315\n+@SQ\tSN:29394__H3NE37__B8A45_05170\tLN:756\n+@SQ\tSN:29394__H3NE38__B8A42_05570\tLN:1374\n+@SQ\tSN:29394__H3NE72__B8A39_01430\tLN:654\n+@SQ\tSN:29394__H3NE98__B8A46_08570\tLN:1188\n+@SQ\tSN:29394__H3NEK0__HMPREF9703_00981\tLN:1110\n+@SQ\tSN:29394__H3NEK5__B8A42_06490\tLN:741\n+@SQ\tSN:29394__H3NET0__B8A33_05820\tLN:1332\n+@SQ\tSN:29394__H3NF31__B8A40_05690\tLN:1860\n+@SQ\tSN:29394__H3NFD8__HMPREF9703_01269\tLN:732\n+@SQ\tSN:29394__H3NFL8__B8A45_06985\tLN:582\n+@SQ\tSN:29394__H3NFP2__B8A45_07105\tLN:651\n+@SQ\tSN:29394__H3NFS3__B8A31_08415\tLN:2691\n+@SQ\tSN:29394__H3NFY1__B8A41_07655\tLN:534\n+@SQ\tSN:29394__H3NG09__B8A42_04750\tLN:1062\n+@SQ\tSN:29394__H3NGD0__B8A39_02610\tLN:1620\n+@SQ\tSN:29394__H3NGF9__B8A33_01850\tLN:1086\n+@SQ\tSN:29394__H3NGH1__B5772_02200\tLN:972\n+@SQ\tSN:29433__A0A378PKN6__MOVS_06365\tLN:828\n+@SQ\tSN:3763
7__U2GCN8__N579_00170\tLN:576\n+@SQ\tSN:37637__U2GT22__N579_02835\tLN:471\n+@SQ\tSN:37637__U2HPE4__N579_11550\tLN:1275\n+@SQ\tSN:37637__U2HXD5__N579_07285\tLN:1158\n+@SQ\tSN:37637__U2HYJ1__N579_05255\tLN:642\n+@SQ\tSN:37637__U2I1H6__N579_01890\tLN:975\n+@SQ\tSN:37637__U2I1U8__N579_01580\tLN:867\n+@SQ\tSN:37637__V7ZTH7__N579_0112775\tLN:729\n+@SQ\tSN:37637__V7ZTI2__N579_0112885\tLN:456\n+@SQ\tSN:38284__A0A376GA42__HMPREF0276_2319\tLN:1494\n+@SQ\tSN:38284__C0WFM5__HMPREF0276_0261\tLN:2589\n+@SQ\tSN:38284__C0WL80__HMPREF0276_2216\tLN:1497\n+@SQ\tSN:38303__E2S5B9__HMPREF0305_11721\tLN:1539\n+@SQ\tSN:386414__D1VYE0__HMPREF9019_1663\tLN:1065\n+@SQ\tSN:470453__A0A1T0CCU0__B0680_10530\tLN:741\n+@SQ\tSN:477__A0A1B8Q5X7__A9Z63_04935\tLN:618\n+@SQ\tSN:477__A0A1V4GM23__A9Z63_04930\tLN:1848\n+@SQ\tSN:478__A0A1B8PHZ1__A9Z60_04570\tLN:1320\n+@SQ\tSN:478__A0A1B8PI70__A9Z60_04770\tLN:1698\n+@SQ\tSN:478__A0A1B8PJ21__A9Z60_08925\tLN:843\n+@SQ\tSN:478__A0A1B8PJ82__A7456_08640\tLN:2004\n+@SQ\tSN:478__A0A1B8PJH3__A9Z60_02875\tLN:840\n+@SQ\tSN:478__A0A1B8PJN3__A9Z65_07575\tLN:1107\n+@SQ\tSN:478__A0A1B8PJP5__A7456_08645\tLN:1224\n+@SQ\tSN:478__A0A1B8PJZ8__A9Z65_05485\tLN:1053\n+@SQ\tSN:478__A0A1B8PK12__A9Z60_07995\tLN:2178\n+@SQ\tSN:478__A0A1B8PK17__A9Z60_02920\tLN:1296\n+@SQ\tSN:478__A0A1B8PK82__A9Z60_08695\tLN:1305\n+@SQ\tSN:478__A0A1B8PKC6__A9Z65_00240\tLN:2349\n+@SQ\tSN:478__A0A1B8PKZ1__A9Z65_08945\tLN:927\n+@SQ\tSN:478__A0A1B8PLP8__A9Z60_06460\tLN:2715\n+@SQ\tSN:478__A0A1B8PM86__A9Z65_01295\tLN:474\n+@SQ\tSN:478__A0A1B8PM91__A9Z60_01205\tLN:795\n+@SQ\tSN:478__A0A1B8PMF6__A9Z65_02265\tLN:687\n+@SQ\tSN:478__A0A1B8PMQ2__A9Z65_03275\tLN:1467\n+@SQ\tSN:478__A0A1B8PMT8__A9Z60_01500\tLN:573\n+@SQ\tSN:478__A0A1B8QJ77__A9Z65_03345\tLN:759\n+@SQ\tSN:478__A0A1B8QJK5__A7456_05585\tLN:750\n+@SQ\tSN:478__A0A1B8QQ64__A9Z65_06575\tLN:489\n+@SQ\tSN:478__A0A1B8QS56__A9Z60_09160\tLN:1569\n+@SQ\tSN:478__A0A1B8QSP6__A9Z60_08260\tLN:1455\n+@SQ\tSN:478__A0A378QF70__A7456_02830\tLN:1422\n+@SQ\tSN:480__A0A3A9
R6N2__ccmA\tLN:1662\n+@SQ\tSN:504553__GeneID:10498608\tLN:663\n+@SQ\tSN:60442__A0A378QNQ9__B5J93_05455\tLN:501\n+@SQ\tSN:60442__A0A378QTS8__B5J93_12645\tLN:780\n+@SQ\tSN:60442__A0A378QU90__cya_5\tLN:3615\n+@SQ\tSN:60442__A0A378QWP5__cya_3\tLN:780\n+@SQ\tSN:90240__A0A378QWM4__'..b'83399:3:3:9498:7494/2\t0\t504553__GeneID:10498608\t354\t3\t89M\t*\t0\t0\tCGGGTTGCCTGGCGCTAACGGTTCGGATGGCCATGATGGTGTTCCGGGCCGTGCAGGTGCTGACGGTGTGAACGGCGCTGATGGTCGGG\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-36\tXN:i:0\tXM:i:6\tXO:i:0\tXG:i:0\tNM:i:6\tMD:Z:14T17G15T28T3C2C4\tYT:Z:UU\n+HWI-EAS109_102883399:3:69:13778:8249/2\t0\t1747__A0A1N5AYP4__B1B09_10705\t368\t42\t79M\t*\t0\t0\tTCCTCATCGCGGGTGGTATGAGCTACGGCCCGCAGACCAAGGCTTTTAAGAGGGGAGTCGACCTTGTTGTCGCCACCCC\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:0\tXN:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tNM:i:0\tMD:Z:79\tYT:Z:UU\n+HWI-EAS109_102883399:4:43:7696:5439/2\t0\t38284__C0WL80__HMPREF0276_2216\t765\t42\t91M\t*\t0\t0\tCAACGATGGCGCCGCTGTAGAGGGGCAGGCTCACTCGACTACCCCGAAGGCAGAGTTCGGCACCGCGCTTAACCAAGGGGCAACGCCGTCC\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-6\tXN:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tNM:i:1\tMD:Z:75G15\tYT:Z:UU\n+HWI-EAS109_102883399:4:86:2354:13206/2\t0\t38303__E2S5B9__HMPREF0305_11721\t628\t42\t100M\t*\t0\t0\tGAGTCGTATCTTCGCTATGGGCAGTTTGTTGATGCAGCCCAAGACTCAGAGAAGATGAGTATTACCGAACTGGTCGATACCGCTATCGAAGAGTCCGATA\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-6\tXN:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tNM:i:1\tMD:Z:35G64\tYT:Z:UU\n+HWI-EAS109_102883399:3:118:12526:20595/1\t16\t29394__H3NDE2__B8A45_00965\t86\t23\t100M\t*\t0\t0\tAACGTGGGGTGAAAAATCGACCAGCAGGATTACGGACATACAGTTTAGTTTGCTTAGGATCCGCTTTGATTATGTTGACGAATCAATATATTACAGAACA\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-30\tXN:i:0\tXM:i:5
\tXO:i:0\tXG:i:0\tNM:i:5\tMD:Z:29C4A17T8T2C35\tYT:Z:UU\n+HWI-EAS109_102883399:3:35:14701:14421/1\t16\t478__A0A1B8PMT8__A9Z60_01500\t160\t40\t100M\t*\t0\t0\tACTCGTGATAATACTTCACTGGCATTATCAAGTTCTTTGGTTAAAGAAACTGTGGGTGATACCAAAAAAGACTCAGAGATGTTACTGGGACAACTCTCCA\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-18\tXN:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tNM:i:3\tMD:Z:41C2G0C54\tYT:Z:UU\n+HWI-EAS109_102883399:4:51:2231:13778/1\t16\t1747__D4HCY1__CP884_07625\t394\t42\t78M\t*\t0\t0\tGAATACGAGGATAGTCTCGGGGCTGTCGCGGGTGGTGAAGCATTCCACTACGCCGTTCTGCATTTTCGGCTTGATGAC\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:0\tXN:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tNM:i:0\tMD:Z:78\tYT:Z:UU\n+HWI-EAS109_102883399:4:65:18885:8678/1\t16\t37637__V7ZTH7__N579_0112775\t469\t42\t100M\t*\t0\t0\tGATTTTGTTGATGATCCGGATTCTACTCGGATGTTTTCTGAGAACTCCGCCTGCCCAAACGGGCATGCTCTGACGATTGATGAGTATGAACCCCGCTCCT\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:0\tXN:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tNM:i:0\tMD:Z:100\tYT:Z:UU\n+HWI-EAS109_102883399:3:54:17385:9920/2\t0\t29394__H3NE37__B8A45_05170\t63\t42\t100M\t*\t0\t0\tACAGTATTACAACGATCAAGCCTTATTTAAATACGATTGGAACTTTTTCCAATTAAAATTTCAGCCGTATTTACAGGAATTCAAGTTAATTGAAGAAATG\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:0\tXN:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tNM:i:0\tMD:Z:100\tYT:Z:UU\n+HWI-EAS109_102883399:4:2:5506:17168/2\t0\t29394__H3NFP2__B8A45_07105\t284\t42\t100M\t*\t0\t0\tGCCTAGCAATTGAGCAAGTTGATGGTAAGTTTATCTTGAAGCTCTATATTTGGGATGGGGCTGAGCATTATTATATGGACATGGATGCTGAGACCGGTGA\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-6\tXN:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tNM:i:1\tMD:Z:10C89\tYT:Z:UU\n+HWI-EAS109_102883399:4:95:2036:2688/2\t16\t478__A0A1B8PJ21__A9Z60_08925\t339\t24\t76M\t*\t0\t0\tCATTTGGGCATCTTCGATCTCTATCACGCCTGAGCGTGAACAA
GTGATGGATTTTAGTGAGCCATATTTGAATCAT\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-18\tXN:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tNM:i:3\tMD:Z:0T8G8T57\tYT:Z:UU\n+HWI-EAS109_102883399:4:51:2231:13778/1\t16\t1747__D4HCY1__CP884_07625\t394\t42\t78M\t*\t0\t0\tGAATACGAGGATAGTCTCGGGGCTGTCGCGGGTGGTGAAGCATTCCACTACGCCGTTCTGCATTTTCGGCTTGATGAC\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:0\tXN:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tNM:i:0\tMD:Z:78\tYT:Z:UU\n'
b
diff -r 000000000000 -r f5df500fcc3c test-data/SRS014464-Anterior_nares.biom
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SRS014464-Anterior_nares.biom Mon Apr 19 20:56:20 2021 +0000
[
@@ -0,0 +1,1 @@
+{"id": "MetaPhlAn_Analysis","format": "Biological Observation Matrix 1.0.0","format_url": "http://biom-format.org","generated_by": "MetaPhlAn","date": "2021-04-09T17:17:44.233735","matrix_element_type": "float","shape": [7, 1],"type": null,"matrix_type": "sparse","data": [[0,0,5.98726],[1,0,18.75365],[2,0,9.63281],[3,0,13.4261],[4,0,13.73399],[5,0,22.57968],[6,0,15.88652]],"rows": [{"id": "k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens", "metadata": {"taxonomy": ["k__Bacteria", "p__Actinobacteria", "c__Actinobacteria", "o__Corynebacteriales", "f__Corynebacteriaceae", "g__Corynebacterium", "s__Corynebacterium_accolens"]}},{"id": "k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_pseudodiphtheriticum", "metadata": {"taxonomy": ["k__Bacteria", "p__Actinobacteria", "c__Actinobacteria", "o__Corynebacteriales", "f__Corynebacteriaceae", "g__Corynebacterium", "s__Corynebacterium_pseudodiphtheriticum"]}},{"id": "k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Propionibacteriales|f__Propionibacteriaceae|g__Cutibacterium|s__Cutibacterium_acnes", "metadata": {"taxonomy": ["k__Bacteria", "p__Actinobacteria", "c__Actinobacteria", "o__Propionibacteriales", "f__Propionibacteriaceae", "g__Cutibacterium", "s__Cutibacterium_acnes"]}},{"id": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum", "metadata": {"taxonomy": ["k__Bacteria", "p__Firmicutes", "c__Bacilli", "o__Lactobacillales", "f__Carnobacteriaceae", "g__Dolosigranulum", "s__Dolosigranulum_pigrum"]}},{"id": "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_equi", "metadata": {"taxonomy": ["k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria", "o__Pseudomonadales", "f__Moraxellaceae", "g__Moraxella", 
"s__Moraxella_equi"]}},{"id": "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata", "metadata": {"taxonomy": ["k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria", "o__Pseudomonadales", "f__Moraxellaceae", "g__Moraxella", "s__Moraxella_lacunata"]}},{"id": "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_nonliquefaciens", "metadata": {"taxonomy": ["k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria", "o__Pseudomonadales", "f__Moraxellaceae", "g__Moraxella", "s__Moraxella_nonliquefaciens"]}}],"columns": [{"id": "Metaphlan_Analysis", "metadata": null}]}
\ No newline at end of file
b
diff -r 000000000000 -r f5df500fcc3c test-data/SRS014464-Anterior_nares.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SRS014464-Anterior_nares.fasta Mon Apr 19 20:56:20 2021 +0000
b
b'@@ -0,0 +1,40514 @@\n+>HWI-EAS109_102883399:3:100:10648:20124/1\n+TTGATATTTCATGTACAGTATAAAATATATATTTGGGTTACTTTGGTATTTTATGTACAGTATATAATCTATATTTGATGTACTTTCATATTTTATGTAC\n+>HWI-EAS109_102883399:3:100:1079:13862/1\n+ATATTGATACTTGCATTAATATCACGGTCATGATTGGCATGGCAACTAGGACAAATCCAATGTCTAACCGCAAGTATCAAGTTTTCTTTTGCCATAATAA\n+>HWI-EAS109_102883399:3:100:10822:2790/1\n+CACACACACATATACACACACACATATACACACACACATACACACACACACAAACACACACACACACAAGATCGGAAGAGCGGTTCAGCAGGAATGCCGA\n+>HWI-EAS109_102883399:3:100:10896:21102/1\n+CTCTTCGTCCTAAGGACCACTGTCTGTGCTGTGTCTTTCAAAGGTCAGAAGAGATTGAACCTTTGTGTTTTTATTTTCCCTGTGTTTGCTTTTTC\n+>HWI-EAS109_102883399:3:100:11210:16352/1\n+GTCTTTTCTTTTTGCTTGCCATTGTGGCTGTAAATTGATACAAAAACTTTTGTGCCTGTATGTTGCACATAAAGCCATAGCCCATTACCATCACTGATTT\n+>HWI-EAS109_102883399:3:100:11502:9110/1\n+TCGCCCAGCAAATTCATCCACCTTGTCAGATGGCACACCCACACGACTTGGGCGAAATTCTACGCACAGTTTAAATAATTTATCCAAACGACCAAAGCCC\n+>HWI-EAS109_102883399:3:100:11549:6243/1\n+TGGAAGCGTTGGGGATTTACGTAGTTTGTTGGATTTGCTTCCCGTCGCTGGTGGTGATGGCACGCTGGACGATCGTTACGCTGAGCTTTCT\n+>HWI-EAS109_102883399:3:100:11778:13384/1\n+AAGTACATCAAATATATATTTTATTCTGTACATAAAATATGAAAGTACACCAGATATATATTCTATAGTGTACATAAAATATCAAAGTACCCAAACTATA\n+>HWI-EAS109_102883399:3:100:12997:10542/1\n+ATAATATATGATACATATTATACATCTTATATATGAAATTATATAATTATAGATAACATAATACACATTTATATGTATTATGAAATATAACAAAGGTACT\n+>HWI-EAS109_102883399:3:100:13379:16343/1\n+TATTATATACTGTACATCAAATATGAAAGTTCCCAAACATTTATAATAATCTGTACAGAAAATATCAAAGTACTCAAACTATAAACTGTACATAAAATAT\n+>HWI-EAS109_102883399:3:100:13680:7440/1\n+AATGTTTATGCCCTATCGCCATGGTGACGGAATTAGGGGTCTCCTGCTCTTCGTCCTAAGGACCACTGTCTGTGCTGTGTCTTTCAAAGGTCAGAAGAGA\n+>HWI-EAS109_102883399:3:100:14551:6350/1\n+CGGTTCAGCAGGAATGCCGAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTG\n+>HWI-EAS109_102883399:3:100:14666:5491/1\n+TCGTATCTTGTCGACGGCTGGGCCTACCTGGTGTTTTACGGGGCGCTGGCGTTTTTGCTGATCGTTTTTGCCAACTATTCCGCCTTCGGGGTGCGG\n+>HWI-EAS109_102883399:3:100:14711:18833/1\n+TATTTAATACGTATATTAACCGTATAATATATTCTATATATGATATATAACATATATTATATATGTCATA
AATTATAATATATACTATATGTCATATTGT\n+>HWI-EAS109_102883399:3:100:14778:2943/1\n+CCTTAACTAGTAATTTGAAGAGTTGGCATTTGTAAGGCCAGTATGAATATACCTTCAAAGCAGCAACACAGGTTCCCCATGAGAAAAAGCAAACTTAGGG\n+>HWI-EAS109_102883399:3:100:15216:16187/1\n+TCAATACCAAGACTTTTATTCAGCACCAGCACCAGCACCAGCACCCATGCCATTTTATGCAACAGATTGATGCTAATTTATGCTTAATTTGCCCTATTTT\n+>HWI-EAS109_102883399:3:100:15403:10319/1\n+GATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATATCGTATGCCGTCTTCTGCTTGAAAAAAA\n+>HWI-EAS109_102883399:3:100:15588:15047/1\n+GTATATAATAATATACAGTGTCATATATATAATATATATAACCTATGTATAATATTGTTTTACATCCTGCATCTTATGTTATATATATTGTATAAAATAT\n+>HWI-EAS109_102883399:3:100:15950:5267/1\n+CGGTTCAGCAGGAATGCCGAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGT\n+>HWI-EAS109_102883399:3:100:16342:18195/1\n+AATACAAAACTTAACACAAGCCCAATCACTGTACCCAAAAAAGCAGTAATCAGCGTATATTTGGCATGACTCATGAGTAAATAAAAATCATCAATAAACG\n+>HWI-EAS109_102883399:3:100:16772:11935/1\n+TATATATTATATATAATACGTTGTATGTATAATACCTATATTAAATATATATTATATGACTATGTTATTAATTACATGTCACATGTGTTATATATTATAT\n+>HWI-EAS109_102883399:3:100:17086:13140/1\n+GTGTATATTTCTTATTATTTCATACTAAGGTACCAGCAGGAAATTGACTGAGAAAGCAGAACCCCCGAGGTTCAAAAGTAGAGATGTAGAGGTGGTCAAG\n+>HWI-EAS109_102883399:3:100:17665:4571/1\n+GAGGTAAATCCAATATAGAGATCGCGCAAGAAATGCACTACTCAGAATCAGCAACGAAAAAACGAGTATCTAAGCTCATTCATCGGTTTGGTGCTAGCTC\n+>HWI-EAS109_102883399:3:100:17797:18754/1\n+ATAAAAACACAAAGGTTCAATCTCTTCTGACCTTTGAAAGACACAGCACAGACAGTGGTCCTTAGGACGAAGAGCAGGAGACCCCTAATTCCGTCACCAT\n+>HWI-EAS109_102883399:3:100:18441:1058/1\n+CTGTGTCTTTCAAAGGTCAGAAGAGATTGAACCTTTGTGGTTTTATTTTCCCTGAGTTTGCTTTTTCTCATGGGGAACCGGTGTTGCTGCTTTG\n+>HWI-EAS109_102883399:3:100:19075:9045/1\n+ATATATTATAATTTATATAATGATAAAATTTTTTATAGAATATAAATAATTATATATGATTATATAATTCTACATATTACAAATGAAAATATGTAGAATT\n+>HWI-EAS109_102883399:3:100:19100:11382/1\n+TAATATGGCTGCAGAAACCCCCCAAGAGCGTTCAGTGCCCCCGCATGAACAATGTTCCCC\n+>HWI-EAS109_102883399:3:100:2227:10884/1\n+AATGACAGCTGGCATGAGCTTGGCATTGAGCCTGTCGGTCATTTGGTAATCCACGGGTATCGTATCCACCGCTTGCCCATTAATAGACAGCGCG\n+>HWI-EAS109_102883399:3:100:28
00:14548/1\n+GCAAGAGTATTCAATGCAGAGATACACGTTGTCGTTGTTTGCATAT'..b'GAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAG\n+>HWI-EAS109_102883399:4:96:5093:3048/1\n+CGGGAGAGCGGTTCAGCAGGAATGCCGAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGA\n+>HWI-EAS109_102883399:4:97:1937:11653/1\n+CAGCTCCCCCTATGCCTCCGTTTCTAGGAAATGTTGCGCCTGCAGCAGAAACTCCGGGCT\n+>HWI-EAS109_102883399:4:9:4334:5972/1\n+GATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTGCTTGAAAAAA\n+>HWI-EAS109_102883399:3:104:6463:17673/1\n+CACACACATTATTACGCTGCGCTTCTTACAACAAGTTATCAGATGGGGAATAGGGCACCCACAAATATAAATATACATTATGTATATAATCATATATAGT\n+>HWI-EAS109_102883399:3:105:3530:10413/1\n+ATATATAATATTGTTTTACATCCTGTATCTTATATTAGATATATCATAGGTAACATAATAATGGATAATTCATAATTATAATATTTATACAATACGACAC\n+>HWI-EAS109_102883399:3:120:6816:3534/1\n+GCCTTGACGACGTCTTATCCGCTGGCGATTGTTGCTCGCGTATTGATCGGCGCCGGTGATGCCTCGGCGTTCTTGG\n+>HWI-EAS109_102883399:3:14:13657:17008/1\n+TAATTACATGTCACATATGTTATATATTATATATTTTACATAGAATGTACTGGTTACATACAATATATAGTATGTTACCTGTAATGTATAATTTATTACA\n+>HWI-EAS109_102883399:3:1:6565:9569/1\n+TATTATGCATAATTTATAATATACAATATGTGGAATGTGATATATATAACATATAATATATGATATATGATATACAGTATATGATATATAACATACAATA\n+>HWI-EAS109_102883399:3:24:10907:13582/1\n+CGGTTCAGCAGGAATGCCGAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGT\n+>HWI-EAS109_102883399:3:43:5734:14285/1\n+AGGCGTTTGGTTTCATCAAGGCTTGCTCCTGCCTTTGGGGTTGCTATGACCACAAAAAAGCCATCACCATAGCCCAAACTGTCATAACTGCTGTATGCT\n+>HWI-EAS109_102883399:3:46:16209:7151/1\n+ATCTTATATCATACATTATATGTTATATATATCACATTCTACATATTGTATATTATAAATTGTACACAATATATTGCATTCTTTATTTTACATGTAATCA\n+>HWI-EAS109_102883399:3:57:11738:2514/1\n+CGGTTCAGCAGGAATGCCGAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATATCGT\n+>HWI-EAS109_102883399:3:59:12370:16325/1\n+CTAAGATGATTTGACCGCCTTGTCAGTAGGAGTTTGGGTTTTGGTTCGTCTGTAATAACTAAAAGCACACATGCCTGTGGTAAAGGGTTTGAAGCTAACA\n+>HWI-EAS109_102883399:3:62:12328:3801/1\n+ATATTATATATCATATATGATATAGTACCTTTGTTATATATCATAATACATATAAATGTGTATTATGTTATCTATAATTATATAATTTCATATATAAGAT\n+>HWI-EAS109_102883399:3:64:10378:11274/1\n+CGTTGTACTTTAAGCTGGCCTTGCTCA
TCGGACATATACAGATAAAACGGATCATCCTGGTTGCGCGTATCCCACCACTGTTTTCGGGTTTCCTTAGTGG\n+>HWI-EAS109_102883399:3:69:7124:5695/1\n+TTGAGAAAACTCTTCCGGCATCCTAGGGGAACAGAGGTACGATTTTTCGAGACAGTCGAGGGAGAAGCCACCCCAGATTTTAGGATTGGATCTTTATTCA\n+>HWI-EAS109_102883399:3:75:4419:11351/1\n+TGCGGATTCATCGGGTTGGGCCGCCTGGGCGGGGCTCACCCCCACACTGCTGACGAGCGC\n+>HWI-EAS109_102883399:3:78:3607:5515/1\n+GAAAAATTAAAGGCAGTCTGTCCTGCTTTTTATGGGCAAATCTACTCATGTATTGCCACAGGTATGGGATTGATGTCTTGACTGTTTTGG\n+>HWI-EAS109_102883399:3:86:19018:14133/1\n+ACAAAATCTTACCTTTGTCTTCCTTGGACGTGCCTGGGCACCACTTCTGCCAACCTGTGAAGCCATCCTGCCCTGACAGAGATCGGAAGAGCGGT\n+>HWI-EAS109_102883399:4:114:5270:19170/1\n+TGTACCTTCATATTTTATGTACAGTATACAATATACATTCTGGGTACTTTGATATTGTATGTACAGTATGCTATATATTCTGGTTACTTTGATATATTAT\n+>HWI-EAS109_102883399:4:26:13028:14044/1\n+TGAGTGGGATTAAGGCGGTCAAGGTCTAAGGAGTTGAATAAAATTGGCAGGCAACATAAGCGGTCATTCATGGCGACAGGCTGTGAATAAAAGGG\n+>HWI-EAS109_102883399:4:29:1227:15411/1\n+ACGTATCGGTTGTATGGAAATAGACTTCTGTATGATAGATGTAGGTGTCTGTGTTATACAAATAAATACACATCGCTCTATAAAGAAGGGATCGTC\n+>HWI-EAS109_102883399:4:29:16361:20631/1\n+ATCAGTACGATGTCATCAGATGGGCGGACAACAAACCCAAAACTCTCTTCATTGATGTTGCCCGTTACTCGATAATAGAGGTTGATTTTGT\n+>HWI-EAS109_102883399:4:2:16905:14371/1\n+CGGTTCAGCAGGAATGCCGAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTG\n+>HWI-EAS109_102883399:4:51:2231:13778/1\n+GTCATCAAGCCGAAAATGCAGAACGGCGTAGTGGAATGCTTCACCACCCGCGACAGCCCCGAGACTATCCTCGTATTC\n+>HWI-EAS109_102883399:4:70:16446:12691/1\n+TATATAGTGCCAAATGAGAGAGTCCAACCAAGCGAAAAGGGAGTAGGACATTCTCACCATCTAAGCTGTGGCGGTGAGCATTGTCCGCATCGGT\n+>HWI-EAS109_102883399:4:70:1686:3360/1\n+CATGTGAAGGCTTTTATGGGCATGTTGGATCGATATTCGGTTGATGTGGCGTCTGGTGGCCGTGGTGGGGGTTCTGCTGTGGCGATGATTGACCGG\n+>HWI-EAS109_102883399:4:71:12108:17024/1\n+GGGTGTTATTTGGTGATGATACTTGATTGTTAAATCAATGTCATTGTTAACATCAAGAAGGCAAAAATTGGCAACGGTTATTCTTATATTCTTATTTATA\n+>HWI-EAS109_102883399:4:90:13950:7907/1\n+AAATATGAAAGTACATCAAATATAGATTATATACTGTACATTAAATACCCAAGTACCCCAAATATACATTTTGTACTGTACATGAAATATCAAAGTTCAC\n+>HWI-EAS109_10288339
9:4:98:17563:4110/1\n+TATATTATAATATTATGTATGTTAGTTATATTGGGTGATATGTAATATATATTATGTAATATGAAATAATATAATATATATTATATTATGATATTTTATG\n'
b
diff -r 000000000000 -r f5df500fcc3c test-data/SRS014464-Anterior_nares.fasta.gz
b
Binary file test-data/SRS014464-Anterior_nares.fasta.gz has changed
b
diff -r 000000000000 -r f5df500fcc3c test-data/SRS014464-Anterior_nares.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SRS014464-Anterior_nares.sam Mon Apr 19 20:56:20 2021 +0000
b
b'@@ -0,0 +1,235 @@\n+@HD\tVN:1.0\tSO:unsorted\n+@SQ\tSN:13076__A0A2I1PE66__CYJ72_10760\tLN:540\n+@SQ\tSN:1747__A0A1N4YDI5__CP875_06750\tLN:1164\n+@SQ\tSN:1747__A0A1N5AYP4__B1B09_10705\tLN:1476\n+@SQ\tSN:1747__A0A3D8ZS35__CP884_00215\tLN:1011\n+@SQ\tSN:1747__D4HAL2__PAJL_1002\tLN:705\n+@SQ\tSN:1747__D4HCW6__PAGK_1340\tLN:3834\n+@SQ\tSN:1747__D4HCY1__CP884_07625\tLN:1650\n+@SQ\tSN:1747__U7MBR0__BBJ67_02950\tLN:1779\n+@SQ\tSN:1747__W4TU52__HMPREF9593_02327\tLN:975\n+@SQ\tSN:1747__W4TWS7__COH18_07470\tLN:477\n+@SQ\tSN:29394__H3NBW7__B8A40_04000\tLN:1101\n+@SQ\tSN:29394__H3NC06__B8A41_08715\tLN:1317\n+@SQ\tSN:29394__H3NC21__HMPREF9703_00102\tLN:1617\n+@SQ\tSN:29394__H3NC33__B8A40_05755\tLN:3063\n+@SQ\tSN:29394__H3NCD6__B8A33_07975\tLN:1560\n+@SQ\tSN:29394__H3NCJ1__B8A42_01505\tLN:930\n+@SQ\tSN:29394__H3NCU9__B8A45_08665\tLN:765\n+@SQ\tSN:29394__H3NCV0__B8A46_05195\tLN:888\n+@SQ\tSN:29394__H3ND14__B8A42_03745\tLN:3069\n+@SQ\tSN:29394__H3NDD0__B8A46_01590\tLN:1173\n+@SQ\tSN:29394__H3NDD1__B8A45_02325\tLN:480\n+@SQ\tSN:29394__H3NDE2__B8A45_00965\tLN:699\n+@SQ\tSN:29394__H3NDH9__B8A44_07175\tLN:870\n+@SQ\tSN:29394__H3NDY2__B8A44_05015\tLN:3315\n+@SQ\tSN:29394__H3NE37__B8A45_05170\tLN:756\n+@SQ\tSN:29394__H3NE38__B8A42_05570\tLN:1374\n+@SQ\tSN:29394__H3NE72__B8A39_01430\tLN:654\n+@SQ\tSN:29394__H3NE98__B8A46_08570\tLN:1188\n+@SQ\tSN:29394__H3NEK0__HMPREF9703_00981\tLN:1110\n+@SQ\tSN:29394__H3NEK5__B8A42_06490\tLN:741\n+@SQ\tSN:29394__H3NET0__B8A33_05820\tLN:1332\n+@SQ\tSN:29394__H3NF31__B8A40_05690\tLN:1860\n+@SQ\tSN:29394__H3NFD8__HMPREF9703_01269\tLN:732\n+@SQ\tSN:29394__H3NFL8__B8A45_06985\tLN:582\n+@SQ\tSN:29394__H3NFP2__B8A45_07105\tLN:651\n+@SQ\tSN:29394__H3NFS3__B8A31_08415\tLN:2691\n+@SQ\tSN:29394__H3NFY1__B8A41_07655\tLN:534\n+@SQ\tSN:29394__H3NG09__B8A42_04750\tLN:1062\n+@SQ\tSN:29394__H3NGD0__B8A39_02610\tLN:1620\n+@SQ\tSN:29394__H3NGF9__B8A33_01850\tLN:1086\n+@SQ\tSN:29394__H3NGH1__B5772_02200\tLN:972\n+@SQ\tSN:29433__A0A378PKN6__MOVS_06365\tLN:828\n+@SQ\tSN:3763
7__U2GCN8__N579_00170\tLN:576\n+@SQ\tSN:37637__U2GT22__N579_02835\tLN:471\n+@SQ\tSN:37637__U2HPE4__N579_11550\tLN:1275\n+@SQ\tSN:37637__U2HXD5__N579_07285\tLN:1158\n+@SQ\tSN:37637__U2HYJ1__N579_05255\tLN:642\n+@SQ\tSN:37637__U2I1H6__N579_01890\tLN:975\n+@SQ\tSN:37637__U2I1U8__N579_01580\tLN:867\n+@SQ\tSN:37637__V7ZTH7__N579_0112775\tLN:729\n+@SQ\tSN:37637__V7ZTI2__N579_0112885\tLN:456\n+@SQ\tSN:38284__A0A376GA42__HMPREF0276_2319\tLN:1494\n+@SQ\tSN:38284__C0WFM5__HMPREF0276_0261\tLN:2589\n+@SQ\tSN:38284__C0WL80__HMPREF0276_2216\tLN:1497\n+@SQ\tSN:38303__E2S5B9__HMPREF0305_11721\tLN:1539\n+@SQ\tSN:386414__D1VYE0__HMPREF9019_1663\tLN:1065\n+@SQ\tSN:470453__A0A1T0CCU0__B0680_10530\tLN:741\n+@SQ\tSN:477__A0A1B8Q5X7__A9Z63_04935\tLN:618\n+@SQ\tSN:477__A0A1V4GM23__A9Z63_04930\tLN:1848\n+@SQ\tSN:478__A0A1B8PHZ1__A9Z60_04570\tLN:1320\n+@SQ\tSN:478__A0A1B8PI70__A9Z60_04770\tLN:1698\n+@SQ\tSN:478__A0A1B8PJ21__A9Z60_08925\tLN:843\n+@SQ\tSN:478__A0A1B8PJ82__A7456_08640\tLN:2004\n+@SQ\tSN:478__A0A1B8PJH3__A9Z60_02875\tLN:840\n+@SQ\tSN:478__A0A1B8PJN3__A9Z65_07575\tLN:1107\n+@SQ\tSN:478__A0A1B8PJP5__A7456_08645\tLN:1224\n+@SQ\tSN:478__A0A1B8PJZ8__A9Z65_05485\tLN:1053\n+@SQ\tSN:478__A0A1B8PK12__A9Z60_07995\tLN:2178\n+@SQ\tSN:478__A0A1B8PK17__A9Z60_02920\tLN:1296\n+@SQ\tSN:478__A0A1B8PK82__A9Z60_08695\tLN:1305\n+@SQ\tSN:478__A0A1B8PKC6__A9Z65_00240\tLN:2349\n+@SQ\tSN:478__A0A1B8PKZ1__A9Z65_08945\tLN:927\n+@SQ\tSN:478__A0A1B8PLP8__A9Z60_06460\tLN:2715\n+@SQ\tSN:478__A0A1B8PM86__A9Z65_01295\tLN:474\n+@SQ\tSN:478__A0A1B8PM91__A9Z60_01205\tLN:795\n+@SQ\tSN:478__A0A1B8PMF6__A9Z65_02265\tLN:687\n+@SQ\tSN:478__A0A1B8PMQ2__A9Z65_03275\tLN:1467\n+@SQ\tSN:478__A0A1B8PMT8__A9Z60_01500\tLN:573\n+@SQ\tSN:478__A0A1B8QJ77__A9Z65_03345\tLN:759\n+@SQ\tSN:478__A0A1B8QJK5__A7456_05585\tLN:750\n+@SQ\tSN:478__A0A1B8QQ64__A9Z65_06575\tLN:489\n+@SQ\tSN:478__A0A1B8QS56__A9Z60_09160\tLN:1569\n+@SQ\tSN:478__A0A1B8QSP6__A9Z60_08260\tLN:1455\n+@SQ\tSN:478__A0A378QF70__A7456_02830\tLN:1422\n+@SQ\tSN:480__A0A3A9
R6N2__ccmA\tLN:1662\n+@SQ\tSN:504553__GeneID:10498608\tLN:663\n+@SQ\tSN:60442__A0A378QNQ9__B5J93_05455\tLN:501\n+@SQ\tSN:60442__A0A378QTS8__B5J93_12645\tLN:780\n+@SQ\tSN:60442__A0A378QU90__cya_5\tLN:3615\n+@SQ\tSN:60442__A0A378QWP5__cya_3\tLN:780\n+@SQ\tSN:90240__A0A378QWM4__'..b'83399:3:3:9498:7494/2\t0\t504553__GeneID:10498608\t354\t3\t89M\t*\t0\t0\tCGGGTTGCCTGGCGCTAACGGTTCGGATGGCCATGATGGTGTTCCGGGCCGTGCAGGTGCTGACGGTGTGAACGGCGCTGATGGTCGGG\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-36\tXN:i:0\tXM:i:6\tXO:i:0\tXG:i:0\tNM:i:6\tMD:Z:14T17G15T28T3C2C4\tYT:Z:UU\n+HWI-EAS109_102883399:3:69:13778:8249/2\t0\t1747__A0A1N5AYP4__B1B09_10705\t368\t42\t79M\t*\t0\t0\tTCCTCATCGCGGGTGGTATGAGCTACGGCCCGCAGACCAAGGCTTTTAAGAGGGGAGTCGACCTTGTTGTCGCCACCCC\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:0\tXN:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tNM:i:0\tMD:Z:79\tYT:Z:UU\n+HWI-EAS109_102883399:4:43:7696:5439/2\t0\t38284__C0WL80__HMPREF0276_2216\t765\t42\t91M\t*\t0\t0\tCAACGATGGCGCCGCTGTAGAGGGGCAGGCTCACTCGACTACCCCGAAGGCAGAGTTCGGCACCGCGCTTAACCAAGGGGCAACGCCGTCC\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-6\tXN:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tNM:i:1\tMD:Z:75G15\tYT:Z:UU\n+HWI-EAS109_102883399:4:86:2354:13206/2\t0\t38303__E2S5B9__HMPREF0305_11721\t628\t42\t100M\t*\t0\t0\tGAGTCGTATCTTCGCTATGGGCAGTTTGTTGATGCAGCCCAAGACTCAGAGAAGATGAGTATTACCGAACTGGTCGATACCGCTATCGAAGAGTCCGATA\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-6\tXN:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tNM:i:1\tMD:Z:35G64\tYT:Z:UU\n+HWI-EAS109_102883399:3:118:12526:20595/1\t16\t29394__H3NDE2__B8A45_00965\t86\t23\t100M\t*\t0\t0\tAACGTGGGGTGAAAAATCGACCAGCAGGATTACGGACATACAGTTTAGTTTGCTTAGGATCCGCTTTGATTATGTTGACGAATCAATATATTACAGAACA\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-30\tXN:i:0\tXM:i:5
\tXO:i:0\tXG:i:0\tNM:i:5\tMD:Z:29C4A17T8T2C35\tYT:Z:UU\n+HWI-EAS109_102883399:3:35:14701:14421/1\t16\t478__A0A1B8PMT8__A9Z60_01500\t160\t40\t100M\t*\t0\t0\tACTCGTGATAATACTTCACTGGCATTATCAAGTTCTTTGGTTAAAGAAACTGTGGGTGATACCAAAAAAGACTCAGAGATGTTACTGGGACAACTCTCCA\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-18\tXN:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tNM:i:3\tMD:Z:41C2G0C54\tYT:Z:UU\n+HWI-EAS109_102883399:4:51:2231:13778/1\t16\t1747__D4HCY1__CP884_07625\t394\t42\t78M\t*\t0\t0\tGAATACGAGGATAGTCTCGGGGCTGTCGCGGGTGGTGAAGCATTCCACTACGCCGTTCTGCATTTTCGGCTTGATGAC\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:0\tXN:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tNM:i:0\tMD:Z:78\tYT:Z:UU\n+HWI-EAS109_102883399:4:65:18885:8678/1\t16\t37637__V7ZTH7__N579_0112775\t469\t42\t100M\t*\t0\t0\tGATTTTGTTGATGATCCGGATTCTACTCGGATGTTTTCTGAGAACTCCGCCTGCCCAAACGGGCATGCTCTGACGATTGATGAGTATGAACCCCGCTCCT\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:0\tXN:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tNM:i:0\tMD:Z:100\tYT:Z:UU\n+HWI-EAS109_102883399:3:54:17385:9920/2\t0\t29394__H3NE37__B8A45_05170\t63\t42\t100M\t*\t0\t0\tACAGTATTACAACGATCAAGCCTTATTTAAATACGATTGGAACTTTTTCCAATTAAAATTTCAGCCGTATTTACAGGAATTCAAGTTAATTGAAGAAATG\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:0\tXN:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tNM:i:0\tMD:Z:100\tYT:Z:UU\n+HWI-EAS109_102883399:4:2:5506:17168/2\t0\t29394__H3NFP2__B8A45_07105\t284\t42\t100M\t*\t0\t0\tGCCTAGCAATTGAGCAAGTTGATGGTAAGTTTATCTTGAAGCTCTATATTTGGGATGGGGCTGAGCATTATTATATGGACATGGATGCTGAGACCGGTGA\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-6\tXN:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tNM:i:1\tMD:Z:10C89\tYT:Z:UU\n+HWI-EAS109_102883399:4:95:2036:2688/2\t16\t478__A0A1B8PJ21__A9Z60_08925\t339\t24\t76M\t*\t0\t0\tCATTTGGGCATCTTCGATCTCTATCACGCCTGAGCGTGAACAA
GTGATGGATTTTAGTGAGCCATATTTGAATCAT\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-18\tXN:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tNM:i:3\tMD:Z:0T8G8T57\tYT:Z:UU\n+HWI-EAS109_102883399:4:51:2231:13778/1\t16\t1747__D4HCY1__CP884_07625\t394\t42\t78M\t*\t0\t0\tGAATACGAGGATAGTCTCGGGGCTGTCGCGGGTGGTGAAGCATTCCACTACGCCGTTCTGCATTTTCGGCTTGATGAC\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:0\tXN:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tNM:i:0\tMD:Z:78\tYT:Z:UU\n'
b
diff -r 000000000000 -r f5df500fcc3c test-data/marker.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/marker.txt Mon Apr 19 20:56:20 2021 +0000
b
@@ -0,0 +1,1 @@
+13076__A0A2I1PE66__CYJ72_10760
\ No newline at end of file
b
diff -r 000000000000 -r f5df500fcc3c test-data/marker_sequence.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/marker_sequence.fasta Mon Apr 19 20:56:20 2021 +0000
b
@@ -0,0 +1,2 @@
+>13076__A0A2I1PE66__CYJ72_10760 UniRef90_A0A2I1PE66;k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Aerococcaceae|g__Globicatella|s__Globicatella_sanguinis;GCA_002847845
+ATGCTTGAAATGTATATCCAAGGTGTCTCTACAAGGAAAGTTTCGAAAGTTATTGAAAATTTATGTGGGAAAACCTATTCTAAATCATTTGTCTCTTCTCTTACGAAACAACTGGATGAAGAGGTTCGACAATGGCGTCATCACGATTTAAGTCCTGTCCAATACGCTTACCTAGTGGTGGATGTTATTTATATAAAAGTAAGAGAAAATCATAAGGTTGTATCCAAAGCGTGCCATATCGCTATTGGGATTAGTGAAGAAGGAAAACGAAGACTCCTTGGTTTTGACATCAGTGATGGCGAAAGTGATTACTCTTGGTCTCGTTTCTTTAACCACTTAAAAGAAAGAGGCCTAAATGGCCTTAAAATGGTCATATCTGATGCACACACGGGTCTAGTAAAAGCAATTAAGGAAAACTTCCTGAATGTTTCTTGGCAAAGATGTCAGGTTCACTTCTTAATGTATTTGGCAATAGGGAATTGCGTTAAACGGCAATTAATTTTGTATGCCACTTGCCATCACTATCTACTCATGAATTAA
\ No newline at end of file
b
diff -r 000000000000 -r f5df500fcc3c test-data/test-db-with-one-marker.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db-with-one-marker.fasta Mon Apr 19 20:56:20 2021 +0000
b
@@ -0,0 +1,2 @@
+>13076__A0A2I1PE66__CYJ72_10760 UniRef90_A0A2I1PE66;k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Aerococcaceae|g__Globicatella|s__Globicatella_sanguinis;GCA_002847845
+ATGCTTGAAATGTATATCCAAGGTGTCTCTACAAGGAAAGTTTCGAAAGTTATTGAAAATTTATGTGGGAAAACCTATTCTAAATCATTTGTCTCTTCTCTTACGAAACAACTGGATGAAGAGGTTCGACAATGGCGTCATCACGATTTAAGTCCTGTCCAATACGCTTACCTAGTGGTGGATGTTATTTATATAAAAGTAAGAGAAAATCATAAGGTTGTATCCAAAGCGTGCCATATCGCTATTGGGATTAGTGAAGAAGGAAAACGAAGACTCCTTGGTTTTGACATCAGTGATGGCGAAAGTGATTACTCTTGGTCTCGTTTCTTTAACCACTTAAAAGAAAGAGGCCTAAATGGCCTTAAAATGGTCATATCTGATGCACACACGGGTCTAGTAAAAGCAATTAAGGAAAACTTCCTGAATGTTTCTTGGCAAAGATGTCAGGTTCACTTCTTAATGTATTTGGCAATAGGGAATTGCGTTAAACGGCAATTAATTTTGTATGCCACTTGCCATCACTATCTACTCATGAATTAA
b
diff -r 000000000000 -r f5df500fcc3c test-data/test-db-with-one-marker.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db-with-one-marker.json Mon Apr 19 20:56:20 2021 +0000
[
@@ -0,0 +1,1 @@
+{"markers": {"13076__A0A2I1PE66__CYJ72_10760": {"clade": "s__Globicatella_sanguinis", "ext": [], "len": 540, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Aerococcaceae|g__Globicatella|s__Globicatella_sanguinis"}}, "taxonomy": {"k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Aerococcaceae|g__Globicatella|s__Globicatella_sanguinis|t__GCA_002847845": ["2|1239|91061|186826|186827|13075|13076", 2411251]}, "merged_taxon": {}}
\ No newline at end of file
b
diff -r 000000000000 -r f5df500fcc3c test-data/test-db-without-one-marker.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db-without-one-marker.fasta Mon Apr 19 20:56:20 2021 +0000
b
b'@@ -0,0 +1,180 @@\n+>1747__A0A1N4YDI5__CP875_06750 UniRef90_A0A1N4YDI5;k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Propionibacteriales|f__Propionibacteriaceae|g__Cutibacterium|s__Cutibacterium_acnes;GCA_003384555\n+ATGTCCCGTGAGTACGGTGGACGCGTGGCTGACACGAAAACCTCCCGTCATTCCTGGCTACGCATCACTTCCGGTCTCGTCGGGCTGATCCTGACCCTGCCGGTACTGACGTGGTCAGTGGCCGCGATCCTCCCCTCACACAATGCCCTCACCTTCATCTCCTCCGTTCTGGTGGGATCGTTGGCTGTACCGGCTCTGGTCATCGCCGTCATCGCCTTCGTCCTCGCTCTTCTGGGACGAGGTGGTCTGCGAGTGGTGGCCGTTCTCTTCTCGGTGATGGCCCTCATCGTGCCAGTGGCGGCAACGGCGACGACGGCGTGGATCACGGACCGCGCCGGTGGACGTATCAACGTCGTCTCGGCATCTGCGGTGTCGTCGATGTCCGACCACCCTGACGAAACGGTCCGGTATGGATCCGGCCCTGACGAGACGGCCCAGATCTACCGTCCCCATAACCACAATGCCCCGGTACTTGTCGACATTCATGGTGGAGGATGGAGCACCGACGCCACCATGCCCGCCACGTTGAGATGGTTTTCCGACCATGGCTGGTTAGTCATTCGTCCCTCGTACACCCTGGCCACCCAGGGCCACCCCACCTGGAATACTGCACCAAAACAGGTAGCATGTGCCTGGGCCTGGAGCCTGTCCCACGTGAAAGAACTCGGTGGCGACCCTTCACAGGTATCGATCATGGGTGATTCCGCTGGTGGTGGAATGGCCATTAATCTCGCCTACGGCGCCGTCAGCGGAAAGTTGAAGAGTTCCTGCGGCTCAATACGGGCCCCGAAGTCAGTCCTCGCCCTCTATCCAACGGTGGATGTCAGTACCGTCGAGTCAGTCACCACACTGAGCGCGGGCAATGCCGCCAAAATGTATATCGGTGGCACCCCCAAGCAATTCCCCGACCGTTATCGCGCCGTCAACAGTTCCACCTGGATCACTCCTCAGGCACCACCAACAATGGTGATCCAGGGAAATCACGACACCTTTGTCCCGCCATCCAGCGTGCGGAAATTCATCAATCGTGCCCGCCAGGCTGGAGTCCGGGTCGATCACGTACAGTTACCAATGCTCAATCACGCTTTCGATTCCCAGGCGCGCAACTCACTGGGATTCCAGGTCGTCACGAGCCTTGGGCAACGTTTCCTTACTAACCACTGA\n+>1747__A0A1N5AYP4__B1B09_10705 
UniRef90_A0A1N5AYP4;k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Propionibacteriales|f__Propionibacteriaceae|g__Cutibacterium|s__Cutibacterium_acnes;GCA_002572615\n+ATGGATTGGGAGGCCACTGAGCTGACCGATCTCGACGTCACCGGTATTGATCACGAGGGCGGTTTCTCGGCCCTTGGCGTGCCCGACGAGATCGTTGCTGCTTTGGCCAAGACTGGCATTACCGATCCCTTCCGCATCCAGATCGCCGCCATCCCCGACGCCATCGCTGGGCGTGACGTGTTGGGCCGCGCCTCAACGGGTTCGGGTAAGACGTTGGCCTTTGGTGTCCCGCTGCTGTCGCGCCTGTCGGCCACCCCGCGTGAGGATAACCGGCCTCGAGCTCTCATCCTTTCTCCCACTCGTGAGCTGGCCATGCAGATTGCCGACGTGCTGTCCTCGCTGGCGTCCTCAATGGGGCTATCGACCATCCTCATCGCGGGTGGTATGAGCTACGGCCCGCAGACCAAGGCTTTTAAGAGGGGAGTCGACCTTGTTGTCGCCACCCCGGGGCGTCTGGTCGACCTCTTGGAGACTGGTGACGCCGACCTTTCCGGTGTCGCCGTGACTGTCCTCGACGAAGCCGATCACATGGCTGAACTGGGATTCATGGAGGCCGTTGGCTCGATCCTCGATGCCATTCCCGCTGACGGGCAGCGCCTGCTGTTCTCTGCCACCCTCGACGGTGCCGTTAACAAGTTGGTTAGGCGGTACATGCACGAGCCGGTTATCCACGAGGTCGACCCCGACAAGGGATCGGTCGCGACGATGACCCATCACGCTTTCCAGATCAAGCCTCACGAGAAGGTCGGGCTGATGTGCGAGATCGCTAACCGTAGCGGCCACACCATCGTCTTTGCTCGAACCCAGCGAGGTGCCTCCCGGATGGCCGAGCAACTGCGTGAGGCCGGCGTCATGGCTGGTGCTCTGCACGGCGGCCTGACCCAGGGTGCCCGTGCCCGCGTGTTGGCGGCATTCAAGGACGGTTCCCTGCCGGTCTTGGTTGCCACTGACGTCGCGGCCCGAGGAATTGACGTCGATGACGTCACTCTGGTGTTGCAAGTCGATCCGCCGATGAACTTCAAGGACTACCTGCACCGCTCCGGCCGTACCGCTCGCGCCGGACACGACGGGGCCGTCGTCTCTCTTGTCCTGCCGCATCAGCGTCGCACAATGGCTCGTTTGTACCGTCAAGCCGGGGTGAAGCCGGTAGAGGCCCAGGTCACGCGTGGCGACGACCACGTTGCCGAGGTGGCCGGTTGCAGCCCGGCCCTGGGCGCTCCTATTGACGAGAAGGATTACGAGGCTCTCGTCGCTCCCAAGCAGCAGCGCGGCAAGAAGGCTCGTGACGGAAAGCCTCGTCGTGGCAAGGGCGGACGCAACCGCGGTGGACGTGGATTCGACGGGGGCCGTCATCGCGGTCACGACTCCCGTGATCACCATGATCGTGGGCATGACTCGCGCGGACACAATTTCCGTGGCCGCGATCGTCGCGACATTGAGGCTAAATACGGCCGTTCCGGAAACGACACCTGGTGA\n+>1747__A0A3D8ZS35__CP884_00215 
UniRef90_A0A3D8ZS35;k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Propionibacteriales|f__Propionibacteriaceae|g__Cutibacterium|s__Cutibacterium_acnes;GCA_003384705\n+ATGAACACCCACGACGTTCCGATGATCCTGTTCACCGTCGTCACACAGATGTGCGTTGGTGCCTTTTTGGTCCTTGGGACCGCCCACGTGGGGGCAGCCATCAGGGGACGCGAAAACAGCGTCGTGGAGCGGACCAGTCGCCCAGTGCTCTATGCCATCGGCCCGGCCATGGTATTCGGCCTCTTCGTCTCCATGTTTCATATGGGTTACCCGGCTCATACCCTCAACGTCTTGCGTCACCCGCAGACTTCCTGGCTATCGCGTGAAATCATGTTTGGCTCCGGCTTCGCCCTACTTGGCTTTGTCTTTGCCATGCTGAGTTGGTTTAAGCGACCTGCTTTCGCTATTCAGCGGGTTCTCGCGGTTGTCACCGCTATCGTTGGCATCGGACTGTTGGTGTGCGAGTCGATGATCTATTACTCCTTGGTGACGGTCCCGGCTTGGCACAGCTGGTGGGTGCCTTTCTCCTTTGCAGCGACGACCATCATCTTGGGAACTCTCTCAGTGGCCTGTGCACTCATGATCACCGCGATGGTGCGTCATCGTCACGAAACAGCTGTCCCCTCAGCCAGGGAAAAGCATCCGAAAACCACGGGCTGGTGGTCCCGGCACGTCACCGAGGAGATCCGTGCCATCAACGCTCCAACCGATGATCAGGAATGGGACCTGTCGTTGACGGTAACCAAATGGTGCTCCATCGGCGCCGCCGTGGTCTCGGTTGCCCTCATGGTGG'..b'TTGATATAGACACACAAGGAGGCAATGATGTGATTTATGGCGGTGATGGTCATGATACCCTAAAAGGAGGTGATGGCAATGATGTGATTTATGGCTCTGCAGTCTTTTATGATTTTGAACCCAAAGACAGTCAAAGACTACCTAAGCCAAAACAAGACAAAGAGGGTAATTTAATTGGGGTGGATAAAGATATTATTATTGGTGGCATGGGAAGAGATTTGATTGCAGCAGGGATTGGGGATGATATTATTTGGGTGGACAATGAACAAGCAGAGTCTAATAACACCCATAACAACGACAAAGGAGACTGGGCATTAGGCGGACAAGGAAGTGATGTGATTTATGGGGGAGCAAATAAAGACTTTTTACAAGGTGGGGCGGATAATGATTATGTGTATGGCGGTGGTGGCGATGATGTGATATTGGGTGATGGACATATTAGGTTTGGCATAAAAAATCAAGTGCTACGCAGTGATAATCAGATTTTAAGCATAGAGCATACCTACGACCAAGTCAATCACAAAGAAAACACCCTAAAACAAAAAACCGCAAATTTACAAGACTACAAAACCTTTGAATGGAACATTCAAGTAGATACTGACAAAATGGATTATCTCTTAACCAAACAAAAAGACATTGCTTTGGCAAATAATTACCATCTTTTAGACGAATCATACAAACACAATGCCAATGACTTTTTGTACGGCGGCAAGGGAGACGACCTTATCATCGGTCAATATGGCAATGATTATCTAGATGGTGGTGAGGGTGATGACATCCTATGGGGTGATGATAACAGAGATGAGGATATTGAAGGGAATGACACGCTAAAAGGCGGTGCAGGAAGAGATAGGCTAATTGGTGGCAAAGGGAATGATGCGTATGTGTTTGACCTAGAAGACCTAGCCAAAAGCGGGGCGGATGACAAAACCATCATTGACAGTGATAATGGTGGCATCATCAAAATCAATCATTATGATCTAACAGGCACACAGTTTTTTAAATCCGCCAATGCCAGCAACCTCTACCAAAGCGATGATGGCAAATTTGACTTGATAAGATACGATAACGGCAACTACAGCCTATTAAGC
AAAGATTTCAACGCCTCCATTGCCATCAAAGACACCCCCATCACAAAAAATGGCGACAATGACATCTTGCTAGGCATGATACTAAAAGAAACCAAAAACACCGCTCCTATCGTTCATAATCCCGTCTCTGACCAGATGGTGTTGGCAGAGCGAGACATCGCACTCGGTCTTGGCGAGGTATTCTTCGATGCTGATGGTGATGAGCTGACGTATAGCATCCAAGGGGCGGATGGACTGCATTTTGACCCTGCTACAAAGATGCTAACAGGCAGGGCACCTGATAAAGGGGTATTTAACATCACATTGACCGCCACCGACCCCAAGGGCGAGCATGCCAATACCAGCTTCACACTAAGAGTAAATGAACGACCAACGCTCGTCTCTGCCTTGACTCTACCGCTTGTGCTAAGCCAAGATGATGCCATGATGCAGATAGGTCTAGATAAGCTGTTTGTTGATAACGATGGTGATGCGTTAAGTTATTCGCTATCGGCAAATTCTTTGAGCGGTATCTACATTGACAACAATCATCTCATCATAGACCCTGCGACCACAGAGATTGGCATGCATGACATTACCATAACAGCGACAGACAGCTTCGGTCAGAGTGTCAGTACCAATGCCAGCTTTACCATCAAAGGCAGTGAGCCCATCGTGATGCCAGAGCCTGTTCTGCCCATCACGCCAACGCCTGACCCCATCACTGTTGGTAAGCGATATGTGGGCAAGCTCGGTGCTGATGACATCACAGGTGATGATAAAGCCAATACCATCAATGGCCTGACAGGAGATGATGTCCTAAGTGGTGGTCGTGGTAATGACACGCTCATCGGTGGATTTGGTCATGACACTTTAATTGGCGGACTTGATAACGACCTGCTTGTCGGTGGCTATGGCAATGATACTTATCTGTATCACAAAGGCGATGGTCTGGACACGATTCGTGATGTTGGTGGGTTGGATATCTTAAAGATTTCGGGGCTGACGTTGTCTGATTTGGGCTTTGCCAAACAGGGCAATCATCTGTTTATTGATGTCAAGCAAAATGATGATGGCATTATTATTGAAGATTATTTCAAATCAGGCAGTATCATACCAAGCCAAAAATCATCCCCCAACATAGAACGTATCTACATCAATGATAAGTTTGTCGGGCATGATGAGATTATCAAGATGGCGGATGTGATTATTTAA\n+>60442__A0A378QWP5__cya_3 
UniRef90_A0A378QWP5;k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_equi;GCA_900453335\n+ATGGCAGAGTTTGATAAAATCAAATATAAAATTGGTGTTATCGATAGTTTCATGAGCACAAAAACAGATAACATGTTTTATGCTAACGTCAAAGACTTTGAAAAAACAACCAAAATTATCAATGATACTTATGATAGGCTGATTAAGTCTGTTTATAGTGGACTCTATCCGCAAACTGTTCAGAATCGTTATGGCGATTTGATAGATATTGTGGTAGATGTCAAAGATGGTAATCTTCTGTTTGGTTTGGACATTAATCGACTGGTAAGCGACTTTAAGGCAAGGTTAGATGTAGGTGGTCAAACCGCAATGTCTGCCTTTGGCGATATCATGGAGTATTTTGCTTATCTTCAAGTGCAAACACGACCTAACATTGTTGGCAGAGATAAGTTGGTTAATTTGATAGAGCATGTTGTATCTAGCGTGCCAAAACATGAATTGGATGGTTGGATAAATGGTATTGATTTTAAGATTGATAGAGAATTACCAATTAAGATGGGCGATGCACAAGATAATCATTTAAATGGCGGTGCTGTGTTTGCAGGTGCTGGTAATGATCGGTTGACTGGAACTAAAAATTCTGACCTCCTATTTGGTCAAGATGGTAACGACACCCTAAATGGCGGTAACGGCGATGACATCTTAAATGGTGGTGCTGGCGATGATTATTTAAGCAATTCGGGTCAGAATAACGGTAATGACACTTACATCTTTAGTGGTGATTATGGTAATGACATCATTCATGACTCTGATTCAAATGTCTCTAAAGAACGCCATTAA\n+>90240__A0A378QWM4__NCTC12877_00123 UniRef90_A0A378QWM4;k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_caprae;GCA_900453285\n+ATGCTCACAAACCCCATTGACTATTTATCACAGGTTCATGACCCAAGGCGTCAAAACAAAAACCTGCTACACCCACTTAAAAACATTCTAACCATTGCCTTAACCGCAGTTATCTGTGGTTATCATGACTGGGTAGATATAGAAGACTTTGGCAATGAAAACAAAACATGGTTTGCCACATTTCTTGACTTAACTCATGGCATACCCTCACATGACACCTTTGGCAATGTCTTTAAACGCCTTAACAAAGATGAACTTAGTCGATACTTGAGTGAGTGGATAAATCAAACCCAAGCCAATCATCCACACATCGCCATTGATGGTAAATTCATCCAAGGTGGCTACAAAAATGACAACGCATTACAGCTTGTTACCGCCTTTGCCAGTCAAACCAAACTCATTCTTGCTCAAGTTGACATTGCTGATAAAAACAACGAAATTAGCACATTACCCCAACTGTTAAAACTCATTGATATCAGCGGTAGTATAGTGACAGCCGATGCCATTTACACCCCACAAACCCCAAATATGCTACAATACCCTAACATCCATTAA\n'
b
diff -r 000000000000 -r f5df500fcc3c test-data/test-db-without-one-marker.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db-without-one-marker.json Mon Apr 19 20:56:20 2021 +0000
[
b'@@ -0,0 +1,1 @@\n+{"markers": {"504553__GeneID:10498608": {"ext": ["GCA_003384495", "GCA_003384705", "GCA_003384195", "GCA_000144875", "GCA_003426255", "GCA_000376705", "GCA_000221125", "GCA_001469595", "GCA_002572575", "GCA_000144185", "GCA_003384345", "GCA_001481615", "GCA_000240015", "GCA_003384315", "GCA_000144485", "GCA_003384365", "GCA_000144735", "GCA_000178055", "GCA_001660115", "GCA_000144545", "GCA_000144245", "GCA_000735055", "GCA_001750535", "GCA_003384145", "GCA_003426625", "GCA_000144005", "GCA_003384485", "GCA_001469635", "GCA_000144505", "GCA_000144815", "GCA_000240055", "GCA_000144345", "GCA_003384395", "GCA_003390995", "GCA_003384555", "GCA_003426665", "GCA_000144895", "GCA_000144465", "GCA_900102845", "GCA_002572615", "GCA_002572665", "GCA_000194825", "GCA_003384585", "GCA_003384255", "GCA_000145195", "GCA_000144325", "GCA_000145575", "GCA_000231215", "GCA_000144365", "GCA_000144795", "GCA_002556485", "GCA_002572745", "GCA_003384445", "GCA_001660935", "GCA_001469655", "GCA_000145335", "GCA_000008345", "GCA_000488875", "GCA_001660945", "GCA_000145095", "GCA_003425775", "GCA_001469555", "GCA_000144145", "GCA_002775655", "GCA_000145155", "GCA_000302515", "GCA_000342585", "GCA_002572695", "GCA_000144285", "GCA_001660855", "GCA_000144775", "GCA_000144125", "GCA_000145375", "GCA_003384285", "GCA_003426225", "GCA_000144045", "GCA_001750525", "GCA_000730485", "GCA_002572655", "GCA_002572705", "GCA_003384385", "GCA_000147145", "GCA_001750555", "GCA_002861085", "GCA_002572835", "GCA_003426685", "GCA_000145455", "GCA_001660965", "GCA_000144445", "GCA_002572775", "GCA_002831715"], "score": 37.0, "clade": "t__Propionibacterium_phage_PAS50", "len": 663, "taxon": "k__Viruses|p__Viruses_unclassified|c__Viruses_unclassified|o__Caudovirales|f__Siphoviridae|g__Pa6virus|s__Propionibacterium_virus_PAS50|t__Propionibacterium_phage_PAS50"}, "386414__D1VYE0__HMPREF9019_1663": {"clade": "s__Prevotella_timonensis", "ext": [], "len": 1065, "score": 0, "taxon": 
"k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Prevotellaceae|g__Prevotella|s__Prevotella_timonensis"}, "29394__H3NGH1__B5772_02200": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 972, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NFY1__B8A41_07655": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 534, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NC06__B8A41_08715": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 1317, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NDE2__B8A45_00965": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 699, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NFL8__B8A45_06985": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 582, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NDD1__B8A45_02325": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 480, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NE37__B8A45_05170": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 756, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NCU9__B8A45_08665": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 765, "score": 0, "taxon": 
"k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "2'..b'roteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900076675": ["2|1224|1236|91347|543|547|354276", 4289814], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_000958695": ["2|1224|1236|91347|543|547|354276", 4336076], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900077255": ["2|1224|1236|91347|543|547|354276", 4482974], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900075885": ["2|1224|1236|91347|543|547|354276", 4396160], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_001487035": ["2|1224|1236|91347|543|547|354276", 4279410], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900077025": ["2|1224|1236|91347|543|547|354276", 4303785], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_000952555": ["2|1224|1236|91347|543|547|354276", 4340464], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900076755": ["2|1224|1236|91347|543|547|354276", 4236759], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_000783855": ["2|1224|1236|91347|543|547|354276", 4188638], 
"k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_001022075": ["2|1224|1236|91347|543|547|354276", 4412384], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_001022255": ["2|1224|1236|91347|543|547|354276", 4385357], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900075375": ["2|1224|1236|91347|543|547|354276", 4164369], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900075395": ["2|1224|1236|91347|543|547|354276", 4013880], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_003261215": ["2|1224|1236|91347|543|547|354276", 5016502], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900075185": ["2|1224|1236|91347|543|547|354276", 4392401], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_002192395": ["2|1224|1236|91347|543|547|354276", 4556770], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_000958745": ["2|1224|1236|91347|543|547|354276", 4382425], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_001025055": ["2|1224|1236|91347|543|547|354276", 4360879], 
"k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900076455": ["2|1224|1236|91347|543|547|354276", 4404432]}, "merged_taxon": {}}\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r f5df500fcc3c test-data/test-db.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db.fasta Mon Apr 19 20:56:20 2021 +0000
b
b'@@ -0,0 +1,182 @@\n+>13076__A0A2I1PE66__CYJ72_10760 UniRef90_A0A2I1PE66;k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Aerococcaceae|g__Globicatella|s__Globicatella_sanguinis;GCA_002847845\n+ATGCTTGAAATGTATATCCAAGGTGTCTCTACAAGGAAAGTTTCGAAAGTTATTGAAAATTTATGTGGGAAAACCTATTCTAAATCATTTGTCTCTTCTCTTACGAAACAACTGGATGAAGAGGTTCGACAATGGCGTCATCACGATTTAAGTCCTGTCCAATACGCTTACCTAGTGGTGGATGTTATTTATATAAAAGTAAGAGAAAATCATAAGGTTGTATCCAAAGCGTGCCATATCGCTATTGGGATTAGTGAAGAAGGAAAACGAAGACTCCTTGGTTTTGACATCAGTGATGGCGAAAGTGATTACTCTTGGTCTCGTTTCTTTAACCACTTAAAAGAAAGAGGCCTAAATGGCCTTAAAATGGTCATATCTGATGCACACACGGGTCTAGTAAAAGCAATTAAGGAAAACTTCCTGAATGTTTCTTGGCAAAGATGTCAGGTTCACTTCTTAATGTATTTGGCAATAGGGAATTGCGTTAAACGGCAATTAATTTTGTATGCCACTTGCCATCACTATCTACTCATGAATTAA\n+>1747__A0A1N4YDI5__CP875_06750 UniRef90_A0A1N4YDI5;k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Propionibacteriales|f__Propionibacteriaceae|g__Cutibacterium|s__Cutibacterium_acnes;GCA_003384555\n+ATGTCCCGTGAGTACGGTGGACGCGTGGCTGACACGAAAACCTCCCGTCATTCCTGGCTACGCATCACTTCCGGTCTCGTCGGGCTGATCCTGACCCTGCCGGTACTGACGTGGTCAGTGGCCGCGATCCTCCCCTCACACAATGCCCTCACCTTCATCTCCTCCGTTCTGGTGGGATCGTTGGCTGTACCGGCTCTGGTCATCGCCGTCATCGCCTTCGTCCTCGCTCTTCTGGGACGAGGTGGTCTGCGAGTGGTGGCCGTTCTCTTCTCGGTGATGGCCCTCATCGTGCCAGTGGCGGCAACGGCGACGACGGCGTGGATCACGGACCGCGCCGGTGGACGTATCAACGTCGTCTCGGCATCTGCGGTGTCGTCGATGTCCGACCACCCTGACGAAACGGTCCGGTATGGATCCGGCCCTGACGAGACGGCCCAGATCTACCGTCCCCATAACCACAATGCCCCGGTACTTGTCGACATTCATGGTGGAGGATGGAGCACCGACGCCACCATGCCCGCCACGTTGAGATGGTTTTCCGACCATGGCTGGTTAGTCATTCGTCCCTCGTACACCCTGGCCACCCAGGGCCACCCCACCTGGAATACTGCACCAAAACAGGTAGCATGTGCCTGGGCCTGGAGCCTGTCCCACGTGAAAGAACTCGGTGGCGACCCTTCACAGGTATCGATCATGGGTGATTCCGCTGGTGGTGGAATGGCCATTAATCTCGCCTACGGCGCCGTCAGCGGAAAGTTGAAGAGTTCCTGCGGCTCAATACGGGCCCCGAAGTCAGTCCTCGCCCTCTATCCAACGGTGGATGTCAGTACCGTCGAGTCAGTCACCACACTGAGCGCGGGCAATGCCGCCAAAATGTATATCGGTGGCACCCCCAAGCAATTCCCCGACCGTTATCGCGCCGTCAACAGTTCCACCTGGATCACTCCTCAGGCACCACCAACAATGGTGATCCAGGGAAATCACGACACCTTTGTCCCGCCATCCAGCGTGCGGAAATTCATCAATCGTGCCCGCCAGGCTGGAGTCCGG
GTCGATCACGTACAGTTACCAATGCTCAATCACGCTTTCGATTCCCAGGCGCGCAACTCACTGGGATTCCAGGTCGTCACGAGCCTTGGGCAACGTTTCCTTACTAACCACTGA\n+>1747__A0A1N5AYP4__B1B09_10705 UniRef90_A0A1N5AYP4;k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Propionibacteriales|f__Propionibacteriaceae|g__Cutibacterium|s__Cutibacterium_acnes;GCA_002572615\n+ATGGATTGGGAGGCCACTGAGCTGACCGATCTCGACGTCACCGGTATTGATCACGAGGGCGGTTTCTCGGCCCTTGGCGTGCCCGACGAGATCGTTGCTGCTTTGGCCAAGACTGGCATTACCGATCCCTTCCGCATCCAGATCGCCGCCATCCCCGACGCCATCGCTGGGCGTGACGTGTTGGGCCGCGCCTCAACGGGTTCGGGTAAGACGTTGGCCTTTGGTGTCCCGCTGCTGTCGCGCCTGTCGGCCACCCCGCGTGAGGATAACCGGCCTCGAGCTCTCATCCTTTCTCCCACTCGTGAGCTGGCCATGCAGATTGCCGACGTGCTGTCCTCGCTGGCGTCCTCAATGGGGCTATCGACCATCCTCATCGCGGGTGGTATGAGCTACGGCCCGCAGACCAAGGCTTTTAAGAGGGGAGTCGACCTTGTTGTCGCCACCCCGGGGCGTCTGGTCGACCTCTTGGAGACTGGTGACGCCGACCTTTCCGGTGTCGCCGTGACTGTCCTCGACGAAGCCGATCACATGGCTGAACTGGGATTCATGGAGGCCGTTGGCTCGATCCTCGATGCCATTCCCGCTGACGGGCAGCGCCTGCTGTTCTCTGCCACCCTCGACGGTGCCGTTAACAAGTTGGTTAGGCGGTACATGCACGAGCCGGTTATCCACGAGGTCGACCCCGACAAGGGATCGGTCGCGACGATGACCCATCACGCTTTCCAGATCAAGCCTCACGAGAAGGTCGGGCTGATGTGCGAGATCGCTAACCGTAGCGGCCACACCATCGTCTTTGCTCGAACCCAGCGAGGTGCCTCCCGGATGGCCGAGCAACTGCGTGAGGCCGGCGTCATGGCTGGTGCTCTGCACGGCGGCCTGACCCAGGGTGCCCGTGCCCGCGTGTTGGCGGCATTCAAGGACGGTTCCCTGCCGGTCTTGGTTGCCACTGACGTCGCGGCCCGAGGAATTGACGTCGATGACGTCACTCTGGTGTTGCAAGTCGATCCGCCGATGAACTTCAAGGACTACCTGCACCGCTCCGGCCGTACCGCTCGCGCCGGACACGACGGGGCCGTCGTCTCTCTTGTCCTGCCGCATCAGCGTCGCACAATGGCTCGTTTGTACCGTCAAGCCGGGGTGAAGCCGGTAGAGGCCCAGGTCACGCGTGGCGACGACCACGTTGCCGAGGTGGCCGGTTGCAGCCCGGCCCTGGGCGCTCCTATTGACGAGAAGGATTACGAGGCTCTCGTCGCTCCCAAGCAGCAGCGCGGCAAGAAGGCTCGTGACGGAAAGCCTCGTCGTGGCAAGGGCGGACGCAACCGCGGTGGACGTGGATTCGACGGGGGCCGTCATCGCGGTCACGACTCCCGTGATCACCATGATCGTGGGCATGACTCGCGCGGACACAATTTCCGTGGCCGCGATCGTCGCGACATTGAGGCTAAATACGGCCGTTCCGGAAACGACACCTGGTGA\n+>1747__A0A3D8ZS35__CP884_00215 
UniRef90_A0A3D8ZS35;k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Propionibacteriales|f__Propionibacteriaceae|g__Cutibacterium|s__Cutibacterium_acnes;GCA_003384705\n+ATGAACACC'..b'TTGATATAGACACACAAGGAGGCAATGATGTGATTTATGGCGGTGATGGTCATGATACCCTAAAAGGAGGTGATGGCAATGATGTGATTTATGGCTCTGCAGTCTTTTATGATTTTGAACCCAAAGACAGTCAAAGACTACCTAAGCCAAAACAAGACAAAGAGGGTAATTTAATTGGGGTGGATAAAGATATTATTATTGGTGGCATGGGAAGAGATTTGATTGCAGCAGGGATTGGGGATGATATTATTTGGGTGGACAATGAACAAGCAGAGTCTAATAACACCCATAACAACGACAAAGGAGACTGGGCATTAGGCGGACAAGGAAGTGATGTGATTTATGGGGGAGCAAATAAAGACTTTTTACAAGGTGGGGCGGATAATGATTATGTGTATGGCGGTGGTGGCGATGATGTGATATTGGGTGATGGACATATTAGGTTTGGCATAAAAAATCAAGTGCTACGCAGTGATAATCAGATTTTAAGCATAGAGCATACCTACGACCAAGTCAATCACAAAGAAAACACCCTAAAACAAAAAACCGCAAATTTACAAGACTACAAAACCTTTGAATGGAACATTCAAGTAGATACTGACAAAATGGATTATCTCTTAACCAAACAAAAAGACATTGCTTTGGCAAATAATTACCATCTTTTAGACGAATCATACAAACACAATGCCAATGACTTTTTGTACGGCGGCAAGGGAGACGACCTTATCATCGGTCAATATGGCAATGATTATCTAGATGGTGGTGAGGGTGATGACATCCTATGGGGTGATGATAACAGAGATGAGGATATTGAAGGGAATGACACGCTAAAAGGCGGTGCAGGAAGAGATAGGCTAATTGGTGGCAAAGGGAATGATGCGTATGTGTTTGACCTAGAAGACCTAGCCAAAAGCGGGGCGGATGACAAAACCATCATTGACAGTGATAATGGTGGCATCATCAAAATCAATCATTATGATCTAACAGGCACACAGTTTTTTAAATCCGCCAATGCCAGCAACCTCTACCAAAGCGATGATGGCAAATTTGACTTGATAAGATACGATAACGGCAACTACAGCCTATTAAGCAAAGATTTCAACGCCTCCATTGCCATCAAAGACACCCCCATCACAAAAAATGGCGACAATGACATCTTGCTAGGCATGATACTAAAAGAAACCAAAAACACCGCTCCTATCGTTCATAATCCCGTCTCTGACCAGATGGTGTTGGCAGAGCGAGACATCGCACTCGGTCTTGGCGAGGTATTCTTCGATGCTGATGGTGATGAGCTGACGTATAGCATCCAAGGGGCGGATGGACTGCATTTTGACCCTGCTACAAAGATGCTAACAGGCAGGGCACCTGATAAAGGGGTATTTAACATCACATTGACCGCCACCGACCCCAAGGGCGAGCATGCCAATACCAGCTTCACACTAAGAGTAAATGAACGACCAACGCTCGTCTCTGCCTTGACTCTACCGCTTGTGCTAAGCCAAGATGATGCCATGATGCAGATAGGTCTAGATAAGCTGTTTGTTGATAACGATGGTGATGCGTTAAGTTATTCGCTATCGGCAAATTCTTTGAGCGGTATCTACATTGACAACAATCATCTCATCATAGACCCTGCGACCACAGAGATTGGCATGCATGACATTACCATAACAGCGACAGACAGCTTCGGTCAGAGTGTCAGTACCAATGCCAGCTTTACCATCAAAGGCAGTGAGCCCATCGTGATGCCAGAGCCTGTTCTGCCCATCACGCCAACGCCTGACCCCATCACTGTTGGTAAGCGATATGTGG
GCAAGCTCGGTGCTGATGACATCACAGGTGATGATAAAGCCAATACCATCAATGGCCTGACAGGAGATGATGTCCTAAGTGGTGGTCGTGGTAATGACACGCTCATCGGTGGATTTGGTCATGACACTTTAATTGGCGGACTTGATAACGACCTGCTTGTCGGTGGCTATGGCAATGATACTTATCTGTATCACAAAGGCGATGGTCTGGACACGATTCGTGATGTTGGTGGGTTGGATATCTTAAAGATTTCGGGGCTGACGTTGTCTGATTTGGGCTTTGCCAAACAGGGCAATCATCTGTTTATTGATGTCAAGCAAAATGATGATGGCATTATTATTGAAGATTATTTCAAATCAGGCAGTATCATACCAAGCCAAAAATCATCCCCCAACATAGAACGTATCTACATCAATGATAAGTTTGTCGGGCATGATGAGATTATCAAGATGGCGGATGTGATTATTTAA\n+>60442__A0A378QWP5__cya_3 UniRef90_A0A378QWP5;k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_equi;GCA_900453335\n+ATGGCAGAGTTTGATAAAATCAAATATAAAATTGGTGTTATCGATAGTTTCATGAGCACAAAAACAGATAACATGTTTTATGCTAACGTCAAAGACTTTGAAAAAACAACCAAAATTATCAATGATACTTATGATAGGCTGATTAAGTCTGTTTATAGTGGACTCTATCCGCAAACTGTTCAGAATCGTTATGGCGATTTGATAGATATTGTGGTAGATGTCAAAGATGGTAATCTTCTGTTTGGTTTGGACATTAATCGACTGGTAAGCGACTTTAAGGCAAGGTTAGATGTAGGTGGTCAAACCGCAATGTCTGCCTTTGGCGATATCATGGAGTATTTTGCTTATCTTCAAGTGCAAACACGACCTAACATTGTTGGCAGAGATAAGTTGGTTAATTTGATAGAGCATGTTGTATCTAGCGTGCCAAAACATGAATTGGATGGTTGGATAAATGGTATTGATTTTAAGATTGATAGAGAATTACCAATTAAGATGGGCGATGCACAAGATAATCATTTAAATGGCGGTGCTGTGTTTGCAGGTGCTGGTAATGATCGGTTGACTGGAACTAAAAATTCTGACCTCCTATTTGGTCAAGATGGTAACGACACCCTAAATGGCGGTAACGGCGATGACATCTTAAATGGTGGTGCTGGCGATGATTATTTAAGCAATTCGGGTCAGAATAACGGTAATGACACTTACATCTTTAGTGGTGATTATGGTAATGACATCATTCATGACTCTGATTCAAATGTCTCTAAAGAACGCCATTAA\n+>90240__A0A378QWM4__NCTC12877_00123 
UniRef90_A0A378QWM4;k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_caprae;GCA_900453285\n+ATGCTCACAAACCCCATTGACTATTTATCACAGGTTCATGACCCAAGGCGTCAAAACAAAAACCTGCTACACCCACTTAAAAACATTCTAACCATTGCCTTAACCGCAGTTATCTGTGGTTATCATGACTGGGTAGATATAGAAGACTTTGGCAATGAAAACAAAACATGGTTTGCCACATTTCTTGACTTAACTCATGGCATACCCTCACATGACACCTTTGGCAATGTCTTTAAACGCCTTAACAAAGATGAACTTAGTCGATACTTGAGTGAGTGGATAAATCAAACCCAAGCCAATCATCCACACATCGCCATTGATGGTAAATTCATCCAAGGTGGCTACAAAAATGACAACGCATTACAGCTTGTTACCGCCTTTGCCAGTCAAACCAAACTCATTCTTGCTCAAGTTGACATTGCTGATAAAAACAACGAAATTAGCACATTACCCCAACTGTTAAAACTCATTGATATCAGCGGTAGTATAGTGACAGCCGATGCCATTTACACCCCACAAACCCCAAATATGCTACAATACCCTAACATCCATTAA\n'
b
diff -r 000000000000 -r f5df500fcc3c test-data/test-db.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db.json Mon Apr 19 20:56:20 2021 +0000
[
b'@@ -0,0 +1,1 @@\n+{"markers": {"504553__GeneID:10498608": {"ext": ["GCA_000144505", "GCA_000144125", "GCA_000488875", "GCA_000178055", "GCA_001660965", "GCA_000144445", "GCA_003384145", "GCA_002572705", "GCA_001660935", "GCA_001750525", "GCA_000144775", "GCA_003390995", "GCA_001750535", "GCA_000008345", "GCA_003384705", "GCA_003384485", "GCA_002572745", "GCA_000144465", "GCA_000145375", "GCA_002775655", "GCA_001660855", "GCA_001660945", "GCA_000144875", "GCA_003426625", "GCA_000240055", "GCA_003426225", "GCA_003384285", "GCA_000144815", "GCA_000145095", "GCA_900102845", "GCA_001469595", "GCA_000145195", "GCA_003384365", "GCA_000145155", "GCA_003384255", "GCA_003384315", "GCA_000144345", "GCA_000240015", "GCA_000302515", "GCA_002831715", "GCA_000144285", "GCA_003426665", "GCA_001469635", "GCA_000144185", "GCA_002572835", "GCA_001660115", "GCA_000144325", "GCA_000144735", "GCA_000194825", "GCA_000376705", "GCA_000147145", "GCA_002572575", "GCA_000144795", "GCA_003384385", "GCA_000144895", "GCA_003425775", "GCA_003384345", "GCA_000231215", "GCA_003384555", "GCA_000735055", "GCA_000145575", "GCA_000144005", "GCA_003384195", "GCA_001469555", "GCA_003384585", "GCA_003384395", "GCA_000145455", "GCA_000144485", "GCA_003384495", "GCA_000342585", "GCA_003384445", "GCA_000144365", "GCA_000730485", "GCA_002861085", "GCA_002572615", "GCA_000144145", "GCA_002556485", "GCA_001481615", "GCA_000144245", "GCA_000144545", "GCA_000221125", "GCA_001469655", "GCA_003426685", "GCA_000144045", "GCA_002572665", "GCA_002572695", "GCA_002572655", "GCA_001750555", "GCA_003426255", "GCA_000145335", "GCA_002572775"], "score": 37.0, "clade": "t__Propionibacterium_phage_PAS50", "len": 663, "taxon": "k__Viruses|p__Viruses_unclassified|c__Viruses_unclassified|o__Caudovirales|f__Siphoviridae|g__Pa6virus|s__Propionibacterium_virus_PAS50|t__Propionibacterium_phage_PAS50"}, "386414__D1VYE0__HMPREF9019_1663": {"clade": "s__Prevotella_timonensis", "ext": [], "len": 1065, "score": 0, "taxon": 
"k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Prevotellaceae|g__Prevotella|s__Prevotella_timonensis"}, "29394__H3NGH1__B5772_02200": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 972, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NFY1__B8A41_07655": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 534, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NC06__B8A41_08715": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 1317, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NDE2__B8A45_00965": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 699, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NFL8__B8A45_06985": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 582, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NDD1__B8A45_02325": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 480, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NE37__B8A45_05170": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 756, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NCU9__B8A45_08665": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 765, "score": 0, "taxon": 
"k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "2'..b'roteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900076675": ["2|1224|1236|91347|543|547|354276", 4289814], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_000958695": ["2|1224|1236|91347|543|547|354276", 4336076], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900077255": ["2|1224|1236|91347|543|547|354276", 4482974], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900075885": ["2|1224|1236|91347|543|547|354276", 4396160], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_001487035": ["2|1224|1236|91347|543|547|354276", 4279410], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900077025": ["2|1224|1236|91347|543|547|354276", 4303785], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_000952555": ["2|1224|1236|91347|543|547|354276", 4340464], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900076755": ["2|1224|1236|91347|543|547|354276", 4236759], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_000783855": ["2|1224|1236|91347|543|547|354276", 4188638], 
"k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_001022075": ["2|1224|1236|91347|543|547|354276", 4412384], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_001022255": ["2|1224|1236|91347|543|547|354276", 4385357], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900075375": ["2|1224|1236|91347|543|547|354276", 4164369], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900075395": ["2|1224|1236|91347|543|547|354276", 4013880], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_003261215": ["2|1224|1236|91347|543|547|354276", 5016502], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900075185": ["2|1224|1236|91347|543|547|354276", 4392401], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_002192395": ["2|1224|1236|91347|543|547|354276", 4556770], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_000958745": ["2|1224|1236|91347|543|547|354276", 4382425], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_001025055": ["2|1224|1236|91347|543|547|354276", 4360879], 
"k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900076455": ["2|1224|1236|91347|543|547|354276", 4404432]}, "merged_taxon": {}}\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r f5df500fcc3c test-data/test-db/test-db.1.bt2
b
Binary file test-data/test-db/test-db.1.bt2 has changed
b
diff -r 000000000000 -r f5df500fcc3c test-data/test-db/test-db.2.bt2
b
Binary file test-data/test-db/test-db.2.bt2 has changed
b
diff -r 000000000000 -r f5df500fcc3c test-data/test-db/test-db.3.bt2
b
Binary file test-data/test-db/test-db.3.bt2 has changed
b
diff -r 000000000000 -r f5df500fcc3c test-data/test-db/test-db.4.bt2
b
Binary file test-data/test-db/test-db.4.bt2 has changed
b
diff -r 000000000000 -r f5df500fcc3c test-data/test-db/test-db.pkl
b
Binary file test-data/test-db/test-db.pkl has changed
b
diff -r 000000000000 -r f5df500fcc3c test-data/test-db/test-db.rev.1.bt2
b
Binary file test-data/test-db/test-db.rev.1.bt2 has changed
b
diff -r 000000000000 -r f5df500fcc3c test-data/test-db/test-db.rev.2.bt2
b
Binary file test-data/test-db/test-db.rev.2.bt2 has changed
b
diff -r 000000000000 -r f5df500fcc3c test-data/test_database.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_database.loc Mon Apr 19 20:56:20 2021 +0000
b
@@ -0,0 +1,6 @@
+# Tab separated with 4 columns:
+# - value (Galaxy records this in the Galaxy DB)
+# - name (Galaxy shows this in the UI)
+# - dbkey (database key; third column of the metaphlan_database data table)
+# - path (folder name containing the MetaPhlAn DB)
+test-db-20210409 "Test Database" test-db ${__HERE__}/test-db
\ No newline at end of file
b
diff -r 000000000000 -r f5df500fcc3c tool-data/metaphlan_database.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/metaphlan_database.loc.sample Mon Apr 19 20:56:20 2021 +0000
b
@@ -0,0 +1,4 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of metagenomics files.  
+#The file has this format (whitespace characters are TAB characters), with columns: value, name, dbkey, path
+#02_16_2014  MetaPhlAn2 clade-specific marker genes db_v20 /path/to/data
\ No newline at end of file
b
diff -r 000000000000 -r f5df500fcc3c tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Mon Apr 19 20:56:20 2021 +0000
b
@@ -0,0 +1,6 @@
+<tables>
+    <table name="metaphlan_database" comment_char="#">
+        <columns>value, name, dbkey, path</columns>
+        <file path="tool-data/metaphlan_database.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r f5df500fcc3c tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Mon Apr 19 20:56:20 2021 +0000
b
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<tables>
+    <table name="metaphlan_database" comment_char="#">
+        <columns>value, name, dbkey, path</columns>
+        <file path="${__HERE__}/test-data/test_database.loc" />
+    </table>
+</tables>
\ No newline at end of file