Repository 'obi_convert'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/obi_convert

Changeset 3:374fd9feb032 (2021-05-10)
Previous changeset 2:4d705e3443a7 (2019-11-28) Next changeset 4:e328ced6cf0a (2024-03-20)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/obitools commit 5d3c7a7b3f7d687bb03ef7993ddf1a6507d655bd"
modified:
macros.xml
obiconvert.xml
test-data/output_ngsfilter_error_3.fastq
test-data/output_ngsfilter_unidentified.fastq
test-data/output_obisort.fastq
added:
test-data/illuminapairedend.output.fastq.gz
test-data/output_ngsfilter_error_3.fastq.gz
test-data/output_obiannotate.fasta.gz
test-data/output_obiclean_advanced.fasta.gz
test-data/output_obigrep_predicat.fasta.gz
test-data/output_obiuniq.fasta.gz
test-data/output_obiuniq_family.fasta.gz
test-data/wolf_small.F.fastq.gz
test-data/wolf_small.R.fastq.gz
b
diff -r 4d705e3443a7 -r 374fd9feb032 macros.xml
--- a/macros.xml Thu Nov 28 15:51:00 2019 -0500
+++ b/macros.xml Mon May 10 19:36:34 2021 +0000
[
@@ -6,7 +6,8 @@
         </requirements>
     </xml>
 
-    <token name="@TOOL_VERSION@">1.2.11</token>
+    <token name="@TOOL_VERSION@">1.2.13</token>
+    <token name="@PROFILE@">21.01</token>
 
     <xml name="stdio">
         <stdio>
@@ -14,6 +15,80 @@
         </stdio>
     </xml>
 
+    <token name="@INPUT_FORMATS@">fastqsanger,fastqsanger.gz,fastqsolexa,fastqsolexa.gz,fasta,fasta.gz</token>
+    <!-- stage the input dataset in the working directory under the fixed name "input":
+         datatypes whose extension ends in ".gz" are decompressed, anything else is symlinked -->
+    <token name="@GUNZIP_INPUT@"><![CDATA[
+        #if $input.ext.endswith(".gz")
+            gunzip -c '$input' > input &&
+        #else
+            ln -s '$input' input &&
+        #end if
+    ]]></token>
+    <!-- command fragment: pipes the preceding command's stdout through gzip
+         when the input datatype extension ends in ".gz"; expands to nothing otherwise -->
+    <token name="@GZIP_OUTPUT@"><![CDATA[
+        #if $input.ext.endswith(".gz")
+        | gzip -c 
+    #end if
+    ]]></token>
+    
+    <!-- generate galaxy.json to determine output format
+        - by default same as input; fasta is used as fallback when the input
+          datatype is neither fasta nor fastq (e.g. txt/tabular datasets)
+        - if out_format can be specified
+          - fasta/fastq depending on $out_format (if != "")
+          - or fasta if the tool does not support choosing the output format, e.g. obiuniq which always outputs fasta
+        - gz if $input is zipped
+
+        A list of outputs to include in the json can be given by
+        setting a variable $outputs = [...] listing pairs of output names and the actual output
+        (defaults to [("output", $output)])
+    -->
+    <token name="@GENERATE_GALAXY_JSON@"><![CDATA[
+        #import json
+
+        ## use the caller supplied $outputs list if there is one
+        #try
+            #silent $outputs[0]
+        #except
+            #set outputs = [("output", $output)]
+        #end try
+
+        ## start from the input datatype; the else branch guarantees that ext
+        ## is defined for inputs that are neither fasta nor fastq
+        #if $input.ext.startswith("fastq")
+            #set ext = "fastqsanger"
+        #else
+            #set ext = "fasta"
+        #end if
+
+        ## an explicitly selected output format wins; tools without an
+        ## out_format parameter end up in the except branch
+        #try
+            #if $out_format == "fasta"
+                #set ext = "fasta"
+            #else if $out_format == "fastq"
+                #set ext = "fastqsanger"
+            #end if
+        #except
+            #set ext = "fasta"
+        #end try
+
+        #if $input.ext.endswith(".gz")
+            #set ext = ext + ".gz"
+        #end if
+
+        #set gxy_json = {}
+        #for oname, o in $outputs
+            #silent gxy_json[oname] = {"ext": ext}
+        #end for
+        && echo '${json.dumps(gxy_json)}' >> galaxy.json
+    ]]></token>
+    
+    <token name="@OUT_FORMAT@"><![CDATA[
+        ## command fragment: turns the selected out_format value into the
+        ## option --<value>-output; expands to nothing when no value is selected
+        #if $out_format
+            --${out_format}-output
+        #end if
+    ]]></token>
+    
+    <!-- optional selector for the output sequence format (fasta or fastq) -->
+    <xml name="out_format_macro">
+        <param name="out_format" type="select" optional="true" label="Output data type" help="For FASTA/Q the default output type is the same as the input type">
+            <option value="fasta">fasta</option>
+            <option value="fastq">fastq</option>
+        </param>
+    </xml>
+
     <token name="@OBITOOLS_LINK@">
 <![CDATA[
 --------
@@ -105,15 +180,43 @@
         <option value="taxid">taxid</option>
     </xml>
 
-    <xml name="inputtype">
-        <option value="--genbank">genbank</option>
-        <option value="--embl">embl</option>
-        <option value="--sanger">sanger</option>
-        <option value="--solexa">solexa</option>
-        <option value="--ecopcr">ecopcr</option>
-        <option value="--ecopcrdb">ecopcrdb</option>
-        <option value="--fasta" selected="true">fasta</option>
+    <xml name="input_format_options_macro">
+        <!-- collapsed section holding the optional input format and sequence type overrides -->
+        <section name="input_format_options" title="Input format options" expanded="false">
+            <param name="options_inputtype" type="select" optional="true" label="Specify the input datatype" help="default: determine automatically (should only be necessary for non FASTA/FASTQ datasets)">
+                <option value="--genbank">genbank</option>
+                <option value="--embl">embl</option>
+                <option value="--sanger">sanger</option>
+                <option value="--solexa">solexa</option>
+                <option value="--ecopcr">ecopcr</option>
+                <option value="--ecopcrdb">ecopcrdb</option>
+                <option value="--fasta">fasta (including obitools fasta extensions)</option>
+                <option value="--raw-fasta">raw fasta (more tolerant format variant)</option>
+            </param>
+            <param name="options_seqtype" type="select" optional="true" label="Specify the sequence datatype" >
+                <option value="--nuc">nucleic</option>
+                <option value="--prot">protein</option>
+            </param>
+        </section>
     </xml>
+    
+    <token name="@INPUT_FORMAT@"><![CDATA[
+        ## an explicit user choice always wins; otherwise derive the flag from
+        ## the Galaxy datatype of the input
+        #if $input_format_options.options_inputtype
+            $input_format_options.options_inputtype
+        #else
+            #if $input.ext.startswith("fasta")
+                --fasta
+            #else if $input.ext.startswith("fastqsolexa")
+                ## input file is in fastq nucleic format produced by solexa sequencer
+                --solexa
+            #else if $input.ext.startswith("fastq")
+                ## input file is in sanger fastq nucleic format (standard fastq)
+                --sanger
+            #end if
+            ## any other datatype (e.g. txt, tabular): emit no format flag so
+            ## that the format is determined automatically, as the help of
+            ## options_inputtype states
+        #end if
+        #if $input_format_options.options_seqtype
+            $input_format_options.options_seqtype
+        #end if
+    ]]></token>
 
     <xml name="sanitizer">
         <sanitizer invalid_char="test">
b
diff -r 4d705e3443a7 -r 374fd9feb032 obiconvert.xml
--- a/obiconvert.xml Thu Nov 28 15:51:00 2019 -0500
+++ b/obiconvert.xml Mon May 10 19:36:34 2021 +0000
[
@@ -1,113 +1,98 @@
-<tool id="obi_convert" name="obiconvert" version="@TOOL_VERSION@">
-    <description>converts sequence files to different output formats</description>
-    <macros>
-        <import>macros.xml</import>
-    </macros>
-    <expand macro="requirements"/>
-    <expand macro="stdio"/>
-
-    <command>
-        <![CDATA[
-        obiconvert
-        #if $db
-            -d '$db'
-        #end if
-        #if $tax
-            -t '$tax'
-        #end if
-        ${options_inputtype}
-        ${options_seqtype}
-        --${out_format}-output
-        ${uppercase}
-        ${ecopcrdb}
-        #if str( $ecopcrdb) == "--ecopcrdb"
-            --ecopcrdb-output=${ecopcrdb_output}
-        #end if
-        '${input}' > '${output}'
-        ]]>
-
-    </command>
-
-    <inputs>
-        <param name="input" type="data" format="fastq,fasta,txt,tabular" label="Input sequences file" help="database used for the in silico PCR. The database must be in the ecoPCR format (for example output of obiconvert)" />
-        <param name="db" type="data" optional="true" format="txt,tabular" label="ecoPCR taxonomy database" />
-        <param name="tax" type="data" optional="true" format="txt,tabular" label="NCBI taxonomy dump repository"/>
-        <param name="options_inputtype" type="select" label="Specify the input datatype">
-            <expand macro="inputtype"/>
-        </param>
-        <param name="options_seqtype" type="select" label="Specify the sequence datatype" >
-            <option value="--nuc" selected="true">nucleic</option>
-            <option value="--prot">protein</option>
-        </param>
-        <param name="out_format" type="select" label="Output data type">
-            <option value="fasta" selected="true">fasta</option>
-            <option value="fastq">fastq</option>
-        </param>
-        <param name="ecopcrdb" type="boolean" truevalue="--ecopcrdb" falsevalue="" label="Do you want to create an ecoPCR database from sequence records results?"  help="Use this option if you want to generate an ecoPCR database output file" />
-        <param name="uppercase" type="boolean" truevalue="--uppercase" falsevalue=""  label="Do you want to print sequences in upper case?" />
-
-    </inputs>
-    <outputs>
-        <data format="txt" name="ecopcrdb_output" label="result.ecopcrdb with ${tool.name} on ${on_string}">
-            <filter>ecopcrdb == True</filter>
-        </data>
-        <data format="fasta" name="output" label="output with ${tool.name} on ${on_string}" >
-            <change_format>
-                <when input="out_format" value="fastq" format="fastq" />
-            </change_format>
-        </data>
-    </outputs>
-    <tests>
-        <test>
-            <param name="input" value="output_obisort.fasta" />
-            <param name="options_inputtype" value="fasta"/>
-            <param name="options_seqtype" value="--nuc"/>
-            <param name="out_format" value="fasta"/>
-            <param name="ecopcrdb" value="False"/>
-            <param name="uppercase" value="True"/>
-            <output name="output" file="output_obiconvert.fasta" ftype="fasta"/>
-        </test>
-    </tests>
-    <help><![CDATA[
-
-.. class:: infomark
-
-**What it does**
-
-obiconvert converts sequence files to different output formats. See the documentation for more details on the different formats.
-
-Input files can be in :
-
-fasta format
-
-extended OBITools fasta format
-
-Sanger fastq format
-
-Solexa fastq format
-
-ecoPCR format
-
-ecoPCR database format
-
-GenBank format
-
-EMBL format
-
-obiconvert converts those files to the :
-
-extended OBITools fasta format
-
-Sanger fastq format
-
-ecoPCR database format
-
-If no file name is specified, data is read from standard input.
-
-@OBITOOLS_LINK@
-
-    ]]>
-
-    </help>
-    <expand macro="citation" />
-</tool>
+<tool id="obi_convert" name="obiconvert" version="@TOOL_VERSION@" profile="@PROFILE@">
+    <description>converts sequence files to different output formats</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+    <command><![CDATA[
+        ## stage the (possibly gzipped) dataset in the working directory as "input"
+        @GUNZIP_INPUT@
+
+        obiconvert
+        #if $db
+            -d '$db'
+        #end if
+        #if $tax
+            -t '$tax'
+        #end if
+        @INPUT_FORMAT@
+        @OUT_FORMAT@
+        ${uppercase}
+        ## NOTE(review): OBITools appears to document --ecopcrdb as an input
+        ## format option; confirm that it is meant to be passed together with
+        ## --ecopcrdb-output
+        ${ecopcrdb}
+        #if str( $ecopcrdb) == "--ecopcrdb"
+            --ecopcrdb-output='${ecopcrdb_output}'
+        #end if
+        input
+        @GZIP_OUTPUT@
+        > '${output}'
+        ## write the datatype of the format="auto" output to galaxy.json
+        @GENERATE_GALAXY_JSON@
+    ]]></command>
+    <inputs>
+        <!-- sequence file to convert; txt/tabular are accepted in addition to the FASTA/FASTQ datatypes -->
+        <param name="input" type="data" format="@INPUT_FORMATS@,txt,tabular" label="Input sequences file" help="Sequence file to convert (FASTA, FASTQ, GenBank, EMBL, ecoPCR or ecoPCR database format)" />
+        <param name="db" type="data" optional="true" format="txt,tabular" label="ecoPCR taxonomy database" />
+        <param name="tax" type="data" optional="true" format="txt,tabular" label="NCBI taxonomy dump repository"/>
+        <expand macro="input_format_options_macro"/>
+        <expand macro="out_format_macro"/>
+        <param name="ecopcrdb" type="boolean" truevalue="--ecopcrdb" falsevalue="" label="Do you want to create an ecoPCR database from sequence records results?"  help="Use this option if you want to generate an ecoPCR database output file" />
+        <param name="uppercase" type="boolean" truevalue="--uppercase" falsevalue=""  label="Do you want to print sequences in upper case?" />
+    </inputs>
+    <outputs>
+        <!-- only created when the ecopcrdb checkbox is set -->
+        <data format="txt" name="ecopcrdb_output" label="${tool.name} on ${on_string}: ecopcrdb">
+            <filter>ecopcrdb == True</filter>
+        </data>
+        <!-- format="auto": the command section appends an "ext" entry for this output to galaxy.json -->
+        <data format="auto" name="output"/>
+    </outputs>
+    <tests>
+        <!-- FASTA input to FASTA output with upper-casing enabled and no ecoPCR database output -->
+        <test>
+            <param name="input" value="output_obisort.fasta" />
+            <param name="options_inputtype" value="--fasta"/>
+            <param name="options_seqtype" value="--nuc"/>
+            <param name="out_format" value="fasta"/>
+            <param name="ecopcrdb" value="false"/>
+            <param name="uppercase" value="true"/>
+            <output name="output" file="output_obiconvert.fasta" ftype="fasta"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+.. class:: infomark
+
+**What it does**
+
+obiconvert converts sequence files to different output formats. See the documentation for more details on the different formats.
+
+Input files can be in :
+
+fasta format
+
+extended OBITools fasta format
+
+Sanger fastq format
+
+Solexa fastq format
+
+ecoPCR format
+
+ecoPCR database format
+
+GenBank format
+
+EMBL format
+
+obiconvert converts those files to the :
+
+extended OBITools fasta format
+
+Sanger fastq format
+
+ecoPCR database format
+
+If no file name is specified, data is read from standard input.
+
+@OBITOOLS_LINK@
+
+    ]]>
+
+    </help>
+    <expand macro="citation" />
+</tool>
b
diff -r 4d705e3443a7 -r 374fd9feb032 test-data/illuminapairedend.output.fastq.gz
b
Binary file test-data/illuminapairedend.output.fastq.gz has changed
b
diff -r 4d705e3443a7 -r 374fd9feb032 test-data/output_ngsfilter_error_3.fastq
b
Binary file test-data/output_ngsfilter_error_3.fastq has changed
b
diff -r 4d705e3443a7 -r 374fd9feb032 test-data/output_ngsfilter_error_3.fastq.gz
b
Binary file test-data/output_ngsfilter_error_3.fastq.gz has changed
b
diff -r 4d705e3443a7 -r 374fd9feb032 test-data/output_ngsfilter_unidentified.fastq
b
Binary file test-data/output_ngsfilter_unidentified.fastq has changed
b
diff -r 4d705e3443a7 -r 374fd9feb032 test-data/output_obiannotate.fasta.gz
b
Binary file test-data/output_obiannotate.fasta.gz has changed
b
diff -r 4d705e3443a7 -r 374fd9feb032 test-data/output_obiclean_advanced.fasta.gz
b
Binary file test-data/output_obiclean_advanced.fasta.gz has changed
b
diff -r 4d705e3443a7 -r 374fd9feb032 test-data/output_obigrep_predicat.fasta.gz
b
Binary file test-data/output_obigrep_predicat.fasta.gz has changed
b
diff -r 4d705e3443a7 -r 374fd9feb032 test-data/output_obisort.fastq
--- a/test-data/output_obisort.fastq Thu Nov 28 15:51:00 2019 -0500
+++ b/test-data/output_obisort.fastq Mon May 10 19:36:34 2021 +0000
b
b"@@ -1,172 +1,232 @@\n->HELIUM_000100422_612GNAAXX:7:57:18459:16145#0/2_CONS_SUB_SUB_CMP count=64; merged_sample={'26a_F040644': 64}; obiclean_singletoncount=0; obiclean_internalcount=0; seq_length=99; obiclean_headcount=1; seq_rank=5; obiclean_samplecount=1; obiclean_status={'26a_F040644': 'h'}; obiclean_head=True; \n-ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca\n-gcctgaaactcaaaggacttggcggtgctttacatccct\n->HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/2_CONS_SUB_SUB count=61; merged_sample={'29a_F260619': 30, '15a_F730814': 31}; obiclean_singletoncount=0; obiclean_internalcount=0; seq_length=100; obiclean_headcount=2; seq_rank=4; obiclean_samplecount=2; obiclean_status={'29a_F260619': 'h', '15a_F730814': 'h'}; obiclean_head=True; \n-ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat\n-agcttaaaactcaaaggacttggcggtgctttataccctt\n->HELIUM_000100422_612GNAAXX:7:6:9274:14951#0/2_CONS_SUB_SUB count=46; merged_sample={'13a_F730603': 46}; obiclean_singletoncount=0; obiclean_internalcount=0; seq_length=100; obiclean_headcount=1; seq_rank=8; obiclean_samplecount=1; obiclean_status={'13a_F730603': 'h'}; obiclean_head=True; \n-ctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactaccggcaat\n-agcttaaaactcaaaggacttggcggtgctttataccctt\n->HELIUM_000100422_612GNAAXX:7:13:5742:9859#0/2_CONS_SUB_SUB count=7; merged_sample={'29a_F260619': 7}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=4; obiclean_headcount=0; seq_rank=16; obiclean_samplecount=1; obiclean_status={'29a_F260619': 's'}; obiclean_head=True; \n+@HELIUM_000100422_612GNAAXX:7:57:18459:16145#0/2_CONS_SUB_SUB_CMP count=64; merged_sample={'26a_F040644': 64}; obiclean_singletoncount=0; obiclean_internalcount=0; seq_length=99; obiclean_headcount=1; seq_rank=5; obiclean_samplecount=1; obiclean_status={'26a_F040644': 'h'}; obiclean_head=True; 
\n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaacagcctgaaactcaaaggacttggcggtgctttacatccct\n++\n+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n+@HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/2_CONS_SUB_SUB count=61; merged_sample={'29a_F260619': 30, '15a_F730814': 31}; obiclean_singletoncount=0; obiclean_internalcount=0; seq_length=100; obiclean_headcount=2; seq_rank=4; obiclean_samplecount=2; obiclean_status={'29a_F260619': 'h', '15a_F730814': 'h'}; obiclean_head=True; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaatagcttaaaactcaaaggacttggcggtgctttataccctt\n++\n+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n+@HELIUM_000100422_612GNAAXX:7:6:9274:14951#0/2_CONS_SUB_SUB count=46; merged_sample={'13a_F730603': 46}; obiclean_singletoncount=0; obiclean_internalcount=0; seq_length=100; obiclean_headcount=1; seq_rank=8; obiclean_samplecount=1; obiclean_status={'13a_F730603': 'h'}; obiclean_head=True; \n+ctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactaccggcaatagcttaaaactcaaaggacttggcggtgctttataccctt\n++\n+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n+@HELIUM_000100422_612GNAAXX:7:13:5742:9859#0/2_CONS_SUB_SUB count=7; merged_sample={'29a_F260619': 7}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=4; obiclean_headcount=0; seq_rank=16; obiclean_samplecount=1; obiclean_status={'29a_F260619': 's'}; obiclean_head=True; \n tttt\n->HELIUM_000100422_612GNAAXX:7:89:10281:5930#0/2_CONS_SUB_SUB_CMP count=3; merged_sample={'29a_F260619': 3}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=100; obiclean_headcount=0; seq_rank=6; obiclean_samplecount=1; obiclean_status={'29a_F260619': 's'}; obiclean_head=True; 
\n-ttagccctaaacacaaataattacacaaacaaaattgttcaccagagtactagcggcaac\n-agcttaaaactcaaaggacttggcggtgctttataccctt\n->HELIUM_000100422_612GNAAXX:7:29:15520:18035#0/2_CONS_SUB_SUB count=2; merged_sample={'29a_F260619': 2}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=100; obiclean_headcount=0; seq_rank=7; obiclean_sample"..b"M_000100422_612GNAAXX:7:10:9237:10532#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'29a_F260619': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=100; obiclean_headcount=0; seq_rank=51; obiclean_samplecount=1; obiclean_status={'29a_F260619': 'i'}; obiclean_head=False; \n+ttagccctaaacacaagtaattaatataacaaaataattcgccagagtactaccggcaatagcttaaaactcaaaggacttggcggtgctttataccctt\n++\n+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n+@HELIUM_000100422_612GNAAXX:7:7:11998:4462#0/2_CONS_SUB_SUB count=1; merged_sample={'26a_F040644': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=99; obiclean_headcount=0; seq_rank=52; obiclean_samplecount=1; obiclean_status={'26a_F040644': 'i'}; obiclean_head=False; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtagtactagcaacagcctgaaactcaaaggacttggcggtgctttacatccct\n++\n+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n+@HELIUM_000100422_612GNAAXX:7:111:18277:17779#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'26a_F040644': 1}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=99; obiclean_headcount=0; seq_rank=53; obiclean_samplecount=1; obiclean_status={'26a_F040644': 's'}; obiclean_head=True; \n+ttagccctaaacatagataattttacaacaaaataattcgccagaggactactagcaatagcctgaaactcaaaggacttggcggtgctttacatccct\n++\n+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n+@HELIUM_000100422_612GNAAXX:7:3:17077:6562#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'15a_F730814': 1}; obiclean_singletoncount=0; 
obiclean_internalcount=1; seq_length=100; obiclean_headcount=0; seq_rank=54; obiclean_samplecount=1; obiclean_status={'15a_F730814': 'i'}; obiclean_head=False; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactgccggcaatagcttaaaactcaaaggacttggcggtgctttataccctt\n++\n+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n+@HELIUM_000100422_612GNAAXX:7:19:4311:13343#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'26a_F040644': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=99; obiclean_headcount=0; seq_rank=55; obiclean_samplecount=1; obiclean_status={'26a_F040644': 'i'}; obiclean_head=False; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactacgagcaacagcctgaaactcaaaggacttggcggtgctttacatccct\n++\n+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n+@HELIUM_000100422_612GNAAXX:7:30:1800:7833#0/2_CONS_SUB_SUB count=1; merged_sample={'15a_F730814': 1}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=100; obiclean_headcount=0; seq_rank=56; obiclean_samplecount=1; obiclean_status={'15a_F730814': 's'}; obiclean_head=True; \n+ttagccctaaacacaagacattaatataacgagattaatcgacagagtactaccggctatagcttaaaactcaaaggacttggcggtgctttataccctt\n++\n+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n+@HELIUM_000100422_612GNAAXX:7:108:9222:18258#0/2_CONS_SUB_SUB count=1; merged_sample={'13a_F730603': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=100; obiclean_headcount=0; seq_rank=57; obiclean_samplecount=1; obiclean_status={'13a_F730603': 'i'}; obiclean_head=False; \n+ctagccttaaacacaaatagttatgcagacaaaactattcgccagagtactaccggcaatagcttaaaactcaaaggacttggcggtgctttataccctt\n++\n+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n+@HELIUM_000100422_612GNAAXX:7:38:3005:20881#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'26a_F040644': 1}; 
obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=99; obiclean_headcount=0; seq_rank=58; obiclean_samplecount=1; obiclean_status={'26a_F040644': 'i'}; obiclean_head=False; \n+ttagccctaaacatgaacattcaataaacaagaatgttcgccagagtactactagcaacagcctgaaactcaaaggacttggcggtgctttacatccct\n++\n+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n"
b
diff -r 4d705e3443a7 -r 374fd9feb032 test-data/output_obiuniq.fasta.gz
b
Binary file test-data/output_obiuniq.fasta.gz has changed
b
diff -r 4d705e3443a7 -r 374fd9feb032 test-data/output_obiuniq_family.fasta.gz
b
Binary file test-data/output_obiuniq_family.fasta.gz has changed
b
diff -r 4d705e3443a7 -r 374fd9feb032 test-data/wolf_small.F.fastq.gz
b
Binary file test-data/wolf_small.F.fastq.gz has changed
b
diff -r 4d705e3443a7 -r 374fd9feb032 test-data/wolf_small.R.fastq.gz
b
Binary file test-data/wolf_small.R.fastq.gz has changed