Repository 'obi_uniq'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/obi_uniq

Changeset 3:ec215ed98831 (2021-05-10)
Previous changeset 2:c0f7b58b4ce4 (2019-11-28) Next changeset 4:d4bea99366f9 (2024-03-20)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/obitools commit 5d3c7a7b3f7d687bb03ef7993ddf1a6507d655bd"
modified:
macros.xml
obiuniq.xml
test-data/output_ngsfilter_error_3.fastq
test-data/output_ngsfilter_unidentified.fastq
test-data/output_obisort.fastq
added:
test-data/illuminapairedend.output.fastq.gz
test-data/output_ngsfilter_error_3.fastq.gz
test-data/output_obiannotate.fasta.gz
test-data/output_obiclean_advanced.fasta.gz
test-data/output_obigrep_predicat.fasta.gz
test-data/output_obiuniq.fasta.gz
test-data/output_obiuniq_family.fasta.gz
test-data/wolf_small.F.fastq.gz
test-data/wolf_small.R.fastq.gz
b
diff -r c0f7b58b4ce4 -r ec215ed98831 macros.xml
--- a/macros.xml Thu Nov 28 15:52:27 2019 -0500
+++ b/macros.xml Mon May 10 19:34:29 2021 +0000
[
@@ -6,7 +6,8 @@
         </requirements>
     </xml>
 
-    <token name="@TOOL_VERSION@">1.2.11</token>
+    <token name="@TOOL_VERSION@">1.2.13</token>
+    <token name="@PROFILE@">21.01</token>
 
     <xml name="stdio">
         <stdio>
@@ -14,6 +15,80 @@
         </stdio>
     </xml>
 
+    <token name="@INPUT_FORMATS@">fastqsanger,fastqsanger.gz,fastqsolexa,fastqsolexa.gz,fasta,fasta.gz</token>
+    <token name="@GUNZIP_INPUT@"><![CDATA[
+        #if $input.ext.endswith(".gz")
+            gunzip -c '$input' > input &&
+        #else
+            ln -s '$input' input &&
+        #end if
+    ]]></token>
+    <token name="@GZIP_OUTPUT@"><![CDATA[
+        #if $input.ext.endswith(".gz")
+        | gzip -c 
+    #end if
+    ]]></token>
+    
+    <!-- generate galaxy.json to determine output format
+        - if the tool defines $out_format:
+          - fasta/fastqsanger according to $out_format (if != "")
+          - otherwise derived from the input datatype (fastq* gives fastqsanger, fasta* gives fasta)
+        - fasta if the tool does not support choosing the output format, e.g. obiuniq which always outputs fasta
+        - ".gz" is appended if $input is gzipped
+    
+        A list of outputs to include in the json can be given by
+        setting a variable $outputs = [...] listing pairs of output names and the actual output
+        (defaults to [("output", $output)])
+    -->
+    <token name="@GENERATE_GALAXY_JSON@"><![CDATA[
+        #import json
+
+        #try:
+            #silent $outputs[0]
+        #except 
+            #set outputs = [("output", $output)]
+        #end try
+
+        #if $input.ext.startswith("fastq")
+            #set ext = "fastqsanger"
+        #else if $input.ext.startswith("fasta")
+            #set ext = "fasta"
+        #end if
+
+        #try
+            #if $out_format == "fasta"
+                #set ext = "fasta"
+            #else if $out_format == "fastq"
+                #set ext = "fastqsanger"
+            #end if
+        #except
+            #set ext = "fasta"
+        #end try
+
+        #if $input.ext.endswith(".gz")
+            #set ext = ext + ".gz"
+        #end if
+
+        #set gxy_json = {}
+        #for oname, o in $outputs
+            #silent gxy_json[oname] = {"ext": ext}
+        #end for
+        && echo '${json.dumps(gxy_json)}' >> galaxy.json
+    ]]></token>
+    
+    <token name="@OUT_FORMAT@"><![CDATA[
+        #if $out_format
+            --${out_format}-output
+        #end if
+    ]]></token>
+    
+    <xml name="out_format_macro">
+        <param name="out_format" type="select" optional="true" label="Output data type" help="For FASTA/Q the default output type is the same as the input type">
+            <option value="fasta">fasta</option>
+            <option value="fastq">fastq</option>
+        </param>
+    </xml>
+
     <token name="@OBITOOLS_LINK@">
 <![CDATA[
 --------
@@ -105,15 +180,43 @@
         <option value="taxid">taxid</option>
     </xml>
 
-    <xml name="inputtype">
-        <option value="--genbank">genbank</option>
-        <option value="--embl">embl</option>
-        <option value="--sanger">sanger</option>
-        <option value="--solexa">solexa</option>
-        <option value="--ecopcr">ecopcr</option>
-        <option value="--ecopcrdb">ecopcrdb</option>
-        <option value="--fasta" selected="true">fasta</option>
+    <xml name="input_format_options_macro">
+        <section name="input_format_options" title="Input format options" expanded="false">
+            <param name="options_inputtype" type="select"  optional="true" label="Specify the input datatype" help="default: determine automatically (should only be necessary for non FASTA/FASTQ datasets)">
+                <option value="--genbank">genbank</option>
+                <option value="--embl">embl</option>
+                <option value="--sanger">sanger</option>
+                <option value="--solexa">solexa</option>
+                <option value="--ecopcr">ecopcr</option>
+                <option value="--ecopcrdb">ecopcrdb</option>
+                <option value="--fasta">fasta (including obitools fasta extensions)</option>
+                <option value="--raw-fasta">raw fasta (more tolerant format variant)</option>
+            </param>
+            <param name="options_seqtype" type="select" optional="true" label="Specify the sequence datatype" >
+                <option value="--nuc">nucleic</option>
+                <option value="--prot">protein</option>
+            </param>
+        </section>
     </xml>
+    
+    <token name="@INPUT_FORMAT@"><![CDATA[
+        #if $input_format_options.options_inputtype
+            $input_format_options.options_inputtype
+        #else
+            #if $input.ext.startswith("fasta")
+                --fasta
+            #else if $input.ext.startswith("fastqsolexa")
+                ## input file is in fastq nucleic format produced by solexa sequencer
+                --solexa
+            #else
+                ## input file is in sanger fastq nucleic format (standard fastq)
+                --sanger
+            #end if
+        #end if
+        #if $input_format_options.options_seqtype
+            $input_format_options.options_seqtype
+        #end if
+    ]]></token>
 
     <xml name="sanitizer">
         <sanitizer invalid_char="test">
b
diff -r c0f7b58b4ce4 -r ec215ed98831 obiuniq.xml
--- a/obiuniq.xml Thu Nov 28 15:52:27 2019 -0500
+++ b/obiuniq.xml Mon May 10 19:34:29 2021 +0000
[
@@ -1,92 +1,86 @@
-<tool id="obi_uniq" name="obiuniq" version="@TOOL_VERSION@">
-    <macros>
-        <import>macros.xml</import>
-    </macros>
-    <expand macro="requirements"/>
-    <expand macro="stdio"/>
-<command>
-
-    <![CDATA[
-
-    obiuniq
-    #if str( $options_attribute) == "merge"
-        -m '$options_uniq'
-    #else
-        -c '$options_uniq'
-    #end if
-    ${mid}
-    ${prefix}
-    '$input' > '$output'
-
-    ]]>
-
-</command>
-
-<inputs>
-    <param name="input" type="data" format="fasta,fastq" label="Input sequences file" />
-    <param name="options_uniq" type="select" label="Attribute to merge" >
-        <expand macro="attributes"/>
-    </param>
-    <param name="options_attribute" type="select" label="Use specific option" >
-        <option value="merge" selected="true">merge</option>
-        <option value="category_attribute">category_attribute</option>
-    </param>
-    <param name="mid" type="boolean" checked="false" truevalue="-i" falsevalue="" label="Add a merged attribute containing the list of sequence record ids merged within this group" />
- <param name="prefix" type="boolean" checked="false" truevalue="-p" falsevalue="" label="Dereplicate through a prefix matching" />
-</inputs>
-<outputs>
-    <data format="fasta" name="output" label="output.fasta with ${tool.name} on ${on_string}" />
-</outputs>
-
-<tests>
-   <test>
-       <param name="input" value="output_ngsfilter_error_3.fastq" />
-       <param name="options_uniq" value="sample" />
-       <param name="options_attribute" value="merge" />
-       <param name="mid" value="False" />
-       <param name="prefix" value="False" />
-       <output name="output" file="output_obiuniq.fasta" ftype="fasta"/>
-   </test>
-   <test>
-       <param name="input" value="output_ngsfilter_error_3.fastq" />
-       <param name="options_uniq" value="family" />
-       <param name="options_attribute" value="category_attribute" />
-       <param name="mid" value="True" />
-       <param name="prefix" value="True" />
-       <output name="output" file="output_obiuniq_family.fasta" ftype="fasta"/>
-   </test>
-
-</tests>
-<help><![CDATA[
-
-.. class:: infomark
-
-**What it does**
-
-The obiuniq command is in some way analog to the standard Unix uniq -c command.
-
-Instead of working text line by text line as the standard Unix tool, the processing is done on sequence records.
-
-A sequence record is a complex object composed of an identifier, a set of attributes (key=value), a definition, and the sequence itself.
-
-The obiuniq command groups together sequence records. Then, for each group, a sequence record is printed.
-
-A group is defined by the sequence and optionally by the values of a set of attributes specified with the -c option.
-
-As the identifier, the set of attributes (key=value) and the definition of the sequence records that are grouped together may be different, two options (-m and -i) allow refining how these parts of the records are reported.
-
-\*  By default, only attributes with identical values within a group of sequence records are kept.
-
-\*  A count attribute is set to the total number of sequence records for each group.
-
-\*  For each attribute specified by the -m option, a new attribute whose key is prefixed by merged_ is created. These new attributes contain the number of times each value occurs within the group of sequence records.
-
-
-@OBITOOLS_LINK@
-
-
-
-]]>
-</help>
-<expand macro="citation" />
-</tool>
+<tool id="obi_uniq" name="obiuniq" version="@TOOL_VERSION@" profile="@PROFILE@">
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+    <command><![CDATA[
+    @GUNZIP_INPUT@
+
+    obiuniq
+    --without-progress-bar
+    #if str( $options_attribute) == "merge"
+        -m '$options_uniq'
+    #else
+        -c '$options_uniq'
+    #end if
+    ${mid}
+    ${prefix}
+    @INPUT_FORMAT@
+    input
+    @GZIP_OUTPUT@
+    > '$output'
+    @GENERATE_GALAXY_JSON@
+    ]]></command>
+    <inputs>
+        <param name="input" type="data" format="@INPUT_FORMATS@" label="Input sequences file" />
+        <param name="options_uniq" type="select" label="Attribute to merge" >
+            <expand macro="attributes"/>
+        </param>
+        <param name="options_attribute" type="select" label="Use specific option" >
+            <option value="merge" selected="true">merge</option>
+            <option value="category_attribute">category_attribute</option>
+        </param>
+        <param name="mid" type="boolean" checked="false" truevalue="-i" falsevalue="" label="Add a merged attribute containing the list of sequence record ids merged within this group" />
+     <param name="prefix" type="boolean" checked="false" truevalue="-p" falsevalue="" label="Dereplicate through a prefix matching" />
+        <expand macro="input_format_options_macro"/>
+    </inputs>
+    <outputs>
+        <data format="auto" name="output"/>
+    </outputs>
+    <tests>
+       <test>
+           <param name="input" value="output_ngsfilter_error_3.fastq" ftype="fastqsanger"/>
+           <param name="options_uniq" value="sample" />
+           <param name="options_attribute" value="merge" />
+           <param name="mid" value="False" />
+           <param name="prefix" value="False" />
+           <output name="output" file="output_obiuniq.fasta" ftype="fasta"/>
+       </test>
+       <test>
+           <param name="input" value="output_ngsfilter_error_3.fastq.gz" ftype="fastqsanger.gz"/>
+           <param name="options_uniq" value="family" />
+           <param name="options_attribute" value="category_attribute" />
+           <param name="mid" value="True" />
+           <param name="prefix" value="True" />
+           <output name="output" file="output_obiuniq_family.fasta.gz" ftype="fasta.gz" decompress="true"/>
+       </test>
+    </tests>
+    <help><![CDATA[
+    .. class:: infomark
+
+    **What it does**
+
+    The obiuniq command is in some ways analogous to the standard Unix uniq -c command.
+
+    Instead of working text line by text line as the standard Unix tool, the processing is done on sequence records.
+
+    A sequence record is a complex object composed of an identifier, a set of attributes (key=value), a definition, and the sequence itself.
+
+    The obiuniq command groups together sequence records. Then, for each group, a sequence record is printed.
+
+    A group is defined by the sequence and optionally by the values of a set of attributes specified with the -c option.
+
+    As the identifier, the set of attributes (key=value) and the definition of the sequence records that are grouped together may be different, two options (-m and -i) allow refining how these parts of the records are reported.
+
+    \*  By default, only attributes with identical values within a group of sequence records are kept.
+
+    \*  A count attribute is set to the total number of sequence records for each group.
+
+    \*  For each attribute specified by the -m option, a new attribute whose key is prefixed by merged_ is created. These new attributes contain the number of times each value occurs within the group of sequence records.
+
+
+    @OBITOOLS_LINK@
+    ]]></help>
+    <expand macro="citation" />
+</tool>
b
diff -r c0f7b58b4ce4 -r ec215ed98831 test-data/illuminapairedend.output.fastq.gz
b
Binary file test-data/illuminapairedend.output.fastq.gz has changed
b
diff -r c0f7b58b4ce4 -r ec215ed98831 test-data/output_ngsfilter_error_3.fastq
b
Binary file test-data/output_ngsfilter_error_3.fastq has changed
b
diff -r c0f7b58b4ce4 -r ec215ed98831 test-data/output_ngsfilter_error_3.fastq.gz
b
Binary file test-data/output_ngsfilter_error_3.fastq.gz has changed
b
diff -r c0f7b58b4ce4 -r ec215ed98831 test-data/output_ngsfilter_unidentified.fastq
b
Binary file test-data/output_ngsfilter_unidentified.fastq has changed
b
diff -r c0f7b58b4ce4 -r ec215ed98831 test-data/output_obiannotate.fasta.gz
b
Binary file test-data/output_obiannotate.fasta.gz has changed
b
diff -r c0f7b58b4ce4 -r ec215ed98831 test-data/output_obiclean_advanced.fasta.gz
b
Binary file test-data/output_obiclean_advanced.fasta.gz has changed
b
diff -r c0f7b58b4ce4 -r ec215ed98831 test-data/output_obigrep_predicat.fasta.gz
b
Binary file test-data/output_obigrep_predicat.fasta.gz has changed
b
diff -r c0f7b58b4ce4 -r ec215ed98831 test-data/output_obisort.fastq
--- a/test-data/output_obisort.fastq Thu Nov 28 15:52:27 2019 -0500
+++ b/test-data/output_obisort.fastq Mon May 10 19:34:29 2021 +0000
b
b"@@ -1,172 +1,232 @@\n->HELIUM_000100422_612GNAAXX:7:57:18459:16145#0/2_CONS_SUB_SUB_CMP count=64; merged_sample={'26a_F040644': 64}; obiclean_singletoncount=0; obiclean_internalcount=0; seq_length=99; obiclean_headcount=1; seq_rank=5; obiclean_samplecount=1; obiclean_status={'26a_F040644': 'h'}; obiclean_head=True; \n-ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca\n-gcctgaaactcaaaggacttggcggtgctttacatccct\n->HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/2_CONS_SUB_SUB count=61; merged_sample={'29a_F260619': 30, '15a_F730814': 31}; obiclean_singletoncount=0; obiclean_internalcount=0; seq_length=100; obiclean_headcount=2; seq_rank=4; obiclean_samplecount=2; obiclean_status={'29a_F260619': 'h', '15a_F730814': 'h'}; obiclean_head=True; \n-ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat\n-agcttaaaactcaaaggacttggcggtgctttataccctt\n->HELIUM_000100422_612GNAAXX:7:6:9274:14951#0/2_CONS_SUB_SUB count=46; merged_sample={'13a_F730603': 46}; obiclean_singletoncount=0; obiclean_internalcount=0; seq_length=100; obiclean_headcount=1; seq_rank=8; obiclean_samplecount=1; obiclean_status={'13a_F730603': 'h'}; obiclean_head=True; \n-ctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactaccggcaat\n-agcttaaaactcaaaggacttggcggtgctttataccctt\n->HELIUM_000100422_612GNAAXX:7:13:5742:9859#0/2_CONS_SUB_SUB count=7; merged_sample={'29a_F260619': 7}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=4; obiclean_headcount=0; seq_rank=16; obiclean_samplecount=1; obiclean_status={'29a_F260619': 's'}; obiclean_head=True; \n+@HELIUM_000100422_612GNAAXX:7:57:18459:16145#0/2_CONS_SUB_SUB_CMP count=64; merged_sample={'26a_F040644': 64}; obiclean_singletoncount=0; obiclean_internalcount=0; seq_length=99; obiclean_headcount=1; seq_rank=5; obiclean_samplecount=1; obiclean_status={'26a_F040644': 'h'}; obiclean_head=True; 
\n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaacagcctgaaactcaaaggacttggcggtgctttacatccct\n++\n+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n+@HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/2_CONS_SUB_SUB count=61; merged_sample={'29a_F260619': 30, '15a_F730814': 31}; obiclean_singletoncount=0; obiclean_internalcount=0; seq_length=100; obiclean_headcount=2; seq_rank=4; obiclean_samplecount=2; obiclean_status={'29a_F260619': 'h', '15a_F730814': 'h'}; obiclean_head=True; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaatagcttaaaactcaaaggacttggcggtgctttataccctt\n++\n+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n+@HELIUM_000100422_612GNAAXX:7:6:9274:14951#0/2_CONS_SUB_SUB count=46; merged_sample={'13a_F730603': 46}; obiclean_singletoncount=0; obiclean_internalcount=0; seq_length=100; obiclean_headcount=1; seq_rank=8; obiclean_samplecount=1; obiclean_status={'13a_F730603': 'h'}; obiclean_head=True; \n+ctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactaccggcaatagcttaaaactcaaaggacttggcggtgctttataccctt\n++\n+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n+@HELIUM_000100422_612GNAAXX:7:13:5742:9859#0/2_CONS_SUB_SUB count=7; merged_sample={'29a_F260619': 7}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=4; obiclean_headcount=0; seq_rank=16; obiclean_samplecount=1; obiclean_status={'29a_F260619': 's'}; obiclean_head=True; \n tttt\n->HELIUM_000100422_612GNAAXX:7:89:10281:5930#0/2_CONS_SUB_SUB_CMP count=3; merged_sample={'29a_F260619': 3}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=100; obiclean_headcount=0; seq_rank=6; obiclean_samplecount=1; obiclean_status={'29a_F260619': 's'}; obiclean_head=True; 
\n-ttagccctaaacacaaataattacacaaacaaaattgttcaccagagtactagcggcaac\n-agcttaaaactcaaaggacttggcggtgctttataccctt\n->HELIUM_000100422_612GNAAXX:7:29:15520:18035#0/2_CONS_SUB_SUB count=2; merged_sample={'29a_F260619': 2}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=100; obiclean_headcount=0; seq_rank=7; obiclean_sample"..b"M_000100422_612GNAAXX:7:10:9237:10532#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'29a_F260619': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=100; obiclean_headcount=0; seq_rank=51; obiclean_samplecount=1; obiclean_status={'29a_F260619': 'i'}; obiclean_head=False; \n+ttagccctaaacacaagtaattaatataacaaaataattcgccagagtactaccggcaatagcttaaaactcaaaggacttggcggtgctttataccctt\n++\n+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n+@HELIUM_000100422_612GNAAXX:7:7:11998:4462#0/2_CONS_SUB_SUB count=1; merged_sample={'26a_F040644': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=99; obiclean_headcount=0; seq_rank=52; obiclean_samplecount=1; obiclean_status={'26a_F040644': 'i'}; obiclean_head=False; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtagtactagcaacagcctgaaactcaaaggacttggcggtgctttacatccct\n++\n+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n+@HELIUM_000100422_612GNAAXX:7:111:18277:17779#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'26a_F040644': 1}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=99; obiclean_headcount=0; seq_rank=53; obiclean_samplecount=1; obiclean_status={'26a_F040644': 's'}; obiclean_head=True; \n+ttagccctaaacatagataattttacaacaaaataattcgccagaggactactagcaatagcctgaaactcaaaggacttggcggtgctttacatccct\n++\n+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n+@HELIUM_000100422_612GNAAXX:7:3:17077:6562#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'15a_F730814': 1}; obiclean_singletoncount=0; 
obiclean_internalcount=1; seq_length=100; obiclean_headcount=0; seq_rank=54; obiclean_samplecount=1; obiclean_status={'15a_F730814': 'i'}; obiclean_head=False; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactgccggcaatagcttaaaactcaaaggacttggcggtgctttataccctt\n++\n+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n+@HELIUM_000100422_612GNAAXX:7:19:4311:13343#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'26a_F040644': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=99; obiclean_headcount=0; seq_rank=55; obiclean_samplecount=1; obiclean_status={'26a_F040644': 'i'}; obiclean_head=False; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactacgagcaacagcctgaaactcaaaggacttggcggtgctttacatccct\n++\n+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n+@HELIUM_000100422_612GNAAXX:7:30:1800:7833#0/2_CONS_SUB_SUB count=1; merged_sample={'15a_F730814': 1}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=100; obiclean_headcount=0; seq_rank=56; obiclean_samplecount=1; obiclean_status={'15a_F730814': 's'}; obiclean_head=True; \n+ttagccctaaacacaagacattaatataacgagattaatcgacagagtactaccggctatagcttaaaactcaaaggacttggcggtgctttataccctt\n++\n+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n+@HELIUM_000100422_612GNAAXX:7:108:9222:18258#0/2_CONS_SUB_SUB count=1; merged_sample={'13a_F730603': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=100; obiclean_headcount=0; seq_rank=57; obiclean_samplecount=1; obiclean_status={'13a_F730603': 'i'}; obiclean_head=False; \n+ctagccttaaacacaaatagttatgcagacaaaactattcgccagagtactaccggcaatagcttaaaactcaaaggacttggcggtgctttataccctt\n++\n+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n+@HELIUM_000100422_612GNAAXX:7:38:3005:20881#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'26a_F040644': 1}; 
obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=99; obiclean_headcount=0; seq_rank=58; obiclean_samplecount=1; obiclean_status={'26a_F040644': 'i'}; obiclean_head=False; \n+ttagccctaaacatgaacattcaataaacaagaatgttcgccagagtactactagcaacagcctgaaactcaaaggacttggcggtgctttacatccct\n++\n+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n"
b
diff -r c0f7b58b4ce4 -r ec215ed98831 test-data/output_obiuniq.fasta.gz
b
Binary file test-data/output_obiuniq.fasta.gz has changed
b
diff -r c0f7b58b4ce4 -r ec215ed98831 test-data/output_obiuniq_family.fasta.gz
b
Binary file test-data/output_obiuniq_family.fasta.gz has changed
b
diff -r c0f7b58b4ce4 -r ec215ed98831 test-data/wolf_small.F.fastq.gz
b
Binary file test-data/wolf_small.F.fastq.gz has changed
b
diff -r c0f7b58b4ce4 -r ec215ed98831 test-data/wolf_small.R.fastq.gz
b
Binary file test-data/wolf_small.R.fastq.gz has changed