Repository 'obi_ngsfilter'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/obi_ngsfilter

Changeset 0:4df964e14378 (2017-04-12)
Next changeset 1:e6ccf97b3d39 (2018-10-12)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/obitools commit e1031e4c94b25d1ed535bf221764ab801b710ab2
added:
macros.xml
ngsfilter.xml
test-data/illuminapairedend.output.fastq
test-data/input_ngsfilter_extrafile.txt
test-data/output_ngsfilter.fastq
test-data/output_ngsfilter_error_3.fastq
test-data/output_ngsfilter_unidentified.fastq
test-data/output_obiannotate.fasta
test-data/output_obiclean_advanced.fasta
test-data/output_obiclean_simple.fasta
test-data/output_obiconvert.fasta
test-data/output_obigrep_lmin.fastq
test-data/output_obigrep_predicat.fasta
test-data/output_obisort.fasta
test-data/output_obisort.fastq
test-data/output_obistat.txt
test-data/output_obitab.txt
test-data/output_obiuniq.fasta
test-data/output_obiuniq_family.fasta
test-data/wolf_small.F.fastq
test-data/wolf_small.R.fastq
b
diff -r 000000000000 -r 4df964e14378 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Wed Apr 12 17:36:02 2017 -0400
[
@@ -0,0 +1,136 @@
+<?xml version="1.0"?>
+<macros>
+    <xml name="requirements"> <!-- Dependency macro, expanded by ngsfilter.xml via its expand macro="requirements" element. -->
+        <requirements>
+            <requirement type="package" version="1.0.010">obitools</requirement> <!-- Same version string as the @WRAPPER_VERSION@ token. -->
+        </requirements>
+    </xml>
+
+    <token name="@WRAPPER_VERSION@">1.0.010</token> <!-- Substituted into the version attribute of the tool element in ngsfilter.xml. -->
+
+    <xml name="stdio"> <!-- Error-detection macro, expanded by ngsfilter.xml. -->
+        <stdio>
+            <exit_code range="1:" level="fatal" description="Error in Obitools execution" /> <!-- Every exit code from 1 upward is reported as fatal. -->
+        </stdio>
+    </xml>
+
+    <!-- reStructuredText footer with the OBITools project link; referenced at the end of the help text in ngsfilter.xml. --> <token name="@OBITOOLS_LINK@">
+<![CDATA[
+--------
+
+**Project links:**
+
+`OBITools`_
+
+.. _OBITools: http://metabarcoding.org/obitools/
+]]>
+    </token>
+
+    <xml name="attributes"> <!-- Option list of sequence-record attribute keys (ali_dir preselected). Each value is the bare key with no surrounding whitespace; not expanded by ngsfilter.xml itself. -->
+        <option value="ali_dir" selected="true">ali_dir</option>
+        <option value="ali_length">ali_length</option>
+        <option value="avg_quality">avg_quality</option>
+        <option value="best_match">best_match</option>
+        <option value="best_identity">best_identity</option>
+        <option value="class">class</option>
+        <option value="cluster">cluster</option>
+        <option value="complemented">complemented</option>
+        <option value="count">count</option>
+        <option value="cut">cut</option>
+        <option value="direction">direction</option>
+        <option value="distance">distance</option>
+        <option value="error">error</option>
+        <option value="experiment">experiment</option>
+        <option value="family">family</option>
+        <option value="family_name">family_name</option>
+        <option value="forward_error">forward_error</option>
+        <option value="forward_match">forward_match</option>
+        <option value="forward_primer">forward_primer</option>
+        <option value="forward_score">forward_score</option>
+        <option value="forward_tag">forward_tag</option>
+        <option value="forward_tm">forward_tm</option>
+        <option value="genus">genus</option>
+        <option value="genus_name">genus_name</option>
+        <option value="head_quality">head_quality</option>
+        <option value="id_status">id_status</option>
+        <option value="merged_star">merged_star</option>
+        <option value="merged">merged</option>
+        <option value="mid_quality">mid_quality</option>
+        <option value="mode">mode</option>
+        <option value="obiclean_cluster">obiclean_cluster</option>
+        <option value="obiclean_count">obiclean_count</option>
+        <option value="obiclean_head">obiclean_head</option>
+        <option value="obiclean_headcount">obiclean_headcount</option>
+        <option value="obiclean_internalcount">obiclean_internalcount</option>
+        <option value="obiclean_samplecount">obiclean_samplecount</option>
+        <option value="obiclean_singletoncount">obiclean_singletoncount</option>
+        <option value="obiclean_status">obiclean_status</option>
+        <option value="occurrence">occurrence</option>
+        <option value="order">order</option>
+        <option value="order_name">order_name</option>
+        <option value="pairend_limit">pairend_limit</option>
+        <option value="partial">partial</option>
+        <option value="rank">rank</option>
+        <option value="reverse_error">reverse_error</option>
+        <option value="reverse_match">reverse_match</option>
+        <option value="reverse_primer">reverse_primer</option>
+        <option value="reverse_score">reverse_score</option>
+        <option value="reverse_tag">reverse_tag</option>
+        <option value="reverse_tm">reverse_tm</option>
+        <option value="sample">sample</option>
+        <option value="scientific_name">scientific_name</option>
+        <option value="score">score</option>
+        <option value="score_norm">score_norm</option>
+        <option value="select">select</option>
+        <option value="seq_ab_match">seq_ab_match</option>
+        <option value="seq_a_single">seq_a_single</option>
+        <option value="seq_a_mismatch">seq_a_mismatch</option>
+        <option value="seq_a_deletion">seq_a_deletion</option>
+        <option value="seq_a_insertion">seq_a_insertion</option>
+        <option value="seq_b_single">seq_b_single</option>
+        <option value="seq_b_mismatch">seq_b_mismatch</option>
+        <option value="seq_b_deletion">seq_b_deletion</option>
+        <option value="seq_b_insertion">seq_b_insertion</option>
+        <option value="seq_length">seq_length</option>
+        <option value="seq_length_ori">seq_length_ori</option>
+        <option value="seq_rank">seq_rank</option>
+        <option value="sminL">sminL</option>
+        <option value="sminR">sminR</option>
+        <option value="species">species</option>
+        <option value="species_list">species_list</option>
+        <option value="species_name">species_name</option>
+        <option value="status">status</option>
+        <option value="strand">strand</option>
+        <option value="tail_quality">tail_quality</option>
+        <option value="taxid">taxid</option>
+    </xml>
+
+    <xml name="inputtype"> <!-- Option list of input-format choices; each value is written as a double-hyphen option (presumably passed straight to an OBITools command line; confirm in the wrappers that expand it). Not expanded by ngsfilter.xml. -->
+        <option value="--genbank">genbank</option>
+        <option value="--embl">embl</option>
+        <option value="--sanger">sanger</option>
+        <option value="--solexa">solexa</option>
+        <option value="--ecopcr">ecopcr</option>
+        <option value="--ecopcrdb">ecopcrdb</option>
+        <option value="--fasta" selected="true">fasta</option> <!-- Preselected choice. -->
+    </xml>
+
+    <xml name="sanitizer"> <!-- Sanitizer macro: starts from the default valid set and the default mapping, then adds backslash-escaping for the greater-than sign and the double quote. -->
+        <sanitizer invalid_char="test"> <!-- NOTE(review): the replacement for invalid characters is the literal string "test"; confirm this is intended rather than a single character. -->
+            <valid initial="default">
+                <!--add value="&quot;"/-->
+            </valid>
+            <mapping initial="default">
+                <add source="&gt;" target="\&gt;"/>
+                <add source="&quot;" target="\&quot;"/>
+            </mapping>
+        </sanitizer>
+    </xml>
+
+    <xml name="citation"> <!-- Citation macro with a single DOI entry; expanded at the end of ngsfilter.xml. -->
+        <citations>
+            <citation type="doi">10.1111/1755-0998.12428</citation>
+        </citations>
+    </xml>
+
+</macros>
b
diff -r 000000000000 -r 4df964e14378 ngsfilter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsfilter.xml Wed Apr 12 17:36:02 2017 -0400
[
@@ -0,0 +1,111 @@
+<tool id="obi_ngsfilter" name="NGSfilter" version="@WRAPPER_VERSION@">
+    <description>Assigns sequence records to the corresponding experiment/sample based on DNA tags and primers</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+    <!-- Command template: runs ngsfilter with the tag/primer file (-t) followed by the reads, adds -u '$unident' only when $bool is true, passes the allowed primer errors with -e, and redirects stdout to $output. --> <command>
+
+        <![CDATA[
+
+        ngsfilter
+        -t '$inputextrafile' '$inputfastq'
+        #if $bool
+        -u '$unident'
+        #end if
+        -e '$mismatch' > '$output'
+
+        ]]>
+
+    </command>
+
+    <inputs> <!-- Parameters referenced by name in the command template. -->
+        <param name="inputextrafile" type="data" format="tabular" label="Parameter file" help="extrafile describing the DNA tags and primers sequences used for each sample" />
+        <param name="inputfastq" type="data" format="fastq" label="Read from file" help="file of Illumina pair-end reads assembled by illuminapairedend" />
+        <param name="mismatch" type="integer" value="2" min="0" label="Number of errors allowed for matching primers (default = 2)" /> <!-- min="0": an error count cannot be negative, so reject it in the form instead of handing it to ngsfilter -e. -->
+        <param name="bool" type="boolean" checked="true" label="Do you want to generate a file with unidentified sequences?" /> <!-- Drives both the -u branch of the command and the filter on the "unident" output. -->
+    </inputs>
+    <outputs> <!-- Two fastq datasets: the optional unidentified reads and the main result. -->
+        <data format="fastq" name="unident" label="unidentified.fastq with ${tool.name} on ${on_string}">
+            <filter>bool is True</filter> <!-- Dataset is only created when the "bool" parameter is checked. -->
+        </data>
+        <data format="fastq" name="output" label="${tool.name} on ${on_string}: Trimmed and annotated" /> <!-- Receives the stdout that the command redirects to $output. -->
+    </outputs>
+    <tests> <!-- Two runs over the same input files: one without and one with the unidentified-reads output. -->
+        <test> <!-- 2 primer errors allowed, "bool" off: only the main output is compared. -->
+            <param name="inputextrafile" value="input_ngsfilter_extrafile.txt" />
+            <param name="inputfastq" value="illuminapairedend.output.fastq" />
+            <param name="mismatch" value="2" />
+            <param name="bool" value="False" />
+            <output name="output" file="output_ngsfilter.fastq" ftype="fastq"/>
+        </test>
+        <test> <!-- 3 primer errors allowed, "bool" on: both outputs are compared. -->
+            <param name="inputextrafile" value="input_ngsfilter_extrafile.txt" />
+            <param name="inputfastq" value="illuminapairedend.output.fastq" />
+            <param name="mismatch" value="3" />
+            <param name="bool" value="True" />
+            <output name="output" file="output_ngsfilter_error_3.fastq" ftype="fastq"/>
+            <output name="unident" file="output_ngsfilter_unidentified.fastq" ftype="fastq"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+.. class:: infomark
+
+**What it does**
+
+A DNA metabarcoding experiment can be considered as a set of PCR products mixed together and sequenced using a next generation sequencer (i.e. a solexa or a 454).
+To distinguish between these different PCR products, pairs of small DNA sequences (called tags, see the oligoTag command and its associated paper for more
+information on the design of such tags) unique for each PCR product are concatenated to the PCR primers.
+As they are amplified during the PCR, these tags should be recognizable, together with their respective primers, at the beginning and the end of the reads.
+The first step in data analysis is thus to demultiplex the large resulting sequence file by identifying these DNA tags and the primers.
+
+Usually the results of sequencing are stored in one or more files formatted according to the fasta or fastq format. ngsfilter takes as input such a sequence file and an extra file describing the DNA tag and primer sequences used for each sample.
+
+The results consist of sequences trimmed of the primers and tags and annotated with the corresponding sample (and possibly some extra information).
+Sequences for which the tags and primers have not been well identified, and which are thus unassigned to any sample, are tagged as erroneous sequences
+by ngsfilter. Such erroneous sequences are not reported by the program unless specified by the appropriate option.
+
+--------
+
+**Help**
+Extrafile format:
+
+a tab delimited tabular file with 8 columns.
+
+experiment_name sample_name oligo_tag(s) forward_primer reverse_primer F @ extra_information
+
+ex:
+
+laos_gh LA01E1a agcgacta:acacacac GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_01A;
+
+laos_gh LA01E1b tcagtgtc:acacacac GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_01B;
+
+laos_gh LA01E2a actctgct:acacacac GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_01C;
+
+laos_gh LA01E2b atatagcg:acacacac GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_01D;
+
+laos_gh LA01P1a ctatgcta:acacacac GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_01E;
+
+laos_gh LA01P1b tcgcgctg:acacacac GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_01F;
+
+laos_gh LA01P2a agcacagt:acacacac GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_01G;
+
+laos_gh LA01P2b tagctagt:acacacac GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_01H;
+
+laos_gh LA02E1a agcgacta:acagcaca GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_02A;
+
+laos_gh LA02E1b tcagtgtc:acagcaca GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_02B;
+
+laos_gh LA02E2a actctgct:acagcaca GGGCAATCCTGAGCCAA CCATTGAGTCTCTGCACCTATC F @ position=04_02C;
+
+@OBITOOLS_LINK@
+
+
+]]>
+
+    </help>
+    <expand macro="citation" />
+
+</tool>
b
diff -r 000000000000 -r 4df964e14378 test-data/illuminapairedend.output.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/illuminapairedend.output.fastq Wed Apr 12 17:36:02 2017 -0400
[
b'@@ -0,0 +1,996 @@\n+@HELIUM_000100422_612GNAAXX:7:119:14871:19157#0/2_CONS ali_length=62; direction=left; seq_ab_match=47; sminR=40.0; score=115.761290673; seq_a_mismatch=1; seq_b_deletion=1; seq_b_mismatch=13; seq_a_deletion=0; score_norm=1.86711759151; seq_b_insertion=1; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=46; seq_b_single=46; \n+ccgcctccttagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttgagctattgccggtagtactctggcgaatgattttgttataataattacttgtgtttagggctaagcatagtggggtatctaaaggaggcgg\n++\n+dddddddddddbbdaddcddddddcadaddddaadcccWaaddabdx~~|b~~~~cccccccBcBcccBcBcccBccBBcc~}`kXyxu|~Kyr`b~~~~~~b~~~b~d\\cdddddddddddddcacddddcdddddddddddddddddddddd\n+@HELIUM_000100422_612GNAAXX:7:108:5640:3823#0/2_CONS ali_length=62; direction=left; seq_ab_match=60; sminR=40.0; score=231.881364714; seq_a_mismatch=0; seq_b_deletion=0; seq_b_mismatch=2; seq_a_deletion=0; score_norm=3.74002201152; seq_b_insertion=0; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=46; seq_b_single=46; \n+ccgcctccttagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctattgccggtagtactctggcgaacaattttgttatattaattacttgtgtttagggctaagcatagtggggtatctaaaggaggcgg\n++\n+ddddddddddddddddbdddddbddddddddddddddddddddddd~~~~~~~~~~~~~~~~~~}~~~~~~~~~~~~~~~~~~~~~~~~~~~Z~~~~~~~~~~~~~b~d]dddddddcdddddcddddddddddddddddddddddcddddddd\n+@HELIUM_000100422_612GNAAXX:7:97:14311:19299#0/2_CONS ali_length=62; direction=left; seq_ab_match=52; sminR=40.0; score=167.91734192; seq_a_mismatch=5; seq_b_deletion=0; seq_b_mismatch=5; seq_a_deletion=0; score_norm=2.70834422451; seq_b_insertion=0; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=46; seq_b_single=46; 
\n+ccgcctccttagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctcttgccggtagtactctggcgaataattttgttatattaattacttgtgtttagggctaagcatagtggggtatctaaaggaggcgg\n++\n+ddadddddcd`dcddddddddd^dd]ddddddZad`\\`]Lcca``YT|~~^||v|z|~ts~~~|r~sZ^~~`~~~~~~~~~~~~~~~~V~b~]~~~~b~~~~~~~~`~b\\aadddd`dcdddddc`dddccddddddddddddddddddddddd\n+@HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/2_CONS ali_length=62; direction=left; seq_ab_match=59; sminR=40.0; score=223.982011431; seq_a_mismatch=0; seq_b_deletion=0; seq_b_mismatch=3; seq_a_deletion=0; score_norm=3.61261308759; seq_b_insertion=0; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=46; seq_b_single=46; \n+ccgcctcctttagataccccactatgcttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaatagcttaaaactcaaaggacttggcggtgctttatacccttctagaggagcctgttctaaggaggcgg\n++\n+dddedddddddddedddddddddedddaddbddddddddddddedd~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~b~b~~~~~~~~b~~~~ddddddddddabbb^bdddddddddddcddddddddcddddddddd\n+@HELIUM_000100422_612GNAAXX:7:57:18459:16145#0/2_CONS ali_length=63; direction=left; seq_ab_match=60; sminR=40.0; score=227.883189945; seq_a_mismatch=3; seq_b_deletion=0; seq_b_mismatch=0; seq_a_deletion=0; score_norm=3.61719349119; seq_b_insertion=0; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=45; seq_b_single=45; \n+ccgaatatctagaacaggctcctctagagggatgtaaagcaccgccaagtcctttgagtttcaggctgttgctagtagtactctggcgaacattcttgtttattgaatgtttatgtttagggctaagcatagtggggtatctaagatattcgg\n++\n+ddcdddddddddddddddeddddddddddddddddddddcddddd~b~~~~~~b~~~~~~~~~~~~~~c~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ddddddddddddddddddddddddddddddddddddddddddddd\n+@HELIUM_000100422_612GNAAXX:7:89:10281:5930#0/2_CONS ali_length=62; direction=left; seq_ab_match=60; sminR=40.0; score=231.893859399; seq_a_mismatch=2; seq_b_deletion=0; seq_b_mismatch=0; seq_a_deletion=0; score_norm=3.7402235387; seq_b_insertion=0; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=46; seq_b_single=46; 
\n+ccgcctccttagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctgttgccgctagtactctggtgaacaattttgtttgtgtaattatttgtgtttagggctaagcatagtggggtatctaaaggaggcgg\n++\n+ddddddddddddddddddcddddddddddddd\\ddeddecddddddb~~~b~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ddddcddddddcdddddddddddddddddddddddddddddddddd\n+@HELIUM_000100422_612GNAAXX:7:29:15520:18035#0/2_CONS ali_length=62; direction=left; '..b'tagataccccactatgcttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaacagcctgaaactcaaaggacttggcggtgctttacatccctctagaggagcctgttctagatattcgg\n++\n+ddddddcddddddddddddd`dcdddddaddaddddbdddddddc~~~~~~~~~{~~~~~~~~zxy~~|~~~~ru~~~~~~~~~~~~~~~~~~{~~p~~~~~~~~~~~ddddddddddabaP`ddddabddddddddddddddddcdcdaddd\n+@HELIUM_000100422_612GNAAXX:7:75:9674:16966#0/2_CONS ali_length=58; direction=right; seq_ab_match=57; sminR=40.0; score=224.000075421; seq_a_mismatch=0; seq_b_deletion=50; seq_b_mismatch=1; seq_a_deletion=50; score_norm=3.86207026589; seq_b_insertion=0; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=0; seq_b_single=0; \n+ccgcctccttagaacaggctcctctagaaaagcatagtggggtatctaaaggaggcgg\n++\n+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~b~~~~~~~~~~~~~~~\n+@HELIUM_000100422_612GNAAXX:7:59:15367:8376#0/2_CONS ali_length=62; direction=left; seq_ab_match=62; sminR=40.0; score=247.996374666; seq_a_mismatch=0; seq_b_deletion=0; seq_b_mismatch=0; seq_a_deletion=0; score_norm=3.99994152687; seq_b_insertion=0; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=46; seq_b_single=46; \n+ccgaagtagttagataccccactatgcttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaatagcttaaaactcaaaggacttggcggtgctttatacccttctagaggagcctgttctactacttcgg\n++\n+ddddddddddddddddddddddddddddddddddddddddddddad~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~dddddddddddddedddddddddddddddddddddddddddddddd\n+@HELIUM_000100422_612GNAAXX:7:29:1328:13185#0/2_CONS ali_length=63; direction=left; seq_ab_match=59; sminR=40.0; score=223.94591652; seq_a_mismatch=0; 
seq_b_deletion=0; seq_b_mismatch=4; seq_a_deletion=0; score_norm=3.55469708763; seq_b_insertion=0; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=45; seq_b_single=45; \n+ccgaatatcttagataccccactatgcttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaacagcctgaaactcaaaggacttggcggtgctttacatccctctagaggagcctgttctagatattcgg\n++\n+ddddddddddddccdddddddddeddddddddddddddddedadd~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~W[~~~~~~~~~~~~~~~~~~~~~~~b~~~b~dddcdddddddcddddddddddddddddddddddddddddddddd\n+@HELIUM_000100422_612GNAAXX:7:30:16081:4486#0/2_CONS ali_length=63; direction=left; seq_ab_match=62; sminR=40.0; score=243.950667893; seq_a_mismatch=0; seq_b_deletion=0; seq_b_mismatch=1; seq_a_deletion=0; score_norm=3.87223282369; seq_b_insertion=0; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=45; seq_b_single=45; \n+ccgaatatctagaacaggctcctctagagggatgtaaagcaccgccaagtcctttgagtttcaggctgttgctagtagtactctggcgaacattcttgtttattgaatgtttatgtttagggctaagcatagtggggtatctaagatattcgg\n++\n+ddddddddddddddddddddddddddddddddddddddcdddddd~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~c~~~dddeddddddddddadddddddddddddddddddddddddddddd\n+@HELIUM_000100422_612GNAAXX:7:104:10274:7999#0/2_CONS ali_length=63; direction=left; seq_ab_match=63; sminR=40.0; score=251.962499638; seq_a_mismatch=0; seq_b_deletion=0; seq_b_mismatch=0; seq_a_deletion=0; score_norm=3.99940475616; seq_b_insertion=0; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=45; seq_b_single=45; \n+ccgaatatcttagataccccactatgcttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaacagcctgaaactcaaaggacttggcggtgctttacatccctctagaggagcctgttctagatattcgg\n++\n+dddcdddddddddddedddddddddddccdddddddddddddddd~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~dddddd^ddddddddddddddddddddddddcddddddddddddd\n+@HELIUM_000100422_612GNAAXX:7:14:7706:12338#0/2_CONS ali_length=62; direction=left; seq_ab_match=59; sminR=40.0; score=223.864093999; seq_a_mismatch=2; seq_b_deletion=0; seq_b_mismatch=1; 
seq_a_deletion=0; score_norm=3.61071119354; seq_b_insertion=0; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=46; seq_b_single=46; \n+ccgaagtagtagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctattgccggtagtactctggcgaataattttgttatattaattacttgtgtttagggctaagcatagtggggtatctaactacttcgg\n++\n+dddcddcddcdbdbddddddddddddddddddcdddddccaddddd~~~~~~b~~~~~~~~~~~x~~~~wyZq~mv~}}~~~~~~~~~P~~~~~~~~~~~~~~~~~~~d^cddddddddcdddadddddddddddddddddddddddddddddd\n'
b
diff -r 000000000000 -r 4df964e14378 test-data/input_ngsfilter_extrafile.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_ngsfilter_extrafile.txt Wed Apr 12 17:36:02 2017 -0400
b
@@ -0,0 +1,4 @@
+wolf_diet    13a_F730603      aattaac  TTAGATACCCCACTATGC    TAGAACAGGCTCCTCTAG     F       @
+wolf_diet    15a_F730814      gaagtag  TTAGATACCCCACTATGC    TAGAACAGGCTCCTCTAG     F       @
+wolf_diet    26a_F040644      gaatatc  TTAGATACCCCACTATGC    TAGAACAGGCTCCTCTAG     F       @
+wolf_diet    29a_F260619      gcctcct  TTAGATACCCCACTATGC    TAGAACAGGCTCCTCTAG     F       @
b
diff -r 000000000000 -r 4df964e14378 test-data/output_ngsfilter.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_ngsfilter.fastq Wed Apr 12 17:36:02 2017 -0400
[
b'@@ -0,0 +1,952 @@\n+@HELIUM_000100422_612GNAAXX:7:119:14871:19157#0/2_CONS_SUB_SUB_CMP sample=29a_F260619; experiment=wolf_diet; seq_length=100; forward_score=72.0; forward_tag=gcctcct; reverse_tag=gcctcct; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_score=72.0; status=full; direction=reverse; reverse_match=tagaacaggctcctctag; reverse_primer=tagaacaggctcctctag; ali_length=62; seq_ab_match=47; sminR=40.0; seq_b_mismatch=13; seq_a_mismatch=1; tail_quality=67.0; seq_b_deletion=1; mid_quality=69.1492537313; seq_a_deletion=0; seq_b_single=46; score=115.761290673; seq_a_insertion=0; mode=alignment; seq_length_ori=154; head_quality=67.0; avg_quality=68.8701298701; sminL=40.0; seq_a_single=46; score_norm=1.86711759151; seq_b_insertion=1; \n+ttagccctaaacacaagtaattattataacaaaatcattcgccagagtactaccggcaatagctcaaaactcaaaggacttggcggtgctttataccctt\n++\n+cacdddddddddddddc\\d~b~~~b~~~~~~b`ryK~|uxyXk`}~ccBBccBcccBcBcccBcBccccccc~~~~b|~~xdbaddaaWcccdaadddda\n+@HELIUM_000100422_612GNAAXX:7:108:5640:3823#0/2_CONS_SUB_SUB_CMP sample=29a_F260619; experiment=wolf_diet; seq_length=100; forward_score=72.0; forward_tag=gcctcct; reverse_tag=gcctcct; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_score=72.0; status=full; direction=reverse; reverse_match=tagaacaggctcctctag; reverse_primer=tagaacaggctcctctag; ali_length=62; seq_ab_match=60; sminR=40.0; seq_b_mismatch=2; seq_a_mismatch=0; tail_quality=66.9; seq_b_deletion=0; mid_quality=78.447761194; seq_a_deletion=0; seq_b_single=46; score=231.881364714; seq_a_insertion=0; mode=alignment; seq_length_ori=154; head_quality=67.0; avg_quality=76.9545454545; sminL=40.0; seq_a_single=46; score_norm=3.74002201152; seq_b_insertion=0; 
\n+ttagccctaaacacaagtaattaatataacaaaattgttcgccagagtactaccggcaatagcttaaaactcaaaggacttggcggtgctttataccctt\n++\n+dddcdddddcddddddd]d~b~~~~~~~~~~~~~Z~~~~~~~~~~~~~~~~~~~~~~~~~~~}~~~~~~~~~~~~~~~~~~ddddddddddddddddddd\n+@HELIUM_000100422_612GNAAXX:7:97:14311:19299#0/2_CONS_SUB_SUB_CMP sample=29a_F260619; experiment=wolf_diet; seq_length=100; forward_score=72.0; forward_tag=gcctcct; reverse_tag=gcctcct; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_score=72.0; status=full; direction=reverse; reverse_match=tagaacaggctcctctag; reverse_primer=tagaacaggctcctctag; ali_length=62; seq_ab_match=52; sminR=40.0; seq_b_mismatch=5; seq_a_mismatch=5; tail_quality=67.0; seq_b_deletion=0; mid_quality=75.0820895522; seq_a_deletion=0; seq_b_single=46; score=167.91734192; seq_a_insertion=0; mode=alignment; seq_length_ori=154; head_quality=66.6; avg_quality=74.0064935065; sminL=40.0; seq_a_single=46; score_norm=2.70834422451; seq_b_insertion=0; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaagagcttaaaactcaaaggacttggcggtgctttataccctt\n++\n+d`cdddddcd`ddddaa\\b~`~~~~~~~~b~~~~]~b~V~~~~~~~~~~~~~~~~`~~^Zs~r|~~~st~|z|v||^~~|TY``accL]`\\`daZddddd\n+@HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/2_CONS_SUB_SUB reverse_score=72.0; reverse_match=tagaacaggctcctctag; seq_length=100; sample=29a_F260619; experiment=wolf_diet; forward_tag=gcctcct; reverse_tag=gcctcct; reverse_primer=tagaacaggctcctctag; status=full; forward_score=72.0; direction=forward; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; ali_length=62; seq_ab_match=59; sminR=40.0; seq_b_mismatch=3; seq_a_mismatch=0; tail_quality=66.9; seq_b_deletion=0; mid_quality=78.2537313433; seq_a_deletion=0; seq_b_single=46; score=223.982011431; seq_a_insertion=0; mode=alignment; seq_length_ori=154; head_quality=67.1; avg_quality=76.7922077922; sminL=40.0; seq_a_single=46; score_norm=3.61261308759; seq_b_insertion=0; 
\n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaatagcttaaaactcaaaggacttggcggtgctttataccctt\n++\n+addbddddddddddddedd~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~b~b~~~~~~~~b~~~~ddddddddddabbb^bddd\n+@HELIUM_000100422_612GNAAXX:7:57:18459:16145#0/2_CONS_SUB_SUB_CMP sample=26a_F040644; experiment=wolf_diet; seq_length=99; forward_score=72.0; for'..b'taacaaaattattcgccagagtactaccggcaatagcttaaaactcaaaggacttggcggtgctttataccctt\n++\n+dddddddddddddddddad~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~dddddddddddddeddddd\n+@HELIUM_000100422_612GNAAXX:7:29:1328:13185#0/2_CONS_SUB_SUB reverse_score=72.0; reverse_match=tagaacaggctcctctag; seq_length=99; sample=26a_F040644; experiment=wolf_diet; forward_tag=gaatatc; reverse_tag=gaatatc; reverse_primer=tagaacaggctcctctag; status=full; forward_score=72.0; direction=forward; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; ali_length=63; seq_ab_match=59; sminR=40.0; seq_b_mismatch=4; seq_a_mismatch=0; tail_quality=67.0; seq_b_deletion=0; mid_quality=78.3007518797; seq_a_deletion=0; seq_b_single=45; score=223.94591652; seq_a_insertion=0; mode=alignment; seq_length_ori=153; head_quality=67.0; avg_quality=76.8235294118; sminL=40.0; seq_a_single=45; score_norm=3.55469708763; seq_b_insertion=0; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaacagcctgaaactcaaaggacttggcggtgctttacatccct\n++\n+dddddddddddddedadd~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~W[~~~~~~~~~~~~~~~~~~~~~~~b~~~b~dddcdddddddcdddddd\n+@HELIUM_000100422_612GNAAXX:7:30:16081:4486#0/2_CONS_SUB_SUB_CMP sample=26a_F040644; experiment=wolf_diet; seq_length=99; forward_score=72.0; forward_tag=gaatatc; reverse_tag=gaatatc; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_score=72.0; status=full; direction=reverse; reverse_match=tagaacaggctcctctag; reverse_primer=tagaacaggctcctctag; ali_length=63; seq_ab_match=62; sminR=40.0; seq_b_mismatch=1; seq_a_mismatch=0; tail_quality=67.0; seq_b_deletion=0; 
mid_quality=79.0902255639; seq_a_deletion=0; seq_b_single=45; score=243.950667893; seq_a_insertion=0; mode=alignment; seq_length_ori=153; head_quality=67.0; avg_quality=77.5098039216; sminL=40.0; seq_a_single=45; score_norm=3.87223282369; seq_b_insertion=0; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaacagcctgaaactcaaaggacttggcggtgctttacatccct\n++\n+dddaddddddddddeddd~~~c~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ddddddcddddddddddd\n+@HELIUM_000100422_612GNAAXX:7:104:10274:7999#0/2_CONS_SUB_SUB reverse_score=72.0; reverse_match=tagaacaggctcctctag; seq_length=99; sample=26a_F040644; experiment=wolf_diet; forward_tag=gaatatc; reverse_tag=gaatatc; reverse_primer=tagaacaggctcctctag; status=full; forward_score=72.0; direction=forward; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; ali_length=63; seq_ab_match=63; sminR=40.0; seq_b_mismatch=0; seq_a_mismatch=0; tail_quality=67.0; seq_b_deletion=0; mid_quality=79.2556390977; seq_a_deletion=0; seq_b_single=45; score=251.962499638; seq_a_insertion=0; mode=alignment; seq_length_ori=153; head_quality=66.9; avg_quality=77.6470588235; sminL=40.0; seq_a_single=45; score_norm=3.99940475616; seq_b_insertion=0; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaacagcctgaaactcaaaggacttggcggtgctttacatccct\n++\n+ccdddddddddddddddd~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~dddddd^ddddddddddd\n+@HELIUM_000100422_612GNAAXX:7:14:7706:12338#0/2_CONS_SUB_SUB_CMP sample=15a_F730814; experiment=wolf_diet; seq_length=100; forward_score=72.0; forward_tag=gaagtag; reverse_tag=gaagtag; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_score=72.0; status=full; direction=reverse; reverse_match=tagaacaggctcctctag; reverse_primer=tagaacaggctcctctag; ali_length=62; seq_ab_match=59; sminR=40.0; seq_b_mismatch=1; seq_a_mismatch=2; tail_quality=67.0; seq_b_deletion=0; mid_quality=77.6194029851; seq_a_deletion=0; seq_b_single=46; 
score=223.864093999; seq_a_insertion=0; mode=alignment; seq_length_ori=154; head_quality=66.7; avg_quality=76.2207792208; sminL=40.0; seq_a_single=46; score_norm=3.61071119354; seq_b_insertion=0; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaatagcttaaaactcaaaggacttggcggtgctttataccctt\n++\n+dddadddcddddddddc^d~~~~~~~~~~~~~~~~~~~P~~~~~~~~~}}~vm~qZyw~~~~x~~~~~~~~~~~b~~~~~~dddddaccdddddcddddd\n'
b
diff -r 000000000000 -r 4df964e14378 test-data/output_ngsfilter_error_3.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_ngsfilter_error_3.fastq Wed Apr 12 17:36:02 2017 -0400
[
b'@@ -0,0 +1,952 @@\n+@HELIUM_000100422_612GNAAXX:7:119:14871:19157#0/2_CONS_SUB_SUB_CMP sample=29a_F260619; experiment=wolf_diet; seq_length=100; forward_score=72.0; forward_tag=gcctcct; reverse_tag=gcctcct; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_score=72.0; status=full; direction=reverse; reverse_match=tagaacaggctcctctag; reverse_primer=tagaacaggctcctctag; ali_length=62; seq_ab_match=47; sminR=40.0; seq_b_mismatch=13; seq_a_mismatch=1; tail_quality=67.0; seq_b_deletion=1; mid_quality=69.1492537313; seq_a_deletion=0; seq_b_single=46; score=115.761290673; seq_a_insertion=0; mode=alignment; seq_length_ori=154; head_quality=67.0; avg_quality=68.8701298701; sminL=40.0; seq_a_single=46; score_norm=1.86711759151; seq_b_insertion=1; \n+ttagccctaaacacaagtaattattataacaaaatcattcgccagagtactaccggcaatagctcaaaactcaaaggacttggcggtgctttataccctt\n++\n+cacdddddddddddddc\\d~b~~~b~~~~~~b`ryK~|uxyXk`}~ccBBccBcccBcBcccBcBccccccc~~~~b|~~xdbaddaaWcccdaadddda\n+@HELIUM_000100422_612GNAAXX:7:108:5640:3823#0/2_CONS_SUB_SUB_CMP sample=29a_F260619; experiment=wolf_diet; seq_length=100; forward_score=72.0; forward_tag=gcctcct; reverse_tag=gcctcct; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_score=72.0; status=full; direction=reverse; reverse_match=tagaacaggctcctctag; reverse_primer=tagaacaggctcctctag; ali_length=62; seq_ab_match=60; sminR=40.0; seq_b_mismatch=2; seq_a_mismatch=0; tail_quality=66.9; seq_b_deletion=0; mid_quality=78.447761194; seq_a_deletion=0; seq_b_single=46; score=231.881364714; seq_a_insertion=0; mode=alignment; seq_length_ori=154; head_quality=67.0; avg_quality=76.9545454545; sminL=40.0; seq_a_single=46; score_norm=3.74002201152; seq_b_insertion=0; 
\n+ttagccctaaacacaagtaattaatataacaaaattgttcgccagagtactaccggcaatagcttaaaactcaaaggacttggcggtgctttataccctt\n++\n+dddcdddddcddddddd]d~b~~~~~~~~~~~~~Z~~~~~~~~~~~~~~~~~~~~~~~~~~~}~~~~~~~~~~~~~~~~~~ddddddddddddddddddd\n+@HELIUM_000100422_612GNAAXX:7:97:14311:19299#0/2_CONS_SUB_SUB_CMP sample=29a_F260619; experiment=wolf_diet; seq_length=100; forward_score=72.0; forward_tag=gcctcct; reverse_tag=gcctcct; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_score=72.0; status=full; direction=reverse; reverse_match=tagaacaggctcctctag; reverse_primer=tagaacaggctcctctag; ali_length=62; seq_ab_match=52; sminR=40.0; seq_b_mismatch=5; seq_a_mismatch=5; tail_quality=67.0; seq_b_deletion=0; mid_quality=75.0820895522; seq_a_deletion=0; seq_b_single=46; score=167.91734192; seq_a_insertion=0; mode=alignment; seq_length_ori=154; head_quality=66.6; avg_quality=74.0064935065; sminL=40.0; seq_a_single=46; score_norm=2.70834422451; seq_b_insertion=0; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaagagcttaaaactcaaaggacttggcggtgctttataccctt\n++\n+d`cdddddcd`ddddaa\\b~`~~~~~~~~b~~~~]~b~V~~~~~~~~~~~~~~~~`~~^Zs~r|~~~st~|z|v||^~~|TY``accL]`\\`daZddddd\n+@HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/2_CONS_SUB_SUB reverse_score=72.0; reverse_match=tagaacaggctcctctag; seq_length=100; sample=29a_F260619; experiment=wolf_diet; forward_tag=gcctcct; reverse_tag=gcctcct; reverse_primer=tagaacaggctcctctag; status=full; forward_score=72.0; direction=forward; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; ali_length=62; seq_ab_match=59; sminR=40.0; seq_b_mismatch=3; seq_a_mismatch=0; tail_quality=66.9; seq_b_deletion=0; mid_quality=78.2537313433; seq_a_deletion=0; seq_b_single=46; score=223.982011431; seq_a_insertion=0; mode=alignment; seq_length_ori=154; head_quality=67.1; avg_quality=76.7922077922; sminL=40.0; seq_a_single=46; score_norm=3.61261308759; seq_b_insertion=0; 
\n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaatagcttaaaactcaaaggacttggcggtgctttataccctt\n++\n+addbddddddddddddedd~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~b~b~~~~~~~~b~~~~ddddddddddabbb^bddd\n+@HELIUM_000100422_612GNAAXX:7:57:18459:16145#0/2_CONS_SUB_SUB_CMP sample=26a_F040644; experiment=wolf_diet; seq_length=99; forward_score=72.0; for'..b'taacaaaattattcgccagagtactaccggcaatagcttaaaactcaaaggacttggcggtgctttataccctt\n++\n+dddddddddddddddddad~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~dddddddddddddeddddd\n+@HELIUM_000100422_612GNAAXX:7:29:1328:13185#0/2_CONS_SUB_SUB reverse_score=72.0; reverse_match=tagaacaggctcctctag; seq_length=99; sample=26a_F040644; experiment=wolf_diet; forward_tag=gaatatc; reverse_tag=gaatatc; reverse_primer=tagaacaggctcctctag; status=full; forward_score=72.0; direction=forward; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; ali_length=63; seq_ab_match=59; sminR=40.0; seq_b_mismatch=4; seq_a_mismatch=0; tail_quality=67.0; seq_b_deletion=0; mid_quality=78.3007518797; seq_a_deletion=0; seq_b_single=45; score=223.94591652; seq_a_insertion=0; mode=alignment; seq_length_ori=153; head_quality=67.0; avg_quality=76.8235294118; sminL=40.0; seq_a_single=45; score_norm=3.55469708763; seq_b_insertion=0; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaacagcctgaaactcaaaggacttggcggtgctttacatccct\n++\n+dddddddddddddedadd~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~W[~~~~~~~~~~~~~~~~~~~~~~~b~~~b~dddcdddddddcdddddd\n+@HELIUM_000100422_612GNAAXX:7:30:16081:4486#0/2_CONS_SUB_SUB_CMP sample=26a_F040644; experiment=wolf_diet; seq_length=99; forward_score=72.0; forward_tag=gaatatc; reverse_tag=gaatatc; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_score=72.0; status=full; direction=reverse; reverse_match=tagaacaggctcctctag; reverse_primer=tagaacaggctcctctag; ali_length=63; seq_ab_match=62; sminR=40.0; seq_b_mismatch=1; seq_a_mismatch=0; tail_quality=67.0; seq_b_deletion=0; 
mid_quality=79.0902255639; seq_a_deletion=0; seq_b_single=45; score=243.950667893; seq_a_insertion=0; mode=alignment; seq_length_ori=153; head_quality=67.0; avg_quality=77.5098039216; sminL=40.0; seq_a_single=45; score_norm=3.87223282369; seq_b_insertion=0; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaacagcctgaaactcaaaggacttggcggtgctttacatccct\n++\n+dddaddddddddddeddd~~~c~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ddddddcddddddddddd\n+@HELIUM_000100422_612GNAAXX:7:104:10274:7999#0/2_CONS_SUB_SUB reverse_score=72.0; reverse_match=tagaacaggctcctctag; seq_length=99; sample=26a_F040644; experiment=wolf_diet; forward_tag=gaatatc; reverse_tag=gaatatc; reverse_primer=tagaacaggctcctctag; status=full; forward_score=72.0; direction=forward; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; ali_length=63; seq_ab_match=63; sminR=40.0; seq_b_mismatch=0; seq_a_mismatch=0; tail_quality=67.0; seq_b_deletion=0; mid_quality=79.2556390977; seq_a_deletion=0; seq_b_single=45; score=251.962499638; seq_a_insertion=0; mode=alignment; seq_length_ori=153; head_quality=66.9; avg_quality=77.6470588235; sminL=40.0; seq_a_single=45; score_norm=3.99940475616; seq_b_insertion=0; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaacagcctgaaactcaaaggacttggcggtgctttacatccct\n++\n+ccdddddddddddddddd~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~dddddd^ddddddddddd\n+@HELIUM_000100422_612GNAAXX:7:14:7706:12338#0/2_CONS_SUB_SUB_CMP sample=15a_F730814; experiment=wolf_diet; seq_length=100; forward_score=72.0; forward_tag=gaagtag; reverse_tag=gaagtag; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_score=72.0; status=full; direction=reverse; reverse_match=tagaacaggctcctctag; reverse_primer=tagaacaggctcctctag; ali_length=62; seq_ab_match=59; sminR=40.0; seq_b_mismatch=1; seq_a_mismatch=2; tail_quality=67.0; seq_b_deletion=0; mid_quality=77.6194029851; seq_a_deletion=0; seq_b_single=46; 
score=223.864093999; seq_a_insertion=0; mode=alignment; seq_length_ori=154; head_quality=66.7; avg_quality=76.2207792208; sminL=40.0; seq_a_single=46; score_norm=3.61071119354; seq_b_insertion=0; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaatagcttaaaactcaaaggacttggcggtgctttataccctt\n++\n+dddadddcddddddddc^d~~~~~~~~~~~~~~~~~~~P~~~~~~~~~}}~vm~qZyw~~~~x~~~~~~~~~~~b~~~~~~dddddaccdddddcddddd\n'
b
diff -r 000000000000 -r 4df964e14378 test-data/output_ngsfilter_unidentified.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_ngsfilter_unidentified.fastq Wed Apr 12 17:36:02 2017 -0400
[
@@ -0,0 +1,44 @@
+@HELIUM_000100422_612GNAAXX:7:1:9007:3289#0/2_CONS_SUB_SUB_CMP error=Cannot assign sequence to a sample; forward_score=72.0; forward_tag=atctctc; reverse_tag=atctctc; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_score=72.0; status=full; direction=reverse; reverse_match=tagaacaggctcctctag; reverse_primer=tagaacaggctcctctag; ali_length=62; seq_ab_match=60; sminR=40.0; seq_b_mismatch=0; seq_a_mismatch=2; tail_quality=67.0; seq_b_deletion=0; mid_quality=78.5671641791; seq_a_deletion=0; seq_b_single=46; score=231.83380127; seq_a_insertion=0; mode=alignment; seq_length_ori=154; head_quality=67.0; avg_quality=77.0649350649; sminL=40.0; seq_a_single=46; score_norm=3.7392548592; seq_b_insertion=0; 
+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaatagcttaaaactcaaaggacttggcggtgctttataccctt
++
+ddddddddddddddcddbd~~~~~~~~~~~~~~~~~~~}~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~b~~~bddddddddddddddddddd
+@HELIUM_000100422_612GNAAXX:7:1:8849:9880#0/2_PairEnd sminR=40.0; pairend_limit=108; tail_quality=64.3; mid_quality=37.8724489796; ali_dir=left; score=25.9064763088; mode=joined; error=No primer match; head_quality=33.0; avg_quality=38.8703703704; sminL=40.0; 
+ggacgggagagggggggggggggggaaggggggggttttggggggggggggccgttttaaaaaaaaaaaaaaaacaacgtgacatggcacagccgtcacaactgtcagctgacgaccactggcagtgagtctactctcctattttgttttttttttcaagcagaagacggcatacgatatcggtctcggcattcctgctgaaccgctcttccgatc
++
+BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB_`X`b[`T[ZWHZTWU]]b]]TZ^__\^`\OOOOO^`^Z^_ba_bbbbbb
+@HELIUM_000100422_612GNAAXX:7:1:8649:5297#0/2_CONS ali_length=104; direction=right; seq_ab_match=104; sminR=40.0; seq_b_mismatch=0; seq_a_mismatch=0; tail_quality=93.0; seq_b_deletion=4; mid_quality=93.0; seq_a_deletion=4; seq_b_single=0; score=415.823014771; seq_a_insertion=0; mode=alignment; error=No primer match; head_quality=93.0; avg_quality=93.0; sminL=40.0; seq_a_single=0; score_norm=3.99829821895; seq_b_insertion=0; 
+ccagcgagatcccattgagtctctgcacctatcctttttttcgttttctgaatctttgtttggaaaaaaacaggatttggctcaggattgcccgatctcgctgg
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@HELIUM_000100422_612GNAAXX:7:1:8702:15835#0/2_CONS ali_length=85; direction=left; seq_ab_match=85; sminR=40.0; seq_b_mismatch=0; seq_a_mismatch=0; tail_quality=67.0; seq_b_deletion=0; mid_quality=86.8828828829; seq_a_deletion=0; seq_b_single=23; score=339.959438284; seq_a_insertion=0; mode=alignment; error=No primer match; head_quality=67.0; avg_quality=83.8473282443; sminL=40.0; seq_a_single=23; score_norm=3.99952280334; seq_b_insertion=0; 
+cccatcgctcagatatccgttgccgagagtcgtgtagattatatagaattgcaactcggggtgcggccagcaagccagccacatcccctcgttaggcacagtgttccttgacgccttcggtgagcgatggg
++
+ddddddddddddddddddddddd~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{~~~~~~~~ddddddddddddddddddddddd
+@HELIUM_000100422_612GNAAXX:7:1:8613:9165#0/2_CONS ali_length=101; direction=left; seq_ab_match=99; sminR=40.0; seq_b_mismatch=0; seq_a_mismatch=2; tail_quality=74.8; seq_b_deletion=0; mid_quality=92.4105263158; seq_a_deletion=0; seq_b_single=7; score=387.825112777; seq_a_insertion=0; mode=alignment; error=No primer match; head_quality=74.8; avg_quality=89.347826087; sminL=40.0; seq_a_single=7; score_norm=3.83985260176; seq_b_insertion=0; 
+ccagagcgagtgggcaatcctgagccaaatctttatttttagaaaaacaaaagggcttcagaaagcaaaaataaacataaaggataggtgcagagactcaatggactcgctctgg
++
+ddddddd~~~~b~~~~~~b~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ddddddd
+@HELIUM_000100422_612GNAAXX:7:1:8670:8750#0/2_CONS ali_length=90; direction=right; seq_ab_match=90; sminR=40.0; seq_b_mismatch=0; seq_a_mismatch=0; tail_quality=93.0; seq_b_deletion=18; mid_quality=93.0; seq_a_deletion=18; seq_b_single=0; score=359.914979278; seq_a_insertion=0; mode=alignment; error=No primer match; head_quality=93.0; avg_quality=93.0; sminL=40.0; seq_a_single=0; score_norm=3.99905532531; seq_b_insertion=0; 
+ccgaggcttactaggattagataccctattatttttgaatgttaatgtttgtttgcttgagtagtattagttatgttcttgaaagcctcg
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@HELIUM_000100422_612GNAAXX:7:1:8732:6209#0/2_PairEnd sminR=40.0; pairend_limit=108; tail_quality=57.9; mid_quality=36.5408163265; ali_dir=right; score=15.9727840143; mode=joined; error=No primer match; head_quality=33.0; avg_quality=37.3657407407; sminL=40.0; 
+gctctacagaggcgggggggggggagaaggggaaatcatggggggggggggcgacatctaaaaacatacatacaacccacacctcctaccacacagctcccagcgcaaagatagatcatgatggtaagtagaggcaaatgagatcatctatttttttcaaagagaagacggcatacgatatcggtctcggcattcctgctaaccgctcttccgatc
++
+BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBa[]aa_aaaa]aa`]a\[aZ`\ZcMWR]Z`````
+@HELIUM_000100422_612GNAAXX:7:1:8858:2566#0/2_PairEnd sminR=40.0; pairend_limit=108; tail_quality=65.0; mid_quality=40.6581632653; ali_dir=left; score=15.7312871104; mode=joined; error=No primer match; head_quality=33.0; avg_quality=41.4305555556; sminL=40.0; 
+ggctcggagagagggggggggggggggaggggggggatccgggggggggggggtcttacaaaaaaacacaaaacacataaccactctcacacccacactcgcacacccggatgtggttgtggtgtgcgcgtggatggtgtggtgtggtttttttttcaagcagaagacggcatacgagatcggtctcggcattcctgctgaaccgctcttccgatc
++
+BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBa_a_a]aaaa_`Y`_W]Y[S]]aaaabbbbbcbbbbbbbbbbbbb`bbbbbbbbbbbbbb
+@HELIUM_000100422_612GNAAXX:7:1:8644:15229#0/2_CONS ali_length=106; direction=left; seq_ab_match=103; sminR=40.0; seq_b_mismatch=2; seq_a_mismatch=1; tail_quality=83.8; seq_b_deletion=0; mid_quality=90.8; seq_a_deletion=0; seq_b_single=2; score=399.617128202; seq_a_insertion=0; mode=alignment; error=No primer match; head_quality=87.8; avg_quality=89.8909090909; sminL=40.0; seq_a_single=2; score_norm=3.76997290756; seq_b_insertion=0; 
+cacacgctctccattgagtctctgcacctatcctttttgattttcgctttctgaatctttgtttgttttcggaaaacgtgatttggctcaggattgcccagagcgtgtgg
++
+dd~~~~~~~~~~~~~~~~~~~~~~~~{~~~~b~~~~~~~~~{|~~~~s|~v~~|p~~~~~~~~~~~u~qy~a~~~~~~~x~|~~~~z{~|~o|~{yvwwy~~|~~[~~cb
+@HELIUM_000100422_612GNAAXX:7:1:8613:5665#0/2_CONS ali_length=90; direction=right; seq_ab_match=89; sminR=40.0; seq_b_mismatch=1; seq_a_mismatch=0; tail_quality=90.2; seq_b_deletion=18; mid_quality=93.0; seq_a_deletion=18; seq_b_single=0; score=351.879189487; seq_a_insertion=0; mode=alignment; error=No primer match; head_quality=93.0; avg_quality=92.6888888889; sminL=40.0; seq_a_single=0; score_norm=3.90976877208; seq_b_insertion=0; 
+ccttcctggggtagtactctggcgaataattttgttatattaattacttgtgtttagggctaagcatagtggggtatctaaccaggaagg
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~b~~~~~~~
+@HELIUM_000100422_612GNAAXX:7:1:8823:6682#0/2_PairEnd sminR=40.0; pairend_limit=108; tail_quality=64.9; mid_quality=41.693877551; ali_dir=right; score=15.9839512414; mode=joined; error=No primer match; head_quality=33.0; avg_quality=42.3657407407; sminL=40.0; 
+gatcgggagagcgtcgtgtaggggaagagggtagatctcggggggggccgtatcattaaaaaaaaaaacaaacatcagcacgaagcacatgacaccgcctagctatcaatcttgagatgagatcagacatctgtttgttttcttttgtttttttttcaagcagaagacggcatacgatatcggtctcggcattcctgctgaaccgctcttccgatc
++
+BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB_aa]a`]^S``aabaab^bbbbab_c``abbbbbbbcbbbbbbbbbbbbbbbbbbbbbbbbbbab
b
diff -r 000000000000 -r 4df964e14378 test-data/output_obiannotate.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_obiannotate.fasta Wed Apr 12 17:36:02 2017 -0400
b
b"@@ -0,0 +1,172 @@\n+>HELIUM_000100422_612GNAAXX:7:119:14871:19157#0/2_CONS_SUB_SUB_CMP merged_sample={'29a_F260619': 1}; seq_rank=1; count=1; seq_length=100; \n+ttagccctaaacacaagtaattattataacaaaatcattcgccagagtactaccggcaat\n+agctcaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:108:5640:3823#0/2_CONS_SUB_SUB_CMP merged_sample={'29a_F260619': 1}; seq_rank=2; count=1; seq_length=100; \n+ttagccctaaacacaagtaattaatataacaaaattgttcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:97:14311:19299#0/2_CONS_SUB_SUB_CMP merged_sample={'29a_F260619': 1}; seq_rank=3; count=1; seq_length=100; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaag\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/2_CONS_SUB_SUB merged_sample={'29a_F260619': 30, '15a_F730814': 31}; count=61; seq_length=100; seq_rank=4; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:57:18459:16145#0/2_CONS_SUB_SUB_CMP merged_sample={'26a_F040644': 64}; seq_rank=5; count=64; seq_length=99; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:89:10281:5930#0/2_CONS_SUB_SUB_CMP merged_sample={'29a_F260619': 3}; seq_rank=6; count=3; seq_length=100; \n+ttagccctaaacacaaataattacacaaacaaaattgttcaccagagtactagcggcaac\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:29:15520:18035#0/2_CONS_SUB_SUB merged_sample={'29a_F260619': 2}; seq_rank=7; count=2; seq_length=100; \n+ttagccctaaacacaagtaattattataacaaaattattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:6:9274:14951#0/2_CONS_SUB_SUB merged_sample={'13a_F730603': 46}; count=46; seq_length=100; seq_rank=8; 
\n+ctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:24:16230:12703#0/2_CONS_SUB_SUB_CMP merged_sample={'15a_F730814': 1}; seq_rank=9; count=1; seq_length=100; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttatcccctt\n+>HELIUM_000100422_612GNAAXX:7:4:4214:9434#0/2_CONS_SUB_SUB merged_sample={'13a_F730603': 1}; seq_rank=10; count=1; seq_length=100; \n+ctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactacaggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:13:6954:13039#0/2_CONS_SUB_SUB_CMP merged_sample={'26a_F040644': 1}; seq_rank=11; count=1; seq_length=99; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagaggactactagcaata\n+gcttaaaactcaaaggacttggcggtgctttatatccct\n+>HELIUM_000100422_612GNAAXX:7:38:6201:12003#0/2_CONS_SUB_SUB_CMP merged_sample={'26a_F040644': 1}; seq_rank=12; count=1; seq_length=99; \n+ttagccctaaacatagataattttacaacaaaataattcgccagaggactactagcaata\n+gcttaaaactcaaagaactgggcggtgctttatatcccg\n+>HELIUM_000100422_612GNAAXX:7:115:17402:4174#0/2_CONS_SUB_SUB_CMP merged_sample={'29a_F260619': 1, '15a_F730814': 1}; seq_rank=13; count=2; seq_length=100; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactacctgcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:78:7151:19928#0/2_CONS_SUB_SUB merged_sample={'26a_F040644': 1}; seq_rank=14; count=1; seq_length=99; \n+ttagccctaaacataaacattcaataaacgagaatgttcgccagtgtacttctagcaaca\n+ggctgaaactcaaaggacttggcggtggtttacatccct\n+>HELIUM_000100422_612GNAAXX:7:38:10813:16867#0/2_CONS_SUB_SUB merged_sample={'26a_F040644': 2}; seq_rank=15; count=2; seq_length=99; \n+ttagccctaaacataaacattcaataaacaagaatgtttgccagagtactactagcaaca\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:13:5742:9859#0/2_CONS_SUB_SUB merged_sample={'29a_F260619': 7}; seq_rank=16; count=7; seq_length=4; 
\n+tttt\n+>HELIUM_000100422_612GNAAXX:7:16:16357:19948#0/2_CONS_SUB_SUB merged_sample={'26a_F040644': 1}; seq_rank=17; count=1; seq_length=99; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagca"..b"1}; seq_rank=42; count=1; seq_length=100; \n+ctagccttaaacacaaatagttatgcaaacacaactattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:105:14135:6905#0/2_CONS_SUB_SUB merged_sample={'29a_F260619': 1}; seq_rank=43; count=1; seq_length=100; \n+ttagccctaaacacaagtaattaatataacaaaattattcaccagagtactagcggcaac\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:76:18268:14980#0/2_CONS_SUB_SUB_CMP merged_sample={'29a_F260619': 1}; seq_rank=44; count=1; seq_length=100; \n+ttacccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:25:7789:3090#0/2_CONS_SUB_SUB_CMP merged_sample={'29a_F260619': 1}; seq_rank=45; count=1; seq_length=100; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactacaggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:42:19757:3800#0/2_CONS_SUB_SUB_CMP merged_sample={'15a_F730814': 1}; seq_rank=46; count=1; seq_length=99; \n+ttagccctaaacacaagtaattaatatacaaaattattcgccagagtactaccggcaata\n+gcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:116:14244:17926#0/2_CONS_SUB_SUB merged_sample={'26a_F040644': 1}; seq_rank=47; count=1; seq_length=99; \n+ttagccctaaacataaacattcaataaacaagaatgttcgcaagagtactactagcaaca\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:72:12959:5736#0/2_CONS_SUB_SUB_CMP merged_sample={'26a_F040644': 1}; seq_rank=48; count=1; seq_length=99; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca\n+gcctgaaactcaaaggacttggcggtgctttacacccct\n+>HELIUM_000100422_612GNAAXX:7:7:15538:8325#0/2_CONS_SUB_SUB_CMP merged_sample={'26a_F040644': 1}; seq_rank=49; count=1; seq_length=99; 
\n+ttagccctaaacataaacattcaataaacaagaatgtcggccagagtactactagcaaca\n+gcatgaaactcaaagaactgggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:120:16005:7955#0/2_CONS_SUB_SUB_CMP merged_sample={'26a_F040644': 1}; seq_rank=50; count=1; seq_length=98; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca\n+gccgaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:10:9237:10532#0/2_CONS_SUB_SUB_CMP merged_sample={'29a_F260619': 1}; seq_rank=51; count=1; seq_length=100; \n+ttagccctaaacacaagtaattaatataacaaaataattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:7:11998:4462#0/2_CONS_SUB_SUB merged_sample={'26a_F040644': 1}; seq_rank=52; count=1; seq_length=99; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtagtactagcaaca\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:111:18277:17779#0/2_CONS_SUB_SUB_CMP merged_sample={'26a_F040644': 1}; seq_rank=53; count=1; seq_length=99; \n+ttagccctaaacatagataattttacaacaaaataattcgccagaggactactagcaata\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:3:17077:6562#0/2_CONS_SUB_SUB_CMP merged_sample={'15a_F730814': 1}; seq_rank=54; count=1; seq_length=100; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactgccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:19:4311:13343#0/2_CONS_SUB_SUB_CMP merged_sample={'26a_F040644': 1}; seq_rank=55; count=1; seq_length=99; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactacgagcaaca\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:30:1800:7833#0/2_CONS_SUB_SUB merged_sample={'15a_F730814': 1}; seq_rank=56; count=1; seq_length=100; \n+ttagccctaaacacaagacattaatataacgagattaatcgacagagtactaccggctat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:108:9222:18258#0/2_CONS_SUB_SUB merged_sample={'13a_F730603': 1}; seq_rank=57; count=1; seq_length=100; 
\n+ctagccttaaacacaaatagttatgcagacaaaactattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:38:3005:20881#0/2_CONS_SUB_SUB_CMP merged_sample={'26a_F040644': 1}; seq_rank=58; count=1; seq_length=99; \n+ttagccctaaacatgaacattcaataaacaagaatgttcgccagagtactactagcaaca\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n"
b
diff -r 000000000000 -r 4df964e14378 test-data/output_obiclean_advanced.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_obiclean_advanced.fasta Wed Apr 12 17:36:02 2017 -0400
b
b"@@ -0,0 +1,172 @@\n+>HELIUM_000100422_612GNAAXX:7:119:14871:19157#0/2_CONS_SUB_SUB_CMP merged_sample={'29a_F260619': 1}; obiclean_internalcount=0; obiclean_singletoncount=1; count=1; seq_length=100; obiclean_headcount=0; seq_rank=1; obiclean_samplecount=1; obiclean_head=True; \n+ttagccctaaacacaagtaattattataacaaaatcattcgccagagtactaccggcaat\n+agctcaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:108:5640:3823#0/2_CONS_SUB_SUB_CMP merged_sample={'29a_F260619': 1}; obiclean_internalcount=1; obiclean_singletoncount=0; count=1; seq_length=100; obiclean_headcount=0; seq_rank=2; obiclean_samplecount=1; obiclean_head=False; \n+ttagccctaaacacaagtaattaatataacaaaattgttcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:97:14311:19299#0/2_CONS_SUB_SUB_CMP merged_sample={'29a_F260619': 1}; obiclean_internalcount=1; obiclean_singletoncount=0; count=1; seq_length=100; obiclean_headcount=0; seq_rank=3; obiclean_samplecount=1; obiclean_head=False; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaag\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/2_CONS_SUB_SUB merged_sample={'29a_F260619': 30, '15a_F730814': 31}; obiclean_internalcount=0; obiclean_singletoncount=0; count=61; seq_length=100; obiclean_headcount=2; seq_rank=4; obiclean_samplecount=2; obiclean_head=True; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:57:18459:16145#0/2_CONS_SUB_SUB_CMP merged_sample={'26a_F040644': 64}; obiclean_internalcount=0; obiclean_singletoncount=0; count=64; seq_length=99; obiclean_headcount=1; seq_rank=5; obiclean_samplecount=1; obiclean_head=True; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:89:10281:5930#0/2_CONS_SUB_SUB_CMP merged_sample={'29a_F260619': 3}; obiclean_internalcount=0; 
obiclean_singletoncount=1; count=3; seq_length=100; obiclean_headcount=0; seq_rank=6; obiclean_samplecount=1; obiclean_head=True; \n+ttagccctaaacacaaataattacacaaacaaaattgttcaccagagtactagcggcaac\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:29:15520:18035#0/2_CONS_SUB_SUB merged_sample={'29a_F260619': 2}; obiclean_internalcount=0; obiclean_singletoncount=1; count=2; seq_length=100; obiclean_headcount=0; seq_rank=7; obiclean_samplecount=1; obiclean_head=True; \n+ttagccctaaacacaagtaattattataacaaaattattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:6:9274:14951#0/2_CONS_SUB_SUB merged_sample={'13a_F730603': 46}; obiclean_internalcount=0; obiclean_singletoncount=0; count=46; seq_length=100; obiclean_headcount=1; seq_rank=8; obiclean_samplecount=1; obiclean_head=True; \n+ctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:24:16230:12703#0/2_CONS_SUB_SUB_CMP merged_sample={'15a_F730814': 1}; obiclean_internalcount=1; obiclean_singletoncount=0; count=1; seq_length=100; obiclean_headcount=0; seq_rank=9; obiclean_samplecount=1; obiclean_head=False; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttatcccctt\n+>HELIUM_000100422_612GNAAXX:7:4:4214:9434#0/2_CONS_SUB_SUB merged_sample={'13a_F730603': 1}; obiclean_internalcount=1; obiclean_singletoncount=0; count=1; seq_length=100; obiclean_headcount=0; seq_rank=10; obiclean_samplecount=1; obiclean_head=False; \n+ctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactacaggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:13:6954:13039#0/2_CONS_SUB_SUB_CMP merged_sample={'26a_F040644': 1}; obiclean_internalcount=0; obiclean_singletoncount=1; count=1; seq_length=99; obiclean_headcount=0; seq_rank=11; obiclean_samplecount=1; obiclean_head=True; 
\n+ttagccctaaacataaacattcaataaacaagaatgttcgccagaggactactagcaata\n+gcttaaaactcaaaggacttggcggtgctttatatc"..b"gaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:72:12959:5736#0/2_CONS_SUB_SUB_CMP merged_sample={'26a_F040644': 1}; obiclean_internalcount=1; obiclean_singletoncount=0; count=1; seq_length=99; obiclean_headcount=0; seq_rank=48; obiclean_samplecount=1; obiclean_head=False; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca\n+gcctgaaactcaaaggacttggcggtgctttacacccct\n+>HELIUM_000100422_612GNAAXX:7:7:15538:8325#0/2_CONS_SUB_SUB_CMP merged_sample={'26a_F040644': 1}; obiclean_internalcount=0; obiclean_singletoncount=1; count=1; seq_length=99; obiclean_headcount=0; seq_rank=49; obiclean_samplecount=1; obiclean_head=True; \n+ttagccctaaacataaacattcaataaacaagaatgtcggccagagtactactagcaaca\n+gcatgaaactcaaagaactgggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:120:16005:7955#0/2_CONS_SUB_SUB_CMP merged_sample={'26a_F040644': 1}; obiclean_internalcount=1; obiclean_singletoncount=0; count=1; seq_length=98; obiclean_headcount=0; seq_rank=50; obiclean_samplecount=1; obiclean_head=False; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca\n+gccgaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:10:9237:10532#0/2_CONS_SUB_SUB_CMP merged_sample={'29a_F260619': 1}; obiclean_internalcount=1; obiclean_singletoncount=0; count=1; seq_length=100; obiclean_headcount=0; seq_rank=51; obiclean_samplecount=1; obiclean_head=False; \n+ttagccctaaacacaagtaattaatataacaaaataattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:7:11998:4462#0/2_CONS_SUB_SUB merged_sample={'26a_F040644': 1}; obiclean_internalcount=1; obiclean_singletoncount=0; count=1; seq_length=99; obiclean_headcount=0; seq_rank=52; obiclean_samplecount=1; obiclean_head=False; 
\n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtagtactagcaaca\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:111:18277:17779#0/2_CONS_SUB_SUB_CMP merged_sample={'26a_F040644': 1}; obiclean_internalcount=0; obiclean_singletoncount=1; count=1; seq_length=99; obiclean_headcount=0; seq_rank=53; obiclean_samplecount=1; obiclean_head=True; \n+ttagccctaaacatagataattttacaacaaaataattcgccagaggactactagcaata\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:3:17077:6562#0/2_CONS_SUB_SUB_CMP merged_sample={'15a_F730814': 1}; obiclean_internalcount=1; obiclean_singletoncount=0; count=1; seq_length=100; obiclean_headcount=0; seq_rank=54; obiclean_samplecount=1; obiclean_head=False; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactgccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:19:4311:13343#0/2_CONS_SUB_SUB_CMP merged_sample={'26a_F040644': 1}; obiclean_internalcount=1; obiclean_singletoncount=0; count=1; seq_length=99; obiclean_headcount=0; seq_rank=55; obiclean_samplecount=1; obiclean_head=False; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactacgagcaaca\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:30:1800:7833#0/2_CONS_SUB_SUB merged_sample={'15a_F730814': 1}; obiclean_internalcount=0; obiclean_singletoncount=1; count=1; seq_length=100; obiclean_headcount=0; seq_rank=56; obiclean_samplecount=1; obiclean_head=True; \n+ttagccctaaacacaagacattaatataacgagattaatcgacagagtactaccggctat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:108:9222:18258#0/2_CONS_SUB_SUB merged_sample={'13a_F730603': 1}; obiclean_internalcount=1; obiclean_singletoncount=0; count=1; seq_length=100; obiclean_headcount=0; seq_rank=57; obiclean_samplecount=1; obiclean_head=False; \n+ctagccttaaacacaaatagttatgcagacaaaactattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:38:3005:20881#0/2_CONS_SUB_SUB_CMP 
merged_sample={'26a_F040644': 1}; obiclean_internalcount=1; obiclean_singletoncount=0; count=1; seq_length=99; obiclean_headcount=0; seq_rank=58; obiclean_samplecount=1; obiclean_head=False; \n+ttagccctaaacatgaacattcaataaacaagaatgttcgccagagtactactagcaaca\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n"
b
diff -r 000000000000 -r 4df964e14378 test-data/output_obiclean_simple.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_obiclean_simple.fasta Wed Apr 12 17:36:02 2017 -0400
b
b"@@ -0,0 +1,67 @@\n+>HELIUM_000100422_612GNAAXX:7:119:14871:19157#0/2_CONS_SUB_SUB_CMP count=1; obiclean_count={'XXX': 1}; obiclean_head=True; obiclean_cluster={'XXX': 'HELIUM_000100422_612GNAAXX:7:119:14871:19157#0/2_CONS_SUB_SUB_CMP'}; seq_length=100; obiclean_internalcount=0; obiclean_status={'XXX': 's'}; seq_rank=1; merged_sample={'29a_F260619': 1}; obiclean_samplecount=1; obiclean_headcount=0; obiclean_singletoncount=1; \n+ttagccctaaacacaagtaattattataacaaaatcattcgccagagtactaccggcaat\n+agctcaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/2_CONS_SUB_SUB count=61; obiclean_count={'XXX': 81}; obiclean_head=True; obiclean_cluster={'XXX': 'HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/2_CONS_SUB_SUB'}; seq_length=100; obiclean_internalcount=0; obiclean_status={'XXX': 'h'}; seq_rank=4; merged_sample={'29a_F260619': 30, '15a_F730814': 31}; obiclean_samplecount=1; obiclean_headcount=1; obiclean_singletoncount=0; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:57:18459:16145#0/2_CONS_SUB_SUB_CMP count=64; obiclean_count={'XXX': 77}; obiclean_head=True; obiclean_cluster={'XXX': 'HELIUM_000100422_612GNAAXX:7:57:18459:16145#0/2_CONS_SUB_SUB_CMP'}; seq_length=99; obiclean_internalcount=0; obiclean_status={'XXX': 'h'}; seq_rank=5; merged_sample={'26a_F040644': 64}; obiclean_samplecount=1; obiclean_headcount=1; obiclean_singletoncount=0; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:89:10281:5930#0/2_CONS_SUB_SUB_CMP count=3; obiclean_count={'XXX': 3}; obiclean_head=True; obiclean_cluster={'XXX': 'HELIUM_000100422_612GNAAXX:7:89:10281:5930#0/2_CONS_SUB_SUB_CMP'}; seq_length=100; obiclean_internalcount=0; obiclean_status={'XXX': 's'}; seq_rank=6; merged_sample={'29a_F260619': 3}; obiclean_samplecount=1; obiclean_headcount=0; obiclean_singletoncount=1; 
\n+ttagccctaaacacaaataattacacaaacaaaattgttcaccagagtactagcggcaac\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:6:9274:14951#0/2_CONS_SUB_SUB count=46; obiclean_count={'XXX': 52}; obiclean_head=True; obiclean_cluster={'XXX': 'HELIUM_000100422_612GNAAXX:7:6:9274:14951#0/2_CONS_SUB_SUB'}; seq_length=100; obiclean_internalcount=0; obiclean_status={'XXX': 'h'}; seq_rank=8; merged_sample={'13a_F730603': 46}; obiclean_samplecount=1; obiclean_headcount=1; obiclean_singletoncount=0; \n+ctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:13:6954:13039#0/2_CONS_SUB_SUB_CMP count=1; obiclean_count={'XXX': 1}; obiclean_head=True; obiclean_cluster={'XXX': 'HELIUM_000100422_612GNAAXX:7:13:6954:13039#0/2_CONS_SUB_SUB_CMP'}; seq_length=99; obiclean_internalcount=0; obiclean_status={'XXX': 's'}; seq_rank=11; merged_sample={'26a_F040644': 1}; obiclean_samplecount=1; obiclean_headcount=0; obiclean_singletoncount=1; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagaggactactagcaata\n+gcttaaaactcaaaggacttggcggtgctttatatccct\n+>HELIUM_000100422_612GNAAXX:7:38:6201:12003#0/2_CONS_SUB_SUB_CMP count=1; obiclean_count={'XXX': 1}; obiclean_head=True; obiclean_cluster={'XXX': 'HELIUM_000100422_612GNAAXX:7:38:6201:12003#0/2_CONS_SUB_SUB_CMP'}; seq_length=99; obiclean_internalcount=0; obiclean_status={'XXX': 's'}; seq_rank=12; merged_sample={'26a_F040644': 1}; obiclean_samplecount=1; obiclean_headcount=0; obiclean_singletoncount=1; \n+ttagccctaaacatagataattttacaacaaaataattcgccagaggactactagcaata\n+gcttaaaactcaaagaactgggcggtgctttatatcccg\n+>HELIUM_000100422_612GNAAXX:7:78:7151:19928#0/2_CONS_SUB_SUB count=1; obiclean_count={'XXX': 1}; obiclean_head=True; obiclean_cluster={'XXX': 'HELIUM_000100422_612GNAAXX:7:78:7151:19928#0/2_CONS_SUB_SUB'}; seq_length=99; obiclean_internalcount=0; obiclean_status={'XXX': 's'}; seq_rank=14; merged_sample={'26a_F040644': 1}; obiclean_samplecount=1; 
obiclean_headcount=0; obiclean_singletoncoun"..b"ttacaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:111:8478:1127#0/2_CONS_SUB_SUB_CMP count=1; obiclean_count={'XXX': 1}; obiclean_head=True; obiclean_cluster={'XXX': 'HELIUM_000100422_612GNAAXX:7:111:8478:1127#0/2_CONS_SUB_SUB_CMP'}; seq_length=99; obiclean_internalcount=0; obiclean_status={'XXX': 's'}; seq_rank=33; merged_sample={'26a_F040644': 1}; obiclean_samplecount=1; obiclean_headcount=0; obiclean_singletoncount=1; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca\n+gcctggaacgcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:81:15726:2345#0/2_CONS_SUB_SUB_CMP count=1; obiclean_count={'XXX': 1}; obiclean_head=True; obiclean_cluster={'XXX': 'HELIUM_000100422_612GNAAXX:7:81:15726:2345#0/2_CONS_SUB_SUB_CMP'}; seq_length=99; obiclean_internalcount=0; obiclean_status={'XXX': 's'}; seq_rank=34; merged_sample={'26a_F040644': 1}; obiclean_samplecount=1; obiclean_headcount=0; obiclean_singletoncount=1; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca\n+gcctgaaactcaaagcactcggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:7:11110:17924#0/2_CONS_SUB_SUB count=1; obiclean_count={'XXX': 1}; obiclean_head=True; obiclean_cluster={'XXX': 'HELIUM_000100422_612GNAAXX:7:7:11110:17924#0/2_CONS_SUB_SUB'}; seq_length=100; obiclean_internalcount=0; obiclean_status={'XXX': 's'}; seq_rank=35; merged_sample={'15a_F730814': 1}; obiclean_samplecount=1; obiclean_headcount=0; obiclean_singletoncount=1; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtcataccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:115:3625:10051#0/2_CONS_SUB_SUB count=1; obiclean_count={'XXX': 1}; obiclean_head=True; obiclean_cluster={'XXX': 'HELIUM_000100422_612GNAAXX:7:115:3625:10051#0/2_CONS_SUB_SUB'}; seq_length=5; obiclean_internalcount=0; obiclean_status={'XXX': 's'}; seq_rank=38; merged_sample={'13a_F730603': 1}; obiclean_samplecount=1; obiclean_headcount=0; 
obiclean_singletoncount=1; \n+caata\n+>HELIUM_000100422_612GNAAXX:7:105:14135:6905#0/2_CONS_SUB_SUB count=1; obiclean_count={'XXX': 1}; obiclean_head=True; obiclean_cluster={'XXX': 'HELIUM_000100422_612GNAAXX:7:105:14135:6905#0/2_CONS_SUB_SUB'}; seq_length=100; obiclean_internalcount=0; obiclean_status={'XXX': 's'}; seq_rank=43; merged_sample={'29a_F260619': 1}; obiclean_samplecount=1; obiclean_headcount=0; obiclean_singletoncount=1; \n+ttagccctaaacacaagtaattaatataacaaaattattcaccagagtactagcggcaac\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:7:15538:8325#0/2_CONS_SUB_SUB_CMP count=1; obiclean_count={'XXX': 1}; obiclean_head=True; obiclean_cluster={'XXX': 'HELIUM_000100422_612GNAAXX:7:7:15538:8325#0/2_CONS_SUB_SUB_CMP'}; seq_length=99; obiclean_internalcount=0; obiclean_status={'XXX': 's'}; seq_rank=49; merged_sample={'26a_F040644': 1}; obiclean_samplecount=1; obiclean_headcount=0; obiclean_singletoncount=1; \n+ttagccctaaacataaacattcaataaacaagaatgtcggccagagtactactagcaaca\n+gcatgaaactcaaagaactgggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:111:18277:17779#0/2_CONS_SUB_SUB_CMP count=1; obiclean_count={'XXX': 1}; obiclean_head=True; obiclean_cluster={'XXX': 'HELIUM_000100422_612GNAAXX:7:111:18277:17779#0/2_CONS_SUB_SUB_CMP'}; seq_length=99; obiclean_internalcount=0; obiclean_status={'XXX': 's'}; seq_rank=53; merged_sample={'26a_F040644': 1}; obiclean_samplecount=1; obiclean_headcount=0; obiclean_singletoncount=1; \n+ttagccctaaacatagataattttacaacaaaataattcgccagaggactactagcaata\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:30:1800:7833#0/2_CONS_SUB_SUB count=1; obiclean_count={'XXX': 1}; obiclean_head=True; obiclean_cluster={'XXX': 'HELIUM_000100422_612GNAAXX:7:30:1800:7833#0/2_CONS_SUB_SUB'}; seq_length=100; obiclean_internalcount=0; obiclean_status={'XXX': 's'}; seq_rank=56; merged_sample={'15a_F730814': 1}; obiclean_samplecount=1; obiclean_headcount=0; obiclean_singletoncount=1; 
\n+ttagccctaaacacaagacattaatataacgagattaatcgacagagtactaccggctat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n"
b
diff -r 000000000000 -r 4df964e14378 test-data/output_obiconvert.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_obiconvert.fasta Wed Apr 12 17:36:02 2017 -0400
b
b"@@ -0,0 +1,172 @@\n+>HELIUM_000100422_612GNAAXX:7:119:14871:19157#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'29a_F260619': 1}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=100; obiclean_headcount=0; seq_rank=1; obiclean_samplecount=1; obiclean_head=True; \n+TTAGCCCTAAACACAAGTAATTATTATAACAAAATCATTCGCCAGAGTACTACCGGCAAT\n+AGCTCAAAACTCAAAGGACTTGGCGGTGCTTTATACCCTT\n+>HELIUM_000100422_612GNAAXX:7:108:5640:3823#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'29a_F260619': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=100; obiclean_headcount=0; seq_rank=2; obiclean_samplecount=1; obiclean_head=False; \n+TTAGCCCTAAACACAAGTAATTAATATAACAAAATTGTTCGCCAGAGTACTACCGGCAAT\n+AGCTTAAAACTCAAAGGACTTGGCGGTGCTTTATACCCTT\n+>HELIUM_000100422_612GNAAXX:7:97:14311:19299#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'29a_F260619': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=100; obiclean_headcount=0; seq_rank=3; obiclean_samplecount=1; obiclean_head=False; \n+TTAGCCCTAAACACAAGTAATTAATATAACAAAATTATTCGCCAGAGTACTACCGGCAAG\n+AGCTTAAAACTCAAAGGACTTGGCGGTGCTTTATACCCTT\n+>HELIUM_000100422_612GNAAXX:7:24:16230:12703#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'15a_F730814': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=100; obiclean_headcount=0; seq_rank=9; obiclean_samplecount=1; obiclean_head=False; \n+TTAGCCCTAAACACAAGTAATTAATATAACAAAATTATTCGCCAGAGTACTACCGGCAAT\n+AGCTTAAAACTCAAAGGACTTGGCGGTGCTTTATCCCCTT\n+>HELIUM_000100422_612GNAAXX:7:4:4214:9434#0/2_CONS_SUB_SUB count=1; merged_sample={'13a_F730603': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=100; obiclean_headcount=0; seq_rank=10; obiclean_samplecount=1; obiclean_head=False; \n+CTAGCCTTAAACACAAATAGTTATGCAAACAAAACTATTCGCCAGAGTACTACAGGCAAT\n+AGCTTAAAACTCAAAGGACTTGGCGGTGCTTTATACCCTT\n+>HELIUM_000100422_612GNAAXX:7:13:6954:13039#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'26a_F040644': 1}; obiclean_singletoncount=1; 
obiclean_internalcount=0; seq_length=99; obiclean_headcount=0; seq_rank=11; obiclean_samplecount=1; obiclean_head=True; \n+TTAGCCCTAAACATAAACATTCAATAAACAAGAATGTTCGCCAGAGGACTACTAGCAATA\n+GCTTAAAACTCAAAGGACTTGGCGGTGCTTTATATCCCT\n+>HELIUM_000100422_612GNAAXX:7:38:6201:12003#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'26a_F040644': 1}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=99; obiclean_headcount=0; seq_rank=12; obiclean_samplecount=1; obiclean_head=True; \n+TTAGCCCTAAACATAGATAATTTTACAACAAAATAATTCGCCAGAGGACTACTAGCAATA\n+GCTTAAAACTCAAAGAACTGGGCGGTGCTTTATATCCCG\n+>HELIUM_000100422_612GNAAXX:7:78:7151:19928#0/2_CONS_SUB_SUB count=1; merged_sample={'26a_F040644': 1}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=99; obiclean_headcount=0; seq_rank=14; obiclean_samplecount=1; obiclean_head=True; \n+TTAGCCCTAAACATAAACATTCAATAAACGAGAATGTTCGCCAGTGTACTTCTAGCAACA\n+GGCTGAAACTCAAAGGACTTGGCGGTGGTTTACATCCCT\n+>HELIUM_000100422_612GNAAXX:7:16:16357:19948#0/2_CONS_SUB_SUB count=1; merged_sample={'26a_F040644': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=99; obiclean_headcount=0; seq_rank=17; obiclean_samplecount=1; obiclean_head=False; \n+TTAGCCCTAAACATAAACATTCAATAAACAAGAATGTTCGCCAGAGTACTACTAGCAACA\n+GCCTGAAACTCACAGGACTTGGCGGTGCTTTACATCCCT\n+>HELIUM_000100422_612GNAAXX:7:107:3570:18147#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'29a_F260619': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=100; obiclean_headcount=0; seq_rank=18; obiclean_samplecount=1; obiclean_head=False; \n+TTAGCCCTAAACACAAGTAATTAATATAACAAAATTATTCGCCAGAGTACTACCGGCAAT\n+AGCATAAAACTCAAAGGACTTGGCGGTGCTTTATACCCTT\n+>HELIUM_000100422_612GNAAXX:7:114:4511:17596#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'29a_F260619': 1}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=100; obiclean_headcount=0; seq_rank=19; obiclean_samplecount=1; obiclean_head=True; 
\n+TTAGCCCTAAACACAAGTAATTAATATAACAAAATTATTCGCCAGAGTGCTACCGGCAAT\n+AGCTTAAAACTCAAAGGACTTGGCGGTGCTTTATACGCTT\n+>HELIUM_000"..b" \n+TTAGCCCTAAACACAAGACATTAATATAACGAGATTAATCGACAGAGTACTACCGGCTAT\n+AGCTTAAAACTCAAAGGACTTGGCGGTGCTTTATACCCTT\n+>HELIUM_000100422_612GNAAXX:7:108:9222:18258#0/2_CONS_SUB_SUB count=1; merged_sample={'13a_F730603': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=100; obiclean_headcount=0; seq_rank=57; obiclean_samplecount=1; obiclean_head=False; \n+CTAGCCTTAAACACAAATAGTTATGCAGACAAAACTATTCGCCAGAGTACTACCGGCAAT\n+AGCTTAAAACTCAAAGGACTTGGCGGTGCTTTATACCCTT\n+>HELIUM_000100422_612GNAAXX:7:38:3005:20881#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'26a_F040644': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=99; obiclean_headcount=0; seq_rank=58; obiclean_samplecount=1; obiclean_head=False; \n+TTAGCCCTAAACATGAACATTCAATAAACAAGAATGTTCGCCAGAGTACTACTAGCAACA\n+GCCTGAAACTCAAAGGACTTGGCGGTGCTTTACATCCCT\n+>HELIUM_000100422_612GNAAXX:7:29:15520:18035#0/2_CONS_SUB_SUB count=2; merged_sample={'29a_F260619': 2}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=100; obiclean_headcount=0; seq_rank=7; obiclean_samplecount=1; obiclean_head=True; \n+TTAGCCCTAAACACAAGTAATTATTATAACAAAATTATTCGCCAGAGTACTACCGGCAAT\n+AGCTTAAAACTCAAAGGACTTGGCGGTGCTTTATACCCTT\n+>HELIUM_000100422_612GNAAXX:7:115:17402:4174#0/2_CONS_SUB_SUB_CMP count=2; merged_sample={'29a_F260619': 1, '15a_F730814': 1}; obiclean_singletoncount=0; obiclean_internalcount=2; seq_length=100; obiclean_headcount=0; seq_rank=13; obiclean_samplecount=2; obiclean_head=False; \n+TTAGCCCTAAACACAAGTAATTAATATAACAAAATTATTCGCCAGAGTACTACCTGCAAT\n+AGCTTAAAACTCAAAGGACTTGGCGGTGCTTTATACCCTT\n+>HELIUM_000100422_612GNAAXX:7:38:10813:16867#0/2_CONS_SUB_SUB count=2; merged_sample={'26a_F040644': 2}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=99; obiclean_headcount=0; seq_rank=15; obiclean_samplecount=1; obiclean_head=False; 
\n+TTAGCCCTAAACATAAACATTCAATAAACAAGAATGTTTGCCAGAGTACTACTAGCAACA\n+GCCTGAAACTCAAAGGACTTGGCGGTGCTTTACATCCCT\n+>HELIUM_000100422_612GNAAXX:7:70:11798:2668#0/2_CONS_SUB_SUB count=2; merged_sample={'15a_F730814': 2}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=100; obiclean_headcount=0; seq_rank=20; obiclean_samplecount=1; obiclean_head=True; \n+TTAGCCCTAAACACAAGTAATTAATATAACAAAATTATTCGCCAGAGTACTACCGGCAAT\n+AGCTTAAAACTCAAAGGACTTGGCGGTGCTTTATACCTTT\n+>HELIUM_000100422_612GNAAXX:7:89:10281:5930#0/2_CONS_SUB_SUB_CMP count=3; merged_sample={'29a_F260619': 3}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=100; obiclean_headcount=0; seq_rank=6; obiclean_samplecount=1; obiclean_head=True; \n+TTAGCCCTAAACACAAATAATTACACAAACAAAATTGTTCACCAGAGTACTAGCGGCAAC\n+AGCTTAAAACTCAAAGGACTTGGCGGTGCTTTATACCCTT\n+>HELIUM_000100422_612GNAAXX:7:13:5742:9859#0/2_CONS_SUB_SUB count=7; merged_sample={'29a_F260619': 7}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=4; obiclean_headcount=0; seq_rank=16; obiclean_samplecount=1; obiclean_head=True; \n+TTTT\n+>HELIUM_000100422_612GNAAXX:7:6:9274:14951#0/2_CONS_SUB_SUB count=46; merged_sample={'13a_F730603': 46}; obiclean_singletoncount=0; obiclean_internalcount=0; seq_length=100; obiclean_headcount=1; seq_rank=8; obiclean_samplecount=1; obiclean_head=True; \n+CTAGCCTTAAACACAAATAGTTATGCAAACAAAACTATTCGCCAGAGTACTACCGGCAAT\n+AGCTTAAAACTCAAAGGACTTGGCGGTGCTTTATACCCTT\n+>HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/2_CONS_SUB_SUB count=61; merged_sample={'29a_F260619': 30, '15a_F730814': 31}; obiclean_singletoncount=0; obiclean_internalcount=0; seq_length=100; obiclean_headcount=2; seq_rank=4; obiclean_samplecount=2; obiclean_head=True; \n+TTAGCCCTAAACACAAGTAATTAATATAACAAAATTATTCGCCAGAGTACTACCGGCAAT\n+AGCTTAAAACTCAAAGGACTTGGCGGTGCTTTATACCCTT\n+>HELIUM_000100422_612GNAAXX:7:57:18459:16145#0/2_CONS_SUB_SUB_CMP count=64; merged_sample={'26a_F040644': 64}; obiclean_singletoncount=0; obiclean_internalcount=0; 
seq_length=99; obiclean_headcount=1; seq_rank=5; obiclean_samplecount=1; obiclean_head=True; \n+TTAGCCCTAAACATAAACATTCAATAAACAAGAATGTTCGCCAGAGTACTACTAGCAACA\n+GCCTGAAACTCAAAGGACTTGGCGGTGCTTTACATCCCT\n"
b
diff -r 000000000000 -r 4df964e14378 test-data/output_obigrep_lmin.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_obigrep_lmin.fastq Wed Apr 12 17:36:02 2017 -0400
b
@@ -0,0 +1,32 @@
+@HELIUM_000100422_612GNAAXX:7:13:5742:9859#0/2_CONS ali_length=58; direction=right; seq_ab_match=55; sminR=40.0; seq_a_mismatch=3; seq_b_deletion=50; seq_b_mismatch=0; seq_a_deletion=50; seq_b_single=0; score=207.846895043; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=0; score_norm=3.58356715591; seq_b_insertion=0; 
+ccgcctcctttagataccccactatgcttttctagaggagcctgttctaaggaggcgg
++
+~~~~~~Z~~~~~~~~~b~~~~~~~~~_~~~~~~~~~~}~~~~~~~~~~~~~~~~~~~~
+@HELIUM_000100422_612GNAAXX:7:7:7916:16505#0/2_CONS ali_length=58; direction=right; seq_ab_match=58; sminR=40.0; seq_a_mismatch=0; seq_b_deletion=50; seq_b_mismatch=0; seq_a_deletion=50; seq_b_single=0; score=231.999536374; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=0; score_norm=3.99999200644; seq_b_insertion=0; 
+ccgcctccttagaacaggctcctctagaaaagcatagtggggtatctaaaggaggcgg
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@HELIUM_000100422_612GNAAXX:7:37:12979:6595#0/2_CONS ali_length=58; direction=right; seq_ab_match=58; sminR=40.0; seq_a_mismatch=0; seq_b_deletion=50; seq_b_mismatch=0; seq_a_deletion=50; seq_b_single=0; score=231.998796685; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=0; score_norm=3.99997925319; seq_b_insertion=0; 
+ccgcctcctttagataccccactatgcttttctagaggagcctgttctaaggaggcgg
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@HELIUM_000100422_612GNAAXX:7:96:8737:4973#0/2_CONS ali_length=58; direction=right; seq_ab_match=57; sminR=40.0; seq_a_mismatch=1; seq_b_deletion=50; seq_b_mismatch=0; seq_a_deletion=50; seq_b_single=0; score=223.980140585; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=0; score_norm=3.86172656181; seq_b_insertion=0; 
+ccgcctccttagaacaggctcctctagaaaagcatagtggggtatctaaaggaggcgg
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~b~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@HELIUM_000100422_612GNAAXX:7:115:3625:10051#0/2_CONS ali_length=59; direction=right; seq_ab_match=59; sminR=40.0; seq_a_mismatch=0; seq_b_deletion=49; seq_b_mismatch=0; seq_a_deletion=49; seq_b_single=0; score=235.999642017; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=0; score_norm=3.99999393249; seq_b_insertion=0; 
+ccaattaacttagataccccactatgccaatactagaggagcctgttctagttaattgg
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@HELIUM_000100422_612GNAAXX:7:73:17068:16620#0/2_CONS ali_length=58; direction=right; seq_ab_match=57; sminR=40.0; seq_a_mismatch=0; seq_b_deletion=50; seq_b_mismatch=1; seq_a_deletion=50; seq_b_single=0; score=223.962007947; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=0; score_norm=3.86141393012; seq_b_insertion=0; 
+ccgcctccttagaacaggctcctctagaaaagcatagtggggtatctaaaggaggcgg
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~b~~~~~~~~~~~~~|~
+@HELIUM_000100422_612GNAAXX:7:95:11283:2557#0/2_CONS ali_length=58; direction=right; seq_ab_match=57; sminR=40.0; seq_a_mismatch=0; seq_b_deletion=50; seq_b_mismatch=1; seq_a_deletion=50; seq_b_single=0; score=223.999078974; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=0; score_norm=3.86205308576; seq_b_insertion=0; 
+ccgcctccttagaacaggctcctctagaaaagcatagtggggtatctaaaggaggcgg
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~a~~~~~~~~~~~~~~~
+@HELIUM_000100422_612GNAAXX:7:75:9674:16966#0/2_CONS ali_length=58; direction=right; seq_ab_match=57; sminR=40.0; seq_a_mismatch=0; seq_b_deletion=50; seq_b_mismatch=1; seq_a_deletion=50; seq_b_single=0; score=224.000075421; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=0; score_norm=3.86207026589; seq_b_insertion=0; 
+ccgcctccttagaacaggctcctctagaaaagcatagtggggtatctaaaggaggcgg
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~b~~~~~~~~~~~~~~~
b
diff -r 000000000000 -r 4df964e14378 test-data/output_obigrep_predicat.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_obigrep_predicat.fasta Wed Apr 12 17:36:02 2017 -0400
[
b'@@ -0,0 +1,980 @@\n+@HELIUM_000100422_612GNAAXX:7:119:14871:19157#0/2_CONS ali_length=62; direction=left; seq_ab_match=47; sminR=40.0; seq_a_mismatch=1; seq_b_deletion=1; seq_b_mismatch=13; seq_a_deletion=0; seq_b_single=46; score=115.761290673; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=46; score_norm=1.86711759151; seq_b_insertion=1; \n+ccgcctccttagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttgagctattgccggtagtactctggcgaatgattttgttataataattacttgtgtttagggctaagcatagtggggtatctaaaggaggcgg\n++\n+dddddddddddbbdaddcddddddcadaddddaadcccWaaddabdx~~|b~~~~cccccccBcBcccBcBcccBccBBcc~}`kXyxu|~Kyr`b~~~~~~b~~~b~d\\cdddddddddddddcacddddcdddddddddddddddddddddd\n+@HELIUM_000100422_612GNAAXX:7:108:5640:3823#0/2_CONS ali_length=62; direction=left; seq_ab_match=60; sminR=40.0; seq_a_mismatch=0; seq_b_deletion=0; seq_b_mismatch=2; seq_a_deletion=0; seq_b_single=46; score=231.881364714; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=46; score_norm=3.74002201152; seq_b_insertion=0; \n+ccgcctccttagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctattgccggtagtactctggcgaacaattttgttatattaattacttgtgtttagggctaagcatagtggggtatctaaaggaggcgg\n++\n+ddddddddddddddddbdddddbddddddddddddddddddddddd~~~~~~~~~~~~~~~~~~}~~~~~~~~~~~~~~~~~~~~~~~~~~~Z~~~~~~~~~~~~~b~d]dddddddcdddddcddddddddddddddddddddddcddddddd\n+@HELIUM_000100422_612GNAAXX:7:97:14311:19299#0/2_CONS ali_length=62; direction=left; seq_ab_match=52; sminR=40.0; seq_a_mismatch=5; seq_b_deletion=0; seq_b_mismatch=5; seq_a_deletion=0; seq_b_single=46; score=167.91734192; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=46; score_norm=2.70834422451; seq_b_insertion=0; 
\n+ccgcctccttagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctcttgccggtagtactctggcgaataattttgttatattaattacttgtgtttagggctaagcatagtggggtatctaaaggaggcgg\n++\n+ddadddddcd`dcddddddddd^dd]ddddddZad`\\`]Lcca``YT|~~^||v|z|~ts~~~|r~sZ^~~`~~~~~~~~~~~~~~~~V~b~]~~~~b~~~~~~~~`~b\\aadddd`dcdddddc`dddccddddddddddddddddddddddd\n+@HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/2_CONS ali_length=62; direction=left; seq_ab_match=59; sminR=40.0; seq_a_mismatch=0; seq_b_deletion=0; seq_b_mismatch=3; seq_a_deletion=0; seq_b_single=46; score=223.982011431; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=46; score_norm=3.61261308759; seq_b_insertion=0; \n+ccgcctcctttagataccccactatgcttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaatagcttaaaactcaaaggacttggcggtgctttatacccttctagaggagcctgttctaaggaggcgg\n++\n+dddedddddddddedddddddddedddaddbddddddddddddedd~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~b~b~~~~~~~~b~~~~ddddddddddabbb^bdddddddddddcddddddddcddddddddd\n+@HELIUM_000100422_612GNAAXX:7:57:18459:16145#0/2_CONS ali_length=63; direction=left; seq_ab_match=60; sminR=40.0; seq_a_mismatch=3; seq_b_deletion=0; seq_b_mismatch=0; seq_a_deletion=0; seq_b_single=45; score=227.883189945; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=45; score_norm=3.61719349119; seq_b_insertion=0; \n+ccgaatatctagaacaggctcctctagagggatgtaaagcaccgccaagtcctttgagtttcaggctgttgctagtagtactctggcgaacattcttgtttattgaatgtttatgtttagggctaagcatagtggggtatctaagatattcgg\n++\n+ddcdddddddddddddddeddddddddddddddddddddcddddd~b~~~~~~b~~~~~~~~~~~~~~c~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ddddddddddddddddddddddddddddddddddddddddddddd\n+@HELIUM_000100422_612GNAAXX:7:89:10281:5930#0/2_CONS ali_length=62; direction=left; seq_ab_match=60; sminR=40.0; seq_a_mismatch=2; seq_b_deletion=0; seq_b_mismatch=0; seq_a_deletion=0; seq_b_single=46; score=231.893859399; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=46; score_norm=3.7402235387; seq_b_insertion=0; 
\n+ccgcctccttagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctgttgccgctagtactctggtgaacaattttgtttgtgtaattatttgtgtttagggctaagcatagtggggtatctaaaggaggcgg\n++\n+ddddddddddddddddddcddddddddddddd\\ddeddecddddddb~~~b~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ddddcddddddcdddddddddddddddddddddddddddddddddd\n+@HELIUM_000100422_612GNAAXX:7:29:15520:18035#0/2_CONS ali_length=62; direction=left; '..b'tagataccccactatgcttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaacagcctgaaactcaaaggacttggcggtgctttacatccctctagaggagcctgttctagatattcgg\n++\n+ddddddcddddddddddddd`dcdddddaddaddddbdddddddc~~~~~~~~~{~~~~~~~~zxy~~|~~~~ru~~~~~~~~~~~~~~~~~~{~~p~~~~~~~~~~~ddddddddddabaP`ddddabddddddddddddddddcdcdaddd\n+@HELIUM_000100422_612GNAAXX:7:75:9674:16966#0/2_CONS ali_length=58; direction=right; seq_ab_match=57; sminR=40.0; seq_a_mismatch=0; seq_b_deletion=50; seq_b_mismatch=1; seq_a_deletion=50; seq_b_single=0; score=224.000075421; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=0; score_norm=3.86207026589; seq_b_insertion=0; \n+ccgcctccttagaacaggctcctctagaaaagcatagtggggtatctaaaggaggcgg\n++\n+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~b~~~~~~~~~~~~~~~\n+@HELIUM_000100422_612GNAAXX:7:59:15367:8376#0/2_CONS ali_length=62; direction=left; seq_ab_match=62; sminR=40.0; seq_a_mismatch=0; seq_b_deletion=0; seq_b_mismatch=0; seq_a_deletion=0; seq_b_single=46; score=247.996374666; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=46; score_norm=3.99994152687; seq_b_insertion=0; \n+ccgaagtagttagataccccactatgcttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaatagcttaaaactcaaaggacttggcggtgctttatacccttctagaggagcctgttctactacttcgg\n++\n+ddddddddddddddddddddddddddddddddddddddddddddad~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~dddddddddddddedddddddddddddddddddddddddddddddd\n+@HELIUM_000100422_612GNAAXX:7:29:1328:13185#0/2_CONS ali_length=63; direction=left; seq_ab_match=59; sminR=40.0; seq_a_mismatch=0; seq_b_deletion=0; seq_b_mismatch=4; 
seq_a_deletion=0; seq_b_single=45; score=223.94591652; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=45; score_norm=3.55469708763; seq_b_insertion=0; \n+ccgaatatcttagataccccactatgcttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaacagcctgaaactcaaaggacttggcggtgctttacatccctctagaggagcctgttctagatattcgg\n++\n+ddddddddddddccdddddddddeddddddddddddddddedadd~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~W[~~~~~~~~~~~~~~~~~~~~~~~b~~~b~dddcdddddddcddddddddddddddddddddddddddddddddd\n+@HELIUM_000100422_612GNAAXX:7:30:16081:4486#0/2_CONS ali_length=63; direction=left; seq_ab_match=62; sminR=40.0; seq_a_mismatch=0; seq_b_deletion=0; seq_b_mismatch=1; seq_a_deletion=0; seq_b_single=45; score=243.950667893; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=45; score_norm=3.87223282369; seq_b_insertion=0; \n+ccgaatatctagaacaggctcctctagagggatgtaaagcaccgccaagtcctttgagtttcaggctgttgctagtagtactctggcgaacattcttgtttattgaatgtttatgtttagggctaagcatagtggggtatctaagatattcgg\n++\n+ddddddddddddddddddddddddddddddddddddddcdddddd~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~c~~~dddeddddddddddadddddddddddddddddddddddddddddd\n+@HELIUM_000100422_612GNAAXX:7:104:10274:7999#0/2_CONS ali_length=63; direction=left; seq_ab_match=63; sminR=40.0; seq_a_mismatch=0; seq_b_deletion=0; seq_b_mismatch=0; seq_a_deletion=0; seq_b_single=45; score=251.962499638; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=45; score_norm=3.99940475616; seq_b_insertion=0; \n+ccgaatatcttagataccccactatgcttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaacagcctgaaactcaaaggacttggcggtgctttacatccctctagaggagcctgttctagatattcgg\n++\n+dddcdddddddddddedddddddddddccdddddddddddddddd~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~dddddd^ddddddddddddddddddddddddcddddddddddddd\n+@HELIUM_000100422_612GNAAXX:7:14:7706:12338#0/2_CONS ali_length=62; direction=left; seq_ab_match=59; sminR=40.0; seq_a_mismatch=2; seq_b_deletion=0; seq_b_mismatch=1; seq_a_deletion=0; seq_b_single=46; 
score=223.864093999; seq_a_insertion=0; mode=alignment; sminL=40.0; seq_a_single=46; score_norm=3.61071119354; seq_b_insertion=0; \n+ccgaagtagtagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctattgccggtagtactctggcgaataattttgttatattaattacttgtgtttagggctaagcatagtggggtatctaactacttcgg\n++\n+dddcddcddcdbdbddddddddddddddddddcdddddccaddddd~~~~~~b~~~~~~~~~~~x~~~~wyZq~mv~}}~~~~~~~~~P~~~~~~~~~~~~~~~~~~~d^cddddddddcdddadddddddddddddddddddddddddddddd\n'
b
diff -r 000000000000 -r 4df964e14378 test-data/output_obisort.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_obisort.fasta Wed Apr 12 17:36:02 2017 -0400
b
b"@@ -0,0 +1,172 @@\n+>HELIUM_000100422_612GNAAXX:7:119:14871:19157#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'29a_F260619': 1}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=100; obiclean_headcount=0; seq_rank=1; obiclean_samplecount=1; obiclean_head=True; \n+ttagccctaaacacaagtaattattataacaaaatcattcgccagagtactaccggcaat\n+agctcaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:108:5640:3823#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'29a_F260619': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=100; obiclean_headcount=0; seq_rank=2; obiclean_samplecount=1; obiclean_head=False; \n+ttagccctaaacacaagtaattaatataacaaaattgttcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:97:14311:19299#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'29a_F260619': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=100; obiclean_headcount=0; seq_rank=3; obiclean_samplecount=1; obiclean_head=False; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaag\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:24:16230:12703#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'15a_F730814': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=100; obiclean_headcount=0; seq_rank=9; obiclean_samplecount=1; obiclean_head=False; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttatcccctt\n+>HELIUM_000100422_612GNAAXX:7:4:4214:9434#0/2_CONS_SUB_SUB count=1; merged_sample={'13a_F730603': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=100; obiclean_headcount=0; seq_rank=10; obiclean_samplecount=1; obiclean_head=False; \n+ctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactacaggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:13:6954:13039#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'26a_F040644': 1}; obiclean_singletoncount=1; 
obiclean_internalcount=0; seq_length=99; obiclean_headcount=0; seq_rank=11; obiclean_samplecount=1; obiclean_head=True; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagaggactactagcaata\n+gcttaaaactcaaaggacttggcggtgctttatatccct\n+>HELIUM_000100422_612GNAAXX:7:38:6201:12003#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'26a_F040644': 1}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=99; obiclean_headcount=0; seq_rank=12; obiclean_samplecount=1; obiclean_head=True; \n+ttagccctaaacatagataattttacaacaaaataattcgccagaggactactagcaata\n+gcttaaaactcaaagaactgggcggtgctttatatcccg\n+>HELIUM_000100422_612GNAAXX:7:78:7151:19928#0/2_CONS_SUB_SUB count=1; merged_sample={'26a_F040644': 1}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=99; obiclean_headcount=0; seq_rank=14; obiclean_samplecount=1; obiclean_head=True; \n+ttagccctaaacataaacattcaataaacgagaatgttcgccagtgtacttctagcaaca\n+ggctgaaactcaaaggacttggcggtggtttacatccct\n+>HELIUM_000100422_612GNAAXX:7:16:16357:19948#0/2_CONS_SUB_SUB count=1; merged_sample={'26a_F040644': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=99; obiclean_headcount=0; seq_rank=17; obiclean_samplecount=1; obiclean_head=False; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca\n+gcctgaaactcacaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:107:3570:18147#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'29a_F260619': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=100; obiclean_headcount=0; seq_rank=18; obiclean_samplecount=1; obiclean_head=False; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat\n+agcataaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:114:4511:17596#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'29a_F260619': 1}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=100; obiclean_headcount=0; seq_rank=19; obiclean_samplecount=1; obiclean_head=True; 
\n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtgctaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttatacgctt\n+>HELIUM_000"..b" \n+ttagccctaaacacaagacattaatataacgagattaatcgacagagtactaccggctat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:108:9222:18258#0/2_CONS_SUB_SUB count=1; merged_sample={'13a_F730603': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=100; obiclean_headcount=0; seq_rank=57; obiclean_samplecount=1; obiclean_head=False; \n+ctagccttaaacacaaatagttatgcagacaaaactattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:38:3005:20881#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'26a_F040644': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=99; obiclean_headcount=0; seq_rank=58; obiclean_samplecount=1; obiclean_head=False; \n+ttagccctaaacatgaacattcaataaacaagaatgttcgccagagtactactagcaaca\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:29:15520:18035#0/2_CONS_SUB_SUB count=2; merged_sample={'29a_F260619': 2}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=100; obiclean_headcount=0; seq_rank=7; obiclean_samplecount=1; obiclean_head=True; \n+ttagccctaaacacaagtaattattataacaaaattattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:115:17402:4174#0/2_CONS_SUB_SUB_CMP count=2; merged_sample={'29a_F260619': 1, '15a_F730814': 1}; obiclean_singletoncount=0; obiclean_internalcount=2; seq_length=100; obiclean_headcount=0; seq_rank=13; obiclean_samplecount=2; obiclean_head=False; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactacctgcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:38:10813:16867#0/2_CONS_SUB_SUB count=2; merged_sample={'26a_F040644': 2}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=99; obiclean_headcount=0; seq_rank=15; obiclean_samplecount=1; obiclean_head=False; 
\n+ttagccctaaacataaacattcaataaacaagaatgtttgccagagtactactagcaaca\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:70:11798:2668#0/2_CONS_SUB_SUB count=2; merged_sample={'15a_F730814': 2}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=100; obiclean_headcount=0; seq_rank=20; obiclean_samplecount=1; obiclean_head=True; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccttt\n+>HELIUM_000100422_612GNAAXX:7:89:10281:5930#0/2_CONS_SUB_SUB_CMP count=3; merged_sample={'29a_F260619': 3}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=100; obiclean_headcount=0; seq_rank=6; obiclean_samplecount=1; obiclean_head=True; \n+ttagccctaaacacaaataattacacaaacaaaattgttcaccagagtactagcggcaac\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:13:5742:9859#0/2_CONS_SUB_SUB count=7; merged_sample={'29a_F260619': 7}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=4; obiclean_headcount=0; seq_rank=16; obiclean_samplecount=1; obiclean_head=True; \n+tttt\n+>HELIUM_000100422_612GNAAXX:7:6:9274:14951#0/2_CONS_SUB_SUB count=46; merged_sample={'13a_F730603': 46}; obiclean_singletoncount=0; obiclean_internalcount=0; seq_length=100; obiclean_headcount=1; seq_rank=8; obiclean_samplecount=1; obiclean_head=True; \n+ctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/2_CONS_SUB_SUB count=61; merged_sample={'29a_F260619': 30, '15a_F730814': 31}; obiclean_singletoncount=0; obiclean_internalcount=0; seq_length=100; obiclean_headcount=2; seq_rank=4; obiclean_samplecount=2; obiclean_head=True; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:57:18459:16145#0/2_CONS_SUB_SUB_CMP count=64; merged_sample={'26a_F040644': 64}; obiclean_singletoncount=0; obiclean_internalcount=0; 
seq_length=99; obiclean_headcount=1; seq_rank=5; obiclean_samplecount=1; obiclean_head=True; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n"
b
diff -r 000000000000 -r 4df964e14378 test-data/output_obisort.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_obisort.fastq Wed Apr 12 17:36:02 2017 -0400
b
b"@@ -0,0 +1,172 @@\n+>HELIUM_000100422_612GNAAXX:7:57:18459:16145#0/2_CONS_SUB_SUB_CMP count=64; merged_sample={'26a_F040644': 64}; obiclean_singletoncount=0; obiclean_internalcount=0; seq_length=99; obiclean_headcount=1; seq_rank=5; obiclean_samplecount=1; obiclean_head=True; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/2_CONS_SUB_SUB count=61; merged_sample={'29a_F260619': 30, '15a_F730814': 31}; obiclean_singletoncount=0; obiclean_internalcount=0; seq_length=100; obiclean_headcount=2; seq_rank=4; obiclean_samplecount=2; obiclean_head=True; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:6:9274:14951#0/2_CONS_SUB_SUB count=46; merged_sample={'13a_F730603': 46}; obiclean_singletoncount=0; obiclean_internalcount=0; seq_length=100; obiclean_headcount=1; seq_rank=8; obiclean_samplecount=1; obiclean_head=True; \n+ctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:13:5742:9859#0/2_CONS_SUB_SUB count=7; merged_sample={'29a_F260619': 7}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=4; obiclean_headcount=0; seq_rank=16; obiclean_samplecount=1; obiclean_head=True; \n+tttt\n+>HELIUM_000100422_612GNAAXX:7:89:10281:5930#0/2_CONS_SUB_SUB_CMP count=3; merged_sample={'29a_F260619': 3}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=100; obiclean_headcount=0; seq_rank=6; obiclean_samplecount=1; obiclean_head=True; \n+ttagccctaaacacaaataattacacaaacaaaattgttcaccagagtactagcggcaac\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:29:15520:18035#0/2_CONS_SUB_SUB count=2; merged_sample={'29a_F260619': 2}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=100; obiclean_headcount=0; seq_rank=7; obiclean_samplecount=1; 
obiclean_head=True; \n+ttagccctaaacacaagtaattattataacaaaattattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:115:17402:4174#0/2_CONS_SUB_SUB_CMP count=2; merged_sample={'29a_F260619': 1, '15a_F730814': 1}; obiclean_singletoncount=0; obiclean_internalcount=2; seq_length=100; obiclean_headcount=0; seq_rank=13; obiclean_samplecount=2; obiclean_head=False; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactacctgcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:38:10813:16867#0/2_CONS_SUB_SUB count=2; merged_sample={'26a_F040644': 2}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=99; obiclean_headcount=0; seq_rank=15; obiclean_samplecount=1; obiclean_head=False; \n+ttagccctaaacataaacattcaataaacaagaatgtttgccagagtactactagcaaca\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:70:11798:2668#0/2_CONS_SUB_SUB count=2; merged_sample={'15a_F730814': 2}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=100; obiclean_headcount=0; seq_rank=20; obiclean_samplecount=1; obiclean_head=True; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccttt\n+>HELIUM_000100422_612GNAAXX:7:119:14871:19157#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'29a_F260619': 1}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=100; obiclean_headcount=0; seq_rank=1; obiclean_samplecount=1; obiclean_head=True; \n+ttagccctaaacacaagtaattattataacaaaatcattcgccagagtactaccggcaat\n+agctcaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:108:5640:3823#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'29a_F260619': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=100; obiclean_headcount=0; seq_rank=2; obiclean_samplecount=1; obiclean_head=False; 
\n+ttagccctaaacacaagtaattaatataacaaaattgttcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:97:14311:19299#0/2_CONS_SUB_SUB_CMP count=1; merged_s"..b"gaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:72:12959:5736#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'26a_F040644': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=99; obiclean_headcount=0; seq_rank=48; obiclean_samplecount=1; obiclean_head=False; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca\n+gcctgaaactcaaaggacttggcggtgctttacacccct\n+>HELIUM_000100422_612GNAAXX:7:7:15538:8325#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'26a_F040644': 1}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=99; obiclean_headcount=0; seq_rank=49; obiclean_samplecount=1; obiclean_head=True; \n+ttagccctaaacataaacattcaataaacaagaatgtcggccagagtactactagcaaca\n+gcatgaaactcaaagaactgggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:120:16005:7955#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'26a_F040644': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=98; obiclean_headcount=0; seq_rank=50; obiclean_samplecount=1; obiclean_head=False; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca\n+gccgaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:10:9237:10532#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'29a_F260619': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=100; obiclean_headcount=0; seq_rank=51; obiclean_samplecount=1; obiclean_head=False; \n+ttagccctaaacacaagtaattaatataacaaaataattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:7:11998:4462#0/2_CONS_SUB_SUB count=1; merged_sample={'26a_F040644': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=99; obiclean_headcount=0; seq_rank=52; obiclean_samplecount=1; obiclean_head=False; 
\n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtagtactagcaaca\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:111:18277:17779#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'26a_F040644': 1}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=99; obiclean_headcount=0; seq_rank=53; obiclean_samplecount=1; obiclean_head=True; \n+ttagccctaaacatagataattttacaacaaaataattcgccagaggactactagcaata\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:3:17077:6562#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'15a_F730814': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=100; obiclean_headcount=0; seq_rank=54; obiclean_samplecount=1; obiclean_head=False; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactgccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:19:4311:13343#0/2_CONS_SUB_SUB_CMP count=1; merged_sample={'26a_F040644': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=99; obiclean_headcount=0; seq_rank=55; obiclean_samplecount=1; obiclean_head=False; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactacgagcaaca\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:30:1800:7833#0/2_CONS_SUB_SUB count=1; merged_sample={'15a_F730814': 1}; obiclean_singletoncount=1; obiclean_internalcount=0; seq_length=100; obiclean_headcount=0; seq_rank=56; obiclean_samplecount=1; obiclean_head=True; \n+ttagccctaaacacaagacattaatataacgagattaatcgacagagtactaccggctat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:108:9222:18258#0/2_CONS_SUB_SUB count=1; merged_sample={'13a_F730603': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=100; obiclean_headcount=0; seq_rank=57; obiclean_samplecount=1; obiclean_head=False; \n+ctagccttaaacacaaatagttatgcagacaaaactattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:38:3005:20881#0/2_CONS_SUB_SUB_CMP count=1; 
merged_sample={'26a_F040644': 1}; obiclean_singletoncount=0; obiclean_internalcount=1; seq_length=99; obiclean_headcount=0; seq_rank=58; obiclean_samplecount=1; obiclean_head=False; \n+ttagccctaaacatgaacattcaataaacaagaatgttcgccagagtactactagcaaca\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n"
b
diff -r 000000000000 -r 4df964e14378 test-data/output_obistat.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_obistat.txt Wed Apr 12 17:36:02 2017 -0400
b
@@ -0,0 +1,8 @@
+count merged min_seq_length    count    total
+2    None  99       4         8
+61   None 100       1        61
+46   None 100       1        46
+7    None   4       1         7
+64   None  99       1        64
+1    None   5      49        49
+3    None 100       1         3
b
diff -r 000000000000 -r 4df964e14378 test-data/output_obitab.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_obitab.txt Wed Apr 12 17:36:02 2017 -0400
b
@@ -0,0 +1,59 @@
+id count sample:13a_F730603 sample:15a_F730814 sample:26a_F040644 sample:29a_F260619 obiclean_head obiclean_headcount obiclean_internalcount obiclean_samplecount obiclean_singletoncount seq_length seq_rank
+HELIUM_000100422_612GNAAXX:7:57:18459:16145#0/2_CONS_SUB_SUB_CMP 64 0 0 64 0 True 1 0 1 0 99 5
+HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/2_CONS_SUB_SUB 61 0 31 0 30 True 2 0 2 0 100 4
+HELIUM_000100422_612GNAAXX:7:6:9274:14951#0/2_CONS_SUB_SUB 46 46 0 0 0 True 1 0 1 0 100 8
+HELIUM_000100422_612GNAAXX:7:13:5742:9859#0/2_CONS_SUB_SUB 7 0 0 0 7 True 0 0 1 1 4 16
+HELIUM_000100422_612GNAAXX:7:89:10281:5930#0/2_CONS_SUB_SUB_CMP 3 0 0 0 3 True 0 0 1 1 100 6
+HELIUM_000100422_612GNAAXX:7:29:15520:18035#0/2_CONS_SUB_SUB 2 0 0 0 2 True 0 0 1 1 100 7
+HELIUM_000100422_612GNAAXX:7:115:17402:4174#0/2_CONS_SUB_SUB_CMP 2 0 1 0 1 False 0 2 2 0 100 13
+HELIUM_000100422_612GNAAXX:7:38:10813:16867#0/2_CONS_SUB_SUB 2 0 0 2 0 False 0 1 1 0 99 15
+HELIUM_000100422_612GNAAXX:7:70:11798:2668#0/2_CONS_SUB_SUB 2 0 2 0 0 True 0 0 1 1 100 20
+HELIUM_000100422_612GNAAXX:7:119:14871:19157#0/2_CONS_SUB_SUB_CMP 1 0 0 0 1 True 0 0 1 1 100 1
+HELIUM_000100422_612GNAAXX:7:108:5640:3823#0/2_CONS_SUB_SUB_CMP 1 0 0 0 1 False 0 1 1 0 100 2
+HELIUM_000100422_612GNAAXX:7:97:14311:19299#0/2_CONS_SUB_SUB_CMP 1 0 0 0 1 False 0 1 1 0 100 3
+HELIUM_000100422_612GNAAXX:7:24:16230:12703#0/2_CONS_SUB_SUB_CMP 1 0 1 0 0 False 0 1 1 0 100 9
+HELIUM_000100422_612GNAAXX:7:4:4214:9434#0/2_CONS_SUB_SUB 1 1 0 0 0 False 0 1 1 0 100 10
+HELIUM_000100422_612GNAAXX:7:13:6954:13039#0/2_CONS_SUB_SUB_CMP 1 0 0 1 0 True 0 0 1 1 99 11
+HELIUM_000100422_612GNAAXX:7:38:6201:12003#0/2_CONS_SUB_SUB_CMP 1 0 0 1 0 True 0 0 1 1 99 12
+HELIUM_000100422_612GNAAXX:7:78:7151:19928#0/2_CONS_SUB_SUB 1 0 0 1 0 True 0 0 1 1 99 14
+HELIUM_000100422_612GNAAXX:7:16:16357:19948#0/2_CONS_SUB_SUB 1 0 0 1 0 False 0 1 1 0 99 17
+HELIUM_000100422_612GNAAXX:7:107:3570:18147#0/2_CONS_SUB_SUB_CMP 1 0 0 0 1 False 0 1 1 0 100 18
+HELIUM_000100422_612GNAAXX:7:114:4511:17596#0/2_CONS_SUB_SUB_CMP 1 0 0 0 1 True 0 0 1 1 100 19
+HELIUM_000100422_612GNAAXX:7:29:18416:16620#0/2_CONS_SUB_SUB_CMP 1 0 0 0 1 True 0 0 1 1 100 21
+HELIUM_000100422_612GNAAXX:7:120:1900:13295#0/2_CONS_SUB_SUB_CMP 1 0 1 0 0 True 0 0 1 1 100 22
+HELIUM_000100422_612GNAAXX:7:76:10822:18722#0/2_CONS_SUB_SUB 1 0 0 1 0 False 0 1 1 0 99 23
+HELIUM_000100422_612GNAAXX:7:76:8491:8900#0/2_CONS_SUB_SUB_CMP 1 0 1 0 0 True 0 0 1 1 100 24
+HELIUM_000100422_612GNAAXX:7:9:10358:4089#0/2_CONS_SUB_SUB_CMP 1 1 0 0 0 True 0 0 1 1 100 25
+HELIUM_000100422_612GNAAXX:7:59:11971:12063#0/2_CONS_SUB_SUB 1 0 0 0 1 False 0 1 1 0 100 26
+HELIUM_000100422_612GNAAXX:7:111:19168:18517#0/2_CONS_SUB_SUB 1 0 1 0 0 False 0 1 1 0 100 27
+HELIUM_000100422_612GNAAXX:7:41:9316:2256#0/2_CONS_SUB_SUB 1 0 0 0 1 False 0 1 1 0 100 28
+HELIUM_000100422_612GNAAXX:7:33:9900:9541#0/2_CONS_SUB_SUB 1 1 0 0 0 False 0 1 1 0 100 29
+HELIUM_000100422_612GNAAXX:7:95:1688:9598#0/2_CONS_SUB_SUB_CMP 1 0 0 1 0 True 0 0 1 1 99 30
+HELIUM_000100422_612GNAAXX:7:55:16528:11418#0/2_CONS_SUB_SUB 1 1 0 0 0 True 0 0 1 1 100 31
+HELIUM_000100422_612GNAAXX:7:19:6313:16567#0/2_CONS_SUB_SUB_CMP 1 0 0 1 0 False 0 1 1 0 99 32
+HELIUM_000100422_612GNAAXX:7:111:8478:1127#0/2_CONS_SUB_SUB_CMP 1 0 0 1 0 True 0 0 1 1 99 33
+HELIUM_000100422_612GNAAXX:7:81:15726:2345#0/2_CONS_SUB_SUB_CMP 1 0 0 1 0 True 0 0 1 1 99 34
+HELIUM_000100422_612GNAAXX:7:7:11110:17924#0/2_CONS_SUB_SUB 1 0 1 0 0 True 0 0 1 1 100 35
+HELIUM_000100422_612GNAAXX:7:89:9710:20652#0/2_CONS_SUB_SUB 1 0 0 1 0 False 0 1 1 0 99 36
+HELIUM_000100422_612GNAAXX:7:71:17473:7401#0/2_CONS_SUB_SUB_CMP 1 0 1 0 0 False 0 1 1 0 100 37
+HELIUM_000100422_612GNAAXX:7:115:3625:10051#0/2_CONS_SUB_SUB 1 1 0 0 0 True 0 0 1 1 5 38
+HELIUM_000100422_612GNAAXX:7:99:8183:13912#0/2_CONS_SUB_SUB 1 1 0 0 0 False 0 1 1 0 100 39
+HELIUM_000100422_612GNAAXX:7:22:1798:3790#0/2_CONS_SUB_SUB_CMP 1 1 0 0 0 False 0 1 1 0 100 40
+HELIUM_000100422_612GNAAXX:7:115:14354:10118#0/2_CONS_SUB_SUB_CMP 1 0 0 1 0 False 0 1 1 0 99 41
+HELIUM_000100422_612GNAAXX:7:103:3594:15996#0/2_CONS_SUB_SUB 1 1 0 0 0 False 0 1 1 0 100 42
+HELIUM_000100422_612GNAAXX:7:105:14135:6905#0/2_CONS_SUB_SUB 1 0 0 0 1 True 0 0 1 1 100 43
+HELIUM_000100422_612GNAAXX:7:76:18268:14980#0/2_CONS_SUB_SUB_CMP 1 0 0 0 1 False 0 1 1 0 100 44
+HELIUM_000100422_612GNAAXX:7:25:7789:3090#0/2_CONS_SUB_SUB_CMP 1 0 0 0 1 False 0 1 1 0 100 45
+HELIUM_000100422_612GNAAXX:7:42:19757:3800#0/2_CONS_SUB_SUB_CMP 1 0 1 0 0 False 0 1 1 0 99 46
+HELIUM_000100422_612GNAAXX:7:116:14244:17926#0/2_CONS_SUB_SUB 1 0 0 1 0 False 0 1 1 0 99 47
+HELIUM_000100422_612GNAAXX:7:72:12959:5736#0/2_CONS_SUB_SUB_CMP 1 0 0 1 0 False 0 1 1 0 99 48
+HELIUM_000100422_612GNAAXX:7:7:15538:8325#0/2_CONS_SUB_SUB_CMP 1 0 0 1 0 True 0 0 1 1 99 49
+HELIUM_000100422_612GNAAXX:7:120:16005:7955#0/2_CONS_SUB_SUB_CMP 1 0 0 1 0 False 0 1 1 0 98 50
+HELIUM_000100422_612GNAAXX:7:10:9237:10532#0/2_CONS_SUB_SUB_CMP 1 0 0 0 1 False 0 1 1 0 100 51
+HELIUM_000100422_612GNAAXX:7:7:11998:4462#0/2_CONS_SUB_SUB 1 0 0 1 0 False 0 1 1 0 99 52
+HELIUM_000100422_612GNAAXX:7:111:18277:17779#0/2_CONS_SUB_SUB_CMP 1 0 0 1 0 True 0 0 1 1 99 53
+HELIUM_000100422_612GNAAXX:7:3:17077:6562#0/2_CONS_SUB_SUB_CMP 1 0 1 0 0 False 0 1 1 0 100 54
+HELIUM_000100422_612GNAAXX:7:19:4311:13343#0/2_CONS_SUB_SUB_CMP 1 0 0 1 0 False 0 1 1 0 99 55
+HELIUM_000100422_612GNAAXX:7:30:1800:7833#0/2_CONS_SUB_SUB 1 0 1 0 0 True 0 0 1 1 100 56
+HELIUM_000100422_612GNAAXX:7:108:9222:18258#0/2_CONS_SUB_SUB 1 1 0 0 0 False 0 1 1 0 100 57
+HELIUM_000100422_612GNAAXX:7:38:3005:20881#0/2_CONS_SUB_SUB_CMP 1 0 0 1 0 False 0 1 1 0 99 58
b
diff -r 000000000000 -r 4df964e14378 test-data/output_obiuniq.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_obiuniq.fasta Wed Apr 12 17:36:02 2017 -0400
b
b"@@ -0,0 +1,172 @@\n+>HELIUM_000100422_612GNAAXX:7:119:14871:19157#0/2_CONS_SUB_SUB_CMP ali_length=62; seq_ab_match=47; sminR=40.0; tail_quality=67.0; reverse_match=tagaacaggctcctctag; seq_a_deletion=0; reverse_score=72.0; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_primer=tagaacaggctcctctag; sminL=40.0; merged_sample={'29a_F260619': 1}; forward_score=72.0; score=115.761290673; seq_a_mismatch=1; forward_tag=gcctcct; seq_b_mismatch=13; experiment=wolf_diet; mid_quality=69.1492537313; avg_quality=68.8701298701; seq_a_single=46; score_norm=1.86711759151; status=full; direction=reverse; seq_b_insertion=1; seq_b_deletion=1; seq_a_insertion=0; seq_length_ori=154; reverse_tag=gcctcct; count=1; seq_length=100; mode=alignment; head_quality=67.0; seq_b_single=46; \n+ttagccctaaacacaagtaattattataacaaaatcattcgccagagtactaccggcaat\n+agctcaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:108:5640:3823#0/2_CONS_SUB_SUB_CMP ali_length=62; seq_ab_match=60; sminR=40.0; tail_quality=66.9; reverse_match=tagaacaggctcctctag; seq_a_deletion=0; reverse_score=72.0; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_primer=tagaacaggctcctctag; sminL=40.0; merged_sample={'29a_F260619': 1}; forward_score=72.0; score=231.881364714; seq_a_mismatch=0; forward_tag=gcctcct; seq_b_mismatch=2; experiment=wolf_diet; mid_quality=78.447761194; avg_quality=76.9545454545; seq_a_single=46; score_norm=3.74002201152; status=full; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=154; reverse_tag=gcctcct; count=1; seq_length=100; mode=alignment; head_quality=67.0; seq_b_single=46; \n+ttagccctaaacacaagtaattaatataacaaaattgttcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:97:14311:19299#0/2_CONS_SUB_SUB_CMP ali_length=62; seq_ab_match=52; sminR=40.0; tail_quality=67.0; reverse_match=tagaacaggctcctctag; seq_a_deletion=0; reverse_score=72.0; 
forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_primer=tagaacaggctcctctag; sminL=40.0; merged_sample={'29a_F260619': 1}; forward_score=72.0; score=167.91734192; seq_a_mismatch=5; forward_tag=gcctcct; seq_b_mismatch=5; experiment=wolf_diet; mid_quality=75.0820895522; avg_quality=74.0064935065; seq_a_single=46; score_norm=2.70834422451; status=full; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=154; reverse_tag=gcctcct; count=1; seq_length=100; mode=alignment; head_quality=66.6; seq_b_single=46; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaag\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/2_CONS_SUB_SUB sminR=40.0; reverse_match=tagaacaggctcctctag; seq_a_deletion=0; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_primer=tagaacaggctcctctag; sminL=40.0; merged_sample={'29a_F260619': 30, '15a_F730814': 31}; forward_score=72.0; experiment=wolf_diet; reverse_score=72.0; seq_length_ori=154; count=61; seq_length=100; status=full; mode=alignment; seq_b_single=46; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:57:18459:16145#0/2_CONS_SUB_SUB_CMP ali_length=63; sminR=40.0; reverse_match=tagaacaggctcctctag; seq_a_deletion=0; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_primer=tagaacaggctcctctag; sminL=40.0; merged_sample={'26a_F040644': 64}; forward_score=72.0; forward_tag=gaatatc; experiment=wolf_diet; seq_a_single=45; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=153; reverse_tag=gaatatc; count=64; seq_length=99; status=full; mode=alignment; seq_b_single=45; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:89:10281:5930#0/2_CONS_SUB_SUB_CMP ali_length=62; 
sminR=40.0; tail_quality=67.0; revers"..b"e=223.876050008; seq_a_mismatch=3; forward_tag=gaagtag; seq_b_mismatch=0; experiment=wolf_diet; mid_quality=77.6567164179; avg_quality=76.1818181818; seq_a_single=46; score_norm=3.61090403239; status=full; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=154; reverse_tag=gaagtag; count=1; seq_length=100; mode=alignment; head_quality=66.0; seq_b_single=46; \n+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactgccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:19:4311:13343#0/2_CONS_SUB_SUB_CMP ali_length=63; seq_ab_match=58; sminR=40.0; tail_quality=67.0; reverse_match=tagaacaggctcctctag; seq_a_deletion=0; reverse_score=72.0; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_primer=tagaacaggctcctctag; sminL=40.0; merged_sample={'26a_F040644': 1}; forward_score=72.0; score=211.837822462; seq_a_mismatch=4; forward_tag=gaatatc; seq_b_mismatch=1; experiment=wolf_diet; mid_quality=77.7969924812; avg_quality=76.385620915; seq_a_single=45; score_norm=3.36250511845; status=full; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=153; reverse_tag=gaatatc; count=1; seq_length=99; mode=alignment; head_quality=67.0; seq_b_single=45; \n+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactacgagcaaca\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:30:1800:7833#0/2_CONS_SUB_SUB ali_length=62; seq_ab_match=52; sminR=40.0; tail_quality=65.6; reverse_match=tagaacaggctcctctag; seq_a_deletion=0; reverse_score=72.0; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_primer=tagaacaggctcctctag; sminL=40.0; merged_sample={'15a_F730814': 1}; forward_score=72.0; score=167.712615339; seq_a_mismatch=0; forward_tag=gaagtag; seq_b_mismatch=10; experiment=wolf_diet; mid_quality=59.9104477612; avg_quality=58.6623376623; seq_a_single=46; score_norm=2.70504218289; 
status=full; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=154; reverse_tag=gaagtag; count=1; seq_length=100; mode=alignment; head_quality=35.0; seq_b_single=46; \n+ttagccctaaacacaagacattaatataacgagattaatcgacagagtactaccggctat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:108:9222:18258#0/2_CONS_SUB_SUB ali_length=62; seq_ab_match=60; sminR=40.0; tail_quality=67.0; reverse_match=tagaacaggctcctctag; seq_a_deletion=0; reverse_score=72.0; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_primer=tagaacaggctcctctag; sminL=40.0; merged_sample={'13a_F730603': 1}; forward_score=72.0; score=231.857703869; seq_a_mismatch=0; forward_tag=aattaac; seq_b_mismatch=2; experiment=wolf_diet; mid_quality=78.5; avg_quality=77.0194805195; seq_a_single=46; score_norm=3.73964038498; status=full; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=154; reverse_tag=aattaac; count=1; seq_length=100; mode=alignment; head_quality=67.2; seq_b_single=46; \n+ctagccttaaacacaaatagttatgcagacaaaactattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:38:3005:20881#0/2_CONS_SUB_SUB_CMP ali_length=63; seq_ab_match=60; sminR=40.0; tail_quality=67.0; reverse_match=tagaacaggctcctctag; seq_a_deletion=0; reverse_score=72.0; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_primer=tagaacaggctcctctag; sminL=40.0; merged_sample={'26a_F040644': 1}; forward_score=72.0; score=227.901935995; seq_a_mismatch=3; forward_tag=gaatatc; seq_b_mismatch=0; experiment=wolf_diet; mid_quality=78.5939849624; avg_quality=77.0784313725; seq_a_single=45; score_norm=3.61749104754; status=full; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=153; reverse_tag=gaatatc; count=1; seq_length=99; mode=alignment; head_quality=67.0; seq_b_single=45; 
\n+ttagccctaaacatgaacattcaataaacaagaatgttcgccagagtactactagcaaca\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n"
b
diff -r 000000000000 -r 4df964e14378 test-data/output_obiuniq_family.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_obiuniq_family.fasta Wed Apr 12 17:36:02 2017 -0400
[
b"@@ -0,0 +1,172 @@\n+>HELIUM_000100422_612GNAAXX:7:75:9674:16966#0/2_CONS_SUB_SUB_CMP ali_length=58; sminR=40.0; reverse_match=tagaacaggctcctctag; seq_a_deletion=50; sample=29a_F260619; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_primer=tagaacaggctcctctag; sminL=40.0; forward_score=72.0; forward_tag=gcctcct; experiment=wolf_diet; seq_a_single=0; merged=['HELIUM_000100422_612GNAAXX:7:75:9674:16966#0/2_CONS_SUB_SUB_CMP', 'HELIUM_000100422_612GNAAXX:7:95:11283:2557#0/2_CONS_SUB_SUB_CMP', 'HELIUM_000100422_612GNAAXX:7:73:17068:16620#0/2_CONS_SUB_SUB_CMP', 'HELIUM_000100422_612GNAAXX:7:96:8737:4973#0/2_CONS_SUB_SUB_CMP', 'HELIUM_000100422_612GNAAXX:7:37:12979:6595#0/2_CONS_SUB_SUB', 'HELIUM_000100422_612GNAAXX:7:7:7916:16505#0/2_CONS_SUB_SUB_CMP', 'HELIUM_000100422_612GNAAXX:7:13:5742:9859#0/2_CONS_SUB_SUB']; reverse_score=72.0; seq_b_insertion=0; seq_b_deletion=50; seq_a_insertion=0; seq_length_ori=58; reverse_tag=gcctcct; count=7; seq_length=4; status=full; mode=alignment; seq_b_single=0; \n+tttt\n+>HELIUM_000100422_612GNAAXX:7:38:3005:20881#0/2_CONS_SUB_SUB_CMP ali_length=63; seq_ab_match=60; sminR=40.0; tail_quality=67.0; reverse_match=tagaacaggctcctctag; seq_a_deletion=0; sample=26a_F040644; reverse_score=72.0; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_primer=tagaacaggctcctctag; sminL=40.0; forward_score=72.0; score=227.901935995; seq_a_mismatch=3; forward_tag=gaatatc; seq_b_mismatch=0; experiment=wolf_diet; mid_quality=78.5939849624; avg_quality=77.0784313725; seq_a_single=45; score_norm=3.61749104754; merged=['HELIUM_000100422_612GNAAXX:7:38:3005:20881#0/2_CONS_SUB_SUB_CMP']; status=full; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=153; reverse_tag=gaatatc; count=1; seq_length=99; mode=alignment; head_quality=67.0; seq_b_single=45; 
\n+ttagccctaaacatgaacattcaataaacaagaatgttcgccagagtactactagcaaca\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:38:6201:12003#0/2_CONS_SUB_SUB_CMP ali_length=63; seq_ab_match=59; sminR=40.0; tail_quality=67.0; reverse_match=tagaacaggctcctctag; seq_a_deletion=0; sample=26a_F040644; reverse_score=72.0; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_primer=tagaacaggctcctctag; sminL=40.0; forward_score=72.0; score=219.686655506; seq_a_mismatch=0; forward_tag=gaatatc; seq_b_mismatch=4; experiment=wolf_diet; mid_quality=67.7443609023; avg_quality=67.3986928105; seq_a_single=45; score_norm=3.48708976993; merged=['HELIUM_000100422_612GNAAXX:7:38:6201:12003#0/2_CONS_SUB_SUB_CMP']; status=full; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=153; reverse_tag=gaatatc; count=1; seq_length=99; mode=alignment; head_quality=63.2; seq_b_single=45; \n+ttagccctaaacatagataattttacaacaaaataattcgccagaggactactagcaata\n+gcttaaaactcaaagaactgggcggtgctttatatcccg\n+>HELIUM_000100422_612GNAAXX:7:111:18277:17779#0/2_CONS_SUB_SUB_CMP ali_length=63; seq_ab_match=61; sminR=40.0; tail_quality=67.0; reverse_match=tagaacaggctcctctag; seq_a_deletion=0; sample=26a_F040644; reverse_score=72.0; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_primer=tagaacaggctcctctag; sminL=40.0; forward_score=72.0; score=235.877819976; seq_a_mismatch=2; forward_tag=gaatatc; seq_b_mismatch=0; experiment=wolf_diet; mid_quality=78.8646616541; avg_quality=77.3137254902; seq_a_single=45; score_norm=3.74409238057; merged=['HELIUM_000100422_612GNAAXX:7:111:18277:17779#0/2_CONS_SUB_SUB_CMP']; status=full; direction=reverse; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=153; reverse_tag=gaatatc; count=1; seq_length=99; mode=alignment; head_quality=67.0; seq_b_single=45; 
\n+ttagccctaaacatagataattttacaacaaaataattcgccagaggactactagcaata\n+gcctgaaactcaaaggacttggcggtgctttacatccct\n+>HELIUM_000100422_612GNAAXX:7:78:7151:19928#0/2_CONS_SUB_SUB ali_length=63; seq_ab_match=56; sminR=40.0; tail_quality=47.6; reverse_match="..b"00422_612GNAAXX:7:99:8183:13912#0/2_CONS_SUB_SUB']; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=154; reverse_tag=aattaac; count=1; seq_length=100; status=full; mode=alignment; head_quality=67.0; seq_b_single=46; \n+ctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctc\n+>HELIUM_000100422_612GNAAXX:7:4:4214:9434#0/2_CONS_SUB_SUB ali_length=62; seq_ab_match=58; sminR=40.0; tail_quality=63.3; reverse_match=tagaacaggctcctctag; seq_a_deletion=0; sample=13a_F730603; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_primer=tagaacaggctcctctag; sminL=40.0; forward_score=72.0; score=215.812703412; seq_a_mismatch=3; forward_tag=aattaac; seq_b_mismatch=1; experiment=wolf_diet; mid_quality=76.2388059701; avg_quality=74.6883116883; seq_a_single=46; score_norm=3.48085005502; merged=['HELIUM_000100422_612GNAAXX:7:4:4214:9434#0/2_CONS_SUB_SUB']; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=154; reverse_tag=aattaac; count=1; seq_length=100; status=full; mode=alignment; head_quality=65.3; seq_b_single=46; \n+ctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactacaggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:9:10358:4089#0/2_CONS_SUB_SUB_CMP ali_length=61; seq_ab_match=54; sminR=40.0; tail_quality=66.9; reverse_match=tagaacaggctcctctag; seq_a_deletion=0; sample=13a_F730603; reverse_score=72.0; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_primer=tagaacaggctcctctag; sminL=40.0; forward_score=72.0; score=181.620394786; seq_a_mismatch=1; forward_tag=aattaac; 
seq_b_mismatch=6; experiment=wolf_diet; mid_quality=66.3880597015; avg_quality=66.4545454545; seq_a_single=46; score_norm=2.97738352108; merged=['HELIUM_000100422_612GNAAXX:7:9:10358:4089#0/2_CONS_SUB_SUB_CMP']; status=full; direction=reverse; seq_b_insertion=0; seq_b_deletion=1; seq_a_insertion=0; seq_length_ori=154; reverse_tag=aattaac; count=1; seq_length=100; mode=alignment; head_quality=66.9; seq_b_single=47; \n+ctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactacaggcaat\n+agctcaaaactcaaagaacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:33:9900:9541#0/2_CONS_SUB_SUB ali_length=62; seq_ab_match=62; sminR=40.0; tail_quality=67.0; reverse_match=tagaacaggctcctctag; seq_a_deletion=0; sample=13a_F730603; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_primer=tagaacaggctcctctag; sminL=40.0; forward_score=72.0; score=247.949713348; seq_a_mismatch=0; forward_tag=aattaac; seq_b_mismatch=0; experiment=wolf_diet; mid_quality=78.8955223881; avg_quality=77.3506493506; seq_a_single=46; score_norm=3.99918892497; merged=['HELIUM_000100422_612GNAAXX:7:33:9900:9541#0/2_CONS_SUB_SUB']; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=0; seq_a_insertion=0; seq_length_ori=154; reverse_tag=aattaac; count=1; seq_length=100; status=full; mode=alignment; head_quality=67.0; seq_b_single=46; \n+ctagccctaaacacaaatagttatgcaaacaaaactattcgccagagtactaccggcaat\n+agcttaaaactcaaaggacttggcggtgctttataccctt\n+>HELIUM_000100422_612GNAAXX:7:115:3625:10051#0/2_CONS_SUB_SUB ali_length=59; seq_ab_match=59; sminR=40.0; tail_quality=93.0; reverse_match=tagaacaggctcctctag; seq_a_deletion=49; sample=13a_F730603; forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; reverse_primer=tagaacaggctcctctag; sminL=40.0; forward_score=72.0; score=235.999642017; seq_a_mismatch=0; forward_tag=aattaac; seq_b_mismatch=0; experiment=wolf_diet; mid_quality=93.0; avg_quality=93.0; seq_a_single=0; score_norm=3.99999393249; 
merged=['HELIUM_000100422_612GNAAXX:7:115:3625:10051#0/2_CONS_SUB_SUB']; reverse_score=72.0; direction=forward; seq_b_insertion=0; seq_b_deletion=49; seq_a_insertion=0; seq_length_ori=59; reverse_tag=aattaac; count=1; seq_length=5; status=full; mode=alignment; head_quality=93.0; seq_b_single=0; \n+caata\n"
b
diff -r 000000000000 -r 4df964e14378 test-data/wolf_small.F.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/wolf_small.F.fastq Wed Apr 12 17:36:02 2017 -0400
[
b'@@ -0,0 +1,998 @@\n+@HELIUM_000100422_612GNAAXX:7:119:14871:19157#0/1  \n+ccgcctcctttagataccccactatgcttagccctaaacacaagtaattattataacaaaatcattcgccagagtactaccggcaatagctcaaaactcaaagaactt\n++\n+bbbbbbbbbbbbbbbbbbbbbbabbbba_abbbbbbbbbbbbbaZbbb`bbbbbbb``b`QXK^[TWXXJ`\\`BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:108:5640:3823#0/1  \n+ccgcctcctttagataccccactatgcttagccctaaacacaagtaattaatataacaaaattgttcgccagagtactaccggcaatagcttaaaactcaaaggactt\n++\n+bbbbbbbabbbbbbbbbbbbbbbbbbbbbbabbbbbabbbbbbb[bbbb`bbZb^bbbb_`Z\\KaZXRYZbb\\bZ\\\\^^BBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:97:14311:19299#0/1  \n+ccgcctcctttagataccccactatgcttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccgccactagcttaaaactcaaagaactc\n++\n+bbbbbbbbbbbbbbbbbbbbbbbaabbb^abbbbbab^bbbb__Z`````Tb_b_bbb_b]]Zb_Vbb_bXRZRPYYT[[``[_BBBBBBBBBBBBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/1  \n+ccgcctccttagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctattgccggtagtactctggcgaataattttgttatattaat\n++\n+bbbbbbbbbabbbbbbbbabbbbbbbbbbb`\\```_bbbbbbbbbbb^b`bbb^bb_b_b`bb^b`\\a^O`Z```UZ]R^`b^b^W```R[[LW[a`b_QQ[U^BBBB\n+@HELIUM_000100422_612GNAAXX:7:57:18459:16145#0/1  \n+ccgaatatcttagataccccactatgcttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaccagcctgaaactcacaggactcg\n++\n+bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbcbbba^b^b``bb`Z`Z`^^^W^BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:89:10281:5930#0/1  \n+ccgcctcctttagataccccactatgcttagccctaaacacaaataattacacaaacaaaattgttcaccagagtactagcggcaacagcttaaaactcaaagtactc\n++\n+bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbabbbbbbbb`bb_bbbbbaaa`a```^\\UXW[XU`[W`BBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:29:15520:18035#0/1  
\n+ccgcctccttagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctattgccggtagtactctggcgaataattttgttataataat\n++\n+bbbbbbbbbbbbbbbbbb_bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb_b]`babbb``bbbbabb_bb`Y`XSZYX[b_b_`\\I^a^UXSMYbb\\b\\H`\n+@HELIUM_000100422_612GNAAXX:7:6:9274:14951#0/1  \n+ccaattaactagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctattgccggtagtactctgccgaatagttttgtttgcataac\n++\n+bbbbbbbbbcbbbbbbbbbbbbbbbbbbbbbb\\babbababbbbbbabc^babbbb_b^bbbb\\a\\`a]^^\\`a_BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:111:2893:6772#0/1  \n+ccgcctccttagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctattgccggtagtactctggcgaataattttgttatattatt\n++\n+bbbbbbbbbbbbbbbbcbbbbbbbbbbb_bbbababbbabbbbbbbbWb^bbbbbbb_`bbbb`bY^`\\[^`M`]YZ[U[XX[VX\\[[YQZPGWR````GN\\\\\\X`G^\n+@HELIUM_000100422_612GNAAXX:7:89:10621:20317#0/1  \n+ccgcctccttagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctattgccggtagtactctggcgaataattttgttatattaat\n++\n+bbbbbbbbbbbbbbbbbbbbbbbbbbbb_bbb]bb`b`bbbbbbbbb^b`bbbb_`b`^`bbbcbababaabab_aQ_T[b````\\aBBBBBBBBBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:50:2791:11138#0/1  \n+ccgcctccttagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctattgccggtagtactctgtcgagtaattttgttatattaat\n++\n+bbbbbbbbbbbbbabbbbbbbbabbbbbbbbb`bbbbbbbbbbbbbbbbbbbbbabb]_bbbb_b^bbb[``^``[][Y\\WZ[XRINOOIXVJWV`^`^HO[^BBBBB\n+@HELIUM_000100422_612GNAAXX:7:108:17701:18887#0/1  \n+ccgaagtagttagataccccactatgcttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcactagctaaaaactcaaagaactg\n++\n+babbbbbbbbbbbbbbbbbbbbbbbbbbbca^bbbbbbbbbbba\\aaa`a\\bbbbbbbbbBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:97:10414:9929#0/1  \n+ccaattaacttagataccccactatgcctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactaccggcaatagcttaaaactcaaagaactt\n++\n+bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbcbbbbbabbba_^bbbabaZ^\\```]\\]]\\\\`BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:97:9091:15775#0/1  
\n+ccaattaacttagataccccactatgcctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactaccggcaatagcttacaactcacagaactt\n++\n+bbbbbbbbbbbbbbbbbbbbbbbbbbbbab_bbbbbbbbbbbbbbbb_aab[^_bbbbbaZ^\\`_U\\Y]^BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:74:8683:20738#0/1  \n+ccgaatatctagaacaggctcctctagagggatgtaaagcaccgccaagtcctttgagtttcaggctgttgctagtagtactctgg'..b'+@HELIUM_000100422_612GNAAXX:7:19:4311:13343#0/1  \n+ccgaatatcttagataccccactatgcttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaccagcctgaaactcaaagaactcg\n++\n+bbbbbbbbbbbbbbbbbbbbbbbbbbbb`bbabbbbabb\\bbbbb``bb^`a_``b_bb`bbb`b\\R``]`__Z`BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:30:1800:7833#0/1  \n+ccgaagtagtagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctatagccggtagtactctgtcgattaatctcgttatattaat\n++\n+aaaaa```a`YZYZYaa]Yaa]``^^^^Y^Y]TY]a``[a^aaa\\X]Sa]]aaaaaaa]`aaaa_[Q^^JV[\\U^T^BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:16:19299:8684#0/1  \n+ccgaagtagttagataccccactatgcttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaatagcttaaacctcaaagaactt\n++\n+bbbbbbbbbbbbbbbbbbbbbbbbbbbabaabbbbcbbbbbb_b_abbba`bababbb`bZZY``P^BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:54:14618:14353#0/1  \n+ccaattaacttagataccccactatgcctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactaccggcaatagcttaaaactcaaagaactt\n++\n+bbbbbbbbbbbbbbbbbbbbbbbbbbbbbacbbbbbbbbbbbbbbcb^bbb_c^``^\\V\\FKOONY[V[[`\\`BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:108:9222:18258#0/1  \n+ccaattaactagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctattgccggtagtactctggcgaatagttttgtctgcataac\n++\n+bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabb_bbbbbbbbababbbbba_``b`a`^`[^a^^BBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:74:4783:20479#0/1  
\n+ccgcctcctttagataccccactatgcttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaatagcttaaaactcaaaggactt\n++\n+bbbbbbbbbbbbbbbbbb`bbbbabbbbbb_bbbbbb`bbbbXb^bbbb__bbbb_bbabSZW``U^^`[`\\a`Y`\\Z[`BBBBBBBBBBBBBBBBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:38:3005:20881#0/1  \n+ccgaatatcttagataccccactatgcttagccctaaacatgaacattcaataaacaagaatgttcgccagagtactactagcaccagcctgaaactcacagaacttg\n++\n+bbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbb_cbbbbbabbbab`bbbbbb_b_a_\\`\\bb^^X]TWWXSGNOOPU``BBBBBBBBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:4:10139:2389#0/1  \n+ccgaatatctagaacaggctcctctagagggatgtaaagcaccgccaagtcctttgagtttcaggctgttgctagtagtactctggcgaacattcttgtttattgaat\n++\n+bbb_bababbbbbbbbbbbbbbbb`_bbbb^N_`_bbbbbbbbbb_b_bab`babbO__Z]]__R]abb_YT\\]]\\_BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:75:9674:16966#0/1  \n+ccgcctcctttagataccccactatgcttttctagaggagcctgttctaaggaggcggagatcggaagagcggttcagcaggaatgccgagacagatatcgtatgccg\n++\n+bbbbbbcbbbbbb_bbcbabbbbbbbb_bbbabbbbb[bbbabbbbb_bab`abaa`a_bbaW`Zb\\`b]bb\\\\WYZ]]Y\\\\]]U``S`Y^BBBBBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:59:15367:8376#0/1  \n+ccgaagtagtagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctattgccggtagtactctggcgaataattttgttatattaat\n++\n+bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbcbbbbbbbbbbbbbbbb`b`bbbbbb^bb`bbb__[a]bb_`b]`]]WZX^[YJXPPN]MHNW`Y^`XTbb_VX_^\n+@HELIUM_000100422_612GNAAXX:7:29:1328:13185#0/1  \n+ccgaatatctagaacaggctcctctagagggatgtaaagcaccgccaagtcctttgagtttcaggctgttgctagtagtactctggcgaacattcttgtttattgaat\n++\n+bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbabbbbbbbabab`b`\\^````^\\\\^R[[[_\\\\Z[W^^\\\\UWVUV[\\a`X``YYZb_\\Y^NP^BBBBB\n+@HELIUM_000100422_612GNAAXX:7:30:16081:4486#0/1  \n+ccgaatatcttagataccccactatgcttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaacagcctgaaactcaaaggacttg\n++\n+bbbbbbbbbbbbbbbbbbbbbbbbbbbbbb_bbbbbbbbbbcbbbbbbccbbabbbbbbab`b_b\\`]Y\\`^`W`\\YYYYOOOOO^[YZZXVXWY^^W\\^BBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:104:10274:7999#0/1  
\n+ccgaatatctagaacaggctcctctagagggatgtaaagcaccgccaagtcctttgagtttcaggctgttgctagtagtactctggcgaacattcttgtttattgaat\n++\n+bbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbbbbb\\bbbbbb`bbb^_b^`abb^_Z`_a\\U`^\\aY_\\\\[`XY^R\\OONNNH`X[YYYYN[`Y]``BBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:14:7706:12338#0/1  \n+ccgaagtagttagataccccactatgcttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccgccaatagcttaaaactcaacggactt\n++\n+bbbbbbbbbbbbbbbbbbbbbbbbbbbbbb_bbbabbbbbbbba\\bbbb``bbbbbbbZbYa`bbP^\\\\Zbbb^BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:13:11240:16837#0/1  \n+ccgaatatcttagataccccactatgcttagccctaaacataaacattcaataaacacgaatgttcgccagagaactactagcaccagcctgaaactcaaaggacttg\n'
b
diff -r 000000000000 -r 4df964e14378 test-data/wolf_small.R.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/wolf_small.R.fastq Wed Apr 12 17:36:02 2017 -0400
[
b'@@ -0,0 +1,998 @@\n+@HELIUM_000100422_612GNAAXX:7:119:14871:19157#0/2  \n+ccgcctccttagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaacctactcccgctacacgtccgccgaataatactgttatcatatt\n++\n+bbbbbbbbbbb``b_bbabbbbbba_b_bbbb__baaaU__bb_`bWbb[b]b__BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:108:5640:3823#0/2  \n+ccgcctccttagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctattgccggtagtactctggcgaacacttttgttatattact\n++\n+bbbbbbbbbbbbbbbb`bbbbb`bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbb\\bbbbbbbbbbY\\][`bbbbabb`WZUWMUX``^]G]`b_b`R`\n+@HELIUM_000100422_612GNAAXX:7:97:14311:19299#0/2  \n+ccgcctccttagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctcttgccggtagtactctggcgcacacttttcttatattact\n++\n+bb_bbbbbab^babbbbbbbbb\\bb[bbbbbbX_b^Z^[Jaa_^^WT[^]^[[U[Y[^SR^^^[Q^RZ^^b`_bTTQGW[T[]YHMGNNPNNKPFUYRVT\\bbbb_^U\n+@HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/2  \n+ccgcctcctttagataccccactatgcttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaatagcttcacactcaaagaactt\n++\n+bbbcbbbbbbbbbcbbbbbbbbbcbbb_bb`bbbbbbbbbbbbcbbbbbbbbbcbbbbbaaaabb\\N```\\\\\\]\\b[bbb^aX``ZR^^Vbb_bUbbbW`bbbJ`aW`\n+@HELIUM_000100422_612GNAAXX:7:57:18459:16145#0/2  \n+ccgaatatctagaacaggctcctctagagggatgtaaagcaccgccaagtcctttgagtttcaggctgttgctagtagtactctggcgaacattcttgtttattgaat\n++\n+bbabbbbbbbbbbbbbbbcbbbbbbbbbbbbbbbbbbbbabbbbbbbbb]bbbbbbababbbbbcc`bca`ab`aa`ba^bbb`ba_VX_b__W_`c\\a_b^b`UW`^\n+@HELIUM_000100422_612GNAAXX:7:89:10281:5930#0/2  \n+ccgcctccttagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctgttgccgctagtactctggtgaacaattttgtttgtgtaat\n++\n+bbbbbbbbbbbbbbbbbbabbbbbbbbbbbbbZbbcbbcabbbbbbbbb`bbbbbbbbbbbbbbb_b_`b]b^b^bbb`bbbbbb_ababb__abbb[\\a_abbbbXb\n+@HELIUM_000100422_612GNAAXX:7:29:15520:18035#0/2  
\n+ccgcctcctttagataccccactatgcttagccctaaacacaagtaattattataacaaaattattcgccagagtactaccggcaatagcttaaaactcacaggactt\n++\n+aaaaaaaa]aa]aaa_a_a`[W[]^_\\]]^]W]^]_]^aa]aaaaaaa]a^_]__aaaaa\\\\]aUJGOIMMMNPJ`V]^^aY_aa]aaaa[`___aa[_aBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:6:9274:14951#0/2  \n+ccaattaacttagataccccactatgcctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactaccggcaatagctaacaactcaaaggactt\n++\n+babbbbbbbbbbbcbbbbbbbbbabbbbbbabbbbbbbbbbbbbbbbabbc^bbbbbbbaaabb[Ya\\a[`_b`\\\\\\\\`\\_bYb_BBBBBBBBBBBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:111:2893:6772#0/2  \n+ccgcctcctttagataccccactatgcttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaatagcttaaaactcaaaggactt\n++\n+bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbcbbbbbbcb]bbbbabbbbbbbbbbaabbbbba^bbbaa_bbbcbab_cbb__^Z`\\`Q```\\`BBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:89:10621:20317#0/2  \n+ccgcctcctttagataccccactatgcttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaatagcttaaacccaaaaggactt\n++\n+bbbbbbbbbbbbbb`bbbbb_bbbbbbbb^bbbb_b_bbbbbbbPa_\\]aY_b_bbbbSbRV\\^]F\\^^W_b^bF\\_]]ab__BBBBBBBBBBBBBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:50:2791:11138#0/2  \n+ccgcctcctttagataccccactatgcttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaatagcttaaaactcaaaggactt\n++\n+bbbbbabbbbbabbbbab\\bbbbbbbbb`b`bbbbbbbaZbbbb`bbbbbbabbba`bbb__^bbbbb^bbbbW\\bbbbbbb[bbQaZ``bbbb`^H\\^^Z^[X[abb\n+@HELIUM_000100422_612GNAAXX:7:108:17701:18887#0/2  \n+ccgaagtagtagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctattgccggtagtactctggcgaataattttgttatattaat\n++\n+bbYbbbbbbbb^^bbbbbbbbbbbbcbbbbbbabbbbbbZbbbbbbb_bbbbbbbbbbbbbbbbbabbbb`]ZbbZY[Z[babbbcbbbb^RRWZU`^`X]bbbbb_b\n+@HELIUM_000100422_612GNAAXX:7:97:10414:9929#0/2  \n+ccaattaactagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctattgccggtagtactctggcgaatagttttgtttgcataac\n++\n+bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbabbbbbbbbbcbbbbbbbbbbbbbabbbbbbbbbbbbb^abbbbbbabZ^bb`bb\\\\WXYX]Xb`bba[\n+@HELIUM_000100422_612GNAAXX:7:97:9091:15775#0/2  
\n+ccaattaactagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctattgccggtagtactctggcgaatagttttgtttgcataac\n++\n+bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbbbbb_babbb^bbbbabbbbb_bba[_bbbbbb_^Wbbbb_`\\b`XT[UMI[_bbZ^Y\\_`bZX^\n+@HELIUM_000100422_612GNAAXX:7:74:8683:20738#0/2  \n+tcgaatatcttagataccccactatgcttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaac'..b'+@HELIUM_000100422_612GNAAXX:7:19:4311:13343#0/2  \n+ccgaatatctagaacaggctcctctagagggatgtaaagcaccgccaagtcctttgagtttcaggctgttgctcgtagtactctggcgaacatttttgtttattgaat\n++\n+bbbbbbbbbbbab^bbabbbbbbabZ`aaabbbbbbbbbabbba`bbbb]bbbbbb[b_a\\_``ZXab_b`_ZZZ`bba]bb````b^a]aab^BBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:30:1800:7833#0/2  \n+ccgaagtagttagataccccactatgcttagccctaaacacaagacataaatataacaaaattatgcgccagagtactaccgacaatcgcttaaaactcaacggactt\n++\n+BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:16:19299:8684#0/2  \n+ccgaagtagtagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctattgccggtagtactctggcgaataattttgttatattaat\n++\n+bbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbabbbbbbbbbbbbbabbabbbbbbbb`bbbbbb_bbbcbbbbba_babbbb[bbbbbabb[bbb^[b`]bbba`W`\n+@HELIUM_000100422_612GNAAXX:7:54:14618:14353#0/2  \n+ccaattaactagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctattgccggtagtactctggcgaatagttttgtttgcataac\n++\n+bbbbbbbbbbbbbbbbbbabbbbbbbbbbbbbabbbbbbabbbbbbbbbabbbbbbbbbbbbbbbbbbbXbbabbY[\\Y`b`_bbbabaaac`ab^bZT\\QG\\\\abaB\n+@HELIUM_000100422_612GNAAXX:7:108:9222:18258#0/2  \n+ccaattaacttagataccccactatgcctagccttaaacacaaatagttatgcagacaaaactattcgccagagtactacaggcaatagctcaaaactcaaaggactt\n++\n+bbbbccbbbbbbcbbbbbbbbbbbbbbbbaabbbbbbbbbbbbbbba_cabaccbabbbbbb`_bU^^^^`\\`\\\\OOOOOH[XXZ[[VXROINON^[V[[^^BBBBBB\n+@HELIUM_000100422_612GNAAXX:7:74:4783:20479#0/2  
\n+ccgcctccttagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctattgccggtagtactctggcgaataattttgttatattatt\n++\n+bbbbbbbbbbbbbbbbbb^bbbabbab_babbUbbbbb_bUbb``aPXa^bb_abb_bQb\\a``PW^`_^]Y[^[MV\\N\\`Y^]PRZIIURXU^VabXbZZbZ`BBBB\n+@HELIUM_000100422_612GNAAXX:7:38:3005:20881#0/2  \n+ccgaatatctagaacaggctcctctagagggatgtaaagcaccgccaagtcctttgagtttcaggctgttgctagtagtactctggcgaacattcttgtttattgaat\n++\n+bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb`bbbbbabbb`bbb\\b^bbbbbb]b`a[bbaababaa\\`a]a__aaaaa_a_STKX[][W\\]b[Gaa`^BBBBBB\n+@HELIUM_000100422_612GNAAXX:7:4:10139:2389#0/2  \n+ccgaatatcttagataccccactatgcttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaacagcctgaaactcaaaggacttg\n++\n+bbbbbbabbbbbbbbbbbbb^babbbbb_bb_bbbb`bbbbbbbabbbbbbbbbZbbabb^]bYWX]][]]]]QT]Za____R^`b_XO`BBBBBBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:75:9674:16966#0/2  \n+ccgcctccttagaacaggctcctctagaaaagcatagtgggggatctaaaggaggcggagatcggaagagcgtcgtgtagggaaagagtgtagatctcggtggtcgcc\n++\n+bbbbbbbbbbbbcabbbbbbbbbbbabbbbbb[bbbb_bbbbG]`bbbbbb`bbcZbbbb``bbbbab^b_bbS`ZVQKX]]^ZZ`]`Y`V`BBBBBBBBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:59:15367:8376#0/2  \n+ccgaagtagttagataccccactatgcttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaatagcttaaaactcaaaggactt\n++\n+bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb_bbbbbbbbbbbbbbbaabbbbb^cab`b`[b_bbbbbbcb`bZ]]\\\\^U`bbb`_``\\Y``\\_\n+@HELIUM_000100422_612GNAAXX:7:29:1328:13185#0/2  \n+ccgaatatcttagataccccactatgcttagccctaaacataaacattcaataaacaagaatgttcgccagagtactnntagcaacagcctgaaactcaaagaactcg\n++\n+bbbbbbbbbbbbaabbbbbbbbbcbbbbbbbbbbbbbbbbcb_bbbb^bbbbbbbbbbbbZb`b_bbbbbbb^^_OOEDOOOGOO`]``_a`JZU\\``BBBBBBBBBB\n+@HELIUM_000100422_612GNAAXX:7:30:16081:4486#0/2  \n+ccgaatatctagaacaggctcctctagagggatgtaaagcaccgccaagtcctttgagtttcaggctgttgctagtagtactctggcgaacattcttgtttatttaat\n++\n+bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbb_`bbbbb_bbbbbbb`^bba``b_^`_\\babb``b`[[[`_YY]YUbWWb^\\J`BBBBB\n+@HELIUM_000100422_612GNAAXX:7:104:10274:7999#0/2  
\n+ccgaatatcttagataccccactatgcttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaacagcctgaaactcaaaggacttg\n++\n+bbbabbbbbbbbbbbcbbbbbbbbbbbaabbbbbbbbbbbbbbbbbbbbbbabbbbbbbZb`bbbcb^bbZ``\\\\b`_`bV\\]\\\\bb`Rbb`_`a`W``\\VOFWWYR^\n+@HELIUM_000100422_612GNAAXX:7:14:7706:12338#0/2  \n+ccgaagtagtagaacaggctcctctagaagggtataaagcaccgccaagtcctttgagttttaagctattgccggtagtactctggcgcataattttgttatattaat\n++\n+bbbabbabbab`b`bbbbbbbbbbbbbbbbbbabbbbbaa_bbbbb`bb_bbbbab^bbbb`bbWbbbbVXZP^LU`\\\\``b_bb\\`HH]ZU^^R[N[URW`bb_`BB\n+@HELIUM_000100422_612GNAAXX:7:13:11240:16837#0/2  \n+ccgaatatctagaacaggctcctctagagggatgtaaagcaccgccaagtcctttgagtttcaggctgttgatagaagtactctggcgaacattctcgtttattgaat\n'