Repository 'usearch_map_reads_to_otus'
hg clone https://toolshed.g2.bx.psu.edu/repos/qfab/usearch_map_reads_to_otus

Changeset 0:c10d09023766 (2014-05-29)
Next changeset 1:5fd38fd7a623 (2014-06-04)
Commit message:
Uploaded
added:
usearch_map_reads_to_otu/README.txt
usearch_map_reads_to_otu/map_reads_otu.sh
usearch_map_reads_to_otu/map_reads_otu.xml
usearch_map_reads_to_otu/repository_dependencies.xml
usearch_map_reads_to_otu/scripts/die.py
usearch_map_reads_to_otu/scripts/fasta.py
usearch_map_reads_to_otu/scripts/fasta_number.py
usearch_map_reads_to_otu/scripts/mod_uc2otutab.py
usearch_map_reads_to_otu/scripts/otu_table_transform.py
usearch_map_reads_to_otu/scripts/parseFasta.py
usearch_map_reads_to_otu/scripts/progress.py
usearch_map_reads_to_otu/scripts/uc.py
usearch_map_reads_to_otu/test-data/dereplicated_seqs.fasta
usearch_map_reads_to_otu/test-data/hit_list.tabular
usearch_map_reads_to_otu/test-data/otu.fasta
usearch_map_reads_to_otu/test-data/otu.rabund
usearch_map_reads_to_otu/test-data/otu_pre_table.tabular
usearch_map_reads_to_otu/test-data/otu_relabel.fasta
b
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/README.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usearch_map_reads_to_otu/README.txt Thu May 29 00:51:18 2014 -0400
b
@@ -0,0 +1,62 @@
+Galaxy wrappers for USEARCH - Map Reads to OTUs 
+======================================================================
+
+USEARCH requires a licence. Therefore an automated installation is not
+possible at the moment.
+
+Requirements
+======================================================================
+
+Get your licenced USEARCH version 7 or greater from here:
+http://www.drive5.com/usearch/download.html
+
+
+Manual Installation Steps
+======================================================================
+
+USEARCH is distributed as one file, known as the binary file or executable
+file. It is completely self-contained: it does not require configuration
+files, environment variables, third-party libraries or other external
+dependencies. There is no setup script or installer because they're not
+needed. To install it, all you do is download or copy the binary to a
+directory that is accessible from the computer where you want to run the code.
+
+Step1:
+Rename the binary file to usearch.
+
+Step2:
+Move the binary file (usearch) to /usr/local/bin
+Ensure /usr/local/bin is in your path. If needed add /usr/local/bin/ to your
+path.
+
+Step3:
+Ensure that you have read and execute permissions for the binary file.
+If needed, use the chmod command to set the execute bit, e.g.:
+chmod +x /usr/local/bin/usearch
+
+
+Further installation information and help can be found at:
+http://drive5.com/usearch/manual/install.html
+
+
+Disclaimer
+======================================================================
+
+This source code is provided by QFAB Bioinformatics "as is", in the hope that it
+will be useful, and any express or implied warranties, including, but not limited to,
+the implied warranties of merchantability and fitness for a particular purpose
+are disclaimed.
+IN NO EVENT SHALL QFAB BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES HOWEVER CAUSED AND ON ANY THEORY
+OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOURCE
+CODE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+License
+======================================================================
+
+This work by QFAB Bioinformatics (as part of the GVL project
+http://genome.edu.au)
+is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0
+International License.
b
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/map_reads_otu.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usearch_map_reads_to_otu/map_reads_otu.sh Thu May 29 00:51:18 2014 -0400
b
@@ -0,0 +1,22 @@
+#!/bin/bash
+echo $@;
+
+infile_otu=$1
+infile_seq=$2
+outfile_otu_relabel=$3
+outfile_readmap=$4
+outfile_readmap_barcode=${4}'.barcode'
+outfile_otu_table=$5
+otu_pre_table=$6
+identity=$7
+multisample=$8
+barcodelabel=''
+full_path=$(echo "`dirname $0`";)
+
+echo `python $full_path\/scripts\/fasta_number.py $infile_otu OTU_ > $outfile_otu_relabel 2>1`;
+echo `usearch -usearch_global $infile_seq -db $outfile_otu_relabel -strand plus -id $identity -uc $outfile_readmap 2>1`;
+echo `sed 's/\t/\tbarcodelabel='$barcodelabel';/8' $outfile_readmap > $outfile_readmap_barcode`;
+echo `python $full_path\/scripts\/mod_uc2otutab.py $outfile_readmap_barcode $identity $multisample > $outfile_otu_table 2>1`;
+echo `python $full_path\/scripts\/otu_table_transform.py $outfile_otu_relabel $outfile_otu_table > $otu_pre_table 2>1`;
+echo `sed -i 1d $outfile_otu_table`;
+echo `sed -i 1,2d $otu_pre_table`;
b
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/map_reads_otu.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usearch_map_reads_to_otu/map_reads_otu.xml Thu May 29 00:51:18 2014 -0400
b
@@ -0,0 +1,98 @@
+<tool id="map_reads_otu" name="Map Reads to OTU" version="1.1">
+  <description>Maps read sequences to OTUs</description>
+  <command interpreter='bash' >
+   map_reads_otu.sh $otu_file $seq_input $otu_relabel $readmap_uc $output $otu_pre_table $identity False
+  </command>
+  <inputs>
+    <param name='otu_file' type='data' format='fasta' label='OTU fasta file'
+           help="This is the non-chimeric output from Uchime on the clustered OTU, see description below"/>
+    <param name='seq_input' type='data' format='fasta' label='Input reads file'
+           help="This is the output from the Dereplicate tool"/>
+    <param name='identity' type='float' value='0.97' label='Minimum identity' help='Between 0.0 - 1.0'/>
+  </inputs>
+  <outputs>
+     <data name='output' format='rabund' label="${tool.name} on ${on_string}:rabund"/>
+     <data name='otu_pre_table' format='tabular' label='${tool.name} on ${on_string}:Pre-OTU Table' />
+     <data name='otu_relabel' format='fasta' label="${tool.name} on ${on_string}:relabelled OTU"/>
+     <data name='readmap_uc' format='tabular' hidden="TRUE" label="${tool.name} on ${on_string}:hit list"/>
+  </outputs>
+
+  <help>
+===========
+Description
+===========
+
+Maps read sequences to the OTU cluster representatives that the UCHIME tool has identified as non-chimeric. This is an intermediate step to generate an OTU table.
+Map Reads to OTU is part of the USEARCH-Tool-Suite_.
+
+.. _USEARCH-Tool-Suite: http://www.drive5.com/usearch/
+
+-----
+Input
+-----
+
+A) OTU FASTA file; containing OTU cluster representatives which have been identified as non-chimeric by the UCHIME tool.
+B) File of read sequences in FASTA format, which have been dereplicated by the Dereplicate tool.
+
+----------
+Parameters
+----------
+
+OTU FASTA file
+  Of non-chimeric OTU representatives from the 'UCHIME and respectively OTU Cluster' step, this file represents the 'database' to search against.
+
+Input reads file
+  FASTA file of (dereplicated) reads, these are the query sequences
+
+Minimum identity
+  Specifies the minimum identity between the query sequence and a database sequence. This is a fractional identity between 0.0 and 1.0. By default set to 0.97, corresponding to a minimum identity of 97%.
+
+------
+Output
+------
+
+This tool produces four output files, one of which is hidden by default. *To see the hidden file: click on the cogwheel icon in the history panel and select 'Include Hidden Datasets'.*
+
+A) A tab delimited file following the rabund_ format from mothur_. The first column is a label (this is usually the identity), the second column is the number of OTUs found and each subsequent column is the number of reads found for the corresponding OTUs.
+B) A tab delimited PRE OTU table with three columns: | 1 - OTU label | 2 - Count | 3 - Sequence |
+
+.. _mothur: http://www.mothur.org/
+.. _rabund: http://www.mothur.org/wiki/Rabund_file
+
+.. class:: infomark
+
+The PRE OTU table can be used as input for the RDP Multi-Classifier to generate a complete OTU table with assigned taxonomy.
+
+C) A FASTA file containing the sequences re-labelled with their corresponding OTU label, e.g. OTU_1, OTU_2,...OTU_N, where N is the number of OTUs
+
+
+=========
+Resources
+=========
+
+Mapping_Reads_To_OTUs_
+
+.. _Mapping_Reads_To_OTUs: http://drive5.com/usearch/manual/mapreadstootus.html
+
+**Author**
+
+Robert C. Edgar (bob@drive5.com)
+
+**Wrapper Author**
+
+QFAB Bioinformatics (support@qfab.org)
+
+</help>
+<tests>
+  <test>
+    <param name="otu_file" value="otu.fasta" />
+    <param name="seq_input" value="dereplicated_seqs.fasta" />
+    <param name="identity" value="0.97" />
+    <output name='output' file="otu.rabund" ftype='rabund' lines_diff="10"/>
+    <output name="otu_pre_table" file="otu_pre_table.tabular" ftype="tabular" lines_diff="10" />
+    <output name='otu_relabel' file="otu_relabel.fasta" ftype='fasta' lines_diff="10"/>
+    <output name='readmap_uc' file="hit_list.tabular" ftype='tabular' lines_diff="10"/>
+  </test>
+</tests>
+
+</tool>
b
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/repository_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usearch_map_reads_to_otu/repository_dependencies.xml Thu May 29 00:51:18 2014 -0400
b
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<repositories description="This requires the metagenomics datatype definitions (e.g. the rabund format).">
+ <repository changeset_revision="ccba8612695e" name="metagenomics_datatypes" owner="qfab" toolshed="http://toolshed.g2.bx.psu.edu/" />
+</repositories> 
b
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/scripts/die.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usearch_map_reads_to_otu/scripts/die.py Thu May 29 00:51:18 2014 -0400
[
@@ -0,0 +1,23 @@
+import sys
+import traceback
+
+def Die(Msg):
+ """Report a fatal error on stderr and terminate the process with exit status 1."""
+ print >> sys.stderr
+ print >> sys.stderr
+
+ # Show where Die() was called from.
+ traceback.print_stack()
+ # Rebuild the command line from sys.argv, separated by single spaces.
+ s = ""
+ for i in range(0, len(sys.argv)):
+ if i > 0:
+ s += " "
+ s += sys.argv[i]
+ print >> sys.stderr, s
+ print >> sys.stderr, "**ERROR**", Msg
+ print >> sys.stderr
+ print >> sys.stderr
+ sys.exit(1)
+
+def Warning(Msg):
+ """Print sys.argv and Msg, tagged "**WARNING**", to stderr; execution continues."""
+ print >> sys.stderr
+ print >> sys.stderr, sys.argv
+ print >> sys.stderr, "**WARNING**", Msg
b
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/scripts/fasta.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usearch_map_reads_to_otu/scripts/fasta.py Thu May 29 00:51:18 2014 -0400
[
@@ -0,0 +1,184 @@
+from die import *
+import subprocess
+import tempfile
+import progress
+
+TRUNC_LABELS=0
+
+def isgap(c):
+ """Return True when c is one of the gap characters '-' or '.'."""
+ return c == '-' or c == '.'
+
+def GetSeqCount(FileName):
+ """Return the number of lines in FileName that start with '>', counted by an external `grep -c`."""
+ Tmp = tempfile.TemporaryFile()
+ # Use the .file attribute when the temporary-file object has one, otherwise the object itself.
+ try:
+ TmpFile = Tmp.file
+ except:
+ TmpFile = Tmp
+ # s first holds grep's exit status; it is overwritten with grep's output below.
+ s = subprocess.call([ "grep", "-c", "^>", FileName ], stdout=TmpFile)
+ TmpFile.seek(0)
+ s = TmpFile.read()
+ return int(s)
+
+def GetSeqsDict(FileName):
+ """Return the result of ReadSeqsFast(FileName) with progress output disabled."""
+ return ReadSeqsFast(FileName, False)
+
+def ReadSeqsDict(FileName):
+ """Same as GetSeqsDict: ReadSeqsFast(FileName) with progress output disabled."""
+ return ReadSeqsFast(FileName, False)
+
+def ReadSeqsOnSeq(FileName, OnSeq):
+ """Run ReadSeqs3 on FileName with the callback OnSeq and progress output disabled; returns None."""
+ ReadSeqs3(FileName, OnSeq, False)
+
+def ReadSeqsFastFile(File, Progress = False):
+ Seqs = {}
+ Id = ""
+ N = 0
+ while 1:
+ if N%10000 == 0 and Progress:
+ sys.stderr.write("%u seqs\r" % (N))
+ Line = File.readline()
+ if len(Line) == 0:
+ if Progress:
+ sys.stderr.write("%u seqs\n" % (N))
+ return Seqs
+ if len(Line) == 0:
+ continue
+ Line = Line.strip()
+ if Line[0] == ">":
+ N += 1
+ Id = Line[1:]
+ if TRUNC_LABELS:
+ Id = Id.split()[0]
+ Seqs[Id] = ""
+ else:
+ if Id == "":
+ Die("FASTA file does not start with '>'")
+ Seqs[Id] = Seqs[Id] + Line
+
+def ReadSeqsFast(FileName, Progress = True):
+ File = open(FileName)
+ return ReadSeqsFastFile(File, Progress)
+
+def ReadSeqs(FileName, toupper=False, stripgaps=False, Progress=False):
+ if not toupper and not stripgaps:
+ return ReadSeqsFast(FileName, False)
+
+ Seqs = {}
+ Id = ""
+ File = open(FileName)
+ while 1:
+ Line = File.readline()
+ if len(Line) == 0:
+ return Seqs
+ Line = Line.strip()
+ if len(Line) == 0:
+ continue
+ if Line[0] == ">":
+ Id = Line[1:]
+ if TRUNC_LABELS:
+ Id = Id.split()[0]
+ if Id in Seqs.keys():
+ Die("Duplicate id '%s' in '%s'" % (Id, FileName))
+ Seqs[Id] = ""
+ else:
+ if Id == "":
+ Die("FASTA file '%s' does not start with '>'" % FileName)
+ if toupper:
+ Line = Line.upper()
+ if stripgaps:
+ Line = Line.replace("-", "")
+ Line = Line.replace(".", "")
+ Seqs[Id] = Seqs[Id] + Line
+
+def ReadSeqs2(FileName, ShowProgress = True):
+ Seqs = []
+ Labels = []
+ File = open(FileName)
+ if ShowProgress:
+ progress.InitFile(File, FileName)
+ while 1:
+ progress.File()
+ Line = File.readline()
+ if len(Line) == 0:
+ if ShowProgress:
+ print >> sys.stderr, "\n"
+ return Labels, Seqs
+ Line = Line.strip()
+ if len(Line) == 0:
+ continue
+ if Line[0] == ">":
+ Id = Line[1:]
+ if TRUNC_LABELS:
+ Id = Id.split()[0]
+ Labels.append(Id)
+ Seqs.append("")
+ else:
+ i = len(Seqs)-1
+ Seqs[i] = Seqs[i] + Line
+
+def ReadSeqs3(FileName, OnSeq, ShowProgress = True):
+ File = open(FileName)
+ if ShowProgress:
+ progress.InitFile(File, FileName)
+ Label = ""
+ Seq = ""
+ while 1:
+ Line = File.readline()
+ if len(Line) == 0:
+ if Seq != "":
+ OnSeq(Label, Seq)
+ if ShowProgress:
+ print >> sys.stderr, "\n"
+ return
+ Line = Line.strip()
+ if len(Line) == 0:
+ continue
+ if Line[0] == ">":
+ if Seq != "":
+ if ShowProgress:
+ progress.File()
+ if TRUNC_LABELS:
+ Label = Label.split()[0]
+ OnSeq(Label, Seq)
+ Label = Line[1:]
+ Seq = ""
+ else:
+ Seq += Line
+
+def WriteSeq(File, Seq):
+ """Print Seq to File in consecutive lines of at most 80 characters."""
+ BLOCKLENGTH = 80
+ SeqLength = len(Seq)
+ # Number of lines needed, rounded up.
+ BlockCount = int((SeqLength + (BLOCKLENGTH-1))/BLOCKLENGTH)
+ for BlockIndex in range(0, BlockCount):
+ Block = Seq[BlockIndex*BLOCKLENGTH:]
+ Block = Block[:BLOCKLENGTH]
+ print >> File, Block
+
+def GetSizeFromLabel(Label, Default = -1):
+ """Return the integer of the first ';'-separated "size=" field in Label.
+ When no such field exists, Default is returned; a Default of -1 makes the missing field fatal (Die).
+ """
+ Fields = Label.split(";")
+ for Field in Fields:
+ if Field.startswith("size="):
+ return int(Field[5:])
+ if Default == -1:
+ Die("Missing size >" + Label)
+ return Default
+
+def StripSizeFromLabel(Label):
+ """Return Label without its ';'-separated fields that start with "size="; the other fields are rejoined with ';'."""
+ Fields = Label.split(";")
+ NewLabel = ""
+ for Field in Fields:
+ if Field.startswith("size="):
+ continue
+ if NewLabel != "":
+ NewLabel += ";"
+ NewLabel += Field
+ return NewLabel
+
+def GetQualFromLabel(Label):
+ """Return the text following the first "qual=" in Label, without the label's last character.
+ Asserts that "qual=" is present.
+ """
+ n = Label.find("qual=")
+ assert n >= 0
+ # NOTE(review): the dropped last character is presumably a trailing ';' - confirm against the label format.
+ return Label[n+5:-1]
+
+def StripQualFromLabel(Label):
+ """Return the part of Label that precedes the first "qual="; asserts that "qual=" is present."""
+ n = Label.find("qual=")
+ assert n >= 0
+ return Label[:n]
b
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/scripts/fasta_number.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usearch_map_reads_to_otu/scripts/fasta_number.py Thu May 29 00:51:18 2014 -0400
[
@@ -0,0 +1,44 @@
+import sys
+import die
+
+# Optional 2nd argument: text placed in front of every sequence number.
+Prefix = ""
+if len(sys.argv) > 2:
+ Prefix = sys.argv[2]
+
+# Optional 3rd argument: -needsize copies the size= annotation of each
+# label to the new label, -nosize (the default) drops it.
+NeedSize = 0
+if len(sys.argv) > 3:
+ if sys.argv[3] == "-needsize":
+ NeedSize = 1
+ elif sys.argv[3] == "-nosize":
+ NeedSize = 0
+ else:
+ die.Die("Must specify -needsize or -nosize")
+
+def GetSize(Label):
+ """Return the integer of the first ';'-separated "size=" field in Label.
+ When no such field exists, a message is printed to stderr and the process exits with status 1.
+ """
+ Fields = Label.split(";")
+ for Field in Fields:
+ if Field.startswith("size="):
+ return int(Field[5:])
+ print >> sys.stderr
+ print >> sys.stderr, "Size not found in label: " + Label
+ sys.exit(1)
+
+File = open(sys.argv[1])
+N = 0
+while 1:
+ Line = File.readline()
+ if len(Line) == 0:
+ break
+ Line = Line[:-1]
+ if len(Line) == 0:
+ continue
+ if Line[0] == '>':
+ N += 1
+ if NeedSize:
+ Label = Line[1:].strip()
+ Size = GetSize(Label)
+ print ">%s%u;size=%u;" % (Prefix, N, Size)
+ else:
+ print ">%s%u" % (Prefix, N)
+ else:
+ print Line
b
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/scripts/mod_uc2otutab.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usearch_map_reads_to_otu/scripts/mod_uc2otutab.py Thu May 29 00:51:18 2014 -0400
[
@@ -0,0 +1,67 @@
+import sys
+#sys.path.insert(0, 'scripts');
+import uc
+import die
+import fasta
+
+# Command line: <uc file> <identity> <multi-sample flag>
+FileName = sys.argv[1]
+pctIdent = sys.argv[2] #percent identity, will be used in the label
+# The value printed in the first column of each row is 1 - identity.
+pctIdent = 1.0 - float(pctIdent);
+multiSample = (sys.argv[3]=='True') #whether the input file has multiple samples (using barcode label)
+
+def GetSampleId(Label):
+ """Return the value of the first ';'-separated "barcodelabel=" field in Label; a missing field is fatal (die.Die)."""
+ Fields = Label.split(";")
+ for Field in Fields:
+ if Field.startswith("barcodelabel="):
+ return Field[13:]
+ die.Die("barcodelabel= not found in read label '%s'" % Label)
+
+def OnRec():
+ global OTUs, Samples, OTUTable
+ if uc.Type != 'H':
+ return
+
+ OTUId = uc.TargetLabel
+ if OTUId not in OTUIds:
+ OTUIds.append(OTUId)
+ OTUTable[OTUId] = {}
+
+ SampleId = GetSampleId(uc.QueryLabel)
+ if SampleId not in SampleIds:
+ SampleIds.append(SampleId)
+
+ N = fasta.GetSizeFromLabel(uc.QueryLabel, 1)
+ try:
+ OTUTable[OTUId][SampleId] += N
+ except:
+ OTUTable[OTUId][SampleId] = N
+
+# Accumulators filled by OnRec(): OTU and sample ids in order of first
+# appearance, and OTUTable[OTUId][SampleId] = summed read count.
+OTUIds = []
+SampleIds = []
+OTUTable = {}
+
+uc.ReadRecs(FileName, OnRec)
+
+#Header line
+# NOTE(review): in multi-sample mode the header lists "#OTUs" before
+# "sample", while the rows below print the sample id before the OTU count.
+if (multiSample):
+  s = "OTUId\t#OTUs\tsample"
+else:
+  s = "OTUId\t#OTUs"
+
+for OTUId in OTUIds:
+  s += "\t" + OTUId
+print s
+
+# One row per sample: 1 - identity, (sample id,) number of OTUs, then the
+# read count of every OTU in header order (0 when the sample has no hit).
+for SampleId in SampleIds:
+  if (multiSample):
+    s = str(pctIdent) + "\t" + SampleId
+  else: 
+    s = str(pctIdent)
+  s += "\t" + str(len(OTUIds))
+  for OTUId in OTUIds:
+    try:
+      n = OTUTable[OTUId][SampleId]
+    except:
+      n = 0;
+    s += "\t" + str(n)
+  print s
b
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/scripts/otu_table_transform.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usearch_map_reads_to_otu/scripts/otu_table_transform.py Thu May 29 00:51:18 2014 -0400
[
@@ -0,0 +1,33 @@
+import sys, os
+#sys.path.insert(0, 'scripts');
+import parseFasta
+
+# Create OTU Table
+# OTU_Label | Count | Sequence
+# 
+# Author: Anne Kunert
+#
+
+OTUmap = sys.argv[1]
+OTUcount = sys.argv[2]
+
+def transform(file):
+  seq = parseFasta.FastaParser(OTUmap)
+  with open(file) as f:
+    lis=[x.split() for x in f]
+  end=""
+  line=""
+  myseq=""
+  for x in zip(*lis):
+    otu_lable = x[0]
+    for item in seq:
+      if item == otu_lable:
+        myseq = seq.records[item]['sequence']
+        myseq = myseq.replace('\n','')
+    for y in x:
+      line= line+str(y)+"\t"
+    print(line+"\t"+myseq)
+    line=""
+    myseq=""
+#
+transform(OTUcount)
b
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/scripts/parseFasta.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usearch_map_reads_to_otu/scripts/parseFasta.py Thu May 29 00:51:18 2014 -0400
[
@@ -0,0 +1,58 @@
+# parse fasta - id & sequence
+class Seq(object):
+    """Accessor for the 'sequence' entry stored under `key` in `records_dict`."""
+
+    def __init__(self,key,records_dict):
+        self.key = key
+        self.records = records_dict
+
+    def tostring(self):
+        """Return the stored sequence with all newlines removed."""
+        s = self.records[self.key]['sequence']
+        return s.replace('\n','')
+
+
+class SeqRecord(object):
+    """One record of `records_dict`, identified by `key`; `.seq` is a Seq for the same record."""
+
+    def __init__(self,key,records_dict):
+        self.records = records_dict
+        self.key = key
+        self.seq = Seq(key,records_dict)
+
+    def format(self,out_format):
+        """Return the record as ">description\\nsequence" for out_format 'fasta'; any other value yields None."""
+        if out_format == 'fasta':
+            r = self.records[self.key]
+            return ">%s\n%s" % (r['description'],r['sequence'])
+
+
+class FastaParser(object):
+
+    def __init__(self,fasta_file):
+        self.fasta_file = fasta_file
+        fasta = open(fasta_file,'r').read()
+        self.entries = [x for x in fasta.split('>') if len(x) != 0]
+        self.build_records_dict()
+
+    def keys(self):
+        keys_list = []
+        for entry in self.entries:
+            key = [x for x in entry.split('\n')[0].split() if len(x) != 0][0]
+            keys_list.append(key)
+        return [x.strip() for x in keys_list]
+
+    def __len__(self):
+        return len(self.keys())
+
+    def __iter__(self):
+        for k in self.keys():
+            yield k
+
+    def build_records_dict(self):
+        records_dict = {}
+        for entry in self.entries:
+            key = [x for x in entry.split('\n')[0].split() if len(x) != 0][0]
+            description = entry.split('\n')[0]
+            sequence = '\n'.join(entry.split('\n')[1:]).strip()
+            records_dict[key] = {'description':description,'sequence':sequence}
+        self.records = records_dict
+
+    def __getitem__(self,key):
+        return SeqRecord(key,self.records)
b
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/scripts/progress.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usearch_map_reads_to_otu/scripts/progress.py Thu May 29 00:51:18 2014 -0400
b
@@ -0,0 +1,51 @@
+import sys
+import time
+
+File__ = None
+FileSize__ = None
+FileName__ = None
+Secs__ = None
+
+def InitFile(File, FileName = ""):
+ """Register File (an open file object) and its display name for progress reporting and record its size."""
+ global Secs__, File__, FileSize__, FileName__
+
+ File__ = File
+ FileName__ = FileName
+ Secs__ = None
+
+ # Measure the size by seeking to the end, then restore the read position.
+ Pos = File.tell()
+ File.seek(0, 2)
+ FileSize__ = File.tell()
+ File.seek(Pos)
+
+def FileDone(Msg = ""):
+ """Write the final "100.0%" progress line for the registered file name to stderr."""
+ global Secs__, File__, FileSize__, FileName__
+ Str = "%s 100.0%% %s  \n" % (FileName__, Msg)
+ sys.stderr.write(Str)
+
+def File(Msg = ""):
+ global Secs__, File__, FileSize__, FileName__
+
+ Secs = time.clock()
+ if Secs__ != None and Secs - Secs__ < 1:
+ return
+
+ Secs__ = Secs
+ Pos = File__.tell()
+ Pct = (100.0*Pos)/FileSize__
+ Str = "%s %5.1f%% %s  \r" % (FileName__, Pct, Msg)
+ sys.stderr.write(Str)
+
+def Step(Msg, i, N):
+ """Write the progress of step i out of N as a percentage, followed by Msg, to stderr.
+ Output is throttled to one update per second of time.clock().
+ """
+ global Secs__, File__, FileSize__, FileName__
+
+ Secs = time.clock()
+ if Secs__ != None and Secs - Secs__ < 1:
+ return
+
+ Secs__ = Secs
+ Pct = (100.0*i)/N
+ # NOTE(review): the last step ends with "\r" and all others with "\n"; this looks inverted - confirm the intent.
+ if i == N-1:
+ sys.stderr.write("%5.1f%% %s   \r" % (Pct, Msg))
+ else:
+ sys.stderr.write("%5.1f%% %s   \n" % (Pct, Msg))
b
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/scripts/uc.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usearch_map_reads_to_otu/scripts/uc.py Thu May 29 00:51:18 2014 -0400
[
@@ -0,0 +1,149 @@
+import re
+import sys
+import progress
+
+# Tab-separated fields:
+# 1=Type, 2=ClusterNr, 3=SeqLength or ClusterSize, 4=PctId, 5=Strand, 6=QueryStart, 7=SeedStart, 8=Alignment, 9=QueryLabel, 10=TargetLabel (optional)
+# Record types (field 1): L=LibSeed, S=NewSeed, H=Hit, R=Reject, D=LibCluster, C=NewCluster, N=NotMatched
+# For C and D types, PctId is average id with seed.
+# QueryStart and SeedStart are zero-based relative to start of sequence.
+# If minus strand, SeedStart is relative to reverse-complemented seed.
+
+MaxError = -1
+
+# Fields of the most recently parsed record; ParseRec() overwrites them.
+Type = '?'
+ClusterNr = -1
+Size = -1
+PctId = -1.0
+LocalScore = -1.0
+Evalue = -1.0
+Strand = '.'
+QueryStart = -1
+SeedStart = -1
+Alignment = ""
+QueryLabel = ""
+TargetLabel = ""
+# Name of the file being read (set by ReadFile) and its current line (set by GetRec).
+FileName = "?"
+Line = ""
+
+def Die(s):
+ """Print s and sys.argv, tagged "*** ERROR ***", to stderr and exit with status 1."""
+ print >> sys.stderr, "*** ERROR ***", s, sys.argv
+ sys.exit(1)
+
+def ProgressFile(File, FileSize):
+ """Write the module-level FileName and the read position of File, as a percentage of FileSize, to stderr."""
+# if not sys.stderr.isatty():
+# return
+ Pos = File.tell()
+ Pct = (100.0*Pos)/FileSize
+ Str = "%s %5.1f%%\r" % (FileName, Pct)
+ sys.stderr.write(Str)
+
+def Progress(i, N):
+ """Intended to write i/N as a percentage to stderr."""
+ # NOTE(review): with the isatty() test commented out, the return below looks unconditional, which would make the rest unreachable - confirm.
+# if not sys.stderr.isatty():
+ return
+ Pct = (100.0*i)/N
+ Str = "%5.1f%%\r" % Pct
+ sys.stderr.write(Str)
+
+def PrintLine():
+ """Print the module-level Line (the line most recently read by GetRec) to stdout."""
+ print Line
+
+def ParseRec(Line):
+ global Type
+ global ClusterNr
+ global Size
+ global PctId
+ global Strand
+ global QueryStart
+ global SeedStart
+ global Alignment
+ global QueryLabel
+ global TargetLabel
+ global LocalScore
+ global Evalue
+
+ Fields = Line.split("\t")
+ N = len(Fields)
+ if N != 9 and N != 10:
+ Die("Expected 9 or 10 fields in .uc record, got: " + Line)
+ Type = Fields[0]
+
+ try:
+ ClusterNr = int(Fields[1])
+ except:
+ ClusterNr = -1
+
+ try:
+ Size = int(Fields[2])
+ except:
+ Size = -1
+
+ Fields2 = Fields[3].split('/')
+ LocalScore = -1.0
+ Evalue = -1.0
+ if len(Fields2) == 3:
+ try:
+ PctId = float(Fields2[0])
+ LocalScore = float(Fields2[1])
+ Evalue = float(Fields2[2])
+ except:
+ PctId = -1.0
+ else:
+ try:
+ PctId = float(Fields[3])
+ except:
+ PctId = -1.0
+
+ Strand = Fields[4]
+
+ try:
+ QueryStart = int(Fields[5])
+ except:
+ QueryStart = -1
+
+ try:
+ SeedStart = int(Fields[6])
+ except:
+ SeedStart = -1
+
+ Alignment = Fields[7]
+ QueryLabel = Fields[8]
+
+ if len(Fields) > 9:
+ TargetLabel = Fields[9]
+
+def GetRec(File, OnRecord):
+ """Read the next record from File, parse it and call OnRecord().
+ Lines starting with '#' are skipped. Returns 0 at end of file or when OnRecord() returns 0, otherwise 1; a blank line returns 1 without calling OnRecord().
+ """
+ global Line
+ while 1:
+ Line = File.readline()
+ if len(Line) == 0:
+ return 0
+ if Line[0] == '#':
+ continue
+ Line = Line.strip()
+ if len(Line) == 0:
+ return 1
+ ParseRec(Line)
+ Ok = OnRecord()
+ # A callback without a return value (None) means "continue".
+ if Ok != None and Ok == 0:
+ return 0
+ return 1
+
+def ReadRecs(argFileName, OnRecord, ShowProgress = True):
+ """Alias of ReadFile: call OnRecord() for every record of the .uc file argFileName."""
+ return ReadFile(argFileName, OnRecord, ShowProgress)
+
+def GetRecs(argFileName, OnRecord, ShowProgress = True):
+ """Alias of ReadFile: call OnRecord() for every record of the .uc file argFileName."""
+ return ReadFile(argFileName, OnRecord, ShowProgress)
+
+def ReadFile(argFileName, OnRecord, ShowProgress = True):
+ global FileName
+ FileName = argFileName
+ File = open(FileName)
+
+ if ShowProgress:
+ progress.InitFile(File, FileName)
+ while GetRec(File, OnRecord):
+ if ShowProgress:
+ progress.File()
+ if ShowProgress:
+ progress.FileDone()
b
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/test-data/dereplicated_seqs.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usearch_map_reads_to_otu/test-data/dereplicated_seqs.fasta Thu May 29 00:51:18 2014 -0400
b
b'@@ -0,0 +1,10558 @@\n+>248442;size=4;\n+AGAGTTTGATCCTGGCTCAGATTGAACGCTGGCGGCAGGCCTAACACATGCAAGTCGAGCGCGAAATCTCCTTCGGGAGT\n+GAGTAGAGCGGCGGACGGGTGAGTAACGCGTAGGAATCTACCCAGTGGTGGgggACAACCTGGGGAAACCCAGGCTAATA\n+CCGCATACGCCCTACGGgggAAAGCGGgggCTctctTCGGAGACCTCGCGCCATTGGATGAGCCTGCGTTGGATTAGCTA\n+GTTGGTGGGGTAAAGGCCTACCAAGGCGACGATCCATAGCTGGTCTGAGAGGACGATCAGCCACACTGGGACTGAGACAC\n+GGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCGCAATGGgggAAACCCTGACGCAGCCATGCCGCGTgtgtg\n+AAGAAGGCTCTAGGGTTGTAAAGCACTTTCAGTAGGGAGAAaaaGCTCATGTTTAATAGATGTGAGTGTTGATGTTACCT\n+ACAGAAGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCG\n+TAAAGCgcgcgTAGGCGGTTTGTTAAGTCGGATGTGAAAGCCCCGGGCTCAACCTGGGAATTGCATTCGATACTGATCGA\n+CTAGAGTACGAgagagGGAGGTAGAATTCCACGTGTAGCGGTGAAATGCGTAGATATGTGGAGGAATACCGGTGGCGAAG\n+GCGGCCTCCTGGCTCGATACTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGC\n+CGTAAACGATGTCAACTAGCCGTTGGGGAACTTGATTCCTTAGTGGCGCAGCTAACGCAATAAGTTGACCGCCTGGGGAG\n+TACGGCCGCAAGGTTAAAACTCAAATGAATTGACGGgggCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGCAAC\n+GCGAAGAACCTTACCAGGTCTTGACATCCTGAAAACTTTCCAGAGATGGATTGGTGCCTTCGGGAATTCAGTGACAGGTG\n+CTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGTAACGAGCGCAACCCTTGTCCTTAGTTGCCA\n+GCACGTAATGGTGGGAACTCTAAGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGACGACGTCAAGTCATCATGGCC\n+CTTACGACCTGGGCTACacacGTGCTACAATGGTCGGTACAGACGGCTGCGAACCCGCGAGGgggAGCGAATCCGAGAAA\n+ACCGATCGTAGTCCGGATTGGAGTCTGCAACTCGACTCCATGAAGTCGGAATCGCTAGTAATCGCGAATCAGAATGTCGC\n+GGTGAATACGTTCCCGGGCCTTGTACacacCGCCCGTCACACCATGGGAGTGGGTTGCTCCAGAAGTGGTTAGCCTAACC\n+TTAGGGAGGGCGATCACCACGGAGTGATTCATGACTGGGGTGAAGTCGTAACAAGGTAACC\n+>126633;size=3;\n+AACGTACGCTGGCGGCACGCCTAACACATGCAAGTCGAACGCAGTAGCAATACTGAGTGGCAAACGGGTGAGTATAATGT\n+GGGAATCTGCCTTTTGGTTTGGAATAACACGGGGAAACTTGTGCTAATACCGAATAAGCCCTTACGGGGAAAGTTTTAAC\n+GCCGAAAGATGAGCCTGCACTTGATTAGCTAGTTGGTAAGGTAAAAGCTTACCAAGGCAACGATCAATAGCTGTTCTTAG\n+AGGAAGACCAGCCACATTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATCTTGCACAATGGgg\n+gAAACCCTGATGCAGCGATGCCGCGTGAGTGAAGAGGGCCCTTGGGTTGT
AAAACTCTTTCGTCGGGGAAGAAAATGACT\n+GTACCCGAATAAGAAGGTCCGGCTAACTTCGTGCCAGCAGCCGCGGTAATACGAAGGGACCTAGCGTAGTTCGGAATTAC\n+TGGGCTTAAAGAGCTCGTAGGTGGTTAAaaaaGTTGATGGTGAAATCCCAAGGCTCAACCTTGGAACTGCCATCAAAACT\n+TtttAGCTAGAGTGTGATAGAGGTAAGTGGAATTTCTAGTGTAGAGGTGAAATTCGTAGATATTAGAAAGAACACCAAAT\n+GCGAAGGCAACTTACTGGGTCACTACTGACACTGAGGAGCGAAAGCATGGGTAGCGAAGAGGATTAGATACCCTCGTAGT\n+CCATGCCGTAAACGATGtgtgCTAGACGTTGGAAATatatTtttCAGTGTCGCAGCGAAAGCATTAAGCACACCGCCTGG\n+GGAGTACGACCGCAAGGTTAAAACTCAAATGAATTGACGGGGACCCGCACAAGCAGTGGAGCATGTGGTTTAATTCGAAG\n+ATACGCGCAGAACCTTACCAACACTTGACATGTTCGTCGCGAAACTAAGAGATTAGTTttttCAGTTCGGCTGGACGAAA\n+CACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCCTACTTTT\n+AGTTGCCACCATTTAGTTGGGCACTTTAAAAGAACTGCCAGTGATAAGCTGGAGGAAGGTGGGGATGACGTCAAGTCCTC\n+ATGGCCCTTATGTGTTGGGCTACacacGTGCTACAATGGTACTTACAATGGGATGCAAAGAGGTGACTCTTAGCTAATCC\n+CTAAAATGTACCTCAGTTCGGATTGTACTCTGTAACTCGAGTGCATGAAGCTGGAATTGCTAGTAATCGCGGATCAGCgc\n+gcCGCGGTGAATACGTTCCCGGGTCTTGTACacacCGCCCGTCACACCATGGAAGTTGGTTACACCTTAAGGCAAAGCTT\n+ATACCTTTGACTACGGTACGATCAGCAACTGGGGTGAAGTCGTAACAAGGTAGCCGTAGGGGAACCTGCGGCTGGATTAC\n+CTCCTTTCTAAGGA\n+>340660;size=3;\n+AGAGTTTGATCATGGCTCAGGACGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGAGCGATGAAGCACCTTCGGGTGT\n+GAATTAGCGGCGAACGGGTGAGGAACACGTGAGAAATCTGCCTTCAACACTGGGATAACTCCGGGAAACCGGGGCTAATA\n+CCGGATATGAAACCTGCGGGCATCCGCGGGTTTGGAAAGTTtttCGGTTGAAGATGATCTCGCGGCCTATCAGCTTGTTG\n+GTGAGGTAATGGCTCACCAAGGCAACGACGGGTAGCTGGTCTGAGAGGATGATCAGCCACACTGGGACTGAGACACGGCC\n+CAGACTCCTACGGGAGGCAGCAGTGGGGAATCTTGCGCAATGGGCGAAAGCCTGACGCAGCAACGCCGCGTGCGGGATGA\n+AGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCACCGGCTAACTAC\n+GTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGAGCTCGTAGGCGGTTAGAC\n+ACGTCGGATGTGAAAACTGGgggCTCAACCcccAGCCTGCATTCGATACGGGAGGACTCGAGGACGGCAGGGGAGACTGG\n+AACTTCTGGTGTAGCGGTGGAATGCGCAGATATCAGAAAGAACACCAATGGCGAAGGCAGGTCTCTGGGCCGATCCTGAC\n+GCTGAGGAACGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACTATGGACGCTAGGTGTG\n+GgggACATTCCACGTTCTCTGCG
CCGCAGCTAACGCATTAAGCGTCCCGCCTGGGGAGTACGACCGCAAGGTTAAA'..b'AGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGACTATTAAGTCAGGGGTGAAATCCCG\n+GGGCTCAACCCCGGAACTGCCCTTGATACTGGTAGTCTTGAGTTCGAgagagGTGAGTGGAATTCCGAGTGTAGAGGTGA\n+AATTCGTAGATATTCGGAGGAACACCAGTGGCGAAGGCGGCTCACTGGCTCGATACTGACGCTGAGGTGCGAAAGTGTGG\n+GGAGCAAACAGGATTAGATACCCTGGTAGTCCACACCGTAAACGATGAATGCCAGTCGTCGGGTTGCATGCAATTCGGTG\n+ACacacCTAACGGATTAAGCATTCCGCCTGGGGAGTACGGTCGCAAGATTAAAACTCAAAGGAATTGACGGgggCCCGCA\n+CAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGCAGAACCTTACCAACCCTTGACATCCTGTGCTACATGGAgag\n+aTCCATGGTTCCCTTCGGGGACGCAGTGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTCGGTTAAGT\n+CCGGCAACGAGCGCAACCCACACCCTTAGTTGCCAGCAGTTCGGCTGGGCACTCTAGGGGAACTGCCCGTGATAAGCGGG\n+AGGAAGGTGTGGATGACGTCAAGTCCTCATGGCCCTTACGGGTTGGGCTACacacGTGCTACAATGGCAGTGACAATGGG\n+TTAATCCCCAAaaaCTGTCTCAGTTCGGATTGTCGTCTGCAACTCGACGGCATGAAGTCGGAATCGCTAGTAATCGCGTA\n+ACAGCATGACGCGGTGAATACGTTCCCGGGCCTTGTACacacCGCCCGTCACATCATGGGAGTTGGGTTTACCCGAAGAC\n+GGTGCGCCAACCTGTTCGCAGGgggCAGCTGGCCACGGTAGGCTCAGCGACTGGGATGAAGTCGTAACAAGGTAGCCGTA\n+GGGGAACCTGCGGCTGGATCACCTCCTTTCT\n+>347843;size=1;\n+CGCTGGCGGCAGGCTTAACACATGCAAGTCGAGCGGTAACAGGGGAGCTTGCTCCCGCTGACGAGCGGCGGACGGGTGAG\n+TAACGCGTAGGAATCTGCCCAATAGTGGgggATAGCCCGGGGAAACTCGGATTAATACCGCATACGCCCTACGGgggAAA\n+GGCCGGGATCTTCGGACCGGTCGCTATTGGATGAGCCTGCGTAAGATTAGCTAGTTGGTAGGGTAAAGGCCTACCAAGGC\n+GACGATCTTTAGCTGGTCTGAGAGGATGATCAGCCACACTGGAACTGAGACACGGTCCAGACTCCTACGGGAGGCAGCAG\n+TGGGGAATATTGGACAATGGgggCAACCCTGATCCAGCCATGCCGCGTgtgtgAAGAAGGCCTTAGGGTTGTAAAGCACT\n+TTAAGTTGGGAGGAAGGCTCTGTAGCTAATATCTGCAGGGATTGACGTTACCAACAGAATAAGCACCGGCTAACTCCGTG\n+CCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCgcgcgTAGGCGGTTTGGTAAG\n+TGTGATGTGAAAGCCCAGGGCTTAACCTTGGAACTGCATCACATACTGCCAGGCTAGAGTACGGTAGAGGggggTAGAAT\n+TCCACGTGTAGCGGTGAAATGCGTAGAGATGTGGAGGAATACCAGTGGCGAAGGCGGCCcccTGGATCGATACTGACGCT\n+GAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGTCAACTAGCCGTTGGA\n+AGGGTAATCCTTTTAGTGGCGCAGCTAACGCTCTAAGTTGACCGCCTGGGGAGTACGGTCGCAAGATTAAAACTCA
AATG\n+AATTGACGGgggCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGACGCAACGCGAAGAACCTTACCTGGTCTTGACAT\n+CCTGCGAACTTTCCAGAGATGGATTGGTGCCTTCGGGAGCGCAGTGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCG\n+TGAGATGTTGGGTTAAGTCCCGTAACGAGCGCAACCCTTGTCCTTAGTTGCCATCATTTAGTTGGGCACTTTAGGGAGAC\n+TGCCGGTGACAAACCGGAGGAAGGTGGGGACGACGTCAAGTCATCATGGCCCTTACGACCAGGGCTACacacGTGCTACA\n+ATGGAGCGTACAAAGGGTTGCAACCCCGCGAGGGTGAGCTAATCCCATAAAACGTTTCGTAGTCCGGATCGCAGTCTGCA\n+ACTCGACTGCGTGAAGTCGGAATCGCTAGTAATCGCGAATCAGAATGTCGCGGTGAATACGTTCCCGGGCCTTGTACaca\n+cCGCCCGTCACACCATGGGAGTGGGTTGCAAAAGAAGTGGGTAGCATAACCGCAAGGAGTGCGCTCACCACTTTGTGATT\n+CATGACTGGGGTGAAGTCGTAACAAGGTAACCGTA\n+>203214;size=1;\n+GAGTTTGATCCTGGCTCAGGATGAACGCTGGCGGCGTGGATAAGTCATGCAAGTCAAGGgggCTCGCAAGAGCAACCGGC\n+AGACGAGGTAGTAATAAGTAGGTACGTCCCTTCAAGTCACGAATAGCCCGTCGAAAGACGGGGTAATACGCGATAGTCCC\n+TTCGGGGTAAAGATTTATCGCTTGAAGAACGGCCTGCTCGGTATCAGCTAGTTGGCGGTGTAAAAGACCACCAAGGCTAT\n+GACGCCTAGGGGAGCTGAGAGGCTGACCcccACCGATGGGACTGAGATACGGCCCATACACCTACGGGTGGCTGCAGACG\n+AGAATATTCCACAATGGACGAAAGTCTGATGGAGCGACGCCGCGTGCAGGATGAAGTTCTTCGGGATGTAAACTGCTTTT\n+ATGAGCGAGAAAGTTTATTGATCAGCTCATGAATAAGAGGTTGCTAAACTCGTGCCAGCAGCAGCGGTAATACGAGTGCC\n+TCAAGCGTTATCCGGAATCATTGGGCGTAAAGGGTgtgtAGGTGGTCGCGTTAGTCTTCCGTTAAATTCTTCGGCTCAAC\n+CGGgggCATGCGGgggAAACGGCGCGACTTGAGGATGCGAGAGGAAAGCGGAACTCATAGTGTAGCGGTGAAATGCGTTG\n+ATATTATGGGGAACACCAAATGCGAAGGCAGCTTTCTGGAGCATCCTGACACTGAAACACGAAAGCGTGGGTAGCGAACG\n+GGATTAGATACCCCGGTAGTCCACGCCCTAAACGATGATCACTAGCTATTCGGAGTATCGACCTCCAAGTGGCGTAGCTA\n+ACGCGTTAAGTGATCCGCCTGGGTAGTACGGCCGCAAGGCTAAAACTCAAAGGAATAGACGGGGACTTGCACAAGCGGAG\n+GAACATGTGGTTCAATTCGATGGTAAACGAAaaaCCTTACCAGGGTTGGAAATCAGTTGAGATAGGCAGAAACGTCTATG\n+CCGCAAGGCAATTAGACAGGTGCTGCATGGTCGTCGTCAGTTCGTGGCTTGAGTTGTTCCCTTCAGTGGGGTAACGAACG\n+CAACCcccGTTGCCTGGTATAAGTATCAGGCGAGACTGCCCAGGTTAACTGGGAGGAAGGTGGGGATGACGCCAGATCAG\n+CATGGCCCTTATACCTGGGCCTACacacGTGTTACAATGGCCGACTCACGCGAGGCGAAGAGGAAACTCGGAGCAAATCG\n+TAAaaaGTCGGCCAAGTTCGGATTGAGGGCTGCAACTCGCCCTCATGAAGCGGGATTCGCTAGTAATCGCAGATCAGCAT\n+GCTGCGGTGAATACGTTCTCAAGTCTTG
TACTCACCGCCCGTCAACTCAAGGGAGCTGGGAATACCCGAAGTCGCCCTTG\n+GGCGCCTAAGGTAAGCTCAGTGACAGGGAGTAAGTCGTAACAAGGTAACCGT\n'
b
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/test-data/hit_list.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usearch_map_reads_to_otu/test-data/hit_list.tabular Thu May 29 00:51:18 2014 -0400
b
b'@@ -0,0 +1,1459 @@\n+H\t1\t1454\t100.0\t+\t0\t0\t1454M\t126633;size=3;\tOTU_2\n+H\t0\t1501\t100.0\t+\t0\t0\t1501M\t248442;size=4;\tOTU_1\n+H\t2\t1480\t100.0\t+\t0\t0\t1480M\t340660;size=3;\tOTU_3\n+H\t4\t1449\t100.0\t+\t0\t0\t1449M\t91747;size=3;\tOTU_5\n+H\t5\t1506\t100.0\t+\t0\t0\t1506M\t258157;size=3;\tOTU_6\n+H\t6\t1475\t100.0\t+\t0\t0\t1475M\t166987;size=1;\tOTU_7\n+H\t7\t1415\t100.0\t+\t0\t0\t1415M\t104633;size=1;\tOTU_8\n+H\t8\t1439\t100.0\t+\t0\t0\t1439M\t235221;size=1;\tOTU_9\n+H\t9\t1489\t100.0\t+\t0\t0\t1489M\t62498;size=1;\tOTU_10\n+H\t1\t1438\t98.8\t+\t0\t0\t20D1418M36I\t317318;size=1;\tOTU_2\n+H\t10\t1322\t100.0\t+\t0\t0\t1322M\t203534;size=1;\tOTU_11\n+H\t12\t1447\t100.0\t+\t0\t0\t1447M\t176763;size=1;\tOTU_13\n+H\t11\t1501\t100.0\t+\t0\t0\t1501M\t116062;size=1;\tOTU_12\n+H\t13\t1501\t100.0\t+\t0\t0\t1501M\t342161;size=1;\tOTU_14\n+H\t14\t1517\t100.0\t+\t0\t0\t1517M\t232885;size=1;\tOTU_15\n+H\t15\t1517\t100.0\t+\t0\t0\t1517M\t229914;size=1;\tOTU_16\n+H\t16\t1352\t100.0\t+\t0\t0\t1352M\t103462;size=1;\tOTU_17\n+H\t17\t1498\t100.0\t+\t0\t0\t1498M\t356409;size=1;\tOTU_18\n+H\t18\t1552\t100.0\t+\t0\t0\t1552M\t100649;size=1;\tOTU_19\n+H\t19\t1496\t100.0\t+\t0\t0\t1496M\t257959;size=1;\tOTU_20\n+H\t20\t1502\t100.0\t+\t0\t0\t1502M\t230967;size=1;\tOTU_21\n+H\t21\t1465\t100.0\t+\t0\t0\t1465M\t233918;size=1;\tOTU_22\n+H\t22\t1458\t100.0\t+\t0\t0\t1458M\t154776;size=1;\tOTU_23\n+H\t224\t1426\t99.9\t+\t0\t0\t1426M23I\t315318;size=1;\tOTU_225\n+H\t23\t1423\t100.0\t+\t0\t0\t1423M\t18318;size=1;\tOTU_24\n+H\t305\t1439\t99.0\t+\t0\t0\t20D1419M36I\t352990;size=1;\tOTU_306\n+H\t14\t1443\t99.9\t+\t0\t0\t103I1414M29D\t99933;size=1;\tOTU_15\n+H\t24\t1361\t100.0\t+\t0\t0\t1361M\t241380;size=1;\tOTU_25\n+H\t25\t1453\t100.0\t+\t0\t0\t1453M\t256833;size=1;\tOTU_26\n+H\t26\t1338\t100.0\t+\t0\t0\t1338M\t114582;size=1;\tOTU_27\n+H\t11\t1390\t99.2\t+\t0\t0\t20D1370M131I\t250316;size=1;\tOTU_12\n+H\t27\t1366\t100.0\t+\t0\t0\t1366M\t243013;size=1;\tOTU_28\n+H\t28\t1485\t100.0\t
+\t0\t0\t1485M\t343226;size=1;\tOTU_29\n+H\t1\t1438\t98.9\t+\t0\t0\t20D1418M36I\t320475;size=1;\tOTU_2\n+H\t29\t1473\t100.0\t+\t0\t0\t1473M\t235527;size=1;\tOTU_30\n+H\t30\t1466\t100.0\t+\t0\t0\t1466M\t68981;size=1;\tOTU_31\n+H\t31\t1532\t100.0\t+\t0\t0\t1532M\t339114;size=1;\tOTU_32\n+H\t32\t1463\t100.0\t+\t0\t0\t1463M\t270810;size=1;\tOTU_33\n+H\t33\t1463\t100.0\t+\t0\t0\t1463M\t323827;size=1;\tOTU_34\n+H\t34\t1494\t100.0\t+\t0\t0\t1494M\t343781;size=1;\tOTU_35\n+N\t*\t*\t*\t.\t*\t*\t*\t354371;size=1;\t*\n+H\t305\t1455\t99.1\t+\t0\t0\t1455M\t68985;size=1;\tOTU_306\n+H\t35\t1491\t100.0\t+\t0\t0\t1491M\t356095;size=1;\tOTU_36\n+H\t36\t1477\t100.0\t+\t0\t0\t1477M\t354780;size=1;\tOTU_37\n+H\t37\t1418\t100.0\t+\t0\t0\t1418M\t248390;size=1;\tOTU_38\n+H\t17\t1500\t97.4\t+\t0\t0\t832MD180MD486M\t232506;size=1;\tOTU_18\n+H\t3\t1447\t99.9\t+\t0\t0\t3I1447M\t331619;size=1;\tOTU_4\n+H\t38\t1503\t100.0\t+\t0\t0\t1503M\t234677;size=1;\tOTU_39\n+H\t305\t1438\t99.0\t+\t0\t0\t20D828MI590M36I\t234582;size=1;\tOTU_306\n+H\t39\t1523\t100.0\t+\t0\t0\t1523M\t228291;size=1;\tOTU_40\n+H\t305\t1439\t99.2\t+\t0\t0\t20D1419M36I\t355638;size=1;\tOTU_306\n+H\t305\t1439\t99.3\t+\t0\t0\t20D1419M36I\t336753;size=1;\tOTU_306\n+H\t40\t1505\t100.0\t+\t0\t0\t1505M\t161217;size=1;\tOTU_41\n+H\t305\t1439\t99.0\t+\t0\t0\t20D1419M36I\t314677;size=1;\tOTU_306\n+H\t305\t1455\t98.9\t+\t0\t0\t1455M\t120789;size=1;\tOTU_306\n+N\t*\t*\t*\t.\t*\t*\t*\t337387;size=1;\t*\n+H\t24\t1473\t99.4\t+\t0\t0\t20D468MD893M91D\t245132;size=1;\tOTU_25\n+H\t11\t1477\t98.6\t+\t0\t0\t20D1408MI49M43I\t309969;size=1;\tOTU_12\n+H\t41\t1480\t100.0\t+\t0\t0\t1480M\t156314;size=1;\tOTU_42\n+H\t1\t1429\t98.9\t+\t0\t0\t11D1418M36I\t309196;size=1;\tOTU_2\n+H\t224\t1449\t99.7\t+\t0\t0\t1449M\t230233;size=1;\tOTU_225\n+H\t17\t1378\t99.7\t+\t0\t0\t6D1372M126I\t138696;size=1;\tOTU_18\n+H\t42\t1355\t100.0\t+\t0\t0\t1355M\t138691;size=1;\tOTU_43\n+H\t43\t1351\t100.0\t+\t0\t0\t1351M\t323976;size=1;\tOTU_44\n+H\t44\t1468\t100.0\t+\t0\t0\t1468
M\t129024;size=1;\tOTU_45\n+H\t45\t1449\t100.0\t+\t0\t0\t1449M\t356435;size=1;\tOTU_46\n+H\t46\t1429\t100.0\t+\t0\t0\t1429M\t356183;size=1;\tOTU_47\n+H\t47\t1444\t100.0\t+\t0\t0\t1444M\t278744;size=1;\tOTU_48\n+H\t48\t1492\t100.0\t+\t0\t0\t1492M\t254829;size=1;\tOTU_49\n+H\t49\t1432\t100.0\t+\t0\t0\t1432M\t166245;size=1;\tOTU_50\n+H\t50\t1483\t100.0\t+\t0\t0\t1483M\t334050;size=1;\tOTU_51\n+H\t51\t1491\t100.0\t+\t0\t0\t1491M\t334058;size=1;\tOTU_52\n+H\t11\t1490\t98.2\t+\t0\t0\t20D954MD454MI61M31I\t234804;size=1;\tOTU_12\n+H\t52\t1483\t100.0\t+\t0\t0\t1483M\t62482;size=1;\tOTU_53\n+H\t53\t1482\t100.0\t+\t0\t0\t1482M\t62487;size=1;\tOTU_54\n+H\t35\t1489\t97.2\t+\t0\t0\t1111M2I378M\t316789;size=1;\tOTU_36\n+H\t54\t1482\t100.0\t+\t0\t0\t1482M\t173254;size=1;\tOTU_'..b'01197;size=1;\tOTU_202\n+H\t305\t1439\t99.2\t+\t0\t0\t20D1419M36I\t331948;size=1;\tOTU_306\n+H\t280\t1490\t99.8\t+\t0\t0\t1490M\t235263;size=1;\tOTU_281\n+H\t8\t1439\t98.1\t+\t0\t0\t1439M\t346583;size=1;\tOTU_9\n+H\t8\t1439\t97.8\t+\t0\t0\t1439M\t349272;size=1;\tOTU_9\n+H\t515\t1500\t100.0\t+\t0\t0\t1500M\t329380;size=1;\tOTU_516\n+H\t219\t1501\t98.0\t+\t0\t0\t34D764MD702M29I\t346039;size=1;\tOTU_220\n+N\t*\t*\t*\t.\t*\t*\t*\t257445;size=1;\t*\n+H\t238\t1439\t99.2\t+\t0\t0\t1439M\t316154;size=1;\tOTU_239\n+H\t486\t1440\t99.8\t+\t0\t0\t416MI1024M6I\t124464;size=1;\tOTU_487\n+H\t516\t1369\t100.0\t+\t0\t0\t1369M\t270990;size=1;\tOTU_517\n+H\t517\t1413\t100.0\t+\t0\t0\t1413M\t324499;size=1;\tOTU_518\n+H\t8\t1439\t97.8\t+\t0\t0\t1439M\t305410;size=1;\tOTU_9\n+H\t518\t1412\t100.0\t+\t0\t0\t1412M\t141383;size=1;\tOTU_519\n+H\t413\t1450\t99.7\t+\t0\t0\t1450M\t347465;size=1;\tOTU_414\n+H\t311\t1463\t98.2\t+\t0\t0\t1463M\t339161;size=1;\tOTU_312\n+H\t339\t1506\t99.9\t+\t0\t0\t95D1364M47D\t331178;size=1;\tOTU_340\n+H\t255\t1450\t99.7\t+\t0\t0\t20D1430M28I\t356196;size=1;\tOTU_256\n+H\t519\t1394\t100.0\t+\t0\t0\t1394M\t268436;size=1;\tOTU_520\n+H\t45\t1447\t99.9\t+\t0\t0\t1447M2I\t354925;size=1;\tOTU_46\n+H\t305\t143
9\t99.1\t+\t0\t0\t20D1419M36I\t334206;size=1;\tOTU_306\n+H\t520\t1343\t100.0\t+\t0\t0\t1343M\t287458;size=1;\tOTU_521\n+H\t185\t1484\t98.4\t+\t0\t0\t1484MI\t136965;size=1;\tOTU_186\n+H\t359\t1450\t99.2\t+\t0\t0\t20D1430M37I\t233543;size=1;\tOTU_360\n+H\t143\t1449\t99.1\t+\t0\t0\t1447M2D\t228729;size=1;\tOTU_144\n+H\t413\t1453\t98.6\t+\t0\t0\t1386M3D64M\t305093;size=1;\tOTU_414\n+H\t4\t1449\t98.0\t+\t0\t0\t893M3I553M3D\t233624;size=1;\tOTU_5\n+H\t521\t1490\t100.0\t+\t0\t0\t1490M\t326714;size=1;\tOTU_522\n+H\t522\t1486\t100.0\t+\t0\t0\t1486M\t254309;size=1;\tOTU_523\n+H\t383\t1466\t99.4\t+\t0\t0\t1466M\t331845;size=1;\tOTU_384\n+H\t523\t1538\t100.0\t+\t0\t0\t1538M\t366993;size=1;\tOTU_524\n+H\t524\t1467\t100.0\t+\t0\t0\t1467M\t232909;size=1;\tOTU_525\n+H\t525\t1505\t100.0\t+\t0\t0\t1505M\t237311;size=1;\tOTU_526\n+N\t*\t*\t*\t.\t*\t*\t*\t316923;size=1;\t*\n+N\t*\t*\t*\t.\t*\t*\t*\t336138;size=1;\t*\n+H\t165\t1464\t97.1\t+\t0\t0\t1464M21I\t311229;size=1;\tOTU_166\n+N\t*\t*\t*\t.\t*\t*\t*\t356209;size=1;\t*\n+H\t205\t1450\t99.6\t+\t0\t0\t821MI629M\t334492;size=1;\tOTU_206\n+H\t321\t1489\t99.3\t+\t0\t0\t44I948MI28MI508M5D\t140509;size=1;\tOTU_322\n+H\t526\t1451\t100.0\t+\t0\t0\t1451M\t348936;size=1;\tOTU_527\n+N\t*\t*\t*\t.\t*\t*\t*\t189534;size=1;\t*\n+H\t11\t1489\t98.6\t+\t0\t0\t20D1408MI61M31I\t352430;size=1;\tOTU_12\n+H\t143\t1449\t99.4\t+\t0\t0\t1447M2D\t347720;size=1;\tOTU_144\n+H\t527\t1345\t100.0\t+\t0\t0\t1345M\t167302;size=1;\tOTU_528\n+H\t102\t1498\t99.2\t+\t0\t0\t1498M\t231599;size=1;\tOTU_103\n+H\t528\t1553\t100.0\t+\t0\t0\t1553M\t134559;size=1;\tOTU_529\n+H\t305\t1439\t99.1\t+\t0\t0\t20D1419M36I\t356149;size=1;\tOTU_306\n+H\t529\t1335\t100.0\t+\t0\t0\t1335M\t222960;size=1;\tOTU_530\n+H\t215\t1513\t99.1\t+\t0\t0\t1513M\t115456;size=1;\tOTU_216\n+N\t*\t*\t*\t.\t*\t*\t*\t342719;size=1;\t*\n+H\t102\t1498\t99.6\t+\t0\t0\t1498M\t314571;size=1;\tOTU_103\n+H\t13\t1501\t99.9\t+\t0\t0\t1501M\t321622;size=1;\tOTU_14\n+H\t530\t1525\t100.0\t+\t0\t0\t1525M\t252888;size=1
;\tOTU_531\n+H\t1\t1438\t99.3\t+\t0\t0\t20D1418M36I\t230512;size=1;\tOTU_2\n+H\t170\t1534\t98.9\t+\t0\t0\t1506M28D\t242585;size=1;\tOTU_171\n+N\t*\t*\t*\t.\t*\t*\t*\t351578;size=1;\t*\n+H\t102\t1475\t98.5\t+\t0\t0\t24I170MD1304M\t308605;size=1;\tOTU_103\n+H\t83\t1476\t97.4\t+\t0\t0\t9I188MD1194MD92M9I\t179818;size=1;\tOTU_84\n+H\t311\t1472\t97.7\t+\t0\t0\t20I773MD670M28D\t126228;size=1;\tOTU_312\n+H\t305\t1439\t99.1\t+\t0\t0\t20D1419M36I\t342655;size=1;\tOTU_306\n+H\t11\t1483\t98.8\t+\t0\t0\t20D1408MI55M37I\t136970;size=1;\tOTU_12\n+H\t531\t1477\t100.0\t+\t0\t0\t1477M\t356680;size=1;\tOTU_532\n+H\t532\t1515\t100.0\t+\t0\t0\t1515M\t337091;size=1;\tOTU_533\n+H\t359\t1450\t98.6\t+\t0\t0\t20D1430M37I\t233221;size=1;\tOTU_360\n+H\t122\t1494\t98.3\t+\t0\t0\t1494M\t229462;size=1;\tOTU_123\n+H\t183\t1461\t99.7\t+\t0\t0\t1461M22I\t256095;size=1;\tOTU_184\n+H\t102\t1499\t98.5\t+\t0\t0\t194MD1304M\t333374;size=1;\tOTU_103\n+H\t533\t1471\t100.0\t+\t0\t0\t1471M\t150135;size=1;\tOTU_534\n+H\t173\t1481\t99.8\t+\t0\t0\t1481M2I\t314740;size=1;\tOTU_174\n+H\t534\t1475\t100.0\t+\t0\t0\t1475M\t347843;size=1;\tOTU_535\n+H\t63\t1478\t97.8\t+\t0\t0\t150D49MI1235M44D\t315908;size=1;\tOTU_64\n+H\t143\t1489\t99.4\t+\t0\t0\t8D1447M34D\t142421;size=1;\tOTU_144\n+H\t305\t1439\t98.9\t+\t0\t0\t20D1419M36I\t233774;size=1;\tOTU_306\n+H\t1\t1438\t99.4\t+\t0\t0\t20D1418M36I\t324263;size=1;\tOTU_2\n+H\t535\t1412\t100.0\t+\t0\t0\t1412M\t203214;size=1;\tOTU_536\n+N\t*\t*\t*\t.\t*\t*\t*\t324270;size=1;\t*\n+N\t*\t*\t*\t.\t*\t*\t*\t339568;size=1;\t*\n'
b
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/test-data/otu.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usearch_map_reads_to_otu/test-data/otu.fasta Thu May 29 00:51:18 2014 -0400
b
b'@@ -0,0 +1,28685 @@\n+>248442;size=4;\n+AGAGTTTGATCCTGGCTCAGATTGAACGCTGGCGGCAGGCCTAACACATGCAAGTCGAGCGCGAAATCTCCTTCGGGAGT\n+GAGTAGAGCGGCGGACGGGTGAGTAACGCGTAGGAATCTACCCAGTGGTGGGGGACAACCTGGGGAAACCCAGGCTAATA\n+CCGCATACGCCCTACGGGGGAAAGCGGGGGCTCTCTTCGGAGACCTCGCGCCATTGGATGAGCCTGCGTTGGATTAGCTA\n+GTTGGTGGGGTAAAGGCCTACCAAGGCGACGATCCATAGCTGGTCTGAGAGGACGATCAGCCACACTGGGACTGAGACAC\n+GGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCCATGCCGCGTGTGTG\n+AAGAAGGCTCTAGGGTTGTAAAGCACTTTCAGTAGGGAGAAAAAGCTCATGTTTAATAGATGTGAGTGTTGATGTTACCT\n+ACAGAAGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCG\n+TAAAGCGCGCGTAGGCGGTTTGTTAAGTCGGATGTGAAAGCCCCGGGCTCAACCTGGGAATTGCATTCGATACTGATCGA\n+CTAGAGTACGAGAGAGGGAGGTAGAATTCCACGTGTAGCGGTGAAATGCGTAGATATGTGGAGGAATACCGGTGGCGAAG\n+GCGGCCTCCTGGCTCGATACTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGC\n+CGTAAACGATGTCAACTAGCCGTTGGGGAACTTGATTCCTTAGTGGCGCAGCTAACGCAATAAGTTGACCGCCTGGGGAG\n+TACGGCCGCAAGGTTAAAACTCAAATGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGCAAC\n+GCGAAGAACCTTACCAGGTCTTGACATCCTGAAAACTTTCCAGAGATGGATTGGTGCCTTCGGGAATTCAGTGACAGGTG\n+CTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGTAACGAGCGCAACCCTTGTCCTTAGTTGCCA\n+GCACGTAATGGTGGGAACTCTAAGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGACGACGTCAAGTCATCATGGCC\n+CTTACGACCTGGGCTACACACGTGCTACAATGGTCGGTACAGACGGCTGCGAACCCGCGAGGGGGAGCGAATCCGAGAAA\n+ACCGATCGTAGTCCGGATTGGAGTCTGCAACTCGACTCCATGAAGTCGGAATCGCTAGTAATCGCGAATCAGAATGTCGC\n+GGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGGGAGTGGGTTGCTCCAGAAGTGGTTAGCCTAACC\n+TTAGGGAGGGCGATCACCACGGAGTGATTCATGACTGGGGTGAAGTCGTAACAAGGTAACC\n+>126633;size=3;\n+AACGTACGCTGGCGGCACGCCTAACACATGCAAGTCGAACGCAGTAGCAATACTGAGTGGCAAACGGGTGAGTATAATGT\n+GGGAATCTGCCTTTTGGTTTGGAATAACACGGGGAAACTTGTGCTAATACCGAATAAGCCCTTACGGGGAAAGTTTTAAC\n+GCCGAAAGATGAGCCTGCACTTGATTAGCTAGTTGGTAAGGTAAAAGCTTACCAAGGCAACGATCAATAGCTGTTCTTAG\n+AGGAAGACCAGCCACATTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATCTTGCACAATGGGG\n+GAAACCCTGATGCAGCGATGCCGCGTGAGTGAAGAGGGCCCTTGGGTTGT
AAAACTCTTTCGTCGGGGAAGAAAATGACT\n+GTACCCGAATAAGAAGGTCCGGCTAACTTCGTGCCAGCAGCCGCGGTAATACGAAGGGACCTAGCGTAGTTCGGAATTAC\n+TGGGCTTAAAGAGCTCGTAGGTGGTTAAAAAAGTTGATGGTGAAATCCCAAGGCTCAACCTTGGAACTGCCATCAAAACT\n+TTTTAGCTAGAGTGTGATAGAGGTAAGTGGAATTTCTAGTGTAGAGGTGAAATTCGTAGATATTAGAAAGAACACCAAAT\n+GCGAAGGCAACTTACTGGGTCACTACTGACACTGAGGAGCGAAAGCATGGGTAGCGAAGAGGATTAGATACCCTCGTAGT\n+CCATGCCGTAAACGATGTGTGCTAGACGTTGGAAATATATTTTTCAGTGTCGCAGCGAAAGCATTAAGCACACCGCCTGG\n+GGAGTACGACCGCAAGGTTAAAACTCAAATGAATTGACGGGGACCCGCACAAGCAGTGGAGCATGTGGTTTAATTCGAAG\n+ATACGCGCAGAACCTTACCAACACTTGACATGTTCGTCGCGAAACTAAGAGATTAGTTTTTTCAGTTCGGCTGGACGAAA\n+CACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCCTACTTTT\n+AGTTGCCACCATTTAGTTGGGCACTTTAAAAGAACTGCCAGTGATAAGCTGGAGGAAGGTGGGGATGACGTCAAGTCCTC\n+ATGGCCCTTATGTGTTGGGCTACACACGTGCTACAATGGTACTTACAATGGGATGCAAAGAGGTGACTCTTAGCTAATCC\n+CTAAAATGTACCTCAGTTCGGATTGTACTCTGTAACTCGAGTGCATGAAGCTGGAATTGCTAGTAATCGCGGATCAGCGC\n+GCCGCGGTGAATACGTTCCCGGGTCTTGTACACACCGCCCGTCACACCATGGAAGTTGGTTACACCTTAAGGCAAAGCTT\n+ATACCTTTGACTACGGTACGATCAGCAACTGGGGTGAAGTCGTAACAAGGTAGCCGTAGGGGAACCTGCGGCTGGATTAC\n+CTCCTTTCTAAGGA\n+>340660;size=3;\n+AGAGTTTGATCATGGCTCAGGACGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGAGCGATGAAGCACCTTCGGGTGT\n+GAATTAGCGGCGAACGGGTGAGGAACACGTGAGAAATCTGCCTTCAACACTGGGATAACTCCGGGAAACCGGGGCTAATA\n+CCGGATATGAAACCTGCGGGCATCCGCGGGTTTGGAAAGTTTTTCGGTTGAAGATGATCTCGCGGCCTATCAGCTTGTTG\n+GTGAGGTAATGGCTCACCAAGGCAACGACGGGTAGCTGGTCTGAGAGGATGATCAGCCACACTGGGACTGAGACACGGCC\n+CAGACTCCTACGGGAGGCAGCAGTGGGGAATCTTGCGCAATGGGCGAAAGCCTGACGCAGCAACGCCGCGTGCGGGATGA\n+AGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCACCGGCTAACTAC\n+GTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGAGCTCGTAGGCGGTTAGAC\n+ACGTCGGATGTGAAAACTGGGGGCTCAACCCCCAGCCTGCATTCGATACGGGAGGACTCGAGGACGGCAGGGGAGACTGG\n+AACTTCTGGTGTAGCGGTGGAATGCGCAGATATCAGAAAGAACACCAATGGCGAAGGCAGGTCTCTGGGCCGATCCTGAC\n+GCTGAGGAACGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACTATGGACGCTAGGTGTG\n+GGGGACATTCCACGTTCTCTGCG
CCGCAGCTAACGCATTAAGCGTCCCGCCTGGGGAGTACGACCGCAAGGTTAAA'..b'GTACCCGAATAAGAAGGTCCGGCTAACTTCGTGCCAGCAGCCGCGGTAATACGAAGGGA\n+CCTAGCGTAGTTCGGAATTACTGGGCTTAAAGAGTTCGTAGGTGGTTGAAAAAGTTGGTGGTGAAATCCCAGAGCTTAAC\n+TCTGGAACTGCCATCAAAACTTTTCAGCTAGAGTATGATAGAGGAAAGCAGAATTTCTAGTGTAGAGGTGAAATTCGTAG\n+ATATTAGAAAGAATACCAATTGCGAAGGCAGCTTTCTGGATCATTACTGACGCTGAGGAACGAAAGCATGGGTAGCGAAG\n+AGGATTAGATACCCTCGTAGTCCATGCCGTAAACGATGTGTGTTAGACGTTGGAAATTTATTTTCAGTGTCGCAGTGAAA\n+GCGATAAACACACCGCCTGGGGAGTACGACCGCAAGGTTAAAACTCAAATGAATTGACGGGGACCCGCACAAGTAGTGGA\n+GCATGTGGTTTAATTCGAAGATACGCGCAGAACCTTACCAACACTTGACATGTTCGTCGCGACTCTAAGAGATTAGAGTT\n+TTCGGTTCGGCCGGACGAAACACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAA\n+CGAGCGCAACCCTCACTTTTAGTTGCCATCATTAAGTTGGGCACTCTGAAAGAACTGCCAGTGATAAGCTGGAGGAAGGT\n+GGGGATGACGTCAAGTCCTCATGGCCCTTACGTGTTGGGCTACACACGTGCTACAATGGTATCTACAACAGGAAGCAAGA\n+CGGCGACGTTAAGCAAATCCTTAAAAGATACCTCAGTTCGGATTGCACTCTGCAACTCGAGTGCATGAAGCTGGAATTAC\n+TAGTAATCGTGGATCAGCGTGCCACGGTGAATGCGTTCCCGGGTCTTGTACACACCGCCCGTCACACCATGGGAGTTGGT\n+TCTACCTTAAGGCAAGGTTTTAAACCCTTGACCACGGTATAGTCAGCGACTGGGGTGAAGTCGTAACAAGGTAGCCGTA\n+>324263;size=1;\n+AGGGTTTGATCATGGCTCAGAACGTACGCTGGCGGCACGCCTAACACATGCAAGTCGAACGCAGTAGCAATACTGAGTGG\n+CAAACGGGTGAGTATAATGTGGGAATCTGCCTTTTGGTTTGGAATAACACGGGGAAACTTGTGCTAATACCGAATAAGCC\n+CTTACGGGGAAAGTTTTAACGCCGAAAGATGAGCCCGCACTTGATTAGCTAGTTGGTAAGGTAAAAGCTTACCAAGGCAA\n+CGATCAATAGCTGTTCTTAGAGGAAGACCAGCCACATTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTG\n+GGGAATCTTGCACAATGGGGGAAACCCTGATGCAGCGATGCCGCGTGAGTGAAGAAGGCCCTTGGGTTGTAAAACTCTTT\n+CGTCGGGGAAGAAAATGACTGTACCCGAATAAGAAGGTCCGGCTAACTTCGTGCCAGCAGCCGCGGTAATACGAAGGGAC\n+CTAGCGTAGTTCGGAATTACTGGGCTTAAAGAGCTCGTAGGTGGTTAAAAAAGTTGATGGTGAAATCCCAAGGCTCAACC\n+TTGGAACTGCCATCAAAACTTTTTAGCTAGAGTGTGATAGAGGTAAGTGGAATTTCTAGTGTAGAGGTGAAATTCGTAGA\n+TATTAGAAAGAACACCAAATGCGAAGGCAACTTACTGGGTCACTACTGACACTGAGGAGCGAAAGCATGGGTAGCGAAGA\n+GGATTAGATACCCTCGTAGTCCATGCCGTAAACGATGTGTGCTAGACGTTGGAAATATATTTTTCAGTGTCGCAGCGAAA\n+GCATTAAGCACACCGCCTGGGGAGTACGACCGCAAGG
TTAAAACTCAAATGAATTGACGGGGACCCGCACAAGCAGTGGA\n+GCATGTGGTTTAATTCGAAGATACGCGCAGAACCTTACCAACGCTTGACATGTTCGTCGCGAGACTAAGAGATTAGTCTT\n+TTCAGTTTGGCTGGACGAAACACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAA\n+CGAGCGCAACCCCTACTTTTAGTTGCCACCATTTAGTTGGGCACTTTAAAAGAACTGCCAGTGATAAGCTGGAGGAAGGT\n+GGGGATGACGTCAAGTCCTCATGGCCCTTATGTGTTGGGCTACACACGTGCTACAATGGTACTTACAATGGGATGCAAAG\n+AGGCGACTCTTAGCTAATCCCTAAAATGTACCTCAGTTCGGATTGTACTCTGTAACTCGAGTGCATGAAGCTGGAATTGC\n+TAGTAATCGCGGATCAGCGCGCCGCGGTGAATACGTTCCCGGGTCTTGTACACACCGCCCGTCACACCATGGAAGTTGGT\n+TACACCTTAAGGCAAAGCTTATACCTTTGACTACGGTACGATCAGCAACTGGGGTGAAGTCATAACAAGGTAACCGTA\n+>203214;size=1;\n+GAGTTTGATCCTGGCTCAGGATGAACGCTGGCGGCGTGGATAAGTCATGCAAGTCAAGGGGGCTCGCAAGAGCAACCGGC\n+AGACGAGGTAGTAATAAGTAGGTACGTCCCTTCAAGTCACGAATAGCCCGTCGAAAGACGGGGTAATACGCGATAGTCCC\n+TTCGGGGTAAAGATTTATCGCTTGAAGAACGGCCTGCTCGGTATCAGCTAGTTGGCGGTGTAAAAGACCACCAAGGCTAT\n+GACGCCTAGGGGAGCTGAGAGGCTGACCCCCACCGATGGGACTGAGATACGGCCCATACACCTACGGGTGGCTGCAGACG\n+AGAATATTCCACAATGGACGAAAGTCTGATGGAGCGACGCCGCGTGCAGGATGAAGTTCTTCGGGATGTAAACTGCTTTT\n+ATGAGCGAGAAAGTTTATTGATCAGCTCATGAATAAGAGGTTGCTAAACTCGTGCCAGCAGCAGCGGTAATACGAGTGCC\n+TCAAGCGTTATCCGGAATCATTGGGCGTAAAGGGTGTGTAGGTGGTCGCGTTAGTCTTCCGTTAAATTCTTCGGCTCAAC\n+CGGGGGCATGCGGGGGAAACGGCGCGACTTGAGGATGCGAGAGGAAAGCGGAACTCATAGTGTAGCGGTGAAATGCGTTG\n+ATATTATGGGGAACACCAAATGCGAAGGCAGCTTTCTGGAGCATCCTGACACTGAAACACGAAAGCGTGGGTAGCGAACG\n+GGATTAGATACCCCGGTAGTCCACGCCCTAAACGATGATCACTAGCTATTCGGAGTATCGACCTCCAAGTGGCGTAGCTA\n+ACGCGTTAAGTGATCCGCCTGGGTAGTACGGCCGCAAGGCTAAAACTCAAAGGAATAGACGGGGACTTGCACAAGCGGAG\n+GAACATGTGGTTCAATTCGATGGTAAACGAAAAACCTTACCAGGGTTGGAAATCAGTTGAGATAGGCAGAAACGTCTATG\n+CCGCAAGGCAATTAGACAGGTGCTGCATGGTCGTCGTCAGTTCGTGGCTTGAGTTGTTCCCTTCAGTGGGGTAACGAACG\n+CAACCCCCGTTGCCTGGTATAAGTATCAGGCGAGACTGCCCAGGTTAACTGGGAGGAAGGTGGGGATGACGCCAGATCAG\n+CATGGCCCTTATACCTGGGCCTACACACGTGTTACAATGGCCGACTCACGCGAGGCGAAGAGGAAACTCGGAGCAAATCG\n+TAAAAAGTCGGCCAAGTTCGGATTGAGGGCTGCAACTCGCCCTCATGAAGCGGGATTCGCTAGTAATCGCAGATCAGCAT\n+GCTGCGGTGAATACGTTCTCAAGTCTTGT
ACTCACCGCCCGTCAACTCAAGGGAGCTGGGAATACCCGAAGTCGCCCTTG\n+GGCGCCTAAGGTAAGCTCAGTGACAGGGAGTAAGTCGTAACAAGGTAACCGT\n'
b
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/test-data/otu.rabund
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usearch_map_reads_to_otu/test-data/otu.rabund Thu May 29 00:51:18 2014 -0400
b
@@ -0,0 +1,1 @@
+0.03 536 60 4 3 8 14 1 1 28 2 1 1 17 6 4 1 1 12 7 1 2 7 5 9 1 154 4 1 2 1 7 1 6 1 1 7 7 19 7 1 17 2 2 2 3 4 2 1 4 2 1 7 3 1 5 1 1 3 1 1 2 1 1 1 1 2 10 1 1 1 3 3 11 4 2 1 1 8 4 1 1 20 6 1 1 6 2 7 2 3 28 4 6 6 2 8 10 1 5 4 1 32 1 2 3 1 1 1 13 1 1 1 1 1 1 1 1 2 4 3 2 2 1 1 1 2 6 1 7 4 1 1 1 7 1 3 1 14 1 2 1 1 7 1 5 1 4 2 1 1 1 2 1 1 1 1 1 1 1 2 1 4 1 1 1 1 1 1 1 3 1 1 4 1 1 3 1 1 3 1 1 4 1 1 1 1 1 4 1 1 1 2 2 7 1 2 1 1 1 1 1 1 1 8 2 1 2 1 1 2 1 1 3 2 2 1 3 1 3 1 1 1 1 2 1 1 5 1 1 1 3 1 3 2 2 1 2 2 1 1 1 2 1 2 1 1 4 1 1 1 1 1 1 1 1 2 2 1 2 5 1 1 1 1 3 1 1 1 1 3 1 1 1 1 1 2 1 4 1 1 4 1 2 1 3 1 1 5 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 1 1 2 1 1 1 1 2 1 2 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 7 1 1 1 1 1 1 1 1 1 5 1 1 1 5 2 2 2 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
b
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/test-data/otu_pre_table.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usearch_map_reads_to_otu/test-data/otu_pre_table.tabular Thu May 29 00:51:18 2014 -0400
b
b'@@ -0,0 +1,536 @@\n+OTU_2\t60\t\tAACGTACGCTGGCGGCACGCCTAACACATGCAAGTCGAACGCAGTAGCAATACTGAGTGGCAAACGGGTGAGTATAATGTGGGAATCTGCCTTTTGGTTTGGAATAACACGGGGAAACTTGTGCTAATACCGAATAAGCCCTTACGGGGAAAGTTTTAACGCCGAAAGATGAGCCTGCACTTGATTAGCTAGTTGGTAAGGTAAAAGCTTACCAAGGCAACGATCAATAGCTGTTCTTAGAGGAAGACCAGCCACATTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATCTTGCACAATGGgggAAACCCTGATGCAGCGATGCCGCGTGAGTGAAGAGGGCCCTTGGGTTGTAAAACTCTTTCGTCGGGGAAGAAAATGACTGTACCCGAATAAGAAGGTCCGGCTAACTTCGTGCCAGCAGCCGCGGTAATACGAAGGGACCTAGCGTAGTTCGGAATTACTGGGCTTAAAGAGCTCGTAGGTGGTTAAaaaaGTTGATGGTGAAATCCCAAGGCTCAACCTTGGAACTGCCATCAAAACTTtttAGCTAGAGTGTGATAGAGGTAAGTGGAATTTCTAGTGTAGAGGTGAAATTCGTAGATATTAGAAAGAACACCAAATGCGAAGGCAACTTACTGGGTCACTACTGACACTGAGGAGCGAAAGCATGGGTAGCGAAGAGGATTAGATACCCTCGTAGTCCATGCCGTAAACGATGtgtgCTAGACGTTGGAAATatatTtttCAGTGTCGCAGCGAAAGCATTAAGCACACCGCCTGGGGAGTACGACCGCAAGGTTAAAACTCAAATGAATTGACGGGGACCCGCACAAGCAGTGGAGCATGTGGTTTAATTCGAAGATACGCGCAGAACCTTACCAACACTTGACATGTTCGTCGCGAAACTAAGAGATTAGTTttttCAGTTCGGCTGGACGAAACACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCCTACTTTTAGTTGCCACCATTTAGTTGGGCACTTTAAAAGAACTGCCAGTGATAAGCTGGAGGAAGGTGGGGATGACGTCAAGTCCTCATGGCCCTTATGTGTTGGGCTACacacGTGCTACAATGGTACTTACAATGGGATGCAAAGAGGTGACTCTTAGCTAATCCCTAAAATGTACCTCAGTTCGGATTGTACTCTGTAACTCGAGTGCATGAAGCTGGAATTGCTAGTAATCGCGGATCAGCgcgcCGCGGTGAATACGTTCCCGGGTCTTGTACacacCGCCCGTCACACCATGGAAGTTGGTTACACCTTAAGGCAAAGCTTATACCTTTGACTACGGTACGATCAGCAACTGGGGTGAAGTCGTAACAAGGTAGCCGTAGGGGAACCTGCGGCTGGATTACCTCCTTTCTAAGGA\n+OTU_1\t4\t\tAGAGTTTGATCCTGGCTCAGATTGAACGCTGGCGGCAGGCCTAACACATGCAAGTCGAGCGCGAAATCTCCTTCGGGAGTGAGTAGAGCGGCGGACGGGTGAGTAACGCGTAGGAATCTACCCAGTGGTGGgggACAACCTGGGGAAACCCAGGCTAATACCGCATACGCCCTACGGgggAAAGCGGgggCTctctTCGGAGACCTCGCGCCATTGGATGAGCCTGCGTTGGATTAGCTAGTTGGTGGGGTAAAGGCCTACCAAGGCGACGATCCATAGCTGGTCTGAGAGGACGATCAGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCGCAATGGgggAAACCCTGACGCAGCCATGCCGCGTgtgtgAAGAAGGCTCTAGGGTTGTAAAGCACTTTCAGTAGGGAGAAaaaGCTCATGTTTAATAGATGTGAGTGTTGATGTTACCTACAGAAGAAGCACCGG
CTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCgcgcgTAGGCGGTTTGTTAAGTCGGATGTGAAAGCCCCGGGCTCAACCTGGGAATTGCATTCGATACTGATCGACTAGAGTACGAgagagGGAGGTAGAATTCCACGTGTAGCGGTGAAATGCGTAGATATGTGGAGGAATACCGGTGGCGAAGGCGGCCTCCTGGCTCGATACTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGTCAACTAGCCGTTGGGGAACTTGATTCCTTAGTGGCGCAGCTAACGCAATAAGTTGACCGCCTGGGGAGTACGGCCGCAAGGTTAAAACTCAAATGAATTGACGGgggCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTGAAAACTTTCCAGAGATGGATTGGTGCCTTCGGGAATTCAGTGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGTAACGAGCGCAACCCTTGTCCTTAGTTGCCAGCACGTAATGGTGGGAACTCTAAGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGACGACGTCAAGTCATCATGGCCCTTACGACCTGGGCTACacacGTGCTACAATGGTCGGTACAGACGGCTGCGAACCCGCGAGGgggAGCGAATCCGAGAAAACCGATCGTAGTCCGGATTGGAGTCTGCAACTCGACTCCATGAAGTCGGAATCGCTAGTAATCGCGAATCAGAATGTCGCGGTGAATACGTTCCCGGGCCTTGTACacacCGCCCGTCACACCATGGGAGTGGGTTGCTCCAGAAGTGGTTAGCCTAACCTTAGGGAGGGCGATCACCACGGAGTGATTCATGACTGGGGTGAAGTCGTAACAAGGTAACC\n+OTU_3\t3\t\tAGAGTTTGATCATGGCTCAGGACGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGAGCGATGAAGCACCTTCGGGTGTGAATTAGCGGCGAACGGGTGAGGAACACGTGAGAAATCTGCCTTCAACACTGGGATAACTCCGGGAAACCGGGGCTAATACCGGATATGAAACCTGCGGGCATCCGCGGGTTTGGAAAGTTtttCGGTTGAAGATGATCTCGCGGCCTATCAGCTTGTTGGTGAGGTAATGGCTCACCAAGGCAACGACGGGTAGCTGGTCTGAGAGGATGATCAGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATCTTGCGCAATGGGCGAAAGCCTGACGCAGCAACGCCGCGTGCGGGATGAAGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCACCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGAGCTCGTAGGCGGTTAGACACGTCGGATGTGAAAACTGGgggCTCAACCcccAGCCTGCATTCGATACGGGAGGACTCGAGGACGGCAGGGGAGACTGGAACTTCTGGTGTAGCGGTGGAATGCGCAGATATCAGAAAGAACACCAATGGCGAAGGCAGGTCTCTGGGCCGATCCTGACGCTGAGGAACGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACTATGGACGCTAGGTGTGGgggACATTCCACGTTCTCTGCGCCGCAGCTAACGCATTAAGCGTCCCGCCTGGGGAGTACGACCGCAAGGTTAAAACTCAAAGGAATTGACGGgggCCCGCACAAGCAGCGGAGCATGCGGCTTAATTCGATGCAACGCGAAGAACCTTACCTAGGCTTGACATGCATTGAAAACTGTT
AGAGATAACAGGT'..b'GGCCCTAGGGTCGTAAAGCTCTTTCGCCAGGGATGATAATGACAGTACCTGGTAAAGAAACCCCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGgggTTAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGACTATTAAGTCAGGGGTGAAATCCCGGGGCTCAACCCCGGAACTGCCCTTGATACTGGTAGTCTTGAGTTCGAgagagGTGAGTGGAATTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAGGAACACCAGTGGCGAAGGCGGCTCACTGGCTCGATACTGACGCTGAGGTGCGAAAGTGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACACCGTAAACGATGAATGCCAGTCGTCGGGTTGCATGCAATTCGGTGACacacCTAACGGATTAAGCATTCCGCCTGGGGAGTACGGTCGCAAGATTAAAACTCAAAGGAATTGACGGgggCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGCAGAACCTTACCAACCCTTGACATCCTGTGCTACATGGAgagaTCCATGGTTCCCTTCGGGGACGCAGTGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTCGGTTAAGTCCGGCAACGAGCGCAACCCACACCCTTAGTTGCCAGCAGTTCGGCTGGGCACTCTAGGGGAACTGCCCGTGATAAGCGGGAGGAAGGTGTGGATGACGTCAAGTCCTCATGGCCCTTACGGGTTGGGCTACacacGTGCTACAATGGCAGTGACAATGGGTTAATCCCCAAaaaCTGTCTCAGTTCGGATTGTCGTCTGCAACTCGACGGCATGAAGTCGGAATCGCTAGTAATCGCGTAACAGCATGACGCGGTGAATACGTTCCCGGGCCTTGTACacacCGCCCGTCACATCATGGGAGTTGGGTTTACCCGAAGACGGTGCGCCAACCTGTTCGCAGGgggCAGCTGGCCACGGTAGGCTCAGCGACTGGGATGAAGTCGTAACAAGGTAGCCGTAGGGGAACCTGCGGCTGGATCACCTCCTTTCT\n+OTU_535\t1\t\tCGCTGGCGGCAGGCTTAACACATGCAAGTCGAGCGGTAACAGGGGAGCTTGCTCCCGCTGACGAGCGGCGGACGGGTGAGTAACGCGTAGGAATCTGCCCAATAGTGGgggATAGCCCGGGGAAACTCGGATTAATACCGCATACGCCCTACGGgggAAAGGCCGGGATCTTCGGACCGGTCGCTATTGGATGAGCCTGCGTAAGATTAGCTAGTTGGTAGGGTAAAGGCCTACCAAGGCGACGATCTTTAGCTGGTCTGAGAGGATGATCAGCCACACTGGAACTGAGACACGGTCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGGACAATGGgggCAACCCTGATCCAGCCATGCCGCGTgtgtgAAGAAGGCCTTAGGGTTGTAAAGCACTTTAAGTTGGGAGGAAGGCTCTGTAGCTAATATCTGCAGGGATTGACGTTACCAACAGAATAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCgcgcgTAGGCGGTTTGGTAAGTGTGATGTGAAAGCCCAGGGCTTAACCTTGGAACTGCATCACATACTGCCAGGCTAGAGTACGGTAGAGGggggTAGAATTCCACGTGTAGCGGTGAAATGCGTAGAGATGTGGAGGAATACCAGTGGCGAAGGCGGCCcccTGGATCGATACTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGTCAACTAGCCGTTGGAAGGGTAATCCTTTTAGTGGCGCAGCTAACGCTCTAAGTTGACCGCCTGGGGAGTACGGTCGCAAGATTAAAACTCAAATG
AATTGACGGgggCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGACGCAACGCGAAGAACCTTACCTGGTCTTGACATCCTGCGAACTTTCCAGAGATGGATTGGTGCCTTCGGGAGCGCAGTGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGTAACGAGCGCAACCCTTGTCCTTAGTTGCCATCATTTAGTTGGGCACTTTAGGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGACGACGTCAAGTCATCATGGCCCTTACGACCAGGGCTACacacGTGCTACAATGGAGCGTACAAAGGGTTGCAACCCCGCGAGGGTGAGCTAATCCCATAAAACGTTTCGTAGTCCGGATCGCAGTCTGCAACTCGACTGCGTGAAGTCGGAATCGCTAGTAATCGCGAATCAGAATGTCGCGGTGAATACGTTCCCGGGCCTTGTACacacCGCCCGTCACACCATGGGAGTGGGTTGCAAAAGAAGTGGGTAGCATAACCGCAAGGAGTGCGCTCACCACTTTGTGATTCATGACTGGGGTGAAGTCGTAACAAGGTAACCGTA\n+OTU_536\t1\t\tGAGTTTGATCCTGGCTCAGGATGAACGCTGGCGGCGTGGATAAGTCATGCAAGTCAAGGgggCTCGCAAGAGCAACCGGCAGACGAGGTAGTAATAAGTAGGTACGTCCCTTCAAGTCACGAATAGCCCGTCGAAAGACGGGGTAATACGCGATAGTCCCTTCGGGGTAAAGATTTATCGCTTGAAGAACGGCCTGCTCGGTATCAGCTAGTTGGCGGTGTAAAAGACCACCAAGGCTATGACGCCTAGGGGAGCTGAGAGGCTGACCcccACCGATGGGACTGAGATACGGCCCATACACCTACGGGTGGCTGCAGACGAGAATATTCCACAATGGACGAAAGTCTGATGGAGCGACGCCGCGTGCAGGATGAAGTTCTTCGGGATGTAAACTGCTTTTATGAGCGAGAAAGTTTATTGATCAGCTCATGAATAAGAGGTTGCTAAACTCGTGCCAGCAGCAGCGGTAATACGAGTGCCTCAAGCGTTATCCGGAATCATTGGGCGTAAAGGGTgtgtAGGTGGTCGCGTTAGTCTTCCGTTAAATTCTTCGGCTCAACCGGgggCATGCGGgggAAACGGCGCGACTTGAGGATGCGAGAGGAAAGCGGAACTCATAGTGTAGCGGTGAAATGCGTTGATATTATGGGGAACACCAAATGCGAAGGCAGCTTTCTGGAGCATCCTGACACTGAAACACGAAAGCGTGGGTAGCGAACGGGATTAGATACCCCGGTAGTCCACGCCCTAAACGATGATCACTAGCTATTCGGAGTATCGACCTCCAAGTGGCGTAGCTAACGCGTTAAGTGATCCGCCTGGGTAGTACGGCCGCAAGGCTAAAACTCAAAGGAATAGACGGGGACTTGCACAAGCGGAGGAACATGTGGTTCAATTCGATGGTAAACGAAaaaCCTTACCAGGGTTGGAAATCAGTTGAGATAGGCAGAAACGTCTATGCCGCAAGGCAATTAGACAGGTGCTGCATGGTCGTCGTCAGTTCGTGGCTTGAGTTGTTCCCTTCAGTGGGGTAACGAACGCAACCcccGTTGCCTGGTATAAGTATCAGGCGAGACTGCCCAGGTTAACTGGGAGGAAGGTGGGGATGACGCCAGATCAGCATGGCCCTTATACCTGGGCCTACacacGTGTTACAATGGCCGACTCACGCGAGGCGAAGAGGAAACTCGGAGCAAATCGTAAaaaGTCGGCCAAGTTCGGATTGAGGGCTGCAACTCGCCCTCATGAAGCGGGATTCGCTAGTAATCGCAGATCAGCATGCTGCGGTGAATACGTTCTCAAGTCTTGTACTCACCGCCCGTCAACTCAAGGGAGCTGGGAATACCCGAAGTCGCCCTTGGGCGCCTAAGGTAAGCTCAGTGACAGGG
AGTAAGTCGTAACAAGGTAACCGT\n'
b
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/test-data/otu_relabel.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usearch_map_reads_to_otu/test-data/otu_relabel.fasta Thu May 29 00:51:18 2014 -0400
b
b'@@ -0,0 +1,10558 @@\n+>OTU_1\n+AGAGTTTGATCCTGGCTCAGATTGAACGCTGGCGGCAGGCCTAACACATGCAAGTCGAGCGCGAAATCTCCTTCGGGAGT\n+GAGTAGAGCGGCGGACGGGTGAGTAACGCGTAGGAATCTACCCAGTGGTGGgggACAACCTGGGGAAACCCAGGCTAATA\n+CCGCATACGCCCTACGGgggAAAGCGGgggCTctctTCGGAGACCTCGCGCCATTGGATGAGCCTGCGTTGGATTAGCTA\n+GTTGGTGGGGTAAAGGCCTACCAAGGCGACGATCCATAGCTGGTCTGAGAGGACGATCAGCCACACTGGGACTGAGACAC\n+GGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCGCAATGGgggAAACCCTGACGCAGCCATGCCGCGTgtgtg\n+AAGAAGGCTCTAGGGTTGTAAAGCACTTTCAGTAGGGAGAAaaaGCTCATGTTTAATAGATGTGAGTGTTGATGTTACCT\n+ACAGAAGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCG\n+TAAAGCgcgcgTAGGCGGTTTGTTAAGTCGGATGTGAAAGCCCCGGGCTCAACCTGGGAATTGCATTCGATACTGATCGA\n+CTAGAGTACGAgagagGGAGGTAGAATTCCACGTGTAGCGGTGAAATGCGTAGATATGTGGAGGAATACCGGTGGCGAAG\n+GCGGCCTCCTGGCTCGATACTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGC\n+CGTAAACGATGTCAACTAGCCGTTGGGGAACTTGATTCCTTAGTGGCGCAGCTAACGCAATAAGTTGACCGCCTGGGGAG\n+TACGGCCGCAAGGTTAAAACTCAAATGAATTGACGGgggCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGCAAC\n+GCGAAGAACCTTACCAGGTCTTGACATCCTGAAAACTTTCCAGAGATGGATTGGTGCCTTCGGGAATTCAGTGACAGGTG\n+CTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGTAACGAGCGCAACCCTTGTCCTTAGTTGCCA\n+GCACGTAATGGTGGGAACTCTAAGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGACGACGTCAAGTCATCATGGCC\n+CTTACGACCTGGGCTACacacGTGCTACAATGGTCGGTACAGACGGCTGCGAACCCGCGAGGgggAGCGAATCCGAGAAA\n+ACCGATCGTAGTCCGGATTGGAGTCTGCAACTCGACTCCATGAAGTCGGAATCGCTAGTAATCGCGAATCAGAATGTCGC\n+GGTGAATACGTTCCCGGGCCTTGTACacacCGCCCGTCACACCATGGGAGTGGGTTGCTCCAGAAGTGGTTAGCCTAACC\n+TTAGGGAGGGCGATCACCACGGAGTGATTCATGACTGGGGTGAAGTCGTAACAAGGTAACC\n+>OTU_2\n+AACGTACGCTGGCGGCACGCCTAACACATGCAAGTCGAACGCAGTAGCAATACTGAGTGGCAAACGGGTGAGTATAATGT\n+GGGAATCTGCCTTTTGGTTTGGAATAACACGGGGAAACTTGTGCTAATACCGAATAAGCCCTTACGGGGAAAGTTTTAAC\n+GCCGAAAGATGAGCCTGCACTTGATTAGCTAGTTGGTAAGGTAAAAGCTTACCAAGGCAACGATCAATAGCTGTTCTTAG\n+AGGAAGACCAGCCACATTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATCTTGCACAATGGgg\n+gAAACCCTGATGCAGCGATGCCGCGTGAGTGAAGAGGGCCCTTGGGTTGTAAAACTCTTTCGTCGGGG
AAGAAAATGACT\n+GTACCCGAATAAGAAGGTCCGGCTAACTTCGTGCCAGCAGCCGCGGTAATACGAAGGGACCTAGCGTAGTTCGGAATTAC\n+TGGGCTTAAAGAGCTCGTAGGTGGTTAAaaaaGTTGATGGTGAAATCCCAAGGCTCAACCTTGGAACTGCCATCAAAACT\n+TtttAGCTAGAGTGTGATAGAGGTAAGTGGAATTTCTAGTGTAGAGGTGAAATTCGTAGATATTAGAAAGAACACCAAAT\n+GCGAAGGCAACTTACTGGGTCACTACTGACACTGAGGAGCGAAAGCATGGGTAGCGAAGAGGATTAGATACCCTCGTAGT\n+CCATGCCGTAAACGATGtgtgCTAGACGTTGGAAATatatTtttCAGTGTCGCAGCGAAAGCATTAAGCACACCGCCTGG\n+GGAGTACGACCGCAAGGTTAAAACTCAAATGAATTGACGGGGACCCGCACAAGCAGTGGAGCATGTGGTTTAATTCGAAG\n+ATACGCGCAGAACCTTACCAACACTTGACATGTTCGTCGCGAAACTAAGAGATTAGTTttttCAGTTCGGCTGGACGAAA\n+CACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCCTACTTTT\n+AGTTGCCACCATTTAGTTGGGCACTTTAAAAGAACTGCCAGTGATAAGCTGGAGGAAGGTGGGGATGACGTCAAGTCCTC\n+ATGGCCCTTATGTGTTGGGCTACacacGTGCTACAATGGTACTTACAATGGGATGCAAAGAGGTGACTCTTAGCTAATCC\n+CTAAAATGTACCTCAGTTCGGATTGTACTCTGTAACTCGAGTGCATGAAGCTGGAATTGCTAGTAATCGCGGATCAGCgc\n+gcCGCGGTGAATACGTTCCCGGGTCTTGTACacacCGCCCGTCACACCATGGAAGTTGGTTACACCTTAAGGCAAAGCTT\n+ATACCTTTGACTACGGTACGATCAGCAACTGGGGTGAAGTCGTAACAAGGTAGCCGTAGGGGAACCTGCGGCTGGATTAC\n+CTCCTTTCTAAGGA\n+>OTU_3\n+AGAGTTTGATCATGGCTCAGGACGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGAGCGATGAAGCACCTTCGGGTGT\n+GAATTAGCGGCGAACGGGTGAGGAACACGTGAGAAATCTGCCTTCAACACTGGGATAACTCCGGGAAACCGGGGCTAATA\n+CCGGATATGAAACCTGCGGGCATCCGCGGGTTTGGAAAGTTtttCGGTTGAAGATGATCTCGCGGCCTATCAGCTTGTTG\n+GTGAGGTAATGGCTCACCAAGGCAACGACGGGTAGCTGGTCTGAGAGGATGATCAGCCACACTGGGACTGAGACACGGCC\n+CAGACTCCTACGGGAGGCAGCAGTGGGGAATCTTGCGCAATGGGCGAAAGCCTGACGCAGCAACGCCGCGTGCGGGATGA\n+AGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCACCGGCTAACTAC\n+GTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGAGCTCGTAGGCGGTTAGAC\n+ACGTCGGATGTGAAAACTGGgggCTCAACCcccAGCCTGCATTCGATACGGGAGGACTCGAGGACGGCAGGGGAGACTGG\n+AACTTCTGGTGTAGCGGTGGAATGCGCAGATATCAGAAAGAACACCAATGGCGAAGGCAGGTCTCTGGGCCGATCCTGAC\n+GCTGAGGAACGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACTATGGACGCTAGGTGTG\n+GgggACATTCCACGTTCTCTGCGCCGCAGCTAACGCATTAAGCGTCCCGC
CTGGGGAGTACGACCGCAAGGTTAAAACTC\n+AAAGGAATTGACGGgggCCCG'..b'\n+ACGGAGGgggTTAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGACTATTAAGTCAGGGGTGAAATCCCG\n+GGGCTCAACCCCGGAACTGCCCTTGATACTGGTAGTCTTGAGTTCGAgagagGTGAGTGGAATTCCGAGTGTAGAGGTGA\n+AATTCGTAGATATTCGGAGGAACACCAGTGGCGAAGGCGGCTCACTGGCTCGATACTGACGCTGAGGTGCGAAAGTGTGG\n+GGAGCAAACAGGATTAGATACCCTGGTAGTCCACACCGTAAACGATGAATGCCAGTCGTCGGGTTGCATGCAATTCGGTG\n+ACacacCTAACGGATTAAGCATTCCGCCTGGGGAGTACGGTCGCAAGATTAAAACTCAAAGGAATTGACGGgggCCCGCA\n+CAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGCAGAACCTTACCAACCCTTGACATCCTGTGCTACATGGAgag\n+aTCCATGGTTCCCTTCGGGGACGCAGTGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTCGGTTAAGT\n+CCGGCAACGAGCGCAACCCACACCCTTAGTTGCCAGCAGTTCGGCTGGGCACTCTAGGGGAACTGCCCGTGATAAGCGGG\n+AGGAAGGTGTGGATGACGTCAAGTCCTCATGGCCCTTACGGGTTGGGCTACacacGTGCTACAATGGCAGTGACAATGGG\n+TTAATCCCCAAaaaCTGTCTCAGTTCGGATTGTCGTCTGCAACTCGACGGCATGAAGTCGGAATCGCTAGTAATCGCGTA\n+ACAGCATGACGCGGTGAATACGTTCCCGGGCCTTGTACacacCGCCCGTCACATCATGGGAGTTGGGTTTACCCGAAGAC\n+GGTGCGCCAACCTGTTCGCAGGgggCAGCTGGCCACGGTAGGCTCAGCGACTGGGATGAAGTCGTAACAAGGTAGCCGTA\n+GGGGAACCTGCGGCTGGATCACCTCCTTTCT\n+>OTU_535\n+CGCTGGCGGCAGGCTTAACACATGCAAGTCGAGCGGTAACAGGGGAGCTTGCTCCCGCTGACGAGCGGCGGACGGGTGAG\n+TAACGCGTAGGAATCTGCCCAATAGTGGgggATAGCCCGGGGAAACTCGGATTAATACCGCATACGCCCTACGGgggAAA\n+GGCCGGGATCTTCGGACCGGTCGCTATTGGATGAGCCTGCGTAAGATTAGCTAGTTGGTAGGGTAAAGGCCTACCAAGGC\n+GACGATCTTTAGCTGGTCTGAGAGGATGATCAGCCACACTGGAACTGAGACACGGTCCAGACTCCTACGGGAGGCAGCAG\n+TGGGGAATATTGGACAATGGgggCAACCCTGATCCAGCCATGCCGCGTgtgtgAAGAAGGCCTTAGGGTTGTAAAGCACT\n+TTAAGTTGGGAGGAAGGCTCTGTAGCTAATATCTGCAGGGATTGACGTTACCAACAGAATAAGCACCGGCTAACTCCGTG\n+CCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCgcgcgTAGGCGGTTTGGTAAG\n+TGTGATGTGAAAGCCCAGGGCTTAACCTTGGAACTGCATCACATACTGCCAGGCTAGAGTACGGTAGAGGggggTAGAAT\n+TCCACGTGTAGCGGTGAAATGCGTAGAGATGTGGAGGAATACCAGTGGCGAAGGCGGCCcccTGGATCGATACTGACGCT\n+GAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGTCAACTAGCCGTTGGA\n+AGGGTAATCCTTTTAGTGGCGCAGCTAACGCTCTAAGTTGACCGCCTGGGGAGTACGGTCGCAAGAT
TAAAACTCAAATG\n+AATTGACGGgggCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGACGCAACGCGAAGAACCTTACCTGGTCTTGACAT\n+CCTGCGAACTTTCCAGAGATGGATTGGTGCCTTCGGGAGCGCAGTGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCG\n+TGAGATGTTGGGTTAAGTCCCGTAACGAGCGCAACCCTTGTCCTTAGTTGCCATCATTTAGTTGGGCACTTTAGGGAGAC\n+TGCCGGTGACAAACCGGAGGAAGGTGGGGACGACGTCAAGTCATCATGGCCCTTACGACCAGGGCTACacacGTGCTACA\n+ATGGAGCGTACAAAGGGTTGCAACCCCGCGAGGGTGAGCTAATCCCATAAAACGTTTCGTAGTCCGGATCGCAGTCTGCA\n+ACTCGACTGCGTGAAGTCGGAATCGCTAGTAATCGCGAATCAGAATGTCGCGGTGAATACGTTCCCGGGCCTTGTACaca\n+cCGCCCGTCACACCATGGGAGTGGGTTGCAAAAGAAGTGGGTAGCATAACCGCAAGGAGTGCGCTCACCACTTTGTGATT\n+CATGACTGGGGTGAAGTCGTAACAAGGTAACCGTA\n+>OTU_536\n+GAGTTTGATCCTGGCTCAGGATGAACGCTGGCGGCGTGGATAAGTCATGCAAGTCAAGGgggCTCGCAAGAGCAACCGGC\n+AGACGAGGTAGTAATAAGTAGGTACGTCCCTTCAAGTCACGAATAGCCCGTCGAAAGACGGGGTAATACGCGATAGTCCC\n+TTCGGGGTAAAGATTTATCGCTTGAAGAACGGCCTGCTCGGTATCAGCTAGTTGGCGGTGTAAAAGACCACCAAGGCTAT\n+GACGCCTAGGGGAGCTGAGAGGCTGACCcccACCGATGGGACTGAGATACGGCCCATACACCTACGGGTGGCTGCAGACG\n+AGAATATTCCACAATGGACGAAAGTCTGATGGAGCGACGCCGCGTGCAGGATGAAGTTCTTCGGGATGTAAACTGCTTTT\n+ATGAGCGAGAAAGTTTATTGATCAGCTCATGAATAAGAGGTTGCTAAACTCGTGCCAGCAGCAGCGGTAATACGAGTGCC\n+TCAAGCGTTATCCGGAATCATTGGGCGTAAAGGGTgtgtAGGTGGTCGCGTTAGTCTTCCGTTAAATTCTTCGGCTCAAC\n+CGGgggCATGCGGgggAAACGGCGCGACTTGAGGATGCGAGAGGAAAGCGGAACTCATAGTGTAGCGGTGAAATGCGTTG\n+ATATTATGGGGAACACCAAATGCGAAGGCAGCTTTCTGGAGCATCCTGACACTGAAACACGAAAGCGTGGGTAGCGAACG\n+GGATTAGATACCCCGGTAGTCCACGCCCTAAACGATGATCACTAGCTATTCGGAGTATCGACCTCCAAGTGGCGTAGCTA\n+ACGCGTTAAGTGATCCGCCTGGGTAGTACGGCCGCAAGGCTAAAACTCAAAGGAATAGACGGGGACTTGCACAAGCGGAG\n+GAACATGTGGTTCAATTCGATGGTAAACGAAaaaCCTTACCAGGGTTGGAAATCAGTTGAGATAGGCAGAAACGTCTATG\n+CCGCAAGGCAATTAGACAGGTGCTGCATGGTCGTCGTCAGTTCGTGGCTTGAGTTGTTCCCTTCAGTGGGGTAACGAACG\n+CAACCcccGTTGCCTGGTATAAGTATCAGGCGAGACTGCCCAGGTTAACTGGGAGGAAGGTGGGGATGACGCCAGATCAG\n+CATGGCCCTTATACCTGGGCCTACacacGTGTTACAATGGCCGACTCACGCGAGGCGAAGAGGAAACTCGGAGCAAATCG\n+TAAaaaGTCGGCCAAGTTCGGATTGAGGGCTGCAACTCGCCCTCATGAAGCGGGATTCGCTAGTAATCGCAGATCAGCAT\n+GCTGCGGTGAATACGTTCTCAAGTCT
TGTACTCACCGCCCGTCAACTCAAGGGAGCTGGGAATACCCGAAGTCGCCCTTG\n+GGCGCCTAAGGTAAGCTCAGTGACAGGGAGTAAGTCGTAACAAGGTAACCGT\n'