Next changeset 1:5fd38fd7a623 (2014-06-04) |
Commit message:
Uploaded |
added:
usearch_map_reads_to_otu/README.txt usearch_map_reads_to_otu/map_reads_otu.sh usearch_map_reads_to_otu/map_reads_otu.xml usearch_map_reads_to_otu/repository_dependencies.xml usearch_map_reads_to_otu/scripts/die.py usearch_map_reads_to_otu/scripts/fasta.py usearch_map_reads_to_otu/scripts/fasta_number.py usearch_map_reads_to_otu/scripts/mod_uc2otutab.py usearch_map_reads_to_otu/scripts/otu_table_transform.py usearch_map_reads_to_otu/scripts/parseFasta.py usearch_map_reads_to_otu/scripts/progress.py usearch_map_reads_to_otu/scripts/uc.py usearch_map_reads_to_otu/test-data/dereplicated_seqs.fasta usearch_map_reads_to_otu/test-data/hit_list.tabular usearch_map_reads_to_otu/test-data/otu.fasta usearch_map_reads_to_otu/test-data/otu.rabund usearch_map_reads_to_otu/test-data/otu_pre_table.tabular usearch_map_reads_to_otu/test-data/otu_relabel.fasta |
b |
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/README.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usearch_map_reads_to_otu/README.txt Thu May 29 00:51:18 2014 -0400 |
b |
@@ -0,0 +1,62 @@ +Galaxy wrappers for USEARCH - Map Reads to OTUs +====================================================================== + +USEARCH requires a licence. Therefore an automated installation is not +possible at the moment. + +Requirements +====================================================================== + +Get your licensed USEARCH version 7 or greater from here: +http://www.drive5.com/usearch/download.html + + +Manual Installation Steps +====================================================================== + +USEARCH is distributed as one file, known as the binary file or executable +file. It is completely self-contained: it does not require configuration +files, environment variables, third-party libraries or other external +dependencies. There is no setup script or installer because they're not +needed. To install it, all you do is download or copy the binary to a +directory that is accessible from the computer where you want to run the code. + +Step1: +Rename the binary file to usearch. + +Step2: +Move the binary file (usearch) to /usr/local/bin +Ensure /usr/local/bin is in your path. If needed add /usr/local/bin/ to your +path. + +Step3: +Ensure that you have read and execute permissions for the binary file. +If needed, use the chmod command to set the execute bit, e.g.: +chmod +x /usr/local/bin/usearch + + +Further installation information and help can be found at: +http://drive5.com/usearch/manual/install.html + + +Disclaimer +====================================================================== + +This source code is provided by QFAB Bioinformatics "as is", in the hope that it +will be useful, and any express or implied warranties, including, but not limited to, +the implied warranties of merchantability and fitness for a particular purpose +are disclaimed. 
+IN NO EVENT SHALL QFAB BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES HOWEVER CAUSED AND ON ANY THEORY +OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT(INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOURCE +CODE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +License +====================================================================== + +This work by QFAB Bioinformatics (as part of the GVL project +http://genome.edu.au) +is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 +International License. |
b |
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/map_reads_otu.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usearch_map_reads_to_otu/map_reads_otu.sh Thu May 29 00:51:18 2014 -0400 |
b |
#!/bin/bash
# Map reads to OTU representatives with USEARCH and derive the OTU tables.
#
# Usage:
#   map_reads_otu.sh <otu.fasta> <reads.fasta> <otu_relabel.fasta> \
#       <readmap.uc> <otu_table> <otu_pre_table> <identity> <multisample>
#
# Steps:
#   1. Relabel the OTU sequences as OTU_1, OTU_2, ... (fasta_number.py).
#   2. Search the reads against the relabelled OTUs (usearch -usearch_global),
#      writing the hit list in .uc format.
#   3. Prefix the query label of every .uc record with "barcodelabel=...;"
#      because mod_uc2otutab.py requires that field.
#   4. Build the OTU table (mod_uc2otutab.py) and the pre-OTU table
#      (otu_table_transform.py), then strip their header rows.
#
# Diagnostics: the original used "2>1", which redirects stderr into a file
# literally named "1" in the working directory. Here stderr is duplicated onto
# this script's stdout ("2>&1" placed BEFORE the file redirection), so the
# messages stay visible and never end up inside the data files.
#
# NOTE(review): as in the original, a failing step does not abort the script;
# consider adding explicit exit-status checks once the calling tool wrapper is
# known to honour exit codes.

echo "$@"

infile_otu=$1
infile_seq=$2
outfile_otu_relabel=$3
outfile_readmap=$4
outfile_readmap_barcode="${4}.barcode"
outfile_otu_table=$5
otu_pre_table=$6
identity=$7
multisample=$8
# Empty on purpose: single-sample runs still need the barcodelabel= field.
barcodelabel=''
full_path=$(dirname "$0")

python "$full_path/scripts/fasta_number.py" "$infile_otu" OTU_ 2>&1 > "$outfile_otu_relabel"

usearch -usearch_global "$infile_seq" -db "$outfile_otu_relabel" -strand plus \
    -id "$identity" -uc "$outfile_readmap" 2>&1

# Replace the 8th tab of each record, i.e. prefix the 9th field (query label).
sed "s/\t/\tbarcodelabel=${barcodelabel};/8" "$outfile_readmap" > "$outfile_readmap_barcode"

python "$full_path/scripts/mod_uc2otutab.py" "$outfile_readmap_barcode" "$identity" "$multisample" 2>&1 > "$outfile_otu_table"

# Must run before the header row of the OTU table is removed below: the
# transform reads the OTU labels from that header.
python "$full_path/scripts/otu_table_transform.py" "$outfile_otu_relabel" "$outfile_otu_table" 2>&1 > "$otu_pre_table"

# Drop the header row of the OTU table and the first two rows of the pre-OTU table.
sed -i 1d "$outfile_otu_table"
sed -i 1,2d "$otu_pre_table"
b |
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/map_reads_otu.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usearch_map_reads_to_otu/map_reads_otu.xml Thu May 29 00:51:18 2014 -0400 |
b |
@@ -0,0 +1,98 @@ +<tool id="map_reads_otu" name="Map Reads to OTU" version="1.1"> + <description>Maps read sequences to OTUs</description> + <command interpreter='bash' > + map_reads_otu.sh $otu_file $seq_input $otu_relabel $readmap_uc $output $otu_pre_table $identity False + </command> + <inputs> + <param name='otu_file' type='data' format='fasta' label='OTU fasta file' + help="This is the non-chimeric output from Uchime on the clustered OTU, see description below"/> + <param name='seq_input' type='data' format='fasta' label='Input reads file' + help="This is the output from the Dereplicate tool"/> + <param name='identity' type='float' value='0.97' label='Minimum identity' help='Between 0.0 - 1.0'/> + </inputs> + <outputs> + <data name='output' format='rabund' label="${tool.name} on ${on_string}:rabund"/> + <data name='otu_pre_table' format='tabular' label='${tool.name} on ${on_string}:Pre-OTU Table' /> + <data name='otu_relabel' format='fasta' label="${tool.name} on ${on_string}:relabelled OTU"/> + <data name='readmap_uc' format='tabular' hidden="TRUE" label="${tool.name} on ${on_string}:hit list"/> + </outputs> + + <help> +=========== +Description +=========== + +Maps read sequences to the OTU cluster representatives that have been identified as non-chimeric by the UCHIME tool. This is an intermediate step to generate an OTU table. +Map Reads to OTU is part of the USEARCH-Tool-Suite_. + +.. _USEARCH-Tool-Suite: http://www.drive5.com/usearch/ + +----- +Input +----- + +A) OTU FASTA file; containing OTU cluster representatives which have been identified as non-chimeric by the UCHIME tool. +B) File of read sequences in FASTA format, which have been dereplicated by the Dereplicate tool. + +---------- +Parameters +---------- + +OTU FASTA file + Of non-chimeric OTU representatives from the 'UCHIME and respectively OTU Cluster' step, this file represents the 'database' to search against. 
+ +Input reads file + FASTA file of (dereplicated) reads, these are the query sequences + +Minimum identity + Specifies the minimum identity between the query sequence and a database sequence. This is a fractional identity between 0.0 and 1.0. By default set to 0.97, corresponding to a minimum identity of 97%. + +------ +Output +------ + +This tool produces four output files, one of which is hidden by default. *To see the hidden file: click on the cogwheel icon in the history panel and select 'Include Hidden Datasets'.* + +A) A tab delimited file following the rabund_ format from mothur_. The first column is a label (this is usually the identity), the second column is the number of OTUs found and each subsequent column is the number of reads found for the corresponding OTUs. +B) A tab delimited PRE OTU table with three columns: | 1 - OTU label | 2 - Count | 3 - Sequence | + +.. _mothur: http://www.mothur.org/ +.. _rabund: http://www.mothur.org/wiki/Rabund_file + +.. class:: infomark + +The PRE OTU table can be used as input for the RDP Multi-Classifier to generate a complete OTU table with assigned taxonomy. + +C) A FASTA file containing the sequences re-labelled with their corresponding OTU label, e.g. OTU_1, OTU_2,...OTU_N, where N is the number of OTUs + + +========= +Resources +========= + +Mapping_Reads_To_OTUs_ + +.. _Mapping_Reads_To_OTUs: http://drive5.com/usearch/manual/mapreadstootus.html + +**Author** + +Robert C. 
Edgar (bob@drive5.com) + +**Wrapper Author** + +QFAB Bioinformatics (support@qfab.org) + +</help> +<tests> + <test> + <param name="otu_file" value="otu.fasta" /> + <param name="seq_input" value="dereplicated_seqs.fasta" /> + <param name="identity" value="0.97" /> + <output name='output' file="otu.rabund" ftype='rabund' lines_diff="10"/> + <output name="otu_pre_table" file="otu_pre_table.tabular" ftype="tabular" lines_diff="10" /> + <output name='otu_relabel' file="otu_relabel.fasta" ftype='fasta' lines_diff="10"/> + <output name='readmap_uc' file="hit_list.tabular" ftype='tabular' lines_diff="10"/> + </test> +</tests> + +</tool> |
b |
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/repository_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usearch_map_reads_to_otu/repository_dependencies.xml Thu May 29 00:51:18 2014 -0400 |
b |
@@ -0,0 +1,4 @@ +<?xml version="1.0"?> +<repositories description="This requires the metagenomics datatype definitions (e.g. the rabund format)."> + <repository changeset_revision="ccba8612695e" name="metagenomics_datatypes" owner="qfab" toolshed="http://toolshed.g2.bx.psu.edu/" /> +</repositories> |
b |
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/scripts/die.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usearch_map_reads_to_otu/scripts/die.py Thu May 29 00:51:18 2014 -0400 |
[ |
"""Fatal-error and warning helpers shared by the USEARCH helper scripts."""
import sys
import traceback


def Die(Msg):
    """Report a fatal error on stderr and terminate the process.

    Writes two blank lines, the current call stack, the command line
    (sys.argv joined by single spaces), the line "**ERROR** <Msg>" and two
    more blank lines to stderr, then exits with status 1.

    Msg -- the error text; formatted with "%s".
    Raises SystemExit(1) always.
    """
    # sys.stderr.write is used instead of the Python-2-only "print >>"
    # statement so the module also loads under Python 3; the emitted bytes
    # are the same.
    sys.stderr.write("\n\n")
    traceback.print_stack()  # defaults to sys.stderr
    sys.stderr.write(" ".join(sys.argv) + "\n")
    sys.stderr.write("**ERROR** %s\n" % (Msg,))
    sys.stderr.write("\n\n")
    sys.exit(1)


def Warning(Msg):
    """Report a non-fatal warning on stderr and return.

    Writes a blank line, the list representation of sys.argv and the line
    "**WARNING** <Msg>" to stderr.

    Note: the name shadows the builtin Warning class for any module doing
    "from die import *"; it is kept because callers depend on it.
    """
    sys.stderr.write("\n")
    sys.stderr.write("%s\n" % (sys.argv,))
    sys.stderr.write("**WARNING** %s\n" % (Msg,))
b |
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/scripts/fasta.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usearch_map_reads_to_otu/scripts/fasta.py Thu May 29 00:51:18 2014 -0400 |
[ |
"""FASTA reading/writing helpers and label utilities for the USEARCH scripts."""
from die import *
import subprocess
import sys  # previously only available as a side effect of "from die import *"
import tempfile
import progress

# When true, labels are truncated at the first whitespace.
TRUNC_LABELS=0

def isgap(c):
    """Return True if c is one of the gap characters '-' or '.'."""
    return c == '-' or c == '.'

def GetSeqCount(FileName):
    """Return the number of lines in FileName that start with '>'.

    The count is obtained by running "grep -c '^>'" and parsing its output;
    a ValueError results if grep prints nothing (e.g. unreadable file).
    """
    Tmp = tempfile.TemporaryFile()
    # Some platforms return a wrapper exposing the real file as ".file".
    try:
        TmpFile = Tmp.file
    except AttributeError:
        TmpFile = Tmp
    try:
        subprocess.call([ "grep", "-c", "^>", FileName ], stdout=TmpFile)
        TmpFile.seek(0)
        return int(TmpFile.read())
    finally:
        Tmp.close()

def GetSeqsDict(FileName):
    """Return {label: sequence} for FileName (alias of ReadSeqsDict)."""
    return ReadSeqsFast(FileName, False)

def ReadSeqsDict(FileName):
    """Return {label: sequence} for FileName, without progress output."""
    return ReadSeqsFast(FileName, False)

def ReadSeqsOnSeq(FileName, OnSeq):
    """Call OnSeq(Label, Seq) for each record in FileName; returns None."""
    ReadSeqs3(FileName, OnSeq, False)

def ReadSeqsFastFile(File, Progress = False):
    """Read FASTA records from an open file object into {label: sequence}.

    File     -- object with readline().
    Progress -- if true, write a running "<N> seqs" counter to stderr.

    Blank lines are skipped. Calls Die() if sequence data appears before the
    first '>' line. A repeated label restarts that label's sequence.
    """
    Seqs = {}
    Id = ""
    N = 0
    while 1:
        if N%10000 == 0 and Progress:
            sys.stderr.write("%u seqs\r" % (N))
        Line = File.readline()
        if len(Line) == 0:
            if Progress:
                sys.stderr.write("%u seqs\n" % (N))
            return Seqs
        # Strip BEFORE testing for emptiness: the original tested first, so a
        # blank line reached Line[0] and raised IndexError.
        Line = Line.strip()
        if len(Line) == 0:
            continue
        if Line[0] == ">":
            N += 1
            Id = Line[1:]
            if TRUNC_LABELS:
                Id = Id.split()[0]
            Seqs[Id] = ""
        else:
            if Id == "":
                Die("FASTA file does not start with '>'")
            Seqs[Id] = Seqs[Id] + Line

def ReadSeqsFast(FileName, Progress = True):
    """Open FileName and return ReadSeqsFastFile() of it; the file is closed."""
    File = open(FileName)
    try:
        return ReadSeqsFastFile(File, Progress)
    finally:
        File.close()

def ReadSeqs(FileName, toupper=False, stripgaps=False, Progress=False):
    """Read FileName into {label: sequence} with optional normalisation.

    toupper   -- upper-case every sequence line.
    stripgaps -- remove '-' and '.' characters.
    Progress  -- accepted for interface compatibility; not used.

    With neither option set this defers to ReadSeqsFast(). Otherwise it calls
    Die() on a duplicate label or on sequence data before the first '>'.
    """
    if not toupper and not stripgaps:
        return ReadSeqsFast(FileName, False)

    Seqs = {}
    Id = ""
    File = open(FileName)
    try:
        while 1:
            Line = File.readline()
            if len(Line) == 0:
                return Seqs
            Line = Line.strip()
            if len(Line) == 0:
                continue
            if Line[0] == ">":
                Id = Line[1:]
                if TRUNC_LABELS:
                    Id = Id.split()[0]
                if Id in Seqs:
                    Die("Duplicate id '%s' in '%s'" % (Id, FileName))
                Seqs[Id] = ""
            else:
                if Id == "":
                    Die("FASTA file '%s' does not start with '>'" % FileName)
                if toupper:
                    Line = Line.upper()
                if stripgaps:
                    Line = Line.replace("-", "")
                    Line = Line.replace(".", "")
                Seqs[Id] = Seqs[Id] + Line
    finally:
        File.close()

def ReadSeqs2(FileName, ShowProgress = True):
    """Read FileName and return (Labels, Seqs) as two parallel lists.

    Unlike the dict-returning readers this keeps file order and duplicates.
    Calls Die() if sequence data appears before the first '>' line.
    """
    Seqs = []
    Labels = []
    File = open(FileName)
    try:
        if ShowProgress:
            progress.InitFile(File, FileName)
        while 1:
            # Only report progress when it was initialised above; the original
            # called progress.File() unconditionally.
            if ShowProgress:
                progress.File()
            Line = File.readline()
            if len(Line) == 0:
                if ShowProgress:
                    sys.stderr.write("\n\n")
                return Labels, Seqs
            Line = Line.strip()
            if len(Line) == 0:
                continue
            if Line[0] == ">":
                Id = Line[1:]
                if TRUNC_LABELS:
                    Id = Id.split()[0]
                Labels.append(Id)
                Seqs.append("")
            else:
                if len(Seqs) == 0:
                    Die("FASTA file '%s' does not start with '>'" % FileName)
                Seqs[-1] = Seqs[-1] + Line
    finally:
        File.close()

def ReadSeqs3(FileName, OnSeq, ShowProgress = True):
    """Stream FileName, calling OnSeq(Label, Seq) once per non-empty record.

    Records whose sequence is empty are not reported. Returns None.
    """
    File = open(FileName)
    try:
        if ShowProgress:
            progress.InitFile(File, FileName)
        Label = ""
        Seq = ""
        while 1:
            Line = File.readline()
            if len(Line) == 0:
                if Seq != "":
                    OnSeq(Label, Seq)
                if ShowProgress:
                    sys.stderr.write("\n\n")
                return
            Line = Line.strip()
            if len(Line) == 0:
                continue
            if Line[0] == ">":
                if Seq != "":
                    if ShowProgress:
                        progress.File()
                    OnSeq(Label, Seq)
                Label = Line[1:]
                # Truncate when the label is read so that the final record is
                # treated like all others (the original skipped truncation
                # for the record flushed at end of file).
                if TRUNC_LABELS:
                    Label = Label.split()[0]
                Seq = ""
            else:
                Seq += Line
    finally:
        File.close()

def WriteSeq(File, Seq):
    """Write Seq to File in lines of at most 80 characters.

    Nothing is written for an empty sequence. No '>' header is written.
    """
    BLOCKLENGTH = 80
    for Start in range(0, len(Seq), BLOCKLENGTH):
        File.write(Seq[Start:Start+BLOCKLENGTH] + "\n")

def GetSizeFromLabel(Label, Default = -1):
    """Return the integer from the first "size=<n>" field of a ';'-separated label.

    If no such field exists, return Default, or call Die() when Default is -1.
    """
    Fields = Label.split(";")
    for Field in Fields:
        if Field.startswith("size="):
            return int(Field[5:])
    if Default == -1:
        Die("Missing size >" + Label)
    return Default

def StripSizeFromLabel(Label):
    """Return Label with every "size=..." field removed."""
    Fields = Label.split(";")
    NewLabel = ""
    for Field in Fields:
        if Field.startswith("size="):
            continue
        if NewLabel != "":
            NewLabel += ";"
        NewLabel += Field
    return NewLabel

def GetQualFromLabel(Label):
    """Return the text after "qual=" up to, but excluding, the last character.

    Asserts that "qual=" is present.
    NOTE(review): dropping the last character presumably removes a trailing
    ';' -- confirm against the label format in use.
    """
    n = Label.find("qual=")
    assert n >= 0
    return Label[n+5:-1]

def StripQualFromLabel(Label):
    """Return the part of Label before "qual="; asserts that it is present."""
    n = Label.find("qual=")
    assert n >= 0
    return Label[:n]
b |
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/scripts/fasta_number.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usearch_map_reads_to_otu/scripts/fasta_number.py Thu May 29 00:51:18 2014 -0400 |
[ |
"""Renumber the labels of a FASTA file.

Usage: fasta_number.py <input.fasta> [prefix] [-needsize|-nosize]

Every header line is replaced by ">" + prefix + running number (starting at
1). With -needsize the "size=<n>" annotation of the original label is kept.
The result is written to stdout.
"""
import sys


def GetSize(Label):
    """Return the integer of the first "size=<n>" field in a ';'-separated label.

    If no such field exists, an error is written to stderr and the process
    exits with status 1.
    """
    for Field in Label.split(";"):
        if Field.startswith("size="):
            return int(Field[5:])
    sys.stderr.write("\n")
    sys.stderr.write("Size not found in label: " + Label + "\n")
    sys.exit(1)


def Renumber(Lines, Prefix = "", NeedSize = 0):
    """Yield the renumbered output lines (without line terminators).

    Lines    -- iterable of input lines, with or without line endings.
    Prefix   -- text placed before the running number in each new label.
    NeedSize -- if true, append ";size=<n>;" taken from the original label.

    Blank lines are dropped; sequence lines are passed through.
    """
    N = 0
    for Line in Lines:
        # Remove only the line terminator. The original removed the last
        # character unconditionally, which truncated the final line of a
        # file that does not end in a newline.
        Line = Line.rstrip("\r\n")
        if len(Line) == 0:
            continue
        if Line[0] == '>':
            N += 1
            if NeedSize:
                Size = GetSize(Line[1:].strip())
                yield ">%s%u;size=%u;" % (Prefix, N, Size)
            else:
                yield ">%s%u" % (Prefix, N)
        else:
            yield Line


def Main(Argv):
    """Command-line entry point; Argv has the layout of sys.argv."""
    Prefix = ""
    if len(Argv) > 2:
        Prefix = Argv[2]

    NeedSize = 0
    if len(Argv) > 3:
        if Argv[3] == "-needsize":
            NeedSize = 1
        elif Argv[3] == "-nosize":
            NeedSize = 0
        else:
            import die  # project-local helper, only needed on this error path
            die.Die("Must specify -needsize or -nosize")

    File = open(Argv[1])
    try:
        for Out in Renumber(File, Prefix, NeedSize):
            sys.stdout.write(Out + "\n")
    finally:
        File.close()


if __name__ == "__main__":
    Main(sys.argv)
b |
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/scripts/mod_uc2otutab.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usearch_map_reads_to_otu/scripts/mod_uc2otutab.py Thu May 29 00:51:18 2014 -0400 |
[ |
"""Build a rabund-style OTU table from a USEARCH .uc hit list.

Usage: mod_uc2otutab.py <hits.uc> <identity> <multisample>

<identity> is the fractional identity used for the search; the row label is
1 - identity. <multisample> is the literal string "True" when the reads carry
distinct barcode labels and a sample column should be written.
The table is written to stdout.
"""
import sys
#sys.path.insert(0, 'scripts');
import uc
import die
import fasta

FileName = sys.argv[1]
pctIdent = sys.argv[2] #percent identity, will be used in the label
pctIdent = 1.0 - float(pctIdent);
multiSample = (sys.argv[3]=='True') #whether the input file has multiple samples (using barcode label)

def GetSampleId(Label):
    """Return the value of the "barcodelabel=" field of a ';'-separated label.

    Calls die.Die() (which exits) if the field is missing.
    """
    Fields = Label.split(";")
    for Field in Fields:
        if Field.startswith("barcodelabel="):
            return Field[13:]
    die.Die("barcodelabel= not found in read label '%s'" % Label)

def FormatLabel(Value):
    """Format a float the way Python 2's str() does (12 significant digits).

    Under Python 3, str(1.0 - 0.97) is '0.030000000000000027'; this keeps the
    row label at '0.03' regardless of the interpreter version.
    """
    Text = "%.12g" % Value
    if "." not in Text and "e" not in Text and "n" not in Text:
        Text += ".0"  # Python 2 prints integral floats as e.g. '0.0'
    return Text

def OnRec():
    """Callback for uc.ReadRecs: add the current hit record to the table.

    Reads the record from the uc module globals (Type, TargetLabel,
    QueryLabel). Only records of type 'H' are counted; each contributes the
    "size=" value of the query label, or 1 if there is none.
    """
    if uc.Type != 'H':
        return

    OTUId = uc.TargetLabel
    if OTUId not in OTUTable:
        OTUIds.append(OTUId)
        OTUTable[OTUId] = {}

    SampleId = GetSampleId(uc.QueryLabel)
    if SampleId not in SeenSampleIds:
        SeenSampleIds.add(SampleId)
        SampleIds.append(SampleId)

    N = fasta.GetSizeFromLabel(uc.QueryLabel, 1)
    Counts = OTUTable[OTUId]
    Counts[SampleId] = Counts.get(SampleId, 0) + N

OTUIds = []             # OTU labels in order of first appearance
SampleIds = []          # sample labels in order of first appearance
SeenSampleIds = set()   # same content as SampleIds, for O(1) membership tests
OTUTable = {}           # OTUTable[OTUId][SampleId] -> read count

# NOTE(review): ReadRecs is not visible here; presumably it parses each line
# of FileName into the uc module globals and then calls OnRec().
uc.ReadRecs(FileName, OnRec)

#Header line
if (multiSample):
    s = "OTUId\t#OTUs\tsample"
else:
    s = "OTUId\t#OTUs"

for OTUId in OTUIds:
    s += "\t" + OTUId
sys.stdout.write(s + "\n")

# One row per sample: label, [sample,] number of OTUs, then one count per OTU.
for SampleId in SampleIds:
    if (multiSample):
        s = FormatLabel(pctIdent) + "\t" + SampleId
    else:
        s = FormatLabel(pctIdent)
    s += "\t" + str(len(OTUIds))
    for OTUId in OTUIds:
        s += "\t" + str(OTUTable[OTUId].get(SampleId, 0))
    sys.stdout.write(s + "\n")
b |
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/scripts/otu_table_transform.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usearch_map_reads_to_otu/scripts/otu_table_transform.py Thu May 29 00:51:18 2014 -0400 |
[ |
import sys, os
#sys.path.insert(0, 'scripts');
import parseFasta

# Create OTU Table
# OTU_Label | Count | Sequence
#
# Author: Anne Kunert
#
# Usage: otu_table_transform.py <otu.fasta> <otu_table>

OTUmap = sys.argv[1]
OTUcount = sys.argv[2]

def transform(file):
    """Transpose the whitespace-separated table in `file` and append sequences.

    Each column of the input becomes one output row: the column's values, each
    followed by a tab, then one more tab and the sequence of the FASTA record
    (from OTUmap) whose key equals the column's first value. The sequence is
    empty when no record matches. Rows are written to stdout.
    """
    seq = parseFasta.FastaParser(OTUmap)
    with open(file) as f:
        rows = [x.split() for x in f]
    for column in zip(*rows):
        # Direct dict lookup instead of scanning every FASTA key per column.
        record = seq.records.get(column[0])
        if record is None:
            myseq = ""
        else:
            myseq = record['sequence'].replace('\n','')
        line = "".join(str(y) + "\t" for y in column)
        print(line+"\t"+myseq)
#
transform(OTUcount)
b |
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/scripts/parseFasta.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usearch_map_reads_to_otu/scripts/parseFasta.py Thu May 29 00:51:18 2014 -0400 |
[ |
# parse fasta - id & sequence
class Seq(object):
    """View on the sequence of one record in a shared records dict."""

    def __init__(self,key,records_dict):
        self.key = key
        self.records = records_dict

    def tostring(self):
        """Return the sequence as one string with all newlines removed."""
        s = self.records[self.key]['sequence']
        return s.replace('\n','')


class SeqRecord(object):
    """One FASTA record, addressed by key within a shared records dict."""

    def __init__(self,key,records_dict):
        self.records = records_dict
        self.key = key
        self.seq = Seq(key,records_dict)

    def format(self,out_format):
        """Return the record as FASTA text when out_format is 'fasta'.

        The text is ">" + description, a newline, and the sequence with its
        original line breaks. Any other out_format yields None.
        """
        if out_format == 'fasta':
            r = self.records[self.key]
            return ">%s\n%s" % (r['description'],r['sequence'])


class FastaParser(object):
    """Load a whole FASTA file into memory.

    Attributes:
      fasta_file -- the path that was read.
      entries    -- raw text of each record (everything between '>' marks).
      records    -- {key: {'description': header line, 'sequence': text}},
                    where key is the first whitespace-delimited word of the
                    header. For repeated keys the last record wins.

    Iterating yields the keys in file order; indexing returns a SeqRecord.
    """

    def __init__(self,fasta_file):
        self.fasta_file = fasta_file
        with open(fasta_file,'r') as handle:
            fasta = handle.read()
        # Ignore whitespace-only chunks: a blank line before the first '>'
        # used to produce a bogus entry and an IndexError in key extraction.
        self.entries = [x for x in fasta.split('>') if len(x.strip()) != 0]
        self.build_records_dict()

    @staticmethod
    def _entry_key(entry):
        # First word of the header line; IndexError if the header is blank.
        return entry.split('\n')[0].split()[0]

    def keys(self):
        """Return the record keys in file order (duplicates included)."""
        return [self._entry_key(entry) for entry in self.entries]

    def __len__(self):
        return len(self.entries)

    def __iter__(self):
        for k in self.keys():
            yield k

    def build_records_dict(self):
        """(Re)build self.records from self.entries."""
        records_dict = {}
        for entry in self.entries:
            lines = entry.split('\n')
            description = lines[0]
            sequence = '\n'.join(lines[1:]).strip()
            records_dict[self._entry_key(entry)] = {'description':description,'sequence':sequence}
        self.records = records_dict

    def __getitem__(self,key):
        return SeqRecord(key,self.records)
b |
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/scripts/progress.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usearch_map_reads_to_otu/scripts/progress.py Thu May 29 00:51:18 2014 -0400 |
b |
"""Throttled progress reporting on stderr for long-running file scans."""
import sys
import time

File__ = None       # file object being tracked (set by InitFile)
FileSize__ = None   # its total size
FileName__ = None   # name shown in progress messages
Secs__ = None       # clock value at the last progress message, or None

# time.clock() does not exist in Python 3.8+; fall back to wall-clock time.
Clock__ = getattr(time, "clock", None) or time.time

def InitFile(File, FileName = ""):
    """Start tracking File; FileName is the label used in messages.

    The size is found by seeking to the end; the original position is
    restored afterwards.
    """
    global Secs__, File__, FileSize__, FileName__

    File__ = File
    FileName__ = FileName
    Secs__ = None

    Pos = File.tell()
    File.seek(0, 2)
    FileSize__ = File.tell()
    File.seek(Pos)

def FileDone(Msg = ""):
    """Write the final "100.0%" message for the tracked file."""
    global Secs__, File__, FileSize__, FileName__
    Str = "%s 100.0%% %s \n" % (FileName__, Msg)
    sys.stderr.write(Str)

def File(Msg = ""):
    """Write the current position of the tracked file as a percentage.

    Does nothing if InitFile() has not been called, or if less than one clock
    unit has passed since the previous message.
    """
    global Secs__, File__, FileSize__, FileName__

    if File__ is None:
        return

    Secs = Clock__()
    if Secs__ != None and Secs - Secs__ < 1:
        return

    Secs__ = Secs
    Pos = File__.tell()
    # An empty file is reported as complete instead of dividing by zero.
    if FileSize__:
        Pct = (100.0*Pos)/FileSize__
    else:
        Pct = 100.0
    Str = "%s %5.1f%% %s \r" % (FileName__, Pct, Msg)
    sys.stderr.write(Str)

def Step(Msg, i, N):
    """Write progress for step i of N, throttled like File()."""
    global Secs__, File__, FileSize__, FileName__

    Secs = Clock__()
    if Secs__ != None and Secs - Secs__ < 1:
        return

    Secs__ = Secs
    if N:
        Pct = (100.0*i)/N
    else:
        Pct = 100.0
    # NOTE(review): the last step ends in "\r" and all others in "\n", which
    # looks inverted for an in-place progress line; kept as in the original.
    if i == N-1:
        sys.stderr.write("%5.1f%% %s \r" % (Pct, Msg))
    else:
        sys.stderr.write("%5.1f%% %s \n" % (Pct, Msg))
b |
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/scripts/uc.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usearch_map_reads_to_otu/scripts/uc.py Thu May 29 00:51:18 2014 -0400 |
[ |
import re
import sys
import progress

# Tab-separated fields:
# 1=Type, 2=ClusterNr, 3=SeqLength or ClusterSize, 4=PctId, 5=Strand, 6=QueryStart, 7=SeedStart, 8=Alignment, 9=Label
# Record types (field 1): L=LibSeed, S=NewSeed, H=Hit, R=Reject, D=LibCluster, C=NewCluster, N=NotMatched
# For C and D types, PctId is average id with seed.
# QueryStart and SeedStart are zero-based relative to start of sequence.
# If minus strand, SeedStart is relative to reverse-complemented seed.
#
# ParseRec() also accepts an optional 10th field, stored as TargetLabel.
# The module-level variables below hold the most recently parsed record;
# callbacks passed to ReadFile()/GetRec() read them from this module.

MaxError = -1

Type = '?'
ClusterNr = -1
Size = -1
PctId = -1.0
LocalScore = -1.0
Evalue = -1.0
Strand = '.'
QueryStart = -1
SeedStart = -1
Alignment = ""
QueryLabel = ""
TargetLabel = ""
FileName = "?"
Line = ""

def Die(s):
    """Write an error message plus sys.argv to stderr and exit with status 1."""
    # Written with sys.stderr.write so the module parses on Python 2 and 3;
    # the text matches what the Python 2 print statement produced.
    sys.stderr.write(" ".join(["*** ERROR ***", str(s), str(sys.argv)]) + "\n")
    sys.exit(1)

def ProgressFile(File, FileSize):
    """Write the position of `File` as a percentage of `FileSize` to stderr."""
#    if not sys.stderr.isatty():
#        return
    Pos = File.tell()
    # Report an empty file as complete instead of dividing by zero.
    if FileSize:
        Pct = (100.0*Pos)/FileSize
    else:
        Pct = 100.0
    Str = "%s %5.1f%%\r" % (FileName, Pct)
    sys.stderr.write(Str)

def Progress(i, N):
    """Disabled progress reporter: returns immediately without output.

    NOTE(review): the original had only the `if not sys.stderr.isatty():`
    guard commented out, leaving an unconditional return in front of the
    code that wrote "%5.1f%%\\r" % ((100.0*i)/N) to stderr. The no-op
    behaviour is preserved here; confirm whether it was intended.
    """
    return

def PrintLine():
    """Write the current raw record line to stdout."""
    sys.stdout.write(Line + "\n")

def _ToInt(Text):
    """Return int(Text), or -1 when Text is not an integer (e.g. '*')."""
    try:
        return int(Text)
    except ValueError:
        return -1

def _ToFloat(Text):
    """Return float(Text), or -1.0 when Text is not a number (e.g. '*')."""
    try:
        return float(Text)
    except ValueError:
        return -1.0

def ParseRec(Line):
    """Parse one tab-separated .uc record into the module-level variables.

    Calls Die() (which exits) unless the line has 9 or 10 fields. Numeric
    fields that cannot be parsed are stored as -1 / -1.0. Field 4 may be
    either a single number (PctId) or 'PctId/LocalScore/Evalue'.
    """
    global Type
    global ClusterNr
    global Size
    global PctId
    global Strand
    global QueryStart
    global SeedStart
    global Alignment
    global QueryLabel
    global TargetLabel
    global LocalScore
    global Evalue

    Fields = Line.split("\t")
    N = len(Fields)
    if N != 9 and N != 10:
        Die("Expected 9 or 10 fields in .uc record, got: " + Line)
    Type = Fields[0]

    ClusterNr = _ToInt(Fields[1])
    Size = _ToInt(Fields[2])

    Fields2 = Fields[3].split('/')
    LocalScore = -1.0
    Evalue = -1.0
    if len(Fields2) == 3:
        try:
            PctId = float(Fields2[0])
            LocalScore = float(Fields2[1])
            Evalue = float(Fields2[2])
        except ValueError:
            # Do not leave a half-parsed triple behind.
            PctId = -1.0
            LocalScore = -1.0
            Evalue = -1.0
    else:
        PctId = _ToFloat(Fields[3])

    Strand = Fields[4]
    QueryStart = _ToInt(Fields[5])
    SeedStart = _ToInt(Fields[6])

    Alignment = Fields[7]
    QueryLabel = Fields[8]

    if N > 9:
        TargetLabel = Fields[9]
    else:
        # Reset so a 9-field record does not inherit the previous
        # record's target label.
        TargetLabel = ""

def GetRec(File, OnRecord):
    """Read the next record from `File`, parse it and call `OnRecord()`.

    Lines starting with '#' are skipped. Returns 0 at end of file or when
    `OnRecord()` returns a value equal to 0; returns 1 otherwise,
    including for a blank line, which is skipped without calling
    `OnRecord`.
    """
    global Line
    while 1:
        Line = File.readline()
        if len(Line) == 0:
            return 0
        if Line[0] == '#':
            continue
        Line = Line.strip()
        if len(Line) == 0:
            return 1
        ParseRec(Line)
        Ok = OnRecord()
        if Ok is not None and Ok == 0:
            return 0
        return 1

def ReadRecs(argFileName, OnRecord, ShowProgress = True):
    """Alias for ReadFile()."""
    return ReadFile(argFileName, OnRecord, ShowProgress)

def GetRecs(argFileName, OnRecord, ShowProgress = True):
    """Alias for ReadFile()."""
    return ReadFile(argFileName, OnRecord, ShowProgress)

def ReadFile(argFileName, OnRecord, ShowProgress = True):
    """Parse every record of a .uc file, calling `OnRecord()` after each.

    Reading stops early when `OnRecord()` returns a value equal to 0.
    When `ShowProgress` is true, progress is reported through the
    `progress` module. Returns None.
    """
    global FileName
    FileName = argFileName
    File = open(FileName)
    try:
        if ShowProgress:
            progress.InitFile(File, FileName)
        while GetRec(File, OnRecord):
            if ShowProgress:
                progress.File()
        if ShowProgress:
            progress.FileDone()
    finally:
        # Close the handle even when OnRecord raises or Die() exits.
        File.close()
b |
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/test-data/dereplicated_seqs.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usearch_map_reads_to_otu/test-data/dereplicated_seqs.fasta Thu May 29 00:51:18 2014 -0400 |
b |
b'@@ -0,0 +1,10558 @@\n+>248442;size=4;\n+AGAGTTTGATCCTGGCTCAGATTGAACGCTGGCGGCAGGCCTAACACATGCAAGTCGAGCGCGAAATCTCCTTCGGGAGT\n+GAGTAGAGCGGCGGACGGGTGAGTAACGCGTAGGAATCTACCCAGTGGTGGgggACAACCTGGGGAAACCCAGGCTAATA\n+CCGCATACGCCCTACGGgggAAAGCGGgggCTctctTCGGAGACCTCGCGCCATTGGATGAGCCTGCGTTGGATTAGCTA\n+GTTGGTGGGGTAAAGGCCTACCAAGGCGACGATCCATAGCTGGTCTGAGAGGACGATCAGCCACACTGGGACTGAGACAC\n+GGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCGCAATGGgggAAACCCTGACGCAGCCATGCCGCGTgtgtg\n+AAGAAGGCTCTAGGGTTGTAAAGCACTTTCAGTAGGGAGAAaaaGCTCATGTTTAATAGATGTGAGTGTTGATGTTACCT\n+ACAGAAGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCG\n+TAAAGCgcgcgTAGGCGGTTTGTTAAGTCGGATGTGAAAGCCCCGGGCTCAACCTGGGAATTGCATTCGATACTGATCGA\n+CTAGAGTACGAgagagGGAGGTAGAATTCCACGTGTAGCGGTGAAATGCGTAGATATGTGGAGGAATACCGGTGGCGAAG\n+GCGGCCTCCTGGCTCGATACTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGC\n+CGTAAACGATGTCAACTAGCCGTTGGGGAACTTGATTCCTTAGTGGCGCAGCTAACGCAATAAGTTGACCGCCTGGGGAG\n+TACGGCCGCAAGGTTAAAACTCAAATGAATTGACGGgggCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGCAAC\n+GCGAAGAACCTTACCAGGTCTTGACATCCTGAAAACTTTCCAGAGATGGATTGGTGCCTTCGGGAATTCAGTGACAGGTG\n+CTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGTAACGAGCGCAACCCTTGTCCTTAGTTGCCA\n+GCACGTAATGGTGGGAACTCTAAGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGACGACGTCAAGTCATCATGGCC\n+CTTACGACCTGGGCTACacacGTGCTACAATGGTCGGTACAGACGGCTGCGAACCCGCGAGGgggAGCGAATCCGAGAAA\n+ACCGATCGTAGTCCGGATTGGAGTCTGCAACTCGACTCCATGAAGTCGGAATCGCTAGTAATCGCGAATCAGAATGTCGC\n+GGTGAATACGTTCCCGGGCCTTGTACacacCGCCCGTCACACCATGGGAGTGGGTTGCTCCAGAAGTGGTTAGCCTAACC\n+TTAGGGAGGGCGATCACCACGGAGTGATTCATGACTGGGGTGAAGTCGTAACAAGGTAACC\n+>126633;size=3;\n+AACGTACGCTGGCGGCACGCCTAACACATGCAAGTCGAACGCAGTAGCAATACTGAGTGGCAAACGGGTGAGTATAATGT\n+GGGAATCTGCCTTTTGGTTTGGAATAACACGGGGAAACTTGTGCTAATACCGAATAAGCCCTTACGGGGAAAGTTTTAAC\n+GCCGAAAGATGAGCCTGCACTTGATTAGCTAGTTGGTAAGGTAAAAGCTTACCAAGGCAACGATCAATAGCTGTTCTTAG\n+AGGAAGACCAGCCACATTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATCTTGCACAATGGgg\n+gAAACCCTGATGCAGCGATGCCGCGTGAGTGAAGAGGGCCCTTGGGTTGT
AAAACTCTTTCGTCGGGGAAGAAAATGACT\n+GTACCCGAATAAGAAGGTCCGGCTAACTTCGTGCCAGCAGCCGCGGTAATACGAAGGGACCTAGCGTAGTTCGGAATTAC\n+TGGGCTTAAAGAGCTCGTAGGTGGTTAAaaaaGTTGATGGTGAAATCCCAAGGCTCAACCTTGGAACTGCCATCAAAACT\n+TtttAGCTAGAGTGTGATAGAGGTAAGTGGAATTTCTAGTGTAGAGGTGAAATTCGTAGATATTAGAAAGAACACCAAAT\n+GCGAAGGCAACTTACTGGGTCACTACTGACACTGAGGAGCGAAAGCATGGGTAGCGAAGAGGATTAGATACCCTCGTAGT\n+CCATGCCGTAAACGATGtgtgCTAGACGTTGGAAATatatTtttCAGTGTCGCAGCGAAAGCATTAAGCACACCGCCTGG\n+GGAGTACGACCGCAAGGTTAAAACTCAAATGAATTGACGGGGACCCGCACAAGCAGTGGAGCATGTGGTTTAATTCGAAG\n+ATACGCGCAGAACCTTACCAACACTTGACATGTTCGTCGCGAAACTAAGAGATTAGTTttttCAGTTCGGCTGGACGAAA\n+CACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCCTACTTTT\n+AGTTGCCACCATTTAGTTGGGCACTTTAAAAGAACTGCCAGTGATAAGCTGGAGGAAGGTGGGGATGACGTCAAGTCCTC\n+ATGGCCCTTATGTGTTGGGCTACacacGTGCTACAATGGTACTTACAATGGGATGCAAAGAGGTGACTCTTAGCTAATCC\n+CTAAAATGTACCTCAGTTCGGATTGTACTCTGTAACTCGAGTGCATGAAGCTGGAATTGCTAGTAATCGCGGATCAGCgc\n+gcCGCGGTGAATACGTTCCCGGGTCTTGTACacacCGCCCGTCACACCATGGAAGTTGGTTACACCTTAAGGCAAAGCTT\n+ATACCTTTGACTACGGTACGATCAGCAACTGGGGTGAAGTCGTAACAAGGTAGCCGTAGGGGAACCTGCGGCTGGATTAC\n+CTCCTTTCTAAGGA\n+>340660;size=3;\n+AGAGTTTGATCATGGCTCAGGACGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGAGCGATGAAGCACCTTCGGGTGT\n+GAATTAGCGGCGAACGGGTGAGGAACACGTGAGAAATCTGCCTTCAACACTGGGATAACTCCGGGAAACCGGGGCTAATA\n+CCGGATATGAAACCTGCGGGCATCCGCGGGTTTGGAAAGTTtttCGGTTGAAGATGATCTCGCGGCCTATCAGCTTGTTG\n+GTGAGGTAATGGCTCACCAAGGCAACGACGGGTAGCTGGTCTGAGAGGATGATCAGCCACACTGGGACTGAGACACGGCC\n+CAGACTCCTACGGGAGGCAGCAGTGGGGAATCTTGCGCAATGGGCGAAAGCCTGACGCAGCAACGCCGCGTGCGGGATGA\n+AGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCACCGGCTAACTAC\n+GTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGAGCTCGTAGGCGGTTAGAC\n+ACGTCGGATGTGAAAACTGGgggCTCAACCcccAGCCTGCATTCGATACGGGAGGACTCGAGGACGGCAGGGGAGACTGG\n+AACTTCTGGTGTAGCGGTGGAATGCGCAGATATCAGAAAGAACACCAATGGCGAAGGCAGGTCTCTGGGCCGATCCTGAC\n+GCTGAGGAACGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACTATGGACGCTAGGTGTG\n+GgggACATTCCACGTTCTCTGCG
CCGCAGCTAACGCATTAAGCGTCCCGCCTGGGGAGTACGACCGCAAGGTTAAA'..b'AGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGACTATTAAGTCAGGGGTGAAATCCCG\n+GGGCTCAACCCCGGAACTGCCCTTGATACTGGTAGTCTTGAGTTCGAgagagGTGAGTGGAATTCCGAGTGTAGAGGTGA\n+AATTCGTAGATATTCGGAGGAACACCAGTGGCGAAGGCGGCTCACTGGCTCGATACTGACGCTGAGGTGCGAAAGTGTGG\n+GGAGCAAACAGGATTAGATACCCTGGTAGTCCACACCGTAAACGATGAATGCCAGTCGTCGGGTTGCATGCAATTCGGTG\n+ACacacCTAACGGATTAAGCATTCCGCCTGGGGAGTACGGTCGCAAGATTAAAACTCAAAGGAATTGACGGgggCCCGCA\n+CAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGCAGAACCTTACCAACCCTTGACATCCTGTGCTACATGGAgag\n+aTCCATGGTTCCCTTCGGGGACGCAGTGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTCGGTTAAGT\n+CCGGCAACGAGCGCAACCCACACCCTTAGTTGCCAGCAGTTCGGCTGGGCACTCTAGGGGAACTGCCCGTGATAAGCGGG\n+AGGAAGGTGTGGATGACGTCAAGTCCTCATGGCCCTTACGGGTTGGGCTACacacGTGCTACAATGGCAGTGACAATGGG\n+TTAATCCCCAAaaaCTGTCTCAGTTCGGATTGTCGTCTGCAACTCGACGGCATGAAGTCGGAATCGCTAGTAATCGCGTA\n+ACAGCATGACGCGGTGAATACGTTCCCGGGCCTTGTACacacCGCCCGTCACATCATGGGAGTTGGGTTTACCCGAAGAC\n+GGTGCGCCAACCTGTTCGCAGGgggCAGCTGGCCACGGTAGGCTCAGCGACTGGGATGAAGTCGTAACAAGGTAGCCGTA\n+GGGGAACCTGCGGCTGGATCACCTCCTTTCT\n+>347843;size=1;\n+CGCTGGCGGCAGGCTTAACACATGCAAGTCGAGCGGTAACAGGGGAGCTTGCTCCCGCTGACGAGCGGCGGACGGGTGAG\n+TAACGCGTAGGAATCTGCCCAATAGTGGgggATAGCCCGGGGAAACTCGGATTAATACCGCATACGCCCTACGGgggAAA\n+GGCCGGGATCTTCGGACCGGTCGCTATTGGATGAGCCTGCGTAAGATTAGCTAGTTGGTAGGGTAAAGGCCTACCAAGGC\n+GACGATCTTTAGCTGGTCTGAGAGGATGATCAGCCACACTGGAACTGAGACACGGTCCAGACTCCTACGGGAGGCAGCAG\n+TGGGGAATATTGGACAATGGgggCAACCCTGATCCAGCCATGCCGCGTgtgtgAAGAAGGCCTTAGGGTTGTAAAGCACT\n+TTAAGTTGGGAGGAAGGCTCTGTAGCTAATATCTGCAGGGATTGACGTTACCAACAGAATAAGCACCGGCTAACTCCGTG\n+CCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCgcgcgTAGGCGGTTTGGTAAG\n+TGTGATGTGAAAGCCCAGGGCTTAACCTTGGAACTGCATCACATACTGCCAGGCTAGAGTACGGTAGAGGggggTAGAAT\n+TCCACGTGTAGCGGTGAAATGCGTAGAGATGTGGAGGAATACCAGTGGCGAAGGCGGCCcccTGGATCGATACTGACGCT\n+GAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGTCAACTAGCCGTTGGA\n+AGGGTAATCCTTTTAGTGGCGCAGCTAACGCTCTAAGTTGACCGCCTGGGGAGTACGGTCGCAAGATTAAAACTCA
AATG\n+AATTGACGGgggCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGACGCAACGCGAAGAACCTTACCTGGTCTTGACAT\n+CCTGCGAACTTTCCAGAGATGGATTGGTGCCTTCGGGAGCGCAGTGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCG\n+TGAGATGTTGGGTTAAGTCCCGTAACGAGCGCAACCCTTGTCCTTAGTTGCCATCATTTAGTTGGGCACTTTAGGGAGAC\n+TGCCGGTGACAAACCGGAGGAAGGTGGGGACGACGTCAAGTCATCATGGCCCTTACGACCAGGGCTACacacGTGCTACA\n+ATGGAGCGTACAAAGGGTTGCAACCCCGCGAGGGTGAGCTAATCCCATAAAACGTTTCGTAGTCCGGATCGCAGTCTGCA\n+ACTCGACTGCGTGAAGTCGGAATCGCTAGTAATCGCGAATCAGAATGTCGCGGTGAATACGTTCCCGGGCCTTGTACaca\n+cCGCCCGTCACACCATGGGAGTGGGTTGCAAAAGAAGTGGGTAGCATAACCGCAAGGAGTGCGCTCACCACTTTGTGATT\n+CATGACTGGGGTGAAGTCGTAACAAGGTAACCGTA\n+>203214;size=1;\n+GAGTTTGATCCTGGCTCAGGATGAACGCTGGCGGCGTGGATAAGTCATGCAAGTCAAGGgggCTCGCAAGAGCAACCGGC\n+AGACGAGGTAGTAATAAGTAGGTACGTCCCTTCAAGTCACGAATAGCCCGTCGAAAGACGGGGTAATACGCGATAGTCCC\n+TTCGGGGTAAAGATTTATCGCTTGAAGAACGGCCTGCTCGGTATCAGCTAGTTGGCGGTGTAAAAGACCACCAAGGCTAT\n+GACGCCTAGGGGAGCTGAGAGGCTGACCcccACCGATGGGACTGAGATACGGCCCATACACCTACGGGTGGCTGCAGACG\n+AGAATATTCCACAATGGACGAAAGTCTGATGGAGCGACGCCGCGTGCAGGATGAAGTTCTTCGGGATGTAAACTGCTTTT\n+ATGAGCGAGAAAGTTTATTGATCAGCTCATGAATAAGAGGTTGCTAAACTCGTGCCAGCAGCAGCGGTAATACGAGTGCC\n+TCAAGCGTTATCCGGAATCATTGGGCGTAAAGGGTgtgtAGGTGGTCGCGTTAGTCTTCCGTTAAATTCTTCGGCTCAAC\n+CGGgggCATGCGGgggAAACGGCGCGACTTGAGGATGCGAGAGGAAAGCGGAACTCATAGTGTAGCGGTGAAATGCGTTG\n+ATATTATGGGGAACACCAAATGCGAAGGCAGCTTTCTGGAGCATCCTGACACTGAAACACGAAAGCGTGGGTAGCGAACG\n+GGATTAGATACCCCGGTAGTCCACGCCCTAAACGATGATCACTAGCTATTCGGAGTATCGACCTCCAAGTGGCGTAGCTA\n+ACGCGTTAAGTGATCCGCCTGGGTAGTACGGCCGCAAGGCTAAAACTCAAAGGAATAGACGGGGACTTGCACAAGCGGAG\n+GAACATGTGGTTCAATTCGATGGTAAACGAAaaaCCTTACCAGGGTTGGAAATCAGTTGAGATAGGCAGAAACGTCTATG\n+CCGCAAGGCAATTAGACAGGTGCTGCATGGTCGTCGTCAGTTCGTGGCTTGAGTTGTTCCCTTCAGTGGGGTAACGAACG\n+CAACCcccGTTGCCTGGTATAAGTATCAGGCGAGACTGCCCAGGTTAACTGGGAGGAAGGTGGGGATGACGCCAGATCAG\n+CATGGCCCTTATACCTGGGCCTACacacGTGTTACAATGGCCGACTCACGCGAGGCGAAGAGGAAACTCGGAGCAAATCG\n+TAAaaaGTCGGCCAAGTTCGGATTGAGGGCTGCAACTCGCCCTCATGAAGCGGGATTCGCTAGTAATCGCAGATCAGCAT\n+GCTGCGGTGAATACGTTCTCAAGTCTTG
TACTCACCGCCCGTCAACTCAAGGGAGCTGGGAATACCCGAAGTCGCCCTTG\n+GGCGCCTAAGGTAAGCTCAGTGACAGGGAGTAAGTCGTAACAAGGTAACCGT\n' |
b |
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/test-data/hit_list.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usearch_map_reads_to_otu/test-data/hit_list.tabular Thu May 29 00:51:18 2014 -0400 |
b |
b'@@ -0,0 +1,1459 @@\n+H\t1\t1454\t100.0\t+\t0\t0\t1454M\t126633;size=3;\tOTU_2\n+H\t0\t1501\t100.0\t+\t0\t0\t1501M\t248442;size=4;\tOTU_1\n+H\t2\t1480\t100.0\t+\t0\t0\t1480M\t340660;size=3;\tOTU_3\n+H\t4\t1449\t100.0\t+\t0\t0\t1449M\t91747;size=3;\tOTU_5\n+H\t5\t1506\t100.0\t+\t0\t0\t1506M\t258157;size=3;\tOTU_6\n+H\t6\t1475\t100.0\t+\t0\t0\t1475M\t166987;size=1;\tOTU_7\n+H\t7\t1415\t100.0\t+\t0\t0\t1415M\t104633;size=1;\tOTU_8\n+H\t8\t1439\t100.0\t+\t0\t0\t1439M\t235221;size=1;\tOTU_9\n+H\t9\t1489\t100.0\t+\t0\t0\t1489M\t62498;size=1;\tOTU_10\n+H\t1\t1438\t98.8\t+\t0\t0\t20D1418M36I\t317318;size=1;\tOTU_2\n+H\t10\t1322\t100.0\t+\t0\t0\t1322M\t203534;size=1;\tOTU_11\n+H\t12\t1447\t100.0\t+\t0\t0\t1447M\t176763;size=1;\tOTU_13\n+H\t11\t1501\t100.0\t+\t0\t0\t1501M\t116062;size=1;\tOTU_12\n+H\t13\t1501\t100.0\t+\t0\t0\t1501M\t342161;size=1;\tOTU_14\n+H\t14\t1517\t100.0\t+\t0\t0\t1517M\t232885;size=1;\tOTU_15\n+H\t15\t1517\t100.0\t+\t0\t0\t1517M\t229914;size=1;\tOTU_16\n+H\t16\t1352\t100.0\t+\t0\t0\t1352M\t103462;size=1;\tOTU_17\n+H\t17\t1498\t100.0\t+\t0\t0\t1498M\t356409;size=1;\tOTU_18\n+H\t18\t1552\t100.0\t+\t0\t0\t1552M\t100649;size=1;\tOTU_19\n+H\t19\t1496\t100.0\t+\t0\t0\t1496M\t257959;size=1;\tOTU_20\n+H\t20\t1502\t100.0\t+\t0\t0\t1502M\t230967;size=1;\tOTU_21\n+H\t21\t1465\t100.0\t+\t0\t0\t1465M\t233918;size=1;\tOTU_22\n+H\t22\t1458\t100.0\t+\t0\t0\t1458M\t154776;size=1;\tOTU_23\n+H\t224\t1426\t99.9\t+\t0\t0\t1426M23I\t315318;size=1;\tOTU_225\n+H\t23\t1423\t100.0\t+\t0\t0\t1423M\t18318;size=1;\tOTU_24\n+H\t305\t1439\t99.0\t+\t0\t0\t20D1419M36I\t352990;size=1;\tOTU_306\n+H\t14\t1443\t99.9\t+\t0\t0\t103I1414M29D\t99933;size=1;\tOTU_15\n+H\t24\t1361\t100.0\t+\t0\t0\t1361M\t241380;size=1;\tOTU_25\n+H\t25\t1453\t100.0\t+\t0\t0\t1453M\t256833;size=1;\tOTU_26\n+H\t26\t1338\t100.0\t+\t0\t0\t1338M\t114582;size=1;\tOTU_27\n+H\t11\t1390\t99.2\t+\t0\t0\t20D1370M131I\t250316;size=1;\tOTU_12\n+H\t27\t1366\t100.0\t+\t0\t0\t1366M\t243013;size=1;\tOTU_28\n+H\t28\t1485\t100.0\t
+\t0\t0\t1485M\t343226;size=1;\tOTU_29\n+H\t1\t1438\t98.9\t+\t0\t0\t20D1418M36I\t320475;size=1;\tOTU_2\n+H\t29\t1473\t100.0\t+\t0\t0\t1473M\t235527;size=1;\tOTU_30\n+H\t30\t1466\t100.0\t+\t0\t0\t1466M\t68981;size=1;\tOTU_31\n+H\t31\t1532\t100.0\t+\t0\t0\t1532M\t339114;size=1;\tOTU_32\n+H\t32\t1463\t100.0\t+\t0\t0\t1463M\t270810;size=1;\tOTU_33\n+H\t33\t1463\t100.0\t+\t0\t0\t1463M\t323827;size=1;\tOTU_34\n+H\t34\t1494\t100.0\t+\t0\t0\t1494M\t343781;size=1;\tOTU_35\n+N\t*\t*\t*\t.\t*\t*\t*\t354371;size=1;\t*\n+H\t305\t1455\t99.1\t+\t0\t0\t1455M\t68985;size=1;\tOTU_306\n+H\t35\t1491\t100.0\t+\t0\t0\t1491M\t356095;size=1;\tOTU_36\n+H\t36\t1477\t100.0\t+\t0\t0\t1477M\t354780;size=1;\tOTU_37\n+H\t37\t1418\t100.0\t+\t0\t0\t1418M\t248390;size=1;\tOTU_38\n+H\t17\t1500\t97.4\t+\t0\t0\t832MD180MD486M\t232506;size=1;\tOTU_18\n+H\t3\t1447\t99.9\t+\t0\t0\t3I1447M\t331619;size=1;\tOTU_4\n+H\t38\t1503\t100.0\t+\t0\t0\t1503M\t234677;size=1;\tOTU_39\n+H\t305\t1438\t99.0\t+\t0\t0\t20D828MI590M36I\t234582;size=1;\tOTU_306\n+H\t39\t1523\t100.0\t+\t0\t0\t1523M\t228291;size=1;\tOTU_40\n+H\t305\t1439\t99.2\t+\t0\t0\t20D1419M36I\t355638;size=1;\tOTU_306\n+H\t305\t1439\t99.3\t+\t0\t0\t20D1419M36I\t336753;size=1;\tOTU_306\n+H\t40\t1505\t100.0\t+\t0\t0\t1505M\t161217;size=1;\tOTU_41\n+H\t305\t1439\t99.0\t+\t0\t0\t20D1419M36I\t314677;size=1;\tOTU_306\n+H\t305\t1455\t98.9\t+\t0\t0\t1455M\t120789;size=1;\tOTU_306\n+N\t*\t*\t*\t.\t*\t*\t*\t337387;size=1;\t*\n+H\t24\t1473\t99.4\t+\t0\t0\t20D468MD893M91D\t245132;size=1;\tOTU_25\n+H\t11\t1477\t98.6\t+\t0\t0\t20D1408MI49M43I\t309969;size=1;\tOTU_12\n+H\t41\t1480\t100.0\t+\t0\t0\t1480M\t156314;size=1;\tOTU_42\n+H\t1\t1429\t98.9\t+\t0\t0\t11D1418M36I\t309196;size=1;\tOTU_2\n+H\t224\t1449\t99.7\t+\t0\t0\t1449M\t230233;size=1;\tOTU_225\n+H\t17\t1378\t99.7\t+\t0\t0\t6D1372M126I\t138696;size=1;\tOTU_18\n+H\t42\t1355\t100.0\t+\t0\t0\t1355M\t138691;size=1;\tOTU_43\n+H\t43\t1351\t100.0\t+\t0\t0\t1351M\t323976;size=1;\tOTU_44\n+H\t44\t1468\t100.0\t+\t0\t0\t1468
M\t129024;size=1;\tOTU_45\n+H\t45\t1449\t100.0\t+\t0\t0\t1449M\t356435;size=1;\tOTU_46\n+H\t46\t1429\t100.0\t+\t0\t0\t1429M\t356183;size=1;\tOTU_47\n+H\t47\t1444\t100.0\t+\t0\t0\t1444M\t278744;size=1;\tOTU_48\n+H\t48\t1492\t100.0\t+\t0\t0\t1492M\t254829;size=1;\tOTU_49\n+H\t49\t1432\t100.0\t+\t0\t0\t1432M\t166245;size=1;\tOTU_50\n+H\t50\t1483\t100.0\t+\t0\t0\t1483M\t334050;size=1;\tOTU_51\n+H\t51\t1491\t100.0\t+\t0\t0\t1491M\t334058;size=1;\tOTU_52\n+H\t11\t1490\t98.2\t+\t0\t0\t20D954MD454MI61M31I\t234804;size=1;\tOTU_12\n+H\t52\t1483\t100.0\t+\t0\t0\t1483M\t62482;size=1;\tOTU_53\n+H\t53\t1482\t100.0\t+\t0\t0\t1482M\t62487;size=1;\tOTU_54\n+H\t35\t1489\t97.2\t+\t0\t0\t1111M2I378M\t316789;size=1;\tOTU_36\n+H\t54\t1482\t100.0\t+\t0\t0\t1482M\t173254;size=1;\tOTU_'..b'01197;size=1;\tOTU_202\n+H\t305\t1439\t99.2\t+\t0\t0\t20D1419M36I\t331948;size=1;\tOTU_306\n+H\t280\t1490\t99.8\t+\t0\t0\t1490M\t235263;size=1;\tOTU_281\n+H\t8\t1439\t98.1\t+\t0\t0\t1439M\t346583;size=1;\tOTU_9\n+H\t8\t1439\t97.8\t+\t0\t0\t1439M\t349272;size=1;\tOTU_9\n+H\t515\t1500\t100.0\t+\t0\t0\t1500M\t329380;size=1;\tOTU_516\n+H\t219\t1501\t98.0\t+\t0\t0\t34D764MD702M29I\t346039;size=1;\tOTU_220\n+N\t*\t*\t*\t.\t*\t*\t*\t257445;size=1;\t*\n+H\t238\t1439\t99.2\t+\t0\t0\t1439M\t316154;size=1;\tOTU_239\n+H\t486\t1440\t99.8\t+\t0\t0\t416MI1024M6I\t124464;size=1;\tOTU_487\n+H\t516\t1369\t100.0\t+\t0\t0\t1369M\t270990;size=1;\tOTU_517\n+H\t517\t1413\t100.0\t+\t0\t0\t1413M\t324499;size=1;\tOTU_518\n+H\t8\t1439\t97.8\t+\t0\t0\t1439M\t305410;size=1;\tOTU_9\n+H\t518\t1412\t100.0\t+\t0\t0\t1412M\t141383;size=1;\tOTU_519\n+H\t413\t1450\t99.7\t+\t0\t0\t1450M\t347465;size=1;\tOTU_414\n+H\t311\t1463\t98.2\t+\t0\t0\t1463M\t339161;size=1;\tOTU_312\n+H\t339\t1506\t99.9\t+\t0\t0\t95D1364M47D\t331178;size=1;\tOTU_340\n+H\t255\t1450\t99.7\t+\t0\t0\t20D1430M28I\t356196;size=1;\tOTU_256\n+H\t519\t1394\t100.0\t+\t0\t0\t1394M\t268436;size=1;\tOTU_520\n+H\t45\t1447\t99.9\t+\t0\t0\t1447M2I\t354925;size=1;\tOTU_46\n+H\t305\t143
9\t99.1\t+\t0\t0\t20D1419M36I\t334206;size=1;\tOTU_306\n+H\t520\t1343\t100.0\t+\t0\t0\t1343M\t287458;size=1;\tOTU_521\n+H\t185\t1484\t98.4\t+\t0\t0\t1484MI\t136965;size=1;\tOTU_186\n+H\t359\t1450\t99.2\t+\t0\t0\t20D1430M37I\t233543;size=1;\tOTU_360\n+H\t143\t1449\t99.1\t+\t0\t0\t1447M2D\t228729;size=1;\tOTU_144\n+H\t413\t1453\t98.6\t+\t0\t0\t1386M3D64M\t305093;size=1;\tOTU_414\n+H\t4\t1449\t98.0\t+\t0\t0\t893M3I553M3D\t233624;size=1;\tOTU_5\n+H\t521\t1490\t100.0\t+\t0\t0\t1490M\t326714;size=1;\tOTU_522\n+H\t522\t1486\t100.0\t+\t0\t0\t1486M\t254309;size=1;\tOTU_523\n+H\t383\t1466\t99.4\t+\t0\t0\t1466M\t331845;size=1;\tOTU_384\n+H\t523\t1538\t100.0\t+\t0\t0\t1538M\t366993;size=1;\tOTU_524\n+H\t524\t1467\t100.0\t+\t0\t0\t1467M\t232909;size=1;\tOTU_525\n+H\t525\t1505\t100.0\t+\t0\t0\t1505M\t237311;size=1;\tOTU_526\n+N\t*\t*\t*\t.\t*\t*\t*\t316923;size=1;\t*\n+N\t*\t*\t*\t.\t*\t*\t*\t336138;size=1;\t*\n+H\t165\t1464\t97.1\t+\t0\t0\t1464M21I\t311229;size=1;\tOTU_166\n+N\t*\t*\t*\t.\t*\t*\t*\t356209;size=1;\t*\n+H\t205\t1450\t99.6\t+\t0\t0\t821MI629M\t334492;size=1;\tOTU_206\n+H\t321\t1489\t99.3\t+\t0\t0\t44I948MI28MI508M5D\t140509;size=1;\tOTU_322\n+H\t526\t1451\t100.0\t+\t0\t0\t1451M\t348936;size=1;\tOTU_527\n+N\t*\t*\t*\t.\t*\t*\t*\t189534;size=1;\t*\n+H\t11\t1489\t98.6\t+\t0\t0\t20D1408MI61M31I\t352430;size=1;\tOTU_12\n+H\t143\t1449\t99.4\t+\t0\t0\t1447M2D\t347720;size=1;\tOTU_144\n+H\t527\t1345\t100.0\t+\t0\t0\t1345M\t167302;size=1;\tOTU_528\n+H\t102\t1498\t99.2\t+\t0\t0\t1498M\t231599;size=1;\tOTU_103\n+H\t528\t1553\t100.0\t+\t0\t0\t1553M\t134559;size=1;\tOTU_529\n+H\t305\t1439\t99.1\t+\t0\t0\t20D1419M36I\t356149;size=1;\tOTU_306\n+H\t529\t1335\t100.0\t+\t0\t0\t1335M\t222960;size=1;\tOTU_530\n+H\t215\t1513\t99.1\t+\t0\t0\t1513M\t115456;size=1;\tOTU_216\n+N\t*\t*\t*\t.\t*\t*\t*\t342719;size=1;\t*\n+H\t102\t1498\t99.6\t+\t0\t0\t1498M\t314571;size=1;\tOTU_103\n+H\t13\t1501\t99.9\t+\t0\t0\t1501M\t321622;size=1;\tOTU_14\n+H\t530\t1525\t100.0\t+\t0\t0\t1525M\t252888;size=1
;\tOTU_531\n+H\t1\t1438\t99.3\t+\t0\t0\t20D1418M36I\t230512;size=1;\tOTU_2\n+H\t170\t1534\t98.9\t+\t0\t0\t1506M28D\t242585;size=1;\tOTU_171\n+N\t*\t*\t*\t.\t*\t*\t*\t351578;size=1;\t*\n+H\t102\t1475\t98.5\t+\t0\t0\t24I170MD1304M\t308605;size=1;\tOTU_103\n+H\t83\t1476\t97.4\t+\t0\t0\t9I188MD1194MD92M9I\t179818;size=1;\tOTU_84\n+H\t311\t1472\t97.7\t+\t0\t0\t20I773MD670M28D\t126228;size=1;\tOTU_312\n+H\t305\t1439\t99.1\t+\t0\t0\t20D1419M36I\t342655;size=1;\tOTU_306\n+H\t11\t1483\t98.8\t+\t0\t0\t20D1408MI55M37I\t136970;size=1;\tOTU_12\n+H\t531\t1477\t100.0\t+\t0\t0\t1477M\t356680;size=1;\tOTU_532\n+H\t532\t1515\t100.0\t+\t0\t0\t1515M\t337091;size=1;\tOTU_533\n+H\t359\t1450\t98.6\t+\t0\t0\t20D1430M37I\t233221;size=1;\tOTU_360\n+H\t122\t1494\t98.3\t+\t0\t0\t1494M\t229462;size=1;\tOTU_123\n+H\t183\t1461\t99.7\t+\t0\t0\t1461M22I\t256095;size=1;\tOTU_184\n+H\t102\t1499\t98.5\t+\t0\t0\t194MD1304M\t333374;size=1;\tOTU_103\n+H\t533\t1471\t100.0\t+\t0\t0\t1471M\t150135;size=1;\tOTU_534\n+H\t173\t1481\t99.8\t+\t0\t0\t1481M2I\t314740;size=1;\tOTU_174\n+H\t534\t1475\t100.0\t+\t0\t0\t1475M\t347843;size=1;\tOTU_535\n+H\t63\t1478\t97.8\t+\t0\t0\t150D49MI1235M44D\t315908;size=1;\tOTU_64\n+H\t143\t1489\t99.4\t+\t0\t0\t8D1447M34D\t142421;size=1;\tOTU_144\n+H\t305\t1439\t98.9\t+\t0\t0\t20D1419M36I\t233774;size=1;\tOTU_306\n+H\t1\t1438\t99.4\t+\t0\t0\t20D1418M36I\t324263;size=1;\tOTU_2\n+H\t535\t1412\t100.0\t+\t0\t0\t1412M\t203214;size=1;\tOTU_536\n+N\t*\t*\t*\t.\t*\t*\t*\t324270;size=1;\t*\n+N\t*\t*\t*\t.\t*\t*\t*\t339568;size=1;\t*\n' |
b |
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/test-data/otu.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usearch_map_reads_to_otu/test-data/otu.fasta Thu May 29 00:51:18 2014 -0400 |
b |
b'@@ -0,0 +1,28685 @@\n+>248442;size=4;\n+AGAGTTTGATCCTGGCTCAGATTGAACGCTGGCGGCAGGCCTAACACATGCAAGTCGAGCGCGAAATCTCCTTCGGGAGT\n+GAGTAGAGCGGCGGACGGGTGAGTAACGCGTAGGAATCTACCCAGTGGTGGGGGACAACCTGGGGAAACCCAGGCTAATA\n+CCGCATACGCCCTACGGGGGAAAGCGGGGGCTCTCTTCGGAGACCTCGCGCCATTGGATGAGCCTGCGTTGGATTAGCTA\n+GTTGGTGGGGTAAAGGCCTACCAAGGCGACGATCCATAGCTGGTCTGAGAGGACGATCAGCCACACTGGGACTGAGACAC\n+GGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCCATGCCGCGTGTGTG\n+AAGAAGGCTCTAGGGTTGTAAAGCACTTTCAGTAGGGAGAAAAAGCTCATGTTTAATAGATGTGAGTGTTGATGTTACCT\n+ACAGAAGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCG\n+TAAAGCGCGCGTAGGCGGTTTGTTAAGTCGGATGTGAAAGCCCCGGGCTCAACCTGGGAATTGCATTCGATACTGATCGA\n+CTAGAGTACGAGAGAGGGAGGTAGAATTCCACGTGTAGCGGTGAAATGCGTAGATATGTGGAGGAATACCGGTGGCGAAG\n+GCGGCCTCCTGGCTCGATACTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGC\n+CGTAAACGATGTCAACTAGCCGTTGGGGAACTTGATTCCTTAGTGGCGCAGCTAACGCAATAAGTTGACCGCCTGGGGAG\n+TACGGCCGCAAGGTTAAAACTCAAATGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGCAAC\n+GCGAAGAACCTTACCAGGTCTTGACATCCTGAAAACTTTCCAGAGATGGATTGGTGCCTTCGGGAATTCAGTGACAGGTG\n+CTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGTAACGAGCGCAACCCTTGTCCTTAGTTGCCA\n+GCACGTAATGGTGGGAACTCTAAGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGACGACGTCAAGTCATCATGGCC\n+CTTACGACCTGGGCTACACACGTGCTACAATGGTCGGTACAGACGGCTGCGAACCCGCGAGGGGGAGCGAATCCGAGAAA\n+ACCGATCGTAGTCCGGATTGGAGTCTGCAACTCGACTCCATGAAGTCGGAATCGCTAGTAATCGCGAATCAGAATGTCGC\n+GGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGGGAGTGGGTTGCTCCAGAAGTGGTTAGCCTAACC\n+TTAGGGAGGGCGATCACCACGGAGTGATTCATGACTGGGGTGAAGTCGTAACAAGGTAACC\n+>126633;size=3;\n+AACGTACGCTGGCGGCACGCCTAACACATGCAAGTCGAACGCAGTAGCAATACTGAGTGGCAAACGGGTGAGTATAATGT\n+GGGAATCTGCCTTTTGGTTTGGAATAACACGGGGAAACTTGTGCTAATACCGAATAAGCCCTTACGGGGAAAGTTTTAAC\n+GCCGAAAGATGAGCCTGCACTTGATTAGCTAGTTGGTAAGGTAAAAGCTTACCAAGGCAACGATCAATAGCTGTTCTTAG\n+AGGAAGACCAGCCACATTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATCTTGCACAATGGGG\n+GAAACCCTGATGCAGCGATGCCGCGTGAGTGAAGAGGGCCCTTGGGTTGT
AAAACTCTTTCGTCGGGGAAGAAAATGACT\n+GTACCCGAATAAGAAGGTCCGGCTAACTTCGTGCCAGCAGCCGCGGTAATACGAAGGGACCTAGCGTAGTTCGGAATTAC\n+TGGGCTTAAAGAGCTCGTAGGTGGTTAAAAAAGTTGATGGTGAAATCCCAAGGCTCAACCTTGGAACTGCCATCAAAACT\n+TTTTAGCTAGAGTGTGATAGAGGTAAGTGGAATTTCTAGTGTAGAGGTGAAATTCGTAGATATTAGAAAGAACACCAAAT\n+GCGAAGGCAACTTACTGGGTCACTACTGACACTGAGGAGCGAAAGCATGGGTAGCGAAGAGGATTAGATACCCTCGTAGT\n+CCATGCCGTAAACGATGTGTGCTAGACGTTGGAAATATATTTTTCAGTGTCGCAGCGAAAGCATTAAGCACACCGCCTGG\n+GGAGTACGACCGCAAGGTTAAAACTCAAATGAATTGACGGGGACCCGCACAAGCAGTGGAGCATGTGGTTTAATTCGAAG\n+ATACGCGCAGAACCTTACCAACACTTGACATGTTCGTCGCGAAACTAAGAGATTAGTTTTTTCAGTTCGGCTGGACGAAA\n+CACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCCTACTTTT\n+AGTTGCCACCATTTAGTTGGGCACTTTAAAAGAACTGCCAGTGATAAGCTGGAGGAAGGTGGGGATGACGTCAAGTCCTC\n+ATGGCCCTTATGTGTTGGGCTACACACGTGCTACAATGGTACTTACAATGGGATGCAAAGAGGTGACTCTTAGCTAATCC\n+CTAAAATGTACCTCAGTTCGGATTGTACTCTGTAACTCGAGTGCATGAAGCTGGAATTGCTAGTAATCGCGGATCAGCGC\n+GCCGCGGTGAATACGTTCCCGGGTCTTGTACACACCGCCCGTCACACCATGGAAGTTGGTTACACCTTAAGGCAAAGCTT\n+ATACCTTTGACTACGGTACGATCAGCAACTGGGGTGAAGTCGTAACAAGGTAGCCGTAGGGGAACCTGCGGCTGGATTAC\n+CTCCTTTCTAAGGA\n+>340660;size=3;\n+AGAGTTTGATCATGGCTCAGGACGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGAGCGATGAAGCACCTTCGGGTGT\n+GAATTAGCGGCGAACGGGTGAGGAACACGTGAGAAATCTGCCTTCAACACTGGGATAACTCCGGGAAACCGGGGCTAATA\n+CCGGATATGAAACCTGCGGGCATCCGCGGGTTTGGAAAGTTTTTCGGTTGAAGATGATCTCGCGGCCTATCAGCTTGTTG\n+GTGAGGTAATGGCTCACCAAGGCAACGACGGGTAGCTGGTCTGAGAGGATGATCAGCCACACTGGGACTGAGACACGGCC\n+CAGACTCCTACGGGAGGCAGCAGTGGGGAATCTTGCGCAATGGGCGAAAGCCTGACGCAGCAACGCCGCGTGCGGGATGA\n+AGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCACCGGCTAACTAC\n+GTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGAGCTCGTAGGCGGTTAGAC\n+ACGTCGGATGTGAAAACTGGGGGCTCAACCCCCAGCCTGCATTCGATACGGGAGGACTCGAGGACGGCAGGGGAGACTGG\n+AACTTCTGGTGTAGCGGTGGAATGCGCAGATATCAGAAAGAACACCAATGGCGAAGGCAGGTCTCTGGGCCGATCCTGAC\n+GCTGAGGAACGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACTATGGACGCTAGGTGTG\n+GGGGACATTCCACGTTCTCTGCG
CCGCAGCTAACGCATTAAGCGTCCCGCCTGGGGAGTACGACCGCAAGGTTAAA'..b'GTACCCGAATAAGAAGGTCCGGCTAACTTCGTGCCAGCAGCCGCGGTAATACGAAGGGA\n+CCTAGCGTAGTTCGGAATTACTGGGCTTAAAGAGTTCGTAGGTGGTTGAAAAAGTTGGTGGTGAAATCCCAGAGCTTAAC\n+TCTGGAACTGCCATCAAAACTTTTCAGCTAGAGTATGATAGAGGAAAGCAGAATTTCTAGTGTAGAGGTGAAATTCGTAG\n+ATATTAGAAAGAATACCAATTGCGAAGGCAGCTTTCTGGATCATTACTGACGCTGAGGAACGAAAGCATGGGTAGCGAAG\n+AGGATTAGATACCCTCGTAGTCCATGCCGTAAACGATGTGTGTTAGACGTTGGAAATTTATTTTCAGTGTCGCAGTGAAA\n+GCGATAAACACACCGCCTGGGGAGTACGACCGCAAGGTTAAAACTCAAATGAATTGACGGGGACCCGCACAAGTAGTGGA\n+GCATGTGGTTTAATTCGAAGATACGCGCAGAACCTTACCAACACTTGACATGTTCGTCGCGACTCTAAGAGATTAGAGTT\n+TTCGGTTCGGCCGGACGAAACACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAA\n+CGAGCGCAACCCTCACTTTTAGTTGCCATCATTAAGTTGGGCACTCTGAAAGAACTGCCAGTGATAAGCTGGAGGAAGGT\n+GGGGATGACGTCAAGTCCTCATGGCCCTTACGTGTTGGGCTACACACGTGCTACAATGGTATCTACAACAGGAAGCAAGA\n+CGGCGACGTTAAGCAAATCCTTAAAAGATACCTCAGTTCGGATTGCACTCTGCAACTCGAGTGCATGAAGCTGGAATTAC\n+TAGTAATCGTGGATCAGCGTGCCACGGTGAATGCGTTCCCGGGTCTTGTACACACCGCCCGTCACACCATGGGAGTTGGT\n+TCTACCTTAAGGCAAGGTTTTAAACCCTTGACCACGGTATAGTCAGCGACTGGGGTGAAGTCGTAACAAGGTAGCCGTA\n+>324263;size=1;\n+AGGGTTTGATCATGGCTCAGAACGTACGCTGGCGGCACGCCTAACACATGCAAGTCGAACGCAGTAGCAATACTGAGTGG\n+CAAACGGGTGAGTATAATGTGGGAATCTGCCTTTTGGTTTGGAATAACACGGGGAAACTTGTGCTAATACCGAATAAGCC\n+CTTACGGGGAAAGTTTTAACGCCGAAAGATGAGCCCGCACTTGATTAGCTAGTTGGTAAGGTAAAAGCTTACCAAGGCAA\n+CGATCAATAGCTGTTCTTAGAGGAAGACCAGCCACATTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTG\n+GGGAATCTTGCACAATGGGGGAAACCCTGATGCAGCGATGCCGCGTGAGTGAAGAAGGCCCTTGGGTTGTAAAACTCTTT\n+CGTCGGGGAAGAAAATGACTGTACCCGAATAAGAAGGTCCGGCTAACTTCGTGCCAGCAGCCGCGGTAATACGAAGGGAC\n+CTAGCGTAGTTCGGAATTACTGGGCTTAAAGAGCTCGTAGGTGGTTAAAAAAGTTGATGGTGAAATCCCAAGGCTCAACC\n+TTGGAACTGCCATCAAAACTTTTTAGCTAGAGTGTGATAGAGGTAAGTGGAATTTCTAGTGTAGAGGTGAAATTCGTAGA\n+TATTAGAAAGAACACCAAATGCGAAGGCAACTTACTGGGTCACTACTGACACTGAGGAGCGAAAGCATGGGTAGCGAAGA\n+GGATTAGATACCCTCGTAGTCCATGCCGTAAACGATGTGTGCTAGACGTTGGAAATATATTTTTCAGTGTCGCAGCGAAA\n+GCATTAAGCACACCGCCTGGGGAGTACGACCGCAAGG
TTAAAACTCAAATGAATTGACGGGGACCCGCACAAGCAGTGGA\n+GCATGTGGTTTAATTCGAAGATACGCGCAGAACCTTACCAACGCTTGACATGTTCGTCGCGAGACTAAGAGATTAGTCTT\n+TTCAGTTTGGCTGGACGAAACACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAA\n+CGAGCGCAACCCCTACTTTTAGTTGCCACCATTTAGTTGGGCACTTTAAAAGAACTGCCAGTGATAAGCTGGAGGAAGGT\n+GGGGATGACGTCAAGTCCTCATGGCCCTTATGTGTTGGGCTACACACGTGCTACAATGGTACTTACAATGGGATGCAAAG\n+AGGCGACTCTTAGCTAATCCCTAAAATGTACCTCAGTTCGGATTGTACTCTGTAACTCGAGTGCATGAAGCTGGAATTGC\n+TAGTAATCGCGGATCAGCGCGCCGCGGTGAATACGTTCCCGGGTCTTGTACACACCGCCCGTCACACCATGGAAGTTGGT\n+TACACCTTAAGGCAAAGCTTATACCTTTGACTACGGTACGATCAGCAACTGGGGTGAAGTCATAACAAGGTAACCGTA\n+>203214;size=1;\n+GAGTTTGATCCTGGCTCAGGATGAACGCTGGCGGCGTGGATAAGTCATGCAAGTCAAGGGGGCTCGCAAGAGCAACCGGC\n+AGACGAGGTAGTAATAAGTAGGTACGTCCCTTCAAGTCACGAATAGCCCGTCGAAAGACGGGGTAATACGCGATAGTCCC\n+TTCGGGGTAAAGATTTATCGCTTGAAGAACGGCCTGCTCGGTATCAGCTAGTTGGCGGTGTAAAAGACCACCAAGGCTAT\n+GACGCCTAGGGGAGCTGAGAGGCTGACCCCCACCGATGGGACTGAGATACGGCCCATACACCTACGGGTGGCTGCAGACG\n+AGAATATTCCACAATGGACGAAAGTCTGATGGAGCGACGCCGCGTGCAGGATGAAGTTCTTCGGGATGTAAACTGCTTTT\n+ATGAGCGAGAAAGTTTATTGATCAGCTCATGAATAAGAGGTTGCTAAACTCGTGCCAGCAGCAGCGGTAATACGAGTGCC\n+TCAAGCGTTATCCGGAATCATTGGGCGTAAAGGGTGTGTAGGTGGTCGCGTTAGTCTTCCGTTAAATTCTTCGGCTCAAC\n+CGGGGGCATGCGGGGGAAACGGCGCGACTTGAGGATGCGAGAGGAAAGCGGAACTCATAGTGTAGCGGTGAAATGCGTTG\n+ATATTATGGGGAACACCAAATGCGAAGGCAGCTTTCTGGAGCATCCTGACACTGAAACACGAAAGCGTGGGTAGCGAACG\n+GGATTAGATACCCCGGTAGTCCACGCCCTAAACGATGATCACTAGCTATTCGGAGTATCGACCTCCAAGTGGCGTAGCTA\n+ACGCGTTAAGTGATCCGCCTGGGTAGTACGGCCGCAAGGCTAAAACTCAAAGGAATAGACGGGGACTTGCACAAGCGGAG\n+GAACATGTGGTTCAATTCGATGGTAAACGAAAAACCTTACCAGGGTTGGAAATCAGTTGAGATAGGCAGAAACGTCTATG\n+CCGCAAGGCAATTAGACAGGTGCTGCATGGTCGTCGTCAGTTCGTGGCTTGAGTTGTTCCCTTCAGTGGGGTAACGAACG\n+CAACCCCCGTTGCCTGGTATAAGTATCAGGCGAGACTGCCCAGGTTAACTGGGAGGAAGGTGGGGATGACGCCAGATCAG\n+CATGGCCCTTATACCTGGGCCTACACACGTGTTACAATGGCCGACTCACGCGAGGCGAAGAGGAAACTCGGAGCAAATCG\n+TAAAAAGTCGGCCAAGTTCGGATTGAGGGCTGCAACTCGCCCTCATGAAGCGGGATTCGCTAGTAATCGCAGATCAGCAT\n+GCTGCGGTGAATACGTTCTCAAGTCTTGT
ACTCACCGCCCGTCAACTCAAGGGAGCTGGGAATACCCGAAGTCGCCCTTG\n+GGCGCCTAAGGTAAGCTCAGTGACAGGGAGTAAGTCGTAACAAGGTAACCGT\n' |
b |
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/test-data/otu.rabund --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usearch_map_reads_to_otu/test-data/otu.rabund Thu May 29 00:51:18 2014 -0400 |
b |
@@ -0,0 +1,1 @@ +0.03 536 60 4 3 8 14 1 1 28 2 1 1 17 6 4 1 1 12 7 1 2 7 5 9 1 154 4 1 2 1 7 1 6 1 1 7 7 19 7 1 17 2 2 2 3 4 2 1 4 2 1 7 3 1 5 1 1 3 1 1 2 1 1 1 1 2 10 1 1 1 3 3 11 4 2 1 1 8 4 1 1 20 6 1 1 6 2 7 2 3 28 4 6 6 2 8 10 1 5 4 1 32 1 2 3 1 1 1 13 1 1 1 1 1 1 1 1 2 4 3 2 2 1 1 1 2 6 1 7 4 1 1 1 7 1 3 1 14 1 2 1 1 7 1 5 1 4 2 1 1 1 2 1 1 1 1 1 1 1 2 1 4 1 1 1 1 1 1 1 3 1 1 4 1 1 3 1 1 3 1 1 4 1 1 1 1 1 4 1 1 1 2 2 7 1 2 1 1 1 1 1 1 1 8 2 1 2 1 1 2 1 1 3 2 2 1 3 1 3 1 1 1 1 2 1 1 5 1 1 1 3 1 3 2 2 1 2 2 1 1 1 2 1 2 1 1 4 1 1 1 1 1 1 1 1 2 2 1 2 5 1 1 1 1 3 1 1 1 1 3 1 1 1 1 1 2 1 4 1 1 4 1 2 1 3 1 1 5 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 1 1 2 1 1 1 1 2 1 2 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 7 1 1 1 1 1 1 1 1 1 5 1 1 1 5 2 2 2 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 |
b |
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/test-data/otu_pre_table.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usearch_map_reads_to_otu/test-data/otu_pre_table.tabular Thu May 29 00:51:18 2014 -0400 |
b |
b'@@ -0,0 +1,536 @@\n+OTU_2\t60\t\tAACGTACGCTGGCGGCACGCCTAACACATGCAAGTCGAACGCAGTAGCAATACTGAGTGGCAAACGGGTGAGTATAATGTGGGAATCTGCCTTTTGGTTTGGAATAACACGGGGAAACTTGTGCTAATACCGAATAAGCCCTTACGGGGAAAGTTTTAACGCCGAAAGATGAGCCTGCACTTGATTAGCTAGTTGGTAAGGTAAAAGCTTACCAAGGCAACGATCAATAGCTGTTCTTAGAGGAAGACCAGCCACATTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATCTTGCACAATGGgggAAACCCTGATGCAGCGATGCCGCGTGAGTGAAGAGGGCCCTTGGGTTGTAAAACTCTTTCGTCGGGGAAGAAAATGACTGTACCCGAATAAGAAGGTCCGGCTAACTTCGTGCCAGCAGCCGCGGTAATACGAAGGGACCTAGCGTAGTTCGGAATTACTGGGCTTAAAGAGCTCGTAGGTGGTTAAaaaaGTTGATGGTGAAATCCCAAGGCTCAACCTTGGAACTGCCATCAAAACTTtttAGCTAGAGTGTGATAGAGGTAAGTGGAATTTCTAGTGTAGAGGTGAAATTCGTAGATATTAGAAAGAACACCAAATGCGAAGGCAACTTACTGGGTCACTACTGACACTGAGGAGCGAAAGCATGGGTAGCGAAGAGGATTAGATACCCTCGTAGTCCATGCCGTAAACGATGtgtgCTAGACGTTGGAAATatatTtttCAGTGTCGCAGCGAAAGCATTAAGCACACCGCCTGGGGAGTACGACCGCAAGGTTAAAACTCAAATGAATTGACGGGGACCCGCACAAGCAGTGGAGCATGTGGTTTAATTCGAAGATACGCGCAGAACCTTACCAACACTTGACATGTTCGTCGCGAAACTAAGAGATTAGTTttttCAGTTCGGCTGGACGAAACACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCCTACTTTTAGTTGCCACCATTTAGTTGGGCACTTTAAAAGAACTGCCAGTGATAAGCTGGAGGAAGGTGGGGATGACGTCAAGTCCTCATGGCCCTTATGTGTTGGGCTACacacGTGCTACAATGGTACTTACAATGGGATGCAAAGAGGTGACTCTTAGCTAATCCCTAAAATGTACCTCAGTTCGGATTGTACTCTGTAACTCGAGTGCATGAAGCTGGAATTGCTAGTAATCGCGGATCAGCgcgcCGCGGTGAATACGTTCCCGGGTCTTGTACacacCGCCCGTCACACCATGGAAGTTGGTTACACCTTAAGGCAAAGCTTATACCTTTGACTACGGTACGATCAGCAACTGGGGTGAAGTCGTAACAAGGTAGCCGTAGGGGAACCTGCGGCTGGATTACCTCCTTTCTAAGGA\n+OTU_1\t4\t\tAGAGTTTGATCCTGGCTCAGATTGAACGCTGGCGGCAGGCCTAACACATGCAAGTCGAGCGCGAAATCTCCTTCGGGAGTGAGTAGAGCGGCGGACGGGTGAGTAACGCGTAGGAATCTACCCAGTGGTGGgggACAACCTGGGGAAACCCAGGCTAATACCGCATACGCCCTACGGgggAAAGCGGgggCTctctTCGGAGACCTCGCGCCATTGGATGAGCCTGCGTTGGATTAGCTAGTTGGTGGGGTAAAGGCCTACCAAGGCGACGATCCATAGCTGGTCTGAGAGGACGATCAGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCGCAATGGgggAAACCCTGACGCAGCCATGCCGCGTgtgtgAAGAAGGCTCTAGGGTTGTAAAGCACTTTCAGTAGGGAGAAaaaGCTCATGTTTAATAGATGTGAGTGTTGATGTTACCTACAGAAGAAGCACCGG
CTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCgcgcgTAGGCGGTTTGTTAAGTCGGATGTGAAAGCCCCGGGCTCAACCTGGGAATTGCATTCGATACTGATCGACTAGAGTACGAgagagGGAGGTAGAATTCCACGTGTAGCGGTGAAATGCGTAGATATGTGGAGGAATACCGGTGGCGAAGGCGGCCTCCTGGCTCGATACTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGTCAACTAGCCGTTGGGGAACTTGATTCCTTAGTGGCGCAGCTAACGCAATAAGTTGACCGCCTGGGGAGTACGGCCGCAAGGTTAAAACTCAAATGAATTGACGGgggCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTGAAAACTTTCCAGAGATGGATTGGTGCCTTCGGGAATTCAGTGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGTAACGAGCGCAACCCTTGTCCTTAGTTGCCAGCACGTAATGGTGGGAACTCTAAGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGACGACGTCAAGTCATCATGGCCCTTACGACCTGGGCTACacacGTGCTACAATGGTCGGTACAGACGGCTGCGAACCCGCGAGGgggAGCGAATCCGAGAAAACCGATCGTAGTCCGGATTGGAGTCTGCAACTCGACTCCATGAAGTCGGAATCGCTAGTAATCGCGAATCAGAATGTCGCGGTGAATACGTTCCCGGGCCTTGTACacacCGCCCGTCACACCATGGGAGTGGGTTGCTCCAGAAGTGGTTAGCCTAACCTTAGGGAGGGCGATCACCACGGAGTGATTCATGACTGGGGTGAAGTCGTAACAAGGTAACC\n+OTU_3\t3\t\tAGAGTTTGATCATGGCTCAGGACGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGAGCGATGAAGCACCTTCGGGTGTGAATTAGCGGCGAACGGGTGAGGAACACGTGAGAAATCTGCCTTCAACACTGGGATAACTCCGGGAAACCGGGGCTAATACCGGATATGAAACCTGCGGGCATCCGCGGGTTTGGAAAGTTtttCGGTTGAAGATGATCTCGCGGCCTATCAGCTTGTTGGTGAGGTAATGGCTCACCAAGGCAACGACGGGTAGCTGGTCTGAGAGGATGATCAGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATCTTGCGCAATGGGCGAAAGCCTGACGCAGCAACGCCGCGTGCGGGATGAAGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCACCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGAGCTCGTAGGCGGTTAGACACGTCGGATGTGAAAACTGGgggCTCAACCcccAGCCTGCATTCGATACGGGAGGACTCGAGGACGGCAGGGGAGACTGGAACTTCTGGTGTAGCGGTGGAATGCGCAGATATCAGAAAGAACACCAATGGCGAAGGCAGGTCTCTGGGCCGATCCTGACGCTGAGGAACGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACTATGGACGCTAGGTGTGGgggACATTCCACGTTCTCTGCGCCGCAGCTAACGCATTAAGCGTCCCGCCTGGGGAGTACGACCGCAAGGTTAAAACTCAAAGGAATTGACGGgggCCCGCACAAGCAGCGGAGCATGCGGCTTAATTCGATGCAACGCGAAGAACCTTACCTAGGCTTGACATGCATTGAAAACTGTT
AGAGATAACAGGT'..b'GGCCCTAGGGTCGTAAAGCTCTTTCGCCAGGGATGATAATGACAGTACCTGGTAAAGAAACCCCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGgggTTAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGACTATTAAGTCAGGGGTGAAATCCCGGGGCTCAACCCCGGAACTGCCCTTGATACTGGTAGTCTTGAGTTCGAgagagGTGAGTGGAATTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAGGAACACCAGTGGCGAAGGCGGCTCACTGGCTCGATACTGACGCTGAGGTGCGAAAGTGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACACCGTAAACGATGAATGCCAGTCGTCGGGTTGCATGCAATTCGGTGACacacCTAACGGATTAAGCATTCCGCCTGGGGAGTACGGTCGCAAGATTAAAACTCAAAGGAATTGACGGgggCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGCAGAACCTTACCAACCCTTGACATCCTGTGCTACATGGAgagaTCCATGGTTCCCTTCGGGGACGCAGTGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTCGGTTAAGTCCGGCAACGAGCGCAACCCACACCCTTAGTTGCCAGCAGTTCGGCTGGGCACTCTAGGGGAACTGCCCGTGATAAGCGGGAGGAAGGTGTGGATGACGTCAAGTCCTCATGGCCCTTACGGGTTGGGCTACacacGTGCTACAATGGCAGTGACAATGGGTTAATCCCCAAaaaCTGTCTCAGTTCGGATTGTCGTCTGCAACTCGACGGCATGAAGTCGGAATCGCTAGTAATCGCGTAACAGCATGACGCGGTGAATACGTTCCCGGGCCTTGTACacacCGCCCGTCACATCATGGGAGTTGGGTTTACCCGAAGACGGTGCGCCAACCTGTTCGCAGGgggCAGCTGGCCACGGTAGGCTCAGCGACTGGGATGAAGTCGTAACAAGGTAGCCGTAGGGGAACCTGCGGCTGGATCACCTCCTTTCT\n+OTU_535\t1\t\tCGCTGGCGGCAGGCTTAACACATGCAAGTCGAGCGGTAACAGGGGAGCTTGCTCCCGCTGACGAGCGGCGGACGGGTGAGTAACGCGTAGGAATCTGCCCAATAGTGGgggATAGCCCGGGGAAACTCGGATTAATACCGCATACGCCCTACGGgggAAAGGCCGGGATCTTCGGACCGGTCGCTATTGGATGAGCCTGCGTAAGATTAGCTAGTTGGTAGGGTAAAGGCCTACCAAGGCGACGATCTTTAGCTGGTCTGAGAGGATGATCAGCCACACTGGAACTGAGACACGGTCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGGACAATGGgggCAACCCTGATCCAGCCATGCCGCGTgtgtgAAGAAGGCCTTAGGGTTGTAAAGCACTTTAAGTTGGGAGGAAGGCTCTGTAGCTAATATCTGCAGGGATTGACGTTACCAACAGAATAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCgcgcgTAGGCGGTTTGGTAAGTGTGATGTGAAAGCCCAGGGCTTAACCTTGGAACTGCATCACATACTGCCAGGCTAGAGTACGGTAGAGGggggTAGAATTCCACGTGTAGCGGTGAAATGCGTAGAGATGTGGAGGAATACCAGTGGCGAAGGCGGCCcccTGGATCGATACTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGTCAACTAGCCGTTGGAAGGGTAATCCTTTTAGTGGCGCAGCTAACGCTCTAAGTTGACCGCCTGGGGAGTACGGTCGCAAGATTAAAACTCAAATG
AATTGACGGgggCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGACGCAACGCGAAGAACCTTACCTGGTCTTGACATCCTGCGAACTTTCCAGAGATGGATTGGTGCCTTCGGGAGCGCAGTGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGTAACGAGCGCAACCCTTGTCCTTAGTTGCCATCATTTAGTTGGGCACTTTAGGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGACGACGTCAAGTCATCATGGCCCTTACGACCAGGGCTACacacGTGCTACAATGGAGCGTACAAAGGGTTGCAACCCCGCGAGGGTGAGCTAATCCCATAAAACGTTTCGTAGTCCGGATCGCAGTCTGCAACTCGACTGCGTGAAGTCGGAATCGCTAGTAATCGCGAATCAGAATGTCGCGGTGAATACGTTCCCGGGCCTTGTACacacCGCCCGTCACACCATGGGAGTGGGTTGCAAAAGAAGTGGGTAGCATAACCGCAAGGAGTGCGCTCACCACTTTGTGATTCATGACTGGGGTGAAGTCGTAACAAGGTAACCGTA\n+OTU_536\t1\t\tGAGTTTGATCCTGGCTCAGGATGAACGCTGGCGGCGTGGATAAGTCATGCAAGTCAAGGgggCTCGCAAGAGCAACCGGCAGACGAGGTAGTAATAAGTAGGTACGTCCCTTCAAGTCACGAATAGCCCGTCGAAAGACGGGGTAATACGCGATAGTCCCTTCGGGGTAAAGATTTATCGCTTGAAGAACGGCCTGCTCGGTATCAGCTAGTTGGCGGTGTAAAAGACCACCAAGGCTATGACGCCTAGGGGAGCTGAGAGGCTGACCcccACCGATGGGACTGAGATACGGCCCATACACCTACGGGTGGCTGCAGACGAGAATATTCCACAATGGACGAAAGTCTGATGGAGCGACGCCGCGTGCAGGATGAAGTTCTTCGGGATGTAAACTGCTTTTATGAGCGAGAAAGTTTATTGATCAGCTCATGAATAAGAGGTTGCTAAACTCGTGCCAGCAGCAGCGGTAATACGAGTGCCTCAAGCGTTATCCGGAATCATTGGGCGTAAAGGGTgtgtAGGTGGTCGCGTTAGTCTTCCGTTAAATTCTTCGGCTCAACCGGgggCATGCGGgggAAACGGCGCGACTTGAGGATGCGAGAGGAAAGCGGAACTCATAGTGTAGCGGTGAAATGCGTTGATATTATGGGGAACACCAAATGCGAAGGCAGCTTTCTGGAGCATCCTGACACTGAAACACGAAAGCGTGGGTAGCGAACGGGATTAGATACCCCGGTAGTCCACGCCCTAAACGATGATCACTAGCTATTCGGAGTATCGACCTCCAAGTGGCGTAGCTAACGCGTTAAGTGATCCGCCTGGGTAGTACGGCCGCAAGGCTAAAACTCAAAGGAATAGACGGGGACTTGCACAAGCGGAGGAACATGTGGTTCAATTCGATGGTAAACGAAaaaCCTTACCAGGGTTGGAAATCAGTTGAGATAGGCAGAAACGTCTATGCCGCAAGGCAATTAGACAGGTGCTGCATGGTCGTCGTCAGTTCGTGGCTTGAGTTGTTCCCTTCAGTGGGGTAACGAACGCAACCcccGTTGCCTGGTATAAGTATCAGGCGAGACTGCCCAGGTTAACTGGGAGGAAGGTGGGGATGACGCCAGATCAGCATGGCCCTTATACCTGGGCCTACacacGTGTTACAATGGCCGACTCACGCGAGGCGAAGAGGAAACTCGGAGCAAATCGTAAaaaGTCGGCCAAGTTCGGATTGAGGGCTGCAACTCGCCCTCATGAAGCGGGATTCGCTAGTAATCGCAGATCAGCATGCTGCGGTGAATACGTTCTCAAGTCTTGTACTCACCGCCCGTCAACTCAAGGGAGCTGGGAATACCCGAAGTCGCCCTTGGGCGCCTAAGGTAAGCTCAGTGACAGGG
AGTAAGTCGTAACAAGGTAACCGT\n' |
b |
diff -r 000000000000 -r c10d09023766 usearch_map_reads_to_otu/test-data/otu_relabel.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usearch_map_reads_to_otu/test-data/otu_relabel.fasta Thu May 29 00:51:18 2014 -0400 |
b |
b'@@ -0,0 +1,10558 @@\n+>OTU_1\n+AGAGTTTGATCCTGGCTCAGATTGAACGCTGGCGGCAGGCCTAACACATGCAAGTCGAGCGCGAAATCTCCTTCGGGAGT\n+GAGTAGAGCGGCGGACGGGTGAGTAACGCGTAGGAATCTACCCAGTGGTGGgggACAACCTGGGGAAACCCAGGCTAATA\n+CCGCATACGCCCTACGGgggAAAGCGGgggCTctctTCGGAGACCTCGCGCCATTGGATGAGCCTGCGTTGGATTAGCTA\n+GTTGGTGGGGTAAAGGCCTACCAAGGCGACGATCCATAGCTGGTCTGAGAGGACGATCAGCCACACTGGGACTGAGACAC\n+GGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCGCAATGGgggAAACCCTGACGCAGCCATGCCGCGTgtgtg\n+AAGAAGGCTCTAGGGTTGTAAAGCACTTTCAGTAGGGAGAAaaaGCTCATGTTTAATAGATGTGAGTGTTGATGTTACCT\n+ACAGAAGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCG\n+TAAAGCgcgcgTAGGCGGTTTGTTAAGTCGGATGTGAAAGCCCCGGGCTCAACCTGGGAATTGCATTCGATACTGATCGA\n+CTAGAGTACGAgagagGGAGGTAGAATTCCACGTGTAGCGGTGAAATGCGTAGATATGTGGAGGAATACCGGTGGCGAAG\n+GCGGCCTCCTGGCTCGATACTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGC\n+CGTAAACGATGTCAACTAGCCGTTGGGGAACTTGATTCCTTAGTGGCGCAGCTAACGCAATAAGTTGACCGCCTGGGGAG\n+TACGGCCGCAAGGTTAAAACTCAAATGAATTGACGGgggCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGCAAC\n+GCGAAGAACCTTACCAGGTCTTGACATCCTGAAAACTTTCCAGAGATGGATTGGTGCCTTCGGGAATTCAGTGACAGGTG\n+CTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGTAACGAGCGCAACCCTTGTCCTTAGTTGCCA\n+GCACGTAATGGTGGGAACTCTAAGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGACGACGTCAAGTCATCATGGCC\n+CTTACGACCTGGGCTACacacGTGCTACAATGGTCGGTACAGACGGCTGCGAACCCGCGAGGgggAGCGAATCCGAGAAA\n+ACCGATCGTAGTCCGGATTGGAGTCTGCAACTCGACTCCATGAAGTCGGAATCGCTAGTAATCGCGAATCAGAATGTCGC\n+GGTGAATACGTTCCCGGGCCTTGTACacacCGCCCGTCACACCATGGGAGTGGGTTGCTCCAGAAGTGGTTAGCCTAACC\n+TTAGGGAGGGCGATCACCACGGAGTGATTCATGACTGGGGTGAAGTCGTAACAAGGTAACC\n+>OTU_2\n+AACGTACGCTGGCGGCACGCCTAACACATGCAAGTCGAACGCAGTAGCAATACTGAGTGGCAAACGGGTGAGTATAATGT\n+GGGAATCTGCCTTTTGGTTTGGAATAACACGGGGAAACTTGTGCTAATACCGAATAAGCCCTTACGGGGAAAGTTTTAAC\n+GCCGAAAGATGAGCCTGCACTTGATTAGCTAGTTGGTAAGGTAAAAGCTTACCAAGGCAACGATCAATAGCTGTTCTTAG\n+AGGAAGACCAGCCACATTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATCTTGCACAATGGgg\n+gAAACCCTGATGCAGCGATGCCGCGTGAGTGAAGAGGGCCCTTGGGTTGTAAAACTCTTTCGTCGGGG
AAGAAAATGACT\n+GTACCCGAATAAGAAGGTCCGGCTAACTTCGTGCCAGCAGCCGCGGTAATACGAAGGGACCTAGCGTAGTTCGGAATTAC\n+TGGGCTTAAAGAGCTCGTAGGTGGTTAAaaaaGTTGATGGTGAAATCCCAAGGCTCAACCTTGGAACTGCCATCAAAACT\n+TtttAGCTAGAGTGTGATAGAGGTAAGTGGAATTTCTAGTGTAGAGGTGAAATTCGTAGATATTAGAAAGAACACCAAAT\n+GCGAAGGCAACTTACTGGGTCACTACTGACACTGAGGAGCGAAAGCATGGGTAGCGAAGAGGATTAGATACCCTCGTAGT\n+CCATGCCGTAAACGATGtgtgCTAGACGTTGGAAATatatTtttCAGTGTCGCAGCGAAAGCATTAAGCACACCGCCTGG\n+GGAGTACGACCGCAAGGTTAAAACTCAAATGAATTGACGGGGACCCGCACAAGCAGTGGAGCATGTGGTTTAATTCGAAG\n+ATACGCGCAGAACCTTACCAACACTTGACATGTTCGTCGCGAAACTAAGAGATTAGTTttttCAGTTCGGCTGGACGAAA\n+CACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCCTACTTTT\n+AGTTGCCACCATTTAGTTGGGCACTTTAAAAGAACTGCCAGTGATAAGCTGGAGGAAGGTGGGGATGACGTCAAGTCCTC\n+ATGGCCCTTATGTGTTGGGCTACacacGTGCTACAATGGTACTTACAATGGGATGCAAAGAGGTGACTCTTAGCTAATCC\n+CTAAAATGTACCTCAGTTCGGATTGTACTCTGTAACTCGAGTGCATGAAGCTGGAATTGCTAGTAATCGCGGATCAGCgc\n+gcCGCGGTGAATACGTTCCCGGGTCTTGTACacacCGCCCGTCACACCATGGAAGTTGGTTACACCTTAAGGCAAAGCTT\n+ATACCTTTGACTACGGTACGATCAGCAACTGGGGTGAAGTCGTAACAAGGTAGCCGTAGGGGAACCTGCGGCTGGATTAC\n+CTCCTTTCTAAGGA\n+>OTU_3\n+AGAGTTTGATCATGGCTCAGGACGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGAGCGATGAAGCACCTTCGGGTGT\n+GAATTAGCGGCGAACGGGTGAGGAACACGTGAGAAATCTGCCTTCAACACTGGGATAACTCCGGGAAACCGGGGCTAATA\n+CCGGATATGAAACCTGCGGGCATCCGCGGGTTTGGAAAGTTtttCGGTTGAAGATGATCTCGCGGCCTATCAGCTTGTTG\n+GTGAGGTAATGGCTCACCAAGGCAACGACGGGTAGCTGGTCTGAGAGGATGATCAGCCACACTGGGACTGAGACACGGCC\n+CAGACTCCTACGGGAGGCAGCAGTGGGGAATCTTGCGCAATGGGCGAAAGCCTGACGCAGCAACGCCGCGTGCGGGATGA\n+AGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCACCGGCTAACTAC\n+GTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGAGCTCGTAGGCGGTTAGAC\n+ACGTCGGATGTGAAAACTGGgggCTCAACCcccAGCCTGCATTCGATACGGGAGGACTCGAGGACGGCAGGGGAGACTGG\n+AACTTCTGGTGTAGCGGTGGAATGCGCAGATATCAGAAAGAACACCAATGGCGAAGGCAGGTCTCTGGGCCGATCCTGAC\n+GCTGAGGAACGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACTATGGACGCTAGGTGTG\n+GgggACATTCCACGTTCTCTGCGCCGCAGCTAACGCATTAAGCGTCCCGC
CTGGGGAGTACGACCGCAAGGTTAAAACTC\n+AAAGGAATTGACGGgggCCCG'..b'\n+ACGGAGGgggTTAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGACTATTAAGTCAGGGGTGAAATCCCG\n+GGGCTCAACCCCGGAACTGCCCTTGATACTGGTAGTCTTGAGTTCGAgagagGTGAGTGGAATTCCGAGTGTAGAGGTGA\n+AATTCGTAGATATTCGGAGGAACACCAGTGGCGAAGGCGGCTCACTGGCTCGATACTGACGCTGAGGTGCGAAAGTGTGG\n+GGAGCAAACAGGATTAGATACCCTGGTAGTCCACACCGTAAACGATGAATGCCAGTCGTCGGGTTGCATGCAATTCGGTG\n+ACacacCTAACGGATTAAGCATTCCGCCTGGGGAGTACGGTCGCAAGATTAAAACTCAAAGGAATTGACGGgggCCCGCA\n+CAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGCAGAACCTTACCAACCCTTGACATCCTGTGCTACATGGAgag\n+aTCCATGGTTCCCTTCGGGGACGCAGTGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTCGGTTAAGT\n+CCGGCAACGAGCGCAACCCACACCCTTAGTTGCCAGCAGTTCGGCTGGGCACTCTAGGGGAACTGCCCGTGATAAGCGGG\n+AGGAAGGTGTGGATGACGTCAAGTCCTCATGGCCCTTACGGGTTGGGCTACacacGTGCTACAATGGCAGTGACAATGGG\n+TTAATCCCCAAaaaCTGTCTCAGTTCGGATTGTCGTCTGCAACTCGACGGCATGAAGTCGGAATCGCTAGTAATCGCGTA\n+ACAGCATGACGCGGTGAATACGTTCCCGGGCCTTGTACacacCGCCCGTCACATCATGGGAGTTGGGTTTACCCGAAGAC\n+GGTGCGCCAACCTGTTCGCAGGgggCAGCTGGCCACGGTAGGCTCAGCGACTGGGATGAAGTCGTAACAAGGTAGCCGTA\n+GGGGAACCTGCGGCTGGATCACCTCCTTTCT\n+>OTU_535\n+CGCTGGCGGCAGGCTTAACACATGCAAGTCGAGCGGTAACAGGGGAGCTTGCTCCCGCTGACGAGCGGCGGACGGGTGAG\n+TAACGCGTAGGAATCTGCCCAATAGTGGgggATAGCCCGGGGAAACTCGGATTAATACCGCATACGCCCTACGGgggAAA\n+GGCCGGGATCTTCGGACCGGTCGCTATTGGATGAGCCTGCGTAAGATTAGCTAGTTGGTAGGGTAAAGGCCTACCAAGGC\n+GACGATCTTTAGCTGGTCTGAGAGGATGATCAGCCACACTGGAACTGAGACACGGTCCAGACTCCTACGGGAGGCAGCAG\n+TGGGGAATATTGGACAATGGgggCAACCCTGATCCAGCCATGCCGCGTgtgtgAAGAAGGCCTTAGGGTTGTAAAGCACT\n+TTAAGTTGGGAGGAAGGCTCTGTAGCTAATATCTGCAGGGATTGACGTTACCAACAGAATAAGCACCGGCTAACTCCGTG\n+CCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCgcgcgTAGGCGGTTTGGTAAG\n+TGTGATGTGAAAGCCCAGGGCTTAACCTTGGAACTGCATCACATACTGCCAGGCTAGAGTACGGTAGAGGggggTAGAAT\n+TCCACGTGTAGCGGTGAAATGCGTAGAGATGTGGAGGAATACCAGTGGCGAAGGCGGCCcccTGGATCGATACTGACGCT\n+GAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGTCAACTAGCCGTTGGA\n+AGGGTAATCCTTTTAGTGGCGCAGCTAACGCTCTAAGTTGACCGCCTGGGGAGTACGGTCGCAAGAT
TAAAACTCAAATG\n+AATTGACGGgggCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGACGCAACGCGAAGAACCTTACCTGGTCTTGACAT\n+CCTGCGAACTTTCCAGAGATGGATTGGTGCCTTCGGGAGCGCAGTGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCG\n+TGAGATGTTGGGTTAAGTCCCGTAACGAGCGCAACCCTTGTCCTTAGTTGCCATCATTTAGTTGGGCACTTTAGGGAGAC\n+TGCCGGTGACAAACCGGAGGAAGGTGGGGACGACGTCAAGTCATCATGGCCCTTACGACCAGGGCTACacacGTGCTACA\n+ATGGAGCGTACAAAGGGTTGCAACCCCGCGAGGGTGAGCTAATCCCATAAAACGTTTCGTAGTCCGGATCGCAGTCTGCA\n+ACTCGACTGCGTGAAGTCGGAATCGCTAGTAATCGCGAATCAGAATGTCGCGGTGAATACGTTCCCGGGCCTTGTACaca\n+cCGCCCGTCACACCATGGGAGTGGGTTGCAAAAGAAGTGGGTAGCATAACCGCAAGGAGTGCGCTCACCACTTTGTGATT\n+CATGACTGGGGTGAAGTCGTAACAAGGTAACCGTA\n+>OTU_536\n+GAGTTTGATCCTGGCTCAGGATGAACGCTGGCGGCGTGGATAAGTCATGCAAGTCAAGGgggCTCGCAAGAGCAACCGGC\n+AGACGAGGTAGTAATAAGTAGGTACGTCCCTTCAAGTCACGAATAGCCCGTCGAAAGACGGGGTAATACGCGATAGTCCC\n+TTCGGGGTAAAGATTTATCGCTTGAAGAACGGCCTGCTCGGTATCAGCTAGTTGGCGGTGTAAAAGACCACCAAGGCTAT\n+GACGCCTAGGGGAGCTGAGAGGCTGACCcccACCGATGGGACTGAGATACGGCCCATACACCTACGGGTGGCTGCAGACG\n+AGAATATTCCACAATGGACGAAAGTCTGATGGAGCGACGCCGCGTGCAGGATGAAGTTCTTCGGGATGTAAACTGCTTTT\n+ATGAGCGAGAAAGTTTATTGATCAGCTCATGAATAAGAGGTTGCTAAACTCGTGCCAGCAGCAGCGGTAATACGAGTGCC\n+TCAAGCGTTATCCGGAATCATTGGGCGTAAAGGGTgtgtAGGTGGTCGCGTTAGTCTTCCGTTAAATTCTTCGGCTCAAC\n+CGGgggCATGCGGgggAAACGGCGCGACTTGAGGATGCGAGAGGAAAGCGGAACTCATAGTGTAGCGGTGAAATGCGTTG\n+ATATTATGGGGAACACCAAATGCGAAGGCAGCTTTCTGGAGCATCCTGACACTGAAACACGAAAGCGTGGGTAGCGAACG\n+GGATTAGATACCCCGGTAGTCCACGCCCTAAACGATGATCACTAGCTATTCGGAGTATCGACCTCCAAGTGGCGTAGCTA\n+ACGCGTTAAGTGATCCGCCTGGGTAGTACGGCCGCAAGGCTAAAACTCAAAGGAATAGACGGGGACTTGCACAAGCGGAG\n+GAACATGTGGTTCAATTCGATGGTAAACGAAaaaCCTTACCAGGGTTGGAAATCAGTTGAGATAGGCAGAAACGTCTATG\n+CCGCAAGGCAATTAGACAGGTGCTGCATGGTCGTCGTCAGTTCGTGGCTTGAGTTGTTCCCTTCAGTGGGGTAACGAACG\n+CAACCcccGTTGCCTGGTATAAGTATCAGGCGAGACTGCCCAGGTTAACTGGGAGGAAGGTGGGGATGACGCCAGATCAG\n+CATGGCCCTTATACCTGGGCCTACacacGTGTTACAATGGCCGACTCACGCGAGGCGAAGAGGAAACTCGGAGCAAATCG\n+TAAaaaGTCGGCCAAGTTCGGATTGAGGGCTGCAACTCGCCCTCATGAAGCGGGATTCGCTAGTAATCGCAGATCAGCAT\n+GCTGCGGTGAATACGTTCTCAAGTCT
TGTACTCACCGCCCGTCAACTCAAGGGAGCTGGGAATACCCGAAGTCGCCCTTG\n+GGCGCCTAAGGTAAGCTCAGTGACAGGGAGTAAGTCGTAACAAGGTAACCGT\n' |