Repository 'vsearch'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/vsearch

Changeset 0:fae6527990af (2015-05-21)
Next changeset 1:8c4e2933a17a (2015-08-26)
Commit message:
Imported from capsule None
added:
alignment.xml
chimera.xml
clustering.xml
dereplication.xml
masking.xml
search.xml
shuffling.xml
sorting.xml
test-data/AF091148.fsa.bz2
test-data/AF091148_first_rep.fsa.bz2
test-data/BioMarKs5k.fsa.bz2
test-data/PR2-18S-rRNA-V4.derep.fsa.bz2
test-data/Rfam_11_0.repr.fasta.bz2
test-data/alignment_result1.fasta
test-data/chimera_result1.fasta
test-data/chimera_uchimeout_result1.tabular
test-data/clustering_blast6out_result1.tab
test-data/clustering_centroids_result1.fasta
test-data/clustering_notmatched_result1.fasta
test-data/db.fasta
test-data/dereplication_result1.fasta
test-data/masking_result1.fasta
test-data/masking_result2.fasta
test-data/query.fasta
test-data/search_blast6out_result1.tabular
test-data/search_dbmatched_result1.fasta
test-data/search_fastapairs_result2.fasta
test-data/search_matched_result2.fasta
test-data/search_userfields_result2.tabular
test-data/shuffling_result1.fasta
test-data/sorting_result1.fasta
test-data/sorting_result2.fasta
test-data/three_human_mRNA.fasta
tool_dependencies.xml
vsearch_macros.xml
b
diff -r 000000000000 -r fae6527990af alignment.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/alignment.xml Thu May 21 03:58:09 2015 -0400
[
@@ -0,0 +1,72 @@
+<tool id="vsearch_alignment" name="VSearch alignment" version="@VERSION@.0">
+    <!-- Galaxy wrapper for the vsearch "allpairs_global" mode: global pairwise alignment of all sequences in one FASTA file. -->
+    <description></description>
+    <macros>
+        <import>vsearch_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <command>
+<![CDATA[
+    vsearch
+        @GENERAL@
+        $acceptall
+        --id $id
+        --iddef $iddef
+        --allpairs_global "$infile"
+        --alnout $outfile
+        ## query_cov is an optional input: emit the flag only when a value was
+        ## entered, so that --query_cov is never left without its argument.
+        #if str($query_cov):
+            --query_cov $query_cov
+        #end if
+        @USERFIELDS@
+]]>
+    </command>
+    <inputs>
+        <param name="infile" type="data" format="fasta" label="Select your FASTA file" help="(--allpairs_global)" />
+        <expand macro="id_and_iddef" />
+        <!-- Boolean flag: expands to the literal option text when checked and to an empty string otherwise. -->
+        <param name="acceptall" type="boolean" truevalue="--acceptall" falsevalue="" checked="False"
+            label="Output all pairwise alignments" help="This option overrides all other accept/reject options including identity. (--acceptall)"/>
+        <param name="query_cov" type="float" value="" optional="True" label="Reject if fraction of query seq. aligned lower than this value"
+            help="(--query_cov)"/>
+        <expand macro="userfields" />
+    </inputs>
+    <outputs>
+        <data name="outfile" format="fasta" label="${tool.name} on ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" value="Rfam_11_0.repr.fasta.bz2" ftype="fasta" />
+            <param name="acceptall" value=""/>
+            <param name="id" value="0.97"/>
+            <param name="query_cov" value="0.95"/>
+            <param name="userfields" value="query,target"/>
+            <output name="outfile" file="alignment_result1.fasta" lines_diff="4" ftype="fasta" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+**What it does**
+
+Pairwise alignments of all sequences.
+
+
+Alignment options (most searching options also apply)
+  --allpairs_global FILENAME  perform global alignment of all sequence pairs
+  --alnout FILENAME           filename for human-readable alignment output
+  --acceptall                 output all pairwise alignments
+
+@EXTERNAL_DOCUMENTATION@
+
+-------
+
+@REFERENCES@
+
+
+]]>
+    </help>
+    <expand macro="citations" />
+</tool>
b
diff -r 000000000000 -r fae6527990af chimera.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chimera.xml Thu May 21 03:58:09 2015 -0400
[
@@ -0,0 +1,139 @@
+<tool id="vsearch_chimera_detection" name="VSearch chimera detection" version="@VERSION@.0">
+    <!-- Galaxy wrapper for vsearch chimera detection, either de novo or against a reference database. -->
+    <description></description>
+    <macros>
+        <import>vsearch_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <command>
+<![CDATA[
+    vsearch
+        @GENERAL@
+        --abskew $abskew
+        --chimeras $outfile
+        --dn $dn
+        --mindiffs $mindiffs
+        --mindiv $mindiv
+        --minh $minh
+        --xn $xn
+        $self_param
+        $selfid_param
+        ## The input flag depends on the selected mode; reference mode additionally passes the database.
+        #if $detection_mode.detection_mode_select == 'denovo':
+            --uchime_denovo $detection_mode.infile_denovo
+        #else:
+            --uchime_ref $detection_mode.infile_reference
+            --db $detection_mode.db
+        #end if
+
+        ## Optional result files: each flag is emitted only when its value was selected in "outputs".
+        #if 'nonchimeras' in str($outputs):
+            --nonchimeras $nonchimeras
+        #end if
+        #if 'uchimealns' in str($outputs):
+            --uchimealns $uchimealns
+        #end if
+        #if 'uchimeout' in str($outputs):
+            --uchimeout $uchimeout
+        #end if
+
+]]>
+    </command>
+    <inputs>
+        <conditional name="detection_mode">
+            <param name="detection_mode_select" type="select" label="Detect chimeras" help="">
+                <option value="denovo">de novo</option>
+                <option value="reference">using a reference database</option>
+            </param>
+            <when value="denovo">
+                <param name="infile_denovo" type="data" format="fasta" label="Select your FASTA file" help="(--uchime_denovo)" />
+            </when>
+            <when value="reference">
+                <param name="infile_reference" type="data" format="fasta" label="Select your FASTA file" help="(--uchime_ref)" />
+                <param name="db" type="data" format="fasta" label="Select your reference database" help="(--db)" />
+            </when>
+        </conditional>
+
+        <param name="abskew" type="float" value="2.0" label="Minimal abundance ratio of parent vs chimera"
+            help="(--abskew)"/>
+        <param name="dn" type="float" value="1.4" label="'no' vote pseudo count"
+            help="(--dn)"/>
+        <param name="xn" type="float" value="8.0" label="'no' vote weight"
+            help="(--xn)"/>
+        <param name="mindiffs" type="integer" value="3" label="Minimum number of differences in segment"
+            help="(--mindiffs)"/>
+        <param name="mindiv" type="float" value="0.8" label="Minimum divergence from closest parent"
+            help="(--mindiv)"/>
+        <param name="minh" type="float" value="0.28" label="Minimum score"
+            help="(--minh)"/>
+
+        <expand macro="self_and_selfid" />
+
+        <param name="outputs" type="select" multiple="True" optional="True"
+            label="Select output formats" help="The chimeric sequences are always written; select any additional files to produce.">
+            <option value="nonchimeras">Non-chimeric sequences (--nonchimeras)</option>
+            <option value="uchimealns">Chimera alignments (--uchimealns)</option>
+            <option value="uchimeout">Chimera info to tab-separated (--uchimeout)</option>
+        </param>
+
+    </inputs>
+    <outputs>
+        <data name="outfile" format="fasta" label="${tool.name} on ${on_string}" />
+        <!-- Optional outputs. The leading "outputs and" check keeps each filter from failing when no optional output is selected. -->
+        <data name="nonchimeras" format="fasta" label="${tool.name} on ${on_string}: Non Chimera">
+            <filter>outputs and 'nonchimeras' in outputs</filter>
+        </data>
+        <data name="uchimealns" format="fasta" label="${tool.name} on ${on_string}: Chimera Alignments">
+            <filter>outputs and 'uchimealns' in outputs</filter>
+        </data>
+        <data name="uchimeout" format="tabular" label="${tool.name} on ${on_string}: Chimera Information">
+            <filter>outputs and 'uchimeout' in outputs</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="detection_mode_select" value="denovo"/>
+            <param name="infile_denovo" value="PR2-18S-rRNA-V4.derep.fsa.bz2" ftype="fasta" />
+            <param name="outputs" value="uchimeout"/>
+            <output name="outfile" file="chimera_result1.fasta" ftype="fasta" />
+            <output name="uchimeout" file="chimera_uchimeout_result1.tabular" ftype="tabular" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+**What it does**
+
+Sequence chimera detection based on a scoring function.
+
+Chimera detection options
+  --abskew REAL               min abundance ratio of parent vs chimera (2.0)
+  --alignwidth INT            width of alignment in uchimealn output (80)
+  --chimeras FILENAME         output chimeric sequences to file
+  --db FILENAME               reference database for --uchime_ref
+  --dn REAL                   'no' vote pseudo-count (1.4)
+  --mindiffs INT              minimum number of differences in segment (3)
+  --mindiv REAL               minimum divergence from closest parent (0.8)
+  --minh REAL                 minimum score (0.28)
+  --nonchimeras FILENAME      output non-chimeric sequences to file
+  --self                      exclude identical labels for --uchime_ref
+  --selfid                    exclude identical sequences for --uchime_ref
+  --uchime_denovo FILENAME    detect chimeras de novo
+  --uchime_ref FILENAME       detect chimeras using a reference database
+  --uchimealns FILENAME       output chimera alignments to file
+  --uchimeout FILENAME        output to chimera info to tab-separated file
+  --uchimeout5                make output compatible with uchime version 5
+  --xn REAL                   'no' vote weight (8.0)
+
+@EXTERNAL_DOCUMENTATION@
+
+-------
+
+@REFERENCES@
+
+
+]]>
+    </help>
+    <expand macro="citations" />
+</tool>
b
diff -r 000000000000 -r fae6527990af clustering.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/clustering.xml Thu May 21 03:58:09 2015 -0400
[
@@ -0,0 +1,173 @@
+<tool id="vsearch_clustering" name="VSearch clustering" version="@VERSION@.0">
+    <!-- Galaxy wrapper for the vsearch "cluster_fast" mode; result files are chosen through the "outputs" select. -->
+    <description></description>
+    <macros>
+        <import>vsearch_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <command>
+<![CDATA[
+    vsearch
+        @GENERAL@
+        --cluster_fast "$infile"
+        ##--cluster_smallmem FILENAME  cluster sequences using a small amount of memory
+        ##--clusters STRING            output each cluster to a separate FASTA file
+
+        #if $maxrejects:
+            --maxrejects $maxrejects
+        #end if
+        #if $maxaccepts:
+            --maxaccepts $maxaccepts
+        #end if
+
+        $cons_truncate
+        --id $id
+        ##--iddef $iddef
+        ## NOTE(review): iddef is not passed to vsearch here although the id_and_iddef macro is expanded in the inputs; confirm this is intended.
+
+        #if '--msaout' in str($outputs):
+            --msaout $msaout
+        #end if
+        #if '--consout' in str($outputs):
+            --consout $consout
+        #end if
+        #if '--centroids' in str($outputs):
+            --centroids $centroids
+        #end if
+        #if '--alnout' in str($outputs):
+            --alnout $alnout
+        #end if
+        #if '--blast6out' in str($outputs):
+            --blast6out $blast6out
+        #end if
+        #if '--notmatched' in str($outputs):
+            --notmatched $notmatched
+        #end if
+        #if '--fastapairs' in str($outputs):
+            --fastapairs $fastapairs
+        #end if
+        #if '--matched' in str($outputs):
+            --matched $matched
+        #end if
+        #if $qmask != 'no':
+            --qmask $qmask
+        #end if
+        ## NOTE(review): if the sizein/sizeout macros are booleans whose truevalue is the flag itself (the test
+        ## below sets sizeout to "--sizeout"), these blocks emit the flag twice; check vsearch_macros.xml.
+        #if $sizein:
+            --sizein $sizein
+        #end if
+        #if $sizeout:
+            --sizeout $sizeout
+        #end if
+        --strand $strand
+        ## usersort is a boolean whose truevalue already is "--usersort", so it is inserted as-is;
+        ## writing the flag in front of it would pass --usersort even when the box is unchecked.
+        $usersort
+
+]]>
+    </command>
+    <inputs>
+        <param name="infile" type="data" format="fasta" label="Select your FASTA file" help="(--cluster_fast)" />
+        <expand macro="id_and_iddef" />
+        <param name="cons_truncate" type="boolean" truevalue="--cons_truncate" falsevalue="" checked="False"
+            label="Do not ignore terminal gaps in MSA for consensus" help="(--cons_truncate)"/>
+        <param name="usersort" type="boolean" truevalue="--usersort" falsevalue="" checked="False"
+            label="Indicate that input sequences are presorted" help="(--usersort)"/>
+        <expand macro="qmask" />
+        <expand macro="sizein" />
+        <expand macro="sizeout" />
+        <expand macro="strand" />
+        <expand macro="maxrejects" />
+        <expand macro="maxaccepts" />
+        <expand macro="general_output">
+            <option value="--msaout">Multiple sequence alignments</option>
+            <option value="--consout">Cluster consensus sequences</option>
+            <option value="--centroids">Centroid sequences</option>
+            <option value="--notmatched">Write non-matching query sequences to separate file</option>
+            <option value="--matched">Write matching query sequences to separate file</option>
+        </expand>
+
+    </inputs>
+    <outputs>
+        <data name="msaout" format="fasta" label="${tool.name} on ${on_string}: Multiple Sequence Alignments">
+            <filter>'--msaout' in outputs</filter>
+        </data>
+        <data name="consout" format="fasta" label="${tool.name} on ${on_string}: Consensus Sequences">
+            <filter>'--consout' in outputs</filter>
+        </data>
+        <data name="centroids" format="fasta" label="${tool.name} on ${on_string}: Cluster centroids">
+            <filter>'--centroids' in outputs</filter>
+        </data>
+        <data name="alnout" format="fasta" label="${tool.name} on ${on_string}: Alignment">
+            <filter>'--alnout' in outputs</filter>
+        </data>
+        <data name="notmatched" format="fasta" label="${tool.name} on ${on_string}: Non-matched queries">
+            <filter>'--notmatched' in outputs</filter>
+        </data>
+        <data name="matched" format="fasta" label="${tool.name} on ${on_string}: Matching query sequences">
+            <filter>'--matched' in outputs</filter>
+        </data>
+        <data name="blast6out" format="tabular" label="${tool.name} on ${on_string}: BLAST like tabular">
+            <filter>'--blast6out' in outputs</filter>
+        </data>
+        <data name="fastapairs" format="fasta" label="${tool.name} on ${on_string}: Query/Target sequences">
+            <filter>'--fastapairs' in outputs</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" value="BioMarKs5k.fsa.bz2" ftype="fasta" />
+            <param name="id" value="0.99"/>
+            <param name="maxaccepts" value="1"/>
+            <param name="maxrejects" value="2"/>
+            <param name="sizeout" value="--sizeout"/>
+            <param name="outputs" value="--centroids,--alnout,--blast6out,--notmatched" />
+            <output name="centroids" file="clustering_centroids_result1.fasta" ftype="fasta" />
+            <output name="blast6out" file="clustering_blast6out_result1.tab" ftype="tabular" />
+            <output name="notmatched" file="clustering_notmatched_result1.fasta" ftype="fasta" />
+            <!-- The following result files would be too big -->
+            <!--output name="matched" file="clustering_matched_result1.fasta" ftype="fasta" /-->
+            <!--output name="alnout" file="clustering_alnout_result1.fasta" lines_diff="2" ftype="fasta" /-->
+            <!--output name="fastapairs" file="clustering_fastapairs_result1.fasta" ftype="fasta" /-->
+            <!--output name="msaout" file="clustering_msaout_result1.fasta" ftype="fasta" /-->
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+**What it does**
+
+vsearch implements a single-pass, greedy star-clustering algorithm, similar to the algorithms
+implemented in usearch, DNAclust and sumaclust for example.
+
+
+Clustering options (most searching options also apply)
+  --centroids FILENAME         output centroid sequences to FASTA file
+  --cluster_fast FILENAME      cluster sequences fast
+  --cluster_smallmem FILENAME  cluster sequences using a small amount of memory
+  --clusters STRING            output each cluster to a separate FASTA file
+  --consout FILENAME           output cluster consensus sequences to FASTA file
+  --cons_truncate              do not ignore terminal gaps in MSA for consensus
+  --id REAL                    reject if identity lower
+  --iddef INT                  id definition, 0-4=CD-HIT,all,int,MBL,BLAST (2)
+  --msaout FILENAME            output multiple seq. alignments to FASTA file 
+  --qmask                      mask seqs with dust, soft or no method (dust)
+  --sizein                     read abundance annotation from input
+  --sizeout                    write cluster abundances to centroid file
+  --strand                     cluster using "plus" or "both" strands (plus)
+  --usersort                   indicate that input sequences are presorted
+
+
+@EXTERNAL_DOCUMENTATION@
+
+-------
+
+@REFERENCES@
+
+
+]]>
+    </help>
+    <expand macro="citations" />
+</tool>
b
diff -r 000000000000 -r fae6527990af dereplication.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dereplication.xml Thu May 21 03:58:09 2015 -0400
[
@@ -0,0 +1,97 @@
+<tool id="vsearch_dereplication" name="VSearch dereplication" version="@VERSION@.0">
+    <!-- Galaxy wrapper for the vsearch "derep_fulllength" mode: merges strictly identical sequences of one FASTA file. -->
+    <description></description>
+    <macros>
+        <import>vsearch_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <command>
+<![CDATA[
+    vsearch
+        @GENERAL@
+        --derep_fulllength $infile
+        #if $maxuniquesize:
+            --maxuniquesize $maxuniquesize
+        #end if
+        #if $minuniquesize:
+            --minuniquesize $minuniquesize
+        #end if
+        --output $outfile
+        #if $sizein:
+            --sizein $sizein
+        #end if
+        #if $sizeout:
+            --sizeout $sizeout
+        #end if
+        --strand $strand
+        --topn $topn
+        ## NOTE(review): "uc" appears to name both an input (presumably from the uclust_like_output macro)
+        ## and the output dataset declared below; verify which of the two $uc resolves to here.
+        #if $uc:
+            --uc $uc
+        #end if
+]]>
+    </command>
+    <inputs>
+        <param name="infile" type="data" format="fasta" label="Select your FASTA file" help="(--derep_fulllength)" />
+        <expand macro="topn" />
+        <expand macro="sizein" />
+        <expand macro="sizeout" />
+        <expand macro="strand" />
+        <expand macro="uclust_like_output" />
+        <param name="minuniquesize" type="integer" value="" optional="True" label="Minimum abundance"
+            help="(--minuniquesize)"/>
+        <param name="maxuniquesize" type="integer" value="" optional="True" label="Maximum abundance"
+            help="(--maxuniquesize)"/>
+    </inputs>
+    <outputs>
+        <data name="outfile" format="fasta" label="${tool.name} on ${on_string}" />
+        <!-- UCLUST-like output is a tab-separated table rather than sequence data, hence the tabular datatype. -->
+        <data name="uc" format="tabular" label="${tool.name} on ${on_string}: UCLUST like output">
+            <filter>uc is True</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" value="AF091148_first_rep.fsa.bz2" ftype="fasta" />
+            <param name="strand" value="both" />
+            <param name="minuniquesize" value="1" />
+            <param name="maxuniquesize" value="100000" />
+            <param name="sizeout" value="--sizeout"/>
+            <param name="topn" value="10000" />
+            <output name="outfile" file="dereplication_result1.fasta" ftype="fasta" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+**What it does**
+
+Merge strictly identical sequences contained in filename. Identical sequences are
+defined as having the same length and the same string of nucleotides (case insensitive,
+T and U are considered the same).
+
+Dereplication options
+  --derep_fulllength FILENAME  dereplicate sequences in the given FASTA file
+  --maxuniquesize INT          maximum abundance for output from dereplication
+  --minuniquesize INT          minimum abundance for output from dereplication
+  --output FILENAME            output FASTA file
+  --sizein                     read abundance annotation from input
+  --sizeout                    write abundance annotation to output
+  --strand                     dereplicate "plus" or "both" strands (plus)
+  --topn INT                   output just the n most abundant sequences
+  --uc FILENAME                filename for UCLUST-like output
+
+
+@EXTERNAL_DOCUMENTATION@
+
+-------
+
+@REFERENCES@
+
+
+]]>
+    </help>
+    <expand macro="citations" />
+</tool>
b
diff -r 000000000000 -r fae6527990af masking.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/masking.xml Thu May 21 03:58:09 2015 -0400
[
@@ -0,0 +1,82 @@
+<tool id="vsearch_masking" name="VSearch masking" version="@VERSION@.0">
+    <!-- Galaxy wrapper for the vsearch "maskfasta" mode: masks the sequences of one FASTA file. -->
+    <description></description>
+    <macros>
+        <import>vsearch_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <command>
+<![CDATA[
+    vsearch
+        @GENERAL@
+        ## NOTE(review): the help below names the masking methods "dust", "soft" and "none", while this
+        ## test compares against 'no'; check the option values defined by the qmask macro.
+        #if $qmask != 'no':
+            --qmask $qmask
+        #end if
+        $hardmask
+        --maskfasta "$infile"
+        --output $outfile
+
+]]>
+    </command>
+    <inputs>
+        <param name="infile" type="data" format="fasta" label="Select your FASTA file" help="(--maskfasta)" />
+        <expand macro="qmask" />
+        <expand macro="hardmask" />
+    </inputs>
+    <outputs>
+        <data name="outfile" format="fasta" label="${tool.name} on ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" value="db.fasta" ftype="fasta" />
+            <param name="qmask" value="dust"/>
+            <param name="hardmask" value="True"/>
+            <output name="outfile" file="masking_result1.fasta" ftype="fasta" />
+        </test>
+        <test>
+            <param name="infile" value="db.fasta" ftype="fasta" />
+            <param name="qmask" value="soft"/>
+            <param name="hardmask" value="True"/>
+            <output name="outfile" file="masking_result2.fasta" ftype="fasta" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+**What it does**
+
+An input sequence can be composed of lower- or uppercase nucleotides. Lowercase nucleotides
+are silently set to uppercase before masking, unless the ``--qmask soft`` option is used.
+Here are the results of combined masking options ``--qmask`` (or ``--dbmask`` for database sequences) and ``--hardmask``, assuming each input sequence contains both lower and uppercase nucleotides:
+
+===== ======== ================================================
+qmask hardmask action
+===== ======== ================================================
+none  off      no masking, all symbols uppercased
+none  on       no masking, all symbols uppercased
+dust  off      masked symbols lowercased, others uppercased
+dust  on       masked symbols changed to Ns, others uppercased
+soft  off      lowercase symbols masked, no case changes
+soft  on       lowercase symbols masked and changed to Ns
+===== ======== ================================================
+
+Masking options
+  --hardmask                  mask by replacing with N instead of lower case
+  --maskfasta FILENAME        mask sequences in the given FASTA file
+  --output FILENAME           output to specified FASTA file
+  --qmask                     mask seqs with "dust", "soft" or "none" method (dust)
+
+@EXTERNAL_DOCUMENTATION@
+
+-------
+
+@REFERENCES@
+
+
+]]>
+    </help>
+    <expand macro="citations" />
+</tool>
b
diff -r 000000000000 -r fae6527990af search.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/search.xml Thu May 21 03:58:09 2015 -0400
[
b'@@ -0,0 +1,391 @@\n+<tool id="vsearch_search" name="VSearch search" version="@VERSION@.0">\n+    <description></description>\n+    <macros>\n+        <import>vsearch_macros.xml</import>\n+    </macros>\n+    <expand macro="requirements" />\n+    <expand macro="stdio" />\n+    <expand macro="version_command" />\n+    <command>\n+<![CDATA[\n+    vsearch\n+\n+        @GENERAL@\n+        --db $dbfile\n+        --dbmask $dbmask\n+\n+        ##--fulldp                    full dynamic programming alignment (always on)\n+        ##--gapext STRING             penalties for gap extension (2I/1E)\n+        ##--gapopen STRING            penalties for gap opening (20I/2E)\n+        $hardmask\n+        --id $id\n+        --iddef $iddef\n+\n+        #if $qmask != \'no\':\n+            --qmask $qmask\n+        #end if\n+\n+        ## --rowlen INT                width of alignment lines in alnout output (64)\n+\n+        $self_param\n+        $selfid_param\n+        #if $sizeout:\n+            --sizeout $sizeout\n+        #end if\n+        --strand $strand\n+        --usearch_global $queryfile\n+\n+        #if \'--alnout\' in str($outputs):\n+            --alnout $alnout\n+        #end if\n+        #if \'--blast6out\' in str($outputs):\n+            --blast6out $blast6out\n+        #end if\n+        #if \'--dbmatched\' in str($outputs):\n+            --dbmatched $dbmatched\n+        #end if\n+        #if \'--dbnotmatched\' in str($outputs):\n+            --dbnotmatched $dbnotmatched\n+        #end if\n+        #if \'--fastapairs\' in str($outputs):\n+            --fastapairs $fastapairs\n+        #end if\n+        #if \'--notmatched\' in str($outputs):\n+            --notmatched $notmatched\n+        #end if\n+        #if \'--matched\' in str($outputs):\n+            --matched $matched\n+        #end if\n+\n+        #if $adv_opts.adv_opts_selector == "advanced":\n+            $adv_opts.top_hits_only\n+            $adv_opts.rightjust\n+            $adv_opts.leftjust\n+            
--target_cov $adv_opts.target_cov\n+            --query_cov $adv_opts.query_cov\n+\n+\n+            #if $adv_opts.maxid:\n+                --maxid $adv_opts.maxid\n+            #end if\n+            #if $adv_opts.maxqt:\n+                --maxqt $adv_opts.maxqsize\n+            #end if\n+            #if $adv_opts.maxsizeratio:\n+                --maxsizeratio $adv_opts.maxsizeratio\n+            #end if\n+            #if $adv_opts.maxsl:\n+                --maxsl $adv_opts.maxsl\n+            #end if\n+            #if $adv_opts.mid:\n+                --mid $adv_opts.mid\n+            #end if\n+            #if $adv_opts.minqt:\n+                --minqt $adv_opts.minqt\n+            #end if\n+            #if $adv_opts.minsizeratio:\n+                --minsizeratio $adv_opts.minseqlength\n+            #end if\n+            #if $adv_opts.minsl:\n+                --minsl $adv_opts.minsl\n+            #end if\n+\n+            #if $adv_opts.mintsize:\n+                --mintsize $adv_opts.mintsize\n+            #end if\n+            #if $adv_opts.mismatch:\n+                --mismatch $adv_opts.mismatch\n+            #end if\n+            #if $adv_opts.maxqsize:\n+                --maxqsize $adv_opts.maxqsize\n+            #end if\n+            #if $adv_opts.mincols:\n+                --mincols $adv_opts.mincols\n+            #end if\n+            #if $adv_opts.maxsubs:\n+                --maxsubs $adv_opts.maxsubs\n+            #end if\n+            #if $adv_opts.maxrejects:\n+                --maxrejects $adv_opts.maxrejects\n+            #end if\n+            #if $adv_opts.maxaccepts:\n+                --maxaccepts $adv_opts.maxaccepts\n+            #end if\n+            #if $adv_opts.maxdiffs:\n+                --maxdiffs $adv_opts.maxdiffs\n+            #end if\n+            #if $adv_opts.maxgaps:\n+                --maxgaps $adv_opts.maxgaps\n+            #end if\n+            #if $adv_opts.maxhits:\n+                --maxhits $adv_opts.maxhits\n+            #end 
if\n+            #if $adv_opts.match:\n+                --match $adv_opts.match\n+            #end if\n+            #if $adv_opts.idprefix:\n+      '..b' --blast6out FILENAME        filename for blast-like tab-separated output\n+  --db FILENAME               filename for FASTA formatted database for search\n+  --dbmask                    mask db with "dust", "soft" or "none" method (dust)\n+  --dbmatched FILENAME        FASTA file for matching database sequences\n+  --dbnotmatched FILENAME     FASTA file for non-matching database sequences\n+  --fastapairs FILENAME       FASTA file with pairs of query and target\n+  --fulldp                    full dynamic programming alignment (always on)\n+  --gapext STRING             penalties for gap extension (2I/1E)\n+  --gapopen STRING            penalties for gap opening (20I/2E)\n+  --hardmask                  mask by replacing with N instead of lower case\n+  --id REAL                   reject if identity lower\n+  --iddef INT                 id definition, 0-4=CD-HIT,all,int,MBL,BLAST (2)\n+  --idprefix INT              reject if first n nucleotides do not match\n+  --idsuffix INT              reject if last n nucleotides do not match\n+  --leftjust                  reject if terminal gaps at alignment left end\n+  --match INT                 score for match (2)\n+  --matched FILENAME          FASTA file for matching query sequences\n+  --maxaccepts INT            number of hits to accept and show per strand (1)\n+  --maxdiffs INT              reject if more substitutions or indels\n+  --maxgaps INT               reject if more indels\n+  --maxhits INT               maximum number of hits to show (unlimited)\n+  --maxid REAL                reject if identity higher\n+  --maxqsize INT              reject if query abundance larger\n+  --maxqt REAL                reject if query/target length ratio higher\n+  --maxrejects INT            number of non-matching hits to consider (32)\n+  --maxsizeratio REAL         reject if 
query/target abundance ratio higher\n+  --maxsl REAL                reject if shorter/longer length ratio higher\n+  --maxsubs INT               reject if more substitutions\n+  --mid REAL                  reject if percent identity lower, ignoring gaps\n+  --mincols INT               reject if alignment length shorter\n+  --minqt REAL                reject if query/target length ratio lower\n+  --minsizeratio REAL         reject if query/target abundance ratio lower\n+  --minsl REAL                reject if shorter/longer length ratio lower\n+  --mintsize INT              reject if target abundance lower\n+  --mismatch INT              score for mismatch (-4)\n+  --notmatched FILENAME       FASTA file for non-matching query sequences\n+  --output_no_hits            output non-matching queries to output files\n+  --qmask                     mask query with "dust", "soft" or "none" method (dust)\n+  --query_cov REAL            reject if fraction of query seq. aligned lower\n+  --rightjust                 reject if terminal gaps at alignment right end\n+  --rowlen INT                width of alignment lines in alnout output (64)\n+  --self                      reject if labels identical\n+  --selfid                    reject if sequences identical\n+  --sizeout                   write abundance annotation to output\n+  --strand                    search "plus" or "both" strands (plus)\n+  --target_cov REAL           reject if fraction of target seq. 
aligned lower\n+  --top_hits_only             output only hits with identity equal to the best\n+  --uc FILENAME               filename for UCLUST-like output\n+  --uc_allhits                show all, not just top hit with uc output\n+  --usearch_global FILENAME   filename of queries for global alignment search\n+  --userfields STRING         fields to output in userout file\n+  --userout FILENAME          filename for user-defined tab-separated output\n+  --weak_id REAL              include aligned hits with >= id; continue search\n+  --wordlength INT            length of words for database index 3-15 (8)\n+\n+@EXTERNAL_DOCUMENTATION@\n+\n+-------\n+\n+@REFERENCES@\n+\n+\n+]]>\n+    </help>\n+    <expand macro="citations" />\n+</tool>\n'
b
diff -r 000000000000 -r fae6527990af shuffling.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/shuffling.xml Thu May 21 03:58:09 2015 -0400
[
@@ -0,0 +1,59 @@
+<tool id="vsearch_shuffling" name="VSearch shuffling" version="@VERSION@.0">
+    <description></description>
+    <macros>
+        <import>vsearch_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <command>
+<![CDATA[
+    vsearch
+        @GENERAL@
+        --output "$outfile"
+        --seed $seed
+        --shuffle "$infile"
+        ## Only pass the option when a value was entered, so it never appears without its argument.
+        #if str($topn) != '':
+            --topn $topn
+        #end if
+]]>
+    </command>
+    <inputs>
+        <param name="infile" type="data" format="fasta" label="Select your FASTA file" help="(--shuffle)" />
+        <param name="seed" type="integer" value="0" label="Seed"
+            help="Zero to use random data source. (--seed)"/>
+        <expand macro="topn" />
+    </inputs>
+    <outputs>
+        <data name="outfile" format="fasta" label="${tool.name} on ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" value="db.fasta" ftype="fasta" />
+            <param name="seed" value="1"/>
+            <param name="topn" value="5"/>
+            <output name="outfile" file="shuffling_result1.fasta" ftype="fasta" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+**What it does**
+
+Sequence shuffling to obtain new random sequences.
+
+Shuffling options
+  --output FILENAME           output to specified FASTA file
+  --seed INT                  seed for PRNG, zero to use random data source (0)
+  --shuffle FILENAME          shuffle order of sequences pseudo-randomly
+  --topn INT                  output just first n sequences
+
+@EXTERNAL_DOCUMENTATION@
+
+-------
+
+@REFERENCES@
+]]>
+    </help>
+    <expand macro="citations" />
+</tool>
b
diff -r 000000000000 -r fae6527990af sorting.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sorting.xml Thu May 21 03:58:09 2015 -0400
[
@@ -0,0 +1,102 @@
+<tool id="vsearch_sorting" name="VSearch sorting" version="@VERSION@.0">
+    <description></description>
+    <macros>
+        <import>vsearch_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <command>
+<![CDATA[
+    vsearch
+        @GENERAL@
+        #if $sorting_mode.sorting_mode_select == 'sortbylength':
+            --sortbylength "$sorting_mode.infile"
+        #else:
+            --sortbysize "$sorting_mode.infile"
+            #if $sorting_mode.minsize:
+                --minsize $sorting_mode.minsize
+            #end if
+            #if $sorting_mode.maxsize:
+                --maxsize $sorting_mode.maxsize
+            #end if
+        #end if
+        --output "$outfile"
+        ## The sizeout boolean renders as the complete flag when checked and as an
+        ## empty string otherwise, so it is emitted as-is (no extra literal flag).
+        $sizeout
+        #if $topn:
+            --topn $topn
+        #end if
+
+]]>
+    </command>
+    <inputs>
+        <conditional name="sorting_mode">
+            <param name="sorting_mode_select" type="select" label="Sorting by" help="">
+                <option value="sortbylength">sequence length</option>
+                <option value="sortbyabundance">abundance</option>
+            </param>
+            <when value="sortbylength">
+                <param name="infile" type="data" format="fasta" label="Select your FASTA file" help="(--sortbylength)" />
+            </when>
+            <when value="sortbyabundance">
+                <param name="infile" type="data" format="fasta" label="Select your FASTA file" help="(--sortbysize)" />
+                <param name="minsize" type="integer" value="" optional="True" label="Minimum abundance"
+                    help="(--minsize)"/>
+                <param name="maxsize" type="integer" value="" optional="True" label="Maximum abundance"
+                    help="(--maxsize)"/>
+            </when>
+        </conditional>
+        <expand macro="topn" />
+        <param name="sizeout" type="boolean" truevalue="--sizeout" falsevalue="" checked="False"
+            label="Add abundance annotation to output" help="(--sizeout)"/>
+    </inputs>
+    <outputs>
+        <data name="outfile" format="fasta" label="${tool.name} on ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="sorting_mode_select" value="sortbyabundance"/>
+            <param name="infile" value="db.fasta" ftype="fasta" />
+            <output name="outfile" file="sorting_result1.fasta" ftype="fasta" />
+        </test>
+        <test>
+            <param name="sorting_mode_select" value="sortbylength"/>
+            <param name="infile" value="db.fasta" ftype="fasta" />
+            <output name="outfile" file="sorting_result2.fasta" ftype="fasta" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+**What it does**
+
+FASTA entries are sorted by decreasing abundance (--sortbysize) or sequence length
+(--sortbylength). To obtain a stable sorting order, ties are sorted by decreasing abundance and
+label increasing alpha-numerical order (--sortbylength), or just by label increasing
+alpha-numerical order (--sortbysize). Label sorting assumes that all sequences have unique labels.
+The same applies to the automatic sorting performed during chimera checking (--uchime_denovo),
+dereplication (--derep_fulllength), and clustering (--cluster_fast and --cluster_size).
+
+Sorting options
+  --maxsize INT               maximum abundance for sortbysize
+  --minsize INT               minimum abundance for sortbysize
+  --output FILENAME           output FASTA file
+  --relabel STRING            relabel with this prefix string after sorting
+  --sizeout                   add abundance annotation to output
+  --sortbylength FILENAME     sort sequences by length in given FASTA file
+  --sortbysize FILENAME       abundance sort sequences in given FASTA file
+  --topn INT                  output just top n seqs after sorting
+
+
+@EXTERNAL_DOCUMENTATION@
+
+-------
+
+@REFERENCES@
+
+
+]]>
+    </help>
+    <expand macro="citations" />
+</tool>
b
diff -r 000000000000 -r fae6527990af test-data/AF091148.fsa.bz2
b
Binary file test-data/AF091148.fsa.bz2 has changed
b
diff -r 000000000000 -r fae6527990af test-data/AF091148_first_rep.fsa.bz2
b
Binary file test-data/AF091148_first_rep.fsa.bz2 has changed
b
diff -r 000000000000 -r fae6527990af test-data/BioMarKs5k.fsa.bz2
b
Binary file test-data/BioMarKs5k.fsa.bz2 has changed
b
diff -r 000000000000 -r fae6527990af test-data/PR2-18S-rRNA-V4.derep.fsa.bz2
b
Binary file test-data/PR2-18S-rRNA-V4.derep.fsa.bz2 has changed
b
diff -r 000000000000 -r fae6527990af test-data/Rfam_11_0.repr.fasta.bz2
b
Binary file test-data/Rfam_11_0.repr.fasta.bz2 has changed
b
diff -r 000000000000 -r fae6527990af test-data/alignment_result1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/alignment_result1.fasta Thu May 21 03:58:09 2015 -0400
b
@@ -0,0 +1,128 @@
+vsearch --threads 1 --notrunclabels --id 0.97 --iddef 2 --allpairs_global /tmp/tmpn4c95N/files/000/dataset_1.dat --alnout /tmp/tmpn4c95N/files/000/dataset_2.dat --query_cov 0.95 
+vsearch v1.1.3_linux_x86_64, 7.7GB RAM, 4 cores
+
+Query >RF00177;SSU_rRNA_bacteria;CP001742.1/177780-176277   666510:Acidilobus saccharovorans 345-15
+ %Id   TLen  Target
+100%   1497  RF01959;SSU_rRNA_archaea;CP001742.1/177775-176279   666510:Acidilobus saccharovorans 345-15
+
+ Query 1504nt >RF00177;SSU_rRNA_bacteria;CP001742.1/177780-176277   666510:Acidilobus saccharovorans 345-15
+Target 1497nt >RF01959;SSU_rRNA_archaea;CP001742.1/177775-176279   666510:Acidilobus saccharovorans 345-15
+
+Qry    6 + ACUCCGGUUGAUCCUGCCGGACCCGACUGCUAUCGGGGUGAGGCUAAGCCAUGGGAGUCGCGCG 69
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Tgt    1 + ACUCCGGUUGAUCCUGCCGGACCCGACUGCUAUCGGGGUGAGGCUAAGCCAUGGGAGUCGCGCG 64
+
+Qry   70 + CCCAGCCGCCGCUGGGCGCGGCGCACGGCUGAGUAACACGUAGCUAACCUACCCUCGGGACGGG 133
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Tgt   65 + CCCAGCCGCCGCUGGGCGCGGCGCACGGCUGAGUAACACGUAGCUAACCUACCCUCGGGACGGG 128
+
+Qry  134 + GAUAACCCCGGGAAACUGGGGCUAAUCCCCGAUAGGCGAGGGGGCCUGGAAUGGUCCCUCGCCG 197
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Tgt  129 + GAUAACCCCGGGAAACUGGGGCUAAUCCCCGAUAGGCGAGGGGGCCUGGAAUGGUCCCUCGCCG 192
+
+Qry  198 + AAAGGGACCCUGGGGGGUUAUCGCCUGGGGUCCGCCCGAGGAUGGGGCUGCGGCCCAUCAUGGU 261
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Tgt  193 + AAAGGGACCCUGGGGGGUUAUCGCCUGGGGUCCGCCCGAGGAUGGGGCUGCGGCCCAUCAUGGU 256
+
+Qry  262 + AGUUGGCGGGGUAAUGGCCCGCCAAGCCGACGACGGGUAGGGGCCGUGGGAGCGGGAGCCCCCA 325
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Tgt  257 + AGUUGGCGGGGUAAUGGCCCGCCAAGCCGACGACGGGUAGGGGCCGUGGGAGCGGGAGCCCCCA 320
+
+Qry  326 + GAUGGGCCCUGAGACAAGGGCCCAGGCCCUACGGGGCGCACCAGGCGCGAAACCUCCGCAAUGC 389
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Tgt  321 + GAUGGGCCCUGAGACAAGGGCCCAGGCCCUACGGGGCGCACCAGGCGCGAAACCUCCGCAAUGC 384
+
+Qry  390 + GGGAAACCGUGACGGGGUCACCCCGAGUGCUCCCGUAAGGGAGCUUUUCCCCGCUGCAAGGAGG 453
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Tgt  385 + GGGAAACCGUGACGGGGUCACCCCGAGUGCUCCCGUAAGGGAGCUUUUCCCCGCUGCAAGGAGG 448
+
+Qry  454 + CGGGGGAAUAAGCGGGGGGCAAGUCUGGUGUCAGCCGCCGCGGUAAUACCAGCCCCGCGAGUGG 517
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Tgt  449 + CGGGGGAAUAAGCGGGGGGCAAGUCUGGUGUCAGCCGCCGCGGUAAUACCAGCCCCGCGAGUGG 512
+
+Qry  518 + UCGGGACGUCUACUGGGCCUAAAGCGCCCGUAGCCGGCCCCGUAAGUCCCUCCUGAAAGCCCUG 581
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Tgt  513 + UCGGGACGUCUACUGGGCCUAAAGCGCCCGUAGCCGGCCCCGUAAGUCCCUCCUGAAAGCCCUG 576
+
+Qry  582 + GGCUCAACCCAGGGAGUGGGGGGGAUACUGCGGGGCUAGGGGGCGGGAAAGGCCGGGGGUACCC 645
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Tgt  577 + GGCUCAACCCAGGGAGUGGGGGGGAUACUGCGGGGCUAGGGGGCGGGAAAGGCCGGGGGUACCC 640
+
+Qry  646 + CAGGGGUAGGGGCGAAAUCCGAUAAUCCCUGGGGGACCACCAGUGGCGAAAGCGCCCGGCUGGA 709
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Tgt  641 + CAGGGGUAGGGGCGAAAUCCGAUAAUCCCUGGGGGACCACCAGUGGCGAAAGCGCCCGGCUGGA 704
+
+Qry  710 + ACGCGCCCGACGGUGAGGGGCGAAAGCCGGGGGAGCGAACCGGAUUAGAUACCCGGGUAGUCCC 773
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Tgt  705 + ACGCGCCCGACGGUGAGGGGCGAAAGCCGGGGGAGCGAACCGGAUUAGAUACCCGGGUAGUCCC 768
+
+Qry  774 + GGCUGUAAACGAUGCGGGCUAGGUGUCGGGCGGGCGUUAGAGCCCGCCCGGUGCCGCAGGGAAG 837
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Tgt  769 + GGCUGUAAACGAUGCGGGCUAGGUGUCGGGCGGGCGUUAGAGCCCGCCCGGUGCCGCAGGGAAG 832
+
+Qry  838 + CCGUUAAGCCCGCCGCCUGGGGAGUACGGCCGCAAGGCUGAAACUUUAAGGAAUUGGCGGGGGG 901
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Tgt  833 + CCGUUAAGCCCGCCGCCUGGGGAGUACGGCCGCAAGGCUGAAACUUUAAGGAAUUGGCGGGGGG 896
+
+Qry  902 + GCACACAAGGGGUGGAGCCUGCGGCUCAAUUGGAGUCAACGCCGGGAACCUCACCGGGGGCGAC 965
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Tgt  897 + GCACACAAGGGGUGGAGCCUGCGGCUCAAUUGGAGUCAACGCCGGGAACCUCACCGGGGGCGAC 960
+
+Qry  966 + AGCAGGAUGACGGCCAGGCUAACGACCUUGCCCGACGCGCUGAGGGGAGGUGCAUGGCCGUCGC 1029
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Tgt  961 + AGCAGGAUGACGGCCAGGCUAACGACCUUGCCCGACGCGCUGAGGGGAGGUGCAUGGCCGUCGC 1024
+
+Qry 1030 + CAGCUCGUGCUGUGAAGUGUCCUGUUAAGUCAGGCAACGAGCGAGACCCCCGCCCCUAGUUGCG 1093
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Tgt 1025 + CAGCUCGUGCUGUGAAGUGUCCUGUUAAGUCAGGCAACGAGCGAGACCCCCGCCCCUAGUUGCG 1088
+
+Qry 1094 + ACCCGGCGGGAGACCGCUGGGGCACACUAGGGGGACUGCCGCCGCUAAGGCGGAGGAAGGAGGG 1157
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Tgt 1089 + ACCCGGCGGGAGACCGCUGGGGCACACUAGGGGGACUGCCGCCGCUAAGGCGGAGGAAGGAGGG 1152
+
+Qry 1158 + GGCCACGGCAGGUCAGCAUGCCCCUAAACCCCCGGGCUGCACGCGGGCUACAAUGGCGGGGACA 1221
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Tgt 1153 + GGCCACGGCAGGUCAGCAUGCCCCUAAACCCCCGGGCUGCACGCGGGCUACAAUGGCGGGGACA 1216
+
+Qry 1222 + GCGGGAUCCGACCCCGAAAGGGGGAGGCAAUCCCUCAAACCCCGCCGUAGUCGGGAUUGGGGGC 1285
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Tgt 1217 + GCGGGAUCCGACCCCGAAAGGGGGAGGCAAUCCCUCAAACCCCGCCGUAGUCGGGAUUGGGGGC 1280
+
+Qry 1286 + UGUAACUCGCCCCCAUGAACCUGGAAUCCCUAGUAACCGCGCGUCAACAUCGCGCGGUGAAUAC 1349
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Tgt 1281 + UGUAACUCGCCCCCAUGAACCUGGAAUCCCUAGUAACCGCGCGUCAACAUCGCGCGGUGAAUAC 1344
+
+Qry 1350 + GUCCCUGCCCCUUGUACACACUGCCCGUCGCUCCACCUGAGAGAAGGAGGGGUGAGGCUUCCUC 1413
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Tgt 1345 + GUCCCUGCCCCUUGUACACACUGCCCGUCGCUCCACCUGAGAGAAGGAGGGGUGAGGCUUCCUC 1408
+
+Qry 1414 + CUUCGGGAGGGAGUCGAACCCCUCCUUCUCGAGGGGGGAGAAGUCGUAACAAGGUAGCCGUAGG 1477
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Tgt 1409 + CUUCGGGAGGGAGUCGAACCCCUCCUUCUCGAGGGGGGAGAAGUCGUAACAAGGUAGCCGUAGG 1472
+
+Qry 1478 + GGAACCUGCGGCUGGAUCACCUCUC 1502
+           |||||||||||||||||||||||||
+Tgt 1473 + GGAACCUGCGGCUGGAUCACCUCUC 1497
+
+1497 cols, 1497 ids (100.0%), 0 gaps (0.0%)
+
+Query >RF00423;SCARNA4;AAVX01260432.1/2253-2118   7868:Callorhinchus milii (ghost shark)
+ %Id   TLen  Target
+100%    134  RF00426;SCARNA15;AAVX01260432.1/2252-2119   7868:Callorhinchus milii (ghost shark)
+
+ Query 136nt >RF00423;SCARNA4;AAVX01260432.1/2253-2118   7868:Callorhinchus milii (ghost shark)
+Target 134nt >RF00426;SCARNA15;AAVX01260432.1/2252-2119   7868:Callorhinchus milii (ghost shark)
+
+Qry   2 + UCGGAGGAAUAAGAAAGCACAGUCUCGAGAGUGUCCAUGACUUUGCUGAUACUCUCCUCCUAUA 65
+          ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Tgt   1 + UCGGAGGAAUAAGAAAGCACAGUCUCGAGAGUGUCCAUGACUUUGCUGAUACUCUCCUCCUAUA 64
+
+Qry  66 + GAAAAGUGGUGGAAGAACAGGUCUUCUCUUGUGGCUGUGGAGUUCUGACCUACUUAAUCCACUC 129
+          ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Tgt  65 + GAAAAGUGGUGGAAGAACAGGUCUUCUCUUGUGGCUGUGGAGUUCUGACCUACUUAAUCCACUC 128
+
+Qry 130 + ACAAGU 135
+          ||||||
+Tgt 129 + ACAAGU 134
+
+134 cols, 134 ids (100.0%), 0 gaps (0.0%)
b
diff -r 000000000000 -r fae6527990af test-data/chimera_result1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chimera_result1.fasta Thu May 21 03:58:09 2015 -0400
b
b'@@ -0,0 +1,958 @@\n+>AF281177.1.2172_G|AF281177|R|||11|Eukaryota|Opisthokonta|Fungi|Ascomycota|Pezizomycotina|Sordariomycetes|Claviceps|Claviceps+sorghicola|Claviceps|Claviceps+sorghicola;size=10\n+AGCTCCAATAGCGTATATTAAAGTTGTTGTGGTTAAAAAGCTCGTAGTTGAACCTTGGGCCTGGCTGGCCGGTCCGCCTC\n+ACCGCGAGTACTGGTCCGGCCGGGCCTTTCCCTCTGTGGAACCCCATGCCCTTCACTGGGCGTGGCGGGGAAACAGGACT\n+TTTACTTTGAAAAAATTAGAGTGCTCCAGGCAGGCCTATGCTCGAATACATTAGCATGGAATAATGAAATAGGACGCGCG\n+GTTCTATTTTGTTGGTTTCTAGGACCGCCGTAATGATTAATAGGGACAGTCGGGGGCATCAGTATTCAATTGTCAGAGGT\n+GAAATTCTTGGATTTATTGAAGACTAACTACTGCGAAAGCATTTGCCAAGGATGTTTTCA\n+>DQ514862.1.1735_U|DQ514862|U|||11|Eukaryota|Stramenopiles|Ochrophyta|Bacillariophyta|Bacillariophyta_X|Polar-centric-Mediophyceae|Thalassiosira|Thalassiosira+pseudonana|Thalassiosira|Thalassiosira+pseudonana;size=10\n+AGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATTTCTGGCAGGAGCGACCGGTCTCACAC\n+TCAGTGCGAGAACTCGTGTTGTCTCTGGCCATCCTTGGGGATATCCTGTTTGGCATTAAGTTGTCGGGCAGGGGATACCC\n+ATCGTTTACTGTGAAAAAATTAGAGTGTTTAAAGCAGGCTTATGCCGTTGAATATATTAGCATGGAATAATAAGATAGGA\n+CCCTGGTACTATTTTGTTGGTTTGCGCACCGAGGTAATGATTAAAAGAGACAGGCGGGGCTATTCGTATTGCATTGTCAG\n+AGGTGAAATTCTTGGATTTCTGCAAGACGAACTACTGCGAAAGCATTTAGCAAGGATGTTTTCA\n+>CK269001.1.1038_U|CK269001|U|||11|Eukaryota|Archaeplastida|Streptophyta|Embryophyceae|Embryophyceae_X|Embryophyceae_XX|Solanum|Solanum+tuberosum|Solanum|Solanum+tuberosum;size=9\n+AGCTCCAATAGCGTATATTTAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGACTTTGGGATGGGCCGGCCGGTCCGCCCT\n+AGGTGTGCACCGGTCGTCTCGTCCCTTCTGTCGGCGATGCGCTCCTGGCCTTAATTGGCCGGGTCGTGCCTCCGGCGCTG\n+TTACTTTGAAGAAATTAGAGTGCTCAAAGCAAGCCTACGCTCTGTATACATTAGCATGGGATAACATTATAGGATTTCGG\n+TCCTATTACGTTGGCCTTCGGGATCGGAGTAATGATTAACAGGGACAGTCGGGGGCATTCGTATTTCATAGTCAGAGGTG\n+AAATTCTTGGATTTATGAAAGACGAACAACTGCGAAAGCATTTGCCAAGGATGTTTTCA\n+>AJ972856.1.1760_UC|AJ972856|U||Reference|11|Eukaryota|Opisthokonta|Fungi|Ascomycota|Pezizomycotina|Dothideomycetes|Capnobotryella|Capnobotryella+sp.|Capnobotryella|Capnobotryella+sp.;size=8\n+AGCTCCAGTAGCGTATATT
AAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGAACCTTGGGTCTGGCTGGCCGGTCCGCCTC\n+ACCGCGTGTTACTGGTCCGGCCGGGTGAAATTCTTGGATTTATTGAAGACAAACTACTGCGAAAGCATTTGCCAAGGATG\n+TTTTCA\n+>AM236073.1.1638_U|AM236073|U|||11|Eukaryota|Stramenopiles|Ochrophyta|Bacillariophyta|Bacillariophyta_X|Polar-centric-Mediophyceae|Cyclotella|Cyclotella+meneghiniana|Cyclotella|Cyclotella+meneghiniana;size=8\n+AGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATTTCTGGCAGGAGCGTCTGGTCTCACAC\n+TCTGTGCGGGAACTTGGATGTCTCTGGCCATCTTTGGGGATATCCTGTTTGGCATTAAGTTGTCGGGCAGGGGATACCCA\n+TCGTTTACTGTGAAAAAATTAGAGTGTTTAAAGCAGGCTTATGCCGTTGAATATATTAGCATGGAATAATAAGATAGGAC\n+CCTGGTACTATTTTGTTGGTTTGCGCACCTGGGTAATGATTAATAGAGACAGGCGGGGCTATTCGTATTGCATTGTCAGA\n+GGTGAAATTCTTGGATTTCTGCAAGACGAACTACTGCGAAAGCATTTAGCAAGGATGTTTTCA\n+>EF526943.1.1394_U|EF526943|U|||11|Eukaryota|Stramenopiles|Ochrophyta|Bacillariophyta|Bacillariophyta_X|Polar-centric-Mediophyceae|Cyclotella|Cyclotella+sp.|uncultured|uncultured+marine;size=8\n+AGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATTTCTGGCAGGAGCGTCCGGTCTCACAC\n+TCAGTGCGGGAACTTGGATGTCTCTGGCCATCTTTGGGGATATCCTGTTTGGCATTAAGTTGTCGGGCAGGGGACACCCA\n+TCGTTTACTGTGAAAAAATTAGAGTGTTTAAAGCAGGCTTATGCCGTTGAATATATTAGCATGGAATAATAAGATAGGAC\n+GTAGGTACTATTTTGTTGGTTTGCGCACCCACGTAATGATTAATAGAGACAGGCGGGGCTATTCGTATTGCATTGTCAGA\n+GGTGAAATTCTTGGATTTCTGCAAGACGAACTACTGCGAAAGCATTTAGCAAGGATGTTTTCA\n+>HQ680491.1.1812_U|HQ680491|U|||11|Eukaryota|Stramenopiles|Ochrophyta|Bacillariophyta|Bacillariophyta_X|Raphid-pennate|Cymbella|Cymbella+hebridica|Cymbella|Cymbella+hebridica;size=8\n+AGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATTTGTGGTTGGAGCAACGTGCCCTTCAC\n+TTGTGTTGGTGCTTGTTTGTCTCCGTCCATCCTTGGGTGGAACCTGTGTGGCATTAAGTTGTCGTGCAGGGGATGCCCAT\n+CTTTTACTGTGAAAAAATTAGAGTGTTCAAAGCAGGCTTACGCCGTTGAATATATTAGCATGGAATAATGAGATAGGACC\n+TGGGTACTATTTTGTTGGTTTGCGCACCGAGGTAATGATTAATAGGGACAGTTGGGGGTATTCGTATTCCATTGTCAGAG\n+GTGAAATTCTTGGATTTTTGGAAGACGAACTACTGCGAAAGCATTTACCAAGGATGTTTTCA\n+>AB594350.1.1721_U|AB594350|U|||11|Eukaryota|Opisthokonta|Meta
zoa|Mollusca|Bivalvia|Pterioida-Pinnidae|P'..b'0-28//12-20|Eukaryota|Stramenopiles|Stramenopiles_X|MAST|MAST-12|MAST-12B|MAST-12B_X|MAST-12B_X+sp.|uncultured|uncultured+eukaryote;size=1\n+AGCTCCAATAGCGTATACTAATGTTGTTGCAGTTAAAAAGCTCGTAGTTGAATTTCTGGTTGTCGACTTATGGTCCTGCG\n+CCTCGTGCGTTTTGGTACCGTTTTTGTCGACGCCATCTTAGTCGTGATTGCTTCTGGCATTAGGTTGTCGGTTTTAGGAG\n+CGGCTTCGTTTACTGTGATAACAAATAGAGTGTTCAAAGCAGACATTACGTCTTGAATGCTTCAGCATGGAATAAAAAGA\n+TAGGACAGCGGTACTATTTTATTGGTATGTATACCGTAGTAATGATTAATAGGGATAGTTGGGGGTATTCATATTTGATT\n+GTCAGAGGTGAAATTCTTGGATTTATGAAAGATGAACAACTGCGAAAGCATTTACCAAGGATGTTTTCA\n+>HQ867242.1.872_U|HQ867242|U|||11|Eukaryota|Hacrobia|Cryptophyta|Cryptophyceae|Cryptophyceae_X|Cryptomonadales|Cryptomonadales_X|Cryptomonadales_X+sp.|uncultured|uncultured+eukaryote;size=1\n+AGCTCTAATAGCGTATATTAAAGTTGTTGCAGTCAAAAAGCTCGTAGTCGGATGTCGGGCTCGGGCAAGCTGTCGGCCTT\n+TGGTCGGACGGCAGGCTCGGGTCTTTCTGCCTGAGGAACCCGGTTGCTTTAACGAGCTGCCGGTGGACGCAGGTCGTTTA\n+CTTTGAAAAAATTAGAGTGTTCAAAGCAGGCTAGCGCTTGAATACATTAGCATGGAATAATGGAATAGGACTTTGGTCCT\n+ATTTTGTTGGTTTTCGGGACCGAAGTAATGATTAATAGGGACAGTTGGGGGCATTCGTATTTAATTGTCAGAGGTGAAAT\n+TCTTGGATTTATGAAAGACGAACTTCTGCGAAAGCATCTGCCAAGGATGTTTTCA\n+>JN832714.1.1689_U|JN832714|U|||gb_191-cc|Eukaryota|Alveolata|Dinophyta|Dinophyceae|Dinophyceae_X|Dinophyceae_XX|Dinophyceae_XXX|Dinophyceae_XXX+sp.|environmental_samples|uncultured+eukaryote;size=1\n+AGCTCCAATAGCGTATACTAACGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATTTCTGTAGTGGGATGCCGGCCGGGTTC\n+TCTGAACTTGTAGCTAGGTTATCTTGCTGCATCTTGTCGTAATTCTCACTGGCATTCATTTGTTGGTGTAGAGGACCGAC\n+TCGTTTACTTTGAGAAAATTAGAGTGTTTAAAGCAGGCATACGCCTTGAATACATTAGCATGGAATAATAAGATAGGACC\n+TCGGTTCTATTTTGTTGGTTTCTAGAGCTGAGGTAATGATTAATAGGGATAGTTGGGGGCATTCGTATTTAACTGTCAGA\n+GGTGAAATTCTTGGATTTGTTAAAGACGGACTACTGCGAAAGCATTTGCCAAGGATGTTTTCA\n+>KC674974.1.1781_U|KC674974|U||gb197A|cc_2n|Eukaryota|Opisthokonta|Fungi|Ascomycota|Pezizomycotina|Sordariomycetes|Sordariomycetes_X|Sordariomycetes_X+sp.|environmental_samples|uncultured+fungus;size=1\n+AGCTCCAATAGCGTATATTAAAG
TTGTTGCAGTTAAAAAGCTCGTAGTTGGACCTTGGACCTGGACAGGCGGTCCGCCTC\n+ACGGCGAGTACTGTCTTGCTGGGTCTTTCCTCTTGGTGATCTGTTGTTTCGGCAGCAGGGAACCAGGACCTTTACTTTGA\n+AAAAATTAGAGTGCTCCAGGCAGGCCTATGCTCGAATACATTAGCATGGAATAATAGAATAGGACGTGTGGTTCTATTTT\n+GTTGGTTTCTAGGACCGCCGTAATGATTAATAGGGACAGTCGGGGGCATCAGTATTCAATTGTCAGAGGTGAAATTCTTG\n+GATTTATTGAAGACTAACTACTGCGAAAGCATTTGCCAAGGATGTTTCA\n+>U32401.1.1034_U|U32401|U|||11|Eukaryota|Opisthokonta|Fungi|Ascomycota|Pezizomycotina|Sordariomycetes|Claviceps|Claviceps+paspali|Claviceps|Claviceps+paspali;size=1\n+AGCTCCAATAGCGTATATTAAAGTTGTTGTGGTTAAAAAGCTCGTAGTTGAACCTTGGGCCTGGCTGGCCGGTCCGCCTC\n+ACCGCGTGTACTGGTCCGGCCGGGCCTTTCCCTCTGTGGAACCCCATGCCCTTCACTGGGCGTGGCGGGGAAACAGGACT\n+TTTACTTTGAAAAAATTAGAGTGCTCCAGGCAGGCCTATGCTCGAATACATTAGCATGGAATAATGAAATAGGACGCGCG\n+GTTCTATTTTGTTGGTTTCTAGGACCGCCGTAATGATTAATAGGGACAGTCGGGGGCATCAGTATTCAATTGTCAGAGGT\n+GAAATTCTTGGATTTATTGAAGACTAACTACNGCGAAAGCATTTGCCAAGGATGTTTTCA\n+>U83122.1.1738_U|U83122|U||Reference|11|Eukaryota|Archaeplastida|Chlorophyta|Chlorophyceae|Chlorophyceae_X|CW-Chlamydomonadales|Gloeococcus|Gloeococcus+maximus|Gloeococcus|Gloeococcus+maximus;size=1\n+AGCTCCAATAGCGTATATTTAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATTTCGTGTGGGTTGGCGCGGTCCGCTCT\n+GGTGTGCACTGCGCCGGTCCACCTTCCAGCCGGGGACGGGCTCCTGGGCTTCACTGTCTGGGACTCGGAGTCGACGATGT\n+TACTTTGAGAAAATTAGAGTGTTCAAAGCAAGCCTACGCTCTGAATAGTTTAGCATGGAATAACGCGATAGGACTCTGGC\n+CTATCCCGTTGGTCTGTGGGACCGGAGTAATGATTAAGAGGGACAGTCGGGGGCATTCGTATTTCATTGTCAGAGGTGAA\n+ATTCTTGGATTTATGAAAGACGAACTTCTGCGAAAGCATTTGCCAAGGATGTTTTCA\n+>Z83751.1.1890_U|Z83751|U||Reference|11|Eukaryota|Opisthokonta|Metazoa|Annelida|Annelida_X|Annelida_XX|Glossiphonia|Glossiphonia+sp.|Glossiphonia|Glossiphonia+sp.;size=1\n+AGCTCCAATAGCGTATGTTATAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATCTCGGGTTCGGACTGGTTGCAACCTCG\n+CGGCGTAGCGACCCGCTCCGACCTGTTTTCGGTTCTGCGCCCGGTTCTTCTTAACCGAGTGCCGGGTGCGGCCGAGACGT\n+TTACTTTGAAAAAATTAAAGTGCTTAAAGCAGGCTCGACAGCCTGAATAGTAGCGCATGGAATAATAGAATAGGACCTCG\n+GTTCTATTTTGTTGGTTTTCGGAGCTCGAGGTAATGATTAAGAGGGACAGTTGGGGGTCATTG
CGGTTATTTCGATTGTC\n+AGAGGTGAAATTCTTGGATTTGCGAAAGACGAACTAATGCGAAAGCATTTGCCAAGAATGTTTTCA\n'
b
diff -r 000000000000 -r fae6527990af test-data/chimera_uchimeout_result1.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chimera_uchimeout_result1.tabular Thu May 21 03:58:09 2015 -0400
b
b'@@ -0,0 +1,13060 @@\n+0.0000\tKC487520.1.1651_U|KC487520|U||gb197A|cc_2n|Eukaryota|Alveolata|Apicomplexa|Apicomplexa_X|Colpodellidae|Colpodellidae_X|Colpodellidae_XX|Colpodellidae_XX+sp.|environmental_samples|uncultured+Colpodellidae;size=914\t*\t*\t*\t*\t*\t*\t*\t*\t0\t0\t0\t0\t0\t0\t*\tN\n+0.0000\tEU175348.1.814_U|EU175348|U|||11|Eukaryota|Opisthokonta|Fungi|Ascomycota|Pezizomycotina|Sordariomycetes|Sordariomycetes_X|Sordariomycetes_X+sp.|uncultured|uncultured+fungus;size=485\t*\t*\t*\t*\t*\t*\t*\t*\t0\t0\t0\t0\t0\t0\t*\tN\n+0.0000\tGU971995.1.816_U|GU971995|U|||11|Eukaryota|Opisthokonta|Fungi|Basidiomycota|Ustilaginomycotina|Exobasidiomycetes|Exobasidiomycetes_X|Exobasidiomycetes_X+sp.|uncultured|uncultured+organism;size=424\t*\t*\t*\t*\t*\t*\t*\t*\t0\t0\t0\t0\t0\t0\t*\tN\n+0.0000\tKC672135.1.1740_U|KC672135|U||gb197A|cc_2n|Eukaryota|Opisthokonta|Fungi|Basidiomycota|Ustilaginomycotina|Exobasidiomycetes|Malassezia|Malassezia+globosa|environmental_samples|uncultured+fungus;size=356\t*\t*\t*\t*\t*\t*\t*\t*\t0\t0\t0\t0\t0\t0\t*\tN\n+0.0000\tDQ680046.1.1088_UC|DQ680046|UC|||11|Eukaryota|Opisthokonta|Fungi|Ascomycota|Pezizomycotina|Sordariomycetes|Stachybotrys|Stachybotrys+chartarum|Stachybotrys|Stachybotrys+chartarum;size=315\t*\t*\t*\t*\t*\t*\t*\t*\t0\t0\t0\t0\t0\t0\t*\tN\n+0.0000\tHQ865593.1.872_U|HQ865593|U|||11|Eukaryota|Stramenopiles|Stramenopiles_X|Bicoecea|Bicoecea_X|Bicoecea_XX|Bicoecea_XXX|Bicoecea_XXX+sp.|uncultured|uncultured+eukaryote;size=265\t*\t*\t*\t*\t*\t*\t*\t*\t0\t0\t0\t0\t0\t0\t*\tN\n+0.0627\tAC193417.150218.152027_U|AC193417|U|||11|Eukaryota|Archaeplastida|Streptophyta|Embryophyceae|Embryophyceae_X|Embryophyceae_XX|Zea|Zea+mays|Zea|Zea+mays;size=228\tEU175348.1.814_U|EU175348|U|||11|Eukaryota|Opisthokonta|Fungi|Ascomycota|Pezizomycotina|Sordariomycetes|Sordariomycetes_X|Sordariomycetes_X+sp.|uncultured|uncultured+fungus;size=485\tKC487520.1.1651_U|KC487520|U||gb197A|cc_2n|Eukaryota|Alveolata|Apicomplexa|Apicomplexa_X|Colpodellidae|Colpodellidae_
X|Colpodellidae_XX|Colpodellidae_XX+sp.|environmental_samples|uncultured+Colpodellidae;size=914\tEU175348.1.814_U|EU175348|U|||11|Eukaryota|Opisthokonta|Fungi|Ascomycota|Pezizomycotina|Sordariomycetes|Sordariomycetes_X|Sordariomycetes_X+sp.|uncultured|uncultured+fungus;size=485\t80.1\t78.2\t76.3\t77.1\t78.2\t39\t25\t42\t7\t0\t6\t1.9\tN\n+0.0590\tHQ870089.1.881_U|HQ870089|U|||11|Eukaryota|Archaeplastida|Streptophyta|Embryophyceae|Embryophyceae_X|Embryophyceae_XX|Embryophyceae_XXX|Embryophyceae_XXX+sp.|uncultured|uncultured+eukaryote;size=194\tGU971995.1.816_U|GU971995|U|||11|Eukaryota|Opisthokonta|Fungi|Basidiomycota|Ustilaginomycotina|Exobasidiomycetes|Exobasidiomycetes_X|Exobasidiomycetes_X+sp.|uncultured|uncultured+organism;size=424\tKC487520.1.1651_U|KC487520|U||gb197A|cc_2n|Eukaryota|Alveolata|Apicomplexa|Apicomplexa_X|Colpodellidae|Colpodellidae_X|Colpodellidae_XX|Colpodellidae_XX+sp.|environmental_samples|uncultured+Colpodellidae;size=914\tKC487520.1.1651_U|KC487520|U||gb197A|cc_2n|Eukaryota|Alveolata|Apicomplexa|Apicomplexa_X|Colpodellidae|Colpodellidae_X|Colpodellidae_XX|Colpodellidae_XX+sp.|environmental_samples|uncultured+Colpodellidae;size=914\t77.5\t74.7\t77.0\t77.2\t77.0\t25\t23\t51\t10\t0\t6\t0.6\tN\n+0.0000\tAAPE02054660.131.1675_U|AAPE02054660|U|||11|Eukaryota|Opisthokonta|Metazoa|Craniata|Craniata_X|Craniata_XX|Myotis|Myotis+lucifugus|Myotis|Myotis+lucifugus;size=172\t*\t*\t*\t*\t*\t*\t*\t*\t0\t0\t0\t0\t0\t0\t*\tN\n+0.1149\tABXC01010957.1335.3144_U|ABXC01010957|U|||11|Eukaryota|Archaeplastida|Streptophyta|Embryophyceae|Embryophyceae_X|Embryophyceae_XX|Sorghum|Sorghum+bicolor|Sorghum|Sorghum+bicolor;size=170\tKC672135.1.1740_U|KC672135|U||gb197A|cc_2n|Eukaryota|Opisthokonta|Fungi|Basidiomycota|Ustilaginomycotina|Exobasidiomycetes|Malassezia|Malassezia+globosa|environmental_samples|uncultured+fungus;size=356\tKC487520.1.1651_U|KC487520|U||gb197A|cc_2n|Eukaryota|Alveolata|Apicomplexa|Apicomplexa_X|Colpodellidae|Colpodellidae_X|Colpodellidae_XX|Colpodel
lidae_XX+sp.|environmental_samples|uncultured+Colpodellidae;size=914\tKC672135.1.1740_U|KC672135|U||gb197A|cc_2n|Eukaryota|Opisthokonta|Fungi|Basidiomycota|Ustilaginomy'..b'vata|Metamonada|Parabasalia|Cristamonadida|Calonymphidae-Devescovinidae|Metadevescovina|Metadevescovina+modica|Metadevescovina|Metadevescovina+modica;size=3\tAB326380.1.1525_U|AB326380|U||Reference|11|Eukaryota|Excavata|Metamonada|Parabasalia|Cristamonadida|Calonymphidae-Devescovinidae|Deltotrichonympha|Deltotrichonympha+operculata|Deltotrichonympha|Deltotrichonympha+operculata;size=3\t91.2\t90.1\t88.5\t89.3\t90.1\t8\t1\t11\t7\t4\t7\t1.1\tN\n+0.0039\tX98829.1.1811_U|X98829|U|||11|Eukaryota|Opisthokonta|Metazoa|Mollusca|Polyplacophora|Polyplacophora_X|Lepidozona|Lepidozona+coreanica|Lepidozona|Lepidozona+coreanica;size=1\tHQ907767.1.1726_U|HQ907767|U|||11|Eukaryota|Opisthokonta|Metazoa|Mollusca|Polyplacophora|Polyplacophora_X|Leptochiton|Leptochiton+sp.|Leptochiton|Leptochiton+sp.;size=19\tHQ907736.1.1742_U|HQ907736|U|||11|Eukaryota|Opisthokonta|Metazoa|Mollusca|Polyplacophora|Polyplacophora_X|Acanthochitona|Acanthochitona+rhodea|Acanthochitona|Acanthochitona+rhodea;size=2\tHQ907767.1.1726_U|HQ907767|U|||11|Eukaryota|Opisthokonta|Metazoa|Mollusca|Polyplacophora|Polyplacophora_X|Leptochiton|Leptochiton+sp.|Leptochiton|Leptochiton+sp.;size=19\t97.3\t97.1\t97.1\t99.5\t97.1\t1\t0\t2\t1\t0\t8\t0.3\tN\n+0.0000\tY17633.1.1768_U|Y17633|U||Reference|11|Eukaryota|Opisthokonta|Fungi|Glomeromycota|Glomeromycotina|Glomeromycotina_X|Acaulospora|Acaulospora+laevis|Acaulospora|Acaulospora+laevis;size=1\t*\t*\t*\t*\t*\t*\t*\t*\t0\t0\t0\t0\t0\t0\t*\tN\n+0.0000\tY17636.1.1698_U|Y17636|U|||11|Eukaryota|Opisthokonta|Fungi|Glomeromycota|Glomeromycotina|Glomeromycetes|Claroideoglomus|Claroideoglomus+claroideum|Claroideoglomus|Claroideoglomus+claroideum;size=1\t*\t*\t*\t*\t*\t*\t*\t*\t0\t0\t0\t0\t0\t0\t*\tN\n+0.0000\tZ11590.1.1648_U|Z11590|U||Reference|11|Eukaryota|Opisthokonta|Metazoa|Platyhelminthes|Trematoda|Digenea|S
chistosoma|Schistosoma+japonicum|Schistosoma|Schistosoma+japonicum;size=1\t*\t*\t*\t*\t*\t*\t*\t*\t0\t0\t0\t0\t0\t0\t*\tN\n+1.9127\tZ83751.1.1890_U|Z83751|U||Reference|11|Eukaryota|Opisthokonta|Metazoa|Annelida|Annelida_X|Annelida_XX|Glossiphonia|Glossiphonia+sp.|Glossiphonia|Glossiphonia+sp.;size=1\tGQ368792.1.1765_U|GQ368792|U|||11|Eukaryota|Opisthokonta|Metazoa|Annelida|Annelida_X|Annelida_XX|Hirudo|Hirudo+orientalis|Hirudo|Hirudo+orientalis;size=5\tJN227970.1.1297_U|JN227970|U|||gb_191-cc|Eukaryota|Opisthokonta|Metazoa|Cnidaria|Cnidaria_X|Anthozoa|Trichogorgia|Trichogorgia+capensis|Trichogorgia|Trichogorgia+capensis;size=2\tGQ368792.1.1765_U|GQ368792|U|||11|Eukaryota|Opisthokonta|Metazoa|Annelida|Annelida_X|Annelida_XX|Hirudo|Hirudo+orientalis|Hirudo|Hirudo+orientalis;size=5\t97.7\t95.4\t81.9\t81.7\t95.4\t55\t0\t3\t8\t0\t5\t2.3\tY\n+0.0395\tZ97583.1.1876_U|Z97583|U|||11|Eukaryota|Opisthokonta|Metazoa|Arthropoda|Hexapoda|Insecta|Stiphra|Stiphra+robusta|Stiphra|Stiphra+robusta;size=1\tAY121146.1.1811_U|AY121146|U|||11|Eukaryota|Opisthokonta|Metazoa|Arthropoda|Hexapoda|Insecta|Melanoplus|Melanoplus+sp.|Melanoplus|Melanoplus+sp.;size=3\tEU364522.1.1748_U|EU364522|U||Reference|11|Eukaryota|Opisthokonta|Metazoa|Arthropoda|Hexapoda|Insecta|Euparatettix|Euparatettix+erythronotus|Euparatettix|Euparatettix+erythronotus;size=2\tAY121146.1.1811_U|AY121146|U|||11|Eukaryota|Opisthokonta|Metazoa|Arthropoda|Hexapoda|Insecta|Melanoplus|Melanoplus+sp.|Melanoplus|Melanoplus+sp.;size=3\t95.7\t95.5\t94.0\t93.1\t95.5\t9\t1\t7\t9\t8\t3\t0.2\tN\n+0.0246\tZ99946.1.1790_U|Z99946|U||Reference|11|Eukaryota|Opisthokonta|Metazoa|Platyhelminthes|Turbellaria|Seriata|Baikalobia|Baikalobia+guttata|Baikalobia|Baikalobia+guttata;size=1\tAJ287559.1.1759_U|AJ287559|U||Reference|11|Eukaryota|Opisthokonta|Metazoa|Platyhelminthes|Turbellaria|Seriata|Phagocata|Phagocata+sibirica|Phagocata|Phagocata+sibirica;size=2\tZ99949.1.1760_U|Z99949|U||Reference|11|Eukaryota|Opisthokonta|Metazoa|Platyhelminthes|Tur
bellaria|Seriata|Polycelis|Polycelis+tenuis|Polycelis|Polycelis+tenuis;size=2\tAJ287559.1.1759_U|AJ287559|U||Reference|11|Eukaryota|Opisthokonta|Metazoa|Platyhelminthes|Turbellaria|Seriata|Phagocata|Phagocata+sibirica|Phagocata|Phagocata+sibirica;size=2\t92.6\t92.3\t90.4\t90.4\t92.3\t11\t3\t8\t6\t5\t11\t0.3\tN\n'
b
diff -r 000000000000 -r fae6527990af test-data/clustering_blast6out_result1.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/clustering_blast6out_result1.tab Thu May 21 03:58:09 2015 -0400
b
b'@@ -0,0 +1,3384 @@\n+a0db13ac48fe53746f62697be51eb75c;size=30\t5b3bd3d89041bf6676e95510f0b0b8e6;size=32\t99.6\t492\t2\t0\t1\t492\t1\t492\t-1\t0\n+8bb82b41a18117e1fb9f340acbac3253;size=144\t5b3bd3d89041bf6676e95510f0b0b8e6;size=32\t99.8\t492\t0\t1\t1\t491\t1\t492\t-1\t0\n+f46817965f2f10189a8ccdda13105279;size=34\t5b3bd3d89041bf6676e95510f0b0b8e6;size=32\t99.4\t491\t2\t1\t1\t490\t1\t492\t-1\t0\n+c764e2bd39b276f8d9ffd7fc51f5a89d;size=206\t54b9859f6c86ac8d2736e3fa4d13ba87;size=95\t99.8\t461\t0\t1\t1\t460\t1\t461\t-1\t0\n+b97f53a53b822fbd23c59a6a88b5b6b4;size=96\t1fcfe9045f0bc00da82d03df44a48394;size=53\t99.8\t461\t0\t1\t1\t460\t1\t461\t-1\t0\n+263cf13efbf85a5445d68804e93b4d9f;size=31\t1fcfe9045f0bc00da82d03df44a48394;size=53\t99.1\t461\t3\t1\t1\t460\t1\t461\t-1\t0\n+f751da1bb8c234b9f61a7af3f8efecb3;size=176\t1fcfe9045f0bc00da82d03df44a48394;size=53\t99.6\t461\t0\t2\t1\t459\t1\t461\t-1\t0\n+6111ea2ce1dfc236dc016876ec2d5f41;size=161\t8cf694bf5517e02fc09302d0c222f492;size=32\t99.3\t461\t0\t3\t1\t459\t1\t460\t-1\t0\n+959cb8eaadf5b199de35625052d3c2d1;size=124\t8cf694bf5517e02fc09302d0c222f492;size=32\t99.3\t461\t0\t3\t1\t459\t1\t460\t-1\t0\n+f572e667fbbdf01d3309678581f9092c;size=111\t8cf694bf5517e02fc09302d0c222f492;size=32\t99.3\t461\t0\t3\t1\t459\t1\t460\t-1\t0\n+f7ad0d34c912269f017f97315ad99a7c;size=101\t8cf694bf5517e02fc09302d0c222f492;size=32\t99.8\t460\t0\t1\t1\t459\t1\t460\t-1\t0\n+a034bfacf39e144ea09eb7d0a40e0023;size=45\t8cf694bf5517e02fc09302d0c222f492;size=32\t99.3\t461\t0\t3\t1\t459\t1\t460\t-1\t0\n+a5820116a31d1f85ab738b0487acab3c;size=40\t1fcfe9045f0bc00da82d03df44a48394;size=53\t99.1\t461\t2\t1\t1\t459\t1\t461\t-1\t0\n+dd9e167e71c9b44ec53339f398dc9ca3;size=37\t8cf694bf5517e02fc09302d0c222f492;size=32\t99.3\t461\t0\t3\t1\t459\t1\t460\t-1\t0\n+38dd5bcb48a511e780aef4b63445382f;size=31\t54b9859f6c86ac8d2736e3fa4d13ba87;size=95\t99.6\t461\t0\t2\t1\t459\t1\t461\t-1\t0\n+ffd96ef15fd96d9d5b60c8fed2047a64;size=737\t8cf694bf5517e02fc09302d0c222f492;size=32\t99.6\t460\
t0\t2\t1\t458\t1\t460\t-1\t0\n+8eeca4d7ca8eda54eaa0aeb7e629b40e;size=66\t8cf694bf5517e02fc09302d0c222f492;size=32\t99.1\t461\t0\t4\t1\t458\t1\t460\t-1\t0\n+09f3030ed956eb076c2f714a89ea8e29;size=41\t8cf694bf5517e02fc09302d0c222f492;size=32\t99.3\t460\t1\t2\t1\t458\t1\t460\t-1\t0\n+e3808a9992bb6bd448373ccfc56494df;size=102\t8cf694bf5517e02fc09302d0c222f492;size=32\t99.3\t460\t0\t3\t1\t457\t1\t460\t-1\t0\n+f1e235e5fb09fef006c66c736a9f9a7f;size=71\t5d50f22d97026ac2cf6f18b3bda0127a;size=83\t99.6\t457\t2\t0\t1\t457\t1\t457\t-1\t0\n+db102da912ae727755a5c4fc620a8c8a;size=56\t1d040138a8e18337b6517fe857ce5515;size=178\t99.8\t457\t1\t0\t1\t457\t1\t457\t-1\t0\n+bd5246d2c33950733d99682c551a2e21;size=38\t8cf694bf5517e02fc09302d0c222f492;size=32\t99.1\t459\t2\t2\t1\t457\t1\t460\t-1\t0\n+a66684d8a93b5a3741c08f723565ddd7;size=32\t1d040138a8e18337b6517fe857ce5515;size=178\t99.3\t458\t1\t2\t1\t457\t1\t457\t-1\t0\n+d22ea32c6f5378809e11ea9cac04c234;size=31\t8cf694bf5517e02fc09302d0c222f492;size=32\t99.3\t460\t0\t3\t1\t457\t1\t460\t-1\t0\n+ecd8c9163b8746f32c2e98dfd746f83b;size=472\t1d040138a8e18337b6517fe857ce5515;size=178\t99.6\t457\t1\t1\t1\t456\t1\t457\t-1\t0\n+08459a0219a184090f05f0041e8b9805;size=176\t07caab35e95c428ef0393595f2c72e76;size=92\t99.8\t457\t0\t1\t1\t456\t1\t457\t-1\t0\n+2d7bc6040e4ca3400b82a9fb2c387260;size=138\t1d040138a8e18337b6517fe857ce5515;size=178\t99.8\t457\t0\t1\t1\t456\t1\t457\t-1\t0\n+98858cc9a83cc05bf3269d8a1c83543f;size=97\t07caab35e95c428ef0393595f2c72e76;size=92\t99.8\t457\t0\t1\t1\t456\t1\t457\t-1\t0\n+b2033c6eda8c91d43d6eb45bacc7f511;size=90\t5d50f22d97026ac2cf6f18b3bda0127a;size=83\t99.8\t457\t0\t1\t1\t456\t1\t457\t-1\t0\n+b58c613a8d99c11df27c9a49a6683acb;size=85\t1d040138a8e18337b6517fe857ce5515;size=178\t99.1\t458\t1\t3\t1\t456\t1\t457\t-1\t0\n+14d991272555a4022a7c08aea9bafdf1;size=75\t1d040138a8e18337b6517fe857ce5515;size=178\t99.8\t457\t0\t1\t1\t456\t1\t457\t-1\t0\n+2bfd8c0dd4c8f173016c5a7aa747f399;size=55\t07caab35e95c428ef0393595f2c72e76;size=92\t
99.3\t458\t0\t3\t1\t456\t1\t457\t-1\t0\n+a9e13523f49dc6b2547752c7dd643191;size=55\t1d040138a8e18337b6517fe857ce5515;size=178\t99.1\t458\t1\t3\t1\t456\t1\t457\t-1\t0\n+09e09d127b4876163d02984f2aebd727;size=40\t5d50f22d97026ac2cf6f18b3bda0127a;size=83\t99.3\t457\t2\t1\t1\t456\t1\t457\t-1\t0\n+0e2f3a2390b487039c0daab0288fde75;size=33\t1d040138a8e18337b6517fe857ce5515;size=178\t99.3\t457\t2\t1\t1\t456\t1\t457\t-1\t0\n+3c7c'..b'\t1\t364\t1\t365\t-1\t0\n+5459237955111c0681f507be88a7f611;size=422\t11b0b2d3acbe972060dcd8faf4af822d;size=86\t99.7\t364\t0\t1\t1\t363\t1\t364\t-1\t0\n+30b1b51fe2175706d0b82e5134d699f0;size=190\td9ac9faf4b6f32ebd5c50e8d427047a0;size=30\t99.5\t365\t0\t2\t1\t363\t1\t365\t-1\t0\n+c8853816fba540da3ed4d3632dd1f948;size=152\ta1de117c8ceeed63af3aeeee3227fbad;size=41\t99.7\t364\t0\t1\t1\t363\t1\t364\t-1\t0\n+a804a56e39d33a56047fff62b95bcdc0;size=131\t1a0f9bbf8b3ca5c668a600bbbd28230a;size=76\t99.7\t364\t0\t1\t1\t363\t1\t364\t-1\t0\n+04aea65caa96e67baaab99567c447e4c;size=77\t4f7756e4b35fcd03d6aba2ca17ccf05f;size=30\t99.7\t364\t0\t1\t1\t363\t1\t364\t-1\t0\n+1d37c637a7bbe32ca6a4c35d81188af2;size=35\t0bc1c0e6a33d79abf02d973765f63bba;size=44\t99.5\t365\t0\t2\t1\t363\t1\t365\t-1\t0\n+a400da3e359f7b184568f72ca92ca8d9;size=207\t52b372ca2470405d60100d5a30c3d690;size=64\t99.7\t363\t0\t1\t1\t362\t1\t363\t-1\t0\n+b45fe3ef50ffef8327c6ac385b0448c2;size=120\t4f610af4c82abf3650eb56a2a0b4c2d3;size=215\t100.0\t362\t0\t0\t1\t362\t1\t363\t-1\t0\n+f54151ecea6a3541ed676b54853f7171;size=111\t52b372ca2470405d60100d5a30c3d690;size=64\t99.7\t363\t0\t1\t1\t362\t1\t363\t-1\t0\n+f8052b02944acace27d8bc389efc4440;size=69\t11b0b2d3acbe972060dcd8faf4af822d;size=86\t99.5\t364\t0\t2\t1\t362\t1\t364\t-1\t0\n+1e7bf5b9268da3a50505214ab3a5670f;size=575\t649094ebe0076e62ce7d5e199affeac9;size=30\t99.7\t362\t0\t1\t1\t361\t1\t362\t-1\t0\n+36873a7ad45fdf9629abc879d6779593;size=83\t649094ebe0076e62ce7d5e199affeac9;size=30\t99.4\t362\t0\t2\t1\t360\t1\t362\t-1\t0\n+b1ba47ca788a38d4b2db6ec2f305d505;size
=45\t649094ebe0076e62ce7d5e199affeac9;size=30\t99.4\t362\t0\t2\t1\t360\t1\t362\t-1\t0\n+e8c7759df43178f58d6d576b767b2224;size=265\t208bc821768aa9de5dfd05968bfb3a56;size=42\t99.7\t360\t0\t1\t1\t359\t1\t360\t-1\t0\n+fa87b8ffba690da59903be605e84281c;size=87\t85037f37bafe82d520f2cfba3cd4b855;size=71\t99.7\t360\t0\t1\t1\t359\t1\t360\t-1\t0\n+003e0ff09b028868c5389a0ea3b4e887;size=55\tc8dd7527c116180add354515754d344b;size=113\t99.2\t359\t2\t1\t1\t359\t1\t359\t-1\t0\n+355bcea002bcfa248ca6e8ddd7ba503d;size=50\t85037f37bafe82d520f2cfba3cd4b855;size=71\t99.7\t360\t0\t1\t1\t359\t1\t360\t-1\t0\n+c31bf4ce5355a276e4c379412cc4e7a1;size=39\tc8dd7527c116180add354515754d344b;size=113\t99.2\t359\t2\t1\t1\t359\t1\t359\t-1\t0\n+bc04647af93059a38915012d5f17d308;size=29\t649094ebe0076e62ce7d5e199affeac9;size=30\t99.2\t362\t0\t2\t1\t359\t1\t362\t-1\t0\n+4625bea51c6ef6ac56eb3226e764f2fe;size=1046\tc8dd7527c116180add354515754d344b;size=113\t99.4\t358\t2\t0\t1\t358\t1\t359\t-1\t0\n+440143f4b8f4b747472cbfe20a01716a;size=76\tc8dd7527c116180add354515754d344b;size=113\t99.2\t358\t3\t0\t1\t358\t1\t359\t-1\t0\n+3f05b4ca141cdd0587e3d8edc41ffc0c;size=40\t208bc821768aa9de5dfd05968bfb3a56;size=42\t99.4\t360\t0\t2\t1\t358\t1\t360\t-1\t0\n+9fed7bd7d80da33d0e8c654e16248f8c;size=78\tc8dd7527c116180add354515754d344b;size=113\t99.2\t358\t2\t1\t1\t357\t1\t359\t-1\t0\n+0c8e6a8f59ae69830714c4710540f666;size=30\t8d5e43e719aad9429f6b83dd58df87bb;size=49\t99.7\t354\t1\t0\t1\t354\t1\t354\t-1\t0\n+28cf4b7f2057888b45eebb42323854c0;size=51\te20bd4dedf3bed33d22d70e580ad3a6e;size=36\t99.4\t340\t2\t0\t1\t340\t1\t341\t-1\t0\n+a8c32a1c253990f48a53e660dcd4f634;size=97\t90ded2932eac98a8cb2a92bf4d04e0bf;size=120\t99.7\t309\t1\t0\t1\t309\t1\t309\t-1\t0\n+560a693cef1c4cd418b49f2e891deed6;size=81\t90ded2932eac98a8cb2a92bf4d04e0bf;size=120\t99.0\t310\t1\t2\t1\t309\t1\t309\t-1\t0\n+947994fe45c71991250bfc7a1a537aa5;size=76\t90ded2932eac98a8cb2a92bf4d04e0bf;size=120\t99.0\t310\t1\t2\t1\t309\t1\t309\t-1\t0\n+712028baaef90f19bdd6b34769
fbb497;size=59\t46c166a8a872048e3b43afc09a7ff4e4;size=64\t99.0\t309\t2\t1\t1\t309\t1\t309\t-1\t0\n+cdb44fd3289eb8ee3c35d63c974d7ee6;size=30\t5f6b41eee25b85749d6079c416331706;size=52\t99.4\t309\t2\t0\t1\t309\t1\t309\t-1\t0\n+b4e1bd5eac322b803425414f19938cd6;size=1992\t46c166a8a872048e3b43afc09a7ff4e4;size=64\t99.4\t308\t2\t0\t1\t308\t1\t309\t-1\t0\n+7301515860adb6b2e54517d01e778e3a;size=298\t90ded2932eac98a8cb2a92bf4d04e0bf;size=120\t99.7\t309\t0\t1\t1\t308\t1\t309\t-1\t0\n+f531ed80c5166b4df536ba2e6d85eb57;size=123\t46c166a8a872048e3b43afc09a7ff4e4;size=64\t99.3\t307\t2\t0\t1\t307\t1\t309\t-1\t0\n+29b686d7bc307e55ee11bd9eba357c21;size=84\t46c166a8a872048e3b43afc09a7ff4e4;size=64\t99.0\t308\t2\t1\t1\t307\t1\t309\t-1\t0\n+4a805d8bc08ae555bb1eea1a359bfc3c;size=43\t46c166a8a872048e3b43afc09a7ff4e4;size=64\t99.0\t308\t2\t1\t1\t307\t1\t309\t-1\t0\n'
b
diff -r 000000000000 -r fae6527990af test-data/clustering_centroids_result1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/clustering_centroids_result1.fasta Thu May 21 03:58:09 2015 -0400
b
b'@@ -0,0 +1,9698 @@\n+>5b3bd3d89041bf6676e95510f0b0b8e6;size=32\n+AGCTCCAATAGCGTATATTAAAGTTGTTGCGGTTAAAAAGCTCGTAGTTGGATTTCTGTCTTCGGCCGGGCGGTGCCGCC\n+TGATAATCAAGGGCGCGCACTGCTTTGGGAGCGTTGGTGTCAAAGCCGCGCTCTTTTAACGGTCGGACAATACCACCGGA\n+GTGGTGACGGGTCGGCGGGGTTGGCTTTTATCGGCCTTCACTGTCGGCTCCCACCCGTAATCGTCCGAGGTGCTCTTAAC\n+CGAGTGTCTCGGGCGGCCGGTAACGTTTACTTTGAACAAATTAGAGTGCTCAAAGCAGGTGCTTGCTCGCCTGAATATCA\n+CAGCATGGAATGATGGAATAGGACCTCGGTCTTATTTTGTTGGTTTTCTTCTAGACCCGAGGTAATGGTTAATAGAGACA\n+GACGGGGGCATTCGTACTGCGACGCTAGAGGTGAAATTCTTGGACCGTCGCAAGACGAACAACTGCGAAAGCATTTGCCA\n+AGAATGTTTTCA\n+>dd9c9696486e4c8e44f64040bfd68af0;size=43\n+AGCTCCAATAGCGTATATTAAAGTTGTTGCGGTTAAAAAGCTCGTAGTTGGATTCCTGTCTTCGGCCGGGCGGTGCCGCC\n+CGATCAAAGGGCGTCGCACTGCTTTGGGTCGGCGGTGTCAAAGCCGTCGACTTTTAACGGTCGGACAATACCACCGGAGC\n+GGTCGCGTCGGTCCGTCGGGCCTCAGGGTTCGGCGGTCGCCGGCGCGGCCCGTAATCGTCCGAGGTGCCCTTAACAAGGT\n+GTCTCGGGCGGCCGGTAACGTTTACTTTGAACAAATTAGAGTGCTCCAAGCAGGTGCATACCTTCGCCTGAATAGTACAG\n+CATGGAATGATGGAATAGGACCTCGGTCTTATTTTGTTGGTTTTTCTAGACCCGAGGTAATGGTTAATAGAGACGGACGG\n+GGGCATTCGTACTGCGACGCTAGAGGTGAAATTCTTGGACCGTCGCAAGACGAACGACTGCGAAAGCATTTGCCAAGAAT\n+GTTTTCA\n+>fc137ae3d4f5eb20104e5d3f1490201c;size=75\n+AGCTCCAATAGCGTATATTAAAGTTGTTGCGGTTAAAAAGCTCGTAGTTGGATTTCTGTCTTCGGCCGGGCGGTGCCGCC\n+TGATAATCAAGGGCGCGCACTGCTTTGGGAGCGTCGGTGTCAAAGCCGCGCTCTTTTAACGGTCGGACAATACCACCGGA\n+GTGGCGGACGGAGGGGTGTCGAGCTCTCGGGCTTTTCGCTTCTCTCCCGCCCGTAATCGTCCGAGGTGCTCTTAACCGAG\n+TGTCTCGGGCGGCCGGTAACGTTTACTTTGAACAAATTAGAGTGCTCAAAGCAGGTGCTTATCGCCTGAATATCACAGCA\n+TGGAATGATGGAATAGGACCTCGGTCTTATTTTGTTGGTTTTCTCTAGACCCGAGGTAATGGTTAATAGAGACAGACGGG\n+GGCATTCGTACTGCGGCGCTAGAGGTGAAATTCTTGGACCGCCGCAAGACGAACAACTGCGAAAGCATTTGCCAAGAATG\n+TTTTCA\n+>d9dd80f66077bd29d1569180753d1ea6;size=30\n+AGCTCCAATAGCGTATATTAAAGTTGTTGCGGTTAAAAAGCTCGTAGTTGGATTTCTGTCTTCGGCCGGGCGGTGCCGCC\n+TGATAATCAAGGGCGCGCACTGCTTTGGGAGCGCCGGTGTCAAAGCCGCGCTCTTTTAACGGTCGGACAATACCACCGGA\n+GTGGTGACGGGCCGGCGGGGTTGGTTCGCCCTTCACCGTCGGTCCCCACCCGTAATCGTCCGAGGTGCTCTTAACCGAGT\n+GTCTCGGGCGGCCGGTAACGTTTA
CTTTGAACAAATTAGAGTGCTCAAAGCAGGTGCTTGCTCGCCTGAATATCACAGCA\n+TGGAATGATGGAATAGGACCTCGGTCTTATTTTGTTGGTTTTCTCTAGACCCGAGGTAATGGTTAATAGAGACAGACGGG\n+GGCATTCGTACTGCGACGCTAGAGGTGAAATTCTTGGACCGTCGCAAGACGAACAACTGCGAAAGCATTTGCCAAGAATG\n+TTTTCA\n+>54b9859f6c86ac8d2736e3fa4d13ba87;size=95\n+AGCTCCACTAGCGTATATTAAAGTTGCTGCAGTTAAAAAGCTCGTAGTCGGATCTCGGGATCGTATCTCGTCGGTGTCGT\n+TGGGTCATGCCTCGAAAGGGGTTGTGGTCTTTCGGCGTCTTCGGGGTCTCATCCGGAGGGTCTCTGGCTTCACGCCGGGG\n+TTTCACCTTCCGCGATGGCCCGCCGTCGCGTTTGGGTGGGTGTCCTTAATTGGATGCCCGCCGTCGAACGTGACCATTTT\n+ACCTTGAACAAATCGGAGTGCTCAACGCAGGCTCCACCACGCTCGAACGGTAGCGCATGGAATAATGGAAGAGGACCAGC\n+TTCCGCTTCTATTGGTCTCACGGAGGCAGGTAATGATCAAGAGGAACGGATGGGGGGCAGAGGTATGGCTCGGCGAGAGG\n+TGAAATTCTTGGACCCTAGCCAGACCCTCGAGAGCGAAAGCATCTGCCAAAGATGTTTTCA\n+>1fcfe9045f0bc00da82d03df44a48394;size=53\n+AGCTCCACTAGCGTATATTAAAGTTGCTGCAGTTAAAAAGCTCGTAGTCGGATCTCGGGATCGTGTCGGCGTTCTCGTGG\n+TTTCTCCTGCGGTTCTGCCGTTGGGAGTTTCCGCGGGTCGTCGGCCCTGGCGGCTAGGTCCCCACCTGATGGTGGGGTTC\n+CACCTGGCTCGTCCAGCCCGCCGTCGCGTTTGGGCGGGTGTCCTTAATTGGATGCCCGCTCTCGAACGTGGCCCGTTTAC\n+CTTGAACAAATCGGAGTGCTCAAAGCAGGCTCTACGTCGCTCGAACGGTAGCGCATGGAATAATGGAAGAGGGACCGGGC\n+TTCCGCTTTCTGTTGGTCTCACGGAGGCAGGTAATGATCAAGAGGGACGGACGGGGGCGGAGGTATGGCTCGGCGAGAGG\n+TGAAATTCTTGGACCCTAGCCAGACCCTCGAGAGCGAAAGCATCCGCCAAGGATGTTTCCA\n+>3b5b50289fa657be02f8223db1ceecc1;size=38\n+AGCTCCACTAGCGTATATTAAAGTTGCTGCAGTTAAAAAGCTCGTAGTCGGATCTCGGGATCGTACCCCCTCGGTTTTCG\n+TTGGGCCATGCCTCGCAAGGGGTTGTGGACTCGCGGGGCCGTTGGGGTCTCATCCAGAGGGTCTCTGGTATTTCGCCGGG\n+GTTTTACCTTCTGCGATGGCCCGCCGTTGTGTTTGGGTGGGTGTCCTTAATTGGATGCCCGCTTTCGAACGCGACCATTT\n+TACCTTGAACAAATCGGAGTGCTTAAAGCAGGCTCCACCACGCTCGTACGGTAGCGCATGGAATAATGGAAGAGGACCCG\n+CTTCCGCTTCTGTTGGTCTCACGGAGGCAGGTAATGATCAAGAGGAACGGACGGGGGCGGAGGTATGGCTCGGCGAGAGG\n+TGAAATTCTTGGACCCCAGCCAGACCTACGAGAGCGAAAGCATCCGCCAAGGATGTTTCCA\n+>8cf694bf5517e02fc09302d0c222f492;size=32\n+AGCTCCACTAGCGTATATTAAAGTTGCTGCAGTTAAAAAGCTCGTAGTCGGATCTCGGGATCGTGTCGGCGTTCTCGTGG\n+TTTCTCCTGCGGTTCTGCCGTTGGGAGTTTCCGCGGGTCGTCGGCCCTGGCGGCTAGGTCCCCACCTGAT
GGTGGGGTTC\n+CACCTGGCTCGTCCAGCCCGCCGTCGCGTTTGGGCGGGTGTCCTT'..b'GTTGCAGTTAAAAAGCTCGTAGTTGAATTTAGGAGAAAGAATTCTCCTGTCTGCA\n+CTCTTCAGGGCAAATGATATTGCTCTGGAGAACAGACTTTTACTGTGAGAAAACTAAAGTGCTCAAAGCAGGCTAACGCT\n+TGAATATTAAAGCATGGAATAATAAAATAGGACTTTATTCTATTATTGGTCATTGGATAAAGTAATGATTAACAGAAACA\n+GTTGGGGGCATTCGTATTTAATTGTCAGAGGTGAAATTCTTGGATTAATTAAAGACGAACTAGTGCGAAAGCATTTGCCA\n+AGGATGTTTTCA\n+>96a9caaafe30428b70835b5d473865f4;size=78\n+AGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATCTCGGGAGCAGGCCGGCGGTCCGCCTC\n+GCGGCGGTTACTGCCTGTCCTGCCCTACCTGCCGGTTTTCCCCCGGTGCTCTTCATTGAGTGCCTCGGGTGGCCGGAACG\n+TTTACTTTGACCATTATGGAATAATGGAATAGGACCTCGGTTCTATTTTGTTGGTCTCGATGTTGGATTAAGAGGGACAG\n+ACGGGGGCATTCGTATTACACTGTTAGAGGTGAAATTCTTGGATCGGTGTAAGACGAACTACTGCGAAAGCATTTGCCAA\n+GAATGTTTTCA\n+>6e693c587ce4656374be43a0c5a0e0f1;size=33\n+AGCTCCAGTAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGACTTGCACTCCTTTTGGAGCAGCCTAGCA\n+ACTCCGCATGTCCTTAATTGGATGGCGGAGACCTGCGGGCCCTTTACCTTGAGAAAATTAGAGTGTTTAAGGCAGGCCAC\n+GCCTGAATACTGCAGCTGGGAATAATGCATCACGACTGTCTTGGCGAGGCAGCTCTACACTTTTAATAGGAACGGTTGGG\n+GGCATTCGTACTCAGCAGTCAGAGGTGAAATTCTTGGATTTGCTGAAGACGGACTAGTGCGAAAGCATTTGCCAAGGATG\n+TTTTCA\n+>6d23e3b9b8db42f3ee64c4b8b3b3e016;size=71\n+AGCTCCAATAGCGTATATTAAAGTTGTTGTGGTTAAAAAGCTCGTAGTTTTGCCAGAGGTTTCGGGGTGCTCTTAATCGA\n+GTGTCCCGGGATGCTGGCAGGTTTACTTTGAAAAAATTAGAGTGCTCAAAGCAGGCTATTACGCCTGAATATTCGTGCAT\n+GGAATAATAGAATAGGAAGTCGTTTCTATTTTGTTGGTTTTCGGAAATCGACTTAATGATTAATAGGGACAGTCGGGGGC\n+ATTTGTATTCAAACGACAGAGGTGAAATTCTTGGACCGTTTGAAGACAAACTACTGCGAAAGCATTTGCCAAGAATGTTT\n+TCA\n+>ea0e653d3c6c63a34d2fd51697955280;size=56\n+AGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATTTCAAGGTGGAGATGGGGTTTACTCCG\n+TTGAAGCTTATCAGTCCGAAAGGACTGTTACTGTGAGAAAATTAGAGTGTTTCAAGCAGGCAATTGCAGGAATACATTAG\n+CATGGAATAACGAATGTGTCTAGAATCTTGGTTAATTCTAGATTACGATTAATAGGGACAGTTGGGGGCATTAGTATTTA\n+ATTGTCAGAGGTGAAATTCTTGGATTTGTTAAAGACTAACCTATGCGAAAGCATTTGCCAAGGATGTTTTCA\n+>90ded2932eac98a8cb2a92bf4d04e0bf;size=120\n+AGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGT
AGTTGGATTTCTAGATAAGTGGGTACTTGTACTTT\n+GCTTGTCTACCAGTCTTAGACTGTTACTGTGAGAAAAATTAGAGTGTTTCAAGCAGGCTGTTGCAGGAATACATTAGCAT\n+GGAATAACGAATGTGTCTGGAATATTGGTTAATTCTAGATTACGATTAATAGGGACAGTTGGGGGCATTAGTATTTAATT\n+GTCAGAGGTGAAATTCTTGGATTTATTAAAGACTAACGTATGCGAAAGCATTTGCCAAGGATGTTTTCA\n+>b858b313f1df62306272d6837665a1ff;size=65\n+AGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATTTCAAGGATCTCAGCGTGGATACACGT\n+TTGAATCCTACCAGTCTTTGACTGTTACTGTGAGAAAATTAGAGTGTTTCAAGCAGGCTGTTGCAGGAATACATTAGCAT\n+GGAATAACGAATGTGTCTAGAATCTTGGTTAATTCTAGAATACGATTAATAGGGACAGTTGGGGGCATTAGTATTTAATT\n+GTCAGAGGTGAAATTCTTGGATTTATTAAAGACTAACGTATGCGAAAGCATTTGCCAAGGATGTTTTCA\n+>46c166a8a872048e3b43afc09a7ff4e4;size=64\n+AGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATTTCTAGATAAGTGGGTACTTGTACTTT\n+GCTTGTCTACCAGTCATAGACTGTTACTGTGAGAAAATTAGAGTGTTTCAAGCAGGCTGTTGCAGGAATACATTAGCATG\n+GAATAACGAATGTGTCTGGAATATTGGTTAATTCTAGATTACGATTAATAGGGACAGTTGGGGGCATTAGTATTTAATTG\n+TCAGAGGTGAAATTCTTGGATTTATTAAAGACTAACGTATGCGAAAGCATTTGCCAAGGATGTTTTTCA\n+>5f6b41eee25b85749d6079c416331706;size=52\n+AGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATTTCAAGAGACGCATCTGGGGATACCGG\n+TATGCTCTCTACGGTCTCAGGACTGTTACTGTGAGAAAATTAGAGTGTTCAAAGCAGGCTATTGCAGGAATATATTAGCA\n+TGGAATAACGAATGTGTTTACAATTTGGTTAATTGTAGATTTCAATTAATAGGGACAGTTGGGGGCATTAGTATTTAATT\n+GTCAGAGGTGAAATTCTTGGATTAGTTAAAGACTAACGTATGCGAAAGCATTTGCCAAGGATGTTTTCA\n+>8eece1bfb387537588b482297f3a3861;size=61\n+AGCTCTTTTTGTGTACTTTAAAGTTACTTTGTTTGAAAAGTTCGTCGTCTGTGTTGTTGTTTGAGTTGATTTTTCGAAAG\n+AAAGTAGTACTGTGAGAAAATGGGTTTGTTTCAGATGACTTGTTAGCATGGTATATTTGTGTGTATGTTTAGCGATTGTT\n+TTGTTAAAAATAAGACAAGAAGAGTGGTCGGGGGTGCTCGTATTTGTGGGCCAGAGGTGAAATTCTTGGATTCCACAAGG\n+ACGTCCAATCGCGCAAGCATTCGCCCAGGACACGTCTG\n+>cbd30db2f4ec28a4d991954efbe58ec7;size=43\n+ATACCCCTTTATGGAGATGGATTAAACATCCGCGATTGGCTTTATGTTGAAGACCATGTGGATGCATTACTCCTAGCAGC\n+CTGTCGAGGAACATCTGGACGCAGCTATTGCGTTGGTGGCCATGGTGAAAAAACGAATCAAGAGGTGGTCAATGCAATCT\n+GCCATCAGT\n+>ddc6ea88330faf28ef92ee4e8b1380cf;size=38\n+AGCTCCAATAGCGTATATTAAAGTTGTTG
TGGTTAAAAAGCTCGTAGTTTTCA\n+>ce291c2620b3cd6a0b627ca586ddd4e9;size=40\n+TAAAGTAGCAACTGAACGTAGAATAGGGAAGAAGCTACGAAGAGAGAATA\n'
b
diff -r 000000000000 -r fae6527990af test-data/clustering_notmatched_result1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/clustering_notmatched_result1.fasta Thu May 21 03:58:09 2015 -0400
b
b'@@ -0,0 +1,9698 @@\n+>5b3bd3d89041bf6676e95510f0b0b8e6;size=32\n+AGCTCCAATAGCGTATATTAAAGTTGTTGCGGTTAAAAAGCTCGTAGTTGGATTTCTGTCTTCGGCCGGGCGGTGCCGCC\n+TGATAATCAAGGGCGCGCACTGCTTTGGGAGCGTTGGTGTCAAAGCCGCGCTCTTTTAACGGTCGGACAATACCACCGGA\n+GTGGTGACGGGTCGGCGGGGTTGGCTTTTATCGGCCTTCACTGTCGGCTCCCACCCGTAATCGTCCGAGGTGCTCTTAAC\n+CGAGTGTCTCGGGCGGCCGGTAACGTTTACTTTGAACAAATTAGAGTGCTCAAAGCAGGTGCTTGCTCGCCTGAATATCA\n+CAGCATGGAATGATGGAATAGGACCTCGGTCTTATTTTGTTGGTTTTCTTCTAGACCCGAGGTAATGGTTAATAGAGACA\n+GACGGGGGCATTCGTACTGCGACGCTAGAGGTGAAATTCTTGGACCGTCGCAAGACGAACAACTGCGAAAGCATTTGCCA\n+AGAATGTTTTCA\n+>dd9c9696486e4c8e44f64040bfd68af0;size=43\n+AGCTCCAATAGCGTATATTAAAGTTGTTGCGGTTAAAAAGCTCGTAGTTGGATTCCTGTCTTCGGCCGGGCGGTGCCGCC\n+CGATCAAAGGGCGTCGCACTGCTTTGGGTCGGCGGTGTCAAAGCCGTCGACTTTTAACGGTCGGACAATACCACCGGAGC\n+GGTCGCGTCGGTCCGTCGGGCCTCAGGGTTCGGCGGTCGCCGGCGCGGCCCGTAATCGTCCGAGGTGCCCTTAACAAGGT\n+GTCTCGGGCGGCCGGTAACGTTTACTTTGAACAAATTAGAGTGCTCCAAGCAGGTGCATACCTTCGCCTGAATAGTACAG\n+CATGGAATGATGGAATAGGACCTCGGTCTTATTTTGTTGGTTTTTCTAGACCCGAGGTAATGGTTAATAGAGACGGACGG\n+GGGCATTCGTACTGCGACGCTAGAGGTGAAATTCTTGGACCGTCGCAAGACGAACGACTGCGAAAGCATTTGCCAAGAAT\n+GTTTTCA\n+>fc137ae3d4f5eb20104e5d3f1490201c;size=75\n+AGCTCCAATAGCGTATATTAAAGTTGTTGCGGTTAAAAAGCTCGTAGTTGGATTTCTGTCTTCGGCCGGGCGGTGCCGCC\n+TGATAATCAAGGGCGCGCACTGCTTTGGGAGCGTCGGTGTCAAAGCCGCGCTCTTTTAACGGTCGGACAATACCACCGGA\n+GTGGCGGACGGAGGGGTGTCGAGCTCTCGGGCTTTTCGCTTCTCTCCCGCCCGTAATCGTCCGAGGTGCTCTTAACCGAG\n+TGTCTCGGGCGGCCGGTAACGTTTACTTTGAACAAATTAGAGTGCTCAAAGCAGGTGCTTATCGCCTGAATATCACAGCA\n+TGGAATGATGGAATAGGACCTCGGTCTTATTTTGTTGGTTTTCTCTAGACCCGAGGTAATGGTTAATAGAGACAGACGGG\n+GGCATTCGTACTGCGGCGCTAGAGGTGAAATTCTTGGACCGCCGCAAGACGAACAACTGCGAAAGCATTTGCCAAGAATG\n+TTTTCA\n+>d9dd80f66077bd29d1569180753d1ea6;size=30\n+AGCTCCAATAGCGTATATTAAAGTTGTTGCGGTTAAAAAGCTCGTAGTTGGATTTCTGTCTTCGGCCGGGCGGTGCCGCC\n+TGATAATCAAGGGCGCGCACTGCTTTGGGAGCGCCGGTGTCAAAGCCGCGCTCTTTTAACGGTCGGACAATACCACCGGA\n+GTGGTGACGGGCCGGCGGGGTTGGTTCGCCCTTCACCGTCGGTCCCCACCCGTAATCGTCCGAGGTGCTCTTAACCGAGT\n+GTCTCGGGCGGCCGGTAACGTTTA
CTTTGAACAAATTAGAGTGCTCAAAGCAGGTGCTTGCTCGCCTGAATATCACAGCA\n+TGGAATGATGGAATAGGACCTCGGTCTTATTTTGTTGGTTTTCTCTAGACCCGAGGTAATGGTTAATAGAGACAGACGGG\n+GGCATTCGTACTGCGACGCTAGAGGTGAAATTCTTGGACCGTCGCAAGACGAACAACTGCGAAAGCATTTGCCAAGAATG\n+TTTTCA\n+>54b9859f6c86ac8d2736e3fa4d13ba87;size=95\n+AGCTCCACTAGCGTATATTAAAGTTGCTGCAGTTAAAAAGCTCGTAGTCGGATCTCGGGATCGTATCTCGTCGGTGTCGT\n+TGGGTCATGCCTCGAAAGGGGTTGTGGTCTTTCGGCGTCTTCGGGGTCTCATCCGGAGGGTCTCTGGCTTCACGCCGGGG\n+TTTCACCTTCCGCGATGGCCCGCCGTCGCGTTTGGGTGGGTGTCCTTAATTGGATGCCCGCCGTCGAACGTGACCATTTT\n+ACCTTGAACAAATCGGAGTGCTCAACGCAGGCTCCACCACGCTCGAACGGTAGCGCATGGAATAATGGAAGAGGACCAGC\n+TTCCGCTTCTATTGGTCTCACGGAGGCAGGTAATGATCAAGAGGAACGGATGGGGGGCAGAGGTATGGCTCGGCGAGAGG\n+TGAAATTCTTGGACCCTAGCCAGACCCTCGAGAGCGAAAGCATCTGCCAAAGATGTTTTCA\n+>1fcfe9045f0bc00da82d03df44a48394;size=53\n+AGCTCCACTAGCGTATATTAAAGTTGCTGCAGTTAAAAAGCTCGTAGTCGGATCTCGGGATCGTGTCGGCGTTCTCGTGG\n+TTTCTCCTGCGGTTCTGCCGTTGGGAGTTTCCGCGGGTCGTCGGCCCTGGCGGCTAGGTCCCCACCTGATGGTGGGGTTC\n+CACCTGGCTCGTCCAGCCCGCCGTCGCGTTTGGGCGGGTGTCCTTAATTGGATGCCCGCTCTCGAACGTGGCCCGTTTAC\n+CTTGAACAAATCGGAGTGCTCAAAGCAGGCTCTACGTCGCTCGAACGGTAGCGCATGGAATAATGGAAGAGGGACCGGGC\n+TTCCGCTTTCTGTTGGTCTCACGGAGGCAGGTAATGATCAAGAGGGACGGACGGGGGCGGAGGTATGGCTCGGCGAGAGG\n+TGAAATTCTTGGACCCTAGCCAGACCCTCGAGAGCGAAAGCATCCGCCAAGGATGTTTCCA\n+>3b5b50289fa657be02f8223db1ceecc1;size=38\n+AGCTCCACTAGCGTATATTAAAGTTGCTGCAGTTAAAAAGCTCGTAGTCGGATCTCGGGATCGTACCCCCTCGGTTTTCG\n+TTGGGCCATGCCTCGCAAGGGGTTGTGGACTCGCGGGGCCGTTGGGGTCTCATCCAGAGGGTCTCTGGTATTTCGCCGGG\n+GTTTTACCTTCTGCGATGGCCCGCCGTTGTGTTTGGGTGGGTGTCCTTAATTGGATGCCCGCTTTCGAACGCGACCATTT\n+TACCTTGAACAAATCGGAGTGCTTAAAGCAGGCTCCACCACGCTCGTACGGTAGCGCATGGAATAATGGAAGAGGACCCG\n+CTTCCGCTTCTGTTGGTCTCACGGAGGCAGGTAATGATCAAGAGGAACGGACGGGGGCGGAGGTATGGCTCGGCGAGAGG\n+TGAAATTCTTGGACCCCAGCCAGACCTACGAGAGCGAAAGCATCCGCCAAGGATGTTTCCA\n+>8cf694bf5517e02fc09302d0c222f492;size=32\n+AGCTCCACTAGCGTATATTAAAGTTGCTGCAGTTAAAAAGCTCGTAGTCGGATCTCGGGATCGTGTCGGCGTTCTCGTGG\n+TTTCTCCTGCGGTTCTGCCGTTGGGAGTTTCCGCGGGTCGTCGGCCCTGGCGGCTAGGTCCCCACCTGAT
GGTGGGGTTC\n+CACCTGGCTCGTCCAGCCCGCCGTCGCGTTTGGGCGGGTGTCCTT'..b'GTTGCAGTTAAAAAGCTCGTAGTTGAATTTAGGAGAAAGAATTCTCCTGTCTGCA\n+CTCTTCAGGGCAAATGATATTGCTCTGGAGAACAGACTTTTACTGTGAGAAAACTAAAGTGCTCAAAGCAGGCTAACGCT\n+TGAATATTAAAGCATGGAATAATAAAATAGGACTTTATTCTATTATTGGTCATTGGATAAAGTAATGATTAACAGAAACA\n+GTTGGGGGCATTCGTATTTAATTGTCAGAGGTGAAATTCTTGGATTAATTAAAGACGAACTAGTGCGAAAGCATTTGCCA\n+AGGATGTTTTCA\n+>96a9caaafe30428b70835b5d473865f4;size=78\n+AGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATCTCGGGAGCAGGCCGGCGGTCCGCCTC\n+GCGGCGGTTACTGCCTGTCCTGCCCTACCTGCCGGTTTTCCCCCGGTGCTCTTCATTGAGTGCCTCGGGTGGCCGGAACG\n+TTTACTTTGACCATTATGGAATAATGGAATAGGACCTCGGTTCTATTTTGTTGGTCTCGATGTTGGATTAAGAGGGACAG\n+ACGGGGGCATTCGTATTACACTGTTAGAGGTGAAATTCTTGGATCGGTGTAAGACGAACTACTGCGAAAGCATTTGCCAA\n+GAATGTTTTCA\n+>6e693c587ce4656374be43a0c5a0e0f1;size=33\n+AGCTCCAGTAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGACTTGCACTCCTTTTGGAGCAGCCTAGCA\n+ACTCCGCATGTCCTTAATTGGATGGCGGAGACCTGCGGGCCCTTTACCTTGAGAAAATTAGAGTGTTTAAGGCAGGCCAC\n+GCCTGAATACTGCAGCTGGGAATAATGCATCACGACTGTCTTGGCGAGGCAGCTCTACACTTTTAATAGGAACGGTTGGG\n+GGCATTCGTACTCAGCAGTCAGAGGTGAAATTCTTGGATTTGCTGAAGACGGACTAGTGCGAAAGCATTTGCCAAGGATG\n+TTTTCA\n+>6d23e3b9b8db42f3ee64c4b8b3b3e016;size=71\n+AGCTCCAATAGCGTATATTAAAGTTGTTGTGGTTAAAAAGCTCGTAGTTTTGCCAGAGGTTTCGGGGTGCTCTTAATCGA\n+GTGTCCCGGGATGCTGGCAGGTTTACTTTGAAAAAATTAGAGTGCTCAAAGCAGGCTATTACGCCTGAATATTCGTGCAT\n+GGAATAATAGAATAGGAAGTCGTTTCTATTTTGTTGGTTTTCGGAAATCGACTTAATGATTAATAGGGACAGTCGGGGGC\n+ATTTGTATTCAAACGACAGAGGTGAAATTCTTGGACCGTTTGAAGACAAACTACTGCGAAAGCATTTGCCAAGAATGTTT\n+TCA\n+>ea0e653d3c6c63a34d2fd51697955280;size=56\n+AGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATTTCAAGGTGGAGATGGGGTTTACTCCG\n+TTGAAGCTTATCAGTCCGAAAGGACTGTTACTGTGAGAAAATTAGAGTGTTTCAAGCAGGCAATTGCAGGAATACATTAG\n+CATGGAATAACGAATGTGTCTAGAATCTTGGTTAATTCTAGATTACGATTAATAGGGACAGTTGGGGGCATTAGTATTTA\n+ATTGTCAGAGGTGAAATTCTTGGATTTGTTAAAGACTAACCTATGCGAAAGCATTTGCCAAGGATGTTTTCA\n+>90ded2932eac98a8cb2a92bf4d04e0bf;size=120\n+AGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGT
AGTTGGATTTCTAGATAAGTGGGTACTTGTACTTT\n+GCTTGTCTACCAGTCTTAGACTGTTACTGTGAGAAAAATTAGAGTGTTTCAAGCAGGCTGTTGCAGGAATACATTAGCAT\n+GGAATAACGAATGTGTCTGGAATATTGGTTAATTCTAGATTACGATTAATAGGGACAGTTGGGGGCATTAGTATTTAATT\n+GTCAGAGGTGAAATTCTTGGATTTATTAAAGACTAACGTATGCGAAAGCATTTGCCAAGGATGTTTTCA\n+>b858b313f1df62306272d6837665a1ff;size=65\n+AGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATTTCAAGGATCTCAGCGTGGATACACGT\n+TTGAATCCTACCAGTCTTTGACTGTTACTGTGAGAAAATTAGAGTGTTTCAAGCAGGCTGTTGCAGGAATACATTAGCAT\n+GGAATAACGAATGTGTCTAGAATCTTGGTTAATTCTAGAATACGATTAATAGGGACAGTTGGGGGCATTAGTATTTAATT\n+GTCAGAGGTGAAATTCTTGGATTTATTAAAGACTAACGTATGCGAAAGCATTTGCCAAGGATGTTTTCA\n+>46c166a8a872048e3b43afc09a7ff4e4;size=64\n+AGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATTTCTAGATAAGTGGGTACTTGTACTTT\n+GCTTGTCTACCAGTCATAGACTGTTACTGTGAGAAAATTAGAGTGTTTCAAGCAGGCTGTTGCAGGAATACATTAGCATG\n+GAATAACGAATGTGTCTGGAATATTGGTTAATTCTAGATTACGATTAATAGGGACAGTTGGGGGCATTAGTATTTAATTG\n+TCAGAGGTGAAATTCTTGGATTTATTAAAGACTAACGTATGCGAAAGCATTTGCCAAGGATGTTTTTCA\n+>5f6b41eee25b85749d6079c416331706;size=52\n+AGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATTTCAAGAGACGCATCTGGGGATACCGG\n+TATGCTCTCTACGGTCTCAGGACTGTTACTGTGAGAAAATTAGAGTGTTCAAAGCAGGCTATTGCAGGAATATATTAGCA\n+TGGAATAACGAATGTGTTTACAATTTGGTTAATTGTAGATTTCAATTAATAGGGACAGTTGGGGGCATTAGTATTTAATT\n+GTCAGAGGTGAAATTCTTGGATTAGTTAAAGACTAACGTATGCGAAAGCATTTGCCAAGGATGTTTTCA\n+>8eece1bfb387537588b482297f3a3861;size=61\n+AGCTCTTTTTGTGTACTTTAAAGTTACTTTGTTTGAAAAGTTCGTCGTCTGTGTTGTTGTTTGAGTTGATTTTTCGAAAG\n+AAAGTAGTACTGTGAGAAAATGGGTTTGTTTCAGATGACTTGTTAGCATGGTATATTTGTGTGTATGTTTAGCGATTGTT\n+TTGTTAAAAATAAGACAAGAAGAGTGGTCGGGGGTGCTCGTATTTGTGGGCCAGAGGTGAAATTCTTGGATTCCACAAGG\n+ACGTCCAATCGCGCAAGCATTCGCCCAGGACACGTCTG\n+>cbd30db2f4ec28a4d991954efbe58ec7;size=43\n+ATACCCCTTTATGGAGATGGATTAAACATCCGCGATTGGCTTTATGTTGAAGACCATGTGGATGCATTACTCCTAGCAGC\n+CTGTCGAGGAACATCTGGACGCAGCTATTGCGTTGGTGGCCATGGTGAAAAAACGAATCAAGAGGTGGTCAATGCAATCT\n+GCCATCAGT\n+>ddc6ea88330faf28ef92ee4e8b1380cf;size=38\n+AGCTCCAATAGCGTATATTAAAGTTGTTG
TGGTTAAAAAGCTCGTAGTTTTCA\n+>ce291c2620b3cd6a0b627ca586ddd4e9;size=40\n+TAAAGTAGCAACTGAACGTAGAATAGGGAAGAAGCTACGAAGAGAGAATA\n'
b
diff -r 000000000000 -r fae6527990af test-data/db.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/db.fasta Thu May 21 03:58:09 2015 -0400
b
@@ -0,0 +1,54 @@
+>tr|M0F5C2|M0F5C2_9EURY 30S ribosomal protein S14 type Z OS=Halorubrum hochstenium ATCC 700873 GN=rps14P PE=3 SV=1
+MSEANNDTGEHAAKRTDSRHTCRRCDREQGLVGKYDINLCRQCFREVARDMGFEKYS
+>tr|M0KT65|M0KT65_9EURY 50S ribosomal protein L37e OS=Haloarcula amylolytica JCM 13557 GN=rpl37e PE=3 SV=1
+MTGAGTPSQGKKNTTTHTKCRRCGEKSYHTKKKVCSSCGFGKSAKRRDYEWQSKAGE
+>tr|M0KH83|M0KH83_9EURY 50S ribosomal protein L18Ae OS=Haloarcula amylolytica JCM 13557 GN=rplX PE=3 SV=1
+MSTYTVRGSFPARDGPQQFEKEVDAPNENVAEERVYSDFGSQHNLKRTQITIEEVAA
+>tr|M0PIT7|M0PIT7_9EURY 30S ribosomal protein S14 type Z OS=Halorubrum arcis JCM 13916 GN=rps14P PE=3 SV=1
+MSEANNDTGEHAAKRTDSRHTCRRCDREQGLVGKYDINLCRQCFREVARDMGFEKYS
+>tr|M0MK44|M0MK44_9EURY 50S ribosomal protein L37e OS=Halococcus saccharolyticus DSM 5350 GN=rpl37e PE=3 SV=1
+MTGSGTPSQGKKNKTVHVKCRRCGEASYHKTKKVCASCGFGKSAKRRDYAWQEKAGE
+>tr|M0MEH5|M0MEH5_9EURY 30S ribosomal protein S14 type Z OS=Halococcus saccharolyticus DSM 5350 GN=rps14P PE=3 SV=1
+MSESETDTGEHATKRTGQLEDCQRCGRKQGLVGKYDIWLCRQCFREIARGMGFRKYK
+>tr|M0D8K9|M0D8K9_9EURY 30S ribosomal protein S14 type Z OS=Halorubrum terrestre JCM 10247 GN=rps14P PE=3 SV=1
+MSEANNDTGEHAAKRTDSRHTCRRCDREQGLVGKYDINLCRQCFREVARDMGFEKYS
+>tr|L9WBT3|L9WBT3_9EURY 50S ribosomal protein L18Ae OS=Natronorubrum sulfidifaciens JCM 14089 GN=rplX PE=3 SV=1
+MSQFTVTGQFKSRDGYAPFETTIDAENENVAREHVLSQLGSQHGLKRTEIDLEEVSE
+>tr|M0NIQ6|M0NIQ6_9EURY 30S ribosomal protein S14 type Z OS=Halorubrum lipolyticum DSM 21995 GN=rps14P PE=3 SV=1
+MSEANNDTGEHAAKRTDSRHTCRRCDREQGLVGKYDINLCRQCFREVARDMGFEKYS
+>tr|L9WHT8|L9WHT8_9EURY 50S ribosomal protein L18Ae OS=Natronorubrum bangense JCM 10635 GN=rplX PE=3 SV=1
+MSQFTVTGQFKSRDGYSPFETTIDAENENVAREHVYSQLGSQHGLKRSEIELDEVSA
+>sp|Q91G65|032R_IIV6 Uncharacterized protein 032R OS=Invertebrate iridescent virus 6 GN=IIV6-032R PE=4 SV=1
+MGVYKFCYNKKKEVGQVAVLQKERLIFYIVTKEKSYLKPTLANFSNAIDSLYNECLLRKC
+CKLAIPKIGCCLDRLYWKTVKNIIIDKLCKKGIEVVVYYI
+>sp|Q66125|2B_CMVQ Suppressor of silencing 2b OS=Cucumber mosaic virus (strain Q) GN=ORF2b PE=1 SV=1
+MDVLTVVVSTADLHLANLQEVKRRRRRSHVRNRRARGYKSPSERARSIARLFQMLPFHGV
+DPVDWFPDVVRSPSVTSLVSYESFDDTDWFAGNEWAEGSF
+>sp|P20194|A100_SSV1 Uncharacterized protein A-100 OS=Sulfolobus spindle-shape virus 1 GN=a100 PE=1 SV=1
+MVSPQTRKEEELLEKQNSVFYLLTLGRKPYGSYLHIKIELDEDEKLEKEIYADNIKLENE
+LRQLKRLYEVYQSVEIDDAQKAIQKEALLTIAKILSVFDF
+>sp|P0DMP9|APOC3_PANTA Apolipoprotein C-III OS=Panthera tigris altaica GN=APOC3 PE=3 SV=1
+MQSRVLLVTALLVLLASARATEGEDPSLLGLMQGYVQHATKTAQDTLTTMREFPVAQQAR
+DWVTGRFSSLKDYWSTLTGKFSGFWDSTFAVTPTPASEAK
+>sp|Q89681|DR4_HHV6U Uncharacterized protein DR4 OS=Human herpesvirus 6A (strain Uganda-1102) GN=DR4L PE=4 SV=1
+MRGTRRGPSGGWSPLGLALPRYPAGSVAASDPRSDTPPPRAPPPPPPLTTSAYRPHTHPA
+AESGARTRAEHARQHARHRPPEVTEPVHVPVLAGVCARVP
+>sp|B3MQ24|ENY2_DROAN Enhancer of yellow 2 transcription factor OS=Drosophila ananassae GN=e(y)2 PE=3 SV=1
+MTVSNTVDQYTVLSGDRSKIKDLLCNRLTECGWRDEVRLLCRTILLEKGTGNSFTVEQLI
+TEVTPKARTLVPDAVKKELLMKIRTILTENESEIEDAEEP
+>tr|H0ACG9|H0ACG9_HALSG Uncharacterized protein OS=Haloredivivus sp. (strain G17) GN=HRED_08116 PE=4 SV=1
+MREVRVPEDRVGVVIGEGGETKKRXEEGFV
+>tr|N6VV02|N6VV02_9EURY Uncharacterized protein (Fragment) OS=Thermoplasmatales archaeon SCGC AB-539-C06 GN=MBGDC06_00209 PE=4 SV=1
+KRLEGALEHGKMNIGSVFIKTTMGPSGRIL
+>tr|X0LYM3|X0LYM3_FUSOX Uncharacterized protein OS=Fusarium oxysporum f. sp. vasinfectum 25433 GN=FOTG_17987 PE=4 SV=1
+MTNAPGRPSKACDICKRQKVGCSPFLERYK
+>tr|V9H0X7|V9H0X7_NEUCS Neurospora crassa DNA for RNA polymerase I second-largest subunit OS=Neurospora crassa PE=4 SV=1
+MPPAHISDNRPDTQPADYEGIIQGALHFAA
+>tr|W9PMP6|W9PMP6_FUSOX Uncharacterized protein OS=Fusarium oxysporum f. sp. pisi HDV247 GN=FOVG_07074 PE=4 SV=1
+MTNAPGRPSKACDICKRQKVGCSPFLERYK
+>tr|H3BPV3|H3BPV3_HUMAN ATP-dependent RNA helicase DDX19B OS=Homo sapiens GN=DDX19B PE=4 SV=1
+MATDSWALAVDEQEAAAESRTELPSPYSTS
+>tr|Q53RS6|Q53RS6_HUMAN Putative uncharacterized protein ALS2CR3 (Fragment) OS=Homo sapiens GN=ALS2CR3 PE=4 SV=1
+MSQSQNAIFTSPTGEENLMNSNHRDSESIT
+>tr|Q9BZF8|Q9BZF8_HUMAN NF-E2-related factor 2 (Fragment) OS=Homo sapiens PE=2 SV=1
+MMDLELPPPGLPSQQDMDLIDILWRQDIDL
b
diff -r 000000000000 -r fae6527990af test-data/dereplication_result1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dereplication_result1.fasta Thu May 21 03:58:09 2015 -0400
b
@@ -0,0 +1,3 @@
+>97485665bcded44c4d86c131ca714848
+gtcgctcctaccgattgaatacgttggtgattgaattggataaagagatatcatcttaaatgatagcaaagcggtaaaca
+tttgtaaactagattatttagaggaaggagaagtcgtaacaaggtttcc
b
diff -r 000000000000 -r fae6527990af test-data/masking_result1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/masking_result1.fasta Thu May 21 03:58:09 2015 -0400
b
@@ -0,0 +1,48 @@
+>tr|M0F5C2|M0F5C2_9EURY 30S ribosomal protein S14 type Z OS=Halorubrum hochstenium ATCC 700873 GN=rps14P PE=3 SV=1
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+>tr|M0KT65|M0KT65_9EURY 50S ribosomal protein L37e OS=Haloarcula amylolytica JCM 13557 GN=rpl37e PE=3 SV=1
+MTGAGTSGKKNTTTHTKCRRCGKSYHTKKKVCSSCGGNNNNNNNNNNNNG
+>tr|M0KH83|M0KH83_9EURY 50S ribosomal protein L18Ae OS=Haloarcula amylolytica JCM 13557 GN=rplX PE=3 SV=1
+MSTYTVRGNNNNNNNNNNNNNNNNNNNNNNNNTTVAA
+>tr|M0PIT7|M0PIT7_9EURY 30S ribosomal protein S14 type Z OS=Halorubrum arcis JCM 13916 GN=rps14P PE=3 SV=1
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+>tr|M0MK44|M0MK44_9EURY 50S ribosomal protein L37e OS=Halococcus saccharolyticus DSM 5350 GN=rpl37e PE=3 SV=1
+MTGSGTSGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNG
+>tr|M0MEH5|M0MEH5_9EURY 30S ribosomal protein S14 type Z OS=Halococcus saccharolyticus DSM 5350 GN=rps14P PE=3 SV=1
+MSSTDTGHATKRTGDCRCGRKGVGKYDWCRCRARGMGRKYK
+>tr|M0D8K9|M0D8K9_9EURY 30S ribosomal protein S14 type Z OS=Halorubrum terrestre JCM 10247 GN=rps14P PE=3 SV=1
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+>tr|L9WBT3|L9WBT3_9EURY 50S ribosomal protein L18Ae OS=Natronorubrum sulfidifaciens JCM 14089 GN=rplX PE=3 SV=1
+MSTVTGKSRDGYATTNNNNNNNNNNGSHGKRTDVS
+>tr|M0NIQ6|M0NIQ6_9EURY 30S ribosomal protein S14 type Z OS=Halorubrum lipolyticum DSM 21995 GN=rps14P PE=3 SV=1
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+>tr|L9WHT8|L9WHT8_9EURY 50S ribosomal protein L18Ae OS=Natronorubrum bangense JCM 10635 GN=rplX PE=3 SV=1
+MSTVTGKSRDGYSTTNNNNNNNNNNNNNNNNNNNNNN
+>sp|Q91G65|032R_IIV6 Uncharacterized protein 032R OS=Invertebrate iridescent virus 6 GN=IIV6-032R PE=4 SV=1
+MGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+>sp|Q66125|2B_CMVQ Suppressor of silencing 2b OS=Cucumber mosaic virus (strain Q) GN=ORF2b PE=1 SV=1
+MDVTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTDWAGNWAGS
+>sp|P20194|A100_SSV1 Uncharacterized protein A-100 OS=Sulfolobus spindle-shape virus 1 GN=a100 PE=1 SV=1
+MVSTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+>sp|P0DMP9|APOC3_PANTA Apolipoprotein C-III OS=Panthera tigris altaica GN=APOC3 PE=3 SV=1
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTTGKSGWDSTAVTTASAK
+>sp|Q89681|DR4_HHV6U Uncharacterized protein DR4 OS=Human herpesvirus 6A (strain Uganda-1102) GN=DR4L PE=4 SV=1
+MRGTRRGSGGWSGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGVCARV
+>sp|B3MQ24|ENY2_DROAN Enhancer of yellow 2 transcription factor OS=Drosophila ananassae GN=e(y)2 PE=3 SV=1
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+>tr|H0ACG9|H0ACG9_HALSG Uncharacterized protein OS=Haloredivivus sp. (strain G17) GN=HRED_08116 PE=4 SV=1
+NNNNNNNNGVVGGGTKKRGV
+>tr|N6VV02|N6VV02_9EURY Uncharacterized protein (Fragment) OS=Thermoplasmatales archaeon SCGC AB-539-C06 GN=MBGDC06_00209 PE=4 SV=1
+KRGAHGKMNGSVKTTMGSGR
+>tr|X0LYM3|X0LYM3_FUSOX Uncharacterized protein OS=Fusarium oxysporum f. sp. vasinfectum 25433 GN=FOTG_17987 PE=4 SV=1
+MTNAGRSKACDCKRKVGCSRYK
+>tr|V9H0X7|V9H0X7_NEUCS Neurospora crassa DNA for RNA polymerase I second-largest subunit OS=Neurospora crassa PE=4 SV=1
+NNNNNNNNTADYGGAHAA
+>tr|W9PMP6|W9PMP6_FUSOX Uncharacterized protein OS=Fusarium oxysporum f. sp. pisi HDV247 GN=FOVG_07074 PE=4 SV=1
+MTNAGRSKACDCKRKVGCSRYK
+>tr|H3BPV3|H3BPV3_HUMAN ATP-dependent RNA helicase DDX19B OS=Homo sapiens GN=DDX19B PE=4 SV=1
+MATNNNNNNNNNNNNTSYSTS
+>tr|Q53RS6|Q53RS6_HUMAN Putative uncharacterized protein ALS2CR3 (Fragment) OS=Homo sapiens GN=ALS2CR3 PE=4 SV=1
+MSSNATSTGNNNNNNNNNNT
+>tr|Q9BZF8|Q9BZF8_HUMAN NF-E2-related factor 2 (Fragment) OS=Homo sapiens PE=2 SV=1
+MMDGNNNNNNNNN
b
diff -r 000000000000 -r fae6527990af test-data/masking_result2.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/masking_result2.fasta Thu May 21 03:58:09 2015 -0400
b
@@ -0,0 +1,48 @@
+>tr|M0F5C2|M0F5C2_9EURY 30S ribosomal protein S14 type Z OS=Halorubrum hochstenium ATCC 700873 GN=rps14P PE=3 SV=1
+MSANNDTGHAAKRTDSRHTCRRCDRGVGKYDNCRCRVARDMGKYS
+>tr|M0KT65|M0KT65_9EURY 50S ribosomal protein L37e OS=Haloarcula amylolytica JCM 13557 GN=rpl37e PE=3 SV=1
+MTGAGTSGKKNTTTHTKCRRCGKSYHTKKKVCSSCGGKSAKRRDYWSKAG
+>tr|M0KH83|M0KH83_9EURY 50S ribosomal protein L18Ae OS=Haloarcula amylolytica JCM 13557 GN=rplX PE=3 SV=1
+MSTYTVRGSARDGKVDANNVARVYSDGSHNKRTTVAA
+>tr|M0PIT7|M0PIT7_9EURY 30S ribosomal protein S14 type Z OS=Halorubrum arcis JCM 13916 GN=rps14P PE=3 SV=1
+MSANNDTGHAAKRTDSRHTCRRCDRGVGKYDNCRCRVARDMGKYS
+>tr|M0MK44|M0MK44_9EURY 50S ribosomal protein L37e OS=Halococcus saccharolyticus DSM 5350 GN=rpl37e PE=3 SV=1
+MTGSGTSGKKNKTVHVKCRRCGASYHKTKKVCASCGGKSAKRRDYAWKAG
+>tr|M0MEH5|M0MEH5_9EURY 30S ribosomal protein S14 type Z OS=Halococcus saccharolyticus DSM 5350 GN=rps14P PE=3 SV=1
+MSSTDTGHATKRTGDCRCGRKGVGKYDWCRCRARGMGRKYK
+>tr|M0D8K9|M0D8K9_9EURY 30S ribosomal protein S14 type Z OS=Halorubrum terrestre JCM 10247 GN=rps14P PE=3 SV=1
+MSANNDTGHAAKRTDSRHTCRRCDRGVGKYDNCRCRVARDMGKYS
+>tr|L9WBT3|L9WBT3_9EURY 50S ribosomal protein L18Ae OS=Natronorubrum sulfidifaciens JCM 14089 GN=rplX PE=3 SV=1
+MSTVTGKSRDGYATTDANNVARHVSGSHGKRTDVS
+>tr|M0NIQ6|M0NIQ6_9EURY 30S ribosomal protein S14 type Z OS=Halorubrum lipolyticum DSM 21995 GN=rps14P PE=3 SV=1
+MSANNDTGHAAKRTDSRHTCRRCDRGVGKYDNCRCRVARDMGKYS
+>tr|L9WHT8|L9WHT8_9EURY 50S ribosomal protein L18Ae OS=Natronorubrum bangense JCM 10635 GN=rplX PE=3 SV=1
+MSTVTGKSRDGYSTTDANNVARHVYSGSHGKRSDVSA
+>sp|Q91G65|032R_IIV6 Uncharacterized protein 032R OS=Invertebrate iridescent virus 6 GN=IIV6-032R PE=4 SV=1
+MGVYKCYNKKKVGVAVKRYVTKKSYKTANSNADSYNCRKCCKAKGCCDRYWKTVKNDKCKKGVVVYY
+>sp|Q66125|2B_CMVQ Suppressor of silencing 2b OS=Cucumber mosaic virus (strain Q) GN=ORF2b PE=1 SV=1
+MDVTVVVSTADHANVKRRRRRSHVRNRRARGYKSSRARSARMHGVDVDWDVVRSSVTSVSYSDDTDWAGNWAGS
+>sp|P20194|A100_SSV1 Uncharacterized protein A-100 OS=Sulfolobus spindle-shape virus 1 GN=a100 PE=1 SV=1
+MVSTRKKNSVYTGRKYGSYHKDDKKYADNKNRKRYVYSVDDAKAKATAKSVD
+>sp|P0DMP9|APOC3_PANTA Apolipoprotein C-III OS=Panthera tigris altaica GN=APOC3 PE=3 SV=1
+MSRVVTAVASARATGDSGMGYVHATKTADTTTMRVAARDWVTGRSSKDYWSTTGKSGWDSTAVTTASAK
+>sp|Q89681|DR4_HHV6U Uncharacterized protein DR4 OS=Human herpesvirus 6A (strain Uganda-1102) GN=DR4L PE=4 SV=1
+MRGTRRGSGGWSGARYAGSVAASDRSDTRATTSAYRHTHAASGARTRAHARHARHRVTVHVVAGVCARV
+>sp|B3MQ24|ENY2_DROAN Enhancer of yellow 2 transcription factor OS=Drosophila ananassae GN=e(y)2 PE=3 SV=1
+MTVSNTVDYTVSGDRSKKDCNRTCGWRDVRCRTKGTGNSTVTVTKARTVDAVKKMKRTTNSDA
+>tr|H0ACG9|H0ACG9_HALSG Uncharacterized protein OS=Haloredivivus sp. (strain G17) GN=HRED_08116 PE=4 SV=1
+MRVRVDRVGVVGGGTKKRGV
+>tr|N6VV02|N6VV02_9EURY Uncharacterized protein (Fragment) OS=Thermoplasmatales archaeon SCGC AB-539-C06 GN=MBGDC06_00209 PE=4 SV=1
+KRGAHGKMNGSVKTTMGSGR
+>tr|X0LYM3|X0LYM3_FUSOX Uncharacterized protein OS=Fusarium oxysporum f. sp. vasinfectum 25433 GN=FOTG_17987 PE=4 SV=1
+MTNAGRSKACDCKRKVGCSRYK
+>tr|V9H0X7|V9H0X7_NEUCS Neurospora crassa DNA for RNA polymerase I second-largest subunit OS=Neurospora crassa PE=4 SV=1
+MAHSDNRDTADYGGAHAA
+>tr|W9PMP6|W9PMP6_FUSOX Uncharacterized protein OS=Fusarium oxysporum f. sp. pisi HDV247 GN=FOVG_07074 PE=4 SV=1
+MTNAGRSKACDCKRKVGCSRYK
+>tr|H3BPV3|H3BPV3_HUMAN ATP-dependent RNA helicase DDX19B OS=Homo sapiens GN=DDX19B PE=4 SV=1
+MATDSWAAVDAAASRTSYSTS
+>tr|Q53RS6|Q53RS6_HUMAN Putative uncharacterized protein ALS2CR3 (Fragment) OS=Homo sapiens GN=ALS2CR3 PE=4 SV=1
+MSSNATSTGNMNSNHRDSST
+>tr|Q9BZF8|Q9BZF8_HUMAN NF-E2-related factor 2 (Fragment) OS=Homo sapiens PE=2 SV=1
+MMDGSDMDDWRDD
b
diff -r 000000000000 -r fae6527990af test-data/query.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/query.fasta Thu May 21 03:58:09 2015 -0400
b
@@ -0,0 +1,26 @@
+>ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.
+GAGAGGACGAGGTGCCGCTGCCTGGAGAATCCTCCGCTGCCGTCGGCTCCCGGAGCCCAG
+CCCTTTCCTAACCCAACCCAACCTAGCCCAGTCCCAGCCGCCAGCGCCTGTCCCTGTCAC
+GGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTC
+CCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA
+TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGC
+TGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCAT
+TAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCA
+CTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA
+TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTA
+CATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCAC
+TCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTA
+TAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT
+TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGG
+GCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAA
+TTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGA
+ATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAAAG
+TTTAGAAATATTCCAGAATGAAGTAGCTCGGCAATTAATAAGTGAAAAAGGTACAATAAA
+CTTTTTACATGCCGATTGTGACAAATTTAGACATCCTCTTCTGCACATACAGAAAACTCC
+AGCAGATTGTCCTGTAATCGCTATTGACAGCTTTAGGCATATGTATGTGTTTGGAGACTT
+CAAAGATGTATTAATTCCTGGAAAACTCAAGCAATTCGTATTTGACTTACATTCTGGAAA
+ACTGCACAGAGAATTCCATCATGGACCTGACCCAACTGATACAGCCCCAGGAGAGCAAGC
+CCAAGATGTAGCAAGCAGTCCACCTGAGAGCTCCTTCCAGAAACTAGCACCCAGTGAATA
+TAGGTATACTCTATTGAGGGATCGAGATGAGCTTTAAAAACTTGAAAAACAGTTTGTAAG
+CCTTTCAACAGCAGCATCAACCTACGTGGTGGAAATAGTAAACCTATATTTTCATAATTC
+TATGTGTATTTTTATTTTGAATAAACAGAAAGAAATTTTGGGTTTTTAATTTTTTTCTCC
b
diff -r 000000000000 -r fae6527990af test-data/search_blast6out_result1.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/search_blast6out_result1.tabular Thu May 21 03:58:09 2015 -0400
b
@@ -0,0 +1,1 @@
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds. ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds. 100.0 1500 0 0 1 1500 1 4796 -1 0
b
diff -r 000000000000 -r fae6527990af test-data/search_dbmatched_result1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/search_dbmatched_result1.fasta Thu May 21 03:58:09 2015 -0400
b
@@ -0,0 +1,61 @@
+>ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.
+GAGAGGACGAGGTGCCGCTGCCTGGAGAATCCTCCGCTGCCGTCGGCTCCCGGAGCCCAGCCCTTTCCTAACCCAACCCA
+ACCTAGCCCAGTCCCAGCCGCCAGCGCCTGTCCCTGTCACGGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCC
+TTACCCGACCTCAGATGCTCCCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA
+TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGCTGACTGGTGTCGTTTCAGTC
+AGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCATTAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCC
+AGAGTTGATTGTGATCAGCACTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA
+TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTACATCAGGCAACAAAAAAGTG
+ACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCACTCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAG
+CAAAAGGACTCGGACAACTATAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT
+TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGGGCATTCTGCTCCGGATATGG
+TGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAATTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATA
+ACATTTGAAAATGGAGAGGAATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAAAG
+TTTAGAAATATTCCAGAATGAAGTAGCTCGGCAATTAATAAGTGAAAAAGGTACAATAAACTTTTTACATGCCGATTGTG
+ACAAATTTAGACATCCTCTTCTGCACATACAGAAAACTCCAGCAGATTGTCCTGTAATCGCTATTGACAGCTTTAGGCAT
+ATGTATGTGTTTGGAGACTTCAAAGATGTATTAATTCCTGGAAAACTCAAGCAATTCGTATTTGACTTACATTCTGGAAA
+ACTGCACAGAGAATTCCATCATGGACCTGACCCAACTGATACAGCCCCAGGAGAGCAAGCCCAAGATGTAGCAAGCAGTC
+CACCTGAGAGCTCCTTCCAGAAACTAGCACCCAGTGAATATAGGTATACTCTATTGAGGGATCGAGATGAGCTTTAAAAA
+CTTGAAAAACAGTTTGTAAGCCTTTCAACAGCAGCATCAACCTACGTGGTGGAAATAGTAAACCTATATTTTCATAATTC
+TATGTGTATTTTTATTTTGAATAAACAGAAAGAAATTTTGGGTTTTTAATTTTTTTCTCCCCGACTCAAAATGCATTGTC
+ATTTAATATAGTAGCCTCTTAAAAAAAAAAAAACCTGCTAGGATTTAAAAATAAAAATCAGAGGCCTATCTCCACTTTAA
+ATCTGTCCTGTAAAAGTTTTATAAATCAAATGAAAGGTGACATTGCCAGAAACTTACCATTAACTTGCACTACTAGGGTA
+GGGAGGACTTAGGATGTTTCCTGTGTCGTATGTGCTTTTCTTTCTTTCATATGATCAATTCTGTTGGTATTTTCAGTATC
+TCATTTCTCAAAGCTAAAGAGATATACATTCTGGATACTTGGGAGGGGAATAAATTAAAGTTTTCACACTGTGTACTGTG
+TTTTACTGATTGGTTGGATATTGCTTATGAAAATTCCATAGTGGTATTTTTTTGGATTCTTAATGTGTAACTTAAACATA
+CTTTGAAGTGGAGGAGAGTCATAAGACAGAACATTTGGCAGGAATTGTCCTTATGAAACAAGAAAAAGAAAATGAAAAGT
+ATTATTAAGCTTCTGTGTTTGTCTAAAAATGTGGCATATGGATGGCATTTAAAACTTTGAATGAATTATACCTAAATCTG
+GGACAGGGAGGTGACAGTGGAACAGGCTACCAATCAGAACTAGATGACTTTTAAGGCTCCTCCTATTATGAGACTTCAAT
+TTCCAAAGAGAAGAACTAGCAGAGAAATTGTATTTCAGTAATTTTAAGCTCCTTCTGTCTTGTAGAGTCTTGTTATAGTT
+GTATAAATCAAAAACACAGAATAAGGAACATATTTAACTTTTTTTCATTATAAAATGGTTAGAGGACCCTACCCCCTCTA
+GATTCCCTGATTTCCCCAGGCCTGCAGCATACAGTAAGATGGGTCCCTGTGCCAGGCCTCAATACTGCCAGGGAATAAAA
+CCAGAGGGAGAGGACCCTCAGTGTCATATCAGGAAGCCCAGTGCCAGAGGACAGACAGGTTCAAAACTGGCTTTTCCTCT
+GGGCCTGGGTTGGTGCTATAGGCCAAGGGTCATTTTATACTTGGGTATAAATCAATCCCAGTTTGGGAAAAGATTATTTT
+TAAGCTTAAAAGGCTGACATGTGCCATTATATGTAGTATGTAATATATGTAACATCTTCCAATTCTTTTAAAATAAAATT
+AATATTTATAATGGATATTTAATGATTGTTATTTTTAAAAACCAGCTTATAATTCCTCGTTATGCATGATTTATCCAAAG
+TTTCCATAGTTTTATTCAAAATAATAAATGTTAATAAGGTGATAAGGGGTATATTTAATGTATTGTATCAAATTGTGAAT
+AAGAAAGTAGGATGGAGCTTTCTAGAGGTTGGGCCTTAGTTCTGTTATCCTCATTGCTTTTAACCAATAAGTTAAATGAA
+GTTAGAGTTATGGTCTTCAGGTTAGATTATGGACCAGATCTGTGAGGGTCAGCATGGAAATTCACATTCAACAAGGTAGC
+ACACAGGACCAAGAGCAGCACATGCAATCAACTGGAATAATATAGTAATCCTGTAACTGGGTTTGAAAAAATAATCAACA
+AAAGATACAATTCAAGGGTTAGGTTGCAGAGAGCTGGCTTGAGAGTAGTTATTATGAAAAAGGCCTCAAGGAGTACGTGT
+TCAGTATGCTCTAAGATGATAAAGTGGCTGTTAAAAAGGGAGTTGATTTGAGGAAGTATTACTTAGCATTCATGCATATT
+GGGCTTAGGCTCTAGCCCTGCCACTATCATTGTCTTCTCTGGACTGTGAAGTCACTGAGGACAAGGAAACTAAATTTAAT
+GTCTGTATCACTAGTGCCTAGAATTTCTGGACACTTAGTAGTCACCATCAGGCGTTTATTTAATGAATGAGAAGCAAAGT
+GACCTTGGTTACTTTTTTACCCTGAGGGGCTCAGCACTCATTAGGACTTGGTGCCTAATTTTATAAAAAGTCACTAAGCT
+CAAGTGCTTGGATGAAAGGACAGCGTGGATAAAAAGGTTTTTAAAACATGGATGTTAAGGCTGTTTTGCTTGGAGAAGAC
+TTGGGACTGGGACAGTCTTTAGATATTATTTGAAATGCTGGCACTGTCTATCTGGATCCCAGGGCTTGAACTAGGATTTG
+AGGAAGTCACAGGGAAGCAGATTTCAGTCTGACATTTATTCAGTGCAAGTTTTTTGGTGCTGTAGTATATGATGAAAGAT
+GTAAAGCTGAATAAAGCATTATTTCTGCCCTAGAGTTGTTCACAGCCTAGTCAGGCATATGGATATGTAAACAATGACTG
+TAACGTGTTATAGATGTAAAGACAAAATAAAGGTTAAAGAGGGCATAAAGGAGCACTCAATTGCAGAGATTTGAGGACAT
+TATTTTTATTTTGAGCTTTAAAAAGATGAATAGGTGTTCTCAGGAGGTAGGGATCTGGCTGAGAGGGAATAATCTGAGCA
+AAGGTATGAAACAGCCTAATGCATTAGAGAAAAAAGTTCTTTTAGTAAGGCATTTGGGGTTGGGGAAGCTAGAAAAAGAA
+ATGGGAGCTGGTCACACAGGGCCTTGTGTGCCAGACTAAGGGGTTTGTAGTATATATTGTAGGCAGAAGAGATCCATCAA
+CAGATTGCAAGCAAGGAAGTATGTTCACTTTAAAGTTTGAGAAAGAATAGTGTGGAAGCACGTCTCAAATTTAGACTTAC
+TTGTTCCCCCTCTGAACCGTGAATCAGACCATTTCAGGTAGAAGTCTTCCCCGGTTTATCTGATCTACTCGGGGCCTCAG
+GCTTCTCAGCTGGGAAGAGAGGATGCAAGACCAGACTGAAGAACACGGTTGAGTCCCCAGAACCAAAAGGGGGCCTTTCT
+GCTTCTTAGCCAGCTACCTCTTCGAGTTTTTCAAATTGTGAGGGGGACCATAAAAGGATGGAAACTTTTAGATGACATTC
+TACAAATTATTTTTTTCTTTAAATTAAAAGAACCTAGCCAATAAGATAGAGAATGGGCATCTAAGGCATCTCAGAGCTCT
+CTGATGAAGCCAGGTTGTCAAAGATCATTTGCAAAAGAAGGGAAAACTGGCATGACAAAAGCTACAGAGAGGAGAGTGAA
+ATATAGAAGTGTTTGAAATGTTCAAGCTCACAATAAGCTTAAATTTATAGAAAATGCTAAGGTTGTCAAGAAGGCTTTTT
+TTTTTTTCTTTTTTAAACCTGAGGGCAAAAAGGAATGGATAAAGTAGTGTAATGGATTGACAATCAGGAAGAACAGAATA
+ACTCAGTTTTTTTTTCTCCTACAAGGAGATATGGCTGGACCAAAATAAAATGACATGAAATTGCAAAAATGAAAAT
b
diff -r 000000000000 -r fae6527990af test-data/search_fastapairs_result2.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/search_fastapairs_result2.fasta Thu May 21 03:58:09 2015 -0400
b
@@ -0,0 +1,5 @@
+>ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.
+GAGAGGACGAGGTGCCGCTGCCTGGAGAATCCTCCGCTGCCGTCGGCTCCCGGAGCCCAGCCCTTTCCTAACCCAACCCAACCTAGCCCAGTCCCAGCCGCCAGCGCCTGTCCCTGTCACGGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTCCCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGATACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGCTGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCATTAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCACTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAATGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTACATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCACTCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTATAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATTTGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGGGCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAATTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGAATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAAAGTTTAGAAATATTCCAGAATGAAGTAGCTCGGCAATTAATAAGTGAAAAAGGTACAATAAACTTTTTACATGCCGATTGTGACAAATTTAGACATCCTCTTCTGCACATACAGAAAACTCCAGCAGATTGTCCTGTAATCGCTATTGACAGCTTTAGGCATATGTATGTGTTTGGAGACTTCAAAGATGTATTAATTCCTGGAAAACTCAAGCAATTCGTATTTGACTTACATTCTGGAAAACTGCACAGAGAATTCCATCATGGACCTGACCCAACTGATACAGCCCCAGGAGAGCAAGCCCAAGATGTAGCAAGCAGTCCACCTGAGAGCTCCTTCCAGAAACTAGCACCCAGTGAATATAGGTATACTCTATTGAGGGATCGAGATGAGCTTTAAAAACTTGAAAAACAGTTTGTAAGCCTTTCAACAGCAGCATCAACCTACGTGGTGGAAATAGTAAACCTATATTTTCATAATTCTATGTgtatttttattttgaataaacagaaagaaattttgggtttttaatttttttCTCC
+>ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.
+GAGAGGACGAGGTGCCGCTGCCTGGAGAATCCTCCGCTGCCGTCGGCTCCCGGAGCCCAGCCCTTTCCTAACCCAACCCAACCTAGCCCAGTCCCAGCCGCCAGCGCCTGTCCCTGTCACGGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTCCCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGATACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGCTGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCATTAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCACTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAATGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTACATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCACTCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTATAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATTTGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGGGCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAATTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGAATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAAAGTTTAGAAATATTCCAGAATGAAGTAGCTCGGCAATTAATAAGTGAAAAAGGTACAATAAACTTTTTACATGCCGATTGTGACAAATTTAGACATCCTCTTCTGCACATACAGAAAACTCCAGCAGATTGTCCTGTAATCGCTATTGACAGCTTTAGGCATATGTATGTGTTTGGAGACTTCAAAGATGTATTAATTCCTGGAAAACTCAAGCAATTCGTATTTGACTTACATTCTGGAAAACTGCACAGAGAATTCCATCATGGACCTGACCCAACTGATACAGCCCCAGGAGAGCAAGCCCAAGATGTAGCAAGCAGTCCACCTGAGAGCTCCTTCCAGAAACTAGCACCCAGTGAATATAGGTATACTCTATTGAGGGATCGAGATGAGCTTTAAAAACTTGAAAAACAGTTTGTAAGCCTTTCAACAGCAGCATCAACCTACGTGGTGGAAATAGTAAACCTATATTTTCATAATTCTATGTGTATTTTTATTTTGAATAAACAGAAAGAAATTTTGGGTTTTTAATTTTTTTCTCC
+
b
diff -r 000000000000 -r fae6527990af test-data/search_matched_result2.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/search_matched_result2.fasta Thu May 21 03:58:09 2015 -0400
b
@@ -0,0 +1,20 @@
+>ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.
+GAGAGGACGAGGTGCCGCTGCCTGGAGAATCCTCCGCTGCCGTCGGCTCCCGGAGCCCAGCCCTTTCCTAACCCAACCCA
+ACCTAGCCCAGTCCCAGCCGCCAGCGCCTGTCCCTGTCACGGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCC
+TTACCCGACCTCAGATGCTCCCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA
+TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGCTGACTGGTGTCGTTTCAGTC
+AGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCATTAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCC
+AGAGTTGATTGTGATCAGCACTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA
+TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTACATCAGGCAACAAAAAAGTG
+ACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCACTCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAG
+CAAAAGGACTCGGACAACTATAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT
+TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGGGCATTCTGCTCCGGATATGG
+TGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAATTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATA
+ACATTTGAAAATGGAGAGGAATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAAAG
+TTTAGAAATATTCCAGAATGAAGTAGCTCGGCAATTAATAAGTGAAAAAGGTACAATAAACTTTTTACATGCCGATTGTG
+ACAAATTTAGACATCCTCTTCTGCACATACAGAAAACTCCAGCAGATTGTCCTGTAATCGCTATTGACAGCTTTAGGCAT
+ATGTATGTGTTTGGAGACTTCAAAGATGTATTAATTCCTGGAAAACTCAAGCAATTCGTATTTGACTTACATTCTGGAAA
+ACTGCACAGAGAATTCCATCATGGACCTGACCCAACTGATACAGCCCCAGGAGAGCAAGCCCAAGATGTAGCAAGCAGTC
+CACCTGAGAGCTCCTTCCAGAAACTAGCACCCAGTGAATATAGGTATACTCTATTGAGGGATCGAGATGAGCTTTAAAAA
+CTTGAAAAACAGTTTGTAAGCCTTTCAACAGCAGCATCAACCTACGTGGTGGAAATAGTAAACCTATATTTTCATAATTC
+TATGTgtatttttattttgaataaacagaaagaaattttgggtttttaatttttttCTCC
b
diff -r 000000000000 -r fae6527990af test-data/search_userfields_result2.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/search_userfields_result2.tabular Thu May 21 03:58:09 2015 -0400
b
@@ -0,0 +1,1 @@
+ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds. ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds. -1
b
diff -r 000000000000 -r fae6527990af test-data/shuffling_result1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/shuffling_result1.fasta Thu May 21 03:58:09 2015 -0400
b
@@ -0,0 +1,10 @@
+>tr|Q53RS6|Q53RS6_HUMAN Putative uncharacterized protein ALS2CR3 (Fragment) OS=Homo sapiens GN=ALS2CR3 PE=4 SV=1
+MSSNATSTGNMNSNHRDSST
+>tr|M0MEH5|M0MEH5_9EURY 30S ribosomal protein S14 type Z OS=Halococcus saccharolyticus DSM 5350 GN=rps14P PE=3 SV=1
+MSSTDTGHATKRTGDCRCGRKGVGKYDWCRCRARGMGRKYK
+>sp|Q66125|2B_CMVQ Suppressor of silencing 2b OS=Cucumber mosaic virus (strain Q) GN=ORF2b PE=1 SV=1
+MDVTVVVSTADHANVKRRRRRSHVRNRRARGYKSSRARSARMHGVDVDWDVVRSSVTSVSYSDDTDWAGNWAGS
+>sp|B3MQ24|ENY2_DROAN Enhancer of yellow 2 transcription factor OS=Drosophila ananassae GN=e(y)2 PE=3 SV=1
+MTVSNTVDYTVSGDRSKKDCNRTCGWRDVRCRTKGTGNSTVTVTKARTVDAVKKMKRTTNSDA
+>tr|M0D8K9|M0D8K9_9EURY 30S ribosomal protein S14 type Z OS=Halorubrum terrestre JCM 10247 GN=rps14P PE=3 SV=1
+MSANNDTGHAAKRTDSRHTCRRCDRGVGKYDNCRCRVARDMGKYS
b
diff -r 000000000000 -r fae6527990af test-data/sorting_result1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sorting_result1.fasta Thu May 21 03:58:09 2015 -0400
b
@@ -0,0 +1,48 @@
+>sp|B3MQ24|ENY2_DROAN Enhancer of yellow 2 transcription factor OS=Drosophila ananassae GN=e(y)2 PE=3 SV=1
+MTVSNTVDYTVSGDRSKKDCNRTCGWRDVRCRTKGTGNSTVTVTKARTVDAVKKMKRTTNSDA
+>sp|P0DMP9|APOC3_PANTA Apolipoprotein C-III OS=Panthera tigris altaica GN=APOC3 PE=3 SV=1
+MSRVVTAVASARATGDSGMGYVHATKTADTTTMRVAARDWVTGRSSKDYWSTTGKSGWDSTAVTTASAK
+>sp|P20194|A100_SSV1 Uncharacterized protein A-100 OS=Sulfolobus spindle-shape virus 1 GN=a100 PE=1 SV=1
+MVSTRKKNSVYTGRKYGSYHKDDKKYADNKNRKRYVYSVDDAKAKATAKSVD
+>sp|Q66125|2B_CMVQ Suppressor of silencing 2b OS=Cucumber mosaic virus (strain Q) GN=ORF2b PE=1 SV=1
+MDVTVVVSTADHANVKRRRRRSHVRNRRARGYKSSRARSARMHGVDVDWDVVRSSVTSVSYSDDTDWAGNWAGS
+>sp|Q89681|DR4_HHV6U Uncharacterized protein DR4 OS=Human herpesvirus 6A (strain Uganda-1102) GN=DR4L PE=4 SV=1
+MRGTRRGSGGWSGARYAGSVAASDRSDTRATTSAYRHTHAASGARTRAHARHARHRVTVHVVAGVCARV
+>sp|Q91G65|032R_IIV6 Uncharacterized protein 032R OS=Invertebrate iridescent virus 6 GN=IIV6-032R PE=4 SV=1
+MGVYKCYNKKKVGVAVKRYVTKKSYKTANSNADSYNCRKCCKAKGCCDRYWKTVKNDKCKKGVVVYY
+>tr|H0ACG9|H0ACG9_HALSG Uncharacterized protein OS=Haloredivivus sp. (strain G17) GN=HRED_08116 PE=4 SV=1
+MRVRVDRVGVVGGGTKKRGV
+>tr|H3BPV3|H3BPV3_HUMAN ATP-dependent RNA helicase DDX19B OS=Homo sapiens GN=DDX19B PE=4 SV=1
+MATDSWAAVDAAASRTSYSTS
+>tr|L9WBT3|L9WBT3_9EURY 50S ribosomal protein L18Ae OS=Natronorubrum sulfidifaciens JCM 14089 GN=rplX PE=3 SV=1
+MSTVTGKSRDGYATTDANNVARHVSGSHGKRTDVS
+>tr|L9WHT8|L9WHT8_9EURY 50S ribosomal protein L18Ae OS=Natronorubrum bangense JCM 10635 GN=rplX PE=3 SV=1
+MSTVTGKSRDGYSTTDANNVARHVYSGSHGKRSDVSA
+>tr|M0D8K9|M0D8K9_9EURY 30S ribosomal protein S14 type Z OS=Halorubrum terrestre JCM 10247 GN=rps14P PE=3 SV=1
+MSANNDTGHAAKRTDSRHTCRRCDRGVGKYDNCRCRVARDMGKYS
+>tr|M0F5C2|M0F5C2_9EURY 30S ribosomal protein S14 type Z OS=Halorubrum hochstenium ATCC 700873 GN=rps14P PE=3 SV=1
+MSANNDTGHAAKRTDSRHTCRRCDRGVGKYDNCRCRVARDMGKYS
+>tr|M0KH83|M0KH83_9EURY 50S ribosomal protein L18Ae OS=Haloarcula amylolytica JCM 13557 GN=rplX PE=3 SV=1
+MSTYTVRGSARDGKVDANNVARVYSDGSHNKRTTVAA
+>tr|M0KT65|M0KT65_9EURY 50S ribosomal protein L37e OS=Haloarcula amylolytica JCM 13557 GN=rpl37e PE=3 SV=1
+MTGAGTSGKKNTTTHTKCRRCGKSYHTKKKVCSSCGGKSAKRRDYWSKAG
+>tr|M0MEH5|M0MEH5_9EURY 30S ribosomal protein S14 type Z OS=Halococcus saccharolyticus DSM 5350 GN=rps14P PE=3 SV=1
+MSSTDTGHATKRTGDCRCGRKGVGKYDWCRCRARGMGRKYK
+>tr|M0MK44|M0MK44_9EURY 50S ribosomal protein L37e OS=Halococcus saccharolyticus DSM 5350 GN=rpl37e PE=3 SV=1
+MTGSGTSGKKNKTVHVKCRRCGASYHKTKKVCASCGGKSAKRRDYAWKAG
+>tr|M0NIQ6|M0NIQ6_9EURY 30S ribosomal protein S14 type Z OS=Halorubrum lipolyticum DSM 21995 GN=rps14P PE=3 SV=1
+MSANNDTGHAAKRTDSRHTCRRCDRGVGKYDNCRCRVARDMGKYS
+>tr|M0PIT7|M0PIT7_9EURY 30S ribosomal protein S14 type Z OS=Halorubrum arcis JCM 13916 GN=rps14P PE=3 SV=1
+MSANNDTGHAAKRTDSRHTCRRCDRGVGKYDNCRCRVARDMGKYS
+>tr|N6VV02|N6VV02_9EURY Uncharacterized protein (Fragment) OS=Thermoplasmatales archaeon SCGC AB-539-C06 GN=MBGDC06_00209 PE=4 SV=1
+KRGAHGKMNGSVKTTMGSGR
+>tr|Q53RS6|Q53RS6_HUMAN Putative uncharacterized protein ALS2CR3 (Fragment) OS=Homo sapiens GN=ALS2CR3 PE=4 SV=1
+MSSNATSTGNMNSNHRDSST
+>tr|Q9BZF8|Q9BZF8_HUMAN NF-E2-related factor 2 (Fragment) OS=Homo sapiens PE=2 SV=1
+MMDGSDMDDWRDD
+>tr|V9H0X7|V9H0X7_NEUCS Neurospora crassa DNA for RNA polymerase I second-largest subunit OS=Neurospora crassa PE=4 SV=1
+MAHSDNRDTADYGGAHAA
+>tr|W9PMP6|W9PMP6_FUSOX Uncharacterized protein OS=Fusarium oxysporum f. sp. pisi HDV247 GN=FOVG_07074 PE=4 SV=1
+MTNAGRSKACDCKRKVGCSRYK
+>tr|X0LYM3|X0LYM3_FUSOX Uncharacterized protein OS=Fusarium oxysporum f. sp. vasinfectum 25433 GN=FOTG_17987 PE=4 SV=1
+MTNAGRSKACDCKRKVGCSRYK
b
diff -r 000000000000 -r fae6527990af test-data/sorting_result2.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sorting_result2.fasta Thu May 21 03:58:09 2015 -0400
b
@@ -0,0 +1,48 @@
+>sp|Q66125|2B_CMVQ Suppressor of silencing 2b OS=Cucumber mosaic virus (strain Q) GN=ORF2b PE=1 SV=1
+MDVTVVVSTADHANVKRRRRRSHVRNRRARGYKSSRARSARMHGVDVDWDVVRSSVTSVSYSDDTDWAGNWAGS
+>sp|P0DMP9|APOC3_PANTA Apolipoprotein C-III OS=Panthera tigris altaica GN=APOC3 PE=3 SV=1
+MSRVVTAVASARATGDSGMGYVHATKTADTTTMRVAARDWVTGRSSKDYWSTTGKSGWDSTAVTTASAK
+>sp|Q89681|DR4_HHV6U Uncharacterized protein DR4 OS=Human herpesvirus 6A (strain Uganda-1102) GN=DR4L PE=4 SV=1
+MRGTRRGSGGWSGARYAGSVAASDRSDTRATTSAYRHTHAASGARTRAHARHARHRVTVHVVAGVCARV
+>sp|Q91G65|032R_IIV6 Uncharacterized protein 032R OS=Invertebrate iridescent virus 6 GN=IIV6-032R PE=4 SV=1
+MGVYKCYNKKKVGVAVKRYVTKKSYKTANSNADSYNCRKCCKAKGCCDRYWKTVKNDKCKKGVVVYY
+>sp|B3MQ24|ENY2_DROAN Enhancer of yellow 2 transcription factor OS=Drosophila ananassae GN=e(y)2 PE=3 SV=1
+MTVSNTVDYTVSGDRSKKDCNRTCGWRDVRCRTKGTGNSTVTVTKARTVDAVKKMKRTTNSDA
+>sp|P20194|A100_SSV1 Uncharacterized protein A-100 OS=Sulfolobus spindle-shape virus 1 GN=a100 PE=1 SV=1
+MVSTRKKNSVYTGRKYGSYHKDDKKYADNKNRKRYVYSVDDAKAKATAKSVD
+>tr|M0KT65|M0KT65_9EURY 50S ribosomal protein L37e OS=Haloarcula amylolytica JCM 13557 GN=rpl37e PE=3 SV=1
+MTGAGTSGKKNTTTHTKCRRCGKSYHTKKKVCSSCGGKSAKRRDYWSKAG
+>tr|M0MK44|M0MK44_9EURY 50S ribosomal protein L37e OS=Halococcus saccharolyticus DSM 5350 GN=rpl37e PE=3 SV=1
+MTGSGTSGKKNKTVHVKCRRCGASYHKTKKVCASCGGKSAKRRDYAWKAG
+>tr|M0D8K9|M0D8K9_9EURY 30S ribosomal protein S14 type Z OS=Halorubrum terrestre JCM 10247 GN=rps14P PE=3 SV=1
+MSANNDTGHAAKRTDSRHTCRRCDRGVGKYDNCRCRVARDMGKYS
+>tr|M0F5C2|M0F5C2_9EURY 30S ribosomal protein S14 type Z OS=Halorubrum hochstenium ATCC 700873 GN=rps14P PE=3 SV=1
+MSANNDTGHAAKRTDSRHTCRRCDRGVGKYDNCRCRVARDMGKYS
+>tr|M0NIQ6|M0NIQ6_9EURY 30S ribosomal protein S14 type Z OS=Halorubrum lipolyticum DSM 21995 GN=rps14P PE=3 SV=1
+MSANNDTGHAAKRTDSRHTCRRCDRGVGKYDNCRCRVARDMGKYS
+>tr|M0PIT7|M0PIT7_9EURY 30S ribosomal protein S14 type Z OS=Halorubrum arcis JCM 13916 GN=rps14P PE=3 SV=1
+MSANNDTGHAAKRTDSRHTCRRCDRGVGKYDNCRCRVARDMGKYS
+>tr|M0MEH5|M0MEH5_9EURY 30S ribosomal protein S14 type Z OS=Halococcus saccharolyticus DSM 5350 GN=rps14P PE=3 SV=1
+MSSTDTGHATKRTGDCRCGRKGVGKYDWCRCRARGMGRKYK
+>tr|L9WHT8|L9WHT8_9EURY 50S ribosomal protein L18Ae OS=Natronorubrum bangense JCM 10635 GN=rplX PE=3 SV=1
+MSTVTGKSRDGYSTTDANNVARHVYSGSHGKRSDVSA
+>tr|M0KH83|M0KH83_9EURY 50S ribosomal protein L18Ae OS=Haloarcula amylolytica JCM 13557 GN=rplX PE=3 SV=1
+MSTYTVRGSARDGKVDANNVARVYSDGSHNKRTTVAA
+>tr|L9WBT3|L9WBT3_9EURY 50S ribosomal protein L18Ae OS=Natronorubrum sulfidifaciens JCM 14089 GN=rplX PE=3 SV=1
+MSTVTGKSRDGYATTDANNVARHVSGSHGKRTDVS
+>tr|W9PMP6|W9PMP6_FUSOX Uncharacterized protein OS=Fusarium oxysporum f. sp. pisi HDV247 GN=FOVG_07074 PE=4 SV=1
+MTNAGRSKACDCKRKVGCSRYK
+>tr|X0LYM3|X0LYM3_FUSOX Uncharacterized protein OS=Fusarium oxysporum f. sp. vasinfectum 25433 GN=FOTG_17987 PE=4 SV=1
+MTNAGRSKACDCKRKVGCSRYK
+>tr|H3BPV3|H3BPV3_HUMAN ATP-dependent RNA helicase DDX19B OS=Homo sapiens GN=DDX19B PE=4 SV=1
+MATDSWAAVDAAASRTSYSTS
+>tr|H0ACG9|H0ACG9_HALSG Uncharacterized protein OS=Haloredivivus sp. (strain G17) GN=HRED_08116 PE=4 SV=1
+MRVRVDRVGVVGGGTKKRGV
+>tr|N6VV02|N6VV02_9EURY Uncharacterized protein (Fragment) OS=Thermoplasmatales archaeon SCGC AB-539-C06 GN=MBGDC06_00209 PE=4 SV=1
+KRGAHGKMNGSVKTTMGSGR
+>tr|Q53RS6|Q53RS6_HUMAN Putative uncharacterized protein ALS2CR3 (Fragment) OS=Homo sapiens GN=ALS2CR3 PE=4 SV=1
+MSSNATSTGNMNSNHRDSST
+>tr|V9H0X7|V9H0X7_NEUCS Neurospora crassa DNA for RNA polymerase I second-largest subunit OS=Neurospora crassa PE=4 SV=1
+MAHSDNRDTADYGGAHAA
+>tr|Q9BZF8|Q9BZF8_HUMAN NF-E2-related factor 2 (Fragment) OS=Homo sapiens PE=2 SV=1
+MMDGSDMDDWRDD
b
diff -r 000000000000 -r fae6527990af test-data/three_human_mRNA.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/three_human_mRNA.fasta Thu May 21 03:58:09 2015 -0400
b
b'@@ -0,0 +1,183 @@\n+>ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.\n+GAGAGGACGAGGTGCCGCTGCCTGGAGAATCCTCCGCTGCCGTCGGCTCCCGGAGCCCAG\n+CCCTTTCCTAACCCAACCCAACCTAGCCCAGTCCCAGCCGCCAGCGCCTGTCCCTGTCAC\n+GGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTC\n+CCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA\n+TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGC\n+TGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCAT\n+TAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCA\n+CTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA\n+TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTA\n+CATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCAC\n+TCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTA\n+TAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT\n+TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGG\n+GCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAA\n+TTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGA\n+ATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAAAG\n+TTTAGAAATATTCCAGAATGAAGTAGCTCGGCAATTAATAAGTGAAAAAGGTACAATAAA\n+CTTTTTACATGCCGATTGTGACAAATTTAGACATCCTCTTCTGCACATACAGAAAACTCC\n+AGCAGATTGTCCTGTAATCGCTATTGACAGCTTTAGGCATATGTATGTGTTTGGAGACTT\n+CAAAGATGTATTAATTCCTGGAAAACTCAAGCAATTCGTATTTGACTTACATTCTGGAAA\n+ACTGCACAGAGAATTCCATCATGGACCTGACCCAACTGATACAGCCCCAGGAGAGCAAGC\n+CCAAGATGTAGCAAGCAGTCCACCTGAGAGCTCCTTCCAGAAACTAGCACCCAGTGAATA\n+TAGGTATACTCTATTGAGGGATCGAGATGAGCTTTAAAAACTTGAAAAACAGTTTGTAAG\n+CCTTTCAACAGCAGCATCAACCTACGTGGTGGAAATAGTAAACCTATATTTTCATAATTC\n+TATGTGTATTTTTATTTTGAATAAACAGAAAGAAATTTTGGGTTTTTAATTTTTTTCTCC\n+CCGACTCAAAATGCATTGTCATTTAATATAGTAGCCTCTTAAAAAAAAAAAAACCTGCTA\n+GGATTTAAAAATAAAAATCAGAGGCCTATCTCCACTTTAAATCTGTCCTGTAAAAGTTTT\n+ATAAATCAAATGAAAGGTGACATTGCCAGAAACTTACCATTAACTTGCACTACTAGGGTA\n+GGGAGGACTTAGGATGTTTCCTGTGTCGTATGTGCTTTTCTTTCTTTCATATGATCAATT\n+CTGTTGGTATTTTCAGTATCTCATTTCTCAAAGCTAAAGAGATATACATTCTGGATACTT\n+GGGAGGGG
AATAAATTAAAGTTTTCACACTGTGTACTGTGTTTTACTGATTGGTTGGATA\n+TTGCTTATGAAAATTCCATAGTGGTATTTTTTTGGATTCTTAATGTGTAACTTAAACATA\n+CTTTGAAGTGGAGGAGAGTCATAAGACAGAACATTTGGCAGGAATTGTCCTTATGAAACA\n+AGAAAAAGAAAATGAAAAGTATTATTAAGCTTCTGTGTTTGTCTAAAAATGTGGCATATG\n+GATGGCATTTAAAACTTTGAATGAATTATACCTAAATCTGGGACAGGGAGGTGACAGTGG\n+AACAGGCTACCAATCAGAACTAGATGACTTTTAAGGCTCCTCCTATTATGAGACTTCAAT\n+TTCCAAAGAGAAGAACTAGCAGAGAAATTGTATTTCAGTAATTTTAAGCTCCTTCTGTCT\n+TGTAGAGTCTTGTTATAGTTGTATAAATCAAAAACACAGAATAAGGAACATATTTAACTT\n+TTTTTCATTATAAAATGGTTAGAGGACCCTACCCCCTCTAGATTCCCTGATTTCCCCAGG\n+CCTGCAGCATACAGTAAGATGGGTCCCTGTGCCAGGCCTCAATACTGCCAGGGAATAAAA\n+CCAGAGGGAGAGGACCCTCAGTGTCATATCAGGAAGCCCAGTGCCAGAGGACAGACAGGT\n+TCAAAACTGGCTTTTCCTCTGGGCCTGGGTTGGTGCTATAGGCCAAGGGTCATTTTATAC\n+TTGGGTATAAATCAATCCCAGTTTGGGAAAAGATTATTTTTAAGCTTAAAAGGCTGACAT\n+GTGCCATTATATGTAGTATGTAATATATGTAACATCTTCCAATTCTTTTAAAATAAAATT\n+AATATTTATAATGGATATTTAATGATTGTTATTTTTAAAAACCAGCTTATAATTCCTCGT\n+TATGCATGATTTATCCAAAGTTTCCATAGTTTTATTCAAAATAATAAATGTTAATAAGGT\n+GATAAGGGGTATATTTAATGTATTGTATCAAATTGTGAATAAGAAAGTAGGATGGAGCTT\n+TCTAGAGGTTGGGCCTTAGTTCTGTTATCCTCATTGCTTTTAACCAATAAGTTAAATGAA\n+GTTAGAGTTATGGTCTTCAGGTTAGATTATGGACCAGATCTGTGAGGGTCAGCATGGAAA\n+TTCACATTCAACAAGGTAGCACACAGGACCAAGAGCAGCACATGCAATCAACTGGAATAA\n+TATAGTAATCCTGTAACTGGGTTTGAAAAAATAATCAACAAAAGATACAATTCAAGGGTT\n+AGGTTGCAGAGAGCTGGCTTGAGAGTAGTTATTATGAAAAAGGCCTCAAGGAGTACGTGT\n+TCAGTATGCTCTAAGATGATAAAGTGGCTGTTAAAAAGGGAGTTGATTTGAGGAAGTATT\n+ACTTAGCATTCATGCATATTGGGCTTAGGCTCTAGCCCTGCCACTATCATTGTCTTCTCT\n+GGACTGTGAAGTCACTGAGGACAAGGAAACTAAATTTAATGTCTGTATCACTAGTGCCTA\n+GAATTTCTGGACACTTAGTAGTCACCATCAGGCGTTTATTTAATGAATGAGAAGCAAAGT\n+GACCTTGGTTACTTTTTTACCCTGAGGGGCTCAGCACTCATTAGGACTTGGTGCCTAATT\n+TTATAAAAAGTCACTAAGCTCAAGTGCTTGGATGAAAGGACAGCGTGGATAAAAAGGTTT\n+TTAAAACATGGATGTTAAGGCTGTTTTGCTTGGAGAAGACTTGGGACTGGGACAGTCTTT\n+AGATATTATTTGAAATGCTGGCACTGTCTATCTGGATCCCAGGGCTTGAACTAGGATTTG\n+AGGAAGTCACAGGGAAGCAGATTTCAGTCTGACATTTATTCAGTGCAAGTTTTTTGGTGC\n+TGTAGTATATGATGAAAGATGTAAAGCTGAATAAAGCATTATTTCTGCCCTAGAG
TTGTT\n+CACAGCCTAGTCAGGCATATGGATATGTAAACAATGACTGTAACGTGTTATAGATGT'..b'AAGCTGCCCTCGAGGACCTGGTCTCCACCATTCGAGTCTGAAGATTCT\n+CAGAAGCACAACCAGAGTGAGTATGAGGATTCGGCCGGCGAATGCTGCTCCTGTCCAAAG\n+ACAGACTCTCAGATCCTGAAGGAGCTGGAGGAGTCCTCGTTTAGGAAGACGTTTGAGGAT\n+TACCTGCACAACGTGGTTTTCGTCCCCAGAAAAACCTCTTCAGGCACTGGTGCCGAGGAC\n+CCTAGGCCATCTCGGAAACGCAGGTCCCTTGGCGATGTTGGGAATGTGACGGTGGCCGTG\n+CCCACGGTGGCAGCTTTCCCCAACACTTCCTCGACCAGCGTGCCCACGAGTCCGGAGGAG\n+CACAGGCCTTTTGAGAAGGTGGTGAACAAGGAGTCGCTGGTCATCTCCGGCTTGCGACAC\n+TTCACGGGCTATCGCATCGAGCTGCAGGCTTGCAACCAGGACACCCCTGAGGAACGGTGC\n+AGTGTGGCAGCCTACGTCAGTGCGAGGACCATGCCTGAAGCCAAGGCTGATGACATTGTT\n+GGCCCTGTGACGCATGAAATCTTTGAGAACAACGTCGTCCACTTGATGTGGCAGGAGCCG\n+AAGGAGCCCAATGGTCTGATCGTGCTGTATGAAGTGAGTTATCGGCGATATGGTGATGAG\n+GAGCTGCATCTCTGCGTCTCCCGCAAGCACTTCGCTCTGGAACGGGGCTGCAGGCTGCGT\n+GGGCTGTCACCGGGGAACTACAGCGTGCGAATCCGGGCCACCTCCCTTGCGGGCAACGGC\n+TCTTGGACGGAACCCACCTATTTCTACGTGACAGACTATTTAGACGTCCCGTCAAATATT\n+GCAAAAATTATCATCGGCCCCCTCATCTTTGTCTTTCTCTTCAGTGTTGTGATTGGAAGT\n+ATTTATCTATTCCTGAGAAAGAGGCAGCCAGATGGGCCGCTGGGACCGCTTTACGCTTCT\n+TCAAACCCTGAGTATCTCAGTGCCAGTGATGTGTTTCCATGCTCTGTGTACGTGCCGGAC\n+GAGTGGGAGGTGTCTCGAGAGAAGATCACCCTCCTTCGAGAGCTGGGGCAGGGCTCCTTC\n+GGCATGGTGTATGAGGGCAATGCCAGGGACATCATCAAGGGTGAGGCAGAGACCCGCGTG\n+GCGGTGAAGACGGTCAACGAGTCAGCCAGTCTCCGAGAGCGGATTGAGTTCCTCAATGAG\n+GCCTCGGTCATGAAGGGCTTCACCTGCCATCACGTGGTGCGCCTCCTGGGAGTGGTGTCC\n+AAGGGCCAGCCCACGCTGGTGGTGATGGAGCTGATGGCTCACGGAGACCTGAAGAGCTAC\n+CTCCGTTCTCTGCGGCCAGAGGCTGAGAATAATCCTGGCCGCCCTCCCCCTACCCTTCAA\n+GAGATGATTCAGATGGCGGCAGAGATTGCTGACGGGATGGCCTACCTGAACGCCAAGAAG\n+TTTGTGCATCGGGACCTGGCAGCGAGAAACTGCATGGTCGCCCATGATTTTACTGTCAAA\n+ATTGGAGACTTTGGAATGACCAGAGACATCTATGAAACGGATTACTACCGGAAAGGGGGC\n+AAGGGTCTGCTCCCTGTACGGTGGATGGCACCGGAGTCCCTGAAGGATGGGGTCTTCACC\n+ACTTCTTCTGACATGTGGTCCTTTGGCGTGGTCCTTTGGGAAATCACCAGCTTGGCAGAA\n+CAGCCTTACCAAGGCCTGTCTAATGAACAGGTGTTGAAATTTGTCATGGATGGAGGGTAT\n+CTGGATCAACCCGACAACTGTCCAGAGAGAGTCACTGACCTCATGCGCATGTGCTGGCAA\n+TTCAACCCCAAGATGAGGCCAACCTTCCTGGAGATTGTCAACCTGCTCAAGG
ACGACCTG\n+CACCCCAGCTTTCCAGAGGTGTCGTTCTTCCACAGCGAGGAGAACAAGGCTCCCGAGAGT\n+GAGGAGCTGGAGATGGAGTTTGAGGACATGGAGAATGTGCCCCTGGACCGTTCCTCGCAC\n+TGTCAGAGGGAGGAGGCGGGGGGCCGGGATGGAGGGTCCTCGCTGGGTTTCAAGCGGAGC\n+TACGAGGAACACATCCCTTACACACACATGAACGGAGGCAAGAAAAACGGGCGGATTCTG\n+ACCTTGCCTCGGTCCAATCCTTCCTAACAGTGCCTACCGTGGCGGGGGCGGGCAGGGGTT\n+CCCATTTTCGCTTTCCTCTGGTTTGAAAGCCTCTGGAAAACTCAGGATTCTCACGACTCT\n+ACCATGTCCAGTGGAGTTCAGAGATCGTTCCTATACATTTCTGTTCATCTTAAGGTGGAC\n+TCGTTTGGTTACCAATTTAACTAGTCCTGCAGAGGATTTAACTGTGAACCTGGAGGGCAA\n+GGGGTTTCCACAGTTGCTGCTCCTTTGGGGCAACGACGGTTTCAAACCAGGATTTTGTGT\n+TTTTTCGTTCCCCCCACCCGCCCCCAGCAGATGGAAAGAAAGCACCTGTTTTTACAAATT\n+CTTTTTTTTTTTTTTTTTTTTTTTTTTTTGCTGGTGTCTGAGCTTCAGTATAAAAGACAA\n+AACTTCCTGTTTGTGGAACAAAATTTCGAAAGAAAAAACCAAA\n+>ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.\n+CCAGCTGGAGCCCTGAGTGGCTGAGCTCAGGCCTTCGCAGCATTCTTGGGTGGGAGCAGC\n+CACGGGTCAGCCACAAGGGCCACAGCCATGAATGGCACAGAAGGCCCTAACTTCTACGTG\n+CCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTG\n+GCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGC\n+TTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCT\n+CTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTC\n+ACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAAT\n+TTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTG\n+GCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAAC\n+CATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCA\n+CTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTAC\n+TACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCAC\n+TTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAG\n+GAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACC\n+CGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTG\n+GCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCA\n+GCGTTCTTTGCCAAGAGCGCCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAG\n+CAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGACGAT\n+GAGGCCT
CTGCTACCGTGTCCAAGACGGAGACGAGCCAGGTGGCCCCGGCCTAAGACCTG\n+CCTAGGACTCTGTGGCCGACTATAGGCGTCTCCCATCCCCTACACCTTCCCCCAGCCACA\n+GCCATCCCACCAG\n'
b
diff -r 000000000000 -r fae6527990af tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Thu May 21 03:58:09 2015 -0400
b
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <!-- Pins the vsearch 1.1.3 package to a fixed changeset of the IUC package_vsearch_1_1_3 repository. -->
+    <package name="vsearch" version="1.1.3">
+        <repository changeset_revision="c1fc77fac9fb" name="package_vsearch_1_1_3" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>
b
diff -r 000000000000 -r fae6527990af vsearch_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/vsearch_macros.xml Thu May 21 03:58:09 2015 -0400
b
b'@@ -0,0 +1,237 @@\n+<macros>\n+    <xml name="requirements">\n+        <requirements>\n+            <requirement type="package" version="1.1.3">vsearch</requirement>\n+        </requirements>\n+    </xml>\n+    <xml name="version_command">\n+        <version_command>vsearch --version | head -n 1 | awk \'{print $2}\' | sed \'s/,//\'</version_command>\n+    </xml>\n+    <xml name="stdio">\n+        <stdio>\n+            <exit_code range="1:" />\n+            <exit_code range=":-1" />\n+            <regex match="Error:" />\n+            <regex match="Exception:" />\n+        </stdio>\n+    </xml>\n+\n+    <xml name="topn">\n+        <param name="topn" type="integer" value="" optional="True" label="Output just first n sequences"\n+            help="(--topn)"/>\n+    </xml>\n+\n+    <xml name="maxaccepts">\n+        <param name="maxaccepts" type="integer" value="1" label="Number of hits to accept and show per strand"\n+            help="(--maxaccepts)"/>\n+    </xml>\n+    <xml name="maxrejects">\n+        <param name="maxrejects" type="integer" value="32" label="Number of non-matching hits to consider"\n+            help="(--maxrejects)"/>\n+    </xml>\n+    <xml name="qmask">\n+        <param name="qmask" type="select" label="Mask sequences" help="(--qmask)">\n+            <option value="no">No masking</option>\n+            <option value="dust" selected="True">dust</option>\n+            <option value="soft">soft</option>\n+        </param>\n+    </xml>\n+\n+    <xml name="hardmask">\n+        <param name="hardmask" type="boolean" truevalue="--hardmask" falsevalue="" checked="False" \n+            label="Mask by replacing with N instead of lower case" help="(--hardmask)"/>\n+    </xml>\n+\n+    <xml name="id_and_iddef">\n+        <param name="iddef" type="select" label="ID definition" help="(--iddef)">\n+            <option value="0">CD-HIT</option>\n+            <option value="1">all</option>\n+            <option value="2" selected="True">int</option>\n+          
  <option value="3">MBL</option>\n+            <option value="4">BLAST</option>\n+        </param>\n+        <param name="id" type="float" value="" optional="True" label="Reject hit if identity is lower than this value"\n+            help="(--id)"/>\n+    </xml>\n+\n+    <xml name="self_and_selfid">\n+        <param name="self_param" type="boolean" truevalue="--self" falsevalue="" checked="False" \n+            label="Exclude identical labels for --uchime_ref" help="(--self)"/>\n+        <param name="selfid_param" type="boolean" truevalue="--selfid" falsevalue="" checked="False" \n+            label="Exclude identical sequences for --uchime_ref" help="(--selfid)"/>\n+    </xml>\n+\n+    <xml name="strand">\n+        <param name="strand" type="select" label="Strand specific clustering" help="(--strand)">\n+            <option value="plus" selected="True">Plus strand</option>\n+            <option value="both">Both strands</option>\n+        </param>\n+    </xml>\n+\n+    <xml name="sizein">\n+        <param name="sizein" type="boolean" truevalue="--sizein" falsevalue="" checked="False" \n+            label="Read abundance annotation from input" help="(--sizein)"/>\n+    </xml>\n+\n+    <xml name="sizeout">\n+        <param name="sizeout" type="boolean" truevalue="--sizeout" falsevalue="" checked="False" \n+            label="Write cluster abundances to centroid file" help="(--sizeout)"/>\n+    </xml>\n+\n+    <xml name="uclust_like_output">\n+        <param name="uc" type="boolean" truevalue="--uc" falsevalue="" checked="False" \n+            label="UCLUST-like output" help="(--uc)"/>\n+    </xml>\n+\n+    <token name="@GENERAL@">\n+        --threads "\\${GALAXY_SLOTS:-4}"\n+        --notrunclabels\n+    </token>\n+    <token name="@USERFIELDS@">\n+        #if $userfields_output.userfields_output_select == \'yes\':\n+            --userfields \'#echo \'+\'.join( str($userfields_output.userfields).split(\',\') )#\'\n+            --userout $userout\n+        #end if\n+ 
   </token>\n+    <xml name="userfields_output">\n+        <data name="userout" format="tabular" label="${tool.'..b'ences, this is equivalent to the percentage of matches (real value ranging from 0.0 to 100.0).\n+pv        Number of positive columns. When working with nucleotide sequences, this is equivalent to the number of matches (zero or positive integer value).\n+qcov      Fraction of the query sequence that is aligned with the target sequence (real value ranging from 0.0 to 100.0). The query coverage is computed as 100.0 * (matches + mismatches) / query sequence length. Internal or terminal gaps are not taken into account. The field is set to 0.0 if there is no alignment.\n+qframe    Query frame (-3 to +3). That field only concerns coding sequences and is not computed by vsearch. Always set to +0.\n+qhi       Last nucleotide of the query aligned with the target. Always equal to the length of the pairwise alignment. The field is set to 0 if there is no alignment.\n+qihi      Last nucleotide of the query aligned with the target (ignoring terminal gaps). Nucleotide numbering starts from 1. The field is set to 0 if there is no alignment.\n+qilo      First nucleotide of the query aligned with the target (ignoring initial gaps). Nucleotide numbering starts from 1. The field is set to 0 if there is no alignment.\n+ql        Query sequence length (positive integer value). The field is set to 0 if there is no alignment.\n+qlo       First nucleotide of the query aligned with the target. Always equal to 1 if there is an alignment, 0 otherwise.\n+qrow      Print the sequence of the query segment as seen in the pairwise alignment (i.e. with gap insertions if need be). Empty field if there is no alignment.\n+qs        Query segment length. Always equal to query sequence length.\n+qstrand   Query strand orientation (+ or - for nucleotide sequences). 
Empty field if there is no alignment.\n+query     Query label.\n+raw       Raw alignment score (negative, null or positive integer value). The score is the sum of match rewards minus mismatch penalties, gap openings and gap extensions. The field is set to 0 if there is no alignment.\n+target    Target label. The field is set to "*" if there is no alignment.\n+tcov      Fraction of the target sequence that is aligned with the query sequence (real value ranging from 0.0 to 100.0). The target coverage is computed as 100.0 * (matches + mismatches) / target sequence length. Internal or terminal gaps are not taken into account. The field is set to 0.0 if there is no alignment.\n+tframe    Target frame (-3 to +3). That field only concerns coding sequences and is not computed by vsearch. Always set to +0.\n+thi       Last nucleotide of the target aligned with the query. Always equal to the length of the pairwise alignment. The field is set to 0 if there is no alignment.\n+tihi      Last nucleotide of the target aligned with the query (ignoring terminal gaps). Nucleotide numbering starts from 1. The field is set to 0 if there is no alignment.\n+tilo      First nucleotide of the target aligned with the query (ignoring initial gaps). Nucleotide numbering starts from 1. The field is set to 0 if there is no alignment.\n+tl        Target sequence length (positive integer value). The field is set to 0 if there is no alignment.\n+tlo       First nucleotide of the target aligned with the query. Always equal to 1 if there is an alignment, 0 otherwise.\n+trow      Print the sequence of the target segment as seen in the pairwise alignment (i.e. with gap insertions if need be). Empty field if there is no alignment.\n+ts        Target segment length. Always equal to target sequence length. The field is set to 0 if there is no alignment.\n+tstrand   Target strand orientation (+ or - for nucleotide sequences). 
Always set to "+", so reverse strand matches have tstrand "+" and qstrand "-". Empty field if there is no alignment.\n+========= ================\n+\n+    </token>\n+    <xml name="citations">\n+        <citations>\n+            <citation type="doi">10.5281/zenodo.15524</citation>\n+            <yield />\n+        </citations>\n+    </xml>\n+</macros>\n'