Repository 'usearch_dereplication'
hg clone https://toolshed.g2.bx.psu.edu/repos/qfab/usearch_dereplication

Changeset 0:88fc52f1c5db (2014-05-28)
Commit message:
Uploaded
added:
dereplication/README.txt
dereplication/dereplicate.xml
dereplication/test-data/seqs.fasta
dereplication/test-data/seqs_derep.fasta
b
diff -r 000000000000 -r 88fc52f1c5db dereplication/README.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dereplication/README.txt Wed May 28 20:34:11 2014 -0400
b
@@ -0,0 +1,60 @@
+Galaxy wrappers for USEARCH - Dereplication
+============================================
+
+USEARCH requires a licence. Therefore an automated installation is not
+possible at the moment.
+
+Requirements
+============================================
+
+Get your licensed copy of USEARCH (version 7 or later) from:
+http://www.drive5.com/usearch/download.html
+
+
+Manual Installation Steps
+============================================
+
+USEARCH is distributed as one file, known as the binary file or executable
+file. It is completely self-contained: it does not require configuration
+files, environment variables, third-party libraries or other external
+dependencies. There is no setup script or installer because they're not
+needed. To install it, all you do is download or copy the binary to a
+directory that is accessible from the computer where you want to run the code.
+
+Step1:
+Rename the binary file to usearch.
+
+Step2:
+Move the binary file (usearch) to /usr/local/bin
+Ensure /usr/local/bin is on your PATH. If it is not, add /usr/local/bin to
+your PATH.
+
+Step3:
+Ensure that you have read and execute permissions for the binary file.
+If needed, use the chmod command to set the execute bit, e.g.:
+chmod +x /usr/local/bin/usearch
+
+
+Further installation information and help can be found at:
+http://drive5.com/usearch/manual/install.html
+
+
+Disclaimer
+=====================================================
+This source code is provided by QFAB Bioinformatics "as is", in the hope that it will be
+useful, and any express or implied warranties, including, but not limited to,
+the implied warranties of merchantability and fitness for a particular purpose
+are disclaimed.
+IN NO EVENT SHALL QFAB BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES HOWEVER CAUSED AND ON ANY THEORY
+OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOURCE
+CODE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+License
+=====================================================
+This work by QFAB Bioinformatics (as part of the GVL project
+http://genome.edu.au)
+is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0
+International License.
b
diff -r 000000000000 -r 88fc52f1c5db dereplication/dereplicate.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dereplication/dereplicate.xml Wed May 28 20:34:11 2014 -0400
[
@@ -0,0 +1,86 @@
+<tool id="usearch_derep_full" name="Dereplicate" version="1.0.0">
+  <description>Remove duplicate sequences</description>
+  <command>
+    ## Conditions are plain Python (a bracketed test is a non-empty list, hence always true); usearch stderr is merged into stdout.
+    #if str($mode) == "fulllength"
+      usearch -derep_fulllength $input -output $output -sizeout 2&gt;&amp;1;
+    #elif str($mode) == "prefix"
+      usearch -derep_prefix $input -output $output -sizeout 2&gt;&amp;1;
+    #else
+      echo 'Unrecognised mode:' $mode '[fulllength|prefix] only' &gt;&amp;2; exit 1;
+    #end if
+  </command>
+  <inputs>
+    <param name="input" type="data" format="fasta" label="Input sequence file" />
+    <param name="mode" type="select" label="Criteria used for duplicate detection">
+      <option value="fulllength">Full length</option>
+      <option value="prefix">Prefix</option>
+    </param>
+  </inputs>
+
+  <outputs>
+    <data name="output" format="fasta" />
+  </outputs>
+
+  <help>
+===========
+Description
+===========
+
+Removes duplicate sequences using one of two modes (below), from the Usearch-Tool-Suite_.
+
+.. _Usearch-Tool-Suite: http://www.drive5.com/usearch/
+
+-----
+
+-----
+Input
+-----
+
+File of reads in FASTA format.
+
+----------
+Parameters
+----------
+
+Full length
+  Matching is performed over the full length of the sequences, all identical sequences except one are removed.
+Prefix
+  A sequence (A) is discarded, if it is a prefix of another sequence (B). The first part of the sequence is identical.
+
+------
+Output
+------
+
+A FASTA file containing only unique sequences according to the criteria chosen for the duplicate detection. The identifier line for each sequence states the representative sequence followed by the number of identical sequences found.
+
+e.g. >sequenceXXXX;size=1443;
+
+sequenceXXXX is the representative of 1443 identical sequences.
+
+-----
+
+=========
+Resources
+=========
+
+Dereplication_
+
+.. _Dereplication: http://drive5.com/usearch/manual/dereplication.html
+
+**Author**
+
+Robert C. Edgar (bob@drive5.com)
+
+**Wrapper Author**
+
+QFAB Bioinformatics (support@qfab.org)
+  </help>
+  <tests>
+    <test>
+     <param name="input" value="seqs.fasta" />
+     <param name="mode" value="fulllength" />
+     <output name="output" file="seqs_derep.fasta" ftype="fasta" lines_diff="10" />
+    </test>
+  </tests>
+</tool>
b
diff -r 000000000000 -r 88fc52f1c5db dereplication/test-data/seqs.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dereplication/test-data/seqs.fasta Wed May 28 20:34:11 2014 -0400
b
b'@@ -0,0 +1,2922 @@\n+>248442\n+AGAGTTTGATCCTGGCTCAGATTGAACGCTGGCGGCAGGCCTAACACATGCAAGTCGAGCGCGAAATCTCCTTCGGGAGTGAGTAGAGCGGCGGACGGGTGAGTAACGCGTAGGAATCTACCCAGTGGTGGGGGACAACCTGGGGAAACCCAGGCTAATACCGCATACGCCCTACGGGGGAAAGCGGGGGCTCTCTTCGGAGACCTCGCGCCATTGGATGAGCCTGCGTTGGATTAGCTAGTTGGTGGGGTAAAGGCCTACCAAGGCGACGATCCATAGCTGGTCTGAGAGGACGATCAGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCCATGCCGCGTGTGTGAAGAAGGCTCTAGGGTTGTAAAGCACTTTCAGTAGGGAGAAAAAGCTCATGTTTAATAGATGTGAGTGTTGATGTTACCTACAGAAGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGCGGTTTGTTAAGTCGGATGTGAAAGCCCCGGGCTCAACCTGGGAATTGCATTCGATACTGATCGACTAGAGTACGAGAGAGGGAGGTAGAATTCCACGTGTAGCGGTGAAATGCGTAGATATGTGGAGGAATACCGGTGGCGAAGGCGGCCTCCTGGCTCGATACTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGTCAACTAGCCGTTGGGGAACTTGATTCCTTAGTGGCGCAGCTAACGCAATAAGTTGACCGCCTGGGGAGTACGGCCGCAAGGTTAAAACTCAAATGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTGAAAACTTTCCAGAGATGGATTGGTGCCTTCGGGAATTCAGTGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGTAACGAGCGCAACCCTTGTCCTTAGTTGCCAGCACGTAATGGTGGGAACTCTAAGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGACGACGTCAAGTCATCATGGCCCTTACGACCTGGGCTACACACGTGCTACAATGGTCGGTACAGACGGCTGCGAACCCGCGAGGGGGAGCGAATCCGAGAAAACCGATCGTAGTCCGGATTGGAGTCTGCAACTCGACTCCATGAAGTCGGAATCGCTAGTAATCGCGAATCAGAATGTCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGGGAGTGGGTTGCTCCAGAAGTGGTTAGCCTAACCTTAGGGAGGGCGATCACCACGGAGTGATTCATGACTGGGGTGAAGTCGTAACAAGGTAACC\n+>222222\n+AGAGTTTGATCCTGGCTCAGATTGAACGCTGGCGGCAGGCCTAACACATGCAAGTCGAGCGCGAAATCTCCTTCGGGAGTGAGTAGAGCGGCGGACGGGTGAGTAACGCGTAGGAATCTACCCAGTGGTGGGGGACAACCTGGGGAAACCCAGGCTAATACCGCATACGCCCTACGGGGGAAAGCGGGGGCTCTCTTCGGAGACCTCGCGCCATTGGATGAGCCTGCGTTGGATTAGCTAGTTGGTGGGGTAAAGGCCTACCAAGGCGACGATCCATAGCTGGTCTGAGAGGACGATCAGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCCATGCCGCGTGTGTGAAGAAGGCTCTAGGGTTGTAAAGCACTTTCAGTAGGGAGAAAAAGCTCATGTT
TAATAGATGTGAGTGTTGATGTTACCTACAGAAGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGCGGTTTGTTAAGTCGGATGTGAAAGCCCCGGGCTCAACCTGGGAATTGCATTCGATACTGATCGACTAGAGTACGAGAGAGGGAGGTAGAATTCCACGTGTAGCGGTGAAATGCGTAGATATGTGGAGGAATACCGGTGGCGAAGGCGGCCTCCTGGCTCGATACTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGTCAACTAGCCGTTGGGGAACTTGATTCCTTAGTGGCGCAGCTAACGCAATAAGTTGACCGCCTGGGGAGTACGGCCGCAAGGTTAAAACTCAAATGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTGAAAACTTTCCAGAGATGGATTGGTGCCTTCGGGAATTCAGTGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGTAACGAGCGCAACCCTTGTCCTTAGTTGCCAGCACGTAATGGTGGGAACTCTAAGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGACGACGTCAAGTCATCATGGCCCTTACGACCTGGGCTACACACGTGCTACAATGGTCGGTACAGACGGCTGCGAACCCGCGAGGGGGAGCGAATCCGAGAAAACCGATCGTAGTCCGGATTGGAGTCTGCAACTCGACTCCATGAAGTCGGAATCGCTAGTAATCGCGAATCAGAATGTCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGGGAGTGGGTTGCTCCAGAAGTGGTTAGCCTAACCTTAGGGAGGGCGATCACCACGGAGTGATTCATGACTGGGGTGAAGTCGTAACAAGGTAACC\n+>258155\n+AGAGTTTGATCATGGCTCAGGACGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGAGCGATGAAGCACCTTCGGGTGTGAATTAGCGGCGAACGGGTGAGGAACACGTGAGAAATCTGCCTTCAACACTGGGATAACTCCGGGAAACCGGGGCTAATACCGGATATGAAATCTGCGGGCATCCGCGGATTTGGAAAGTTTTTCGGTTGAAGATGATCTCGCGGCCTATCAGCTTGTTGGTGAGGTAATGGCTCACCAAGGCGACGACGGGTAGCCGGCCTGAGAGGGCGACCGGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGGGCAATGGAGGAAACTCTGACCCAGCGACGCCGCGTGCGGGATGAAGGCCTTCGGGTTGTAAACCGCTTTCAGCAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCACCGGCTAACTATGTGCCAGCAGCCGCGGTAATACATAGGGTGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGAGCTCGTAGGTGGTTCGTCACGTCGGATGTGAAACTCTGGGGCTTAACCCCAGACCTGCATTCGATACGGGCGAGCTTGAGTATGGTAGGGGAGTCTTGAATTCCTGGTGTAGCGGTGGAATGCGCAGATATCAGGAGGAACACCAATGGCGAAGGCAGGACTCTGGGCCATTACTGACACTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGGTGGGCACTAGTTGTGGGGACCTTCCACGGTCTCTGCGACGCAGCTAACGCATTAAGTGCCCCGCCTGGGGAGTACGATCGCAAGATTAAAACTCAAAGGAATTGACGGGGCCCCGCACAAGCAGCGGAGCATGCGGCTTAATTCGACGCAACGC
GAAGAAC'..b'CGCGTGAGGGACGACGGCCTTCGGGTTGTAAACCTCTTTTAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAAAAGCACCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCTGTGAAAACTGAGGCTCAACCTCCAGCCTGCAGTGGGTACGGGCAGACTAGAGTGCGGTAGGGGAGATTGGAATTCCTGGTGTAGCGGTGGAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGATCTCTGGGCCGTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACAGGATTAGATACCCTGGTAGTCCATGCCGTAAACGTTGGGAACTAGATGTAGGGACCATTCCACGGTTTCTGTGTCGCAGCTAACGCATTAAGTTCCCCGCCTGGGGAGTACGGCCGCAAGGCTAAAACTCAAAGGGATTGACGGGGGCCCGCACAAGCGGCGGAGCATGCGGATTAATTCGATGCAACGCGAAGAACCTTACCAAGGCTTGACATATACGAGAACGGGCCAGAAATGGTCAACTCTTTGGACACTCGTAAACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTCGTTCTATGTTGCCAGCACGTAATGGTGGGAACTCATAGGAGACTGCCGGGGTCAACTCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGTCTTGGGCTTCACGCATGCTACAATGGCCGATACAAAGGGCTGCAATACCGTAAGGTGGAGCGAATCCCAAAAAGTCGGTCTCAGTTCGGATTGAGGTCTGCAACTCGACCTCATGAAGTCGGAGTCGCTAGTAATCGCAGATCAGCAACGCTGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCAAGTCATGAAAGTCGANAACACCCGAAGCCAGTGGCCTAACCGCAAGGAAGGAGCTGTCGAAGGTGGGATCGGTGATTAGGACTAAGTCGTAACAAGGTA\n+>5647\n+TTAGAGTTTGATCCTGGCTCAGAACGAACGCTGGCGGCAGGCTAACACATGCAAGTCGAGCGCTACCTTCGGGTGGAGCNGCGGACGGGTTAGTAACGCGTGGGAACATACNCCTTTCTAAGGAATAGCCTCGGGAAACTGAGAGTAATACCTTATACGCTTCGGGGAAAGATTTATCGGTGAGGGATTGGCCCGCGTTGGNTTAGGTAGTTGGTGGGGTAACGGCCTACCAAGCCTACGATCCATAGCTGGTTTTAGAGGACGATCAGCAACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATCTTAGACAATGGGCGAAGCCTGATCTAGCCATGCCGCGTGAGTGANGAGGGTCTTAGGATCGTAAAGCTCTTTCGCCAGAGATGATAATGACAGTATCTGGTAAAGAAANCCCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGGGTTAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCGCGTAGGCGGATTAGTAAGTTAGGGNTGAAATCCGGGNNTCAACCCCGGAACTGCCTCTAATACTGCTAGTCTTGAGTTCGAGAGAGGTGAGTGGAATTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAGGAACACCAGTGGCGAAGGCGGCTCACTGGCTCGATACTGACGCTGAGGTGCGAAAGTGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACACCGTAAACGATGAATGCCAGTCGTCGGGTAGATGCTGCTCGGTGACACACTAACGGATTAAGCATTCCGCCTGGGGAGTACGGTCGCAAGATTAAAACTCAAAGGAATTGACGGGGGCCGCACAAGCGGTGGAGCATGTGGTTTAAT
TCGAAGCAACGCGNAGAACCTTACCAACCCTTGACATCCCTATCGCGGTTTCCAGAGATGGATTCCTTCAGNTCGGCTGGATAGGTGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTCGGTTAAGTCCGGCAACGAGNGCAACCCACATCTTTAGTTGCCAGNATTTAGTTGGGNACTCTAAAGAAACTGCCCGTGATAAGCGGGAGGAAGGTGTGGATGACGTCAAGTCCTCATGGNTTACGGGTTGGGCTACACACGTGCTACAATGGCAGTGACAATGGGTTAATCCCAAAAAGCTGTCTCAGTTCGGATTGTCGTCCGCAACTCGACGGCATGAAGTCGGAATCGCTAGTAATCGCGTAACAGCATGACGNGGTGAATACGTTCCCGGGNCTTGTACACACCGCCNGTCGCACCATGGGAGTTGGTTCTACCTGACGNGTGNGCTAACTTCGGGAGGCAGGCGGCCACGGTAGGATCAGCGACTGGGGTGAAGTCGTAACAAGGTAACC\n+>4585\n+AGAGTTTGATCATGGCTCAGAACGAACGCTGGCGGCATGCTTAACACATGCAAGTCGAACGCTATCTTTGATAGAGTGGCGCACGGGTGAGTAACACGTGGGAATCTGCCCTTTTGTTCGGGACAACAGTTGGAAACGACTGCTAATACCGGATACGCCCTTCGGGGGAAAGGTCCGCCGCAGAAGGAGGAGCCCGCGTCCGATTAGCTTGTTGGTAGGGTAATGGCCTACCAAGGCGACGATCGGTAGCTGGTCTGAGAGGATGATCAGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGGACAATGGGGGCAACCCTGATCCAGCAATGCCGCGTGTGTGATGAAGGCCTTAGGGTTGTAAAGCACTTTCACTGGTGAAGATGATGACGGTAACCAGAGAAGAAGCCCCGGCTAACTTCGTGCCAGCAGCCGCGGTAATACGAAGGGGGCGAGCGTTGTTCGGAATTACTGGGCGTAAAGGGAGCGCAGGCGGTTCATTTAGTTAGGCGTGAAAGCCCCGGGCTCAACCTGGGAACTGCGCTTAATACTGATGAACTAGAAAACAGAAGAGGGTAGTGGAATTCCCAGTGTAGAGGTGAAATTCGTAGATATTGGGAAGAACACCGGTGGCGAAAGCGGCTACCTGGTCTGATTTTGACGCTGAGGCTCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGACAGCTAGGTGTCGGGGGGTCGCCCCTCGGTGCCGCCGCTAACGCATTAAGCTGTCCGCCTGGGAAGTACGGTCGCAAGATTAAAACTCACAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGTCCTTGACATGGGTAGTTTGGATTTTGGAGACAATTTCCTTCAGTTCGGCTGGCTACCACACAGGTGCTGCACGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTCATCTTTAGTTGCCATCAGTTCGGCTGGGCACTCTAGAGAAACTGCCTGCGATGAGCAGGAGGAAGGCGGGGACGACGTCAAGTCATCATGGCCCTTATGGACTGGGCTACACACGTGCTACAATGGCGGTGACAATGGGCAGCAACAGAGCGATCTGAAGCAAATCTCAAAAAACCGTCCCAGTTCGGATTGTACTCTGCAACTCGAGTGCATGAAGTTGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGGGAGTTGGTTCTACCCGAAGCCGGTGCGCTAACCGCAAGGAAGCAGCCGACCACGGTAGGGTTAGCGACTGGGGTGAAGTCGTAACAAGGTAGCAGTAGGGGAACCTGCGGC
TGGATCACCTCCTT\n'
b
diff -r 000000000000 -r 88fc52f1c5db dereplication/test-data/seqs_derep.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dereplication/test-data/seqs_derep.fasta Wed May 28 20:34:11 2014 -0400
b
b'@@ -0,0 +1,28685 @@\n+>258155;size=2;\n+AGAGTTTGATCATGGCTCAGGACGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGAGCGATGAAGCACCTTCGGGTGT\n+GAATTAGCGGCGAACGGGTGAGGAACACGTGAGAAATCTGCCTTCAACACTGGGATAACTCCGGGAAACCGGGGCTAATA\n+CCGGATATGAAATCTGCGGGCATCCGCGGATTTGGAAAGTTTTTCGGTTGAAGATGATCTCGCGGCCTATCAGCTTGTTG\n+GTGAGGTAATGGCTCACCAAGGCGACGACGGGTAGCCGGCCTGAGAGGGCGACCGGCCACACTGGGACTGAGACACGGCC\n+CAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGGGCAATGGAGGAAACTCTGACCCAGCGACGCCGCGTGCGGGATGA\n+AGGCCTTCGGGTTGTAAACCGCTTTCAGCAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCACCGGCTAACTAT\n+GTGCCAGCAGCCGCGGTAATACATAGGGTGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGAGCTCGTAGGTGGTTCGTC\n+ACGTCGGATGTGAAACTCTGGGGCTTAACCCCAGACCTGCATTCGATACGGGCGAGCTTGAGTATGGTAGGGGAGTCTTG\n+AATTCCTGGTGTAGCGGTGGAATGCGCAGATATCAGGAGGAACACCAATGGCGAAGGCAGGACTCTGGGCCATTACTGAC\n+ACTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGGTGGGCACTAGTTGTG\n+GGGACCTTCCACGGTCTCTGCGACGCAGCTAACGCATTAAGTGCCCCGCCTGGGGAGTACGATCGCAAGATTAAAACTCA\n+AAGGAATTGACGGGGCCCCGCACAAGCAGCGGAGCATGCGGCTTAATTCGACGCAACGCGAAGAACCTTACCAAGGCTTG\n+ACATATACAGGAATATGGCAGAGATGTCATAGCCGCAAGGTCTGTATACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTC\n+GTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTCGTTCTGTGTTGCCAGCATTTAGTTGGGGACTCACAGGAGA\n+CTGCCGGGGTTAACTCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGTCTTGGGCTGCACGCATGCTAC\n+AATGGCTGGTACAAACGGCTGCAATACCGCAAGGTGGAGCGAATCCGAGAAAGCCAGTCTCAGTTCGGATTGGGGTCTGC\n+AACTCGACCCCATGAAGTCGGAGTTGCTAGTAATCATAGATCAGCAACGCTACGGTGAATACGTTCCCGGGGCTTGTACA\n+CACCGCCCGTCACGTCACGAGAGTCGGTAACACCCGAAGTCAGTGGCCCAACCGCAAGGAGGGAGCTGCCGAAGGTGGGA\n+TCGGTGATTGGGACGAAGTCGTAACAAGGTAGCCGTACCGGAAGGTGCGGCTGGATCACCTCCTTT\n+>248442;size=2;\n+AGAGTTTGATCCTGGCTCAGATTGAACGCTGGCGGCAGGCCTAACACATGCAAGTCGAGCGCGAAATCTCCTTCGGGAGT\n+GAGTAGAGCGGCGGACGGGTGAGTAACGCGTAGGAATCTACCCAGTGGTGGGGGACAACCTGGGGAAACCCAGGCTAATA\n+CCGCATACGCCCTACGGGGGAAAGCGGGGGCTCTCTTCGGAGACCTCGCGCCATTGGATGAGCCTGCGTTGGATTAGCTA\n+GTTGGTGGGGTAAAGGCCTACCAAGGCGACGATCCATAGCTGGTCTGAGAGGACGATCAGCCACACTGGGACTGAGACAC\n+GGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCGCAAT
GGGGGAAACCCTGACGCAGCCATGCCGCGTGTGTG\n+AAGAAGGCTCTAGGGTTGTAAAGCACTTTCAGTAGGGAGAAAAAGCTCATGTTTAATAGATGTGAGTGTTGATGTTACCT\n+ACAGAAGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCG\n+TAAAGCGCGCGTAGGCGGTTTGTTAAGTCGGATGTGAAAGCCCCGGGCTCAACCTGGGAATTGCATTCGATACTGATCGA\n+CTAGAGTACGAGAGAGGGAGGTAGAATTCCACGTGTAGCGGTGAAATGCGTAGATATGTGGAGGAATACCGGTGGCGAAG\n+GCGGCCTCCTGGCTCGATACTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGC\n+CGTAAACGATGTCAACTAGCCGTTGGGGAACTTGATTCCTTAGTGGCGCAGCTAACGCAATAAGTTGACCGCCTGGGGAG\n+TACGGCCGCAAGGTTAAAACTCAAATGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGCAAC\n+GCGAAGAACCTTACCAGGTCTTGACATCCTGAAAACTTTCCAGAGATGGATTGGTGCCTTCGGGAATTCAGTGACAGGTG\n+CTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGTAACGAGCGCAACCCTTGTCCTTAGTTGCCA\n+GCACGTAATGGTGGGAACTCTAAGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGACGACGTCAAGTCATCATGGCC\n+CTTACGACCTGGGCTACACACGTGCTACAATGGTCGGTACAGACGGCTGCGAACCCGCGAGGGGGAGCGAATCCGAGAAA\n+ACCGATCGTAGTCCGGATTGGAGTCTGCAACTCGACTCCATGAAGTCGGAATCGCTAGTAATCGCGAATCAGAATGTCGC\n+GGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGGGAGTGGGTTGCTCCAGAAGTGGTTAGCCTAACC\n+TTAGGGAGGGCGATCACCACGGAGTGATTCATGACTGGGGTGAAGTCGTAACAAGGTAACC\n+>229462;size=1;\n+AGAGTTTGATCATGGCTCAGGATGAACGCTAGCGGCAGGCTTAACACATGCAAGTCGAGGGGTAACAGGGATTGCTTGCA\n+ATCCGCTGACGACCGGCGCACGGGTGCGTAACGCGTATGCAACTTACCTTTTACTGGGGGATAGTCAAGAGAAATTTTGA\n+ATAATACCCCATACGATCTAACTCACTCCTGTGAGATAGAAGAAAATTTCGATGGTAAAAGATAGGCATGCGTCCTATTA\n+GTTTGTTGGTGAGGTAACGGCTTACCAAGACTACGATAGGTAGGGGTCCCGAGAGGGAGATCCCCCACACTGGTACTGAG\n+ACACGGACCAGACTCCTACGGGAGGCAGCAGTGAGGAATATTGGACAATGGAGGCAACTCTGATCCAGCCATGCCGCGTG\n+CAGGAAGACAGCCCTATGGGTTGTAAACTGCTTTTATACAGGAAGAAACGTTAGTACGTGTACTAGCCTGACGGTACTGT\n+AAGAATAAGGATCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGATCCAAGCGTTATCCGGAATCATTGGGTTT\n+AAAGGGTCCGCAGGCGGTCTATTAAGTCAGAGGTGAAATCCTATCGCTCAACGATAGAACTGCCTTTGATACTGCTAGAC\n+TTGAGTTATTGTGAAGTAGTTAGAATGTGTAGTGTAGCGGTGAAATGCATAGATATTACACAGAATACCGATTGCGAAGG\n+CAGATTACTAACAATACACTGACGCTCAGGGACGAAAGCGTGGGTAGCGAACAG
GATTAGATACCCTGGTAGTCCACGCC\n+GTAAACGATGGTCACTAGCTGTTT'..b'AATTACTGGGCTTAAAGAGCTCGTAGGTGGTTAAAAAAGTTGATGGTGAAATCCCAAGGCTCAAC\n+CTTGGAACTGCCATCAAAACTTTTTAGCTAGAGTGTGATAGAGGAAAGTGGAATTTCTAGTGTAGAGGTGAAATTCGTAG\n+ATATTAGAAAGAACACCAAATGCGAAGGCAACTTTCTGGGTCACTACTGACACTGAGGAGCGAAAGCATGGGTAGCGAAG\n+AGGATTAGATACCCTCGTAGTCCATGCCGTAAACGATGTGTGCTAGACGTTGGAAATATATTTTTCAGTGTCGCAGCGAA\n+AGCATTAAGCACACCGCCTGGGGAGTACGACCGCAAGGTTAAAACTCAAATGAATTGACGGGGACCCGCACAAGCAGTGG\n+AGCATGTGGTTTAATTCGAAGATACGCGCAGAACCTTACCAACACTTGACATGTTCGTCGCGAGACCAAGAGATTGGTTT\n+CTTCATTTAGTTGGACGAAACACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAA\n+CGAGCGCAACCCCTACTTTTAGTTGCCACCATTTAGTTGGGCACTTTAAAAGAACTGCCAGTGATAAGCTGGAGGAAGGT\n+GGGGATGACGTCAAGTCCTCATGGCCCTTACGTGTTGGGCTACACACGTGCTACAATGGCACTTACAATGGGAAGCAAAG\n+AGGTGACTCCTAGCTAATCCCAAAAATGTGTCTCAGTTCGGATTGCACTCTGCAACTCGAGTGCATGAAGCTGGAATTGC\n+TAGTAATCGCGAATCAGCGCGTCGCGGTGAATACGTTCCCGGGTCTTGTACACACCGCCCGTCACACCATGGAAGTTGGT\n+TACACCTTAAGGCAAATCGTCAACCTTCGGGAGACATTTGACTACGGTACGATCAGCAACTGGGGT\n+>356409;size=1;\n+AGGGTTTGATCATGGCTCAGATTGAACGCTGGCGGTAGGCTTAACACATGCAAGTCGTGCGAGAAAGTACCTTCGGGTGC\n+GAGTAGAGCGGCGGACGGGTGAGTAACGCGTAGGAATCTACCTAGTAGAAGGGGATAGCCCGGGGAAACTCGGATTAATA\n+CCGTATACCTCCTTAGGGAGAAAGAGGGCTTAGCTTTGATGCTCTCGCTATTAGATGAGCCTGCGTAAGATTAGCTTGTT\n+GGTGAGGTAATGGCTCACCAAGGCGACGATCTTTAGCTGGTCTGAGAGGACGATCAGCCACATTGGGACTGAGACACGGC\n+CCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGGACAATGGGCGCAAGCCTGATCCAGCCATACCGCGTGTGTGAAG\n+AAGGCCTTCGGGTTGTAAAGCACTTTAAGTAAGGAGGAAAAGATTGTAGTTAATACCTGCAATCCGTGACGTTACTTACA\n+GAATAAGGACCGGCTAATTCCGTGCCAGCAGCCGCGGTAATACGGAAGGTCCAAGCGTTAATCGGAATTACTGGGCGTAA\n+AGCGCGCGTAGGTGGTTTTTTAAGTTGGATGTGAAAGCCCTGGGCTCAACCTAGGAACTGCATCCAAAACTAGATGACTA\n+GAGTACGAAAGAGGGAAGTAGAATTCACAGTGTAGCGGTGGAATGCGTAGATATTGTGAAGAATACCAATGGCGAAGGCA\n+GCTTCCTGGTTCTGTACTGACACTGAGGTGCGAAAGCGTGGGTAGCGAACAGGATTAGATACCCTGGTAGTCCACGCCGT\n+AAACGATGACAACTAGCTGTTGGGAGACAAGATCTCTCAGTGGCGCAGCTAACGCTTTAAGTTGTCCGCCTGGGGAGTAC\n+GGCCGCAAGGTTAAAACTCAAATGAATTGACGGGGACCCGCACA
AGCGGTGGAGCATGTGGTTTAATTCGATGCAACGCG\n+AAAAACCTTACCTACTCTTGACATACTTGGAAGCTCTTGTAATGAGAGTGTGCTTTTAGAGCCAAGATACAGGTGCTGCA\n+TGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCATAACGAGCGCAACCCTTACCCTTATTTGCCAGCGGT\n+TCGGCCGGGAACTATAAGGGGACTGCCGGTGACAAACCGGAGGAAGGTGAGGACGACGTCAAGTCATCATGGCCCTTACG\n+AGTAGGGCTACACACGTGCTACAATGGGGAATACAGACGGACGCTAAAGCGTGAGCTGGTGCTAATCCTAAAAAATTTCT\n+CGTAGTCCGGATTGCAGTCTGCAACTCGACTGCATGAAGTCGGAATCGCTAGTAATCGCGAATCAGCATGTCGCGGTGAA\n+TACGTTCTCGGGTCTTGTACACACCGCCCGTCACACCATGGAAGTGTGTTGCACCAGAAGTAGGTAGTCTAACCTTCGGG\n+AAGGCGCTTACCACGGTGTGATCCATGACTGGGGTGAAGTCGTAACAAGGTAGCCGTA\n+>354371;size=1;\n+AGAGTTTGATCATGGCTCAGAATGAACGCTGGCGGCACGCTTAACACATGCAAGTCGAACGAGATCTTCGGATCTAGTGG\n+CAGACGGGTGAGTAACGCGTGGGAACCTGCCCAGTAGTAGAGAATAACTTGGGGAAACTTAAGCTAATACTTTATACGTC\n+CTTCGGGAGAAAGCTTTATGCGCTATTGGATGGGCCCGCGTTAGATTAGTTTGTTGGTGAGGTAACGGCTCACCAAGGCG\n+ACGATCTATAGCTGGTCTGAGAGGATGATCAGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGT\n+GGGGAATATTGGACAATGGGGGCAACCCTGATCCAGCGATGCCGCGTGAGTGATGAAGGCCCTAGGGTTGTAAAACTCTT\n+TCGTCAGGGAAGATAATGACGGTACCTGAAGAAGAAGATCCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGG\n+TCTAGCGTTATTCGGAATTACTGGGCTTAAAGAGTTCGTAGGTGGTTGAAAAAGTTGGTGGTGAAATCCCAGAGCTTAAC\n+TCTGGAACTGCCATCAAAACTTTTCAGCTAGAGTTTGATAGAGGAAAGCAGAATTTCTAGTGTAGAGGTGAAATTCGTAG\n+ATATTAGAAAGAATACCAATTGCGAAGGCAGCTTTCTGGATCATTACTGACACTGAGGAACGAAAGCATGGGTAGCGAAG\n+AGGATTAGATACCCTCGTAGTCCATGCCGTAAACGATGTGTGTTAGACGTTGGAAATTTATTTTCAGTGTCGCAGCGAAA\n+GCGATAAACACACCGCCTGGGGAGTACGACCGCAAGGTTAAAACTCAAATGAATTGACGGGGACCCGCACAAGTAGTGGA\n+GCATGTGGTTTAATTCGAAGATACGCGCAGAACCTTACCAACACTTGACATGTTCGTCGCGACTTTAAGAGATTAAAGTT\n+TTCGGTTCGGCCGGACGAAACACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAA\n+CGAGCGCAACCCTCACTTTTAGTTGCCATCATTTAGTTGGGCACTCTGAAAGAACTGCCAGTGATAAGCTGGAGGAAGGT\n+GGGGATGACGTCAAGTCCTCATGGCCCTTACGTGTTGGGCTACACACGTGCTACAATGGTATCTACAACAGGAAGCAAAA\n+CAGCGATGTTAAGCAAATCCTTAAAAGATACCTCAGTTCGGATTGCACTCTGCAACTCGAGTGCATGAAGCTGGAATTAC\n+TAGTAATCGTGGATCAGCGTGCCACGGTGAATGCGTTCCCGGGTCTTGTACACACC
GCCCGTCACACCATGGGAGTTGGT\n+TCTACCTTAAGGCAAGGTTTAATACCCTTGACCACGGTATAGTCAGCGACTGGGGTGAAGTCATAACAAGGTAGCCGTA\n'