Repository 't_coffee'
hg clone https://toolshed.g2.bx.psu.edu/repos/earlhaminst/t_coffee

Changeset 0:794a6e864a96 (2016-12-15)
Next changeset 1:b3833e5b50d4 (2016-12-19)
Commit message:
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty
added:
filter_by_fasta_ids.py
t_coffee.xml
t_coffee_to_cigar.pl
test-data/cigar.tabular
test-data/ids.txt
test-data/input.fasta
test-data/output1.fasta
b
diff -r 000000000000 -r 794a6e864a96 filter_by_fasta_ids.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filter_by_fasta_ids.py Thu Dec 15 11:04:25 2016 -0500
[
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+""" A script to build specific fasta databases """
+from __future__ import print_function
+
+
+import logging
+import sys
+
+
+# ===================================== Iterator ===============================
+class Sequence:
+    ''' One FASTA record: its header line plus the raw sequence lines '''
+    def __init__(self):
+        # Raw lines of the record body, stored exactly as they were read
+        self.sequence_parts = []
+        # Header line of the record (set by the reader)
+        self.header = ""
+
+    def get_sequence(self):
+        ''' Join the stored lines into one string, dropping trailing
+        whitespace and any newline / carriage-return characters '''
+        cleaned = []
+        for part in self.sequence_parts:
+            stripped = part.rstrip()
+            cleaned.append(stripped.replace('\n', '').replace('\r', ''))
+        return "".join(cleaned)
+
+
+class FASTAReader:
+    """
+        FASTA db iterator. Each iteration step returns one Sequence object.
+
+        The file is read lazily with one line of look-ahead: the header line
+        of the upcoming record is kept in self.next_line between calls.
+        The underlying file is closed once the iterator is exhausted.
+    """
+    def __init__(self, fasta_name):
+        # fasta_name: path of the FASTA file to read
+        self.fasta_file = open(fasta_name)
+        # NOTE: the first line of the file is used as the first record's
+        # header even if it does not start with '>'.
+        self.next_line = self.fasta_file.readline()
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        ''' Return the next record; raise StopIteration at end of file '''
+        header_line = self.next_line
+        if not header_line:
+            # End of input: release the file handle instead of leaking it.
+            # close() on an already closed file does nothing, so calling
+            # next() again after exhaustion stays safe.
+            self.fasta_file.close()
+            raise StopIteration
+
+        seq = Sequence()
+        seq.header = header_line.rstrip().replace('\n', '').replace('\r', '')
+
+        # Collect every line up to (not including) the next '>' header line
+        next_line = self.fasta_file.readline()
+        while next_line and next_line[0] != '>':
+            seq.sequence_parts.append(next_line)
+            next_line = self.fasta_file.readline()
+        # Keep the following header (or '' at end of file) for the next call
+        self.next_line = next_line
+        return seq
+
+    # Python 2/3 compat
+    next = __next__
+# ==============================================================================
+
+
+def target_match(target, search_entry):
+    ''' Return the first item of target that occurs as a substring of the
+    upper-cased search_entry, or None when no item occurs in it '''
+    upper_entry = search_entry.upper()
+    hits = (candidate for candidate in target if candidate in upper_entry)
+    return next(hits, None)
+
+
+def main():
+    ''' Print the FASTA records whose header contains one of the wanted IDs.
+
+    sys.argv[1]: text file with one sequence ID per line
+    sys.argv[2]: FASTA file to filter
+
+    Each matching record is written to stdout as its header line followed
+    by the sequence on a single line. A summary of the run is logged to the
+    file 'filter_fasta_log'.
+    '''
+    logging.basicConfig(filename='filter_fasta_log',
+                        level=logging.INFO,
+                        format='%(asctime)s :: %(levelname)s :: %(message)s',)
+
+    # NOTE: 'duplicates' is never incremented here; the key is kept so the
+    # logged summary keeps the same three entries.
+    work_summary = {'wanted': 0, 'found': 0, 'duplicates': 0}
+
+    targets = []
+    with open(sys.argv[1]) as f_target:
+        for line in f_target:
+            identifier = line.strip()
+            # Skip blank lines: an empty ID would become the target ">",
+            # which is a substring of every FASTA header and would select
+            # an unrelated record.
+            if identifier:
+                targets.append(">%s" % identifier.upper())
+
+    work_summary['wanted'] = len(targets)
+    homd_db = FASTAReader(sys.argv[2])
+
+    for entry in homd_db:
+        target_matched_results = target_match(targets, entry.header)
+        if target_matched_results:
+            work_summary['found'] += 1
+            # Each wanted ID is consumed by the first record it matches
+            targets.remove(target_matched_results)
+            print(entry.header)
+            print(entry.get_sequence())
+    for parm, count in work_summary.items():
+        logging.info('%s ==> %d', parm, count)
+
+if __name__ == "__main__":
+    main()
b
diff -r 000000000000 -r 794a6e864a96 t_coffee.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/t_coffee.xml Thu Dec 15 11:04:25 2016 -0500
[
b'@@ -0,0 +1,210 @@\n+<tool id="t_coffee" name="T-Coffee" version="11.0.8">\n+    <description>multiple sequence alignment</description>\n+    <requirements>\n+        <requirement type="package" version="11.0.8">t_coffee</requirement>\n+    </requirements>\n+    <stdio>\n+        <exit_code range="1:" level="fatal" />\n+    </stdio>\n+    <version_command>\n+        t_coffee -version | grep Version\n+    </version_command>\n+    <command>\n+<![CDATA[\n+        #if str($input_type.filter_fasta) == \'yes\'\n+            #set $input = \'-infile=stdin\'\n+            python \'$__tool_directory__/filter_by_fasta_ids.py\' \'$input_type.identifiers\' \'$input_type.fasta_input\' |\n+        #end if\n+\n+        #set $method_opt = \'\'\n+        #if $method01\n+            #set $method_opt += str($method01) + \',\'\n+        #end if\n+        #if $method02\n+            #set $method_opt += str($method02) + \',\'\n+        #end if\n+        #if $method03\n+            #set $method_opt += str($method03) + \',\'\n+        #end if\n+        #if $method_opt\n+            #set $method_opt = \'-method \' + $method_opt[:-1]\n+        #end if\n+\n+        #set $output_opt = \'\'\n+        #if $outputs\n+            #set $outputs_arr = str($outputs).split(\',\')\n+            #for $o in $outputs_arr\n+                #if $o not in [\'cigar\', \'dnd\']\n+                    #set $output_opt += $o + \',\'\n+                #end if\n+            #end for\n+            #if \'cigar\' in $outputs_arr and \'fasta_aln\' not in $outputs_arr\n+                #set $output_opt += \'fasta_aln,\'\n+            #end if\n+        #else\n+            #set $outputs_arr = []\n+        #end if\n+        #if $output_opt\n+            #set $output_opt = \'-output \' + $output_opt[:-1]\n+        #end if\n+\n+        t_coffee \'$input\' $method_opt $output_opt -n_core \\${GALAXY_SLOTS:-1} -run_name t_coffee_out -quiet\n+\n+        #if \'cigar\' in $outputs_arr\n+            && perl 
\'$__tool_directory__/t_coffee_to_cigar.pl\' t_coffee_out.fasta_aln > \'$cigar\'\n+        #end if\n+]]>\n+    </command>\n+    <inputs>\n+        <conditional name="input_type">\n+            <param name="filter_fasta" type="select" label="Filter FASTA input?">\n+                <option value="no">No</option>\n+                <option value="yes">Yes</option>\n+            </param>\n+            <when value="yes">\n+                <param name="fasta_input" type="data" format="fasta" label="FASTA sequences" />\n+                <param name="identifiers" type="data" format="txt" label="List of FASTA sequence IDs" />\n+            </when>\n+            <when value="no">\n+                <param name="input" type="data" format="fasta" label="FASTA sequences" />\n+            </when>\n+        </conditional>\n+        <param name="method01" type="select" display="checkboxes" multiple="true" label="Pairwise Structual Method">\n+            <option value="sap_pair">sap_pair</option>\n+            <option value="TMalign_pair">TMalign_pair</option>\n+            <option value="mustang_pair">mustang_pair</option>\n+        </param>\n+        <param name="method02" type="select" display="checkboxes" multiple="true" label="Multiple Sequence Alignment Methods">\n+            <option value="pcma_msa">pcma_msa</option>\n+            <option value="clustalw_msa">clustalw_msa</option>\n+            <option value="dialigntx_msa">dialigntx_msa</option>\n+            <option value="poa_msa">poa_msa</option>\n+            <option value="muscle_msa">muscle_msa</option>\n+            <option value="probcons_msa">probcons_msa</option>\n+            <option value="t_coffee_msa">t_coffee_msa</option>\n+            <option value="amap_msa">amap_msa</option>\n+            <option value="kalign_msa">kalign_msa</option>\n+        </param>\n+        <param name="method03" type="select" display="checkboxes" multiple="true" label="Pairwise Sequence Alignment Methods">\n+            <option 
value="fast_pair">fast_pair</option>\n+            <option value="clustalw_pair">clustalw_pair</option>\n+            <option value="lalign_id_pair">lalign_id_pair</opti'..b'" label="${tool.name} on ${on_string}: fasta_seq" from_work_dir="t_coffee_out.fasta_seq">\n+            <filter>\'fasta_seq\' in outputs</filter>\n+        </data>\n+        <data name="msf_aln" format="msf" label="${tool.name} on ${on_string}: msf_aln" from_work_dir="t_coffee_out.msf_aln">\n+            <filter>\'msf_aln\' in outputs</filter>\n+        </data>\n+        <data name="phylip" format="phyloxml" label="${tool.name} on ${on_string}: phylip" from_work_dir="t_coffee_out.phylip">\n+            <filter>\'phylip\' in outputs</filter>\n+        </data>\n+        <data name="pir_aln" format="pir" label="${tool.name} on ${on_string}: pir_aln" from_work_dir="t_coffee_out.pir_aln">\n+            <filter>\'pir_aln\' in outputs</filter>\n+        </data>\n+        <data name="pir_seq" format="pir" label="${tool.name} on ${on_string}: pir_seq" from_work_dir="t_coffee_out.pir_seq">\n+            <filter>\'pir_seq\' in outputs</filter>\n+        </data>\n+        <data name="score_ascii" format="ascii" label="${tool.name} on ${on_string}: score_ascii" from_work_dir="t_coffee_out.score_ascii">\n+            <filter>\'score_ascii\' in outputs</filter>\n+        </data>\n+        <data name="score_html" format="html" label="${tool.name} on ${on_string}: score_html" from_work_dir="t_coffee_out.score_html">\n+            <filter>\'score_html\' in outputs</filter>\n+        </data>\n+    </outputs>\n+    <tests>\n+        <test>\n+            <param name="filter_fasta" value="no" />\n+            <param name="input" value="input.fasta" ftype="fasta" />\n+            <param name="method02" value="clustalw_msa" />\n+            <param name="outputs" value="fasta_aln" />\n+            <output name="fasta_aln" file="output1.fasta" />\n+        </test>\n+        <test>\n+            <param 
name="filter_fasta" value="yes" />\n+            <param name="fasta_input" value="input.fasta" ftype="fasta" />\n+            <param name="identifiers" value="ids.txt" ftype="txt" />\n+            <param name="method02" value="clustalw_msa" />\n+            <param name="outputs" value="cigar" />\n+            <output name="cigar" file="cigar.tabular" />\n+        </test>\n+    </tests>\n+    <help>\n+**What it does**\n+\n+This tool is a wrapper for the T-Coffee multiple sequence alignment suite. The input is a set of sequences in FASTA format. Apart from running on the complete FASTA input, it can also run on a subset of sequences by providing a list of the FASTA IDs.\n+\n+This wrapper offers selected advanced T-Coffee options like the selection of the alignment methods to use: \'\'Pairwise Structual Method\'\', \'\'Multiple Sequence Alignment Methods\'\' or \'\'Pairwise Sequence Alignment Methods\'\'.\n+\n+The T-Coffee documentation can be found at http://www.tcoffee.org/Projects/tcoffee/ .\n+\n+**Example**\n+\n+Suppose you have 5 sequences in FASTA format::\n+\n+    >1aboA\n+    NLFVALYDFVASGDNTLSITKGEKLRVLGYNHNGEWCEAQTKNGQGWVPS\n+    NYITPVN\n+    >1ycsB\n+    KGVIYALWDYEPQNDDELPMKEGDCMTIIHREDEDEIEWWWARLNDKEGY\n+    VPRNLLGLYP\n+    >1pht\n+    GYQYRALYDYKKEREEDIDLHLGDILTVNKGSLVALGFSDGQEARPEEIG\n+    WLNGYNETTGERGDFPGTYVEYIGRKKISP\n+    >1vie\n+    DRVRKKSGAAWQGQIVGWYCTNLTPEGYAVESEAHPGSVQIYPVAALERI\n+    N\n+    >1ihvA\n+    NFRVYYRDSRDPVWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRD\n+\n+By selecting "Yes" in output fasta_aln in the wrapper, the user will obtain the multiple alignment in FASTA format::\n+\n+    >1aboA\n+    NL-FVA---LYDFVASGDNTLSITKGEKLR-------VLGYN-------H\n+    NGEWCEA--QTKN-GQGWVPSNYIT------PVN\n+    >1ycsB\n+    KGVIYA---LWDYEPQNDDELPMKEGDCMT-------IIHREDE-----D\n+    EIEWWWA--RLND-KEGYVPRNLLG------LYP\n+    >1pht\n+    GYQYRA---LYDYKKEREEDIDLHLGDILTVNKGSLVALGFSDGQEARPE\n+    EIGWLNGYNETTG-ERGDFPGTYVEYIGRKKISP\n+    >1vie\n+    
DR-----------VRK--KSGAAWQGQIVGWYCTNLTPEGYAVE------\n+    ------S--EAHPGSVQIYPVAALE------RIN\n+    >1ihvA\n+    NF-RVYYRDSRDPVWKGPA-KLLWKGEGAV-------VIQDN-------S\n+    DI--------------KVVPRRKAK-----IIRD\n+    </help>\n+    <citations>\n+        <citation type="doi">10.1006/jmbi.2000.4042</citation>\n+    </citations>\n+</tool>\n'
b
diff -r 000000000000 -r 794a6e864a96 t_coffee_to_cigar.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/t_coffee_to_cigar.pl Thu Dec 15 11:04:25 2016 -0500
[
@@ -0,0 +1,48 @@
+#!/usr/bin/perl
+#
+use strict;
+use warnings;
+
+# A simple Perl script to convert FASTA sequence alignments into 2-column output where first column is FASTA id and second is CIGAR line
+# TCoffee_to_cigar.pl <file>
+
+sub convert_and_print {
+    # Prints one line "<header>\t<cigar>" for an aligned sequence, where
+    # every gap ('-') counts as D and every other character as M
+    my ($header, $sequence) = @_;
+
+    # Reduce the alignment row to a string made only of M and D
+    (my $ops = $sequence) =~ s/[^-]/M/g;
+    $ops =~ tr/-/D/;
+
+    # Walk over the maximal runs of identical letters; a run of length 1 is
+    # written as the bare letter, longer runs get their length as prefix
+    my $cigar = '';
+    while ($ops =~ /((.)\2*)/g) {
+        my $run_length = length($1);
+        $cigar .= $run_length if $run_length > 1;
+        $cigar .= $2;
+    }
+    print "$header\t$cigar\n";
+}
+
+# Main script: read the FASTA alignment named on the command line and print
+# one "<id>\t<cigar>" line per record.
+my $file1 = $ARGV[0];
+# Stop with a clear message instead of reading from an unopened handle
+defined $file1 or die "Usage: $0 <fasta_alignment_file>\n";
+open(my $fh1, '<', $file1) or die "Cannot open '$file1': $!\n";
+
+my ($header, $sequence) = ('', '');
+# Tracks whether a '>' line has been read, so that IDs such as "0" (false
+# in boolean context) are not mistaken for "no record yet"
+my $seen_header = 0;
+while (my $line = <$fh1>) {
+    chomp $line;
+    # Drop a trailing CR (CRLF input) so it is not counted as a match column
+    $line =~ s/\r\z//;
+    if (substr($line, 0, 1) eq '>') {
+        # A new header closes the previous record, if there was one
+        if ($seen_header) {
+            convert_and_print($header, $sequence);
+        }
+        $header = substr($line, 1);
+        $sequence = '';
+        $seen_header = 1;
+    } else {
+        $sequence .= $line;
+    }
+}
+close $fh1;
+# Emit the last record; print nothing when the input had no header at all
+if ($seen_header) {
+    convert_and_print($header, $sequence);
+}
b
diff -r 000000000000 -r 794a6e864a96 test-data/cigar.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cigar.tabular Thu Dec 15 11:04:25 2016 -0500
b
@@ -0,0 +1,3 @@
+ENSMUST00000091291_musmusculus 41M3D64MD473MD7M2D375M3D74M3D771M2D13MD227MD13M7D226M3D58M3D6MD21M3D437M2D33MD76MD1017M12D34M10D14M3D22M4D10M4D107M
+ENSCAFT00000026349_canisfamiliaris 16D26M2D64MD422MD8M2D140M3D195M2D5M8D16M5D52M3D61M6D6M3D672M20D24MD311M8D25M2D58M2D11M2D103M5D17M6D18M5D22M2D10MD37M3D499M3D10M4D30M5D12M4D35M5D6M10D904M4D27M6D40M8D16M3D21M126D
+ENSRNOT00000019267_rattusnorvegicus 75MD14M6D112M6D317MD8M2D40MD7M2D273M4D23M3D16M5D794M3D6M18D315M2D7MD46M2D249MD335M3D142M2D8MD8M2D14MD35MD63M5D16M3D16M4D10M6D1152M3D
b
diff -r 000000000000 -r 794a6e864a96 test-data/ids.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ids.txt Thu Dec 15 11:04:25 2016 -0500
b
@@ -0,0 +1,3 @@
+ENSMUST00000091291_musmusculus
+ENSCAFT00000026349_canisfamiliaris
+ENSRNOT00000019267_rattusnorvegicus
b
diff -r 000000000000 -r 794a6e864a96 test-data/input.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.fasta Thu Dec 15 11:04:25 2016 -0500
b
b'@@ -0,0 +1,14 @@\n+>ENSMUST00000091291_musmusculus\n+ATGGGCTTCGGGAGAGGATGTGAGACGACGGCTGTGCCATTGCTGGTGGCCGTGGCCGCGTTGCTGGTGGGCACAGCCGGCCACCTGTACCCTGGAGAGGTGTGCCCTGGTATGGACATCCGGAACAACCTGACCAGGCTACATGAGCTGGAGAACTGCTCAGTCATTGAGGGCCATCTGCAGATCCTCCTGATGTTCAAGACCAGACCCGAAGATTTCCGAGACCTCAGTTTCCCCAAACTCATCATGATCACAGATTACCTGCTTCTCTTCCGTGTCTATGGTCTGGAAAGTCTGAAAGACCTCTTCCCAAATCTCACAGTCATCCGAGGCTCCCGTCTCTTCTTCAACTATGCCCTGGTTATCTTCGAGATGGTCCACCTGAAGGAGCTGGGGCTTTATAACCTCATGAACATCACCCGGGGCTCTGTCCGCATCGAGAAGAATAATGAGCTCTGCTACCTGGCCACTATCGACTGGTCCCGTATCCTGGATTCTGTGGAGGACAACTACATTGTACTGAACAAAGATGACAACGAGGAATGTGGGGATGTCTGTCCAGGCACCGCCAAGGGCAAGACCAACTGTCCTGCCACTGTCATCAATGGGCAGTTTGTGGAACGGTGCTGGACACACAGTCATTGTCAGAAAGTTTGCCCAACCATCTGTAAGTCACATGGCTGCACAGCTGAAGGCCTGTGCTGCCACAAAGAGTGCCTGGGCAACTGTTCGGAACCTGATGACCCCACCAAGTGTGTGGCCTGTCGCAACTTCTATCTGGATGGTCAGTGTGTGGAGACCTGCCCGCCACCCTACTATCACTTCCAGGACTGGCGCTGTGTGAACTTCAGCTTCTGCCAAGACCTTCACTTCAAATGCAGGAACTCTCGGAAGCCTGGCTGCCACCAATACGTCATTCACAACAATAAGTGCATCCCCGAGTGCCCGTCTGGCTATACCATGAATTCCAGCAACTTGATGTGCACCCCATGTCTGGGACCCTGCCCTAAGGTCTGCCAAATCCTCGAAGGTGAGAAGACCATTGATTCTGTGACATCTGCCCAGGAGCTCCGAGGCTGCACTGTGATCAACGGTAGCCTGATCATCAACATCCGAGGGGGCAACAACCTGGCAGCTGAGCTGGAGGCTAACCTTGGCCTCATTGAAGAAATTTCGGGATTTCTAAAGATCCGCCGCTCCTATGCTCTGGTATCACTTTCTTTCTTCAGGAAGCTACATCTGATTCGAGGAGAGACCTTGGAAATTGGGAACTATTCTTTTTATGCCTTGGACAACCAGAACCTGAGGCAACTCTGGGACTGGAGCAAACACAACCTCACCATCACTCAGGGCAAGCTCTTCTTCCATTACAACCCGAAACTCTGCTTGTCTGAAATTCACAAGATGGAAGAAGTCTCCGGAACTAAGGGCCGTCAGGAGAGGAACGACATTGCCCTGAAGACCAATGGGGACCAGGCATCGTGTGAAAATGAATTGCTTAAATTTTCTTTCATTCGGACATCTTTTGACAAGATCCTGTTGAGGTGGGAACCCTACTGGCCCCCCGACTTCCGAGATCTCCTGGGATTCATGTTGTTCTACAAAGAGGCCCCTTATCAGAATGTGACAGAGTTTGATGGGCAGGATGCTTGTGGCTCCAACAGCTGGACTGTGGTGGATATTGACCCGCCCCAGAGGTCCAACGACCCCAAGTCTCAGACCCCAAGCCACCCTGGGTGGCTGATGCGGGGCCTCAAACCCTGGACCCAATACGCCATCTTTGTGAAGACCTTGGTTACCTTCTCTGATGAACGGCGGACCTATGGAGCCAAAAGTGATATCATCTATGTGCAAACAGATGCCACTAATCCTTCTGTCCCCCTGGATCCCATATCAGTTTCTAATTCCTCATCTCAGATTATCTTAAAGTGGAAGCCCCCCTCTG
ACCCCAATGGCAACATCACACACTACCTGGTGTACTGGGAGAGGCAAGCAGAGGACAGCGAGCTGTTTGAGCTGGATTATTGTCTCAAAGGGCTGAAGCTCCCTTCACGGACCTGGTCCCCACCCTTTGAGTCTGATGATTCTCAGAAGCACAATCAGAGTGAGTATGACGACTCGGCCAGTGAGTGCTGCTCATGCCCTAAGACTGACTCTCAGATCCTGAAGGAGCTGGAGGAGTCTTCATTCAGGAAGACCTTCGAGGATTACCTGCACAACGTGGTTTTTGTCCCCAGGCCATCCCGAAAGCGAAGATCCCTTGAAGAGGTGGGGAATGTGACAGCCACCACACTCACACTTCCAGATTTCCCCAACGTCTCCTCTACCATTGTGCCCACAAGTCAGGAGGAGCACAGGCCATTTGAGAAAGTGGTGAACAAGGAGTCACTTGTCATCTCTGGCCTGAGACACTTCACTGGGTACCGCATTGAGCTGCAGGCATGCAATCAAGATTCCCCAGATGAGAGGTGCAGTGTGGCTGCCTACGTCAGTGCCCGGACCATGCCTGAAGCTAAGGCAGATGACATCGTTGGCCCTGTGACTCATGAAATCTTTGAGAACAATGTTGTACACTTAATGTGGCAAGAGCCAAAGGAACCTAATGGTCTGATTGTGCTATATGAAGTGAGCTATCGCCGATATGGTGATGAGGAGCTGCACCTCTGTGTCTCCCGGAAGCATTTTGCCCTGGAGCGGGGCTGCAGGCTGCGAGGGCTCTCCCCAGGAAACTACAGTGTTCGAGTCCGGGCTACCTCTCTGGCAGGAAATGGCTCCTGGACAGAACCCACCTATTTTTATGTGACTGATTATTTAGATGTCCCATCAAATATTGCCAAAATTATCATTGGACCCCTCATCTTTGTCTTCCTCTTCAGTGTTGTGATTGGAAGTATTTATCTATTTCTGAGAAAGAGGCAGCCGGATGGGCCAATGGGACCACTGTATGCATCTTCAAACCCTGAGTACCTCAGTGCCAGTGATGTGTTTCCATCTTCTGTGTACGTGCCGGACGAGTGGGAGGTGCCTCGAGAGAAGATCACCCTTCTTCGAGAGCTGGGGCAGGGATCCTTTGGTATGGTGTATGAAGGCAATGCCAAGGATATCATCAAGGGTGAGGCAGAGACCCGTGTTGCGGTTAAGACTGTCAATGAGTCAGCCAGTCTTCGAGAACGGATCGAGTTCCTCAATGAGGCATCAGTCATGAAGGGATTCACCTGCCATCATGTGGTCCGCCTTCTTGGGGTGGTATCCAAAGGACAGCCAACGCTGGTAGTGATGGAATTGATGGCTCATGGAGACCTGAAAAGTCACCTCCGTTCTCTGAGGCCAGATGCTGAGAATAACCCAGGCCGCCCTCCCCCTACCTTGCAAGAAATGATTCAGATGACAGCAGAAATTGCTGATGGCATGGCATACTTGAACGCCAAGAAGTTTGTGCACCGGGACCTGGCAGCTCGAAACTGCATGGTTGCCCATGATTTTACTGTCAAAATTGGAGACTTTGGAATGACAAGGGACATCTACGAGACAGATTACTATCGGAAAGGGGGCAAGGGACTGCTTCCTGTGAGGTGGATGTCACCTGAGTCCCTGAAGGATGGAGTCTTTACTGCTTCTTCTGATATGTGGTCCTTTGGGGTGGTCCTTTGGGAAATCACTAGCCTGGCTGAGCAACCTTATCAAGGCCTGTCTAATGAACAGGTGTTGAAGTTTGTCATGGATGGAGGCTATCTGGATCCCCCTGATAACTGTCCAGAGAGACTCACTGACCTGATGCGCATGTGCTGGCAGTTCAACCCCAAGATGAGGCCAACCTTCCTGGAAATCGTCAACCTGCTCAAGGATGACCTCCACCCCAGCTTTCCAGAAGTTTCCTTCTTCTACAGCGAGGAGAACAAGGCTCCTGAGAGTGAGGAGCTGGAGATGGAGTTTGAAGACATGGAG
AAT'..b'GCGGGCCAGGCATCGACATCCGCAACGACTATCAGCAGCTGAAGCGCCTGGAGAACTGCACGGTGATCGAGGGCTACCTCCACATCCTGCTCATCTCCAAGGCCGAGGACTACCGCAGCTACCGCTTCCCCAAGCTCACGGTCATTACCGAGTACTTGCTGCTGTTCCGAGTGGCTGGCCTCGAGAGCCTCGGAGACCTCTTCCCCAACCTCACGGTCATCCGCGGCTGGAAACTCTTCTACAACTACGCCCTGGTCATCTTCGAGATGACCAATCTCAAGGATATTGGGCTTTACAACCTGAGGAACATTACTCGGGGGGCCATCAGGATTGAGAAAAATGCTGACCTCTGTTACCTCTCCACTGTGGACTGGTCCCTGATCCTGGATGCGGTGTCCAATAACTACATTGTGGGGAATAAGCCCCCAAAGGAATGTGGGGACCTGTGTCCAGGGACCATGGAGGAGAAGCCGATGTGTGAGAAGACCACCATCAACAATGAGTACAACTACCGCTGCTGGACCACAAACCGCTGCCAGAAAATGTGCCCGAGCACGTGTGGGAAGCGGGCGTGCACCGAGAACAACGAGTGCTGCCACCCCGAGTGCCTGGGCAGCTGCAGCGCGCCTGACAACGACACGGCCTGTGTAGCTTGCCGCCACTACTACTATGCCGGTGTCTGTGTGCCTGCCTGCCCGCCCAACACCTACAGGTTTGAGGGCTGGCGCTGTGTGGACCGTGACTTCTGCGCCAACATCCTCAGCGCCGAGAGCAGCGACTCCGAGGGGTTTGTGATCCACGACGGCGAGTGCATGCAGGAGTGCCCCTCGGGCTTCATCCGCAACGGCAGCCAGAGCATGTACTGCATCCCTTGTGAAGGTCCTTGCCCGAAGGTCTGTGAGGAAGAAAAGAAAACAAAGACCATTGATTCTGTTACTTCTGCTCAGATGCTCCAAGGATGCACCATCTTCAAGGGCAATTTGCTCATTAACATCCGACGGGGGAATAACATTGCTTCAGAGCTGGAGAACTTCATGGGGCTCATCGAGGTGGTGACGGGCTACGTGAAGATCCGCCATTCTCATGCCTTGGTCTCCTTGTCCTTCCTAAAAAACCTTCGCCTCATCTTAGGAGAGGAGCAGCTAGAAGGGAATTACTCCTTCTACGTCCTCGACAACCAGAACTTGCAGCAACTATGGGACTGGGACCACCGCAACCTGACCATCAAAGCAGGGAAAATGTACTTTGCTTTCAATCCCAAATTATGTGTTTCCGAAATTTACCGCATGGAGGAAGTGACGGGGACTAAAGGGCGCCAAAGCAAAGGGGACATAAACACCAGGAACAACGGGGAGAGAGCCTCCTGTGAAAGTGACGTCCTGCATTTCACCTCCACCACCACGTCGAAGAATCGCATCATCATAACCTGGCACCGGTACCGGCCCCCTGACTACAGGGATCTCATCAGCTTCACCGTTTACTACAAGGAAGCACCCTTTAAGAATGTCACGGAGTATGATGGGCAGGACGCCTGCGGCTCCAACAGCTGGAACATGGTGGACGTGGACCTCCCGCCCAACAAGGACGTGGAGCCCGGCATCTTACTACATGGGCTGAAGCCCTGGACTCAGTACGCCGTTTACGTCAAGGCTGTGACCCTCACCATGGTGGAGAATGACCATATCCGTGGGGCCAAGAGTGAGATCTTGTACATTCGCACCAATGCTTCAGTTCCTTCCATTCCCTTGGACGTTCTTTCAGCATCGAACTCCTCTTCTCAGTTAATCGTGAAGTGGAACCCTCCCTCTCTGCCCAACGGCAACCTGAGTTACTACATTGTGCGCTGGCAGCGGCAGCCTCAGGACGGCTACCTTTACCGGCACAATTACTGCTCCAAAGACAAAATCCCCATCAGGAAGTATGCCGACGGCACCATCGACATTGAGGAGGTCACAGAGAACCCCAAGACTGAGGTGTGTGGTGGGGAGAAAG
GGCCTTGCTGCGCCTGCCCCAAAACTGAAGCCGAGAAGCAGGCCGAGAAGGAGGAGGCTGAATACCGCAAAGTCTTTGAGAATTTCCTGCACAACTCCATCTTCGTGCCCAGACCTGAAAGGAAGCGGAGAGATGTCATGCAAGTGGCCAACACCACCATGTCCAGCCGAAGCAGGAACACCACGGCCGCAGACACCTACAACATCACCGACCCGGAAGAGCTGGAGACAGAGTACCCTTTCTTTGAGAGCAGAGTGGATAACAAGGAGAGAACTGTGATTTCTAACCTTCGGCCTTTCACATTGTACCGCATCGATATCCACAGCTGCAACCACGAGGCTGAGAAGCTGGGCTGCAGCGCCTCCAACTTTGTCTTTGCAAGGACTATGCCCGCAGAAGGAGCAGATGACATTCCTGGGCCAGTGACCTGGGAGCCAAGGCCTGAAAACTCCATCTTTTTAAAGTGGCCGGAACCTGAGAATCCCAATGGATTGATTCTAATGTATGAAATAAAATACGGATCACAAGTTGAGGATCAGCGAGAATGTGTGTCCAGACAGGAATACAGGAAGTATGGAGGGGCCAAGCTAAACCGGCTAAACCCGGGGAACTACACAGCCCGGATTCAGGCCACATCTCTCTCTGGGAATGGGTCGTGGACAGATCCTGTGTTCTTCTATGTCCAGGCCAAAACAGGATATGAAAACTTCATCCATCTGATCATCGCTCTGCCCGTCGCTGTCCTGTTGATCGTGGGAGGGTTGGTGATTATGCTGTACGTCTTCCATAGAAAGAGAAATAACAGCAGGCTGGGGAATGGAGTGCTGTATGCCTCTGTGAACCCGGAGTACTTCAGCGCTGCTGATGTGTACGTTCCCGATGAGTGGGAGGTGGCTCGGGAGAAGATCACCATGAGCCGGGAACTTGGGCAGGGGTCCTTTGGGATGGTCTATGAAGGAGTTGCCAAGGGTGTGGTGAAAGATGAACCTGAAACCAGAGTGGCCATTAAAACAGTGAACGAGGCCGCAAGCATGCGTGAAAGGATTGAGTTTCTCAACGAAGCTTCTGTGATGAAGGAGTTCAATTGTCACCATGTGGTGCGATTGCTGGGTGTGGTGTCCCAAGGCCAGCCAACACTGGTCATCATGGAACTGATGACACGGGGCGATCTCAAAAGTTATCTCCGGTCTCTGAGGCCAGAAATGGAGAATAATCCAGTCCTAGCACCTCCAAGCCTGAGCAAGATGATTCAGATGGCCGGAGAGATTGCAGACGGCATGGCATACCTCAACGCCAATAAGTTCGTCCACAGAGACCTTGCTGCCCGGAATTGCATGGTAGCCGAAGATTTCACAGTCAAAATCGGAGATTTTGGTATGACGCGAGATATCTATGAGACAGACTATTACCGGAAAGGAGGGAAAGGGCTGCTGCCCGTGCGCTGGATGTCTCCCGAGTCCCTCAAGGATGGAGTCTTCACCACTTACTCGGACGTCTGGTCCTTCGGGGTCGTCCTCTGGGAGATCGCCACACTGGCCGAGCAGCCCTACCAGGGCTTGTCCAACGAGCAAGTCCTTCGCTTCGTCATGGAGGGCGGCCTTCTGGACAAGCCAGACAACTGTCCCGACATGCTGTTTGAACTGATGCGCATGTGCTGGCAGTATAACCCCAAGATGAGGCCTTCCTTCCTGGAGATCATCAGCAGCATCAAAGAGGAGATGGAGCCTGGCTTCCGGGAGGTCTCCTTCTACTACAGCGAGGAGAACAAGCTGCCCGAGCCGGAGGAGCTGGACCTGGAGCCAGAGAACATGGAGAGCGTCCCCCTGGACCCCTCGGCCTCCCTGCCACTGCCCGACAGACACTCAGGACACAAGGCCGAGAACGGCCCCGGCCCTGGGGTGCTGGTCCTCCGCGCCAGCTTCGACGAGAGACAGCCGTACGCACACATGAACGGGGGCCGCAAGAACGAGCGGGCCTTGCCGCTGCCCCAGTCTTCGACC
TGCTGA\n'
b
diff -r 000000000000 -r 794a6e864a96 test-data/output1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output1.fasta Thu Dec 15 11:04:25 2016 -0500
b
b'@@ -0,0 +1,425 @@\n+>ENSMUST00000091291_musmusculus\n+ATGGGCTTCGGGAGAGGATGTGAGACGACGG-CTGTGCCATTGCTGGTGG\n+CCGTGGCCGCGTTGCTGGTGGGCACAGCCGGCCACCTGTACCCTGGAGAG\n+GTGTGC---CCTGGTATGGACATCCGGAACAACCTGACCAGGCTACATGA\n+GCTGGAGAACTGCTCAGTCATTGAGGGCCATCTGCAGATCCTCCTGATGT\n+TCAAGACCAGACCCGAAGATTTCCGAGACCTCAGTTTCCCCAAACTCATC\n+ATGATCACAGATTACCTGCTTCTCTTCCGTGTCTATGGTCTGGAAAGTCT\n+GAAAGACCTCTTCCCAAATCTCACAGTCATCCGAGGCTCCCGTCTCTTCT\n+TCAACTATGCCCTGGTTATCTTCGAGATGGTCCACCTGAAGGAGCTGGGG\n+CTTTATAACCTCATGAACATCACCCGGGGCTCTGTCCGCATCGAGAAGAA\n+TAATGAGCTCTGCTACCTGGCCACTATCGACTGGTCCCGTATCCTGGATT\n+CTGTGGAGGACAACTACATTGTACTGAACAAAGATGACAACGAGGAATGT\n+GGGGATGTCTGTCCAGGCACCGCCAAGGGCAA-GACCA--ACTGTCCTGC\n+CACTGTCATCAATGGGCAGTTTGTGGAACGGTGCTGGACACACAGTCATT\n+GTCAGAAAGTTTGCCCAACCATCTGTAAGTCACATGGCTGCACAGCTGAA\n+GGCCTGTGCTGCCACAAAGAGTGCCTGGGCAACTGTTCGGAACCTGATGA\n+CCCCACCAAGTGTGTGGCCTGTCGCAACTTCTATCTGGATGGTCAGTGTG\n+TGGAGACCTGCCCGCCACCCTACTATCACTTCCAGGACTGGCGCTGTGTG\n+AACTTCAGCTTCTGCCAAGACCTTCACTTCAAATGCAGGAACTCTCGGAA\n+GCCTGGCTGCCACCAATACGTCATTCACAACAATAAGTGCATCCCCGAGT\n+GCCCGTCTGGCTATACCATGAATTCCAGC---AACTTGATGTGCACCCCA\n+TGTCTGGGACCCTGCCCTAAGGTCTGCCA-AATCCTCGAAGGTGA--GAA\n+GACCATTGATTCTGTGACATCTGCCCAGGAGCTCCGAGGCTGCACTGTGA\n+TCAACGGTAGCCTGATCATCAACATCCGAGGGGGCAACAACCTGGCAGCT\n+GAGCTGGAGGCTAACCTTGGCCTCATTGAAGAAATTTCGGGATTTCTAAA\n+GATCCGCCGCTCCTATGCTCTGGTATCACTTTCTTTCTTCAGGAAGCTAC\n+ATCTGATTCGAGGAGAGACCTTGGAAATTGGGAACTATTCTTTTTATGCC\n+TTGGACAACCAGAACCTGAGGCAACTCTGGGACTGGAGCAAACACAACCT\n+CACCATCACTCAGGGCAAGCTCTTCTTCCATTACAACCCGAAACTCTGCT\n+TGTCTGAAATTCACAAGATGGAAGAAGTCTCCGGAACTAAGGGCCGTCAG\n+GAGAGGAACGACATTGCCCTGAAGACCAATGGGGACCAGGCATCGTGTGA\n+AAATGAATTGCTTAAATTTTCTTTCATTCGGACATCTTTTGACAAGATCC\n+TGTTGAGGTGGGAACCCTACTGGCCCCCCGACTTCCGAGATCTCCTGGGA\n+TTCATGTTGTTCTACAAAGAGGCCCCTTATCAGAATGTGACAGAGTTTGA\n+TGGGCAGGATGCTTGTGGCTCCAACAGCTGGACTGTGGTGGATATTGACC\n+CGCCCCAGAGGTCCAACGACCCCAAGTCTCAGACCCCAAGCCACCCTGGG\n+TGGCTGATGCGGGGCCTCAAACCCTGGACCCAATACGCCATCTTTGTGAA\n+GACCTTGGTTACCTTCTC--TGATGAACGGC-GGAC
CTATGGAGCCAAAA\n+GTGATATCATCTATGTGCAAACAGATGCCACTAATCCTTCTGTCCCCCTG\n+GATCCCATATCAGTTTCTAATTCCTCATCTCAGATTATCTTAAAGTGGAA\n+GCCCCCCTCTGACCCCAATGGCAACATCACACACTACCTGGTGTACTGGG\n+AGAGGCAAGCAGAGGACAGCGAGCTGTTTGAGCTGGATTATTGTCTCAAA\n+GGGCTGAAGCT-CCCTTCACGGACCTGGTCCCCAC--CCTTTGAGTCTGA\n+TGAT-TCTCAGAAG--CACAATCAGAGTGAGTATGACGACTCGGCCAGTG\n+AGTGCTGCTCATGCCCTAAGACTGACTCTCAGATCCTGAAGGAGCTGGAG\n+GAGTCTTCATTCAGGAAGACCTTCGAGGATTACCTGCACAACGTGGTTTT\n+TGTCCCCAGGCCATCCCGAAAGCGAAGATCCCTTGAAGAGGTGGGGAATG\n+TGAC---AGCCACCACACTCACACTTCCAGATTTCCCCAACGTCTCCTCT\n+ACCATTGTGCCCACAAGTCAGGAG---GAGCACAGGCCATTTGAGAA---\n+AGTGGTGAACAAGGAGTCACTTGTCATCTCTGGCCTGAGACACTTCACTG\n+GGTACCGCATTGAGCTGCAGGCATGCAATCAAGATTCCCCAGATGAGAGG\n+TGCAGTGTGGCTGCCTACGTCAGTGCCCGGACCATGCCTGAAGCTAAGGC\n+AGATGACATCGTTGGCCCTGTGACTCATGAAATCTTTGAGAACAATGTTG\n+TACACTTAATGTGGCAAGAGCCAAAGGAACCTAATGGTCTGATTGTGCTA\n+TATGAAGTGAGCTATCGCCGATATGGTGATGAGGAGCTGCACCTCTGTGT\n+CTCCCGGAAGCATTTTGCCCTGGAGCGGGGCTGCAGGCTGCGAGGGCTCT\n+CCCCAGGAAACTACAGTGTTCGAGTCCGGGCTACCTCTCTGGCAGGAAAT\n+GGCTCCTGGACAGAACCCACCTATTTTTATGTGACTGATTATTTAGATGT\n+CCCATCAAATATTGCCAAAATTATCATTGGACCCCTCATCTTTGTCTTCC\n+TCTTCAGTGTTGTGATTGGAAGTATT---TATCTATTTCTGAGAAAGAGG\n+CA----GCCGGATGGGCCAATGGGACCACTGTATGCATCTTCAAACCCTG\n+AGTACCTCAGTGCCAGTGATGTGTTTCCATCTTCTGTGTACGTGCCGGAC\n+GAGTGGGAGGTGCCTCGAGAGAAGATCACCCTTCTTCGAGAGCTGGGGCA\n+GGGATCCTTTGGTATGGTGTATGAAGGCAATGCCAAGGATATCATCAAGG\n+GTGAGGCAGAGACCCGTGTTGCGGTTAAGACTGTCAATGAGTCAGCCAGT\n+CTTCGAGAACGGATCGAGTTCCTCAATGAGGCATCAGTCATGAAGGGATT\n+CACCTGCCATCATGTGGTCCGCCTTCTTGGGGTGGTATCCAAAGGACAGC\n+CAACGCTGGTAGTGATGGAATTGATGGCTCATGGAGACCTGAAAAGTCAC\n+CTCCGTTCTCTGAGGCCAGATGCTGAG---AATAACCCAGGCCGCCCTCC\n+CCCTACCTTGCAAGAAATGATTCAGATGACAGCAGAAATTGCTGATGGCA\n+TGGCATACTTGAACGCCAAGAAGTTTGTGCACCGGGACCTGGCAGCTCGA\n+AACTGCATGGTTGCCCATGATTTTACTGTCAAAATTGGAGACTTTGGAAT\n+GACAAGGGACATCTACGAGACAGATTACTATCGGAAAGGGGGCAAGGGAC\n+TGCTTCCTGTGAGGTGGATGTCACCTGAGTCCCTGAAGGATGGAGTCTTT\n+ACTGCTTCTTCTGATATGTGGTCCTTTGGGGTGGTCCTTTGGGAAATCAC\n+TAGCCTGGCTGAGCAACCTTAT
CAAGGCCTGTCTAATGAACAGGTGTTGA\n+AGTTTGTCATGGATGGAGGCTATCTGGATCCCCCTGATAACTGTCCA'..b'ACTACGCCCTGGTCATCTTCGAGATGACCAATCTCAAGGATATTGGG\n+CTTTACAACCTGAGGAACATTACTCGGGGGGCCATCAGGATTGAGAAAAA\n+TGCTGACCTCTGTTACCTCTCCACTGTGGACTGGTCCCTGATCCTGGATG\n+CGGTGTCCAATAACTACATTGTGGGGAATAA-GCCCCCAA--AGGAATGT\n+GGGGACCTGTGTCCAGGGACCATGGAGGAGAA-GCCGA--TGTGTGAGAA\n+GACCACCATCAACAATGAGTACAACTACCGCTGCTGGACCACAAACCGCT\n+GCCAGAAAATGTGCCCGAGCACGTGTGGGAAGCGGGCGTGCACCGAGAAC\n+AACGAGTGCTGCCACCCCGAGTGCCTGGGCAGCTGCAGCGCGCCTGACAA\n+CGACACGGCCTGTGTAGCTTGCCGCCACTACTACTATGCCGGTGTCTGTG\n+TGCCTGCCTGCCCGCCCAACACCTACAGGTTTGAGGGCTGGCGCTGTGTG\n+GACCGTGACTTCTGC----GCCAACATCCTCAGCGCCGAGA------GCA\n+G--CGACTCCGAGGGGTTTGTGATCCACGACGGCGAGTGCATGCAGGAGT\n+GCCCCTCGGGCTTCATCCGCAACGGCAGCCAGAGCATGTACTGCATCCCT\n+TGTGAAGGTCCTTGCCCGAAGGTCTGTG---AGGAAGAAAAGAAAACAAA\n+GACCATTGATTCTGTTACTTCTGCTCAGATGCTCCAAGGATGCACCATCT\n+TCAAGGGCAATTTGCTCATTAACATCCGACGGGGGAATAACATTGCTTCA\n+GAGCTGGAGAACTTCATGGGGCTCATCGAGGTGGTGACGGGCTACGTGAA\n+GATCCGCCATTCTCATGCCTTGGTCTCCTTGTCCTTCCTAAAAAACCTTC\n+GCCTCATCTTAGGAGAGGAGCAGCTAGAAGGGAATTACTCCTTCTACGTC\n+CTCGACAACCAGAACTTGCAGCAACTATGGGACTGGGACCACCGCAACCT\n+GACCATCAAAGCAGGGAAAATGTACTTTGCTTTCAATCCCAAATTATGTG\n+TTTCCGAAATTTACCGCATGGAGGAAGTGACGGGGACTAAAGGGCGCCAA\n+AGCAAAGGGGACATAAACACCAGGAACAACGGGGAGAGAGCCTCCTGTGA\n+AAGTGACGTCCTGCATTTCACCTCCACCACCACGTCGAAGAATCGCATCA\n+TCATAACCTGGCACCGGTACCGGCCCCCTGACTACAGGGATCTCATCAGC\n+TTCACCGTTTACTACAAGGAAGCACCCTTTAAGAATGTCACGGAGTATGA\n+TGGGCAGGACGCCTGCGGCTCCAACAGCTGGAACATGGTGGACGTGGACC\n+TCCCGCCCAA---CAAGGACGTGGAGCCCGGCATCT--------------\n+----TACTACATGGGCTGAAGCCCTGGACTCAGTACGCCGTTTACGTCAA\n+GGCTGTGACCCTCACCATGGTGGAGAATGACCATATCCGTGGGGCCAAGA\n+GTGAGATCTTGTACATTCGCACCAATGCTTCAGTTCCTTCCATTCCCTTG\n+GACGTTCTTTCAGCATCGAACTCCTCTTCTCAGTTAATCGTGAAGTGGAA\n+CCCTCCCTCTCTGCCCAACGGCAACCTGAGTTACTACATTGTGCGCTGGC\n+AGCGGCAGCCTCAGGACGGCTACCTTTACCGGCACAATTACTGCTCCAAA\n+GA--CAAAATC-CCCATCAGGAAGTATGCCGACGGCACCATCGACATTGA\n+GGAGGTCACAGAGAACCCCAAGACTGAGGTGTGTGGTGGGGAGAAAGGGC\n+CTTGCTGCGCCT
GCCCCAAAACTGAAGCCGAGAAGCAGGCCGAGAAGGAG\n+GAGGCTGAATACCGCAAAGTCTTTGAGAATTTCCTGCACAACTCCATCTT\n+CGTGCCCAGACCTGAAAGGAAGCGGAGAGATGTCATGCAAGTGGCCAACA\n+CCACCATGTCCAGCCGAAGCAGGAACACCACGGCCGCAGACACCTACAAC\n+ATCACCGACCCGGAAGAGCTGGAGACAGAGTACCCTTTCTTTGAGAGCAG\n+AGTGGATAACAAGGAGAGAACTGTGATTTCTAACCTTCGGCCTTTCACAT\n+TGTACCGCATCGATATCCACAGCTGCAACCACGAGGCTGAGAAGCTGGGC\n+TGCAGCGCCTCCAACTTTGTCTTTGCAAGGACTATGCCCGCAGAAGGAGC\n+AGATGACATTCCTGGGCCAGTGACCTGGGAGCCAAGGCCTGAAAACTCCA\n+TCTTTTTAAAGTGGCCGGAACCTGAGAATCCCAATGGATTGATTCTAATG\n+TATGAAATAAAATACGGATCACAAGTTGAGGATCAGCGAGAA---TGTGT\n+GTCCAGACAGGAATACAGGAAGTATGGAGGGGCCAAGCTAAACCGGCTAA\n+ACCCGGGGAACTACACAGCCCGGATTCAGGCCACATCTCTCTCTGGGAAT\n+GGGTCGTGGACAGATCCTGTGTTCTTCTATGTC-CAGGCCAAAACAGGAT\n+ATGA--AAACTTCATCCATCTGATCATCGCTCTGCCCGTCGCTGTCCTGT\n+TGATCGTGGGAGGGTTGGTGATTATGCTGTACGTCTTCCATAGAAAGAGA\n+AATAACAGCAGGCTGGGGAATGGAGT-GCTGTATGCCTCTGTGAACCCGG\n+AGTACTTCAGCGCTGCTGAT---------------GTGTACGTTCCCGAT\n+GAGTGGGAGGTGGCTCGGGAGAAGATCACCATGAGCCGGGAACTTGGGCA\n+GGGGTCCTTTGGGATGGTCTATGAAGGAGTTGCCAAGGGTGTGGTGAAAG\n+ATGAACCTGAAACCAGAGTGGCCATTAAAACAGTGAACGAGGCCGCAAGC\n+ATGCGTGAAAGGATTGAGTTTCTCAACGAAGCTTCTGTGATGAAGGAGTT\n+CAATTGTCACCATGTGGTGCGATTGCTGGGTGTGGTGTCCCAAGGCCAGC\n+CAACACTGGTCATCATGGAACTGATGACACGGGGCGATCTCAAAAGTTAT\n+CTCCGGTCTCTGAGGCCAGAAATGGAG---AATAATCCAGTCCTAGCACC\n+TCCAAGCCTGAGCAAGATGATTCAGATGGCCGGAGAGATTGCAGACGGCA\n+TGGCATACCTCAACGCCAATAAGTTCGTCCACAGAGACCTTGCTGCCCGG\n+AATTGCATGGTAGCCGAAGATTTCACAGTCAAAATCGGAGATTTTGGTAT\n+GACGCGAGATATCTATGAGACAGACTATTACCGGAAAGGAGGGAAAGGGC\n+TGCTGCCCGTGCGCTGGATGTCTCCCGAGTCCCTCAAGGATGGAGTCTTC\n+ACCACTTACTCGGACGTCTGGTCCTTCGGGGTCGTCCTCTGGGAGATCGC\n+CACACTGGCCGAGCAGCCCTACCAGGGCTTGTCCAACGAGCAAGTCCTTC\n+GCTTCGTCATGGAGGGCGGCCTTCTGGACAAGCCAGACAACTGTCCCGAC\n+ATGCTGTTTGAACTGATGCGCATGTGCTGGCAGTATAACCCCAAGATGAG\n+GCCTTCCTTCCTGGAGATCATCAGCAGCATCAAAGAGGAGATGGAGCCTG\n+GCTTCCGGGAGGTCTCCTTCTACTACAGCGAGGAGAACAAGCTGCCCGAG\n+CCGGAGGAGCTGGACCTGG------------AGCCAGAGAACATGGAGAG\n+CGTCCCCCTGGACCCCTCGGCCTCC---------CTGCCACTGCCCGACA\
n+GACACTCAGGACACAAGGCCGAGAACGGCCCCGGCCCTGGGGTGCTGGTC\n+CTCCGCGCCAGCTTCGACGAGAGACAGCCGTACGCACACATGAACGGGGG\n+CCGCAAGAACGAGCGGGCCTTGCCGCTGCCCCAGTCTTCGACCTGCTGA\n'