changeset 0:794a6e864a96 draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty
author earlhaminst
date Thu, 15 Dec 2016 11:04:25 -0500
parents
children b3833e5b50d4
files filter_by_fasta_ids.py t_coffee.xml t_coffee_to_cigar.pl test-data/cigar.tabular test-data/ids.txt test-data/input.fasta test-data/output1.fasta
diffstat 7 files changed, 811 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filter_by_fasta_ids.py	Thu Dec 15 11:04:25 2016 -0500
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+""" A script to build specific fasta databases """
+from __future__ import print_function
+
+
+import logging
+import sys
+
+
+# ===================================== Iterator ===============================
+class Sequence:
+    ''' Holds protein sequence information '''
+    def __init__(self):
+        self.header = ""
+        self.sequence_parts = []
+
+    def get_sequence(self):
+        return "".join([line.rstrip().replace('\n', '').replace('\r', '') for line in self.sequence_parts])
+
+
+class FASTAReader:
+    """
+        FASTA db iterator. Returns a single FASTA sequence object.
+    """
+    def __init__(self, fasta_name):
+        self.fasta_file = open(fasta_name)
+        self.next_line = self.fasta_file.readline()
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        ''' Iteration '''
+        # while True:
+        #    line = self.fasta_file.readline()
+        #    if not line:
+        #        raise StopIteration
+        #    if line[0] == '>':
+        #        break
+        next_line = self.next_line
+        if not next_line:
+            raise StopIteration
+
+        seq = Sequence()
+        seq.header = next_line.rstrip().replace('\n', '').replace('\r', '')
+
+        next_line = self.fasta_file.readline()
+        while next_line and next_line[0] != '>':
+            # tail = self.fasta_file.tell()
+            # line = self.fasta_file.readline()
+            # if not line:
+            #   break
+            # if line[0] == '>':
+            #   self.fasta_file.seek(tail)
+            #   break
+            seq.sequence_parts.append(next_line)
+            next_line = self.fasta_file.readline()
+        self.next_line = next_line
+        return seq
+
+    # Python 2/3 compat
+    next = __next__
+# ==============================================================================
+
+
+def target_match(target, search_entry):
+    ''' Matches '''
+    search_entry = search_entry.upper()
+    for atarget in target:
+        if search_entry.find(atarget) > -1:
+            return atarget
+    return None
+
+
+def main():
+    ''' the main function'''
+    logging.basicConfig(filename='filter_fasta_log',
+                        level=logging.INFO,
+                        format='%(asctime)s :: %(levelname)s :: %(message)s',)
+
+    used_sequences = set()
+    work_summary = {'wanted': 0, 'found': 0, 'duplicates': 0}
+    targets = []
+
+    f_target = open(sys.argv[1])
+    for line in f_target.readlines():
+        targets.append(">%s" % line.strip().upper())
+    f_target.close()
+
+    work_summary['wanted'] = len(targets)
+    homd_db = FASTAReader(sys.argv[2])
+
+    # output = open(sys.argv[3], "w")
+    for entry in homd_db:
+        target_matched_results = target_match(targets, entry.header)
+        if target_matched_results:
+            work_summary['found'] += 1
+            targets.remove(target_matched_results)
+            sequence = entry.get_sequence()
+            used_sequences.add(sequence)
+            print(entry.header)
+            print(sequence)
+    for parm, count in work_summary.items():
+        logging.info('%s ==> %d', parm, count)
+
+
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/t_coffee.xml	Thu Dec 15 11:04:25 2016 -0500
@@ -0,0 +1,210 @@
+<tool id="t_coffee" name="T-Coffee" version="11.0.8">
+    <description>multiple sequence alignment</description>
+    <requirements>
+        <requirement type="package" version="11.0.8">t_coffee</requirement>
+    </requirements>
+    <stdio>
+        <exit_code range="1:" level="fatal" />
+    </stdio>
+    <version_command>
+        t_coffee -version | grep Version
+    </version_command>
+    <command>
+<![CDATA[
+        #if str($input_type.filter_fasta) == 'yes'
+            #set $input = '-infile=stdin'
+            python '$__tool_directory__/filter_by_fasta_ids.py' '$input_type.identifiers' '$input_type.fasta_input' |
+        #end if
+
+        #set $method_opt = ''
+        #if $method01
+            #set $method_opt += str($method01) + ','
+        #end if
+        #if $method02
+            #set $method_opt += str($method02) + ','
+        #end if
+        #if $method03
+            #set $method_opt += str($method03) + ','
+        #end if
+        #if $method_opt
+            #set $method_opt = '-method ' + $method_opt[:-1]
+        #end if
+
+        #set $output_opt = ''
+        #if $outputs
+            #set $outputs_arr = str($outputs).split(',')
+            #for $o in $outputs_arr
+                #if $o not in ['cigar', 'dnd']
+                    #set $output_opt += $o + ','
+                #end if
+            #end for
+            #if 'cigar' in $outputs_arr and 'fasta_aln' not in $outputs_arr
+                #set $output_opt += 'fasta_aln,'
+            #end if
+        #else
+            #set $outputs_arr = []
+        #end if
+        #if $output_opt
+            #set $output_opt = '-output ' + $output_opt[:-1]
+        #end if
+
+        t_coffee '$input' $method_opt $output_opt -n_core \${GALAXY_SLOTS:-1} -run_name t_coffee_out -quiet
+
+        #if 'cigar' in $outputs_arr
+            && perl '$__tool_directory__/t_coffee_to_cigar.pl' t_coffee_out.fasta_aln > '$cigar'
+        #end if
+]]>
+    </command>
+    <inputs>
+        <conditional name="input_type">
+            <param name="filter_fasta" type="select" label="Filter FASTA input?">
+                <option value="no">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="yes">
+                <param name="fasta_input" type="data" format="fasta" label="FASTA sequences" />
+                <param name="identifiers" type="data" format="txt" label="List of FASTA sequence IDs" />
+            </when>
+            <when value="no">
+                <param name="input" type="data" format="fasta" label="FASTA sequences" />
+            </when>
+        </conditional>
+        <param name="method01" type="select" display="checkboxes" multiple="true" label="Pairwise Structural Method">
+            <option value="sap_pair">sap_pair</option>
+            <option value="TMalign_pair">TMalign_pair</option>
+            <option value="mustang_pair">mustang_pair</option>
+        </param>
+        <param name="method02" type="select" display="checkboxes" multiple="true" label="Multiple Sequence Alignment Methods">
+            <option value="pcma_msa">pcma_msa</option>
+            <option value="clustalw_msa">clustalw_msa</option>
+            <option value="dialigntx_msa">dialigntx_msa</option>
+            <option value="poa_msa">poa_msa</option>
+            <option value="muscle_msa">muscle_msa</option>
+            <option value="probcons_msa">probcons_msa</option>
+            <option value="t_coffee_msa">t_coffee_msa</option>
+            <option value="amap_msa">amap_msa</option>
+            <option value="kalign_msa">kalign_msa</option>
+        </param>
+        <param name="method03" type="select" display="checkboxes" multiple="true" label="Pairwise Sequence Alignment Methods">
+            <option value="fast_pair">fast_pair</option>
+            <option value="clustalw_pair">clustalw_pair</option>
+            <option value="lalign_id_pair">lalign_id_pair</option>
+            <option value="slow_pair">slow_pair</option>
+            <option value="proba_pair">proba_pair</option>
+        </param>
+        <param name="outputs" type="select" multiple="true" optional="false" display="checkboxes" label="Additional outputs">
+            <option value="cigar">CIGAR</option>
+            <option value="clustalw_aln">clustalw_aln</option>
+            <option value="dnd" selected="true">dnd</option>
+            <option value="fasta_aln">fasta_aln</option>
+            <option value="fasta_seq">fasta_seq</option>
+            <option value="msf_aln">msf_aln</option>
+            <option value="phylip">phylip</option>
+            <option value="pir_aln">pir_aln</option>
+            <option value="pir_seq">pir_seq</option>
+            <option value="score_ascii">score_ascii</option>
+            <option value="score_html">score_html</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="cigar" format="tabular" label="${tool.name} on ${on_string}: cigar" >
+            <filter>'cigar' in outputs</filter>
+        </data>
+        <data name="clustalw_aln" format="clustalw" label="${tool.name} on ${on_string}: clustalw_aln" from_work_dir="t_coffee_out.clustalw_aln">
+            <filter>'clustalw_aln' in outputs</filter>
+        </data>
+        <data name="dnd" format="nhx" label="${tool.name} on ${on_string}: newick.dnd" from_work_dir="t_coffee_out.dnd">
+            <filter>'dnd' in outputs</filter>
+        </data>
+        <data name="fasta_aln" format="fasta" label="${tool.name} on ${on_string}: fasta_aln" from_work_dir="t_coffee_out.fasta_aln">
+            <filter>'fasta_aln' in outputs</filter>
+        </data>
+        <data name="fasta_seq" format="fasta" label="${tool.name} on ${on_string}: fasta_seq" from_work_dir="t_coffee_out.fasta_seq">
+            <filter>'fasta_seq' in outputs</filter>
+        </data>
+        <data name="msf_aln" format="msf" label="${tool.name} on ${on_string}: msf_aln" from_work_dir="t_coffee_out.msf_aln">
+            <filter>'msf_aln' in outputs</filter>
+        </data>
+        <data name="phylip" format="phylip" label="${tool.name} on ${on_string}: phylip" from_work_dir="t_coffee_out.phylip">
+            <filter>'phylip' in outputs</filter>
+        </data>
+        <data name="pir_aln" format="pir" label="${tool.name} on ${on_string}: pir_aln" from_work_dir="t_coffee_out.pir_aln">
+            <filter>'pir_aln' in outputs</filter>
+        </data>
+        <data name="pir_seq" format="pir" label="${tool.name} on ${on_string}: pir_seq" from_work_dir="t_coffee_out.pir_seq">
+            <filter>'pir_seq' in outputs</filter>
+        </data>
+        <data name="score_ascii" format="ascii" label="${tool.name} on ${on_string}: score_ascii" from_work_dir="t_coffee_out.score_ascii">
+            <filter>'score_ascii' in outputs</filter>
+        </data>
+        <data name="score_html" format="html" label="${tool.name} on ${on_string}: score_html" from_work_dir="t_coffee_out.score_html">
+            <filter>'score_html' in outputs</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="filter_fasta" value="no" />
+            <param name="input" value="input.fasta" ftype="fasta" />
+            <param name="method02" value="clustalw_msa" />
+            <param name="outputs" value="fasta_aln" />
+            <output name="fasta_aln" file="output1.fasta" />
+        </test>
+        <test>
+            <param name="filter_fasta" value="yes" />
+            <param name="fasta_input" value="input.fasta" ftype="fasta" />
+            <param name="identifiers" value="ids.txt" ftype="txt" />
+            <param name="method02" value="clustalw_msa" />
+            <param name="outputs" value="cigar" />
+            <output name="cigar" file="cigar.tabular" />
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+This tool is a wrapper for the T-Coffee multiple sequence alignment suite. The input is a set of sequences in FASTA format. Apart from running on the complete FASTA input, it can also run on a subset of sequences by providing a list of the FASTA IDs.
+
+This wrapper offers selected advanced T-Coffee options like the selection of the alignment methods to use: ''Pairwise Structural Method'', ''Multiple Sequence Alignment Methods'' or ''Pairwise Sequence Alignment Methods''.
+
+The T-Coffee documentation can be found at http://www.tcoffee.org/Projects/tcoffee/ .
+
+**Example**
+
+Suppose you have 5 sequences in FASTA format::
+
+    >1aboA
+    NLFVALYDFVASGDNTLSITKGEKLRVLGYNHNGEWCEAQTKNGQGWVPS
+    NYITPVN
+    >1ycsB
+    KGVIYALWDYEPQNDDELPMKEGDCMTIIHREDEDEIEWWWARLNDKEGY
+    VPRNLLGLYP
+    >1pht
+    GYQYRALYDYKKEREEDIDLHLGDILTVNKGSLVALGFSDGQEARPEEIG
+    WLNGYNETTGERGDFPGTYVEYIGRKKISP
+    >1vie
+    DRVRKKSGAAWQGQIVGWYCTNLTPEGYAVESEAHPGSVQIYPVAALERI
+    N
+    >1ihvA
+    NFRVYYRDSRDPVWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRD
+
+By selecting fasta_aln under "Additional outputs", the user will obtain the multiple alignment in FASTA format::
+
+    >1aboA
+    NL-FVA---LYDFVASGDNTLSITKGEKLR-------VLGYN-------H
+    NGEWCEA--QTKN-GQGWVPSNYIT------PVN
+    >1ycsB
+    KGVIYA---LWDYEPQNDDELPMKEGDCMT-------IIHREDE-----D
+    EIEWWWA--RLND-KEGYVPRNLLG------LYP
+    >1pht
+    GYQYRA---LYDYKKEREEDIDLHLGDILTVNKGSLVALGFSDGQEARPE
+    EIGWLNGYNETTG-ERGDFPGTYVEYIGRKKISP
+    >1vie
+    DR-----------VRK--KSGAAWQGQIVGWYCTNLTPEGYAVE------
+    ------S--EAHPGSVQIYPVAALE------RIN
+    >1ihvA
+    NF-RVYYRDSRDPVWKGPA-KLLWKGEGAV-------VIQDN-------S
+    DI--------------KVVPRRKAK-----IIRD
+    </help>
+    <citations>
+        <citation type="doi">10.1006/jmbi.2000.4042</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/t_coffee_to_cigar.pl	Thu Dec 15 11:04:25 2016 -0500
@@ -0,0 +1,48 @@
+#!/usr/bin/perl
+#
+use strict;
+use warnings;
+
+# A simple Perl script to convert FASTA sequence alignments into 2-column output where first column is FASTA id and second is CIGAR line
+# TCoffee_to_cigar.pl <file>
+
+sub convert_and_print {
+    # Print "<header>\t<CIGAR>\n" for one aligned sequence.
+    # $header   - FASTA id, without the leading '>'
+    # $sequence - alignment row; '-' is a gap, any other character a residue
+    my ($header, $sequence) = @_;
+
+    # Residues become M (match), gaps become D (deletion)
+    (my $ops = $sequence) =~ tr/\-/M/c;
+    $ops =~ tr/\-/D/;
+
+    # Run-length encode: a run of n > 1 equal letters is written as n followed
+    # by the letter, a single letter is written without a count (DDDD -> 4D)
+    my $cigar = '';
+    while ($ops =~ /(M+|D+)/g) {
+        my $run = $1;
+        $cigar .= length($run) if length($run) > 1;
+        $cigar .= substr($run, 0, 1);
+    }
+
+    print "$header\t$cigar\n";
+}
+
+my $file1 = $ARGV[0];
+defined $file1 or die "Usage: $0 <fasta_alignment>\n";
+open(my $fh1, '<', $file1) or die "Cannot open '$file1': $!\n";
+
+# Accumulate each record's lines and emit it when the next header (or EOF) is reached
+my ($header, $sequence) = ('', '');
+while (my $line = <$fh1>) {
+    $line =~ s/[\r\n]+\z//;    # strip the line ending, including a CR from CRLF files
+    if (substr($line, 0, 1) eq '>') {
+        convert_and_print($header, $sequence) if $header ne '';    # ne '' keeps an id of "0"
+        $header = substr($line, 1);
+        $sequence = '';
+    } else {
+        $sequence .= $line;
+    }
+}
+close $fh1;
+convert_and_print($header, $sequence) if $header ne '';    # no output for input without records
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cigar.tabular	Thu Dec 15 11:04:25 2016 -0500
@@ -0,0 +1,3 @@
+ENSMUST00000091291_musmusculus	41M3D64MD473MD7M2D375M3D74M3D771M2D13MD227MD13M7D226M3D58M3D6MD21M3D437M2D33MD76MD1017M12D34M10D14M3D22M4D10M4D107M
+ENSCAFT00000026349_canisfamiliaris	16D26M2D64MD422MD8M2D140M3D195M2D5M8D16M5D52M3D61M6D6M3D672M20D24MD311M8D25M2D58M2D11M2D103M5D17M6D18M5D22M2D10MD37M3D499M3D10M4D30M5D12M4D35M5D6M10D904M4D27M6D40M8D16M3D21M126D
+ENSRNOT00000019267_rattusnorvegicus	75MD14M6D112M6D317MD8M2D40MD7M2D273M4D23M3D16M5D794M3D6M18D315M2D7MD46M2D249MD335M3D142M2D8MD8M2D14MD35MD63M5D16M3D16M4D10M6D1152M3D
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ids.txt	Thu Dec 15 11:04:25 2016 -0500
@@ -0,0 +1,3 @@
+ENSMUST00000091291_musmusculus
+ENSCAFT00000026349_canisfamiliaris
+ENSRNOT00000019267_rattusnorvegicus
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.fasta	Thu Dec 15 11:04:25 2016 -0500
@@ -0,0 +1,14 @@
+>ENSMUST00000091291_musmusculus
+ATGGGCTTCGGGAGAGGATGTGAGACGACGGCTGTGCCATTGCTGGTGGCCGTGGCCGCGTTGCTGGTGGGCACAGCCGGCCACCTGTACCCTGGAGAGGTGTGCCCTGGTATGGACATCCGGAACAACCTGACCAGGCTACATGAGCTGGAGAACTGCTCAGTCATTGAGGGCCATCTGCAGATCCTCCTGATGTTCAAGACCAGACCCGAAGATTTCCGAGACCTCAGTTTCCCCAAACTCATCATGATCACAGATTACCTGCTTCTCTTCCGTGTCTATGGTCTGGAAAGTCTGAAAGACCTCTTCCCAAATCTCACAGTCATCCGAGGCTCCCGTCTCTTCTTCAACTATGCCCTGGTTATCTTCGAGATGGTCCACCTGAAGGAGCTGGGGCTTTATAACCTCATGAACATCACCCGGGGCTCTGTCCGCATCGAGAAGAATAATGAGCTCTGCTACCTGGCCACTATCGACTGGTCCCGTATCCTGGATTCTGTGGAGGACAACTACATTGTACTGAACAAAGATGACAACGAGGAATGTGGGGATGTCTGTCCAGGCACCGCCAAGGGCAAGACCAACTGTCCTGCCACTGTCATCAATGGGCAGTTTGTGGAACGGTGCTGGACACACAGTCATTGTCAGAAAGTTTGCCCAACCATCTGTAAGTCACATGGCTGCACAGCTGAAGGCCTGTGCTGCCACAAAGAGTGCCTGGGCAACTGTTCGGAACCTGATGACCCCACCAAGTGTGTGGCCTGTCGCAACTTCTATCTGGATGGTCAGTGTGTGGAGACCTGCCCGCCACCCTACTATCACTTCCAGGACTGGCGCTGTGTGAACTTCAGCTTCTGCCAAGACCTTCACTTCAAATGCAGGAACTCTCGGAAGCCTGGCTGCCACCAATACGTCATTCACAACAATAAGTGCATCCCCGAGTGCCCGTCTGGCTATACCATGAATTCCAGCAACTTGATGTGCACCCCATGTCTGGGACCCTGCCCTAAGGTCTGCCAAATCCTCGAAGGTGAGAAGACCATTGATTCTGTGACATCTGCCCAGGAGCTCCGAGGCTGCACTGTGATCAACGGTAGCCTGATCATCAACATCCGAGGGGGCAACAACCTGGCAGCTGAGCTGGAGGCTAACCTTGGCCTCATTGAAGAAATTTCGGGATTTCTAAAGATCCGCCGCTCCTATGCTCTGGTATCACTTTCTTTCTTCAGGAAGCTACATCTGATTCGAGGAGAGACCTTGGAAATTGGGAACTATTCTTTTTATGCCTTGGACAACCAGAACCTGAGGCAACTCTGGGACTGGAGCAAACACAACCTCACCATCACTCAGGGCAAGCTCTTCTTCCATTACAACCCGAAACTCTGCTTGTCTGAAATTCACAAGATGGAAGAAGTCTCCGGAACTAAGGGCCGTCAGGAGAGGAACGACATTGCCCTGAAGACCAATGGGGACCAGGCATCGTGTGAAAATGAATTGCTTAAATTTTCTTTCATTCGGACATCTTTTGACAAGATCCTGTTGAGGTGGGAACCCTACTGGCCCCCCGACTTCCGAGATCTCCTGGGATTCATGTTGTTCTACAAAGAGGCCCCTTATCAGAATGTGACAGAGTTTGATGGGCAGGATGCTTGTGGCTCCAACAGCTGGACTGTGGTGGATATTGACCCGCCCCAGAGGTCCAACGACCCCAAGTCTCAGACCCCAAGCCACCCTGGGTGGCTGATGCGGGGCCTCAAACCCTGGACCCAATACGCCATCTTTGTGAAGACCTTGGTTACCTTCTCTGATGAACGGCGGACCTATGGAGCCAAAAGTGATATCATCTATGTGCAAACAGATGCCACTAATCCTTCTGTCCCCCTGGATCCCATATCAGTTTCTAATTCCTCATCTCAGATTATCTTAAAGTGGAAGCCCCCCTCTGACCCCAATGGCAACATCACACACTACCTGGTGTACTGGGAGAGGCAAGCAGAGG
ACAGCGAGCTGTTTGAGCTGGATTATTGTCTCAAAGGGCTGAAGCTCCCTTCACGGACCTGGTCCCCACCCTTTGAGTCTGATGATTCTCAGAAGCACAATCAGAGTGAGTATGACGACTCGGCCAGTGAGTGCTGCTCATGCCCTAAGACTGACTCTCAGATCCTGAAGGAGCTGGAGGAGTCTTCATTCAGGAAGACCTTCGAGGATTACCTGCACAACGTGGTTTTTGTCCCCAGGCCATCCCGAAAGCGAAGATCCCTTGAAGAGGTGGGGAATGTGACAGCCACCACACTCACACTTCCAGATTTCCCCAACGTCTCCTCTACCATTGTGCCCACAAGTCAGGAGGAGCACAGGCCATTTGAGAAAGTGGTGAACAAGGAGTCACTTGTCATCTCTGGCCTGAGACACTTCACTGGGTACCGCATTGAGCTGCAGGCATGCAATCAAGATTCCCCAGATGAGAGGTGCAGTGTGGCTGCCTACGTCAGTGCCCGGACCATGCCTGAAGCTAAGGCAGATGACATCGTTGGCCCTGTGACTCATGAAATCTTTGAGAACAATGTTGTACACTTAATGTGGCAAGAGCCAAAGGAACCTAATGGTCTGATTGTGCTATATGAAGTGAGCTATCGCCGATATGGTGATGAGGAGCTGCACCTCTGTGTCTCCCGGAAGCATTTTGCCCTGGAGCGGGGCTGCAGGCTGCGAGGGCTCTCCCCAGGAAACTACAGTGTTCGAGTCCGGGCTACCTCTCTGGCAGGAAATGGCTCCTGGACAGAACCCACCTATTTTTATGTGACTGATTATTTAGATGTCCCATCAAATATTGCCAAAATTATCATTGGACCCCTCATCTTTGTCTTCCTCTTCAGTGTTGTGATTGGAAGTATTTATCTATTTCTGAGAAAGAGGCAGCCGGATGGGCCAATGGGACCACTGTATGCATCTTCAAACCCTGAGTACCTCAGTGCCAGTGATGTGTTTCCATCTTCTGTGTACGTGCCGGACGAGTGGGAGGTGCCTCGAGAGAAGATCACCCTTCTTCGAGAGCTGGGGCAGGGATCCTTTGGTATGGTGTATGAAGGCAATGCCAAGGATATCATCAAGGGTGAGGCAGAGACCCGTGTTGCGGTTAAGACTGTCAATGAGTCAGCCAGTCTTCGAGAACGGATCGAGTTCCTCAATGAGGCATCAGTCATGAAGGGATTCACCTGCCATCATGTGGTCCGCCTTCTTGGGGTGGTATCCAAAGGACAGCCAACGCTGGTAGTGATGGAATTGATGGCTCATGGAGACCTGAAAAGTCACCTCCGTTCTCTGAGGCCAGATGCTGAGAATAACCCAGGCCGCCCTCCCCCTACCTTGCAAGAAATGATTCAGATGACAGCAGAAATTGCTGATGGCATGGCATACTTGAACGCCAAGAAGTTTGTGCACCGGGACCTGGCAGCTCGAAACTGCATGGTTGCCCATGATTTTACTGTCAAAATTGGAGACTTTGGAATGACAAGGGACATCTACGAGACAGATTACTATCGGAAAGGGGGCAAGGGACTGCTTCCTGTGAGGTGGATGTCACCTGAGTCCCTGAAGGATGGAGTCTTTACTGCTTCTTCTGATATGTGGTCCTTTGGGGTGGTCCTTTGGGAAATCACTAGCCTGGCTGAGCAACCTTATCAAGGCCTGTCTAATGAACAGGTGTTGAAGTTTGTCATGGATGGAGGCTATCTGGATCCCCCTGATAACTGTCCAGAGAGACTCACTGACCTGATGCGCATGTGCTGGCAGTTCAACCCCAAGATGAGGCCAACCTTCCTGGAAATCGTCAACCTGCTCAAGGATGACCTCCACCCCAGCTTTCCAGAAGTTTCCTTCTTCTACAGCGAGGAGAACAAGGCTCCTGAGAGTGAGGAGCTGGAGATGGAGTTTGAAGACATGGAGAATGTCCCGTTGGATCGTTCCTCTCACTGTCAGAGAGAAGAGGCTGGGGGCCGG
GAGGGAGGGTCCTCACTGAGCATCAAACGGACCTATGATGAACACATCCCCTATACCCACATGAATGGGGGCAAGAAGAACGGACGTGTCCTTACCCTGCCAAGGTCAAACCCTTCCTAA
+
+>ENSCAFT00000026349_canisfamiliaris
+ATGGCAGTGCCCAGTCTGTGCCCATGGGTCGCATGCCTGCTGGTGATCCTCCTCTCCTTGGGATTTGGCCTGGACACACTAGAGGTGTGCCCCAGCCTGGACATCCGCTCAGAGGTGGCGGAGCTGCGCCGGCTGGAGAACTGCAGCGTGGTGGAGGGCCATCTGCAGATCCTGCTCATGTTCACGGCCACGGGCGAGGACTTCCGCGGCCTCAGCTTCCCGCGCCTCACTCAGGTCACTGACTACCTGCTGCTCTTCCGCGTGTATGGCCTGGAGAGCCTGCGGGACCTCTTCCCCAACCTCGCGGTCGTGCGTGGCGCCCGCCTCTTCCTGGGTTATGCGCTGGTGGTCTACGAGATGCCGCACCTGCGGGACGTGGGGCTGCCGGCGCTGGGGGCCGTGCTGCGTGGGGCCGTGCGGGTGGAGAAGAACCAGGAGCTCTGTCATCTCTCCACCATCGACTGGGGGCTGCTGCAGCCCTCGCCCAGCGCCAACCACATCGTGGGCAACAAGCTGCGTGAGGAGTGTGCCGACGTGTGCCCTGGTGTGCTGGGTGCCACCGGCCAGCCCTGCGCCAGGACCACCTTCAGTGGGCACACCGACTACAGATGCTGGACCTCCAGCCACTGCCAGAGAGTGTGTCCCTGCCCCCATGGGCTGGCCTGCACAGCTGGGGGTGAGTGCTGCCACACTGAATGCCTGGGGGGCTGCAGCCGGCCGGAAGACCCCCGCGCCTGCGTCGCCTGTCGCCACCTCTACTTCCAGGGTGCCTGCCACCGGGCCTGCCCTCCAGGCACCTACCAGCACGAGTCCTGGCGCTGTGTCACGGCAGAGCGCTGTGCCAGCCTGCGCTCTGTGCCCGGCCGCACCTCCATCTTTGGCATCCACGAGGGCAGCTGCCTGGCCCAGTGCCCTCCGGGCTTCACCCGCAACGGCAGCAGCATGTTCTGCCACAAGTGTGAGGGGCTGTGCCCCAAAGAGTGCAAGGTGGGTACCAAGACCATCGACTCCACGCAGGCGGCACAGGACCTGGCGGGCTGCACCCACGTGGAGGGGAGCCTCATCATCAACCTCCGCCAGGGCTACAACCTAGAGCTGGAGCTGCAACAGAGCCTGGGGCTGATAGAGACCATCACTGGCTTCCTCAAGATCAAGCACTCCTTTGCCCTCGTGTCCCTGGGCTTTTTCAAGAACCTCAAACTCATCCGAGGGGACGCCATGGTGGATGGGAACTACACCCTGTATGTGCTGGACAACCAGAACCTACAGCAGCTGGGGGCCTGGGTGGCTGCGGGGCTCACCATTCCCGTGGGCAAGATATACTTCGCTTTCAACCCTCGCCTCTGCTTGGAGCACATCTACCGCCTGGAAGAGGTGACCGGCACGCGGGGACGGCAAAACAAGGCTGAGATCAACCCCCGCACCAACGGAGACCGCGCCGCCTGCCAAACTCGCACCCTGCGCTTCGTGTCCAACGTGACGCAAGCTGACAGCATCTTGCTGCGCTGGGAGCGCTACGAGCCGCTGGAGGCTCGGGACCTGCTCAGCTTCATCGTGTACTACAAGGAGTCCCCATTCCAGAATGCCACAGAGCACACAGGTCCAGATGCCTGTGGAACCCAGAGCTGGAACCTGCTGGATGTGGAGCTGCCCTTAAGCCGCACCCAAGAACCCGGGGTAACTCTAGCACCCCTCAAGCCCTGGACACAATATGCAGTGTTTGTACGGGCCATCACACTGACCACTGCTGAGGACAGCCCCCACCAAGGAGCCCAGAGCCCCATCGTCTACCTCCGAACCCTGCCTGCGGCGCCCACTGTGCCCCAGGACGTCATCTCCACGTCCAATTCCTCGTCCCACCTGCTCGTGCGCTGGAAGCCACCGATTCAGCGCAACGGGAACATCACCTACTACCTGGTGCTGTGGCAGCGTCTGGCGGAGGACGGCGACCTCTATCTCAACGACTACTGCCACCGCGGCCTGCGGCTGCCCACCAGCA
ACAACGACCCGCGCTTCGACCGCGAGGACGGTGAACTCGAAGCCGAGATGGAGCCGGGCTGCTGCCCTTGCCAGCACCCACCGCCTGGGCAGGTCCTGCCGCCGCTGGAGGCGCAAGAGGCCTCGTTCCAGAAGAAGTTCGAAAACTTCCTGCACAACGCCATCACCATCCCCAAGTCCCCCTGGAAGGTGACGTCCGTCAATAAGAGCCCTCAAAGACATGCGGGGAGGCACCGCCGGGCGGCCGGGGCGCTCCGGCTTGGGGGCAACAGCTCGGATTTCGAGATCCAGGAGGACAAAGTGCCCCGGGAGCGAGCAGTGTTGAGTGGTCTGCGCCACTTTACGGAATATCGTATCGACATCCACGCCTGCAACCACGCGGCTCACACCGTGGGCTGCAGCGCGGCCACGTTCGTCTTCGCGCGCACCATGCCGCACAGAGAAGCTGATGGCATCCCAGGGAAGGTGGCCTGGGAGGCAGCCAGCAAAAGCAGTGTCCTCCTGCGCTGGCTGGAGCCACCTGACCCCAACGGACTCATCCTCAAGTATGAAATCAAGTACCGCCGCTTGGGAGAGGAGGCCACAGTGCTATGTGTGTCCCGCCTACGATATGCCAAATTTGGGGGTGTCCAGCTGGCCCTGCTGCCCCCTGGAAACTACTCCGCCAGAGTTCGGGCAACCTCGCTGGCTGGCAACGGCTCCTGGACAGAAAGTATCGCTTTCTACGTCCCAGGCCCAGAGGAGGAAGACTCCGGGGGGCTGCACGTCCTTCTCACTGTCACCCCCGTGGGGCTCATGCTGTTCATCATTCTTGCCGCCCTCGGTTTCTTCTACGGCAGGAAGAGAAACAGCACCCTCTATGCCTCAGTGAATCCGGAGTACTTCAGCGCCTCTGATATGTACATCCCTGATGAGTGGGAGGTGCCTCGGGAGCAGATCTCCATAATCCGAGAGCTGGGCCAGGGCTCCTTTGGGATGGTATATGAAGGCCTGGCACAAGGACTAGAGGCTGGAGAGGAGTCCACACCCGTGGCCCTGAAGACAGTGAATGAGTTGGCCAGCCCACGAGAACGCATTGAGTTCCTCAAGGAAGCTTCTGTCATGAAGGCATTCAAGTGTCACCATGTGGTACGTCTCCTGGGTGTTGTGTCTCAAGGCCAGCCAACTCTGGTCATCATGGAGTTAATGACTCGTGGGGACCTCAAGAGCCATCTTCGATCTCTGCGGCCTGAGGCAGAGAACAACCCTGGGCTCCCACGGCCAGCACTGGGAGATATGATCCAGATGGCTGGTGAGATTGCAGATGGCATGGCTTACCTCGCTGCCAACAAGTTTGTGCATCGAGACCTGGCAGCCAGAAACTGCATGGTGTCCCAGGACTTCACCGTCAAGATTGGGGACTTCGGGATGACTCGAGACGTGTACGAGACAGACTATTACCGCAAGGGCGGGAAAGGGCTGCTGCCCGTGCGCTGGATGGCCCCCGAGTCCCTCAAAGATGGAATCTTCACCACACATTCGGATGTTTGGTCCTTCGGGGTGGTGCTCTGGGAGATCGTGACCCTAGCTGAACAGCCCTACCAGGGTTTATCCAACGAGCAGGTGCTCAAGTTTGTCATGGATGGTGGGGTCCTGGAGGAGCTGGAGAGCTGTCCCCTTCAGCTGCAAGAACTGATGAGCCGCTGCTGGCAGCAGAACCCACGCCTGCGGCCCACCTTCACCCACATCCTGGACAGCATTCGGGGGGAACTGCGGCCCTCTTTCCGCCTCTTTTCCTTCTATTACAGCCCAGAGTGCCAGGGGAGCCAGGGGGCCCTGCTGCCTAATGCCAAACCCAACTCCCTACCAACCCCAGAAGGGGCTCCCTCAGACTGCATGCCCCAAAATGGGGGTCCAGGGCACTGA
+
+>ENSMUST00000005671_homosapiens
+ATGAAGTCTGGCTCCGGAGGAGGGTCCCCGACCTCGCTGTGGGGGCTCGTGTTTCTCTCCGCCGCGCTCTCTCTCTGGCCGACGAGTGGAGAAATCTGTGGGCCCGGCATTGACATCCGCAACGACTATCAGCAGCTGAAGCGCCTGGAAAACTGCACGGTGATCGAGGGCTTCCTCCACATCCTGCTCATCTCCAAGGCCGAGGACTACCGAAGCTACCGCTTCCCCAAGCTCACCGTCATCACTGAGTACTTGCTGCTCTTCCGAGTCGCTGGCCTCGAGAGCCTGGGAGACCTCTTCCCCAACCTCACAGTCATCCGTGGCTGGAAACTCTTCTACAACTACGCACTGGTCATCTTCGAGATGACCAATCTCAAGGATATTGGGCTTTATAATCTGAGGAACATTACTCGGGGGGCCATCAGGATTGAGAAGAACGCCGACCTCTGTTACCTCTCCACCATAGACTGGTCTCTCATCTTGGATGCGGTGTCCAATAACTACATTGTGGGGAACAAGCCCCCGAAGGAATGTGGGGACCTGTGTCCAGGGACATTGGAGGAGAAGCCCATGTGTGAGAAGACCACCATCAACAATGAGTACAACTACCGCTGCTGGACCACAAATCGCTGCCAGAAAATGTGCCCAAGTGTGTGCGGGAAGCGAGCCTGCACCGAGAACAACGAGTGCTGCCACCCGGAGTGCCTGGGCAGCTGCCACACACCGGACGACAACACAACCTGCGTGGCCTGCAGACACTACTACTACAAAGGCGTGTGTGTGCCTGCCTGCCCGCCTGGCACCTACAGGTTCGAGGGCTGGCGCTGTGTGGATCGCGATTTCTGCGCCAACATCCCCAACGCTGAGAGCAGTGACTCGGATGGCTTCGTTATCCACGACGATGAGTGCATGCAGGAGTGTCCCTCAGGCTTCATCCGCAACAGCACCCAGAGCATGTACTGTATCCCCTGCGAAGGCCCCTGCCCCAAAGTCTGCGGCGATGAAGAGAAGAAAACGAAAACCATCGATTCGGTGACTTCTGCTCAAATGCTCCAAGGATGCACCATCCTGAAGGGCAATCTGCTTATTAACATCCGGAGAGGCAATAACATTGCCTCGGAGTTGGAGAACTTCATGGGGCTCATCGAGGTGGTGACCGGCTACGTGAAGATCCGCCATTCTCATGCCTTGGTCTCCTTGTCCTTCCTGAAGAACCTTCGTCTCATCTTAGGAGAGGAGCAGCTGGAAGGGAACTACTCCTTCTATGTCCTAGACAACCAGAACTTGCAGCAGCTGTGGGACTGGAACCACCGGAACCTGACCGTCAGGTCCGGAAAGATGTACTTTGCTTTCAATCCCAAGCTGTGTGTCTCCGAAATTTACCGCATGGAGGAAGTGACCGGAACCAAGGGACGCCAGAGCAAAGGGGACATAAACACCAGGAACAACGGAGAGCGAGCTTCCTGTGAAAGTGATGTTCTCCGTTTCACCTCCACCACGACCTGGAAGAACCGAATCATCATAACGTGGCACCGGTACCGGCCGCCGGACTACCGGGATCTCATCAGCTTCACAGTTTACTACAAGGAGGCACCATTTAAAAACGTTACGGAATATGACGGGCAGGATGCCTGTGGCTCCAACAGCTGGAACATGGTGGATGTAGACCTGCCTCCGAACAAGGAGGGCGAGCCTGGCATTTTACTGCATGGGCTGAAGCCCTGGACCCAGTATGCTGTCTATGTCAAGGCTGTGACCCTCACCATGGTGGAAAACGACCATATCCGTGGGGCCAAAAGTGAAATCTTGTACATTCGCACCAATGCTTCAGTCCCTTCCATTCCCCTAGATGTCCTCTCAGCATCAAACTCTTCCTCTCAGCTGATTGTGAAGTGGAATCCTCCAACTCTGCCCAATGGTAACTTGAGTTACTACATTGTGAGGTGGCAGCGGCAGCCCCAGGATGGTTACCTGTACCGGCACAACTACTGCTCCAAAG
ACAAAATACCCATCAGAAAGTACGCCGATGGTACCATCGACGTGGAGGAGGTGACGGAAAATCCCAAGACAGAAGTGTGTGGTGGTGATAAAGGGCCATGCTGCGCTTGCCCTAAAACTGAAGCTGAGAAGCAGGCTGAGAAGGAGGAGGCTGAGTACCGTAAAGTCTTTGAGAATTTCCTTCACAATTCCATCTTTGTGCCCAGGCCCGAAAGGAGGCGGAGAGACGTCATGCAAGTGGCCAACACGACCATGTCCAGCCGAAGCAGGAACACCACGGTAGCTGACACCTACAATATCACAGACCCGGAGGAGTTCGAGACAGAGTACCCTTTCTTTGAGAGCAGAGTGGATAACAAGGAGAGGACTGTCATCTCCAACCTCCGGCCTTTCACTCTGTACCGCATCGATATCCACAGCTGCAACCACGAGGCTGAGAAGCTGGGCTGCAGCGCCTCCAACTTCGTCTTTGCGAGAACCATGCCAGCAGAAGGAGCAGATGATATCCCTGGTCCGGTGACCTGGGAGCCAAGACCCGAAAACTCCATCTTTTTAAAGTGGCCAGAACCCGAGAACCCCAACGGATTGATCCTAATGTATGAAATTAAATACGGGTCGCAAGTCGAGGATCAGCGGGAATGTGTGTCCAGACAGGAGTACAGGAAGTACGGAGGGGCCAAACTCAACCGTCTAAACCCAGGGAACTATACAGCCCGGATTCAGGCTACCTCCCTCTCTGGGAATGGGTCATGGACAGATCCTGTGTTCTTCTATGTCCCCGCCAAAACGACGTATGAGAACTTCATGCATCTGATCATTGCTCTGCCGGTTGCCATCCTGCTGATCGTTGGGGGGCTGGTTATCATGCTGTATGTCTTCCATAGAAAGAGAAATAACAGCAGGTTGGGCAATGGAGTGCTGTATGCTTCTGTGAACCCCGAGTATTTCAGCGCAGCTGATGTGTACGTGCCTGATGAATGGGAGGTAGCTCGAGAGAAGATCACCATGAACCGGGAGCTCGGACAAGGGTCCTTTGGGATGGTCTATGAAGGAGTGGCCAAGGGTGTGGTCAAGGATGAACCCGAAACCAGAGTGGCCATCAAGACGGTAAACGAGGCTGCAAGTATGCGTGAAAGAATCGAGTTTCTCAACGAGGCCTCGGTGATGAAGGAGTTCAATTGTCACCATGTGGTCCGGTTGCTGGGTGTGGTATCCCAAGGCCAGCCCACCCTGGTCATCATGGAACTAATGACACGCGGTGATCTCAAAAGTTATCTCCGGTCTCTGAGGCCAGAAGTGGAGCAGAATAATCTAGTCCTCATTCCTCCGAGCTTAAGCAAGATGATCCAGATGGCTGGAGAGATTGCAGATGGCATGGCCTACCTCAATGCCAACAAGTTCGTCCACAGAGACCTTGCTGCTAGGAACTGCATGGTAGCCGAAGATTTCACAGTCAAAATTGGAGATTTCGGTATGACACGAGACATCTACGAGACGGACTACTACCGGAAAGGCGGGAAGGGGTTGCTGCCTGTGCGCTGGATGTCTCCCGAGTCCCTCAAGGATGGTGTCTTCACTACTCATTCTGATGTCTGGTCCTTCGGGGTCGTCCTCTGGGAGATCGCCACGCTGGCTGAGCAGCCCTACCAGGGCTTGTCCAACGAGCAAGTTCTTCGTTTCGTCATGGAGGGTGGCCTTCTGGACAAGCCGGACAACTGCCCTGATATGCTGTTTGAACTTATGCGCATGTGCTGGCAGTATAACCCCAAGATGCGGCCCTCCTTCCTGGAGATCATCGGCAGCATCAAGGATGAGATGGAGCCCAGCTTCCAGGAGGTCTCCTTCTACTACAGCGAGGAGAACAAGCCTCCCGAGCCAGAGGAGCTGGAGATGGAGCCTGAGAACATGGAGAGCGTCCCACTGGACCCTTCGGCCTCCTCAGCCTCCCTGCCTCTGCCTGAAAGACACTCAGGACACAAGGCTGAGAATGGCCCGGGCCCT
GGCGTGCTCGTTCTCCGCGCCAGTTTTGATGAGAGACAGCCTTACGCTCACATGAACGGGGGACGCGCCAACGAGAGGGCCTTGCCTCTGCCCCAGTCCTCGACCTGCTGA
+
+>ENSRNOT00000019267_rattusnorvegicus
+ATGAAGTCTGGCTCCGGAGGAGGGTCCCCGACCTCGCTGTGGGGGCTCGTGTTTCTCTCCGCCGCGCTCTCGCTCTGGCCGACGAGTGGAGAAATTTGTGGGCCCGGCATTGACATCCGCAACGACTATCAGCAGCTGAAGCGCCTGGAAAACTGCACGGTGATCGAGGGCTTCCTCCACATCCTGCTCATCTCCAAGGCCGAGGACTACCGAAGCTACCGCTTCCCCAAGCTCACGGTCATCACCGAGTACTTGCTGCTGTTTCGAGTGGCCGGCCTCGAGAGCCTGGGAGACCTCTTCCCGAACCTCACAGTCATCCGTGGCTGGAAACTCTTCTACAATTACGCACTGGTCATCTTCGAGATGACCAATCTCAAGGATATTGGGCTTTATAATCTGAGGAACATTACTCGGGGGGCCATCAGGATTGAGAAAAACGCTGACCTCTGTTACCTCTCCACCATAGACTGGTCTCTCATCTTGGATGCGGTGTCCAATAACTACATTGTGGGGAACAAGCCCCCAAAGGAATGTGGGGACCTGTGTCCAGGGACCTTGGAGGAGAAGCCCATGTGTGAGAAGACCACCATCAACAATGAGTACAACTACCGCTGCTGGACCACAAATCGCTGCCAGAAAATGTGCCCAAGTGTGTGTGGGAAGCGAGCCTGCACCGAGAACAATGAGTGCTGCCACCCGGAGTGCCTAGGCAGCTGCCACACACCGGACGACAACACAACCTGCGTGGCCTGCCGACACTACTACTACAAAGGCGTGTGCGTGCCTGCCTGCCCGCCTGGCACCTACAGGTTCGAGGGCTGGCGCTGTGTGGACCGGGATTTCTGCGCCAACATCCCCAACGCCGAGAGCAGTGACTCAGATGGCTTCGTCATCCACGATGGCGAGTGCATGCAGGAGTGTCCATCAGGCTTCATCCGCAACAGCACCCAGAGCATGTACTGTATCCCCTGTGAAGGCCCCTGCCCCAAGGTCTGCGGCGATGAAGAAAAGAAAACGAAAACCATCGATTCTGTGACGTCTGCCCAGATGCTCCAAGGGTGCACCATTTTGAAGGGCAATCTGCTTATTAACATCCGGCGAGGCAATAACATTGCCTCGGAATTGGAGAACTTCATGGGGCTCATCGAGGTGGTGACTGGCTACGTGAAGATCCGCCATTCCCATGCCTTGGTCTCCTTGTCCTTCCTGAAGAACCTTCGTCTCATCTTAGGAGAGGAGCAGCTAGAAGGGAACTACTCCTTCTATGTCCTGGACAACCAGAACTTGCAGCAGCTGTGGGACTGGAACCACCGGAACCTGACCGTCAGGTCAGGGAAAATGTACTTCGCTTTCAATCCCAAGCTGTGTGTCTCTGAAATTTACCGGATGGAGGAGGTGACAGGAACAAAGGGACGGCAGAGCAAAGGAGACATAAACACCAGGAACAACGGAGAGCGAGCTTCCTGTGAAAGTGATGTTCTCCGTTTCACCTCCACCACCACCTGGAAGAACCGCATCATCATAACGTGGCACCGGTACCGGCCGCCGGACTACCGGGATCTCATCAGTTTCACAGTCTACTACAAGGAGGCACCCTTTAAAAACGTCACGGAATACGACGGGCAGGATGCCTGTGGCTCCAACAGCTGGAACATGGTGGACGTGGACCTGCCTCCGAACAAGGAGGGGGAGCCTGGCATTTTGCTGCATGGGCTGAAGCCCTGGACCCAGTATGCAGTCTATGTCAAGGCTGTGACCCTCACCATGGTGGAAAACGACCACATCCGTGGGGCCAAAAGTGAAATCTTGTACATTCGCACCAACGCTTCAGTTCCTTCCATTCCTCTAGATGTCCTCTCGGCATCAAACTCCTCCTCTCAGCTGATCGTGAAGTGGAACCCCCCAACTCTGCCCAATGGTAACTTGAGTTACTACATTGTGAGGTGGCAGCGGCAGCCGCAGGATGGCTATCTGTTCCGGCACAACTACTGCTCCAAAG
ACAAAATACCCATCAGAAAGTACGCCGATGGTACCATCGATGTGGAGGAGGTGACAGAAAATCCCAAGACAGAAGTGTGCGGTGGTGATAAAGGGCCGTGCTGTGCCTGTCCTAAAACCGAAGCTGAGAAGCAGGCTGAGAAGGAGGAGGCTGAGTACCGTAAAGTCTTTGAGAATTTCCTTCACAACTCCATCTTTGTGCCCAGACCTGAGAGGAGGCGGAGAGATGTCCTGCAGGTGGCTAACACCACCATGTCCAGCCGAAGCAGGAACACCACGGTAGCTGACACCTACAATATCACAGACCCGGAAGAGTTCGAGACAGAATACCCTTTCTTTGAGAGCAGAGTGGATAACAAGGAGAGGACTGTCATTTCCAACCTCCGGCCTTTCACTCTGTACCGTATCGATATCCACAGCTGCAACCACGAGGCTGAGAAGCTGGGCTGCAGCGCCTCCAACTTTGTCTTTGCAAGAACCATGCCAGCAGAAGGAGCAGATGACATTCCTGGCCCAGTGACCTGGGAGCCAAGACCTGAAAACTCCATCTTTTTAAAGTGGCCAGAACCCGAGAACCCCAACGGATTGATTCTAATGTATGAAATAAAATACGGATCGCAAGTCGAGGATCAGCGGGAATGTGTGTCCAGACAGGAGTACAGGAAGTATGGAGGGGCCAAACTTAACCGTCTAAACCCAGGGAACTATACGGCCCGGATTCAGGCTACCTCCCTCTCTGGGAATGGGTCGTGGACAGATCCTGTGTTCTTCTATGTCCCAGCCAAAACAACGTATGAGAATTTCATGCATCTGATCATTGCTCTGCCGGTTGCCATCCTGCTGATTGTGGGGGGCCTGGTAATCATGCTGTATGTCTTCCATAGAAAGAGGGTGTCTGGGCCAGCAGAAGGCAGTAGTTGGAAAGGGCCATTTCCATCCTGTCTGTTCCTAGTGTACGTGCCTGATGAATGGGAGGTAGCTCGGGAGAAGATCACCATGAACCGGGAGCTCGGACAAGGGTCCTTCGGGATGGTCTATGAAGGAGTGGCCAAGGGCGTGGTCAAGGACGAGCCTGAAACCAGAGTGGCCATCAAGACAGTGAATGAGGCTGCAAGTATGCGTGAGAGAATTGAGTTTCTCAACGAGGCCTCAGTGATGAAGGAGTTCAACTGTCACCATGTGGTCCGGTTGCTGGGTGTAGTATCCCAAGGCCAGCCCACCCTGGTCATCATGGAACTAATGACACGTGGCGATCTCAAAAGTTATCTCCGGTCTCTAAGGCCAGAGGTGGAGAATAATCTAGTCCTGATTCCTCCGAGCTTAAGCAAGATGATCCAGATGGCTGGAGAGATTGCAGATGGCATGGCCTACCTCAATGCCAACAAGTTCGTCCACAGAGACCTGGCTGCTCGGAACTGCATGGTAGCTGAAGATTTCACAGTCAAAATTGGAGATTTTGGTATGACACGAGACATCTACGAGACGGACTACTACCGGAAAGGCGGGAAGGGCTTGCTGCCTGTGCGCTGGATGTCTCCCGAGTCCCTCAAGGATGGCGTCTTCACCACTCATTCCGATGTCTGGTCCTTTGGGGTCGTCCTCTGGGAGATCGCCACTCTGGCTGAGCAGCCGTACCAGGGCCTGTCCAACGAGCAAGTTCTTCGTTTCGTCATGGAGGGCGGCCTTCTGGACAAGCCGGATAACTGCCCCGATATGCTGTTTGAACTTATGCGCATGTGCTGGCAGTACAACCCCAAGATGCGGCCCTCCTTCCTGGAGATCATCGGAAGCATCAAGGATGAGATGGAGCCCAGTTTCCAGGAGGTCTCCTTCTACTACAGCGAGGAGAACAAGCCTCCAGAGCCGGAGGAGCTGGAGATGGAGCTGGAGCTGGAGCCCGAGAACATGGAGAGCGTCCCGCTGGACCCTTCGGCCTCCTCAGCCTCCCTGCCTCTGCCTGAAAGACACTCAGGACACAAGGCTGAGAACGGCCCTGGCGTG
CTGGTTCTCCGTGCCAGTTTTGATGAGAGACAGCCTTACGCTCACATGAATGGGGGACGCGCCAACGAGAGGGCCTTGCCTCTGCCCCAGTCCTCAACCTGC
+
+>ENSPTRT00000013802_pantroglodytes
+ATGAAGTCTGGCTCCGGAGGAGGGTCCCCGACCTCGCTGTGGGGGCTCCTGTTTCTCTCCGCCGCGCTCTCGCTCTGGCCGACGAGTGGAGAAATCTGCGGGCCAGGCATCGACATCCGCAACGACTATCAGCAGCTGAAGCGCCTGGAGAACTGCACGGTGATCGAGGGCTACCTCCACATCCTGCTCATCTCCAAGGCCGAGGACTACCGCAGCTACCGCTTCCCCAAGCTCACGGTCATTACCGAGTACTTGCTGCTGTTCCGAGTGGCTGGCCTCGAGAGCCTCGGAGACCTCTTCCCCAACCTCACGGTCATCCGCGGCTGGAAACTCTTCTACAACTACGCCCTGGTCATCTTCGAGATGACCAATCTCAAGGATATTGGGCTTTACAACCTGAGGAACATTACTCGGGGGGCCATCAGGATTGAGAAAAATGCTGACCTCTGTTACCTCTCCACTGTGGACTGGTCCCTGATCCTGGATGCGGTGTCCAATAACTACATTGTGGGGAATAAGCCCCCAAAGGAATGTGGGGACCTGTGTCCAGGGACCATGGAGGAGAAGCCGATGTGTGAGAAGACCACCATCAACAATGAGTACAACTACCGCTGCTGGACCACAAACCGCTGCCAGAAAATGTGCCCGAGCACGTGTGGGAAGCGGGCGTGCACCGAGAACAACGAGTGCTGCCACCCCGAGTGCCTGGGCAGCTGCAGCGCGCCTGACAACGACACGGCCTGTGTAGCTTGCCGCCACTACTACTATGCCGGTGTCTGTGTGCCTGCCTGCCCGCCCAACACCTACAGGTTTGAGGGCTGGCGCTGTGTGGACCGTGACTTCTGCGCCAACATCCTCAGCGCCGAGAGCAGCGACTCCGAGGGGTTTGTGATCCACGACGGCGAGTGCATGCAGGAGTGCCCCTCGGGCTTCATCCGCAACGGCAGCCAGAGCATGTACTGCATCCCTTGTGAAGGTCCTTGCCCGAAGGTCTGTGAGGAAGAAAAGAAAACAAAGACCATTGATTCTGTTACTTCTGCTCAGATGCTCCAAGGATGCACCATCTTCAAGGGCAATTTGCTCATTAACATCCGACGGGGGAATAACATTGCTTCAGAGCTGGAGAACTTCATGGGGCTCATCGAGGTGGTGACGGGCTACGTGAAGATCCGCCATTCTCATGCCTTGGTCTCCTTGTCCTTCCTAAAAAACCTTCGCCTCATCTTAGGAGAGGAGCAGCTAGAAGGGAATTACTCCTTCTACGTCCTCGACAACCAGAACTTGCAGCAACTATGGGACTGGGACCACCGCAACCTGACCATCAAAGCAGGGAAAATGTACTTTGCTTTCAATCCCAAATTATGTGTTTCCGAAATTTACCGCATGGAGGAAGTGACGGGGACTAAAGGGCGCCAAAGCAAAGGGGACATAAACACCAGGAACAACGGGGAGAGAGCCTCCTGTGAAAGTGACGTCCTGCATTTCACCTCCACCACCACGTCGAAGAATCGCATCATCATAACCTGGCACCGGTACCGGCCCCCTGACTACAGGGATCTCATCAGCTTCACCGTTTACTACAAGGAAGCACCCTTTAAGAATGTCACGGAGTATGATGGGCAGGACGCCTGCGGCTCCAACAGCTGGAACATGGTGGACGTGGACCTCCCGCCCAACAAGGACGTGGAGCCCGGCATCTTACTACATGGGCTGAAGCCCTGGACTCAGTACGCCGTTTACGTCAAGGCTGTGACCCTCACCATGGTGGAGAATGACCATATCCGTGGGGCCAAGAGTGAGATCTTGTACATTCGCACCAATGCTTCAGTTCCTTCCATTCCCTTGGACGTTCTTTCAGCATCGAACTCCTCTTCTCAGTTAATCGTGAAGTGGAACCCTCCCTCTCTGCCCAACGGCAACCTGAGTTACTACATTGTGCGCTGGCAGCGGCAGCCTCAGGACGGCTACCTTTACCGGCACAATTACTGCTCCAAAGACA
AAATCCCCATCAGGAAGTATGCCGACGGCACCATCGACATTGAGGAGGTCACAGAGAACCCCAAGACTGAGGTGTGTGGTGGGGAGAAAGGGCCTTGCTGCGCCTGCCCCAAAACTGAAGCCGAGAAGCAGGCCGAGAAGGAGGAGGCTGAATACCGCAAAGTCTTTGAGAATTTCCTGCACAACTCCATCTTCGTGCCCAGACCTGAAAGGAAGCGGAGAGATGTCATGCAAGTGGCCAACACCACCATGTCCAGCCGAAGCAGGAACACCACGGCCGCAGACACCTACAACATCACCGACCCGGAAGAGCTGGAGACAGAGTACCCTTTCTTTGAGAGCAGAGTGGATAACAAGGAGAGAACTGTGATTTCTAACCTTCGGCCTTTCACATTGTACCGCATCGATATCCACAGCTGCAACCACGAGGCTGAGAAGCTGGGCTGCAGCGCCTCCAACTTTGTCTTTGCAAGGACTATGCCCGCAGAAGGAGCAGATGACATTCCTGGGCCAGTGACCTGGGAGCCAAGGCCTGAAAACTCCATCTTTTTAAAGTGGCCGGAACCTGAGAATCCCAATGGATTGATTCTAATGTATGAAATAAAATACGGATCACAAGTTGAGGATCAGCGAGAATGTGTGTCCAGACAGGAATACAGGAAGTATGGAGGGGCCAAGCTAAACCGGCTAAACCCGGGGAACTACACAGCCCGGATTCAGGCCACATCTCTCTCTGGGAATGGGTCGTGGACAGATCCTGTGTTCTTCTATGTCCAGGCCAAAACAGGATATGAAAACTTCATCCATCTGATCATCGCTCTGCCCGTCGCTGTCCTGTTGATCGTGGGAGGGTTGGTGATTATGCTGTACGTCTTCCATAGAAAGAGAAATAACAGCAGGCTGGGGAATGGAGTGCTGTATGCCTCTGTGAACCCGGAGTACTTCAGCGCTGCTGATGTGTACGTTCCCGATGAGTGGGAGGTGGCTCGGGAGAAGATCACCATGAGCCGGGAACTTGGGCAGGGGTCCTTTGGGATGGTCTATGAAGGAGTTGCCAAGGGTGTGGTGAAAGATGAACCTGAAACCAGAGTGGCCATTAAAACAGTGAACGAGGCCGCAAGCATGCGTGAAAGGATTGAGTTTCTCAACGAAGCTTCTGTGATGAAGGAGTTCAATTGTCACCATGTGGTGCGATTGCTGGGTGTGGTGTCCCAAGGCCAGCCAACACTGGTCATCATGGAACTGATGACACGGGGCGATCTCAAAAGTTATCTCCGGTCTCTGAGGCCAGAAATGGAGAATAATCCAGTCCTAGCACCTCCAAGCCTGAGCAAGATGATTCAGATGGCCGGAGAGATTGCAGACGGCATGGCATACCTCAACGCCAATAAGTTCGTCCACAGAGACCTTGCTGCCCGGAATTGCATGGTAGCCGAAGATTTCACAGTCAAAATCGGAGATTTTGGTATGACGCGAGATATCTATGAGACAGACTATTACCGGAAAGGAGGGAAAGGGCTGCTGCCCGTGCGCTGGATGTCTCCCGAGTCCCTCAAGGATGGAGTCTTCACCACTTACTCGGACGTCTGGTCCTTCGGGGTCGTCCTCTGGGAGATCGCCACACTGGCCGAGCAGCCCTACCAGGGCTTGTCCAACGAGCAAGTCCTTCGCTTCGTCATGGAGGGCGGCCTTCTGGACAAGCCAGACAACTGTCCCGACATGCTGTTTGAACTGATGCGCATGTGCTGGCAGTATAACCCCAAGATGAGGCCTTCCTTCCTGGAGATCATCAGCAGCATCAAAGAGGAGATGGAGCCTGGCTTCCGGGAGGTCTCCTTCTACTACAGCGAGGAGAACAAGCTGCCCGAGCCGGAGGAGCTGGACCTGGAGCCAGAGAACATGGAGAGCGTCCCCCTGGACCCCTCGGCCTCCCTGCCACTGCCCGACAGACACTCAGGACACAAGGCCGAGAACGGCCCCGGCCCTGGGGTGCTGGTCCTC
CGCGCCAGCTTCGACGAGAGACAGCCGTACGCACACATGAACGGGGGCCGCAAGAACGAGCGGGCCTTGCCGCTGCCCCAGTCTTCGACCTGCTGA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output1.fasta	Thu Dec 15 11:04:25 2016 -0500
@@ -0,0 +1,425 @@
+>ENSMUST00000091291_musmusculus
+ATGGGCTTCGGGAGAGGATGTGAGACGACGG-CTGTGCCATTGCTGGTGG
+CCGTGGCCGCGTTGCTGGTGGGCACAGCCGGCCACCTGTACCCTGGAGAG
+GTGTGC---CCTGGTATGGACATCCGGAACAACCTGACCAGGCTACATGA
+GCTGGAGAACTGCTCAGTCATTGAGGGCCATCTGCAGATCCTCCTGATGT
+TCAAGACCAGACCCGAAGATTTCCGAGACCTCAGTTTCCCCAAACTCATC
+ATGATCACAGATTACCTGCTTCTCTTCCGTGTCTATGGTCTGGAAAGTCT
+GAAAGACCTCTTCCCAAATCTCACAGTCATCCGAGGCTCCCGTCTCTTCT
+TCAACTATGCCCTGGTTATCTTCGAGATGGTCCACCTGAAGGAGCTGGGG
+CTTTATAACCTCATGAACATCACCCGGGGCTCTGTCCGCATCGAGAAGAA
+TAATGAGCTCTGCTACCTGGCCACTATCGACTGGTCCCGTATCCTGGATT
+CTGTGGAGGACAACTACATTGTACTGAACAAAGATGACAACGAGGAATGT
+GGGGATGTCTGTCCAGGCACCGCCAAGGGCAA-GACCA--ACTGTCCTGC
+CACTGTCATCAATGGGCAGTTTGTGGAACGGTGCTGGACACACAGTCATT
+GTCAGAAAGTTTGCCCAACCATCTGTAAGTCACATGGCTGCACAGCTGAA
+GGCCTGTGCTGCCACAAAGAGTGCCTGGGCAACTGTTCGGAACCTGATGA
+CCCCACCAAGTGTGTGGCCTGTCGCAACTTCTATCTGGATGGTCAGTGTG
+TGGAGACCTGCCCGCCACCCTACTATCACTTCCAGGACTGGCGCTGTGTG
+AACTTCAGCTTCTGCCAAGACCTTCACTTCAAATGCAGGAACTCTCGGAA
+GCCTGGCTGCCACCAATACGTCATTCACAACAATAAGTGCATCCCCGAGT
+GCCCGTCTGGCTATACCATGAATTCCAGC---AACTTGATGTGCACCCCA
+TGTCTGGGACCCTGCCCTAAGGTCTGCCA-AATCCTCGAAGGTGA--GAA
+GACCATTGATTCTGTGACATCTGCCCAGGAGCTCCGAGGCTGCACTGTGA
+TCAACGGTAGCCTGATCATCAACATCCGAGGGGGCAACAACCTGGCAGCT
+GAGCTGGAGGCTAACCTTGGCCTCATTGAAGAAATTTCGGGATTTCTAAA
+GATCCGCCGCTCCTATGCTCTGGTATCACTTTCTTTCTTCAGGAAGCTAC
+ATCTGATTCGAGGAGAGACCTTGGAAATTGGGAACTATTCTTTTTATGCC
+TTGGACAACCAGAACCTGAGGCAACTCTGGGACTGGAGCAAACACAACCT
+CACCATCACTCAGGGCAAGCTCTTCTTCCATTACAACCCGAAACTCTGCT
+TGTCTGAAATTCACAAGATGGAAGAAGTCTCCGGAACTAAGGGCCGTCAG
+GAGAGGAACGACATTGCCCTGAAGACCAATGGGGACCAGGCATCGTGTGA
+AAATGAATTGCTTAAATTTTCTTTCATTCGGACATCTTTTGACAAGATCC
+TGTTGAGGTGGGAACCCTACTGGCCCCCCGACTTCCGAGATCTCCTGGGA
+TTCATGTTGTTCTACAAAGAGGCCCCTTATCAGAATGTGACAGAGTTTGA
+TGGGCAGGATGCTTGTGGCTCCAACAGCTGGACTGTGGTGGATATTGACC
+CGCCCCAGAGGTCCAACGACCCCAAGTCTCAGACCCCAAGCCACCCTGGG
+TGGCTGATGCGGGGCCTCAAACCCTGGACCCAATACGCCATCTTTGTGAA
+GACCTTGGTTACCTTCTC--TGATGAACGGC-GGACCTATGGAGCCAAAA
+GTGATATCATCTATGTGCAAACAGATGCCACTAATCCTTCTGTCCCCCTG
+GATCCCATATCAGTTTCTAATTCCTCATCTCAGATTATCTTAAAGTGGAA
+GCCCCCCTCTGACCCCAATGGCAACATCACACACTACCTGGTGTACTGGG
+AGAGGCAAGCAGAGGACAGCGAGCTGTTTGAGCTGGATTATTGTCTCAAA
+GGGCTGAAGCT-CCCTTCACGGACCTGGTCCCCAC--CCTTTGAGTCTGA
+TGAT-TCTCAGAAG--CACAATCAGAGTGAGTATGACGACTCGGCCAGTG
+AGTGCTGCTCATGCCCTAAGACTGACTCTCAGATCCTGAAGGAGCTGGAG
+GAGTCTTCATTCAGGAAGACCTTCGAGGATTACCTGCACAACGTGGTTTT
+TGTCCCCAGGCCATCCCGAAAGCGAAGATCCCTTGAAGAGGTGGGGAATG
+TGAC---AGCCACCACACTCACACTTCCAGATTTCCCCAACGTCTCCTCT
+ACCATTGTGCCCACAAGTCAGGAG---GAGCACAGGCCATTTGAGAA---
+AGTGGTGAACAAGGAGTCACTTGTCATCTCTGGCCTGAGACACTTCACTG
+GGTACCGCATTGAGCTGCAGGCATGCAATCAAGATTCCCCAGATGAGAGG
+TGCAGTGTGGCTGCCTACGTCAGTGCCCGGACCATGCCTGAAGCTAAGGC
+AGATGACATCGTTGGCCCTGTGACTCATGAAATCTTTGAGAACAATGTTG
+TACACTTAATGTGGCAAGAGCCAAAGGAACCTAATGGTCTGATTGTGCTA
+TATGAAGTGAGCTATCGCCGATATGGTGATGAGGAGCTGCACCTCTGTGT
+CTCCCGGAAGCATTTTGCCCTGGAGCGGGGCTGCAGGCTGCGAGGGCTCT
+CCCCAGGAAACTACAGTGTTCGAGTCCGGGCTACCTCTCTGGCAGGAAAT
+GGCTCCTGGACAGAACCCACCTATTTTTATGTGACTGATTATTTAGATGT
+CCCATCAAATATTGCCAAAATTATCATTGGACCCCTCATCTTTGTCTTCC
+TCTTCAGTGTTGTGATTGGAAGTATT---TATCTATTTCTGAGAAAGAGG
+CA----GCCGGATGGGCCAATGGGACCACTGTATGCATCTTCAAACCCTG
+AGTACCTCAGTGCCAGTGATGTGTTTCCATCTTCTGTGTACGTGCCGGAC
+GAGTGGGAGGTGCCTCGAGAGAAGATCACCCTTCTTCGAGAGCTGGGGCA
+GGGATCCTTTGGTATGGTGTATGAAGGCAATGCCAAGGATATCATCAAGG
+GTGAGGCAGAGACCCGTGTTGCGGTTAAGACTGTCAATGAGTCAGCCAGT
+CTTCGAGAACGGATCGAGTTCCTCAATGAGGCATCAGTCATGAAGGGATT
+CACCTGCCATCATGTGGTCCGCCTTCTTGGGGTGGTATCCAAAGGACAGC
+CAACGCTGGTAGTGATGGAATTGATGGCTCATGGAGACCTGAAAAGTCAC
+CTCCGTTCTCTGAGGCCAGATGCTGAG---AATAACCCAGGCCGCCCTCC
+CCCTACCTTGCAAGAAATGATTCAGATGACAGCAGAAATTGCTGATGGCA
+TGGCATACTTGAACGCCAAGAAGTTTGTGCACCGGGACCTGGCAGCTCGA
+AACTGCATGGTTGCCCATGATTTTACTGTCAAAATTGGAGACTTTGGAAT
+GACAAGGGACATCTACGAGACAGATTACTATCGGAAAGGGGGCAAGGGAC
+TGCTTCCTGTGAGGTGGATGTCACCTGAGTCCCTGAAGGATGGAGTCTTT
+ACTGCTTCTTCTGATATGTGGTCCTTTGGGGTGGTCCTTTGGGAAATCAC
+TAGCCTGGCTGAGCAACCTTATCAAGGCCTGTCTAATGAACAGGTGTTGA
+AGTTTGTCATGGATGGAGGCTATCTGGATCCCCCTGATAACTGTCCAGAG
+AGACTCACTGACCTGATGCGCATGTGCTGGCAGTTCAACCCCAAGATGAG
+GCCAACCTTCCTGGAAATCGTCAACCTGCTCAAGGATGACCTCCACCCCA
+GCTTTCCAGAAGTTTCCTTCTTCTACAGCGAGGAGAACAAGGCTCCTGAG
+AGTGAGGAGCTGGAGATGG------------AGTTTGAAGACATGGAGAA
+TGTCCCGTTGGATCGTTC----------------CTCTCACTGTCAGAGA
+GAA----GAGGCTGGGGGCCGGGAGGGAGGG-------TCCTCACTGAGC
+ATCAAACGGACCTATGATGAACACATCCCCTATACCCACATGAATGGGGG
+CAAGAAGAACGGACGTGTCCTTACCCTGCCAAGGTCAAACCCTTCCTAA
+>ENSCAFT00000026349_canisfamiliaris
+----ATGGCAGTGCCCAGTCTGTGCCCATGGGTCGCATGCCTGCTGGTGA
+TC----CTCCTCTCCTTGGGATTTGGCCTGGACACACT--------AGAG
+GTGTGC---CCCAGCCTGGACATCCGCTCAGAGGTGGCGGAGCTGCGCCG
+GCTGGAGAACTGCAGCGTGGTGGAGGGCCATCTGCAGATCCTGCTCATGT
+TCACGGCCACGGGCGAGGACTTCCGCGGCCTCAGCTTCCCGCGCCTCACT
+CAGGTCACTGACTACCTGCTGCTCTTCCGCGTGTATGGCCTGGAGAGCCT
+GCGGGACCTCTTCCCCAACCTCGCGGTCGTGCGTGGCGCCCGCCTCTTCC
+TGGGTTATGCGCTGGTGGTCTACGAGATGCCGCACCTGCGGGACGTGGGG
+CTGCCGGCGCTGGGGGCCGTGCTGCGTGGGGCCGTGCGGGTGGAGAAGAA
+CCAGGAGCTCTGTCATCTCTCCACCATCGACTGGGGGCTGCTGCAGCCCT
+CGCCCAGCGCCAACCACATCGTGGGCAACAA-GCTGCGTG--AGGAGTGT
+GCCGACGTGTGCCCTGGTGTGCTGGGTGCCACCGGCCAGCCCTGCGCCAG
+GACCACCTTCAGTGGGCACACCGACTACAGATGCTGGACCTCCAGCCACT
+GCCAGAGAGTGTGTCCCTGCCCCCATGGG---CTGGCCTGCACAGCTGGG
+GGTGAGTGCTGCCACACTGAATGCCTGGGGGGCTGCAGCCGGCCGGAAGA
+CCCCCGCGCCTGCGTCGCCTGTCGCCACCTCTACTTCCAGGGTGCCTGCC
+ACCGGGCCTGCCCTCCAGGCACCTACCAGCACGAGTCCTGGCGCTGTGTC
+ACGGCAGAGCGCTGTGCCAGCCTGCGCTCTGTGCCCGGCCG---------
+----CACCTCCATC--TTTGGCATCCACGAGGGCAGCTGCCTGGCCCAGT
+GCCCTCCGGGCTTCACCCGCAACGGCAGC---AGCATGTTCTGCCACAAG
+TGTGAGGGGCTGTGCCCCAAAGAGTGCA---------AGGTGGGTACCAA
+GACCATCGACTCCACGCAGGCGGCACAGGACCTGGCGGGCTGCACCCACG
+TGGAGGGGAGCCTCATCATCAACCTCCGCCAGGGCTACAACCTAGAGCTG
+GAGCTGCAACAGAGCCTGGGGCTGATAGAGACCATCACTGGCTTCCTCAA
+GATCAAGCACTCCTTTGCCCTCGTGTCCCTGGGCTTTTTCAAGAACCTCA
+AACTCATCCGAGGGGACGCCATGGTGGATGGGAACTACACCCTGTATGTG
+CTGGACAACCAGAACCTACAGCAGCTGGGGGCCTGGGTGGCTGCGGGGCT
+CACCATTCCCGTGGGCAAGATATACTTCGCTTTCAACCCTCGCCTCTGCT
+TGGAGCACATCTACCGCCTGGAAGAGGTGACCGGCACGCGGGGACGGCAA
+AACAAGGCTGAGATCAACCCCCGCACCAACGGAGACCGCGCCGCCTGCCA
+AACTCGCACCCTGCGCTTCGTGTCCAACGTGACGCAAGCTGACAGCATCT
+TGCTGCGCTGGGAGCGCTACGAGCCGCTGGAGGCTCGGGACCTGCTCAGC
+TTCATCGTGTACTACAAGGAGTCCCCATTCCAGAATGCCACAGAGCACAC
+AGGTCCAGATGCCTGTGGAACCCAGAGCTGGAACCTGCTGGATGTGGAGC
+TGCCCTTAAG---CCGCACCCAAGAACCCGGGGTAA--------------
+----CTCTAGCACCCCTCAAGCCCTGGACACAATATGCAGTGTTTGTACG
+GGCCATCACACTGACCACTGCTGAGGACAGCCCCCACCAAGGAGCCCAGA
+GCCCCATCGTCTACCTCCGAACCCTGCCTGCGGCGCCCACTGTGCCCCAG
+GACGTCATCTCCACGTCCAATTCCTCGTCCCACCTGCTCGTGCGCTGGAA
+GCCACCGATTCAGCGCAACGGGAACATCACCTACTACCTGGTGCTGTGGC
+AGCGTCTGGCGGAGGACGGCGACCTCTATCTCAACGACTACTGCCACCGC
+GGCCTGCGGCTGCCCACCAGCAACAACGACCCGCG---CTTCGACCGCGA
+GGA-----CGGTGAACTCGAAGCCGAGATGGAGCCGGGCTGCTGCCCTTG
+CCAGCACCCACCGCCT--GGGCAGGTCCT--GCCGCCGCTGGAGGCGCAA
+GAGGCCTCGTTCCAGAAGAAGTTCGAAAACTTCCTGCACAACGCCATCAC
+CATCCCCAAGTCCCCCTGGAAGGTGACGTCCGTCA-ATAAGAGCCCTCAA
+AGAC--------ATGCGGGGAGGCACCGCCGGGCGGCCGGGGC--GCTCC
+GGCTTGGGGGCAACAGCTCG-------GATTTCGAGATCCAGGAGGACAA
+AGTGCC---CCGGGAGCGAGCAGTGTTGAGTGGTCTGCGCCACTTTACGG
+AATATCGTATCGACATCCACGCCTGCAACCACGCGGCTCACACCGTGGGC
+TGCAGCGCGGCCACGTTCGTCTTCGCGCGCACCATGCCGCACAGAGAAGC
+TGATGGCATCCCAGGGAAGGTGGCCTGGGAGGCAGCCAGCAAAAGCAGTG
+TCCTCCTGCGCTGGCTGGAGCCACCTGACCCCAACGGACTCATCCTCAAG
+TATGAAATCAAGTACCGCCGCTTGGGAGAGGAGGCCACAGTGCTATGTGT
+GTCCCGCCTACGATATGCCAAATTTGGGGGTGTCCAGCTGGCCCTGCTGC
+CCCCTGGAAACTACTCCGCCAGAGTTCGGGCAACCTCGCTGGCTGGCAAC
+GGCTCCTGGACAGAAAGTATCGCTTTCTACGTCCCAGGCCCAGAGGAGGA
+------AGACTCCGGGGGGCTGCACGTCCTTCTCACTGTCACCCCCGTGG
+GGCTCATGCTGTTCATCATTCTTGCCGC-CCTCGGTTTCTTCTACGG---
+--------CAGGAAGAGAAACAGCAC-CCTCTATGCCTCAGTGAATCCGG
+AGTACTTCAGCGCCTCTGAT---------------ATGTACATCCCTGAT
+GAGTGGGAGGTGCCTCGGGAGCAGATCTCCATAATCCGAGAGCTGGGCCA
+GGGCTCCTTTGGGATGGTATATGAAGGCCTGGCACAAGGACTAGAGGCTG
+GAGAGGAGTCCACACCCGTGGCCCTGAAGACAGTGAATGAGTTGGCCAGC
+CCACGAGAACGCATTGAGTTCCTCAAGGAAGCTTCTGTCATGAAGGCATT
+CAAGTGTCACCATGTGGTACGTCTCCTGGGTGTTGTGTCTCAAGGCCAGC
+CAACTCTGGTCATCATGGAGTTAATGACTCGTGGGGACCTCAAGAGCCAT
+CTTCGATCTCTGCGGCCTGAGGCAGAG---AACAACCCTGGGCTCCCACG
+GCCAGCACTGGGAGATATGATCCAGATGGCTGGTGAGATTGCAGATGGCA
+TGGCTTACCTCGCTGCCAACAAGTTTGTGCATCGAGACCTGGCAGCCAGA
+AACTGCATGGTGTCCCAGGACTTCACCGTCAAGATTGGGGACTTCGGGAT
+GACTCGAGACGTGTACGAGACAGACTATTACCGCAAGGGCGGGAAAGGGC
+TGCTGCCCGTGCGCTGGATGGCCCCCGAGTCCCTCAAAGATGGAATCTTC
+ACCACACATTCGGATGTTTGGTCCTTCGGGGTGGTGCTCTGGGAGATCGT
+GACCCTAGCTGAACAGCCCTACCAGGGTTTATCCAACGAGCAGGTGCTCA
+AGTTTGTCATGGATGGTGGGGTCCTGGAGGAGCTGGAGAGCTGTCCCCTT
+CAGCTGCAAGAACTGATGAGCCGCTGCTGGCAGCAGAACCCACGCCTGCG
+GCCCACCTTCACCCACATCCTGGACAGCATTCGGGGGGAACTGCGGCCCT
+CTTTCCGCCTCTTTTCCTTCTATTACAGCCCAGAGTGCCAGG----GGAG
+CCAGGGGGCCCTGCT-----------------GCCTAATGCCAAACCCAA
+CTCCCTACCAACCCCAGAAGGGGCT-------CCCTCAGACTGCATGCCC
+CAAAATGGGGGTCCAGGGCACTGA--------------------------
+--------------------------------------------------
+-------------------------------------------------
+>ENSMUST00000005671_homosapiens
+ATGAAGTCTGGCTCCGGAGGAGGGTCCCCGA-CCTCGC---TGTGGGGGC
+TCGTGTTTCTCTCCGCCGCGCTCTCTCTCTGGCCGACGAG---TGGAGAA
+ATCTGTGGGCCCGGCATTGACATCCGCAACGACTATCAGCAGCTGAAGCG
+CCTGGAAAACTGCACGGTGATCGAGGGCTTCCTCCACATCCTGCTCATCT
+CCAAGGCC------GAGGACTACCGAAGCTACCGCTTCCCCAAGCTCACC
+GTCATCACTGAGTACTTGCTGCTCTTCCGAGTCGCTGGCCTCGAGAGCCT
+GGGAGACCTCTTCCCCAACCTCACAGTCATCCGTGGCTGGAAACTCTTCT
+ACAACTACGCACTGGTCATCTTCGAGATGACCAATCTCAAGGATATTGGG
+CTTTATAATCTGAGGAACATTACTCGGGGGGCCATCAGGATTGAGAAGAA
+CGCCGACCTCTGTTACCTCTCCACCATAGACTGGTCTCTCATCTTGGATG
+CGGTGTCCAATAACTACATTGTGGGGAACAA-GCCCCCGA--AGGAATGT
+GGGGACCTGTGTCCAGGGACATTGGAGGAGAA-GCCCA--TGTGTGAGAA
+GACCACCATCAACAATGAGTACAACTACCGCTGCTGGACCACAAATCGCT
+GCCAGAAAATGTGCCCAAGTGTGTGCGGGAAGCGAGCCTGCACCGAGAAC
+AACGAGTGCTGCCACCCGGAGTGCCTGGGCAGCTGCCACACACCGGACGA
+CAACACAACCTGCGTGGCCTGCAGACACTACTACTACAAAGGCGTGTGTG
+TGCCTGCCTGCCCGCCTGGCACCTACAGGTTCGAGGGCTGGCGCTGTGTG
+GATCGCGATTTCTGC----GCCAACATCCCCAACGCTGAGA------GCA
+G--TGACTCGGATGGCTTCGTTATCCACGACGATGAGTGCATGCAGGAGT
+GTCCCTCAGGCTTCATCCGCAACAGCACCCAGAGCATGTACTGTATCCCC
+TGCGAAGGCCCCTGCCCCAAAGTCTGCGGCGATGAAGAGAAGAAAACGAA
+AACCATCGATTCGGTGACTTCTGCTCAAATGCTCCAAGGATGCACCATCC
+TGAAGGGCAATCTGCTTATTAACATCCGGAGAGGCAATAACATTGCCTCG
+GAGTTGGAGAACTTCATGGGGCTCATCGAGGTGGTGACCGGCTACGTGAA
+GATCCGCCATTCTCATGCCTTGGTCTCCTTGTCCTTCCTGAAGAACCTTC
+GTCTCATCTTAGGAGAGGAGCAGCTGGAAGGGAACTACTCCTTCTATGTC
+CTAGACAACCAGAACTTGCAGCAGCTGTGGGACTGGAACCACCGGAACCT
+GACCGTCAGGTCCGGAAAGATGTACTTTGCTTTCAATCCCAAGCTGTGTG
+TCTCCGAAATTTACCGCATGGAGGAAGTGACCGGAACCAAGGGACGCCAG
+AGCAAAGGGGACATAAACACCAGGAACAACGGAGAGCGAGCTTCCTGTGA
+AAGTGATGTTCTCCGTTTCACCTCCACCACGACCTGGAAGAACCGAATCA
+TCATAACGTGGCACCGGTACCGGCCGCCGGACTACCGGGATCTCATCAGC
+TTCACAGTTTACTACAAGGAGGCACCATTTAAAAACGTTACGGAATATGA
+CGGGCAGGATGCCTGTGGCTCCAACAGCTGGAACATGGTGGATGTAGACC
+TGCCTCCGAA---CAAGGAGGGCGAGCCTGGCATTT--------------
+----TACTGCATGGGCTGAAGCCCTGGACCCAGTATGCTGTCTATGTCAA
+GGCTGTGACCCTCACCATGGTGGAAAACGACCATATCCGTGGGGCCAAAA
+GTGAAATCTTGTACATTCGCACCAATGCTTCAGTCCCTTCCATTCCCCTA
+GATGTCCTCTCAGCATCAAACTCTTCCTCTCAGCTGATTGTGAAGTGGAA
+TCCTCCAACTCTGCCCAATGGTAACTTGAGTTACTACATTGTGAGGTGGC
+AGCGGCAGCCCCAGGATGGTTACCTGTACCGGCACAACTACTGCTCCAAA
+GA--CAAAATA-CCCATCAGAAAGTACGCCGATGGTACCATCGACGTGGA
+GGAGGTGACGGAAAATCCCAAGACAGAAGTGTGTGGTGGTGATAAAGGGC
+CATGCTGCGCTTGCCCTAAAACTGAAGCTGAGAAGCAGGCTGAGAAGGAG
+GAGGCTGAGTACCGTAAAGTCTTTGAGAATTTCCTTCACAATTCCATCTT
+TGTGCCCAGGCCCGAAAGGAGGCGGAGAGACGTCATGCAAGTGGCCAACA
+CGACCATGTCCAGCCGAAGCAGGAACACCACGGTAGCTGACACCTACAAT
+ATCACAGACCCGGAGGAGTTCGAGACAGAGTACCCTTTCTTTGAGAGCAG
+AGTGGATAACAAGGAGAGGACTGTCATCTCCAACCTCCGGCCTTTCACTC
+TGTACCGCATCGATATCCACAGCTGCAACCACGAGGCTGAGAAGCTGGGC
+TGCAGCGCCTCCAACTTCGTCTTTGCGAGAACCATGCCAGCAGAAGGAGC
+AGATGATATCCCTGGTCCGGTGACCTGGGAGCCAAGACCCGAAAACTCCA
+TCTTTTTAAAGTGGCCAGAACCCGAGAACCCCAACGGATTGATCCTAATG
+TATGAAATTAAATACGGGTCGCAAGTCGAGGATCAGCGGGAA---TGTGT
+GTCCAGACAGGAGTACAGGAAGTACGGAGGGGCCAAACTCAACCGTCTAA
+ACCCAGGGAACTATACAGCCCGGATTCAGGCTACCTCCCTCTCTGGGAAT
+GGGTCATGGACAGATCCTGTGTTCTTCTATGTC-CCCGCCAAAACGACGT
+ATGA--GAACTTCATGCATCTGATCATTGCTCTGCCGGTTGCCATCCTGC
+TGATCGTTGGGGGGCTGGTTATCATGCTGTATGTCTTCCATAGAAAGAGA
+AATAACAGCAGGTTGGGCAATGGAGT-GCTGTATGCTTCTGTGAACCCCG
+AGTATTTCAGCGCAGCTGAT---------------GTGTACGTGCCTGAT
+GAATGGGAGGTAGCTCGAGAGAAGATCACCATGAACCGGGAGCTCGGACA
+AGGGTCCTTTGGGATGGTCTATGAAGGAGTGGCCAAGGGTGTGGTCAAGG
+ATGAACCCGAAACCAGAGTGGCCATCAAGACGGTAAACGAGGCTGCAAGT
+ATGCGTGAAAGAATCGAGTTTCTCAACGAGGCCTCGGTGATGAAGGAGTT
+CAATTGTCACCATGTGGTCCGGTTGCTGGGTGTGGTATCCCAAGGCCAGC
+CCACCCTGGTCATCATGGAACTAATGACACGCGGTGATCTCAAAAGTTAT
+CTCCGGTCTCTGAGGCCAGAAGTGGAGCAGAATAATCTAGTCCTCATTCC
+TCCGAGCTTAAGCAAGATGATCCAGATGGCTGGAGAGATTGCAGATGGCA
+TGGCCTACCTCAATGCCAACAAGTTCGTCCACAGAGACCTTGCTGCTAGG
+AACTGCATGGTAGCCGAAGATTTCACAGTCAAAATTGGAGATTTCGGTAT
+GACACGAGACATCTACGAGACGGACTACTACCGGAAAGGCGGGAAGGGGT
+TGCTGCCTGTGCGCTGGATGTCTCCCGAGTCCCTCAAGGATGGTGTCTTC
+ACTACTCATTCTGATGTCTGGTCCTTCGGGGTCGTCCTCTGGGAGATCGC
+CACGCTGGCTGAGCAGCCCTACCAGGGCTTGTCCAACGAGCAAGTTCTTC
+GTTTCGTCATGGAGGGTGGCCTTCTGGACAAGCCGGACAACTGCCCTGAT
+ATGCTGTTTGAACTTATGCGCATGTGCTGGCAGTATAACCCCAAGATGCG
+GCCCTCCTTCCTGGAGATCATCGGCAGCATCAAGGATGAGATGGAGCCCA
+GCTTCCAGGAGGTCTCCTTCTACTACAGCGAGGAGAACAAGCCTCCCGAG
+CCAGAGGAGCTGGAGATGG------------AGCCTGAGAACATGGAGAG
+CGTCCCACTGGACCCTTCGGCCTCCTCAGCCTCCCTGCCTCTGCCTGAAA
+GACACTCAGGACACAAGGCTGAGAATGGCCCGGGCCCTGGCGTGCTCGTT
+CTCCGCGCCAGTTTTGATGAGAGACAGCCTTACGCTCACATGAACGGGGG
+ACGCGCCAACGAGAGGGCCTTGCCTCTGCCCCAGTCCTCGACCTGCTGA
+>ENSRNOT00000019267_rattusnorvegicus
+ATGAAGTCTGGCTCCGGAGGAGGGTCCCCGA-CCTCGC---TGTGGGGGC
+TCGTGTTTCTCTCCGCCGCGCTCTCGCTCTGGCCGACGAG---TGGAGAA
+ATTTGTGGGCCCGGCATTGACATCCGCAACGACTATCAGCAGCTGAAGCG
+CCTGGAAAACTGCACGGTGATCGAGGGCTTCCTCCACATCCTGCTCATCT
+CCAAGGCC------GAGGACTACCGAAGCTACCGCTTCCCCAAGCTCACG
+GTCATCACCGAGTACTTGCTGCTGTTTCGAGTGGCCGGCCTCGAGAGCCT
+GGGAGACCTCTTCCCGAACCTCACAGTCATCCGTGGCTGGAAACTCTTCT
+ACAATTACGCACTGGTCATCTTCGAGATGACCAATCTCAAGGATATTGGG
+CTTTATAATCTGAGGAACATTACTCGGGGGGCCATCAGGATTGAGAAAAA
+CGCTGACCTCTGTTACCTCTCCACCATAGACTGGTCTCTCATCTTGGATG
+CGGTGTCCAATAACTACATTGTGGGGAACAA-GCCCCCAA--AGGAATGT
+GGGGACCTGTGTCCAGGGACCTTGGAGGAGAA-GCCCA--TGTGTGAGAA
+GACCACCATCAACAATGAGTACAACTACCGCTGCTGGACCACAAATCGCT
+GCCAGAAAATGTGCCCAAGTGTGTGTGGGAAGCGAGCCTGCACCGAGAAC
+AATGAGTGCTGCCACCCGGAGTGCCTAGGCAGCTGCCACACACCGGACGA
+CAACACAACCTGCGTGGCCTGCCGACACTACTACTACAAAGGCGTGTGCG
+TGCCTGCCTGCCCGCCTGGCACCTACAGGTTCGAGGGCTGGCGCTGTGTG
+GACCGGGATTTCTGC----GCCAACATCCCCAACGCCGAGA------GCA
+G--TGACTCAGATGGCTTCGTCATCCACGATGGCGAGTGCATGCAGGAGT
+GTCCATCAGGCTTCATCCGCAACAGCACCCAGAGCATGTACTGTATCCCC
+TGTGAAGGCCCCTGCCCCAAGGTCTGCGGCGATGAAGAAAAGAAAACGAA
+AACCATCGATTCTGTGACGTCTGCCCAGATGCTCCAAGGGTGCACCATTT
+TGAAGGGCAATCTGCTTATTAACATCCGGCGAGGCAATAACATTGCCTCG
+GAATTGGAGAACTTCATGGGGCTCATCGAGGTGGTGACTGGCTACGTGAA
+GATCCGCCATTCCCATGCCTTGGTCTCCTTGTCCTTCCTGAAGAACCTTC
+GTCTCATCTTAGGAGAGGAGCAGCTAGAAGGGAACTACTCCTTCTATGTC
+CTGGACAACCAGAACTTGCAGCAGCTGTGGGACTGGAACCACCGGAACCT
+GACCGTCAGGTCAGGGAAAATGTACTTCGCTTTCAATCCCAAGCTGTGTG
+TCTCTGAAATTTACCGGATGGAGGAGGTGACAGGAACAAAGGGACGGCAG
+AGCAAAGGAGACATAAACACCAGGAACAACGGAGAGCGAGCTTCCTGTGA
+AAGTGATGTTCTCCGTTTCACCTCCACCACCACCTGGAAGAACCGCATCA
+TCATAACGTGGCACCGGTACCGGCCGCCGGACTACCGGGATCTCATCAGT
+TTCACAGTCTACTACAAGGAGGCACCCTTTAAAAACGTCACGGAATACGA
+CGGGCAGGATGCCTGTGGCTCCAACAGCTGGAACATGGTGGACGTGGACC
+TGCCTCCGAA---CAAGGAGGGGGAGCCTGGCATTT--------------
+----TGCTGCATGGGCTGAAGCCCTGGACCCAGTATGCAGTCTATGTCAA
+GGCTGTGACCCTCACCATGGTGGAAAACGACCACATCCGTGGGGCCAAAA
+GTGAAATCTTGTACATTCGCACCAACGCTTCAGTTCCTTCCATTCCTCTA
+GATGTCCTCTCGGCATCAAACTCCTCCTCTCAGCTGATCGTGAAGTGGAA
+CCCCCCAACTCTGCCCAATGGTAACTTGAGTTACTACATTGTGAGGTGGC
+AGCGGCAGCCGCAGGATGGCTATCTGTTCCGGCACAACTACTGCTCCAAA
+GA--CAAAATA-CCCATCAGAAAGTACGCCGATGGTACCATCGATGTGGA
+GGAGGTGACAGAAAATCCCAAGACAGAAGTGTGCGGTGGTGATAAAGGGC
+CGTGCTGTGCCTGTCCTAAAACCGAAGCTGAGAAGCAGGCTGAGAAGGAG
+GAGGCTGAGTACCGTAAAGTCTTTGAGAATTTCCTTCACAACTCCATCTT
+TGTGCCCAGACCTGAGAGGAGGCGGAGAGATGTCCTGCAGGTGGCTAACA
+CCACCATGTCCAGCCGAAGCAGGAACACCACGGTAGCTGACACCTACAAT
+ATCACAGACCCGGAAGAGTTCGAGACAGAATACCCTTTCTTTGAGAGCAG
+AGTGGATAACAAGGAGAGGACTGTCATTTCCAACCTCCGGCCTTTCACTC
+TGTACCGTATCGATATCCACAGCTGCAACCACGAGGCTGAGAAGCTGGGC
+TGCAGCGCCTCCAACTTTGTCTTTGCAAGAACCATGCCAGCAGAAGGAGC
+AGATGACATTCCTGGCCCAGTGACCTGGGAGCCAAGACCTGAAAACTCCA
+TCTTTTTAAAGTGGCCAGAACCCGAGAACCCCAACGGATTGATTCTAATG
+TATGAAATAAAATACGGATCGCAAGTCGAGGATCAGCGGGAA---TGTGT
+GTCCAGACAGGAGTACAGGAAGTATGGAGGGGCCAAACTTAACCGTCTAA
+ACCCAGGGAACTATACGGCCCGGATTCAGGCTACCTCCCTCTCTGGGAAT
+GGGTCGTGGACAGATCCTGTGTTCTTCTATGTC-CCAGCCAAAACAACGT
+ATGA--GAATTTCATGCATCTGATCATTGCTCTGCCGGTTGCCATCCTGC
+TGATTGTGGGGGGCCTGGTAATCATGCTGTATGTCTTCCATAGAAAGAGG
+-------GTGTCTGGGCCAGCAGAAG-GCAGTA-GTTGGAAAGGGCCATT
+TCCATCCTGTCTGTTCCTA----------------GTGTACGTGCCTGAT
+GAATGGGAGGTAGCTCGGGAGAAGATCACCATGAACCGGGAGCTCGGACA
+AGGGTCCTTCGGGATGGTCTATGAAGGAGTGGCCAAGGGCGTGGTCAAGG
+ACGAGCCTGAAACCAGAGTGGCCATCAAGACAGTGAATGAGGCTGCAAGT
+ATGCGTGAGAGAATTGAGTTTCTCAACGAGGCCTCAGTGATGAAGGAGTT
+CAACTGTCACCATGTGGTCCGGTTGCTGGGTGTAGTATCCCAAGGCCAGC
+CCACCCTGGTCATCATGGAACTAATGACACGTGGCGATCTCAAAAGTTAT
+CTCCGGTCTCTAAGGCCAGAGGTGGAG---AATAATCTAGTCCTGATTCC
+TCCGAGCTTAAGCAAGATGATCCAGATGGCTGGAGAGATTGCAGATGGCA
+TGGCCTACCTCAATGCCAACAAGTTCGTCCACAGAGACCTGGCTGCTCGG
+AACTGCATGGTAGCTGAAGATTTCACAGTCAAAATTGGAGATTTTGGTAT
+GACACGAGACATCTACGAGACGGACTACTACCGGAAAGGCGGGAAGGGCT
+TGCTGCCTGTGCGCTGGATGTCTCCCGAGTCCCTCAAGGATGGCGTCTTC
+ACCACTCATTCCGATGTCTGGTCCTTTGGGGTCGTCCTCTGGGAGATCGC
+CACTCTGGCTGAGCAGCCGTACCAGGGCCTGTCCAACGAGCAAGTTCTTC
+GTTTCGTCATGGAGGGCGGCCTTCTGGACAAGCCGGATAACTGCCCCGAT
+ATGCTGTTTGAACTTATGCGCATGTGCTGGCAGTACAACCCCAAGATGCG
+GCCCTCCTTCCTGGAGATCATCGGAAGCATCAAGGATGAGATGGAGCCCA
+GTTTCCAGGAGGTCTCCTTCTACTACAGCGAGGAGAACAAGCCTCCAGAG
+CCGGAGGAGCTGGAGATGGAGCTGGAGCTGGAGCCCGAGAACATGGAGAG
+CGTCCCGCTGGACCCTTCGGCCTCCTCAGCCTCCCTGCCTCTGCCTGAAA
+GACACTCAGGACACAAGGCTGAGAACGGCCC------TGGCGTGCTGGTT
+CTCCGTGCCAGTTTTGATGAGAGACAGCCTTACGCTCACATGAATGGGGG
+ACGCGCCAACGAGAGGGCCTTGCCTCTGCCCCAGTCCTCAACCTGC---
+>ENSPTRT00000013802_pantroglodytes
+ATGAAGTCTGGCTCCGGAGGAGGGTCCCCGA-CCTCGC---TGTGGGGGC
+TCCTGTTTCTCTCCGCCGCGCTCTCGCTCTGGCCGACGAG---TGGAGAA
+ATCTGCGGGCCAGGCATCGACATCCGCAACGACTATCAGCAGCTGAAGCG
+CCTGGAGAACTGCACGGTGATCGAGGGCTACCTCCACATCCTGCTCATCT
+CCAAGGCC------GAGGACTACCGCAGCTACCGCTTCCCCAAGCTCACG
+GTCATTACCGAGTACTTGCTGCTGTTCCGAGTGGCTGGCCTCGAGAGCCT
+CGGAGACCTCTTCCCCAACCTCACGGTCATCCGCGGCTGGAAACTCTTCT
+ACAACTACGCCCTGGTCATCTTCGAGATGACCAATCTCAAGGATATTGGG
+CTTTACAACCTGAGGAACATTACTCGGGGGGCCATCAGGATTGAGAAAAA
+TGCTGACCTCTGTTACCTCTCCACTGTGGACTGGTCCCTGATCCTGGATG
+CGGTGTCCAATAACTACATTGTGGGGAATAA-GCCCCCAA--AGGAATGT
+GGGGACCTGTGTCCAGGGACCATGGAGGAGAA-GCCGA--TGTGTGAGAA
+GACCACCATCAACAATGAGTACAACTACCGCTGCTGGACCACAAACCGCT
+GCCAGAAAATGTGCCCGAGCACGTGTGGGAAGCGGGCGTGCACCGAGAAC
+AACGAGTGCTGCCACCCCGAGTGCCTGGGCAGCTGCAGCGCGCCTGACAA
+CGACACGGCCTGTGTAGCTTGCCGCCACTACTACTATGCCGGTGTCTGTG
+TGCCTGCCTGCCCGCCCAACACCTACAGGTTTGAGGGCTGGCGCTGTGTG
+GACCGTGACTTCTGC----GCCAACATCCTCAGCGCCGAGA------GCA
+G--CGACTCCGAGGGGTTTGTGATCCACGACGGCGAGTGCATGCAGGAGT
+GCCCCTCGGGCTTCATCCGCAACGGCAGCCAGAGCATGTACTGCATCCCT
+TGTGAAGGTCCTTGCCCGAAGGTCTGTG---AGGAAGAAAAGAAAACAAA
+GACCATTGATTCTGTTACTTCTGCTCAGATGCTCCAAGGATGCACCATCT
+TCAAGGGCAATTTGCTCATTAACATCCGACGGGGGAATAACATTGCTTCA
+GAGCTGGAGAACTTCATGGGGCTCATCGAGGTGGTGACGGGCTACGTGAA
+GATCCGCCATTCTCATGCCTTGGTCTCCTTGTCCTTCCTAAAAAACCTTC
+GCCTCATCTTAGGAGAGGAGCAGCTAGAAGGGAATTACTCCTTCTACGTC
+CTCGACAACCAGAACTTGCAGCAACTATGGGACTGGGACCACCGCAACCT
+GACCATCAAAGCAGGGAAAATGTACTTTGCTTTCAATCCCAAATTATGTG
+TTTCCGAAATTTACCGCATGGAGGAAGTGACGGGGACTAAAGGGCGCCAA
+AGCAAAGGGGACATAAACACCAGGAACAACGGGGAGAGAGCCTCCTGTGA
+AAGTGACGTCCTGCATTTCACCTCCACCACCACGTCGAAGAATCGCATCA
+TCATAACCTGGCACCGGTACCGGCCCCCTGACTACAGGGATCTCATCAGC
+TTCACCGTTTACTACAAGGAAGCACCCTTTAAGAATGTCACGGAGTATGA
+TGGGCAGGACGCCTGCGGCTCCAACAGCTGGAACATGGTGGACGTGGACC
+TCCCGCCCAA---CAAGGACGTGGAGCCCGGCATCT--------------
+----TACTACATGGGCTGAAGCCCTGGACTCAGTACGCCGTTTACGTCAA
+GGCTGTGACCCTCACCATGGTGGAGAATGACCATATCCGTGGGGCCAAGA
+GTGAGATCTTGTACATTCGCACCAATGCTTCAGTTCCTTCCATTCCCTTG
+GACGTTCTTTCAGCATCGAACTCCTCTTCTCAGTTAATCGTGAAGTGGAA
+CCCTCCCTCTCTGCCCAACGGCAACCTGAGTTACTACATTGTGCGCTGGC
+AGCGGCAGCCTCAGGACGGCTACCTTTACCGGCACAATTACTGCTCCAAA
+GA--CAAAATC-CCCATCAGGAAGTATGCCGACGGCACCATCGACATTGA
+GGAGGTCACAGAGAACCCCAAGACTGAGGTGTGTGGTGGGGAGAAAGGGC
+CTTGCTGCGCCTGCCCCAAAACTGAAGCCGAGAAGCAGGCCGAGAAGGAG
+GAGGCTGAATACCGCAAAGTCTTTGAGAATTTCCTGCACAACTCCATCTT
+CGTGCCCAGACCTGAAAGGAAGCGGAGAGATGTCATGCAAGTGGCCAACA
+CCACCATGTCCAGCCGAAGCAGGAACACCACGGCCGCAGACACCTACAAC
+ATCACCGACCCGGAAGAGCTGGAGACAGAGTACCCTTTCTTTGAGAGCAG
+AGTGGATAACAAGGAGAGAACTGTGATTTCTAACCTTCGGCCTTTCACAT
+TGTACCGCATCGATATCCACAGCTGCAACCACGAGGCTGAGAAGCTGGGC
+TGCAGCGCCTCCAACTTTGTCTTTGCAAGGACTATGCCCGCAGAAGGAGC
+AGATGACATTCCTGGGCCAGTGACCTGGGAGCCAAGGCCTGAAAACTCCA
+TCTTTTTAAAGTGGCCGGAACCTGAGAATCCCAATGGATTGATTCTAATG
+TATGAAATAAAATACGGATCACAAGTTGAGGATCAGCGAGAA---TGTGT
+GTCCAGACAGGAATACAGGAAGTATGGAGGGGCCAAGCTAAACCGGCTAA
+ACCCGGGGAACTACACAGCCCGGATTCAGGCCACATCTCTCTCTGGGAAT
+GGGTCGTGGACAGATCCTGTGTTCTTCTATGTC-CAGGCCAAAACAGGAT
+ATGA--AAACTTCATCCATCTGATCATCGCTCTGCCCGTCGCTGTCCTGT
+TGATCGTGGGAGGGTTGGTGATTATGCTGTACGTCTTCCATAGAAAGAGA
+AATAACAGCAGGCTGGGGAATGGAGT-GCTGTATGCCTCTGTGAACCCGG
+AGTACTTCAGCGCTGCTGAT---------------GTGTACGTTCCCGAT
+GAGTGGGAGGTGGCTCGGGAGAAGATCACCATGAGCCGGGAACTTGGGCA
+GGGGTCCTTTGGGATGGTCTATGAAGGAGTTGCCAAGGGTGTGGTGAAAG
+ATGAACCTGAAACCAGAGTGGCCATTAAAACAGTGAACGAGGCCGCAAGC
+ATGCGTGAAAGGATTGAGTTTCTCAACGAAGCTTCTGTGATGAAGGAGTT
+CAATTGTCACCATGTGGTGCGATTGCTGGGTGTGGTGTCCCAAGGCCAGC
+CAACACTGGTCATCATGGAACTGATGACACGGGGCGATCTCAAAAGTTAT
+CTCCGGTCTCTGAGGCCAGAAATGGAG---AATAATCCAGTCCTAGCACC
+TCCAAGCCTGAGCAAGATGATTCAGATGGCCGGAGAGATTGCAGACGGCA
+TGGCATACCTCAACGCCAATAAGTTCGTCCACAGAGACCTTGCTGCCCGG
+AATTGCATGGTAGCCGAAGATTTCACAGTCAAAATCGGAGATTTTGGTAT
+GACGCGAGATATCTATGAGACAGACTATTACCGGAAAGGAGGGAAAGGGC
+TGCTGCCCGTGCGCTGGATGTCTCCCGAGTCCCTCAAGGATGGAGTCTTC
+ACCACTTACTCGGACGTCTGGTCCTTCGGGGTCGTCCTCTGGGAGATCGC
+CACACTGGCCGAGCAGCCCTACCAGGGCTTGTCCAACGAGCAAGTCCTTC
+GCTTCGTCATGGAGGGCGGCCTTCTGGACAAGCCAGACAACTGTCCCGAC
+ATGCTGTTTGAACTGATGCGCATGTGCTGGCAGTATAACCCCAAGATGAG
+GCCTTCCTTCCTGGAGATCATCAGCAGCATCAAAGAGGAGATGGAGCCTG
+GCTTCCGGGAGGTCTCCTTCTACTACAGCGAGGAGAACAAGCTGCCCGAG
+CCGGAGGAGCTGGACCTGG------------AGCCAGAGAACATGGAGAG
+CGTCCCCCTGGACCCCTCGGCCTCC---------CTGCCACTGCCCGACA
+GACACTCAGGACACAAGGCCGAGAACGGCCCCGGCCCTGGGGTGCTGGTC
+CTCCGCGCCAGCTTCGACGAGAGACAGCCGTACGCACACATGAACGGGGG
+CCGCAAGAACGAGCGGGCCTTGCCGCTGCCCCAGTCTTCGACCTGCTGA