Mercurial > repos > earlhaminst > t_coffee
changeset 0:794a6e864a96 draft
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty
author | earlhaminst |
---|---|
date | Thu, 15 Dec 2016 11:04:25 -0500 |
parents | |
children | b3833e5b50d4 |
files | filter_by_fasta_ids.py t_coffee.xml t_coffee_to_cigar.pl test-data/cigar.tabular test-data/ids.txt test-data/input.fasta test-data/output1.fasta |
diffstat | 7 files changed, 811 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# File: filter_by_fasta_ids.py (new file in this changeset)
""" A script to build specific fasta databases """
from __future__ import print_function

import logging
import sys


def _strip_line(line):
    """Return line without trailing whitespace or any CR/LF characters."""
    # rstrip() drops the usual line ending; the replace() calls also remove
    # CR/LF characters that may be embedded when a file with foreign line
    # endings is read without newline translation.
    return line.rstrip().replace('\n', '').replace('\r', '')


# ===================================== Iterator ===============================
class Sequence(object):
    """Hold one FASTA record.

    FASTAReader fills in both attributes: ``header`` is the header line
    (leading '>' included, line ending removed) and ``sequence_parts`` is
    the list of raw lines that followed the header.
    """
    def __init__(self):
        self.header = ""
        self.sequence_parts = []

    def get_sequence(self):
        """Return the sequence as a single string without line breaks."""
        return "".join(_strip_line(line) for line in self.sequence_parts)


class FASTAReader(object):
    """
    FASTA db iterator. Returns a single FASTA sequence object per step.

    The underlying file is closed automatically once the last record has
    been returned. Use the reader as a context manager (or call close())
    when iteration may stop early.
    """
    def __init__(self, fasta_name):
        self.fasta_file = open(fasta_name)
        # One line of look-ahead: always the header of the next record,
        # or '' once the end of the file has been reached.
        self.next_line = self.fasta_file.readline()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def close(self):
        """Close the underlying file; safe to call more than once."""
        if not self.fasta_file.closed:
            self.fasta_file.close()

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next record as a Sequence; raise StopIteration at EOF."""
        next_line = self.next_line
        if not next_line:
            self.close()
            raise StopIteration

        seq = Sequence()
        seq.header = _strip_line(next_line)

        # Collect every line up to (not including) the next header line.
        next_line = self.fasta_file.readline()
        while next_line and next_line[0] != '>':
            seq.sequence_parts.append(next_line)
            next_line = self.fasta_file.readline()
        self.next_line = next_line
        return seq

    # Python 2/3 compat
    next = __next__
# ==============================================================================


def target_match(target, search_entry):
    """Return the first element of target contained in search_entry.

    search_entry is upper-cased before the comparison, so the elements of
    target are expected to be upper case already. Returns None when no
    element matches.

    NOTE: this is a substring test, so a target such as '>ABC1' also
    matches the header '>ABC10'.
    """
    search_entry = search_entry.upper()
    for atarget in target:
        if atarget in search_entry:
            return atarget
    return None


def read_targets(path):
    """Read the wanted IDs (one per line) from path.

    Each ID is stripped, upper-cased and prefixed with '>' so it can be
    compared against FASTA header lines. Blank lines are skipped: they
    would otherwise become the target '>' which matches every header.
    """
    with open(path) as handle:
        names = (line.strip() for line in handle)
        return [">%s" % name.upper() for name in names if name]


def filter_entries(targets, entries, work_summary=None):
    """Yield (header, sequence) for every entry matching one of targets.

    targets is modified in place: a target is removed as soon as it has
    matched, so each wanted ID selects at most one entry. Iteration stops
    once no targets are left. When work_summary is given, its 'found'
    counter is increased for every selected entry and its 'duplicates'
    counter for every selected entry whose sequence was already selected
    under another ID (such entries are still yielded).
    """
    if not targets:
        return
    seen_sequences = set()
    for entry in entries:
        matched = target_match(targets, entry.header)
        if not matched:
            continue
        targets.remove(matched)
        sequence = entry.get_sequence()
        if work_summary is not None:
            work_summary['found'] += 1
            if sequence in seen_sequences:
                work_summary['duplicates'] += 1
        seen_sequences.add(sequence)
        yield entry.header, sequence
        if not targets:
            # Everything wanted has been found; skip the rest of the file.
            return


def main(argv=None):
    """Print the FASTA records whose IDs are listed in a text file.

    argv holds the command line arguments without the program name
    (defaults to sys.argv[1:]): the ID list file followed by the FASTA
    file. Selected records are written to stdout as a header line plus a
    single sequence line; a summary is logged to 'filter_fasta_log'.
    """
    if argv is None:
        argv = sys.argv[1:]
    if len(argv) < 2:
        sys.exit("Usage: filter_by_fasta_ids.py <ids_file> <fasta_file>")

    logging.basicConfig(filename='filter_fasta_log',
                        level=logging.INFO,
                        format='%(asctime)s :: %(levelname)s :: %(message)s',)

    work_summary = {'wanted': 0, 'found': 0, 'duplicates': 0}
    targets = read_targets(argv[0])
    work_summary['wanted'] = len(targets)

    with FASTAReader(argv[1]) as homd_db:
        for header, sequence in filter_entries(targets, homd_db, work_summary):
            print(header)
            print(sequence)
    for parm, count in work_summary.items():
        logging.info('%s ==> %d', parm, count)


if __name__ == "__main__":
    main()
<!-- File: t_coffee.xml (new file in this changeset) -->
<!-- Galaxy tool wrapper for T-Coffee; optionally pre-filters the FASTA input
     by a list of IDs and can convert the alignment into CIGAR lines. -->
<tool id="t_coffee" name="T-Coffee" version="11.0.8">
    <description>multiple sequence alignment</description>
    <requirements>
        <requirement type="package" version="11.0.8">t_coffee</requirement>
    </requirements>
    <stdio>
        <exit_code range="1:" level="fatal" />
    </stdio>
    <version_command>
        t_coffee -version | grep Version
    </version_command>
    <command>
<![CDATA[
    ## With filtering enabled, the selected sequences are piped into t_coffee via stdin
    #if str($input_type.filter_fasta) == 'yes'
        #set $input = '-infile=stdin'
        python '$__tool_directory__/filter_by_fasta_ids.py' '$input_type.identifiers' '$input_type.fasta_input' |
    #end if

    ## Build the comma-separated -method option from the three method selects
    #set $method_opt = ''
    #if $method01
        #set $method_opt += str($method01) + ','
    #end if
    #if $method02
        #set $method_opt += str($method02) + ','
    #end if
    #if $method03
        #set $method_opt += str($method03) + ','
    #end if
    #if $method_opt
        #set $method_opt = '-method ' + $method_opt[:-1]
    #end if

    ## Build the -output option; 'cigar' and 'dnd' are not passed to t_coffee,
    ## and 'cigar' needs fasta_aln because it is derived from that file
    #set $output_opt = ''
    #if $outputs
        #set $outputs_arr = str($outputs).split(',')
        #for $o in $outputs_arr
            #if $o not in ['cigar', 'dnd']
                #set $output_opt += $o + ','
            #end if
        #end for
        #if 'cigar' in $outputs_arr and 'fasta_aln' not in $outputs_arr
            #set $output_opt += 'fasta_aln,'
        #end if
    #else
        #set $outputs_arr = []
    #end if
    #if $output_opt
        #set $output_opt = '-output ' + $output_opt[:-1]
    #end if

    t_coffee '$input' $method_opt $output_opt -n_core \${GALAXY_SLOTS:-1} -run_name t_coffee_out -quiet

    #if 'cigar' in $outputs_arr
        && perl '$__tool_directory__/t_coffee_to_cigar.pl' t_coffee_out.fasta_aln > '$cigar'
    #end if
]]>
    </command>
    <inputs>
        <conditional name="input_type">
            <param name="filter_fasta" type="select" label="Filter FASTA input?">
                <option value="no">No</option>
                <option value="yes">Yes</option>
            </param>
            <when value="yes">
                <param name="fasta_input" type="data" format="fasta" label="FASTA sequences" />
                <param name="identifiers" type="data" format="txt" label="List of FASTA sequence IDs" />
            </when>
            <when value="no">
                <param name="input" type="data" format="fasta" label="FASTA sequences" />
            </when>
        </conditional>
        <param name="method01" type="select" display="checkboxes" multiple="true" label="Pairwise Structural Method">
            <option value="sap_pair">sap_pair</option>
            <option value="TMalign_pair">TMalign_pair</option>
            <option value="mustang_pair">mustang_pair</option>
        </param>
        <param name="method02" type="select" display="checkboxes" multiple="true" label="Multiple Sequence Alignment Methods">
            <option value="pcma_msa">pcma_msa</option>
            <option value="clustalw_msa">clustalw_msa</option>
            <option value="dialigntx_msa">dialigntx_msa</option>
            <option value="poa_msa">poa_msa</option>
            <option value="muscle_msa">muscle_msa</option>
            <option value="probcons_msa">probcons_msa</option>
            <option value="t_coffee_msa">t_coffee_msa</option>
            <option value="amap_msa">amap_msa</option>
            <option value="kalign_msa">kalign_msa</option>
        </param>
        <param name="method03" type="select" display="checkboxes" multiple="true" label="Pairwise Sequence Alignment Methods">
            <option value="fast_pair">fast_pair</option>
            <option value="clustalw_pair">clustalw_pair</option>
            <option value="lalign_id_pair">lalign_id_pair</option>
            <option value="slow_pair">slow_pair</option>
            <option value="proba_pair">proba_pair</option>
        </param>
        <param name="outputs" type="select" multiple="true" optional="false" display="checkboxes" label="Additional outputs">
            <option value="cigar">CIGAR</option>
            <option value="clustalw_aln">clustalw_aln</option>
            <option value="dnd" selected="true">dnd</option>
            <option value="fasta_aln">fasta_aln</option>
            <option value="fasta_seq">fasta_seq</option>
            <option value="msf_aln">msf_aln</option>
            <option value="phylip">phylip</option>
            <option value="pir_aln">pir_aln</option>
            <option value="pir_seq">pir_seq</option>
            <option value="score_ascii">score_ascii</option>
            <option value="score_html">score_html</option>
        </param>
    </inputs>
    <!-- Each output is only created when its name is ticked in "Additional outputs" -->
    <outputs>
        <data name="cigar" format="tabular" label="${tool.name} on ${on_string}: cigar" >
            <filter>'cigar' in outputs</filter>
        </data>
        <data name="clustalw_aln" format="clustalw" label="${tool.name} on ${on_string}: clustalw_aln" from_work_dir="t_coffee_out.clustalw_aln">
            <filter>'clustalw_aln' in outputs</filter>
        </data>
        <data name="dnd" format="nhx" label="${tool.name} on ${on_string}: newick.dnd" from_work_dir="t_coffee_out.dnd">
            <filter>'dnd' in outputs</filter>
        </data>
        <data name="fasta_aln" format="fasta" label="${tool.name} on ${on_string}: fasta_aln" from_work_dir="t_coffee_out.fasta_aln">
            <filter>'fasta_aln' in outputs</filter>
        </data>
        <data name="fasta_seq" format="fasta" label="${tool.name} on ${on_string}: fasta_seq" from_work_dir="t_coffee_out.fasta_seq">
            <filter>'fasta_seq' in outputs</filter>
        </data>
        <data name="msf_aln" format="msf" label="${tool.name} on ${on_string}: msf_aln" from_work_dir="t_coffee_out.msf_aln">
            <filter>'msf_aln' in outputs</filter>
        </data>
        <!-- NOTE(review): the phylip output is presumably a PHYLIP alignment rather than PhyloXML; confirm the datatype -->
        <data name="phylip" format="phyloxml" label="${tool.name} on ${on_string}: phylip" from_work_dir="t_coffee_out.phylip">
            <filter>'phylip' in outputs</filter>
        </data>
        <data name="pir_aln" format="pir" label="${tool.name} on ${on_string}: pir_aln" from_work_dir="t_coffee_out.pir_aln">
            <filter>'pir_aln' in outputs</filter>
        </data>
        <data name="pir_seq" format="pir" label="${tool.name} on ${on_string}: pir_seq" from_work_dir="t_coffee_out.pir_seq">
            <filter>'pir_seq' in outputs</filter>
        </data>
        <data name="score_ascii" format="ascii" label="${tool.name} on ${on_string}: score_ascii" from_work_dir="t_coffee_out.score_ascii">
            <filter>'score_ascii' in outputs</filter>
        </data>
        <data name="score_html" format="html" label="${tool.name} on ${on_string}: score_html" from_work_dir="t_coffee_out.score_html">
            <filter>'score_html' in outputs</filter>
        </data>
    </outputs>
    <tests>
        <!-- Plain alignment of the complete FASTA input -->
        <test>
            <param name="filter_fasta" value="no" />
            <param name="input" value="input.fasta" ftype="fasta" />
            <param name="method02" value="clustalw_msa" />
            <param name="outputs" value="fasta_aln" />
            <output name="fasta_aln" file="output1.fasta" />
        </test>
        <!-- Alignment of an ID-filtered subset, converted to CIGAR lines -->
        <test>
            <param name="filter_fasta" value="yes" />
            <param name="fasta_input" value="input.fasta" ftype="fasta" />
            <param name="identifiers" value="ids.txt" ftype="txt" />
            <param name="method02" value="clustalw_msa" />
            <param name="outputs" value="cigar" />
            <output name="cigar" file="cigar.tabular" />
        </test>
    </tests>
    <help>
**What it does**

This tool is a wrapper for the T-Coffee multiple sequence alignment suite. The input is a set of sequences in FASTA format. Apart from running on the complete FASTA input, it can also run on a subset of sequences by providing a list of the FASTA IDs.

This wrapper offers selected advanced T-Coffee options like the selection of the alignment methods to use: ''Pairwise Structural Method'', ''Multiple Sequence Alignment Methods'' or ''Pairwise Sequence Alignment Methods''.

The T-Coffee documentation can be found at http://www.tcoffee.org/Projects/tcoffee/ .

**Example**

Suppose you have 5 sequences in FASTA format::

    >1aboA
    NLFVALYDFVASGDNTLSITKGEKLRVLGYNHNGEWCEAQTKNGQGWVPS
    NYITPVN
    >1ycsB
    KGVIYALWDYEPQNDDELPMKEGDCMTIIHREDEDEIEWWWARLNDKEGY
    VPRNLLGLYP
    >1pht
    GYQYRALYDYKKEREEDIDLHLGDILTVNKGSLVALGFSDGQEARPEEIG
    WLNGYNETTGERGDFPGTYVEYIGRKKISP
    >1vie
    DRVRKKSGAAWQGQIVGWYCTNLTPEGYAVESEAHPGSVQIYPVAALERI
    N
    >1ihvA
    NFRVYYRDSRDPVWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRD

By selecting fasta_aln among the "Additional outputs" of the wrapper, the user will obtain the multiple alignment in FASTA format::

    >1aboA
    NL-FVA---LYDFVASGDNTLSITKGEKLR-------VLGYN-------H
    NGEWCEA--QTKN-GQGWVPSNYIT------PVN
    >1ycsB
    KGVIYA---LWDYEPQNDDELPMKEGDCMT-------IIHREDE-----D
    EIEWWWA--RLND-KEGYVPRNLLG------LYP
    >1pht
    GYQYRA---LYDYKKEREEDIDLHLGDILTVNKGSLVALGFSDGQEARPE
    EIGWLNGYNETTG-ERGDFPGTYVEYIGRKKISP
    >1vie
    DR-----------VRK--KSGAAWQGQIVGWYCTNLTPEGYAVE------
    ------S--EAHPGSVQIYPVAALE------RIN
    >1ihvA
    NF-RVYYRDSRDPVWKGPA-KLLWKGEGAV-------VIQDN-------S
    DI--------------KVVPRRKAK-----IIRD
    </help>
    <citations>
        <citation type="doi">10.1006/jmbi.2000.4042</citation>
    </citations>
</tool>
#!/usr/bin/perl
# File: t_coffee_to_cigar.pl (new file in this changeset)
#
use strict;
use warnings;

# A simple Perl script to convert FASTA sequence alignments into 2-column
# output where first column is FASTA id and second is CIGAR line
# Usage: t_coffee_to_cigar.pl <file>

# Convert one aligned sequence into a CIGAR string.
# Every run of gap characters ('-') becomes <n>D and every run of any other
# character becomes <n>M; the count is omitted for runs of length 1.
# An empty sequence gives an empty string.
sub alignment_to_cigar {
    my ($sequence) = @_;
    my $cigar = '';
    while ($sequence =~ /(-+|[^-]+)/g) {
        my $run = $1;
        my $op = substr($run, 0, 1) eq '-' ? 'D' : 'M';
        $cigar .= length($run) if length($run) > 1;
        $cigar .= $op;
    }
    return $cigar;
}

# Print one output row: the FASTA id, a tab, and the CIGAR line of $sequence.
sub convert_and_print {
    my ($header, $sequence) = @_;
    print "$header\t" . alignment_to_cigar($sequence) . "\n";
}

my $file1 = $ARGV[0];
die "Usage: $0 <fasta_alignment_file>\n" unless defined $file1;
open(my $fh1, '<', $file1) or die "Cannot open '$file1': $!\n";

my $header = '';
my $sequence = '';
# Set once the first header line has been read. Testing the header string
# itself for truth would silently drop a record whose id is "0" or empty.
my $have_record = 0;
while (my $line = <$fh1>) {
    chomp $line;
    # Drop the CR of CRLF line endings so it is not counted as a match column
    $line =~ s/\r\z//;
    if (substr($line, 0, 1) eq '>') {
        convert_and_print($header, $sequence) if $have_record;
        $header = substr($line, 1);
        $sequence = '';
        $have_record = 1;
    } else {
        $sequence .= $line;
    }
}
close $fh1;
# Flush the last record; print nothing for input without any header line.
convert_and_print($header, $sequence) if $have_record;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cigar.tabular Thu Dec 15 11:04:25 2016 -0500 @@ -0,0 +1,3 @@ +ENSMUST00000091291_musmusculus 41M3D64MD473MD7M2D375M3D74M3D771M2D13MD227MD13M7D226M3D58M3D6MD21M3D437M2D33MD76MD1017M12D34M10D14M3D22M4D10M4D107M +ENSCAFT00000026349_canisfamiliaris 16D26M2D64MD422MD8M2D140M3D195M2D5M8D16M5D52M3D61M6D6M3D672M20D24MD311M8D25M2D58M2D11M2D103M5D17M6D18M5D22M2D10MD37M3D499M3D10M4D30M5D12M4D35M5D6M10D904M4D27M6D40M8D16M3D21M126D +ENSRNOT00000019267_rattusnorvegicus 75MD14M6D112M6D317MD8M2D40MD7M2D273M4D23M3D16M5D794M3D6M18D315M2D7MD46M2D249MD335M3D142M2D8MD8M2D14MD35MD63M5D16M3D16M4D10M6D1152M3D
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ids.txt Thu Dec 15 11:04:25 2016 -0500 @@ -0,0 +1,3 @@ +ENSMUST00000091291_musmusculus +ENSCAFT00000026349_canisfamiliaris +ENSRNOT00000019267_rattusnorvegicus
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input.fasta Thu Dec 15 11:04:25 2016 -0500 @@ -0,0 +1,14 @@ +>ENSMUST00000091291_musmusculus +ATGGGCTTCGGGAGAGGATGTGAGACGACGGCTGTGCCATTGCTGGTGGCCGTGGCCGCGTTGCTGGTGGGCACAGCCGGCCACCTGTACCCTGGAGAGGTGTGCCCTGGTATGGACATCCGGAACAACCTGACCAGGCTACATGAGCTGGAGAACTGCTCAGTCATTGAGGGCCATCTGCAGATCCTCCTGATGTTCAAGACCAGACCCGAAGATTTCCGAGACCTCAGTTTCCCCAAACTCATCATGATCACAGATTACCTGCTTCTCTTCCGTGTCTATGGTCTGGAAAGTCTGAAAGACCTCTTCCCAAATCTCACAGTCATCCGAGGCTCCCGTCTCTTCTTCAACTATGCCCTGGTTATCTTCGAGATGGTCCACCTGAAGGAGCTGGGGCTTTATAACCTCATGAACATCACCCGGGGCTCTGTCCGCATCGAGAAGAATAATGAGCTCTGCTACCTGGCCACTATCGACTGGTCCCGTATCCTGGATTCTGTGGAGGACAACTACATTGTACTGAACAAAGATGACAACGAGGAATGTGGGGATGTCTGTCCAGGCACCGCCAAGGGCAAGACCAACTGTCCTGCCACTGTCATCAATGGGCAGTTTGTGGAACGGTGCTGGACACACAGTCATTGTCAGAAAGTTTGCCCAACCATCTGTAAGTCACATGGCTGCACAGCTGAAGGCCTGTGCTGCCACAAAGAGTGCCTGGGCAACTGTTCGGAACCTGATGACCCCACCAAGTGTGTGGCCTGTCGCAACTTCTATCTGGATGGTCAGTGTGTGGAGACCTGCCCGCCACCCTACTATCACTTCCAGGACTGGCGCTGTGTGAACTTCAGCTTCTGCCAAGACCTTCACTTCAAATGCAGGAACTCTCGGAAGCCTGGCTGCCACCAATACGTCATTCACAACAATAAGTGCATCCCCGAGTGCCCGTCTGGCTATACCATGAATTCCAGCAACTTGATGTGCACCCCATGTCTGGGACCCTGCCCTAAGGTCTGCCAAATCCTCGAAGGTGAGAAGACCATTGATTCTGTGACATCTGCCCAGGAGCTCCGAGGCTGCACTGTGATCAACGGTAGCCTGATCATCAACATCCGAGGGGGCAACAACCTGGCAGCTGAGCTGGAGGCTAACCTTGGCCTCATTGAAGAAATTTCGGGATTTCTAAAGATCCGCCGCTCCTATGCTCTGGTATCACTTTCTTTCTTCAGGAAGCTACATCTGATTCGAGGAGAGACCTTGGAAATTGGGAACTATTCTTTTTATGCCTTGGACAACCAGAACCTGAGGCAACTCTGGGACTGGAGCAAACACAACCTCACCATCACTCAGGGCAAGCTCTTCTTCCATTACAACCCGAAACTCTGCTTGTCTGAAATTCACAAGATGGAAGAAGTCTCCGGAACTAAGGGCCGTCAGGAGAGGAACGACATTGCCCTGAAGACCAATGGGGACCAGGCATCGTGTGAAAATGAATTGCTTAAATTTTCTTTCATTCGGACATCTTTTGACAAGATCCTGTTGAGGTGGGAACCCTACTGGCCCCCCGACTTCCGAGATCTCCTGGGATTCATGTTGTTCTACAAAGAGGCCCCTTATCAGAATGTGACAGAGTTTGATGGGCAGGATGCTTGTGGCTCCAACAGCTGGACTGTGGTGGATATTGACCCGCCCCAGAGGTCCAACGACCCCAAGTCTCAGACCCCAAGCCACCCTGGGTGGCTGATGCGGGGCCTCAAACCCTGGACCCAATACGCCATCTTTGTGAAGACCTTGGTTACCTTCTCTGATGAACGGCGGACCTATGGAGCCAAAAGTGATATCATC
TATGTGCAAACAGATGCCACTAATCCTTCTGTCCCCCTGGATCCCATATCAGTTTCTAATTCCTCATCTCAGATTATCTTAAAGTGGAAGCCCCCCTCTGACCCCAATGGCAACATCACACACTACCTGGTGTACTGGGAGAGGCAAGCAGAGGACAGCGAGCTGTTTGAGCTGGATTATTGTCTCAAAGGGCTGAAGCTCCCTTCACGGACCTGGTCCCCACCCTTTGAGTCTGATGATTCTCAGAAGCACAATCAGAGTGAGTATGACGACTCGGCCAGTGAGTGCTGCTCATGCCCTAAGACTGACTCTCAGATCCTGAAGGAGCTGGAGGAGTCTTCATTCAGGAAGACCTTCGAGGATTACCTGCACAACGTGGTTTTTGTCCCCAGGCCATCCCGAAAGCGAAGATCCCTTGAAGAGGTGGGGAATGTGACAGCCACCACACTCACACTTCCAGATTTCCCCAACGTCTCCTCTACCATTGTGCCCACAAGTCAGGAGGAGCACAGGCCATTTGAGAAAGTGGTGAACAAGGAGTCACTTGTCATCTCTGGCCTGAGACACTTCACTGGGTACCGCATTGAGCTGCAGGCATGCAATCAAGATTCCCCAGATGAGAGGTGCAGTGTGGCTGCCTACGTCAGTGCCCGGACCATGCCTGAAGCTAAGGCAGATGACATCGTTGGCCCTGTGACTCATGAAATCTTTGAGAACAATGTTGTACACTTAATGTGGCAAGAGCCAAAGGAACCTAATGGTCTGATTGTGCTATATGAAGTGAGCTATCGCCGATATGGTGATGAGGAGCTGCACCTCTGTGTCTCCCGGAAGCATTTTGCCCTGGAGCGGGGCTGCAGGCTGCGAGGGCTCTCCCCAGGAAACTACAGTGTTCGAGTCCGGGCTACCTCTCTGGCAGGAAATGGCTCCTGGACAGAACCCACCTATTTTTATGTGACTGATTATTTAGATGTCCCATCAAATATTGCCAAAATTATCATTGGACCCCTCATCTTTGTCTTCCTCTTCAGTGTTGTGATTGGAAGTATTTATCTATTTCTGAGAAAGAGGCAGCCGGATGGGCCAATGGGACCACTGTATGCATCTTCAAACCCTGAGTACCTCAGTGCCAGTGATGTGTTTCCATCTTCTGTGTACGTGCCGGACGAGTGGGAGGTGCCTCGAGAGAAGATCACCCTTCTTCGAGAGCTGGGGCAGGGATCCTTTGGTATGGTGTATGAAGGCAATGCCAAGGATATCATCAAGGGTGAGGCAGAGACCCGTGTTGCGGTTAAGACTGTCAATGAGTCAGCCAGTCTTCGAGAACGGATCGAGTTCCTCAATGAGGCATCAGTCATGAAGGGATTCACCTGCCATCATGTGGTCCGCCTTCTTGGGGTGGTATCCAAAGGACAGCCAACGCTGGTAGTGATGGAATTGATGGCTCATGGAGACCTGAAAAGTCACCTCCGTTCTCTGAGGCCAGATGCTGAGAATAACCCAGGCCGCCCTCCCCCTACCTTGCAAGAAATGATTCAGATGACAGCAGAAATTGCTGATGGCATGGCATACTTGAACGCCAAGAAGTTTGTGCACCGGGACCTGGCAGCTCGAAACTGCATGGTTGCCCATGATTTTACTGTCAAAATTGGAGACTTTGGAATGACAAGGGACATCTACGAGACAGATTACTATCGGAAAGGGGGCAAGGGACTGCTTCCTGTGAGGTGGATGTCACCTGAGTCCCTGAAGGATGGAGTCTTTACTGCTTCTTCTGATATGTGGTCCTTTGGGGTGGTCCTTTGGGAAATCACTAGCCTGGCTGAGCAACCTTATCAAGGCCTGTCTAATGAACAGGTGTTGAAGTTTGTCATGGATGGAGGCTATCTGGATCCCCCTGATAACTGTCCAGAGAGACTCACTGACCTGATGCGCATGTGCTGGCAGTTCAACCCCAAGATGAGGCCAACCTTCCTGGAAATCGTCAACCTGCTCAAGGA
TGACCTCCACCCCAGCTTTCCAGAAGTTTCCTTCTTCTACAGCGAGGAGAACAAGGCTCCTGAGAGTGAGGAGCTGGAGATGGAGTTTGAAGACATGGAGAATGTCCCGTTGGATCGTTCCTCTCACTGTCAGAGAGAAGAGGCTGGGGGCCGGGAGGGAGGGTCCTCACTGAGCATCAAACGGACCTATGATGAACACATCCCCTATACCCACATGAATGGGGGCAAGAAGAACGGACGTGTCCTTACCCTGCCAAGGTCAAACCCTTCCTAA + +>ENSCAFT00000026349_canisfamiliaris +ATGGCAGTGCCCAGTCTGTGCCCATGGGTCGCATGCCTGCTGGTGATCCTCCTCTCCTTGGGATTTGGCCTGGACACACTAGAGGTGTGCCCCAGCCTGGACATCCGCTCAGAGGTGGCGGAGCTGCGCCGGCTGGAGAACTGCAGCGTGGTGGAGGGCCATCTGCAGATCCTGCTCATGTTCACGGCCACGGGCGAGGACTTCCGCGGCCTCAGCTTCCCGCGCCTCACTCAGGTCACTGACTACCTGCTGCTCTTCCGCGTGTATGGCCTGGAGAGCCTGCGGGACCTCTTCCCCAACCTCGCGGTCGTGCGTGGCGCCCGCCTCTTCCTGGGTTATGCGCTGGTGGTCTACGAGATGCCGCACCTGCGGGACGTGGGGCTGCCGGCGCTGGGGGCCGTGCTGCGTGGGGCCGTGCGGGTGGAGAAGAACCAGGAGCTCTGTCATCTCTCCACCATCGACTGGGGGCTGCTGCAGCCCTCGCCCAGCGCCAACCACATCGTGGGCAACAAGCTGCGTGAGGAGTGTGCCGACGTGTGCCCTGGTGTGCTGGGTGCCACCGGCCAGCCCTGCGCCAGGACCACCTTCAGTGGGCACACCGACTACAGATGCTGGACCTCCAGCCACTGCCAGAGAGTGTGTCCCTGCCCCCATGGGCTGGCCTGCACAGCTGGGGGTGAGTGCTGCCACACTGAATGCCTGGGGGGCTGCAGCCGGCCGGAAGACCCCCGCGCCTGCGTCGCCTGTCGCCACCTCTACTTCCAGGGTGCCTGCCACCGGGCCTGCCCTCCAGGCACCTACCAGCACGAGTCCTGGCGCTGTGTCACGGCAGAGCGCTGTGCCAGCCTGCGCTCTGTGCCCGGCCGCACCTCCATCTTTGGCATCCACGAGGGCAGCTGCCTGGCCCAGTGCCCTCCGGGCTTCACCCGCAACGGCAGCAGCATGTTCTGCCACAAGTGTGAGGGGCTGTGCCCCAAAGAGTGCAAGGTGGGTACCAAGACCATCGACTCCACGCAGGCGGCACAGGACCTGGCGGGCTGCACCCACGTGGAGGGGAGCCTCATCATCAACCTCCGCCAGGGCTACAACCTAGAGCTGGAGCTGCAACAGAGCCTGGGGCTGATAGAGACCATCACTGGCTTCCTCAAGATCAAGCACTCCTTTGCCCTCGTGTCCCTGGGCTTTTTCAAGAACCTCAAACTCATCCGAGGGGACGCCATGGTGGATGGGAACTACACCCTGTATGTGCTGGACAACCAGAACCTACAGCAGCTGGGGGCCTGGGTGGCTGCGGGGCTCACCATTCCCGTGGGCAAGATATACTTCGCTTTCAACCCTCGCCTCTGCTTGGAGCACATCTACCGCCTGGAAGAGGTGACCGGCACGCGGGGACGGCAAAACAAGGCTGAGATCAACCCCCGCACCAACGGAGACCGCGCCGCCTGCCAAACTCGCACCCTGCGCTTCGTGTCCAACGTGACGCAAGCTGACAGCATCTTGCTGCGCTGGGAGCGCTACGAGCCGCTGGAGGCTCGGGACCTGCTCAGCTTCATCGTGTACTACAAGGAGTCCCCATTCCAGAATGCCACAGAGCACACAGGTCCAGATGCCTGTGGAACCCAGAGCTGGAACCTGCTGGATGTGGAGCTGCCCTTAAGCCGCACCCAAGAACCCGGGGTAACTCT
AGCACCCCTCAAGCCCTGGACACAATATGCAGTGTTTGTACGGGCCATCACACTGACCACTGCTGAGGACAGCCCCCACCAAGGAGCCCAGAGCCCCATCGTCTACCTCCGAACCCTGCCTGCGGCGCCCACTGTGCCCCAGGACGTCATCTCCACGTCCAATTCCTCGTCCCACCTGCTCGTGCGCTGGAAGCCACCGATTCAGCGCAACGGGAACATCACCTACTACCTGGTGCTGTGGCAGCGTCTGGCGGAGGACGGCGACCTCTATCTCAACGACTACTGCCACCGCGGCCTGCGGCTGCCCACCAGCAACAACGACCCGCGCTTCGACCGCGAGGACGGTGAACTCGAAGCCGAGATGGAGCCGGGCTGCTGCCCTTGCCAGCACCCACCGCCTGGGCAGGTCCTGCCGCCGCTGGAGGCGCAAGAGGCCTCGTTCCAGAAGAAGTTCGAAAACTTCCTGCACAACGCCATCACCATCCCCAAGTCCCCCTGGAAGGTGACGTCCGTCAATAAGAGCCCTCAAAGACATGCGGGGAGGCACCGCCGGGCGGCCGGGGCGCTCCGGCTTGGGGGCAACAGCTCGGATTTCGAGATCCAGGAGGACAAAGTGCCCCGGGAGCGAGCAGTGTTGAGTGGTCTGCGCCACTTTACGGAATATCGTATCGACATCCACGCCTGCAACCACGCGGCTCACACCGTGGGCTGCAGCGCGGCCACGTTCGTCTTCGCGCGCACCATGCCGCACAGAGAAGCTGATGGCATCCCAGGGAAGGTGGCCTGGGAGGCAGCCAGCAAAAGCAGTGTCCTCCTGCGCTGGCTGGAGCCACCTGACCCCAACGGACTCATCCTCAAGTATGAAATCAAGTACCGCCGCTTGGGAGAGGAGGCCACAGTGCTATGTGTGTCCCGCCTACGATATGCCAAATTTGGGGGTGTCCAGCTGGCCCTGCTGCCCCCTGGAAACTACTCCGCCAGAGTTCGGGCAACCTCGCTGGCTGGCAACGGCTCCTGGACAGAAAGTATCGCTTTCTACGTCCCAGGCCCAGAGGAGGAAGACTCCGGGGGGCTGCACGTCCTTCTCACTGTCACCCCCGTGGGGCTCATGCTGTTCATCATTCTTGCCGCCCTCGGTTTCTTCTACGGCAGGAAGAGAAACAGCACCCTCTATGCCTCAGTGAATCCGGAGTACTTCAGCGCCTCTGATATGTACATCCCTGATGAGTGGGAGGTGCCTCGGGAGCAGATCTCCATAATCCGAGAGCTGGGCCAGGGCTCCTTTGGGATGGTATATGAAGGCCTGGCACAAGGACTAGAGGCTGGAGAGGAGTCCACACCCGTGGCCCTGAAGACAGTGAATGAGTTGGCCAGCCCACGAGAACGCATTGAGTTCCTCAAGGAAGCTTCTGTCATGAAGGCATTCAAGTGTCACCATGTGGTACGTCTCCTGGGTGTTGTGTCTCAAGGCCAGCCAACTCTGGTCATCATGGAGTTAATGACTCGTGGGGACCTCAAGAGCCATCTTCGATCTCTGCGGCCTGAGGCAGAGAACAACCCTGGGCTCCCACGGCCAGCACTGGGAGATATGATCCAGATGGCTGGTGAGATTGCAGATGGCATGGCTTACCTCGCTGCCAACAAGTTTGTGCATCGAGACCTGGCAGCCAGAAACTGCATGGTGTCCCAGGACTTCACCGTCAAGATTGGGGACTTCGGGATGACTCGAGACGTGTACGAGACAGACTATTACCGCAAGGGCGGGAAAGGGCTGCTGCCCGTGCGCTGGATGGCCCCCGAGTCCCTCAAAGATGGAATCTTCACCACACATTCGGATGTTTGGTCCTTCGGGGTGGTGCTCTGGGAGATCGTGACCCTAGCTGAACAGCCCTACCAGGGTTTATCCAACGAGCAGGTGCTCAAGTTTGTCATGGATGGTGGGGTCCTGGAGGAGCTGGAGAGCTGTCCCCTTCAGCTGCAAGAACTGATGAGCCGCTGCT
GGCAGCAGAACCCACGCCTGCGGCCCACCTTCACCCACATCCTGGACAGCATTCGGGGGGAACTGCGGCCCTCTTTCCGCCTCTTTTCCTTCTATTACAGCCCAGAGTGCCAGGGGAGCCAGGGGGCCCTGCTGCCTAATGCCAAACCCAACTCCCTACCAACCCCAGAAGGGGCTCCCTCAGACTGCATGCCCCAAAATGGGGGTCCAGGGCACTGA + +>ENSMUST00000005671_homosapiens +ATGAAGTCTGGCTCCGGAGGAGGGTCCCCGACCTCGCTGTGGGGGCTCGTGTTTCTCTCCGCCGCGCTCTCTCTCTGGCCGACGAGTGGAGAAATCTGTGGGCCCGGCATTGACATCCGCAACGACTATCAGCAGCTGAAGCGCCTGGAAAACTGCACGGTGATCGAGGGCTTCCTCCACATCCTGCTCATCTCCAAGGCCGAGGACTACCGAAGCTACCGCTTCCCCAAGCTCACCGTCATCACTGAGTACTTGCTGCTCTTCCGAGTCGCTGGCCTCGAGAGCCTGGGAGACCTCTTCCCCAACCTCACAGTCATCCGTGGCTGGAAACTCTTCTACAACTACGCACTGGTCATCTTCGAGATGACCAATCTCAAGGATATTGGGCTTTATAATCTGAGGAACATTACTCGGGGGGCCATCAGGATTGAGAAGAACGCCGACCTCTGTTACCTCTCCACCATAGACTGGTCTCTCATCTTGGATGCGGTGTCCAATAACTACATTGTGGGGAACAAGCCCCCGAAGGAATGTGGGGACCTGTGTCCAGGGACATTGGAGGAGAAGCCCATGTGTGAGAAGACCACCATCAACAATGAGTACAACTACCGCTGCTGGACCACAAATCGCTGCCAGAAAATGTGCCCAAGTGTGTGCGGGAAGCGAGCCTGCACCGAGAACAACGAGTGCTGCCACCCGGAGTGCCTGGGCAGCTGCCACACACCGGACGACAACACAACCTGCGTGGCCTGCAGACACTACTACTACAAAGGCGTGTGTGTGCCTGCCTGCCCGCCTGGCACCTACAGGTTCGAGGGCTGGCGCTGTGTGGATCGCGATTTCTGCGCCAACATCCCCAACGCTGAGAGCAGTGACTCGGATGGCTTCGTTATCCACGACGATGAGTGCATGCAGGAGTGTCCCTCAGGCTTCATCCGCAACAGCACCCAGAGCATGTACTGTATCCCCTGCGAAGGCCCCTGCCCCAAAGTCTGCGGCGATGAAGAGAAGAAAACGAAAACCATCGATTCGGTGACTTCTGCTCAAATGCTCCAAGGATGCACCATCCTGAAGGGCAATCTGCTTATTAACATCCGGAGAGGCAATAACATTGCCTCGGAGTTGGAGAACTTCATGGGGCTCATCGAGGTGGTGACCGGCTACGTGAAGATCCGCCATTCTCATGCCTTGGTCTCCTTGTCCTTCCTGAAGAACCTTCGTCTCATCTTAGGAGAGGAGCAGCTGGAAGGGAACTACTCCTTCTATGTCCTAGACAACCAGAACTTGCAGCAGCTGTGGGACTGGAACCACCGGAACCTGACCGTCAGGTCCGGAAAGATGTACTTTGCTTTCAATCCCAAGCTGTGTGTCTCCGAAATTTACCGCATGGAGGAAGTGACCGGAACCAAGGGACGCCAGAGCAAAGGGGACATAAACACCAGGAACAACGGAGAGCGAGCTTCCTGTGAAAGTGATGTTCTCCGTTTCACCTCCACCACGACCTGGAAGAACCGAATCATCATAACGTGGCACCGGTACCGGCCGCCGGACTACCGGGATCTCATCAGCTTCACAGTTTACTACAAGGAGGCACCATTTAAAAACGTTACGGAATATGACGGGCAGGATGCCTGTGGCTCCAACAGCTGGAACATGGTGGATGTAGACCTGCCTCCGAACAAGGAGGGCGAGCCTGGCATTTTACTGCATGGGCTGAAGCCCTGGACCCAGTATGCTGTCTATGT
CAAGGCTGTGACCCTCACCATGGTGGAAAACGACCATATCCGTGGGGCCAAAAGTGAAATCTTGTACATTCGCACCAATGCTTCAGTCCCTTCCATTCCCCTAGATGTCCTCTCAGCATCAAACTCTTCCTCTCAGCTGATTGTGAAGTGGAATCCTCCAACTCTGCCCAATGGTAACTTGAGTTACTACATTGTGAGGTGGCAGCGGCAGCCCCAGGATGGTTACCTGTACCGGCACAACTACTGCTCCAAAGACAAAATACCCATCAGAAAGTACGCCGATGGTACCATCGACGTGGAGGAGGTGACGGAAAATCCCAAGACAGAAGTGTGTGGTGGTGATAAAGGGCCATGCTGCGCTTGCCCTAAAACTGAAGCTGAGAAGCAGGCTGAGAAGGAGGAGGCTGAGTACCGTAAAGTCTTTGAGAATTTCCTTCACAATTCCATCTTTGTGCCCAGGCCCGAAAGGAGGCGGAGAGACGTCATGCAAGTGGCCAACACGACCATGTCCAGCCGAAGCAGGAACACCACGGTAGCTGACACCTACAATATCACAGACCCGGAGGAGTTCGAGACAGAGTACCCTTTCTTTGAGAGCAGAGTGGATAACAAGGAGAGGACTGTCATCTCCAACCTCCGGCCTTTCACTCTGTACCGCATCGATATCCACAGCTGCAACCACGAGGCTGAGAAGCTGGGCTGCAGCGCCTCCAACTTCGTCTTTGCGAGAACCATGCCAGCAGAAGGAGCAGATGATATCCCTGGTCCGGTGACCTGGGAGCCAAGACCCGAAAACTCCATCTTTTTAAAGTGGCCAGAACCCGAGAACCCCAACGGATTGATCCTAATGTATGAAATTAAATACGGGTCGCAAGTCGAGGATCAGCGGGAATGTGTGTCCAGACAGGAGTACAGGAAGTACGGAGGGGCCAAACTCAACCGTCTAAACCCAGGGAACTATACAGCCCGGATTCAGGCTACCTCCCTCTCTGGGAATGGGTCATGGACAGATCCTGTGTTCTTCTATGTCCCCGCCAAAACGACGTATGAGAACTTCATGCATCTGATCATTGCTCTGCCGGTTGCCATCCTGCTGATCGTTGGGGGGCTGGTTATCATGCTGTATGTCTTCCATAGAAAGAGAAATAACAGCAGGTTGGGCAATGGAGTGCTGTATGCTTCTGTGAACCCCGAGTATTTCAGCGCAGCTGATGTGTACGTGCCTGATGAATGGGAGGTAGCTCGAGAGAAGATCACCATGAACCGGGAGCTCGGACAAGGGTCCTTTGGGATGGTCTATGAAGGAGTGGCCAAGGGTGTGGTCAAGGATGAACCCGAAACCAGAGTGGCCATCAAGACGGTAAACGAGGCTGCAAGTATGCGTGAAAGAATCGAGTTTCTCAACGAGGCCTCGGTGATGAAGGAGTTCAATTGTCACCATGTGGTCCGGTTGCTGGGTGTGGTATCCCAAGGCCAGCCCACCCTGGTCATCATGGAACTAATGACACGCGGTGATCTCAAAAGTTATCTCCGGTCTCTGAGGCCAGAAGTGGAGCAGAATAATCTAGTCCTCATTCCTCCGAGCTTAAGCAAGATGATCCAGATGGCTGGAGAGATTGCAGATGGCATGGCCTACCTCAATGCCAACAAGTTCGTCCACAGAGACCTTGCTGCTAGGAACTGCATGGTAGCCGAAGATTTCACAGTCAAAATTGGAGATTTCGGTATGACACGAGACATCTACGAGACGGACTACTACCGGAAAGGCGGGAAGGGGTTGCTGCCTGTGCGCTGGATGTCTCCCGAGTCCCTCAAGGATGGTGTCTTCACTACTCATTCTGATGTCTGGTCCTTCGGGGTCGTCCTCTGGGAGATCGCCACGCTGGCTGAGCAGCCCTACCAGGGCTTGTCCAACGAGCAAGTTCTTCGTTTCGTCATGGAGGGTGGCCTTCTGGACAAGCCGGACAACTGCCCTGATATGCTGTTTGAACTTATGCGCA
TGTGCTGGCAGTATAACCCCAAGATGCGGCCCTCCTTCCTGGAGATCATCGGCAGCATCAAGGATGAGATGGAGCCCAGCTTCCAGGAGGTCTCCTTCTACTACAGCGAGGAGAACAAGCCTCCCGAGCCAGAGGAGCTGGAGATGGAGCCTGAGAACATGGAGAGCGTCCCACTGGACCCTTCGGCCTCCTCAGCCTCCCTGCCTCTGCCTGAAAGACACTCAGGACACAAGGCTGAGAATGGCCCGGGCCCTGGCGTGCTCGTTCTCCGCGCCAGTTTTGATGAGAGACAGCCTTACGCTCACATGAACGGGGGACGCGCCAACGAGAGGGCCTTGCCTCTGCCCCAGTCCTCGACCTGCTGA + +>ENSRNOT00000019267_rattusnorvegicus +ATGAAGTCTGGCTCCGGAGGAGGGTCCCCGACCTCGCTGTGGGGGCTCGTGTTTCTCTCCGCCGCGCTCTCGCTCTGGCCGACGAGTGGAGAAATTTGTGGGCCCGGCATTGACATCCGCAACGACTATCAGCAGCTGAAGCGCCTGGAAAACTGCACGGTGATCGAGGGCTTCCTCCACATCCTGCTCATCTCCAAGGCCGAGGACTACCGAAGCTACCGCTTCCCCAAGCTCACGGTCATCACCGAGTACTTGCTGCTGTTTCGAGTGGCCGGCCTCGAGAGCCTGGGAGACCTCTTCCCGAACCTCACAGTCATCCGTGGCTGGAAACTCTTCTACAATTACGCACTGGTCATCTTCGAGATGACCAATCTCAAGGATATTGGGCTTTATAATCTGAGGAACATTACTCGGGGGGCCATCAGGATTGAGAAAAACGCTGACCTCTGTTACCTCTCCACCATAGACTGGTCTCTCATCTTGGATGCGGTGTCCAATAACTACATTGTGGGGAACAAGCCCCCAAAGGAATGTGGGGACCTGTGTCCAGGGACCTTGGAGGAGAAGCCCATGTGTGAGAAGACCACCATCAACAATGAGTACAACTACCGCTGCTGGACCACAAATCGCTGCCAGAAAATGTGCCCAAGTGTGTGTGGGAAGCGAGCCTGCACCGAGAACAATGAGTGCTGCCACCCGGAGTGCCTAGGCAGCTGCCACACACCGGACGACAACACAACCTGCGTGGCCTGCCGACACTACTACTACAAAGGCGTGTGCGTGCCTGCCTGCCCGCCTGGCACCTACAGGTTCGAGGGCTGGCGCTGTGTGGACCGGGATTTCTGCGCCAACATCCCCAACGCCGAGAGCAGTGACTCAGATGGCTTCGTCATCCACGATGGCGAGTGCATGCAGGAGTGTCCATCAGGCTTCATCCGCAACAGCACCCAGAGCATGTACTGTATCCCCTGTGAAGGCCCCTGCCCCAAGGTCTGCGGCGATGAAGAAAAGAAAACGAAAACCATCGATTCTGTGACGTCTGCCCAGATGCTCCAAGGGTGCACCATTTTGAAGGGCAATCTGCTTATTAACATCCGGCGAGGCAATAACATTGCCTCGGAATTGGAGAACTTCATGGGGCTCATCGAGGTGGTGACTGGCTACGTGAAGATCCGCCATTCCCATGCCTTGGTCTCCTTGTCCTTCCTGAAGAACCTTCGTCTCATCTTAGGAGAGGAGCAGCTAGAAGGGAACTACTCCTTCTATGTCCTGGACAACCAGAACTTGCAGCAGCTGTGGGACTGGAACCACCGGAACCTGACCGTCAGGTCAGGGAAAATGTACTTCGCTTTCAATCCCAAGCTGTGTGTCTCTGAAATTTACCGGATGGAGGAGGTGACAGGAACAAAGGGACGGCAGAGCAAAGGAGACATAAACACCAGGAACAACGGAGAGCGAGCTTCCTGTGAAAGTGATGTTCTCCGTTTCACCTCCACCACCACCTGGAAGAACCGCATCATCATAACGTGGCACCGGTACCGGCCGCCGGACTACCGGGATCTCATCAGTTTCACAGTCTACTACAAGGAGGCA
CCCTTTAAAAACGTCACGGAATACGACGGGCAGGATGCCTGTGGCTCCAACAGCTGGAACATGGTGGACGTGGACCTGCCTCCGAACAAGGAGGGGGAGCCTGGCATTTTGCTGCATGGGCTGAAGCCCTGGACCCAGTATGCAGTCTATGTCAAGGCTGTGACCCTCACCATGGTGGAAAACGACCACATCCGTGGGGCCAAAAGTGAAATCTTGTACATTCGCACCAACGCTTCAGTTCCTTCCATTCCTCTAGATGTCCTCTCGGCATCAAACTCCTCCTCTCAGCTGATCGTGAAGTGGAACCCCCCAACTCTGCCCAATGGTAACTTGAGTTACTACATTGTGAGGTGGCAGCGGCAGCCGCAGGATGGCTATCTGTTCCGGCACAACTACTGCTCCAAAGACAAAATACCCATCAGAAAGTACGCCGATGGTACCATCGATGTGGAGGAGGTGACAGAAAATCCCAAGACAGAAGTGTGCGGTGGTGATAAAGGGCCGTGCTGTGCCTGTCCTAAAACCGAAGCTGAGAAGCAGGCTGAGAAGGAGGAGGCTGAGTACCGTAAAGTCTTTGAGAATTTCCTTCACAACTCCATCTTTGTGCCCAGACCTGAGAGGAGGCGGAGAGATGTCCTGCAGGTGGCTAACACCACCATGTCCAGCCGAAGCAGGAACACCACGGTAGCTGACACCTACAATATCACAGACCCGGAAGAGTTCGAGACAGAATACCCTTTCTTTGAGAGCAGAGTGGATAACAAGGAGAGGACTGTCATTTCCAACCTCCGGCCTTTCACTCTGTACCGTATCGATATCCACAGCTGCAACCACGAGGCTGAGAAGCTGGGCTGCAGCGCCTCCAACTTTGTCTTTGCAAGAACCATGCCAGCAGAAGGAGCAGATGACATTCCTGGCCCAGTGACCTGGGAGCCAAGACCTGAAAACTCCATCTTTTTAAAGTGGCCAGAACCCGAGAACCCCAACGGATTGATTCTAATGTATGAAATAAAATACGGATCGCAAGTCGAGGATCAGCGGGAATGTGTGTCCAGACAGGAGTACAGGAAGTATGGAGGGGCCAAACTTAACCGTCTAAACCCAGGGAACTATACGGCCCGGATTCAGGCTACCTCCCTCTCTGGGAATGGGTCGTGGACAGATCCTGTGTTCTTCTATGTCCCAGCCAAAACAACGTATGAGAATTTCATGCATCTGATCATTGCTCTGCCGGTTGCCATCCTGCTGATTGTGGGGGGCCTGGTAATCATGCTGTATGTCTTCCATAGAAAGAGGGTGTCTGGGCCAGCAGAAGGCAGTAGTTGGAAAGGGCCATTTCCATCCTGTCTGTTCCTAGTGTACGTGCCTGATGAATGGGAGGTAGCTCGGGAGAAGATCACCATGAACCGGGAGCTCGGACAAGGGTCCTTCGGGATGGTCTATGAAGGAGTGGCCAAGGGCGTGGTCAAGGACGAGCCTGAAACCAGAGTGGCCATCAAGACAGTGAATGAGGCTGCAAGTATGCGTGAGAGAATTGAGTTTCTCAACGAGGCCTCAGTGATGAAGGAGTTCAACTGTCACCATGTGGTCCGGTTGCTGGGTGTAGTATCCCAAGGCCAGCCCACCCTGGTCATCATGGAACTAATGACACGTGGCGATCTCAAAAGTTATCTCCGGTCTCTAAGGCCAGAGGTGGAGAATAATCTAGTCCTGATTCCTCCGAGCTTAAGCAAGATGATCCAGATGGCTGGAGAGATTGCAGATGGCATGGCCTACCTCAATGCCAACAAGTTCGTCCACAGAGACCTGGCTGCTCGGAACTGCATGGTAGCTGAAGATTTCACAGTCAAAATTGGAGATTTTGGTATGACACGAGACATCTACGAGACGGACTACTACCGGAAAGGCGGGAAGGGCTTGCTGCCTGTGCGCTGGATGTCTCCCGAGTCCCTCAAGGATGGCGTCTTCACCACTCATTCCGATGTCTGGTCCTTTGGGGT
CGTCCTCTGGGAGATCGCCACTCTGGCTGAGCAGCCGTACCAGGGCCTGTCCAACGAGCAAGTTCTTCGTTTCGTCATGGAGGGCGGCCTTCTGGACAAGCCGGATAACTGCCCCGATATGCTGTTTGAACTTATGCGCATGTGCTGGCAGTACAACCCCAAGATGCGGCCCTCCTTCCTGGAGATCATCGGAAGCATCAAGGATGAGATGGAGCCCAGTTTCCAGGAGGTCTCCTTCTACTACAGCGAGGAGAACAAGCCTCCAGAGCCGGAGGAGCTGGAGATGGAGCTGGAGCTGGAGCCCGAGAACATGGAGAGCGTCCCGCTGGACCCTTCGGCCTCCTCAGCCTCCCTGCCTCTGCCTGAAAGACACTCAGGACACAAGGCTGAGAACGGCCCTGGCGTGCTGGTTCTCCGTGCCAGTTTTGATGAGAGACAGCCTTACGCTCACATGAATGGGGGACGCGCCAACGAGAGGGCCTTGCCTCTGCCCCAGTCCTCAACCTGC + +>ENSPTRT00000013802_pantroglodytes +ATGAAGTCTGGCTCCGGAGGAGGGTCCCCGACCTCGCTGTGGGGGCTCCTGTTTCTCTCCGCCGCGCTCTCGCTCTGGCCGACGAGTGGAGAAATCTGCGGGCCAGGCATCGACATCCGCAACGACTATCAGCAGCTGAAGCGCCTGGAGAACTGCACGGTGATCGAGGGCTACCTCCACATCCTGCTCATCTCCAAGGCCGAGGACTACCGCAGCTACCGCTTCCCCAAGCTCACGGTCATTACCGAGTACTTGCTGCTGTTCCGAGTGGCTGGCCTCGAGAGCCTCGGAGACCTCTTCCCCAACCTCACGGTCATCCGCGGCTGGAAACTCTTCTACAACTACGCCCTGGTCATCTTCGAGATGACCAATCTCAAGGATATTGGGCTTTACAACCTGAGGAACATTACTCGGGGGGCCATCAGGATTGAGAAAAATGCTGACCTCTGTTACCTCTCCACTGTGGACTGGTCCCTGATCCTGGATGCGGTGTCCAATAACTACATTGTGGGGAATAAGCCCCCAAAGGAATGTGGGGACCTGTGTCCAGGGACCATGGAGGAGAAGCCGATGTGTGAGAAGACCACCATCAACAATGAGTACAACTACCGCTGCTGGACCACAAACCGCTGCCAGAAAATGTGCCCGAGCACGTGTGGGAAGCGGGCGTGCACCGAGAACAACGAGTGCTGCCACCCCGAGTGCCTGGGCAGCTGCAGCGCGCCTGACAACGACACGGCCTGTGTAGCTTGCCGCCACTACTACTATGCCGGTGTCTGTGTGCCTGCCTGCCCGCCCAACACCTACAGGTTTGAGGGCTGGCGCTGTGTGGACCGTGACTTCTGCGCCAACATCCTCAGCGCCGAGAGCAGCGACTCCGAGGGGTTTGTGATCCACGACGGCGAGTGCATGCAGGAGTGCCCCTCGGGCTTCATCCGCAACGGCAGCCAGAGCATGTACTGCATCCCTTGTGAAGGTCCTTGCCCGAAGGTCTGTGAGGAAGAAAAGAAAACAAAGACCATTGATTCTGTTACTTCTGCTCAGATGCTCCAAGGATGCACCATCTTCAAGGGCAATTTGCTCATTAACATCCGACGGGGGAATAACATTGCTTCAGAGCTGGAGAACTTCATGGGGCTCATCGAGGTGGTGACGGGCTACGTGAAGATCCGCCATTCTCATGCCTTGGTCTCCTTGTCCTTCCTAAAAAACCTTCGCCTCATCTTAGGAGAGGAGCAGCTAGAAGGGAATTACTCCTTCTACGTCCTCGACAACCAGAACTTGCAGCAACTATGGGACTGGGACCACCGCAACCTGACCATCAAAGCAGGGAAAATGTACTTTGCTTTCAATCCCAAATTATGTGTTTCCGAAATTTACCGCATGGAGGAAGTGACGGGGACTAAAGGGCGCCAAAGCAAAGGGGACATAAACACCAGGAACAACGGGGAG
AGAGCCTCCTGTGAAAGTGACGTCCTGCATTTCACCTCCACCACCACGTCGAAGAATCGCATCATCATAACCTGGCACCGGTACCGGCCCCCTGACTACAGGGATCTCATCAGCTTCACCGTTTACTACAAGGAAGCACCCTTTAAGAATGTCACGGAGTATGATGGGCAGGACGCCTGCGGCTCCAACAGCTGGAACATGGTGGACGTGGACCTCCCGCCCAACAAGGACGTGGAGCCCGGCATCTTACTACATGGGCTGAAGCCCTGGACTCAGTACGCCGTTTACGTCAAGGCTGTGACCCTCACCATGGTGGAGAATGACCATATCCGTGGGGCCAAGAGTGAGATCTTGTACATTCGCACCAATGCTTCAGTTCCTTCCATTCCCTTGGACGTTCTTTCAGCATCGAACTCCTCTTCTCAGTTAATCGTGAAGTGGAACCCTCCCTCTCTGCCCAACGGCAACCTGAGTTACTACATTGTGCGCTGGCAGCGGCAGCCTCAGGACGGCTACCTTTACCGGCACAATTACTGCTCCAAAGACAAAATCCCCATCAGGAAGTATGCCGACGGCACCATCGACATTGAGGAGGTCACAGAGAACCCCAAGACTGAGGTGTGTGGTGGGGAGAAAGGGCCTTGCTGCGCCTGCCCCAAAACTGAAGCCGAGAAGCAGGCCGAGAAGGAGGAGGCTGAATACCGCAAAGTCTTTGAGAATTTCCTGCACAACTCCATCTTCGTGCCCAGACCTGAAAGGAAGCGGAGAGATGTCATGCAAGTGGCCAACACCACCATGTCCAGCCGAAGCAGGAACACCACGGCCGCAGACACCTACAACATCACCGACCCGGAAGAGCTGGAGACAGAGTACCCTTTCTTTGAGAGCAGAGTGGATAACAAGGAGAGAACTGTGATTTCTAACCTTCGGCCTTTCACATTGTACCGCATCGATATCCACAGCTGCAACCACGAGGCTGAGAAGCTGGGCTGCAGCGCCTCCAACTTTGTCTTTGCAAGGACTATGCCCGCAGAAGGAGCAGATGACATTCCTGGGCCAGTGACCTGGGAGCCAAGGCCTGAAAACTCCATCTTTTTAAAGTGGCCGGAACCTGAGAATCCCAATGGATTGATTCTAATGTATGAAATAAAATACGGATCACAAGTTGAGGATCAGCGAGAATGTGTGTCCAGACAGGAATACAGGAAGTATGGAGGGGCCAAGCTAAACCGGCTAAACCCGGGGAACTACACAGCCCGGATTCAGGCCACATCTCTCTCTGGGAATGGGTCGTGGACAGATCCTGTGTTCTTCTATGTCCAGGCCAAAACAGGATATGAAAACTTCATCCATCTGATCATCGCTCTGCCCGTCGCTGTCCTGTTGATCGTGGGAGGGTTGGTGATTATGCTGTACGTCTTCCATAGAAAGAGAAATAACAGCAGGCTGGGGAATGGAGTGCTGTATGCCTCTGTGAACCCGGAGTACTTCAGCGCTGCTGATGTGTACGTTCCCGATGAGTGGGAGGTGGCTCGGGAGAAGATCACCATGAGCCGGGAACTTGGGCAGGGGTCCTTTGGGATGGTCTATGAAGGAGTTGCCAAGGGTGTGGTGAAAGATGAACCTGAAACCAGAGTGGCCATTAAAACAGTGAACGAGGCCGCAAGCATGCGTGAAAGGATTGAGTTTCTCAACGAAGCTTCTGTGATGAAGGAGTTCAATTGTCACCATGTGGTGCGATTGCTGGGTGTGGTGTCCCAAGGCCAGCCAACACTGGTCATCATGGAACTGATGACACGGGGCGATCTCAAAAGTTATCTCCGGTCTCTGAGGCCAGAAATGGAGAATAATCCAGTCCTAGCACCTCCAAGCCTGAGCAAGATGATTCAGATGGCCGGAGAGATTGCAGACGGCATGGCATACCTCAACGCCAATAAGTTCGTCCACAGAGACCTTGCTGCCCGGAATTGCATGGTAGCCGAAGATTTCACAGTCAAAAT
CGGAGATTTTGGTATGACGCGAGATATCTATGAGACAGACTATTACCGGAAAGGAGGGAAAGGGCTGCTGCCCGTGCGCTGGATGTCTCCCGAGTCCCTCAAGGATGGAGTCTTCACCACTTACTCGGACGTCTGGTCCTTCGGGGTCGTCCTCTGGGAGATCGCCACACTGGCCGAGCAGCCCTACCAGGGCTTGTCCAACGAGCAAGTCCTTCGCTTCGTCATGGAGGGCGGCCTTCTGGACAAGCCAGACAACTGTCCCGACATGCTGTTTGAACTGATGCGCATGTGCTGGCAGTATAACCCCAAGATGAGGCCTTCCTTCCTGGAGATCATCAGCAGCATCAAAGAGGAGATGGAGCCTGGCTTCCGGGAGGTCTCCTTCTACTACAGCGAGGAGAACAAGCTGCCCGAGCCGGAGGAGCTGGACCTGGAGCCAGAGAACATGGAGAGCGTCCCCCTGGACCCCTCGGCCTCCCTGCCACTGCCCGACAGACACTCAGGACACAAGGCCGAGAACGGCCCCGGCCCTGGGGTGCTGGTCCTCCGCGCCAGCTTCGACGAGAGACAGCCGTACGCACACATGAACGGGGGCCGCAAGAACGAGCGGGCCTTGCCGCTGCCCCAGTCTTCGACCTGCTGA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output1.fasta Thu Dec 15 11:04:25 2016 -0500 @@ -0,0 +1,425 @@ +>ENSMUST00000091291_musmusculus +ATGGGCTTCGGGAGAGGATGTGAGACGACGG-CTGTGCCATTGCTGGTGG +CCGTGGCCGCGTTGCTGGTGGGCACAGCCGGCCACCTGTACCCTGGAGAG +GTGTGC---CCTGGTATGGACATCCGGAACAACCTGACCAGGCTACATGA +GCTGGAGAACTGCTCAGTCATTGAGGGCCATCTGCAGATCCTCCTGATGT +TCAAGACCAGACCCGAAGATTTCCGAGACCTCAGTTTCCCCAAACTCATC +ATGATCACAGATTACCTGCTTCTCTTCCGTGTCTATGGTCTGGAAAGTCT +GAAAGACCTCTTCCCAAATCTCACAGTCATCCGAGGCTCCCGTCTCTTCT +TCAACTATGCCCTGGTTATCTTCGAGATGGTCCACCTGAAGGAGCTGGGG +CTTTATAACCTCATGAACATCACCCGGGGCTCTGTCCGCATCGAGAAGAA +TAATGAGCTCTGCTACCTGGCCACTATCGACTGGTCCCGTATCCTGGATT +CTGTGGAGGACAACTACATTGTACTGAACAAAGATGACAACGAGGAATGT +GGGGATGTCTGTCCAGGCACCGCCAAGGGCAA-GACCA--ACTGTCCTGC +CACTGTCATCAATGGGCAGTTTGTGGAACGGTGCTGGACACACAGTCATT +GTCAGAAAGTTTGCCCAACCATCTGTAAGTCACATGGCTGCACAGCTGAA +GGCCTGTGCTGCCACAAAGAGTGCCTGGGCAACTGTTCGGAACCTGATGA +CCCCACCAAGTGTGTGGCCTGTCGCAACTTCTATCTGGATGGTCAGTGTG +TGGAGACCTGCCCGCCACCCTACTATCACTTCCAGGACTGGCGCTGTGTG +AACTTCAGCTTCTGCCAAGACCTTCACTTCAAATGCAGGAACTCTCGGAA +GCCTGGCTGCCACCAATACGTCATTCACAACAATAAGTGCATCCCCGAGT +GCCCGTCTGGCTATACCATGAATTCCAGC---AACTTGATGTGCACCCCA +TGTCTGGGACCCTGCCCTAAGGTCTGCCA-AATCCTCGAAGGTGA--GAA +GACCATTGATTCTGTGACATCTGCCCAGGAGCTCCGAGGCTGCACTGTGA +TCAACGGTAGCCTGATCATCAACATCCGAGGGGGCAACAACCTGGCAGCT +GAGCTGGAGGCTAACCTTGGCCTCATTGAAGAAATTTCGGGATTTCTAAA +GATCCGCCGCTCCTATGCTCTGGTATCACTTTCTTTCTTCAGGAAGCTAC +ATCTGATTCGAGGAGAGACCTTGGAAATTGGGAACTATTCTTTTTATGCC +TTGGACAACCAGAACCTGAGGCAACTCTGGGACTGGAGCAAACACAACCT +CACCATCACTCAGGGCAAGCTCTTCTTCCATTACAACCCGAAACTCTGCT +TGTCTGAAATTCACAAGATGGAAGAAGTCTCCGGAACTAAGGGCCGTCAG +GAGAGGAACGACATTGCCCTGAAGACCAATGGGGACCAGGCATCGTGTGA +AAATGAATTGCTTAAATTTTCTTTCATTCGGACATCTTTTGACAAGATCC +TGTTGAGGTGGGAACCCTACTGGCCCCCCGACTTCCGAGATCTCCTGGGA +TTCATGTTGTTCTACAAAGAGGCCCCTTATCAGAATGTGACAGAGTTTGA +TGGGCAGGATGCTTGTGGCTCCAACAGCTGGACTGTGGTGGATATTGACC +CGCCCCAGAGGTCCAACGACCCCAAGTCTCAGACCCCAAGCCACCCTGGG 
+TGGCTGATGCGGGGCCTCAAACCCTGGACCCAATACGCCATCTTTGTGAA +GACCTTGGTTACCTTCTC--TGATGAACGGC-GGACCTATGGAGCCAAAA +GTGATATCATCTATGTGCAAACAGATGCCACTAATCCTTCTGTCCCCCTG +GATCCCATATCAGTTTCTAATTCCTCATCTCAGATTATCTTAAAGTGGAA +GCCCCCCTCTGACCCCAATGGCAACATCACACACTACCTGGTGTACTGGG +AGAGGCAAGCAGAGGACAGCGAGCTGTTTGAGCTGGATTATTGTCTCAAA +GGGCTGAAGCT-CCCTTCACGGACCTGGTCCCCAC--CCTTTGAGTCTGA +TGAT-TCTCAGAAG--CACAATCAGAGTGAGTATGACGACTCGGCCAGTG +AGTGCTGCTCATGCCCTAAGACTGACTCTCAGATCCTGAAGGAGCTGGAG +GAGTCTTCATTCAGGAAGACCTTCGAGGATTACCTGCACAACGTGGTTTT +TGTCCCCAGGCCATCCCGAAAGCGAAGATCCCTTGAAGAGGTGGGGAATG +TGAC---AGCCACCACACTCACACTTCCAGATTTCCCCAACGTCTCCTCT +ACCATTGTGCCCACAAGTCAGGAG---GAGCACAGGCCATTTGAGAA--- +AGTGGTGAACAAGGAGTCACTTGTCATCTCTGGCCTGAGACACTTCACTG +GGTACCGCATTGAGCTGCAGGCATGCAATCAAGATTCCCCAGATGAGAGG +TGCAGTGTGGCTGCCTACGTCAGTGCCCGGACCATGCCTGAAGCTAAGGC +AGATGACATCGTTGGCCCTGTGACTCATGAAATCTTTGAGAACAATGTTG +TACACTTAATGTGGCAAGAGCCAAAGGAACCTAATGGTCTGATTGTGCTA +TATGAAGTGAGCTATCGCCGATATGGTGATGAGGAGCTGCACCTCTGTGT +CTCCCGGAAGCATTTTGCCCTGGAGCGGGGCTGCAGGCTGCGAGGGCTCT +CCCCAGGAAACTACAGTGTTCGAGTCCGGGCTACCTCTCTGGCAGGAAAT +GGCTCCTGGACAGAACCCACCTATTTTTATGTGACTGATTATTTAGATGT +CCCATCAAATATTGCCAAAATTATCATTGGACCCCTCATCTTTGTCTTCC +TCTTCAGTGTTGTGATTGGAAGTATT---TATCTATTTCTGAGAAAGAGG +CA----GCCGGATGGGCCAATGGGACCACTGTATGCATCTTCAAACCCTG +AGTACCTCAGTGCCAGTGATGTGTTTCCATCTTCTGTGTACGTGCCGGAC +GAGTGGGAGGTGCCTCGAGAGAAGATCACCCTTCTTCGAGAGCTGGGGCA +GGGATCCTTTGGTATGGTGTATGAAGGCAATGCCAAGGATATCATCAAGG +GTGAGGCAGAGACCCGTGTTGCGGTTAAGACTGTCAATGAGTCAGCCAGT +CTTCGAGAACGGATCGAGTTCCTCAATGAGGCATCAGTCATGAAGGGATT +CACCTGCCATCATGTGGTCCGCCTTCTTGGGGTGGTATCCAAAGGACAGC +CAACGCTGGTAGTGATGGAATTGATGGCTCATGGAGACCTGAAAAGTCAC +CTCCGTTCTCTGAGGCCAGATGCTGAG---AATAACCCAGGCCGCCCTCC +CCCTACCTTGCAAGAAATGATTCAGATGACAGCAGAAATTGCTGATGGCA +TGGCATACTTGAACGCCAAGAAGTTTGTGCACCGGGACCTGGCAGCTCGA +AACTGCATGGTTGCCCATGATTTTACTGTCAAAATTGGAGACTTTGGAAT +GACAAGGGACATCTACGAGACAGATTACTATCGGAAAGGGGGCAAGGGAC +TGCTTCCTGTGAGGTGGATGTCACCTGAGTCCCTGAAGGATGGAGTCTTT 
+ACTGCTTCTTCTGATATGTGGTCCTTTGGGGTGGTCCTTTGGGAAATCAC +TAGCCTGGCTGAGCAACCTTATCAAGGCCTGTCTAATGAACAGGTGTTGA +AGTTTGTCATGGATGGAGGCTATCTGGATCCCCCTGATAACTGTCCAGAG +AGACTCACTGACCTGATGCGCATGTGCTGGCAGTTCAACCCCAAGATGAG +GCCAACCTTCCTGGAAATCGTCAACCTGCTCAAGGATGACCTCCACCCCA +GCTTTCCAGAAGTTTCCTTCTTCTACAGCGAGGAGAACAAGGCTCCTGAG +AGTGAGGAGCTGGAGATGG------------AGTTTGAAGACATGGAGAA +TGTCCCGTTGGATCGTTC----------------CTCTCACTGTCAGAGA +GAA----GAGGCTGGGGGCCGGGAGGGAGGG-------TCCTCACTGAGC +ATCAAACGGACCTATGATGAACACATCCCCTATACCCACATGAATGGGGG +CAAGAAGAACGGACGTGTCCTTACCCTGCCAAGGTCAAACCCTTCCTAA +>ENSCAFT00000026349_canisfamiliaris +----ATGGCAGTGCCCAGTCTGTGCCCATGGGTCGCATGCCTGCTGGTGA +TC----CTCCTCTCCTTGGGATTTGGCCTGGACACACT--------AGAG +GTGTGC---CCCAGCCTGGACATCCGCTCAGAGGTGGCGGAGCTGCGCCG +GCTGGAGAACTGCAGCGTGGTGGAGGGCCATCTGCAGATCCTGCTCATGT +TCACGGCCACGGGCGAGGACTTCCGCGGCCTCAGCTTCCCGCGCCTCACT +CAGGTCACTGACTACCTGCTGCTCTTCCGCGTGTATGGCCTGGAGAGCCT +GCGGGACCTCTTCCCCAACCTCGCGGTCGTGCGTGGCGCCCGCCTCTTCC +TGGGTTATGCGCTGGTGGTCTACGAGATGCCGCACCTGCGGGACGTGGGG +CTGCCGGCGCTGGGGGCCGTGCTGCGTGGGGCCGTGCGGGTGGAGAAGAA +CCAGGAGCTCTGTCATCTCTCCACCATCGACTGGGGGCTGCTGCAGCCCT +CGCCCAGCGCCAACCACATCGTGGGCAACAA-GCTGCGTG--AGGAGTGT +GCCGACGTGTGCCCTGGTGTGCTGGGTGCCACCGGCCAGCCCTGCGCCAG +GACCACCTTCAGTGGGCACACCGACTACAGATGCTGGACCTCCAGCCACT +GCCAGAGAGTGTGTCCCTGCCCCCATGGG---CTGGCCTGCACAGCTGGG +GGTGAGTGCTGCCACACTGAATGCCTGGGGGGCTGCAGCCGGCCGGAAGA +CCCCCGCGCCTGCGTCGCCTGTCGCCACCTCTACTTCCAGGGTGCCTGCC +ACCGGGCCTGCCCTCCAGGCACCTACCAGCACGAGTCCTGGCGCTGTGTC +ACGGCAGAGCGCTGTGCCAGCCTGCGCTCTGTGCCCGGCCG--------- +----CACCTCCATC--TTTGGCATCCACGAGGGCAGCTGCCTGGCCCAGT +GCCCTCCGGGCTTCACCCGCAACGGCAGC---AGCATGTTCTGCCACAAG +TGTGAGGGGCTGTGCCCCAAAGAGTGCA---------AGGTGGGTACCAA +GACCATCGACTCCACGCAGGCGGCACAGGACCTGGCGGGCTGCACCCACG +TGGAGGGGAGCCTCATCATCAACCTCCGCCAGGGCTACAACCTAGAGCTG +GAGCTGCAACAGAGCCTGGGGCTGATAGAGACCATCACTGGCTTCCTCAA +GATCAAGCACTCCTTTGCCCTCGTGTCCCTGGGCTTTTTCAAGAACCTCA +AACTCATCCGAGGGGACGCCATGGTGGATGGGAACTACACCCTGTATGTG 
+CTGGACAACCAGAACCTACAGCAGCTGGGGGCCTGGGTGGCTGCGGGGCT +CACCATTCCCGTGGGCAAGATATACTTCGCTTTCAACCCTCGCCTCTGCT +TGGAGCACATCTACCGCCTGGAAGAGGTGACCGGCACGCGGGGACGGCAA +AACAAGGCTGAGATCAACCCCCGCACCAACGGAGACCGCGCCGCCTGCCA +AACTCGCACCCTGCGCTTCGTGTCCAACGTGACGCAAGCTGACAGCATCT +TGCTGCGCTGGGAGCGCTACGAGCCGCTGGAGGCTCGGGACCTGCTCAGC +TTCATCGTGTACTACAAGGAGTCCCCATTCCAGAATGCCACAGAGCACAC +AGGTCCAGATGCCTGTGGAACCCAGAGCTGGAACCTGCTGGATGTGGAGC +TGCCCTTAAG---CCGCACCCAAGAACCCGGGGTAA-------------- +----CTCTAGCACCCCTCAAGCCCTGGACACAATATGCAGTGTTTGTACG +GGCCATCACACTGACCACTGCTGAGGACAGCCCCCACCAAGGAGCCCAGA +GCCCCATCGTCTACCTCCGAACCCTGCCTGCGGCGCCCACTGTGCCCCAG +GACGTCATCTCCACGTCCAATTCCTCGTCCCACCTGCTCGTGCGCTGGAA +GCCACCGATTCAGCGCAACGGGAACATCACCTACTACCTGGTGCTGTGGC +AGCGTCTGGCGGAGGACGGCGACCTCTATCTCAACGACTACTGCCACCGC +GGCCTGCGGCTGCCCACCAGCAACAACGACCCGCG---CTTCGACCGCGA +GGA-----CGGTGAACTCGAAGCCGAGATGGAGCCGGGCTGCTGCCCTTG +CCAGCACCCACCGCCT--GGGCAGGTCCT--GCCGCCGCTGGAGGCGCAA +GAGGCCTCGTTCCAGAAGAAGTTCGAAAACTTCCTGCACAACGCCATCAC +CATCCCCAAGTCCCCCTGGAAGGTGACGTCCGTCA-ATAAGAGCCCTCAA +AGAC--------ATGCGGGGAGGCACCGCCGGGCGGCCGGGGC--GCTCC +GGCTTGGGGGCAACAGCTCG-------GATTTCGAGATCCAGGAGGACAA +AGTGCC---CCGGGAGCGAGCAGTGTTGAGTGGTCTGCGCCACTTTACGG +AATATCGTATCGACATCCACGCCTGCAACCACGCGGCTCACACCGTGGGC +TGCAGCGCGGCCACGTTCGTCTTCGCGCGCACCATGCCGCACAGAGAAGC +TGATGGCATCCCAGGGAAGGTGGCCTGGGAGGCAGCCAGCAAAAGCAGTG +TCCTCCTGCGCTGGCTGGAGCCACCTGACCCCAACGGACTCATCCTCAAG +TATGAAATCAAGTACCGCCGCTTGGGAGAGGAGGCCACAGTGCTATGTGT +GTCCCGCCTACGATATGCCAAATTTGGGGGTGTCCAGCTGGCCCTGCTGC +CCCCTGGAAACTACTCCGCCAGAGTTCGGGCAACCTCGCTGGCTGGCAAC +GGCTCCTGGACAGAAAGTATCGCTTTCTACGTCCCAGGCCCAGAGGAGGA +------AGACTCCGGGGGGCTGCACGTCCTTCTCACTGTCACCCCCGTGG +GGCTCATGCTGTTCATCATTCTTGCCGC-CCTCGGTTTCTTCTACGG--- +--------CAGGAAGAGAAACAGCAC-CCTCTATGCCTCAGTGAATCCGG +AGTACTTCAGCGCCTCTGAT---------------ATGTACATCCCTGAT +GAGTGGGAGGTGCCTCGGGAGCAGATCTCCATAATCCGAGAGCTGGGCCA +GGGCTCCTTTGGGATGGTATATGAAGGCCTGGCACAAGGACTAGAGGCTG +GAGAGGAGTCCACACCCGTGGCCCTGAAGACAGTGAATGAGTTGGCCAGC 
+CCACGAGAACGCATTGAGTTCCTCAAGGAAGCTTCTGTCATGAAGGCATT +CAAGTGTCACCATGTGGTACGTCTCCTGGGTGTTGTGTCTCAAGGCCAGC +CAACTCTGGTCATCATGGAGTTAATGACTCGTGGGGACCTCAAGAGCCAT +CTTCGATCTCTGCGGCCTGAGGCAGAG---AACAACCCTGGGCTCCCACG +GCCAGCACTGGGAGATATGATCCAGATGGCTGGTGAGATTGCAGATGGCA +TGGCTTACCTCGCTGCCAACAAGTTTGTGCATCGAGACCTGGCAGCCAGA +AACTGCATGGTGTCCCAGGACTTCACCGTCAAGATTGGGGACTTCGGGAT +GACTCGAGACGTGTACGAGACAGACTATTACCGCAAGGGCGGGAAAGGGC +TGCTGCCCGTGCGCTGGATGGCCCCCGAGTCCCTCAAAGATGGAATCTTC +ACCACACATTCGGATGTTTGGTCCTTCGGGGTGGTGCTCTGGGAGATCGT +GACCCTAGCTGAACAGCCCTACCAGGGTTTATCCAACGAGCAGGTGCTCA +AGTTTGTCATGGATGGTGGGGTCCTGGAGGAGCTGGAGAGCTGTCCCCTT +CAGCTGCAAGAACTGATGAGCCGCTGCTGGCAGCAGAACCCACGCCTGCG +GCCCACCTTCACCCACATCCTGGACAGCATTCGGGGGGAACTGCGGCCCT +CTTTCCGCCTCTTTTCCTTCTATTACAGCCCAGAGTGCCAGG----GGAG +CCAGGGGGCCCTGCT-----------------GCCTAATGCCAAACCCAA +CTCCCTACCAACCCCAGAAGGGGCT-------CCCTCAGACTGCATGCCC +CAAAATGGGGGTCCAGGGCACTGA-------------------------- +-------------------------------------------------- +------------------------------------------------- +>ENSMUST00000005671_homosapiens +ATGAAGTCTGGCTCCGGAGGAGGGTCCCCGA-CCTCGC---TGTGGGGGC +TCGTGTTTCTCTCCGCCGCGCTCTCTCTCTGGCCGACGAG---TGGAGAA +ATCTGTGGGCCCGGCATTGACATCCGCAACGACTATCAGCAGCTGAAGCG +CCTGGAAAACTGCACGGTGATCGAGGGCTTCCTCCACATCCTGCTCATCT +CCAAGGCC------GAGGACTACCGAAGCTACCGCTTCCCCAAGCTCACC +GTCATCACTGAGTACTTGCTGCTCTTCCGAGTCGCTGGCCTCGAGAGCCT +GGGAGACCTCTTCCCCAACCTCACAGTCATCCGTGGCTGGAAACTCTTCT +ACAACTACGCACTGGTCATCTTCGAGATGACCAATCTCAAGGATATTGGG +CTTTATAATCTGAGGAACATTACTCGGGGGGCCATCAGGATTGAGAAGAA +CGCCGACCTCTGTTACCTCTCCACCATAGACTGGTCTCTCATCTTGGATG +CGGTGTCCAATAACTACATTGTGGGGAACAA-GCCCCCGA--AGGAATGT +GGGGACCTGTGTCCAGGGACATTGGAGGAGAA-GCCCA--TGTGTGAGAA +GACCACCATCAACAATGAGTACAACTACCGCTGCTGGACCACAAATCGCT +GCCAGAAAATGTGCCCAAGTGTGTGCGGGAAGCGAGCCTGCACCGAGAAC +AACGAGTGCTGCCACCCGGAGTGCCTGGGCAGCTGCCACACACCGGACGA +CAACACAACCTGCGTGGCCTGCAGACACTACTACTACAAAGGCGTGTGTG +TGCCTGCCTGCCCGCCTGGCACCTACAGGTTCGAGGGCTGGCGCTGTGTG 
+GATCGCGATTTCTGC----GCCAACATCCCCAACGCTGAGA------GCA +G--TGACTCGGATGGCTTCGTTATCCACGACGATGAGTGCATGCAGGAGT +GTCCCTCAGGCTTCATCCGCAACAGCACCCAGAGCATGTACTGTATCCCC +TGCGAAGGCCCCTGCCCCAAAGTCTGCGGCGATGAAGAGAAGAAAACGAA +AACCATCGATTCGGTGACTTCTGCTCAAATGCTCCAAGGATGCACCATCC +TGAAGGGCAATCTGCTTATTAACATCCGGAGAGGCAATAACATTGCCTCG +GAGTTGGAGAACTTCATGGGGCTCATCGAGGTGGTGACCGGCTACGTGAA +GATCCGCCATTCTCATGCCTTGGTCTCCTTGTCCTTCCTGAAGAACCTTC +GTCTCATCTTAGGAGAGGAGCAGCTGGAAGGGAACTACTCCTTCTATGTC +CTAGACAACCAGAACTTGCAGCAGCTGTGGGACTGGAACCACCGGAACCT +GACCGTCAGGTCCGGAAAGATGTACTTTGCTTTCAATCCCAAGCTGTGTG +TCTCCGAAATTTACCGCATGGAGGAAGTGACCGGAACCAAGGGACGCCAG +AGCAAAGGGGACATAAACACCAGGAACAACGGAGAGCGAGCTTCCTGTGA +AAGTGATGTTCTCCGTTTCACCTCCACCACGACCTGGAAGAACCGAATCA +TCATAACGTGGCACCGGTACCGGCCGCCGGACTACCGGGATCTCATCAGC +TTCACAGTTTACTACAAGGAGGCACCATTTAAAAACGTTACGGAATATGA +CGGGCAGGATGCCTGTGGCTCCAACAGCTGGAACATGGTGGATGTAGACC +TGCCTCCGAA---CAAGGAGGGCGAGCCTGGCATTT-------------- +----TACTGCATGGGCTGAAGCCCTGGACCCAGTATGCTGTCTATGTCAA +GGCTGTGACCCTCACCATGGTGGAAAACGACCATATCCGTGGGGCCAAAA +GTGAAATCTTGTACATTCGCACCAATGCTTCAGTCCCTTCCATTCCCCTA +GATGTCCTCTCAGCATCAAACTCTTCCTCTCAGCTGATTGTGAAGTGGAA +TCCTCCAACTCTGCCCAATGGTAACTTGAGTTACTACATTGTGAGGTGGC +AGCGGCAGCCCCAGGATGGTTACCTGTACCGGCACAACTACTGCTCCAAA +GA--CAAAATA-CCCATCAGAAAGTACGCCGATGGTACCATCGACGTGGA +GGAGGTGACGGAAAATCCCAAGACAGAAGTGTGTGGTGGTGATAAAGGGC +CATGCTGCGCTTGCCCTAAAACTGAAGCTGAGAAGCAGGCTGAGAAGGAG +GAGGCTGAGTACCGTAAAGTCTTTGAGAATTTCCTTCACAATTCCATCTT +TGTGCCCAGGCCCGAAAGGAGGCGGAGAGACGTCATGCAAGTGGCCAACA +CGACCATGTCCAGCCGAAGCAGGAACACCACGGTAGCTGACACCTACAAT +ATCACAGACCCGGAGGAGTTCGAGACAGAGTACCCTTTCTTTGAGAGCAG +AGTGGATAACAAGGAGAGGACTGTCATCTCCAACCTCCGGCCTTTCACTC +TGTACCGCATCGATATCCACAGCTGCAACCACGAGGCTGAGAAGCTGGGC +TGCAGCGCCTCCAACTTCGTCTTTGCGAGAACCATGCCAGCAGAAGGAGC +AGATGATATCCCTGGTCCGGTGACCTGGGAGCCAAGACCCGAAAACTCCA +TCTTTTTAAAGTGGCCAGAACCCGAGAACCCCAACGGATTGATCCTAATG +TATGAAATTAAATACGGGTCGCAAGTCGAGGATCAGCGGGAA---TGTGT +GTCCAGACAGGAGTACAGGAAGTACGGAGGGGCCAAACTCAACCGTCTAA 
+ACCCAGGGAACTATACAGCCCGGATTCAGGCTACCTCCCTCTCTGGGAAT +GGGTCATGGACAGATCCTGTGTTCTTCTATGTC-CCCGCCAAAACGACGT +ATGA--GAACTTCATGCATCTGATCATTGCTCTGCCGGTTGCCATCCTGC +TGATCGTTGGGGGGCTGGTTATCATGCTGTATGTCTTCCATAGAAAGAGA +AATAACAGCAGGTTGGGCAATGGAGT-GCTGTATGCTTCTGTGAACCCCG +AGTATTTCAGCGCAGCTGAT---------------GTGTACGTGCCTGAT +GAATGGGAGGTAGCTCGAGAGAAGATCACCATGAACCGGGAGCTCGGACA +AGGGTCCTTTGGGATGGTCTATGAAGGAGTGGCCAAGGGTGTGGTCAAGG +ATGAACCCGAAACCAGAGTGGCCATCAAGACGGTAAACGAGGCTGCAAGT +ATGCGTGAAAGAATCGAGTTTCTCAACGAGGCCTCGGTGATGAAGGAGTT +CAATTGTCACCATGTGGTCCGGTTGCTGGGTGTGGTATCCCAAGGCCAGC +CCACCCTGGTCATCATGGAACTAATGACACGCGGTGATCTCAAAAGTTAT +CTCCGGTCTCTGAGGCCAGAAGTGGAGCAGAATAATCTAGTCCTCATTCC +TCCGAGCTTAAGCAAGATGATCCAGATGGCTGGAGAGATTGCAGATGGCA +TGGCCTACCTCAATGCCAACAAGTTCGTCCACAGAGACCTTGCTGCTAGG +AACTGCATGGTAGCCGAAGATTTCACAGTCAAAATTGGAGATTTCGGTAT +GACACGAGACATCTACGAGACGGACTACTACCGGAAAGGCGGGAAGGGGT +TGCTGCCTGTGCGCTGGATGTCTCCCGAGTCCCTCAAGGATGGTGTCTTC +ACTACTCATTCTGATGTCTGGTCCTTCGGGGTCGTCCTCTGGGAGATCGC +CACGCTGGCTGAGCAGCCCTACCAGGGCTTGTCCAACGAGCAAGTTCTTC +GTTTCGTCATGGAGGGTGGCCTTCTGGACAAGCCGGACAACTGCCCTGAT +ATGCTGTTTGAACTTATGCGCATGTGCTGGCAGTATAACCCCAAGATGCG +GCCCTCCTTCCTGGAGATCATCGGCAGCATCAAGGATGAGATGGAGCCCA +GCTTCCAGGAGGTCTCCTTCTACTACAGCGAGGAGAACAAGCCTCCCGAG +CCAGAGGAGCTGGAGATGG------------AGCCTGAGAACATGGAGAG +CGTCCCACTGGACCCTTCGGCCTCCTCAGCCTCCCTGCCTCTGCCTGAAA +GACACTCAGGACACAAGGCTGAGAATGGCCCGGGCCCTGGCGTGCTCGTT +CTCCGCGCCAGTTTTGATGAGAGACAGCCTTACGCTCACATGAACGGGGG +ACGCGCCAACGAGAGGGCCTTGCCTCTGCCCCAGTCCTCGACCTGCTGA +>ENSRNOT00000019267_rattusnorvegicus +ATGAAGTCTGGCTCCGGAGGAGGGTCCCCGA-CCTCGC---TGTGGGGGC +TCGTGTTTCTCTCCGCCGCGCTCTCGCTCTGGCCGACGAG---TGGAGAA +ATTTGTGGGCCCGGCATTGACATCCGCAACGACTATCAGCAGCTGAAGCG +CCTGGAAAACTGCACGGTGATCGAGGGCTTCCTCCACATCCTGCTCATCT +CCAAGGCC------GAGGACTACCGAAGCTACCGCTTCCCCAAGCTCACG +GTCATCACCGAGTACTTGCTGCTGTTTCGAGTGGCCGGCCTCGAGAGCCT +GGGAGACCTCTTCCCGAACCTCACAGTCATCCGTGGCTGGAAACTCTTCT +ACAATTACGCACTGGTCATCTTCGAGATGACCAATCTCAAGGATATTGGG 
+CTTTATAATCTGAGGAACATTACTCGGGGGGCCATCAGGATTGAGAAAAA +CGCTGACCTCTGTTACCTCTCCACCATAGACTGGTCTCTCATCTTGGATG +CGGTGTCCAATAACTACATTGTGGGGAACAA-GCCCCCAA--AGGAATGT +GGGGACCTGTGTCCAGGGACCTTGGAGGAGAA-GCCCA--TGTGTGAGAA +GACCACCATCAACAATGAGTACAACTACCGCTGCTGGACCACAAATCGCT +GCCAGAAAATGTGCCCAAGTGTGTGTGGGAAGCGAGCCTGCACCGAGAAC +AATGAGTGCTGCCACCCGGAGTGCCTAGGCAGCTGCCACACACCGGACGA +CAACACAACCTGCGTGGCCTGCCGACACTACTACTACAAAGGCGTGTGCG +TGCCTGCCTGCCCGCCTGGCACCTACAGGTTCGAGGGCTGGCGCTGTGTG +GACCGGGATTTCTGC----GCCAACATCCCCAACGCCGAGA------GCA +G--TGACTCAGATGGCTTCGTCATCCACGATGGCGAGTGCATGCAGGAGT +GTCCATCAGGCTTCATCCGCAACAGCACCCAGAGCATGTACTGTATCCCC +TGTGAAGGCCCCTGCCCCAAGGTCTGCGGCGATGAAGAAAAGAAAACGAA +AACCATCGATTCTGTGACGTCTGCCCAGATGCTCCAAGGGTGCACCATTT +TGAAGGGCAATCTGCTTATTAACATCCGGCGAGGCAATAACATTGCCTCG +GAATTGGAGAACTTCATGGGGCTCATCGAGGTGGTGACTGGCTACGTGAA +GATCCGCCATTCCCATGCCTTGGTCTCCTTGTCCTTCCTGAAGAACCTTC +GTCTCATCTTAGGAGAGGAGCAGCTAGAAGGGAACTACTCCTTCTATGTC +CTGGACAACCAGAACTTGCAGCAGCTGTGGGACTGGAACCACCGGAACCT +GACCGTCAGGTCAGGGAAAATGTACTTCGCTTTCAATCCCAAGCTGTGTG +TCTCTGAAATTTACCGGATGGAGGAGGTGACAGGAACAAAGGGACGGCAG +AGCAAAGGAGACATAAACACCAGGAACAACGGAGAGCGAGCTTCCTGTGA +AAGTGATGTTCTCCGTTTCACCTCCACCACCACCTGGAAGAACCGCATCA +TCATAACGTGGCACCGGTACCGGCCGCCGGACTACCGGGATCTCATCAGT +TTCACAGTCTACTACAAGGAGGCACCCTTTAAAAACGTCACGGAATACGA +CGGGCAGGATGCCTGTGGCTCCAACAGCTGGAACATGGTGGACGTGGACC +TGCCTCCGAA---CAAGGAGGGGGAGCCTGGCATTT-------------- +----TGCTGCATGGGCTGAAGCCCTGGACCCAGTATGCAGTCTATGTCAA +GGCTGTGACCCTCACCATGGTGGAAAACGACCACATCCGTGGGGCCAAAA +GTGAAATCTTGTACATTCGCACCAACGCTTCAGTTCCTTCCATTCCTCTA +GATGTCCTCTCGGCATCAAACTCCTCCTCTCAGCTGATCGTGAAGTGGAA +CCCCCCAACTCTGCCCAATGGTAACTTGAGTTACTACATTGTGAGGTGGC +AGCGGCAGCCGCAGGATGGCTATCTGTTCCGGCACAACTACTGCTCCAAA +GA--CAAAATA-CCCATCAGAAAGTACGCCGATGGTACCATCGATGTGGA +GGAGGTGACAGAAAATCCCAAGACAGAAGTGTGCGGTGGTGATAAAGGGC +CGTGCTGTGCCTGTCCTAAAACCGAAGCTGAGAAGCAGGCTGAGAAGGAG +GAGGCTGAGTACCGTAAAGTCTTTGAGAATTTCCTTCACAACTCCATCTT +TGTGCCCAGACCTGAGAGGAGGCGGAGAGATGTCCTGCAGGTGGCTAACA 
+CCACCATGTCCAGCCGAAGCAGGAACACCACGGTAGCTGACACCTACAAT +ATCACAGACCCGGAAGAGTTCGAGACAGAATACCCTTTCTTTGAGAGCAG +AGTGGATAACAAGGAGAGGACTGTCATTTCCAACCTCCGGCCTTTCACTC +TGTACCGTATCGATATCCACAGCTGCAACCACGAGGCTGAGAAGCTGGGC +TGCAGCGCCTCCAACTTTGTCTTTGCAAGAACCATGCCAGCAGAAGGAGC +AGATGACATTCCTGGCCCAGTGACCTGGGAGCCAAGACCTGAAAACTCCA +TCTTTTTAAAGTGGCCAGAACCCGAGAACCCCAACGGATTGATTCTAATG +TATGAAATAAAATACGGATCGCAAGTCGAGGATCAGCGGGAA---TGTGT +GTCCAGACAGGAGTACAGGAAGTATGGAGGGGCCAAACTTAACCGTCTAA +ACCCAGGGAACTATACGGCCCGGATTCAGGCTACCTCCCTCTCTGGGAAT +GGGTCGTGGACAGATCCTGTGTTCTTCTATGTC-CCAGCCAAAACAACGT +ATGA--GAATTTCATGCATCTGATCATTGCTCTGCCGGTTGCCATCCTGC +TGATTGTGGGGGGCCTGGTAATCATGCTGTATGTCTTCCATAGAAAGAGG +-------GTGTCTGGGCCAGCAGAAG-GCAGTA-GTTGGAAAGGGCCATT +TCCATCCTGTCTGTTCCTA----------------GTGTACGTGCCTGAT +GAATGGGAGGTAGCTCGGGAGAAGATCACCATGAACCGGGAGCTCGGACA +AGGGTCCTTCGGGATGGTCTATGAAGGAGTGGCCAAGGGCGTGGTCAAGG +ACGAGCCTGAAACCAGAGTGGCCATCAAGACAGTGAATGAGGCTGCAAGT +ATGCGTGAGAGAATTGAGTTTCTCAACGAGGCCTCAGTGATGAAGGAGTT +CAACTGTCACCATGTGGTCCGGTTGCTGGGTGTAGTATCCCAAGGCCAGC +CCACCCTGGTCATCATGGAACTAATGACACGTGGCGATCTCAAAAGTTAT +CTCCGGTCTCTAAGGCCAGAGGTGGAG---AATAATCTAGTCCTGATTCC +TCCGAGCTTAAGCAAGATGATCCAGATGGCTGGAGAGATTGCAGATGGCA +TGGCCTACCTCAATGCCAACAAGTTCGTCCACAGAGACCTGGCTGCTCGG +AACTGCATGGTAGCTGAAGATTTCACAGTCAAAATTGGAGATTTTGGTAT +GACACGAGACATCTACGAGACGGACTACTACCGGAAAGGCGGGAAGGGCT +TGCTGCCTGTGCGCTGGATGTCTCCCGAGTCCCTCAAGGATGGCGTCTTC +ACCACTCATTCCGATGTCTGGTCCTTTGGGGTCGTCCTCTGGGAGATCGC +CACTCTGGCTGAGCAGCCGTACCAGGGCCTGTCCAACGAGCAAGTTCTTC +GTTTCGTCATGGAGGGCGGCCTTCTGGACAAGCCGGATAACTGCCCCGAT +ATGCTGTTTGAACTTATGCGCATGTGCTGGCAGTACAACCCCAAGATGCG +GCCCTCCTTCCTGGAGATCATCGGAAGCATCAAGGATGAGATGGAGCCCA +GTTTCCAGGAGGTCTCCTTCTACTACAGCGAGGAGAACAAGCCTCCAGAG +CCGGAGGAGCTGGAGATGGAGCTGGAGCTGGAGCCCGAGAACATGGAGAG +CGTCCCGCTGGACCCTTCGGCCTCCTCAGCCTCCCTGCCTCTGCCTGAAA +GACACTCAGGACACAAGGCTGAGAACGGCCC------TGGCGTGCTGGTT +CTCCGTGCCAGTTTTGATGAGAGACAGCCTTACGCTCACATGAATGGGGG +ACGCGCCAACGAGAGGGCCTTGCCTCTGCCCCAGTCCTCAACCTGC--- 
+>ENSPTRT00000013802_pantroglodytes +ATGAAGTCTGGCTCCGGAGGAGGGTCCCCGA-CCTCGC---TGTGGGGGC +TCCTGTTTCTCTCCGCCGCGCTCTCGCTCTGGCCGACGAG---TGGAGAA +ATCTGCGGGCCAGGCATCGACATCCGCAACGACTATCAGCAGCTGAAGCG +CCTGGAGAACTGCACGGTGATCGAGGGCTACCTCCACATCCTGCTCATCT +CCAAGGCC------GAGGACTACCGCAGCTACCGCTTCCCCAAGCTCACG +GTCATTACCGAGTACTTGCTGCTGTTCCGAGTGGCTGGCCTCGAGAGCCT +CGGAGACCTCTTCCCCAACCTCACGGTCATCCGCGGCTGGAAACTCTTCT +ACAACTACGCCCTGGTCATCTTCGAGATGACCAATCTCAAGGATATTGGG +CTTTACAACCTGAGGAACATTACTCGGGGGGCCATCAGGATTGAGAAAAA +TGCTGACCTCTGTTACCTCTCCACTGTGGACTGGTCCCTGATCCTGGATG +CGGTGTCCAATAACTACATTGTGGGGAATAA-GCCCCCAA--AGGAATGT +GGGGACCTGTGTCCAGGGACCATGGAGGAGAA-GCCGA--TGTGTGAGAA +GACCACCATCAACAATGAGTACAACTACCGCTGCTGGACCACAAACCGCT +GCCAGAAAATGTGCCCGAGCACGTGTGGGAAGCGGGCGTGCACCGAGAAC +AACGAGTGCTGCCACCCCGAGTGCCTGGGCAGCTGCAGCGCGCCTGACAA +CGACACGGCCTGTGTAGCTTGCCGCCACTACTACTATGCCGGTGTCTGTG +TGCCTGCCTGCCCGCCCAACACCTACAGGTTTGAGGGCTGGCGCTGTGTG +GACCGTGACTTCTGC----GCCAACATCCTCAGCGCCGAGA------GCA +G--CGACTCCGAGGGGTTTGTGATCCACGACGGCGAGTGCATGCAGGAGT +GCCCCTCGGGCTTCATCCGCAACGGCAGCCAGAGCATGTACTGCATCCCT +TGTGAAGGTCCTTGCCCGAAGGTCTGTG---AGGAAGAAAAGAAAACAAA +GACCATTGATTCTGTTACTTCTGCTCAGATGCTCCAAGGATGCACCATCT +TCAAGGGCAATTTGCTCATTAACATCCGACGGGGGAATAACATTGCTTCA +GAGCTGGAGAACTTCATGGGGCTCATCGAGGTGGTGACGGGCTACGTGAA +GATCCGCCATTCTCATGCCTTGGTCTCCTTGTCCTTCCTAAAAAACCTTC +GCCTCATCTTAGGAGAGGAGCAGCTAGAAGGGAATTACTCCTTCTACGTC +CTCGACAACCAGAACTTGCAGCAACTATGGGACTGGGACCACCGCAACCT +GACCATCAAAGCAGGGAAAATGTACTTTGCTTTCAATCCCAAATTATGTG +TTTCCGAAATTTACCGCATGGAGGAAGTGACGGGGACTAAAGGGCGCCAA +AGCAAAGGGGACATAAACACCAGGAACAACGGGGAGAGAGCCTCCTGTGA +AAGTGACGTCCTGCATTTCACCTCCACCACCACGTCGAAGAATCGCATCA +TCATAACCTGGCACCGGTACCGGCCCCCTGACTACAGGGATCTCATCAGC +TTCACCGTTTACTACAAGGAAGCACCCTTTAAGAATGTCACGGAGTATGA +TGGGCAGGACGCCTGCGGCTCCAACAGCTGGAACATGGTGGACGTGGACC +TCCCGCCCAA---CAAGGACGTGGAGCCCGGCATCT-------------- +----TACTACATGGGCTGAAGCCCTGGACTCAGTACGCCGTTTACGTCAA +GGCTGTGACCCTCACCATGGTGGAGAATGACCATATCCGTGGGGCCAAGA 
+GTGAGATCTTGTACATTCGCACCAATGCTTCAGTTCCTTCCATTCCCTTG +GACGTTCTTTCAGCATCGAACTCCTCTTCTCAGTTAATCGTGAAGTGGAA +CCCTCCCTCTCTGCCCAACGGCAACCTGAGTTACTACATTGTGCGCTGGC +AGCGGCAGCCTCAGGACGGCTACCTTTACCGGCACAATTACTGCTCCAAA +GA--CAAAATC-CCCATCAGGAAGTATGCCGACGGCACCATCGACATTGA +GGAGGTCACAGAGAACCCCAAGACTGAGGTGTGTGGTGGGGAGAAAGGGC +CTTGCTGCGCCTGCCCCAAAACTGAAGCCGAGAAGCAGGCCGAGAAGGAG +GAGGCTGAATACCGCAAAGTCTTTGAGAATTTCCTGCACAACTCCATCTT +CGTGCCCAGACCTGAAAGGAAGCGGAGAGATGTCATGCAAGTGGCCAACA +CCACCATGTCCAGCCGAAGCAGGAACACCACGGCCGCAGACACCTACAAC +ATCACCGACCCGGAAGAGCTGGAGACAGAGTACCCTTTCTTTGAGAGCAG +AGTGGATAACAAGGAGAGAACTGTGATTTCTAACCTTCGGCCTTTCACAT +TGTACCGCATCGATATCCACAGCTGCAACCACGAGGCTGAGAAGCTGGGC +TGCAGCGCCTCCAACTTTGTCTTTGCAAGGACTATGCCCGCAGAAGGAGC +AGATGACATTCCTGGGCCAGTGACCTGGGAGCCAAGGCCTGAAAACTCCA +TCTTTTTAAAGTGGCCGGAACCTGAGAATCCCAATGGATTGATTCTAATG +TATGAAATAAAATACGGATCACAAGTTGAGGATCAGCGAGAA---TGTGT +GTCCAGACAGGAATACAGGAAGTATGGAGGGGCCAAGCTAAACCGGCTAA +ACCCGGGGAACTACACAGCCCGGATTCAGGCCACATCTCTCTCTGGGAAT +GGGTCGTGGACAGATCCTGTGTTCTTCTATGTC-CAGGCCAAAACAGGAT +ATGA--AAACTTCATCCATCTGATCATCGCTCTGCCCGTCGCTGTCCTGT +TGATCGTGGGAGGGTTGGTGATTATGCTGTACGTCTTCCATAGAAAGAGA +AATAACAGCAGGCTGGGGAATGGAGT-GCTGTATGCCTCTGTGAACCCGG +AGTACTTCAGCGCTGCTGAT---------------GTGTACGTTCCCGAT +GAGTGGGAGGTGGCTCGGGAGAAGATCACCATGAGCCGGGAACTTGGGCA +GGGGTCCTTTGGGATGGTCTATGAAGGAGTTGCCAAGGGTGTGGTGAAAG +ATGAACCTGAAACCAGAGTGGCCATTAAAACAGTGAACGAGGCCGCAAGC +ATGCGTGAAAGGATTGAGTTTCTCAACGAAGCTTCTGTGATGAAGGAGTT +CAATTGTCACCATGTGGTGCGATTGCTGGGTGTGGTGTCCCAAGGCCAGC +CAACACTGGTCATCATGGAACTGATGACACGGGGCGATCTCAAAAGTTAT +CTCCGGTCTCTGAGGCCAGAAATGGAG---AATAATCCAGTCCTAGCACC +TCCAAGCCTGAGCAAGATGATTCAGATGGCCGGAGAGATTGCAGACGGCA +TGGCATACCTCAACGCCAATAAGTTCGTCCACAGAGACCTTGCTGCCCGG +AATTGCATGGTAGCCGAAGATTTCACAGTCAAAATCGGAGATTTTGGTAT +GACGCGAGATATCTATGAGACAGACTATTACCGGAAAGGAGGGAAAGGGC +TGCTGCCCGTGCGCTGGATGTCTCCCGAGTCCCTCAAGGATGGAGTCTTC +ACCACTTACTCGGACGTCTGGTCCTTCGGGGTCGTCCTCTGGGAGATCGC +CACACTGGCCGAGCAGCCCTACCAGGGCTTGTCCAACGAGCAAGTCCTTC 
+GCTTCGTCATGGAGGGCGGCCTTCTGGACAAGCCAGACAACTGTCCCGAC +ATGCTGTTTGAACTGATGCGCATGTGCTGGCAGTATAACCCCAAGATGAG +GCCTTCCTTCCTGGAGATCATCAGCAGCATCAAAGAGGAGATGGAGCCTG +GCTTCCGGGAGGTCTCCTTCTACTACAGCGAGGAGAACAAGCTGCCCGAG +CCGGAGGAGCTGGACCTGG------------AGCCAGAGAACATGGAGAG +CGTCCCCCTGGACCCCTCGGCCTCC---------CTGCCACTGCCCGACA +GACACTCAGGACACAAGGCCGAGAACGGCCCCGGCCCTGGGGTGCTGGTC +CTCCGCGCCAGCTTCGACGAGAGACAGCCGTACGCACACATGAACGGGGG +CCGCAAGAACGAGCGGGCCTTGCCGCTGCCCCAGTCTTCGACCTGCTGA