Next changeset 1:b3833e5b50d4 (2016-12-19) |
Commit message:
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty |
added:
filter_by_fasta_ids.py t_coffee.xml t_coffee_to_cigar.pl test-data/cigar.tabular test-data/ids.txt test-data/input.fasta test-data/output1.fasta |
b |
diff -r 000000000000 -r 794a6e864a96 filter_by_fasta_ids.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter_by_fasta_ids.py Thu Dec 15 11:04:25 2016 -0500 |
[ |
#!/usr/bin/env python
""" A script to build specific fasta databases """
from __future__ import print_function

import logging
import sys


# ===================================== Iterator ===============================
class Sequence:
    """Hold one FASTA record: its header line and its raw sequence lines."""

    def __init__(self):
        self.header = ""
        self.sequence_parts = []

    def get_sequence(self):
        """Return the sequence as one string with all line breaks removed."""
        return "".join(
            part.rstrip().replace('\n', '').replace('\r', '')
            for part in self.sequence_parts)


class FASTAReader:
    """
    FASTA db iterator. Returns a single FASTA sequence object.

    The underlying file is closed automatically once the last record has been
    returned. The reader can also be used as a context manager, or closed
    explicitly with close(), when iteration may stop early.
    """

    def __init__(self, fasta_name):
        self.fasta_file = open(fasta_name)
        # One line of look-ahead: always holds the header of the next record,
        # or an empty string once the input is exhausted.
        self.next_line = self.fasta_file.readline()

    def __iter__(self):
        return self

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Close the FASTA file and end the iteration."""
        # Clearing the look-ahead makes any later next() call stop cleanly
        # instead of reading from a closed file.
        self.next_line = ""
        self.fasta_file.close()

    def __next__(self):
        """Return the next record as a Sequence; raise StopIteration at EOF."""
        header_line = self.next_line
        if not header_line:
            self.close()
            raise StopIteration

        record = Sequence()
        record.header = header_line.rstrip().replace('\n', '').replace('\r', '')

        # Collect every line up to (not including) the next header line.
        line = self.fasta_file.readline()
        while line and line[0] != '>':
            record.sequence_parts.append(line)
            line = self.fasta_file.readline()
        self.next_line = line
        return record

    # Python 2/3 compat
    next = __next__
# ==============================================================================


def target_match(target, search_entry):
    """Return the first item of *target* contained in *search_entry*.

    The comparison is a case-insensitive substring test: *search_entry* is
    upper-cased here and the items of *target* are expected to be upper-case
    already. Returns None when nothing matches.

    NOTE: because this is a substring test, a target such as ">ID1" also
    matches the header ">ID10".
    """
    search_entry = search_entry.upper()
    for atarget in target:
        if atarget in search_entry:
            return atarget
    return None


def main():
    """Print the FASTA records whose header matches one of the wanted IDs.

    Command line: filter_by_fasta_ids.py IDS_FILE FASTA_FILE

    IDS_FILE holds one identifier per line; blank lines are ignored. Each
    identifier selects at most one record (it is dropped after its first
    match). Matching records are written to stdout as a header line followed
    by the sequence on a single line. A summary of the counters is written to
    the log file 'filter_fasta_log'.
    """
    if len(sys.argv) < 3:
        sys.exit("Usage: filter_by_fasta_ids.py IDS_FILE FASTA_FILE")

    logging.basicConfig(filename='filter_fasta_log',
                        level=logging.INFO,
                        format='%(asctime)s :: %(levelname)s :: %(message)s',)

    # 'duplicates' is never incremented; the key is kept so that the log
    # output keeps its existing format.
    work_summary = {'wanted': 0, 'found': 0, 'duplicates': 0}
    targets = []

    with open(sys.argv[1]) as f_target:
        for line in f_target:
            identifier = line.strip().upper()
            # A blank line would yield the target ">", which is a substring of
            # every FASTA header and would select an unrelated record.
            if identifier:
                targets.append(">%s" % identifier)

    work_summary['wanted'] = len(targets)

    with FASTAReader(sys.argv[2]) as homd_db:
        for entry in homd_db:
            if not targets:
                # Every wanted ID has been found; nothing else can match.
                break
            target_matched_results = target_match(targets, entry.header)
            if target_matched_results:
                work_summary['found'] += 1
                targets.remove(target_matched_results)
                print(entry.header)
                print(entry.get_sequence())

    for parm, count in work_summary.items():
        logging.info('%s ==> %d', parm, count)


if __name__ == "__main__":
    main()
b |
diff -r 000000000000 -r 794a6e864a96 t_coffee.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/t_coffee.xml Thu Dec 15 11:04:25 2016 -0500 |
[ |
b'@@ -0,0 +1,210 @@\n+<tool id="t_coffee" name="T-Coffee" version="11.0.8">\n+ <description>multiple sequence alignment</description>\n+ <requirements>\n+ <requirement type="package" version="11.0.8">t_coffee</requirement>\n+ </requirements>\n+ <stdio>\n+ <exit_code range="1:" level="fatal" />\n+ </stdio>\n+ <version_command>\n+ t_coffee -version | grep Version\n+ </version_command>\n+ <command>\n+<![CDATA[\n+ #if str($input_type.filter_fasta) == \'yes\'\n+ #set $input = \'-infile=stdin\'\n+ python \'$__tool_directory__/filter_by_fasta_ids.py\' \'$input_type.identifiers\' \'$input_type.fasta_input\' |\n+ #end if\n+\n+ #set $method_opt = \'\'\n+ #if $method01\n+ #set $method_opt += str($method01) + \',\'\n+ #end if\n+ #if $method02\n+ #set $method_opt += str($method02) + \',\'\n+ #end if\n+ #if $method03\n+ #set $method_opt += str($method03) + \',\'\n+ #end if\n+ #if $method_opt\n+ #set $method_opt = \'-method \' + $method_opt[:-1]\n+ #end if\n+\n+ #set $output_opt = \'\'\n+ #if $outputs\n+ #set $outputs_arr = str($outputs).split(\',\')\n+ #for $o in $outputs_arr\n+ #if $o not in [\'cigar\', \'dnd\']\n+ #set $output_opt += $o + \',\'\n+ #end if\n+ #end for\n+ #if \'cigar\' in $outputs_arr and \'fasta_aln\' not in $outputs_arr\n+ #set $output_opt += \'fasta_aln,\'\n+ #end if\n+ #else\n+ #set $outputs_arr = []\n+ #end if\n+ #if $output_opt\n+ #set $output_opt = \'-output \' + $output_opt[:-1]\n+ #end if\n+\n+ t_coffee \'$input\' $method_opt $output_opt -n_core \\${GALAXY_SLOTS:-1} -run_name t_coffee_out -quiet\n+\n+ #if \'cigar\' in $outputs_arr\n+ && perl \'$__tool_directory__/t_coffee_to_cigar.pl\' t_coffee_out.fasta_aln > \'$cigar\'\n+ #end if\n+]]>\n+ </command>\n+ <inputs>\n+ <conditional name="input_type">\n+ <param name="filter_fasta" type="select" label="Filter FASTA input?">\n+ <option value="no">No</option>\n+ <option value="yes">Yes</option>\n+ </param>\n+ <when value="yes">\n+ <param name="fasta_input" type="data" format="fasta" label="FASTA sequences" 
/>\n+ <param name="identifiers" type="data" format="txt" label="List of FASTA sequence IDs" />\n+ </when>\n+ <when value="no">\n+ <param name="input" type="data" format="fasta" label="FASTA sequences" />\n+ </when>\n+ </conditional>\n+ <param name="method01" type="select" display="checkboxes" multiple="true" label="Pairwise Structual Method">\n+ <option value="sap_pair">sap_pair</option>\n+ <option value="TMalign_pair">TMalign_pair</option>\n+ <option value="mustang_pair">mustang_pair</option>\n+ </param>\n+ <param name="method02" type="select" display="checkboxes" multiple="true" label="Multiple Sequence Alignment Methods">\n+ <option value="pcma_msa">pcma_msa</option>\n+ <option value="clustalw_msa">clustalw_msa</option>\n+ <option value="dialigntx_msa">dialigntx_msa</option>\n+ <option value="poa_msa">poa_msa</option>\n+ <option value="muscle_msa">muscle_msa</option>\n+ <option value="probcons_msa">probcons_msa</option>\n+ <option value="t_coffee_msa">t_coffee_msa</option>\n+ <option value="amap_msa">amap_msa</option>\n+ <option value="kalign_msa">kalign_msa</option>\n+ </param>\n+ <param name="method03" type="select" display="checkboxes" multiple="true" label="Pairwise Sequence Alignment Methods">\n+ <option value="fast_pair">fast_pair</option>\n+ <option value="clustalw_pair">clustalw_pair</option>\n+ <option value="lalign_id_pair">lalign_id_pair</opti'..b'" label="${tool.name} on ${on_string}: fasta_seq" from_work_dir="t_coffee_out.fasta_seq">\n+ <filter>\'fasta_seq\' in outputs</filter>\n+ </data>\n+ <data name="msf_aln" format="msf" label="${tool.name} on ${on_string}: msf_aln" from_work_dir="t_coffee_out.msf_aln">\n+ <filter>\'msf_aln\' in outputs</filter>\n+ </data>\n+ <data name="phylip" format="phyloxml" label="${tool.name} on ${on_string}: phylip" from_work_dir="t_coffee_out.phylip">\n+ <filter>\'phylip\' in outputs</filter>\n+ </data>\n+ <data name="pir_aln" format="pir" label="${tool.name} on ${on_string}: pir_aln" 
from_work_dir="t_coffee_out.pir_aln">\n+ <filter>\'pir_aln\' in outputs</filter>\n+ </data>\n+ <data name="pir_seq" format="pir" label="${tool.name} on ${on_string}: pir_seq" from_work_dir="t_coffee_out.pir_seq">\n+ <filter>\'pir_seq\' in outputs</filter>\n+ </data>\n+ <data name="score_ascii" format="ascii" label="${tool.name} on ${on_string}: score_ascii" from_work_dir="t_coffee_out.score_ascii">\n+ <filter>\'score_ascii\' in outputs</filter>\n+ </data>\n+ <data name="score_html" format="html" label="${tool.name} on ${on_string}: score_html" from_work_dir="t_coffee_out.score_html">\n+ <filter>\'score_html\' in outputs</filter>\n+ </data>\n+ </outputs>\n+ <tests>\n+ <test>\n+ <param name="filter_fasta" value="no" />\n+ <param name="input" value="input.fasta" ftype="fasta" />\n+ <param name="method02" value="clustalw_msa" />\n+ <param name="outputs" value="fasta_aln" />\n+ <output name="fasta_aln" file="output1.fasta" />\n+ </test>\n+ <test>\n+ <param name="filter_fasta" value="yes" />\n+ <param name="fasta_input" value="input.fasta" ftype="fasta" />\n+ <param name="identifiers" value="ids.txt" ftype="txt" />\n+ <param name="method02" value="clustalw_msa" />\n+ <param name="outputs" value="cigar" />\n+ <output name="cigar" file="cigar.tabular" />\n+ </test>\n+ </tests>\n+ <help>\n+**What it does**\n+\n+This tool is a wrapper for the T-Coffee multiple sequence alignment suite. The input is a set of sequences in FASTA format. 
Apart from running on the complete FASTA input, it can also run on a subset of sequences by providing a list of the FASTA IDs.\n+\n+This wrapper offers selected advanced T-Coffee options like the selection of the alignment methods to use: \'\'Pairwise Structual Method\'\', \'\'Multiple Sequence Alignment Methods\'\' or \'\'Pairwise Sequence Alignment Methods\'\'.\n+\n+The T-Coffee documentation can be found at http://www.tcoffee.org/Projects/tcoffee/ .\n+\n+**Example**\n+\n+Suppose you have 5 sequences in FASTA format::\n+\n+ >1aboA\n+ NLFVALYDFVASGDNTLSITKGEKLRVLGYNHNGEWCEAQTKNGQGWVPS\n+ NYITPVN\n+ >1ycsB\n+ KGVIYALWDYEPQNDDELPMKEGDCMTIIHREDEDEIEWWWARLNDKEGY\n+ VPRNLLGLYP\n+ >1pht\n+ GYQYRALYDYKKEREEDIDLHLGDILTVNKGSLVALGFSDGQEARPEEIG\n+ WLNGYNETTGERGDFPGTYVEYIGRKKISP\n+ >1vie\n+ DRVRKKSGAAWQGQIVGWYCTNLTPEGYAVESEAHPGSVQIYPVAALERI\n+ N\n+ >1ihvA\n+ NFRVYYRDSRDPVWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRD\n+\n+By selecting "Yes" in output fasta_aln in the wrapper, the user will obtain the multiple alignment in FASTA format::\n+\n+ >1aboA\n+ NL-FVA---LYDFVASGDNTLSITKGEKLR-------VLGYN-------H\n+ NGEWCEA--QTKN-GQGWVPSNYIT------PVN\n+ >1ycsB\n+ KGVIYA---LWDYEPQNDDELPMKEGDCMT-------IIHREDE-----D\n+ EIEWWWA--RLND-KEGYVPRNLLG------LYP\n+ >1pht\n+ GYQYRA---LYDYKKEREEDIDLHLGDILTVNKGSLVALGFSDGQEARPE\n+ EIGWLNGYNETTG-ERGDFPGTYVEYIGRKKISP\n+ >1vie\n+ DR-----------VRK--KSGAAWQGQIVGWYCTNLTPEGYAVE------\n+ ------S--EAHPGSVQIYPVAALE------RIN\n+ >1ihvA\n+ NF-RVYYRDSRDPVWKGPA-KLLWKGEGAV-------VIQDN-------S\n+ DI--------------KVVPRRKAK-----IIRD\n+ </help>\n+ <citations>\n+ <citation type="doi">10.1006/jmbi.2000.4042</citation>\n+ </citations>\n+</tool>\n' |
b |
diff -r 000000000000 -r 794a6e864a96 t_coffee_to_cigar.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/t_coffee_to_cigar.pl Thu Dec 15 11:04:25 2016 -0500 |
[ |
#!/usr/bin/perl
#
use strict;
use warnings;

# A simple Perl script to convert FASTA sequence alignments into 2-column
# output where first column is FASTA id and second is CIGAR line
# TCoffee_to_cigar.pl <file>

# Return the CIGAR string for one aligned sequence.
# Every gap character '-' counts as D and every other character as M.
# Consecutive identical operations are condensed into a count followed by the
# operation; a count of 1 is left implicit (e.g. "AC--G" gives "2M2DM").
sub alignment_to_cigar {
    my ($sequence) = @_;
    my $cigar = '';
    # Walk the alignment one run at a time: a run is either all gaps or all
    # non-gap characters.
    while ($sequence =~ /(-+|[^-]+)/g) {
        my $run = $1;
        my $run_length = length($run);
        $cigar .= $run_length if $run_length > 1;
        $cigar .= (substr($run, 0, 1) eq '-') ? 'D' : 'M';
    }
    return $cigar;
}

# Print one output row: the FASTA id, a tab, and the CIGAR line of $sequence.
sub convert_and_print {
    my ($header, $sequence) = @_;
    print "$header\t" . alignment_to_cigar($sequence) . "\n";
    return;
}

# Read the aligned FASTA file and print one row per record.
# Dies with a message when no file name is given or the file cannot be opened.
sub main {
    my ($file1) = @_;
    die "Usage: $0 ALIGNED_FASTA_FILE\n" unless defined $file1;
    open(my $fh1, '<', $file1) or die "Cannot open '$file1': $!\n";

    # $header stays undefined until the first '>' line is seen, so that ids
    # such as "0" are kept and an empty input file produces no output row.
    my $header;
    my $sequence = '';
    while (defined(my $line = readline($fh1))) {
        # Strip LF and CRLF line endings; a leftover CR would otherwise be
        # counted as an aligned (M) column.
        $line =~ s/[\r\n]+\z//;
        if (substr($line, 0, 1) eq '>') {
            convert_and_print($header, $sequence) if defined $header;
            $header = substr($line, 1);
            $sequence = '';
        } else {
            $sequence .= $line;
        }
    }
    close $fh1;
    convert_and_print($header, $sequence) if defined $header;
    return;
}

# Run only when executed as a script, so the subs can be loaded on their own.
main($ARGV[0]) unless caller;

1;
b |
diff -r 000000000000 -r 794a6e864a96 test-data/cigar.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cigar.tabular Thu Dec 15 11:04:25 2016 -0500 |
b |
@@ -0,0 +1,3 @@ +ENSMUST00000091291_musmusculus 41M3D64MD473MD7M2D375M3D74M3D771M2D13MD227MD13M7D226M3D58M3D6MD21M3D437M2D33MD76MD1017M12D34M10D14M3D22M4D10M4D107M +ENSCAFT00000026349_canisfamiliaris 16D26M2D64MD422MD8M2D140M3D195M2D5M8D16M5D52M3D61M6D6M3D672M20D24MD311M8D25M2D58M2D11M2D103M5D17M6D18M5D22M2D10MD37M3D499M3D10M4D30M5D12M4D35M5D6M10D904M4D27M6D40M8D16M3D21M126D +ENSRNOT00000019267_rattusnorvegicus 75MD14M6D112M6D317MD8M2D40MD7M2D273M4D23M3D16M5D794M3D6M18D315M2D7MD46M2D249MD335M3D142M2D8MD8M2D14MD35MD63M5D16M3D16M4D10M6D1152M3D |
b |
diff -r 000000000000 -r 794a6e864a96 test-data/ids.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ids.txt Thu Dec 15 11:04:25 2016 -0500 |
b |
@@ -0,0 +1,3 @@ +ENSMUST00000091291_musmusculus +ENSCAFT00000026349_canisfamiliaris +ENSRNOT00000019267_rattusnorvegicus |
b |
diff -r 000000000000 -r 794a6e864a96 test-data/input.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input.fasta Thu Dec 15 11:04:25 2016 -0500 |
b |
b'@@ -0,0 +1,14 @@\n+>ENSMUST00000091291_musmusculus\n+ATGGGCTTCGGGAGAGGATGTGAGACGACGGCTGTGCCATTGCTGGTGGCCGTGGCCGCGTTGCTGGTGGGCACAGCCGGCCACCTGTACCCTGGAGAGGTGTGCCCTGGTATGGACATCCGGAACAACCTGACCAGGCTACATGAGCTGGAGAACTGCTCAGTCATTGAGGGCCATCTGCAGATCCTCCTGATGTTCAAGACCAGACCCGAAGATTTCCGAGACCTCAGTTTCCCCAAACTCATCATGATCACAGATTACCTGCTTCTCTTCCGTGTCTATGGTCTGGAAAGTCTGAAAGACCTCTTCCCAAATCTCACAGTCATCCGAGGCTCCCGTCTCTTCTTCAACTATGCCCTGGTTATCTTCGAGATGGTCCACCTGAAGGAGCTGGGGCTTTATAACCTCATGAACATCACCCGGGGCTCTGTCCGCATCGAGAAGAATAATGAGCTCTGCTACCTGGCCACTATCGACTGGTCCCGTATCCTGGATTCTGTGGAGGACAACTACATTGTACTGAACAAAGATGACAACGAGGAATGTGGGGATGTCTGTCCAGGCACCGCCAAGGGCAAGACCAACTGTCCTGCCACTGTCATCAATGGGCAGTTTGTGGAACGGTGCTGGACACACAGTCATTGTCAGAAAGTTTGCCCAACCATCTGTAAGTCACATGGCTGCACAGCTGAAGGCCTGTGCTGCCACAAAGAGTGCCTGGGCAACTGTTCGGAACCTGATGACCCCACCAAGTGTGTGGCCTGTCGCAACTTCTATCTGGATGGTCAGTGTGTGGAGACCTGCCCGCCACCCTACTATCACTTCCAGGACTGGCGCTGTGTGAACTTCAGCTTCTGCCAAGACCTTCACTTCAAATGCAGGAACTCTCGGAAGCCTGGCTGCCACCAATACGTCATTCACAACAATAAGTGCATCCCCGAGTGCCCGTCTGGCTATACCATGAATTCCAGCAACTTGATGTGCACCCCATGTCTGGGACCCTGCCCTAAGGTCTGCCAAATCCTCGAAGGTGAGAAGACCATTGATTCTGTGACATCTGCCCAGGAGCTCCGAGGCTGCACTGTGATCAACGGTAGCCTGATCATCAACATCCGAGGGGGCAACAACCTGGCAGCTGAGCTGGAGGCTAACCTTGGCCTCATTGAAGAAATTTCGGGATTTCTAAAGATCCGCCGCTCCTATGCTCTGGTATCACTTTCTTTCTTCAGGAAGCTACATCTGATTCGAGGAGAGACCTTGGAAATTGGGAACTATTCTTTTTATGCCTTGGACAACCAGAACCTGAGGCAACTCTGGGACTGGAGCAAACACAACCTCACCATCACTCAGGGCAAGCTCTTCTTCCATTACAACCCGAAACTCTGCTTGTCTGAAATTCACAAGATGGAAGAAGTCTCCGGAACTAAGGGCCGTCAGGAGAGGAACGACATTGCCCTGAAGACCAATGGGGACCAGGCATCGTGTGAAAATGAATTGCTTAAATTTTCTTTCATTCGGACATCTTTTGACAAGATCCTGTTGAGGTGGGAACCCTACTGGCCCCCCGACTTCCGAGATCTCCTGGGATTCATGTTGTTCTACAAAGAGGCCCCTTATCAGAATGTGACAGAGTTTGATGGGCAGGATGCTTGTGGCTCCAACAGCTGGACTGTGGTGGATATTGACCCGCCCCAGAGGTCCAACGACCCCAAGTCTCAGACCCCAAGCCACCCTGGGTGGCTGATGCGGGGCCTCAAACCCTGGACCCAATACGCCATCTTTGTGAAGACCTTGGTTACCTTCTCTGATGAACGGCGGACCTATGGAGCCAAAAGTGATATCATCTATGTGCAAACAGATGCCACTAATCCTTCTGTCCCCCTGGATCCCATATCAGTTTCTAATTCCTCATCTCAGATTATCTTAAAGTGGAAGCCCCCCTCTG
ACCCCAATGGCAACATCACACACTACCTGGTGTACTGGGAGAGGCAAGCAGAGGACAGCGAGCTGTTTGAGCTGGATTATTGTCTCAAAGGGCTGAAGCTCCCTTCACGGACCTGGTCCCCACCCTTTGAGTCTGATGATTCTCAGAAGCACAATCAGAGTGAGTATGACGACTCGGCCAGTGAGTGCTGCTCATGCCCTAAGACTGACTCTCAGATCCTGAAGGAGCTGGAGGAGTCTTCATTCAGGAAGACCTTCGAGGATTACCTGCACAACGTGGTTTTTGTCCCCAGGCCATCCCGAAAGCGAAGATCCCTTGAAGAGGTGGGGAATGTGACAGCCACCACACTCACACTTCCAGATTTCCCCAACGTCTCCTCTACCATTGTGCCCACAAGTCAGGAGGAGCACAGGCCATTTGAGAAAGTGGTGAACAAGGAGTCACTTGTCATCTCTGGCCTGAGACACTTCACTGGGTACCGCATTGAGCTGCAGGCATGCAATCAAGATTCCCCAGATGAGAGGTGCAGTGTGGCTGCCTACGTCAGTGCCCGGACCATGCCTGAAGCTAAGGCAGATGACATCGTTGGCCCTGTGACTCATGAAATCTTTGAGAACAATGTTGTACACTTAATGTGGCAAGAGCCAAAGGAACCTAATGGTCTGATTGTGCTATATGAAGTGAGCTATCGCCGATATGGTGATGAGGAGCTGCACCTCTGTGTCTCCCGGAAGCATTTTGCCCTGGAGCGGGGCTGCAGGCTGCGAGGGCTCTCCCCAGGAAACTACAGTGTTCGAGTCCGGGCTACCTCTCTGGCAGGAAATGGCTCCTGGACAGAACCCACCTATTTTTATGTGACTGATTATTTAGATGTCCCATCAAATATTGCCAAAATTATCATTGGACCCCTCATCTTTGTCTTCCTCTTCAGTGTTGTGATTGGAAGTATTTATCTATTTCTGAGAAAGAGGCAGCCGGATGGGCCAATGGGACCACTGTATGCATCTTCAAACCCTGAGTACCTCAGTGCCAGTGATGTGTTTCCATCTTCTGTGTACGTGCCGGACGAGTGGGAGGTGCCTCGAGAGAAGATCACCCTTCTTCGAGAGCTGGGGCAGGGATCCTTTGGTATGGTGTATGAAGGCAATGCCAAGGATATCATCAAGGGTGAGGCAGAGACCCGTGTTGCGGTTAAGACTGTCAATGAGTCAGCCAGTCTTCGAGAACGGATCGAGTTCCTCAATGAGGCATCAGTCATGAAGGGATTCACCTGCCATCATGTGGTCCGCCTTCTTGGGGTGGTATCCAAAGGACAGCCAACGCTGGTAGTGATGGAATTGATGGCTCATGGAGACCTGAAAAGTCACCTCCGTTCTCTGAGGCCAGATGCTGAGAATAACCCAGGCCGCCCTCCCCCTACCTTGCAAGAAATGATTCAGATGACAGCAGAAATTGCTGATGGCATGGCATACTTGAACGCCAAGAAGTTTGTGCACCGGGACCTGGCAGCTCGAAACTGCATGGTTGCCCATGATTTTACTGTCAAAATTGGAGACTTTGGAATGACAAGGGACATCTACGAGACAGATTACTATCGGAAAGGGGGCAAGGGACTGCTTCCTGTGAGGTGGATGTCACCTGAGTCCCTGAAGGATGGAGTCTTTACTGCTTCTTCTGATATGTGGTCCTTTGGGGTGGTCCTTTGGGAAATCACTAGCCTGGCTGAGCAACCTTATCAAGGCCTGTCTAATGAACAGGTGTTGAAGTTTGTCATGGATGGAGGCTATCTGGATCCCCCTGATAACTGTCCAGAGAGACTCACTGACCTGATGCGCATGTGCTGGCAGTTCAACCCCAAGATGAGGCCAACCTTCCTGGAAATCGTCAACCTGCTCAAGGATGACCTCCACCCCAGCTTTCCAGAAGTTTCCTTCTTCTACAGCGAGGAGAACAAGGCTCCTGAGAGTGAGGAGCTGGAGATGGAGTTTGAAGACATGGAG
AAT'..b'GCGGGCCAGGCATCGACATCCGCAACGACTATCAGCAGCTGAAGCGCCTGGAGAACTGCACGGTGATCGAGGGCTACCTCCACATCCTGCTCATCTCCAAGGCCGAGGACTACCGCAGCTACCGCTTCCCCAAGCTCACGGTCATTACCGAGTACTTGCTGCTGTTCCGAGTGGCTGGCCTCGAGAGCCTCGGAGACCTCTTCCCCAACCTCACGGTCATCCGCGGCTGGAAACTCTTCTACAACTACGCCCTGGTCATCTTCGAGATGACCAATCTCAAGGATATTGGGCTTTACAACCTGAGGAACATTACTCGGGGGGCCATCAGGATTGAGAAAAATGCTGACCTCTGTTACCTCTCCACTGTGGACTGGTCCCTGATCCTGGATGCGGTGTCCAATAACTACATTGTGGGGAATAAGCCCCCAAAGGAATGTGGGGACCTGTGTCCAGGGACCATGGAGGAGAAGCCGATGTGTGAGAAGACCACCATCAACAATGAGTACAACTACCGCTGCTGGACCACAAACCGCTGCCAGAAAATGTGCCCGAGCACGTGTGGGAAGCGGGCGTGCACCGAGAACAACGAGTGCTGCCACCCCGAGTGCCTGGGCAGCTGCAGCGCGCCTGACAACGACACGGCCTGTGTAGCTTGCCGCCACTACTACTATGCCGGTGTCTGTGTGCCTGCCTGCCCGCCCAACACCTACAGGTTTGAGGGCTGGCGCTGTGTGGACCGTGACTTCTGCGCCAACATCCTCAGCGCCGAGAGCAGCGACTCCGAGGGGTTTGTGATCCACGACGGCGAGTGCATGCAGGAGTGCCCCTCGGGCTTCATCCGCAACGGCAGCCAGAGCATGTACTGCATCCCTTGTGAAGGTCCTTGCCCGAAGGTCTGTGAGGAAGAAAAGAAAACAAAGACCATTGATTCTGTTACTTCTGCTCAGATGCTCCAAGGATGCACCATCTTCAAGGGCAATTTGCTCATTAACATCCGACGGGGGAATAACATTGCTTCAGAGCTGGAGAACTTCATGGGGCTCATCGAGGTGGTGACGGGCTACGTGAAGATCCGCCATTCTCATGCCTTGGTCTCCTTGTCCTTCCTAAAAAACCTTCGCCTCATCTTAGGAGAGGAGCAGCTAGAAGGGAATTACTCCTTCTACGTCCTCGACAACCAGAACTTGCAGCAACTATGGGACTGGGACCACCGCAACCTGACCATCAAAGCAGGGAAAATGTACTTTGCTTTCAATCCCAAATTATGTGTTTCCGAAATTTACCGCATGGAGGAAGTGACGGGGACTAAAGGGCGCCAAAGCAAAGGGGACATAAACACCAGGAACAACGGGGAGAGAGCCTCCTGTGAAAGTGACGTCCTGCATTTCACCTCCACCACCACGTCGAAGAATCGCATCATCATAACCTGGCACCGGTACCGGCCCCCTGACTACAGGGATCTCATCAGCTTCACCGTTTACTACAAGGAAGCACCCTTTAAGAATGTCACGGAGTATGATGGGCAGGACGCCTGCGGCTCCAACAGCTGGAACATGGTGGACGTGGACCTCCCGCCCAACAAGGACGTGGAGCCCGGCATCTTACTACATGGGCTGAAGCCCTGGACTCAGTACGCCGTTTACGTCAAGGCTGTGACCCTCACCATGGTGGAGAATGACCATATCCGTGGGGCCAAGAGTGAGATCTTGTACATTCGCACCAATGCTTCAGTTCCTTCCATTCCCTTGGACGTTCTTTCAGCATCGAACTCCTCTTCTCAGTTAATCGTGAAGTGGAACCCTCCCTCTCTGCCCAACGGCAACCTGAGTTACTACATTGTGCGCTGGCAGCGGCAGCCTCAGGACGGCTACCTTTACCGGCACAATTACTGCTCCAAAGACAAAATCCCCATCAGGAAGTATGCCGACGGCACCATCGACATTGAGGAGGTCACAGAGAACCCCAAGACTGAGGTGTGTGGTGGGGAGAAAG
GGCCTTGCTGCGCCTGCCCCAAAACTGAAGCCGAGAAGCAGGCCGAGAAGGAGGAGGCTGAATACCGCAAAGTCTTTGAGAATTTCCTGCACAACTCCATCTTCGTGCCCAGACCTGAAAGGAAGCGGAGAGATGTCATGCAAGTGGCCAACACCACCATGTCCAGCCGAAGCAGGAACACCACGGCCGCAGACACCTACAACATCACCGACCCGGAAGAGCTGGAGACAGAGTACCCTTTCTTTGAGAGCAGAGTGGATAACAAGGAGAGAACTGTGATTTCTAACCTTCGGCCTTTCACATTGTACCGCATCGATATCCACAGCTGCAACCACGAGGCTGAGAAGCTGGGCTGCAGCGCCTCCAACTTTGTCTTTGCAAGGACTATGCCCGCAGAAGGAGCAGATGACATTCCTGGGCCAGTGACCTGGGAGCCAAGGCCTGAAAACTCCATCTTTTTAAAGTGGCCGGAACCTGAGAATCCCAATGGATTGATTCTAATGTATGAAATAAAATACGGATCACAAGTTGAGGATCAGCGAGAATGTGTGTCCAGACAGGAATACAGGAAGTATGGAGGGGCCAAGCTAAACCGGCTAAACCCGGGGAACTACACAGCCCGGATTCAGGCCACATCTCTCTCTGGGAATGGGTCGTGGACAGATCCTGTGTTCTTCTATGTCCAGGCCAAAACAGGATATGAAAACTTCATCCATCTGATCATCGCTCTGCCCGTCGCTGTCCTGTTGATCGTGGGAGGGTTGGTGATTATGCTGTACGTCTTCCATAGAAAGAGAAATAACAGCAGGCTGGGGAATGGAGTGCTGTATGCCTCTGTGAACCCGGAGTACTTCAGCGCTGCTGATGTGTACGTTCCCGATGAGTGGGAGGTGGCTCGGGAGAAGATCACCATGAGCCGGGAACTTGGGCAGGGGTCCTTTGGGATGGTCTATGAAGGAGTTGCCAAGGGTGTGGTGAAAGATGAACCTGAAACCAGAGTGGCCATTAAAACAGTGAACGAGGCCGCAAGCATGCGTGAAAGGATTGAGTTTCTCAACGAAGCTTCTGTGATGAAGGAGTTCAATTGTCACCATGTGGTGCGATTGCTGGGTGTGGTGTCCCAAGGCCAGCCAACACTGGTCATCATGGAACTGATGACACGGGGCGATCTCAAAAGTTATCTCCGGTCTCTGAGGCCAGAAATGGAGAATAATCCAGTCCTAGCACCTCCAAGCCTGAGCAAGATGATTCAGATGGCCGGAGAGATTGCAGACGGCATGGCATACCTCAACGCCAATAAGTTCGTCCACAGAGACCTTGCTGCCCGGAATTGCATGGTAGCCGAAGATTTCACAGTCAAAATCGGAGATTTTGGTATGACGCGAGATATCTATGAGACAGACTATTACCGGAAAGGAGGGAAAGGGCTGCTGCCCGTGCGCTGGATGTCTCCCGAGTCCCTCAAGGATGGAGTCTTCACCACTTACTCGGACGTCTGGTCCTTCGGGGTCGTCCTCTGGGAGATCGCCACACTGGCCGAGCAGCCCTACCAGGGCTTGTCCAACGAGCAAGTCCTTCGCTTCGTCATGGAGGGCGGCCTTCTGGACAAGCCAGACAACTGTCCCGACATGCTGTTTGAACTGATGCGCATGTGCTGGCAGTATAACCCCAAGATGAGGCCTTCCTTCCTGGAGATCATCAGCAGCATCAAAGAGGAGATGGAGCCTGGCTTCCGGGAGGTCTCCTTCTACTACAGCGAGGAGAACAAGCTGCCCGAGCCGGAGGAGCTGGACCTGGAGCCAGAGAACATGGAGAGCGTCCCCCTGGACCCCTCGGCCTCCCTGCCACTGCCCGACAGACACTCAGGACACAAGGCCGAGAACGGCCCCGGCCCTGGGGTGCTGGTCCTCCGCGCCAGCTTCGACGAGAGACAGCCGTACGCACACATGAACGGGGGCCGCAAGAACGAGCGGGCCTTGCCGCTGCCCCAGTCTTCGACC
TGCTGA\n' |
b |
diff -r 000000000000 -r 794a6e864a96 test-data/output1.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output1.fasta Thu Dec 15 11:04:25 2016 -0500 |
b |
b'@@ -0,0 +1,425 @@\n+>ENSMUST00000091291_musmusculus\n+ATGGGCTTCGGGAGAGGATGTGAGACGACGG-CTGTGCCATTGCTGGTGG\n+CCGTGGCCGCGTTGCTGGTGGGCACAGCCGGCCACCTGTACCCTGGAGAG\n+GTGTGC---CCTGGTATGGACATCCGGAACAACCTGACCAGGCTACATGA\n+GCTGGAGAACTGCTCAGTCATTGAGGGCCATCTGCAGATCCTCCTGATGT\n+TCAAGACCAGACCCGAAGATTTCCGAGACCTCAGTTTCCCCAAACTCATC\n+ATGATCACAGATTACCTGCTTCTCTTCCGTGTCTATGGTCTGGAAAGTCT\n+GAAAGACCTCTTCCCAAATCTCACAGTCATCCGAGGCTCCCGTCTCTTCT\n+TCAACTATGCCCTGGTTATCTTCGAGATGGTCCACCTGAAGGAGCTGGGG\n+CTTTATAACCTCATGAACATCACCCGGGGCTCTGTCCGCATCGAGAAGAA\n+TAATGAGCTCTGCTACCTGGCCACTATCGACTGGTCCCGTATCCTGGATT\n+CTGTGGAGGACAACTACATTGTACTGAACAAAGATGACAACGAGGAATGT\n+GGGGATGTCTGTCCAGGCACCGCCAAGGGCAA-GACCA--ACTGTCCTGC\n+CACTGTCATCAATGGGCAGTTTGTGGAACGGTGCTGGACACACAGTCATT\n+GTCAGAAAGTTTGCCCAACCATCTGTAAGTCACATGGCTGCACAGCTGAA\n+GGCCTGTGCTGCCACAAAGAGTGCCTGGGCAACTGTTCGGAACCTGATGA\n+CCCCACCAAGTGTGTGGCCTGTCGCAACTTCTATCTGGATGGTCAGTGTG\n+TGGAGACCTGCCCGCCACCCTACTATCACTTCCAGGACTGGCGCTGTGTG\n+AACTTCAGCTTCTGCCAAGACCTTCACTTCAAATGCAGGAACTCTCGGAA\n+GCCTGGCTGCCACCAATACGTCATTCACAACAATAAGTGCATCCCCGAGT\n+GCCCGTCTGGCTATACCATGAATTCCAGC---AACTTGATGTGCACCCCA\n+TGTCTGGGACCCTGCCCTAAGGTCTGCCA-AATCCTCGAAGGTGA--GAA\n+GACCATTGATTCTGTGACATCTGCCCAGGAGCTCCGAGGCTGCACTGTGA\n+TCAACGGTAGCCTGATCATCAACATCCGAGGGGGCAACAACCTGGCAGCT\n+GAGCTGGAGGCTAACCTTGGCCTCATTGAAGAAATTTCGGGATTTCTAAA\n+GATCCGCCGCTCCTATGCTCTGGTATCACTTTCTTTCTTCAGGAAGCTAC\n+ATCTGATTCGAGGAGAGACCTTGGAAATTGGGAACTATTCTTTTTATGCC\n+TTGGACAACCAGAACCTGAGGCAACTCTGGGACTGGAGCAAACACAACCT\n+CACCATCACTCAGGGCAAGCTCTTCTTCCATTACAACCCGAAACTCTGCT\n+TGTCTGAAATTCACAAGATGGAAGAAGTCTCCGGAACTAAGGGCCGTCAG\n+GAGAGGAACGACATTGCCCTGAAGACCAATGGGGACCAGGCATCGTGTGA\n+AAATGAATTGCTTAAATTTTCTTTCATTCGGACATCTTTTGACAAGATCC\n+TGTTGAGGTGGGAACCCTACTGGCCCCCCGACTTCCGAGATCTCCTGGGA\n+TTCATGTTGTTCTACAAAGAGGCCCCTTATCAGAATGTGACAGAGTTTGA\n+TGGGCAGGATGCTTGTGGCTCCAACAGCTGGACTGTGGTGGATATTGACC\n+CGCCCCAGAGGTCCAACGACCCCAAGTCTCAGACCCCAAGCCACCCTGGG\n+TGGCTGATGCGGGGCCTCAAACCCTGGACCCAATACGCCATCTTTGTGAA\n+GACCTTGGTTACCTTCTC--TGATGAACGGC-GGAC
CTATGGAGCCAAAA\n+GTGATATCATCTATGTGCAAACAGATGCCACTAATCCTTCTGTCCCCCTG\n+GATCCCATATCAGTTTCTAATTCCTCATCTCAGATTATCTTAAAGTGGAA\n+GCCCCCCTCTGACCCCAATGGCAACATCACACACTACCTGGTGTACTGGG\n+AGAGGCAAGCAGAGGACAGCGAGCTGTTTGAGCTGGATTATTGTCTCAAA\n+GGGCTGAAGCT-CCCTTCACGGACCTGGTCCCCAC--CCTTTGAGTCTGA\n+TGAT-TCTCAGAAG--CACAATCAGAGTGAGTATGACGACTCGGCCAGTG\n+AGTGCTGCTCATGCCCTAAGACTGACTCTCAGATCCTGAAGGAGCTGGAG\n+GAGTCTTCATTCAGGAAGACCTTCGAGGATTACCTGCACAACGTGGTTTT\n+TGTCCCCAGGCCATCCCGAAAGCGAAGATCCCTTGAAGAGGTGGGGAATG\n+TGAC---AGCCACCACACTCACACTTCCAGATTTCCCCAACGTCTCCTCT\n+ACCATTGTGCCCACAAGTCAGGAG---GAGCACAGGCCATTTGAGAA---\n+AGTGGTGAACAAGGAGTCACTTGTCATCTCTGGCCTGAGACACTTCACTG\n+GGTACCGCATTGAGCTGCAGGCATGCAATCAAGATTCCCCAGATGAGAGG\n+TGCAGTGTGGCTGCCTACGTCAGTGCCCGGACCATGCCTGAAGCTAAGGC\n+AGATGACATCGTTGGCCCTGTGACTCATGAAATCTTTGAGAACAATGTTG\n+TACACTTAATGTGGCAAGAGCCAAAGGAACCTAATGGTCTGATTGTGCTA\n+TATGAAGTGAGCTATCGCCGATATGGTGATGAGGAGCTGCACCTCTGTGT\n+CTCCCGGAAGCATTTTGCCCTGGAGCGGGGCTGCAGGCTGCGAGGGCTCT\n+CCCCAGGAAACTACAGTGTTCGAGTCCGGGCTACCTCTCTGGCAGGAAAT\n+GGCTCCTGGACAGAACCCACCTATTTTTATGTGACTGATTATTTAGATGT\n+CCCATCAAATATTGCCAAAATTATCATTGGACCCCTCATCTTTGTCTTCC\n+TCTTCAGTGTTGTGATTGGAAGTATT---TATCTATTTCTGAGAAAGAGG\n+CA----GCCGGATGGGCCAATGGGACCACTGTATGCATCTTCAAACCCTG\n+AGTACCTCAGTGCCAGTGATGTGTTTCCATCTTCTGTGTACGTGCCGGAC\n+GAGTGGGAGGTGCCTCGAGAGAAGATCACCCTTCTTCGAGAGCTGGGGCA\n+GGGATCCTTTGGTATGGTGTATGAAGGCAATGCCAAGGATATCATCAAGG\n+GTGAGGCAGAGACCCGTGTTGCGGTTAAGACTGTCAATGAGTCAGCCAGT\n+CTTCGAGAACGGATCGAGTTCCTCAATGAGGCATCAGTCATGAAGGGATT\n+CACCTGCCATCATGTGGTCCGCCTTCTTGGGGTGGTATCCAAAGGACAGC\n+CAACGCTGGTAGTGATGGAATTGATGGCTCATGGAGACCTGAAAAGTCAC\n+CTCCGTTCTCTGAGGCCAGATGCTGAG---AATAACCCAGGCCGCCCTCC\n+CCCTACCTTGCAAGAAATGATTCAGATGACAGCAGAAATTGCTGATGGCA\n+TGGCATACTTGAACGCCAAGAAGTTTGTGCACCGGGACCTGGCAGCTCGA\n+AACTGCATGGTTGCCCATGATTTTACTGTCAAAATTGGAGACTTTGGAAT\n+GACAAGGGACATCTACGAGACAGATTACTATCGGAAAGGGGGCAAGGGAC\n+TGCTTCCTGTGAGGTGGATGTCACCTGAGTCCCTGAAGGATGGAGTCTTT\n+ACTGCTTCTTCTGATATGTGGTCCTTTGGGGTGGTCCTTTGGGAAATCAC\n+TAGCCTGGCTGAGCAACCTTAT
CAAGGCCTGTCTAATGAACAGGTGTTGA\n+AGTTTGTCATGGATGGAGGCTATCTGGATCCCCCTGATAACTGTCCA'..b'ACTACGCCCTGGTCATCTTCGAGATGACCAATCTCAAGGATATTGGG\n+CTTTACAACCTGAGGAACATTACTCGGGGGGCCATCAGGATTGAGAAAAA\n+TGCTGACCTCTGTTACCTCTCCACTGTGGACTGGTCCCTGATCCTGGATG\n+CGGTGTCCAATAACTACATTGTGGGGAATAA-GCCCCCAA--AGGAATGT\n+GGGGACCTGTGTCCAGGGACCATGGAGGAGAA-GCCGA--TGTGTGAGAA\n+GACCACCATCAACAATGAGTACAACTACCGCTGCTGGACCACAAACCGCT\n+GCCAGAAAATGTGCCCGAGCACGTGTGGGAAGCGGGCGTGCACCGAGAAC\n+AACGAGTGCTGCCACCCCGAGTGCCTGGGCAGCTGCAGCGCGCCTGACAA\n+CGACACGGCCTGTGTAGCTTGCCGCCACTACTACTATGCCGGTGTCTGTG\n+TGCCTGCCTGCCCGCCCAACACCTACAGGTTTGAGGGCTGGCGCTGTGTG\n+GACCGTGACTTCTGC----GCCAACATCCTCAGCGCCGAGA------GCA\n+G--CGACTCCGAGGGGTTTGTGATCCACGACGGCGAGTGCATGCAGGAGT\n+GCCCCTCGGGCTTCATCCGCAACGGCAGCCAGAGCATGTACTGCATCCCT\n+TGTGAAGGTCCTTGCCCGAAGGTCTGTG---AGGAAGAAAAGAAAACAAA\n+GACCATTGATTCTGTTACTTCTGCTCAGATGCTCCAAGGATGCACCATCT\n+TCAAGGGCAATTTGCTCATTAACATCCGACGGGGGAATAACATTGCTTCA\n+GAGCTGGAGAACTTCATGGGGCTCATCGAGGTGGTGACGGGCTACGTGAA\n+GATCCGCCATTCTCATGCCTTGGTCTCCTTGTCCTTCCTAAAAAACCTTC\n+GCCTCATCTTAGGAGAGGAGCAGCTAGAAGGGAATTACTCCTTCTACGTC\n+CTCGACAACCAGAACTTGCAGCAACTATGGGACTGGGACCACCGCAACCT\n+GACCATCAAAGCAGGGAAAATGTACTTTGCTTTCAATCCCAAATTATGTG\n+TTTCCGAAATTTACCGCATGGAGGAAGTGACGGGGACTAAAGGGCGCCAA\n+AGCAAAGGGGACATAAACACCAGGAACAACGGGGAGAGAGCCTCCTGTGA\n+AAGTGACGTCCTGCATTTCACCTCCACCACCACGTCGAAGAATCGCATCA\n+TCATAACCTGGCACCGGTACCGGCCCCCTGACTACAGGGATCTCATCAGC\n+TTCACCGTTTACTACAAGGAAGCACCCTTTAAGAATGTCACGGAGTATGA\n+TGGGCAGGACGCCTGCGGCTCCAACAGCTGGAACATGGTGGACGTGGACC\n+TCCCGCCCAA---CAAGGACGTGGAGCCCGGCATCT--------------\n+----TACTACATGGGCTGAAGCCCTGGACTCAGTACGCCGTTTACGTCAA\n+GGCTGTGACCCTCACCATGGTGGAGAATGACCATATCCGTGGGGCCAAGA\n+GTGAGATCTTGTACATTCGCACCAATGCTTCAGTTCCTTCCATTCCCTTG\n+GACGTTCTTTCAGCATCGAACTCCTCTTCTCAGTTAATCGTGAAGTGGAA\n+CCCTCCCTCTCTGCCCAACGGCAACCTGAGTTACTACATTGTGCGCTGGC\n+AGCGGCAGCCTCAGGACGGCTACCTTTACCGGCACAATTACTGCTCCAAA\n+GA--CAAAATC-CCCATCAGGAAGTATGCCGACGGCACCATCGACATTGA\n+GGAGGTCACAGAGAACCCCAAGACTGAGGTGTGTGGTGGGGAGAAAGGGC\n+CTTGCTGCGCCT
GCCCCAAAACTGAAGCCGAGAAGCAGGCCGAGAAGGAG\n+GAGGCTGAATACCGCAAAGTCTTTGAGAATTTCCTGCACAACTCCATCTT\n+CGTGCCCAGACCTGAAAGGAAGCGGAGAGATGTCATGCAAGTGGCCAACA\n+CCACCATGTCCAGCCGAAGCAGGAACACCACGGCCGCAGACACCTACAAC\n+ATCACCGACCCGGAAGAGCTGGAGACAGAGTACCCTTTCTTTGAGAGCAG\n+AGTGGATAACAAGGAGAGAACTGTGATTTCTAACCTTCGGCCTTTCACAT\n+TGTACCGCATCGATATCCACAGCTGCAACCACGAGGCTGAGAAGCTGGGC\n+TGCAGCGCCTCCAACTTTGTCTTTGCAAGGACTATGCCCGCAGAAGGAGC\n+AGATGACATTCCTGGGCCAGTGACCTGGGAGCCAAGGCCTGAAAACTCCA\n+TCTTTTTAAAGTGGCCGGAACCTGAGAATCCCAATGGATTGATTCTAATG\n+TATGAAATAAAATACGGATCACAAGTTGAGGATCAGCGAGAA---TGTGT\n+GTCCAGACAGGAATACAGGAAGTATGGAGGGGCCAAGCTAAACCGGCTAA\n+ACCCGGGGAACTACACAGCCCGGATTCAGGCCACATCTCTCTCTGGGAAT\n+GGGTCGTGGACAGATCCTGTGTTCTTCTATGTC-CAGGCCAAAACAGGAT\n+ATGA--AAACTTCATCCATCTGATCATCGCTCTGCCCGTCGCTGTCCTGT\n+TGATCGTGGGAGGGTTGGTGATTATGCTGTACGTCTTCCATAGAAAGAGA\n+AATAACAGCAGGCTGGGGAATGGAGT-GCTGTATGCCTCTGTGAACCCGG\n+AGTACTTCAGCGCTGCTGAT---------------GTGTACGTTCCCGAT\n+GAGTGGGAGGTGGCTCGGGAGAAGATCACCATGAGCCGGGAACTTGGGCA\n+GGGGTCCTTTGGGATGGTCTATGAAGGAGTTGCCAAGGGTGTGGTGAAAG\n+ATGAACCTGAAACCAGAGTGGCCATTAAAACAGTGAACGAGGCCGCAAGC\n+ATGCGTGAAAGGATTGAGTTTCTCAACGAAGCTTCTGTGATGAAGGAGTT\n+CAATTGTCACCATGTGGTGCGATTGCTGGGTGTGGTGTCCCAAGGCCAGC\n+CAACACTGGTCATCATGGAACTGATGACACGGGGCGATCTCAAAAGTTAT\n+CTCCGGTCTCTGAGGCCAGAAATGGAG---AATAATCCAGTCCTAGCACC\n+TCCAAGCCTGAGCAAGATGATTCAGATGGCCGGAGAGATTGCAGACGGCA\n+TGGCATACCTCAACGCCAATAAGTTCGTCCACAGAGACCTTGCTGCCCGG\n+AATTGCATGGTAGCCGAAGATTTCACAGTCAAAATCGGAGATTTTGGTAT\n+GACGCGAGATATCTATGAGACAGACTATTACCGGAAAGGAGGGAAAGGGC\n+TGCTGCCCGTGCGCTGGATGTCTCCCGAGTCCCTCAAGGATGGAGTCTTC\n+ACCACTTACTCGGACGTCTGGTCCTTCGGGGTCGTCCTCTGGGAGATCGC\n+CACACTGGCCGAGCAGCCCTACCAGGGCTTGTCCAACGAGCAAGTCCTTC\n+GCTTCGTCATGGAGGGCGGCCTTCTGGACAAGCCAGACAACTGTCCCGAC\n+ATGCTGTTTGAACTGATGCGCATGTGCTGGCAGTATAACCCCAAGATGAG\n+GCCTTCCTTCCTGGAGATCATCAGCAGCATCAAAGAGGAGATGGAGCCTG\n+GCTTCCGGGAGGTCTCCTTCTACTACAGCGAGGAGAACAAGCTGCCCGAG\n+CCGGAGGAGCTGGACCTGG------------AGCCAGAGAACATGGAGAG\n+CGTCCCCCTGGACCCCTCGGCCTCC---------CTGCCACTGCCCGACA\
n+GACACTCAGGACACAAGGCCGAGAACGGCCCCGGCCCTGGGGTGCTGGTC\n+CTCCGCGCCAGCTTCGACGAGAGACAGCCGTACGCACACATGAACGGGGG\n+CCGCAAGAACGAGCGGGCCTTGCCGCTGCCCCAGTCTTCGACCTGCTGA\n' |