changeset 0:0254731f047b draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/SynDivA commit 90c5ec603e2c6b8c49d2dc7ec1b1e97f9d8fb92c
author iuc
date Thu, 23 Jun 2022 22:32:13 +0000
parents
children
files args.py macros.xml syndiva.py syndiva.xml test-data/distri.png test-data/syndiva_datatest.fasta test-data/syndiva_report.html
diffstat 7 files changed, 1739 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/args.py	Thu Jun 23 22:32:13 2022 +0000
@@ -0,0 +1,115 @@
+# -*- coding: utf-8 -*-
+import getopt
+import os
+import sys
+
+
+def usage(info) -> str:
+    """Build the command-line help text of the SynDivA script.
+
+    :param info: optional message inserted right after the title; skipped when falsy
+    :return: the title, the optional message, the table header, a dashed rule and
+        one line per supported option
+    """
+    header = "Option\t\t\t\tfile\t\t\tDescription\n"
+    option_lines = (
+        "-i, --input\t\t\tfile.fasta\t\tFasta file that contains the DNA sequences\n",
+        "-o, --output_dir\t\t/path/for/output\tDirectory where output files will be written\n",
+        "-p, --pattern\t\t\tstring\t\t\tPattern of the sequence bank\n",
+        "-5, --restriction-site-5\tstring\t\t\tSequence of the restriction site in 5'\n",
+        "-3, --restriction-site-3\tstring\t\t\tSequence of the restriction site in 3'\n",
+    )
+    pieces = ["SynDivA script.\n\n"]
+    if info:
+        pieces.append(info)
+    pieces.append(header)
+    # The dashed rule is 60 characters longer than the header string
+    pieces.append('-' * (len(header) + 60))
+    pieces.append('\n')
+    pieces.extend(option_lines)
+    return "".join(pieces)
+
+
+def get_os_path_join(directory, filename):
+    """Return ``filename`` appended to ``directory`` with ``os.path.join``."""
+    joined = os.path.join(directory, filename)
+    return joined
+
+
+def get_os_path_name(input):
+    """Return the last component of the path ``input`` (``os.path.basename``)."""
+    name = os.path.basename(input)
+    return name
+
+
+def check_pattern(pattern):
+    """Tell whether ``pattern`` only contains characters allowed in a bank pattern.
+
+    Allowed characters are the 20 amino-acid letters (upper case), the digits and
+    the symbols ':', '-' and '*'.
+
+    :param pattern: pattern string to check
+    :return: True when every character is allowed (an empty pattern yields True)
+    """
+    authorized_pattern_letter = set('ACDEFGHIKLMNPQRSTVWY:0123456789-*')
+    # The former implementation compared len(pattern) with the length of a list of
+    # booleans built from pattern, which is always equal: every pattern was accepted.
+    return all(letter in authorized_pattern_letter for letter in pattern)
+
+
+class Args:
+    """Command-line arguments of the SynDivA script.
+
+    Instantiating the class parses ``sys.argv`` and exits the program (through
+    ``sys.exit``) when the arguments are missing or unusable.
+
+    Attributes, all None until set from the command line:
+        input: FASTA file (-i, --input)
+        output_dir: output directory (-o, --output_dir)
+        pattern: pattern of the sequence bank (-p, --pattern)
+        site_res_5: 5' restriction site (-5, --restriction-site-5)
+        site_res_3: 3' restriction site (-3, --restriction-site-3)
+    """
+
+    def __init__(self):
+        """
+        Set every option to None, then parse and validate sys.argv
+        """
+        self.input = None
+        self.output_dir = None
+        self.pattern = None
+        self.site_res_5 = None
+        self.site_res_3 = None
+        self.getargs()
+
+    def case(self):
+        """
+        Exit with the usage text when a mandatory option is missing
+        """
+        # Check of the files and directories
+        if not self.input:
+            sys.exit(usage("input (-i,--input) : \"%s\" must be indicated\n" % (self.input)))
+        if not self.output_dir:
+            sys.exit(usage("output directory (-o,--output_dir) : \"%s\" must be indicated\n" % (self.output_dir)))
+        if not self.pattern:
+            sys.exit(
+                usage("Pattern of the sequence bank (-p,--pattern) : \"%s\" must be indicated\n" % (self.pattern)))
+        if not self.site_res_5:
+            sys.exit(usage(
+                "Sequence of the restriction site in 5' (-5,--restriction-site-5) : \"%s\" must be indicated\n" % (
+                    self.site_res_5)))
+        if not self.site_res_3:
+            sys.exit(usage(
+                "Sequence of the restriction site in 3' (-3,--restriction-site-3) : \"%s\" must be indicated\n" % (
+                    self.site_res_3)))
+
+    def data_format(self):
+        """
+        Check that the provided information is usable
+        """
+        # Run without arguments
+        if len(sys.argv) == 1:
+            sys.exit(usage(None))
+        # The input has to be an existing file. This used to be merely printed, which
+        # postponed the failure to the first attempt to read the file.
+        if self.input and not os.path.isfile(self.input):
+            sys.exit(usage("input (-i,--input) : \"%s\" is not an existing file\n" % (self.input)))
+
+    def getargs(self):
+        """
+        Read the options given on the command line, then validate them
+        @return: None, the chosen options are stored on the instance
+        """
+        # Without argument
+        if len(sys.argv) <= 1:
+            sys.exit("Do 'python syndiva.py -h' for a usage summary")
+        # Options test. The long names advertised by usage() are declared here;
+        # "site_res_5"/"site_res_3" are the names that were historically declared
+        # and stay accepted as aliases.
+        try:
+            (opts, args) = getopt.getopt(sys.argv[1:], "i:o:p:5:3:h",
+                                         ["input=", "output_dir=", "pattern=",
+                                          "restriction-site-5=", "restriction-site-3=",
+                                          "site_res_5=", "site_res_3=", "help"])
+        except getopt.GetoptError as err:
+            # print help information and exit:
+            print(str(err))  # will print something like "option -a not recognized"
+            sys.exit(usage(None))
+        # Identification of options
+        for (o, a) in opts:
+            if o in ("-i", "--input"):
+                self.input = a
+            elif o in ("-o", "--output_dir"):
+                self.output_dir = a
+            elif o in ("-p", "--pattern"):
+                self.pattern = a
+            elif o in ("-5", "--restriction-site-5", "--site_res_5"):
+                self.site_res_5 = a
+            elif o in ("-3", "--restriction-site-3", "--site_res_3"):
+                self.site_res_3 = a
+            elif o in ("-h", "--help"):
+                sys.exit(usage(None))
+            else:
+                # Not an assert: assertions are stripped when Python runs with -O
+                sys.exit(usage("unhandled option : \"%s\"\n" % (o)))
+        # Verification of cases
+        self.case()
+        self.data_format()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Thu Jun 23 22:32:13 2022 +0000
@@ -0,0 +1,20 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="1.2.4">clustalo</requirement>
+            <requirement type="package" version="14.137">mcl</requirement>
+            <yield />
+  		</requirements>
+    </xml>
+    <xml name="matplotlib_requirement">
+        <requirement type="package" version="2.2.3">matplotlib</requirement>
+    </xml>
+    <xml name="biopython_requirement">
+        <requirement type="package" version="1.76">biopython</requirement>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1038/s41467-019-12528-4</citation>
+        </citations>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/syndiva.py	Thu Jun 23 22:32:13 2022 +0000
@@ -0,0 +1,407 @@
+#!/usr/bin/env python
+# title : syndiva.py
+# description : This script will analyze fasta files, look for restriction sites,
+# cut the sequences around the restriction sites,
+# translate the nucleic sequences into amino acids sequences.
+# author : Fabienne Wong Jun Tai and Benjamin Dartigues
+# creation date : 20121107
+# version : 1.0 - revised November 2012
+# version : 1.1 - revised March 2022
+# usage : python syndiva.py -i file.fasta -o /output/dir/ -p pattern -5 seq_restric_5' -3 seq_restric_3'
+# notes :
+# # python_version :3.7.11
+# # biopython_max_version  :1.72
+# ==============================================================================
+import math
+import re
+import subprocess
+import sys
+
+import matplotlib
+import numpy
+from args import Args
+from args import get_os_path_join, get_os_path_name
+from Bio import pairwise2
+from Bio import SeqIO
+from Bio.Seq import Seq
+from Bio.Seq import translate
+from Bio.SubsMat import MatrixInfo
+
+matplotlib.use('Agg')
+from matplotlib import pyplot as plot  # noqa: I202,E402
+
+
+# Parsing happens at import time: Args() reads sys.argv and exits on invalid input
+args = Args()
+# Variables initialization
+# Every generated file is written inside the output directory
+directory = args.output_dir
+mcl_file = get_os_path_join(directory, "mcl.in")  # pairwise scores given to mcl
+mcl_output = get_os_path_join(directory, "mcl.out")  # clusters written by mcl
+html_file = get_os_path_join(directory, "syndiva_report.html")
+graph_pic = get_os_path_join(directory, "distri.png")
+# Only the base name of the input file is kept; it is displayed in the report
+input_file = get_os_path_name(args.input)
+site_res_5 = args.site_res_5
+site_res_3 = args.site_res_3
+# Sequence ids sorted by category; 'ok_stop_ext' is never filled by the code below
+tag = {'mut': [], 'ok_stop_ext': [], 'stop': [], 'no_restric': [], 'no_multiple': [], 'amber': []}
+all_seq = []  # ids of all the sequences read from the input file
+all_seq_fasta = {}  # dictionary that will store information about all the sequences
+good_seq = {}  # dictionary that will store information about the valid sequences
+identical_clones = {}  # joined variable parts -> ids of the sequences sharing them
+var_seq_common = {}  # dictionary that will store the number of sequences that share the same variable parts
+align_scores = []  # one pairwise alignment score per pair of valid sequences
+nb_var_part = 0  # counter of variable pattern parts, incremented while sequences are analysed
+
+
+def get_identity(str1, str2):
+    """Return the fraction of identical positions between two strings.
+
+    The strings are compared position by position and the number of mismatches is
+    subtracted from the length of the longer string, which is also the denominator.
+
+    NOTE(review): positions of the longer string that have no counterpart are not
+    counted as mismatches. This is how the function has always behaved when
+    ``str2`` is the longer one; the callers visible in this script pass the two
+    rows of one alignment, which are presumably of equal length - confirm before
+    relying on it with strings of different lengths.
+
+    :param str1: first string
+    :param str2: second string
+    :return: identity as a float between 0 and 1; 1.0 for two empty strings
+    """
+    longest = max(len(str1), len(str2))
+    if longest == 0:
+        # Nothing differs; also avoids dividing by zero
+        return 1.0
+    # zip() stops at the shorter string: a longer str1 used to raise an IndexError
+    mismatches = sum(1 for left, right in zip(str1, str2) if left != right)
+    return (longest - mismatches) / longest
+
+
+def reverse_complement(_seq):
+    """Return the reverse complement of ``_seq`` as a plain string, using Bio.Seq.Seq."""
+    strand = Seq(_seq)
+    return str(strand.reverse_complement())
+
+
+def generate_aln(seq_dic, ids):
+    """Align protein sequences with Clustal Omega and return the alignment text.
+
+    :param seq_dic: mapping of sequence id to a dict holding the protein sequence
+        under the key 'prot'
+    :param ids: ids of the sequences to align, in the order they are sent to clustalo
+    :return: standard output of ``clustalo`` (Clustal format requested)
+    """
+    # Multiple Sequence Alignment via ClustalO, fed with FASTA records wrapped at 80
+    # characters. The flag must be given by keyword: the fourth positional argument
+    # of re.sub is ``count``, so a positional re.DOTALL (16) capped the wrapping to
+    # the first 16 lines and never set the flag.
+    _input = ''.join(
+        '>%s\n%s\n' % (sequence_id, re.sub("(.{80})", "\\1\n", seq_dic[sequence_id]['prot'], flags=re.DOTALL))
+        for sequence_id in ids)
+    p = subprocess.Popen(["clustalo", "-i", "-", "--outfmt", "clu"], shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, universal_newlines=True)
+    aln_out, aln_err = p.communicate(input=_input)
+    if p.returncode != 0:
+        # Still return whatever was produced, but do not hide the failure
+        sys.stderr.write("clustalo exited with status %d: %s\n" % (p.returncode, aln_err))
+    return aln_out
+
+
+def report_html(_html_file, _tag, _all_seq, _good_seq, _all_seq_fasta, _identical_clones, _nb_var_part, _var_seq_common, _align_scores, _args):
+    """Write the SynDivA HTML report.
+
+    Besides its parameters, the function reads the module-level names ``input_file``,
+    ``mcl_output`` and ``graph_pic`` and runs Clustal Omega through ``generate_aln``.
+    ``_all_seq`` and the lists of ``_tag`` are sorted in place.
+
+    :param _html_file: path of the report to write
+    :param _tag: dict of category name -> list of sequence ids
+    :param _all_seq: ids of all the input sequences
+    :param _good_seq: dict of valid sequence id -> dict with the keys 'prot' and 'var'
+    :param _all_seq_fasta: dict of sequence id -> dict with the key 'prot'
+    :param _identical_clones: dict of joined variable parts -> ids sharing them
+    :param _nb_var_part: number of entries of ``_var_seq_common`` to go through
+    :param _var_seq_common: dict of region number (as a string, from "1") -> dict of
+        variable sequence -> number of occurrences
+    :param _align_scores: list of pairwise alignment scores
+    :param _args: parsed arguments (pattern, site_res_5 and site_res_3 are displayed)
+    """
+
+    def fasta_record(_id, prot):
+        # FASTA record wrapped at 80 characters. ``flags`` has to be a keyword: given
+        # positionally, re.DOTALL was taken as ``count`` and limited the wrapping to
+        # the first 16 lines.
+        return '>%s\n%s\n' % (_id, re.sub("(.{80})", "\\1\n", prot, flags=re.DOTALL))
+
+    # Generate the html file for the report
+    _all_seq.sort()
+    for key in _tag.keys():
+        _tag[key].sort()
+    _good_seq = dict(sorted(_good_seq.items()))
+    good_ids = _good_seq.keys()
+    w = open(_html_file, 'w')
+    w.write(
+        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN""http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" '
+        'lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><title>SynDivA Report</title><link '
+        'href="http://twitter.github.com/bootstrap/assets/css/bootstrap.css" rel="stylesheet" /><style type="text/css">body {padding-top: 40px;}.subhead {padding: 40px '
+        '0;}.subhead h1 {font-size: 60px;}.fasta {   font-family: Monaco, Menlo, Consolas, "Courier New", monospace;   font-size: 12px;}code.grey{color: '
+        '#636D71;}</style></head><body><a id="top"></a><div class="navbar navbar-fixed-top"><div class="navbar-inner"><div class="container"><a class="brand" href="#top">SynDivA '
+        'Report</a><div class="nav-collapse collapse"><ul class="nav"><li><a href="#input">Input data</a></li><li><a href="#analysis">Sequences analysis</a></li><li><a '
+        'href="#variable">Variable regions analysis</a></li><li><a href="#cluster">Clustering</a></li><li><a href="#stat">Statistics</a></li><li><a '
+        'href="#annex">Annex</a></li></ul></div></div></div></div><div class="container-fluid"><header class="subhead"><h1>SynDivA Report</h1></header><div '
+        'class="page-header"><a id="input"></a><h2>Input data</h2></div>')
+
+    # Input data
+    w.write(
+        '<p>Input file:<br/><code class="grey">%s</code></p><p>Number of sequences in input file:<br/><code class="grey">%d</code></p><p>Pattern of the sequence bank:<br/><code '
+        'class="grey">%s</code></p><p>5\' restriction site:<br/><code class="grey">%s</code></p><p>3\' restriction site:<br/><code class="grey">%s</code></p>' % (
+            input_file, len(_all_seq), _args.pattern, _args.site_res_5, _args.site_res_3))
+
+    # Sequence analysis
+    w.write(
+        '<div class="page-header"><a id="analysis"></a><h2>Sequences analysis</h2></div><p>Caption:</p><ul><li class="text-success">Valid sequences that will be part of the next '
+        'analysis </li><li class="text-warning">Good sequences but will not be part of the next analysis</li><li class="text-error">Rejected sequences</li></ul><table '
+        'class="table table-striped table-bordered"><tr><th class="text-error">Absence of restriction sites</th><th class="text-error">Incorrect number of nucleotides between '
+        'the restriction sites</th><th class="text-error">Stop codon <u>inside</u> the area of interest</th><th class="text-warning">Mutation in the conserved regions</th><th '
+        'class="text-success">Valid sequences</th><th>Amber codon in the sequence (<u>inside</u> the area of interest)</th></tr>')
+    w.write(
+        '<tr><td class="text-error">%d sequence(s) (%.2f%%)</td><td class="text-error">%d sequence(s) (%.2f%%)</td><td class="text-error">%d sequence(s) (%.2f%%)</td><td '
+        'class="text-warning">%d sequence(s) (%.2f%%)</td><td class="text-success">%d sequence(s) (%.2f%%)</td><td>%d sequence(s)</td></tr>' % (
+            len(_tag['no_restric']), float(len(_tag['no_restric'])) / float(len(_all_seq)) * 100, len(_tag['no_multiple']), float(len(_tag['no_multiple'])) / float(len(_all_seq)) * 100, len(_tag['stop']),
+            float(len(_tag['stop'])) / float(len(_all_seq)) * 100, len(_tag['mut']), float(len(_tag['mut'])) / float(len(_all_seq)) * 100, len(good_ids),
+            float(len(good_ids)) / float(len(_all_seq)) * 100,
+            len(_tag['amber'])))
+    w.write(
+        '<tr><td class="text-error">%s</td><td class="text-error">%s</td><td class="text-error">%s</td><td class="text-warning">%s</td><td '
+        'class="text-success">%s</td><td>%s</td></tr></table>' % (
+            '<br/>'.join(_tag['no_restric']), '<br/>'.join(_tag['no_multiple']), '<br/>'.join(_tag['stop']), '<br/>'.join(_tag['mut']), '<br/>'.join(good_ids), '<br/>'.join(_tag['amber'])))
+    # Variable regions analysis
+    w.write(
+        '<div class="page-header"><a id="variable"></a><h2>Variable regions analysis</h2></div><p>The following group of sequences are identical clones on the variable '
+        'regions:</p>')
+    identical_clones_seq = _identical_clones.keys()
+    if identical_clones_seq:
+        for seq in identical_clones_seq:
+            ids = list(set(_identical_clones[seq]))  # return only one occurrence of each item in the list
+            w.write('<div class="row-fluid"><div class="span5"><pre>%d sequences (%.2f%% of valid sequences)<br/>%s</pre></div>' % (
+                len(ids), float(len(ids)) / float(len(good_ids)) * 100, '<br/>'.join(ids)))
+            w.write('<div class="span3"><table class="table table-striped table-bordered"><thead><tr><th>Variable region</th><th>Repeated sequence</th></tr></thead><tbody>')
+            # NOTE(review): each row is closed by </tr> but never opened by <tr>; left
+            # untouched because the reference report of the tool test expects this output
+            for z in range(len(_good_seq[ids[0]]['var'])):
+                w.write('<td>%d</td><td>%s</td></tr>' % (z + 1, _good_seq[ids[0]]['var'][z]))
+            w.write('</tbody></table></div></div>')
+    else:
+        w.write('<p>No clone was found.</p>')
+
+    # The table of repeated sequences is opened lazily, by the first sequence found
+    # more than once
+    first = True
+    for i in range(_nb_var_part):
+        keys = []
+        for k in _var_seq_common[str(i + 1)].keys():
+            nb = _var_seq_common[str(i + 1)][k]
+            if nb > 1:
+                if first:
+                    w.write(
+                        '<p>Here\'s the distribution of the repeated sequences in variable regions:</p><table class="table table-striped table-bordered"><thead><tr><th>Variable '
+                        'region</th><th>Repeated sequence</th><th>Number of occurrences (percentage of valid sequences)</th></tr></thead><tbody>')
+                    first = False
+                keys.append(k)
+        nb = len(keys)
+        if nb != 0:
+            w.write('<tr>')
+            for z in range(nb):
+                if z == 0:
+                    w.write('<td rowspan="%d">%d</td>' % (nb, i + 1))
+                w.write('<td>%s</td><td>%d (%.2f%%)</td></tr>' % (
+                    keys[z], _var_seq_common[str(i + 1)][keys[z]], float(_var_seq_common[str(i + 1)][keys[z]]) / float(len(good_ids)) * 100))
+    # NOTE(review): written even when no table was opened above (no repeated sequence)
+    w.write('</tbody></table>')
+    # Clustering
+    w.write('<div class="page-header"><a id="cluster"></a><h2>Clustering</h2></div><p>The following clusters were generated by MCL:</p>')
+    # One cluster per line of the mcl output, members separated by tabs
+    with open(mcl_output, 'r') as clusters:
+        for line in clusters:
+            members = line.split("\t")
+            w.write('<div class="row-fluid"><div class="span6"><pre>%d sequences (%.2f%% of valid sequences)<br/>%s</pre></div></div>' % (
+                len(members), float(len(members)) / float(len(good_ids)) * 100, '<br/>'.join(members)))
+    # Statistics
+    w.write('<div class="page-header"><a id="stat"></a><h2>Statistics</h2></div>')
+    w.write('<p>Here\'s some statistics about the valid sequences:</p><p>Mean for the pairwise alignement scores: %.2f<br/>Standard deviation: %.2f</p>' % (
+        float(numpy.mean(_align_scores)), float(numpy.std(_align_scores))))
+    w.write('<div class="row-fluid"><div class="span6"><img src="%s" alt="Distribution of the pairwise alignment score"></div>' % get_os_path_name(graph_pic))
+    w.write('<div class="span6"><table class="table table-striped table-bordered"><thead><tr><th>Pairwise Alignment Score</th><th>Number of occurrences</th></tr></thead><tbody>')
+    # Number of occurrences of each distinct score, in increasing order of score
+    for _score in sorted(set(_align_scores)):
+        w.write('<tr><td>%.2f</td><td>%d</td></tr>' % (_score, _align_scores.count(_score)))
+    w.write('</tbody></table></div></div>')
+    # Annex
+    w.write('<div class="page-header"><a id="annex"></a><h2>Annex</h2></div>')
+    w.write('<p><strong>Valid protein sequences</strong> in FASTA format:</p><textarea class="span8 fasta" type="text" rows="20" readonly="readonly">')
+    for _id in good_ids:
+        w.write(fasta_record(_id, _good_seq[_id]['prot']))
+    w.write('</textarea>')
+    aln_out = generate_aln(_good_seq, good_ids)
+    w.write(
+        '<p>Multiple sequence alignment of the <strong>valid sequences</strong> generated by Clustal Omega:</p><textarea class="span8 fasta" type="text" rows="20" '
+        'readonly="readonly">%s</textarea>' % str(
+            aln_out))
+
+    if _tag['no_multiple']:
+        w.write(
+            '<p><strong>Protein sequences with an incorrect number of nucleotides between the restriction sites</strong> in FASTA format:</p><textarea class="span8 fasta" '
+            'type="text" rows="20" readonly="readonly">')
+        for _id in _tag['no_multiple']:
+            w.write(fasta_record(_id, _all_seq_fasta[_id]['prot']))
+        w.write('</textarea>')
+
+    if _tag['mut']:
+        w.write('<p><strong>Mutated protein sequences</strong> in FASTA format:</p><textarea class="span8 fasta" type="text" rows="20" readonly="readonly">')
+        for _id in _tag['mut']:
+            w.write(fasta_record(_id, _all_seq_fasta[_id]['prot']))
+        w.write('</textarea>')
+        aln_out = generate_aln(_all_seq_fasta, _tag['mut'])
+
+        w.write(
+            '<p>Multiple sequence alignment of the <strong>mutated sequences</strong> generated by Clustal Omega:</p><textarea class="span8 fasta" type="text" rows="20" '
+            'readonly="readonly">%s</textarea>' % str(
+                aln_out))
+
+    if _tag['stop']:
+        w.write('<p><strong>Protein sequences with a stop codon</strong> in FASTA format:</p><textarea class="span8 fasta" type="text" rows="20" readonly="readonly">')
+        for _id in _tag['stop']:
+            w.write(fasta_record(_id, _all_seq_fasta[_id]['prot']))
+        w.write('</textarea>')
+
+    if _tag['amber']:
+        w.write('<p><strong>Protein sequences with an amber codon</strong> in FASTA format:</p><textarea class="span8 fasta" type="text" rows="20" readonly="readonly">')
+        for _id in _tag['amber']:
+            w.write(fasta_record(_id, _all_seq_fasta[_id]['prot']))
+        w.write('</textarea>')
+
+    w.write('</div></body></html>')
+    w.close()
+
+
+# --- Analysis of the input sequences -----------------------------------------
+# Each sequence ends up in at most one of: tag['no_restric'], tag['no_multiple'],
+# tag['stop'], tag['mut'] or good_seq; tag['amber'] is filled in addition to them.
+for seq_record in SeqIO.parse(args.input, "fasta"):
+    seq_id = seq_record.id
+    seq = str(seq_record.seq).upper()
+    all_seq.append(seq_id)
+    # Checking if both restriction sites are present in the sequence
+    if site_res_5 not in seq or site_res_3 not in seq:
+        tag['no_restric'].append(seq_id)
+        continue
+    site_res_5_pos = seq.index(site_res_5)
+    site_res_3_pos = seq.index(site_res_3)
+    if site_res_5_pos > site_res_3_pos:
+        # The 5' site lies after the 3' site: work on the reverse complement strand
+        length = math.fabs((site_res_5_pos + len(site_res_5)) - site_res_3_pos)
+        cut_seq = reverse_complement(seq[:site_res_5_pos + len(site_res_5)])
+    else:
+        # Otherwise the sequence is used as it is, starting at the 5' site
+        length = math.fabs((site_res_3_pos + len(site_res_3)) - site_res_5_pos)
+        cut_seq = seq[site_res_5_pos:]
+    # Translate nucleic sequence into amino acid sequence (kept for the report
+    # whatever the outcome of the next checks)
+    prot_seq = translate(cut_seq)
+    all_seq_fasta[seq_id] = {'prot': prot_seq}
+    # If the number of nucleic acids between the restriction sites isn't a multiple of 3, put the sequence away
+    if length % 3 != 0:
+        tag['no_multiple'].append(seq_id)
+        continue
+    # NOTE(review): a sequence whose translation holds no stop codon at all is neither
+    # tagged nor validated; behaviour kept as it was - confirm it is intended
+    if '*' not in prot_seq:
+        continue
+
+    # Checking if a stop codon is between the restriction sites, also checking if it is an amber codon.
+    # A stop codon other than the amber codon -> tag stop
+    stop = False
+    for stop_pos in (m.start() for m in re.finditer(r"\*", prot_seq)):
+        if stop_pos < length / 3:
+            if cut_seq[stop_pos * 3:stop_pos * 3 + 3] != "TAG":
+                tag['stop'].append(seq_id)
+                stop = True
+                break
+            if seq_id not in tag['amber']:
+                tag['amber'].append(seq_id)
+    if stop:
+        continue
+    # Stop codons located after the area of interest are not checked: the
+    # 'ok_stop_ext' tag is never assigned.
+
+    # Checking if there was a mutation in the fix part, if yes -> tag mut else retrieve variable parts
+    mut = False
+    pattern_part = args.pattern.split(":")
+    tmp_prot_seq = prot_seq
+    var_parts = []
+    for part in pattern_part[:-1]:  # not checking the latest fix part
+        if part[0].isdigit():
+            # Variable part.
+            # NOTE(review): this counter grows with every analysed sequence, so it
+            # ends up larger than the number of variable parts of the pattern; the
+            # surplus only creates empty entries in var_seq_common
+            nb_var_part += 1
+            continue
+        # Fix part missing from the protein -> mutation, flag then stop
+        if part not in tmp_prot_seq:
+            mut = True
+            tag['mut'].append(seq_id)
+            break
+        # Store the variable part if it exists, then drop it together with the fix
+        # part (tmp_prot_seq then starts right after the fix part)
+        pos_fix = tmp_prot_seq.index(part)
+        if pos_fix != 0:
+            var_parts.append(tmp_prot_seq[0:pos_fix])
+        tmp_prot_seq = tmp_prot_seq[pos_fix + len(part):]
+    # Treating latest fix part if no mutation before
+    if not mut:
+        last_var = pattern_part[-2]
+        if '-' in last_var:
+            var_max = int(last_var.split('-')[1])
+        else:
+            var_max = int(last_var)
+        # Only the first var_max + 1 characters of the last fix part are looked for
+        last_part = pattern_part[-1][0:var_max + 1]
+        if last_part not in tmp_prot_seq:
+            mut = True
+            tag['mut'].append(seq_id)
+        else:
+            pos_fix = tmp_prot_seq.index(last_part)
+            if pos_fix != 0:
+                var_parts.append(tmp_prot_seq[0:pos_fix])
+    # If no mutation the sequence is validated and all the info are stored
+    if not mut:
+        good_seq[seq_id] = {'dna': cut_seq, 'prot': prot_seq, 'var': var_parts}
+
+# Number of sequences read (the input file is no longer parsed a second time for it)
+nb_seq = len(all_seq)
+
+# Pairwise comparison needs at least 2 valid sequences, otherwise there is no data to continue
+if not good_seq:
+    sys.exit("There is no valid sequence among the input data. At least 2 valid sequences are necessary to proceed to the next step. The program will now exit")
+elif len(good_seq) == 1:
+    sys.exit("There is only one valid sequence among the input data. At least 2 valid sequences are necessary to proceed to the next step. The program will now exit")
+
+# Initialization of dict var_seq_common
+for n in range(nb_var_part):
+    var_seq_common[str(n + 1)] = {}
+
+# A real list: the dict_keys view is not subscriptable, and indexing it raised a
+# TypeError as soon as two sequences shared the same variable parts
+seq_keys = list(good_seq)
+matrix = MatrixInfo.pam30
+# Opening the file where the mcl input will be written
+with open(mcl_file, 'w+') as mcl:
+    for i, first_id in enumerate(seq_keys):
+        var_1 = good_seq[first_id]['var']
+        seq_1 = ''.join(var_1)
+
+        # Classifying variable sequences
+        for k, var_part in enumerate(var_1):
+            region = var_seq_common[str(k + 1)]
+            region[var_part] = region.get(var_part, 0) + 1
+
+        for second_id in seq_keys[i + 1:]:
+            var_2 = good_seq[second_id]['var']
+            seq_2 = ''.join(var_2)
+            # Comparing the sequences' variable parts to find identical clones
+            if var_1 == var_2:
+                identical_clones.setdefault(seq_1, []).extend([first_id, second_id])
+            # Align the 2 sequences using NWalign_PAM30 => replace by pairwise2
+            # The shorter sequence is given first; the alignment is computed once
+            if len(seq_2) > len(seq_1):
+                shorter, longer = seq_1, seq_2
+            else:
+                shorter, longer = seq_2, seq_1
+            best = pairwise2.align.globalds(shorter, longer, matrix, -11, -1)[0]
+            score = get_identity(best[0], best[1]) * 100
+            align_scores.append(score)
+            mcl.write('%s\t%s\t%0.2f\n' % (first_id, second_id, score))
+
+# Clusters formation
+# The output of mcl is discarded: pipes that are never read can block the child process
+subprocess.call(["mcl", mcl_file, "--abc", "-I", "6.0", "-o", mcl_output], shell=False, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+
+# Producing distribution graph
+plot.hist(align_scores, bins=numpy.arange(0, 101, 2))
+plot.xlabel('Pairwise Alignment Score')
+plot.ylabel('Number of occurrences')
+plot.title('Distribution of the pairwise alignment score')
+plot.grid(True)
+plot.savefig(graph_pic)
+
+# Generating html report
+report_html(html_file, tag, all_seq, good_seq, all_seq_fasta, identical_clones, nb_var_part, var_seq_common, align_scores, args)
+
+# Removing intermediate files
+subprocess.call(["rm", mcl_file, mcl_output], shell=False)
+print("HTML report has been generated in the output directory. The program will now exit.")
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/syndiva.xml	Thu Jun 23 22:32:13 2022 +0000
@@ -0,0 +1,79 @@
+<tool id="syndiva" name="syndiva" version="1.0" profile="21.05">
+    <description>
+        diversity analysis of synthetic libraries of a Fibronectin domain
+    </description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements">
+        <expand macro="matplotlib_requirement" />
+        <expand macro="biopython_requirement"/>
+    </expand>
+    <command detect_errors="exit_code">
+    <![CDATA[
+    mkdir -p '$report.extra_files_path' &&
+    python '$__tool_directory__/syndiva.py'
+        -i '$fasta_file'
+        -p '$pattern'
+        -5 '$restriction_site_5'
+        -3 '$restriction_site_3'
+        -o '$report.extra_files_path'
+    && cp '${report.extra_files_path}'/*\.html output.html
+    ]]> 
+    </command>
+    <inputs>
+        <param name="fasta_file" type="data" format="fasta" label="DNA FASTA file"/>
+        <param name="pattern" type="text" area="true" label="Sequence pattern" >
+            <validator type="empty_field" />
+            <validator type="regex" message="An expression is required and is allowed to contain only protein alphabet letters (ACDEFGHIKLMNPQRSTVWY), numbers and the characters -:*  ">^[ACDEFGHIKLMNPQRSTVWY\d\-:*]+$</validator>
+        </param>
+        <param name="restriction_site_5" type="text" label="5&#39; Restriction sites" help="Sequence of the restriction site in 5&#39;" >
+            <validator type="empty_field" />
+            <validator type="regex" message="An expression is required and is allowed to contain only DNA alphabet letters (ATGC)">^[ATGC]{4,10}$</validator>
+        </param>
+        <param name="restriction_site_3" type="text" label="3&#39; Restriction sites" help="Sequence of the restriction site in 3&#39;" >
+            <validator type="empty_field" />
+            <validator type="regex" message="An expression is required and is allowed to contain only DNA alphabet letters (ATGC)">^[ATGC]{4,10}$</validator>
+        </param>
+    </inputs>
+    <outputs>
+        <data format="html" name="report" from_work_dir="output.html" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="fasta_file" value="syndiva_datatest.fasta"/>
+            <param name="pattern" value="AAAGSSVSSVPTKLEVVAATPTSLLISWDA:4-6:V:1:YYRITYGETGGNSPVQEFTVPG:3:TATISGLSPGVDYTITVYA:11-12:PISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAETVESCLAKSHTENSFTNVWKDDKTLDRYANYE"/>
+            <param name="restriction_site_5" value="GCGGCCGC"/>
+            <param name="restriction_site_3" value="GGTACC"/>
+            <output name="report" file="syndiva_report.html"  ftype="html" lines_diff="2"/>
+        </test>
+    </tests>
+    <help>
+    <![CDATA[
+- **Scientific context**
+
+- *SynDivA* was developed to analyze the diversity of synthetic libraries of a Fibronectin domain.
+- This diversity is generated in the context of a project of directed evolution using a phage display approach, to obtain ligands with high affinity and specificity for biological targets. It was introduced in three loops of the domain. It is both a variation of amino acids and a variation of the lengths of loops.
+- *SynDivA* is used before the banks of interest are selected: it ensures the quality - and therefore the diversity - of the bank by determining, by projection, the number of unique and functional sequences.
+
+- **Description**
+
+- *SynDivA* is implemented in Python.
+
+- The computations are divided into three steps:
+
+    1. Pre-processing of the input data (determination of the orientation of the sequences, determination of the reading frame, translation of the nucleotide sequences into protein sequences, elimination of the "waste" sequences containing stop codons in the variable regions)
+    2. Pairwise alignment of the sequences
+    3. Statistics calculations on the alignments (distances)
+
+- The results are presented as an HTML report.
+
+- **Example**
+
+- Pattern : AAAGSSVSSVPTKLEVVAATPTSLLISWDA:4-6:V:1:YYRITYGETGGNSPVQEFTVPG:3:TATISGLSPGVDYTITVYA:11-12:PISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAETVESCLAKSHTENSFTNVWKDDKTLDRYANYE
+- 5' restriction site : GCGGCCGC
+- 3' restriction site : GGTACC
+
+ ]]></help>
+    <expand macro="citations" />
+</tool>
Binary file test-data/distri.png has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/syndiva_datatest.fasta	Thu Jun 23 22:32:13 2022 +0000
@@ -0,0 +1,991 @@
+>XL1_10_PSEXSEQ-REV_13 status=ok nucl=1301 crlStart=4 crlStop=1186 crlLen=1183 order=COL12-0DIL
+tagTTGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT
+GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG
+ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG
+AAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAA
+ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT
+TGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCG
+TTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCC
+CAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGT
+CGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGA
+TTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAA
+ACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAAC
+ATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCA
+GACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAA
+ATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGA
+AAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCG
+CCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGC
+TTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGG
+GAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACAT
+TAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTC
+CAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGC
+GGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACC
+AGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAG
+TTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTT
+TGATGGTGGTTAACGGCGGGATATAACATGAGCTGGcttcgggatcgtcg
+tatcccactaccgagatgtccgcaccaacgcgcaccccggactccgaaat
+ggcgcgcattggccccacggccttcggatcgttgggaaccagcatcgcat
+t
+>XL1_11_PSEXSEQ-REV_14 status=ok nucl=1299 crlStart=8 crlStop=1185 crlLen=1178 order=COL12-0DIL
+actctgcTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT
+GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG
+ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG
+AAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAA
+ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT
+TGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCG
+TTTCGCCATAGGTGATACGGTAATAGCTGACGCAGTTGCCGTGTGCATCC
+CAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGT
+CGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGA
+TTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAA
+ACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAAC
+ATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCA
+GACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAA
+ATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGA
+AAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCG
+CCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGC
+TTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGG
+GAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACAT
+TAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTC
+CAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGC
+GGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACC
+AGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAG
+TTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGGT
+TTGATGGTGGTTAACGGCGGGATATAACATGAGCTggcttcgggatcgtc
+gtatcccattaccgaaaatgtccgcaccaacgcgcaaccccggactcggg
+aaagggcgcgcattgcgcccaagcgccatctggatcgttgggaaaccag
+>XL1_12_PSEXSEQ-REV_15 status=ok nucl=1301 crlStart=6 crlStop=1184 crlLen=1179 order=COL12-0DIL
+agcctCGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGGCAGGTGGCGACGATGGTGTTGCAGTCGTTGTGAGCGTAAAC
+CGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTAT
+AAGAAGAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTT
+TCGCCATAGGTGATACGGTAATAGTGGACGGCGGAGTTGGGGAGGTTTGC
+ATCCCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTT
+TGGTCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGC
+TTGATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAG
+GTAAACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCA
+TAACATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATC
+TGCAGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAAC
+GTAAATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGA
+TGGAAAAGCCCAGACCCTTCGGCGCAGGCCGAGAATGCCAGCACCAGACC
+CGCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAA
+GCTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCC
+GGGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCAC
+ATTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTT
+TCCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGC
+GCGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCA
+CCAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAG
+AGTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTG
+GTTTGATGGTGGTTAACGGCGGGATATAACATGAgctgtcttcggtatcg
+tcgtatcccactaccgagatggccgcaccaacgcgcaacccggaatcggt
+aatggcccgcattgcgcccaaggccctcttgatcgttgggaaccagcatc
+c
+>XL1_13_PSEXSEQ-REV_16 status=ok nucl=1301 crlStart=9 crlStop=1187 crlLen=1179 order=COL12-0DIL
+agcttagcTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA
+AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG
+TAGAATAAGAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC
+GTTTCGCCATAGGTGATACGGTAATAGCGGACGAAGGAGCGGGGTGCATC
+CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG
+TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG
+ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA
+AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA
+CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC
+AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA
+AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG
+AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC
+GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG
+CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG
+GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA
+TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT
+CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG
+CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC
+CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA
+GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGG
+TTTGATGGTGGTTAACGGCGGGATATAACATGAGCTGgcttccggatcgt
+cgtatcccactaccgagatgtccgcccaacgcgcaacccggactcggtaa
+tgggccgcattgcgcccagcgccttcggatcgttgggaaccagcatccca
+a
+>XL1_14_PSEXSEQ-REV_17 status=ok nucl=1301 crlStart=3 crlStop=1198 crlLen=1196 order=COL12-0DIL
+agCTTCGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGGGTGGAGTGGCTGATGTAGCTGTAGATGAAGTAGCGAGCGTA
+AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG
+TATAAGAAGAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC
+GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC
+CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG
+TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG
+ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA
+AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA
+CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC
+AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA
+AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG
+AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC
+GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG
+CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG
+GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA
+TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT
+CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG
+CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC
+CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA
+GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGT
+TTGATGGTGGTTAACGGCGGGATATAACATGAGCTGGCTTCGGGATCGtc
+gtacccactaccgaaatgtccgcccaacgcgcagcccggactcggtaatg
+ggccgcattgcgcccagcgccatctgatcgttgggaaccagcatcccagt
+g
+>XL1_15_PSEXSEQ-REV_18 status=ok nucl=1301 crlStart=7 crlStop=1186 crlLen=1180 order=COL12-0DIL
+tgcatcTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGCA
+TAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGATG
+GTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAAA
+TCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAAACC
+GTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTTGA
+GGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTT
+CGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCCCAG
+CTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGG
+CACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTT
+CCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACC
+ACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATC
+TTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGAC
+GGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATG
+CTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAA
+GCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCA
+GAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTT
+GTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAG
+CATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAA
+TTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAG
+TCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGG
+GAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGT
+GAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTG
+CAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTTTGA
+TGGTGGTTAACGGCGGGATATAACATGAACTGGCTTcggtatcgtcgtat
+cccactaccgaaatgcccgcccaacgcgcagcccggactcggtaatgggc
+cgcattgcgcccagcgccatctgatcgttgggaaccagctccgcagtggg
+a
+>XL1_16_PSEXSEQ-REV_19 status=ok nucl=1301 crlStart=9 crlStop=1194 crlLen=1186 order=COL12-0DIL
+cgctttgcTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGGAAGCAGTCGCTGCTGTTGGCGTTGACGCCGCCGCAAGCGTA
+AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG
+TAGAATAATAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC
+GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC
+CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG
+TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG
+ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA
+AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA
+CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC
+AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA
+AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG
+AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC
+GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG
+CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG
+GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA
+TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT
+CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG
+CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC
+CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA
+GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGT
+TTGATGGTGGTTAACGGCGGGATATAACATGAACTGGCTTCGGGatcgtc
+gtatcccactaccgaaatgtccgcaccaacgcgcagcccggaatcggtaa
+tgggccgcattgcgcccagggccatctgatctttggcacccagctccgca
+t
+>XL1_17_PSEXSEQ-REV_20 status=ok nucl=1301 crlStart=5 crlStop=1177 crlLen=1173 order=COL12-0DIL
+ctgaAATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGCATAG
+TCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGATGGTG
+ATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAAATCG
+GGCAGCCGCGGCAGCTGAAGTGGATGGGGACGCTAGCGTAAACCGTAATG
+GTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTTGAGGAACT
+ACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTTCGCCAT
+AGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCCCAGCTGATC
+AGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGGCACGGA
+ACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTTCCATAA
+CCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACCACATCA
+TAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATCTTCCAG
+GCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGACGGCCAT
+CTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATGCTGTTG
+TCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAAGCCCAG
+ACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCAGAGCCA
+GCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTTGTGTGA
+AATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAGCATAAA
+GTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAATTGCGT
+TGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAGTCGGGA
+AACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGGAGAGG
+CGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGTGAGACG
+GGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTGCAGCAA
+GCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTTTGATGGTGG
+TTAACGGCGGGATATAACATGAACTGGcttcggtatcgtcgtatcccact
+accgaaatgtccgcaccaacgcgcagcccggattcggaaaggcccgcatt
+gcgcccagggccatctgatcgttgggaaccagcatcccagtgggaacaat
+g
+>XL1_18_PSEXSEQ-REV_21 status=ok nucl=1301 crlStart=6 crlStop=1178 crlLen=1173 order=COL12-0DIL
+atctcTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGCAT
+AGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGATGG
+TGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAAAT
+CGGGATGGTGACGAGGACGTCGAAGTGGCCGATGGGAGCGTAAACCGTAA
+TGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTATAAGAA
+TAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTTCGCC
+ATAGGTGATACGGTAATAGGCTACGGGGCGGTAGATTGCATCCCAGCTGA
+TCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGGCACG
+GAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTTCCAT
+AACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACCACAT
+CATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATCTTCC
+AGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGACGGCC
+ATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATGCTGT
+TGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAAGCCC
+AGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCAGAGCC
+AGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTTGTGTG
+AAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAGCATAA
+AGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAATTGCG
+TTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAGTCGGG
+AAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGGAGAG
+GCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGTGAGAC
+GGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTGCAGCA
+AGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTTTGATGGTG
+GTTAACGGCGGGATATAACATGAGCTGGcttcggtatcgtcgtatcccac
+taccgagatgtccgcaccaacgcgcagcccggaatcggtaatggcccgca
+ttgggcccagcgccatctgatctttgggaacccgcatccgcatggggaac
+a
+>XL1_19_PSEXSEQ-REV_22 status=ok nucl=1301 crlStart=8 crlStop=1188 crlLen=1181 order=COL12-0DIL
+tcatcgcTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT
+GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG
+ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG
+AAATCGGGAAGGCGCAGCTGATGGCGGAGAAGCTGGTGCCGTGAGCGTAA
+ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT
+ATAAGAATAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCG
+TTTCGCCATAGGTGATACGGTAATAGAGGACGATGCCGCGGAAGATGGAT
+GCATCCCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAG
+TTTGGTCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCA
+GCTTGATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTC
+AGGTAAACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTG
+CATAACATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAA
+TCTGCAGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTA
+ACGTAAATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGC
+GATGGAAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCA
+GACCCGCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTA
+ATAAGCTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACG
+AGCCGGGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAAC
+TCACATTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCC
+GCTTTCCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCA
+ACGCGCGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTT
+TTCACCAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTG
+AAAGAGTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAAT
+CCTGTTTGATGGTGGTTAACGGCGGGATATAACATGAActggcttccgga
+tcgtcgtatcccactaccgagatgtccgcaccaacgcgcaccccggaatc
+cggaaagggcccgcattgcggccaagcgcctcttgatcgttgggaacaag
+a
+>XL1_1_PSEXSEQ-REV_5 status=ok nucl=1301 crlStart=5 crlStop=1182 crlLen=1178 order=COL12-0DIL
+cgtgGAATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGCATA
+GTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGATGGT
+GATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAAATC
+GGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAAACCGT
+AATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTTGAGG
+AACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTTCG
+CCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCCCAGCT
+GATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGGCA
+CGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTTCC
+ATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACCAC
+ATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATCTT
+CCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGACGG
+CCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATGCT
+GTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAAGC
+CCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCAGA
+GCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTTGT
+GTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAGCA
+TAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAATT
+GCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAGTC
+GGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGGA
+GAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGTGA
+GACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTGCA
+GCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGGTTTGAT
+GGTGGTTAACGGCGGGATATAACATGAGCTGGcttcggtatcgtcgtatc
+ccactaccgaaatgtccgcaccaacgcgcagcccggaatcggtaatggcc
+cgcattggccccagcgccttctgatcgttggcaaccagctccgcagtggg
+a
+>XL1_22_PSEXSEQ-REV_24 status=ok nucl=1301 crlStart=5 crlStop=1180 crlLen=1176 order=COL12-0DIL
+ctatTGGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGGAAGATGACGTTGTCGTCGTTGGGGGGGTTGGTAGCGTAAAC
+CGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTAG
+AATAAGAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTCCGCCCGTTT
+CGCCATAGGTGATACGGTAATAGGCGACGAAGGTGATGAGTGCATCCCAG
+CTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGG
+CACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTT
+CCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACC
+ACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATC
+TTCCAGGCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGACG
+GCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATGC
+TGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAAG
+CCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCAG
+AGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTTG
+TGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAGC
+ATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAAT
+TGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAGT
+CGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGG
+AGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGTG
+AGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTGC
+AGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGGTTTGA
+TGGTGGTTAACGGCGGGATATAACATGAGCtggcttcgggatcgtcgtat
+cccactaccgaaatgtccgcaccaacgcgcagcccggaatcggtaatggg
+ccgcattgggcccagcgccatctgatctttgggaaccagcatcccaatgg
+g
+>XL1_23_PSEXSEQ-REV_25 status=ok nucl=1301 crlStart=9 crlStop=1183 crlLen=1175 order=COL12-0DIL
+ttattcgcTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGGCAGCGGGAGAAGGGGTTGTCGGAGATGTCGCCAGCGTAAAC
+CGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTAT
+AATAAGAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTT
+TCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCCCA
+GCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCG
+GCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATT
+TCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAAC
+CACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTAGTGCATAACAT
+CTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGA
+CGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAAT
+GCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAA
+AGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCC
+AGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTT
+TGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGA
+GCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTA
+ATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCA
+GTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGG
+GGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAG
+TGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTT
+GCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTTTG
+ATGGTGGTTAACGGCGGGATATAACATGAACTGgcttcggtatcgtcgta
+tcccactaccgagatggcccgcacaacgcgcaacccggactcggtaatgg
+gccgcattggccccagcgccatctgatctttgggaaccagcatcgcagtg
+g
+>XL1_24_PSEXSEQ-REV_26 status=ok nucl=1301 crlStart=5 crlStop=1178 crlLen=1174 order=COL12-0DIL
+atctTGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT
+GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG
+ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG
+AAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAA
+ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT
+TGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGAGTTACCCCCGTT
+TCGCCATAGGTGATACGGTAATAGCAGACGTAGTCGGGGCTGCTGTGTGC
+ATCCCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTT
+TGGTCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGC
+TTGATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAG
+GTAAACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCA
+TAACATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATC
+TGCAGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAAC
+GTAAATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGA
+TGGAAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGA
+CCCGCCAGAGCCACCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATA
+AGCTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGC
+CGGGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCA
+CATTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCT
+TTCCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACG
+CGCGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTC
+ACCAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAA
+GAGTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCT
+GGTTTGATGGTGGTTAACGGCGGGATATaacatgaactggcttcggtatc
+gtcgtatcccactaccgaaatgtcccgaccaacgcgcagcccggactcgg
+taatgggccgcattgggcccagggccatctgatctttgggaaccagctcc
+g
+>XL1_25_PSEXSEQ-REV_27 status=ok nucl=1301 crlStart=10 crlStop=1192 crlLen=1183 order=COL12-0DIL
+aacctcgctCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA
+AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG
+TTGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC
+GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC
+CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG
+TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG
+ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA
+AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA
+CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC
+AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA
+AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG
+AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC
+GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG
+CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG
+GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA
+TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT
+CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG
+CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC
+CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAGAGA
+GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGG
+TTTGATGGTGGTTAACGGCGGGATATAACATGAGCTGGCTTCgggatcgt
+cgtatcccactaccgagatgtccgcccaacgcgcagcccggaatcggtat
+tgggcccgattgcgcccagcgccatctgatcgttgggaaccagcatccgc
+a
+>XL1_26_PSEXSEQ-REV_28 status=ok nucl=1301 crlStart=6 crlStop=1173 crlLen=1168 order=COL12-0DIL
+ctggaTATCTTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGCATAG
+TCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGATGGTG
+ATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAAATCG
+GACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAAACCGTA
+ATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTTGAGGA
+ACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTTCGC
+CATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCCCAGCTG
+ATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGGCAC
+GGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTTCCA
+TAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACCACA
+TCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATCTTC
+CAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGACGGC
+CATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATGCTG
+TTGTCACCCGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAAGCCC
+AGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCAGAGC
+CAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTTGTGT
+GAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAGCATA
+AAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAATTGC
+GTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAGTCGG
+GAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGGAGA
+GGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGTGAGA
+CGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTGCAGC
+AAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGGTTTGATGG
+TGGTTAACGGCGGGATATAACATgaactggcttcgggatcgtcgtatccc
+actaccgagattgcccgcaccaacgcgcaaccccggactcggtaaagggc
+ccgcattgcgcccagggccatctgatcgttgggaaccagcatccgcaatg
+g
+>XL1_29_PSEXSEQ-REV_29 status=ok nucl=1301 crlStart=5 crlStop=1184 crlLen=1180 order=COL12-0DIL
+taggTGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT
+GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG
+ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG
+AAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAA
+ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT
+TGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCG
+TTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCC
+CAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGT
+CGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGA
+TTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAA
+ACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAAC
+ATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCA
+GACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAA
+ATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGA
+AAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCG
+CCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGC
+TTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGG
+GAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACAT
+TAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTC
+CAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGC
+GGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACC
+AGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAG
+TTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGGT
+TTGATGGTGGTTAACGGCGGGATATAACATGAACtggcttcgggatcgtc
+gtatcccactaccgagatgtcccgcccaacgcgaagcccggaatcggtaa
+tgggccgcattgggcccaagcgcctctggatcgttgggaaccaggttcgc
+a
+>XL1_2_PSEXSEQ-REV_6 status=ok nucl=1301 crlStart=5 crlStop=1197 crlLen=1193 order=COL12-0DIL
+ttcgCGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT
+GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG
+ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG
+AAATCGGGATGAAGCCGTCGGCGCTGGTGCGGGCGCCGGTAGCGTAAACC
+GTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTATA
+ATAAGAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTT
+CGCCATAGGTGATACGGTAATAGAAGACGTCGCCGAGGGGTGCATCCCAG
+CTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGG
+CACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTT
+CCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTACAGGTAAACC
+ACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATC
+TTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGAC
+GGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATG
+CTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAA
+GCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCA
+GAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTT
+GTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAG
+CATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAA
+TTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAG
+TCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGG
+GAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGT
+GAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTG
+CAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTTTGA
+TGGTGGTTAACGGCGGGATATAACATGAACTGGCTTCGGTATCGTCGtat
+cccactaccgagatgtccgcaccaacgcgcagcccggaatcggtaatggc
+gcgcattgcgcccagggccatctgatcgttggcaaccagcatcccattgg
+g
+>XL1_30_PSEXSEQ-REV_30 status=ok nucl=1301 crlStart=4 crlStop=1178 crlLen=1175 order=COL12-0DIL
+atgGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGCATAGT
+CTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGATGGTGA
+TGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAAATCGG
+GTTGTGGGCGTGGCTGCAGACGGTGACAGGCCGGAAATCGTTGCGGTTGA
+GGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTT
+CGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCCCAG
+CTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGG
+CACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTT
+CCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACC
+ACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATC
+TTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGAC
+GGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATG
+CTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAA
+GCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCA
+GAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTT
+GTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAG
+CATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAA
+TTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAG
+TCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGG
+GAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGT
+GAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTG
+CAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTTTGA
+TGGTGGTTAACGGCGGGATATAACATGAGCTGTCTTCGGTATCGTCGTAT
+CCCACTACCGAAATGTCCGCACCAACGCgcagcccggaatcggtaatggc
+gcgcattgcgcccagcgccatctgatcgttggcaaccagcatcgcagtgg
+gaacgatgccctcattcagcatttgcatggtttgttgaaaaccggaaatg
+g
+>XL1_33_PSEXSEQ-REV_23 status=ok nucl=1301 crlStart=329 crlStop=1165 crlLen=837 order=COL12-0DIL
+cgttttcggctctgatatctttggatcccacgcgtccctagcccacgcgt
+ggtgcatagtctggcacgtcatacggatacgaaccaccatgatggtgatg
+gtgatggtgatggctaccgcccgaaccgccggtaccggtacggtaattga
+tagaaatcggactagagctcgatgaggagatacgttaagagctcgaagcg
+taaaccgtaatggtatagtcgacacccggtgacaggccggaaatcgttgc
+ggttgaataacaacccgggacggtgaattcctgaaccggggagttaccgc
+ccgtttcgccataggtgatacggtaataGCTGACGTTGCTTTGGGATGCA
+TCCCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTT
+GGTCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCT
+TGATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGG
+TAAACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCAT
+AACATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCT
+GCAGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACG
+TAAATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGAT
+GGAAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGAC
+CCGCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATA
+AGCTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGC
+CGGGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCA
+CATTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCT
+TTCCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACG
+CGCGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTC
+ACCAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAA
+GAGTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCT
+GGTTGATGGTGGTTAacggcgggatataacatgaactggcttcggtatcg
+tcgtatccactaccgaaatgtccgaccaacgggcaacccggaatcggtaa
+tgggcggattgggcccagcgcatctgatcgttggaaccagcatcgcagtg
+g
+>XL1_3_PSEXSEQ-REV_7 status=ok nucl=1301 crlStart=9 crlStop=1188 crlLen=1180 order=COL12-0DIL
+ttactagcTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA
+AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG
+TTGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC
+GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC
+CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG
+TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG
+ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA
+AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA
+CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC
+AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA
+AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG
+AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC
+GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG
+CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG
+GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA
+TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT
+CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG
+CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC
+CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA
+GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGT
+TTGATGGTGGTTAACGGCGGGATATAACTTGAGCTGTCttcgggatcgtc
+gtatcccactaccgaaaatgtccgcaccaacgcgcaagcccggaatccgg
+tatgggcgcgcattggccccaaggccatcggatcgttgggaaccagcatc
+c
+>XL1_4_PSEXSEQ-REV_8 status=ok nucl=1301 crlStart=9 crlStop=1190 crlLen=1182 order=COL12-0DIL
+tgactgctCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT
+GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG
+ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG
+AAATCGGGGTGTGGGTGCAGTGGTTGTTGCGGCAGGTGTTGTCAGCGTAA
+ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT
+ATAATAAGAACCCGGGACGGTGAATTCCTGAACCGGGGATTACCGCCCGT
+TTCGCCATAGGTGATACGGTAATAGACGACGCAGTGGTCGGTGAAGGGTG
+CATCCCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGT
+TTGGTCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAG
+CTTGATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCA
+GGTAAACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGC
+ATAACATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAAT
+CTGCAGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAA
+CGTAAATGCTGTTGACCCGGAATGTGTTGATTGCCCACGCCACCTGCGAT
+GGAAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGAC
+CCGCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATA
+AGCTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGC
+CGGGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCA
+CATTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCT
+TTCCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACG
+CGCGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTC
+ACCAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAA
+GAGTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCT
+GGTTTGATGGTGGTTAACGGCGGGAAATAACATGAACTGGcttcggtatc
+gtcgtatcccactaccgaaatgtccgcacaacgcgcagcccggaatcggt
+aatgggccgcattgcgcccagcgccatctgatctttgggaaccagcatcg
+c
+>XL1_5_PSEXSEQ-REV_9 status=ok nucl=1301 crlStart=10 crlStop=1173 crlLen=1164 order=COL12-0DIL
+ctattcgctCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA
+AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG
+TTGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC
+GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC
+CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG
+TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG
+ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA
+AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA
+CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC
+AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA
+AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG
+AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC
+GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG
+CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG
+GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA
+TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT
+CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG
+CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC
+CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA
+GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGT
+TTGATGGTGGTTAACGGCGGGATataacatgaactggcttcggtatcgtc
+gtatcccactaccgagatgtccgcccaacgcgcagcccggactcggtaat
+gggccgcattgcgcccagcgccatctgatcgttgggaacaagcttcgcat
+t
+>XL1_6_PSEXSEQ-REV_10 status=ok nucl=1301 crlStart=9 crlStop=1187 crlLen=1179 order=COL12-0DIL
+agcttagcTCTGAAATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGGGAGTTGATGGGGTGGAAGTTGGAGACGGTGGCGTTAGCGTA
+AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG
+TAGAATAATAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC
+GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC
+CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG
+TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG
+ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA
+AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA
+CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC
+AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA
+AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG
+AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC
+GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG
+CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG
+GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA
+TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT
+CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG
+CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC
+CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA
+GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGG
+TTTGATGGTGGTTAACGGCGGGATATAACATGAGCTGtcttcggtatcgt
+cgtatcccactaccgagatgtccgcacaacgcgcagcccggaatcggtaa
+tggccgcattgggcccagcgccatctgatcgttggcaaccagcttcgcat
+t
+>XL1_8_PSEXSEQ-REV_11 status=ok nucl=1301 crlStart=6 crlStop=1176 crlLen=1171 order=COL12-0DIL
+aacttTGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA
+AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG
+TTGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC
+GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC
+CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG
+TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG
+ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA
+AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA
+CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC
+AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA
+AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG
+AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC
+GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG
+CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG
+GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA
+TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT
+CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG
+CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC
+CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA
+GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGG
+TTTGATGGTGGTTAACGGCGGGATATaacatgagctggcttcgggatcgt
+cgtatcccactaccgaaattgtccgaccaacgcgcaacccggactcggta
+ttgggccgcattgcgcccagggccatctgatctttgggaaccagcatccg
+c
+>XL1_9_PSEXSEQ-REV_12 status=ok nucl=1301 crlStart=3 crlStop=1188 crlLen=1186 order=COL12-0DIL
+tgGCTCTGGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGC
+ATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGAT
+GGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAA
+ATCGGGTGGTGGACGGCGTGGACGTCGTCGTTGACGAGGCTAGCGTAAAC
+CGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTAG
+AATAAGAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTT
+TCGCCATAGGTGATACGGTAATAGCAGACAGAGCTTTATGATGCATCCCA
+GCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCG
+GCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATT
+TCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAAC
+CACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACAT
+CTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGA
+CGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAAT
+GCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAA
+AGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCC
+AGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTT
+TGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGA
+GCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTA
+ATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCA
+GTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGG
+GGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAG
+TGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTT
+GCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTTTG
+ATGGTGGTTAACGGCGGGATATAACATGAACTGGCTTCgggatcgtcgta
+cccactaccgagatgtccgcacaacgcgcagcccggactcggtatggccc
+gcattggccccagcgccatctgatcgttgggaacaagcatcccaatgggg
+a
+>XL2-1_PSEXSEQ-REV_32 status=ok nucl=1301 crlStart=9 crlStop=1190 crlLen=1182 order=COL12-0DIL
+tcatagctCTGATTTCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT
+GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG
+ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG
+AAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAA
+ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT
+TGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCG
+TTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCC
+CAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGT
+CGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGA
+TTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAA
+ACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAAC
+ATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCA
+GACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAA
+ATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGA
+AAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCG
+CCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGC
+TTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGG
+GAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACAT
+TAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTC
+CAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGC
+GGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACC
+AGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAG
+TTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGGT
+TTGATGGTGGTTAACGGCGGGATATAACATGAACTGGCTTcgggatcgtc
+gtatcccactaccgagatgtccgcacaacgcgcagcccggaatccggtaa
+tggcccgcattgcgcccagggccatctgatcgttgggaaccaagatccgc
+a
+>XL2-2_PSEXSEQ-REV_33 status=ok nucl=1301 crlStart=9 crlStop=1181 crlLen=1173 order=COL12-0DIL
+ctctctggATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGCA
+TAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGATG
+GTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAAA
+TCGGGACGAGGGAGGCGGAGCAGCGGAGGCAGGGGCTAGCGTAAACCGTA
+ATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTATAAGA
+ATAACCCGGGACGGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTTCG
+CCATAGGTGATACGGTAATAGTTGACGAAGGTGTGGCATGCATCCCAGCT
+GATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGGCA
+CGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTTCC
+ATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACCAC
+ATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATCTT
+CCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGACGG
+CCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATGCT
+GTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAAGC
+CCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCAGA
+GCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTTGT
+GTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAGCA
+TAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAATT
+GCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAGTC
+GGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGGA
+GAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGTGA
+GACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTGCA
+GCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGGTTTGAT
+GGTGGTTAACGGCGGGATATAACATGAGCTGgcttcggtaccgtcgtatc
+ccactaccgagatgtccgcaccaacgcgcagcccggaatcggtaatgggc
+cgcattgggcccagcgccatctgatcgttgggaaccagaatcccaattgg
+g
+>XL2-3_PSEXSEQ-REV_34 status=Failed nucl=519 crlStart=1 crlStop=21 crlLen=21 order=COL12-0DIL
+AAGGCCGTTTTACTTATTTGCtaataacaccttctccacgaacccccccg
+ggttcaacatcgagggcgagaatcagaaaccccccaccatgtggatgagg
+ctaagaatgtggtttcccccaaaacccccggtgcttgcttatggtgataa
+taatcccaccaaatatcggaagtcttcacaaattgtaaaaatcccgcttt
+atttttgtattactttagagtcgccgagacccagctcatgtaggtgtctg
+agaaggactggatctgaatcatcgatgagttcacctttactttctttttt
+ttttttctttttccaaataactaatagatgattcatcttgttgatgcctg
+aaacccgaccaacatagcttccacatgccaccaacatttgcttgttagcc
+tatctccgatctgaccccgtaggccccgctcccttaatggatcaggataa
+attttcttaccctctcggtgatggcggcccccagcgcccggccatcctta
+cctgtttttttatttgtc
+>XL2-4_PSEXSEQ-REV_35 status=ok nucl=1301 crlStart=6 crlStop=1192 crlLen=1187 order=COL12-0DIL
+agattAGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA
+AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG
+TTGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC
+GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC
+CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG
+TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG
+ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA
+AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA
+CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC
+AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA
+AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG
+AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC
+GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG
+CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG
+GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA
+TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT
+CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG
+CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC
+CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA
+GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGG
+TTTGATGGTGGTTAACGGCGGGATATAACATGAACTGGCTTCggtatcgt
+cgtatcccactaccgagatgtccgcaccaacgcggcagcccggaatcggt
+aatggcgcgcattgggcccaagcgccatctgatcgttgggaaccagcatc
+c
+>XL2-5_PSEXSEQ-REV_36 status=ok nucl=1301 crlStart=5 crlStop=1173 crlLen=1169 order=COL12-0DIL
+ctggAATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGCATAG
+TCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGATGGTG
+ATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAAATCG
+GGTAGCTGTTGGCGACGATGACGCAGTAGCCGCTAGCGTAAACCGTAATG
+GTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTATAATAAGA
+ACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTTCGCCAT
+AGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCCCAGCTGATC
+AGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGGCACGGA
+ACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTTCCATAA
+CCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACCACATCA
+TAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATCTTCCAG
+GCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGACGGCCAT
+CTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATGCTGTTG
+TCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAAGCCCAG
+ACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCAGAGCCA
+GCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTTGTGTGA
+AATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAGCATAAA
+GTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAATTGCGT
+TGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAGTCGGGA
+AACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGGAGAGG
+CGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGTGAGACG
+GGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTGCAGCAA
+GCGGTCCACGCTGGTTTGCCCAGCAGGCGAAATCCTGTTTGATGGTGTTA
+ACGGCGGGATTAACATGAACTGGcttcgggatcgtcgtatccactaccga
+aatgccgcaccacgcgcagcccggactcggaaatggccgcattggcccca
+gggccatctgatcgttggaaccaagatcccaatgggaacaagccctcatc
+c
+>XL3-1_PSEXSEQ-REV_37 status=ok nucl=1301 crlStart=6 crlStop=1187 crlLen=1182 order=COL12-0DIL
+ttcctCGATCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA
+AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG
+TTGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC
+GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC
+GCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG
+TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG
+ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA
+AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA
+CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC
+AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA
+AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG
+AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC
+GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG
+CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG
+GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA
+TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT
+CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG
+CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC
+CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA
+GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGG
+TTTGATGGTGGTTAACGGCGGGATATAACATGAGCTGgcttcggtatcgt
+cgtatcccactaccgagatgtccgcacaacgcgcaagccggaatcggtaa
+tggcccgcattgcgcccagcgccatctgatcgttgggaaccagcatccgc
+a
+>XL3-2_PSEXSEQ-REV_38 status=ok nucl=1301 crlStart=6 crlStop=1185 crlLen=1180 order=COL12-0DIL
+cgtctTGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA
+AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG
+TTGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC
+GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC
+CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG
+TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG
+ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA
+AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA
+CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC
+AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA
+AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG
+AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC
+GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG
+CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG
+GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA
+TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT
+CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG
+CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC
+CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA
+GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGG
+TTTGATGGTGGTTAACGGCGGGATATAACATGAGCtggcttcgggatcgt
+cgtatcccactaccgagatgtccgcaccaacgcgcagcccggactcggta
+ttgggccgcattgcgcccagcgccatctgatcgttgggaacccagatcgc
+a
+>XL3-3_PSEXSEQ-REV_39 status=ok nucl=1301 crlStart=10 crlStop=1174 crlLen=1165 order=COL12-0DIL
+agactagctCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA
+AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG
+TTGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC
+GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC
+CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG
+TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG
+ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA
+AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA
+CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC
+AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA
+AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG
+AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC
+GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG
+CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG
+GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA
+TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT
+CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG
+CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC
+CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA
+GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGG
+TTTGATGGTGGTTAACGGCGGGATataacatgaactggcttcggtatcgt
+cgtatcccactaccgaaatgtccgcaccaacgcgcaacccggaatcggga
+atgggccgcattgcgcccagcgccatctgatctttgggaaccagcatccc
+a
+>XL3-4_PSEXSEQ-REV_40 status=ok nucl=1301 crlStart=8 crlStop=1186 crlLen=1179 order=COL12-0DIL
+tcgctcgCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG
+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA
+GAAATCGGCGGCTGTGGATGAAGCTGTGGTTGAGGCTGGAAGCGTAAACC
+GTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTATA
+ATAAGAACCCGGGACGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTTC
+GCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCCCAGC
+TGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGGC
+ACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTTC
+CATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACCA
+CATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATCT
+TCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGACG
+GCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATGC
+TGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAAG
+CCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCAG
+AGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTTG
+TGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAGC
+ATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAAT
+TGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAGT
+CGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGG
+AGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGTG
+AGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTGC
+AGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGGTTTGA
+TGGTGGTTAACGGCGGGATATAACATGAGCTGGCTTcggtatcgtcgtat
+cccactaccgagatgtccgcacaacgcgcagcccggactcggtaatggcc
+cgcattggccccagcgccatctgatcgttgggaaccagctcccgagtggg
+a
+>XL3-5_PSEXSEQ-REV_41 status=ok nucl=1301 crlStart=8 crlStop=1190 crlLen=1183 order=COL12-0DIL
+ctgtcgcTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT
+GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG
+ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG
+AAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAA
+ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT
+TGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCG
+TTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCC
+CAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGT
+CGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGA
+TTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAA
+ACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAAC
+ATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCA
+GACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAA
+ATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGA
+AAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCG
+CCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGC
+TTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGG
+GAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACAT
+TAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTC
+CAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGC
+GGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACC
+AGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAG
+TTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTT
+TGATGGTGGTTAACGGCGGGATATAACATGAGCTGGCTTCggtatcgtcg
+tatcccactaccgagatgtccgcaccaacgcgcagcccggactcggaatg
+gggcgcattgggcccagcgccatttgatcgttgggaaccagcatcgcatt
+g
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/syndiva_report.html	Thu Jun 23 22:32:13 2022 +0000
@@ -0,0 +1,127 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN""http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><title>SynDivA Report</title><link href="http://twitter.github.com/bootstrap/assets/css/bootstrap.css" rel="stylesheet" /><style type="text/css">body {padding-top: 40px;}.subhead {padding: 40px 0;}.subhead h1 {font-size: 60px;}.fasta {   font-family: Monaco, Menlo, Consolas, "Courier New", monospace;   font-size: 12px;}code.grey{color: #636D71;}</style></head><body><a id="top"></a><div class="navbar navbar-fixed-top"><div class="navbar-inner"><div class="container"><a class="brand" href="#top">SynDivA Report</a><div class="nav-collapse collapse"><ul class="nav"><li><a href="#input">Input data</a></li><li><a href="#analysis">Sequences analysis</a></li><li><a href="#variable">Variable regions analysis</a></li><li><a href="#cluster">Clustering</a></li><li><a href="#stat">Statistics</a></li><li><a href="#annex">Annex</a></li></ul></div></div></div></div><div class="container-fluid"><header class="subhead"><h1>SynDivA Report</h1></header><div class="page-header"><a id="input"></a><h2>Input data</h2></div><p>Input file:<br/><code class="grey">syndiva_datatest.fasta</code></p><p>Number of sequences in input file:<br/><code class="grey">36</code></p><p>Pattern of the sequence bank:<br/><code class="grey">AAAGSSVSSVPTKLEVVAATPTSLLISWDA:4-6:V:1:YYRITYGETGGNSPVQEFTVPG:3:TATISGLSPGVDYTITVYA:11-12:PISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAETVESCLAKSHTENSFTNVWKDDKTLDRYANYE</code></p><p>5' restriction site:<br/><code class="grey">GCGGCCGC</code></p><p>3' restriction site:<br/><code class="grey">GGTACC</code></p><div class="page-header"><a id="analysis"></a><h2>Sequences analysis</h2></div><p>Caption:</p><ul><li class="text-success">Valid sequences that will be part of the next analysis </li><li class="text-warning">Good 
sequences but will not be part of the next analysis</li><li class="text-error">Rejected sequences</li></ul><table class="table table-striped table-bordered"><tr><th class="text-error">Absence of restriction sites</th><th class="text-error">Incorrect number of nucleotides between the restriction sites</th><th class="text-error">Stop codon <u>inside</u> the area of interest</th><th class="text-warning">Mutation in the conserved regions</th><th class="text-success">Valid sequences</th><th>Amber codon in the sequence (<u>inside</u> the area of interest)</th></tr><tr><td class="text-error">1 sequence(s) (2.78%)</td><td class="text-error">6 sequence(s) (16.67%)</td><td class="text-error">25 sequence(s) (69.44%)</td><td class="text-warning">0 sequence(s) (0.00%)</td><td class="text-success">4 sequence(s) (11.11%)</td><td>0 sequence(s)</td></tr><tr><td class="text-error">XL2-3_PSEXSEQ-REV_34</td><td class="text-error">XL1_22_PSEXSEQ-REV_24<br/>XL1_24_PSEXSEQ-REV_26<br/>XL1_30_PSEXSEQ-REV_30<br/>XL1_4_PSEXSEQ-REV_8<br/>XL2-2_PSEXSEQ-REV_33<br/>XL3-4_PSEXSEQ-REV_40</td><td class="text-error">XL1_10_PSEXSEQ-REV_13<br/>XL1_11_PSEXSEQ-REV_14<br/>XL1_13_PSEXSEQ-REV_16<br/>XL1_14_PSEXSEQ-REV_17<br/>XL1_15_PSEXSEQ-REV_18<br/>XL1_16_PSEXSEQ-REV_19<br/>XL1_17_PSEXSEQ-REV_20<br/>XL1_1_PSEXSEQ-REV_5<br/>XL1_23_PSEXSEQ-REV_25<br/>XL1_25_PSEXSEQ-REV_27<br/>XL1_26_PSEXSEQ-REV_28<br/>XL1_29_PSEXSEQ-REV_29<br/>XL1_33_PSEXSEQ-REV_23<br/>XL1_3_PSEXSEQ-REV_7<br/>XL1_5_PSEXSEQ-REV_9<br/>XL1_6_PSEXSEQ-REV_10<br/>XL1_8_PSEXSEQ-REV_11<br/>XL1_9_PSEXSEQ-REV_12<br/>XL2-1_PSEXSEQ-REV_32<br/>XL2-4_PSEXSEQ-REV_35<br/>XL2-5_PSEXSEQ-REV_36<br/>XL3-1_PSEXSEQ-REV_37<br/>XL3-2_PSEXSEQ-REV_38<br/>XL3-3_PSEXSEQ-REV_39<br/>XL3-5_PSEXSEQ-REV_41</td><td class="text-warning"></td><td class="text-success">XL1_12_PSEXSEQ-REV_15<br/>XL1_18_PSEXSEQ-REV_21<br/>XL1_19_PSEXSEQ-REV_22<br/>XL1_2_PSEXSEQ-REV_6</td><td></td></tr></table><div class="page-header"><a id="variable"></a><h2>Variable regions 
analysis</h2></div><p>The following group of sequences are identical clones on the variable regions:</p><p>No clone was found.</p><p>Here's the distribution of the repeated sequences in variable regions:</p><table class="table table-striped table-bordered"><thead><tr><th>Variable region</th><th>Repeated sequence</th><th>Number of occurrences (percentage of valid sequences)</th></tr></thead><tbody><tr><td rowspan="1">3</td><td>YSY</td><td>2 (50.00%)</td></tr></tbody></table><div class="page-header"><a id="cluster"></a><h2>Clustering</h2></div><p>The following clusters were generated by MCL:</p><div class="row-fluid"><div class="span6"><pre>4 sequences (100.00% of valid sequences)<br/>XL1_12_PSEXSEQ-REV_15<br/>XL1_18_PSEXSEQ-REV_21<br/>XL1_19_PSEXSEQ-REV_22<br/>XL1_2_PSEXSEQ-REV_6
+</pre></div></div><div class="page-header"><a id="stat"></a><h2>Statistics</h2></div><p>Here's some statistics about the valid sequences:</p><p>Mean for the pairwise alignement scores: 20.01<br/>Standard deviation: 2.93</p><div class="row-fluid"><div class="span6"><img src="distri.png" alt="Distribution of the pairwise alignment score"></div><div class="span6"><table class="table table-striped table-bordered"><thead><tr><th>Pairwise Alignment Score</th><th>Number of occurrences</th></tr></thead><tbody><tr><td>16.67</td><td>1</td></tr><tr><td>17.24</td><td>1</td></tr><tr><td>19.23</td><td>1</td></tr><tr><td>19.35</td><td>1</td></tr><tr><td>22.58</td><td>1</td></tr><tr><td>25.00</td><td>1</td></tr></tbody></table></div></div><div class="page-header"><a id="annex"></a><h2>Annex</h2></div><p><strong>Valid protein sequences</strong> in FASTA format:</p><textarea class="span8 fasta" type="text" rows="20" readonly="readonly">>XL1_12_PSEXSEQ-REV_15
+AAAGSSVSSVPTKLEVVAATPTSLLISWDANLPNSAVHYYRITYGETGGNSPVQEFTVPGSSYTATISGLSPGVDYTITV
+YAHNDCNTIVATCPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAR
+>XL1_18_PSEXSEQ-REV_21
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAIYRPVAYYRITYGETGGNSPVQEFTVPGYSYTATISGLSPGVDYTITVYA
+PIGHFDVLVTIPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRD
+>XL1_19_PSEXSEQ-REV_22
+AAAGSSVSSVPTKLEVVAATPTSLLISWDASIFRGIVLYYRITYGETGGNSPVQEFTVPGYSYTATISGLSPGVDYTITV
+YAHGTSFSAISCAFPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAM
+>XL1_2_PSEXSEQ-REV_6
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAPLGDVFYYRITYGETGGNSPVQEFTVPGSYYTATISGLSPGVDYTITVYA
+TGARTSADGFIPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAR
+</textarea><p>Multiple sequence alignment of the <strong>valid sequences</strong> generated by Clustal Omega:</p><textarea class="span8 fasta" type="text" rows="20" readonly="readonly">CLUSTAL O(1.2.4) multiple sequence alignment
+
+
+XL1_12_PSEXSEQ-REV_15      AAAGSSVSSVPTKLEVVAATPTSLLISWDANLPNSAVHYYRITYGETGGNSPVQEFTVPG
+XL1_18_PSEXSEQ-REV_21      AAAGSSVSSVPTKLEVVAATPTSLLISWDAIY--RPVAYYRITYGETGGNSPVQEFTVPG
+XL1_19_PSEXSEQ-REV_22      AAAGSSVSSVPTKLEVVAATPTSLLISWDASIFRGIVLYYRITYGETGGNSPVQEFTVPG
+XL1_2_PSEXSEQ-REV_6        AAAGSSVSSVPTKLEVVAATPTSLLISWDAPL--GDVFYYRITYGETGGNSPVQEFTVPG
+                           ******************************      * **********************
+
+XL1_12_PSEXSEQ-REV_15      SSYTATISGLSPGVDYTITVYAHNDCNTI-VATCPISINYRTGTGGSGGSHHHHHHHHGG
+XL1_18_PSEXSEQ-REV_21      YSYTATISGLSPGVDYTITVYAPIGHFDV-LVTIPISINYRTGTGGSGGSHHHHHHHHGG
+XL1_19_PSEXSEQ-REV_22      YSYTATISGLSPGVDYTITVYAHGTSFSAISCAFPISINYRTGTGGSGGSHHHHHHHHGG
+XL1_2_PSEXSEQ-REV_6        SYYTATISGLSPGVDYTITVYATGARTSA-DGFIPISINYRTGTGGSGGSHHHHHHHHGG
+                             ********************            **************************
+
+XL1_12_PSEXSEQ-REV_15      SYPYDVPDYAPRVG*GRVGSKDIRAR
+XL1_18_PSEXSEQ-REV_21      SYPYDVPDYAPRVG*GRVGSKDIRD-
+XL1_19_PSEXSEQ-REV_22      SYPYDVPDYAPRVG*GRVGSKDIRAM
+XL1_2_PSEXSEQ-REV_6        SYPYDVPDYAPRVG*GRVGSKDIRAR
+                           ************************  
+</textarea><p><strong>Protein sequences with an incorrect number of nucleotides between the restriction sites</strong> in FASTA format:</p><textarea class="span8 fasta" type="text" rows="20" readonly="readonly">>XL1_22_PSEXSEQ-REV_24
+AAAGSSVSSVPTKLEVVAATPTSLLISWDALITFVAYYRITYGETGGTPRFRNSPSRVLILPQRFPACHRVSTIPLRFTL
+PTPPTTTTSSSRFLSITVPVPAVRAVAITITITIMVVRIRMTCQTMHHAWARDAWDPKISEPI
+>XL1_24_PSEXSEQ-REV_26
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAHSSPDYVCYYRITYGETGVTPGSGIHRPG*FLNRNDFRPVTGCRLYHYGL
+RFELLTYLLIEL*SDFYQLPYRYRRFGR*PSPSPSPSWWFVSV*RARLCTTRGLGTRGIQRYQSK
+>XL1_30_PSEXSEQ-REV_30
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPSAATPTTRFL
+SITVPVPAVRAVAITITITIMVVRIRMTCQTMHHAWARDAWDPKIS
+>XL1_4_PSEXSEQ-REV_8
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAPFTDHCVVYYRITYGETGGNPRFRNSPSRVLIIPQRFPACHRVSTIPLRF
+TLTTPAATTTAPTPRFLSITVPVPAVRAVAITITITIMVVRIRMTCQTMHHAWARDAWDPKISEQS
+>XL2-2_PSEXSEQ-REV_33
+AAAGSSVSSVPTKLEVVAATPTSLLISWDACHTFVNYYRITYGETGGNSPVQEFTRPGLFLYRNDFRPVTGCRLYHYGLR
+*PLPPLLRLPRPDFYQLPYRYRRFGR*PSPSPSPSWWFVSV*RARLCTTRGLGTRGIQRYPE
+>XL3-4_PSEXSEQ-REV_40
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTSRVLIIPQRFPACHRVSTIPLRFTL
+PASTTASSTAADFYQLPYRYRRFGR*PSPSPSPSWWFVSV*RARLCTTRGLGTRGIQRYQSER
+</textarea><p><strong>Protein sequences with a stop codon</strong> in FASTA format:</p><textarea class="span8 fasta" type="text" rows="20" readonly="readonly">>XL1_10_PSEXSEQ-REV_13
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAT
+>XL1_11_PSEXSEQ-REV_14
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAHGNCVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAE
+>XL1_13_PSEXSEQ-REV_16
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAPRSFVRYYRITYGETGGNSPVQEFTVPGSYSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAK
+>XL1_14_PSEXSEQ-REV_17
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSYTATISGLSPGVDYTITVYA
+RYFIYSYISHSTPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAK
+>XL1_15_PSEXSEQ-REV_18
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRC
+>XL1_16_PSEXSEQ-REV_19
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGYYSTATISGLSPGVDYTITVYA
+CGGVNANSSDCFPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAK
+>XL1_17_PSEXSEQ-REV_20
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SVPIHFSCRGCPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIS
+>XL1_1_PSEXSEQ-REV_5
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIP
+>XL1_23_PSEXSEQ-REV_25
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSYYTATISGLSPGVDYTITVYA
+GDISDNPFSRCPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAN
+>XL1_25_PSEXSEQ-REV_27
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAR
+>XL1_26_PSEXSEQ-REV_28
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKRYP
+>XL1_29_PSEXSEQ-REV_29
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAP
+>XL1_33_PSEXSEQ-REV_23
+AAAGSSVSSVPTKLEVVAATPTSLLISWDASQSNVSYYRITYGETGGNSPVQEFTVPGCYSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAEN
+>XL1_3_PSEXSEQ-REV_7
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAS
+>XL1_5_PSEXSEQ-REV_9
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAN
+>XL1_6_PSEXSEQ-REV_10
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGYYSTATISGLSPGVDYTITVYA
+NATVSNFHPINSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDFRAK
+>XL1_8_PSEXSEQ-REV_11
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAK
+>XL1_9_PSEXSEQ-REV_12
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVCYYRITYGETGGNSPVQEFTVPGSYSTATISGLSPGVDYTITVYA
+SLVNDDVHAVHHPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIQS
+>XL2-1_PSEXSEQ-REV_32
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKEIRAM
+>XL2-4_PSEXSEQ-REV_35
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAN
+>XL2-5_PSEXSEQ-REV_36
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSYYTATISGLSPGVDYTITVYA
+SGYCVIVANSYPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIP
+>XL3-1_PSEXSEQ-REV_37
+AAAGSSVSSVPTKLEVVAATPTSLLISCDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRSR
+>XL3-2_PSEXSEQ-REV_38
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAR
+>XL3-3_PSEXSEQ-REV_39
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAS
+>XL3-5_PSEXSEQ-REV_41
+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA
+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAT
+</textarea></div></body></html>
\ No newline at end of file