Repository 'syndiva'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/syndiva

Changeset 0:0254731f047b (2022-06-23)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/SynDivA commit 90c5ec603e2c6b8c49d2dc7ec1b1e97f9d8fb92c
added:
args.py
macros.xml
syndiva.py
syndiva.xml
test-data/distri.png
test-data/syndiva_datatest.fasta
test-data/syndiva_report.html
b
diff -r 000000000000 -r 0254731f047b args.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/args.py Thu Jun 23 22:32:13 2022 +0000
[
@@ -0,0 +1,115 @@
+# -*- coding: utf-8 -*-
+import getopt
+import os
+import sys
+
+
+def usage(info) -> str:
+    """
+    Build the help text of the SynDivA command line.
+
+    @param info: optional message inserted right after the title line; it is
+                 left out when empty or None
+    @return: the title, the optional message and the table of options
+    """
+    header = "Option\t\t\t\tfile\t\t\tDescription\n"
+    # The rule under the header is 60 dashes longer than the header row itself
+    separator = '-' * (len(header) + 60) + '\n'
+    option_rows = (
+        "-i, --input\t\t\tfile.fasta\t\tFasta file that contains the DNA sequences\n",
+        "-o, --output_dir\t\t/path/for/output\tDirectory where output files will be written\n",
+        "-p, --pattern\t\t\tstring\t\t\tPattern of the sequence bank\n",
+        "-5, --restriction-site-5\tstring\t\t\tSequence of the restriction site in 5'\n",
+        "-3, --restriction-site-3\tstring\t\t\tSequence of the restriction site in 3'\n",
+    )
+    message = info if info else ""
+    return "SynDivA script.\n\n" + message + header + separator + "".join(option_rows)
+
+
+def get_os_path_join(directory, filename):
+    """
+    Join a directory and a file name into one path.
+
+    @param directory: leading part of the path
+    @param filename: component appended to the directory
+    @return: the value of os.path.join(directory, filename)
+    """
+    joined_path = os.path.join(directory, filename)
+    return joined_path
+
+
+def get_os_path_name(input):
+    """
+    Give the last component of a path.
+
+    @param input: path to shorten
+    @return: the value of os.path.basename(input)
+    """
+    file_name = os.path.basename(input)
+    return file_name
+
+
+def check_pattern(pattern):
+    """
+    Check that a sequence-bank pattern only holds authorized characters.
+
+    The authorized characters are the upper-case letters
+    ACDEFGHIKLMNPQRSTVWY, the digits 0-9 and the characters ':', '-' and '*'.
+
+    @param pattern: pattern string to validate
+    @return: True when every character of the pattern is authorized (an empty
+             pattern is therefore accepted), False otherwise
+    """
+    authorized_pattern_letter = frozenset('ACDEFGHIKLMNPQRSTVWY:0123456789-*')
+    # The membership tests have to be reduced with all(): a list of booleans
+    # always has len(pattern) elements, so comparing its length to the pattern
+    # length accepted every pattern.
+    return all(letter in authorized_pattern_letter for letter in pattern)
+
+
+class Args:
+    """
+    Command-line arguments of the SynDivA script.
+
+    The options are read from sys.argv as soon as the object is created. The
+    process exits with the usage text when an option is unknown, when a
+    mandatory option is missing or when the input file does not exist.
+    """
+
+    def __init__(self):
+        """
+        Instantiate the Args object and parse the command line
+        """
+        self.input = None  # value of -i / --input
+        self.output_dir = None  # value of -o / --output_dir
+        self.pattern = None  # value of -p / --pattern
+        self.site_res_5 = None  # value of -5 / --restriction-site-5
+        self.site_res_3 = None  # value of -3 / --restriction-site-3
+        self.getargs()
+
+    def case(self):
+        """
+        Check that every mandatory option was given; exit with the usage text
+        naming the first missing one
+        """
+        # (value read from the command line, label used in the error message)
+        mandatory = (
+            (self.input, "input (-i,--input)"),
+            (self.output_dir, "output directory (-o,--output_dir)"),
+            (self.pattern, "Pattern of the sequence bank (-p,--pattern)"),
+            (self.site_res_5, "Sequence of the restriction site in 5' (-5,--restriction-site-5)"),
+            (self.site_res_3, "Sequence of the restriction site in 3' (-3,--restriction-site-3)"),
+        )
+        for value, label in mandatory:
+            if not value:
+                sys.exit(usage("%s : \"%s\" must be indicated\n" % (label, value)))
+
+    def data_format(self):
+        """
+        Check if information are correct
+        """
+        # Run without arguments
+        if len(sys.argv) == 1:
+            sys.exit(usage(None))
+        # The input must be an existing file: stop with an explicit message
+        # instead of only printing the path and carrying on
+        if self.input and not os.path.isfile(self.input):
+            sys.exit(usage("input (-i,--input) : \"%s\" is not an existing file\n" % (self.input)))
+
+    def getargs(self):
+        """
+        Determine the files provided as arguments
+        @return: None, the chosen options are stored on the instance
+        """
+        # Without argument
+        if len(sys.argv) <= 1:
+            sys.exit("Do './syndiva.py -h' for a usage summary")
+        # options test
+        try:
+            # The long names documented by usage() are declared here; the
+            # former "site_res_5"/"site_res_3" spellings stay accepted so
+            # that existing command lines keep working.
+            (opts, _) = getopt.getopt(sys.argv[1:], "i:o:p:5:3:h",
+                                      ["input=", "output_dir=", "pattern=",
+                                       "restriction-site-5=", "restriction-site-3=",
+                                       "site_res_5=", "site_res_3=", "help"])
+        except getopt.GetoptError as err:
+            # print help information and exit:
+            print(str(err))  # will print something like "option -a not recognized"
+            sys.exit(usage(None))
+        # Identification of options
+        for (o, a) in opts:
+            if o in ("-i", "--input"):
+                self.input = a
+            elif o in ("-o", "--output_dir"):
+                self.output_dir = a
+            elif o in ("-p", "--pattern"):
+                self.pattern = a
+            elif o in ("-5", "--restriction-site-5", "--site_res_5"):
+                self.site_res_5 = a
+            elif o in ("-3", "--restriction-site-3", "--site_res_3"):
+                self.site_res_3 = a
+            elif o in ("-h", "--help"):
+                sys.exit(usage(None))
+            else:
+                # Not expected with the option lists above; exit explicitly
+                # rather than rely on an assert (asserts are removed by -O)
+                sys.exit(usage("unhandled option : \"%s\"\n" % (o)))
+        # Verification of cases
+        self.case()
+        self.data_format()
b
diff -r 000000000000 -r 0254731f047b macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Jun 23 22:32:13 2022 +0000
b
@@ -0,0 +1,20 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="1.2.4">clustalo</requirement>
+            <requirement type="package" version="14.137">mcl</requirement>
+            <yield />
+        </requirements>
+    </xml>
+    <xml name="matplotlib_requirement">
+        <requirement type="package" version="2.2.3">matplotlib</requirement>
+    </xml>
+    <xml name="biopython_requirement">
+        <requirement type="package" version="1.76">biopython</requirement>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1038/s41467-019-12528-4</citation>
+        </citations>
+    </xml>
+</macros>
b
diff -r 000000000000 -r 0254731f047b syndiva.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/syndiva.py Thu Jun 23 22:32:13 2022 +0000
[
b'@@ -0,0 +1,407 @@\n+#!/usr/bin/env python\n+# title : syndiva.py\n+# description : This script will analyze fasta files, look for restriction sites,\n+# cut the sequences around the restriction sites,\n+# translate the nucleic sequences into amino acids sequences.\n+# author : Fabienne Wong Jun Tai and Benjamin Dartigues\n+# creation date : 20121107\n+# version : 1.0 - revised November 2012\n+# version : 1.1 - revised March 2022\n+# usage : python syndiva.py -i file.fasta -o /output/dir/ -p pattern -5 seq_restric_5\'-3 seq_restric_3\'\n+# notes :\n+# # python_version :3.7.11\n+# # biopython_max_version  :1.72\n+# ==============================================================================\n+import math\n+import re\n+import subprocess\n+import sys\n+\n+import matplotlib\n+import numpy\n+from args import Args\n+from args import get_os_path_join, get_os_path_name\n+from Bio import pairwise2\n+from Bio import SeqIO\n+from Bio.Seq import Seq\n+from Bio.Seq import translate\n+from Bio.SubsMat import MatrixInfo\n+\n+matplotlib.use(\'Agg\')\n+from matplotlib import pyplot as plot  # noqa: I202,E402\n+\n+\n+args = Args()\n+# Variables initialization\n+directory = args.output_dir\n+mcl_file = get_os_path_join(directory, "mcl.in")\n+mcl_output = get_os_path_join(directory, "mcl.out")\n+html_file = get_os_path_join(directory, "syndiva_report.html")\n+graph_pic = get_os_path_join(directory, "distri.png")\n+input_file = get_os_path_name(args.input)\n+site_res_5 = args.site_res_5\n+site_res_3 = args.site_res_3\n+tag = {\'mut\': [], \'ok_stop_ext\': [], \'stop\': [], \'no_restric\': [], \'no_multiple\': [], \'amber\': []}\n+all_seq = []\n+all_seq_fasta = {}  # dictionnary that will store information about all the sequences\n+good_seq = {}  # dictionnary that will store information about the valid sequences\n+identical_clones = {}\n+var_seq_common = {}  # dictionnary that will store the number of sequences that share the same variable parts\n+align_scores = []\n+nb_var_part = 
0\n+\n+\n+def get_identity(str1, str2):\n+    if len(str2) > len(str1):\n+        return (len(str2) - len([i for i in range(len(str1)) if str1[i] != str2[i]])) / len(str2)\n+    else:\n+        return (len(str1) - len([i for i in range(len(str1)) if str1[i] != str2[i]])) / len(str1)\n+\n+\n+def reverse_complement(_seq):\n+    return str(Seq(_seq).reverse_complement())\n+\n+\n+def generate_aln(seq_dic, ids):  # sourcery skip: use-join\n+    # Multiple Sequence Alignment via ClustalO\n+    _input = \'\'\n+    for sequence_id in ids:\n+        _input += \'>%s\\n%s\\n\' % (sequence_id, re.sub("(.{80})", "\\\\1\\n", seq_dic[sequence_id][\'prot\'], re.DOTALL))\n+    p = subprocess.Popen(["clustalo", "-i", "-", "--outfmt", "clu"], shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, universal_newlines=True)\n+    aln_out, aln_err = p.communicate(input=_input)\n+    return aln_out\n+\n+\n+def report_html(_html_file, _tag, _all_seq, _good_seq, _all_seq_fasta, _identical_clones, _nb_var_part, _var_seq_common, _align_scores, _args):\n+    # Generate the html file for the report\n+    _all_seq.sort()\n+    for key in _tag.keys():\n+        _tag[key].sort()\n+    _good_seq = dict(sorted(_good_seq.items()))\n+    good_ids = _good_seq.keys()\n+    w = open(_html_file, \'w\')\n+    w.write(\n+        \'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN""http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" \'\n+        \'lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><title>SynDivA Report</title><link \'\n+        \'href="http://twitter.github.com/bootstrap/assets/css/bootstrap.css" rel="stylesheet" /><style type="text/css">body {padding-top: 40px;}.subhead {padding: 40px \'\n+        \'0;}.subhead h1 {font-size: 60px;}.fasta {   font-family: Monaco, Menlo, Consolas, "Courier New", monospace;   font-size: 12px;}code.grey{color: \'\n+        
\'#636D71;}</style></head><body><a id="top"></a><div class="navbar navbar-fixed-top"><div class="navbar-inner"><div class="container"><a class="'..b'_var:\n+                            var_max = int(last_var.split(\'-\')[1])\n+                        else:\n+                            var_max = int(last_var)\n+                        last_part = last_part[0:var_max + 1]\n+                        if last_part not in tmp_prot_seq:\n+                            mut = True\n+                            tag[\'mut\'].append(seq_id)\n+                        else:\n+                            pos_fix = tmp_prot_seq.index(last_part)\n+                            if pos_fix != 0:\n+                                var_parts.append(tmp_prot_seq[0:pos_fix])\n+                    # If no mutation the sequence is validated and all the info are stored\n+                    if not mut:\n+                        good_seq[seq_id] = {}\n+                        good_seq[seq_id][\'dna\'] = cut_seq\n+                        good_seq[seq_id][\'prot\'] = prot_seq\n+                        good_seq[seq_id][\'var\'] = var_parts\n+\n+# If all sequences are invalid, the program will exit as there is no data to continue\n+if not good_seq:\n+    sys.exit("There is only one valid sequence among the input data. At least 2 valid sequences are necessary to proceed to the next step. The program will now exit")\n+elif len(good_seq.keys()) == 1:\n+\n+    sys.exit("There is only one valid sequence among the input data. At least 2 valid sequences are necessary to proceed to the next step. 
The program will now exit")\n+\n+# Initialization of dict var_seq_common\n+for n in range(nb_var_part):\n+    var_seq_common[str(n + 1)] = {}\n+\n+# Opening the file where the mcl input will be written\n+with open(mcl_file, \'w+\') as mcl:\n+    seq_keys = good_seq.keys()\n+    for i in range(len(seq_keys)):\n+        var_1 = good_seq[list(seq_keys)[i]][\'var\']\n+\n+        # Classifying variable sequences\n+        for k in range(len(var_1)):\n+            try:\n+                var_seq_common[str(k + 1)][var_1[k]] += 1\n+            except KeyError:\n+                var_seq_common[str(k + 1)][var_1[k]] = 1\n+\n+        for j in range(i + 1, len(seq_keys)):\n+            var_2 = good_seq[list(seq_keys)[j]][\'var\']\n+            score = 0.0\n+            # Comparing the sequences\' variable parts to find identical clones\n+            if var_1 == var_2:\n+                try:\n+                    clone_seq = "".join(var_1)\n+                    identical_clones[clone_seq].extend([seq_keys[i], seq_keys[j]])\n+                except KeyError:\n+                    identical_clones[clone_seq] = [seq_keys[i], seq_keys[j]]\n+            # Align the 2 sequences using NWalign_PAM30 => replace by pairwise2\n+            seq_1 = \'\'.join(var_1)\n+            seq_2 = \'\'.join(var_2)\n+            matrix = MatrixInfo.pam30\n+            if len(seq_2) > len(seq_1):\n+                score = get_identity(pairwise2.align.globalds(seq_1, seq_2, matrix, -11, -1)[0][0], pairwise2.align.globalds(seq_1, seq_2, matrix, -11, -1)[0][1]) * 100\n+            else:\n+                score = get_identity(pairwise2.align.globalds(seq_2, seq_1, matrix, -11, -1)[0][0], pairwise2.align.globalds(seq_2, seq_1, matrix, -11, -1)[0][1]) * 100\n+            align_scores.append(score)\n+            mcl.write(\'%s\\t%s\\t%0.2f\\n\' % (list(seq_keys)[i], list(seq_keys)[j], score))\n+\n+# Clusters formation\n+subprocess.call(["mcl", mcl_file, "--abc", "-I", "6.0", "-o", mcl_output], shell=False, 
stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n+\n+# Producing distribution graph\n+plot.hist(align_scores, bins=numpy.arange(0, 101, 2))\n+plot.xlabel(\'Pairwise Alignment Score\')\n+plot.ylabel(\'Number of occurrences\')\n+plot.title(\'Distribution of the pairwise alignment score\')\n+plot.grid(True)\n+plot.savefig(graph_pic)\n+\n+# Generating html report\n+report_html(html_file, tag, all_seq, good_seq, all_seq_fasta, identical_clones, nb_var_part, var_seq_common, align_scores, args)\n+\n+# Removing intermediate files\n+subprocess.call(["rm", mcl_file, mcl_output], shell=False)\n+print("HTML report has been generated in the output directory. The program will now exit.")\n'
b
diff -r 000000000000 -r 0254731f047b syndiva.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/syndiva.xml Thu Jun 23 22:32:13 2022 +0000
[
@@ -0,0 +1,79 @@
+<tool id="syndiva" name="syndiva" version="1.0" profile="21.05">
+    <description>
+        diversity analysis of synthetic libraries of a Fibronectin domain
+    </description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements">
+        <expand macro="matplotlib_requirement" />
+        <expand macro="biopython_requirement"/>
+    </expand>
+    <command detect_errors="exit_code">
+    <![CDATA[
+    mkdir -p '$report.extra_files_path' &&
+    python '$__tool_directory__/syndiva.py'
+        -i '$fasta_file'
+        -p '$pattern'
+        -5 '$restriction_site_5'
+        -3 '$restriction_site_3'
+        -o '$report.extra_files_path'
+    && cp '${report.extra_files_path}'/*\.html output.html
+    ]]> 
+    </command>
+    <inputs>
+        <param name="fasta_file" type="data" format="fasta" label="DNA FASTA file"/>
+        <param name="pattern" type="text" area="true" label="Sequence pattern" >
+            <validator type="empty_field" />
+            <validator type="regex" message="An expression is required and is allowed to contain only protein alphabet letters (ACDEFGHIKLMNPQRSTVWY), numbers and the characters -:*  ">^[ACDEFGHIKLMNPQRSTVWY\d\-:*]+$</validator>
+        </param>
+        <param name="restriction_site_5" type="text" label="5&#39; Restriction sites" help="Sequence of the restriction site in 5&#39;" >
+            <validator type="empty_field" />
+            <validator type="regex" message="An expression is required and is allowed to contain only DNA alphabet letters (ATGC)">^[ATGC]{4,10}$</validator>
+        </param>
+        <param name="restriction_site_3" type="text" label="3&#39; Restriction sites" help="Sequence of the restriction site in 3&#39;" >
+            <validator type="empty_field" />
+            <validator type="regex" message="An expression is required and is allowed to contain only DNA alphabet letters (ATGC)">^[ATGC]{4,10}$</validator>
+        </param>
+    </inputs>
+    <outputs>
+        <data format="html" name="report" from_work_dir="output.html" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="fasta_file" value="syndiva_datatest.fasta"/>
+            <param name="pattern" value="AAAGSSVSSVPTKLEVVAATPTSLLISWDA:4-6:V:1:YYRITYGETGGNSPVQEFTVPG:3:TATISGLSPGVDYTITVYA:11-12:PISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAETVESCLAKSHTENSFTNVWKDDKTLDRYANYE"/>
+            <param name="restriction_site_5" value="GCGGCCGC"/>
+            <param name="restriction_site_3" value="GGTACC"/>
+            <output name="report" file="syndiva_report.html"  ftype="html" lines_diff="2"/>
+        </test>
+    </tests>
+    <help>
+    <![CDATA[
+- **Scientific context**
+
+- *SynDivA* was developed to analyze the diversity of synthetic libraries of a Fibronectin domain.
+- This diversity is generated in the context of a project of directed evolution using a phage display approach, to obtain ligands with high affinity and specificity for biological targets. It was introduced in three loops of the domain. It is both a variation of amino acids and a variation of the lengths of loops.
+- *SynDivA* is used before the step of selecting banks of interest, to ensure the quality - and therefore the diversity - of the bank by determining, by projection, the number of unique and functional sequences.
+
+- **Description**
+
+- *SynDivA* is implemented in Python.
+
+- The computations are divided into three steps:
+
+    1. Pre-processing of the input data (determination of the orientation of the sequences, determination of the reading frame, translation of nucleotide sequences into protein sequences, elimination of the "waste" sequences containing stop codons in variable regions)
+    2. Pairwise alignment of the sequences
+    3. Statistics calculations on the alignments (distances)
+
+- The results are presented as an HTML report.
+
+- **Example**
+
+- Pattern : AAAGSSVSSVPTKLEVVAATPTSLLISWDA:4-6:V:1:YYRITYGETGGNSPVQEFTVPG:3:TATISGLSPGVDYTITVYA:11-12:PISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAETVESCLAKSHTENSFTNVWKDDKTLDRYANYE
+- 5' restriction site : GCGGCCGC
+- 3' restriction site : GGTACC
+
+ ]]></help>
+    <expand macro="citations" />
+</tool>
b
diff -r 000000000000 -r 0254731f047b test-data/distri.png
b
Binary file test-data/distri.png has changed
b
diff -r 000000000000 -r 0254731f047b test-data/syndiva_datatest.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/syndiva_datatest.fasta Thu Jun 23 22:32:13 2022 +0000
b
b'@@ -0,0 +1,991 @@\n+>XL1_10_PSEXSEQ-REV_13 status=ok nucl=1301 crlStart=4 crlStop=1186 crlLen=1183 order=COL12-0DIL\n+tagTTGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT\n+GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG\n+ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG\n+AAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAA\n+ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT\n+TGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCG\n+TTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCC\n+CAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGT\n+CGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGA\n+TTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAA\n+ACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAAC\n+ATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCA\n+GACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAA\n+ATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGA\n+AAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCG\n+CCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGC\n+TTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGG\n+GAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACAT\n+TAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTC\n+CAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGC\n+GGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACC\n+AGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAG\n+TTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTT\n+TGATGGTGGTTAACGGCGGGATATAACATGAGCTGGcttcgggatcgtcg\n+tatcccactaccgagatgtccgcaccaacgcgcaccccggactccgaaat\n+ggcgcgcattggccccacggccttcggatcgttgggaaccagcatcgcat\n+t\n+>XL1_11_PSEXSEQ-REV_14 status=ok nucl=1299 crlStart=8 crlStop=1185 crlLen=1178 
order=COL12-0DIL\n+actctgcTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT\n+GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG\n+ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG\n+AAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAA\n+ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT\n+TGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCG\n+TTTCGCCATAGGTGATACGGTAATAGCTGACGCAGTTGCCGTGTGCATCC\n+CAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGT\n+CGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGA\n+TTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAA\n+ACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAAC\n+ATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCA\n+GACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAA\n+ATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGA\n+AAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCG\n+CCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGC\n+TTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGG\n+GAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACAT\n+TAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTC\n+CAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGC\n+GGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACC\n+AGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAG\n+TTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGGT\n+TTGATGGTGGTTAACGGCGGGATATAACATGAGCTggcttcgggatcgtc\n+gtatcccattaccgaaaatgtccgcaccaacgcgcaaccccggactcggg\n+aaagggcgcgcattgcgcccaagcgccatctggatcgttgggaaaccag\n+>XL1_12_PSEXSEQ-REV_15 status=ok nucl=1301 crlStart=6 crlStop=1184 crlLen=1179 
order=COL12-0DIL\n+agcctCGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG\n+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT\n+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA\n+GAAATCGGGCAGGTGGCGACGATGGTGTTGCAGTCGTTGTGAGCGTAAAC\n+CGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTAT\n+AAGAAGAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTT\n+TCGCCATAGGTGATACGGTAATAGTGGACGGCGGAGTTGGGGAGGTTTGC\n+ATCCCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTT\n+TGGTCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGC\n+TTGATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAG\n+GTAAACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCA\n+TAACATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATC\n+TGCAGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAAC\n+GTAAATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGA\n+TGGAAAAGCCCAGACCCTTCGGCGCAGGCCGAGAATGCCAGCACCAGACC\n+CGCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAA\n+GCTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCC\n+GGGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCAC\n+ATTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCG'..b'G\n+TTGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC\n+GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC\n+CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG\n+TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG\n+ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA\n+AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA\n+CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC\n+AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA\n+AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG\n+AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC\n+GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG\n+CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG\n+GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA\n+TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT\n+CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG\n+CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC\n+CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA\n+GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGG\n+TTTGATGGTGGTTAACG
GCGGGATataacatgaactggcttcggtatcgt\n+cgtatcccactaccgaaatgtccgcaccaacgcgcaacccggaatcggga\n+atgggccgcattgcgcccagcgccatctgatctttgggaaccagcatccc\n+a\n+>XL3-4_PSEXSEQ-REV_40 status=ok nucl=1301 crlStart=8 crlStop=1186 crlLen=1179 order=COL12-0DIL\n+tcgctcgCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG\n+TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT\n+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA\n+GAAATCGGCGGCTGTGGATGAAGCTGTGGTTGAGGCTGGAAGCGTAAACC\n+GTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTATA\n+ATAAGAACCCGGGACGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTTC\n+GCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCCCAGC\n+TGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGGC\n+ACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTTC\n+CATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACCA\n+CATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATCT\n+TCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGACG\n+GCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATGC\n+TGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAAG\n+CCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCAG\n+AGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTTG\n+TGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAGC\n+ATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAAT\n+TGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAGT\n+CGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGG\n+AGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGTG\n+AGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTGC\n+AGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGGTTTGA\n+TGGTGGTTAACGGCGGGATATAACATGAGCTGGCTTcggtatcgtcgtat\n+cccactaccgagatgtccgcacaacgcgcagcccggactcggtaatggcc\n+cgcattggccccagcgccatctgatcgttgggaaccagctcccgagtggg\n+a\n+>XL3-5_PSEXSEQ-REV_41 status=ok nucl=1301 crlStart=8 crlStop=1190 crlLen=1183 
order=COL12-0DIL\n+ctgtcgcTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT\n+GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG\n+ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG\n+AAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAA\n+ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT\n+TGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCG\n+TTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCC\n+CAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGT\n+CGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGA\n+TTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAA\n+ACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAAC\n+ATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCA\n+GACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAA\n+ATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGA\n+AAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCG\n+CCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGC\n+TTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGG\n+GAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACAT\n+TAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTC\n+CAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGC\n+GGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACC\n+AGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAG\n+TTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTT\n+TGATGGTGGTTAACGGCGGGATATAACATGAGCTGGCTTCggtatcgtcg\n+tatcccactaccgagatgtccgcaccaacgcgcagcccggactcggaatg\n+gggcgcattgggcccagcgccatttgatcgttgggaaccagcatcgcatt\n+g\n'
b
diff -r 000000000000 -r 0254731f047b test-data/syndiva_report.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/syndiva_report.html Thu Jun 23 22:32:13 2022 +0000
b
b'@@ -0,0 +1,127 @@\n+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN""http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><title>SynDivA Report</title><link href="http://twitter.github.com/bootstrap/assets/css/bootstrap.css" rel="stylesheet" /><style type="text/css">body {padding-top: 40px;}.subhead {padding: 40px 0;}.subhead h1 {font-size: 60px;}.fasta {   font-family: Monaco, Menlo, Consolas, "Courier New", monospace;   font-size: 12px;}code.grey{color: #636D71;}</style></head><body><a id="top"></a><div class="navbar navbar-fixed-top"><div class="navbar-inner"><div class="container"><a class="brand" href="#top">SynDivA Report</a><div class="nav-collapse collapse"><ul class="nav"><li><a href="#input">Input data</a></li><li><a href="#analysis">Sequences analysis</a></li><li><a href="#variable">Variable regions analysis</a></li><li><a href="#cluster">Clustering</a></li><li><a href="#stat">Statistics</a></li><li><a href="#annex">Annex</a></li></ul></div></div></div></div><div class="container-fluid"><header class="subhead"><h1>SynDivA Report</h1></header><div class="page-header"><a id="input"></a><h2>Input data</h2></div><p>Input file:<br/><code class="grey">syndiva_datatest.fasta</code></p><p>Number of sequences in input file:<br/><code class="grey">36</code></p><p>Pattern of the sequence bank:<br/><code class="grey">AAAGSSVSSVPTKLEVVAATPTSLLISWDA:4-6:V:1:YYRITYGETGGNSPVQEFTVPG:3:TATISGLSPGVDYTITVYA:11-12:PISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAETVESCLAKSHTENSFTNVWKDDKTLDRYANYE</code></p><p>5\' restriction site:<br/><code class="grey">GCGGCCGC</code></p><p>3\' restriction site:<br/><code class="grey">GGTACC</code></p><div class="page-header"><a id="analysis"></a><h2>Sequences analysis</h2></div><p>Caption:</p><ul><li class="text-success">Valid sequences that will be part of the next analysis </li><li 
class="text-warning">Good sequences but will not be part of the next analysis</li><li class="text-error">Rejected sequences</li></ul><table class="table table-striped table-bordered"><tr><th class="text-error">Absence of restriction sites</th><th class="text-error">Incorrect number of nucleotides between the restriction sites</th><th class="text-error">Stop codon <u>inside</u> the area of interest</th><th class="text-warning">Mutation in the conserved regions</th><th class="text-success">Valid sequences</th><th>Amber codon in the sequence (<u>inside</u> the area of interest)</th></tr><tr><td class="text-error">1 sequence(s) (2.78%)</td><td class="text-error">6 sequence(s) (16.67%)</td><td class="text-error">25 sequence(s) (69.44%)</td><td class="text-warning">0 sequence(s) (0.00%)</td><td class="text-success">4 sequence(s) (11.11%)</td><td>0 sequence(s)</td></tr><tr><td class="text-error">XL2-3_PSEXSEQ-REV_34</td><td class="text-error">XL1_22_PSEXSEQ-REV_24<br/>XL1_24_PSEXSEQ-REV_26<br/>XL1_30_PSEXSEQ-REV_30<br/>XL1_4_PSEXSEQ-REV_8<br/>XL2-2_PSEXSEQ-REV_33<br/>XL3-4_PSEXSEQ-REV_40</td><td class="text-error">XL1_10_PSEXSEQ-REV_13<br/>XL1_11_PSEXSEQ-REV_14<br/>XL1_13_PSEXSEQ-REV_16<br/>XL1_14_PSEXSEQ-REV_17<br/>XL1_15_PSEXSEQ-REV_18<br/>XL1_16_PSEXSEQ-REV_19<br/>XL1_17_PSEXSEQ-REV_20<br/>XL1_1_PSEXSEQ-REV_5<br/>XL1_23_PSEXSEQ-REV_25<br/>XL1_25_PSEXSEQ-REV_27<br/>XL1_26_PSEXSEQ-REV_28<br/>XL1_29_PSEXSEQ-REV_29<br/>XL1_33_PSEXSEQ-REV_23<br/>XL1_3_PSEXSEQ-REV_7<br/>XL1_5_PSEXSEQ-REV_9<br/>XL1_6_PSEXSEQ-REV_10<br/>XL1_8_PSEXSEQ-REV_11<br/>XL1_9_PSEXSEQ-REV_12<br/>XL2-1_PSEXSEQ-REV_32<br/>XL2-4_PSEXSEQ-REV_35<br/>XL2-5_PSEXSEQ-REV_36<br/>XL3-1_PSEXSEQ-REV_37<br/>XL3-2_PSEXSEQ-REV_38<br/>XL3-3_PSEXSEQ-REV_39<br/>XL3-5_PSEXSEQ-REV_41</td><td class="text-warning"></td><td class="text-success">XL1_12_PSEXSEQ-REV_15<br/>XL1_18_PSEXSEQ-REV_21<br/>XL1_19_PSEXSEQ-REV_22<br/>XL1_2_PSEXSEQ-REV_6</td><td></td></tr></table><div class="page-header"><a id="variable"></a><h2>Variable 
regions analysi'..b'VGSKDIRAE\n+>XL1_13_PSEXSEQ-REV_16\n+AAAGSSVSSVPTKLEVVAATPTSLLISWDAPRSFVRYYRITYGETGGNSPVQEFTVPGSYSTATISGLSPGVDYTITVYA\n+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAK\n+>XL1_14_PSEXSEQ-REV_17\n+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSYTATISGLSPGVDYTITVYA\n+RYFIYSYISHSTPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAK\n+>XL1_15_PSEXSEQ-REV_18\n+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA\n+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRC\n+>XL1_16_PSEXSEQ-REV_19\n+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGYYSTATISGLSPGVDYTITVYA\n+CGGVNANSSDCFPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAK\n+>XL1_17_PSEXSEQ-REV_20\n+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA\n+SVPIHFSCRGCPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIS\n+>XL1_1_PSEXSEQ-REV_5\n+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA\n+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIP\n+>XL1_23_PSEXSEQ-REV_25\n+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSYYTATISGLSPGVDYTITVYA\n+GDISDNPFSRCPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAN\n+>XL1_25_PSEXSEQ-REV_27\n+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA\n+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAR\n+>XL1_26_PSEXSEQ-REV_28\n+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA\n+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKRYP\n+>XL1_29_PSEXSEQ-REV_29\n+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA\n+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAP\n+>XL1_33_PSEXSEQ-REV_23\n+AAAGSSVSSVPTKLEVVAATPTSLLISWDASQSNVSYYRITYGETGGNSPVQEFTVPGCYSTATISGLSPGVDYTITVYA\n+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAEN\n+>XL1_3_PSEXSEQ-REV_7\n+AAAGSSVSSVPTKLEVVAATPTSLLISWDA
S*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA\n+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAS\n+>XL1_5_PSEXSEQ-REV_9\n+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA\n+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAN\n+>XL1_6_PSEXSEQ-REV_10\n+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGYYSTATISGLSPGVDYTITVYA\n+NATVSNFHPINSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDFRAK\n+>XL1_8_PSEXSEQ-REV_11\n+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA\n+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAK\n+>XL1_9_PSEXSEQ-REV_12\n+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVCYYRITYGETGGNSPVQEFTVPGSYSTATISGLSPGVDYTITVYA\n+SLVNDDVHAVHHPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIQS\n+>XL2-1_PSEXSEQ-REV_32\n+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA\n+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKEIRAM\n+>XL2-4_PSEXSEQ-REV_35\n+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA\n+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAN\n+>XL2-5_PSEXSEQ-REV_36\n+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSYYTATISGLSPGVDYTITVYA\n+SGYCVIVANSYPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIP\n+>XL3-1_PSEXSEQ-REV_37\n+AAAGSSVSSVPTKLEVVAATPTSLLISCDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA\n+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRSR\n+>XL3-2_PSEXSEQ-REV_38\n+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA\n+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAR\n+>XL3-3_PSEXSEQ-REV_39\n+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA\n+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAS\n+>XL3-5_PSEXSEQ-REV_41\n+AAAGSSVSSVPTKLEVVAATPTSLLISWDAS*SSVSYYRITYGETGGNSPVQEFTVPGSSSTATISGLSPGVDYTITVYA\n+SSS*RISSSSSSPISINYRTGTGGSGGSHHHHHHHHGG
SYPYDVPDYAPRVG*GRVGSKDIRAT\n+</textarea></div></body></html>\n\\ No newline at end of file\n'