# HG changeset patch # User cbib # Date 1636557350 0 # Node ID 0c6cfb9906f3595955a4a876cf79bcd1a4cbc594 Uploaded diff -r 000000000000 -r 0c6cfb9906f3 fibronectin/NWalign_PAM30 Binary file fibronectin/NWalign_PAM30 has changed diff -r 000000000000 -r 0c6cfb9906f3 fibronectin/NWalign_PAM30.f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fibronectin/NWalign_PAM30.f Wed Nov 10 15:15:50 2021 +0000 @@ -0,0 +1,1258 @@ +************************************************************************* +* This is a program for protein sequence alignment using the standard +* Needleman-Wunsch dynamic programming. The mutation matrix is from +* PAM30 with gap openning penaly=-11 and gap extension panalty=-1. +* The program can be freely copied and modified provided the notices +* on the head are retained. Comments and bug report should be addressed +* to Yang Zhang (Email: zhng@umich.edu). Last update is in 2010/08/03. +* +* Instructions: +* 1, the program can be compiled by +* >gfortran -static -O3 -ffast-math -lm -o align align.f +* 2, simply running the program will give a brief note on how to use it +* 3, You can run the program in following convenient ways: +* >align F1.fasta F2.fasta (align two sequences in fasta file) +* >align F1.pdb F2.pdb 1 (align two sequences in PDB file) +* >align F1.fasta F2.pdb 2 (align Sequence 1 in fasta and 2 in pdb) +* >align GKDGL EVADELVSE 3 (align sequences typed by keyboard) +* >align GKDGL F.fasta 4 (align Seq-1 by keyboard and 2 in fasta) +* >align GKDGL F.pdb 5 (align Seq-1 by keyboard and 2 in pdb) +************************************************************************* + + program compares + PARAMETER(ndim=6000) + parameter(naa=24) !number of amino acid + common/dpc/score(ndim,ndim),gap_open,gap_extn,j2i(ndim) + & ,nseq1,nseq2 + common/matra/imut(naa,naa) !b,z,x are additional + + integer seq1(ndim),seq2(ndim) + character*10000 fnam1,fnam2,fnam3,fnam4 + character*10000 s + character*3 aa(naa),aanam + character seqw(naa),upper + 
character*100 du,ad + character sequenceA(2*ndim),sequenceB(2*ndim),sequenceM(2*ndim) +* output buffers: a gapped alignment can be nseq1+nseq2 columns long +*---------------------- 24 amino acids --------------------- + data aa/'ALA','ARG','ASN','ASP','CYS','GLN','GLU', + & 'GLY','HIS','ILE','LEU','LYS','MET','PHE','PRO','SER', + & 'THR','TRP','TYR','VAL','ASX','GLX','UNK','STOP'/ + data seqw/'A','R','N','D','C','Q','E','G','H','I','L','K', + & 'M','F','P','S','T','W','Y','V','B','Z','X','*'/ + + call getarg(1,fnam1) + call getarg(2,fnam2) + call getarg(3,fnam3) + call getarg(4,fnam4) + + if(fnam1.eq.' ')then + write(*,*)'align F1.fasta F2.fasta ', + & '(align two sequences in fasta file)' + write(*,*)'align F1.pdb F2.pdb 1 ', + & '(align two sequences in PDB file)' + write(*,*)'align F1.fasta F2.pdb 2 ', + & '(align Sequence 1 in fasta and 2 in pdb)' + write(*,*)'align GKDGL EVADELVSE 3 ', + & '(align two sequences typed by keyboard)' + write(*,*)'align GKDGL F.fasta 4 ', + & '(align Sequence 1 by keyboard and 2 in fasta)' + write(*,*)'align GKDGL F.pdb 5 ', + & '(align Sequence 1 by keyboard and 2 in pdb)' + goto 999 + endif + +*1******* read sequences -------------------------> + if(fnam3.eq.'5')then !direct, 555555555555555555 +*** read sequence1: + i=0 + do k=1,10000 + fnam1(k:k)=upper(fnam1(k:k)) + do j=1,naa + if(fnam1(k:k).eq.seqw(j))then + i=i+1 + seq1(i)=j + goto 5 + endif + enddo + if(fnam1(k:k).ne.'-')goto 55 !same time + 5 continue + if(i.ge.ndim)goto 55 + enddo + 55 continue + nseq1=i +*** read sequence2: + open(unit=10,file=fnam2,status='old') + i=0 + do while (.true.) + read(10,1,end=551) s + if(i.gt.0.and.s(1:3).eq.'TER')goto 551 + if(s(1:3).eq.'ATO')then + if(s(13:16).eq.'CA '.or.s(13:16).eq.' CA '. + & or.s(13:16).eq.' 
CA')then + i=i+1 + read(s,111)du,aanam + do j=1,naa + if(aanam.eq.aa(j))seq2(i)=j + enddo + endif + endif + if(i.ge.ndim)goto 551 + enddo + 551 continue + close(10) + nseq2=i + elseif(fnam3.eq.'4')then !direct, 444444444444444444444444444 +*** read sequence1: + i=0 + do k=1,10000 + fnam1(k:k)=upper(fnam1(k:k)) + do j=1,naa + if(fnam1(k:k).eq.seqw(j))then + i=i+1 + seq1(i)=j + goto 4 + endif + enddo + if(fnam1(k:k).ne.'-')goto 44 + 4 continue + if(i.ge.ndim)goto 44 + enddo + 44 continue + nseq1=i +*** read sequence2: + open(unit=10,file=fnam2,status='old') + i=0 + do while(.true.) + read(10,1,end=443)s + if(s(1:1).eq.'>')goto 442 + do k=1,10000 + s(k:k)=upper(s(k:k)) + do j=1,naa + if(s(k:k).eq.seqw(j))then + i=i+1 + seq2(i)=j + goto 441 + endif + enddo + if(s(k:k).ne.'-')goto 442 !same time + 441 continue + enddo + 442 continue + if(i.ge.ndim)goto 443 + enddo + 443 continue + close(10) + nseq2=i + + + elseif(fnam3.eq.'3')then !direct, 33333333333333333333333333333333333 +*** read sequence1: + i=0 + do k=1,10000 + fnam1(k:k)=upper(fnam1(k:k)) + do j=1,naa + if(fnam1(k:k).eq.seqw(j))then + i=i+1 + seq1(i)=j + goto 3 + endif + enddo + if(fnam1(k:k).ne.'-')goto 33 + 3 continue + if(i.ge.ndim)goto 33 + enddo + 33 continue + nseq1=i +*** read sequence2: + i=0 + do k=1,10000 + fnam2(k:k)=upper(fnam2(k:k)) + do j=1,naa + if(fnam2(k:k).eq.seqw(j))then + i=i+1 + seq2(i)=j + goto 331 + endif + enddo + if(fnam2(k:k).ne.'-')goto 332 + 331 continue + if(i.ge.ndim)goto 332 + enddo + 332 continue + nseq2=i + elseif(fnam3.eq.'1')then !pdb,pdb, 11111111111111111111111111111 +*** read sequence1: + open(unit=10,file=fnam1,status='old') + i=0 + do while (.true.) + read(10,1,end=11) s + if(i.gt.0.and.s(1:3).eq.'TER')goto 11 + if(s(1:3).eq.'ATO')then + if(s(13:16).eq.'CA '.or.s(13:16).eq.' CA '. + & or.s(13:16).eq.' 
CA')then + i=i+1 + read(s,111)du,aanam + do j=1,naa + if(aanam.eq.aa(j))seq1(i)=j + enddo + endif + endif + if(i.ge.ndim)goto 11 + enddo + 1 format(A10000) + 11 continue + 111 format(A17,A3) + close(10) + nseq1=i +*** read sequence2: + open(unit=10,file=fnam2,status='old') + i=0 + do while (.true.) + read(10,1,end=112) s + if(i.gt.0.and.s(1:3).eq.'TER')goto 112 + if(s(1:3).eq.'ATO')then + if(s(13:16).eq.'CA '.or.s(13:16).eq.' CA '. + & or.s(13:16).eq.' CA')then + i=i+1 + read(s,111)du,aanam + do j=1,naa + if(aanam.eq.aa(j))seq2(i)=j + enddo + endif + endif + if(i.ge.ndim)goto 112 + enddo + 112 continue + close(10) + nseq2=i + elseif(fnam3.eq.'2')then !seq,pdb 2222222222222222222222222222222 +*** read sequence1: + open(unit=10,file=fnam1,status='old') + i=0 + do while(.true.) + read(10,1,end=221)s + if(s(1:1).eq.'>')goto 22 + do k=1,10000 + s(k:k)=upper(s(k:k)) + do j=1,naa + if(s(k:k).eq.seqw(j))then + i=i+1 + seq1(i)=j + goto 2 + endif + enddo + if(s(k:k).ne.'-')goto 22 + 2 continue + enddo + 22 continue + if(i.ge.ndim)goto 221 + enddo + 221 continue + close(10) + nseq1=i +*** read sequence2: + open(unit=10,file=fnam2,status='old') + i=0 + do while (.true.) + read(10,1,end=222) s + if(i.gt.0.and.s(1:3).eq.'TER')goto 222 + if(s(1:3).eq.'ATO')then + if(s(13:16).eq.'CA '.or.s(13:16).eq.' CA '. + & or.s(13:16).eq.' CA')then + i=i+1 + read(s,111)du,aanam + do j=1,naa + if(aanam.eq.aa(j))seq2(i)=j + enddo + endif + endif + if(i.ge.ndim)goto 222 + enddo + 222 continue + close(10) + nseq2=i + else !seq,seq 00000000000000000000000000000000 +*** read sequence1: + open(unit=10,file=fnam1,status='old') + i=0 + do while(.true.) 
+ read(10,1,end=881)s + if(s(1:1).eq.'>')goto 88 + do k=1,10000 + s(k:k)=upper(s(k:k)) + do j=1,naa + if(s(k:k).eq.seqw(j))then + i=i+1 + seq1(i)=j + goto 8 + endif + enddo + if(s(k:k).ne.'-')goto 88 + 8 continue + enddo + 88 continue + if(i.ge.ndim)goto 881 + enddo + 881 continue + close(10) + nseq1=i +*** read sequence2: + open(unit=10,file=fnam2,status='old') + i=0 + do while(.true.) + read(10,1,end=884)s + if(s(1:1).eq.'>')goto 883 + do k=1,10000 + s(k:k)=upper(s(k:k)) + do j=1,naa + if(s(k:k).eq.seqw(j))then + i=i+1 + seq2(i)=j + goto 882 + endif + enddo + if(s(k:k).ne.'-')goto 883 + 882 continue + enddo + 883 continue + if(i.ge.ndim)goto 884 + enddo + 884 continue + close(10) + nseq2=i + endif + +*2** read mutation matrix ----------> + call matrix !take pam +*** set unit mutation matrix ----------> +c do i=1,naa +c do j=1,naa +c imut(i,j)=0 +c enddo +c enddo +c do i=1,naa +c imut(i,i)=1 +c enddo + +*3** score------------------> + do i=1,nseq1 + do j=1,nseq2 + score(i,j)=imut(seq1(i),seq2(j)) + enddo + enddo + +*4***************************************************************** +* dynamatic program: +****************************************************************** + gap_open=-11 + gap_extn=-1 + call DP(score0) !W(k)=Go+Ge*k1+Go+Ge*k2, standard NW +c call DPalt(score0) !W(k)=Go+Ge*k1+Ge*k2, alternative NW + +*5** calculate sequence identity----------------------------> + L_id=0 + L_ali=0 + do j=1,nseq2 + if(j2i(j).gt.0)then + i=j2i(j) + L_ali=L_ali+1 + if(seq1(i).eq.seq2(j))L_id=L_id+1 + endif + enddo + + write(*,*) + write(*,101)nseq1,fnam1 + 101 format('Length of sequence 1: ',I4,' ->',A10) + write(*,102)nseq2,fnam2 + 102 format('Length of sequence 2: ',I4,' ->',A10) + write(*,103)L_ali + 103 format('Aligned length: ',I4) + write(*,104)L_id + 104 format('Identical length: ',I4) + write(*,105)float(L_id)/(nseq2+0.00000001),L_id,nseq2 + 105 format('Sequence identity: ',F8.3,' (=',I4,'/',I4,')') + write(*,*) + 
+*6****************************************************************** +*** output aligned sequences + k=0 !final aligned order + i=1 !on sequence 1 + j=1 !on sequence 2 + 800 continue + if(i.gt.nseq1.and.j.gt.nseq2)goto 802 + if(i.gt.nseq1.and.j.le.nseq2)then !unaligned C on 1 + k=k+1 + sequenceA(k)='-' + sequenceB(k)=seqw(seq2(j)) + sequenceM(k)=' ' + j=j+1 + goto 800 + endif + if(i.le.nseq1.and.j.gt.nseq2)then !unaligned C on 2 + k=k+1 + sequenceA(k)=seqw(seq1(i)) + sequenceB(k)='-' + sequenceM(k)=' ' + i=i+1 + goto 800 + endif + if(i.eq.j2i(j))then !if aligned + k=k+1 + sequenceA(k)=seqw(seq1(i)) + sequenceB(k)=seqw(seq2(j)) + if(seq1(i).eq.seq2(j))then !identical + sequenceM(k)=':' + else + sequenceM(k)=' ' + endif + i=i+1 + j=j+1 + goto 800 + elseif(j2i(j).lt.0)then !if gap on 1 + k=k+1 + sequenceA(k)='-' + sequenceB(k)=seqw(seq2(j)) + sequenceM(k)=' ' + j=j+1 + goto 800 + elseif(j2i(j).gt.0)then !if gap on 2 + k=k+1 + sequenceA(k)=seqW(seq1(i)) + sequenceB(k)='-' + sequenceM(k)=' ' + i=i+1 + goto 800 + endif + 802 continue + + write(*,601)(sequenceA(i),i=1,k) + write(*,601)(sequenceM(i),i=1,k) + write(*,601)(sequenceB(i),i=1,k) + write(*,602)(mod(i,10),i=1,k) + 601 format(2000A1) + 602 format(2000I1) + write(*,*) + +c^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +c STOP + 999 END + +******************************************************************** +* This is a standard Needleman-Wunsch dynamic program (by Y. Zhang 2005) +* 1. Count multiple-gap. +* 2. 
The gap penality W(k)=Go+Ge*k1+Go+Ge*k2 if gap open on both sequences +* +* Input: score(i,j), gap_open, gap_extn +* Output: j2i(j) +* idir(i,j)=1,2,3, from diagonal, horizontal, vertical +* val(i,j) is the cumulative score of (i,j) +******************************************************************** + subroutine DP(score0) + PARAMETER(ndim=6000) + common/dpc/score(ndim,ndim),gap_open,gap_extn,j2i(ndim) + & ,nseq1,nseq2 + + dimension val(0:ndim,0:ndim),idir(0:ndim,0:ndim) + dimension jpV(0:ndim,0:ndim),jpH(0:ndim,0:ndim) + dimension preV(0:ndim,0:ndim),preH(0:ndim,0:ndim) + real D,V,H + +ccc initializations ---------------> + val(0,0)=0.0 + do i=1,nseq1 + val(i,0)=gap_extn*i + preV(i,0)=val(i,0) !not use preV at the beginning + idir(i,0)=0 !useless + jpV(i,0)=1 !useless + jpH(i,0)=i !useless + enddo + do j=1,nseq2 + val(0,j)=gap_extn*j + preH(0,j)=val(0,j) + idir(0,j)=0 + jpV(0,j)=j + jpH(0,j)=1 + enddo + +ccc DP ------------------------------> + do 111 j=1,nseq2 + do 222 i=1,nseq1 +ccc D=VAL(i-1,j-1)+SCORE(i,j)---------------> + D=val(i-1,j-1)+score(i,j) !from diagonal, val(i,j) is val(i-1,j-1) +ccc H=H+gap_open -------> + jpH(i,j)=1 + val1=val(i-1,j)+gap_open !gap_open from both D and V + val2=preH(i-1,j)+gap_extn !gap_extn from horizontal + if(val1.gt.val2) then !last step from D or V + H=val1 + else !last step from H + H=val2 + if(i.gt.1)jpH(i,j)=jpH(i-1,j)+1 !record long-gap + endif +ccc V=V+gap_open ---------> + jpV(i,j)=1 + val1=val(i,j-1)+gap_open + val2=preV(i,j-1)+gap_extn + if(val1.gt.val2) then + V=val1 + else + V=val2 + if(j.gt.1)jpV(i,j)=jpV(i,j-1)+1 + endif + preH(i,j)=H !unaccepted H + preV(i,j)=V !unaccepted V + + if(D.gt.H.and.D.gt.V)then + idir(i,j)=1 + val(i,j)=D + elseif(H.gt.V)then + idir(i,j)=2 + val(i,j)=H + else + idir(i,j)=3 + val(i,j)=V + endif + 222 continue + 111 continue + score0=val(nseq1,nseq2) !alignment score + +c tracing back the pathway: + do j=1,nseq2 + j2i(j)=-1 !all are not aligned + enddo + i=nseq1 + j=nseq2 + do 
while(i.gt.0.and.j.gt.0) + if(idir(i,j).eq.1)then !from diagonal + j2i(j)=i + i=i-1 + j=j-1 + elseif(idir(i,j).eq.2)then !from horizonal + it=jpH(i,j) + do me=1,it + if(i.gt.0) then + i=i-1 + endif + enddo + else + it=jpV(i,j) + do me=1,it + if(j.gt.0) then + j=j-1 + endif + enddo + endif + enddo + +*^^^^^^^^^^^DP finished ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + return + end + +******************************************************************** +* This is an alternative implementation of Needleman-Wunsch dynamic program +* (by Y. Zhang 2005) +* 1. Count two-layer iteration and multiple-gaps +* 2. The gap penality W(k)=Go+Ge*k1+Ge*k2 if gap open on both sequences +* +* Input: score(i,j), gap_open, gap_extn +* Output: j2i(j) +* idir(i,j)=1,2,3, from diagonal, horizontal, vertical +* val(i,j) is the cumulative score of (i,j) +******************************************************************** + subroutine DPalt(score0) + PARAMETER(ndim=6000) + common/dpc/score(ndim,ndim),gap_open,gap_extn,j2i(ndim) + & ,nseq1,nseq2 + + dimension val(0:ndim,0:ndim),idir(0:ndim,0:ndim) + dimension preV(0:ndim,0:ndim),preH(0:ndim,0:ndim), + & preD(0:ndim,0:ndim) + dimension idirH(0:ndim,0:ndim),idirV(0:ndim,0:ndim) + +ccc initializations ---------------> + val(0,0)=0.0 + do i=1,nseq1 + val(i,0)=0 + idir(i,0)=0 + preD(i,0)=0.0 + preH(i,0)=-1000.0 + preV(i,0)=-1000.0 + enddo + do j=1,nseq2 + val(0,j)=0 + idir(0,j)=0 + preD(0,j)=0.0 + preH(0,j)=-1000.0 + preV(0,j)=-1000.0 + enddo + +ccc DP ------------------------------> + do 111 j=1,nseq2 + do 222 i=1,nseq1 +ccc preD=VAL(i-1,j-1)+SCORE(i,j)---------------> + preD(i,j)=val(i-1,j-1)+score(i,j) +ccc preH: pre-accepted H-----------------------> + D=preD(i-1,j)+gap_open + H=preH(i-1,j)+gap_extn + V=preV(i-1,j)+gap_extn + if(D.gt.H.and.D.gt.V)then + preH(i,j)=D + idirH(i-1,j)=1 + elseif(H.gt.V)then + preH(i,j)=H + idirH(i-1,j)=2 + else + preH(i,j)=V + idirH(i-1,j)=3 + endif +ccc preV: pre-accepted V-----------------------> + 
D=preD(i,j-1)+gap_open + H=preH(i,j-1)+gap_extn + V=preV(i,j-1)+gap_extn + if(D.gt.H.and.D.gt.V)then + preV(i,j)=D + idirV(i,j-1)=1 + elseif(H.gt.V)then + preV(i,j)=H + idirV(i,j-1)=2 + else + preV(i,j)=V + idirV(i,j-1)=3 + endif + +ccc decide idir(i,j)-----------> + if(preD(i,j).gt.preH(i,j).and.preD(i,j).gt.preV(i,j))then + idir(i,j)=1 + val(i,j)=preD(i,j) + elseif(preH(i,j).gt.preV(i,j))then + idir(i,j)=2 + val(i,j)=preH(i,j) + else + idir(i,j)=3 + val(i,j)=preV(i,j) + endif + 222 continue + 111 continue + score0=val(nseq1,nseq2) !alignment score + +ccc tracing back the pathway: + do j=1,nseq2 + j2i(j)=-1 !all are not aligned + enddo + i=nseq1 + j=nseq2 + do while(i.gt.0.and.j.gt.0) + if(idir(i,j).eq.1)then !from diagonal + j2i(j)=i + i=i-1 + j=j-1 + elseif(idir(i,j).eq.2)then + i=i-1 + idir(i,j)=idirH(i,j) + else + j=j-1 + idir(i,j)=idirV(i,j) + endif + enddo + +*^^^^^^^^^^^DP finished ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + return + end + +******************************************************************** +* read matrix +* + subroutine matrix + parameter(naa=24) !number of amino acid + common/matra/imut(naa,naa) !b,z,x are additional + +* following from PAM30: + imut(1,1)=6 + imut(1,2)=-7 + imut(1,3)=-4 + imut(1,4)=-3 + imut(1,5)=-6 + imut(1,6)=-4 + imut(1,7)=-2 + imut(1,8)=-2 + imut(1,9)=-7 + imut(1,10)=-5 + imut(1,11)=-6 + imut(1,12)=-7 + imut(1,13)=-5 + imut(1,14)=-8 + imut(1,15)=-2 + imut(1,16)=0 + imut(1,17)=-1 + imut(1,18)=-13 + imut(1,19)=-8 + imut(1,20)=-2 + imut(1,21)=-3 + imut(1,22)=-3 + imut(1,23)=-1 + imut(1,24)=-17 + imut(2,1)=-7 + imut(2,2)=8 + imut(2,3)=-6 + imut(2,4)=-10 + imut(2,5)=-8 + imut(2,6)=-2 + imut(2,7)=-9 + imut(2,8)=-9 + imut(2,9)=-2 + imut(2,10)=-5 + imut(2,11)=-8 + imut(2,12)=0 + imut(2,13)=-4 + imut(2,14)=-9 + imut(2,15)=-4 + imut(2,16)=-3 + imut(2,17)=-6 + imut(2,18)=-2 + imut(2,19)=-10 + imut(2,20)=-8 + imut(2,21)=-7 + imut(2,22)=-4 + imut(2,23)=-1 + imut(2,24)=-17 + imut(3,1)=-4 + imut(3,2)=-6 + imut(3,3)=8 + imut(3,4)=2 + 
imut(3,5)=-11 + imut(3,6)=-3 + imut(3,7)=-2 + imut(3,8)=-3 + imut(3,9)=0 + imut(3,10)=-5 + imut(3,11)=-7 + imut(3,12)=-1 + imut(3,13)=-9 + imut(3,14)=-9 + imut(3,15)=-6 + imut(3,16)=0 + imut(3,17)=-2 + imut(3,18)=-8 + imut(3,19)=-4 + imut(3,20)=-8 + imut(3,21)=6 + imut(3,22)=-3 + imut(3,23)=-1 + imut(3,24)=-17 + imut(4,1)=-3 + imut(4,2)=-10 + imut(4,3)=2 + imut(4,4)=8 + imut(4,5)=-14 + imut(4,6)=-2 + imut(4,7)=2 + imut(4,8)=-3 + imut(4,9)=-4 + imut(4,10)=-7 + imut(4,11)=-12 + imut(4,12)=-4 + imut(4,13)=-11 + imut(4,14)=-15 + imut(4,15)=-8 + imut(4,16)=-4 + imut(4,17)=-5 + imut(4,18)=-15 + imut(4,19)=-11 + imut(4,20)=-8 + imut(4,21)=6 + imut(4,22)=1 + imut(4,23)=-1 + imut(4,24)=-17 + imut(5,1)=-6 + imut(5,2)=-8 + imut(5,3)=-11 + imut(5,4)=-14 + imut(5,5)=10 + imut(5,6)=-14 + imut(5,7)=-14 + imut(5,8)=-9 + imut(5,9)=-7 + imut(5,10)=-6 + imut(5,11)=-15 + imut(5,12)=-14 + imut(5,13)=-13 + imut(5,14)=-13 + imut(5,15)=-8 + imut(5,16)=-3 + imut(5,17)=-8 + imut(5,18)=-15 + imut(5,19)=-4 + imut(5,20)=-6 + imut(5,21)=-12 + imut(5,22)=-14 + imut(5,23)=-1 + imut(5,24)=-17 + imut(6,1)=-4 + imut(6,2)=-2 + imut(6,3)=-3 + imut(6,4)=-2 + imut(6,5)=-14 + imut(6,6)=8 + imut(6,7)=1 + imut(6,8)=-7 + imut(6,9)=1 + imut(6,10)=-8 + imut(6,11)=-5 + imut(6,12)=-3 + imut(6,13)=-4 + imut(6,14)=-13 + imut(6,15)=-3 + imut(6,16)=-5 + imut(6,17)=-5 + imut(6,18)=-13 + imut(6,19)=-12 + imut(6,20)=-7 + imut(6,21)=-3 + imut(6,22)=6 + imut(6,23)=-1 + imut(6,24)=-17 + imut(7,1)=-2 + imut(7,2)=-9 + imut(7,3)=-2 + imut(7,4)=2 + imut(7,5)=-14 + imut(7,6)=1 + imut(7,7)=8 + imut(7,8)=-4 + imut(7,9)=-5 + imut(7,10)=-5 + imut(7,11)=-9 + imut(7,12)=-4 + imut(7,13)=-7 + imut(7,14)=-14 + imut(7,15)=-5 + imut(7,16)=-4 + imut(7,17)=-6 + imut(7,18)=-17 + imut(7,19)=-8 + imut(7,20)=-6 + imut(7,21)=1 + imut(7,22)=6 + imut(7,23)=-1 + imut(7,24)=-17 + imut(8,1)=-2 + imut(8,2)=-9 + imut(8,3)=-3 + imut(8,4)=-3 + imut(8,5)=-9 + imut(8,6)=-7 + imut(8,7)=-4 + imut(8,8)=6 + imut(8,9)=-9 + imut(8,10)=-11 + imut(8,11)=-10 + 
imut(8,12)=-7 + imut(8,13)=-8 + imut(8,14)=-9 + imut(8,15)=-6 + imut(8,16)=-2 + imut(8,17)=-6 + imut(8,18)=-15 + imut(8,19)=-14 + imut(8,20)=-5 + imut(8,21)=-3 + imut(8,22)=-5 + imut(8,23)=-1 + imut(8,24)=-17 + imut(9,1)=-7 + imut(9,2)=-2 + imut(9,3)=0 + imut(9,4)=-4 + imut(9,5)=-7 + imut(9,6)=1 + imut(9,7)=-5 + imut(9,8)=-9 + imut(9,9)=9 + imut(9,10)=-9 + imut(9,11)=-6 + imut(9,12)=-6 + imut(9,13)=-10 + imut(9,14)=-6 + imut(9,15)=-4 + imut(9,16)=-6 + imut(9,17)=-7 + imut(9,18)=-7 + imut(9,19)=-3 + imut(9,20)=-6 + imut(9,21)=-1 + imut(9,22)=-1 + imut(9,23)=-1 + imut(9,24)=-17 + imut(10,1)=-5 + imut(10,2)=-5 + imut(10,3)=-5 + imut(10,4)=-7 + imut(10,5)=-6 + imut(10,6)=-8 + imut(10,7)=-5 + imut(10,8)=-11 + imut(10,9)=-9 + imut(10,10)=8 + imut(10,11)=-1 + imut(10,12)=-6 + imut(10,13)=-1 + imut(10,14)=-2 + imut(10,15)=-8 + imut(10,16)=-7 + imut(10,17)=-2 + imut(10,18)=-14 + imut(10,19)=-6 + imut(10,20)=2 + imut(10,21)=-6 + imut(10,22)=-6 + imut(10,23)=-1 + imut(10,24)=-17 + imut(11,1)=-6 + imut(11,2)=-8 + imut(11,3)=-7 + imut(11,4)=-12 + imut(11,5)=-15 + imut(11,6)=-5 + imut(11,7)=-9 + imut(11,8)=-10 + imut(11,9)=-6 + imut(11,10)=-1 + imut(11,11)=7 + imut(11,12)=-8 + imut(11,13)=1 + imut(11,14)=-3 + imut(11,15)=-7 + imut(11,16)=-8 + imut(11,17)=-7 + imut(11,18)=-6 + imut(11,19)=-7 + imut(11,20)=-2 + imut(11,21)=-9 + imut(11,22)=-7 + imut(11,23)=-1 + imut(11,24)=-17 + imut(12,1)=-7 + imut(12,2)=0 + imut(12,3)=-1 + imut(12,4)=-4 + imut(12,5)=-14 + imut(12,6)=-3 + imut(12,7)=-4 + imut(12,8)=-7 + imut(12,9)=-6 + imut(12,10)=-6 + imut(12,11)=-8 + imut(12,12)=7 + imut(12,13)=-2 + imut(12,14)=-14 + imut(12,15)=-6 + imut(12,16)=-4 + imut(12,17)=-3 + imut(12,18)=-12 + imut(12,19)=-9 + imut(12,20)=-9 + imut(12,21)=-2 + imut(12,22)=-4 + imut(12,23)=-1 + imut(12,24)=-17 + imut(13,1)=-5 + imut(13,2)=-4 + imut(13,3)=-9 + imut(13,4)=-11 + imut(13,5)=-13 + imut(13,6)=-4 + imut(13,7)=-7 + imut(13,8)=-8 + imut(13,9)=-10 + imut(13,10)=-1 + imut(13,11)=1 + imut(13,12)=-2 + imut(13,13)=11 
+ imut(13,14)=-4 + imut(13,15)=-8 + imut(13,16)=-5 + imut(13,17)=-4 + imut(13,18)=-13 + imut(13,19)=-11 + imut(13,20)=-1 + imut(13,21)=-10 + imut(13,22)=-5 + imut(13,23)=-1 + imut(13,24)=-17 + imut(14,1)=-8 + imut(14,2)=-9 + imut(14,3)=-9 + imut(14,4)=-15 + imut(14,5)=-13 + imut(14,6)=-13 + imut(14,7)=-14 + imut(14,8)=-9 + imut(14,9)=-6 + imut(14,10)=-2 + imut(14,11)=-3 + imut(14,12)=-14 + imut(14,13)=-4 + imut(14,14)=9 + imut(14,15)=-10 + imut(14,16)=-6 + imut(14,17)=-9 + imut(14,18)=-4 + imut(14,19)=2 + imut(14,20)=-8 + imut(14,21)=-10 + imut(14,22)=-13 + imut(14,23)=-1 + imut(14,24)=-17 + imut(15,1)=-2 + imut(15,2)=-4 + imut(15,3)=-6 + imut(15,4)=-8 + imut(15,5)=-8 + imut(15,6)=-3 + imut(15,7)=-5 + imut(15,8)=-6 + imut(15,9)=-4 + imut(15,10)=-8 + imut(15,11)=-7 + imut(15,12)=-6 + imut(15,13)=-8 + imut(15,14)=-10 + imut(15,15)=8 + imut(15,16)=-2 + imut(15,17)=-4 + imut(15,18)=-14 + imut(15,19)=-13 + imut(15,20)=-6 + imut(15,21)=-7 + imut(15,22)=-4 + imut(15,23)=-1 + imut(15,24)=-17 + imut(16,1)=0 + imut(16,2)=-3 + imut(16,3)=0 + imut(16,4)=-4 + imut(16,5)=-3 + imut(16,6)=-5 + imut(16,7)=-4 + imut(16,8)=-2 + imut(16,9)=-6 + imut(16,10)=-7 + imut(16,11)=-8 + imut(16,12)=-4 + imut(16,13)=-5 + imut(16,14)=-6 + imut(16,15)=-2 + imut(16,16)=6 + imut(16,17)=0 + imut(16,18)=-5 + imut(16,19)=-7 + imut(16,20)=-6 + imut(16,21)=-1 + imut(16,22)=-5 + imut(16,23)=-1 + imut(16,24)=-17 + imut(17,1)=-1 + imut(17,2)=-6 + imut(17,3)=-2 + imut(17,4)=-5 + imut(17,5)=-8 + imut(17,6)=-5 + imut(17,7)=-6 + imut(17,8)=-6 + imut(17,9)=-7 + imut(17,10)=-2 + imut(17,11)=-7 + imut(17,12)=-3 + imut(17,13)=-4 + imut(17,14)=-9 + imut(17,15)=-4 + imut(17,16)=0 + imut(17,17)=7 + imut(17,18)=-13 + imut(17,19)=-6 + imut(17,20)=-3 + imut(17,21)=-3 + imut(17,22)=-6 + imut(17,23)=-1 + imut(17,24)=-17 + imut(18,1)=-13 + imut(18,2)=-2 + imut(18,3)=-8 + imut(18,4)=-15 + imut(18,5)=-15 + imut(18,6)=-13 + imut(18,7)=-17 + imut(18,8)=-15 + imut(18,9)=-7 + imut(18,10)=-14 + imut(18,11)=-6 + imut(18,12)=-12 + 
imut(18,13)=-13 + imut(18,14)=-4 + imut(18,15)=-14 + imut(18,16)=-5 + imut(18,17)=-13 + imut(18,18)=13 + imut(18,19)=-5 + imut(18,20)=-15 + imut(18,21)=-10 + imut(18,22)=-14 + imut(18,23)=-1 + imut(18,24)=-17 + imut(19,1)=-8 + imut(19,2)=-10 + imut(19,3)=-4 + imut(19,4)=-11 + imut(19,5)=-4 + imut(19,6)=-12 + imut(19,7)=-8 + imut(19,8)=-14 + imut(19,9)=-3 + imut(19,10)=-6 + imut(19,11)=-7 + imut(19,12)=-9 + imut(19,13)=-11 + imut(19,14)=2 + imut(19,15)=-13 + imut(19,16)=-7 + imut(19,17)=-6 + imut(19,18)=-5 + imut(19,19)=10 + imut(19,20)=-7 + imut(19,21)=-6 + imut(19,22)=-9 + imut(19,23)=-1 + imut(19,24)=-17 + imut(20,1)=-2 + imut(20,2)=-8 + imut(20,3)=-8 + imut(20,4)=-8 + imut(20,5)=-6 + imut(20,6)=-7 + imut(20,7)=-6 + imut(20,8)=-5 + imut(20,9)=-6 + imut(20,10)=2 + imut(20,11)=-2 + imut(20,12)=-9 + imut(20,13)=-1 + imut(20,14)=-8 + imut(20,15)=-6 + imut(20,16)=-6 + imut(20,17)=-3 + imut(20,18)=-15 + imut(20,19)=-7 + imut(20,20)=7 + imut(20,21)=-8 + imut(20,22)=-6 + imut(20,23)=-1 + imut(20,24)=-17 + imut(21,1)=-3 + imut(21,2)=-7 + imut(21,3)=6 + imut(21,4)=6 + imut(21,5)=-12 + imut(21,6)=-3 + imut(21,7)=1 + imut(21,8)=-3 + imut(21,9)=-1 + imut(21,10)=-6 + imut(21,11)=-9 + imut(21,12)=-2 + imut(21,13)=-10 + imut(21,14)=-10 + imut(21,15)=-7 + imut(21,16)=-1 + imut(21,17)=-3 + imut(21,18)=-10 + imut(21,19)=-6 + imut(21,20)=-8 + imut(21,21)=6 + imut(21,22)=0 + imut(21,23)=-1 + imut(21,24)=-17 + imut(22,1)=-3 + imut(22,2)=-4 + imut(22,3)=-3 + imut(22,4)=1 + imut(22,5)=-14 + imut(22,6)=6 + imut(22,7)=6 + imut(22,8)=-5 + imut(22,9)=-1 + imut(22,10)=-6 + imut(22,11)=-7 + imut(22,12)=-4 + imut(22,13)=-5 + imut(22,14)=-13 + imut(22,15)=-4 + imut(22,16)=-5 + imut(22,17)=-6 + imut(22,18)=-14 + imut(22,19)=-9 + imut(22,20)=-6 + imut(22,21)=0 + imut(22,22)=6 + imut(22,23)=-1 + imut(22,24)=-17 + imut(23,1)=-1 + imut(23,2)=-1 + imut(23,3)=-1 + imut(23,4)=-1 + imut(23,5)=-1 + imut(23,6)=-1 + imut(23,7)=-1 + imut(23,8)=-1 + imut(23,9)=-1 + imut(23,10)=-1 + imut(23,11)=-1 + 
imut(23,12)=-1 + imut(23,13)=-1 + imut(23,14)=-1 + imut(23,15)=-1 + imut(23,16)=-1 + imut(23,17)=-1 + imut(23,18)=-1 + imut(23,19)=-1 + imut(23,20)=-1 + imut(23,21)=-1 + imut(23,22)=-1 + imut(23,23)=-1 + imut(23,24)=-17 + imut(24,1)=-17 + imut(24,2)=-17 + imut(24,3)=-17 + imut(24,4)=-17 + imut(24,5)=-17 + imut(24,6)=-17 + imut(24,7)=-17 + imut(24,8)=-17 + imut(24,9)=-17 + imut(24,10)=-17 + imut(24,11)=-17 + imut(24,12)=-17 + imut(24,13)=-17 + imut(24,14)=-17 + imut(24,15)=-17 + imut(24,16)=-17 + imut(24,17)=-17 + imut(24,18)=-17 + imut(24,19)=-17 + imut(24,20)=-17 + imut(24,21)=-17 + imut(24,22)=-17 + imut(24,23)=-17 + imut(24,24)=1 + + return + end + + function upper(A) + CHARACTER A,upper + IF(A.LE.'z'.and.A.GE.'a')then + A=CHAR(ICHAR(A)-32) + endif + upper=A + RETURN + END diff -r 000000000000 -r 0c6cfb9906f3 fibronectin/args.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fibronectin/args.py Wed Nov 10 15:15:50 2021 +0000 @@ -0,0 +1,100 @@ +# -*- coding: utf-8 -*- +import sys, getopt, os + + +class Args: + + def __init__(self): + """ + Instanciate Files object + """ + self.input = None + self.output_dir = None + self.pattern = None + self.site_res_5 = None + self.site_res_3 = None + self.getargs() + + def usage(self, info): + text = None + text = "Fibronectin script.\n\n" + if (info): text += info + temp = "Option\t\t\t\tfile\t\t\tDescription\n" + text += temp + text += '-' * (len(temp) + 60) + text += '\n' + text += "-i, --input\t\t\tfile.fasta\t\tFasta file that contains the DNA sequences\n" + text += "-o, --output_dir\t\t/path/for/output\tDirectory where output files will be written\n" + text += "-p, --pattern\t\t\tstring\t\t\tPattern of the sequence bank\n" + text += "-5, --restriction-site-5\tstring\t\t\tSequence of the restriction site in 5'\n" + text += "-3, --restriction-site-3\tstring\t\t\tSequence of the restriction site in 3'\n" + return text + + def case(self): + # Test des fichiers et repertoires + if not self.input: + 
sys.exit(self.usage("input (-i,--input) : \"%s\" must be indicated\n" % (self.input))) + if not self.output_dir: + sys.exit(self.usage("output directory (-o,--output_dir) : \"%s\" must be indicated\n" % (self.output_dir))) + if not self.pattern: + sys.exit( + self.usage("Pattern of the sequence bank (-p,--pattern) : \"%s\" must be indicated\n" % (self.pattern))) + if not self.site_res_5: + sys.exit(self.usage( + "Sequence of the restriction site in 5' (-5,--restriction-site-5) : \"%s\" must be indicated\n" % ( + self.site_res_5))) + if not self.site_res_3: + sys.exit(self.usage( + "Sequence of the restriction site in 3' (-3,--restriction-site-3) : \"%s\" must be indicated\n" % ( + self.site_res_3))) + + def data_format(self): + """ + Check if information are correct + """ + # Run without arguments + if len(sys.argv) == 1: + sys.exit(self.usage(None)) + # Test input file argument + if self.input: + if not os.path.isfile(self.input): + print(self.input) + print(os.path.isfile(self.input)) + #sys.exit(self.usage("Error with \"%s\" : -i required an input file\n" % self.multilist)) + + # Determine les fichiers fournis en arguments + + def getargs(self): + """ + Determine the files provided as arguments + @return: Choosen options + """ + # Sans argument + if len(sys.argv) <= 1: sys.exit("Do './fibronectin.py -h' for a usage summary") + # test des option + try: + (opts, args) = getopt.getopt(sys.argv[1:], "i:o:p:5:3:h", + ["input=", "output_dir=", "pattern=", "site_res_5=", "site_res_3="]) + except getopt.GetoptError as err: + # print help information and exit: + print(str(err)) # will print something like "option -a not recognized" + sys.exit(self.usage(None)) + # Identification of options + for (o, a) in opts: + if o in ("-i", "--input"): + self.input = a + elif o in ("-o", "--output_dir"): + self.output_dir = a + elif o in ("-p", "--pattern"): + self.pattern = a + elif o in ("-5", "--restriction-site-5"): + self.site_res_5 = a + elif o in ("-3", 
"--restriction-site-3"): + self.site_res_3 = a + elif o in ("-h", "--help"): + sys.exit(self.usage(None)) + else: + assert False, "unhandled option" + # Verification of cases + self.case() + self.data_format() diff -r 000000000000 -r 0c6cfb9906f3 fibronectin/args.pyc Binary file fibronectin/args.pyc has changed diff -r 000000000000 -r 0c6cfb9906f3 fibronectin/fibronectin.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fibronectin/fibronectin.py Wed Nov 10 15:15:50 2021 +0000 @@ -0,0 +1,424 @@ +#!/usr/bin/env python +# title :fibronectin.py +# description :This script will analyze fasta files, look for restriction sites, cut the sequences around the restriction sites, translate the nucleic sequences into amino acids sequences. +# author :Fabienne Wong Jun Tai +# date :20121107 +# version :1.0 +# usage :python fibronectin.py -i file.fasta -o /output/dir/ -p pattern -5 seq_restric_5'-3 seq_restric_3' +# notes : +# python_version :3.7.11 +# biopython_max_version :1.72 +# ============================================================================== +import math +import matplotlib +import numpy +import re +import subprocess +import matplotlib.pyplot as plot +from args import * +from Bio import SeqIO, Seq +from Bio.SubsMat import MatrixInfo as matlist +from Bio import pairwise2 +from Bio.pairwise2 import format_alignment +matplotlib.use('Agg') + +args = Args() +print(sys.path[0]) +# Variables initialization +fibronectin_script_dir = sys.path[0] +print(fibronectin_script_dir) +directory = args.output_dir +mcl_file = directory + "mcl.in" +mcl_output = directory + "mcl.out" +html_file = directory + "fibronectin_report.html" +graph_pic = directory + "distri.png" +input_file = os.path.basename(args.input) +site_res_5 = args.site_res_5 +site_res_3 = args.site_res_3 +tag = {'mut': [], 'ok_stop_ext': [], 'stop': [], 'no_restric': [], 'no_multiple': [], 'amber': []} +all_seq = [] +all_seq_fasta = {} # dictionnary that will store information about all the sequences +good_seq 
= {} # dictionnary that will store information about the valid sequences +identical_clones = {} +var_seq_common = {} # dictionnary that will store the number of sequences that share the same variable parts +align_scores = [] +nb_var_part = 0 + + +def reverse_complement(seq): + # Generate the reverse complement + complement_nuc = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G'} + rev_com = "" + for n in (seq[::-1]): + rev_com += complement_nuc[n] + return rev_com + + +def generate_aln(seq_dic, ids): + # Multiple Sequence Alignment via ClustalO + input = '' + for k in ids: + input += '>%s\n%s\n' % (k, re.sub("(.{80})", "\\1\n", seq_dic[k]['prot'], re.DOTALL)) + p = subprocess.Popen("clustalo -i - --outfmt clu", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, universal_newlines=True) + aln_out, aln_err = p.communicate(input=input) + print(type(aln_out)) + return aln_out + + +def report_html(html_file, tag, all_seq, good_seq, all_seq_fasta, identical_clones, nb_var_part, var_seq_common, align_scores, args): + # Generate the html file for the report + all_seq.sort() + no_restric = tag['no_restric'] + no_restric.sort() + no_multiple = tag['no_multiple'] + no_multiple.sort() + stop = tag['stop'] + stop.sort() + amber = tag['amber'] + amber.sort() + mut = tag['mut'] + mut.sort() + # good_ids = good_seq.keys() + + good_seq = dict(sorted(good_seq.items())) + good_ids = good_seq.keys() + + # good_ids.sort() + + w = open(html_file, 'w') + w.write( + 'Fibronectin Report

Fibronectin Report

') + # Input data + w.write( + '

Input file:
%s

Number of sequences in input file:
%d

Pattern of the sequence bank:
%s

5\' restriction site:
%s

3\' restriction site:
%s

' % ( + input_file, len(all_seq), args.pattern, args.site_res_5, args.site_res_3)) + # Sequence analysis + w.write( + '

Caption:

') + w.write( + '' % ( + len(no_restric), float(len(no_restric)) / float(len(all_seq)) * 100, len(no_multiple), float(len(no_multiple)) / float(len(all_seq)) * 100, len(stop), + float(len(stop)) / float(len(all_seq)) * 100, len(mut), float(len(mut)) / float(len(all_seq)) * 100, len(good_ids), float(len(good_ids)) / float(len(all_seq)) * 100, + len(amber))) + w.write( + '
Absence of restriction sitesIncorrect number of nucleotides between the restriction sitesStop codon inside the area of interestMutation in the conserved regionsValid sequencesAmber codon in the sequence (inside the area of interest)
%d sequence(s) (%.2f%%)%d sequence(s) (%.2f%%)%d sequence(s) (%.2f%%)%d sequence(s) (%.2f%%)%d sequence(s) (%.2f%%)%d sequence(s)
%s%s%s%s%s%s
' % ( + '
'.join(no_restric), '
'.join(no_multiple), '
'.join(stop), '
'.join(mut), '
'.join(good_ids), '
'.join(amber))) + # Variable regions analysis + w.write( + '

The following group of sequences are identical clones on the variable regions:

') + identical_clones_seq = identical_clones.keys() + if identical_clones_seq: + for seq in identical_clones_seq: + ids = list(set(identical_clones[seq])) # return only one occurrence of each item in the list + w.write('
%d sequences (%.2f%% of valid sequences)
%s
' % ( + len(ids), float(len(ids)) / float(len(good_ids)) * 100, '
'.join(ids))) + w.write('
') + for z in range(len(good_seq[ids[0]]['var'])): + w.write('' % (z + 1, good_seq[ids[0]]['var'][z])) + w.write('
Variable regionRepeated sequence
%d%s
') + else: + w.write('

No clone was found.

') + + first = True + for i in range(nb_var_part): + keys = [] + for k in (var_seq_common[str(i + 1)].keys()): + nb = var_seq_common[str(i + 1)][k] + if nb > 1: + if first: + w.write( + '

Here\'s the distribution of the repeated sequences in variable regions:

') + first = False + keys.append(k) + else: + keys.append(k) + nb = len(keys) + if nb != 0: + w.write('') + for z in range(nb): + if z == 0: + w.write('' % (nb, i + 1)) + w.write('' % ( + keys[z], var_seq_common[str(i + 1)][keys[z]], float(var_seq_common[str(i + 1)][keys[z]]) / float(len(good_ids)) * 100)) + w.write('
Variable regionRepeated sequenceNumber of occurrences (percentage of valid sequences)
%d%s%d (%.2f%%)
') + # Clustering + w.write('

The following clusters were generated by MCL:

') + for line in open(mcl_output, 'r'): + w.write('
%d sequences (%.2f%% of valid sequences)
%s
' % ( + len(line.split("\t")), float(len(line.split("\t"))) / float(len(good_ids)) * 100, '
'.join(line.split("\t")))) + # Statistics + w.write('') + w.write('

Here\'s some statistics about the valid sequences:

Mean for the pairwise alignement scores: %.2f
Standard deviation: %.2f

' % ( + numpy.mean(align_scores), numpy.std(align_scores))) + w.write('
Distribution of the pairwise alignment score
' % os.path.basename(graph_pic)) + w.write('
') + uniq_scores = sorted(list(set(align_scores))) + scores_dic = {} + for score in uniq_scores: + scores_dic[score] = align_scores.count(score) + + scores_dic = dict(sorted(scores_dic.items())) + scores = scores_dic.items() + # scores.sort() + + for el in scores: + w.write('' % (el[0], el[1])) + w.write('
Pairwise Alignment ScoreNumber of occurrences
%.2f%d
') + # Annex + w.write('') + w.write('

Valid protein sequences in FASTA format:

') + aln_out = generate_aln(good_seq, good_ids) + print(str(aln_out)) + w.write( + '

Multiple sequence alignment of the valid sequences generated by Clustal Omega:

' % str( + aln_out)) + + if no_multiple: + w.write( + '

Protein sequences with an incorrect number of nucleotides between the restriction sites in FASTA format:

') + + if mut: + w.write('

Mutated protein sequences in FASTA format:

') + aln_out = generate_aln(all_seq_fasta, mut) + + w.write( + '

Multiple sequence alignment of the mutated sequences generated by Clustal Omega:

' % str( + aln_out)) + + if stop: + w.write('

Protein sequences with a stop codon in FASTA format:

') + + if amber: + w.write('

Protein sequences with an amber codon in FASTA format:

') + + w.write('
') + w.close() + + +nb_seq = len(list(SeqIO.parse(args.input, "fasta"))) + +for seq_record in SeqIO.parse(args.input, "fasta"): + seq_id = seq_record.id + seq = str(seq_record.seq) + seq = seq.upper() + all_seq.append(seq_id) + # Checking if both restriction sites are present in the sequence + if site_res_5 in seq and site_res_3 in seq: + valid = True + else: + valid = False + tag['no_restric'].append(seq_id) + # If sequence has both restriction sites, checking if it is necessary to take the reverse complement strand + if valid: + site_res_5_pos = seq.index(site_res_5) + site_res_3_pos = seq.index(site_res_3) + # If site_res_5_pos > site_res_3_pos, reverse complement strand has to be calculated + if site_res_5_pos > site_res_3_pos: + # Checking if the number of nucleic acids between the restriction sites is a multiple of 3 + length = math.fabs((site_res_5_pos + len(site_res_5)) - site_res_3_pos) + valid = length % 3 == 0 + cut_seq = seq[:site_res_5_pos + len(site_res_5)] + cut_seq = reverse_complement(cut_seq) + # Else if site_res_5_pos < site_res_3_pos, use the sequence as it is + else: + # Checking if the number of nucleic acids between the restriction sites is a multiple of 3 + length = math.fabs((site_res_3_pos + len(site_res_3)) - site_res_5_pos) + valid = length % 3 == 0 + cut_seq = seq[site_res_5_pos:] + # If the number of nucleic acids between the restriction sites isn't a multiple of 3, put the sequence away + if not valid: + tag['no_multiple'].append(seq_id) + prot_seq = Seq.translate(cut_seq) + all_seq_fasta[seq_id] = {} + all_seq_fasta[seq_id]['prot'] = prot_seq + else: + # Translate nucleic sequence into amino acid sequence + prot_seq = Seq.translate(cut_seq) + all_seq_fasta[seq_id] = {} + all_seq_fasta[seq_id]['prot'] = prot_seq + + # Looking for stop codon in the sequence and getting their position in the sequence + if '*' in prot_seq: + pos_stop = [m.start() for m in re.finditer("\*", prot_seq)] + stop = False + # Checking if stop codon is between 
the restriction sites, also checking if it is an amber codon. if stop codon other than amber codon -> tag stop + for i in range(len(pos_stop)): + if pos_stop[i] < length / 3: + stop_codon_nuc = cut_seq[pos_stop[i] * 3:pos_stop[i] * 3 + 3] + if stop_codon_nuc != "TAG": + tag['stop'].append(seq_id) + stop = True + break + else: + if seq_id not in tag['amber']: + tag['amber'].append(seq_id) + # If stop codon wasn't found between the restriction sites + if not stop: + """ + # Checking if there is a stop codon outside the restriction sites. If yes -> tag ok_stop_ext + for i in range(len(pos_stop)): + if (pos_stop[i] > length/3): + stop_codon_nuc = cut_seq[pos_stop[i]*3:pos_stop[i]*3+3] + if stop_codon_nuc != "TAG": + tag['ok_stop_ext'].append(seq_id) + stop = True + break + else: + if (seq_id not in tag['amber']): + tag['amber'].append(seq_id) + """ + # Checking if there was a mutation in the fix part, if yes -> tag mut else retrieve variable parts + mut = False + pattern_part = args.pattern.split(":") + tmp_prot_seq = prot_seq + var_parts = [] + for i in range(len(pattern_part) - 1): # not checking the latest fix part + part = pattern_part[i] + # If part is fix + if not part[0].isdigit(): + # If part not in prot_seq -> mutation, flag then break + if part not in tmp_prot_seq: + mut = True + tag['mut'].append(seq_id) + break + # Else, store the variable part if exist then remove the fix part + variable part (tmp_prot_seq starts at the end of part) + else: + pos_fix = tmp_prot_seq.index(part) + if pos_fix != 0: + var_parts.append(tmp_prot_seq[0:pos_fix]) + tmp_prot_seq = tmp_prot_seq[pos_fix + len(part):] + # Else part is variable + else: + nb_var_part += 1 + # Treating latest fix part if no mutation before + if not mut: + last_part = pattern_part[-1] + last_var = pattern_part[-2] + if '-' in last_var: + var_max = int(last_var.split('-')[1]) + else: + var_max = int(last_var) + last_part = last_part[0:var_max + 1] + if last_part not in tmp_prot_seq: + mut = True + 
tag['mut'].append(seq_id) + else: + pos_fix = tmp_prot_seq.index(last_part) + if pos_fix != 0: + var_parts.append(tmp_prot_seq[0:pos_fix]) + # If no mutation the sequence is validated and all the info are stored + if not mut: + good_seq[seq_id] = {} + good_seq[seq_id]['dna'] = cut_seq + good_seq[seq_id]['prot'] = prot_seq + good_seq[seq_id]['var'] = var_parts + +# If all sequences are invalid, the program will exit as there is no data to continue +if not good_seq: + print("All sequences are invalid. At least 2 valid sequences are necessary to proceed to the next step. The program will now exit.") + sys.exit() +elif len(good_seq.keys()) == 1: + print("There is only one valid sequence among the input data. At least 2 valid sequences are necessary to proceed to the next step. The program will now exit") + sys.exit() + +# Initialization of dict var_seq_common +for n in range(nb_var_part): + var_seq_common[str(n + 1)] = {} + +# Opening the file where the mcl input will be written +mcl = open(mcl_file, 'w') + +id = good_seq.keys() +for i in range(len(id)): + var_1 = good_seq[list(id)[i]]['var'] + + # Classifying variable sequences + for k in range(len(var_1)): + try: + var_seq_common[str(k + 1)][var_1[k]] += 1 + except KeyError: + var_seq_common[str(k + 1)][var_1[k]] = 1 + + for j in range(i + 1, len(id)): + var_2 = good_seq[list(id)[j]]['var'] + # Comparing the sequences' variable parts to find identical clones + if var_1 == var_2: + try: + s = "".join(var_1) + identical_clones[s].extend([id[i], id[j]]) + except KeyError: + identical_clones[s] = [id[i], id[j]] + + # Align the 2 sequences using NWalign_PAM30 + seq_1 = ''.join(var_1) + seq_2 = ''.join(var_2) + print(seq_1) + print(seq_2) + matrix = matlist.pam30 + cpt = 0 + if len(seq_2) > len(seq_1): + print(pairwise2.align.globalds(seq_1, seq_2, matrix, -11, -1)) + for a in pairwise2.align.globalds(seq_1, seq_2, matrix, -11, -1): + for k in range(a[4]): + if a[0][k] == a[1][k]: + cpt += 1 + print(format_alignment(*a, 
full_sequences=True)) + else: + print(pairwise2.align.globalds(seq_2, seq_1, matrix, -11, -1)) + for a in pairwise2.align.globalds(seq_2, seq_1, matrix, -11, -1): + for k in range(a[4]): + if a[0][k] == a[1][k]: + cpt += 1 + print(format_alignment(*a, full_sequences=True)) + print("######################################@") + print(cpt) + + if len(seq_2) > len(seq_1): + p = subprocess.Popen(fibronectin_script_dir + "/NWalign_PAM30 %s %s 3" % (seq_1, seq_2), shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + else: + p = subprocess.Popen(fibronectin_script_dir + "/NWalign_PAM30 %s %s 3" % (seq_2, seq_1), shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + out, err = p.communicate() + + print(out) + print("######################################@") + lines = out.split(bytes("\n", encoding='utf8')) + print(lines[5].split(bytes(' ', encoding='utf8'))[5]) + score = float(lines[5].split(bytes(' ', encoding='utf8'))[5]) * 100 + align_scores.append(score) + mcl.write('%s\t%s\t%0.2f\n' % (list(id)[i], list(id)[j], score)) +mcl.close() + +# Clusters formation +subprocess.call("mcl %s --abc -I 6.0 -o %s" % (mcl_file, mcl_output), shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + +# Producing distribution graph +plot.hist(align_scores, bins=numpy.arange(0, 101, 2)) +plot.xlabel('Pairwise Alignment Score') +plot.ylabel('Number of occurrences') +plot.title('Distribution of the pairwise alignment score') +plot.grid(True) +plot.savefig(graph_pic) + +# Generating html report +report_html(html_file, tag, all_seq, good_seq, all_seq_fasta, identical_clones, nb_var_part, var_seq_common, align_scores, args) + +# Removing intermediate files +subprocess.call("rm %s %s " % (mcl_file, mcl_output), shell=True) + +print("HTML report has been generated in the output directory. 
The program will now exit.") diff -r 000000000000 -r 0c6cfb9906f3 fibronectin/fibronectin.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fibronectin/fibronectin.xml Wed Nov 10 15:15:50 2021 +0000 @@ -0,0 +1,93 @@ + + + diversity analysis of synthetic libraries of a Fibronectin domain + + + fibronectin_macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 0c6cfb9906f3 fibronectin/fibronectin_macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fibronectin/fibronectin_macros.xml Wed Nov 10 15:15:50 2021 +0000 @@ -0,0 +1,12 @@ + + + + clustalo + + + + + 10.1038/s41467-019-12528-4 + + + \ No newline at end of file diff -r 000000000000 -r 0c6cfb9906f3 fibronectin/fibronectin_wrapper.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fibronectin/fibronectin_wrapper.py Wed Nov 10 15:15:50 2021 +0000 @@ -0,0 +1,68 @@ +#!/usr/bin/env python + +""" +Wrapper for fibronectin.py +""" +import pkg_resources +import logging, os, string, sys, tempfile, glob, shutil, types, urllib +import shlex, subprocess +from optparse import OptionParser, OptionGroup +from stat import * + + +log = logging.getLogger( __name__ ) + +assert sys.version_info[:2] >= ( 2, 4 ) + +def stop_err( msg ): + sys.stderr.write("%s\n" % msg) + sys.exit() + +def __main__(): + #Parse Command Line + s = 'fibronectin_wrapper.py: argv = %s\n' % (sys.argv) + argcnt = len(sys.argv) + fasta_file = sys.argv[1] + pattern = sys.argv[2] + restriction_site_5 = sys.argv[3] + restriction_site_3 = sys.argv[4] + install_dir = sys.argv[5] + extra_file_path = sys.argv[6]+"/" + report = sys.argv[7] + tmp_file_path = sys.argv[8] + tool_file_path = sys.argv[9]+"/" + try:# for test - needs this done + os.makedirs(extra_file_path) + except Exception as e: + stop_err('1- Error running fibronectin ' + str(e)) + cmdline = 'python %sfibronectin.py -i %s -o %s -p %s -5 %s -3 %s > /dev/null' % (tool_file_path, fasta_file, extra_file_path, pattern, restriction_site_5, 
restriction_site_3) + try: + proc = subprocess.Popen(args=cmdline, shell=True, stderr=subprocess.PIPE) + returncode = proc.wait() + # get stderr, allowing for case where it's very large + stderr = b'' + buffsize = 1048576 + try: + while True: + stderr += proc.stderr.read(buffsize) + if not stderr or len(stderr) % buffsize != 0: + break + except OverflowError: + pass + if returncode != 0: + raise Exception(stderr) + except Exception as e: + stop_err('2 -Error running fibronectin ' + str(e)) + png_path = os.path.join(extra_file_path,'distrib.png') + shutil.move(extra_file_path+"/fibronectin_report.html", report) + #rval = ['Fibronectin Galaxy Composite Dataset

'] + #rval.append('

%s

' % (cmdline) ) + #rval.append('
This composite dataset is composed of the following files:

' ) + #f = file(html_file,'w') + #f.write("\n".join( rval )) + #f.write('\n') + #f.close() + +if __name__ == "__main__": __main__() diff -r 000000000000 -r 0c6cfb9906f3 fibronectin/test-data/distri.png Binary file fibronectin/test-data/distri.png has changed diff -r 000000000000 -r 0c6cfb9906f3 fibronectin/test-data/fibronectin_datatest.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fibronectin/test-data/fibronectin_datatest.fasta Wed Nov 10 15:15:50 2021 +0000 @@ -0,0 +1,991 @@ +>XL1_10_PSEXSEQ-REV_13 status=ok nucl=1301 crlStart=4 crlStop=1186 crlLen=1183 order=COL12-0DIL +tagTTGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT +GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG +ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG +AAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAA +ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT +TGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCG +TTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCC +CAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGT +CGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGA +TTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAA +ACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAAC +ATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCA +GACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAA +ATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGA +AAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCG +CCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGC +TTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGG +GAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACAT +TAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTC +CAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGC +GGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACC +AGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAG +TTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTT +TGATGGTGGTTAACGGCGGGATATAACATGAGCTGGcttcgggatcgtcg +tatcccactaccgagatgtccgcaccaacgcgcaccccggactccgaaat +ggcgcgcattggccccacggccttcggatcgttgggaaccagcatcgcat +t +>XL1_11_PSEXSEQ-REV_14 status=ok nucl=1299 
crlStart=8 crlStop=1185 crlLen=1178 order=COL12-0DIL +actctgcTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT +GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG +ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG +AAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAA +ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT +TGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCG +TTTCGCCATAGGTGATACGGTAATAGCTGACGCAGTTGCCGTGTGCATCC +CAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGT +CGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGA +TTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAA +ACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAAC +ATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCA +GACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAA +ATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGA +AAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCG +CCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGC +TTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGG +GAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACAT +TAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTC +CAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGC +GGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACC +AGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAG +TTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGGT +TTGATGGTGGTTAACGGCGGGATATAACATGAGCTggcttcgggatcgtc +gtatcccattaccgaaaatgtccgcaccaacgcgcaaccccggactcggg +aaagggcgcgcattgcgcccaagcgccatctggatcgttgggaaaccag +>XL1_12_PSEXSEQ-REV_15 status=ok nucl=1301 crlStart=6 crlStop=1184 crlLen=1179 order=COL12-0DIL +agcctCGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG +TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT +GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA +GAAATCGGGCAGGTGGCGACGATGGTGTTGCAGTCGTTGTGAGCGTAAAC +CGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTAT +AAGAAGAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTT +TCGCCATAGGTGATACGGTAATAGTGGACGGCGGAGTTGGGGAGGTTTGC +ATCCCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTT +TGGTCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGC 
+TTGATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAG +GTAAACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCA +TAACATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATC +TGCAGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAAC +GTAAATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGA +TGGAAAAGCCCAGACCCTTCGGCGCAGGCCGAGAATGCCAGCACCAGACC +CGCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAA +GCTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCC +GGGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCAC +ATTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTT +TCCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGC +GCGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCA +CCAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAG +AGTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTG +GTTTGATGGTGGTTAACGGCGGGATATAACATGAgctgtcttcggtatcg +tcgtatcccactaccgagatggccgcaccaacgcgcaacccggaatcggt +aatggcccgcattgcgcccaaggccctcttgatcgttgggaaccagcatc +c +>XL1_13_PSEXSEQ-REV_16 status=ok nucl=1301 crlStart=9 crlStop=1187 crlLen=1179 order=COL12-0DIL +agcttagcTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG +TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT +GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA +GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA +AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG +TAGAATAAGAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC +GTTTCGCCATAGGTGATACGGTAATAGCGGACGAAGGAGCGGGGTGCATC +CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG +TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG +ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA +AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA +CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC +AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA +AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG +AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC +GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG +CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG +GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA +TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT 
+CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG +CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC +CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA +GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGG +TTTGATGGTGGTTAACGGCGGGATATAACATGAGCTGgcttccggatcgt +cgtatcccactaccgagatgtccgcccaacgcgcaacccggactcggtaa +tgggccgcattgcgcccagcgccttcggatcgttgggaaccagcatccca +a +>XL1_14_PSEXSEQ-REV_17 status=ok nucl=1301 crlStart=3 crlStop=1198 crlLen=1196 order=COL12-0DIL +agCTTCGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG +TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT +GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA +GAAATCGGGGTGGAGTGGCTGATGTAGCTGTAGATGAAGTAGCGAGCGTA +AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG +TATAAGAAGAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC +GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC +CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG +TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG +ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA +AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA +CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC +AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA +AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG +AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC +GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG +CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG +GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA +TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT +CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG +CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC +CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA +GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGT +TTGATGGTGGTTAACGGCGGGATATAACATGAGCTGGCTTCGGGATCGtc +gtacccactaccgaaatgtccgcccaacgcgcagcccggactcggtaatg +ggccgcattgcgcccagcgccatctgatcgttgggaaccagcatcccagt +g +>XL1_15_PSEXSEQ-REV_18 status=ok nucl=1301 crlStart=7 crlStop=1186 crlLen=1180 order=COL12-0DIL +tgcatcTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGCA 
+TAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGATG +GTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAAA +TCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAAACC +GTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTTGA +GGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTT +CGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCCCAG +CTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGG +CACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTT +CCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACC +ACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATC +TTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGAC +GGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATG +CTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAA +GCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCA +GAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTT +GTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAG +CATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAA +TTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAG +TCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGG +GAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGT +GAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTG +CAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTTTGA +TGGTGGTTAACGGCGGGATATAACATGAACTGGCTTcggtatcgtcgtat +cccactaccgaaatgcccgcccaacgcgcagcccggactcggtaatgggc +cgcattgcgcccagcgccatctgatcgttgggaaccagctccgcagtggg +a +>XL1_16_PSEXSEQ-REV_19 status=ok nucl=1301 crlStart=9 crlStop=1194 crlLen=1186 order=COL12-0DIL +cgctttgcTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG +TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT +GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA +GAAATCGGGAAGCAGTCGCTGCTGTTGGCGTTGACGCCGCCGCAAGCGTA +AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG +TAGAATAATAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC +GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC +CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG +TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG +ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA +AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA 
+CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC +AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA +AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG +AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC +GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG +CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG +GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA +TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT +CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG +CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC +CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA +GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGT +TTGATGGTGGTTAACGGCGGGATATAACATGAACTGGCTTCGGGatcgtc +gtatcccactaccgaaatgtccgcaccaacgcgcagcccggaatcggtaa +tgggccgcattgcgcccagggccatctgatctttggcacccagctccgca +t +>XL1_17_PSEXSEQ-REV_20 status=ok nucl=1301 crlStart=5 crlStop=1177 crlLen=1173 order=COL12-0DIL +ctgaAATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGCATAG +TCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGATGGTG +ATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAAATCG +GGCAGCCGCGGCAGCTGAAGTGGATGGGGACGCTAGCGTAAACCGTAATG +GTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTTGAGGAACT +ACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTTCGCCAT +AGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCCCAGCTGATC +AGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGGCACGGA +ACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTTCCATAA +CCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACCACATCA +TAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATCTTCCAG +GCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGACGGCCAT +CTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATGCTGTTG +TCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAAGCCCAG +ACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCAGAGCCA +GCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTTGTGTGA +AATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAGCATAAA +GTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAATTGCGT +TGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAGTCGGGA +AACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGGAGAGG +CGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGTGAGACG 
+GGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTGCAGCAA +GCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTTTGATGGTGG +TTAACGGCGGGATATAACATGAACTGGcttcggtatcgtcgtatcccact +accgaaatgtccgcaccaacgcgcagcccggattcggaaaggcccgcatt +gcgcccagggccatctgatcgttgggaaccagcatcccagtgggaacaat +g +>XL1_18_PSEXSEQ-REV_21 status=ok nucl=1301 crlStart=6 crlStop=1178 crlLen=1173 order=COL12-0DIL +atctcTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGCAT +AGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGATGG +TGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAAAT +CGGGATGGTGACGAGGACGTCGAAGTGGCCGATGGGAGCGTAAACCGTAA +TGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTATAAGAA +TAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTTCGCC +ATAGGTGATACGGTAATAGGCTACGGGGCGGTAGATTGCATCCCAGCTGA +TCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGGCACG +GAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTTCCAT +AACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACCACAT +CATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATCTTCC +AGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGACGGCC +ATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATGCTGT +TGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAAGCCC +AGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCAGAGCC +AGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTTGTGTG +AAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAGCATAA +AGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAATTGCG +TTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAGTCGGG +AAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGGAGAG +GCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGTGAGAC +GGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTGCAGCA +AGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTTTGATGGTG +GTTAACGGCGGGATATAACATGAGCTGGcttcggtatcgtcgtatcccac +taccgagatgtccgcaccaacgcgcagcccggaatcggtaatggcccgca +ttgggcccagcgccatctgatctttgggaacccgcatccgcatggggaac +a +>XL1_19_PSEXSEQ-REV_22 status=ok nucl=1301 crlStart=8 crlStop=1188 crlLen=1181 order=COL12-0DIL +tcatcgcTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT +GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG +ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG 
+AAATCGGGAAGGCGCAGCTGATGGCGGAGAAGCTGGTGCCGTGAGCGTAA +ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT +ATAAGAATAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCG +TTTCGCCATAGGTGATACGGTAATAGAGGACGATGCCGCGGAAGATGGAT +GCATCCCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAG +TTTGGTCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCA +GCTTGATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTC +AGGTAAACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTG +CATAACATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAA +TCTGCAGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTA +ACGTAAATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGC +GATGGAAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCA +GACCCGCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTA +ATAAGCTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACG +AGCCGGGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAAC +TCACATTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCC +GCTTTCCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCA +ACGCGCGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTT +TTCACCAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTG +AAAGAGTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAAT +CCTGTTTGATGGTGGTTAACGGCGGGATATAACATGAActggcttccgga +tcgtcgtatcccactaccgagatgtccgcaccaacgcgcaccccggaatc +cggaaagggcccgcattgcggccaagcgcctcttgatcgttgggaacaag +a +>XL1_1_PSEXSEQ-REV_5 status=ok nucl=1301 crlStart=5 crlStop=1182 crlLen=1178 order=COL12-0DIL +cgtgGAATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGCATA +GTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGATGGT +GATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAAATC +GGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAAACCGT +AATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTTGAGG +AACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTTCG +CCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCCCAGCT +GATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGGCA +CGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTTCC +ATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACCAC +ATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATCTT +CCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGACGG +CCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATGCT 
+GTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAAGC +CCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCAGA +GCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTTGT +GTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAGCA +TAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAATT +GCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAGTC +GGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGGA +GAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGTGA +GACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTGCA +GCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGGTTTGAT +GGTGGTTAACGGCGGGATATAACATGAGCTGGcttcggtatcgtcgtatc +ccactaccgaaatgtccgcaccaacgcgcagcccggaatcggtaatggcc +cgcattggccccagcgccttctgatcgttggcaaccagctccgcagtggg +a +>XL1_22_PSEXSEQ-REV_24 status=ok nucl=1301 crlStart=5 crlStop=1180 crlLen=1176 order=COL12-0DIL +ctatTGGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG +TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT +GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA +GAAATCGGGAAGATGACGTTGTCGTCGTTGGGGGGGTTGGTAGCGTAAAC +CGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTAG +AATAAGAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTCCGCCCGTTT +CGCCATAGGTGATACGGTAATAGGCGACGAAGGTGATGAGTGCATCCCAG +CTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGG +CACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTT +CCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACC +ACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATC +TTCCAGGCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGACG +GCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATGC +TGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAAG +CCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCAG +AGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTTG +TGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAGC +ATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAAT +TGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAGT +CGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGG +AGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGTG +AGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTGC +AGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGGTTTGA 
+TGGTGGTTAACGGCGGGATATAACATGAGCtggcttcgggatcgtcgtat +cccactaccgaaatgtccgcaccaacgcgcagcccggaatcggtaatggg +ccgcattgggcccagcgccatctgatctttgggaaccagcatcccaatgg +g +>XL1_23_PSEXSEQ-REV_25 status=ok nucl=1301 crlStart=9 crlStop=1183 crlLen=1175 order=COL12-0DIL +ttattcgcTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG +TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT +GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA +GAAATCGGGCAGCGGGAGAAGGGGTTGTCGGAGATGTCGCCAGCGTAAAC +CGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTAT +AATAAGAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTT +TCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCCCA +GCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCG +GCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATT +TCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAAC +CACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTAGTGCATAACAT +CTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGA +CGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAAT +GCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAA +AGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCC +AGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTT +TGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGA +GCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTA +ATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCA +GTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGG +GGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAG +TGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTT +GCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTTTG +ATGGTGGTTAACGGCGGGATATAACATGAACTGgcttcggtatcgtcgta +tcccactaccgagatggcccgcacaacgcgcaacccggactcggtaatgg +gccgcattggccccagcgccatctgatctttgggaaccagcatcgcagtg +g +>XL1_24_PSEXSEQ-REV_26 status=ok nucl=1301 crlStart=5 crlStop=1178 crlLen=1174 order=COL12-0DIL +atctTGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT +GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG +ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG +AAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAA +ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT 
+TGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGAGTTACCCCCGTT +TCGCCATAGGTGATACGGTAATAGCAGACGTAGTCGGGGCTGCTGTGTGC +ATCCCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTT +TGGTCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGC +TTGATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAG +GTAAACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCA +TAACATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATC +TGCAGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAAC +GTAAATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGA +TGGAAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGA +CCCGCCAGAGCCACCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATA +AGCTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGC +CGGGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCA +CATTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCT +TTCCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACG +CGCGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTC +ACCAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAA +GAGTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCT +GGTTTGATGGTGGTTAACGGCGGGATATaacatgaactggcttcggtatc +gtcgtatcccactaccgaaatgtcccgaccaacgcgcagcccggactcgg +taatgggccgcattgggcccagggccatctgatctttgggaaccagctcc +g +>XL1_25_PSEXSEQ-REV_27 status=ok nucl=1301 crlStart=10 crlStop=1192 crlLen=1183 order=COL12-0DIL +aacctcgctCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG +TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT +GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA +GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA +AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG +TTGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC +GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC +CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG +TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG +ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA +AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA +CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC +AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA +AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG +AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC 
+GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG +CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG +GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA +TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT +CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG +CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC +CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAGAGA +GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGG +TTTGATGGTGGTTAACGGCGGGATATAACATGAGCTGGCTTCgggatcgt +cgtatcccactaccgagatgtccgcccaacgcgcagcccggaatcggtat +tgggcccgattgcgcccagcgccatctgatcgttgggaaccagcatccgc +a +>XL1_26_PSEXSEQ-REV_28 status=ok nucl=1301 crlStart=6 crlStop=1173 crlLen=1168 order=COL12-0DIL +ctggaTATCTTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGCATAG +TCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGATGGTG +ATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAAATCG +GACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAAACCGTA +ATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTTGAGGA +ACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTTCGC +CATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCCCAGCTG +ATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGGCAC +GGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTTCCA +TAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACCACA +TCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATCTTC +CAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGACGGC +CATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATGCTG +TTGTCACCCGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAAGCCC +AGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCAGAGC +CAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTTGTGT +GAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAGCATA +AAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAATTGC +GTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAGTCGG +GAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGGAGA +GGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGTGAGA +CGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTGCAGC +AAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGGTTTGATGG +TGGTTAACGGCGGGATATAACATgaactggcttcgggatcgtcgtatccc +actaccgagattgcccgcaccaacgcgcaaccccggactcggtaaagggc 
+ccgcattgcgcccagggccatctgatcgttgggaaccagcatccgcaatg +g +>XL1_29_PSEXSEQ-REV_29 status=ok nucl=1301 crlStart=5 crlStop=1184 crlLen=1180 order=COL12-0DIL +taggTGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT +GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG +ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG +AAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAA +ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT +TGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCG +TTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCC +CAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGT +CGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGA +TTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAA +ACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAAC +ATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCA +GACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAA +ATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGA +AAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCG +CCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGC +TTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGG +GAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACAT +TAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTC +CAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGC +GGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACC +AGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAG +TTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGGT +TTGATGGTGGTTAACGGCGGGATATAACATGAACtggcttcgggatcgtc +gtatcccactaccgagatgtcccgcccaacgcgaagcccggaatcggtaa +tgggccgcattgggcccaagcgcctctggatcgttgggaaccaggttcgc +a +>XL1_2_PSEXSEQ-REV_6 status=ok nucl=1301 crlStart=5 crlStop=1197 crlLen=1193 order=COL12-0DIL +ttcgCGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT +GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG +ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG +AAATCGGGATGAAGCCGTCGGCGCTGGTGCGGGCGCCGGTAGCGTAAACC +GTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTATA +ATAAGAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTT +CGCCATAGGTGATACGGTAATAGAAGACGTCGCCGAGGGGTGCATCCCAG 
+CTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGG +CACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTT +CCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTACAGGTAAACC +ACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATC +TTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGAC +GGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATG +CTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAA +GCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCA +GAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTT +GTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAG +CATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAA +TTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAG +TCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGG +GAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGT +GAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTG +CAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTTTGA +TGGTGGTTAACGGCGGGATATAACATGAACTGGCTTCGGTATCGTCGtat +cccactaccgagatgtccgcaccaacgcgcagcccggaatcggtaatggc +gcgcattgcgcccagggccatctgatcgttggcaaccagcatcccattgg +g +>XL1_30_PSEXSEQ-REV_30 status=ok nucl=1301 crlStart=4 crlStop=1178 crlLen=1175 order=COL12-0DIL +atgGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGCATAGT +CTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGATGGTGA +TGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAAATCGG +GTTGTGGGCGTGGCTGCAGACGGTGACAGGCCGGAAATCGTTGCGGTTGA +GGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTT +CGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCCCAG +CTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGG +CACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTT +CCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACC +ACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATC +TTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGAC +GGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATG +CTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAA +GCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCA +GAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTT +GTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAG +CATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAA 
+TTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAG +TCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGG +GAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGT +GAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTG +CAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTTTGA +TGGTGGTTAACGGCGGGATATAACATGAGCTGTCTTCGGTATCGTCGTAT +CCCACTACCGAAATGTCCGCACCAACGCgcagcccggaatcggtaatggc +gcgcattgcgcccagcgccatctgatcgttggcaaccagcatcgcagtgg +gaacgatgccctcattcagcatttgcatggtttgttgaaaaccggaaatg +g +>XL1_33_PSEXSEQ-REV_23 status=ok nucl=1301 crlStart=329 crlStop=1165 crlLen=837 order=COL12-0DIL +cgttttcggctctgatatctttggatcccacgcgtccctagcccacgcgt +ggtgcatagtctggcacgtcatacggatacgaaccaccatgatggtgatg +gtgatggtgatggctaccgcccgaaccgccggtaccggtacggtaattga +tagaaatcggactagagctcgatgaggagatacgttaagagctcgaagcg +taaaccgtaatggtatagtcgacacccggtgacaggccggaaatcgttgc +ggttgaataacaacccgggacggtgaattcctgaaccggggagttaccgc +ccgtttcgccataggtgatacggtaataGCTGACGTTGCTTTGGGATGCA +TCCCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTT +GGTCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCT +TGATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGG +TAAACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCAT +AACATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCT +GCAGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACG +TAAATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGAT +GGAAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGAC +CCGCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATA +AGCTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGC +CGGGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCA +CATTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCT +TTCCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACG +CGCGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTC +ACCAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAA +GAGTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCT +GGTTGATGGTGGTTAacggcgggatataacatgaactggcttcggtatcg +tcgtatccactaccgaaatgtccgaccaacgggcaacccggaatcggtaa +tgggcggattgggcccagcgcatctgatcgttggaaccagcatcgcagtg +g +>XL1_3_PSEXSEQ-REV_7 status=ok nucl=1301 crlStart=9 crlStop=1188 
crlLen=1180 order=COL12-0DIL +ttactagcTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG +TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT +GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA +GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA +AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG +TTGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC +GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC +CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG +TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG +ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA +AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA +CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC +AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA +AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG +AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC +GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG +CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG +GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA +TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT +CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG +CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC +CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA +GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGT +TTGATGGTGGTTAACGGCGGGATATAACTTGAGCTGTCttcgggatcgtc +gtatcccactaccgaaaatgtccgcaccaacgcgcaagcccggaatccgg +tatgggcgcgcattggccccaaggccatcggatcgttgggaaccagcatc +c +>XL1_4_PSEXSEQ-REV_8 status=ok nucl=1301 crlStart=9 crlStop=1190 crlLen=1182 order=COL12-0DIL +tgactgctCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT +GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG +ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG +AAATCGGGGTGTGGGTGCAGTGGTTGTTGCGGCAGGTGTTGTCAGCGTAA +ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT +ATAATAAGAACCCGGGACGGTGAATTCCTGAACCGGGGATTACCGCCCGT +TTCGCCATAGGTGATACGGTAATAGACGACGCAGTGGTCGGTGAAGGGTG +CATCCCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGT +TTGGTCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAG +CTTGATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCA 
+GGTAAACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGC +ATAACATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAAT +CTGCAGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAA +CGTAAATGCTGTTGACCCGGAATGTGTTGATTGCCCACGCCACCTGCGAT +GGAAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGAC +CCGCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATA +AGCTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGC +CGGGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCA +CATTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCT +TTCCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACG +CGCGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTC +ACCAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAA +GAGTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCT +GGTTTGATGGTGGTTAACGGCGGGAAATAACATGAACTGGcttcggtatc +gtcgtatcccactaccgaaatgtccgcacaacgcgcagcccggaatcggt +aatgggccgcattgcgcccagcgccatctgatctttgggaaccagcatcg +c +>XL1_5_PSEXSEQ-REV_9 status=ok nucl=1301 crlStart=10 crlStop=1173 crlLen=1164 order=COL12-0DIL +ctattcgctCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG +TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT +GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA +GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA +AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG +TTGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC +GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC +CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG +TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG +ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA +AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA +CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC +AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA +AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG +AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC +GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG +CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG +GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA +TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT +CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG 
+CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC +CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA +GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGT +TTGATGGTGGTTAACGGCGGGATataacatgaactggcttcggtatcgtc +gtatcccactaccgagatgtccgcccaacgcgcagcccggactcggtaat +gggccgcattgcgcccagcgccatctgatcgttgggaacaagcttcgcat +t +>XL1_6_PSEXSEQ-REV_10 status=ok nucl=1301 crlStart=9 crlStop=1187 crlLen=1179 order=COL12-0DIL +agcttagcTCTGAAATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG +TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT +GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA +GAAATCGGGGAGTTGATGGGGTGGAAGTTGGAGACGGTGGCGTTAGCGTA +AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG +TAGAATAATAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC +GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC +CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG +TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG +ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA +AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA +CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC +AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA +AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG +AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC +GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG +CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG +GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA +TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT +CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG +CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC +CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA +GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGG +TTTGATGGTGGTTAACGGCGGGATATAACATGAGCTGtcttcggtatcgt +cgtatcccactaccgagatgtccgcacaacgcgcagcccggaatcggtaa +tggccgcattgggcccagcgccatctgatcgttggcaaccagcttcgcat +t +>XL1_8_PSEXSEQ-REV_11 status=ok nucl=1301 crlStart=6 crlStop=1176 crlLen=1171 order=COL12-0DIL +aacttTGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG +TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT 
+GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA +GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA +AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG +TTGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC +GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC +CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG +TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG +ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA +AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA +CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC +AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA +AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG +AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC +GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG +CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG +GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA +TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT +CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG +CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC +CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA +GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGG +TTTGATGGTGGTTAACGGCGGGATATaacatgagctggcttcgggatcgt +cgtatcccactaccgaaattgtccgaccaacgcgcaacccggactcggta +ttgggccgcattgcgcccagggccatctgatctttgggaaccagcatccg +c +>XL1_9_PSEXSEQ-REV_12 status=ok nucl=1301 crlStart=3 crlStop=1188 crlLen=1186 order=COL12-0DIL +tgGCTCTGGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGC +ATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGAT +GGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAA +ATCGGGTGGTGGACGGCGTGGACGTCGTCGTTGACGAGGCTAGCGTAAAC +CGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTAG +AATAAGAACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTT +TCGCCATAGGTGATACGGTAATAGCAGACAGAGCTTTATGATGCATCCCA +GCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCG +GCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATT +TCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAAC +CACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACAT +CTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGA 
+CGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAAT +GCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAA +AGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCC +AGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTT +TGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGA +GCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTA +ATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCA +GTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGG +GGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAG +TGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTT +GCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTTTG +ATGGTGGTTAACGGCGGGATATAACATGAACTGGCTTCgggatcgtcgta +cccactaccgagatgtccgcacaacgcgcagcccggactcggtatggccc +gcattggccccagcgccatctgatcgttgggaacaagcatcccaatgggg +a +>XL2-1_PSEXSEQ-REV_32 status=ok nucl=1301 crlStart=9 crlStop=1190 crlLen=1182 order=COL12-0DIL +tcatagctCTGATTTCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT +GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG +ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG +AAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAA +ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT +TGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCG +TTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCC +CAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGT +CGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGA +TTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAA +ACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAAC +ATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCA +GACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAA +ATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGA +AAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCG +CCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGC +TTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGG +GAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACAT +TAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTC +CAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGC +GGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACC +AGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAG 
+TTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGGT +TTGATGGTGGTTAACGGCGGGATATAACATGAACTGGCTTcgggatcgtc +gtatcccactaccgagatgtccgcacaacgcgcagcccggaatccggtaa +tggcccgcattgcgcccagggccatctgatcgttgggaaccaagatccgc +a +>XL2-2_PSEXSEQ-REV_33 status=ok nucl=1301 crlStart=9 crlStop=1181 crlLen=1173 order=COL12-0DIL +ctctctggATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGCA +TAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGATG +GTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAAA +TCGGGACGAGGGAGGCGGAGCAGCGGAGGCAGGGGCTAGCGTAAACCGTA +ATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTATAAGA +ATAACCCGGGACGGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTTCG +CCATAGGTGATACGGTAATAGTTGACGAAGGTGTGGCATGCATCCCAGCT +GATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGGCA +CGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTTCC +ATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACCAC +ATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATCTT +CCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGACGG +CCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATGCT +GTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAAGC +CCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCAGA +GCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTTGT +GTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAGCA +TAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAATT +GCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAGTC +GGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGGA +GAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGTGA +GACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTGCA +GCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGGTTTGAT +GGTGGTTAACGGCGGGATATAACATGAGCTGgcttcggtaccgtcgtatc +ccactaccgagatgtccgcaccaacgcgcagcccggaatcggtaatgggc +cgcattgggcccagcgccatctgatcgttgggaaccagaatcccaattgg +g +>XL2-3_PSEXSEQ-REV_34 status=Failed nucl=519 crlStart=1 crlStop=21 crlLen=21 order=COL12-0DIL +AAGGCCGTTTTACTTATTTGCtaataacaccttctccacgaacccccccg +ggttcaacatcgagggcgagaatcagaaaccccccaccatgtggatgagg +ctaagaatgtggtttcccccaaaacccccggtgcttgcttatggtgataa +taatcccaccaaatatcggaagtcttcacaaattgtaaaaatcccgcttt 
+atttttgtattactttagagtcgccgagacccagctcatgtaggtgtctg +agaaggactggatctgaatcatcgatgagttcacctttactttctttttt +ttttttctttttccaaataactaatagatgattcatcttgttgatgcctg +aaacccgaccaacatagcttccacatgccaccaacatttgcttgttagcc +tatctccgatctgaccccgtaggccccgctcccttaatggatcaggataa +attttcttaccctctcggtgatggcggcccccagcgcccggccatcctta +cctgtttttttatttgtc +>XL2-4_PSEXSEQ-REV_35 status=ok nucl=1301 crlStart=6 crlStop=1192 crlLen=1187 order=COL12-0DIL +agattAGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG +TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT +GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA +GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA +AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG +TTGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC +GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC +CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG +TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG +ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA +AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA +CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC +AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA +AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG +AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC +GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG +CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG +GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA +TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT +CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG +CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC +CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA +GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGG +TTTGATGGTGGTTAACGGCGGGATATAACATGAACTGGCTTCggtatcgt +cgtatcccactaccgagatgtccgcaccaacgcggcagcccggaatcggt +aatggcgcgcattgggcccaagcgccatctgatcgttgggaaccagcatc +c +>XL2-5_PSEXSEQ-REV_36 status=ok nucl=1301 crlStart=5 crlStop=1173 crlLen=1169 order=COL12-0DIL +ctggAATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGTGCATAG +TCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTGATGGTG 
+ATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAGAAATCG +GGTAGCTGTTGGCGACGATGACGCAGTAGCCGCTAGCGTAAACCGTAATG +GTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTATAATAAGA +ACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTTCGCCAT +AGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCCCAGCTGATC +AGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGGCACGGA +ACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTTCCATAA +CCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACCACATCA +TAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATCTTCCAG +GCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGACGGCCAT +CTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATGCTGTTG +TCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAAGCCCAG +ACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCAGAGCCA +GCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTTGTGTGA +AATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAGCATAAA +GTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAATTGCGT +TGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAGTCGGGA +AACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGGAGAGG +CGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGTGAGACG +GGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTGCAGCAA +GCGGTCCACGCTGGTTTGCCCAGCAGGCGAAATCCTGTTTGATGGTGTTA +ACGGCGGGATTAACATGAACTGGcttcgggatcgtcgtatccactaccga +aatgccgcaccacgcgcagcccggactcggaaatggccgcattggcccca +gggccatctgatcgttggaaccaagatcccaatgggaacaagccctcatc +c +>XL3-1_PSEXSEQ-REV_37 status=ok nucl=1301 crlStart=6 crlStop=1187 crlLen=1182 order=COL12-0DIL +ttcctCGATCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG +TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT +GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA +GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA +AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG +TTGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC +GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC +GCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG +TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG +ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA +AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA +CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC 
+AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA +AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG +AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC +GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG +CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG +GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA +TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT +CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG +CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC +CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA +GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGG +TTTGATGGTGGTTAACGGCGGGATATAACATGAGCTGgcttcggtatcgt +cgtatcccactaccgagatgtccgcacaacgcgcaagccggaatcggtaa +tggcccgcattgcgcccagcgccatctgatcgttgggaaccagcatccgc +a +>XL3-2_PSEXSEQ-REV_38 status=ok nucl=1301 crlStart=6 crlStop=1185 crlLen=1180 order=COL12-0DIL +cgtctTGCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG +TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT +GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA +GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA +AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG +TTGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC +GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC +CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG +TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG +ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA +AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA +CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC +AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA +AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG +AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC +GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG +CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG +GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA +TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT +CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG +CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC +CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA 
+GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGG +TTTGATGGTGGTTAACGGCGGGATATAACATGAGCtggcttcgggatcgt +cgtatcccactaccgagatgtccgcaccaacgcgcagcccggactcggta +ttgggccgcattgcgcccagcgccatctgatcgttgggaacccagatcgc +a +>XL3-3_PSEXSEQ-REV_39 status=ok nucl=1301 crlStart=10 crlStop=1174 crlLen=1165 order=COL12-0DIL +agactagctCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG +TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT +GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA +GAAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTA +AACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGG +TTGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCC +GTTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATC +CCAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGG +TCGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTG +ATTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTA +AACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAA +CATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGC +AGACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTA +AATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGG +AAAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCC +GCCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAG +CTTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCG +GGAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA +TTAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTT +CCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCG +CGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCAC +CAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGA +GTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGG +TTTGATGGTGGTTAACGGCGGGATataacatgaactggcttcggtatcgt +cgtatcccactaccgaaatgtccgcaccaacgcgcaacccggaatcggga +atgggccgcattgcgcccagcgccatctgatctttgggaaccagcatccc +a +>XL3-4_PSEXSEQ-REV_40 status=ok nucl=1301 crlStart=8 crlStop=1186 crlLen=1179 order=COL12-0DIL +tcgctcgCTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGG +TGCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGT +GATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATA +GAAATCGGCGGCTGTGGATGAAGCTGTGGTTGAGGCTGGAAGCGTAAACC 
+GTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGTATA +ATAAGAACCCGGGACGTGAATTCCTGAACCGGGGAGTTACCGCCCGTTTC +GCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCCCAGC +TGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGTCGGC +ACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGATTTC +CATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAAACCA +CATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAACATCT +TCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCAGACG +GCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAAATGC +TGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGAAAAG +CCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCGCCAG +AGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGCTTTG +TGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGGAGC +ATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAAT +TGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTCCAGT +CGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGG +AGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGTG +AGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAGTTGC +AGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGGTTTGA +TGGTGGTTAACGGCGGGATATAACATGAGCTGGCTTcggtatcgtcgtat +cccactaccgagatgtccgcacaacgcgcagcccggactcggtaatggcc +cgcattggccccagcgccatctgatcgttgggaaccagctcccgagtggg +a +>XL3-5_PSEXSEQ-REV_41 status=ok nucl=1301 crlStart=8 crlStop=1190 crlLen=1183 order=COL12-0DIL +ctgtcgcTCTGATATCTTTGGATCCCACGCGTCCCTAGCCCACGCGTGGT +GCATAGTCTGGCACGTCATACGGATACGAACCACCATGATGGTGATGGTG +ATGGTGATGGCTACCGCCCGAACCGCCGGTACCGGTACGGTAATTGATAG +AAATCGGACTAGAGCTCGATGAGGAGATACGTTAAGAGCTCGAAGCGTAA +ACCGTAATGGTATAGTCGACACCCGGTGACAGGCCGGAAATCGTTGCGGT +TGAGGAACTACCCGGGACGGTGAATTCCTGAACCGGGGAGTTACCGCCCG +TTTCGCCATAGGTGATACGGTAATAGCTGACAGAGCTTTATGATGCATCC +CAGCTGATCAGCAGAGACGTCGGGGTGGCCGCAACCACTTCCAGTTTGGT +CGGCACGGAACTGACAGAGCTGCCCGCGGCCGCACCCTTGATCAGCTTGA +TTTCCATAACCTTTTCGGCGCCATTAGACGGCTTCGCCACTTTCAGGTAA +ACCACATCATAGGTGTTTTTCAGAGCTGCGACGGCGTCTTCGTGCATAAC +ATCTTCCAGGCCCACGCTGTTGACTGCCAGGATCTTGTCACCAATCTGCA +GACGGCCATCTTTATGGGCCGCACCGCCTTCAATGATCTTCGTAACGTAA +ATGCTGTTGTCACCCGGAATGTGTTGATTGCCCACGCCACCTGCGATGGA 
+AAAGCCCAGACCCTTCGGCGCCGAGGCCGAGAATGCCAGCACCAGACCCG +CCAGAGCCAGCCAGATTTTTTTCATGGTTAATTTCTCCTCTTTAATAAGC +TTTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGG +GAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACAT +TAATTGCGTTGCGCTCACTGCCCGCTCTCGAGTTATCACTGCCCGCTTTC +CAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGC +GGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGTTTTTCTTTTCACC +AGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCTGGCCCTGAAAGAG +TTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTT +TGATGGTGGTTAACGGCGGGATATAACATGAGCTGGCTTCggtatcgtcg +tatcccactaccgagatgtccgcaccaacgcgcagcccggactcggaatg +gggcgcattgggcccagcgccatttgatcgttgggaaccagcatcgcatt +g diff -r 000000000000 -r 0c6cfb9906f3 fibronectin/test-data/fibronectin_report.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fibronectin/test-data/fibronectin_report.html Wed Nov 10 15:15:50 2021 +0000 @@ -0,0 +1,127 @@ +Fibronectin Report

Fibronectin Report

Input file:
fibronectin_datatest.fasta

Number of sequences in input file:
36

Pattern of the sequence bank:
AAAGSSVSSVPTKLEVVAATPTSLLISWDA:4-6:V:1:YYRITYGETGGNSPVQEFTVPG:3:TATISGLSPGVDYTITVYA:11-12:PISINYRTGTGGSGGSHHHHHHHHGGSYPYDVPDYAPRVG*GRVGSKDIRAETVESCLAKSHTENSFTNVWKDDKTLDRYANYE

5' restriction site:
GCGGCCGC

3' restriction site:
GGTACC

Caption:

Absence of restriction sites | Incorrect number of nucleotides between the restriction sites | Stop codon inside the area of interest | Mutation in the conserved regions | Valid sequences | Amber codon in the sequence (inside the area of interest)
1 sequence(s) (2.78%) | 6 sequence(s) (16.67%) | 25 sequence(s) (69.44%) | 0 sequence(s) (0.00%) | 4 sequence(s) (11.11%) | 0 sequence(s)
XL2-3_PSEXSEQ-REV_34
XL1_22_PSEXSEQ-REV_24
XL1_24_PSEXSEQ-REV_26
XL1_30_PSEXSEQ-REV_30
XL1_4_PSEXSEQ-REV_8
XL2-2_PSEXSEQ-REV_33
XL3-4_PSEXSEQ-REV_40
XL1_10_PSEXSEQ-REV_13
XL1_11_PSEXSEQ-REV_14
XL1_13_PSEXSEQ-REV_16
XL1_14_PSEXSEQ-REV_17
XL1_15_PSEXSEQ-REV_18
XL1_16_PSEXSEQ-REV_19
XL1_17_PSEXSEQ-REV_20
XL1_1_PSEXSEQ-REV_5
XL1_23_PSEXSEQ-REV_25
XL1_25_PSEXSEQ-REV_27
XL1_26_PSEXSEQ-REV_28
XL1_29_PSEXSEQ-REV_29
XL1_33_PSEXSEQ-REV_23
XL1_3_PSEXSEQ-REV_7
XL1_5_PSEXSEQ-REV_9
XL1_6_PSEXSEQ-REV_10
XL1_8_PSEXSEQ-REV_11
XL1_9_PSEXSEQ-REV_12
XL2-1_PSEXSEQ-REV_32
XL2-4_PSEXSEQ-REV_35
XL2-5_PSEXSEQ-REV_36
XL3-1_PSEXSEQ-REV_37
XL3-2_PSEXSEQ-REV_38
XL3-3_PSEXSEQ-REV_39
XL3-5_PSEXSEQ-REV_41
XL1_12_PSEXSEQ-REV_15
XL1_18_PSEXSEQ-REV_21
XL1_19_PSEXSEQ-REV_22
XL1_2_PSEXSEQ-REV_6

The following groups of sequences are identical clones in the variable regions:

No clone was found.

Here's the distribution of the repeated sequences in variable regions:

Variable region | Repeated sequence | Number of occurrences (percentage of valid sequences)
3 | YSY | 2 (50.00%)

The following clusters were generated by MCL:

4 sequences (100.00% of valid sequences)
XL1_12_PSEXSEQ-REV_15
XL1_18_PSEXSEQ-REV_21
XL1_19_PSEXSEQ-REV_22
XL1_2_PSEXSEQ-REV_6 +

Here are some statistics about the valid sequences:

Mean of the pairwise alignment scores: 25.92
Standard deviation: 3.89

Distribution of the pairwise alignment score
Pairwise Alignment Score | Number of occurrences
19.00 | 1
23.80 | 1
26.30 | 1
27.30 | 2
31.80 | 1

Valid protein sequences in FASTA format:

Multiple sequence alignment of the valid sequences generated by Clustal Omega:

Protein sequences with an incorrect number of nucleotides between the restriction sites in FASTA format:

Protein sequences with a stop codon in FASTA format:

\ No newline at end of file