Mercurial > repos > iuc > basil
changeset 0:e6ef29001647 draft
planemo upload commit b89c8017aeef91f940543a1cc7dadb4a85290865
author | iuc |
---|---|
date | Thu, 30 May 2019 21:14:58 -0400 |
parents | |
children | 77fc7640abc7 |
files | basil.xml test-data/basil.vcf test-data/ref.fa test-data/simulated.bam |
diffstat | 4 files changed, 220 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/basil.xml Thu May 30 21:14:58 2019 -0400
@@ -0,0 +1,61 @@
+<tool id="basil" name="basil" version="1.2.0">
+    <description>Breakpoint detection, including large insertions</description>
+    <requirements>
+        <requirement type="package" version="1.2.0">anise_basil</requirement>
+    </requirements>
+    <version_command>basil --version 2>&amp;1 | grep 'basil version' | cut -f 3 -d ' '</version_command>
+    <command detect_errors="aggressive"><![CDATA[
+        ln -s '$ref' 'ref.fa' &&
+        ln -s '$bam' 'in.bam' &&
+        ln -s '$vcf' 'out.vcf' &&
+        basil
+            --input-reference 'ref.fa'
+            --input-mapping 'in.bam'
+            --out-vcf 'out.vcf'
+            --oea-min-support-each-side '$min_oea_each_side'
+    ]]></command>
+    <inputs>
+        <param name="ref" argument="--input-reference" type="data" format="fasta" label="Reference Sequence File" help="FASTA file with the reference."/>
+        <param name="bam" argument="--input-mapping" type="data" format="sam,bam" label="Alignment File" help="SAM/BAM file to use as the input."/>
+        <param name="min_oea_each_side" argument="--oea-min-support-each-side" type="integer" value="2" min="1" label="Minimum supporting reads, each side" help="Smallest number of OEA (one-end-anchor) reads on each side to support an insertion. In range [1..inf]. This is the minimum number of supporting reads (without mapped partners) on each side of an insertion breakpoint required to not be filtered." />
+    </inputs>
+    <outputs>
+        <data name="vcf" format="vcf" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="ref" value="ref.fa"/>
+            <param name="bam" value="simulated.bam"/>
+            <param name="min_oea_each_side" value="2"/>
+            <output name="vcf" file="basil.vcf"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+BASIL is a method to detect breakpoints for structural variants (including insertion breakpoints) from aligned paired HTS reads in BAM format. Use BASIL to analyze BAM files for tentative insertion sites.
+
+Note that BASIL will in general detect all kinds of breakpoints, e.g. for inversions on real-world data.
+
+BASIL VCF fields
+
+A typical line in BASIL output might look as follows::
+
+    1 5001 site_0 T <INS> . PASS IMPRECISE;SVTYPE=INS GSCORE:CLEFT:CRIGHT:OEALEFT:OEARIGHT 46.4256:10:12:35:32
+
+The first eight columns are as usual in VCF files (reference name, 1-based position, site ID, reference base, abbreviation for long insertion, no assigned quality, passing all filters, imprecise insertion SV).
+
+The ninth column contains the names of the score values given in the tenth column::
+
+    GSCORE   Geometric mean of the sum of "1 + $score" for all of the following scores.
+    CLEFT    Number of clipping signatures supporting the site from the left side.
+    CRIGHT   Number of clipping signatures supporting the site from the right side.
+    OEALEFT  Number of OEA alignments supporting the site from the left.
+    OEARIGHT Number of OEA alignments supporting the site from the right.
+
+Generally, one should filter for a minimum support of OEA records on each side, e.g. a value of 10 makes sense for a 30x coverage and showed good results on simulated data.
+
+For a ranking, GSCORE is a suitable measure but we did not develop any statistical model for BASIL matches and it is a mean of pseudocounts only. It carries no statistically precise meaning.
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btv051</citation>
+    </citations>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/basil.vcf Thu May 30 21:14:58 2019 -0400 @@ -0,0 +1,15 @@ +##fileformat=VCFv4.1 +##source=BASIL +##reference=ref.fa +##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation"> +##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> +##INFO=<ID=OEA_ONLY,Number=0,Type=Flag,Description="Breakpoint support by OEA signature only"> +##ALT=<ID=INS,Description="Insertion of novel sequence"> +##FORMAT=<ID=GSCORE,Number=1,Type=String,Description="Sum of Geometric score means (see BASIL documentation)"> +##FORMAT=<ID=CLEFT,Number=1,Type=String,Description="Clipped alignments supporting call from left side."> +##FORMAT=<ID=CRIGHT,Number=1,Type=String,Description="Clipped alignments supporting call from right side."> +##FORMAT=<ID=OEALEFT,Number=1,Type=String,Description="One-end anchored alignments supporting call from left side."> +##FORMAT=<ID=OEARIGHT,Number=1,Type=String,Description="One-end anchored alignments supporting call from right side."> +##contig=<ID=1,length=10000> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT individual +1 5001 site_0 T <INS> . PASS IMPRECISE;SVTYPE=INS GSCORE:CLEFT:CRIGHT:OEALEFT:OEARIGHT 46.4256:10:12:35:32
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ref.fa Thu May 30 21:14:58 2019 -0400 @@ -0,0 +1,144 @@ +>1 +CTGTAACTAAACACCGCTGTCCGGACTAAGGTCCGCATATTGTTCAGGTTTTATACAATGAGCGTCGAGC +CAGTTCAATGTGGCCCTAAACCTACTCAAGAAGTATCCCGAGGGATGCGAAGCTGCCGATGAGAGTTAGT +TGACATTCCGAGTCCGTGGATGGGTCCTCCTTCTTAGCGAATTACCGTCCATTTGCATGACGGATGCCTT +GCACTTGGTAACAGTAGCAATGTATAGGAAACAAAGAATCGAGTTCAACAGGTCGCATTCAGTGCCTTGC +GCACAAATATATAGGAGGAAACTCGCAACAGAATGCTATGACCAGTTTATAGAACTGTGCCGTAGACGGA +GTGCCAATCAGAAATAACCATAATGGGTTGCCAGGGTGTATGACAGGTTCTTGATAGTCCCAGAGCTTTC +CGTAACAAAGGAACTTTCACAACGATTGCCTCGTTATATTCCGCTGCACTGCATGACATTCATTGGAATG +GAAACAGCTGATTTCCTAGATAGCGCCGGGTATATTGTTACCGCCCCACACGCAGGCATCGGACCACATA +TTTTGGCACCTGGTTAGAGAGCTAGACTCGATCGAATAACTTTAGGGAAGCTCCATGTCATGTCCAACAC +CCGAAGAGTACAGGACATGTCAAAGGGAGGATTAAAGTTGACCCTATCGATGTATGTCAAGGGTGCTGAG +TTGCCCCAGGCTTGCGTATCCTACTGGCTTGATTGGGTACCTAGAGGTACCGTTGCGCATCGGCTCACAA +CGTTCAATGCAGTCCCAAGATACGGTGTGATGTCAGGACCTAAGCGACCCATGATCGCTGCTTCCTTGGA +AGAGAGAGCGACATTTCCGTCTCGCGCATTCTTTAGTAAGGGAGGAACTTTTTGTGAAAGATGGTGTTGG +GCTTCGAGACATTTATCGTGCCTTCTGGCTTACTCAGATTACGCTGAAGCCTTTGGGCCTGCCCATGACT +GTTGCAATCATTTGTCTGAAACACCAATGACTTAACGAATGCCCACAAGACTGAAGGTTATCGATCTTGG +AGAAAACTCCTGATTCTATAGGGACCATCTCAGGCCCGACAATCCTTAACCGGATCCTAACAGGGTTCAC +ATGTTCATGGGTCGTAGTAACCTGTGGCGTGACCGTGGCATAAAGACGTCCGTCCAATTCCTGAGAAAAG +ACACCGAAATGGTGAAAGAGTGGGGACCTCCGTAGACAACTTACATCGCCACGCAAGCTTATGTGTGCGT +CAGCTACTTCATTGACCACAAATTCGGCCGAAAGATGGCGCTATTGATTTATGGATTAAGGGGCACGCAG +TTGTACTTATTCGGCGTAGGTTCCAATCTAGTGGGATTCCTATGGGCCGAAGTCTGAGAAACCTTAGACA +AACGGTCACCCATGCGCCGCACCTCCGGTGCCTCCAGTAACATAGTTTGTCCTCAGGTTTGAAATGCTGG +GTTTGGTAAGTGAACCTTAATATCCGCTTGCGATTGTCCATGCGGACCGGATTTCAAGAAAGGAGATAGA +TTGATCCAGGAACCATCTATCGTACATCGGCCTCGCCCTGCACGCAGACCTGCAGTCATCTTATGATGCG +TCCCGTGCAAACCCTCCGTTATCTAGGAGCGGCAGCAGCGCACGAAGAGCGGGAAGATGATTATCTCCCG +AGCTCTTGTGCCGAAGGGCATTGTGGAACATTTGGAACGGGGCTCGGATCTCGCTCACTAAAATCTAAGG +ATCGAGGAGACAAAGTCATTCGGCAAGACAAACTCACGTCCGTGGTTGCAACTGATTAGACTTTATGGAT 
+GAACGCTATGCTGCTACCGAACCGACGACCCGTGGTCACCAGCTATCCGCTCAAGACATTATATCATTAA +TCCGAGACCCCTGCAGACACACGTCTCGGCTATTGTTGAGAGCCCTTCCTGCTCCATACCCCCGGCACGT +ATAATAAACCGAGTACTATGTCCCGGCGGATTCCGTCTTACCTATCTACGAGATCCAGGTGCCGTAGCAT +AAAGCTATGCGGGACGTTCTACTCGTTCTAACAGAACCTCAGCCATTGTAACCCACTTTTGTTGCATCCT +ATTGGCAGTGGGGCGACCGCTGAGATTGGGGTAGCTTCCTAGTAACTGTGCAGCGAGGTACGTCGCAGAG +GGATACACTTGTTACGGCGGGGACTTTGGCAGTGACCCTAGGTTGATGTCCTAAGACGATGTAAATTGAG +CTATTCGACGTAGTCGGTCCCTCTAAGTGAACTTTCTATCACCGCGGCCCGAGGGGGAATTATGTGATAG +CGCGTGGCTCCAACATCTATTCGGGGCAAAACCGACACGTGTCGTACGTTGCGTACGTTACCTTTTCTGG +TTACTTATTCGACCAAGTTATTGCTTGTATCACTTTCGTTGTAAACCTATGCCCTTTTAACACAGCTACG +CCCATGAGTGACGTCATGACATTAGACAAAACACAGTATTGCCATTCCCACCTTCTCACATGCCCAGCAA +AATGGGGAACGTGTCGACTTCCTGCTGTCACAACAGAACGGAGCGTATGTGAAGAAACTGTATCCCTCGG +TGCCGTTCAAGACCGGTACACTTTGACGGCCAACCGCAAGTGGCGCGGGGGCTGTTAGCTAATCAGGCAT +ATGCATCGGCCAGCGGCGGGGCGAGACGGGTGGGTGCGGAAATAGAATCCGATGAGGTTCGTTAATCGTC +TTGCTTTGTGTTACATACAATAAATCGTCCCCCCCATGGTAACACGAGTTTCCATTCACATTTGGCTCTT +GCCCGGCAGGTGTGCGATAAGTCTTATGACTTTTTTAGACATCCACCACTTGCACGGCGTACTCTTTATT +CTCCATAATGCCTGTGTGATTCGCAGCTACCAATAAAGGTGCTTACGCAAGGTATTACCCAAATATCAAC +CCGGCCCCGCCCGCCGGACCCCAATATGTTATTCTGCCCTGTAGCGACTCCGTGGTCGTCAAGAGTCTAT +CACATACTCTATGGTAACTAAGATAGTGTCCGTGTCCTCTGGATAACGAAGCGACCTAACATCAAGACTC +AGGTAAATGGTCTTCTACTATCTTGATGTCCTGTGAGCGAGATGGATACCATTCGCCTGATCAAACGCAG +TAGCGTATAGGGCTAGCGGGAATTTCTTGACGGCCGCAGAGTTTCTTGTGATCGGATGACTGCTTGATAC +TTGGCGACGAAGCCCTGTGGTATGGGGGCCTGACTTCCGCGGACATGGTCGTAATGCCTAGGATAGTTGC +TACGGTGGGAACCTATTTTTATGCCCGCCAGACTCGTGTGGGACTCTATCAGTTAAGCGCGACTTCTCGC +AGAGAAAGATTAAGATTATGGCTTAATTCTTCTGTACATTCCTTGGTGAGGCGGCTCTCTAGAGTTGCAT +TTGGGTGGCATCACTCTTCCAGGTAGTTCACCTCTTGTTGCCTGCCCCAATGCGGGGCGAACTTTCGCTG +CTGACCGCCGTTACACAGAACCTGAAGGGACAACACACGAGACCGGAATGTAGTCTCTCTTGTCCAGTTC +TGGGGGCATATAGATTTATGATGTTATTCGTGCCCTTATGTTACCCATCCGTGTCGCCGTGAGCGACATC +CGGCTAATGTATGGGCAATGGGAATCAGGTATGACTGCGCGAGCTATCGACGCGTAGAAGGCTCACTCTG 
+CAAAAATCACCAGCACTAACGCGTAACACATTGGCTGTCAGCGCACTCGTTGATCTTTGTGTCGGGTGAT +TCTAAAATAGTTAGTCCGTGACCTTTGGCGGCGGCGAGAGATATGGTCCTAACCGCCGTATTTGGACAGA +AATTAGTCTATGCGATCTTCCCTCCAGATGGTAACGCTGCCAAGGGGCTGACTCTGGCATCCCAGGCGCG +TATGTCTAGGTGGCGGCTGTAGATATGAAACGATATTAGTAGGTTTTTCACATGAGCAACTACTGGGTTA +CGACTCAGTCGGTGGCCGCGGATTATGTCATTTGCGAAATAGCAGATCACTTGCGTTTCCGAGAGCCCAC +CATCGGTCGTAGGACAAATACCCAGTTAACAGCTATCGTTTCGAATATGGATGGGTAAATAGAATGACAT +ATGTAACGAGCCCCCCCCAGTAAACCCGCAGCGGAACGCTGCACTTGAGGCGAACGTGACCGTCGTCCCT +CGGGGTTTGAACTATTCTCGTGAAGTCTTACTGTAACGTTCTAGGCTACTCGTATCGCGTTATAATCCGA +GTACCACCCACGGGCAGAGTCCACAACGAAGAGCGACGGGAGATGTCAGGATAGCCGCAGATCCGTTCAA +GCCAGTACGGATAACGAGGCGATACCTTGTCAGGGGCCCGATCGACCGCTGAAGGGCAGAGTCGGAACTT +ACTAACCATGGGGTCCTCTTCGGGAACACTTTACGTGTGCAAATCCAAGCTGGTGCGACCCCGTTCAACG +GCCCAGAGAACCGCTCTACCTGCGCACACGATCAACTAGCTCGTCTTGGGCTGTGGCGAGCCTGTCGTAT +TCGCTATATAGTTAGTGGTATATCCTACTGGGGTTAAGTGATGGGAGTCTCCGCTAGAGGCGTGGGCGGC +GTTACTACAACTTCCTGCGACCAGGACCCTCGGGAGGTATTTGACTATGGTGCTTACATGGTGTTCCTTA +GTTAGATAGGATGACTTCAGCTAACTGGGCTTCGCCTAGGGTCTCGGGAGAAATCTAGGGACCCCAATCT +ATTAGACGAACACGTCCAGGGCATGGTCAGGTATACACCTTCCGACTAGACGTGTTCGAAGATTCGGGAA +AATTACCTGAAGAGCCCCCGTAAGCCGTAGTAGAAGAGGACACTTCATTTAAACAATACCGAAAAAGTGT +CTTGGCAGACCGTATCTTCACAGGGCCGAAGCACTTTTGGCAGGCTTATAAACGCCCAGAATGAAGCACT +CGCCATAGGTGGAAACCTTTAAGCGACGCGTTGGGTCCGCGCAGCGCCAACGATTTCAACCGGGAGACGT +TCGTTCATGATGAGAAGACGGCATATGATCTGTGACATACGCTAGGTGAATCATAGCTGCTTAACAAACT +CTGGCGTGTCTGAGGCATTCAGACCCCGCGACAACCTTATGTCTACAAAACGTTGCCCCCTGTACGAAAA +GTCCCTAATTGAGACTGCAACCCCCTAGGCTTTCCAGTCAACAGGCGTAGCGGTAATAGTGGGTTCGATG +CAGCTCGGCGGGCTCGGGATCATACCATCGTGTATAAACGTCAGCGTTACGACCATTTAAGTGCGCGGAA +CGGTCGGGTTAGTGCATTGCTTGCGACTAACTATATCTAGTGATACGCTGCAACTAGCCACAGTCCTAGT +TACTTTCGGAGATCCTCGTGGGTCTGAGAGGAGGGCTACCCGCACAGCGAATCCTTCGACTAGTAATTCA +TACTAGAACCTGACCGTAGTAAACGTTCTGTGGTTCACCCGCTTGGACTATGAAAGCCGGTGGTATCGAG +TTTTGGCTACCCAGTCGAATAGCCCACTCGGAACCAGTTGTAAGATAATAATTGCTATGTGTATCGTGAC 
+ACGTTGAGGCGCATACTTAACTGACAAGGTCCAATCATGGTCTTATACACTGTTGCAAATGACACCACAT +CCGAGCTAAATCTAGGCATCTCAATCAGGCGGTTCGAGGGATCATGTAAATGTTTAACCTTTGCGGGCAC +AAGCCCCCGTGTACTCCGGATGGCCAGGGACAGCTTCCACGAAAACTTCTTACACTCCATGTTGAGATGA +GTGGCCAACTAAATGCTTCGCTGATGTGGACAGCATCATATTAGCAATCAAGGTGCATACTTCCGGTCTT +CAAAGGCCAGTACTGGTATAGACGATAACCGTGGACATCTGCTAGACCGCCAGATTGTGGCATCTTTTGA +AACTCACCCGGAAGATAACGCGCATCTCCCTCAGGTCGTTTTAGCGACGCTAATATTATCGTGGCATCGT +GAAATCTGGGGTAAGGAAGGAGTCCGATGGCCTTACTCTCCGTTTTCACTCTTGCTTTCATCTTAGGAGC +CTATCTTTATATGAGGCCCGGCTCCCTTCGGTGACTTCGCGACTGTCATCATAAGCGCCCTTTTCATGCG +GTATACTCTGCCGTCGAACCCGATCATATAGCCAAATCGGACAATAACACAGTGGACATCGTCCATTACA +GATTAGTCGTACTGTGGGGGACTCCAATTATCGTAGATGACATCTTTCGGTTCTATTACGCCGCGACACC +AGGAATTTGGATTCTGATAACCCGGTGCTCACGTCGTGTCGCGAACAAATCTACGAGGAGCCAGGAGCGC +TGAAGCCTATCAATTGGGATTACCTTCTTGAACGCTATCCGACCTGCCCTCTGTACAGTCCCGTAGACAT +TCATGGATCCTGGATGGGAGCCACCACTGCAAGACAGCACATTCCAGAGCTTTCCCAAGCGGGATCTTGG +GTACGTTCTTTCGTCTCCTTGTTTGCGTCCGCAATAGTAGTCATACCGTAAGATTTTCTTCCCGCGGAGA +TGGCGGCTCTCCCAGCTAGAACATTATTATAATGGCAAGGGTGGCACACCAAAGGTGCGCACCCAAGGTT +CACATGTCCCTTTCACGCTACCCATGAGCGGTAGTTTATCGAAGTTTAGGTGTTTAGTGCTTGAATGAGT +CAATACGTTGTCAGAATTCACAATAAAGATCTTCTGTGCGGACTATACACTAGCCGGAGGAACCTAAATT +TTTCACGGGGTAAATCCCGAGGTTCGGACATATACCTGGCTTACCCAGCGCGGGCTGACTAGCCCACTTG +CGCGAGCGCTGACTCAGGTAAGACAAGAACGAAGTGCAGTATCTCCGGGCTTACTCGAGCGTACACCAGG +ACCCCGTAAATTTGCTTACATACAGAAGGTATGCCTTGCATGCCCTTGCTCAGCTAGACGACGATGAACG +TAGAGTAAGTGGACTTAGGGCCAACACTCGACGTTTTCGTCTAGACCAAAGGTTTAAAAGCTTTCCGAGT +GTCGACTCAAAACATGGGCTAATCTGATTCGTCAGTATCCAGGCCACACTTCAGCGGGTGGAGCACCTAC +GATAAACTAAGAAGGTAGTTAGCAAGTGCGTTAGCTTCTGGAAGTTAATATCGGCCTTAGATCGGAGACG +CGACTGCCCATAGTCTATCACTTCCCAAACAGAACACTTATAAGTTAAGCGTAATATGTACGAATTTCAA +CGTGGGATTGCCAGAGTAGAATGTGTGTAGATTCGCAACAGATAAAAGCTTTCGAAAGCAAAGCGACAGC +GCGAACGTGGGTCGGAATCCACATCCGTCCAGGGTTGTAATCACCTTGCGAATATCCCAGAATCGGTGCC +ACCGAGGCGACTCGACCACGGATCCGATTCTTCTCATACGGATCAGCAAATTGTAATAGCGGTTTCTTAC 
+AGAGAGGTGGGCACCCTAGCCAGGCTAGGGCGCATAGCGAACCCCTGCCACCTTAGAAAAGTGATGCCTG +GAGTTCGACAACGCGCGGCAGAAAGGATTGAGCGTATTTTCGTCTGGGTATTCACCCTATCGCCTATGTA +CCGAGAAGTGAGGGAGAGGCGGCGTTCGTCTGTACGCGGCTGGATGTACTGCGAGGTCGTTTGGCCGTTT +GCGAGACCACCCATTCGCCGAATGGGTAAAACAAGAACTGCCGTAGGTATATTATAAATCTCGGGATAGA +CGGGGGTAAGTTTATGTCTCCGGTTCTTTGTAGTCGTAACTCAGGGGCCCTCGACCCTAGCGAGGCGTTT +GTAGTGGGACACCAGGTATCTCCCGGAATAATTCTAGCATCATGAGAGGTGGAGTCGGGATTTGATCAAC +CTACATCATTTTTCAGGCAGATACATGGGACCGATCACACCTCACCTCCGTGTTGTCAAACTTAAGGACA +ACTTAAGGTTGGCTACGCGGCCGCGGATTGCGCTTGCGCGGGAGGGGCGGGGGATATCAGCCAAACGGAG +AGTGCTCCGTTGATGAGATTAAGCAGATGTATAAATCATTTGAATGTATGCAATTTGTCTAGATCACCCC +TACACATCCTTCTTGAGTAAGTCACAAATACGCCGCAGGTTTGGCGGTTTTTGTCCCCGGCACCTCACTA +GATTTTCATCCAACATACGTAGTGACGTTCCGGTGGCTCCGGAAATCGATCCACACTGCCGAGACTGACT +TGAGGTCGCGCAGTAGCCCCAACAACTGGGGAGATGGCGGACCCGAGATCAAGGCGCTTTCGCGCCACCT +GGGCTTGAGCGTACTAACCCCGGAGACGTGTGTACGGCCCCAGAACATGTATCGTGTGATGAATACCAGT +GTATTCCTTCAAAGGTCTAGAGACATACGGCCTGACTCGTTCAAACTAGAGAAGGATTTACCCGCCACAC +CTGGGGTCATGGTTGACAACAACAGGAGCCCATGTGTTTATCACTGAATTCTCGGGAATTCCTCGCGAGT +CGGAGCACTATCTCGTTCAAACCCGCTAGCCATGCCGTAGAGCGCGCGTGCCAAGGTGCGCGTAGGAGTG +CGCTGGCCCCGCATCCGCTAAATATATATCACTCAGCCCTCGCCGCAGCAGAATATCTAATCCCCGCTCG +ACTAGCCGAAACCGACACCGTTCAGGGCTGACGACCTGCCTTTTGCACAAAATAACTGAGCCCAGCGATC +GTATATTGGAGTCGGACTCAGACGATTCTACGGGTGCTTAAGGCTGACAGGGCGGCAACCATAGGCACGA +CTTCGTTGGCGCCGCCCAATAGCTACGTCGCGATGATTAAATCAGCCCTATTCGCCAGGGAGAGAGACAA +AATTGGCAATATTTCTACAGGTCTAGCTAACCGACCTAAAGGAGAACCAGGTCGTTGTAGGTCTGTGTAC +CACTGGCAGTCCGTCATCGCTGAGAAGGATGTCACGATAGGCTCCGCCTAGTAACTTTTCCCTGCTAGCT +ACACCGTTGCGGATATATAGTTTGAGCTCGGGGTCAATGGATTAGAACGTACAACATTGTGTCTGCTTAC +GATTGCATAGGTAATAGTCAGCCAGTATATGATCGTACAACACATCTGAAAACTAGTCAAGATACGTGCG +TTTGTCCATCTGTATTTACTCAATTCCTTAAACAGCTCGTCCAACGCGGGTCAAAACTGTGGCTTGGTAC +AAGTCGTACTTAAACCTCAACCACTACTTGTCAACTCGTTTACGTTTTCTACGTGGAAGTCCGAAAGTGT +CATGCGCCCTGCTTCGTATGCAAGCTCACTGATGATCACACCGATGCTTTACGTGCTCTATTGATTGTAA 
+AGTCGGCAATGCGTGTGCCTTAGCTGGTGAGATAATCGTTATAACCACGCTCCATAAGGAGGTTGGTTCC +TCTTCCCGATCTATCCCTGGCAGTTGTCGACACTAGCGCTAGTGGCTTCGCAGACTGATGGCAGTGTCTA +CCGCGGATTACAGTTAGCTAGCACGCCCCGTTTTTAAGGCTAGAAATGTTGCGATATTTCCGTGAAGCCG +AAAGTGAGGTAGGAAACCCCGGTCCTGTAGATCATTAATACTACGTAAGCGATTGTCAACGGAATGGGAA +GGCAGTTTATGAACGGCTGGACTGCTTAACCGTTTTCTTGGCACCTATGCCTAGTATGCACAATACGAGA +TGGGTTGCGGAGGGACGGAGAATGAGCCTAAGCGAGTTACCTCAGACCACCGGTTACGCTGACCCCTCGG +TTAAAGGCGATTCTCGATGCTTTCAGAACTACGGGTCTGTAATCAAAATCTCTATACGTTAGACGTCAGG +CGCCGTTCATTAGGAGTCCGCCATAGGATAAACAAGTCGAGTGTTGTGGTCGCTACTAGTTGCATATTTG +AACAATTTGCCGCGGCGTTATGTCCTTCAGTATATTGGCAAATGTGGCTACTGCCATACC