changeset 0:c0f8fe52948e draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/cutesv commit abfd3162e28a388d1dedbe55cb8b3567fa79c178"
author iuc
date Thu, 24 Sep 2020 15:11:57 +0000
parents
children
files cutesv.xml macros.xml test-data/fasta_indexes.loc test-data/genome.fasta test-data/genome.fasta.fai test-data/output.vcf test-data/sample.bam tool-data/fasta_indexes.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 10 files changed, 650 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cutesv.xml	Thu Sep 24 15:11:57 2020 +0000
@@ -0,0 +1,141 @@
+<tool id="cutesv" name="cuteSV" version="@WRAPPER_VERSION@">
+    <description>detects long-read-based SVs</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">cutesv</requirement>
+        <requirement type="package" version="1.10">samtools</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        ln -s '$input_sample_file' ./sample.bam &&
+        ln -s '$input_sample_file.metadata.bam_index' ./sample.bam.bai &&
+
+        #if str($reference_source.ref_selector) == 'history':
+            ln -s '$reference_source.ref' ./genome.fa &&
+            samtools faidx ./genome.fa 2>&1 || echo 'Error running samtools faidx for indexing fasta reference for cuteSV' >&2 &&
+        #else:
+            ln -s '$reference_source.ref.fields.path' ./genome.fa &&
+            ln -s '${reference_source.ref.fields.path}.fai' ./genome.fa.fai &&
+        #end if
+
+        cuteSV ./sample.bam ./genome.fa ./output.vcf ./
+            --threads \${GALAXY_SLOTS:-4}
+            --batches '$sel_method.adv_settings.batches'
+            --sample '$sample'
+            #if $sel_method.adv_settings.report_readid:
+            --report_readid
+            #end if
+            --max_split_parts '$sel_method.adv_settings.max_split_parts'
+            --min_mapq '$sel_method.adv_settings.min_mapq'
+            --min_read_len '$sel_method.adv_settings.min_read_len'
+            --merge_del_threshold '$sel_method.adv_settings.merge_del_threshold'
+            --merge_ins_threshold '$sel_method.adv_settings.merge_ins_threshold'
+            --min_support '$sel_method.adv_settings.min_support'
+            --min_size '$sel_method.adv_settings.min_size'
+            --max_size '$sel_method.adv_settings.max_size'
+            #if $sel_method.adv_settings.genotyping.genotype:
+            --genotype
+            #end if
+            --gt_round '$sel_method.adv_settings.genotyping.gt_round'
+            --max_cluster_bias_INS '$sel_method.adv_settings.seq_method.max_cluster_bias_INS'
+            --diff_ratio_merging_INS '$sel_method.adv_settings.seq_method.diff_ratio_merging_INS'
+            --max_cluster_bias_DEL '$sel_method.adv_settings.seq_method.max_cluster_bias_DEL'
+            --diff_ratio_merging_DEL '$sel_method.adv_settings.seq_method.diff_ratio_merging_DEL'
+            --max_cluster_bias_INV '$sel_method.adv_settings.max_cluster_bias_INV'
+            --max_cluster_bias_DUP '$sel_method.adv_settings.max_cluster_bias_DUP'
+            --max_cluster_bias_TRA '$sel_method.adv_settings.max_cluster_bias_TRA'
+            --diff_ratio_filtering_TRA '$sel_method.adv_settings.diff_ratio_filtering_TRA'
+    ]]></command>
+    <inputs>
+        <param name="input_sample_file" type="data" format="bam" label="Sample file" help="Sample file in .BAM format." />
+        <expand macro="reference_interface" />
+        <param name="sample" type="text" value="Sample" label="Sample name/id" help="">
+            <validator type="length" min="1" message="Please enter a sample name/id"/>
+            <validator type="regex" message="Please enter a sample name/id">.*[^ ].*</validator>
+        </param>
+        <conditional name="sel_method">
+            <param name="meth_selector" type="select" label="Select the sequencing method of the input file" help="">
+                <option value="pb_clr" selected="True">PacBio continuous long reads</option>
+                <option value="pb_ccs">PacBio circular consensus sequencing (HiFi)</option>
+                <option value="pb_ont">ONT</option>
+                <option value="other">other method</option>
+            </param>
+            <when value="pb_clr">
+                <section name="adv_settings" title="Advanced settings" expanded="false">
+                    <expand macro="adv_shared_1" />
+                    <expand macro="adv_pb_clr" />
+                    <expand macro="adv_shared_2" />
+                </section>
+            </when>
+            <when value="pb_ccs">
+                <section name="adv_settings" title="Advanced settings" expanded="false">
+                    <expand macro="adv_shared_1" />
+                    <expand macro="adv_pb_ccs" />
+                    <expand macro="adv_shared_2" />
+                </section>
+            </when>
+            <when value="pb_ont">
+                <section name="adv_settings" title="Advanced settings" expanded="false">
+                    <expand macro="adv_shared_1" />
+                    <expand macro="adv_ont" />
+                    <expand macro="adv_shared_2" />
+                </section>
+            </when>
+            <when value="other">
+                <section name="adv_settings" title="Advanced settings" expanded="true">
+                    <expand macro="adv_shared_1" />
+                    <expand macro="adv_other" />
+                    <expand macro="adv_shared_2" />
+                </section>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="out_sample" format="vcf" label="${tool.name} on ${on_string}: output" from_work_dir="output.vcf" />
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <conditional name="reference_source">
+                <param name="ref_selector" value="history"/>
+                <param name="ref" ftype="fasta" value="genome.fasta" />
+            </conditional>
+            <param name="seq_method" value="pb_ccs" />
+            <param name="input_sample_file" ftype="bam" value="sample.bam" />
+            <output name="out_sample">
+                <assert_contents>
+                    <has_text text="##fileformat=VCFv4.2"/>
+                    <has_text text="##source=cuteSV-1.0.8"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="reference_source">
+                <param name="ref_selector" value="cached"/>
+                <param name="ref" value="test_buildid"/>
+            </conditional>
+            <param name="seq_method" value="pb_ccs" />
+            <param name="input_sample_file" ftype="bam" value="sample.bam" />
+            <output name="out_sample">
+                <assert_contents>
+                    <has_text text="##fileformat=VCFv4.2"/>
+                    <has_text text="##source=cuteSV-1.0.8"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+cuteSV
+=============
+
+Long-read sequencing enables the comprehensive discovery of structural variations (SVs). However, it is still non-trivial to achieve high sensitivity and performance simultaneously due to the complex SV characteristics implied by noisy long reads. Therefore, we propose cuteSV, a sensitive, fast and scalable long-read-based SV detection approach. cuteSV uses tailored methods to collect the signatures of various types of SVs and employs a clustering-and-refinement method to analyze the signatures to implement sensitive SV detection. Benchmarks on real Pacific Biosciences (PacBio) and Oxford Nanopore Technology (ONT) datasets demonstrate that cuteSV has better yields and scalability than state-of-the-art tools.
+
+For more information see the cuteSV documentation_.
+
+.. _documentation: https://github.com/tjiangHIT/cuteSV
+
+    ]]></help>
+     <citations>
+         <citation type="doi">10.1186/s13059-020-02107-y</citation>
+    </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Thu Sep 24 15:11:57 2020 +0000
@@ -0,0 +1,77 @@
+<macros>
+    <token name="@WRAPPER_VERSION@">@TOOL_VERSION@+galaxy0</token>
+    <token name="@TOOL_VERSION@">1.0.8</token>
+    <xml name="reference_interface">
+        <conditional name="reference_source">
+            <param name="ref_selector" type="select" label="Choose the source for the reference genome">
+                <option value="cached">Locally cached</option>
+                <option value="history">History</option>
+            </param>
+            <when value="cached">
+                <param argument="--ref" type="select" label="Reference genome">
+                    <options from_data_table="fasta_indexes">
+                        <!-- <filter type="data_meta" column="dbkey" key="dbkey" ref="input_sample_file" /> -->
+                        <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file" />
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param argument="--ref" type="data" format="fasta" label="Reference" help="Reference sequence" />
+            </when>
+        </conditional>
+    </xml>
+    <xml name="adv_shared_1">
+        <param name="batches" type="integer" value="10000000" min="0" label="Batches" help="Batch of genome segmentation interval." />
+        <param name="max_split_parts" type="integer" value="7" min="0" label="Maximum number of split segments" help="Maximum number of split segments a read may be aligned before it is ignored." />
+        <param name="min_mapq" type="integer" value="20" min="0" label="Minimum mapping quality" help="Minimum mapping quality value of alignment to be taken into account." />
+        <param name="min_read_len" type="integer" value="500" min="0" label="Minimum read length" help="Ignores reads that only report alignments with not longer than bp." />
+        <param name="merge_del_threshold" type="integer" value="0" min="0" label="Maximum distance of deletions" help="Maximum distance of deletion signals to be merged." />
+        <param name="merge_ins_threshold" type="integer" value="100" min="0" label="Maximum distance of insertions" help="Maximum distance of insertion signals to be merged." />
+        <param name="min_support" type="integer" value="10" min="0" label="Minimum number of reads that support a SV" help="Minimum number of reads that support a SV to be reported." />
+        <param name="min_size" type="integer" value="30" min="0" label="Minimum SV length" help="Minimum length of SV to be reported. " />
+        <param name="max_size" type="integer" value="100000" min="0" label="Maximum SV length" help="Maximum length of SV to be reported." />
+        <section name="genotyping" title="Genotyping settings" expanded="false">
+            <param name="genotype" type="boolean" checked="False" label="Genotype" help="Enable to generate genotypes." />
+            <param name="gt_round" type="integer" value="500" min="1" label="Maximum iterations for genotyping" help="Maximum round of iteration for alignments searching if perform genotyping." />
+        </section>
+    </xml>
+    <xml name="adv_pb_clr">
+        <section name="seq_method" title="PacBio CLR settings" expanded="false">
+            <param name="max_cluster_bias_INS" type="integer" value="100" min="0" label="Maximum cluster distance (insertion)" help="Maximum distance to cluster read together for insertion." />
+            <param name="diff_ratio_merging_INS" type="float" value="0.3" min="0" max="1" label="Breakpoints merge threshold (insertion)" help="Do not merge breakpoints with basepair identity more than the ratio of default for insertion." />
+            <param name="max_cluster_bias_DEL" type="integer" value="200" min="0" label="Maximum cluster distance (deletion)" help="Maximum distance to cluster read together for deletion." />
+            <param name="diff_ratio_merging_DEL" type="float" value="0.5" min="0" max="1" label="Breakpoints merge threshold (deletion)" help="Do not merge breakpoints with basepair identity more than the ratio of default for deletion." />
+        </section>
+    </xml>
+    <xml name="adv_pb_ccs">
+        <section name="seq_method" title="PacBio CCS(HIFI) settings" expanded="false">
+            <param name="max_cluster_bias_INS" type="integer" value="1000" min="0" label="Maximum cluster distance (insertion)" help="Maximum distance to cluster read together for insertion." />
+            <param name="diff_ratio_merging_INS" type="float" value="0.9" min="0" max="1" label="Breakpoints merge threshold (insertion)" help="Do not merge breakpoints with basepair identity more than the ratio of default for insertion." />
+            <param name="max_cluster_bias_DEL" type="integer" value="1000" min="0" label="Maximum cluster distance (deletion)" help="Maximum distance to cluster read together for deletion." />
+            <param name="diff_ratio_merging_DEL" type="float" value="0.5" min="0" max="1" label="Breakpoints merge threshold (deletion)" help="Do not merge breakpoints with basepair identity more than the ratio of default for deletion." />
+        </section>
+    </xml>
+    <xml name="adv_ont">
+        <section name="seq_method" title="ONT settings" expanded="false">
+            <param name="max_cluster_bias_INS" type="integer" value="100" min="0" label="Maximum cluster distance (insertion)" help="Maximum distance to cluster read together for insertion." />
+            <param name="diff_ratio_merging_INS" type="float" value="0.3" min="0" max="1" label="Breakpoints merge threshold (insertion)" help="Do not merge breakpoints with basepair identity more than the ratio of default for insertion." />
+            <param name="max_cluster_bias_DEL" type="integer" value="100" min="0" label="Maximum cluster distance (deletion)" help="Maximum distance to cluster read together for deletion." />
+            <param name="diff_ratio_merging_DEL" type="float" value="0.3" min="0" max="1" label="Breakpoints merge threshold (deletion)" help="Do not merge breakpoints with basepair identity more than the ratio of default for deletion." />
+        </section>
+    </xml>
+    <xml name="adv_other">
+        <section name="seq_method" title="Advanced method settings" expanded="true">
+            <param name="max_cluster_bias_INS" type="integer" value="100" min="0" label="Maximum cluster distance (insertion)" help="Maximum distance to cluster read together for insertion." />
+            <param name="diff_ratio_merging_INS" type="float" value="0.3" min="0" max="1" label="Breakpoints merge threshold (insertion)" help="Do not merge breakpoints with basepair identity more than the ratio of default for insertion." />
+            <param name="max_cluster_bias_DEL" type="integer" value="200" min="0" label="Maximum cluster distance (deletion)" help="Maximum distance to cluster read together for deletion." />
+            <param name="diff_ratio_merging_DEL" type="float" value="0.5" min="0" max="1" label="Breakpoints merge threshold (deletion)" help="Do not merge breakpoints with basepair identity more than the ratio of default for deletion." />
+        </section>
+    </xml>
+    <xml name="adv_shared_2">
+        <param name="max_cluster_bias_INV" type="integer" value="500" min="0" label="Maximum distance to cluster (inversion)" help="Maximum distance to cluster read together for inversion." />
+        <param name="max_cluster_bias_DUP" type="integer" value="500" min="0" label="Maximum distance to cluster (duplication)" help="Maximum distance to cluster read together for duplication." />
+        <param name="max_cluster_bias_TRA" type="integer" value="50" min="0" label="Maximum distance to cluster (translocation)" help="Maximum distance to cluster read together for translocation." />
+        <param name="diff_ratio_filtering_TRA" type="float" value="0.6" min="0" max="1" label="Breakpoints filter threshold (translocation)" help="Filter breakpoints with basepair identity less than the ratio of default for translocation." />
+        <param name="report_readid" type="boolean" checked="False" label="Report read id" help="Enable to report supporting read ids for each SV." />
+    </xml>
+</macros>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fasta_indexes.loc	Thu Sep 24 15:11:57 2020 +0000
@@ -0,0 +1,26 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Samtools indexed sequences data files.  You will need
+#to create these data files and then create a fasta_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The fasta_indexes.loc
+#file has this format (white space characters are TAB characters):
+#
+# <unique_build_id>	<dbkey>	<display_name>	<file_base_path>
+#
+#So, for example, if you had hg19 Canonical indexed stored in
+#
+# /depot/data2/galaxy/hg19/sam/,
+#
+#then the fasta_indexes.loc entry would look like this:
+#
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/depot/data2/galaxy/hg19/sam/hg19canon.fa
+#
+#and your /depot/data2/galaxy/hg19/sam/ directory
+#would contain hg19canon.fa and hg19canon.fa.fai files.
+#
+#Your fasta_indexes.loc file should include an entry per line for
+#each index set you have stored.  The file in the path does actually
+#exist, but it should never be directly used. Instead, the name serves
+#as a prefix for the index file.  For example:
+#
+test_buildid	hg17	test_displayname	${__HERE__}/genome.fasta
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome.fasta	Thu Sep 24 15:11:57 2020 +0000
@@ -0,0 +1,333 @@
+>chrM
+NNNCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCAT
+TTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTG
+GAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATT
+CTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACCTACTA
+AAGTGTGTTAATTAATTAATGCTTGTAGGACATAATAATAACAATTGAAT
+GTCTGCACAGCCGCTTTCCACACAGACATCATAACAAAAAATTTCCACCA
+AACCCCCCCCTCCCCCCGCTTCTGGCCACAGCACTTAAACACATCTCTGC
+CAAACCCCAAAAACAAAGAACCCTAACACCAGCCTAACCAGATTTCAAAT
+TTTATCTTTAGGCGGTATGCACTTTTAACAGTCACCCCCCAACTAACACA
+TTATTTTCCCCTCCCACTCCCATACTACTAATCTCATCAATACAACCCCC
+GCCCATCCTACCCAGCACACACACACCGCTGCTAACCCCATACCCCGAAC
+CAACCAAACCCCAAAGACACCCCCCACAGTTTATGTAGCTTACCTCCTCA
+AAGCAATACACTGAAAATGTTTAGACGGGCTCACATCACCCCATAAACAA
+ATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGC
+AAGCATCCCCGTTCCAGTGAGTTCACCCTCTAAATCACCACGATCAAAAG
+GGACAAGCATCAAGCACGCAGCAATGCAGCTCAAAACGCTTAGCCTAGCC
+ACACCCCCACGGGAAACAGCAGTGATTAACCTTTAGCAATAAACGAAAGT
+TTAACTAAGCTATACTAACCCCAGGGTTGGTCAATTTCGTGCCAGCCACC
+GCGGTCACACGATTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTT
+TAGATCACCCCCTCCCCAATAAAGCTAAAACTCACCTGAGTTGTAAAAAA
+CTCCAGTTGACACAAAATAGACTACGAAAGTGGCTTTAACATATCTGAAC
+ACACAATAGCTAAGACCCAAACTGGGATTAGATACCCCACTATGCTTAGC
+CCTAAACCTCAACAGTTAAATCAACAAAACTGCTCGCCAGAACACTACGA
+GCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATCCCTCTAGA
+GGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACCTCTTGC
+TCAGCCTATATACCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGT
+AAGCGCAAGTACCCACGTAAAGACGTTAGGTCAAGGTGTAGCCCATGAGG
+TGGCAAGAAATGGGCTACATTTTCTACCCCAGAAAACTACGATAGCCCTT
+ATGAAACTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTGAGAGTAGAGT
+GCTTAGTTGAACAGGGCCCTGAAGCGCGTACACACCGCCCGTCACCCTCC
+TCAAGTATACTTCAAAGGACATTTAACTAAAACCCCTACGCATTTATATA
+GAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACG
+AACCAGAGTGTAGCTTAACACAAAGCACCCAACTTACACTTAGGAGATTT
+CAACTTAACTTGACCGCTCTGAGCTAAACCTAGCCCCAAACCCACTCCAC
+CTTACTACCAGACAACCTTAGCCAAACCATTTACCCAAATAAAGTATAGG
+CGATAGAAATTGAAACCTGGCGCAATAGATATAGTACCGCAAGGGAAAGA
+TGAAAAATTATAACCAAGCATAATATAGCAAGGACTAACCCCTATACCTT
+CTGCATAATGAATTAACTAGAAATAACTTTGCAAGGAGAGCCAAAGCTAA
+GACCCCCGAAACCAGACGAGCTACCTAAGAACAGCTAAAAGAGCACACCC
+GTCTATGTAGCAAAATAGTGGGAAGATTTATAGGTAGAGGCGACAAACCT
+ACCGAGCCTGGTGATAGCTGGTTGTCCAAGATAGAATCTTAGTTCAACTT
+TAAATTTGCCCACAGAACCCTCTAAATCCCCTTGTAAATTTAACTGTTAG
+TCCAAAGAGGAACAGCTCTTTGGACACTAGGAAAAAACCTTGTAGAGAGA
+GTAAAAAATTTAACACCCATAGTAGGCCTAAAAGCAGCCACCAATTAAGA
+AAGCGTTCAAGCTCAACACCCACTACCTAAAAAATCCCAAACATATAACT
+GAACTCCTCACACCCAATTGGACCAATCTATCACCCTATAGAAGAACTAA
+TGTTAGTATAAGTAACATGAAAACATTCTCCTCCGCATAAGCCTGCGTCA
+GATCAAAACACTGAACTGACAATTAACAGCCCAATATCTACAATCAACCA
+ACAAGTCATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAG
+GAAAGGTTAAAAAAAGTAAAAGGAACTCGGCAAACCTTACCCCGCCTGTT
+TACCAAAAACATCACCTCTAGCATCACCAGTATTAGAGGCACCGCCTGCC
+CAGTGACACATGTTTAACGGCCGCGGTACCCTAACCGTGCAaaggtagca
+taatcacttgttccttaaatagggacctgtatgaatggctccacgagggt
+tcagctgtctcttacttttaaccagtgaaattgacctgcccgtgaagagg
+cgggcatgacacagcaagacgagaagaccctatggagctttaatttaTTA
+ATGCAAACAGTACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCA
+TTAAAAATTTCGGTTGGGGCGACCTCGGAGCAGAACCCAACCTCCGAGCA
+GTACATGCTAAGACTTCACCAGTCAAAGCGAACTACTATACTCAATTGAT
+CCAATAACTTGACCAACGGAACAAGTTACCCTAGGGATAACAGCGCAATC
+CTATTCTAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGAT
+CAGGACATCCCGATGGTGCAGCCGCTATTAAAGGTTCGTTTGTTCAACGA
+TTAAAGTCCTACGTGATCTGAGTTCAGACCGGAGTAATCCAGGTCGGTTT
+CTATCTACTTCAAATTCCTCCCTGTACGAAAGGACAAGAGAAATAAGGCC
+TACTTCACAAAGCGCCTTCCCCCGTAAATGATATCATCTCAACTTAGTAT
+TATACCCACACCCACCCAAGAACAGGGTTTgttaagatggcagagcccgg
+taatcgcataaaacttaaaactttacagtcagaggttcaattcctcttct
+taacaacaTACCCATGGCCAACCTCCTACTCCTCATTGTACCCATTCTAA
+TCGCAATGGCATTCCTAATGCTTACCGAACGAAAAATTCTAGGCTATATA
+CAACTACGCAAAGGCCCCAACGTTGTAGGCCCCTACGGGCTACTACAACC
+CTTCGCTGACGCCATAAAACTCTTCACCAAAGAGCCCCTAAAACCCGCCA
+CATCTACCATCACCCTCTACATCACCGCCCCGACCTTAGCTCTCACCATC
+GCTCTTCTACTATGAACCCCCCTCCCCATACCCAACCCCCTGGTCAACCT
+CAACCTAGGCCTCCTATTTATTCTAGCCACCTCTAGCCTAGCCGTTTACT
+CAATCCTCTGATCAGGGTGAGCATCAAACTCAAACTACGCCCTGATCGGC
+GCACTGCGAGCAGTAGCCCAAACAATCTCATATGAAGTCACCCTAGCCAT
+CATTCTACTATCAACATTACTAATAAGTGGCTCCTTTAACCTCTCCACCC
+TTATCACAACACAAGAACACCTCTGATTACTCCTGCCATCATGACCCTTG
+GCCATAATATGATTTATCTCCACACTAGCAGAGACCAACCGAACCCCCTT
+CGACCTTGCCGAAGGGGAGTCCGAACTAGTCTCAGGCTTCAACATCGAAT
+ACGCCGCAGGCCCCTTCGCCCTATTCTTCATAGCCGAATACACAAACATT
+ATTATAATAAACACCCTCACCACTACAATCTTCCTAGGAACAACATATGA
+CGCACTCTCCCCTGAACTCTACACAACATATTTTGTCACCAAGACCCTAC
+TTCTAACCTCCCTGTTCTTATGAATTCGAACAGCATACCCCCGATTCCGC
+TACGACCAACTCATACACCTCCTATGAAAAAACTTCCTACCACTCACCCT
+AGCATTACTTATATGATATGTCTCCATACCCATTACAATCTCCAGCATTC
+CCCCTCAAACCTAAGAAATATGTCTGATAAAAGAGTTACTTTGATAGAGT
+AAATAATAGGAGCTTAAACCCCCTTATTTctaggactatgagaatcgaac
+ccatccctgagaatccaaaattctccgtgccacctatcacaccccatcct
+aAAGTAAGGTCAGCTAAATAAGCTATCGGGCCCATACCCCGAAAATGTTG
+GTTATACCCTTCCCGTACTAATTAATCCCCTGGCCCAACCCGTCATCTAC
+TCTACCATCTTTGCAGGCACACTCATCACAGCGCTAAGCTCGCACTGATT
+TTTTACCTGAGTAGGCCTAGAAATAAACATGCTAGCTTTTATTCCAGTTC
+TAACCAAAAAAATAAACCCTCGTTCCACAGAAGCTGCCATCAAGTATTTC
+CTCACGCAAGCAACCGCATCCATAATCCTTCTAATAGCTATCCTCTTCAA
+CAATATACTCTCCGGACAATGAACCATAACCAATACTACCAATCAATACT
+CATCATTAATAATCATAATGGCTATAGCAATAAAACTAGGAATAGCCCCC
+TTTCACTTCTGAGTCCCAGAGGTTACCCAAGGCACCCCTCTGACATCCGG
+CCTGCTTCTTCTCACATGACAAAAACTAGCCCCCATCTCAATCATATACC
+AAATCTCTCCCTCACTAAACGTAAGCCTTCTCCTCACTCTCTCAATCTTA
+TCCATCATAGCAGGCAGTTGAGGTGGATTAAACCAAACCCAGCTACGCAA
+AATCTTAGCATACTCCTCAATTACCCACATAGGATGAATAATAGCAGTTC
+TACCGTACAACCCTAACATAACCATTCTTAATTTAACTATTTATATTATC
+CTAACTACTACCGCATTCCTACTACTCAACTTAAACTCCAGCACCACGAC
+CCTACTACTATCTCGCACCTGAAACAAGCTAACATGACTAACACCCTTAA
+TTCCATCCACCCTCCTCTCCCTAGGAGGCCTGCCCCCGCTAACCGGCTTT
+TTGCCCAAATGGGCCATTATCGAAGAATTCACAAAAAACAATAGCCTCAT
+CATCCCCACCATCATAGCCACCATCACCCTCCTTAACCTCTACTTCTACC
+TACGCCTAATCTACTCCACCTCAATCACACTACTCCCCATATCTAACAAC
+GTAAAAATAAAATGACAGTTTGAACATACAAAACCCACCCCATTCCTCCC
+CACACTCATCGCCCTTACCACGCTACTCCTACCTATCTCCCCTTTTATAC
+TAATAATCTTATAGAAATTTAGGTTAAATACAGACCAAGAGCCTTCAAAG
+CCCTCAGTAAGTTGCAATACTTAATTTCTGCAACAGCTAAGGACTGCAAA
+ACCCCACTCTGCATCAACTGAACGCAAATCAGCCACTTTAATTAAGCTAA
+GCCCTTACTAGACCAATGGGACTTAAACCCACAAACACTTAGTTAACAGC
+TAAGCACCCTAATCAACTGGCTTCAATCTACTTCTCCCGCCGCCGGGAAA
+AAAGGCGGGAGAAGCCCCGGCAGGTTTGAAGCTGCTTCTTCGAATTTGCA
+ATTCAATATGAAAATCACCTCGGAGCTGGTAAAAAGAGGCCTAACCCCTG
+TCTTTAGATTTACAGTCCAATGCTTCACTCAGCCATTTTACCTCACCCCC
+ACTGATGTTCGCCGACCGTTGACTATTCTCTACAAACCACAAAGACATTG
+GAACACTATACCTATTATTCGGCGCATGAGCTGGAGTCCTAGGCACAGCT
+CTAAGCCTCCTTATTCGAGCCGAGCTGGGCCAGCCAGGCAACCTTCTAGG
+TAACGACCACATCTACAACGTTATCGTCACAGCCCATGCATTTGTAATAA
+TCTTCTTCATAGTAATACCCATCATAATCGGAGGCTTTGGCAACTGACTA
+GTTCCCCTAATAATCGGTGCCCCCGATATGGCGTTTCCCCGCATAAACAA
+CATAAGCTTCTGACTCTTACCTCCCTCTCTCCTACTCCTGCTCGCATCTG
+CTATAGTGGAGGCCGGAGCAGGAACAGGTTGAACAGTCTACCCTCCCTTA
+GCAGGGAACTACTCCCACCCTGGAGCCTCCGTAGACCTAACCATCTTCTC
+CTTACACCTAGCAGGTGTCTCCTCTATCTTAGGGGCCATCAATTTCATCA
+CAACAATTATCAATATAAAACCCCCTGCCATAACCCAATACCAAACGCCC
+CTCTTCGTCTGATCCGTCCTAATCACAGCAGTCCTACTTCTCCTATCTCT
+CCCAGTCCTAGCTGCTGGCATCACTATACTACTAACAGACCGCAACCTCA
+ACACCACCTTCTTCGACCCCGCCGGAGGAGGAGACCCCATTCTATACCAA
+CACCTATTCTGATTTTTCGGTCACCCTGAAGTTTATATTCTTATCCTACC
+AGGCTTCGGAATAATCTCCCATATTGTAACTTACTACTCCGGAAAAAAAG
+AACCATTTGGATACATAGGTATGGTCTGAGCTATGATATCAATTGGCTTC
+CTAGGGTTTATCGTGTGAGCACACCATATATTTACAGTAGGAATAGACGT
+AGACACACGAGCATATTTCACCTCCGCTACCATAATCATCGCTATCCCCA
+CCGGCGTCAAAGTATTTAGCTGACTCGCCACACTCCACGGAAGCAATATG
+AAATGATCTGCTGCAGTGCTCTGAGCCCTAGGATTCATCTTTCTTTTCAC
+CGTAGGTGGCCTGACTGGCATTGTATTAGCAAACTCATCACTAGACATCG
+TACTACACGACACGTACTACGTTGTAGCTCACTTCCACTATGTCCTATCA
+ATAGGAGCTGTATTTGCCATCATAGGAGGCTTCATTCACTGATTTCCCCT
+ATTCTCAGGCTACACCCTAGACCAAACCTACGCCAAAATCCATTTCACTA
+TCATATTCATCGGCGTAAATCTAACTTTCTTCCCACAACACTTTCTCGGC
+CTATCCGGAATGCCCCGACGTTACTCGGACTACCCCGATGCATACACCAC
+ATGAAACATCCTATCATCTGTAGGCTCATTCATTTCTCTAACAGCAGTAA
+TATTAATAATTTTCATGATTTGAGAAGCCTTCGCTTCGAAGCGAAAAGTC
+CTAATAGTAGAAGAACCCTCCATAAACCTGGAGTGACTATATGGATGCCC
+CCCACCCTACCACACATTCGAAGAACCCGTATACATAAAATCTAGACAaa
+aaaggaaggaatcgaaccccccaaagctggtttcaagccaaccccatggc
+ctccatgactttttcAAAAAGGTATTAGAAAAACCATTTCATAACTTTGT
+CAAAGTTAAATTATAGGCTAAATCCTATATATCTTAATGGCACATGCAGC
+GCAAGTAGGTCTACAAGACGCTACTTCCCCTATCATAGAAGAGCTTATCA
+CCTTTCATGATCACGCCCTCATAATCATTTTCCTTATCTGCTTCCTAGTC
+CTGTATGCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACAT
+CTCAGACGCTCAGGAAATAGAAACCGTCTGAACTATCCTGCCCGCCATCA
+TCCTAGTCCTCATCGCCCTCCCATCCCTACGCATCCTTTACATAACAGAC
+GAGGTCAACGATCCCTCCCTTACCATCAAATCAATTGGCCACCAATGGTA
+CTGAACCTACGAGTACACCGACTACGGCGGACTAATCTTCAACTCCTACA
+TACTTCCCCCATTATTCCTAGAACCAGGCGACCTGCGACTCCTTGACGTT
+GACAATCGAGTAGTACTCCCGATTGAAGCCCCCATTCGTATAATAATTAC
+ATCACAAGACGTCTTGCACTCATGAGCTGTCCCCACATTAGGCTTAAAAA
+CAGATGCAATTCCCGGACGTCTAAACCAAACCACTTTCACCGCTACACGA
+CCGGGGGTATACTACGGTCAATGCTCTGAAATCTGTGGAGCAAACCACAG
+TTTCATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAAATAG
+GGCCCGTATTTACCCTATAGCACCCCCTCTACCCCCTCTAGAGCCCACTG
+TAAAGCTAACTTAGCATTAACCTTTTAAGTTAAAGATTAAGAGAACCAAC
+ACCTCTTTACAGTGAAATGCCCCAACTAAATACTACCGTATGGCCCACCA
+TAATTACCCCCATACTCCTTACACTATTCCTCATCACCCAACTAAAAATA
+TTAAACACAAACTACCACCTACCTCCCTCACCAAAGCCCATAAAAATAAA
+AAATTATAACAAACCCTGAGAACCAAAATGAACGAAAATCTGTTCGCTTC
+ATTCATTGCCCCCACAATCCTAGGCCTACCCGCCGCAGTACTGATCATTC
+TATTTCCCCCTCTATTGATCCCCACCTCCAAATATCTCATCAACAACCGA
+CTAATCACCACCCAACAATGACTAATCAAACTAACCTCAAAACAAATGAT
+AGCCATACACAACACTAAAGGACGAACCTGATCTCTTATACTAGTATCCT
+TAATCATTTTTATTGCCACAACTAACCTCCTCGGACTCCTGCCTCACTCA
+TTTACACCAACCACCCAACTATCTATAAACCTAGCCATGGCCATCCCCTT
+ATGAGCGGGCGCAGTGATTATAGGCTTTCGCTCTAAGATTAAAAATGCCC
+TAGCCCACTTCTTACCACAAGGCACACCTACACCCCTTATCCCCATACTA
+GTTATTATCGAAACCATCAGCCTACTCATTCAACCAATAGCCCTGGCCGT
+ACGCCTAACCGCTAACATTACTGCAGGCCACCTACTCATGCACCTAATTG
+GAAGCGCCACCCTAGCAATATCAACCATTAACCTTCCCTCTACACTTATC
+ATCTTCACAATTCTAATTCTACTGACTATCCTAGAAATCGCTGTCGCCTT
+AATCCAAGCCTACGTTTTCACACTTCTAGTAAGCCTCTACCTGCACGACA
+ACACATAATGACCCACCAATCACATGCCTATCATATAGTAAAACCCAGCC
+CATGACCCCTAACAGGGGCCCTCTCAGCCCTCCTAATGACCTCCGGCCTA
+GCCATGTGATTTCACTTCCACTCCATAACGCTCCTCATACTAGGCCTACT
+AACCAACACACTAACCATATACCAATGGTGGCGCGATGTAACACGAGAAA
+GCACATACCAAGGCCACCACACACCACCTGTCCAAAAAGGCCTTCGATAC
+GGGATAATCCTATTTATTACCTCAGAAGTTTTTTTCTTCGCAGGATTTTT
+CTGAGCCTTTTACCACTCCAGCCTAGCCCCTACCCCCCAACTAGGAGGGC
+ACTGGCCCCCAACAGGCATCACCCCGCTAAATCCCCTAGAAGTCCCACTC
+CTAAACACATCCGTATTACTCGCATCAGGAGTATCAATCACCTGAGCTCA
+CCATAGTCTAATAGAAAACAACCGAAACCAAATAATTCAAGCACTGCTTA
+TTACAATTTTACTGGGTCTCTATTTTACCCTCCTACAAGCCTCAGAGTAC
+TTCGAGTCTCCCTTCACCATTTCCGACGGCATCTACGGCTCAACATTTTT
+TGTAGCCACAGGCTTCCACGGACTTCACGTCATTATTGGCTCAACTTTCC
+TCACTATCTGCTTCATCCGCCAACTAATATTTCACTTTACATCCAAACAT
+CACTTTGGCTTCGAAGCCGCCGCCTGATACTGGCATTTTGTAGATGTGGT
+TTGACTATTTCTGTATGTCTCCATCTATTGATGAGGGTCTTACTCTTTTA
+GTATAAATAGTACCGTTAACTTCCAATTAACTAGTTTTGACAACATTCAA
+AAAAGAGTAATAAACTTCGCCTTAATTTTAATAATCAACACCCTCCTAGC
+CTTACTACTAATAATTATTACATTTTGACTACCACAACTCAACGGCTACA
+TAGAAAAATCCACCCCTTACGAGTGCGGCTTCGACCCTATATCCCCCGCC
+CGCGTCCCTTTCTCCATAAAATTCTTCTTAGTAGCTATTACCTTCTTATT
+ATTTGATCTAGAAATTGCCCTCCTTTTACCCCTACCATGAGCCCTACAAA
+CAACTAACCTGCCACTAATAGTTATGTCATCCCTCTTATTAATCATCATC
+CTAGCCCTAAGTCTGGCCTATGAGTGACTACAAAAAGGATTAGACTGAGC
+CGAATTGGTATATAGTTTAAACAAAACGAATGATTTCGACTCATTAAATT
+ATGATAATCATATTTACCAAATGCCCCTCATTTACATAAATATTATACTA
+GCATTTACCATCTCACTTCTAGGAATACTAGTATATCGCTCACACCTCAT
+ATCCTCCCTACTATGCCTAGAAGGAATAATACTATCGCTGTTCATTATAG
+CTACTCTCATAACCCTCAACACCCACTCCCTCTTAGCCAATATTGTGCCT
+ATTGCCATACTAGTCTTTGCCGCCTGCGAAGCAGCGGTGGGCCTAGCCCT
+ACTAGTCTCAATCTCCAACACATATGGCCTAGACTACGTACATAACCTAA
+ACCTACTCCAATGCTAAAACTAATCGTCCCAACAATTATATTACTACCAC
+TGACATGACTTTCCAAAAAGCACATAATTTGAATCAACACAACCACCCAC
+AGCCTAATTATTAGCATCATCCCCCTACTATTTTTTAACCAAATCAACAA
+CAACCTATTTAGCTGTTCCCCAACCTTTTCCTCCGACCCCCTAACAACCC
+CCCTCCTAATACTAACTACCTGACTCCTACCCCTCACAATCATGGCAAGC
+CAACGCCACTTATCCAGCGAACCACTATCACGAAAAAAACTCTACCTCTC
+TATACTAATCTCCCTACAAATCTCCTTAATTATAACATTCACAGCCACAG
+AACTAATCATATTTTATATCTTCTTCGAAACCACACTTATCCCCACCTTG
+GCTATCATCACCCGATGAGGCAACCAGCCAGAACGCCTGAACGCAGGCAC
+ATACTTCCTATTCTACACCCTAGTAGGCTCCCTTCCCCTACTCATCGCAC
+TAATTTACACTCACAACACCCTAGGCTCACTAAACATTCTACTACTCACT
+CTCACTGCCCAAGAACTATCAAACTCCTGAGCCAACAACTTAATATGACT
+AGCTTACACAATAGCTTTTATAGTAAAGATACCTCTTTACGGACTCCACT
+TATGACTCCCTAAAGCCCATGTCGAAGCCCCCATCGCTGGGTCAATAGTA
+CTTGCCGCAGTACTCTTAAAACTAGGCGGCTATGGTATAATACGCCTCAC
+ACTCATTCTCAACCCCCTGACAAAACACATAGCCTACCCCTTCCTTGTAC
+TATCCCTATGAGGCATAATTATAACAAGCTCCATCTGCCTACGACAAACA
+GACCTAAAATCGCTCATTGCATACTCTTCAATCAGCCACATAGCCCTCGT
+AGTAACAGCCATTCTCATCCAAACCCCCTGAAGCTTCACCGGCGCAGTCA
+TTCTCATAATCGCCCACGGACTCACATCCTCATTACTATTCTGCCTAGCA
+AACTCAAACTACGAACGCACTCACAGTCGCATCATAATCCTCTCTCAAGG
+ACTTCAAACTCTACTCCCACTAATAGCTTTTTGATGACTTCTAGCAAGCC
+TCGCTAACCTCGCCTTACCCCCCACTATTAACCTACTGGGAGAACTCTCT
+GTGCTAGTAACCACGTTCTCCTGATCAAATATCACTCTCCTACTTACAGG
+ACTCAACATACTAGTCACAGCCCTATACTCCCTCTACATATTTACCACAA
+CACAATGGGGCTCACTCACCCACCACATTAACAACATAAAACCCTCATTC
+ACACGAGAAAACACCCTCATGTTCATACACCTATCCCCCATTCTCCTCCT
+ATCCCTCAACCCCGACATCATTACCGGGTTTTCCTCTTGTAAATATAGTT
+TAACCAAAACATCAGATTGTGAATCTGACAACAGAGGCTTACGACCCCTT
+ATTTACCGAGAAAGCTCACAAGAACTGCTAACTCATGCCCCCATGTCTAA
+CAACATGGCTTTCTCAACTTTTAAAGGATAACAGCTATCCATTGGTCTTA
+GGCCCCAAAAATTTTGGTGCAACTCCAAATAAAAGTAATAACCATGCACA
+CTACTATAACCACCCTAACCCTGACTTCCCTAATTCCCCCCATCCTTACC
+ACCCTCGTTAACCCTAACAAAAAAAACTCATACCCCCATTATGTAAAATC
+CATTGTCGCATCCACCTTTATTATCAGTCTCTTCCCCACAACAATATTCA
+TGTGCCTAGACCAAGAAGTTATTATCTCGAACTGACACTGAGCCACAACC
+CAAACAACCCAGCTCTCCCTAAGCTTCAAACTAGACTACTTCTCCATAAT
+ATTCATCCCTGTAGCATTGTTCGTTACATGGTCCATCATAGAATTCTCAC
+TGTGATATATAAACTCAGACCCAAACATTAATCAGTTCTTCAAATATCTA
+CTCATTTTCCTAATTACCATACTAATCTTAGTTACCGCTAACAACCTATT
+CCAACTGTTCATCGGCTGAGAGGGCGTAGGAATTATATCCTTCTTGCTCA
+TCAGTTGATGATACGCCCGAGCAGATGCCAACACAGCAGCCATTCAAGCA
+GTCCTATACAACCGTATCGGCGATATCGGTTTCATCCTCGCCTTAGCATG
+ATTTATCCTACACTCCAACTCATGAGACCCACAACAAATAGCCCTTCTAA
+ACGCTAATCCAAGCCTCACCCCACTACTAGGCCTCCTCCTAGCAGCAGCA
+GGCAAATCAGCCCAATTAGGTCTCCACCCCTGACTCCCCTCAGCCATAGA
+AGGCCCCACCCCAGTCTCAGCCCTACTCCACTCAAGCACTATAGTTGTAG
+CAGGAATCTTCTTACTCATCCGCTTCCACCCCCTAGCAGAAAATAGCCCA
+CTAATCCAAACTCTAACACTATGCTTAGGCGCTATCACCACTCTGTTCGC
+AGCAGTCTGCGCCCTTACACAAAATGACATCAAAAAAATCGTAGCCTTCT
+CCACTTCAAGTCAACTAGGACTCATAATAGTTACAATCGGCATCAACCAA
+CCACACCTAGCATTCCTGCACATCTGTACCCACGCCTTCTTCAAAGCCAT
+ACTATTTATGTGCTCCGGGTCCATCATCCACAACCTTAACAATGAACAAG
+ATATTCGAAAAATAGGAGGACTACTCAAAACCATACCTCTCACTTCAACC
+TCCCTCACCATTGGCAGCCTAGCATTAGCAGGAATACCTTTCCTCACAGG
+TTTCTACTCCAAAGACCACATCATCGAAACCGCAAACATATCATACACAA
+ACGCCTGAGCCCTATCTATTACTCTCATCGCTACCTCCCTGACAAGCGCC
+TATAGCACTCGAATAATTCTTCTCACCCTAACAGGTCAACCTCGCTTCCC
+CACCCTTACTAACATTAACGAAAATAACCCCACCCTACTAAACCCCATTA
+AACGCCTGGCAGCCGGAAGCCTATTCGCAGGATTTCTCATTACTAACAAC
+ATTTCCCCCGCATCCCCCTTCCAAACAACAATCCCCCTCTACCTAAAACT
+CACAGCCCTCGCTGTCACTTTCCTAGGACTTCTAACAGCCCTAGACCTCA
+ACTACCTAACCAACAAACTTAAAATAAAATCCCCACTATGCACATTTTAT
+TTCTCCAACATACTCGGATTCTACCCTAGCATCACACACCGCACAATCCC
+CTATCTAGGCCTTCTTACGAGCCAAAACCTGCCCCTACTCCTCCTAGACC
+TAACCTGACTAGAAAAGCTATTACCTAAAACAATTTCACAGCACCAAATC
+TCCACCTCCATCATCACCTCAACCCAAAAAGGCATAATTAAACTTTACTT
+CCTCTCTTTCTTCTTCCCACTCATCCTAACCCTACTCCTAATCACATAAC
+CTATTCCCCCGAGCAATCTCAATTACAATATATACACCAACAAACAATGT
+TCAACCAGTAACCACTACTAATCAACGCCCATAATCATACAAAGCCCCCG
+CACCAATAGGATCCTCCCGAATCAACCCTGACCCCTCTCCTTCATAAATT
+ATTCAGCTTCCTACACTATTAAAGTTTACCACAACCACCACCCCATCATA
+CTCTTTCACCCACAGCACCAATCCTACCTCCATCGCTAACCCCACTAAAA
+CACTCACCAAGACCTCAACCCCTGACCCCCATGCCTCAGGATACTCCTCA
+ATAGCCATCGCTGTAGTATATCCAAAGACAACCATCATTCCCCCTAAATA
+AATTAAAAAAACTATTAAACCCATATAACCTCCCCCAAAATTCAGAATAA
+TAACACACCCGACCACACCGCTAACAATCAGTACTAAACCCCCATAAATA
+GGAGAAGGCTTAGAAGAAAACCCCACAAACCCCATTACTAAACCCACACT
+CAACAGAAACAAAGCATACATCATTATTCTCGCACGGACTACAACCACGA
+CCAATGATATGAAAAACCATCGTTGTATTTCAACTACAAGAACACCAATG
+ACCCCAATACGCAAAATTAACCCCCTAATAAAATTAATTAACCACTCATT
+CATCGACCTCCCCACCCCATCCAACATCTCCGCATGATGAAACTTCGGCT
+CACTCCTTGGCGCCTGCCTGATCCTCCAAATCACCACAGGACTATTCCTA
+GCCATACACTACTCACCAGACGCCTCAACCGCCTTTTCATCAATCGCCCA
+CATCACTCGAGACGTAAATTATGGCTGAATCATCCGCTACCTTCACGCCA
+ATGGCGCCTCAATATTCTTTATCTGCCTCTTCCTACACATCGGGCGAGGC
+CTATATTACGGATCATTTCTCTACTCAGAAACCTGAAACATCGGCATTAT
+CCTCCTGCTTGCAACTATAGCAACAGCCTTCATAGGCTATGTCCTCCCGT
+GAGGCCAAATATCATTCTGAGGGGCCACAGTAATTACAAACTTACTATCC
+GCCATCCCATACATTGGGACAGACCTAGTTCAATGAATCTGAGGAGGCTA
+CTCAGTAGACAGTCCCACCCTCACACGATTCTTTACCTTTCACTTCATCT
+TACCCTTCATTATTGCAGCCCTAGCAGCACTCCACCTCCTATTCTTGCAC
+GAAACGGGATCAAACAACCCCCTAGGAATCACCTCCCATTCCGATAAAAT
+CACCTTCCACCCTTACTACACAATCAAAGACGCCCTCGGCTTACTTCTCT
+TCCTTCTCTCCTTAATGACATTAACACTATTCTCACCAGACCTCCTAGGC
+GACCCAGACAATTATACCCTAGCCAACCCCTTAAACACCCCTCCCCACAT
+CAAGCCCGAATGATATTTCCTATTCGCCTACACAATTCTCCGATCCGTCC
+CTAACAAACTAGGAGGCGTCCTTGCCCTATTACTATCCATCCTCATCCTA
+GCAATAATCCCCATCCTCCATATATCCAAACAACAAAGCATAATATTTCG
+CCCACTAAGCCAATCACTTTATTGACTCCTAGCCGCAGACCTCCTCATTC
+TAACCTGAATCGGAGGACAACCAGTAAGCTACCCTTTTACCATCATTGGA
+CAAGTAGCATCCGTACTATACTTCACAACAATCCTAATCCTAATACCAAC
+TATCTCCCTAATTGAAAACAAAATACTCAAATGGGCCTGTCCTTGTAGTA
+TAAACTAATACACCAGTCTTGTAAACCGGAGACGAAAACCTTTTTCCAAG
+GACAAATCAGAGAAAAAGTCTTTAACTCCACCATTAGCACCCAAAGCTAA
+GATTCTAATTTAAACTATTCTCTGTTCTTTCATGGGGAAGCAGATTTGGG
+TACCACCCAAGTATTGACTCACCCATCAACAACCGCTATGTATTTCGTAC
+ATTACTGCCAGCCACCATGAATATTGTACGGTACCATAAATACTTGACCA
+CCTGTAGTACATAAAAACCCAACCCACATCAAACCCCCCCCCCCCATGCT
+TACAAGCAAGTACAGCAATCAACCTTCAACTATCACACATCAACTGCAAC
+TCCAAAGCCACCCCTCACCCACTAGGATACCAACAAACCTACCCACCCTT
+AACAGTACATAGTACATAAAGTCATTTACCGTACATAGCACATTACAGTC
+AAATCCCTTCTCGTCCCCATGGATGACCCCCCTCAGATAGGGGTCCCTTG
+ACCACCATCCTCCGTGAAATCAATATCCCGCACAAGAGTGCTACTCTCCT
+CGCTCCGGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGAC
+ATCTGGTTCCTACTTCAGGGCCATAAAGCCTAAATAGCCCACACGTTCCC
+CTTAAATAAGACATCACGATG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome.fasta.fai	Thu Sep 24 15:11:57 2020 +0000
@@ -0,0 +1,1 @@
+chrM	16571	6	50	51
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.vcf	Thu Sep 24 15:11:57 2020 +0000
@@ -0,0 +1,28 @@
+##fileformat=VCFv4.2
+##source=cuteSV-1.0.8
+##fileDate=2020-09-24 12:14:05 4-CEST
+##contig=<ID=chrM,length=16571>
+##ALT=<ID=INS,Description="Insertion of novel sequence relative to the reference">
+##ALT=<ID=DEL,Description="Deletion relative to the reference">
+##ALT=<ID=DUP,Description="Region of elevated copy number relative to the reference">
+##ALT=<ID=INV,Description="Inversion of reference sequence">
+##ALT=<ID=BND,Description="Breakend of translocation">
+##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variant">
+##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variant">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Difference in length between REF and ALT alleles">
+##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
+##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
+##INFO=<ID=CILEN,Number=2,Type=Integer,Description="Confidence interval around inserted/deleted material between breakends">
+##INFO=<ID=RE,Number=1,Type=Integer,Description="Number of read support this record">
+##INFO=<ID=STRAND,Number=A,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
+##INFO=<ID=RNAMES,Number=.,Type=String,Description="Supporting read names of SVs (comma separated)">
+##FILTER=<ID=q5,Description="Quality below 5">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=DR,Number=1,Type=Integer,Description="# High-quality reference reads">
+##FORMAT=<ID=DV,Number=1,Type=Integer,Description="# High-quality variant reads">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="# Phred-scaled genotype likelihoods rounded to the closest integer">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="# Genotype quality">
+##CommandLine="cuteSV ./sample.bam ./genome.fa ./output.vcf ./ --threads 1 --batches 10000000 --sample Sample --max_split_parts 7 --min_mapq 20 --min_read_len 500 --merge_del_threshold 0 --merge_ins_threshold 100 --min_support 10 --min_size 30 --max_size 100000 --gt_round 500 --max_cluster_bias_INS 100 --diff_ratio_merging_INS 0.3 --max_cluster_bias_DEL 200 --diff_ratio_merging_DEL 0.5 --max_cluster_bias_INV 500 --max_cluster_bias_DUP 500 --max_cluster_bias_TRA 50 --diff_ratio_filtering_TRA 0.6"
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	Sample
Binary file test-data/sample.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/fasta_indexes.loc.sample	Thu Sep 24 15:11:57 2020 +0000
@@ -0,0 +1,29 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Samtools indexed sequences data files.  You will need
+#to create these data files and then create a fasta_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The fasta_indexes.loc
+#file has this format (white space characters are TAB characters):
+#
+# <unique_build_id>	<dbkey>	<display_name>	<file_base_path>
+#
+#So, for example, if you had hg19 Canonical indexed stored in
+#
+# /depot/data2/galaxy/hg19/sam/,
+#
+#then the fasta_indexes.loc entry would look like this:
+#
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/depot/data2/galaxy/hg19/sam/hg19canon.fa
+#
+#and your /depot/data2/galaxy/hg19/sam/ directory
+#would contain hg19canon.fa and hg19canon.fa.fai files.
+#
+#Your fasta_indexes.loc file should include an entry per line for
+#each index set you have stored.  The file in the path does actually
+#exist, but it should never be directly used. Instead, the name serves
+#as a prefix for the index file.  For example:
+#
+#hg18canon	hg18	Human (Homo sapiens): hg18 Canonical	/depot/data2/galaxy/hg18/sam/hg18canon.fa
+#hg18full	hg18	Human (Homo sapiens): hg18 Full	/depot/data2/galaxy/hg18/sam/hg18full.fa
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/depot/data2/galaxy/hg19/sam/hg19canon.fa
+#hg19full	hg19	Human (Homo sapiens): hg19 Full	/depot/data2/galaxy/hg19/sam/hg19full.fa
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Thu Sep 24 15:11:57 2020 +0000
@@ -0,0 +1,8 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <!-- Location of SAMTools indexes for FASTA files -->
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/fasta_indexes.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Thu Sep 24 15:11:57 2020 +0000
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Location of SAMTools indexed FASTA files -->
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/fasta_indexes.loc" />
+    </table>
+</tables>