Mercurial > repos > iuc > idba_ud
changeset 1:2ed5c0795f99 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/idba_ud commit 61e1699e65d6fd9f4f73650ed8463b37cd701344
author | iuc |
---|---|
date | Mon, 05 Aug 2019 15:59:42 -0400 |
parents | fdaf2375d405 |
children | 694b0f55b744 |
files | idba_ud.xml macros.xml test-data/all_fasta.loc test-data/reference.fa tool-data/all_fasta.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 7 files changed, 592 insertions(+), 85 deletions(-) [+] |
line wrap: on
line diff
--- a/idba_ud.xml Fri Sep 21 15:25:56 2018 -0400 +++ b/idba_ud.xml Mon Aug 05 15:59:42 2019 -0400 @@ -1,107 +1,137 @@ -<tool id="idba_ud" name="IDBA-UD" version="1.1.3"> +<tool id="idba_ud" name="IDBA-UD" version="@IDBA_VERSION@+galaxy1"> <description> - Iterative de Bruijn Graph Assembler <!--for sequencing data with highly uneven depth--> + Iterative de Bruijn Graph Assembler for data with highly uneven depth </description> <macros> <import>macros.xml</import> </macros> <expand macro="requirements"/> - <command><![CDATA[ - + <command detect_errors="aggressive"><![CDATA[ idba_ud - --read '$read' - #if $read_level_2: - --read_level_2 '$read_level_2' - #end if - #if $read_level_3: - --read_level_3 '$read_level_3' - #end if - #if $read_level_4: - --read_level_4 '$read_level_4' - #end if - #if $read_level_5: - --read_level_5 '$read_level_5' - #end if - #if $long_read: - --long_read '$long_read' - #end if - --mink $mink - --maxk $maxk - --step $step - --inner_mink $inner_mink - --inner_step $inner_step - --prefix $prefix - --min_count $min_count - --min_support $min_support - --num_threads \${GALAXY_SLOTS:-1} - --seed_kmer $seed_kmer - --min_contig $min_contig - --similar $similar - --max_mismatch $max_mismatch - --min_pairs $min_pairs - #if $other: - ${" ".join(str($other).split(","))} - #end if + @MAIN_INPUT@ + @LEVELS_INPUT@ + @KMER_OPTIONS@ + @FILTER_OPTIONS@ + --min_pairs $min_pairs + @OTHER_OPTIONS@ + @THREADS@ ]]></command> <inputs> - <param argument="--read" type="data" format="fasta" label="Fasta read file. Lower or equal to 600b"/> - <param argument="--long_read" type="data" format="fasta" optional="true" label="Fasta long read file. 
More than 600b"/> - - <param argument="--read_level_2" type="data" format="fasta" optional="true" label="Fasta Paired-end reads for second level scaffolds"/> - <param argument="--read_level_3" type="data" format="fasta" optional="true" label="Fasta Paired-end reads for third level scaffolds"/> - <param argument="--read_level_4" type="data" format="fasta" optional="true" label="Fasta Paired-end reads for fourth level scaffolds"/> - <param argument="--read_level_5" type="data" format="fasta" optional="true" label="Fasta Paired-end reads for fifth level scaffolds"/> - - <param argument="--mink" type="integer" value="20" max="312" label="Minimum k value. Lower or equal to 312"/> - <param argument="--maxk" type="integer" value="100" max="312" label="Maximum k value. Lower or equal to 312"/> - <param argument="--step" type="integer" value="20" label="Increment of k-mer of each iteration"/> - <param argument="--inner_mink" type="integer" value="10" label="Inner minimum k value"/> - <param argument="--inner_step" type="integer" value="5" label="Inner increment of k-mer"/> - <param argument="--prefix" type="integer" value="3" label="Prefix length used to build sub k-mer table"/> - <param argument="--min_count" type="integer" value="2" label="Minimum multiplicity for filtering k-mer when building the graph"/> - <param argument="--min_support" type="integer" value="1" label="Minimum supoort in each iteration"/> - <param argument="--seed_kmer" type="integer" value="30" label="Seed kmer size for alignment"/> - <param argument="--min_contig" type="integer" value="200" label="Minimum size of contig"/> - <param argument="--similar" type="float" value="0.95" label="Similarity for alignment"/> - <param argument="--max_mismatch" type="integer" value="3" label="Max mismatch of error correction"/> - <param argument="--min_pairs" type="integer" value="3" label="Minimum number of pairs"/> - - <param name="other" type="select" display="checkboxes" multiple="true" label="Other options"> + 
<expand macro="main_input"/> + <expand macro="levels_input"/> + <expand macro="kmer_options" maxk_default="100" step_default="20"/> + <expand macro="filter_options"/> + <expand macro="min_pairs_filter"/> + <expand macro="other_options"> <option value="--no_bubble">Do not merge bubble (--no_bubble)</option> - <option value="--no_local">Do not use local assembly (--no_local)</option> - <option value="--no_coverage">Do not iterate on coverage (--no_coverage)</option> - <option value="--no_correct">Do not do correction (--no_correct)</option> - <option value="--pre_correction">Perform pre-correction before assembly (--pre_correction)</option> - </param> + </expand> </inputs> <outputs> <data name="output" from_work_dir="out/scaffold.fa" format="fasta"/> </outputs> <tests> + <!-- basic test + check of defaults --> + <test> + <param name="read" value="merged.fa" ftype="fasta"/> + <assert_command> + <has_text text="--mink 20" /> + <has_text text="--maxk 100" /> + <has_text text="--step 20" /> + <has_text text="--inner_mink 10" /> + <has_text text="--inner_step 5" /> + <has_text text="--prefix 3" /> + <has_text text="--min_count 2" /> + <has_text text="--min_support 1" /> + <has_text text="--seed_kmer 30" /> + <has_text text="--min_contig 200" /> + <has_text text="--similar 0.95" /> + <has_text text="--max_mismatch 3" /> + <has_text text="--min_pairs 3" /> + <not_has_text text="--read_level_2" /> + <not_has_text text="--read_level_3" /> + <not_has_text text="--read_level_4" /> + <not_has_text text="--read_level_5" /> + <not_has_text text="--no_bubble" /> + <not_has_text text="--no_local" /> + <not_has_text text="--no_coverage" /> + <not_has_text text="--no_correct" /> + <not_has_text text="--pre_correction" /> + </assert_command> + <output name="output" file="out/scaffold.fa" ftype="fasta"/> + </test> + <!-- read levels test --> + <test> + <param name="read" value="merged.fa" ftype="fasta"/> + <param name="read_level_2" ftype="fasta" value="merged.fa"/> + <param 
name="read_level_3" ftype="fasta" value="merged.fa"/> + <param name="read_level_4" ftype="fasta" value="merged.fa"/> + <param name="read_level_5" ftype="fasta" value="merged.fa"/> + <assert_command> + <has_text text="--read_level_2" /> + <has_text text="--read_level_3" /> + <has_text text="--read_level_4" /> + <has_text text="--read_level_5" /> + </assert_command> + <output name="output" file="out/scaffold.fa" ftype="fasta" compare="sim_size"/> + </test> + <!-- k-mer options --> <test> <param name="read" value="merged.fa" ftype="fasta"/> - <output name="output" file="out/scaffold.fa" ftype="fasta"/> + <param name="mink" value="19"/> + <param name="maxk" value="99"/> + <param name="step" value="19"/> + <param name="inner_mink" value="9"/> + <param name="inner_step" value="4"/> + <param name="prefix" value="2"/> + <param name="min_count" value="1"/> + <param name="min_support" value="2"/> + <param name="seed_kmer" value="29"/> + <assert_command> + <has_text text="--mink 19" /> + <has_text text="--maxk 99" /> + <has_text text="--step 19" /> + <has_text text="--inner_mink 9" /> + <has_text text="--inner_step 4" /> + <has_text text="--prefix 2" /> + <has_text text="--min_count 1" /> + <has_text text="--min_support 2" /> + <has_text text="--seed_kmer 29" /> + </assert_command> + <output name="output" file="out/scaffold.fa" compare="sim_size"/> + </test> + <!-- filter options --> + <test> + <param name="read" value="merged.fa" ftype="fasta"/> + <param name="min_contig" value="199"/> + <param name="similar" value="0.96"/> + <param name="max_mismatch" value="2"/> + <assert_command> + <has_text text="--min_contig 199" /> + <has_text text="--similar 0.96" /> + <has_text text="--max_mismatch 2" /> + </assert_command> + <output name="output" file="out/scaffold.fa" compare="sim_size"/> + </test> + <!-- min-pairs and other options --> + <test> + <param name="read" value="merged.fa" ftype="fasta"/> + <param name="min_pairs" value="2"/> + <param name="other" 
value="--no_bubble,--no_local,--no_coverage,--no_correct,--pre_correction"/> + <assert_command> + <has_text text="--min_pairs 2" /> + <has_text text="--no_bubble" /> + <has_text text="--no_local" /> + <has_text text="--no_coverage" /> + <has_text text="--no_correct" /> + <has_text text="--pre_correction" /> + </assert_command> + <output name="output" file="out/scaffold.fa" compare="sim_size"/> </test> </tests> - <help><![CDATA[ - IDBA-UD is a iterative De Bruijn Graph De Novo Assembler for Short Reads Sequencing data with Highly Uneven Sequencing Depth. It is an extension of IDBA algorithm. IDBA-UD also iterates from small k to a large k. In each iteration, short and low-depth contigs are removed iteratively with cutoff threshold from low to high to reduce the errors in low-depth and high-depth regions. Paired-end reads are aligned to contigs and assembled locally to generate some missing k-mers in low-depth regions. With these technologies, IDBA-UD can iterate k value of de Bruijn graph to a very large value with less gaps and less branches to form long contigs in both low-depth and high-depth regions. - - -Input: IDBA-UD takes interleaved paired end data in the FASTA format as input, -i.e. paired-end reads need to be stored in the same FASTA file suc h that a pair -of reads should be in two consecutive lines. -In Galaxy paired reads in separate FASTQ files can be converted into interleaved -FASTA using the tools: - -* `FASTQ interlacer on paired end read <https://toolshed.g2.bx.psu.edu/view/devteam/fastq_paired_end_interlacer>`_ -* `Samtools extract FASTA or FASTQ from a SAM file <https://toolshed.g2.bx.psu.edu/view/devteam/fastq_to_fasta>`_ - -Note that, IDBA-UD assumes that the paired-end reads are in order (->,<-). -If your data is in reverse order (<-,->), please convert it by yourself. - ]]></help> - <citations> + <expand macro="help" more_help="IDBA-UD is an extension of IDBA algorithm for Short Reads Sequencing data with Highly Uneven Sequencing Depth. 
IDBA-UD also iterates from a small k to a large k. In each iteration, short and low-depth contigs are removed iteratively with a cutoff threshold from low to high to reduce the errors in low-depth and high-depth regions. Paired-end reads are aligned to contigs and assembled locally to generate some missing k-mers in low-depth regions. With these technologies, IDBA-UD can iterate the k value of the de Bruijn graph to a very large value with fewer gaps and fewer branches to form long contigs in both low-depth and high-depth regions."/> + <expand macro="citations"> <citation type="doi">10.1093/bioinformatics/bts174</citation> - </citations> + </expand> </tool>
--- a/macros.xml Fri Sep 21 15:25:56 2018 -0400 +++ b/macros.xml Mon Aug 05 15:59:42 2019 -0400 @@ -1,8 +1,128 @@ <macros> + <token name="@IDBA_VERSION@">1.1.3</token> <xml name="requirements"> <requirements> - <requirement type="package" version="1.1.3">idba</requirement> + <requirement type="package" version="@IDBA_VERSION@">idba</requirement> <yield/> </requirements> </xml> + + <xml name="main_input"> + <param argument="--read" type="data" format="fasta" label="Fasta read file. Lower or equal to 600b"/> + <param argument="--long_read" type="data" format="fasta" optional="true" label="Fasta long read file. More than 600b"/> + </xml> + + <token name="@MAIN_INPUT@"> + --read '$read' + #if $long_read: + --long_read '$long_read' + #end if + </token> + + <xml name="levels_input"> + <param argument="--read_level_2" type="data" format="fasta" optional="true" label="Fasta Paired-end reads for second level scaffolds"/> + <param argument="--read_level_3" type="data" format="fasta" optional="true" label="Fasta Paired-end reads for third level scaffolds"/> + <param argument="--read_level_4" type="data" format="fasta" optional="true" label="Fasta Paired-end reads for fourth level scaffolds"/> + <param argument="--read_level_5" type="data" format="fasta" optional="true" label="Fasta Paired-end reads for fifth level scaffolds"/> + </xml> + <token name="@LEVELS_INPUT@"> + #if $read_level_2: + --read_level_2 '$read_level_2' + #end if + #if $read_level_3: + --read_level_3 '$read_level_3' + #end if + #if $read_level_4: + --read_level_4 '$read_level_4' + #end if + #if $read_level_5: + --read_level_5 '$read_level_5' + #end if + </token> + + <xml name="kmer_options" token_maxk_default="" token_step_default=""> + <param argument="--mink" type="integer" value="20" max="312" label="Minimum k value. Lower or equal to 312"/> + <param argument="--maxk" type="integer" value="@MAXK_DEFAULT@" max="312" label="Maximum k value. 
Lower or equal to 312"/> + <param argument="--step" type="integer" value="@STEP_DEFAULT@" label="Increment of k-mer of each iteration"/> + <param argument="--inner_mink" type="integer" value="10" label="Inner minimum k value"/> + <param argument="--inner_step" type="integer" value="5" label="Inner increment of k-mer"/> + <param argument="--prefix" type="integer" value="3" label="Prefix length used to build sub k-mer table"/> + <param argument="--min_count" type="integer" value="2" label="Minimum multiplicity for filtering k-mer when building the graph"/> + <param argument="--min_support" type="integer" value="1" label="Minimum support in each iteration"/> + <param argument="--seed_kmer" type="integer" value="30" label="Seed kmer size for alignment"/> + </xml> + <token name="@KMER_OPTIONS@"> + --mink $mink + --maxk $maxk + --step $step + --inner_mink $inner_mink + --inner_step $inner_step + --prefix $prefix + --min_count $min_count + --min_support $min_support + --seed_kmer $seed_kmer + </token> + + <xml name="filter_options"> + <param argument="--min_contig" type="integer" value="200" label="Minimum size of contig"/> + <param argument="--similar" type="float" value="0.95" label="Similarity for alignment"/> + <param argument="--max_mismatch" type="integer" value="3" label="Max mismatch of error correction"/> + </xml> + <token name="@FILTER_OPTIONS@"> + --min_contig $min_contig + --similar $similar + --max_mismatch $max_mismatch + </token> + + <xml name="min_pairs_filter"> + <param argument="--min_pairs" type="integer" value="3" label="Minimum number of pairs"/> + </xml> + <token name="@MIN_PAIRS_FILTER@"> + --min_pairs $min_pairs + </token> + <xml name="other_options"> + <param name="other" type="select" display="checkboxes" multiple="true" label="Other options"> + <yield/> + <option value="--no_local">Do not use local assembly (--no_local)</option> + <option value="--no_coverage">Do not iterate on coverage (--no_coverage)</option> + <option value="--no_correct">Do
not do correction (--no_correct)</option> + <option value="--pre_correction">Perform pre-correction before assembly (--pre_correction)</option> + </param> + </xml> + <token name="@OTHER_OPTIONS@"> + #if $other: + ${" ".join(str($other).split(","))} + #end if + </token> + + <token name="@THREADS@"> + --num_threads \${GALAXY_SLOTS:-1} + </token> + + <xml name="help" token_more_help=""> + <help><![CDATA[ +IDBA is an iterative De Bruijn Graph De Novo Assembler for sequence assembly. Most assemblers based on de Bruijn graph build a de Bruijn graph with a specific k-mer size to perform the assembling task. For all of them, it is very crucial to find a specific value of k. If k is too large, there will be a lot of gap problems in the graph. If k is too small, there will be a lot of branch problems. IDBA uses not only one specific k but a range of k values to build the iterative de Bruijn graph. It can keep all the information in graphs with different k values. + +@MORE_HELP@ + +Input: IDBA-* take interleaved paired-end data in the FASTA format as input, +i.e. paired-end reads need to be stored in the same FASTA file such that a pair +of reads should be in two consecutive lines. +In Galaxy paired reads in separate FASTQ files can be converted into interleaved +FASTA using the tools: + +* `FASTQ interlacer on paired end read <https://toolshed.g2.bx.psu.edu/view/devteam/fastq_paired_end_interlacer>`_ +* `Samtools extract FASTA or FASTQ from a SAM file <https://toolshed.g2.bx.psu.edu/view/devteam/fastq_to_fasta>`_ + +Note that, IDBA-* assume that the paired-end reads are in order (->,<-). +If your data is in reverse order (<-,->), please convert it by yourself. + ]]></help> + </xml> + <xml name="citations"> + <citations> + <citation type="doi">10.1007/978-3-642-12683-3_28</citation> + <yield/> + </citations> + </xml> + </macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/all_fasta.loc Mon Aug 05 15:59:42 2019 -0400 @@ -0,0 +1,20 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +# +test_id test_dbkey test display name ${__HERE__}/merged.fa +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/reference.fa Mon Aug 05 15:59:42 2019 -0400 @@ -0,0 +1,303 @@ +>reference +GAAAAACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTT +GCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCTGATTGTC +AAAAACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTA +TTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAA +AAAACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAG +ATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAG +AAACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGT +CAATATCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGC +AACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTG +CCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCC +ACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGG +CCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCTGATTGTCGTATCTCCATTCAGATATACCGACGACTC +CGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGT +AAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGC +GCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTG +TGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCTGATTGT +CGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGA +GCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCTGATTGTC +GCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAG +GGGAATATCTGCGGCAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAATCCAGGG +CTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGT +TTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGG +TTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTC 
+CTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCTGATTGTCGTATCTCCATTCAGATA +TGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCG +CAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGC +GCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGT +CCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCTGA +CAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTC +CTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTA +AGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCG +GTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCG +GATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGG +CATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGA +ATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGT +AGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCCACTGC +TGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTA +CAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGA +GGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTAT +AGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCTGATTGTCGTATCTCCATTCAGA +GGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATA +TGTCAATATCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCC +GTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATAT +GTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCG +TCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATC +CAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCTGATTGTCGTATCTCCATTC +CGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCT +CATAGTGCGCTGGGTTATCGCTCGCCACGGGAATATCTGCGGCAGCGGGCTTGTAATGGGTTAAGTGATAACAGAT 
+GCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTG +AGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCTGATTGTCGTATCTCCAT +CAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGA +GATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTAC +AGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAA +CCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCTGATTGTCGTATCTCC +GGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAAT +AGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGAT +GTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATG +TAACAGATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCAT +TTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATGG +ATGGCATCCGCATAGTGCGCTGGGTTATCGCTCGCCACGGGAATATCTGCGGCAGCGGGCTTGTAATGGGTTAAGT +TACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATGGA +AGTGATAACAGATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCC +ACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATGGAG +GCCACGGGAATATCTGCGGCAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAATC +CTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATGGAGA +TCGCCACGGGAATATCTGCGGCAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAA +TTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATGGAGAT +TCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACC +TCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATGGAGATA +CGCCACGGGAATATCTGCGGCAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAAT +CCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATGGAGATAC +AATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCC +CTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATGGAGATACG 
+TTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGA +TTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATGGAGATACGA +AATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTG +TGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATGGAGATACGAC +TTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTT +GCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATGGAGATACGACA +GGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAA +CGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATGGAGATACGACAA +GCTGGGTTATCGCTCGCCACGGGAATATCTGCGGCAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAA +GCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATGGAGATACGACAAT +CGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCT +AGTGGTGAGTCGTCGGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTA +TTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGA +GTGGTGAGTCGTCGGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTAT +GTTAACGGCAGCAAAGAACCTTGCAGAGGCGTTCGAGCATTATAACGAATGGCATCCGCATAGTGCGCTGGGTTAT +TGGTGAGTCGTCGGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATT +GGCAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCAT +GGTGAGTCGTCGGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTC +TAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACT +GTGAGTCGTCGGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCG +GCGGCAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCC +TGAGTCGTCGGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCGG +CGGGAATATCTGCGGCAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAATCCAGG +AGTCGTCGGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCGGGT +CAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGA 
+GTCGTCGGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCGGGTA +CGTTCGAGCATTATAACGAATGGCATCCGCATAGTGCGCTGGGTTATCGCTCGCCACGGGAATATCTGCGGCAGCG +TCGTCGGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCGGGTAC +TGCGCTGGGTTATCGCTCGCCACGGGAATATCTGCGGCAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTG +CGTCGGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCGGGTACA +TCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATC +GTCGGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCGGGTACAG +AATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCC +TCGGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCGGGTACAGG +TGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTA +CGGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCGGGTACAGGA +ACGAATGGCATCCGCATAGTGCGCTGGGTTATCGCTCGCCACGGGAATATCTGCGGCAGCGGGCTTGTAATGGGTT +GGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCGGGTACAGGAT +CTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTT +GTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCGGGTACAGGATA +TCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCT +TATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCGGGTACAGGATAA +CGCATAGTGCGCTGGGTTATCGCTCGCCACGGGAATATCTGCGGCAGCGGGCTTGTAATGGGTTAAGTGATAACAG +ATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCGGGTACAGGATAAA +ATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCCA +ATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCGGGTACAGGATAAAGC +TGCGGCAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCC +GGGTACAGGATAAAGCTTTGCTGGAAATTGGCAGTGGCGCTACGCTAACAATGCAGGATATTGACAGTTTTGAACA +CATTATAACGAATGGCATCCGCATAGTGCGCTGGGTTATCGCTCGCCACGGGAATATCTGCGGCAGCGGGCTTGTA +GGTACAGGATAAAGCTTTGCTGGAAATTGGCAGTGGCGCTACGCTAACAATGCAGGATATTGACAGTTTTGAACAT 
+TTAACGGCAGCAAAGAACCTTGCAGAGGCGTTCGAGCATTATAACGAATGGCATCCGCATAGTGCGCTGGGTTATC +TACAGGATAAAGCTTTGCTGGAAATTGGCAGTGGCGCTACGCTAACAATGCAGGATATTGACAGTTTTGAACATCA +AGAGGCGTTCGAGCATTATAACGAATGGCATCCGCATAGTGCGCTGGGTTATCGCTCGCCACGGGAATATCTGCGG +ACAGGATAAAGCTTTGCTGGAAATTGGCAGTGGCGCTACGCTAACAATGCAGGATATTGACAGTTTTGAACATCAT +CATCCGCATAGTGCGCTGGGTTATCGCTCGCCACGGGAATATCTGCGGCAGCGGGCTTGTAATGGGTTAAGTGATA +CAGGATAAAGCTTTGCTGGAAATTGGCAGTGGCGCTACGCTAACAATGCAGGATATTGACAGTTTTGAACATCATG +CATTATAACGAATGGCATCCGCATAGTGCGCTGGGTTATCGCTCGCCACGGGAATATCTGCGGCAGCGGGCTTGTA +AGGATAAAGCTTTGCTGGAAATTGGCAGTGGCGCTACGCTAACAATGCAGGATATTGACAGTTTTGAACATCATGG +ACCTTGCAGAGGCGTTCGAGCATTATAACGAATGGCATCCGCATAGTGCGCTGGGTTATCGCTCGCCACGGGAATA +GGATAAAGCTTTGCTGGAAATTGGCAGTGGCGCTACGCTAACAATGCAGGATATTGACAGTTTTGAACATCATGGG +AGCATTATAACGAATGGCATCCGCATAGTGCGCTGGGTTATCGCTCGCCACGGGAATATCTGCGGCAGCGGGCTTG +ATAAAGCTTTGCTGGAAATTGGCAGTGGCGCTACGCTAACAATGCAGGATATTGACAGTTTTGAACATCATGGGAC +ATATCTGCGGCAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAATCCAGGGTTCT +GCTTTGCTGGAAATTGGCAGTGGCGCTACGCTAACAATGCAGGATATTGACAGTTTTGAACATCATGGGACAAGAA +CAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTATCATGCCCAAACCAGACGGGTTAACGGCAGCAAA +CTTTGCTGGAAATTGGCAGTGGCGCTACGCTAACAATGCAGGATATTGACAGTTTTGAACATCATGGGACAAGAAC +TATCGCTCGCCACGGGAATATCTGCGGCAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGG +TGCAGGATATTGACAGTTTTGAACATCATGGGACAAGAACCCTGGATTTGCCCCTATATTTCCAGACATCTGTTAT +ATGCCCAAACCAGACGGGTTAACGGCAGCAAAGAACCTTGCAGAGGCGTTCGAGCATTATAACGAATGGCATCCGC +ATATTGACAGTTTTGAACATCATGGGACAAGAACCCTGGATTTGCCCCTATATTTCCAGACATCTGTTATCACTTA +TAACGAATGGCATCCGCATAGTGCGCTGGGTTATCGCTCGCCACGGGAATATCTGCGGCAGCGGGCTTGTAATGGG +GTTTTGAACATCATGGGACAAGAACCCTGGATTTGCCCCTATATTTCCAGACATCTGTTATCACTTAACCCATTAC +CGAGCATTATAACGAATGGCATCCGCATAGTGCGCTGGGTTATCGCTCGCCACGGGAATATCTGCGGCAGCGGGCT +CATCATGGGACAAGAACCCTGGATTTGCCCCTATATTTCCAGACATCTGTTATCACTTAACCCATTACAAGCCCGC +AGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTATCATGCCCAAACCAGACGGGT 
+CATGGGACAAGAACCCTGGATTTGCCCCTATATTTCCAGACATCTGTTATCACTTAACCCATTACAAGCCCGCTGC +CTTGCAGAGGCGTTCGAGCATTATAACGAATGGCATCCGCATAGTGCGCTGGGTTATCGCTCGCCACGGGAATATC +TGGGACAAGAACCCTGGATTTGCCCCTATATTTCCAGACATCTGTTATCACTTAACCCATTACAAGCCCGCTGCCG +CATCAGTATCATGCCCAAACCAGACGGGTTAACGGCAGCAAAGAACCTTGCAGAGGCGTTCGAGCATTATAACGAA +GACAAGAACCCTGGATTTGCCCCTATATTTCCAGACATCTGTTATCACTTAACCCATTACAAGCCCGCTGCCGCAG +GATAAAGCGTGACTACATCAGTATCATGCCCAAACCAGACGGGTTAACGGCAGCAAAGAACCTTGCAGAGGCGTTC +ACAAGAACCCTGGATTTGCCCCTATATTTCCAGACATCTGTTATCACTTAACCCATTACAAGCCCGCTGCCGCAGA +CGCCCGGATGTTGGGACTTGAACCGAAGAACACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTTC +CAAGAACCCTGGATTTGCCCCTATATTTCCAGACATCTGTTATCACTTAACCCATTACAAGCCCGCTGCCGCAGAT +GGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTATCATGCCCAAACCAGAC +AAGAACCCTGGATTTGCCCCTATATTTCCAGACATCTGTTATCACTTAACCCATTACAAGCCCGCTGCCGCAGATA +CGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTATCAT +TGCCGCAGATATTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCT +CGCCGCTTCGGCAACGATCTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATG +GCCGCAGATATTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTC +TCGTGAAAACGATAAAGCGTGACTACATCAGTATCATGCCCAAACCAGACGGGTTAACGGCAGCAAAGAACCTTGC +CCGCAGATATTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCT +CGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTATCATGCCCAAACCAGA +CGCAGATATTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTG +GGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTATC +GCAGATATTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGC +GGCTAATGAAACACGCCAGTTCGCCCGGATGTTGGGACTTGAACCGAAGAACACGGCGGTGCGGAGTCCGGAGAGT +CAGATATTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCA +GGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTATCATGCCCAAACCAGAC +AGATATTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAA 
+CCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGGA +ATATTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGG +CACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGT +ATTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTT +TCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGGATGTTGGGA +TTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTTC +GCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTATCATGCCCAAACCAGACGGGTTAACGGCAGCAA +TCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTTCT +GTCCGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTATCATGCCCAAACC +GTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTTCTTTGC +GAAGAACACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTAC +TGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTTCTTTGCT +TCGCCCGGATGTTGGGACTTGAACCGAAGAACACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTT +GAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTTCTTTGCTGCCG +CCGCTTCGGCAACGATCTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAA +ACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCG +ACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTA +GCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTT +GAACACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATC +CGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATG +CTTGAACCGAAGAACACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGC +GATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGAT +GCTAATGAAACACGCCAGTTCGCCCGGATGTTGGGACTTGAACCGAAGAACACGGCGGTGCGGAGTCCGGAGAGTA +TCGTTATAATGCTCGAACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGT +TACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTCGGCAACGATCTT 
+TATAATGCTCGAACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTC +TAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGGATGTTGGGACTTGAACCGAAGAACACGGCG +TAATGCTCGAACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCAC +CTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTC +CTCGAACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTT +TCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGGATGT +CTCGAACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTT +GTGGAACGCCGCTTCGGCAACGATCTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGG +AACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCG +GATGTTGGGACTTGAACCGAAGAACACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAA +CCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTT +GCCGCTTCGGCAACGATCTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGA +CTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCAC +CTACCGGGCTAATGAAACACGCCAGTTCGCCCGGATGTTGGGACTTGAACCGAAGAACACGGCGGTGCGGAGTCCG +TGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCACG +CCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGGATGTTGGGAC +GCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCACGA +GTTCGCCCGGATGTTGGGACTTGAACCGAAGAACACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGC +CAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCACGAA +CGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTCGGCAA +AAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCACGAAG +GAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGGATGTTGGGACTTGAAC +AGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCACGAAGC +GACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGGATGTTGGGACTTGAACCGAAGAAC +GGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCACGAAGCT 
+CTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCC +GTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCACGAAGCTC +GGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGGATGTTGGGACTTGAACCGAA +TTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCACGAAGCTCT +ACGCCAGTTCGCCCGGATGTTGGGACTTGAACCGAAGAACACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCA +TCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCACGAAGCTCTC +GATCTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCG +TAGTCACGCTTTATCGTTTTCACGAAGCTCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCG +CTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTCGGCAACGATCT +AGTCACGCTTTATCGTTTTCACGAAGCTCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGG +CGTTCGCGCTGGACTGCTGTGATCGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGT +ACGCTTTATCGTTTTCACGAAGCTCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCA +CGTGTCACGTTCGCGCTGGACTGCTGTGATCGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTG +ATCGTTTTCACGAAGCTCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCA +GTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGGATG +ATCGTTTTCACGAAGCTCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCA +TGATCGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGA +GTTTTCACGAAGCTCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACA +TCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGG +TTTTCACGAAGCTCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACAT +CGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTCGGCAA +TTTTCACGAAGCTCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACAT +CTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTCGGCAACGATCTTCCGTCGTCT +TTTTCACGAAGCTCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACAT +CTGCTGTGATCGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATG 
+CACGAAGCTCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACATCCGG +CTGGGAGCGGTGGAACGCCGCTTCGGCAACGATCTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCAT +AGCTCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACATCCGGGCGAA +CGGTGGAACGCCGCTTCGGCAACGATCTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCG +AGCTCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACATCCGGGCGAA +ACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGC +CTCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACATCCGGGCGAACT +CGGCAACGATCTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGC +TCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACATCCGGGCGAACTG +TCGGCAACGATCTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACG +CTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACATCCGGGCGAACTGG +CCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTCGGCAACGATCTTCC +TCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACATCCGGGCGAACTGGC +GTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTCGGCAACGATCTTCCGTCGTCTCCAGTGGAGTGGCTGA +CTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACATCCGGGCGAACTGGCG +AACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTCGGCAACGATCTTCCGTCGTCTCCAG +GGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACATCCGGGCGAACTGGCGTGTTTCATTAGCCCGGTAGC +CGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTCGGCAACGATCTTCCG +GGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACATCCGGGCGAACTGGCGTGTTTCATTAGCCCGGTAGC +AACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTCGGCAACGATCTTCCGTCGTCTCCAG +GACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACATCCGGGCGAACTGGCGTGTTTCATTAGCCCGGTAGCA +ACTGCTGTGATCGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCAT +CCGTGTTCTTCGGTTCAAGTCCCAACATCCGGGCGAACTGGCGTGTTTCATTAGCCCGGTAGCATGAACCATTATC +TTCGCGCTGGACTGCTGTGATCGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTAC +GTGTTCTTCGGTTCAAGTCCCAACATCCGGGCGAACTGGCGTGTTTCATTAGCCCGGTAGCATGAACCATTATCCG 
+GTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTCGGCAACGATCTTCCGTCGTCTCCAGTGGA +TTCTTCGGTTCAAGTCCCAACATCCGGGCGAACTGGCGTGTTTCATTAGCCCGGTAGCATGAACCATTATCCGTCA +CTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTCGGCAACGATCT +GGTTCAAGTCCCAACATCCGGGCGAACTGGCGTGTTTCATTAGCCCGGTAGCATGAACCATTATCCGTCAGCCACT +GCTGTGATCGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCT +TTCAAGTCCCAACATCCGGGCGAACTGGCGTGTTTCATTAGCCCGGTAGCATGAACCATTATCCGTCAGCCACTCC +CGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGG +CCGGGCGAACTGGCGTGTTTCATTAGCCCGGTAGCATGAACCATTATCCGTCAGCCACTCCACTGGAGACGACGGA +GAGACTGCGTGTCACGTTCGCGCTGGACTGCTGTGATCGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTC +CGGGCGAACTGGCGTGTTTCATTAGCCCGGTAGCATGAACCATTATCCGTCAGCCACTCCACTGGAGACGACGGAA +TGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTG +AACTGGCGTGTTTCATTAGCCCGGTAGCATGAACCATTATCCGTCAGCCACTCCACTGGAGACGACGGAAGATCGT +GATCGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAG +ACTGGCGTGTTTCATTAGCCCGGTAGCATGAACCATTATCCGTCAGCCACTCCACTGGAGACGACGGAAGATCGTT +AGAGAGACTGCGTGTCACGTTCGCGCTGGACTGCTGTGATCGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGC +CTGGCGTGTTTCATTAGCCCGGTAGCATGAACCATTATCCGTCAGCCACTCCACTGGAGACGACGGAAGATCGTTG +TGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTCG +CTGGCGTGTTTCATTAGCCCGGTAGCATGAACCATTATCCGTCAGCCACTCCACTGGAGACGACGGAAGATCGTTG +TGTCACGTTCGCGCTGGACTGCTGTGATCGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAA +TGGCGTGTTTCATTAGCCCGGTAGCATGAACCATTATCCGTCAGCCACTCCACTGGAGACGACGGAAGATCGTTGC +CTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCC +CCGGTAGCATGAACCATTATCCGTCAGCCACTCCACTGGAGACGACGGAAGATCGTTGCCGAAGCGGCGTTCCACC +GATCGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAG +CATGAACCATTATCCGTCAGCCACTCCACTGGAGACGACGGAAGATCGTTGCCGAAGCGGCGTTCCACCGCTCCCA +CTGCTGTGATCGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATG 
+ATGAACCATTATCCGTCAGCCACTCCACTGGAGACGACGGAAGATCGTTGCCGAAGCGGCGTTCCACCGCTCCCAG +TAACGGAGAGAGACTGCGTGTCACGTTCGCGCTGGACTGCTGTGATCGTGAGGCACTGCACTGGGCGGTCACTACC +TGAACCATTATCCGTCAGCCACTCCACTGGAGACGACGGAAGATCGTTGCCGAAGCGGCGTTCCACCGCTCCCAGC +AACGGAGAGAGACTGCGTGTCACGTTCGCGCTGGACTGCTGTGATCGTGAGGCACTGCACTGGGCGGTCACTACCG
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/all_fasta.loc.sample Mon Aug 05 15:59:42 2019 -0400 @@ -0,0 +1,18 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +#
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Mon Aug 05 15:59:42 2019 -0400 @@ -0,0 +1,8 @@ +<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> +<tables> + <!-- Locations of all fasta files under genome directory --> + <table name="all_fasta" comment_char="#" allow_duplicate_entries="False"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/all_fasta.loc" /> + </table> +</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Mon Aug 05 15:59:42 2019 -0400 @@ -0,0 +1,8 @@ +<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> +<tables> + <!-- Locations of all fasta files under genome directory --> + <table name="all_fasta" comment_char="#" allow_duplicate_entries="False"> + <columns>value, dbkey, name, path</columns> + <file path="${__HERE__}/test-data/all_fasta.loc" /> + </table> +</tables>