Next changeset 1:2485637fe656 (2018-10-17) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/exonerate commit e02d20338ee01ed58265d93e9cc5b0695a322d47 |
added:
exonerate.xml macros.xml test-data/all_fasta.loc test-data/coding2coding.gff test-data/est2genome.gff test-data/genome.fa test-data/out.txt test-data/out_query.gff test-data/out_target.gff test-data/protein2genome.gff test-data/proteome.fa test-data/transcriptome.fa tool-data/all_fasta.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
b |
diff -r 000000000000 -r f48ed38dfddf exonerate.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/exonerate.xml Thu Aug 02 09:11:30 2018 -0400 |
[ |
@@ -0,0 +1,168 @@ +<tool id="exonerate" name="Exonerate" profile="16.04" version="@VERSION@"> <!-- Galaxy wrapper around the exonerate command line; the version comes from the @VERSION@ token in macros.xml. --> + <description>pairwise sequence comparison</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <command><![CDATA[ + exonerate + + --query '$query' + + #if str( $ref_seq.ref_seq_selector ) == "personal" + --target '${ref_seq.input_fasta}' + #else + --target '${ref_seq.input_fasta.fields.path}' + #end if + + --score $score + --percent $percent + --bestn $bestn + --verbose 0 + + #if str($model) != "ungapped" + --model '${model}' + #end if + + #if str($model) == "est2genome" + --querytype dna --targettype dna + #elif str($model) == "protein2genome" + --querytype protein --targettype dna + #elif str($model) == "coding2coding" + --querytype dna --targettype dna + #end if + + #if str($outformat) == "alignment" + --showalignment yes --showvulgar no > '${output_ali}' + #elif str($outformat) == "targetgff" + --showalignment no --showvulgar no --showtargetgff yes --showquerygff no > '${output_gff}' + #elif str($outformat) == "querygff" + --showalignment no --showvulgar no --showtargetgff no --showquerygff yes > '${output_gff}' + #end if + ]]></command> + + <inputs> + <param argument="--query" type="data" format="fasta" label="Select the query sequence(s) in fasta" /> + + <conditional name="ref_seq"> <!-- The target is either an entry of the all_fasta data table (its path column is passed to exonerate) or a FASTA dataset from the history. --> + <param name="ref_seq_selector" type="select" label="Reference sequence(s)"> + <option selected="true" value="database">Use a built-in genome</option> + <option value="personal">Use a genome from history</option> + </param> + <when value="database"> + <param + help="If your genome of interest is not listed, contact the Galaxy server administrators" + label="Reference sequence(s)" + name="input_fasta" + type="select" + > + <options from_data_table="all_fasta"> + <filter column="2" type="sort_by" /> + <validator message="No sequences are available" type="no_options" /> + </options> + </param> + </when> + <when value="personal"> + <param 
name="input_fasta" type="data" format="fasta" label="Reference sequence(s)" /> + </when> + </conditional> + + <param name="model" type="select" label="Alignment method"> + <option value="ungapped" selected="true">Simple ungapped alignment</option> + <option value="est2genome">est2genome: align cDNA to a genome</option> + <option value="protein2genome">protein2genome: align proteins to a genome</option> + <option value="coding2coding">coding2coding: 6-frame translated alignment of DNA sequences</option> + </param> + <param name="outformat" type="select" label="Output format"> + <option value="targetgff" selected="true">GFF on target sequence(s)</option> + <option value="querygff">GFF on query sequence(s)</option> + <option value="alignment">Human readable alignment</option> + </param> + <param name="score" type="integer" min="0" max="10000" value="100" label="Score threshold for gapped alignment"/> + <param name="percent" type="float" min="0" max="100" value="0.0" label="Report alignment over a percentage of the maximum score attainable by each query"/> + <param name="bestn" type="integer" min="0" max="10000" value="0" label="Report best N results per query (0 to report all)"/> + </inputs> + <outputs> <!-- Exactly one output is created per run, selected by outformat: GFF for the two GFF modes, plain text for the human-readable alignment. --> + <data name="output_gff" format="gff" label="${tool.name} on $on_string"> + <filter>outformat != 'alignment'</filter> + </data> + <data name="output_ali" format="txt" label="${tool.name} on $on_string"> + <filter>outformat == 'alignment'</filter> + </data> + </outputs> + <tests> <!-- The expected GFF files carry one ##date line per GFF dump, which changes with the day the test runs; lines_diff allows one modified line (counted as 2) per dump in each expected file. --> + <test> + <param name="query" value="genome.fa"/> + <conditional name="ref_seq"> + <param name="ref_seq_selector" value="personal"/> + <param name="input_fasta" value="genome.fa"/> + </conditional> + <param name="outformat" value="targetgff"/> + <output name="output_gff" file="out_target.gff" lines_diff="8"/> + </test> + <test> + <param name="query" value="genome.fa"/> + <conditional name="ref_seq"> + <param name="ref_seq_selector" value="database"/> + <param name="input_fasta" value="merlin"/> + 
</conditional> + <param name="outformat" value="targetgff"/> + <output name="output_gff" file="out_target.gff" lines_diff="8"/> + </test> + <test> + <param name="query" value="genome.fa"/> + <conditional name="ref_seq"> + <param name="ref_seq_selector" value="personal"/> + <param name="input_fasta" value="genome.fa"/> + </conditional> + <param name="outformat" value="querygff"/> + <output name="output_gff" file="out_query.gff" lines_diff="8"/> + </test> + <test> + <param name="query" value="genome.fa"/> + <conditional name="ref_seq"> + <param name="ref_seq_selector" value="personal"/> + <param name="input_fasta" value="genome.fa"/> + </conditional> + <param name="outformat" value="alignment"/> + <output name="output_ali" file="out.txt"/> + </test> + <test> + <param name="query" value="transcriptome.fa"/> + <conditional name="ref_seq"> + <param name="ref_seq_selector" value="personal"/> + <param name="input_fasta" value="genome.fa"/> + </conditional> + <param name="model" value="est2genome"/> + <param name="outformat" value="targetgff"/> + <output name="output_gff" file="est2genome.gff" lines_diff="4"/> + </test> + <test> + <param name="query" value="proteome.fa"/> + <conditional name="ref_seq"> + <param name="ref_seq_selector" value="personal"/> + <param name="input_fasta" value="genome.fa"/> + </conditional> + <param name="model" value="protein2genome"/> + <param name="outformat" value="targetgff"/> + <output name="output_gff" file="protein2genome.gff" lines_diff="2"/> + </test> + <test> + <param name="query" value="genome.fa"/> + <conditional name="ref_seq"> + <param name="ref_seq_selector" value="personal"/> + <param name="input_fasta" value="genome.fa"/> + </conditional> + <param name="model" value="coding2coding"/> + <param name="outformat" value="targetgff"/> + <output name="output_gff" file="coding2coding.gff" lines_diff="4"/> + </test> + </tests> + <help><![CDATA[ + Exonerate is a generic tool for pairwise sequence comparison. 
+ It allows you to align sequences using many alignment models, using either exhaustive dynamic programming, or a variety of heuristics. + + .. _Exonerate website: https://www.ebi.ac.uk/about/vertebrate-genomics/software/exonerate + ]]></help> + <expand macro="citations"/> +</tool> |
b |
diff -r 000000000000 -r f48ed38dfddf macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Thu Aug 02 09:11:30 2018 -0400 |
b |
@@ -0,0 +1,18 @@ +<?xml version="1.0"?> +<macros> <!-- Macros and tokens shared with exonerate.xml, which imports this file. --> + <xml name="requirements"> <!-- Expanded by the tool to declare the exonerate package at the version held in the @VERSION@ token. --> + <requirements> + <requirement type="package" version="@VERSION@">exonerate</requirement> + <yield /> + </requirements> + </xml> + + <token name="@VERSION@">2.4.0</token> <!-- Single place to set the version; used by the requirement above and by the version attribute of the tool in exonerate.xml. --> + + <xml name="citations"> <!-- Expanded by the tool to attach the citation, given as a DOI. --> + <citations> + <citation type="doi">10.1186/1471-2105-6-31</citation> + </citations> + </xml> + +</macros> |
b |
diff -r 000000000000 -r f48ed38dfddf test-data/all_fasta.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/all_fasta.loc Thu Aug 02 09:11:30 2018 -0400 |
b |
@@ -0,0 +1,19 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +# +merlin merlin Merlin ${__HERE__}/genome.fa |
b |
diff -r 000000000000 -r f48ed38dfddf test-data/coding2coding.gff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/coding2coding.gff Thu Aug 02 09:11:30 2018 -0400 |
b |
@@ -0,0 +1,28 @@ +# --- START OF GFF DUMP --- +# +# +##gff-version 2 +##source-version exonerate:coding2coding 2.4.0 +##date 2018-08-02 +##type DNA +# +# +# seqname source feature start end score strand frame attributes +# +sample exonerate:coding2coding similarity 1 3536 6207 + . alignment_id 1 ; Query sample ; Align 1 1 2463 ; Align 2466 2464 3 ; Align 2469 2469 1068 +# --- END OF GFF DUMP --- +# +# --- START OF GFF DUMP --- +# +# +##gff-version 2 +##source-version exonerate:coding2coding 2.4.0 +##date 2018-08-02 +##type DNA +# +# +# seqname source feature start end score strand frame attributes +# +sample exonerate:coding2coding similarity 2 3535 6151 - . alignment_id 1 ; Query sample ; Align 3536 3536 3534 +# --- END OF GFF DUMP --- +# |
b |
diff -r 000000000000 -r f48ed38dfddf test-data/est2genome.gff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/est2genome.gff Thu Aug 02 09:11:30 2018 -0400 |
b |
@@ -0,0 +1,37 @@ +# --- START OF GFF DUMP --- +# +# +##gff-version 2 +##source-version exonerate:est2genome 2.4.0 +##date 2018-08-02 +##type DNA +# +# +# seqname source feature start end score strand frame attributes +# +sample exonerate:est2genome gene 2742 2819 102 - . gene_id 1 ; sequence sample ; gene_orientation . ; identity 58.97 ; similarity 58.97 +sample exonerate:est2genome exon 2742 2819 . - . insertions 0 ; deletions 0 ; identity 58.97 ; similarity 58.97 +sample exonerate:est2genome similarity 2742 2819 102 - . alignment_id 1 ; Query sample ; Align 2820 1168 78 +# --- END OF GFF DUMP --- +# +# --- START OF GFF DUMP --- +# +# +##gff-version 2 +##source-version exonerate:est2genome 2.4.0 +##date 2018-08-02 +##type DNA +# +# +# seqname source feature start end score strand frame attributes +# +sample exonerate:est2genome gene 758 3050 7309 + . gene_id 1 ; sequence sample ; gene_orientation + ; identity 99.93 ; similarity 99.93 +sample exonerate:est2genome utr5 758 1332 . + . +sample exonerate:est2genome exon 758 1332 . + . insertions 0 ; deletions 2 ; identity 99.83 ; similarity 99.83 +sample exonerate:est2genome splice5 1333 1334 . + . intron_id 1 ; splice_site "TT" +sample exonerate:est2genome intron 1333 2151 . + . intron_id 1 +sample exonerate:est2genome splice3 2150 2151 . + . intron_id 0 ; splice_site "AT" +sample exonerate:est2genome exon 2152 3050 . + . insertions 0 ; deletions 0 ; identity 100.00 ; similarity 100.00 +sample exonerate:est2genome similarity 758 3050 7309 + . alignment_id 1 ; Query sample ; Align 758 1 572 ; Align 1330 575 3 ; Align 2152 578 899 +# --- END OF GFF DUMP --- +# |
b |
diff -r 000000000000 -r f48ed38dfddf test-data/genome.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome.fa Thu Aug 02 09:11:30 2018 -0400 |
b |
@@ -0,0 +1,14 @@ +>sample +TGCATCGGTTATTTGCATTGGCTGGCATTTAATTTATGCACAAACACACACGCTGGAACACATTAGTCATACGCAACGTTGCACAAATATTTATTAGCTATTGCAAGTCAGTGCGGGGCAAATAGCACCGTGAAATGCAAACGAAAGCATTGAGTGTGCTGGCATCATCGATTTAGCAGCCATAAATACTGCTCAACACTAAATCAATCATACGCAATGTGGTGCTAATTGATTGAGCAAACATTGCCAAGATGCAATTGCCATGATCAAGG +GGCGAAGGTTTCTTTGGGCCAAACAGAAGAGTGGTTTCGCCACTTAACTAGAGGTTAATTATTAGATTGTATTAGCCAGCTAGCTGCGTACGAGCATATTGAGCTACTAATGATACCACAAGTTCGCAGTTATCGCCCCAGTGGAACCCTTGAGCTTAATCCCAGTGCCCCATTAAATACTTAAGCGCGGACCCTCCAGCGCTACTCCACTCGAGCGGTTCAACGAACTGGCCAGCACTCGAAACTAAAGCCCTACCATGATTCAATATCAA +GTTGGCAACTCGAAACCCACTTGGCATTAAGTATGACTGACCTGCAGGCGACTCAACCTCAACCTGAAGCTCAACCTGAAGCTCAACCGGCGCATTCAGCTCCAGCGTTTTGCATTTAGCATTCAGCCGTGTCCAATGCTAAGATCAATGCCACTCCACGCTGGCTGTGCCAACTTCCTGTTGCAGCAACCACTACTACCCTATATGGACACAATGTGCCACGCCACGAGTATAGTGTATCGTACCTACACGAGTATCTGGCCAGTCCGCAG +TCCTAGTTCTTCCCTGCTCCTTTTTATTTAAACACTTCCTCATTTCGCTTTGTGCCCTGGCATTTGGCAGCTATTCTGTGTCAATTCCCGGTGAGTGTCATTCGAACTGGTGAACCGGAGAATGGAGCAGTCGCCAGCCGAGGATTCAGAGCCAGAACCCCAAACCCAGAACCCAGAAGGCCAGGTGAATGGGATCGGGATTCACCCAGTCCTCGCTCGCCTGGCTGAGTGCCAAGTGTAGTGCAATAAACTTGCCGTCCTGAATGGCTGGC +AATCAAAGATCTGAATGCAAGATACGATGACATTTACTCGCACATTCAATTGCGCTTTTGATTTGAATATTTACATGCTTTCTGGTCGTAGTCCCAGTTCCGCAGACTCGAGTCCCTCGGGCTGCCATTCTTTCATGTCCTTTCGTTCTATTGACATAGTCAAATTGGCTTTGCTGCACAGCGAGAAAATGTACTTAACTTCCACACAATCCTGGCTATTTGCTGCTATGACTGTAAGCATAATTTATTTTAGGGTAAATGGGTGAGTGACC +AAAAGGCATGCCGTGCAGTTCATTTTATCAAAGCACCATGACTAACTGGTACAACCATTTTTTCTGCGTGCCTTTCGGGTCAACCGAATCAGGCCCTAATTGATTTTCAAATGTTTCCACAGCGTCTTAAAGTCCAGTCCTTATTTAAACAACTTACTAAGTATTGCAGAGCCTTGGCATACTTAGCTGCAGTTATGCATTTCAAGCATAAACAACTAATTTGACTAATTGATAAGCCATGCAAAGGCAAAGGCATCTCGAGATGTCTTGAG +TCGACTTCACTGCACTTCTGGCCGGCTTGTCTCGGCCTGGTTGGTTGCGTTTCTGTATCTTTGGCTCCGGCTGTTTGCTTTGTCCATCAACATCTTTTGAGCGGAGGCACTCAGGCGCAAACTGTTGCACTTCAGACCAGTCTGGAGAGCAGTGTTCACTAGGATACGAGTATATACATAGGCTCATGTGGAGCCGGGATCAAGCCACCCCAAACCCCAAACCCCATTAATTGCAATCAATTTCGCTGTCAACACACACTCCAAATAACTGC 
+GGGATATTTATTTATGGGCTGCGCTTCCCTTCAGCAACCTTTTGCAACTGAATAATTGCCTTATGACACATTATCATTATTATTAAGGCTGCCACGGGTCGGCAAAGTAATCATTACCTCCTGCCCATATCATCCTGCCCCTGGGCCATGAACCTGGGAATTAGGGAGTCAGCATTGCAGTGCCCACAGCTCCTCTATGCTCATAAATATATTTCCAGCTGGCAACATTATATAATTATTTTGCCATGGATTGCATTGAACTTAGTCCGAGT +GCGTCGATGTTGCTCGTTTGCTGTTTGCTGTTTGCTATTTCTGTTTGCACTTCTTGCCGCAGGCGCGCAACTCTTCATGTGGCTTCTTCTCGTTTTTAGGCCAGAATCCAGAAGGCAGCGTTCAGAACCCAGAATCCAGAATCCATGTGATGCCTTTGCGTGAATTTCATTTTAAAGTGCAAATATTTGCGGCCTGGCCCGGCCAGTTGCTTAAATGGAAAACTGGGCGAGAAAAAAGAATGCACTGGTAGCTCACAAACAGCCCACACAGA +AACCAGTAATGAAATTGTGCGGCACTCGTCATTGCGGCACTGGCCTTACATATAAATTATATAAAATATATACACATTTGTTTGGCATTGACCGGCCTACAAAGGAACTGCAGCCAGGGATGCAGCATGGGTATGATTTCCCTATAGTCGTGGCAAATACCTTTAACACGAGTAAGTACAAGTAATGCCCAACTGAGGGCCTTCAAGTAATATTAAGAGAAGATATTTTTAAACATCTATCTTTTTTAAGGACACACATTCTAACTTTATTT +ACGAGAGACACTGGCACCCTCATACAAAATTCTAGAAAGCTAGTCAGTAAACTAGAACTGCAGTTCTCCCCTGCTTATGACAATAACAGCAATTTGTATTTAAATTTAGGAATGACCAGGGCTCTTCCGCTCCACCCTGGCCAGTATAATTTGTGCATCTCTAGCCAATACAAACTTATTATATACTTGCAAAAGTTGTGCACTGTAAATATTTGCGGCGACCACCCGCACTTTCGCCATGACTACAAAATGGGTGGCATGAAATGGGGGCT +TTTGGGGGGCAACCGCAGAGAATGCTTGACTTTGGCCGGTAAAAAACTACGCAGACCACCCACCACCCAACCCGGTTTTTCTCTCTTGGCCACGTTTATTGATGGCGGCGGGAAGTGCTTTAATGGAAATTTAAGTATCATCGTTTAAAATGAAGCAGAAAAGCGCAAGCGAACGAAAGACCCTGGAAAATTGCGAGTTGCGACTGGTATTGCGACCCTTGAGCTTTGGTCATTGCGTTGGCAACGCAAATTATTATTTTTATTATTATTGC +AGGTTTTCGGTGCACATTTATTTCATGCAACTTTACAGCGCTGCACATAGAAATTATGTACAGGCCGCGTGGGCCATAATGCAGATTGCCAACCCGAGAAGGGTAACTCGGAGTGAAATGGCGGGAATTAGGCAAGCAAACAGGCAACTAAAATATGCAACGCAGGCAGTGGGGAAATTTGATGGGCGCCAGAAAAGGCCGTGAAAAGGCCGTAAAACTGGGCCAAGTAATGAGCTACGGCGGCCACACATTAAATATGCAACAATTAAAAG |
b |
diff -r 000000000000 -r f48ed38dfddf test-data/out.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/out.txt Thu Aug 02 09:11:30 2018 -0400 |
[ |
b'@@ -0,0 +1,288 @@\n+\n+C4 Alignment:\n+------------\n+ Query: sample\n+ Target: sample\n+ Model: ungapped:dna2dna\n+ Raw score: 17680\n+ Query range: 0 -> 3536\n+ Target range: 0 -> 3536\n+\n+ 1 : TGCATCGGTTATTTGCATTGGCTGGCATTTAATTTATGCACAAACACACACGCTGGAACACA : 62\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 1 : TGCATCGGTTATTTGCATTGGCTGGCATTTAATTTATGCACAAACACACACGCTGGAACACA : 62\n+\n+ 63 : TTAGTCATACGCAACGTTGCACAAATATTTATTAGCTATTGCAAGTCAGTGCGGGGCAAATA : 124\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 63 : TTAGTCATACGCAACGTTGCACAAATATTTATTAGCTATTGCAAGTCAGTGCGGGGCAAATA : 124\n+\n+ 125 : GCACCGTGAAATGCAAACGAAAGCATTGAGTGTGCTGGCATCATCGATTTAGCAGCCATAAA : 186\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 125 : GCACCGTGAAATGCAAACGAAAGCATTGAGTGTGCTGGCATCATCGATTTAGCAGCCATAAA : 186\n+\n+ 187 : TACTGCTCAACACTAAATCAATCATACGCAATGTGGTGCTAATTGATTGAGCAAACATTGCC : 248\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 187 : TACTGCTCAACACTAAATCAATCATACGCAATGTGGTGCTAATTGATTGAGCAAACATTGCC : 248\n+\n+ 249 : AAGATGCAATTGCCATGATCAAGGGGCGAAGGTTTCTTTGGGCCAAACAGAAGAGTGGTTTC : 310\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 249 : AAGATGCAATTGCCATGATCAAGGGGCGAAGGTTTCTTTGGGCCAAACAGAAGAGTGGTTTC : 310\n+\n+ 311 : GCCACTTAACTAGAGGTTAATTATTAGATTGTATTAGCCAGCTAGCTGCGTACGAGCATATT : 372\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 311 : GCCACTTAACTAGAGGTTAATTATTAGATTGTATTAGCCAGCTAGCTGCGTACGAGCATATT : 372\n+\n+ 373 : GAGCTACTAATGATACCACAAGTTCGCAGTTATCGCCCCAGTGGAACCCTTGAGCTTAATCC : 434\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 373 : GAGCTACTAATGATACCACAAGTTCGCAGTTATCGCCCCAGTGGAACCCTTGAGCTTAATCC : 434\n+\n+ 435 : CAGTGCCCCATTAAATACTTAAGCGCGGACCCTCCAGCGCTACTCCACTCGAGCGGTTCAAC : 496\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 435 : CAGTGCCCCATTAAATACTTAAGCGCGGACCCTCCAGCGCTACTCCACTCGAGCGGTTCAAC : 496\n+\n+ 497 : 
GAACTGGCCAGCACTCGAAACTAAAGCCCTACCATGATTCAATATCAAGTTGGCAACTCGAA : 558\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 497 : GAACTGGCCAGCACTCGAAACTAAAGCCCTACCATGATTCAATATCAAGTTGGCAACTCGAA : 558\n+\n+ 559 : ACCCACTTGGCATTAAGTATGACTGACCTGCAGGCGACTCAACCTCAACCTGAAGCTCAACC : 620\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 559 : ACCCACTTGGCATTAAGTATGACTGACCTGCAGGCGACTCAACCTCAACCTGAAGCTCAACC : 620\n+\n+ 621 : TGAAGCTCAACCGGCGCATTCAGCTCCAGCGTTTTGCATTTAGCATTCAGCCGTGTCCAATG : 682\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 621 : TGAAGCTCAACCGGCGCATTCAGCTCCAGCGTTTTGCATTTAGCATTCAGCCGTGTCCAATG : 682\n+\n+ 683 : CTAAGATCAATGCCACTCCACGCTGGCTGTGCCAACTTCCTGTTGCAGCAACCACTACTACC : 744\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 683 : CTAAGATCAATGCCACTCCACGCTGGCTGTGCCAACTTCCTGTTGCAGCAACCACTACTACC : 744\n+\n+ 745 : CTATATGGACACAATGTGCCACGCCACGAGTATAGTGTATCGTACCTACACGAGTATCTGGC : 806\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 745 : CTATATGGACACAATGTGCCACGCCACGAGTATAGTGTATCGTACCTACACGAGTATCTGGC : 806\n+\n+ 807 : CAGTCCGCAGTCCTAGTTCTTCCCTGCTCCTTTTTATTTAAACACTTCCTCATTTCGCTTTG : 868\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 807 : CAGTCCGCAGTCCTAGTTCTTCCCTGCTCCTTTTTATTTAAACACTTCCTCATTTCGCTTTG : 868\n+\n+ 869 : TGCCCTGGCATTTGGCAGCTATTCTGTGTCAATTCCCGGTGAGTGTCATTCGAACTGGTGAA : 930\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 869 : TGCCCTGGCATTTGGCAGCTATTCTGTGTCAATTCCCGGTGAGTGTCATTCGAACTGGTGAA : 930\n+\n+ 931 : CCGGAGAATGGAGCAGTCGCCAGCCGAGGATTCAGAGCCAGAACCCCAAACCCAGAACCCAG : 992\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 931 : CCGGAGAATGGAGCAGTCGCCAGCCGAGGATTCAGAGCCAGAACCCCAAACCCAGAACCCAG : 992\n+\n+ 993 : AAGGCCAGGTGAATGGGATCGGGATTCACCCAGTCCTCGCTCGCCTGGCTGAGTGCCAAGTG : 105'..b'\n+\n+ 2791 : CTGCTTATGACAATAACAGCAATTTGTATTTAAATTTAGGAATGACCAGGGCTCTTCCGCTC : 2852\n+ 
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 2791 : CTGCTTATGACAATAACAGCAATTTGTATTTAAATTTAGGAATGACCAGGGCTCTTCCGCTC : 2852\n+\n+ 2853 : CACCCTGGCCAGTATAATTTGTGCATCTCTAGCCAATACAAACTTATTATATACTTGCAAAA : 2914\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 2853 : CACCCTGGCCAGTATAATTTGTGCATCTCTAGCCAATACAAACTTATTATATACTTGCAAAA : 2914\n+\n+ 2915 : GTTGTGCACTGTAAATATTTGCGGCGACCACCCGCACTTTCGCCATGACTACAAAATGGGTG : 2976\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 2915 : GTTGTGCACTGTAAATATTTGCGGCGACCACCCGCACTTTCGCCATGACTACAAAATGGGTG : 2976\n+\n+ 2977 : GCATGAAATGGGGGCTTTTGGGGGGCAACCGCAGAGAATGCTTGACTTTGGCCGGTAAAAAA : 3038\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 2977 : GCATGAAATGGGGGCTTTTGGGGGGCAACCGCAGAGAATGCTTGACTTTGGCCGGTAAAAAA : 3038\n+\n+ 3039 : CTACGCAGACCACCCACCACCCAACCCGGTTTTTCTCTCTTGGCCACGTTTATTGATGGCGG : 3100\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 3039 : CTACGCAGACCACCCACCACCCAACCCGGTTTTTCTCTCTTGGCCACGTTTATTGATGGCGG : 3100\n+\n+ 3101 : CGGGAAGTGCTTTAATGGAAATTTAAGTATCATCGTTTAAAATGAAGCAGAAAAGCGCAAGC : 3162\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 3101 : CGGGAAGTGCTTTAATGGAAATTTAAGTATCATCGTTTAAAATGAAGCAGAAAAGCGCAAGC : 3162\n+\n+ 3163 : GAACGAAAGACCCTGGAAAATTGCGAGTTGCGACTGGTATTGCGACCCTTGAGCTTTGGTCA : 3224\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 3163 : GAACGAAAGACCCTGGAAAATTGCGAGTTGCGACTGGTATTGCGACCCTTGAGCTTTGGTCA : 3224\n+\n+ 3225 : TTGCGTTGGCAACGCAAATTATTATTTTTATTATTATTGCAGGTTTTCGGTGCACATTTATT : 3286\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 3225 : TTGCGTTGGCAACGCAAATTATTATTTTTATTATTATTGCAGGTTTTCGGTGCACATTTATT : 3286\n+\n+ 3287 : TCATGCAACTTTACAGCGCTGCACATAGAAATTATGTACAGGCCGCGTGGGCCATAATGCAG : 3348\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 3287 : TCATGCAACTTTACAGCGCTGCACATAGAAATTATGTACAGGCCGCGTGGGCCATAATGCAG : 3348\n+\n+ 3349 : 
ATTGCCAACCCGAGAAGGGTAACTCGGAGTGAAATGGCGGGAATTAGGCAAGCAAACAGGCA : 3410\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 3349 : ATTGCCAACCCGAGAAGGGTAACTCGGAGTGAAATGGCGGGAATTAGGCAAGCAAACAGGCA : 3410\n+\n+ 3411 : ACTAAAATATGCAACGCAGGCAGTGGGGAAATTTGATGGGCGCCAGAAAAGGCCGTGAAAAG : 3472\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 3411 : ACTAAAATATGCAACGCAGGCAGTGGGGAAATTTGATGGGCGCCAGAAAAGGCCGTGAAAAG : 3472\n+\n+ 3473 : GCCGTAAAACTGGGCCAAGTAATGAGCTACGGCGGCCACACATTAAATATGCAACAATTAAA : 3534\n+ ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n+ 3473 : GCCGTAAAACTGGGCCAAGTAATGAGCTACGGCGGCCACACATTAAATATGCAACAATTAAA : 3534\n+\n+ 3535 : AG : 3536\n+ ||\n+ 3535 : AG : 3536\n+\n+\n+C4 Alignment:\n+------------\n+ Query: sample\n+ Target: sample\n+ Model: ungapped:dna2dna\n+ Raw score: 108\n+ Query range: 607 -> 634\n+ Target range: 595 -> 622\n+\n+ 608 : CTGAAGCTCAACCTGAAGCTCAACCGG : 634\n+ || || ||||||||||||||||||| |\n+ 596 : CTCAACCTCAACCTGAAGCTCAACCTG : 622\n+\n+\n+C4 Alignment:\n+------------\n+ Query: sample\n+ Target: sample\n+ Model: ungapped:dna2dna\n+ Raw score: 108\n+ Query range: 595 -> 622\n+ Target range: 607 -> 634\n+\n+ 596 : CTCAACCTCAACCTGAAGCTCAACCTG : 622\n+ || || ||||||||||||||||||| |\n+ 608 : CTGAAGCTCAACCTGAAGCTCAACCGG : 634\n+\n+\n+C4 Alignment:\n+------------\n+ Query: sample\n+ Target: sample [revcomp]\n+ Model: ungapped:dna2dna\n+ Raw score: 102\n+ Query range: 2741 -> 2819\n+ Target range: 2819 -> 2741\n+\n+ 2742 : ATACAAAATTCTAGAAAGCTAGTCAGTAAACTAGAACTGCAGTTCTCCCCTGCTTATGACAA : 2803\n+ ||||||| | || | | | || | |||||||||||||| | || | | \n+ 2819 : ATACAAATTGCTGTTATTGTCATAAGCAGGGGAGAACTGCAGTTCTAGTTTACTGACTAGCT : 2758\n+\n+ 2804 : TAACAGCAATTTGTAT : 2819\n+ | || | |||||||\n+ 2757 : TTCTAGAATTTTGTAT : 2742\n+\n' |
b |
diff -r 000000000000 -r f48ed38dfddf test-data/out_query.gff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/out_query.gff Thu Aug 02 09:11:30 2018 -0400 |
b |
@@ -0,0 +1,56 @@ +# --- START OF GFF DUMP --- +# +# +##gff-version 2 +##source-version exonerate:ungapped:dna2dna 2.4.0 +##date 2018-08-02 +##type DNA +# +# +# seqname source feature start end score strand frame attributes +# +sample exonerate:ungapped:dna2dna similarity 1 3536 17680 + . alignment_id 1 ; Target sample ; Align 1 1 3536 +# --- END OF GFF DUMP --- +# +# --- START OF GFF DUMP --- +# +# +##gff-version 2 +##source-version exonerate:ungapped:dna2dna 2.4.0 +##date 2018-08-02 +##type DNA +# +# +# seqname source feature start end score strand frame attributes +# +sample exonerate:ungapped:dna2dna similarity 608 634 108 + . alignment_id 2 ; Target sample ; Align 608 596 27 +# --- END OF GFF DUMP --- +# +# --- START OF GFF DUMP --- +# +# +##gff-version 2 +##source-version exonerate:ungapped:dna2dna 2.4.0 +##date 2018-08-02 +##type DNA +# +# +# seqname source feature start end score strand frame attributes +# +sample exonerate:ungapped:dna2dna similarity 596 622 108 + . alignment_id 3 ; Target sample ; Align 596 608 27 +# --- END OF GFF DUMP --- +# +# --- START OF GFF DUMP --- +# +# +##gff-version 2 +##source-version exonerate:ungapped:dna2dna 2.4.0 +##date 2018-08-02 +##type DNA +# +# +# seqname source feature start end score strand frame attributes +# +sample exonerate:ungapped:dna2dna similarity 2742 2819 102 + . alignment_id 1 ; Target sample ; Align 2742 2820 78 +# --- END OF GFF DUMP --- +# |
b |
diff -r 000000000000 -r f48ed38dfddf test-data/out_target.gff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/out_target.gff Thu Aug 02 09:11:30 2018 -0400 |
b |
@@ -0,0 +1,56 @@ +# --- START OF GFF DUMP --- +# +# +##gff-version 2 +##source-version exonerate:ungapped:dna2dna 2.4.0 +##date 2018-08-02 +##type DNA +# +# +# seqname source feature start end score strand frame attributes +# +sample exonerate:ungapped:dna2dna similarity 1 3536 17680 + . alignment_id 1 ; Query sample ; Align 1 1 3536 +# --- END OF GFF DUMP --- +# +# --- START OF GFF DUMP --- +# +# +##gff-version 2 +##source-version exonerate:ungapped:dna2dna 2.4.0 +##date 2018-08-02 +##type DNA +# +# +# seqname source feature start end score strand frame attributes +# +sample exonerate:ungapped:dna2dna similarity 596 622 108 + . alignment_id 2 ; Query sample ; Align 596 608 27 +# --- END OF GFF DUMP --- +# +# --- START OF GFF DUMP --- +# +# +##gff-version 2 +##source-version exonerate:ungapped:dna2dna 2.4.0 +##date 2018-08-02 +##type DNA +# +# +# seqname source feature start end score strand frame attributes +# +sample exonerate:ungapped:dna2dna similarity 608 634 108 + . alignment_id 3 ; Query sample ; Align 608 596 27 +# --- END OF GFF DUMP --- +# +# --- START OF GFF DUMP --- +# +# +##gff-version 2 +##source-version exonerate:ungapped:dna2dna 2.4.0 +##date 2018-08-02 +##type DNA +# +# +# seqname source feature start end score strand frame attributes +# +sample exonerate:ungapped:dna2dna similarity 2742 2819 102 - . alignment_id 1 ; Query sample ; Align 2820 2742 78 +# --- END OF GFF DUMP --- +# |
b |
diff -r 000000000000 -r f48ed38dfddf test-data/protein2genome.gff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/protein2genome.gff Thu Aug 02 09:11:30 2018 -0400 |
b |
@@ -0,0 +1,22 @@ +# --- START OF GFF DUMP --- +# +# +##gff-version 2 +##source-version exonerate:protein2genome:local 2.4.0 +##date 2018-08-02 +##type DNA +# +# +# seqname source feature start end score strand frame attributes +# +sample exonerate:protein2genome:local gene 1181 2291 456 + . gene_id 1 ; sequence sample ; gene_orientation + ; identity 98.97 ; similarity 100.00 +sample exonerate:protein2genome:local cds 1181 1326 . + . +sample exonerate:protein2genome:local exon 1181 1326 . + . insertions 0 ; deletions 0 ; identity 100.00 ; similarity 100.00 +sample exonerate:protein2genome:local splice5 1327 1328 . + . intron_id 1 ; splice_site "CA" +sample exonerate:protein2genome:local intron 1327 2143 . + . intron_id 1 +sample exonerate:protein2genome:local splice3 2142 2143 . + . intron_id 0 ; splice_site "AT" +sample exonerate:protein2genome:local cds 2144 2291 . + . +sample exonerate:protein2genome:local exon 2144 2291 . + . insertions 0 ; deletions 0 ; identity 97.96 ; similarity 100.00 +sample exonerate:protein2genome:local similarity 1181 2291 456 + . alignment_id 1 ; Query sample ; Align 1181 1 144 ; Align 2145 50 147 +# --- END OF GFF DUMP --- +# |
b |
diff -r 000000000000 -r f48ed38dfddf test-data/proteome.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/proteome.fa Thu Aug 02 09:11:30 2018 -0400 |
b |
@@ -0,0 +1,2 @@ +>sample +PSSADSSPSGCHSFMSFRSIDIVKLALLHSEKMYLTSTQSWLFAAMTVSMPWIALNLVRVRRCCSFAVCCLLFLFALLAAGAQLFMWLLLVFRPESRR |
b |
diff -r 000000000000 -r f48ed38dfddf test-data/transcriptome.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/transcriptome.fa Thu Aug 02 09:11:30 2018 -0400 |
b |
@@ -0,0 +1,8 @@ +>sample +ATGTGCCACGCCACGAGTATAGTGTATCGTACCTACACGAGTATCTGGCCAGTCCGCAG +TCCTAGTTCTTCCCTGCTCCTTTTTATTTAAACACTTCCTCATTTCGCTTTGTGCCCTGGCATTTGGCAGCTATTCTGTGTCAATTCCCGGTGAGTGTCATTCGAACTGGTGAACCGGAGAATGGAGCAGTCGCCAGCCGAGGATTCAGAGCCAGAACCCCAAACCCAGAACCCAGAAGGCCAGGTGAATGGGATCGGGATTCACCCAGTCCTCGCTCGCCTGGCTGAGTGCCAAGTGTAGTGCAATAAACTTGCCGTCCTGAATGGCTGGC +AATCAAAGATCTGAATGCAAGATACGATGACATTTACTCGCACATTCAATTGCGCTTTTGATTTGAATATTTACATGCTTTCTGGTCGTAGTCCCAGTTCCGCAGACTCGAGTCCCTCGGGCTGCCATTCTTTCATGTCCTTTCGTTCTATTGACATAGTCAAATTGGCTTTGCTGCACAGCGAGAAAATGTACTTAACTTCCACACAATCCTGGCTATTTGCTGCTATGACTGTAAGCATGCCATGGATTGCATTGAACTTAGTCCGAGT +GCGTCGATGTTGCTCGTTTGCTGTTTGCTGTTTGCTATTTCTGTTTGCACTTCTTGCCGCAGGCGCGCAACTCTTCATGTGGCTTCTTCTCGTTTTTAGGCCAGAATCCAGAAGGCAGCGTTCAGAACCCAGAATCCAGAATCCATGTGATGCCTTTGCGTGAATTTCATTTTAAAGTGCAAATATTTGCGGCCTGGCCCGGCCAGTTGCTTAAATGGAAAACTGGGCGAGAAAAAAGAATGCACTGGTAGCTCACAAACAGCCCACACAGA +AACCAGTAATGAAATTGTGCGGCACTCGTCATTGCGGCACTGGCCTTACATATAAATTATATAAAATATATACACATTTGTTTGGCATTGACCGGCCTACAAAGGAACTGCAGCCAGGGATGCAGCATGGGTATGATTTCCCTATAGTCGTGGCAAATACCTTTAACACGAGTAAGTACAAGTAATGCCCAACTGAGGGCCTTCAAGTAATATTAAGAGAAGATATTTTTAAACATCTATCTTTTTTAAGGACACACATTCTAACTTTATTT +ACGAGAGACACTGGCACCCTCATACAAAATTCTAGAAAGCTAGTCAGTAAACTAGAACTGCAGTTCTCCCCTGCTTATGACAATAACAGCAATTTGTATTTAAATTTAGGAATGACCAGGGCTCTTCCGCTCCACCCTGGCCAGTATAATTTGTGCATCTCTAGCCAATACAAACTTATTATATACTTGCAAAAGTTGTGCACTGTAAATATTTGCGGCGACCACCCGCACTTTCGCCATGACTACAAAATGGGTGGCATGAAATGGGGGCT +TTTGGGGGGCAACCGCAGAGAATGCTTGACTTTGGCCGGTAAAAAACTACGCAGACCA |
b |
diff -r 000000000000 -r f48ed38dfddf tool-data/all_fasta.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/all_fasta.loc.sample Thu Aug 02 09:11:30 2018 -0400 |
b |
@@ -0,0 +1,18 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +# |
b |
diff -r 000000000000 -r f48ed38dfddf tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Thu Aug 02 09:11:30 2018 -0400 |
b |
@@ -0,0 +1,7 @@ +<tables> + <!-- Locations of all FASTA files under the genome directory. Declares the "all_fasta" data table backed by tool-data/all_fasta.loc, with "#" as the comment character and the fields named value, dbkey, name and path. --> + <table name="all_fasta" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/all_fasta.loc" /> + </table> +</tables> |
b |
diff -r 000000000000 -r f48ed38dfddf tool_data_table_conf.xml.test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Thu Aug 02 09:11:30 2018 -0400 |
b |
@@ -0,0 +1,7 @@ +<tables> + <!-- Locations of all FASTA files under the genome directory. Test variant of the "all_fasta" data table: it reads test-data/all_fasta.loc relative to this file via ${__HERE__}, with "#" as the comment character and the fields named value, dbkey, name and path. --> + <table name="all_fasta" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="${__HERE__}/test-data/all_fasta.loc" /> + </table> +</tables> |