diff spaln.xml @ 1:37b5e1f0b544 draft

"planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
author iuc
date Thu, 16 Jul 2020 07:57:10 -0400
parents 95ea8d97abb4
children dd0cd2319ae5
line wrap: on
line diff
--- a/spaln.xml	Fri Jan 11 18:15:21 2019 -0500
+++ b/spaln.xml	Thu Jul 16 07:57:10 2020 -0400
@@ -1,7 +1,7 @@
 <tool id="spaln" name="Spaln: align cDNA or Protein to genome" version="@TOOL_VERSION@+galaxy0">
     <description>Maps and aligns a set of cDNA or protein sequences onto a whole genomic sequence.</description>
     <macros>
-        <token name="@TOOL_VERSION@">2.3.2</token>
+        <import>macros.xml</import>
     </macros>
     <edam_topics>
         <edam_topic>topic_3512</edam_topic>
@@ -10,7 +10,36 @@
         <requirement type="package" version="@TOOL_VERSION@">spaln</requirement>
     </requirements>
     <command detect_errors="aggressive"><![CDATA[
-	    spaln -t\${GALAXY_SLOTS:-1} -O$format -o '$output1' '$genome' '$query'
+	    spaln -t\${GALAXY_SLOTS:-1} -O$format
+        #if str($species_params).strip() != ''
+            -T'${species_params}'
+        #end if
+        #if $adv.use == "yes"
+            -S'${adv.query_orientation}'
+            -V'${adv.hirschberg_threshold}'
+            -pa'${adv.polya_trim}'
+            ${adv.all_results}
+            -yu'${adv.gap_extension_penalty}'
+            -yv'${adv.gap_open_penalty}'
+            -yw'${adv.dp_matrix_scan_width}'
+            -ya'${adv.splice_stringency}'
+            -yj'${adv.gap_penalty_incline}'
+            -yk'${adv.gap_penalty_flex}'
+            '${adv.double_affine_gap}'
+            -ym'${adv.match_score}'
+            -yn'${adv.mismatch_score}'
+            -yo'${adv.stop_codon_penalty}'
+            -yx'${adv.frameshift_penalty}'
+            -yy'${adv.splice_site_weight}
+            -yz'${adv.coding_potential_weight}'
+            -yB'${adv.branch_point_weight}
+            -yL'${adv.min_intron_len}'
+            -yZ'${adv.intron_potential_weight}'
+            #if str($adv.max_gene_length).strip() != ''
+                -XG'${adv.max_gene_length}'
+            #end if
+        #end if
+        '$genome' '$query' >'$output1' 
     ]]></command>
     <inputs>
         <param type="data" name="genome" format="fasta" label="Genome sequence to search (FASTA format)" />
@@ -20,7 +49,49 @@
 	    <option value="2">GFF3 format matches</option>
 	    <option value="3">BED format</option>
 	    <option value="4">Tabular format exon information</option>
+    </param>
+    <param argument="-T" name="species_params" type="text" optional="true" label="Species to use for parameter setting" help="Choose a species table (e.g. cynosemi) from which to read parameters to optimise spaln" />
+    <conditional name="adv">
+        <param type="select" name="use" label="Advanced settings">
+            <option selected="true" value="no">No</option>
+            <option value="yes">Yes</option>
         </param>
+        <when value="no">
+        </when>
+        <when value="yes">
+            <param argument="-S" name="query_orientation" type="select" label="DNA query orientation" help="Determines how to treat orientation of query sequence when searching">
+                <option value="0">Infer orientation from sequence header (no poly-A/poly-T trimming)</option>
+                <option value="1">Forward orientation only. Poly-A tail might be trimmed off</option>
+                <option value="2">Reverse orientation only. Leading poly-T might be trimmed off</option>
+                <option selected="true" value="3">Examine both orientations. Poly-A / Poly-T might be trimmed off</option>
+            </param>
+            <param argument="-V" name="hirschberg_threshold" type="integer" value="16777216" label="Minimum space to induce Hirschberg's algorithm" help="Default is 16M (16x1024x1024 bytes)" />
+            <param argument="-pa" name="polya_trim" type="integer" value="12" label="Limit 3' poly-As to this number of bases" help="poly-A/poly-T trimming is only done if -S (orientation) option is 0 or 3" />
+            <param argument="-pw" name="all_results" type="boolean" checked="false" truevalue="-pw" falsevalue="" label="Report results even if the score is below the threshold" />
+            <param argument="-yu" name="gap_extension_penalty" type="integer" value="3" label="Gap-extension penalty" />
+            <param argument="-yv" name="gap_open_penalty" type="integer" value="8" label="Gap-open penalty" />
+            <param argument="-yw" name="dp_matrix_scan_width" type="integer" value="100" label="Band width for DP matrix scan" />
+            <param argument="-ya" name="splice_stringency" type="select" label="Stringency of splice site selection" help="Which dinucleotide pairs to accept at the ends of an intron">
+                <option value="0" selected="true">accept only the canonical pairs (GT..AG,GC..AG,AT..AC)</option>
+                <option value="1">accept also AT..AN</option>
+                <option value="2">allow up to one mismatch from GT..AG</option>
+                <option value="3">accept any pairs</option>
+            </param>
+            <param argument="-yj" name="gap_penalty_incline" type="float" value="0.6" label="Incline of long gap penalty" />
+            <param argument="-yk" name="gap_penalty_flex" type="integer" value="7" label="Flex point where the incline of gap penalty changes" />
+            <param argument="-yl3" name="double_affine_gap" type="boolean" checked="false" truevalue="-yl3" falsevalue="" label="Use double affine gap penalty" help="Use the double affine gap rathr than single affine gap penalty calculation" />
+            <param argument="-ym" name="match_score" type="integer" value="2" label="Nucleotide match score" />
+            <param argument="-yn" name="mismatch_score" type="integer" value="-6" label="Nucleotide mismatch score" />
+            <param argument="-yo" name="stop_codon_penalty" type="integer" value="100" label="Penalty for a premature termination codon" />
+            <param argument="-yx" name="frameshift_penalty" type="integer" value="100" label="Penalty for a frame shift error" />
+            <param argument="-yy" name="splice_site_weight" type="integer" value="8" label="Weight for splice site signal" />
+            <param argument="-yz" name="coding_potential_weight" type="integer" value="2" label="Weight for coding potential" />
+            <param argument="-yB" name="branch_point_weight" type="integer" value="0" label="Weight for branch point signal" />
+            <param argument="-yL" name="min_intron_len" type="integer" value="30" label="Minimum expected length of intron" />
+            <param argument="-yZ" name="intron_potential_weight" type="integer" value="0" label="Weight for intron potential" />
+            <param argument="-XG" name="max_gene_length" type="text" label="Reset maximum expected gene size, suffix k or M is effective" />
+        </when>
+    </conditional>
     </inputs>
     <outputs>
         <data name="output1" format="tabular">
@@ -49,32 +120,61 @@
             <param name="genome" ftype="fasta" value="genome.fasta" />
             <param name="query" ftype="fasta" value="query.fasta" />
             <param name="format" value="0"/>
-	        <output name="output1" value="output1_gff_genes.gff3" />
+            <conditional name="adv">
+                <param name="use" value="no" />
+            </conditional>
+	        <output name="output1" ftype="gff3" value="output1_gff_genes.gff3" />
         </test>
         <test>
             <param name="genome" ftype="fasta" value="genome.fasta" />
             <param name="query" ftype="fasta" value="query.fasta" />
             <param name="format" value="2"/>
-	        <output name="output1" value="output1_gff_matches.gff3" />
+            <conditional name="adv">
+                <param name="use" value="no" />
+            </conditional>
+	        <output name="output1" ftype="gff3" value="output1_gff_matches.gff3" />
         </test>
         <test>
             <param name="genome" ftype="fasta" value="genome.fasta" />
             <param name="query" ftype="fasta" value="query.fasta" />
             <param name="format" value="3"/>
-	        <output name="output1" value="output1.bed12" />
+            <conditional name="adv">
+                <param name="use" value="no" />
+            </conditional>
+	        <output name="output1" ftype="bed12" value="output1.bed12" />
         </test>
         <test>
             <param name="genome" ftype="fasta" value="genome.fasta" />
             <param name="query" ftype="fasta" value="query.fasta" />
             <param name="format" value="4"/>
-	        <output name="output1" value="output1.tabular" />
+            <conditional name="adv">
+                <param name="use" value="no" />
+            </conditional>
+	        <output name="output1" ftype="tabular" value="output1.tabular" />
+        </test>
+        <test>
+            <param name="genome" ftype="fasta" value="genome.fasta" />
+            <param name="query" ftype="fasta" value="query.fasta" />
+            <param name="format" value="4"/>
+            <param name="species_params" value="cynosemi" />
+            <conditional name="adv">
+                <param name="use" value="no" />
+            </conditional>
+	        <output name="output1" ftype="tabular" value="output2.tabular" />
         </test>
     </tests>
     <help><![CDATA[
         Spaln_ (space-efficient spliced alignment) is a stand-alone program that maps and aligns a set of cDNA or
         protein sequences onto a whole genomic sequence in a single job. 
 
-        This Galaxy wrapper currently only supports the default (i.e. *-O3*) algorithm for Spaln with default parameters.
+        This Galaxy wrapper currently only supports the default (i.e. *-O3*) algorithm for Spaln. This algorithm
+        takes FASTA format query and genome sequence and finds an alignment of the query (either cDNA or protein)
+        against the genome.
+
+        Spaln optionally takes a species name to use for parameter setting (the "-T" parameter). The 
+        "List spaln parameter tables" (list_spaln_tables) can be used to find a parameter file that is
+        close (in terms of taxonomic distance) to your species of interest. Use of this setting is recommended.
+
 
         .. _Spaln: http://www.genome.ist.i.kyoto-u.ac.jp/~aln_user/spaln/
     ]]></help>