view spaln.xml @ 1:37b5e1f0b544 draft

"planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
author iuc
date Thu, 16 Jul 2020 07:57:10 -0400
parents 95ea8d97abb4
children dd0cd2319ae5
line wrap: on
line source

<tool id="spaln" name="Spaln: align cDNA or Protein to genome" version="@TOOL_VERSION@+galaxy0">
    <!-- Short summary of what the tool does. -->
    <description>Maps and aligns a set of cDNA or protein sequences onto a whole genomic sequence.</description>
    <!-- Shared definitions are imported from macros.xml; the @TOOL_VERSION@ token used in this file
         is presumably defined there (TODO confirm against macros.xml). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- EDAM ontology topic annotation for this tool. -->
    <edam_topics>
        <edam_topic>topic_3512</edam_topic>
    </edam_topics>
    <!-- Package dependency: spaln, pinned to the same @TOOL_VERSION@ token as the tool version. -->
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">spaln</requirement>
    </requirements>
    <!--
        Command template (Cheetah).
        * Always passes the thread count (-t, taken from GALAXY_SLOTS with a fallback of 1)
          and the output format code (-O).
        * Adds -T only when a species table name was supplied.
        * Adds the tuning options only when the "adv" conditional is set to "yes".
        * spaln's stdout is redirected into the output1 dataset.
        Every valued option is wrapped in a matched pair of single quotes; boolean flags are
        left unquoted so that an unchecked box contributes nothing to the command line.
    -->
    <command detect_errors="aggressive"><![CDATA[
        spaln -t\${GALAXY_SLOTS:-1} -O$format
        #if str($species_params).strip() != ''
            -T'${species_params}'
        #end if
        #if $adv.use == "yes"
            -S'${adv.query_orientation}'
            -V'${adv.hirschberg_threshold}'
            -pa'${adv.polya_trim}'
            ## boolean flag: expands to the flag itself or to nothing, so it must stay unquoted
            ${adv.all_results}
            -yu'${adv.gap_extension_penalty}'
            -yv'${adv.gap_open_penalty}'
            -yw'${adv.dp_matrix_scan_width}'
            -ya'${adv.splice_stringency}'
            -yj'${adv.gap_penalty_incline}'
            -yk'${adv.gap_penalty_flex}'
            ## boolean flag: quoting it would pass an empty positional argument when unchecked
            ${adv.double_affine_gap}
            -ym'${adv.match_score}'
            -yn'${adv.mismatch_score}'
            -yo'${adv.stop_codon_penalty}'
            -yx'${adv.frameshift_penalty}'
            -yy'${adv.splice_site_weight}'
            -yz'${adv.coding_potential_weight}'
            -yB'${adv.branch_point_weight}'
            -yL'${adv.min_intron_len}'
            -yZ'${adv.intron_potential_weight}'
            #if str($adv.max_gene_length).strip() != ''
                -XG'${adv.max_gene_length}'
            #end if
        #end if
        '$genome' '$query' >'$output1'
    ]]></command>
    <inputs>
        <!-- Positional arguments of spaln: the genome to search and the query sequences. -->
        <param type="data" name="genome" format="fasta" label="Genome sequence to search (FASTA format)" />
        <param type="data" name="query" format="fasta" label="Query sequence(s) (protein or cDNA)" />
        <!-- Output format code passed as -O; the same value selects the datatype of output1 via change_format. -->
        <param argument="-O" type="select" name="format" label="Output format">
            <option value="0">GFF3 format genes</option>
            <option value="2">GFF3 format matches</option>
            <option value="3">BED format</option>
            <option value="4">Tabular format exon information</option>
        </param>
        <!-- Optional species table name; -T is only emitted when this is non-empty. -->
        <param argument="-T" name="species_params" type="text" optional="true" label="Species to use for parameter setting" help="Choose a species table (e.g. cynosemi) from which to read parameters to optimise spaln" />
        <!-- Tuning options; none of them reach the command line unless "use" is set to "yes". -->
        <conditional name="adv">
            <param type="select" name="use" label="Advanced settings">
                <option selected="true" value="no">No</option>
                <option value="yes">Yes</option>
            </param>
            <when value="no">
            </when>
            <when value="yes">
                <param argument="-S" name="query_orientation" type="select" label="DNA query orientation" help="Determines how to treat orientation of query sequence when searching">
                    <option value="0">Infer orientation from sequence header (no poly-A/poly-T trimming)</option>
                    <option value="1">Forward orientation only. Poly-A tail might be trimmed off</option>
                    <option value="2">Reverse orientation only. Leading poly-T might be trimmed off</option>
                    <option selected="true" value="3">Examine both orientations. Poly-A / Poly-T might be trimmed off</option>
                </param>
                <param argument="-V" name="hirschberg_threshold" type="integer" value="16777216" label="Minimum space to induce Hirschberg's algorithm" help="Default is 16M (16x1024x1024 bytes)" />
                <param argument="-pa" name="polya_trim" type="integer" value="12" label="Limit 3' poly-As to this number of bases" help="poly-A/poly-T trimming is only done if -S (orientation) option is 0 or 3" />
                <param argument="-pw" name="all_results" type="boolean" checked="false" truevalue="-pw" falsevalue="" label="Report results even if the score is below the threshold" />
                <param argument="-yu" name="gap_extension_penalty" type="integer" value="3" label="Gap-extension penalty" />
                <param argument="-yv" name="gap_open_penalty" type="integer" value="8" label="Gap-open penalty" />
                <param argument="-yw" name="dp_matrix_scan_width" type="integer" value="100" label="Band width for DP matrix scan" />
                <param argument="-ya" name="splice_stringency" type="select" label="Stringency of splice site selection" help="Which dinucleotide pairs to accept at the ends of an intron">
                    <option value="0" selected="true">accept only the canonical pairs (GT..AG,GC..AG,AT..AC)</option>
                    <option value="1">accept also AT..AN</option>
                    <option value="2">allow up to one mismatch from GT..AG</option>
                    <option value="3">accept any pairs</option>
                </param>
                <param argument="-yj" name="gap_penalty_incline" type="float" value="0.6" label="Incline of long gap penalty" />
                <param argument="-yk" name="gap_penalty_flex" type="integer" value="7" label="Flex point where the incline of gap penalty changes" />
                <param argument="-yl3" name="double_affine_gap" type="boolean" checked="false" truevalue="-yl3" falsevalue="" label="Use double affine gap penalty" help="Use the double affine gap rather than single affine gap penalty calculation" />
                <param argument="-ym" name="match_score" type="integer" value="2" label="Nucleotide match score" />
                <param argument="-yn" name="mismatch_score" type="integer" value="-6" label="Nucleotide mismatch score" />
                <param argument="-yo" name="stop_codon_penalty" type="integer" value="100" label="Penalty for a premature termination codon" />
                <param argument="-yx" name="frameshift_penalty" type="integer" value="100" label="Penalty for a frame shift error" />
                <param argument="-yy" name="splice_site_weight" type="integer" value="8" label="Weight for splice site signal" />
                <param argument="-yz" name="coding_potential_weight" type="integer" value="2" label="Weight for coding potential" />
                <param argument="-yB" name="branch_point_weight" type="integer" value="0" label="Weight for branch point signal" />
                <param argument="-yL" name="min_intron_len" type="integer" value="30" label="Minimum expected length of intron" />
                <param argument="-yZ" name="intron_potential_weight" type="integer" value="0" label="Weight for intron potential" />
                <!-- Free text because a k or M suffix is accepted; -XG is only emitted when this is non-empty. -->
                <param argument="-XG" name="max_gene_length" type="text" label="Reset maximum expected gene size, suffix k or M is effective" />
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Single result dataset; tabular unless the "format" selection switches the datatype below. -->
        <data name="output1" format="tabular">
            <change_format>
                <!-- These values mirror the format codes of spaln's -O option; not every spaln format is currently supported. -->
                <when input="format" value="0" format="gff3" />
                <when input="format" value="2" format="gff3" />
                <when input="format" value="3" format="bed12" />
                <when input="format" value="4" format="tabular" />
            </change_format>
            <!--
                Column-name metadata for the tabular (format 4) output is disabled for now:
                <actions> .. <conditional> .. <when> .. <action> currently does not work in Galaxy.
                https://github.com/galaxyproject/galaxy/pull/7197 addresses this, so the block below
                stays commented out until that is merged.
            <actions>
                <conditional name="format">
                    <when value="4">
                        <action type="metadata" name="column_names" default="rID,gID,%id,ExonL,MisMch,Unpair,ref_l,ref_r,tgt_l,tgt_r,eScore,IntrnL,iScore,Sig3/I,Sig5/T  # -  X P DiNuc" />
                    </when>
                </conditional>
            </actions>
            -->
        </data>
    </outputs>
    <tests>
        <!-- Format 0: GFF3 genes, default settings. -->
        <test>
            <param name="genome" value="genome.fasta" ftype="fasta" />
            <param name="query" value="query.fasta" ftype="fasta" />
            <param name="format" value="0" />
            <conditional name="adv">
                <param name="use" value="no" />
            </conditional>
            <output name="output1" value="output1_gff_genes.gff3" ftype="gff3" />
        </test>
        <!-- Format 2: GFF3 matches, default settings. -->
        <test>
            <param name="genome" value="genome.fasta" ftype="fasta" />
            <param name="query" value="query.fasta" ftype="fasta" />
            <param name="format" value="2" />
            <conditional name="adv">
                <param name="use" value="no" />
            </conditional>
            <output name="output1" value="output1_gff_matches.gff3" ftype="gff3" />
        </test>
        <!-- Format 3: BED output, default settings. -->
        <test>
            <param name="genome" value="genome.fasta" ftype="fasta" />
            <param name="query" value="query.fasta" ftype="fasta" />
            <param name="format" value="3" />
            <conditional name="adv">
                <param name="use" value="no" />
            </conditional>
            <output name="output1" value="output1.bed12" ftype="bed12" />
        </test>
        <!-- Format 4: tabular exon information, default settings. -->
        <test>
            <param name="genome" value="genome.fasta" ftype="fasta" />
            <param name="query" value="query.fasta" ftype="fasta" />
            <param name="format" value="4" />
            <conditional name="adv">
                <param name="use" value="no" />
            </conditional>
            <output name="output1" value="output1.tabular" ftype="tabular" />
        </test>
        <!-- Format 4 again, this time with a species parameter table (-T cynosemi). -->
        <test>
            <param name="genome" value="genome.fasta" ftype="fasta" />
            <param name="query" value="query.fasta" ftype="fasta" />
            <param name="format" value="4" />
            <param name="species_params" value="cynosemi" />
            <conditional name="adv">
                <param name="use" value="no" />
            </conditional>
            <output name="output1" value="output2.tabular" ftype="tabular" />
        </test>
    </tests>
    <help><![CDATA[
        Spaln_ (space-efficient spliced alignment) is a stand-alone program that maps and aligns a set of cDNA or
        protein sequences onto a whole genomic sequence in a single job. 

        This Galaxy wrapper currently only supports the default (i.e. *-Q3*) algorithm for Spaln. This algorithm
        takes FASTA format query and genome sequences and finds an alignment of the query (either cDNA or protein)
        against the genome.

        Spaln optionally takes a species name to use for parameter setting (the "-T" parameter). The
        "List spaln parameter tables" tool (list_spaln_tables) can be used to find a parameter file that is
        close (in terms of taxonomic distance) to your species of interest. Use of this setting is recommended.


        .. _Spaln: http://www.genome.ist.i.kyoto-u.ac.jp/~aln_user/spaln/
    ]]></help>
    <!-- Publication to cite for Spaln. A DOI always starts with the "10." prefix;
         without it the citation cannot be resolved. -->
    <citations>
        <citation type="doi">10.1093/nar/gkn105</citation>
    </citations>
</tool>