view gaeval.xml @ 0:1dd7adc21b6d draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/aegean commit e6c01517075cab35e620fe1bbdb5fd68e4d1359f"
author iuc
date Sun, 03 Jan 2021 14:59:26 +0000
parents
children 8dc7512c9dc9
line wrap: on
line source

<tool id="aegean_gaeval" name="AEGeAn GAEVAL" version="@TOOL_VERSION@" profile="20.01">
    <!-- Short summary of what the tool does. -->
    <description> compute coverage and integrity scores for gene models using transcript alignments.</description>
    <!-- Shared definitions are imported from macros.xml; presumably this is where the
         @TOOL_VERSION@ token and the macros expanded below are defined (confirm there). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="edam_ontology"/>
    <expand macro="requirements"/>
    <!-- Command used to report the version of the underlying gaeval executable. -->
    <version_command>gaeval --version</version_command>
    <!-- Runs gaeval on the alignment and gene GFF3 inputs, passing the four weights
         and the three expected feature lengths, and redirects standard output into
         the output dataset. A non-zero exit code is treated as a failure. -->
    <command detect_errors="exit_code"><![CDATA[
        gaeval
            '$alignmentgff3'
            '$genesgff3'
            -a $weights.alpha
            -b $weights.beta
            -g $weights.gamma
            -e $weights.epsilon
            -c $expected.expcds
            -5 $expected.exp5putr
            -3 $expected.exp3putr
            > '$output'
    ]]></command>
    <inputs>
        <!-- The two GFF3 datasets handed to gaeval as positional arguments:
             transcript alignments first, gene models second. -->
        <param name="alignmentgff3" type="data" format="gff3" label="Alignment GFF3 file"/>
        <param name="genesgff3" type="data" format="gff3" label="Genes GFF3 file"/>
        <!-- Weight options (-a, -b, -g, -e); each value is restricted to the range 0 to 1.
             NOTE(review): the defaults sum to 1.0 and upstream gaeval presumably requires
             the four weights to sum to 1.0; nothing here enforces that. Confirm against
             the gaeval usage text. -->
        <section name="weights"
            title="Weights for calculating integrity score"
            expanded="False">
            <param argument="--alpha" type="float"
                min="0" max="1" value="0.6"
                label="Introns confirmed, or % expected CDS length for single-exon genes"
                help="The percentage of introns confirmed by an alignment gap; for single-exon gene predictions lacking introns, it represents the ratio of the observed CDS length to the expected CDS length."/>
            <param argument="--beta" type="float"
                min="0" max="1" value="0.3"
                label="Exon coverage"
                help="Overall percentage of individual exons spanned by at least one sequencing read."/>
            <param argument="--gamma" type="float"
                min="0" max="1" value="0.05"
                label="% expected 5’ UTR length"
                help="The ratio of the observed 5’ UTR length to the expected 5’ UTR length."/>
            <param argument="--epsilon" type="float"
                min="0" max="1" value="0.05"
                label="% expected 3’ UTR length"
                help="The ratio of the observed 3’ UTR length to the expected 3’ UTR length."/>
        </section>
        <!-- Expected feature length options (-c, -5, -3); each value is restricted
             to the range 0 to 1000. -->
        <section name="expected"
            title="Expected feature lengths for calculating integrity score"
            expanded="False">
            <param name="expcds" type="integer"
                min="0" max="1000" value="400"
                label="Expected CDS length (in bp) (-c)"/>
            <param name="exp5putr" type="integer"
                min="0" max="1000" value="200"
                label="Expected 5’ UTR length (-5)"/>
            <param name="exp3putr" type="integer"
                min="0" max="1000" value="100"
                label="Expected 3’ UTR length (-3)"/>
        </section>
    </inputs>
    <outputs>
        <!-- Single dataset that receives gaeval's standard output.
             NOTE(review): the help text describes the output as the input GFF3 with two
             extra mRNA attributes, while the declared format is tabular; confirm which
             datatype is intended. -->
        <data name="output" format="tabular"/>
    </outputs>
    <tests>
        <!-- Test 1: both sections left at their default values. -->
        <test expect_num_outputs="1">
            <param name="alignmentgff3" value="TAIR10_GFF3_alignment.gff"/>
            <param name="genesgff3" value="TAIR10_GFF3_genes.gff"/>
            <output name="output" file="gaeval_output_test1.txt"/>
        </test>
        <!-- Test 2: custom integrity-score weights. The last parameter was previously
             misspelled "epsion", so it did not address the epsilon weight. -->
        <test expect_num_outputs="1">
            <param name="alignmentgff3" value="TAIR10_GFF3_alignment.gff"/>
            <param name="genesgff3" value="TAIR10_GFF3_genes.gff"/>
            <section name="weights">
                <param name="alpha" value="0.5"/>
                <param name="beta" value="0.4"/>
                <param name="gamma" value="0.05"/>
                <param name="epsilon" value="0.05"/>
            </section>
            <output name="output" file="gaeval_output_test2.txt"/>
        </test>
        <!-- Test 3: custom expected feature lengths; up to two differing lines are tolerated. -->
        <test expect_num_outputs="1">
            <param name="alignmentgff3" value="TAIR10_GFF3_alignment.gff"/>
            <param name="genesgff3" value="TAIR10_GFF3_genes.gff"/>
            <section name="expected">
                <param name="expcds" value="20"/>
                <param name="exp5putr" value="150"/>
                <param name="exp3putr" value="120"/>
            </section>
            <output name="output" file="gaeval_output_test3.txt" lines_diff="2"/>
        </test>
    </tests>
    <help>
        <![CDATA[
.. class:: infomark
            
**Purpose**
            
GAEVAL is a program for computing coverage and integrity scores for gene models using transcript alignments. The integrity score is a value between 0 and 1 that indicates the level of agreement between the gene model and any related transcript alignments, with 0 corresponding to no transcript support and 1 corresponding to complete transcript support.
            
-----
            
.. class:: infomark
            
**Input**
            
Input for GAEVAL is two GFF3 files, one containing gene predictions/annotations and one containing transcript alignments. Although the GFF3 Specification explicitly supports several similar encoding conventions for alignment features, only one is supported by GAEVAL, as shown below::


    ctg123 . cDNA_match 1050  1500  5.8e-42  +  . ID=match00001;Target=cdna0123 12 462
    ctg123 . cDNA_match 5000  5500  8.1e-43  +  . ID=match00001;Target=cdna0123 463 963
    ctg123 . cDNA_match 7000  9000  1.4e-40  +  . ID=match00001;Target=cdna0123 964 2964


GAEVAL will accept alignments with types cDNA_match, EST_match, and the generic nucleotide_match, depending on the source of the data.
            
The Target and Gap attributes are permitted by GAEVAL, but GAEVAL does not interpret them. Gapped or spliced alignments must be encoded as multifeatures, with each segment of the alignment on its own distinct line and all segments of a single alignment sharing the same ID attribute.
            
-----
            
.. class:: infomark
            
**Output**
            
GAEVAL computes coverage and integrity scores for each mRNA feature in the gene prediction input. The output will be identical to the input, except that each mRNA feature will have two new attributes: gaeval_coverage and gaeval_integrity.
            
]]>
    </help>
    <!-- Citation entries come from the "citations" macro. -->
    <expand macro="citations"/>
</tool>