Mercurial > repos > bgruening > glimmer_knowledge_based

<tool id="glimmer_knowledge_based" name="Glimmer3" version="@WRAPPER_VERSION@">
    <description>Predict ORFs in prokaryotic genomes (knowledge-based)</description>
    <!-- Import shared macro definitions before the first <expand> that relies on them. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- The macros expanded below are not defined in this file; they are expected to come from macros.xml. -->
    <expand macro="bio_tools"/>
    <expand macro="requirements"/>
    <command detect_errors="aggressive"><![CDATA[
    ## Step 1: run the gene finder; glimmer_output is passed as the last argument
    ## and the .predict / .detail files carrying that prefix are read further down.
    #set $gc_value = float(str($gc_percent))
    glimmer3
        --max_olap ${max_olap}
        --gene_len ${gene_len}
        --threshold ${threshold}
        ## the GC percentage is only forwarded when a positive value was entered
        #if $gc_value > 0.0
            --gc_percent ${gc_percent}
        #end if

        ## stop codons come either from a Genbank translation table or a free-form list
        #if $stop_codon_opts.stop_codon_opts_selector == 'gb'
            --trans_table '${stop_codon_opts.genbank_gencode}'
        #else
            --stop_codons '${stop_codon_opts.stop_codons}'
        #end if

        --start_codons '${start_codons}'

        ## boolean switches render as their flag or as an empty string
        ${linear}
        ${no_indep}
        ${extend}
        '${seq_input}'
        '${icm_input}'
        glimmer_output

    &&

    ## Step 2: copy the optional reports into their datasets when they were requested
    #if $report
        cp glimmer_output.predict '${report_output}' &&
    #end if
    #if $detailed_report
        cp glimmer_output.detail '${detailed_output}' &&
    #end if

    ## Step 3: convert the prediction to FASTA sequences
    python '${__tool_directory__}/glimmer2seq.py' glimmer_output.predict '${seq_input}' '${genes_output}'
]]></command>
    <inputs>
        <!-- Input datasets: the genome sequence and the interpolated context model. -->
        <param name="seq_input" type="data" format="fasta"
               label="Genome Sequence"/>
        <param name="icm_input" type="data" format="tar"
               label="Interpolated context model (ICM)"/>

        <!-- Numeric settings that are inserted into the glimmer3 command line. -->
        <param name="max_olap" type="integer" value="50"
               label="Set maximum overlap length"
               help="Overlaps this short or shorter are ignored."/>
        <param name="gene_len" type="integer" value="90"
               label="Set the minimum gene length to n nucleotides"
               help="This does not include the bases in the stop codon."/>
        <param name="threshold" type="integer" value="30"
               label="Set threshold score for calling as gene"
               help="If the in-frame score >= N, then the region is given a number and considered a potential gene."/>
        <param name="gc_percent" type="float" value="0.0"
               label="Set the GC percentage of the independent model, i.e., the model of intergenic sequence"
               help="If 0.0 specified, the GC percentage will be counted from the input file."/>

        <!-- Switches: each one renders as its command line flag when checked and as an empty string otherwise. -->
        <param name="linear" type="boolean" checked="true"
               truevalue="--linear" falsevalue=""
               label="Assume linear rather than circular genome, i.e., no wraparound"/>
        <param name="no_indep" type="boolean" checked="false"
               truevalue="--no_indep" falsevalue=""
               label="Don’t use the independent probability score column at all"
               help="Using this option will produce more short gene predictions."/>
        <param name="extend" type="boolean" checked="false"
               truevalue="--extend" falsevalue=""
               label="Also score orfs that extend off the end of the sequence(s)"/>
        <param name="start_codons" type="text" value="atg,gtg,ttg"
               label="Specify start codons as a comma-separated list"/>

        <!-- Stop codons are chosen either through a Genbank translation table or as a free-form list;
             the command template branches on the selector value ("gb" versus anything else). -->
        <conditional name="stop_codon_opts">
            <param name="stop_codon_opts_selector" type="select" label="Specify stop codons as">
              <option value="gb" selected="true">Genbank translation table entry</option>
              <option value="free_form">Comma-separated list</option>
            </param>
            <when value="gb">
                <param name="genbank_gencode" type="select" label="Use Genbank translation table to specify stop codons">
                    <option value="1" selected="true">1. Standard</option>
                    <option value="2">2. Vertebrate Mitochondrial</option>
                    <option value="3">3. Yeast Mitochondrial</option>
                    <option value="4">4. Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
                    <option value="5">5. Invertebrate Mitochondrial</option>
                    <option value="6">6. Ciliate, Dasycladacean and Hexamita Nuclear Code</option>
                    <option value="9">9. Echinoderm Mitochondrial</option>
                    <option value="10">10. Euplotid Nuclear</option>
                    <option value="11">11. Bacteria and Archaea</option>
                    <option value="12">12. Alternative Yeast Nuclear</option>
                    <option value="13">13. Ascidian Mitochondrial</option>
                    <option value="14">14. Flatworm Mitochondrial</option>
                    <option value="15">15. Blepharisma Macronuclear</option>
                    <option value="16">16. Chlorophycean Mitochondrial</option>
                    <option value="21">21. Trematode Mitochondrial</option>
                    <option value="22">22. Scenedesmus obliquus mitochondrial</option>
                    <option value="23">23. Thraustochytrium Mitochondrial</option>
                    <option value="24">24. Pterobranchia mitochondrial</option>
                </param>
            </when>
            <when value="free_form">
                <param name="stop_codons" type="text" value="tag,tga,taa" label="Specify stop codons as a comma-separated list" />
            </when>
        </conditional>

        <!-- Output toggles: the command template and the output filters only test these for truth,
             so the default true/false string values are used instead of two identical empty strings. -->
        <param name="report" type="boolean" checked="false" label="Report the classic glimmer table output"/>
        <param name="detailed_report" type="boolean" checked="false" label="Output a detailed gene prediction report as separate file"/>
    </inputs>
    <outputs>
        <!-- Always produced: FASTA written by the glimmer2seq.py step of the command. -->
        <data name="genes_output"
              format="fasta"
              label="Glimmer3 on ${on_string} (Gene Prediction FASTA)"/>

        <!-- Created only when the "report" input is checked. -->
        <data name="report_output"
              format="txt"
              label="Glimmer3 on ${on_string} (Gene Prediction table)">
            <filter>report == True</filter>
        </data>

        <!-- Created only when the "detailed_report" input is checked. -->
        <data name="detailed_output"
              format="txt"
              label="Glimmer3 on ${on_string} (detailed report)">
            <filter>detailed_report == True</filter>
        </data>
    </outputs>
    <tests>
        <!-- Prediction with a prebuilt ICM and translation table 11.
             Both optional reports are requested, so three output datasets are expected. -->
        <test expect_num_outputs="3">
            <param name="seq_input" value="streptomyces_Tue6071_plasmid_genomic.fasta" />
            <param name="icm_input" value="streptomyces_Tu6071_plasmid_genes.icm" ftype="tar" />
            <param name="max_olap" value="50" />
            <param name="gene_len" value="90" />
            <param name="threshold" value="30" />
            <param name="gc_percent" value="0.0" />
            <!-- Boolean inputs are set with true/false rather than with their rendered flag strings. -->
            <param name="linear" value="true" />
            <param name="no_indep" value="false" />
            <param name="extend" value="false" />
            <param name="start_codons" value="atg,gtg,ttg" />
            <!-- The translation table lives inside the stop_codon_opts conditional, so it is addressed there. -->
            <conditional name="stop_codon_opts">
                <param name="stop_codon_opts_selector" value="gb" />
                <param name="genbank_gencode" value="11" />
            </conditional>
            <param name="detailed_report" value="true" />
            <param name="report" value="true" />
            <output name="genes_output" file="glimmer_w_icm_trans-table-11_genomic.fasta" ftype="fasta" />
            <output name="report_output" file="glimmer_w_icm_trans-table-11_genomic.out" ftype="txt" />
            <!-- A few differing lines are tolerated in the detailed report. -->
            <output name="detailed_output" file="glimmer_w_icm_trans-table-11_genomic.dout" ftype="txt" lines_diff="6" />
        </test>
    </tests>
    <help><![CDATA[
**What it does**

This is the main program that makes gene predictions based on an interpolated context model (ICM).

The ICM can be generated from extracted CDS of related organisms (ICM builder). If you can't generate an ICM, you can use the non-knowledge-based Glimmer tool for a de novo prediction instead.


**Input**

- Interpolated context model (ICM): Use the 'Glimmer ICM builder' tool to create one
- Genome Sequence in FASTA format


**Output**

- FASTA file with predicted proteins
- Glimmer prediction file (optional)
- Detailed gene prediction report (optional)
]]></help>
    <!-- The "citation" macro is not defined in this file; it is expected to come from the imported macros.xml. -->
    <expand macro="citation" />
</tool>
author	iuc
date	Sun, 20 Mar 2022 10:06:25 +0000
parents	9b2e283dc3b5
children