view chimera.ccode.xml @ 4:6e2e58e07e0f draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mothur commit be5c8af076296a53959fc3ed2b82d92dc7607eeb
author iuc
date Mon, 23 Apr 2018 09:39:31 -0400
parents ad1b47de42a3
children 79255ca0b19a
line wrap: on
line source

<tool profile="16.07" id="mothur_chimera_ccode" name="Chimera.ccode" version="@WRAPPER_VERSION@.0">
    <!-- Short summary of the tool. -->
    <description>Find putative chimeras using ccode</description>
    <!-- Shared definitions come from macros.xml; the expand elements below and,
         presumably, the @...@ tokens used in this file are resolved from it. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <expand macro="version_command"/>
    <!-- Cheetah template for the job script: symlink the selected inputs to
         fixed names, echo a chimera.ccode(...) call with the chosen options,
         strip spaces from it, pipe it into mothur and copy mothur's output
         to mothur.out.log. -->
    <command><![CDATA[
@SHELL_OPTIONS@

## create symlinks to input datasets
ln -s '$fasta' fasta.dat &&
ln -s '$alignment.reference' alignment.template.dat &&
## the history mask dataset is declared with argument="mask", so it is
## reachable as mask_cond.mask (there is no mask_cond.input parameter)
#if $mask_cond.source2 == "history":
    ln -s '$mask_cond.mask' mask_cond.mask.dat &&
#end if

## mask, window and numwanted are only emitted when selected or greater than 0
echo 'chimera.ccode(
    fasta=fasta.dat,
    reference=alignment.template.dat,
    filter=$filter,
    #if $mask_cond.source2 == "default":
        mask=default,
    #elif $mask_cond.source2 == "history":
        mask=mask_cond.mask.dat,
    #end if
    #if int($window) > 0:
        window=$window,
    #end if
    #if int($numwanted) > 0:
        numwanted=$numwanted,
    #end if
    processors='\${GALAXY_SLOTS:-8}'
)'
| sed 's/ //g'  ## mothur trips over whitespace
| mothur
| tee mothur.out.log
    ]]></command>
    <!-- Form parameters for chimera.ccode. -->
    <inputs>
        <!-- Aligned query sequences to be checked. -->
        <param argument="fasta" type="data" format="mothur.align" label="fasta - Candidate Aligned Sequences"/>
        <!-- Reference alignment: either a history dataset or an entry of the
             mothur_aligndb data table. Both branches expose the value as "reference". -->
        <conditional name="alignment">
            <param name="source" type="select" label="Select Reference Template from">
                <option value="hist">History</option>
                <option value="ref">Cached Reference</option>
            </param>
            <when value="ref">
                <param argument="reference" type="select" label="reference - Select an alignment database">
                    <options from_data_table="mothur_aligndb"/>
                </param>
            </when>
            <when value="hist">
                <param argument="reference" type="data" format="fasta" label="reference - Reference to align with"/>
            </when>
        </conditional>
        <param argument="filter" type="boolean" falsevalue="False" truevalue="True" checked="false" label="filter - Apply a 50% soft vertical filter"/>
        <!-- Optional mask: none (empty value), the default ecoli mask, or a
             mothur.filter dataset from the history. -->
        <conditional name="mask_cond">
            <param name="source2" type="select" label="Mask option">
                <option value="">None</option>
                <option value="default">default ecoli mask</option>
                <option value="history">From Your History</option>
            </param>
            <when value="history">
                <param argument="mask" type="data" format="mothur.filter" label="Mask for the reference file"/>
            </when>
            <when value=""/>
            <when value="default"/>
        </conditional>
        <!-- For window and numwanted, a value below 1 means the argument is
             omitted so that mothur applies its own default (see the help texts). -->
        <param name="window" type="integer" value="0" label="window - Length of sequence you want in each window analyzed"
            help="Recommended window size is between 5 and 20% of your trimmed sequence length. Default is 10% of the sequence length. Default is used if &lt; 1"/>
        <param name="numwanted" type="integer" value="20" label="numwanted - Number of sequences compared with each query"
            help="uses default if set to 0"/>
        <expand macro="param-savelog"/>
    </inputs>
    <!-- Datasets produced by the job. The data outputs are located in the
         working directory through their from_work_dir patterns. -->
    <outputs>
        <expand macro="logfile-output"/>
        <!-- chimera report -->
        <data
            name="ccode.chimeras"
            format="txt"
            from_work_dir="fasta.*ccode.chimeras"
            label="${tool.name} on ${on_string}: ccode.chimeras"/>
        <!-- accnos list -->
        <data
            name="ccode.accnos"
            format="mothur.accnos"
            from_work_dir="fasta.*ccode.accnos"
            label="${tool.name} on ${on_string}: ccode.accnos"/>
        <!-- mapinfo table -->
        <data
            name="mapinfo"
            format="tabular"
            from_work_dir="*.mapinfo"
            label="${tool.name} on ${on_string}: mapinfo"/>
    </outputs>
    <!-- Functional tests. Both use a reference alignment taken from the history. -->
    <tests>
        <!-- test without a mask; window and numwanted keep their form defaults -->
        <test>
            <param name="fasta" value="Mock_S280_L001_R1_001_small.trim.contigs.good.align_head"/>
            <param name="source" value="hist"/>
            <param name="reference" value="HMP_MOCK.v35.align"/>
            <param name="savelog" value="true"/>
            <!-- the report must contain the per-window table header -->
            <output name="ccode.chimeras" ftype="txt">
                <assert_contents>
                    <has_line_matching expression="^Window\tAvgQ\t\(sdQ\)\tAvgR\t\(sdR\)\tRatio\tAnova$"/>
                </assert_contents>
            </output>
            <output name="ccode.accnos" file="Mock_S280_L001_R1_001_small.trim.contigs.good.ccode.accnos" ftype="mothur.accnos"/>
            <output name="mapinfo" md5="ef3353b1467fe95a24153513917e6444" ftype="tabular"/>
            <expand macro="logfile-test"/>
        </test>
        <!-- test with default ecoli mask -->
        <test>
            <param name="fasta" value="Mock_S280_L001_R1_001_small.trim.contigs.good.align_head"/>
            <param name="source" value="hist"/>
            <param name="reference" value="HMP_MOCK.v35.align"/>
            <param name="source2" value="default"/>
            <param name="window" value="100"/>
            <param name="savelog" value="true"/>
            <!-- the report must contain the per-window table header -->
            <output name="ccode.chimeras" ftype="txt">
                <assert_contents>
                    <has_line_matching expression="^Window\tAvgQ\t\(sdQ\)\tAvgR\t\(sdR\)\tRatio\tAnova$"/>
                </assert_contents>
            </output>
            <output name="ccode.accnos" md5="47ddf86ec36aafd495aa902b3e06ffca" ftype="mothur.accnos"/>
            <output name="mapinfo" md5="25221b1e109d9c0a939efb319e7a9896" ftype="tabular"/>
            <expand macro="logfile-test"/>
        </test>
    </tests>
    <help><![CDATA[

@MOTHUR_OVERVIEW@

**Command Documentation**

The chimera.ccode_ command identifies putative chimeras using the ccode approach (Chimera and Cross-Over Detection and Evaluation).  Ccode_ compares differences in distances, for each word, between query sequence and reference sequences, and reference sequences and themselves.

This method was written using the algorithms described in the paper_ "Evaluating putative chimeric sequences from PCR-amplified products" by Juan M. Gonzalez, Johannes Zimmerman and Cesareo Saiz-Jimenez.

The program can analyze sequences for any required word length. Generally, values of 5-20% of sequence length appear to deliver accurate results, for example, working on 16S rDNA sequences with a full-length of ~1500 nt. It should be noted that the use of fragments either too long or too short might result in a reduction of sensitivity.

.. _Ccode: http://www.microextreme.net/downloads.html
.. _paper: http://bioinformatics.oxfordjournals.org/content/21/3/333.full.pdf
.. _chimera.ccode: https://www.mothur.org/wiki/Chimera.ccode

    ]]></help>
    <expand macro="citations"/>
</tool>