view tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml @ 22:6f386c5dc4fb draft

v0.2.01 add -max_hsps, -use_sw_tback; lists args; internal updates
author peterjc
date Mon, 18 Sep 2017 06:21:27 -0400
parents 3034ce97dd33
children 31e517610e1f
line wrap: on
line source

<tool id="ncbi_segmasker_wrapper" name="NCBI BLAST+ segmasker" version="@WRAPPER_VERSION@">
    <description>low-complexity regions in protein sequences</description>
    <macros>
        <!-- Token holding the name of the BLAST+ executable wrapped by this tool.
             It is not referenced anywhere else in this file, so it is presumably
             consumed by macros in the imported file (TODO confirm). -->
        <token name="@BINARY@">segmasker</token>
        <!-- Shared macro file. The "preamble", "input_conditional_protein_db" and
             "blast_citations" macros expanded by this tool are not defined in this
             file, so they presumably come from here (TODO confirm). -->
        <import>ncbi_macros.xml</import>
    </macros>
    <expand macro="preamble" />
    <command detect_errors="aggressive">
## Cheetah template that assembles the segmasker command line.
## Lines starting hash hash are comments. Galaxy will turn newlines into spaces.
segmasker
## Choose the input according to the db_opts selector: a locally installed
## BLAST database, a BLAST database from the history, or (any other value)
## a FASTA file.
#if $db_opts.db_opts_selector == "db":
  -in '${db_opts.database.fields.path}' -infmt blastdb
#elif $db_opts.db_opts_selector == "histdb":
  -in '${os.path.join($db_opts.histdb.files_path, "blastdb")}' -infmt blastdb
#else:
## The FASTA input is referenced through the db_opts conditional, in the same
## way as the database and histdb parameters above, instead of relying on an
## unqualified top-level name.
## NOTE(review): assumes the subject parameter is declared inside the db_opts
## conditional by the input_conditional_protein_db macro; confirm in ncbi_macros.xml.
  -in '${db_opts.subject}' -infmt fasta
#end if
## Output file, SEG settings and output format come straight from the tool form.
-out '$outfile'
-window $window
-locut $locut
-hicut $hicut
-outfmt $outformat
    </command>
    <inputs>
        <!-- Selector for where the protein sequences come from; expanded from a
             macro that is not defined in this file. -->
        <expand macro="input_conditional_protein_db" />
        <!-- SEG settings, passed to segmasker with the option named in each help
             text. A window length below 1 is meaningless, so the form rejects it
             (min="1") instead of handing it to the command line. -->
        <param name="window" type="integer" value="12" min="1" label="SEG window length" help="(-window)" />
        <param name="locut" type="float" value="2.2" label="SEG low cutoff" help="(-locut)" />
        <param name="hicut" type="float" value="2.5" label="SEG high cutoff" help="(-hicut)" />
        <!-- Value is passed verbatim to -outfmt and also drives the datatype of
             the output dataset. -->
        <param name="outformat" type="select" label="Output format">
            <!-- seqloc_* formats are not very useful
                 and what BLAST+ calls 'interval' is not what Galaxy calls interval format
            -->
            <option value="fasta">FASTA</option>
            <option value="maskinfo_asn1_bin">maskinfo ASN.1 binary</option>
            <option value="maskinfo_asn1_text" selected="true">maskinfo ASN.1 text</option>
            <option value="maskinfo_xml">maskinfo_xml</option>
        </param>
    </inputs>
    <outputs>
        <!-- Single result dataset, written by the command via -out. Its datatype
             starts as maskinfo-asn1 and is switched below according to the
             selected output format. -->
        <data format="maskinfo-asn1" name="outfile" label="SEG Masked File">
            <change_format>
                <when format="fasta" input="outformat" value="fasta"/>
                <when format="maskinfo-asn1-binary" input="outformat" value="maskinfo_asn1_bin"/>
                <!-- No rule for maskinfo_asn1_text: that choice keeps the
                     default maskinfo-asn1 format declared on the data element. -->
                <when format="xml" input="outformat" value="maskinfo_xml"/>
            </change_format>
        </data>
    </outputs>
    <tests>
        <!-- Each test masks the same FASTA file with the default SEG settings and
             compares one output format against its expected file. The ftype
             attribute additionally checks that the output dataset received the
             datatype selected by the change_format rules. -->
        <!-- FASTA output -->
        <test>
            <param name="db_opts_selector" value="file" />
            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
            <param name="window" value="12" />
            <param name="locut" value="2.2" />
            <param name="hicut" value="2.5" />
            <param name="outformat" value="fasta" />
            <output name="outfile" file="segmasker_four_human.fasta" ftype="fasta" />
        </test>
        <!-- maskinfo ASN.1 binary output -->
        <test>
            <param name="db_opts_selector" value="file" />
            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
            <param name="window" value="12" />
            <param name="locut" value="2.2" />
            <param name="hicut" value="2.5" />
            <param name="outformat" value="maskinfo_asn1_bin" />
            <output name="outfile" file="segmasker_four_human.maskinfo-asn1-binary" ftype="maskinfo-asn1-binary" />
        </test>
        <!-- maskinfo ASN.1 text output (the default datatype of the output) -->
        <test>
            <param name="db_opts_selector" value="file" />
            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
            <param name="window" value="12" />
            <param name="locut" value="2.2" />
            <param name="hicut" value="2.5" />
            <param name="outformat" value="maskinfo_asn1_text" />
            <output name="outfile" file="segmasker_four_human.maskinfo-asn1" ftype="maskinfo-asn1" />
        </test>
    </tests>
    <help>
**What it does**

This tool identifies and masks out low-complexity regions of a protein database (or proteins in FASTA format) by using the SEG_ algorithm.

If you select *maskinfo ASN.1* (binary or text) as the output format, the output file can be used as masking data for the NCBI BLAST+ makeblastdb tool.

More information about segmasker can be found in the `BLAST Command Line Applications User Manual`_.

.. _BLAST Command Line Applications User Manual: https://www.ncbi.nlm.nih.gov/books/NBK279690/
.. _SEG: https://www.ncbi.nlm.nih.gov/pubmed/8743706

**References**

If you use this Galaxy tool in work leading to a scientific publication please
cite the following papers:

@REFERENCES@
    </help>
    <expand macro="blast_citations" />
</tool>