view twobit_to_fa.xml @ 0:cc33b1551791 draft default tip

planemo upload commit c1f0c5ceaac87b6b1db12160a8f5b287635db61b
author yating-l
date Mon, 12 Jun 2017 21:09:34 -0400
parents
children
line wrap: on
line source

<?xml version="1.0"?>
<tool id="twobit_to_fa" name="twoBitToFa" version="1.0">
    <description>Convert a twoBit file to FASTA format</description>

    <macros>
        <import>ucsc_macros.xml</import>
    </macros>

    <expand macro="requirements_twobit" />

    <!--
        Command template (Cheetah) that builds the twoBitToFa invocation.

        * masking_option "upper" adds -noMask; "hard" post-processes the
          FASTA stream with awk; "lower" passes the output through as-is.
        * extract_type selects at most one of -seq / -seqList / -bed.
        * In the hard-masking case the tool output is piped through awk, so
          pipefail is enabled to make a twoBitToFa failure visible to
          detect_errors="exit_code" (otherwise only awk's status is seen).
        * User-supplied values and dataset paths are single-quoted so the
          shell performs no expansion inside them.
        * The token on the first template line is expanded from
          ucsc_macros.xml (not visible here); presumably it defines the
          optional_param helper called below. TODO confirm.
    -->
    <command detect_errors="exit_code">
<![CDATA[
    @OPTIONAL_PARAM_FUNC@

    #set mask_mode = str($masking_option)
    #set selected_extract_type = str($extract_type.extract_type_selector)

    ## Only the hard-masking branch builds a pipeline, so only it needs pipefail
    #if $mask_mode == "hard":
        set -o pipefail &&
    #end if

    twoBitToFa
        #if $mask_mode == "upper":
            -noMask
        #end if

        #if $selected_extract_type == "single_sequence":
            -seq='${extract_type.seq}'

            $optional_param("-start", $extract_type.start)
            $optional_param("-end", $extract_type.end)
        #elif $selected_extract_type == "sequence_list":
            -seqList='${extract_type.seq_list}'
        #elif $selected_extract_type == "bed_file":
            -bed='${extract_type.bed_regions}'
            ${extract_type.bed_pos}
        #end if

        '${twobit_input}' stdout

    ## Hard masking: turn lower-case a/c/g/t into N on sequence lines,
    ## leaving FASTA header lines untouched
    #if $mask_mode == "hard":
        | awk '{ if (/^>/) { print } else { gsub(/[acgt]/, "N"); print } }'
    #end if

        > '${fasta_output}'
]]>
    </command>
    <inputs>
        <!-- twoBit dataset to convert -->
        <param name="twobit_input" type="data" format="twobit" label="twoBit input file" />

        <!-- How repeats are rendered; the command template branches on this value -->
        <param name="masking_option" type="select"
            label="Repeat masking option"
            help="Specify how repeats within the twoBit file should be represented">

            <option value="upper">Show repeats in uppercase (no masking)</option>

            <option value="lower" selected="true">Show repeats in lowercase (soft masking)</option>

            <option value="hard">Show repeats as N's (hard masking)</option>
        </param>

        <!-- Which part of the twoBit file to extract; each mode exposes its own parameters -->
        <conditional name="extract_type">
            <param name="extract_type_selector" type="select"
                    label="Sequence extraction mode"
                    help="Specify the method to extract sequences from a twoBit file">
                <option value="all" selected="true">All sequences</option>
                <option value="single_sequence">Single sequence</option>
                <option value="sequence_list">Sequence list</option>
                <option value="bed_file">BED file</option>
            </param>

            <!-- No extra parameters: the whole file is converted -->
            <when value="all" />

            <when value="single_sequence">
                <!-- Passed to -seq; an empty name would only fail at run time, so reject it in the form -->
                <param name="seq" type="text" label="Sequence name">
                    <validator type="empty_field" message="A sequence name is required" />
                </param>
                <!-- start/end are optional and are only emitted on the command line when set -->
                <param name="start" type="integer" min="0" optional="true"
                        label="Start position (zero-based)"
                        help="Start at given position in sequence (zero-based)" />
                <param name="end" type="integer" min="0" optional="true"
                        label="End position"
                        help="End at given position in sequence (non-inclusive)" />
            </when>

            <when value="sequence_list">
                <!-- Passed to -seqList -->
                <param name="seq_list" type="data" format="txt"
                        label="Sequence list"
                        help="List of sequence names in the format seqSpec[:start-end]" />
            </when>

            <when value="bed_file">
                <!-- Passed to -bed -->
                <param name="bed_regions" type="data" format="bed"
                        label="BED file"
                        help="Sequences specified in a BED file (exclude introns)" />

                <!-- Renders as the -bedPos flag when checked, as nothing otherwise -->
                <param name="bed_pos" type="boolean" checked="false"
                    truevalue="-bedPos" falsevalue=""
                    label="use chrom:start-end as the FASTA ID in output file"
                    help="-bedPos" />
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- The single FASTA dataset that the command redirects its final output stream into -->
        <data
            format="fasta"
            name="fasta_output" />
    </outputs>
    <!--
        Functional tests: one per masking mode and per extraction mode.
        All of them convert the same contigs.out.2bit fixture and compare the
        result with an expected FASTA file from test-data.
    -->
    <tests>
        <test>
            <!-- Test twoBitToFa with default (soft) masking -->
            <param name="twobit_input" value="contigs.out.2bit" ftype="twobit" />
            <output name="fasta_output" file="contigs.out.soft.fa" />
        </test>
        <test>
            <!-- Test twoBitToFa with no masking -->
            <param name="twobit_input" value="contigs.out.2bit" ftype="twobit" />
            <param name="masking_option" value="upper" />
            <output name="fasta_output" file="contigs.out.unmask.fa" />
        </test>
        <test>
            <!-- Test twoBitToFa with hard masking -->
            <param name="twobit_input" value="contigs.out.2bit" ftype="twobit" />
            <param name="masking_option" value="hard" />
            <output name="fasta_output" file="contigs.out.hard.fa" />
        </test>
        <test>
            <!-- Test twoBitToFa for single sequence -->
            <param name="twobit_input" value="contigs.out.2bit" ftype="twobit" />
            <param name="extract_type_selector" value="single_sequence" />
            <param name="seq" value="contig12" />
            <output name="fasta_output" file="contig12.soft.fa" />
        </test>
        <test>
            <!-- Test twoBitToFa for single sequence with range -->
            <param name="twobit_input" value="contigs.out.2bit" ftype="twobit" />
            <param name="extract_type_selector" value="single_sequence" />
            <param name="seq" value="contig12" />
            <param name="start" value="1000" />
            <param name="end" value="2000" />
            <output name="fasta_output" file="contig12_1000_2000.soft.fa" />
        </test>
        <test>
            <!-- Test twoBitToFa for single sequence with only start position -->
            <param name="twobit_input" value="contigs.out.2bit" ftype="twobit" />
            <param name="extract_type_selector" value="single_sequence" />
            <param name="seq" value="contig12" />
            <param name="start" value="1000" />
            <output name="fasta_output" file="contig12_1000_start.soft.fa" />
        </test>
        <test>
            <!-- Test twoBitToFa for seqList -->
            <param name="twobit_input" value="contigs.out.2bit" ftype="twobit" />
            <param name="extract_type_selector" value="sequence_list" />
            <param name="seq_list" value="seq_list.txt" ftype="txt" />
            <output name="fasta_output" file="contig12_seq_list.soft.fa" />
        </test>
        <test>
            <!-- Test twoBitToFa for bed items -->
            <param name="twobit_input" value="contigs.out.2bit" ftype="twobit" />
            <param name="extract_type_selector" value="bed_file" />
            <param name="bed_regions" value="contigs.genes.bed" ftype="bed" />
            <output name="fasta_output" file="contigs.genes.fa" />
        </test>
        <test>
            <!-- Test twoBitToFa for bed items with bedPos header -->
            <param name="twobit_input" value="contigs.out.2bit" ftype="twobit" />
            <param name="extract_type_selector" value="bed_file" />
            <param name="bed_regions" value="contigs.genes.bed" ftype="bed" />
            <!-- Boolean params are set with true/false in tests, not with their truevalue string -->
            <param name="bed_pos" value="true" />
            <output name="fasta_output" file="contigs.genes.bedPos.fa" />
        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

twoBitToFa converts either all or a part of a twoBit file into FASTA format.

.. class:: warningmark

twoBitToFa uses a `zero-start, half-open <http://genome.ucsc.edu/blog/the-ucsc-genome-browser-coordinate-counting-systems/>`_
coordinate system:

* The first nucleotide of a sequence is at position **0**
* The last nucleotide of a sequence is at position **end - 1**

    ]]></help>

    <expand macro="citations" />
</tool>