view mapper.xml @ 4:dbbe92348c7a draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/mirdeep2/mirdeep2_mapper commit 04c75332ac618b43ce5c3f307f7866e97147e865
author rnateam
date Thu, 05 Apr 2018 08:55:27 -0400
parents a8d24f4b6d95
children
line wrap: on
line source

<tool id="rbc_mirdeep2_mapper" name="MiRDeep2 Mapper" version="2.0.0.8.1">
    <description>process and map reads to a reference genome</description>
    <!--
        Shared parameter group "map_params": the mapping options that are expanded
        into every branch of the "operation" conditional that maps reads.
    -->
    <macros>
        <macro name="map_params">
            <conditional name="refGenomeSource">
                <param name="genomeSource"
                       type="select"
                       label="Will you select a reference genome from your history or use a built-in index?"
                       help="Map to genome. (-p)">
                    <option value="indexed">Use a built-in index</option>
                    <option value="history">Use one from the history</option>
                </param>
                <!-- Built-in index: choices come from the bowtie_indexes data table, sorted on column 2. -->
                <when value="indexed">
                    <param name="index"
                           type="select"
                           label="Select a reference genome"
                           help="If your genome of interest is not listed, contact your Galaxy admin.">
                        <options from_data_table="bowtie_indexes">
                            <filter type="sort_by" column="2"/>
                            <validator type="no_options" message="No indexes are available for the selected input dataset"/>
                        </options>
                    </param>
                </when>
                <!-- History: the reference is a FASTA dataset picked by the user. -->
                <when value="history">
                    <param name="ownFile" type="data" format="fasta" label="Select the reference genome"/>
                </when>
            </conditional>
            <!-- Emits -q on the command line when checked, nothing otherwise. -->
            <param name="map_mismatch"
                   type="boolean"
                   checked="false"
                   truevalue="-q"
                   falsevalue=""
                   label="Map with one mismatch in the seed (mapping takes longer)"
                   help="(-q)"/>
            <!-- Passed to -r; must be at least 1. -->
            <param name="map_threshold"
                   type="integer"
                   value="5"
                   optional="false"
                   label="A read is allowed to map up to this number of positions in the genome"
                   help="Map threshold. (-r)">
                <validator type="in_range" min="1" message="Minimum value is 1"/>
            </param>
        </macro>
    </macros>
    <!--
        Software dependency resolved by Galaxy before the job runs. The command
        section calls both mapper.pl and bowtie-build; presumably both are made
        available through this package (TODO confirm).
    -->
    <requirements>
        <requirement type="package" version="2.0.0.8">mirdeep2</requirement>
    </requirements>
    <!--
        Cheetah template that assembles the mapper.pl call:
          1. optionally builds a bowtie index from a history FASTA,
          2. passes either one reads dataset or the "samples" config file (-d),
          3. appends the preprocessing, collapsing and mapping options selected
             in the form.
        Every user-controlled value is single-quoted, including the adapter
        sequence given to -k.
    -->
    <command detect_errors="aggressive">
<![CDATA[
        ## A reference taken from the history has no index yet: build one in the
        ## job working directory before mapper.pl starts.
        #if $operation.collapse_map == "collapse_and_map" or $operation.collapse_map == "only_map"
            #if $operation.refGenomeSource.genomeSource == "history"
                bowtie-build '$operation.refGenomeSource.ownFile' custom_bowtie_indices &&
            #end if
        #end if

        mapper.pl

        ## Input: a single dataset, or the generated config file listing all samples.
        ## The format flags follow the datatype extension: -c for fasta, -e -h for fastq.
        #if $input.type == "single"
            '$input.reads'

            #if $input.reads.extension.startswith("fasta")
                -c
            #else if $input.reads.extension.startswith("fastq")
                -e
                -h
            #end if
        #else
            '$samples' -d

            ## Only the first pooled dataset is inspected to choose the format flags.
            #if $input.reads_list[0].reads.extension.startswith("fasta")
                -c
            #else if $input.reads_list[0].reads.extension.startswith("fastq")
                -e
                -h
            #end if
        #end if

        ## Boolean parameters expand to their flag (-j / -i) or to an empty string.
        $remove_non_canon

        $convert_rna_dna

        ## Adapter clipping; the sequence is quoted like every other user-supplied value.
        #if $clip_adapter.clip == "true"
            -k '$clip_adapter.adapter_seq'
        #end if

        -l $discard_short_reads

        ## Collapsing writes the collapsed reads output.
        #if $operation.collapse_map == "collapse_and_map" or $operation.collapse_map == "only_collapse"
            -m -s '$output_reads_collapsed'
        #end if

        ## Mapping: -p takes the freshly built index or the path of the built-in one.
        #if $operation.collapse_map == "collapse_and_map" or $operation.collapse_map == "only_map"
            -p

            #if $operation.refGenomeSource.genomeSource == "history"
                custom_bowtie_indices
            #else
                '$operation.refGenomeSource.index.fields.path'
            #end if
            $operation.map_mismatch
            -r $operation.map_threshold

            -t '$output_mapping'
        #end if

        -v -n
]]>
    </command>
    <!--
        "samples" is the file handed to mapper.pl together with -d when several
        read sets are pooled. It holds one line per "reads_list" entry: the
        dataset path followed by the sample name. For single-dataset runs the
        template renders nothing. The template text is whitespace-sensitive and
        must not be re-indented.
    -->
    <configfiles>
        <configfile name="samples"><![CDATA[#if $input.type == "multiple":
#for $r in $input.reads_list:
$r.reads    $r.sample_name
#end for
#end if]]></configfile>
    </configfiles>
    <!--
        Form definition. Three groups of parameters:
          * "input": one reads dataset, or a list of datasets with sample codes;
          * read preprocessing: -j, -i, adapter clipping (-k), minimum length (-l);
          * "operation": collapse and/or map, with the mapping options expanded
            from the "map_params" macro.
    -->
    <inputs>
        <conditional name="input">
            <param name="type" type="select" label="Pool multiple read sets">
                <option value="single" selected="true">No</option>
                <option value="multiple">Yes</option>
            </param>
            <when value="single">
                <param format="fastq,fasta" name="reads" type="data" label="Deep sequencing reads" help="Reads in fastq or FASTA format"/>
            </when>
            <when value="multiple">
                <!-- min="1": the command template reads reads_list[0], so an empty list must be rejected by the form. -->
                <repeat name="reads_list" title="Reads" min="1">
                    <param name="sample_name" value="" type="text" label="Sample name" help="Must be a 3 letters/digits code">
                        <validator type="expression" message="The sample name must be a 3 letters/digits code">len(value) == 3 and value.isalnum()</validator>
                    </param>
                    <param format="fastq,fasta" name="reads" type="data" optional="false" label="Deep sequencing reads" help="Reads in fastq or FASTA format"/>
                </repeat>
            </when>
        </conditional>
        <!-- Boolean flags: the truevalue is inserted verbatim into the command line. -->
        <param name="remove_non_canon" type="boolean" truevalue="-j" falsevalue="" checked="false" label="Remove reads with non-standard nucleotides" help="Remove all entries that have a sequence that contains letters other than a,c,g,t,u,n,A,C,G,T,U,N. (-j)"/>
        <param name="convert_rna_dna" type="boolean" truevalue="-i" falsevalue="" checked="false" label="Convert RNA to DNA alphabet (to map against genome)" help="(-i)"/>

        <conditional name="clip_adapter">
            <param name="clip" type="select" label="Clip 3' Adapter Sequence" help="(-k)">
                <option value="false">Don't Clip</option>
                <option value="true">Clip Sequence</option>
            </param>
            <when value="true">
                <param name="adapter_seq" value="" type="text" optional="false" label="Sequence to clip" help="Adapter Sequence can only contain a,c,g,t,u,n,A,C,G,T,U,N">
                    <!-- The pattern accepts exactly the alphabet named in the help text and message, N/n included. -->
                    <validator type="regex" message="Adapter can ONLY contain a,c,g,t,u,n,A,C,G,T,U,N">^[ACGTUNacgtun]+$</validator>
                </param>
            </when>
            <when value="false"/>
        </conditional>

        <param name="discard_short_reads" value="18" type="integer" optional="false" label="Discard reads shorter than this length" help="Set to 0 to keep all reads. (-l)">
            <validator type="in_range" min="0" message="Minimum value is 0"/>
        </param>

        <!-- The selected operation decides which outputs are created and whether the mapping options are shown. -->
        <conditional name="operation">
            <param name="collapse_map" type="select" label="Collapse reads and/or Map" help="(-m) and/or (-p)">
                <option value="collapse_and_map">Collapse reads and Map</option>
                <option value="only_map">Map</option>
                <option value="only_collapse">Collapse</option>
            </param>
            <when value="collapse_and_map">
                <expand macro="map_params"/>
            </when>
            <when value="only_map">
                <expand macro="map_params"/>
            </when>
            <when value="only_collapse"/>
        </conditional>
    </inputs>
    <!-- Each output is created only for the operations whose command line writes it. -->
    <outputs>
        <!-- Target of "-s": present whenever reads are collapsed. -->
        <data name="output_reads_collapsed" format="fasta" label="Collapsed reads of ${tool.name} on ${on_string}">
            <filter>operation['collapse_map'] in ("collapse_and_map", "only_collapse")</filter>
        </data>
        <!-- Target of "-t": present whenever reads are mapped. -->
        <data name="output_mapping" format="tabular" label="Mapping output of ${tool.name} on ${on_string} in ARF format">
            <filter>operation['collapse_map'] in ("collapse_and_map", "only_map")</filter>
        </data>
    </outputs>
    <!--
        Functional tests. Both clip the adapter, drop reads shorter than 18,
        collapse, and map against an index built from cel_cluster.fa.
        Conditional parameters are nested under the conditional that owns them.
    -->
    <tests>
        <!-- Single FASTA reads dataset. -->
        <test>
            <conditional name="input">
                <param name="type" value="single"/>
                <param name="reads" value="reads.fa"/>
            </conditional>
            <param name="remove_non_canon" value="True"/>
            <conditional name="clip_adapter">
                <param name="clip" value="true"/>
                <param name="adapter_seq" value="TCGTATGCCGTCTTCTGCTTGT"/>
            </conditional>
            <param name="discard_short_reads" value="18"/>
            <conditional name="operation">
                <param name="collapse_map" value="collapse_and_map"/>
                <conditional name="refGenomeSource">
                    <param name="genomeSource" value="history"/>
                    <param name="ownFile" value="cel_cluster.fa"/>
                </conditional>
            </conditional>
            <output name="output_reads_collapsed">
                <assert_contents>
                    <has_text text=">seq_349713_x268"/>
                    <has_text text="TCACCGGGTGTANATCAGCTAA"/>
                    <has_text text=">seq_354255_x214"/>
                    <has_text text="TAACCGGGTGAACACTTGCAGT"/>
                    <has_text text=">seq_357284_x187"/>
                </assert_contents>
            </output>
            <output name="output_mapping">
                <assert_contents>
                    <has_line_matching expression="^.*22\t1\t22\ttcaccgggtggaaactagcagt\tchrII:11534525-11540624\t22\t3060\t3081.*$"/>
                    <has_line_matching expression="^.*22\t1\t22\ttcaccgggtggaaactagtagt\tchrII:11534525-11540624\t22\t3060\t3081.*$"/>
                    <has_line_matching expression="^.*22\t1\t22\ttcaccgggtgtacatcagcgaa\tchrII:11534525-11540624\t22\t3631\t3652.*$"/>
                    <has_line_matching expression="^.*22\t1\t22\ttcaccgggagaaaaactggtgt\tchrII:11534525-11540624\t22\t3382\t3403.*$"/>
                    <has_line_matching expression="^.*25\t1\t25\ttcaccgggtggaaactagcagtggc\tchrII:11534525-11540624\t25\t3060\t3084.*$"/>
                </assert_contents>
            </output>
        </test>
        <!-- Two pooled FASTA datasets with sample codes sa1 and sa2. -->
        <test>
            <conditional name="input">
                <param name="type" value="multiple"/>
                <repeat name="reads_list">
                    <param name="sample_name" value="sa1"/>
                    <param name="reads" value="reads_sample1.fa"/>
                </repeat>
                <repeat name="reads_list">
                    <param name="sample_name" value="sa2"/>
                    <param name="reads" value="reads_sample2.fa"/>
                </repeat>
            </conditional>
            <param name="remove_non_canon" value="True"/>
            <conditional name="clip_adapter">
                <param name="clip" value="true"/>
                <param name="adapter_seq" value="TCGTATGCCGTCTTCTGCTTGT"/>
            </conditional>
            <param name="discard_short_reads" value="18"/>
            <conditional name="operation">
                <param name="collapse_map" value="collapse_and_map"/>
                <conditional name="refGenomeSource">
                    <param name="genomeSource" value="history"/>
                    <param name="ownFile" value="cel_cluster.fa"/>
                </conditional>
            </conditional>
            <output name="output_reads_collapsed">
                <assert_contents>
                    <has_text text=">sa1_220_x1"/>
                    <has_text text="TCACCGGGTGTACATCAGC"/>
                    <has_text text=">sa2_0_x250"/>
                    <has_text text="AATGACACTGGTTATCTTTTCCATCG"/>
                </assert_contents>
            </output>
            <output name="output_mapping">
                <assert_contents>
                    <has_line_matching expression="^.*22\t1\t22\ttcaccgggtggaaactagcagt\tchrII:11534525-11540624\t22\t3060\t3081.*$"/>
                    <has_line_matching expression="^.*21\t1\t21\ttcaccgggtggaaactagcag\tchrII:11534525-11540624\t21\t3060\t3080.*$"/>
                    <has_line_matching expression="^.*22\t1\t22\ttcaccgggtgtacatcagctaa\tchrII:11534525-11540624\t22\t3631\t3652.*$"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

The MiRDeep2 Mapper module is designed as a tool to process deep sequencing reads and/or map them to the reference genome.
The module works in sequence space, and can process or map data that is in sequence FASTA format.
A number of the functions of the mapper module are implemented specifically with Solexa/Illumina data in mind.

**Input**

Default input is a file in FASTA format, seq.txt or qseq.txt format. More input can be given depending on the options used.

**Output**

The output depends on the options used: a FASTA file with processed reads, an ARF file with mapped reads, or both.

ARF format:
ARF is a proprietary file format generated and processed by miRDeep2. It contains information about reads mapped to a reference genome. Each line in such a file contains 13 columns:

1. read identifier
2. length of read sequence
3. start position in read sequence that is mapped
4. end position in read sequence that is mapped
5. read sequence
6. identifier of the genome part to which a read is mapped. This is either a scaffold id or a chromosome name
7. length of the genome sequence a read is mapped to
8. start position in the genome where a read is mapped to
9. end position in the genome where a read is mapped to
10. genome sequence to which a read is mapped
11. genome strand information. Plus means the read is aligned to the sense-strand of the genome. Minus means it is aligned to the antisense-strand of the genome.
12. Number of mismatches in the read mapping
13. Edit string that indicates matches by lowercase 'm' and mismatches by uppercase 'M'

]]>
    </help>
    <!-- Publications, referenced by DOI, that Galaxy lists as citations for this tool. -->
    <citations>
        <citation type="doi">10.1093/nar/gkr688</citation>
        <citation type="doi">10.1002/0471250953.bi1210s36</citation>
    </citations>
</tool>