view mapper.xml @ 3:a8d24f4b6d95 draft

planemo upload for repository commit 3103ebed1a420c7d3415b67ef532ea579edf9faa
author rnateam
date Wed, 12 Jul 2017 14:38:46 -0400
parents ab8cd78709e1
children dbbe92348c7a
line wrap: on
line source

<tool id="rbc_mirdeep2_mapper" name="MiRDeep2 Mapper" version="2.0.0">
    <description>process and map reads to a reference genome</description>
        <macros>
            <!-- Mapping inputs shared by the "collapse_and_map" and "only_map" branches of the
                 "operation" conditional, which pull them in via <expand macro="map_params"/>. -->
            <macro name="map_params">
                <!-- Reference genome: either a built-in Bowtie index or a FASTA dataset from the history. -->
                <conditional name="refGenomeSource">
                    <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Map to genome. (-p)">
                        <option value="indexed">Use a built-in index</option>
                        <option value="history">Use one from the history</option>
                    </param>
                    <when value="indexed">
                        <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact your Galaxy admin.">
                            <options from_data_table="bowtie_indexes">
                                <filter type="sort_by" column="2"/>
                                <validator type="no_options" message="No indexes are available for the selected input dataset"/>
                            </options>
                        </param>
                    </when> <!-- built-in -->
                    <when value="history">
                        <param name="ownFile" type="data" format="fasta" label="Select the reference genome" />
                    </when> <!-- history -->
                </conditional> <!-- refGenomeSource -->
                <!-- Boolean whose true value is the literal flag "-q" and whose false value is empty. -->
                <param name="map_mismatch" type="boolean" truevalue="-q" falsevalue="" checked="false" label="Map with one mismatch in the seed (mapping takes longer)" help="(-q)"/>
                <!-- Integer passed with -r; the validator rejects values below 1. -->
                <param name="map_threshold" value="5" type="integer" optional="false" label="A read is allowed to map up to this number of positions in the genome" help="Map threshold. (-r)">
                    <validator type="in_range" min="1" message="Minimum value is 1"/>
                </param>
            </macro>
        </macros>
        <requirements>
            <!-- Declares the mirdeep2 package as a dependency of this tool. -->
            <!-- NOTE(review): the version attribute is empty, so no specific mirdeep2 release is
                 pinned here; TODO confirm the intended version and set it. -->
            <requirement type="package" version="">mirdeep2</requirement>
        </requirements>
        <stdio>
            <!-- Anything other than zero is an error: the two ranges cover every positive
                 and every negative exit code. -->
            <exit_code range="1:" />
            <exit_code range=":-1" />
            <!-- In case the return code has not been set properly, check stderr too:
                 output matching "Error:" or "Exception:" is also treated as a failure. -->
            <regex match="Error:" />
            <regex match="Exception:" />
        </stdio>

        <command><![CDATA[
        ## When mapping against a FASTA from the history, build a Bowtie index for it first.
        #if $operation.collapse_map == "collapse_and_map" or $operation.collapse_map == "only_map"
            #if $operation.refGenomeSource.genomeSource == "history"
                bowtie-build '$operation.refGenomeSource.ownFile' custom_bowtie_indices &&
            #end if
        #end if

        mapper.pl '$reads'

        ## Tell mapper.pl the input format: -c for FASTA, -e for FASTQ (with -h to parse it to FASTA).
        #if $reads.extension.startswith("fasta")
            -c
        #else if $reads.extension.startswith("fastq")
            -e -h
        #end if

        ## Boolean inputs expand to their flag (-j / -i) when checked and to nothing otherwise.
        $remove_non_canon
        $convert_rna_dna

        #if $clip_adapter.clip == "true"
            -k '$clip_adapter.adapter_seq'
        #end if

        -l $discard_short_reads

        #if $operation.collapse_map == "collapse_and_map" or $operation.collapse_map == "only_collapse"
            -m -s '$output_reads_collapsed'
        #end if
        #if $operation.collapse_map == "collapse_and_map" or $operation.collapse_map == "only_map"
            ## Map against the index built above, or against the selected built-in index.
            #if $operation.refGenomeSource.genomeSource == "history"
                -p custom_bowtie_indices
            #else
                -p '$operation.refGenomeSource.index.fields.path'
            #end if
            ## Expands to -q when seed mismatches are allowed, otherwise to nothing.
            $operation.map_mismatch
            -r $operation.map_threshold
            -t '$output_mapping'
        #end if

        -v -n
        ]]></command>
        <inputs>
            <!-- Reads to process; FASTQ and FASTA datasets are accepted. -->
            <param format="fastq,fasta" name="reads" type="data" optional="false" label="Deep sequencing reads" help="Reads in fastq or FASTA format"/>
            <!-- Booleans whose true value is the literal command-line flag and whose false value is empty. -->
            <param name="remove_non_canon" type="boolean" truevalue="-j" falsevalue="" checked="false" label="Remove reads with non-standard nucleotides" help="Remove all entries that have a sequence that contains letters other than a,c,g,t,u,n,A,C,G,T,U,N. (-j)"/>
            <param name="convert_rna_dna" type="boolean" truevalue="-i" falsevalue="" checked="false" label="Convert RNA to DNA alphabet (to map against genome)" help="(-i)"/>

            <!-- Optional 3' adapter clipping; the adapter sequence is only requested when clipping is enabled. -->
            <conditional name="clip_adapter">
                <param name="clip" type="select" label="Clip 3' Adapter Sequence" help="(-k)">
                    <option value="false">Don't Clip</option>
                    <option value="true">Clip Sequence</option>
                </param>
                <when value="true">
                    <param name="adapter_seq" value="" type="text" optional="false" label="Sequence to clip" help="Adapter Sequence can only contain a,c,g,t,u,n,A,C,G,T,U,N">
                        <!-- The character class includes n/N so the check agrees with its own message and the help text. -->
                        <validator type="regex" message="Adapter can ONLY contain a,c,g,t,u,n,A,C,G,T,U,N">^[ACGTUNacgtun]+$</validator>
                    </param>
                </when>
                <when value="false"/>
            </conditional>
            <!-- Minimum read length passed with -l; the validator rejects negative values. -->
            <param name="discard_short_reads" value="18" type="integer" optional="false" label="Discard reads shorter than this length" help="Set to 0 to keep all reads. (-l)">
                <validator type="in_range" min="0" message="Minimum value is 0"/>
            </param>
            <!-- Selects collapsing, mapping, or both; the two mapping branches expand the shared map_params macro. -->
            <conditional name="operation">
                <param name="collapse_map" type="select" label="Collapse reads and/or Map" help="(-m) and/or (-p)">
                    <option value="collapse_and_map">Collapse reads and Map</option>
                    <option value="only_map">Map</option>
                    <option value="only_collapse">Collapse</option>
                </param>
                <when value="collapse_and_map">
                    <expand macro="map_params"/>
                </when>
                <when value="only_map">
                    <expand macro="map_params"/>
                </when>
                <when value="only_collapse"/>
            </conditional>
        </inputs>
        <outputs>
            <!-- Collapsed reads (FASTA); only created when the selected operation includes collapsing. -->
            <data format="fasta" name="output_reads_collapsed" label="Collapsed reads of ${tool.name} on ${on_string}">
                <filter>operation['collapse_map'] == "collapse_and_map" or operation['collapse_map'] == "only_collapse"</filter>
            </data>
            <!-- Mapping result (tabular); only created when the selected operation includes mapping. -->
            <data format="tabular" name="output_mapping" label="Mapping output of ${tool.name} on ${on_string} in ARF format">
                <filter>operation['collapse_map'] == "collapse_and_map" or operation['collapse_map'] == "only_map"</filter>
            </data>
        </outputs>
            <param name="reads" value="reads.fa"/>
            <param name="remove_non_canon" value="True"/>
            <param name="clip" value="true"/>
            <param name="adapter_seq" value="TCGTATGCCGTCTTCTGCTTGT"/>
            <param name="discard_short_reads" value="18"/>
            <param name="collapse_map" value="collapse_and_map"/>
            <param name="genomeSource" value="history"/>
            <param name="ownFile" value="cel_cluster.fa"/>
            <output name="output_reads_collapsed">
                    <has_text text=">seq_349713_x268"/>
                    <has_text text="TCACCGGGTGTANATCAGCTAA"/>
                    <has_text text=">seq_354255_x214"/>
                    <has_text text="TAACCGGGTGAACACTTGCAGT"/>
                    <has_text text=">seq_357284_x187"/>
            <output name="output_mapping">
                    <has_line_matching expression="^.*22\t1\t22\ttcaccgggtggaaactagcagt\tchrII:11534525-11540624\t22\t3060\t3081.*$"/>
                    <has_line_matching expression="^.*22\t1\t22\ttcaccgggtggaaactagtagt\tchrII:11534525-11540624\t22\t3060\t3081.*$"/>
                    <has_line_matching expression="^.*22\t1\t22\ttcaccgggtgtacatcagcgaa\tchrII:11534525-11540624\t22\t3631\t3652.*$"/>
                    <has_line_matching expression="^.*22\t1\t22\ttcaccgggagaaaaactggtgt\tchrII:11534525-11540624\t22\t3382\t3403.*$"/>
                    <has_line_matching expression="^.*25\t1\t25\ttcaccgggtggaaactagcagtggc\tchrII:11534525-11540624\t25\t3060\t3084.*$"/>
**What it does**

The MiRDeep2 Mapper module is designed as a tool to process deep sequencing reads and/or map them to the reference genome. 
The module works in sequence space, and can process or map data that is in sequence FASTA format. 
A number of the functions of the mapper module are implemented specifically with Solexa/Illumina data in mind.


Default input is a file in FASTA, seq.txt or qseq.txt format. More input can be given depending on the options used. 


The output depends on the options used. Either a FASTA file with processed reads or an arf file with mapped reads, or both, are output. 

Arf format:
A proprietary file format generated and processed by miRDeep2. It contains information about reads mapped to a reference genome. Each line in such a file contains 13 columns:

1. read identifier
2. length of read sequence
3. start position in read sequence that is mapped
4. end position in read sequence that is mapped
5. read sequence
6. identifier of the genome part to which a read is mapped. This is either a scaffold id or a chromosome name
7. length of the genome sequence a read is mapped to
8. start position in the genome where a read is mapped to
9. end position in the genome where a read is mapped to
10. genome sequence to which a read is mapped
11. genome strand information. Plus means the read is aligned to the sense-strand of the genome. Minus means it is aligned to the antisense-strand of the genome.
12. Number of mismatches in the read mapping
13. Edit string that indicates matches by lowercase 'm' and mismatches by uppercase 'M'

        <citations>
            <!-- Publications to cite for this tool, identified by DOI. -->
            <citation type="doi">10.1093/nar/gkr688</citation>
            <citation type="doi">10.1002/0471250953.bi1210s36</citation>
        </citations>