view macs2_callpeak.xml @ 6:2119d851a53b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/macs2 commit df97bfd630719a2e1d4e11d67960753184afa238
author iuc
date Tue, 11 Apr 2017 17:46:41 -0400
parents beb902da6e5f
children f5d67c722d67
line wrap: on
line source

<tool id="macs2_callpeak" name="MACS2 callpeak" version="@VERSION_STRING@.0">
    <!-- Short description of the tool. -->
    <description>Call peaks from alignment results</description>

    <!-- Shared definitions are pulled in from macs2_macros.xml. -->
    <macros>
        <import>macs2_macros.xml</import>
    </macros>

    <!-- The three macros expanded below are not defined in this file;
         presumably they come from the imported macs2_macros.xml. -->
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <expand macro="version_command"/>
    <!--
        Command template (Cheetah). It assembles one shell command line that:
          1. runs "macs2 callpeak" with its stderr redirected to a scratch file,
          2. optionally copies the peaks table, renders the model script with
             Rscript and builds the HTML summary page,
          3. replays the captured MACS2 log and exits with the status of the
             steps above.
    -->
    <command>
        <![CDATA[
        ## Scratch file that collects everything MACS2 writes to stderr.
        #set $temp_stderr = 'macs2_stderr'
        (macs2 callpeak

            --name 'MACS2'
            -t ${ ' '.join( map( lambda x:'"%s"' % ( x ), $input_treatment_file ) ) }

            #if str( $input_control_file ) != 'None':
                -c ${ ' '.join( map( lambda x:'"%s"' % ( x ), $input_control_file ) ) }
            #end if

        ## NOTE(review): one --format flag is emitted per treatment file, so several
        ## files produce repeated flags; presumably the last one is the one MACS2
        ## honours. Confirm before allowing mixed input formats.
        #for $ifile in $input_treatment_file:
            #if $ifile.ext.upper() == "BAM" and $bampe:
                --format BAMPE
            #else
                --format='$ifile.ext.upper()'
            #end if
        #end for

        @effective_genome_size@

        --bw '${$band_width}'
        @mfold_command@

        ## advanced options
        #if $advanced_options.advanced_options_selector == "on":
            $advanced_options.nolambda
            $advanced_options.to_large
            --ratio $advanced_options.ratio
            --slocal $advanced_options.slocal
            --llocal $advanced_options.llocal
            #if $advanced_options.broad_options.broad_options_selector == "broad":
                --broad
                --broad-cutoff='${ advanced_options.broad_options.broad_cutoff }'
            #else:
                $advanced_options.broad_options.call_summits
            #end if

            #if str( $advanced_options.keep_dup_options.keep_dup_options_selector ) == "user":
                --keep-dup '${ advanced_options.keep_dup_options.user_keepdup }'
            #else
                --keep-dup '${ advanced_options.keep_dup_options.keep_dup_options_selector }'
            #end if

        #end if

        ## With --bdg two additional output files will be generated.
        #if "bdg" in str($outputs).split(','):
            --bdg
        #end if

        ## cutoff selection
        #if str( $cutoff_options.cutoff_options_selector ) == "qvalue":
            --qvalue '${ cutoff_options.qvalue }'
        #elif str( $cutoff_options.cutoff_options_selector ) == "pvalue":
            ## An empty p-value field means "not set": pass no cutoff flag at all.
            #if str($cutoff_options.pvalue).strip() != "":
                --pvalue '${ cutoff_options.pvalue }'
            #end if
        #end if

        ## model options
        #if $nomodel_type.nomodel_type_selector == "nomodel":
            --nomodel
            --extsize '${ nomodel_type.extsize }'
            ## The "shift" input lives in the same conditional branch as "extsize";
            ## forward it too, otherwise the value entered by the user is ignored.
            --shift '${ nomodel_type.shift }'
        #end if

        2> $temp_stderr)
        #if "peaks_tabular" in str($outputs).split(','):
            &&
            cp MACS2_peaks.xls '${ output_tabular }'
        #end if

        ## run R to create pdf from model script
        #if $nomodel_type.nomodel_type_selector == "create_model" and "pdf" in str($outputs).split(','):
            &&
            Rscript MACS2_model.r > MACS2_model.r.log
        #end if

        #if 'html' in str($outputs).split(','):
            ## if output files exists, move them to the files_path and create a html result page linking to them
            &&
            (
            count=`ls -1 MACS2* 2>/dev/null | wc -l`;
            if [ \$count != 0 ];
            then
                mkdir '${ output_extra_files.files_path }' &&
                cp MACS2* '${ output_extra_files.files_path }' &&
                python '$__tool_directory__/dir2html.py' 
                    '${ output_extra_files.files_path }' $temp_stderr > '${ output_extra_files }'
            fi;
            )
        #end if
        ## The epilogue is joined with ';' instead of '&&' on purpose: the status of
        ## the chain above has to be captured, and the MACS2 log replayed, even when
        ## one of the steps failed. With '&&' the epilogue was skipped on failure, so
        ## the log was lost and the captured status could only ever be 0.
        ;
        exit_code_for_galaxy=\$? ;
        cat $temp_stderr 2>&1 ;
        (exit \$exit_code_for_galaxy)
        ]]>
    </command>
    <inputs>
        <!-- Treatment datasets (one or more); required. -->
        <param name="input_treatment_file"
               type="data"
               format="bam,sam,bed"
               multiple="true"
               label="ChIP-Seq Treatment File"/>

        <!-- Control datasets (zero or more); the command template omits the
             control argument when nothing is selected. -->
        <param name="input_control_file"
               type="data"
               format="bam,sam,bed"
               multiple="true"
               optional="True"
               label="ChIP-Seq Control File"/>

        <!-- Paired-end switch: the command template consults it for treatment
             files whose extension is BAM. -->
        <param name="bampe"
               type="boolean"
               checked="False"
               truevalue="--format BAMPE"
               falsevalue=""
               label="Are your inputs Paired-end BAM files?"
               help="The 'Build model step' will be ignored and the real fragments will be used for each template defined by leftmost and rightmost mapping positions. (--format BAMPE)"/>

        <!-- Inputs contributed by macros that are not defined in this file. -->
        <expand macro="conditional_effective_genome_size"/>
        <expand macro="band_width"/>
        <expand macro="mfold_options"/>

        <!-- Significance cutoff: the user picks either a q-value or a p-value
             threshold; q-value is preselected. -->
        <conditional name="cutoff_options">
            <param name="cutoff_options_selector" type="select" label="Peak detection based on" help="default uses q-value">
                <option value="qvalue" selected="true">q-value</option>
                <option value="pvalue">p-value</option>
            </param>
            <when value="pvalue">
                <!-- Left empty by default; the command template passes no p-value
                     flag when this field is blank. -->
                <param name="pvalue" type="float" value="" label="p-value cutoff for peak detection" help="default: not set (--pvalue)"/>
            </when>
            <when value="qvalue">
                <!-- The help text states the same default as the value attribute (0.05). -->
                <param name="qvalue" type="float" value="0.05" label="Minimum FDR (q-value) cutoff for peak detection" help="The q-value (minimum FDR) cutoff to call significant regions. Default is 0.05. For broad marks, you can try 0.05 as cutoff. Q-values are calculated from p-values using Benjamini-Hochberg procedure. (--qvalue)"/>
            </when>
        </conditional>

        <!-- Model building: either let MACS2 build the shifting model (preselected)
             or skip it and supply a fixed extension size and shift instead. -->
        <conditional name="nomodel_type">
            <param name="nomodel_type_selector" type="select" label="Build Model">
                <option value="nomodel">Do not build the shifting model (--nomodel)</option>
                <option value="create_model" selected="true">Build the shifting model</option>
            </param>
            <when value="create_model"/>
            <when value="nomodel">
                <param name="extsize" type="integer" value="200" label="Set extension size" help="The arbitrary extension size in bp. When nomodel is true, MACS will use this value as fragment size to extend each read towards 3-prime end, then pile them up. It is exactly twice the number of obsolete SHIFTSIZE. In previous language, each read is moved 5-prime-to-3-prime direction to middle of fragment by 0.5 d, then extended to both direction with 0.5 d. This is equivalent to say each read is extended towards 5-prime-to-3-prime into a d size fragment. --extsize (this option) and --shift (the option below) can be combined when necessary. See --shift option below. Default = 200 (--extsize)."/>
                <param name="shift" type="integer" value="0" label="Set shift size" help="(NOT the legacy --shiftsize option!) The arbitrary shift in bp. Use discretion while setting it other than default value. When NOMODEL is set, MACS will use this value to move cutting ends (5-prime) towards 5-prime-to-3-prime  direction then apply EXTSIZE to extend them to fragments. When this value is negative, ends will be moved toward 3-prime-to-5-prime  direction. Recommended to keep it as default 0 for ChIP-Seq datasets, or -1 * 0.5 of --extsize (option above) together with --extsize option for detecting enriched cutting loci such as certain DNAseI-Seq datasets. Note, you can't set values other than 0 if format is BAMPE for paired-end data. Default = 0 (--shift)."/>
            </when>
        </conditional>

        <!-- Checkbox list of the result files to produce; at least one must be
             chosen (optional is false). The option values are the keys that the
             command template and the output filters test for. -->
        <param name="outputs" type="select" display="checkboxes" multiple="True" optional="false" label="Outputs" help="PDF only created when model is built">
            <option value="peaks_tabular" selected="True">Peaks as tabular file</option>
            <option value="summits" selected="true">Peak summits</option>
            <option value="bdg" selected="true">Scores in bedGraph files (--bdg)</option>
            <option value="html">Summary page (html)</option>
            <option value="pdf">Plot in PDF</option>
        </param>

        <!-- Optional tuning parameters. The command template reads them only when
             the selector is "on"; the "off" branch declares no inputs. -->
        <conditional name="advanced_options">
            <param name="advanced_options_selector"
                   type="select"
                   label="Advanced options">
                <option value="off" selected="true">Hide advanced options</option>
                <option value="on">Display advanced options</option>
            </param>
            <when value="on">
                <!-- Boolean switches: each one expands to its flag when checked and
                     to an empty string otherwise. -->
                <param name="to_large"
                       type="boolean"
                       checked="False"
                       truevalue="--to-large"
                       falsevalue=""
                       label="When set, scale the small sample up to the bigger sample"
                       help="By default, the bigger dataset will be scaled down towards the smaller dataset, which will lead to smaller p/qvalues and more specific results. Keep in mind that scaling down will bring down background noise more. (--to-large)"/>
                <param name="nolambda"
                       type="boolean"
                       checked="False"
                       truevalue="--nolambda"
                       falsevalue=""
                       label="Use fixed background lambda as local lambda for every peak region"
                       help="up to 9X more time consuming (--nolambda)"/>

                <!-- Numeric settings; all three are always passed when the
                     advanced section is enabled. -->
                <param name="ratio"
                       type="float"
                       value="1.0"
                       label="When set, use a custom scaling ratio of ChIP/control (e.g. calculated using NCIS) for linear scaling"
                       help="(--ratio)"/>
                <param name="slocal"
                       type="integer"
                       value="1000"
                       label="The small nearby region in basepairs to calculate dynamic lambda"
                       help="This is used to capture the bias near the peak summit region. Invalid if there is no control data. If you set this to 0, MACS will skip slocal lambda calculation. *Note* that MACS will always perform a d-size local lambda calculation. The final local bias should be the maximum of the lambda value from d, slocal, and llocal size windows. (--slocal)"/>
                <param name="llocal"
                       type="integer"
                       value="10000"
                       label="The large nearby region in basepairs to calculate dynamic lambda"
                       help="This is used to capture the surround bias. If you set this to 0, MACS will skip llocal lambda calculation. *Note* that MACS will always perform a d-size local lambda calculation. The final local bias should be the maximum of the lambda value from d, slocal, and llocal size windows. (--llocal)"/>

                <!-- Broad versus narrow peak calling. The broad branch exposes its
                     cutoff; the narrow branch exposes the sub-peak summit switch. -->
                <conditional name="broad_options">
                    <param name="broad_options_selector"
                           type="select"
                           label="Composite broad regions"
                           help="by putting nearby highly enriched regions into a broad region with loose cutoff (--broad)">
                        <option value="nobroad" selected="true">No broad regions</option>
                        <option value="broad">broad regions</option>
                    </param>
                    <when value="broad">
                        <param name="broad_cutoff"
                               type="float"
                               value="0.1"
                               label="Cutoff for broad region"
                               help="value is either p-value or q-value as specified above (--broad-cutoff)"/>
                    </when>
                    <when value="nobroad">
                        <param name="call_summits"
                               type="boolean"
                               checked="False"
                               truevalue="--call-summits"
                               falsevalue=""
                               label="Use a more sophisticated signal processing approach to find subpeak summits in each enriched peak region"
                               help="(--call-summits)"/>
                    </when>
                </conditional>

                <!-- Duplicate handling inputs come from a macro not defined in this file. -->
                <expand macro="keep_duplicates"/>
            </when>
            <when value="off"/>
        </conditional>
    </inputs>
    <outputs>
        <!-- Each dataset below is produced only when its filter expression is true.
             Datasets with from_work_dir are collected from the named file in the
             job working directory; the others are written by the command template. -->

        <!-- Peaks table (filled by the "cp MACS2_peaks.xls" step of the command). -->
        <data name="output_tabular"
              format="tabular"
              label="${tool.name} on ${on_string} (Peaks in tabular format)">
            <filter>'peaks_tabular' in outputs</filter>
        </data>

        <!-- Broad-mode results: only when the advanced section is on and broad
             regions are requested. -->
        <data name="output_broadpeaks"
              format="bed"
              from_work_dir="MACS2_peaks.broadPeak"
              label="${tool.name} on ${on_string} (broad Peaks)">
            <filter>advanced_options['advanced_options_selector'] == "on" and advanced_options['broad_options']['broad_options_selector'] == "broad"</filter>
        </data>
        <data name="output_gappedpeaks"
              format="bed"
              from_work_dir="MACS2_peaks.gappedPeak"
              label="${tool.name} on ${on_string} (gapped Peaks)">
            <filter>advanced_options['advanced_options_selector'] == "on" and advanced_options['broad_options']['broad_options_selector'] == "broad"</filter>
        </data>

        <!-- Narrow-mode results: advanced section off, or on without broad regions. -->
        <data name="output_narrowpeaks"
              format="bed"
              from_work_dir="MACS2_peaks.narrowPeak"
              label="${tool.name} on ${on_string} (narrow Peaks)">
            <filter>advanced_options['advanced_options_selector'] == "off" or (advanced_options['advanced_options_selector'] == "on" and advanced_options['broad_options']['broad_options_selector'] == "nobroad")</filter>
        </data>

        <!-- Peak summits in BED format. -->
        <data name="output_summits"
              format="bed"
              from_work_dir="MACS2_summits.bed"
              label="${tool.name} on ${on_string} (summits in BED)">
            <filter>'summits' in outputs</filter>
        </data>

        <!-- Model plot: requires both the "pdf" choice and model building. -->
        <data name="output_plot"
              format="pdf"
              from_work_dir="MACS2_model.pdf"
              label="${tool.name} on ${on_string} (plot)">
            <filter>'pdf' in outputs and nomodel_type['nomodel_type_selector'] == "create_model"</filter>
        </data>

        <!-- The two bedGraph tracks share the "bdg" choice. -->
        <data name="output_treat_pileup"
              format="bedgraph"
              from_work_dir="MACS2_treat_pileup.bdg"
              label="${tool.name} on ${on_string} (Bedgraph Treatment)">
            <filter>'bdg' in outputs</filter>
        </data>
        <data name="output_control_lambda"
              format="bedgraph"
              from_work_dir="MACS2_control_lambda.bdg"
              label="${tool.name} on ${on_string} (Bedgraph Control)">
            <filter>'bdg' in outputs</filter>
        </data>

        <!-- HTML summary page (written by the dir2html.py step of the command). -->
        <data name="output_extra_files"
              format="html"
              label="${tool.name} on ${on_string} (html report)">
            <filter>'html' in outputs</filter>
        </data>
    </outputs>
    <tests>
        <!-- Test 1: BED treatment and control, q-value cutoff, requesting the peaks
             table and the bedGraph tracks; each output is compared against a
             reference fragment with "contains". -->
        <test>
            <param name="input_control_file" ftype="bed" value="Control_200K.bed"/>
            <param name="input_treatment_file" ftype="bed" value="ChIP_200K.bed"/>
            <param name="cutoff_options_selector" value="qvalue"/>
            <param name="qvalue" value="0.05"/>
            <param name="band_width" value="300"/>
            <param name="outputs" value="peaks_tabular,bdg"/>
            <param name="effective_genome_size_options_selector" value="user_defined"/>
            <param name="gsize" value="3300000000"/>
            <param name="lower" value="5"/>
            <param name="upper" value="50"/>
            <output name="output_control_lambda" file="callpeak_control_part.bdg" compare="contains" lines_diff="1"/>
            <output name="output_treat_pileup" file="callpeak_treatment_part.bdg" compare="contains" lines_diff="1"/>
            <output name="output_tabular" file="callpeak_part.tabular" compare="contains" lines_diff="1"/>
        </test>

        <!-- Test 2: same inputs and settings, but only the model plot is requested. -->
        <test>
            <param name="input_control_file" ftype="bed" value="Control_200K.bed"/>
            <param name="input_treatment_file" ftype="bed" value="ChIP_200K.bed"/>
            <param name="cutoff_options_selector" value="qvalue"/>
            <param name="qvalue" value="0.05"/>
            <param name="band_width" value="300"/>
            <param name="outputs" value="pdf"/>
            <param name="effective_genome_size_options_selector" value="user_defined"/>
            <param name="gsize" value="3300000000"/>
            <param name="lower" value="5"/>
            <param name="upper" value="50"/>
            <output name="output_plot" file="magic.pdf" ftype="pdf" compare="contains"/>
        </test>
    </tests>
    <help>
        <![CDATA[
**What it does**

**callpeak** is the main function of the MACS2_ package. MACS identifies enriched binding sites in ChIP-seq experiments.
It captures the influence of genome complexity to evaluate the significance of enriched ChIP regions,
and improves the spatial resolution of binding sites by combining the information of both sequencing
tag position and orientation. MACS can be used for ChIP-Seq data alone, or together with a control sample,
which increases specificity (recommended).

.. _MACS2: https://github.com/taoliu/MACS

MACS2 performs the following analysis steps:

 * Artificially extend reads to the expected fragment length, and generate a coverage map along the genome.
 * Assume background reads are Poisson distributed. The mean of the Poisson distribution is locally variable and is estimated from the control experiment (if available) in a 5 Kbp or 10 Kbp window around the examined location.
 * For a given location, do we see more reads than we would have expected from the Poisson distribution (p < 0.00005)? If yes, MACS2 calls a peak.


.. class:: warningmark

If MACS2 fails, it is usually because it cannot build the model for peaks. You may want to extend the **mfold** range by increasing the upper bound, or adjust the **Build Model** options.


@citation@
]]>
  </help>
  <!-- Expands the "citations" macro, which is not defined in this file
       (presumably it comes from the imported macs2_macros.xml). -->
  <expand macro="citations" />
</tool>