diff macs2_callpeak.xml @ 9:acbd3fb47f90 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/macs2 commit 586ecaebf9e6020fac2674fbda368e293d1c9bc2
author iuc
date Thu, 25 Jan 2018 02:11:52 -0500
parents e8a060164e11
children f0b351e734b8
line wrap: on
line diff
--- a/macs2_callpeak.xml	Wed Dec 27 10:18:03 2017 -0500
+++ b/macs2_callpeak.xml	Thu Jan 25 02:11:52 2018 -0500
@@ -1,4 +1,4 @@
-<tool id="macs2_callpeak" name="MACS2 callpeak" version="@VERSION_STRING@.1">
+<tool id="macs2_callpeak" name="MACS2 callpeak" version="@VERSION_STRING@.2">
     <description>Call peaks from alignment results</description>
     <macros>
         <import>macs2_macros.xml</import>
@@ -8,6 +8,8 @@
     <expand macro="stdio" />
     <expand macro="version_command" />
     <command><![CDATA[
+        @home_dir@
+
         #set $temp_stderr = 'macs2_stderr'
         (macs2 callpeak
 
@@ -17,21 +19,8 @@
 
             #if str($treatment.t_multi_select) == "Yes":
                 -t ${ ' '.join( map( lambda x:'"%s"' % ( x ), '$treatment.input_treatment_file' ) ) }
-
-                #if '$treatment.input_treatment_file[0].ext.upper()' == "BAM" and $bampe:
-                    --format BAMPE
-                #else
-                    --format='$treatment.input_treatment_file[0].ext.upper()'
-                #end if
-
             #else
                 -t '$treatment.input_treatment_file'
-
-                #if '$treatment.input_treatment_file.ext.upper()' == "BAM" and $bampe:
-                    --format BAMPE
-                #else
-                    --format='$treatment.input_treatment_file.ext.upper()'
-                #end if
             #end if
 
             ## Control File(s)
@@ -44,33 +33,41 @@
                 #end if
             #end if
 
-        @effective_genome_size@
+            --format $format
 
-        --bw '${band_width}'
-        @mfold_command@
+        @effective_genome_size@
 
         ## advanced options
-        #if $advanced_options.advanced_options_selector == "on":
-            $advanced_options.nolambda
-            $advanced_options.to_large
+
+        $advanced_options.nolambda
+        $advanced_options.to_large
+
+        #if $advanced_options.ratio:
             --ratio $advanced_options.ratio
+        #end if
+
+        #if $advanced_options.slocal:
             --slocal $advanced_options.slocal
+        #end if
+
+        #if $advanced_options.llocal:
             --llocal $advanced_options.llocal
-            #if $advanced_options.broad_options.broad_options_selector == "broad":
-                --broad
-                --broad-cutoff='${ advanced_options.broad_options.broad_cutoff }'
-            #else
-                $advanced_options.broad_options.call_summits
-            #end if
+        #end if
 
-            #if str( $advanced_options.keep_dup_options.keep_dup_options_selector ) == "user":
-                --keep-dup '${ advanced_options.keep_dup_options.user_keepdup }'
-            #else
-                --keep-dup '${ advanced_options.keep_dup_options.keep_dup_options_selector }'
-            #end if
+        #if $advanced_options.broad_options.broad_options_selector == "broad":
+            --broad
+            --broad-cutoff='${ advanced_options.broad_options.broad_cutoff }'
+        #else
+            $advanced_options.broad_options.call_summits
+        #end if
 
+        #if str( $advanced_options.keep_dup_options.keep_dup_options_selector ) == "user":
+            --keep-dup '${ advanced_options.keep_dup_options.user_keepdup }'
+        #else
+            --keep-dup '${ advanced_options.keep_dup_options.keep_dup_options_selector }'
         #end if
 
+
         ## With --bdg two additional output files will be generated.
         #if "bdg" in str($outputs).split(','):
             --bdg
@@ -89,6 +86,13 @@
         #if $nomodel_type.nomodel_type_selector == "nomodel":
             --nomodel
             --extsize '${ nomodel_type.extsize }'
+            --shift '${ nomodel_type.shift}'
+        #else
+            --mfold '${nomodel_type.mfold_lower}' '${nomodel_type.mfold_upper}'
+
+            #if $nomodel_type.band_width:
+            --bw '${nomodel_type.band_width}'
+            #end if
         #end if
 
         2>&1 > $temp_stderr)
@@ -111,7 +115,7 @@
             if [ \$count != 0 ];
             then
                 mkdir '${ output_extra_files.files_path }' &&
-                cp MACS2* '${ output_extra_files.files_path }' &&
+                cp -r MACS2* '${ output_extra_files.files_path }' &&
                 python '$__tool_directory__/dir2html.py'
                     '${ output_extra_files.files_path }' $temp_stderr > '${ output_extra_files }';
             fi;
@@ -138,8 +142,8 @@
 
         <conditional name="control">
             <param name="c_select" type="select" label="Do you have a Control File?" >
-                <option value="Yes" selected="True">Yes</option>
-                <option value="No">No</option>
+                <option value="Yes">Yes</option>
+                <option value="No" selected="True">No</option>
             </param>
             <when value="Yes">
                 <conditional name="c_multiple">
@@ -158,13 +162,31 @@
             <when value="No" />
         </conditional>
 
-        <param name="bampe" type="boolean" truevalue="--format BAMPE" falsevalue="" checked="False"
-               label="Are your inputs Paired-end BAM files?"
-               help="The 'Build model step' will be ignored and the real fragments will be used for each template defined by leftmost and rightmost mapping positions. (--format BAMPE)"/>
+        <param name="format" type="select" label="Format of Input Files" help="For Paired-end BAM (BAMPE) the 'Build model step' will be ignored and the real fragments will be used for each template defined by leftmost and rightmost mapping positions (--format). Default: Single-end BAM">
+            <option value="BAM" selected="True">Single-end BAM</option>
+            <option value="BAMPE">Paired-end BAM</option>
+            <option value="BED">Single-end BED</option>
+        </param>
 
         <expand macro="conditional_effective_genome_size" />
-        <expand macro="band_width" />
-        <expand macro="mfold_options" />
+
+        <conditional name="nomodel_type">
+            <param name="nomodel_type_selector" type="select" label="Build Model">
+                <option value="nomodel">Do not build the shifting model (--nomodel)</option>
+                <option value="create_model" selected="true">Build the shifting model</option>
+            </param>
+            <when value="create_model">
+                <param name="mfold_lower" type="integer" value="5" label="Set lower mfold bound" help="Select the lower region within MFOLD range of high confidence enrichment ratio against background to build model. Fold-enrichment in regions must be higher than lower limit (--mfold). Default: 5" />
+                <param name="mfold_upper" type="integer" value="50" label="Set upper mfold bound" help="Select the upper region within MFOLD range of high confidence enrichment ratio against background to build model. Fold-enrichment in regions must be lower than the upper limit (--mfold). Default: 50"/>
+                <param name="band_width" type="integer" value="300"
+                label="Band width for picking regions to compute fragment size"
+                help=" You can set this parameter as the medium fragment size expected from sonication or size selection (--bw). Default: 300" />
+            </when>
+            <when value="nomodel">
+                <param name="extsize" type="integer" value="200" label="Set extension size" help="The arbitrary extension size in bp. When nomodel is true, MACS will use this value as fragment size to extend each read towards the 3-prime end, then pile them up. It is exactly twice the number of obsolete SHIFTSIZE. In previous language, each read is moved 5-prime-to-3-prime direction to middle of fragment by 0.5 d, then extended to both direction with 0.5 d. This is equivalent to say each read is extended towards 5-prime-to-3-prime into a d size fragment. --extsize (this option) and --shift (the option below) can be combined when necessary. See --shift option below. Default: 200 (--extsize)."/>
+                <param name="shift" type="integer" value="0" label="Set shift size" help="(NOT the legacy --shiftsize option!) The arbitrary shift in bp. Use discretion while setting it other than default value. When NOMODEL is set, MACS will use this value to move cutting ends (5-prime) towards 5-prime-to-3-prime  direction then apply EXTSIZE to extend them to fragments. When this value is negative, ends will be moved toward 3-prime-to-5-prime  direction. Recommended to keep it as default 0 for ChIP-Seq datasets, or -1 * 0.5 of --extsize (option above) together with --extsize option for detecting enriched cutting loci such as certain DNAseI-Seq datasets. Note, you can't set values other than 0 if format is paired-end data (BAMPE). Default: 0 (--shift)."/>
+            </when>
+        </conditional>
 
         <conditional name="cutoff_options">
             <param name="cutoff_options_selector" type="select" label="Peak detection based on" help="default uses q-value">
@@ -175,19 +197,7 @@
                 <param name="pvalue" type="float" value="" label="p-value cutoff for peak detection" help="default: not set (--pvalue)"/>
             </when>
             <when value="qvalue">
-                <param name="qvalue" type="float" value="0.05" label="Minimum FDR (q-value) cutoff for peak detection" help="The q-value (minimum FDR) cutoff to call significant regions. Default is 0.01. For broad marks, you can try 0.05 as cutoff. Q-values are calculated from p-values using Benjamini-Hochberg procedure. (--qvalue)"/>
-            </when>
-        </conditional>
-
-        <conditional name="nomodel_type">
-            <param name="nomodel_type_selector" type="select" label="Build Model">
-                <option value="nomodel">Do not build the shifting model (--nomodel)</option>
-                <option value="create_model" selected="true">Build the shifting model</option>
-            </param>
-            <when value="create_model"/>
-            <when value="nomodel">
-                <param name="extsize" type="integer" value="200" label="Set extension size" help="The arbitrary extension size in bp. When nomodel is true, MACS will use this value as fragment size to extend each read towards 3-prime; end, then pile them up. It is exactly twice the number of obsolete SHIFTSIZE. In previous language, each read is moved 5-prime-to-3-prime direction to middle of fragment by 0.5 d, then extended to both direction with 0.5 d. This is equivalent to say each read is extended towards 5-prime-to-3-prime into a d size fragment. --extsize (this option) and --shift (the option below) can be combined when necessary. See --shift option below. Default = 200 (--extsize)."/>
-                <param name="shift" type="integer" value="0" label="Set shift size" help="(NOT the legacy --shiftsize option!) The arbitrary shift in bp. Use discretion while setting it other than default value. When NOMODEL is set, MACS will use this value to move cutting ends (5-prime) towards 5-prime-to-3-prime  direction then apply EXTSIZE to extend them to fragments. When this value is negative, ends will be moved toward 3-prime-to-5-prime  direction. Recommended to keep it as default 0 for ChIP-Seq datasets, or -1 * 0.5 of --extsize (option above) together with --extsize option for detecting enriched cutting loci such as certain DNAseI-Seq datasets. Note, you can't set values other than 0 if format is paired-end data (BAMPE). Default = 0 (--shift)."/>
+                <param name="qvalue" type="float" value="0.05" label="Minimum FDR (q-value) cutoff for peak detection" help="The q-value (minimum FDR) cutoff to call significant regions. Default is 0.05. For broad marks, you can try 0.05 as cutoff. Q-values are calculated from p-values using Benjamini-Hochberg procedure. (--qvalue)"/>
             </when>
         </conditional>
 
@@ -199,24 +209,19 @@
             <option value="pdf">Plot in PDF</option>
         </param>
 
-        <conditional name="advanced_options">
-            <param name="advanced_options_selector" type="select" label="Advanced Options">
-                <option value="off" selected="true">Hide advanced options</option>
-                <option value="on">Display advanced options</option>
-            </param>
-            <when value="on">
-                <param name="to_large" type="boolean" truevalue="--to-large" falsevalue="" checked="False"
+        <section name="advanced_options" title="Advanced Options">
+                <param name="to_large" type="boolean" truevalue="--to-large" falsevalue="" checked="False" optional="True"
                     label="When set, scale the small sample up to the bigger sample"
-                    help="By default, the bigger dataset will be scaled down towards the smaller dataset, which will lead to smaller p/qvalues and more specific results. Keep in mind that scaling down will bring down background noise more. (--to-large)"/>
-                <param name="nolambda" type="boolean" truevalue="--nolambda" falsevalue="" checked="False"
-                    label="Use fixed background lambda as local lambda for every peak region" help="up to 9X more time consuming (--nolambda)"/>
-                <param name="ratio" type="float" value="1.0"
+                    help="By default, the bigger dataset will be scaled down towards the smaller dataset, which will lead to smaller p/qvalues and more specific results. Keep in mind that scaling down will bring down background noise more. (--to-large). Default: No"/>
+                <param name="nolambda" type="boolean" truevalue="--nolambda" falsevalue="" checked="False" optional="True"
+                    label="Use fixed background lambda as local lambda for every peak region" help="up to 9X more time consuming (--nolambda). Default: No"/>
+                <param name="ratio" type="float" optional="True"
                     label="When set, use a custom scaling ratio of ChIP/control (e.g. calculated using NCIS) for linear scaling"
-                    help="(--ratio)"/>
-                <param name="slocal" value="1000" type="integer" label="The small nearby region in basepairs to calculate dynamic lambda"
-                    help="This is used to capture the bias near the peak summit region. Invalid if there is no control data. If you set this to 0, MACS will skip slocal lambda calculation. *Note* that MACS will always perform a d-size local lambda calculation. The final local bias should be the maximum of the lambda value from d, slocal, and llocal size windows. (--slocal)"/>
-                <param name="llocal" value="10000" type="integer" label="The large nearby region in basepairs to calculate dynamic lambda"
-                    help="This is used to capture the surround bias. If you set this to 0, MACS will skip llocal lambda calculation. *Note* that MACS will always perform a d-size local lambda calculation. The final local bias should be the maximum of the lambda value from d, slocal, and llocal size windows. (--llocal)"/>
+                    help="(--ratio) Default: ignore"/>
+                <param name="slocal" type="integer" optional="True" label="The small nearby region in basepairs to calculate dynamic lambda"
+                    help="This is used to capture the bias near the peak summit region. Invalid if there is no control data. If you set this to 0, MACS will skip slocal lambda calculation. *Note* that MACS will always perform a d-size local lambda calculation. The final local bias should be the maximum of the lambda value from d, slocal, and llocal size windows. (--slocal). Default: 1000"/>
+                <param name="llocal" type="integer" optional="True" label="The large nearby region in basepairs to calculate dynamic lambda"
+                    help="This is used to capture the surround bias. If you set this to 0, MACS will skip llocal lambda calculation. *Note* that MACS will always perform a d-size local lambda calculation. The final local bias should be the maximum of the lambda value from d, slocal, and llocal size windows. (--llocal) Default: 10000"/>
                 <conditional name="broad_options">
                     <param name="broad_options_selector" type="select"
                         label="Composite broad regions" help="by putting nearby highly enriched regions into a broad region with loose cutoff (--broad)">
@@ -234,9 +239,7 @@
                     </when>
                 </conditional>
                 <expand macro="keep_duplicates" />
-            </when>
-            <when value="off" />
-        </conditional>
+        </section>
     </inputs>
     <outputs>
         <!--callpeaks output-->
@@ -246,7 +249,6 @@
         <data name="output_broadpeaks" format="bed" from_work_dir="MACS2_peaks.broadPeak" label="${tool.name} on ${on_string} (broad Peaks)">
             <filter>
             ((
-              advanced_options['advanced_options_selector'] == "on" and
               advanced_options['broad_options']['broad_options_selector'] == "broad"
             ))
             </filter>
@@ -254,17 +256,13 @@
         <data name="output_gappedpeaks" format="bed" from_work_dir="MACS2_peaks.gappedPeak" label="${tool.name} on ${on_string} (gapped Peaks)">
             <filter>
             ((
-              advanced_options['advanced_options_selector'] == "on" and
               advanced_options['broad_options']['broad_options_selector'] == "broad"
             ))
             </filter>
         </data>
         <data name="output_narrowpeaks" format="bed" from_work_dir="MACS2_peaks.narrowPeak" label="${tool.name} on ${on_string} (narrow Peaks)">
             <filter>
-            (
-              advanced_options['advanced_options_selector'] == "off" or
-            (
-              advanced_options['advanced_options_selector'] == "on" and
+            ((
               advanced_options['broad_options']['broad_options_selector'] == "nobroad"
             ))
             </filter>
@@ -295,6 +293,7 @@
             <param name="input_treatment_file" value="ChIP_200K.bed" ftype="bed"/>
             <param name="c_select" value="Yes"/>
             <param name="input_control_file" value="Control_200K.bed" ftype="bed"/>
+            <param name="format" value="BED" />
             <param name="cutoff_options_selector" value="qvalue"/>
             <param name="qvalue" value="0.05"/>
             <param name="band_width" value="300"/>
@@ -311,13 +310,13 @@
                     <has_text text="Additional output created by MACS2" />
                 </assert_contents>
             </output>
-
         </test>
         <!-- Ensure pdf can be output -->
         <test expect_num_outputs="2">
             <param name="input_treatment_file" value="ChIP_200K.bed" ftype="bed"/>
             <param name="c_select" value="Yes"/>
             <param name="input_control_file" value="Control_200K.bed" ftype="bed"/>
+            <param name="format" value="BED" />
             <param name="cutoff_options_selector" value="qvalue"/>
             <param name="qvalue" value="0.05"/>
             <param name="band_width" value="300"/>
@@ -328,6 +327,16 @@
             <param name="upper" value="50" />
             <output name="output_plot" file="magic.pdf" ftype="pdf" compare="contains" />
         </test>
+        <!-- Ensure BAMPE works -->
+        <test expect_num_outputs="1">
+            <param name="input_treatment_file" ftype="bam" value="bwa-mem-test1.bam"/>
+            <param name="format" value="BAMPE" />
+            <param name="effective_genome_size_options_selector" value="user_defined"/>
+            <param name="gsize" value="3300000000"/>
+            <param name="nomodel_type_selector" value="nomodel"/>
+            <param name="nolambda" value="True"/>
+            <output name="output_narrowpeaks" file="callpeak_bampe_narrow.bed"/>
+        </test>
     </tests>
     <help><![CDATA[
 
@@ -335,7 +344,7 @@
 
 **What it does**
 
-**callpeak** is the main function of the MACS2_ package. MACS identifies enriched binding sites in ChIP-seq experiments. It captures the influence of genome complexity to evaluate the significance of enriched ChIP regions, and improves the spatial resolution of binding sites through combining the information of both sequencing tag position and orientation. 
+**callpeak** is the main function of the MACS2_ package. MACS identifies enriched binding sites in ChIP-seq experiments. It captures the influence of genome complexity to evaluate the significance of enriched ChIP regions, and improves the spatial resolution of binding sites through combining the information of both sequencing tag position and orientation.
 
 .. _MACS2: https://github.com/taoliu/MACS
 
@@ -349,11 +358,22 @@
 
 Both single-end and paired-end mapping results can be input and you can specify if the data is from paired-end reads above. Paired-end mapping results can be input to MACS as a single BAM file, and just the left mate (5' end) tag will be automatically kept. However, when paired-end format (BAMPE) is specified, MACS will use the real fragments inferred from alignment results for reads pileup.
 
+*Effective Genome Size*
+
+PLEASE assign this parameter to fit your needs!
+
+It's the mappable genome size or effective genome size which is defined as the genome size which can be sequenced. Because of the repetitive features on the chromosomes, the actual mappable genome size will be smaller than the original size, about 90% or 70% of the genome size. The default hs -- 2.7e9 is recommended for UCSC human hg18 assembly. Here are all precompiled parameters for effective genome size from the MACS2_ website:
+
+ hs: 2.7e9
+ mm: 1.87e9
+ ce: 9e7
+ dm: 1.2e8
+
 -----
 
 **Outputs**
 
-This tool produces a BED file of narrowPeaks as default output. It can also produce additional outputs, which can be selected under the **Additional Outputs** option above. 
+This tool produces a BED file of narrowPeaks as default output. It can also produce additional outputs, which can be selected under the **Additional Outputs** option above.
 
     * **a BED file of peaks** (default)
     * a tabular file of peaks
@@ -366,13 +386,13 @@
 
 **Peaks BED File**
 
-The default output is the narrowPeak BED file (BED6+4 format). This contains the peak locations, together with peak summit, pvalue and qvalue. You can load it to UCSC genome browser. 
+The default output is the narrowPeak BED file (BED6+4 format). This contains the peak locations, together with peak summit, pvalue and qvalue. You can load it to UCSC genome browser.
 
     Example:
 
     ======= ========= ======= ============ ==== === ======= ======== ======= =======
     1          2        3          4        5    6     7       8         9   **10**
-    ======= ========= ======= ============ ==== === ======= ======== ======= ======= 
+    ======= ========= ======= ============ ==== === ======= ======== ======= =======
     chr1    840081    840400  MACS2_peak_1  69   .  4.89872 10.50944 6.91052 158
     chr1    919419    919785  MACS2_peak_2  87   .  5.85158 12.44148 8.70936 130
     chr1    937220    937483  MACS2_peak_3  66   .  4.87632 10.06728 6.61759 154
@@ -394,7 +414,7 @@
 
 **Peaks tabular File**
 
-A tabular file which contains information about called peaks. You can open it in Excel and sort/filter using Excel functions. 
+A tabular file which contains information about called peaks. You can open it in Excel and sort/filter using Excel functions.
 
     Example:
 
@@ -428,7 +448,7 @@
 
     Example:
 
-    ======= ========= ======= ============ ======= 
+    ======= ========= ======= ============ =======
     1          2        3          4        **5**
     ======= ========= ======= ============ =======
     chr1    840239    840240  MACS2_peak_1 6.91052
@@ -487,13 +507,13 @@
 
     Example:
 
-    ======= ========= ======= ============ ==== === ======= ======= ======= 
+    ======= ========= ======= ============ ==== === ======= ======= =======
     1        2         3       4            5    6   7       8       9
-    ======= ========= ======= ============ ==== === ======= ======= ======= 
+    ======= ========= ======= ============ ==== === ======= ======= =======
     chr1    840081    840400  MACS2_peak_1  52   .  4.08790 8.57605 5.21506
     chr1    919419    919785  MACS2_peak_2  56   .  4.37270 8.90436 5.60462
     chr1    937220    937483  MACS2_peak_3  48   .  4.02343 8.06676 4.86861
-    ======= ========= ======= ============ ==== === ======= ======= ======= 
+    ======= ========= ======= ============ ==== === ======= ======= =======
 
 
 Columns contain the following data:
@@ -531,14 +551,14 @@
 * **4th**: name of peak
 * **5th**: 10*-log10qvalue, to be more compatible to show grey levels on UCSC browser
 * **6th**: strand, either "." (=no strand) or "+" or "-"
-* **7th**: start of the first narrow peak in the region 
+* **7th**: start of the first narrow peak in the region
 * **8th**: end of the peak
-* **9th**: RGB color key, default colour is 0 
+* **9th**: RGB color key, default colour is 0
 * **10th**: number of blocks, including the starting 1bp and ending 1bp of broad regions
 * **11th**: length of each block, comma-separated values if multiple
 * **12th**: start of each block, comma-separated values if multiple
-* **13th**: fold-change 
-* **14th**: -log10pvalue 
+* **13th**: fold-change
+* **14th**: -log10pvalue
 * **15th**: -log10qvalue
 
 -----