diff macs2_callpeak.xml @ 5:beb902da6e5f draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/macs2 commit e10f301c7f8c54a7d12df4e631527197baccf70b
author iuc
date Sat, 08 Apr 2017 08:28:57 -0400
parents 56e104999978
children 2119d851a53b
line wrap: on
line diff
--- a/macs2_callpeak.xml	Mon Feb 06 02:30:37 2017 -0500
+++ b/macs2_callpeak.xml	Sat Apr 08 08:28:57 2017 -0400
@@ -4,16 +4,15 @@
         <import>macs2_macros.xml</import>
     </macros>
     <expand macro="requirements">
-        <requirement type="package" version="3.1.2">R</requirement>
-        <requirement type="set_environment">MACS2_SCRIPT_PATH</requirement>
     </expand>
     <expand macro="stdio" />
     <expand macro="version_command" />
     <command>
+        <![CDATA[
         #set $temp_stderr = 'macs2_stderr'
         (macs2 callpeak
 
-            --name "MACS2"
+            --name 'MACS2'
             -t ${ ' '.join( map( lambda x:'"%s"' % ( x ), $input_treatment_file ) ) }
 
             #if str( $input_control_file ) != 'None':
@@ -21,7 +20,7 @@
             #end if
 
         #for $ifile in $input_treatment_file:
-            #if $ifile.ext.upper() == 'BAM' and $bampe:
+            #if $ifile.ext.upper() == "BAM" and $bampe:
                 --format BAMPE
             #else
                 --format='$ifile.ext.upper()'
@@ -30,16 +29,17 @@
 
         @effective_genome_size@
 
-        --bw='$band_width'
+        --bw '${$band_width}'
+        @mfold_command@
 
         ## advanced options
-        #if $advanced_options.advanced_options_selector == 'on':
+        #if $advanced_options.advanced_options_selector == "on":
             $advanced_options.nolambda
             $advanced_options.to_large
             --ratio $advanced_options.ratio
             --slocal $advanced_options.slocal
             --llocal $advanced_options.llocal
-            #if $advanced_options.broad_options.broad_options_selector == 'broad':
+            #if $advanced_options.broad_options.broad_options_selector == "broad":
                 --broad
                 --broad-cutoff='${ advanced_options.broad_options.broad_cutoff }'
             #else:
@@ -47,73 +47,78 @@
             #end if
 
             #if str( $advanced_options.keep_dup_options.keep_dup_options_selector ) == "user":
-                --keep-dup "${ advanced_options.keep_dup_options.user_keepdup }"
+                --keep-dup '${ advanced_options.keep_dup_options.user_keepdup }'
             #else
-                --keep-dup "${ advanced_options.keep_dup_options.keep_dup_options_selector }"
+                --keep-dup '${ advanced_options.keep_dup_options.keep_dup_options_selector }'
             #end if
 
         #end if
 
         ## With --bdg two additional output files will be generated.
-        #if 'bdg' in str($outputs).split(','):
+        #if "bdg" in str($outputs).split(','):
             --bdg
         #end if
 
         ## cutoff selection
-        #if str( $cutoff_options.cutoff_options_selector ) == 'qvalue':
-            --qvalue "${ cutoff_options.qvalue }"
-        #elif str( $cutoff_options.cutoff_options_selector ) == 'pvalue':
-            #if str($cutoff_options.pvalue).strip() != '':
-                --pvalue "${ cutoff_options.pvalue }"
+        #if str( $cutoff_options.cutoff_options_selector ) == "qvalue":
+            --qvalue '${ cutoff_options.qvalue }'
+        #elif str( $cutoff_options.cutoff_options_selector ) == "pvalue":
+            #if str($cutoff_options.pvalue).strip() != "":
+                --pvalue '${ cutoff_options.pvalue }'
             #end if
         #end if
 
         ## model options
-        #if $nomodel_type.nomodel_type_selector == 'nomodel':
+        #if $nomodel_type.nomodel_type_selector == "nomodel":
             --nomodel
-            ##--shiftsize '$nomodel_type.shiftsize'
             --extsize '${ nomodel_type.extsize }'
         #end if
 
         2> $temp_stderr)
-        #if 'peaks_tabular' in str($outputs).split(','):
-            &amp;&amp;
-            cp MACS2_peaks.xls "${ output_tabular }"
+        #if "peaks_tabular" in str($outputs).split(','):
+            &&
+            cp MACS2_peaks.xls '${ output_tabular }'
         #end if
 
         ## run R to create pdf from model script
-        #if $nomodel_type.nomodel_type_selector == 'create_model' and 'pdf' in str($outputs).split(','):
-            &amp;&amp;
+        #if $nomodel_type.nomodel_type_selector == "create_model" and "pdf" in str($outputs).split(','):
+            &&
             Rscript MACS2_model.r > MACS2_model.r.log
         #end if
 
         #if 'html' in str($outputs).split(','):
             ## if output files exists, move them to the files_path and create a html result page linking to them
-            &amp;&amp;
+            &&
             (
             count=`ls -1 MACS2* 2>/dev/null | wc -l`;
             if [ \$count != 0 ];
             then
-                mkdir "${ output_extra_files.files_path }";
-                cp MACS2* "${ output_extra_files.files_path }";
-                python "\$MACS2_SCRIPT_PATH/dir2html.py" "${ output_extra_files.files_path }" $temp_stderr > "${ output_extra_files }";
+                mkdir '${ output_extra_files.files_path }' &&
+                cp MACS2* '${ output_extra_files.files_path }' &&
+                python '$__tool_directory__/dir2html.py'
+                    '${ output_extra_files.files_path }' $temp_stderr > '${ output_extra_files }';
             fi;
             )
         #end if
-        ;
-        exit_code_for_galaxy=\$?;
-        cat $temp_stderr 2&gt;&amp;1;
+        ;
+        exit_code_for_galaxy=\$? ;
+        cat $temp_stderr 2>&1 ;
         (exit \$exit_code_for_galaxy)
+        ]]>
     </command>
     <inputs>
-        <param name="input_treatment_file" type="data" format="bam,sam,bed" multiple="True" label="ChIP-Seq Treatment File" />
-        <param name="input_control_file" type="data" format="bam,sam,bed" multiple="True" optional="True" label="ChIP-Seq Control File" />
+        <param name="input_treatment_file" type="data" format="bam,sam,bed" multiple="true"
+               label="ChIP-Seq Treatment File" />
+        <param name="input_control_file" type="data" format="bam,sam,bed" multiple="true" optional="True"
+               label="ChIP-Seq Control File" />
 
-        <param name="bampe" type="boolean" truevalue="--format BAMPE" falsevalue="" checked="False" label="Are your inputs Paired-end BAM files?"
-            help="The 'Build model step' will be ignored and the real fragments will be used for each template defined by leftmost and rightmost mapping positions. (--format BAMPE)"/>
+        <param name="bampe" type="boolean" truevalue="--format BAMPE" falsevalue="" checked="False"
+               label="Are your inputs Paired-end BAM files?"
+               help="The 'Build model step' will be ignored and the real fragments will be used for each template defined by leftmost and rightmost mapping positions. (--format BAMPE)"/>
 
         <expand macro="conditional_effective_genome_size" />
         <expand macro="band_width" />
+        <expand macro="mfold_options" />
 
         <conditional name="cutoff_options">
             <param name="cutoff_options_selector" type="select" label="Peak detection based on" help="default uses q-value">
@@ -121,12 +126,10 @@
                 <option value="pvalue">p-value</option>
             </param>
             <when value="pvalue">
-                <param name="pvalue" type="float" value="" label="p-value cutoff for peak detection"
-                    help="default: not set (--pvalue)"/>
+                <param name="pvalue" type="float" value="" label="p-value cutoff for peak detection" help="default: not set (--pvalue)"/>
             </when>
             <when value="qvalue">
-                <param name="qvalue" type="float" value="0.05" label="Minimum FDR (q-value) cutoff for peak detection"
-                    help="default: 0.05 (--qvalue)"/>
+                <param name="qvalue" type="float" value="0.05" label="Minimum FDR (q-value) cutoff for peak detection" help="The q-value (minimum FDR) cutoff to call significant regions. Default is 0.05. For broad marks, you can try 0.05 as cutoff. Q-values are calculated from p-values using Benjamini-Hochberg procedure. (--qvalue)"/>
             </when>
         </conditional>
 
@@ -137,16 +140,14 @@
             </param>
             <when value="create_model"/>
             <when value="nomodel">
-                <!--<param name="shiftsize" type="integer" label="Arbitrary shift size in bp" value="100" help="(shiftsize)"/>-->
-                <param name="extsize" type="integer" value="100" label="The arbitrary extension size in bp"
-                    help="MACS will use this value as fragment size to extend each read towards 3' end, then pile them up. It's exactly twice the number of legacy shiftsize. In previous language, each read is moved 3' direction to middle of fragment by 1/2 d, then extended to both direction with 1/2 d. This is equivalent to say each read is extended towards 3' into a d size fragment. DEFAULT: 200 (--extsize)"/>
+                <param name="extsize" type="integer" value="200" label="Set extension size" help="The arbitrary extension size in bp. When nomodel is true, MACS will use this value as fragment size to extend each read towards 3-prime end, then pile them up. It is exactly twice the number of obsolete SHIFTSIZE. In previous language, each read is moved 5-prime-to-3-prime direction to middle of fragment by 0.5 d, then extended to both direction with 0.5 d. This is equivalent to say each read is extended towards 5-prime-to-3-prime into a d size fragment. --extsize (this option) and --shift (the option below) can be combined when necessary. See --shift option below. Default = 200 (--extsize)."/>
+                <param name="shift" type="integer" value="0" label="Set shift size" help="(NOT the legacy --shiftsize option!) The arbitrary shift in bp. Use discretion while setting it other than default value. When NOMODEL is set, MACS will use this value to move cutting ends (5-prime) towards 5-prime-to-3-prime  direction then apply EXTSIZE to extend them to fragments. When this value is negative, ends will be moved toward 3-prime-to-5-prime  direction. Recommended to keep it as default 0 for ChIP-Seq datasets, or -1 * 0.5 of --extsize (option above) together with --extsize option for detecting enriched cutting loci such as certain DNAseI-Seq datasets. Note, you can't set values other than 0 if format is BAMPE for paired-end data. Default = 0 (--shift)."/>
             </when>
         </conditional>
 
         <param name="outputs" type="select" display="checkboxes" multiple="True" optional="false" label="Outputs" help="PDF only created when model is build">
             <option value="peaks_tabular" selected="True">Peaks as tabular file</option>
-            <!--<option value="narrow">narrow Peaks</option>-->
-            <option value="summits" selected="true">summits</option>
+            <option value="summits" selected="true">Peak summits</option>
             <option value="bdg" selected="true">Scores in bedGraph files (--bdg)</option>
             <option value="html">Summary page (html)</option>
             <option value="pdf">Plot in PDF</option>
@@ -253,10 +254,11 @@
             <param name="outputs" value="peaks_tabular,bdg"/>
             <param name="effective_genome_size_options_selector" value="user_defined" />
             <param name="gsize" value="3300000000" />
-            <output name="output_control_lambda" compare="contains" file="callpeak_control_part.bdg"/>
-            <output name="output_treat_pileup" compare="contains" file="callpeak_treatment_part.bdg"/>
-            <output name="output_tabular" compare="contains" file="callpeak_part.tabular"/>
-            <output name="output_summits" compare="contains" file="callpeak_summits_part.bed"/>
+            <param name="lower" value="5" />
+            <param name="upper" value="50" />
+            <output name="output_control_lambda" compare="contains" file="callpeak_control_part.bdg" lines_diff="1"/>
+            <output name="output_treat_pileup" compare="contains" file="callpeak_treatment_part.bdg" lines_diff="1"/>
+            <output name="output_tabular" compare="contains" file="callpeak_part.tabular" lines_diff="1"/>
         </test>
         <test>
             <param name="input_control_file" value="Control_200K.bed" ftype="bed"/>
@@ -267,32 +269,37 @@
             <param name="outputs" value="pdf"/>
             <param name="effective_genome_size_options_selector" value="user_defined" />
             <param name="gsize" value="3300000000" />
+            <param name="lower" value="5" />
+            <param name="upper" value="50" />
             <output name="output_plot" file="magic.pdf" ftype="pdf" compare="contains" />
         </test>
     </tests>
     <help>
+        <![CDATA[
 **What it does**
 
-With the improvement of sequencing techniques, chromatin immunoprecipitation followed by high throughput sequencing (ChIP-Seq)
-is getting popular to study genome-wide protein-DNA interactions. To address the lack of powerful ChIP-Seq analysis method, we present a novel algorithm, named Model-based Analysis of ChIP-Seq (MACS), for
-identifying transcript factor binding sites. MACS captures the influence of genome complexity to evaluate the significance of enriched ChIP regions, and MACS improves the spatial resolution of
-binding sites through combining the information of both sequencing tag position and orientation. MACS can be easily used for ChIP-Seq data alone, or with control sample with the increase of specificity.
+**callpeak** is the main function of the MACS2_ package. MACS identifies enriched binding sites in ChIP-seq experiments.
+It captures the influence of genome complexity to evaluate the significance of enriched ChIP regions,
+and improves the spatial resolution of binding sites through combining the information of both sequencing
+tag position and orientation. MACS can be used for ChIP-Seq data alone, or together with a control sample,
+which increases specificity (recommended).
 
-View the original MACS2 documentation: https://github.com/taoliu/MACS/blob/master/README
-
-------
+.. _MACS2: https://github.com/taoliu/MACS
 
-**Usage**
-
-**Peak Calling**: Main MACS2 Function to Call peaks from alignment results.
+MACS2 performs the following analysis steps:
 
-If you choose "Scores in bedGraph files" MACS will output the fragment pileup, control lambda, -log10-pvalue and -log10-qvalue scores in bedGraph files.
-The peaks in BED format contain the following colomns: chr end length abs_summit pileup -log10(pvalue) fold_enrichment -log10(qvalue) name
+ * Artificially extend reads to expected fragment length, and generate coverage map along genome.
+ * Assume background reads are Poisson distributed. Mean of the Poisson is locally variable and is estimated from control experiment (if available) in 5Kbp or 10Kbp around examined location.
+ * For a given location, do we see more reads than we would have expected from the Poisson (p < 0.00005)? If Yes, MACS2 calls a peak. 
 
-**Compare .bdg files**: Deduct noise by comparing two signal tracks in bedGraph.
+
+.. class:: warningmark
+
+If MACS2 fails, it is usually because it cannot build the model for peaks.  You may want to extend **mfold** range by increasing the upper bound or play with **Build model** options.
 
 
 @citation@
+]]>
   </help>
   <expand macro="citations" />
 </tool>