comparison macs2_callpeak.xml @ 5:beb902da6e5f draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/macs2 commit e10f301c7f8c54a7d12df4e631527197baccf70b
author iuc
date Sat, 08 Apr 2017 08:28:57 -0400
parents 56e104999978
children 2119d851a53b
comparison
equal deleted inserted replaced
4:56e104999978 5:beb902da6e5f
2 <description>Call peaks from alignment results</description> 2 <description>Call peaks from alignment results</description>
3 <macros> 3 <macros>
4 <import>macs2_macros.xml</import> 4 <import>macs2_macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements"> 6 <expand macro="requirements">
7 <requirement type="package" version="3.1.2">R</requirement>
8 <requirement type="set_environment">MACS2_SCRIPT_PATH</requirement>
9 </expand> 7 </expand>
10 <expand macro="stdio" /> 8 <expand macro="stdio" />
11 <expand macro="version_command" /> 9 <expand macro="version_command" />
12 <command> 10 <command>
11 <![CDATA[
13 #set $temp_stderr = 'macs2_stderr' 12 #set $temp_stderr = 'macs2_stderr'
14 (macs2 callpeak 13 (macs2 callpeak
15 14
16 --name "MACS2" 15 --name 'MACS2'
17 -t ${ ' '.join( map( lambda x:'"%s"' % ( x ), $input_treatment_file ) ) } 16 -t ${ ' '.join( map( lambda x:'"%s"' % ( x ), $input_treatment_file ) ) }
18 17
19 #if str( $input_control_file ) != 'None': 18 #if str( $input_control_file ) != 'None':
20 -c ${ ' '.join( map( lambda x:'"%s"' % ( x ), $input_control_file ) ) } 19 -c ${ ' '.join( map( lambda x:'"%s"' % ( x ), $input_control_file ) ) }
21 #end if 20 #end if
22 21
23 #for $ifile in $input_treatment_file: 22 #for $ifile in $input_treatment_file:
24 #if $ifile.ext.upper() == 'BAM' and $bampe: 23 #if $ifile.ext.upper() == "BAM" and $bampe:
25 --format BAMPE 24 --format BAMPE
26 #else 25 #else
27 --format='$ifile.ext.upper()' 26 --format='$ifile.ext.upper()'
28 #end if 27 #end if
29 #end for 28 #end for
30 29
31 @effective_genome_size@ 30 @effective_genome_size@
32 31
33 --bw='$band_width' 32 --bw '${$band_width}'
33 @mfold_command@
34 34
35 ## advanced options 35 ## advanced options
36 #if $advanced_options.advanced_options_selector == 'on': 36 #if $advanced_options.advanced_options_selector == "on":
37 $advanced_options.nolambda 37 $advanced_options.nolambda
38 $advanced_options.to_large 38 $advanced_options.to_large
39 --ratio $advanced_options.ratio 39 --ratio $advanced_options.ratio
40 --slocal $advanced_options.slocal 40 --slocal $advanced_options.slocal
41 --llocal $advanced_options.llocal 41 --llocal $advanced_options.llocal
42 #if $advanced_options.broad_options.broad_options_selector == 'broad': 42 #if $advanced_options.broad_options.broad_options_selector == "broad":
43 --broad 43 --broad
44 --broad-cutoff='${ advanced_options.broad_options.broad_cutoff }' 44 --broad-cutoff='${ advanced_options.broad_options.broad_cutoff }'
45 #else: 45 #else:
46 $advanced_options.broad_options.call_summits 46 $advanced_options.broad_options.call_summits
47 #end if 47 #end if
48 48
49 #if str( $advanced_options.keep_dup_options.keep_dup_options_selector ) == "user": 49 #if str( $advanced_options.keep_dup_options.keep_dup_options_selector ) == "user":
50 --keep-dup "${ advanced_options.keep_dup_options.user_keepdup }" 50 --keep-dup '${ advanced_options.keep_dup_options.user_keepdup }'
51 #else 51 #else
52 --keep-dup "${ advanced_options.keep_dup_options.keep_dup_options_selector }" 52 --keep-dup '${ advanced_options.keep_dup_options.keep_dup_options_selector }'
53 #end if 53 #end if
54 54
55 #end if 55 #end if
56 56
57 ## With --bdg two additional output files will be generated. 57 ## With --bdg two additional output files will be generated.
58 #if 'bdg' in str($outputs).split(','): 58 #if "bdg" in str($outputs).split(','):
59 --bdg 59 --bdg
60 #end if 60 #end if
61 61
62 ## cutoff selection 62 ## cutoff selection
63 #if str( $cutoff_options.cutoff_options_selector ) == 'qvalue': 63 #if str( $cutoff_options.cutoff_options_selector ) == "qvalue":
64 --qvalue "${ cutoff_options.qvalue }" 64 --qvalue '${ cutoff_options.qvalue }'
65 #elif str( $cutoff_options.cutoff_options_selector ) == 'pvalue': 65 #elif str( $cutoff_options.cutoff_options_selector ) == "pvalue":
66 #if str($cutoff_options.pvalue).strip() != '': 66 #if str($cutoff_options.pvalue).strip() != "":
67 --pvalue "${ cutoff_options.pvalue }" 67 --pvalue '${ cutoff_options.pvalue }'
68 #end if 68 #end if
69 #end if 69 #end if
70 70
71 ## model options 71 ## model options
72 #if $nomodel_type.nomodel_type_selector == 'nomodel': 72 #if $nomodel_type.nomodel_type_selector == "nomodel":
73 --nomodel 73 --nomodel
74 ##--shiftsize '$nomodel_type.shiftsize'
75 --extsize '${ nomodel_type.extsize }' 74 --extsize '${ nomodel_type.extsize }'
76 #end if 75 #end if
77 76
78 2> $temp_stderr) 77 2> $temp_stderr)
79 #if 'peaks_tabular' in str($outputs).split(','): 78 #if "peaks_tabular" in str($outputs).split(','):
80 &amp;&amp; 79 &&
81 cp MACS2_peaks.xls "${ output_tabular }" 80 cp MACS2_peaks.xls '${ output_tabular }'
82 #end if 81 #end if
83 82
84 ## run R to create pdf from model script 83 ## run R to create pdf from model script
85 #if $nomodel_type.nomodel_type_selector == 'create_model' and 'pdf' in str($outputs).split(','): 84 #if $nomodel_type.nomodel_type_selector == "create_model" and "pdf" in str($outputs).split(','):
86 &amp;&amp; 85 &&
87 Rscript MACS2_model.r > MACS2_model.r.log 86 Rscript MACS2_model.r > MACS2_model.r.log
88 #end if 87 #end if
89 88
90 #if 'html' in str($outputs).split(','): 89 #if 'html' in str($outputs).split(','):
91 ## if output files exists, move them to the files_path and create a html result page linking to them 90 ## if output files exists, move them to the files_path and create a html result page linking to them
92 &amp;&amp; 91 &&
93 ( 92 (
94 count=`ls -1 MACS2* 2>/dev/null | wc -l`; 93 count=`ls -1 MACS2* 2>/dev/null | wc -l`;
95 if [ \$count != 0 ]; 94 if [ \$count != 0 ];
96 then 95 then
97 mkdir "${ output_extra_files.files_path }"; 96 mkdir '${ output_extra_files.files_path }' &&
98 cp MACS2* "${ output_extra_files.files_path }"; 97 cp MACS2* '${ output_extra_files.files_path }' &&
99 python "\$MACS2_SCRIPT_PATH/dir2html.py" "${ output_extra_files.files_path }" $temp_stderr > "${ output_extra_files }"; 98 python '$__tool_directory__/dir2html.py'
99 '${ output_extra_files.files_path }' $temp_stderr > '${ output_extra_files }'
100 fi; 100 fi;
101 ) 101 )
102 #end if 102 #end if
103 ; 103 &&
104 exit_code_for_galaxy=\$?; 104 exit_code_for_galaxy=\$? &&
105 cat $temp_stderr 2&gt;&amp;1; 105 cat $temp_stderr 2>&1 &&
106 (exit \$exit_code_for_galaxy) 106 (exit \$exit_code_for_galaxy)
107 ]]>
107 </command> 108 </command>
108 <inputs> 109 <inputs>
109 <param name="input_treatment_file" type="data" format="bam,sam,bed" multiple="True" label="ChIP-Seq Treatment File" /> 110 <param name="input_treatment_file" type="data" format="bam,sam,bed" multiple="true"
110 <param name="input_control_file" type="data" format="bam,sam,bed" multiple="True" optional="True" label="ChIP-Seq Control File" /> 111 label="ChIP-Seq Treatment File" />
111 112 <param name="input_control_file" type="data" format="bam,sam,bed" multiple="true" optional="True"
112 <param name="bampe" type="boolean" truevalue="--format BAMPE" falsevalue="" checked="False" label="Are your inputs Paired-end BAM files?" 113 label="ChIP-Seq Control File" />
113 help="The 'Build model step' will be ignored and the real fragments will be used for each template defined by leftmost and rightmost mapping positions. (--format BAMPE)"/> 114
115 <param name="bampe" type="boolean" truevalue="--format BAMPE" falsevalue="" checked="False"
116 label="Are your inputs Paired-end BAM files?"
117 help="The 'Build model step' will be ignored and the real fragments will be used for each template defined by leftmost and rightmost mapping positions. (--format BAMPE)"/>
114 118
115 <expand macro="conditional_effective_genome_size" /> 119 <expand macro="conditional_effective_genome_size" />
116 <expand macro="band_width" /> 120 <expand macro="band_width" />
121 <expand macro="mfold_options" />
117 122
118 <conditional name="cutoff_options"> 123 <conditional name="cutoff_options">
119 <param name="cutoff_options_selector" type="select" label="Peak detection based on" help="default uses q-value"> 124 <param name="cutoff_options_selector" type="select" label="Peak detection based on" help="default uses q-value">
120 <option value="qvalue" selected="true">q-value</option> 125 <option value="qvalue" selected="true">q-value</option>
121 <option value="pvalue">p-value</option> 126 <option value="pvalue">p-value</option>
122 </param> 127 </param>
123 <when value="pvalue"> 128 <when value="pvalue">
124 <param name="pvalue" type="float" value="" label="p-value cutoff for peak detection" 129 <param name="pvalue" type="float" value="" label="p-value cutoff for peak detection" help="default: not set (--pvalue)"/>
125 help="default: not set (--pvalue)"/>
126 </when> 130 </when>
127 <when value="qvalue"> 131 <when value="qvalue">
128 <param name="qvalue" type="float" value="0.05" label="Minimum FDR (q-value) cutoff for peak detection" 132 <param name="qvalue" type="float" value="0.05" label="Minimum FDR (q-value) cutoff for peak detection" help="The q-value (minimum FDR) cutoff to call significant regions. Default is 0.05. For broad marks, you can try 0.05 as cutoff. Q-values are calculated from p-values using the Benjamini-Hochberg procedure. (--qvalue)"/>
129 help="default: 0.05 (--qvalue)"/>
130 </when> 133 </when>
131 </conditional> 134 </conditional>
132 135
133 <conditional name="nomodel_type"> 136 <conditional name="nomodel_type">
134 <param name="nomodel_type_selector" type="select" label="Build Model"> 137 <param name="nomodel_type_selector" type="select" label="Build Model">
135 <option value="nomodel">Do not build the shifting model (--nomodel)</option> 138 <option value="nomodel">Do not build the shifting model (--nomodel)</option>
136 <option value="create_model" selected="true">Build the shifting model</option> 139 <option value="create_model" selected="true">Build the shifting model</option>
137 </param> 140 </param>
138 <when value="create_model"/> 141 <when value="create_model"/>
139 <when value="nomodel"> 142 <when value="nomodel">
140 <!--<param name="shiftsize" type="integer" label="Arbitrary shift size in bp" value="100" help="(shiftsize)"/>--> 143 <param name="extsize" type="integer" value="200" label="Set extension size" help="The arbitrary extension size in bp. When nomodel is true, MACS will use this value as fragment size to extend each read towards the 3-prime end, then pile them up. It is exactly twice the number of obsolete SHIFTSIZE. In previous language, each read is moved 5-prime-to-3-prime direction to middle of fragment by 0.5 d, then extended in both directions by 0.5 d. This is equivalent to saying each read is extended towards 5-prime-to-3-prime into a d size fragment. --extsize (this option) and --shift (the option below) can be combined when necessary. See --shift option below. Default = 200 (--extsize)."/>
141 <param name="extsize" type="integer" value="100" label="The arbitrary extension size in bp" 144 <param name="shift" type="integer" value="0" label="Set shift size" help="(NOT the legacy --shiftsize option!) The arbitrary shift in bp. Use discretion while setting it other than default value. When NOMODEL is set, MACS will use this value to move cutting ends (5-prime) towards 5-prime-to-3-prime direction then apply EXTSIZE to extend them to fragments. When this value is negative, ends will be moved toward 3-prime-to-5-prime direction. Recommended to keep it as default 0 for ChIP-Seq datasets, or -1 * 0.5 of --extsize (option above) together with --extsize option for detecting enriched cutting loci such as certain DNAseI-Seq datasets. Note, you can't set values other than 0 if format is BAMPE for paired-end data. Default = 0 (--shift)."/>
142 help="MACS will use this value as fragment size to extend each read towards 3' end, then pile them up. It's exactly twice the number of legacy shiftsize. In previous language, each read is moved 3' direction to middle of fragment by 1/2 d, then extended to both direction with 1/2 d. This is equivalent to say each read is extended towards 3' into a d size fragment. DEFAULT: 200 (--extsize)"/>
143 </when> 145 </when>
144 </conditional> 146 </conditional>
145 147
146 <param name="outputs" type="select" display="checkboxes" multiple="True" optional="false" label="Outputs" help="PDF only created when model is built"> 148 <param name="outputs" type="select" display="checkboxes" multiple="True" optional="false" label="Outputs" help="PDF only created when model is built">
147 <option value="peaks_tabular" selected="True">Peaks as tabular file</option> 149 <option value="peaks_tabular" selected="True">Peaks as tabular file</option>
148 <!--<option value="narrow">narrow Peaks</option>--> 150 <option value="summits" selected="true">Peak summits</option>
149 <option value="summits" selected="true">summits</option>
150 <option value="bdg" selected="true">Scores in bedGraph files (--bdg)</option> 151 <option value="bdg" selected="true">Scores in bedGraph files (--bdg)</option>
151 <option value="html">Summary page (html)</option> 152 <option value="html">Summary page (html)</option>
152 <option value="pdf">Plot in PDF</option> 153 <option value="pdf">Plot in PDF</option>
153 </param> 154 </param>
154 155
251 <param name="qvalue" value="0.05"/> 252 <param name="qvalue" value="0.05"/>
252 <param name="band_width" value="300"/> 253 <param name="band_width" value="300"/>
253 <param name="outputs" value="peaks_tabular,bdg"/> 254 <param name="outputs" value="peaks_tabular,bdg"/>
254 <param name="effective_genome_size_options_selector" value="user_defined" /> 255 <param name="effective_genome_size_options_selector" value="user_defined" />
255 <param name="gsize" value="3300000000" /> 256 <param name="gsize" value="3300000000" />
256 <output name="output_control_lambda" compare="contains" file="callpeak_control_part.bdg"/> 257 <param name="lower" value="5" />
257 <output name="output_treat_pileup" compare="contains" file="callpeak_treatment_part.bdg"/> 258 <param name="upper" value="50" />
258 <output name="output_tabular" compare="contains" file="callpeak_part.tabular"/> 259 <output name="output_control_lambda" compare="contains" file="callpeak_control_part.bdg" lines_diff="1"/>
259 <output name="output_summits" compare="contains" file="callpeak_summits_part.bed"/> 260 <output name="output_treat_pileup" compare="contains" file="callpeak_treatment_part.bdg" lines_diff="1"/>
261 <output name="output_tabular" compare="contains" file="callpeak_part.tabular" lines_diff="1"/>
260 </test> 262 </test>
261 <test> 263 <test>
262 <param name="input_control_file" value="Control_200K.bed" ftype="bed"/> 264 <param name="input_control_file" value="Control_200K.bed" ftype="bed"/>
263 <param name="input_treatment_file" value="ChIP_200K.bed" ftype="bed"/> 265 <param name="input_treatment_file" value="ChIP_200K.bed" ftype="bed"/>
264 <param name="cutoff_options_selector" value="qvalue"/> 266 <param name="cutoff_options_selector" value="qvalue"/>
265 <param name="qvalue" value="0.05"/> 267 <param name="qvalue" value="0.05"/>
266 <param name="band_width" value="300"/> 268 <param name="band_width" value="300"/>
267 <param name="outputs" value="pdf"/> 269 <param name="outputs" value="pdf"/>
268 <param name="effective_genome_size_options_selector" value="user_defined" /> 270 <param name="effective_genome_size_options_selector" value="user_defined" />
269 <param name="gsize" value="3300000000" /> 271 <param name="gsize" value="3300000000" />
272 <param name="lower" value="5" />
273 <param name="upper" value="50" />
270 <output name="output_plot" file="magic.pdf" ftype="pdf" compare="contains" /> 274 <output name="output_plot" file="magic.pdf" ftype="pdf" compare="contains" />
271 </test> 275 </test>
272 </tests> 276 </tests>
273 <help> 277 <help>
278 <![CDATA[
274 **What it does** 279 **What it does**
275 280
276 With the improvement of sequencing techniques, chromatin immunoprecipitation followed by high throughput sequencing (ChIP-Seq) 281 **callpeak** is the main function of the MACS2_ package. MACS identifies enriched binding sites in ChIP-seq experiments.
277 is getting popular to study genome-wide protein-DNA interactions. To address the lack of powerful ChIP-Seq analysis method, we present a novel algorithm, named Model-based Analysis of ChIP-Seq (MACS), for 282 It captures the influence of genome complexity to evaluate the significance of enriched ChIP regions,
278 identifying transcript factor binding sites. MACS captures the influence of genome complexity to evaluate the significance of enriched ChIP regions, and MACS improves the spatial resolution of 283 and improves the spatial resolution of binding sites through combining the information of both sequencing
279 binding sites through combining the information of both sequencing tag position and orientation. MACS can be easily used for ChIP-Seq data alone, or with control sample with the increase of specificity. 284 tag position and orientation. MACS can be used for ChIP-Seq data alone, or with control sample with the
280 285 increase of specificity (recommended).
281 View the original MACS2 documentation: https://github.com/taoliu/MACS/blob/master/README 286
282 287 .. _MACS2: https://github.com/taoliu/MACS
283 ------ 288
284 289 MACS2 performs the following analysis steps:
285 **Usage** 290
286 291 * Artificially extend reads to expected fragment length, and generate coverage map along genome.
287 **Peak Calling**: Main MACS2 Function to Call peaks from alignment results. 292 * Assume background reads are Poisson distributed. Mean of the Poisson is locally variable and is estimated from control experiment (if available) in 5Kbp or 10Kbp around examined location.
288 293 * For a given location, do we see more reads than we would have expected from the Poisson (p < 0.00005)? If Yes, MACS2 calls a peak.
289 If you choose "Scores in bedGraph files" MACS will output the fragment pileup, control lambda, -log10-pvalue and -log10-qvalue scores in bedGraph files. 294
290 The peaks in BED format contain the following columns: chr end length abs_summit pileup -log10(pvalue) fold_enrichment -log10(qvalue) name 295 .. class:: warningmark
291 296 .. class:: warningmark
292 **Compare .bdg files**: Deduct noise by comparing two signal tracks in bedGraph. 297
298 If MACS2 fails, it is usually because it cannot build the model for peaks. You may want to extend the **mfold** range by increasing the upper bound, or adjust the **Build Model** options.
293 299
294 300
295 @citation@ 301 @citation@
302 ]]>
296 </help> 303 </help>
297 <expand macro="citations" /> 304 <expand macro="citations" />
298 </tool> 305 </tool>