comparison span.xml @ 2:5b99943c4627 draft

Span version https://github.com/JetBrains-Research/galaxy-applications/commit/cbbba255d66a4775cc35caf5cb85665396fdcd2a
author jetbrains
date Sun, 18 Nov 2018 08:20:27 -0500
parents 1f0c4f0a9c3b
children 4130e95bd6c8
comparison
equal deleted inserted replaced
1:dfb1e66235c5 2:5b99943c4627
1 <tool id="span" name="SPAN" version="0.7.1.4272"> 1 <tool id="span" name="SPAN" version="0.7.1.4272">
2 <description>ChIP-Seq analysis</description> 2 <description>Semi-supervised Peak Analyzer for ChIP-Seq data</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="0.7.1.4272">package_span_jar</requirement> 4 <requirement type="package" version="0.7.1.4272">package_span_jar</requirement>
5 <!--<container type="docker">biolabs/span</container>-->
6 </requirements> 5 </requirements>
7 <stdio> 6 <stdio>
8 <!-- Wrapper ensures anything other than zero is an error --> 7 <!-- Wrapper ensures anything other than zero is an error -->
9 <exit_code range="1:"/> 8 <exit_code range="1:"/>
10 <exit_code range=":-1"/> 9 <exit_code range=":-1"/>
11 </stdio> 10 </stdio>
12 <command interpreter="python"> 11 <command interpreter="python">
12 #import re
13 #set treatment_identifier = re.sub('[^\w\-\.]', '_', str($treatment_file.element_identifier))
14 #set genome_identifier = re.sub('[^\w\-\.]', '_', str($genome_file.element_identifier))
15
16 #if $control.control_selector
17 #set control_identifier = re.sub('[^\w\-\.]', '_', str($control.control_file.element_identifier))
18 #end if
19
13 #if str($action.action_selector) == "model" 20 #if str($action.action_selector) == "model"
14 #if $control.control_selector 21 #if $control.control_selector
15 span_wrapper.py model with_control "${genome}" "${treatment_file}" "${bin}" "${action.model_file}" "${control.control_file}" 22 span_wrapper.py model with_control
23 "${genome_identifier}" "${genome_file}"
24 "${treatment_identifier}" "${treatment_file}"
25 "${bin}" "${action.model_file}"
26 "${control_identifier}" "${control.control_file}"
16 #else 27 #else
17 span_wrapper.py model without_control "${genome}" "${treatment_file}" "${bin}" "${action.model_file}" 28 span_wrapper.py model without_control
29 "${genome_identifier}" "${genome_file}"
30 "${treatment_identifier}" "${treatment_file}"
31 "${bin}" "${action.model_file}"
18 #end if 32 #end if
19 #else 33 #else
20 #if $control.control_selector 34 #if $control.control_selector
21 span_wrapper.py peaks with_control "${genome}" "${treatment_file}" "${bin}" "${action.model_file}" "${control.control_file}" "${fdr}" "${gap}" "${action.peaks_file}" 35 span_wrapper.py peaks with_control
36 "${genome_identifier}" "${genome_file}"
37 "${treatment_identifier}" "${treatment_file}"
38 "${bin}" "${action.model_file}"
39 "${control_identifier}" "${control.control_file}"
40 "${action.fdr}" "${action.gap}" "${action.peaks_file}"
22 #else 41 #else
23 span_wrapper.py peaks without_control "${genome}" "${treatment_file}" "${bin}" "${action.model_file}" "${fdr}" "${gap}" "${action.peaks_file}" 42 span_wrapper.py peaks without_control
43 "${genome_identifier}" "${genome_file}"
44 "${treatment_identifier}" "${treatment_file}"
45 "${bin}" "${action.model_file}"
46 "${action.fdr}" "${action.gap}" "${action.peaks_file}"
24 #end if 47 #end if
25 #end if 48 #end if
26 </command> 49 </command>
27 <inputs> 50 <inputs>
28 <param name="treatment_file" type="data" format="bam" label="Treatment BAM" 51 <param name="treatment_file" type="data" format="bam" label="Treatment BAM"
29 description="Treatment BAM reads to process"/> 52 description="Treatment BAM reads to process" argument="--treatment"
30 <param name="genome" type="data" format="chrom.sizes" label="Genome chrom.sizes" 53 help="Treatment BAM reads to process"/>
31 description="Genome build chrom.sizes file"/> 54 <param name="genome_file" type="data" format="chrom.sizes" label="Genome chrom.sizes"
55 description="Genome build chrom.sizes file" argument="--chrom.sizes"
56 help="Genome build chrom.sizes file"/>
32 57
33 <conditional name="control"> 58 <conditional name="control">
34 <param name="control_selector" type="boolean" label="Control available" value="false"/> 59 <param name="control_selector" type="boolean" label="Control available" value="false"/>
35 <when value="true"> 60 <when value="true">
36 <param name="control_file" type="data" format="bam" label="Control BAM" 61 <param name="control_file" type="data" format="bam" label="Control BAM"
37 description="Control BAM reads to process"/> 62 description="Control BAM reads to process" argument="--control"
63 help="Control BAM reads to process"/>
38 </when> 64 </when>
39 </conditional> 65 </conditional>
40 66
41 <conditional name="action"> 67 <conditional name="action">
42 <param name="action_selector" type="select" label="Action"> 68 <param name="action_selector" type="select" label="Action">
43 <option value="model">Compute SPAN model</option> 69 <option value="model">Compute SPAN model</option>
44 <option value="peaks">Compute SPAN model and produce peaks file</option> 70 <option value="peaks">Compute SPAN model and produce peaks file</option>
45 </param> 71 </param>
46 <when value="model"> 72 <when value="model">
47 <param name="model_file" type="text" value="model.span" label="Model name"/> 73 <param name="model_file" type="text" value="model.span" label="Model name"
74 help="Trained model file in binary format, which can be visualized directly in JBR Genome Browser
75 and used in integrated peak calling pipeline"/>
48 </when> 76 </when>
49 <when value="peaks"> 77 <when value="peaks">
50 <param name="model_file" type="text" value="model.span" label="Model file name"/> 78 <param name="model_file" type="text" value="model.span" label="Model file name"
51 <param name="fdr" size="5" type="float" value="0.0001" label="FDR"/> 79 help="Trained model file in binary format, which can be visualized directly in JBR Genome Browser
52 <param name="gap" size="5" type="integer" value="5" label="GAP"/> 80 and used in integrated peak calling pipeline"/>
53 <param name="peaks_file" type="text" value="result.peak" label="Peaks file name"/> 81 <param name="fdr" size="5" type="float" value="0.0001" label="FDR" argument="--fdr"
82 help="Minimum FDR cutoff to call significant regions. This form is pre-filled with 0.0001; the default value of SPAN itself is 1.0E-6.
83 SPAN reports p- and q- values for the null hypothesis that a given bin is not enriched with a histone modification.
84 Peaks are formed from the list of truly (in the FDR sense) enriched bins for the analyzed biological condition by thresholding the
85 q-value at the FDR cutoff and then merging spatially close peaks into broader ones using the GAP option. This is equivalent to controlling FDR.
86 q-values are calculated from p-values using the Benjamini-Hochberg procedure."/>
87 <param name="gap" size="5" type="integer" value="5" label="GAP" argument="--gap"
88 help="Gap size to merge spatially close peaks. Useful for wide histone modifications.
89 Default value is 5, i.e. peaks separated by 5*BIN distance or less are merged."/>
90 <param name="peaks_file" type="text" value="result.peak" label="Peaks file name" argument="--peaks"/>
54 </when> 91 </when>
55 </conditional> 92 </conditional>
56 93
57 <param name="bin" size="5" type="integer" value="200" label="Bin size"/> 94 <param name="bin" size="5" type="integer" value="200" label="Bin size" argument="--bin"
95 help="Peak analysis is performed on read coverage tiled into consecutive bins of configurable size.
96 Default value is 200bp, approximately the length of one nucleosome."/>
58 </inputs> 97 </inputs>
59 <outputs> 98 <outputs>
60 <data name="${action.model_file}" format="span" label="SPAN model file"/> 99 <data name="SPAN model file" format="span" from_work_dir="*.span" label="SPAN model file ${action.model_file} on ${on_string}"/>
61 <data name="${action.peaks_file}" format="bed" label="SPAN peaks file"> 100 <data name="SPAN peaks file" format="bed" from_work_dir="*.peak" label="SPAN peaks file ${action.peaks_file} on ${on_string}">
62 <filter>action['action_selector'] == "peaks"</filter> 101 <filter>action['action_selector'] == "peaks"</filter>
63 </data> 102 </data>
103 <data name="SPAN log file" format="txt" from_work_dir="*.log" label="SPAN log file on ${on_string}"/>
64 </outputs> 104 </outputs>
65 <help> 105 <help><![CDATA[
66 SPAN Semi-supervised Peak Analyzer is a tool for analyzing ChIP-seq data. 106 .. class:: infomark
67 Details: http://artyomovlab.wustl.edu/aging/span.html 107
68 </help> 108 **What it does**
109
110 SPAN Semi-supervised Peak Analyzer is a tool for analyzing ChIP-seq data.
111
112 -----
113
114 **Inputs**
115
116 *-t, --treatment <Path>* **Required.** ChIP-seq treatment file: bam, bed or bed.gz. If multiple files are given, they are treated as replicates.
117
118 *--chrom.sizes, --cs <Path>* **Required.** Chromosome sizes path, can be downloaded at http://hgdownload.cse.ucsc.edu/goldenPath/<build>/bigZips/<build>.chrom.sizes
119
120 *-c, --control <Path>* Control file: bam, bed or bed.gz. Provide either a single control file or a separate control file for each treatment file.
121
122 *--fragment <Integer>* Fragment size; the read length is used if not given
123
124 *-b, --bin <Integer>* Bin size
125
126 *-f, --fdr <Double>* FDR value
127
128 *-g, --gap <Integer>* Gap size to merge peaks
129
130 *-p, --peaks <Path>* Path to result peaks file in ENCODE broadPeak (BED 6+3) format
131
132
133 -----
134
135 **Outputs**
136
137 This tool produces a SPAN binary model file and/or peaks in ENCODE broadPeak (BED 6+3) format.
138
139 Peak file columns contain the following data:
140
141 * **1st**: chromosome name
142 * **2nd**: start position of peak
143 * **3rd**: end position of peak
144 * **4th**: name of peak
145 * **5th**: integer score for display in genome browser (e.g. UCSC)
146 * **6th**: strand, either "." (=no strand) or "+" or "-"
147 * **7th**: fold-change
148 * **8th**: -log10pvalue
149 * **9th**: -log10qvalue
150
151 -----
152
153 **SPAN workflow**
154
155 * Convert raw reads to tags using the *FRAGMENT* parameter.
156 * Compute coverage for the whole genome, tiled into bins of *BIN* base pairs.
157 * Fit a 3-state hidden Markov model that classifies bins into ZERO states (no coverage), LOW states (non-specific binding) and HIGH states (specific binding).
158 * Compute the posterior HIGH state probability of each bin.
159 * The trained model is saved in the *.span* binary format.
160 * Peaks are computed using the trained model and the *FDR* and *GAP* parameters.
161
162 ------
163
164 **Citation**
165
166 If you use this tool in Galaxy, please cite XXX, et al. *In preparation.*
167
168 -----
169
170 **More Information**
171
172 * Project home page: https://research.jetbrains.org/groups/biolabs/tools/span-peak-analyzer
173 * Study cases: https://artyomovlab.wustl.edu/aging
174
175 ]]></help>
69 </tool> 176 </tool>