comparison volcanoplot.xml @ 5:44608d0193ed draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/volcanoplot commit 8464d1b013c316d88b37884be521c0ef50be5623"
author iuc
date Sun, 06 Jun 2021 09:12:22 +0000
parents 73b8cb5bddcd
children 83c573f2e73c
comparison
equal deleted inserted replaced
4:73b8cb5bddcd 5:44608d0193ed
1 <tool id="volcanoplot" name="Volcano Plot" version="0.0.3"> 1 <tool id="volcanoplot" name="Volcano Plot" version="0.0.4">
2 <description>create a volcano plot</description> 2 <description>create a volcano plot</description>
3 <edam_topics>
4 <edam_topic>topic_0092</edam_topic>
5 </edam_topics>
6 <edam_operations>
7 <edam_operation>operation_0337</edam_operation>
8 </edam_operations>
3 <requirements> 9 <requirements>
4 <requirement type="package" version="3.1.0">r-ggplot2</requirement> 10 <requirement type="package" version="3.3.3">r-ggplot2</requirement>
5 <requirement type="package" version="0.8.0">r-ggrepel</requirement> 11 <requirement type="package" version="0.9.1">r-ggrepel</requirement>
6 <requirement type="package" version="0.7.8">r-dplyr</requirement> 12 <requirement type="package" version="1.0.6">r-dplyr</requirement>
7 <requirement type="package" version="1.20.2">r-getopt</requirement>
8 </requirements> 13 </requirements>
9 <version_command><![CDATA[ 14 <version_command><![CDATA[
10 echo $(R --version | grep version | grep -v GNU)", ggplot2 version" $(R --vanilla --slave -e "library(ggplot2); cat(sessionInfo()\$otherPkgs\$ggplot2\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", ggrepel version" $(R --vanilla --slave -e "library(ggrepel); cat(sessionInfo()\$otherPkgs\$ggrepel\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", dplyr version" $(R --vanilla --slave -e "library(dplyr); cat(sessionInfo()\$otherPkgs\$dplyr\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", getopt version" $(R --vanilla --slave -e "library(getopt); cat(sessionInfo()\$otherPkgs\$getopt\$Version)" 2> /dev/null | grep -v -i "WARNING: ") 15 echo $(R --version | grep version | grep -v GNU)", ggplot2 version" $(R --vanilla --slave -e "library(ggplot2); cat(sessionInfo()\$otherPkgs\$ggplot2\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", ggrepel version" $(R --vanilla --slave -e "library(ggrepel); cat(sessionInfo()\$otherPkgs\$ggrepel\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", dplyr version" $(R --vanilla --slave -e "library(dplyr); cat(sessionInfo()\$otherPkgs\$dplyr\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
11 ]]></version_command> 16 ]]></version_command>
12 <command detect_errors="exit_code"><![CDATA[ 17 <command detect_errors="exit_code"><![CDATA[
13 18 #if $out_options.rscript_out:
14 Rscript '${__tool_directory__}/volcanoplot.R' 19 cp '${volcanoplot_script}' rscript.txt &&
15 20 #end if
16 -i '$input' 21 Rscript '${volcanoplot_script}'
17 -a $fdr_col 22 ]]></command>
18 -p $pval_col 23 <configfiles>
19 -c $lfc_col 24 <configfile name="volcanoplot_script"><![CDATA[
20 -l $label_col 25 # Galaxy settings start ---------------------------------------------------
21 -s $signif_thresh 26
22 -x $lfc_thresh 27 # setup R error handling to go to stderr
23 #if $labels.label_select == 'file': 28 options(show.error.messages = FALSE, error = function() {cat(geterrmessage(), file = stderr()); q("no", 1, FALSE)})  # FALSE spelled out: F is an ordinary, reassignable variable
24 -f '$labels.label_file' 29
25 #else if $labels.label_select == 'signif': 30 # we need that to not crash galaxy with an UTF8 error on German LC settings.
26 #if $labels.top_num: 31 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
27 -t $labels.top_num 32
33
34 # Load packages -----------------------------------------------------------
35
36 suppressPackageStartupMessages({
37 library(dplyr)
38 library(ggplot2)
39 library(ggrepel)
40 })
41
42
43 # Import data ------------------------------------------------------------
44
45 # Check if header is present by checking if P value column is numeric or not
46
47 first_line <- read.delim('$input', header = FALSE, nrows = 1)  # nrows spelled in full; read only the first row
48
49 first_pvalue <- first_line[, $pval_col]
50
51 if (is.numeric(first_pvalue)) {
52 print("No header row detected")  # first P value cell parsed as a number, so row 1 is data
53 results <- read.delim('$input', header = FALSE)
54 } else {
55 print("Header row detected")  # first P value cell is not numeric, so row 1 is a header
56 results <- read.delim('$input', header = TRUE)
57 }
58
59
60 # Format data ------------------------------------------------------------
61
62 # Create columns from the column numbers specified
63 results <- results %>% mutate(fdr = .[[$fdr_col]],
64 pvalue = .[[$pval_col]],
65 logfc = .[[$lfc_col]],
66 labels = .[[$label_col]])
67
68 # Get names for legend
69 down <- unlist(strsplit('$plot_options.legend_labs', split = ","))[1]
70 notsig <- unlist(strsplit('$plot_options.legend_labs', split = ","))[2]
71 up <- unlist(strsplit('$plot_options.legend_labs', split = ","))[3]
72
73 # Set colours
74 colours <- setNames(c("cornflowerblue", "grey", "firebrick"), c(down, notsig, up))
75
76 # Create significant (sig) column
77 results <- mutate(results, sig = case_when(
78 fdr < $signif_thresh & logfc > $lfc_thresh ~ up,
79 fdr < $signif_thresh & logfc < -$lfc_thresh ~ down,
80 TRUE ~ notsig))
81
82 ## R code below is left aligned for R script output
83
84 #if $labels.label_select != "none"
85 # Specify genes to label --------------------------------------------------
86 #if $labels.label_select == "file"
87 labelfile <- read.delim('$labels.label_file')
88 results <- mutate(results, labels = ifelse(labels %in% labelfile[, 1], labels, ""))
89 #elif $labels.label_select == "signif"
90 #if $labels.top_num <= 0
91 results <- mutate(results, labels = "")
92 #elif $labels.top_num > 0
93 top <- results %>%
94 filter(sig != notsig) %>%
95 slice_min(order_by = pvalue, n = $labels.top_num)
96 toplabels <- pull(top, labels)
97 results <- mutate(results, labels = ifelse(labels %in% toplabels, labels, ""))
98 #else
99 results <- mutate(results, labels = ifelse(sig != notsig, labels, ""))
28 #end if 100 #end if
101 #end if
102 #end if
103
104
105 # Create plot -------------------------------------------------------------
106
107 pdf("out.pdf")
108 p <- ggplot(results, aes(x = logfc, y = -log10(pvalue))) +
109 geom_point(aes(colour = sig)) +
110 scale_color_manual(values = colours) +
111 scale_fill_manual(values = colours) +
112 theme(panel.grid.major = element_blank(),
113 panel.grid.minor = element_blank(),
114 panel.background = element_blank(),
115 axis.line = element_line(colour = "black"),
116 legend.key = element_blank())
117
118 #if $plot_options.title
119 p <- p + ggtitle('$plot_options.title')  # emitted only when a title was supplied
120 #end if
121
122 #if $plot_options.xlab
123 p <- p + xlab('$plot_options.xlab')  # emitted only when an x axis label was supplied
124 #end if
125
126 #if $plot_options.ylab
127 p <- p + ylab('$plot_options.ylab')  # emitted only when a y axis label was supplied
128 #end if
129
130 #if str($plot_options.xmin) and str($plot_options.xmax)
131 p <- p + xlim($plot_options.xmin, $plot_options.xmax)  # needs both limits; passed as numbers, a limit of 0 is honoured
132 #end if
133
134 #if str($plot_options.ymax)
135 p <- p + ylim(0, $plot_options.ymax)  # y axis starts at 0; upper limit passed as a number
136 #end if
137
138 # Set legend title (blank unless a legend label was supplied)
139 #if $plot_options.legend
140 p <- p + labs(colour = '$plot_options.legend')
141 #else
142 p <- p + labs(colour = "")
143 #end if
144
145 #if $labels.label_select != "none"
146 # Add gene labels in boxes
147 #if $plot_options.boxes
148 p <- p + geom_label_repel(aes(label = labels, fill = sig),
149 segment.colour = "black",
150 colour = "white",
151 min.segment.length = 0,
152 show.legend = FALSE)
29 #else 153 #else
30 -t 0 154 # Add gene labels
155 p <- p + geom_text_repel(aes(label = labels, col = sig),
156 min.segment.length = 0,
157 box.padding = 0.3,
158 point.padding = 0.3,
159 show.legend = FALSE)
31 #end if 160 #end if
32 #if $plot_options.boxes: 161 #end if
33 -b 162
34 #end if 163 print(p)
35 #if $plot_options.title: 164 dev.off()
36 -T '$plot_options.title' 165
37 #end if 166
38 #if $plot_options.xlab: 167 #if $out_options.rdata_out
39 -X '$plot_options.xlab' 168 # Save RData -------------------------------------------------------------
40 #end if 169 save.image(file="volcanoplot.RData")
41 #if $plot_options.ylab: 170 #end if
42 -Y '$plot_options.ylab' 171
43 #end if 172
44 #if $plot_options.xmin: 173 # R and Package versions -------------------------------------------------
45 -m '$plot_options.xmin' 174 sessionInfo()
46 #end if 175
47 #if $plot_options.xmax: 176 ]]></configfile>
48 -M '$plot_options.xmax' 177 </configfiles>
49 #end if
50 #if $plot_options.ymax:
51 -W '$plot_options.ymax'
52 #end if
53 #if $plot_options.legend:
54 -L '$plot_options.legend'
55 #end if
56 -z '$plot_options.legend_labs'
57
58 ]]></command>
59 <inputs> 178 <inputs>
60 <param name="input" type="data" format="tabular" label="Specify an input file" /> 179 <param name="input" type="data" format="tabular" label="Specify an input file" />
61 <param name="fdr_col" type="data_column" data_ref="input" label="FDR (adjusted P value)" /> 180 <param name="fdr_col" type="data_column" data_ref="input" label="FDR (adjusted P value)" />
62 <param name="pval_col" type="data_column" data_ref="input" label="P value (raw)" /> 181 <param name="pval_col" type="data_column" data_ref="input" label="P value (raw)" />
63 <param name="lfc_col" type="data_column" data_ref="input" label="Log Fold Change" /> 182 <param name="lfc_col" type="data_column" data_ref="input" label="Log Fold Change" />
87 <param name="xmax" type="float" optional="True" label="Maximum value for x axis" help="To customise the x axis limits, specify both minimum and maximum values. Leave empty for automatic values."/> 206 <param name="xmax" type="float" optional="True" label="Maximum value for x axis" help="To customise the x axis limits, specify both minimum and maximum values. Leave empty for automatic values."/>
88 <param name="ymax" type="float" optional="True" label="Maximum value for y axis" help="To customise the y axis upper limit, specify the maximum value, the minimum will be 0. Leave empty for automatic value."/> 207 <param name="ymax" type="float" optional="True" label="Maximum value for y axis" help="To customise the y axis upper limit, specify the maximum value, the minimum will be 0. Leave empty for automatic value."/>
89 <param name="legend" type="text" optional="True" label="Label for Legend Title"/> 208 <param name="legend" type="text" optional="True" label="Label for Legend Title"/>
90 <param name="legend_labs" type="text" value="Down,Not Sig,Up" label="Labels for Legend" help="Labels in the legend can be specified. Default: Down,Not Sig,Up"/> 209 <param name="legend_labs" type="text" value="Down,Not Sig,Up" label="Labels for Legend" help="Labels in the legend can be specified. Default: Down,Not Sig,Up"/>
91 </section> 210 </section>
211 <section name="out_options" expanded="false" title="Output Options">
212 <param name="rscript_out" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output Rscript?"
213 help="Output the R code used by the tool, can view and edit in R. Default: No"/>
214 <param name="rdata_out" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output RData file?"
215 help="Output the data generated by the RScript code, can be loaded into R with load(). Default: No">
216 </param>
217 </section>
92 </inputs> 218 </inputs>
93 <outputs> 219 <outputs>
94 <data name="plot" format="pdf" from_work_dir="out.pdf" label="Volcano plot on ${on_string}"/> 220 <data name="plot" format="pdf" from_work_dir="out.pdf" label="${tool.name} on ${on_string}: PDF"/>
221 <data name="rscript" format="txt" from_work_dir="rscript.txt" label="${tool.name} on ${on_string}: Rscript">
222 <filter>out_options['rscript_out']</filter>
223 </data>
224 <data name="rdata" format="rdata" from_work_dir="volcanoplot.RData" label="${tool.name} on ${on_string}: RData">
225 <filter>out_options['rdata_out']</filter>
226 </data>
95 </outputs> 227 </outputs>
96 <tests> 228 <tests>
97 <test> 229 <test expect_num_outputs="1">
98 <!-- Ensure default output works --> 230 <!-- Ensure default output works -->
99 <param name="input" ftype="tabular" value="input.tab"/> 231 <param name="input" ftype="tabular" value="input.tab"/>
100 <param name="fdr_col" value="4" /> 232 <param name="fdr_col" value="4" />
101 <param name="pval_col" value="3" /> 233 <param name="pval_col" value="3" />
102 <param name="lfc_col" value="2" /> 234 <param name="lfc_col" value="2" />
103 <param name="label_col" value="1" /> 235 <param name="label_col" value="1" />
104 <param name="lfc_thresh" value="0" /> 236 <param name="lfc_thresh" value="0" />
105 <output name="plot" value= "out.pdf" compare="sim_size" /> 237 <output name="plot">
238 <assert_contents>
239 <has_size value= "933447" delta="1000" />
240 </assert_contents>
241 </output>
106 </test> 242 </test>
107 <test> 243 <test expect_num_outputs="1">
108 <!-- Ensure input labels and plot options work --> 244 <!-- Ensure input labels and plot options work -->
109 <param name="input" ftype="tabular" value="input.tab"/> 245 <param name="input" ftype="tabular" value="input.tab"/>
110 <param name="fdr_col" value="4" /> 246 <param name="fdr_col" value="4" />
111 <param name="pval_col" value="3" /> 247 <param name="pval_col" value="3" />
112 <param name="lfc_col" value="2" /> 248 <param name="lfc_col" value="2" />
113 <param name="label_col" value="1" /> 249 <param name="label_col" value="1" />
114 <param name="lfc_thresh" value="0" /> 250 <param name="lfc_thresh" value="0" />
115 <param name="label_select" value="file"/> 251 <param name="label_select" value="file"/>
116 <param name="label_file" ftype="tabular" value="labels.tab" /> 252 <param name="label_file" ftype="tabular" value="labels.tab" />
117 <output name="plot" value= "out2.pdf" compare="sim_size" /> 253 <output name="plot">
254 <assert_contents>
255 <has_size value= "936522" delta="1000" />
256 </assert_contents>
257 </output>
258 </test>
259 <test expect_num_outputs="3">
260 <!-- Ensure rscript and rdata outputs work -->
261 <param name="input" ftype="tabular" value="input.tab"/>
262 <param name="fdr_col" value="4" />
263 <param name="pval_col" value="3" />
264 <param name="lfc_col" value="2" />
265 <param name="label_col" value="1" />
266 <param name="lfc_thresh" value="0" />
267 <param name="label_select" value="file"/>
268 <param name="label_file" ftype="tabular" value="labels.tab" />
269 <param name="rscript_out" value="True"/>
270 <param name="rdata_out" value="True"/>
271 <output name="plot">
272 <assert_contents>
273 <has_size value= "936522" delta="1000" />
274 </assert_contents>
275 </output>
276 <output name="rscript" value= "out.rscript" lines_diff="8"/>
277 <output name="rdata">
278 <assert_contents>
279 <has_size value= "589613" delta="1000" />
280 </assert_contents>
281 </output>
118 </test> 282 </test>
119 </tests> 283 </tests>
120 <help><![CDATA[ 284 <help><![CDATA[
121 .. class:: infomark 285 .. class:: infomark
122 286
123 **What it does** 287 **What it does**
124 288
125 This tool creates a Volcano plot using ggplot2. Points can be labelled via ggrepel. 289 This tool creates a Volcano plot using ggplot2. Points can be labelled via ggrepel. It was inspired by this Getting Genetics Done `blog post`_.
126 290
127 In statistics, a `Volcano plot`_ is a type of scatter-plot that is used to quickly identify changes in large data sets composed of replicate data. It plots significance versus fold-change on the y and x axes, respectively. These plots are increasingly common in omic experiments such as genomics, proteomics, and metabolomics where one often has a list of many thousands of replicate data points between two conditions and one wishes to quickly identify the most meaningful changes. A volcano plot combines a measure of statistical significance from a statistical test (e.g., a p value from an ANOVA model) with the magnitude of the change, enabling quick visual identification of those data-points (genes, etc.) that display large magnitude changes that are also statistically significant. 291 In statistics, a `Volcano plot`_ is a type of scatter-plot that is used to quickly identify changes in large data sets composed of replicate data. It plots significance versus fold-change on the y and x axes, respectively. These plots are increasingly common in omic experiments such as genomics, proteomics, and metabolomics where one often has a list of many thousands of replicate data points between two conditions and one wishes to quickly identify the most meaningful changes. A volcano plot combines a measure of statistical significance from a statistical test (e.g., a p value from an ANOVA model) with the magnitude of the change, enabling quick visual identification of those data-points (genes, etc.) that display large magnitude changes that are also statistically significant.
128 292
129 A volcano plot is constructed by plotting the negative log of the p value on the y axis (usually base 10). This results in data points with low p values (highly significant) appearing toward the top of the plot. The x axis is the log of the fold change between the two conditions. The log of the fold change is used so that changes in both directions appear equidistant from the center. Plotting points in this way results in two regions of interest in the plot: those points that are found toward the top of the plot that are far to either the left- or right-hand sides. These represent values that display large magnitude fold changes (hence being left or right of center) as well as high statistical significance (hence being toward the top). 293 A volcano plot is constructed by plotting the negative log of the p value on the y axis (usually base 10). This results in data points with low p values (highly significant) appearing toward the top of the plot. The x axis is the log of the fold change between the two conditions. The log of the fold change is used so that changes in both directions appear equidistant from the center. Plotting points in this way results in two regions of interest in the plot: those points that are found toward the top of the plot that are far to either the left- or right-hand sides. These represent values that display large magnitude fold changes (hence being left or right of center) as well as high statistical significance (hence being toward the top).
130 294
132 296
133 ----- 297 -----
134 298
135 **Inputs** 299 **Inputs**
136 300
137 A tabular file with a header row containing the columns below (additional columns may be present): 301 A tabular file containing the columns below (additional columns may be present):
138 302
139 * P value 303 * P value
140 * FDR / adjusted P value 304 * FDR / adjusted P value
141 * Log fold change 305 * Log fold change
142 * Labels (e.g. Gene symbols or IDs) 306 * Labels (e.g. Gene symbols or IDs)
143 307
308 The tool auto-detects whether a header row is present by checking whether the first value in the P value column is a number: if it is not a number, the first row is treated as a header.
309
144 All significant points, those meeting the specified FDR and Log Fold Change thresholds, will be coloured, red for upregulated, blue for downregulated. Users can choose to apply labels to the points (such as gene symbols) from the Labels column. To label all significant points, select "Significant" for the **Points to label** option, or to only label the top most significant specify a number under "Only label top most significant". Users can label any points of interest through selecting **Points to label** "Input from file" and providing a tabular labels file. The labels file must contain a header row and have the labels in the first column. These labels must match the labels in the main input file. 310 All significant points, those meeting the specified FDR and Log Fold Change thresholds, will be coloured, red for upregulated, blue for downregulated. Users can choose to apply labels to the points (such as gene symbols) from the Labels column. To label all significant points, select "Significant" for the **Points to label** option, or to only label the top most significant specify a number under "Only label top most significant". Users can label any points of interest through selecting **Points to label** "Input from file" and providing a tabular labels file. The labels file must contain a header row and have the labels in the first column. These labels must match the labels in the main input file.
145 311
146 **Outputs** 312 **Outputs**
147 313
148 A PDF containing a Volcano plot like below. 314 A PDF containing a Volcano plot like below.
149 315
150 .. image:: $PATH_TO_IMAGES/volcano_plot.png 316 .. image:: $PATH_TO_IMAGES/volcano_plot.png
151 317
152 .. _Volcano plot: https://en.wikipedia.org/wiki/Volcano_plot_(statistics) 318 .. _Volcano plot: https://en.wikipedia.org/wiki/Volcano_plot_(statistics)
319 .. _blog post: https://gettinggeneticsdone.blogspot.com/2016/01/
153 320
154 ]]></help> 321 ]]></help>
155 <citations> 322 <citations>
156 </citations> 323 </citations>
157 </tool> 324 </tool>