changeset 6:83c573f2e73c draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/volcanoplot commit e880a5736a65605ae8a38ca9bc6f17c92c798ccd"
author iuc
date Thu, 10 Jun 2021 08:38:12 +0000
parents 44608d0193ed
children
files static/images/volcano_plot.png test-data/out.rscript volcanoplot.xml
diffstat 3 files changed, 97 insertions(+), 102 deletions(-) [+]
line wrap: on
line diff
Binary file static/images/volcano_plot.png has changed
--- a/test-data/out.rscript	Sun Jun 06 09:12:22 2021 +0000
+++ b/test-data/out.rscript	Thu Jun 10 08:38:12 2021 +0000
@@ -7,6 +7,7 @@
 # we need that to not crash galaxy with an UTF8 error on German LC settings.
 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
 
+# Galaxy settings end -----------------------------------------------------
 
 # Load packages -----------------------------------------------------------
 
@@ -19,19 +20,7 @@
 
 # Import data  ------------------------------------------------------------
 
-# Check if header is present by checking if P value column is numeric or not
-
-first_line <- read.delim('/private/var/folders/zn/m_qvr9zd7tq0wdtsbq255f8xypj_zg/T/tmpfpemuuun/files/8/3/7/dataset_8374ef6b-02c7-46f1-afc9-408a2a6cbde4.dat', header = FALSE, nrow = 1)
-
-first_pvalue <- first_line[, 3]
-
-if (is.numeric(first_pvalue)) {
-  print("No header row detected")
-  results <- read.delim('/private/var/folders/zn/m_qvr9zd7tq0wdtsbq255f8xypj_zg/T/tmpfpemuuun/files/8/3/7/dataset_8374ef6b-02c7-46f1-afc9-408a2a6cbde4.dat', header = FALSE)
-} else {
-  print("Header row detected")
-  results <- read.delim('/private/var/folders/zn/m_qvr9zd7tq0wdtsbq255f8xypj_zg/T/tmpfpemuuun/files/8/3/7/dataset_8374ef6b-02c7-46f1-afc9-408a2a6cbde4.dat', header = TRUE)
-}
+results <- read.delim('/private/var/folders/zn/m_qvr9zd7tq0wdtsbq255f8xypj_zg/T/tmprh4qip75/files/d/2/2/dataset_d2255b46-f0f6-4900-8b9e-bd352e34f303.dat', header = TRUE)
 
 
 # Format data  ------------------------------------------------------------
@@ -52,52 +41,57 @@
 
 # Create significant (sig) column
 results <- mutate(results, sig = case_when(
-                                fdr < 0.05 & logfc > 0.0 ~ up, 
-                                fdr < 0.05 & logfc < -0.0 ~ down, 
+                                fdr < 0.05 & logfc > 0.0 ~ up,
+                                fdr < 0.05 & logfc < -0.0 ~ down,
                                 TRUE ~ notsig))
 
 
 # Specify genes to label --------------------------------------------------
-labelfile <- read.delim('/private/var/folders/zn/m_qvr9zd7tq0wdtsbq255f8xypj_zg/T/tmpfpemuuun/files/4/2/f/dataset_42fc8a63-f9cc-435b-9bb3-dd106b708cd9.dat')
+
+# Import file with genes of interest
+labelfile <- read.delim('/private/var/folders/zn/m_qvr9zd7tq0wdtsbq255f8xypj_zg/T/tmprh4qip75/files/5/e/5/dataset_5e5b8fb0-bf65-438e-9b5b-03a540d9aa5d.dat', header = TRUE)
+
+# Label the genes of interest in results table
 results <- mutate(results, labels = ifelse(labels %in% labelfile[, 1], labels, ""))
 
 
+
 # Create plot -------------------------------------------------------------
 
-pdf("out.pdf")
-p <- ggplot(results, aes(x = logfc, y = -log10(pvalue))) +
+# Open file to save plot as PDF
+pdf("volcano_plot.pdf")
+
+# Set up base plot
+p <- ggplot(data = results, aes(x = logfc, y = -log10(pvalue))) +
     geom_point(aes(colour = sig)) +
     scale_color_manual(values = colours) +
-    scale_fill_manual(values = colours) +
     theme(panel.grid.major = element_blank(),
         panel.grid.minor = element_blank(),
         panel.background = element_blank(),
         axis.line = element_line(colour = "black"),
         legend.key = element_blank())
 
+# Add gene labels
+p <- p + geom_text_repel(data = filter(results, labels != ""), aes(label = labels),
+                         min.segment.length = 0,
+                         max.overlaps = Inf,
+                         show.legend = FALSE)
+
 
 
 
 
 
 # Set legend title
-p <- p + labs(colour = "")
+p <- p + theme(legend.title = element_blank())
 
-# Add gene labels in boxes
-p <- p + geom_label_repel(aes(label = labels, fill = sig), 
-                          segment.colour = "black", 
-                          colour = "white", 
-                          min.segment.length = 0, 
-                          show.legend = FALSE)
+# Print plot
+print(p)
 
-print(p)
+# Close PDF graphics device
 dev.off()
 
 
-# Save RData -------------------------------------------------------------
-save.image(file="volcanoplot.RData")
-
-
 # R and Package versions -------------------------------------------------
 sessionInfo()
 
--- a/volcanoplot.xml	Sun Jun 06 09:12:22 2021 +0000
+++ b/volcanoplot.xml	Thu Jun 10 08:38:12 2021 +0000
@@ -1,4 +1,4 @@
-<tool id="volcanoplot" name="Volcano Plot" version="0.0.4">
+<tool id="volcanoplot" name="Volcano Plot" version="0.0.5">
     <description>create a volcano plot</description>
     <edam_topics>
         <edam_topic>topic_0092</edam_topic>
@@ -30,6 +30,7 @@
 # we need that to not crash galaxy with an UTF8 error on German LC settings.
 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
 
+# Galaxy settings end -----------------------------------------------------
 
 # Load packages -----------------------------------------------------------
 
@@ -42,12 +43,16 @@
 
 # Import data  ------------------------------------------------------------
 
-# Check if header is present by checking if P value column is numeric or not
+#if $header == "yes"
+results <- read.delim('$input', header = TRUE)
 
+#elif $header == "no"
+results <- read.delim('$input', header = FALSE)
+
+#else
+# Auto-detect header by checking if P value column is numeric or not
 first_line <- read.delim('$input', header = FALSE, nrow = 1)
-
 first_pvalue <- first_line[, $pval_col]
-
 if (is.numeric(first_pvalue)) {
   print("No header row detected")
   results <- read.delim('$input', header = FALSE)
@@ -55,7 +60,7 @@
   print("Header row detected")
   results <- read.delim('$input', header = TRUE)
 }
-
+#end if
 
 # Format data  ------------------------------------------------------------
 
@@ -75,28 +80,36 @@
 
 # Create significant (sig) column
 results <- mutate(results, sig = case_when(
-                                fdr < $signif_thresh & logfc > $lfc_thresh ~ up, 
-                                fdr < $signif_thresh & logfc < -$lfc_thresh ~ down, 
+                                fdr < $signif_thresh & logfc > $lfc_thresh ~ up,
+                                fdr < $signif_thresh & logfc < -$lfc_thresh ~ down,
                                 TRUE ~ notsig))
 
 ## R code below is left aligned for R script output
 
 #if $labels.label_select != "none"
 # Specify genes to label --------------------------------------------------
-    #if $labels.label_select == "file" 
-labelfile <- read.delim('$labels.label_file')
+
+    #if $labels.label_select == "file"
+# Import file with genes of interest
+labelfile <- read.delim('$labels.label_file', header = TRUE)
+
+# Label the genes of interest in results table
 results <- mutate(results, labels = ifelse(labels %in% labelfile[, 1], labels, ""))
+
     #elif $labels.label_select == "signif"
-        #if $labels.top_num <= 0
-results <- mutate(results, labels = "")
+        #if not $labels.top_num
+# Label all significant genes in results table
+results <- mutate(results, labels=ifelse(sig != notsig, labels, ""))
+
         #elif $labels.top_num > 0
-top <- results %>% 
-    filter(sig != notsig) %>% 
-    slice_min(order_by = pvalue, n = $labels.top_num)
+# Get top genes by P value
+top <- slice_min(results, order_by = pvalue, n = $labels.top_num)
+
+# Extract into vector
 toplabels <- pull(top, labels)
+
+# Label just the top genes in results table
 results <- mutate(results, labels = ifelse(labels %in% toplabels, labels, ""))
-        #else 
-results <- mutate(results, labels = ifelse(sig != notsig, labels, ""))
         #end if
      #end if
 #end if
@@ -104,17 +117,34 @@
 
 # Create plot -------------------------------------------------------------
 
-pdf("out.pdf")
-p <- ggplot(results, aes(x = logfc, y = -log10(pvalue))) +
+# Open file to save plot as PDF
+pdf("volcano_plot.pdf")
+
+# Set up base plot
+p <- ggplot(data = results, aes(x = logfc, y = -log10(pvalue))) +
     geom_point(aes(colour = sig)) +
     scale_color_manual(values = colours) +
-    scale_fill_manual(values = colours) +
     theme(panel.grid.major = element_blank(),
         panel.grid.minor = element_blank(),
         panel.background = element_blank(),
         axis.line = element_line(colour = "black"),
         legend.key = element_blank())
 
+#if $labels.label_select != "none"
+# Add gene labels
+    #if $plot_options.boxes
+p <- p + geom_label_repel(data = filter(results, labels != ""), aes(label = labels),
+                          min.segment.length = 0,
+                          max.overlaps = Inf,
+                          show.legend = FALSE)
+    #else
+p <- p + geom_text_repel(data = filter(results, labels != ""), aes(label = labels),
+                         min.segment.length = 0,
+                         max.overlaps = Inf,
+                         show.legend = FALSE)
+    #end if
+#end if
+
 #if not '$plot_options.title'
 p <- p + ggtitle('$plot_options.title')
 #end if
@@ -137,39 +167,18 @@
 
 # Set legend title
 #if not '$plot_options.legend'
-p <- p + labs(colour = '$plot_options.legend')
+p <- p + labs(colour = '$plot_options.legend')  # theme(legend.title) needs an element_*(), not a string
 #else
-p <- p + labs(colour = "")
+p <- p + theme(legend.title = element_blank())
 #end if
 
-#if $labels.label_select != "none" 
-# Add gene labels in boxes
-    #if $plot_options.boxes 
-p <- p + geom_label_repel(aes(label = labels, fill = sig), 
-                          segment.colour = "black", 
-                          colour = "white", 
-                          min.segment.length = 0, 
-                          show.legend = FALSE)
-    #else
-# Add gene labels
-p <- p + geom_text_repel(aes(label = labels, col = sig), 
-                         min.segment.length = 0, 
-                         box.padding = 0.3, 
-                         point.padding = 0.3, 
-                         show.legend = FALSE)
-    #end if
-#end if
+# Print plot
+print(p)
 
-print(p)
+# Close PDF graphics device
 dev.off()
 
 
-#if $out_options.rdata_out
-# Save RData -------------------------------------------------------------
-save.image(file="volcanoplot.RData")
-#end if
-
-
 # R and Package versions -------------------------------------------------
 sessionInfo()
 
@@ -177,6 +186,11 @@
 </configfiles>
     <inputs>
         <param name="input" type="data" format="tabular" label="Specify an input file" />
+        <param name="header" type="select" label="File has header?" help="Does the differentially expressed results file contain a header row? The tool can auto-detect this by checking whether the first row in the P value column is a number. Default: Auto-detect">
+            <option value="auto" selected="True">Auto-detect</option>
+            <option value="yes">Yes</option>
+            <option value="no">No</option>
+        </param>
         <param name="fdr_col" type="data_column" data_ref="input" label="FDR (adjusted P value)" />
         <param name="pval_col" type="data_column" data_ref="input" label="P value (raw)" />
         <param name="lfc_col" type="data_column" data_ref="input" label="Log Fold Change" />
@@ -198,7 +212,7 @@
             <when value="none" />
         </conditional>
         <section name="plot_options" expanded="false" title="Plot Options">
-            <param name="boxes" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Label Boxes" help="If this is set to Yes, the labels for the points will be in boxes. Default: Yes"/>
+            <param name="boxes" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Label Boxes" help="If this is set to Yes, the labels for the points will be in boxes. Default: No"/>
             <param name="title" type="text" optional="True" label="Plot title"/>
             <param name="xlab" type="text" optional="True" label="Label for x axis"/>
             <param name="ylab" type="text" optional="True" label="Label for y axis"/>
@@ -209,21 +223,15 @@
             <param name="legend_labs" type="text" value="Down,Not Sig,Up" label="Labels for Legend" help="Labels in the legend can be specified. Default: Down,Not Sig,Up"/>
         </section>
         <section name="out_options" expanded="false" title="Output Options">
-            <param name="rscript_out" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output Rscript?" 
-                help="Output the R code used by the tool, can view and edit in R. Default: No"/>
-            <param name="rdata_out" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output RData file?"
-                help="Output the data generated by the RScript code, can be loaded into R with load(). Default: No">
-            </param>
+            <param name="rscript_out" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output Rscript?"
+                help="Output the R code used by the tool. Can edit in R if you want to customise the plot further. Default: No"/>
         </section>
     </inputs>
     <outputs>
-        <data name="plot" format="pdf" from_work_dir="out.pdf" label="${tool.name} on ${on_string}: PDF"/>
+        <data name="plot" format="pdf" from_work_dir="volcano_plot.pdf" label="${tool.name} on ${on_string}: PDF"/>
         <data name="rscript" format="txt" from_work_dir="rscript.txt" label="${tool.name} on ${on_string}: Rscript">
             <filter>out_options['rscript_out']</filter>
         </data>
-        <data name="rdata" format="rdata" from_work_dir="volcanoplot.RData" label="${tool.name} on ${on_string}: RData">
-            <filter>out_options['rdata_out']</filter>
-        </data>
     </outputs>
     <tests>
         <test expect_num_outputs="1">
@@ -236,7 +244,7 @@
             <param name="lfc_thresh" value="0" />
             <output name="plot">
                 <assert_contents>
-                    <has_size value= "933447" delta="1000" />
+                    <has_size value= "933451" delta="1000" />
                 </assert_contents>
             </output>
         </test>
@@ -252,13 +260,14 @@
             <param name="label_file" ftype="tabular" value="labels.tab" />
             <output name="plot">
                 <assert_contents>
-                    <has_size value= "936522" delta="1000" />
+                    <has_size value= "933832" delta="1000" />
                 </assert_contents>
             </output>
         </test>
-        <test expect_num_outputs="3">
-            <!-- Ensure rscript and rdata outputs work -->
+        <test expect_num_outputs="2">
+            <!-- Ensure rscript output works -->
             <param name="input" ftype="tabular" value="input.tab"/>
+            <param name="header" value="yes"/>
             <param name="fdr_col" value="4" />
             <param name="pval_col" value="3" />
             <param name="lfc_col" value="2" />
@@ -267,18 +276,12 @@
             <param name="label_select" value="file"/>
             <param name="label_file" ftype="tabular" value="labels.tab" />
             <param name="rscript_out" value="True"/>
-            <param name="rdata_out" value="True"/>
             <output name="plot">
                 <assert_contents>
-                    <has_size value= "936522" delta="1000" />
+                    <has_size value= "933832" delta="1000" />
                 </assert_contents>
             </output>
-            <output name="rscript" value= "out.rscript" lines_diff="8"/>
-            <output name="rdata">
-                <assert_contents>
-                    <has_size value= "589613" delta="1000" />
-                </assert_contents>
-            </output>
+            <output name="rscript" value= "out.rscript" lines_diff="4"/>
         </test>
     </tests>
     <help><![CDATA[
@@ -305,13 +308,11 @@
     * Log fold change
     * Labels (e.g. Gene symbols or IDs)
 
-The tool will auto-detect if a header is present, by checking if the first row in the P value column is a number or not. 
-
 All significant points, those meeting the specified FDR and Log Fold Change thresholds, will be coloured, red for upregulated, blue for downregulated. Users can choose to apply labels to the points (such as gene symbols) from the Labels column. To label all significant points, select "Significant" for the **Points to label** option, or to only label the top most significant specify a number under "Only label top most significant". Users can label any points of interest through selecting **Points to label** "Input from file" and providing a tabular labels file. The labels file must contain a header row and have the labels in the first column. These labels must match the labels in the main input file.
 
 **Outputs**
 
-A PDF containing a Volcano plot like below.
+A PDF containing a Volcano plot like the one below. The R code used to create it can also be output through *Output Options* in the tool form.
 
 .. image:: $PATH_TO_IMAGES/volcano_plot.png
 
@@ -321,4 +322,4 @@
     ]]></help>
     <citations>
     </citations>
-</tool>
+</tool>
\ No newline at end of file