changeset 4:3a083c78896e draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ruvseq commit 30117fce22f3771c9c0c13e70c3ad14b694de6e2
author iuc
date Fri, 21 Apr 2023 14:09:17 +0000
parents d1f7fa5bb3cb
children
files get_deseq_dataset.R ruvseq.R ruvseq.xml
diffstat 3 files changed, 102 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/get_deseq_dataset.R	Fri Jul 23 22:37:45 2021 +0000
+++ b/get_deseq_dataset.R	Fri Apr 21 14:09:17 2023 +0000
@@ -14,7 +14,7 @@
     }
   }
 
-  if (!use_txi & has_header) {
+  if (!use_txi && has_header) {
       countfiles <- lapply(as.character(sample_table$filename), read.delim, row.names = 1)
       tbl <- do.call("cbind", countfiles)
       colnames(tbl) <- rownames(sample_table) # take sample ids from header
@@ -35,7 +35,7 @@
         colData = subset(sample_table, select = -filename),
         design = design_formula
       )
-  } else if (!use_txi & !has_header) {
+  } else if (!use_txi && !has_header) {
 
     # construct the object from HTSeq files
     dds <- DESeqDataSetFromHTSeqCount(
--- a/ruvseq.R	Fri Jul 23 22:37:45 2021 +0000
+++ b/ruvseq.R	Fri Apr 21 14:09:17 2023 +0000
@@ -1,7 +1,8 @@
 # setup R error handling to go to stderr
 library("getopt")
-options(show.error.messages = F, error = function() {
-  cat(geterrmessage(), file = stderr()); q("no", 1, F)
+options(show.error.messages = FALSE, error = function() {
+  cat(geterrmessage(), file = stderr())
+  q("no", 1, FALSE)
 })
 options(stringAsFactors = FALSE, useFancyQuotes = FALSE)
 
@@ -17,7 +18,8 @@
     "plots", "p", 1, "character",
     "header", "H", 0, "logical",
     "txtype", "y", 1, "character",
-    "tx2gene", "x", 1, "character"), # a space-sep tx-to-gene map or GTF file (auto detect .gtf/.GTF)
+    "tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF file (auto detect .gtf/.GTF)
+    "ruv_ncounts", "ruv_ncounts", 0, "logical"),
     byrow = TRUE, ncol = 4)
 
   opt <- getopt(spec)
@@ -155,6 +157,7 @@
 min_k <- opt$min_k
 max_k <- opt$max_k
 min_c <- opt$min_mean_count
+ruv_ncounts <- ifelse(is.null(opt$ruv_ncounts), FALSE, TRUE)
 sample_json <- fromJSON(opt$sample_json)
 sample_paths <- sample_json$path
 sample_names <- sample_json$label
@@ -183,8 +186,14 @@
     df <- data.frame(identifier = rownames(unwanted_variation))
     df <- cbind(df, unwanted_variation)
     colnames(df)[2] <- "condition"
-    write.table(df, file = paste0("batch_effects_", name, ".tabular"),  sep = "\t", quote = F, row.names = F)
+    write.table(df, file = paste0("uv_batch_effects_", name, ".tabular"),  sep = "\t", quote = FALSE, row.names = FALSE)
+    if (ruv_ncounts) {
+      ruvnorm_counts <- normCounts(set)
+      ruvnorm_df <- data.frame(geneID = rownames(ruvnorm_counts), ruvnorm_counts)
+      write.table(ruvnorm_df, file = paste0("ruv_norm_counts_", name, ".tabular"),  sep = "\t", quote = FALSE, row.names = FALSE)
+    }
   }
+
 }
 
 # close the plot device
--- a/ruvseq.xml	Fri Jul 23 22:37:45 2021 +0000
+++ b/ruvseq.xml	Fri Apr 21 14:09:17 2023 +0000
@@ -1,8 +1,12 @@
 <tool id="ruvseq" name="Remove Unwanted Variation" version="@TOOL_VERSION@+galaxy@WRAPPER_VERSION@">
     <description>from RNA-seq data</description>
+    <xrefs>
+        <xref type="bio.tools">ruvseq</xref>
+        <xref type="bioconductor">ruvseq</xref>
+    </xrefs>
     <macros>
         <token name="@TOOL_VERSION@">1.26.0</token>
-        <token name="@WRAPPER_VERSION@">0</token>
+        <token name="@WRAPPER_VERSION@">1</token>
     </macros>
     <requirements>
         <requirement type="package" version="@TOOL_VERSION@">bioconductor-ruvseq</requirement>
@@ -60,6 +64,10 @@
         --tx2gene mapping.txt
     #end if
 #end if
+
+#if $ruv_ncounts == 1:
+    --ruv_ncounts 
+#end if
 ]]></command>
     <configfiles>
         <configfile name="sampleTable">
@@ -117,10 +125,17 @@
         <param name="pdf" type="boolean" truevalue="1" falsevalue="0" checked="true"
             label="Visualising the analysis results"
             help="output an additional PDF files" />
+        <param name="ruv_ncounts" type="boolean" truevalue="1" falsevalue="0" checked="false"
+            label="Output RUVSeq normalized count tables"
+            help="If this option is set to Yes, the tool will generate RUVseq normalized count files. Default: No" />
     </inputs>
     <outputs>
         <collection name="unwanted_variation" type="list" label="RUVSeq covariate files on ${on_string}">
-            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.tabular" format="tabular" directory="." visible="false"/>
+            <discover_datasets pattern="uv_(?P&lt;designation&gt;.+)\.tabular" format="tabular" directory="." visible="false"/>
+        </collection>
+        <collection name="ruv_normcounts" type="list" label="RUVSeq normalized counts on ${on_string}">
+            <filter>ruv_ncounts == True</filter>
+            <discover_datasets pattern="ruv_(?P&lt;designation&gt;.+)\.tabular" format="tabular" directory="." visible="false"/>
         </collection>
         <data format="pdf" name="plots" label="RUVSeq diagonstic plots on ${on_string}">
             <filter>pdf == True</filter>
@@ -232,6 +247,73 @@
                 </element>
             </output_collection>
         </test>
+        <!-- Ensure normalized count files are generated -->
+        <test>
+            <repeat name="rep_factorLevel">
+                <param name="factorLevel" value="Treated"/>
+                <param name="countsFile" value="GSM461179_treat_single.counts,GSM461180_treat_paired.counts,GSM461181_treat_paired.counts"/>
+            </repeat>
+            <repeat name="rep_factorLevel">
+                <param name="factorLevel" value="Untreated"/>
+                <param name="countsFile" value="GSM461176_untreat_single.counts,GSM461177_untreat_paired.counts,GSM461178_untreat_paired.counts,GSM461182_untreat_single.counts"/>
+            </repeat>
+            <param name="pdf" value="true"/>
+            <param name="ruv_ncounts" value="true"/>
+            <output name="plots" file="ruvseq_diag.pdf" ftype="pdf" compare="sim_size"/>
+            <output_collection name="ruv_normcounts" type="list">
+                <element name="norm_counts_control_method_k1">
+                    <assert_contents>
+                        <has_text_matching expression="geneID\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\t.+"/>
+                    </assert_contents>
+                </element>
+                <element name="norm_counts_replicate_method_k1">
+                    <assert_contents>
+                        <has_text_matching expression="geneID\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\t.+"/>
+                    </assert_contents>
+                </element>
+                <element name="norm_counts_residual_method_k1">
+                    <assert_contents>
+                        <has_text_matching expression="geneID\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\t.+"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <!-- Ensure normalized count files are generated from Sailfish input files -->
+        <test>
+            <repeat name="rep_factorLevel">
+                <param name="factorLevel" value="Treated"/>
+                <param name="countsFile" value="sailfish/sailfish_quant.sf1.tab,sailfish/sailfish_quant.sf2.tab,sailfish/sailfish_quant.sf3.tab"/>
+            </repeat>
+            <repeat name="rep_factorLevel">
+                <param name="factorLevel" value="Untreated"/>
+                    <param name="countsFile" value="sailfish/sailfish_quant.sf4.tab,sailfish/sailfish_quant.sf5.tab,sailfish/sailfish_quant.sf6.tab"/>
+            </repeat>
+            <param name="pdf" value="true"/>
+            <param name="tximport_selector" value="tximport"/>
+            <param name="txtype" value="sailfish"/>
+            <param name="mapping_format_selector" value="tabular"/>
+            <param name="tabular_file" value="tx2gene.tab"/>
+            <param name="min_mean_count" value="0"/>
+            <param name="ruv_ncounts" value="true"/>
+            <output name="plots" file="ruvseq_diag_sailfish.pdf" ftype="pdf" compare="sim_size"/>
+            <output_collection name="ruv_normcounts" type="list">
+                <element name="norm_counts_control_method_k1">
+                    <assert_contents>
+                        <has_text_matching expression="geneID\tsailfish_quant.sf1.tab\tsailfish_quant.sf2.tab\t.+"/>
+                    </assert_contents>
+                </element>
+                <element name="norm_counts_replicate_method_k1">
+                    <assert_contents>
+                        <has_text_matching expression="geneID\tsailfish_quant.sf1.tab\tsailfish_quant.sf2.tab\t.+"/>
+                    </assert_contents>
+                </element>
+                <element name="norm_counts_residual_method_k1">
+                    <assert_contents>
+                        <has_text_matching expression="geneID\tsailfish_quant.sf1.tab\tsailfish_quant.sf2.tab\t.+"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test> 
     </tests>
     <help><![CDATA[
 .. class:: infomark
@@ -306,6 +388,9 @@
 
 RUVSeq_ generates a tabular file for each method and each k of variation as well as a summary PDF.
 
+The tool can optionally output RUVSeq-normalized count tables. However, *these counts should be used only for exploration. It is important that subsequent DE analysis be done on the original counts, as removing the unwanted factors from the counts can also remove part of a factor of interest*.
+
+
 .. _RUVSeq: http://master.bioconductor.org/packages/release/bioc/html/RUVSeq.html
 .. _tximport: https://bioconductor.org/packages/devel/bioc/vignettes/tximport/inst/doc/tximport.html
     ]]></help>