changeset 1:c24765926774 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ruvseq commit 9ed3d83cc447ee897af867362bf1dd67af8a11c2
author iuc
date Tue, 26 Mar 2019 06:25:38 -0400
parents 61dffb20b6f9
children fed9d0350d72
files get_deseq_dataset.R ruvseq.R ruvseq.xml test-data/ruvseq_diag_sailfish.pdf
diffstat 4 files changed, 49 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/get_deseq_dataset.R	Wed Sep 05 15:54:16 2018 -0400
+++ b/get_deseq_dataset.R	Tue Mar 26 06:25:38 2019 -0400
@@ -9,12 +9,12 @@
   }
 
   if (!is.null(tximport)) {
-    if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF file is required for tximport")
-    if (tolower(file_ext(opt$tx2gene)) == "gtf") {
-      gtfFile <-tx2gene
+    if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF/GFF3 file is required for tximport")
+    if (tolower(file_ext(tx2gene)) == "gff") {
+      gffFile <-tx2gene
     } else {
-      gtfFile <- NULL
-      tx2gene <- read.table(tx2gene, header=FALSE)
+      gffFile <- NULL
+      tx2gene <- read.table(tx2gene, header=hasHeader)
     }
     useTXI <- TRUE
   } else {
@@ -45,22 +45,29 @@
 
   } else {
       # construct the object using tximport
-      # first need to make the tx2gene table
-      # this takes ~2-3 minutes using Bioconductor functions
-      if (!is.null(gtfFile)) {
-        suppressPackageStartupMessages({
-          library("GenomicFeatures")
-        })
-        txdb <- makeTxDbFromGFF(gtfFile, format="gtf")
-        k <- keys(txdb, keytype = "GENEID")
-        df <- select(txdb, keys = k, keytype = "GENEID", columns = "TXNAME")
-        tx2gene <- df[, 2:1]  # tx ID, then gene ID
-      }
       library("tximport")
       txiFiles <- as.character(sampleTable$filename)
       labs <- row.names(sampleTable)
       names(txiFiles) <- labs
-      txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene)
+      if (!is.null(gffFile)) {
+        # first need to make the tx2gene table
+        # this takes ~2-3 minutes using Bioconductor functions
+        suppressPackageStartupMessages({
+          library("GenomicFeatures")
+        })
+        txdb <- makeTxDbFromGFF(gffFile)
+        k <- keys(txdb, keytype = "TXNAME")
+        tx2gene <- select(txdb, keys=k, columns="GENEID", keytype="TXNAME")
+        # Remove 'transcript:' from transcript IDs (when gffFile is a GFF3 from Ensembl and the transcript does not have a Name)
+        tx2gene$TXNAME <- sub('^transcript:', '', tx2gene$TXNAME)
+      }
+      try(txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene))
+      if (!exists("txi")) {
+        # Remove version from transcript IDs in tx2gene...
+        tx2gene$TXNAME <- sub('\\.[0-9]+$', '', tx2gene$TXNAME)
+        # ...and in txiFiles
+        txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene, ignoreTxVersion=TRUE)
+      }
       dds <- DESeqDataSetFromTximport(txi,
                                       subset(sampleTable, select=-c(filename)),
                                       designFormula)
--- a/ruvseq.R	Wed Sep 05 15:54:16 2018 -0400
+++ b/ruvseq.R	Tue Mar 26 06:25:38 2019 -0400
@@ -83,9 +83,12 @@
 
 create_seq_expression_set <- function (dds, min_mean_count) {
   count_values <- counts(dds)
+  print(paste0("feature count before filtering :",nrow(count_values),"\n"))
+  print(paste0("Filtering features which mean expression is less or eq. than ", min_mean_count, " counts\n"))
   filter <- apply(count_values, 1, function(x) mean(x) > min_mean_count)
   filtered <- count_values[filter,]
-  set = newSeqExpressionSet(as.matrix(count_values),
+  print(paste0("feature count after filtering :",nrow(filtered),"\n")) 
+  set = newSeqExpressionSet(as.matrix(filtered),
                             phenoData = data.frame(colData(dds)$condition, row.names=colnames(filtered)))
   plot_pca_rle(set = set, title = 'raw data')
   set <- betweenLaneNormalization(set, which="upper")
@@ -103,7 +106,7 @@
   fit <- glmFit(y, design)
   lrt <- glmLRT(fit, coef=2)
   top <- topTags(lrt, n=nrow(set))$table
-  top_rows <- rownames(top)[which(top$PValue > cutoff_p)]
+  top_rows <- rownames(top)[which(top$PValue < cutoff_p)]
   empirical <- rownames(set)[which(!(rownames(set) %in% top_rows))]
   return(empirical)
 }
@@ -154,6 +157,7 @@
 alpha <- opt$alpha
 min_k <- opt$min_k
 max_k <- opt$max_k
+min_c <- opt$min_mean_count
 sample_json <- fromJSON(opt$sample_json)
 sample_paths <- sample_json$path
 sample_names <- sample_json$label
@@ -169,7 +173,7 @@
 }
 
 # Run through the ruvseq variants
-set <- create_seq_expression_set(dds, min_mean_count = opt$min_mean_count)
+set <- create_seq_expression_set(dds, min_mean_count = min_c)
 result <- list(no_correction = set)
 for (k in seq(min_k, max_k)) {
   result[[paste0('residual_method_k', k)]] <- ruv_residual_method(set, k=k)
--- a/ruvseq.xml	Wed Sep 05 15:54:16 2018 -0400
+++ b/ruvseq.xml	Tue Mar 26 06:25:38 2019 -0400
@@ -1,11 +1,12 @@
-<tool id="ruvseq" name="Remove Unwanted Variation" version="1.12.0">
+<tool id="ruvseq" name="Remove Unwanted Variation" version="1.16.0">
     <description>from RNA-seq data</description>
     <requirements>
-        <requirement type="package" version="1.12.0">bioconductor-ruvseq</requirement>
-        <requirement type="package" version="1.18.1">bioconductor-deseq2</requirement>
-        <requirement type="package" version="1.6.0">bioconductor-tximport</requirement>
-        <requirement type="package" version="1.30.0">bioconductor-genomicfeatures</requirement>
-        <requirement type="package" version="0.6.5">r-ggrepel</requirement>
+        <requirement type="package" version="1.16.0">bioconductor-ruvseq</requirement>
+        <requirement type="package" version="1.22.1">bioconductor-deseq2</requirement>
+        <requirement type="package" version="1.10.0">bioconductor-tximport</requirement>
+        <requirement type="package" version="1.34.1">bioconductor-genomicfeatures</requirement>
+        <requirement type="package" version="0.8.0">r-ggrepel</requirement>
+        <requirement type="package" version="1.20.2">r-getopt</requirement>
     </requirements>
     <stdio>
         <regex match="Execution halted"
@@ -44,6 +45,7 @@
 
 --min_k $min_k
 --max_k $max_k
+--min_mean_count $min_mean_count
 
 #if $tximport.tximport_selector == 'tximport':
     --txtype $tximport.txtype
@@ -136,19 +138,19 @@
                 <element name="batch_effects_control_method_k1">
                     <assert_contents>
                         <has_text_matching expression="identifier\tcondition\tW_1"/>
-                        <has_text_matching expression="GSM461179.*\tTreated\t-0.49.*"/>
+                        <has_text_matching expression="GSM461179.*\tTreated\t\-.+"/>
                     </assert_contents>
                 </element>
                 <element name="batch_effects_replicate_method_k1">
                     <assert_contents>
                         <has_text_matching expression="identifier\tcondition\tW_1"/>
-                        <has_text_matching expression="GSM461179.*\tTreated\t-0.25.*"/>
+                        <has_text_matching expression="GSM461179.*\tTreated\t\-.+"/>
                     </assert_contents>
                 </element>
                 <element name="batch_effects_residual_method_k1">
                     <assert_contents>
                         <has_text_matching expression="identifier\tcondition\tW_1"/>
-                        <has_text_matching expression="GSM461179.*\tTreated\t-0.60.*"/>
+                        <has_text_matching expression="GSM461179.*\tTreated\t\-.+"/>
                     </assert_contents>
                 </element>
             </output_collection>
@@ -170,19 +172,19 @@
                 <element name="batch_effects_control_method_k1">
                     <assert_contents>
                         <has_text_matching expression="identifier\tcondition\tW_1"/>
-                        <has_text_matching expression="GSM461179.*\tTreated\t-0.49.*"/>
+                        <has_text_matching expression="GSM461179.*\tTreated\t\-.+"/>
                     </assert_contents>
                 </element>
                 <element name="batch_effects_replicate_method_k1">
                     <assert_contents>
                         <has_text_matching expression="identifier\tcondition\tW_1"/>
-                        <has_text_matching expression="GSM461179.*\tTreated\t-0.25.*"/>
+                        <has_text_matching expression="GSM461179.*\tTreated\t\-.+"/>
                     </assert_contents>
                 </element>
                 <element name="batch_effects_residual_method_k1">
                     <assert_contents>
                         <has_text_matching expression="identifier\tcondition\tW_1"/>
-                        <has_text_matching expression="GSM461179.*\tTreated\t-0.60.*"/>
+                        <has_text_matching expression="GSM461179.*\tTreated\t\-.+"/>
                     </assert_contents>
                 </element>
             </output_collection>
@@ -202,24 +204,25 @@
             <param name="txtype" value="sailfish"/>
             <param name="mapping_format_selector" value="tabular"/>
             <param name="tabular_file" value="tx2gene.tab"/>
+            <param name="min_mean_count" value="0"/>
             <output name="plots" file="ruvseq_diag_sailfish.pdf" ftype="pdf" compare="sim_size"/>
             <output_collection name="unwanted_variation" type="list">
                 <element name="batch_effects_control_method_k1">
                     <assert_contents>
                         <has_text_matching expression="identifier\tcondition\tW_1"/>
-                        <has_text_matching expression="sailfish_quant.sf1.tab\tTreated\t-0.28.*"/>
+                        <has_text_matching expression=".*sailfish_quant.sf1.tab\tTreated\t.+"/> 
                     </assert_contents>
                 </element>
                 <element name="batch_effects_replicate_method_k1">
                     <assert_contents>
                         <has_text_matching expression="identifier\tcondition\tW_1"/>
-                        <has_text_matching expression="sailfish_quant.sf1.tab\tTreated\t-0.44.*"/>
+                        <has_text_matching expression=".*sailfish_quant.sf1.tab\tTreated\t.+"/> 
                     </assert_contents>
                 </element>
                 <element name="batch_effects_residual_method_k1">
                     <assert_contents>
                         <has_text_matching expression="identifier\tcondition\tW_1"/>
-                        <has_text_matching expression="sailfish_quant.sf1.tab\tTreated\t-0.22.*"/>
+                        <has_text_matching expression=".*sailfish_quant.sf1.tab\tTreated\t.+"/> 
                     </assert_contents>
                 </element>
             </output_collection>
Binary file test-data/ruvseq_diag_sailfish.pdf has changed