diff tximport.R @ 0:2f5e9c0fe367 draft default tip

"planemo upload for repository https://github.com/ieguinoa/tximport-galaxy-wrapper commit 2bb25471c1320fb1206afa2c4daf536b6d6e275f-dirty"
author ieguinoa
date Wed, 09 Oct 2019 15:38:21 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tximport.R	Wed Oct 09 15:38:21 2019 -0400
@@ -0,0 +1,119 @@
+# Send uncaught R errors to stderr and quit with exit status 1 (without
+# saving the workspace), so failures surface as a message on stderr plus a
+# non-zero exit code instead of R's default error output.
+options(
+  show.error.messages = FALSE,
+  error = function() {
+    cat(geterrmessage(), file = stderr())
+    q("no", 1, FALSE)
+  }
+)
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+library("getopt")
+#library("tools")
+# The option name is `stringsAsFactors` (plural "strings"); a misspelled
+# name is silently stored as an unrelated option and has no effect on
+# data.frame() / read.table().
+options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)
+# Trailing command-line arguments. Not referenced in the rest of this
+# script: getopt() is called without an explicit argument vector.
+args <- commandArgs(trailingOnly = TRUE)
+
+# Option specification handed to getopt(): one row per command-line option,
+# four columns per row (long name, short flag, argument flag, value type).
+# The options themselves are read from the default source, commandArgs(TRUE).
+# NOTE(review): gff_file, tx2gene, geneIdCol and abundanceCol carry argument
+# flag 0 although the script reads values from them -- confirm against the
+# getopt documentation that this is intended.
+spec <- rbind(
+  c("help", "h", 0, "logical"),
+  c("base_dir", "w", 1, "character"),
+  c("out_file", "o", 1, "character"),
+  c("countsFiles", "n", 1, "character"),
+  c("countsFromAbundance", "r", 1, "character"),
+  c("format", "v", 1, "character"),
+  c("gff_file", "H", 0, "character"),
+  c("tx2gene", "f", 0, "character"),
+  c("geneIdCol", "l", 0, "character"),
+  c("txIdCol", "p", 1, "character"),
+  c("abundanceCol", "i", 0, "character"),
+  c("countsCol", "y", 1, "character"),
+  c("lengthCol", "x", 1, "character")
+)
+
+# Parse the command line according to `spec`; options that were not supplied
+# are tested for below with is.null().
+opt <- getopt(spec)
+
+# if help was asked for print a friendly message
+# and exit with a non-zero error code
+if (!is.null(opt$help)) {
+  cat(getopt(spec, usage = TRUE))
+  q(status = 1)
+}
+
+# A transcript-to-gene mapping source is mandatory: either an annotation
+# file (gff/gtf) or a ready-made tx2gene table.
+if (is.null(opt$gff_file) && is.null(opt$tx2gene)) {
+  cat("A GFF/GTF file or a tx2gene table is required\n")
+  q(status = 1)
+}
+
+# `format` is compared against "none" below and again when tximport is
+# called; without this guard a missing --format would abort with the
+# unhelpful "argument is of length zero" error.
+if (is.null(opt$format)) {
+  cat("'format' is required\n")
+  q(status = 1)
+}
+
+# Custom format: every column name has to be given explicitly.
+if (opt$format == "none") {
+  if (is.null(opt$txIdCol) || is.null(opt$abundanceCol) ||
+      is.null(opt$countsCol) || is.null(opt$lengthCol)) {
+    cat("If you select a custom format for the input files you need to specify the column names\n")
+    q(status = 1)
+  }
+}
+
+if (is.null(opt$countsFiles)) {
+  cat("'countsFiles' is required\n")
+  q(status = 1)
+}
+
+## parse counts files
+library(rjson)
+# `countsFiles` is parsed as JSON text. Each top-level element is flattened
+# and laid out two values per row, which are labelled (path, id) below.
+dat <- fromJSON(opt$countsFiles)
+samples_df <- lapply(dat, function(samples) {
+  # Two values per row; stringsAsFactors is passed explicitly so the columns
+  # stay character on every R version, independent of global options.
+  data.frame(
+    matrix(unlist(samples), ncol = 2, byrow = TRUE),
+    stringsAsFactors = FALSE
+  )
+})
+samples_df <- do.call(rbind, samples_df)
+colnames(samples_df) <- c("path", "id")
+rownames(samples_df) <- NULL
+
+# Prepare char vector with files and sample names
+files <- file.path(samples_df[, "path"])
+names(files) <- samples_df[, "id"]
+
+
+
+library(tximport)
+
+
+
+
+### Build the tx2gene table: read it directly from the tx2gene file when no
+### gff/gtf file was given, otherwise derive it from the gff/gtf annotation.
+if (is.null(opt$gff_file)) {
+  tx2gene <- read.table(opt$tx2gene, header = FALSE)
+} else {
+  suppressPackageStartupMessages(library("GenomicFeatures"))
+  annotation_db <- makeTxDbFromGFF(opt$gff_file)
+  transcript_names <- keys(annotation_db, keytype = "TXNAME")
+  tx2gene <- select(
+    annotation_db,
+    keys = transcript_names,
+    columns = "GENEID",
+    keytype = "TXNAME"
+  )
+  # Remove 'transcript:' from transcript IDs (when gffFile is a GFF3 from
+  # Ensembl and the transcript does not have a Name)
+  tx2gene$TXNAME <- sub("^transcript:", "", tx2gene$TXNAME)
+}
+
+
+
+## Run tximport. Four variants, chosen by whether --geneIdCol was given and
+## whether the input uses the custom format ("none"), in which case the
+## column names supplied on the command line are forwarded.
+if (is.null(opt$geneIdCol)) { ## there is a tx2gene table
+  if (opt$format == "none") { # custom format
+    txi_out <- tximport(
+      files,
+      type = "none",
+      txIdCol = opt$txIdCol,
+      abundanceCol = opt$abundanceCol,
+      countsCol = opt$countsCol,
+      lengthCol = opt$lengthCol,
+      tx2gene = tx2gene,
+      countsFromAbundance = opt$countsFromAbundance
+    )
+  } else { # predefined format
+    txi_out <- tximport(
+      files,
+      type = opt$format,
+      tx2gene = tx2gene,
+      countsFromAbundance = opt$countsFromAbundance
+    )
+  }
+} else { # the gene_ID is a column in the counts table
+  if (opt$format == "none") { # custom format
+    txi_out <- tximport(
+      files,
+      type = "none",
+      geneIdCol = opt$geneIdCol,
+      txIdCol = opt$txIdCol,
+      abundanceCol = opt$abundanceCol,
+      countsCol = opt$countsCol,
+      lengthCol = opt$lengthCol,
+      tx2gene = tx2gene,
+      countsFromAbundance = opt$countsFromAbundance
+    )
+  } else { # predefined format
+    txi_out <- tximport(
+      files,
+      type = opt$format,
+      geneIdCol = opt$geneIdCol,
+      countsFromAbundance = opt$countsFromAbundance
+    )
+  }
+}
+# write count as table
+write.table(
+  txi_out$counts,
+  file = opt$out_file,
+  row.names = TRUE,
+  col.names = TRUE,
+  quote = FALSE,
+  sep = "\t"
+)