Mercurial > repos > ieguinoa > tximport
diff tximport.R @ 0:2f5e9c0fe367 draft default tip
"planemo upload for repository https://github.com/ieguinoa/tximport-galaxy-wrapper commit 2bb25471c1320fb1206afa2c4daf536b6d6e275f-dirty"
author | ieguinoa |
---|---|
date | Wed, 09 Oct 2019 15:38:21 -0400 |
parents | |
children |
line wrap: on
line diff
# Source: b/tximport.R, added Wed Oct 09 15:38:21 2019 -0400
# (diff hunk @@ -0,0 +1,119 @@ against /dev/null).
#
# Command-line wrapper around tximport::tximport().
#
# Reads the list of quantification files given as a JSON string in
# --countsFiles, obtains a transcript-to-gene table either from a GFF/GTF
# file (--gff_file) or from a headerless table (--tx2gene), runs tximport and
# writes the resulting `counts` matrix to --out_file as a tab-separated table.

# setup R error handling to go to stderr
options(
  show.error.messages = FALSE,
  error = function() {
    cat(geterrmessage(), file = stderr())
    q("no", 1, FALSE)
  }
)

# we need that to not crash galaxy with an UTF8 error on German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")

library("getopt")
library("rjson")
library("tximport")

# NOTE: the option name is `stringsAsFactors`; the misspelt `stringAsFactors`
# is silently accepted by options() and has no effect.
options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)

# Print a message on stderr and terminate the script with exit status 1.
fail <- function(msg) {
  cat(msg, file = stderr())
  q(status = 1)
}

# get options, using the spec as defined by the enclosed list.
# we read the options from the default: commandArgs(TRUE).
# Columns: long name, short flag, argument flag (0 = none, 1 = required,
# 2 = optional), storage mode. Options that carry a value but are not always
# supplied are declared with 2 so that a value is accepted in both the
# `--name value` and the `--name=value` form.
spec <- matrix(c(
  "help",                "h", 0, "logical",
  "base_dir",            "w", 1, "character",
  "out_file",            "o", 1, "character",
  "countsFiles",         "n", 1, "character",
  "countsFromAbundance", "r", 1, "character",
  "format",              "v", 1, "character",
  "gff_file",            "H", 2, "character",
  "tx2gene",             "f", 2, "character",
  "geneIdCol",           "l", 2, "character",
  "txIdCol",             "p", 1, "character",
  "abundanceCol",        "i", 2, "character",
  "countsCol",           "y", 1, "character",
  "lengthCol",           "x", 1, "character"),
  byrow = TRUE, ncol = 4)

opt <- getopt(spec)

# if help was asked for print a friendly message
# and exit with a non-zero error code
if (!is.null(opt$help)) {
  cat(getopt(spec, usage = TRUE))
  q(status = 1)
}

# ---- Argument validation ----------------------------------------------------

if (is.null(opt$gff_file) && is.null(opt$tx2gene)) {
  fail("A GFF/GTF file or a tx2gene table is required\n")
}

# `format` is compared against "none" below; without this guard a missing
# --format would abort with an "argument is of length zero" error.
if (is.null(opt$format)) {
  fail("'format' is required\n")
}

# "none" is the tximport type used for custom formats: the caller has to name
# the columns that hold the transcript id, abundance, counts and length.
custom_format <- identical(opt$format, "none")
custom_cols <- c("txIdCol", "abundanceCol", "countsCol", "lengthCol")
if (custom_format && length(setdiff(custom_cols, names(opt))) > 0) {
  fail("If you select a custom format for the input files you need to specify the column names\n")
}

if (is.null(opt$countsFiles)) {
  fail("'countsFiles' is required\n")
}

# Checked up front so a missing output path is reported before any import
# work is done.
if (is.null(opt$out_file)) {
  fail("'out_file' is required\n")
}

# ---- Parse counts files -----------------------------------------------------

# --countsFiles is a JSON string. Each top-level element is flattened and
# reshaped, two values per row, into (path, id) pairs.
dat <- fromJSON(opt$countsFiles)
if (length(dat) == 0) {
  fail("'countsFiles' does not contain any sample\n")
}
sample_pairs <- do.call(rbind, lapply(dat, function(sample) {
  matrix(unlist(sample), ncol = 2, byrow = TRUE)
}))

# Prepare char vector with files and sample names
files <- as.character(sample_pairs[, 1])
names(files) <- as.character(sample_pairs[, 2])

# ---- Transcript-to-gene table -----------------------------------------------

### if the input is a gff/gtf file first need to create the tx2gene table
if (!is.null(opt$gff_file)) {
  suppressPackageStartupMessages({
    library("GenomicFeatures")
  })
  txdb <- makeTxDbFromGFF(opt$gff_file)
  k <- keys(txdb, keytype = "TXNAME")
  tx2gene <- select(txdb, keys = k, columns = "GENEID", keytype = "TXNAME")
  # Remove 'transcript:' from transcript IDs (when gffFile is a GFF3 from
  # Ensembl and the transcript does not have a Name)
  tx2gene$TXNAME <- sub("^transcript:", "", tx2gene$TXNAME)
} else {
  tx2gene <- read.table(opt$tx2gene, header = FALSE, stringsAsFactors = FALSE)
}

# ---- Run tximport -----------------------------------------------------------

if (is.null(opt$geneIdCol)) {
  # Gene ids come from the tx2gene table.
  if (custom_format) {
    txi_out <- tximport(
      files,
      type = "none",
      txIdCol = opt$txIdCol,
      abundanceCol = opt$abundanceCol,
      countsCol = opt$countsCol,
      lengthCol = opt$lengthCol,
      tx2gene = tx2gene,
      countsFromAbundance = opt$countsFromAbundance
    )
  } else {
    txi_out <- tximport(
      files,
      type = opt$format,
      tx2gene = tx2gene,
      countsFromAbundance = opt$countsFromAbundance
    )
  }
} else {
  # the gene_ID is a column in the counts table
  if (custom_format) {
    txi_out <- tximport(
      files,
      type = "none",
      geneIdCol = opt$geneIdCol,
      txIdCol = opt$txIdCol,
      abundanceCol = opt$abundanceCol,
      countsCol = opt$countsCol,
      lengthCol = opt$lengthCol,
      tx2gene = tx2gene,
      countsFromAbundance = opt$countsFromAbundance
    )
  } else {
    # NOTE(review): unlike the three other calls, tx2gene is not passed here.
    # Kept as in the original; confirm against tximport that this is intended.
    txi_out <- tximport(
      files,
      type = opt$format,
      geneIdCol = opt$geneIdCol,
      countsFromAbundance = opt$countsFromAbundance
    )
  }
}

# write count as table
write.table(
  txi_out$counts,
  file = opt$out_file,
  row.names = TRUE,
  col.names = TRUE,
  quote = FALSE,
  sep = "\t"
)