diff scater-create-qcmetric-ready-sce.R @ 0:87757f7b9974 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scater commit 5fdcafccb6c645d301db040dfeed693d7b6b4278
author iuc
date Thu, 18 Jul 2019 11:13:05 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scater-create-qcmetric-ready-sce.R	Thu Jul 18 11:13:05 2019 -0400
@@ -0,0 +1,142 @@
+#!/usr/bin/env Rscript
+#Creates a SingleCellExperiment object, which scater's calculateQCMetrics already applied
+
+library(optparse)
+library(workflowscriptscommon)
+library(scater)
+library(LoomExperiment)
+
+# parse options
+#SCE-specific options
+option_list = list(
+  make_option(
+    c("-a", "--counts"),
+    action = "store",
+    default = NA,
+    type = 'character',
+    help = "A tab-delimited expression matrix. The first column of all files is assumed to be feature names and the first row is assumed to be sample names."
+  ),
+  make_option(
+    c("-r", "--row-data"),
+    action = "store",
+    default = NULL,
+    type = 'character',
+    help = "Path to TSV (tab-delimited) format file describing the features. Row names from the expression matrix (-a), if present, become the row names of the SingleCellExperiment."
+  ),
+  make_option(
+    c("-c", "--col-data"),
+    action = "store",
+    default = NULL,
+    type = 'character',
+    help = "Path to TSV format file describing the samples (annotation). The number of rows (samples) must equal the number of columns in the expression matrix."
+  ),
+  #The scater-specific options
+  make_option(
+    c("--assay-name"),
+    action = "store",
+    default = 'counts',
+    type = 'character',
+    help= "String specifying the name of the 'assay' of the 'object' that should be used to define expression."
+  ),
+  make_option(
+    c("-f", "--mt-controls"),
+    action = "store",
+    default = NULL,
+    type = 'character',
+    help = "Path to file containing a list of the mitochondrial control genes"
+  ),
+  make_option(
+    c("-p", "--ercc-controls"),
+    action = "store",
+    default = NULL,
+    type = 'character',
+    help = "Path to file containing a list of the ERCC controls"
+  ),
+  make_option(
+    c("-l", "--cell-controls"),
+    action = "store",
+    default = NULL,
+    type = 'character',
+    help = "Path to file (one cell per line) to be used to derive a vector of cell (sample) names used to identify cell controls (for example, blank wells or bulk controls)."
+  ),
+  make_option(
+    c("-o", "--output-loom"),
+    action = "store",
+    default = NA,
+    type = 'character',
+    help = "File name in which to store the SingleCellExperiment object in Loom format."
+  )
+)
+
+opt <- wsc_parse_args(option_list, mandatory = c('counts', 'output_loom'))
+
+# Read the expression matrix
+
+counts <- wsc_split_string(opt$counts)
+reads <- read.table(counts)
+
+# Read row and column annotations
+
+rowdata <- opt$row_data
+
+if ( ! is.null(opt$row_data) ){
+  rowdata <- read.delim(opt$row_data)
+}
+
+coldata <- opt$col_data
+
+if ( ! is.null(opt$col_data) ){
+  coldata <- read.delim(opt$col_data)
+}
+
+# Now build the object
+assays <- list(as.matrix(reads))
+names(assays) <- c(opt$assay_name)
+scle <- SingleCellLoomExperiment(assays = assays, colData = coldata, rowData = rowdata)
+# Define spikes (if supplied)
+
+
+#Scater options
+
+# Check feature_controls (only mitochondrial and ERCC used for now)
+feature_controls_list = list()
+if (! is.null(opt$mt_controls) && opt$mt_controls != 'NULL'){
+  if (! file.exists(opt$mt_controls)){
+    stop((paste('Supplied feature_controls file', opt$mt_controls, 'does not exist')))
+  } else {
+    mt_controls <- readLines(opt$mt_controls)
+    feature_controls_list[["MT"]] <- mt_controls
+  }
+}
+
+if (! is.null(opt$ercc_controls) && opt$ercc_controls != 'NULL'){
+  if (! file.exists(opt$ercc_controls)){
+    stop((paste('Supplied feature_controls file', opt$ercc_controls, 'does not exist')))
+  } else {
+    ercc_controls <- readLines(opt$ercc_controls)
+    feature_controls_list[["ERCC"]] <- ercc_controls
+  }
+} else {
+  ercc_controls <- character()
+}
+
+# Check cell_controls
+cell_controls_list <- list()
+if (! is.null(opt$cell_controls) && opt$cell_controls != 'NULL'){
+  if (! file.exists(opt$cell_controls)){
+    stop((paste('Supplied feature_controls file', opt$cell_controls, 'does not exist')))
+  } else {
+    cell_controls <- readLines(opt$cell_controls)
+    cell_controls_list[["empty"]] <- cell_controls
+  }
+}
+
+
+# calculate QCMs
+scle  <- calculateQCMetrics(scle, exprs_values = opt$assay_name, feature_controls = feature_controls_list, cell_controls = cell_controls_list)
+
+# Output to a Loom file
+if (file.exists(opt$output_loom)) {
+  file.remove(opt$output_loom)
+}
+export(scle, opt$output_loom, format='loom')