Repository 'dropletutils'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/dropletutils

Changeset 6:8855361fcfc5 (2020-12-10)
Previous changeset 5:cdf4443d5625 (2020-01-29) Next changeset 7:2c1200fba922 (2021-01-07)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dropletutils/ commit ed0625fe59342d14a08745996e3e32c6f922a738"
modified:
dropletutils.xml
scripts/dropletutils.Rscript
test-data/defs_defaultdrops.h5
test-data/defs_emptydrops_150_0002.h5
test-data/defs_emptydrops_150_0002.png
test-data/defs_emptydrops_150_0002a.h5
test-data/defs_emptydrops_150_0002a.png
b
diff -r cdf4443d5625 -r 8855361fcfc5 dropletutils.xml
--- a/dropletutils.xml Wed Jan 29 15:07:38 2020 -0500
+++ b/dropletutils.xml Thu Dec 10 13:50:06 2020 +0000
[
@@ -1,9 +1,22 @@
 <?xml version="1.0" encoding="utf-8"?>
-<tool id="dropletutils" name="DropletUtils" version="@PACKAGE_VERSION@+@GALAXY_VERSION@" >
+<tool id="dropletutils" name="DropletUtils" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" >
     <description>Utilities for handling droplet-based single-cell RNA-seq data</description>
+    <xrefs>
+        <xref type="bio.tools">dropletutils</xref>
+    </xrefs>
+    <edam_topics>
+        <edam_topic>topic_0203</edam_topic>
+        <edam_topic>topic_3168</edam_topic>
+        <edam_topic>topic_3170</edam_topic>
+        <edam_topic>topic_3308</edam_topic>
+    </edam_topics>
+    <edam_operations>
+        <edam_operation>operation_1812</edam_operation>
+        <edam_operation>operation_3200</edam_operation>
+    </edam_operations>
     <macros>
-        <token name="@PACKAGE_VERSION@" >1.2.1</token>
-        <token name="@GALAXY_VERSION@" >galaxy6</token>
+        <token name="@TOOL_VERSION@">1.10.0</token>
+        <token name="@VERSION_SUFFIX@">0</token>
         <token name="@TXIN@">tenx.input</token>
         <token name="@TXOUT@">tenx.output</token>
         <xml name="test_dirin" >
@@ -16,9 +29,10 @@
         </xml>
     </macros>
     <requirements>
-        <requirement type="package" version="@PACKAGE_VERSION@">bioconductor-dropletutils</requirement>
-        <requirement type="package" version="1.2_17" >r-matrix</requirement>
-        <requirement type="package" version="1.10.1" >bioconductor-scater</requirement>
+        <requirement type="package" version="@TOOL_VERSION@">bioconductor-dropletutils</requirement>
+        <requirement type="package" version="1.18.0" >bioconductor-scater</requirement>
+        <requirement type="package" version="4.0">r-base</requirement>
+        <requirement type="package" version="1.2_18">r-matrix</requirement>
         <requirement type="package" version="1">fonts-conda-ecosystem</requirement>
     </requirements>
     <version_command><![CDATA[
@@ -45,14 +59,14 @@
     <configfiles>
         <configfile name="droplet_conf" >
 ## defaults
-empty.fdr_threshold = 0.01
-eparams=formals(emptyDrops)
-dparams=formals(defaultDrops)
-bparams=formals(barcodeRanks)
+empty_fdr_threshold = 0.01
+eparams = formals(emptyDrops)
+dparams = formals(defaultDrops)
+bparams = formals(barcodeRanks)
 
 ## File params
-in.type='$tenx_format.use'
-out.type=NULL
+intype='$tenx_format.use'
+outtype=NULL
 
 files=list()
 files\$table='$table'
@@ -74,10 +88,10 @@
     #else if str($operation.method.use) == 'emptydrops':
 do.method="emptyDrops"
 eparams\$lower=as.integer('$operation.method.lower')
-empty.fdr_threshold=as.numeric('$operation.method.fdr_thresh')
+empty_fdr_threshold=as.numeric('$operation.method.fdr_thresh')
     #end if
 
-out.type='$operation.outformat'
+outtype='$operation.outformat'
     #if str($operation.outformat) == 'directory':
 files\$out='@TXOUT@'
     #else if str($operation.outformat) == 'h5':
@@ -172,8 +186,8 @@
     </outputs>
     <tests>
         <!-- Directory input tests -->
-        <!-- ::: Default Drops -->
         <test expect_num_outputs="1">
+            <!-- ::: Default Drops -->
             <expand macro="test_dirin" />
             <conditional name="operation">
                 <param name="use" value="filter" />
@@ -196,17 +210,17 @@
                 </assert_contents>
             </output>
         </test>
-        <!-- :: Barcode Ranks -->
         <test expect_num_outputs="1">
+            <!-- :: Barcode Ranks -->
             <expand macro="test_dirin" />
             <conditional name="operation">
                 <param name="use" value="barcode_rank" />
                 <param name="lower" value="120" />
             </conditional>
-            <output name="plot" value="defs_barcoderankings.png" compare="sim_size" delta="400"/>
+            <output name="plot" value="defs_barcoderankings.png" compare="sim_size" delta="600"/>
         </test>
-        <!-- ::: Empty Drops -->
         <test expect_num_outputs="3">
+            <!-- ::: Empty Drops -->
             <expand macro="test_dirin" />
             <conditional name="operation">
                 <param name="use" value="filter" />
@@ -221,16 +235,16 @@
             <output name="table" >
                 <assert_contents>
                     <has_n_columns n="9" />
-                    <has_line_matching expression="^\sbar.names\sTotal\sLogProb\sPValue\sLimited\sFDR\sis.Cell\sis.CellAndLimited" />
-                    <has_line_matching expression="^994\sGGCATTACAA\s338\s-246.922772388055\s9.99900009999e-05\sTRUE\s9.99900009999e-05\sTRUE\sTRUE" />
-                    <has_line_matching expression="^998\sCATGAAGCAA\s151\s-166.644236503983\s9.99900009999e-05\sTRUE\s9.99900009999e-05\sTRUE\sTRUE" />
+                    <has_line_matching expression="^\sbar_names\sTotal\sLogProb\sPValue\sLimited\sFDR\sis_cell\sis_cellandlimited" />
+                    <has_line_matching expression="^994\sGGCATTACAA\s338\s-246\.(.*TRUE){3}$" />
+                    <has_line_matching expression="^998\sCATGAAGCAA\s151\s-166\.(.*TRUE){3}$" />
                 </assert_contents>
             </output>
             <output name="plot" value="defs_emptydrops_150_0002.png" compare="sim_size" delta="400" />
         </test>
         <!-- Other format input tests -->
-        <!-- ::: Empty Drops, same as above but input is h5 -->
         <test expect_num_outputs="3">
+            <!-- ::: Empty Drops, same as above but input is h5 -->
             <conditional name="tenx_format" >
                 <param name="use" value="h5" />
                 <param name="input" value="in_matrix.h5" />
@@ -248,9 +262,9 @@
             <output name="table" >
                 <assert_contents>
                     <has_n_columns n="9" />
-                    <has_line_matching expression="^\sbar.names\sTotal\sLogProb\sPValue\sLimited\sFDR\sis.Cell\sis.CellAndLimited" />
-                    <has_line_matching expression="^1100\sCCGGAAGCAA\s169\s-198.117943099773\s9.99900009999e-05\sTRUE\s0.000126279506880773\sTRUE\sTRUE" />
-                    <has_line_matching expression="^1114\sTCCGAAGCAA\s182\s-196.181449214729\s9.99900009999e-05\sTRUE\s0.000126279506880773\sTRUE\sTRUE" />
+                    <has_line_matching expression="^\sbar_names\sTotal\sLogProb\sPValue\sLimited\sFDR\sis_cell\sis_cellandlimited" />
+                    <has_line_matching expression="^1100\sCCGGAAGCAA\s169\s-198\.(.*TRUE){3}$" />
+                    <has_line_matching expression="^1114\sTCCGAAGCAA\s182\s-196\.(.*TRUE){3}$" />
                 </assert_contents>
             </output>
             <output name="plot" value="defs_emptydrops_150_0002a.png" compare="sim_size" delta="400" />
b
diff -r cdf4443d5625 -r 8855361fcfc5 scripts/dropletutils.Rscript
--- a/scripts/dropletutils.Rscript Wed Jan 29 15:07:38 2020 -0500
+++ b/scripts/dropletutils.Rscript Thu Dec 10 13:50:06 2020 +0000
[
b'@@ -1,6 +1,6 @@\n ## Load in data\n-args = commandArgs(trailingOnly = T)\n-if (length(args) != 1){\n+args <- commandArgs(trailingOnly = T)\n+if (length(args) != 1) {\n     stop("Please provide the config file")\n }\n \n@@ -11,57 +11,60 @@\n source(args[1])\n \n ## Helper functions\n-setSparse <- function(obj){\n+set_sparse <- function(obj) {\n     return(as(obj, "dgCMatrix"))\n }\n \n-writeTSV <- function(fileout, obj){\n-    write.table(as.matrix(obj), file=fileout, col.names=NA, sep=\'\\t\', quote=FALSE)\n+write_tsv <- function(fileout, obj) {\n+    write.table(as.matrix(obj), file = fileout,\n+                col.names = NA, sep = "\\t", quote = FALSE)\n }\n \n-determineGeneIDs <- function(object){\n-    if (!is.null(rowData(object)$Symbol)){\n+determine_geneids <- function(object) {\n+    if (!is.null(rowData(object)$Symbol)) {\n         return(rowData(object)$Symbol)\n     }\n     return(rownames(object))\n }\n \n-getCounts <- function(object){\n-    return(Matrix(counts(object), sparse=TRUE))\n+get_counts <- function(object) {\n+    return(Matrix(counts(object), sparse = TRUE))\n }\n \n-writeOut <- function(object, fileout, typeout){\n-    if (typeout == "tsv"){\n-        writeTSV(fileout, getCounts(object))\n+write_out <- function(object, fileout, typeout) {\n+    if (typeout == "tsv") {\n+        write_tsv(fileout, get_counts(object))\n     }\n-    else if (typeout == "h5"){\n-        write10xCounts(fileout, getCounts(object),\n-                       type="HDF5",\n-                       gene.symbol=determineGeneIDs(object),\n-                       overwrite=TRUE)\n+    else if (typeout == "h5") {\n+        write10xCounts(fileout, get_counts(object),\n+                       type = "HDF5",\n+                       gene.symbol = determine_geneids(object),\n+                       overwrite = TRUE)\n     }\n-    else if (typeout == "directory"){\n-        write10xCounts(fileout, getCounts(object),\n-                       type="sparse",\n-                       gene.symbol=determineGeneIDs(object),\n-                       overwrite=TRUE)\n+    else if (typeout == "directory") {\n+        write10xCounts(fileout, get_counts(object),\n+                       type = "sparse",\n+                       gene.symbol = determine_geneids(object),\n+                       overwrite = TRUE)\n     }\n }\n \n-read10xFiles <- function(filein, typein){\n+read_10x_files <- function(filein, typein) {\n     sce <- NULL\n-    if (typein == "tsv"){\n+    if (typein == "tsv") {\n         ## Exploding memory problems occured here\n         ## - solution is to use the readSparseCounts function from scater\n-        sce <- SingleCellExperiment(assays = list(counts = readSparseCounts(filein)))\n-    }\n-    else if (typein == "h5"){\n-        sce <- read10xCounts(filein, col.names=T, type="HDF5")   # use barcodes.tsv as column names\n+        sce <- SingleCellExperiment(assays =\n+                                        list(counts = readSparseCounts(filein)))\n     }\n-    else if (typein == "directory"){\n-        sce <- read10xCounts(filein, col.names=T, type="sparse")\n+    else if (typein == "h5") {\n+         # use barcodes.tsv as column names\n+        sce <- read10xCounts(filein, col.names = T, type = "HDF5")\n     }\n-    counts(sce) <- setSparse(counts(sce))\n+    else if (typein == "directory") {\n+        sce <- read10xCounts(filein, col.names = T, type = "sparse")\n+    }\n+    counts(sce) <- set_sparse(counts(sce))\n     return(sce)\n }\n \n@@ -69,97 +72,113 @@\n ## Methods\n \n \n-doEmptyDrops <- function(files, eparams, in.type="directory", out.type="h5", fdr_threshold = 0.01){\n-    sce <- read10xFiles(files$infile, in.type)\n+do_empty_drops <- function(files, eparams, intype = "directory",\n+                         outtype = "h5", fdr_threshold  =  0.01) {\n+    sce <- read_10x_files(files$infile, intype)\n+\n+    eparams$... <- NULL ## hack to remove other parameters from being\n+    eparams$m <- Matrix(counts(sce), sparse = TRUE)\n+\n+    ## Determine sensible lowerbound\n+    m_stats <- summary(colSums(counts(sce)))\n+    print("Cell Li'..b'e.out[complete.cases(e.out),])\n+    write_tsv(files$table, e_out[complete.cases(e_out), ])\n \n     png(files$plot)\n-    plot(e.out$Total, -e.out$LogProb, col=ifelse(e.out$is.Cell, "red", "black"),\n-         xlab="Total UMI count", ylab="-Log Probability",\n-         xlim=c(min(xlim.dat),max(xlim.dat)))\n+    plot(e_out$Total, -e_out$LogProb, col = ifelse(e_out$is_cell,\n+                                                   "red", "black"),\n+         xlab = "Total UMI count", ylab = "-Log Probability",\n+         xlim = c(min(xlim_dat), max(xlim_dat)))\n     dev.off()\n \n     ## Filtered\n     called <- NULL\n-    if (fdr_threshold != 0){\n-        called <- e.out$is.CellAndLimited\n+    if (fdr_threshold != 0) {\n+        called <- e_out$is_cellandlimited\n     } else {\n-        called <- e.out$is.Cell\n+        called <- e_out$is_cell\n     }\n     called[is.na(called)] <- FALSE    # replace NA\'s with FALSE\n-    sce.filtered <- sce[,called]\n+    sce_filtered <- sce[, called]\n \n-    writeOut(sce.filtered, files$out, out.type)\n+    write_out(sce_filtered, files$out, outtype)\n \n-    message(paste("Cells:", sum(na.omit(e.out$is.Cell))))\n-    message(paste("Cells and Limited:", sum(na.omit(e.out$is.CellAndLimited))))\n+    message(paste("Cells:", sum(na.omit(e_out$is_cell))))\n+    message(paste("Cells and Limited:", sum(na.omit(e_out$is_cellandlimited))))\n }\n \n \n-doDefaultDrops <- function(files, dparams, in.type="directory", out.type="h5"){\n-    sce <- read10xFiles(files$infile, in.type)\n+do_default_drops <- function(files, dparams,\n+                           intype = "directory", outtype = "h5") {\n+    sce <- read_10x_files(files$infile, intype)\n \n-    dparams$m = counts(sce)\n+    dparams$m <- counts(sce)\n     called <- do.call(defaultDrops, c(dparams))\n \n     # Filtered\n-    sce.filtered <- sce[,called]\n+    sce_filtered <- sce[, called]\n \n-    writeOut(sce.filtered, files$out, out.type)\n+    write_out(sce_filtered, files$out, outtype)\n \n     message(paste("Cells:", sum(called)))\n }\n \n-\n-doBarcodeRankings <- function(files, bparams, in.type="directory"){\n-    sce <- read10xFiles(files$infile, in.type)\n+do_barcode_rankings <- function(files, bparams, intype = "directory") {\n+    sce <- read_10x_files(files$infile, intype)\n \n     bparams$... <- NULL ## hack\n-    bparams$m = counts(sce)\n+    bparams$m <- counts(sce)\n \n-    br.out <- do.call(barcodeRanks, c(bparams))\n+    brout <- do.call(barcodeRanks, c(bparams))\n \n     png(files$plot)\n-    plot(br.out$rank, br.out$total, log="xy", xlab="(log) Rank", ylab="(log) Total Number of Barcodes")\n-    o <- order(br.out$rank)\n-    lines(br.out$rank[o], br.out$fitted[o], col="red")\n+    plot(brout$rank, brout$total, log = "xy",\n+         xlab = "(log) Rank", ylab = "(log) Total Number of Barcodes")\n+    o <- order(brout$rank)\n+    lines(brout$rank[o], brout$fitted[o], col = "red")\n \n-    abline(h=br.out$knee, col="dodgerblue", lty=2)\n-    abline(h=br.out$inflection, col="forestgreen", lty=2)\n-    legend("bottomleft", lty=2, col=c("dodgerblue", "forestgreen"), legend=c("knee", "inflection"))\n+    abline(h = brout$knee, col = "dodgerblue", lty = 2)\n+    abline(h = brout$inflection, col = "forestgreen", lty = 2)\n+    legend("bottomleft", lty = 2, col = c("dodgerblue", "forestgreen"),\n+           legend = c("knee", "inflection"))\n     dev.off()\n \n-    print(paste("knee =", br.out$knee, ", inflection = ", br.out$inflection))\n+    print(paste("knee =", brout$knee, ", inflection = ", brout$inflection))\n }\n \n ## Main\n set.seed(seed.val)\n \n if (do.method == "barcodeRankings") {\n-    doBarcodeRankings(files, bparams, in.type)\n+    do_barcode_rankings(files, bparams, intype)\n \n } else if (do.method == "defaultDrops") {\n-    doDefaultDrops(files, dparams, in.type, out.type)\n+    do_default_drops(files, dparams, intype, outtype)\n \n } else if (do.method == "emptyDrops") {\n-    doEmptyDrops(files, eparams, in.type, out.type, empty.fdr_threshold)\n+    do_empty_drops(files, eparams, intype, outtype, empty_fdr_threshold)\n }\n'
b
diff -r cdf4443d5625 -r 8855361fcfc5 test-data/defs_defaultdrops.h5
b
Binary file test-data/defs_defaultdrops.h5 has changed
b
diff -r cdf4443d5625 -r 8855361fcfc5 test-data/defs_emptydrops_150_0002.h5
b
Binary file test-data/defs_emptydrops_150_0002.h5 has changed
b
diff -r cdf4443d5625 -r 8855361fcfc5 test-data/defs_emptydrops_150_0002.png
b
Binary file test-data/defs_emptydrops_150_0002.png has changed
b
diff -r cdf4443d5625 -r 8855361fcfc5 test-data/defs_emptydrops_150_0002a.h5
b
Binary file test-data/defs_emptydrops_150_0002a.h5 has changed
b
diff -r cdf4443d5625 -r 8855361fcfc5 test-data/defs_emptydrops_150_0002a.png
b
Binary file test-data/defs_emptydrops_150_0002a.png has changed