changeset 2:7782648d8b56 draft

"planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/droplet-rank-plot/.shed.yml commit 6f31a23343fd7d0a834eebe268d820733eb483b5"
author ebi-gxa
date Tue, 24 Mar 2020 07:14:22 -0400
parents 0e985680e67d
children 8f605643ccfd
files dropletBarcodePlot.R dropletBarcodePlot.xml
diffstat 2 files changed, 2 insertions(+), 178 deletions(-) [+]
line wrap: on
line diff
--- a/dropletBarcodePlot.R	Wed Mar 04 06:44:10 2020 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,170 +0,0 @@
-#!/usr/bin/env Rscript
-
-# This script parses the GTF file to create a feature-wise annotation file with
-# mitochondrial features flagged, to assist in annotation and QC of single-cell
-# expression data analysis.
-
-suppressPackageStartupMessages(require(optparse))
-suppressPackageStartupMessages(require(ggplot2))
-suppressPackageStartupMessages(require(gridExtra))
-suppressPackageStartupMessages(require(DropletUtils))
-suppressPackageStartupMessages(require(Matrix))
-
-die <- function(message){
-  write(message, stderr())
-  q(status = 1)
-}
-
-option_list = list(
-  make_option(
-    c("-b", "--barcode-frequencies"),
-    action = "store",
-    default = NA,
-    type = 'character',
-    help = "Path to a two-column tab-delimited file, with barcodes in the first column and frequencies in the second (ignored if --mtx-matrix supplied)"
-  ),
-  make_option(
-    c("-m", "--mtx-matrix"),
-    action = "store",
-    default = NA,
-    type = 'character',
-    help = 'Matrix-market format matrix file, with cells by column (overrides --barcode-frequencies if supplied)'
-  ),  
-  make_option(
-    c("-r", "--cells-by-row"),
-    action = "store_true",
-    default = FALSE,
-    type = 'logical',
-    help = 'For use with --mtx-matrix: force interpretation of matrix to assume cells are by row, rather than by column (default)'
-  ),
-  make_option(
-    c("-l", "--label"),
-    action = "store",
-    default = '',
-    type = 'character',
-    help = 'Label to use in plot'
-  ),
-  make_option(
-    c("-d", "--density-bins"),
-    action = "store",
-    default = 50,
-    type = 'numeric',
-    help = "Number of bins used to calculate density plot"
-  ),
-  make_option(
-    c("-y", "--roryk-multiplier"),
-    action = "store",
-    default = 1.5,
-    type = 'numeric',
-    help = "Above-baseline multiplier to calculate roryk threshold"
-  ),
-  make_option(
-    c("-o", "--output-plot"),
-    action = "store",
-    default = 'barcode_plot.png',
-    type = 'character',
-    help = "File path for output plot"
-  ),
-  make_option(
-    c("-t", "--output-thresholds"),
-    action = "store",
-    default = 'barcode_thresholds.txt',
-    type = 'character',
-    help = "File path for output file containing calculted thresholds"
-  )
-)
-
-opt <- parse_args(OptionParser(option_list = option_list), convert_hyphens_to_underscores = TRUE)
-
-# Process inputs dependent on what has been provided
-
-if (is.na(opt$mtx_matrix)){
-  if (is.na(opt$barcode_frequencies)){
-    die('ERROR: must supply --mtx-matrix or --barcode-frequencies')
-  }else if (! file.exists(opt$barcode_frequencies)){
-    die(paste('ERROR: barcode frequencies file', opt$barcode_frequencies, 'does not exist'))
-  }else{
-    barcode_counts <- read.delim(opt$barcode_frequencies, header = FALSE)
-  }
-}else if (! file.exists(opt$mtx_matrix)){
-  die(paste('ERROR: MTX matrix file', opt$mtx_matrix, 'does not exist'))
-}else{
-  result_matrix <- Matrix::readMM(opt$mtx_matrix)
-  if (opt$cells_by_row){
-    barcode_counts <- data.frame(V1 = 1:nrow(result_matrix), V2=Matrix::rowSums(result_matrix))
-  }else{
-    barcode_counts <- data.frame(V1 = 1:ncol(result_matrix), V2=Matrix::colSums(result_matrix))
-  }
-}
-
-# Pick a cutoff on count as per https://github.com/COMBINE-lab/salmon/issues/362#issuecomment-490160480
-
-pick_roryk_cutoff = function(bcs, above_baseline_multiplier = 1.5){
-  bcs_hist = hist(log10(bcs), plot=FALSE, n=opt$density_bins)
-  mids = bcs_hist$mids
-  vals = bcs_hist$count
-  wdensity = vals * (10^mids) / sum(vals * (10^mids))
-  baseline <- median(wdensity)
-  
-  # Find highest density in upper half of barcode distribution
-  
-  peak <- which(wdensity == max(wdensity[((length(wdensity)+1)/2):length(wdensity)]))
-  
-  # Cutoff is the point before the peak at which density falls below the multiplier of baseline
-  
-  10^mids[max(which(wdensity[1:peak] < (above_baseline_multiplier*baseline)))]
-}
-
-# Plot densities 
-
-barcode_density_plot = function(bcs, roryk_cutoff, knee, inflection, name = '   ') {
-  bcs_hist = hist(log10(bcs), plot=FALSE, n=opt$density_bins)
-  counts = bcs_hist$count
-  mids = bcs_hist$mids
-  y = counts * (10^mids) / sum(counts * (10^mids))
-  qplot(y, 10^mids) + geom_point() + theme_bw() + ggtitle(name) + ylab('Count') + xlab ('Density') +
-    geom_hline(aes(yintercept = roryk_cutoff, color = paste('roryk_cutoff =', length(which(bcs > roryk_cutoff)), 'cells'))) + 
-    geom_hline(aes(yintercept = inflection, color = paste('dropletutils_inflection =', length(which(bcs > inflection)), 'cells'))) +
-    geom_hline(aes(yintercept = knee, color = paste('dropletutils_knee =', length(which(bcs > knee)), 'cells'))) +
-    scale_y_continuous(trans='log10') + theme(axis.title.y=element_blank()) + labs(color='Thresholds')
-}  
-
-# Plot a more standard barcode rank plot
-
-barcode_rank_plot <- function(br.out, roryk_total_cutoff, knee, inflection, name='no name'){
-  ggplot(data.frame(br.out), aes(x=rank, y=total)) + geom_line() + scale_x_continuous(trans='log10') + scale_y_continuous(trans='log10') + theme_bw() + 
-    geom_hline(aes(yintercept = knee, color = 'dropletutils_knee')) + 
-    geom_hline(aes(yintercept = inflection, color = 'dropletutils_inflection')) +
-    geom_hline(aes(yintercept = roryk_total_cutoff, color = 'roryk_cutoff')) +
-    ggtitle(name) + ylab('Count') + xlab('Rank') + theme(legend.position = "none")
-}
-
-# Sort barcodes by descending frequency
-
-barcode_counts <- barcode_counts[order(barcode_counts$V2, decreasing = TRUE), ]
-
-roryk_count_cutoff <- pick_roryk_cutoff(barcode_counts$V2, opt$roryk_multiplier)
-  
-# Run dropletUtils' barcodeRanks to get knee etc
-br.out <- barcodeRanks(t(barcode_counts[,2,drop=FALSE]))
-  
-dropletutils_knee <- metadata(br.out)$knee
-dropletutils_inflection <- metadata(br.out)$inflection
-
-plot_label <- paste(format(nrow(barcode_counts), big.mark = ','), 'cell barcodes')
-if ((! is.na(opt$label)) && opt$label != ''){
-  plot_label <- paste0(opt$label, ': ', plot_label)
-}
-  
-plots <- list(
-  dropletutils = barcode_rank_plot(br.out, roryk_count_cutoff, dropletutils_knee, dropletutils_inflection, name = plot_label),
-  roryk = barcode_density_plot(barcode_counts$V2, roryk_count_cutoff, dropletutils_knee, dropletutils_inflection, name = '   ')
-)
-
-# Create output plot
-png(width = 1000, height = 600, file=opt$output_plot)
-grid.arrange(plots$dropletutils, plots$roryk, nrow=1)
-dev.off()
-
-# Return calculated thresholds
-write.table(data.frame(dropletutils_knee = dropletutils_knee, dropletutils_inflection = dropletutils_inflection, roryk=roryk_count_cutoff), file = opt$output_thresholds, row.names = FALSE, quote = FALSE)
--- a/dropletBarcodePlot.xml	Wed Mar 04 06:44:10 2020 -0500
+++ b/dropletBarcodePlot.xml	Tue Mar 24 07:14:22 2020 -0400
@@ -1,13 +1,7 @@
-<tool id="_dropletBarcodePlot" name="Droplet barcode rank plot" version="1.6.1+galaxy1" profile="18.01">
+<tool id="_dropletBarcodePlot" name="Droplet barcode rank plot" version="1.6.1+galaxy2" profile="18.01">
     <description>Creates a barcode rank plot for quality control of droplet single-cell RNA-seq data</description>
     <requirements>
-      <requirement type="package" version="1.6.1">bioconductor-dropletutils</requirement>
-      <requirement type="package" version="0.3.9">openblas</requirement>
-      <requirement type="package" version="1.2">r-matrix</requirement>
-      <requirement type="package" version="3.2.1">r-ggplot2</requirement>
-      <requirement type="package" version="1.6.4">r-optparse</requirement>
-      <requirement type="package" version="2.3">r-gridextra</requirement>
-      <requirement type="package" version="0.12.0">bioconductor-delayedarray</requirement>
+      <requirement type="package" version="0.0.1">scxa-plots</requirement>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
         $__tool_directory__/dropletBarcodePlot.R --output-plot "${plot_file}" --output-thresholds "${thresholds_file}" --label "${label}" --density-bins "${density_bins}" --roryk-multiplier "${roryk_multiplier}"