Next changeset 1:a62b0c62270e (2021-04-06) |
Commit message:
"planemo upload for repository https://github.com/ohsu-comp-bio/naivestates commit 392f57d212a7499bf1d3e421112a32a56635bc67-dirty" |
added:
macros.xml main.R naivestates.xml |
b |
diff -r 000000000000 -r 1fb6181c2c64 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Fri Mar 12 00:20:13 2021 +0000 |
b |
<?xml version="1.0"?>
<macros>
    <!-- Package requirement shared by tools importing this file.
         The name is spelled "naivestates" to match the R package that
         main.R loads via library( naivestates ).
         NOTE(review): confirm this is the name the dependency resolver
         actually provides for version @VERSION@. -->
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@VERSION@">naivestates</requirement>
        </requirements>
    </xml>

    <!-- Version command: echoes the @VERSION@ token defined below. -->
    <xml name="version_cmd">
        <version_command>echo @VERSION@</version_command>
    </xml>

    <!-- Citations placeholder; currently empty. -->
    <xml name="citations">
        <citations>
        </citations>
    </xml>

    <!-- Tokens substituted into the tool wrapper. -->
    <token name="@VERSION@">1.6.1</token>
    <token name="@CMD_BEGIN@">${__tool_directory__}/main.R</token>
</macros>
b |
diff -r 000000000000 -r 1fb6181c2c64 main.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/main.R Fri Mar 12 00:20:13 2021 +0000 |
[ |
#!/usr/bin/env Rscript

## Command-line driver for naivestates.
##
## Reads a cells-by-markers .csv (-i), fits a Gaussian mixture model to each
## selected marker (-m), and writes two files to the output directory (-o):
##   <sample>-models.csv   the fitted models
##   <sample>-states.csv   per-cell probabilities and state calls
## When --plots is pdf or png, fit plots are written under <out>/plots, and
## UMAP summary/probability plots are added if --umap is given.
## <sample> is the input file name up to its first period.

suppressMessages( library(tidyverse) )
library( optparse )
library( naivestates )

## Identify directory of the script
wd <- commandArgs( trailingOnly=FALSE ) %>%
    keep( ~grepl("--file=", .x) ) %>%
    str_replace( "--file=", "" ) %>% dirname()
cat( "Running the script from", wd, "\n" )

## Parse command-line arguments
option_list <- list(
    make_option(c("-i", "--in"), type="character", help="Input file"),
    make_option(c("-o", "--out"), type="character", default="/data",
                help="Output directory"),
    make_option(c("-m", "--markers"), type="character", default="auto",
                help="Markers to model"),
    make_option(c("-p", "--plots"), type="character", default="off",
                help="Generate plots showing the fit"),
    make_option("--mct", type="character", default="",
                help="Marker -> cell type map in .csv format"),
    make_option("--id", type="character", default="CellID",
                help="Column containing cell IDs"),
    make_option("--log", type="character", default="auto",
                help="Whether to apply a log transform <yes|no|auto>"),
    make_option("--sfx", type="character", default="",
                help="Common suffix on marker columns (e.g., _cellMask)"),
    make_option("--umap", action="store_true", default=FALSE,
                help="Generate UMAP plots")
)
opt <- parse_args(OptionParser(option_list=option_list))

## Argument verification
if( !("in" %in% names(opt)) )
    stop( "Please provide an input file name with -i" )
if( !(opt$log %in% c("yes","no","auto")) )
    stop( "--log must be one of <yes|no|auto>" )
if( !(opt$plots %in% c("off", "pdf", "png")) )
    stop( "--plots must be one of <off|pdf|png>" )

## UMAP plots are only produced inside the plotting section below
if( opt$umap && opt$plots == "off" )
    cat( "Note: --umap has no effect when --plots is off\n" )

## Identify the sample name (file name up to the first period)
sn <- basename( opt$`in` ) %>% str_split( "\\." ) %>%
    pluck( 1, 1 )
cat( "Inferred sample name:", sn, "\n" )

## Read the data matrix
X <- read_csv( opt$`in`, col_types=cols() )
cat( "Read", nrow(X), "entries\n" )

## Fix potential capitalization mismatch of --id
if( !(opt$id %in% colnames(X)) )
{
    ## Attempt to find a singular case-insensitive match.
    ## Compare whole names: a regex/substring search would also accept
    ##   columns that merely contain the requested name, and would
    ##   misinterpret any regex metacharacters in --id.
    i <- which( tolower(colnames(X)) == tolower(opt$id) )
    if( length(i) == 1 )
    {
        warning( "  No such column ", opt$id,
                "; using ", colnames(X)[i], " instead" )
        opt$id <- colnames(X)[i]
    }
    else stop( "No such column ", opt$id,
              "; use --id to specify which column contains cell IDs" )
}

## Identify markers in the matrix
mrkv <- findMarkers(setdiff(colnames(X), opt$id), opt$markers,
                    opt$sfx, TRUE, TRUE)

## Handle log transformation of the data
if( opt$log == "yes" ||
    (opt$log == "auto" && max(X[mrkv], na.rm=TRUE) > 1000) )
{
    cat( "Applying a log10 transform\n" )
    X <- X %>% mutate_at( unname(mrkv), ~log10(.x+1) )
}

## Make sure the output directory exists before any fitting is done,
##   so that a bad -o is reported up front rather than after the fit
dir.create( opt$out, recursive=TRUE, showWarnings=FALSE )
if( !dir.exists(opt$out) )
    stop( "Unable to create output directory ", opt$out )

## Fit Gaussian mixture models
GMM <- GMMfit(X, opt$id, !!!mrkv)
fnMdl <- file.path( opt$out, str_c(sn, "-models.csv") )
cat( "Saving models to", fnMdl, "\n" )
GMMmodels(GMM) %>% write_csv( fnMdl )

## Reshape the matrix back to cells-by-marker format
Y <- GMMreshape(GMM)

cat( "------\n" )

## Find the default cell type map
if( opt$mct != "" ) {

    ## Load marker -> cell type associations
    cat( "Loading cell type map from", opt$mct, "\n" )
    mct <- read_csv( opt$mct, col_types=cols() )

    ## Both columns are referenced by name below
    if( !all(c("Marker", "State") %in% colnames(mct)) )
        stop( "The marker -> cell type map ", opt$mct,
             " must contain columns Marker and State" )

    ## Keep only the unique mappings whose marker was actually modeled
    mct <- mct %>% distinct() %>% filter(Marker %in% colnames(Y))

    if( nrow(mct) == 0 ) {
        warning( "No usable marker -> cell type mappings detected" )
        Y <- findDominant(Y, opt$id)
    } else {
        cat( "Using the following marker -> cell type map:\n" )
        walk2( mct$Marker, mct$State, ~cat(.x, "->", .y, "\n") )
        Y <- callStates(Y, opt$id, mct)
    }
} else {
    cat( "No marker -> cell type mapping provided\n" )
    Y <- findDominant(Y, opt$id)
}

cat( "------\n" )

## Identify the output location(s)
fnOut <- file.path( opt$out, str_c(sn, "-states.csv") )
cat( "Saving probabilities and calls to", fnOut, "\n")
Y %>% write_csv( fnOut )

## Generates plots as necessary
if( opt$plots != "off" )
{
    ## Overview plots go to <out>/plots; per-marker plots go to a
    ##   sample-specific subdirectory underneath it
    dirPlots <- file.path( opt$out, "plots" )
    dirPlot <- file.path( dirPlots, sn )
    dir.create(dirPlot, recursive=TRUE, showWarnings=FALSE)

    ## Fit overview
    fn <- file.path( dirPlots, str_c(sn, "-allfits.", opt$plots) )
    ggf <- plotFitOverview(GMM)
    suppressMessages(ggsave( fn, ggf, width=12, height=8 ))

    ## Compute a UMAP projection
    if( opt$umap ) {
        cat( "Computing a UMAP projection...\n" )
        U <- umap( Y, c(opt$id, "State", "Dominant") )

        ## Generate and write a summary plot
        gg <- plotSummary( U )
        fn <- file.path( dirPlots, str_c(sn, "-summary.", opt$plots) )
        suppressMessages(ggsave( fn, gg, width=9, height=7 ))
        cat( "Plotted summary to", fn, "\n" )

        ## Generate and write faceted probabilities plot
        gg <- plotProbs( U, c(opt$id, "State", "Dominant") )
        fn <- file.path( dirPlots, str_c(sn, "-probs.", opt$plots) )
        suppressMessages(ggsave( fn, gg, width=9, height=7 ))
        cat( "Plotted probabilities to", fn, "\n" )
    }

    ## Generate and write out plots for individual marker fits
    for( i in names(mrkv) )
    {
        gg <- plotMarker(GMM, i)
        fn <- file.path( dirPlot, str_c(i,".",opt$plots) )
        suppressMessages(ggsave( fn, gg ))
        cat( "Wrote", fn, "\n" )
    }
}
b |
diff -r 000000000000 -r 1fb6181c2c64 naivestates.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/naivestates.xml Fri Mar 12 00:20:13 2021 +0000 |
[ |
b'@@ -0,0 +1,185 @@\n+<tool id="naivestates" name="naivestates" version="@VERSION@.2" profile="17.09">\n+ <description> Inference of cell states using Naive Bayes</description>\n+ <macros>\n+ <import>macros.xml</import>\n+ </macros>\n+ \n+ <expand macro="requirements"/>\n+ @VERSION_CMD@\n+\n+ <command detect_errors="exit_code"><![CDATA[\n+\n+ @CMD_BEGIN@\n+ -i \'$counts\'\n+\n+ #if $markers\n+ -m $markers\n+ #end if\n+ \n+ --mct $mct\n+ -p $plots\n+\n+ #if $id\n+ --id $id\n+ #end if\n+\n+ --log $log\n+\n+ #if $sfx\n+ --sfx $sfx\n+ #end if\n+\n+ #if $umap\n+ --umap\n+ #end if\n+ -o .\n+\n+ &&\n+\n+ mv *-states.csv states.csv;\n+\n+ #if $plots != "off"\n+ mv plots/*-probs.${plots} plots/probs.${plots};\n+ mv plots/*-summary.${plots} plots/summary.${plots};\n+ mv plots/*-allfits.${plots} plots/allfits.${plots};\n+ #end if\n+\n+ ]]></command>\n+\n+\n+ <inputs>\n+ <param name="counts" type="data" format="csv" label="Quantified Cell Matrix"/>\n+ <param name="markers" type="data" format="txt" optional="true" label="Markers to model"/>\n+ <param name="mct" type="data" format="csv" label="Marker-State Association Map"/>\n+ <param name="plots" type="select" label="Generate plots showing the fit">\n+ <option selected="true" value="png">png</option>\n+ <option value="pdf">pdf</option>\n+ <option value="off">off</option>\n+ </param>\n+ <param name="id" type="text" value="" label="Column name containing cell IDs"/>\n+ <param name="log" type="select" label="Log Transform" help="Whether to apply a log transform">\n+ <option selected="true" value="auto">auto</option>\n+ <option value="yes">yes</option>\n+ <option value="no">no</option>\n+ </param>\n+ <param name="sfx" type="text" value="_cellMask" optional="true" label="Common suffix" help="Common suffix on marker columns (e.g., _cellMask)"/>\n+ <param name="umap" type="boolean" checked="true" label="Generate UMAP plots"/>\n+ </inputs>\n+\n+ <outputs>\n+ <data format="csv" name="states" from_work_dir="states.csv" 
label="${tool.name} on ${on_string}: States CSV"/>\n+ <data format="png" name="probs-png" from_work_dir="plots/probs.png" label="${tool.name} on ${on_string}: Probabilities">\n+ <filter>plots == \'png\'</filter>\n+ </data>\n+ <data format="png" name="summary-png" from_work_dir="plots/summary.png" label="${tool.name} on ${on_string}: Summary">\n+ <filter>plots == \'png\'</filter>\n+ </data>\n+ <data format="png" name="allfits-png" from_work_dir="plots/allfits.png" label="${tool.name} on ${on_string}: AllFits">\n+ <filter>plots == \'png\'</filter>\n+ </data>\n+ <data format="pdf" name="probs-pdf" from_work_dir="plots/probs.pdf" label="${tool.name} on ${on_string}: Probabilities">\n+ <filter>plots == \'pdf\'</filter>\n+ </data>\n+ <data format="pdf" name="summary-pdf" from_work_dir="plots/summary.pdf" label="${tool.name} on ${on_string}: Summary">\n+ <filter>plots == \'pdf\'</filter>\n+ </data>\n+ <data format="pdf" name="allfits-pdf" from_work_dir="plots/allfits.pdf" label="${tool.name} on ${on_string}: AllFits">\n+ <filter>plots == \'pdf\'</filter>\n+ </data>\n+ </outputs>\n+ <help><![CDATA[\n+naivestates - Inference of cell states using Naive Bayes\n+This work is supported by the NIH Grant 1U54CA225088: Systems Pharmacology of Therapeutic and Adverse Responses to Immune Checkpoint and Small Molecule Drugs and by the NCI grant 1U2CCA233262: Pre-cancer atlases of cutaneous and hematologic origin (PATCH Center).\n+\n+Introduction\n+naivestates is a label-free, cluster-free tool for inferring cell types from quantified marker expression data, based on known marker <-> cell type associatio'..b'or example, v1.2.0 can be pulled with\n+\n+docker pull labsyspharm/naivestates:1.2.0\n+Examine the tool usage instructions\n+docker run --rm labsyspharm/naivestates:1.2.0 /app/main.R -h\n+replacing 1.2.0 with the version you are working with. Omit :1.2.0 entirely if you pulled the latest version above. 
The flag --rm tells Docker to delete the container instance after it finishes displaying the help message.\n+\n+Basic usage\n+At minimum, the tool requires an input file and the list of marker names:\n+\n+docker run --rm -v /path/to/data/folder:/data labsyspharm/naivestates:1.2.0 \\\n+ /app/main.R -i /data/myfile.csv -m aSMA,CD45,panCK\n+where we can make a distinction between Docker-level arguments:\n+\n+--rm once again cleans up the container instance after it finishes running the code\n+-v /path/to/data/folder:/data maps the local folder containing your data to /data inside the container\n+:1.2.0 specifies the container version that we pulled above\n+and tool-level arguments:\n+\n+-i /data/myfile.csv specifies which data file to process\n+-m aSMA,CD45,panCK specifies the markers of interest (NOTE: comma-delimited, no spaces)\n+If there is a large number of markers, place their names in a standalone file markers.txt with one marker per line. Ensure that the file lives in /path/to/data/folder/ and modify the Docker call to use the new file:\n+\n+docker run --rm -v /path/to/data/folder:/data labsyspharm/naivestates:1.2.0 \\\n+ /app/main.R -i /data/myfile.csv -m /data/markers.txt\n+Additional parameters\n+The following parameters are optional, but may be useful in certain scenarios:\n+\n+--plots <off|pdf|png> - (default: off) Produces QC plots of individual marker fits and summary UMAP plots in .png or .pdf format.\n+--id - (default: CellID) Name of the column that contains cell IDs\n+--log <yes|no|auto> - (default: auto) When a log10 transformation should be applied prior to fitting the data. The tool will do this automatically if it detects large values. Use --log no to force the use of original, non-transformed values instead.\n+-o - (default: /data) Alternative output directory. 
(Note that any file written to a directory that wasn\'t mapped with docker -v will not persist when the container is destroyed.)\n+--mct - The tool has a basic marker -> cell type (mct) mapping in typemap.csv. More sophisticated mct mappings can be defined by creating a custom-map.csv file with two columns: Marker and State. Ensure that custom-map.csv is in /path/to/data/folder and point the tool at it with --mct (e.g., /app/main.R -i /data/myfile.csv --mct /data/custom-map.csv -m aSMA,CD45,panCK)\n+Alternative execution environments\n+Running in a Conda environment\n+If you are working in a computational environment that doesn\'t support Docker, the repository provides a Conda-based alternative. Ensure that conda is installed on your system, then 1) clone this repository, 2) instantiate the conda environment and 3) install the tool.\n+\n+git clone https://github.com/labsyspharm/naivestates.git\n+cd naivestates\n+conda env create -f conda.yml\n+conda activate naivestates\n+R -s -e "devtools::install_github(\'labsyspharm/naivestates\')"\n+The tool can now be used as above by running main.R:\n+\n+./main.R -h\n+./main.R -i /path/to/datafile.csv -m aSMA,CD45,panCK\n+Running as an R package\n+The tool can also be installed as an R package directly from GitHub:\n+\n+if( !require(devtools) ) install.packages("devtools")\n+devtools::install_github( "labsyspharm/naivestates" )\n+Example usage:\n+\n+library( tidyverse )\n+library( naivestates )\n+\n+# Load the original data\n+X <- read_csv( "datafile.csv" )\n+\n+# Fit models to channels aSMA, CD45 and panCK\n+# Specify that cell IDs are in column CellID\n+GMM <- GMMfit( X, CellID, aSMA, CD45, panCK )\n+\n+# Plot a fit to one of the markers\n+plotFit( GMM, "CD45" )\n+\n+# Write out the results to results.csv\n+GMMreshape(GMM) %>% write_csv( "results.csv" )\n+\n+OHSU Wrapper Repo: https://github.com/ohsu-comp-bio/naivestates\n+ ]]></help>\n+ <expand macro="citations" />\n+</tool>\n' |