Mercurial > repos > iuc > sleuth
changeset 0:5f1cb4c28d73 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/sleuth commit 6b943159b4d68812dc6911309f23d54ec659282f
author | iuc |
---|---|
date | Thu, 01 Jun 2023 07:56:00 +0000 |
parents | |
children | d3e447dd52c8 |
files | macros.xml sleuth.R sleuth.xml test-data/kallisto_output_01.h5 test-data/kallisto_output_02.h5 test-data/kallisto_output_03.h5 test-data/kallisto_output_04.h5 test-data/test01_density.pdf test-data/test01_pca.pdf |
diffstat | 9 files changed, 267 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!-- macros.xml: shared macros and version tokens for the sleuth tool wrapper -->
<macros>
    <!-- Version tokens substituted into the wrapper and the requirements -->
    <token name="@TOOL_VERSION@">0.30.1</token>
    <token name="@SUFFIX_VERSION@">0</token>
    <token name="@PROFILE@">20.01</token>

    <!-- Packages the tool needs; r-sleuth is pinned to @TOOL_VERSION@ -->
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@TOOL_VERSION@">r-sleuth</requirement>
            <requirement type="package" version="0.2.0">r-annotables</requirement>
            <requirement type="package" version="2.2.1">r-argparse</requirement>
            <requirement type="package" version="2.0.0">r-tidyverse</requirement>
        </requirements>
    </xml>

    <!-- Cross-reference to the bio.tools registry entry -->
    <xml name="xrefs">
        <xrefs>
            <xref type="bio.tools">sleuth</xref>
        </xrefs>
    </xml>

    <!-- Publication to cite -->
    <xml name="citations">
        <citations>
            <citation type="doi">10.1038/nmeth.4324</citation>
        </citations>
    </xml>
</macros>
# sleuth.R
#
# Command-line driver for a sleuth differential expression analysis.
# It builds a sample/condition design from the command-line arguments, fits a
# "full" model (~ condition) and a "reduced" model (~ 1), compares them with a
# likelihood ratio test and writes:
#   * sleuth_table.tab   - tab-separated result table
#   * pca_plot.pdf       - PCA plot coloured by condition
#   * group_density.pdf  - per-group density plot of the estimated counts
# All outputs are written to the current working directory.

library(sleuth,
        quietly = TRUE,
        warn.conflicts = FALSE)
library(annotables, quietly = TRUE, warn.conflicts = FALSE)
library(argparse, quietly = TRUE, warn.conflicts = FALSE)
library(tidyverse)


# setup R error handling to go to stderr
options(
  show.error.messages = FALSE,
  error = function() {
    cat(geterrmessage(), file = stderr())
    q("no", 1, FALSE)
  }
)

# we need that to not crash galaxy with an UTF8 error on German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")

################################################################################
### Input Processing
################################################################################


# Collect arguments from command line.
#   --factorLevel         condition label, given once per factor level
#   --factorLevel_n       number of files belonging to the matching level
#   --factorLevel_counts  input file, given once per file, grouped by level in
#                         the same order as --factorLevel
parser <- ArgumentParser(description = "Sleuth R script")

parser$add_argument("--factorLevel", action = "append", required = TRUE)
parser$add_argument("--factorLevel_counts",
                    action = "append",
                    required = TRUE)
parser$add_argument("--factorLevel_n", action = "append", required = TRUE)
parser$add_argument("--cores", type = "integer", required = TRUE)
parser$add_argument("--normalize", action = "store_true", required = FALSE)
parser$add_argument("--nbins", type = "integer", required = TRUE)
parser$add_argument("--lwr", type = "numeric", required = TRUE)
parser$add_argument("--upr", type = "numeric", required = TRUE)

args <- parser$parse_args()

all_files <- unlist(args$factorLevel_counts)
factor_levels <- unlist(args$factorLevel)

# --factorLevel_n is declared without a type, so its values arrive as strings.
level_sizes <- suppressWarnings(as.integer(unlist(args$factorLevel_n)))

# Validate the grouping before building the design. A condition vector that is
# shorter than the file list would be recycled by data.frame() and silently
# attach the wrong condition to some samples.
if (length(factor_levels) != length(level_sizes)) {
  stop(
    "Each --factorLevel needs exactly one --factorLevel_n (got ",
    length(factor_levels), " levels and ", length(level_sizes), " counts).",
    call. = FALSE
  )
}
if (anyNA(level_sizes) || any(level_sizes < 1L)) {
  stop("--factorLevel_n values must be positive integers.", call. = FALSE)
}
if (sum(level_sizes) != length(all_files)) {
  stop(
    "The --factorLevel_n values sum to ", sum(level_sizes), " but ",
    length(all_files), " --factorLevel_counts files were given.",
    call. = FALSE
  )
}

# One condition label per input file: every level is repeated as many times as
# it has files, in the order in which the files were passed.
conditions <- rep(factor_levels, times = level_sizes)

# NOTE(review): this strips ".tab", while the Galaxy wrapper in this changeset
# links the inputs as "<level>_<i>.h5", so the names keep their ".h5" suffix.
# Left unchanged on purpose - confirm whether ".h5" should be stripped too.
sample_names <- all_files %>%
  str_replace(pattern = "\\.tab", "")

design <- data.frame(
  sample = sample_names,
  condition = conditions,
  path = all_files,
  stringsAsFactors = FALSE
)

################################################################################
### Model fitting and testing
################################################################################

so <- sleuth_prep(design,
                  cores = args$cores,
                  normalize = args$normalize)

so <- sleuth_fit(
  so,
  ~ condition,
  "full",
  n_bins = args$nbins,
  lwr = args$lwr,
  upr = args$upr
)

so <- sleuth_fit(
  so,
  ~ 1,
  "reduced",
  n_bins = args$nbins,
  lwr = args$lwr,
  upr = args$upr
)

so <- sleuth_lrt(so, "reduced", "full")

sleuth_table <-
  sleuth_results(so, "reduced:full", "lrt", show_all = FALSE)

write.table(
  sleuth_table,
  file = "sleuth_table.tab",
  quote = FALSE,
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE
)

################################################################################
### Plots
################################################################################

# Render one plot into a 9 x 6 inch PDF in the working directory.
#   plot_object  plot expression; evaluated lazily, i.e. only after the PDF
#                device has been opened
#   file_name    name of the PDF file to create
# The device is closed even when plotting fails. print() is explicit because
# values are not auto-printed inside a function.
save_pdf_plot <- function(plot_object, file_name) {
  pdf(file = file.path(getwd(), file_name),
      height = 6,
      width = 9)
  on.exit(dev.off(), add = TRUE)
  print(plot_object)
  invisible(file_name)
}

save_pdf_plot(plot_pca(so, color_by = "condition"), "pca_plot.pdf")

save_pdf_plot(
  plot_group_density(
    so,
    use_filtered = TRUE,
    units = "est_counts",
    trans = "log",
    # group by every covariate column except the sample identifier
    grouping = setdiff(colnames(so$sample_to_covariates),
                       "sample"),
    offset = 1
  ),
  "group_density.pdf"
)
<!-- sleuth.xml: Galaxy tool wrapper that runs sleuth.R on kallisto HDF5 outputs -->
<tool id="sleuth" name="Sleuth" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@" profile="@PROFILE@">
    <description>differential expression analysis</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>
    <!-- Any of these patterns on stdout or stderr marks the job as failed -->
    <stdio>
        <regex match="Execution halted" source="both" level="fatal" description="Execution halted." />
        <regex match="Error in" source="both" level="fatal" description="An undefined error occurred, please check your input carefully and contact your administrator." />
        <regex match="Fatal error" source="both" level="fatal" description="An undefined error occurred, please check your input carefully and contact your administrator." />
    </stdio>
    <version_command><![CDATA[echo $(R --version | grep version | grep -v GNU)", sleuth version" $(R --vanilla --slave -e "library(sleuth); cat(sessionInfo()\$otherPkgs\$sleuth\$Version)" 2> /dev/null | grep -v -i "WARNING: ")]]></version_command>
    <command><![CDATA[
        ## Collect, per factor level: its name, its number of files, and a
        ## symlink "<level>_<index>.h5" for every selected dataset.
        #set $factor_levels = list()
        #set $cond_files = list()
        #set $cond_n_files = list()
        #for $level in $rep_factorLevel
            $factor_levels.append(str($level.factorLevel))
            $cond_n_files.append(len(str($level.countsFile).split(",")))
            #for $idx, $dataset in enumerate(str($level.countsFile).split(","))
                #set $link_name = str($level.factorLevel) + "_" + str($idx) + '.h5'
                ln -s '${dataset}' "${link_name}" &&
                $cond_files.append($link_name)
            #end for
        #end for

        ## Run the analysis script with one argument group per level and file.
        Rscript '${__tool_directory__}/sleuth.R'
        #for $idx, $level_name in enumerate($factor_levels)
            --factorLevel $level_name
            --factorLevel_n $cond_n_files[$idx]
        #end for
        #for $linked_file in $cond_files
            --factorLevel_counts $linked_file
        #end for
        --cores \${GALAXY_SLOTS:-4}
        $advanced_options.normalization
        --nbins $advanced_options.nbins
        --lwr $advanced_options.lwr
        --upr $advanced_options.upr
    ]]></command>
    <inputs>
        <!-- One entry per factor level; at least two levels are required -->
        <repeat name="rep_factorLevel" title="Factor level" min="2" default="2">
            <param name="factorLevel" type="text" value="FactorLevel"
                label="Specify a factor level, typical values could be 'tumor', 'normal', 'treated' or 'control'"
                help="Only letters, numbers and underscores will be retained in this field">
                <sanitizer>
                    <valid initial="string.letters,string.digits">
                        <add value="_" />
                    </valid>
                </sanitizer>
            </param>
            <param name="countsFile" type="data" format="h5" multiple="true" label="Counts file(s)"/>
        </repeat>
        <section name="advanced_options" title="Advanced options" expanded="true">
            <param argument="normalization" type="boolean" truevalue="--normalize" falsevalue="" checked="true"
                label="Normalize data"
                help="If this is set to false, bootstraps will not be read and transformation of the data will not be done. This should only be set to false if one desires to do a quick check of the raw data. " />
            <param argument="nbins" type="integer" min="0" value="100"
                label="NBins"
                help="The number of bins that the data should be split for the sliding window shrinkage using the mean-variance curve." />
            <param argument="lwr" type="float" min="0" max="1" value="0.25"
                label="LWR"
                help="The lower range of variances within each bin that should be included for the shrinkage procedure. " />
            <param argument="upr" type="float" min="0" max="1" value="0.75"
                label="UPR"
                help="The upper range of variances within each bin that should be included for the shrinkage procedure." />
        </section>
    </inputs>
    <outputs>
        <data name="sleuth_table" from_work_dir="sleuth_table.tab" format="tabular" label="${tool.name} on ${on_string}: DE table">
            <actions>
                <action name="column_names" type="metadata" default="target_id,pval,qval,test_stat,rss,degrees_free,mean_obs,var_obs,tech_var,sigma_sq,smooth_sigma_sq,final_sigma_sq" />
            </actions>
        </data>
        <data name="pca_plot" from_work_dir="pca_plot.pdf" format="pdf" label="${tool.name} on ${on_string}: PCA plot"/>
        <data name="density_plot" from_work_dir="group_density.pdf" format="pdf" label="${tool.name} on ${on_string}: density plot"/>
    </outputs>
    <tests>
        <!-- Two levels with two kallisto outputs each, default advanced options -->
        <test expect_num_outputs="3">
            <repeat name="rep_factorLevel">
                <param name="factorLevel" value="Control"/>
                <param name="countsFile" value="kallisto_output_01.h5,kallisto_output_02.h5"/>
            </repeat>
            <repeat name="rep_factorLevel">
                <param name="factorLevel" value="Cancer"/>
                <param name="countsFile" value="kallisto_output_03.h5,kallisto_output_04.h5"/>
            </repeat>
            <section name="advanced_options">
                <param name="normalization" value="true"/>
                <param name="nbins" value="100"/>
                <param name="lwr" value="0.25"/>
                <param name="upr" value="0.75"/>
            </section>
            <output name="sleuth_table" ftype="tabular">
                <assert_contents>
                    <has_size value="689791" delta="100"/>
                    <has_text text="ENST00000281092.9"/>
                    <has_text text="ENST00000700211.1"/>
                </assert_contents>
            </output>
            <output name="pca_plot" file="test01_pca.pdf" ftype="pdf" compare="sim_size"/>
            <output name="density_plot" file="test01_density.pdf" ftype="pdf" compare="sim_size"/>
        </test>
    </tests>
    <help><![CDATA[

.. class:: infomark

**Purpose**

Sleuth is a tool for the analysis and comparison of multiple related RNA-Seq experiments. Key features include:

- The ability to perform both transcript-level and gene-level analysis.
- Compatibility with kallisto enabling a fast and accurate workflow from reads to results.
- The use of bootstraps to ascertain and correct for technical variation in experiments.
- An interactive app for exploratory data analysis.

To use sleuth, RNA-Seq data must first be quantified with kallisto, which is a program for very fast RNA-Seq quantification based on
pseudo-alignment. An important feature of kallisto is that it outputs bootstraps along with the estimates of transcript abundances.
These can serve as proxies for technical replicates, allowing for an ascertainment of the variability in estimates due to the random
processes underlying RNA-Seq as well as the statistical procedure of read assignment.

    ]]></help>
    <expand macro="citations" />
</tool>