Mercurial > repos > iuc > masigpro
changeset 1:cc96abdef027 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/masigpro commit c4510bf402d10e8e3a3c4c90c2d96666c987a256
author | iuc |
---|---|
date | Thu, 01 Jun 2017 11:10:22 -0400 |
parents | c8c290f3ea7d |
children | db04ba860dab |
files | masigpro.R masigpro.xml test-data/Results.pdf |
diffstat | 3 files changed, 583 insertions(+), 534 deletions(-) [+] |
line wrap: on
line diff
--- a/masigpro.R Mon May 15 07:29:03 2017 -0400 +++ b/masigpro.R Thu Jun 01 11:10:22 2017 -0400 @@ -1,104 +1,153 @@ -#!/usr/bin/env Rscript - -# A command-line interface to maSigPro for use with Galaxy -# written by Clemens Blank. -# Thanks to Bjoern Gruening and Michael Love for their DESeq2 -# wrapper as a basis to build upon. - -# setup R error handling to go to stderr -options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) - -# we need that to not crash galaxy with an UTF8 error on German LC settings. -loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") - -suppressPackageStartupMessages({ - library("maSigPro") - library("optparse") - library("mclust") -}) - -options(stringAsFactors = FALSE, useFancyQuotes = FALSE) -args <- commandArgs(trailingOnly = TRUE) - -# specify our desired options in a list -# by default OptionParser will add an help option equivalent to -# make_option(c("-h", "--help"), action="store_true", default=FALSE, -# help="Show this help message and exit") -option_list <- list( - make_option(c("-q", "--quiet"), action="store_false", - dest="verbose", help="Print little output"), - make_option(c("-e", "--edesign"), type="character"), - make_option(c("-d", "--data"), type="character"), - make_option(c("-o", "--outfile"), type="character"), - make_option("--degree", type="integer", default=1), - make_option("--time_col", type="integer", default=1), - make_option("--repl_col", type="integer", default=2), - make_option("--qvalue", type="double", default=0.05), - make_option("--min_obs", type="integer", default=6), - make_option("--step_method", type="character", default="backward"), - make_option("--nvar_correction", type="logical", default=FALSE), - make_option("--alfa", type="double", default=0.05), - make_option("--rsq", type="double", default=0.7), - make_option("--vars", type="character", default="groups"), - make_option("--significant_intercept", type="character", default="dummy"), - 
make_option("--cluster_data", type="integer", default=1), - make_option(c("-k", "--k"), type="integer", default=9), - make_option("--cluster_method", type="character", default="hclust"), - make_option("--distance", type="character", default="cor"), - make_option("--agglo_method", type="character", default="ward.D"), - make_option("--iter_max", type="integer", default=500), - make_option("--color_mode", type="character", default="rainbow"), - make_option("--show_fit", type="logical", default=TRUE), - make_option("--show_lines", type="logical", default=TRUE), - make_option("--cexlab", type="double", default=0.8), - make_option("--legend", type="logical", default=TRUE) -) - -# get command line options, if help option encountered print help and exit, -# otherwise if options not found on command line then set defaults -opt <- parse_args(OptionParser(option_list=option_list)) - -# enforce the following required arguments -if (is.null(opt$edesign)) { - cat("'edesign' is required\n") - q(status=1) -} -if (is.null(opt$data)) { - cat("'data' is required\n") - q(status=1) -} -if (is.null(opt$outfile)) { - cat("'outfile' is required\n") - q(status=1) -} - -verbose <- if (is.null(opt$quiet)) { - TRUE -} else { - FALSE -} - -edesign <- as.matrix(read.table(opt$edesign, header=TRUE, row.names = 1)) - -data <- read.table(opt$data, header=TRUE) - -results <- maSigPro(data, edesign, degree = opt$degree, time.col = opt$time_col, - repl.col = opt$repl_col, Q = opt$qvalue, min.obs = opt$min_obs, - step.method = opt$step_method, nvar.correction = opt$nvar_correction, - alfa = opt$alfa, rsq = opt$rsq, vars = opt$vars, - significant.intercept = opt$significant_intercept, - cluster.data = opt$cluster_data, k = opt$k, - cluster.method = opt$cluster_method, distance = opt$distance, - agglo.method = opt$agglo_method, iter.max = opt$iter_max, - color.mode = opt$color_mode, show.fit = opt$show_fit, - show.lines = opt$show_lines, cexlab = opt$cexlab, - legend = opt$legend) - -filename <- 
opt$outfile - -write.table((results$summary), file=filename, sep="\t", quote=FALSE, - row.names=FALSE, col.names=TRUE) - -cat("Session information:\n\n") - -sessionInfo() \ No newline at end of file +#!/usr/bin/env Rscript + +# A command-line interface to maSigPro for use with Galaxy +# written by Clemens Blank. +# Thanks to Bjoern Gruening and Michael Love for their DESeq2 +# wrapper as a basis to build upon. + +# setup R error handling to go to stderr +options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) + +# we need that to not crash galaxy with an UTF8 error on German LC settings. +loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") + +suppressPackageStartupMessages({ + library("maSigPro") + library("optparse") + library("mclust") +}) + +# The following code fixes an error in the stepback function +# of the maSigPro package. This code is hopefully temporary +# and can be removed if the fix is included in a future +# version. The stepback function in the maSigPro namespace +# will be overwritten by the following function. 
+stepback <- function (y = y, d = d, alfa = 0.05, family = gaussian() , epsilon=0.00001) +{ + lm1 <- glm(y ~ ., data = d, family=family, epsilon=epsilon) + result <- summary(lm1) + max <- max(result$coefficients[, 4][-1], na.rm = TRUE) + if (length(result$coefficients[, 4][-1]) == 1) { + if (max > alfa) { + max = 0 + lm1 <- glm(y ~ 1, family=family, epsilon=epsilon) + } + } + while (max > alfa) { + varout <- names(result$coefficients[, 4][-1])[result$coefficients[, + 4][-1] == max][1] + pos <- position(matrix = d, vari = varout) + d <- d[, -pos] + if (length(result$coefficients[, 4][-1]) == 2) { + min <- min(result$coefficients[, 4][-1], na.rm = TRUE) + lastname <- names(result$coefficients[, 4][-1])[result$coefficients[,4][-1] == min] + } + if (is.null(dim(d))) { + d <- as.data.frame(d) + colnames(d) <- lastname + } + lm1 <- glm(y ~ ., data = d, family=family, epsilon=epsilon) + result <- summary(lm1) + max <- max(result$coefficients[, 4][-1], na.rm = TRUE) + if (length(result$coefficients[, 4][-1]) == 1) { + max <- result$coefficients[, 4][-1] + if (max > alfa) { + max = 0 + lm1 <- glm(y ~ 1, family=family, epsilon=epsilon) + } + } + } + return(lm1) +} + +unlockBinding("stepback", as.environment("package:maSigPro")) +assignInNamespace("stepback", stepback, ns="maSigPro", envir=as.environment("package:maSigPro")) +assign("stepback", stepback, as.environment("package:maSigPro")) +lockBinding("stepback", as.environment("package:maSigPro")) +# End of temporary code to fix stepback.R + +options(stringAsFactors = FALSE, useFancyQuotes = FALSE) +args <- commandArgs(trailingOnly = TRUE) + +# specify our desired options in a list +# by default OptionParser will add an help option equivalent to +# make_option(c("-h", "--help"), action="store_true", default=FALSE, +# help="Show this help message and exit") +option_list <- list( + make_option(c("-q", "--quiet"), action="store_false", + dest="verbose", help="Print little output"), + make_option(c("-e", "--edesign"), 
type="character"), + make_option(c("-d", "--data"), type="character"), + make_option(c("-o", "--outfile"), type="character"), + make_option("--degree", type="integer", default=1), + make_option("--time_col", type="integer", default=1), + make_option("--repl_col", type="integer", default=2), + make_option("--qvalue", type="double", default=0.05), + make_option("--min_obs", type="integer", default=6), + make_option("--step_method", type="character", default="backward"), + make_option("--nvar_correction", type="logical", default=FALSE), + make_option("--alfa", type="double", default=0.05), + make_option("--rsq", type="double", default=0.7), + make_option("--vars", type="character", default="groups"), + make_option("--significant_intercept", type="character", default="dummy"), + make_option("--cluster_data", type="integer", default=1), + make_option(c("-k", "--k"), type="integer", default=9), + make_option("--cluster_method", type="character", default="hclust"), + make_option("--distance", type="character", default="cor"), + make_option("--agglo_method", type="character", default="ward.D"), + make_option("--iter_max", type="integer", default=500), + make_option("--color_mode", type="character", default="rainbow"), + make_option("--show_fit", type="logical", default=TRUE), + make_option("--show_lines", type="logical", default=TRUE), + make_option("--cexlab", type="double", default=0.8), + make_option("--legend", type="logical", default=TRUE) +) + +# get command line options, if help option encountered print help and exit, +# otherwise if options not found on command line then set defaults +opt <- parse_args(OptionParser(option_list=option_list)) + +# enforce the following required arguments +if (is.null(opt$edesign)) { + cat("'edesign' is required\n") + q(status=1) +} +if (is.null(opt$data)) { + cat("'data' is required\n") + q(status=1) +} +if (is.null(opt$outfile)) { + cat("'outfile' is required\n") + q(status=1) +} + +verbose <- if (is.null(opt$quiet)) { + TRUE +} 
else { + FALSE +} + +edesign <- as.matrix(read.table(opt$edesign, header=TRUE, row.names = 1)) + +data <- read.table(opt$data, header=TRUE, check.names=FALSE) + +results <- maSigPro(data, edesign, degree = opt$degree, time.col = opt$time_col, + repl.col = opt$repl_col, Q = opt$qvalue, min.obs = opt$min_obs, + step.method = opt$step_method, nvar.correction = opt$nvar_correction, + alfa = opt$alfa, rsq = opt$rsq, vars = opt$vars, + significant.intercept = opt$significant_intercept, + cluster.data = opt$cluster_data, k = opt$k, + cluster.method = opt$cluster_method, distance = opt$distance, + agglo.method = opt$agglo_method, iter.max = opt$iter_max, + color.mode = opt$color_mode, show.fit = opt$show_fit, + show.lines = opt$show_lines, cexlab = opt$cexlab, + legend = opt$legend) + +filename <- opt$outfile + +write.table((results$summary), file=filename, sep="\t", quote=FALSE, + row.names=FALSE, col.names=TRUE) + +cat("Session information:\n\n") + +sessionInfo()
--- a/masigpro.xml Mon May 15 07:29:03 2017 -0400 +++ b/masigpro.xml Thu Jun 01 11:10:22 2017 -0400 @@ -1,430 +1,430 @@ -<tool id="masigpro" name="maSigPro" version="1.49.0.0"> - <description>Significant Gene Expression Profile Differences in Time Course Gene Expression Data</description> - <requirements> - <requirement type="package" version="1.49.0">bioconductor-masigpro</requirement> - <requirement type="package" version="1.3.2">r-optparse</requirement> - <requirement type="package" version="4.4">sed</requirement> - </requirements> - <stdio> - <regex match="Execution halted" - source="both" - level="fatal" - description="Execution halted." /> - <regex match="Error in" - source="both" - level="fatal" - description="An undefined error occurred, please check your input carefully and contact your administrator." /> - <regex match="Fatal error" - source="both" - level="fatal" - description="An undefined error occurred, please check your input carefully and contact your administrator." /> - </stdio> - <version_command> - <![CDATA[ - echo $(R --version | grep version | grep -v GNU)", maSigPro version" $(R --vanilla --slave -e "library(maSigPro); cat(sessionInfo()\$otherPkgs\$maSigPro\$Version)" 2> /dev/null | grep -v -i "WARNING: ") - ]]> - </version_command> - <command> - <![CDATA[ - #if str($source.source_selector) == "advanced": - paste - #set $start = True - #set $header = '' - #for $time in $source.rep_time: - #for $file in $time.files: - #if $start: - <(cut -f1 $file) - #set $start = False - #end if - #set $header += ' "' + $file.name + '"' - <(cut -f2 $file) - #end for - #end for - > data && sed -i '1i$header' data && - #if $source.enable_output: - ln -f data $data_out && ln -f $design_matrix $edesign_out && - #end if - #set $data = 'data' - #set $edesign = $design_matrix - #else: - #set $data = $source.data - #set $edesign = $source.edesign - #end if - Rscript '${__tool_directory__}/masigpro.R' - -e '$edesign' - -d '$data' - -o '$masigpro_out' - #if 
str($source.source_selector) == "defaults": - --time_col $source.time_col - --repl_col $source.repl_col - #end if - --degree $makeDesignMatrix.degree - --qvalue $p_vector.qvalue - --min_obs $p_vector.min_obs - --step_method '$Tfit.step_method' - --nvar_correction $Tfit.nvar_correction - --alfa $Tfit.alfa - --rsq $getSiggenes.rsq - --vars '$getSiggenes.vars' - --significant_intercept '$getSiggenes.significant_intercept' - #if $pdf.pdf_selector: - --cluster_data $pdf.seeGenes.clusterData - -k $pdf.seeGenes.k - --cluster_method $pdf.seeGenes.clustering.clusterMethod - #if str($pdf.seeGenes.clustering.clusterMethod) == "hclust": - --distance $pdf.seeGenes.clustering.distance - --agglo_method $pdf.seeGenes.clustering.aggloMethod - #end if - #if str($pdf.seeGenes.clustering.clusterMethod) == "kmeans": - --iter_max $pdf.seeGenes.clustering.iterMax - #end if - --color_mode $pdf.seeGenes.colorMode - --show_fit $pdf.seeGenes.showFit - --show_lines $pdf.seeGenes.showLines - --cexlab $pdf.seeGenes.cexlab - --legend $pdf.seeGenes.legend - #end if - ]]> - </command> - <configfiles> -<configfile name="design_matrix">#if str($source.source_selector) == "advanced": -#set $header = "Name Time Replicate" -#for $group in $source.rep_groups: - #set $header = $header + ' ' + str($group.name) -#end for -$header -#set $c = len($source.rep_repl) + 1 -#for $time in $source.rep_time: - #for $file in $time.files: - #set $is_repl = False - #for $i, $repl in enumerate($source.rep_repl): - #if str($file) in str($repl.files): - #set $r = $i + 1 - #set $is_repl = True - #end if - #end for - #if $is_repl == False: - #set $r = $c - #set $c += 1 - #end if - #set $line = '"' + str($file.name) + '" ' + str($time.time) + ' ' + str($r) - #for $group in $source.rep_groups: - #if str($file) in str($group.files): - #set $line += " 1" - #else - #set $line += " 0" - #end if - #end for -$line - #end for -#end for -#end if -</configfile> - </configfiles> - <inputs> - <conditional name="source"> - <param 
label="Choose data source" name="source_selector" - help="Choose if you want to provide seperate count files (e.g. from HTSeq-count or feature-seq) - and define your experiment design matrix here, or if you have maSigPro edesign and data input files already." - type="select"> - <option value="defaults">Use maSigPro edesign and data files</option> - <option value="advanced">Seperate count data (e.g. from HTSeq-count or feature-count)</option> - </param> - <when value="defaults"> - <param name="edesign" format="tabular,txt" type="data" label="Experiment matrix" - help="Matrix describing experimental design. Rows must be arrays and columns experiment descriptors" /> - <param name="data" format="tabular,txt" type="data" label="Gene expression matrix" - help="Matrix containing normalized gene expression data. Genes must be in rows and arrays in columns" /> - <param name="time_col" label="Column number containing time values" type="integer" value="1" - help="Column number in edesign containing time values. Default is first column" /> - <param name="repl_col" label="Column number containing replicate coding" type="integer" value="2" - help="Column number in edesign containing coding for replicate arrays. Default is second column" /> - </when> - <when value="advanced"> - <param name="enable_output" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Output generated maSigPro input files?" - help="Choose if you want to output the generated edesign and data files for direct use in maSigPro as history elements." 
/> - <repeat name="rep_time" title="Time values" min="1" default="1"> - <param name="time" type="integer" value="0" label="Specify a numerical time value" help="Only numbers will be allowed"> - <sanitizer> - <valid initial="string.digits"></valid> - </sanitizer> - </param> - <param name="files" type="data" format="tabular" multiple="true" label="Counts file(s) at this time value" /> - </repeat> - <repeat name="rep_groups" title="Experimental groups" min="1" default="1"> - <param name="name" type="text" value="Group title" label="Specify the name of this experimental group" - help="Use a single name without spaces or special characters"> - </param> - <param name="files" type="data" format="tabular" multiple="true" - label="Counts file(s) belonging to this experimental group" /> - </repeat> - <repeat name="rep_repl" title="Replicates" min="0" default="0"> - <param name="files" type="data" format="tabular" multiple="true" label="Counts files that are replicates" /> - </repeat> - </when> - </conditional> - <section name="makeDesignMatrix" - title="Step 1: make.Design.Matrix - Defining the regression model" - help="‘make.design.matrix’ creates the design matrix of dummies for - fitting time series micorarray gene expression experiments."> - <param name="degree" type="integer" value="1" - label="Degree of regression fit polynome" - help="The degree of the regression fit polynome. 
‘degree’ = 1 returns - linear regression, ‘degree’ = 2 returns quadratic regression, etc" /> - </section> - <section name="p_vector" - title="Step 2: p.vector - Finding significant genes" - help="‘p.vector’ performs a regression fit for each gene taking all - variables present in the model given by a regression matrix and - returns a list of FDR corrected significant genes"> - <param name="qvalue" type="float" value="0.05" label="Q" help="Significance level" /> - <param name="min_obs" label="Minimum values" type="integer" value="6" - help="Genes with less than this number of true numerical values - will be excluded from the analysis. Minimum value to estimate - the model is (degree+1)xGroups+1. Default is 6." /> - </section> - <section name="Tfit" title="Step 3: T.fit - Finding significant differences" - help="‘T.fit’ selects the best regression model for each gene using - stepwise regression. In the maSigPro approach ‘p.vector’ and ‘T.fit’ are subsequent - steps, meaning that significant genes are first selected on the - basis of a general model and then the significant variables for - each gene are found by step-wise regression."> - <param name="step_method" type="select" label="Step regression" - help="The step regression can be ‘backward’ or ‘forward’ indicating - whether the step procedure starts from the model with all or none - variables. With the ‘two.ways.backward’ or ‘two.ways.forward’ - options the variables are both allowed to get in and out. 
At each - step the p-value of each variable is computed and variables get - in/out the model when this p-value is lower or higher than given - threshold alfa."> - <option selected="True" value="backward">backward</option> - <option value="forward">forward</option> - <option value="two.ways.backward">two.ways.backward</option> - <option value="two.ways.forward">two.ways.forward</option> - </param> - <param type="boolean" name="nvar_correction" label="nvar correction" truevalue="TRUE" falsevalue="FALSE" checked="false" - help="When nvar.correction is TRUE the given significance - level is corrected by the number of variables in the model."> - <option selected="True" value="FALSE">False</option> - <option value="TRUE">True</option> - </param> - <param name="alfa" type="float" value="0.05" label="alfa" help="Significance level used for variable selection in the stepwise regression" /> - </section> - <section name="getSiggenes" - title="Step 4: get.siggenes - Obtaining lists of significant genes" - help="This function creates lists of significant genes for a set of - variables whose significance value has been computed with the - ‘T.fit’ function."> - <param name="rsq" type="float" value="0.7" label="rsq" - help="cut-off level at the R-squared value for the stepwise - regression fit. Only genes with R-squared more than rsq are - selected" /> - <param name="vars" type="select" label="Variables" - help="Variables for which to extract significant genes. - ‘all’: generates one single matrix or gene list with all - significant genes. - - ‘each’: generates as many significant genes extractions as - variables in the general regression model. Each extraction - contains the significant genes for that variable. - - ‘groups’: generates a significant genes extraction for each - experimental group. - - The difference between ‘each’ and ‘groups’ is that in the - first case the variables of the same group (e.g. 
‘TreatmentA’ - and ‘time*TreatmentA’) will be extracted separately and in t - he - second case jointly."> - <option selected="True" value="groups">Groups</option> - <option value="each">Each</option> - <option value="all">All</option> - </param> - <param name="significant_intercept" type="select" label="Significant intercept" - help="The argument ‘significant.intercept’ modulates the treatment for - intercept coefficients to apply for selecting significant genes - when ‘vars’ equals ‘groups’. There are three possible values: - ‘none’, no significant intercept (differences) are considered - for significant gene selection, ‘dummy’, includes genes with - significant intercept differences between control and experimental - groups, and ‘all’ when both significant intercept coefficient - for the control group and significant intercept differences are - considered for selecting significant genes."> - <option selected="True" value="dummy">Dummy</option> - <option value="none">None</option> - <option value="all">All</option> - </param> - </section> - <conditional name="pdf"> - <param label="Generate visualization PDF" name="pdf_selector" type="boolean" - truevalue="1" falsevalue="0" checked="true" - help="Choose if you want to generate a PDF file containing the visualizations" /> - <when value="1"> - <section name="seeGenes" title="Step 5: see.genes - Visualization" - help="This function provides visualisation tools for gene expression - values in a time course experiment. The function first calls the - heatmap function for a general overview of experiment results. - Next a partioning of the data is generated using a clustering - method. 
The results of the clustering are visualized both as gene - expression profiles extended along all arrays in the experiment, - as provided by the plot.profiles function, and as summary - expression profiles for comparison among experimental groups."> - <param name="clusterData" label="Cluster Data" type="integer" value="1" - help="Data clustering can be done on the basis of either the original - expression values, the regression coefficients, or the t.scores. - In case ‘data’ is a ‘get.siggenes’ object, this is given by - providing the element names of the list - ‘c(sig.profiles,coefficients,t.score)’ of their list - position (1,2 or 3)." /> - <param name="k" type="integer" label="Number of clusters for data partioning" value="9" /> - <conditional name="clustering"> - <param name="clusterMethod" label="Cluster Method" type="select" - help="clustering method for data partioning. Currently - ‘hclust’, ‘kmeans’ and ‘Mclust’ are supported"> - <option selected="True" value="hclust">hclust</option> - <option value="kmeans">kmeans</option> - <option value="Mclust">Mclust</option> - </param> - <when value="hclust"> - <param name="distance" type="select" label="Distance measure" - help="Distance measurement function when ‘cluster.method’ is - ‘hclust’. 
Default uses correlation."> - <option selected="True" value="cor">Correlation</option> - <option value="euclidean">Euclidean</option> - <option value="maximum">Maximum</option> - <option value="manhattan">Manhattan</option> - <option value="Canberra">Canberra</option> - <option value="binary">Binary</option> - <option value="minkowski">Minkowski</option> - </param> - <param name="aggloMethod" type="select" label="Agglomeration method" - help="The agglomeration method to be used when ‘cluster.method’ is ‘hclust’."> - <option selected="True" value="ward.D">ward.D</option> - <option value="ward.D2">ward.D2</option> - <option value="single">single</option> - <option value="complete">complete</option> - <option value="average">average (= UPGMA)</option> - <option value="mcquitty">mcquitty (= WPGMA)</option> - <option value="median">median (= WPGMC)</option> - <option value="centroid">centroid (= UPGMC)</option> - </param> - </when> - <when value="kmeans"> - <param name="iterMax" type="integer" label="Maximum number of iterations" value="500" - help="Maximum number of iterations when ‘cluster.method’ is ‘kmeans’" /> - </when> - </conditional> - <param name="colorMode" label="Color Mode" type="select" help="Color scale for plotting profiles. Can be either ‘rainbow’ or ‘gray’"> - <option selected="True" value="rainbow">Rainbow</option> - <option value="gray">Gray</option> - </param> - <param name="showFit" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Show regression fit curves?" - help="Indicating whether regression fit curves must be plotted" /> - <param name="showLines" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Draw lines?" 
- help="Indicating whether a line must be drawn joining plotted data points for each group" /> - <param name="cexlab" type="float" value="0.8" label="Magnification for x labels" - help="Graphical parameter maginfication to be used for x labels in plotting functions" /> - <param name="legend" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Add legend to plotting profiles?" - help="Indicating whether legend must be added when plotting profiles" /> - </section> - </when> - </conditional> - </inputs> - <outputs> - <data format="tabular" name="masigpro_out" label="maSigPro result file on ${on_string}"> - </data> - <data format="txt" name="edesign_out" label="maSigPro edesign file on ${on_string}"> - <filter> - (( - source['source_selector'] == 'advanced' and - source['enable_output'] == True - )) - </filter> - </data> - <data format="txt" name="data_out" label="maSigPro data file on ${on_string}"> - <filter> - (( - source['source_selector'] == 'advanced' and - source['enable_output'] == True - )) - </filter> - </data> - <data format="pdf" name="pdf_out" from_work_dir="Results.pdf" label="maSigPro Plot file on ${on_string}"> - <filter> - (( - pdf['pdf_selector'] == True - )) - </filter> - </data> - </outputs> - <tests> - <test> - <param name="source_selector" value="advanced" /> - <param name="enable_output" value="1" /> - <repeat name="rep_time"> - <param name="time" value="1" /> - <param name="files" value="control_1H.counts,treat_1H.counts" /> - </repeat> - <repeat name="rep_time"> - <param name="time" value="2" /> - <param name="files" value="control_2H.counts,treat_2H.counts" /> - </repeat> - <repeat name="rep_time"> - <param name="time" value="3" /> - <param name="files" value="control_3H.counts,treat_3H_1.counts,treat_3H_2.counts" /> - </repeat> - <param name="replicates_selector" value="advanced" /> - <repeat name="rep_repl"> - <param name="files" value="treat_3H_1.counts,treat_3H_2.counts" /> - </repeat> - <repeat name="rep_groups"> - 
<param name="name" value="Control" /> - <param name="files" value="control_1H.counts,control_2H.counts,control_3H.counts" /> - </repeat> - <repeat name="rep_groups"> - <param name="name" value="Treatment" /> - <param name="files" value="treat_1H.counts,treat_2H.counts,treat_3H_1.counts,treat_3H_2.counts" /> - </repeat> - <output name="masigpro_out" file="masigpro_out.tab" /> - <output name="data_out" file="data_out.txt" /> - <output name="edesign_out" file="edesign_out.txt" /> - <output name="pdf_out" file="Results.pdf" /> - </test> - <test> - <param name="source_selector" value="defaults" /> - <param name="edesign" value="edesign_out.txt" /> - <param name="data" value="data_out.txt" /> - <output name="masigpro_out" file="masigpro_out.tab" /> - <output name="pdf_out" file="Results.pdf" /> - </test> - </tests> - <help> -<![CDATA[ -.. class:: infomark - -**What it does** - -maSigPro_ is a regression based approach to find genes for which there are significant gene expression profile differences between experimental groups in time course microarray and RNA-Seq experiments. - -**Inputs** - -The maSigPro wrapper has two options for input data: - - - directly through two seperate text files containing the experiment design (edesign) and the data or - - count tables generated from HTSeq-count. Count tables must be generated for each sample individually. - -To set up an experimental design from seperate count files you first have to select which files belong to a certain time point. -Likewise you can specify which files are replicates. In a third step you have to create the experimental groups and select the related files. -For a more comfortable setup in future analysis you have the option to output the generated edesign and data files. - -**Output** - -maSigPro_ generates a summary file containing the list of significant genes. Additionally you can obtain a PDF file containing plots of profiles and groups that visualize the clustering analysis. - -.. 
_maSigPro: https://bioconductor.org/packages/release/bioc/html/maSigPro.html -]]> - </help> - <citations> - <citation type="doi">10.1093/bioinformatics/btl056</citation> - </citations> -</tool> \ No newline at end of file +<tool id="masigpro" name="maSigPro" version="1.49.3.0"> + <description>Significant Gene Expression Profile Differences in Time Course Gene Expression Data</description> + <requirements> + <requirement type="package" version="1.49.3">bioconductor-masigpro</requirement> + <requirement type="package" version="1.3.2">r-optparse</requirement> + <requirement type="package" version="4.4">sed</requirement> + </requirements> + <stdio> + <regex match="Execution halted" + source="both" + level="fatal" + description="Execution halted." /> + <regex match="Error in" + source="both" + level="fatal" + description="An undefined error occurred, please check your input carefully and contact your administrator." /> + <regex match="Fatal error" + source="both" + level="fatal" + description="An undefined error occurred, please check your input carefully and contact your administrator." 
/> + </stdio> + <version_command> + <![CDATA[ + echo $(R --version | grep version | grep -v GNU)", maSigPro version" $(R --vanilla --slave -e "library(maSigPro); cat(sessionInfo()\$otherPkgs\$maSigPro\$Version)" 2> /dev/null | grep -v -i "WARNING: ") + ]]> + </version_command> + <command> + <![CDATA[ + #if str($source.source_selector) == "advanced": + paste + #set $start = True + #set $header = '' + #for $time in $source.rep_time: + #for $file in $time.files: + #if $start: + <(cut -f1 $file) + #set $start = False + #end if + #set $header += ' "' + $file.name + '"' + <(cut -f2 $file) + #end for + #end for + > data && sed -i '1i$header' data && + #if $source.enable_output: + ln -f data $data_out && ln -f $design_matrix $edesign_out && + #end if + #set $data = 'data' + #set $edesign = $design_matrix + #else: + #set $data = $source.data + #set $edesign = $source.edesign + #end if + Rscript '${__tool_directory__}/masigpro.R' + -e '$edesign' + -d '$data' + -o '$masigpro_out' + #if str($source.source_selector) == "defaults": + --time_col $source.time_col + --repl_col $source.repl_col + #end if + --degree $makeDesignMatrix.degree + --qvalue $p_vector.qvalue + --min_obs $p_vector.min_obs + --step_method '$Tfit.step_method' + --nvar_correction $Tfit.nvar_correction + --alfa $Tfit.alfa + --rsq $getSiggenes.rsq + --vars '$getSiggenes.vars' + --significant_intercept '$getSiggenes.significant_intercept' + #if $pdf.pdf_selector: + --cluster_data $pdf.seeGenes.clusterData + -k $pdf.seeGenes.k + --cluster_method $pdf.seeGenes.clustering.clusterMethod + #if str($pdf.seeGenes.clustering.clusterMethod) == "hclust": + --distance $pdf.seeGenes.clustering.distance + --agglo_method $pdf.seeGenes.clustering.aggloMethod + #end if + #if str($pdf.seeGenes.clustering.clusterMethod) == "kmeans": + --iter_max $pdf.seeGenes.clustering.iterMax + #end if + --color_mode $pdf.seeGenes.colorMode + --show_fit $pdf.seeGenes.showFit + --show_lines $pdf.seeGenes.showLines + --cexlab $pdf.seeGenes.cexlab + 
--legend $pdf.seeGenes.legend + #end if + ]]> + </command> + <configfiles> +<configfile name="design_matrix">#if str($source.source_selector) == "advanced": +#set $header = "Name Time Replicate" +#for $group in $source.rep_groups: + #set $header = $header + ' ' + str($group.name) +#end for +$header +#set $c = len($source.rep_repl) + 1 +#for $time in $source.rep_time: + #for $file in $time.files: + #set $is_repl = False + #for $i, $repl in enumerate($source.rep_repl): + #if str($file) in str($repl.files): + #set $r = $i + 1 + #set $is_repl = True + #end if + #end for + #if $is_repl == False: + #set $r = $c + #set $c += 1 + #end if + #set $line = '"' + str($file.name) + '" ' + str($time.time) + ' ' + str($r) + #for $group in $source.rep_groups: + #if str($file) in str($group.files): + #set $line += " 1" + #else + #set $line += " 0" + #end if + #end for +$line + #end for +#end for +#end if +</configfile> + </configfiles> + <inputs> + <conditional name="source"> + <param label="Choose data source" name="source_selector" + help="Choose if you want to provide separate count files (e.g. from HTSeq-count or feature-seq) + and define your experiment design matrix here, or if you have maSigPro edesign and data input files already." + type="select"> + <option value="defaults">Use maSigPro edesign and data files</option> + <option value="advanced">Separate count data (e.g. from HTSeq-count or feature-count)</option> + </param> + <when value="defaults"> + <param name="edesign" format="tabular,txt" type="data" label="Experiment matrix" + help="Matrix describing experimental design. Rows must be arrays and columns experiment descriptors" /> + <param name="data" format="tabular,txt" type="data" label="Gene expression matrix" + help="Matrix containing normalized gene expression data. Genes must be in rows and arrays in columns" /> + <param name="time_col" label="Column number containing time values" type="integer" value="1" + help="Column number in edesign containing time values. 
Default is first column" /> + <param name="repl_col" label="Column number containing replicate coding" type="integer" value="2" + help="Column number in edesign containing coding for replicate arrays. Default is second column" /> + </when> + <when value="advanced"> + <param name="enable_output" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Output generated maSigPro input files?" + help="Choose if you want to output the generated edesign and data files for direct use in maSigPro as history elements." /> + <repeat name="rep_time" title="Time values" min="1" default="1"> + <param name="time" type="integer" value="0" label="Specify a numerical time value" help="Only numbers will be allowed"> + <sanitizer> + <valid initial="string.digits"></valid> + </sanitizer> + </param> + <param name="files" type="data" format="tabular" multiple="true" label="Counts file(s) at this time value" /> + </repeat> + <repeat name="rep_groups" title="Experimental groups" min="1" default="1"> + <param name="name" type="text" value="Group title" label="Specify the name of this experimental group" + help="Use a single name without spaces or special characters"> + </param> + <param name="files" type="data" format="tabular" multiple="true" + label="Counts file(s) belonging to this experimental group" /> + </repeat> + <repeat name="rep_repl" title="Replicates" min="0" default="0"> + <param name="files" type="data" format="tabular" multiple="true" label="Counts files that are replicates" /> + </repeat> + </when> + </conditional> + <section name="makeDesignMatrix" + title="Step 1: make.Design.Matrix - Defining the regression model" + help="‘make.design.matrix’ creates the design matrix of dummies for + fitting time series microarray gene expression experiments."> + <param name="degree" type="integer" value="1" + label="Degree of regression fit polynomial" + help="The degree of the regression fit polynomial. 
‘degree’ = 1 returns + linear regression, ‘degree’ = 2 returns quadratic regression, etc" /> + </section> + <section name="p_vector" + title="Step 2: p.vector - Finding significant genes" + help="‘p.vector’ performs a regression fit for each gene taking all + variables present in the model given by a regression matrix and + returns a list of FDR corrected significant genes"> + <param name="qvalue" type="float" value="0.05" label="Q" help="Significance level" /> + <param name="min_obs" label="Minimum values" type="integer" value="6" + help="Genes with less than this number of true numerical values + will be excluded from the analysis. Minimum value to estimate + the model is (degree+1)xGroups+1. Default is 6." /> + </section> + <section name="Tfit" title="Step 3: T.fit - Finding significant differences" + help="‘T.fit’ selects the best regression model for each gene using + stepwise regression. In the maSigPro approach ‘p.vector’ and ‘T.fit’ are subsequent + steps, meaning that significant genes are first selected on the + basis of a general model and then the significant variables for + each gene are found by step-wise regression."> + <param name="step_method" type="select" label="Step regression" + help="The step regression can be ‘backward’ or ‘forward’ indicating + whether the step procedure starts from the model with all or none + variables. With the ‘two.ways.backward’ or ‘two.ways.forward’ + options the variables are both allowed to get in and out. 
At each + step the p-value of each variable is computed and variables get + in/out the model when this p-value is lower or higher than given + threshold alfa."> + <option selected="True" value="backward">backward</option> + <option value="forward">forward</option> + <option value="two.ways.backward">two.ways.backward</option> + <option value="two.ways.forward">two.ways.forward</option> + </param> + <param type="boolean" name="nvar_correction" label="nvar correction" truevalue="TRUE" falsevalue="FALSE" checked="false" + help="When nvar.correction is TRUE the given significance + level is corrected by the number of variables in the model."> + <option selected="True" value="FALSE">False</option> + <option value="TRUE">True</option> + </param> + <param name="alfa" type="float" value="0.05" label="alfa" help="Significance level used for variable selection in the stepwise regression" /> + </section> + <section name="getSiggenes" + title="Step 4: get.siggenes - Obtaining lists of significant genes" + help="This function creates lists of significant genes for a set of + variables whose significance value has been computed with the + ‘T.fit’ function."> + <param name="rsq" type="float" value="0.7" label="rsq" + help="cut-off level at the R-squared value for the stepwise + regression fit. Only genes with R-squared more than rsq are + selected" /> + <param name="vars" type="select" label="Variables" + help="Variables for which to extract significant genes. + ‘all’: generates one single matrix or gene list with all + significant genes. + + ‘each’: generates as many significant genes extractions as + variables in the general regression model. Each extraction + contains the significant genes for that variable. + + ‘groups’: generates a significant genes extraction for each + experimental group. + + The difference between ‘each’ and ‘groups’ is that in the + first case the variables of the same group (e.g. 
‘TreatmentA’ + and ‘time*TreatmentA’) will be extracted separately and in the + second case jointly."> + <option selected="True" value="groups">Groups</option> + <option value="each">Each</option> + <option value="all">All</option> + </param> + <param name="significant_intercept" type="select" label="Significant intercept" + help="The argument ‘significant.intercept’ modulates the treatment for + intercept coefficients to apply for selecting significant genes + when ‘vars’ equals ‘groups’. There are three possible values: + ‘none’, no significant intercept (differences) are considered + for significant gene selection, ‘dummy’, includes genes with + significant intercept differences between control and experimental + groups, and ‘all’ when both significant intercept coefficient + for the control group and significant intercept differences are + considered for selecting significant genes."> + <option selected="True" value="dummy">Dummy</option> + <option value="none">None</option> + <option value="all">All</option> + </param> + </section> + <conditional name="pdf"> + <param label="Generate visualization PDF" name="pdf_selector" type="boolean" + truevalue="1" falsevalue="0" checked="true" + help="Choose if you want to generate a PDF file containing the visualizations" /> + <when value="1"> + <section name="seeGenes" title="Step 5: see.genes - Visualization" + help="This function provides visualisation tools for gene expression + values in a time course experiment. The function first calls the + heatmap function for a general overview of experiment results. + Next a partitioning of the data is generated using a clustering + method. 
The results of the clustering are visualized both as gene + expression profiles extended along all arrays in the experiment, + as provided by the plot.profiles function, and as summary + expression profiles for comparison among experimental groups."> + <param name="clusterData" label="Cluster Data" type="integer" value="1" + help="Data clustering can be done on the basis of either the original + expression values, the regression coefficients, or the t.scores. + In case ‘data’ is a ‘get.siggenes’ object, this is given by + providing the element names of the list + ‘c(sig.profiles,coefficients,t.score)’ or their list + position (1,2 or 3)." /> + <param name="k" type="integer" label="Number of clusters for data partitioning" value="9" /> + <conditional name="clustering"> + <param name="clusterMethod" label="Cluster Method" type="select" + help="clustering method for data partitioning. Currently + ‘hclust’, ‘kmeans’ and ‘Mclust’ are supported"> + <option selected="True" value="hclust">hclust</option> + <option value="kmeans">kmeans</option> + <option value="Mclust">Mclust</option> + </param> + <when value="hclust"> + <param name="distance" type="select" label="Distance measure" + help="Distance measurement function when ‘cluster.method’ is + ‘hclust’. 
Default uses correlation."> + <option selected="True" value="cor">Correlation</option> + <option value="euclidean">Euclidean</option> + <option value="maximum">Maximum</option> + <option value="manhattan">Manhattan</option> + <option value="Canberra">Canberra</option> + <option value="binary">Binary</option> + <option value="minkowski">Minkowski</option> + </param> + <param name="aggloMethod" type="select" label="Agglomeration method" + help="The agglomeration method to be used when ‘cluster.method’ is ‘hclust’."> + <option selected="True" value="ward.D">ward.D</option> + <option value="ward.D2">ward.D2</option> + <option value="single">single</option> + <option value="complete">complete</option> + <option value="average">average (= UPGMA)</option> + <option value="mcquitty">mcquitty (= WPGMA)</option> + <option value="median">median (= WPGMC)</option> + <option value="centroid">centroid (= UPGMC)</option> + </param> + </when> + <when value="kmeans"> + <param name="iterMax" type="integer" label="Maximum number of iterations" value="500" + help="Maximum number of iterations when ‘cluster.method’ is ‘kmeans’" /> + </when> + </conditional> + <param name="colorMode" label="Color Mode" type="select" help="Color scale for plotting profiles. Can be either ‘rainbow’ or ‘gray’"> + <option selected="True" value="rainbow">Rainbow</option> + <option value="gray">Gray</option> + </param> + <param name="showFit" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Show regression fit curves?" + help="Indicating whether regression fit curves must be plotted" /> + <param name="showLines" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Draw lines?" 
+ help="Indicating whether a line must be drawn joining plotted data points for each group" /> + <param name="cexlab" type="float" value="0.8" label="Magnification for x labels" + help="Graphical parameter magnification to be used for x labels in plotting functions" /> + <param name="legend" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Add legend to plotting profiles?" + help="Indicating whether legend must be added when plotting profiles" /> + </section> + </when> + </conditional> + </inputs> + <outputs> + <data format="tabular" name="masigpro_out" label="maSigPro result file on ${on_string}"> + </data> + <data format="txt" name="edesign_out" label="maSigPro edesign file on ${on_string}"> + <filter> + (( + source['source_selector'] == 'advanced' and + source['enable_output'] == True + )) + </filter> + </data> + <data format="txt" name="data_out" label="maSigPro data file on ${on_string}"> + <filter> + (( + source['source_selector'] == 'advanced' and + source['enable_output'] == True + )) + </filter> + </data> + <data format="pdf" name="pdf_out" from_work_dir="Results.pdf" label="maSigPro Plot file on ${on_string}"> + <filter> + (( + pdf['pdf_selector'] == True + )) + </filter> + </data> + </outputs> + <tests> + <test> + <param name="source_selector" value="advanced" /> + <param name="enable_output" value="1" /> + <repeat name="rep_time"> + <param name="time" value="1" /> + <param name="files" value="control_1H.counts,treat_1H.counts" /> + </repeat> + <repeat name="rep_time"> + <param name="time" value="2" /> + <param name="files" value="control_2H.counts,treat_2H.counts" /> + </repeat> + <repeat name="rep_time"> + <param name="time" value="3" /> + <param name="files" value="control_3H.counts,treat_3H_1.counts,treat_3H_2.counts" /> + </repeat> + <param name="replicates_selector" value="advanced" /> + <repeat name="rep_repl"> + <param name="files" value="treat_3H_1.counts,treat_3H_2.counts" /> + </repeat> + <repeat name="rep_groups"> + 
<param name="name" value="Control" /> + <param name="files" value="control_1H.counts,control_2H.counts,control_3H.counts" /> + </repeat> + <repeat name="rep_groups"> + <param name="name" value="Treatment" /> + <param name="files" value="treat_1H.counts,treat_2H.counts,treat_3H_1.counts,treat_3H_2.counts" /> + </repeat> + <output name="masigpro_out" file="masigpro_out.tab" /> + <output name="data_out" file="data_out.txt" /> + <output name="edesign_out" file="edesign_out.txt" /> + <output name="pdf_out" file="Results.pdf" /> + </test> + <test> + <param name="source_selector" value="defaults" /> + <param name="edesign" value="edesign_out.txt" /> + <param name="data" value="data_out.txt" /> + <output name="masigpro_out" file="masigpro_out.tab" /> + <output name="pdf_out" file="Results.pdf" /> + </test> + </tests> + <help> +<![CDATA[ +.. class:: infomark + +**What it does** + +maSigPro_ is a regression-based approach to find genes for which there are significant gene expression profile differences between experimental groups in time course microarray and RNA-Seq experiments. + +**Inputs** + +The maSigPro wrapper has two options for input data: + + - directly through two separate text files containing the experiment design (edesign) and the data or + - count tables generated from HTSeq-count. Count tables must be generated for each sample individually. + +To set up an experimental design from separate count files you first have to select which files belong to a certain time point. +Likewise you can specify which files are replicates. In a third step you have to create the experimental groups and select the related files. +For a more comfortable setup in future analysis you have the option to output the generated edesign and data files. + +**Output** + +maSigPro_ generates a summary file containing the list of significant genes. Additionally you can obtain a PDF file containing plots of profiles and groups that visualize the clustering analysis. + +.. 
_maSigPro: https://bioconductor.org/packages/release/bioc/html/maSigPro.html +]]> + </help> + <citations> + <citation type="doi">10.1093/bioinformatics/btl056</citation> + </citations> +</tool>