masigpro: masigpro.xml comparison

comparison masigpro.xml @ 0:c8c290f3ea7d draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/masigpro commit 5798bd978553dee97521c39920d263dd750e0755

author	iuc
date	Mon, 15 May 2017 07:29:03 -0400
parents
children	cc96abdef027

comparison

equal deleted inserted replaced

--1:000000000000
+:c8c290f3ea7d
+<tool id="masigpro" name="maSigPro" version="1.49.0.0">
+<description>Significant Gene Expression Profile Differences in Time Course Gene Expression Data</description>
+<!-- Packages requested for the job environment: the maSigPro Bioconductor package,
+     r-optparse, and sed (the command section runs "sed -i" on the merged count table). --><requirements>
+<requirement type="package" version="1.49.0">bioconductor-masigpro</requirement>
+<requirement type="package" version="1.3.2">r-optparse</requirement>
+<requirement type="package" version="4.4">sed</requirement>
+</requirements>
+<!-- Mark the job as failed when any of these messages shows up on stdout or stderr. -->
+<stdio>
+<regex match="Execution halted" source="both" level="fatal" description="Execution halted." />
+<regex match="Error in" source="both" level="fatal" description="An undefined error occurred, please check your input carefully and contact your administrator." />
+<regex match="Fatal error" source="both" level="fatal" description="An undefined error occurred, please check your input carefully and contact your administrator." />
+</stdio>
+<!-- Echoes the R version text (the lines of R's version output that mention "version" but not "GNU"),
+     then the maSigPro package version read from sessionInfo(). Stderr of the R call is discarded
+     and output lines containing "WARNING: " (case-insensitive) are dropped. --><version_command>
+<![CDATA[
+echo $(R --version | grep version | grep -v GNU)", maSigPro version" $(R --vanilla --slave -e "library(maSigPro); cat(sessionInfo()\$otherPkgs\$maSigPro\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
+]]>
+</version_command>
+<!-- Command template.
+     "advanced" source: pastes column 1 of the first counts file and column 2 of every counts file
+     into the working file "data", prepends a header of quoted dataset names with sed, and uses the
+     generated design_matrix config file as edesign. When enable_output is set, both files are also
+     hard-linked to the data_out / edesign_out outputs.
+     "defaults" source: the user supplied data and edesign datasets are used directly, together with
+     the time and replicate column numbers.
+     All dataset paths are single-quoted, and single quotes inside dataset names are escaped before
+     they are placed in the single-quoted sed expression, so a dataset name cannot break out of the
+     shell quoting. -->
+<command>
+<![CDATA[
+#if str($source.source_selector) == "advanced":
+paste
+#set $start = True
+#set $header = ''
+#for $time in $source.rep_time:
+#for $file in $time.files:
+#if $start:
+<(cut -f1 '$file')
+#set $start = False
+#end if
+#set $header += ' "' + str($file.name).replace("'", "'\\''") + '"'
+<(cut -f2 '$file')
+#end for
+#end for
+> data && sed -i '1i$header' data &&
+#if $source.enable_output:
+ln -f data '$data_out' && ln -f '$design_matrix' '$edesign_out' &&
+#end if
+#set $data = 'data'
+#set $edesign = $design_matrix
+#else:
+#set $data = $source.data
+#set $edesign = $source.edesign
+#end if
+Rscript '${__tool_directory__}/masigpro.R'
+-e '$edesign'
+-d '$data'
+-o '$masigpro_out'
+#if str($source.source_selector) == "defaults":
+--time_col $source.time_col
+--repl_col $source.repl_col
+#end if
+--degree $makeDesignMatrix.degree
+--qvalue $p_vector.qvalue
+--min_obs $p_vector.min_obs
+--step_method '$Tfit.step_method'
+--nvar_correction $Tfit.nvar_correction
+--alfa $Tfit.alfa
+--rsq $getSiggenes.rsq
+--vars '$getSiggenes.vars'
+--significant_intercept '$getSiggenes.significant_intercept'
+#if $pdf.pdf_selector:
+--cluster_data $pdf.seeGenes.clusterData
+-k $pdf.seeGenes.k
+--cluster_method $pdf.seeGenes.clustering.clusterMethod
+#if str($pdf.seeGenes.clustering.clusterMethod) == "hclust":
+--distance $pdf.seeGenes.clustering.distance
+--agglo_method $pdf.seeGenes.clustering.aggloMethod
+#end if
+#if str($pdf.seeGenes.clustering.clusterMethod) == "kmeans":
+--iter_max $pdf.seeGenes.clustering.iterMax
+#end if
+--color_mode $pdf.seeGenes.colorMode
+--show_fit $pdf.seeGenes.showFit
+--show_lines $pdf.seeGenes.showLines
+--cexlab $pdf.seeGenes.cexlab
+--legend $pdf.seeGenes.legend
+#end if
+]]>
+</command>
+<!-- Generates the experimental design table used in "advanced" mode (the template body is
+     wrapped in a check on source_selector, so no rows are emitted otherwise).
+     First line: "Name Time Replicate" followed by one column per experimental group name.
+     Then one row per counts file: quoted dataset name, time value, replicate number,
+     and a 1/0 flag per experimental group.
+     Replicate numbers: a file found in the i-th "Replicates" entry gets i + 1; every other
+     file gets its own number, counting up from len(rep_repl) + 1.
+     Membership is tested as a substring match on the string form of the file lists. --><configfiles>
+<configfile name="design_matrix">#if str($source.source_selector) == "advanced":
+#set $header = "Name Time Replicate"
+#for $group in $source.rep_groups:
+#set $header = $header + ' ' + str($group.name)
+#end for
+$header
+#set $c = len($source.rep_repl) + 1
+#for $time in $source.rep_time:
+#for $file in $time.files:
+#set $is_repl = False
+#for $i, $repl in enumerate($source.rep_repl):
+#if str($file) in str($repl.files):
+#set $r = $i + 1
+#set $is_repl = True
+#end if
+#end for
+#if $is_repl == False:
+#set $r = $c
+#set $c += 1
+#end if
+#set $line = '"' + str($file.name) + '" ' + str($time.time) + ' ' + str($r)
+#for $group in $source.rep_groups:
+#if str($file) in str($group.files):
+#set $line += " 1"
+#else
+#set $line += " 0"
+#end if
+#end for
+$line
+#end for
+#end for
+#end if
+</configfile>
+</configfiles>
+<inputs>
+<!-- Input source switch.
+     "defaults": ready-made maSigPro edesign and data matrices plus the edesign column numbers
+     holding time and replicate codes.
+     "advanced": per-sample count files that are assigned to time values, experimental groups and
+     (optionally) replicate sets; the edesign and data matrices are then generated by the wrapper. -->
+<conditional name="source">
+<param label="Choose data source" name="source_selector"
+help="Choose if you want to provide separate count files (e.g. from HTSeq-count or featureCounts)
+and define your experiment design matrix here, or if you have maSigPro edesign and data input files already."
+type="select">
+<option value="defaults">Use maSigPro edesign and data files</option>
+<option value="advanced">Separate count data (e.g. from HTSeq-count or featureCounts)</option>
+</param>
+<when value="defaults">
+<param name="edesign" format="tabular,txt" type="data" label="Experiment matrix"
+help="Matrix describing experimental design. Rows must be arrays and columns experiment descriptors" />
+<param name="data" format="tabular,txt" type="data" label="Gene expression matrix"
+help="Matrix containing normalized gene expression data. Genes must be in rows and arrays in columns" />
+<param name="time_col" label="Column number containing time values" type="integer" value="1"
+help="Column number in edesign containing time values. Default is first column" />
+<param name="repl_col" label="Column number containing replicate coding" type="integer" value="2"
+help="Column number in edesign containing coding for replicate arrays. Default is second column" />
+</when>
+<when value="advanced">
+<param name="enable_output" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Output generated maSigPro input files?"
+help="Choose if you want to output the generated edesign and data files for direct use in maSigPro as history elements." />
+<repeat name="rep_time" title="Time values" min="1" default="1">
+<param name="time" type="integer" value="0" label="Specify a numerical time value" help="Only numbers will be allowed">
+<sanitizer>
+<valid initial="string.digits"></valid>
+</sanitizer>
+</param>
+<param name="files" type="data" format="tabular" multiple="true" label="Counts file(s) at this time value" />
+</repeat>
+<repeat name="rep_groups" title="Experimental groups" min="1" default="1">
+<param name="name" type="text" value="Group title" label="Specify the name of this experimental group"
+help="Use a single name without spaces or special characters">
+</param>
+<param name="files" type="data" format="tabular" multiple="true"
+label="Counts file(s) belonging to this experimental group" />
+</repeat>
+<repeat name="rep_repl" title="Replicates" min="0" default="0">
+<param name="files" type="data" format="tabular" multiple="true" label="Counts files that are replicates" />
+</repeat>
+</when>
+</conditional>
+<!-- Step 1 options: polynomial degree of the regression model, passed to the R script as the degree option. -->
+<section name="makeDesignMatrix"
+title="Step 1: make.Design.Matrix - Defining the regression model"
+help="‘make.design.matrix’ creates the design matrix of dummies for
+fitting time series microarray gene expression experiments.">
+<param name="degree" type="integer" value="1"
+label="Degree of regression fit polynome"
+help="The degree of the regression fit polynome. ‘degree’ = 1 returns
+linear regression, ‘degree’ = 2 returns quadratic regression, etc" />
+</section>
+<!-- Step 2 options: significance level (qvalue) and the minimum number of observations per gene (min_obs). -->
+<section name="p_vector" title="Step 2: p.vector - Finding significant genes" help="‘p.vector’ performs a regression fit for each gene taking all variables present in the model given by a regression matrix and returns a list of FDR corrected significant genes">
+<param name="qvalue" type="float" value="0.05" label="Q" help="Significance level" />
+<param name="min_obs" type="integer" value="6" label="Minimum values" help="Genes with less than this number of true numerical values will be excluded from the analysis. Minimum value to estimate the model is (degree+1)xGroups+1. Default is 6." />
+</section>
+<!-- Step 3 options: stepwise regression method, nvar correction switch and the alfa threshold. -->
+<section name="Tfit" title="Step 3: T.fit - Finding significant differences"
+help="‘T.fit’ selects the best regression model for each gene using
+stepwise regression. In the maSigPro approach ‘p.vector’ and ‘T.fit’ are subsequent
+steps, meaning that significant genes are first selected on the
+basis of a general model and then the significant variables for
+each gene are found by step-wise regression.">
+<param name="step_method" type="select" label="Step regression"
+help="The step regression can be ‘backward’ or ‘forward’ indicating
+whether the step procedure starts from the model with all or none
+variables. With the ‘two.ways.backward’ or ‘two.ways.forward’
+options the variables are both allowed to get in and out. At each
+step the p-value of each variable is computed and variables get
+in/out the model when this p-value is lower or higher than given
+threshold alfa.">
+<option selected="True" value="backward">backward</option>
+<option value="forward">forward</option>
+<option value="two.ways.backward">two.ways.backward</option>
+<option value="two.ways.forward">two.ways.forward</option>
+</param>
+<!-- Boolean parameters take no option children; the emitted value comes from truevalue / falsevalue
+     and the default from the checked attribute. -->
+<param type="boolean" name="nvar_correction" label="nvar correction" truevalue="TRUE" falsevalue="FALSE" checked="false"
+help="When nvar.correction is TRUE the given significance
+level is corrected by the number of variables in the model." />
+<param name="alfa" type="float" value="0.05" label="alfa" help="Significance level used for variable selection in the stepwise regression" />
+</section>
+<!-- Step 4 options: R-squared cut-off, which variables to extract gene lists for, and how
+     intercept coefficients are treated when selecting significant genes. -->
+<section name="getSiggenes"
+title="Step 4: get.siggenes - Obtaining lists of significant genes"
+help="This function creates lists of significant genes for a set of
+variables whose significance value has been computed with the
+‘T.fit’ function.">
+<param name="rsq" type="float" value="0.7" label="rsq"
+help="cut-off level at the R-squared value for the stepwise
+regression fit. Only genes with R-squared more than rsq are
+selected" />
+<param name="vars" type="select" label="Variables"
+help="Variables for which to extract significant genes.
+‘all’: generates one single matrix or gene list with all
+significant genes.
+‘each’: generates as many significant genes extractions as
+variables in the general regression model. Each extraction
+contains the significant genes for that variable.
+‘groups’: generates a significant genes extraction for each
+experimental group.
+The difference between ‘each’ and ‘groups’ is that in the
+first case the variables of the same group (e.g.  ‘TreatmentA’
+and ‘time*TreatmentA’) will be extracted separately and in the
+second case jointly.">
+<option selected="True" value="groups">Groups</option>
+<option value="each">Each</option>
+<option value="all">All</option>
+</param>
+<param name="significant_intercept" type="select" label="Significant intercept"
+help="The argument ‘significant.intercept’ modulates the treatment for
+intercept coefficients to apply for selecting significant genes
+when ‘vars’ equals ‘groups’. There are three possible values:
+‘none’, no significant intercept (differences) are considered
+for significant gene selection, ‘dummy’, includes genes with
+significant intercept differences between control and experimental
+groups, and ‘all’ when both significant intercept coefficient
+for the control group and significant intercept differences are
+considered for selecting significant genes.">
+<option selected="True" value="dummy">Dummy</option>
+<option value="none">None</option>
+<option value="all">All</option>
+</param>
+</section>
+<!-- Optional Step 5 (see.genes): when pdf_selector is on, the clustering and plotting options below
+     are passed to the R script and the pdf_out output is produced. Every value of a conditional's
+     test parameter has a matching when block, including the ones that add no parameters. -->
+<conditional name="pdf">
+<param label="Generate visualization PDF" name="pdf_selector" type="boolean"
+truevalue="1" falsevalue="0" checked="true"
+help="Choose if you want to generate a PDF file containing the visualizations" />
+<when value="1">
+<section name="seeGenes" title="Step 5: see.genes - Visualization"
+help="This function provides visualisation tools for gene expression
+values in a time course experiment. The function first calls the
+heatmap function for a general overview of experiment results.
+Next a partitioning of the data is generated using a clustering
+method.  The results of the clustering are visualized both as gene
+expression profiles extended along all arrays in the experiment,
+as provided by the plot.profiles function, and as summary
+expression profiles for comparison among experimental groups.">
+<param name="clusterData" label="Cluster Data" type="integer" value="1"
+help="Data clustering can be done on the basis of either the original
+expression values, the regression coefficients, or the t.scores.
+In case ‘data’ is a ‘get.siggenes’ object, this is given by
+providing the element names of the list
+‘c(sig.profiles,coefficients,t.score)’ of their list
+position (1,2 or 3)." />
+<param name="k" type="integer" label="Number of clusters for data partitioning" value="9" />
+<conditional name="clustering">
+<param name="clusterMethod" label="Cluster Method" type="select"
+help="clustering method for data partitioning. Currently
+‘hclust’, ‘kmeans’ and ‘Mclust’ are supported">
+<option selected="True" value="hclust">hclust</option>
+<option value="kmeans">kmeans</option>
+<option value="Mclust">Mclust</option>
+</param>
+<when value="hclust">
+<param name="distance" type="select" label="Distance measure"
+help="Distance measurement function when ‘cluster.method’ is
+‘hclust’. Default uses correlation.">
+<option selected="True" value="cor">Correlation</option>
+<option value="euclidean">Euclidean</option>
+<option value="maximum">Maximum</option>
+<option value="manhattan">Manhattan</option>
+<!-- NOTE(review): this is the only capitalised value in the list; confirm that masigpro.R and the
+     R distance function accept "Canberra" as spelled here. -->
+<option value="Canberra">Canberra</option>
+<option value="binary">Binary</option>
+<option value="minkowski">Minkowski</option>
+</param>
+<param name="aggloMethod" type="select" label="Agglomeration method"
+help="The agglomeration method to be used when ‘cluster.method’ is ‘hclust’.">
+<option selected="True" value="ward.D">ward.D</option>
+<option value="ward.D2">ward.D2</option>
+<option value="single">single</option>
+<option value="complete">complete</option>
+<option value="average">average (= UPGMA)</option>
+<option value="mcquitty">mcquitty (= WPGMA)</option>
+<option value="median">median (= WPGMC)</option>
+<option value="centroid">centroid (= UPGMC)</option>
+</param>
+</when>
+<when value="kmeans">
+<param name="iterMax" type="integer" label="Maximum number of iterations" value="500"
+help="Maximum number of iterations when ‘cluster.method’ is ‘kmeans’" />
+</when>
+<when value="Mclust" />
+</conditional>
+<param name="colorMode" label="Color Mode" type="select" help="Color scale for plotting profiles. Can be either ‘rainbow’ or ‘gray’">
+<option selected="True" value="rainbow">Rainbow</option>
+<option value="gray">Gray</option>
+</param>
+<param name="showFit" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Show regression fit curves?"
+help="Indicating whether regression fit curves must be plotted" />
+<param name="showLines" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Draw lines?"
+help="Indicating whether a line must be drawn joining plotted data points for each group" />
+<param name="cexlab" type="float" value="0.8" label="Magnification for x labels"
+help="Graphical parameter magnification to be used for x labels in plotting functions" />
+<param name="legend" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Add legend to plotting profiles?"
+help="Indicating whether legend must be added when plotting profiles" />
+</section>
+</when>
+<when value="0" />
+</conditional>
+</inputs>
+<!-- Outputs: the result table is always produced; the generated edesign and data files are kept
+     only in "advanced" mode with enable_output on; the PDF (collected from Results.pdf in the
+     working directory) is kept only when pdf_selector is on. -->
+<outputs>
+<data name="masigpro_out" format="tabular" label="maSigPro result file on ${on_string}" />
+<data name="edesign_out" format="txt" label="maSigPro edesign file on ${on_string}">
+<filter>source['source_selector'] == 'advanced' and source['enable_output'] == True</filter>
+</data>
+<data name="data_out" format="txt" label="maSigPro data file on ${on_string}">
+<filter>source['source_selector'] == 'advanced' and source['enable_output'] == True</filter>
+</data>
+<data name="pdf_out" format="pdf" from_work_dir="Results.pdf" label="maSigPro Plot file on ${on_string}">
+<filter>pdf['pdf_selector'] == True</filter>
+</data>
+</outputs>
+<!-- Test 1 builds edesign and data from seven count files ("advanced" source) and checks all four
+     outputs. Test 2 feeds the expected edesign and data files of test 1 back in through the
+     "defaults" source and expects the same result table and PDF.
+     Only parameters that exist in the inputs section are set. -->
+<tests>
+<test>
+<param name="source_selector" value="advanced" />
+<param name="enable_output" value="1" />
+<repeat name="rep_time">
+<param name="time" value="1" />
+<param name="files" value="control_1H.counts,treat_1H.counts" />
+</repeat>
+<repeat name="rep_time">
+<param name="time" value="2" />
+<param name="files" value="control_2H.counts,treat_2H.counts" />
+</repeat>
+<repeat name="rep_time">
+<param name="time" value="3" />
+<param name="files" value="control_3H.counts,treat_3H_1.counts,treat_3H_2.counts" />
+</repeat>
+<repeat name="rep_repl">
+<param name="files" value="treat_3H_1.counts,treat_3H_2.counts" />
+</repeat>
+<repeat name="rep_groups">
+<param name="name" value="Control" />
+<param name="files" value="control_1H.counts,control_2H.counts,control_3H.counts" />
+</repeat>
+<repeat name="rep_groups">
+<param name="name" value="Treatment" />
+<param name="files" value="treat_1H.counts,treat_2H.counts,treat_3H_1.counts,treat_3H_2.counts" />
+</repeat>
+<output name="masigpro_out" file="masigpro_out.tab" />
+<output name="data_out" file="data_out.txt" />
+<output name="edesign_out" file="edesign_out.txt" />
+<output name="pdf_out" file="Results.pdf" />
+</test>
+<test>
+<param name="source_selector" value="defaults" />
+<param name="edesign" value="edesign_out.txt" />
+<param name="data" value="data_out.txt" />
+<output name="masigpro_out" file="masigpro_out.tab" />
+<output name="pdf_out" file="Results.pdf" />
+</test>
+</tests>
+<!-- Tool help in reStructuredText. Blank lines separate the directive, headings, paragraphs and
+     the bullet list so that each renders as its own element. -->
+<help>
+<![CDATA[
+.. class:: infomark
+
+**What it does**
+
+maSigPro_ is a regression based approach to find genes for which there are significant gene expression profile differences between experimental groups in time course microarray and RNA-Seq experiments.
+
+**Inputs**
+
+The maSigPro wrapper has two options for input data:
+
+- directly through two separate text files containing the experiment design (edesign) and the data or
+- count tables generated from HTSeq-count. Count tables must be generated for each sample individually.
+
+To set up an experimental design from separate count files you first have to select which files belong to a certain time point.
+Likewise you can specify which files are replicates. In a third step you have to create the experimental groups and select the related files.
+For a more comfortable setup in future analysis you have the option to output the generated edesign and data files.
+
+**Output**
+
+maSigPro_ generates a summary file containing the list of significant genes. Additionally you can obtain a PDF file containing plots of profiles and groups that visualize the clustering analysis.
+
+.. _maSigPro: https://bioconductor.org/packages/release/bioc/html/maSigPro.html
+]]>
+</help>
+<!-- Publication to cite for this tool, referenced by DOI. --><citations>
+<citation type="doi">10.1093/bioinformatics/btl056</citation>
+</citations>
+</tool>

Mercurial > repos > iuc > masigpro

comparison masigpro.xml @ 0:c8c290f3ea7d draft