Mercurial > repos > lecorguille > anova

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/abims_anova.r	Tue Jun 30 06:02:46 2015 -0400
@@ -0,0 +1,132 @@
+#!/usr/local/public/bin/Rscript
+# version="1.1"
+
+# date: 06-06-2012
+# update: 18-02-2014
+# **Authors** Gildas Le Corguille  ABiMS - UPMC/CNRS - Station Biologique de Roscoff - gildas.lecorguille|at|sb-roscoff.fr
+
+# abims_anova.r version 20140218
+
+library(batch)
+
+
+# function avova
+anova = function (file, sampleinfo, mode="column", condition=1, interaction=F, method="BH", threshold=0.01, selection_method="intersection", sep=";", dec=".", outputdatapvalue="anova.data.output", outputdatafiltered="anova.datafiltered.output") {
+
+
+	if (sep=="tabulation") sep="\t"
+    	if (sep=="semicolon") sep=";"
+    	if (sep=="comma") sep=","
+
+	anova_formula_operator = "+"
+	if (interaction) anova_formula_operator = "*"
+
+  	# -- import --
+	data=read.table(file, header = TRUE, row.names=1, sep = sep, quote="\"", dec = dec, fill = TRUE, comment.char="",na.strings = "NA")
+
+  	if (mode == "row") data=t(data)
+
+	sampleinfoTab=read.table(sampleinfo, header = TRUE, row.names=1, sep = sep, quote="\"")
+	rownames(sampleinfoTab) = make.names(rownames(sampleinfoTab))
+
+
+	# -- group --
+	match_data_sampleinfoTab = match(rownames(data),rownames(sampleinfoTab))
+	if (sum(is.na(match_data_sampleinfoTab)) > 0) {
+	  write("ERROR: There is a problem during to match sample names from the data matrix and from the sample info (presence of NA).", stderr())
+	  write("You may need to use change the mode (column/row)", stderr())
+	  write("10 first sample names in the data matrix:", stderr())
+	  write(head(colnames(data)), stderr())
+	  write("10 first sample names in the sample info:", stderr())
+	  write(head(rownames(sampleinfoTab)), stderr())
+	  quit("no",status=10)
+	}
+
+
+	# -- anova --
+
+  	# formula
+	grps=list()
+	anova_formula_s = "data ~ "
+	cat("\ncontrasts:\n")
+	for (i in 1:length(condition)) {
+	  grps[[i]] = factor(sampleinfoTab[,condition[i]][match_data_sampleinfoTab])
+	  anova_formula_s = paste(anova_formula_s, "grps[[",i,"]]",anova_formula_operator, sep="")
+	  cat(condition[i],"\t",levels(grps[[i]]),"\n")
+	# write("Current groups: ", stderr())
+	# write(grp[[i]], stderr())
+	}
+	anova_formula_s = substr(anova_formula_s, 1, nchar(anova_formula_s)-1)
+	anova_formula = as.formula(anova_formula_s)
+
+
+
+	# anova
+	manovaObjectList = manova(anova_formula)
+	manovaList = summary.aov(manovaObjectList)
+
+  	# condition renaming
+	manovaRownames = gsub(" ","",rownames(manovaList[[1]]))
+	manovaNbrPvalue = length(manovaRownames)-1
+	manovaRownames = manovaRownames[-(manovaNbrPvalue+1)]
+
+	for (i in 1:length(condition)) {
+	  manovaRownames = sub(paste("grps\\[\\[",i,"\\]\\]",sep=""),condition[i],manovaRownames)
+  	  anova_formula_s = sub(paste("grps\\[\\[",i,"\\]\\]",sep=""),condition[i],anova_formula_s)
+	}
+
+  	# log
+  	cat("\nanova_formula",anova_formula_s,"\n")
+
+	# p-value
+	aovPValue = sapply(manovaList,function(x){x[-(manovaNbrPvalue+1),5]})
+	if(length(condition) == 1) aovPValue = t(aovPValue)
+	rownames(aovPValue) = paste("pvalue_",manovaRownames,sep="")
+
+	# p-value adjusted
+	if(length(condition) == 1) {
+		aovAdjPValue = t(p.adjust(aovPValue,method=method))
+	} else {
+		aovAdjPValue = apply(aovPValue,2,p.adjust, method=method)
+	}
+	rownames(aovAdjPValue) = paste("pvalueadjusted.",method,".",manovaRownames,sep="")
+
+	# selection
+	colSumThreshold = colSums(aovAdjPValue <= threshold)
+	if (selection_method == "intersection") {
+		datafiltered = data[,colSumThreshold == nrow(aovAdjPValue )]
+	} else {
+		datafiltered = data[,colSumThreshold != 0]
+	}
+
+	#data=rbind(data, aovPValue, aovAdjPValue)
+	data=rbind(data, aovAdjPValue)
+
+
+	if (mode == "row") {
+	  data=t(data)
+	  datafiltered=t(datafiltered)
+	}
+
+	# -- output / return --
+	write.table(data, outputdatapvalue, sep=sep, quote=F, col.names = NA)
+	write.table(datafiltered, outputdatafiltered, sep=sep, quote=F, col.names = NA)
+
+	# log
+	cat("\nthreshold:",threshold,"\n")
+	cat("result:",nrow(datafiltered),"/",nrow(data),"\n")
+
+	quit("no",status=0)
+}
+
+# log
+cat("ANOVA\n\n")
+cat("Arguments\n")
+args <- commandArgs(trailingOnly = TRUE)
+print(args)
+
+listArguments = parseCommandArgs(evaluate=FALSE)
+do.call(anova, listArguments)
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/abims_anova.xml	Tue Jun 30 06:02:46 2015 -0400
@@ -0,0 +1,199 @@
+<tool id="abims_anova" name="Anova" version="1.1">
+
+    <requirements>
+	<requirement type="binary">Rscript</requirement>
+	<requirement type="binary">batch</requirement>
+    </requirements>
+
+    <description>N-way anova. With ou Without interactions</description>
+
+    <command interpreter="Rscript">
+        abims_anova.r file "$input" sampleinfo "$sampleinfo" mode "$mode"
+condition "c('$condition_1'
+#for $i, $s in enumerate( $conditions )
+,'${s.condition}'
+#end for
+)"
+interaction $interaction method $method threshold $threshold selection_method $selection_method sep "$sep" dec "$dec" outputdatapvalue $dataMatrixPValue outputdatafiltered $dataMatrixFiltered
+    </command>
+
+    <inputs>
+        <param name="input" type="data" label="Data Matrix file" format="tabular" help="Matrix of numeric data with headers." />
+        <param name="sampleinfo" type="data" label="Sample Metadata file" format="tabular" help="Tabular file with the data metadata : one sample per line and at least two columns : ids and one condition" />
+
+         <param name="mode" type="select" help="Perform the anova tests on column/row" format="text" optional="true">
+		<label>Mode</label>
+		<option value="row">row</option>
+		<option value="column">column</option>
+	</param>
+
+	<param name="condition_1" type="text" label="Condition" value="" help="The column name of the condition. ex: hour or treatment" optional="false" />
+	<repeat name="conditions" title="Conditions for N-ways anova">
+	    <param name="condition" type="text" label="Condition" value="" help="The column name of the condition. ex: hour or treatment" />
+	</repeat>
+
+	<param name="interaction" type="boolean" label="Enable interaction response p-values" truevalue="T" falsevalue="F" help="Used if more than 1 conditon. The anova will produse p-value according to the interaction between your condition (ex: condition1:conditions2, condition1:conditions3, condition2:conditions3 and condition1:condition2:conditions3)" />
+
+	<param name="method" type="select" help="Method used to apply a correction on the pvalue because of the number of test" format="text" optional="true">
+		<label>PValue adjusted method</label>
+		<option value="BH">BH</option>
+		<option value="holm">holm</option>
+		<option value="bonferroni">bonferroni</option>
+		<option value="hochberg">hochberg</option>
+		<option value="hommel">hommel</option>
+		<option value="BY">BY</option>
+		<option value="fdr">fdr</option>
+		<option value="none">none</option>
+	</param>
+
+        <param name="threshold" type="float" label="Threshold" value="0.01" help="max adjusted p.value accepted" />
+
+        <param name="selection_method" type="select" format="text" help="Intersection: all condition p-value must be under the threshold. Union: at least condition p-value must be under the threshold. ">
+		<label>Selection method</label>
+		<option value="intersection" selected="true">intersection / strong</option>
+		<option value="union">union / weak</option>
+	</param>
+
+        <param name="sep" type="select" format="text">
+		<label>Separator of columns</label>
+		<option value="tabulation">tabulation</option>
+		<option value="semicolon">;</option>
+		<option value="comma">,</option>
+	</param>
+
+        <param name="dec" type="text" label="Decimal separator" value="." help="" />
+
+    </inputs>
+
+    <outputs>
+        <data name="dataMatrixPValue" format="input" label="${input.name}_anova_pvalue.${input.ext}"/>
+        <data name="dataMatrixFiltered" format="input" label="${input.name}_anova_filtered.${input.ext}"/>
+    </outputs>
+
+    <stdio>
+        <exit_code range="1:" level="fatal" />
+    </stdio>
+
+    <help>
+
+.. class:: infomark
+
+**Authors** Gildas Le Corguille  ABiMS - UPMC/CNRS - Station Biologique de Roscoff - gildas.lecorguille|at|sb-roscoff.fr
+
+---------------------------------------------------
+
+=====
+Anova
+=====
+
+-----------
+Description
+-----------
+
+Analysis of variance (ANOVA) is used to analyze the differences between group means and their associated procedures,
+in which the observed variance in a particular variable is partitioned into components attributable to different sources of variation.
+
+
+
+-----------
+Input files
+-----------
+
++---------------------------+------------+
+| Parameter : num + label   |   Format   |
++===========================+============+
+| 1 : Data Matrix file      | Tabular    |
++---------------------------+------------+
+| 2 : Sample Metadata file  | Tabular    |
++---------------------------+------------+
+
+
+
+------------
+Output files
+------------
+
+
+
+***.anova_pvalue.tabular**
+
+	| A tabular file which represents for each metabolite (row), the value of the intensity in each sample (column) + two columns (aovPValue and aovAdjPValue).
+
+***.anova_filtered.tabular**
+
+	| The tabular file xset.anova_pvalue.tabular containing only the metabolites that have been filtered by aovAdjPValue.
+
+
+------
+
+.. class:: infomark
+
+The outputs ***.anova_filtered.tabular** or ***.anova_pvalue.tabular** are tabular files. You can continue your analysis using it in the following tools:
+	| PCA
+	| Hierarchical Clustering
+
+
+
+---------------------------------------------------
+
+---------------
+Working example
+---------------
+
+
+Input files
+-----------
+
+**>A part of an example of Data Matrix file input**
+
+
++--------+------------------+----------------+
+| Name   | Bur-eH_FSP_12    | Bur-eH_FSP_24  |
++========+==================+================+
+|M202T601| 91206595.7559783 |106808979.08546 |
++--------+------------------+----------------+
+|M234T851| 27249137.275504  |28824971.3177926|
++--------+------------------+----------------+
+
+**>A part of an example of Sample Metadata file input**
+
+
++---------------------------+------------+------------+------------+
+| Sample name               |   class    |   time     |   batch    |
++===========================+============+============+============+
+| Bur-eH_FSP_12             | Bur-eH     |    12      |     1      |
++---------------------------+------------+------------+------------+
+| Bur-eH_FSP_24             | Bur-eH     |    24      |     1      |
++---------------------------+------------+------------+------------+
+| Bur-NI_FSP_12             | Bur-NI     |    12      |     2      |
++---------------------------+------------+------------+------------+
+| Bur-NI_FSP_24             | Bur-NI     |    24      |     2      |
++---------------------------+------------+------------+------------+
+
+Parameters
+----------
+
+	| Mode -> **row**
+	| column name of condition -> **class**
+	| Separator of columns: -> **tabulation**
+	| Decimal separator -> **.**
+	| PValue adjusted method -> **BH**
+	| Threshold -> **0.001**
+
+
+
+Output files
+------------
+
+**Part of an example of xset.anova_filtered.tabular:**
+
+.. image:: anova_pvalue.png
+
+**Part of an example of xset.anova_pvalue.tabular:**
+
+.. image:: anova_filtered.png
+
+
+    </help>
+
+</tool>
Binary file static/images/anova_filtered.png has changed
Binary file static/images/anova_pvalue.png has changed
Binary file static/images/hclust.png has changed
Binary file static/images/pca_abims_Rplots.png has changed
Binary file static/images/pca_abims_Rplots1.png has changed
Binary file static/images/pca_abims_eigenvalue.png has changed
Binary file static/images/pca_abims_percentage_of_variance.png has changed