Mercurial > repos > lecorguille > anova
changeset 0:8dd2a438bfba draft
Uploaded
author | lecorguille |
---|---|
date | Tue, 30 Jun 2015 06:02:46 -0400 |
parents | |
children | e646ad125f2d |
files | abims_anova.r abims_anova.xml static/images/anova_filtered.png static/images/anova_pvalue.png static/images/hclust.png static/images/pca_abims_Rplots.png static/images/pca_abims_Rplots1.png static/images/pca_abims_eigenvalue.png static/images/pca_abims_percentage_of_variance.png |
diffstat | 9 files changed, 331 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/abims_anova.r Tue Jun 30 06:02:46 2015 -0400 @@ -0,0 +1,132 @@ +#!/usr/local/public/bin/Rscript +# version="1.1" + +# date: 06-06-2012 +# update: 18-02-2014 +# **Authors** Gildas Le Corguille ABiMS - UPMC/CNRS - Station Biologique de Roscoff - gildas.lecorguille|at|sb-roscoff.fr + +# abims_anova.r version 20140218 + +library(batch) + + +# function avova +anova = function (file, sampleinfo, mode="column", condition=1, interaction=F, method="BH", threshold=0.01, selection_method="intersection", sep=";", dec=".", outputdatapvalue="anova.data.output", outputdatafiltered="anova.datafiltered.output") { + + + if (sep=="tabulation") sep="\t" + if (sep=="semicolon") sep=";" + if (sep=="comma") sep="," + + anova_formula_operator = "+" + if (interaction) anova_formula_operator = "*" + + # -- import -- + data=read.table(file, header = TRUE, row.names=1, sep = sep, quote="\"", dec = dec, fill = TRUE, comment.char="",na.strings = "NA") + + if (mode == "row") data=t(data) + + sampleinfoTab=read.table(sampleinfo, header = TRUE, row.names=1, sep = sep, quote="\"") + rownames(sampleinfoTab) = make.names(rownames(sampleinfoTab)) + + + # -- group -- + match_data_sampleinfoTab = match(rownames(data),rownames(sampleinfoTab)) + if (sum(is.na(match_data_sampleinfoTab)) > 0) { + write("ERROR: There is a problem during to match sample names from the data matrix and from the sample info (presence of NA).", stderr()) + write("You may need to use change the mode (column/row)", stderr()) + write("10 first sample names in the data matrix:", stderr()) + write(head(colnames(data)), stderr()) + write("10 first sample names in the sample info:", stderr()) + write(head(rownames(sampleinfoTab)), stderr()) + quit("no",status=10) + } + + + # -- anova -- + + # formula + grps=list() + anova_formula_s = "data ~ " + cat("\ncontrasts:\n") + for (i in 1:length(condition)) { + grps[[i]] = factor(sampleinfoTab[,condition[i]][match_data_sampleinfoTab]) + anova_formula_s = paste(anova_formula_s, "grps[[",i,"]]",anova_formula_operator, sep="") + cat(condition[i],"\t",levels(grps[[i]]),"\n") + # write("Current groups: ", stderr()) + # write(grp[[i]], stderr()) + } + anova_formula_s = substr(anova_formula_s, 1, nchar(anova_formula_s)-1) + anova_formula = as.formula(anova_formula_s) + + + + # anova + manovaObjectList = manova(anova_formula) + manovaList = summary.aov(manovaObjectList) + + # condition renaming + manovaRownames = gsub(" ","",rownames(manovaList[[1]])) + manovaNbrPvalue = length(manovaRownames)-1 + manovaRownames = manovaRownames[-(manovaNbrPvalue+1)] + + for (i in 1:length(condition)) { + manovaRownames = sub(paste("grps\\[\\[",i,"\\]\\]",sep=""),condition[i],manovaRownames) + anova_formula_s = sub(paste("grps\\[\\[",i,"\\]\\]",sep=""),condition[i],anova_formula_s) + } + + # log + cat("\nanova_formula",anova_formula_s,"\n") + + # p-value + aovPValue = sapply(manovaList,function(x){x[-(manovaNbrPvalue+1),5]}) + if(length(condition) == 1) aovPValue = t(aovPValue) + rownames(aovPValue) = paste("pvalue_",manovaRownames,sep="") + + # p-value adjusted + if(length(condition) == 1) { + aovAdjPValue = t(p.adjust(aovPValue,method=method)) + } else { + aovAdjPValue = apply(aovPValue,2,p.adjust, method=method) + } + rownames(aovAdjPValue) = paste("pvalueadjusted.",method,".",manovaRownames,sep="") + + # selection + colSumThreshold = colSums(aovAdjPValue <= threshold) + if (selection_method == "intersection") { + datafiltered = data[,colSumThreshold == nrow(aovAdjPValue )] + } else { + datafiltered = data[,colSumThreshold != 0] + } + + #data=rbind(data, aovPValue, aovAdjPValue) + data=rbind(data, aovAdjPValue) + + + if (mode == "row") { + data=t(data) + datafiltered=t(datafiltered) + } + + # -- output / return -- + write.table(data, outputdatapvalue, sep=sep, quote=F, col.names = NA) + write.table(datafiltered, outputdatafiltered, sep=sep, quote=F, col.names = NA) + + # log + cat("\nthreshold:",threshold,"\n") + cat("result:",nrow(datafiltered),"/",nrow(data),"\n") + + quit("no",status=0) +} + +# log +cat("ANOVA\n\n") +cat("Arguments\n") +args <- commandArgs(trailingOnly = TRUE) +print(args) + +listArguments = parseCommandArgs(evaluate=FALSE) +do.call(anova, listArguments) + + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/abims_anova.xml Tue Jun 30 06:02:46 2015 -0400 @@ -0,0 +1,199 @@ +<tool id="abims_anova" name="Anova" version="1.1"> + + <requirements> + <requirement type="binary">Rscript</requirement> + <requirement type="binary">batch</requirement> + </requirements> + + <description>N-way anova. With ou Without interactions</description> + + <command interpreter="Rscript"> + abims_anova.r file "$input" sampleinfo "$sampleinfo" mode "$mode" +condition "c('$condition_1' +#for $i, $s in enumerate( $conditions ) +,'${s.condition}' +#end for +)" +interaction $interaction method $method threshold $threshold selection_method $selection_method sep "$sep" dec "$dec" outputdatapvalue $dataMatrixPValue outputdatafiltered $dataMatrixFiltered + </command> + + <inputs> + <param name="input" type="data" label="Data Matrix file" format="tabular" help="Matrix of numeric data with headers." /> + <param name="sampleinfo" type="data" label="Sample Metadata file" format="tabular" help="Tabular file with the data metadata : one sample per line and at least two columns : ids and one condition" /> + + <param name="mode" type="select" help="Perform the anova tests on column/row" format="text" optional="true"> + <label>Mode</label> + <option value="row">row</option> + <option value="column">column</option> + </param> + + <param name="condition_1" type="text" label="Condition" value="" help="The column name of the condition. ex: hour or treatment" optional="false" /> + <repeat name="conditions" title="Conditions for N-ways anova"> + <param name="condition" type="text" label="Condition" value="" help="The column name of the condition. ex: hour or treatment" /> + </repeat> + + <param name="interaction" type="boolean" label="Enable interaction response p-values" truevalue="T" falsevalue="F" help="Used if more than 1 conditon. The anova will produse p-value according to the interaction between your condition (ex: condition1:conditions2, condition1:conditions3, condition2:conditions3 and condition1:condition2:conditions3)" /> + + <param name="method" type="select" help="Method used to apply a correction on the pvalue because of the number of test" format="text" optional="true"> + <label>PValue adjusted method</label> + <option value="BH">BH</option> + <option value="holm">holm</option> + <option value="bonferroni">bonferroni</option> + <option value="hochberg">hochberg</option> + <option value="hommel">hommel</option> + <option value="BY">BY</option> + <option value="fdr">fdr</option> + <option value="none">none</option> + </param> + + <param name="threshold" type="float" label="Threshold" value="0.01" help="max adjusted p.value accepted" /> + + <param name="selection_method" type="select" format="text" help="Intersection: all condition p-value must be under the threshold. Union: at least condition p-value must be under the threshold. "> + <label>Selection method</label> + <option value="intersection" selected="true">intersection / strong</option> + <option value="union">union / weak</option> + </param> + + <param name="sep" type="select" format="text"> + <label>Separator of columns</label> + <option value="tabulation">tabulation</option> + <option value="semicolon">;</option> + <option value="comma">,</option> + </param> + + <param name="dec" type="text" label="Decimal separator" value="." help="" /> + + </inputs> + + <outputs> + <data name="dataMatrixPValue" format="input" label="${input.name}_anova_pvalue.${input.ext}"/> + <data name="dataMatrixFiltered" format="input" label="${input.name}_anova_filtered.${input.ext}"/> + </outputs> + + <stdio> + <exit_code range="1:" level="fatal" /> + </stdio> + + <help> + +.. class:: infomark + +**Authors** Gildas Le Corguille ABiMS - UPMC/CNRS - Station Biologique de Roscoff - gildas.lecorguille|at|sb-roscoff.fr + +--------------------------------------------------- + +===== +Anova +===== + +----------- +Description +----------- + +Analysis of variance (ANOVA) is used to analyze the differences between group means and their associated procedures, +in which the observed variance in a particular variable is partitioned into components attributable to different sources of variation. + + + +----------- +Input files +----------- + ++---------------------------+------------+ +| Parameter : num + label | Format | ++===========================+============+ +| 1 : Data Matrix file | Tabular | ++---------------------------+------------+ +| 2 : Sample Metadata file | Tabular | ++---------------------------+------------+ + + + +------------ +Output files +------------ + + + +***.anova_pvalue.tabular** + + | A tabular file which represents for each metabolite (row), the value of the intensity in each sample (column) + two columns (aovPValue and aovAdjPValue). + +***.anova_filtered.tabular** + + | The tabular file xset.anova_pvalue.tabular containing only the metabolites that have been filtered by aovAdjPValue. + + +------ + +.. class:: infomark + +The outputs ***.anova_filtered.tabular** or ***.anova_pvalue.tabular** are tabular files. You can continue your analysis using it in the following tools: + | PCA + | Hierarchical Clustering + + + +--------------------------------------------------- + +--------------- +Working example +--------------- + + +Input files +----------- + +**>A part of an example of Data Matrix file input** + + ++--------+------------------+----------------+ +| Name | Bur-eH_FSP_12 | Bur-eH_FSP_24 | ++========+==================+================+ +|M202T601| 91206595.7559783 |106808979.08546 | ++--------+------------------+----------------+ +|M234T851| 27249137.275504 |28824971.3177926| ++--------+------------------+----------------+ + +**>A part of an example of Sample Metadata file input** + + ++---------------------------+------------+------------+------------+ +| Sample name | class | time | batch | ++===========================+============+============+============+ +| Bur-eH_FSP_12 | Bur-eH | 12 | 1 | ++---------------------------+------------+------------+------------+ +| Bur-eH_FSP_24 | Bur-eH | 24 | 1 | ++---------------------------+------------+------------+------------+ +| Bur-NI_FSP_12 | Bur-NI | 12 | 2 | ++---------------------------+------------+------------+------------+ +| Bur-NI_FSP_24 | Bur-NI | 24 | 2 | ++---------------------------+------------+------------+------------+ + +Parameters +---------- + + | Mode -> **row** + | column name of condition -> **class** + | Separator of columns: -> **tabulation** + | Decimal separator -> **.** + | PValue adjusted method -> **BH** + | Threshold -> **0.001** + + + +Output files +------------ + +**Part of an example of xset.anova_filtered.tabular:** + +.. image:: anova_pvalue.png + +**Part of an example of xset.anova_pvalue.tabular:** + +.. image:: anova_filtered.png + + + </help> + +</tool>