# HG changeset patch # User lecorguille # Date 1435658566 14400 # Node ID 8dd2a438bfba792eb71d7115a4246f45ecd9aa21 Uploaded diff -r 000000000000 -r 8dd2a438bfba abims_anova.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/abims_anova.r Tue Jun 30 06:02:46 2015 -0400 @@ -0,0 +1,132 @@ +#!/usr/local/public/bin/Rscript +# version="1.1" + +# date: 06-06-2012 +# update: 18-02-2014 +# **Authors** Gildas Le Corguille ABiMS - UPMC/CNRS - Station Biologique de Roscoff - gildas.lecorguille|at|sb-roscoff.fr + +# abims_anova.r version 20140218 + +library(batch) + + +# function avova +anova = function (file, sampleinfo, mode="column", condition=1, interaction=F, method="BH", threshold=0.01, selection_method="intersection", sep=";", dec=".", outputdatapvalue="anova.data.output", outputdatafiltered="anova.datafiltered.output") { + + + if (sep=="tabulation") sep="\t" + if (sep=="semicolon") sep=";" + if (sep=="comma") sep="," + + anova_formula_operator = "+" + if (interaction) anova_formula_operator = "*" + + # -- import -- + data=read.table(file, header = TRUE, row.names=1, sep = sep, quote="\"", dec = dec, fill = TRUE, comment.char="",na.strings = "NA") + + if (mode == "row") data=t(data) + + sampleinfoTab=read.table(sampleinfo, header = TRUE, row.names=1, sep = sep, quote="\"") + rownames(sampleinfoTab) = make.names(rownames(sampleinfoTab)) + + + # -- group -- + match_data_sampleinfoTab = match(rownames(data),rownames(sampleinfoTab)) + if (sum(is.na(match_data_sampleinfoTab)) > 0) { + write("ERROR: There is a problem during to match sample names from the data matrix and from the sample info (presence of NA).", stderr()) + write("You may need to use change the mode (column/row)", stderr()) + write("10 first sample names in the data matrix:", stderr()) + write(head(colnames(data)), stderr()) + write("10 first sample names in the sample info:", stderr()) + write(head(rownames(sampleinfoTab)), stderr()) + quit("no",status=10) + } + + + # -- anova -- + + # formula + grps=list() + anova_formula_s = "data ~ " + cat("\ncontrasts:\n") + for (i in 1:length(condition)) { + grps[[i]] = factor(sampleinfoTab[,condition[i]][match_data_sampleinfoTab]) + anova_formula_s = paste(anova_formula_s, "grps[[",i,"]]",anova_formula_operator, sep="") + cat(condition[i],"\t",levels(grps[[i]]),"\n") + # write("Current groups: ", stderr()) + # write(grp[[i]], stderr()) + } + anova_formula_s = substr(anova_formula_s, 1, nchar(anova_formula_s)-1) + anova_formula = as.formula(anova_formula_s) + + + + # anova + manovaObjectList = manova(anova_formula) + manovaList = summary.aov(manovaObjectList) + + # condition renaming + manovaRownames = gsub(" ","",rownames(manovaList[[1]])) + manovaNbrPvalue = length(manovaRownames)-1 + manovaRownames = manovaRownames[-(manovaNbrPvalue+1)] + + for (i in 1:length(condition)) { + manovaRownames = sub(paste("grps\\[\\[",i,"\\]\\]",sep=""),condition[i],manovaRownames) + anova_formula_s = sub(paste("grps\\[\\[",i,"\\]\\]",sep=""),condition[i],anova_formula_s) + } + + # log + cat("\nanova_formula",anova_formula_s,"\n") + + # p-value + aovPValue = sapply(manovaList,function(x){x[-(manovaNbrPvalue+1),5]}) + if(length(condition) == 1) aovPValue = t(aovPValue) + rownames(aovPValue) = paste("pvalue_",manovaRownames,sep="") + + # p-value adjusted + if(length(condition) == 1) { + aovAdjPValue = t(p.adjust(aovPValue,method=method)) + } else { + aovAdjPValue = apply(aovPValue,2,p.adjust, method=method) + } + rownames(aovAdjPValue) = paste("pvalueadjusted.",method,".",manovaRownames,sep="") + + # selection + colSumThreshold = colSums(aovAdjPValue <= threshold) + if (selection_method == "intersection") { + datafiltered = data[,colSumThreshold == nrow(aovAdjPValue )] + } else { + datafiltered = data[,colSumThreshold != 0] + } + + #data=rbind(data, aovPValue, aovAdjPValue) + data=rbind(data, aovAdjPValue) + + + if (mode == "row") { + data=t(data) + datafiltered=t(datafiltered) + } + + # -- output / return -- + write.table(data, outputdatapvalue, sep=sep, quote=F, col.names = NA) + write.table(datafiltered, outputdatafiltered, sep=sep, quote=F, col.names = NA) + + # log + cat("\nthreshold:",threshold,"\n") + cat("result:",nrow(datafiltered),"/",nrow(data),"\n") + + quit("no",status=0) +} + +# log +cat("ANOVA\n\n") +cat("Arguments\n") +args <- commandArgs(trailingOnly = TRUE) +print(args) + +listArguments = parseCommandArgs(evaluate=FALSE) +do.call(anova, listArguments) + + + diff -r 000000000000 -r 8dd2a438bfba abims_anova.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/abims_anova.xml Tue Jun 30 06:02:46 2015 -0400 @@ -0,0 +1,199 @@ + + + + Rscript + batch + + + N-way anova. With ou Without interactions + + + abims_anova.r file "$input" sampleinfo "$sampleinfo" mode "$mode" +condition "c('$condition_1' +#for $i, $s in enumerate( $conditions ) +,'${s.condition}' +#end for +)" +interaction $interaction method $method threshold $threshold selection_method $selection_method sep "$sep" dec "$dec" outputdatapvalue $dataMatrixPValue outputdatafiltered $dataMatrixFiltered + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +.. class:: infomark + +**Authors** Gildas Le Corguille ABiMS - UPMC/CNRS - Station Biologique de Roscoff - gildas.lecorguille|at|sb-roscoff.fr + +--------------------------------------------------- + +===== +Anova +===== + +----------- +Description +----------- + +Analysis of variance (ANOVA) is used to analyze the differences between group means and their associated procedures, +in which the observed variance in a particular variable is partitioned into components attributable to different sources of variation. + + + +----------- +Input files +----------- + ++---------------------------+------------+ +| Parameter : num + label | Format | ++===========================+============+ +| 1 : Data Matrix file | Tabular | ++---------------------------+------------+ +| 2 : Sample Metadata file | Tabular | ++---------------------------+------------+ + + + +------------ +Output files +------------ + + + +***.anova_pvalue.tabular** + + | A tabular file which represents for each metabolite (row), the value of the intensity in each sample (column) + two columns (aovPValue and aovAdjPValue). + +***.anova_filtered.tabular** + + | The tabular file xset.anova_pvalue.tabular containing only the metabolites that have been filtered by aovAdjPValue. + + +------ + +.. class:: infomark + +The outputs ***.anova_filtered.tabular** or ***.anova_pvalue.tabular** are tabular files. You can continue your analysis using it in the following tools: + | PCA + | Hierarchical Clustering + + + +--------------------------------------------------- + +--------------- +Working example +--------------- + + +Input files +----------- + +**>A part of an example of Data Matrix file input** + + ++--------+------------------+----------------+ +| Name | Bur-eH_FSP_12 | Bur-eH_FSP_24 | ++========+==================+================+ +|M202T601| 91206595.7559783 |106808979.08546 | ++--------+------------------+----------------+ +|M234T851| 27249137.275504 |28824971.3177926| ++--------+------------------+----------------+ + +**>A part of an example of Sample Metadata file input** + + ++---------------------------+------------+------------+------------+ +| Sample name | class | time | batch | ++===========================+============+============+============+ +| Bur-eH_FSP_12 | Bur-eH | 12 | 1 | ++---------------------------+------------+------------+------------+ +| Bur-eH_FSP_24 | Bur-eH | 24 | 1 | ++---------------------------+------------+------------+------------+ +| Bur-NI_FSP_12 | Bur-NI | 12 | 2 | ++---------------------------+------------+------------+------------+ +| Bur-NI_FSP_24 | Bur-NI | 24 | 2 | ++---------------------------+------------+------------+------------+ + +Parameters +---------- + + | Mode -> **row** + | column name of condition -> **class** + | Separator of columns: -> **tabulation** + | Decimal separator -> **.** + | PValue adjusted method -> **BH** + | Threshold -> **0.001** + + + +Output files +------------ + +**Part of an example of xset.anova_filtered.tabular:** + +.. image:: anova_pvalue.png + +**Part of an example of xset.anova_pvalue.tabular:** + +.. image:: anova_filtered.png + + + + + diff -r 000000000000 -r 8dd2a438bfba static/images/anova_filtered.png Binary file static/images/anova_filtered.png has changed diff -r 000000000000 -r 8dd2a438bfba static/images/anova_pvalue.png Binary file static/images/anova_pvalue.png has changed diff -r 000000000000 -r 8dd2a438bfba static/images/hclust.png Binary file static/images/hclust.png has changed diff -r 000000000000 -r 8dd2a438bfba static/images/pca_abims_Rplots.png Binary file static/images/pca_abims_Rplots.png has changed diff -r 000000000000 -r 8dd2a438bfba static/images/pca_abims_Rplots1.png Binary file static/images/pca_abims_Rplots1.png has changed diff -r 000000000000 -r 8dd2a438bfba static/images/pca_abims_eigenvalue.png Binary file static/images/pca_abims_eigenvalue.png has changed diff -r 000000000000 -r 8dd2a438bfba static/images/pca_abims_percentage_of_variance.png Binary file static/images/pca_abims_percentage_of_variance.png has changed