# HG changeset patch # User lecorguille # Date 1435660569 14400 # Node ID 2f7381ee5235df3ba162d580f048e263c437890c Uploaded diff -r 000000000000 -r 2f7381ee5235 abims_hclustering.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/abims_hclustering.r Tue Jun 30 06:36:09 2015 -0400 @@ -0,0 +1,38 @@ +#!/usr/local/public/bin/Rscript --verbose +# version="1.1" + +# date: 04-06-2013 +# **Authors** Gildas Le Corguille ABiMS - UPMC/CNRS - Station Biologique de Roscoff - gildas.lecorguille|at|sb-roscoff.fr + +# abims_hclust.r version 20130604 + +library(batch) +library(ctc) + +hclust_metabolomics = function(file, method = "pearson", link = "ward", normalization=TRUE, keep.hclust=FALSE, sep=";", dec="."){ + + if (sep=="tabulation") sep="\t" + if (sep=="semicolon") sep=";" + if (sep=="comma") sep="," + + # -- loading: first line is the header, first column holds the row names -- + data=read.table(file, header = TRUE, row.names=1, sep = sep, quote="\"", dec = dec, + fill = TRUE, comment.char="",na.strings = "NA") + + # -- Normalization: center and scale each row (earlier logratio variant kept commented out) -- + if (normalization) { + #meandata = apply(data,1,mean, na.rm=T) + #data = log2(data/meandata) + data=t(scale(t(data))) + } + + # -- hclust / output files for TreeView -- + file="hclust.cdt" + hclust2treeview(data,file=file, method = method, link = link, keep.hclust= keep.hclust) + + # -- output: zip all hclust.* files of the working directory into hclust.zip -- + system("zip -r hclust.zip hclust.*", ignore.stdout = TRUE) +} + +listArguments = parseCommandArgs(evaluate=FALSE) +do.call(hclust_metabolomics, listArguments) diff -r 000000000000 -r 2f7381ee5235 abims_hclustering.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/abims_hclustering.xml Tue Jun 30 06:36:09 2015 -0400 @@ -0,0 +1,192 @@ + + + using ctc R package for java-treeview + + + abims_hclustering.r file "$input" method $method link $link keep.hclust FALSE normalization $normalization sep "$sep" dec "$dec" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +.. 
class:: infomark + +**Authors** Gildas Le Corguille ABiMS - UPMC/CNRS - Station Biologique de Roscoff - gildas.lecorguille|at|sb-roscoff.fr + +--------------------------------------------------- + +======================= +Hierarchical Clustering +======================= + +----------- +Description +----------- + +This function computes a hierarchical clustering with the function +hcluster and exports the clusters to the Java TreeView file format: jtreeview.sourceforge.net. + +This function performs a **hierarchical cluster analysis** using a set +of dissimilarities for the n objects being clustered. Initially, +each object is assigned to its own cluster and then the algorithm +proceeds iteratively, at each stage joining the two most similar +clusters, continuing until there is just a single cluster. At +each stage distances between clusters are recomputed by the +Lance-Williams dissimilarity update formula according to the +particular clustering method being used. + +A number of different **clustering methods** are provided. **Ward's** +minimum variance method aims at finding compact, spherical +clusters. The **complete linkage** method finds similar clusters. +The **single linkage** method (which is closely related to the +minimal spanning tree) adopts a ‘friends of friends’ clustering +strategy. The other methods can be regarded as aiming for +clusters with characteristics somewhere between the single and +complete link methods. Note however, that methods **median** and +**centroid** are not leading to a monotone distance measure, +or equivalently the resulting dendrograms can have so-called +inversions (which are hard to interpret). 
+ + + + +----------- +Input files +----------- + ++---------------------------+------------+ +| Parameter : num + label | Format | ++===========================+============+ +| 1 : Data Matrix file | Tabular | ++---------------------------+------------+ + + +---------- +Parameters +---------- + + +**Agglomeration or Link method:** + +A number of different clustering methods are provided. Ward's minimum variance method aims at finding compact, spherical clusters. +The complete linkage method finds similar clusters. The single linkage method (which is closely related to the minimal spanning tree) adopts a ‘friends of friends’ clustering strategy. +The other methods can be regarded as aiming for clusters with characteristics somewhere between the single and complete link methods. +Note however, that methods median and centroid are not leading to a monotone distance measure, or equivalently the resulting dendrograms can have so-called inversions (which are hard to interpret). + + +------------ +Output files +------------ + +***.tab.hclust.zip** + + | A zip file containing three files (hclust.atr, hclust.cdt and hclust.gtr) that are in Treeview format. If you want more information or want to download Treeview, you can visit the website: + | http://jtreeview.sourceforge.net + + + +------ + +.. class:: infomark + +You can continue your analysis using Treeview (outside of Galaxy) with the three files (atr, cdt and gtr) within the **xset.tab.hclust.zip** output. 
+ + + + +--------------------------------------------------- + +--------------- +Working example +--------------- + + +Input files +----------- + +**>Part of an example Data Matrix input file** + + ++--------+------------------+----------------+ +| Name | Bur-eH_FSP_102 | Bur-eH_FSP_22 | ++========+==================+================+ +|M202T601| 91206595.7559783 |106808979.08546 | ++--------+------------------+----------------+ +|M234T851| 27249137.275504 |28824971.3177926| ++--------+------------------+----------------+ + + +Parameters +---------- + + | Distance measure method -> **pearson** + | Agglomeration/Link method -> **ward** + | Normalization by center and scale -> **TRUE** + | Separator of columns -> **tabulation** + | Decimal separator: -> **.** + + + +Output files +------------ + +**Example of a dendrogram/heatmap generated by the Treeview tool**: + +.. image:: hclust.png + + + + + diff -r 000000000000 -r 2f7381ee5235 static/images/anova_filtered.png Binary file static/images/anova_filtered.png has changed diff -r 000000000000 -r 2f7381ee5235 static/images/anova_pvalue.png Binary file static/images/anova_pvalue.png has changed diff -r 000000000000 -r 2f7381ee5235 static/images/hclust.png Binary file static/images/hclust.png has changed diff -r 000000000000 -r 2f7381ee5235 static/images/pca_abims_Rplots.png Binary file static/images/pca_abims_Rplots.png has changed diff -r 000000000000 -r 2f7381ee5235 static/images/pca_abims_Rplots1.png Binary file static/images/pca_abims_Rplots1.png has changed diff -r 000000000000 -r 2f7381ee5235 static/images/pca_abims_eigenvalue.png Binary file static/images/pca_abims_eigenvalue.png has changed diff -r 000000000000 -r 2f7381ee5235 static/images/pca_abims_percentage_of_variance.png Binary file static/images/pca_abims_percentage_of_variance.png has changed