Mercurial > repos > kellrott > cluster3
changeset 0:cdcb3276c0e1 draft
Uploaded
author | kellrott |
---|---|
date | Thu, 25 Oct 2012 16:41:28 -0400 |
parents | |
children | e1bc1519822b |
files | cluster3.xml |
diffstat | 1 files changed, 207 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<tool id="cluster3" name="Cluster 3" version="1.0.0">
    <!--
        Galaxy wrapper around the Cluster 3.0 command line program ("cluster").

        The tool runs a Python script that Galaxy generates from the
        "scriptfile" configfile below. That script builds the cluster command
        line from the selected parameters, runs it with the job name
        "galaxy_cluster", and then moves the files cluster wrote into the
        Galaxy output datasets.
    -->
    <description>Cluster 3.0</description>
    <command interpreter="python">$scriptfile</command>
    <inputs>
        <param name="filename" type="data" format="tab" label="Matrix"/>

        <!-- Data adjustment options; "x" means the adjustment is skipped. -->
        <param name="row_center" type="select" label="Row Center (gene)">
            <option value="x">No row Center</option>
            <option value="a">Subtract mean of row</option>
            <option value="m">Subtract median of row</option>
        </param>
        <param name="log_transform" type="boolean" label="Log transform data" checked="false"/>
        <param name="row_normalize" type="boolean" label="Normalize Rows (gene)"/>

        <param name="col_center" type="select" label="Column Center (sample)">
            <option value="x">No column Center</option>
            <option value="a">Subtract mean of column</option>
            <option value="m">Subtract median of column</option>
        </param>
        <param name="col_normalize" type="boolean" label="Normalize Columns (sample)"/>

        <conditional name="cparam">
            <param name="run_mode" type="select" label="Run Mode">
                <option value="clustering">Clustering</option>
                <option value="pca">Principal Component Analysis</option>
            </param>
            <when value="clustering">
                <!-- Distance measure codes are handed to cluster unchanged; 0 disables clustering on that axis. -->
                <param name="col_distance" type="select" label="Column (sample) clustering">
                    <option value="0">No clustering</option>
                    <option value="1">Uncentered correlation</option>
                    <option value="2">Pearson correlation</option>
                    <option value="3">Uncentered correlation, absolute value</option>
                    <option value="4">Pearson correlation, absolute value</option>
                    <option value="5">Spearman's rank correlation</option>
                    <option value="6">Kendall's tau</option>
                    <option value="7">Euclidean distance</option>
                    <option value="8">City-block distance</option>
                </param>

                <param name="row_distance" type="select" label="Row (gene) clustering">
                    <option value="0">No clustering</option>
                    <option value="1">Uncentered correlation</option>
                    <option value="2">Pearson correlation</option>
                    <option value="3">Uncentered correlation, absolute value</option>
                    <option value="4">Pearson correlation, absolute value</option>
                    <option value="5">Spearman's rank correlation</option>
                    <option value="6">Kendall's tau</option>
                    <option value="7">Euclidean distance</option>
                    <option value="8">City-block distance</option>
                </param>
                <conditional name="cluster_mode">
                    <param name="mode_name" type="select" label="Clustering Type">
                        <option value="hierarchical">Hierarchical Clustering</option>
                        <option value="kmeans">Kmeans Clustering</option>
                    </param>
                    <when value="hierarchical">
                        <param name="hclustermethod" type="select" label="Hierarchical clustering method">
                            <option value="m">Pairwise complete-linkage</option>
                            <option value="s">Pairwise single-linkage</option>
                            <option value="c">Pairwise centroid-linkage</option>
                            <option value="a">Pairwise average-linkage</option>
                        </param>
                    </when>
                    <when value="kmeans">
                        <param name="knum" type="integer" optional="true" label="K-Means count"/>
                        <param name="kruns" type="integer" optional="true" label="K-Means run count"/>
                    </when>
                </conditional>
            </when>

            <when value="pca">
                <param name="pca_mode" type="select" label="Principal Component Analysis Mode">
                    <option value="row">Rows (genes)</option>
                    <option value="col">Cols (samples)</option>
                </param>
            </when>
        </conditional>

    </inputs>
    <outputs>
        <!-- Each output is only created for the run mode / clustering type that produces it. -->
        <data name="cdtfile" format="cdt" label="CDT File">
            <filter>cparam['run_mode'] == "clustering"</filter>
        </data>
        <data format="atr" name="atrfile" label="ATR File">
            <filter>cparam['run_mode'] == "clustering" and cparam['cluster_mode']['mode_name'] == "hierarchical" and cparam['col_distance'] != "0"</filter>
        </data>
        <data format="gtr" name="gtrfile" label="GTR File">
            <filter>cparam['run_mode'] == "clustering" and cparam['cluster_mode']['mode_name'] == "hierarchical" and cparam['row_distance'] != "0"</filter>
        </data>
        <data format="txt" name="kagfile" label="KAG File">
            <filter>cparam['run_mode'] == "clustering" and cparam['cluster_mode']['mode_name'] == "kmeans" and cparam['col_distance'] != "0"</filter>
        </data>
        <data format="txt" name="kggfile" label="KGG File">
            <filter>cparam['run_mode'] == "clustering" and cparam['cluster_mode']['mode_name'] == "kmeans" and cparam['row_distance'] != "0"</filter>
        </data>
        <data format="txt" name="pcagenefile" label="PCA Gene File">
            <filter>cparam['run_mode']=="pca" and cparam['pca_mode'] == "row"</filter>
        </data>
        <data format="txt" name="pcagenecoordfile" label="PCA Gene Coord File">
            <filter>cparam['run_mode']=="pca" and cparam['pca_mode'] == "row"</filter>
        </data>
        <data format="txt" name="pcasamplefile" label="PCA Sample File">
            <filter>cparam['run_mode']=="pca" and cparam['pca_mode'] == "col"</filter>
        </data>
        <data format="txt" name="pcasamplecoordfile" label="PCA Sample Coord File">
            <filter>cparam['run_mode']=="pca" and cparam['pca_mode'] == "col"</filter>
        </data>
    </outputs>
    <configfiles>

        <configfile name="scriptfile"><![CDATA[#!/usr/bin/env python
## Cheetah template for the driver script. Lines starting with a single hash
## followed by if / end if are template directives evaluated by Galaxy before
## the script runs; lines starting with two hashes are template comments and
## do not appear in the generated script.
import subprocess
import shutil
import sys
from glob import glob

cmd = ["cluster", "-f", "$filename", "-u", "galaxy_cluster"]

#if $cparam.run_mode == "clustering":
## Always pass both distance codes, including 0 (no clustering), so that the
## run never depends on the defaults of the cluster binary. Per the Cluster 3.0
## manual gene clustering is on by default when -g is omitted.
cmd.extend(['-e', '$cparam.col_distance'])
cmd.extend(['-g', '$cparam.row_distance'])

runK = False
#if $cparam.cluster_mode.mode_name == "hierarchical":
cmd.extend(['-m', '$cparam.cluster_mode.hclustermethod'])
#end if
#if $cparam.cluster_mode.mode_name == "kmeans":
## knum and kruns are optional; an unset value renders as an empty string.
if "${cparam.cluster_mode.knum}":
    cmd.extend(['-k', '${cparam.cluster_mode.knum}'])
    runK = True

if "${cparam.cluster_mode.kruns}":
    cmd.extend(['-r', '${cparam.cluster_mode.kruns}'])
#end if

## NOTE(review): the data adjustment flags are only added in clustering mode,
## matching the original placement of the normalize and log flags. Confirm
## whether they should also apply to PCA runs.
if "$row_center" != "x":
    cmd.extend(['-cg', '$row_center'])

if "$col_center" != "x":
    cmd.extend(['-ca', '$col_center'])

if "$col_normalize" == "true":
    cmd.append("-na")

if "$row_normalize" == "true":
    cmd.append("-ng")

if "$log_transform" == "true":
    cmd.append("-l")
#end if

#if $cparam.run_mode == "pca":
if "$cparam.pca_mode" == "row":
    cmd.append("-pg")
if "$cparam.pca_mode" == "col":
    cmd.append("-pa")
#end if

print("Running " + " ".join(cmd))

## Stop on failure instead of reporting a confusing missing file error from
## the moves below.
returncode = subprocess.call(cmd)
if returncode != 0:
    sys.exit("cluster exited with status %d" % returncode)

#if $cparam.run_mode == "clustering":
## K-means output names carry extra text after the job name, hence the globs;
## hierarchical output uses the plain job name.
if "$cparam.row_distance" != "0" or "$cparam.col_distance" != "0":
    if runK:
        for f in glob("galaxy_cluster*.cdt"):
            shutil.move(f, "$cdtfile")
    else:
        shutil.move("galaxy_cluster.cdt", "$cdtfile")

if "$cparam.col_distance" != "0":
    if runK:
        for f in glob("galaxy_cluster*.kag"):
            shutil.move(f, "$kagfile")
    else:
        shutil.move("galaxy_cluster.atr", "$atrfile")

if "$cparam.row_distance" != "0":
    if runK:
        for f in glob("galaxy_cluster*.kgg"):
            shutil.move(f, "$kggfile")
    else:
        shutil.move("galaxy_cluster.gtr", "$gtrfile")
#end if

#if $cparam.run_mode == "pca":
if "$cparam.pca_mode" == "row":
    shutil.move("galaxy_cluster_pca_gene.pc.txt", "$pcagenefile")
    shutil.move("galaxy_cluster_pca_gene.coords.txt", "$pcagenecoordfile")
if "$cparam.pca_mode" == "col":
    shutil.move("galaxy_cluster_pca_array.pc.txt", "$pcasamplefile")
    shutil.move("galaxy_cluster_pca_array.coords.txt", "$pcasamplecoordfile")
#end if
]]></configfile>
    </configfiles>
<help>

A Wrapper for the Cluster3.0 program http://bonsai.hgc.jp/~mdehoon/software/cluster/software.htm

Principal modes:

 - K-means Clustering
 - Hierarchical Clustering
 - Principal Component Analysis

Source code at http://bonsai.hgc.jp/~mdehoon/software/cluster/cluster-1.50.tar.gz

For command line version,
./configure --without-x
make
make install

</help>
</tool>