changeset 0:cdcb3276c0e1 draft

Uploaded
author kellrott
date Thu, 25 Oct 2012 16:41:28 -0400
parents
children e1bc1519822b
files cluster3.xml
diffstat 1 files changed, 207 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cluster3.xml	Thu Oct 25 16:41:28 2012 -0400
@@ -0,0 +1,207 @@
+<tool id="cluster3" name="Cluster 3" version="1.0.0">
+  <description>Cluster 3.0</description>
+  <command interpreter="python">$scriptfile</command>
+  <inputs>
+    <param name="filename" type="data" format="tab" label="Matrix"/>
+    <param name="row_center" type="select" label="Row Center (gene)">
+      <option value="x">No row Center</option>
+      <option value="a">Subtract mean of row</option>
+      <option value="m">Subtract median of row</option>
+    </param>
+    <param name="log_transform" type="boolean" label="Log transform data" checked="false"/>
+    <param name="row_normalize" type="boolean" label="Normalize Rows (gene)"/>
+    <!-- Column (sample) counterparts of the row pre-processing options above -->
+    <param name="col_center" type="select" label="Column Center (sample)">
+      <option value="x">No column Center</option>
+      <option value="a">Subtract mean of column</option>
+      <option value="m">Subtract median of column</option>
+    </param>
+    <param name="col_normalize" type="boolean" label="Normalize Columns (sample)"/>
+    <!-- The run mode decides which nested parameter group is presented -->
+    <conditional name="cparam">
+      <param name="run_mode" type="select" label="Run Mode">
+        <option value="clustering">Clustering</option>
+        <option value="pca">Principal Component Analysis</option>
+      </param>
+      <when value="clustering">
+        <param name="col_distance" type="select" label="Column (sample) clustering">
+          <option value="0">No clustering</option>
+          <option value="1">Uncentered correlation</option>
+          <option value="2">Pearson correlation</option>
+          <option value="3">Uncentered correlation, absolute value</option>
+          <option value="4">Pearson correlation, absolute value</option>
+          <option value="5">Spearman's rank correlation</option>
+          <option value="6">Kendall's tau</option>
+          <option value="7">Euclidean distance</option>
+          <option value="8">City-block distance</option>
+        </param>
+        <!-- Both distance selects share the same 0-8 value codes -->
+        <param name="row_distance" type="select" label="Row (gene) clustering">
+          <option value="0">No clustering</option>
+          <option value="1">Uncentered correlation</option>
+          <option value="2">Pearson correlation</option>
+          <option value="3">Uncentered correlation, absolute value</option>
+          <option value="4">Pearson correlation, absolute value</option>
+          <option value="5">Spearman's rank correlation</option>
+          <option value="6">Kendall's tau</option>
+          <option value="7">Euclidean distance</option>
+          <option value="8">City-block distance</option>
+        </param>
+        <conditional name="cluster_mode">
+          <param name="mode_name" type="select" label="Clustering Type">
+            <option value="hierarchical">Hierarchical Clustering</option>
+            <option value="kmeans">Kmeans Clustering</option>
+          </param>
+          <when value="hierarchical">
+            <param name="hclustermethod" type="select" label="Hierarchical clustering method">
+              <option value="m">Pairwise complete-linkage</option>
+              <option value="s">Pairwise single-linkage</option>
+              <option value="c">Pairwise centroid-linkage</option>
+              <option value="a">Pairwise average-linkage</option>
+            </param>
+          </when>
+          <when value="kmeans">
+            <param name="knum" type="integer" optional="true" label="K-Means count"/>
+            <param name="kruns" type="integer" optional="true" label="K-Means run count"/>
+          </when>
+        </conditional>
+      </when>
+
+      <when value="pca">
+        <param name="pca_mode" type="select" label="Principal Component Analysis Mode">
+          <option value="row">Rows (genes)</option>
+          <option value="col">Cols (samples)</option>
+        </param>
+      </when>
+    </conditional>
+
+  </inputs>
+  <outputs><!-- every dataset below is gated by its filter expression on the chosen run/cluster mode -->
+    <data name="cdtfile" format="cdt" label="CDT File">
+      <filter>cparam['run_mode'] == "clustering"</filter>
+    </data>
+    <data name="atrfile" format="atr" label="ATR File">
+      <filter>cparam['run_mode'] == "clustering" and cparam['cluster_mode']['mode_name'] == "hierarchical" and cparam['col_distance'] != "0"</filter>
+    </data>
+    <data name="gtrfile" format="gtr" label="GTR File">
+      <filter>cparam['run_mode'] == "clustering" and cparam['cluster_mode']['mode_name'] == "hierarchical" and cparam['row_distance'] != "0"</filter>
+    </data>
+    <data name="kagfile" format="txt" label="KAG File">
+      <filter>cparam['run_mode'] == "clustering" and cparam['cluster_mode']['mode_name'] == "kmeans" and cparam['col_distance'] != "0"</filter>
+    </data>
+    <data name="kggfile" format="txt" label="KGG File">
+      <filter>cparam['run_mode'] == "clustering" and cparam['cluster_mode']['mode_name'] == "kmeans" and cparam['row_distance'] != "0"</filter>
+    </data>
+    <data name="pcagenefile" format="txt" label="PCA Gene File">
+      <filter>cparam['run_mode']=="pca" and cparam['pca_mode'] == "row"</filter>
+    </data>
+    <data name="pcagenecoordfile" format="txt" label="PCA Gene Coord File">
+      <filter>cparam['run_mode']=="pca" and cparam['pca_mode'] == "row"</filter>
+    </data>
+    <data name="pcasamplefile" format="txt" label="PCA Sample File">
+      <filter>cparam['run_mode']=="pca" and cparam['pca_mode'] == "col"</filter>
+    </data>
+    <data name="pcasamplecoordfile" format="txt" label="PCA Sample Coord File">
+      <filter>cparam['run_mode']=="pca" and cparam['pca_mode'] == "col"</filter>
+    </data>
+  </outputs>
+  	<configfiles>
+    <!-- Cheetah template rendered by Galaxy into the Python driver; "##" lines are template-only comments -->
+  <configfile name="scriptfile"><![CDATA[#!/usr/bin/env python
+import subprocess
+import shutil
+import sys
+from glob import glob
+
+cmd = ["cluster", "-f", "$filename", "-u", "galaxy_cluster"]
+## Pre-processing inputs sit outside the run-mode conditional, so apply them in every mode.
+if "$log_transform" == "true":
+	cmd.append("-l")
+if "$row_center" != "x":
+	cmd.extend(['-cg', '$row_center'])
+if "$row_normalize" == "true":
+	cmd.append("-ng")
+if "$col_center" != "x":
+	cmd.extend(['-ca', '$col_center'])
+if "$col_normalize" == "true":
+	cmd.append("-na")
+runK = False
+#if $cparam.run_mode == "clustering":
+## Pass both distance codes explicitly so that 0 (no clustering) is not left to cluster's defaults.
+cmd.extend(['-g', '$cparam.row_distance', '-e', '$cparam.col_distance'])
+#if $cparam.cluster_mode.mode_name == "hierarchical":
+cmd.extend(['-m', '$cparam.cluster_mode.hclustermethod'])
+#end if
+#if $cparam.cluster_mode.mode_name == "kmeans":
+## Without a count cluster would not run k-means and the kag/kgg outputs would stay empty.
+if not len("${cparam.cluster_mode.knum}"):
+	sys.exit("K-Means count is required for K-means clustering")
+cmd.extend(['-k', '${cparam.cluster_mode.knum}'])
+runK = True
+if len("${cparam.cluster_mode.kruns}"):
+	cmd.extend(['-r', '${cparam.cluster_mode.kruns}'])
+#end if
+#end if
+
+#if $cparam.run_mode == "pca":
+if "$cparam.pca_mode" == "row":
+	cmd.append("-pg")
+if "$cparam.pca_mode" == "col":
+	cmd.append("-pa")
+#end if
+print("Running " + " ".join(cmd))
+## Stop on a non-zero exit status instead of failing later on missing output files.
+status = subprocess.call(cmd)
+if status != 0:
+	sys.exit("cluster exited with status %d" % status)
+
+#if $cparam.run_mode == "clustering":
+if "$cparam.row_distance" != "0" or "$cparam.col_distance" != "0":
+	if runK:
+		for f in glob("galaxy_cluster*.cdt"):
+			shutil.move(f, "$cdtfile")
+	else:
+		shutil.move("galaxy_cluster.cdt", "$cdtfile")
+if "$cparam.col_distance" != "0":
+	if runK:
+		for f in glob("galaxy_cluster*.kag"):
+			shutil.move(f, "$kagfile")
+	else:
+		shutil.move("galaxy_cluster" + ".atr", "$atrfile")
+if "$cparam.row_distance" != "0":
+	if runK:
+		for f in glob("galaxy_cluster*.kgg"):
+			shutil.move(f, "$kggfile")
+	else:
+		shutil.move("galaxy_cluster" + ".gtr", "$gtrfile")
+#end if
+
+#if $cparam.run_mode == "pca":
+if "$cparam.pca_mode" == "row":
+	shutil.move("galaxy_cluster_pca_gene.pc.txt", "$pcagenefile")
+	shutil.move("galaxy_cluster_pca_gene.coords.txt", "$pcagenecoordfile")
+if "$cparam.pca_mode" == "col":
+	shutil.move("galaxy_cluster_pca_array.pc.txt", "$pcasamplefile")
+	shutil.move("galaxy_cluster_pca_array.coords.txt", "$pcasamplecoordfile")
+#end if
+]]></configfile>
+  	</configfiles>
+<help>
+
+A Wrapper for the Cluster3.0 program http://bonsai.hgc.jp/~mdehoon/software/cluster/software.htm
+
+Principal modes:
+
+ - K-means Clustering
+ - Hierarchical Clustering
+ - Principal Component Analysis
+
+Source code at http://bonsai.hgc.jp/~mdehoon/software/cluster/cluster-1.50.tar.gz
+
+To build the command-line version, run:
+./configure --without-x
+make
+make install
+
+</help>
+</tool>