view cluster3.xml @ 0:cdcb3276c0e1 draft

Uploaded
author kellrott
date Thu, 25 Oct 2012 16:41:28 -0400
parents
children e1bc1519822b
line wrap: on
line source

<tool id="cluster3" name="Cluster 3" version="1.0.0">
  <description>Cluster 3.0</description>
  <!-- Runs the file rendered from the configfile named "scriptfile" using the Python interpreter. -->
  <command interpreter="python">$scriptfile</command>
  <inputs>
    <!-- Input matrix handed to cluster via its -f option by the scriptfile template. -->
    <param name="filename" type="data" format="tab" label="Matrix"/>

    <!-- Row (gene) preprocessing options. -->
    <param name="row_center" type="select" label="Row Center (gene)">
      <option value="x">No row Center</option>
      <option value="a">Subtract mean of row</option>
      <option value="m">Subtract median of row</option>
    </param>
    <!-- Boolean params take their initial state from "checked"; "default" is not a boolean attribute. -->
    <param name="log_transform" type="boolean" label="Log transform data" checked="false"/>
    <param name="row_normalize" type="boolean" label="Normalize Rows (gene)"/>

    <!-- Column (sample) preprocessing options. -->
    <param name="col_center" type="select" label="Column Center (sample)">
      <option value="x">No column Center</option>
      <option value="a">Subtract mean of column</option>
      <option value="m">Subtract median of column</option>
    </param>
    <param name="col_normalize" type="boolean" label="Normalize Columns (sample)"/>

    <!-- Top-level switch between clustering and PCA; the output filters key off run_mode. -->
    <conditional name="cparam">
      <param name="run_mode" type="select" label="Run Mode">
        <option value="clustering">Clustering</option>
        <option value="pca">Principal Component Analysis</option>
      </param>

      <when value="clustering">
        <!-- Distance option values are inserted verbatim into the cluster command line; "0" disables that axis. -->
        <param name="col_distance" type="select" label="Column (sample) clustering">
          <option value="0">No clustering</option>
          <option value="1">Uncentered correlation</option>
          <option value="2">Pearson correlation</option>
          <option value="3">Uncentered correlation, absolute value</option>
          <option value="4">Pearson correlation, absolute value</option>
          <option value="5">Spearman's rank correlation</option>
          <option value="6">Kendall's tau</option>
          <option value="7">Euclidean distance</option>
          <option value="8">City-block distance</option>
        </param>

        <param name="row_distance" type="select" label="Row (gene) clustering">
          <option value="0">No clustering</option>
          <option value="1">Uncentered correlation</option>
          <option value="2">Pearson correlation</option>
          <option value="3">Uncentered correlation, absolute value</option>
          <option value="4">Pearson correlation, absolute value</option>
          <option value="5">Spearman's rank correlation</option>
          <option value="6">Kendall's tau</option>
          <option value="7">Euclidean distance</option>
          <option value="8">City-block distance</option>
        </param>

        <!-- Clustering algorithm; each branch exposes only the parameters that algorithm uses. -->
        <conditional name="cluster_mode">
          <param name="mode_name" type="select" label="Clustering Type">
            <option value="hierarchical">Hierarchical Clustering</option>
            <option value="kmeans">Kmeans Clustering</option>
          </param>
          <when value="hierarchical">
            <param name="hclustermethod" type="select" label="Hierarchical clustering method">
              <option value="m">Pairwise complete-linkage</option>
              <option value="s">Pairwise single-linkage</option>
              <option value="c">Pairwise centroid-linkage</option>
              <option value="a">Pairwise average-linkage</option>
            </param>
          </when>
          <when value="kmeans">
            <param name="knum" type="integer" optional="true" label="K-Means count"/>
            <param name="kruns" type="integer" optional="true" label="K-Means run count"/>
          </when>
        </conditional>
      </when>

      <when value="pca">
        <param name="pca_mode" type="select" label="Principal Component Analysis Mode">
          <option value="row">Rows (genes)</option>
          <option value="col">Cols (samples)</option>
        </param>
      </when>
    </conditional>

  </inputs>
  <outputs>
    <!-- Clustering outputs. Each filter mirrors the run mode, clustering type and
         per-axis distance selection made in the inputs section. -->
    <data name="cdtfile" format="cdt" label="CDT File">
      <filter>cparam['run_mode'] == "clustering"</filter>
    </data>
    <data name="atrfile" format="atr" label="ATR File">
      <filter>cparam['run_mode'] == "clustering" and cparam['cluster_mode']['mode_name'] == "hierarchical" and cparam['col_distance'] != "0"</filter>
    </data>
    <data name="gtrfile" format="gtr" label="GTR File">
      <filter>cparam['run_mode'] == "clustering" and cparam['cluster_mode']['mode_name'] == "hierarchical" and cparam['row_distance'] != "0"</filter>
    </data>
    <data name="kagfile" format="txt" label="KAG File">
      <filter>cparam['run_mode'] == "clustering" and cparam['cluster_mode']['mode_name'] == "kmeans" and cparam['col_distance'] != "0"</filter>
    </data>
    <data name="kggfile" format="txt" label="KGG File">
      <filter>cparam['run_mode'] == "clustering" and cparam['cluster_mode']['mode_name'] == "kmeans" and cparam['row_distance'] != "0"</filter>
    </data>

    <!-- PCA outputs: one pair of files for row (gene) mode, one pair for column (sample) mode. -->
    <data name="pcagenefile" format="txt" label="PCA Gene File">
      <filter>cparam['run_mode']=="pca" and cparam['pca_mode'] == "row"</filter>
    </data>
    <data name="pcagenecoordfile" format="txt" label="PCA Gene Coord File">
      <filter>cparam['run_mode']=="pca" and cparam['pca_mode'] == "row"</filter>
    </data>
    <data name="pcasamplefile" format="txt" label="PCA Sample File">
      <filter>cparam['run_mode']=="pca" and cparam['pca_mode'] == "col"</filter>
    </data>
    <data name="pcasamplecoordfile" format="txt" label="PCA Sample Coord File">
      <filter>cparam['run_mode']=="pca" and cparam['pca_mode'] == "col"</filter>
    </data>
  </outputs>
  	<configfiles>
  <!-- Cheetah template rendered into the Python driver script that the <command> executes.
       Lines starting with "##" are Cheetah comments and do not appear in the rendered script.
       The script builds the cluster command line from the tool parameters, runs it with the
       job name "galaxy_cluster", then moves the result files onto the Galaxy output paths. -->
  <configfile name="scriptfile"><![CDATA[#!/usr/bin/env python
import subprocess
import shutil
import sys
from glob import glob

cmd = ["cluster", "-f", "$filename", "-u", "galaxy_cluster"]

## Preprocessing flags are top-level tool parameters, so they are emitted for
## both run modes (clustering and PCA).
if "$log_transform" == "true":
    cmd.append("-l")

## "x" is the "no centering" choice; "a" (mean) and "m" (median) are passed through.
if "$row_center" != "x":
    cmd.extend(['-cg', '$row_center'])

if "$row_normalize" == "true":
    cmd.append("-ng")

if "$col_center" != "x":
    cmd.extend(['-ca', '$col_center'])

if "$col_normalize" == "true":
    cmd.append("-na")

runK = False
#if $cparam.run_mode == "clustering":
## Both metrics are always passed explicitly. The Cluster 3.0 manual gives 1 as
## the default for -g, so leaving it out would cluster rows even when
## "No clustering" was selected.
cmd.extend(['-g', '$cparam.row_distance'])
cmd.extend(['-e', '$cparam.col_distance'])

#if $cparam.cluster_mode.mode_name == "hierarchical":
cmd.extend(['-m', '$cparam.cluster_mode.hclustermethod'])
#end if

#if $cparam.cluster_mode.mode_name == "kmeans":
## Without -k the cluster program would not run k-means, and the k-means
## outputs could never be produced, so a blank count is a hard error.
if not len("${cparam.cluster_mode.knum}"):
    sys.exit("K-Means count is required for k-means clustering")
cmd.extend(['-k', '${cparam.cluster_mode.knum}'])
runK = True

if len("${cparam.cluster_mode.kruns}"):
    cmd.extend(['-r', '${cparam.cluster_mode.kruns}'])
#end if
#end if

#if $cparam.run_mode == "pca":
if "$cparam.pca_mode" == "row":
    cmd.append("-pg")
if "$cparam.pca_mode" == "col":
    cmd.append("-pa")
#end if

print("Running " + " ".join(cmd))

## Stop on a failing exit status so the problem is reported as a cluster
## failure rather than as a missing-file error from the moves below.
returncode = subprocess.call(cmd)
if returncode != 0:
    sys.exit("cluster exited with status %d" % returncode)

#if $cparam.run_mode == "clustering":
## K-means result files carry a generated suffix after the job name, hence the glob.
if "$cparam.row_distance" != "0" or "$cparam.col_distance" != "0":
    if runK:
        for f in glob("galaxy_cluster*.cdt"):
            shutil.move(f, "$cdtfile")
    else:
        shutil.move("galaxy_cluster.cdt", "$cdtfile")

if "$cparam.col_distance" != "0":
    if runK:
        for f in glob("galaxy_cluster*.kag"):
            shutil.move(f, "$kagfile")
    else:
        shutil.move("galaxy_cluster" + ".atr", "$atrfile")

if "$cparam.row_distance" != "0":
    if runK:
        for f in glob("galaxy_cluster*.kgg"):
            shutil.move(f, "$kggfile")
    else:
        shutil.move("galaxy_cluster" + ".gtr", "$gtrfile")
#end if

#if $cparam.run_mode == "pca":
if "$cparam.pca_mode" == "row":
    shutil.move("galaxy_cluster_pca_gene.pc.txt", "$pcagenefile")
    shutil.move("galaxy_cluster_pca_gene.coords.txt", "$pcagenecoordfile")
if "$cparam.pca_mode" == "col":
    shutil.move("galaxy_cluster_pca_array.pc.txt", "$pcasamplefile")
    shutil.move("galaxy_cluster_pca_array.coords.txt", "$pcasamplecoordfile")
#end if
]]></configfile>
  	</configfiles>
<help>

A Wrapper for the Cluster3.0 program http://bonsai.hgc.jp/~mdehoon/software/cluster/software.htm

Principal modes:

 - K-means Clustering
 - Hierarchical Clustering
 - Principal Component Analysis

Source code at http://bonsai.hgc.jp/~mdehoon/software/cluster/cluster-1.50.tar.gz

To build the command line version::

    ./configure --without-x
    make
    make install

</help>
</tool>