view tools/new_operations/cluster.xml @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
line wrap: on
line source

<tool id="gops_cluster_1" name="Cluster">
  <description>the intervals of a dataset</description>
  <!--
    Runs gops_cluster.py with the Python interpreter. Arguments, in order:
      $input1 $output : the input dataset and the output dataset
      -1 chrom,start,end,strand : column settings taken from input1's metadata
      -d $distance    : value of the "distance" parameter
      -m $minregions  : value of the "minregions" parameter
      -o $returntype  : value of the "returntype" select (1 to 5)
    NOTE(review): how each flag is interpreted is defined in gops_cluster.py,
    which is not visible here; confirm there before changing order or spacing.
  -->
  <command interpreter="python">gops_cluster.py $input1 $output -1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol} -d $distance -m $minregions -o $returntype</command>
  <inputs>
    <!-- Interval dataset to cluster; its chrom/start/end/strand column
         metadata is referenced by the command line. -->
    <param name="input1" type="data" format="interval" label="Cluster intervals of" />
    <!-- Passed to the script as -d; the help hint shows the unit (bp). -->
    <param name="distance" type="integer" size="5" value="1" label="max distance between intervals" help="(bp)" />
    <!-- Passed to the script as -m. -->
    <param name="minregions" type="integer" size="5" value="2" label="min number of intervals per cluster" />
    <!-- Passed to the script as -o; the option value selects the output mode. -->
    <param name="returntype" type="select" label="Return type">
      <option value="1">Merge clusters into single intervals</option>
      <option value="2">Find cluster intervals; preserve comments and order</option>
      <option value="3">Find cluster intervals; output grouped by clusters</option>
      <option value="4">Find the smallest interval in each cluster</option>
      <option value="5">Find the largest interval in each cluster</option>
    </param>
  </inputs>
  <outputs>
    <!-- Single result dataset. Its metadata is sourced from input1
         (metadata_source). NOTE(review): format="input" presumably makes the
         output take the input dataset's datatype; confirm against the Galaxy
         version in use. -->
    <data name="output" format="input" metadata_source="input1"/>
  </outputs>
  <!-- Registers exec_after_cluster from operation_filter.py as the
       exec_after_process hook. NOTE(review): what that function does is
       defined outside this file; confirm there before changing the outputs. -->
  <code file="operation_filter.py">
    <hook exec_after_process="exec_after_cluster" />
  </code>
  <tests>
    <!-- Return type 1 on 5.bed. -->
    <test>
      <param name="input1" value="5.bed"/>
      <param name="distance" value="1"/>
      <param name="minregions" value="2"/>
      <param name="returntype" value="1"/>
      <output name="output" file="gops-cluster-1.bed"/>
    </test>
    <!-- Return type 1 on a different input fixture; the expected output is
         the same file as in the previous test. NOTE(review): presumably a
         regression check for large coordinate values; confirm. -->
    <test>
      <param name="input1" value="gops_cluster_bigint.bed"/>
      <param name="distance" value="1"/>
      <param name="minregions" value="2"/>
      <param name="returntype" value="1"/>
      <output name="output" file="gops-cluster-1.bed"/>
    </test>
    <!-- Return type 2 on 5.bed. -->
    <test>
      <param name="input1" value="5.bed"/>
      <param name="distance" value="1"/>
      <param name="minregions" value="2"/>
      <param name="returntype" value="2"/>
      <output name="output" file="gops-cluster-2.bed"/>
    </test>
    <!-- Return type 3 on 5.bed. Return types 4 and 5 have no test here. -->
    <test>
      <param name="input1" value="5.bed"/>
      <param name="distance" value="1"/>
      <param name="minregions" value="2"/>
      <param name="returntype" value="3"/>
      <output name="output" file="gops-cluster-3.bed"/>
    </test>
  </tests>
  <help>

.. class:: infomark

**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in interval format. Use "edit attributes" to set chromosome, start, end, and strand columns.

-----

**Screencasts!**

See Galaxy Interval Operation Screencasts_ (right click to open this link in another window).

.. _Screencasts: http://wiki.g2.bx.psu.edu/Learn/Interval%20Operations

-----

**Syntax**

- **Maximum distance** is the greatest distance in base pairs allowed between intervals that will be considered &quot;clustered&quot;.  **Negative** values for distance are allowed, and are useful for clustering intervals that overlap.
- **Minimum intervals per cluster** allows a threshold to be set on the minimum number of intervals to be considered a cluster.  Any area with fewer intervals than this minimum will not be included in the output.
- **Merge clusters into single intervals** outputs intervals that span the entire cluster.
- **Find cluster intervals; preserve comments and order** filters out non-cluster intervals while maintaining the original ordering and comments in the file.
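- **Find cluster intervals; output grouped by clusters** filters out non-cluster intervals, but outputs the cluster intervals so that they are grouped together. Comments and original ordering in the file are lost.
- **Find the smallest interval in each cluster** outputs, for each cluster, only the smallest interval it contains.
- **Find the largest interval in each cluster** outputs, for each cluster, only the largest interval it contains.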

-----

**Example**

.. image:: ./static/operation_icons/gops_cluster.gif

</help>
</tool>