view SMART/galaxy/Clusterize.xml @ 63:5f210bc9f486

Added a simple test for Clusterize
author m-zytnicki
date Mon, 19 Oct 2015 12:02:29 +0200
parents 90f4b29d884f
children
line wrap: on
line source

<tool id="MergingDataClusterize" name="clusterize">
	<!-- One-line summary of what the tool does. -->
	<description>Clusterize features when their genomic intervals overlap.</description>
	<!-- Declares a "set_environment" requirement named PYTHONPATH.
	     NOTE(review): presumably this lets clusterize.py import its companion modules; where the
	     value is defined is not visible in this file (confirm against the repository's dependency definitions). -->
	<requirements>
		<requirement type="set_environment">PYTHONPATH</requirement>
	</requirements>
	<!-- Command template. The #set line joins every dataset selected in formatType.inputFileName
	     into one comma-separated string. That string is passed to clusterize.py through -i, together with
	     the chosen format (-f), the output dataset (-o), the optional switches held by $colinear and
	     $normalize (each expands to its truevalue or to an empty string) and the distance (-d). -->
	<command interpreter="python">
		#set $inputFiles = ",".join(["%s" % (s) for s in $formatType.inputFileName])
		../Java/Python/clusterize.py -i $inputFiles -f $formatType.FormatInputFileName -o $outputFileGff $colinear $normalize -d $distance
	</command>

	<inputs>
		<!-- Input selection. The value chosen in FormatInputFileName is forwarded to the script as -f.
		     Every <when> branch exposes the same parameter (inputFileName, several datasets allowed),
		     differing only in the dataset format it accepts. -->
		<conditional name="formatType">
			<param name="FormatInputFileName" type="select" label="Input File Format">
				<option value="bed">bed</option>
				<option value="gff">gff</option>
				<option value="gff2">gff2</option>
				<option value="gff3">gff3</option>
				<option value="sam">sam</option>
				<option value="gtf">gtf</option>
				<option value="bam">bam</option>
			</param>
			<when value="bed">
				<param name="inputFileName" format="bed" type="data" multiple="true" label="Input File"/>
			</when>
			<when value="gff">
				<param name="inputFileName" format="gff" type="data" multiple="true" label="Input File"/>
			</when>
			<when value="gff2">
				<param name="inputFileName" format="gff2" type="data" multiple="true" label="Input File"/>
			</when>
			<when value="gff3">
				<param name="inputFileName" format="gff3" type="data" multiple="true" label="Input File"/>
			</when>
			<when value="sam">
				<param name="inputFileName" format="sam" type="data" multiple="true" label="Input File"/>
			</when>
			<when value="gtf">
				<param name="inputFileName" format="gtf" type="data" multiple="true" label="Input File"/>
			</when>
			<when value="bam">
				<param name="inputFileName" format="bam" type="data" multiple="true" label="Input File"/>
			</when>
		</conditional>

		<!-- Optional switches: each expands to its command-line flag when checked and to an empty string otherwise. -->
		<param name="colinear" type="boolean" truevalue="-c" falsevalue="" checked="false" label="Only merge collinear features"/>
		<param name="normalize" type="boolean" truevalue="-n" falsevalue="" checked="false" label="Normalize counts" help="Only works if the nbOccurrences tag is set."/>
		<!-- The distance is a nucleotide count forwarded as -d, so it is declared as an integer:
		     Galaxy then rejects non-numeric input in the form instead of passing free text to the command line. -->
		<param name="distance" type="integer" value="0" label="merge features if their relative distance is within N nt"/>
	</inputs>

	<!-- Single result dataset: the command passes it to the script through -o, and it is typed as GFF3. -->
	<outputs>
		<data name="outputFileGff" format="gff3"/>
	</outputs> 

    <tests>
        <test>
            <!-- Basic test: one BED input with every option left at its default.
                 The <output> name must be the one declared in <outputs> (outputFileGff);
                 otherwise the expected file cannot be matched to a tool output. -->
            <param name="FormatInputFileName" value="bed"/>
            <param name="inputFileName" value="smart_1.bed" ftype="bed"/>
            <param name="colinear" value="false"/>
            <param name="normalize" value="false"/>
            <param name="distance" value="0"/>
            <output name="outputFileGff" file="smart_clusterize_out_1.gff3" ftype="gff3"/>
        </test>
    </tests>

	<help>
The script clusterizes the input genomic data. Two features are clusterized when their genomic intervals overlap. The output is a GFF3 file, where each element is a cluster. The number of elements in the cluster is given by the tag **nbElements**. The name of a cluster is the concatenation of the names of its reads (like **read1--read2--read3**). Note that if the size of the name of the cluster exceeds 100 characters, it is truncated to the first 100 characters.

Some options may clusterize the features which are closer than a given distance.

By default, the tool clusterizes all features which overlap (or nearly overlap), even if they are on different strands. If you want to clusterize the features which are on the same strand only, you can specify it.
	</help>
</tool>