diff tools/extract/liftOver_wrapper.xml @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/extract/liftOver_wrapper.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,145 @@
+<tool id="liftOver1" name="Convert genome coordinates" version="1.0.3">
+  <description> between assemblies and genomes</description>
+  <command interpreter="python">
+  liftOver_wrapper.py
+  "$input"
+  "$out_file1"
+  "$out_file2"
+  "$dbkey"
+  "$to_dbkey"
+  #if isinstance( $input.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__) or isinstance( $input.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gtf').__class__):
+        "gff"
+  #else:
+        "interval"
+  #end if
+  $minMatch ${multiple.choice} ${multiple.minChainT} ${multiple.minChainQ} ${multiple.minSizeQ}
+  </command> <!-- Argument order: input, mapped output, unmapped output, input dbkey, to_dbkey value, "gff" or "interval", minMatch, choice, minChainT, minChainQ, minSizeQ. Path and key values are double-quoted so the shell cannot split or glob them. -->
+  <inputs>
+    <param format="interval,gff,gtf" name="input" type="data" label="Convert coordinates of">
+      <validator type="unspecified_build" />
+      <validator type="dataset_metadata_in_file" filename="liftOver.loc" metadata_name="dbkey" metadata_column="0" message="Liftover mappings are currently not available for the specified build." />
+    </param>
+    <param name="to_dbkey" type="select" label="To">
+      <options from_file="liftOver.loc"> <!-- Rows are limited to those whose column 0 matches the dbkey of the selected input. -->
+        <column name="name" index="1"/>
+        <column name="value" index="2"/>
+        <column name="dbkey" index="0"/>
+        <filter type="data_meta" ref="input" key="dbkey" column="0" />
+      </options>
+    </param>
+    <param name="minMatch" size="10" type="float" value="0.95" label="Minimum ratio of bases that must remap" help="Recommended values: same species = 0.95, different species = 0.10" />
+    <conditional name="multiple"> <!-- Both branches define minSizeQ, minChainQ and minChainT (hidden zeros when choice is 0) because the command template always references all three. -->
+      <param name="choice" type="select" label="Allow multiple output regions?" help="Recommended values: same species = No, different species = Yes">
+        <option value="0" selected="true">No</option>
+        <option value="1">Yes</option>
+      </param>
+      <when value="0">
+        <param name="minSizeQ" type="hidden" value="0" />
+        <param name="minChainQ" type="hidden" value="0" />
+        <param name="minChainT" type="hidden" value="0" />
+      </when>
+      <when value="1">
+        <param name="minSizeQ" size="10" type="integer" value="0" label="Minimum matching region size in query" help="Recommended value: set to &gt;= 300 bases for complete transcripts"/>
+        <param name="minChainQ" size="10" type="integer" value="500" label="Minimum chain size in query"/>
+        <param name="minChainT" size="10" type="integer" value="500" label="Minimum chain size in target"/>
+      </when>
+    </conditional>
+  </inputs>
+  <outputs> <!-- Two datasets, both declared with format="input": one labelled MAPPED COORDINATES, one labelled UNMAPPED COORDINATES. -->
+    <data format="input" name="out_file1" label="${tool.name} on ${on_string} [ MAPPED COORDINATES ]">
+      <actions>
+        <action type="metadata" name="dbkey"> <!-- Sets the mapped output's dbkey from column 1 of the liftOver.loc row whose column 2 equals the selected to_dbkey; rows whose column 0 starts with # are dropped first. -->
+          <option type="from_file" name="liftOver.loc" column="1" offset="0">
+            <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
+            <filter type="param_value" ref="to_dbkey" column="2"/>
+          </option>
+        </action>
+      </actions>
+    </data>
+    <data format="input" name="out_file2" label="${tool.name} on ${on_string} [ UNMAPPED COORDINATES ]" />
+  </outputs>
+  <requirements> <!-- Package dependency declared for this tool. -->
+    <requirement type="package">ucsc_tools</requirement>
+  </requirements>
+  <tests> <!-- Four cases: BED and GTF inputs (hg18 to panTro2), each run with multiple output regions disabled and enabled. -->
+    <test> <!-- BED input, choice=0 (multiple output regions disabled), minMatch 0.95. -->
+      <param name="input" value="5.bed" dbkey="hg18" ftype="bed" />
+      <param name="to_dbkey" value="panTro2" />
+      <param name="minMatch" value="0.95" />
+      <param name="choice" value="0" />
+      <output name="out_file1" file="5_liftover_mapped.bed"/>
+      <output name="out_file2" file="5_liftover_unmapped.bed"/>
+    </test>
+    <test> <!-- BED input, choice=1 (multiple output regions enabled) with explicit thresholds, minMatch 0.10. -->
+      <param name="input" value="5.bed" dbkey="hg18" ftype="bed" />
+      <param name="to_dbkey" value="panTro2" />
+      <param name="minMatch" value="0.10" />
+      <param name="choice" value="1" />
+      <param name="minSizeQ" value="0" />
+      <param name="minChainQ" value="500" />
+      <param name="minChainT" value="500" />
+      <output name="out_file1" file="5_mult_liftover_mapped.bed"/>
+      <output name="out_file2" file="5_mult_liftover_unmapped.bed"/>
+    </test>
+    <test> <!-- GTF input, choice=0, minMatch 0.95. NOTE(review): expected files are named *.bed although the input is GTF; confirm the fixture names. -->
+      <param name="input" value="cuffcompare_in1.gtf" dbkey="hg18" ftype="gtf" />
+      <param name="to_dbkey" value="panTro2" />
+      <param name="minMatch" value="0.95" />
+      <param name="choice" value="0" />
+      <output name="out_file1" file="cuffcompare_in1_liftover_mapped.bed"/>
+      <output name="out_file2" file="cuffcompare_in1_liftover_unmapped.bed"/>
+    </test>
+    <test> <!-- GTF input, choice=1 with explicit thresholds, minMatch 0.10. NOTE(review): expected files are named *.bed although the input is GTF; confirm the fixture names. -->
+      <param name="input" value="cuffcompare_in1.gtf" dbkey="hg18" ftype="gtf" />
+      <param name="to_dbkey" value="panTro2" />
+      <param name="minMatch" value="0.10" />
+      <param name="choice" value="1" />
+      <param name="minSizeQ" value="0" />
+      <param name="minChainQ" value="500" />
+      <param name="minChainT" value="500" />
+      <output name="out_file1" file="cuffcompare_in1_mult_liftover_mapped.bed"/>
+      <output name="out_file2" file="cuffcompare_in1_mult_liftover_unmapped.bed"/>
+    </test>
+  </tests>
+  <help>
+.. class:: warningmark
+
+Make sure that the genome build of the input dataset is specified (click the pencil icon in the history item to set it if necessary).
+
+.. class:: warningmark
+
+This tool works with interval, GFF, and GTF datasets. Interval datasets must have the chromosome in column 1,
+the start coordinate in column 2, and the end coordinate in column 3. BED comments
+and track and browser lines will be ignored, but if other non-interval lines
+are present, the tool will return empty output datasets.
+
+-----
+
+.. class:: infomark
+
+**What it does**
+
+This tool is based on the LiftOver utility and Chain track from `the UC Santa Cruz Genome Browser`__.
+
+It converts coordinates and annotations between assemblies and genomes. It produces two files: one containing all the mapped coordinates and the other containing the unmapped coordinates, if any.
+
+.. __: http://genome.ucsc.edu/
+
+-----
+
+**Example**
+
+Converting the following hg16 intervals to hg18 intervals::
+
+    chrX  85170   112199  AK002185  0  +
+    chrX  110458  112199  AK097346  0  +
+    chrX  112203  121212  AK074528  0  -
+
+will produce the following hg18 intervals::
+
+    chrX  132991  160020  AK002185  0  +
+    chrX  158279  160020  AK097346  0  +
+    chrX  160024  169033  AK074528  0  -
+
+  </help>
+</tool>