<!--
  view synteny-mapper.xml @ 14:acb6cae8bfec draft

  Uploaded
  author s-kaufmann
  date Fri, 15 Aug 2014 09:49:46 -0400
  parents 8a4190457eb7
  children
  line wrap: on
  line source
-->

<tool id="SyntenyMapper1" name="SyntenyMapper" version="1.0.0">
  <description>Maps maximum stretches of orthologous genes with conserved order within large synteny regions</description>

<!--
  JAR_PATH is an environment setting the Galaxy instance has to provide;
  the command template looks for syntenyMapper.jar inside that directory.
-->
<requirements>
  <requirement type="set_environment">JAR_PATH</requirement>
</requirements>

<!--
  Cheetah template for the SyntenyMapper command line.

  ENSEMBL branch (8 arguments): Compara version, the two downloaded-data
  outputs (inputSyntenyRegions, inputHomologousGenes), the two result
  outputs, both species names and the gene filter flag.

  Upload branch (6 arguments): the two user-supplied datasets, the two
  result outputs and both species names. The uploaded datasets are inputs
  nested in the "source" conditional, so they are addressed as
  $source.<name>; the unqualified names belong to the ENSEMBL-only outputs.

  \$JAR_PATH is escaped so that it is expanded by the shell, not by Cheetah.
-->
<command>
  #if $source.source_select == "ensembl"
    java -cp \$JAR_PATH/syntenyMapper.jar de/tum/wzw/mapping/SyntenyMapper $source.ensembl_version $inputSyntenyRegions $inputHomologousGenes $out_file1 $out_file2 $source.species.species1 $source.species.species2 $source.filter
  #else
    java -cp \$JAR_PATH/syntenyMapper.jar de/tum/wzw/mapping/SyntenyMapper $source.inputSyntenyRegions $source.inputHomologousGenes $out_file1 $out_file2 $source.species1 $source.species2
  #end if
</command>

 <inputs>
   <!--
     "source" selects where the synteny regions and orthologous genes come from:
       ensembl: the tool is given a Compara version, a species pair and a gene
                filter flag;
       upload:  the user supplies both species names and two tabular datasets.
   -->
   <conditional name="source">
     <param name="source_select" type="select" label="Data source">
       <option value="ensembl">ENSEMBL download</option>
       <option value="upload">Own data upload</option>
     </param>

     <when value="ensembl">
       <param name="ensembl_version" type="integer" value="73" label="ENSEMBL Compara version"/>

       <!-- The species2 choices offered depend on the selected species1. -->
       <conditional name="species">
         <param name="species1" type="select" label="Species1">
           <option value="felis_catus">Felis catus</option>
           <option value="rattus_norvegicus">Rattus norvegicus</option>
           <option value="meleagris_gallopavo">Meleagris gallopavo</option>
           <option value="monodelphis_domestica">Monodelphis domestica</option>
           <option value="mus_musculus">Mus musculus</option>
           <option value="canis_familiaris">Canis familiaris</option>
           <option value="oryctolagus_cuniculus">Oryctolagus cuniculus</option>
           <option value="equus_caballus">Equus caballus</option>
           <option value="gallus_gallus">Gallus gallus</option>
           <option value="macaca_mulatta">Macaca mulatta</option>
           <option value="pongo_abelii">Pongo abelii</option>
           <option value="sus_scrofa">Sus scrofa</option>
           <option value="gorilla_gorilla">Gorilla gorilla</option>
           <option value="callithrix_jacchus">Callithrix jacchus</option>
           <option value="ornithorhynchus_anatinus">Ornithorhynchus anatinus</option>
           <option value="homo_sapiens">Homo sapiens</option>
           <option value="anolis_carolinensis">Anolis carolinensis</option>
           <option value="pan_troglodytes">Pan troglodytes</option>
           <option value="bos_taurus">Bos taurus</option>
         </param>
         <when value="felis_catus">
           <param name="species2" type="select" label="Species2">
             <option value="homo_sapiens">Homo sapiens</option>
           </param>
         </when>
         <when value="rattus_norvegicus">
           <param name="species2" type="select" label="Species2">
             <option value="mus_musculus">Mus musculus</option>
             <option value="homo_sapiens">Homo sapiens</option>
           </param>
         </when>
         <when value="meleagris_gallopavo">
           <param name="species2" type="select" label="Species2">
             <option value="gallus_gallus">Gallus gallus</option>
           </param>
         </when>
         <when value="monodelphis_domestica">
           <param name="species2" type="select" label="Species2">
             <option value="homo_sapiens">Homo sapiens</option>
           </param>
         </when>
         <when value="mus_musculus">
           <param name="species2" type="select" label="Species2">
             <option value="sus_scrofa">Sus scrofa</option>
             <option value="rattus_norvegicus">Rattus norvegicus</option>
             <option value="ornithorhynchus_anatinus">Ornithorhynchus anatinus</option>
             <option value="homo_sapiens">Homo sapiens</option>
             <option value="canis_familiaris">Canis familiaris</option>
             <option value="gallus_gallus">Gallus gallus</option>
             <option value="bos_taurus">Bos taurus</option>
           </param>
         </when>
         <when value="canis_familiaris">
           <param name="species2" type="select" label="Species2">
             <option value="mus_musculus">Mus musculus</option>
             <option value="homo_sapiens">Homo sapiens</option>
             <option value="equus_caballus">Equus caballus</option>
           </param>
         </when>
         <when value="oryctolagus_cuniculus">
           <param name="species2" type="select" label="Species2">
             <option value="homo_sapiens">Homo sapiens</option>
           </param>
         </when>
         <when value="equus_caballus">
           <param name="species2" type="select" label="Species2">
             <option value="canis_familiaris">Canis familiaris</option>
             <option value="homo_sapiens">Homo sapiens</option>
           </param>
         </when>
         <when value="gallus_gallus">
           <param name="species2" type="select" label="Species2">
             <option value="meleagris_gallopavo">Meleagris gallopavo</option>
             <option value="mus_musculus">Mus musculus</option>
             <option value="anolis_carolinensis">Anolis carolinensis</option>
             <option value="homo_sapiens">Homo sapiens</option>
           </param>
         </when>
         <when value="macaca_mulatta">
           <param name="species2" type="select" label="Species2">
             <option value="homo_sapiens">Homo sapiens</option>
           </param>
         </when>
         <when value="pongo_abelii">
           <param name="species2" type="select" label="Species2">
             <option value="homo_sapiens">Homo sapiens</option>
           </param>
         </when>
         <when value="sus_scrofa">
           <param name="species2" type="select" label="Species2">
             <option value="mus_musculus">Mus musculus</option>
             <option value="homo_sapiens">Homo sapiens</option>
             <option value="bos_taurus">Bos taurus</option>
           </param>
         </when>
         <when value="gorilla_gorilla">
           <param name="species2" type="select" label="Species2">
             <option value="homo_sapiens">Homo sapiens</option>
           </param>
         </when>
         <when value="callithrix_jacchus">
           <param name="species2" type="select" label="Species2">
             <option value="homo_sapiens">Homo sapiens</option>
           </param>
         </when>
         <when value="ornithorhynchus_anatinus">
           <param name="species2" type="select" label="Species2">
             <option value="mus_musculus">Mus musculus</option>
             <option value="homo_sapiens">Homo sapiens</option>
           </param>
         </when>
         <when value="homo_sapiens">
           <param name="species2" type="select" label="Species2">
             <option value="felis_catus">Felis catus</option>
             <option value="rattus_norvegicus">Rattus norvegicus</option>
             <option value="monodelphis_domestica">Monodelphis domestica</option>
             <option value="mus_musculus">Mus musculus</option>
             <option value="canis_familiaris">Canis familiaris</option>
             <option value="oryctolagus_cuniculus">Oryctolagus cuniculus</option>
             <option value="equus_caballus">Equus caballus</option>
             <option value="gallus_gallus">Gallus gallus</option>
             <option value="macaca_mulatta">Macaca mulatta</option>
             <option value="pongo_abelii">Pongo abelii</option>
             <option value="sus_scrofa">Sus scrofa</option>
             <option value="gorilla_gorilla">Gorilla gorilla</option>
             <option value="callithrix_jacchus">Callithrix jacchus</option>
             <option value="ornithorhynchus_anatinus">Ornithorhynchus anatinus</option>
             <option value="pan_troglodytes">Pan troglodytes</option>
             <option value="bos_taurus">Bos taurus</option>
           </param>
         </when>
         <when value="anolis_carolinensis">
           <param name="species2" type="select" label="Species2">
             <option value="gallus_gallus">Gallus gallus</option>
           </param>
         </when>
         <when value="pan_troglodytes">
           <param name="species2" type="select" label="Species2">
             <option value="homo_sapiens">Homo sapiens</option>
           </param>
         </when>
         <when value="bos_taurus">
           <param name="species2" type="select" label="Species2">
             <option value="sus_scrofa">Sus scrofa</option>
             <option value="mus_musculus">Mus musculus</option>
             <option value="homo_sapiens">Homo sapiens</option>
           </param>
         </when>
       </conditional>

       <!-- Passed to the command line as the literal string "true" or "false". -->
       <param name="filter" type="select" label="Gene filter">
         <option value="false">Use all genes</option>
         <option value="true">Use only protein-coding genes</option>
       </param>
     </when>

     <when value="upload">
       <!-- Both validators reject any species name that contains a space character. -->
       <param name="species1" type="text" size="40" label="Species1 (latin name with underline)">
         <validator type="expression" message="Please replace any white spaces with underlines">value.find(' ')==-1</validator>
       </param>
       <param name="species2" type="text" size="40" label="Species2 (latin name with underline)">
         <validator type="expression" message="Please replace any white spaces with underlines">value.find(' ')==-1</validator>
       </param>
       <param format="tabular" name="inputSyntenyRegions" type="data" label="Synteny Regions (e.g. from ENSEMBL)" help="Dataset missing? See TIP below."/>
       <param format="tabular" name="inputHomologousGenes" type="data" label="Orthologous Genes (e.g. from ENSEMBL)" help="Dataset missing? See TIP below."/>
     </when>
   </conditional>
 </inputs>
  <outputs>
    <!--
      These two datasets are only created when the data source is "ensembl".
      They share their names with the upload-branch input parameters.
    -->
    <data name="inputSyntenyRegions" format="tabular" label="ENSEMBL synteny regions">
      <filter>source['source_select'] == "ensembl"</filter>
    </data>
    <data name="inputHomologousGenes" format="tabular" label="ENSEMBL orthologous genes">
      <filter>source['source_select'] == "ensembl"</filter>
    </data>

    <!-- Result datasets, created for both data sources. -->
    <data name="out_file1" format="tabular" label="Gene mapping"/>
    <data name="out_file2" format="tabular" label="Refined Synteny regions"/>
  </outputs>
  <tests>
    <!--
      Exercises the "upload" data source with two local tabular test files.
      source_select is set explicitly because the dataset inputs and the
      free-text species names only exist in the "upload" branch; the "filter"
      parameter belongs to the "ensembl" branch and is therefore not set here.
    -->
    <test>
      <param name="source_select" value="upload"/>
      <param name="species1" value="homo_sapiens"/>
      <param name="species2" value="mus_musculus"/>
      <param name="inputSyntenyRegions" value="hom_sapi-mus_musc-synteny-v70"/>
      <param name="inputHomologousGenes" value="hom_sapi-mus_musc-orthologs-v70.filtered"/>
      <output name="out_file1" file="Gene_mapping1.out"/>
      <output name="out_file2" file="Refined_Synteny_regions2.out"/>
    </test>
  </tests>

  <help>

.. class:: warningmark

If you are not downloading data directly from ENSEMBL, please make sure to bring your data into the correct format (see below).

.. class:: infomark

**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*

.. class:: infomark

Species names should be given as Latin names with underscores instead of spaces (e.g. homo_sapiens).

**TIP:** If you are interested only in the 1-to-1 ortholog mapping, use *Text Manipulation-&gt;Cut* on columns c3,c7 of the *Gene mapping* output.

----

**What it does**

SyntenyMapper uses two species' predefined macro-rearrangement blocks of common origin (e.g. synteny regions from ENSEMBL) and orthology assignments, both alignment-based, and combines them to create refined synteny regions with microrearrangement blocks and a 1-to-1 ortholog mapping.

It finds the longest blocks of conserved gene order within each synteny block and eliminates all orthology mappings with a different gene neighbourhood. As a result, SyntenyMapper produces a list of segments with identical order of genes in both species, caused by micro-rearrangements. To visualize this mapping, use SyntenyMapperVisualization. To use the mapping for feature track comparison between two species, use TrackMapper.

-----

**Syntax**

The SyntenyMapper allows you to identify long blocks of genes with conserved gene order in two organisms.

The mapping is based on previously determined long synteny regions and orthology pairs of genes.

The first two comment lines (marked by a leading #) in the synteny file should name the two species, starting with #Species1: and #Species2:, respectively. IDs should consist of five digits.
The synteny file should have the format (tab-separated):


+-----+-------------------+---------------+-------------+--------------------+---------------+-------------+-------------+
|#ID  |Chromosome_species1|Start_species1 |End_species1 |Chromosome_species2 |Start_species2 |End_species2 |Dir_species2 |
+-----+-------------------+---------------+-------------+--------------------+---------------+-------------+-------------+


The orthologous genes file should have the format (tab-separated):
 

+---------+------------------+------------------------+-----------+---------+---------+----------+----------+-----------------+
|#ID      |ENSEMBL_ID        |Name                    |Chromosome |Start    |End      |Direction | Identity |Species          |
+---------+------------------+------------------------+-----------+---------+---------+----------+----------+-----------------+


There should be two (or more) entries for each ID, describing pairs of genes that are orthologs.

-----

**Example**

These are sample lines from two example input files. If you are not downloading data directly from ENSEMBL (or reusing downloaded data), please make sure that your data adheres to the format defined above.

The synteny file contains coordinates of large (e.g. whole-genome alignment based) synteny regions in both organisms and specifies the species names.

*Synteny file:*

+----------------------------------------------------------------------------------------------+
|#Species1: homo_sapiens                                                                       |
+----------------------------------------------------------------------------------------------+
|#Species2: mus_musculus                                                                       |
+-----+-----------------+------------+----------+-----------------+------------+----------+----+
|#ID  |Chromosome_human |Start_human |End_human |Chromosome_mouse |Start_mouse |End_mouse |Dir |
+-----+-----------------+------------+----------+-----------------+------------+----------+----+
|44723|chr6             |155053083   |160101646 |chr17            |3113738     |7931992   |-1  |
+-----+-----------------+------------+----------+-----------------+------------+----------+----+


The orthology file contains coordinates of genes of two species, coupled together by the same identifier to orthology pairs. Chromosomes can be named with "chr" prefix or without in both files.

*Orthology file:*

+---------+------------------+------------------------+-----------+---------+---------+----------+----------+-----------------+
|#ID      |ENSEMBL_ID        |Name                    |Chromosome |Start    |End      |Direction | Identity |Species          |
+---------+------------------+------------------------+-----------+---------+---------+----------+----------+-----------------+
|33818986 |ENSRNOG00000050189|olfactory receptor Olr89|1          |174585043|174585993|1         |92.0      |rattus_norvegicus|
+---------+------------------+------------------------+-----------+---------+---------+----------+----------+-----------------+
|33818986 |ENSMUSG00000073952|null                    |7          |103320401|103321360|1         |93.0      |mus_musculus     |
+---------+------------------+------------------------+-----------+---------+---------+----------+----------+-----------------+

</help>
</tool>