view tools/maf/genebed_maf_to_fasta.xml @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
line wrap: on
line source

<tool id="GeneBed_Maf_Fasta2" name="Stitch Gene blocks" version="1.0.1">
  <description>given a set of coding exon intervals</description>
  <!-- Cheetah command template for interval_maf_to_merged_fasta.py.
       When maf_source is "user", the history MAF dataset (maf_file) and its
       maf_index metadata are passed as mafSource / mafIndex; in every other
       case the selected maf_identifier is passed as mafSource instead.
       Both branches pass the dbkey, the chosen species, the interval file,
       the output file, the geneBED flag and GALAXY_DATA_INDEX_DIR as
       mafIndexFileDir. The overwrite_with_gaps option is appended after the
       conditional, so it applies to both branches. -->
  <command interpreter="python">
    #if $maf_source_type.maf_source == "user" #interval_maf_to_merged_fasta.py --dbkey=$dbkey --species=$maf_source_type.species --mafSource=$maf_source_type.maf_file --mafIndex=$maf_source_type.maf_file.metadata.maf_index --interval_file=$input1 --output_file=$out_file1 --mafSourceType=$maf_source_type.maf_source --geneBED --mafIndexFileDir=${GALAXY_DATA_INDEX_DIR}
    #else                                     #interval_maf_to_merged_fasta.py --dbkey=$dbkey --species=$maf_source_type.species --mafSource=$maf_source_type.maf_identifier --interval_file=$input1 --output_file=$out_file1 --mafSourceType=$maf_source_type.maf_source  --geneBED --mafIndexFileDir=${GALAXY_DATA_INDEX_DIR}
    #end if# --overwrite_with_gaps=$overwrite_with_gaps
  </command>
  <inputs>
    <!-- Gene interval input (BED dataset); two validators are applied to it. -->
    <param type="data"
           format="bed"
           name="input1"
           label="Gene BED File">
      <validator type="unspecified_build"/>
      <!-- Allows 12+ columns, which is not as strict as it could be.
           TODO: only list bed files with 12+ columns. -->
      <validator message="Input must be in BED12 format." type="expression">value.metadata.columns &gt;= 12</validator>
    </param>
    <!-- Alignment source selection. Each case defines the parameter that
         identifies the MAF source plus a "species" multi-select. -->
    <conditional name="maf_source_type">
      <param name="maf_source" label="MAF Source" type="select">
        <option selected="true" value="cached">Locally Cached Alignments</option>
        <option value="user">Alignments in Your History</option>
      </param>

      <!-- Case "user": a MAF dataset chosen from the history. -->
      <when value="user">
        <param name="maf_file"
               label="MAF File"
               type="data"
               format="maf">
          <validator type="dataset_ok_validator"/>
          <options>
            <!-- Filter on the dbkey metadata of input1. -->
            <filter ref="input1" type="data_meta" key="dbkey"/>
          </options>
        </param>
        <param name="species"
               label="Choose species"
               help="Select species to be included in the final alignment"
               type="select"
               multiple="true"
               display="checkboxes">
          <options>
            <!-- Species list comes from the metadata of the selected maf_file. -->
            <filter ref="maf_file" type="data_meta" key="species"/>
          </options>
        </param>
      </when>

      <!-- Case "cached": an alignment listed in maf_index.loc. -->
      <when value="cached">
        <param name="maf_identifier"
               label="MAF Type"
               type="select">
          <options from_file="maf_index.loc">
            <column index="0" name="name"/>
            <column index="1" name="value"/>
            <column index="2" name="dbkey"/>
            <column index="3" name="species"/>
            <!-- Column 2 is split on "," and filtered by the dbkey metadata of input1. -->
            <filter ref="input1" type="data_meta" key="dbkey" column="2" separator="," multiple="True"/>
            <validator message="No alignments are available for the build associated with the selected interval file" type="no_options"/>
          </options>
        </param>
        <param name="species"
               label="Choose species"
               help="Select species to be included in the final alignment"
               type="select"
               multiple="true"
               display="checkboxes">
          <options from_file="maf_index.loc">
            <column index="1" name="uid"/>
            <column index="3" name="value"/>
            <column index="3" name="name"/>
            <!-- Keep the rows whose column 1 matches the chosen maf_identifier,
                 then split the comma-separated column 3 into separate options. -->
            <filter ref="maf_identifier" type="param_value" name="uid" column="1"/>
            <filter separator="," column="3" type="multiple_splitter"/>
          </options>
        </param>
      </when>
    </conditional>
    <!-- The label asks whether to split into gapless blocks, while the submitted
         value is handed to the script as overwrite_with_gaps. The mapping is
         therefore inverted on purpose: "No" submits True and "Yes" submits False. -->
    <param name="overwrite_with_gaps"
           type="select"
           help="When set to Yes, blocks are divided around gaps appearing in any species. This will prevent gaps occurring in the interior of the sequence for an aligning species from overwriting a nucleotide found for the same position in a lower-scoring block."
           label="Split into Gapless MAF blocks">
      <option selected="true" value="True">No</option>
      <option value="False">Yes</option>
    </param>
  </inputs>
  <!-- Single FASTA output dataset, referenced as $out_file1 in the command. -->
  <outputs>
    <data name="out_file1" format="fasta"/>
  </outputs>
  <!-- Functional tests, one per MAF source case. Both use 8.bed as the
       interval input and set overwrite_with_gaps to True. -->
  <tests>
    <!-- Cached alignment source. -->
    <test>
      <param name="input1" value="8.bed"/>
      <param name="maf_source" value="cached"/>
      <param name="maf_identifier" value="8_WAY_MULTIZ_hg17"/>
      <param name="species" value="canFam1,hg17,mm5,panTro1,rn3"/>
      <param name="overwrite_with_gaps" value="True"/>
      <output name="out_file1" file="gene_bed_maf_to_fasta_out.fasta" />
    </test>
    <!-- MAF dataset supplied from the history. -->
    <test>
      <param name="input1" value="8.bed"/>
      <param name="maf_source" value="user"/>
      <param name="maf_file" value="4.maf"/>
      <param name="species" value="hg17,panTro1"/>
      <param name="overwrite_with_gaps" value="True"/>
      <output name="out_file1" file="gene_bed_maf_to_fasta_user_out.fasta" />
    </test>
  </tests>
  <help>

**What it does**

The coding sequence of a gene is usually composed of several coding exons. Each of these coding exons is an individual genomic region; concatenated together, they constitute the coding sequence. A single genomic region can be covered by multiple alignment blocks. In many cases it is desirable to stitch these alignment blocks together. This tool accepts a list of gene-based intervals, in the Gene BED format. For every interval it performs the following:

  * finds all MAF blocks that overlap the coding regions;
  * sorts MAF blocks by alignment score;
  * stitches blocks together and resolves overlaps based on alignment score;
  * outputs alignments in FASTA format.

------

**Citation**

If you use this tool, please cite `Blankenberg D, Taylor J, Nekrutenko A; The Galaxy Team. Making whole genome multiple alignments usable for biologists. Bioinformatics. 2011 Sep 1;27(17):2426-2428. &lt;http://www.ncbi.nlm.nih.gov/pubmed/21775304&gt;`_


  </help>
</tool>