view tools/filters/gff/extract_GFF_Features.xml @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
line wrap: on
line source

<tool id="Extract_features1" name="Extract features">
  <description> from GFF data</description>
  <!-- Runs the extraction script. Arguments, in order: the input GFF dataset,
       the output dataset, the 0-based index of the column picked in the
       "From" select, and the feature value(s) selected for that column.
       The dataset paths and the feature argument are quoted so that a value
       containing spaces (GFF fields are tab-separated, so spaces are legal
       inside a field) reaches the script as a single argument. -->
  <command interpreter="python">extract_GFF_Features.py "$input1" "$out_file1" ${column_choice.col} "${column_choice.feature}"</command>
  <inputs>
    <param format="gff" name="input1" type="data" label="Select GFF data"/>
    <!-- The "From" select decides which <when> branch is active. Its option
         values are 0-based column indexes (GFF column N has index N-1).
         Every branch defines the same "feature" multi-select, populated with
         the distinct values found in the matching column of input1. -->
    <conditional name="column_choice">
      <param name="col" type="select" label="From">
        <option value="0" selected="true">Column 1 / Sequence name</option>
        <option value="1">Column 2 / Source</option>
        <option value="2">Column 3 / Feature</option>
        <option value="6">Column 7 / Strand</option>
        <option value="7">Column 8 / Frame</option>
      </param>
      <!-- Column 1 / Sequence name -->
      <when value="0">
        <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple features">
          <options from_dataset="input1">
            <column name="name" index="0"/>
            <column name="value" index="0"/>
            <filter type="unique_value" name="unique" column="0"/>
          </options>
        </param>
      </when>
      <!-- Column 2 / Source -->
      <when value="1">
        <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple features">
          <options from_dataset="input1">
            <column name="name" index="1"/>
            <column name="value" index="1"/>
            <filter type="unique_value" name="unique" column="1"/>
          </options>
        </param>
      </when>
      <!-- Column 3 / Feature -->
      <when value="2">
        <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple features">
          <options from_dataset="input1">
            <column name="name" index="2"/>
            <column name="value" index="2"/>
            <filter type="unique_value" name="unique" column="2"/>
          </options>
        </param>
      </when>
      <!-- Column 7 / Strand -->
      <when value="6">
        <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple features">
          <options from_dataset="input1">
            <column name="name" index="6"/>
            <column name="value" index="6"/>
            <filter type="unique_value" name="unique" column="6"/>
          </options>
        </param>
      </when>
      <!-- Column 8 / Frame -->
      <when value="7">
        <param name="feature" type="select" multiple="true" label="Extract features" help="Multi-select list - hold the appropriate key while clicking to select multiple features">
          <options from_dataset="input1">
            <column name="name" index="7"/>
            <column name="value" index="7"/>
            <filter type="unique_value" name="unique" column="7"/>
          </options>
        </param>
      </when>
    </conditional>
  </inputs>
  <outputs>
    <!-- The output takes both its datatype and its metadata from input1.
         format_source names that input explicitly instead of relying on the
         legacy format="input" shorthand. -->
    <data name="out_file1" format_source="input1" metadata_source="input1"/>
  </outputs>
  <tests>
    <!-- Runs the tool on 5.gff with column index 0 (sequence name) and the
         features chr5, chr6, chr7 and chr8 selected; the result is compared
         against Extract_features1_out.gff. -->
    <test>
      <param name="input1" value="5.gff"/>
      <param name="col" value="0"/>
      <param name="feature" value="chr5,chr6,chr7,chr8"/>
      <output name="out_file1" file="Extract_features1_out.gff"/>
    </test>
  </tests>
  <help>

**What it does**

This tool extracts selected features from GFF data.

-----

**Example**

Selecting **promoter** from **Column 3 / Feature** of the following GFF data::

    chr22  GeneA  enhancer  10000000  10001000  500  +  .  TGA
    chr22  GeneA  promoter  10010000  10010100  900  +  .  TGA
    chr22  GeneB  promoter  10020000  10025000  400  -  .  TGB
    chr22  GeneB  CCDS2220  10030000  10065000  800  -  .  TGB
    
will produce the following output::

    chr22  GeneA  promoter  10010000  10010100  900  +  .  TGA
    chr22  GeneB  promoter  10020000  10025000  400  -  .  TGB

----

.. class:: infomark

**About formats**

**GFF format** General Feature Format (GFF) is a format for describing genes and other features associated with DNA, RNA, and protein sequences. GFF lines have nine tab-separated fields::

    1. seqname - Must be a chromosome or scaffold.
    2. source - The program that generated this feature.
    3. feature - The name of this type of feature. Some examples of standard feature types are "CDS", "start_codon", "stop_codon", and "exon".
    4. start - The starting position of the feature in the sequence. The first base is numbered 1.
    5. end - The ending position of the feature (inclusive).
    6. score - A score between 0 and 1000. If there is no score value, enter ".".
    7. strand - Valid entries include '+', '-', or '.' (for don't know/care).
    8. frame - If the feature is a coding exon, frame should be a number between 0-2 that represents the reading frame of the first base. If the feature is not a coding exon, the value should be '.'.
    9. group - All lines with the same group are linked together into a single item.


  </help>
</tool>