view flanking_features.xml @ 1:8307665c4b6c draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit a1517c9d22029095120643bbe2c8fa53754dd2b7
author devteam
date Wed, 11 Nov 2015 12:48:18 -0500
parents 90100b587723
children a09d13b108fd
line wrap: on
line source

<tool id="flanking_features_1" name="Fetch closest non-overlapping feature" version="4.0.1">
  <description>for every interval</description>
  <requirements>
    <requirement type="package" version="0.7.1">bx-python</requirement>
    <requirement type="package" version="1.0.0">galaxy-ops</requirement>
  </requirements>
  <!--
    Command line for flanking_features.py.

    Positional arguments: first input dataset, second input dataset, output
    file, and the selected direction value. The -1 / -2 options carry the
    comma-separated chrom,start,end,strand column numbers for the first and
    second input respectively; the gff1 / gff2 long options are added when
    the corresponding input is a GFF dataset.

    Dataset paths are single-quoted so that paths containing spaces or shell
    metacharacters do not break the command line. The script is located via
    $__tool_directory__ instead of the deprecated "interpreter" attribute.
  -->
  <command><![CDATA[
      python '$__tool_directory__/flanking_features.py' '$input1' '$input2' '$out_file1' $direction

      ## First input: GFF datasets use the fixed columns 1,4,5,7;
      ## any other dataset takes its column numbers from its metadata.
      #if $input1.is_of_type('gff')
          -1 1,4,5,7 --gff1
      #else
          -1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol}
      #end if

      ## Second input: same column selection rules as for the first input.
      #if $input2.is_of_type('gff')
          -2 1,4,5,7 --gff2
      #else
          -2 ${input2.metadata.chromCol},${input2.metadata.startCol},${input2.metadata.endCol},${input2.metadata.strandCol}
      #end if
  ]]></command>
  <inputs>
    <!-- First dataset: passed to the script as its first positional argument. -->
    <param name="input1" type="data" format="interval,gff" label="For every interval in"/>
    <!-- Second dataset: passed to the script as its second positional argument. -->
    <param name="input2" type="data" format="interval,gff" label="Fetch closest feature(s) from"/>
    <!-- The selected option value is passed verbatim as the fourth positional argument. -->
    <param name="direction" type="select" label="Located">
      <option value="Either">Either Upstream or Downstream</option>
      <option value="Both">Both Upstream and Downstream</option>
      <option value="Upstream">Upstream</option>
      <option value="Downstream">Downstream</option>
    </param>
  </inputs>
  <outputs>
    <!--
      Single result dataset. Both its datatype and its metadata are taken from
      input1. format_source names the input explicitly, replacing the legacy
      format="input" setting, which does not say which of the two data inputs
      determines the output datatype.
    -->
    <data name="out_file1" format_source="input1" metadata_source="input1"/>
  </outputs>
  <tests>
    <!-- 4_windows.bed vs 4_windows_2.bed, direction "Either". -->
    <test>
      <param name="input1" value="4_windows.bed"/>
      <param name="input2" value="4_windows_2.bed"/>
      <param name="direction" value="Either"/>
      <output file="closest_features_either.interval" name="out_file1"/>
    </test>
    <!-- 4_windows.bed vs 4_windows_2.bed, direction "Both". -->
    <test>
      <param name="input1" value="4_windows.bed"/>
      <param name="input2" value="4_windows_2.bed"/>
      <param name="direction" value="Both"/>
      <output file="closest_features.interval" name="out_file1"/>
    </test>
    <!-- 4_windows.bed vs 4_windows_2.bed, direction "Upstream". -->
    <test>
      <param name="input1" value="4_windows.bed"/>
      <param name="input2" value="4_windows_2.bed"/>
      <param name="direction" value="Upstream"/>
      <output file="closest_features_up.interval" name="out_file1"/>
    </test>
    <!-- 4_windows.bed vs 4_windows_2.bed, direction "Downstream". -->
    <test>
      <param name="input1" value="4_windows.bed"/>
      <param name="input2" value="4_windows_2.bed"/>
      <param name="direction" value="Downstream"/>
      <output file="closest_features_down.interval" name="out_file1"/>
    </test>
    <!-- 4_windows.bed vs 4_windows_3.bed, direction "Both". -->
    <test>
      <param name="input1" value="4_windows.bed"/>
      <param name="input2" value="4_windows_3.bed"/>
      <param name="direction" value="Both"/>
      <output file="closest_features_both.interval" name="out_file1"/>
    </test>
    <!-- Tests for GFF functionality. -->
    <!-- 4_windows.bed vs 4_windows_2.gff, direction "Either". -->
    <test>
      <param name="input1" value="4_windows.bed"/>
      <param name="input2" value="4_windows_2.gff"/>
      <param name="direction" value="Either"/>
      <output file="closest_features_both.gff" name="out_file1"/>
    </test>
    <!-- 4_windows.gff vs 4_windows_2.gff, direction "Either". -->
    <test>
      <param name="input1" value="4_windows.gff"/>
      <param name="input2" value="4_windows_2.gff"/>
      <param name="direction" value="Either"/>
      <output file="closest_features_both2.gff" name="out_file1"/>
    </test>
  </tests>
 <help> 

.. class:: infomark

**What it does**

For every interval in the **interval** dataset, this tool fetches the **closest non-overlapping** upstream and/or downstream features from the **features** dataset.

-----

.. class:: warningmark

**Note:** 

Every line should contain at least 3 columns: chromosome number, start and stop coordinates. If any of these columns is missing or if start and stop coordinates are not numerical, the lines will be treated as invalid and skipped. The number of skipped lines is documented in the resulting history item as a "data issue".

If the strand column is missing from your input interval dataset, the intervals will be considered to be on the positive strand. You can add a strand column to your input dataset by using the *Text Manipulation->Add column* tool.

For GFF files, features are added as a GTF-style attribute at the end of the line.

-----

**Example**

If the **intervals** are::

   chr1 10   100  Query1.1
   chr1 500  1000 Query1.2
   chr1 1100 1250 Query1.3

and the **features** are::

   chr1 120  180  Query2.1
   chr1 140  200  Query2.2
   chr1 580  1050 Query2.3
   chr1 2000 2204 Query2.4
   chr1 2500 3000 Query2.5

Running this tool for **Both Upstream and Downstream** will return::

   chr1 10   100  Query1.1 chr1 120  180  Query2.1
   chr1 500  1000 Query1.2 chr1 140  200  Query2.2
   chr1 500  1000 Query1.2 chr1 2000 2204 Query2.4
   chr1 1100 1250 Query1.3 chr1 580  1050 Query2.3
   chr1 1100 1250 Query1.3 chr1 2000 2204 Query2.4

</help>  


</tool>