view tools/filters/compare.xml @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
line wrap: on
line source

<tool id="comp1" name="Compare two Datasets" version="1.0.2">
  <description>to find common or distinct rows</description>
  <!--
    Runs joinWrapper.py through the python interpreter with six positional
    arguments, in this order: first dataset, second dataset, key column of the
    first dataset, key column of the second dataset, mode flag, output dataset.
    The three dataset arguments are single-quoted so that each reaches the
    script as exactly one argument even when its path contains whitespace or
    shell metacharacters.
  -->
  <command interpreter="python">joinWrapper.py '$input1' '$input2' $field1 $field2 $mode '$out_file1'</command>
  <inputs>
    <!-- First dataset: the rows written to the output are selected from this one (see the option labels of "mode"). -->
    <param format="tabular" name="input1" type="data" label="Compare"/>
    <!-- Key column of the first dataset; the choices are tied to input1 through data_ref. -->
    <param name="field1" label="Using column" type="data_column" data_ref="input1">
      <validator type="no_options" message="Invalid column choice. Please try again after editing metadata of your input dataset by clicking on the pencil icon next to it."/>
    </param>
    <!-- Second dataset: the one the first dataset is compared against. -->
    <param format="tabular" name="input2" type="data" label="against" />
    <!-- Key column of the second dataset; the choices are tied to input2 through data_ref. -->
    <param name="field2" label="and column" type="data_column" data_ref="input2">
      <validator type="no_options" message="Invalid column choice. Please try again after editing metadata of your input dataset by clicking on the pencil icon next to it."/>
    </param>
    <!-- Mode flag handed to the script unchanged: N for matching rows, V for non matching rows. -->
    <param name="mode" type="select" label="To find" help="See examples below for explanation of these options">
      <option value="N">Matching rows of 1st dataset</option>
      <option value="V">Non Matching rows of 1st dataset</option>
    </param>
  </inputs>
  <outputs>
    <!-- Single output dataset; its metadata is taken from input1 (metadata_source). -->
    <!-- NOTE(review): format="input" presumably makes the output datatype follow the input dataset; confirm against the Galaxy release in use. -->
    <data format="input" name="out_file1" metadata_source="input1" />
  </outputs>
  <tests>
    <!--test case for matching rows (mode N), keyed on column 2 of both datasets-->
    <test>
      <param name="input1" value="1.bed"/>
      <param name="input2" value="2.bed"/>
      <param name="field1" value="2"/>
      <param name="field2" value="2"/>
      <param name="mode" value="N"/>
      <output name="out_file1" file="fs-compare.dat"/>
    </test>
    <!--test case with duplicated key values-->
    <test>
      <param name="input1" value="1.bed"/>
      <param name="input2" value="3.bed"/>
      <param name="field1" value="1"/>
      <param name="field2" value="1"/>
      <param name="mode" value="V"/>
      <output name="out_file1" file="fs-compare-2.dat"/>
    </test>
  </tests>
  <help>

.. class:: infomark

**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*

-----

**Syntax**

This tool finds lines in one dataset that HAVE or DO NOT HAVE a common field with another dataset.

-----

**Example**

If this is **First dataset**::

  chr1 10 20 geneA 
  chr1 50 80 geneB
  chr5 10 40 geneL

and this is **Second dataset**::

  geneA tumor-suppressor
  geneB Foxp2
  geneC Gnas1
  geneE INK4a

Finding lines of the **First dataset** whose 4th column matches the 1st column of the **Second dataset** yields::

  chr1 10 20 geneA 
  chr1 50 80 geneB

Conversely, using option **Non Matching rows of 1st dataset** on the same fields will yield::

  chr5 10 40 geneL

</help>
</tool>