view join_files_on_column_fuzzy.xml @ 2:f2068690addc draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit 73ccf407098f84351a1bd18438b80023d362b91f
author bgruening
date Fri, 13 Apr 2018 03:30:12 -0400
parents 8750c3125ec5
children 22ec3c1a20cd
line wrap: on
line source

<tool id="join_files_on_column_fuzzy" name="Join two files" version="1.0.1">
    <description>on column allowing a small difference</description>
    <!-- Interpreter used to run the bundled join_files_on_column_fuzzy.py script. -->
    <requirements>
        <requirement type="package" version="3.6">python</requirement>
    </requirements>
    <!-- Builds the call to the bundled script; the closest flag is only passed in best-match mode,
         while the distance and unit values are always passed. -->
    <command>
    <![CDATA[
python '$__tool_directory__/join_files_on_column_fuzzy.py'
--f1 '$f1'
--f2 '$f2'
--c1 $c1
--c2 $c2
--outfile '$merged_file'
$header
$add_distance
#if str($merge_mode_select) == 'closest':
    --closest
#end if
--distance $distance
--unit $units
    ]]>
    </command>
    <inputs>
        <!-- NOTE(review): both datasets are declared optional although the command always passes them;
             confirm whether optional="true" is intended. -->
        <param argument="--f1" type="data" optional="true" format="tabular" label="1st file"/>
        <param argument="--c1" type="data_column" data_ref="f1" label="Column to use from 1st file" help="The file needs to be sorted by this column, ascending."/>
        <param argument="--f2" type="data" optional="true" format="tabular" label="2nd file"/>
        <param argument="--c2" type="data_column" data_ref="f2" label="Column to use from 2nd file" help="The file needs to be sorted by this column, ascending."/>

        <!-- Boolean switches expand to the corresponding script flag when checked and to nothing otherwise. -->
        <param argument="--header" type="boolean" checked="false" truevalue="--header" falsevalue="" label="Do the input files contain a header line?" />
        <param argument="--add_distance" type="boolean" checked="false" truevalue="--add_distance" falsevalue="" label="Add an additional column with the calculated distance" />

        <param name="merge_mode_select" type="select" label="Choose the mode of merging.">
            <option value="closest" selected="true">Best match (in case of multiple best matches, only the first one is reported)</option>
            <option value="distance">Matching with a defined distance</option>
        </param>
        <!-- The default is taken from the selected option; a separate value attribute is not used for selects. -->
        <param name="units" display="radio" type="select" label="Choose the metrics of your distance"
            help="ppm is useful for very small differences">
            <option value="absolute" selected="true">Absolute distance</option>
            <option value="ppm">Distance in ppm</option>
        </param>
        <param name="distance" value="0.2" type="float" label="Allowed distance between the two values that will trigger a merge"/>
    </inputs>
    <outputs>
        <!-- Joined table; its path is handed to the script through the outfile option of the command. -->
        <data name="merged_file" format="tabular" />
    </outputs>

    <tests>
        <!-- Test 1: defined-distance mode, absolute distance, no header. -->
        <test>
            <param name="f1" value="file1.tab" ftype="tabular"/>
            <param name="f2" value="file2.tab" ftype="tabular"/>
            <param name="c1" value="1"/>
            <param name="c2" value="1"/>
            <param name="merge_mode_select" value="distance"/>
            <param name="distance" value="0.1"/>
            <param name="units" value="absolute"/>
            <output name="merged_file" file="no_header_result1.tab" />
        </test>
        <!-- Test 2: defined-distance mode, absolute distance, with header. -->
        <test>
            <param name="f1" value="file1_header.tab" ftype="tabular"/>
            <param name="f2" value="file2_header.tab" ftype="tabular"/>
            <param name="c1" value="1"/>
            <param name="c2" value="1"/>
            <param name="merge_mode_select" value="distance"/>
            <param name="distance" value="0.2"/>
            <param name="units" value="absolute"/>
            <param name="header" value="true"/>
            <output name="merged_file" file="header_result2.tab" />
        </test>
        <!-- Test 3: best-match mode, with header, default distance settings. -->
        <test>
            <param name="f1" value="file1_header.tab" ftype="tabular"/>
            <param name="f2" value="file2_header.tab" ftype="tabular"/>
            <param name="c1" value="1"/>
            <param name="c2" value="1"/>
            <param name="header" value="true"/>
            <param name="merge_mode_select" value="closest"/>
            <output name="merged_file" file="header_closest_result3.tab" />
        </test>
        <!-- Test 4: defined-distance mode, ppm distance, no header. -->
        <test>
            <param name="f1" value="file1_ppm.tab" ftype="tabular"/>
            <param name="f2" value="file2_ppm.tab" ftype="tabular"/>
            <param name="c1" value="1"/>
            <param name="c2" value="1"/>
            <param name="header" value="false"/>
            <param name="merge_mode_select" value="distance"/>
            <param name="distance" value="100"/>
            <param name="units" value="ppm"/>
            <output name="merged_file" file="no_header_ppm_result4.tab" />
        </test>
        <!-- Test 5: best-match mode, with header and the extra distance column.
             The mode is selected through merge_mode_select, the only mode parameter declared in the inputs. -->
        <test>
            <param name="f1" value="file1_header.tab" ftype="tabular"/>
            <param name="f2" value="file2_header.tab" ftype="tabular"/>
            <param name="c1" value="1"/>
            <param name="c2" value="1"/>
            <param name="header" value="true"/>
            <param name="merge_mode_select" value="closest"/>
            <param name="add_distance" value="true"/>
            <output name="merged_file" file="header_closest_result5.tab" />
        </test>
        <!-- Test 6: defined-distance mode, ppm distance, no header, with the extra distance column. -->
        <test>
            <param name="f1" value="file1_ppm.tab" ftype="tabular"/>
            <param name="f2" value="file2_ppm.tab" ftype="tabular"/>
            <param name="c1" value="1"/>
            <param name="c2" value="1"/>
            <param name="header" value="false"/>
            <param name="merge_mode_select" value="distance"/>
            <param name="distance" value="100"/>
            <param name="units" value="ppm"/>
            <param name="add_distance" value="true"/>
            <output name="merged_file" file="no_header_ppm_result6.tab" />
        </test>
    </tests>
    <help>
<![CDATA[

Join two files on a common column. It is possible to provide an allowed difference between both values (currently only numbers)
as the absolute difference or as PPM.

Two modes are available:

  1. In the **best match** mode, only the rows with the most similar (or identical) values are merged. In case of multiple best matches, only the first one is reported.

  2. The **Matching with a defined distance** option will offer you the possibility
     to provide a distance between the two values of the columns. If the calculated distance is smaller than or equal to the given distance, the rows will be joined. You can specify the allowed distance as an absolute distance or as PPM.


]]>
    </help>
    <citations>
        <!-- No citation entries are defined for this tool yet. -->
    </citations>
</tool>