view easyjoin.xml @ 14:fb4ff3c42cd3 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 09b22cceacb34dd4c6c1b42890f93232df128208"
author bgruening
date Sat, 21 Mar 2020 13:49:11 -0400
parents 01ca80da2266
children 12615d397df7
line wrap: on
line source

<tool id="tp_easyjoin_tool" name="Join" version="@BASE_VERSION@.2">
    <description>two files</description>
    <!-- Shared definitions come from macros.xml. The "requirements" macro expanded
         below and the "citations" macro expanded at the end of this tool are
         presumably defined there (macros.xml is not visible here; confirm). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- perl is declared in addition to the shared requirements because the
         command section runs the easyjoin script through the perl interpreter. -->
    <expand macro="requirements">
        <requirement type="package" version="5.22.0.1">perl</requirement>
    </expand>
    <!-- Records the first line of the version banner printed by join. -->
    <version_command>join --version | head -n 1</version_command>
    <!-- Command template.
         1. Copies the sort-header helper shipped next to this wrapper into the job
            working directory and makes it executable (presumably so the easyjoin
            script can call it; confirm against the script).
         2. Runs the easyjoin Perl script with TAB as the field separator, the
            selected join type, the optional header and ignore-case flags, the filler
            for unpaired fields, and the key column of each file.
         3. Redirects the joined table to the output dataset.
         Every path substitution is single-quoted so that tool directories or
         dataset paths containing spaces or shell metacharacters are passed through
         verbatim. $jointype, $header and $ignore_case stay unquoted on purpose:
         they expand to zero or more separate command-line options. -->
    <command>
<![CDATA[
        cp '$__tool_directory__/sort-header' ./ &&
        chmod +x sort-header &&
        perl '$__tool_directory__/easyjoin'
            $jointype
            -t $'\t'
            $header
            -e '$empty_string_filler'
            -o auto
            $ignore_case
            -1 '$column1'
            -2 '$column2'
            '$infile1'
            '$infile2'
            > '$output'
]]>
    </command>
    <!-- Tool parameters: the two tabular datasets, the key column of each, the join
         type, and formatting switches that are passed straight to the command line. -->
    <inputs>
        <param name="infile1" format="tabular" type="data" label="1st file" />
        <param name="column1" label="Column to use from 1st file" type="data_column" data_ref="infile1" accept_default="true" />

        <param name="infile2" format="tabular" type="data" label="2nd File" />
        <param name="column2" label="Column to use from 2nd file" type="data_column" data_ref="infile2" accept_default="true" />

        <!-- Each option value is inserted verbatim into the command line; the default
             value is a single blank, i.e. no extra option is passed. -->
        <param name="jointype" type="select" label="Output lines appearing in">
            <option value=" " selected="True">Both 1st &amp; 2nd file.</option>
            <option value="-v 1">1st but not in 2nd file. (-v 1)</option>
            <option value="-v 2">2nd but not in 1st file. (-v 2)</option>
            <option value="-a 1">Both 1st &amp; 2nd file, plus unpairable lines from 1st file. (-a 1)</option>
            <option value="-a 2">Both 1st &amp; 2nd file, plus unpairable lines from 2nd file. (-a 2)</option>
            <option value="-a 1 -a 2">All lines [-a 1 -a 2]</option>
            <option value="-v 1 -v 2">All unpairable lines [-v 1 -v 2]</option>
        </param>

        <param name="header" type="boolean" checked="false" truevalue="--header" falsevalue=""
            label="First line is a header line" help="Use if first line contains column headers. It will not be sorted." />
        <param name="ignore_case" type="boolean" checked="false" truevalue="-i" falsevalue=""
            label="Ignore case" help="Sort and Join key column values regardless of upper/lower case letters." />
        <param name="empty_string_filler" type="text" value="0" label="Value to put in unpaired (empty) fields">
            <!-- The value is placed inside single quotes on the command line, so the
                 single quote itself is removed from the set of accepted characters
                 to keep the user from terminating that quoted argument. -->
            <sanitizer>
                <valid initial="string.printable">
                    <remove value="&apos;"/>
                </valid>
            </sanitizer>
        </param>
    </inputs>
    <outputs>
        <!-- The joined table takes both its datatype (format_source) and its metadata
             (metadata_source) from the 1st input file.
             NOTE(review): a joined table can have more columns than infile1, so
             confirm that inheriting the metadata of infile1 is intended. -->
        <data name="output" format_source="infile1" metadata_source="infile1"/>
    </outputs>
    <tests>
        <!-- Joins the two fixture files on column 1 of each, with the header option
             switched on and the "All lines" join type (-a 1 -a 2). The remaining
             parameters are not set here, so they keep their declared defaults
             (ignore case off, filler "0"). -->
        <test>
            <param name="infile1" value="easyjoin1.tabular" />
            <param name="column1" value="1" />
            <param name="infile2" value="easyjoin2.tabular" />
            <param name="column2" value="1" />
            <param name="header" value="True" />
            <param name="jointype" value="-a 1 -a 2" />
            <output name="output" file="easyjoin_result1.tabular" />
        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

This tool joins two tabular files based on a common key column.

-----

**Example**

**First file**::

    Fruit	Color
    Apple	red
    Banana	yellow
    Orange	orange
    Melon	green

**Second File**::

    Fruit	Price
    Orange	7
    Avocado	8
    Apple	4
    Banana	3

**Joining** both files, using **key column 1**, a **header line**, the **All lines** output option and a period as the value for unpaired fields, will return::

    Fruit	Color	Price
    Apple	red	4
    Avocado	.	8
    Banana	yellow	3
    Melon	green	.
    Orange	orange	7

.. class:: infomark

 * Input files need not be sorted.
 * The header line (**Fruit  Color  Price**) was joined and kept as first line.
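 * Missing values (Avocado's color, missing from the first file) are replaced with the value set in **Value to put in unpaired (empty) fields**; a period was used in this example, whereas the default is 0.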

@REFERENCES@
]]>
    </help>
    <expand macro="citations" />
</tool>