diff easyjoin.xml @ 0:5314e5d6f040 draft

Imported from capsule None
author bgruening
date Thu, 29 Jan 2015 07:53:17 -0500
parents
children 43b1f073b693
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/easyjoin.xml	Thu Jan 29 07:53:17 2015 -0500
@@ -0,0 +1,115 @@
+<tool id="tp_easyjoin_tool" name="Join" version="@BASE_VERSION@.0">
+    <description>two files</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements">
+        <requirement type="set_environment">TP_SCRIPT_PATH</requirement>
+    </expand>
+    <version_command>join --version | head -n 1</version_command>
+    <command>
+<![CDATA[
+        cp \$TP_SCRIPT_PATH/sort-header ./ &&
+        chmod +x sort-header &&
+        perl \$TP_SCRIPT_PATH/easyjoin
+            $jointype
+            -t '	'
+            $header
+            -e '$empty_string_filler'
+            -o auto
+            $ignore_case
+            -1 '$column1'
+            -2 '$column2'
+            "$infile1"
+            "$infile2"
+            > '$output'
+]]>
+    </command>
+    <inputs>
+        <param name="infile1" format="tabular" type="data" label="1st file" />
+        <param name="column1" label="Column to use from 1st file" type="data_column" data_ref="infile1" accept_default="true" />
+
+        <param name="infile2" format="txt" type="data" label="2nd File" />
+        <param name="column2" label="Column to use from 2nd file" type="data_column" data_ref="infile2" accept_default="true" />
+
+        <param name="jointype" type="select" label="Output lines appearing in">
+            <option value=" " selected="True">Both 1st &amp; 2nd file.</option>
+            <option value="-v 1">1st but not in 2nd file. (-v 1)</option>
+            <option value="-v 2">2nd but not in 1st file. (-v 2)</option>
+            <option value="-a 1">Both 1st &amp; 2nd file, plus unpairable lines from 1st file. (-a 1)</option>
+            <option value="-a 2">Both 1st &amp; 2nd file, plus unpairable lines from 2st file. (-a 2)</option>
+            <option value="-a 1 -a 2">All lines [-a 1 -a 2]</option>
+            <option value="-v 1 -v 2">All unpairable lines [-v 1 -v 2]</option>
+        </param>
+
+        <param name="header" type="boolean" checked="false" truevalue="--header" falsevalue=""
+            label="First line is a header line" help="Use if first line contains column headers. It will not be sorted." />
+        <param name="ignore_case" type="boolean" checked="false" truevalue="-i" falsevalue=""
+            label="Ignore case" help="Sort and Join key column values regardless of upper/lower case letters." />
+        <param name="empty_string_filler" type="text" size="20" value="0" label="Value to put in unpaired (empty) fields">
+            <sanitizer>
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="output" format_source="infile1" metadata_source="infile1"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile1" value="easyjoin1.tabular" />
+            <param name="column1" value="1" />
+            <param name="infile2" value="easyjoin2.tabular" />
+            <param name="column2" value="1" />
+            <param name="header" value="True" />
+            <param name="jointype" value="-a 1 -a 2" />
+            <output name="output" file="easyjoin_result1.tabular" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+**What it does**
+
+This tool joins two tabular files based on a common key column.
+
+-----
+
+**Example**
+
+**First file**::
+
+    Fruit	Color
+    Apple	red
+    Banana	yellow
+    Orange	orange
+    Melon	green
+
+**Second File**::
+
+    Fruit	Price
+    Orange	7
+    Avocado	8
+    Apple	4
+    Banana	3
+
+**Joining** both files, using **key column 1** and a **header line**, will return::
+
+    Fruit	Color	Price
+    Apple	red	4
+    Avocado	.	8
+    Banana	yellow	3
+    Melon	green	.
+    Orange	orange	7
+
+.. class:: infomark
+
+ * Input files need not be sorted.
+ * The header line (**Fruit  Color  Price**) was joined and kept as first line.
+ * Missing values ( Avocado's color, missing from the first file ) are replaced with a period character.
+
+@REFERENCES@
+]]>
+    </help>
+</tool>