diff multijoin.xml @ 0:5314e5d6f040 draft

Imported from capsule None
author bgruening
date Thu, 29 Jan 2015 07:53:17 -0500
parents
children 37e1eb05b1b4
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/multijoin.xml	Thu Jan 29 07:53:17 2015 -0500
@@ -0,0 +1,133 @@
+<tool id="tp_multijoin_tool" name="Multi-Join" version="@BASE_VERSION@.0">
+    <description>(combine multiple files)</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements">
+        <requirement type="package" version="5.18.1">perl</requirement>
+        <requirement type="package" version="1.0">text_processing_perl_packages</requirement>
+    </expand>
+    <command interpreter="perl">
+<![CDATA[
+    multijoin
+        --key '$key_column'
+        --values '$value_columns'
+        --filler '$filler'
+        $ignore_dups
+        $output_header
+        $input_header
+        $first_file
+        #for $file in $files:
+            '$file'
+        #end for
+        > '$outfile'
+]]>
+    </command>
+    <inputs>
+        <param name="first_file" type="data" format="txt" label="File to join"/>
+        <param name="files" multiple="True" type="data" format="txt" label="add additional file" />
+
+        <param name="key_column" label="Common key column" type="integer"
+            value="1" help="Usually gene-ID or other common value" />
+
+        <param name="value_columns" label="Column with values to preserve"
+            type="data_column" data_ref="first_file" accept_default="true" multiple="True" display="checkboxes"/>
+
+        <param name="output_header" type="boolean" checked="false" truevalue="--out-header" falsevalue="" label="Add header line to the output file" help="" />
+        <param name="input_header" type="boolean" checked="false" truevalue="--in-header" falsevalue="" label="Input files contain a header line (as first line)" help="" />
+        <param name="ignore_dups" type="boolean" checked="false" truevalue="--ignore-dups" falsevalue="" label="Ignore duplicated keys" help="If not set, duplicated keys in the same file will cause an error." />
+        <param name="filler" type="text" size="20" value="0" label="Value to put in unpaired (empty) fields">
+            <sanitizer>
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="outfile" format_source="first_file" metadata_source="first_file" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="first_file" value="multijoin1.txt" />
+            <param name="files" value="multijoin2.txt,multijoin3.txt" />
+            <param name="key_column" value="4" />
+            <param name="value_columns" value="c7,c8,c9" />
+            <param name="output_header" value="True" />
+            <output name="outfile" file="multijoin_result1.txt" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+**What it does**
+
+This tool joins multiple tabular files based on a common key column.
+
+-----
+
+**Example**
+
+To join three files, based on the 4th column, and keeping the 7th,8th,9th columns:
+
+**First file (AAA)**::
+
+    chr4    888449    890171    FBtr0308778    0    +    266    1527    1722
+    chr4    972167    979017    FBtr0310651    0    -    3944    6428    6850
+    chr4    972186    979017    FBtr0089229    0    -    3944    6428    6831
+    chr4    972186    979017    FBtr0089231    0    -    3944    6428    6831
+    chr4    972186    979017    FBtr0089233    0    -    3944    6428    6831
+    chr4    995793    996435    FBtr0111046    0    +    7    166    642
+    chr4    995793    997931    FBtr0111044    0    +    28    683    2138
+    chr4    995793    997931    FBtr0111045    0    +    28    683    2138
+    chr4    1034029    1047719    FBtr0089223    0    -    5293    13394    13690
+    ...
+
+
+**Second File (BBB)**::
+
+    chr4    90286    134453    FBtr0309803    0    +    657    29084    44167
+    chr4    251355    266499    FBtr0089116    0    +    56    1296    15144
+    chr4    252050    266506    FBtr0308086    0    +    56    1296    14456
+    chr4    252050    266506    FBtr0308087    0    +    56    1296    14456
+    chr4    252053    266528    FBtr0300796    0    +    56    1296    14475
+    chr4    252053    266528    FBtr0300800    0    +    56    1296    14475
+    chr4    252055    266528    FBtr0300798    0    +    56    1296    14473
+    chr4    252055    266528    FBtr0300799    0    +    56    1296    14473
+    chr4    252541    266528    FBtr0300797    0    +    56    1296    13987
+    ...
+
+**Third file (CCC)**::
+
+    chr4    972167    979017    FBtr0310651    0    -    9927    6738    6850
+    chr4    972186    979017    FBtr0089229    0    -    9927    6738    6831
+    chr4    972186    979017    FBtr0089231    0    -    9927    6738    6831
+    chr4    972186    979017    FBtr0089233    0    -    9927    6738    6831
+    chr4    995793    996435    FBtr0111046    0    +    5    304    642
+    chr4    995793    997931    FBtr0111044    0    +    17    714    2138
+    chr4    995793    997931    FBtr0111045    0    +    17    714    2138
+    chr4    1034029    1047719    FBtr0089223    0    -    17646    13536    13690
+    ...
+
+
+**Joining** the files, using **key column 4**, **value columns 7,8,9** and a **header line**, will return::
+
+    key           AAA__V7   AAA__V8   AAA__V9   BBB__V7    BBB__V8    BBB__V9    CCC__V7   CCC__V8   CCC__V9
+    FBtr0089116         0         0         0        56       1296      15144          0         0         0
+    FBtr0089223      5293     13394     13690         0          0          0      17646     13536     13690
+    FBtr0089229      3944      6428      6831         0          0          0       9927      6738      6831
+    FBtr0089231      3944      6428      6831         0          0          0       9927      6738      6831
+    FBtr0089233      3944      6428      6831         0          0          0       9927      6738      6831
+    FBtr0111044        28       683      2138         0          0          0         17       714      2138
+    FBtr0111045        28       683      2138         0          0          0         17       714      2138
+    FBtr0111046         7       166       642         0          0          0          5       304       642
+    FBtr0300796         0         0         0        56       1296      14475          0         0         0
+    ...
+
+.. class:: infomark
+
+Input files need not be sorted.
+
+@REFERENCES@
+]]>
+    </help>
+</tool>