Mercurial > repos > bgruening > text_processing
view multijoin.xml @ 18:d698c222f354 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 378e99cde623698fb44ee7ac9873f455fb51fdbc"
author | bgruening |
---|---|
date | Sat, 08 Oct 2022 21:01:33 +0000 |
parents | 4c752559b236 |
children | 12615d397df7 |
line wrap: on
line source
<!--
    Galaxy wrapper around the bundled "multijoin" Perl script.
    Joins several text inputs on a shared key column and writes a single
    tabular dataset. The @BASE_VERSION@ and @REFERENCES@ tokens and the
    "citations" macro are expected to come from the imported macros.xml.
-->
<tool id="tp_multijoin_tool" name="Multi-Join" version="@BASE_VERSION@.1">
    <description>(combine multiple files)</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <requirements>
        <requirement type="package" version="5.22.0.1">perl</requirement>
    </requirements>
    <!--
        Every path and user-supplied value is wrapped in single quotes so the
        shell passes each one as exactly one argument. The three boolean
        parameters are deliberately left unquoted: each expands either to its
        flag or to an empty string, and quoting an empty string would hand the
        script a spurious empty argument.
    -->
    <command>
<![CDATA[
        perl '$__tool_directory__/multijoin'
            --key '$key_column'
            --values '$value_columns'
            --filler '$filler'
            $ignore_dups
            $output_header
            $input_header
            '$first_file'
            #for $file in $files:
                '$file'
            #end for
            > '$outfile'
]]>
    </command>
    <inputs>
        <param name="first_file" type="data" format="txt" label="File to join"/>
        <param name="files" multiple="true" type="data" format="txt" label="add additional file" />
        <param name="key_column" label="Common key column" type="integer" value="1" help="Usually gene-ID or other common value" />
        <!-- The selectable columns are derived from first_file only (data_ref). -->
        <param name="value_columns" label="Column with values to preserve" type="data_column" data_ref="first_file" accept_default="true" multiple="true" display="checkboxes"/>
        <param name="output_header" type="boolean" checked="false" truevalue="--out-header" falsevalue="" label="Add header line to the output file" help="" />
        <param name="input_header" type="boolean" checked="false" truevalue="--in-header" falsevalue="" label="Input files contain a header line (as first line)" help="" />
        <param name="ignore_dups" type="boolean" checked="false" truevalue="--ignore-dups" falsevalue="" label="Ignore duplicated keys" help="If not set, duplicated keys in the same file will cause an error." />
        <param name="filler" type="text" value="0" label="Value to put in unpaired (empty) fields">
            <!--
                The filler is embedded in a single-quoted shell argument in the
                command above, so single quotes are removed from the set of
                accepted characters.
            -->
            <sanitizer>
                <valid initial="string.printable">
                    <remove value="'"/>
                </valid>
            </sanitizer>
        </param>
    </inputs>
    <outputs>
        <data name="outfile" format="tabular" />
    </outputs>
    <tests>
        <!-- Three-file join on column 4, keeping columns 7-9, with an output header. -->
        <test>
            <param name="first_file" value="multijoin1.txt" />
            <param name="files" value="multijoin2.txt,multijoin3.txt" />
            <param name="key_column" value="4" />
            <param name="value_columns" value="c7,c8,c9" />
            <param name="output_header" value="True" />
            <output name="outfile" file="multijoin_result1.txt" lines_diff="2" />
        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

This tool joins multiple tabular files based on a common key column.

-----

**Example**

To join three files, based on the 4th column, and keeping the 7th,8th,9th columns:

**First file (AAA)**::

    chr4    888449     890171     FBtr0308778    0    +    266     1527     1722
    chr4    972167     979017     FBtr0310651    0    -    3944    6428     6850
    chr4    972186     979017     FBtr0089229    0    -    3944    6428     6831
    chr4    972186     979017     FBtr0089231    0    -    3944    6428     6831
    chr4    972186     979017     FBtr0089233    0    -    3944    6428     6831
    chr4    995793     996435     FBtr0111046    0    +    7       166      642
    chr4    995793     997931     FBtr0111044    0    +    28      683      2138
    chr4    995793     997931     FBtr0111045    0    +    28      683      2138
    chr4    1034029    1047719    FBtr0089223    0    -    5293    13394    13690
    ...


**Second File (BBB)**::

    chr4    90286     134453    FBtr0309803    0    +    657    29084    44167
    chr4    251355    266499    FBtr0089116    0    +    56     1296     15144
    chr4    252050    266506    FBtr0308086    0    +    56     1296     14456
    chr4    252050    266506    FBtr0308087    0    +    56     1296     14456
    chr4    252053    266528    FBtr0300796    0    +    56     1296     14475
    chr4    252053    266528    FBtr0300800    0    +    56     1296     14475
    chr4    252055    266528    FBtr0300798    0    +    56     1296     14473
    chr4    252055    266528    FBtr0300799    0    +    56     1296     14473
    chr4    252541    266528    FBtr0300797    0    +    56     1296     13987
    ...


**Third file (CCC)**::

    chr4    972167     979017     FBtr0310651    0    -    9927     6738     6850
    chr4    972186     979017     FBtr0089229    0    -    9927     6738     6831
    chr4    972186     979017     FBtr0089231    0    -    9927     6738     6831
    chr4    972186     979017     FBtr0089233    0    -    9927     6738     6831
    chr4    995793     996435     FBtr0111046    0    +    5        304      642
    chr4    995793     997931     FBtr0111044    0    +    17       714      2138
    chr4    995793     997931     FBtr0111045    0    +    17       714      2138
    chr4    1034029    1047719    FBtr0089223    0    -    17646    13536    13690
    ...


**Joining** the files, using **key column 4**, **value columns 7,8,9** and a **header line**, will return::

    key            AAA__V7    AAA__V8    AAA__V9    BBB__V7    BBB__V8    BBB__V9    CCC__V7    CCC__V8    CCC__V9
    FBtr0089116    0          0          0          56         1296       15144      0          0          0
    FBtr0089223    5293       13394      13690      0          0          0          17646      13536      13690
    FBtr0089229    3944       6428       6831       0          0          0          9927       6738       6831
    FBtr0089231    3944       6428       6831       0          0          0          9927       6738       6831
    FBtr0089233    3944       6428       6831       0          0          0          9927       6738       6831
    FBtr0111044    28         683        2138       0          0          0          17         714        2138
    FBtr0111045    28         683        2138       0          0          0          17         714        2138
    FBtr0111046    7          166        642        0          0          0          5          304        642
    FBtr0300796    0          0          0          56         1296       14475      0          0          0
    ...


.. class:: infomark

Input files need not be sorted.

@REFERENCES@
]]>
    </help>
    <expand macro="citations" />
</tool>