Mercurial > repos > bgruening > text_processing
comparison multijoin.xml @ 0:5314e5d6f040 draft
Imported from capsule None
author | bgruening |
---|---|
date | Thu, 29 Jan 2015 07:53:17 -0500 |
parents | |
children | 37e1eb05b1b4 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5314e5d6f040 |
---|---|
1 <tool id="tp_multijoin_tool" name="Multi-Join" version="@BASE_VERSION@.0"> | |
2 <description>(combine multiple files)</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> <!-- Imports the shared macro definitions from macros.xml; the "requirements" macro expanded in this tool is presumably defined there (that file is not part of this view). --> | |
6 <expand macro="requirements"> | |
7 <requirement type="package" version="5.18.1">perl</requirement> | |
8 <requirement type="package" version="1.0">text_processing_perl_packages</requirement> | |
9 </expand> <!-- Expands the "requirements" macro and supplies two extra package requirements (perl 5.18.1 and text_processing_perl_packages 1.0) as its content. NOTE(review): how the macro merges this content is defined in macros.xml; confirm there. --> | |
10 <command interpreter="perl"> | |
11 <![CDATA[ | |
12 multijoin | |
13 --key '$key_column' | |
14 --values '$value_columns' | |
15 --filler '$filler' | |
16 $ignore_dups | |
17 $output_header | |
18 $input_header | |
19 '$first_file' | |
20 #for $file in $files: | |
21 '$file' | |
22 #end for | |
23 > '$outfile' | |
24 ]]> | |
25 </command> <!-- Runs the multijoin script with perl: key, values and filler options first, then the three optional boolean flags, then first_file followed by every dataset in files; stdout is redirected to outfile. All dataset paths, including first_file, are single-quoted so the shell passes each one as a single argument. --> | |
26 <inputs> | |
27 <param name="first_file" type="data" format="txt" label="File to join"/> <!-- First dataset on the command line; value_columns reads its column list from this dataset via data_ref. --> | |
28 <param name="files" multiple="True" type="data" format="txt" label="add additional file" /> <!-- Additional datasets (multiple selection); the command template appends each one after first_file. --> | |
29 | |
30 <param name="key_column" label="Common key column" type="integer" | |
31 value="1" help="Usually gene-ID or other common value" /> <!-- Plain integer, default 1, passed to the key option of the script; unlike value_columns it is not tied to a dataset. --> | |
32 | |
33 <param name="value_columns" label="Column with values to preserve" | |
34 type="data_column" data_ref="first_file" accept_default="true" multiple="True" display="checkboxes"/> <!-- Multi-select column chooser bound to first_file; passed to the values option of the script. --> | |
35 | |
36 <param name="output_header" type="boolean" checked="false" truevalue="--out-header" falsevalue="" label="Add header line to the output file" help="" /> | |
37 <param name="input_header" type="boolean" checked="false" truevalue="--in-header" falsevalue="" label="Input files contain a header line (as first line)" help="" /> | |
38 <param name="ignore_dups" type="boolean" checked="false" truevalue="--ignore-dups" falsevalue="" label="Ignore duplicated keys" help="If not set, duplicated keys in the same file will cause an error." /> <!-- The three boolean params above are inserted verbatim into the command: the truevalue flag when checked, an empty string otherwise. --> | |
39 <param name="filler" type="text" size="20" value="0" label="Value to put in unpaired (empty) fields"> | |
40 <sanitizer> | |
41 <valid initial="string.printable"> | |
42 <remove value="'"/> | |
43 </valid> | |
44 </sanitizer> | |
45 </param> <!-- filler: the sanitizer starts from the printable character set and removes the single quote, because the command template wraps this value in single quotes. --> | |
46 </inputs> | |
47 <outputs> | |
48 <data name="outfile" format_source="first_file" metadata_source="first_file" /> | |
49 </outputs> <!-- Single output dataset; the command redirects the script's stdout into it, and its format and metadata are sourced from first_file. NOTE(review): the joined table has different columns than first_file, so confirm that copying metadata from it is intended. --> | |
50 <tests> | |
51 <test> | |
52 <param name="first_file" value="multijoin1.txt" /> | |
53 <param name="files" value="multijoin2.txt,multijoin3.txt" /> | |
54 <param name="key_column" value="4" /> | |
55 <param name="value_columns" value="c7,c8,c9" /> | |
56 <param name="output_header" value="True" /> | |
57 <output name="outfile" file="multijoin_result1.txt" /> | |
58 </test> | |
59 </tests> <!-- One functional test: joins multijoin1.txt with multijoin2.txt and multijoin3.txt on key column 4, keeps columns 7, 8 and 9, enables the output header, and compares outfile against multijoin_result1.txt. --> | |
60 <help> | |
61 <![CDATA[ | |
62 **What it does** | |
63 | |
64 This tool joins multiple tabular files based on a common key column. | |
65 | |
66 ----- | |
67 | |
68 **Example** | |
69 | |
70 To join three files based on the 4th column, keeping the 7th, 8th and 9th columns: | |
71 | |
72 **First file (AAA)**:: | |
73 | |
74 chr4 888449 890171 FBtr0308778 0 + 266 1527 1722 | |
75 chr4 972167 979017 FBtr0310651 0 - 3944 6428 6850 | |
76 chr4 972186 979017 FBtr0089229 0 - 3944 6428 6831 | |
77 chr4 972186 979017 FBtr0089231 0 - 3944 6428 6831 | |
78 chr4 972186 979017 FBtr0089233 0 - 3944 6428 6831 | |
79 chr4 995793 996435 FBtr0111046 0 + 7 166 642 | |
80 chr4 995793 997931 FBtr0111044 0 + 28 683 2138 | |
81 chr4 995793 997931 FBtr0111045 0 + 28 683 2138 | |
82 chr4 1034029 1047719 FBtr0089223 0 - 5293 13394 13690 | |
83 ... | |
84 | |
85 | |
86 **Second File (BBB)**:: | |
87 | |
88 chr4 90286 134453 FBtr0309803 0 + 657 29084 44167 | |
89 chr4 251355 266499 FBtr0089116 0 + 56 1296 15144 | |
90 chr4 252050 266506 FBtr0308086 0 + 56 1296 14456 | |
91 chr4 252050 266506 FBtr0308087 0 + 56 1296 14456 | |
92 chr4 252053 266528 FBtr0300796 0 + 56 1296 14475 | |
93 chr4 252053 266528 FBtr0300800 0 + 56 1296 14475 | |
94 chr4 252055 266528 FBtr0300798 0 + 56 1296 14473 | |
95 chr4 252055 266528 FBtr0300799 0 + 56 1296 14473 | |
96 chr4 252541 266528 FBtr0300797 0 + 56 1296 13987 | |
97 ... | |
98 | |
99 **Third file (CCC)**:: | |
100 | |
101 chr4 972167 979017 FBtr0310651 0 - 9927 6738 6850 | |
102 chr4 972186 979017 FBtr0089229 0 - 9927 6738 6831 | |
103 chr4 972186 979017 FBtr0089231 0 - 9927 6738 6831 | |
104 chr4 972186 979017 FBtr0089233 0 - 9927 6738 6831 | |
105 chr4 995793 996435 FBtr0111046 0 + 5 304 642 | |
106 chr4 995793 997931 FBtr0111044 0 + 17 714 2138 | |
107 chr4 995793 997931 FBtr0111045 0 + 17 714 2138 | |
108 chr4 1034029 1047719 FBtr0089223 0 - 17646 13536 13690 | |
109 ... | |
110 | |
111 | |
112 **Joining** the files, using **key column 4**, **value columns 7,8,9** and a **header line**, will return:: | |
113 | |
114 key AAA__V7 AAA__V8 AAA__V9 BBB__V7 BBB__V8 BBB__V9 CCC__V7 CCC__V8 CCC__V9 | |
115 FBtr0089116 0 0 0 56 1296 15144 0 0 0 | |
116 FBtr0089223 5293 13394 13690 0 0 0 17646 13536 13690 | |
117 FBtr0089229 3944 6428 6831 0 0 0 9927 6738 6831 | |
118 FBtr0089231 3944 6428 6831 0 0 0 9927 6738 6831 | |
119 FBtr0089233 3944 6428 6831 0 0 0 9927 6738 6831 | |
120 FBtr0111044 28 683 2138 0 0 0 17 714 2138 | |
121 FBtr0111045 28 683 2138 0 0 0 17 714 2138 | |
122 FBtr0111046 7 166 642 0 0 0 5 304 642 | |
123 FBtr0300796 0 0 0 56 1296 14475 0 0 0 | |
124 ... | |
125 | |
126 .. class:: infomark | |
127 | |
128 Input files need not be sorted. | |
129 | |
130 @REFERENCES@ | |
131 ]]> | |
132 </help> | |
133 </tool> |