Mercurial > repos > nml > csvtk_join
comparison join.xml @ 0:31442b046269 draft default tip
"planemo upload for repository https://github.com/shenwei356/csvtk commit 3a97e1b79bf0c6cdd37d5c8fb497b85531a563ab"
author | nml |
---|---|
date | Tue, 19 May 2020 17:22:35 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:31442b046269 |
---|---|
1 <tool id="csvtk_join" name="csvtk-join" version="@VERSION@+@GALAXY_VERSION@"> | |
2 <description>tables by column(s)</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements" /> | |
7 <expand macro="version_cmd" /> | |
8 <command detect_errors="exit_code"><![CDATA[ | |
9 | |
10 ################### | |
11 ## Start Command ## | |
12 ################### | |
13 csvtk join --num-cpus "\${GALAXY_SLOTS:-1}" | |
14 | |
15 ## Add additional flags as specified ## | |
16 ####################################### | |
17 $ignore_case | |
18 $global_param.illegal_rows | |
19 $global_param.empty_rows | |
20 $global_param.header | |
21 $global_param.lazy_quotes | |
22 | |
23 ## Set Tabular input/output flag if first input is tabular ## | |
24 ############################################################# | |
25 #if $in_1[0].is_of_type("tabular"): | |
26 -t -T | |
27 #end if | |
28 | |
29 ## Set input files ## | |
30 ##################### | |
31 #for $file in $in_1: | |
32 '$file' | |
33 #end for | |
34 | |
35 ## Specify fields to join ## | |
36 ############################ | |
37 -F -f '$column_text.in_text' | |
38 | |
39 ## Fill columns if wanted ## | |
40 ############################ | |
41 #if $unmatched.wanted == 'yes': | |
42 #if $unmatched.join_type == 'outer' | |
43 -O | |
44 #else | |
45 -L | |
46 #end if | |
47 --na '$unmatched.fill_value' | |
48 #end if | |
49 | |
50 ## To output ## | |
51 ############### | |
52 > joined | |
53 | |
54 ]]></command> | |
55 <inputs> | |
56 <expand macro="multi_input"/> | |
57 <expand macro="fields_input"/> | |
58 <conditional name="unmatched" > | |
59 <param type="select" name="wanted" | |
60 label="Keep Unmatched Columns?" | |
61 help="Include unmatched columns in output results?" | |
62 > | |
63 <option value="no">No</option> | |
64 <option value="yes">Yes</option> | |
65 </param> | |
66 <when value="no" /> | |
67 <when value="yes"> | |
68 <param type="select" name="join_type" | |
69 label="Type of Join" | |
70 > | |
71 <option value="left">Left (Standard)</option> | |
72 <option value="outer">Outer</option> | |
73 </param> | |
74 <param type="text" name="fill_value" | |
75 argument="--na" | |
76 label="What to fill unmatched columns with" | |
77 help="Specify value to fill into all of the columns with no data"> | |
78 <expand macro="text_sanitizer" /> | |
79 </param> | |
80 </when> | |
81 </conditional> | |
82 <expand macro="ignore_case" /> | |
83 <expand macro="global_parameters" /> | |
84 </inputs> | |
85 <outputs> | |
86 <data format_source="in_1" name="joined" from_work_dir="joined" label="${in_1[0].name} joined by column ${column_text.in_text}" /> | |
87 </outputs> | |
88 <tests> | |
89 <test> | |
90 <param name="in_1" value="csv.csv,data.csv" /> | |
91 <conditional name="column_text"> | |
92 <param name="select" value="string" /> | |
93 <param name="in_text" value="Person" /> | |
94 </conditional> | |
95 <conditional name="unmatched" > | |
96 <param name="wanted" value="no" /> | |
97 </conditional> | |
98 <output name="joined" file="joined.csv" ftype="csv" /> | |
99 </test> | |
100 <test> | |
101 <param name="in_1" value="csv.csv,data.csv" /> | |
102 <conditional name="column_text"> | |
103 <param name="select" value="string" /> | |
104 <param name="in_text" value="Person" /> | |
105 </conditional> | |
106 <conditional name="unmatched" > | |
107 <param name="wanted" value="yes" /> | |
108 <param name="join_type" value="outer" /> | |
109 <param name="fill_value" value="a" /> | |
110 </conditional> | |
111 <param name="ignore_case" value="false" /> | |
112 <output name="joined" file="joined_filled.csv" ftype="csv" compare="sim_size" delta="10"/> | |
113 </test> | |
114 </tests> | |
115 <help><![CDATA[ | |
116 | |
117 Csvtk - Join Help | |
118 ----------------- | |
119 | |
120 Info | |
121 #### | |
122 Csvtk-Join is used to join two or more csv/tsv files together by either a column number or a column key/name. | |
123 It joins the rows whose values match in the chosen column(s) and creates an output csv/tsv file (the output format follows the input file type). | |
124 | |
125 Rows without a match are discarded unless 'Keep Unmatched Columns' is set to 'Yes'. | |
126 | |
127 .. class:: warningmark | |
128 | |
129 Single quotes are not allowed in text inputs! | |
130 | |
131 @HELP_INPUT_DATA@ | |
132 | |
133 | |
134 Usage | |
135 ##### | |
136 | |
137 **Ex. Joining based on column 1:** | |
138 | |
139 :: | |
140 | |
141 +------+--------+----------+ +----------+----------+----------+ | |
142 | Name | Colour | Food | | Username | Sport | Column 3 | | |
143 +======+========+==========+ +==========+==========+==========+ | |
144 | Joe | Red | Pancakes | + | Joe | Swimming | Yes | | |
145 +------+--------+----------+ +----------+----------+----------+ | |
146 | John | Green | Potatoes | | Gary | Biking | Yes | | |
147 +------+--------+----------+ +----------+----------+----------+ | |
148 | |
149 ------------------------------------------------------------------------- | |
150 | |
151 We would get the following table: | |
152 | |
153 +------+--------+----------+----------+----------+ | |
154 | Name | Colour | Food | Sport | Column 3 | | |
155 +======+========+==========+==========+==========+ | |
156 | Joe | Red | Pancakes | Swimming | Yes | | |
157 +------+--------+----------+----------+----------+ | |
158 | |
159 If we kept unmatched columns using an outer join and filled them with 'NA', the following would be the output: | |
160 | |
161 +------+--------+----------+----------+----------+----------+ | |
162 | Name | Colour | Food | Username | Sport | Column 3 | | |
163 +======+========+==========+==========+==========+==========+ | |
164 | Joe | Red | Pancakes | Joe | Swimming | Yes | | |
165 +------+--------+----------+----------+----------+----------+ | |
166 | John | Green | Potatoes | NA | NA | NA | | |
167 +------+--------+----------+----------+----------+----------+ | |
168 | NA | NA | NA | Gary | Biking | Yes | | |
169 +------+--------+----------+----------+----------+----------+ | |
170 | |
171 ---- | |
172 | |
173 **Ex. Joining based on the column named "Name":** | |
174 | |
175 :: | |
176 | |
177 Here, we use the column header name to match up the columns. This is especially useful if the wanted | |
178 column has the same name in every file but is not in the same position, as seen below. | |
179 | |
180 Matching on the "Name" column would look as such: | |
181 | |
182 +------+--------+----------+ +------+----------+----------+ +----------+------+----------+----------+ | |
183 | Name | Colour | Food | | Name | Sport | Column 3 | | Column 4 | Name | Column 5 | Column 6 | | |
184 +======+========+==========+ + +======+==========+==========+ + +==========+======+==========+==========+ | |
185 | Joe | Red | Pancakes | | Joe | Swimming | Yes | | Yes | Joe | Yes | Yes | | |
186 +------+--------+----------+ +------+----------+----------+ +----------+------+----------+----------+ | |
187 | John | Green | Potatoes | | |
188 +------+--------+----------+ | |
189 | |
190 ----------------------------------------------------------------------------------------------------------------- | |
191 | |
192 Would give the following table: | |
193 | |
194 +------+--------+----------+----------+----------+----------+----------+----------+ | |
195 | Name | Colour | Food | Sport | Column 3 | Column 4 | Column 5 | Column 6 | | |
196 +======+========+==========+==========+==========+==========+==========+==========+ | |
197 | Joe | Red | Pancakes | Swimming | Yes | Yes | Yes | Yes | | |
198 +------+--------+----------+----------+----------+----------+----------+----------+ | |
199 | |
200 ---- | |
201 | |
202 **Ex. Matching data in multiple columns:** | |
203 | |
204 :: | |
205 | |
206 If using multiple columns, a row is put in the output only when the values in all of the chosen columns are the same in both files. | |
207 If only one column matches, or neither matches, then that row is not included in the output. | |
208 | |
209 The following would occur using column names as "Name,Column 3" | |
210 | |
211 +------+----------+----------+ +----------+------+----------+ | |
212 | Name | Sport | Column 3 | | Column 4 | Name | Column 3 | | |
213 +======+==========+==========+ +==========+======+==========+ | |
214 | Joe | Swimming | Yes | + | Yes | Joe | Yes | | |
215 +------+----------+----------+ +----------+------+----------+ | |
216 | Jake | Track | No | | Yes | Jake | Yes | | |
217 +------+----------+----------+ +----------+------+----------+ | |
218 | |
219 ----------------------------------------------------------------------- | |
220 | |
221 Would Yield: | |
222 | |
223 +------+----------+----------+----------+ | |
224 | Name | Sport | Column 3 | Column 4 | | |
225 +======+==========+==========+==========+ | |
226 | Joe | Swimming | Yes | Yes | | |
227 +------+----------+----------+----------+ | |
228 | |
229 ---- | |
230 | |
231 @HELP_COLUMNS@ | |
232 | |
233 | |
234 @HELP_END_STATEMENT@ | |
235 | |
236 | |
237 ]]></help> | |
238 <expand macro="citations" /> | |
239 </tool> |