Mercurial > repos > nml > csvtk_join
diff join.xml @ 0:31442b046269 draft default tip
"planemo upload for repository https://github.com/shenwei356/csvtk commit 3a97e1b79bf0c6cdd37d5c8fb497b85531a563ab"
author | nml |
---|---|
date | Tue, 19 May 2020 17:22:35 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/join.xml Tue May 19 17:22:35 2020 -0400 @@ -0,0 +1,239 @@ +<tool id="csvtk_join" name="csvtk-join" version="@VERSION@+@GALAXY_VERSION@"> + <description>tables by column(s)</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="version_cmd" /> + <command detect_errors="exit_code"><![CDATA[ + +################### +## Start Command ## +################### +csvtk join --num-cpus "\${GALAXY_SLOTS:-1}" + + ## Add additional flags as specified ## + ####################################### + $ignore_case + $global_param.illegal_rows + $global_param.empty_rows + $global_param.header + $global_param.lazy_quotes + + ## Set Tabular input/output flag if first input is tabular ## + ############################################################# + #if $in_1[0].is_of_type("tabular"): + -t -T + #end if + + ## Set input files ## + ##################### + #for $file in $in_1: + '$file' + #end for + + ## Specify fields to join ## + ############################ + -F -f '$column_text.in_text' + + ## Fill columns if wanted ## + ############################ + #if $unmatched.wanted == 'yes': + #if $unmatched.join_type == 'outer' + -O + #else + -L + #end if + --na '$unmatched.fill_value' + #end if + + ## To output ## + ############### + > joined + + ]]></command> + <inputs> + <expand macro="multi_input"/> + <expand macro="fields_input"/> + <conditional name="unmatched" > + <param type="select" name="wanted" + label="Keep Unmatched Columns?" + help="Include unmatched columns in output results?" + > + <option value="no">No</option> + <option value="yes">Yes</option> + </param> + <when value="no" /> + <when value="yes"> + <param type="select" name="join_type" + label="Type of Join" + > + <option value="left">Left (Standard)</option> + <option value="outer">Outer</option> + </param> + <param type="text" name="fill_value" + argument="--na" + label="What to fill unmatched columns with" + help="Specify value to fill into all of the columns with no data"> + <expand macro="text_sanitizer" /> + </param> + </when> + </conditional> + <expand macro="ignore_case" /> + <expand macro="global_parameters" /> + </inputs> + <outputs> + <data format_source="in_1" name="joined" from_work_dir="joined" label="${in_1[0].name} joined by column ${column_text.in_text}" /> + </outputs> + <tests> + <test> + <param name="in_1" value="csv.csv,data.csv" /> + <conditional name="column_text"> + <param name="select" value="string" /> + <param name="in_text" value="Person" /> + </conditional> + <conditional name="unmatched" > + <param name="wanted" value="no" /> + </conditional> + <output name="joined" file="joined.csv" ftype="csv" /> + </test> + <test> + <param name="in_1" value="csv.csv,data.csv" /> + <conditional name="column_text"> + <param name="select" value="string" /> + <param name="in_text" value="Person" /> + </conditional> + <conditional name="unmatched" > + <param name="wanted" value="yes" /> + <param name="join_type" value="outer" /> + <param name="fill_value" value="a" /> + </conditional> + <param name="ignore_case" value="false" /> + <output name="joined" file="joined_filled.csv" ftype="csv" compare="sim_size" delta="10"/> + </test> + </tests> + <help><![CDATA[ + +Csvtk - Join Help +----------------- + +Info +#### +Csvtk-Join is used to join two or more csv/tsv files together by either a column number or a column key/name. +It will join together the columns that have a matching value and create an output csv/tsv file (depending upon input file type). + +Columns that are not matching will be discarded unless 'Keep Unmatched Columns' is set to 'Yes' + +.. class:: warningmark + + Single quotes are not allowed in text inputs! + +@HELP_INPUT_DATA@ + + +Usage +##### + +**Ex. Joining based on column 1:** + +:: + + +------+--------+----------+ +----------+----------+----------+ + | Name | Colour | Food | | Username | Sport | Column 3 | + +======+========+==========+ +==========+==========+==========+ + | Joe | Red | Pancakes | + | Joe | Swimming | Yes | + +------+--------+----------+ +----------+----------+----------+ + | John | Green | Potatos | | Gary | Biking | Yes | + +------+--------+----------+ +----------+----------+----------+ + + ------------------------------------------------------------------------- + + We would get the following table: + + +------+--------+----------+----------+----------+ + | Name | Colour | Food | Sport | Column 3 | + +======+========+==========+==========+==========+ + | Joe | Red | Pancakes | Swimming | Yes | + +------+--------+----------+----------+----------+ + + If we kept unmatched columns and filled them with 'NA', the following would be the output: + + +------+--------+----------+----------+----------+----------+ + | Name | Colour | Food | Username | Sport | Column 3 | + +======+========+==========+==========+==========+==========+ + | Joe | Red | Pancakes | Joe | Swimming | Yes | + +------+--------+----------+----------+----------+----------+ + | John | Green | Potatos | NA | NA | NA | + +------+--------+----------+----------+----------+----------+ + | NA | NA | NA | Gary | Biking | Yes | + +------+--------+----------+----------+----------+----------+ + +---- + +**Ex. Joining based on the column named "Name":** + +:: + + Here, we use the column header name as a way to match up the columns. This is especially useful if the wanted + column has the same name but is not located in the same location as seen below. + + Matching on the "Name" column would look as such: + + +------+--------+----------+ +------+----------+----------+ +----------+------+----------+----------+ + | Name | Colour | Food | | Name | Sport | Column 3 | | Column 4 | Name | Column 5 | Column 6 | + +======+========+==========+ + +======+==========+==========+ + +==========+======+==========+==========+ + | Joe | Red | Pancakes | | Joe | Swimming | Yes | | Yes | Joe | Yes | Yes | + +------+--------+----------+ +------+----------+----------+ +----------+------+----------+----------+ + | John | Green | Potatos | + +------+--------+----------+ + + ----------------------------------------------------------------------------------------------------------------- + + Would give the following table: + + +------+--------+----------+----------+----------+----------+----------+----------+ + | Name | Colour | Food | Sport | Column 3 | Column 4 | Column 5 | Column 6 | + +======+========+==========+==========+==========+==========+==========+==========+ + | Joe | Red | Pancakes | Swimming | Yes | Yes | Yes | Yes | + +------+--------+----------+----------+----------+----------+----------+----------+ + +---- + +**Ex. Matching data in multiple columns:** + +:: + + If using multiple columns, the values that are found to be the same in both columns will be put in the output. + If only one matches, or neither matches, then that row is not included in the output + + The following would occur using column names as "Name,Column 3" + + +------+----------+----------+ +----------+------+----------+ + | Name | Sport | Column 3 | | Column 4 | Name | Column 3 | + +======+==========+==========+ +==========+======+==========+ + | Joe | Swimming | Yes | + | Yes | Joe | Yes | + +------+----------+----------+ +----------+------+----------+ + | Jake | Track | No | | Yes | Jake | Yes | + +------+----------+----------+ +----------+------+----------+ + + ----------------------------------------------------------------------- + + Would Yield: + + +------+----------+----------+----------+ + | Name | Sport | Column 3 | Column 4 | + +======+==========+==========+==========+ + | Joe | Swimming | Yes | Yes | + +------+----------+----------+----------+ + +---- + +@HELP_COLUMNS@ + + +@HELP_END_STATEMENT@ + + + ]]></help> + <expand macro="citations" /> +</tool> \ No newline at end of file