Mercurial > repos > nml > csvtk_separate
diff separate.xml @ 0:1be50033b476 draft default tip
"planemo upload for repository https://github.com/shenwei356/csvtk commit 3a97e1b79bf0c6cdd37d5c8fb497b85531a563ab"
author | nml |
---|---|
date | Tue, 19 May 2020 17:24:41 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/separate.xml Tue May 19 17:24:41 2020 -0400
@@ -0,0 +1,233 @@
+<tool id="csvtk_separate" name="csvtk-separate" version="@VERSION@+@GALAXY_VERSION@">
+    <!-- Galaxy wrapper for `csvtk separate`: splits one input column into several new columns,
+         using either a literal separator string or a regular expression. -->
+    <description> column into multiple columns</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="version_cmd" />
+    <command detect_errors="exit_code"><![CDATA[
+
+## One new column per comma-separated name: count the commas and add one ##
+#set column_number = $column_names.count(',') + 1
+
+###################
+## Start Command ##
+###################
+csvtk separate --num-cpus "\${GALAXY_SLOTS:-1}"
+
+    ## Add additional flags as specified ##
+    #######################################
+    $ignore_case
+    $global_param.illegal_rows
+    $global_param.empty_rows
+    $global_param.header
+    $global_param.lazy_quotes
+
+    ## Set Tabular input/output flag if first input is tabular ##
+    #############################################################
+    #if $in_1.is_of_type("tabular"):
+        -t -T
+    #end if
+
+    ## Set input files ##
+    #####################
+    $in_1
+
+    ## Specify field to use ##
+    ##########################
+    -f '$column_text.in_text'
+
+    ## Specific inputs ##
+    #####################
+    -n '$column_names'
+    -N '$column_number'
+
+    #if $how_separate.how == 'sep'
+        -s '$how_separate.separator'
+    #else
+        ## The user pattern is passed wrapped in parentheses, as described in the help ##
+        -r '($how_separate.separator)'
+    #end if
+
+    --na '$fill_na'
+    $extra_data
+    $remove
+
+    ## To output ##
+    ###############
+    > separated
+
+    ]]></command>
+    <inputs>
+        <expand macro="singular_input"/>
+        <expand macro="singular_fields_input"/>
+        <conditional name="how_separate">
+            <param name="how" type="select" label="Separate input column by:">
+                <option value="sep">Separator String</option>
+                <option value="regex">Regexp</option>
+            </param>
+            <when value="sep">
+                <param name="separator" type="text" value="; " argument="-s"
+                    label="Separator string"
+                    help="Specify what string separates the data in the column">
+                    <expand macro="text_sanitizer" />
+                </param>
+            </when>
+            <when value="regex">
+                <param name="separator" type="text" value=".+" argument="-r"
+                    label="Set regex search pattern"
+                    help="Use regex to match input column information. Example: ^(.+)$ will match all characters.
+                    Regex help can be found below. The ' character is invalid">
+                    <expand macro="text_sanitizer" />
+                </param>
+            </when>
+        </conditional>
+        <param name="column_names" type="text" value="new1,new2" argument="-n"
+            label="Specify new column name(s)"
+            help="More than one column can be made by separating the names by a comma (,). Ex. 'Genus,Species' would create two columns.">
+            <validator type="empty_field" message="At least one new column name is required" />
+            <expand macro="text_sanitizer" />
+        </param>
+        <param name="extra_data" type="select" label="Handle extra data by:"
+            help="Extra data is data that does not fit into the new columns made. An example can be found below">
+            <option value="--drop">Dropping it</option>
+            <option value="--merge">Merging it</option>
+        </param>
+        <param name="fill_na" type="text" value="NA" argument="--na"
+            label="Character string to fill empty columns">
+            <expand macro="text_sanitizer" />
+        </param>
+        <param name="remove" type="boolean" checked="true" argument="-R"
+            truevalue="-R"
+            falsevalue=""
+            label="Remove input column"
+        />
+        <expand macro="ignore_case" />
+        <expand macro="global_parameters" />
+    </inputs>
+    <outputs>
+        <data format_source="in_1" from_work_dir="separated" name="separated" label="${in_1.name} column ${column_text.in_text} separated by ${how_separate.separator}" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="in_1" value="collapsed.tsv" />
+            <conditional name="column_text">
+                <param name="select" value="string" />
+                <param name="in_text" value="2" />
+            </conditional>
+            <conditional name="how_separate">
+                <param name="how" value="sep" />
+                <param name="separator" value="; " />
+            </conditional>
+            <param name="column_names" value="1,2" />
+            <param name="fill_na" value="NA" />
+            <param name="extra_data" value="--drop" />
+            <param name="remove" value="true" />
+            <output name="separated" file="separated_1.tsv" ftype="tabular" />
+        </test>
+        <test>
+            <param name="in_1" value="collapsed.tsv" />
+            <conditional name="column_text">
+                <param name="select" value="string" />
+                <param name="in_text" value="2" />
+            </conditional>
+            <conditional name="how_separate">
+                <param name="how" value="sep" />
+                <param name="separator" value="; " />
+            </conditional>
+            <param name="column_names" value="1,2" />
+            <param name="fill_na" value="N/A" />
+            <param name="extra_data" value="--merge" />
+            <param name="remove" value="false" />
+            <output name="separated" file="separated_2.tsv" ftype="tabular" />
+        </test>
+    </tests>
+    <help><![CDATA[
+
+Csvtk - Separate Help
+---------------------
+
+Info
+####
+
+Csvtk-separate separates columns into new columns based on either an input string or a regular expression.
+
+The regex input for this tool is structured such that your regular expression **does not** need to start with quotes or brackets. You can
+start your expression with a `^` or just go straight into it.
+
+For example:
+
+::
+
+    Using `.+` as an input would be used in the code as '(.+)'
+
+    Using ^(.+)$ as an input would yield an input in the code as '(^(.+)$)'
+
+.. class:: warningmark
+
+    Single quotes are not allowed in text inputs!
+
+----
+
+
+@HELP_INPUT_DATA@
+
+
+Usage
+#####
+
+**Ex. Separate with Dropping Data**
+
+Suppose we had the following table and wanted to separate the scientific name column to create two new columns called genus and species:
+
++------------+------------------------------+
+| Name       | Scientific Name              |
++============+==============================+
+| Red Fox    | Vulpes vulpes                |
++------------+------------------------------+
+| Salmonella | Salmonella enterica enterica |
++------------+------------------------------+
+
+First, we set our separator to string and use just a space (' ') as the separator as the names are separated by spaces.
+
+Then, we have to set the new column names which can be done by setting the column names to 'Genus,Species' (make sure that the names are
+separated by a comma).
+
+Finally, we have to decide if we want to drop the input column and if we want to merge the additional data or drop it.
+
+The first table shows additional data being dropped (the second enterica is additional as it cannot fit in its own column):
+
++------------+------------+----------+
+| Name       | Genus      | Species  |
++============+============+==========+
+| Red Fox    | Vulpes     | vulpes   |
++------------+------------+----------+
+| Salmonella | Salmonella | enterica |
++------------+------------+----------+
+
+|
+
+Here is what it would look like if we merged data instead:
+
++------------+------------+-------------------+
+| Name       | Genus      | Species           |
++============+============+===================+
+| Red Fox    | Vulpes     | vulpes            |
++------------+------------+-------------------+
+| Salmonella | Salmonella | enterica enterica |
++------------+------------+-------------------+
+
+----
+
+
+@HELP_COLUMNS@
+
+
+@HELP_END_STATEMENT@
+
+
+    ]]></help>
+    <expand macro="citations" />
+</tool>
\ No newline at end of file