Mercurial > repos > nml > csvtk_replace
comparison replace.xml @ 0:1d4ee4308d99 draft default tip
"planemo upload for repository https://github.com/shenwei356/csvtk commit 3a97e1b79bf0c6cdd37d5c8fb497b85531a563ab"
| author | nml |
|---|---|
| date | Tue, 19 May 2020 17:16:05 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1d4ee4308d99 |
|---|---|
| 1 <tool id="csvtk_replace" name="csvtk-replace" version="@VERSION@+@GALAXY_VERSION@"> | |
| 2 <description> data of selected columns by regular expression</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="requirements" /> | |
| 7 <expand macro="version_cmd" /> | |
| 8 <command detect_errors="exit_code"><![CDATA[ | |
| 9 | |
| 10 ################### | |
| 11 ## Start Command ## | |
| 12 ################### | |
| 13 csvtk replace --num-cpus "\${GALAXY_SLOTS:-1}" | |
| 14 | |
| 15 ## Add additional flags as specified ## | |
| 16 ####################################### | |
| 17 $ignore_case | |
| 18 $global_param.illegal_rows | |
| 19 $global_param.empty_rows | |
| 20 $global_param.header | |
| 21 $global_param.lazy_quotes | |
| 22 | |
| 23 ## Set Tabular input/output flag if first input is tabular ## | |
| 24 ############################################################# | |
| 25 #if $in_1.is_of_type("tabular"): | |
| 26 -t -T | |
| 27 #end if | |
| 28 | |
| 29 ## Set input files ## | |
| 30 ##################### | |
| 31 '$in_1' | |
| 32 | |
| 33 ## Specify fields to use ## | |
| 34 ########################### | |
| 35 -F -f '${column_text.in_text}' | |
| 36 | |
| 37 ## Specific Commands ## | |
| 38 ####################### | |
| 39 -p '($pattern_string)' | |
| 40 -r '$replacement_string' | |
| 41 | |
| 42 #if $input_kv | |
| 43 -k '$input_kv' | |
| 44 #end if | |
| 45 | |
| 46 #if $fill.how_fill == "key" | |
| 47 -K | |
| 48 #elif $fill.how_fill == "string" | |
| 49 --key-miss-repl '$fill.fill_string' | |
| 50 #end if | |
| 51 | |
| 52 ## To output ## | |
| 53 ############### | |
| 54 > replaced | |
| 55 | |
| 56 ]]></command> | |
| 57 <inputs> | |
| 58 <expand macro="singular_input"/> | |
| 59 <expand macro="fields_input" /> | |
| 60 <param name="pattern_string" type="text" argument="-p" | |
| 61 label="Pattern Regex" | |
| 62 help="Regex to search column for. Input is structured as '(YOUR_INPUT_HERE)' so if your regex was just a period it would look like '(.)' as an input."> | |
| 63 <expand macro="text_sanitizer" /> | |
| 64 </param> | |
| 65 <param name="replacement_string" type="text" argument="-r" | |
| 66 label="Replacement String"> | |
| 67 <help> | |
| 68 <![CDATA[ | |
| 69 String to replace found data. Supports capture variables and special replacement symbols. | |
| 70 | |
| 71 - Capture Variables: $1 represents the text of the first submatch | |
| 72 - {nr} inserts a record number starting from 1 | |
| 73 - {kv} uses corresponding value of the key (captured variable $n) of a key-value file | |
| 74 | |
| 75 If using the special replacement symbols, the capture variable must be specified as ${1}! | |
| 76 ]]> | |
| 77 </help> | |
| 78 <expand macro="text_sanitizer" /> | |
| 79 </param> | |
| 80 <param name="input_kv" type="data" format="tsv,tabular" argument="-k" | |
| 81 optional="true" | |
| 82 label="Key/Value file for replacement string" | |
| 83 help="Only specify a file if using {kv} in replacement string. The file must be tab delimited with one key/value pair per line. | |
| 84 An example can be found in the help section below" | |
| 85 /> | |
| 86 <conditional name="fill"> | |
| 87 <param name="how_fill" type="select" label="Fill columns that don't get any regex matches"> | |
| 88 <option value="no">No</option> | |
| 89 <option value="key">Yes - Fill with Original Value</option> | |
| 90 <option value="string">Yes - Fill with String</option> | |
| 91 </param> | |
| 92 <when value="no" /> | |
| 93 <when value="key" /> | |
| 94 <when value="string" > | |
| 95 <param name="fill_string" type="text" value="NA" argument="--key-miss-repl" label="Fill string"> | |
| 96 <expand macro="text_sanitizer" /> | |
| 97 </param> | |
| 98 </when> | |
| 99 </conditional> | |
| 100 <expand macro="ignore_case" /> | |
| 101 <expand macro="global_parameters" /> | |
| 102 </inputs> | |
| 103 <outputs> | |
| 104 <data format_source="in_1" name="replaced" from_work_dir="replaced" label='${in_1.name} with column ${column_text.in_text} replaced' /> | |
| 105 </outputs> | |
| 106 <tests> | |
| 107 <test> | |
| 108 <param name="in_1" value="replace_input.csv" /> | |
| 109 <conditional name="column_text"> | |
| 110 <param name="select" value="string" /> | |
| 111 <param name="in_text" value="2" /> | |
| 112 </conditional> | |
| 113 <param name="pattern_string" value=".+" /> | |
| 114 <param name="replacement_string" value="{nr}-$1" /> | |
| 115 <output name="replaced" value="replace_1.csv" /> | |
| 116 </test> | |
| 117 <test> | |
| 118 <param name="in_1" value="replace_input.csv" /> | |
| 119 <conditional name="column_text"> | |
| 120 <param name="select" value="string" /> | |
| 121 <param name="in_text" value="2" /> | |
| 122 </conditional> | |
| 123 <param name="pattern_string" value=".+" /> | |
| 124 <param name="replacement_string" value="{kv}" /> | |
| 125 <param name="input_kv" value="kv.txt" /> | |
| 126 <conditional name="fill"> | |
| 127 <param name="how_fill" value="key" /> | |
| 128 </conditional> | |
| 129 <output name="replaced" value="replace_2.csv" /> | |
| 130 </test> | |
| 131 </tests> | |
| 132 <help><![CDATA[ | |
| 133 | |
| 134 Csvtk - Replace Help | |
| 135 -------------------- | |
| 136 | |
| 137 Info | |
| 138 #### | |
| 139 Csvtk-replace is a tool that uses Regular Expressions (Regex) to match data in the specified column and replace it with the replacement string. | |
| 140 Non-matched columns can be kept or filled with the Regex key or an input string | |
| 141 | |
| 142 The regex input for this tool is structured such that your regular expression **does not** need to start with quotes or brackets. You can | |
| 143 start your expression with a `^` or just go straight into it | |
| 144 | |
| 145 For example: | |
| 146 | |
| 147 :: | |
| 148 | |
| 149 Using `.+` as an input would be used in the code as '(.+)' | |
| 150 | |
| 151 Using ^(.+)$ as an input would yield an input in the code as '(^(.+)$)' | |
| 152 | |
| 153 .. class:: warningmark | |
| 154 | |
| 155 Single quotes are not allowed in text inputs! | |
| 156 | |
| 157 ----- | |
| 158 | |
| 159 | |
| 160 @HELP_INPUT_DATA@ | |
| 161 | |
| 162 | |
| 163 Usage | |
| 164 ##### | |
| 165 You can use csvtk replace to replace any regex matches with your input replacement string. | |
| 166 | |
| 167 The replacement string has some unique properties that you can use too to better replace your data: | |
| 168 | |
| 169 - Replacement supports capture variables, like $1 which represents the text of the first submatch of the Regex | |
| 170 | |
| 171 - \{nr} can be used to insert an ascending record number, starting from 1, into each record | |
| 172 | |
| 173 - \{kv} can be used to get the value of the key (captured variable $n) of a key-value file | |
| 174 | |
| 175 A good Regular expressions cheat sheet that you can use to help yourself build regular expressions can be found at: | |
| 176 https://regexr.com/ | |
| 177 | |
| 178 **Replace Examples** | |
| 179 | |
| 180 1. Replacement with {nr} and $1 | |
| 181 | |
| 182 Input file: | |
| 183 | |
| 184 +---------+--------+ | |
| 185 | Name | Animal | | |
| 186 +=========+========+ | |
| 187 | Bud | Dog | | |
| 188 +---------+--------+ | |
| 189 | Mittens | Cat | | |
| 190 +---------+--------+ | |
| 191 | |
| 192 Now if our regex was set to '.*' on column 2 and our replacement string was set to '{nr}-$1', the following output would be observed: | |
| 193 | |
| 194 +---------+--------+ | |
| 195 | Name | Animal | | |
| 196 +=========+========+ | |
| 197 | Bud | 1-Dog | | |
| 198 +---------+--------+ | |
| 199 | Mittens | 2-Cat | | |
| 200 +---------+--------+ | |
| 201 | |
| 202 --------------- | |
| 203 | |
| 204 2. Replacement with {kv} file | |
| 205 | |
| 206 Suppose you set up a key-value TAB separated file that looked as such: | |
| 207 | |
| 208 :: | |
| 209 | |
| 210 Key Value | |
| 211 Dog Big | |
| 212 Cat Small | |
| 213 | |
| 214 And had a similar input file: | |
| 215 | |
| 216 +---------+--------+ | |
| 217 | Name | Animal | | |
| 218 +=========+========+ | |
| 219 | Bud | Dog | | |
| 220 +---------+--------+ | |
| 221 | Mittens | Cat | | |
| 222 +---------+--------+ | |
| 223 | Fuzzy | Gerbil | | |
| 224 +---------+--------+ | |
| 225 | |
| 226 Now if the regex was '.*' on column 2 with the replacement string as '{kv}', your output would look as such with 'No' fill specified: | |
| 227 | |
| 228 +---------+--------+ | |
| 229 | Name | Animal | | |
| 230 +=========+========+ | |
| 231 | Bud | Big | | |
| 232 +---------+--------+ | |
| 233 | Mittens | Small | | |
| 234 +---------+--------+ | |
| 235 | Fuzzy | | | |
| 236 +---------+--------+ | |
| 237 | |
| 238 If you wanted to fill the blank cell you could set it to either: | |
| 239 | |
| 240 - String - the string you input (ex. 'NA') would fill up the blank cell. | |
| 241 | |
| 242 - Original value - would change the blank cell to 'Gerbil' | |
| 243 | |
| 244 ---- | |
| 245 | |
| 246 If you're having trouble with the regular expressions, please play around with a builder, there are many others online | |
| 247 and they are great resources to improve your regex statements or test them before use! | |
| 248 | |
| 249 ---- | |
| 250 | |
| 251 @HELP_END_STATEMENT@ | |
| 252 | |
| 253 | |
| 254 ]]></help> | |
| 255 <expand macro="citations" /> | |
| 256 </tool> |
