Mercurial > repos > bgruening > text_processing
diff cut.xml @ 0:5314e5d6f040 draft
Imported from capsule None
author | bgruening |
---|---|
date | Thu, 29 Jan 2015 07:53:17 -0500 |
parents | |
children | 37e1eb05b1b4 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cut.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,259 @@ +<tool id="tp_cut_tool" name="Cut" version="@BASE_VERSION@.0"> + <description>columns from a table</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <version_command>cut --version | head -n 1</version_command> + <command> +<![CDATA[ + cut + #if str($delimiter) != '': + -d"${delimiter}" + #end if + ${complement} + ${cut_type_options.cut_element} + '${cut_type_options.list}' + '${input}' + > '${output}' +]]> + </command> + <inputs> + <param name="input" format="txt" type="data" label="File to cut" /> + <param name="complement" type="select" label="Operation"> + <option value="">Keep</option> + <option value="--complement">Discard</option> + </param> + <param name="delimiter" type="select" label="Delimited by"> + <option value="">Tab</option> + <option value=" ">Whitespace</option> + <option value=".">Dot</option> + <option value=",">Comma</option> + <option value="-">Dash</option> + <option value="_">Underscore</option> + <option value="|">Pipe</option> + </param> + <conditional name="cut_type_options"> + <param name="cut_element" type="select" label="Cut by"> + <option value="-f">fields</option> + <option value="-c">characters</option> + <option value="-b">bytes</option> + </param> + <when value="-f"> + <param name="list" type="data_column" data_ref="input" multiple="true" label="List of Fields" help="(-f)" /> + </when> + <when value="-c"> + <param name="list" type="text" size="20" value="" label="List of characters" help="These will be kept/discarded (depending on 'operation'). <BR /> Examples: 1,3,4 or 2-5"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + </when> + <when value="-b"> + <param name="list" type="text" size="20" value="" label="List of Bytes" help="These will be kept/discarded (depending on 'operation'). <BR /> Examples: 1,3,4 or 2-5"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + </when> + </conditional> + </inputs> + <outputs> + <!--<data format="tabular" name="output" />--> + <!-- WIP, not sure that will work with the complement option --> + <data format="tabular" name="output"> + <actions> + <conditional name="cut_type_options.cut_element"> + <!-- fields --> + <when value="-f"> + <conditional name="delimiter"> + <when value="T"> + <conditional name="input"> + <when datatype_isinstance="interval"> + <action type="format" default="tabular"> + <option type="from_param" name="list" column="0" offset="0"> <!-- chromCol is 1--> + <filter type="insert_column" column="0" value="interval"/> + <filter type="insert_column" ref="list" /> <!-- startCol --> + <filter type="insert_column" ref="list" /> <!-- endCol --> + + <filter type="multiple_splitter" column="1" separator=","/> + <filter type="column_strip" column="1"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="1" name="lower" /> + <filter type="param_value" column="1" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="1" strip="c"/> <!-- get rid of c's --> + <filter type="boolean" column="1" cast="int" /> + + <filter type="multiple_splitter" column="2" separator=","/> + <filter type="column_strip" column="2"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="2" name="lower" /> + <filter type="param_value" column="2" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="2" strip="c"/> <!-- get rid of c's --> + <filter type="boolean" column="2" cast="int" /> + + <filter type="multiple_splitter" column="3" separator=","/> + <filter type="column_strip" column="3"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="3" name="lower" /> + <filter type="param_value" column="3" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="3" strip="c"/> <!-- get rid of c's --> + <filter type="boolean" column="3" cast="int" /> + + <filter type="metadata_value" ref="input" name="chromCol" column="1" /> + <filter type="metadata_value" ref="input" name="startCol" column="2" /> + <filter type="metadata_value" ref="input" name="endCol" column="3" /> + </option> + </action> + <conditional name="output"> + <when datatype_isinstance="interval"> + <action type="metadata" name="chromCol"> + <option type="from_param" name="list" column="0" offset="0"> <!-- chromCol is 0--> + <filter type="multiple_splitter" column="0" separator=","/> + <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="0" name="lower" /> + <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's --> + <filter type="insert_column" value="1" iterate="True" column="0"/> + <filter type="boolean" column="1" cast="int" /> + <filter type="metadata_value" ref="input" name="chromCol" column="1" /> + </option> + </action> + + <action type="metadata" name="startCol"> + <option type="from_param" name="list" column="0" offset="0"> <!-- startCol is 0--> + <filter type="multiple_splitter" column="0" separator=","/> + <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="0" name="lower" /> + <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's --> + <filter type="insert_column" value="1" iterate="True" column="0"/> + <filter type="boolean" column="1" cast="int" /> + <filter type="metadata_value" ref="input" name="startCol" column="1" /> + </option> + </action> + + <action type="metadata" name="endCol"> + <option type="from_param" name="list" column="0" offset="0"> <!-- endCol is 0--> + <filter type="multiple_splitter" column="0" separator=","/> + <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="0" name="lower" /> + <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's --> + <filter type="insert_column" value="1" iterate="True" column="0"/> + <filter type="boolean" column="1" cast="int" /> + <filter type="metadata_value" ref="input" name="endCol" column="1" /> + </option> + </action> + + <action type="metadata" name="nameCol" default="0"> + <option type="from_param" name="list" column="0" offset="0"> <!-- nameCol is 0--> + <filter type="multiple_splitter" column="0" separator=","/> + <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="0" name="lower" /> + <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's --> + <filter type="insert_column" value="1" iterate="True" column="0"/> + <filter type="boolean" column="1" cast="int" /> + <filter type="metadata_value" ref="input" name="nameCol" column="1" /> + </option> + </action> + + <action type="metadata" name="strandCol" default="0"> + <option type="from_param" name="list" column="0" offset="0"> <!-- strandCol is 0--> + <filter type="multiple_splitter" column="0" separator=","/> + <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="0" name="lower" /> + <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's --> + <filter type="insert_column" value="1" iterate="True" column="0"/> + <filter type="boolean" column="1" cast="int" /> + <filter type="metadata_value" ref="input" name="strandCol" column="1" /> + </option> + </action> + </when> + </conditional> + </when> + </conditional> + </when> + </conditional> + </when> + </conditional> + </actions> + </data> + </outputs> + <tests> + <test> + <param name="input" value="cut1.txt"/> + <param name="list" value="1,3,4"/> + <param name="delimiter" value=""/> + <output name="output" file="cut_results1.txt"/> + </test> + <test> + <param name="input" value="cut1.txt"/> + <param name="list" value="2" /> + <param name="delimiter" value="" /> + <param name="complement" value="--complement" /> + <output name="output" file="cut_results2.txt"/> + </test> + <test> + <param name="input" value="cut1.txt"/> + <param name="list" value="-3" /> + <param name="delimiter" value="" /> + <param name="cut_element" value="-c" /> + <output name="output" file="cut_results3.txt"/> + </test> + </tests> + <help> +<![CDATA[ +**What it does** + +This tool runs the **cut** unix command, which extract or delete columns from a file. + +----- + +Field List Example: + +**1,3,7** - Cut specific fields/characters. + +**3-** - Cut from the third field/character to the end of the line. + +**2-5** - Cut from the second to the fifth field/character. + +**-8** - Cut from the first to the eight field/characters. + + +Input Example:: + + fruit color price weight + apple red 1.4 0.5 + orange orange 1.5 0.3 + banana yellow 0.9 0.3 + + +Output Example ( **Keeping fields 1,3,4** ):: + + fruit price weight + apple 1.4 0.5 + orange 1.5 0.3 + banana 0.9 0.3 + +Output Example ( **Discarding field 2** ):: + + fruit price weight + apple 1.4 0.5 + orange 1.5 0.3 + banana 0.9 0.3 + +Output Example ( **Keeping 3 characters** ):: + + fru + app + ora + ban + +@REFERENCES@ +]]> + </help> +</tool>