Mercurial > repos > bgruening > text_processing
changeset 22:3dc70b59608c draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 2dc2df988bd2dde9f8d7f629b594186dbd4fdc2b
author | bgruening |
---|---|
date | Fri, 07 Mar 2025 20:44:20 +0000 |
parents | 86755160afbf |
children | |
files | awk.xml cut.xml grep.xml macros.xml multijoin.xml recurring_lines.xml replace_text_in_column.xml replace_text_in_line.xml sed.xml sort.xml sorted_uniq.xml tac.xml test-data/recurring_result1.txt test-data/replace_text_in_line_results3.txt unsorted_uniq.xml |
diffstat | 15 files changed, 103 insertions(+), 49 deletions(-) [+] |
line wrap: on
line diff
--- a/awk.xml Fri Aug 16 10:41:54 2024 +0000 +++ b/awk.xml Fri Mar 07 20:44:20 2025 +0000 @@ -5,7 +5,7 @@ </macros> <expand macro="creator"/> <requirements> - <requirement type="package" version="5.3.0">gawk</requirement> + <requirement type="package" version="5.3.1">gawk</requirement> </requirements> <version_command>awk --version | head -n 1</version_command> <command>
--- a/cut.xml Fri Aug 16 10:41:54 2024 +0000 +++ b/cut.xml Fri Mar 07 20:44:20 2025 +0000 @@ -1,4 +1,4 @@ -<tool id="tp_cut_tool" name="Advanced Cut" version="@TOOL_VERSION@+galaxy2" profile="@PROFILE@"> +<tool id="tp_cut_tool" name="Advanced Cut" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> <description>columns from a table (cut)</description> <macros> <import>macros.xml</import> @@ -191,14 +191,19 @@ <tests> <test expect_num_outputs="1"> <param name="input" value="cut1.txt"/> - <param name="list" value="1,3,4"/> - <param name="delimiter" value=""/> + <conditional name="cut_type_options"> + <param name="cut_element" value="-f"/> + <param name="delimiter" value="" /> + <conditional name="colnames_option"> + <param name="header" value="Y"/> + <param name="list" value="1,3,4"/> + </conditional> + </conditional> <output name="output" file="cut_results1.txt"/> <assert_command> <has_text text="-f '1,3,4'"/> </assert_command> </test> - <test expect_num_outputs="1"> <param name="input" value="cut1.txt"/> <conditional name="cut_type_options"> @@ -217,9 +222,10 @@ </test> <test expect_num_outputs="1"> <param name="input" value="cut1.txt"/> - <param name="list" value="-3" /> - <param name="delimiter" value="" /> - <param name="cut_element" value="-c" /> + <conditional name="cut_type_options"> + <param name="cut_element" value="-c" /> + <param name="list" value="-3" /> + </conditional> <output name="output" file="cut_results3.txt"/> </test> </tests>
--- a/grep.xml Fri Aug 16 10:41:54 2024 +0000 +++ b/grep.xml Fri Mar 07 20:44:20 2025 +0000 @@ -85,7 +85,7 @@ <!-- grep a FASTA file for sequences with specific motif --> <test> <param name="infile" value="grep1.txt" /> - <param name="case_sensitive" value="case sensitive" /> + <param name="case_sensitive" value="-i" /> <param name="regex_type" value="-P" /> <param name="invert" value="" /> <param name="url_paste" value="AA.{2}GT" /> @@ -98,7 +98,7 @@ show highlighed output --> <test> <param name="infile" value="grep1.txt" /> - <param name="case_sensitive" value="case sensitive" /> + <param name="case_sensitive" value="-i" /> <param name="regex_type" value="-P" /> <param name="invert" value="" /> <param name="url_paste" value="AA.{2}GT" /> @@ -110,7 +110,7 @@ <!-- tests egrep --> <test> <param name="infile" value="egrep1.txt" /> - <param name="case_sensitive" value="case sensitive" /> + <param name="case_sensitive" value="-i" /> <param name="regex_type" value="-E" /> <param name="invert" value="" /> <param name="url_paste" value="[^ ]+" /> @@ -122,7 +122,7 @@ <!-- tests basic regex; + must be backslashed to match --> <test> <param name="infile" value="egrep1.txt" /> - <param name="case_sensitive" value="case sensitive" /> + <param name="case_sensitive" value="-i" /> <param name="regex_type" value="-G" /> <param name="invert" value="" /> <param name="url_paste" value="[^ ]\+" /> @@ -134,7 +134,7 @@ <!-- tests regex;matching nothing --> <test> <param name="infile" value="egrep1.txt" /> - <param name="case_sensitive" value="case sensitive" /> + <param name="case_sensitive" value="-i" /> <param name="regex_type" value="-G" /> <param name="invert" value="" /> <param name="url_paste" value="not existent pattern" /> @@ -150,7 +150,7 @@ <!-- tests invalid regex; i.e. that we still get exit code 2 --> <test expect_failure="true" expect_exit_code="2"> <param name="infile" value="egrep1.txt" /> - <param name="case_sensitive" value="case sensitive" /> + <param name="case_sensitive" value="-i" /> <param name="regex_type" value="-G" /> <param name="invert" value="" /> <param name="url_paste" value="\(" />
--- a/macros.xml Fri Aug 16 10:41:54 2024 +0000 +++ b/macros.xml Fri Mar 07 20:44:20 2025 +0000 @@ -1,12 +1,12 @@ <macros> <xml name="requirements"> <requirements> - <requirement type="package" version="9.3">coreutils</requirement> + <requirement type="package" version="@TOOL_VERSION@">coreutils</requirement> <yield/> </requirements> </xml> - <token name="@TOOL_VERSION@">9.3</token> - <token name="@VERSION_SUFFIX@">1</token> + <token name="@TOOL_VERSION@">9.5</token> + <token name="@VERSION_SUFFIX@">0</token> <token name="@PROFILE@">23.1</token> <xml name="stdio"> <stdio>
--- a/multijoin.xml Fri Aug 16 10:41:54 2024 +0000 +++ b/multijoin.xml Fri Mar 07 20:44:20 2025 +0000 @@ -52,7 +52,7 @@ <param name="first_file" value="multijoin1.txt" /> <param name="files" value="multijoin2.txt,multijoin3.txt" /> <param name="key_column" value="4" /> - <param name="value_columns" value="c7,c8,c9" /> + <param name="value_columns" value="7,8,9" /> <param name="output_header" value="True" /> <output name="outfile" file="multijoin_result1.txt" lines_diff="2" /> </test>
--- a/recurring_lines.xml Fri Aug 16 10:41:54 2024 +0000 +++ b/recurring_lines.xml Fri Mar 07 20:44:20 2025 +0000 @@ -44,21 +44,27 @@ <test> <repeat name="token_set"> <param name="line" value="freedom" /> - <param name="repeat_select_opts" value="file" /> - <param name="infile" value="multijoin2.txt" /> + <conditional name="repeat_select"> + <param name="repeat_select_opts" value="file" /> + <param name="infile" value="multijoin2.txt" /> + </conditional> </repeat> <output name="outfile" file="recurring_result1.txt" /> </test> <test> <repeat name="token_set"> <param name="line" value="freedom" /> - <param name="repeat_select_opts" value="user" /> - <param name="times" value="10" /> + <conditional name="repeat_select"> + <param name="repeat_select_opts" value="user" /> + <param name="times" value="10" /> + </conditional> </repeat> <repeat name="token_set"> <param name="line" value="war is over" /> - <param name="repeat_select_opts" value="user" /> - <param name="times" value="10" /> + <conditional name="repeat_select"> + <param name="repeat_select_opts" value="user" /> + <param name="times" value="10" /> + </conditional> </repeat> <output name="outfile" file="recurring_result2.txt" /> </test>
--- a/replace_text_in_column.xml Fri Aug 16 10:41:54 2024 +0000 +++ b/replace_text_in_column.xml Fri Mar 07 20:44:20 2025 +0000 @@ -5,7 +5,7 @@ </macros> <expand macro="creator"/> <requirements> - <requirement type="package" version="5.3.0">gawk</requirement> + <requirement type="package" version="5.3.1">gawk</requirement> </requirements> <version_command>awk --version | head -n 1</version_command> <command> @@ -51,9 +51,11 @@ <tests> <test> <param name="infile" value="replace_text_in_column1.txt" ftype="tabular" /> - <param name="column" value="4" /> - <param name="find_pattern" value=".+_(R.)" /> - <param name="replace_pattern" value="\\1" /> + <repeat name="replacements"> + <param name="column" value="4" /> + <param name="find_pattern" value=".+_(R.)" /> + <param name="replace_pattern" value="\\1" /> + </repeat> <output name="outfile" file="replace_text_in_column_results1.txt" /> </test> <test>
--- a/replace_text_in_line.xml Fri Aug 16 10:41:54 2024 +0000 +++ b/replace_text_in_line.xml Fri Mar 07 20:44:20 2025 +0000 @@ -14,6 +14,7 @@ -r --sandbox #for $replacement in $replacements: + -e '$replacement.sed_options' -e 's/$replacement.find_pattern/$replacement.replace_pattern/g' #end for @@ -47,10 +48,20 @@ <add source="'" target="'"'"'" /> <add source="/" target="\/"/> </mapping> - </sanitizer> - - </param> + </param> + <param name="sed_options" type="text" size="20" optional="true" label="Additional sed commands before replacement" help="Provide additional sed commands before the replacement (e.g., ':a;N;$!ba;')." > + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> <!-- Removes single quotes --> + <remove value="/"/> <!-- Removes slashes --> + </valid> + <mapping initial="none"> + <add source="'" target="'"'"'" /> <!-- Escapes single quotes --> + <add source="/" target="\/"/> <!-- Escapes slashes --> + </mapping> + </sanitizer> + </param> </repeat> </inputs> <outputs> @@ -59,8 +70,11 @@ <tests> <test> <param name="infile" value="replace_text_in_line1.txt" /> - <param name="find_pattern" value="CTC." /> - <param name="replace_pattern" value="FOOBAR" /> + <repeat name="replacements"> + <param name="find_pattern" value="CTC." /> + <param name="replace_pattern" value="FOOBAR" /> + <param name="sed_options" value="" /> + </repeat> <output name="outfile" file="replace_text_in_line_results1.txt" /> </test> <test> @@ -68,13 +82,24 @@ <repeat name="replacements"> <param name="find_pattern" value="CTC." /> <param name="replace_pattern" value="FOOBAR" /> + <param name="sed_options" value="" /> </repeat> <repeat name="replacements"> <param name="find_pattern" value="chr" /> <param name="replace_pattern" value="domain" /> + <param name="sed_options" value="" /> </repeat> <output name="outfile" file="replace_text_in_line_results2.txt" /> </test> + <test> + <param name="infile" value="replace_text_in_line1.txt" /> + <repeat name="replacements"> + <param name="find_pattern" value="\n" /> + <param name="replace_pattern" value="" /> + <param name="sed_options" value=":a;N;$!ba" /> + </repeat> + <output name="outfile" file="replace_text_in_line_results3.txt" /> + </test> </tests> <help> <![CDATA[ @@ -155,7 +180,9 @@ - **\|** Separates alternate possibilities. -**Note**: SED uses extended regular expression syntax, not Perl syntax. **\\d**, **\\w**, **\\s** etc. are **not** supported. +**Note**: SED uses extended regular expression syntax, not Perl syntax. **\\d**, **\\w**, **\\s** etc. are **not** supported. However, you can use SED FAQ to perform commands using special characters. + More complex options can look like `sed -e '$!N;s/foo/bar/;'`. Here, `$!N;` is an optional part which you only need to set in very special cases. The `foo` part is the search string, and the `bar` part is the replacement string. + Please read the SED FAQ here: https://www.pement.org/sed/sedfaq3.html#s3.2 ]]> </help>
--- a/sed.xml Fri Aug 16 10:41:54 2024 +0000 +++ b/sed.xml Fri Mar 07 20:44:20 2025 +0000 @@ -56,14 +56,19 @@ <test> <param name="infile" value="sed1.txt" /> <param name="code" value="1d ; s/foo/bar/" /> - <param name="silent" value="" /> + <conditional name="adv_opts"> + <param name="adv_opts_selector" value="advanced" /> + <param name="silent" value="" /> + </conditional> <output name="output" file="sed_results1.txt" /> </test> <test> <param name="infile" value="sed1.txt" /> <param name="code" value="/foo/ { s/foo/baz/g ; p }" /> - <param name="adv_opts_selector" value="advanced" /> - <param name="silent" value="-n" /> + <conditional name="adv_opts"> + <param name="adv_opts_selector" value="advanced" /> + <param name="silent" value="-n" /> + </conditional> <output name="output" file="sed_results2.txt" /> </test> </tests>
--- a/sort.xml Fri Aug 16 10:41:54 2024 +0000 +++ b/sort.xml Fri Mar 07 20:44:20 2025 +0000 @@ -11,7 +11,7 @@ <command> <![CDATA[ ( - LC_ALL=C + LC_ALL=C; #if int($header) > 0: sed -u '${header}'q && #end if
--- a/sorted_uniq.xml Fri Aug 16 10:41:54 2024 +0000 +++ b/sorted_uniq.xml Fri Mar 07 20:44:20 2025 +0000 @@ -74,17 +74,21 @@ <tests> <test> <param name="infile" value="sorted_uniq1.tabular" /> - <param name="grouping_select" value="no"/> - <param name="count" value="True"/> + <conditional name="grouping"> + <param name="grouping_select" value="no"/> + <param name="count" value="True"/> + <param name="uniqueonly" value="True"/> + </conditional> <param name="ignorecase" value="True"/> - <param name="uniqueonly" value="True"/> <output name="outfile" file="sorted_uniq_results1.tabular" /> </test> <test> <param name="infile" value="sorted_uniq1.tabular" /> <param name="ignorecase" value="True"/> - <param name="grouping_select" value="yes"/> - <param name="group" value="separate"/> + <conditional name="grouping"> + <param name="grouping_select" value="yes"/> + <param name="group" value="separate"/> + </conditional> <output name="outfile" file="sorted_uniq_results2.tabular" /> </test> </tests>
--- a/tac.xml Fri Aug 16 10:41:54 2024 +0000 +++ b/tac.xml Fri Mar 07 20:44:20 2025 +0000 @@ -48,8 +48,10 @@ </test> <test> <param name="infile" value="1.bed"/> - <param name="separator_select" value="yes"/> - <param name="before" value="True"/> + <conditional name="separator"> + <param name="separator_select" value="yes"/> + <param name="before" value="True"/> + </conditional> <output name="outfile" file="tac_result2.txt"/> </test> </tests>
--- a/test-data/recurring_result1.txt Fri Aug 16 10:41:54 2024 +0000 +++ b/test-data/recurring_result1.txt Fri Mar 07 20:44:20 2025 +0000 @@ -7,4 +7,3 @@ freedom freedom freedom -freedom
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/replace_text_in_line_results3.txt Fri Mar 07 20:44:20 2025 +0000 @@ -0,0 +1,1 @@ +chr7 56632 56652 D17003_CTCF_R6 310 +chr7 56736 56756 D17003_CTCF_R7 354 +chr7 56761 56781 D17003_CTCF_R4 220 +
--- a/unsorted_uniq.xml Fri Aug 16 10:41:54 2024 +0000 +++ b/unsorted_uniq.xml Fri Mar 07 20:44:20 2025 +0000 @@ -45,9 +45,11 @@ <param name="infile" value="1.bed"/> <param name="is_numeric" value="True"/> <param name="ignore_case" value="True"/> - <param name="adv_opts_selector" value="advanced"/> - <param name="column_start" value="2"/> - <param name="column_end" value="3"/> + <conditional name="adv_opts"> + <param name="adv_opts_selector" value="advanced"/> + <param name="column_start" value="2"/> + <param name="column_end" value="3"/> + </conditional> <output name="outfile" file="unique_results1.bed"/> </test> </tests>