Mercurial > repos > bgruening > text_processing
changeset 12:a6f147a050a2 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit f47799941f9e11c313b38758bfdaab0ce83844f8
author | bgruening |
---|---|
date | Tue, 08 Jan 2019 02:50:23 -0500 |
parents | 74a8bef53a00 |
children | 0a8c6b61f0f4 |
files | replace_text_in_column.xml replace_text_in_line.xml test-data/replace_text_in_column_results2.txt test-data/replace_text_in_line_results2.txt |
diffstat | 4 files changed, 73 insertions(+), 29 deletions(-) [+] |
line wrap: on
line diff
--- a/replace_text_in_column.xml Tue Feb 27 17:15:37 2018 -0500 +++ b/replace_text_in_column.xml Tue Jan 08 02:50:23 2019 -0500 @@ -1,4 +1,4 @@ -<tool id="tp_replace_in_column" name="Replace Text" version="@BASE_VERSION@.2"> +<tool id="tp_replace_in_column" name="Replace Text" version="@BASE_VERSION@.3"> <description>in a specific column</description> <macros> <import>macros.xml</import> @@ -13,41 +13,62 @@ -v OFS="\t" -v FS="\t" --re-interval - --sandbox '{ \$$column = gensub( /$find_pattern/, "$replace_pattern", "g", \$$column ) ; print \$0 ; }' + --sandbox + '{ + #for $replacement in $replacements: + \$$replacement.column = gensub( /$replacement.find_pattern/, "$replacement.replace_pattern", "g", \$$replacement.column ) ; + #end for + print \$0 ; }' "$infile" > "$outfile" ]]> </command> <inputs> <param format="tabular" name="infile" type="data" label="File to process" /> - <param name="column" label="in column" type="data_column" data_ref="infile" accept_default="true" /> + <repeat name="replacements" title="Replacement" min="1"> + <param name="column" label="in column" type="data_column" data_ref="infile" accept_default="true" /> - <param name="find_pattern" type="text" label="Find pattern" help="Use simple text, or a valid regular expression (without backslashes // ) " > - <sanitizer> - <valid initial="string.printable"> - <remove value="'"/> - </valid> - </sanitizer> - </param> - <param name="replace_pattern" type="text" label="Replace with" help="Use simple text, or & (ampersand) and \\1 \\2 \\3 to refer to matched text. See examples below." > - <sanitizer> - <valid initial="string.printable"> - <remove value="'"/> - </valid> - </sanitizer> - </param> + <param name="find_pattern" type="text" label="Find pattern" help="Use simple text, or a valid regular expression (without backslashes // ) " > + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <param name="replace_pattern" type="text" label="Replace with" help="Use simple text, or & (ampersand) and \\1 \\2 \\3 to refer to matched text. See examples below." > + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + </repeat> </inputs> <outputs> <data name="outfile" format_source="infile" metadata_source="infile" /> </outputs> <tests> - <test> + <test> <param name="infile" value="replace_text_in_column1.txt" ftype="tabular" /> <param name="column" value="4" /> <param name="find_pattern" value=".+_(R.)" /> <param name="replace_pattern" value="\\1" /> <output name="outfile" file="replace_text_in_column_results1.txt" /> </test> + <test> + <param name="infile" value="replace_text_in_column1.txt" ftype="tabular" /> + <repeat name="replacements"> + <param name="column" value="1" /> + <param name="find_pattern" value="[a-z]{2}([a-z])" /> + <param name="replace_pattern" value="\\1" /> + </repeat> + <repeat name="replacements"> + <param name="column" value="4" /> + <param name="find_pattern" value=".+_(R.)" /> + <param name="replace_pattern" value="\\1" /> + </repeat> + <output name="outfile" file="replace_text_in_column_results2.txt" /> + </test> </tests> <help> <![CDATA[
--- a/replace_text_in_line.xml Tue Feb 27 17:15:37 2018 -0500 +++ b/replace_text_in_line.xml Tue Jan 08 02:50:23 2019 -0500 @@ -1,7 +1,7 @@ -<tool id="tp_replace_in_line" name="Replace Text" version="@BASE_VERSION@.1"> +<tool id="tp_replace_in_line" name="Replace Text" version="@BASE_VERSION@.2"> <description>in entire line</description> <macros> - <import>macros.xml</import> + <import>macros.xml</import> </macros> <requirements> <requirement type="package" version="4.4">sed</requirement> @@ -9,17 +9,21 @@ <version_command>sed --version | head -n 1</version_command> <command> <![CDATA[ - sed + sed -r --sandbox - 's/$find_pattern/$replace_pattern/g' + #for $replacement in $replacements: + -e + 's/$replacement.find_pattern/$replacement.replace_pattern/g' + #end for '$infile' > '$outfile' ]]> </command> <inputs> - <param format="txt" name="infile" type="data" label="File to process" /> + <param format="txt" name="infile" type="data" label="File to process" /> + <repeat name="replacements" title="Replacement" min="1"> <param name="find_pattern" type="text" size="20" label="Find pattern" help="Use simple text, or a valid regular expression (without backslashes // ) " > <sanitizer> <valid initial="string.printable"> @@ -31,7 +35,7 @@ <add source="/" target="\/"/> </mapping> </sanitizer> - </param> + </param> <param name="replace_pattern" type="text" size="20" label="Replace with:" help="Use simple text, or & (ampersand) and \\1 \\2 \\3 to refer to matched text. See examples below." > <sanitizer> <valid initial="string.printable"> @@ -46,17 +50,30 @@ </sanitizer> </param> + </repeat> </inputs> <outputs> - <data name="outfile" format_source="infile" metadata_source="infile"/> + <data name="outfile" format_source="infile" metadata_source="infile"/> </outputs> <tests> - <test> + <test> <param name="infile" value="replace_text_in_line1.txt" /> <param name="find_pattern" value="CTC." /> <param name="replace_pattern" value="FOOBAR" /> <output name="outfile" file="replace_text_in_line_results1.txt" /> </test> + <test> + <param name="infile" value="replace_text_in_line1.txt" /> + <repeat name="replacements"> + <param name="find_pattern" value="CTC." /> + <param name="replace_pattern" value="FOOBAR" /> + </repeat> + <repeat name="replacements"> + <param name="find_pattern" value="chr" /> + <param name="replace_pattern" value="domain" /> + </repeat> + <output name="outfile" file="replace_text_in_line_results2.txt" /> + </test> </tests> <help> <![CDATA[ @@ -77,9 +94,9 @@ **Examples of Find Patterns** - **HELLO** The word 'HELLO' (case sensitive). -- **AG.T** The letters A,G followed by any single character, followed by the letter T. +- **AG.T** The letters A,G followed by any single character, followed by the letter T. - **A{4,}** Four or more consecutive A's. -- **chr2[012]\\t** The words 'chr20' or 'chr21' or 'chr22' followed by a tab character. +- **chr2[012]\\t** The words 'chr20' or 'chr21' or 'chr22' followed by a tab character. - **hsa-mir-([^ ]+)** The text 'hsa-mir-' followed by one-or-more non-space characters. When using parenthesis, the matched content of the parenthesis can be accessed with **\1** in the **replace** pattern. @@ -112,7 +129,7 @@ ----- -**Extened Regular Expression Syntax** +**Extended Regular Expression Syntax** The select tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern descibing a certain amount of text.