Mercurial > repos > bgruening > text_processing

--- a/replace_text_in_line.xml	Fri Dec 01 13:47:28 2017 -0500
+++ b/replace_text_in_line.xml	Tue Feb 20 09:24:19 2018 -0500
@@ -1,57 +1,58 @@
-<?xml version="1.0" encoding="UTF-8"?>
+<?xml version="1.0" encoding="ISO-8859-1"?>
 <tool id="tp_replace_in_line" name="Replace Text" version="@BASE_VERSION@.0">
     <description>in entire line</description>
     <macros>
-        <import>macros.xml</import>
+	<import>macros.xml</import>
     </macros>
     <requirements>
         <requirement type="package" version="4.2.3.dev0">sed</requirement>
     </requirements>
     <version_command>sed --version | head -n 1</version_command>
     <command>
-    <!--
-         This looks quite strange but it is intentional. We have used U+0090 as
-         the replacement brackets in the sed expression. This meets multiple requirements for use:
-
-         - is legal entity in XML 1.0 (https://en.wikipedia.org/wiki/Valid_characters_in_XML)
-         - is legal as a sed delimiter character (must be single-byte)
-         - is not in string.printable
-
-         Thus, this should execute properly. Additionally it allows users to
-         use characters like '/' and '\' and '|' in their regex without them
-         being able to prematurely terminate the expression.
-     -->
 <![CDATA[
-        sed
+	sed
             -r
             --sandbox
-            's$find_pattern$replace_patterng'
+            's/$find_pattern/$replace_pattern/g'
             '$infile'
         > '$outfile'
 ]]>
+
     </command>
     <inputs>
-        <param format="txt" name="infile" type="data" label="File to process" />
-         <param name="find_pattern" type="text" label="Find pattern" help="Use simple text, or a valid regular expression (without backslashes // ) " >
+	<param format="txt" name="infile" type="data" label="File to process" />
+         <param name="find_pattern" type="text" size="20" label="Find pattern" help="Use simple text, or a valid regular expression (without backslashes // ) " >
             <sanitizer>
                 <valid initial="string.printable">
-                    <remove value="&apos;"/>
+                    <remove value="&#39;"/>
+                    <remove value="/"/>
                 </valid>
+                <mapping initial="none">
+                    <add source="&#39;" target="&#39;&quot;&#39;&quot;&#39;" />
+                    <add source="/" target="\/"/>
+                </mapping>
             </sanitizer>
         </param>
-         <param name="replace_pattern" type="text" label="Replace with:" help="Use simple text, or &amp; (ampersand) and \\1 \\2 \\3 to refer to matched text. See examples below." >
+         <param name="replace_pattern" type="text" size="20" label="Replace with:" help="Use simple text, or &amp; (ampersand) and \\1 \\2 \\3 to refer to matched text. See examples below." >
             <sanitizer>
                 <valid initial="string.printable">
-                    <remove value="&apos;"/>
+                    <remove value="&#39;"/>
+                    <remove value="/"/>
                 </valid>
+                <mapping initial="none">
+                    <add source="&#39;" target="&#39;&quot;&#39;&quot;&#39;" />
+                    <add source="/" target="\/"/>
+                </mapping>
+
             </sanitizer>
+
         </param>
     </inputs>
     <outputs>
-        <data name="outfile" format_source="infile" metadata_source="infile"/>
+	<data name="outfile" format_source="infile" metadata_source="infile"/>
     </outputs>
     <tests>
-        <test>
+	<test>
             <param name="infile" value="replace_text_in_line1.txt" />
             <param name="find_pattern" value="CTC." />
             <param name="replace_pattern" value="FOOBAR" />
@@ -77,9 +78,9 @@
 **Examples of Find Patterns**

 - **HELLO**     The word 'HELLO' (case sensitive).
-- **AG.T**      The letters A,G followed by any single character, followed by the letter T.
+- **AG.T**	The letters A,G followed by any single character, followed by the letter T.
 - **A{4,}**     Four or more consecutive A's.
-- **chr2[012]\\t**       The words 'chr20' or 'chr21' or 'chr22' followed by a tab character.
+- **chr2[012]\\t**	 The words 'chr20' or 'chr21' or 'chr22' followed by a tab character.
 - **hsa-mir-([^ ]+)**        The text 'hsa-mir-' followed by one-or-more non-space characters. When using parenthesis, the matched content of the parenthesis can be accessed with **\1** in the **replace** pattern.
Binary file text_processing.tar.bz2 has changed