comparison find_and_replace.xml @ 0:5314e5d6f040 draft

Imported from capsule None
author bgruening
date Thu, 29 Jan 2015 07:53:17 -0500
parents
children 37e1eb05b1b4
comparison
equal deleted inserted replaced
-1:000000000000 0:5314e5d6f040
1 <tool id="tp_find_and_replace" name="Replace" version="@BASE_VERSION@.0">
2 <description>parts of text</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <!-- Builds the find_and_replace command line (run with perl); the column, output path, patterns and input path are each single-quoted so the shell passes them as single arguments. --> <command interpreter="perl">
7 <![CDATA[
8 find_and_replace
9 #if $searchwhere.searchwhere_select == "column":
10 -c '$searchwhere.column'
11 #end if
12 -o '$outfile'
13 $caseinsensitive
14 $wholewords
15 $skip_first_line
16 $is_regex
17 '$find_pattern'
18 '$replace_pattern'
19 '$infile'
20 ]]>
21 </command>
22 <!-- User-facing parameters; each boolean below expands to its truevalue flag (or to an empty string) where the command template references it. --> <inputs>
23 <param name="infile" format="txt" type="data" label="File to process" />
24 <param name="find_pattern" type="text" size="20" label="Find pattern" help="Use simple text, or a valid regular expression (without the enclosing slashes / / )" >
25 <sanitizer> <!-- the single quote is removed from the valid set because the command template wraps this value in single quotes -->
26 <valid initial="string.printable">
27 <remove value="&apos;"/>
28 </valid>
29 </sanitizer>
30 </param>
31 <param name="replace_pattern" type="text" size="20" label="Replace with"
32 help="Use simple text, or $&amp; (dollar-ampersand) and $1 $2 $3 to refer to matched text. See examples below." >
33 <sanitizer> <!-- same rule as find_pattern: the single quote is removed because the value is single-quoted in the command template -->
34 <valid initial="string.printable">
35 <remove value="&apos;"/>
36 </valid>
37 </sanitizer>
38 </param>
39 <param name="is_regex" type="boolean" checked="false" truevalue="-r" falsevalue=""
40 label="Find-Pattern is a regular expression" help="see help section for details." />
41
42 <param name="caseinsensitive" type="boolean" checked="false" truevalue="-i" falsevalue=""
43 label="Case-Insensitive search" help="" />
44
45 <param name="wholewords" type="boolean" checked="false" truevalue="-w" falsevalue=""
46 label="Find whole-words" help="ignore partial matches (e.g. 'apple' will not match 'snapple')" />
47
48 <param name="skip_first_line" type="boolean" checked="false" truevalue="-s" falsevalue=""
49 label="Ignore first line" help="Select this option if the first line contains column headers. Text in the line will not be replaced. " />
50
51 <conditional name="searchwhere"> <!-- the column selector is only offered when "specific column" is chosen; the command template adds -c in that case -->
52 <param name="searchwhere_select" type="select" label="Find and Replace text in">
53 <option value="line" selected="true">entire line</option>
54 <option value="column">specific column</option>
55 </param>
56 <when value="line" />
57 <when value="column">
58 <param name="column" label="in column" type="data_column" data_ref="infile" accept_default="true" />
59 </when>
60 </conditional>
61 </inputs>
62 <outputs> <!-- single output dataset; format_source and metadata_source both point at the infile parameter -->
63 <data name="outfile" format_source="infile" metadata_source="infile"/>
64 </outputs>
65 <tests>
66 <test> <!-- plain-text, whole-word replacement of "day" with "great day" (search scope left at its default) -->
67 <param name="infile" value="find_and_replace1.txt"/>
68 <param name="find_pattern" value="day"/>
69 <param name="replace_pattern" value="great day"/>
70 <param name="is_regex" value="False"/>
71 <param name="caseinsensitive" value="False"/>
72 <param name="wholewords" value="True"/>
73 <output file="find_and_replace_results1.txt" name="outfile"/>
74 </test>
75 <test> <!-- regular-expression mode: pattern ^chr with an empty replacement, restricted to column 3 -->
76 <param name="infile" value="find_and_replace2.txt"/>
77 <param name="find_pattern" value="^chr"/>
78 <param name="replace_pattern" value=""/>
79 <param name="is_regex" value="True"/>
80 <param name="caseinsensitive" value="False"/>
81 <param name="wholewords" value="False"/>
82 <param name="searchwhere_select" value="column"/>
83 <param name="column" value="3"/>
84 <output file="find_and_replace_results2.txt" name="outfile"/>
85 </test>
86 </tests>
87 <help>
88 <![CDATA[
89 **What it does**
90
91 This tool finds & replaces text in an input dataset.
92
93 .. class:: infomark
94
95 The **pattern to find** can be a simple text string, or a perl **regular expression** string (depending on the *Find-Pattern is a regular expression* check-box).
96
97 .. class:: infomark
98
99 When using regular expressions, the **replace pattern** can contain back-references ( e.g. \\1 )
100
101 .. class:: infomark
102
103 This tool uses Perl regular expression syntax.
104
105 -----
106
107 **Examples of *regular-expression* Find Patterns**
108
109 - **HELLO** The word 'HELLO' (case sensitive).
110 - **AG.T** The letters A,G followed by any single character, followed by the letter T.
111 - **A{4,}** Four or more consecutive A's.
112 - **chr2[012]\\t** The words 'chr20' or 'chr21' or 'chr22' followed by a tab character.
113 - **hsa-mir-([^ ]+)** The text 'hsa-mir-' followed by one-or-more non-space characters. When using parentheses, the matched content of the parentheses can be accessed with **\\1** in the **replace** pattern.
114
115
116 **Examples of Replace Patterns**
117
118 - **WORLD** The word 'WORLD' will be placed wherever the find pattern was found.
119 - **FOO-$&-BAR** Each time the find pattern is found, it will be surrounded with 'FOO-' at the beginning and '-BAR' at the end. **$&** (dollar-ampersand) represents the matched find pattern.
120 - **$1** The text which matched the first parenthesis in the Find Pattern.
121
122
123 -----
124
125 **Example 1**
126
127 **Find Pattern:** HELLO
128 **Replace Pattern:** WORLD
129 **Regular Expression:** no
130 **Replace what:** entire line
131
132 Every time the word HELLO is found, it will be replaced with the word WORLD.
133
134 -----
135
136 **Example 2**
137
138 **Find Pattern:** ^chr
139 **Replace Pattern:** (empty)
140 **Regular Expression:** yes
141 **Replace what:** column 11
142
143 If column 11 (of every line) begins with the letters 'chr', they will be removed. Effectively, it'll turn "chr4" into "4" and "chrXHet" into "XHet"
144
145
146 -----
147
148 **Perl's Regular Expression Syntax**
149
150 The Find & Replace tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern describing a certain amount of text.
151
152 - **( ) { } [ ] . * ? + \\ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for.
153 - **^** matches the beginning of a string (but not an internal line).
154 - **(** .. **)** groups a particular pattern.
155 - **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern.
156
157 - **{n}** The preceding item is matched exactly n times.
158 - **{n,}** The preceding item is matched n or more times.
159 - **{n,m}** The preceding item is matched at least n times but not more than m times.
160
161 - **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**.
162 - **.** Matches any single character except a newline.
163 - ***** The preceding item will be matched zero or more times.
164 - **?** The preceding item is optional and matched at most once.
165 - **+** The preceding item will be matched one or more times.
166 - **^** has two meanings:
167 - matches the beginning of a line or string.
168 - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets.
169 - **$** matches the end of a line or string.
170 - **|** Separates alternate possibilities.
171 - **\\d** matches a single digit
172 - **\\w** matches a single letter or digit or an underscore.
173 - **\\s** matches a single white-space character (space or tab).
174
175 @REFERENCES@
176 ]]>
177 </help>
178 </tool>