annotate find_and_replace.xml @ 0:5314e5d6f040 draft

Imported from capsule None
author bgruening
date Thu, 29 Jan 2015 07:53:17 -0500
parents
children 37e1eb05b1b4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
1 <tool id="tp_find_and_replace" name="Replace" version="@BASE_VERSION@.0">
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
2 <description>parts of text</description>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
3 <macros>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
4 <import>macros.xml</import>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
5 </macros>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
6 <command interpreter="perl">
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
7 <![CDATA[
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
8 find_and_replace
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
9 #if $searchwhere.searchwhere_select == "column":
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
10 -c $searchwhere.column
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
11 #end if
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
12 -o $outfile
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
13 $caseinsensitive
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
14 $wholewords
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
15 $skip_first_line
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
16 $is_regex
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
17 '$find_pattern'
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
18 '$replace_pattern'
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
19 '$infile'
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
20 ]]>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
21 </command>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
22 <inputs>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
23 <param name="infile" format="txt" type="data" label="File to process" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
24 <param name="find_pattern" type="text" size="20" label="Find pattern" help="Use simple text, or a valid regular expression (without the enclosing slashes // ) " >
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
25 <sanitizer>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
26 <valid initial="string.printable">
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
27 <remove value="&apos;"/>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
28 </valid>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
29 </sanitizer>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
30 </param>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
31 <param name="replace_pattern" type="text" size="20" label="Replace with"
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
32 help="Use simple text, or $&amp; (dollar-ampersand) and $1 $2 $3 to refer to matched text. See examples below." >
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
33 <sanitizer>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
34 <valid initial="string.printable">
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
35 <remove value="&apos;"/>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
36 </valid>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
37 </sanitizer>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
38 </param>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
39 <param name="is_regex" type="boolean" checked="false" truevalue="-r" falsevalue=""
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
40 label="Find-Pattern is a regular expression" help="see help section for details." />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
41
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
42 <param name="caseinsensitive" type="boolean" checked="false" truevalue="-i" falsevalue=""
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
43 label="Case-Insensitive search" help="" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
44
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
45 <param name="wholewords" type="boolean" checked="false" truevalue="-w" falsevalue=""
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
46 label="Find whole-words" help="ignore partial matches (e.g. 'apple' will not match 'snapple')" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
47
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
48 <param name="skip_first_line" type="boolean" checked="false" truevalue="-s" falsevalue=""
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
49 label="Ignore first line" help="Select this option if the first line contains column headers. Text in the line will not be replaced. " />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
50
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
51 <conditional name="searchwhere">
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
52 <param name="searchwhere_select" type="select" label="Find and Replace text in">
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
53 <option value="line" selected="true">entire line</option>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
54 <option value="column">specific column</option>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
55 </param>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
56 <when value="line" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
57 <when value="column">
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
58 <param name="column" label="in column" type="data_column" data_ref="infile" accept_default="true" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
59 </when>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
60 </conditional>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
61 </inputs>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
62 <outputs>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
63 <data format_source="infile" name="outfile" metadata_source="infile" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
64 </outputs>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
65 <tests>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
66 <test>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
67 <param name="infile" value="find_and_replace1.txt" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
68 <param name="find_pattern" value="day" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
69 <param name="replace_pattern" value="great day" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
70 <param name="is_regex" value="False" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
71 <param name="caseinsensitive" value="False" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
72 <param name="wholewords" value="True" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
73 <output name="outfile" file="find_and_replace_results1.txt" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
74 </test>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
75 <test>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
76 <param name="infile" value="find_and_replace2.txt" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
77 <param name="find_pattern" value="^chr" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
78 <param name="replace_pattern" value="" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
79 <param name="is_regex" value="True" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
80 <param name="caseinsensitive" value="False" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
81 <param name="wholewords" value="False" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
82 <param name="searchwhere_select" value="column" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
83 <param name="column" value="3" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
84 <output name="outfile" file="find_and_replace_results2.txt" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
85 </test>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
86 </tests>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
87 <help>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
88 <![CDATA[
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
89 **What it does**
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
90
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
91 This tool finds and replaces text in an input dataset.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
92
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
93 .. class:: infomark
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
94
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
95 The **pattern to find** can be a simple text string, or a Perl **regular expression** string (depending on the *Find-Pattern is a regular expression* check-box).
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
96
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
97 .. class:: infomark
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
98
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
99 When using regular expressions, the **replace pattern** can contain back-references ( e.g. \\1 )
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
100
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
101 .. class:: infomark
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
102
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
103 This tool uses Perl regular expression syntax.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
104
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
105 -----
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
106
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
107 **Examples of *regular-expression* Find Patterns**
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
108
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
109 - **HELLO** The word 'HELLO' (case sensitive).
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
110 - **AG.T** The letters A,G followed by any single character, followed by the letter T.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
111 - **A{4,}** Four or more consecutive A's.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
112 - **chr2[012]\\t** The words 'chr20' or 'chr21' or 'chr22' followed by a tab character.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
113 - **hsa-mir-([^ ]+)** The text 'hsa-mir-' followed by one-or-more non-space characters. When using parentheses, the matched content of the parentheses can be accessed with **\\1** in the **replace** pattern.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
114
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
115
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
116 **Examples of Replace Patterns**
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
117
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
118 - **WORLD** The word 'WORLD' will be placed wherever the find pattern was found.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
119 - **FOO-$&-BAR** Each time the find pattern is found, it will be surrounded with 'FOO-' at the beginning and '-BAR' at the end. **$&** (dollar-ampersand) represents the matched find pattern.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
120 - **$1** The text which matched the first parenthesis in the Find Pattern.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
121
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
122
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
123 -----
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
124
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
125 **Example 1**
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
126
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
127 **Find Pattern:** HELLO
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
128 **Replace Pattern:** WORLD
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
129 **Regular Expression:** no
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
130 **Replace what:** entire line
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
131
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
132 Every time the word HELLO is found, it will be replaced with the word WORLD.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
133
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
134 -----
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
135
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
136 **Example 2**
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
137
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
138 **Find Pattern:** ^chr
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
139 **Replace Pattern:** (empty)
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
140 **Regular Expression:** yes
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
141 **Replace what:** column 11
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
142
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
143 If column 11 (of every line) begins with the letters 'chr', they will be removed. Effectively, it'll turn "chr4" into "4" and "chrXHet" into "XHet"
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
144
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
145
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
146 -----
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
147
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
148 **Perl's Regular Expression Syntax**
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
149
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
150 The Find & Replace tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern describing a certain amount of text.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
151
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
152 - **( ) { } [ ] . * ? + \\ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
153 - **^** matches the beginning of a string (but not an internal line).
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
154 - **(** .. **)** groups a particular pattern.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
155 - **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
156
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
157 - **{n}** The preceding item is matched exactly n times.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
158 - **{n,}** The preceding item is matched n or more times.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
159 - **{n,m}** The preceding item is matched at least n times but not more than m times.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
160
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
161 - **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
162 - **.** Matches any single character except a newline.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
163 - ***** The preceding item will be matched zero or more times.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
164 - **?** The preceding item is optional and matched at most once.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
165 - **+** The preceding item will be matched one or more times.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
166 - **^** has two meanings:
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
167 - matches the beginning of a line or string.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
168 - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
169 - **$** matches the end of a line or string.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
170 - **\\|** Separates alternate possibilities.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
171 - **\\d** matches a single digit
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
172 - **\\w** matches a single letter or digit or an underscore.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
173 - **\\s** matches a single white-space character (space or tab).
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
174
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
175 @REFERENCES@
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
176 ]]>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
177 </help>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
178 </tool>