annotate replace_text_in_column.xml @ 5:20344ce0c811 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b9d202134c3c6d0e5c398c3ae75e410067fcfc52
author bgruening
date Wed, 23 Nov 2016 15:56:41 -0500
parents 37e1eb05b1b4
children 60edf2f8c28f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
1 <tool id="tp_replace_in_column" name="Replace Text" version="@BASE_VERSION@.0">
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
2 <description>in a specific column</description>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
3 <macros>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
4 <import>macros.xml</import>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
5 </macros>
5
20344ce0c811 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b9d202134c3c6d0e5c398c3ae75e410067fcfc52
bgruening
parents: 3
diff changeset
6 <requirements>
20344ce0c811 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b9d202134c3c6d0e5c398c3ae75e410067fcfc52
bgruening
parents: 3
diff changeset
7 <requirement type="package" version="4.1.3">gawk</requirement>
20344ce0c811 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b9d202134c3c6d0e5c398c3ae75e410067fcfc52
bgruening
parents: 3
diff changeset
8 </requirements>
0
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
9 <version_command>awk --version | head -n 1</version_command>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
10 <command>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
11 <![CDATA[
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
12 awk
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
13 -v OFS="\t" -v FS="\t"
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
14 --re-interval
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
15 --sandbox '{ \$$column = gensub( /$find_pattern/, "$replace_pattern", "g", \$$column ) ; print \$0 ; }'
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
16 "$infile"
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
17 > "$outfile"
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
18 ]]>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
19 </command>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
20 <inputs>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
21 <param format="tabular" name="infile" type="data" label="File to process" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
22 <param name="column" label="in column" type="data_column" data_ref="infile" accept_default="true" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
23
3
37e1eb05b1b4 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 10052765d6b712cf7d38356af4251fcc38a339b6-dirty
bgruening
parents: 0
diff changeset
24 <param name="find_pattern" type="text" label="Find pattern" help="Use simple text, or a valid regular expression (without the enclosing slashes // ) " >
0
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
25 <sanitizer>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
26 <valid initial="string.printable">
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
27 <remove value="&apos;"/>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
28 </valid>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
29 </sanitizer>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
30 </param>
3
37e1eb05b1b4 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 10052765d6b712cf7d38356af4251fcc38a339b6-dirty
bgruening
parents: 0
diff changeset
31 <param name="replace_pattern" type="text" label="Replace with" help="Use simple text, or &amp; (ampersand) and \\1 \\2 \\3 to refer to matched text. See examples below." >
0
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
32 <sanitizer>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
33 <valid initial="string.printable">
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
34 <remove value="&apos;"/>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
35 </valid>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
36 </sanitizer>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
37 </param>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
38 </inputs>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
39 <outputs>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
40 <data name="outfile" format_source="infile" metadata_source="infile" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
41 </outputs>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
42 <tests>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
43 <test>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
44 <param name="infile" value="replace_text_in_column1.txt" ftype="tabular" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
45 <param name="column" value="4" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
46 <param name="find_pattern" value=".+_(R.)" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
47 <param name="replace_pattern" value="\\1" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
48 <output name="outfile" file="replace_text_in_column_results1.txt" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
49 </test>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
50 </tests>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
51 <help>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
52 <![CDATA[
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
53 **What it does**
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
54
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
55 This tool performs a find & replace operation on a specified column in a given file.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
56
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
57 .. class:: infomark
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
58
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
59 The **pattern to find** uses the **extended regular** expression syntax (same as running 'awk --re-interval').
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
60
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
61 .. class:: infomark
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
62
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
63 **TIP:** If you need more complex patterns, use the *awk* tool.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
64
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
65 -----
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
66
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
67
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
68 **Examples of Find Patterns**
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
69
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
70 - **HELLO** The word 'HELLO' (case sensitive).
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
71 - **AG.T** The letters A,G followed by any single character, followed by the letter T.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
72 - **A{4,}** Four or more consecutive A's.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
73 - **chr2[012]\\t** The words 'chr20' or 'chr21' or 'chr22' followed by a tab character.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
74 - **hsa-mir-([^ ]+)** The text 'hsa-mir-' followed by one-or-more non-space characters. When using parentheses, the matched content of the parentheses can be accessed with **\\1** in the **replace** pattern.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
75
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
76
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
77 **Examples of Replace Patterns**
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
78
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
79 - **WORLD** The word 'WORLD' will be placed wherever the find pattern was found.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
80 - **FOO-&-BAR** Each time the find pattern is found, it will be surrounded with 'FOO-' at the beginning and '-BAR' at the end. **&** (ampersand) represents the matched find pattern.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
81 - **\\1** The text which matched the first parenthesis in the Find Pattern.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
82
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
83
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
84 -----
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
85
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
86 **Example 1**
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
87
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
88 **Find Pattern:** HELLO
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
89 **Replace Pattern:** WORLD
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
90
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
91 Every time the word HELLO is found, it will be replaced with the word WORLD. This operation affects only the selected column.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
92
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
93 -----
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
94
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
95 **Example 2**
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
96
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
97 **Find Pattern:** ^(.{4})
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
98 **Replace Pattern:** &\\t
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
99
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
100 Find the first four characters of the selected column in each line, and replace them with the same text, followed by a tab character. In practice - this will split the selected column into two columns. This operation affects only the selected column.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
101
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
102
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
103 -----
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
104
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
105 **Extended Regular Expression Syntax**
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
106
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
107 This tool searches the selected column for text matching the given pattern. A Regular Expression is a pattern describing a certain amount of text.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
108
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
109 - **( ) { } [ ] . * ? + \\ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
110 - **^** matches the beginning of a string (but not an internal line).
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
111 - **(** .. **)** groups a particular pattern.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
112 - **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
113
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
114 - **{n}** The preceding item is matched exactly n times.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
115 - **{n,}** The preceding item is matched n or more times.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
116 - **{n,m}** The preceding item is matched at least n times but not more than m times.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
117
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
118 - **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
119 - **.** Matches any single character except a newline.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
120 - ***** The preceding item will be matched zero or more times.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
121 - **?** The preceding item is optional and matched at most once.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
122 - **+** The preceding item will be matched one or more times.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
123 - **^** has two meanings:
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
124 - matches the beginning of a line or string.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
125 - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
126 - **$** matches the end of a line or string.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
127 - **\|** Separates alternate possibilities.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
128
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
129
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
130 **Note**: AWK uses extended regular expression syntax, not Perl syntax. **\\d**, **\\w**, **\\s** etc. are **not** supported.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
131
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
132 @REFERENCES@
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
133 ]]>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
134 </help>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
135 </tool>