annotate replace_text_in_column.xml @ 10:e39fceb6ab85 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit f48156f03164bde1f1be4826b2f0a1f16dc2cd2f
author bgruening
date Tue, 20 Feb 2018 09:24:19 -0500
parents 60edf2f8c28f
children 74a8bef53a00
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
60edf2f8c28f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b'e6ee273f75fff61d1e419283fa8088528cf59470\n'
bgruening
parents: 5
diff changeset
1 <tool id="tp_replace_in_column" name="Replace Text" version="@BASE_VERSION@.1">
0
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
2 <description>in a specific column</description>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
3 <macros>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
4 <import>macros.xml</import>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
5 </macros>
5
20344ce0c811 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b9d202134c3c6d0e5c398c3ae75e410067fcfc52
bgruening
parents: 3
diff changeset
<!-- Dependency declaration: the tool requires the gawk package, version 4.1.3. -->
6 <requirements>
20344ce0c811 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b9d202134c3c6d0e5c398c3ae75e410067fcfc52
bgruening
parents: 3
diff changeset
7 <requirement type="package" version="4.1.3">gawk</requirement>
20344ce0c811 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b9d202134c3c6d0e5c398c3ae75e410067fcfc52
bgruening
parents: 3
diff changeset
8 </requirements>
0
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
<!-- Reports the first line of the awk version output.
     NOTE(review): the declared requirement is "gawk" but this invokes "awk";
     presumably the package also provides an "awk" executable. Confirm. -->
9 <version_command>awk --version | head -n 1</version_command>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
<!--
  Command template. Runs awk with a tab as both the input (FS) and output (OFS)
  field separator, with the re-interval and sandbox options enabled. For every
  input line, the selected column is overwritten with the result of gensub()
  applied to that column using the "g" flag, and the whole line is then printed.
  Input is read from $infile; standard output is redirected to $outfile.

  NOTE(review): $find_pattern is interpolated between the / / regex delimiters
  and $replace_pattern between double quotes, with no further escaping here.
  The sanitizers on those two params only remove the single quote, so a "/" in
  the pattern or a double quote in the replacement would presumably break the
  awk program. Confirm.
-->
10 <command>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
11 <![CDATA[
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
12 awk
6
60edf2f8c28f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b'e6ee273f75fff61d1e419283fa8088528cf59470\n'
bgruening
parents: 5
diff changeset
13 -v OFS="\t"
60edf2f8c28f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b'e6ee273f75fff61d1e419283fa8088528cf59470\n'
bgruening
parents: 5
diff changeset
14 -v FS="\t"
0
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
15 --re-interval
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
16 --sandbox '{ \$$column = gensub( /$find_pattern/, "$replace_pattern", "g", \$$column ) ; print \$0 ; }'
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
17 "$infile"
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
18 > "$outfile"
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
19 ]]>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
20 </command>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
<!-- Tool inputs: the tabular file, the column to edit, and the find/replace patterns. -->
21 <inputs>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
<!-- Input dataset; restricted to the "tabular" format. -->
22 <param format="tabular" name="infile" type="data" label="File to process" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
<!-- Column selector tied to "infile" via data_ref; the command uses it as the awk field number. -->
23 <param name="column" label="in column" type="data_column" data_ref="infile" accept_default="true" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
24
3
37e1eb05b1b4 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 10052765d6b712cf7d38356af4251fcc38a339b6-dirty
bgruening
parents: 0
diff changeset
<!-- Regular expression to search for; the command places it between / / in the awk program.
     The sanitizer starts from string.printable and removes the single quote, which is the
     character the command uses to quote the awk program.
     NOTE(review): the help text says "backslashes" but shows forward slashes (//). Confirm intended wording. -->
25 <param name="find_pattern" type="text" label="Find pattern" help="Use simple text, or a valid regular expression (without backslashes // ) " >
0
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
26 <sanitizer>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
27 <valid initial="string.printable">
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
28 <remove value="&apos;"/>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
29 </valid>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
30 </sanitizer>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
31 </param>
3
37e1eb05b1b4 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 10052765d6b712cf7d38356af4251fcc38a339b6-dirty
bgruening
parents: 0
diff changeset
<!-- Replacement text; the command places it inside a double-quoted awk string passed to gensub().
     Uses the same sanitizer as find_pattern: string.printable minus the single quote. -->
32 <param name="replace_pattern" type="text" label="Replace with" help="Use simple text, or &amp; (ampersand) and \\1 \\2 \\3 to refer to matched text. See examples below." >
0
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
33 <sanitizer>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
34 <valid initial="string.printable">
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
35 <remove value="&apos;"/>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
36 </valid>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
37 </sanitizer>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
38 </param>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
39 </inputs>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
<!-- Single output dataset "outfile"; its format and metadata are both sourced from "infile". -->
40 <outputs>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
41 <data name="outfile" format_source="infile" metadata_source="infile" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
42 </outputs>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
<!-- One functional test: on column 4 of replace_text_in_column1.txt, matches of ".+_(R.)" are
     replaced with "\\1" (described in the help as the text matched by the first parenthesis),
     and the output is compared against replace_text_in_column_results1.txt. -->
43 <tests>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
44 <test>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
45 <param name="infile" value="replace_text_in_column1.txt" ftype="tabular" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
46 <param name="column" value="4" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
47 <param name="find_pattern" value=".+_(R.)" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
48 <param name="replace_pattern" value="\\1" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
49 <output name="outfile" file="replace_text_in_column_results1.txt" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
50 </test>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
51 </tests>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
52 <help>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
53 <![CDATA[
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
54 **What it does**
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
55
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
56 This tool performs a find & replace operation on a specified column in a given file.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
57
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
58 .. class:: infomark
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
59
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
60 The **pattern to find** uses the **extended regular** expression syntax (same as running 'awk --re-interval').
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
61
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
62 .. class:: infomark
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
63
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
64 **TIP:** If you need more complex patterns, use the *awk* tool.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
65
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
66 -----
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
67
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
68
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
69 **Examples of Find Patterns**
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
70
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
71 - **HELLO** The word 'HELLO' (case sensitive).
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
72 - **AG.T** The letters A,G followed by any single character, followed by the letter T.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
73 - **A{4,}** Four or more consecutive A's.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
74 - **chr2[012]\\t** The words 'chr20' or 'chr21' or 'chr22' followed by a tab character.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
75 - **hsa-mir-([^ ]+)** The text 'hsa-mir-' followed by one-or-more non-space characters. When using parentheses, the matched content of the parentheses can be accessed with **\\1** in the **replace** pattern.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
76
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
77
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
78 **Examples of Replace Patterns**
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
79
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
80 - **WORLD** The word 'WORLD' will be placed wherever the find pattern was found.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
81 - **FOO-&-BAR** Each time the find pattern is found, it will be surrounded with 'FOO-' at the beginning and '-BAR' at the end. **&** (ampersand) represents the matched find pattern.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
82 - **\\1** The text which matched the first parenthesis in the Find Pattern.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
83
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
84
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
85 -----
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
86
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
87 **Example 1**
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
88
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
89 **Find Pattern:** HELLO
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
90 **Replace Pattern:** WORLD
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
91
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
92 Every time the word HELLO is found, it will be replaced with the word WORLD. This operation affects only the selected column.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
93
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
94 -----
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
95
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
96 **Example 2**
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
97
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
98 **Find Pattern:** ^(.{4})
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
99 **Replace Pattern:** &\\t
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
100
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
101 Find the first four characters of the selected column in each line, and replace them with the same text, followed by a tab character. In practice - this will split the selected column into two columns. This operation affects only the selected column.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
102
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
103
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
104 -----
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
105
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
106 **Extended Regular Expression Syntax**
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
107
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
108 The select tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern describing a certain amount of text.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
109
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
110 - **( ) { } [ ] . * ? + \\ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
111 - **^** matches the beginning of a string (but not an internal line).
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
112 - **(** .. **)** groups a particular pattern.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
113 - **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
114
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
115 - **{n}** The preceding item is matched exactly n times.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
116 - **{n,}** The preceding item is matched n or more times.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
117 - **{n,m}** The preceding item is matched at least n times but not more than m times.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
118
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
119 - **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
120 - **.** Matches any single character except a newline.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
121 - ***** The preceding item will be matched zero or more times.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
122 - **?** The preceding item is optional and matched at most once.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
123 - **+** The preceding item will be matched one or more times.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
124 - **^** has two meanings:
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
125 - matches the beginning of a line or string.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
126 - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
127 - **$** matches the end of a line or string.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
128 - **\|** Separates alternate possibilities.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
129
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
130
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
131 **Note**: AWK uses extended regular expression syntax, not Perl syntax. **\\d**, **\\w**, **\\s** etc. are **not** supported.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
132
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
133 @REFERENCES@
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
134 ]]>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
135 </help>
6
60edf2f8c28f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b'e6ee273f75fff61d1e419283fa8088528cf59470\n'
bgruening
parents: 5
diff changeset
136 <expand macro="citations" />
0
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
137 </tool>