comparison regex.xml @ 0:60d04307b027 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
author galaxyp
date Wed, 18 Jan 2017 17:45:20 -0500
parents
children 209b7c5ee9d7
comparison
equal deleted inserted replaced
-1:000000000000 0:60d04307b027
1 <tool id="regex1" name="Regex Find And Replace" version="1.0.0">
2 <description></description>
3 <command>python '$__tool_directory__/regex.py' --input '$input' --output '$out_file1' --input_display_name '$input.display_name'
4 #for $check in $checks:
5 --pattern='$check.pattern' --replacement='$check.replacement'
6 #end for
7 </command> <!-- regex.py is located through $__tool_directory__ (replaces the deprecated interpreter attribute); the loop emits one pattern/replacement option pair per "checks" repeat entry -->
8 <inputs>
9 <param name="input" type="data" format="txt" label="Select lines from"/>
10 <repeat name="checks" title="Check"> <!-- each repeat instance supplies one pattern/replacement pair to the command loop -->
11 <param name="pattern" type="text" size="40" value="chr([0-9A-Za-z])+" label="Find Regex" help="here you can enter text or regular expression (for syntax check lower part of this frame)">
12 <sanitizer> <!-- backslash and single quote are replaced by placeholder tokens; the command template wraps this value in single quotes -->
13 <valid>
14 <add preset="string.printable"/>
15 <remove value="&#92;"/>
16 <remove value="&apos;"/>
17 </valid>
18 <mapping initial="none">
19 <add source="&#92;" target="__backslash__"/>
20 <add source="&apos;" target="__sq__"/>
21 </mapping>
22 </sanitizer>
23 </param>
24 <param name="replacement" type="text" size="40" value="newchr\1" label="Replacement">
25 <sanitizer> <!-- same escaping rules as the pattern parameter above -->
26 <valid>
27 <add preset="string.printable"/>
28 <remove value="&#92;"/>
29 <remove value="&apos;"/>
30 </valid>
31 <mapping initial="none">
32 <add source="&#92;" target="__backslash__"/>
33 <add source="&apos;" target="__sq__"/>
34 </mapping>
35 </sanitizer>
36 </param>
37 </repeat>
38 </inputs>
39 <outputs>
40 <data name="out_file1" format_source="input" metadata_source="input"/> <!-- datatype and metadata are both inherited from the "input" dataset parameter -->
41 </outputs>
42 <tests>
43 <test> <!-- single check: the captured group is written twice in the replacement -->
44 <param name="input" value="find1.txt"/>
45 <param name="checks_0|pattern" value="(T\w+)"/>
46 <param name="checks_0|replacement" value="\1 \1" />
47 <output name="out_file1" file="replace1.txt"/>
48 </test>
49 <test> <!-- single check: the replacement is a single quote followed by a double quote -->
50 <param name="input" value="find1.txt"/>
51 <param name="checks_0|pattern" value="f"/>
52 <param name="checks_0|replacement" value="'&quot;" />
53 <output name="out_file1" file="replace2.txt"/>
54 </test>
55 <test> <!-- three checks in sequence; the first two use the #{input_name} token -->
56 <param name="input" value="find1.txt"/>
57 <param name="checks_0|pattern" value="a test file"/>
58 <param name="checks_0|replacement" value="a file named #{input_name}" />
59 <param name="checks_1|pattern" value="see here"/>
60 <param name="checks_1|replacement" value="see #{input_name}" />
61 <param name="checks_2|pattern" value="see (find1).txt"/>
62 <param name="checks_2|replacement" value="see \1" />
63 <output name="out_file1" file="replace3.txt"/>
64 </test>
65 </tests>
66 <help>
67 This tool goes line by line through the specified input file and
68 replaces text which matches the specified regular expression patterns
69 with its corresponding specified replacement.
70
71 This tool uses Python regular expressions. More information about
72 Python regular expressions can be found here:
73 http://docs.python.org/library/re.html.
74
75 To convert an Illumina FASTQ sequence id from the CASAVA 8 format::
76
77 @EAS139:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG
78 GGGTGATGGCCGCTGCCGATGGCGTCAAATCCCACC
79 +EAS139:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG
80 IIIIIIIIIIIIIIIIIIIIIIIIIIIIII9IG9IC
81
82 To the CASAVA 7 format::
83
84 @EAS139_FC706VJ:2:2104:15343:197393#0/1
85 GGGTGATGGCCGCTGCCGATGGCGTCAAATCCCACC
86 +EAS139_FC706VJ:2:2104:15343:197393#0/1
87 IIIIIIIIIIIIIIIIIIIIIIIIIIIIII9IG9IC
88
89 Use Settings::
90
91 Find Regex: ^([@+][A-Z0-9]+):\d+:(\S+)\s(\d).*$
92 Replacement: \1_\2#0/\3
93
94 Note that the parentheses **()** capture patterns in the text that can be used in the replacement text by using a backslash-number reference: **\\1**
95
96 The regex **^([@+][A-Z0-9]+):\\d+:(\\S+)\\s(\\d).*$** means::
97
98 ^ - start the match at the beginning of the line of text
99 ( - start a group (1), that is a string of matched text, that can be back-referenced in the replacement as \1
100 [@+] - matches either a @ or + character
101 [A-Z0-9]+ - matches an uppercase letter or a digit, the plus sign means to match 1 or more such characters
102 ) - end a group (1), that is a string of matched text, that can be back-referenced in the replacement as \1
103 :\d+: - matches a colon followed by one or more digits followed by a colon character
104 (\S+) - matches one or more non-whitespace characters, the enclosing parentheses make this a group (2) that can be back-referenced in the replacement text as \2
105 \s - matches a whitespace character
106 (\d) - matches a single digit character, the enclosing parentheses make this a group (3) that can be back-referenced in the replacement text as \3
107 .* - dot means match any character, asterisk means zero or more matches
108 $ - the regex must match to the end of the line of text
109
110 In the replacement pattern, use the special token #{input_name} to insert the input dataset's display name.
111 The name can be modified by a second find/replace check. Suppose you want to insert the sample id of your dataset,
112 named **Sample ABC123**, into the dataset itself, which currently contains the lines::
113 Data 1
114 Data 2
115 Data 3
116
117 You can use the following checks::
118 Find Regex: Data
119 Replacement: #{input_name} Data
120
121 Find Regex: Sample (\S+)
122 Replacement: \1
123
124 The result will be::
125 ABC123 Data 1
126 ABC123 Data 2
127 ABC123 Data 3
128
129
130
131 Galaxy aggressively escapes input supplied to tools, so if something
132 is not working please let us know and we can look into whether this is
133 the cause. Also if you would like help constructing regular
134 expressions for your inputs, please let us know at help@msi.umn.edu.
135 </help>
136 </tool>