annotate awk.xml @ 19:12615d397df7 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 70980e329cd9fa78e74bf14a76fd4ded9bd2b91f
author bgruening
date Thu, 29 Feb 2024 22:15:20 +0000
parents ddf54b12c295
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
19
12615d397df7 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 70980e329cd9fa78e74bf14a76fd4ded9bd2b91f
bgruening
parents: 16
diff changeset
1 <tool id="tp_awk_tool" name="Text reformatting" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
0
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
2 <description>with awk</description>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
3 <macros>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
4 <import>macros.xml</import>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
5 </macros>
19
12615d397df7 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 70980e329cd9fa78e74bf14a76fd4ded9bd2b91f
bgruening
parents: 16
diff changeset
6 <expand macro="creator"/>
5
20344ce0c811 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b9d202134c3c6d0e5c398c3ae75e410067fcfc52
bgruening
parents: 0
diff changeset
7 <requirements>
19
12615d397df7 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 70980e329cd9fa78e74bf14a76fd4ded9bd2b91f
bgruening
parents: 16
diff changeset
8 <requirement type="package" version="5.3.0">gawk</requirement>
5
20344ce0c811 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b9d202134c3c6d0e5c398c3ae75e410067fcfc52
bgruening
parents: 0
diff changeset
9 </requirements>
0
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
10 <version_command>awk --version | head -n 1</version_command>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
11 <command>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
12 <![CDATA[
13
0a8c6b61f0f4 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 4f79443517baf378fbfe1f81be361d97f2938601
bgruening
parents: 11
diff changeset
13 env -i
16
ddf54b12c295 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit ded469a802a0496c9a6a2daee7b9505bd8698802-dirty"
bgruening
parents: 13
diff changeset
14 \$(which awk)
0
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
15 --sandbox
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
16 -v FS=' '
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
17 -v OFS=' '
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
18 --re-interval
19
12615d397df7 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 70980e329cd9fa78e74bf14a76fd4ded9bd2b91f
bgruening
parents: 16
diff changeset
19 -f '$awk_script'
12615d397df7 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 70980e329cd9fa78e74bf14a76fd4ded9bd2b91f
bgruening
parents: 16
diff changeset
20 '$infile'
12615d397df7 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 70980e329cd9fa78e74bf14a76fd4ded9bd2b91f
bgruening
parents: 16
diff changeset
21 > '$outfile'
0
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
22 ]]>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
23 </command>
5
20344ce0c811 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b9d202134c3c6d0e5c398c3ae75e410067fcfc52
bgruening
parents: 0
diff changeset
24 <configfiles>
20344ce0c811 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b9d202134c3c6d0e5c398c3ae75e410067fcfc52
bgruening
parents: 0
diff changeset
25 <configfile name="awk_script">$code</configfile>
20344ce0c811 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b9d202134c3c6d0e5c398c3ae75e410067fcfc52
bgruening
parents: 0
diff changeset
26 </configfiles>
0
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
27 <inputs>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
28 <param name="infile" format="txt" type="data" label="File to process" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
29 <param name="code" type="text" area="true" size="5x35" label="AWK Program" help="">
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
30 <sanitizer>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
31 <valid initial="string.printable">
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
32 <remove value="&apos;"/>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
33 </valid>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
34 </sanitizer>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
35 </param>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
36 </inputs>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
37 <outputs>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
38 <data name="outfile" format_source="infile" metadata_source="infile"/>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
39 </outputs>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
40 <tests>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
41 <test>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
42 <param name="infile" value="awk1.txt" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
43 <!-- Commas are not allowed in a value field. Values containing a comma will be split. -->
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
44 <param name="code" value='$2>0.5 { print $2*9"\t"$1 }' />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
45 <output name="outfile" file="awk_results1.txt" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
46 </test>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
47 </tests>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
48 <help>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
49 <![CDATA[
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
50 **What it does**
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
51
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
52 This tool runs the unix **awk** command on the selected data file.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
53
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
54 .. class:: infomark
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
55
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
56 **TIP:**
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
57
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
58 This tool uses the **extended regular** expression syntax (not the perl syntax).
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
59 **\\d**, **\\w**, **\\s** etc. are **not** supported.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
60
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
61
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
62 **Further reading**
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
63
6
60edf2f8c28f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b'e6ee273f75fff61d1e419283fa8088528cf59470\n'
bgruening
parents: 5
diff changeset
64 - Awk by Example (http://www.ibm.com/developerworks/linux/library/l-awk1/index.html)
0
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
65 - Long AWK tutorial (http://www.grymoire.com/Unix/Awk.html)
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
66
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
67 -----
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
68
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
69 **AWK programs**
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
70
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
71 Most AWK programs consist of **patterns** (i.e. rules that match lines of text) and **actions** (i.e. commands to execute when a pattern matches a line).
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
72
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
73 The basic form of an AWK program is::
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
74
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
75 pattern { action 1; action 2; action 3; }
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
76
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
77
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
78 **Pattern Examples**
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
79
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
80 - **$2 == "chr3"** will match lines whose second column is the string 'chr3'
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
81 - **$5-$4>23** will match lines where subtracting the value of the fourth column from the value of the fifth column gives a value larger than 23.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
82 - **/AG..AG/** will match lines that contain the regular expression **AG..AG** (meaning the characters AG followed by any two characters followed by AG). (This is the way to specify regular expressions on the entire line, similar to GREP.)
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
83 - **$7 ~ /A{4}U/** will match lines whose seventh column contains 4 consecutive A's followed by a U. (This is the way to specify regular expressions on a specific field.)
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
84 - **10000 < $4 && $4 < 20000** will match lines whose fourth column value is larger than 10,000 but smaller than 20,000
11
74a8bef53a00 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 0ba37c1f33eeb1c77b4d9363d681fe522d9f7fe7
bgruening
parents: 8
diff changeset
85 - **BEGIN** will be executed once only, before the first input record is read.
0
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
86 - If no pattern is specified, all lines match (meaning the **action** part will be executed on all lines).
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
87
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
88
8
4c752559b236 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 3103ebed1a420c7d3415b67ef532ea579edf9faa
bgruening
parents: 6
diff changeset
89
0
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
90 **Action Examples**
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
91
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
92 - **{ print }** or **{ print $0 }** will print the entire input line (the line that matched in **pattern**). **$0** is a special marker meaning 'the entire line'.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
93 - **{ print $1, $4, $5 }** will print only the first, fourth and fifth fields of the input line.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
94 - **{ print $4, $5-$4 }** will print the fourth column and the difference between the fifth and fourth column. (If the fourth column was start-position in the input file, and the fifth column was end-position - the output file will contain the start-position, and the length).
8
4c752559b236 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 3103ebed1a420c7d3415b67ef532ea579edf9faa
bgruening
parents: 6
diff changeset
95 - **{ FS = "," }** can be used to change the field separator (delimiter) for parsing the input file.
0
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
96 - If no action part is specified (not even the curly brackets) - the default action is to print the entire line.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
97
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
98
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
99 **AWK's Regular Expression Syntax**
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
100
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
101 The select tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern describing a certain amount of text.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
102
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
103 - **( ) { } [ ] . * ? + \ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
104 - **^** matches the beginning of a string (but not an internal line).
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
105 - **(** .. **)** groups a particular pattern.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
106 - **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
107
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
108 - **{n}** The preceding item is matched exactly n times.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
109 - **{n,}** The preceding item is matched n or more times.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
110 - **{n,m}** The preceding item is matched at least n times but not more than m times.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
111
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
112 - **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
113 - **.** Matches any single character except a newline.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
114 - ***** The preceding item will be matched zero or more times.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
115 - **?** The preceding item is optional and matched at most once.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
116 - **+** The preceding item will be matched one or more times.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
117 - **^** has two meanings:
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
118 - matches the beginning of a line or string.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
119 - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
120 - **$** matches the end of a line or string.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
121 - **\|** Separates alternate possibilities.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
122
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
123 ]]>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
124 </help>
6
60edf2f8c28f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b'e6ee273f75fff61d1e419283fa8088528cf59470\n'
bgruening
parents: 5
diff changeset
125 <expand macro="citations" />
0
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
126 </tool>