comparison split_file_on_column.xml @ 5:d4b5b70e82cb draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_on_column commit df81dd26ed1cf67a0d95b9614738b1d59667773f
author bgruening
date Mon, 04 Jul 2022 12:26:46 +0000
parents 37a53100b67e
children ff2a81aa3f08
comparison
equal deleted inserted replaced
4:37a53100b67e 5:d4b5b70e82cb
1 <tool id="tp_split_on_column" name="Split file" version="0.4"> 1 <tool id="tp_split_on_column" name="Split by group" version="0.5">
2 <description>according to the values of a column</description>
3 <requirements> 2 <requirements>
4 <requirement type="package" version="5.0.1">gawk</requirement> 3 <requirement type="package" version="5.0.1">gawk</requirement>
5 </requirements> 4 </requirements>
6 <command> 5 <command>
7 <![CDATA[ 6 <![CDATA[
12 awk -F'\t' '{print > "tmp_out/"\$$column".$infile.ext" }' '$infile' 11 awk -F'\t' '{print > "tmp_out/"\$$column".$infile.ext" }' '$infile'
13 #end if 12 #end if
14 ]]> 13 ]]>
15 </command> 14 </command>
16 <inputs> 15 <inputs>
17 <param format="tabular" name="infile" type="data" label="File to select" /> 16 <param format="tabular" name="infile" type="data" label="File to split" />
18 <param name="column" label="on column" type="data_column" data_ref="infile" accept_default="true" /> 17 <param name="column" label="on column" type="data_column" data_ref="infile" accept_default="true" />
19 18
20 <param name="include_header" type="boolean" label="Include the header in all splitted files?" 19 <param name="include_header" type="boolean" label="Include header in splits?"
21 help="Include the first line (the assumed header line) in all splitted files." /> 20 help="Include the first line (the assumed header line) to all split files." />
22 </inputs> 21 </inputs>
23 <outputs> 22 <outputs>
24 <collection name="split_output" type="list" label="Table split on first column"> 23 <collection name="split_output" type="list" label="Split by group collection">
25 <discover_datasets pattern="__name_and_ext__" directory="tmp_out" /> 24 <discover_datasets pattern="__name_and_ext__" directory="tmp_out" />
26 </collection> 25 </collection>
27 </outputs> 26 </outputs>
28 <tests> 27 <tests>
29 <test> 28 <test>
83 </test> 82 </test>
84 </tests> 83 </tests>
85 <help> 84 <help>
86 <![CDATA[ 85 <![CDATA[
87 86
88 **What it does** 87 ========
88 Synopsis
89 ========
89 90
90 This tool splits a file into different smaller files using a specific column. 91 Given a single input dataset this tool splits the file on unique values from a specified column.
91 It will work like the group tool, but every group is saved to its own file. 92
92 You have the option to include the header (first line) in all splitted files. 93 ===========
94 Description
95 ===========
96
97 This tool splits a file into a collection based on unique values of a specific column.
98 It performs a grouping operation with every group saved as a separate collection element.
99 You have the option to include the header (first line) in all splits.
93 If you have a header and don't want to keep it, please remove it before you use this tool. 100 If you have a header and don't want to keep it, please remove it before you use this tool.
94 For example with the "Remove beginning of a file" tool. 101 For example with the "Remove beginning of a file" tool.
95 102
96 ----- 103 -----
97 104
98 **Example** 105 **Example**
99 106
100 Splitting a file without header on column 5 from this:: 107 Splitting this file on column 1::
101 108
102 chr7 56632 56652 cluster 1 109 chr1 10 20
103 chr7 56736 56756 cluster 1 110 chr1 30 40
104 chr7 56761 56781 cluster 2 111 chr2 40 70
105 chr7 56772 56792 cluster 2 112 chr4 60 80
106 chr7 56775 56795 cluster 2
107
108 will produce 2 files with different clusters::
109
110 chr7 56632 56652 cluster 1
111 chr7 56736 56756 cluster 1
112 113
113 114
114 chr7 56761 56781 cluster 2 115 will produce a collection with 3 elements::
115 chr7 56772 56792 cluster 2 116
116 chr7 56775 56795 cluster 2 117 chr1 10 20
118 chr1 30 40
119
120 chr2 40 70
121
122 chr4 60 80
123
124 ------
125
126 .. image:: $PATH_TO_IMAGES/split_by_group.svg
127 :width: 800
128 :alt: Split by group
117 129
118 ]]> 130 ]]>
119 </help> 131 </help>
120 <citations> 132 <citations>
121 <citation type="bibtex"> 133 <citation type="bibtex">