comparison collection_column_join.xml @ 0:2f120a5c49b1 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/collection_column_join commit 3b918f5a99ea13ec5acc7cc5fdd310fadb773ac0
author iuc
date Fri, 27 May 2016 11:41:19 -0400
parents
children 9c8536c7ed42
comparison
equal deleted inserted replaced
-1:000000000000 0:2f120a5c49b1
1 <tool id="collection_column_join" name="Column Join" version="0.0.1">
<!-- Short text shown next to the tool name "Column Join". -->
2 <description>
3 on Collections
4 </description>
<!-- The script rendered from the configfile below calls head, tail, sort, join, paste and cat
     (join is run with "-o auto"); presumably these are supplied by this package - TODO confirm. -->
5 <requirements>
6 <requirement type="package" version="8.22">gnu_coreutils</requirement>
7 </requirements>
<!-- Two exit-code rules that together match every non-zero exit status:
     1 and above, and -1 and below. -->
8 <stdio>
9 <exit_code range="1:" />
10 <exit_code range=":-1" />
11 </stdio>
<!-- Command line: runs the rendered collection_column_join_script configfile with sh.
     When "output_shell_script" is among the comma-separated include_outputs values,
     the script is first copied to the script_output dataset. -->
12 <command><![CDATA[
13 #if "output_shell_script" in str( $include_outputs ).split( "," ):
14 cp "${collection_column_join_script}" "${script_output}" &&
15 #end if
16 sh "${collection_column_join_script}"
17 ]]>
18 </command>
19 <configfiles>
<!--
  Cheetah template that renders a sh script joining every input on the identifier column.
  For each input it builds a header fragment (the non-key column names, or the column
  numbers when there is no header, each prefixed with the element identifier) and a body
  sorted on the key column. The first input seeds the result; every later input is
  outer-joined onto it (join -a 1 -a 2) with fill_char written for missing fields.
  Header and body accumulate in header0/1.tmp and output0/1.tmp: step i reads the file
  numbered i % 2 and writes the file numbered ( i + 1 ) % 2.
  NOTE(review): the element identifier is interpolated into the awk printf format string,
  so an identifier containing "%" or a double quote would break header generation.
-->
20 <configfile name="collection_column_join_script"><![CDATA[
21 #!/bin/sh
22 touch header0.tmp &&
23 touch output0.tmp &&
24 #set $delimiter = '\t'
25 #set $left_identifier_column = $identifier_column
26 #set $tail_offset = int( str( $has_header ) ) + 1
27 #for $i, $tabular_item in enumerate( $input_tabular ):
28 #if $has_header:
29 head -n ${has_header} "${tabular_item}" | awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}_%s", arr[i] ); ctr++ } }; printf( "\n" ); }' > input_header.tmp &&
## Sort on the key field only (-k N,N): a bare -k N extends the key to the end of the line, while join expects ordering on the join field alone.
30 tail -n +${tail_offset} "${tabular_item}" | LC_ALL=C sort -t "${delimiter}" -k ${identifier_column},${identifier_column} > input_file.tmp &&
31 #else:
32 awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}_%s", i ); ctr++ } }; exit }' "${tabular_item}" > input_header.tmp &&
33 LC_ALL=C sort -t "${delimiter}" -k ${identifier_column},${identifier_column} "${tabular_item}" > input_file.tmp &&
34 #end if
35 #if $i == 0:
36 mv input_file.tmp output${ ( $i + 1 ) % 2 }.tmp &&
37 #if $has_header:
## The key column name is taken from the first header line. It is split on the tab delimiter like the rest of the script, and printed through a fixed "%s" format so a percent sign in the name is not read as a printf directive.
38 awk -F "${delimiter}" '{ printf( "%s", \$${identifier_column} ); exit }' "${tabular_item}" > header${ $i % 2 }.tmp &&
39 #else:
40 echo "#KEY" > header${ $i % 2 }.tmp &&
41 #end if
42 #else:
43 LC_ALL=C join -o auto -a 1 -a 2 -1 ${left_identifier_column} -2 ${identifier_column} -t "${delimiter}" -e "${fill_char}" output${ $i % 2 }.tmp input_file.tmp > output${ ( $i + 1 ) % 2 }.tmp &&
## join writes the join field first, so the accumulated table is keyed on column 1 from here on.
44 #set $left_identifier_column = 1
45 #end if
46 paste -d "${delimiter}" header${ $i % 2 }.tmp input_header.tmp > header${ ( $i + 1 ) % 2 }.tmp &&
47 #end for
48 cat header${ ( $i + 1 ) % 2 }.tmp output${ ( $i + 1 ) % 2 }.tmp > "${tabular_output}"
49 ]]>
50 </configfile>
51 </configfiles>
52 <inputs>
<!-- identifier_column is used directly as a 1-based field index by sort -k, join -1/-2 and awk,
     so the minimum is 1; a value of 0 can only fail when the script runs. -->
53 <param name="input_tabular" type="data" format="tabular" multiple="True" optional="False" label="Tabular files"/>
54 <!-- <param name="identifier_column" type="data_column" data_ref="input_tabular" value="0" min="0" optional="False" label="Identifier column"/> -->
55 <param name="identifier_column" type="integer" value="1" min="1" optional="False" label="Identifier column" help="1-based number of the column to join on; the same column is used for every input."/>
56 <param name="has_header" type="integer" value="0" min="0" optional="False" label="Number of Header lines in each item" help="Number of leading lines of each input treated as header; use 0 for headerless inputs."/>
57 <param name="fill_char" type="text" value="." optional="False" label="Fill character" help="Written in place of the fields of an input that has no row for a given identifier."/>
58 <param name="include_outputs" type="select" multiple="True" label="Additional datasets to create">
59 <option value="output_shell_script" selected="false">Shell script</option>
60 </param>
61 </inputs>
62 <outputs>
<!-- Joined table; the generated script writes it with its final cat command. -->
63 <data format="tabular" name="tabular_output"/>
<!-- Copy of the generated shell script; only created when the "Shell script" option
     (output_shell_script) is selected in include_outputs. -->
64 <data format="txt" name="script_output">
65 <filter>include_outputs and "output_shell_script" in include_outputs</filter>
66 </data>
67 </outputs>
68 <tests>
<!-- Three inputs with one header line each, joined on column 1 with "." as fill;
     no additional outputs requested. Result is compared with out_1.tabular. -->
69 <test>
70 <param name="input_tabular" value="in_1.tabular,in_2.tabular,in_3.tabular" ftype="tabular"/>
71 <param name="identifier_column" value="1"/>
72 <param name="has_header" value="1"/>
73 <param name="fill_char" value="."/>
74 <param name="include_outputs" />
75 <output name="tabular_output" file="out_1.tabular" ftype="tabular"/>
76 </test>
<!-- Same join on the headerless variants of the inputs (has_header = 0).
     Result is compared with out_2.tabular. -->
77 <test>
78 <param name="input_tabular" value="in_1_headerless.tabular,in_2_headerless.tabular,in_3_headerless.tabular" ftype="tabular"/>
79 <param name="identifier_column" value="1"/>
80 <param name="has_header" value="0"/>
81 <param name="fill_char" value="."/>
82 <param name="include_outputs" />
83 <output name="tabular_output" file="out_2.tabular" ftype="tabular"/>
84 </test>
85 </tests>
86 <help>
87 <![CDATA[
88 Joins lists of tabular datasets together on a field.
89
90 -----
91
92 **Example**
93
94 To join three files, with headers, based on the first column:
95
96 **First file (in_1.tabular)**::
97
98 #KEY c2 c3 c4
99 one 1-1 1-2 1-3
100 two 1-4 1-5 1-6
101 three 1-7 1-8 1-9
102
103
104 **Second file (in_2.tabular)**::
105
106 #KEY c2 c3 c4
107 one 2-1 2-2 2-3
108 two 2-4 2-5 2-6
109 three 2-7 2-8 2-9
110
111 **Third file (in_3.tabular)**::
112
113 #KEY c2 c3 c4
114 one 3-3 3-2 3-3
115 two 3-4 3-5 3-6
116 three 3-7 3-8 3-9
117
118
119 **Joining** the files, using an **identifier column of 1** and **1 header line**, will return::
120
121 #KEY in_1.tabular_c2 in_1.tabular_c3 in_1.tabular_c4 in_2.tabular_c2 in_2.tabular_c3 in_2.tabular_c4 in_3.tabular_c2 in_3.tabular_c3 in_3.tabular_c4
122 one 1-1 1-2 1-3 2-1 2-2 2-3 3-3 3-2 3-3
123 three 1-7 1-8 1-9 2-7 2-8 2-9 3-7 3-8 3-9
124 two 1-4 1-5 1-6 2-4 2-5 2-6 3-4 3-5 3-6
125
126 ]]>
127 </help>
128 <citations>
129 </citations>
130 </tool>