Mercurial > repos > iuc > collection_column_join

--- a/collection_column_join.xml	Wed May 24 06:19:45 2017 -0400
+++ b/collection_column_join.xml	Fri Apr 06 03:44:21 2018 -0400
@@ -1,4 +1,4 @@
-<tool id="collection_column_join" name="Column Join" version="0.0.2">
+<tool id="collection_column_join" name="Column Join" version="0.0.3">
     <description>on Collections</description>
     <requirements>
         <requirement type="package" version="8.25">coreutils</requirement>
@@ -19,12 +19,22 @@
 #set $left_identifier_column = $identifier_column
 #set $tail_offset = int( str( $has_header ) ) + 1
 #for $i, $tabular_item in enumerate( $input_tabular ):
-    #if $has_header:
-        head -n ${has_header} "${tabular_item}" | awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}_%s", arr[i] ); ctr++ } }; printf( "\n" ); }' > input_header.tmp &&
-        tail -n +${tail_offset} "${tabular_item}" | LC_ALL=C sort -t "${delimiter}" -k $identifier_column > input_file.tmp &&
+    #if $old_col_in_header:
+        #if $has_header:
+            head -n ${has_header} "${tabular_item}" | awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "%s_%s", "${tabular_item.element_identifier}", arr[i] ); ctr++ } }; printf( "\n" ); }' > input_header.tmp &&
+            tail -n +${tail_offset} "${tabular_item}" | LC_ALL=C sort -t "${delimiter}" -k $identifier_column > input_file.tmp &&
+        #else:
+            awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "%s_%s", "${tabular_item.element_identifier}", i ); ctr++ } }; exit }' "${tabular_item}" > input_header.tmp &&
+            LC_ALL=C sort -t "${delimiter}" -k $identifier_column "${tabular_item}" > input_file.tmp &&
+        #end if
     #else:
-        awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}_%s", i ); ctr++ } }; exit }' "${tabular_item}" > input_header.tmp &&
-        LC_ALL=C sort -t "${delimiter}" -k $identifier_column "${tabular_item}" > input_file.tmp &&
+        #if $has_header:
+            head -n ${has_header} "${tabular_item}" | awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "%s", "${tabular_item.element_identifier}" ); ctr++ } }; printf( "\n" ); }' > input_header.tmp &&
+            tail -n +${tail_offset} "${tabular_item}" | LC_ALL=C sort -t "${delimiter}" -k $identifier_column > input_file.tmp &&
+        #else:
+            awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "%s", "${tabular_item.element_identifier}" ); ctr++ } }; exit }' "${tabular_item}" > input_header.tmp &&
+            LC_ALL=C sort -t "${delimiter}" -k $identifier_column "${tabular_item}" > input_file.tmp &&
+        #end if
     #end if
     #if $i == 0:
         mv input_file.tmp output${ ( $i + 1 ) % 2 }.tmp &&
@@ -48,6 +58,7 @@
         <!-- <param name="identifier_column" type="data_column" data_ref="input_tabular" value="0" min="0" optional="False" label="Identifier column"/> -->
         <param name="identifier_column" type="integer" value="1" min="0" optional="False" label="Identifier column"/>
         <param name="has_header" type="integer" value="0" min="0" optional="False" label="Number of Header lines in each item"/>
+        <param name="old_col_in_header" type="boolean" checked="true" label="Keep original column header" help="Disable if you want the column headers to consist only of the input dataset names"/>
         <param name="fill_char" type="text" value="." optional="False" label="Fill character"/>
         <param name="include_outputs" type="select" multiple="True" label="Additional datasets to create">
             <option value="output_shell_script" selected="false">Shell script</option>
@@ -64,6 +75,7 @@
             <param name="input_tabular" value="in_1.tabular,in_2.tabular,in_3.tabular" ftype="tabular"/>
             <param name="identifier_column" value="1"/>
             <param name="has_header" value="1"/>
+            <param name="old_col_in_header" value="true"/>
             <param name="fill_char" value="."/>
             <param name="include_outputs" />
             <output name="tabular_output" file="out_1.tabular" ftype="tabular"/>
@@ -72,10 +84,29 @@
             <param name="input_tabular" value="in_1_headerless.tabular,in_2_headerless.tabular,in_3_headerless.tabular" ftype="tabular"/>
             <param name="identifier_column" value="1"/>
             <param name="has_header" value="0"/>
+            <param name="old_col_in_header" value="true"/>
             <param name="fill_char" value="."/>
             <param name="include_outputs" />
             <output name="tabular_output" file="out_2.tabular" ftype="tabular"/>
         </test>
+        <test> <!-- one header line per input, old_col_in_header disabled; output is compared against out_3.tabular -->
+            <param name="input_tabular" value="in_1.tabular,in_2.tabular,in_3.tabular" ftype="tabular"/>
+            <param name="identifier_column" value="1"/>
+            <param name="has_header" value="1"/>
+            <param name="old_col_in_header" value="false"/>
+            <param name="fill_char" value="."/>
+            <param name="include_outputs" />
+            <output name="tabular_output" file="out_3.tabular" ftype="tabular"/>
+        </test>
+        <test> <!-- headerless inputs (has_header=0), old_col_in_header disabled; output is compared against out_4.tabular -->
+            <param name="input_tabular" value="in_1_headerless.tabular,in_2_headerless.tabular,in_3_headerless.tabular" ftype="tabular"/>
+            <param name="identifier_column" value="1"/>
+            <param name="has_header" value="0"/>
+            <param name="old_col_in_header" value="false"/>
+            <param name="fill_char" value="."/>
+            <param name="include_outputs" />
+            <output name="tabular_output" file="out_4.tabular" ftype="tabular"/>
+        </test>
     </tests>
     <help>
         <![CDATA[
@@ -117,6 +148,14 @@
     three   1-7              1-8            1-9             2-7              2-8             2-9             3-7             3-8             3-9
     two     1-4              1-5            1-6             2-4              2-5             2-6             3-4             3-5             3-6

+
+**Joining** the files, using an **identifier column of 1** and **1 header line**, but disabling **Keep original column header**, will return::
+
+    #KEY    in_1.tabular in_1.tabular in_1.tabular in_2.tabular in_2.tabular in_2.tabular in_3.tabular in_3.tabular in_3.tabular
+    one     1-1              1-2            1-3             2-1              2-2             2-3             3-3             3-2             3-3
+    three   1-7              1-8            1-9             2-7              2-8             2-9             3-7             3-8             3-9
+    two     1-4              1-5            1-6             2-4              2-5             2-6             3-4             3-5             3-6
+
         ]]>
     </help>
     <citations>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_3.tabular	Fri Apr 06 03:44:21 2018 -0400
@@ -0,0 +1,4 @@
+#KEY	in_1.tabular	in_1.tabular	in_1.tabular	in_2.tabular	in_2.tabular	in_2.tabular	in_3.tabular	in_3.tabular	in_3.tabular
+one	1-1	1-2	1-3	2-1	2-2	2-3	3-3	3-2	3-3
+three	1-7	1-8	1-9	2-7	2-8	2-9	3-7	3-8	3-9
+two	1-4	1-5	1-6	2-4	2-5	2-6	3-4	3-5	3-6
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_4.tabular	Fri Apr 06 03:44:21 2018 -0400
@@ -0,0 +1,4 @@
+#KEY	in_1_headerless.tabular	in_1_headerless.tabular	in_1_headerless.tabular	in_2_headerless.tabular	in_2_headerless.tabular	in_2_headerless.tabular	in_3_headerless.tabular	in_3_headerless.tabular	in_3_headerless.tabular
+one	1-1	1-2	1-3	2-1	2-2	2-3	3-3	3-2	3-3
+three	1-7	1-8	1-9	2-7	2-8	2-9	3-7	3-8	3-9
+two	1-4	1-5	1-6	2-4	2-5	2-6	3-4	3-5	3-6