Mercurial > repos > iuc > collection_column_join
changeset 3:58228a4d58fe draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/collection_column_join commit 9f1c3ab3f41bab8ff962aca1478c75e538e5bf6a
author | iuc |
---|---|
date | Fri, 06 Apr 2018 03:44:21 -0400 |
parents | dfde09461b1e |
children | 071084070619 |
files | collection_column_join.xml test-data/out_3.tabular test-data/out_4.tabular |
diffstat | 3 files changed, 53 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/collection_column_join.xml Wed May 24 06:19:45 2017 -0400 +++ b/collection_column_join.xml Fri Apr 06 03:44:21 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="collection_column_join" name="Column Join" version="0.0.2"> +<tool id="collection_column_join" name="Column Join" version="0.0.3"> <description>on Collections</description> <requirements> <requirement type="package" version="8.25">coreutils</requirement> @@ -19,12 +19,22 @@ #set $left_identifier_column = $identifier_column #set $tail_offset = int( str( $has_header ) ) + 1 #for $i, $tabular_item in enumerate( $input_tabular ): - #if $has_header: - head -n ${has_header} "${tabular_item}" | awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}_%s", arr[i] ); ctr++ } }; printf( "\n" ); }' > input_header.tmp && - tail -n +${tail_offset} "${tabular_item}" | LC_ALL=C sort -t "${delimiter}" -k $identifier_column > input_file.tmp && + #if $old_col_in_header: + #if $has_header: + head -n ${has_header} "${tabular_item}" | awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}_%s", arr[i] ); ctr++ } }; printf( "\n" ); }' > input_header.tmp && + tail -n +${tail_offset} "${tabular_item}" | LC_ALL=C sort -t "${delimiter}" -k $identifier_column > input_file.tmp && + #else: + awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}_%s", i ); ctr++ } }; exit }' "${tabular_item}" > input_header.tmp && + LC_ALL=C sort -t "${delimiter}" -k $identifier_column "${tabular_item}" > input_file.tmp && + #end if #else: - awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}_%s", i ); ctr++ } }; exit }' "${tabular_item}" > input_header.tmp && - LC_ALL=C sort -t "${delimiter}" -k $identifier_column "${tabular_item}" > input_file.tmp && + #if $has_header: + head -n ${has_header} "${tabular_item}" | awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}" ); ctr++ } }; printf( "\n" ); }' > input_header.tmp && + tail -n +${tail_offset} "${tabular_item}" | LC_ALL=C sort -t "${delimiter}" -k $identifier_column > input_file.tmp && + #else: + awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}"); ctr++ } }; exit }' "${tabular_item}" > input_header.tmp && + LC_ALL=C sort -t "${delimiter}" -k $identifier_column "${tabular_item}" > input_file.tmp && + #end if #end if #if $i == 0: mv input_file.tmp output${ ( $i + 1 ) % 2 }.tmp && @@ -48,6 +58,7 @@ <!-- <param name="identifier_column" type="data_column" data_ref="input_tabular" value="0" min="0" optional="False" label="Identifier column"/> --> <param name="identifier_column" type="integer" value="1" min="0" optional="False" label="Identifier column"/> <param name="has_header" type="integer" value="0" min="0" optional="False" label="Number of Header lines in each item"/> + <param name="old_col_in_header" type="boolean" checked="true" label="Keep original column header" help="Disable if you want columns headers to be only composed of the input dataset names"/> <param name="fill_char" type="text" value="." optional="False" label="Fill character"/> <param name="include_outputs" type="select" multiple="True" label="Additional datasets to create"> <option value="output_shell_script" selected="false">Shell script</option> @@ -64,6 +75,7 @@ <param name="input_tabular" value="in_1.tabular,in_2.tabular,in_3.tabular" ftype="tabular"/> <param name="identifier_column" value="1"/> <param name="has_header" value="1"/> + <param name="old_col_in_header" value="true"/> <param name="fill_char" value="."/> <param name="include_outputs" /> <output name="tabular_output" file="out_1.tabular" ftype="tabular"/> @@ -72,10 +84,29 @@ <param name="input_tabular" value="in_1_headerless.tabular,in_2_headerless.tabular,in_3_headerless.tabular" ftype="tabular"/> <param name="identifier_column" value="1"/> <param name="has_header" value="0"/> + <param name="old_col_in_header" value="true"/> <param name="fill_char" value="."/> <param name="include_outputs" /> <output name="tabular_output" file="out_2.tabular" ftype="tabular"/> </test> + <test> + <param name="input_tabular" value="in_1.tabular,in_2.tabular,in_3.tabular" ftype="tabular"/> + <param name="identifier_column" value="1"/> + <param name="has_header" value="1"/> + <param name="old_col_in_header" value="false"/> + <param name="fill_char" value="."/> + <param name="include_outputs" /> + <output name="tabular_output" file="out_3.tabular" ftype="tabular"/> + </test> + <test> + <param name="input_tabular" value="in_1_headerless.tabular,in_2_headerless.tabular,in_3_headerless.tabular" ftype="tabular"/> + <param name="identifier_column" value="1"/> + <param name="has_header" value="0"/> + <param name="old_col_in_header" value="false"/> + <param name="fill_char" value="."/> + <param name="include_outputs" /> + <output name="tabular_output" file="out_4.tabular" ftype="tabular"/> + </test> </tests> <help> <![CDATA[ @@ -117,6 +148,14 @@ three 1-7 1-8 1-9 2-7 2-8 2-9 3-7 3-8 3-9 two 1-4 1-5 1-6 2-4 2-5 2-6 3-4 3-5 3-6 + +**Joining** the files, using **identifier column of 1** and a **header lines of 1**, but disabling **Keep original column header**, will return:: + + #KEY in_1.tabular in_1.tabular in_1.tabular in_2.tabular in_2.tabular in_2.tabular in_3.tabular in_3.tabular in_3.tabular + one 1-1 1-2 1-3 2-1 2-2 2-3 3-3 3-2 3-3 + three 1-7 1-8 1-9 2-7 2-8 2-9 3-7 3-8 3-9 + two 1-4 1-5 1-6 2-4 2-5 2-6 3-4 3-5 3-6 + ]]> </help> <citations>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/out_3.tabular Fri Apr 06 03:44:21 2018 -0400 @@ -0,0 +1,4 @@ +#KEY in_1.tabular in_1.tabular in_1.tabular in_2.tabular in_2.tabular in_2.tabular in_3.tabular in_3.tabular in_3.tabular +one 1-1 1-2 1-3 2-1 2-2 2-3 3-3 3-2 3-3 +three 1-7 1-8 1-9 2-7 2-8 2-9 3-7 3-8 3-9 +two 1-4 1-5 1-6 2-4 2-5 2-6 3-4 3-5 3-6
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/out_4.tabular Fri Apr 06 03:44:21 2018 -0400 @@ -0,0 +1,4 @@ +#KEY in_1_headerless.tabular in_1_headerless.tabular in_1_headerless.tabular in_2_headerless.tabular in_2_headerless.tabular in_2_headerless.tabular in_3_headerless.tabular in_3_headerless.tabular in_3_headerless.tabular +one 1-1 1-2 1-3 2-1 2-2 2-3 3-3 3-2 3-3 +three 1-7 1-8 1-9 2-7 2-8 2-9 3-7 3-8 3-9 +two 1-4 1-5 1-6 2-4 2-5 2-6 3-4 3-5 3-6