# HG changeset patch # User artbio # Date 1561363132 14400 # Node ID 99a5ed06b86c460cf14ed491a9e43d3e3cac00b1 # Parent 7afc0515a30765262cc565a1ee774bf0172fcb7c planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/concat_multi_datasets commit 494d583f425daec963ccd02907718e02d5d66b58 diff -r 7afc0515a307 -r 99a5ed06b86c catWrapper.xml --- a/catWrapper.xml Tue Jun 18 11:59:06 2019 -0400 +++ b/catWrapper.xml Mon Jun 24 03:58:52 2019 -0400 @@ -1,4 +1,4 @@ - + tail-to-head by specifying how > '$out_file1' && - #else: - $concat_command '$file' >> '$out_file1' && - #end if + '$file' #end for - sleep 1 + > '$out_file1' #else: #for $file in $global_condition.inputs #if $file.ext[-2:] == "gz": @@ -29,19 +26,15 @@ sleep 1 #end if #else if $global_condition.input_type == "simple_collections": + #if $global_condition.collections_condition.collection_cat_type == "two_collections": mkdir concatenated && #if $dataset_names == "No": - #for $x, $y in zip($global_condition.input_1, $global_condition.input_2): - #if $x.ext[-2:] == "gz": - gzip -dc '$x' | $concat_command | gzip -c > concatenated/'${x.element_identifier}.listed.${x.ext}.listed' && - gzip -dc '$y' | $concat_command | gzip -c >> concatenated/'${x.element_identifier}.listed.${x.ext}.listed' && - #else: - $concat_command '$x' '$y' > concatenated/'${x.element_identifier}.listed.${x.ext}.listed' && - #end if + #for $x, $y in zip($global_condition.collections_condition.input_1, $global_condition.collections_condition.input_2) + $concat_command '$x' '$y' > concatenated/'${x.element_identifier}.listed.${x.ext}.listed' && #end for sleep 1 #else: - #for $x, $y in zip($global_condition.input_1, $global_condition.input_2) + #for $x, $y in zip($global_condition.collections_condition.input_1, $global_condition.collections_condition.input_2) #if $x.ext[-2:] == "gz": printf "# ${x.element_identifier}\n" | gzip -c > concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && gzip -dc '$x' | $concat_command | gzip -c >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && @@ -56,142 +49,82 @@ #end for sleep 1 #end if + #end if #else if $global_condition.input_type == "paired_collection": #if $global_condition.paired_cat_type == "by_strand": - mkdir concatenated && #if $dataset_names == "No": #for $file in $global_condition.inputs - #if $file['forward'].ext[-2:] == "gz": - gzip -dc $file['forward'] | $concat_command | gzip -c >> concatenated/forward.listed.${file['forward'].ext}.listed && - gzip -dc $file['reverse'] | $concat_command | gzip -c >> concatenated/reverse.listed.${file['reverse'].ext}.listed && - #else: - $concat_command $file['forward'] >> concatenated/forward.listed.${file['forward'].ext}.listed && - $concat_command $file['reverse'] >> concatenated/reverse.listed.${file['reverse'].ext}.listed && - #end if + $concat_command + $file['forward'] + >> '$forward' && + $concat_command + $file['reverse'] + >> '$reverse' && #end for sleep 1 #else: - #for $file in $global_condition.inputs.keys(): - #if $global_condition.inputs[$file]['forward'].ext[-2:] == "gz": - printf "# ${file}_forward\n" | gzip -c >> concatenated/forward.listed.${global_condition.inputs[$file]['forward'].ext}.listed && - gzip -dc $global_condition.inputs[$file]['forward'] | $concat_command | gzip -c >> concatenated/forward.listed.${global_condition.inputs[$file]['forward'].ext}.listed && - printf "# ${file}_reverse\n" | gzip -c >> concatenated/reverse.listed.${global_condition.inputs[$file]['reverse'].ext}.listed && - gzip -dc $global_condition.inputs[$file]['reverse'] | $concat_command | gzip -c >> concatenated/reverse.listed.${global_condition.inputs[$file]['reverse'].ext}.listed && - #else: - printf "# ${file}_forward\n" >> concatenated/forward.listed.${global_condition.inputs[$file]['forward'].ext}.listed && - $concat_command $global_condition.inputs[$file]['forward'] >> concatenated/forward.listed.${global_condition.inputs[$file]['forward'].ext}.listed && - printf "# ${file}_reverse\n" >> concatenated/reverse.listed.${global_condition.inputs[$file]['reverse'].ext}.listed && - $concat_command $global_condition.inputs[$file]['reverse'] >> concatenated/reverse.listed.${global_condition.inputs[$file]['reverse'].ext}.listed && - #end if + #for $file in $global_condition.inputs.keys() + printf "# ${file}_forward\n" >> '$forward' && + $concat_command + $global_condition.inputs[$file]['forward'] + >> '$forward' && + printf "# ${file}_reverse\n" >> '$reverse' && + $concat_command + $global_condition.inputs[$file]['reverse'] + >> '$reverse' && #end for sleep 1 #end if #else if $global_condition.paired_cat_type == "by_pair": mkdir concatenated && #if $dataset_names == "No": - #for $file in $global_condition.inputs.keys(): - #if $global_condition.inputs[$file]['forward'].ext[-2:] == "gz": - gzip -dc $global_condition.inputs[$file]['forward'] | $concat_command | gzip -c - > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - gzip -dc $global_condition.inputs[$file]['reverse'] | $concat_command | gzip -c - >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - #else: - $concat_command $global_condition.inputs[$file]['forward'] - > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - $concat_command $global_condition.inputs[$file]['reverse'] - >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - #end if + #for $file in $global_condition.inputs.keys() + $concat_command + $global_condition.inputs[$file]['forward'] + > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + $concat_command + $global_condition.inputs[$file]['reverse'] + >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && #end for sleep 1 #else: - #for $file in $global_condition.inputs.keys(): - #if $global_condition.inputs[$file]['reverse'].ext[-2:] == "gz": - printf "# ${file}_forward\n" | gzip -c > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - gzip -dc $global_condition.inputs[$file]['forward'] | $concat_command | gzip -c - >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - printf "# ${file}_reverse\n" | gzip -c >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - gzip -dc $global_condition.inputs[$file]['reverse'] | $concat_command | gzip -c - >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - #else: - printf "# ${file}_forward\n" > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - $concat_command $global_condition.inputs[$file]['forward'] - >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - printf "# ${file}_reverse\n" >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - $concat_command $global_condition.inputs[$file]['reverse'] - >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - #end if + #for $file in $global_condition.inputs.keys() + printf "# ${file}_forward\n" > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + $concat_command + $global_condition.inputs[$file]['forward'] + >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + printf "# ${file}_reverse\n" >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + $concat_command + $global_condition.inputs[$file]['reverse'] + >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && #end for sleep 1 #end if #else if $global_condition.paired_cat_type == "all": - mkdir concatenated && - #set $base_name=$global_condition.inputs.element_identifier - #set $extention=$global_condition.inputs[$global_condition.inputs.keys()[0]]['forward'].ext #if $dataset_names == "No": - #for $file in $global_condition.inputs.keys(): - #if $global_condition.inputs[$file]['forward'].ext[-2:] == "gz": - gzip -dc $global_condition.inputs[$file]['forward'] | $concat_command | gzip -c >> c'$paired_out_file' && - gzip -dc $global_condition.inputs[$file]['reverse'] | $concat_command | gzip -c >> '$paired_out_file' && - #else: - $concat_command - $global_condition.inputs[$file]['forward'] - >> '$paired_out_file' && - $concat_command - $global_condition.inputs[$file]['reverse'] - >> '$paired_out_file' && - #end if + #for $file in $global_condition.inputs.keys() + $concat_command + $global_condition.inputs[$file]['forward'] + >> $out_file1 && + $concat_command + $global_condition.inputs[$file]['reverse'] + >> $out_file1 && #end for sleep 1 #else: - #for $file in $global_condition.inputs.keys(): - #if $global_condition.inputs[$file]['forward'].ext[-2:] == "gz": - printf "# ${file}_forward\n" | gzip -c > '$paired_out_file' && - gzip -dc $global_condition.inputs[$file]['forward'] | $concat_command | gzip -c >> '$paired_out_file' && - printf "# ${file}_reverse\n" | gzip -c >> '$paired_out_file' && - gzip -dc $global_condition.inputs[$file]['reverse'] | $concat_command | gzip -c >> '$paired_out_file' && - #else: - printf "# ${file}_forward\n" > '$paired_out_file' && - $concat_command - $global_condition.inputs[$file]['forward'] - >> '$paired_out_file' && - printf "# ${file}_reverse\n" >> '$paired_out_file' && - $concat_command - $global_condition.inputs[$file]['reverse'] - >> '$paired_out_file' && - #end if + #for $file in $global_condition.inputs.keys() + printf "# ${file}_forward\n" > $out_file1 && + $concat_command + $global_condition.inputs[$file]['forward'] + >> $out_file1 && + printf "# ${file}_reverse\n" >> $out_file1 && + $concat_command + $global_condition.inputs[$file]['reverse'] + >> $out_file1 && #end for sleep 1 #end if #end if - #else if $global_condition.input_type == "nested_collection": - mkdir concatenated && - #if $dataset_names == "No": - #for $sub_list in $global_condition.inputs: - #set $file_base_name=$sub_list.element_identifier - #for $sub_list_element in $sub_list: - #if $sub_list_element.ext[-2:] == "gz": - gzip -dc ${sub_list_element} | $concat_command | gzip -c >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && - #else: - $concat_command ${sub_list_element} >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && - #end if - #end for - #end for - sleep 1 - #else: - #for $sub_list in $global_condition.inputs: - #set $file_base_name=$sub_list.element_identifier - #for $sub_list_element in $sub_list: - #if $sub_list_element.ext[-2:] == "gz": - printf "# ${sub_list_element.element_identifier}\n" | gzip -c >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && - gzip -dc ${sub_list_element} | $concat_command | gzip -c >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && - #else: - printf "# ${sub_list_element.element_identifier}\n" >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && - $concat_command ${sub_list_element} >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && - #end if - #end for - #end for - sleep 1 - #end if #end if ]]> @@ -199,9 +132,8 @@ - + - @@ -215,11 +147,15 @@ - - - - - + + + + + + + + + @@ -227,18 +163,16 @@ - global_condition['input_type'] == 'singles' - - - global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'all' + global_condition['input_type'] == 'singles' or (global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'all') - + + global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_strand' - - (global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_pair') or (global_condition['input_type'] == 'simple_collections') or (global_condition['input_type'] == 'nested_collection') + + (global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_pair') or (global_condition['input_type'] == 'simple_collections' and global_condition['collections_condition']['collection_cat_type'] == 'two_collections') @@ -388,48 +322,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -437,9 +329,9 @@ **WARNING:** This tool does not check if the datasets being concatenated are in the same format. -**WARNING:** When concatenating 2 collections make sure the first collection is the one with the most items. +**WARNING:** The paired collection operations do not handle gziped files. -**WARNING:** This tool can't handle nested collection deeper than list:list. +**WARNING:** When concatenating 2 collections make sure the first collection is the one with the most items. ----- @@ -457,8 +349,6 @@ - When the inputs are 2 collections: datasets are concatenated in a pairwise combination and a single dataset collection is returned - - When nested collection concatenation: datasets in each sub-collection are concatenated and a simple dataset collection is returned - - Skipping lines before concatenation to avoid headers - Add the name of the concatenated files as separator @@ -566,32 +456,6 @@ ----- -**Nested collection concatenation example** - -Nested collection: - - - Experiment - - - Sample_1 - - - Sample_1_file_1 - - Sample_1_file_2 - - - Sample_2 - - - Sample_2_file_1 - - Sample_2_file_2 - - Sample_2_file_3 - -Concatenation result:: - - A single collection containing: - - - Sample_1: (Sample_1_file_1 + Sample_1_file_2) - - Sample_2: (Sample_2_file_1 + Sample_2_file_2 + Sample_2_file_3) - ------ - **When selecting "Include dataset names" when concatenating files**: 1rst file name="first_tabular"::