Mercurial > repos > artbio > concatenate_multiple_datasets
changeset 5:99a5ed06b86c draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/concat_multi_datasets commit 494d583f425daec963ccd02907718e02d5d66b58
author | artbio |
---|---|
date | Mon, 24 Jun 2019 03:58:52 -0400 |
parents | 7afc0515a307 |
children | 4554fa330d3d |
files | catWrapper.xml |
diffstat | 1 files changed, 73 insertions(+), 209 deletions(-) [+] |
line wrap: on
line diff
--- a/catWrapper.xml Tue Jun 18 11:59:06 2019 -0400 +++ b/catWrapper.xml Mon Jun 24 03:58:52 2019 -0400 @@ -1,4 +1,4 @@ -<tool id="cat_multi_datasets" name="Concatenate multiple datasets" version="1.3.0"> +<tool id="cat_multi_datasets" name="Concatenate multiple datasets" version="1.2.0"> <description>tail-to-head by specifying how</description> <command><![CDATA[ #if $headers == 0: @@ -8,14 +8,11 @@ #end if #if $global_condition.input_type == "singles": #if $dataset_names == "No": + $concat_command #for $file in $global_condition.inputs - #if $file.ext[-2:] == "gz": - gzip -dc '$file' | $concat_command | gzip -c >> '$out_file1' && - #else: - $concat_command '$file' >> '$out_file1' && - #end if + '$file' #end for - sleep 1 + > '$out_file1' #else: #for $file in $global_condition.inputs #if $file.ext[-2:] == "gz": @@ -29,19 +26,15 @@ sleep 1 #end if #else if $global_condition.input_type == "simple_collections": + #if $global_condition.collections_condition.collection_cat_type == "two_collections": mkdir concatenated && #if $dataset_names == "No": - #for $x, $y in zip($global_condition.input_1, $global_condition.input_2): - #if $x.ext[-2:] == "gz": - gzip -dc '$x' | $concat_command | gzip -c > concatenated/'${x.element_identifier}.listed.${x.ext}.listed' && - gzip -dc '$y' | $concat_command | gzip -c >> concatenated/'${x.element_identifier}.listed.${x.ext}.listed' && - #else: - $concat_command '$x' '$y' > concatenated/'${x.element_identifier}.listed.${x.ext}.listed' && - #end if + #for $x, $y in zip($global_condition.collections_condition.input_1, $global_condition.collections_condition.input_2) + $concat_command '$x' '$y' > concatenated/'${x.element_identifier}.listed.${x.ext}.listed' && #end for sleep 1 #else: - #for $x, $y in zip($global_condition.input_1, $global_condition.input_2) + #for $x, $y in zip($global_condition.collections_condition.input_1, $global_condition.collections_condition.input_2) #if $x.ext[-2:] == "gz": printf "# ${x.element_identifier}\n" | gzip -c > concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && gzip -dc '$x' | $concat_command | gzip -c >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && @@ -56,142 +49,82 @@ #end for sleep 1 #end if + #end if #else if $global_condition.input_type == "paired_collection": #if $global_condition.paired_cat_type == "by_strand": - mkdir concatenated && #if $dataset_names == "No": #for $file in $global_condition.inputs - #if $file['forward'].ext[-2:] == "gz": - gzip -dc $file['forward'] | $concat_command | gzip -c >> concatenated/forward.listed.${file['forward'].ext}.listed && - gzip -dc $file['reverse'] | $concat_command | gzip -c >> concatenated/reverse.listed.${file['reverse'].ext}.listed && - #else: - $concat_command $file['forward'] >> concatenated/forward.listed.${file['forward'].ext}.listed && - $concat_command $file['reverse'] >> concatenated/reverse.listed.${file['reverse'].ext}.listed && - #end if + $concat_command + $file['forward'] + >> '$forward' && + $concat_command + $file['reverse'] + >> '$reverse' && #end for sleep 1 #else: - #for $file in $global_condition.inputs.keys(): - #if $global_condition.inputs[$file]['forward'].ext[-2:] == "gz": - printf "# ${file}_forward\n" | gzip -c >> concatenated/forward.listed.${global_condition.inputs[$file]['forward'].ext}.listed && - gzip -dc $global_condition.inputs[$file]['forward'] | $concat_command | gzip -c >> concatenated/forward.listed.${global_condition.inputs[$file]['forward'].ext}.listed && - printf "# ${file}_reverse\n" | gzip -c >> concatenated/reverse.listed.${global_condition.inputs[$file]['reverse'].ext}.listed && - gzip -dc $global_condition.inputs[$file]['reverse'] | $concat_command | gzip -c >> concatenated/reverse.listed.${global_condition.inputs[$file]['reverse'].ext}.listed && - #else: - printf "# ${file}_forward\n" >> concatenated/forward.listed.${global_condition.inputs[$file]['forward'].ext}.listed && - $concat_command $global_condition.inputs[$file]['forward'] >> concatenated/forward.listed.${global_condition.inputs[$file]['forward'].ext}.listed && - printf "# ${file}_reverse\n" >> concatenated/reverse.listed.${global_condition.inputs[$file]['reverse'].ext}.listed && - $concat_command $global_condition.inputs[$file]['reverse'] >> concatenated/reverse.listed.${global_condition.inputs[$file]['reverse'].ext}.listed && - #end if + #for $file in $global_condition.inputs.keys() + printf "# ${file}_forward\n" >> '$forward' && + $concat_command + $global_condition.inputs[$file]['forward'] + >> '$forward' && + printf "# ${file}_reverse\n" >> '$reverse' && + $concat_command + $global_condition.inputs[$file]['reverse'] + >> '$reverse' && #end for sleep 1 #end if #else if $global_condition.paired_cat_type == "by_pair": mkdir concatenated && #if $dataset_names == "No": - #for $file in $global_condition.inputs.keys(): - #if $global_condition.inputs[$file]['forward'].ext[-2:] == "gz": - gzip -dc $global_condition.inputs[$file]['forward'] | $concat_command | gzip -c - > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - gzip -dc $global_condition.inputs[$file]['reverse'] | $concat_command | gzip -c - >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - #else: - $concat_command $global_condition.inputs[$file]['forward'] - > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - $concat_command $global_condition.inputs[$file]['reverse'] - >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - #end if + #for $file in $global_condition.inputs.keys() + $concat_command + $global_condition.inputs[$file]['forward'] + > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + $concat_command + $global_condition.inputs[$file]['reverse'] + >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && #end for sleep 1 #else: - #for $file in $global_condition.inputs.keys(): - #if $global_condition.inputs[$file]['reverse'].ext[-2:] == "gz": - printf "# ${file}_forward\n" | gzip -c > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - gzip -dc $global_condition.inputs[$file]['forward'] | $concat_command | gzip -c - >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - printf "# ${file}_reverse\n" | gzip -c >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - gzip -dc $global_condition.inputs[$file]['reverse'] | $concat_command | gzip -c - >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - #else: - printf "# ${file}_forward\n" > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - $concat_command $global_condition.inputs[$file]['forward'] - >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - printf "# ${file}_reverse\n" >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - $concat_command $global_condition.inputs[$file]['reverse'] - >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - #end if + #for $file in $global_condition.inputs.keys() + printf "# ${file}_forward\n" > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + $concat_command + $global_condition.inputs[$file]['forward'] + >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + printf "# ${file}_reverse\n" >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + $concat_command + $global_condition.inputs[$file]['reverse'] + >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && #end for sleep 1 #end if #else if $global_condition.paired_cat_type == "all": - mkdir concatenated && - #set $base_name=$global_condition.inputs.element_identifier - #set $extention=$global_condition.inputs[$global_condition.inputs.keys()[0]]['forward'].ext #if $dataset_names == "No": - #for $file in $global_condition.inputs.keys(): - #if $global_condition.inputs[$file]['forward'].ext[-2:] == "gz": - gzip -dc $global_condition.inputs[$file]['forward'] | $concat_command | gzip -c >> c'$paired_out_file' && - gzip -dc $global_condition.inputs[$file]['reverse'] | $concat_command | gzip -c >> '$paired_out_file' && - #else: - $concat_command - $global_condition.inputs[$file]['forward'] - >> '$paired_out_file' && - $concat_command - $global_condition.inputs[$file]['reverse'] - >> '$paired_out_file' && - #end if + #for $file in $global_condition.inputs.keys() + $concat_command + $global_condition.inputs[$file]['forward'] + >> $out_file1 && + $concat_command + $global_condition.inputs[$file]['reverse'] + >> $out_file1 && #end for sleep 1 #else: - #for $file in $global_condition.inputs.keys(): - #if $global_condition.inputs[$file]['forward'].ext[-2:] == "gz": - printf "# ${file}_forward\n" | gzip -c > '$paired_out_file' && - gzip -dc $global_condition.inputs[$file]['forward'] | $concat_command | gzip -c >> '$paired_out_file' && - printf "# ${file}_reverse\n" | gzip -c >> '$paired_out_file' && - gzip -dc $global_condition.inputs[$file]['reverse'] | $concat_command | gzip -c >> '$paired_out_file' && - #else: - printf "# ${file}_forward\n" > '$paired_out_file' && - $concat_command - $global_condition.inputs[$file]['forward'] - >> '$paired_out_file' && - printf "# ${file}_reverse\n" >> '$paired_out_file' && - $concat_command - $global_condition.inputs[$file]['reverse'] - >> '$paired_out_file' && - #end if + #for $file in $global_condition.inputs.keys() + printf "# ${file}_forward\n" > $out_file1 && + $concat_command + $global_condition.inputs[$file]['forward'] + >> $out_file1 && + printf "# ${file}_reverse\n" >> $out_file1 && + $concat_command + $global_condition.inputs[$file]['reverse'] + >> $out_file1 && #end for sleep 1 #end if #end if - #else if $global_condition.input_type == "nested_collection": - mkdir concatenated && - #if $dataset_names == "No": - #for $sub_list in $global_condition.inputs: - #set $file_base_name=$sub_list.element_identifier - #for $sub_list_element in $sub_list: - #if $sub_list_element.ext[-2:] == "gz": - gzip -dc ${sub_list_element} | $concat_command | gzip -c >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && - #else: - $concat_command ${sub_list_element} >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && - #end if - #end for - #end for - sleep 1 - #else: - #for $sub_list in $global_condition.inputs: - #set $file_base_name=$sub_list.element_identifier - #for $sub_list_element in $sub_list: - #if $sub_list_element.ext[-2:] == "gz": - printf "# ${sub_list_element.element_identifier}\n" | gzip -c >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && - gzip -dc ${sub_list_element} | $concat_command | gzip -c >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && - #else: - printf "# ${sub_list_element.element_identifier}\n" >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && - $concat_command ${sub_list_element} >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && - #end if - #end for - #end for - sleep 1 - #end if #end if ]]> </command> @@ -199,9 +132,8 @@ <conditional name="global_condition"> <param name="input_type" type="select" label="What type of data do you wish to concatenate?" help="Depending on the type of input selected the concatenation options will differ"> <option value="singles">Single datasets</option> - <option value="simple_collections">2 Collections</option> + <option value="simple_collections">Collections</option> <option value="paired_collection">Paired collection</option> - <option value="nested_collection">Nested collection</option> </param> <when value="singles"> <param name="inputs" type="data" label="Concatenate Datasets" multiple="True" help="All inputed datasets will be concatenated tail-to-head."/> @@ -215,11 +147,15 @@ </param> </when> <when value="simple_collections"> - <param name="input_1" type="data_collection" collection_type="list" label="Input first collection" help="The first collection contains the datasets that will be written first in the concatenated file" /> - <param name="input_2" type="data_collection" collection_type="list" label="Input second collection" help="The second collection contains the datasets that will be written last in the concatenated file" /> - </when> - <when value="nested_collection"> - <param name="inputs" type="data_collection" collection_type="list:list" label="Input nested collection" help="The Nested collection which items you want to concatenate." /> + <conditional name="collections_condition"> + <param name="collection_cat_type" type="select" label="What type of concatenation do you wish to perform?"> + <option value="two_collections">Concatenate datasets of 2 collections (outputs a simple collection)</option> + </param> + <when value="two_collections"> + <param name="input_1" type="data_collection" collection_type="list" label="Input first collection" help="The first collection contains the datasets that will be written first in the concatenated file" /> + <param name="input_2" type="data_collection" collection_type="list" label="Input second collection" help="The second collection contains the datasets that will be written last in the concatenated file" /> + </when> + </conditional> </when> </conditional> <param name="dataset_names" type="boolean" label="Include dataset names?" truevalue="Yes" falsevalue="No" checked="false" help="If 'Yes' is selected '#name of dataset' will be added when concatenating."/> @@ -227,18 +163,16 @@ </inputs> <outputs> <data name="out_file1" format_source="inputs" metadata_source="inputs" label="Concatenated datasets"> - <filter>global_condition['input_type'] == 'singles'</filter> - </data> - <data name="paired_out_file" label="${global_condition.inputs.element_identifier}" auto_format="true"> - <filter>global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'all'</filter> + <filter>global_condition['input_type'] == 'singles' or (global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'all')</filter> </data> <collection name="paired_output" type="paired" label="Concatenation by strtand"> - <discover_datasets pattern="(?P<name>.*)\.listed\.(?P<ext>.*)\.listed" visible="false" directory="concatenated"/> + <data name="forward" /> + <data name="reverse" /> <filter>global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_strand'</filter> </collection> <collection name="list_output" type="list" label="Concatenation by pairs"> - <discover_datasets pattern="(?P<identifier_0>.*)\.listed\.(?P<ext>.*)\.listed" visible="false" directory="concatenated"/> - <filter>(global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_pair') or (global_condition['input_type'] == 'simple_collections') or (global_condition['input_type'] == 'nested_collection')</filter> + <discover_datasets pattern="(?P<name>.*)\.listed\.(?P<ext>.*)\.listed" visible="false" directory="concatenated"/> + <filter>(global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_pair') or (global_condition['input_type'] == 'simple_collections' and global_condition['collections_condition']['collection_cat_type'] == 'two_collections')</filter> </collection> </outputs> <tests> @@ -388,48 +322,6 @@ <element name="1_f.fastq_1_r.fastq" file="1_options.fastq.gz" decompress="True"/> </output_collection> </test> - <test> <!-- Test nested collections concatenation --> - <param name="input_type" value="nested_collection" /> - <param name="inputs"> - <collection type="list:list"> - <element name="2"> - <collection type="list"> - <element name="2_f" value="2_f.fastq" ftype="fastq"/> - <element name="2_r" value="2_r.fastq" ftype="fastq"/> - </collection> - </element> - <element name="3"> - <collection type="list"> - <element name="3" value="3.fastq" ftype="fastq"/> - </collection> - </element> - </collection> - </param> - <param name="dataset_names" value="No" /> - <param name="headers" value="0" /> - <output_collection name="list_output" type="list" count="2" > - <element name="2" file="2.fastq"/> - <element name="3" file="3.fastq"/> - </output_collection> - </test> - <test> <!-- Test nested collections concatenation with options and gzip--> - <param name="input_type" value="nested_collection" /> - <param name="inputs"> - <collection type="list:list"> - <element name="1"> - <collection type="list"> - <element name="1_f.fastq" value="1_f.fastq.gz" ftype="fastq.gz"/> - <element name="1_r.fastq" value="1_r.fastq.gz" ftype="fastq.gz"/> - </collection> - </element> - </collection> - </param> - <param name="dataset_names" value="Yes" /> - <param name="headers" value="4" /> - <output_collection name="list_output" type="list" count="1" > - <element name="1" file="1_options.fastq.gz" decompress="True"/> - </output_collection> - </test> </tests> <help> @@ -437,9 +329,9 @@ **WARNING:** This tool does not check if the datasets being concatenated are in the same format. -**WARNING:** When concatenating 2 collections make sure the first collection is the one with the most items. +**WARNING:** The paired collection operations do not handle gziped files. -**WARNING:** This tool can't handle nested collection deeper than list:list. +**WARNING:** When concatenating 2 collections make sure the first collection is the one with the most items. ----- @@ -457,8 +349,6 @@ - When the inputs are 2 collections: datasets are concatenated in a pairwise combination and a single dataset collection is returned - - When nested collection concatenation: datasets in each sub-collection are concatenated and a simple dataset collection is returned - - Skipping lines before concatenation to avoid headers - Add the name of the concatenated files as separator @@ -566,32 +456,6 @@ ----- -**Nested collection concatenation example** - -Nested collection: - - - Experiment - - - Sample_1 - - - Sample_1_file_1 - - Sample_1_file_2 - - - Sample_2 - - - Sample_2_file_1 - - Sample_2_file_2 - - Sample_2_file_3 - -Concatenation result:: - - A single collection containing: - - - Sample_1: (Sample_1_file_1 + Sample_1_file_2) - - Sample_2: (Sample_2_file_1 + Sample_2_file_2 + Sample_2_file_3) - ------ - **When selecting "Include dataset names" when concatenating files**: 1rst file name="first_tabular"::