Mercurial > repos > artbio > concatenate_multiple_datasets
view catWrapper.xml @ 5:99a5ed06b86c draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/concat_multi_datasets commit 494d583f425daec963ccd02907718e02d5d66b58
author | artbio |
---|---|
date | Mon, 24 Jun 2019 03:58:52 -0400 |
parents | 7afc0515a307 |
children | 4554fa330d3d |
line wrap: on
line source
<tool id="cat_multi_datasets" name="Concatenate multiple datasets" version="1.2.0">
    <description>tail-to-head by specifying how</description>
    <!--
        Command overview
        ================
        The Cheetah template below emits a single shell command line.

        * $concat_command is "cat" when no line has to be skipped, otherwise
          "tail -q -n +(headers+1)" so that the first $headers lines of every
          dataset are dropped.
        * "singles": all selected datasets are written to out_file1.
        * "simple_collections" / "two_collections": elements of input_1 and
          input_2 are paired with zip() and each pair is written to a file in
          ./concatenated, which list_output discovers afterwards.
        * "paired_collection": by_strand fills the forward/reverse outputs,
          by_pair writes one file per pair into ./concatenated, all writes
          everything to out_file1.

        Gzip-aware handling (gzip -dc ... | gzip -c) only exists in the
        "include dataset names" branches of "singles" and "two_collections";
        every other branch passes the files to $concat_command as they are.

        Dataset names are handed to printf as a "%s" argument rather than being
        embedded in the format string, so a name containing "%" or a backslash
        is written verbatim.
    -->
    <command><![CDATA[
    #if $headers == 0:
        #set $concat_command = "cat"
    #else:
        #set $concat_command = 'tail -q -n +'+ str(int($headers)+1)
    #end if
    #if $global_condition.input_type == "singles":
        #if $dataset_names == "No":
            $concat_command
            #for $file in $global_condition.inputs
                '$file'
            #end for
            > '$out_file1'
        #else:
            #for $file in $global_condition.inputs
                #if $file.ext[-2:] == "gz":
                    printf "# %s\n" "${file.element_identifier}" | gzip -c >> '$out_file1' &&
                    gzip -dc '$file' | $concat_command | gzip -c >> '$out_file1' &&
                #else:
                    printf "# %s\n" "${file.element_identifier}" >> '$out_file1' &&
                    $concat_command '$file' >> '$out_file1' &&
                #end if
            #end for
            ## every generated line ends with "&&"; this closes the chain
            sleep 1
        #end if
    #else if $global_condition.input_type == "simple_collections":
        #if $global_condition.collections_condition.collection_cat_type == "two_collections":
            mkdir concatenated &&
            #if $dataset_names == "No":
                #for $x, $y in zip($global_condition.collections_condition.input_1, $global_condition.collections_condition.input_2)
                    $concat_command '$x' '$y' > concatenated/'${x.element_identifier}.listed.${x.ext}.listed' &&
                #end for
                sleep 1
            #else:
                #for $x, $y in zip($global_condition.collections_condition.input_1, $global_condition.collections_condition.input_2)
                    #if $x.ext[-2:] == "gz":
                        printf "# %s\n" "${x.element_identifier}" | gzip -c > concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' &&
                        gzip -dc '$x' | $concat_command | gzip -c >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' &&
                        printf "# %s\n" "${y.element_identifier}" | gzip -c >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' &&
                        gzip -dc '$y' | $concat_command | gzip -c >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' &&
                    #else:
                        printf "# %s\n" "${x.element_identifier}" > concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' &&
                        $concat_command '$x' >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' &&
                        printf "# %s\n" "${y.element_identifier}" >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' &&
                        $concat_command '$y' >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' &&
                    #end if
                #end for
                sleep 1
            #end if
        #end if
    #else if $global_condition.input_type == "paired_collection":
        #if $global_condition.paired_cat_type == "by_strand":
            #if $dataset_names == "No":
                #for $file in $global_condition.inputs
                    $concat_command '${file['forward']}' >> '$forward' &&
                    $concat_command '${file['reverse']}' >> '$reverse' &&
                #end for
                sleep 1
            #else:
                #for $file in $global_condition.inputs.keys()
                    printf "# %s\n" "${file}_forward" >> '$forward' &&
                    $concat_command '${global_condition.inputs[$file]['forward']}' >> '$forward' &&
                    printf "# %s\n" "${file}_reverse" >> '$reverse' &&
                    $concat_command '${global_condition.inputs[$file]['reverse']}' >> '$reverse' &&
                #end for
                sleep 1
            #end if
        #else if $global_condition.paired_cat_type == "by_pair":
            mkdir concatenated &&
            #if $dataset_names == "No":
                #for $file in $global_condition.inputs.keys()
                    $concat_command '${global_condition.inputs[$file]['forward']}' > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' &&
                    $concat_command '${global_condition.inputs[$file]['reverse']}' >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' &&
                #end for
                sleep 1
            #else:
                #for $file in $global_condition.inputs.keys()
                    printf "# %s\n" "${file}_forward" > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' &&
                    $concat_command '${global_condition.inputs[$file]['forward']}' >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' &&
                    printf "# %s\n" "${file}_reverse" >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' &&
                    $concat_command '${global_condition.inputs[$file]['reverse']}' >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' &&
                #end for
                sleep 1
            #end if
        #else if $global_condition.paired_cat_type == "all":
            #if $dataset_names == "No":
                #for $file in $global_condition.inputs.keys()
                    $concat_command '${global_condition.inputs[$file]['forward']}' >> '$out_file1' &&
                    $concat_command '${global_condition.inputs[$file]['reverse']}' >> '$out_file1' &&
                #end for
                sleep 1
            #else:
                #for $file in $global_condition.inputs.keys()
                    ## append (>>), never truncate (>): a truncating redirect inside
                    ## this loop would discard every pair written before the last one
                    printf "# %s\n" "${file}_forward" >> '$out_file1' &&
                    $concat_command '${global_condition.inputs[$file]['forward']}' >> '$out_file1' &&
                    printf "# %s\n" "${file}_reverse" >> '$out_file1' &&
                    $concat_command '${global_condition.inputs[$file]['reverse']}' >> '$out_file1' &&
                #end for
                sleep 1
            #end if
        #end if
    #end if
    ]]>
    </command>
    <inputs>
        <conditional name="global_condition">
            <param name="input_type" type="select" label="What type of data do you wish to concatenate?" help="Depending on the type of input selected the concatenation options will differ">
                <option value="singles">Single datasets</option>
                <option value="simple_collections">Collections</option>
                <option value="paired_collection">Paired collection</option>
            </param>
            <when value="singles">
                <param name="inputs" type="data" label="Concatenate Datasets" multiple="True" help="All input datasets will be concatenated tail-to-head."/>
            </when>
            <when value="paired_collection">
                <param name="inputs" type="data_collection" collection_type="list:paired" label="Input paired collection to concatenate"/>
                <param name="paired_cat_type" type="select" label="What type of concatenation do you wish to perform?">
                    <option value="by_strand">Concatenate all datasets of same strand (outputs a single pair of datasets)</option>
                    <option value="by_pair">Concatenate pairs of datasets (outputs an unpaired collection of datasets)</option>
                    <option value="all">Concatenate all datasets into a single file regardless of strand (outputs a single file)</option>
                </param>
            </when>
            <when value="simple_collections">
                <conditional name="collections_condition">
                    <param name="collection_cat_type" type="select" label="What type of concatenation do you wish to perform?">
                        <option value="two_collections">Concatenate datasets of 2 collections (outputs a simple collection)</option>
                    </param>
                    <when value="two_collections">
                        <param name="input_1" type="data_collection" collection_type="list" label="Input first collection" help="The first collection contains the datasets that will be written first in the concatenated file" />
                        <param name="input_2" type="data_collection" collection_type="list" label="Input second collection" help="The second collection contains the datasets that will be written last in the concatenated file" />
                    </when>
                </conditional>
            </when>
        </conditional>
        <param name="dataset_names" type="boolean" label="Include dataset names?" truevalue="Yes" falsevalue="No" checked="false" help="If 'Yes' is selected '#name of dataset' will be added when concatenating."/>
        <param name="headers" type="integer" label="Number of lines to skip at the beginning of each concatenation:" value="0" help="This parameter exists so as to not concatenate comments or headers contained at the start of the files."/>
    </inputs>
    <outputs>
        <!-- Single output file: "singles" mode and paired collection "all" mode -->
        <data name="out_file1" format_source="inputs" metadata_source="inputs" label="Concatenated datasets">
            <filter>global_condition['input_type'] == 'singles' or (global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'all')</filter>
        </data>
        <!-- One forward/reverse pair: paired collection "by_strand" mode -->
        <collection name="paired_output" type="paired" label="Concatenation by strand">
            <data name="forward" />
            <data name="reverse" />
            <filter>global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_strand'</filter>
        </collection>
        <!-- Files written to ./concatenated as "<name>.listed.<ext>.listed" by the command -->
        <collection name="list_output" type="list" label="Concatenation by pairs">
            <discover_datasets pattern="(?P&lt;name&gt;.*)\.listed\.(?P&lt;ext&gt;.*)\.listed" visible="false" directory="concatenated"/>
            <filter>(global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_pair') or (global_condition['input_type'] == 'simple_collections' and global_condition['collections_condition']['collection_cat_type'] == 'two_collections')</filter>
        </collection>
    </outputs>
    <tests>
        <!-- Single files concatenation -->
        <test> <!-- Test 2 single files concatenation with no other option -->
            <param name="input_type" value="singles" />
            <param name="inputs" value="1.bed,2.bed"/>
            <param name="dataset_names" value="No" />
            <param name="headers" value="0" />
            <output name="out_file1" file="cat_wrapper_out1.bed"/>
        </test>
        <test> <!-- Test 2 single files concatenation with dataset names activated -->
            <param name="input_type" value="singles" />
            <param name="inputs" value="1.bed,2.bed"/>
            <param name="dataset_names" value="Yes" />
            <param name="headers" value="0" />
            <output name="out_file1" file="cat_wrapper_out2.bed"/>
        </test>
        <test> <!-- Test 2 single files concatenation skipping 1 line -->
            <param name="input_type" value="singles" />
            <param name="inputs" value="1.bed,2.bed"/>
            <param name="dataset_names" value="No" />
            <param name="headers" value="1" />
            <output name="out_file1" file="cat_wrapper_out3.bed"/>
        </test>
        <test> <!-- Test gz handling with no options -->
            <param name="input_type" value="singles" />
            <param name="inputs" value="1_f.fastq.gz,1_r.fastq.gz"/>
            <param name="dataset_names" value="No" />
            <param name="headers" value="0" />
            <output name="out_file1" file="1.fastq.gz" decompress="True"/>
        </test>
        <test> <!-- Test gz handling with options -->
            <param name="input_type" value="singles" />
            <param name="inputs" value="1_f.fastq.gz,1_r.fastq.gz"/>
            <param name="dataset_names" value="Yes" />
            <param name="headers" value="4" />
            <output name="out_file1" file="1_options.fastq.gz" decompress="True"/>
        </test>
        <!-- Test paired options -->
        <test> <!-- Test paired collection concatenation by_pair with no other option -->
            <param name="input_type" value="paired_collection" />
            <param name="paired_cat_type" value="by_pair"/>
            <param name="inputs">
                <collection type="list:paired">
                    <element name="2">
                        <collection type="paired">
                            <element name="forward" value="2_f.fastq"/>
                            <element name="reverse" value="2_r.fastq"/>
                        </collection>
                    </element>
                    <element name="3">
                        <collection type="paired">
                            <element name="forward" value="3_f.fastq"/>
                            <element name="reverse" value="3_r.fastq"/>
                        </collection>
                    </element>
                    <element name="4">
                        <collection type="paired">
                            <element name="forward" value="4_f.fastq"/>
                            <element name="reverse" value="4_r.fastq"/>
                        </collection>
                    </element>
                </collection>
            </param>
            <param name="dataset_names" value="No" />
            <param name="headers" value="0" />
            <output_collection name="list_output" type="list" >
                <element name="2" file="2.fastq"/>
                <element name="3" file="3.fastq"/>
                <element name="4" file="4.fastq"/>
            </output_collection>
        </test>
        <test> <!-- Test paired collection concatenation by_strand with no other option -->
            <param name="input_type" value="paired_collection" />
            <param name="paired_cat_type" value="by_strand"/>
            <param name="inputs">
                <collection type="list:paired">
                    <element name="2">
                        <collection type="paired">
                            <element name="forward" value="2_f.fastq"/>
                            <element name="reverse" value="2_r.fastq"/>
                        </collection>
                    </element>
                    <element name="3">
                        <collection type="paired">
                            <element name="forward" value="3_f.fastq"/>
                            <element name="reverse" value="3_r.fastq"/>
                        </collection>
                    </element>
                    <element name="4">
                        <collection type="paired">
                            <element name="forward" value="4_f.fastq"/>
                            <element name="reverse" value="4_r.fastq"/>
                        </collection>
                    </element>
                </collection>
            </param>
            <param name="dataset_names" value="No" />
            <param name="headers" value="0" />
            <output_collection name="paired_output" type="paired" >
                <element name="forward" file="f.fastq"/>
                <element name="reverse" file="r.fastq"/>
            </output_collection>
        </test>
        <test> <!-- Test 2 collections concatenation -->
            <param name="input_type" value="simple_collections" />
            <param name="collection_cat_type" value="two_collections"/>
            <param name="input_1">
                <collection type="list">
                    <element name="2" value="2_f.fastq"/>
                    <element name="3" value="3_f.fastq"/>
                    <element name="4" value="4_f.fastq"/>
                </collection>
            </param>
            <param name="input_2">
                <collection type="list">
                    <element name="2" value="2_r.fastq"/>
                    <element name="3" value="3_r.fastq"/>
                    <element name="4" value="4_r.fastq"/>
                </collection>
            </param>
            <param name="dataset_names" value="No" />
            <param name="headers" value="0" />
            <output_collection name="list_output" type="list" count="3" >
                <element name="2" file="2.fastq"/>
                <element name="3" file="3.fastq"/>
                <element name="4" file="4.fastq"/>
            </output_collection>
        </test>
        <test> <!-- Test 2 collections concatenation with other options-->
            <param name="input_type" value="simple_collections" />
            <param name="collection_cat_type" value="two_collections"/>
            <param name="input_1">
                <collection type="list">
                    <element name="1_f.fastq" value="1_f.fastq.gz"/>
                </collection>
            </param>
            <param name="input_2">
                <collection type="list">
                    <element name="1_r.fastq" value="1_r.fastq.gz"/>
                </collection>
            </param>
            <param name="dataset_names" value="Yes" />
            <param name="headers" value="4" />
            <output_collection name="list_output" type="list" count="1" >
                <element name="1_f.fastq_1_r.fastq" file="1_options.fastq.gz" decompress="True"/>
            </output_collection>
        </test>
    </tests>
    <help>

.. class:: warningmark

**WARNING:** This tool does not check if the datasets being concatenated are in the same format.

**WARNING:** The paired collection operations do not handle gzipped files.

**WARNING:** When concatenating 2 collections make sure the first collection is the one with the most items.

-----

**What it does**

Concatenates datasets and paired collections with multiple options:

- When the input is a paired collection:

  - concatenation by strand : forward and reverse datasets are concatenated separately and a list with a single forward - reverse dataset pair is returned
  - concatenation by pair : forward - reverse dataset pairs are concatenated and a simple dataset collection is returned
  - whole collection concatenation : all datasets in the collection are concatenated and a single dataset is returned

- When the inputs are 2 collections: datasets are concatenated in a pairwise combination and a single dataset collection is returned
- Skipping lines before concatenation to avoid headers
- Add the name of the concatenated files as separator

-----

**Single datasets concatenation example**

Concatenating Dataset::

    chrX  151087187  151087355  A  0  -
    chrX  151572400  151572481  B  0  +

with Dataset1::

    chr1  151242630  151242955  X  0  +
    chr1  151271715  151271999  Y  0  +
    chr1  151278832  151279227  Z  0  -

and with Dataset2::

    chr2  100000030  200000955  P  0  +
    chr2  100000015  200000999  Q  0  +

will result in the following::

    chrX  151087187  151087355  A  0  -
    chrX  151572400  151572481  B  0  +
    chr1  151242630  151242955  X  0  +
    chr1  151271715  151271999  Y  0  +
    chr1  151278832  151279227  Z  0  -
    chr2  100000030  200000955  P  0  +
    chr2  100000015  200000999  Q  0  +

-----

**2 Collections concatenation**

1st collection::

    a
    b
    c
    d

2nd collection::

    1
    2
    3
    4

Concatenation result::

    A single collection containing:
        a concatenated with 1
        b concatenated with 2
        c concatenated with 3
        d concatenated with 4

-----

**Paired collection concatenation example**

1st pair::

    forward - reverse

2nd pair::

    forward - reverse

- Concatenation by strand::

    concatenates:
        1st forward + 2nd forward
        1st reverse + 2nd reverse
    outputs:
        1 pair

- Concatenation by pair::

    concatenates:
        1st forward + 1st reverse
        2nd forward + 2nd reverse
    outputs:
        2 datasets

- Concatenate all::

    concatenates:
        1st forward + 1st reverse + 2nd forward + 2nd reverse
    outputs:
        1 dataset

-----

**When selecting "Include dataset names" when concatenating files**:

1st file name="first_tabular"::

    chrX  151087187  151087355  A  0  -
    chrX  151572400  151572481  B  0  +

2nd file name="second_tabular"::

    chr1  151242630  151242955  X  0  +
    chr1  151271715  151271999  Y  0  +
    chr1  151278832  151279227  Z  0  -

output::

    # first_tabular
    chrX  151087187  151087355  A  0  -
    chrX  151572400  151572481  B  0  +
    # second_tabular
    chr1  151242630  151242955  X  0  +
    chr1  151271715  151271999  Y  0  +
    chr1  151278832  151279227  Z  0  -

-----

**Skipping lines**

1st file::

    chrX  151087187  151087355  A  0  -
    chrX  151572400  151572481  B  0  +

2nd file::

    chr1  151242630  151242955  X  0  +
    chr1  151271715  151271999  Y  0  +
    chr1  151278832  151279227  Z  0  -

skipping 1 line output::

    chrX  151572400  151572481  B  0  +
    chr1  151271715  151271999  Y  0  +
    chr1  151278832  151279227  Z  0  -

-----

Adapted from galaxy's catWrapper.xml to allow multiple input files.

    </help>
</tool>