Repository 'concatenate_multiple_datasets'
hg clone https://toolshed.g2.bx.psu.edu/repos/artbio/concatenate_multiple_datasets

Changeset 4:7afc0515a307 (2019-06-18)
Previous changeset 3:62aebaf6cfa0 (2019-05-10) Next changeset 5:99a5ed06b86c (2019-06-24)
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/concat_multi_datasets commit 4b572d4605dfc1d5cfe2b46c9f0061d041e63df9
modified:
catWrapper.xml
b
diff -r 62aebaf6cfa0 -r 7afc0515a307 catWrapper.xml
--- a/catWrapper.xml Fri May 10 10:15:02 2019 -0400
+++ b/catWrapper.xml Tue Jun 18 11:59:06 2019 -0400
[
b'@@ -1,4 +1,4 @@\n-<tool id="cat_multi_datasets" name="Concatenate multiple datasets" version="1.2.0">\n+<tool id="cat_multi_datasets" name="Concatenate multiple datasets" version="1.3.0">\n     <description>tail-to-head by specifying how</description>\n     <command><![CDATA[\n         #if $headers == 0:\n@@ -8,11 +8,14 @@\n         #end if\n         #if $global_condition.input_type == "singles":\n             #if $dataset_names == "No":\n-                $concat_command\n                 #for $file in $global_condition.inputs\n-                    \'$file\'\n+                    #if $file.ext[-2:] == "gz":\n+                        gzip -dc \'$file\' | $concat_command | gzip -c >> \'$out_file1\' &&\n+                    #else:\n+                        $concat_command \'$file\' >> \'$out_file1\' &&\n+                    #end if\n                 #end for\n-                > \'$out_file1\'\n+                sleep 1\n             #else:\n                 #for $file in $global_condition.inputs\n                     #if $file.ext[-2:] == "gz":\n@@ -26,15 +29,19 @@\n                 sleep 1\n             #end if\n         #else if $global_condition.input_type == "simple_collections":\n-            #if $global_condition.collections_condition.collection_cat_type == "two_collections":\n                 mkdir concatenated &&\n                 #if $dataset_names == "No":\n-                    #for $x, $y in zip($global_condition.collections_condition.input_1, $global_condition.collections_condition.input_2)\n-                        $concat_command \'$x\' \'$y\' > concatenated/\'${x.element_identifier}.listed.${x.ext}.listed\' &&\n+                    #for $x, $y in zip($global_condition.input_1, $global_condition.input_2):\n+                        #if $x.ext[-2:] == "gz":\n+                            gzip -dc \'$x\' | $concat_command | gzip -c > concatenated/\'${x.element_identifier}.listed.${x.ext}.listed\' &&\n+                            gzip -dc \'$y\' | $concat_command | gzip -c >> concatenated/\'${x.element_identifier}.listed.${x.ext}.listed\' &&\n+                        #else:\n+                            $concat_command \'$x\' \'$y\' > concatenated/\'${x.element_identifier}.listed.${x.ext}.listed\' &&\n+                        #end if\n                     #end for\n                     sleep 1\n                 #else:\n-                    #for $x, $y in zip($global_condition.collections_condition.input_1, $global_condition.collections_condition.input_2)\n+                    #for $x, $y in zip($global_condition.input_1, $global_condition.input_2)\n                         #if $x.ext[-2:] == "gz":\n                             printf "# ${x.element_identifier}\\n" | gzip -c > concatenated/\'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed\' &&\n                             gzip -dc \'$x\' | $concat_command | gzip -c >> concatenated/\'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed\' &&\n@@ -49,82 +56,142 @@\n                     #end for\n                 sleep 1\n                 #end if\n-            #end if\n         #else if $global_condition.input_type == "paired_collection":\n             #if $global_condition.paired_cat_type == "by_strand":\n+                mkdir concatenated &&\n                 #if $dataset_names == "No":\n                     #for $file in $global_condition.inputs\n-                        $concat_command\n-                        $file[\'forward\']\n-                        >> \'$forward\' &&\n-                        $concat_command\n-                        $file[\'reverse\']\n-                        >> \'$reverse\' &&\n+                        #if $file[\'forward\'].ext[-2:] == "gz":\n+                            gzip -dc $file[\'forward\'] | $concat_command | gzip -c >> concatenated/forward.listed.${file[\'forward\'].ext}.listed &&\n+                            gzip -dc $file[\'reverse\'] | $concat_command | gzip -c >> concatenated/reverse.listed.${file[\'reverse\'].ext}.listed &&\n+                        #else:\n+                            $concat_command $file[\''..b'ections\') or (global_condition[\'input_type\'] == \'nested_collection\')</filter>\n         </collection>\n     </outputs>\n     <tests>\n@@ -322,6 +388,48 @@\n                 <element name="1_f.fastq_1_r.fastq" file="1_options.fastq.gz" decompress="True"/>\n             </output_collection>\n         </test>\n+        <test> <!-- Test nested collections concatenation -->\n+            <param name="input_type" value="nested_collection" />\n+            <param name="inputs">\n+                <collection type="list:list">\n+                    <element name="2">\n+                        <collection type="list">\n+                            <element name="2_f" value="2_f.fastq" ftype="fastq"/>\n+                            <element name="2_r" value="2_r.fastq" ftype="fastq"/>\n+                        </collection>\n+                    </element>\n+                    <element name="3">\n+                        <collection type="list">\n+                            <element name="3" value="3.fastq" ftype="fastq"/>\n+                        </collection>\n+                    </element>\n+                </collection>\n+            </param>\n+            <param name="dataset_names" value="No" />\n+            <param name="headers" value="0" />\n+            <output_collection name="list_output" type="list" count="2" >\n+                <element name="2" file="2.fastq"/>\n+                <element name="3" file="3.fastq"/>\n+            </output_collection>\n+        </test>\n+        <test> <!-- Test nested collections concatenation with options and gzip-->\n+            <param name="input_type" value="nested_collection" />\n+            <param name="inputs">\n+                <collection type="list:list">\n+                    <element name="1">\n+                        <collection type="list">\n+                            <element name="1_f.fastq" value="1_f.fastq.gz" ftype="fastq.gz"/>\n+                            <element name="1_r.fastq" value="1_r.fastq.gz" ftype="fastq.gz"/>\n+                        </collection>\n+                    </element>\n+                </collection>\n+            </param>\n+            <param name="dataset_names" value="Yes" />\n+            <param name="headers" value="4" />\n+            <output_collection name="list_output" type="list" count="1" >\n+                <element name="1" file="1_options.fastq.gz" decompress="True"/>\n+            </output_collection>\n+        </test>\n     </tests>\n     <help>\n \n@@ -329,9 +437,9 @@\n \n **WARNING:** This tool does not check if the datasets being concatenated are in the same format.\n \n-**WARNING:** The paired collection operations do not handle gziped files.\n+**WARNING:** When concatenating 2 collections make sure the first collection is the one with the most items.\n \n-**WARNING:** When concatenating 2 collections make sure the first collection is the one with the most items.\n+**WARNING:** This tool can\'t handle nested collection deeper than list:list.\n \n -----\n \n@@ -349,6 +457,8 @@\n \n  - When the inputs are 2 collections: datasets are concatenated in a pairwise combination and a single dataset collection is returned\n \n+ - When nested collection concatenation: datasets in each sub-collection are concatenated and a simple dataset collection is returned\n+\n  - Skipping lines before concatenation to avoid headers\n \n  - Add the name of the concatenated files as separator\n@@ -456,6 +566,32 @@\n \n -----\n \n+**Nested collection concatenation example**\n+\n+Nested collection:\n+\n+    - Experiment\n+\n+        - Sample_1\n+\n+            - Sample_1_file_1\n+            - Sample_1_file_2\n+\n+        - Sample_2\n+\n+            - Sample_2_file_1\n+            - Sample_2_file_2\n+            - Sample_2_file_3\n+\n+Concatenation result::\n+\n+    A single collection containing:\n+\n+        - Sample_1: (Sample_1_file_1 + Sample_1_file_2)\n+        - Sample_2: (Sample_2_file_1 + Sample_2_file_2 + Sample_2_file_3)\n+\n+-----\n+\n **When selecting "Include dataset names" when concatenating files**:\n \n 1rst file name="first_tabular"::\n'