Repository 'concatenate_multiple_datasets'
hg clone https://toolshed.g2.bx.psu.edu/repos/artbio/concatenate_multiple_datasets

Changeset 6:4554fa330d3d (2019-06-24)
Previous changeset 5:99a5ed06b86c (2019-06-24) Next changeset 7:55cf9d9defd1 (2019-07-09)
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/concat_multi_datasets commit 4df03fd2d6fbf17a451256c0fb9d30932fc9d637
modified:
catWrapper.xml
b
diff -r 99a5ed06b86c -r 4554fa330d3d catWrapper.xml
--- a/catWrapper.xml Mon Jun 24 03:58:52 2019 -0400
+++ b/catWrapper.xml Mon Jun 24 04:02:53 2019 -0400
[
b'@@ -1,4 +1,4 @@\n-<tool id="cat_multi_datasets" name="Concatenate multiple datasets" version="1.2.0">\n+<tool id="cat_multi_datasets" name="Concatenate multiple datasets" version="1.4.0">\n     <description>tail-to-head by specifying how</description>\n     <command><![CDATA[\n         #if $headers == 0:\n@@ -6,36 +6,51 @@\n         #else:\n             #set $concat_command = \'tail -q -n +\'+ str(int($headers)+1)\n         #end if\n+        printf "Concatenating files:\\n" &&\n         #if $global_condition.input_type == "singles":\n             #if $dataset_names == "No":\n-                $concat_command\n                 #for $file in $global_condition.inputs\n-                    \'$file\'\n+                    printf "${file.element_identifier}..." &&\n+                    #if $file.ext[-2:] == "gz" and $headers != 0:\n+                        gzip -dc \'$file\' | $concat_command | gzip -c >> \'$out_file1\' &&\n+                    #else:\n+                        $concat_command \'$file\' >> \'$out_file1\' &&\n+                    #end if\n+                    printf "Done\\n" &&\n                 #end for\n-                > \'$out_file1\'\n+                sleep 1\n             #else:\n                 #for $file in $global_condition.inputs\n-                    #if $file.ext[-2:] == "gz":\n+                    printf "${file.element_identifier}..." &&\n+                    #if $file.ext[-2:] == "gz" and $headers != 0:\n                         printf "# ${file.element_identifier}\\n" | gzip -c >> \'$out_file1\' &&\n                         gzip -dc "$file" | $concat_command |gzip -c >> \'$out_file1\' &&\n                     #else:\n                         printf "# ${file.element_identifier}\\n" >> \'$out_file1\' &&\n                         $concat_command "$file" >> \'$out_file1\' &&\n                     #end if\n+                    printf "Done\\n" &&\n                 #end for\n                 sleep 1\n             #end if\n         #else if $global_condition.input_type == "simple_collections":\n-            #if $global_condition.collections_condition.collection_cat_type == "two_collections":\n                 mkdir concatenated &&\n                 #if $dataset_names == "No":\n-                    #for $x, $y in zip($global_condition.collections_condition.input_1, $global_condition.collections_condition.input_2)\n-                        $concat_command \'$x\' \'$y\' > concatenated/\'${x.element_identifier}.listed.${x.ext}.listed\' &&\n+                    #for $x, $y in zip($global_condition.input_1, $global_condition.input_2):\n+                        printf "${x.element_identifier} and ${y.element_identifier}..." &&\n+                        #if $x.ext[-2:] == "gz" and $headers != 0:\n+                            gzip -dc \'$x\' | $concat_command | gzip -c > concatenated/\'${x.element_identifier}.listed.${x.ext}.listed\' &&\n+                            gzip -dc \'$y\' | $concat_command | gzip -c >> concatenated/\'${x.element_identifier}.listed.${x.ext}.listed\' &&\n+                        #else:\n+                            $concat_command \'$x\' \'$y\' > concatenated/\'${x.element_identifier}.listed.${x.ext}.listed\' &&\n+                        #end if\n+                        printf "Done\\n" &&\n                     #end for\n                     sleep 1\n                 #else:\n-                    #for $x, $y in zip($global_condition.collections_condition.input_1, $global_condition.collections_condition.input_2)\n-                        #if $x.ext[-2:] == "gz":\n+                    #for $x, $y in zip($global_condition.input_1, $global_condition.input_2)\n+                        printf "${x.element_identifier} and ${y.element_identifier}..." &&\n+                        #if $x.ext[-2:] == "gz" and $headers != 0:\n                             printf "# ${x.element_identifier}\\n" | gzip -c > concatenated/\'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed\' &&\n                             gzip -dc \'$x\' | $concat_command | gzip -c >> concatenated/\'${x.element_identifier}_${y.element_'..b'ections\') or (global_condition[\'input_type\'] == \'nested_collection\')</filter>\n         </collection>\n     </outputs>\n     <tests>\n@@ -322,6 +413,48 @@\n                 <element name="1_f.fastq_1_r.fastq" file="1_options.fastq.gz" decompress="True"/>\n             </output_collection>\n         </test>\n+        <test> <!-- Test nested collections concatenation -->\n+            <param name="input_type" value="nested_collection" />\n+            <param name="inputs">\n+                <collection type="list:list">\n+                    <element name="2">\n+                        <collection type="list">\n+                            <element name="2_f" value="2_f.fastq" ftype="fastq"/>\n+                            <element name="2_r" value="2_r.fastq" ftype="fastq"/>\n+                        </collection>\n+                    </element>\n+                    <element name="3">\n+                        <collection type="list">\n+                            <element name="3" value="3.fastq" ftype="fastq"/>\n+                        </collection>\n+                    </element>\n+                </collection>\n+            </param>\n+            <param name="dataset_names" value="No" />\n+            <param name="headers" value="0" />\n+            <output_collection name="list_output" type="list" count="2" >\n+                <element name="2" file="2.fastq"/>\n+                <element name="3" file="3.fastq"/>\n+            </output_collection>\n+        </test>\n+        <test> <!-- Test nested collections concatenation with options and gzip-->\n+            <param name="input_type" value="nested_collection" />\n+            <param name="inputs">\n+                <collection type="list:list">\n+                    <element name="1">\n+                        <collection type="list">\n+                            <element name="1_f.fastq" value="1_f.fastq.gz" ftype="fastq.gz"/>\n+                            <element name="1_r.fastq" value="1_r.fastq.gz" ftype="fastq.gz"/>\n+                        </collection>\n+                    </element>\n+                </collection>\n+            </param>\n+            <param name="dataset_names" value="Yes" />\n+            <param name="headers" value="4" />\n+            <output_collection name="list_output" type="list" count="1" >\n+                <element name="1" file="1_options.fastq.gz" decompress="True"/>\n+            </output_collection>\n+        </test>\n     </tests>\n     <help>\n \n@@ -329,9 +462,9 @@\n \n **WARNING:** This tool does not check if the datasets being concatenated are in the same format.\n \n-**WARNING:** The paired collection operations do not handle gziped files.\n+**WARNING:** When concatenating 2 collections make sure the first collection is the one with the most items.\n \n-**WARNING:** When concatenating 2 collections make sure the first collection is the one with the most items.\n+**WARNING:** This tool can\'t handle nested collection deeper than list:list.\n \n -----\n \n@@ -349,6 +482,8 @@\n \n  - When the inputs are 2 collections: datasets are concatenated in a pairwise combination and a single dataset collection is returned\n \n+ - When nested collection concatenation: datasets in each sub-collection are concatenated and a simple dataset collection is returned\n+\n  - Skipping lines before concatenation to avoid headers\n \n  - Add the name of the concatenated files as separator\n@@ -456,6 +591,32 @@\n \n -----\n \n+**Nested collection concatenation example**\n+\n+Nested collection:\n+\n+    - Experiment\n+\n+        - Sample_1\n+\n+            - Sample_1_file_1\n+            - Sample_1_file_2\n+\n+        - Sample_2\n+\n+            - Sample_2_file_1\n+            - Sample_2_file_2\n+            - Sample_2_file_3\n+\n+Concatenation result::\n+\n+    A single collection containing:\n+\n+        - Sample_1: (Sample_1_file_1 + Sample_1_file_2)\n+        - Sample_2: (Sample_2_file_1 + Sample_2_file_2 + Sample_2_file_3)\n+\n+-----\n+\n **When selecting "Include dataset names" when concatenating files**:\n \n 1rst file name="first_tabular"::\n'