changeset 4:25136a2b0cfe draft

planemo upload commit 0340e76ceab90331dab96f4a6b8a9b7df5b8c1c2
author nml
date Fri, 10 Mar 2017 16:12:32 -0500
parents c0c988378838
children 33151a38533a
files merge.xml test-data/answer2.tsv test-data/answer3.tsv test-data/strain1.tsv test-data/strain2.tsv
diffstat 5 files changed, 62 insertions(+), 12 deletions(-)
--- a/merge.xml	Mon Oct 24 16:23:11 2016 -0400
+++ b/merge.xml	Fri Mar 10 16:12:32 2017 -0500
@@ -1,20 +1,44 @@
-<tool id="collapse_dataset" name="Collapse Collection" version="3.0">
-  <description>Collapse collection into single dataset in order of the collection</description>
+<tool id="collapse_dataset" name="Collapse Collection" version="4.0">
+  <description>into single dataset in order of the collection</description>
   <command>
     <![CDATA[
     
-     (
+    (
+    #if $one_header:
+      #if $filename.add_name:
+        awk '{if (NR==1) {print "Sample\t"$0}}' "$input_list[0]";
+      #else:
+        awk '{if (NR==1) {print}}' "$input_list[0]";
+      #end if
+    #end if
+    
     #for $f in $input_list#
     #if $filename.add_name:
        #if str($filename.place_name) ==  "same_once":
+         #if $one_header:
+           printf "$f.element_identifier\t"; tail -q -n +2 "$f";
+         #else:
            printf "$f.element_identifier\t"; cat "$f";
+         #end if
        #elif str($filename.place_name) ==  "same_multiple":
+         #if $one_header:
+           awk '{if (NR!=1) {print "$f.element_identifier\t"$0}}' "$f";
+         #else:
            awk '{print "$f.element_identifier\t"$0}' "$f";
+         #end if
        #elif str($filename.place_name) ==  "above":
+         #if $one_header:
+           printf "$f.element_identifier\n"; tail -q -n +2  "$f";
+         #else:
            printf "$f.element_identifier\n"; cat "$f";
+         #end if
        #end if
     #else:
-       cat "$f" ;
+       #if $one_header:
+         awk '{if (NR!=1) {print}}' "$f";
+       #else:
+         cat "$f" ;
+       #end if 
     #end if
 
     #end for#
@@ -25,8 +49,10 @@
     
   </command>
   <inputs>
-     <param name="input_list" type="data" format="data" label="Collection of files to collapse into single dataset" help="" optional="false" multiple="true" />
+    <param name="input_list" type="data" format="data" label="Collection of files to collapse into single dataset" help="" optional="false" multiple="true" />
+    <param name="one_header" type="boolean" display="checkboxes" label="Keep one header line" help="Combine first line of each file as the header for the final dataset. Useful when same header line is found in all files."/>
      <conditional name="filename">
+
      <param name="add_name" type="boolean" display="checkboxes" label="Append File name"/>
      <when value="true">
        <param name="place_name" type="select" label="Where to add dataset name">
@@ -44,17 +70,25 @@
   </outputs>
   <tests>
     <test>
-      <param name="input_list">
-        <collection type="list">
-	  <element name="input1" value="input1" />
-          <element name="input2" value="input2" />
-	</collection>
-      </param>
+      <param name="input_list" value="input1,input2"/>
       <output name="output" file="answer.txt"/>
     </test>
+    <test>
+      <param name="input_list" value="strain1.tsv,strain2.tsv"/>
+      <param name="one_header" value="True"/>
+      <param name="add_name" value="True"/>
+      <param name="place_name" value="same_multiple"/>
+      <output name="output" file="answer2.tsv"/>
+    </test>
+    <test>
+      <param name="input_list" value="strain1.tsv,strain2.tsv"/>
+      <param name="one_header" value="True"/>
+      <output name="output" file="answer3.tsv"/>
+    </test>
+
   </tests>
   <help>
-	Combines a list collection into a single file dataset with option to include dataset names.
+	Combines a list collection into a single file dataset with option to include dataset names or merge common header line.
   </help>
   <citations>
   </citations>
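
For orientation, here is a rough sketch of the shell command the updated Cheetah template would render for the new second test (one_header=True, add_name=True, place_name=same_multiple), assuming the element identifiers resolve to the file names strain1.tsv and strain2.tsv; quoting and the trailing redirect to the output dataset (which sit outside this hunk) are omitted:

    (
    # header printed once, taken from the first collection element and prefixed with a "Sample" column
    awk '{if (NR==1) {print "Sample\t"$0}}' "strain1.tsv";
    # body rows from every element, header line skipped, each prefixed with its dataset name
    awk '{if (NR!=1) {print "strain1.tsv\t"$0}}' "strain1.tsv";
    awk '{if (NR!=1) {print "strain2.tsv\t"$0}}' "strain2.tsv";
    )

Run against the test-data/strain1.tsv and test-data/strain2.tsv files added below, this should reproduce test-data/answer2.tsv.
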
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/answer2.tsv	Fri Mar 10 16:12:32 2017 -0500
@@ -0,0 +1,5 @@
+Sample	seq_name	median	mean	gc%	seq_length	invalid_bases	%_invalid	non_zero_bases	%_non_zero	%_non_zero_corrected
+strain1.tsv	mcr_1	52	52.74000	0.49139	1626	0	0.00000	1600	100.00000	100.00000
+strain1.tsv	mcr_2 	0	1.60905	0.48114	1617	0	0.00000	56	3.51980	3.51980
+strain2.tsv	mcr_1	85	85.61500	0.49139	1626	0	0.00000	1600	100.00000	100.00000
+strain2.tsv	mcr_2 	0	3.05343	0.48114	1617	0	0.00000	66	4.14833	4.14833
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/answer3.tsv	Fri Mar 10 16:12:32 2017 -0500
@@ -0,0 +1,5 @@
+seq_name	median	mean	gc%	seq_length	invalid_bases	%_invalid	non_zero_bases	%_non_zero	%_non_zero_corrected
+mcr_1	52	52.74000	0.49139	1626	0	0.00000	1600	100.00000	100.00000
+mcr_2 	0	1.60905	0.48114	1617	0	0.00000	56	3.51980	3.51980
+mcr_1	85	85.61500	0.49139	1626	0	0.00000	1600	100.00000	100.00000
+mcr_2 	0	3.05343	0.48114	1617	0	0.00000	66	4.14833	4.14833
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/strain1.tsv	Fri Mar 10 16:12:32 2017 -0500
@@ -0,0 +1,3 @@
+seq_name	median	mean	gc%	seq_length	invalid_bases	%_invalid	non_zero_bases	%_non_zero	%_non_zero_corrected
+mcr_1	52	52.74000	0.49139	1626	0	0.00000	1600	100.00000	100.00000
+mcr_2 	0	1.60905	0.48114	1617	0	0.00000	56	3.51980	3.51980
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/strain2.tsv	Fri Mar 10 16:12:32 2017 -0500
@@ -0,0 +1,3 @@
+seq_name	median	mean	gc%	seq_length	invalid_bases	%_invalid	non_zero_bases	%_non_zero	%_non_zero_corrected
+mcr_1	85	85.61500	0.49139	1626	0	0.00000	1600	100.00000	100.00000
+mcr_2 	0	3.05343	0.48114	1617	0	0.00000	66	4.14833	4.14833
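
A similar sketch for the third test (one_header=True, no name column), again using the test file names in place of the real Galaxy dataset paths:

    (
    # header printed once, unmodified, from the first collection element
    awk '{if (NR==1) {print}}' "strain1.tsv";
    # remaining lines of every element, with each element's own header line dropped
    awk '{if (NR!=1) {print}}' "strain1.tsv";
    awk '{if (NR!=1) {print}}' "strain2.tsv";
    )

The concatenated output should match test-data/answer3.tsv above: one shared header followed by the data rows of both strains.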