Repository 'collapse_collections'
hg clone https://toolshed.g2.bx.psu.edu/repos/nml/collapse_collections

Changeset 4:25136a2b0cfe (2017-03-10)
Previous changeset 3:c0c988378838 (2016-10-24)
Next changeset 5:33151a38533a (2019-08-27)
Commit message:
planemo upload commit 0340e76ceab90331dab96f4a6b8a9b7df5b8c1c2
modified:
merge.xml
added:
test-data/answer2.tsv
test-data/answer3.tsv
test-data/strain1.tsv
test-data/strain2.tsv
diff -r c0c988378838 -r 25136a2b0cfe merge.xml
--- a/merge.xml Mon Oct 24 16:23:11 2016 -0400
+++ b/merge.xml Fri Mar 10 16:12:32 2017 -0500
@@ -1,20 +1,44 @@
-<tool id="collapse_dataset" name="Collapse Collection" version="3.0">
-  <description>Collapse collection into single dataset in order of the collection</description>
+<tool id="collapse_dataset" name="Collapse Collection" version="4.0">
+  <description>into single dataset in order of the collection</description>
   <command>
     <![CDATA[
     
-     (
+    (
+    #if $one_header:
+      #if $filename.add_name:
+        awk '{if (NR==1) {print "Sample\t"$0}}' "$input_list[0]";
+      #else:
+        awk '{if (NR==1) {print}}' "$input_list[0]";
+      #end if
+    #end if
+    
     #for $f in $input_list#
     #if $filename.add_name:
        #if str($filename.place_name) ==  "same_once":
+         #if $one_header:
+           printf "$f.element_identifier\t"; tail -q -n +2 "$f";
+         #else:
            printf "$f.element_identifier\t"; cat "$f";
+         #end if
        #elif str($filename.place_name) ==  "same_multiple":
+         #if $one_header:
+           awk '{if (NR!=1) {print "$f.element_identifier\t"$0}}' "$f";
+         #else:
            awk '{print "$f.element_identifier\t"$0}' "$f";
+         #end if
        #elif str($filename.place_name) ==  "above":
+         #if $one_header:
+           printf "$f.element_identifier\n"; tail -q -n +2  "$f";
+         #else:
            printf "$f.element_identifier\n"; cat "$f";
+         #end if
        #end if
     #else:
-       cat "$f" ;
+       #if $one_header:
+         awk '{if (NR!=1) {print}}' "$f";
+       #else:
+         cat "$f" ;
+       #end if 
     #end if
 
     #end for#
@@ -25,8 +49,10 @@
     
   </command>
   <inputs>
-     <param name="input_list" type="data" format="data" label="Collection of files to collapse into single dataset" help="" optional="false" multiple="true" />
+    <param name="input_list" type="data" format="data" label="Collection of files to collapse into single dataset" help="" optional="false" multiple="true" />
+    <param name="one_header" type="boolean" display="checkboxes" label="Keep one header line" help="Combine first line of each file as the header for the final dataset. Useful when same header line is found in all files."/>
      <conditional name="filename">
+
      <param name="add_name" type="boolean" display="checkboxes" label="Append File name"/>
      <when value="true">
        <param name="place_name" type="select" label="Where to add dataset name">
@@ -44,17 +70,25 @@
   </outputs>
   <tests>
     <test>
-      <param name="input_list">
-        <collection type="list">
-   <element name="input1" value="input1" />
-          <element name="input2" value="input2" />
- </collection>
-      </param>
+      <param name="input_list" value="input1,input2"/>
       <output name="output" file="answer.txt"/>
     </test>
+    <test>
+      <param name="input_list" value="strain1.tsv,strain2.tsv"/>
+      <param name="one_header" value="True"/>
+      <param name="add_name" value="True"/>
+      <param name="place_name" value="same_multiple"/>
+      <output name="output" file="answer2.tsv"/>
+    </test>
+    <test>
+      <param name="input_list" value="strain1.tsv,strain2.tsv"/>
+      <param name="one_header" value="True"/>
+      <output name="output" file="answer3.tsv"/>
+    </test>
+
   </tests>
   <help>
- Combines a list collection into a single file dataset with option to include dataset names.
+ Combines a list collection into a single file dataset with option to include dataset names or merge common header line.
   </help>
   <citations>
   </citations>
diff -r c0c988378838 -r 25136a2b0cfe test-data/answer2.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/answer2.tsv Fri Mar 10 16:12:32 2017 -0500
@@ -0,0 +1,5 @@
+Sample seq_name median mean gc% seq_length invalid_bases %_invalid non_zero_bases %_non_zero %_non_zero_corrected
+strain1.tsv mcr_1 52 52.74000 0.49139 1626 0 0.00000 1600 100.00000 100.00000
+strain1.tsv mcr_2  0 1.60905 0.48114 1617 0 0.00000 56 3.51980 3.51980
+strain2.tsv mcr_1 85 85.61500 0.49139 1626 0 0.00000 1600 100.00000 100.00000
+strain2.tsv mcr_2  0 3.05343 0.48114 1617 0 0.00000 66 4.14833 4.14833
diff -r c0c988378838 -r 25136a2b0cfe test-data/answer3.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/answer3.tsv Fri Mar 10 16:12:32 2017 -0500
@@ -0,0 +1,5 @@
+seq_name median mean gc% seq_length invalid_bases %_invalid non_zero_bases %_non_zero %_non_zero_corrected
+mcr_1 52 52.74000 0.49139 1626 0 0.00000 1600 100.00000 100.00000
+mcr_2  0 1.60905 0.48114 1617 0 0.00000 56 3.51980 3.51980
+mcr_1 85 85.61500 0.49139 1626 0 0.00000 1600 100.00000 100.00000
+mcr_2  0 3.05343 0.48114 1617 0 0.00000 66 4.14833 4.14833
diff -r c0c988378838 -r 25136a2b0cfe test-data/strain1.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/strain1.tsv Fri Mar 10 16:12:32 2017 -0500
@@ -0,0 +1,3 @@
+seq_name median mean gc% seq_length invalid_bases %_invalid non_zero_bases %_non_zero %_non_zero_corrected
+mcr_1 52 52.74000 0.49139 1626 0 0.00000 1600 100.00000 100.00000
+mcr_2  0 1.60905 0.48114 1617 0 0.00000 56 3.51980 3.51980
diff -r c0c988378838 -r 25136a2b0cfe test-data/strain2.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/strain2.tsv Fri Mar 10 16:12:32 2017 -0500
@@ -0,0 +1,3 @@
+seq_name median mean gc% seq_length invalid_bases %_invalid non_zero_bases %_non_zero %_non_zero_corrected
+mcr_1 85 85.61500 0.49139 1626 0 0.00000 1600 100.00000 100.00000
+mcr_2  0 3.05343 0.48114 1617 0 0.00000 66 4.14833 4.14833