Mercurial > repos > nml > collapse_collections

<tool id="collapse_dataset" name="Collapse Collection" version="5.1.0" profile="16.04">
  <description>into single dataset in order of the collection</description>
  <!-- Runtime dependency: gawk, which presumably supplies the `awk` executable
       invoked by the command template. The `printf`, `tail` and `cat` calls in
       that template are not covered by a declared requirement. -->
  <requirements>
    <requirement type="package" version="5.1.0">gawk</requirement>
  </requirements>
  <!--
    Cheetah template that expands to one shell subshell whose combined stdout
    is redirected into the output dataset.

    Inputs read by the template:
      input_list           datasets to concatenate, processed in the order given
      one_header           when set, line 1 of the first dataset is emitted once
                           and line 1 of every dataset is skipped in the loop
      filename.add_name    when set, each dataset's element identifier is added
      filename.place_name  same_once | same_multiple | above

    The element identifier is always handed to printf/awk as DATA (a printf
    argument or an awk variable set with the v option) and never spliced into a
    printf format string or into awk program text, so names containing a percent
    sign or a double quote no longer corrupt the output or break the command.
    NOTE(review): identifiers are still not shell-escaped; a name containing the
    quote character that surrounds it would still break the generated command.
  -->
  <command>
    <![CDATA[

    (
    ## Optional shared header: first line of the first dataset, prefixed with a
    ## "Sample" column when dataset names are being added.
    #if $one_header:
      #if $filename.add_name:
        awk '{if (NR==1) {print "Sample\t"$0}}' "$input_list[0]";
      #else:
        awk '{if (NR==1) {print}}' "$input_list[0]";
      #end if
    #end if

    #for $f in $input_list#
    #if $filename.add_name:
       #if str($filename.place_name) ==  "same_once":
         ## Name followed by a tab, once, in front of the dataset's first emitted line.
         #if $one_header:
           printf '%s\t' "$f.element_identifier"; tail -q -n +2 "$f";
         #else:
           ## The END rule emits a bare newline for an empty dataset so the name
           ## still terminates its own line.
           printf '%s\t' "$f.element_identifier"; awk '{ print $0 } END { if (NR == 0 && NF == 0) { print "" } }' "$f";
         #end if
       #elif str($filename.place_name) ==  "same_multiple":
         ## Name followed by a tab in front of every emitted line.
         #if $one_header:
           awk -v name='$f.element_identifier' 'NR != 1 { print name "\t" $0 }' "$f";
         #else:
           ## An empty dataset still yields one line holding just the name and a tab.
           awk -v name='$f.element_identifier' '{ print name "\t" $0 } END { if (NR == 0 && NF == 0) { print name "\t" } }' "$f";
         #end if
       #elif str($filename.place_name) ==  "above":
         ## Name on its own line, followed by the dataset contents.
         #if $one_header:
           printf '%s\n' "$f.element_identifier"; tail -q -n +2  "$f";
         #else:
           printf '%s\n' "$f.element_identifier"; cat "$f";
         #end if
       #end if
    #else:
       ## No names requested: plain concatenation, minus line 1 when a shared
       ## header was already written.
       #if $one_header:
         awk '{if (NR!=1) {print}}' "$f";
       #else:
         cat "$f" ;
       #end if
    #end if

    #end for#
    )
    > '$output'

    ]]>

  </command>
  <!-- User-facing parameters: the datasets to merge, the shared-header switch,
       and the optional dataset-name prefix with its placement. -->
  <inputs>
    <param name="input_list"
           type="data"
           format="data"
           multiple="true"
           optional="false"
           label="Collection of files to collapse into single dataset"
           help=""/>
    <param name="one_header"
           type="boolean"
           display="checkboxes"
           label="Keep one header line"
           help="Combine first line of each file as the header for the final dataset. Useful when same header line is found in all files."/>
    <!-- The placement selector is only offered once "Prepend File name" is ticked. -->
    <conditional name="filename">
      <param name="add_name"
             type="boolean"
             display="checkboxes"
             label="Prepend File name"/>
      <when value="true">
        <param name="place_name" type="select" label="Where to add dataset name">
          <option value="same_once">Same line and only once per dataset</option>
          <option value="same_multiple">Same line and each line in dataset</option>
          <option value="above">Line Above</option>
        </param>
      </when>
      <when value="false"/>
    </conditional>
  </inputs>
  <!-- Single merged dataset; its datatype is taken from the input_list parameter. -->
  <outputs>
    <data name="output" format_source="input_list"/>
  </outputs>
  <tests>
    <!-- Default options: the inputs are concatenated as-is. -->
    <test>
      <param name="input_list" value="input1,input2"/>
      <output name="output" file="answer.txt"/>
    </test>
    <!-- One shared header, with the dataset name prefixed to every data line.
         add_name and place_name live inside the "filename" conditional, so they
         are set through a matching conditional element here. -->
    <test>
      <param name="input_list" value="strain1.tsv,strain2.tsv"/>
      <param name="one_header" value="true"/>
      <conditional name="filename">
        <param name="add_name" value="true"/>
        <param name="place_name" value="same_multiple"/>
      </conditional>
      <output name="output" file="answer2.tsv"/>
    </test>
    <!-- One shared header, no dataset names. -->
    <test>
      <param name="input_list" value="strain1.tsv,strain2.tsv"/>
      <param name="one_header" value="true"/>
      <output name="output" file="answer3.tsv"/>
    </test>
  </tests>
  <help>
	Combines a list collection into a single dataset, in collection order, with options to prepend each dataset's name or to keep a single copy of a header line shared by all files.
  </help>
  <!-- No citations are declared for this tool. -->
  <citations>
  </citations>
</tool>
author	nml
date	Thu, 13 Aug 2020 16:46:16 -0400
parents	830961c48e42
children