changeset 1:3c0e4179be7a draft default tip

"planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
author azomics
date Mon, 22 Jun 2020 17:42:26 -0400
parents 426650130311
children
files FCStxtMergeDownsample.py FCStxtMergeDownsample.xml merge_ds_flowtext/FCStxtMergeDownsample.py merge_ds_flowtext/FCStxtMergeDownsample.xml merge_ds_flowtext/test-data/merge1.flowtext merge_ds_flowtext/test-data/merge2.flowtext merge_ds_flowtext/test-data/test1/input1.txt merge_ds_flowtext/test-data/test1/input2.txt merge_ds_flowtext/test-data/test1/input3.txt merge_ds_flowtext/test-data/test2/input1.txt merge_ds_flowtext/test-data/test2/input2.txt merge_ds_flowtext/test-data/test2/input3.txt test-data/merge1.flowtext test-data/merge2.flowtext test-data/test1/input1.txt test-data/test1/input2.txt test-data/test1/input3.txt test-data/test2/input1.txt test-data/test2/input2.txt test-data/test2/input3.txt
diffstat 20 files changed, 506 insertions(+), 504 deletions(-)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/FCStxtMergeDownsample.py	Mon Jun 22 17:42:26 2020 -0400
@@ -0,0 +1,243 @@
+#!/usr/bin/env python
+
+######################################################################
+#                  Copyright (c) 2016 Northrop Grumman.
+#                          All rights reserved.
+######################################################################
+
+from __future__ import print_function
+from __future__ import division
+import sys
+import os
+import pandas as pd
+from argparse import ArgumentParser
+
+
+def is_number(s):
+    try:
+        float(s)
+        return True
+    except ValueError:
+        return False
+
+
+def is_integer(s):
+    try:
+        int(s)
+        return True
+    except ValueError:
+        return False
+
+
+def compare_headers(files):
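+    # e.g. (hypothetical) headers "CD4 CCR3 CD8" and "CD4 CD8 CCR7" share
+    # ["cd4", "cd8"], returned lower-cased in first-file order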
+    headers = {}
+    for eachfile in files:
+        with open(eachfile, "r") as ef:
+            headers[eachfile] = ef.readline().strip().lower().split("\t")
+
+    hdgs_in_common = []
+    flag = {}
+
+    for ref_hdgs in headers[files[0]]:
+        flag[ref_hdgs] = 1
+
+        for ij in range(1, len(files)):
+            if ref_hdgs in headers[files[ij]]:
+                flag[ref_hdgs] += 1
+        if flag[ref_hdgs] == len(files):
+            hdgs_in_common.append(ref_hdgs)
+
+    if not hdgs_in_common:
+        sys.exit(9)
+    return(hdgs_in_common)
+
+
+def get_nb_lines(files):
+    tot_event = 0
+    for f in files:
+        df = pd.read_table(f)
+        tot_event += (len(df.index) - 1)
+    return(tot_event)
+
+
+def get_headers_index(list_headings, headings):
+    idxs = []
+    lhdgs = [x.lower() for x in headings]
+    for element in list_headings:
+        idxs.append(int(lhdgs.index(element)))
+    return(idxs)
+
+
+def merge_and_DS_txt(in_files, out_file, col_names, factor_ds):
+    """Concatenates together tab-separated files.
+    The output will have only the columns in common to all the files provided
+    as input, as determined by the headers.
+    All lines after the header line must contain only numbers.
+    Potential errors are logged to stderr. If the number of errors reaches 10,
+    the program stops.
+    If a downsampling factor is given, returns the indicated fraction of
+    random lines.
+    """
+
+    nb_errors = 0
+    max_error = 10
+
+    # get list of headers in common to all files
+    list_hdgs = compare_headers(in_files)
+    total_events = get_nb_lines(in_files)
+    total_final = total_events * factor_ds
+    nb_per_file = int(total_final / len(in_files))
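+    # e.g. 3 files of 1,000 events each at factor_ds 0.1:
+    # total_events = 2997, total_final = 299.7, nb_per_file = 99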
+
+    with open(out_file, "w") as outf:
+        ff_order = []
+        # HEADERS:
+        with open(in_files[0], "r") as first_file:
+            headings_ff = first_file.readline().strip()
+            headings = headings_ff.split("\t")
+            # Get index of headers in common:
+            hdrs_idx = get_headers_index(list_hdgs, headings)
+
+            # If columns to merge on were provided:
+            if col_names:
+                for ix in col_names:
+                    if ix not in hdrs_idx:
+                        nb_errors += 1
+                        sys.stderr.write(" ".join(["WARNING: column", str(ix), "in", in_files[0],
+                                                   "does not exist in all files or has a different header.\n"]))
+                        if nb_errors == max_error:
+                            exit_code = 4
+                            sys.stderr.write("Run aborted - too many errors.")
+                            os.remove(out_file)
+                            sys.exit(exit_code)
+                hdrs_idx = col_names
+
+            # Print out to output file:
+            headings_to_write = []
+            for cti in range(0, len(headings)):
+                if cti in hdrs_idx:
+                    headings_to_write.append(headings[cti])
+                    ff_order.append(headings[cti])
+            outf.write("\t".join(headings_to_write) + "\n")
+
+        # DATA
+        for infile in in_files:
+            with open(infile, "r") as inf:
+                headings_inf = inf.readline().strip()
+                hdgs = headings_inf.split("\t")
+                # Get the index of columns to keep:
+                hdgs_idx = []
+                for ctc in ff_order:
+                    hdgs_idx.append(int(hdgs.index(ctc)))
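+                # e.g. ff_order ["CD4", "CD8"] against header ["CD4", "CCR3", "CD8"] -> [0, 2]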
+                if col_names:
+                    for iy in col_names:
+                        if iy not in hdgs_idx:
+                            nb_errors += 1
+                            sys.stderr.write(" ".join(["WARNING: column", str(iy), "in", infile,
+                                                       "does not exist in all files or has a different header.\n"]))
+                            if nb_errors == max_error:
+                                exit_code = 4
+                                sys.stderr.write("Run aborted - too many errors.")
+                                os.remove(out_file)
+                                sys.exit(exit_code)
+                    hdgs_idx = col_names
+
+            df = pd.read_table(infile, usecols=hdgs_idx)  # use this file's own column positions
+            df_ds = df.sample(nb_per_file, replace=False)
+
+            for cols in df_ds.columns.values:
+                if df_ds[cols].count() != len(df_ds[cols]):
+                    sys.stderr.write(infile + "contains non-numeric data\n")
+
+                    with open(infile, "r") as checkfile:
+                        checkfile.readline()  # skip the header line
+                        count_lines = 1
+                        for checklines in checkfile:
+                            to_check = checklines.strip().split("\t")
+                            count_lines += 1
+                            for item in to_check:
+                                if not is_number(item):
+                                    sys.stderr.write(" ".join(["WARNING: line", str(count_lines),
+                                                               "in", infile, "contains non-numeric results\n"]))
+                    sys.exit(2)
+
+            df_ds = df_ds.loc[:, ff_order]
+            df_ds.to_csv(outf, sep="\t", header=False, index=False)
+
+    if nb_errors > 0:
+        exit_code = 3
+        if nb_errors == max_error:
+            exit_code = 4
+            sys.stderr.write("Run aborted - too many errors.")
+            os.remove(out_file)
+        sys.exit(exit_code)
+    return
+
+
+if __name__ == "__main__":
+    parser = ArgumentParser(
+             prog="FCStxtmerge",
+             description="Merge based on headers text-converted FCS files into one text file.")
+
+    parser.add_argument(
+            '-i',
+            dest="input_files",
+            required=True,
+            action='append',
+            help="File location for the text files.")
+
+    parser.add_argument(
+            '-o',
+            dest="output_file",
+            required=True,
+            help="Name of the output file.")
+
+    parser.add_argument(
+            '-c',
+            dest="columns",
+            help="Specify which column to keep in output file")
+
+    parser.add_argument(
+            '-d',
+            dest="downsampling_factor",
+            help="How much of each file to keep")
+
+    args = parser.parse_args()
+
+    # Get columns to merge on if any:
+    default_value_col = ["i.e.:1,2,5", "default", "Default"]
+    columns = []
+    if args.columns:
+        if args.columns not in default_value_col:
+            tmp_col = args.columns.split(",")
+            if len(tmp_col) == 1:
+                if not tmp_col[0].strip():
+                    columns = []
+                elif not is_integer(tmp_col[0].strip()):
+                    sys.exit(7)
+                else:
+                    columns.append(int(tmp_col[0].strip()) - 1)
+            else:
+                for c in range(0, len(tmp_col)):
+                    if not is_integer(tmp_col[c].strip()):
+                        sys.exit(6)
+                    else:
+                        columns.append(int(tmp_col[c].strip()) - 1)
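+    # e.g. "-c 1,2,5" yields columns == [0, 1, 4] (zero-based indices)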
+
+    # Get down sampling factor if any:
+    # Note: Galaxy sanitizes '%' in text parameters to 'X', so '10%' arrives as '10X'.
+    default_value_ds = ["i.e.:0.1 or 10X", "default", "Default"]
+    ds_factor = 0.1
+    if args.downsampling_factor:
+        if args.downsampling_factor not in default_value_ds:
+            args.downsampling_factor = args.downsampling_factor.strip()
+            downsampling_factor = args.downsampling_factor.rstrip("X")
+            if is_number(downsampling_factor):
+                ds_factor = float(downsampling_factor)
+                if ds_factor > 1 and ds_factor <= 100:
+                    ds_factor = float(downsampling_factor) / 100
+                elif ds_factor > 100 or ds_factor <= 0:
+                    sys.stderr.write("Invalid downsampling factor: " + str(ds_factor) + "\n")
+                    sys.exit(8)
+            else:
+                sys.exit(8)
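+    # e.g. "-d 10X" (Galaxy-sanitized "10%") and "-d 0.1" both yield ds_factor == 0.1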
+
+    input_files = [f for f in args.input_files]
+    merge_and_DS_txt(input_files, args.output_file, columns, ds_factor)
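+
+# Example invocation (hypothetical file names; '%' may arrive as 'X'):
+#   python FCStxtMergeDownsample.py -i input1.txt -i input2.txt -i input3.txt \
+#       -o merged.flowtext -c 1,2,3 -d 10X
+# keeps columns 1-3 and ~10% of the pooled events, drawn equally from each file.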
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/FCStxtMergeDownsample.xml	Mon Jun 22 17:42:26 2020 -0400
@@ -0,0 +1,183 @@
+<tool id="fcstxt_merge_downsample" name="Merge and downsample" version="1.1+galaxy0">
+  <description>txt-converted FCS files into one text file based on headers</description>
+  <requirements>
+    <requirement type="package" version="0.17.1">pandas</requirement>
+  </requirements>
+  <stdio>
+    <exit_code range="2" level="fatal" description="Non-numeric data. See stderr for more details." />
+    <exit_code range="3" level="warning" description="Selected columns do not exist in all files" />
+    <exit_code range="4" level="fatal" description="Run aborted - too many errors" />
+    <exit_code range="6" level="fatal" description="Please provide integers for columns you want to merge on." />
+    <exit_code range="7" level="fatal" description="Please provide a comma separated list of integers for columns you want to merge on." />
+    <exit_code range="8" level="fatal" description="Please provide a numeric value [0,1] for the downsampling factor." />
+    <exit_code range="9" level="fatal" description="There are no columns in common to all files." />
+  </stdio>
+  <command><![CDATA[
+      python '$__tool_directory__/FCStxtMergeDownsample.py' -o '${output_file}' -d '${factorDS}'
+  #if $columns
+    -c '${columns}'
+  #end if
+  #for $f in $input
+    -i '${f}'
+  #end for
+  ]]>
+  </command>
+  <inputs>
+    <param format="flowtext,txt,tabular" name="input" type="data_collection" collection_type="list" label="Text files Collection"/>
+    <param name="factorDS" type="text" label="Downsample by:" value="i.e.:0.1 or 10%" optional="true" help="1 by default (no downsampling)."/>
+    <param name="columns" type="text" label="Merge columns:" value="i.e.:1,2,5" optional="true" help="By default, will merge on the columns in common to all files.">
+    </param>
+  </inputs>
+  <outputs>
+    <data format="flowtext" name="output_file" label="Merge flowtext on ${input.name}"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="input">
+        <collection type="list">
+          <element name="input1.txt" value="test1/input1.txt"/>
+          <element name="input2.txt" value="test1/input2.txt"/>
+          <element name="input3.txt" value="test1/input3.txt"/>
+        </collection>
+      </param>
+      <param name="factorDS" value=".8"/>
+      <param name="columns" value="i.e.:1,2,5"/>
+      <output name="output_file" file="merge1.flowtext" compare="sim_size"/>
+    </test>
+    <test>
+      <param name="input">
+        <collection type="list">
+          <element name="input1.txt" value="test2/input1.txt"/>
+          <element name="input2.txt" value="test2/input2.txt"/>
+          <element name="input3.txt" value="test2/input3.txt"/>
+        </collection>
+      </param>
+      <param name="factorDS" value="i.e.:0.1 or 10%"/>
+      <param name="columns" value="1,2,3"/>
+      <output name="output_file" file="merge2.flowtext" compare="sim_size"/>
+    </test>
+  </tests>
+  <help><![CDATA[
+   This tool downsamples and merges multiple txt-converted FCS files into one text file.
+
+-----
+
+**Input files**
+
+This tool requires collections of txt, flowtext or tabular files as input.
+
+**Downsampling**
+
+If no downsampling factor is entered, a default factor of 0.1 (10%) is applied. Each file in the input dataset collection is downsampled randomly without replacement as follows:
+
+- If n is between 0 and 1, the size of the output will be n times that of the input files.
+- If n is between 1 and 100, the size of the output will be n% that of the input files.
+
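+For example (illustrative numbers), with three input files of 1,000 events each::
+
+   factor 0.1    keeps ~100 events per file (~300 total)
+   factor 10%    same as 0.1 (values in (1,100] are divided by 100)
+   factor 0.5    keeps ~500 events per file
+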
+.. class:: infomark
+
+Downsampling is implemented such that each file contributes an equal number of events to the aggregate.
+
+.. class:: warningmark
+
+At this time, up-sampling is not supported. If the number provided is greater than 100 (or is not a positive number), the tool will exit.
+
+**Output file**
+
+The output flowtext file is a concatenation of the input files, provided all data after each header line are numeric. By default, only columns present in all input files (as determined by the headers) are concatenated. The user can specify which columns to merge, bypassing the header check. If a downsampling factor is provided, only the corresponding fraction of each input file is kept (and checked for errors).
+
+.. class:: warningmark
+
+Potential errors are logged to stderr. If the number of errors reaches 10, the run will be aborted. If a file contains non-numeric data, the run will be aborted.
+
+.. class:: infomark
+
+Tip: Three tools in the Flow File Tools section can help prepare files for merging and/or downsampling:
+
+- The Check headers tool provides a list of headers for all files in a collection of text, flowtext or tabular files.
+- The Remove, rearrange and/or rename columns tool allows manipulation of the columns of a file or a set of files.
+- The Check data tool identifies the lines in a file containing non-numeric data.
+
+-----
+
+**Example**
+
+*File1*::
+
+   Marker1 Marker2 Marker3
+   34      45      12
+   33      65      10
+   87      26      76
+   24      56      32
+   95      83      53
+   74      15      87
+   ...     ...     ...
+
+*File2*::
+
+   Marker4 Marker5 Marker3
+   19      62      98
+   12      36      58
+   41      42      68
+   76      74      53
+   62      34      45
+   93      21      76
+   ...     ...     ...
+
+*Output*
+
+.. class:: infomark
+
+If run without specifying the columns::
+
+   Marker3
+   12
+   10
+   76
+   32
+   53
+   87
+   98
+   58
+   68
+   53
+   45
+   76
+   ...
+
+.. class:: infomark
+
+If run specifying columns 1,2,3::
+
+   Marker1 Marker2 Marker3
+   34      45      12
+   33      65      10
+   87      26      76
+   24      56      32
+   95      83      53
+   74      15      87
+   19      62      98
+   12      36      58
+   41      42      68
+   76      74      53
+   62      34      45
+   93      21      76
+   ...     ...     ...
+
+.. class:: infomark
+
+If run specifying columns 1,2,3 and with a downsampling factor of 0.5::
+
+   Marker1 Marker2 Marker3
+   34      45      12
+   24      56      32
+   95      83      53
+   19      62      98
+   12      36      58
+   62      34      45
+   ...     ...     ...
+ ]]>
+  </help>
+  <citations>
+    <citation type="doi">10.1038/srep02327</citation>
+  </citations>
+</tool>
--- a/merge_ds_flowtext/FCStxtMergeDownsample.py	Mon Feb 27 13:03:02 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,225 +0,0 @@
-#!/usr/bin/env python
-
-######################################################################
-#                  Copyright (c) 2016 Northrop Grumman.
-#                          All rights reserved.
-######################################################################
-
-from __future__ import print_function
-from __future__ import division
-import sys
-import os
-import pandas as pd
-from argparse import ArgumentParser
-
-
-def is_number(s):
-    try:
-        float(s)
-        return True
-    except ValueError:
-        return False
-
-
-def is_integer(s):
-    try:
-        int(s)
-        return True
-    except ValueError:
-        return False
-
-
-def compare_headers(files):
-    headers = {}
-    for eachfile in files:
-        with open(eachfile, "r") as ef:
-            headers[eachfile] = ef.readline().strip().lower().split("\t")
-
-    hdgs_in_common = []
-    flag = {}
-
-    for ref_hdgs in headers[files[0]]:
-        flag[ref_hdgs] = 1
-
-        for ij in range(1, len(files)):
-            if ref_hdgs in headers[files[ij]]:
-                flag[ref_hdgs] += 1
-        if flag[ref_hdgs] == len(files):
-            hdgs_in_common.append(ref_hdgs)
-
-    if not hdgs_in_common:
-        sys.exit(9)
-    return(hdgs_in_common)
-
-
-def get_headers_index(list_headings, headings):
-    idxs = []
-    lhdgs = [x.lower() for x in headings]
-    for element in list_headings:
-        idxs.append(int(lhdgs.index(element)))
-    return(idxs)
-
-
-def merge_and_DS_txt(in_files, out_file, col_names, factor_ds):
-    """Concatenates together tab-separated files.
-    The output will have only the columns in common to all the files provided
-    as input, as determined by the headers.
-    All lines after the header line must contain only numbers.
-    Potential errors are logged to stderr. If the number of errors reaches 10,
-    the program stops.
-    If a downsampling factor is given, returns the indicated fraction of
-    random lines.
-    """
-
-    nb_errors = 0
-    max_error = 10
-
-    # get list of headers in common to all files
-    list_hdgs = compare_headers(in_files)
-
-    with open(out_file, "w") as outf:
-        ff_order = []
-        # HEADERS:
-        with open(in_files[0], "r") as first_file:
-            headings_ff = first_file.readline().strip()
-            headings = headings_ff.split("\t")
-            # Get index of headers in common:
-            hdrs_idx = get_headers_index(list_hdgs, headings)
-
-            # If column to merge on were provided:
-            if col_names:
-                for ix in col_names:
-                    if ix not in hdrs_idx:
-                        nb_errors += 1
-                        sys.stderr.write(" ".join(["WARNING: column", str(ix), "in", in_files[0],
-                                                   "does not exist in all files or has a different header.\n"]))
-                hdrs_idx = col_names
-
-            # Print out to output file:
-            headings_to_write = []
-            for cti in range(0, len(headings)):
-                if cti in hdrs_idx:
-                    headings_to_write.append(headings[cti])
-                    ff_order.append(headings[cti])
-            outf.write("\t".join(headings_to_write) + "\n")
-
-        # DATA
-        for infile in in_files:
-            with open(infile, "r") as inf:
-                headings_inf = inf.readline().strip()
-                hdgs = headings_inf.split("\t")
-                # Get the index of columns to keep:
-                hdgs_idx = []
-                for ctc in ff_order:
-                    hdgs_idx.append(int(hdgs.index(ctc)))
-                if col_names:
-                    for iy in col_names:
-                        if iy not in hdgs_idx:
-                            nb_errors += 1
-                            sys.stderr.write(" ".join(["WARNING: column", str(iy), "in", infile,
-                                                       "does not exist in all files or has a different header.\n"]))
-                    hdgs_idx = col_names
-
-            df = pd.read_table(infile, usecols=hdrs_idx)
-            wc_file = len(df.index) - 1
-            df_ds = df.sample(int(wc_file * factor_ds), replace=False)
-
-            for cols in df_ds.columns.values:
-                if df_ds[cols].count() != len(df_ds[cols]):
-                    sys.stderr.write(infile + "contains non-numeric data\n")
-
-                    with open(infile, "r") as checkfile:
-                        fl = checkfile.readline()
-                        count_lines = 1
-                        for checklines in checkfile:
-                            to_check = checklines.strip().split("\t")
-                            count_lines += 1
-                            for item in to_check:
-                                if not is_number(item):
-                                    sys.stderr.write(" ".join(["WARNING: line", str(count_lines),
-                                                               "in", infile, "contains non-numeric results\n"]))
-                    sys.exit(2)
-
-            df_ds = df_ds.ix[:, ff_order]
-            df_ds.to_csv(outf, sep="\t", header=False, index=False)
-
-    if nb_errors > 0:
-        exit_code = 3
-        if nb_errors == max_error:
-            exit_code = 4
-            sys.stderr.write("Run aborted - too many errors.")
-            os.remove(out_file)
-        sys.exit(exit_code)
-    return
-
-
-if __name__ == "__main__":
-    parser = ArgumentParser(
-             prog="FCStxtmerge",
-             description="Merge based on headers text-converted FCS files into one text file.")
-
-    parser.add_argument(
-            '-i',
-            dest="input_files",
-            required=True,
-            action='append',
-            help="File location for the text files.")
-
-    parser.add_argument(
-            '-o',
-            dest="output_file",
-            required=True,
-            help="Name of the output file.")
-
-    parser.add_argument(
-            '-c',
-            dest="columns",
-            help="Specify which column to keep in output file")
-
-    parser.add_argument(
-            '-d',
-            dest="downsampling_factor",
-            help="How much of each file to keep")
-
-    args = parser.parse_args()
-
-    # Get columns to merge on if any:
-    default_value_col = ["i.e.:1,2,5", "default", "Default"]
-    columns = []
-    if args.columns:
-        if args.columns not in default_value_col:
-            tmp_col = args.columns.split(",")
-            if len(tmp_col) == 1:
-                if not tmp_col[0].strip():
-                    columns = []
-                elif not is_integer(tmp_col[0].strip()):
-                    sys.exit(7)
-                else:
-                    columns.append(int(tmp_col[0].strip()) - 1)
-            else:
-                for c in range(0, len(tmp_col)):
-                    if not is_integer(tmp_col[c].strip()):
-                        sys.exit(6)
-                    else:
-                        columns.append(int(tmp_col[c].strip()) - 1)
-
-    # Get down sampling factor if any:
-    # Note: change '%' to 'X' because somehow that's what Galaxy passes?
-    default_value_ds = ["i.e.:0.1 or 10X", "default", "Default"]
-    ds_factor = 1
-    if args.downsampling_factor:
-        if args.downsampling_factor not in default_value_ds:
-            args.downsampling_factor = args.downsampling_factor.strip()
-            downsampling_factor = args.downsampling_factor.rstrip("X")
-            if is_number(downsampling_factor):
-                ds_factor = float(downsampling_factor)
-                if ds_factor > 1:
-                    ds_factor = float(downsampling_factor) / 100
-                if ds_factor > 100:
-                    sys.exit(8)
-            else:
-                sys.exit(8)
-
-    input_files = [f for f in args.input_files]
-    merge_and_DS_txt(input_files, args.output_file, columns, ds_factor)
-    sys.exit(0)
--- a/merge_ds_flowtext/FCStxtMergeDownsample.xml	Mon Feb 27 13:03:02 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,175 +0,0 @@
-<tool id="fcstxt_merge_downsample" name="Downsample and merge" version="1.1">
-  <description>txt-converted FCS files into one text file based on headers.</description>
-  <requirements>
-    <requirement type="package" version="1.10.2">numpy</requirement>
-    <requirement type="package" version="0.17.1">pandas</requirement>
-  </requirements>
-  <stdio>
-    <exit_code range="2" level="fatal" description="Non-numeric data. See stderr for more details." />
-    <exit_code range="3" level="warning" description="Selected columns do not exist in all files" />
-    <exit_code range="4" level="fatal" description="Run aborted - too many errors" />
-    <exit_code range="6" level="fatal" description="Please provide integers for columns you want to merge on." />
-    <exit_code range="7" level="fatal" description="Please provide a comma separated list of integers for columns you want to merge on." />
-    <exit_code range="8" level="fatal" description="Please provide a numeric value [0,1] for the downsampling factor." />
-    <exit_code range="9" level="fatal" description="There are no columns in common to all files." />
-  </stdio>
-  <command><![CDATA[
-    python $__tool_directory__/FCStxtMergeDownsample.py -o "${output_file}" -d "${factorDS}"
- #if $columns
-    -c "${columns}"
- #end if
- #for $f in $input#
-    -i "${f}"
- #end for#
-  ]]>
-  </command>
-  <inputs>
-    <param format="flowtext" name="input" type="data_collection" collection_type="list" label="Text files Collection"/>
-    <param name="factorDS" type="text" label="Downsample by:" value="i.e.:0.1 or 10%" optional="true" help="1 by default (no downsampling)."/>
-    <param name="columns" type="text" label="Merge columns:" value="i.e.:1,2,5" optional="true" help="By default, will merge on the columns in common to all files.">
-    </param>
-  </inputs>
-  <outputs>
-    <data format="flowtext" name="output_file" label="Merge flowtext on ${input.name}"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input">
-        <collection type="list">
-          <element name="input1.txt" value="test1/input1.txt"/>
-          <element name="input2.txt" value="test1/input2.txt"/>
-          <element name="input3.txt" value="test1/input3.txt"/>
-        </collection>
-      </param>
-      <param name="factorDS" value=".8"/>
-      <param name="columns" value="i.e.:1,2,5"/>
-      <output name="output_file" file="merge1.flowtext" compare="sim_size"/>
-    </test>
-    <test>
-      <param name="input">
-        <collection type="list">
-          <element name="input1.txt" value="test2/input1.txt"/>
-          <element name="input2.txt" value="test2/input2.txt"/>
-          <element name="input3.txt" value="test2/input3.txt"/>
-        </collection>
-      </param>
-      <param name="factorDS" value="i.e.:0.1 or 10%"/>
-      <param name="columns" value="1,2,3"/>
-      <output name="output_file" file="merge2.flowtext" compare="sim_size"/>
-    </test>
-  </tests>
-  <help><![CDATA[
-   This tool downsamples and merges multiple txt-converted FCS files into one text file.
-
------
-
-**Input files**
-
-This tool requires collections of txt, flowtext or tabular files as input.
-
-**Downsampling**
-
-By default, files are not downsampled. If a downsampling factor is provided, each file in the input dataset collection will be downsampled randomly without replacement as follows:
-
-- If n is between 0 and 1, the size of the output will be n times that of the input files.
-- If n is between 1 and 100, the size of the output will be n% that of the input files.
-
-.. class:: warningmark
-
-At this time, up-sampling is not supported. If the number provided is greater than 100, the tool will exit.
-
-**Output file**
-
-The output flowtext file contains is a concatenation of the input files provided all data after the header contains only numbers. By default, only columns existing in all input files (as assessed by the header) are concatenated. The user can specify columns to merge, bypassing the headers check. If a downsampling factor is provided, the corresponding proportion of each input file ONLY will be read in (and checked for errors).
-
-.. class:: warningmark
-
-Potential errors are logged to stderr. If the number of errors reaches 10, the run will be aborted. If a file contains non-numeric data, the run will be aborted.
-
-.. class:: infomark
-
-Tip: Three tools in the Flow File Tools section can help prepare files for merging and/or downsampling:
-
-- Check headers tool provides a list of headers for all files in a collection of text, flowtext or tabular files.
-- Remove, rearrange and/or rename columns tool allows manipulation of the columns of a file or a set of files.
-- Check data tool identifies the lines in a file containing non-numeric data.
-
------
-
-**Example**
-
-*File1*::
-
-   Marker1 Marker2 Marker3
-   34      45      12
-   33      65      10
-   87      26      76
-   24      56      32
-   95      83      53
-   74      15      87
-
-*File2*::
-
-   Marker4 Marker5 Marker3
-   19      62      98
-   12      36      58
-   41      42      68
-   76      74      53
-   62      34      45
-   93      21      76
-
-*Output*
-
-.. class:: infomark
-
-If run without specifying the columns::
-
-   Marker3
-   12
-   10
-   76
-   32
-   53
-   87
-   98
-   58
-   68
-   53
-   45
-   76
-
-.. class:: infomark
-
-If run specifying columns 1,2,3::
-
-   Marker1 Marker2 Marker3
-   34      45      12
-   33      65      10
-   87      26      76
-   24      56      32
-   95      83      53
-   74      15      87
-   19      62      98
-   12      36      58
-   41      42      68
-   76      74      53
-   62      34      45
-   93      21      76
-
-.. class:: infomark
-
-If run specifying columns 1,2,3 and with a downsampling factor of 0.5::
-
-   Marker1 Marker2 Marker3
-   34      45      12
-   24      56      32
-   95      83      53
-   19      62      98
-   12      36      58
-   62      34      45
- ]]>
-  </help>
-  <citations>
-    <citation type="doi">10.1038/srep02327</citation>
-  </citations>
-</tool>
--- a/merge_ds_flowtext/test-data/merge1.flowtext	Mon Feb 27 13:03:02 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,19 +0,0 @@
-CD4	CCR3	CD8	CCR7
-437	69	0	146
-551	129	169	292
-199	277	320	227
-83	138	335	194
-534	111	83	177
-499	0	0	224
-175	361	225	237
-216	310	270	294
-519	44	51	148
-550	200	0	127
-552	479	0	62
-525	121	0	138
-438	0	626	480
-139	227	293	259
-0	292	641	327
-30	147	483	386
-537	338	568	201
-156	228	734	408
--- a/merge_ds_flowtext/test-data/merge2.flowtext	Mon Feb 27 13:03:02 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,25 +0,0 @@
-Forward Scatter	Side Scatter	FITC CD4
-340	115	509
-262	73	437
-894	1023	199
-316	76	50
-449	157	551
-388	97	534
-383	139	499
-394	144	83
-372	126	519
-788	1023	216
-1023	1023	289
-363	76	550
-668	1019	73
-420	211	552
-770	1023	175
-602	578	385
-418	105	561
-352	153	30
-383	190	156
-733	970	139
-451	120	537
-373	104	3
-358	185	0
-289	56	438
--- a/merge_ds_flowtext/test-data/test1/input1.txt	Mon Feb 27 13:03:02 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-CD4	CCR3	CD8	CCR7
-551	129	169	292
-199	277	320	227
-437	69	0	146
-509	268	0	74
-50	0	60	129
-83	138	335	194
-499	0	0	224
-239	284	288	280
-534	111	83	177
--- a/merge_ds_flowtext/test-data/test1/input2.txt	Mon Feb 27 13:03:02 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-CD4	CCR3	CD8	CCR7
-550	200	0	127
-519	44	51	148
-289	401	362	254
-175	361	225	237
-525	121	0	138
-385	286	222	131
-216	310	270	294
-552	479	0	62
-73	193	227	132
--- a/merge_ds_flowtext/test-data/test1/input3.txt	Mon Feb 27 13:03:02 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-CD4	CCR3	CD8	CCR7
-438	0	626	480
-30	147	483	386
-156	228	734	408
-432	121	598	555
-537	338	568	201
-3	110	621	584
-561	0	610	562
-0	292	641	327
-139	227	293	259
--- a/merge_ds_flowtext/test-data/test2/input1.txt	Mon Feb 27 13:03:02 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-Forward Scatter	Side Scatter	FITC CD4	PE CCR3	PP CD8	APC CCR4
-449	157	551	129	169	292
-894	1023	199	277	320	227
-262	73	437	69	0	146
-340	115	509	268	0	74
-316	76	50	0	60	129
-394	144	83	138	335	194
-383	139	499	0	0	224
-800	1023	239	284	288	280
-388	97	534	111	83	177
--- a/merge_ds_flowtext/test-data/test2/input2.txt	Mon Feb 27 13:03:02 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-Forward Scatter	Side Scatter	FITC CD4	PE CXCR3	PP CD8	APC CCR5
-363	76	550	200	0	127
-372	126	519	44	51	148
-1023	1023	289	401	362	254
-770	1023	175	361	225	237
-384	111	525	121	0	138
-602	578	385	286	222	131
-788	1023	216	310	270	294
-420	211	552	479	0	62
-668	1019	73	193	227	132
--- a/merge_ds_flowtext/test-data/test2/input3.txt	Mon Feb 27 13:03:02 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-Forward Scatter	Side Scatter	FITC CD4	PE CD25	PP CD3	APC CD45RA
-289	56	438	0	626	480
-352	153	30	147	483	386
-383	190	156	228	734	408
-261	62	432	121	598	555
-451	120	537	338	568	201
-373	104	3	110	621	584
-418	105	561	0	610	562
-358	185	0	292	641	327
-733	970	139	227	293	259
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merge1.flowtext	Mon Jun 22 17:42:26 2020 -0400
@@ -0,0 +1,19 @@
+CD4	CCR3	CD8	CCR7
+432	121	598	555
+537	338	568	201
+438	0	626	480
+30	147	483	386
+561	0	610	562
+139	227	293	259
+385	286	222	131
+175	361	225	237
+525	121	0	138
+216	310	270	294
+289	401	362	254
+550	200	0	127
+83	138	335	194
+534	111	83	177
+437	69	0	146
+199	277	320	227
+509	268	0	74
+50	0	60	129
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merge2.flowtext	Mon Jun 22 17:42:26 2020 -0400
@@ -0,0 +1,1 @@
+Forward Scatter	Side Scatter	FITC CD4
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test1/input1.txt	Mon Jun 22 17:42:26 2020 -0400
@@ -0,0 +1,10 @@
+CD4	CCR3	CD8	CCR7
+551	129	169	292
+199	277	320	227
+437	69	0	146
+509	268	0	74
+50	0	60	129
+83	138	335	194
+499	0	0	224
+239	284	288	280
+534	111	83	177
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test1/input2.txt	Mon Jun 22 17:42:26 2020 -0400
@@ -0,0 +1,10 @@
+CD4	CCR3	CD8	CCR7
+550	200	0	127
+519	44	51	148
+289	401	362	254
+175	361	225	237
+525	121	0	138
+385	286	222	131
+216	310	270	294
+552	479	0	62
+73	193	227	132
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test1/input3.txt	Mon Jun 22 17:42:26 2020 -0400
@@ -0,0 +1,10 @@
+CD4	CCR3	CD8	CCR7
+438	0	626	480
+30	147	483	386
+156	228	734	408
+432	121	598	555
+537	338	568	201
+3	110	621	584
+561	0	610	562
+0	292	641	327
+139	227	293	259
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test2/input1.txt	Mon Jun 22 17:42:26 2020 -0400
@@ -0,0 +1,10 @@
+Forward Scatter	Side Scatter	FITC CD4	PE CCR3	PP CD8	APC CCR4
+449	157	551	129	169	292
+894	1023	199	277	320	227
+262	73	437	69	0	146
+340	115	509	268	0	74
+316	76	50	0	60	129
+394	144	83	138	335	194
+383	139	499	0	0	224
+800	1023	239	284	288	280
+388	97	534	111	83	177
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test2/input2.txt	Mon Jun 22 17:42:26 2020 -0400
@@ -0,0 +1,10 @@
+Forward Scatter	Side Scatter	FITC CD4	PE CXCR3	PP CD8	APC CCR5
+363	76	550	200	0	127
+372	126	519	44	51	148
+1023	1023	289	401	362	254
+770	1023	175	361	225	237
+384	111	525	121	0	138
+602	578	385	286	222	131
+788	1023	216	310	270	294
+420	211	552	479	0	62
+668	1019	73	193	227	132
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test2/input3.txt	Mon Jun 22 17:42:26 2020 -0400
@@ -0,0 +1,10 @@
+Forward Scatter	Side Scatter	FITC CD4	PE CD25	PP CD3	APC CD45RA
+289	56	438	0	626	480
+352	153	30	147	483	386
+383	190	156	228	734	408
+261	62	432	121	598	555
+451	120	537	338	568	201
+373	104	3	110	621	584
+418	105	561	0	610	562
+358	185	0	292	641	327
+733	970	139	227	293	259