Repository 'txt_diagnosis'
hg clone https://toolshed.g2.bx.psu.edu/repos/immport-devteam/txt_diagnosis

Changeset 0:e1f0194cf8fc (2017-02-27)
Next changeset 1:b94872d65050 (2020-07-16)
Commit message:
Uploaded
added:
txt_diagnosis/test-data/input_error.txt
txt_diagnosis/test-data/input_noerror.txt
txt_diagnosis/test-data/output_error.txt
txt_diagnosis/test-data/output_noerror.txt
txt_diagnosis/txtDiagnosis.xml
txt_diagnosis/txtdiagnosis.py
b
diff -r 000000000000 -r e1f0194cf8fc txt_diagnosis/test-data/input_error.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/txt_diagnosis/test-data/input_error.txt Mon Feb 27 13:07:11 2017 -0500
b
@@ -0,0 +1,16 @@
+Forward Scatter Side Scatter FITC CD4 PE CD25 PP CD3 APC CD45RA
+289 56 438 0 626 nan
+352 153 30 147 483 386
+383 190 156 228 734 408
+261 62 432 121 598 555
+451 120 537 338 568 nba
+373 104 3 110 621 584
+418 105 561 0 610 562
+358 185 0 292 641 327
+733 970 139 227 293 259
+765 1023 71 239 bkl 253
+762 957 143 158 271 255
+406 191 513 122 646 264
+695 1023 168 251 234 283
+336 178 0 146 128 we
+668 1023 167 306 302 253
b
diff -r 000000000000 -r e1f0194cf8fc txt_diagnosis/test-data/input_noerror.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/txt_diagnosis/test-data/input_noerror.txt Mon Feb 27 13:07:11 2017 -0500
b
@@ -0,0 +1,16 @@
+Forward Scatter Side Scatter FITC CD4 PE CD25 PP CD3 APC CD45RA
+289 56 438 0 626 0
+352 153 30 147 483 386
+383 190 156 228 734 408
+261 62 432 121 598 555
+451 120 537 338 568 111
+373 104 3 110 621 584
+418 105 561 0 610 562
+358 185 0 292 641 327
+733 970 139 227 293 259
+765 1023 71 239 54 253
+762 957 143 158 271 255
+406 191 513 122 646 264
+695 1023 168 251 234 283
+336 178 0 146 128 35
+668 1023 167 306 302 253
b
diff -r 000000000000 -r e1f0194cf8fc txt_diagnosis/test-data/output_error.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/txt_diagnosis/test-data/output_error.txt Mon Feb 27 13:07:11 2017 -0500
b
@@ -0,0 +1,3 @@
+WARNING: line 6 in input_error.txt contains non-numeric results
+WARNING: line 11 in input_error.txt contains non-numeric results
+WARNING: line 15 in input_error.txt contains non-numeric results
b
diff -r 000000000000 -r e1f0194cf8fc txt_diagnosis/test-data/output_noerror.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/txt_diagnosis/test-data/output_noerror.txt Mon Feb 27 13:07:11 2017 -0500
b
@@ -0,0 +1,1 @@
+No errors in the file.
b
diff -r 000000000000 -r e1f0194cf8fc txt_diagnosis/txtDiagnosis.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/txt_diagnosis/txtDiagnosis.xml Mon Feb 27 13:07:11 2017 -0500
[
@@ -0,0 +1,75 @@
+<tool id="txt_diagnosis" name="Check data" version="1.1">
+  <description> in txt-converted FCS files.</description>
+  <requirements>
+    <requirement type="package" version="0.17.1">pandas</requirement>
+  </requirements>
+  <stdio>
+    <exit_code range="1:" />
+  </stdio>
+  <!-- Arguments are single-quoted so that shell metacharacters in the dataset name or paths are passed literally instead of being expanded. -->
+  <command><![CDATA[
+    python '$__tool_directory__/txtdiagnosis.py' -i '${input}' -o '${output}' -n '${input.name}'
+  ]]>
+  </command>
+  <inputs>
+    <param format="flowtext" name="input" type="data" label="Text file to check"/>
+  </inputs>
+  <outputs>
+    <data format="txt" name="output" label="Report on ${input.name}"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="input" value="input_error.txt"/>
+      <output name="output" file="output_error.txt">
+        <assert_contents>
+          <has_text_matching text="WARNING: line 6 in .* contains non-numeric results"/>
+        </assert_contents>
+      </output>
+    </test>
+    <test>
+      <param name="input" value="input_noerror.txt"/>
+      <output name="output" file="output_noerror.txt">
+        <assert_contents>
+          <has_text text="No errors in the file."/>
+        </assert_contents>
+      </output>
+    </test>
+  </tests>
+  <help><![CDATA[
+   This tool looks for potential errors in txt-converted FCS files.
+
+-----
+
+**Input**
+
+This diagnosis tool reads in text files, and checks that the data is all numeric.
+
+**Output**
+
+The output is a report with the errors and corresponding line numbers.
+
+-----
+
+**Example**
+
+*Input*::
+
+   Marker1 Marker2 Marker3
+   34      45      12
+   NaN     65      10
+   34      45      12
+   33      NaN     10
+   34      45      12
+   33      65      10
+   34      45      12
+   33      65      NaN
+   34      45      12
+   33      65      10
+
+*Output*::
+
+   WARNING: line 2 in example_file.txt contains non-numeric results
+   WARNING: line 4 in example_file.txt contains non-numeric results
+   WARNING: line 8 in example_file.txt contains non-numeric results
+  ]]>
+  </help>
+</tool>
b
diff -r 000000000000 -r e1f0194cf8fc txt_diagnosis/txtdiagnosis.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/txt_diagnosis/txtdiagnosis.py Mon Feb 27 13:07:11 2017 -0500
[
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+######################################################################
+#                  Copyright (c) 2016 Northrop Grumman.
+#                          All rights reserved.
+######################################################################
+from __future__ import print_function
+from __future__ import division
+import pandas as pd
+from argparse import ArgumentParser
+import sys
+
+
+def is_number(s):
+    """Return True when ``s`` can be converted by ``float()``.
+
+    Only ``ValueError`` is treated as "not a number". Note that ``float()``
+    also accepts the spellings "nan", "inf" and "infinity", so those
+    strings count as numbers here.
+    """
+    try:
+        float(s)
+    except ValueError:
+        return False
+    else:
+        return True
+
+
+def error_report(input_file, fname, output_file):
+    """Write a report of the lines of a text file that hold non-numeric data.
+
+    The first line of ``input_file`` is taken to be the header and is not
+    checked. Every following line is stripped, split on tabs, and each field
+    is tested with ``is_number``. A line with at least one failing field
+    gets exactly one warning in the report, numbered from 1 with the header
+    as line 1. A line that is empty after stripping yields a single empty
+    field and is therefore reported as well. If no line is flagged, the
+    report is the single line "No errors in the file.".
+
+    Args:
+        input_file: Path of the tab-separated text file to check.
+        fname: Name used for the file in the warning messages.
+        output_file: Path of the report file to write (overwritten).
+    """
+    errors = 0
+    # The file is scanned directly instead of being pre-screened through
+    # pandas for missing values: a column that contains text has no missing
+    # values, so such a screen lets non-numeric files pass as error-free,
+    # and rescanning once per affected column duplicates every warning.
+    with open(input_file, "r") as checkfile, open(output_file, "w") as outf:
+        # Skip the header row; the default keeps an empty file from raising.
+        next(checkfile, None)
+        # Data begins on line 2 of the file.
+        for line_number, line in enumerate(checkfile, 2):
+            fields = line.strip().split("\t")
+            # One warning per line, however many of its fields are bad.
+            if not all(is_number(field) for field in fields):
+                errors += 1
+                outf.write(
+                    "WARNING: line {0} in {1} contains non-numeric results\n"
+                    .format(line_number, fname))
+        if errors == 0:
+            outf.write("No errors in the file.\n")
+    return
+
+
+if __name__ == "__main__":
+    # Command-line entry point: parse the three required options, write the
+    # report, and exit with status 0.
+    parser = ArgumentParser(
+        prog="txtDiagnosis",
+        description="Reports potential errors in text-converted FCS files")
+
+    # (flag, destination attribute, help text), registered in this order.
+    option_table = (
+        ('-i', "input_file", "File location for the text file."),
+        ('-n', "filename", "Filename location for the text file."),
+        ('-o', "output_file", "Name of the output file."),
+    )
+    for flag, target, description in option_table:
+        parser.add_argument(flag, dest=target, required=True, help=description)
+
+    options = parser.parse_args()
+
+    error_report(options.input_file, options.filename, options.output_file)
+    sys.exit(0)