changeset 0:e1f0194cf8fc draft

Uploaded
author immport-devteam
date Mon, 27 Feb 2017 13:07:11 -0500
parents
children b94872d65050
files txt_diagnosis/test-data/input_error.txt txt_diagnosis/test-data/input_noerror.txt txt_diagnosis/test-data/output_error.txt txt_diagnosis/test-data/output_noerror.txt txt_diagnosis/txtDiagnosis.xml txt_diagnosis/txtdiagnosis.py
diffstat 6 files changed, 179 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/txt_diagnosis/test-data/input_error.txt	Mon Feb 27 13:07:11 2017 -0500
@@ -0,0 +1,16 @@
+Forward Scatter	Side Scatter	FITC CD4	PE CD25	PP CD3	APC CD45RA
+289	56	438	0	626	nan
+352	153	30	147	483	386
+383	190	156	228	734	408
+261	62	432	121	598	555
+451	120	537	338	568	nba
+373	104	3	110	621	584
+418	105	561	0	610	562
+358	185	0	292	641	327
+733	970	139	227	293	259
+765	1023	71	239	bkl	253
+762	957	143	158	271	255
+406	191	513	122	646	264
+695	1023	168	251	234	283
+336	178	0	146	128	we
+668	1023	167	306	302	253
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/txt_diagnosis/test-data/input_noerror.txt	Mon Feb 27 13:07:11 2017 -0500
@@ -0,0 +1,16 @@
+Forward Scatter	Side Scatter	FITC CD4	PE CD25	PP CD3	APC CD45RA
+289	56	438	0	626	0
+352	153	30	147	483	386
+383	190	156	228	734	408
+261	62	432	121	598	555
+451	120	537	338	568	111
+373	104	3	110	621	584
+418	105	561	0	610	562
+358	185	0	292	641	327
+733	970	139	227	293	259
+765	1023	71	239	54	253
+762	957	143	158	271	255
+406	191	513	122	646	264
+695	1023	168	251	234	283
+336	178	0	146	128	35
+668	1023	167	306	302	253
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/txt_diagnosis/test-data/output_error.txt	Mon Feb 27 13:07:11 2017 -0500
@@ -0,0 +1,3 @@
+WARNING: line 6 in input_error.txt contains non-numeric results
+WARNING: line 11 in input_error.txt contains non-numeric results
+WARNING: line 15 in input_error.txt contains non-numeric results
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/txt_diagnosis/test-data/output_noerror.txt	Mon Feb 27 13:07:11 2017 -0500
@@ -0,0 +1,1 @@
+No errors in the file.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/txt_diagnosis/txtDiagnosis.xml	Mon Feb 27 13:07:11 2017 -0500
@@ -0,0 +1,75 @@
+<tool id="txt_diagnosis" name="Check data" version="1.1">
+  <description> in txt-converted FCS files.</description>
+  <requirements>
+    <requirement type="package" version="0.17.1">pandas</requirement>
+  </requirements>
+  <stdio>
+    <exit_code range="1:" />
+  </stdio>
+  <command><![CDATA[
+    python $__tool_directory__/txtdiagnosis.py -i "${input}" -o "${output}" -n "${input.name}"
+  ]]>
+  </command>
+  <inputs>
+    <param format="flowtext" name="input" type="data" label="Text file to check"/>
+  </inputs>
+  <outputs>
+    <data format="txt" name="output" label="Report on ${input.name}"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="input" value="input_error.txt"/>
+      <output name="output" file="output_error.txt">
+        <assert_contents>
+          <has_text_matching text="WARNING: line 6 in .* contains non-numeric results"/>
+        </assert_contents>
+      </output>
+    </test>
+    <test>
+      <param name="input" value="input_noerror.txt"/>
+      <output name="output" file="output_noerror.txt">
+        <assert_contents>
+          <has_text text="No errors in the file."/>
+        </assert_contents>
+      </output>
+    </test>
+  </tests>
+  <help><![CDATA[
+   This tool looks for potential errors in txt-converted FCS files.
+
+-----
+
+**Input**
+
+This diagnosis tool reads in text files and checks that the data is all numeric.
+
+**Output**
+
+The output is a report with the errors and corresponding line numbers.
+
+-----
+
+**Example**
+
+*Input*::
+
+   Marker1 Marker2 Marker3
+   34      45      12
+   NaN     65      10
+   34      45      12
+   33      NaN     10
+   34      45      12
+   33      65      10
+   34      45      12
+   33      65      NaN
+   34      45      12
+   33      65      10
+
+*Output*::
+
+   WARNING: line 2 in example_file.txt contains non-numeric results
+   WARNING: line 4 in example_file.txt contains non-numeric results
+   WARNING: line 8 in example_file.txt contains non-numeric results
+  ]]>
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/txt_diagnosis/txtdiagnosis.py	Mon Feb 27 13:07:11 2017 -0500
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+######################################################################
+#                  Copyright (c) 2016 Northrop Grumman.
+#                          All rights reserved.
+######################################################################
+from __future__ import print_function
+from __future__ import division
+import pandas as pd
+from argparse import ArgumentParser
+import sys
+
+
+def is_number(s):
+    try:
+        float(s)
+        return True
+    except ValueError:
+        return False
+
+
+def error_report(input_file, fname, output_file):
+    errors = 0
+    df = pd.read_table(input_file)
+    with open(output_file, "w") as outf:
+        for cols in df.columns.values:
+            if df[cols].count() != len(df[cols]):
+                with open(input_file, "r") as checkfile:
+                    fl = checkfile.readline()
+                    count_lines = 1
+                    for checklines in checkfile:
+                        to_check = checklines.strip().split("\t")
+                        count_lines += 1
+                        for item in to_check:
+                            if not is_number(item):
+                                errors += 1
+                                outf.write(" ".join(["WARNING: line", str(count_lines), "in", fname, "contains non-numeric results\n"]))
+        if errors == 0:
+            outf.write("No errors in the file.\n")
+    return
+
+
+if __name__ == "__main__":
+    parser = ArgumentParser(
+             prog="txtDiagnosis",
+             description="Reports potential errors in text-converted FCS files")
+
+    parser.add_argument(
+            '-i',
+            dest="input_file",
+            required=True,
+            help="File location for the text file.")
+
+    parser.add_argument(
+            '-n',
+            dest="filename",
+            required=True,
+            help="Filename location for the text file.")
+
+    parser.add_argument(
+            '-o',
+            dest="output_file",
+            required=True,
+            help="Name of the output file.")
+
+    args = parser.parse_args()
+
+    error_report(args.input_file, args.filename, args.output_file)
+    sys.exit(0)