changeset 0:d0fbc0d93d05 draft

Uploaded
author elindfor
date Tue, 14 Jan 2014 11:06:38 -0500
parents
children 2ab9940c1fb2
files SumTest.py
diffstat 1 files changed, 85 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SumTest.py	Tue Jan 14 11:06:38 2014 -0500
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+#Erno Lindfors
+
+import sys
+from rpy import *
+
+def stop_err(msg):
+    """Abort: sys.exit() prints the string msg to stderr and exits with status 1."""
+    sys.exit(msg)
+
+def main():
+    """Sum one numeric column of a tab-separated file and write the total.
+
+    Command line: <input file> <output file> <1-based column number>.
+    Blank lines, '#' comment lines and rows whose value in that column is
+    missing or non-numeric are skipped and counted.  "NA" (any case) is read
+    as NaN, which makes the total NaN.  The output file receives a "Sum"
+    header line followed by the total.  Failures are reported through
+    stop_err(); a short summary is printed to stdout on success.
+    """
+
+    try:
+        in_fname = sys.argv[1]
+        out_fname = sys.argv[2]
+        column = int( sys.argv[3] ) - 1
+        if column < 0:
+            raise ValueError( "column numbers start at 1" )
+    except ( IndexError, ValueError ):
+        stop_err( "Columns not specified, your query does not contain a column of numerical data." )
+
+    # Running total plus the bookkeeping needed for the skipped-line summary.
+    total = 0.0
+    valid_lines = 0
+    skipped_lines = 0
+    first_invalid_line = 0
+    invalid_value = ''
+    invalid_column = 0
+    try:
+        in_file = open( in_fname )
+    except IOError as exc:
+        stop_err( "%s" % str( exc ) )
+    with in_file:
+        for i, line in enumerate( in_file ):
+            line = line.rstrip( '\r\n' )
+            if line and not line.startswith( '#' ):
+                fields = line.split( "\t" )
+                try:
+                    val = fields[column]
+                    # "NA" (any case) marks a missing value and is read as NaN.
+                    number = float( "nan" ) if val.lower() == "na" else float( val )
+                except ( IndexError, ValueError ):
+                    # Keep the details of the first bad value for the summary.
+                    if not first_invalid_line:
+                        invalid_value = fields[column] if column < len( fields ) else ''
+                        invalid_column = column + 1
+                else:
+                    total += number
+                    valid_lines += 1
+                    continue
+            # Reached for blank lines, comment lines and bad values alike.
+            skipped_lines += 1
+            if not first_invalid_line:
+                first_invalid_line = i + 1
+
+    # Count valid rows directly; comparing skipped_lines with the last line
+    # index would reject inputs that contain exactly one valid row.
+    if not valid_lines:
+        stop_err( "All values in column %s are non-numeric or empty." % sys.argv[3] )
+
+    try:
+        with open( out_fname, "w" ) as out_file:
+            # 15 significant digits; whole-number totals print without ".0".
+            out_file.write( "Sum\n%.15g\n" % total )
+    except IOError as exc:
+        stop_err( "%s" % str( exc ) )
+
+    print( "Sum of column %s. " % sys.argv[3] )
+    if skipped_lines > 0:
+        print( "Skipped %d lines starting with line #%d, value '%s' in column %d is not numeric." % ( skipped_lines, first_invalid_line, invalid_value, invalid_column ) )
+
+    # NOTE(review): presumably ends the R session opened by rpy without saving it.
+    r.quit( save="no" )
+
+if __name__ == "__main__":  # run main() only when executed as a script, not on import
+    main()
\ No newline at end of file