diff histogram.py @ 2:6f134426c2b0 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/histogram commit 5666c97386c843c109e45acce462243392285b84"
author devteam
date Mon, 27 Jul 2020 03:25:53 -0400
parents 6ff47de059a0
children
line wrap: on
line diff
--- a/histogram.py	Wed Nov 11 12:17:14 2015 -0500
+++ b/histogram.py	Mon Jul 27 03:25:53 2020 -0400
@@ -1,101 +1,100 @@
 #!/usr/bin/env python
-#Greg Von Kuster
+# Greg Von Kuster
 
 import sys
-from rpy import *
-
-assert sys.version_info[:2] >= ( 2, 4 )
 
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
+from rpy2.robjects import r, vectors
+from rpy2.robjects.packages import importr
+
 
 def main():
-
     # Handle input params
     in_fname = sys.argv[1]
-    out_fname = sys.argv[2] 
+    out_fname = sys.argv[2]
     try:
-        column = int( sys.argv[3] ) - 1
-    except:
-        stop_err( "Column not specified, your query does not contain a column of numerical data." )
+        column = int(sys.argv[3]) - 1
+    except Exception:
+        sys.exit("Column not specified, your query does not contain a column of numerical data.")
     title = sys.argv[4]
     xlab = sys.argv[5]
-    breaks = int( sys.argv[6] )
+    breaks = int(sys.argv[6])
     if breaks == 0:
         breaks = "Sturges"
     if sys.argv[7] == "true":
         density = True
-    else: density = False
-    if len( sys.argv ) >= 9 and sys.argv[8] == "true":
+    else:
+        density = False
+    if len(sys.argv) >= 9 and sys.argv[8] == "true":
         frequency = True
-    else: frequency = False
+    else:
+        frequency = False
 
     matrix = []
     skipped_lines = 0
     first_invalid_line = 0
     invalid_value = ''
     i = 0
-    for i, line in enumerate( file( in_fname ) ):
+    for i, line in enumerate(open(in_fname)):
         valid = True
         line = line.rstrip('\r\n')
         # Skip comments
-        if line and not line.startswith( '#' ): 
+        if line and not line.startswith('#'):
             # Extract values and convert to floats
             row = []
             try:
-                fields = line.split( "\t" )
+                fields = line.split("\t")
                 val = fields[column]
                 if val.lower() == "na":
-                    row.append( float( "nan" ) )
-            except:
+                    row.append(float("nan"))
+            except Exception:
                 valid = False
                 skipped_lines += 1
                 if not first_invalid_line:
-                    first_invalid_line = i+1
+                    first_invalid_line = i + 1
             else:
                 try:
-                    row.append( float( val ) )
+                    row.append(float(val))
                 except ValueError:
                     valid = False
                     skipped_lines += 1
                     if not first_invalid_line:
-                        first_invalid_line = i+1
+                        first_invalid_line = i + 1
                         invalid_value = fields[column]
         else:
             valid = False
             skipped_lines += 1
             if not first_invalid_line:
-                first_invalid_line = i+1
+                first_invalid_line = i + 1
 
         if valid:
-            matrix += row
+            matrix.extend(row)
 
     if skipped_lines < i:
         try:
-            a = r.array( matrix )
-            r.pdf( out_fname, 8, 8 )
-            histogram = r.hist( a, probability=not frequency, main=title, xlab=xlab, breaks=breaks )
+            grdevices = importr('grDevices')
+            graphics = importr('graphics')
+            vector = vectors.FloatVector(matrix)
+            grdevices.pdf(out_fname, 8, 8)
+            histogram = graphics.hist(vector, probability=not frequency, main=title, xlab=xlab, breaks=breaks)
             if density:
-                density = r.density( a )
+                density = r.density(vector)
                 if frequency:
-                    scale_factor = len( matrix ) * ( histogram['mids'][1] - histogram['mids'][0] ) #uniform bandwidth taken from first 2 midpoints
-                    density[ 'y' ] = map( lambda x: x * scale_factor, density[ 'y' ] )
-                r.lines( density )
-            r.dev_off()
-        except Exception, exc:
-            stop_err( "%s" %str( exc ) )
+                    scale_factor = len(matrix) * (histogram['mids'][1] - histogram['mids'][0])  # uniform bandwidth taken from first 2 midpoints
+                    density['y'] = map(lambda x: x * scale_factor, density['y'])
+                graphics.lines(density)
+            grdevices.dev_off()
+        except Exception as exc:
+            sys.exit("%s" % str(exc))
     else:
         if i == 0:
-            stop_err("Input dataset is empty.")
+            sys.exit("Input dataset is empty.")
         else:
-            stop_err( "All values in column %s are non-numeric." %sys.argv[3] )
+            sys.exit("All values in column %s are non-numeric." % sys.argv[3])
 
-    print "Histogram of column %s. " %sys.argv[3]
+    print("Histogram of column %s. " % sys.argv[3])
     if skipped_lines > 0:
-        print "Skipped %d invalid lines starting with line #%d, '%s'." % ( skipped_lines, first_invalid_line, invalid_value )
+        print("Skipped %d invalid lines starting with line #%d, '%s'." % (skipped_lines, first_invalid_line, invalid_value))
 
-    r.quit( save="no" )
-    
+
 if __name__ == "__main__":
     main()