changeset 2:6f134426c2b0 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/histogram commit 5666c97386c843c109e45acce462243392285b84"
author devteam
date Mon, 27 Jul 2020 03:25:53 -0400
parents cdb9e89e2970
children
files histogram.py histogram2.xml plot_filter.py plotter.py tool_dependencies.xml
diffstat 5 files changed, 56 insertions(+), 164 deletions(-) [+]
line wrap: on
line diff
--- a/histogram.py	Wed Nov 11 12:17:14 2015 -0500
+++ b/histogram.py	Mon Jul 27 03:25:53 2020 -0400
@@ -1,101 +1,100 @@
 #!/usr/bin/env python
-#Greg Von Kuster
+# Greg Von Kuster
 
 import sys
-from rpy import *
-
-assert sys.version_info[:2] >= ( 2, 4 )
 
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
+from rpy2.robjects import r, vectors
+from rpy2.robjects.packages import importr
+
 
 def main():
-
     # Handle input params
     in_fname = sys.argv[1]
-    out_fname = sys.argv[2] 
+    out_fname = sys.argv[2]
     try:
-        column = int( sys.argv[3] ) - 1
-    except:
-        stop_err( "Column not specified, your query does not contain a column of numerical data." )
+        column = int(sys.argv[3]) - 1
+    except Exception:
+        sys.exit("Column not specified, your query does not contain a column of numerical data.")
     title = sys.argv[4]
     xlab = sys.argv[5]
-    breaks = int( sys.argv[6] )
+    breaks = int(sys.argv[6])
     if breaks == 0:
         breaks = "Sturges"
     if sys.argv[7] == "true":
         density = True
-    else: density = False
-    if len( sys.argv ) >= 9 and sys.argv[8] == "true":
+    else:
+        density = False
+    if len(sys.argv) >= 9 and sys.argv[8] == "true":
         frequency = True
-    else: frequency = False
+    else:
+        frequency = False
 
     matrix = []
     skipped_lines = 0
     first_invalid_line = 0
     invalid_value = ''
     i = 0
-    for i, line in enumerate( file( in_fname ) ):
+    for i, line in enumerate(open(in_fname)):
         valid = True
         line = line.rstrip('\r\n')
         # Skip comments
-        if line and not line.startswith( '#' ): 
+        if line and not line.startswith('#'):
             # Extract values and convert to floats
             row = []
             try:
-                fields = line.split( "\t" )
+                fields = line.split("\t")
                 val = fields[column]
                 if val.lower() == "na":
-                    row.append( float( "nan" ) )
-            except:
+                    row.append(float("nan"))
+            except Exception:
                 valid = False
                 skipped_lines += 1
                 if not first_invalid_line:
-                    first_invalid_line = i+1
+                    first_invalid_line = i + 1
             else:
                 try:
-                    row.append( float( val ) )
+                    row.append(float(val))
                 except ValueError:
                     valid = False
                     skipped_lines += 1
                     if not first_invalid_line:
-                        first_invalid_line = i+1
+                        first_invalid_line = i + 1
                         invalid_value = fields[column]
         else:
             valid = False
             skipped_lines += 1
             if not first_invalid_line:
-                first_invalid_line = i+1
+                first_invalid_line = i + 1
 
         if valid:
-            matrix += row
+            matrix.extend(row)
 
     if skipped_lines < i:
         try:
-            a = r.array( matrix )
-            r.pdf( out_fname, 8, 8 )
-            histogram = r.hist( a, probability=not frequency, main=title, xlab=xlab, breaks=breaks )
+            grdevices = importr('grDevices')
+            graphics = importr('graphics')
+            vector = vectors.FloatVector(matrix)
+            grdevices.pdf(out_fname, 8, 8)
+            histogram = graphics.hist(vector, probability=not frequency, main=title, xlab=xlab, breaks=breaks)
             if density:
-                density = r.density( a )
+                density = r.density(vector)
                 if frequency:
-                    scale_factor = len( matrix ) * ( histogram['mids'][1] - histogram['mids'][0] ) #uniform bandwidth taken from first 2 midpoints
-                    density[ 'y' ] = map( lambda x: x * scale_factor, density[ 'y' ] )
-                r.lines( density )
-            r.dev_off()
-        except Exception, exc:
-            stop_err( "%s" %str( exc ) )
+                    scale_factor = len(matrix) * (histogram['mids'][1] - histogram['mids'][0])  # uniform bandwidth taken from first 2 midpoints
+                    density['y'] = map(lambda x: x * scale_factor, density['y'])
+                graphics.lines(density)
+            grdevices.dev_off()
+        except Exception as exc:
+            sys.exit("%s" % str(exc))
     else:
         if i == 0:
-            stop_err("Input dataset is empty.")
+            sys.exit("Input dataset is empty.")
         else:
-            stop_err( "All values in column %s are non-numeric." %sys.argv[3] )
+            sys.exit("All values in column %s are non-numeric." % sys.argv[3])
 
-    print "Histogram of column %s. " %sys.argv[3]
+    print("Histogram of column %s. " % sys.argv[3])
     if skipped_lines > 0:
-        print "Skipped %d invalid lines starting with line #%d, '%s'." % ( skipped_lines, first_invalid_line, invalid_value )
+        print("Skipped %d invalid lines starting with line #%d, '%s'." % (skipped_lines, first_invalid_line, invalid_value))
 
-    r.quit( save="no" )
-    
+
 if __name__ == "__main__":
     main()
--- a/histogram2.xml	Wed Nov 11 12:17:14 2015 -0500
+++ b/histogram2.xml	Mon Jul 27 03:25:53 2020 -0400
@@ -1,10 +1,20 @@
-<tool id="histogram_rpy" name="Histogram" version="1.0.3">
+<tool id="histogram_rpy" name="Histogram" version="1.0.4">
   <description>of a numeric column</description>
   <requirements>
-    <requirement type="package" version="1.0.3">rpy</requirement>
-    <requirement type="package" version="2.11.0">R</requirement>
+    <requirement type="package" version="3.3.2">rpy2</requirement>
+    <requirement type="package" version="3.6.0">R</requirement>
   </requirements>
-  <command interpreter="python">histogram.py $input $out_file1 $numerical_column "$title" "$xlab" $breaks $density $frequency</command>
+  <command>
+python '$__tool_directory__/histogram.py'
+    '$input'
+    '$out_file1'
+    $numerical_column
+    '$title'
+    '$xlab'
+    $breaks
+    $density
+    $frequency
+</command>
   <inputs>
     <param name="input" type="data" format="tabular" label="Dataset" help="Dataset missing? See TIP below"/>
     <param name="numerical_column" type="data_column" data_ref="input" numerical="True" label="Numerical column for x axis" />
@@ -26,7 +36,7 @@
       <param name="xlab" value="V1"/>
       <param name="density" value="true"/>
       <param name="frequency" value="false"/>
-      <output name="out_file1" file="histogram_out1.pdf"/>
+      <output name="out_file1" file="histogram_out1.pdf" compare="sim_size"/>
     </test>
   </tests>
   <help>
--- a/plot_filter.py	Wed Nov 11 12:17:14 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,19 +0,0 @@
-
-def validate(incoming):
-    """Validator for the plotting program"""
-    
-    bins = incoming.get("bins","")
-    col  = incoming.get("col","")
-
-    if not bins or not col:
-        raise Exception, "You need to specify a number for bins and columns"
-
-    try:
-        bins = int(bins)
-        col  = int(col)
-    except:
-        raise Exception, "Parameters are not valid numbers, columns:%s, bins:%s" % (col, bins)
-
-    if not 1<bins<100:
-        raise Exception, "The number of bins %s must be a number between 1 and 100" % bins
-
--- a/plotter.py	Wed Nov 11 12:17:14 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,89 +0,0 @@
-#!/usr/bin/env python
-
-# python histogram input_file output_file column bins 
-import sys, os
-import matplotlib; matplotlib.use('Agg')
-
-from pylab import *
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
-if __name__ == '__main__':
-    # parse the arguments
-    
-    if len(sys.argv) != 6:
-        stop_err('Usage: python histogram.py input_file column bins output_file style')
-        sys.exit()
-
-    mode = sys.argv[5]
-    HIST = mode == 'hist'
-    try:
-        col =  int(float(sys.argv[2]))
-        if HIST:
-            bin = int(float(sys.argv[3]))
-        else:
-            # hack, this parameter is the plotting style for scatter plots
-            if sys.argv[3] == 'P':
-                style = 'o'
-            elif sys.argv[3] == 'LP':
-                style = 'o-'
-            else:
-                style = '-'
-
-    except:
-        msg = 'Parameter were not numbers %s, %s' % (sys.argv[3], sys.argv[4])
-        stop_err(msg)
-
-    # validate arguments
-    inp_file = sys.argv[1]
-    out_file = sys.argv[4]
-
-    if HIST:
-        print "Histogram on column %s (%s bins)" % (col, bin)
-    else:
-        print "Scatterplot on column %s" % (col)
-
-    xcol= col -1
-    # read the file
-    values = []
-    try:
-        count = 0
-        for line in file(inp_file):
-            count += 1
-            line = line.strip()
-            if line and line[0] != '#':
-                values.append(float(line.split()[xcol]))
-    except Exception, e:
-        stop_err('%s' % e)
-        stop_err("Non numerical data at line %d, column %d" % (count, col) )
-
-    # plot the data
-
-    if HIST:
-        n, bins, patches = hist(values, bins=bin, normed=0)
-    else:
-        plot(values, style)
-    
-    xlabel('values')
-    ylabel('counts')
-
-    if HIST:
-        title('Histogram of values over column %s (%s bins)' % (col, len(bins)) )
-    else:
-        title('Scatterplot over column %s' % col )        
-    grid(True)
-    
-    # the plotter detects types by file extension
-    png_out = out_file + '.png' # force it to png
-    savefig(png_out)
-
-    # shuffle it back and clean up
-    data = file(png_out, 'rb').read() 
-    fp = open(out_file, 'wb')
-    fp.write(data)
-    fp.close()
-    os.remove(png_out)
--- a/tool_dependencies.xml	Wed Nov 11 12:17:14 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-  <package name="rpy" version="1.0.3">
-      <repository changeset_revision="82170c94ca7c" name="package_rpy_1_0_3" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="R" version="2.11.0">
-      <repository changeset_revision="5824d2b3bc8b" name="package_r_2_11_0" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>