Mercurial > repos > devteam > histogram
changeset 2:6f134426c2b0 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/histogram commit 5666c97386c843c109e45acce462243392285b84"
author | devteam |
---|---|
date | Mon, 27 Jul 2020 03:25:53 -0400 |
parents | cdb9e89e2970 |
children | |
files | histogram.py histogram2.xml plot_filter.py plotter.py tool_dependencies.xml |
diffstat | 5 files changed, 56 insertions(+), 164 deletions(-) [+] |
line wrap: on
line diff
--- a/histogram.py Wed Nov 11 12:17:14 2015 -0500 +++ b/histogram.py Mon Jul 27 03:25:53 2020 -0400 @@ -1,101 +1,100 @@ #!/usr/bin/env python -#Greg Von Kuster +# Greg Von Kuster import sys -from rpy import * - -assert sys.version_info[:2] >= ( 2, 4 ) -def stop_err(msg): - sys.stderr.write(msg) - sys.exit() +from rpy2.robjects import r, vectors +from rpy2.robjects.packages import importr + def main(): - # Handle input params in_fname = sys.argv[1] - out_fname = sys.argv[2] + out_fname = sys.argv[2] try: - column = int( sys.argv[3] ) - 1 - except: - stop_err( "Column not specified, your query does not contain a column of numerical data." ) + column = int(sys.argv[3]) - 1 + except Exception: + sys.exit("Column not specified, your query does not contain a column of numerical data.") title = sys.argv[4] xlab = sys.argv[5] - breaks = int( sys.argv[6] ) + breaks = int(sys.argv[6]) if breaks == 0: breaks = "Sturges" if sys.argv[7] == "true": density = True - else: density = False - if len( sys.argv ) >= 9 and sys.argv[8] == "true": + else: + density = False + if len(sys.argv) >= 9 and sys.argv[8] == "true": frequency = True - else: frequency = False + else: + frequency = False matrix = [] skipped_lines = 0 first_invalid_line = 0 invalid_value = '' i = 0 - for i, line in enumerate( file( in_fname ) ): + for i, line in enumerate(open(in_fname)): valid = True line = line.rstrip('\r\n') # Skip comments - if line and not line.startswith( '#' ): + if line and not line.startswith('#'): # Extract values and convert to floats row = [] try: - fields = line.split( "\t" ) + fields = line.split("\t") val = fields[column] if val.lower() == "na": - row.append( float( "nan" ) ) - except: + row.append(float("nan")) + except Exception: valid = False skipped_lines += 1 if not first_invalid_line: - first_invalid_line = i+1 + first_invalid_line = i + 1 else: try: - row.append( float( val ) ) + row.append(float(val)) except ValueError: valid = False skipped_lines += 1 if not first_invalid_line: - first_invalid_line = i+1 + first_invalid_line = i + 1 invalid_value = fields[column] else: valid = False skipped_lines += 1 if not first_invalid_line: - first_invalid_line = i+1 + first_invalid_line = i + 1 if valid: - matrix += row + matrix.extend(row) if skipped_lines < i: try: - a = r.array( matrix ) - r.pdf( out_fname, 8, 8 ) - histogram = r.hist( a, probability=not frequency, main=title, xlab=xlab, breaks=breaks ) + grdevices = importr('grDevices') + graphics = importr('graphics') + vector = vectors.FloatVector(matrix) + grdevices.pdf(out_fname, 8, 8) + histogram = graphics.hist(vector, probability=not frequency, main=title, xlab=xlab, breaks=breaks) if density: - density = r.density( a ) + density = r.density(vector) if frequency: - scale_factor = len( matrix ) * ( histogram['mids'][1] - histogram['mids'][0] ) #uniform bandwidth taken from first 2 midpoints - density[ 'y' ] = map( lambda x: x * scale_factor, density[ 'y' ] ) - r.lines( density ) - r.dev_off() - except Exception, exc: - stop_err( "%s" %str( exc ) ) + scale_factor = len(matrix) * (histogram['mids'][1] - histogram['mids'][0]) # uniform bandwidth taken from first 2 midpoints + density['y'] = map(lambda x: x * scale_factor, density['y']) + graphics.lines(density) + grdevices.dev_off() + except Exception as exc: + sys.exit("%s" % str(exc)) else: if i == 0: - stop_err("Input dataset is empty.") + sys.exit("Input dataset is empty.") else: - stop_err( "All values in column %s are non-numeric." %sys.argv[3] ) + sys.exit("All values in column %s are non-numeric." % sys.argv[3]) - print "Histogram of column %s. " %sys.argv[3] + print("Histogram of column %s. " % sys.argv[3]) if skipped_lines > 0: - print "Skipped %d invalid lines starting with line #%d, '%s'." % ( skipped_lines, first_invalid_line, invalid_value ) + print("Skipped %d invalid lines starting with line #%d, '%s'." % (skipped_lines, first_invalid_line, invalid_value)) - r.quit( save="no" ) - + if __name__ == "__main__": main()
--- a/histogram2.xml Wed Nov 11 12:17:14 2015 -0500 +++ b/histogram2.xml Mon Jul 27 03:25:53 2020 -0400 @@ -1,10 +1,20 @@ -<tool id="histogram_rpy" name="Histogram" version="1.0.3"> +<tool id="histogram_rpy" name="Histogram" version="1.0.4"> <description>of a numeric column</description> <requirements> - <requirement type="package" version="1.0.3">rpy</requirement> - <requirement type="package" version="2.11.0">R</requirement> + <requirement type="package" version="3.3.2">rpy2</requirement> + <requirement type="package" version="3.6.0">R</requirement> </requirements> - <command interpreter="python">histogram.py $input $out_file1 $numerical_column "$title" "$xlab" $breaks $density $frequency</command> + <command> +python '$__tool_directory__/histogram.py' + '$input' + '$out_file1' + $numerical_column + '$title' + '$xlab' + $breaks + $density + $frequency +</command> <inputs> <param name="input" type="data" format="tabular" label="Dataset" help="Dataset missing? See TIP below"/> <param name="numerical_column" type="data_column" data_ref="input" numerical="True" label="Numerical column for x axis" /> @@ -26,7 +36,7 @@ <param name="xlab" value="V1"/> <param name="density" value="true"/> <param name="frequency" value="false"/> - <output name="out_file1" file="histogram_out1.pdf"/> + <output name="out_file1" file="histogram_out1.pdf" compare="sim_size"/> </test> </tests> <help>
--- a/plot_filter.py Wed Nov 11 12:17:14 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,19 +0,0 @@ - -def validate(incoming): - """Validator for the plotting program""" - - bins = incoming.get("bins","") - col = incoming.get("col","") - - if not bins or not col: - raise Exception, "You need to specify a number for bins and columns" - - try: - bins = int(bins) - col = int(col) - except: - raise Exception, "Parameters are not valid numbers, columns:%s, bins:%s" % (col, bins) - - if not 1<bins<100: - raise Exception, "The number of bins %s must be a number between 1 and 100" % bins -
--- a/plotter.py Wed Nov 11 12:17:14 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,89 +0,0 @@ -#!/usr/bin/env python - -# python histogram input_file output_file column bins -import sys, os -import matplotlib; matplotlib.use('Agg') - -from pylab import * - -assert sys.version_info[:2] >= ( 2, 4 ) - -def stop_err(msg): - sys.stderr.write(msg) - sys.exit() - -if __name__ == '__main__': - # parse the arguments - - if len(sys.argv) != 6: - stop_err('Usage: python histogram.py input_file column bins output_file style') - sys.exit() - - mode = sys.argv[5] - HIST = mode == 'hist' - try: - col = int(float(sys.argv[2])) - if HIST: - bin = int(float(sys.argv[3])) - else: - # hack, this parameter is the plotting style for scatter plots - if sys.argv[3] == 'P': - style = 'o' - elif sys.argv[3] == 'LP': - style = 'o-' - else: - style = '-' - - except: - msg = 'Parameter were not numbers %s, %s' % (sys.argv[3], sys.argv[4]) - stop_err(msg) - - # validate arguments - inp_file = sys.argv[1] - out_file = sys.argv[4] - - if HIST: - print "Histogram on column %s (%s bins)" % (col, bin) - else: - print "Scatterplot on column %s" % (col) - - xcol= col -1 - # read the file - values = [] - try: - count = 0 - for line in file(inp_file): - count += 1 - line = line.strip() - if line and line[0] != '#': - values.append(float(line.split()[xcol])) - except Exception, e: - stop_err('%s' % e) - stop_err("Non numerical data at line %d, column %d" % (count, col) ) - - # plot the data - - if HIST: - n, bins, patches = hist(values, bins=bin, normed=0) - else: - plot(values, style) - - xlabel('values') - ylabel('counts') - - if HIST: - title('Histogram of values over column %s (%s bins)' % (col, len(bins)) ) - else: - title('Scatterplot over column %s' % col ) - grid(True) - - # the plotter detects types by file extension - png_out = out_file + '.png' # force it to png - savefig(png_out) - - # shuffle it back and clean up - data = file(png_out, 'rb').read() - fp = open(out_file, 'wb') - fp.write(data) - fp.close() - os.remove(png_out)
--- a/tool_dependencies.xml Wed Nov 11 12:17:14 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,9 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="rpy" version="1.0.3"> - <repository changeset_revision="82170c94ca7c" name="package_rpy_1_0_3" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> - <package name="R" version="2.11.0"> - <repository changeset_revision="5824d2b3bc8b" name="package_r_2_11_0" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency>