comparison histogram.py @ 2:6f134426c2b0 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/histogram commit 5666c97386c843c109e45acce462243392285b84"
author devteam
date Mon, 27 Jul 2020 03:25:53 -0400
parents 6ff47de059a0
children
comparison
equal deleted inserted replaced
1:cdb9e89e2970 2:6f134426c2b0
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 #Greg Von Kuster 2 # Greg Von Kuster
3 3
4 import sys 4 import sys
5 from rpy import *
6 5
7 assert sys.version_info[:2] >= ( 2, 4 ) 6 from rpy2.robjects import r, vectors
7 from rpy2.robjects.packages import importr
8 8
9 def stop_err(msg):
10 sys.stderr.write(msg)
11 sys.exit()
12 9
13 def main(): 10 def main():
14
15 # Handle input params 11 # Handle input params
16 in_fname = sys.argv[1] 12 in_fname = sys.argv[1]
17 out_fname = sys.argv[2] 13 out_fname = sys.argv[2]
18 try: 14 try:
19 column = int( sys.argv[3] ) - 1 15 column = int(sys.argv[3]) - 1
20 except: 16 except Exception:
21 stop_err( "Column not specified, your query does not contain a column of numerical data." ) 17 sys.exit("Column not specified, your query does not contain a column of numerical data.")
22 title = sys.argv[4] 18 title = sys.argv[4]
23 xlab = sys.argv[5] 19 xlab = sys.argv[5]
24 breaks = int( sys.argv[6] ) 20 breaks = int(sys.argv[6])
25 if breaks == 0: 21 if breaks == 0:
26 breaks = "Sturges" 22 breaks = "Sturges"
27 if sys.argv[7] == "true": 23 if sys.argv[7] == "true":
28 density = True 24 density = True
29 else: density = False 25 else:
30 if len( sys.argv ) >= 9 and sys.argv[8] == "true": 26 density = False
27 if len(sys.argv) >= 9 and sys.argv[8] == "true":
31 frequency = True 28 frequency = True
32 else: frequency = False 29 else:
30 frequency = False
33 31
34 matrix = [] 32 matrix = []
35 skipped_lines = 0 33 skipped_lines = 0
36 first_invalid_line = 0 34 first_invalid_line = 0
37 invalid_value = '' 35 invalid_value = ''
38 i = 0 36 i = 0
39 for i, line in enumerate( file( in_fname ) ): 37 for i, line in enumerate(open(in_fname)):
40 valid = True 38 valid = True
41 line = line.rstrip('\r\n') 39 line = line.rstrip('\r\n')
42 # Skip comments 40 # Skip comments
43 if line and not line.startswith( '#' ): 41 if line and not line.startswith('#'):
44 # Extract values and convert to floats 42 # Extract values and convert to floats
45 row = [] 43 row = []
46 try: 44 try:
47 fields = line.split( "\t" ) 45 fields = line.split("\t")
48 val = fields[column] 46 val = fields[column]
49 if val.lower() == "na": 47 if val.lower() == "na":
50 row.append( float( "nan" ) ) 48 row.append(float("nan"))
51 except: 49 except Exception:
52 valid = False 50 valid = False
53 skipped_lines += 1 51 skipped_lines += 1
54 if not first_invalid_line: 52 if not first_invalid_line:
55 first_invalid_line = i+1 53 first_invalid_line = i + 1
56 else: 54 else:
57 try: 55 try:
58 row.append( float( val ) ) 56 row.append(float(val))
59 except ValueError: 57 except ValueError:
60 valid = False 58 valid = False
61 skipped_lines += 1 59 skipped_lines += 1
62 if not first_invalid_line: 60 if not first_invalid_line:
63 first_invalid_line = i+1 61 first_invalid_line = i + 1
64 invalid_value = fields[column] 62 invalid_value = fields[column]
65 else: 63 else:
66 valid = False 64 valid = False
67 skipped_lines += 1 65 skipped_lines += 1
68 if not first_invalid_line: 66 if not first_invalid_line:
69 first_invalid_line = i+1 67 first_invalid_line = i + 1
70 68
71 if valid: 69 if valid:
72 matrix += row 70 matrix.extend(row)
73 71
74 if skipped_lines < i: 72 if skipped_lines < i:
75 try: 73 try:
76 a = r.array( matrix ) 74 grdevices = importr('grDevices')
77 r.pdf( out_fname, 8, 8 ) 75 graphics = importr('graphics')
78 histogram = r.hist( a, probability=not frequency, main=title, xlab=xlab, breaks=breaks ) 76 vector = vectors.FloatVector(matrix)
77 grdevices.pdf(out_fname, 8, 8)
78 histogram = graphics.hist(vector, probability=not frequency, main=title, xlab=xlab, breaks=breaks)
79 if density: 79 if density:
80 density = r.density( a ) 80 density = r.density(vector)
81 if frequency: 81 if frequency:
82 scale_factor = len( matrix ) * ( histogram['mids'][1] - histogram['mids'][0] ) #uniform bandwidth taken from first 2 midpoints 82 scale_factor = len(matrix) * (histogram['mids'][1] - histogram['mids'][0]) # uniform bandwidth taken from first 2 midpoints
83 density[ 'y' ] = map( lambda x: x * scale_factor, density[ 'y' ] ) 83 density['y'] = map(lambda x: x * scale_factor, density['y'])
84 r.lines( density ) 84 graphics.lines(density)
85 r.dev_off() 85 grdevices.dev_off()
86 except Exception, exc: 86 except Exception as exc:
87 stop_err( "%s" %str( exc ) ) 87 sys.exit("%s" % str(exc))
88 else: 88 else:
89 if i == 0: 89 if i == 0:
90 stop_err("Input dataset is empty.") 90 sys.exit("Input dataset is empty.")
91 else: 91 else:
92 stop_err( "All values in column %s are non-numeric." %sys.argv[3] ) 92 sys.exit("All values in column %s are non-numeric." % sys.argv[3])
93 93
94 print "Histogram of column %s. " %sys.argv[3] 94 print("Histogram of column %s. " % sys.argv[3])
95 if skipped_lines > 0: 95 if skipped_lines > 0:
96 print "Skipped %d invalid lines starting with line #%d, '%s'." % ( skipped_lines, first_invalid_line, invalid_value ) 96 print("Skipped %d invalid lines starting with line #%d, '%s'." % (skipped_lines, first_invalid_line, invalid_value))
97 97
98 r.quit( save="no" ) 98
99
100 if __name__ == "__main__": 99 if __name__ == "__main__":
101 main() 100 main()