comparison bar_chart_plot.py @ 1:f1bcd79cd923 draft default tip

Uploaded
author insilico-bob
date Tue, 27 Nov 2018 14:20:40 -0500
parents
children
comparison
equal deleted inserted replaced
0:7f12c81e2083 1:f1bcd79cd923
1 #!/usr/bin/env python
2 """
3 histogram_gnuplot.py <datafile> <xtic column> <column_list> <title> <ylabel> <yrange_min> <yrange_max> <graph_file> <img_size>
4 a generic histogram builder based on gnuplot backend
5
6 data_file - tab delimited file with data
7 xtic_column - column containing labels for x ticks [integer, 0 means no ticks]
8 column_list - comma separated list of columns to plot
9 title - title for the entire histogram
10 ylabel - y axis label
11 yrange_min - minimal value at the y axis (integer)
12 yrange_max - maximal value at the y_axis (integer)
13 to set yrange to autoscaling assign 0 to yrange_min and yrange_max
14 graph_file - file to write histogram image to
15 img_size - as X,Y pair in pixels (e.g., 800,600 or 600,800 etc.)
16
17
18 This tool requires gnuplot and gnuplot.py
19
20 anton nekrutenko | anton@bx.psu.edu
21 """
22
23 import string
24 import sys
25 import tempfile
26
27 import Gnuplot
28 import Gnuplot.funcutils
29
30 assert sys.version_info[:2] >= (2, 4)
31
32
def stop_err(msg):
    """Write *msg* to stderr and abort the script with a failure status.

    msg is written verbatim; no trailing newline is appended.

    Raises SystemExit with exit status 1.  A bare ``sys.exit()`` would
    terminate with status 0, which makes a failed run look successful to
    whatever launched the script.
    """
    sys.stderr.write(msg)
    sys.exit(1)
36
37
def _parse_row(line, col_indexes, xtic, line_index):
    """Turn one input line into the fields of a gnuplot data row.

    line        - input line with the line terminator already stripped
    col_indexes - 0-based indexes of the columns holding numeric values
    xtic        - 1-based column holding the x tick label; 0 means "use the
                  line index instead"; a negative value adds no label field
    line_index  - 0-based position of the line in the input file

    Returns a list of strings (numeric values first, label last), or None
    when the line is blank, starts with '#', lacks a requested field, or
    holds a non-numeric value in a plotted column.
    """
    if not line or line.startswith('#'):
        return None
    fields = line.split('\t')
    try:
        row = [str(float(fields[idx])) for idx in col_indexes]
        if xtic > 0:
            # A missing label column makes the line unusable, same as a
            # missing data column.
            row.append(fields[xtic - 1])
        elif xtic == 0:
            row.append(str(line_index))
    except (ValueError, IndexError):
        return None
    return row


def main(tmpFileName):
    """Build a clustered bar chart from a tab delimited file using gnuplot.

    All settings are read from sys.argv:
        1 data file, 2 xtic column, 3 comma separated column list, 4 title,
        5 y label, 6 y range minimum, 7 y range maximum, 8 output image,
        9 image size ("X,Y" in pixels).

    tmpFileName - path of the scratch file that receives the cleaned data
                  rows and is then handed to gnuplot.

    Lines that are blank, start with '#', or lack numeric values in the
    requested columns are skipped; a warning naming the first skipped line
    is written to stdout.  If no usable line remains, a message is written
    to stderr and nothing is plotted.  Argument and gnuplot failures are
    reported through stop_err(), which terminates the script.
    """
    try:
        xtic = int(sys.argv[2])
        col_list = sys.argv[3].split(",")
        title = 'set title "' + sys.argv[4] + '"'
        ylabel = 'set ylabel "' + sys.argv[5] + '"'
        ymin = sys.argv[6]
        ymax = sys.argv[7]
        img_file = sys.argv[8]
        img_size = sys.argv[9]
        # Opened last so a bad argument does not leave the file open.
        in_file = open(sys.argv[1], 'r')
    except (IndexError, ValueError, IOError):
        stop_err("Check arguments\n")

    try:
        # Convert once here instead of once per field inside the read loop.
        col_indexes = [int(col) - 1 for col in col_list]
    except ValueError:
        stop_err('You forgot to set columns for plotting\n')

    valid_lines_count = 0
    skipped_lines_index = []
    gf = open(tmpFileName, 'w')
    try:
        for i, line in enumerate(in_file):
            row = _parse_row(line.rstrip('\r\n'), col_indexes, xtic, i)
            if row is None:
                skipped_lines_index.append(i)
                continue
            gf.write('\t'.join(row))
            gf.write('\n')
            valid_lines_count += 1
    finally:
        # The data must be on disk before gnuplot is asked to read it.
        gf.close()
        in_file.close()
    skipped_lines_count = len(skipped_lines_index)

    if valid_lines_count > 0:
        # Prepare 'using' clauses of the plot statement, one per column.
        if xtic > 0:
            # The tick label is always written right after the data columns.
            plot_parts = ["'%s' using 1:xticlabels(%s) ti 'Column %s'" % (tmpFileName, str(len(col_list) + 1), col_list[0])]
        else:
            plot_parts = ["'%s' using 1 ti 'Column %s'" % (tmpFileName, col_list[0])]
        for pos in range(1, len(col_list)):
            plot_parts.append("'%s' using %s t 'Column %s'" % (tmpFileName, str(pos + 1), col_list[pos]))
        g_plot_command = ', '.join(plot_parts)

        yrange = 'set yrange [' + ymin + ":" + ymax + ']'

        # NOTE(review): title, ylabel, img_file and img_size are pasted into
        # gnuplot commands unescaped; confirm the caller sanitizes them.
        try:
            g = Gnuplot.Gnuplot()
            g('reset')
            g('set boxwidth 0.9 absolute')
            g('set style fill solid 1.00 border -1')
            g('set style histogram clustered gap 5 title offset character 0, 0, 0')
            g('set xtics border in scale 1,0.5 nomirror rotate by 90 offset character 0, 0, 0')
            g('set key invert reverse Left outside')
            if xtic == 0:
                g('unset xtics')
            g(title)
            g(ylabel)
            g_term = 'set terminal png tiny size ' + img_size
            g(g_term)
            g_out = 'set output "' + img_file + '"'
            # Equal bounds (e.g. 0 and 0) leave the y range on autoscale.
            if ymin != ymax:
                g(yrange)
            g(g_out)
            g('set style data histograms')
            g.plot(g_plot_command)
        except Exception:
            stop_err("Gnuplot error: Data cannot be plotted")
    else:
        sys.stderr.write('Column(s) %s of your dataset do not contain valid numeric data' % sys.argv[3])

    if skipped_lines_count > 0:
        sys.stdout.write('\nWARNING. You dataset contain(s) %d invalid lines starting with line #%d. These lines were skipped while building the graph.\n' % (skipped_lines_count, skipped_lines_index[0] + 1))
133
134
if __name__ == "__main__":
    # Every Gnuplot session started by this script defaults to PNG output.
    Gnuplot.gp.GnuplotOpts.default_term = 'png'
    # The scratch data file is created at this level, not inside main():
    # when it was created there it seemed to get removed before gnuplot had
    # a chance to access it.
    scratch_file = tempfile.NamedTemporaryFile('w')
    main(scratch_file.name)