Mercurial > repos > devteam > histogram
annotate histogram.py @ 3:d0b9dd19e919 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/histogram commit e67a9cc9a7b253ba46df745f5fd6bf26282e97cd
| author | devteam |
|---|---|
| date | Fri, 07 Feb 2025 21:26:17 +0000 |
| parents | 6f134426c2b0 |
| children |
| rev | line source |
|---|---|
| 0 | 1 #!/usr/bin/env python |
|
2
6f134426c2b0
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/histogram commit 5666c97386c843c109e45acce462243392285b84"
devteam
parents:
0
diff
changeset
|
2 # Greg Von Kuster |
| 0 | 3 |
| 4 import sys | |
| 5 | |
|
2
6f134426c2b0
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/histogram commit 5666c97386c843c109e45acce462243392285b84"
devteam
parents:
0
diff
changeset
|
6 from rpy2.robjects import r, vectors |
|
6f134426c2b0
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/histogram commit 5666c97386c843c109e45acce462243392285b84"
devteam
parents:
0
diff
changeset
|
7 from rpy2.robjects.packages import importr |
|
6f134426c2b0
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/histogram commit 5666c97386c843c109e45acce462243392285b84"
devteam
parents:
0
diff
changeset
|
8 |
| 0 | 9 |
def main():
    """Plot a histogram of one column of a tab-separated file into a PDF using R.

    Command-line arguments (read from ``sys.argv``):
        1. path of the tab-separated input file
        2. path of the PDF file to write
        3. 1-based index of the column to plot
        4. plot title
        5. x-axis label
        6. number of breaks; ``0`` selects R's "Sturges" rule
        7. ``"true"`` to overlay a density curve
        8. optional; ``"true"`` to plot counts instead of probability densities

    Blank lines, lines starting with ``#``, lines lacking the requested column
    and lines whose value is not a number are skipped and counted. The value
    ``NA`` (any case) is kept as NaN.

    Exits through ``sys.exit`` with a message when the column argument is
    missing or not an integer, when the input has no lines, when no line
    yields a value, or when R raises an error while plotting.
    """
    # Handle input params
    in_fname = sys.argv[1]
    out_fname = sys.argv[2]
    try:
        column = int(sys.argv[3]) - 1
    except (IndexError, ValueError):
        sys.exit("Column not specified, your query does not contain a column of numerical data.")
    title = sys.argv[4]
    xlab = sys.argv[5]
    breaks = int(sys.argv[6])
    if breaks == 0:
        breaks = "Sturges"
    density = sys.argv[7] == "true"
    frequency = len(sys.argv) >= 9 and sys.argv[8] == "true"

    matrix = []
    total_lines = 0
    skipped_lines = 0
    first_invalid_line = 0
    invalid_value = ''
    # The context manager closes the input file even if parsing fails.
    with open(in_fname) as in_fh:
        for total_lines, line in enumerate(in_fh, start=1):
            line = line.rstrip('\r\n')
            value = None
            bad_field = None
            # Skip blank lines and comments
            if line and not line.startswith('#'):
                fields = line.split("\t")
                try:
                    val = fields[column]
                except IndexError:
                    # The line has fewer columns than requested.
                    pass
                else:
                    if val.lower() == "na":
                        value = float("nan")
                    else:
                        try:
                            value = float(val)
                        except ValueError:
                            bad_field = val

            if value is None:
                skipped_lines += 1
                if not first_invalid_line:
                    first_invalid_line = total_lines
                    # Only a non-numeric field is echoed back to the user;
                    # blank, comment and short lines leave this empty.
                    if bad_field is not None:
                        invalid_value = bad_field
            else:
                matrix.append(value)

    # Decide on the collected values themselves rather than comparing the
    # skip count with the last line index, which misreported one-line inputs
    # and inputs with exactly one usable line.
    if not matrix:
        if total_lines == 0:
            sys.exit("Input dataset is empty.")
        sys.exit("All values in column %s are non-numeric." % sys.argv[3])

    try:
        grdevices = importr('grDevices')
        graphics = importr('graphics')
        vector = vectors.FloatVector(matrix)
        grdevices.pdf(out_fname, 8, 8)
        histogram = graphics.hist(vector, probability=not frequency, main=title, xlab=xlab, breaks=breaks)
        if density:
            density_curve = r.density(vector)
            if frequency:
                # Rescale the density to the count axis: number of values
                # times the (uniform) bin width. The width comes from the
                # first two break points, which exist even for a single bin.
                bin_edges = histogram.rx2('breaks')
                scale_factor = len(matrix) * (bin_edges[1] - bin_edges[0])
                # Build an explicit R numeric vector; a lazy Python 3 map
                # object cannot be stored in an R list.
                density_curve.rx2['y'] = vectors.FloatVector(
                    [y * scale_factor for y in density_curve.rx2('y')]
                )
            graphics.lines(density_curve)
        grdevices.dev_off()
    except Exception as exc:
        sys.exit("%s" % str(exc))

    print("Histogram of column %s. " % sys.argv[3])
    if skipped_lines > 0:
        print("Skipped %d invalid lines starting with line #%d, '%s'." % (skipped_lines, first_invalid_line, invalid_value))
| 0 | 97 |
|
2
6f134426c2b0
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/histogram commit 5666c97386c843c109e45acce462243392285b84"
devteam
parents:
0
diff
changeset
|
98 |
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
