Mercurial > repos > devteam > scatterplot
changeset 0:c12b0759203b draft
Imported from capsule None
author | devteam |
---|---|
date | Mon, 28 Jul 2014 11:56:22 -0400 |
parents | |
children | d243056b22ed |
files | scatterplot.png scatterplot.py scatterplot.xml test-data/scatterplot_in1.tabular test-data/scatterplot_out1.pdf tool_dependencies.xml |
diffstat | 6 files changed, 335 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
#Greg Von Kuster
"""
scatterplot.py: draw a scatter plot of two columns of a tab-delimited file.

Usage:
    scatterplot.py <input> <output> <x_column> <y_column> <title> <xlab> <ylab>

Column numbers are 1-based.  The plot is written to <output> through
rpy's ``r.pdf`` device.
"""

import sys
from rpy import *


def stop_err(msg):
    """Write msg to stderr and terminate with a non-zero exit status."""
    sys.stderr.write(msg)
    # Exit with a failure status; a bare sys.exit() would report success (0).
    sys.exit(1)


def _parse_rows(lines, columns):
    """
    Collect the numeric values of the requested columns from each line.

    lines   -- iterable of text lines; trailing CR/LF characters are stripped
    columns -- 0-based indexes of the tab-separated fields to extract

    Returns a tuple
    ( matrix, skipped_lines, first_invalid_line, invalid_value, invalid_column )
    where matrix holds one list of floats per usable line, skipped_lines
    counts every line that was not usable (blank, starting with '#', too few
    fields, or non-numeric), and first_invalid_line is the 1-based number of
    the first such line (0 if none).  invalid_value and invalid_column
    (1-based) are only filled in when that first skipped line failed while
    reading a column; for a blank or comment line they stay '' and 0.
    """
    matrix = []
    skipped_lines = 0
    first_invalid_line = 0
    invalid_value = ''
    invalid_column = 0
    for line_number, line in enumerate(lines, 1):
        line = line.rstrip('\r\n')
        if not line or line.startswith('#'):
            # Blank and comment lines are counted as skipped lines.
            skipped_lines += 1
            if not first_invalid_line:
                first_invalid_line = line_number
            continue
        fields = line.split("\t")
        row = []
        for column in columns:
            try:
                val = fields[column]
                if val.lower() == "na":
                    # float() rejects "NA", so map it to NaN explicitly.
                    row.append(float("nan"))
                else:
                    row.append(float(val))
            except (IndexError, ValueError):
                # IndexError: the line has too few fields.
                # ValueError: the field is not a number.
                skipped_lines += 1
                if not first_invalid_line:
                    first_invalid_line = line_number
                    try:
                        invalid_value = fields[column]
                    except IndexError:
                        invalid_value = ''
                    invalid_column = column + 1
                break
        else:
            # Reached only when every requested column parsed cleanly.
            matrix.append(row)
    return matrix, skipped_lines, first_invalid_line, invalid_value, invalid_column


def main():
    """
    Read the command-line arguments, parse the input file and plot it.

    Exits through stop_err() when the column arguments are missing or not
    integers, when no line of the input yields a usable pair of values, or
    when the rpy plotting calls raise.  On success a short summary, including
    the number of skipped lines if any, is printed to stdout.
    """
    in_fname = sys.argv[1]
    out_fname = sys.argv[2]
    try:
        # Column numbers arrive 1-based; convert to 0-based indexes.
        columns = int(sys.argv[3]) - 1, int(sys.argv[4]) - 1
    except (IndexError, ValueError):
        stop_err("Columns not specified, your query does not contain a column of numerical data.")
    title = sys.argv[5]
    xlab = sys.argv[6]
    ylab = sys.argv[7]

    # The with-block guarantees the input file is closed once it has been read.
    with open(in_fname) as in_file:
        (matrix, skipped_lines, first_invalid_line,
         invalid_value, invalid_column) = _parse_rows(in_file, columns)

    # Every line is either appended to matrix or counted as skipped, so an
    # empty matrix is exactly the "nothing to plot" case.  (Comparing the skip
    # count with the last 0-based line index misreported inputs that had a
    # single valid line.)
    if not matrix:
        stop_err("All values in both columns %s and %s are non-numeric or empty." % (sys.argv[3], sys.argv[4]))

    try:
        r.pdf(out_fname, 8, 8)
        r.plot(array(matrix), type="p", main=title, xlab=xlab, ylab=ylab, col="blue", pch=19)
        r.dev_off()
    except Exception as exc:
        stop_err("%s" % str(exc))

    print("Scatter plot on columns %s, %s. " % (sys.argv[3], sys.argv[4]))
    if skipped_lines > 0:
        print("Skipped %d lines starting with line #%d, value '%s' in column %d is not numeric." % (skipped_lines, first_invalid_line, invalid_value, invalid_column))

    r.quit(save="no")


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scatterplot.xml Mon Jul 28 11:56:22 2014 -0400 @@ -0,0 +1,71 @@ +<tool id="scatterplot_rpy" name="Scatterplot" version="1.0.0"> + <description>of two numeric columns</description> + <requirements> + <requirement type="package" version="1.0.3">rpy</requirement> + </requirements> + <command interpreter="python">scatterplot.py $input $out_file1 $col1 $col2 "$title" "$xlab" "$ylab"</command> + <inputs> + <param name="input" type="data" format="tabular" label="Dataset" help="Dataset missing? See TIP below"/> + <param name="col1" type="data_column" data_ref="input" numerical="True" label="Numerical column for x axis" /> + <param name="col2" type="data_column" data_ref="input" numerical="True" label="Numerical column for y axis" /> + <param name="title" size="30" type="text" value="Scatterplot" label="Plot title"/> + <param name="xlab" size="30" type="text" value="V1" label="Label for x axis"/> + <param name="ylab" size="30" type="text" value="V2" label="Label for y axis"/> + </inputs> + <outputs> + <data format="pdf" name="out_file1" /> + </outputs> + <!-- TODO: uncomment the following test when we have tools.update_state() working for + multiple dependents with the same dependency. + <tests> + <test> + <param name="input" value="scatterplot_in1.tabular" ftype="tabular"/> + <param name="col1" value="2"/> + <param name="col2" value="3"/> + <param name="title" value="Scatterplot"/> + <param name="xlab" value="V1"/> + <param name="ylab" value="V2"/> + <output name="out_file1" file="scatterplot_out1.pdf" /> + </test> + </tests> + --> + <help> + +.. class:: infomark + +**TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert* + +----- + +**Syntax** + +This tool creates a simple scatter plot between two variables containing numeric values of a selected dataset. + +- All invalid, blank and comment lines in the dataset are skipped. The number of skipped lines is displayed in the resulting history item. 
+ +- **Plot title** The scatterplot title +- **Label for x axis** and **Label for y axis** The labels for x and y axis of the scatterplot. + +----- + +**Example** + +- Input file:: + + 1 68 4.1 + 2 71 4.6 + 3 62 3.8 + 4 75 4.4 + 5 58 3.2 + 6 60 3.1 + 7 67 3.8 + 8 68 4.1 + 9 71 4.3 + 10 69 3.7 + +- Create a simple scatterplot between the variables in column 2 and column 3 of the above dataset. + +.. image:: scatterplot.png + +</help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/scatterplot_in1.tabular Mon Jul 28 11:56:22 2014 -0400 @@ -0,0 +1,10 @@ +1 68 4.1 +2 71 4.6 +3 62 3.8 +4 75 4.4 +5 58 3.2 +6 60 3.1 +7 67 3.8 +8 68 4.1 +9 71 4.3 +10 69 3.7
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/scatterplot_out1.pdf Mon Jul 28 11:56:22 2014 -0400 @@ -0,0 +1,169 @@ +%PDF-1.1 +%ρ\r +1 0 obj +<< +/CreationDate (D:20080404100027) +/ModDate (D:20080404100027) +/Title (R Graphics Output) +/Producer (R 2.6.2) +/Creator (R) +>> +endobj +2 0 obj +<< +/Type /Catalog +/Pages 3 0 R +>> +endobj +5 0 obj +<< +/Type /Font +/Subtype /Type1 +/Name /F1 +/BaseFont /ZapfDingbats +>> +endobj +6 0 obj +<< +/Type /Page +/Parent 3 0 R +/Contents 7 0 R +/Resources 4 0 R +>> +endobj +7 0 obj +<< +/Length 8 0 R +>> +stream +q +Q q 59.04 73.44 486.72 443.52 re W n +0.000 0.000 1.000 rg +0.000 0.000 1.000 RG +0.75 w +[] 0 d +1 J +1 j +10.00 M +BT +/F1 1 Tf 2 Tr 7.48 0 0 7.48 339.20 361.05 Tm (l) Tj 0 Tr +/F1 1 Tf 2 Tr 7.48 0 0 7.48 418.73 497.94 Tm (l) Tj 0 Tr +/F1 1 Tf 2 Tr 7.48 0 0 7.48 180.14 278.92 Tm (l) Tj 0 Tr +/F1 1 Tf 2 Tr 7.48 0 0 7.48 524.77 443.18 Tm (l) Tj 0 Tr +/F1 1 Tf 2 Tr 7.48 0 0 7.48 74.10 114.65 Tm (l) Tj 0 Tr +/F1 1 Tf 2 Tr 7.48 0 0 7.48 127.12 87.27 Tm (l) Tj 0 Tr +/F1 1 Tf 2 Tr 7.48 0 0 7.48 312.69 278.92 Tm (l) Tj 0 Tr +/F1 1 Tf 2 Tr 7.48 0 0 7.48 339.20 361.05 Tm (l) Tj 0 Tr +/F1 1 Tf 2 Tr 7.48 0 0 7.48 418.73 415.80 Tm (l) Tj 0 Tr +/F1 1 Tf 2 Tr 7.48 0 0 7.48 365.71 251.54 Tm (l) Tj 0 Tr +ET +Q q +0.000 0.000 0.000 RG +0.75 w +[] 0 d +1 J +1 j +10.00 M +130.09 73.44 m 527.73 73.44 l S +130.09 73.44 m 130.09 66.24 l S +262.64 73.44 m 262.64 66.24 l S +395.18 73.44 m 395.18 66.24 l S +527.73 73.44 m 527.73 66.24 l S +BT +0.000 0.000 0.000 rg +/F2 1 Tf 12.00 0.00 -0.00 12.00 123.41 47.52 Tm (60) Tj +/F2 1 Tf 12.00 0.00 -0.00 12.00 255.96 47.52 Tm (65) Tj +/F2 1 Tf 12.00 0.00 -0.00 12.00 388.51 47.52 Tm (70) Tj +/F2 1 Tf 12.00 0.00 -0.00 12.00 521.06 47.52 Tm (75) Tj +ET +59.04 199.38 m 59.04 473.16 l S +59.04 199.38 m 51.84 199.38 l S +59.04 336.27 m 51.84 336.27 l S +59.04 473.16 m 51.84 473.16 l S +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 191.04 Tm (3.5) Tj +/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 
327.93 Tm (4.0) Tj +/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 464.82 Tm (4.5) Tj +ET +59.04 73.44 m +545.76 73.44 l +545.76 516.96 l +59.04 516.96 l +59.04 73.44 l +S +Q q +BT +0.000 0.000 0.000 rg +/F3 1 Tf 14.00 0.00 -0.00 14.00 265.84 541.45 Tm (Scatterplot) Tj +/F2 1 Tf 12.00 0.00 -0.00 12.00 295.06 18.72 Tm (V1) Tj +/F2 1 Tf 0.00 12.00 -12.00 0.00 12.96 287.86 Tm (V2) Tj +ET +Q +endstream +endobj +8 0 obj +1743 +endobj +3 0 obj +<< +/Type /Pages +/Kids [ +6 0 R +] +/Count 1 +/MediaBox [0 0 576 576] +>> +endobj +4 0 obj +<< +/ProcSet [/PDF /Text] +/Font << /F1 5 0 R /F2 10 0 R /F3 11 0 R >> +/ExtGState << >> +>> +endobj +9 0 obj +<< +/Type /Encoding +/BaseEncoding /WinAnsiEncoding +/Differences [ 45/minus 96/quoteleft +144/dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent +/dieresis /.notdef /ring /cedilla /.notdef /hungarumlaut /ogonek /caron /space] +>> +endobj +10 0 obj << +/Type /Font +/Subtype /Type1 +/Name /F2 +/BaseFont /Helvetica +/Encoding 9 0 R +>> endobj +11 0 obj << +/Type /Font +/Subtype /Type1 +/Name /F3 +/BaseFont /Helvetica-Bold +/Encoding 9 0 R +>> endobj +xref +0 12 +0000000000 65535 f +0000000021 00000 n +0000000163 00000 n +0000002191 00000 n +0000002274 00000 n +0000000212 00000 n +0000000295 00000 n +0000000375 00000 n +0000002171 00000 n +0000002378 00000 n +0000002635 00000 n +0000002732 00000 n +trailer +<< +/Size 12 +/Info 1 0 R +/Root 2 0 R +>> +startxref +2834 +%%EOF
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Mon Jul 28 11:56:22 2014 -0400 @@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="rpy" version="1.0.3"> + <repository changeset_revision="82170c94ca7c" name="package_rpy_1_0_3" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>