changeset 0:c12b0759203b draft

Imported from capsule None
author devteam
date Mon, 28 Jul 2014 11:56:22 -0400
parents
children d243056b22ed
files scatterplot.png scatterplot.py scatterplot.xml test-data/scatterplot_in1.tabular test-data/scatterplot_out1.pdf tool_dependencies.xml
diffstat 6 files changed, 335 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
Binary file scatterplot.png has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scatterplot.py	Mon Jul 28 11:56:22 2014 -0400
@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+#Greg Von Kuster
+
+import sys
+from rpy import *
+
+def stop_err(msg):
+    sys.stderr.write(msg)
+    sys.exit()
+
+def main():
+
+    in_fname = sys.argv[1]
+    out_fname = sys.argv[2]
+    try:
+        columns = int( sys.argv[3] ) - 1, int( sys.argv[4] ) - 1
+    except:
+        stop_err( "Columns not specified, your query does not contain a column of numerical data." )
+    title = sys.argv[5]
+    xlab = sys.argv[6]
+    ylab = sys.argv[7]
+
+    matrix = []
+    skipped_lines = 0
+    first_invalid_line = 0
+    invalid_value = ''
+    invalid_column = 0
+    i = 0
+    for i, line in enumerate( file( in_fname ) ):
+        valid = True
+        line = line.rstrip( '\r\n' )
+        if line and not line.startswith( '#' ): 
+            row = []
+            fields = line.split( "\t" )
+            for column in columns:
+                try:
+                    val = fields[column]
+                    if val.lower() == "na": 
+                        row.append( float( "nan" ) )
+                    else:
+                        row.append( float( fields[column] ) )
+                except:
+                    valid = False
+                    skipped_lines += 1
+                    if not first_invalid_line:
+                        first_invalid_line = i + 1
+                        try:
+                            invalid_value = fields[column]
+                        except:
+                            invalid_value = ''
+                        invalid_column = column + 1
+                    break
+        else:
+            valid = False
+            skipped_lines += 1
+            if not first_invalid_line:
+                first_invalid_line = i+1
+
+        if valid:
+            matrix.append( row )
+
+    if skipped_lines < i:
+        try:
+            r.pdf( out_fname, 8, 8 )
+            r.plot( array( matrix ), type="p", main=title, xlab=xlab, ylab=ylab, col="blue", pch=19 )
+            r.dev_off()
+        except Exception, exc:
+            stop_err( "%s" %str( exc ) )
+    else:
+        stop_err( "All values in both columns %s and %s are non-numeric or empty." % ( sys.argv[3], sys.argv[4] ) )
+
+    print "Scatter plot on columns %s, %s. " % ( sys.argv[3], sys.argv[4] )
+    if skipped_lines > 0:
+        print "Skipped %d lines starting with line #%d, value '%s' in column %d is not numeric." % ( skipped_lines, first_invalid_line, invalid_value, invalid_column )
+
+    r.quit( save="no" )
+
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scatterplot.xml	Mon Jul 28 11:56:22 2014 -0400
@@ -0,0 +1,71 @@
+<tool id="scatterplot_rpy" name="Scatterplot" version="1.0.0">
+  <description>of two numeric columns</description>
+  <requirements>
+    <requirement type="package" version="1.0.3">rpy</requirement>
+  </requirements>
+  <command interpreter="python">scatterplot.py $input $out_file1 $col1 $col2 "$title" "$xlab" "$ylab"</command>
+  <inputs>
+    <param name="input" type="data" format="tabular" label="Dataset" help="Dataset missing? See TIP below"/>
+    <param name="col1" type="data_column" data_ref="input" numerical="True" label="Numerical column for x axis" />
+    <param name="col2" type="data_column" data_ref="input" numerical="True" label="Numerical column for y axis" />
+    <param name="title" size="30" type="text" value="Scatterplot" label="Plot title"/>
+    <param name="xlab" size="30" type="text" value="V1" label="Label for x axis"/>
+    <param name="ylab" size="30" type="text" value="V2" label="Label for y axis"/>
+  </inputs>
+  <outputs>
+    <data format="pdf" name="out_file1" />
+  </outputs>
+  <!-- TODO: uncomment the following test when we have tools.update_state() working for 
+       multiple dependents with the same dependency.
+  <tests>
+    <test>
+      <param name="input" value="scatterplot_in1.tabular" ftype="tabular"/>
+      <param name="col1" value="2"/>
+      <param name="col2" value="3"/>
+      <param name="title" value="Scatterplot"/>
+      <param name="xlab" value="V1"/>
+      <param name="ylab" value="V2"/>
+      <output name="out_file1" file="scatterplot_out1.pdf" />
+    </test>
+  </tests>
+  -->
+  <help>
+
+.. class:: infomark
+
+**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
+
+-----
+
+**Syntax**
+
+This tool creates a simple scatter plot of two numeric columns of the selected dataset.
+
+- All invalid, blank and comment lines in the dataset are skipped.  The number of skipped lines is displayed in the resulting history item.
+
+- **Plot title** The scatterplot title
+- **Label for x axis** and **Label for y axis** The labels for x and y axis of the scatterplot.
+
+-----
+
+**Example**
+
+- Input file::
+
+    1   68  4.1
+    2   71  4.6
+    3   62  3.8
+    4   75  4.4
+    5   58  3.2
+    6   60  3.1
+    7   67  3.8
+    8   68  4.1
+    9   71  4.3
+    10  69  3.7 
+
+- Create a simple scatterplot between the variables in column 2 and column 3 of the above dataset.
+
+.. image:: scatterplot.png
+
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/scatterplot_in1.tabular	Mon Jul 28 11:56:22 2014 -0400
@@ -0,0 +1,10 @@
+1	68	4.1
+2	71	4.6
+3	62	3.8
+4	75	4.4
+5	58	3.2
+6	60	3.1
+7	67	3.8
+8	68	4.1
+9	71	4.3
+10	69	3.7
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/scatterplot_out1.pdf	Mon Jul 28 11:56:22 2014 -0400
@@ -0,0 +1,169 @@
+%PDF-1.1
+%ρ\r
+1 0 obj
+<<
+/CreationDate (D:20080404100027)
+/ModDate (D:20080404100027)
+/Title (R Graphics Output)
+/Producer (R 2.6.2)
+/Creator (R)
+>>
+endobj
+2 0 obj
+<<
+/Type /Catalog
+/Pages 3 0 R
+>>
+endobj
+5 0 obj
+<<
+/Type /Font
+/Subtype /Type1
+/Name /F1
+/BaseFont /ZapfDingbats
+>>
+endobj
+6 0 obj
+<<
+/Type /Page
+/Parent 3 0 R
+/Contents 7 0 R
+/Resources 4 0 R
+>>
+endobj
+7 0 obj
+<<
+/Length 8 0 R
+>>
+stream
+q
+Q q 59.04 73.44 486.72 443.52 re W n
+0.000 0.000 1.000 rg
+0.000 0.000 1.000 RG
+0.75 w
+[] 0 d
+1 J
+1 j
+10.00 M
+BT
+/F1 1 Tf 2 Tr 7.48 0 0 7.48 339.20 361.05 Tm (l) Tj 0 Tr
+/F1 1 Tf 2 Tr 7.48 0 0 7.48 418.73 497.94 Tm (l) Tj 0 Tr
+/F1 1 Tf 2 Tr 7.48 0 0 7.48 180.14 278.92 Tm (l) Tj 0 Tr
+/F1 1 Tf 2 Tr 7.48 0 0 7.48 524.77 443.18 Tm (l) Tj 0 Tr
+/F1 1 Tf 2 Tr 7.48 0 0 7.48 74.10 114.65 Tm (l) Tj 0 Tr
+/F1 1 Tf 2 Tr 7.48 0 0 7.48 127.12 87.27 Tm (l) Tj 0 Tr
+/F1 1 Tf 2 Tr 7.48 0 0 7.48 312.69 278.92 Tm (l) Tj 0 Tr
+/F1 1 Tf 2 Tr 7.48 0 0 7.48 339.20 361.05 Tm (l) Tj 0 Tr
+/F1 1 Tf 2 Tr 7.48 0 0 7.48 418.73 415.80 Tm (l) Tj 0 Tr
+/F1 1 Tf 2 Tr 7.48 0 0 7.48 365.71 251.54 Tm (l) Tj 0 Tr
+ET
+Q q
+0.000 0.000 0.000 RG
+0.75 w
+[] 0 d
+1 J
+1 j
+10.00 M
+130.09 73.44 m 527.73 73.44 l S
+130.09 73.44 m 130.09 66.24 l S
+262.64 73.44 m 262.64 66.24 l S
+395.18 73.44 m 395.18 66.24 l S
+527.73 73.44 m 527.73 66.24 l S
+BT
+0.000 0.000 0.000 rg
+/F2 1 Tf 12.00 0.00 -0.00 12.00 123.41 47.52 Tm (60) Tj
+/F2 1 Tf 12.00 0.00 -0.00 12.00 255.96 47.52 Tm (65) Tj
+/F2 1 Tf 12.00 0.00 -0.00 12.00 388.51 47.52 Tm (70) Tj
+/F2 1 Tf 12.00 0.00 -0.00 12.00 521.06 47.52 Tm (75) Tj
+ET
+59.04 199.38 m 59.04 473.16 l S
+59.04 199.38 m 51.84 199.38 l S
+59.04 336.27 m 51.84 336.27 l S
+59.04 473.16 m 51.84 473.16 l S
+BT
+/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 191.04 Tm (3.5) Tj
+/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 327.93 Tm (4.0) Tj
+/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 464.82 Tm (4.5) Tj
+ET
+59.04 73.44 m
+545.76 73.44 l
+545.76 516.96 l
+59.04 516.96 l
+59.04 73.44 l
+S
+Q q
+BT
+0.000 0.000 0.000 rg
+/F3 1 Tf 14.00 0.00 -0.00 14.00 265.84 541.45 Tm (Scatterplot) Tj
+/F2 1 Tf 12.00 0.00 -0.00 12.00 295.06 18.72 Tm (V1) Tj
+/F2 1 Tf 0.00 12.00 -12.00 0.00 12.96 287.86 Tm (V2) Tj
+ET
+Q
+endstream
+endobj
+8 0 obj
+1743
+endobj
+3 0 obj
+<<
+/Type /Pages
+/Kids [
+6 0 R
+]
+/Count 1
+/MediaBox [0 0 576 576]
+>>
+endobj
+4 0 obj
+<<
+/ProcSet [/PDF /Text]
+/Font << /F1 5 0 R /F2 10 0 R /F3 11 0 R >>
+/ExtGState << >>
+>>
+endobj
+9 0 obj
+<<
+/Type /Encoding
+/BaseEncoding /WinAnsiEncoding
+/Differences [ 45/minus 96/quoteleft
+144/dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent
+/dieresis /.notdef /ring /cedilla /.notdef /hungarumlaut /ogonek /caron /space]
+>>
+endobj
+10 0 obj <<
+/Type /Font
+/Subtype /Type1
+/Name /F2
+/BaseFont /Helvetica
+/Encoding 9 0 R
+>> endobj
+11 0 obj <<
+/Type /Font
+/Subtype /Type1
+/Name /F3
+/BaseFont /Helvetica-Bold
+/Encoding 9 0 R
+>> endobj
+xref
+0 12
+0000000000 65535 f 
+0000000021 00000 n 
+0000000163 00000 n 
+0000002191 00000 n 
+0000002274 00000 n 
+0000000212 00000 n 
+0000000295 00000 n 
+0000000375 00000 n 
+0000002171 00000 n 
+0000002378 00000 n 
+0000002635 00000 n 
+0000002732 00000 n 
+trailer
+<<
+/Size 12
+/Info 1 0 R
+/Root 2 0 R
+>>
+startxref
+2834
+%%EOF
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Mon Jul 28 11:56:22 2014 -0400
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+  <package name="rpy" version="1.0.3">
+      <repository changeset_revision="82170c94ca7c" name="package_rpy_1_0_3" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>