# HG changeset patch
# User devteam
# Date 1396363943 14400
# Node ID 88ef41de020d5472e53d9406e4d9fb87cd40ff24
Imported from capsule None
diff -r 000000000000 -r 88ef41de020d partialR_square.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/partialR_square.py Tue Apr 01 10:52:23 2014 -0400
@@ -0,0 +1,145 @@
+#!/usr/bin/env python
+
+import sys
+from rpy import *
+import numpy
+
+#export PYTHONPATH=~/galaxy/lib/
+#running command python partialR_square.py reg_inp.tab 4 1,2,3 partialR_result.tabular
+
+def stop_err(msg):
+ sys.stderr.write(msg)
+ sys.exit()
+
+
+def sscombs(s):
+ if len(s) == 1:
+ return [s]
+ else:
+ ssc = sscombs(s[1:])
+ return [s[0]] + [s[0]+comb for comb in ssc] + ssc
+
+
+infile = sys.argv[1]
+y_col = int(sys.argv[2])-1
+x_cols = sys.argv[3].split(',')
+outfile = sys.argv[4]
+
+print "Predictor columns: %s; Response column: %d" % ( x_cols, y_col+1 )
+fout = open(outfile,'w')
+
+for i, line in enumerate( file ( infile )):
+ line = line.rstrip('\r\n')
+ if len( line )>0 and not line.startswith( '#' ):
+ elems = line.split( '\t' )
+ break
+ if i == 30:
+ break # Hopefully we'll never get here...
+
+if len( elems )<1:
+ stop_err( "The data in your input dataset is either missing or not formatted properly." )
+
+y_vals = []
+x_vals = []
+
+for k, col in enumerate(x_cols):
+ x_cols[k] = int(col)-1
+ x_vals.append([])
+ """
+ try:
+ float( elems[x_cols[k]] )
+ except:
+ try:
+ msg = "This operation cannot be performed on non-numeric column %d containing value '%s'." % ( col, elems[x_cols[k]] )
+ except:
+ msg = "This operation cannot be performed on non-numeric data."
+ stop_err( msg )
+ """
+NA = 'NA'
+for ind, line in enumerate( file( infile )):
+ if line and not line.startswith( '#' ):
+ try:
+ fields = line.split("\t")
+ try:
+ yval = float(fields[y_col])
+ except Exception, ey:
+ yval = r('NA')
+ #print >> sys.stderr, "ey = %s" %ey
+ y_vals.append(yval)
+ for k, col in enumerate(x_cols):
+ try:
+ xval = float(fields[col])
+ except Exception, ex:
+ xval = r('NA')
+ #print >> sys.stderr, "ex = %s" %ex
+ x_vals[k].append(xval)
+ except:
+ pass
+
+x_vals1 = numpy.asarray(x_vals).transpose()
+dat = r.list(x=array(x_vals1), y=y_vals)
+
+set_default_mode(NO_CONVERSION)
+try:
+ full = r.lm(r("y ~ x"), data= r.na_exclude(dat)) #full model includes all the predictor variables specified by the user
+except RException, rex:
+ stop_err("Error performing linear regression on the input data.\nEither the response column or one of the predictor columns contain no numeric values.")
+set_default_mode(BASIC_CONVERSION)
+
+summary = r.summary(full)
+fullr2 = summary.get('r.squared','NA')
+
+if fullr2 == 'NA':
+ stop_err("Error in linear regression")
+
+if len(x_vals) < 10:
+ s = ""
+ for ch in range(len(x_vals)):
+ s += str(ch)
+else:
+ stop_err("This tool only works with less than 10 predictors.")
+
+print >> fout, "#Model\tR-sq\tpartial_R_Terms\tpartial_R_Value"
+all_combos = sorted(sscombs(s), key=len)
+all_combos.reverse()
+for j, cols in enumerate(all_combos):
+ #if len(cols) == len(s): #Same as the full model above
+ # continue
+ if len(cols) == 1:
+ x_vals1 = x_vals[int(cols)]
+ else:
+ x_v = []
+ for col in cols:
+ x_v.append(x_vals[int(col)])
+ x_vals1 = numpy.asarray(x_v).transpose()
+ dat = r.list(x=array(x_vals1), y=y_vals)
+ set_default_mode(NO_CONVERSION)
+ red = r.lm(r("y ~ x"), data= dat) #Reduced model
+ set_default_mode(BASIC_CONVERSION)
+ summary = r.summary(red)
+ redr2 = summary.get('r.squared','NA')
+ try:
+ partial_R = (float(fullr2)-float(redr2))/(1-float(redr2))
+ except:
+ partial_R = 'NA'
+ col_str = ""
+ for col in cols:
+ col_str = col_str + str(int(x_cols[int(col)]) + 1) + " "
+ col_str.strip()
+ partial_R_col_str = ""
+ for col in s:
+ if col not in cols:
+ partial_R_col_str = partial_R_col_str + str(int(x_cols[int(col)]) + 1) + " "
+ partial_R_col_str.strip()
+ if len(cols) == len(s): #full model
+ partial_R_col_str = "-"
+ partial_R = "-"
+ try:
+ redr2 = "%.4f" % (float(redr2))
+ except:
+ pass
+ try:
+ partial_R = "%.4f" % (float(partial_R))
+ except:
+ pass
+ print >> fout, "%s\t%s\t%s\t%s" % ( col_str, redr2, partial_R_col_str, partial_R )
diff -r 000000000000 -r 88ef41de020d partialR_square.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/partialR_square.xml Tue Apr 01 10:52:23 2014 -0400
@@ -0,0 +1,73 @@
+
+
+
+ R
+ numpy
+ rpy
+
+
+ partialR_square.py
+ $input1
+ $response_col
+ $predictor_cols
+ $out_file1
+ 1>/dev/null
+
+
+
+
+
+
+
+
+
+
+
+
+ rpy
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**TIP:** If your data is not TAB delimited, use *Edit Datasets->Convert characters*
+
+-----
+
+.. class:: infomark
+
+**What it does**
+
+This tool computes the Partial R squared for all possible variable subsets using the following formula:
+
+**Partial R squared = [SSE(without i: 1,2,...,p-1) - SSE (full: 1,2,..,i..,p-1) / SSE(without i: 1,2,...,p-1)]**, which denotes the case where the 'i'th predictor is dropped.
+
+
+
+In general, **Partial R squared = [SSE(without i: 1,2,...,p-1) - SSE (full: 1,2,..,i..,p-1) / SSE(without i: 1,2,...,p-1)]**, where,
+
+- SSE (full: 1,2,..,i..,p-1) = Sum of Squares left out by the full set of predictors SSE(X1, X2 … Xp)
+- SSE (full: 1,2,..,i..,p-1) = Sum of Squares left out by the set of predictors excluding; for example, if we omit the first predictor, it will be SSE(X2 … Xp).
+
+
+The 4 columns in the output are described below:
+
+- Column 1 (Model): denotes the variables present in the model
+- Column 2 (R-sq): denotes the R-squared value corresponding to the model in Column 1
+- Column 3 (Partial R squared_Terms): denotes the variable/s for which Partial R squared is computed. These are the variables that are absent in the reduced model in Column 1. A '-' in this column indicates that the model in Column 1 is the Full model.
+- Column 4 (Partial R squared): denotes the Partial R squared value corresponding to the variable/s in Column 3. A '-' in this column indicates that the model in Column 1 is the Full model.
+
+*R Development Core Team (2010). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. ISBN 3-900051-07-0, URL http://www.R-project.org.*
+
+
+
diff -r 000000000000 -r 88ef41de020d test-data/partialR_result.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/partialR_result.tabular Tue Apr 01 10:52:23 2014 -0400
@@ -0,0 +1,4 @@
+#Model R-sq partial_R_Terms partial_R_Value
+1 2 0.9388 - -
+2 0.7280 1 0.7750
+1 0.9104 2 0.3167
\ No newline at end of file
diff -r 000000000000 -r 88ef41de020d test-data/regr_inp.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/regr_inp.tabular Tue Apr 01 10:52:23 2014 -0400
@@ -0,0 +1,10 @@
+7 33 42
+4 41 33
+16 7 75
+3 49 28
+21 5 91
+8 31 55
+7 35 52
+5 30 16
+15 10 69
+20 10 94
diff -r 000000000000 -r 88ef41de020d tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Tue Apr 01 10:52:23 2014 -0400
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+