0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 from galaxy import eggs
|
|
4 import sys, string
|
|
5 from rpy import *
|
|
6 import numpy
|
|
7
|
|
8 def stop_err(msg):
|
|
9 sys.stderr.write(msg)
|
|
10 sys.exit()
|
|
11
|
|
12 infile = sys.argv[1]
|
|
13 x_cols = sys.argv[2].split(',')
|
|
14 y_cols = sys.argv[3].split(',')
|
|
15
|
|
16 x_scale = x_center = "FALSE"
|
|
17 if sys.argv[4] == 'both':
|
|
18 x_scale = x_center = "TRUE"
|
|
19 elif sys.argv[4] == 'center':
|
|
20 x_center = "TRUE"
|
|
21 elif sys.argv[4] == 'scale':
|
|
22 x_scale = "TRUE"
|
|
23
|
|
24 y_scale = y_center = "FALSE"
|
|
25 if sys.argv[5] == 'both':
|
|
26 y_scale = y_center = "TRUE"
|
|
27 elif sys.argv[5] == 'center':
|
|
28 y_center = "TRUE"
|
|
29 elif sys.argv[5] == 'scale':
|
|
30 y_scale = "TRUE"
|
|
31
|
|
32 std_scores = "FALSE"
|
|
33 if sys.argv[6] == "yes":
|
|
34 std_scores = "TRUE"
|
|
35
|
|
36 outfile = sys.argv[7]
|
|
37 outfile2 = sys.argv[8]
|
|
38
|
|
39 fout = open(outfile,'w')
|
|
40 elems = []
|
|
41 for i, line in enumerate( file ( infile )):
|
|
42 line = line.rstrip('\r\n')
|
|
43 if len( line )>0 and not line.startswith( '#' ):
|
|
44 elems = line.split( '\t' )
|
|
45 break
|
|
46 if i == 30:
|
|
47 break # Hopefully we'll never get here...
|
|
48
|
|
49 if len( elems )<1:
|
|
50 stop_err( "The data in your input dataset is either missing or not formatted properly." )
|
|
51
|
|
52 x_vals = []
|
|
53
|
|
54 for k,col in enumerate(x_cols):
|
|
55 x_cols[k] = int(col)-1
|
|
56 x_vals.append([])
|
|
57
|
|
58 y_vals = []
|
|
59
|
|
60 for k,col in enumerate(y_cols):
|
|
61 y_cols[k] = int(col)-1
|
|
62 y_vals.append([])
|
|
63
|
|
64 skipped = 0
|
|
65 for ind,line in enumerate( file( infile )):
|
|
66 if line and not line.startswith( '#' ):
|
|
67 try:
|
|
68 fields = line.strip().split("\t")
|
|
69 valid_line = True
|
|
70 for col in x_cols+y_cols:
|
|
71 try:
|
|
72 assert float(fields[col])
|
|
73 except:
|
|
74 skipped += 1
|
|
75 valid_line = False
|
|
76 break
|
|
77 if valid_line:
|
|
78 for k,col in enumerate(x_cols):
|
|
79 try:
|
|
80 xval = float(fields[col])
|
|
81 except:
|
|
82 xval = NaN#
|
|
83 x_vals[k].append(xval)
|
|
84 for k,col in enumerate(y_cols):
|
|
85 try:
|
|
86 yval = float(fields[col])
|
|
87 except:
|
|
88 yval = NaN#
|
|
89 y_vals[k].append(yval)
|
|
90 except:
|
|
91 skipped += 1
|
|
92
|
|
93 x_vals1 = numpy.asarray(x_vals).transpose()
|
|
94 y_vals1 = numpy.asarray(y_vals).transpose()
|
|
95
|
|
96 x_dat= r.list(array(x_vals1))
|
|
97 y_dat= r.list(array(y_vals1))
|
|
98
|
|
99 try:
|
|
100 r.suppressWarnings(r.library("yacca"))
|
|
101 except:
|
|
102 stop_err("Missing R library yacca.")
|
|
103
|
|
104 set_default_mode(NO_CONVERSION)
|
|
105 try:
|
|
106 xcolnames = ["c%d" %(el+1) for el in x_cols]
|
|
107 ycolnames = ["c%d" %(el+1) for el in y_cols]
|
|
108 cc = r.cca(x=x_dat, y=y_dat, xlab=xcolnames, ylab=ycolnames, xcenter=r(x_center), ycenter=r(y_center), xscale=r(x_scale), yscale=r(y_scale), standardize_scores=r(std_scores))
|
|
109 ftest = r.F_test_cca(cc)
|
|
110 except RException, rex:
|
|
111 stop_err("Encountered error while performing CCA on the input data: %s" %(rex))
|
|
112
|
|
113 set_default_mode(BASIC_CONVERSION)
|
|
114 summary = r.summary(cc)
|
|
115
|
|
116 ncomps = len(summary['corr'])
|
|
117 comps = summary['corr'].keys()
|
|
118 corr = summary['corr'].values()
|
|
119 xlab = summary['xlab']
|
|
120 ylab = summary['ylab']
|
|
121
|
|
122 for i in range(ncomps):
|
|
123 corr[comps.index('CV %s' %(i+1))] = summary['corr'].values()[i]
|
|
124
|
|
125 ftest=ftest.as_py()
|
|
126 print >>fout, "#Component\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
|
|
127 print >>fout, "#Correlation\t%s" %("\t".join(["%.4g" % el for el in corr]))
|
|
128 print >>fout, "#F-statistic\t%s" %("\t".join(["%.4g" % el for el in ftest['statistic']]))
|
|
129 print >>fout, "#p-value\t%s" %("\t".join(["%.4g" % el for el in ftest['p.value']]))
|
|
130
|
|
131 print >>fout, "#X-Coefficients\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
|
|
132 for i,val in enumerate(summary['xcoef']):
|
|
133 print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val]))
|
|
134
|
|
135 print >>fout, "#Y-Coefficients\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
|
|
136 for i,val in enumerate(summary['ycoef']):
|
|
137 print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val]))
|
|
138
|
|
139 print >>fout, "#X-Loadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
|
|
140 for i,val in enumerate(summary['xstructcorr']):
|
|
141 print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val]))
|
|
142
|
|
143 print >>fout, "#Y-Loadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
|
|
144 for i,val in enumerate(summary['ystructcorr']):
|
|
145 print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val]))
|
|
146
|
|
147 print >>fout, "#X-CrossLoadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
|
|
148 for i,val in enumerate(summary['xcrosscorr']):
|
|
149 print >>fout, "%s\t%s" %(xlab[i], "\t".join(["%.4g" % el for el in val]))
|
|
150
|
|
151 print >>fout, "#Y-CrossLoadings\t%s" %("\t".join(["%s" % el for el in range(1,ncomps+1)]))
|
|
152 for i,val in enumerate(summary['ycrosscorr']):
|
|
153 print >>fout, "%s\t%s" %(ylab[i], "\t".join(["%.4g" % el for el in val]))
|
|
154
|
|
155 r.pdf( outfile2, 8, 8 )
|
|
156 #r.plot(cc)
|
|
157 for i in range(ncomps):
|
|
158 r.helio_plot(cc, cv = i+1, main = r.paste("Explained Variance for CV",i+1), type = "variance")
|
|
159 r.dev_off() |