annotate riboseqr/difftrans.py @ 6:5a242f289347 default tip

Merge
author Vimalkumar Velayudhan <vimalkumarvelayudhan@gmail.com>
date Tue, 27 Oct 2015 12:29:39 +0000
parents c34c364ce75d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
1 # -*- coding: utf-8 -*-
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
2 import os
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
3 import sys
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
4 import argparse
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
5 import logging
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
6 import rpy2.robjects as robjects
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
7 import utils
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
8
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
9 rscript = ''
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
10 R = robjects.r
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
11
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
12
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
13 def run_rscript(command=None):
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
14 """Run R command, log it, append to rscript"""
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
15 global rscript
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
16 if not command:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
17 return
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
18 logging.debug(command)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
19 rscript += '{}\n'.format(command)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
20 output = R(command)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
21 return output
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
22
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
23
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
24 def get_counts(rdata_load='Metagene.rda', slice_lengths='27',
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
25 frames='', group1=None, group2=None, num_counts=10,
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
26 normalize='FALSE', html_file='Counts.html',
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
27 output_path='counts'):
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
28 options = {'slice_lengths': utils.process_args(
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
29 slice_lengths, ret_type='int', ret_mode='charvector')}
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
30
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
31 if not num_counts:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
32 num_counts = 10
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
33
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
34 options['frames'] = utils.process_args(
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
35 frames, ret_type='int', ret_mode='listvector')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
36
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
37 run_rscript('suppressMessages(library(riboSeqR))')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
38 run_rscript('load("{}")'.format(rdata_load))
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
39
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
40 cmd_args = 'ffCs, lengths={slice_lengths}'.format(**options)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
41 if frames:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
42 cmd_args += ', frames={frames}'.format(**options)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
43
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
44 run_rscript("""riboCounts <- sliceCounts({})
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
45 annotation <- as.data.frame(ffCs@CDS)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
46 rownames(riboCounts) <- annotation$seqnames
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
47 colnames(riboCounts) <- names(riboDat@riboGR)""".format(cmd_args))
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
48
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
49 run_rscript("""mrnaCounts <- rnaCounts(riboDat, ffCs@CDS)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
50 rownames(mrnaCounts) <- annotation$seqnames""")
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
51
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
52 if not os.path.exists(output_path):
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
53 os.mkdir(output_path)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
54
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
55 html = '<h2>Differential Translation Analysis</h2><hr>'
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
56 for count_name, file_name, legend in (
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
57 ('riboCounts', 'RiboCounts.csv', 'Ribo-Seq counts'),
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
58 ('mrnaCounts', 'RNACounts.csv', 'RNA-Seq counts'),
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
59 ('tC', 'TopCounts.csv', 'baySeq topCounts')):
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
60 if count_name == 'tC' and R['riboCounts'] and R['mrnaCounts']:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
61 run_rscript('suppressMessages(library(baySeq))')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
62 cmd = """pD <- new("countData", replicates=ffCs@replicates, \
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
63 data=list(riboCounts, mrnaCounts), groups=list(NDT={0}, DT={1}), \
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
64 annotation=as.data.frame(ffCs@CDS), \
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
65 densityFunction=bbDensity)""".format(
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
66 utils.process_args(
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
67 group1, ret_type='int', ret_mode='charvector'),
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
68 utils.process_args(
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
69 group2, ret_type='str', ret_mode='charvector'))
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
70 run_rscript(cmd)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
71
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
72 run_rscript('libsizes(pD) <- getLibsizes(pD)')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
73 run_rscript('pD <- getPriors(pD, cl=NULL)')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
74 run_rscript('pD <- getLikelihoods(pD, cl=NULL)')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
75 run_rscript('tC <- topCounts(pD, "DT", normaliseData={}, '
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
76 'number={})'.format(normalize, num_counts))
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
77
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
78 if R[count_name]:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
79 html += '<h3>{}</h3>'.format(legend)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
80 output_file = os.path.join(output_path, file_name)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
81 run_rscript('write.csv({}, file="{}")'.format(
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
82 count_name, output_file))
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
83 with open(output_file) as g:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
84 html += '<table cellpadding="4" border="1">'
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
85 header = g.readline()
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
86 html += '<tr>'
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
87 for item in header.strip().split(","):
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
88 html += '<th>{}</th>'.format(item.strip('"'))
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
89 html += '</tr>'
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
90 for line in g:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
91 html += '<tr>'
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
92 data = line.strip().split(",")
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
93 # html += '<td>{}</td>'.format(data[0].strip('"'))
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
94 for item in data:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
95 html += ('<td align="center"><code>{}</code>'
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
96 '</td>'.format(item.strip('"')))
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
97 html += '</tr>'
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
98 html += ('</table><p>Download: <a href="{0}">'
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
99 '{0}</a></p><hr>'.format(file_name))
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
100
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
101 with open(os.path.join(output_path, 'counts.R'), 'w') as r:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
102 r.write(rscript)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
103
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
104 html += ('<h4>R script for this session</h4>'
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
105 '<p>Download: <a href="counts.R">counts.R</a></p>')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
106
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
107 with open(html_file, 'w') as f:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
108 f.write(html)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
109
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
110
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
111 if __name__ == '__main__':
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
112 parser = argparse.ArgumentParser(description='Get Ribo and RNA-Seq counts')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
113
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
114 # required arguments
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
115 flags = parser.add_argument_group('required arguments')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
116 flags.add_argument('--rdata_load', required=True,
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
117 help='Saved riboSeqR data from Step 2')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
118 flags.add_argument('--slice_lengths',
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
119 help='Lengths of ribosome footprints to inform count '
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
120 'data', default='27,28', required=True)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
121 flags.add_argument('--group1', help='Group model for baySeq', required=True)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
122 flags.add_argument('--group2', help='Group model for baySeq', required=True)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
123 flags.add_argument('--frames',
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
124 help='Frames of ribosome footprints (relative to '
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
125 'coding start site). If omitted, all frames '
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
126 'are used.', required=True)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
127
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
128 # optional arguments
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
129 parser.add_argument(
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
130 '--num_counts', help='How many results to return? (topCounts)')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
131 parser.add_argument('--normalize', help='Normalize data?',
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
132 choices=['TRUE', 'FALSE'], default='FALSE')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
133 parser.add_argument('--html_file', help='HTML file with reports')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
134 parser.add_argument('--output_path', help='Directory to save output files')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
135 parser.add_argument(
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
136 '--debug', help='Produce debug output', action='store_true')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
137
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
138 args = parser.parse_args()
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
139 if args.debug:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
140 logging.basicConfig(format='%(levelname)s - %(message)s',
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
141 level=logging.DEBUG, stream=sys.stdout)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
142 logging.debug('Supplied Arguments\n{}\n'.format(vars(args)))
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
143
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
144 if not os.path.exists(args.output_path):
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
145 os.mkdir(args.output_path)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
146
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
147 get_counts(rdata_load=args.rdata_load, slice_lengths=args.slice_lengths,
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
148 frames=args.frames, group1=args.group1, group2=args.group2,
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
149 num_counts=args.num_counts, normalize=args.normalize,
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
150 html_file=args.html_file, output_path=args.output_path)