comparison riboseqr/difftrans.py @ 0:c34c364ce75d

First commit
author Vimalkumar Velayudhan <vimal@biotechcoder.com>
date Tue, 21 Jul 2015 14:48:39 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c34c364ce75d
1 # -*- coding: utf-8 -*-
2 import os
3 import sys
4 import argparse
5 import logging
6 import rpy2.robjects as robjects
7 import utils
8
9 rscript = ''
10 R = robjects.r
11
12
13 def run_rscript(command=None):
14 """Run R command, log it, append to rscript"""
15 global rscript
16 if not command:
17 return
18 logging.debug(command)
19 rscript += '{}\n'.format(command)
20 output = R(command)
21 return output
22
23
24 def get_counts(rdata_load='Metagene.rda', slice_lengths='27',
25 frames='', group1=None, group2=None, num_counts=10,
26 normalize='FALSE', html_file='Counts.html',
27 output_path='counts'):
28 options = {'slice_lengths': utils.process_args(
29 slice_lengths, ret_type='int', ret_mode='charvector')}
30
31 if not num_counts:
32 num_counts = 10
33
34 options['frames'] = utils.process_args(
35 frames, ret_type='int', ret_mode='listvector')
36
37 run_rscript('suppressMessages(library(riboSeqR))')
38 run_rscript('load("{}")'.format(rdata_load))
39
40 cmd_args = 'ffCs, lengths={slice_lengths}'.format(**options)
41 if frames:
42 cmd_args += ', frames={frames}'.format(**options)
43
44 run_rscript("""riboCounts <- sliceCounts({})
45 annotation <- as.data.frame(ffCs@CDS)
46 rownames(riboCounts) <- annotation$seqnames
47 colnames(riboCounts) <- names(riboDat@riboGR)""".format(cmd_args))
48
49 run_rscript("""mrnaCounts <- rnaCounts(riboDat, ffCs@CDS)
50 rownames(mrnaCounts) <- annotation$seqnames""")
51
52 if not os.path.exists(output_path):
53 os.mkdir(output_path)
54
55 html = '<h2>Differential Translation Analysis</h2><hr>'
56 for count_name, file_name, legend in (
57 ('riboCounts', 'RiboCounts.csv', 'Ribo-Seq counts'),
58 ('mrnaCounts', 'RNACounts.csv', 'RNA-Seq counts'),
59 ('tC', 'TopCounts.csv', 'baySeq topCounts')):
60 if count_name == 'tC' and R['riboCounts'] and R['mrnaCounts']:
61 run_rscript('suppressMessages(library(baySeq))')
62 cmd = """pD <- new("countData", replicates=ffCs@replicates, \
63 data=list(riboCounts, mrnaCounts), groups=list(NDT={0}, DT={1}), \
64 annotation=as.data.frame(ffCs@CDS), \
65 densityFunction=bbDensity)""".format(
66 utils.process_args(
67 group1, ret_type='int', ret_mode='charvector'),
68 utils.process_args(
69 group2, ret_type='str', ret_mode='charvector'))
70 run_rscript(cmd)
71
72 run_rscript('libsizes(pD) <- getLibsizes(pD)')
73 run_rscript('pD <- getPriors(pD, cl=NULL)')
74 run_rscript('pD <- getLikelihoods(pD, cl=NULL)')
75 run_rscript('tC <- topCounts(pD, "DT", normaliseData={}, '
76 'number={})'.format(normalize, num_counts))
77
78 if R[count_name]:
79 html += '<h3>{}</h3>'.format(legend)
80 output_file = os.path.join(output_path, file_name)
81 run_rscript('write.csv({}, file="{}")'.format(
82 count_name, output_file))
83 with open(output_file) as g:
84 html += '<table cellpadding="4" border="1">'
85 header = g.readline()
86 html += '<tr>'
87 for item in header.strip().split(","):
88 html += '<th>{}</th>'.format(item.strip('"'))
89 html += '</tr>'
90 for line in g:
91 html += '<tr>'
92 data = line.strip().split(",")
93 # html += '<td>{}</td>'.format(data[0].strip('"'))
94 for item in data:
95 html += ('<td align="center"><code>{}</code>'
96 '</td>'.format(item.strip('"')))
97 html += '</tr>'
98 html += ('</table><p>Download: <a href="{0}">'
99 '{0}</a></p><hr>'.format(file_name))
100
101 with open(os.path.join(output_path, 'counts.R'), 'w') as r:
102 r.write(rscript)
103
104 html += ('<h4>R script for this session</h4>'
105 '<p>Download: <a href="counts.R">counts.R</a></p>')
106
107 with open(html_file, 'w') as f:
108 f.write(html)
109
110
111 if __name__ == '__main__':
112 parser = argparse.ArgumentParser(description='Get Ribo and RNA-Seq counts')
113
114 # required arguments
115 flags = parser.add_argument_group('required arguments')
116 flags.add_argument('--rdata_load', required=True,
117 help='Saved riboSeqR data from Step 2')
118 flags.add_argument('--slice_lengths',
119 help='Lengths of ribosome footprints to inform count '
120 'data', default='27,28', required=True)
121 flags.add_argument('--group1', help='Group model for baySeq', required=True)
122 flags.add_argument('--group2', help='Group model for baySeq', required=True)
123 flags.add_argument('--frames',
124 help='Frames of ribosome footprints (relative to '
125 'coding start site). If omitted, all frames '
126 'are used.', required=True)
127
128 # optional arguments
129 parser.add_argument(
130 '--num_counts', help='How many results to return? (topCounts)')
131 parser.add_argument('--normalize', help='Normalize data?',
132 choices=['TRUE', 'FALSE'], default='FALSE')
133 parser.add_argument('--html_file', help='HTML file with reports')
134 parser.add_argument('--output_path', help='Directory to save output files')
135 parser.add_argument(
136 '--debug', help='Produce debug output', action='store_true')
137
138 args = parser.parse_args()
139 if args.debug:
140 logging.basicConfig(format='%(levelname)s - %(message)s',
141 level=logging.DEBUG, stream=sys.stdout)
142 logging.debug('Supplied Arguments\n{}\n'.format(vars(args)))
143
144 if not os.path.exists(args.output_path):
145 os.mkdir(args.output_path)
146
147 get_counts(rdata_load=args.rdata_load, slice_lengths=args.slice_lengths,
148 frames=args.frames, group1=args.group1, group2=args.group2,
149 num_counts=args.num_counts, normalize=args.normalize,
150 html_file=args.html_file, output_path=args.output_path)