diff riboseqr/difftrans.py @ 0:c34c364ce75d

First commit
author Vimalkumar Velayudhan <vimal@biotechcoder.com>
date Tue, 21 Jul 2015 14:48:39 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/riboseqr/difftrans.py	Tue Jul 21 14:48:39 2015 +0100
@@ -0,0 +1,150 @@
+# -*- coding: utf-8 -*-
+import os
+import sys
+import argparse
+import logging
+import rpy2.robjects as robjects
+import utils
+
+rscript = ''
+R = robjects.r
+
+
+def run_rscript(command=None):
+    """Run R command, log it, append to rscript"""
+    global rscript
+    if not command:
+        return
+    logging.debug(command)
+    rscript += '{}\n'.format(command)
+    output = R(command)
+    return output
+
+
+def get_counts(rdata_load='Metagene.rda', slice_lengths='27',
+               frames='', group1=None, group2=None, num_counts=10,
+               normalize='FALSE', html_file='Counts.html',
+               output_path='counts'):
+    options = {'slice_lengths': utils.process_args(
+        slice_lengths, ret_type='int', ret_mode='charvector')}
+
+    if not num_counts:
+        num_counts = 10
+
+    options['frames'] = utils.process_args(
+        frames, ret_type='int', ret_mode='listvector')
+
+    run_rscript('suppressMessages(library(riboSeqR))')
+    run_rscript('load("{}")'.format(rdata_load))
+
+    cmd_args = 'ffCs, lengths={slice_lengths}'.format(**options)
+    if frames:
+        cmd_args += ', frames={frames}'.format(**options)
+
+    run_rscript("""riboCounts <- sliceCounts({})
+    annotation <- as.data.frame(ffCs@CDS)
+    rownames(riboCounts) <- annotation$seqnames
+    colnames(riboCounts) <- names(riboDat@riboGR)""".format(cmd_args))
+
+    run_rscript("""mrnaCounts <- rnaCounts(riboDat, ffCs@CDS)
+    rownames(mrnaCounts) <- annotation$seqnames""")
+
+    if not os.path.exists(output_path):
+        os.mkdir(output_path)
+
+    html = '<h2>Differential Translation Analysis</h2><hr>'
+    for count_name, file_name, legend in (
+            ('riboCounts', 'RiboCounts.csv', 'Ribo-Seq counts'),
+            ('mrnaCounts', 'RNACounts.csv', 'RNA-Seq counts'),
+            ('tC', 'TopCounts.csv', 'baySeq topCounts')):
+        if count_name == 'tC' and R['riboCounts'] and R['mrnaCounts']:
+            run_rscript('suppressMessages(library(baySeq))')
+            cmd = """pD <- new("countData", replicates=ffCs@replicates, \
+            data=list(riboCounts, mrnaCounts), groups=list(NDT={0}, DT={1}), \
+            annotation=as.data.frame(ffCs@CDS), \
+            densityFunction=bbDensity)""".format(
+                utils.process_args(
+                    group1, ret_type='int', ret_mode='charvector'),
+                utils.process_args(
+                    group2, ret_type='str', ret_mode='charvector'))
+            run_rscript(cmd)
+
+            run_rscript('libsizes(pD) <- getLibsizes(pD)')
+            run_rscript('pD <- getPriors(pD, cl=NULL)')
+            run_rscript('pD <- getLikelihoods(pD, cl=NULL)')
+            run_rscript('tC <- topCounts(pD, "DT", normaliseData={}, '
+                        'number={})'.format(normalize, num_counts))
+
+        if R[count_name]:
+            html += '<h3>{}</h3>'.format(legend)
+            output_file = os.path.join(output_path, file_name)
+            run_rscript('write.csv({}, file="{}")'.format(
+                count_name, output_file))
+            with open(output_file) as g:
+                html += '<table cellpadding="4" border="1">'
+                header = g.readline()
+                html += '<tr>'
+                for item in header.strip().split(","):
+                    html += '<th>{}</th>'.format(item.strip('"'))
+                html += '</tr>'
+                for line in g:
+                    html += '<tr>'
+                    data = line.strip().split(",")
+                    # html += '<td>{}</td>'.format(data[0].strip('"'))
+                    for item in data:
+                        html += ('<td align="center"><code>{}</code>'
+                                 '</td>'.format(item.strip('"')))
+                    html += '</tr>'
+                html += ('</table><p>Download: <a href="{0}">'
+                         '{0}</a></p><hr>'.format(file_name))
+
+    with open(os.path.join(output_path, 'counts.R'), 'w') as r:
+        r.write(rscript)
+
+    html += ('<h4>R script for this session</h4>'
+             '<p>Download: <a href="counts.R">counts.R</a></p>')
+
+    with open(html_file, 'w') as f:
+        f.write(html)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Get Ribo and RNA-Seq counts')
+
+    # required arguments
+    flags = parser.add_argument_group('required arguments')
+    flags.add_argument('--rdata_load', required=True,
+                       help='Saved riboSeqR data from Step 2')
+    flags.add_argument('--slice_lengths',
+                       help='Lengths of ribosome footprints to inform count '
+                            'data', default='27,28', required=True)
+    flags.add_argument('--group1', help='Group model for baySeq', required=True)
+    flags.add_argument('--group2', help='Group model for baySeq', required=True)
+    flags.add_argument('--frames',
+                       help='Frames of ribosome footprints (relative to '
+                            'coding start site). If omitted, all frames '
+                            'are used.', required=True)
+
+    # optional arguments
+    parser.add_argument(
+        '--num_counts', help='How many results to return? (topCounts)')
+    parser.add_argument('--normalize', help='Normalize data?',
+                        choices=['TRUE', 'FALSE'], default='FALSE')
+    parser.add_argument('--html_file', help='HTML file with reports')
+    parser.add_argument('--output_path', help='Directory to save output files')
+    parser.add_argument(
+        '--debug', help='Produce debug output', action='store_true')
+
+    args = parser.parse_args()
+    if args.debug:
+        logging.basicConfig(format='%(levelname)s - %(message)s',
+                            level=logging.DEBUG, stream=sys.stdout)
+        logging.debug('Supplied Arguments\n{}\n'.format(vars(args)))
+
+    if not os.path.exists(args.output_path):
+        os.mkdir(args.output_path)
+
+    get_counts(rdata_load=args.rdata_load, slice_lengths=args.slice_lengths,
+               frames=args.frames, group1=args.group1, group2=args.group2,
+               num_counts=args.num_counts, normalize=args.normalize,
+               html_file=args.html_file, output_path=args.output_path)