Mercurial > repos > vimalkumarvelayudhan > riboseqr_wrapper
view riboseqr/difftrans.py @ 0:c34c364ce75d
First commit
| author | Vimalkumar Velayudhan <vimal@biotechcoder.com> | 
|---|---|
| date | Tue, 21 Jul 2015 14:48:39 +0100 | 
| parents | |
| children | 
line wrap: on
 line source
# -*- coding: utf-8 -*- import os import sys import argparse import logging import rpy2.robjects as robjects import utils rscript = '' R = robjects.r def run_rscript(command=None): """Run R command, log it, append to rscript""" global rscript if not command: return logging.debug(command) rscript += '{}\n'.format(command) output = R(command) return output def get_counts(rdata_load='Metagene.rda', slice_lengths='27', frames='', group1=None, group2=None, num_counts=10, normalize='FALSE', html_file='Counts.html', output_path='counts'): options = {'slice_lengths': utils.process_args( slice_lengths, ret_type='int', ret_mode='charvector')} if not num_counts: num_counts = 10 options['frames'] = utils.process_args( frames, ret_type='int', ret_mode='listvector') run_rscript('suppressMessages(library(riboSeqR))') run_rscript('load("{}")'.format(rdata_load)) cmd_args = 'ffCs, lengths={slice_lengths}'.format(**options) if frames: cmd_args += ', frames={frames}'.format(**options) run_rscript("""riboCounts <- sliceCounts({}) annotation <- as.data.frame(ffCs@CDS) rownames(riboCounts) <- annotation$seqnames colnames(riboCounts) <- names(riboDat@riboGR)""".format(cmd_args)) run_rscript("""mrnaCounts <- rnaCounts(riboDat, ffCs@CDS) rownames(mrnaCounts) <- annotation$seqnames""") if not os.path.exists(output_path): os.mkdir(output_path) html = '<h2>Differential Translation Analysis</h2><hr>' for count_name, file_name, legend in ( ('riboCounts', 'RiboCounts.csv', 'Ribo-Seq counts'), ('mrnaCounts', 'RNACounts.csv', 'RNA-Seq counts'), ('tC', 'TopCounts.csv', 'baySeq topCounts')): if count_name == 'tC' and R['riboCounts'] and R['mrnaCounts']: run_rscript('suppressMessages(library(baySeq))') cmd = """pD <- new("countData", replicates=ffCs@replicates, \ data=list(riboCounts, mrnaCounts), groups=list(NDT={0}, DT={1}), \ annotation=as.data.frame(ffCs@CDS), \ densityFunction=bbDensity)""".format( utils.process_args( group1, ret_type='int', ret_mode='charvector'), utils.process_args( group2, ret_type='str', ret_mode='charvector')) run_rscript(cmd) run_rscript('libsizes(pD) <- getLibsizes(pD)') run_rscript('pD <- getPriors(pD, cl=NULL)') run_rscript('pD <- getLikelihoods(pD, cl=NULL)') run_rscript('tC <- topCounts(pD, "DT", normaliseData={}, ' 'number={})'.format(normalize, num_counts)) if R[count_name]: html += '<h3>{}</h3>'.format(legend) output_file = os.path.join(output_path, file_name) run_rscript('write.csv({}, file="{}")'.format( count_name, output_file)) with open(output_file) as g: html += '<table cellpadding="4" border="1">' header = g.readline() html += '<tr>' for item in header.strip().split(","): html += '<th>{}</th>'.format(item.strip('"')) html += '</tr>' for line in g: html += '<tr>' data = line.strip().split(",") # html += '<td>{}</td>'.format(data[0].strip('"')) for item in data: html += ('<td align="center"><code>{}</code>' '</td>'.format(item.strip('"'))) html += '</tr>' html += ('</table><p>Download: <a href="{0}">' '{0}</a></p><hr>'.format(file_name)) with open(os.path.join(output_path, 'counts.R'), 'w') as r: r.write(rscript) html += ('<h4>R script for this session</h4>' '<p>Download: <a href="counts.R">counts.R</a></p>') with open(html_file, 'w') as f: f.write(html) if __name__ == '__main__': parser = argparse.ArgumentParser(description='Get Ribo and RNA-Seq counts') # required arguments flags = parser.add_argument_group('required arguments') flags.add_argument('--rdata_load', required=True, help='Saved riboSeqR data from Step 2') flags.add_argument('--slice_lengths', help='Lengths of ribosome footprints to inform count ' 'data', default='27,28', required=True) flags.add_argument('--group1', help='Group model for baySeq', required=True) flags.add_argument('--group2', help='Group model for baySeq', required=True) flags.add_argument('--frames', help='Frames of ribosome footprints (relative to ' 'coding start site). If omitted, all frames ' 'are used.', required=True) # optional arguments parser.add_argument( '--num_counts', help='How many results to return? (topCounts)') parser.add_argument('--normalize', help='Normalize data?', choices=['TRUE', 'FALSE'], default='FALSE') parser.add_argument('--html_file', help='HTML file with reports') parser.add_argument('--output_path', help='Directory to save output files') parser.add_argument( '--debug', help='Produce debug output', action='store_true') args = parser.parse_args() if args.debug: logging.basicConfig(format='%(levelname)s - %(message)s', level=logging.DEBUG, stream=sys.stdout) logging.debug('Supplied Arguments\n{}\n'.format(vars(args))) if not os.path.exists(args.output_path): os.mkdir(args.output_path) get_counts(rdata_load=args.rdata_load, slice_lengths=args.slice_lengths, frames=args.frames, group1=args.group1, group2=args.group2, num_counts=args.num_counts, normalize=args.normalize, html_file=args.html_file, output_path=args.output_path)
