Mercurial > repos > vimalkumarvelayudhan > riboseqr_wrapper
comparison riboseqr/difftrans.py @ 0:c34c364ce75d
First commit
| author | Vimalkumar Velayudhan <vimal@biotechcoder.com> |
|---|---|
| date | Tue, 21 Jul 2015 14:48:39 +0100 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:c34c364ce75d |
|---|---|
| 1 # -*- coding: utf-8 -*- | |
| 2 import os | |
| 3 import sys | |
| 4 import argparse | |
| 5 import logging | |
| 6 import rpy2.robjects as robjects | |
| 7 import utils | |
| 8 | |
| 9 rscript = '' | |
| 10 R = robjects.r | |
| 11 | |
| 12 | |
| 13 def run_rscript(command=None): | |
| 14 """Run R command, log it, append to rscript""" | |
| 15 global rscript | |
| 16 if not command: | |
| 17 return | |
| 18 logging.debug(command) | |
| 19 rscript += '{}\n'.format(command) | |
| 20 output = R(command) | |
| 21 return output | |
| 22 | |
| 23 | |
| 24 def get_counts(rdata_load='Metagene.rda', slice_lengths='27', | |
| 25 frames='', group1=None, group2=None, num_counts=10, | |
| 26 normalize='FALSE', html_file='Counts.html', | |
| 27 output_path='counts'): | |
| 28 options = {'slice_lengths': utils.process_args( | |
| 29 slice_lengths, ret_type='int', ret_mode='charvector')} | |
| 30 | |
| 31 if not num_counts: | |
| 32 num_counts = 10 | |
| 33 | |
| 34 options['frames'] = utils.process_args( | |
| 35 frames, ret_type='int', ret_mode='listvector') | |
| 36 | |
| 37 run_rscript('suppressMessages(library(riboSeqR))') | |
| 38 run_rscript('load("{}")'.format(rdata_load)) | |
| 39 | |
| 40 cmd_args = 'ffCs, lengths={slice_lengths}'.format(**options) | |
| 41 if frames: | |
| 42 cmd_args += ', frames={frames}'.format(**options) | |
| 43 | |
| 44 run_rscript("""riboCounts <- sliceCounts({}) | |
| 45 annotation <- as.data.frame(ffCs@CDS) | |
| 46 rownames(riboCounts) <- annotation$seqnames | |
| 47 colnames(riboCounts) <- names(riboDat@riboGR)""".format(cmd_args)) | |
| 48 | |
| 49 run_rscript("""mrnaCounts <- rnaCounts(riboDat, ffCs@CDS) | |
| 50 rownames(mrnaCounts) <- annotation$seqnames""") | |
| 51 | |
| 52 if not os.path.exists(output_path): | |
| 53 os.mkdir(output_path) | |
| 54 | |
| 55 html = '<h2>Differential Translation Analysis</h2><hr>' | |
| 56 for count_name, file_name, legend in ( | |
| 57 ('riboCounts', 'RiboCounts.csv', 'Ribo-Seq counts'), | |
| 58 ('mrnaCounts', 'RNACounts.csv', 'RNA-Seq counts'), | |
| 59 ('tC', 'TopCounts.csv', 'baySeq topCounts')): | |
| 60 if count_name == 'tC' and R['riboCounts'] and R['mrnaCounts']: | |
| 61 run_rscript('suppressMessages(library(baySeq))') | |
| 62 cmd = """pD <- new("countData", replicates=ffCs@replicates, \ | |
| 63 data=list(riboCounts, mrnaCounts), groups=list(NDT={0}, DT={1}), \ | |
| 64 annotation=as.data.frame(ffCs@CDS), \ | |
| 65 densityFunction=bbDensity)""".format( | |
| 66 utils.process_args( | |
| 67 group1, ret_type='int', ret_mode='charvector'), | |
| 68 utils.process_args( | |
| 69 group2, ret_type='str', ret_mode='charvector')) | |
| 70 run_rscript(cmd) | |
| 71 | |
| 72 run_rscript('libsizes(pD) <- getLibsizes(pD)') | |
| 73 run_rscript('pD <- getPriors(pD, cl=NULL)') | |
| 74 run_rscript('pD <- getLikelihoods(pD, cl=NULL)') | |
| 75 run_rscript('tC <- topCounts(pD, "DT", normaliseData={}, ' | |
| 76 'number={})'.format(normalize, num_counts)) | |
| 77 | |
| 78 if R[count_name]: | |
| 79 html += '<h3>{}</h3>'.format(legend) | |
| 80 output_file = os.path.join(output_path, file_name) | |
| 81 run_rscript('write.csv({}, file="{}")'.format( | |
| 82 count_name, output_file)) | |
| 83 with open(output_file) as g: | |
| 84 html += '<table cellpadding="4" border="1">' | |
| 85 header = g.readline() | |
| 86 html += '<tr>' | |
| 87 for item in header.strip().split(","): | |
| 88 html += '<th>{}</th>'.format(item.strip('"')) | |
| 89 html += '</tr>' | |
| 90 for line in g: | |
| 91 html += '<tr>' | |
| 92 data = line.strip().split(",") | |
| 93 # html += '<td>{}</td>'.format(data[0].strip('"')) | |
| 94 for item in data: | |
| 95 html += ('<td align="center"><code>{}</code>' | |
| 96 '</td>'.format(item.strip('"'))) | |
| 97 html += '</tr>' | |
| 98 html += ('</table><p>Download: <a href="{0}">' | |
| 99 '{0}</a></p><hr>'.format(file_name)) | |
| 100 | |
| 101 with open(os.path.join(output_path, 'counts.R'), 'w') as r: | |
| 102 r.write(rscript) | |
| 103 | |
| 104 html += ('<h4>R script for this session</h4>' | |
| 105 '<p>Download: <a href="counts.R">counts.R</a></p>') | |
| 106 | |
| 107 with open(html_file, 'w') as f: | |
| 108 f.write(html) | |
| 109 | |
| 110 | |
| 111 if __name__ == '__main__': | |
| 112 parser = argparse.ArgumentParser(description='Get Ribo and RNA-Seq counts') | |
| 113 | |
| 114 # required arguments | |
| 115 flags = parser.add_argument_group('required arguments') | |
| 116 flags.add_argument('--rdata_load', required=True, | |
| 117 help='Saved riboSeqR data from Step 2') | |
| 118 flags.add_argument('--slice_lengths', | |
| 119 help='Lengths of ribosome footprints to inform count ' | |
| 120 'data', default='27,28', required=True) | |
| 121 flags.add_argument('--group1', help='Group model for baySeq', required=True) | |
| 122 flags.add_argument('--group2', help='Group model for baySeq', required=True) | |
| 123 flags.add_argument('--frames', | |
| 124 help='Frames of ribosome footprints (relative to ' | |
| 125 'coding start site). If omitted, all frames ' | |
| 126 'are used.', required=True) | |
| 127 | |
| 128 # optional arguments | |
| 129 parser.add_argument( | |
| 130 '--num_counts', help='How many results to return? (topCounts)') | |
| 131 parser.add_argument('--normalize', help='Normalize data?', | |
| 132 choices=['TRUE', 'FALSE'], default='FALSE') | |
| 133 parser.add_argument('--html_file', help='HTML file with reports') | |
| 134 parser.add_argument('--output_path', help='Directory to save output files') | |
| 135 parser.add_argument( | |
| 136 '--debug', help='Produce debug output', action='store_true') | |
| 137 | |
| 138 args = parser.parse_args() | |
| 139 if args.debug: | |
| 140 logging.basicConfig(format='%(levelname)s - %(message)s', | |
| 141 level=logging.DEBUG, stream=sys.stdout) | |
| 142 logging.debug('Supplied Arguments\n{}\n'.format(vars(args))) | |
| 143 | |
| 144 if not os.path.exists(args.output_path): | |
| 145 os.mkdir(args.output_path) | |
| 146 | |
| 147 get_counts(rdata_load=args.rdata_load, slice_lengths=args.slice_lengths, | |
| 148 frames=args.frames, group1=args.group1, group2=args.group2, | |
| 149 num_counts=args.num_counts, normalize=args.normalize, | |
| 150 html_file=args.html_file, output_path=args.output_path) |
