| 0 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 1 # -*- coding: utf-8 -*- | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 2 import os | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 3 import sys | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 4 import argparse | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 5 import logging | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 6 import rpy2.robjects as robjects | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 7 import utils | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 8 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 9 rscript = '' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 10 R = robjects.r | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 11 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 12 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 13 def run_rscript(command=None): | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 14     """Run R command, log it, append to rscript""" | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 15     global rscript | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 16     if not command: | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 17         return | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 18     logging.debug(command) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 19     rscript += '{}\n'.format(command) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 20     output = R(command) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 21     return output | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 22 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 23 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 24 def get_counts(rdata_load='Metagene.rda', slice_lengths='27', | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 25                frames='', group1=None, group2=None, num_counts=10, | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 26                normalize='FALSE', html_file='Counts.html', | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 27                output_path='counts'): | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 28     options = {'slice_lengths': utils.process_args( | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 29         slice_lengths, ret_type='int', ret_mode='charvector')} | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 30 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 31     if not num_counts: | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 32         num_counts = 10 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 33 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 34     options['frames'] = utils.process_args( | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 35         frames, ret_type='int', ret_mode='listvector') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 36 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 37     run_rscript('suppressMessages(library(riboSeqR))') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 38     run_rscript('load("{}")'.format(rdata_load)) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 39 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 40     cmd_args = 'ffCs, lengths={slice_lengths}'.format(**options) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 41     if frames: | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 42         cmd_args += ', frames={frames}'.format(**options) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 43 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 44     run_rscript("""riboCounts <- sliceCounts({}) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 45     annotation <- as.data.frame(ffCs@CDS) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 46     rownames(riboCounts) <- annotation$seqnames | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 47     colnames(riboCounts) <- names(riboDat@riboGR)""".format(cmd_args)) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 48 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 49     run_rscript("""mrnaCounts <- rnaCounts(riboDat, ffCs@CDS) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 50     rownames(mrnaCounts) <- annotation$seqnames""") | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 51 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 52     if not os.path.exists(output_path): | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 53         os.mkdir(output_path) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 54 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 55     html = '<h2>Differential Translation Analysis</h2><hr>' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 56     for count_name, file_name, legend in ( | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 57             ('riboCounts', 'RiboCounts.csv', 'Ribo-Seq counts'), | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 58             ('mrnaCounts', 'RNACounts.csv', 'RNA-Seq counts'), | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 59             ('tC', 'TopCounts.csv', 'baySeq topCounts')): | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 60         if count_name == 'tC' and R['riboCounts'] and R['mrnaCounts']: | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 61             run_rscript('suppressMessages(library(baySeq))') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 62             cmd = """pD <- new("countData", replicates=ffCs@replicates, \ | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 63             data=list(riboCounts, mrnaCounts), groups=list(NDT={0}, DT={1}), \ | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 64             annotation=as.data.frame(ffCs@CDS), \ | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 65             densityFunction=bbDensity)""".format( | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 66                 utils.process_args( | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 67                     group1, ret_type='int', ret_mode='charvector'), | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 68                 utils.process_args( | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 69                     group2, ret_type='str', ret_mode='charvector')) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 70             run_rscript(cmd) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 71 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 72             run_rscript('libsizes(pD) <- getLibsizes(pD)') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 73             run_rscript('pD <- getPriors(pD, cl=NULL)') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 74             run_rscript('pD <- getLikelihoods(pD, cl=NULL)') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 75             run_rscript('tC <- topCounts(pD, "DT", normaliseData={}, ' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 76                         'number={})'.format(normalize, num_counts)) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 77 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 78         if R[count_name]: | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 79             html += '<h3>{}</h3>'.format(legend) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 80             output_file = os.path.join(output_path, file_name) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 81             run_rscript('write.csv({}, file="{}")'.format( | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 82                 count_name, output_file)) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 83             with open(output_file) as g: | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 84                 html += '<table cellpadding="4" border="1">' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 85                 header = g.readline() | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 86                 html += '<tr>' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 87                 for item in header.strip().split(","): | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 88                     html += '<th>{}</th>'.format(item.strip('"')) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 89                 html += '</tr>' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 90                 for line in g: | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 91                     html += '<tr>' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 92                     data = line.strip().split(",") | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 93                     # html += '<td>{}</td>'.format(data[0].strip('"')) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 94                     for item in data: | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 95                         html += ('<td align="center"><code>{}</code>' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 96                                  '</td>'.format(item.strip('"'))) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 97                     html += '</tr>' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 98                 html += ('</table><p>Download: <a href="{0}">' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 99                          '{0}</a></p><hr>'.format(file_name)) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 100 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 101     with open(os.path.join(output_path, 'counts.R'), 'w') as r: | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 102         r.write(rscript) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 103 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 104     html += ('<h4>R script for this session</h4>' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 105              '<p>Download: <a href="counts.R">counts.R</a></p>') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 106 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 107     with open(html_file, 'w') as f: | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 108         f.write(html) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 109 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 110 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 111 if __name__ == '__main__': | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 112     parser = argparse.ArgumentParser(description='Get Ribo and RNA-Seq counts') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 113 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 114     # required arguments | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 115     flags = parser.add_argument_group('required arguments') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 116     flags.add_argument('--rdata_load', required=True, | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 117                        help='Saved riboSeqR data from Step 2') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 118     flags.add_argument('--slice_lengths', | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 119                        help='Lengths of ribosome footprints to inform count ' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 120                             'data', default='27,28', required=True) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 121     flags.add_argument('--group1', help='Group model for baySeq', required=True) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 122     flags.add_argument('--group2', help='Group model for baySeq', required=True) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 123     flags.add_argument('--frames', | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 124                        help='Frames of ribosome footprints (relative to ' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 125                             'coding start site). If omitted, all frames ' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 126                             'are used.', required=True) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 127 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 128     # optional arguments | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 129     parser.add_argument( | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 130         '--num_counts', help='How many results to return? (topCounts)') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 131     parser.add_argument('--normalize', help='Normalize data?', | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 132                         choices=['TRUE', 'FALSE'], default='FALSE') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 133     parser.add_argument('--html_file', help='HTML file with reports') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 134     parser.add_argument('--output_path', help='Directory to save output files') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 135     parser.add_argument( | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 136         '--debug', help='Produce debug output', action='store_true') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 137 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 138     args = parser.parse_args() | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 139     if args.debug: | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 140         logging.basicConfig(format='%(levelname)s - %(message)s', | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 141                             level=logging.DEBUG, stream=sys.stdout) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 142         logging.debug('Supplied Arguments\n{}\n'.format(vars(args))) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 143 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 144     if not os.path.exists(args.output_path): | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 145         os.mkdir(args.output_path) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 146 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 147     get_counts(rdata_load=args.rdata_load, slice_lengths=args.slice_lengths, | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 148                frames=args.frames, group1=args.group1, group2=args.group2, | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 149                num_counts=args.num_counts, normalize=args.normalize, | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 150                html_file=args.html_file, output_path=args.output_path) |