| 0 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 1 #!/usr/bin/env python | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 2 import os | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 3 import sys | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 4 import argparse | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 5 import logging | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 6 import rpy2.robjects as robjects | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 7 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 8 import utils | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 9 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 10 rscript = '' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 11 R = robjects.r | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 12 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 13 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 14 def run_rscript(command=None): | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 15     """Run R command, log it, append to rscript""" | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 16     global rscript | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 17     if not command: | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 18         return | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 19     logging.debug(command) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 20     rscript += '{}\n'.format(command) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 21     output = R(command) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 22     return output | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 23 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 24 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 25 def find_periodicity( | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 26         rdata_load='Prepare.rda', start_codons='ATG', stop_codons='TAG,TAA,TGA', | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 27         fasta_file=None, include_lengths='25:30', analyze_plot_lengths='26:30', | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 28         text_legend='Frame 0, Frame 1, Frame 2', rdata_save='Periodicity.rda', | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 29         html_file='Periodicity-report.html', output_path=os.getcwd()): | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 30     """Plot triplet periodicity from prepared R data file. """ | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 31     logging.debug('{}'.format(R('sessionInfo()'))) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 32     cmd = 'suppressMessages(library(riboSeqR))' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 33     run_rscript(cmd) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 34 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 35     logging.debug('Loading saved R data file') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 36     cmd = 'load("{}")'.format(rdata_load) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 37     run_rscript(cmd) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 38 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 39     # R("""options(showTailLines=Inf)""") | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 40     starts, stops = (utils.process_args(start_codons, ret_mode='charvector'), | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 41                      utils.process_args(stop_codons, ret_mode='charvector')) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 42 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 43     cmd = ('fastaCDS <- findCDS(fastaFile={0!r}, startCodon={1}, ' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 44            'stopCodon={2})'.format(fasta_file, starts, stops)) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 45     run_rscript(cmd) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 46 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 47     logging.debug('Potential coding sequences using start codon (ATG) and ' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 48                   'stop codons TAG, TAA, TGA') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 49     logging.debug('{}\n'.format(R['fastaCDS'])) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 50 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 51     cmd = """fCs <- frameCounting(riboDat, fastaCDS, lengths={0}) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 52     fS <- readingFrame(rC=fCs, lengths={1}); fS""".\ | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 53         format(include_lengths, analyze_plot_lengths) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 54     run_rscript(cmd) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 55 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 56     logging.debug('riboDat \n{}\n'.format(R['riboDat'])) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 57     logging.debug('fCs\n{0}\n'.format(R['fCs'])) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 58     logging.debug('Reading frames for each n-mer\n{}'.format(R['fS'])) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 59 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 60     legend = utils.process_args(text_legend, ret_mode='charvector') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 61 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 62     for fmat in ('pdf', 'png'): | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 63         if fmat == 'png': | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 64             cmd = '{0}(file="{1}", type="cairo")' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 65         else: | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 66             cmd = '{0}(file="{1}")' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 67         run_rscript(cmd.format(fmat, os.path.join( | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 68             output_path, '{0}.{1}'.format('Periodicity-plot', fmat)))) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 69         run_rscript('plotFS(fS, legend.text = {0})'.format(legend)) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 70         run_rscript('dev.off()') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 71 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 72     run_rscript('save("fCs", "fS", "riboDat", "fastaCDS", ' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 73                 'file="{}", compress=FALSE)'.format(rdata_save)) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 74 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 75     html = '<h2>Triplet periodicity - results</h2><hr>' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 76     html += ('<h4>Results of reading frame analysis</h4>' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 77              '<pre>{}</pre><br>'.format(R['fS'])) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 78     html += ('<p>Lengths used for reading frame analysis - <code>{0}</code>' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 79              '<br>Lengths selected for the plot - <code>{1}</code>' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 80              '</p>'.format(include_lengths, analyze_plot_lengths)) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 81     html += ('<p><img src="Periodicity-plot.png" border="1" ' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 82              'alt="Triplet periodicity plot" />' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 83              '<br><a href="Periodicity-plot.pdf">PDF version</a></p>') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 84 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 85     logging.debug('\n{:#^80}\n{}\n{:#^80}\n'.format( | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 86         ' R script for this session ', rscript, ' End R script ')) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 87 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 88     with open(os.path.join(output_path, 'periodicity.R'), 'w') as r: | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 89         r.write(rscript) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 90 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 91     html += ('<h4>R script for this session</h4>' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 92              '<p><a href="periodicity.R">periodicity.R</a></p>' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 93              '<p>Next step: <em>Metagene analysis</em></p>') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 94 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 95     with open(html_file, 'w') as f: | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 96         f.write(html) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 97 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 98 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 99 if __name__ == '__main__': | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 100 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 101     parser = argparse.ArgumentParser( | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 102         description='Plot triplet periodicity for different read lengths.') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 103 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 104     # required arguments | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 105     flags = parser.add_argument_group('required arguments') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 106     flags.add_argument( | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 107         '--rdata_load', required=True, | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 108         help='riboSeqR data from input preparation step (Prepare.rda)') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 109     flags.add_argument( | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 110         '--fasta_file', required=True, | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 111         help='FASTA file of the reference transcriptome') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 112 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 113     # optional arguments | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 114     parser.add_argument( | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 115         '--start_codons', | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 116         help='Start codon(s) to use. (default: %(default)s)', default='ATG') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 117     parser.add_argument( | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 118         '--stop_codons', help='Stop codon(s) to use. (default: %(default)s)', | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 119         default='TAG, TAA, TGA') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 120     parser.add_argument( | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 121         '--include_lengths', | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 122         help='Lengths of ribosome footprints to be included ' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 123              '(default: %(default)s)', default='25:30') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 124     parser.add_argument( | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 125         '--analyze_plot_lengths', | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 126         help='Lenghts of reads to be analyzed for frame-shift and plotting ' | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 127              '(default: %(default)s)', default='26:30') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 128     parser.add_argument( | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 129         '--text_legend', | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 130         help='Text for legend used in the plot (default: %(default)s)', | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 131         default='Frame 0, Frame 1, Frame 2') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 132     parser.add_argument( | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 133         '--rdata_save', help='File to write RData to (default: %(default)s)', | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 134         default='Periodicity.rda') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 135     parser.add_argument('--html_file', help='Output file for results (HTML)') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 136     parser.add_argument('--output_path', | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 137                         help='Files are saved in this directory') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 138     parser.add_argument( | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 139         '--debug', help='Produce debug output', action='store_true') | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 140 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 141     args = parser.parse_args() | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 142     if args.debug: | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 143         logging.basicConfig(format='%(module)s: %(levelname)s - %(message)s', | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 144                             level=logging.DEBUG, stream=sys.stdout) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 145         logging.debug('Supplied Arguments\n{}\n'.format(vars(args))) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 146 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 147     if not os.path.exists(args.output_path): | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 148         os.mkdir(args.output_path) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 149 | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 150     find_periodicity( | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 151         rdata_load=args.rdata_load, start_codons=args.start_codons, | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 152         stop_codons=args.stop_codons, fasta_file=args.fasta_file, | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 153         include_lengths=args.include_lengths, | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 154         analyze_plot_lengths=args.analyze_plot_lengths, | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 155         text_legend=args.text_legend, | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 156         rdata_save=args.rdata_save, html_file=args.html_file, | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 157         output_path=args.output_path) | 
| 
Vimalkumar Velayudhan <vimal@biotechcoder.com> parents: diff
changeset | 158 logging.debug("Done!") |