0
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
1 #!/usr/bin/env python
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
2 import os
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
3 import sys
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
4 import argparse
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
5 import logging
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
6 import rpy2.robjects as robjects
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
7
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
8 import utils
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
9
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
10 rscript = ''
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
11 R = robjects.r
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
12
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
13
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
14 def run_rscript(command=None):
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
15 """Run R command, log it, append to rscript"""
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
16 global rscript
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
17 if not command:
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
18 return
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
19 logging.debug(command)
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
20 rscript += '{}\n'.format(command)
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
21 output = R(command)
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
22 return output
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
23
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
24
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
25 def find_periodicity(
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
26 rdata_load='Prepare.rda', start_codons='ATG', stop_codons='TAG,TAA,TGA',
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
27 fasta_file=None, include_lengths='25:30', analyze_plot_lengths='26:30',
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
28 text_legend='Frame 0, Frame 1, Frame 2', rdata_save='Periodicity.rda',
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
29 html_file='Periodicity-report.html', output_path=os.getcwd()):
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
30 """Plot triplet periodicity from prepared R data file. """
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
31 logging.debug('{}'.format(R('sessionInfo()')))
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
32 cmd = 'suppressMessages(library(riboSeqR))'
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
33 run_rscript(cmd)
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
34
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
35 logging.debug('Loading saved R data file')
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
36 cmd = 'load("{}")'.format(rdata_load)
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
37 run_rscript(cmd)
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
38
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
39 # R("""options(showTailLines=Inf)""")
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
40 starts, stops = (utils.process_args(start_codons, ret_mode='charvector'),
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
41 utils.process_args(stop_codons, ret_mode='charvector'))
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
42
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
43 cmd = ('fastaCDS <- findCDS(fastaFile={0!r}, startCodon={1}, '
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
44 'stopCodon={2})'.format(fasta_file, starts, stops))
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
45 run_rscript(cmd)
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
46
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
47 logging.debug('Potential coding sequences using start codon (ATG) and '
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
48 'stop codons TAG, TAA, TGA')
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
49 logging.debug('{}\n'.format(R['fastaCDS']))
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
50
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
51 cmd = """fCs <- frameCounting(riboDat, fastaCDS, lengths={0})
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
52 fS <- readingFrame(rC=fCs, lengths={1}); fS""".\
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
53 format(include_lengths, analyze_plot_lengths)
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
54 run_rscript(cmd)
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
55
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
56 logging.debug('riboDat \n{}\n'.format(R['riboDat']))
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
57 logging.debug('fCs\n{0}\n'.format(R['fCs']))
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
58 logging.debug('Reading frames for each n-mer\n{}'.format(R['fS']))
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
59
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
60 legend = utils.process_args(text_legend, ret_mode='charvector')
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
61
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
62 for fmat in ('pdf', 'png'):
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
63 if fmat == 'png':
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
64 cmd = '{0}(file="{1}", type="cairo")'
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
65 else:
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
66 cmd = '{0}(file="{1}")'
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
67 run_rscript(cmd.format(fmat, os.path.join(
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
68 output_path, '{0}.{1}'.format('Periodicity-plot', fmat))))
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
69 run_rscript('plotFS(fS, legend.text = {0})'.format(legend))
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
70 run_rscript('dev.off()')
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
71
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
72 run_rscript('save("fCs", "fS", "riboDat", "fastaCDS", '
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
73 'file="{}", compress=FALSE)'.format(rdata_save))
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
74
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
75 html = '<h2>Triplet periodicity - results</h2><hr>'
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
76 html += ('<h4>Results of reading frame analysis</h4>'
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
77 '<pre>{}</pre><br>'.format(R['fS']))
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
78 html += ('<p>Lengths used for reading frame analysis - <code>{0}</code>'
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
79 '<br>Lengths selected for the plot - <code>{1}</code>'
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
80 '</p>'.format(include_lengths, analyze_plot_lengths))
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
81 html += ('<p><img src="Periodicity-plot.png" border="1" '
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
82 'alt="Triplet periodicity plot" />'
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
83 '<br><a href="Periodicity-plot.pdf">PDF version</a></p>')
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
84
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
85 logging.debug('\n{:#^80}\n{}\n{:#^80}\n'.format(
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
86 ' R script for this session ', rscript, ' End R script '))
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
87
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
88 with open(os.path.join(output_path, 'periodicity.R'), 'w') as r:
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
89 r.write(rscript)
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
90
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
91 html += ('<h4>R script for this session</h4>'
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
92 '<p><a href="periodicity.R">periodicity.R</a></p>'
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
93 '<p>Next step: <em>Metagene analysis</em></p>')
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
94
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
95 with open(html_file, 'w') as f:
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
96 f.write(html)
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
97
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
98
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
99 if __name__ == '__main__':
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
100
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
101 parser = argparse.ArgumentParser(
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
102 description='Plot triplet periodicity for different read lengths.')
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
103
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
104 # required arguments
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
105 flags = parser.add_argument_group('required arguments')
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
106 flags.add_argument(
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
107 '--rdata_load', required=True,
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
108 help='riboSeqR data from input preparation step (Prepare.rda)')
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
109 flags.add_argument(
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
110 '--fasta_file', required=True,
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
111 help='FASTA file of the reference transcriptome')
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
112
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
113 # optional arguments
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
114 parser.add_argument(
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
115 '--start_codons',
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
116 help='Start codon(s) to use. (default: %(default)s)', default='ATG')
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
117 parser.add_argument(
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
118 '--stop_codons', help='Stop codon(s) to use. (default: %(default)s)',
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
119 default='TAG, TAA, TGA')
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
120 parser.add_argument(
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
121 '--include_lengths',
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
122 help='Lengths of ribosome footprints to be included '
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
123 '(default: %(default)s)', default='25:30')
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
124 parser.add_argument(
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
125 '--analyze_plot_lengths',
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
126 help='Lenghts of reads to be analyzed for frame-shift and plotting '
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
127 '(default: %(default)s)', default='26:30')
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
128 parser.add_argument(
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
129 '--text_legend',
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
130 help='Text for legend used in the plot (default: %(default)s)',
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
131 default='Frame 0, Frame 1, Frame 2')
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
132 parser.add_argument(
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
133 '--rdata_save', help='File to write RData to (default: %(default)s)',
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
134 default='Periodicity.rda')
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
135 parser.add_argument('--html_file', help='Output file for results (HTML)')
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
136 parser.add_argument('--output_path',
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
137 help='Files are saved in this directory')
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
138 parser.add_argument(
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
139 '--debug', help='Produce debug output', action='store_true')
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
140
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
141 args = parser.parse_args()
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
142 if args.debug:
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
143 logging.basicConfig(format='%(module)s: %(levelname)s - %(message)s',
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
144 level=logging.DEBUG, stream=sys.stdout)
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
145 logging.debug('Supplied Arguments\n{}\n'.format(vars(args)))
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
146
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
147 if not os.path.exists(args.output_path):
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
148 os.mkdir(args.output_path)
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
149
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
150 find_periodicity(
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
151 rdata_load=args.rdata_load, start_codons=args.start_codons,
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
152 stop_codons=args.stop_codons, fasta_file=args.fasta_file,
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
153 include_lengths=args.include_lengths,
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
154 analyze_plot_lengths=args.analyze_plot_lengths,
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
155 text_legend=args.text_legend,
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
156 rdata_save=args.rdata_save, html_file=args.html_file,
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
157 output_path=args.output_path)
|
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff
changeset
|
158 logging.debug("Done!")
|