Mercurial > repos > vimalkumarvelayudhan > riboseqr_wrapper
comparison riboseqr/prepare.py @ 0:c34c364ce75d
First commit
author | Vimalkumar Velayudhan <vimal@biotechcoder.com> |
---|---|
date | Tue, 21 Jul 2015 14:48:39 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c34c364ce75d |
---|---|
1 #!/usr/bin/env python | |
2 import os | |
3 import sys | |
4 import argparse | |
5 import logging | |
6 import rpy2.robjects as robjects | |
7 | |
8 import utils | |
9 | |
10 rscript = '' | |
11 R = robjects.r | |
12 | |
13 | |
14 def run_rscript(command=None): | |
15 """Run R command, log it, append to rscript""" | |
16 global rscript | |
17 if not command: | |
18 return | |
19 logging.debug(command) | |
20 rscript += '{}\n'.format(command) | |
21 output = R(command) | |
22 return output | |
23 | |
24 | |
25 def prep_riboseqr_input(sam_file, output_file): | |
26 """Generate input file for riboSeqR from SAM format file.""" | |
27 with open(output_file, 'w') as f: | |
28 for line in open(sam_file): | |
29 if line.startswith('@'): | |
30 continue | |
31 line = line.split() | |
32 flag = line[1] | |
33 | |
34 if flag != '0': | |
35 continue | |
36 # make start 0-indexed, sam alignments are 1-indexed | |
37 start = int(line[3]) - 1 | |
38 (name, sequence) = (line[2], line[9]) | |
39 f.write('"+"\t"{0}"\t{1}\t"{2}"\n'.format(name, start, sequence)) | |
40 | |
41 | |
42 def batch_process(sam_files, seq_type, output_path): | |
43 """Batch process the conversion of SAM format files -> riboSeqR format | |
44 input files. | |
45 | |
46 Files are saved with file names corresponding to their sequence type. | |
47 | |
48 """ | |
49 outputs = [] | |
50 prefix = '{}' | |
51 if seq_type == 'riboseq': | |
52 prefix = 'RiboSeq file {}' | |
53 elif seq_type == 'rnaseq': | |
54 prefix = 'RNASeq file {}' | |
55 | |
56 for count, fname in enumerate(sam_files): | |
57 count += 1 | |
58 out_file = os.path.join(output_path, prefix.format(count)) | |
59 logging.debug('Processing: {}'.format(fname)) | |
60 logging.debug('Writing output to: {}'.format(out_file)) | |
61 prep_riboseqr_input(fname, out_file) | |
62 outputs.append(out_file) | |
63 return outputs | |
64 | |
65 | |
66 def generate_ribodata(ribo_files='', rna_files='', replicate_names='', | |
67 seqnames='', rdata_save='Prepare.rda', sam_format=True, | |
68 html_file='Prepare-report.html', output_path=os.getcwd()): | |
69 """Prepares Ribo and RNA seq data in the format required for riboSeqR. Calls | |
70 the readRibodata function of riboSeqR and saves the result objects in an | |
71 R data file which can be used as input for the next step. | |
72 | |
73 """ | |
74 input_ribo_files = utils.process_args(ribo_files, ret_mode='list') | |
75 logging.debug('Found {} Ribo-Seq files'.format(len(input_ribo_files))) | |
76 logging.debug(input_ribo_files) | |
77 | |
78 input_rna_files = [] | |
79 if rna_files: | |
80 input_rna_files = utils.process_args(rna_files, ret_mode='list') | |
81 logging.debug('Found {} RNA-Seq files'.format(len(input_rna_files))) | |
82 logging.debug(input_rna_files) | |
83 | |
84 replicates = utils.process_args(replicate_names, ret_mode='charvector') | |
85 logging.debug('Replicates: {}\n'.format(replicates)) | |
86 | |
87 if sam_format: | |
88 ribo_seq_files = batch_process(input_ribo_files, 'riboseq', output_path) | |
89 else: | |
90 ribo_seq_files = input_ribo_files | |
91 | |
92 html = '<h2>Prepare riboSeqR input - results</h2><hr>' | |
93 if len(ribo_seq_files): | |
94 html += '<h4>Generated riboSeqR format input files ' \ | |
95 '<em>(RiboSeq)</em></h4><p>' | |
96 for fname in ribo_seq_files: | |
97 html += '<a href="{0}">{0}</a><br>'.format( | |
98 os.path.basename(fname)) | |
99 html += '</p>' | |
100 | |
101 rna_seq_files = [] | |
102 if len(input_rna_files): | |
103 if sam_format: | |
104 rna_seq_files = batch_process( | |
105 input_rna_files, 'rnaseq', output_path) | |
106 else: | |
107 rna_seq_files = input_rna_files | |
108 | |
109 if len(rna_seq_files): | |
110 html += ('<h4>Generated riboSeqR format input files ' | |
111 '<em>(RNASeq)</em></h4><p>') | |
112 for fname in rna_seq_files: | |
113 html += '<a href="{0}">{0}</a><br>'.format( | |
114 os.path.basename(fname)) | |
115 html += '</p>' | |
116 | |
117 input_seqnames = utils.process_args(seqnames, ret_mode='charvector') | |
118 options = {'ribo_seq_files': 'c({})'.format(str(ribo_seq_files)[1:-1]), | |
119 'rna_seq_files': 'c({})'.format(str(rna_seq_files)[1:-1]), | |
120 'input_replicates': replicates, | |
121 'input_seqnames': input_seqnames} | |
122 | |
123 logging.debug('{}'.format(R('sessionInfo()'))) | |
124 | |
125 script = '' | |
126 cmd = 'suppressMessages(library(riboSeqR))' | |
127 run_rscript(cmd) | |
128 script += '{}\n'.format(cmd) | |
129 | |
130 if len(rna_seq_files): | |
131 cmd_args = ('riboFiles={ribo_seq_files}, ' | |
132 'rnaFiles={rna_seq_files}'.format(**options)) | |
133 else: | |
134 cmd_args = 'riboFiles={ribo_seq_files}'.format(**options) | |
135 | |
136 if input_seqnames: | |
137 cmd_args += ', seqnames={input_seqnames}'.format(**options) | |
138 if replicates: | |
139 cmd_args += ', replicates={input_replicates}'.format(**options) | |
140 else: | |
141 cmd_args += ', replicates=c("")' | |
142 cmd = 'riboDat <- readRibodata({0})'.format(cmd_args) | |
143 run_rscript(cmd) | |
144 script += '{}\n'.format(cmd) | |
145 | |
146 ribo_data = R['riboDat'] | |
147 logging.debug('riboDat \n{}\n'.format(ribo_data)) | |
148 cmd = 'save("riboDat", file="{}", compress=FALSE)'.format(rdata_save) | |
149 run_rscript(cmd) | |
150 script += '{}\n'.format(cmd) | |
151 | |
152 msg = '\n{:#^80}\n{}\n{:#^80}\n'.format( | |
153 ' R script for this session ', script, ' End R script ') | |
154 logging.debug(msg) | |
155 | |
156 with open(os.path.join(output_path, 'prepare.R'), 'w') as r: | |
157 r.write(script) | |
158 | |
159 html += ('<h4>R script for this session</h4>' | |
160 '<p><a href="prepare.R">prepare.R</a></p>' | |
161 '<p>Next step: <em>Triplet periodicity</em></p>') | |
162 | |
163 with open(html_file, 'w') as f: | |
164 f.write(html) | |
165 | |
166 return ribo_data | |
167 | |
168 | |
169 if __name__ == '__main__': | |
170 | |
171 description = ( | |
172 'Prepare riboSeqR input file from SAM format RNA/Ribo-Seq alignment.') | |
173 parser = argparse.ArgumentParser(description=description) | |
174 | |
175 # required arguments | |
176 flags = parser.add_argument_group('required arguments') | |
177 flags.add_argument('--ribo_files', required=True, | |
178 help='List of Ribo-Seq files, comma-separated') | |
179 # optional arguments | |
180 parser.add_argument('--rna_files', | |
181 help='List of RNA-Seq files. Comma-separated') | |
182 parser.add_argument('--replicate_names', | |
183 help='Replicate names, comma-separated') | |
184 parser.add_argument('--seqnames', | |
185 help='Transcript (seqname) names to be read') | |
186 parser.add_argument('--rdata_save', | |
187 help='File to write RData to (default: %(default)s)', | |
188 default='Prepare.rda') | |
189 parser.add_argument( | |
190 '--sam_format', | |
191 help='Flag. Input is in SAM format', action='store_true') | |
192 parser.add_argument('--html_file', help='Output file for results (HTML)') | |
193 parser.add_argument('--output_path', | |
194 help='Files are saved in this directory') | |
195 parser.add_argument('--debug', help='Flag. Produce debug output', | |
196 action='store_true') | |
197 args = parser.parse_args() | |
198 | |
199 if args.debug: | |
200 logging.basicConfig(format='%(module)s: %(levelname)s - %(message)s', | |
201 level=logging.DEBUG, stream=sys.stdout) | |
202 logging.debug('Supplied Arguments: {}'.format(vars(args))) | |
203 | |
204 if not os.path.exists(args.output_path): | |
205 os.mkdir(args.output_path) | |
206 | |
207 generate_ribodata( | |
208 ribo_files=args.ribo_files, rna_files=args.rna_files, | |
209 replicate_names=args.replicate_names, seqnames=args.seqnames, | |
210 rdata_save=args.rdata_save, | |
211 sam_format=args.sam_format, html_file=args.html_file, | |
212 output_path=args.output_path | |
213 ) | |
214 logging.debug('Done') |