annotate riboseqr/prepare.py @ 0:c34c364ce75d

First commit
author Vimalkumar Velayudhan <vimal@biotechcoder.com>
date Tue, 21 Jul 2015 14:48:39 +0100
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
1 #!/usr/bin/env python
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
2 import os
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
3 import sys
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
4 import argparse
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
5 import logging
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
6 import rpy2.robjects as robjects
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
7
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
8 import utils
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
9
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
10 rscript = ''
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
11 R = robjects.r
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
12
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
13
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
14 def run_rscript(command=None):
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
15 """Run R command, log it, append to rscript"""
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
16 global rscript
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
17 if not command:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
18 return
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
19 logging.debug(command)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
20 rscript += '{}\n'.format(command)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
21 output = R(command)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
22 return output
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
23
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
24
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
25 def prep_riboseqr_input(sam_file, output_file):
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
26 """Generate input file for riboSeqR from SAM format file."""
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
27 with open(output_file, 'w') as f:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
28 for line in open(sam_file):
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
29 if line.startswith('@'):
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
30 continue
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
31 line = line.split()
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
32 flag = line[1]
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
33
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
34 if flag != '0':
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
35 continue
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
36 # make start 0-indexed, sam alignments are 1-indexed
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
37 start = int(line[3]) - 1
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
38 (name, sequence) = (line[2], line[9])
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
39 f.write('"+"\t"{0}"\t{1}\t"{2}"\n'.format(name, start, sequence))
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
40
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
41
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
42 def batch_process(sam_files, seq_type, output_path):
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
43 """Batch process the conversion of SAM format files -> riboSeqR format
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
44 input files.
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
45
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
46 Files are saved with file names corresponding to their sequence type.
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
47
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
48 """
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
49 outputs = []
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
50 prefix = '{}'
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
51 if seq_type == 'riboseq':
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
52 prefix = 'RiboSeq file {}'
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
53 elif seq_type == 'rnaseq':
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
54 prefix = 'RNASeq file {}'
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
55
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
56 for count, fname in enumerate(sam_files):
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
57 count += 1
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
58 out_file = os.path.join(output_path, prefix.format(count))
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
59 logging.debug('Processing: {}'.format(fname))
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
60 logging.debug('Writing output to: {}'.format(out_file))
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
61 prep_riboseqr_input(fname, out_file)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
62 outputs.append(out_file)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
63 return outputs
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
64
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
65
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
66 def generate_ribodata(ribo_files='', rna_files='', replicate_names='',
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
67 seqnames='', rdata_save='Prepare.rda', sam_format=True,
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
68 html_file='Prepare-report.html', output_path=os.getcwd()):
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
69 """Prepares Ribo and RNA seq data in the format required for riboSeqR. Calls
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
70 the readRibodata function of riboSeqR and saves the result objects in an
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
71 R data file which can be used as input for the next step.
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
72
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
73 """
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
74 input_ribo_files = utils.process_args(ribo_files, ret_mode='list')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
75 logging.debug('Found {} Ribo-Seq files'.format(len(input_ribo_files)))
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
76 logging.debug(input_ribo_files)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
77
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
78 input_rna_files = []
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
79 if rna_files:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
80 input_rna_files = utils.process_args(rna_files, ret_mode='list')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
81 logging.debug('Found {} RNA-Seq files'.format(len(input_rna_files)))
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
82 logging.debug(input_rna_files)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
83
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
84 replicates = utils.process_args(replicate_names, ret_mode='charvector')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
85 logging.debug('Replicates: {}\n'.format(replicates))
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
86
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
87 if sam_format:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
88 ribo_seq_files = batch_process(input_ribo_files, 'riboseq', output_path)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
89 else:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
90 ribo_seq_files = input_ribo_files
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
91
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
92 html = '<h2>Prepare riboSeqR input - results</h2><hr>'
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
93 if len(ribo_seq_files):
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
94 html += '<h4>Generated riboSeqR format input files ' \
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
95 '<em>(RiboSeq)</em></h4><p>'
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
96 for fname in ribo_seq_files:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
97 html += '<a href="{0}">{0}</a><br>'.format(
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
98 os.path.basename(fname))
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
99 html += '</p>'
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
100
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
101 rna_seq_files = []
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
102 if len(input_rna_files):
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
103 if sam_format:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
104 rna_seq_files = batch_process(
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
105 input_rna_files, 'rnaseq', output_path)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
106 else:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
107 rna_seq_files = input_rna_files
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
108
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
109 if len(rna_seq_files):
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
110 html += ('<h4>Generated riboSeqR format input files '
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
111 '<em>(RNASeq)</em></h4><p>')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
112 for fname in rna_seq_files:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
113 html += '<a href="{0}">{0}</a><br>'.format(
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
114 os.path.basename(fname))
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
115 html += '</p>'
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
116
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
117 input_seqnames = utils.process_args(seqnames, ret_mode='charvector')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
118 options = {'ribo_seq_files': 'c({})'.format(str(ribo_seq_files)[1:-1]),
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
119 'rna_seq_files': 'c({})'.format(str(rna_seq_files)[1:-1]),
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
120 'input_replicates': replicates,
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
121 'input_seqnames': input_seqnames}
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
122
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
123 logging.debug('{}'.format(R('sessionInfo()')))
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
124
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
125 script = ''
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
126 cmd = 'suppressMessages(library(riboSeqR))'
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
127 run_rscript(cmd)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
128 script += '{}\n'.format(cmd)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
129
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
130 if len(rna_seq_files):
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
131 cmd_args = ('riboFiles={ribo_seq_files}, '
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
132 'rnaFiles={rna_seq_files}'.format(**options))
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
133 else:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
134 cmd_args = 'riboFiles={ribo_seq_files}'.format(**options)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
135
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
136 if input_seqnames:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
137 cmd_args += ', seqnames={input_seqnames}'.format(**options)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
138 if replicates:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
139 cmd_args += ', replicates={input_replicates}'.format(**options)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
140 else:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
141 cmd_args += ', replicates=c("")'
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
142 cmd = 'riboDat <- readRibodata({0})'.format(cmd_args)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
143 run_rscript(cmd)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
144 script += '{}\n'.format(cmd)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
145
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
146 ribo_data = R['riboDat']
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
147 logging.debug('riboDat \n{}\n'.format(ribo_data))
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
148 cmd = 'save("riboDat", file="{}", compress=FALSE)'.format(rdata_save)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
149 run_rscript(cmd)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
150 script += '{}\n'.format(cmd)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
151
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
152 msg = '\n{:#^80}\n{}\n{:#^80}\n'.format(
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
153 ' R script for this session ', script, ' End R script ')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
154 logging.debug(msg)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
155
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
156 with open(os.path.join(output_path, 'prepare.R'), 'w') as r:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
157 r.write(script)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
158
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
159 html += ('<h4>R script for this session</h4>'
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
160 '<p><a href="prepare.R">prepare.R</a></p>'
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
161 '<p>Next step: <em>Triplet periodicity</em></p>')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
162
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
163 with open(html_file, 'w') as f:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
164 f.write(html)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
165
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
166 return ribo_data
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
167
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
168
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
169 if __name__ == '__main__':
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
170
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
171 description = (
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
172 'Prepare riboSeqR input file from SAM format RNA/Ribo-Seq alignment.')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
173 parser = argparse.ArgumentParser(description=description)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
174
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
175 # required arguments
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
176 flags = parser.add_argument_group('required arguments')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
177 flags.add_argument('--ribo_files', required=True,
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
178 help='List of Ribo-Seq files, comma-separated')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
179 # optional arguments
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
180 parser.add_argument('--rna_files',
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
181 help='List of RNA-Seq files. Comma-separated')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
182 parser.add_argument('--replicate_names',
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
183 help='Replicate names, comma-separated')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
184 parser.add_argument('--seqnames',
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
185 help='Transcript (seqname) names to be read')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
186 parser.add_argument('--rdata_save',
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
187 help='File to write RData to (default: %(default)s)',
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
188 default='Prepare.rda')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
189 parser.add_argument(
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
190 '--sam_format',
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
191 help='Flag. Input is in SAM format', action='store_true')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
192 parser.add_argument('--html_file', help='Output file for results (HTML)')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
193 parser.add_argument('--output_path',
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
194 help='Files are saved in this directory')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
195 parser.add_argument('--debug', help='Flag. Produce debug output',
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
196 action='store_true')
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
197 args = parser.parse_args()
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
198
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
199 if args.debug:
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
200 logging.basicConfig(format='%(module)s: %(levelname)s - %(message)s',
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
201 level=logging.DEBUG, stream=sys.stdout)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
202 logging.debug('Supplied Arguments: {}'.format(vars(args)))
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
203
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
204 if not os.path.exists(args.output_path):
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
205 os.mkdir(args.output_path)
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
206
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
207 generate_ribodata(
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
208 ribo_files=args.ribo_files, rna_files=args.rna_files,
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
209 replicate_names=args.replicate_names, seqnames=args.seqnames,
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
210 rdata_save=args.rdata_save,
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
211 sam_format=args.sam_format, html_file=args.html_file,
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
212 output_path=args.output_path
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
213 )
c34c364ce75d First commit
Vimalkumar Velayudhan <vimal@biotechcoder.com>
parents:
diff changeset
214 logging.debug('Done')