Mercurial > repos > vimalkumarvelayudhan > riboseqr_wrapper
comparison riboseqr/triplet.py @ 5:423ad61697c4
Bugfix 1: [triplet] Lengths (frameCounting) if given should be a range (not zero).
readingFrame function fails with subscript out of bounds.
Bugfix 2: [triplet] Check if transcript name in SAM matches the name in FASTA. Produce
an error message if it's not. This fixes the problem where an empty plot is
produced (no bars).
[ribosome_profile] - A proper error message is now produced if an invalid transcript name is
provided.
Updated test data
author | Vimalkumar Velayudhan <vimalkumarvelayudhan@gmail.com> |
---|---|
date | Tue, 27 Oct 2015 12:21:50 +0000 |
parents | c34c364ce75d |
children |
comparison
equal
deleted
inserted
replaced
2:b2eb07000039 | 5:423ad61697c4 |
---|---|
7 | 7 |
8 import utils | 8 import utils |
9 | 9 |
10 rscript = '' | 10 rscript = '' |
11 R = robjects.r | 11 R = robjects.r |
12 | |
13 | |
14 class TripletPeriodicityError(Exception): | |
15 pass | |
12 | 16 |
13 | 17 |
14 def run_rscript(command=None): | 18 def run_rscript(command=None): |
15 """Run R command, log it, append to rscript""" | 19 """Run R command, log it, append to rscript""" |
16 global rscript | 20 global rscript |
22 return output | 26 return output |
23 | 27 |
24 | 28 |
25 def find_periodicity( | 29 def find_periodicity( |
26 rdata_load='Prepare.rda', start_codons='ATG', stop_codons='TAG,TAA,TGA', | 30 rdata_load='Prepare.rda', start_codons='ATG', stop_codons='TAG,TAA,TGA', |
27 fasta_file=None, include_lengths='25:30', analyze_plot_lengths='26:30', | 31 fasta_file=None, include_lengths='25:30', analyze_plot_lengths=None, |
28 text_legend='Frame 0, Frame 1, Frame 2', rdata_save='Periodicity.rda', | 32 text_legend='Frame 0, Frame 1, Frame 2', rdata_save='Periodicity.rda', |
29 html_file='Periodicity-report.html', output_path=os.getcwd()): | 33 html_file='Periodicity-report.html', output_path=os.getcwd()): |
30 """Plot triplet periodicity from prepared R data file. """ | 34 """Plot triplet periodicity from prepared R data file. """ |
31 logging.debug('{}'.format(R('sessionInfo()'))) | 35 logging.debug('{}'.format(R('sessionInfo()'))) |
32 cmd = 'suppressMessages(library(riboSeqR))' | 36 cmd = 'suppressMessages(library(riboSeqR))' |
33 run_rscript(cmd) | 37 run_rscript(cmd) |
34 | 38 |
35 logging.debug('Loading saved R data file') | 39 logging.debug('Loading saved R data file') |
36 cmd = 'load("{}")'.format(rdata_load) | 40 cmd = 'load("{}")'.format(rdata_load) |
37 run_rscript(cmd) | 41 run_rscript(cmd) |
42 | |
43 # get the first seqname (populated from input file which was generated from | |
44 # SAM) | |
45 refseq = R('levels(riboDat@riboGR$`RiboSeq file 1`@seqnames)[1]') | |
46 refseq = tuple(refseq)[0] | |
47 | |
48 fasta_valid = False | |
49 for line in open(fasta_file): | |
50 if line.startswith('>'): | |
51 seqname = line.strip().lstrip('>').split() | |
52 # get the FASTA header, split on space, and use the first column to | |
53 # compare. Multiple fields separated by space doesn't appear to be | |
54 # a problem for riboSeqR | |
55 if seqname[0] == refseq: | |
56 fasta_valid = True | |
57 break | |
58 if not fasta_valid: | |
59 raise TripletPeriodicityError( | |
60 'Transcript name in SAM alignment does not match with name in the transcriptome ' | |
61 'FASTA header. Please also check if the reads were aligned to this transcriptome ' | |
62 'and not the genome') | |
38 | 63 |
39 # R("""options(showTailLines=Inf)""") | 64 # R("""options(showTailLines=Inf)""") |
40 starts, stops = (utils.process_args(start_codons, ret_mode='charvector'), | 65 starts, stops = (utils.process_args(start_codons, ret_mode='charvector'), |
41 utils.process_args(stop_codons, ret_mode='charvector')) | 66 utils.process_args(stop_codons, ret_mode='charvector')) |
42 | 67 |
47 logging.debug('Potential coding sequences using start codon (ATG) and ' | 72 logging.debug('Potential coding sequences using start codon (ATG) and ' |
48 'stop codons TAG, TAA, TGA') | 73 'stop codons TAG, TAA, TGA') |
49 logging.debug('{}\n'.format(R['fastaCDS'])) | 74 logging.debug('{}\n'.format(R['fastaCDS'])) |
50 | 75 |
51 cmd = """fCs <- frameCounting(riboDat, fastaCDS, lengths={0}) | 76 cmd = """fCs <- frameCounting(riboDat, fastaCDS, lengths={0}) |
52 fS <- readingFrame(rC=fCs, lengths={1}); fS""".\ | 77 |
53 format(include_lengths, analyze_plot_lengths) | 78 """.format(include_lengths) |
79 if analyze_plot_lengths: | |
80 cmd += 'fS <- readingFrame(rC=fCs, lengths={0}); fS'.format(analyze_plot_lengths) | |
81 else: | |
82 cmd += 'fS <- readingFrame(rC=fCs); fS' | |
54 run_rscript(cmd) | 83 run_rscript(cmd) |
55 | 84 |
56 logging.debug('riboDat \n{}\n'.format(R['riboDat'])) | 85 logging.debug('riboDat \n{}\n'.format(R['riboDat'])) |
57 logging.debug('fCs\n{0}\n'.format(R['fCs'])) | 86 logging.debug('fCs\n{0}\n'.format(R['fCs'])) |
58 logging.debug('Reading frames for each n-mer\n{}'.format(R['fS'])) | 87 logging.debug('Reading frames for each n-mer\n{}'.format(R['fS'])) |
95 with open(html_file, 'w') as f: | 124 with open(html_file, 'w') as f: |
96 f.write(html) | 125 f.write(html) |
97 | 126 |
98 | 127 |
99 if __name__ == '__main__': | 128 if __name__ == '__main__': |
100 | 129 |
101 parser = argparse.ArgumentParser( | 130 parser = argparse.ArgumentParser( |
102 description='Plot triplet periodicity for different read lengths.') | 131 description='Plot triplet periodicity for different read lengths.') |
103 | 132 |
104 # required arguments | 133 # required arguments |
105 flags = parser.add_argument_group('required arguments') | 134 flags = parser.add_argument_group('required arguments') |
106 flags.add_argument( | 135 flags.add_argument( |
107 '--rdata_load', required=True, | 136 '--rdata_load', required=True, |
108 help='riboSeqR data from input preparation step (Prepare.rda)') | 137 help='riboSeqR data from input preparation step (Prepare.rda)') |
109 flags.add_argument( | 138 flags.add_argument( |
110 '--fasta_file', required=True, | 139 '--fasta_file', required=True, |
111 help='FASTA file of the reference transcriptome') | 140 help='FASTA file of the reference transcriptome') |
112 | 141 |
113 # optional arguments | 142 # optional arguments |
114 parser.add_argument( | 143 parser.add_argument( |
115 '--start_codons', | 144 '--start_codons', |
116 help='Start codon(s) to use. (default: %(default)s)', default='ATG') | 145 help='Start codon(s) to use. (default: %(default)s)', default='ATG') |
117 parser.add_argument( | 146 parser.add_argument( |
121 '--include_lengths', | 150 '--include_lengths', |
122 help='Lengths of ribosome footprints to be included ' | 151 help='Lengths of ribosome footprints to be included ' |
123 '(default: %(default)s)', default='25:30') | 152 '(default: %(default)s)', default='25:30') |
124 parser.add_argument( | 153 parser.add_argument( |
125 '--analyze_plot_lengths', | 154 '--analyze_plot_lengths', |
126 help='Lenghts of reads to be analyzed for frame-shift and plotting ' | 155 help='Lenghts of reads to be analyzed for frame-shift and plotting') |
127 '(default: %(default)s)', default='26:30') | |
128 parser.add_argument( | 156 parser.add_argument( |
129 '--text_legend', | 157 '--text_legend', |
130 help='Text for legend used in the plot (default: %(default)s)', | 158 help='Text for legend used in the plot (default: %(default)s)', |
131 default='Frame 0, Frame 1, Frame 2') | 159 default='Frame 0, Frame 1, Frame 2') |
132 parser.add_argument( | 160 parser.add_argument( |
135 parser.add_argument('--html_file', help='Output file for results (HTML)') | 163 parser.add_argument('--html_file', help='Output file for results (HTML)') |
136 parser.add_argument('--output_path', | 164 parser.add_argument('--output_path', |
137 help='Files are saved in this directory') | 165 help='Files are saved in this directory') |
138 parser.add_argument( | 166 parser.add_argument( |
139 '--debug', help='Produce debug output', action='store_true') | 167 '--debug', help='Produce debug output', action='store_true') |
140 | 168 |
141 args = parser.parse_args() | 169 args = parser.parse_args() |
142 if args.debug: | 170 if args.debug: |
143 logging.basicConfig(format='%(module)s: %(levelname)s - %(message)s', | 171 logging.basicConfig(format='%(module)s: %(levelname)s - %(message)s', |
144 level=logging.DEBUG, stream=sys.stdout) | 172 level=logging.DEBUG, stream=sys.stdout) |
145 logging.debug('Supplied Arguments\n{}\n'.format(vars(args))) | 173 logging.debug('Supplied Arguments\n{}\n'.format(vars(args))) |