Mercurial > repos > bioit_sciensano > phagetermvirome
comparison _modules/functions_PhageTerm.py @ 0:69e8f12c8b31 draft
"planemo upload"
author | bioit_sciensano |
---|---|
date | Fri, 11 Mar 2022 15:06:20 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:69e8f12c8b31 |
---|---|
1 #! /usr/bin/env python | |
2 # -*- coding: utf-8 -*- | |
3 ## @file functions_PhageTerm.py | |
4 # | |
5 # This file is a part of PhageTerm software | |
6 # A tool to determine phage termini and packaging strategy | |
7 # and other useful informations using raw sequencing reads. | |
8 # (This programs works with sequencing reads from a randomly | |
9 # sheared DNA library preparations as Illumina TruSeq paired-end or similar) | |
10 # | |
11 # ---------------------------------------------------------------------- | |
12 # Copyright (C) 2017 Julian Garneau | |
13 # | |
14 # This program is free software; you can redistribute it and/or modify | |
15 # it under the terms of the GNU General Public License as published by | |
16 # the Free Software Foundation; either version 3 of the License, or | |
17 # (at your option) any later version. | |
18 # <http://www.gnu.org/licenses/gpl-3.0.html> | |
19 # | |
20 # This program is distributed in the hope that it will be useful, | |
21 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
23 # GNU General Public License for more details. | |
24 # ---------------------------------------------------------------------- | |
25 # | |
26 # @author Julian Garneau <julian.garneau@usherbrooke.ca> | |
27 # @author Marc Monot <marc.monot@pasteur.fr> | |
28 # @author David Bikard <david.bikard@pasteur.fr> | |
29 | |
30 | |
31 ### PYTHON Module | |
32 # Base | |
33 from __future__ import print_function | |
34 | |
35 import sys | |
36 | |
37 import os | |
38 | |
39 | |
40 import matplotlib | |
41 matplotlib.use('Agg') | |
42 import matplotlib.pyplot as plt | |
43 from matplotlib import patches | |
44 from matplotlib.path import Path | |
45 | |
46 import numpy as np | |
47 import pandas as pd | |
48 | |
49 # String | |
50 #import cStringIO | |
51 import io | |
52 | |
53 # PDF report building | |
54 import time | |
55 from reportlab.lib.enums import TA_JUSTIFY, TA_CENTER, TA_LEFT, TA_RIGHT | |
56 from reportlab.lib.pagesizes import letter, landscape | |
57 from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, Table, TableStyle, PageBreak | |
58 from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle | |
59 from reportlab.lib.units import inch | |
60 from reportlab.lib import colors | |
61 from reportlab.lib.utils import ImageReader | |
62 | |
63 from _modules.utilities import reverseComplement,hybridCoverage,applyCoverage,correctEdge | |
64 from _modules.common_readsCoverage_processing import picMax | |
65 from _modules.readsCoverage_res import RCRes, RCCheckpoint_handler,RCWorkingS | |
66 | |
67 ### UTILITY function | |
68 def chunks(l, n): | |
69 """Yield n successive chunks from l.""" | |
70 newn = int(1.0 * len(l) / n + 0.5) | |
71 for i in range(0, n-1): | |
72 yield l[i*newn:i*newn+newn] | |
73 yield l[n*newn-newn:] | |
74 | |
75 ## | |
76 # Initializes working structure for readsCoverage | |
77 def init_ws(p_res,refseq,hostseq): | |
78 gen_len = len(refseq) | |
79 host_len = len(hostseq) | |
80 k = count_line = 0 | |
81 if p_res==None: | |
82 termini_coverage = np.array([gen_len*[0], gen_len*[0]]) | |
83 whole_coverage = np.array([gen_len*[0], gen_len*[0]]) | |
84 paired_whole_coverage = np.array([gen_len*[0], gen_len*[0]]) | |
85 phage_hybrid_coverage = np.array([gen_len*[0], gen_len*[0]]) | |
86 host_hybrid_coverage = np.array([host_len*[0], host_len*[0]]) | |
87 host_whole_coverage = np.array([host_len*[0], host_len*[0]]) | |
88 list_hybrid = np.array([0,0]) | |
89 insert = [] | |
90 paired_missmatch = 0 | |
91 read_match = 0 | |
92 else: | |
93 termini_coverage=p_res.interm_res.termini_coverage | |
94 whole_coverage=p_res.interm_res.whole_coverage | |
95 paired_whole_coverage=p_res.interm_res.paired_whole_coverage | |
96 phage_hybrid_coverage=p_res.interm_res.phage_hybrid_coverage | |
97 host_hybrid_coverage=p_res.interm_res.host_hybrid_coverage | |
98 host_whole_coverage=p_res.interm_res.host_whole_coverage | |
99 list_hybrid=p_res.interm_res.list_hybrid | |
100 insert=p_res.interm_res.insert | |
101 paired_missmatch=p_res.interm_res.paired_mismatch | |
102 k=int(p_res.interm_res.reads_tested) | |
103 #count_line=p_res.count_line-1 # do that because readsCoverage will start by incrementing it of 1 | |
104 read_match=p_res.read_match | |
105 return gen_len,host_len,termini_coverage,whole_coverage,paired_whole_coverage,phage_hybrid_coverage,host_hybrid_coverage, \ | |
106 host_whole_coverage,list_hybrid,insert,paired_missmatch,k,count_line,read_match #TODO refactor that. | |
107 | |
108 | |
109 | |
110 ## COVERAGE Starting and Whole function | |
111 # | |
112 # VL: use debug mode to keep track of what reads matched and what reads didn't. For those who matched, want to know if it is at the beginning of the read or at the end or if it is its reverse complement. | |
113 # My aim is to compare the results with those of the GPU version. | |
114 def readsCoverage(inRawDArgs,refseq,inDArgs,fParms,return_dict, core_id,line_start,line_end,tParms,\ | |
115 chk_handler,idx_refseq,logger=None): | |
116 """Calculate whole coverage and first base coverage. """ | |
117 | |
118 p_res=chk_handler.load(core_id,idx_refseq) | |
119 gen_len,host_len,termini_coverage, whole_coverage, paired_whole_coverage, phage_hybrid_coverage, host_hybrid_coverage,\ | |
120 host_whole_coverage, list_hybrid, insert, paired_missmatch, k, count_line, read_match=init_ws(p_res, refseq, inDArgs.hostseq) | |
121 if logger!=None: | |
122 logger.add_rw(p_res) | |
123 test_read_seq = match = 0 | |
124 # Timer | |
125 if core_id == (tParms.core-1): | |
126 sys.stdout.write(" 0.0 %") | |
127 sys.stdout.flush() | |
128 | |
129 # Mapping | |
130 filin = open(inRawDArgs.fastq) | |
131 line = filin.readline() | |
132 | |
133 if inRawDArgs.paired != "": | |
134 filin_paired = open(inRawDArgs.paired) | |
135 line_paired = filin_paired.readline() | |
136 count_line_tmp=0 | |
137 while line and count_line!=count_line_tmp: | |
138 count_line_tmp += 1 | |
139 line = filin.readline() | |
140 while line: | |
141 count_line+=1 | |
142 if count_line//4 <= line_start: | |
143 test_read_seq = 0 | |
144 if count_line//4 > line_end: | |
145 break | |
146 | |
147 if test_read_seq: | |
148 k += 1 | |
149 # Read sequence | |
150 read = line.split("\n")[0].split("\r")[0] | |
151 line = filin.readline() | |
152 | |
153 if inRawDArgs.paired != "": | |
154 read_paired = line_paired.split("\n")[0].split("\r")[0] | |
155 line_paired = filin_paired.readline() | |
156 | |
157 readlen = len(read) | |
158 if readlen < fParms.seed: | |
159 if logger!=None: | |
160 print("CPU rejecting read",k) | |
161 continue | |
162 corlen = readlen-fParms.seed | |
163 | |
164 if logger!=None: | |
165 print("CPU processing read: ",k,read, reverseComplement(read)) | |
166 logger.newRmInfo(k) | |
167 | |
168 ### Match sense + (multiple, random pick one) | |
169 #print("read[:fParms.seed]=",read[:fParms.seed]) | |
170 matchPplus_start, matchPplus_end = applyCoverage(read[:fParms.seed], refseq) | |
171 | |
172 ## Phage | |
173 if matchPplus_start != -1: | |
174 if logger!=None: | |
175 print("CPU found: ",read[:fParms.seed]) | |
176 logger.rMatch("mstart") | |
177 match = 1 | |
178 termini_coverage[0][matchPplus_start]+=1 | |
179 position_end = matchPplus_end+corlen | |
180 | |
181 # whole coverage | |
182 for i in range(matchPplus_start, min(gen_len,position_end)): | |
183 whole_coverage[0][i]+=1 | |
184 | |
185 # Paired-read | |
186 if inRawDArgs.paired != "": | |
187 matchPplus_start_paired, matchPplus_end_paired = applyCoverage(reverseComplement(read_paired)[-fParms.seed:], refseq) | |
188 insert_length = matchPplus_end_paired - matchPplus_start | |
189 if insert_length > 0 and insert_length < fParms.insert_max: | |
190 position_end = matchPplus_end_paired | |
191 insert.append(insert_length) | |
192 else: | |
193 paired_missmatch += 1 | |
194 # Paired hybrid | |
195 if inDArgs.hostseq != "" and matchPplus_start_paired == -1: | |
196 matchHplus_start, matchHplus_end = applyCoverage(read_paired[:fParms.seed], inDArgs.hostseq) | |
197 if matchHplus_start != -1: | |
198 list_hybrid[0] += 1 | |
199 phage_hybrid_coverage[0] = hybridCoverage(read, refseq, phage_hybrid_coverage[0], matchPplus_start, min(gen_len,matchPplus_end+corlen) ) | |
200 host_hybrid_coverage[0] = hybridCoverage(read_paired, inDArgs.hostseq, host_hybrid_coverage[0], matchHplus_start, min(host_len,matchHplus_end+corlen) ) | |
201 else: | |
202 matchHminus_start, matchHminus_end = applyCoverage(reverseComplement(read_paired)[:fParms.seed], inDArgs.hostseq) | |
203 if matchHminus_start != -1: | |
204 list_hybrid[0] += 1 | |
205 phage_hybrid_coverage[0] = hybridCoverage(read, refseq, phage_hybrid_coverage[0], matchPplus_start, min(gen_len,matchPplus_end+corlen) ) | |
206 host_hybrid_coverage[1] = hybridCoverage(reverseComplement(read_paired), inDArgs.hostseq, host_hybrid_coverage[1], matchHminus_start, min(host_len,matchHminus_end+corlen) ) | |
207 | |
208 # Single hybrid | |
209 elif inDArgs.hostseq != "": | |
210 matchPFplus_start, matchPFplus_end = applyCoverage(read[-fParms.seed:], refseq) | |
211 if matchPFplus_start == -1: | |
212 matchHplus_start, matchHplus_end = applyCoverage(read[-fParms.seed:], inDArgs.hostseq) | |
213 if matchHplus_start != -1: | |
214 list_hybrid[0] += 1 | |
215 phage_hybrid_coverage[0] = hybridCoverage(read, refseq, phage_hybrid_coverage[0], matchPplus_start, min(gen_len,matchPplus_end+corlen) ) | |
216 host_hybrid_coverage[0] = hybridCoverage(read, inDArgs.hostseq, host_hybrid_coverage[0], matchHplus_start, min(host_len,matchHplus_end+corlen) ) | |
217 else: | |
218 matchHminus_start, matchHminus_end = applyCoverage(reverseComplement(read)[-fParms.seed:], inDArgs.hostseq) | |
219 if matchHminus_start != -1: | |
220 list_hybrid[0] += 1 | |
221 phage_hybrid_coverage[0] = hybridCoverage(read, refseq, phage_hybrid_coverage[0], matchPplus_start, min(gen_len,matchPplus_end+corlen) ) | |
222 host_hybrid_coverage[1] = hybridCoverage(reverseComplement(read), inDArgs.hostseq, host_hybrid_coverage[1], matchHminus_start, min(host_len,matchHminus_end+corlen) ) | |
223 | |
224 # whole coverage | |
225 for i in range(matchPplus_start, min(gen_len,position_end)): | |
226 paired_whole_coverage[0][i]+=1 | |
227 | |
228 ### if no match sense +, then test sense - | |
229 if not match: | |
230 matchPminus_start, matchPminus_end = applyCoverage(reverseComplement(read)[-fParms.seed:], refseq) | |
231 | |
232 ## Phage | |
233 if matchPminus_end != -1: | |
234 if logger != None: | |
235 print("CPU found: ",reverseComplement(read)[-fParms.seed:]," from ",reverseComplement(read)) | |
236 logger.rMatch("mrcplstart") | |
237 match = 1 | |
238 termini_coverage[1][matchPminus_end-1]+=1 | |
239 position_start = matchPminus_start-corlen | |
240 | |
241 # whole coverage | |
242 for i in range(max(0,position_start), matchPminus_end): | |
243 whole_coverage[1][i]+=1 | |
244 | |
245 # Paired-read | |
246 if inRawDArgs.paired != "": | |
247 matchPminus_start_paired, matchPminus_end_paired = applyCoverage(read_paired[:fParms.seed], refseq) | |
248 insert_length = matchPminus_end - matchPminus_start_paired | |
249 if insert_length > 0 and insert_length < fParms.insert_max: | |
250 position_start = matchPminus_start_paired | |
251 insert.append(insert_length) | |
252 else: | |
253 paired_missmatch += 1 | |
254 # Paired hybrid | |
255 if inDArgs.hostseq != "" and matchPminus_start_paired == -1: | |
256 matchHplus_start, matchHplus_end = applyCoverage(read_paired[:fParms.seed], inDArgs.hostseq) | |
257 if matchHplus_start != -1: | |
258 list_hybrid[1] += 1 | |
259 phage_hybrid_coverage[1] = hybridCoverage(reverseComplement(read), refseq, phage_hybrid_coverage[1], matchPminus_start, min(gen_len,matchPminus_end+corlen) ) | |
260 host_hybrid_coverage[0] = hybridCoverage(read_paired, inDArgs.hostseq, host_hybrid_coverage[0], matchHplus_start, min(host_len,matchHplus_end+corlen) ) | |
261 | |
262 else: | |
263 matchHminus_start, matchHminus_end = applyCoverage(reverseComplement(read_paired)[-fParms.seed:], inDArgs.hostseq) | |
264 if matchHminus_start != -1: | |
265 list_hybrid[1] += 1 | |
266 phage_hybrid_coverage[1] = hybridCoverage(reverseComplement(read), refseq, phage_hybrid_coverage[1], matchPminus_start, min(gen_len,matchPminus_end+corlen) ) | |
267 host_hybrid_coverage[1] = hybridCoverage(reverseComplement(read_paired), inDArgs.hostseq, host_hybrid_coverage[1], matchHminus_start, min(host_len,matchHminus_end+corlen) ) | |
268 | |
269 # Single hybrid | |
270 elif inDArgs.hostseq != "": | |
271 matchPRplus_start, matchPRplus_end = applyCoverage(reverseComplement(read)[:fParms.seed], refseq) | |
272 if matchPRplus_start == -1: | |
273 matchHplus_start, matchHplus_end = applyCoverage(read[:fParms.seed], inDArgs.hostseq) | |
274 if matchHplus_start != -1: | |
275 list_hybrid[1] += 1 | |
276 phage_hybrid_coverage[1] = hybridCoverage(reverseComplement(read), refseq, phage_hybrid_coverage[1], matchPminus_start, min(gen_len,matchPminus_end+corlen) ) | |
277 host_hybrid_coverage[0] = hybridCoverage(read, inDArgs.hostseq, host_hybrid_coverage[0], matchHplus_start, min(host_len,matchHplus_end+corlen) ) | |
278 else: | |
279 matchHminus_start, matchHminus_end = applyCoverage(reverseComplement(read)[:fParms.seed], inDArgs.hostseq) | |
280 if matchHminus_start != -1: | |
281 list_hybrid[1] += 1 | |
282 phage_hybrid_coverage[1] = hybridCoverage(reverseComplement(read), refseq, phage_hybrid_coverage[1], matchPminus_start, min(gen_len,matchPminus_end+corlen) ) | |
283 host_hybrid_coverage[1] = hybridCoverage(reverseComplement(read), inDArgs.hostseq, host_hybrid_coverage[1], matchHminus_start, min(host_len,matchHminus_end+corlen) ) | |
284 | |
285 # whole coverage | |
286 for i in range(max(0,position_start), matchPminus_end): | |
287 paired_whole_coverage[1][i]+=1 | |
288 | |
289 ### if no match on Phage, test Host | |
290 if not match: | |
291 matchHplus_start, matchHplus_end = applyCoverage(read[:fParms.seed], inDArgs.hostseq) | |
292 if matchHplus_start != -1: | |
293 for i in range(matchHplus_start, min(host_len,matchHplus_end+corlen)): | |
294 host_whole_coverage[0][i]+=1 | |
295 else: | |
296 matchHminus_start, matchHminus_end = applyCoverage(reverseComplement(read)[-fParms.seed:], inDArgs.hostseq) | |
297 if matchHminus_end != -1: | |
298 for i in range(max(0,matchHminus_start-corlen), matchHminus_end): | |
299 host_whole_coverage[1][i]+=1 | |
300 | |
301 # TEST limit_coverage | |
302 read_match += match*readlen | |
303 | |
304 match = test_read_seq = 0 | |
305 # Timer | |
306 if core_id == (tParms.core-1): | |
307 if k%1000 == 0: | |
308 sys.stdout.write("\b\b\b\b\b\b\b\b\b%3.1f %%" %( float(read_match/gen_len) / tParms.limit_coverage * 100 )) | |
309 sys.stdout.flush() | |
310 | |
311 chk_handler.check(count_line,core_id,idx_refseq,termini_coverage,whole_coverage,paired_whole_coverage,\ | |
312 phage_hybrid_coverage, host_hybrid_coverage, \ | |
313 host_whole_coverage,list_hybrid,insert,paired_missmatch,k,read_match) # maybe time to create checkpoint | |
314 | |
315 else: | |
316 if line[0] == "@": | |
317 test_read_seq = 1 | |
318 | |
319 line = filin.readline() | |
320 if inRawDArgs.paired != "": | |
321 line_paired = filin_paired.readline() | |
322 | |
323 # TEST limit_coverage | |
324 if (read_match/gen_len) > tParms.limit_coverage: | |
325 line = 0 | |
326 | |
327 | |
328 if core_id == (tParms.core-1): | |
329 sys.stdout.write("\b\b\b\b\b\b\b\b\b%3.1f %%" %( 100 )) | |
330 sys.stdout.flush() | |
331 | |
332 # Close file | |
333 filin.close() | |
334 if inRawDArgs.paired != "": | |
335 filin_paired.close() | |
336 | |
337 | |
338 # Correct EDGE coverage | |
339 if sum(termini_coverage[0]) + sum(termini_coverage[1]) == 0 and not fParms.virome: | |
340 print("WARNING: No read Match, please check your fastq file") | |
341 | |
342 termini_coverage = correctEdge(termini_coverage, fParms.edge) | |
343 #paired_whole_coverage = correctEdge(whole_coverage, fParms.edge) #TODO: discuss with Julian and Max about the PE issue that Max reported. | |
344 whole_coverage = correctEdge(whole_coverage, fParms.edge) | |
345 phage_hybrid_coverage = correctEdge(phage_hybrid_coverage, fParms.edge) | |
346 if inDArgs.hostseq != "": | |
347 host_whole_coverage = correctEdge(host_whole_coverage, fParms.edge) | |
348 host_hybrid_coverage = correctEdge(host_hybrid_coverage, fParms.edge) | |
349 | |
350 if return_dict!=None and tParms.dir_cov_mm==None: | |
351 return_dict[core_id] = [termini_coverage, whole_coverage, paired_whole_coverage, phage_hybrid_coverage, host_hybrid_coverage, host_whole_coverage, list_hybrid, np.array(insert), paired_missmatch, k] | |
352 elif return_dict==None and tParms.dir_cov_mm!=None: | |
353 insert = np.array(insert) | |
354 fic_name = os.path.join(tParms.dir_cov_mm, "coverage" + str(idx_refseq) + "_" + str(core_id)) | |
355 res=RCRes(termini_coverage,whole_coverage,paired_whole_coverage,\ | |
356 phage_hybrid_coverage, host_hybrid_coverage, \ | |
357 host_whole_coverage,list_hybrid,insert,paired_missmatch,k) | |
358 res.save(fic_name) | |
359 else: | |
360 print("Error: readsCoverage must be used either with directory name or return_dict") | |
361 chk_handler.end(core_id) | |
362 | |
363 return | |
364 | |
365 | |
366 | |
367 ### IMAGE Functions | |
368 def GraphCov(termini_coverage, picMaxPlus, picMaxMinus, phagename, norm, draw, hybrid = 0): | |
369 """Produces a plot with termini coverage values.""" | |
370 # Remove old plot | |
371 plt.clf() | |
372 plt.cla() | |
373 plt.close() | |
374 # Create figure | |
375 plt.figure(1) | |
376 term_len = len(termini_coverage[0]) | |
377 term_range = list(range(term_len)) | |
378 # MOP: not totally sure what's going on here with the plot formatting | |
379 # but I refactored this out as it was getting complicated. | |
380 # Someone who understands the code better might put in more informative var names. | |
381 zipper = list(zip(*picMaxPlus)) | |
382 max_first_zipper = max(np.array(zipper[0])) | |
383 if norm == 1: | |
384 ylim = 1.2 | |
385 elif hybrid == 1: | |
386 offset = 0.2*(max_first_zipper) + 1 | |
387 ylim = max_first_zipper + offset | |
388 else: | |
389 offset = 0.2*(max(max(np.array(list(zip(*picMaxPlus))[0])), max(np.array(list(zip(*picMaxMinus))[0])))) | |
390 ylim = max(max(np.array(list(zip(*picMaxPlus))[0])), max(np.array(list(zip(*picMaxMinus))[0]))) + offset | |
391 # Strand (+) | |
392 plt.subplot(211) | |
393 if norm == 1: | |
394 plt.scatter(term_range,termini_coverage[0]) | |
395 else: | |
396 plt.plot(termini_coverage[0],linewidth=2) | |
397 plt.title('strand (+)') | |
398 plt.ylabel('') | |
399 # Axes | |
400 axes = plt.gca() | |
401 axes.set_ylim([0,ylim]) | |
402 # Maximum | |
403 x_strandplus = np.array(list(zip(*picMaxPlus))[1]) | |
404 y_strandplus = np.array(list(zip(*picMaxPlus))[0]) | |
405 # Plot | |
406 plt.plot(x_strandplus, y_strandplus, 'ro') | |
407 if norm == 1: | |
408 axes.axhline(y=0.5, xmin=0, xmax=x_strandplus, color='grey', linestyle='dashed', linewidth=1.5) | |
409 axes.axhline(y=1.0, xmin=0, xmax=x_strandplus, color='grey', linestyle='dashed', linewidth=1.5) | |
410 # Annotation | |
411 for i,j in zip(x_strandplus,y_strandplus): | |
412 plt.text(i+(term_len*0.03), j, str(i+1), fontsize=15, bbox=dict(boxstyle='round', facecolor='white', alpha=1)) | |
413 # Plot Option | |
414 plt.margins(0.1) | |
415 plt.locator_params(axis = 'x', nbins = 10) | |
416 plt.locator_params(axis = 'y', nbins = 3) | |
417 plt.xticks(rotation=75) | |
418 # Strand (-) | |
419 plt.subplot(212) | |
420 if norm == 1: | |
421 plt.scatter(term_range,termini_coverage[1]) | |
422 else: | |
423 plt.plot(termini_coverage[1],linewidth=2) | |
424 plt.title('strand (-)') | |
425 plt.ylabel('') | |
426 # Axes | |
427 if hybrid == 1: | |
428 offset = 0.2*(max_first_zipper) + 1 | |
429 ylim = max_first_zipper + offset | |
430 axes = plt.gca() | |
431 axes.set_ylim([0,ylim]) | |
432 # Maximum | |
433 x_strandminus = np.array(list(zip(*picMaxMinus))[1]) | |
434 y_strandminus = np.array(list(zip(*picMaxMinus))[0]) | |
435 # Plot | |
436 plt.plot(x_strandminus, y_strandminus, 'ro') | |
437 if norm == 1: | |
438 axes.axhline(y=0.5, xmin=0, xmax=x_strandplus, color='grey', linestyle='dashed', linewidth=1.5) | |
439 axes.axhline(y=1.0, xmin=0, xmax=x_strandplus, color='grey', linestyle='dashed', linewidth=1.5) | |
440 # Annotation | |
441 for i,j in zip(x_strandminus,y_strandminus): | |
442 plt.text(i+(term_len*0.03), j, str(i+1), fontsize=15, bbox=dict(boxstyle='round', facecolor='white', alpha=1)) | |
443 # Plot Option | |
444 plt.margins(0.1) | |
445 plt.locator_params(axis = 'x', nbins = 10) | |
446 plt.locator_params(axis = 'y', nbins = 3) | |
447 plt.xticks(rotation=75) | |
448 # Plot Adjustments | |
449 plt.tight_layout() | |
450 # Draw graph | |
451 if draw: | |
452 plt.savefig("%s_TCov.png" % phagename, dpi=200) | |
453 fig = plt.figure(1) | |
454 return fig | |
455 | |
456 def GraphWholeCov(added_whole_coverage, phagename, draw, P_left = "", P_right = "", pos_left = 0, pos_right = 0, graphZoom = 0, title = "WHOLE COVERAGE"): | |
457 """Produces a plot with whole coverage values.""" | |
458 # Remove old plot | |
459 plt.clf() | |
460 plt.cla() | |
461 plt.close() | |
462 # Create figure | |
463 offset = 0.2*(max(added_whole_coverage)) | |
464 ylim = max(added_whole_coverage) + offset | |
465 # Cumulative both strands | |
466 plt.figure(figsize=(15,8)) | |
467 plt.plot(added_whole_coverage,linewidth=2) | |
468 plt.title(title) | |
469 # Axes | |
470 axes = plt.gca() | |
471 axes.set_ylim([0,ylim]) | |
472 # Plot Option | |
473 plt.margins(0.1) | |
474 plt.locator_params(axis = 'x', nbins = 10) | |
475 plt.xticks(rotation=75) | |
476 # Termini vertical dashed line display | |
477 if graphZoom and isinstance(P_left, np.integer): | |
478 plt.axvline(x=pos_left, ymin=0, ymax=ylim, color='red', zorder=2, linestyle='dashed', linewidth=1) | |
479 if graphZoom and isinstance(P_right, np.integer): | |
480 plt.axvline(x=pos_right, ymin=0, ymax=ylim, color='green', zorder=2, linestyle='dashed', linewidth=1) | |
481 # Draw graph | |
482 if draw: | |
483 plt.savefig("%s_plot_WCov.png" % phagename, dpi=200) | |
484 fig = plt.figure(1) | |
485 return fig | |
486 | |
487 def GraphLogo(P_class, P_left, P_right, draw): | |
488 """Produce logo.""" | |
489 # Remove old plot | |
490 plt.clf() | |
491 plt.cla() | |
492 plt.close() | |
493 # Create figure | |
494 plt.figure(figsize=(10,10)) | |
495 #axes = plt.add_subplot(111) | |
496 axes = plt.gca() | |
497 axes.set_frame_on(False) | |
498 axes.xaxis.set_visible(False) | |
499 axes.yaxis.set_visible(False) | |
500 # Cadre | |
501 axes.add_artist(patches.Rectangle((0.1, 0.1), 0.8, 0.8, edgecolor = 'black', fill = False, linewidth = 15)) | |
502 | |
503 if P_class == "Headful (pac)": | |
504 # Texte | |
505 axes.text(0.17, 0.7, r"Headful (pac)", fontsize=50, fontweight='bold') | |
506 # PAC (blue line) | |
507 axes.axhline(y=0.35, xmin=0.2, xmax=0.8, color='blue', linewidth=15) | |
508 # PAC (red line) | |
509 axes.axvline(x=0.19, ymin=0.30, ymax=0.40, color='red', linewidth=10) | |
510 axes.axvline(x=0.42, ymin=0.30, ymax=0.40, color='red', linewidth=10) | |
511 axes.axvline(x=0.65, ymin=0.30, ymax=0.40, color='red', linewidth=10) | |
512 # PAC (Arrow) | |
513 axes.axvline(x=0.19, ymin=0.45, ymax=0.55, color='red', linewidth=15) | |
514 axes.arrow(0.19, 0.55, 0.07, 0, color='red', linewidth=15, head_width=0.07, head_length=0.1) | |
515 | |
516 elif P_class == "COS (5')": | |
517 # Texte | |
518 axes.text(0.3, 0.7, r"COS (5')", fontsize=50, fontweight='bold') | |
519 axes.add_artist(patches.Ellipse(xy=(0.5,0.4), width=0.5, height=0.35 , edgecolor = 'blue', fill=False, lw=15)) | |
520 axes.add_artist(patches.Ellipse(xy=(0.5,0.4), width=0.58, height=0.43 , edgecolor = 'blue', fill=False, lw=15)) | |
521 axes.add_artist(patches.Rectangle((0.4, 0.5), 0.20, 0.20, edgecolor = 'white', facecolor = 'white', fill = True)) | |
522 axes.axhline(y=0.56, xmin=0.415, xmax=0.48, color='red', linewidth=16) | |
523 axes.axhline(y=0.601, xmin=0.52, xmax=0.585, color='red', linewidth=16) | |
524 | |
525 elif P_class == "COS (3')": | |
526 # Texte | |
527 axes.text(0.3, 0.7, r"COS (3')", fontsize=50, fontweight='bold') | |
528 axes.add_artist(patches.Ellipse(xy=(0.5,0.4), width=0.5, height=0.35 , edgecolor = 'blue', fill=False, lw=15)) | |
529 axes.add_artist(patches.Ellipse(xy=(0.5,0.4), width=0.58, height=0.43 , edgecolor = 'blue', fill=False, lw=15)) | |
530 axes.add_artist(patches.Rectangle((0.4, 0.5), 0.20, 0.20, edgecolor = 'white', facecolor = 'white', fill = True)) | |
531 axes.axhline(y=0.601, xmin=0.415, xmax=0.48, color='red', linewidth=16) | |
532 axes.axhline(y=0.56, xmin=0.52, xmax=0.585, color='red', linewidth=16) | |
533 | |
534 elif P_class == "COS": | |
535 # Texte | |
536 axes.text(0.4, 0.7, r"COS", fontsize=50, fontweight='bold') | |
537 axes.add_artist(patches.Ellipse(xy=(0.5,0.4), width=0.5, height=0.35 , edgecolor = 'blue', fill=False, lw=15)) | |
538 axes.add_artist(patches.Ellipse(xy=(0.5,0.4), width=0.58, height=0.43 , edgecolor = 'blue', fill=False, lw=15)) | |
539 axes.add_artist(patches.Rectangle((0.4, 0.5), 0.20, 0.20, edgecolor = 'white', facecolor = 'white', fill = True)) | |
540 | |
541 elif P_class == "DTR (short)": | |
542 # Texte | |
543 axes.text(0.22, 0.7, r"DTR (short)", fontsize=50, fontweight='bold') | |
544 | |
545 verts = [(0.5, 0.5), (0.9, 0.4), (0.9, 0.3), (0.5,0.2)] | |
546 codes = [Path.MOVETO, Path.CURVE4, Path.CURVE4, Path.CURVE4] | |
547 path = Path(verts, codes) | |
548 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'blue', lw=15) | |
549 axes.add_patch(patch) | |
550 | |
551 verts = [(0.5, 0.2), (0.1, 0.30), (0.1, 0.45), (0.5,0.55)] | |
552 codes = [Path.MOVETO, Path.CURVE4, Path.CURVE4, Path.CURVE4] | |
553 path = Path(verts, codes) | |
554 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'blue', lw=15) | |
555 axes.add_patch(patch) | |
556 | |
557 verts = [(0.5, 0.55), (0.52, 0.545), (0, 0)] | |
558 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY] | |
559 path = Path(verts, codes) | |
560 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'red', lw=15) | |
561 axes.add_patch(patch) | |
562 | |
563 verts = [(0.56, 0.536), (0.58, 0.530), (0, 0)] | |
564 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY] | |
565 path = Path(verts, codes) | |
566 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'red', lw=15) | |
567 axes.add_patch(patch) | |
568 | |
569 verts = [(0.5, 0.50), (0.56, 0.480), (0, 0)] | |
570 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY] | |
571 path = Path(verts, codes) | |
572 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'white', lw=20) | |
573 axes.add_patch(patch) | |
574 | |
575 verts = [(0.5, 0.50), (0.52, 0.495), (0, 0)] | |
576 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY] | |
577 path = Path(verts, codes) | |
578 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'red', lw=15) | |
579 axes.add_patch(patch) | |
580 | |
581 verts = [(0.56, 0.486), (0.58, 0.480), (0, 0)] | |
582 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY] | |
583 path = Path(verts, codes) | |
584 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'red', lw=15) | |
585 axes.add_patch(patch) | |
586 | |
587 elif P_class == "DTR (long)": | |
588 # Texte | |
589 axes.text(0.25, 0.7, r"DTR (long)", fontsize=50, fontweight='bold') | |
590 verts = [(0.5, 0.5), (0.9, 0.4), (0.9, 0.3), (0.5,0.2)] | |
591 codes = [Path.MOVETO, Path.CURVE4, Path.CURVE4, Path.CURVE4] | |
592 path = Path(verts, codes) | |
593 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'blue', lw=15) | |
594 axes.add_patch(patch) | |
595 | |
596 verts = [(0.5, 0.2), (0.1, 0.30), (0.1, 0.45), (0.5,0.55)] | |
597 codes = [Path.MOVETO, Path.CURVE4, Path.CURVE4, Path.CURVE4] | |
598 path = Path(verts, codes) | |
599 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'blue', lw=15) | |
600 axes.add_patch(patch) | |
601 | |
602 verts = [(0.5, 0.55), (0.52, 0.545), (0, 0)] | |
603 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY] | |
604 path = Path(verts, codes) | |
605 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'red', lw=15) | |
606 axes.add_patch(patch) | |
607 | |
608 verts = [(0.56, 0.536), (0.58, 0.530), (0, 0)] | |
609 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY] | |
610 path = Path(verts, codes) | |
611 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'red', lw=15) | |
612 axes.add_patch(patch) | |
613 | |
614 verts = [(0.62, 0.521), (0.64, 0.516), (0, 0)] | |
615 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY] | |
616 path = Path(verts, codes) | |
617 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'red', lw=15) | |
618 axes.add_patch(patch) | |
619 | |
620 verts = [(0.68, 0.507), (0.70, 0.501), (0, 0)] | |
621 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY] | |
622 path = Path(verts, codes) | |
623 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'red', lw=15) | |
624 axes.add_patch(patch) | |
625 | |
626 verts = [(0.5, 0.50), (0.65, 0.460), (0, 0)] | |
627 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY] | |
628 path = Path(verts, codes) | |
629 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'white', lw=25) | |
630 axes.add_patch(patch) | |
631 | |
632 verts = [(0.5, 0.50), (0.52, 0.495), (0, 0)] | |
633 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY] | |
634 path = Path(verts, codes) | |
635 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'red', lw=15) | |
636 axes.add_patch(patch) | |
637 | |
638 verts = [(0.56, 0.486), (0.58, 0.480), (0, 0)] | |
639 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY] | |
640 path = Path(verts, codes) | |
641 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'red', lw=15) | |
642 axes.add_patch(patch) | |
643 | |
644 verts = [(0.62, 0.471), (0.64, 0.465), (0, 0)] | |
645 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY] | |
646 path = Path(verts, codes) | |
647 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'red', lw=15) | |
648 axes.add_patch(patch) | |
649 | |
650 verts = [(0.68, 0.456), (0.70, 0.450), (0, 0)] | |
651 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY] | |
652 path = Path(verts, codes) | |
653 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'red', lw=15) | |
654 axes.add_patch(patch) | |
655 | |
656 elif P_class == "Mu-like": | |
657 # Texte | |
658 axes.text(0.33, 0.7, r"Mu-like", fontsize=50, fontweight='bold') | |
659 axes.axhline(y=0.43, xmin=0.3, xmax=0.7, color='blue', linewidth=15) | |
660 axes.axhline(y=0.47, xmin=0.3, xmax=0.7, color='blue', linewidth=15) | |
661 axes.axhline(y=0.43, xmin=0.7, xmax=0.8, color='green', linewidth=15) | |
662 axes.axhline(y=0.47, xmin=0.7, xmax=0.8, color='green', linewidth=15) | |
663 axes.axhline(y=0.43, xmin=0.2, xmax=0.3, color='green', linewidth=15) | |
664 axes.axhline(y=0.47, xmin=0.2, xmax=0.3, color='green', linewidth=15) | |
665 | |
666 elif P_left == "Random" and P_right == "Random": | |
667 # Texte | |
668 axes.text(0.25, 0.7, r"UNKNOWN", fontsize=50, fontweight='bold') | |
669 axes.text(0.44, 0.3, r"?", fontsize=200, fontweight='bold') | |
670 else: | |
671 # Texte | |
672 axes.text(0.4, 0.7, r"NEW", fontsize=50, fontweight='bold') | |
673 axes.text(0.44, 0.3, r"!", fontsize=200, fontweight='bold') | |
674 | |
675 # Draw graph | |
676 if draw: | |
677 plt.savefig("%s_logo.png" % phagename, dpi=200) | |
678 fig = plt.figure(1) | |
679 return fig | |
680 | |
681 | |
682 ### OUTPUT Result files | |
683 def exportDataSplit(sequence, split): | |
684 """Export sequence with split line length.""" | |
685 seq = "" | |
686 for i in range((len(sequence)//split)+1): | |
687 seq += "".join(map(str,sequence[i*split:(i+1)*split])) + '\n' | |
688 return seq | |
689 | |
690 def ExportStatistics(phagename, whole_coverage, paired_whole_coverage, termini_coverage, phage_plus_norm, phage_minus_norm, paired, test_run): | |
691 """Export peaks statistics.""" | |
692 if test_run: | |
693 return | |
694 export = pd.DataFrame() | |
695 # ORGANIZE Column | |
696 export["Position"] = list(phage_plus_norm.sort_values("Position")["Position"]) | |
697 if paired != "": | |
698 export["Coverage +"] = paired_whole_coverage[0] | |
699 else: | |
700 export["Coverage +"] = whole_coverage[0] | |
701 export["SPC +"] = termini_coverage[0] | |
702 export["T +"] = [format(x/100.0,'0.2') for x in list(phage_plus_norm.sort_values("Position")["SPC_std"])] | |
703 export["T + (close)"] = [format(x/100.0,'0.2') for x in list(phage_plus_norm.sort_values("Position")["SPC"])] | |
704 export["pvalue +"] = [format(x,'0.2e') for x in list(phage_plus_norm.sort_values("Position")["pval_gamma"])] | |
705 export["padj +"] = [format(x,'0.2e') for x in list(phage_plus_norm.sort_values("Position")["pval_gamma_adj"])] | |
706 if paired != "": | |
707 export["Coverage -"] = whole_coverage[1] | |
708 else: | |
709 export["Coverage -"] = paired_whole_coverage[1] | |
710 export["SPC -"] = termini_coverage[1] | |
711 export["T -"] = [format(x/100.0,'0.2') for x in list(phage_minus_norm.sort_values("Position")["SPC_std"])] | |
712 export["T - (close)"] = [format(x/100.0,'0.2') for x in list(phage_minus_norm.sort_values("Position")["SPC"])] | |
713 export["pvalue -"] = [format(x,'0.2e') for x in list(phage_minus_norm.sort_values("Position")["pval_gamma"])] | |
714 export["padj -"] = [format(x,'0.2e') for x in list(phage_minus_norm.sort_values("Position")["pval_gamma_adj"])] | |
715 filout = open(phagename + "_statistics.csv", "w") | |
716 filout.write(export.to_csv(index=False)) | |
717 filout.close() | |
718 return | |
719 | |
720 def ExportCohesiveSeq(phagename, ArtcohesiveSeq, P_seqcoh, test_run, multi = 0): | |
721 """Export cohesive sequence of COS phages.""" | |
722 if test_run: | |
723 return "" | |
724 if len(ArtcohesiveSeq) < 3 and len(P_seqcoh) < 3: | |
725 return "" | |
726 if len(ArtcohesiveSeq) < 20 and len(P_seqcoh) < 20: | |
727 export_text = "cohesive sequence" | |
728 if not multi: | |
729 filout = open(phagename + "_cohesive-sequence.fasta", "w") | |
730 else: | |
731 export_text = "direct terminal repeats sequence" | |
732 if not multi: | |
733 filout = open(phagename + "_direct-term-repeats.fasta", "w") | |
734 if P_seqcoh != '': | |
735 if not multi: | |
736 filout.write(">" + phagename + " " + export_text + " (Analysis: Statistics)\n" + exportDataSplit(P_seqcoh, 60)) | |
737 else: | |
738 return ">" + phagename + " " + export_text + " (Analysis: Statistics)\n" + exportDataSplit(P_seqcoh, 60) | |
739 if ArtcohesiveSeq != '': | |
740 if not multi: | |
741 filout.write(">" + phagename + " " + export_text + " (Analysis: Li)\n" + exportDataSplit(ArtcohesiveSeq, 60)) | |
742 filout.close() | |
743 return "" | |
744 | |
745 def ExportPhageSequence(phagename, P_left, P_right, refseq, P_orient, Redundant, Mu_like, P_class, P_seqcoh, test_run, multi = 0): | |
746 """Export the phage sequence reorganized and completed if needed.""" | |
747 if test_run: | |
748 return "" | |
749 seq_out = "" | |
750 # Mu-like | |
751 if Mu_like: | |
752 if P_orient == "Forward": | |
753 if P_right != "Random": | |
754 if P_left > P_right: | |
755 seq_out = refseq[P_right-1:P_left-1] | |
756 else: | |
757 seq_out = refseq[P_right-1:] + refseq[:P_left-1] | |
758 else: | |
759 seq_out = refseq[P_left-1:] + refseq[:P_left-1] | |
760 elif P_orient == "Reverse": | |
761 if P_left != "Random": | |
762 if P_left > P_right: | |
763 seq_out = reverseComplement(refseq[P_right-1:P_left-1]) | |
764 else: | |
765 seq_out = reverseComplement(refseq[P_right-1:] + reverseComplement(refseq[:P_left-1])) | |
766 else: | |
767 seq_out = reverseComplement(refseq[P_right-1:] + reverseComplement(refseq[:P_right-1]) ) | |
768 # COS | |
769 elif isinstance(P_left, np.integer) and isinstance(P_right, np.integer): | |
770 # Cos or DTR | |
771 if P_class == "COS (3')": | |
772 if abs(P_left-P_right) > len(refseq)/2: | |
773 seq_out = refseq[min(P_left,P_right)-1:max(P_left,P_right)] | |
774 else: | |
775 seq_out = refseq[max(P_left,P_right)-1:] + refseq[:min(P_left,P_right)] | |
776 seq_out = seq_out + P_seqcoh | |
777 else: | |
778 # Genome | |
779 if abs(P_left-P_right) > len(refseq)/2: | |
780 seq_out = refseq[min(P_left,P_right)-1:max(P_left,P_right)] | |
781 else: | |
782 seq_out = refseq[max(P_left,P_right):] + refseq[:min(P_left,P_right)-1] | |
783 # COS 5' | |
784 if P_class == "COS (5')": | |
785 seq_out = P_seqcoh + seq_out | |
786 # DTR | |
787 else: | |
788 seq_out = P_seqcoh + seq_out + P_seqcoh | |
789 # PAC | |
790 elif isinstance(P_left, np.integer) or isinstance(P_right, np.integer): | |
791 if P_orient == "Reverse": | |
792 seq_out = reverseComplement(refseq[:P_right]) + reverseComplement(refseq[P_right:]) | |
793 else: | |
794 seq_out = refseq[P_left-1:] + refseq[:P_left-1] | |
795 # Write Sequence | |
796 if multi: | |
797 return ">" + phagename + " sequence re-organized\n" + exportDataSplit(seq_out, 60) | |
798 else: | |
799 filout = open(phagename + "_sequence.fasta", "w") | |
800 filout.write(">" + phagename + " sequence re-organized\n" + exportDataSplit(seq_out, 60)) | |
801 filout.close() | |
802 return "" | |
803 | |
804 def CreateReport(phagename, seed, added_whole_coverage, draw, Redundant, P_left, P_right, Permuted, P_orient, termini_coverage_norm_close, picMaxPlus_norm_close, picMaxMinus_norm_close, gen_len, tot_reads, P_seqcoh, phage_plus_norm, phage_minus_norm, ArtPackmode, termini, forward, reverse, ArtOrient, ArtcohesiveSeq, termini_coverage_close, picMaxPlus_close, picMaxMinus_close, picOUT_norm_forw, picOUT_norm_rev, picOUT_forw, picOUT_rev, lost_perc, ave_whole_cov, R1, R2, R3, host, host_len, host_whole_coverage, picMaxPlus_host, picMaxMinus_host, surrounding, drop_cov, paired, insert, phage_hybrid_coverage, host_hybrid_coverage, added_paired_whole_coverage, Mu_like, test_run, P_class, P_type, P_concat, multi = 0, multiReport = 0, *args, **kwargs): | |
805 """Produce a PDF report.""" | |
806 if not multi: | |
807 doc = SimpleDocTemplate("%s_PhageTerm_report.pdf" % phagename, pagesize=letter, rightMargin=10,leftMargin=10, topMargin=5, bottomMargin=10) | |
808 report=[] | |
809 else: | |
810 report = multiReport | |
811 | |
812 styles=getSampleStyleSheet() | |
813 styles.add(ParagraphStyle(name='Justify', alignment=TA_JUSTIFY)) | |
814 styles.add(ParagraphStyle(name='Center', alignment=TA_CENTER)) | |
815 styles.add(ParagraphStyle(name='Right', alignment=TA_RIGHT)) | |
816 styles.add(ParagraphStyle(name='Left', alignment=TA_LEFT)) | |
817 | |
818 ### GENERAL INFORMATION | |
819 | |
820 # TITLE | |
821 ptext = '<b><font size=16>' + phagename + ' PhageTerm Analysis</font></b>' | |
822 report.append(Paragraph(ptext, styles["Center"])) | |
823 report.append(Spacer(1, 15)) | |
824 | |
825 ## ZOOMED TERMINI GRAPH AND LOGO RESULT | |
826 | |
827 # LOGO SLECTION | |
828 | |
829 imgdata = io.BytesIO() | |
830 fig_logo = GraphLogo(P_class, P_left, P_right, draw) | |
831 fig_logo.savefig(imgdata, format='png') | |
832 imgdata.seek(0) | |
833 IMG = ImageReader(imgdata) | |
834 IMAGE_2 = Image(IMG.fileName, width=150, height=150, kind='proportional') | |
835 IMAGE_2.hAlign = 'CENTER' | |
836 | |
837 # Zoom on inter-termini seq | |
838 if isinstance(P_left, np.integer) and isinstance(P_right, np.integer) and not Mu_like: | |
839 Zoom_left = min(P_left-1000, P_right-1000) | |
840 Zoom_right = max(P_left+1000, P_right+1000) | |
841 imgdata = io.BytesIO() | |
842 if P_orient == "Reverse": | |
843 zoom_pos_left = P_right-max(0,Zoom_left) | |
844 zoom_pos_right = P_left-max(0,Zoom_left) | |
845 else: | |
846 zoom_pos_left = P_left-max(0,Zoom_left) | |
847 zoom_pos_right = P_right-max(0,Zoom_left) | |
848 | |
849 figZ_whole = GraphWholeCov(added_whole_coverage[max(0,Zoom_left):min(gen_len,Zoom_right)], phagename + "-zoom", draw, P_left, P_right, zoom_pos_left, zoom_pos_right, 1, "Zoom Termini") | |
850 figZ_whole.savefig(imgdata, format='png') | |
851 imgdata.seek(0) | |
852 IMG = ImageReader(imgdata) | |
853 IMAGE = Image(IMG.fileName, width=275, height=340, kind='proportional') | |
854 IMAGE.hAlign = 'CENTER' | |
855 | |
856 data = [[IMAGE, IMAGE_2]] | |
857 t=Table(data, 1*[4*inch]+1*[3*inch], 1*[2*inch], hAlign='CENTER', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
858 report.append(t) | |
859 report.append(Spacer(1, 5)) | |
860 | |
861 elif isinstance(P_left, np.integer) and P_orient == "Forward": | |
862 imgdata = io.BytesIO() | |
863 | |
864 if Mu_like: | |
865 figZL_whole = GraphWholeCov(phage_hybrid_coverage[0][max(0,P_left-1000):min(gen_len,P_left+1000)], phagename + "-zoom-left", draw, P_left, "", P_left-max(0,P_left-1000), 0, 1, "Zoom Termini") | |
866 else: | |
867 figZL_whole = GraphWholeCov(added_whole_coverage[max(0,P_left-1000):min(gen_len,P_left+1000)], phagename + "-zoom-left", draw, P_left, P_right, P_left-max(0,P_left-1000), 0, 1, "Zoom Termini") | |
868 figZL_whole.savefig(imgdata, format='png') | |
869 imgdata.seek(0) | |
870 IMG = ImageReader(imgdata) | |
871 IMAGE = Image(IMG.fileName, width=275, height=340, kind='proportional') | |
872 IMAGE.hAlign = 'CENTER' | |
873 | |
874 data = [[IMAGE, IMAGE_2]] | |
875 t=Table(data, 1*[5*inch]+1*[3*inch], 1*[2*inch], hAlign='CENTER', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
876 report.append(t) | |
877 | |
878 elif isinstance(P_right, np.integer) and P_orient == "Reverse": | |
879 imgdata = io.BytesIO() | |
880 | |
881 if Mu_like: | |
882 figZR_whole = GraphWholeCov(phage_hybrid_coverage[1][max(0,P_right-1000):min(gen_len,P_right+1000)], phagename + "-zoom-right", draw, "", P_right, 0, P_right-max(0,P_right-1000), 1, "Zoom Termini") | |
883 else: | |
884 figZR_whole = GraphWholeCov(added_whole_coverage[max(0,P_right-1000):min(gen_len,P_right+1000)], phagename + "-zoom-right", draw, P_left, P_right, 0, P_right-max(0,P_right-1000), 1, "Zoom Termini") | |
885 figZR_whole.savefig(imgdata, format='png') | |
886 imgdata.seek(0) | |
887 IMG = ImageReader(imgdata) | |
888 IMAGE = Image(IMG.fileName, width=275, height=340, kind='proportional') | |
889 IMAGE.hAlign = 'CENTER' | |
890 | |
891 data = [[IMAGE, IMAGE_2]] | |
892 t=Table(data, 1*[5*inch]+1*[3*inch], 1*[2*inch], hAlign='CENTER', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
893 report.append(t) | |
894 report.append(Spacer(1, 5)) | |
895 else: | |
896 data = [[IMAGE_2]] | |
897 t=Table(data, 1*[1.5*inch], 1*[2*inch], hAlign='CENTER', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
898 report.append(t) | |
899 | |
900 # Warning coverage message | |
901 if ave_whole_cov < 50 and test_run == 0: | |
902 ptextawc = "- - - - - - - - - WARNING: Coverage (" + str(int(ave_whole_cov)) + ") is under the limit of the software, Please consider results carrefuly. - - - - - - - - -" | |
903 data = [[ptextawc]] | |
904 t=Table(data, 1*[5*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('TEXTCOLOR',(0,0),(-1,-1),'RED'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
905 report.append(t) | |
906 | |
907 ## Statistics | |
908 ptext = '<u><font size=14>PhageTerm Method</font></u>' | |
909 report.append(Paragraph(ptext, styles["Left"])) | |
910 report.append(Spacer(1, 10)) | |
911 | |
912 if Redundant: | |
913 Ends = "Redundant" | |
914 else: | |
915 Ends = "Non Red." | |
916 | |
917 data = [["Ends", "Left (red)", "Right (green)", "Permuted", "Orientation", "Class", "Type"], [Ends, P_left, P_right, Permuted, P_orient, P_class, P_type]] | |
918 t=Table(data, 7*[1.10*inch], 2*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
919 report.append(t) | |
920 report.append(Spacer(1, 5)) | |
921 | |
922 # Seq cohesive or Direct terminal repeats | |
923 if P_seqcoh != "": | |
924 if len(P_seqcoh) < 20: | |
925 ptext = '<i><font size=12>*Sequence cohesive: ' + P_seqcoh + '</font></i>' | |
926 else: | |
927 ptext = '<i><font size=12>*Direct Terminal Repeats: ' + str(len(P_seqcoh)) + ' bp</font></i>' | |
928 report.append(Paragraph(ptext, styles["Left"])) | |
929 | |
930 # Multiple / Multiple (Nextera) | |
931 if P_left == "Multiple" and P_right == "Multiple": | |
932 ptext = '<i><font size=12>*This results could be due to a non-random fragmented sequence (e.g. Nextera)</font></i>' | |
933 report.append(Paragraph(ptext, styles["Left"])) | |
934 | |
935 | |
936 # Concatermer | |
937 elif P_class[:7] == "Headful" and paired != "": | |
938 ptext = '<i><font size=12>*concatemer estimation: ' + str(P_concat) + '</font></i>' | |
939 report.append(Paragraph(ptext, styles["Left"])) | |
940 | |
941 # Mu hybrid | |
942 elif Mu_like: | |
943 if P_orient == "Forward": | |
944 Mu_termini = P_left | |
945 else: | |
946 Mu_termini = P_right | |
947 ptext = '<i><font size=12>*Mu estimated termini position with hybrid fragments: ' + str(Mu_termini) + '</font></i>' | |
948 report.append(Paragraph(ptext, styles["Left"])) | |
949 | |
950 report.append(Spacer(1, 10)) | |
951 | |
952 # Results | |
953 imgdata = io.BytesIO() | |
954 figP_norm = GraphCov(termini_coverage_norm_close, picMaxPlus_norm_close[:1], picMaxMinus_norm_close[:1], phagename + "-norm", 1, draw) | |
955 figP_norm.savefig(imgdata, format='png') | |
956 imgdata.seek(0) | |
957 IMG = ImageReader(imgdata) | |
958 IMAGE = Image(IMG.fileName, width=240, height=340, kind='proportional') | |
959 IMAGE.hAlign = 'CENTER' | |
960 | |
961 data = [["Strand", "Location", "T", "pvalue", "T (Start. Pos. Cov. / Whole Cov.)"], ["+",phage_plus_norm["Position"][0],format(phage_plus_norm["SPC"][0]/100.0, '0.2f'),format(phage_plus_norm["pval_gamma_adj"][0], '0.2e'),IMAGE], ["",phage_plus_norm["Position"][1],format(phage_plus_norm["SPC"][1]/100.0, '0.2f'),format(phage_plus_norm["pval_gamma_adj"][1], '0.2e'),""], ["",phage_plus_norm["Position"][2],format(phage_plus_norm["SPC"][2]/100.0, '0.2f'),format(phage_plus_norm["pval_gamma_adj"][2], '0.2e'),""], ["",phage_plus_norm["Position"][3],format(phage_plus_norm["SPC"][3]/100.0, '0.2f'),format(phage_plus_norm["pval_gamma_adj"][3], '0.2e'),""], ["",phage_plus_norm["Position"][4],format(phage_plus_norm["SPC"][4]/100.0, '0.2f'),format(phage_plus_norm["pval_gamma_adj"][4], '0.2e'),""], ["-",phage_minus_norm["Position"][0],format(phage_minus_norm["SPC"][0]/100.0, '0.2f'),format(phage_minus_norm["pval_gamma_adj"][0], '0.2e'),""], ["",phage_minus_norm["Position"][1],format(phage_minus_norm["SPC"][1]/100.0, '0.2f'),format(phage_minus_norm["pval_gamma_adj"][1], '0.2e'),""], ["",phage_minus_norm["Position"][2],format(phage_minus_norm["SPC"][2]/100.0, '0.2f'),format(phage_minus_norm["pval_gamma_adj"][2], '0.2e'),""], ["",phage_minus_norm["Position"][3],format(phage_minus_norm["SPC"][3]/100.0, '0.2f'),format(phage_minus_norm["pval_gamma_adj"][3], '0.2e'),""], ["",phage_minus_norm["Position"][4],format(phage_minus_norm["SPC"][4]/100.0, '0.2f'),format(phage_minus_norm["pval_gamma_adj"][4], '0.2e'),""]] | |
962 t=Table(data, 4*[1*inch]+1*[4*inch], 11*[0.25*inch], hAlign='CENTER', style=[('SPAN',(0,1),(0,5)), ('SPAN',(0,6),(0,10)), ('SPAN',(4,1),(4,10)), ('LINEABOVE',(0,1),(4,1),1.5,colors.black), ('LINEABOVE',(0,6),(4,6),1.5,colors.grey), ('FONT',(0,0),(-1,0),'Helvetica-Bold'), ('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),12), ('FONTSIZE',(0,1),(0,-1),16), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
963 | |
964 report.append(t) | |
965 report.append(Spacer(1, 5)) | |
966 | |
967 ## Li's Analysis | |
968 ptext = '<u><font size=14>Li\'s Method</font></u>' | |
969 report.append(Paragraph(ptext, styles["Left"])) | |
970 report.append(Spacer(1, 10)) | |
971 | |
972 data = [["Packaging", "Termini", "Forward", "Reverse", "Orientation"], [ArtPackmode, termini, forward, reverse, ArtOrient]] | |
973 t=Table(data, 2*[1*inch] + 2*[2*inch] + 1*[1*inch], 2*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
974 | |
975 report.append(t) | |
976 report.append(Spacer(1, 5)) | |
977 | |
978 # Seq cohesive or Direct terminal repeats | |
979 if len(ArtcohesiveSeq) > 2: | |
980 if len(ArtcohesiveSeq) < 20: | |
981 ptext = '<i><font size=12>*Sequence cohesive: ' + ArtcohesiveSeq + '</font></i>' | |
982 else: | |
983 ptext = '<i><font size=12>*Direct Terminal Repeats: ' + str(len(ArtcohesiveSeq)) + ' bp</font></i>' | |
984 report.append(Paragraph(ptext, styles["Left"])) | |
985 report.append(Spacer(1, 10)) | |
986 | |
987 # Results | |
988 imgdata = io.BytesIO() | |
989 figP = GraphCov(termini_coverage_close, picMaxPlus_close[:1], picMaxMinus_close[:1], phagename, 0, draw) | |
990 figP.savefig(imgdata, format='png') | |
991 imgdata.seek(0) | |
992 IMG = ImageReader(imgdata) | |
993 IMAGE = Image(IMG.fileName, width=240, height=340, kind='proportional') | |
994 IMAGE.hAlign = 'CENTER' | |
995 | |
996 data = [["Strand", "Location", "SPC", "R", "SPC"],["+",picMaxPlus_close[0][1]+1,picMaxPlus_close[0][0],R2,IMAGE],["",picMaxPlus_close[1][1]+1,picMaxPlus_close[1][0],"-",""],["",picMaxPlus_close[2][1]+1,picMaxPlus_close[2][0],"-",""],["",picMaxPlus_close[3][1]+1,picMaxPlus_close[3][0],"-",""],["",picMaxPlus_close[4][1]+1,picMaxPlus_close[4][0],"-",""],["-",picMaxMinus_close[0][1]+1,picMaxMinus_close[0][0],R3,""], ["",picMaxMinus_close[1][1]+1,picMaxMinus_close[1][0],"-",""], ["",picMaxMinus_close[2][1]+1,picMaxMinus_close[2][0],"-",""], ["",picMaxMinus_close[3][1]+1,picMaxMinus_close[3][0],"-",""], ["",picMaxMinus_close[4][1]+1,picMaxMinus_close[4][0],"-",""]] | |
997 t=Table(data, 4*[1*inch]+1*[4*inch], 11*[0.25*inch], hAlign='CENTER', style=[('SPAN',(0,1),(0,5)), ('SPAN',(0,6),(0,10)), ('SPAN',(4,1),(4,10)), ('LINEABOVE',(0,1),(4,1),1.5,colors.black), ('LINEABOVE',(0,6),(4,6),1.5,colors.grey), ('FONT',(0,0),(-1,0),'Helvetica-Bold'), ('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),12), ('FONTSIZE',(0,1),(0,-1),16), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
998 | |
999 report.append(t) | |
1000 | |
1001 | |
1002 # NEW PAGE | |
1003 report.append(PageBreak()) | |
1004 | |
1005 # HOST RESULTS | |
1006 if host != "": | |
1007 # Host coverage | |
1008 ptext = '<u><font size=14>Host Analysis</font></u>' | |
1009 report.append(Paragraph(ptext, styles["Left"])) | |
1010 report.append(Spacer(1, 10)) | |
1011 | |
1012 ptext = '<i><font size=10></font>Reads that does not match on the phage genome are tested on the host genome. These reads could come from Phage transduction but also Host DNA contamination.</i>' | |
1013 report.append(Paragraph(ptext, styles["Justify"])) | |
1014 report.append(Spacer(1, 5)) | |
1015 | |
1016 data = [["Host Genome", str(host_len) + " bp"]] | |
1017 t=Table(data, 2*[2.25*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,0),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'LEFT') ,('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1018 | |
1019 report.append(t) | |
1020 report.append(Spacer(1, 5)) | |
1021 | |
1022 imgdata = io.BytesIO() | |
1023 | |
1024 figH = GraphCov(host_whole_coverage, picMaxPlus_host[:1], picMaxMinus_host[:1], "", 0, draw) | |
1025 figH.savefig(imgdata, format='png') | |
1026 imgdata.seek(0) | |
1027 IMG = ImageReader(imgdata) | |
1028 IMAGE = Image(IMG.fileName, width=240, height=340, kind='proportional') | |
1029 IMAGE.hAlign = 'CENTER' | |
1030 | |
1031 data = [["Strand", "Location", "Coverage", "-", "Whole Coverage"],["+",picMaxPlus_host[0][1]+1,picMaxPlus_host[0][0],"-",IMAGE],["",picMaxPlus_host[1][1]+1,picMaxPlus_host[1][0],"-",""],["",picMaxPlus_host[2][1]+1,picMaxPlus_host[2][0],"-",""],["",picMaxPlus_host[3][1]+1,picMaxPlus_host[3][0],"-",""],["",picMaxPlus_host[4][1]+1,picMaxPlus_host[4][0],"-",""],["-",picMaxMinus_host[0][1]+1,picMaxMinus_host[0][0],"-",""], ["",picMaxMinus_host[1][1]+1,picMaxMinus_host[1][0],"-",""], ["",picMaxMinus_host[2][1]+1,picMaxMinus_host[2][0],"-",""], ["",picMaxMinus_host[3][1]+1,picMaxMinus_host[3][0],"-",""], ["",picMaxMinus_host[4][1]+1,picMaxMinus_host[4][0],"-",""]] | |
1032 t=Table(data, 4*[1*inch]+1*[4*inch], 11*[0.25*inch], hAlign='CENTER', style=[('SPAN',(0,1),(0,5)), ('SPAN',(0,6),(0,10)), ('SPAN',(4,1),(4,10)), ('LINEABOVE',(0,1),(4,1),1.5,colors.black), ('LINEABOVE',(0,6),(4,6),1.5,colors.grey), ('FONT',(0,0),(-1,0),'Helvetica-Bold'), ('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),12), ('FONTSIZE',(0,1),(0,-1),16), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1033 | |
1034 report.append(t) | |
1035 report.append(Spacer(1, 10)) | |
1036 | |
1037 # Hybrid coverage | |
1038 ptext = '<u><font size=14>Hybrid Analysis</font></u>' | |
1039 report.append(Paragraph(ptext, styles["Left"])) | |
1040 report.append(Spacer(1, 10)) | |
1041 | |
1042 ptext = '<i><font size=10></font>Hybrid reads with one edge on the phage genome and the other edge on the host genome are detected. Phage Hybrid Coverages are used to detect Mu-like packaging mode. Host Hybrid Coverages could be used to detect Phage Transduction but also genome integration location of prophages.</i>' | |
1043 report.append(Paragraph(ptext, styles["Justify"])) | |
1044 report.append(Spacer(1, 5)) | |
1045 | |
1046 picMaxPlus_phage_hybrid, picMaxMinus_phage_hybrid, TopFreqH_phage_hybrid = picMax(phage_hybrid_coverage, 5) | |
1047 picMaxPlus_host_hybrid, picMaxMinus_host_hybrid, TopFreqH_host_hybrid = picMax(host_hybrid_coverage, 5) | |
1048 | |
1049 imgdataPH = io.BytesIO() | |
1050 figPH = GraphCov(phage_hybrid_coverage, picMaxPlus_phage_hybrid[:1], picMaxMinus_phage_hybrid[:1], "", 0, draw, 1) | |
1051 figPH.savefig(imgdataPH, format='png') | |
1052 imgdataPH.seek(0) | |
1053 IMGPH = ImageReader(imgdataPH) | |
1054 IMAGEPH = Image(IMGPH.fileName, width=240, height=340, kind='proportional') | |
1055 IMAGEPH.hAlign = 'CENTER' | |
1056 | |
1057 | |
1058 imgdataHH = io.BytesIO() | |
1059 figHH = GraphCov(host_hybrid_coverage, picMaxPlus_host_hybrid[:1], picMaxMinus_host_hybrid[:1], "", 0, draw, 1) | |
1060 figHH.savefig(imgdataHH, format='png') | |
1061 imgdataHH.seek(0) | |
1062 IMGHH = ImageReader(imgdataHH) | |
1063 IMAGEHH = Image(IMGHH.fileName, width=240, height=340, kind='proportional') | |
1064 IMAGEHH.hAlign = 'CENTER' | |
1065 | |
1066 data = [["Phage Hybrid Coverage", "Host Hybrid Coverage"],[IMAGEPH,IMAGEHH]] | |
1067 t=Table(data, 2*[4*inch], 1*[0.25*inch]+1*[2.5*inch], hAlign='CENTER', style=[('LINEABOVE',(0,1),(1,1),1.5,colors.black),('FONT',(0,0),(-1,-1),'Helvetica-Bold'),('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1068 | |
1069 report.append(t) | |
1070 report.append(Spacer(1, 10)) | |
1071 | |
1072 # NEW PAGE | |
1073 report.append(PageBreak()) | |
1074 | |
1075 | |
1076 # DETAILED RESULTS | |
1077 ptext = '<u><font size=14>Analysis Methodology</font></u>' | |
1078 report.append(Paragraph(ptext, styles["Left"])) | |
1079 report.append(Spacer(1, 10)) | |
1080 | |
1081 ptext = '<i><font size=10>PhageTerm software uses raw reads of a phage sequenced with a sequencing technology using random fragmentation and its genomic reference sequence to determine the termini position. The process starts with the alignment of NGS reads to the phage genome in order to calculate the starting position coverage (SPC), where a hit is given only to the position of the first base in a successfully aligned read (the alignment algorithm uses the lenght of the seed (default: 20) for mapping and does not accept gap or missmatch to speed up the process). Then the program apply 2 distinct scoring methods: i) a statistical approach based on the Gamma law; and ii) a method derived from LI and al. 2014 paper.</font></i>' | |
1082 report.append(Paragraph(ptext, styles["Justify"])) | |
1083 report.append(Spacer(1, 5)) | |
1084 | |
1085 | |
1086 # INFORMATION | |
1087 ptext = '<u><font size=12>General set-up and mapping informations</font></u>' | |
1088 report.append(Paragraph(ptext, styles["Justify"])) | |
1089 report.append(Spacer(1, 5)) | |
1090 | |
1091 | |
1092 imgdata = io.BytesIO() | |
1093 | |
1094 if paired != "": | |
1095 figP_whole = GraphWholeCov(added_paired_whole_coverage, phagename, draw) | |
1096 else: | |
1097 figP_whole = GraphWholeCov(added_whole_coverage, phagename, draw) | |
1098 figP_whole.savefig(imgdata, format='png') | |
1099 imgdata.seek(0) | |
1100 IMG = ImageReader(imgdata) | |
1101 IMAGE = Image(IMG.fileName, width=275, height=340, kind='proportional') | |
1102 IMAGE.hAlign = 'CENTER' | |
1103 | |
1104 if host == "": | |
1105 host_analysis = "No" | |
1106 else: | |
1107 host_analysis = "Yes" | |
1108 | |
1109 if paired == "": | |
1110 sequencing_reads = "Single-ends Reads" | |
1111 else: | |
1112 sequencing_reads = "Paired-ends Reads" | |
1113 | |
1114 data = [["Phage Genome ", str(gen_len) + " bp",IMAGE], ["Sequencing Reads", int(tot_reads),""], ["Mapping Reads", str(int(100 - lost_perc)) + " %",""], ["OPTIONS","",""], ["Mapping Seed",seed,""], ["Surrounding",surrounding,""], ["Host Analysis ", host_analysis,""], ["","",""]] | |
1115 t=Table(data, 1*[2.25*inch]+1*[1.80*inch]+1*[4*inch], 8*[0.25*inch], hAlign='LEFT', style=[('SPAN',(2,0),(2,-1)), ('FONT',(0,0),(0,2),'Helvetica-Bold'), ('FONT',(0,3),(0,3),'Helvetica-Oblique'), ('FONT',(0,4),(1,-1),'Helvetica-Oblique'), ('FONT',(2,0),(2,0),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-2),'LEFT'), ('ALIGN',(2,0),(2,-1),'CENTER') ,('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1116 | |
1117 report.append(t) | |
1118 report.append(Spacer(1, 5)) | |
1119 | |
1120 | |
1121 # Img highest peaks of each side even if no significative | |
1122 ptext = '<u><font size=12>Highest peak of each side coverage graphics</font></u>' | |
1123 report.append(Paragraph(ptext, styles["Justify"])) | |
1124 report.append(Spacer(1, 5)) | |
1125 | |
1126 | |
1127 imgdata = io.BytesIO() | |
1128 | |
1129 if Mu_like and isinstance(P_left, np.integer): | |
1130 figHL_whole = GraphWholeCov(phage_hybrid_coverage[0][max(0,P_left-1000):min(gen_len,P_left+1000)], phagename + "-zoom-left", draw, P_left, "", P_left-max(0,P_left-1000), 0, 1, "Zoom Termini") | |
1131 else: | |
1132 P_left = phage_plus_norm["Position"][0] | |
1133 figHL_whole = GraphWholeCov(added_whole_coverage[max(0,P_left-1000):min(gen_len,P_left+1000)], phagename + "-zoom-left", draw, P_left, "", P_left-max(0,P_left-1000), 0, 1, "Zoom Termini") | |
1134 figHL_whole.savefig(imgdata, format='png') | |
1135 imgdata.seek(0) | |
1136 IMG = ImageReader(imgdata) | |
1137 IMAGE = Image(IMG.fileName, width=275, height=340, kind='proportional') | |
1138 IMAGE.hAlign = 'CENTER' | |
1139 | |
1140 imgdata2 = io.BytesIO() | |
1141 | |
1142 if Mu_like and isinstance(P_right, np.integer): | |
1143 figHR_whole = GraphWholeCov(phage_hybrid_coverage[1][max(0,P_right-1000):min(gen_len,P_right+1000)], phagename + "-zoom-right", draw, "", P_right, 0, P_right-max(0,P_right-1000), 1, "Zoom Termini") | |
1144 else: | |
1145 P_right = phage_minus_norm["Position"][0] | |
1146 figHR_whole = GraphWholeCov(added_whole_coverage[max(0,P_right-1000):min(gen_len,P_right+1000)], phagename + "-zoom-right", draw, "", P_right, 0, P_right-max(0,P_right-1000), 1, "Zoom Termini") | |
1147 figHR_whole.savefig(imgdata2, format='png') | |
1148 imgdata2.seek(0) | |
1149 IMG2 = ImageReader(imgdata2) | |
1150 IMAGE2 = Image(IMG2.fileName, width=275, height=340, kind='proportional') | |
1151 IMAGE2.hAlign = 'CENTER' | |
1152 | |
1153 if Mu_like: | |
1154 data = [["Hybrid Coverage Zoom (Left)", "Hybrid Coverage Zoom (Right)"],[IMAGE,IMAGE2]] | |
1155 else: | |
1156 data = [["Whole Coverage Zoom (Left)", "Whole Coverage Zoom (Right)"],[IMAGE,IMAGE2]] | |
1157 t=Table(data, 2*[4*inch], 1*[0.25*inch]+1*[2*inch], hAlign='CENTER', style=[('LINEABOVE',(0,1),(1,1),1.5,colors.black),('FONT',(0,0),(-1,-1),'Helvetica-Bold'),('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1158 report.append(t) | |
1159 | |
1160 # Controls | |
1161 ptext = '<u><font size=12>General controls information</font></u>' | |
1162 report.append(Paragraph(ptext, styles["Justify"])) | |
1163 report.append(Spacer(1, 5)) | |
1164 | |
1165 if ave_whole_cov < 50: | |
1166 ptextawc = "WARNING: Under the limit of the software (50)" | |
1167 elif ave_whole_cov < 200: | |
1168 ptextawc = "WARNING: Low (<200), Li's method could not be reliable" | |
1169 else: | |
1170 ptextawc = "OK" | |
1171 | |
1172 data = [["Whole genome coverage", int(ave_whole_cov), ptextawc]] | |
1173 t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[3.5*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1174 report.append(t) | |
1175 | |
1176 drop_perc = len([i for i in added_whole_coverage if i < (ave_whole_cov/2)]) / float(len(added_whole_coverage)) | |
1177 if drop_perc < 1: | |
1178 ptextdp = "OK" | |
1179 else: | |
1180 ptextdp = "Check your genome reference" | |
1181 | |
1182 data = [["Weak genome coverage", "%.1f %%" %drop_perc, ptextdp]] | |
1183 t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[4*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1184 report.append(t) | |
1185 | |
1186 if paired != "": | |
1187 if len(insert) != 0: | |
1188 insert_mean = sum(insert)/len(insert) | |
1189 else: | |
1190 insert_mean = "-" | |
1191 data = [["Insert mean size", insert_mean, "Mean insert estimated from paired-end reads"]] | |
1192 t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[4*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1193 report.append(t) | |
1194 | |
1195 if lost_perc > 25: | |
1196 ptextlp = "Warning: high percentage of reads lost" | |
1197 else: | |
1198 ptextlp = "OK" | |
1199 | |
1200 data = [["Reads lost during alignment", "%.1f %%" %lost_perc, ptextlp]] | |
1201 t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[4*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1202 report.append(t) | |
1203 report.append(Spacer(1, 5)) | |
1204 | |
1205 # DETAILED SCORE | |
1206 ptext = '<b><font size=14>i) PhageTerm method</font></b>' | |
1207 report.append(Paragraph(ptext, styles["Left"])) | |
1208 report.append(Spacer(1, 10)) | |
1209 | |
1210 ptext = '<i><font size=10>Reads are mapped on the reference to determine the starting position coverage (SPC) as well as the coverage (COV) in each orientation. These values are then used to compute the variable T = SPC/COV. The average value of T at positions along the genome that are not termini is expected to be 1/F, where F is the average fragment size. For the termini that depends of the packaging mode. Cos Phages: no reads should start before the terminus and therefore T=1. DTR phages: for N phages present in the sample, there should be N fragments that start at the terminus and N fragments that cover the edge of the repeat on the other side of the genome as a results T is expected to be 0.5. Pac phages: for N phages in the sample, there should be N/C fragments starting at the pac site, where C is the number of phage genome copies per concatemer. In the same sample N fragments should cover the pac site position, T is expected to be (N/C)/(N+N/C) = 1/(1+C). To assess whether the number of reads starting at a given position along the genome can be considered a significant outlier, PhageTerm first segments the genome according to coverage using a regression tree. A gamma distribution is fitted to SPC for each segment and an adjusted p-value is computed for each position. If several significant peaks are detected within a small sequence window (default: 20bp), their X values are merged.</font></i>' | |
1211 report.append(Paragraph(ptext, styles["Justify"])) | |
1212 report.append(Spacer(1, 5)) | |
1213 | |
1214 # surrounding | |
1215 if surrounding > 0: | |
1216 data = [["Nearby Termini (Forward / Reverse)", str(len(picOUT_norm_forw)-1) + " / " + str(len(picOUT_norm_rev)-1), "Peaks localized %s bases around the maximum" %surrounding]] | |
1217 t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[4*inch], 1*[0.25*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1218 report.append(t) | |
1219 | |
1220 report.append(Spacer(1, 10)) | |
1221 | |
1222 # Li's Method | |
1223 if not multi: | |
1224 ptext = '<b><font size=14>ii) Li\'s method</font></b>' | |
1225 report.append(Paragraph(ptext, styles["Left"])) | |
1226 report.append(Spacer(1, 10)) | |
1227 | |
1228 ptext = '<i><font size=10>The second approach is based on the calculation and interpretation of three specific ratios R1, R2 and R3 as suggested in a previous publication from Li et al. 2014. The first ratio, is calculated as follow: the highest starting frequency found on either the forward or reverse strands is divided by the average starting frequency, R1 = (highest frequency/average frequency). Li’s et al. have proposed three possible interpretation of the R1 ratio. First, if R1 < 30, the phage genome does not have any termini, and is either circular or completely permuted and terminally redundant. The second interpretation for R1 is when 30 ≤ R1 ≥ 100, suggesting the presence of preferred termini with terminal redundancy and apparition of partially circular permutations. At last if R1 > 100 that is an indication that at least one fixed termini is present with terminase recognizing a specific site. The two other ratios are R2 and R3 and the calculation is done in a similar manner. R2 is calculated using the highest two frequencies (T1-F and T2-F) found on the forward strand and R3 is calculated using the highest two frequencies (T1-R and T2-R) found on the reverse strand. To calculate these two ratios, we divide the highest frequency by the second highest frequency T2. So R2 = (T1-F / T2-F) and R3 = (T1-R / T2-R). These two ratios are used to analyze termini characteristics on each strand taken individually. Li et al. suggested two possible interpretations for R2 and R3 ratios combine to R1. When R1 < 30 and R2 < 3, we either have no obvious termini on the forward strand, or we have multiple preferred termini on the forward strand, if 30 ≤ R1 ≤ 100. If R2 > 3, it is suggested that there is an obvious unique termini on the forward strand. The same reasoning is applicable for the result of R3. Combining the results for ratios found with this approach, it is possible to make the first prediction for the viral packaging mode of the analyzed phage. A unique obvious termini present at both ends (both R2 and R3 > 3) reveals the presence of a COS mode of packaging. The headful mode of packaging PAC is concluded when we have a single obvious termini only on one strand.</font></i>' | |
1229 report.append(Paragraph(ptext, styles["Justify"])) | |
1230 report.append(Spacer(1, 5)) | |
1231 | |
1232 if surrounding > 0: | |
1233 data = [["Nearby Termini (Forward / Reverse)", str(len(picOUT_forw)-1) + " / " + str(len(picOUT_rev)-1), "Peaks localized %s bases around the maximum" %surrounding]] | |
1234 t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[3.5*inch], 1*[0.25*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1235 report.append(t) | |
1236 report.append(Spacer(1, 5)) | |
1237 | |
1238 if R1 > 100: | |
1239 ptextR1 = "At least one fixed termini is present with terminase recognizing a specific site." | |
1240 elif R1 > 30: | |
1241 ptextR1 = "Presence of preferred termini with terminal redundancy and apparition of partially circular permutations." | |
1242 else: | |
1243 ptextR1 = "Phage genome does not have any termini, and is either circular or completely permuted and terminally redundant." | |
1244 | |
1245 data = [["R1 - highest freq./average freq.", int(R1), Paragraph(ptextR1, styles["Justify"])]] | |
1246 t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[3.5*inch], 1*[0.25*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1247 report.append(t) | |
1248 report.append(Spacer(1, 5)) | |
1249 | |
1250 if R2 < 3 and R1 < 30: | |
1251 ptextR2 = "No obvious termini on the forward strand." | |
1252 elif R2 < 3 : | |
1253 ptextR2 = "Multiple preferred termini on the forward strand." | |
1254 elif R2 >= 3: | |
1255 ptextR2 = "Unique termini on the forward strand." | |
1256 | |
1257 data = [["R2 Forw - highest freq./second freq.", int(R2), Paragraph(ptextR2, styles["Justify"])]] | |
1258 t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[3.5*inch], 1*[0.25*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1259 report.append(t) | |
1260 report.append(Spacer(1, 5)) | |
1261 | |
1262 if R3 < 3 and R1 < 30: | |
1263 ptextR3 = "No obvious termini on the reverse strand." | |
1264 elif R3 < 3 : | |
1265 ptextR3 = "Multiple preferred termini on the reverse strand." | |
1266 elif R3 >= 3: | |
1267 ptextR3 = "Unique termini on the reverse strand." | |
1268 | |
1269 data = [["R3 Rev - highest freq./second freq.", int(R3), Paragraph(ptextR3, styles["Justify"])]] | |
1270 t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[3.5*inch], 1*[0.25*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1271 report.append(t) | |
1272 | |
1273 # CREDITS and TIME | |
1274 ptext = '<font size=8>%s</font>' % "Please cite: Sci. Rep. DOI 10.1038/s41598-017-07910-5" | |
1275 report.append(Paragraph(ptext, styles["Center"])) | |
1276 ptext = '<font size=8>%s</font>' % "Garneau, Depardieu, Fortier, Bikard and Monot. PhageTerm: Determining Bacteriophage Termini and Packaging using NGS data." | |
1277 report.append(Paragraph(ptext, styles["Center"])) | |
1278 ptext = '<font size=8>Report generated : %s</font>' % time.ctime() | |
1279 report.append(Paragraph(ptext, styles["Center"])) | |
1280 | |
1281 # CREATE PDF | |
1282 if not multi: | |
1283 doc.build(report) | |
1284 else: | |
1285 report.append(PageBreak()) | |
1286 return report | |
1287 return | |
1288 | |
1289 def SummaryReport(phagename, DR, no_match): | |
1290 """ Create first page of multi reports.""" | |
1291 report=[] | |
1292 styles=getSampleStyleSheet() | |
1293 styles.add(ParagraphStyle(name='Justify', alignment=TA_JUSTIFY)) | |
1294 styles.add(ParagraphStyle(name='Center', alignment=TA_CENTER)) | |
1295 styles.add(ParagraphStyle(name='Right', alignment=TA_RIGHT)) | |
1296 styles.add(ParagraphStyle(name='Left', alignment=TA_LEFT)) | |
1297 | |
1298 ### GENERAL INFORMATION | |
1299 | |
1300 # TITLE | |
1301 ptext = '<b><font size=16>' + phagename + ' PhageTerm Analysis</font></b>' | |
1302 report.append(Paragraph(ptext, styles["Center"])) | |
1303 report.append(Spacer(1, 15)) | |
1304 | |
1305 # No Match | |
1306 if len(no_match) > 0: | |
1307 ptext = '<u><font size=14>No Match ('+ str(len(no_match)) +')</font></u>' | |
1308 report.append(Paragraph(ptext, styles["Left"])) | |
1309 report.append(Spacer(1, 10)) | |
1310 | |
1311 data = [["Name", "Class", "Left", "Right", "Type", "Orient", "Coverage", "Comments"]] | |
1312 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-1),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1313 report.append(t) | |
1314 | |
1315 for contig in no_match: | |
1316 P_comments = "No read match" | |
1317 | |
1318 data = [[contig, "-", "-", "-", "-", "-", 0, P_comments]] | |
1319 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1320 report.append(t) | |
1321 | |
1322 # COS Phages | |
1323 count_COS = len(DR["COS (3')"]) + len(DR["COS (5')"]) + len(DR["COS"]) | |
1324 ptext = '<u><font size=14>COS Phages ('+ str(count_COS) +')</font></u>' | |
1325 report.append(Paragraph(ptext, styles["Left"])) | |
1326 report.append(Spacer(1, 10)) | |
1327 | |
1328 if count_COS != 0: | |
1329 | |
1330 data = [["Name", "Class", "Left", "Right", "Type", "Orient", "Coverage", "Comments"]] | |
1331 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-1),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1332 report.append(t) | |
1333 | |
1334 for DC in DR["COS (3')"]: | |
1335 P_comments = "" | |
1336 if int(DR["COS (3')"][DC]["ave_whole_cov"]) < 50: | |
1337 P_comments = "Low coverage" | |
1338 | |
1339 data = [[DC, DR["COS (3')"][DC]["P_class"], DR["COS (3')"][DC]["P_left"], DR["COS (3')"][DC]["P_right"], DR["COS (3')"][DC]["P_type"], DR["COS (3')"][DC]["P_orient"], int(DR["COS (3')"][DC]["ave_whole_cov"]), P_comments]] | |
1340 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1341 report.append(t) | |
1342 | |
1343 for DC in DR["COS (5')"]: | |
1344 P_comments = "" | |
1345 if int(DR["COS (5')"][DC]["ave_whole_cov"]) < 50: | |
1346 P_comments = "Low coverage" | |
1347 | |
1348 data = [[DC, DR["COS (5')"][DC]["P_class"], DR["COS (5')"][DC]["P_left"], DR["COS (5')"][DC]["P_right"], DR["COS (5')"][DC]["P_type"], DR["COS (5')"][DC]["P_orient"], int(DR["COS (5')"][DC]["ave_whole_cov"]), P_comments]] | |
1349 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1350 report.append(t) | |
1351 | |
1352 for DC in DR["COS"]: | |
1353 P_comments = "" | |
1354 if int(DR["COS"][DC]["ave_whole_cov"]) < 50: | |
1355 P_comments = "Low coverage" | |
1356 | |
1357 data = [[DC, DR["COS"][DC]["P_class"], DR["COS"][DC]["P_left"], DR["COS"][DC]["P_right"], DR["COS"][DC]["P_type"], DR["COS"][DC]["P_orient"], int(DR["COS"][DC]["ave_whole_cov"]), P_comments]] | |
1358 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1359 report.append(t) | |
1360 | |
1361 report.append(Spacer(1, 5)) | |
1362 | |
1363 # DTR Phages | |
1364 count_DTR = len(DR["DTR (short)"]) + len(DR["DTR (long)"]) | |
1365 ptext = '<u><font size=14>DTR Phages ('+ str(count_DTR) +')</font></u>' | |
1366 report.append(Paragraph(ptext, styles["Left"])) | |
1367 report.append(Spacer(1, 10)) | |
1368 | |
1369 if count_DTR != 0: | |
1370 | |
1371 data = [["Name", "Class", "Left", "Right", "Type", "Orient", "Coverage", "Comments"]] | |
1372 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-1),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1373 report.append(t) | |
1374 | |
1375 for DC in DR["DTR (short)"]: | |
1376 P_comments = "" | |
1377 if int(DR["DTR (short)"][DC]["ave_whole_cov"]) < 50: | |
1378 P_comments = "Low coverage" | |
1379 | |
1380 data = [[DC, DR["DTR (short)"][DC]["P_class"], DR["DTR (short)"][DC]["P_left"], DR["DTR (short)"][DC]["P_right"], DR["DTR (short)"][DC]["P_type"], DR["DTR (short)"][DC]["P_orient"], int(DR["DTR (short)"][DC]["ave_whole_cov"]), P_comments]] | |
1381 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1382 report.append(t) | |
1383 | |
1384 for DC in DR["DTR (long)"]: | |
1385 P_comments = "" | |
1386 if int(DR["DTR (long)"][DC]["ave_whole_cov"]) < 50: | |
1387 P_comments = "Low coverage" | |
1388 | |
1389 data = [[DC, DR["DTR (long)"][DC]["P_class"], DR["DTR (long)"][DC]["P_left"], DR["DTR (long)"][DC]["P_right"], DR["DTR (long)"][DC]["P_type"], DR["DTR (long)"][DC]["P_orient"], int(DR["DTR (long)"][DC]["ave_whole_cov"]), P_comments]] | |
1390 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1391 report.append(t) | |
1392 | |
1393 report.append(Spacer(1, 5)) | |
1394 | |
1395 # Headful Phages | |
1396 count_Headful = len(DR["Headful (pac)"]) | |
1397 ptext = '<u><font size=14>Headful Phages ('+ str(count_Headful) +')</font></u>' | |
1398 report.append(Paragraph(ptext, styles["Left"])) | |
1399 report.append(Spacer(1, 10)) | |
1400 | |
1401 if count_Headful != 0: | |
1402 | |
1403 data = [["Name", "Class", "Left", "Right", "Type", "Orient", "Coverage", "Comments"]] | |
1404 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-1),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1405 report.append(t) | |
1406 | |
1407 for DC in DR["Headful (pac)"]: | |
1408 P_comments = "" | |
1409 if int(DR["Headful (pac)"][DC]["ave_whole_cov"]) < 50: | |
1410 P_comments = "Low coverage" | |
1411 | |
1412 data = [[DC, DR["Headful (pac)"][DC]["P_class"], DR["Headful (pac)"][DC]["P_left"], DR["Headful (pac)"][DC]["P_right"], DR["Headful (pac)"][DC]["P_type"], DR["Headful (pac)"][DC]["P_orient"], int(DR["Headful (pac)"][DC]["ave_whole_cov"]), P_comments]] | |
1413 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1414 report.append(t) | |
1415 | |
1416 report.append(Spacer(1, 5)) | |
1417 | |
1418 # OTHERS Phages | |
1419 count_Others = len(DR["Mu-like"]) + len(DR["UNKNOWN"]) + len(DR["NEW"]) | |
1420 ptext = '<u><font size=14>Others Phages ('+ str(count_Others) +')</font></u>' | |
1421 report.append(Paragraph(ptext, styles["Left"])) | |
1422 report.append(Spacer(1, 10)) | |
1423 | |
1424 if count_Others != 0: | |
1425 | |
1426 data = [["Name", "Class", "Left", "Right", "Type", "Orient", "Coverage", "Comments"]] | |
1427 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-1),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1428 report.append(t) | |
1429 | |
1430 for DC in DR["Mu-like"]: | |
1431 P_comments = "" | |
1432 if int(DR["Mu-like"][DC]["ave_whole_cov"]) < 50: | |
1433 P_comments = "Low coverage" | |
1434 | |
1435 data = [[DC, DR["Mu-like"][DC]["P_class"], DR["Mu-like"][DC]["P_left"], DR["Mu-like"][DC]["P_right"], DR["Mu-like"][DC]["P_type"], DR["Mu-like"][DC]["P_orient"], int(DR["Mu-like"][DC]["ave_whole_cov"]), P_comments]] | |
1436 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1437 report.append(t) | |
1438 | |
1439 for DC in DR["NEW"]: | |
1440 P_comments = "" | |
1441 if int(DR["NEW"][DC]["ave_whole_cov"]) < 50: | |
1442 P_comments = "Low coverage" | |
1443 | |
1444 data = [[DC, DR["NEW"][DC]["P_class"], DR["NEW"][DC]["P_left"], DR["NEW"][DC]["P_right"], DR["NEW"][DC]["P_type"], DR["NEW"][DC]["P_orient"], int(DR["NEW"][DC]["ave_whole_cov"]), P_comments]] | |
1445 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1446 report.append(t) | |
1447 | |
1448 for DC in DR["UNKNOWN"]: | |
1449 P_comments = "" | |
1450 if int(DR["UNKNOWN"][DC]["ave_whole_cov"]) < 50: | |
1451 P_comments = "Low coverage" | |
1452 | |
1453 data = [[DC, DR["UNKNOWN"][DC]["P_class"], DR["UNKNOWN"][DC]["P_left"], DR["UNKNOWN"][DC]["P_right"], DR["UNKNOWN"][DC]["P_type"], DR["UNKNOWN"][DC]["P_orient"], int(DR["UNKNOWN"][DC]["ave_whole_cov"]), P_comments]] | |
1454 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) | |
1455 report.append(t) | |
1456 | |
1457 report.append(Spacer(1, 5)) | |
1458 | |
1459 report.append(PageBreak()) | |
1460 | |
1461 return report | |
1462 | |
1463 def WorkflowReport(phagename, P_class, P_left, P_right, P_type, P_orient, ave_whole_cov, multi = 0, phage_plus_norm=None, phage_minus_norm=None,*args, **kwargs): | |
1464 """ Text report for each phage.""" | |
1465 | |
1466 P_comments = "" | |
1467 if ave_whole_cov < 50: | |
1468 P_comments = "WARNING: Low coverage" | |
1469 | |
1470 if ave_whole_cov == 0: | |
1471 P_comments = "No read match" | |
1472 | |
1473 if not multi: | |
1474 filoutWorkflow = open(phagename + "_workflow.txt", "w") | |
1475 filoutWorkflow.write("#phagename\tClass\tLeft\tRight\tType\tOrient\tCoverage\tComments\n") | |
1476 filoutWorkflow.write(phagename + "\t" + P_class + "\t" + str(P_left) + "\t" + str(P_right) + "\t" + P_type + "\t" + P_orient + "\t" + str(ave_whole_cov) + "\t" + P_comments + "\n") | |
1477 filoutWorkflow.close() | |
1478 else: | |
1479 pval_left_peak="-" | |
1480 pval_adj_left_peak="-" | |
1481 pval_right_peak="-" | |
1482 pval_adj_right_peak="-" | |
1483 if isinstance(P_left,np.int64): | |
1484 # get pvalue and adjusted pvalue for this + peak | |
1485 left_peak_infos=phage_plus_norm.loc[phage_plus_norm['Position']==P_left] | |
1486 pval_left_peak=left_peak_infos["pval_gamma"] | |
1487 pval_left_peak=pval_left_peak.values[0] | |
1488 pval_adj_left_peak=left_peak_infos["pval_gamma_adj"] | |
1489 pval_adj_left_peak =pval_adj_left_peak.values[0] | |
1490 if isinstance(P_right,np.int64): | |
1491 # get pvalue and adjusted pvalue for this + peak | |
1492 right_peak_infos=phage_minus_norm.loc[phage_minus_norm['Position']==P_right] | |
1493 pval_right_peak=right_peak_infos["pval_gamma"] | |
1494 pval_right_peak=pval_right_peak.values[0] | |
1495 pval_adj_right_peak=right_peak_infos["pval_gamma_adj"] | |
1496 pval_adj_right_peak=pval_adj_right_peak.values[0] | |
1497 return phagename + "\t" + P_class + "\t" + str(P_left) + "\t" +str(pval_left_peak)+ "\t" +str(pval_adj_left_peak)+\ | |
1498 "\t" + str(P_right) + "\t" + str(pval_right_peak) + "\t" + str(pval_adj_right_peak)+ "\t" + P_type +\ | |
1499 "\t" + P_orient + "\t" + str(ave_whole_cov) + "\t" + P_comments + "\n" | |
1500 return | |
1501 | |
1502 def EstimateTime(secondes): | |
1503 """ Convert secondes into time.""" | |
1504 conv = (86400,3600,60,1) | |
1505 result = [0,0,0,0] | |
1506 i=0 | |
1507 while secondes>0: | |
1508 result[i]= secondes/conv[i] | |
1509 secondes=secondes-result[i]*conv[i] | |
1510 i+=1 | |
1511 return str(result[0]) + " Days " + str(result[1]) + " Hrs " + str(result[2]) + " Min " + str(result[3]) + " Sec" | |
1512 | |
1513 | |
1514 | |
1515 | |
1516 | |
1517 |