annotate _modules/generate_report.py @ 0:69e8f12c8b31 draft

"planemo upload"
author bioit_sciensano
date Fri, 11 Mar 2022 15:06:20 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
1 from __future__ import print_function
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
2 import os
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
3 import pickle
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
4 from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, Table, TableStyle, PageBreak
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
5 from reportlab.lib.pagesizes import letter, landscape
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
6 from _modules.functions_PhageTerm import SummaryReport,WorkflowReport,ExportCohesiveSeq,ExportPhageSequence,CreateReport
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
7
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
8
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
# Names under which the items of one pickled per-phage result are stored.
# The position of a name in this tuple is the index of the matching item in
# the pickled sequence, so the order must never be changed.
_DR_FIELD_NAMES = (
    "phagename", "seed", "added_whole_coverage", "Redundant",
    "P_left", "P_right", "Permuted", "P_orient",
    "termini_coverage_norm_close", "picMaxPlus_norm_close",
    "picMaxMinus_norm_close", "gen_len", "tot_reads", "P_seqcoh",
    "phage_plus_norm", "phage_minus_norm", "ArtPackmode", "termini",
    "forward", "reverse", "ArtOrient", "ArtcohesiveSeq",
    "termini_coverage_close", "picMaxPlus_close", "picMaxMinus_close",
    "picOUT_norm_forw", "picOUT_norm_rev", "picOUT_forw", "picOUT_rev",
    "lost_perc", "ave_whole_cov", "R1", "R2", "R3",
    "host", "host_len", "host_whole_coverage",
    "picMaxPlus_host", "picMaxMinus_host", "surrounding", "drop_cov",
    "paired", "insert", "phage_hybrid_coverage", "host_hybrid_coverage",
    "added_paired_whole_coverage", "Mu_like", "test_run",
    "P_class", "P_type", "P_concat", "idx_refseq_in_list",
)


def loadDR(DR_path,DR):
    """Load every pickled per-phage result found under DR_path into DR.

    DR_path must contain only sub-directories.  Each sub-directory name is
    used as a first-level key of DR (the P_class name) and each file name
    inside it as a second-level key (the phage name).  Every file must hold a
    pickled, integer-indexable sequence of at least 52 items; item i is stored
    under the name _DR_FIELD_NAMES[i].

    Args:
        DR_path: path of the directory to scan.
        DR: dict of dicts, updated in place.  A first-level entry is created
            when DR does not have one yet for a sub-directory.

    Raises:
        RuntimeError: if DR_path contains an entry that is not a directory.
        IndexError: if a pickled sequence holds fewer than 52 items.
    """
    for d in os.listdir(DR_path):  # iterate over P_class subdirectories.
        class_dir = os.path.join(DR_path, d)
        if not os.path.isdir(class_dir):
            raise RuntimeError(DR_path + " should contain only directories.")

        for fic_name in os.listdir(class_dir):  # all files for a given P_class
            fname = os.path.join(class_dir, fic_name)
            # SECURITY: unpickling can execute arbitrary code.  Only point
            # DR_path at files written by a trusted run of this tool.
            with open(fname, 'rb') as f:
                loaded_items = pickle.load(f)

            # d is P_class name, fic_name is phagename.
            dict_tmp = dict()
            for idx, field in enumerate(_DR_FIELD_NAMES):
                dict_tmp[field] = loaded_items[idx]

            # Trace output kept from the original implementation.
            print("P_left=",dict_tmp["P_left"],type(dict_tmp["P_left"]))
            print("P_right=",dict_tmp["P_right"],type(dict_tmp["P_right"]))

            # setdefault: do not fail when DR was not pre-populated with d.
            DR.setdefault(d, {})[fic_name] = dict_tmp
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
77
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
78
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
79
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
80
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
def genReport(fParms,inDArgs,inRawDArgs,no_match,DR):
    """Assemble and write the text, FASTA and PDF outputs for all results.

    Args:
        fParms: object read for its ``workflow``, ``test`` and ``test_run``
            attributes.
        inDArgs: object read for its ``nbr_virome`` and ``refseq_liste``
            attributes.
        inRawDArgs: object read for its ``phagename`` attribute, which is the
            prefix of every output file name.
        no_match: collection of contig names; each one is added to the
            workflow table when fParms.workflow is set.
        DR: dict of dicts (first key: class, second key: contig name) whose
            values are the per-contig result dicts.

    Raises:
        SystemExit: when len(no_match) equals inDArgs.nbr_virome.
    """
    # Local import so that this function does not depend on the interactive
    # ``exit`` helper, which is only installed by the ``site`` module.
    import sys

    # Test No Match
    if len(no_match) == inDArgs.nbr_virome:
        print("\n\nERROR: No reads match, please check your reference file.")
        # NOTE(review): exit status is left at 0 as in the original; confirm
        # whether callers would rather get a non-zero status here.
        sys.exit()

    # Report Resume
    multiReport = SummaryReport(inRawDArgs.phagename, DR, no_match)
    coh_seq_parts = []
    phage_seq_parts = []
    workflow_parts = ["#phagename\tClass\tLeft\tRight\tType\tOrient\tCoverage\tComments\n"]

    # No Match in workflow
    if fParms.workflow:
        for no_match_contig in no_match:
            workflow_parts.append(
                WorkflowReport(no_match_contig, "-", "-", "-", "-", "-", 0, 1))

    draw = 0  # TODO VL: ask what is the use of this parameter that is always 0...

    for DPC in DR:
        for DC in DR[DPC]:
            res = DR[DPC][DC]

            # Text report
            if fParms.workflow:
                # phagename, P_class, P_left, P_right, P_type, P_orient,
                # ave_whole_cov, multi
                workflow_parts.append(
                    WorkflowReport(DC, res["P_class"], res["P_left"],
                                   res["P_right"],
                                   res["P_type"], res["P_orient"],
                                   res["ave_whole_cov"], 1))

            # Sequence
            refseq = inDArgs.refseq_liste[res["idx_refseq_in_list"]]
            coh_seq_parts.append(
                ExportCohesiveSeq(DC, res["ArtcohesiveSeq"], res["P_seqcoh"],
                                  fParms.test_run, 1))
            phage_seq_parts.append(
                ExportPhageSequence(DC, res["P_left"], res["P_right"], refseq,
                                    res["P_orient"], res["Redundant"], res["Mu_like"],
                                    res["P_class"], res["P_seqcoh"], fParms.test_run, 1))

            # Report: the argument order is positional and must match
            # CreateReport's signature exactly.
            multiReport = CreateReport(
                DC, res["seed"], res["added_whole_coverage"], draw,
                res["Redundant"], res["P_left"], res["P_right"],
                res["Permuted"], res["P_orient"],
                res["termini_coverage_norm_close"], res["picMaxPlus_norm_close"],
                res["picMaxMinus_norm_close"], res["gen_len"],
                res["tot_reads"], res["P_seqcoh"], res["phage_plus_norm"],
                res["phage_minus_norm"], res["ArtPackmode"], res["termini"],
                res["forward"], res["reverse"], res["ArtOrient"],
                res["ArtcohesiveSeq"], res["termini_coverage_close"],
                res["picMaxPlus_close"], res["picMaxMinus_close"],
                res["picOUT_norm_forw"], res["picOUT_norm_rev"],
                res["picOUT_forw"], res["picOUT_rev"], res["lost_perc"],
                res["ave_whole_cov"], res["R1"], res["R2"],
                res["R3"], res["host"], res["host_len"],
                res["host_whole_coverage"], res["picMaxPlus_host"],
                res["picMaxMinus_host"], res["surrounding"], res["drop_cov"],
                res["paired"], res["insert"], res["phage_hybrid_coverage"],
                res["host_hybrid_coverage"], res["added_paired_whole_coverage"],
                res["Mu_like"], fParms.test_run, res["P_class"],
                res["P_type"], res["P_concat"], 1, multiReport)

    # NOTE(review): every output below is assumed to be skipped when
    # fParms.test is set -- confirm that the FASTA files and the PDF are meant
    # to be guarded by this flag together with the workflow table.
    if not fParms.test:
        # Workflow
        if fParms.workflow:
            with open(inRawDArgs.phagename + "_workflow.txt", "w") as filoutWorkflow:
                filoutWorkflow.write("".join(workflow_parts))

        # Concatenate sequences
        with open(inRawDArgs.phagename + "_cohesive-sequence.fasta", "w") as filoutCohSeq:
            filoutCohSeq.write("".join(coh_seq_parts))

        with open(inRawDArgs.phagename + "_sequence.fasta", "w") as filoutPhageSeq:
            filoutPhageSeq.write("".join(phage_seq_parts))

        # Concatenate report
        doc = SimpleDocTemplate("%s_PhageTerm_report.pdf" % inRawDArgs.phagename, pagesize=letter, rightMargin=10,
                                leftMargin=10, topMargin=5, bottomMargin=10)
        doc.build(multiReport)