annotate make_EAR.py @ 2:a34826ae0a73 draft default tip

planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
author bgruening
date Fri, 30 Aug 2024 09:27:31 +0000
parents 8c99976de71e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
2 import argparse
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
3 import logging
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
4 import math
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
5 import os
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
6 import re
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
7 import sys
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
8 from datetime import datetime
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
9
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
10 import pytz
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
11 import requests
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
12 import yaml
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
13 from reportlab.lib import colors
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
14 from reportlab.lib.pagesizes import A4
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
15 from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
16 from reportlab.lib.units import cm
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
17 from reportlab.platypus import Image, PageBreak, Paragraph, SimpleDocTemplate, Spacer, Table, TableStyle
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
18
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
19
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
20 # make_EAR_glxy.py
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
21 # CAUTION: This is for the Galaxy version!
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
22 # by Diego De Panis
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
23 # ERGA Sequencing and Assembly Committee
2
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
24 EAR_version = "v24.08.26"
0
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
25
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
26
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
27 def make_report(yaml_file):
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
28 logging.basicConfig(filename='EAR.log', level=logging.INFO)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
29 # Read the content from EAR.yaml file
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
30 with open(yaml_file, "r") as file:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
31 yaml_data = yaml.safe_load(file)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
32
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
33 # FUNCTIONS ###################################################################################
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
34
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
def format_number(value):
    """Format a numeric value with thousands separators for display.

    Values that convert to a whole number are rendered without a decimal
    part (``"1234"`` -> ``"1,234"``); any other number keeps its
    fractional digits (``1234.5`` -> ``"1,234.5"``).

    Args:
        value: A number, or any object that ``float()`` may accept.

    Returns:
        The formatted string, or ``value`` itself, unchanged, when it
        cannot be converted to a float.
    """
    try:
        value_float = float(value)
    except (TypeError, ValueError):
        # ValueError: non-numeric text; TypeError: None and other objects
        # float() rejects outright. Either way hand the input back as-is.
        return value

    if value_float.is_integer():
        # format as an integer if no decimal part
        return f'{int(value_float):,}'
    # format as a float
    return f'{value_float:,}'
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
47
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
48 # extract gfastats values
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
def extract_gfastats_values(content, keys):
    """Pull the values of the given fields out of gfastats report text.

    For every key, the first occurrence of ``"<key>: <value>"`` in
    ``content`` is located and the text after ``": "`` up to the end of
    that line is captured.

    Args:
        content: Full text of a gfastats report.
        keys: Iterable of field names to look up, matched literally.

    Returns:
        A list with one captured string per key, in the order of ``keys``.

    Raises:
        IndexError: If a key does not occur in ``content``.
    """
    values = []
    for key in keys:
        # Escape the key so characters such as '.', '(' or '+' are matched
        # literally instead of being interpreted as regex syntax.
        match = re.search(rf"{re.escape(key)}: (.+)", content)
        if match is None:
            # Same exception type as indexing an empty findall() result,
            # but with a message that names the missing field.
            raise IndexError(f"'{key}' not found in gfastats report")
        values.append(match.group(1))
    return values
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
51
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
# gfastats field names looked up in the reports
keys = [
    "Total scaffold length",
    "GC content %",
    "# gaps in scaffolds",
    "Total gap length in scaffolds",
    "# scaffolds",
    "Scaffold N50",
    "Scaffold L50",
    "Scaffold L90",
    "# contigs",
    "Contig N50",
    "Contig L50",
    "Contig L90",
]

# shorter labels for some fields; anything not listed keeps its gfastats name
display_overrides = {
    "Total scaffold length": "Total bp",
    "GC content %": "GC %",
    "Total gap length in scaffolds": "Total gap bp",
    "# scaffolds": "Scaffolds",
    "# contigs": "Contigs",
}
display_names = [display_overrides.get(field, field) for field in keys]

# positions of two fields within keys
total_length_index = keys.index("Total scaffold length")
gaps_index = keys.index("# gaps in scaffolds")

# NOTE(review): presumably fields left out of some table later on - confirm at the point of use
exclusion_list = ["# gaps in scaffolds"]
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
77
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
78 # extract Total bp from gfastats report
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
def extract_total_bp_from_gfastats(gfastats_path):
    """Read the total scaffold length from a gfastats report file.

    Args:
        gfastats_path: Path to a gfastats report containing a
            ``"Total scaffold length: <n>"`` line.

    Returns:
        The length as a string with thousands separators, e.g.
        ``"1,234,567"``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the line is missing or its value is not an integer.
    """
    with open(gfastats_path, "r") as f:
        content = f.read()

    match = re.search(r"Total scaffold length: (.+)", content)
    if match is None:
        # Fail with a message that names the file instead of an
        # AttributeError raised from calling .group() on None.
        raise ValueError(f"'Total scaffold length' not found in {gfastats_path}")

    # the report may already carry thousands separators; drop them before parsing
    total_bp = int(match.group(1).replace(',', ''))
    return f"{total_bp:,}"
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
85
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
86 # compute EBP quality metric
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
def compute_ebp_metric(haplotype, gfastats_path, qv_value):
    """Build the EBP quality metric sentence for one haplotype.

    The metric has the form ``<c>.<s>.Q<q>`` where ``c`` and ``s`` are the
    floored base-10 logarithms of the contig and scaffold N50 read from the
    gfastats report, and ``q`` is the floored QV.

    Args:
        haplotype: Label inserted into the returned sentence.
        gfastats_path: Path to the gfastats report to read.
        qv_value: QV as a number or numeric string.

    Returns:
        The sentence ``"Obtained EBP quality metric for <haplotype>: ..."``.
    """
    with open(gfastats_path, "r") as report:
        report_text = report.read()

    contig_n50, scaffold_n50 = extract_gfastats_values(
        report_text, ["Contig N50", "Scaffold N50"]
    )

    def order_of_magnitude(raw):
        # values may carry thousands separators, e.g. "1,234,567"
        return math.floor(math.log10(int(raw.replace(',', ''))))

    contig_exp = order_of_magnitude(contig_n50)
    scaffold_exp = order_of_magnitude(scaffold_n50)
    qv_floor = math.floor(float(qv_value))

    return f"Obtained EBP quality metric for {haplotype}: {contig_exp}.{scaffold_exp}.Q{qv_floor}"
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
98
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
99 # extract qv values
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
def get_qv_value(file_path, order, tool, haplotype):
    """Read a QV value from a tab-separated report file.

    The value is taken from the fourth column of line ``order`` (0-based),
    but only when the file has exactly one line or the first column of its
    third line is ``"Both"``.

    Args:
        file_path: Path to the tab-separated file.
        order: 0-based index of the line to read.
        tool: Tool name, used only in the error log message.
        haplotype: Haplotype label, used only in the error log message.

    Returns:
        The fourth-column text, or ``''`` when the file does not have the
        expected layout or cannot be read/parsed (the latter is logged).
    """
    try:
        with open(file_path, 'r') as file:
            lines = file.readlines()
        if len(lines) > order and (len(lines) == 1 or lines[2].split('\t')[0].strip() == "Both"):
            return lines[order].split('\t')[3]
    except Exception as e:
        # best-effort: a broken or missing report must not abort the run
        logging.error("Error reading %s for tool %s and haplotype %s: %s", file_path, tool, haplotype, str(e))
    # One "no value" result on every path, instead of an implicit None when
    # the layout check fails and '' only after an exception.
    return ''
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
111
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
112 # extract Kmer completeness values
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
def get_completeness_value(file_path, order, tool, haplotype):
    """Read a k-mer completeness value from a tab-separated report file.

    Args:
        file_path: Path to the tab-separated file.
        order: 0-based index of the line to read.
        tool: Tool name, used only in the error log message.
        haplotype: Haplotype label, used only in the error log message.

    Returns:
        The whitespace-stripped fifth column of line ``order``, or ``''``
        when the file has too few lines or cannot be read/parsed (the
        latter is logged).
    """
    try:
        with open(file_path, 'r') as file:
            lines = file.readlines()
        if len(lines) > order:
            # strip() removes the newline that follows the fifth column
            # when it is the last one on the line
            return lines[order].split('\t')[4].strip()
    except Exception as e:
        # best-effort: a broken or missing report must not abort the run
        logging.error("Error reading %s for tool %s and haplotype %s: %s", file_path, tool, haplotype, str(e))
    # One "no value" result on every path, instead of an implicit None when
    # the file is shorter than `order` lines.
    return ''
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
124
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
125 # get unique part in file names
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
def find_unique_parts(file1, file2):
    """Return the dot-separated pieces that each name does not share.

    Both names are split on ``'.'``; every piece of one name that does not
    appear anywhere in the other is kept, in its original order.

    Args:
        file1: First file name.
        file2: Second file name.

    Returns:
        A 2-tuple of strings: the space-joined pieces found only in
        ``file1`` and the space-joined pieces found only in ``file2``.
    """
    tokens_a = file1.split('.')
    tokens_b = file2.split('.')
    known_a = set(tokens_a)
    known_b = set(tokens_b)

    only_in_a = ' '.join(token for token in tokens_a if token not in known_b)
    only_in_b = ' '.join(token for token in tokens_b if token not in known_a)
    return only_in_a, only_in_b
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
134
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
135 # extract BUSCO values
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
def extract_busco_values(file_path):
    """Extract the S, D, F and M percentages from a BUSCO summary file.

    The first ``C:...n:<digits>`` span in the file is located and the
    ``S:``, ``D:``, ``F:`` and ``M:`` percentages are read from it.

    Args:
        file_path: Path to the BUSCO summary text file.

    Returns:
        A 4-tuple ``(S, D, F, M)`` of strings such as ``"97.0%"``, or four
        empty strings when the file cannot be read or parsed (a warning is
        logged in that case).
    """
    try:
        with open(file_path, 'r') as handle:
            summary_text = handle.read()
        summary = re.findall(r"C:.*n:\d+", summary_text)[0]
        return tuple(
            re.findall(category + r":(\d+\.\d+%)", summary)[0]
            for category in ("S", "D", "F", "M")
        )
    except Exception as err:
        logging.warning(f"Error reading {file_path}: {str(err)}")
        return '', '', '', ''
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
149
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
150 # extract BUSCO info
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
def extract_busco_info(file_path):
    """Extract the BUSCO version and lineage details from a BUSCO summary file.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A ``(busco_version, lineage_info)`` tuple.  ``busco_version`` is the
        version string found after "# BUSCO version is:".  ``lineage_info`` is
        a 3-tuple of strings ``(lineage name, number of genomes or species,
        number of BUSCOs)`` taken from the "The lineage dataset is:" line.
        Each element is ``None`` when the corresponding line is not found, and
        both are ``None`` when the file cannot be read (a warning is logged).
    """
    busco_version = None
    lineage_info = None

    # Only the file access is guarded: the regex searches below cannot raise
    # on a string, so keeping them outside makes the failure mode explicit.
    try:
        with open(file_path, 'r') as file:
            content = file.read()
    except (OSError, TypeError, ValueError) as e:
        # OSError: missing/unreadable file; TypeError: e.g. file_path is None;
        # ValueError: invalid path or undecodable content (UnicodeDecodeError).
        logging.warning(f"Error reading {file_path}: {str(e)}")
        return busco_version, lineage_info

    version_match = re.search(r"# BUSCO version is: ([\d.]+)", content)
    if version_match:
        busco_version = version_match.group(1)

    # The count is labelled either "genomes" or "species" depending on the
    # file; a single alternation handles both instead of two separate scans.
    lineage_match = re.search(
        r"The lineage dataset is: (.*?) \(Creation date:.*?, "
        r"number of (?:genomes|species): (\d+), number of BUSCOs: (\d+)\)",
        content,
    )
    if lineage_match:
        lineage_info = lineage_match.groups()

    return busco_version, lineage_info
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
173
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
174 # Function to check and generate warning messages
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
def generate_warning_paragraphs(expected, observed, trait):
    """Compare an expected and an observed trait value and return warning paragraphs.

    The comparison depends on ``trait``:
      * "Haploid size (bp)": comma-formatted integers, flagged when the
        relative difference to the expected value exceeds 20%.
      * "Haploid Number" / "Ploidy": flagged when the integer values differ.
      * "Sample Sex": flagged when the trimmed, lower-cased strings differ.
    Any other trait produces nothing.

    Returns a list holding zero or one Paragraph.  Any exception raised while
    comparing or building the paragraph is logged as a warning and swallowed.
    """
    flagged = []
    try:
        note = None
        if trait == "Haploid size (bp)":
            exp_bp = int(expected.replace(',', ''))
            obs_bp = int(observed.replace(',', ''))
            if abs(exp_bp - obs_bp) / exp_bp > 0.20:
                note = f". Observed {trait} has >20% difference with Expected"
        elif trait in ("Haploid Number", "Ploidy"):
            # Both sides are coerced to int so the comparison is numeric
            if int(expected) != int(observed):
                note = f". Observed {trait} is different from Expected"
        elif trait == "Sample Sex":
            # Case and surrounding whitespace are ignored
            if expected.strip().lower() != observed.strip().lower():
                note = ". Observed sex is different from Sample sex"

        if note is not None:
            flagged.append(Paragraph(note, styles["midiStyle"]))
    except Exception as e:
        logging.warning(f"Error in generating warning for {trait}: {str(e)}")
    return flagged
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
199
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
200 # Generate warnings for curated haplotypes (qv, kcomp, busco)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
def generate_curated_warnings(haplotype, qv_value, completeness_value, busco_scores):
    """Build warning paragraphs for a curated haplotype (QV, k-mer completeness, BUSCO).

    Args:
        haplotype: Label inserted into each warning message.
        qv_value: QV value, anything ``float()`` accepts.
        completeness_value: K-mer completeness, anything ``float()`` accepts.
        busco_scores: Indexable whose items 0 and 1 are the BUSCO single-copy
            and duplicated percentages as strings with an optional trailing
            ``%``.

    Returns:
        A list of Paragraph objects, one per failed check, in the order
        QV < 40, completeness < 90, BUSCO single < 90, BUSCO duplicated > 5.

    Each metric is parsed and checked independently.  A metric that is missing
    or cannot be converted (for example an empty string) is logged as a warning
    and only disables its own check; previously a single bad value suppressed
    every warning for the haplotype.
    """

    def _as_float(label, read_raw):
        # read_raw is a callable so that indexing/attribute errors on
        # busco_scores are handled here as well as conversion errors.
        try:
            return float(read_raw())
        except (TypeError, ValueError, IndexError, KeyError, AttributeError) as e:
            logging.warning(f"Error in generating warnings for {haplotype} ({label}): {str(e)}")
            return None

    # (metric label, raw-value reader, "is this value a problem?", message)
    checks = (
        ("QV",
         lambda: qv_value,
         lambda v: v < 40,
         f". QV value is less than 40 for {haplotype}"),
        ("Kmer completeness",
         lambda: completeness_value,
         lambda v: v < 90,
         f". Kmer completeness value is less than 90 for {haplotype}"),
        ("BUSCO single copy",
         lambda: busco_scores[0].rstrip('%'),
         lambda v: v < 90,
         f". BUSCO single copy value is less than 90% for {haplotype}"),
        ("BUSCO duplicated",
         lambda: busco_scores[1].rstrip('%'),
         lambda v: v > 5,
         f". BUSCO duplicated value is more than 5% for {haplotype}"),
    )

    paragraphs = []
    for label, read_raw, is_problem, message in checks:
        value = _as_float(label, read_raw)
        if value is not None and is_problem(value):
            paragraphs.append(Paragraph(message, styles["midiStyle"]))

    return paragraphs
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
234
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
235 # Generate warnings for curated haplotypes (loss, gaps, 90inChrom)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
def generate_assembly_warnings(asm_data, gaps_per_gbp_data, obs_haploid_num):
    """Build warning paragraphs about curated assemblies (length loss, gaps, L90).

    For every haplotype in ``asm_stages`` three checks are run, in this order:
      * more than 3% of the pre-curation total length was lost in curation
        (only when both totals are available),
      * the curated assembly has more than 1000 gaps/Gbp
        (a missing entry in ``gaps_per_gbp_data`` counts as 0),
      * the curated Scaffold L90 exceeds ``obs_haploid_num``.

    Returns the list of Paragraph objects for all failed checks.
    """

    def _to_number(text):
        # Values arrive as comma-grouped strings, e.g. "1,234,567"
        return float(text.replace(',', ''))

    def _warn(text):
        return Paragraph(text, styles["midiStyle"])

    report_key = 'gfastats--nstar-report_txt'
    l90_key = 'Scaffold L90'
    found = []

    for haplotype in asm_stages:
        before_bp = extract_total_bp_from_gfastats(asm_data['Pre-curation'][haplotype][report_key])
        after_bp = extract_total_bp_from_gfastats(asm_data['Curated'][haplotype][report_key])
        l90 = _to_number(gfastats_data[('Curated', haplotype)][display_names.index(l90_key)])

        # Assembly length lost during curation, as a percentage of the original
        if before_bp and after_bp:
            before = _to_number(before_bp)
            after = _to_number(after_bp)
            lost_pct = (before - after) / before * 100
            if lost_pct > 3:
                found.append(_warn(f". Assembly length loss > 3% for {haplotype}"))

        # Gap density of the curated assembly
        if gaps_per_gbp_data.get(('Curated', haplotype), 0) > 1000:
            found.append(_warn(f". More than 1000 gaps/Gbp for {haplotype}"))

        # L90 above the observed haploid number
        if l90 > float(obs_haploid_num):
            found.append(_warn(f". Not 90% of assembly in chromosomes for {haplotype}"))

    return found
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
261
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
262 # Parse pipeline and generate "tree"
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
def generate_pipeline_tree(pipeline_data):
    """Render a pipeline description as a small text "tree" with inline markup.

    ``pipeline_data`` is expected to be a dict mapping a tool name to a value
    whose string form is ``version/param/param/...``.  For each tool the output
    has a bold tool line, a version line and one "key param" line per
    non-empty parameter ("NA" when there are none).  Lines are joined with
    ``<br/>``.  Anything that is not a dict yields the text
    "Invalid pipeline data format".
    """
    if not isinstance(pipeline_data, dict):
        return "Invalid pipeline data format"

    pad = "&nbsp;" * 4  # indentation of the child lines under each tool
    rows = []
    for tool, version_param in pipeline_data.items():
        # First '/'-separated piece is the version, the rest are parameters
        version, *rest = str(version_param).split('/')
        key_params = [piece for piece in rest if piece] or ["NA"]

        rows.append(f"- <b>{tool}</b>")
        rows.append(f"{pad}|_ <i>ver:</i> {version}")
        rows.extend(f"{pad}|_ <i>key param:</i> {value}" for value in key_params)

    return "<br/>".join(rows)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
297
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
298 # Reading SAMPLE INFORMATION section from yaml ################################################
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
299
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
300 # Check for required fields
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
301 required_fields = ["ToLID", "Species", "Sex", "Submitter", "Affiliation", "Tags"]
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
302 missing_fields = [field for field in required_fields if field not in yaml_data or not yaml_data[field]]
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
303
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
304 if missing_fields:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
305 logging.error(f"# GENERAL INFORMATION section in the yaml file is missing or empty for the following information: {', '.join(missing_fields)}")
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
306 sys.exit(1)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
307
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
308 # Check that "Species" field is a string
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
309 if not isinstance(yaml_data["Species"], str):
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
310 logging.error(f"# GENERAL INFORMATION section in the yaml file contains incorrect data type for 'Species'. Expected 'str' but got '{type(yaml_data['Species']).__name__}'.")
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
311 sys.exit(1)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
312
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
313 # Get data for Header, ToLID table and submitter
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
314 tol_id = yaml_data["ToLID"]
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
315 species = yaml_data["Species"]
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
316 sex = yaml_data["Sex"]
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
317 submitter = yaml_data["Submitter"]
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
318 affiliation = yaml_data["Affiliation"]
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
319 tags = yaml_data["Tags"]
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
320
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
321 # Check if tag is valid
2
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
322 valid_tags = ["ERGA-BGE", "ERGA-Pilot", "ERGA-Community", "ERGA-testing"]
0
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
323 if tags not in valid_tags:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
324 tags += "[INVALID TAG]"
2
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
325 logging.warning("# SAMPLE INFORMATION section in the yaml file contains an invalid tag. Valid tags are ERGA-BGE, ERGA-Pilot and ERGA-Community.")
0
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
326
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
327 # Get data from GoaT based on species name
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
328 # URL-encode the species name with requests.utils.quote so spaces and special characters are safe in the query string
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
329 species_name = requests.utils.quote(species)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
330
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
331 # Query the GoaT search API for taxon records matching the species name
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
332 goat_response = requests.get(f'https://goat.genomehubs.org/api/v2/search?query=tax_name%28{species_name}%29&result=taxon')
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
333 goat_data = goat_response.json() # convert json to dict
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
334
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
335 taxon_number = goat_data['results'][0]['result']['taxon_id']
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
336
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
337 goat_results = goat_data['results']
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
338
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
339 class_name = 'NA'
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
340 order_name = 'NA'
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
341 haploid_number = 'NA'
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
342 haploid_source = 'NA'
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
343 ploidy = 'NA'
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
344 ploidy_source = 'NA'
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
345
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
346 for result in goat_results:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
347 lineage = result['result']['lineage']
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
348 for node in lineage:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
349 if node['taxon_rank'] == 'class':
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
350 class_name = node['scientific_name']
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
351 if node['taxon_rank'] == 'order':
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
352 order_name = node['scientific_name']
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
353
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
354 goat_second_response = requests.get(f'https://goat.genomehubs.org/api/v2/record?recordId={taxon_number}&result=taxon&taxonomy=ncbi')
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
355 goat_second_data = goat_second_response.json()
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
356
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
357 ploidy_info = goat_second_data['records'][0]['record']['attributes']['ploidy']
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
358
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
359 ploidy = ploidy_info['value']
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
360 ploidy_source = ploidy_info['aggregation_source']
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
361
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
362 haploid_info = goat_second_data['records'][0]['record']['attributes']['haploid_number']
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
363
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
364 haploid_number = haploid_info['value']
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
365 haploid_source = haploid_info['aggregation_source']
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
366
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
367 sp_data = [
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
368 ["TxID", "ToLID", "Species", "Class", "Order"],
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
369 [taxon_number, tol_id, species, class_name, order_name]
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
370 ]
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
371
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
372 # Transpose the data
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
373 transposed_sp_data = list(map(list, zip(*sp_data)))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
374
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
375 # Reading SEQUENCING DATA section from yaml ###################################################
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
376
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
377 # get DATA section from yaml
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
378 data_list = yaml_data.get('DATA', [])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
379
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
380 # Prepare headers
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
381 headers = ['Data']
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
382 data_values = ['Coverage']
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
383
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
384 # Extract data from YAML and format it for the table
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
385 for item in data_list:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
386 for technology, coverage in item.items():
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
387 headers.append(technology)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
388 data_values.append('NA' if not coverage else coverage)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
389
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
390 # Create a list of lists for the table
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
391 table_data = [headers, data_values]
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
392
2
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
393 # Extract pipeline data
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
394 asm_pipeline_data = yaml_data.get('PIPELINES', {}).get('Assembly', {})
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
395 curation_pipeline_data = yaml_data.get('PIPELINES', {}).get('Curation', {})
0
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
396
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
397 # Build the tree diagrams for the Assembly and Curation pipelines
2
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
398 asm_pipeline_tree = generate_pipeline_tree(asm_pipeline_data)
0
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
399 curation_pipeline_tree = generate_pipeline_tree(curation_pipeline_data)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
400
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
401 # Reading GENOME PROFILING DATA section from yaml #############################################
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
402 profiling_data = yaml_data.get('PROFILING')
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
403
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
404 # Check if profiling_data is available
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
405 if not profiling_data:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
406 logging.error('Error: No profiling data found in the YAML file.')
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
407 sys.exit(1)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
408
2
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
409 # Check for GenomeScope data (mandatory)
0
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
410 genomescope_data = profiling_data.get('GenomeScope')
2
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
411 if not genomescope_data:
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
412 logging.error("Error: GenomeScope data is missing in the YAML file. This is mandatory.")
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
413 sys.exit(1)
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
414
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
415 genomescope_summary = genomescope_data.get('genomescope_summary_txt')
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
416 if not genomescope_summary:
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
417 logging.error("Error: GenomeScope summary file path is missing in the YAML file.")
0
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
418 sys.exit(1)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
419
2
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
420 # Read the content of the GenomeScope summary file
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
421 try:
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
422 with open(genomescope_summary, "r") as f:
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
423 summary_txt = f.read()
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
424 # Extract values from summary.txt
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
425 genome_haploid_length = re.search(r"Genome Haploid Length\s+([\d,]+) bp", summary_txt).group(1)
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
426 proposed_ploidy = re.search(r"p = (\d+)", summary_txt).group(1)
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
427 except Exception as e:
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
428 logging.error(f"Error reading GenomeScope summary file: {str(e)}")
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
429 sys.exit(1)
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
430
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
431 # Check for Smudgeplot data (optional)
0
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
432 smudgeplot_data = profiling_data.get('Smudgeplot')
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
433 if smudgeplot_data:
2
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
434 smudgeplot_summary = smudgeplot_data.get('smudgeplot_verbose_summary_txt')
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
435 if smudgeplot_summary:
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
436 try:
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
437 with open(smudgeplot_summary, "r") as f:
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
438 smud_summary_txt = f.readlines()
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
439 for line in smud_summary_txt:
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
440 if line.startswith("* Proposed ploidy"):
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
441 proposed_ploidy = line.split(":")[1].strip()
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
442 break
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
443 except Exception as e:
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
444 logging.warning(f"Error reading Smudgeplot summary file: {str(e)}. Using GenomeScope ploidy.")
0
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
445 else:
2
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
446 logging.warning("Smudgeplot summary file path is missing. Using GenomeScope ploidy.")
0
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
447 else:
2
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
448 logging.info("Smudgeplot data not provided. Using GenomeScope ploidy.")
0
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
449
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
450 # Reading ASSEMBLY DATA section from yaml #####################################################
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
451
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
452 asm_data = yaml_data.get('ASSEMBLIES', {})
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
453
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
454 # make a list of the distinct haplotype keys found under each stage in asm_data
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
455 asm_stages = []
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
456 for asm_stage, stage_properties in asm_data.items():
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
457 for haplotypes in stage_properties.keys():
2
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
458 if haplotypes not in asm_stages:
0
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
459 asm_stages.append(haplotypes)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
460
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
461 # get gfastats-based data
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
462 gfastats_data = {}
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
463 for asm_stage, stage_properties in asm_data.items():
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
464 for haplotypes, haplotype_properties in stage_properties.items():
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
465 if isinstance(haplotype_properties, dict):
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
466 if 'gfastats--nstar-report_txt' in haplotype_properties:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
467 file_path = haplotype_properties['gfastats--nstar-report_txt']
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
468 with open(file_path, 'r') as file:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
469 content = file.read()
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
470 gfastats_data[(asm_stage, haplotypes)] = extract_gfastats_values(content, keys)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
471
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
472 gaps_per_gbp_data = {}
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
473 for (asm_stage, haplotypes), values in gfastats_data.items():
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
474 try:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
475 gaps = float(values[gaps_index])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
476 total_length = float(values[total_length_index])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
477 gaps_per_gbp = round((gaps / total_length * 1_000_000_000), 2)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
478 gaps_per_gbp_data[(asm_stage, haplotypes)] = gaps_per_gbp
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
479 except (ValueError, ZeroDivisionError):
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
480 gaps_per_gbp_data[(asm_stage, haplotypes)] = ''
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
481
2
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
482 # Define the contigging table (column names)
0
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
483 asm_table_data = [["Metrics"] + [f'{asm_stage} \n {haplotypes}' for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]]]
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
484
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
485 # Fill the table with the gfastats data
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
486 for i in range(len(display_names)):
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
487 metric = display_names[i]
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
488 if metric not in exclusion_list:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
489 asm_table_data.append([metric] + [format_number(gfastats_data.get((asm_stage, haplotypes), [''])[i]) if (asm_stage, haplotypes) in gfastats_data else '' for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
490
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
491 # Add the gaps/gbp in between
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
492 asm_table_data.insert(gaps_index + 1, ['Gaps/Gbp'] + [format_number(gaps_per_gbp_data.get((asm_stage, haplotypes), '')) for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
493
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
494 # get QV, Kmer completeness and BUSCO data
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
495 qv_data = {}
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
496 completeness_data = {}
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
497 busco_data = {metric: {} for metric in ['BUSCO sing.', 'BUSCO dupl.', 'BUSCO frag.', 'BUSCO miss.']}
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
498 for asm_stage, stage_properties in asm_data.items():
2
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
499 asm_stage_elements = list(stage_properties.keys())
0
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
500 for i, haplotypes in enumerate(asm_stage_elements):
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
501 haplotype_properties = stage_properties[haplotypes]
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
502 if isinstance(haplotype_properties, dict):
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
503 if 'merqury_qv' in haplotype_properties:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
504 qv_data[(asm_stage, haplotypes)] = get_qv_value(haplotype_properties['merqury_qv'], i, asm_stage, haplotypes)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
505 if 'merqury_completeness_stats' in haplotype_properties:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
506 completeness_data[(asm_stage, haplotypes)] = get_completeness_value(haplotype_properties['merqury_completeness_stats'], i, asm_stage, haplotypes)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
507 if 'busco_short_summary_txt' in haplotype_properties:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
508 s_value, d_value, f_value, m_value = extract_busco_values(haplotype_properties['busco_short_summary_txt'])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
509 busco_data['BUSCO sing.'].update({(asm_stage, haplotypes): s_value})
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
510 busco_data['BUSCO dupl.'].update({(asm_stage, haplotypes): d_value})
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
511 busco_data['BUSCO frag.'].update({(asm_stage, haplotypes): f_value})
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
512 busco_data['BUSCO miss.'].update({(asm_stage, haplotypes): m_value})
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
513
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
514 # Fill the table with the QV data
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
515 asm_table_data.append(['QV'] + [qv_data.get((asm_stage, haplotypes), '') for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
516
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
517 # Fill the table with the Kmer completeness data
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
518 asm_table_data.append(['Kmer compl.'] + [completeness_data.get((asm_stage, haplotypes), '') for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
519
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
520 # Fill the table with the BUSCO data
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
521 for metric in ['BUSCO sing.', 'BUSCO dupl.', 'BUSCO frag.', 'BUSCO miss.']:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
522 asm_table_data.append([metric] + [busco_data[metric].get((asm_stage, haplotypes), '') for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
523
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
524 # Reading CURATION NOTES section from yaml ####################################################
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
525
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
526 obs_haploid_num = yaml_data.get("NOTES", {}).get("Obs_Haploid_num", "NA")
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
527 obs_sex = yaml_data.get("NOTES", {}).get("Obs_Sex", "NA")
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
528 interventions_per_gb = yaml_data.get("NOTES", {}).get("Interventions_per_Gb", "NA")
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
529 contamination_notes = yaml_data.get("NOTES", {}).get("Contamination_notes", "NA")
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
530 other_notes = yaml_data.get("NOTES", {}).get("Other_notes", "NA")
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
531
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
532 # Extract Total bp for each haplotype and find the maximum
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
533 curated_assemblies = yaml_data.get('ASSEMBLIES', {}).get('Curated', {})
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
534 total_bp_values = []
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
535 for haplotype, properties in curated_assemblies.items():
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
536 if 'gfastats--nstar-report_txt' in properties:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
537 total_bp = extract_total_bp_from_gfastats(properties['gfastats--nstar-report_txt'])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
538 total_bp_values.append(total_bp)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
539
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
540 max_total_bp = max(total_bp_values, default='NA')
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
541
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
542 # Create table data
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
543 genome_traits_table_data = [
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
544 ["Genome Traits", "Expected", "Observed"],
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
545 ["Haploid size (bp)", genome_haploid_length, f"{max_total_bp}"],
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
546 ["Haploid Number", f"{haploid_number} (source: {haploid_source})", obs_haploid_num],
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
547 ["Ploidy", f"{ploidy} (source: {ploidy_source})", proposed_ploidy],
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
548 ["Sample Sex", sex, obs_sex]
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
549 ]
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
550
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
551 # Get curator notes
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
552 curator_notes_text = (
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
553 f". Interventions/Gb: {interventions_per_gb}<br/>"
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
554 f". Contamination notes: &quot;{contamination_notes}&quot;<br/>"
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
555 f". Other observations: &quot;{other_notes}&quot;"
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
556 )
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
557
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
558 # PDF CONSTRUCTION ############################################################################
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
559
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
560 # Set up the PDF file
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
561 pdf_filename = "EAR.pdf"
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
562 margin = 0.5 * 72 # 0.5 inch in points (normal margin is 1 inch)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
563 pdf = SimpleDocTemplate(pdf_filename,
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
564 pagesize=A4,
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
565 leftMargin=margin,
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
566 rightMargin=margin,
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
567 topMargin=margin,
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
568 bottomMargin=margin)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
569 elements = []
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
570
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
571 # Set all the styles
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
572 styles = getSampleStyleSheet()
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
573 styles.add(ParagraphStyle(name='TitleStyle', fontName='Courier', fontSize=20))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
574 styles.add(ParagraphStyle(name='subTitleStyle', fontName='Courier', fontSize=16))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
575 styles.add(ParagraphStyle(name='normalStyle', fontName='Courier', fontSize=12))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
576 styles.add(ParagraphStyle(name='midiStyle', fontName='Courier', fontSize=10))
2
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
577 # styles.add(ParagraphStyle(name='LinkStyle', fontName='Courier', fontSize=10, textColor='blue', underline=True))
0
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
578 styles.add(ParagraphStyle(name='treeStyle', fontName='Courier', fontSize=10, leftIndent=12))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
579 styles.add(ParagraphStyle(name='miniStyle', fontName='Courier', fontSize=8))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
580 styles.add(ParagraphStyle(name='FileNameStyle', fontName='Courier', fontSize=6))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
581
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
582 # PDF SECTION 1 -------------------------------------------------------------------------------
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
583
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
584 # Add the title
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
585 title = Paragraph("ERGA Assembly Report", styles['TitleStyle'])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
586 elements.append(title)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
587
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
588 # Spacer
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
589 elements.append(Spacer(1, 12))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
590
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
591 # Add version
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
592 ver_paragraph = Paragraph(EAR_version, styles['normalStyle'])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
593 elements.append(ver_paragraph)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
594
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
595 # Spacer
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
596 elements.append(Spacer(1, 12))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
597
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
598 # Add tags
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
599 tags_paragraph = Paragraph(f"Tags: {tags}", styles['normalStyle'])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
600 elements.append(tags_paragraph)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
601
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
602 # Spacer
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
603 elements.append(Spacer(1, 24))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
604
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
605 # Create the SPECIES DATA table with the transposed data
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
606 sp_data_table = Table(transposed_sp_data)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
607
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
608 # Style the table
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
609 sp_data_table.setStyle(TableStyle([
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
610 ("BACKGROUND", (0, 0), (0, -1), '#e7e7e7'), # Grey background for column 1
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
611 ("BACKGROUND", (1, 0), (1, -1), colors.white), # White background for column 2
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
612 ("ALIGN", (0, 0), (-1, -1), "CENTER"),
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
613 ('FONTNAME', (0, 0), (0, 0), 'Courier'), # Regular font for row1, col1
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
614 ('FONTNAME', (1, 0), (1, 0), 'Courier'),
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
615 ('FONTNAME', (0, 1), (-1, -1), 'Courier'), # Regular font for the rest of the table
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
616 ('FONTNAME', (1, 1), (1, 1), 'Courier-Bold'), # Bold font for row2, col2 (cell coordinates are (col, row), zero-based)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
617 ("FONTSIZE", (0, 0), (-1, -1), 14),
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
618 ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
619 ("GRID", (0, 0), (-1, -1), 0.5, colors.black)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
620 ]))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
621
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
622 # Add SPECIES DATA table
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
623 elements.append(sp_data_table)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
624
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
625 # Spacer
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
626 elements.append(Spacer(1, 32))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
627
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
628 # Create the GENOME TRAITS table
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
629 genome_traits_table = Table(genome_traits_table_data)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
630
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
631 # Style the table
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
632 genome_traits_table.setStyle(TableStyle([
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
633 ('BACKGROUND', (0, 0), (0, -1), '#e7e7e7'),
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
634 ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
635 ('FONTNAME', (0, 0), (-1, -1), 'Courier'),
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
636 ('FONTSIZE', (0, 0), (-1, -1), 12),
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
637 ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
638 ("GRID", (0, 0), (-1, -1), 0.5, colors.black)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
639 ]))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
640
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
641 # Add GENOME TRAITS table
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
642 elements.append(genome_traits_table)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
643
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
644 # Spacer
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
645 elements.append(Spacer(1, 28))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
646
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
647 # Add EBP METRICS SECTION subtitle
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
648 subtitle = Paragraph("EBP metrics summary and curation notes", styles['subTitleStyle'])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
649 elements.append(subtitle)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
650
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
651 # Spacer
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
652 elements.append(Spacer(1, 24))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
653
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
654 # Iterate over haplotypes in the Curated category to get data for EBP metrics
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
655 curated_assemblies = yaml_data.get('ASSEMBLIES', {}).get('Curated', {})
2
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
656 haplotype_names = list(curated_assemblies.keys())
0
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
657
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
658 for haplotype in haplotype_names:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
659 properties = curated_assemblies[haplotype]
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
660 if 'gfastats--nstar-report_txt' in properties and 'merqury_qv' in properties:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
661 gfastats_path = properties['gfastats--nstar-report_txt']
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
662 order = haplotype_names.index(haplotype) # Determine the order based on the position of the haplotype in the list
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
663 qv_value = get_qv_value(properties['merqury_qv'], order, 'Curated', haplotype)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
664
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
665 ebp_quality_metric = compute_ebp_metric(haplotype, gfastats_path, qv_value)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
666 EBP_metric_paragraph = Paragraph(ebp_quality_metric, styles["midiStyle"])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
667
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
668 # Add the EBP quality metric paragraph to elements
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
669 elements.append(EBP_metric_paragraph)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
670
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
671 # Spacer
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
672 elements.append(Spacer(1, 8))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
673
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
674 # Add sentence
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
675 Textline = Paragraph("The following metrics were automatically flagged as below EBP recommended standards or different from expected:", styles['midiStyle'])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
676 elements.append(Textline)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
677
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
678 # Spacer
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
679 elements.append(Spacer(1, 4))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
680
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
681 # Apply checks and add warning paragraphs to elements
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
682 elements += generate_warning_paragraphs(genome_haploid_length, max_total_bp, "Haploid size (bp)")
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
683 elements += generate_warning_paragraphs(haploid_number, obs_haploid_num, "Haploid Number")
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
684 elements += generate_warning_paragraphs(proposed_ploidy, ploidy, "Ploidy")
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
685 elements += generate_warning_paragraphs(sex, obs_sex, "Sample Sex")
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
686
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
687 # Spacer
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
688 elements.append(Spacer(1, 4))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
689
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
690 # Iterate over haplotypes in the Curated category and apply checks
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
691 for haplotype in haplotype_names:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
692 properties = curated_assemblies[haplotype]
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
693 if isinstance(properties, dict) and 'merqury_qv' in properties and 'merqury_completeness_stats' in properties and 'busco_short_summary_txt' in properties:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
694 order = haplotype_names.index(haplotype)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
695 qv_value = get_qv_value(properties['merqury_qv'], order, "Curated", haplotype)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
696 completeness_value = get_completeness_value(properties['merqury_completeness_stats'], order, "Curated", haplotype)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
697 busco_scores = extract_busco_values(properties['busco_short_summary_txt'])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
698
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
699 warnings = generate_curated_warnings(haplotype, qv_value, completeness_value, busco_scores)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
700 elements += warnings
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
701
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
702 assembly_warnings = generate_assembly_warnings(asm_data, gaps_per_gbp_data, obs_haploid_num)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
703 elements.extend(assembly_warnings)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
704
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
705 # Spacer
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
706 elements.append(Spacer(1, 24))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
707
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
708 # Add small subtitle for Curator notes
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
709 subtitle = Paragraph("Curator notes", styles['normalStyle'])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
710 elements.append(subtitle)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
711
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
712 # Spacer
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
713 elements.append(Spacer(1, 8))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
714
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
715 # Curator notes
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
716 curator_notes_paragraph = Paragraph(curator_notes_text, styles["midiStyle"])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
717 elements.append(curator_notes_paragraph)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
718
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
719 # Page break
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
720 elements.append(PageBreak())
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
721
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
722 # PDF SECTION 2 -------------------------------------------------------------------------------
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
723
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
724 # Add quality metrics section subtitle
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
725 subtitle = Paragraph("Quality metrics table", styles['TitleStyle'])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
726 elements.append(subtitle)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
727
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
728 # Spacer
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
729 elements.append(Spacer(1, 48))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
730
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
731 # create QUALITY METRICS table
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
732 asm_table = Table(asm_table_data)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
733
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
734 # Style the table
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
735 asm_table.setStyle(TableStyle([
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
736 ('BACKGROUND', (0, 0), (-1, 0), '#e7e7e7'), # grey background for the header
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
737 ('ALIGN', (0, 0), (-1, -1), 'CENTER'), # center alignment
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
738 ('FONTNAME', (0, 0), (-1, -1), 'Courier'), # regular Courier font for the whole table
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
739 ('FONTSIZE', (0, 0), (-1, -1), 11), # font size
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
740 ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
741 ("GRID", (0, 0), (-1, -1), 0.5, colors.black)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
742 ]))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
743
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
744 # Add QUALITY METRICS table
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
745 elements.append(asm_table)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
746
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
747 # Spacer
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
748 elements.append(Spacer(1, 5))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
749
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
750 # Store BUSCO version and lineage information from each file in list
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
751 busco_info_list = []
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
752 for asm_stages, stage_properties in asm_data.items():
2
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
753 for i, haplotype_properties in stage_properties.items():
0
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
754 if isinstance(haplotype_properties, dict):
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
755 if 'busco_short_summary_txt' in haplotype_properties:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
756 busco_version, lineage_info = extract_busco_info(haplotype_properties['busco_short_summary_txt'])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
757 if busco_version and lineage_info:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
758 busco_info_list.append((busco_version, lineage_info))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
759
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
760 # Checking if all elements in the list are identical
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
761 if all(info == busco_info_list[0] for info in busco_info_list):
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
762 busco_version, (lineage_name, num_genomes, num_buscos) = busco_info_list[0]
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
763 elements.append(Paragraph(f"BUSCO {busco_version} Lineage: {lineage_name} (genomes:{num_genomes}, BUSCOs:{num_buscos})", styles['miniStyle']))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
764 else:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
765 elements.append(Paragraph("Warning: BUSCO versions or lineage datasets are not the same across results", styles['miniStyle']))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
766 logging.warning("WARNING!!! BUSCO versions or lineage datasets are not the same across results")
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
767
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
768 # Page break
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
769 elements.append(PageBreak())
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
770
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
771 # PDF SECTION 3 -------------------------------------------------------------------------------
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
772
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
773 # Add hic maps section subtitle
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
774 subtitle = Paragraph("HiC contact map of curated assembly", styles['TitleStyle'])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
775 elements.append(subtitle)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
776
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
777 # Spacer
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
778 elements.append(Spacer(1, 36))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
779
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
780 # Initialize counter
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
781 tool_count = 0
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
782
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
783 # Add title and images for each step
2
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
784 for asm_stages, stage_properties in asm_data.items():
0
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
785 if asm_stages == 'Curated':
2
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
786 tool_elements = list(stage_properties.keys())
0
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
787
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
788 images_with_names = []
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
789
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
790 for haplotype in tool_elements:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
791 haplotype_properties = stage_properties[haplotype]
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
792
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
793 # Check if there is an image and/or a link
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
794 png_file = haplotype_properties.get('hic_FullMap_png', '')
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
795 link = haplotype_properties.get('hic_FullMap_link', '')
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
796
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
797 # Prepare paragraphs for the image and link
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
798 if png_file:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
799 # Create image object
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
800 img = Image(png_file, width=11 * cm, height=11 * cm)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
801 images_with_names.append([img])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
802 else:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
803 # Add paragraph for missing image
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
804 missing_png_paragraph = Paragraph(f"<b>{haplotype}</b> HiC PNG is missing!", styles["midiStyle"])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
805 images_with_names.append([missing_png_paragraph])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
806
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
807 # Add paragraph for the link
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
808 if link:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
809 link_html = f'<b>{haplotype}</b> <link href="{link}" color="blue">[LINK]</link>'
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
810 else:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
811 link_html = f'<b>{haplotype}</b> File link is missing!'
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
812
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
813 link_paragraph = Paragraph(link_html, styles["midiStyle"])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
814 images_with_names.append([link_paragraph])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
815
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
816 # Append a spacer after every haplotype except the last one
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
817 if len(tool_elements) > 1 and tool_elements.index(haplotype) < len(tool_elements) - 1:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
818 images_with_names.append([Spacer(1, 12)])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
819
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
820 # Add images and names to the elements in pairs
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
821 for i in range(0, len(images_with_names), 4): # Process the collected rows in chunks of four
2
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
822 elements_to_add = images_with_names[i: i + 4]
0
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
823
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
824 # Create table for the images and names
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
825 table = Table(elements_to_add)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
826 table.hAlign = 'CENTER'
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
827 elements.append(table)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
828
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
829 # Add a page break conditionally
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
830 next_elements_start = i + 4
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
831 if next_elements_start < len(images_with_names):
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
832 if len(images_with_names[next_elements_start]) > 0 and isinstance(images_with_names[next_elements_start][0], Image):
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
833 elements.append(PageBreak())
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
834
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
835 tool_count += 1
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
836
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
837 elements.append(PageBreak())
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
838
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
839 # PDF SECTION 4 -------------------------------------------------------------------------------
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
840
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
841 # Add kmer spectra section subtitle
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
842 subtitle = Paragraph("K-mer spectra of curated assembly", styles['TitleStyle'])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
843 elements.append(subtitle)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
844
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
845 # Spacer
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
846 elements.append(Spacer(1, 48))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
847
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
848 # Initialize counter
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
849 counter = 0
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
850
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
851 # Look up the Curated assemblies section, which holds the K-mer spectra image paths
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
852 curated_assemblies = yaml_data.get('ASSEMBLIES', {}).get('Curated', {})
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
853
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
854 # Get paths for spectra files
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
855 spectra_files = {
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
856 'hap1': {
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
857 'spectra_cn_png': curated_assemblies.get('hap1', {}).get('merqury_hap_spectra_cn_png', None),
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
858 },
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
859 'hap2': {
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
860 'spectra_cn_png': curated_assemblies.get('hap2', {}).get('merqury_hap_spectra_cn_png', None),
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
861 },
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
862 'common': {
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
863 'spectra_cn_png': curated_assemblies.get('hap1', {}).get('merqury_spectra_cn_png', None),
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
864 'spectra_asm_png': curated_assemblies.get('hap1', {}).get('merqury_spectra_asm_png', None),
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
865 }
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
866 }
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
867
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
868 # Filter out None values and empty strings
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
869 spectra_files = {k: {sk: v for sk, v in sv.items() if v} for k, sv in spectra_files.items()}
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
870
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
871 # Determine the number of spectra-cn files and assign unique names if needed
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
872 spectra_cn_files = [
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
873 spectra_files['common'].get('spectra_cn_png', None),
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
874 spectra_files['hap1'].get('spectra_cn_png', None),
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
875 spectra_files['hap2'].get('spectra_cn_png', None)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
876 ]
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
877 spectra_cn_files = [f for f in spectra_cn_files if f] # Filter out None values
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
878
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
879 if len(spectra_cn_files) == 3:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
880 # For 3 spectra-cn files
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
881 shortest_spectra_cn_file = min(spectra_cn_files, key=lambda f: len(os.path.basename(f)), default=None)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
882 similar_files = [f for f in spectra_cn_files if f != shortest_spectra_cn_file]
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
883 if similar_files:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
884 unique_name1, unique_name2 = find_unique_parts(os.path.basename(similar_files[0]), os.path.basename(similar_files[1]))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
885 else:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
886 shortest_spectra_cn_file = spectra_cn_files[0] if spectra_cn_files else None
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
887 unique_name1 = unique_name2 = None
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
888
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
889 # Create image objects and add a descriptive caption (falling back to the filename) below each image
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
890 images = []
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
891
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
892 for label, file_dict in spectra_files.items():
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
893 for key, png_file in file_dict.items():
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
894 if png_file:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
895 image = Image(png_file, width=8.4 * cm, height=7 * cm)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
896 filename = os.path.basename(png_file)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
897
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
898 if filename.endswith("spectra-asm.ln.png"):
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
899 text = "Distribution of k-mer counts coloured by their presence in reads/assemblies"
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
900 elif filename.endswith("spectra-cn.ln.png"):
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
901 if len(spectra_cn_files) == 3:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
902 # For 3 spectra-cn files use particular text
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
903 if png_file == shortest_spectra_cn_file:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
904 text = "Distribution of k-mer counts per copy numbers found in asm (dipl.)"
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
905 else:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
906 if png_file == spectra_files['hap1'].get('spectra_cn_png', None):
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
907 text = f"Distribution of k-mer counts per copy numbers found in <b>{unique_name1}</b> (hapl.)"
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
908 elif png_file == spectra_files['hap2'].get('spectra_cn_png', None):
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
909 text = f"Distribution of k-mer counts per copy numbers found in <b>{unique_name2}</b> (hapl.)"
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
910 else:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
911 text = "Distribution of k-mer counts per copy numbers found in asm"
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
912 else:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
913 # For 2 spectra-cn files use same text
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
914 text = "Distribution of k-mer counts per copy numbers found in asm"
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
915 else:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
916 text = filename
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
917
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
918 images.append([image, Paragraph(text, styles["midiStyle"])])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
919
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
920 # Filter None values
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
921 images = [img for img in images if img[0] is not None]
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
922
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
923 # Get number of rows and columns for the table
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
924 num_rows = (len(images) + 1) // 2 # +1 to handle odd numbers of images
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
925 num_columns = 2
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
926
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
927 # Create the table with dynamic size
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
928 image_table_data = [[images[i * num_columns + j] if i * num_columns + j < len(images) else [] for j in range(num_columns)] for i in range(num_rows)]
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
929 image_table = Table(image_table_data)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
930
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
931 # Style the "table"
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
932 table_style = TableStyle([
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
933 ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
934 ('BOTTOMPADDING', (0, 0), (-1, -1), 20), # 20 here is a spacer between rows
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
935 ])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
936
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
937 # Set the style
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
938 image_table.setStyle(table_style)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
939
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
940 # Add image table to elements
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
941 elements.append(image_table)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
942
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
943 # Increase counter by the number of PNGs added
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
944 counter += len(images)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
945
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
946 # If counter is a multiple of 4, insert a page break
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
947 if counter % 4 == 0:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
948 elements.append(PageBreak())
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
949
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
950 # Add spacer
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
951 elements.append(Spacer(1, 12))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
952
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
953 # If we have processed all haps and the last page does not contain exactly 4 images, insert a page break
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
954 if counter % 4 != 0:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
955 elements.append(PageBreak())
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
956
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
957 # PDF SECTION 5 -------------------------------------------------------------------------------
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
958
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
959 # Add contamination section subtitle
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
960 subtitle = Paragraph("Post-curation contamination screening", styles['TitleStyle'])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
961 elements.append(subtitle)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
962
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
963 # Spacer
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
964 elements.append(Spacer(1, 36))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
965
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
966 # Initialize counter
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
967 tool_count = 0
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
968
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
969 # Add title and images for each step
2
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
970 for asm_stages, stage_properties in asm_data.items():
0
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
971 if asm_stages == 'Curated': # Check if the current stage is 'Curated'
2
a34826ae0a73 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
972 tool_elements = list(stage_properties.keys())
0
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
973
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
974 for haplotype in tool_elements:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
975 haplotype_properties = stage_properties[haplotype]
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
976 if isinstance(haplotype_properties, dict) and 'blobplot_cont_png' in haplotype_properties:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
977 # Get image path
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
978 png_file = haplotype_properties['blobplot_cont_png']
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
979
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
980 # If png_file is not empty, display it
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
981 if png_file:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
982 # Create image object
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
983 img = Image(png_file, width=20 * cm, height=20 * cm)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
984 elements.append(img)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
985
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
986 # Create paragraph for filename with haplotype name
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
987 blob_text = f"<b>{haplotype}.</b> Bubble plot circles are scaled by sequence length, positioned by coverage and GC proportion, and coloured by taxonomy. Histograms show total assembly length distribution on each axis."
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
988 blob_paragraph = Paragraph(blob_text, styles["midiStyle"])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
989 elements.append(blob_paragraph)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
990 else:
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
991 # Add paragraph for missing image
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
992 missing_png_paragraph = Paragraph(f"<b>{haplotype}</b> PNG is missing!", styles["midiStyle"])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
993 elements.append(missing_png_paragraph)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
994
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
995 # Add a page break after each image and its description
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
996 elements.append(PageBreak())
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
997
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
998 tool_count += 1
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
999
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1000 # SECTION 6 -----------------------------------------------------------------------------------
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1001
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1002 # Add data profile section subtitle
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1003 subtitle = Paragraph("Data profile", styles['TitleStyle'])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1004 elements.append(subtitle)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1005
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1006 # Spacer
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1007 elements.append(Spacer(1, 24))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1008
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1009 # Create the DATA PROFILE table
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1010 data_table = Table(table_data)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1011
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1012 # Style the table
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1013 data_table.setStyle(TableStyle([
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1014 ('BACKGROUND', (0, 0), (0, -1), '#e7e7e7'), # grey background for the first column
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1015 ('ALIGN', (0, 0), (-1, -1), 'CENTER'), # center alignment
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1016 ('FONTNAME', (0, 0), (-1, -1), 'Courier'), # remove bold font
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1017 ('FONTSIZE', (0, 0), (-1, -1), 12), # font size for the header
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1018 ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1019 ("GRID", (0, 0), (-1, -1), 0.5, colors.black)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1020 ]))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1021
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1022 # Add DATA PROFILE table
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1023 elements.append(data_table)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1024
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1025 # Spacer
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1026 elements.append(Spacer(1, 32))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1027
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1028 # Add assembly pipeline section subtitle
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1029 subtitle = Paragraph("Assembly pipeline", styles['TitleStyle'])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1030 elements.append(subtitle)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1031
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1032 # Spacer
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1033 elements.append(Spacer(1, 24))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1034
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1035 # Add ASM PIPELINE tree
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1036 elements.append(Paragraph(asm_pipeline_tree, styles['treeStyle']))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1037
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1038 # Spacer
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1039 elements.append(Spacer(1, 32))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1040
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1041 # Add curation pipeline section subtitle
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1042 subtitle = Paragraph("Curation pipeline", styles['TitleStyle'])
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1043 elements.append(subtitle)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1044
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1045 # Spacer
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1046 elements.append(Spacer(1, 24))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1047
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1048 # Add CURATION PIPELINE tree
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1049 elements.append(Paragraph(curation_pipeline_tree, styles['treeStyle']))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1050
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1051 # Spacer
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1052 elements.append(Spacer(1, 48))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1053
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1054 # Add submitter, affiliation
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1055 submitter_paragraph_style = ParagraphStyle(name='SubmitterStyle', fontName='Courier', fontSize=10)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1056 elements.append(Paragraph(f"Submitter: {submitter}", submitter_paragraph_style))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1057 elements.append(Paragraph(f"Affiliation: {affiliation}", submitter_paragraph_style))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1058
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1059 # Spacer
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1060 elements.append(Spacer(1, 8))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1061
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1062 # Add the date and time (CET) of the document creation
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1063 cet = pytz.timezone("CET")
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1064 current_datetime = datetime.now(cet)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1065 formatted_datetime = current_datetime.strftime("%Y-%m-%d %H:%M:%S %Z")
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1066 elements.append(Paragraph(f"Date and time: {formatted_datetime}", submitter_paragraph_style))
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1067
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1068 # Build the PDF ###############################################################################
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1069 pdf.build(elements)
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1070
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
1071
8c99976de71e planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e9db2e88e55a442d7f8fdb5e8e86330410d2e92d
bgruening
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point: takes one positional argument, the path to
    # the YAML file, and passes it to make_report().
    parser = argparse.ArgumentParser(
        description=(
            'Create an ERGA Assembly Report (EAR) from a YAML file. '
            'Visit https://github.com/ERGA-consortium/EARs for more information'
        ),
    )
    parser.add_argument(
        'yaml_file',
        type=str,
        help='Path to the YAML file',
    )
    args = parser.parse_args()

    # Hand the user-supplied YAML path to the report builder.
    make_report(args.yaml_file)