comparison make_EAR.py @ 2:a34826ae0a73 draft

planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
author bgruening
date Fri, 30 Aug 2024 09:27:31 +0000
parents 8c99976de71e
children 3dd6be0cd8dd
comparison
equal deleted inserted replaced
1:b61022e1b807 2:a34826ae0a73
1 1
2 import argparse 2 import argparse
3 import glob
4 import logging 3 import logging
5 import math 4 import math
6 import os 5 import os
7 import re 6 import re
8 import sys 7 import sys
20 19
21 # make_EAR_glxy.py 20 # make_EAR_glxy.py
22 # CAUTION: This is for the Galaxy version! 21 # CAUTION: This is for the Galaxy version!
23 # by Diego De Panis 22 # by Diego De Panis
24 # ERGA Sequencing and Assembly Committee 23 # ERGA Sequencing and Assembly Committee
25 EAR_version = "v24.05.20_glxy_beta" 24 EAR_version = "v24.08.26"
26 25
27 26
28 def make_report(yaml_file): 27 def make_report(yaml_file):
29 logging.basicConfig(filename='EAR.log', level=logging.INFO) 28 logging.basicConfig(filename='EAR.log', level=logging.INFO)
30 # Read the content from EAR.yaml file 29 # Read the content from EAR.yaml file
118 if len(lines) > order: 117 if len(lines) > order:
119 target_line = lines[order] 118 target_line = lines[order]
120 fifth_column_value = target_line.split('\t')[4].strip() 119 fifth_column_value = target_line.split('\t')[4].strip()
121 return fifth_column_value 120 return fifth_column_value
122 except Exception as e: 121 except Exception as e:
123 logging.warning(f"Error reading {file_path}: {str(e)}") 122 logging.error(f"Error reading {file_path} for tool {tool} and haplotype {haplotype}: {str(e)}")
124 return '' 123 return ''
125
126 # Getting kmer plots for curated asm
127 def get_png_files(dir_path):
128 png_files = glob.glob(f"{dir_path}/*.ln.png")
129 if len(png_files) < 4:
130 logging.warning(f"Warning: Less than 4 png files found in {dir_path}. If this is diploid, some images may be missing.")
131 # fill missing with None
132 while len(png_files) < 4:
133 png_files.append(None)
134 return png_files[:4]
135 124
136 # get unique part in file names 125 # get unique part in file names
137 def find_unique_parts(file1, file2): 126 def find_unique_parts(file1, file2):
138 # Split filenames into parts 127 # Split filenames into parts
139 parts1 = file1.split('.') 128 parts1 = file1.split('.')
140 parts2 = file2.split('.') 129 parts2 = file2.split('.')
141 # Find unique parts 130 # Find unique parts
142 unique_parts1 = [part for part in parts1 if part not in parts2] 131 unique_parts1 = [part for part in parts1 if part not in parts2]
143 unique_parts2 = [part for part in parts2 if part not in parts1] 132 unique_parts2 = [part for part in parts2 if part not in parts1]
144
145 return ' '.join(unique_parts1), ' '.join(unique_parts2) 133 return ' '.join(unique_parts1), ' '.join(unique_parts2)
146 134
147 # extract BUSCO values 135 # extract BUSCO values
148 def extract_busco_values(file_path): 136 def extract_busco_values(file_path):
149 try: 137 try:
272 return warnings 260 return warnings
273 261
274 # Parse pipeline and generate "tree" 262 # Parse pipeline and generate "tree"
275 def generate_pipeline_tree(pipeline_data): 263 def generate_pipeline_tree(pipeline_data):
276 tree_lines = [] 264 tree_lines = []
277 indent = "&nbsp;" * 2 # Adjust indent spacing as needed 265 indent = "&nbsp;" * 2 # Adjust indent spacing
278 266
279 for tool_version_param in pipeline_data: 267 if isinstance(pipeline_data, dict):
280 parts = tool_version_param.split('|') 268 for tool, version_param in pipeline_data.items():
281 tool_version = parts[0] 269 # Tool line
282 tool, version = tool_version.split('_v') if '_v' in tool_version else (tool_version, "NA") 270 tool_line = f"- <b>{tool}</b>"
283 271 tree_lines.append(tool_line)
284 # Handle parameters: join all but the first (which is tool_version) with ', ' 272
285 param_text = ', '.join(parts[1:]) if len(parts) > 1 else "NA" 273 # Convert version_param to string and split
286 274 version_param_str = str(version_param)
287 # Tool line 275 parts = version_param_str.split('/')
288 tool_line = f"- <b>{tool}</b>" 276 version = parts[0]
289 tree_lines.append(tool_line) 277 params = [p for p in parts[1:] if p] # This will remove empty strings
290 278
291 # Version line 279 # Version line
292 version_line = f"{indent*2}|_ <i>ver:</i> {version}" 280 version_line = f"{indent * 2}|_ <i>ver:</i> {version}"
293 tree_lines.append(version_line) 281 tree_lines.append(version_line)
294 282
295 # Param line(s) 283 # Param line(s)
296 if param_text != "NA": 284 if params:
297 for param in param_text.split(','): 285 for param in params:
298 param = param.strip() 286 param_line = f"{indent * 2}|_ <i>key param:</i> {param}"
299 param_line = f"{indent*2}|_ <i>key param:</i> {param if param else 'NA'}" 287 tree_lines.append(param_line)
288 else:
289 param_line = f"{indent * 2}|_ <i>key param:</i> NA"
300 tree_lines.append(param_line) 290 tree_lines.append(param_line)
301 else: 291 else:
302 param_line = f"{indent*2}|_ <i>key param:</i> NA" 292 tree_lines.append("Invalid pipeline data format")
303 tree_lines.append(param_line)
304 293
305 # Join lines with HTML break for paragraph 294 # Join lines with HTML break for paragraph
306 tree_diagram = "<br/>".join(tree_lines) 295 tree_diagram = "<br/>".join(tree_lines)
307 return tree_diagram 296 return tree_diagram
308 297
328 submitter = yaml_data["Submitter"] 317 submitter = yaml_data["Submitter"]
329 affiliation = yaml_data["Affiliation"] 318 affiliation = yaml_data["Affiliation"]
330 tags = yaml_data["Tags"] 319 tags = yaml_data["Tags"]
331 320
332 # Check if tag is valid 321 # Check if tag is valid
333 valid_tags = ["ERGA-BGE", "ERGA-Pilot", "ERGA-Satellite"] 322 valid_tags = ["ERGA-BGE", "ERGA-Pilot", "ERGA-Community", "ERGA-testing"]
334 if tags not in valid_tags: 323 if tags not in valid_tags:
335 tags += "[INVALID TAG]" 324 tags += "[INVALID TAG]"
336 logging.warning("# SAMPLE INFORMATION section in the yaml file contains an invalid tag. Valid tags are ERGA-BGE, ERGA-Pilot and ERGA-Satellite") 325 logging.warning("# SAMPLE INFORMATION section in the yaml file contains an invalid tag. Valid tags are ERGA-BGE, ERGA-Pilot and ERGA-Community.")
337 326
338 # Get data from GoaT based on species name 327 # Get data from GoaT based on species name
339 # urllib.parse.quote to handle special characters and spaces in the species name 328 # urllib.parse.quote to handle special characters and spaces in the species name
340 species_name = requests.utils.quote(species) 329 species_name = requests.utils.quote(species)
341 330
399 data_values.append('NA' if not coverage else coverage) 388 data_values.append('NA' if not coverage else coverage)
400 389
401 # Create a list of lists for the table 390 # Create a list of lists for the table
402 table_data = [headers, data_values] 391 table_data = [headers, data_values]
403 392
404 # Extract pipeline data from 'Pre-curation' category 393 # Extract pipeline data
405 asm_pipeline_data = yaml_data.get('ASSEMBLIES', {}).get('Pre-curation', {}).get('pipeline', []) 394 asm_pipeline_data = yaml_data.get('PIPELINES', {}).get('Assembly', {})
395 curation_pipeline_data = yaml_data.get('PIPELINES', {}).get('Curation', {})
396
397 # Extract pipeline data from 'Curated' category
406 asm_pipeline_tree = generate_pipeline_tree(asm_pipeline_data) 398 asm_pipeline_tree = generate_pipeline_tree(asm_pipeline_data)
407
408 # Extract pipeline data from 'Curated' category
409 curation_pipeline_data = yaml_data.get('ASSEMBLIES', {}).get('Curated', {}).get('pipeline', [])
410 curation_pipeline_tree = generate_pipeline_tree(curation_pipeline_data) 399 curation_pipeline_tree = generate_pipeline_tree(curation_pipeline_data)
411 400
412 # Reading GENOME PROFILING DATA section from yaml ############################################# 401 # Reading GENOME PROFILING DATA section from yaml #############################################
413
414 profiling_data = yaml_data.get('PROFILING') 402 profiling_data = yaml_data.get('PROFILING')
415 403
416 # Check if profiling_data is available 404 # Check if profiling_data is available
417 if not profiling_data: 405 if not profiling_data:
418 logging.error('Error: No profiling data found in the YAML file.') 406 logging.error('Error: No profiling data found in the YAML file.')
419 sys.exit(1) 407 sys.exit(1)
420 408
421 # Handle GenomeScope specific processing 409 # Check for GenomeScope data (mandatory)
422 genomescope_data = profiling_data.get('GenomeScope') 410 genomescope_data = profiling_data.get('GenomeScope')
423 if genomescope_data: 411 if not genomescope_data:
424 summary_file = genomescope_data.get('genomescope_summary_txt') 412 logging.error("Error: GenomeScope data is missing in the YAML file. This is mandatory.")
425 if summary_file and os.path.exists(summary_file):
426 with open(summary_file, "r") as f:
427 summary_txt = f.read()
428 genome_haploid_length = re.search(r"Genome Haploid Length\s+([\d,]+) bp", summary_txt).group(1)
429 proposed_ploidy_match = re.search(r"p = (\d+)", summary_txt)
430 proposed_ploidy = proposed_ploidy_match.group(1) if proposed_ploidy_match else 'NA'
431 else:
432 logging.error(f"File {summary_file} not found for GenomeScope.")
433 sys.exit(1)
434 else:
435 logging.error("GenomeScope data is missing in the PROFILING section.")
436 sys.exit(1) 413 sys.exit(1)
437 414
438 # Handle Smudgeplot specific processing 415 genomescope_summary = genomescope_data.get('genomescope_summary_txt')
416 if not genomescope_summary:
417 logging.error("Error: GenomeScope summary file path is missing in the YAML file.")
418 sys.exit(1)
419
420 # Read the content of the GenomeScope summary file
421 try:
422 with open(genomescope_summary, "r") as f:
423 summary_txt = f.read()
424 # Extract values from summary.txt
425 genome_haploid_length = re.search(r"Genome Haploid Length\s+([\d,]+) bp", summary_txt).group(1)
426 proposed_ploidy = re.search(r"p = (\d+)", summary_txt).group(1)
427 except Exception as e:
428 logging.error(f"Error reading GenomeScope summary file: {str(e)}")
429 sys.exit(1)
430
431 # Check for Smudgeplot data (optional)
439 smudgeplot_data = profiling_data.get('Smudgeplot') 432 smudgeplot_data = profiling_data.get('Smudgeplot')
440 if smudgeplot_data: 433 if smudgeplot_data:
441 verbose_summary_file = smudgeplot_data.get('smudgeplot_verbose_summary_txt') 434 smudgeplot_summary = smudgeplot_data.get('smudgeplot_verbose_summary_txt')
442 if verbose_summary_file and os.path.exists(verbose_summary_file): 435 if smudgeplot_summary:
443 with open(verbose_summary_file, "r") as f: 436 try:
444 smud_summary_txt = f.readlines() 437 with open(smudgeplot_summary, "r") as f:
445 for line in smud_summary_txt: 438 smud_summary_txt = f.readlines()
446 if line.startswith("* Proposed ploidy"): 439 for line in smud_summary_txt:
447 proposed_ploidy = line.split(":")[1].strip() 440 if line.startswith("* Proposed ploidy"):
448 break 441 proposed_ploidy = line.split(":")[1].strip()
442 break
443 except Exception as e:
444 logging.warning(f"Error reading Smudgeplot summary file: {str(e)}. Using GenomeScope ploidy.")
449 else: 445 else:
450 logging.warning(f"Verbose summary file {verbose_summary_file} not found for Smudgeplot; skipping detailed Smudgeplot analysis.") 446 logging.warning("Smudgeplot summary file path is missing. Using GenomeScope ploidy.")
451 else: 447 else:
452 logging.warning("Smudgeplot data is missing in the PROFILING section; skipping Smudgeplot analysis.") 448 logging.info("Smudgeplot data not provided. Using GenomeScope ploidy.")
453 449
454 # Reading ASSEMBLY DATA section from yaml ##################################################### 450 # Reading ASSEMBLY DATA section from yaml #####################################################
455 451
456 asm_data = yaml_data.get('ASSEMBLIES', {}) 452 asm_data = yaml_data.get('ASSEMBLIES', {})
457 453
458 # make a list from the assemblies available in asm_data 454 # make a list from the assemblies available in asm_data
459 asm_stages = [] 455 asm_stages = []
460 for asm_stage, stage_properties in asm_data.items(): 456 for asm_stage, stage_properties in asm_data.items():
461 for haplotypes in stage_properties.keys(): 457 for haplotypes in stage_properties.keys():
462 if haplotypes != 'pipeline' and haplotypes not in asm_stages: 458 if haplotypes not in asm_stages:
463 asm_stages.append(haplotypes) 459 asm_stages.append(haplotypes)
464 460
465 # get gfastats-based data 461 # get gfastats-based data
466 gfastats_data = {} 462 gfastats_data = {}
467 for asm_stage, stage_properties in asm_data.items(): 463 for asm_stage, stage_properties in asm_data.items():
481 gaps_per_gbp = round((gaps / total_length * 1_000_000_000), 2) 477 gaps_per_gbp = round((gaps / total_length * 1_000_000_000), 2)
482 gaps_per_gbp_data[(asm_stage, haplotypes)] = gaps_per_gbp 478 gaps_per_gbp_data[(asm_stage, haplotypes)] = gaps_per_gbp
483 except (ValueError, ZeroDivisionError): 479 except (ValueError, ZeroDivisionError):
484 gaps_per_gbp_data[(asm_stage, haplotypes)] = '' 480 gaps_per_gbp_data[(asm_stage, haplotypes)] = ''
485 481
486 # Define the contigging table (column names) DON'T MOVE THIS AGAIN!!!!!!! 482 # Define the contigging table (column names)
487 asm_table_data = [["Metrics"] + [f'{asm_stage} \n {haplotypes}' for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]]] 483 asm_table_data = [["Metrics"] + [f'{asm_stage} \n {haplotypes}' for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]]]
488 484
489 # Fill the table with the gfastats data 485 # Fill the table with the gfastats data
490 for i in range(len(display_names)): 486 for i in range(len(display_names)):
491 metric = display_names[i] 487 metric = display_names[i]
492 if metric not in exclusion_list: 488 if metric not in exclusion_list:
493 asm_table_data.append([metric] + [format_number(gfastats_data.get((asm_stage, haplotypes), [''])[i]) if (asm_stage, haplotypes) in gfastats_data else '' for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]]) 489 asm_table_data.append([metric] + [format_number(gfastats_data.get((asm_stage, haplotypes), [''])[i]) if (asm_stage, haplotypes) in gfastats_data else '' for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]])
494 490
495 # Add the gaps/gbp in between 491 # Add the gaps/gbp in between
496 gc_index = display_names.index("GC %")
497 gc_index
498 asm_table_data.insert(gaps_index + 1, ['Gaps/Gbp'] + [format_number(gaps_per_gbp_data.get((asm_stage, haplotypes), '')) for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]]) 492 asm_table_data.insert(gaps_index + 1, ['Gaps/Gbp'] + [format_number(gaps_per_gbp_data.get((asm_stage, haplotypes), '')) for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]])
499 493
500 # get QV, Kmer completeness and BUSCO data 494 # get QV, Kmer completeness and BUSCO data
501 qv_data = {} 495 qv_data = {}
502 completeness_data = {} 496 completeness_data = {}
503 busco_data = {metric: {} for metric in ['BUSCO sing.', 'BUSCO dupl.', 'BUSCO frag.', 'BUSCO miss.']} 497 busco_data = {metric: {} for metric in ['BUSCO sing.', 'BUSCO dupl.', 'BUSCO frag.', 'BUSCO miss.']}
504 for asm_stage, stage_properties in asm_data.items(): 498 for asm_stage, stage_properties in asm_data.items():
505 asm_stage_elements = [element for element in stage_properties.keys() if element != 'pipeline'] 499 asm_stage_elements = list(stage_properties.keys())
506 for i, haplotypes in enumerate(asm_stage_elements): 500 for i, haplotypes in enumerate(asm_stage_elements):
507 haplotype_properties = stage_properties[haplotypes] 501 haplotype_properties = stage_properties[haplotypes]
508 if isinstance(haplotype_properties, dict): 502 if isinstance(haplotype_properties, dict):
509 if 'merqury_qv' in haplotype_properties: 503 if 'merqury_qv' in haplotype_properties:
510 qv_data[(asm_stage, haplotypes)] = get_qv_value(haplotype_properties['merqury_qv'], i, asm_stage, haplotypes) 504 qv_data[(asm_stage, haplotypes)] = get_qv_value(haplotype_properties['merqury_qv'], i, asm_stage, haplotypes)
578 styles = getSampleStyleSheet() 572 styles = getSampleStyleSheet()
579 styles.add(ParagraphStyle(name='TitleStyle', fontName='Courier', fontSize=20)) 573 styles.add(ParagraphStyle(name='TitleStyle', fontName='Courier', fontSize=20))
580 styles.add(ParagraphStyle(name='subTitleStyle', fontName='Courier', fontSize=16)) 574 styles.add(ParagraphStyle(name='subTitleStyle', fontName='Courier', fontSize=16))
581 styles.add(ParagraphStyle(name='normalStyle', fontName='Courier', fontSize=12)) 575 styles.add(ParagraphStyle(name='normalStyle', fontName='Courier', fontSize=12))
582 styles.add(ParagraphStyle(name='midiStyle', fontName='Courier', fontSize=10)) 576 styles.add(ParagraphStyle(name='midiStyle', fontName='Courier', fontSize=10))
583 styles.add(ParagraphStyle(name='LinkStyle', fontName='Courier', fontSize=10, textColor='blue', underline=True)) 577 # styles.add(ParagraphStyle(name='LinkStyle', fontName='Courier', fontSize=10, textColor='blue', underline=True))
584 styles.add(ParagraphStyle(name='treeStyle', fontName='Courier', fontSize=10, leftIndent=12)) 578 styles.add(ParagraphStyle(name='treeStyle', fontName='Courier', fontSize=10, leftIndent=12))
585 styles.add(ParagraphStyle(name='miniStyle', fontName='Courier', fontSize=8)) 579 styles.add(ParagraphStyle(name='miniStyle', fontName='Courier', fontSize=8))
586 styles.add(ParagraphStyle(name='FileNameStyle', fontName='Courier', fontSize=6)) 580 styles.add(ParagraphStyle(name='FileNameStyle', fontName='Courier', fontSize=6))
587 581
588 # PDF SECTION 1 ------------------------------------------------------------------------------- 582 # PDF SECTION 1 -------------------------------------------------------------------------------
657 # Spacer 651 # Spacer
658 elements.append(Spacer(1, 24)) 652 elements.append(Spacer(1, 24))
659 653
660 # Iterate over haplotypes in the Curated category to get data for EBP metrics 654 # Iterate over haplotypes in the Curated category to get data for EBP metrics
661 curated_assemblies = yaml_data.get('ASSEMBLIES', {}).get('Curated', {}) 655 curated_assemblies = yaml_data.get('ASSEMBLIES', {}).get('Curated', {})
662 haplotype_names = [key for key in curated_assemblies.keys() if key != 'pipeline'] 656 haplotype_names = list(curated_assemblies.keys())
663 657
664 for haplotype in haplotype_names: 658 for haplotype in haplotype_names:
665 properties = curated_assemblies[haplotype] 659 properties = curated_assemblies[haplotype]
666 if 'gfastats--nstar-report_txt' in properties and 'merqury_qv' in properties: 660 if 'gfastats--nstar-report_txt' in properties and 'merqury_qv' in properties:
667 gfastats_path = properties['gfastats--nstar-report_txt'] 661 gfastats_path = properties['gfastats--nstar-report_txt']
754 elements.append(Spacer(1, 5)) 748 elements.append(Spacer(1, 5))
755 749
756 # Store BUSCO version and lineage information from each file in list 750 # Store BUSCO version and lineage information from each file in list
757 busco_info_list = [] 751 busco_info_list = []
758 for asm_stages, stage_properties in asm_data.items(): 752 for asm_stages, stage_properties in asm_data.items():
759 for haplotype_keys, haplotype_properties in stage_properties.items(): 753 for i, haplotype_properties in stage_properties.items():
760 if isinstance(haplotype_properties, dict): 754 if isinstance(haplotype_properties, dict):
761 if 'busco_short_summary_txt' in haplotype_properties: 755 if 'busco_short_summary_txt' in haplotype_properties:
762 busco_version, lineage_info = extract_busco_info(haplotype_properties['busco_short_summary_txt']) 756 busco_version, lineage_info = extract_busco_info(haplotype_properties['busco_short_summary_txt'])
763 if busco_version and lineage_info: 757 if busco_version and lineage_info:
764 busco_info_list.append((busco_version, lineage_info)) 758 busco_info_list.append((busco_version, lineage_info))
785 779
786 # Initialize counter 780 # Initialize counter
787 tool_count = 0 781 tool_count = 0
788 782
789 # Add title and images for each step 783 # Add title and images for each step
790 for idx, (asm_stages, stage_properties) in enumerate(asm_data.items(), 1): 784 for asm_stages, stage_properties in asm_data.items():
791 if asm_stages == 'Curated': 785 if asm_stages == 'Curated':
792 tool_elements = [element for element in stage_properties.keys() if element != 'pipeline'] 786 tool_elements = list(stage_properties.keys())
793 787
794 images_with_names = [] 788 images_with_names = []
795 789
796 for haplotype in tool_elements: 790 for haplotype in tool_elements:
797 haplotype_properties = stage_properties[haplotype] 791 haplotype_properties = stage_properties[haplotype]
823 if len(tool_elements) > 1 and tool_elements.index(haplotype) < len(tool_elements) - 1: 817 if len(tool_elements) > 1 and tool_elements.index(haplotype) < len(tool_elements) - 1:
824 images_with_names.append([Spacer(1, 12)]) 818 images_with_names.append([Spacer(1, 12)])
825 819
826 # Add images and names to the elements in pairs 820 # Add images and names to the elements in pairs
827 for i in range(0, len(images_with_names), 4): # Process two images (and their names) at a time 821 for i in range(0, len(images_with_names), 4): # Process two images (and their names) at a time
828 elements_to_add = images_with_names[i:i + 4] 822 elements_to_add = images_with_names[i: i + 4]
829 823
830 # Create table for the images and names 824 # Create table for the images and names
831 table = Table(elements_to_add) 825 table = Table(elements_to_add)
832 table.hAlign = 'CENTER' 826 table.hAlign = 'CENTER'
833 elements.append(table) 827 elements.append(table)
854 # Initialize counter 848 # Initialize counter
855 counter = 0 849 counter = 0
856 850
857 # Iterate over haplotypes in the Curated category to get K-mer spectra images 851 # Iterate over haplotypes in the Curated category to get K-mer spectra images
858 curated_assemblies = yaml_data.get('ASSEMBLIES', {}).get('Curated', {}) 852 curated_assemblies = yaml_data.get('ASSEMBLIES', {}).get('Curated', {})
859 haplotype_names = [key for key in curated_assemblies.keys() if key != 'pipeline']
860 853
861 # Get paths for spectra files 854 # Get paths for spectra files
862 spectra_files = { 855 spectra_files = {
863 'hap1': { 856 'hap1': {
864 'spectra_cn_png': curated_assemblies.get('hap1', {}).get('merqury_hap_spectra_cn_png', None), 857 'spectra_cn_png': curated_assemblies.get('hap1', {}).get('merqury_hap_spectra_cn_png', None),
972 965
973 # Initialize counter 966 # Initialize counter
974 tool_count = 0 967 tool_count = 0
975 968
976 # Add title and images for each step 969 # Add title and images for each step
977 for idx, (asm_stages, stage_properties) in enumerate(asm_data.items(), 1): 970 for asm_stages, stage_properties in asm_data.items():
978 if asm_stages == 'Curated': # Check if the current stage is 'Curated' 971 if asm_stages == 'Curated': # Check if the current stage is 'Curated'
979 tool_elements = [element for element in stage_properties.keys() if element != 'pipeline'] 972 tool_elements = list(stage_properties.keys())
980 973
981 for haplotype in tool_elements: 974 for haplotype in tool_elements:
982 haplotype_properties = stage_properties[haplotype] 975 haplotype_properties = stage_properties[haplotype]
983 if isinstance(haplotype_properties, dict) and 'blobplot_cont_png' in haplotype_properties: 976 if isinstance(haplotype_properties, dict) and 'blobplot_cont_png' in haplotype_properties:
984 # Get image path 977 # Get image path