comparison: make_EAR.py @ 2:a34826ae0a73 (draft)
planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176

author   | bgruening
date     | Fri, 30 Aug 2024 09:27:31 +0000
parents  | 8c99976de71e
children | 3dd6be0cd8dd
1:b61022e1b807 | 2:a34826ae0a73
1 | 1 |
2 import argparse | 2 import argparse |
3 import glob | |
4 import logging | 3 import logging |
5 import math | 4 import math |
6 import os | 5 import os |
7 import re | 6 import re |
8 import sys | 7 import sys |
20 | 19 |
21 # make_EAR_glxy.py | 20 # make_EAR_glxy.py |
22 # CAUTION: This is for the Galaxy version! | 21 # CAUTION: This is for the Galaxy version! |
23 # by Diego De Panis | 22 # by Diego De Panis |
24 # ERGA Sequencing and Assembly Committee | 23 # ERGA Sequencing and Assembly Committee |
25 EAR_version = "v24.05.20_glxy_beta" | 24 EAR_version = "v24.08.26" |
26 | 25 |
27 | 26 |
28 def make_report(yaml_file): | 27 def make_report(yaml_file): |
29 logging.basicConfig(filename='EAR.log', level=logging.INFO) | 28 logging.basicConfig(filename='EAR.log', level=logging.INFO) |
30 # Read the content from EAR.yaml file | 29 # Read the content from EAR.yaml file |
118 if len(lines) > order: | 117 if len(lines) > order: |
119 target_line = lines[order] | 118 target_line = lines[order] |
120 fifth_column_value = target_line.split('\t')[4].strip() | 119 fifth_column_value = target_line.split('\t')[4].strip() |
121 return fifth_column_value | 120 return fifth_column_value |
122 except Exception as e: | 121 except Exception as e: |
123 logging.warning(f"Error reading {file_path}: {str(e)}") | 122 logging.error(f"Error reading {file_path} for tool {tool} and haplotype {haplotype}: {str(e)}") |
124 return '' | 123 return '' |
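The hunk above is the tail of a tab-column lookup helper; the new side only enriches the error log with tool and haplotype context. A minimal self-contained sketch of the same pattern (the function and argument names here are stand-ins, not the script's own):

```python
# Sketch of the tab-separated column lookup used above; file_path and
# order are hypothetical stand-ins for the real arguments.
def get_fifth_column(file_path, order):
    try:
        with open(file_path, 'r') as f:
            lines = f.readlines()
        if len(lines) > order:
            # Column 5 (index 4) holds the value of interest.
            return lines[order].split('\t')[4].strip()
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
    return ''
```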
125 | |
126 # Getting kmer plots for curated asm | |
127 def get_png_files(dir_path): | |
128 png_files = glob.glob(f"{dir_path}/*.ln.png") | |
129 if len(png_files) < 4: | |
130 logging.warning(f"Warning: Less than 4 png files found in {dir_path}. If this is diploid, some images may be missing.") | |
131 # fill missing with None | |
132 while len(png_files) < 4: | |
133 png_files.append(None) | |
134 return png_files[:4] | |
135 | 124 |
136 # get unique part in file names | 125 # get unique part in file names |
137 def find_unique_parts(file1, file2): | 126 def find_unique_parts(file1, file2): |
138 # Split filenames into parts | 127 # Split filenames into parts |
139 parts1 = file1.split('.') | 128 parts1 = file1.split('.') |
140 parts2 = file2.split('.') | 129 parts2 = file2.split('.') |
141 # Find unique parts | 130 # Find unique parts |
142 unique_parts1 = [part for part in parts1 if part not in parts2] | 131 unique_parts1 = [part for part in parts1 if part not in parts2] |
143 unique_parts2 = [part for part in parts2 if part not in parts1] | 132 unique_parts2 = [part for part in parts2 if part not in parts1] |
144 | |
145 return ' '.join(unique_parts1), ' '.join(unique_parts2) | 133 return ' '.join(unique_parts1), ' '.join(unique_parts2) |
146 | 134 |
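find_unique_parts is a pure function, so its behaviour is easy to pin down with an invented pair of Merqury-style file names that differ only in the haplotype token:

```python
def find_unique_parts(file1, file2):
    # Split on dots and keep the tokens each name does not share.
    parts1, parts2 = file1.split('.'), file2.split('.')
    return (' '.join(p for p in parts1 if p not in parts2),
            ' '.join(p for p in parts2 if p not in parts1))

# Invented file names:
print(find_unique_parts("asm.hap1.spectra-cn.ln.png", "asm.hap2.spectra-cn.ln.png"))
# ('hap1', 'hap2')
```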
147 # extract BUSCO values | 135 # extract BUSCO values |
148 def extract_busco_values(file_path): | 136 def extract_busco_values(file_path): |
149 try: | 137 try: |
272 return warnings | 260 return warnings |
273 | 261 |
274 # Parse pipeline and generate "tree" | 262 # Parse pipeline and generate "tree" |
275 def generate_pipeline_tree(pipeline_data): | 263 def generate_pipeline_tree(pipeline_data): |
276 tree_lines = [] | 264 tree_lines = [] |
277 indent = " " * 2 # Adjust indent spacing as needed | 265 indent = " " * 2 # Adjust indent spacing |
278 | 266 |
279 for tool_version_param in pipeline_data: | 267 if isinstance(pipeline_data, dict): |
280 parts = tool_version_param.split('|') | 268 for tool, version_param in pipeline_data.items(): |
281 tool_version = parts[0] | 269 # Tool line |
282 tool, version = tool_version.split('_v') if '_v' in tool_version else (tool_version, "NA") | 270 tool_line = f"- <b>{tool}</b>" |
283 | 271 tree_lines.append(tool_line) |
284 # Handle parameters: join all but the first (which is tool_version) with ', ' | 272 |
285 param_text = ', '.join(parts[1:]) if len(parts) > 1 else "NA" | 273 # Convert version_param to string and split |
286 | 274 version_param_str = str(version_param) |
287 # Tool line | 275 parts = version_param_str.split('/') |
288 tool_line = f"- <b>{tool}</b>" | 276 version = parts[0] |
289 tree_lines.append(tool_line) | 277 params = [p for p in parts[1:] if p] # This will remove empty strings |
290 | 278 |
291 # Version line | 279 # Version line |
292 version_line = f"{indent*2}|_ <i>ver:</i> {version}" | 280 version_line = f"{indent * 2}|_ <i>ver:</i> {version}" |
293 tree_lines.append(version_line) | 281 tree_lines.append(version_line) |
294 | 282 |
295 # Param line(s) | 283 # Param line(s) |
296 if param_text != "NA": | 284 if params: |
297 for param in param_text.split(','): | 285 for param in params: |
298 param = param.strip() | 286 param_line = f"{indent * 2}|_ <i>key param:</i> {param}" |
299 param_line = f"{indent*2}|_ <i>key param:</i> {param if param else 'NA'}" | 287 tree_lines.append(param_line) |
288 else: | |
289 param_line = f"{indent * 2}|_ <i>key param:</i> NA" | |
300 tree_lines.append(param_line) | 290 tree_lines.append(param_line) |
301 else: | 291 else: |
302 param_line = f"{indent*2}|_ <i>key param:</i> NA" | 292 tree_lines.append("Invalid pipeline data format") |
303 tree_lines.append(param_line) | |
304 | 293 |
305 # Join lines with HTML break for paragraph | 294 # Join lines with HTML break for paragraph |
306 tree_diagram = "<br/>".join(tree_lines) | 295 tree_diagram = "<br/>".join(tree_lines) |
307 return tree_diagram | 296 return tree_diagram |
308 | 297 |
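The rewrite changes the expected input shape: the old code parsed a list of "tool_vVERSION|param" strings, while the new code walks a dict mapping each tool to a "version/param/..." string. A sketch of the new shape, inferred from the parsing above (tool names and parameters are invented):

```python
# Hypothetical PIPELINES entry in the new "tool: version/param/..." shape.
pipelines = {
    "Assembly": {
        "hifiasm": "0.19.8/--hom-cov 34/-l3",
        "purge_dups": "1.2.6",   # no params -> a single "key param: NA" line
    },
    "Curation": {
        "GRIT_Rapid": "2.0",
    },
}
# generate_pipeline_tree(pipelines["Assembly"]) would render lines such as:
# - <b>hifiasm</b><br/>    |_ <i>ver:</i> 0.19.8<br/>    |_ <i>key param:</i> --hom-cov 34 ...
```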
328 submitter = yaml_data["Submitter"] | 317 submitter = yaml_data["Submitter"] |
329 affiliation = yaml_data["Affiliation"] | 318 affiliation = yaml_data["Affiliation"] |
330 tags = yaml_data["Tags"] | 319 tags = yaml_data["Tags"] |
331 | 320 |
332 # Check if tag is valid | 321 # Check if tag is valid |
333 valid_tags = ["ERGA-BGE", "ERGA-Pilot", "ERGA-Satellite"] | 322 valid_tags = ["ERGA-BGE", "ERGA-Pilot", "ERGA-Community", "ERGA-testing"] |
334 if tags not in valid_tags: | 323 if tags not in valid_tags: |
335 tags += "[INVALID TAG]" | 324 tags += "[INVALID TAG]" |
336 logging.warning("# SAMPLE INFORMATION section in the yaml file contains an invalid tag. Valid tags are ERGA-BGE, ERGA-Pilot and ERGA-Satellite") | 325 logging.warning("# SAMPLE INFORMATION section in the yaml file contains an invalid tag. Valid tags are ERGA-BGE, ERGA-Pilot, ERGA-Community and ERGA-testing.") |
337 | 326 |
338 # Get data from GoaT based on species name | 327 # Get data from GoaT based on species name |
339 # urllib.parse.quote to handle special characters and spaces in the species name | 328 # urllib.parse.quote to handle special characters and spaces in the species name |
340 species_name = requests.utils.quote(species) | 329 species_name = requests.utils.quote(species) |
341 | 330 |
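requests.utils.quote is a re-export of urllib.parse.quote, so spaces and other special characters are percent-encoded before the name is embedded in the GoaT query URL (the URL itself is not shown in this hunk). A quick illustration with an invented species name:

```python
import requests

# Percent-encode a binomial name for safe use in a query string.
species_name = requests.utils.quote("Vipera latastei")
print(species_name)  # Vipera%20latastei
```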
399 data_values.append('NA' if not coverage else coverage) | 388 data_values.append('NA' if not coverage else coverage) |
400 | 389 |
401 # Create a list of lists for the table | 390 # Create a list of lists for the table |
402 table_data = [headers, data_values] | 391 table_data = [headers, data_values] |
403 | 392 |
404 # Extract pipeline data from 'Pre-curation' category | 393 # Extract pipeline data |
405 asm_pipeline_data = yaml_data.get('ASSEMBLIES', {}).get('Pre-curation', {}).get('pipeline', []) | 394 asm_pipeline_data = yaml_data.get('PIPELINES', {}).get('Assembly', {}) |
395 curation_pipeline_data = yaml_data.get('PIPELINES', {}).get('Curation', {}) | |
396 | |
397 # Extract pipeline data from 'Curated' category | |
406 asm_pipeline_tree = generate_pipeline_tree(asm_pipeline_data) | 398 asm_pipeline_tree = generate_pipeline_tree(asm_pipeline_data) |
407 | |
408 # Extract pipeline data from 'Curated' category | |
409 curation_pipeline_data = yaml_data.get('ASSEMBLIES', {}).get('Curated', {}).get('pipeline', []) | |
410 curation_pipeline_tree = generate_pipeline_tree(curation_pipeline_data) | 399 curation_pipeline_tree = generate_pipeline_tree(curation_pipeline_data) |
411 | 400 |
412 # Reading GENOME PROFILING DATA section from yaml ############################################# | 401 # Reading GENOME PROFILING DATA section from yaml ############################################# |
413 | |
414 profiling_data = yaml_data.get('PROFILING') | 402 profiling_data = yaml_data.get('PROFILING') |
415 | 403 |
416 # Check if profiling_data is available | 404 # Check if profiling_data is available |
417 if not profiling_data: | 405 if not profiling_data: |
418 logging.error('Error: No profiling data found in the YAML file.') | 406 logging.error('Error: No profiling data found in the YAML file.') |
419 sys.exit(1) | 407 sys.exit(1) |
420 | 408 |
421 # Handle GenomeScope specific processing | 409 # Check for GenomeScope data (mandatory) |
422 genomescope_data = profiling_data.get('GenomeScope') | 410 genomescope_data = profiling_data.get('GenomeScope') |
423 if genomescope_data: | 411 if not genomescope_data: |
424 summary_file = genomescope_data.get('genomescope_summary_txt') | 412 logging.error("Error: GenomeScope data is missing in the YAML file. This is mandatory.") |
425 if summary_file and os.path.exists(summary_file): | |
426 with open(summary_file, "r") as f: | |
427 summary_txt = f.read() | |
428 genome_haploid_length = re.search(r"Genome Haploid Length\s+([\d,]+) bp", summary_txt).group(1) | |
429 proposed_ploidy_match = re.search(r"p = (\d+)", summary_txt) | |
430 proposed_ploidy = proposed_ploidy_match.group(1) if proposed_ploidy_match else 'NA' | |
431 else: | |
432 logging.error(f"File {summary_file} not found for GenomeScope.") | |
433 sys.exit(1) | |
434 else: | |
435 logging.error("GenomeScope data is missing in the PROFILING section.") | |
436 sys.exit(1) | 413 sys.exit(1) |
437 | 414 |
438 # Handle Smudgeplot specific processing | 415 genomescope_summary = genomescope_data.get('genomescope_summary_txt') |
416 if not genomescope_summary: | |
417 logging.error("Error: GenomeScope summary file path is missing in the YAML file.") | |
418 sys.exit(1) | |
419 | |
420 # Read the content of the GenomeScope summary file | |
421 try: | |
422 with open(genomescope_summary, "r") as f: | |
423 summary_txt = f.read() | |
424 # Extract values from summary.txt | |
425 genome_haploid_length = re.search(r"Genome Haploid Length\s+([\d,]+) bp", summary_txt).group(1) | |
426 proposed_ploidy = re.search(r"p = (\d+)", summary_txt).group(1) | |
427 except Exception as e: | |
428 logging.error(f"Error reading GenomeScope summary file: {str(e)}") | |
429 sys.exit(1) | |
430 | |
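The new code fails fast instead of silently defaulting the ploidy. The two regex extractions in isolation, run against an invented fragment of a GenomeScope summary.txt:

```python
import re

# Invented GenomeScope summary fragment; real files carry min/max columns.
summary_txt = (
    "p = 2\n"
    "Genome Haploid Length          1,253,456,789 bp   1,255,000,000 bp\n"
)
genome_haploid_length = re.search(r"Genome Haploid Length\s+([\d,]+) bp", summary_txt).group(1)
proposed_ploidy = re.search(r"p = (\d+)", summary_txt).group(1)
print(genome_haploid_length, proposed_ploidy)  # 1,253,456,789 2
```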
431 # Check for Smudgeplot data (optional) | |
439 smudgeplot_data = profiling_data.get('Smudgeplot') | 432 smudgeplot_data = profiling_data.get('Smudgeplot') |
440 if smudgeplot_data: | 433 if smudgeplot_data: |
441 verbose_summary_file = smudgeplot_data.get('smudgeplot_verbose_summary_txt') | 434 smudgeplot_summary = smudgeplot_data.get('smudgeplot_verbose_summary_txt') |
442 if verbose_summary_file and os.path.exists(verbose_summary_file): | 435 if smudgeplot_summary: |
443 with open(verbose_summary_file, "r") as f: | 436 try: |
444 smud_summary_txt = f.readlines() | 437 with open(smudgeplot_summary, "r") as f: |
445 for line in smud_summary_txt: | 438 smud_summary_txt = f.readlines() |
446 if line.startswith("* Proposed ploidy"): | 439 for line in smud_summary_txt: |
447 proposed_ploidy = line.split(":")[1].strip() | 440 if line.startswith("* Proposed ploidy"): |
448 break | 441 proposed_ploidy = line.split(":")[1].strip() |
442 break | |
443 except Exception as e: | |
444 logging.warning(f"Error reading Smudgeplot summary file: {str(e)}. Using GenomeScope ploidy.") | |
449 else: | 445 else: |
450 logging.warning(f"Verbose summary file {verbose_summary_file} not found for Smudgeplot; skipping detailed Smudgeplot analysis.") | 446 logging.warning("Smudgeplot summary file path is missing. Using GenomeScope ploidy.") |
451 else: | 447 else: |
452 logging.warning("Smudgeplot data is missing in the PROFILING section; skipping Smudgeplot analysis.") | 448 logging.info("Smudgeplot data not provided. Using GenomeScope ploidy.") |
453 | 449 |
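Smudgeplot handling is now explicitly optional, falling back to the GenomeScope ploidy on any failure. The line-parsing step on its own (sample value invented):

```python
# The ploidy line as it appears in a smudgeplot verbose summary.
line = "* Proposed ploidy: 2\n"
if line.startswith("* Proposed ploidy"):
    proposed_ploidy = line.split(":")[1].strip()
    print(proposed_ploidy)  # 2
```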
454 # Reading ASSEMBLY DATA section from yaml ##################################################### | 450 # Reading ASSEMBLY DATA section from yaml ##################################################### |
455 | 451 |
456 asm_data = yaml_data.get('ASSEMBLIES', {}) | 452 asm_data = yaml_data.get('ASSEMBLIES', {}) |
457 | 453 |
458 # make a list from the assemblies available in asm_data | 454 # make a list from the assemblies available in asm_data |
459 asm_stages = [] | 455 asm_stages = [] |
460 for asm_stage, stage_properties in asm_data.items(): | 456 for asm_stage, stage_properties in asm_data.items(): |
461 for haplotypes in stage_properties.keys(): | 457 for haplotypes in stage_properties.keys(): |
462 if haplotypes != 'pipeline' and haplotypes not in asm_stages: | 458 if haplotypes not in asm_stages: |
463 asm_stages.append(haplotypes) | 459 asm_stages.append(haplotypes) |
464 | 460 |
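The simplified loop no longer has to skip a 'pipeline' key, since pipelines moved to the top-level PIPELINES section. Its behaviour on an invented ASSEMBLIES layout:

```python
# Invented ASSEMBLIES layout: stages now contain only haplotype keys.
asm_data = {
    "Pre-curation": {"hap1": {}, "hap2": {}},
    "Curated": {"hap1": {}, "hap2": {}},
}
asm_stages = []
for stage_properties in asm_data.values():
    for haplotype in stage_properties:
        if haplotype not in asm_stages:
            asm_stages.append(haplotype)
print(asm_stages)  # ['hap1', 'hap2']
```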
465 # get gfastats-based data | 461 # get gfastats-based data |
466 gfastats_data = {} | 462 gfastats_data = {} |
467 for asm_stage, stage_properties in asm_data.items(): | 463 for asm_stage, stage_properties in asm_data.items(): |
481 gaps_per_gbp = round((gaps / total_length * 1_000_000_000), 2) | 477 gaps_per_gbp = round((gaps / total_length * 1_000_000_000), 2) |
482 gaps_per_gbp_data[(asm_stage, haplotypes)] = gaps_per_gbp | 478 gaps_per_gbp_data[(asm_stage, haplotypes)] = gaps_per_gbp |
483 except (ValueError, ZeroDivisionError): | 479 except (ValueError, ZeroDivisionError): |
484 gaps_per_gbp_data[(asm_stage, haplotypes)] = '' | 480 gaps_per_gbp_data[(asm_stage, haplotypes)] = '' |
485 | 481 |
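A worked example of the gaps/Gbp normalisation with invented numbers:

```python
# 250 gaps in a 1.4 Gbp assembly, scaled to gaps per Gbp.
gaps = 250
total_length = 1_400_000_000
gaps_per_gbp = round(gaps / total_length * 1_000_000_000, 2)
print(gaps_per_gbp)  # 178.57
```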
486 # Define the contigging table (column names) DON'T MOVE THIS AGAIN!!!!!!! | 482 # Define the contigging table (column names) |
487 asm_table_data = [["Metrics"] + [f'{asm_stage} \n {haplotypes}' for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]]] | 483 asm_table_data = [["Metrics"] + [f'{asm_stage} \n {haplotypes}' for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]]] |
488 | 484 |
489 # Fill the table with the gfastats data | 485 # Fill the table with the gfastats data |
490 for i in range(len(display_names)): | 486 for i in range(len(display_names)): |
491 metric = display_names[i] | 487 metric = display_names[i] |
492 if metric not in exclusion_list: | 488 if metric not in exclusion_list: |
493 asm_table_data.append([metric] + [format_number(gfastats_data.get((asm_stage, haplotypes), [''])[i]) if (asm_stage, haplotypes) in gfastats_data else '' for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]]) | 489 asm_table_data.append([metric] + [format_number(gfastats_data.get((asm_stage, haplotypes), [''])[i]) if (asm_stage, haplotypes) in gfastats_data else '' for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]]) |
494 | 490 |
495 # Add the gaps/gbp in between | 491 # Add the gaps/gbp in between |
496 gc_index = display_names.index("GC %") | |
497 gc_index | |
498 asm_table_data.insert(gaps_index + 1, ['Gaps/Gbp'] + [format_number(gaps_per_gbp_data.get((asm_stage, haplotypes), '')) for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]]) | 492 asm_table_data.insert(gaps_index + 1, ['Gaps/Gbp'] + [format_number(gaps_per_gbp_data.get((asm_stage, haplotypes), '')) for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]]) |
499 | 493 |
500 # get QV, Kmer completeness and BUSCO data | 494 # get QV, Kmer completeness and BUSCO data |
501 qv_data = {} | 495 qv_data = {} |
502 completeness_data = {} | 496 completeness_data = {} |
503 busco_data = {metric: {} for metric in ['BUSCO sing.', 'BUSCO dupl.', 'BUSCO frag.', 'BUSCO miss.']} | 497 busco_data = {metric: {} for metric in ['BUSCO sing.', 'BUSCO dupl.', 'BUSCO frag.', 'BUSCO miss.']} |
504 for asm_stage, stage_properties in asm_data.items(): | 498 for asm_stage, stage_properties in asm_data.items(): |
505 asm_stage_elements = [element for element in stage_properties.keys() if element != 'pipeline'] | 499 asm_stage_elements = list(stage_properties.keys()) |
506 for i, haplotypes in enumerate(asm_stage_elements): | 500 for i, haplotypes in enumerate(asm_stage_elements): |
507 haplotype_properties = stage_properties[haplotypes] | 501 haplotype_properties = stage_properties[haplotypes] |
508 if isinstance(haplotype_properties, dict): | 502 if isinstance(haplotype_properties, dict): |
509 if 'merqury_qv' in haplotype_properties: | 503 if 'merqury_qv' in haplotype_properties: |
510 qv_data[(asm_stage, haplotypes)] = get_qv_value(haplotype_properties['merqury_qv'], i, asm_stage, haplotypes) | 504 qv_data[(asm_stage, haplotypes)] = get_qv_value(haplotype_properties['merqury_qv'], i, asm_stage, haplotypes) |
578 styles = getSampleStyleSheet() | 572 styles = getSampleStyleSheet() |
579 styles.add(ParagraphStyle(name='TitleStyle', fontName='Courier', fontSize=20)) | 573 styles.add(ParagraphStyle(name='TitleStyle', fontName='Courier', fontSize=20)) |
580 styles.add(ParagraphStyle(name='subTitleStyle', fontName='Courier', fontSize=16)) | 574 styles.add(ParagraphStyle(name='subTitleStyle', fontName='Courier', fontSize=16)) |
581 styles.add(ParagraphStyle(name='normalStyle', fontName='Courier', fontSize=12)) | 575 styles.add(ParagraphStyle(name='normalStyle', fontName='Courier', fontSize=12)) |
582 styles.add(ParagraphStyle(name='midiStyle', fontName='Courier', fontSize=10)) | 576 styles.add(ParagraphStyle(name='midiStyle', fontName='Courier', fontSize=10)) |
583 styles.add(ParagraphStyle(name='LinkStyle', fontName='Courier', fontSize=10, textColor='blue', underline=True)) | 577 # styles.add(ParagraphStyle(name='LinkStyle', fontName='Courier', fontSize=10, textColor='blue', underline=True)) |
584 styles.add(ParagraphStyle(name='treeStyle', fontName='Courier', fontSize=10, leftIndent=12)) | 578 styles.add(ParagraphStyle(name='treeStyle', fontName='Courier', fontSize=10, leftIndent=12)) |
585 styles.add(ParagraphStyle(name='miniStyle', fontName='Courier', fontSize=8)) | 579 styles.add(ParagraphStyle(name='miniStyle', fontName='Courier', fontSize=8)) |
586 styles.add(ParagraphStyle(name='FileNameStyle', fontName='Courier', fontSize=6)) | 580 styles.add(ParagraphStyle(name='FileNameStyle', fontName='Courier', fontSize=6)) |
587 | 581 |
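These are standard ReportLab stylesheet additions; a minimal sketch of how one of the Courier styles feeds a flowable (the output file name here is hypothetical):

```python
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import Paragraph, SimpleDocTemplate

# Register a custom Courier style, then render one paragraph with it.
styles = getSampleStyleSheet()
styles.add(ParagraphStyle(name='normalStyle', fontName='Courier', fontSize=12))
doc = SimpleDocTemplate("demo.pdf")
doc.build([Paragraph("EAR demo text", styles['normalStyle'])])
```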
588 # PDF SECTION 1 ------------------------------------------------------------------------------- | 582 # PDF SECTION 1 ------------------------------------------------------------------------------- |
657 # Spacer | 651 # Spacer |
658 elements.append(Spacer(1, 24)) | 652 elements.append(Spacer(1, 24)) |
659 | 653 |
660 # Iterate over haplotypes in the Curated category to get data for EBP metrics | 654 # Iterate over haplotypes in the Curated category to get data for EBP metrics |
661 curated_assemblies = yaml_data.get('ASSEMBLIES', {}).get('Curated', {}) | 655 curated_assemblies = yaml_data.get('ASSEMBLIES', {}).get('Curated', {}) |
662 haplotype_names = [key for key in curated_assemblies.keys() if key != 'pipeline'] | 656 haplotype_names = list(curated_assemblies.keys()) |
663 | 657 |
664 for haplotype in haplotype_names: | 658 for haplotype in haplotype_names: |
665 properties = curated_assemblies[haplotype] | 659 properties = curated_assemblies[haplotype] |
666 if 'gfastats--nstar-report_txt' in properties and 'merqury_qv' in properties: | 660 if 'gfastats--nstar-report_txt' in properties and 'merqury_qv' in properties: |
667 gfastats_path = properties['gfastats--nstar-report_txt'] | 661 gfastats_path = properties['gfastats--nstar-report_txt'] |
754 elements.append(Spacer(1, 5)) | 748 elements.append(Spacer(1, 5)) |
755 | 749 |
756 # Store BUSCO version and lineage information from each file in list | 750 # Store BUSCO version and lineage information from each file in list |
757 busco_info_list = [] | 751 busco_info_list = [] |
758 for asm_stages, stage_properties in asm_data.items(): | 752 for asm_stages, stage_properties in asm_data.items(): |
759 for haplotype_keys, haplotype_properties in stage_properties.items(): | 753 for i, haplotype_properties in stage_properties.items(): |
760 if isinstance(haplotype_properties, dict): | 754 if isinstance(haplotype_properties, dict): |
761 if 'busco_short_summary_txt' in haplotype_properties: | 755 if 'busco_short_summary_txt' in haplotype_properties: |
762 busco_version, lineage_info = extract_busco_info(haplotype_properties['busco_short_summary_txt']) | 756 busco_version, lineage_info = extract_busco_info(haplotype_properties['busco_short_summary_txt']) |
763 if busco_version and lineage_info: | 757 if busco_version and lineage_info: |
764 busco_info_list.append((busco_version, lineage_info)) | 758 busco_info_list.append((busco_version, lineage_info)) |
785 | 779 |
786 # Initialize counter | 780 # Initialize counter |
787 tool_count = 0 | 781 tool_count = 0 |
788 | 782 |
789 # Add title and images for each step | 783 # Add title and images for each step |
790 for idx, (asm_stages, stage_properties) in enumerate(asm_data.items(), 1): | 784 for asm_stages, stage_properties in asm_data.items(): |
791 if asm_stages == 'Curated': | 785 if asm_stages == 'Curated': |
792 tool_elements = [element for element in stage_properties.keys() if element != 'pipeline'] | 786 tool_elements = list(stage_properties.keys()) |
793 | 787 |
794 images_with_names = [] | 788 images_with_names = [] |
795 | 789 |
796 for haplotype in tool_elements: | 790 for haplotype in tool_elements: |
797 haplotype_properties = stage_properties[haplotype] | 791 haplotype_properties = stage_properties[haplotype] |
823 if len(tool_elements) > 1 and tool_elements.index(haplotype) < len(tool_elements) - 1: | 817 if len(tool_elements) > 1 and tool_elements.index(haplotype) < len(tool_elements) - 1: |
824 images_with_names.append([Spacer(1, 12)]) | 818 images_with_names.append([Spacer(1, 12)]) |
825 | 819 |
826 # Add images and names to the elements in pairs | 820 # Add images and names to the elements in pairs |
827 for i in range(0, len(images_with_names), 4): # Process two images (and their names) at a time | 821 for i in range(0, len(images_with_names), 4): # Process two images (and their names) at a time |
828 elements_to_add = images_with_names[i:i + 4] | 822 elements_to_add = images_with_names[i: i + 4] |
829 | 823 |
830 # Create table for the images and names | 824 # Create table for the images and names |
831 table = Table(elements_to_add) | 825 table = Table(elements_to_add) |
832 table.hAlign = 'CENTER' | 826 table.hAlign = 'CENTER' |
833 elements.append(table) | 827 elements.append(table) |
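The grouping loop batches up to four rows of images_with_names into each ReportLab Table; the same pattern in isolation, with invented placeholder rows:

```python
from reportlab.platypus import Table

# Each inner list is one table row (an image flowable plus its caption
# in the real script; plain strings here for illustration).
rows = [["img1", "caption1"], ["img2", "caption2"], ["img3", "caption3"]]
for i in range(0, len(rows), 4):
    table = Table(rows[i:i + 4])  # at most four rows per table
    table.hAlign = 'CENTER'
```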
854 # Initialize counter | 848 # Initialize counter |
855 counter = 0 | 849 counter = 0 |
856 | 850 |
857 # Iterate over haplotypes in the Curated category to get K-mer spectra images | 851 # Iterate over haplotypes in the Curated category to get K-mer spectra images |
858 curated_assemblies = yaml_data.get('ASSEMBLIES', {}).get('Curated', {}) | 852 curated_assemblies = yaml_data.get('ASSEMBLIES', {}).get('Curated', {}) |
859 haplotype_names = [key for key in curated_assemblies.keys() if key != 'pipeline'] | |
860 | 853 |
861 # Get paths for spectra files | 854 # Get paths for spectra files |
862 spectra_files = { | 855 spectra_files = { |
863 'hap1': { | 856 'hap1': { |
864 'spectra_cn_png': curated_assemblies.get('hap1', {}).get('merqury_hap_spectra_cn_png', None), | 857 'spectra_cn_png': curated_assemblies.get('hap1', {}).get('merqury_hap_spectra_cn_png', None), |
972 | 965 |
973 # Initialize counter | 966 # Initialize counter |
974 tool_count = 0 | 967 tool_count = 0 |
975 | 968 |
976 # Add title and images for each step | 969 # Add title and images for each step |
977 for idx, (asm_stages, stage_properties) in enumerate(asm_data.items(), 1): | 970 for asm_stages, stage_properties in asm_data.items(): |
978 if asm_stages == 'Curated': # Check if the current stage is 'Curated' | 971 if asm_stages == 'Curated': # Check if the current stage is 'Curated' |
979 tool_elements = [element for element in stage_properties.keys() if element != 'pipeline'] | 972 tool_elements = list(stage_properties.keys()) |
980 | 973 |
981 for haplotype in tool_elements: | 974 for haplotype in tool_elements: |
982 haplotype_properties = stage_properties[haplotype] | 975 haplotype_properties = stage_properties[haplotype] |
983 if isinstance(haplotype_properties, dict) and 'blobplot_cont_png' in haplotype_properties: | 976 if isinstance(haplotype_properties, dict) and 'blobplot_cont_png' in haplotype_properties: |
984 # Get image path | 977 # Get image path |