Galaxy |

Changeset 3:3dd6be0cd8dd (2024-10-15)

Previous changeset 2:a34826ae0a73 (2024-08-30)

Commit message:
planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c

modified:
macros.xml
make_EAR.py

diff -r a34826ae0a73 -r 3dd6be0cd8dd macros.xml
--- a/macros.xml Fri Aug 30 09:27:31 2024 +0000
+++ b/macros.xml Tue Oct 15 12:52:59 2024 +0000

@@ -1,5 +1,5 @@
<macros>
-    <token name="@TOOL_VERSION@">24.08.26</token>
+    <token name="@TOOL_VERSION@">24.10.15</token>
     <token name="@VERSION_SUFFIX@">0</token>
     <token name="@PROFILE@">23.2</token>
     <xml name="creator">

diff -r a34826ae0a73 -r 3dd6be0cd8dd make_EAR.py
--- a/make_EAR.py Fri Aug 30 09:27:31 2024 +0000
+++ b/make_EAR.py Tue Oct 15 12:52:59 2024 +0000

[

b'@@ -21,7 +21,7 @@\n # CAUTION: This is for the Galaxy version!\n # by Diego De Panis\n # ERGA Sequencing and Assembly Committee\n-EAR_version = "v24.08.26"\n+EAR_version = "v24.10.15"\n \n \n def make_report(yaml_file):\n@@ -36,18 +36,29 @@\n try:\n value_float = float(value)\n if value_float.is_integer():\n- # format as an integer if no decimal part\n+ # format as integer if no decimal\n return f\'{int(value_float):,}\'\n else:\n # format as a float\n return f\'{value_float:,}\'\n except ValueError:\n- # return the original value if it can\'t be converted to a float\n+ # return original value if can\'t be converted to float\n return value\n \n # extract gfastats values\n def extract_gfastats_values(content, keys):\n- return [re.findall(f"{key}: (.+)", content)[0] for key in keys]\n+ values = []\n+ for key in keys:\n+ # colon-separated as default format first\n+ match = re.search(rf"{re.escape(key)}:\\s*(.+)", content)\n+ if not match:\n+ # If not try galaxy\'s tab-separated\n+ match = re.search(rf"{re.escape(key)}\\t(.+)", content)\n+ if match:\n+ values.append(match.group(1).strip())\n+ else:\n+ values.append("N/A")\n+ return values\n \n keys = [\n "Total scaffold length",\n@@ -79,9 +90,17 @@\n def extract_total_bp_from_gfastats(gfastats_path):\n with open(gfastats_path, "r") as f:\n content = f.read()\n- total_bp = re.search(r"Total scaffold length: (.+)", content).group(1)\n- total_bp = int(total_bp.replace(\',\', \'\'))\n- return "{:,}".format(total_bp)\n+ # Try colon-separated format first\n+ match = re.search(r"Total scaffold length:\\s*(.+)", content)\n+ if not match:\n+ # If not found, try tab-separated format\n+ match = re.search(r"Total scaffold length\\t(.+)", content)\n+ if match:\n+ total_bp = match.group(1).replace(\',\', \'\')\n+ return "{:,}".format(int(total_bp))\n+ else:\n+ logging.error(f"Could not find Total scaffold length in {gfastats_path}")\n+ return "N/A"\n \n # compute EBP quality metric\n def compute_ebp_metric(haplotype, gfastats_path, qv_value):\n@@ -93,7 +112,6 @@\n values = extract_gfastats_values(content, keys_needed)\n contig_n50_log = math.floor(math.log10(int(values[0].replace(\',\', \'\'))))\n scaffold_n50_log = math.floor(math.log10(int(values[1].replace(\',\', \'\'))))\n-\n return f"Obtained EBP quality metric for {haplotype}: {contig_n50_log}.{scaffold_n50_log}.Q{math.floor(float(qv_value))}"\n \n # extract qv values\n@@ -151,6 +169,8 @@\n def extract_busco_info(file_path):\n busco_version = None\n lineage_info = None\n+ busco_mode = None\n+ busco_pred = None\n \n try:\n with open(file_path, \'r\') as file:\n@@ -158,18 +178,20 @@\n version_match = re.search(r"# BUSCO version is: ([\\d.]+)", content)\n if version_match:\n busco_version = version_match.group(1)\n- lineage_match = re.search(r"The lineage dataset is: (.*?) \\(Creation date:.*?, number of genomes: (\\d+), number of BUSCOs: (\\d+)\\)", content)\n+ lineage_match = re.search(r"The lineage dataset is: (.*?) \\(Creation date:.*?, number of (genomes|species): (\\d+), number of BUSCOs: (\\d+)\\)", content)\n if lineage_match:\n- lineage_info = lineage_match.groups()\n- if not lineage_info:\n- lineage_match = re.search(r"The lineage dataset is: (.*?) \\(Creation date:.*?, number of species: (\\d+), number of BUSCOs: (\\d+)\\)", content)\n- if lineage_match:\n- lineage_info = lineage_match.groups()\n+ '..b'lename.endswith("spectra-asm.ln.png"):\n- text = "Distribution of k-mer counts coloured by their presence in reads/assemblies"\n- elif filename.endswith("spectra-cn.ln.png"):\n- if len(spectra_cn_files) == 3:\n- # For 3 spectra-cn files use particular text\n- if png_file == shortest_spectra_cn_file:\n- text = "Distribution of k-mer counts per copy numbers found in asm (dipl.)"\n+ if filename.endswith("spectra-asm.ln.png"):\n+ text = "Distribution of k-mer counts coloured by their presence in reads/assemblies"\n+ elif filename.endswith("spectra-cn.ln.png"):\n+ if len(spectra_cn_files) == 3:\n+ if png_file == shortest_spectra_cn_file:\n+ text = "Distribution of k-mer counts per copy numbers found in asm (dipl.)"\n+ else:\n+ text = f"Distribution of k-mer counts per copy numbers found in {label} (hapl.)"\n else:\n- if png_file == spectra_files[\'hap1\'].get(\'spectra_cn_png\', None):\n- text = f"Distribution of k-mer counts per copy numbers found in <b>{unique_name1}</b> (hapl.)"\n- elif png_file == spectra_files[\'hap2\'].get(\'spectra_cn_png\', None):\n- text = f"Distribution of k-mer counts per copy numbers found in <b>{unique_name2}</b> (hapl.)"\n- else:\n- text = "Distribution of k-mer counts per copy numbers found in asm"\n+ text = "Distribution of k-mer counts per copy numbers found in asm"\n else:\n- # For 2 spectra-cn files use same text\n- text = "Distribution of k-mer counts per copy numbers found in asm"\n- else:\n- text = filename\n-\n- images.append([image, Paragraph(text, styles["midiStyle"])])\n+ text = filename\n \n- # Filter None values\n- images = [img for img in images if img[0] is not None]\n-\n- # Get number of rows and columns for the table\n- num_rows = (len(images) + 1) // 2 # +1 to handle odd numbers of images\n- num_columns = 2\n+ images.append([image, Paragraph(text, styles["midiStyle"])])\n+ except Exception as e:\n+ logging.error(f"Error processing image {png_file}: {str(e)}")\n \n # Create the table with dynamic size\n- image_table_data = [[images[i * num_columns + j] if i * num_columns + j < len(images) else [] for j in range(num_columns)] for i in range(num_rows)]\n- image_table = Table(image_table_data)\n+ if images:\n+ num_rows = (len(images) + 1) // 2\n+ num_columns = 2\n+ image_table_data = [[images[i * num_columns + j] if i * num_columns + j < len(images) else [] for j in range(num_columns)] for i in range(num_rows)]\n+ image_table = Table(image_table_data)\n \n- # Style the "table"\n- table_style = TableStyle([\n- (\'VALIGN\', (0, 0), (-1, -1), \'MIDDLE\'),\n- (\'BOTTOMPADDING\', (0, 0), (-1, -1), 20), # 20 here is a spacer between rows\n- ])\n+ # Style the table\n+ table_style = TableStyle([\n+ (\'VALIGN\', (0, 0), (-1, -1), \'MIDDLE\'),\n+ (\'BOTTOMPADDING\', (0, 0), (-1, -1), 20), # 20 here is a spacer between rows\n+ ])\n \n- # Set the style\n- image_table.setStyle(table_style)\n-\n- # Add image table to elements\n- elements.append(image_table)\n+ image_table.setStyle(table_style)\n+ elements.append(image_table)\n+ else:\n+ elements.append(Paragraph("No K-mer spectra images available.", styles["midiStyle"]))\n \n # Increase counter by the number of PNGs added\n counter += len(images)\n'