Repository 'erga_ear'
hg clone https://toolshed.g2.bx.psu.edu/repos/bgruening/erga_ear

Changeset 3:3dd6be0cd8dd (2024-10-15)
Previous changeset 2:a34826ae0a73 (2024-08-30)
Commit message:
planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
modified:
macros.xml
make_EAR.py
b
diff -r a34826ae0a73 -r 3dd6be0cd8dd macros.xml
--- a/macros.xml Fri Aug 30 09:27:31 2024 +0000
+++ b/macros.xml Tue Oct 15 12:52:59 2024 +0000
b
@@ -1,5 +1,5 @@
 <macros>
-    <token name="@TOOL_VERSION@">24.08.26</token>
+    <token name="@TOOL_VERSION@">24.10.15</token>
     <token name="@VERSION_SUFFIX@">0</token>
     <token name="@PROFILE@">23.2</token>
     <xml name="creator">
b
diff -r a34826ae0a73 -r 3dd6be0cd8dd make_EAR.py
--- a/make_EAR.py Fri Aug 30 09:27:31 2024 +0000
+++ b/make_EAR.py Tue Oct 15 12:52:59 2024 +0000
[
b'@@ -21,7 +21,7 @@\n # CAUTION: This is for the Galaxy version!\n # by Diego De Panis\n # ERGA Sequencing and Assembly Committee\n-EAR_version = "v24.08.26"\n+EAR_version = "v24.10.15"\n \n \n def make_report(yaml_file):\n@@ -36,18 +36,29 @@\n         try:\n             value_float = float(value)\n             if value_float.is_integer():\n-                # format as an integer if no decimal part\n+                # format as integer if no decimal\n                 return f\'{int(value_float):,}\'\n             else:\n                 # format as a float\n                 return f\'{value_float:,}\'\n         except ValueError:\n-            # return the original value if it can\'t be converted to a float\n+            # return original value if can\'t be converted to float\n             return value\n \n     # extract gfastats values\n     def extract_gfastats_values(content, keys):\n-        return [re.findall(f"{key}: (.+)", content)[0] for key in keys]\n+        values = []\n+        for key in keys:\n+            # colon-separated as default format first\n+            match = re.search(rf"{re.escape(key)}:\\s*(.+)", content)\n+            if not match:\n+                # If not try galaxy\'s tab-separated\n+                match = re.search(rf"{re.escape(key)}\\t(.+)", content)\n+            if match:\n+                values.append(match.group(1).strip())\n+            else:\n+                values.append("N/A")\n+        return values\n \n     keys = [\n         "Total scaffold length",\n@@ -79,9 +90,17 @@\n     def extract_total_bp_from_gfastats(gfastats_path):\n         with open(gfastats_path, "r") as f:\n             content = f.read()\n-        total_bp = re.search(r"Total scaffold length: (.+)", content).group(1)\n-        total_bp = int(total_bp.replace(\',\', \'\'))\n-        return "{:,}".format(total_bp)\n+        # Try colon-separated format first\n+        match = re.search(r"Total scaffold length:\\s*(.+)", content)\n+        if not match:\n+            # If not found, try tab-separated format\n+            match = re.search(r"Total scaffold length\\t(.+)", content)\n+        if match:\n+            total_bp = match.group(1).replace(\',\', \'\')\n+            return "{:,}".format(int(total_bp))\n+        else:\n+            logging.error(f"Could not find Total scaffold length in {gfastats_path}")\n+            return "N/A"\n \n     # compute EBP quality metric\n     def compute_ebp_metric(haplotype, gfastats_path, qv_value):\n@@ -93,7 +112,6 @@\n         values = extract_gfastats_values(content, keys_needed)\n         contig_n50_log = math.floor(math.log10(int(values[0].replace(\',\', \'\'))))\n         scaffold_n50_log = math.floor(math.log10(int(values[1].replace(\',\', \'\'))))\n-\n         return f"Obtained EBP quality metric for {haplotype}: {contig_n50_log}.{scaffold_n50_log}.Q{math.floor(float(qv_value))}"\n \n     # extract qv values\n@@ -151,6 +169,8 @@\n     def extract_busco_info(file_path):\n         busco_version = None\n         lineage_info = None\n+        busco_mode = None\n+        busco_pred = None\n \n         try:\n             with open(file_path, \'r\') as file:\n@@ -158,18 +178,20 @@\n                 version_match = re.search(r"# BUSCO version is: ([\\d.]+)", content)\n                 if version_match:\n                     busco_version = version_match.group(1)\n-                lineage_match = re.search(r"The lineage dataset is: (.*?) \\(Creation date:.*?, number of genomes: (\\d+), number of BUSCOs: (\\d+)\\)", content)\n+                lineage_match = re.search(r"The lineage dataset is: (.*?) \\(Creation date:.*?, number of (genomes|species): (\\d+), number of BUSCOs: (\\d+)\\)", content)\n                 if lineage_match:\n-                    lineage_info = lineage_match.groups()\n-                if not lineage_info:\n-                    lineage_match = re.search(r"The lineage dataset is: (.*?) \\(Creation date:.*?, number of species: (\\d+), number of BUSCOs: (\\d+)\\)", content)\n-                    if lineage_match:\n-                        lineage_info = lineage_match.groups()\n+               '..b'lename.endswith("spectra-asm.ln.png"):\n-                    text = "Distribution of k-mer counts coloured by their presence in reads/assemblies"\n-                elif filename.endswith("spectra-cn.ln.png"):\n-                    if len(spectra_cn_files) == 3:\n-                        # For 3 spectra-cn files use particular text\n-                        if png_file == shortest_spectra_cn_file:\n-                            text = "Distribution of k-mer counts per copy numbers found in asm (dipl.)"\n+                    if filename.endswith("spectra-asm.ln.png"):\n+                        text = "Distribution of k-mer counts coloured by their presence in reads/assemblies"\n+                    elif filename.endswith("spectra-cn.ln.png"):\n+                        if len(spectra_cn_files) == 3:\n+                            if png_file == shortest_spectra_cn_file:\n+                                text = "Distribution of k-mer counts per copy numbers found in asm (dipl.)"\n+                            else:\n+                                text = f"Distribution of k-mer counts per copy numbers found in {label} (hapl.)"\n                         else:\n-                            if png_file == spectra_files[\'hap1\'].get(\'spectra_cn_png\', None):\n-                                text = f"Distribution of k-mer counts per copy numbers found in <b>{unique_name1}</b> (hapl.)"\n-                            elif png_file == spectra_files[\'hap2\'].get(\'spectra_cn_png\', None):\n-                                text = f"Distribution of k-mer counts per copy numbers found in <b>{unique_name2}</b> (hapl.)"\n-                            else:\n-                                text = "Distribution of k-mer counts per copy numbers found in asm"\n+                            text = "Distribution of k-mer counts per copy numbers found in asm"\n                     else:\n-                        # For 2 spectra-cn files use same text\n-                        text = "Distribution of k-mer counts per copy numbers found in asm"\n-                else:\n-                    text = filename\n-\n-                images.append([image, Paragraph(text, styles["midiStyle"])])\n+                        text = filename\n \n-    # Filter None values\n-    images = [img for img in images if img[0] is not None]\n-\n-    # Get number of rows and columns for the table\n-    num_rows = (len(images) + 1) // 2  # +1 to handle odd numbers of images\n-    num_columns = 2\n+                    images.append([image, Paragraph(text, styles["midiStyle"])])\n+                except Exception as e:\n+                    logging.error(f"Error processing image {png_file}: {str(e)}")\n \n     # Create the table with dynamic size\n-    image_table_data = [[images[i * num_columns + j] if i * num_columns + j < len(images) else [] for j in range(num_columns)] for i in range(num_rows)]\n-    image_table = Table(image_table_data)\n+    if images:\n+        num_rows = (len(images) + 1) // 2\n+        num_columns = 2\n+        image_table_data = [[images[i * num_columns + j] if i * num_columns + j < len(images) else [] for j in range(num_columns)] for i in range(num_rows)]\n+        image_table = Table(image_table_data)\n \n-    # Style the "table"\n-    table_style = TableStyle([\n-        (\'VALIGN\', (0, 0), (-1, -1), \'MIDDLE\'),\n-        (\'BOTTOMPADDING\', (0, 0), (-1, -1), 20),  # 20 here is a spacer between rows\n-    ])\n+        # Style the table\n+        table_style = TableStyle([\n+            (\'VALIGN\', (0, 0), (-1, -1), \'MIDDLE\'),\n+            (\'BOTTOMPADDING\', (0, 0), (-1, -1), 20),  # 20 here is a spacer between rows\n+        ])\n \n-    # Set the style\n-    image_table.setStyle(table_style)\n-\n-    # Add image table to elements\n-    elements.append(image_table)\n+        image_table.setStyle(table_style)\n+        elements.append(image_table)\n+    else:\n+        elements.append(Paragraph("No K-mer spectra images available.", styles["midiStyle"]))\n \n     # Increase counter by the number of PNGs added\n     counter += len(images)\n'