Mercurial > repos > maciek > spamr_vet_tools
changeset 2:d7b099fbb003 draft default tip
Corrected file names and updated tool wrappers for consistency.
author | maciek |
---|---|
date | Tue, 25 Mar 2025 13:35:00 +0000 |
parents | e57a908b9d3d |
children | |
files | spamr_vet_tools_v2/1_qualitty_script_fastp_bracken_v2.py spamr_vet_tools_v2/1_qualitty_script_fastp_bracken_v2.xml spamr_vet_tools_v2/2_quast_get_fasta_v2.py spamr_vet_tools_v2/2_quast_get_fasta_v2.xml spamr_vet_tools_v2/3_MLST_AMRFINDER_STARMAR_v2.py spamr_vet_tools_v2/3_MLST_AMRFINDER_STARMAR_v2.xml spamr_vet_tools_v2/mlst_amrfinder_staramr.py spamr_vet_tools_v2/mlst_amrfinder_staramr.xml spamr_vet_tools_v2/quality_script_fastp_bracken.py spamr_vet_tools_v2/quality_script_fastp_bracken.xml spamr_vet_tools_v2/quast_get_fasta.py spamr_vet_tools_v2/quast_get_fasta.xml |
diffstat | 12 files changed, 521 insertions(+), 521 deletions(-) [+] |
line wrap: on
line diff
--- a/spamr_vet_tools_v2/1_qualitty_script_fastp_bracken_v2.py Tue Feb 25 14:14:39 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,134 +0,0 @@ -import json -import csv -import sys -import os - -def extract_software_data(json_data, software_name): - """ - Extract data for a specific software from the JSON input - - For "bracken", add a "contamination" column where the value is "pass" - if fraction_total_reads > 0.6, otherwise "fail". - - For "fastp", include only specific columns. - """ - # Ensure json_data is a dictionary - if isinstance(json_data, list): - json_data = next((entry for entry in json_data if "analysis_software_name" in entry and entry["analysis_software_name"] == software_name), None) - - if not isinstance(json_data, dict): - print(f"Invalid JSON format for {software_name} extraction.") - return - - results = json_data.get("results", []) - extracted_data = [] - headers = [] # Use list to collect headers to maintain order - output_csv_file = f"{software_name}_output.csv" - - # Define specific columns for "fastp" - fastp_columns = [ - "summary_sequencing", - "summary_before_filtering_total_reads", - "summary_before_filtering_total_bases", - "summary_before_filtering_q20_bases", - "summary_before_filtering_q30_bases", - "summary_before_filtering_q20_rate", - "summary_before_filtering_q30_rate", - "summary_before_filtering_read1_mean_length", - "summary_before_filtering_read2_mean_length", - "summary_before_filtering_gc_content", - "summary_after_filtering_total_reads", - "summary_after_filtering_total_bases", - "summary_after_filtering_q20_bases", - "summary_after_filtering_q30_bases", - "summary_after_filtering_q20_rate", - "summary_after_filtering_q30_rate", - "summary_after_filtering_read1_mean_length", - "summary_after_filtering_read2_mean_length", - "summary_after_filtering_gc_content", - "filtering_result_passed_filter_reads", - "filtering_result_low_quality_reads", - "filtering_result_too_many_N_reads", - 
"filtering_result_too_short_reads", - "filtering_result_too_long_reads", - "duplication_rate", - "insert_size_peak", - ] - - for entry in results: - if "content" in entry and isinstance(entry["content"], list): - for content_item in entry["content"]: - row_data = {} - if software_name == "fastp": - for key, value in content_item.items(): - if isinstance(value, dict): - for sub_key, sub_value in value.items(): - if isinstance(sub_value, dict): - for sub_sub_key, sub_sub_value in sub_value.items(): - column_name = f"{key}_{sub_key}_{sub_sub_key}" - if column_name in fastp_columns: - row_data[column_name] = sub_sub_value - if column_name not in headers: - headers.append(column_name) - else: - column_name = f"{key}_{sub_key}" - if column_name in fastp_columns: - row_data[column_name] = sub_value - if column_name not in headers: - headers.append(column_name) - else: - if key in fastp_columns: - row_data[key] = value - if key not in headers: - headers.append(key) - elif software_name == "bracken": - for key, value in content_item.items(): - if isinstance(value, dict): - for sub_key, sub_value in value.items(): - column_name = f"{key}_{sub_key}" - row_data[column_name] = sub_value - if column_name not in headers: - headers.append(column_name) - else: - row_data[key] = value - if key not in headers: - headers.append(key) - - # Add contamination column for "bracken" - fraction_total_reads = row_data.get("fraction_total_reads", 0) - row_data["contamination"] = "pass" if float(fraction_total_reads) > 0.6 else "fail" - if "contamination" not in headers: - headers.append("contamination") - - extracted_data.append(row_data) - - if not extracted_data: - print(f"No data extracted for {software_name}") - # Create empty file to prevent Galaxy error - with open(output_csv_file, "w", newline="", encoding="utf-8") as f: - f.write("No data available\n") - return - - with open(output_csv_file, "w", newline="", encoding="utf-8") as f: - writer = csv.DictWriter(f, fieldnames=headers) - 
writer.writeheader() - writer.writerows(extracted_data) - - print(f"CSV file successfully generated: {output_csv_file}") - -if __name__ == "__main__": - if len(sys.argv) != 2: - print("Usage: python extract_software_data.py input.json") - sys.exit(1) - - input_json_file = sys.argv[1] - - try: - with open(input_json_file, "r", encoding="utf-8") as file: - json_data = json.load(file) - extract_software_data(json_data, "fastp") - extract_software_data(json_data, "bracken") - sys.exit(0) - except Exception as e: - print(f"Error processing file: {e}") - sys.exit(1)
--- a/spamr_vet_tools_v2/1_qualitty_script_fastp_bracken_v2.xml Tue Feb 25 14:14:39 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,43 +0,0 @@ -<tool id="quality_script_fastp_bracken" - name="Quality Control FastP Bracken" - version="0.1.0+galaxy0" - profile="21.05"> - - <description>Quality control using FastP and Bracken</description> - - <requirements> - <requirement type="package" version="3.12">python</requirement> - </requirements> - - <command detect_errors="exit_code"> - <![CDATA[ - python '$__tool_directory__/extract_software_data.py' '$json_input' - ]]> - </command> - - <inputs> - <param name="json_input" type="data" format="json" label="Input JSON Data"/> - </inputs> - - <outputs> - <data name="fastp_output" format="csv" from_work_dir="fastp_output.csv" label="FastP Summary Report on ${on_string}"/> - <data name="bracken_output" format="csv" from_work_dir="bracken_output.csv" label="Bracken Summary Report on ${on_string}"/> - </outputs> - - <help><![CDATA[ -- [FastP](https://github.com/OpenGene/fastp) - A tool for fast and efficient quality control. -- [Bracken](https://github.com/jenniferlu717/Bracken) - A tool for accurate species abundance estimation. - -For support, please contact the tool maintainers. - ]]></help> - -</tool>
--- a/spamr_vet_tools_v2/2_quast_get_fasta_v2.py Tue Feb 25 14:14:39 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,134 +0,0 @@ -import json -import csv -import sys -import os - -def extract_software_data(json_data, software_name): - """ - Extract QUAST data from JSON and create a CSV with assembly metrics. - For "quast", include specific columns and calculate a Filter_N50 based on "N50". - """ - # Ensure json_data is a dictionary - if isinstance(json_data, list): - json_data = next((entry for entry in json_data if "analysis_software_name" in entry and entry["analysis_software_name"] == software_name), None) - - if not isinstance(json_data, dict): - print(f"Invalid JSON format for {software_name} extraction.") - return - - results = json_data.get("results", []) - extracted_data = [] - headers = [ - "Assembly", - "contigs_(>=_0_bp)", - "contigs_(>=_1000_bp)", - "Total_length_(>=_0_bp)", - "Total_length_(>=_1000_bp)", - "contigs", - "Largest_contig", - "Total_length", - "GC", - "N50", - "Filter_N50", - "N90", - "auN", - "L50", - "L90", - "total_reads", - "left", - "right", - "Mapped", - "Properly_paired", - "Avg._coverage_depth", - "Coverage_>=_1x", - "N's_per_100_kbp" - ] - output_csv_file = f"{software_name}_output.csv" - - for entry in results: - if "content" in entry and isinstance(entry["content"], list): - for content_item in entry["content"]: - n50 = content_item.get("N50", "") - try: - n50_value = float(n50) if n50 else 0 - filter_n50 = "pass" if n50_value > 20000 else "fail" - except ValueError: - filter_n50 = "fail" # If the value is non-numeric, consider it as "fail" - - extracted_data.append({ - "Assembly": content_item.get("Assembly", ""), - "contigs_(>=_0_bp)": content_item.get("contigs_(>=_0_bp)", ""), - "contigs_(>=_1000_bp)": content_item.get("contigs_(>=_1000_bp)", ""), - "Total_length_(>=_0_bp)": content_item.get("Total_length_(>=_0_bp)", ""), - "Total_length_(>=_1000_bp)": content_item.get("Total_length_(>=_1000_bp)", ""), - 
"contigs": content_item.get("contigs", ""), - "Largest_contig": content_item.get("Largest_contig", ""), - "Total_length": content_item.get("Total_length", ""), - "GC": content_item.get("GC", ""), - "N50": content_item.get("N50", ""), - "Filter_N50": filter_n50, - "N90": content_item.get("N90", ""), - "auN": content_item.get("auN", ""), - "L50": content_item.get("L50", ""), - "L90": content_item.get("L90", ""), - "total_reads": content_item.get("total_reads", ""), - "left": content_item.get("left", ""), - "right": content_item.get("right", ""), - "Mapped": content_item.get("Mapped", ""), - "Properly_paired": content_item.get("Properly_paired", ""), - "Avg._coverage_depth": content_item.get("Avg._coverage_depth", ""), - "Coverage_>=_1x": content_item.get("Coverage_>=_1x", ""), - "N's_per_100_kbp": content_item.get("N's_per_100_kbp", "") - }) - - with open(output_csv_file, "w", newline="", encoding="utf-8") as f: - writer = csv.DictWriter(f, fieldnames=headers) - writer.writeheader() - writer.writerows(extracted_data) - - print(f"CSV file successfully generated: {output_csv_file}") - -def extract_contigs_to_fasta(json_data): - """ - Extract contigs information from "shovill" and save it as a FASTA file. 
- """ - if isinstance(json_data, list): - json_data = next((entry for entry in json_data if "analysis_software_name" in entry and entry["analysis_software_name"] == "shovill"), None) - - if not isinstance(json_data, dict): - print("Invalid JSON format for shovill extraction.") - return - - results = json_data.get("results", []) - output_fasta_file = "shovill_contigs.fasta" - - with open(output_fasta_file, "w", encoding="utf-8") as f: - for entry in results: - if "content" in entry and isinstance(entry["content"], list): - for content_item in entry["content"]: - name = content_item.get("name", "unknown") - length = content_item.get("length", "unknown") - coverage = content_item.get("coverage", "unknown") - sequence = content_item.get("sequence", "") - - header = f">{name}_{length}_{coverage}" - f.write(f"{header}\n{sequence}\n") - - print(f"FASTA file successfully generated: {output_fasta_file}") - -if __name__ == "__main__": - if len(sys.argv) != 2: - print("Usage: python script.py input.json") - sys.exit(1) - - input_json_file = sys.argv[1] - - try: - with open(input_json_file, "r", encoding="utf-8") as file: - json_data = json.load(file) - extract_software_data(json_data, "quast") - extract_contigs_to_fasta(json_data) - sys.exit(0) - except Exception as e: - print(f"Error processing file: {e}") - sys.exit(1)
--- a/spamr_vet_tools_v2/2_quast_get_fasta_v2.xml Tue Feb 25 14:14:39 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,67 +0,0 @@ -<tool id="quast_get_fasta" - name="QUAST Analysis and FASTA Generator" - version="0.1.0+galaxy0" - profile="21.05"> - - <description>Extracts QUAST metrics and generates FASTA files from JSON input.</description> - - <requirements> - <requirement type="package" version="3.12">python</requirement> - </requirements> - - <command detect_errors="exit_code"> - <![CDATA[ - python '$__tool_directory__/extract_quast_fasta.py' '$json_input' - ]]> - </command> - - <inputs> - <param name="json_input" type="data" format="json" label="Input JSON File" - help="Provide a JSON file containing QUAST and Shovill results."/> - </inputs> - - <outputs> - <data name="csv_output" format="csv" from_work_dir="quast_output.csv" - label="QUAST Summary on ${on_string}"/> - <data name="fasta_output" format="fasta" from_work_dir="shovill_contigs.fasta" - label="Shovill Contigs on ${on_string}"/> - </outputs> - - <tests> - <test> - <param name="json_input" value="example_input.json"/> - <output name="csv_output" file="expected_output.csv" compare="diff"/> - <output name="fasta_output" file="expected_output.fasta" compare="diff"/> - </test> - </tests> - - <help><![CDATA[ -QUAST Analysis and FASTA Generator -================================== - -This tool extracts key statistics from **QUAST** and generates a **FASTA** file containing assembled contigs from **Shovill**. - -Usage Instructions ------------------- -1. Upload or provide a **JSON file** containing **QUAST** and **Shovill** results. -2. The tool will: - - Extract **assembly metrics** from QUAST and save them as a CSV. - - Convert **contigs from Shovill** into a FASTA file. -3. 
The outputs will be: - - `quast_output.csv` (QUAST summary metrics) - - `shovill_contigs.fasta` (FASTA file with contigs) - -Outputs -------- -- **CSV File:** Contains QUAST summary metrics such as `N50`, `GC content`, `total length`, `L50`, and other key assembly statistics. -- **FASTA File:** Extracts contigs from **Shovill**, formatting them properly for downstream analysis. - -References ----------- -- `QUAST <http://bioinf.spbau.ru/quast>`_ - Quality assessment tool for genome assemblies. -- `Shovill <https://github.com/tseemann/shovill>`_ - A tool for rapid bacterial genome assembly using SPAdes. - -For questions or issues, please contact the tool maintainers. - ]]></help> - -</tool>
--- a/spamr_vet_tools_v2/3_MLST_AMRFINDER_STARMAR_v2.py Tue Feb 25 14:14:39 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,73 +0,0 @@ -import json -import csv -import sys - -def generate_csv_from_json(json_data): - """ - Parse the JSON and generate CSV files based on the analysis_software_name Abricate, AMRfinder plus and STARamr. - Additionally, extract and process the 'mlst_file' content into its own CSV. - """ - for entry in json_data: - analysis_software = entry.get("analysis_software_name", "unknown") - results = entry.get("results", []) - - if results: - csv_file = f"{analysis_software}_output.csv" - extracted_data = [] - headers = [] - - for result in results: - if result.get("name") == "mlst_file": - mlst_file_path = "mlst.csv" - mlst_content = result.get("content", []) - mlst_headers = ["Isolate ID", "Scheme", "Sequence Type", "Locus"] - - # Write the MLST CSV file - if mlst_content: - with open(mlst_file_path, "w", newline="", encoding="utf-8") as f: - writer = csv.DictWriter(f, fieldnames=mlst_headers) - writer.writeheader() - for row in mlst_content: - writer.writerow({ - "Isolate ID": row.get("Isolate ID", ""), - "Scheme": row.get("Scheme", ""), - "Sequence Type": row.get("Sequence Type", ""), - "Locus": "; ".join(row.get("Locus", [])) - }) - - print(f"MLST CSV file successfully generated: {mlst_file_path}") - - if "content" in result and isinstance(result["content"], list): - for content_item in result["content"]: - extracted_data.append(content_item) - for key in content_item.keys(): - if key not in headers: - headers.append(key) # Maintain the original order of the JSON keys - - # Write the CSV file if there is data - if extracted_data: - with open(csv_file, "w", newline="", encoding="utf-8") as f: - writer = csv.DictWriter(f, fieldnames=headers) - writer.writeheader() - for row in extracted_data: - writer.writerow({key: row.get(key, "") for key in headers}) - - print(f"CSV file successfully generated: {csv_file}") - else: - 
print(f"No content found for {analysis_software}.") - -if __name__ == "__main__": - if len(sys.argv) != 2: - print("Usage: python script.py input.json") - sys.exit(1) - - input_json_file = sys.argv[1] - - try: - with open(input_json_file, "r", encoding="utf-8") as file: - json_data = json.load(file) - generate_csv_from_json(json_data) - sys.exit(0) - except Exception as e: - print(f"Error processing file: {e}") - sys.exit(1)
--- a/spamr_vet_tools_v2/3_MLST_AMRFINDER_STARMAR_v2.xml Tue Feb 25 14:14:39 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,70 +0,0 @@ -<tool id="mlst_amr_staramr" - name="MLST, AMRfinder, and STARamr Analysis" - version="0.1.0+galaxy0" - profile="21.05"> - - <description>Extracts MLST, AMRfinder Plus, and STARamr results from JSON input.</description> - - <requirements> - <requirement type="package" version="3.12">python</requirement> - </requirements> - - <command detect_errors="exit_code"> - <![CDATA[ - python '$__tool_directory__/extract_mlst_amr.py' '$json_input' - ]]> - </command> - - <inputs> - <param name="json_input" type="data" format="json" label="Input JSON File" - help="Provide a JSON file containing MLST, AMRfinder Plus, and STARamr results."/> - </inputs> - - <outputs> - <data name="mlst_csv" format="csv" from_work_dir="mlst.csv" - label="MLST Summary on ${on_string}"/> - <data name="amr_csv" format="csv" from_work_dir="AMRfinderPlus_output.csv" - label="AMRfinder Plus Results on ${on_string}"/> - <data name="staramr_csv" format="csv" from_work_dir="STARamr_output.csv" - label="STARamr Results on ${on_string}"/> - </outputs> - - <tests> - <test> - <param name="json_input" value="example_input.json"/> - <output name="mlst_csv" file="expected_mlst.csv" compare="diff"/> - <output name="amr_csv" file="expected_amr.csv" compare="diff"/> - <output name="staramr_csv" file="expected_staramr.csv" compare="diff"/> - </test> - </tests> - - <help><![CDATA[ -MLST, AMRfinder, and STARamr Analysis -===================================== - -This tool extracts MLST, AMRfinder Plus, and STARamr results from JSON input and converts them into CSV format. - -Usage Instructions ------------------- -1. Provide a **JSON file** containing **MLST, AMRfinder Plus, and STARamr** results. -2. The tool will process the data and generate: - - `mlst.csv`: MLST typing results. - - `AMRfinderPlus_output.csv`: Results from **AMRfinder Plus**. 
- - `STARamr_output.csv`: Results from **STARamr**. - -Outputs -------- -- **MLST CSV File:** Contains MLST typing information, including sequence type and scheme. -- **AMRfinder Plus CSV File:** Lists detected antimicrobial resistance genes. -- **STARamr CSV File:** Includes resistance profiles and sequence typing. - -References ----------- -- `MLST <https://pubmlst.org/>`_ - Multi-locus sequence typing database. -- `AMRfinder Plus <https://www.ncbi.nlm.nih.gov/pathogens/antimicrobial-resistance/AMRFinder/>`_ - Antimicrobial resistance gene detection. -- `STARamr <https://github.com/phac-nml/staramr>`_ - Salmonella sequence typing and resistance analysis. - -For questions or issues, please contact the tool maintainers. - ]]></help> - -</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/spamr_vet_tools_v2/mlst_amrfinder_staramr.py Tue Mar 25 13:35:00 2025 +0000 @@ -0,0 +1,73 @@ +import json +import csv +import sys + +def generate_csv_from_json(json_data): + """ + Parse the JSON and generate CSV files based on the analysis_software_name Abricate, AMRfinder plus and STARamr. + Additionally, extract and process the 'mlst_file' content into its own CSV. + """ + for entry in json_data: + analysis_software = entry.get("analysis_software_name", "unknown") + results = entry.get("results", []) + + if results: + csv_file = f"{analysis_software}_output.csv" + extracted_data = [] + headers = [] + + for result in results: + if result.get("name") == "mlst_file": + mlst_file_path = "mlst.csv" + mlst_content = result.get("content", []) + mlst_headers = ["Isolate ID", "Scheme", "Sequence Type", "Locus"] + + # Write the MLST CSV file + if mlst_content: + with open(mlst_file_path, "w", newline="", encoding="utf-8") as f: + writer = csv.DictWriter(f, fieldnames=mlst_headers) + writer.writeheader() + for row in mlst_content: + writer.writerow({ + "Isolate ID": row.get("Isolate ID", ""), + "Scheme": row.get("Scheme", ""), + "Sequence Type": row.get("Sequence Type", ""), + "Locus": "; ".join(row.get("Locus", [])) + }) + + print(f"MLST CSV file successfully generated: {mlst_file_path}") + + if "content" in result and isinstance(result["content"], list): + for content_item in result["content"]: + extracted_data.append(content_item) + for key in content_item.keys(): + if key not in headers: + headers.append(key) # Maintain the original order of the JSON keys + + # Write the CSV file if there is data + if extracted_data: + with open(csv_file, "w", newline="", encoding="utf-8") as f: + writer = csv.DictWriter(f, fieldnames=headers) + writer.writeheader() + for row in extracted_data: + writer.writerow({key: row.get(key, "") for key in headers}) + + print(f"CSV file successfully generated: {csv_file}") + else: + print(f"No 
content found for {analysis_software}.") + +if __name__ == "__main__": + if len(sys.argv) != 2: + print("Usage: python script.py input.json") + sys.exit(1) + + input_json_file = sys.argv[1] + + try: + with open(input_json_file, "r", encoding="utf-8") as file: + json_data = json.load(file) + generate_csv_from_json(json_data) + sys.exit(0) + except Exception as e: + print(f"Error processing file: {e}") + sys.exit(1)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/spamr_vet_tools_v2/mlst_amrfinder_staramr.xml Tue Mar 25 13:35:00 2025 +0000 @@ -0,0 +1,70 @@ +<tool id="mlst_amr_staramr" + name="MLST, AMRfinder, and STARamr Analysis" + version="0.1.0+galaxy0" + profile="21.05"> + + <description>Extracts MLST, AMRfinder Plus, and STARamr results from JSON input.</description> + + <requirements> + <requirement type="package" version="3.12">python</requirement> + </requirements> + + <command detect_errors="exit_code"> + <![CDATA[ + python '$__tool_directory__/mlst_amrfinder_staramr.py' '$json_input' + ]]> + </command> + + <inputs> + <param name="json_input" type="data" format="json" label="Input JSON File" + help="Provide a JSON file containing MLST, AMRfinder Plus, and STARamr results."/> + </inputs> + + <outputs> + <data name="mlst_csv" format="csv" from_work_dir="mlst.csv" + label="MLST Summary on ${on_string}"/> + <data name="amr_csv" format="csv" from_work_dir="AMRfinderPlus_output.csv" + label="AMRfinder Plus Results on ${on_string}"/> + <data name="staramr_csv" format="csv" from_work_dir="STARamr_output.csv" + label="STARamr Results on ${on_string}"/> + </outputs> + + <tests> + <test> + <param name="json_input" value="example_input.json"/> + <output name="mlst_csv" file="expected_mlst.csv" compare="diff"/> + <output name="amr_csv" file="expected_amr.csv" compare="diff"/> + <output name="staramr_csv" file="expected_staramr.csv" compare="diff"/> + </test> + </tests> + + <help><![CDATA[ +MLST, AMRfinder, and STARamr Analysis +===================================== + +This tool extracts MLST, AMRfinder Plus, and STARamr results from JSON input and converts them into CSV format. + +Usage Instructions +------------------ +1. Provide a **JSON file** containing **MLST, AMRfinder Plus, and STARamr** results. +2. The tool will process the data and generate: + - `mlst.csv`: MLST typing results. + - `AMRfinderPlus_output.csv`: Results from **AMRfinder Plus**. 
+ - `STARamr_output.csv`: Results from **STARamr**. + +Outputs +------- +- **MLST CSV File:** Contains MLST typing information, including sequence type and scheme. +- **AMRfinder Plus CSV File:** Lists detected antimicrobial resistance genes. +- **STARamr CSV File:** Includes resistance profiles and sequence typing. + +References +---------- +- `MLST <https://pubmlst.org/>`_ - Multi-locus sequence typing database. +- `AMRfinder Plus <https://www.ncbi.nlm.nih.gov/pathogens/antimicrobial-resistance/AMRFinder/>`_ - Antimicrobial resistance gene detection. +- `STARamr <https://github.com/phac-nml/staramr>`_ - Salmonella sequence typing and resistance analysis. + +For questions or issues, please contact the tool maintainers. + ]]></help> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/spamr_vet_tools_v2/quality_script_fastp_bracken.py Tue Mar 25 13:35:00 2025 +0000 @@ -0,0 +1,134 @@ +import json +import csv +import sys +import os + +def extract_software_data(json_data, software_name): + """ + Extract data for a specific software from the JSON input + + For "bracken", add a "contamination" column where the value is "pass" + if fraction_total_reads > 0.6, otherwise "fail". + + For "fastp", include only specific columns. + """ + # Ensure json_data is a dictionary + if isinstance(json_data, list): + json_data = next((entry for entry in json_data if "analysis_software_name" in entry and entry["analysis_software_name"] == software_name), None) + + if not isinstance(json_data, dict): + print(f"Invalid JSON format for {software_name} extraction.") + return + + results = json_data.get("results", []) + extracted_data = [] + headers = [] # Use list to collect headers to maintain order + output_csv_file = f"{software_name}_output.csv" + + # Define specific columns for "fastp" + fastp_columns = [ + "summary_sequencing", + "summary_before_filtering_total_reads", + "summary_before_filtering_total_bases", + "summary_before_filtering_q20_bases", + "summary_before_filtering_q30_bases", + "summary_before_filtering_q20_rate", + "summary_before_filtering_q30_rate", + "summary_before_filtering_read1_mean_length", + "summary_before_filtering_read2_mean_length", + "summary_before_filtering_gc_content", + "summary_after_filtering_total_reads", + "summary_after_filtering_total_bases", + "summary_after_filtering_q20_bases", + "summary_after_filtering_q30_bases", + "summary_after_filtering_q20_rate", + "summary_after_filtering_q30_rate", + "summary_after_filtering_read1_mean_length", + "summary_after_filtering_read2_mean_length", + "summary_after_filtering_gc_content", + "filtering_result_passed_filter_reads", + "filtering_result_low_quality_reads", + "filtering_result_too_many_N_reads", + 
"filtering_result_too_short_reads", + "filtering_result_too_long_reads", + "duplication_rate", + "insert_size_peak", + ] + + for entry in results: + if "content" in entry and isinstance(entry["content"], list): + for content_item in entry["content"]: + row_data = {} + if software_name == "fastp": + for key, value in content_item.items(): + if isinstance(value, dict): + for sub_key, sub_value in value.items(): + if isinstance(sub_value, dict): + for sub_sub_key, sub_sub_value in sub_value.items(): + column_name = f"{key}_{sub_key}_{sub_sub_key}" + if column_name in fastp_columns: + row_data[column_name] = sub_sub_value + if column_name not in headers: + headers.append(column_name) + else: + column_name = f"{key}_{sub_key}" + if column_name in fastp_columns: + row_data[column_name] = sub_value + if column_name not in headers: + headers.append(column_name) + else: + if key in fastp_columns: + row_data[key] = value + if key not in headers: + headers.append(key) + elif software_name == "bracken": + for key, value in content_item.items(): + if isinstance(value, dict): + for sub_key, sub_value in value.items(): + column_name = f"{key}_{sub_key}" + row_data[column_name] = sub_value + if column_name not in headers: + headers.append(column_name) + else: + row_data[key] = value + if key not in headers: + headers.append(key) + + # Add contamination column for "bracken" + fraction_total_reads = row_data.get("fraction_total_reads", 0) + row_data["contamination"] = "pass" if float(fraction_total_reads) > 0.6 else "fail" + if "contamination" not in headers: + headers.append("contamination") + + extracted_data.append(row_data) + + if not extracted_data: + print(f"No data extracted for {software_name}") + # Create empty file to prevent Galaxy error + with open(output_csv_file, "w", newline="", encoding="utf-8") as f: + f.write("No data available\n") + return + + with open(output_csv_file, "w", newline="", encoding="utf-8") as f: + writer = csv.DictWriter(f, fieldnames=headers) + 
writer.writeheader() + writer.writerows(extracted_data) + + print(f"CSV file successfully generated: {output_csv_file}") + +if __name__ == "__main__": + if len(sys.argv) != 2: + print("Usage: python extract_software_data.py input.json") + sys.exit(1) + + input_json_file = sys.argv[1] + + try: + with open(input_json_file, "r", encoding="utf-8") as file: + json_data = json.load(file) + extract_software_data(json_data, "fastp") + extract_software_data(json_data, "bracken") + sys.exit(0) + except Exception as e: + print(f"Error processing file: {e}") + sys.exit(1)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/spamr_vet_tools_v2/quality_script_fastp_bracken.xml Tue Mar 25 13:35:00 2025 +0000 @@ -0,0 +1,43 @@ +<tool id="quality_script_fastp_bracken" + name="Quality Control FastP Bracken" + version="0.1.0+galaxy0" + profile="21.05"> + + <description>Quality control using FastP and Bracken</description> + + <requirements> + <requirement type="package" version="3.12">python</requirement> + </requirements> + + <command detect_errors="exit_code"> + <![CDATA[ + python '$__tool_directory__/quality_script_fastp_bracken.py' '$json_input' + ]]> + </command> + + <inputs> + <param name="json_input" type="data" format="json" label="Input JSON Data"/> + </inputs> + + <outputs> + <data name="fastp_output" format="csv" from_work_dir="fastp_output.csv" label="FastP Summary Report on ${on_string}"/> + <data name="bracken_output" format="csv" from_work_dir="bracken_output.csv" label="Bracken Summary Report on ${on_string}"/> + </outputs> + + <help><![CDATA[ +- [FastP](https://github.com/OpenGene/fastp) - A tool for fast and efficient quality control. +- [Bracken](https://github.com/jenniferlu717/Bracken) - A tool for accurate species abundance estimation. + +For support, please contact the tool maintainers. + ]]></help> + +</tool>
import json
import csv
import sys


# Minimum N50 (in bp) for an assembly to "pass" the Filter_N50 check.
N50_PASS_THRESHOLD = 20000

# Column order for the generated QUAST summary CSV.  "Filter_N50" is the
# only column computed by this script; every other column is copied
# verbatim from the QUAST result entries (missing keys become empty cells).
QUAST_HEADERS = [
    "Assembly",
    "contigs_(>=_0_bp)",
    "contigs_(>=_1000_bp)",
    "Total_length_(>=_0_bp)",
    "Total_length_(>=_1000_bp)",
    "contigs",
    "Largest_contig",
    "Total_length",
    "GC",
    "N50",
    "Filter_N50",
    "N90",
    "auN",
    "L50",
    "L90",
    "total_reads",
    "left",
    "right",
    "Mapped",
    "Properly_paired",
    "Avg._coverage_depth",
    "Coverage_>=_1x",
    "N's_per_100_kbp",
]


def _select_tool_entry(json_data, software_name):
    """Return the result dict for *software_name*, or None if absent.

    The input may be either a single per-tool dict or a list of per-tool
    dicts keyed by "analysis_software_name".  Non-dict list elements are
    skipped (the original `"key" in entry` test raised TypeError on them).
    """
    if isinstance(json_data, list):
        json_data = next(
            (
                entry
                for entry in json_data
                if isinstance(entry, dict)
                and entry.get("analysis_software_name") == software_name
            ),
            None,
        )
    return json_data if isinstance(json_data, dict) else None


def _iter_content_items(results):
    """Yield every item of every entry's "content" list, skipping malformed entries."""
    for entry in results:
        content = entry.get("content") if isinstance(entry, dict) else None
        if isinstance(content, list):
            yield from content


def _filter_n50(raw_n50):
    """Classify an N50 value as "pass"/"fail" against N50_PASS_THRESHOLD.

    Empty or missing values count as 0; values that cannot be parsed as a
    number (including non-string JSON types, hence TypeError) are "fail".
    """
    try:
        value = float(raw_n50) if raw_n50 else 0.0
    except (TypeError, ValueError):
        # If the value is non-numeric, consider it as "fail".
        return "fail"
    return "pass" if value > N50_PASS_THRESHOLD else "fail"


def extract_software_data(json_data, software_name):
    """Extract QUAST assembly metrics into "<software_name>_output.csv".

    Each "content" item in the tool's results becomes one CSV row with the
    QUAST_HEADERS columns, plus the computed Filter_N50 pass/fail column.
    Prints a diagnostic and returns without writing if the tool entry is
    missing or malformed.
    """
    tool_entry = _select_tool_entry(json_data, software_name)
    if tool_entry is None:
        print(f"Invalid JSON format for {software_name} extraction.")
        return

    extracted_data = []
    for content_item in _iter_content_items(tool_entry.get("results", [])):
        # Copy every plain column verbatim; compute the derived one.
        row = {
            header: content_item.get(header, "")
            for header in QUAST_HEADERS
            if header != "Filter_N50"
        }
        row["Filter_N50"] = _filter_n50(content_item.get("N50", ""))
        extracted_data.append(row)

    output_csv_file = f"{software_name}_output.csv"
    with open(output_csv_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=QUAST_HEADERS)
        writer.writeheader()
        writer.writerows(extracted_data)

    print(f"CSV file successfully generated: {output_csv_file}")


def extract_contigs_to_fasta(json_data):
    """Extract contigs from the "shovill" entry into "shovill_contigs.fasta".

    Each contig becomes one FASTA record whose header is
    ">{name}_{length}_{coverage}" followed by the sequence line.
    """
    tool_entry = _select_tool_entry(json_data, "shovill")
    if tool_entry is None:
        print("Invalid JSON format for shovill extraction.")
        return

    output_fasta_file = "shovill_contigs.fasta"
    with open(output_fasta_file, "w", encoding="utf-8") as f:
        for content_item in _iter_content_items(tool_entry.get("results", [])):
            name = content_item.get("name", "unknown")
            length = content_item.get("length", "unknown")
            coverage = content_item.get("coverage", "unknown")
            sequence = content_item.get("sequence", "")
            f.write(f">{name}_{length}_{coverage}\n{sequence}\n")

    print(f"FASTA file successfully generated: {output_fasta_file}")


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: python script.py input.json")
        sys.exit(1)

    input_json_file = sys.argv[1]

    try:
        with open(input_json_file, "r", encoding="utf-8") as file:
            json_data = json.load(file)
        extract_software_data(json_data, "quast")
        extract_contigs_to_fasta(json_data)
        sys.exit(0)
    except Exception as e:
        # Top-level boundary: report and exit nonzero so Galaxy flags the job.
        print(f"Error processing file: {e}")
        sys.exit(1)
<!-- Galaxy wrapper for quast_get_fasta.py: parses a combined QUAST/Shovill
     JSON report into a CSV of assembly metrics plus a FASTA of contigs. -->
<tool id="quast_get_fasta"
      name="QUAST Analysis and FASTA Generator"
      version="0.1.0+galaxy0"
      profile="21.05">

    <description>Extracts QUAST metrics and generates FASTA files from JSON input.</description>

    <!-- The wrapped script is plain Python; the interpreter is the only dependency. -->
    <requirements>
        <requirement type="package" version="3.12">python</requirement>
    </requirements>

    <!-- The script writes quast_output.csv and shovill_contigs.fasta into the
         job working directory; both are collected via from_work_dir below.
         detect_errors="exit_code" relies on the script's sys.exit(1) on failure. -->
    <command detect_errors="exit_code">
        <![CDATA[
        python '$__tool_directory__/quast_get_fasta.py' '$json_input'
        ]]>
    </command>

    <inputs>
        <param name="json_input" type="data" format="json" label="Input JSON File"
               help="Provide a JSON file containing QUAST and Shovill results."/>
    </inputs>

    <!-- Output filenames are hard-coded in the script; from_work_dir must match. -->
    <outputs>
        <data name="csv_output" format="csv" from_work_dir="quast_output.csv"
              label="QUAST Summary on ${on_string}"/>
        <data name="fasta_output" format="fasta" from_work_dir="shovill_contigs.fasta"
              label="Shovill Contigs on ${on_string}"/>
    </outputs>

    <!-- NOTE(review): test fixtures example_input.json / expected_output.* are
         not visible in this changeset - confirm they exist in test-data/. -->
    <tests>
        <test>
            <param name="json_input" value="example_input.json"/>
            <output name="csv_output" file="expected_output.csv" compare="diff"/>
            <output name="fasta_output" file="expected_output.fasta" compare="diff"/>
        </test>
    </tests>

    <help><![CDATA[
QUAST Analysis and FASTA Generator
==================================

This tool extracts key statistics from **QUAST** and generates a **FASTA** file containing assembled contigs from **Shovill**.

Usage Instructions
------------------
1. Upload or provide a **JSON file** containing **QUAST** and **Shovill** results.
2. The tool will:
   - Extract **assembly metrics** from QUAST and save them as a CSV.
   - Convert **contigs from Shovill** into a FASTA file.
3. The outputs will be:
   - `quast_output.csv` (QUAST summary metrics)
   - `shovill_contigs.fasta` (FASTA file with contigs)

Outputs
-------
- **CSV File:** Contains QUAST summary metrics such as `N50`, `GC content`, `total length`, `L50`, and other key assembly statistics.
- **FASTA File:** Extracts contigs from **Shovill**, formatting them properly for downstream analysis.

References
----------
- `QUAST <http://bioinf.spbau.ru/quast>`_ - Quality assessment tool for genome assemblies.
- `Shovill <https://github.com/tseemann/shovill>`_ - A tool for rapid bacterial genome assembly using SPAdes.

For questions or issues, please contact the tool maintainers.
    ]]></help>

</tool>