annotate spamr_vet_tools_v2/mlst_amrfinder_staramr.py @ 2:d7b099fbb003 draft default tip

Corrected file names and updated tool wrappers for consistency.
author maciek
date Tue, 25 Mar 2025 13:35:00 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
1 import json
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
2 import csv
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
3 import sys
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
4
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
5 def generate_csv_from_json(json_data):
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
6 """
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
7 Parse the JSON and generate CSV files based on the analysis_software_name Abricate, AMRfinder plus and STARamr.
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
8 Additionally, extract and process the 'mlst_file' content into its own CSV.
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
9 """
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
10 for entry in json_data:
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
11 analysis_software = entry.get("analysis_software_name", "unknown")
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
12 results = entry.get("results", [])
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
13
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
14 if results:
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
15 csv_file = f"{analysis_software}_output.csv"
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
16 extracted_data = []
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
17 headers = []
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
18
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
19 for result in results:
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
20 if result.get("name") == "mlst_file":
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
21 mlst_file_path = "mlst.csv"
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
22 mlst_content = result.get("content", [])
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
23 mlst_headers = ["Isolate ID", "Scheme", "Sequence Type", "Locus"]
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
24
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
25 # Write the MLST CSV file
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
26 if mlst_content:
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
27 with open(mlst_file_path, "w", newline="", encoding="utf-8") as f:
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
28 writer = csv.DictWriter(f, fieldnames=mlst_headers)
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
29 writer.writeheader()
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
30 for row in mlst_content:
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
31 writer.writerow({
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
32 "Isolate ID": row.get("Isolate ID", ""),
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
33 "Scheme": row.get("Scheme", ""),
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
34 "Sequence Type": row.get("Sequence Type", ""),
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
35 "Locus": "; ".join(row.get("Locus", []))
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
36 })
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
37
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
38 print(f"MLST CSV file successfully generated: {mlst_file_path}")
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
39
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
40 if "content" in result and isinstance(result["content"], list):
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
41 for content_item in result["content"]:
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
42 extracted_data.append(content_item)
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
43 for key in content_item.keys():
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
44 if key not in headers:
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
45 headers.append(key) # Maintain the original order of the JSON keys
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
46
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
47 # Write the CSV file if there is data
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
48 if extracted_data:
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
49 with open(csv_file, "w", newline="", encoding="utf-8") as f:
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
50 writer = csv.DictWriter(f, fieldnames=headers)
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
51 writer.writeheader()
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
52 for row in extracted_data:
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
53 writer.writerow({key: row.get(key, "") for key in headers})
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
54
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
55 print(f"CSV file successfully generated: {csv_file}")
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
56 else:
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
57 print(f"No content found for {analysis_software}.")
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
58
d7b099fbb003 Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff changeset
if __name__ == "__main__":
    # Exactly one positional argument is expected: the path to the input JSON.
    if len(sys.argv) != 2:
        # Diagnostics go to stderr so stdout only carries progress messages.
        print("Usage: python script.py input.json", file=sys.stderr)
        sys.exit(1)

    input_json_file = sys.argv[1]

    try:
        with open(input_json_file, "r", encoding="utf-8") as file:
            json_data = json.load(file)
        generate_csv_from_json(json_data)
    except Exception as e:
        # Top-level boundary: report any failure (unreadable file, invalid
        # JSON, unexpected structure) and signal it through the exit status.
        print(f"Error processing file: {e}", file=sys.stderr)
        sys.exit(1)

    sys.exit(0)