Mercurial > repos > maciek > spamr_vet_tools
annotate spamr_vet_tools_v2/mlst_amrfinder_staramr.py @ 2:d7b099fbb003 draft default tip
Corrected file names and updated tool wrappers for consistency.
author | maciek |
---|---|
date | Tue, 25 Mar 2025 13:35:00 +0000 |
parents | |
children |
rev | line source |
---|---|
2
d7b099fbb003
Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff
changeset
|
1 import json |
d7b099fbb003
Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff
changeset
|
2 import csv |
d7b099fbb003
Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff
changeset
|
3 import sys |
d7b099fbb003
Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff
changeset
|
4 |
d7b099fbb003
Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff
changeset
|
def _format_locus(value):
    """Return MLST locus data as a single "; "-separated string.

    Lists/tuples are joined after converting each item with ``str``; a plain
    string is returned unchanged; ``None`` becomes an empty string.
    """
    if value is None:
        return ""
    if isinstance(value, str):
        # Joining a bare string would interleave "; " between its characters.
        return value
    if isinstance(value, (list, tuple)):
        return "; ".join(str(item) for item in value)
    return str(value)


def _write_csv(path, fieldnames, rows):
    """Write dict ``rows`` to ``path``, filling missing columns with ""."""
    with open(path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        for row in rows:
            writer.writerow({key: row.get(key, "") for key in fieldnames})


def generate_csv_from_json(json_data):
    """
    Parse the JSON and generate one CSV file per analysis software entry.

    For every entry in ``json_data`` the dict rows found in each result's
    ``content`` list are collected and written to
    ``<analysis_software_name>_output.csv`` in the current working directory.
    The column order follows the first appearance of each key in the JSON.

    Additionally, a result named ``mlst_file`` has its content written to
    ``mlst.csv`` with the fixed columns "Isolate ID", "Scheme",
    "Sequence Type" and "Locus" (locus values joined with "; ").  Its rows are
    also included in the per-software CSV, as before.

    Malformed pieces (results or content rows that are not JSON objects,
    content that is not a list) are skipped instead of aborting the run.

    Args:
        json_data: Iterable of dicts, each optionally holding
            ``analysis_software_name`` and ``results``.

    Returns:
        None. Output is written to disk and progress is printed to stdout.
    """
    mlst_file_path = "mlst.csv"
    mlst_headers = ["Isolate ID", "Scheme", "Sequence Type", "Locus"]

    for entry in json_data:
        analysis_software = entry.get("analysis_software_name", "unknown")
        # ``or []`` also covers an explicit JSON null.
        results = entry.get("results") or []

        csv_file = f"{analysis_software}_output.csv"
        extracted_data = []
        # dict keys act as an insertion-ordered set, giving O(1) de-duplication
        # while keeping the original order of the JSON keys.
        headers = {}

        for result in results:
            if not isinstance(result, dict):
                continue
            content = result.get("content")
            if not isinstance(content, list):
                continue
            rows = [item for item in content if isinstance(item, dict)]

            # Write the MLST CSV file
            if rows and result.get("name") == "mlst_file":
                mlst_rows = [
                    {
                        "Isolate ID": row.get("Isolate ID", ""),
                        "Scheme": row.get("Scheme", ""),
                        "Sequence Type": row.get("Sequence Type", ""),
                        "Locus": _format_locus(row.get("Locus")),
                    }
                    for row in rows
                ]
                _write_csv(mlst_file_path, mlst_headers, mlst_rows)
                print(f"MLST CSV file successfully generated: {mlst_file_path}")

            for row in rows:
                extracted_data.append(row)
                for key in row:
                    headers.setdefault(key, None)

        # Write the CSV file if there is data
        if extracted_data:
            _write_csv(csv_file, list(headers), extracted_data)
            print(f"CSV file successfully generated: {csv_file}")
        else:
            # Reported whenever no per-software CSV is produced, whether the
            # results list was empty or none of the results carried rows.
            print(f"No content found for {analysis_software}.")
d7b099fbb003
Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff
changeset
|
58 |
d7b099fbb003
Corrected file names and updated tool wrappers for consistency.
maciek
parents:
diff
changeset
|
if __name__ == "__main__":
    # Command-line entry point: expects exactly one argument, the path to the
    # input JSON file. Exits with status 0 on success and 1 on any failure.
    if len(sys.argv) != 2:
        # Diagnostics go to stderr so they never mix with regular output.
        print("Usage: python script.py input.json", file=sys.stderr)
        sys.exit(1)

    input_json_file = sys.argv[1]

    try:
        with open(input_json_file, "r", encoding="utf-8") as file:
            json_data = json.load(file)
        # The input file is closed before any output files are written.
        generate_csv_from_json(json_data)
    except Exception as e:
        # Top-level boundary: report any failure (unreadable file, invalid
        # JSON, unexpected structure) and signal it through the exit status.
        print(f"Error processing file: {e}", file=sys.stderr)
        sys.exit(1)

    sys.exit(0)