comparison qupath_roi_splitter.py @ 4:9f136ebf73ac draft

planemo upload for repository https://github.com/npinter/ROIsplitter commit 918ae25f84e7042ed36461219ff068633c1c2427
author galaxyp
date Fri, 19 Jul 2024 14:33:40 +0000
parents 24ccdcfbabac
children 17c54a716a5b
comparison
equal deleted inserted replaced
3:24ccdcfbabac 4:9f136ebf73ac
4 import geojson 4 import geojson
5 import numpy as np 5 import numpy as np
6 import pandas as pd 6 import pandas as pd
7 7
8 8
9 def draw_poly(input_df, input_img, col=(0, 0, 0), fill=False): 9 def collect_coords(input_coords, feature_index, coord_index=0):
10 s = np.array(input_df) 10 coords_with_index = []
11 if fill: 11 for coord in input_coords:
12 output_img = cv2.fillPoly(input_img, pts=np.int32([s]), color=col) 12 coords_with_index.append((coord[0], coord[1], feature_index, coord_index))
13 else: 13 coord_index += 1
14 output_img = cv2.polylines(input_img, np.int32([s]), True, color=col, thickness=1) 14 return coords_with_index
15 return output_img
16 15
17 16
18 def draw_roi(input_roi, input_img, fill): 17 def collect_roi_coords(input_roi, feature_index):
18 all_coords = []
19 if len(input_roi["geometry"]["coordinates"]) == 1: 19 if len(input_roi["geometry"]["coordinates"]) == 1:
20 # Polygon w/o holes 20 # Polygon w/o holes
21 input_img = draw_poly(input_roi["geometry"]["coordinates"][0], input_img, fill=fill) 21 all_coords.extend(collect_coords(input_roi["geometry"]["coordinates"][0], feature_index))
22 else: 22 else:
23 first_roi = True 23 coord_index = 0
24 for sub_roi in input_roi["geometry"]["coordinates"]: 24 for sub_roi in input_roi["geometry"]["coordinates"]:
25 # Polygon with holes 25 # Polygon with holes or MultiPolygon
26 if not isinstance(sub_roi[0][0], list): 26 if not isinstance(sub_roi[0][0], list):
27 if first_roi: 27 all_coords.extend(collect_coords(sub_roi, feature_index, coord_index))
28 first_roi = False 28 coord_index += len(sub_roi)
29 col = (0, 0, 0)
30 else:
31 # holes in ROI
32 col = (255, 255, 255) if not fill else (0, 0, 0)
33 input_img = draw_poly(sub_roi, input_img, col=col, fill=fill)
34 else: 29 else:
35 # MultiPolygon with holes 30 # MultiPolygon with holes
36 for sub_coord in sub_roi: 31 for sub_coord in sub_roi:
37 if first_roi: 32 all_coords.extend(collect_coords(sub_coord, feature_index, coord_index))
38 first_roi = False 33 coord_index += len(sub_coord)
39 col = (0, 0, 0) 34 return all_coords
40 else:
41 # holes in ROI
42 col = (255, 255, 255) if not fill else (0, 0, 0)
43 input_img = draw_poly(sub_coord, input_img, col=col, fill=fill)
44
45 return input_img
46 35
47 36
48 def split_qupath_roi(in_roi): 37 def split_qupath_roi(in_roi):
49 with open(in_roi) as file: 38 with open(in_roi) as file:
50 qupath_roi = geojson.load(file) 39 qupath_roi = geojson.load(file)
51 40
52 # HE dimensions 41 # HE dimensions
53 dim_plt = [qupath_roi["dim"]["width"], qupath_roi["dim"]["height"]] 42 dim_plt = [int(qupath_roi["dim"]["width"]), int(qupath_roi["dim"]["height"])]
54 43
55 tma_name = qupath_roi["name"] 44 tma_name = qupath_roi["name"]
56 cell_types = [ct.rsplit(" - ", 1)[-1] for ct in qupath_roi["featureNames"]] 45 cell_types = [ct.rsplit(" - ", 1)[-1] for ct in qupath_roi["featureNames"]]
57 46
58 for cell_type in cell_types: 47 coords_by_cell_type = {ct: [] for ct in cell_types}
59 # create numpy array with white background 48 coords_by_cell_type['all'] = [] # For storing all coordinates if args.all is True
60 img = np.zeros((dim_plt[1], dim_plt[0], 3), dtype="uint8")
61 img.fill(255)
62 49
63 for i, roi in enumerate(qupath_roi["features"]): 50 for feature_index, roi in enumerate(qupath_roi["features"]):
64 if not args.all: 51 feature_coords = collect_roi_coords(roi, feature_index)
65 if "classification" not in roi["properties"]: 52
66 continue 53 if args.all:
67 if roi["properties"]["classification"]["name"] == cell_type: 54 coords_by_cell_type['all'].extend(feature_coords)
68 img = draw_roi(roi, img, args.fill) 55 elif "classification" in roi["properties"]:
56 cell_type = roi["properties"]["classification"]["name"]
57 if cell_type in cell_types:
58 coords_by_cell_type[cell_type].extend(feature_coords)
59
60 for cell_type, coords in coords_by_cell_type.items():
61 if coords:
62 # Generate image (white background)
63 img = np.ones((dim_plt[1], dim_plt[0]), dtype="uint8") * 255
64
65 # Convert to numpy array and ensure integer coordinates
66 coords_arr = np.array(coords).astype(int)
67
68 # Sort by feature_index first, then by coord_index
69 coords_arr = coords_arr[np.lexsort((coords_arr[:, 3], coords_arr[:, 2]))]
70
71 # Get filled pixel coordinates
72 if args.fill:
73 filled_coords = np.column_stack(np.where(img == 0))
74 all_coords = np.unique(np.vstack((coords_arr[:, :2], filled_coords[:, ::-1])), axis=0)
69 else: 75 else:
70 img = draw_roi(roi, img, args.fill) 76 all_coords = coords_arr[:, :2]
71 77
72 # get all black pixel 78 # Save all coordinates to CSV
73 coords_arr = np.column_stack(np.where(img == (0, 0, 0))) 79 coords_df = pd.DataFrame(all_coords, columns=['x', 'y'], dtype=int)
80 coords_df.to_csv("{}_{}.txt".format(tma_name, cell_type), sep='\t', index=False)
74 81
75 # remove duplicated rows 82 # Generate image for visualization if --img is specified
76 coords_arr_xy = coords_arr[coords_arr[:, 2] == 0] 83 if args.img:
84 # Group coordinates by feature_index
85 features = {}
86 for x, y, feature_index, coord_index in coords_arr:
87 if feature_index not in features:
88 features[feature_index] = []
89 features[feature_index].append((x, y))
77 90
78 # remove last column 91 # Draw each feature separately
79 coords_arr_xy = np.delete(coords_arr_xy, 2, axis=1) 92 for feature_coords in features.values():
93 pts = np.array(feature_coords, dtype=np.int32)
94 if args.fill:
95 cv2.fillPoly(img, [pts], color=0) # Black fill
96 else:
97 cv2.polylines(img, [pts], isClosed=True, color=0, thickness=1) # Black outline
80 98
81 # to pandas and rename columns to x and y 99 cv2.imwrite("{}_{}.png".format(tma_name, cell_type), img)
82 coords_df = pd.DataFrame(coords_arr_xy, columns=['y', 'x'])
83
84 # reorder columns
85 coords_df = coords_df[['x', 'y']]
86
87 # drop duplicates
88 coords_df = coords_df.drop_duplicates(
89 subset=['x', 'y'],
90 keep='last').reset_index(drop=True)
91
92 coords_df.to_csv("{}_{}.txt".format(tma_name, cell_type), sep='\t', index=False)
93
94 # img save
95 if args.img:
96 cv2.imwrite("{}_{}.png".format(tma_name, cell_type), img)
97 100
98 101
99 if __name__ == "__main__": 102 if __name__ == "__main__":
100 parser = argparse.ArgumentParser(description="Split ROI coordinates of QuPath TMA annotation by cell type (classfication)") 103 parser = argparse.ArgumentParser(description="Split ROI coordinates of QuPath TMA annotation by cell type (classification)")
101 parser.add_argument("--qupath_roi", default=False, help="Input QuPath annotation (GeoJSON file)") 104 parser.add_argument("--qupath_roi", default=False, help="Input QuPath annotation (GeoJSON file)")
102 parser.add_argument("--fill", action="store_true", required=False, help="Fill pixels in ROIs") 105 parser.add_argument("--fill", action="store_true", required=False, help="Fill pixels in ROIs (order of coordinates will be lost)")
103 parser.add_argument('--version', action='version', version='%(prog)s 0.1.0') 106 parser.add_argument('--version', action='version', version='%(prog)s 0.3.0')
104 parser.add_argument("--all", action="store_true", required=False, help="Extracts all ROIs") 107 parser.add_argument("--all", action="store_true", required=False, help="Extracts all ROIs")
105 parser.add_argument("--img", action="store_true", required=False, help="Generates image of ROIs") 108 parser.add_argument("--img", action="store_true", required=False, help="Generates image of ROIs")
106 args = parser.parse_args() 109 args = parser.parse_args()
107 110
108 if args.qupath_roi: 111 if args.qupath_roi: