| Next changeset 1:064b53fd3131 (2023-06-14) |
|
Commit message:
planemo upload for repository https://github.com/npinter/ROIsplitter commit cdf3e9652b10c7a0b179202129a797e32fd95909 |
|
added:
qupath_roi_splitter.py qupath_roi_splitter.xml test-data/annotations_TMA_E-5.geojson test-data/annotations_TMA_F-5.geojson |
| b |
| diff -r 000000000000 -r b5e9cebb27e3 qupath_roi_splitter.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qupath_roi_splitter.py Tue Apr 25 09:30:06 2023 +0000 |
| [ |
"""Split the ROI coordinates of a QuPath TMA annotation by cell type.

The input is a GeoJSON file that carries, next to the usual ``features``
list, three extra top-level keys read by this script: ``name`` (used as the
output file prefix), ``featureNames`` (entries such as ``"E-5 - Tumor"``) and
``dim`` (``width`` / ``height`` of the canvas to rasterise on).

For every cell type one tab-separated file ``<name>_<cell type>.txt`` is
written to the current working directory, listing the ``x`` / ``y`` position
of each pixel covered by an ROI of that cell type.
"""
import argparse

import cv2
import geojson
import numpy as np
import pandas as pd

# Canvas colours: ROIs are painted black on a white background.
_BLACK = (0, 0, 0)
_WHITE = (255, 255, 255)


def draw_poly(input_df, input_img, col=(0, 0, 0)):
    """Fill a single polygon ring on ``input_img`` and return the image.

    Args:
        input_df: Sequence of ``[x, y]`` vertices describing one ring.
        input_img: Image array to draw on; it is modified in place.
        col: Fill colour passed to ``cv2.fillPoly``.

    Returns:
        The image returned by ``cv2.fillPoly``.
    """
    ring = np.array(input_df)
    # Vertices are cast to int32 for OpenCV, so fractional coordinates are
    # truncated rather than rounded.
    return cv2.fillPoly(input_img, pts=np.int32([ring]), color=col)


def _iter_polygons(geometry):
    """Return the geometry as a list of polygons (each a list of rings)."""
    geom_type = geometry.get("type")
    coordinates = geometry.get("coordinates") or []
    if geom_type == "Polygon":
        return [coordinates]
    if geom_type == "MultiPolygon":
        return list(coordinates)
    # Points, lines, etc. have no area to rasterise.
    return []


def _draw_geometry(geometry, img):
    """Paint every polygon of ``geometry`` on ``img``, cutting out holes."""
    for rings in _iter_polygons(geometry):
        for ring_idx, ring in enumerate(rings):
            if len(ring) == 0:
                continue
            # The first ring of *each* polygon is its outline; any further
            # ring of that polygon is a hole and is painted back to white.
            colour = _BLACK if ring_idx == 0 else _WHITE
            img = draw_poly(ring, img, col=colour)
    return img


def _classification_name(feature):
    """Return the feature's classification name, or None if unclassified."""
    properties = feature.get("properties") or {}
    classification = properties.get("classification") or {}
    return classification.get("name")


def _black_pixel_coords(img):
    """Return a DataFrame with the ``x`` / ``y`` position of black pixels."""
    # Only pure black or pure white is ever drawn, so testing one channel is
    # enough and avoids building a full three-channel boolean array.
    rows, cols = np.nonzero(img[:, :, 0] == 0)
    # Array rows run along the image height (y), columns along the width (x).
    return pd.DataFrame({"x": cols, "y": rows})


def split_qupath_roi(in_roi):
    """Write one coordinate table per cell type found in ``in_roi``.

    Args:
        in_roi: Path to the QuPath GeoJSON annotation file.

    Side effects:
        Writes ``<name>_<cell type>.txt`` (tab-separated, columns ``x`` and
        ``y``) into the current working directory for every cell type listed
        in ``featureNames``.
    """
    with open(in_roi, encoding="utf-8") as file:
        qupath_roi = geojson.load(file)

    # HE dimensions
    width = int(qupath_roi["dim"]["width"])
    height = int(qupath_roi["dim"]["height"])

    tma_name = qupath_roi["name"]
    # "E-5 - Tumor" -> "Tumor"; dict.fromkeys drops repeats but keeps order.
    cell_types = list(dict.fromkeys(
        ct.rsplit(" - ", 1)[-1] for ct in qupath_roi["featureNames"]
    ))

    # One canvas is allocated and wiped for every cell type, so the pixels of
    # one cell type never leak into the output of the next one.
    img = np.empty((height, width, 3), dtype="uint8")

    for cell_type in cell_types:
        img.fill(255)

        for roi in qupath_roi["features"]:
            if _classification_name(roi) != cell_type:
                continue
            img = _draw_geometry(roi.get("geometry") or {}, img)

        coords_df = _black_pixel_coords(img)
        coords_df.to_csv("{}_{}.txt".format(tma_name, cell_type), sep='\t', index=False)


def main():
    """Parse the command line and split the given annotation file."""
    parser = argparse.ArgumentParser(description="Split ROI coordinates of QuPath TMA annotation by cell type (classification)")
    parser.add_argument("--qupath_roi", default=False, help="Input QuPath annotation (GeoJSON file)")
    parser.add_argument('--version', action='version', version='%(prog)s 0.1.0')
    args = parser.parse_args()

    if args.qupath_roi:
        split_qupath_roi(args.qupath_roi)


if __name__ == "__main__":
    main()
| b |
| diff -r 000000000000 -r b5e9cebb27e3 qupath_roi_splitter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qupath_roi_splitter.xml Tue Apr 25 09:30:06 2023 +0000 |
| [ |
| @@ -0,0 +1,55 @@ +<tool id="qupath_roi_splitter" name="QuPath ROI Splitter" version="0.1.0"> + <description>Split ROI coordinates of QuPath TMA annotation by cell type (classification)</description> + <requirements> + <requirement type="package" version="3.0.1">geojson</requirement> + <requirement type="package" version="1.24.2">numpy</requirement> + <requirement type="package" version="4.7.0">opencv</requirement> + <requirement type="package" version="2.0.0">pandas</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ + #for $input in $input_collection + python3 '$__tool_directory__/qupath_roi_splitter.py' --qupath_roi '$input' && + #end for + mkdir out && + mv *.txt out/ + ]]></command> + <inputs> + <param name="input_collection" type="data_collection" format="geojson" label="Input QuPath annotation" help="Collection containing GeoJSON files"/> + </inputs> + <outputs> + <collection name="output_txts" type="list" label="${tool.name} on ${on_string}: ROI data"> + <discover_datasets pattern="__name_and_ext__" directory="out" visible="false" format="txt"/> + </collection> + </outputs> + <tests> + <test> + <param name="input_collection"> + <collection type="list"> + <element name="annotations_TMA_E-5.geojson" value="annotations_TMA_E-5.geojson" /> + <element name="annotations_TMA_F-5.geojson" value="annotations_TMA_F-5.geojson" /> + </collection> + </param> + <output_collection name="output_txts" type="list" count="4"> + <element name="F-5_Stroma"> + <assert_contents> + <has_text text="x"/> + <has_text text="y"/> + <has_text text="14240"/> + <has_text text="21008"/> + </assert_contents> + </element> + </output_collection> + </test> + </tests> + <help><![CDATA[ + **QuPath ROI Splitter** + + This tool extracts ROI coordinates of QuPath TMA annotation by cell type. + + Input: A collection containing GeoJSON files with QuPath annotation data. 
You need to run the QuPath TMA annotation script first which can be found at https://github.com/npinter/ROIsplitter. + + Output: A list of text files containing the ROI coordinates for each cell type. + + https://github.com/npinter/ROIsplitter + ]]></help> +</tool> |
| b |
| diff -r 000000000000 -r b5e9cebb27e3 test-data/annotations_TMA_E-5.geojson --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotations_TMA_E-5.geojson Tue Apr 25 09:30:06 2023 +0000 |
| [ |
| b'@@ -0,0 +1,4930 @@\n+{ "name": "E-5","featureNames":[\n+ "E-5 - Tumor",\n+ "E-5 - Stroma"\n+],"dim": {"width": 23840, "height": 18064},"type": "FeatureCollection","features":[\n+ {\n+ "type": "Feature",\n+ "geometry": {\n+ "type": "Polygon",\n+ "coordinates": [\n+ [\n+ [21383.93, 14321.04],\n+ [21379.03, 14324.49],\n+ [21377.1, 14325],\n+ [21371.26, 14323.64],\n+ [21369.59, 14324.75],\n+ [21365.28, 14331.43],\n+ [21365, 14335.41],\n+ [21366.68, 14339.02],\n+ [21369.8, 14341.47],\n+ [21373.79, 14345.9],\n+ [21377.38, 14347.67],\n+ [21381.34, 14347.83],\n+ [21384.67, 14345.66],\n+ [21386.21, 14339.93],\n+ [21387.4, 14338.32],\n+ [21389.38, 14338.08],\n+ [21397.12, 14339.98],\n+ [21404.72, 14337.61],\n+ [21406.87, 14334.27],\n+ [21407, 14332.27],\n+ [21403.9, 14325],\n+ [21398, 14325.67],\n+ [21396.28, 14324.65],\n+ [21392.42, 14325.59],\n+ [21386.9, 14323.24],\n+ [21383.93, 14321.04]\n+ ]\n+ ]\n+ },\n+ "nucleusGeometry": {\n+ "type": "Polygon",\n+ "coordinates": [\n+ [\n+ [21385.34, 14323.33],\n+ [21381.46, 14324.31],\n+ [21379.32, 14327.68],\n+ [21377.34, 14328],\n+ [21375.52, 14327.18],\n+ [21373.61, 14327.78],\n+ [21370.16, 14332.68],\n+ [21370, 14334.67],\n+ [21370.96, 14336.43],\n+ [21374.86, 14336.52],\n+ [21376.27, 14337.94],\n+ [21376.89, 14341.89],\n+ [21378.7, 14342.74],\n+ [21380.66, 14342.34],\n+ [21380.31, 14338.37],\n+ [21380.6, 14334.4],\n+ [21384.59, 14334.21],\n+ [21387.45, 14331.41],\n+ [21389.09, 14332.54],\n+ [21391.09, 14332.46],\n+ [21392.67, 14333.67],\n+ [21396.58, 14333.17],\n+ [21398.13, 14334.44],\n+ [21401.75, 14332.75],\n+ [21394.33, 14329.83],\n+ [21395.6, 14328.28],\n+ [21399.51, 14327.49],\n+ [21399.02, 14325.55],\n+ [21397.29, 14326.54],\n+ [21395.34, 14326.07],\n+ [21391.36, 14326.26],\n+ [21390.22, 14324.61],\n+ [21386.33, 14325],\n+ [21385.34, 14323.33]\n+ ]\n+ ]\n+ },\n+ "properties": {\n+ "object_type": "cell",\n+ "classification": {\n+ "name": "Tumor",\n+ "colorRGB": -3670016\n+ },\n+ "isLocked": false,\n+ "measurements": 
[\n+ {\n+ "name": "Nucleus: Area",\n+ "value": 215.360107421875\n+ },\n+ {\n+ "name": "Nucleus: Perimeter",\n+ "value": 94.80494689941406\n+ },\n+ {\n+ "name": "Nucleus: Circularity",\n+ "value": 0.3011016249656677\n+ },\n+ {\n+ "name": "Nucleus: Max caliper",\n+ "value": 29.585416793823242\n+ },\n+ {\n+ "name": "Nucleus: Min caliper",\n+ "value": 15.034975051879883\n+ },\n+ {\n+ "name": "Nucleus: Eccentricity",\n+ "value": 0.8886669278144836\n+ },\n+ {\n+ "name": "Nucleus: Hematoxylin OD mean",\n+ "value": 0.3517749011516571\n+ },\n+ {\n+ "name": "Nucleus: Hematoxylin OD sum",\n+ "value": 90.75792694091797\n+ },\n+ {\n+ "name": "Nucleus: Hematoxylin OD std dev",\n+ "value": 0.06588728725910187\n+ },\n+ {\n+ "name": "Nucleus: Hematoxylin OD max",\n+ "value": 0.5032011270523071\n+ },\n+ {\n+ "name": "Nucleus: Hematoxylin OD min",\n+ "value": 0.20275430381298065\n+ },\n+ {\n+ "name": "Nucleus: Hematoxylin OD range",\n+ "value": 0.3004468083381653\n+ },\n+ {\n+ "name": "Nucleus: Eosin OD mean",\n+ "value": 0.0043587107211351395\n+ },\n+ {\n+ "na'..b'451690674\n+ },\n+ {\n+ "name": "Nucleus: Eccentricity",\n+ "value": 0.757319986820221\n+ },\n+ {\n+ "name": "Nucleus: Hematoxylin OD mean",\n+ "value": 0.28198176622390747\n+ },\n+ {\n+ "name": "Nucleus: Hematoxylin OD sum",\n+ "value": 4.5117082595825195\n+ },\n+ {\n+ "name": "Nucleus: Hematoxylin OD std dev",\n+ "value": 0.02770525962114334\n+ },\n+ {\n+ "name": "Nucleus: Hematoxylin OD max",\n+ "value": 0.32805564999580383\n+ },\n+ {\n+ "name": "Nucleus: Hematoxylin OD min",\n+ "value": 0.2247738242149353\n+ },\n+ {\n+ "name": "Nucleus: Hematoxylin OD range",\n+ "value": 0.10328182578086853\n+ },\n+ {\n+ "name": "Nucleus: Eosin OD mean",\n+ "value": 0.006495605688542128\n+ },\n+ {\n+ "name": "Nucleus: Eosin OD sum",\n+ "value": 0.10392969101667404\n+ },\n+ {\n+ "name": "Nucleus: Eosin OD std dev",\n+ "value": 0.012382211163640022\n+ },\n+ {\n+ "name": "Nucleus: Eosin OD max",\n+ "value": 0.03777380287647247\n+ },\n+ 
{\n+ "name": "Nucleus: Eosin OD min",\n+ "value": -0.012739512138068676\n+ },\n+ {\n+ "name": "Nucleus: Eosin OD range",\n+ "value": 0.050513315945863724\n+ },\n+ {\n+ "name": "Cell: Area",\n+ "value": 92.54429626464844\n+ },\n+ {\n+ "name": "Cell: Perimeter",\n+ "value": 35.43148422241211\n+ },\n+ {\n+ "name": "Cell: Circularity",\n+ "value": 0.926362156867981\n+ },\n+ {\n+ "name": "Cell: Max caliper",\n+ "value": 13.06949520111084\n+ },\n+ {\n+ "name": "Cell: Min caliper",\n+ "value": 9.288158416748047\n+ },\n+ {\n+ "name": "Cell: Eccentricity",\n+ "value": 0.6711435317993164\n+ },\n+ {\n+ "name": "Cell: Hematoxylin OD mean",\n+ "value": 0.22877590358257294\n+ },\n+ {\n+ "name": "Cell: Hematoxylin OD std dev",\n+ "value": 0.05567134916782379\n+ },\n+ {\n+ "name": "Cell: Hematoxylin OD max",\n+ "value": 0.3794621229171753\n+ },\n+ {\n+ "name": "Cell: Hematoxylin OD min",\n+ "value": 0.11053355038166046\n+ },\n+ {\n+ "name": "Cell: Eosin OD mean",\n+ "value": 0.0176229327917099\n+ },\n+ {\n+ "name": "Cell: Eosin OD std dev",\n+ "value": 0.01594710350036621\n+ },\n+ {\n+ "name": "Cell: Eosin OD max",\n+ "value": 0.04452840983867645\n+ },\n+ {\n+ "name": "Cell: Eosin OD min",\n+ "value": -0.012739512138068676\n+ },\n+ {\n+ "name": "Cytoplasm: Hematoxylin OD mean",\n+ "value": 0.21981492638587952\n+ },\n+ {\n+ "name": "Cytoplasm: Hematoxylin OD std dev",\n+ "value": 0.05423422157764435\n+ },\n+ {\n+ "name": "Cytoplasm: Hematoxylin OD max",\n+ "value": 0.3794621229171753\n+ },\n+ {\n+ "name": "Cytoplasm: Hematoxylin OD min",\n+ "value": 0.11053355038166046\n+ },\n+ {\n+ "name": "Cytoplasm: Eosin OD mean",\n+ "value": 0.019497008994221687\n+ },\n+ {\n+ "name": "Cytoplasm: Eosin OD std dev",\n+ "value": 0.015764081850647926\n+ },\n+ {\n+ "name": "Cytoplasm: Eosin OD max",\n+ "value": 0.04452840983867645\n+ },\n+ {\n+ "name": "Cytoplasm: Eosin OD min",\n+ "value": -0.009999651461839676\n+ },\n+ {\n+ "name": "Nucleus/Cell area ratio",\n+ "value": 0.11559037119150162\n+ 
}\n+ ]\n+ }\n+ }\n+]}\n\\ No newline at end of file\n' |
| b |
| diff -r 000000000000 -r b5e9cebb27e3 test-data/annotations_TMA_F-5.geojson --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotations_TMA_F-5.geojson Tue Apr 25 09:30:06 2023 +0000 |
| [ |
| b'@@ -0,0 +1,4930 @@\n+{ "name": "F-5","featureNames":[\n+ "F-5 - Tumor",\n+ "F-5 - Stroma"\n+],"dim": {"width": 23840, "height": 18064},"type": "FeatureCollection","features":[\n+ {\n+ "type": "Feature",\n+ "geometry": {\n+ "type": "Polygon",\n+ "coordinates": [\n+ [\n+ [21383.93, 14321.04],\n+ [21379.03, 14324.49],\n+ [21377.1, 14325],\n+ [21371.26, 14323.64],\n+ [21369.59, 14324.75],\n+ [21365.28, 14331.43],\n+ [21365, 14335.41],\n+ [21366.68, 14339.02],\n+ [21369.8, 14341.47],\n+ [21373.79, 14345.9],\n+ [21377.38, 14347.67],\n+ [21381.34, 14347.83],\n+ [21384.67, 14345.66],\n+ [21386.21, 14339.93],\n+ [21387.4, 14338.32],\n+ [21389.38, 14338.08],\n+ [21397.12, 14339.98],\n+ [21404.72, 14337.61],\n+ [21406.87, 14334.27],\n+ [21407, 14332.27],\n+ [21403.9, 14325],\n+ [21398, 14325.67],\n+ [21396.28, 14324.65],\n+ [21392.42, 14325.59],\n+ [21386.9, 14323.24],\n+ [21383.93, 14321.04]\n+ ]\n+ ]\n+ },\n+ "nucleusGeometry": {\n+ "type": "Polygon",\n+ "coordinates": [\n+ [\n+ [21385.34, 14323.33],\n+ [21381.46, 14324.31],\n+ [21379.32, 14327.68],\n+ [21377.34, 14328],\n+ [21375.52, 14327.18],\n+ [21373.61, 14327.78],\n+ [21370.16, 14332.68],\n+ [21370, 14334.67],\n+ [21370.96, 14336.43],\n+ [21374.86, 14336.52],\n+ [21376.27, 14337.94],\n+ [21376.89, 14341.89],\n+ [21378.7, 14342.74],\n+ [21380.66, 14342.34],\n+ [21380.31, 14338.37],\n+ [21380.6, 14334.4],\n+ [21384.59, 14334.21],\n+ [21387.45, 14331.41],\n+ [21389.09, 14332.54],\n+ [21391.09, 14332.46],\n+ [21392.67, 14333.67],\n+ [21396.58, 14333.17],\n+ [21398.13, 14334.44],\n+ [21401.75, 14332.75],\n+ [21394.33, 14329.83],\n+ [21395.6, 14328.28],\n+ [21399.51, 14327.49],\n+ [21399.02, 14325.55],\n+ [21397.29, 14326.54],\n+ [21395.34, 14326.07],\n+ [21391.36, 14326.26],\n+ [21390.22, 14324.61],\n+ [21386.33, 14325],\n+ [21385.34, 14323.33]\n+ ]\n+ ]\n+ },\n+ "properties": {\n+ "object_type": "cell",\n+ "classification": {\n+ "name": "Tumor",\n+ "colorRGB": -3670016\n+ },\n+ "isLocked": false,\n+ "measurements": 
[\n+ {\n+ "name": "Nucleus: Area",\n+ "value": 215.360107421875\n+ },\n+ {\n+ "name": "Nucleus: Perimeter",\n+ "value": 94.80494689941406\n+ },\n+ {\n+ "name": "Nucleus: Circularity",\n+ "value": 0.3011016249656677\n+ },\n+ {\n+ "name": "Nucleus: Max caliper",\n+ "value": 29.585416793823242\n+ },\n+ {\n+ "name": "Nucleus: Min caliper",\n+ "value": 15.034975051879883\n+ },\n+ {\n+ "name": "Nucleus: Eccentricity",\n+ "value": 0.8886669278144836\n+ },\n+ {\n+ "name": "Nucleus: Hematoxylin OD mean",\n+ "value": 0.3517749011516571\n+ },\n+ {\n+ "name": "Nucleus: Hematoxylin OD sum",\n+ "value": 90.75792694091797\n+ },\n+ {\n+ "name": "Nucleus: Hematoxylin OD std dev",\n+ "value": 0.06588728725910187\n+ },\n+ {\n+ "name": "Nucleus: Hematoxylin OD max",\n+ "value": 0.5032011270523071\n+ },\n+ {\n+ "name": "Nucleus: Hematoxylin OD min",\n+ "value": 0.20275430381298065\n+ },\n+ {\n+ "name": "Nucleus: Hematoxylin OD range",\n+ "value": 0.3004468083381653\n+ },\n+ {\n+ "name": "Nucleus: Eosin OD mean",\n+ "value": 0.0043587107211351395\n+ },\n+ {\n+ "na'..b'451690674\n+ },\n+ {\n+ "name": "Nucleus: Eccentricity",\n+ "value": 0.757319986820221\n+ },\n+ {\n+ "name": "Nucleus: Hematoxylin OD mean",\n+ "value": 0.28198176622390747\n+ },\n+ {\n+ "name": "Nucleus: Hematoxylin OD sum",\n+ "value": 4.5117082595825195\n+ },\n+ {\n+ "name": "Nucleus: Hematoxylin OD std dev",\n+ "value": 0.02770525962114334\n+ },\n+ {\n+ "name": "Nucleus: Hematoxylin OD max",\n+ "value": 0.32805564999580383\n+ },\n+ {\n+ "name": "Nucleus: Hematoxylin OD min",\n+ "value": 0.2247738242149353\n+ },\n+ {\n+ "name": "Nucleus: Hematoxylin OD range",\n+ "value": 0.10328182578086853\n+ },\n+ {\n+ "name": "Nucleus: Eosin OD mean",\n+ "value": 0.006495605688542128\n+ },\n+ {\n+ "name": "Nucleus: Eosin OD sum",\n+ "value": 0.10392969101667404\n+ },\n+ {\n+ "name": "Nucleus: Eosin OD std dev",\n+ "value": 0.012382211163640022\n+ },\n+ {\n+ "name": "Nucleus: Eosin OD max",\n+ "value": 0.03777380287647247\n+ },\n+ 
{\n+ "name": "Nucleus: Eosin OD min",\n+ "value": -0.012739512138068676\n+ },\n+ {\n+ "name": "Nucleus: Eosin OD range",\n+ "value": 0.050513315945863724\n+ },\n+ {\n+ "name": "Cell: Area",\n+ "value": 92.54429626464844\n+ },\n+ {\n+ "name": "Cell: Perimeter",\n+ "value": 35.43148422241211\n+ },\n+ {\n+ "name": "Cell: Circularity",\n+ "value": 0.926362156867981\n+ },\n+ {\n+ "name": "Cell: Max caliper",\n+ "value": 13.06949520111084\n+ },\n+ {\n+ "name": "Cell: Min caliper",\n+ "value": 9.288158416748047\n+ },\n+ {\n+ "name": "Cell: Eccentricity",\n+ "value": 0.6711435317993164\n+ },\n+ {\n+ "name": "Cell: Hematoxylin OD mean",\n+ "value": 0.22877590358257294\n+ },\n+ {\n+ "name": "Cell: Hematoxylin OD std dev",\n+ "value": 0.05567134916782379\n+ },\n+ {\n+ "name": "Cell: Hematoxylin OD max",\n+ "value": 0.3794621229171753\n+ },\n+ {\n+ "name": "Cell: Hematoxylin OD min",\n+ "value": 0.11053355038166046\n+ },\n+ {\n+ "name": "Cell: Eosin OD mean",\n+ "value": 0.0176229327917099\n+ },\n+ {\n+ "name": "Cell: Eosin OD std dev",\n+ "value": 0.01594710350036621\n+ },\n+ {\n+ "name": "Cell: Eosin OD max",\n+ "value": 0.04452840983867645\n+ },\n+ {\n+ "name": "Cell: Eosin OD min",\n+ "value": -0.012739512138068676\n+ },\n+ {\n+ "name": "Cytoplasm: Hematoxylin OD mean",\n+ "value": 0.21981492638587952\n+ },\n+ {\n+ "name": "Cytoplasm: Hematoxylin OD std dev",\n+ "value": 0.05423422157764435\n+ },\n+ {\n+ "name": "Cytoplasm: Hematoxylin OD max",\n+ "value": 0.3794621229171753\n+ },\n+ {\n+ "name": "Cytoplasm: Hematoxylin OD min",\n+ "value": 0.11053355038166046\n+ },\n+ {\n+ "name": "Cytoplasm: Eosin OD mean",\n+ "value": 0.019497008994221687\n+ },\n+ {\n+ "name": "Cytoplasm: Eosin OD std dev",\n+ "value": 0.015764081850647926\n+ },\n+ {\n+ "name": "Cytoplasm: Eosin OD max",\n+ "value": 0.04452840983867645\n+ },\n+ {\n+ "name": "Cytoplasm: Eosin OD min",\n+ "value": -0.009999651461839676\n+ },\n+ {\n+ "name": "Nucleus/Cell area ratio",\n+ "value": 0.11559037119150162\n+ 
}\n+ ]\n+ }\n+ }\n+]}\n\\ No newline at end of file\n' |