Mercurial > repos > perssond > quantification
comparison SingleCellDataExtraction.py @ 0:928db0f952e3 draft
"planemo upload for repository https://github.com/ohsu-comp-bio/quantification commit a4349062e9177b5e60fb7c49115c57299e0d648d-dirty"
author | perssond |
---|---|
date | Fri, 12 Mar 2021 00:19:24 +0000 |
parents | |
children | aba3655fdef0 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:928db0f952e3 |
---|---|
1 #Functions for reading in single cell imaging data | |
2 #Joshua Hess | |
3 | |
4 #Import necessary modules | |
5 import skimage.io | |
6 import h5py | |
7 import pandas as pd | |
8 import numpy as np | |
9 import os | |
10 import skimage.measure as measure | |
11 from pathlib import Path | |
12 import csv | |
13 | |
14 import sys | |
15 | |
16 | |
17 def MaskChannel(mask_loaded,image_loaded_z): | |
18 """Function for quantifying a single channel image | |
19 | |
20 Returns a table with CellID according to the mask and the mean pixel intensity | |
21 for the given channel for each cell""" | |
22 print(f'Mask loaded: {mask_loaded.shape}', file=sys.stderr) | |
23 print(f'Image loaded: {image_loaded_z.shape}', file=sys.stderr) | |
24 dat = measure.regionprops(mask_loaded, image_loaded_z) | |
25 n = len(dat) | |
26 intensity_z = np.empty(n) | |
27 for i in range(n): | |
28 intensity_z[i] = dat[i].mean_intensity | |
29 # Clear reference to avoid memory leak -- see MaskIDs for explanation. | |
30 dat[i] = None | |
31 return intensity_z | |
32 | |
33 | |
34 def MaskIDs(mask): | |
35 """This function will extract the CellIDs and the XY positions for each | |
36 cell based on that cells centroid | |
37 | |
38 Returns a dictionary object""" | |
39 | |
40 dat = measure.regionprops(mask) | |
41 n = len(dat) | |
42 | |
43 # Pre-allocate numpy arrays for all properties we'll calculate. | |
44 labels = np.empty(n, int) | |
45 xcoords = np.empty(n) | |
46 ycoords = np.empty(n) | |
47 area = np.empty(n, int) | |
48 minor_axis_length = np.empty(n) | |
49 major_axis_length = np.empty(n) | |
50 eccentricity = np.empty(n) | |
51 solidity = np.empty(n) | |
52 extent = np.empty(n) | |
53 orientation = np.empty(n) | |
54 | |
55 for i in range(n): | |
56 labels[i] = dat[i].label | |
57 xcoords[i] = dat[i].centroid[1] | |
58 ycoords[i] = dat[i].centroid[0] | |
59 area[i] = dat[i].area | |
60 major_axis_length[i] = dat[i].major_axis_length | |
61 minor_axis_length[i] = dat[i].minor_axis_length | |
62 eccentricity[i] = dat[i].eccentricity | |
63 solidity[i] = dat[i].solidity | |
64 extent[i] = dat[i].extent | |
65 orientation[i] = dat[i].orientation | |
66 # By clearing the reference to each RegionProperties object, we allow it | |
67 # and its cache to be garbage collected immediately. Otherwise memory | |
68 # usage creeps up needlessly while this function is executing. | |
69 dat[i] = None | |
70 | |
71 IDs = { | |
72 "CellID": labels, | |
73 "X_centroid": xcoords, | |
74 "Y_centroid": ycoords, | |
75 "column_centroid": xcoords, | |
76 "row_centroid": ycoords, | |
77 "Area": area, | |
78 "MajorAxisLength": major_axis_length, | |
79 "MinorAxisLength": minor_axis_length, | |
80 "Eccentricity": eccentricity, | |
81 "Solidity": solidity, | |
82 "Extent": extent, | |
83 "Orientation": orientation, | |
84 } | |
85 | |
86 return IDs | |
87 | |
88 | |
89 def PrepareData(image,z): | |
90 """Function for preparing input for maskzstack function. Connecting function | |
91 to use with mc micro ilastik pipeline""" | |
92 | |
93 image_path = Path(image) | |
94 print(f'{image_path} at {z}', file=sys.stderr) | |
95 | |
96 #Check to see if image tif(f) | |
97 if image_path.suffix == '.tiff' or image_path.suffix == '.tif' or image_path.suffix == '.btf': | |
98 #Check to see if the image is ome.tif(f) | |
99 if image.endswith(('.ome.tif','.ome.tiff')): | |
100 #Read the image | |
101 image_loaded_z = skimage.io.imread(image,img_num=z,plugin='tifffile') | |
102 #print('OME TIF(F) found') | |
103 else: | |
104 #Read the image | |
105 image_loaded_z = skimage.io.imread(image,img_num=z,plugin='tifffile') | |
106 #print('TIF(F) found') | |
107 # Remove extra axis | |
108 #image_loaded = image_loaded.reshape((image_loaded.shape[1],image_loaded.shape[3],image_loaded.shape[4])) | |
109 | |
110 #Check to see if image is hdf5 | |
111 elif image_path.suffix == '.h5' or image_path.suffix == '.hdf5': | |
112 #Read the image | |
113 f = h5py.File(image,'r+') | |
114 #Get the dataset name from the h5 file | |
115 dat_name = list(f.keys())[0] | |
116 ###If the hdf5 is exported from ilastik fiji plugin, the dat_name will be 'data' | |
117 #Get the image data | |
118 image_loaded = np.array(f[dat_name]) | |
119 #Remove the first axis (ilastik convention) | |
120 image_loaded = image_loaded.reshape((image_loaded.shape[1],image_loaded.shape[2],image_loaded.shape[3])) | |
121 ###If the hdf5 is exported from ilastik fiji plugin, the order will need to be | |
122 ###switched as above --> z_stack = np.swapaxes(z_stack,0,2) --> z_stack = np.swapaxes(z_stack,0,1) | |
123 | |
124 #Return the objects | |
125 return image_loaded_z | |
126 | |
127 | |
128 def MaskZstack(masks_loaded,image,channel_names_loaded): | |
129 """This function will extract the stats for each cell mask through each channel | |
130 in the input image | |
131 | |
132 mask_loaded: dictionary containing Tiff masks that represents the cells in your image. | |
133 | |
134 z_stack: Multichannel z stack image""" | |
135 | |
136 #Get the names of the keys for the masks dictionary | |
137 mask_names = list(masks_loaded.keys()) | |
138 #Get the CellIDs for this dataset by using only a single mask (first mask) | |
139 IDs = pd.DataFrame(MaskIDs(masks_loaded[mask_names[0]])) | |
140 #Create empty dictionary to store channel results per mask | |
141 dict_of_chan = {m_name: [] for m_name in mask_names} | |
142 #Get the z channel and the associated channel name from list of channel names | |
143 print(f'channels: {channel_names_loaded}', file=sys.stderr) | |
144 print(f'num channels: {len(channel_names_loaded)}', file=sys.stderr) | |
145 for z in range(len(channel_names_loaded)): | |
146 #Run the data Prep function | |
147 image_loaded_z = PrepareData(image,z) | |
148 | |
149 #Iterate through number of masks to extract single cell data | |
150 for nm in range(len(mask_names)): | |
151 #Use the above information to mask z stack | |
152 dict_of_chan[mask_names[nm]].append(MaskChannel(masks_loaded[mask_names[nm]],image_loaded_z)) | |
153 #Print progress | |
154 print("Finished "+str(z)) | |
155 | |
156 #Iterate through the rest of the masks to modify names of channels and convert to data table | |
157 for nm in mask_names: | |
158 #Check if this is the first mask | |
159 if nm == mask_names[0]: | |
160 #Create channel names for this mask | |
161 new_names = [channel_names_loaded[i]+"_"+str(nm) for i in range(len(channel_names_loaded))] | |
162 #Convert the channel names list and the list of intensity values to a dictionary and combine with CellIDs and XY | |
163 dict_of_chan[nm] = pd.concat([IDs,pd.DataFrame(dict(zip(new_names,dict_of_chan[nm])))],axis=1) | |
164 #Get the name of the columns in the dataframe so we can reorder to histoCAT convention | |
165 cols = list(dict_of_chan[nm].columns.values) | |
166 #Reorder the list (Move xy position to end with spatial information) | |
167 cols.append(cols.pop(cols.index("X_centroid"))) | |
168 cols.append(cols.pop(cols.index("Y_centroid"))) | |
169 cols.append(cols.pop(cols.index("column_centroid"))) | |
170 cols.append(cols.pop(cols.index("row_centroid"))) | |
171 cols.append(cols.pop(cols.index("Area"))) | |
172 cols.append(cols.pop(cols.index("MajorAxisLength"))) | |
173 cols.append(cols.pop(cols.index("MinorAxisLength"))) | |
174 cols.append(cols.pop(cols.index("Eccentricity"))) | |
175 cols.append(cols.pop(cols.index("Solidity"))) | |
176 cols.append(cols.pop(cols.index("Extent"))) | |
177 cols.append(cols.pop(cols.index("Orientation"))) | |
178 #Reindex the dataframe with new order | |
179 dict_of_chan[nm] = dict_of_chan[nm].reindex(columns=cols) | |
180 #Otherwise, add no spatial information | |
181 else: | |
182 #Create channel names for this mask | |
183 new_names = [channel_names_loaded[i]+"_"+str(nm) for i in range(len(channel_names_loaded))] | |
184 #Use the above information to mask z stack | |
185 dict_of_chan[nm] = pd.DataFrame(dict(zip(new_names,dict_of_chan[nm]))) | |
186 | |
187 #Concatenate all data from all masks to return | |
188 dat = pd.concat([dict_of_chan[nm] for nm in mask_names],axis=1) | |
189 | |
190 #Return the dataframe | |
191 return dat | |
192 | |
193 | |
194 def ExtractSingleCells(masks,image,channel_names,output): | |
195 """Function for extracting single cell information from input | |
196 path containing single-cell masks, z_stack path, and channel_names path.""" | |
197 | |
198 #Create pathlib object for output | |
199 output = Path(output) | |
200 | |
201 #Check if header available | |
202 #sniffer = csv.Sniffer() | |
203 #sniffer.has_header(open(channel_names).readline()) | |
204 #If header not available | |
205 #if not sniffer: | |
206 #If header available | |
207 #channel_names_loaded = pd.read_csv(channel_names) | |
208 #channel_names_loaded_list = list(channel_names_loaded.marker_name) | |
209 #else: | |
210 #print("negative") | |
211 #old one column version | |
212 #channel_names_loaded = pd.read_csv(channel_names,header=None) | |
213 #Add a column index for ease | |
214 #channel_names_loaded.columns = ["marker"] | |
215 #channel_names_loaded = list(channel_names_loaded.marker.values) | |
216 | |
217 #Read csv channel names | |
218 channel_names_loaded = pd.read_csv(channel_names) | |
219 #Check for size of columns | |
220 if channel_names_loaded.shape[1] > 1: | |
221 #Get the marker_name column if more than one column (CyCIF structure) | |
222 channel_names_loaded_list = list(channel_names_loaded.marker_name) | |
223 else: | |
224 #old one column version -- re-read the csv file and add column name | |
225 channel_names_loaded = pd.read_csv(channel_names, header = None) | |
226 #Add a column index for ease and for standardization | |
227 channel_names_loaded.columns = ["marker"] | |
228 channel_names_loaded_list = list(channel_names_loaded.marker) | |
229 | |
230 #Check for unique marker names -- create new list to store new names | |
231 channel_names_loaded_checked = [] | |
232 for idx,val in enumerate(channel_names_loaded_list): | |
233 #Check for unique value | |
234 if channel_names_loaded_list.count(val) > 1: | |
235 #If unique count greater than one, add suffix | |
236 channel_names_loaded_checked.append(val + "_"+ str(channel_names_loaded_list[:idx].count(val) + 1)) | |
237 else: | |
238 #Otherwise, leave channel name | |
239 channel_names_loaded_checked.append(val) | |
240 | |
241 #Clear small memory amount by clearing old channel names | |
242 channel_names_loaded, channel_names_loaded_list = None, None | |
243 | |
244 #Read the masks | |
245 masks_loaded = {} | |
246 #iterate through mask paths and read images to add to dictionary object | |
247 for m in masks: | |
248 m_full_name = os.path.basename(m) | |
249 m_name = m_full_name.split('.')[0] | |
250 masks_loaded.update({str(m_name):skimage.io.imread(m,plugin='tifffile')}) | |
251 | |
252 scdata_z = MaskZstack(masks_loaded,image,channel_names_loaded_checked) | |
253 #Write the singe cell data to a csv file using the image name | |
254 | |
255 im_full_name = os.path.basename(image) | |
256 im_name = im_full_name.split('.')[0] | |
257 scdata_z.to_csv(str(Path(os.path.join(str(output),str(im_name+".csv")))),index=False) | |
258 | |
259 | |
260 def MultiExtractSingleCells(masks,image,channel_names,output): | |
261 """Function for iterating over a list of z_stacks and output locations to | |
262 export single-cell data from image masks""" | |
263 | |
264 print("Extracting single-cell data for "+str(image)+'...') | |
265 | |
266 #Run the ExtractSingleCells function for this image | |
267 ExtractSingleCells(masks,image,channel_names,output) | |
268 | |
269 #Print update | |
270 im_full_name = os.path.basename(image) | |
271 im_name = im_full_name.split('.')[0] | |
272 print("Finished "+str(im_name)) |