quantification: SingleCellDataExtraction.py comparison

comparison SingleCellDataExtraction.py @ 0:928db0f952e3 draft

"planemo upload for repository https://github.com/ohsu-comp-bio/quantification commit a4349062e9177b5e60fb7c49115c57299e0d648d-dirty"

author	perssond
date	Fri, 12 Mar 2021 00:19:24 +0000
parents
children	aba3655fdef0

comparison

equal deleted inserted replaced

--1:000000000000
+:928db0f952e3
+#Functions for reading in single cell imaging data
+#Joshua Hess
+#Import necessary modules
+import skimage.io
+import h5py
+import pandas as pd
+import numpy as np
+import os
+import skimage.measure as measure
+from pathlib import Path
+import csv
+import sys
+def MaskChannel(mask_loaded,image_loaded_z):
+"""Function for quantifying a single channel image
+Returns a table with CellID according to the mask and the mean pixel intensity
+for the given channel for each cell"""
+print(f'Mask loaded: {mask_loaded.shape}', file=sys.stderr)
+print(f'Image loaded: {image_loaded_z.shape}', file=sys.stderr)
+dat = measure.regionprops(mask_loaded, image_loaded_z)
+n = len(dat)
+intensity_z = np.empty(n)
+for i in range(n):
+intensity_z[i] = dat[i].mean_intensity
+# Clear reference to avoid memory leak -- see MaskIDs for explanation.
+dat[i] = None
+return intensity_z
+def MaskIDs(mask):
+"""This function will extract the CellIDs and the XY positions for each
+cell based on that cells centroid
+Returns a dictionary object"""
+dat = measure.regionprops(mask)
+n = len(dat)
+# Pre-allocate numpy arrays for all properties we'll calculate.
+labels = np.empty(n, int)
+xcoords = np.empty(n)
+ycoords = np.empty(n)
+area = np.empty(n, int)
+minor_axis_length = np.empty(n)
+major_axis_length = np.empty(n)
+eccentricity = np.empty(n)
+solidity = np.empty(n)
+extent = np.empty(n)
+orientation = np.empty(n)
+for i in range(n):
+labels[i] = dat[i].label
+xcoords[i] = dat[i].centroid[1]
+ycoords[i] = dat[i].centroid[0]
+area[i] = dat[i].area
+major_axis_length[i] = dat[i].major_axis_length
+minor_axis_length[i] = dat[i].minor_axis_length
+eccentricity[i] = dat[i].eccentricity
+solidity[i] = dat[i].solidity
+extent[i] = dat[i].extent
+orientation[i] = dat[i].orientation
+# By clearing the reference to each RegionProperties object, we allow it
+# and its cache to be garbage collected immediately. Otherwise memory
+# usage creeps up needlessly while this function is executing.
+dat[i] = None
+IDs = {
+"CellID": labels,
+"X_centroid": xcoords,
+"Y_centroid": ycoords,
+"column_centroid": xcoords,
+"row_centroid": ycoords,
+"Area": area,
+"MajorAxisLength": major_axis_length,
+"MinorAxisLength": minor_axis_length,
+"Eccentricity": eccentricity,
+"Solidity": solidity,
+"Extent": extent,
+"Orientation": orientation,
+}
+return IDs
+def PrepareData(image,z):
+"""Function for preparing input for maskzstack function. Connecting function
+to use with mc micro ilastik pipeline"""
+image_path = Path(image)
+print(f'{image_path} at {z}', file=sys.stderr)
+#Check to see if image tif(f)
+if image_path.suffix == '.tiff' or image_path.suffix == '.tif' or image_path.suffix == '.btf':
+#Check to see if the image is ome.tif(f)
+if  image.endswith(('.ome.tif','.ome.tiff')):
+#Read the image
+image_loaded_z = skimage.io.imread(image,img_num=z,plugin='tifffile')
+#print('OME TIF(F) found')
+else:
+#Read the image
+image_loaded_z = skimage.io.imread(image,img_num=z,plugin='tifffile')
+#print('TIF(F) found')
+# Remove extra axis
+#image_loaded = image_loaded.reshape((image_loaded.shape[1],image_loaded.shape[3],image_loaded.shape[4]))
+#Check to see if image is hdf5
+elif image_path.suffix == '.h5' or image_path.suffix == '.hdf5':
+#Read the image
+f = h5py.File(image,'r+')
+#Get the dataset name from the h5 file
+dat_name = list(f.keys())[0]
+###If the hdf5 is exported from ilastik fiji plugin, the dat_name will be 'data'
+#Get the image data
+image_loaded = np.array(f[dat_name])
+#Remove the first axis (ilastik convention)
+image_loaded = image_loaded.reshape((image_loaded.shape[1],image_loaded.shape[2],image_loaded.shape[3]))
+###If the hdf5 is exported from ilastik fiji plugin, the order will need to be
+###switched as above --> z_stack = np.swapaxes(z_stack,0,2) --> z_stack = np.swapaxes(z_stack,0,1)
+#Return the objects
+return image_loaded_z
+def MaskZstack(masks_loaded,image,channel_names_loaded):
+"""This function will extract the stats for each cell mask through each channel
+in the input image
+mask_loaded: dictionary containing Tiff masks that represents the cells in your image.
+z_stack: Multichannel z stack image"""
+#Get the names of the keys for the masks dictionary
+mask_names = list(masks_loaded.keys())
+#Get the CellIDs for this dataset by using only a single mask (first mask)
+IDs = pd.DataFrame(MaskIDs(masks_loaded[mask_names[0]]))
+#Create empty dictionary to store channel results per mask
+dict_of_chan = {m_name: [] for m_name in mask_names}
+#Get the z channel and the associated channel name from list of channel names
+print(f'channels: {channel_names_loaded}', file=sys.stderr)
+print(f'num channels: {len(channel_names_loaded)}', file=sys.stderr)
+for z in range(len(channel_names_loaded)):
+#Run the data Prep function
+image_loaded_z = PrepareData(image,z)
+#Iterate through number of masks to extract single cell data
+for nm in range(len(mask_names)):
+#Use the above information to mask z stack
+dict_of_chan[mask_names[nm]].append(MaskChannel(masks_loaded[mask_names[nm]],image_loaded_z))
+#Print progress
+print("Finished "+str(z))
+#Iterate through the rest of the masks to modify names of channels and convert to data table
+for nm in mask_names:
+#Check if this is the first mask
+if nm == mask_names[0]:
+#Create channel names for this mask
+new_names = [channel_names_loaded[i]+"_"+str(nm) for i in range(len(channel_names_loaded))]
+#Convert the channel names list and the list of intensity values to a dictionary and combine with CellIDs and XY
+dict_of_chan[nm] = pd.concat([IDs,pd.DataFrame(dict(zip(new_names,dict_of_chan[nm])))],axis=1)
+#Get the name of the columns in the dataframe so we can reorder to histoCAT convention
+cols = list(dict_of_chan[nm].columns.values)
+#Reorder the list (Move xy position to end with spatial information)
+cols.append(cols.pop(cols.index("X_centroid")))
+cols.append(cols.pop(cols.index("Y_centroid")))
+cols.append(cols.pop(cols.index("column_centroid")))
+cols.append(cols.pop(cols.index("row_centroid")))
+cols.append(cols.pop(cols.index("Area")))
+cols.append(cols.pop(cols.index("MajorAxisLength")))
+cols.append(cols.pop(cols.index("MinorAxisLength")))
+cols.append(cols.pop(cols.index("Eccentricity")))
+cols.append(cols.pop(cols.index("Solidity")))
+cols.append(cols.pop(cols.index("Extent")))
+cols.append(cols.pop(cols.index("Orientation")))
+#Reindex the dataframe with new order
+dict_of_chan[nm] = dict_of_chan[nm].reindex(columns=cols)
+#Otherwise, add no spatial information
+else:
+#Create channel names for this mask
+new_names = [channel_names_loaded[i]+"_"+str(nm) for i in range(len(channel_names_loaded))]
+#Use the above information to mask z stack
+dict_of_chan[nm] = pd.DataFrame(dict(zip(new_names,dict_of_chan[nm])))
+#Concatenate all data from all masks to return
+dat = pd.concat([dict_of_chan[nm] for nm in mask_names],axis=1)
+#Return the dataframe
+return dat
+def ExtractSingleCells(masks,image,channel_names,output):
+"""Function for extracting single cell information from input
+path containing single-cell masks, z_stack path, and channel_names path."""
+#Create pathlib object for output
+output = Path(output)
+#Check if header available
+#sniffer = csv.Sniffer()
+#sniffer.has_header(open(channel_names).readline())
+#If header not available
+#if not sniffer:
+#If header available
+#channel_names_loaded = pd.read_csv(channel_names)
+#channel_names_loaded_list = list(channel_names_loaded.marker_name)
+#else:
+#print("negative")
+#old one column version
+#channel_names_loaded = pd.read_csv(channel_names,header=None)
+#Add a column index for ease
+#channel_names_loaded.columns = ["marker"]
+#channel_names_loaded = list(channel_names_loaded.marker.values)
+#Read csv channel names
+channel_names_loaded = pd.read_csv(channel_names)
+#Check for size of columns
+if channel_names_loaded.shape[1] > 1:
+#Get the marker_name column if more than one column (CyCIF structure)
+channel_names_loaded_list = list(channel_names_loaded.marker_name)
+else:
+#old one column version -- re-read the csv file and add column name
+channel_names_loaded = pd.read_csv(channel_names, header = None)
+#Add a column index for ease and for standardization
+channel_names_loaded.columns = ["marker"]
+channel_names_loaded_list = list(channel_names_loaded.marker)
+#Check for unique marker names -- create new list to store new names
+channel_names_loaded_checked = []
+for idx,val in enumerate(channel_names_loaded_list):
+#Check for unique value
+if channel_names_loaded_list.count(val) > 1:
+#If unique count greater than one, add suffix
+channel_names_loaded_checked.append(val + "_"+ str(channel_names_loaded_list[:idx].count(val) + 1))
+else:
+#Otherwise, leave channel name
+channel_names_loaded_checked.append(val)
+#Clear small memory amount by clearing old channel names
+channel_names_loaded, channel_names_loaded_list = None, None
+#Read the masks
+masks_loaded = {}
+#iterate through mask paths and read images to add to dictionary object
+for m in masks:
+m_full_name = os.path.basename(m)
+m_name = m_full_name.split('.')[0]
+masks_loaded.update({str(m_name):skimage.io.imread(m,plugin='tifffile')})
+scdata_z = MaskZstack(masks_loaded,image,channel_names_loaded_checked)
+#Write the singe cell data to a csv file using the image name
+im_full_name = os.path.basename(image)
+im_name = im_full_name.split('.')[0]
+scdata_z.to_csv(str(Path(os.path.join(str(output),str(im_name+".csv")))),index=False)
+def MultiExtractSingleCells(masks,image,channel_names,output):
+"""Function for iterating over a list of z_stacks and output locations to
+export single-cell data from image masks"""
+print("Extracting single-cell data for "+str(image)+'...')
+#Run the ExtractSingleCells function for this image
+ExtractSingleCells(masks,image,channel_names,output)
+#Print update
+im_full_name = os.path.basename(image)
+im_name = im_full_name.split('.')[0]
+print("Finished "+str(im_name))

Mercurial > repos > perssond > quantification

comparison SingleCellDataExtraction.py @ 0:928db0f952e3 draft