# HG changeset patch # User goeckslab # Date 1662506292 0 # Node ID 261464223fa34c724f391ce23f83e96f514d5769 # Parent c09e444635d90638c16d3ce1b3034cad7ba25d03 planemo upload for repository https://github.com/goeckslab/tools-mti commit ed91d9e0dd189986b5c31fe23f5f78bd8765d862 diff -r c09e444635d9 -r 261464223fa3 CommandSingleCellExtraction.py --- a/CommandSingleCellExtraction.py Thu Apr 07 16:54:04 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,11 +0,0 @@ -#Script for parsing command line arguments and running single-cell -#data extraction functions -#Joshua Hess -import ParseInput -import SingleCellDataExtraction - -#Parse the command line arguments -args = ParseInput.ParseInputDataExtract() - -#Run the MultiExtractSingleCells function -SingleCellDataExtraction.MultiExtractSingleCells(**args) diff -r c09e444635d9 -r 261464223fa3 ParseInput.py --- a/ParseInput.py Thu Apr 07 16:54:04 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,47 +0,0 @@ -#Functions for parsing command line arguments for ome ilastik prep -import argparse - - -def ParseInputDataExtract(): - """Function for parsing command line arguments for input to single-cell - data extraction""" - -#if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--masks',nargs='+', required=True) - parser.add_argument('--image', required=True) - parser.add_argument('--channel_names', required=True) - parser.add_argument('--output', required=True) - parser.add_argument( - '--mask_props', nargs = "+", - help=""" - Space separated list of additional metrics to be calculated for every mask. - This is for metrics that depend only on the cell mask. If the metric depends - on signal intensity, use --intensity-props instead. - See list at https://scikit-image.org/docs/dev/api/skimage.measure.html#regionprops - """ - ) - parser.add_argument( - '--intensity_props', nargs = "+", - help=""" - Space separated list of additional metrics to be calculated for every marker separately. - By default only mean intensity is calculated. - If the metric doesn't depend on signal intensity, use --mask-props instead. - See list at https://scikit-image.org/docs/dev/api/skimage.measure.html#regionprops - Additionally available is gini_index, which calculates a single number - between 0 and 1, representing how unequal the signal is distributed in each region. - See https://en.wikipedia.org/wiki/Gini_coefficient - """ - ) - #parser.add_argument('--suffix') - args = parser.parse_args() - #Create a dictionary object to pass to the next function - dict = {'masks': args.masks, 'image': args.image,\ - 'channel_names': args.channel_names,'output':args.output, - 'intensity_props': set(args.intensity_props if args.intensity_props is not None else []).union(["intensity_mean"]), - 'mask_props': args.mask_props, - } - #Print the dictionary object - print(dict) - #Return the dictionary - return dict diff -r c09e444635d9 -r 261464223fa3 SingleCellDataExtraction.py --- a/SingleCellDataExtraction.py Thu Apr 07 16:54:04 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,269 +0,0 @@ -#Functions for reading in single cell imaging data -#Joshua Hess - -#Import necessary modules -import skimage.io -import h5py -import pandas as pd -import numpy as np -import os -import skimage.measure as measure -import tifffile - -from pathlib import Path - -import sys - - -def gini_index(mask, intensity): - x = intensity[mask] - sorted_x = np.sort(x) - n = len(x) - cumx = np.cumsum(sorted_x, dtype=float) - return (n + 1 - 2 * np.sum(cumx) / cumx[-1]) / n - -def intensity_median(mask, intensity): - return np.median(intensity[mask]) - -def MaskChannel(mask_loaded, image_loaded_z, intensity_props=["intensity_mean"]): - """Function for quantifying a single channel image - - Returns a table with CellID according to the mask and the mean pixel intensity - for the given channel for each cell""" - # Look for regionprops in skimage - builtin_props = set(intensity_props).intersection(measure._regionprops.PROP_VALS) - # Otherwise look for them in this module - extra_props = set(intensity_props).difference(measure._regionprops.PROP_VALS) - dat = measure.regionprops_table( - mask_loaded, image_loaded_z, - properties = tuple(builtin_props), - extra_properties = [globals()[n] for n in extra_props] - ) - return dat - - -def MaskIDs(mask, mask_props=None): - """This function will extract the CellIDs and the XY positions for each - cell based on that cells centroid - - Returns a dictionary object""" - - all_mask_props = set(["label", "centroid", "area", "major_axis_length", "minor_axis_length", "eccentricity", "solidity", "extent", "orientation"]) - if mask_props is not None: - all_mask_props = all_mask_props.union(mask_props) - - dat = measure.regionprops_table( - mask, - properties=all_mask_props - ) - - name_map = { - "CellID": "label", - "X_centroid": "centroid-1", - "Y_centroid": "centroid-0", - "Area": "area", - "MajorAxisLength": "major_axis_length", - "MinorAxisLength": "minor_axis_length", - "Eccentricity": "eccentricity", - "Solidity": "solidity", - "Extent": "extent", - "Orientation": "orientation", - } - for new_name, old_name in name_map.items(): - dat[new_name] = dat[old_name] - for old_name in set(name_map.values()): - del dat[old_name] - - return dat - -def n_channels(image): - """Returns the number of channel in the input image. Supports [OME]TIFF and HDF5.""" - - image_path = Path(image) - - if image_path.suffix in ['.tiff', '.tif', '.btf']: - s = tifffile.TiffFile(image).series[0] - ndim = len(s.shape) - if ndim == 2: return 1 - elif ndim == 3: return min(s.shape) - else: raise Exception('mcquant supports only 2D/3D images.') - - elif image_path.suffix in ['.h5', '.hdf5']: - f = h5py.File(image, 'r') - dat_name = list(f.keys())[0] - return f[dat_name].shape[3] - - else: - raise Exception('mcquant currently supports [OME]TIFF and HDF5 formats only') - -def PrepareData(image,z): - """Function for preparing input for maskzstack function. Connecting function - to use with mc micro ilastik pipeline""" - - image_path = Path(image) - print(f'{image_path} at {z}', file=sys.stderr) - - #Check to see if image tif(f) - if image_path.suffix in ['.tiff', '.tif', '.btf']: - image_loaded_z = tifffile.imread(image, key=z) - - #Check to see if image is hdf5 - elif image_path.suffix in ['.h5', '.hdf5']: - #Read the image - f = h5py.File(image,'r') - #Get the dataset name from the h5 file - dat_name = list(f.keys())[0] - #Retrieve the z^th channel - image_loaded_z = f[dat_name][0,:,:,z] - - else: - raise Exception('mcquant currently supports [OME]TIFF and HDF5 formats only') - - #Return the objects - return image_loaded_z - - -def MaskZstack(masks_loaded,image,channel_names_loaded, mask_props=None, intensity_props=["intensity_mean"]): - """This function will extract the stats for each cell mask through each channel - in the input image - - mask_loaded: dictionary containing Tiff masks that represents the cells in your image. - - z_stack: Multichannel z stack image""" - - #Get the names of the keys for the masks dictionary - mask_names = list(masks_loaded.keys()) - - #Create empty dictionary to store channel results per mask - dict_of_chan = {m_name: [] for m_name in mask_names} - #Get the z channel and the associated channel name from list of channel names - print(f'channels: {channel_names_loaded}', file=sys.stderr) - print(f'num channels: {len(channel_names_loaded)}', file=sys.stderr) - for z in range(len(channel_names_loaded)): - #Run the data Prep function - image_loaded_z = PrepareData(image,z) - - #Iterate through number of masks to extract single cell data - for nm in range(len(mask_names)): - #Use the above information to mask z stack - dict_of_chan[mask_names[nm]].append( - MaskChannel(masks_loaded[mask_names[nm]],image_loaded_z, intensity_props=intensity_props) - ) - #Print progress - print("Finished "+str(z)) - - # Column order according to histoCAT convention (Move xy position to end with spatial information) - last_cols = ( - "X_centroid", - "Y_centroid", - "column_centroid", - "row_centroid", - "Area", - "MajorAxisLength", - "MinorAxisLength", - "Eccentricity", - "Solidity", - "Extent", - "Orientation", - ) - def col_sort(x): - if x == "CellID": - return -2 - try: - return last_cols.index(x) - except ValueError: - return -1 - - #Iterate through the masks and format quantifications for each mask and property - for nm in mask_names: - mask_dict = {} - # Mean intensity is default property, stored without suffix - mask_dict.update( - zip(channel_names_loaded, [x["intensity_mean"] for x in dict_of_chan[nm]]) - ) - # All other properties are suffixed with their names - for prop_n in set(dict_of_chan[nm][0].keys()).difference(["intensity_mean"]): - mask_dict.update( - zip([f"{n}_{prop_n}" for n in channel_names_loaded], [x[prop_n] for x in dict_of_chan[nm]]) - ) - # Get the cell IDs and mask properties - mask_properties = pd.DataFrame(MaskIDs(masks_loaded[nm], mask_props=mask_props)) - mask_dict.update(mask_properties) - dict_of_chan[nm] = pd.DataFrame(mask_dict).reindex(columns=sorted(mask_dict.keys(), key=col_sort)) - - # Return the dict of dataframes for each mask - return dict_of_chan - -def ExtractSingleCells(masks,image,channel_names,output, mask_props=None, intensity_props=["intensity_mean"]): - """Function for extracting single cell information from input - path containing single-cell masks, z_stack path, and channel_names path.""" - - #Create pathlib object for output - output = Path(output) - - #Read csv channel names - channel_names_loaded = pd.read_csv(channel_names) - #Check for the presence of `marker_name` column - if 'marker_name' in channel_names_loaded: - #Get the marker_name column if more than one column (CyCIF structure) - channel_names_loaded_list = list(channel_names_loaded.marker_name) - #Consider the old one-marker-per-line plain text format - elif channel_names_loaded.shape[1] == 1: - #re-read the csv file and add column name - channel_names_loaded = pd.read_csv(channel_names, header = None) - channel_names_loaded_list = list(channel_names_loaded.iloc[:,0]) - else: - raise Exception('%s must contain the marker_name column'%channel_names) - - #Contrast against the number of markers in the image - if len(channel_names_loaded_list) != n_channels(image): - raise Exception("The number of channels in %s doesn't match the image"%channel_names) - - #Check for unique marker names -- create new list to store new names - channel_names_loaded_checked = [] - for idx,val in enumerate(channel_names_loaded_list): - #Check for unique value - if channel_names_loaded_list.count(val) > 1: - #If unique count greater than one, add suffix - channel_names_loaded_checked.append(val + "_"+ str(channel_names_loaded_list[:idx].count(val) + 1)) - else: - #Otherwise, leave channel name - channel_names_loaded_checked.append(val) - - #Read the masks - masks_loaded = {} - #iterate through mask paths and read images to add to dictionary object - for m in masks: - m_full_name = os.path.basename(m) - m_name = m_full_name.split('.')[0] - masks_loaded.update({str(m_name):skimage.io.imread(m,plugin='tifffile')}) - - scdata_z = MaskZstack(masks_loaded,image,channel_names_loaded_checked, mask_props=mask_props, intensity_props=intensity_props) - #Write the singe cell data to a csv file using the image name - - im_full_name = os.path.basename(image) - im_name = im_full_name.split('.')[0] - - # iterate through each mask and export csv with mask name as suffix - for k,v in scdata_z.items(): - # export the csv for this mask name - scdata_z[k].to_csv( - str(Path(os.path.join(str(output), - str(im_name+"_{}"+".csv").format(k)))), - index=False - ) - - -def MultiExtractSingleCells(masks,image,channel_names,output, mask_props=None, intensity_props=["intensity_mean"]): - """Function for iterating over a list of z_stacks and output locations to - export single-cell data from image masks""" - - print("Extracting single-cell data for "+str(image)+'...') - - #Run the ExtractSingleCells function for this image - ExtractSingleCells(masks,image,channel_names,output, mask_props=mask_props, intensity_props=intensity_props) - - #Print update - im_full_name = os.path.basename(image) - im_name = im_full_name.split('.')[0] - print("Finished "+str(im_name)) diff -r c09e444635d9 -r 261464223fa3 macros.xml --- a/macros.xml Thu Apr 07 16:54:04 2022 +0000 +++ b/macros.xml Tue Sep 06 23:18:12 2022 +0000 @@ -2,31 +2,35 @@ - labsyspharm/quantification:@VERSION@ + + labsyspharm/quantification:@TOOL_VERSION@ - echo @VERSION@ + echo @TOOL_VERSION@ - 1.5.1 + 1.5.1 + 0 diff -r c09e444635d9 -r 261464223fa3 quantification.xml --- a/quantification.xml Thu Apr 07 16:54:04 2022 +0000 +++ b/quantification.xml Tue Sep 06 23:18:12 2022 +0000 @@ -1,83 +1,94 @@ - - Single cell quantification, a module for single-cell data extraction given a segmentation mask and multi-channel image. + + a module for single-cell data extraction macros.xml - - @VERSION_CMD@ + - - + + - - - - + + + supp_mask + + + + + + + + + + + + + + + + + + + If multiple masks are selected the first mask will be used for spatial feature extraction but all will be quantified + +-------- +MCQUANT +-------- +**MCQUANT** module for single cell quantification given a segmentation mask and multi-channel image. The CSV structure is aligned with histoCAT output. -* `--image` Path to image(s) for quantification. (Ex: ./registration/*.h5) -> works with .h(df)5 or .tif(f) - -* `--output` Path to output directory. (Ex: ./feature_extraction) - -* `--channel_names` csv file containing the channel names for the z-stack (Ex: ./my_channels.csv) +**Inputs** +1. A fully stitched and registered image in .ome.tif format. Nextflow will use images in the registration/ and dearray/ subfolders as appropriate. +2. One or more segmentation masks in .tif format. Nextflow will use files in the segmentation/ subfolder within the project. +3. A .csv file containing a marker_name column specifying names of individual channels. Nextflow will look for this file in the project directory. -# Run script -`python CommandSingleCellExtraction.py --masks ./segmentation/cellMask.tif ./segmentation/membraneMask.tif --image ./registration/Exemplar_001.h5 --output ./feature_extraction --channel_names ./my_channels.csv` +**Outputs** +A cell-by-feature table mapping Cell IDs to marker expression and morphological features (including x,y coordinates). -# Main developer -Denis Schapiro (https://github.com/DenisSch) - -Joshua Hess (https://github.com/JoshuaHess12) - -Jeremy Muhlich (https://github.com/jmuhlich) ]]> diff -r c09e444635d9 -r 261464223fa3 test-data/channels.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/channels.csv Tue Sep 06 23:18:12 2022 +0000 @@ -0,0 +1,2 @@ +channel_number,cycle_number,marker_name,Filter,excitation_wavelength,emission_wavelength +1,1,DNA_1,DAPI,395,431 diff -r c09e444635d9 -r 261464223fa3 test-data/mask.tiff Binary file test-data/mask.tiff has changed diff -r c09e444635d9 -r 261464223fa3 test-data/supp_mask.tiff Binary file test-data/supp_mask.tiff has changed diff -r c09e444635d9 -r 261464223fa3 test-data/test.tiff Binary file test-data/test.tiff has changed