Previous changeset 3:c09e444635d9 (2022-04-07) Next changeset 5:3a916c4e9f5f (2022-12-28) |
Commit message:
planemo upload for repository https://github.com/goeckslab/tools-mti commit ed91d9e0dd189986b5c31fe23f5f78bd8765d862 |
modified:
macros.xml quantification.xml |
added:
test-data/channels.csv test-data/mask.tiff test-data/supp_mask.tiff test-data/test.tiff |
removed:
CommandSingleCellExtraction.py ParseInput.py SingleCellDataExtraction.py |
b |
diff -r c09e444635d9 -r 261464223fa3 CommandSingleCellExtraction.py --- a/CommandSingleCellExtraction.py Thu Apr 07 16:54:04 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,11 +0,0 @@ -#Script for parsing command line arguments and running single-cell -#data extraction functions -#Joshua Hess -import ParseInput -import SingleCellDataExtraction - -#Parse the command line arguments -args = ParseInput.ParseInputDataExtract() - -#Run the MultiExtractSingleCells function -SingleCellDataExtraction.MultiExtractSingleCells(**args) |
b |
diff -r c09e444635d9 -r 261464223fa3 ParseInput.py --- a/ParseInput.py Thu Apr 07 16:54:04 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,47 +0,0 @@ -#Functions for parsing command line arguments for ome ilastik prep -import argparse - - -def ParseInputDataExtract(): - """Function for parsing command line arguments for input to single-cell - data extraction""" - -#if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--masks',nargs='+', required=True) - parser.add_argument('--image', required=True) - parser.add_argument('--channel_names', required=True) - parser.add_argument('--output', required=True) - parser.add_argument( - '--mask_props', nargs = "+", - help=""" - Space separated list of additional metrics to be calculated for every mask. - This is for metrics that depend only on the cell mask. If the metric depends - on signal intensity, use --intensity-props instead. - See list at https://scikit-image.org/docs/dev/api/skimage.measure.html#regionprops - """ - ) - parser.add_argument( - '--intensity_props', nargs = "+", - help=""" - Space separated list of additional metrics to be calculated for every marker separately. - By default only mean intensity is calculated. - If the metric doesn't depend on signal intensity, use --mask-props instead. - See list at https://scikit-image.org/docs/dev/api/skimage.measure.html#regionprops - Additionally available is gini_index, which calculates a single number - between 0 and 1, representing how unequal the signal is distributed in each region. - See https://en.wikipedia.org/wiki/Gini_coefficient - """ - ) - #parser.add_argument('--suffix') - args = parser.parse_args() - #Create a dictionary object to pass to the next function - dict = {'masks': args.masks, 'image': args.image,\ - 'channel_names': args.channel_names,'output':args.output, - 'intensity_props': set(args.intensity_props if args.intensity_props is not None else []).union(["intensity_mean"]), - 'mask_props': args.mask_props, - } - #Print the dictionary object - print(dict) - #Return the dictionary - return dict |
b |
diff -r c09e444635d9 -r 261464223fa3 SingleCellDataExtraction.py --- a/SingleCellDataExtraction.py Thu Apr 07 16:54:04 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,269 +0,0 @@\n-#Functions for reading in single cell imaging data\n-#Joshua Hess\n-\n-#Import necessary modules\n-import skimage.io\n-import h5py\n-import pandas as pd\n-import numpy as np\n-import os\n-import skimage.measure as measure\n-import tifffile\n-\n-from pathlib import Path\n-\n-import sys\n-\n-\n-def gini_index(mask, intensity):\n- x = intensity[mask]\n- sorted_x = np.sort(x)\n- n = len(x)\n- cumx = np.cumsum(sorted_x, dtype=float)\n- return (n + 1 - 2 * np.sum(cumx) / cumx[-1]) / n\n-\n-def intensity_median(mask, intensity):\n- return np.median(intensity[mask])\n-\n-def MaskChannel(mask_loaded, image_loaded_z, intensity_props=["intensity_mean"]):\n- """Function for quantifying a single channel image\n-\n- Returns a table with CellID according to the mask and the mean pixel intensity\n- for the given channel for each cell"""\n- # Look for regionprops in skimage\n- builtin_props = set(intensity_props).intersection(measure._regionprops.PROP_VALS)\n- # Otherwise look for them in this module\n- extra_props = set(intensity_props).difference(measure._regionprops.PROP_VALS)\n- dat = measure.regionprops_table(\n- mask_loaded, image_loaded_z,\n- properties = tuple(builtin_props),\n- extra_properties = [globals()[n] for n in extra_props]\n- )\n- return dat\n-\n-\n-def MaskIDs(mask, mask_props=None):\n- """This function will extract the CellIDs and the XY positions for each\n- cell based on that cells centroid\n-\n- Returns a dictionary object"""\n-\n- all_mask_props = set(["label", "centroid", "area", "major_axis_length", "minor_axis_length", "eccentricity", "solidity", "extent", "orientation"])\n- if mask_props is not None:\n- all_mask_props = all_mask_props.union(mask_props)\n-\n- dat = measure.regionprops_table(\n- mask,\n- properties=all_mask_props\n- )\n-\n- name_map = {\n- "CellID": "label",\n- "X_centroid": "centroid-1",\n- "Y_centroid": "centroid-0",\n- "Area": "area",\n- "MajorAxisLength": "major_axis_length",\n- "MinorAxisLength": "minor_axis_length",\n- "Eccentricity": "eccentricity",\n- "Solidity": "solidity",\n- "Extent": "extent",\n- "Orientation": "orientation",\n- }\n- for new_name, old_name in name_map.items():\n- dat[new_name] = dat[old_name]\n- for old_name in set(name_map.values()):\n- del dat[old_name]\n-\n- return dat\n-\n-def n_channels(image):\n- """Returns the number of channel in the input image. Supports [OME]TIFF and HDF5."""\n-\n- image_path = Path(image)\n-\n- if image_path.suffix in [\'.tiff\', \'.tif\', \'.btf\']:\n- s = tifffile.TiffFile(image).series[0]\n- ndim = len(s.shape)\n- if ndim == 2: return 1\n- elif ndim == 3: return min(s.shape)\n- else: raise Exception(\'mcquant supports only 2D/3D images.\')\n-\n- elif image_path.suffix in [\'.h5\', \'.hdf5\']:\n- f = h5py.File(image, \'r\')\n- dat_name = list(f.keys())[0]\n- return f[dat_name].shape[3]\n-\n- else:\n- raise Exception(\'mcquant currently supports [OME]TIFF and HDF5 formats only\')\n-\n-def PrepareData(image,z):\n- """Function for preparing input for maskzstack function. Connecting function\n- to use with mc micro ilastik pipeline"""\n-\n- image_path = Path(image)\n- print(f\'{image_path} at {z}\', file=sys.stderr)\n-\n- #Check to see if image tif(f)\n- if image_path.suffix in [\'.tiff\', \'.tif\', \'.btf\']:\n- image_loaded_z = tifffile.imread(image, key=z)\n-\n- #Check to see if image is hdf5\n- elif image_path.suffix in [\'.h5\', \'.hdf5\']:\n- #Read the image\n- f = h5py.File(image,\'r\')\n- #Get the dataset name from the h5 file\n- dat_name = list(f.keys())[0]\n- #Retrieve the z^th channel\n- image_loaded_z = f[dat_name][0,:,:,z]\n-\n- else:\n- raise Exception(\'mcquant currently supports [OME]TIFF and HDF5 formats only\')\n-\n- #Return the objects\n- return image_loaded_z\n-\n-\n-def MaskZstack(masks_load'..b'"intensity_mean"]):\n- mask_dict.update(\n- zip([f"{n}_{prop_n}" for n in channel_names_loaded], [x[prop_n] for x in dict_of_chan[nm]])\n- )\n- # Get the cell IDs and mask properties\n- mask_properties = pd.DataFrame(MaskIDs(masks_loaded[nm], mask_props=mask_props))\n- mask_dict.update(mask_properties)\n- dict_of_chan[nm] = pd.DataFrame(mask_dict).reindex(columns=sorted(mask_dict.keys(), key=col_sort))\n-\n- # Return the dict of dataframes for each mask\n- return dict_of_chan\n-\n-def ExtractSingleCells(masks,image,channel_names,output, mask_props=None, intensity_props=["intensity_mean"]):\n- """Function for extracting single cell information from input\n- path containing single-cell masks, z_stack path, and channel_names path."""\n-\n- #Create pathlib object for output\n- output = Path(output)\n-\n- #Read csv channel names\n- channel_names_loaded = pd.read_csv(channel_names)\n- #Check for the presence of `marker_name` column\n- if \'marker_name\' in channel_names_loaded:\n- #Get the marker_name column if more than one column (CyCIF structure)\n- channel_names_loaded_list = list(channel_names_loaded.marker_name)\n- #Consider the old one-marker-per-line plain text format\n- elif channel_names_loaded.shape[1] == 1:\n- #re-read the csv file and add column name\n- channel_names_loaded = pd.read_csv(channel_names, header = None)\n- channel_names_loaded_list = list(channel_names_loaded.iloc[:,0])\n- else:\n- raise Exception(\'%s must contain the marker_name column\'%channel_names)\n-\n- #Contrast against the number of markers in the image\n- if len(channel_names_loaded_list) != n_channels(image):\n- raise Exception("The number of channels in %s doesn\'t match the image"%channel_names)\n- \n- #Check for unique marker names -- create new list to store new names\n- channel_names_loaded_checked = []\n- for idx,val in enumerate(channel_names_loaded_list):\n- #Check for unique value\n- if channel_names_loaded_list.count(val) > 1:\n- #If unique count greater than one, add suffix\n- channel_names_loaded_checked.append(val + "_"+ str(channel_names_loaded_list[:idx].count(val) + 1))\n- else:\n- #Otherwise, leave channel name\n- channel_names_loaded_checked.append(val)\n-\n- #Read the masks\n- masks_loaded = {}\n- #iterate through mask paths and read images to add to dictionary object\n- for m in masks:\n- m_full_name = os.path.basename(m)\n- m_name = m_full_name.split(\'.\')[0]\n- masks_loaded.update({str(m_name):skimage.io.imread(m,plugin=\'tifffile\')})\n-\n- scdata_z = MaskZstack(masks_loaded,image,channel_names_loaded_checked, mask_props=mask_props, intensity_props=intensity_props)\n- #Write the singe cell data to a csv file using the image name\n-\n- im_full_name = os.path.basename(image)\n- im_name = im_full_name.split(\'.\')[0]\n-\n- # iterate through each mask and export csv with mask name as suffix\n- for k,v in scdata_z.items():\n- # export the csv for this mask name\n- scdata_z[k].to_csv(\n- str(Path(os.path.join(str(output),\n- str(im_name+"_{}"+".csv").format(k)))),\n- index=False\n- )\n-\n-\n-def MultiExtractSingleCells(masks,image,channel_names,output, mask_props=None, intensity_props=["intensity_mean"]):\n- """Function for iterating over a list of z_stacks and output locations to\n- export single-cell data from image masks"""\n-\n- print("Extracting single-cell data for "+str(image)+\'...\')\n-\n- #Run the ExtractSingleCells function for this image\n- ExtractSingleCells(masks,image,channel_names,output, mask_props=mask_props, intensity_props=intensity_props)\n-\n- #Print update\n- im_full_name = os.path.basename(image)\n- im_name = im_full_name.split(\'.\')[0]\n- print("Finished "+str(im_name))\n' |
b |
diff -r c09e444635d9 -r 261464223fa3 macros.xml --- a/macros.xml Thu Apr 07 16:54:04 2022 +0000 +++ b/macros.xml Tue Sep 06 23:18:12 2022 +0000 |
[ |
@@ -2,31 +2,35 @@ <macros> <xml name="requirements"> <requirements> - <container type="docker">labsyspharm/quantification:@VERSION@</container> + <!-- <requirement type="package" version="3.9">python</requirement> <requirement type="package" version="0.18.0">scikit-image</requirement> <requirement type="package">h5py</requirement> <requirement type="package">pandas</requirement> <requirement type="package">numpy</requirement> <requirement type="package">pathlib</requirement> + --> + <container type="docker">labsyspharm/quantification:@TOOL_VERSION@</container> </requirements> </xml> <xml name="version_cmd"> - <version_command>echo @VERSION@</version_command> + <version_command>echo @TOOL_VERSION@</version_command> </xml> <xml name="citations"> <citations> </citations> </xml> - <token name="@VERSION@">1.5.1</token> + <token name="@TOOL_VERSION@">1.5.1</token> + <token name="@VERSION_SUFFIX@">0</token> <token name="@CMD_BEGIN@"><![CDATA[ - QUANT_PATH=""; - if [ -f "/app/CommandSingleCellExtraction.py" ]; then - export QUANT_PATH="/app/CommandSingleCellExtraction.py"; + QUANT_PATH='' && + if [ -f '/app/CommandSingleCellExtraction.py' ]; then + export QUANT_PATH='python /app/CommandSingleCellExtraction.py'; else - export QUANT_PATH="${__tool_directory__}/CommandSingleCellExtraction.py"; - fi; + export QUANT_PATH='CommandSingleCellExtraction.py'; + fi && + \$QUANT_PATH ]]></token> </macros> |
b |
diff -r c09e444635d9 -r 261464223fa3 quantification.xml --- a/quantification.xml Thu Apr 07 16:54:04 2022 +0000 +++ b/quantification.xml Tue Sep 06 23:18:12 2022 +0000 |
[ |
@@ -1,83 +1,94 @@ -<tool id="quantification" name="Quantification" version="@VERSION@.7" profile="17.09"> - <description>Single cell quantification, a module for single-cell data extraction given a segmentation mask and multi-channel image.</description> +<tool id="quantification" name="MCQUANT" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="19.01"> + <description>a module for single-cell data extraction</description> <macros> <import>macros.xml</import> </macros> - <expand macro="requirements"/> - @VERSION_CMD@ + <expand macro="version_cmd"/> <command detect_errors="exit_code"><![CDATA[ - ln -s $image input.ome.tiff; - ln -s $primary_mask primary_mask.tiff; - #if $supp_masks - ln -s $supp_masks supp_mask.tiff; + ln -s '$image' 'input.ome.tiff' && + ln -s '$primary_mask' 'primary_mask.tiff' && + #if $supp_mask + ln -s '$supp_mask' 'supp_mask.tiff' && #end if - mkdir ./tool_out; + mkdir './tool_out' && @CMD_BEGIN@ - python \$QUANT_PATH - --masks - primary_mask.tiff - #if $supp_masks - supp_mask.tiff + --masks 'primary_mask.tiff' + #if $supp_mask + 'supp_mask.tiff' #end if - --image input.ome.tiff - --output ./tool_out + --image 'input.ome.tiff' + --output './tool_out' - #if $mask_props - --mask_props $mask_props + #if str($mask_props).strip() + --mask_props '$mask_props' #end if - #if $intensity_props - --intensity_props $intensity_props + #if str($intensity_props).strip() + --intensity_props '$intensity_props' #end if - --channel_names '$channel_names'; + --channel_names '$channel_names' && - cp tool_out/*primary_mask.csv primary_mask.csv + #if $supp_mask + mv tool_out/*supp_mask.csv supp_mask.csv && + #end if + + mv tool_out/*primary_mask.csv primary_mask.csv ]]></command> <inputs> <param name="image" type="data" format="tiff" label="Registered TIFF"/> - <param name="primary_mask" type="data" format="tiff" label="Primary Cell Mask"/> - <param name="supp_masks" type="data" optional="true" format="tiff" label="Additional Cell Masks"/> + <param name="primary_mask" type="data" format="tiff" label="Primary Mask"/> + <param name="supp_mask" type="data" optional="true" format="tiff" label="Additional Mask"/> <param name="channel_names" type="data" format="csv" label="Marker Channels"/> <param name="mask_props" type="text" label="Mask Metrics" help="Space separated list of additional metrics to be calculated for every mask."/> <param name="intensity_props" type="text" label="Intensity Metrics" help="Space separated list of additional metrics to be calculated for every marker separately."/> </inputs> <outputs> - <data format="csv" name="cellmask" from_work_dir="primary_mask.csv" label="CellMaskQuant"/> - <collection type="list" name="quantification" label="${tool.name} on ${on_string}"> - <discover_datasets pattern="__designation_and_ext__" format="csv" directory="tool_out/" visible="false"/> - </collection> + <data format="csv" name="cellmask" from_work_dir="primary_mask.csv" label="Primary Mask Quantification"/> + <data format="csv" name="suppmask" from_work_dir="supp_mask.csv" label="Supplemental Mask Quantification"> + <filter>supp_mask</filter> + </data> </outputs> + <tests> + <test> + <param name="image" value="test.tiff" /> + <param name="primary_mask" value="mask.tiff" /> + <param name="supp_mask" value="supp_mask.tiff" /> + <param name="channel_names" value="channels.csv" /> + <output name="cellmask" ftype="csv"> + <assert_contents> + <has_n_columns n="11" sep="," delta="1" /> + </assert_contents> + </output> + <output name="suppmask" ftype="csv"> + <assert_contents> + <has_n_columns n="11" sep="," delta="1" /> + </assert_contents> + </output> + </test> + </tests> <help><![CDATA[ -# Single cell quantification -Module for single-cell data extraction given a segmentation mask and multi-channel image. The CSV structure is aligned with histoCAT output. - -**CommandSingleCellExtraction.py**: - -* `--masks` Paths to where masks are stored (Ex: ./segmentation/cellMask.tif) -> If multiple masks are selected the first mask will be used for spatial feature extraction but all will be quantified + +-------- +MCQUANT +-------- +**MCQUANT** module for single cell quantification given a segmentation mask and multi-channel image. The CSV structure is aligned with histoCAT output. -* `--image` Path to image(s) for quantification. (Ex: ./registration/*.h5) -> works with .h(df)5 or .tif(f) - -* `--output` Path to output directory. (Ex: ./feature_extraction) - -* `--channel_names` csv file containing the channel names for the z-stack (Ex: ./my_channels.csv) +**Inputs** +1. A fully stitched and registered image in .ome.tif format. Nextflow will use images in the registration/ and dearray/ subfolders as appropriate. +2. One or more segmentation masks in .tif format. Nextflow will use files in the segmentation/ subfolder within the project. +3. A .csv file containing a marker_name column specifying names of individual channels. Nextflow will look for this file in the project directory. -# Run script -`python CommandSingleCellExtraction.py --masks ./segmentation/cellMask.tif ./segmentation/membraneMask.tif --image ./registration/Exemplar_001.h5 --output ./feature_extraction --channel_names ./my_channels.csv` +**Outputs** +A cell-by-feature table mapping Cell IDs to marker expression and morphological features (including x,y coordinates). -# Main developer -Denis Schapiro (https://github.com/DenisSch) - -Joshua Hess (https://github.com/JoshuaHess12) - -Jeremy Muhlich (https://github.com/jmuhlich) ]]></help> <expand macro="citations" /> </tool> |
b |
diff -r c09e444635d9 -r 261464223fa3 test-data/channels.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/channels.csv Tue Sep 06 23:18:12 2022 +0000 |
b |
@@ -0,0 +1,2 @@ +channel_number,cycle_number,marker_name,Filter,excitation_wavelength,emission_wavelength +1,1,DNA_1,DAPI,395,431 |
b |
diff -r c09e444635d9 -r 261464223fa3 test-data/mask.tiff |
b |
Binary file test-data/mask.tiff has changed |
b |
diff -r c09e444635d9 -r 261464223fa3 test-data/supp_mask.tiff |
b |
Binary file test-data/supp_mask.tiff has changed |
b |
diff -r c09e444635d9 -r 261464223fa3 test-data/test.tiff |
b |
Binary file test-data/test.tiff has changed |