Next changeset 1:aba3655fdef0 (2022-03-11) |
Commit message:
"planemo upload for repository https://github.com/ohsu-comp-bio/quantification commit a4349062e9177b5e60fb7c49115c57299e0d648d-dirty" |
added:
CommandSingleCellExtraction.py ParseInput.py SingleCellDataExtraction.py macros.xml quantification.xml |
b |
diff -r 000000000000 -r 928db0f952e3 CommandSingleCellExtraction.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CommandSingleCellExtraction.py Fri Mar 12 00:19:24 2021 +0000 |
b |
#Script for parsing command line arguments and running single-cell
#data extraction functions
#Joshua Hess
import ParseInput
import SingleCellDataExtraction


def main():
    """Parse the command line and run single-cell data extraction.

    The dictionary returned by ``ParseInput.ParseInputDataExtract`` is
    unpacked as keyword arguments to
    ``SingleCellDataExtraction.MultiExtractSingleCells``.
    """
    #Parse the command line arguments
    args = ParseInput.ParseInputDataExtract()

    #Run the MultiExtractSingleCells function
    SingleCellDataExtraction.MultiExtractSingleCells(**args)


#Only run the extraction when executed as a script, so that importing this
#module has no side effects
if __name__ == '__main__':
    main()
b |
diff -r 000000000000 -r 928db0f952e3 ParseInput.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ParseInput.py Fri Mar 12 00:19:24 2021 +0000 |
b |
#Functions for parsing command line arguments for ome ilastik prep
import argparse


def ParseInputDataExtract(argv=None):
    """Parse command line arguments for input to single-cell data extraction.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the arguments from ``sys.argv``,
            which is what happens when the tool is run from the command
            line.

    Returns:
        A dictionary with the keys ``masks`` (list of the values given to
        ``--masks``, or ``None`` when the flag is absent), ``image``,
        ``channel_names`` and ``output``. The same dictionary is also
        printed to standard output.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--masks', nargs='*')
    parser.add_argument('--image')
    parser.add_argument('--channel_names')
    parser.add_argument('--output')
    args = parser.parse_args(argv)
    #Create a dictionary object to pass to the next function; the local name
    #deliberately avoids shadowing the built-in dict
    parsed = {
        'masks': args.masks,
        'image': args.image,
        'channel_names': args.channel_names,
        'output': args.output,
    }
    #Print the dictionary object
    print(parsed)
    #Return the dictionary
    return parsed
b |
diff -r 000000000000 -r 928db0f952e3 SingleCellDataExtraction.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SingleCellDataExtraction.py Fri Mar 12 00:19:24 2021 +0000 |
[ |
b'@@ -0,0 +1,272 @@\n+#Functions for reading in single cell imaging data\n+#Joshua Hess\n+\n+#Import necessary modules\n+import skimage.io\n+import h5py\n+import pandas as pd\n+import numpy as np\n+import os\n+import skimage.measure as measure\n+from pathlib import Path\n+import csv\n+\n+import sys\n+\n+\n+def MaskChannel(mask_loaded,image_loaded_z):\n+ """Function for quantifying a single channel image\n+\n+ Returns a table with CellID according to the mask and the mean pixel intensity\n+ for the given channel for each cell"""\n+ print(f\'Mask loaded: {mask_loaded.shape}\', file=sys.stderr)\n+ print(f\'Image loaded: {image_loaded_z.shape}\', file=sys.stderr)\n+ dat = measure.regionprops(mask_loaded, image_loaded_z)\n+ n = len(dat)\n+ intensity_z = np.empty(n)\n+ for i in range(n):\n+ intensity_z[i] = dat[i].mean_intensity\n+ # Clear reference to avoid memory leak -- see MaskIDs for explanation.\n+ dat[i] = None\n+ return intensity_z\n+\n+\n+def MaskIDs(mask):\n+ """This function will extract the CellIDs and the XY positions for each\n+ cell based on that cells centroid\n+\n+ Returns a dictionary object"""\n+\n+ dat = measure.regionprops(mask)\n+ n = len(dat)\n+\n+ # Pre-allocate numpy arrays for all properties we\'ll calculate.\n+ labels = np.empty(n, int)\n+ xcoords = np.empty(n)\n+ ycoords = np.empty(n)\n+ area = np.empty(n, int)\n+ minor_axis_length = np.empty(n)\n+ major_axis_length = np.empty(n)\n+ eccentricity = np.empty(n)\n+ solidity = np.empty(n)\n+ extent = np.empty(n)\n+ orientation = np.empty(n)\n+\n+ for i in range(n):\n+ labels[i] = dat[i].label\n+ xcoords[i] = dat[i].centroid[1]\n+ ycoords[i] = dat[i].centroid[0]\n+ area[i] = dat[i].area\n+ major_axis_length[i] = dat[i].major_axis_length\n+ minor_axis_length[i] = dat[i].minor_axis_length\n+ eccentricity[i] = dat[i].eccentricity\n+ solidity[i] = dat[i].solidity\n+ extent[i] = dat[i].extent\n+ orientation[i] = dat[i].orientation\n+ # By clearing the reference to each RegionProperties object, we allow 
it\n+ # and its cache to be garbage collected immediately. Otherwise memory\n+ # usage creeps up needlessly while this function is executing.\n+ dat[i] = None\n+\n+ IDs = {\n+ "CellID": labels,\n+ "X_centroid": xcoords,\n+ "Y_centroid": ycoords,\n+ "column_centroid": xcoords,\n+ "row_centroid": ycoords,\n+ "Area": area,\n+ "MajorAxisLength": major_axis_length,\n+ "MinorAxisLength": minor_axis_length,\n+ "Eccentricity": eccentricity,\n+ "Solidity": solidity,\n+ "Extent": extent,\n+ "Orientation": orientation,\n+ }\n+\n+ return IDs\n+\n+\n+def PrepareData(image,z):\n+ """Function for preparing input for maskzstack function. Connecting function\n+ to use with mc micro ilastik pipeline"""\n+\n+ image_path = Path(image)\n+ print(f\'{image_path} at {z}\', file=sys.stderr)\n+\n+ #Check to see if image tif(f)\n+ if image_path.suffix == \'.tiff\' or image_path.suffix == \'.tif\' or image_path.suffix == \'.btf\':\n+ #Check to see if the image is ome.tif(f)\n+ if image.endswith((\'.ome.tif\',\'.ome.tiff\')):\n+ #Read the image\n+ image_loaded_z = skimage.io.imread(image,img_num=z,plugin=\'tifffile\')\n+ #print(\'OME TIF(F) found\')\n+ else:\n+ #Read the image\n+ image_loaded_z = skimage.io.imread(image,img_num=z,plugin=\'tifffile\')\n+ #print(\'TIF(F) found\')\n+ # Remove extra axis\n+ #image_loaded = image_loaded.reshape((image_loaded.shape[1],image_loaded.shape[3],image_loaded.shape[4]))\n+\n+ #Check to see if image is hdf5\n+ elif image_path.suffix == \'.h5\' or image_path.suffix == \'.hdf5\':\n+ #Read the image\n+ f = h5py.File(image,\'r+\')\n+ #Get the dataset name from the h5 file\n+ dat_name = list(f.keys())[0]\n+ ###If the hdf5 is exported from ilastik fiji plug'..b'an[nm] = dict_of_chan[nm].reindex(columns=cols)\n+ #Otherwise, add no spatial information\n+ else:\n+ #Create channel names for this mask\n+ new_names = [channel_names_loaded[i]+"_"+str(nm) for i in range(len(channel_names_loaded))]\n+ #Use the above information to mask z stack\n+ dict_of_chan[nm] = 
pd.DataFrame(dict(zip(new_names,dict_of_chan[nm])))\n+\n+ #Concatenate all data from all masks to return\n+ dat = pd.concat([dict_of_chan[nm] for nm in mask_names],axis=1)\n+\n+ #Return the dataframe\n+ return dat\n+\n+\n+def ExtractSingleCells(masks,image,channel_names,output):\n+ """Function for extracting single cell information from input\n+ path containing single-cell masks, z_stack path, and channel_names path."""\n+\n+ #Create pathlib object for output\n+ output = Path(output)\n+\n+ #Check if header available\n+ #sniffer = csv.Sniffer()\n+ #sniffer.has_header(open(channel_names).readline())\n+ #If header not available\n+ #if not sniffer:\n+ #If header available\n+ #channel_names_loaded = pd.read_csv(channel_names)\n+ #channel_names_loaded_list = list(channel_names_loaded.marker_name)\n+ #else:\n+ #print("negative")\n+ #old one column version\n+ #channel_names_loaded = pd.read_csv(channel_names,header=None)\n+ #Add a column index for ease\n+ #channel_names_loaded.columns = ["marker"]\n+ #channel_names_loaded = list(channel_names_loaded.marker.values)\n+\n+ #Read csv channel names\n+ channel_names_loaded = pd.read_csv(channel_names)\n+ #Check for size of columns\n+ if channel_names_loaded.shape[1] > 1:\n+ #Get the marker_name column if more than one column (CyCIF structure)\n+ channel_names_loaded_list = list(channel_names_loaded.marker_name)\n+ else:\n+ #old one column version -- re-read the csv file and add column name\n+ channel_names_loaded = pd.read_csv(channel_names, header = None)\n+ #Add a column index for ease and for standardization\n+ channel_names_loaded.columns = ["marker"]\n+ channel_names_loaded_list = list(channel_names_loaded.marker)\n+\n+ #Check for unique marker names -- create new list to store new names\n+ channel_names_loaded_checked = []\n+ for idx,val in enumerate(channel_names_loaded_list):\n+ #Check for unique value\n+ if channel_names_loaded_list.count(val) > 1:\n+ #If unique count greater than one, add suffix\n+ 
channel_names_loaded_checked.append(val + "_"+ str(channel_names_loaded_list[:idx].count(val) + 1))\n+ else:\n+ #Otherwise, leave channel name\n+ channel_names_loaded_checked.append(val)\n+\n+ #Clear small memory amount by clearing old channel names\n+ channel_names_loaded, channel_names_loaded_list = None, None\n+\n+ #Read the masks\n+ masks_loaded = {}\n+ #iterate through mask paths and read images to add to dictionary object\n+ for m in masks:\n+ m_full_name = os.path.basename(m)\n+ m_name = m_full_name.split(\'.\')[0]\n+ masks_loaded.update({str(m_name):skimage.io.imread(m,plugin=\'tifffile\')})\n+\n+ scdata_z = MaskZstack(masks_loaded,image,channel_names_loaded_checked)\n+ #Write the singe cell data to a csv file using the image name\n+\n+ im_full_name = os.path.basename(image)\n+ im_name = im_full_name.split(\'.\')[0]\n+ scdata_z.to_csv(str(Path(os.path.join(str(output),str(im_name+".csv")))),index=False)\n+\n+\n+def MultiExtractSingleCells(masks,image,channel_names,output):\n+ """Function for iterating over a list of z_stacks and output locations to\n+ export single-cell data from image masks"""\n+\n+ print("Extracting single-cell data for "+str(image)+\'...\')\n+\n+ #Run the ExtractSingleCells function for this image\n+ ExtractSingleCells(masks,image,channel_names,output)\n+\n+ #Print update\n+ im_full_name = os.path.basename(image)\n+ im_name = im_full_name.split(\'.\')[0]\n+ print("Finished "+str(im_name))\n' |
b |
diff -r 000000000000 -r 928db0f952e3 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Fri Mar 12 00:19:24 2021 +0000 |
b |
<?xml version="1.0"?>
<!-- Shared macros and tokens imported by quantification.xml -->
<macros>
    <!-- Pinned packages needed to run the quantification scripts -->
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="3.6.10">python</requirement>
            <requirement type="package" version="0.17.2">scikit-image</requirement>
            <requirement type="package" version="2.10.0">h5py</requirement>
            <requirement type="package" version="1.0.4">pandas</requirement>
            <requirement type="package" version="1.18.5">numpy</requirement>
            <!-- NOTE(review): pathlib is part of the Python 3 standard library; confirm whether this pin is still needed -->
            <requirement type="package" version="1.0.1">pathlib</requirement>
        </requirements>
    </xml>

    <!-- Version command macro: echoes the VERSION token defined below -->
    <xml name="version_cmd">
        <version_command>echo @VERSION@</version_command>
    </xml>
    <!-- Citations macro (currently contains no citation entries) -->
    <xml name="citations">
        <citations>
        </citations>
    </xml>

    <!-- Base version string; quantification.xml appends its own suffix to it -->
    <token name="@VERSION@">1.3.1</token>
    <!-- Command prefix that runs the extraction script from the tool directory -->
    <token name="@CMD_BEGIN@">python ${__tool_directory__}/CommandSingleCellExtraction.py</token>
</macros>
b |
diff -r 000000000000 -r 928db0f952e3 quantification.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/quantification.xml Fri Mar 12 00:19:24 2021 +0000 |
[ |
<tool id="quantification" name="Quantification" version="@VERSION@.5" profile="17.09">
    <description>Single cell quantification, a module for single-cell data extraction given a segmentation mask and multi-channel image.</description>
    <macros>
        <import>macros.xml</import>
    </macros>

    <expand macro="requirements"/>
    <!-- version_cmd is defined in macros.xml as an xml macro, not a token, so it has to be expanded rather than referenced with token syntax -->
    <expand macro="version_cmd"/>

    <!-- Inputs are symlinked to .ome.tiff names, quantified into ./tool_out, and the resulting CSV is renamed to the fixed name collected by the output below -->
    <command detect_errors="exit_code"><![CDATA[
        ln -s '$image' '${image.name}'.ome.tiff;
        ln -s '$primary_mask' '${primary_mask.name}'.ome.tiff;
        #if $supp_masks
            #for $mask in $supp_masks:
                ln -s '$mask' '${mask.name}'.ome.tiff;
            #end for
        #end if

        mkdir ./tool_out;

        @CMD_BEGIN@

        --masks
        '${primary_mask.name}'.ome.tiff
        #if $supp_masks
            #for $mask in $supp_masks:
                '${mask.name}'.ome.tiff
            #end for
        #end if

        --image '${image.name}'.ome.tiff
        --output ./tool_out
        --channel_names '$channel_names';

        mv ./tool_out/*.csv ./tool_out/quantified.csv;
    ]]></command>

    <inputs>
        <param name="image" type="data" format="tiff" label="Registered TIFF"/>
        <param name="primary_mask" type="data" format="tiff" label="Primary Cell Mask"/>
        <!-- Optional: the command section only iterates over these masks when at least one is supplied -->
        <param name="supp_masks" type="data" multiple="true" optional="true" format="tiff" label="Additional Cell Masks"/>
        <param name="channel_names" type="data" format="csv" label="Marker Channels"/>
    </inputs>

    <outputs>
        <data format="csv" name="quant_out" from_work_dir="./tool_out/quantified.csv" label="${tool.name} on ${on_string}"/>
    </outputs>
    <help><![CDATA[
# Single cell quantification
Module for single-cell data extraction given a segmentation mask and multi-channel image. The CSV structure is aligned with histoCAT output.

**CommandSingleCellExtraction.py**:

* `--masks` Paths to where masks are stored (Ex: ./segmentation/cellMask.tif) -> If multiple masks are selected the first mask will be used for spatial feature extraction but all will be quantified

* `--image` Path to image(s) for quantification. (Ex: ./registration/*.h5) -> works with .h(df)5 or .tif(f)

* `--output` Path to output directory. (Ex: ./feature_extraction)

* `--channel_names` csv file containing the channel names for the z-stack (Ex: ./my_channels.csv)

# Run script
`python CommandSingleCellExtraction.py --masks ./segmentation/cellMask.tif ./segmentation/membraneMask.tif --image ./registration/Exemplar_001.h5 --output ./feature_extraction --channel_names ./my_channels.csv`

# Main developer
Denis Schapiro (https://github.com/DenisSch)

Joshua Hess (https://github.com/JoshuaHess12)

Jeremy Muhlich (https://github.com/jmuhlich)
    ]]></help>
    <expand macro="citations" />
</tool>