Repository 'quantification'
hg clone https://toolshed.g2.bx.psu.edu/repos/perssond/quantification

Changeset 0:928db0f952e3 (2021-03-12)
Next changeset 1:aba3655fdef0 (2022-03-11)
Commit message:
"planemo upload for repository https://github.com/ohsu-comp-bio/quantification commit a4349062e9177b5e60fb7c49115c57299e0d648d-dirty"
added:
CommandSingleCellExtraction.py
ParseInput.py
SingleCellDataExtraction.py
macros.xml
quantification.xml
b
diff -r 000000000000 -r 928db0f952e3 CommandSingleCellExtraction.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/CommandSingleCellExtraction.py Fri Mar 12 00:19:24 2021 +0000
b
@@ -0,0 +1,11 @@
+#Script for parsing command line arguments and running single-cell
+#data extraction functions
+#Joshua Hess
+import ParseInput
+import SingleCellDataExtraction
+
+#Gather the command line options as a keyword-argument dictionary
+cli_options = ParseInput.ParseInputDataExtract()
+
+#Forward the parsed options to the single-cell extraction entry point
+SingleCellDataExtraction.MultiExtractSingleCells(**cli_options)
b
diff -r 000000000000 -r 928db0f952e3 ParseInput.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ParseInput.py Fri Mar 12 00:19:24 2021 +0000
b
@@ -0,0 +1,23 @@
+#Functions for parsing command line arguments for single-cell data extraction
+import argparse
+
+
+def ParseInputDataExtract():
+    """Parse the command line arguments for single-cell data extraction.
+
+    Reads ``sys.argv`` through argparse; a missing option exits with a usage
+    error here instead of failing later on a ``None`` path downstream.
+
+    Returns a dict with the keys 'masks' (list of paths), 'image',
+    'channel_names' and 'output', which is also printed to stdout."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--masks', nargs='*', required=True)
+    parser.add_argument('--image', required=True)
+    parser.add_argument('--channel_names', required=True)
+    parser.add_argument('--output', required=True)
+    args = parser.parse_args()
+    #Named parsed_args (not dict) so the builtin dict is not shadowed
+    parsed_args = {'masks': args.masks, 'image': args.image,
+                   'channel_names': args.channel_names, 'output': args.output}
+    print(parsed_args)
+    return parsed_args
b
diff -r 000000000000 -r 928db0f952e3 SingleCellDataExtraction.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SingleCellDataExtraction.py Fri Mar 12 00:19:24 2021 +0000
[
b'@@ -0,0 +1,272 @@\n+#Functions for reading in single cell imaging data\n+#Joshua Hess\n+\n+#Import necessary modules\n+import skimage.io\n+import h5py\n+import pandas as pd\n+import numpy as np\n+import os\n+import skimage.measure as measure\n+from pathlib import Path\n+import csv\n+\n+import sys\n+\n+\n+def MaskChannel(mask_loaded,image_loaded_z):\n+    """Function for quantifying a single channel image\n+\n+    Returns a table with CellID according to the mask and the mean pixel intensity\n+    for the given channel for each cell"""\n+    print(f\'Mask loaded: {mask_loaded.shape}\', file=sys.stderr)\n+    print(f\'Image loaded: {image_loaded_z.shape}\', file=sys.stderr)\n+    dat = measure.regionprops(mask_loaded, image_loaded_z)\n+    n = len(dat)\n+    intensity_z = np.empty(n)\n+    for i in range(n):\n+        intensity_z[i] = dat[i].mean_intensity\n+        # Clear reference to avoid memory leak -- see MaskIDs for explanation.\n+        dat[i] = None\n+    return intensity_z\n+\n+\n+def MaskIDs(mask):\n+    """This function will extract the CellIDs and the XY positions for each\n+    cell based on that cells centroid\n+\n+    Returns a dictionary object"""\n+\n+    dat = measure.regionprops(mask)\n+    n = len(dat)\n+\n+    # Pre-allocate numpy arrays for all properties we\'ll calculate.\n+    labels = np.empty(n, int)\n+    xcoords = np.empty(n)\n+    ycoords = np.empty(n)\n+    area = np.empty(n, int)\n+    minor_axis_length = np.empty(n)\n+    major_axis_length = np.empty(n)\n+    eccentricity = np.empty(n)\n+    solidity = np.empty(n)\n+    extent = np.empty(n)\n+    orientation = np.empty(n)\n+\n+    for i in range(n):\n+        labels[i] = dat[i].label\n+        xcoords[i] = dat[i].centroid[1]\n+        ycoords[i] = dat[i].centroid[0]\n+        area[i] = dat[i].area\n+        major_axis_length[i] = dat[i].major_axis_length\n+        minor_axis_length[i] = dat[i].minor_axis_length\n+        eccentricity[i] = dat[i].eccentricity\n+        solidity[i] = 
dat[i].solidity\n+        extent[i] = dat[i].extent\n+        orientation[i] = dat[i].orientation\n+        # By clearing the reference to each RegionProperties object, we allow it\n+        # and its cache to be garbage collected immediately. Otherwise memory\n+        # usage creeps up needlessly while this function is executing.\n+        dat[i] = None\n+\n+    IDs = {\n+        "CellID": labels,\n+        "X_centroid": xcoords,\n+        "Y_centroid": ycoords,\n+        "column_centroid": xcoords,\n+        "row_centroid": ycoords,\n+        "Area": area,\n+        "MajorAxisLength": major_axis_length,\n+        "MinorAxisLength": minor_axis_length,\n+        "Eccentricity": eccentricity,\n+        "Solidity": solidity,\n+        "Extent": extent,\n+        "Orientation": orientation,\n+    }\n+\n+    return IDs\n+\n+\n+def PrepareData(image,z):\n+    """Function for preparing input for maskzstack function. Connecting function\n+    to use with mc micro ilastik pipeline"""\n+\n+    image_path = Path(image)\n+    print(f\'{image_path} at {z}\', file=sys.stderr)\n+\n+    #Check to see if image tif(f)\n+    if image_path.suffix == \'.tiff\' or image_path.suffix == \'.tif\' or image_path.suffix == \'.btf\':\n+        #Check to see if the image is ome.tif(f)\n+        if  image.endswith((\'.ome.tif\',\'.ome.tiff\')):\n+            #Read the image\n+            image_loaded_z = skimage.io.imread(image,img_num=z,plugin=\'tifffile\')\n+            #print(\'OME TIF(F) found\')\n+        else:\n+            #Read the image\n+            image_loaded_z = skimage.io.imread(image,img_num=z,plugin=\'tifffile\')\n+            #print(\'TIF(F) found\')\n+            # Remove extra axis\n+            #image_loaded = image_loaded.reshape((image_loaded.shape[1],image_loaded.shape[3],image_loaded.shape[4]))\n+\n+    #Check to see if image is hdf5\n+    elif image_path.suffix == \'.h5\' or image_path.suffix == \'.hdf5\':\n+        #Read the image\n+        f = 
h5py.File(image,\'r+\')\n+        #Get the dataset name from the h5 file\n+        dat_name = list(f.keys())[0]\n+        ###If the hdf5 is exported from ilastik fiji plug'..b'an[nm] = dict_of_chan[nm].reindex(columns=cols)\n+        #Otherwise, add no spatial information\n+        else:\n+            #Create channel names for this mask\n+            new_names = [channel_names_loaded[i]+"_"+str(nm) for i in range(len(channel_names_loaded))]\n+            #Use the above information to mask z stack\n+            dict_of_chan[nm] = pd.DataFrame(dict(zip(new_names,dict_of_chan[nm])))\n+\n+    #Concatenate all data from all masks to return\n+    dat = pd.concat([dict_of_chan[nm] for nm in mask_names],axis=1)\n+\n+    #Return the dataframe\n+    return dat\n+\n+\n+def ExtractSingleCells(masks,image,channel_names,output):\n+    """Function for extracting single cell information from input\n+    path containing single-cell masks, z_stack path, and channel_names path."""\n+\n+    #Create pathlib object for output\n+    output = Path(output)\n+\n+    #Check if header available\n+    #sniffer = csv.Sniffer()\n+    #sniffer.has_header(open(channel_names).readline())\n+    #If header not available\n+    #if not sniffer:\n+        #If header available\n+        #channel_names_loaded = pd.read_csv(channel_names)\n+        #channel_names_loaded_list = list(channel_names_loaded.marker_name)\n+    #else:\n+        #print("negative")\n+        #old one column version\n+        #channel_names_loaded = pd.read_csv(channel_names,header=None)\n+        #Add a column index for ease\n+        #channel_names_loaded.columns = ["marker"]\n+        #channel_names_loaded = list(channel_names_loaded.marker.values)\n+\n+    #Read csv channel names\n+    channel_names_loaded = pd.read_csv(channel_names)\n+    #Check for size of columns\n+    if channel_names_loaded.shape[1] > 1:\n+        #Get the marker_name column if more than one column (CyCIF structure)\n+        channel_names_loaded_list = 
list(channel_names_loaded.marker_name)\n+    else:\n+        #old one column version -- re-read the csv file and add column name\n+        channel_names_loaded = pd.read_csv(channel_names, header = None)\n+        #Add a column index for ease and for standardization\n+        channel_names_loaded.columns = ["marker"]\n+        channel_names_loaded_list = list(channel_names_loaded.marker)\n+\n+    #Check for unique marker names -- create new list to store new names\n+    channel_names_loaded_checked = []\n+    for idx,val in enumerate(channel_names_loaded_list):\n+        #Check for unique value\n+        if channel_names_loaded_list.count(val) > 1:\n+            #If unique count greater than one, add suffix\n+            channel_names_loaded_checked.append(val + "_"+ str(channel_names_loaded_list[:idx].count(val) + 1))\n+        else:\n+            #Otherwise, leave channel name\n+            channel_names_loaded_checked.append(val)\n+\n+    #Clear small memory amount by clearing old channel names\n+    channel_names_loaded, channel_names_loaded_list = None, None\n+\n+    #Read the masks\n+    masks_loaded = {}\n+    #iterate through mask paths and read images to add to dictionary object\n+    for m in masks:\n+        m_full_name = os.path.basename(m)\n+        m_name = m_full_name.split(\'.\')[0]\n+        masks_loaded.update({str(m_name):skimage.io.imread(m,plugin=\'tifffile\')})\n+\n+    scdata_z = MaskZstack(masks_loaded,image,channel_names_loaded_checked)\n+    #Write the singe cell data to a csv file using the image name\n+\n+    im_full_name = os.path.basename(image)\n+    im_name = im_full_name.split(\'.\')[0]\n+    scdata_z.to_csv(str(Path(os.path.join(str(output),str(im_name+".csv")))),index=False)\n+\n+\n+def MultiExtractSingleCells(masks,image,channel_names,output):\n+    """Function for iterating over a list of z_stacks and output locations to\n+    export single-cell data from image masks"""\n+\n+    print("Extracting single-cell data for 
"+str(image)+\'...\')\n+\n+    #Run the ExtractSingleCells function for this image\n+    ExtractSingleCells(masks,image,channel_names,output)\n+\n+    #Print update\n+    im_full_name = os.path.basename(image)\n+    im_name = im_full_name.split(\'.\')[0]\n+    print("Finished "+str(im_name))\n'
b
diff -r 000000000000 -r 928db0f952e3 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Fri Mar 12 00:19:24 2021 +0000
b
@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="3.6.10">python</requirement>
+            <requirement type="package" version="0.17.2">scikit-image</requirement>
+            <requirement type="package" version="2.10.0">h5py</requirement>
+            <requirement type="package" version="1.0.4">pandas</requirement>
+            <requirement type="package" version="1.18.5">numpy</requirement>
+            <requirement type="package" version="1.0.1">pathlib</requirement>
+        </requirements>
+    </xml>
+
+    <xml name="version_cmd">
+        <version_command>echo @VERSION@</version_command>
+    </xml>
+    <xml name="citations">
+        <citations>
+        </citations>
+    </xml>
+
+    <token name="@VERSION@">1.3.1</token>
+    <token name="@CMD_BEGIN@">python ${__tool_directory__}/CommandSingleCellExtraction.py</token>
+</macros>
b
diff -r 000000000000 -r 928db0f952e3 quantification.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/quantification.xml Fri Mar 12 00:19:24 2021 +0000
[
@@ -0,0 +1,71 @@
+<tool id="quantification" name="Quantification" version="@VERSION@.5" profile="17.09">
+    <description>Single cell quantification, a module for single-cell data extraction given a segmentation mask and multi-channel image.</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>

+    <expand macro="requirements"/>
+    <expand macro="version_cmd"/><!-- version_cmd is an xml macro in macros.xml, not a token, so it is expanded here rather than referenced as a token -->
+
+    <command detect_errors="exit_code"><![CDATA[
+    ln -s '$image' '${image.name}'.ome.tiff;
+    ln -s '$primary_mask' '${primary_mask.name}'.ome.tiff; 
+    #for $mask in $supp_masks:
+    ln -s '$mask' '${mask.name}'.ome.tiff;
+    #end for
+
+    mkdir ./tool_out;
+
+    @CMD_BEGIN@
+    
+    --masks 
+    '${primary_mask.name}'.ome.tiff
+    #if $supp_masks
+    #for $mask in $supp_masks:
+    '${mask.name}'.ome.tiff
+    #end for
+    #end if
+
+    --image '${image.name}'.ome.tiff
+    --output ./tool_out
+    --channel_names '$channel_names';
+
+    mv ./tool_out/*.csv ./tool_out/quantified.csv;
+    ]]></command>
+
+    <inputs>
+        <param name="image" type="data" format="tiff" label="Registered TIFF"/>
+        <param name="primary_mask" type="data" format="tiff" label="Primary Cell Mask"/>
+        <param name="supp_masks" type="data" multiple="true" optional="true" format="tiff" label="Additional Cell Masks"/>
+        <param name="channel_names" type="data" format="csv" label="Marker Channels"/>
+    </inputs>
+
+    <outputs>
+        <data format="csv" name="quant_out" from_work_dir="./tool_out/quantified.csv" label="${tool.name} on ${on_string}"/>
+    </outputs>
+    <help><![CDATA[
+# Single cell quantification
+Module for single-cell data extraction given a segmentation mask and multi-channel image. The CSV structure is aligned with histoCAT output.
+
+**CommandSingleCellExtraction.py**:
+
+* `--masks` Paths to where masks are stored (Ex: ./segmentation/cellMask.tif) -> If multiple masks are selected the first mask will be used for spatial feature extraction but all will be quantified
+
+* `--image` Path to image(s) for quantification.  (Ex: ./registration/*.h5) -> works with .h(df)5 or .tif(f)
+
+* `--output` Path to output directory. (Ex: ./feature_extraction)
+
+* `--channel_names` csv file containing the channel names for the z-stack (Ex: ./my_channels.csv)
+
+# Run script
+`python CommandSingleCellExtraction.py --masks ./segmentation/cellMask.tif ./segmentation/membraneMask.tif --image ./registration/Exemplar_001.h5  --output ./feature_extraction --channel_names ./my_channels.csv`
+
+# Main developers
+Denis Schapiro (https://github.com/DenisSch)
+
+Joshua Hess (https://github.com/JoshuaHess12)
+
+Jeremy Muhlich (https://github.com/jmuhlich)
+    ]]></help>
+    <expand macro="citations" />
+</tool>