Repository 'quantification'
hg clone https://toolshed.g2.bx.psu.edu/repos/perssond/quantification

Changeset 1:aba3655fdef0 (2022-03-11)
Previous changeset 0:928db0f952e3 (2021-03-12) Next changeset 2:46b897eb2c8e (2022-03-30)
Commit message:
"planemo upload for repository https://github.com/ohsu-comp-bio/quantification commit 897a7dc7cb43e45d6f0fdfe2b2970e59f20f8853"
modified:
ParseInput.py
SingleCellDataExtraction.py
macros.xml
quantification.xml
b
diff -r 928db0f952e3 -r aba3655fdef0 ParseInput.py
--- a/ParseInput.py Fri Mar 12 00:19:24 2021 +0000
+++ b/ParseInput.py Fri Mar 11 23:35:52 2022 +0000
[
@@ -8,15 +8,39 @@
 
 #if __name__ == '__main__':
    parser = argparse.ArgumentParser()
-   parser.add_argument('--masks',nargs='*')
-   parser.add_argument('--image')
-   parser.add_argument('--channel_names')
-   parser.add_argument('--output')
+   parser.add_argument('--masks',nargs='+', required=True)
+   parser.add_argument('--image', required=True)
+   parser.add_argument('--channel_names', required=True)
+   parser.add_argument('--output', required=True)
+   parser.add_argument(
+      '--mask_props', nargs = "+",
+      help="""
+         Space separated list of additional metrics to be calculated for every mask.
+         This is for metrics that depend only on the cell mask. If the metric depends
+         on signal intensity, use --intensity_props instead.
+         See list at https://scikit-image.org/docs/dev/api/skimage.measure.html#regionprops
+      """
+   )
+   parser.add_argument(
+      '--intensity_props', nargs = "+",
+      help="""
+         Space separated list of additional metrics to be calculated for every marker separately.
+         By default only mean intensity is calculated.
+         If the metric doesn't depend on signal intensity, use --mask_props instead.
+         See list at https://scikit-image.org/docs/dev/api/skimage.measure.html#regionprops
+         Additionally available is gini_index, which calculates a single number
+         between 0 and 1, representing how unequal the signal is distributed in each region.
+         See https://en.wikipedia.org/wiki/Gini_coefficient
+      """
+   )
    #parser.add_argument('--suffix')
    args = parser.parse_args()
    #Create a dictionary object to pass to the next function
    dict = {'masks': args.masks, 'image': args.image,\
-    'channel_names': args.channel_names,'output':args.output}
+    'channel_names': args.channel_names,'output':args.output,
+    'intensity_props': set(args.intensity_props if args.intensity_props is not None else []).union(["intensity_mean"]),
+    'mask_props': args.mask_props,
+   }
    #Print the dictionary object
    print(dict)
    #Return the dictionary
b
diff -r 928db0f952e3 -r aba3655fdef0 SingleCellDataExtraction.py
--- a/SingleCellDataExtraction.py Fri Mar 12 00:19:24 2021 +0000
+++ b/SingleCellDataExtraction.py Fri Mar 11 23:35:52 2022 +0000
[
b'@@ -8,83 +8,93 @@\n import numpy as np\n import os\n import skimage.measure as measure\n+import tifffile\n+\n from pathlib import Path\n-import csv\n \n import sys\n \n \n-def MaskChannel(mask_loaded,image_loaded_z):\n+def gini_index(mask, intensity):\n+    x = intensity[mask]\n+    sorted_x = np.sort(x)\n+    n = len(x)\n+    cumx = np.cumsum(sorted_x, dtype=float)\n+    return (n + 1 - 2 * np.sum(cumx) / cumx[-1]) / n\n+\n+def intensity_median(mask, intensity):\n+    return np.median(intensity[mask])\n+\n+def MaskChannel(mask_loaded, image_loaded_z, intensity_props=["intensity_mean"]):\n     """Function for quantifying a single channel image\n \n     Returns a table with CellID according to the mask and the mean pixel intensity\n     for the given channel for each cell"""\n-    print(f\'Mask loaded: {mask_loaded.shape}\', file=sys.stderr)\n-    print(f\'Image loaded: {image_loaded_z.shape}\', file=sys.stderr)\n-    dat = measure.regionprops(mask_loaded, image_loaded_z)\n-    n = len(dat)\n-    intensity_z = np.empty(n)\n-    for i in range(n):\n-        intensity_z[i] = dat[i].mean_intensity\n-        # Clear reference to avoid memory leak -- see MaskIDs for explanation.\n-        dat[i] = None\n-    return intensity_z\n+    # Look for regionprops in skimage\n+    builtin_props = set(intensity_props).intersection(measure._regionprops.PROP_VALS)\n+    # Otherwise look for them in this module\n+    extra_props = set(intensity_props).difference(measure._regionprops.PROP_VALS)\n+    dat = measure.regionprops_table(\n+        mask_loaded, image_loaded_z,\n+        properties = tuple(builtin_props),\n+        extra_properties = [globals()[n] for n in extra_props]\n+    )\n+    return dat\n \n \n-def MaskIDs(mask):\n+def MaskIDs(mask, mask_props=None):\n     """This function will extract the CellIDs and the XY positions for each\n     cell based on that cells centroid\n \n     Returns a dictionary object"""\n \n-    dat = measure.regionprops(mask)\n-    n = 
len(dat)\n+    all_mask_props = set(["label", "centroid", "area", "major_axis_length", "minor_axis_length", "eccentricity", "solidity", "extent", "orientation"])\n+    if mask_props is not None:\n+        all_mask_props = all_mask_props.union(mask_props)\n \n-    # Pre-allocate numpy arrays for all properties we\'ll calculate.\n-    labels = np.empty(n, int)\n-    xcoords = np.empty(n)\n-    ycoords = np.empty(n)\n-    area = np.empty(n, int)\n-    minor_axis_length = np.empty(n)\n-    major_axis_length = np.empty(n)\n-    eccentricity = np.empty(n)\n-    solidity = np.empty(n)\n-    extent = np.empty(n)\n-    orientation = np.empty(n)\n+    dat = measure.regionprops_table(\n+        mask,\n+        properties=all_mask_props\n+    )\n \n-    for i in range(n):\n-        labels[i] = dat[i].label\n-        xcoords[i] = dat[i].centroid[1]\n-        ycoords[i] = dat[i].centroid[0]\n-        area[i] = dat[i].area\n-        major_axis_length[i] = dat[i].major_axis_length\n-        minor_axis_length[i] = dat[i].minor_axis_length\n-        eccentricity[i] = dat[i].eccentricity\n-        solidity[i] = dat[i].solidity\n-        extent[i] = dat[i].extent\n-        orientation[i] = dat[i].orientation\n-        # By clearing the reference to each RegionProperties object, we allow it\n-        # and its cache to be garbage collected immediately. 
Otherwise memory\n-        # usage creeps up needlessly while this function is executing.\n-        dat[i] = None\n+    name_map = {\n+        "CellID": "label",\n+        "X_centroid": "centroid-1",\n+        "Y_centroid": "centroid-0",\n+        "Area": "area",\n+        "MajorAxisLength": "major_axis_length",\n+        "MinorAxisLength": "minor_axis_length",\n+        "Eccentricity": "eccentricity",\n+        "Solidity": "solidity",\n+        "Extent": "extent",\n+        "Orientation": "orientation",\n+    }\n+    for new_name, old_name in name_map.items():\n+        dat[new_name] = dat[old_name]\n+    for old_name in set(name_map.values()):\n+        del dat[old_name]\n+\n+    return dat\n \n-    IDs = {\n-        "CellID": labels,\n-        "X_centroid": xcoords'..b'fer:\n-        #If header available\n-        #channel_names_loaded = pd.read_csv(channel_names)\n-        #channel_names_loaded_list = list(channel_names_loaded.marker_name)\n-    #else:\n-        #print("negative")\n-        #old one column version\n-        #channel_names_loaded = pd.read_csv(channel_names,header=None)\n-        #Add a column index for ease\n-        #channel_names_loaded.columns = ["marker"]\n-        #channel_names_loaded = list(channel_names_loaded.marker.values)\n-\n     #Read csv channel names\n     channel_names_loaded = pd.read_csv(channel_names)\n-    #Check for size of columns\n-    if channel_names_loaded.shape[1] > 1:\n+    #Check for the presence of `marker_name` column\n+    if \'marker_name\' in channel_names_loaded:\n         #Get the marker_name column if more than one column (CyCIF structure)\n         channel_names_loaded_list = list(channel_names_loaded.marker_name)\n-    else:\n-        #old one column version -- re-read the csv file and add column name\n+    #Consider the old one-marker-per-line plain text format\n+    elif channel_names_loaded.shape[1] == 1:\n+        #re-read the csv file and add column name\n         channel_names_loaded = 
pd.read_csv(channel_names, header = None)\n-        #Add a column index for ease and for standardization\n-        channel_names_loaded.columns = ["marker"]\n-        channel_names_loaded_list = list(channel_names_loaded.marker)\n+        channel_names_loaded_list = list(channel_names_loaded.iloc[:,0])\n+    else:\n+        raise Exception(\'%s must contain the marker_name column\'%channel_names)\n \n+    #Contrast against the number of markers in the image\n+    if len(channel_names_loaded_list) != n_channels(image):\n+        raise Exception("The number of channels in %s doesn\'t match the image"%channel_names)\n+    \n     #Check for unique marker names -- create new list to store new names\n     channel_names_loaded_checked = []\n     for idx,val in enumerate(channel_names_loaded_list):\n@@ -238,9 +230,6 @@\n             #Otherwise, leave channel name\n             channel_names_loaded_checked.append(val)\n \n-    #Clear small memory amount by clearing old channel names\n-    channel_names_loaded, channel_names_loaded_list = None, None\n-\n     #Read the masks\n     masks_loaded = {}\n     #iterate through mask paths and read images to add to dictionary object\n@@ -249,22 +238,30 @@\n         m_name = m_full_name.split(\'.\')[0]\n         masks_loaded.update({str(m_name):skimage.io.imread(m,plugin=\'tifffile\')})\n \n-    scdata_z = MaskZstack(masks_loaded,image,channel_names_loaded_checked)\n+    scdata_z = MaskZstack(masks_loaded,image,channel_names_loaded_checked, mask_props=mask_props, intensity_props=intensity_props)\n     #Write the singe cell data to a csv file using the image name\n \n     im_full_name = os.path.basename(image)\n     im_name = im_full_name.split(\'.\')[0]\n-    scdata_z.to_csv(str(Path(os.path.join(str(output),str(im_name+".csv")))),index=False)\n+\n+    # iterate through each mask and export csv with mask name as suffix\n+    for k,v in scdata_z.items():\n+        # export the csv for this mask name\n+        scdata_z[k].to_csv(\n+     
                       str(Path(os.path.join(str(output),\n+                            str(im_name+"_{}"+".csv").format(k)))),\n+                            index=False\n+                            )\n \n \n-def MultiExtractSingleCells(masks,image,channel_names,output):\n+def MultiExtractSingleCells(masks,image,channel_names,output, mask_props=None, intensity_props=["intensity_mean"]):\n     """Function for iterating over a list of z_stacks and output locations to\n     export single-cell data from image masks"""\n \n     print("Extracting single-cell data for "+str(image)+\'...\')\n \n     #Run the ExtractSingleCells function for this image\n-    ExtractSingleCells(masks,image,channel_names,output)\n+    ExtractSingleCells(masks,image,channel_names,output, mask_props=mask_props, intensity_props=intensity_props)\n \n     #Print update\n     im_full_name = os.path.basename(image)\n'
b
diff -r 928db0f952e3 -r aba3655fdef0 macros.xml
--- a/macros.xml Fri Mar 12 00:19:24 2021 +0000
+++ b/macros.xml Fri Mar 11 23:35:52 2022 +0000
[
@@ -2,12 +2,13 @@
 <macros>
     <xml name="requirements">
         <requirements>
-            <requirement type="package" version="3.6.10">python</requirement>
-            <requirement type="package" version="0.17.2">scikit-image</requirement>
-            <requirement type="package" version="2.10.0">h5py</requirement>
-            <requirement type="package" version="1.0.4">pandas</requirement>
-            <requirement type="package" version="1.18.5">numpy</requirement>
-            <requirement type="package" version="1.0.1">pathlib</requirement>
+            <container type="docker">labsyspharm/quantification:@VERSION@</container>
+            <requirement type="package" version="3.9">python</requirement>
+            <requirement type="package" version="0.18.0">scikit-image</requirement>
+            <requirement type="package">h5py</requirement>
+            <requirement type="package">pandas</requirement>
+            <requirement type="package">numpy</requirement>
+            <requirement type="package">pathlib</requirement>
         </requirements>
     </xml>
 
@@ -19,6 +20,13 @@
         </citations>
     </xml>
 
-    <token name="@VERSION@">1.3.1</token>
-    <token name="@CMD_BEGIN@">python ${__tool_directory__}/CommandSingleCellExtraction.py</token>
+    <token name="@VERSION@">1.5.1</token>
+    <token name="@CMD_BEGIN@"><![CDATA[
+    QUANT_PATH="";
+    if [ -f "/app/CommandSingleCellExtraction.py" ]; then
+        export QUANT_PATH="/app/CommandSingleCellExtraction.py";
+    else
+        export QUANT_PATH="${__tool_directory__}/CommandSingleCellExtraction.py";
+    fi;
+    ]]></token>
 </macros>
b
diff -r 928db0f952e3 -r aba3655fdef0 quantification.xml
--- a/quantification.xml Fri Mar 12 00:19:24 2021 +0000
+++ b/quantification.xml Fri Mar 11 23:35:52 2022 +0000
[
@@ -18,6 +18,7 @@
 
     @CMD_BEGIN@
     
+    python \$QUANT_PATH
     --masks 
     '${primary_mask.name}'.ome.tiff
     #if $supp_masks
@@ -28,9 +29,17 @@
 
     --image '${image.name}'.ome.tiff
     --output ./tool_out
+   
+    #if $mask_props
+    --mask_props $mask_props
+    #end if
+    #if $intensity_props
+    --intensity_props $intensity_props
+    #end if
+
     --channel_names '$channel_names';
 
-    mv ./tool_out/*.csv ./tool_out/quantified.csv;
+    cp tool_out/*cellMasks.csv cellMasks.csv
     ]]></command>
 
     <inputs>
@@ -38,11 +47,16 @@
         <param name="primary_mask" type="data" format="tiff" label="Primary Cell Mask"/>
         <param name="supp_masks" type="data" multiple="true" optional="true" format="tiff" label="Additional Cell Masks"/>
         <param name="channel_names" type="data" format="csv" label="Marker Channels"/>
+        <param name="mask_props" type="text" label="Mask Metrics" help="Space separated list of additional metrics to be calculated for every mask."/>
+        <param name="intensity_props" type="text" label="Intensity Metrics" help="Space separated list of additional metrics to be calculated for every marker separately."/>
     </inputs>
 
     <outputs>
-        <data format="csv" name="quant_out" from_work_dir="./tool_out/quantified.csv" label="${tool.name} on ${on_string}"/>
-    </outputs>
+        <data format="csv" name="cellmask" from_work_dir="cellMasks.csv" label="CellMaskQuant"/>
+        <collection type="list" name="quantification" label="${tool.name} on ${on_string}">
+            <discover_datasets pattern="__designation_and_ext__" format="csv" directory="tool_out/" visible="true"/>
+        </collection>
+     </outputs>
     <help><![CDATA[
 # Single cell quantification
 Module for single-cell data extraction given a segmentation mask and multi-channel image. The CSV structure is aligned with histoCAT output.