Galaxy |

Changeset 0:928db0f952e3 (2021-03-12)

Next changeset 1:aba3655fdef0 (2022-03-11)

Commit message:
"planemo upload for repository https://github.com/ohsu-comp-bio/quantification commit a4349062e9177b5e60fb7c49115c57299e0d648d-dirty"

added:
CommandSingleCellExtraction.py
ParseInput.py
SingleCellDataExtraction.py
macros.xml
quantification.xml

diff -r 000000000000 -r 928db0f952e3 CommandSingleCellExtraction.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/CommandSingleCellExtraction.py Fri Mar 12 00:19:24 2021 +0000

@@ -0,0 +1,11 @@
+#Script for parsing command line arguments and running single-cell
+#data extraction functions
+#Joshua Hess
+import ParseInput
+import SingleCellDataExtraction
+
+#Parse the command line arguments into keyword arguments and hand them
+#straight to the MultiExtractSingleCells function
+SingleCellDataExtraction.MultiExtractSingleCells(
+    **ParseInput.ParseInputDataExtract()
+)

diff -r 000000000000 -r 928db0f952e3 ParseInput.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ParseInput.py Fri Mar 12 00:19:24 2021 +0000

@@ -0,0 +1,23 @@
+#Functions for parsing command line arguments for single-cell data extraction
+import argparse
+
+
+def ParseInputDataExtract():
+   """Parse the command line arguments for single-cell data extraction.
+
+   Reads --masks (zero or more values), --image, --channel_names and
+   --output, prints the parsed values to stdout and returns them as a
+   dictionary keyed by argument name (unset options map to None)."""
+   parser = argparse.ArgumentParser()
+   parser.add_argument('--masks',nargs='*')
+   parser.add_argument('--image')
+   parser.add_argument('--channel_names')
+   parser.add_argument('--output')
+   args = parser.parse_args()
+   #Create a dictionary object to pass to the next function
+   #(named parsed_args so the built-in dict type is not shadowed)
+   parsed_args = {'masks': args.masks, 'image': args.image,
+    'channel_names': args.channel_names,'output':args.output}
+   #Print the dictionary object
+   print(parsed_args)
+   return parsed_args

diff -r 000000000000 -r 928db0f952e3 SingleCellDataExtraction.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SingleCellDataExtraction.py Fri Mar 12 00:19:24 2021 +0000

[

b'@@ -0,0 +1,272 @@\n+#Functions for reading in single cell imaging data\n+#Joshua Hess\n+\n+#Import necessary modules\n+import skimage.io\n+import h5py\n+import pandas as pd\n+import numpy as np\n+import os\n+import skimage.measure as measure\n+from pathlib import Path\n+import csv\n+\n+import sys\n+\n+\n+def MaskChannel(mask_loaded,image_loaded_z):\n+ """Function for quantifying a single channel image\n+\n+ Returns a table with CellID according to the mask and the mean pixel intensity\n+ for the given channel for each cell"""\n+ print(f\'Mask loaded: {mask_loaded.shape}\', file=sys.stderr)\n+ print(f\'Image loaded: {image_loaded_z.shape}\', file=sys.stderr)\n+ dat = measure.regionprops(mask_loaded, image_loaded_z)\n+ n = len(dat)\n+ intensity_z = np.empty(n)\n+ for i in range(n):\n+ intensity_z[i] = dat[i].mean_intensity\n+ # Clear reference to avoid memory leak -- see MaskIDs for explanation.\n+ dat[i] = None\n+ return intensity_z\n+\n+\n+def MaskIDs(mask):\n+ """This function will extract the CellIDs and the XY positions for each\n+ cell based on that cells centroid\n+\n+ Returns a dictionary object"""\n+\n+ dat = measure.regionprops(mask)\n+ n = len(dat)\n+\n+ # Pre-allocate numpy arrays for all properties we\'ll calculate.\n+ labels = np.empty(n, int)\n+ xcoords = np.empty(n)\n+ ycoords = np.empty(n)\n+ area = np.empty(n, int)\n+ minor_axis_length = np.empty(n)\n+ major_axis_length = np.empty(n)\n+ eccentricity = np.empty(n)\n+ solidity = np.empty(n)\n+ extent = np.empty(n)\n+ orientation = np.empty(n)\n+\n+ for i in range(n):\n+ labels[i] = dat[i].label\n+ xcoords[i] = dat[i].centroid[1]\n+ ycoords[i] = dat[i].centroid[0]\n+ area[i] = dat[i].area\n+ major_axis_length[i] = dat[i].major_axis_length\n+ minor_axis_length[i] = dat[i].minor_axis_length\n+ eccentricity[i] = dat[i].eccentricity\n+ solidity[i] = dat[i].solidity\n+ extent[i] = dat[i].extent\n+ orientation[i] = dat[i].orientation\n+ # By clearing the reference to each RegionProperties object, we allow 
it\n+ # and its cache to be garbage collected immediately. Otherwise memory\n+ # usage creeps up needlessly while this function is executing.\n+ dat[i] = None\n+\n+ IDs = {\n+ "CellID": labels,\n+ "X_centroid": xcoords,\n+ "Y_centroid": ycoords,\n+ "column_centroid": xcoords,\n+ "row_centroid": ycoords,\n+ "Area": area,\n+ "MajorAxisLength": major_axis_length,\n+ "MinorAxisLength": minor_axis_length,\n+ "Eccentricity": eccentricity,\n+ "Solidity": solidity,\n+ "Extent": extent,\n+ "Orientation": orientation,\n+ }\n+\n+ return IDs\n+\n+\n+def PrepareData(image,z):\n+ """Function for preparing input for maskzstack function. Connecting function\n+ to use with mc micro ilastik pipeline"""\n+\n+ image_path = Path(image)\n+ print(f\'{image_path} at {z}\', file=sys.stderr)\n+\n+ #Check to see if image tif(f)\n+ if image_path.suffix == \'.tiff\' or image_path.suffix == \'.tif\' or image_path.suffix == \'.btf\':\n+ #Check to see if the image is ome.tif(f)\n+ if image.endswith((\'.ome.tif\',\'.ome.tiff\')):\n+ #Read the image\n+ image_loaded_z = skimage.io.imread(image,img_num=z,plugin=\'tifffile\')\n+ #print(\'OME TIF(F) found\')\n+ else:\n+ #Read the image\n+ image_loaded_z = skimage.io.imread(image,img_num=z,plugin=\'tifffile\')\n+ #print(\'TIF(F) found\')\n+ # Remove extra axis\n+ #image_loaded = image_loaded.reshape((image_loaded.shape[1],image_loaded.shape[3],image_loaded.shape[4]))\n+\n+ #Check to see if image is hdf5\n+ elif image_path.suffix == \'.h5\' or image_path.suffix == \'.hdf5\':\n+ #Read the image\n+ f = h5py.File(image,\'r+\')\n+ #Get the dataset name from the h5 file\n+ dat_name = list(f.keys())[0]\n+ ###If the hdf5 is exported from ilastik fiji plug'..b'an[nm] = dict_of_chan[nm].reindex(columns=cols)\n+ #Otherwise, add no spatial information\n+ else:\n+ #Create channel names for this mask\n+ new_names = [channel_names_loaded[i]+"_"+str(nm) for i in range(len(channel_names_loaded))]\n+ #Use the above information to mask z stack\n+ dict_of_chan[nm] = 
pd.DataFrame(dict(zip(new_names,dict_of_chan[nm])))\n+\n+ #Concatenate all data from all masks to return\n+ dat = pd.concat([dict_of_chan[nm] for nm in mask_names],axis=1)\n+\n+ #Return the dataframe\n+ return dat\n+\n+\n+def ExtractSingleCells(masks,image,channel_names,output):\n+ """Function for extracting single cell information from input\n+ path containing single-cell masks, z_stack path, and channel_names path."""\n+\n+ #Create pathlib object for output\n+ output = Path(output)\n+\n+ #Check if header available\n+ #sniffer = csv.Sniffer()\n+ #sniffer.has_header(open(channel_names).readline())\n+ #If header not available\n+ #if not sniffer:\n+ #If header available\n+ #channel_names_loaded = pd.read_csv(channel_names)\n+ #channel_names_loaded_list = list(channel_names_loaded.marker_name)\n+ #else:\n+ #print("negative")\n+ #old one column version\n+ #channel_names_loaded = pd.read_csv(channel_names,header=None)\n+ #Add a column index for ease\n+ #channel_names_loaded.columns = ["marker"]\n+ #channel_names_loaded = list(channel_names_loaded.marker.values)\n+\n+ #Read csv channel names\n+ channel_names_loaded = pd.read_csv(channel_names)\n+ #Check for size of columns\n+ if channel_names_loaded.shape[1] > 1:\n+ #Get the marker_name column if more than one column (CyCIF structure)\n+ channel_names_loaded_list = list(channel_names_loaded.marker_name)\n+ else:\n+ #old one column version -- re-read the csv file and add column name\n+ channel_names_loaded = pd.read_csv(channel_names, header = None)\n+ #Add a column index for ease and for standardization\n+ channel_names_loaded.columns = ["marker"]\n+ channel_names_loaded_list = list(channel_names_loaded.marker)\n+\n+ #Check for unique marker names -- create new list to store new names\n+ channel_names_loaded_checked = []\n+ for idx,val in enumerate(channel_names_loaded_list):\n+ #Check for unique value\n+ if channel_names_loaded_list.count(val) > 1:\n+ #If unique count greater than one, add suffix\n+ 
channel_names_loaded_checked.append(val + "_"+ str(channel_names_loaded_list[:idx].count(val) + 1))\n+ else:\n+ #Otherwise, leave channel name\n+ channel_names_loaded_checked.append(val)\n+\n+ #Clear small memory amount by clearing old channel names\n+ channel_names_loaded, channel_names_loaded_list = None, None\n+\n+ #Read the masks\n+ masks_loaded = {}\n+ #iterate through mask paths and read images to add to dictionary object\n+ for m in masks:\n+ m_full_name = os.path.basename(m)\n+ m_name = m_full_name.split(\'.\')[0]\n+ masks_loaded.update({str(m_name):skimage.io.imread(m,plugin=\'tifffile\')})\n+\n+ scdata_z = MaskZstack(masks_loaded,image,channel_names_loaded_checked)\n+ #Write the singe cell data to a csv file using the image name\n+\n+ im_full_name = os.path.basename(image)\n+ im_name = im_full_name.split(\'.\')[0]\n+ scdata_z.to_csv(str(Path(os.path.join(str(output),str(im_name+".csv")))),index=False)\n+\n+\n+def MultiExtractSingleCells(masks,image,channel_names,output):\n+ """Function for iterating over a list of z_stacks and output locations to\n+ export single-cell data from image masks"""\n+\n+ print("Extracting single-cell data for "+str(image)+\'...\')\n+\n+ #Run the ExtractSingleCells function for this image\n+ ExtractSingleCells(masks,image,channel_names,output)\n+\n+ #Print update\n+ im_full_name = os.path.basename(image)\n+ im_name = im_full_name.split(\'.\')[0]\n+ print("Finished "+str(im_name))\n'

diff -r 000000000000 -r 928db0f952e3 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Fri Mar 12 00:19:24 2021 +0000

@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="3.6.10">python</requirement>
+            <requirement type="package" version="0.17.2">scikit-image</requirement>
+            <requirement type="package" version="2.10.0">h5py</requirement>
+            <requirement type="package" version="1.0.4">pandas</requirement>
+            <requirement type="package" version="1.18.5">numpy</requirement>
+            <requirement type="package" version="1.0.1">pathlib</requirement>
+        </requirements>
+    </xml>
+
+    <xml name="version_cmd">
+        <version_command>echo @VERSION@</version_command>
+    </xml>
+    <xml name="citations">
+        <citations>
+        </citations>
+    </xml>
+
+    <token name="@VERSION@">1.3.1</token>
+    <token name="@CMD_BEGIN@">python ${__tool_directory__}/CommandSingleCellExtraction.py</token>
+</macros>

diff -r 000000000000 -r 928db0f952e3 quantification.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/quantification.xml Fri Mar 12 00:19:24 2021 +0000

[

@@ -0,0 +1,71 @@
+<tool id="quantification" name="Quantification" version="@VERSION@.5" profile="17.09">
+    <description>Single cell quantification, a module for single-cell data extraction given a segmentation mask and multi-channel image.</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <expand macro="requirements"/>
+    <expand macro="version_cmd"/>
+
+    <command detect_errors="exit_code"><![CDATA[
+    ln -s '$image' '${image.name}'.ome.tiff;
+    ln -s '$primary_mask' '${primary_mask.name}'.ome.tiff;
+    #for $mask in $supp_masks:
+    ln -s '$mask' '${mask.name}'.ome.tiff;
+    #end for
+
+    mkdir ./tool_out;
+
+    @CMD_BEGIN@
+
+    --masks
+    '${primary_mask.name}'.ome.tiff
+    #if $supp_masks
+    #for $mask in $supp_masks:
+    '${mask.name}'.ome.tiff
+    #end for
+    #end if
+
+    --image '${image.name}'.ome.tiff
+    --output ./tool_out
+    --channel_names '$channel_names';
+
+    mv ./tool_out/*.csv ./tool_out/quantified.csv;
+    ]]></command>
+
+    <inputs>
+        <param name="image" type="data" format="tiff" label="Registered TIFF"/>
+        <param name="primary_mask" type="data" format="tiff" label="Primary Cell Mask"/>
+        <param name="supp_masks" type="data" multiple="true" optional="true" format="tiff" label="Additional Cell Masks"/>
+        <param name="channel_names" type="data" format="csv" label="Marker Channels"/>
+    </inputs>
+
+    <outputs>
+        <data format="csv" name="quant_out" from_work_dir="./tool_out/quantified.csv" label="${tool.name} on ${on_string}"/>
+    </outputs>
+    <help><![CDATA[
+# Single cell quantification
+Module for single-cell data extraction given a segmentation mask and multi-channel image. The CSV structure is aligned with histoCAT output.
+
+**CommandSingleCellExtraction.py**:
+
+* `--masks` Paths to where masks are stored (Ex: ./segmentation/cellMask.tif) -> If multiple masks are selected the first mask will be used for spatial feature extraction but all will be quantified
+
+* `--image` Path to image(s) for quantification.  (Ex: ./registration/*.h5) -> works with .h(df)5 or .tif(f)
+
+* `--output` Path to output directory. (Ex: ./feature_extraction)
+
+* `--channel_names` csv file containing the channel names for the z-stack (Ex: ./my_channels.csv)
+
+# Run script
+`python CommandSingleCellExtraction.py --masks ./segmentation/cellMask.tif ./segmentation/membraneMask.tif --image ./registration/Exemplar_001.h5  --output ./feature_extraction --channel_names ./my_channels.csv`
+
+# Main developers
+Denis Schapiro (https://github.com/DenisSch)
+
+Joshua Hess (https://github.com/JoshuaHess12)
+
+Jeremy Muhlich (https://github.com/jmuhlich)
+    ]]></help>
+    <expand macro="citations" />
+</tool>