changeset 4:dc5f72f6b1e9 draft

planemo upload for repository https://github.com/BMCV/galaxy-image-analysis/tree/master/tools/superdsm/ commit b0b09d6788778541d1c0b89ca96101fc57d60e22
author imgteam
date Mon, 12 Feb 2024 14:58:45 +0000
parents 7fd8dba15bd3
children 9b4830300f3a
files run-superdsm.py superdsm.xml test-data/cfg-full.tsv test-data/cfg.tsv
diffstat 4 files changed, 181 insertions(+), 106 deletions(-) [+]
line wrap: on
line diff
--- a/run-superdsm.py	Thu Nov 16 12:29:41 2023 +0000
+++ b/run-superdsm.py	Mon Feb 12 14:58:45 2024 +0000
@@ -17,24 +17,25 @@
 
 hyperparameters = [
     ('AF_scale', float),
-    ('c2f_region_analysis/min_atom_radius', float),
-    ('c2f_region_analysis_min_norm_energy_improvement', float),
-    ('c2f_region_analysis_max_atom_norm_energy', float),
-    ('c2f_region_analysis_max_cluster_marker_irregularity', float),
-    ('dsm_alpha', float),
-    ('dsm_AF_alpha', float),
-    ('global_energy_minimization_betai', float),
-    ('global_energy_minimization_AF_beta', float),
-    ('postprocess_mask_max_distance', int),
-    ('postprocess_mask_stdamp', float),
-    ('postprocess_max_norm_energy', float),
-    ('postprocess_min_contrast', float),
-    ('postprocess_min_object_radius', float),
+    ('c2f-region-analysis/min_atom_radius', float),
+    ('c2f-region-analysis/min_norm_energy_improvement', float),
+    ('c2f-region-analysis/max_atom_norm_energy', float),
+    ('c2f-region-analysis/max_cluster_marker_irregularity', float),
+    ('dsm/alpha', float),
+    ('dsm/AF_alpha', float),
+    ('global-energy-minimization/pruning', str),
+    ('global-energy-minimization/beta', float),
+    ('global-energy-minimization/AF_beta', float),
+    ('postprocess/mask_max_distance', int),
+    ('postprocess/mask_stdamp', float),
+    ('postprocess/max_norm_energy', float),
+    ('postprocess/min_contrast', float),
+    ('postprocess/min_object_radius', float),
 ]
 
 
 def get_param_name(key):
-    return key.replace('/', '_')
+    return key.replace('/', '_').replace('-', '_')
 
 
 def create_config(args):
@@ -96,24 +97,32 @@
         cfg = create_config(args)
         img = superdsm.io.imread(img_filepath)
 
-        if args.do_cfg:
-            print(f'Writing config to: {args.do_cfg}')
+        # Create configuration if it is required:
+        if args.do_cfg or args.do_overlay or args.do_masks:
             cfg, _ = superdsm.automation.create_config(pipeline, cfg, img)
-            with open(args.do_cfg, 'w') as fp:
-                tsv_out = csv.writer(fp, delimiter='\t')
-                tsv_out.writerow(['Hyperparameter', 'Value'])
-                for key, value in flatten_dict(cfg.entries).items():
-                    tsv_out.writerow([key, value])
 
+        # Perform segmentation if it is required:
         if args.do_overlay or args.do_masks:
             print('Performing segmentation')
             data, cfg, _ = pipeline.process_image(img, cfg)
 
+        # Write the configuration used for segmentation or, if no segmentation was performed, the automatically created one:
+        if args.do_cfg:
+            print(f'Writing config to: {args.do_cfg}')
+            with open(args.do_cfg, 'w') as fp:
+                tsv_out = csv.writer(fp, delimiter='\t')
+                tsv_out.writerow(['Hyperparameter', 'Value'])
+                rows = sorted(flatten_dict(cfg.entries).items(), key=lambda item: item[0])
+                for key, value in rows:
+                    tsv_out.writerow([key, value])
+
+        # Write the overlay image:
         if args.do_overlay:
             print(f'Writing overlay to: {args.do_overlay}')
             overlay = superdsm.render.render_result_over_image(data, border_width=args.do_overlay_border, normalize_img=False)
             superdsm.io.imwrite(args.do_overlay, overlay)
 
+        # Write the label map:
         if args.do_masks:
             print(f'Writing masks to: {args.do_masks}')
             masks = superdsm.render.rasterize_labels(data)
--- a/superdsm.xml	Thu Nov 16 12:29:41 2023 +0000
+++ b/superdsm.xml	Mon Feb 12 14:58:45 2024 +0000
@@ -1,8 +1,8 @@
 <tool id="ip_superdsm" name="Perform segmentation using deformable shape models" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
     <description>with SuperDSM</description>
     <macros>
-        <token name="@TOOL_VERSION@">0.1.3</token>
-        <token name="@VERSION_SUFFIX@">3</token>
+        <token name="@TOOL_VERSION@">0.2.0</token>
+        <token name="@VERSION_SUFFIX@">0</token>
     </macros>
     <edam_operations>
         <edam_operation>operation_3443</edam_operation>
@@ -11,12 +11,35 @@
         <xref type="bio.tools">superdsm</xref>
         <xref type="biii">superdsm</xref>
     </xrefs>
-    <requirements> 
-        <requirement type="package" version="0.1.3">superdsm</requirement>
+    <requirements>
+
+        <requirement type="package" version="0.2.0">superdsm</requirement>
+
+        <!--
+        Pin the dependencies to specific versions for reproducibility:
+        https://github.com/BMCV/SuperDSM#dependency-version-considerations
+        -->
+        <requirement type="package" version="1.20">numpy</requirement>
+        <requirement type="package" version="1.6.3">scipy</requirement>
+        <requirement type="package" version="0.18.1">scikit-image</requirement>
+        <requirement type="package" version="1.2.6">cvxopt</requirement>
+        <requirement type="package" version="1.1.13">cvxpy</requirement>
         <requirement type="package" version="1.6.0">ray-core</requirement>
-        <requirement type="package" version="0.18.1">scikit-image</requirement>
-        <requirement type="package" version="2020.0">mkl</requirement><!-- this seems to be the last version of MKL which supports the "MKL_DEBUG_CPU_TYPE" environment variable -->
-        <requirement type="package" version="*=mkl">blas</requirement><!-- using MKL instead of other BLAS can significantly improve performance on some hardware, cf. https://stackoverflow.com/questions/62783262/why-is-numpy-with-ryzen-threadripper-so-much-slower-than-xeon -->
+
+        <!--
+        2020.0 is the last version of MKL which supports the "MKL_DEBUG_CPU_TYPE" environment variable.
+        -->
+        <requirement type="package" version="2020.0">mkl</requirement>
+
+        <!--
+        Using MKL instead of other BLAS can significantly improve performance on some hardware:
+        https://stackoverflow.com/questions/62783262/why-is-numpy-with-ryzen-threadripper-so-much-slower-than-xeon
+
+        Pinning BLAS to version 1.0 is required for reproducibility:
+        https://github.com/BMCV/SuperDSM#dependency-version-considerations
+        -->
+        <requirement type="package" version="1.0=mkl">blas</requirement>
+
     </requirements>
     <command detect_errors="aggressive">
     <![CDATA[
@@ -59,6 +82,7 @@
     #if str($config.dsm_AF_alpha) != '':
         --dsm_AF_alpha '${config.dsm_AF_alpha}'
     #end if
+    --global_energy_minimization_pruning '${global_energy_minimization_pruning}'
     #if str($config.global_energy_minimization_beta) != '':
         --global_energy_minimization_beta '${config.global_energy_minimization_beta}'
     #end if
@@ -83,11 +107,20 @@
     ]]>
     </command>
     <environment_variables>
-        <environment_variable name="MKL_DEBUG_CPU_TYPE">5</environment_variable><!-- this enables accelerated CPU instruction sets on AMD hardware, does nothing in Intel hardware, thus no need to change this -->
+
+        <!--
+        This enables accelerated CPU instruction sets on AMD hardware and has no effect on Intel hardware, so there is no need to change it:
+        -->
+        <environment_variable name="MKL_DEBUG_CPU_TYPE">5</environment_variable>
+
     </environment_variables>
     <inputs>
         <param name="dataset" type="data" format="tiff,png" label="Dataset" />
-        <param name="outputs" type="select" label="Outputs" multiple="true" optional="false">
+        <param argument="--global_energy_minimization_pruning" type="select" label="Graph pruning for global energy minimization" help="Exact graph pruning corresponds to the original algorithm, which provably yields globally optimal results. Robust graph pruning is more greedy and has a provably bounded approximation error. Depending on the data, this can be significantly faster than exact graph pruning, without degrading the segmentation or cluster splitting performance.">
+            <option value="exact">Exact graph pruning (Kostrykin and Rohr, TPAMI 2023)</option>
+            <option value="isbi24" selected="true">Robust graph pruning (Kostrykin and Rohr, ISBI 2024)</option>
+        </param>
+        <param name="outputs" type="select" label="Tool outputs" multiple="true" optional="false" help="Note that if neither a segmentation overlay nor a label map is created, segmentation and cluster splitting will not be performed. As a consequence, hyperparameters which are determined automatically during segmentation and cluster splitting will not be reported, even if &quot;Report all hyperparameters&quot; is selected.">
             <option value="overlay" selected="true">Create a segmentation overlay</option>
             <option value="masks">Create a label map (e.g., for further processing)</option>
             <option value="cfg">Report all hyperparameters (manually set and automatically determined values)</option>
@@ -124,14 +157,16 @@
     <tests>
         <test expect_num_outputs="3">
             <param name="dataset" value="BBBC033_C2_z28.png" />
+            <param name="global_energy_minimization_pruning" value="exact" />
             <param name="outputs" value="overlay,masks,cfg" />
             <output name="overlay" value="overlay.png" ftype="png" compare="sim_size" />
-            <output name="cfg" value="cfg.tsv" ftype="tsv" compare="sim_size" />
+            <output name="cfg" value="cfg-full.tsv" ftype="tsv" compare="diff" />
         </test>
         <test expect_num_outputs="1">
             <param name="dataset" value="BBBC033_C2_z28.png" />
+            <param name="global_energy_minimization_pruning" value="exact" />
             <param name="outputs" value="cfg" />
-            <output name="cfg" value="cfg.tsv" ftype="tsv" compare="sim_size" />
+            <output name="cfg" value="cfg.tsv" ftype="tsv" compare="diff" />
         </test>
     </tests>
     <help>
@@ -140,6 +175,15 @@
         You can either use an individual input image (PNG, TIF) or a collection of such images.
     </help>
     <citations>
+        <citation type="bibtex">
+        @inproceedings{kostrykin2024,
+          author = {Kostrykin, L. and Rohr, K.},
+          title = {Robust Graph Pruning for Efficient Segmentation and Cluster Splitting of Cell Nuclei using Deformable Shape Models},
+          booktitle = {Proc. IEEE International Symposium on Biomedical Imaging (ISBI'24)},
+          pages = {accepted for presentation},
+          year = {2024},
+        }
+        </citation>
         <citation type="doi">10.1109/TPAMI.2022.3185583</citation>
     </citations>
 </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cfg-full.tsv	Mon Feb 12 14:58:45 2024 +0000
@@ -0,0 +1,68 @@
+Hyperparameter	Value
+AF_scale	
+c2f-region-analysis/AF_min_atom_radius	0.33
+c2f-region-analysis/enabled	True
+c2f-region-analysis/max_atom_norm_energy	0.05
+c2f-region-analysis/max_cluster_marker_irregularity	0.2
+c2f-region-analysis/min_atom_radius	30
+c2f-region-analysis/min_norm_energy_improvement	0.1
+c2f-region-analysis/seed_connectivity	8
+dsm/AF_alpha	0.0005
+dsm/AF_background_margin	0.4
+dsm/AF_smooth_amount	0.2
+dsm/AF_smooth_subsample	0.4
+dsm/alpha	2.116
+dsm/background_margin	26
+dsm/cachesize	1
+dsm/cachetest	
+dsm/cp_timeout	300
+dsm/enabled	True
+dsm/epsilon	1.0
+dsm/gaussian_shape_multiplier	2
+dsm/init	elliptical
+dsm/scale	1000
+dsm/smooth_amount	13
+dsm/smooth_mat_dtype	float32
+dsm/smooth_mat_max_allocations	inf
+dsm/smooth_subsample	26
+dsm/sparsity_tol	0
+global-energy-minimization/AF_beta	0.66
+global-energy-minimization/AF_max_seed_distance	inf
+global-energy-minimization/beta	2793.1200000000003
+global-energy-minimization/enabled	True
+global-energy-minimization/gamma	0.8
+global-energy-minimization/max_iter	5
+global-energy-minimization/max_seed_distance	inf
+global-energy-minimization/max_work_amount	1000000
+global-energy-minimization/pruning	exact
+histological	False
+postprocess/AF_max_object_radius	inf
+postprocess/AF_min_glare_radius	inf
+postprocess/AF_min_object_radius	0.0
+postprocess/contrast_epsilon	0.0001
+postprocess/discard_image_boundary	False
+postprocess/enabled	True
+postprocess/exterior_offset	5
+postprocess/exterior_scale	5
+postprocess/fill_holes	True
+postprocess/glare_detection_min_layer	0.5
+postprocess/glare_detection_num_layers	5
+postprocess/glare_detection_smoothness	3
+postprocess/mask_max_distance	1
+postprocess/mask_smoothness	3
+postprocess/mask_stdamp	2.0
+postprocess/max_boundary_eccentricity	inf
+postprocess/max_eccentricity	0.99
+postprocess/max_norm_energy	0.2
+postprocess/max_object_radius	inf
+postprocess/min_boundary_glare_radius	inf
+postprocess/min_boundary_obj_radius	0
+postprocess/min_contrast	1.35
+postprocess/min_glare_radius	inf
+postprocess/min_object_radius	0.0
+preprocess/AF_sigma2	1.0
+preprocess/enabled	True
+preprocess/lower_clip_mean	False
+preprocess/offset_clip	3
+preprocess/sigma1	1.4142135623730951
+preprocess/sigma2	65.05382386916237
--- a/test-data/cfg.tsv	Thu Nov 16 12:29:41 2023 +0000
+++ b/test-data/cfg.tsv	Mon Feb 12 14:58:45 2024 +0000
@@ -1,78 +1,32 @@
 Hyperparameter	Value
-c2f_region_analysis_min_norm_energy_improvement	0.1
-c2f_region_analysis_max_atom_norm_energy	0.05
-c2f_region_analysis_max_cluster_marker_irregularity	0.2
-dsm_AF_alpha	0.0005
-global_energy_minimization_AF_beta	0.66
-postprocess_mask_max_distance	1
-postprocess_mask_stdamp	2.0
-postprocess_max_norm_energy	0.2
-postprocess_min_contrast	1.35
-postprocess_min_object_radius	0.0
 AF_scale	
+c2f-region-analysis/AF_min_atom_radius	0.33
+c2f-region-analysis/max_atom_norm_energy	0.05
+c2f-region-analysis/max_cluster_marker_irregularity	0.2
+c2f-region-analysis/min_atom_radius	30
+c2f-region-analysis/min_norm_energy_improvement	0.1
+dsm/AF_alpha	0.0005
+dsm/AF_background_margin	0.4
+dsm/AF_smooth_amount	0.2
+dsm/AF_smooth_subsample	0.4
+dsm/alpha	2.116
+dsm/background_margin	26
+dsm/smooth_amount	13
+dsm/smooth_subsample	26
+global-energy-minimization/AF_beta	0.66
+global-energy-minimization/AF_max_seed_distance	inf
+global-energy-minimization/beta	2793.1200000000003
+global-energy-minimization/max_seed_distance	inf
+global-energy-minimization/pruning	exact
+postprocess/AF_max_object_radius	inf
+postprocess/AF_min_glare_radius	inf
+postprocess/AF_min_object_radius	0.0
+postprocess/mask_max_distance	1
+postprocess/mask_stdamp	2.0
+postprocess/max_norm_energy	0.2
+postprocess/max_object_radius	inf
+postprocess/min_contrast	1.35
+postprocess/min_glare_radius	inf
+postprocess/min_object_radius	0.0
 preprocess/AF_sigma2	1.0
 preprocess/sigma2	65.05382386916237
-preprocess/enabled	True
-preprocess/sigma1	1.4142135623730951
-preprocess/offset_clip	3
-preprocess/lower_clip_mean	False
-dsm/AF_alpha	0.0005
-dsm/alpha	2.116
-dsm/AF_smooth_amount	0.2
-dsm/smooth_amount	13
-dsm/AF_smooth_subsample	0.4
-dsm/smooth_subsample	26
-dsm/AF_background_margin	0.4
-dsm/background_margin	26
-dsm/enabled	True
-dsm/cachesize	1
-dsm/cachetest	
-dsm/sparsity_tol	0
-dsm/init	elliptical
-dsm/epsilon	1.0
-dsm/scale	1000
-dsm/gaussian_shape_multiplier	2
-dsm/smooth_mat_dtype	float32
-dsm/smooth_mat_max_allocations	inf
-dsm/cp_timeout	300
-c2f-region-analysis/AF_min_atom_radius	0.33
-c2f-region-analysis/min_atom_radius	30
-c2f-region-analysis/enabled	True
-c2f-region-analysis/seed_connectivity	8
-c2f-region-analysis/max_atom_norm_energy	0.05
-c2f-region-analysis/min_norm_energy_improvement	0.1
-c2f-region-analysis/max_cluster_marker_irregularity	0.2
-global-energy-minimization/AF_beta	0.66
-global-energy-minimization/beta	2793.1200000000003
-global-energy-minimization/AF_max_seed_distance	inf
-global-energy-minimization/max_seed_distance	inf
-global-energy-minimization/enabled	True
-global-energy-minimization/strict	True
-global-energy-minimization/max_iter	5
-global-energy-minimization/gamma	0.8
-global-energy-minimization/max_work_amount	1000000
-postprocess/AF_min_object_radius	0.0
-postprocess/min_object_radius	0.0
-postprocess/AF_max_object_radius	inf
-postprocess/max_object_radius	inf
-postprocess/AF_min_glare_radius	inf
-postprocess/min_glare_radius	inf
-postprocess/enabled	True
-postprocess/max_norm_energy	0.2
-postprocess/discard_image_boundary	False
-postprocess/min_boundary_obj_radius	0
-postprocess/max_eccentricity	0.99
-postprocess/max_boundary_eccentricity	inf
-postprocess/exterior_scale	5
-postprocess/exterior_offset	5
-postprocess/min_contrast	1.35
-postprocess/contrast_epsilon	0.0001
-postprocess/mask_stdamp	2
-postprocess/mask_max_distance	1
-postprocess/mask_smoothness	3
-postprocess/fill_holes	True
-postprocess/glare_detection_smoothness	3
-postprocess/glare_detection_num_layers	5
-postprocess/glare_detection_min_layer	0.5
-postprocess/min_boundary_glare_radius	inf
-histological	False