diff selection.xml @ 0:c7527212b66b draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/spapros/ commit aed7fe13fa0ed09d77a31eeecaf3ec3fba7eed3b
author iuc
date Mon, 16 Sep 2024 11:37:46 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/selection.xml	Mon Sep 16 11:37:46 2024 +0000
@@ -0,0 +1,541 @@
+<tool id="spapros_selection" name="Selection" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@">
+    <description>of marker genes with spapros</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements">
+    </expand>
+    <expand macro="version_command"/>
+    <command detect_errors="exit_code"><![CDATA[
+@CMD@
+      ]]></command>
+    <configfiles>
+        <configfile name="script_file"><![CDATA[
+@CMD_imports@
+@CMD_read_inputs@
+
+random.seed($seed)
+
+mpl.rcParams['figure.dpi'] = $general_figure_options.dpi
+plt.rcParams["font.size"] = $general_figure_options.fontsize
+
+header_markerset='infer'
+#if $cond_markerset.select_markerset == 'True' and $cond_markerset.header_markerset == 'not_included':
+header_markerset=None
+#end if
+
+selector = sp.se.ProbesetSelector(
+adata, 
+celltype_key='$celltype_key',
+#if $genes_key != '':
+genes_key='$genes_key',
+#else:
+genes_key=None,
+#end if
+#if $cond_n.select_n == 'True':
+n=$cond_n.n,
+#else:
+n=None,
+#end if
+#if $preselected_genes != ''
+preselected_genes = '$preselected_genes'.split(','),
+#end if
+#if $prior_genes != ''
+prior_genes = '$prior_genes'.split(','),
+#end if
+#if $cond_n_pca_genes.select_n_pca_genes == 'True':
+n_pca_genes=$cond_n_pca_genes.n_pca_genes,
+#end if
+#if $min_mean_difference != 0.0:
+min_mean_difference=$min_mean_difference,
+#end if
+n_min_markers=$n_min_markers,
+#if $celltypes != 'all':
+celltypes='$celltypes'.split(','),
+#else
+celltypes='$celltypes',
+#end if
+#if $cond_markerset.select_markerset == 'True':
+marker_list = {key: [v for v in list(value.values()) if pd.notna(v)] for key, value in pd.read_csv('$cond_markerset.markerset', sep='\t', index_col=0, header=header_markerset).to_dict(orient='index').items()},
+#end if
+n_list_markers=$n_list_markers,
+marker_corr_th=$marker_corr_th,
+#if $pca_penalties != ''
+pca_penalties = '$pca_penalties'.split(','),
+#end if
+#if $DE_penalties != ''
+DE_penalties = '$DE_penalties'.split(','),
+#end if
+#if $m_penalties_adata_celltypes != ''
+m_penalties_adata_celltypes = '$m_penalties_adata_celltypes'.split(','),
+#end if
+#if $m_penalties_list_celltypes != ''
+m_penalties_list_celltypes = '$m_penalties_list_celltypes'.split(','),
+#end if
+#if $advanced_options.cond_DE_selection_hparams.select_DE_selection_hparams == 'True':
+DE_selection_hparams={"n": $advanced_options.cond_DE_selection_hparams.n_DE_selection_hparams, "per_group": $advanced_options.cond_DE_selection_hparams.per_group}
+#end if
+#if $advanced_options.cond_forest_hparams.select_forest_hparams == 'True':
+forest_hparams={"n_trees": $advanced_options.cond_forest_hparams.n_trees, "subsample": $advanced_options.cond_forest_hparams.subsample, "test_subsample": $advanced_options.cond_forest_hparams.test_subsample},
+#end if
+#if $advanced_options.cond_forest_DE_baseline_hparams.select_forest_DE_baseline_hparams == 'True':
+forest_DE_baseline_hparams={
+"n_DE": $advanced_options.cond_forest_DE_baseline_hparams.n_DE,
+"min_score": $advanced_options.cond_forest_DE_baseline_hparams.min_score,
+"n_stds": $advanced_options.cond_forest_DE_baseline_hparams.n_stds,
+"max_step": $advanced_options.cond_forest_DE_baseline_hparams.max_step,
+"min_outlier_dif": $advanced_options.cond_forest_DE_baseline_hparams.min_outlier_dif,
+"n_terminal_repeats": $advanced_options.cond_forest_DE_baseline_hparams.n_terminal_repeats,
+},
+#end if
+#if $advanced_options.cond_add_forest_genes_hparams.select_add_forest_genes_hparams == 'True':
+add_forest_genes_hparams={"n_max_per_it": $advanced_options.cond_add_forest_genes_hparams.n_max_per_it, "performance_th": $advanced_options.cond_add_forest_genes_hparams.performance_th, "importance_th": $advanced_options.cond_add_forest_genes_hparams.importance_th},
+#end if
+#if $advanced_options.cond_marker_selection_hparams.select_marker_selection_hparams == 'True':
+marker_selection_hparams={"penalty_threshold": $advanced_options.cond_marker_selection_hparams.penalty_threshold},
+#end if
+verbosity=0,
+seed=$seed,
+n_jobs=int(os.environ.get('GALAXY_SLOTS', '4'))
+)
+
+selector.select_probeset()
+
+sp.pl.masked_dotplot(
+adata, 
+selector,
+ct_key='$figure_options_masked_dotplot.ct_key',
+imp_threshold=$figure_options_masked_dotplot.imp_threshold,
+#if $figure_options_masked_dotplot.celltypes != '':
+celltypes='$figure_options_masked_dotplot.celltypes',
+#end if
+#if $figure_options_masked_dotplot.n_genes != 0:
+n_genes=$figure_options_masked_dotplot.n_genes,
+#end if
+#if $figure_options_masked_dotplot.comb_markers_only:
+comb_markers_only=True,
+#end if
+#if $figure_options_masked_dotplot.markers_only:
+markers_only=True,
+#end if
+cmap='$figure_options_masked_dotplot.cmap',
+comb_marker_color='$figure_options_masked_dotplot.comb_marker_color',
+marker_color='$figure_options_masked_dotplot.marker_color',
+non_adata_celltypes_color='$figure_options_masked_dotplot.non_adata_celltypes_color',
+use_raw=$figure_options_masked_dotplot.use_raw,
+save='masked_dotplot.$format',
+)
+
+selector.plot_gene_overlap(
+style='$figure_options_plot_gene_overlap.style',
+save='gene_overlap.$format',
+show=False
+)
+
+probe_genes = selector.probeset.index[selector.probeset.selection]
+celltypes_DE_1vsall = list(selector.probeset[selector.probeset.selection]['celltypes_DE_1vsall'])
+celltypes_DE_specific = list(selector.probeset[selector.probeset.selection]['celltypes_DE_specific'])
+celltypes_DE = list(selector.probeset[selector.probeset.selection]['celltypes_DE'])
+celltypes_marker = list(selector.probeset[selector.probeset.selection]['celltypes_marker'])
+
+marker_dict = dict()
+
+for i,g in enumerate(probe_genes):
+    recognized_celltypes =  list(set(celltypes_DE_1vsall[i].split(',') + celltypes_DE_specific[i].split(',') + \
+                                 celltypes_DE[i].split(',') + celltypes_marker[i].split(',')))
+
+    if (len(recognized_celltypes) > 1 and '' in recognized_celltypes):
+      recognized_celltypes.remove('')
+
+    for c in recognized_celltypes:
+        if c == '':
+           c = 'Unkown'
+        if c not in marker_dict:
+          marker_dict[c] = [g]
+        else:
+          marker_dict[c] = marker_dict[c] + [g]
+
+# Find the maximum length of lists
+max_len = max(len(lst) for lst in marker_dict.values())
+sorted_marker_dict_by_keys = {key: marker_dict[key] for key in sorted(marker_dict.keys())}
+# Fill smaller lists with empty values
+for key, value in sorted_marker_dict_by_keys.items():
+    sorted_marker_dict_by_keys[key] = value + [''] * (max_len - len(value))
+df = pd.DataFrame(sorted_marker_dict_by_keys).T
+df.to_csv('marker.tsv', sep='\t', index=True)
+
+]]></configfile>
+    </configfiles>
+    <inputs>
+        <expand macro="inputs_anndata"/>
+        <expand macro="param_plot_format"/>
+        <param name="celltype_key" type="text" value="celltype" optional="false" label="Key in adata.obs with celltype annotations"/>
+        <param name="genes_key" type="text" optional="true" label="Key in adata.var for preselected genes" help="This is typically highly_variable_genes. Leave empty to not subset genes."/>
+        <conditional name="cond_n">
+            <param name="select_n" type="select" label="Do you want to set the number of finally selected genes?" help="Note that when `No` we automatically infer n as the minimal number of recommended genes.">
+                <option value="False">No</option>
+                <option value="True">Yes</option>
+            </param>
+            <when value="True">
+                <param argument="n" type="integer" value="20" min="1" optional="false" label="Number of finally selected genes" help="Setting n might change the gene ranking since the final added list_markers are added based on the theoretically added genes without list_markers."/>
+            </when>
+            <when value="False"/>
+        </conditional>
+        <param name="preselected_genes" type="text" optional="true" label="Pre selected genes (comma separated)" help="These will also have the highest ranking in the final list."/>
+        <param name="prior_genes" type="text" optional="true" label="Prioritized genes (comma separated)"/>
+        <conditional name="cond_n_pca_genes">
+            <param name="select_n_pca_genes" type="select" label="Do you want to set the number of preselected pca genes?" help="If not, then this step will be skipped.">
+                <option value="True">Yes</option>
+                <option value="False">No</option>
+            </param>
+            <when value="True">
+                <param argument="n_pca_genes" type="integer" value="100" min="1" optional="false" label="Number of preselected pca genes"/>
+            </when>
+            <when value="False"/>
+        </conditional>
+        <param argument="min_mean_difference" type="float" value="0.0" optional="false" label="Minimal difference of mean expression between at least one celltype and the background" help="This minimal difference is applied as an additional binary penalty in pca_penalties, DE_penalties and m_penalties_adata_celltypes."/>
+        <param argument="n_min_markers" type="integer" value="2" min="1" optional="false" label="The minimal number of identified and added markers"/>
+        <param name="celltypes" type="text" value="all" optional="false" label="Cell types for which trees are trained" help="If not `all` then seperate the cell type with a comma (e.g., Glia,Neuron)"/>
+        <conditional name="cond_markerset">
+            <param name="select_markerset" type="select" label="Do you want to provide a set of marker genes?">
+                <option value="False">No</option>
+                <option value="True">Yes</option>
+            </param>
+            <when value="True">
+                <param name="markerset" type="data" format="tabular" label="Markerset tabular file with rows=conditions (e.g., celltypes) and column=features (e.g., genes)"/>
+                <param name="header_markerset" type="select" optional="false" label="Header in the list of markers?">
+                    <option value="included">Header included</option>
+                    <option value="not_included">Header not included</option>
+                </param>
+            </when>
+            <when value="False"/>
+        </conditional>
+        <param argument="n_list_markers" type="integer" value="2" min="1" optional="false" label="Minimal number of markers per celltype that are at least selected" help="Selected means either selecting genes from the marker list or having correlated genes in the already selected panel."/>
+        <param argument="marker_corr_th" type="float" value="0.5" optional="false" label="Minimal correlation to consider a gene as captured"/>
+        <param name="pca_penalties" type="text" optional="true" label="List of keys for columns in adata.var containing penalty factors that are multiplied with the scores for PCA based gene selection" help="(comma separted)"/>
+        <param name="DE_penalties" type="text" optional="true" label="List of keys for columns in adata.var containing penalty factors that are multiplied with the scores for DE based gene selection (comma separted)" help="(comma separted)"/>
+        <param name="m_penalties_adata_celltypes" type="text" optional="true" label="List of keys for columns in adata.var containing penalty factors to filter out marker genes if a gene's penalty &lt; threshold for celltypes in adata" help="(comma separted)"/>
+        <param name="m_penalties_list_celltypes" type="text" optional="true" label="List of keys for columns in adata.var containing penalty factors to filter out marker genes if a gene's penalty &lt; threshold for celltypes **not** in adata" help="(comma separted)"/>
+        <param argument="seed" type="integer" value="123" min="0" optional="false" label="Random number seed"/>
+
+        <section name="advanced_options" title="Advanced Options" expanded="false">
+            <conditional name="cond_DE_selection_hparams">
+                <param name="select_DE_selection_hparams" type="select" label="Do you want to tune hyperparameters for the DE based gene selection?">
+                    <option value="False">No</option>
+                    <option value="True">Yes</option>
+                </param>
+                <when value="True">
+                    <param argument="n_DE_selection_hparams" type="integer" value="3" optional="false" label="n"/>
+                    <param name="per_group" type="select" label="per_group">
+                        <option value="False">No</option>
+                        <option value="True">Yes</option>
+                    </param>
+                </when>
+                <when value="False"/>
+            </conditional>
+            <conditional name="cond_forest_hparams">
+                <param name="select_forest_hparams" type="select" label="Do you want to tune hyperparameters for the forest based gene selection?">
+                    <option value="False">No</option>
+                    <option value="True">Yes</option>
+                </param>
+                <when value="True">
+                    <param argument="n_trees" type="integer" value="50" optional="false" label="n_trees"/>
+                    <param argument="subsample" type="integer" value="1000" optional="false" label="subsample"/>
+                    <param argument="test_subsample" type="integer" value="3000" optional="false" label="test_subsample"/>
+                </when>
+                <when value="False"/>
+            </conditional>
+            <conditional name="cond_forest_DE_baseline_hparams">
+                <param name="select_forest_DE_baseline_hparams" type="select" label="Do you want to tune hyperparameters for the DE based gene selection?">
+                    <option value="False">No</option>
+                    <option value="True">Yes</option>
+                </param>
+                <when value="True">
+                    <param argument="n_DE" type="integer" value="1" optional="false" label="n_DE"/>
+                    <param argument="min_score" type="float" value="0.9" optional="false" label="min_score"/>
+                    <param argument="n_stds" type="float" value="1.0" optional="false" label="n_stds"/>
+                    <param argument="max_step" type="integer" value="3" optional="false" label="max_step"/>
+                    <param argument="min_outlier_dif" type="float" value="0.02" optional="false" label="min_outlier_dif"/>
+                    <param argument="n_terminal_repeats" type="integer" value="3" optional="false" label="n_terminal_repeats"/>
+                </when>
+                <when value="False"/>
+            </conditional>
+            <conditional name="cond_add_forest_genes_hparams">
+                <param name="select_add_forest_genes_hparams" type="select" label="Do you want to tune hyperparameters for adding marker genes to decision trees?">
+                    <option value="False">No</option>
+                    <option value="True">Yes</option>
+                </param>
+                <when value="True">
+                    <param argument="n_max_per_it" type="integer" value="5" optional="false" label="n_max_per_it"/>
+                    <param argument="performance_th" type="float" value="0.02" optional="false" label="performance_th"/>
+                    <param argument="importance_th" type="integer" value="0" optional="false" label="importance_th"/>
+                </when>
+                <when value="False"/>
+            </conditional>
+            <conditional name="cond_marker_selection_hparams">
+                <param name="select_marker_selection_hparams" type="select" label="Do you want to tune marker selection hyperparameters?">
+                    <option value="False">No</option>
+                    <option value="True">Yes</option>
+                </param>
+                <when value="True">
+                    <param argument="penalty_threshold" type="integer" value="1" optional="false" label="penalty_threshold"/>
+                </when>
+                <when value="False"/>
+            </conditional>
+        </section>
+
+        <section name="general_figure_options" title="General Figure Output Options" expanded="false">
+            <param argument="dpi" type="integer" value="300" min="1" label="Dpi of figures"/>
+            <param argument="fontsize" type="integer" value="100" min="1" label="Font size of figures"/>
+        </section>
+
+        <section name="figure_options_masked_dotplot" title="Figure Output Options for masked_dotplot" expanded="false">
+            <param name="ct_key" type="text" value="celltype" optional="false" label="Key in adata.var for preselected genes" help="Column of adata.obs with cell type annotation"/>
+            <param argument="imp_threshold" type="float" value="0.05" min="0.0" optional="false" label="Annotate genes as Spapros marker only for those genes with importance > imp_threshold"/>
+            <param name="celltypes" type="text" optional="true" label="Subset of celltypes (rows of dotplot)"/>
+            <param argument="n_genes" type="integer" value="0" min="0" label="Plot top n_genes genes." help="If 0 then all."/>
+            <param name="comb_markers_only" type="boolean" value="false" label="Do you want to plot only genes that are Spapros markers for the plotted cell types?"/>
+            <param name="markers_only" type="boolean" value="false" label="Do you want to plot only genes that are markers for the plotted cell types?"/>
+            <param name="cmap" type="text" value="Reds" optional="false" label="Colormap of mean expressions"/>
+            <param name="comb_marker_color" type="text" value="darkblue" optional="false" label="Color for Spapros markers"/>
+            <param name="marker_color" type="text" value="blue" optional="false" label="Color for marker genes"/>
+            <param name="non_adata_celltypes_color" type="text" value="grey" optional="false" label="Color for celltypes that don't occur in the data set."/>
+            <param name="use_raw" type="select" label="Do you want to use adata.raw for plotting?">
+                <option value="False">No</option>
+                <option value="True">Yes</option>
+            </param>
+        </section>
+
+        <section name="figure_options_plot_gene_overlap" title="Figure Output Options for plot_gene_overlap" expanded="false">
+            <param name="style" type="select" label="Plot type">
+                <option value="upset">Upset plot</option>
+                <option value="venn">Venn diagram</option>
+            </param>
+        </section>
+
+        <expand macro="inputs_common_advanced"/>
+    </inputs>
+    <outputs>
+        <data name="out_masked_dotplot_png" format="png" from_work_dir="*masked_dotplot.png" label="PNG masked_dotplot from ${tool.name} on ${on_string}">
+            <filter>format == 'png'</filter>
+        </data>
+        <data name="out_masked_dotplot_pdf" format="pdf" from_work_dir="*masked_dotplot.pdf" label="PDF masked_dotplot from ${tool.name} on ${on_string}">
+            <filter>format == 'pdf'</filter>
+        </data>
+        <data name="out_masked_dotplot_svg" format="svg" from_work_dir="*masked_dotplot.svg" label="SVG masked_dotplot from ${tool.name} on ${on_string}">
+            <filter>format == 'svg'</filter>
+        </data>
+        <data name="out_gene_overlap_png" format="png" from_work_dir="*gene_overlap.png" label="PNG gene_overlap from ${tool.name} on ${on_string}">
+            <filter>format == 'png'</filter>
+        </data>
+        <data name="out_gene_overlap_pdf" format="pdf" from_work_dir="*gene_overlap.pdf" label="PDF gene_overlap from ${tool.name} on ${on_string}">
+            <filter>format == 'pdf'</filter>
+        </data>
+        <data name="out_gene_overlap_svg" format="svg" from_work_dir="*gene_overlap.svg" label="SVG gene_overlap from ${tool.name} on ${on_string}">
+            <filter>format == 'svg'</filter>
+        </data>
+        <data name="marker_out" format="tabular" from_work_dir="marker.tsv" label="${tool.name} on ${on_string}: Markers"/>
+        <expand macro="hidden_outputs"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="4">
+            <param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_reduced_240cells.h5ad"/>
+            <param name="format" value="png"/>
+            <param name="genes_key" value="highly_variable"/>
+            <param name="show_log" value="true" />
+            <section name="general_figure_options">
+                <param name="dpi" value="100"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="celltype_key='celltype',"/>
+                    <has_text_matching expression="genes_key='highly_variable',"/>
+                    <has_text_matching expression="n_pca_genes=100,"/>
+                    <has_text_matching expression="seed=123,"/>
+                    <has_text_matching expression="cmap='Reds',"/>
+                    <has_text_matching expression="save='masked_dotplot.png',"/>
+                    <has_text_matching expression="style='upset',"/>
+                    <has_text_matching expression="save='gene_overlap.png',"/>  
+                </assert_contents>
+            </output>
+            <output name="out_masked_dotplot_png">
+                <assert_contents>
+                    <has_image_width width="4055" delta="2"/>
+                    <has_image_height height="1108" delta="2"/>
+                </assert_contents>
+            </output>
+            <output name="out_gene_overlap_png">
+                <assert_contents>
+                    <has_image_width width="1189" delta="2"/>
+                    <has_image_height height="600" delta="2"/>
+                </assert_contents>
+            </output>
+            <output name="marker_out" file="marker_out_test1.tsv" ftype="tabular"/>
+        </test>
+        <test expect_num_outputs="4">
+            <param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_reduced_240cells.h5ad"/>
+            <param name="format" value="png"/>
+            <param name="genes_key" value="highly_variable"/>
+            <param name="select_n" value="True"/>
+            <param name="n" value="10"/>
+            <section name="general_figure_options">
+                <param name="dpi" value="100"/>
+            </section>
+            <param name="show_log" value="true" />
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="celltype_key='celltype',"/>
+                    <has_text_matching expression="genes_key='highly_variable',"/>
+                    <has_text_matching expression="n_pca_genes=100,"/>
+                    <has_text_matching expression="n=10,"/>
+                    <has_text_matching expression="seed=123,"/>
+                    <has_text_matching expression="cmap='Reds',"/>
+                    <has_text_matching expression="save='masked_dotplot.png',"/>
+                    <has_text_matching expression="style='upset',"/>
+                    <has_text_matching expression="save='gene_overlap.png',"/>  
+                </assert_contents>
+            </output>
+            <output name="out_masked_dotplot_png">
+                <assert_contents>
+                    <has_image_width width="2914" delta="2"/>
+                    <has_image_height height="882" delta="2"/>
+                </assert_contents>
+            </output>
+            <output name="out_gene_overlap_png">
+                <assert_contents>
+                    <has_image_width width="1032" delta="2"/>
+                    <has_image_height height="600" delta="2"/>
+                </assert_contents>
+            </output>
+            <output name="marker_out" file="marker_out_test2.tsv" ftype="tabular"/>
+        </test>
+        <test expect_num_outputs="4">
+            <param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_reduced_240cells.h5ad"/>
+            <param name="format" value="png"/>
+            <param name="genes_key" value="highly_variable"/>
+            <param name="celltypes" value="CD34+,CD56+ NK"/>
+            <section name="general_figure_options">
+                <param name="dpi" value="100"/>
+            </section>
+            <param name="show_log" value="true" />
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="celltype_key='celltype',"/>
+                    <has_text_matching expression="genes_key='highly_variable',"/>
+                    <has_text_matching expression="n_pca_genes=100,"/>
+                    <has_text_matching expression="seed=123,"/>
+                    <has_text_matching expression="cmap='Reds',"/>
+                    <has_text_matching expression="save='masked_dotplot.png',"/>
+                    <has_text_matching expression="style='upset',"/>
+                    <has_text_matching expression="save='gene_overlap.png',"/>  
+                </assert_contents>
+            </output>
+            <output name="out_masked_dotplot_png">
+                <assert_contents>
+                    <has_image_width width="2776" delta="2"/>
+                    <has_image_height height="882" delta="2"/>
+                </assert_contents>
+            </output>
+            <output name="out_gene_overlap_png">
+                <assert_contents>
+                    <has_image_width width="929" delta="2"/>
+                    <has_image_height height="565" delta="2"/>
+                </assert_contents>
+            </output>
+            <output name="marker_out" file="marker_out_test3.tsv" ftype="tabular"/>
+        </test>
+        <test expect_num_outputs="4">
+            <param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_reduced_240cells.h5ad"/>
+            <param name="format" value="png"/>
+            <param name="genes_key" value="highly_variable"/>
+            <param name="select_markerset" value="True"/>
+            <param name="markerset" value="marker.tsv"/>
+            <param name="header_markerset" value="not_included"/>
+            <section name="general_figure_options">
+                <param name="dpi" value="100"/>
+            </section>
+            <param name="show_log" value="true"/>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="celltype_key='celltype',"/>
+                    <has_text_matching expression="genes_key='highly_variable',"/>
+                    <has_text_matching expression="n_pca_genes=100,"/>
+                    <has_text_matching expression="seed=123,"/>
+                    <has_text_matching expression="cmap='Reds',"/>
+                    <has_text_matching expression="save='masked_dotplot.png',"/>
+                    <has_text_matching expression="style='upset',"/>
+                    <has_text_matching expression="save='gene_overlap.png',"/>  
+                </assert_contents>
+            </output>
+            <output name="out_masked_dotplot_png">
+                <assert_contents>
+                    <has_image_width width="4055" delta="2"/>
+                    <has_image_height height="1108" delta="2"/>
+                </assert_contents>
+            </output>
+            <output name="out_gene_overlap_png">
+                <assert_contents>
+                    <has_image_width width="1154" delta="2"/>
+                    <has_image_height height="600" delta="2"/>
+                </assert_contents>
+            </output>
+            <output name="marker_out" file="marker_out_test4.tsv" ftype="tabular"/>
+        </test>
+        <test expect_num_outputs="4">
+            <param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_reduced_240cells.h5ad"/>
+            <param name="format" value="png"/>
+            <param name="genes_key" value="highly_variable"/>
+            <section name="advanced_options">
+                <param name="select_DE_selection_hparams" value="True"/>
+                <param name="select_forest_hparams" value="True"/>
+                <param name="select_forest_DE_baseline_hparams" value="True"/>
+                <param name="select_add_forest_genes_hparams" value="True"/>
+                <param name="select_marker_selection_hparams" value="True"/>
+            </section>    
+            <section name="general_figure_options">
+                <param name="dpi" value="100"/>
+            </section>
+            <param name="show_log" value="true"/>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="celltype_key='celltype',"/>
+                    <has_text_matching expression="genes_key='highly_variable',"/>
+                    <has_text_matching expression="n_pca_genes=100,"/>
+                    <has_text_matching expression="seed=123,"/>
+                    <has_text_matching expression="cmap='Reds',"/>
+                    <has_text_matching expression="save='masked_dotplot.png',"/>
+                    <has_text_matching expression="style='upset',"/>
+                    <has_text_matching expression="save='gene_overlap.png',"/>  
+                </assert_contents>
+            </output>
+            <output name="out_masked_dotplot_png">
+                <assert_contents>
+                    <has_image_width width="4055" delta="2"/>
+                    <has_image_height height="1108" delta="2"/>
+                </assert_contents>
+            </output>
+            <output name="out_gene_overlap_png">
+                <assert_contents>
+                    <has_image_width width="1189" delta="2"/>
+                    <has_image_height height="600" delta="2"/>
+                </assert_contents>
+            </output>
+            <output name="marker_out" file="marker_out_test5.tsv" ftype="tabular"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Probe set selection for single-cell sequencing data using spapros.
+============================================================================================================
+
+Spapros is a python package that provides a pipeline for probe set selection and evaluation for targeted spatial transcriptomics data.
+
+Key Features:
+Select probe sets for spatial transcriptomics which identify cell types of interest, capture general transcriptomic variation, and incorporate prior knowledge
+
+Evaluate probe sets with an extensive pipeline
+
+Further documentation can be found here: https://spapros.readthedocs.io/en/latest/index.html.
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>