Mercurial > repos > iuc > spapros_selection

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Mon Sep 16 11:37:46 2024 +0000
@@ -0,0 +1,96 @@
+<macros>
+    <!-- Wrapped spapros version, wrapper revision suffix and Galaxy profile; the tool element combines them into its version and profile attributes. -->
+    <token name="@TOOL_VERSION@">0.1.5</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@profile@">22.05</token>
+    <!-- Package requirement on spapros pinned to @TOOL_VERSION@; the yield slot lets the expanding tool append further requirements. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">spapros</requirement>
+            <yield />
+        </requirements>
+    </xml>
+    <!-- Creator metadata naming the European Galaxy Team as the wrapper author. -->
+    <xml name="creators">
+        <creator>
+            <organization name="European Galaxy Team" url="https://galaxyproject.org/eu/" />
+        </creator>
+    </xml>
+    <!-- DOI citations attached to every tool that expands this macro. -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1101/2022.08.16.504115</citation>
+            <citation type="doi">10.1093/gigascience/giaa102</citation>
+        </citations>
+    </xml>
+    <!-- Prints the version string of the installed spapros package. -->
+    <xml name="version_command">
+        <version_command><![CDATA[python -c "import spapros; print(spapros.__version__)"]]></version_command>
+    </xml>
+    <!-- Shared command line: stage the input as anndata.h5ad, write the rendered script to the log, run it with its standard output appended to the log, then append a listing of the working directory. -->
+    <token name="@CMD@"><![CDATA[
+cp '$adata' 'anndata.h5ad' &&
+cat '$script_file' > '$hidden_output' &&
+python '$script_file' >> '$hidden_output' &&
+ls . >> '$hidden_output'
+    ]]>
+    </token>
+    <!-- Python imports placed at the top of the generated script. -->
+    <token name="@CMD_imports@"><![CDATA[
+import spapros as sp
+import os
+import pandas as pd
+import scanpy as sc
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import random
+    ]]>
+    </token>
+    <!-- Keyword arguments shared by plotting calls: an optional list of probeset ids, the output file name and show=False. The ids arrive as comma-separated text and are split into a list of strings; emitting them unquoted would render undefined Python names. -->
+    <token name="@CMD_plot@"><![CDATA[
+#if $method.set_ids != 'all' and $method.set_ids != '':
+set_ids='$method.set_ids'.split(','),
+#end if
+save='plot.$format',
+show=False
+    ]]>
+    </token>
+    <!-- Free-text list of probeset ids; the value "all" or an empty value selects every probeset. -->
+    <xml name="set_ids">
+        <param name="set_ids" type="text" value="all" optional="true" label="List of probeset ids (comma separated, e.g., DE,HVG,random)" help="Keep it as all or leave it empty to select all probeset ids."/>
+    </xml>
+    <!-- Sanitizer that starts from the character set given by the validinitial token (string.printable by default) and removes the single quote. -->
+    <xml name="sanitize_query" token_validinitial="string.printable">
+        <sanitizer>
+            <valid initial="@VALIDINITIAL@">
+                <remove value="&apos;" />
+            </valid>
+       </sanitizer>
+    </xml>
+    <!-- Sanitizer that starts from the character set given by the validinitial token (string.digits by default) and additionally allows the comma. -->
+    <xml name="sanitize_vectors" token_validinitial="string.digits">
+        <sanitizer>
+            <valid initial="@VALIDINITIAL@">
+                <add value=","/>
+            </valid>
+        </sanitizer>
+    </xml>
+    <!-- Input dataset in h5ad format, exposed to the command and script as adata. -->
+    <xml name="inputs_anndata">
+        <param name="adata" type="data" format="h5ad" label="Annotated data matrix"/>
+    </xml>
+    <!-- Script fragment that reads anndata.h5ad from the working directory (the copy staged by @CMD@) into the variable adata. -->
+    <token name="@CMD_read_inputs@"><![CDATA[
+adata = sc.read_h5ad('anndata.h5ad')
+]]>
+    </token>
+    <!-- Switch, off by default, that the hidden_outputs macro uses to decide whether the log dataset is kept. -->
+    <xml name="inputs_common_advanced">
+        <param name="show_log" type="boolean" checked="false" label="Output Log?" />
+    </xml>
+    <!-- File format of the saved figures; the value is used as the file extension in the script and by the output filters. -->
+    <xml name="param_plot_format">
+        <param name="format" type="select" label="Format for saving figures">
+            <option value="png">png</option>
+            <option value="pdf">pdf</option>
+            <option value="svg">svg</option>
+        </param>
+    </xml>
+    <!-- Marker list input: a tabular file with one row per condition plus a selector stating whether the file has a header row. -->
+    <xml name="param_markerset">
+        <param name="markerset" type="data" format="tabular" label="Markerset tabular file with rows=conditions (e.g., celltypes) and column=features (e.g., genes)" help="This is used to calculate the correlations between your probeset features and marker features. Marker features are for example genes that you know are important for your condition (e.g., celltypes)."/>
+        <param name="header_markerset" type="select" optional="false" label="Header in the list of markers?">
+            <option value="included">Header included</option>
+            <option value="not_included">Header not included</option>
+        </param>
+    </xml>
+    <!-- Log dataset written by @CMD@; the filter keeps it only when show_log is enabled. -->
+    <xml name="hidden_outputs">
+        <data name="hidden_output" format="txt" label="Log file" >
+            <filter>show_log</filter>
+        </data>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/selection.xml	Mon Sep 16 11:37:46 2024 +0000
@@ -0,0 +1,541 @@
+<tool id="spapros_selection" name="Selection" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@">
+    <description>of marker genes with spapros</description>
+    <!-- Versions, requirements, the version command and the command line all come from macros.xml. -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements">
+    </expand>
+    <expand macro="version_command"/>
+    <!-- @CMD@ runs the script rendered from the script_file configfile; failures are detected through the exit code. -->
+    <command detect_errors="exit_code"><![CDATA[
+@CMD@
+      ]]></command>
+    <configfiles>
+        <configfile name="script_file"><![CDATA[
+@CMD_imports@
+@CMD_read_inputs@
+
+random.seed($seed)
+
+mpl.rcParams['figure.dpi'] = $general_figure_options.dpi
+plt.rcParams["font.size"] = $general_figure_options.fontsize
+
+header_markerset='infer'
+#if $cond_markerset.select_markerset == 'True' and $cond_markerset.header_markerset == 'not_included':
+header_markerset=None
+#end if
+
+selector = sp.se.ProbesetSelector(
+adata,
+celltype_key='$celltype_key',
+#if $genes_key != '':
+genes_key='$genes_key',
+#else:
+genes_key=None,
+#end if
+#if $cond_n.select_n == 'True':
+n=$cond_n.n,
+#else:
+n=None,
+#end if
+#if $preselected_genes != ''
+preselected_genes = '$preselected_genes'.split(','),
+#end if
+#if $prior_genes != ''
+prior_genes = '$prior_genes'.split(','),
+#end if
+#if $cond_n_pca_genes.select_n_pca_genes == 'True':
+n_pca_genes=$cond_n_pca_genes.n_pca_genes,
+#end if
+#if $min_mean_difference != 0.0:
+min_mean_difference=$min_mean_difference,
+#end if
+n_min_markers=$n_min_markers,
+#if $celltypes != 'all':
+celltypes='$celltypes'.split(','),
+#else
+celltypes='$celltypes',
+#end if
+#if $cond_markerset.select_markerset == 'True':
+marker_list = {key: [v for v in list(value.values()) if pd.notna(v)] for key, value in pd.read_csv('$cond_markerset.markerset', sep='\t', index_col=0, header=header_markerset).to_dict(orient='index').items()},
+#end if
+n_list_markers=$n_list_markers,
+marker_corr_th=$marker_corr_th,
+#if $pca_penalties != ''
+pca_penalties = '$pca_penalties'.split(','),
+#end if
+#if $DE_penalties != ''
+DE_penalties = '$DE_penalties'.split(','),
+#end if
+#if $m_penalties_adata_celltypes != ''
+m_penalties_adata_celltypes = '$m_penalties_adata_celltypes'.split(','),
+#end if
+#if $m_penalties_list_celltypes != ''
+m_penalties_list_celltypes = '$m_penalties_list_celltypes'.split(','),
+#end if
+#if $advanced_options.cond_DE_selection_hparams.select_DE_selection_hparams == 'True':
+DE_selection_hparams={"n": $advanced_options.cond_DE_selection_hparams.n_DE_selection_hparams, "per_group": $advanced_options.cond_DE_selection_hparams.per_group}
+#end if
+#if $advanced_options.cond_forest_hparams.select_forest_hparams == 'True':
+forest_hparams={"n_trees": $advanced_options.cond_forest_hparams.n_trees, "subsample": $advanced_options.cond_forest_hparams.subsample, "test_subsample": $advanced_options.cond_forest_hparams.test_subsample},
+#end if
+#if $advanced_options.cond_forest_DE_baseline_hparams.select_forest_DE_baseline_hparams == 'True':
+forest_DE_baseline_hparams={
+"n_DE": $advanced_options.cond_forest_DE_baseline_hparams.n_DE,
+"min_score": $advanced_options.cond_forest_DE_baseline_hparams.min_score,
+"n_stds": $advanced_options.cond_forest_DE_baseline_hparams.n_stds,
+"max_step": $advanced_options.cond_forest_DE_baseline_hparams.max_step,
+"min_outlier_dif": $advanced_options.cond_forest_DE_baseline_hparams.min_outlier_dif,
+"n_terminal_repeats": $advanced_options.cond_forest_DE_baseline_hparams.n_terminal_repeats,
+},
+#end if
+#if $advanced_options.cond_add_forest_genes_hparams.select_add_forest_genes_hparams == 'True':
+add_forest_genes_hparams={"n_max_per_it": $advanced_options.cond_add_forest_genes_hparams.n_max_per_it, "performance_th": $advanced_options.cond_add_forest_genes_hparams.performance_th, "importance_th": $advanced_options.cond_add_forest_genes_hparams.importance_th},
+#end if
+#if $advanced_options.cond_marker_selection_hparams.select_marker_selection_hparams == 'True':
+marker_selection_hparams={"penalty_threshold": $advanced_options.cond_marker_selection_hparams.penalty_threshold},
+#end if
+verbosity=0,
+seed=$seed,
+n_jobs=int(os.environ.get('GALAXY_SLOTS', '4'))
+)
+
+selector.select_probeset()
+
+sp.pl.masked_dotplot(
+adata,
+selector,
+ct_key='$figure_options_masked_dotplot.ct_key',
+imp_threshold=$figure_options_masked_dotplot.imp_threshold,
+#if $figure_options_masked_dotplot.celltypes != '':
+celltypes='$figure_options_masked_dotplot.celltypes',
+#end if
+#if $figure_options_masked_dotplot.n_genes != 0:
+n_genes=$figure_options_masked_dotplot.n_genes,
+#end if
+#if $figure_options_masked_dotplot.comb_markers_only:
+comb_markers_only=True,
+#end if
+#if $figure_options_masked_dotplot.markers_only:
+markers_only=True,
+#end if
+cmap='$figure_options_masked_dotplot.cmap',
+comb_marker_color='$figure_options_masked_dotplot.comb_marker_color',
+marker_color='$figure_options_masked_dotplot.marker_color',
+non_adata_celltypes_color='$figure_options_masked_dotplot.non_adata_celltypes_color',
+use_raw=$figure_options_masked_dotplot.use_raw,
+save='masked_dotplot.$format',
+)
+
+# Write the gene overlap figure (upset plot or Venn diagram, per the style option) to gene_overlap.<format>.
+selector.plot_gene_overlap(
+style='$figure_options_plot_gene_overlap.style',
+save='gene_overlap.$format',
+show=False
+)
+
+probe_genes = selector.probeset.index[selector.probeset.selection]
+celltypes_DE_1vsall = list(selector.probeset[selector.probeset.selection]['celltypes_DE_1vsall'])
+celltypes_DE_specific = list(selector.probeset[selector.probeset.selection]['celltypes_DE_specific'])
+celltypes_DE = list(selector.probeset[selector.probeset.selection]['celltypes_DE'])
+celltypes_marker = list(selector.probeset[selector.probeset.selection]['celltypes_marker'])
+
+marker_dict = dict()
+
+for i,g in enumerate(probe_genes):
+    recognized_celltypes =  list(set(celltypes_DE_1vsall[i].split(',') + celltypes_DE_specific[i].split(',') + \
+                                 celltypes_DE[i].split(',') + celltypes_marker[i].split(',')))
+
+    if (len(recognized_celltypes) > 1 and '' in recognized_celltypes):
+      recognized_celltypes.remove('')
+
+    for c in recognized_celltypes:
+        if c == '':
+           c = 'Unkown'
+        if c not in marker_dict:
+          marker_dict[c] = [g]
+        else:
+          marker_dict[c] = marker_dict[c] + [g]
+
+# Find the maximum length of lists
+max_len = max(len(lst) for lst in marker_dict.values())
+sorted_marker_dict_by_keys = {key: marker_dict[key] for key in sorted(marker_dict.keys())}
+# Fill smaller lists with empty values
+for key, value in sorted_marker_dict_by_keys.items():
+    sorted_marker_dict_by_keys[key] = value + [''] * (max_len - len(value))
+df = pd.DataFrame(sorted_marker_dict_by_keys).T
+df.to_csv('marker.tsv', sep='\t', index=True)
+
+]]></configfile>
+    </configfiles>
+    <inputs>
+        <!-- Input data, figure format and the main gene selection parameters passed to the selector in the script. -->
+        <expand macro="inputs_anndata"/>
+        <expand macro="param_plot_format"/>
+        <param name="celltype_key" type="text" value="celltype" optional="false" label="Key in adata.obs with celltype annotations"/>
+        <param name="genes_key" type="text" optional="true" label="Key in adata.var for preselected genes" help="This is typically highly_variable_genes. Leave empty to not subset genes."/>
+        <conditional name="cond_n">
+            <param name="select_n" type="select" label="Do you want to set the number of finally selected genes?" help="Note that when `No` we automatically infer n as the minimal number of recommended genes.">
+                <option value="False">No</option>
+                <option value="True">Yes</option>
+            </param>
+            <when value="True">
+                <param argument="n" type="integer" value="20" min="1" optional="false" label="Number of finally selected genes" help="Setting n might change the gene ranking since the final added list_markers are added based on the theoretically added genes without list_markers."/>
+            </when>
+            <when value="False"/>
+        </conditional>
+        <param name="preselected_genes" type="text" optional="true" label="Pre selected genes (comma separated)" help="These will also have the highest ranking in the final list."/>
+        <param name="prior_genes" type="text" optional="true" label="Prioritized genes (comma separated)"/>
+        <conditional name="cond_n_pca_genes">
+            <param name="select_n_pca_genes" type="select" label="Do you want to set the number of preselected pca genes?" help="If not, then this step will be skipped.">
+                <option value="True">Yes</option>
+                <option value="False">No</option>
+            </param>
+            <when value="True">
+                <param argument="n_pca_genes" type="integer" value="100" min="1" optional="false" label="Number of preselected pca genes"/>
+            </when>
+            <when value="False"/>
+        </conditional>
+        <param argument="min_mean_difference" type="float" value="0.0" optional="false" label="Minimal difference of mean expression between at least one celltype and the background" help="This minimal difference is applied as an additional binary penalty in pca_penalties, DE_penalties and m_penalties_adata_celltypes."/>
+        <param argument="n_min_markers" type="integer" value="2" min="1" optional="false" label="The minimal number of identified and added markers"/>
+        <param name="celltypes" type="text" value="all" optional="false" label="Cell types for which trees are trained" help="If not `all`, separate the cell types with commas (e.g., Glia,Neuron)"/>
+        <!-- Optional marker list; the file and header parameters are taken from the shared param_markerset macro instead of being defined a second time here. -->
+        <conditional name="cond_markerset">
+            <param name="select_markerset" type="select" label="Do you want to provide a set of marker genes?">
+                <option value="False">No</option>
+                <option value="True">Yes</option>
+            </param>
+            <when value="True">
+                <expand macro="param_markerset"/>
+            </when>
+            <when value="False"/>
+        </conditional>
+        <!-- Marker thresholds, penalty column lists (comma-separated text that the script splits into lists) and the random seed. -->
+        <param argument="n_list_markers" type="integer" value="2" min="1" optional="false" label="Minimal number of markers per celltype that are at least selected" help="Selected means either selecting genes from the marker list or having correlated genes in the already selected panel."/>
+        <param argument="marker_corr_th" type="float" value="0.5" optional="false" label="Minimal correlation to consider a gene as captured"/>
+        <param name="pca_penalties" type="text" optional="true" label="List of keys for columns in adata.var containing penalty factors that are multiplied with the scores for PCA based gene selection" help="(comma separated)"/>
+        <param name="DE_penalties" type="text" optional="true" label="List of keys for columns in adata.var containing penalty factors that are multiplied with the scores for DE based gene selection" help="(comma separated)"/>
+        <param name="m_penalties_adata_celltypes" type="text" optional="true" label="List of keys for columns in adata.var containing penalty factors to filter out marker genes if a gene's penalty &lt; threshold for celltypes in adata" help="(comma separated)"/>
+        <param name="m_penalties_list_celltypes" type="text" optional="true" label="List of keys for columns in adata.var containing penalty factors to filter out marker genes if a gene's penalty &lt; threshold for celltypes **not** in adata" help="(comma separated)"/>
+        <param argument="seed" type="integer" value="123" min="0" optional="false" label="Random number seed"/>
+
+        <!-- Optional hyperparameter dictionaries; each conditional maps to one *_hparams argument of the selector and is only passed when switched to Yes. -->
+        <section name="advanced_options" title="Advanced Options" expanded="false">
+            <conditional name="cond_DE_selection_hparams">
+                <param name="select_DE_selection_hparams" type="select" label="Do you want to tune hyperparameters for the DE based gene selection?">
+                    <option value="False">No</option>
+                    <option value="True">Yes</option>
+                </param>
+                <when value="True">
+                    <param argument="n_DE_selection_hparams" type="integer" value="3" optional="false" label="n"/>
+                    <param name="per_group" type="select" label="per_group">
+                        <option value="False">No</option>
+                        <option value="True">Yes</option>
+                    </param>
+                </when>
+                <when value="False"/>
+            </conditional>
+            <conditional name="cond_forest_hparams">
+                <param name="select_forest_hparams" type="select" label="Do you want to tune hyperparameters for the forest based gene selection?">
+                    <option value="False">No</option>
+                    <option value="True">Yes</option>
+                </param>
+                <when value="True">
+                    <param argument="n_trees" type="integer" value="50" optional="false" label="n_trees"/>
+                    <param argument="subsample" type="integer" value="1000" optional="false" label="subsample"/>
+                    <param argument="test_subsample" type="integer" value="3000" optional="false" label="test_subsample"/>
+                </when>
+                <when value="False"/>
+            </conditional>
+            <conditional name="cond_forest_DE_baseline_hparams">
+                <param name="select_forest_DE_baseline_hparams" type="select" label="Do you want to tune hyperparameters for adding DE genes to decision trees?">
+                    <option value="False">No</option>
+                    <option value="True">Yes</option>
+                </param>
+                <when value="True">
+                    <param argument="n_DE" type="integer" value="1" optional="false" label="n_DE"/>
+                    <param argument="min_score" type="float" value="0.9" optional="false" label="min_score"/>
+                    <param argument="n_stds" type="float" value="1.0" optional="false" label="n_stds"/>
+                    <param argument="max_step" type="integer" value="3" optional="false" label="max_step"/>
+                    <param argument="min_outlier_dif" type="float" value="0.02" optional="false" label="min_outlier_dif"/>
+                    <param argument="n_terminal_repeats" type="integer" value="3" optional="false" label="n_terminal_repeats"/>
+                </when>
+                <when value="False"/>
+            </conditional>
+            <conditional name="cond_add_forest_genes_hparams">
+                <param name="select_add_forest_genes_hparams" type="select" label="Do you want to tune hyperparameters for adding marker genes to decision trees?">
+                    <option value="False">No</option>
+                    <option value="True">Yes</option>
+                </param>
+                <when value="True">
+                    <param argument="n_max_per_it" type="integer" value="5" optional="false" label="n_max_per_it"/>
+                    <param argument="performance_th" type="float" value="0.02" optional="false" label="performance_th"/>
+                    <param argument="importance_th" type="integer" value="0" optional="false" label="importance_th"/>
+                </when>
+                <when value="False"/>
+            </conditional>
+            <conditional name="cond_marker_selection_hparams">
+                <param name="select_marker_selection_hparams" type="select" label="Do you want to tune marker selection hyperparameters?">
+                    <option value="False">No</option>
+                    <option value="True">Yes</option>
+                </param>
+                <when value="True">
+                    <param argument="penalty_threshold" type="integer" value="1" optional="false" label="penalty_threshold"/>
+                </when>
+                <when value="False"/>
+            </conditional>
+        </section>
+
+        <!-- Resolution and font size that the script writes into the matplotlib rcParams (figure.dpi and font.size). -->
+        <section name="general_figure_options" title="General Figure Output Options" expanded="false">
+            <param argument="dpi" type="integer" value="300" min="1" label="Dpi of figures"/>
+            <param argument="fontsize" type="integer" value="100" min="1" label="Font size of figures"/>
+        </section>
+
+        <!-- Arguments of the masked_dotplot call in the script. The boolean switches default to unchecked and use the checked attribute, like show_log in macros.xml. -->
+        <section name="figure_options_masked_dotplot" title="Figure Output Options for masked_dotplot" expanded="false">
+            <param name="ct_key" type="text" value="celltype" optional="false" label="Key in adata.obs with celltype annotations" help="Column of adata.obs with cell type annotation"/>
+            <param argument="imp_threshold" type="float" value="0.05" min="0.0" optional="false" label="Annotate genes as Spapros marker only for those genes with importance > imp_threshold"/>
+            <param name="celltypes" type="text" optional="true" label="Subset of celltypes (rows of dotplot)"/>
+            <param argument="n_genes" type="integer" value="0" min="0" label="Plot top n_genes genes." help="If 0 then all."/>
+            <param name="comb_markers_only" type="boolean" checked="false" label="Do you want to plot only genes that are Spapros markers for the plotted cell types?"/>
+            <param name="markers_only" type="boolean" checked="false" label="Do you want to plot only genes that are markers for the plotted cell types?"/>
+            <param name="cmap" type="text" value="Reds" optional="false" label="Colormap of mean expressions"/>
+            <param name="comb_marker_color" type="text" value="darkblue" optional="false" label="Color for Spapros markers"/>
+            <param name="marker_color" type="text" value="blue" optional="false" label="Color for marker genes"/>
+            <param name="non_adata_celltypes_color" type="text" value="grey" optional="false" label="Color for celltypes that don't occur in the data set."/>
+            <param name="use_raw" type="select" label="Do you want to use adata.raw for plotting?">
+                <option value="False">No</option>
+                <option value="True">Yes</option>
+            </param>
+        </section>
+
+        <!-- Plot type passed as the style argument of plot_gene_overlap in the script. -->
+        <section name="figure_options_plot_gene_overlap" title="Figure Output Options for plot_gene_overlap" expanded="false">
+            <param name="style" type="select" label="Plot type">
+                <option value="upset">Upset plot</option>
+                <option value="venn">Venn diagram</option>
+            </param>
+        </section>
+
+        <expand macro="inputs_common_advanced"/>
+    </inputs>
+    <!-- One image dataset per plot and figure format, each kept only when its format is the selected one, plus the marker table and the optional log. -->
+    <outputs>
+        <data name="out_masked_dotplot_png" format="png" from_work_dir="*masked_dotplot.png" label="PNG masked_dotplot from ${tool.name} on ${on_string}">
+            <filter>format == 'png'</filter>
+        </data>
+        <data name="out_masked_dotplot_pdf" format="pdf" from_work_dir="*masked_dotplot.pdf" label="PDF masked_dotplot from ${tool.name} on ${on_string}">
+            <filter>format == 'pdf'</filter>
+        </data>
+        <data name="out_masked_dotplot_svg" format="svg" from_work_dir="*masked_dotplot.svg" label="SVG masked_dotplot from ${tool.name} on ${on_string}">
+            <filter>format == 'svg'</filter>
+        </data>
+        <data name="out_gene_overlap_png" format="png" from_work_dir="*gene_overlap.png" label="PNG gene_overlap from ${tool.name} on ${on_string}">
+            <filter>format == 'png'</filter>
+        </data>
+        <data name="out_gene_overlap_pdf" format="pdf" from_work_dir="*gene_overlap.pdf" label="PDF gene_overlap from ${tool.name} on ${on_string}">
+            <filter>format == 'pdf'</filter>
+        </data>
+        <data name="out_gene_overlap_svg" format="svg" from_work_dir="*gene_overlap.svg" label="SVG gene_overlap from ${tool.name} on ${on_string}">
+            <filter>format == 'svg'</filter>
+        </data>
+        <data name="marker_out" format="tabular" from_work_dir="marker.tsv" label="${tool.name} on ${on_string}: Markers"/>
+        <expand macro="hidden_outputs"/>
+    </outputs>
+    <tests>
+        <!-- Test 1: default settings with genes_key set to highly_variable; checks the logged script, both PNG sizes and the marker table. -->
+        <test expect_num_outputs="4">
+            <param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_reduced_240cells.h5ad"/>
+            <param name="format" value="png"/>
+            <param name="genes_key" value="highly_variable"/>
+            <param name="show_log" value="true" />
+            <section name="general_figure_options">
+                <param name="dpi" value="100"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="celltype_key='celltype',"/>
+                    <has_text_matching expression="genes_key='highly_variable',"/>
+                    <has_text_matching expression="n_pca_genes=100,"/>
+                    <has_text_matching expression="seed=123,"/>
+                    <has_text_matching expression="cmap='Reds',"/>
+                    <has_text_matching expression="save='masked_dotplot.png',"/>
+                    <has_text_matching expression="style='upset',"/>
+                    <has_text_matching expression="save='gene_overlap.png',"/>
+                </assert_contents>
+            </output>
+            <output name="out_masked_dotplot_png">
+                <assert_contents>
+                    <has_image_width width="4055" delta="2"/>
+                    <has_image_height height="1108" delta="2"/>
+                </assert_contents>
+            </output>
+            <output name="out_gene_overlap_png">
+                <assert_contents>
+                    <has_image_width width="1189" delta="2"/>
+                    <has_image_height height="600" delta="2"/>
+                </assert_contents>
+            </output>
+            <output name="marker_out" file="marker_out_test1.tsv" ftype="tabular"/>
+        </test>
+        <!-- Test 2: fixes the number of selected genes to 10 and checks that n=10 reaches the generated script. -->
+        <test expect_num_outputs="4">
+            <param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_reduced_240cells.h5ad"/>
+            <param name="format" value="png"/>
+            <param name="genes_key" value="highly_variable"/>
+            <param name="select_n" value="True"/>
+            <param name="n" value="10"/>
+            <section name="general_figure_options">
+                <param name="dpi" value="100"/>
+            </section>
+            <param name="show_log" value="true" />
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="celltype_key='celltype',"/>
+                    <has_text_matching expression="genes_key='highly_variable',"/>
+                    <has_text_matching expression="n_pca_genes=100,"/>
+                    <has_text_matching expression="n=10,"/>
+                    <has_text_matching expression="seed=123,"/>
+                    <has_text_matching expression="cmap='Reds',"/>
+                    <has_text_matching expression="save='masked_dotplot.png',"/>
+                    <has_text_matching expression="style='upset',"/>
+                    <has_text_matching expression="save='gene_overlap.png',"/>
+                </assert_contents>
+            </output>
+            <output name="out_masked_dotplot_png">
+                <assert_contents>
+                    <has_image_width width="2914" delta="2"/>
+                    <has_image_height height="882" delta="2"/>
+                </assert_contents>
+            </output>
+            <output name="out_gene_overlap_png">
+                <assert_contents>
+                    <has_image_width width="1032" delta="2"/>
+                    <has_image_height height="600" delta="2"/>
+                </assert_contents>
+            </output>
+            <output name="marker_out" file="marker_out_test2.tsv" ftype="tabular"/>
+        </test>
+        <!-- Test 3: restricts the celltypes parameter to two comma-separated cell types. -->
+        <test expect_num_outputs="4">
+            <param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_reduced_240cells.h5ad"/>
+            <param name="format" value="png"/>
+            <param name="genes_key" value="highly_variable"/>
+            <param name="celltypes" value="CD34+,CD56+ NK"/>
+            <section name="general_figure_options">
+                <param name="dpi" value="100"/>
+            </section>
+            <param name="show_log" value="true" />
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="celltype_key='celltype',"/>
+                    <has_text_matching expression="genes_key='highly_variable',"/>
+                    <has_text_matching expression="n_pca_genes=100,"/>
+                    <has_text_matching expression="seed=123,"/>
+                    <has_text_matching expression="cmap='Reds',"/>
+                    <has_text_matching expression="save='masked_dotplot.png',"/>
+                    <has_text_matching expression="style='upset',"/>
+                    <has_text_matching expression="save='gene_overlap.png',"/>
+                </assert_contents>
+            </output>
+            <output name="out_masked_dotplot_png">
+                <assert_contents>
+                    <has_image_width width="2776" delta="2"/>
+                    <has_image_height height="882" delta="2"/>
+                </assert_contents>
+            </output>
+            <output name="out_gene_overlap_png">
+                <assert_contents>
+                    <has_image_width width="929" delta="2"/>
+                    <has_image_height height="565" delta="2"/>
+                </assert_contents>
+            </output>
+            <output name="marker_out" file="marker_out_test3.tsv" ftype="tabular"/>
+        </test>
+        <!-- Test 4: supplies the marker list marker.tsv and declares it as having no header row. -->
+        <test expect_num_outputs="4">
+            <param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_reduced_240cells.h5ad"/>
+            <param name="format" value="png"/>
+            <param name="genes_key" value="highly_variable"/>
+            <param name="select_markerset" value="True"/>
+            <param name="markerset" value="marker.tsv"/>
+            <param name="header_markerset" value="not_included"/>
+            <section name="general_figure_options">
+                <param name="dpi" value="100"/>
+            </section>
+            <param name="show_log" value="true"/>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="celltype_key='celltype',"/>
+                    <has_text_matching expression="genes_key='highly_variable',"/>
+                    <has_text_matching expression="n_pca_genes=100,"/>
+                    <has_text_matching expression="seed=123,"/>
+                    <has_text_matching expression="cmap='Reds',"/>
+                    <has_text_matching expression="save='masked_dotplot.png',"/>
+                    <has_text_matching expression="style='upset',"/>
+                    <has_text_matching expression="save='gene_overlap.png',"/>
+                </assert_contents>
+            </output>
+            <output name="out_masked_dotplot_png">
+                <assert_contents>
+                    <has_image_width width="4055" delta="2"/>
+                    <has_image_height height="1108" delta="2"/>
+                </assert_contents>
+            </output>
+            <output name="out_gene_overlap_png">
+                <assert_contents>
+                    <has_image_width width="1154" delta="2"/>
+                    <has_image_height height="600" delta="2"/>
+                </assert_contents>
+            </output>
+            <output name="marker_out" file="marker_out_test4.tsv" ftype="tabular"/>
+        </test>
+        <!-- Test 5: switches on all advanced hyperparameter groups, leaving their values at the defaults. -->
+        <!-- NOTE(review): the select_* switches are nested in the section but not in their conditional elements, no assertion checks for a *_hparams argument in the log, and the asserted image sizes equal those of test 1. Confirm that Galaxy really applies these switches. -->
+        <test expect_num_outputs="4">
+            <param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_reduced_240cells.h5ad"/>
+            <param name="format" value="png"/>
+            <param name="genes_key" value="highly_variable"/>
+            <section name="advanced_options">
+                <param name="select_DE_selection_hparams" value="True"/>
+                <param name="select_forest_hparams" value="True"/>
+                <param name="select_forest_DE_baseline_hparams" value="True"/>
+                <param name="select_add_forest_genes_hparams" value="True"/>
+                <param name="select_marker_selection_hparams" value="True"/>
+            </section>
+            <section name="general_figure_options">
+                <param name="dpi" value="100"/>
+            </section>
+            <param name="show_log" value="true"/>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="celltype_key='celltype',"/>
+                    <has_text_matching expression="genes_key='highly_variable',"/>
+                    <has_text_matching expression="n_pca_genes=100,"/>
+                    <has_text_matching expression="seed=123,"/>
+                    <has_text_matching expression="cmap='Reds',"/>
+                    <has_text_matching expression="save='masked_dotplot.png',"/>
+                    <has_text_matching expression="style='upset',"/>
+                    <has_text_matching expression="save='gene_overlap.png',"/>
+                </assert_contents>
+            </output>
+            <output name="out_masked_dotplot_png">
+                <assert_contents>
+                    <has_image_width width="4055" delta="2"/>
+                    <has_image_height height="1108" delta="2"/>
+                </assert_contents>
+            </output>
+            <output name="out_gene_overlap_png">
+                <assert_contents>
+                    <has_image_width width="1189" delta="2"/>
+                    <has_image_height height="600" delta="2"/>
+                </assert_contents>
+            </output>
+            <output name="marker_out" file="marker_out_test5.tsv" ftype="tabular"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Probe set selection for single-cell sequencing data using spapros.
+============================================================================================================
+
+Spapros is a Python package that provides a pipeline for probe set selection and evaluation for targeted spatial transcriptomics data.
+
+Key Features:
+
+- Select probe sets for spatial transcriptomics which identify cell types of interest, capture general transcriptomic variation, and incorporate prior knowledge.
+- Evaluate probe sets with an extensive pipeline.
+
+Further documentation can be found here: https://spapros.readthedocs.io/en/latest/index.html.
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/marker.tsv	Mon Sep 16 11:37:46 2024 +0000
@@ -0,0 +1,1 @@
+CD14+ Monocyte	PILRA	PSAP	CD68	TMEM176B
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/marker_out_test1.tsv	Mon Sep 16 11:37:46 2024 +0000
@@ -0,0 +1,11 @@
+	0	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18	19	20	21	22	23	24	25	26	27	28	29	30
+CD14+ Monocyte	FTL	FCER1G
+CD19+ B	ISG20	LY86
+CD34+	PRSS57	SNHG7
+CD4+/CD25 T Reg	SIT1	IL32
+CD4+/CD45RO+ Memory	GZMK	IL32	CD3E
+CD56+ NK	CD7	GNLY
+CD8+ Cytotoxic T	CCL5	S100A4	NKG7
+CD8+/CD45RA+ Naive Cytotoxic	CD7	CD8A	CD8B	AES
+Dendritic	CD74	LYZ
+Unkown	LTB	CD247	SERPINB1	RPLP1	TNFRSF13B	BLK	SPON2	TPD52	RNF138	NUCB2	CD27	AMICA1	BTG1	CD63	HOPX	PTPRCAP	CPVL	JUN	RAB3IP	SPOCK2	PRF1	GZMA	STK17A	RPL3	GYPC	SOX4	GZMH	LINC00402	C9orf142	VIMP	DENND2D
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/marker_out_test2.tsv	Mon Sep 16 11:37:46 2024 +0000
@@ -0,0 +1,10 @@
+	0	1	2
+CD14+ Monocyte	FTL
+CD19+ B	ISG20
+CD34+	PRSS57
+CD4+/CD25 T Reg	SIT1	IL32
+CD4+/CD45RO+ Memory	GZMK	IL32	CD3E
+CD56+ NK	CD7
+CD8+ Cytotoxic T	CCL5
+CD8+/CD45RA+ Naive Cytotoxic	CD7
+Dendritic	CD74
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/marker_out_test3.tsv	Mon Sep 16 11:37:46 2024 +0000
@@ -0,0 +1,4 @@
+	0	1
+CD34+	PRSS57	SNHG7
+CD56+ NK	CD7	CTSW
+Unkown	RPL3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/marker_out_test4.tsv	Mon Sep 16 11:37:46 2024 +0000
@@ -0,0 +1,11 @@
+	0	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18	19	20	21	22	23	24	25	26	27	28	29	30
+CD14+ Monocyte	FTL	FCER1G
+CD19+ B	ISG20	CD79A
+CD34+	PRSS57	SNHG7
+CD4+/CD25 T Reg	SIT1	IL32
+CD4+/CD45RO+ Memory	GZMK	IL32	CD3E
+CD56+ NK	CD7	GNLY
+CD8+ Cytotoxic T	CCL5	S100A4	NKG7
+CD8+/CD45RA+ Naive Cytotoxic	CD7	CD8A	CD8B	AES
+Dendritic	CD74	CST3
+Unkown	LTB	CD247	SERPINB1	RPLP1	TNFRSF13B	BLK	SPON2	TPD52	RNF138	NUCB2	CD27	AMICA1	BTG1	CD63	HOPX	PTPRCAP	CPVL	JUN	RAB3IP	SPOCK2	PRF1	GZMA	STK17A	RPL3	GYPC	SOX4	GZMH	LINC00402	C9orf142	VIMP	DENND2D
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/marker_out_test5.tsv	Mon Sep 16 11:37:46 2024 +0000
@@ -0,0 +1,11 @@
+	0	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18	19	20	21	22	23	24	25	26	27	28	29	30
+CD14+ Monocyte	FTL	FCER1G
+CD19+ B	ISG20	LY86
+CD34+	PRSS57	SNHG7
+CD4+/CD25 T Reg	SIT1	IL32
+CD4+/CD45RO+ Memory	GZMK	IL32	CD3E
+CD56+ NK	CD7	GNLY
+CD8+ Cytotoxic T	CCL5	S100A4	NKG7
+CD8+/CD45RA+ Naive Cytotoxic	CD7	CD8A	CD8B	AES
+Dendritic	CD74	LYZ
+Unkown	LTB	CD247	SERPINB1	RPLP1	TNFRSF13B	BLK	SPON2	TPD52	RNF138	NUCB2	CD27	AMICA1	BTG1	CD63	HOPX	PTPRCAP	CPVL	JUN	RAB3IP	SPOCK2	PRF1	GZMA	STK17A	RPL3	GYPC	SOX4	GZMH	LINC00402	C9orf142	VIMP	DENND2D
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_marker_filtered_1.tsv	Mon Sep 16 11:37:46 2024 +0000
@@ -0,0 +1,11 @@
+	0	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18	19	20	21	22	23	24	25	26	27	28	29	30	31	32	33	34	35	36	37	38	39	40	41	42	43	44	45	46	47	48	49
+CD14+ Monocyte	PILRA	PSAP	CD68	TMEM176B	FTL	NPC2	LST1	FCGR3A	FCER1G	CEBPB	FCN1	SERPINA1	OAZ1	CFD	FTH1	HCK	AIF1	SAT1	CTSS	S100A11	MS4A7	TYROBP	COTL1	STXBP2	RP11-290F20.3	S100A4	IFITM2	SPI1	DUSP1	SESN2	IFITM3	MPP1	GALE	CORO1B	RP11-390E23.6	VIMP	RSBN1L-AS1	CHD4	CFP	GSTP1	PFN1	FCGRT	ADTRP	ARHGDIB	AMICA1	HLA-DRB5	CST3	GRN	HLA-DPA1	SSR3
+CD19+ B	TNFRSF13B	CD79B	SMARCB1	PNOC	CCDC50	AL928768.3	BANK1	MS4A1	CD79A	ISG20	IGLL5	TNFRSF17	KIAA0125	TPD52	PEBP1	FKBP11	CCDC132	SUB1	POU2AF1	MZB1	PTPRCAP	UBE2J1	BLK	SPIB	DERL3	FAM63B	MPHOSPH9	IGJ	FCRLA	XBP1	NCF1	SSR3	CD52	TSHZ2	PDLIM1	VIMP	SSR4	S1PR4	SELL	HMGA1	NUCB2	JUN	CD27	ARHGDIB	GYPC	CALR	ADTRP	BTG1	EXOG	RARRES3
+CD34+	PRSS57	C19orf77	SPINK2	RP11-620J15.3	SNHG7	CYTL1	EGFL7	NGFRAP1	SOX4	NFE2	EGR1	RP3-467N11.1	H1FX	CDK6	SERPINB1	SPINT2	HMGA1	IL1B	NUCB2	RPLP0	IGFBP7	RPLP1	ATXN7L3B	RPS3	C1orf228	KIAA0125	RPL3	SYPL1	CD63	LDHB	SEPT1	JUN	FAM101B	PRKCQ-AS1	MATK	PEBP1	SELL	ITM2A	SSR3	SPON2	XBP1	UBE2J1	VIMP	GYPC	STK17A	STMN1	VIM	MZB1	HOPX	CD99
+CD4+/CD25 T Reg	IL32	SPOCK2	ACTG1	CD2	CD3D	GPR171	ARHGDIB	ACOX1	MAL	SIT1	GIMAP4	AES	CD52	SEPT1	TMSB10	LAT	STMN1	LINC00402	CD27	TSHZ2	S1PR4	CD3E	PFN1	CD99	AQP3	PTPRCAP	CD3G	LY9	LCK	CD247	S100A4	CCR7	TTC39C	CORO1B	MPHOSPH9	FYB	RPSA	FLT3LG	B2M	GIMAP7	PRKCQ-AS1	SELL	BTG1	CCDC132	GYPC	DENND2D	LDHB	IL7R	ITM2A	RPLP0
+CD4+/CD45RA+/CD25- Naive T	EAF2	GNG7	SSR4	CALR	DERL3	MANF	IGJ	XBP1	ATXN7L3B	SSR3	UBE2J1	CD79A	MZB1	RP3-467N11.1	TNFRSF17	NCF1	CDK6	SUB1	POU2AF1	AL928768.3	FKBP11	VIMP	GYPC	JUN	CD27	PEBP1	SMARCB1	FLT3LG	RPLP1	RPLP0	CCDC50	ISG20	IGLL5	HCST	GSTP1	GPX1	CD52	VIM	PTPRCAP	FCGRT	CD74	B2M	RPL3	CYTL1	SPINK2	PRSS57	C19orf77	RP11-620J15.3	FAM101B	CCDC132
+CD4+/CD45RO+ Memory	RNF138	NOSIP	IFITM1	LCK	RARRES3	ALOX5AP	FAM63B	RAB3IP	GZMK	CD3G	SEPT1	LDHB	SELL	CD3D	EXOG	RPSA	CD247	AES	CD52	TMSB10	NUCB2	DENND2D	RPL3	RPLP1	ACTG1	FYB	GIMAP7	CORO1B	LY9	CD7	PFN1	RPS3	GYPC	CD2	ARHGDIB	IL32	RPLP0	CD99	CD3E	GIMAP4	HCST	B2M	LAT	ISG20	ITM2A	FKBP11	SERPINB1	STK17A	CCR7	PTPRCAP
+CD56+ NK	CST7	SPON2	HOPX	GNLY	NKG7	CTSW	KLRC2	CD7	MATK	PCIF1	CLIC3	FGFBP2	SYPL1	GZMB	C9orf142	PRF1	CD247	HCST	GZMA	GZMH	STMN1	ALOX5AP	CD63	CD99	IGFBP7	GZMM	CCL5	B2M	DENND2D	GIMAP7	RARRES3	SIT1	IFITM1	PFN1	EXOG	XBP1	IFITM2	GIMAP4	VIMP	STK17A	LCK	GZMK	SEPT1	SSR3	CD8A	CD3G	SPOCK2	RPS3	LDHB	IL32
+CD8+ Cytotoxic T	FAM101B	ADTRP	GZMK	HCST	LAT	EGR1	CD8B	CCL5	RPL3	LINC00402	FGFBP2	GZMM	RPS3	CD3E	GYPC	DENND2D	C9orf142	GZMA	SEPT1	JUN	FYB	CD8A	SELL	ALOX5AP	CD3G	STK17A	AQP3	C1orf228	CD3D	HOPX	NKG7	CD2	NGFRAP1	RPLP1	RPSA	CCR7	IL7R	SPON2	PRF1	RARRES3	PRKCQ-AS1	FKBP11	MANF	CTSW	GNLY	CD27	LDHB	MAL	LTB	RPLP0
+CD8+/CD45RA+ Naive Cytotoxic	RP11-291B21.2	CD8A	CD8B	RSBN1L-AS1	GIMAP5	GZMM	GALE	CCR7	STK17A	RAB3IP	GZMH	GIMAP7	CD3E	C1orf228	LCK	CCL5	PEBP1	CD27	GYPC	LDHB	RNF34	CD99	CD3G	PFN1	IL7R	CD2	C9orf142	TMSB10	NGFRAP1	S1PR4	ITM2A	CD7	RPS3	IL32	FYB	IFITM1	CD52	LAT	GIMAP4	MAL	STMN1	NOSIP	RARRES3	SPOCK2	ACTG1	PRF1	CD3D	RPLP1	SELL	GZMA
+Dendritic	HLA-DQB1	CST3	HLA-DRB1	HLA-DQA2	HLA-DQA1	LYZ	HLA-DPB1	HLA-DPA1	HLA-DMA	HLA-DRA	VIM	CD74	ALDH2	FCER1A	GPX1	HLA-DRB5	LGALS2	MNDA	FCGRT	GRN	HLA-DMB	FOS	CPVL	CLEC10A	AMICA1	CFP	LY86	GSTP1	RP11-473M20.7	IL1B	GSN	SPINT2	CCDC163P	IGFBP7	EXOG	DUSP1	CD63	COTL1	FTH1	SPI1	TYROBP	SPIB	S100A11	OAZ1	CTSS	CCDC50	AIF1	SERPINB1	TMSB10	PCIF1
Binary file test-data/tl.rank_genes_groups.newton-cg.pbmc68k_reduced_240cells.h5ad has changed