Mercurial > repos > iuc > spapros_evaluation
changeset 0:0c1b0a3b19df draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/spapros/ commit aed7fe13fa0ed09d77a31eeecaf3ec3fba7eed3b
author | iuc |
---|---|
date | Mon, 16 Sep 2024 11:37:34 +0000 |
parents | |
children | |
files | evaluation.xml macros.xml test-data/marker.tsv test-data/marker_out_test1.tsv test-data/marker_out_test2.tsv test-data/marker_out_test3.tsv test-data/marker_out_test4.tsv test-data/marker_out_test5.tsv test-data/tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_marker_filtered_1.tsv test-data/tl.rank_genes_groups.newton-cg.pbmc68k_reduced_240cells.h5ad |
diffstat | 10 files changed, 753 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/evaluation.xml Mon Sep 16 11:37:34 2024 +0000 @@ -0,0 +1,598 @@ +<tool id="spapros_evaluation" name="Evaluation" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@"> + <description>of marker genes with spapros</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"> + </expand> + <expand macro="version_command"/> + <command detect_errors="exit_code"><![CDATA[ +@CMD@ + ]]></command> + <configfiles> + <configfile name="script_file"><![CDATA[ +@CMD_imports@ +@CMD_read_inputs@ + +mpl.rcParams['figure.dpi'] = $figure_options.dpi +plt.rcParams["font.size"] = $figure_options.fontsize + +header_probeset='infer' +#if $header_probeset == 'not_included': +header_probeset=None +#end if + +## Probe set generation +probeset = pd.read_csv('$probeset', sep='\t', index_col=0, header=header_probeset) +probeset = [x for x in probeset.values.flatten().tolist() if not pd.isna(x)] + +## Reference set generation +methods_pre="#echo ','.join(["%s" % $s for $i, $s in enumerate($methods_reference)])#" +methods=methods_pre.split(',') + +reference_sets = sp.se.select_reference_probesets( +adata, +n=$nreference, +#if $genes_key != '': +genes_key='$genes_key', +#else: +genes_key=None, +#end if +#if $obs_key != '': +obs_key='$obs_key', +#end if +#if $seeds != '': +seeds=[$seeds], +#end if +methods=methods, +verbosity=0 +) + +## Parameter setting + +custom_params = { +'data': { +'name': '$probeset_name', +'celltype_key': '$celltype_key' +}, +'metrics': { +#if $method.method == 'plot_summary' or $method.method == 'plot_cluster_similarity': +'cluster_similarity': { +'ns': [$method.ns_start, $method.ns_end], +'AUC_borders': [#echo ",".join(["[%s, %s]" % ($s.auc_borders_start, $s.auc_borders_end) for $i, $s in enumerate($method.series_auc_borders)])#] +}, +#end if +#if $method.method == 'plot_summary' or $method.method == 'plot_knn_overlap': +'knn_overlap': { +'ks': [#echo ",".join([ "%s" % 
$s.k for $i, $s in enumerate($method.knn) ])#] +}, +#end if +## forest_clfs settings are used by the summary and by the confusion matrix; the threshold input only exists for plot_summary +#if $method.method == 'plot_summary' or $method.method == 'plot_confusion_matrix': +'forest_clfs': { +'ct_key': '$celltype_key', +#if $method.method == 'plot_summary': +'threshold': $method.forest_clfs_threshold +#end if +}, +#end if +#if $method.method == 'plot_summary': +#if $method.select_marker_corr.use_marker_corr == 'True': +'marker_corr': { +'per_celltype': $method.select_marker_corr.per_celltype, +'per_marker': $method.select_marker_corr.per_marker, +#if $method.select_marker_corr.select_per_celltype_min_mean.use_per_celltype_min_mean == 'True' +'per_celltype_min_mean': $method.select_marker_corr.select_per_celltype_min_mean.per_celltype_min_mean, +#end if +#if $method.select_marker_corr.select_per_marker_min_mean.use_per_marker_min_mean == 'True' +'per_marker_min_mean': $method.select_marker_corr.select_per_marker_min_mean.per_marker_min_mean +#end if +}, +#end if +#end if +#if $method.method == 'plot_marker_corr': +'marker_corr': { +'per_celltype': $method.per_celltype, +'per_marker': $method.per_marker, +#if $method.select_per_celltype_min_mean.use_per_celltype_min_mean == 'True' +'per_celltype_min_mean': $method.select_per_celltype_min_mean.per_celltype_min_mean, +#else: +'per_celltype_min_mean': None, +#end if +#if $method.select_per_marker_min_mean.use_per_marker_min_mean == 'True' +'per_marker_min_mean': $method.select_per_marker_min_mean.per_marker_min_mean +#else: +'per_marker_min_mean': None +#end if +}, +#end if +#if $method.method == 'plot_summary': +'gene_corr': { +'threshold': $method.gene_corr_threshold +} +#end if +} +} + +## Setup evaluator + +#if $method.method == 'plot_summary' or $method.method == 'plot_marker_corr': + +header_markerset='infer' +feature_marker=dict() + +#if $method.method == 'plot_summary': +## the markerset inputs only exist when marker correlation is enabled, so guard every access to them +#if $method.select_marker_corr.use_marker_corr == 'True': +#if $method.select_marker_corr.header_markerset == 'not_included' : +header_markerset=None +#end if +feature_marker = {key: [v for v in list(value.values()) if pd.notna(v)] for key, value in 
pd.read_csv('$method.select_marker_corr.markerset', sep='\t', index_col=0, header=header_markerset).to_dict(orient='index').items()} +#end if +#end if +#if $method.method == 'plot_marker_corr': +#if $method.header_markerset == 'not_included': +header_markerset=None +#end if +feature_marker = {key: [v for v in list(value.values()) if pd.notna(v)] for key, value in pd.read_csv('$method.markerset', sep='\t', index_col=0, header=header_markerset).to_dict(orient='index').items()} +#end if + +#end if + +evaluator = sp.ev.ProbesetEvaluator( +adata, +metrics_params=custom_params, +scheme="full", +verbosity=0, +#if $method.method == 'plot_summary' or $method.method == 'plot_marker_corr': +marker_list=feature_marker, +#end if +results_dir=None +) + +## Evaluation reference set +for set_id, df in reference_sets.items(): + gene_set = df[df["selection"]].index.to_list() + evaluator.evaluate_probeset(gene_set, set_id=set_id) + +## Evaluation probe set +evaluator.evaluate_probeset(probeset, set_id='$probeset_name') + +## Execution of method ## + +#if $method.method == 'plot_summary': +evaluator.plot_summary( +@CMD_plot@ +) +#end if + +#if $method.method == 'plot_confusion_matrix': +evaluator.plot_confusion_matrix( +@CMD_plot@ +) +#end if + +#if $method.method == 'plot_coexpression': +evaluator.plot_coexpression( +@CMD_plot@ +) +#end if + +#if $method.method == 'plot_cluster_similarity': +evaluator.plot_cluster_similarity( +@CMD_plot@ +) +#end if + +#if $method.method == 'plot_knn_overlap': +evaluator.plot_knn_overlap( +@CMD_plot@ +) +#end if + +## plot_marker_corr does not allow for the show keyword +#if $method.method == 'plot_marker_corr': +evaluator.plot_marker_corr( +#if $method.set_ids != 'all' and $method.set_ids != '': +## set_ids arrives as comma separated text; split it into a list of strings instead of emitting bare names +set_ids=[s.strip() for s in '$method.set_ids'.split(',')], +#end if +save='plot.$format' +) +#end if + +]]></configfile> + </configfiles> + <inputs> + <expand macro="inputs_anndata"/> + <expand macro="param_plot_format"/> + <param name="probeset" type="data" format="tabular" label="Probeset 
tabular file with rows=conditions (e.g., celltypes) and column=features (e.g., genes)"/> + <param name="header_probeset" type="select" optional="false" label="Header in the list of probes?"> + <option value="included">Header included</option> + <option value="not_included">Header not included</option> + </param> + <param argument="nreference" type="integer" value="10" min="1" optional="false" label="Number of selected genes to generate a reference set"/> + <param name="methods_reference" type="select" multiple="True" optional="false" label="Select the methods to generate a reference probeset"> + <option value="random" selected="true">Random</option> + <option value="PCA" selected="false">PCA</option> + <option value="DE" selected="false">Differentially expressed features based on Wilcoxon rank genes test (DE)</option> + <option value="HVG" selected="false">Highly variable features (HVG)</option> + </param> + <param name="genes_key" type="text" optional="true" label="adata.var key for subset of preselected genes to run the selections" help="This is typically highly_variable_genes. Leave empty to not subset genes."/> + <param name="obs_key" type="text" optional="true" label="Column name of adata.obs for which marker scores are calculated" help="Only required for method DE."/> + <param name="seeds" type="text" optional="true" label="List of random integer seeds (comma separated, e.g., 123,999,22)" help="For each seed, one random gene set is selected if random in methods. 
Leave it empty for one random set with a random seed."/> + <param name="probeset_name" type="text" value="probeset" optional="false" label="Name for your probeset that should be displayed in plots"/> + <param name="celltype_key" type="text" value="celltype" optional="false" label="Column name of adata.obs that represents groups of your cells (e.g., celltype)"/> + <conditional name="method"> + <param argument="method" type="select" optional="false" label="Method of spapros"> + <option value="plot_summary">Plot statistic summary, using 'evaluator.plot_summary'</option> + <option value="plot_confusion_matrix">Plot confusion matrix, using 'evaluator.plot_confusion_matrix'</option> + <option value="plot_coexpression">Plot coexpression heatmap, using 'evaluator.plot_coexpression'</option> + <option value="plot_cluster_similarity">Plot cluster similarity, using 'evaluator.plot_cluster_similarity'</option> + <option value="plot_knn_overlap">Plot knn overlap, using 'evaluator.plot_knn_overlap'</option> + <option value="plot_marker_corr">Plot feature marker correlation, using 'evaluator.plot_marker_corr'</option> + </param> + <when value="plot_summary"> + <expand macro="set_ids"/> + <param name="forest_clfs_threshold" type="float" value="0.8" min="0.0" max="1.0" label="Assesses how many cell types (%) can be predicted with an accuracy of at least threshold" help="The metric gives an idea about how many cell types can be identified with high confidence with the given gene set."/> + <param name="gene_corr_threshold" type="float" value="0.8" min="0.0" max="1.0" label="Percentage of features (e.g., genes) that have a maximum correlation of less than threshold with all other features" help="This metric gives an idea about how many features show unique expression profiles in the gene set."/> + <param name="ns_start" type="integer" value="5" min="1" label="The minimum number of leiden clusters" help="Clusterings are calculated with different leiden resolutions to 
generate clusterings of n = ns_start to ns_end clusters."/> + <param name="ns_end" type="integer" value="21" min="1" label="The maximum number of leiden clusters" help="Clusterings are calculated with different leiden resolutions to generate clusterings of n = ns_start to ns_end clusters."/> + <repeat name="series_auc_borders" title="Calculates nmi AUCs over given borders"> + <param name="auc_borders_start" type="integer" value="15" min="1" label="Calculates nmi over n ranges auc_borders_start to auc_borders_end" help="Defined border shouldn't exceed values in nmis."/> + <param name="auc_borders_end" type="integer" value="20" min="1" label="Calculates nmi over n ranges auc_borders_start to auc_borders_end" help="Defined border shouldn't exceed values in nmis."/> + </repeat> + <repeat name="knn" title="Calculate knn graphs for each k"> + <param name="k" type="integer" value="10" min="1" label="Includes nearest neighbors for all k"/> + </repeat> + <conditional name="select_marker_corr"> + <param name="use_marker_corr" type="select" label="Do you want to calculate the correlation between your probeset and marker features?"> + <option value="False">No</option> + <option value="True">Yes</option> + </param> + <when value="True"> + <expand macro="param_markerset"/> + <param name="per_celltype" type="boolean" truevalue="True" falsevalue="False" label="Whether to return columns with per cell type max correlations" checked="true"/> + <param name="per_marker" type="boolean" truevalue="True" falsevalue="False" label="Whether to return columns with per marker max correlations" checked="true"/> + <conditional name="select_per_celltype_min_mean"> + <param name="use_per_celltype_min_mean" type="select" label="Add a column for correlation per cell type that only takes into account markers with mean expression > per_celltype_min_mean"> + <option value="False">No</option> + <option value="True">Yes</option> + </param> + <when value="True"> + <param 
name="per_celltype_min_mean" type="float" value="0.0" min="0.0" label="Set per_celltype_min_mean"/> + </when> + <when value="False"/> + </conditional> + <conditional name="select_per_marker_min_mean"> + <param name="use_per_marker_min_mean" type="select" label="Add a column for correlation per marker that only takes into account markers with mean expression > per_marker_min_mean"> + <option value="False">No</option> + <option value="True">Yes</option> + </param> + <when value="True"> + <param name="per_marker_min_mean" type="float" value="0.0" min="0.0" label="Set per_marker_min_mean"/> + </when> + <when value="False"/> + </conditional> + </when> + <when value="False"/> + </conditional> + </when> + <when value="plot_confusion_matrix"> + <expand macro="set_ids"/> + </when> + <when value="plot_coexpression"> + <expand macro="set_ids"/> + </when> + <when value="plot_cluster_similarity"> + <expand macro="set_ids"/> + <param name="ns_start" type="integer" value="5" min="1" label="The minimum number of leiden clusters" help="Clusterings are calculated with different leiden resolutions to generate clusterings of n = ns_start to ns_end clusters."/> + <param name="ns_end" type="integer" value="21" min="1" label="The maximum number of leiden clusters" help="Clusterings are calculated with different leiden resolutions to generate clusterings of n = ns_start to ns_end clusters."/> + <repeat name="series_auc_borders" title="Calculates nmi AUCs over given borders"> + <param name="auc_borders_start" type="integer" value="15" min="1" label="Calculates nmi over n ranges auc_borders_start to auc_borders_end" help="Defined border shouldn't exceed values in nmis."/> + <param name="auc_borders_end" type="integer" value="20" min="1" label="Calculates nmi over n ranges auc_borders_start to auc_borders_end" help="Defined border shouldn't exceed values in nmis."/> + </repeat> + </when> + <when value="plot_knn_overlap"> + <expand macro="set_ids"/> + <repeat 
name="knn" title="Calculate knn graphs for each k"> + <param name="k" type="integer" value="10" min="1" label="Includes nearest neighbors for all k"/> + </repeat> + </when> + <when value="plot_marker_corr"> + <expand macro="param_markerset"/> + <expand macro="set_ids"/> + <param name="per_celltype" type="boolean" truevalue="True" falsevalue="False" label="Whether to return columns with per cell type max correlations" checked="true"/> + <param name="per_marker" type="boolean" truevalue="True" falsevalue="False" label="Whether to return columns with per marker max correlations" checked="true"/> + <conditional name="select_per_celltype_min_mean"> + <param name="use_per_celltype_min_mean" type="select" label="Add a column for correlation per cell type that only takes into account markers with mean expression > per_celltype_min_mean"> + <option value="False">No</option> + <option value="True">Yes</option> + </param> + <when value="True"> + <param name="per_celltype_min_mean" type="float" value="0.0" min="0.0" label="Set per_celltype_min_mean"/> + </when> + <when value="False"/> + </conditional> + <conditional name="select_per_marker_min_mean"> + <param name="use_per_marker_min_mean" type="select" label="Add a column for correlation per marker that only takes into account markers with mean expression > per_marker_min_mean"> + <option value="False">No</option> + <option value="True">Yes</option> + </param> + <when value="True"> + <param name="per_marker_min_mean" type="float" value="0.0" min="0.0" label="Set per_marker_min_mean"/> + </when> + <when value="False"/> + </conditional> + </when> + </conditional> + <section name="figure_options" title="Figure Output Options" expanded="false"> + <param argument="dpi" type="integer" value="300" min="1" label="Dpi of figures"/> + <param argument="fontsize" type="integer" value="100" min="1" label="Font size of figures"/> + </section> + <expand macro="inputs_common_advanced"/> + </inputs> + <outputs> + <data name="out_png" 
format="png" from_work_dir="*.png" label="PNG plot from ${tool.name} (${method.method}) on ${on_string}"> + <filter>format == 'png'</filter> + </data> + <data name="out_pdf" format="pdf" from_work_dir="*.pdf" label="PDF plot from ${tool.name} (${method.method}) on ${on_string}"> + <filter>format == 'pdf'</filter> + </data> + <data name="out_svg" format="svg" from_work_dir="*.svg" label="SVG plot from ${tool.name} (${method.method}) on ${on_string}"> + <filter>format == 'svg'</filter> + </data> + <expand macro="hidden_outputs"/> + </outputs> + <tests> + <test expect_num_outputs="2"> + <param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_reduced_240cells.h5ad"/> + <param name="format" value="png"/> + <param name="probeset" value="tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_marker_filtered_1.tsv"/> + <param name="header_probeset" value="included"/> + <param name="nreference" value="30"/> + <param name="methods_reference" value="random,PCA,DE,HVG"/> + <param name="genes_key" value="highly_variable"/> + <param name="obs_key" value="celltype"/> + <param name="seeds" value="123,124"/> + <conditional name="method"> + <param name="method" value="plot_summary"/> + <param name="set_ids" value="all"/> + <param name="forest_clfs_threshold" value="0.8"/> + <param name="gene_corr_threshold" value="0.8"/> + <param name="ns_start" value="5"/> + <param name="ns_end" value="21"/> + <repeat name="series_auc_borders"> + <param name="auc_borders_start" value="7"/> + <param name="auc_borders_end" value="14"/> + </repeat> + <repeat name="series_auc_borders"> + <param name="auc_borders_start" value="15"/> + <param name="auc_borders_end" value="20"/> + </repeat> + <repeat name="knn"> + <param name="k" value="5"/> + </repeat> + <repeat name="knn"> + <param name="k" value="10"/> + </repeat> + <conditional name="select_marker_corr"> + <param name="use_marker_corr" value="True"/> + <param name="markerset" value="marker.tsv"/> + <param name="header_markerset" 
value="not_included"/> + <param name="per_celltype" value="True"/> + <param name="per_marker" value="True"/> + <conditional name="select_per_celltype_min_mean"> + <param name="use_per_celltype_min_mean" value="False"/> + </conditional> + <conditional name="select_per_marker_min_mean"> + <param name="use_per_marker_min_mean" value="True"/> + <param name="per_marker_min_mean" value="0.025"/> + </conditional> + </conditional> + </conditional> + <param name="show_log" value="true" /> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="'name': 'probeset',"/> + <has_text_matching expression="'per_celltype': True,"/> + <has_text_matching expression="'per_marker': True,"/> + <has_text_matching expression="'per_marker_min_mean': 0.025"/> + <has_text_matching expression="evaluator.plot_summary"/> + <has_text_matching expression="save='plot.png',"/> + </assert_contents> + </output> + <output name="out_png"> + <assert_contents> + <has_image_width width="3253" delta="2"/> + <has_image_height height="1446" delta="2"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="2"> + <param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_reduced_240cells.h5ad"/> + <param name="format" value="png"/> + <param name="probeset" value="tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_marker_filtered_1.tsv"/> + <param name="header_probeset" value="included"/> + <param name="nreference" value="30"/> + <param name="methods_reference" value="random,PCA,DE,HVG"/> + <param name="genes_key" value="highly_variable"/> + <param name="obs_key" value="celltype"/> + <param name="seeds" value="123,124"/> + <conditional name="method"> + <param name="method" value="plot_confusion_matrix"/> + <param name="set_ids" value="all"/> + </conditional> + <param name="show_log" value="true" /> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="'name': 'probeset',"/> + <has_text_matching expression="'ct_key': 
'celltype',"/> + <has_text_matching expression="evaluator.plot_confusion_matrix"/> + <has_text_matching expression="save='plot.png',"/> + </assert_contents> + </output> + <output name="out_png"> + <assert_contents> + <has_image_width width="4560" delta="2"/> + <has_image_height height="1859" delta="2"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="2"> + <param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_reduced_240cells.h5ad"/> + <param name="format" value="png"/> + <param name="probeset" value="tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_marker_filtered_1.tsv"/> + <param name="header_probeset" value="included"/> + <param name="nreference" value="30"/> + <param name="methods_reference" value="random,PCA,DE,HVG"/> + <param name="genes_key" value="highly_variable"/> + <param name="obs_key" value="celltype"/> + <param name="seeds" value="123,124"/> + <conditional name="method"> + <param name="method" value="plot_coexpression"/> + <param name="set_ids" value="all"/> + </conditional> + <param name="show_log" value="true" /> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="'name': 'probeset',"/> + <has_text_matching expression="evaluator.plot_coexpression"/> + <has_text_matching expression="save='plot.png',"/> + </assert_contents> + </output> + <output name="out_png"> + <assert_contents> + <has_image_width width="5412" delta="2"/> + <has_image_height height="3463" delta="2"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="2"> + <param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_reduced_240cells.h5ad"/> + <param name="format" value="png"/> + <param name="probeset" value="tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_marker_filtered_1.tsv"/> + <param name="header_probeset" value="included"/> + <param name="nreference" value="30"/> + <param name="methods_reference" value="random,PCA,DE,HVG"/> + <param name="genes_key" 
value="highly_variable"/> + <param name="obs_key" value="celltype"/> + <param name="seeds" value="123,124"/> + <conditional name="method"> + <param name="method" value="plot_cluster_similarity"/> + <param name="set_ids" value="all"/> + <param name="ns_start" value="3"/> + <param name="ns_end" value="20"/> + <repeat name="series_auc_borders"> + <param name="auc_borders_start" value="7"/> + <param name="auc_borders_end" value="14"/> + </repeat> + <repeat name="series_auc_borders"> + <param name="auc_borders_start" value="15"/> + <param name="auc_borders_end" value="20"/> + </repeat> + </conditional> + <param name="show_log" value="true" /> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="'name': 'probeset',"/> + <has_text_matching expression="evaluator.plot_cluster_similarity"/> + <has_text_matching expression="save='plot.png',"/> + </assert_contents> + </output> + <output name="out_png"> + <assert_contents> + <has_image_width width="3223" delta="2"/> + <has_image_height height="1406" delta="2"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="2"> + <param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_reduced_240cells.h5ad"/> + <param name="format" value="png"/> + <param name="probeset" value="tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_marker_filtered_1.tsv"/> + <param name="header_probeset" value="included"/> + <param name="nreference" value="30"/> + <param name="methods_reference" value="random,PCA,DE,HVG"/> + <param name="genes_key" value="highly_variable"/> + <param name="obs_key" value="celltype"/> + <param name="seeds" value="123,124"/> + <conditional name="method"> + <param name="method" value="plot_knn_overlap"/> + <param name="set_ids" value="all"/> + <repeat name="knn"> + <param name="k" value="6"/> + </repeat> + <repeat name="knn"> + <param name="k" value="11"/> + </repeat> + </conditional> + <param name="show_log" value="true" /> + <output name="hidden_output"> + 
<assert_contents> + <has_text_matching expression="'name': 'probeset',"/> + <has_text_matching expression="evaluator.plot_knn_overlap"/> + <has_text_matching expression="save='plot.png',"/> + </assert_contents> + </output> + <output name="out_png"> + <assert_contents> + <has_image_width width="3223" delta="2"/> + <has_image_height height="1406" delta="2"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="2"> + <param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_reduced_240cells.h5ad"/> + <param name="format" value="png"/> + <param name="probeset" value="tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_marker_filtered_1.tsv"/> + <param name="header_probeset" value="included"/> + <param name="nreference" value="50"/> + <param name="methods_reference" value="random,PCA,DE,HVG"/> + <param name="genes_key" value="highly_variable"/> + <param name="obs_key" value="celltype"/> + <conditional name="method"> + <param name="method" value="plot_marker_corr"/> + <param name="set_ids" value="all"/> + <param name="use_marker_corr" value="True"/> + <param name="markerset" value="marker.tsv"/> + <param name="header_markerset" value="not_included"/> + <param name="per_celltype" value="True"/> + <param name="per_marker" value="True"/> + <conditional name="select_per_celltype_min_mean"> + <param name="use_per_celltype_min_mean" value="False"/> + </conditional> + <conditional name="select_per_marker_min_mean"> + <param name="use_per_marker_min_mean" value="True"/> + <param name="per_marker_min_mean" value="0.025"/> + </conditional> + </conditional> + <param name="show_log" value="true" /> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="'name': 'probeset',"/> + <has_text_matching expression="'per_celltype': True,"/> + <has_text_matching expression="'per_marker': True,"/> + <has_text_matching expression="'per_marker_min_mean': 0.025"/> + <has_text_matching expression="evaluator.plot_marker_corr"/> + 
<has_text_matching expression="save='plot.png'"/> + </assert_contents> + </output> + <output name="out_png"> + <assert_contents> + <has_image_width width="5064" delta="2"/> + <has_image_height height="4554" delta="2"/> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ +Probe set evaluation for single-cell sequencing data using spapros. +============================================================================================================ + +Spapros is a python package that provides a pipeline for probe set selection and evaluation for targeted spatial transcriptomics data. + +Key Features: +* Select probe sets for spatial transcriptomics which identify cell types of interest, capture general transcriptomic variation, and incorporate prior knowledge +* Evaluate probe sets with an extensive pipeline + +Further documentation can be found here: https://spapros.readthedocs.io/en/latest/index.html. + + ]]></help> + <expand macro="citations"/> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Mon Sep 16 11:37:34 2024 +0000 @@ -0,0 +1,96 @@ +<macros> + <token name="@TOOL_VERSION@">0.1.5</token> + <token name="@VERSION_SUFFIX@">0</token> + <token name="@profile@">22.05</token> + <xml name="requirements"> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">spapros</requirement> + <yield /> + </requirements> + </xml> + <xml name="creators"> + <creator> + <organization name="European Galaxy Team" url="https://galaxyproject.org/eu/" /> + </creator> + </xml> + <xml name="citations"> + <citations> + <citation type="doi">10.1101/2022.08.16.504115</citation> + <citation type="doi">10.1093/gigascience/giaa102</citation> + </citations> + </xml> + <xml name="version_command"> + <version_command><![CDATA[python -c "import spapros;print('%s' % spapros.__version__ )"]]></version_command> + </xml> + <token name="@CMD@"><![CDATA[ +cp '$adata' 'anndata.h5ad' && +cat '$script_file' > '$hidden_output' && +python '$script_file' >> '$hidden_output' && +ls . 
>> '$hidden_output' + ]]> + </token> + <token name="@CMD_imports@"><![CDATA[ +import spapros as sp +import os +import pandas as pd +import scanpy as sc +import matplotlib as mpl +import matplotlib.pyplot as plt +import random + ]]> + </token> + <token name="@CMD_plot@"><![CDATA[ +#if $method.set_ids != 'all' and $method.set_ids != '': +## set_ids arrives as comma separated text; split it into a list of strings instead of emitting bare names +set_ids=[s.strip() for s in '$method.set_ids'.split(',')], +#end if +save='plot.$format', +show=False + ]]> + </token> + <xml name="set_ids"> + <param name="set_ids" type="text" value="all" optional="true" label="List of probeset ids (comma separated, e.g., DE,HVG,random)" help="Keep 'all' or leave it empty to select all probeset ids."/> + </xml> + <xml name="sanitize_query" token_validinitial="string.printable"> + <sanitizer> + <valid initial="@VALIDINITIAL@"> + <remove value="'" /> + </valid> + </sanitizer> + </xml> + <xml name="sanitize_vectors" token_validinitial="string.digits"> + <sanitizer> + <valid initial="@VALIDINITIAL@"> + <add value=","/> + </valid> + </sanitizer> + </xml> + <xml name="inputs_anndata"> + <param name="adata" type="data" format="h5ad" label="Annotated data matrix"/> + </xml> + <token name="@CMD_read_inputs@"><![CDATA[ +adata = sc.read_h5ad('anndata.h5ad') +]]> + </token> + <xml name="inputs_common_advanced"> + <param name="show_log" type="boolean" checked="false" label="Output Log?" /> + </xml> + <xml name="param_plot_format"> + <param name="format" type="select" label="Format for saving figures"> + <option value="png">png</option> + <option value="pdf">pdf</option> + <option value="svg">svg</option> + </param> + </xml> + <xml name="param_markerset"> + <param name="markerset" type="data" format="tabular" label="Markerset tabular file with rows=conditions (e.g., celltypes) and column=features (e.g., genes)" help="This is used to calculate the correlations between your probeset features and marker features. 
Marker features are for example genes that you know are important for your condition (e.g., celltypes)."/> + <param name="header_markerset" type="select" optional="false" label="Header in the list of markers?"> + <option value="included">Header included</option> + <option value="not_included">Header not included</option> + </param> + </xml> + <xml name="hidden_outputs"> + <data name="hidden_output" format="txt" label="Log file" > + <filter>show_log</filter> + </data> + </xml> +</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/marker.tsv Mon Sep 16 11:37:34 2024 +0000 @@ -0,0 +1,1 @@ +CD14+ Monocyte PILRA PSAP CD68 TMEM176B \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/marker_out_test1.tsv Mon Sep 16 11:37:34 2024 +0000 @@ -0,0 +1,11 @@ + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 +CD14+ Monocyte FTL FCER1G +CD19+ B ISG20 LY86 +CD34+ PRSS57 SNHG7 +CD4+/CD25 T Reg SIT1 IL32 +CD4+/CD45RO+ Memory GZMK IL32 CD3E +CD56+ NK CD7 GNLY +CD8+ Cytotoxic T CCL5 S100A4 NKG7 +CD8+/CD45RA+ Naive Cytotoxic CD7 CD8A CD8B AES +Dendritic CD74 LYZ +Unkown LTB CD247 SERPINB1 RPLP1 TNFRSF13B BLK SPON2 TPD52 RNF138 NUCB2 CD27 AMICA1 BTG1 CD63 HOPX PTPRCAP CPVL JUN RAB3IP SPOCK2 PRF1 GZMA STK17A RPL3 GYPC SOX4 GZMH LINC00402 C9orf142 VIMP DENND2D
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/marker_out_test2.tsv Mon Sep 16 11:37:34 2024 +0000 @@ -0,0 +1,10 @@ + 0 1 2 +CD14+ Monocyte FTL +CD19+ B ISG20 +CD34+ PRSS57 +CD4+/CD25 T Reg SIT1 IL32 +CD4+/CD45RO+ Memory GZMK IL32 CD3E +CD56+ NK CD7 +CD8+ Cytotoxic T CCL5 +CD8+/CD45RA+ Naive Cytotoxic CD7 +Dendritic CD74
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/marker_out_test3.tsv Mon Sep 16 11:37:34 2024 +0000 @@ -0,0 +1,4 @@ + 0 1 +CD34+ PRSS57 SNHG7 +CD56+ NK CD7 CTSW +Unkown RPL3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/marker_out_test4.tsv Mon Sep 16 11:37:34 2024 +0000 @@ -0,0 +1,11 @@ + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 +CD14+ Monocyte FTL FCER1G +CD19+ B ISG20 CD79A +CD34+ PRSS57 SNHG7 +CD4+/CD25 T Reg SIT1 IL32 +CD4+/CD45RO+ Memory GZMK IL32 CD3E +CD56+ NK CD7 GNLY +CD8+ Cytotoxic T CCL5 S100A4 NKG7 +CD8+/CD45RA+ Naive Cytotoxic CD7 CD8A CD8B AES +Dendritic CD74 CST3 +Unkown LTB CD247 SERPINB1 RPLP1 TNFRSF13B BLK SPON2 TPD52 RNF138 NUCB2 CD27 AMICA1 BTG1 CD63 HOPX PTPRCAP CPVL JUN RAB3IP SPOCK2 PRF1 GZMA STK17A RPL3 GYPC SOX4 GZMH LINC00402 C9orf142 VIMP DENND2D
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/marker_out_test5.tsv Mon Sep 16 11:37:34 2024 +0000 @@ -0,0 +1,11 @@ + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 +CD14+ Monocyte FTL FCER1G +CD19+ B ISG20 LY86 +CD34+ PRSS57 SNHG7 +CD4+/CD25 T Reg SIT1 IL32 +CD4+/CD45RO+ Memory GZMK IL32 CD3E +CD56+ NK CD7 GNLY +CD8+ Cytotoxic T CCL5 S100A4 NKG7 +CD8+/CD45RA+ Naive Cytotoxic CD7 CD8A CD8B AES +Dendritic CD74 LYZ +Unkown LTB CD247 SERPINB1 RPLP1 TNFRSF13B BLK SPON2 TPD52 RNF138 NUCB2 CD27 AMICA1 BTG1 CD63 HOPX PTPRCAP CPVL JUN RAB3IP SPOCK2 PRF1 GZMA STK17A RPL3 GYPC SOX4 GZMH LINC00402 C9orf142 VIMP DENND2D
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_marker_filtered_1.tsv Mon Sep 16 11:37:34 2024 +0000 @@ -0,0 +1,11 @@ + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 +CD14+ Monocyte PILRA PSAP CD68 TMEM176B FTL NPC2 LST1 FCGR3A FCER1G CEBPB FCN1 SERPINA1 OAZ1 CFD FTH1 HCK AIF1 SAT1 CTSS S100A11 MS4A7 TYROBP COTL1 STXBP2 RP11-290F20.3 S100A4 IFITM2 SPI1 DUSP1 SESN2 IFITM3 MPP1 GALE CORO1B RP11-390E23.6 VIMP RSBN1L-AS1 CHD4 CFP GSTP1 PFN1 FCGRT ADTRP ARHGDIB AMICA1 HLA-DRB5 CST3 GRN HLA-DPA1 SSR3 +CD19+ B TNFRSF13B CD79B SMARCB1 PNOC CCDC50 AL928768.3 BANK1 MS4A1 CD79A ISG20 IGLL5 TNFRSF17 KIAA0125 TPD52 PEBP1 FKBP11 CCDC132 SUB1 POU2AF1 MZB1 PTPRCAP UBE2J1 BLK SPIB DERL3 FAM63B MPHOSPH9 IGJ FCRLA XBP1 NCF1 SSR3 CD52 TSHZ2 PDLIM1 VIMP SSR4 S1PR4 SELL HMGA1 NUCB2 JUN CD27 ARHGDIB GYPC CALR ADTRP BTG1 EXOG RARRES3 +CD34+ PRSS57 C19orf77 SPINK2 RP11-620J15.3 SNHG7 CYTL1 EGFL7 NGFRAP1 SOX4 NFE2 EGR1 RP3-467N11.1 H1FX CDK6 SERPINB1 SPINT2 HMGA1 IL1B NUCB2 RPLP0 IGFBP7 RPLP1 ATXN7L3B RPS3 C1orf228 KIAA0125 RPL3 SYPL1 CD63 LDHB SEPT1 JUN FAM101B PRKCQ-AS1 MATK PEBP1 SELL ITM2A SSR3 SPON2 XBP1 UBE2J1 VIMP GYPC STK17A STMN1 VIM MZB1 HOPX CD99 +CD4+/CD25 T Reg IL32 SPOCK2 ACTG1 CD2 CD3D GPR171 ARHGDIB ACOX1 MAL SIT1 GIMAP4 AES CD52 SEPT1 TMSB10 LAT STMN1 LINC00402 CD27 TSHZ2 S1PR4 CD3E PFN1 CD99 AQP3 PTPRCAP CD3G LY9 LCK CD247 S100A4 CCR7 TTC39C CORO1B MPHOSPH9 FYB RPSA FLT3LG B2M GIMAP7 PRKCQ-AS1 SELL BTG1 CCDC132 GYPC DENND2D LDHB IL7R ITM2A RPLP0 +CD4+/CD45RA+/CD25- Naive T EAF2 GNG7 SSR4 CALR DERL3 MANF IGJ XBP1 ATXN7L3B SSR3 UBE2J1 CD79A MZB1 RP3-467N11.1 TNFRSF17 NCF1 CDK6 SUB1 POU2AF1 AL928768.3 FKBP11 VIMP GYPC JUN CD27 PEBP1 SMARCB1 FLT3LG RPLP1 RPLP0 CCDC50 ISG20 IGLL5 HCST GSTP1 GPX1 CD52 VIM PTPRCAP FCGRT CD74 B2M RPL3 CYTL1 SPINK2 PRSS57 C19orf77 RP11-620J15.3 FAM101B CCDC132 +CD4+/CD45RO+ Memory RNF138 
NOSIP IFITM1 LCK RARRES3 ALOX5AP FAM63B RAB3IP GZMK CD3G SEPT1 LDHB SELL CD3D EXOG RPSA CD247 AES CD52 TMSB10 NUCB2 DENND2D RPL3 RPLP1 ACTG1 FYB GIMAP7 CORO1B LY9 CD7 PFN1 RPS3 GYPC CD2 ARHGDIB IL32 RPLP0 CD99 CD3E GIMAP4 HCST B2M LAT ISG20 ITM2A FKBP11 SERPINB1 STK17A CCR7 PTPRCAP +CD56+ NK CST7 SPON2 HOPX GNLY NKG7 CTSW KLRC2 CD7 MATK PCIF1 CLIC3 FGFBP2 SYPL1 GZMB C9orf142 PRF1 CD247 HCST GZMA GZMH STMN1 ALOX5AP CD63 CD99 IGFBP7 GZMM CCL5 B2M DENND2D GIMAP7 RARRES3 SIT1 IFITM1 PFN1 EXOG XBP1 IFITM2 GIMAP4 VIMP STK17A LCK GZMK SEPT1 SSR3 CD8A CD3G SPOCK2 RPS3 LDHB IL32 +CD8+ Cytotoxic T FAM101B ADTRP GZMK HCST LAT EGR1 CD8B CCL5 RPL3 LINC00402 FGFBP2 GZMM RPS3 CD3E GYPC DENND2D C9orf142 GZMA SEPT1 JUN FYB CD8A SELL ALOX5AP CD3G STK17A AQP3 C1orf228 CD3D HOPX NKG7 CD2 NGFRAP1 RPLP1 RPSA CCR7 IL7R SPON2 PRF1 RARRES3 PRKCQ-AS1 FKBP11 MANF CTSW GNLY CD27 LDHB MAL LTB RPLP0 +CD8+/CD45RA+ Naive Cytotoxic RP11-291B21.2 CD8A CD8B RSBN1L-AS1 GIMAP5 GZMM GALE CCR7 STK17A RAB3IP GZMH GIMAP7 CD3E C1orf228 LCK CCL5 PEBP1 CD27 GYPC LDHB RNF34 CD99 CD3G PFN1 IL7R CD2 C9orf142 TMSB10 NGFRAP1 S1PR4 ITM2A CD7 RPS3 IL32 FYB IFITM1 CD52 LAT GIMAP4 MAL STMN1 NOSIP RARRES3 SPOCK2 ACTG1 PRF1 CD3D RPLP1 SELL GZMA +Dendritic HLA-DQB1 CST3 HLA-DRB1 HLA-DQA2 HLA-DQA1 LYZ HLA-DPB1 HLA-DPA1 HLA-DMA HLA-DRA VIM CD74 ALDH2 FCER1A GPX1 HLA-DRB5 LGALS2 MNDA FCGRT GRN HLA-DMB FOS CPVL CLEC10A AMICA1 CFP LY86 GSTP1 RP11-473M20.7 IL1B GSN SPINT2 CCDC163P IGFBP7 EXOG DUSP1 CD63 COTL1 FTH1 SPI1 TYROBP SPIB S100A11 OAZ1 CTSS CCDC50 AIF1 SERPINB1 TMSB10 PCIF1