changeset 0:9f8381c5d808 draft

"planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/tertiary-analysis/scanpy commit 3365d5d4f7930e736fa6814df36d68e578d45d46-dirty"
author ebi-gxa
date Thu, 22 Jul 2021 20:57:12 +0000
parents
children 70cb0596ffef
files scanpy-multiplet-scrublet.xml scanpy_macros.xml scanpy_macros2.xml
diffstat 3 files changed, 396 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scanpy-multiplet-scrublet.xml	Thu Jul 22 20:57:12 2021 +0000
@@ -0,0 +1,121 @@
+<?xml version="1.0" encoding="utf-8"?>
+<tool id="scanpy_multiplet_scrublet" name="Scanpy Scrublet" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
+  <description>remove multiplets from annData objects with Scrublet</description>
+  <!-- Tokens and macros expanded in this wrapper are imported from scanpy_macros2.xml. -->
+  <macros>
+    <import>scanpy_macros2.xml</import>
+  </macros>
+  <expand macro="requirements"/>
+  <command detect_errors="exit_code"><![CDATA[
+## Expose the input under a fixed name; the INPUT_OPTS token passes input.h5 to the CLI.
+ln -s '${input_obj_file}' input.h5 &&
+PYTHONIOENCODING=utf-8 scanpy-multiplet scrublet
+## Optional numeric parameters are tested via their string form so an explicit 0 is still forwarded.
+#if str($threshold) not in ('', 'None')
+    --threshold '${threshold}'
+#end if
+#if $batch_key
+    --batch-key '${batch_key}'
+#end if
+$filter
+## Advanced options are only emitted when "Use programme defaults" is unchecked.
+#if $settings.default == "false"
+    #if str($settings.n_neighbors) not in ('', 'None')
+        --n-neighbors '${settings.n_neighbors}'
+    #end if
+    --sim-doublet-ratio '${settings.sim_doublet_ratio}'
+    --synthetic-doublet-umi-subsampling '${settings.synthetic_doublet_umi_subsampling}'
+    --expected-doublet-rate '${settings.expected_doublet_rate}'
+    --stdev-doublet-rate '${settings.stdev_doublet_rate}'
+    --knn-dist-metric '${settings.knn_dist_metric}'
+    ## n_pcs is declared optional, so skip the flag rather than pass an empty string.
+    #if str($settings.n_pcs) not in ('', 'None')
+        --n-pcs '${settings.n_pcs}'
+    #end if
+    --random-state '${settings.random_state}'
+    ${settings.normalise_variance} ${settings.log_transform} ${settings.mean_center}
+    ${settings.approx} ${settings.get_doublet_neighbor_parents}
+#end if
+@INPUT_OPTS@
+@OUTPUT_OPTS@
+]]></command>
+
+  <inputs>
+    <expand macro="input_object_params"/>
+    <expand macro="output_object_params"/>
+    <param name="threshold" argument="--threshold" type="float" optional="true" label="Doublet score threshold." help="Threshold for calling a transcriptome a doublet. If not set, this is set automatically by looking for the minimum between the two modes of the doublet_scores_sim_ histogram. It is best practice to check the threshold visually using the doublet_scores_sim_ histogram and/or based on co-localization of predicted doublets in a 2-D embedding."/>
+    <param name="filter" argument="--filter" type="boolean" truevalue="--filter" falsevalue="" checked="False"
+         label="Remove predicted multiplets?" help="By default, the output object is annotated but not filtered according to the scrublet status."/>
+    <param name="batch_key" type="text" argument="--batch-key" optional="true" label="Where batches are present, the name of the column in adata.obs that differentiates among experiments/batches."/>
+    <!-- Advanced Scrublet settings: the nested parameters only reach the command line when "Use programme defaults" is unchecked. -->
+    <conditional name="settings">
+      <param name="default" type="boolean" checked="true" label="Use programme defaults"/>
+      <when value="true"/>
+      <when value="false">
+        <param name="sim_doublet_ratio" argument="--sim-doublet-ratio" type="float" value="2.0" label="Number of doublets to simulate relative to the number of observed transcriptomes."/>
+        <param name="synthetic_doublet_umi_subsampling" argument="--synthetic-doublet-umi-subsampling" type="float" value="1.0" label="Rate for sampling UMIs when creating synthetic doublets." help="If 1.0, each doublet is created by simply adding the UMI counts from two randomly sampled observed transcriptomes. For values less than 1, the UMI counts are added and then randomly sampled at the specified rate."/>
+        <param name="expected_doublet_rate" argument="--expected-doublet-rate" type="float" value="0.05" label="Estimated doublet rate for the experiment."/>
+        <param name="stdev_doublet_rate" argument="--stdev-doublet-rate" type="float" value="0.02" label="Uncertainty in the expected doublet rate."/>
+        <!-- Option values are passed verbatim to the CLI, so they must be spelt exactly as the metric names. -->
+        <param name="knn_dist_metric" argument="--knn-dist-metric" type="select" label="A known metric’s name.">
+          <option value="euclidean" selected="true">Euclidean</option>
+          <option value="angular">angular</option>
+          <option value="cityblock">cityblock</option>
+          <option value="cosine">cosine</option>
+          <option value="l1">l1</option>
+          <option value="l2">l2</option>
+          <option value="manhattan">manhattan</option>
+          <option value="braycurtis">braycurtis</option>
+          <option value="canberra">canberra</option>
+          <option value="chebyshev">chebyshev</option>
+          <option value="correlation">correlation</option>
+          <option value="dice">dice</option>
+          <option value="hamming">hamming</option>
+          <option value="jaccard">jaccard</option>
+          <option value="kulsinski">kulsinski</option>
+          <option value="mahalanobis">mahalanobis</option>
+          <option value="minkowski">minkowski</option>
+          <option value="rogerstanimoto">rogerstanimoto</option>
+          <option value="russellrao">russellrao</option>
+          <option value="seuclidean">seuclidean</option>
+          <option value="sokalmichener">sokalmichener</option>
+          <option value="sokalsneath">sokalsneath</option>
+          <option value="sqeuclidean">sqeuclidean</option>
+          <option value="yule">yule</option>
+        </param>
+        <!-- normalise_variance, mean_center and approx are inverted switches: checked emits nothing, unchecked emits the matching "no-..." flag. -->
+        <param name="normalise_variance" argument="--no-normalize-variance" type="boolean" truevalue="" falsevalue="--no-normalize-variance" checked="True"
+            label="Normalize the data such that each gene has a variance of 1?" help="sklearn.decomposition.TruncatedSVD will be used for dimensionality reduction, if --no-mean-center is set. Use this flag to disable that behaviour." />
+        <param name="log_transform" argument="--log-transform" type="boolean" truevalue="--log-transform" falsevalue="" checked="False"
+             label="Apply log transform?" help="Whether to use :func:~scanpy.pp.log1p to log-transform the data prior to PCA."/>
+        <param name="mean_center" argument="--no-mean-center" type="boolean" truevalue="" falsevalue="--no-mean-center" checked="True"
+             label="Center the data such that each gene has a mean of 0" help="sklearn.decomposition.PCA will be used for dimensionality reduction."/>
+        <param name="n_pcs" argument="--n-pcs" type="integer" value="30" optional="true" label="Number of principal components." help="Used to embed the transcriptomes prior to k-nearest-neighbor graph construction."/>
+        <param name="approx" argument="--no-approx" type="boolean" truevalue="" falsevalue="--no-approx" checked="True"
+             label="Use approximate nearest neighbor (annoy) method for the KNN classifier"/>
+        <param name="get_doublet_neighbor_parents" argument="--get-doublet-neighbor-parents" type="boolean" truevalue="--get-doublet-neighbor-parents" falsevalue="" checked="False"
+            label="Get doublet neighbor parents" help="Return (in .uns) the parent transcriptomes that generated the doublet neighbors of each observed transcriptome. This information can be used to infer the cell states that generated a given doublet state."/>
+        <param name="n_neighbors" argument="--n-neighbors" type="integer" optional="true" label="Number of neighbors." help="Used to construct the KNN graph of observed transcriptomes and simulated doublets. If not set, this is automatically set to np.round(0.5 * np.sqrt(n_obs))."/>
+        <param name="random_state" argument="--random-state" type="integer" value="0" label="Seed for random number generator." />
+      </when>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <!-- Expands the output_data_obj macro; the description value is substituted into each output label. -->
+    <expand macro="output_data_obj" description="Scrublet-processed annData"/>
+  </outputs>
+
+  <tests>
+    <!-- Runs the tool on read_10x.h5 with output_format "anndata" and compares the output_h5 dataset to scrublet.h5 by size (sim_size). -->
+    <test>
+      <param name="input_obj_file" value="read_10x.h5"/>
+      <param name="input_format" value="anndata"/>
+      <param name="output_format" value="anndata"/>
+      <output name="output_h5" file="scrublet.h5" ftype="h5" compare="sim_size"/>
+    </test>
+  </tests>
+
+  <help><![CDATA[
+    .. class:: infomark
+
+    **What it does**
+
+    Predict cell doublets using a nearest-neighbor classifier of observed transcriptomes and simulated doublets. Works best if the input is a raw (unnormalized) counts matrix from a single sample or a collection of similar samples from the same experiment. This function is a wrapper around functions that pre-process using Scanpy and directly call functions of Scrublet(). 
+
+    This tool wraps the Scanpy interface to Scrublet; see https://scanpy.readthedocs.io/en/docsearch/external/scanpy.external.pp.scrublet.html.
+
+    @HELP@
+
+    @VERSION_HISTORY@
+]]></help>
+  <expand macro="citations"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scanpy_macros.xml	Thu Jul 22 20:57:12 2021 +0000
@@ -0,0 +1,109 @@
+<macros>
+  <!-- Tool version token and the shared help footer text. -->
+  <token name="@TOOL_VERSION@">1.3.2</token>
+  <token name="@HELP@">More information can be found at https://scanpy.readthedocs.io</token>
+  <!-- Plotting arguments, emitted only when do_plotting.plot is enabled. Long option names follow the argument attributes declared in the output_plot_params macro; text values are single-quoted. -->
+  <token name="@PLOT_OPTS@">
+#if $do_plotting.plot
+                  -P output.png
+                  --projection '${do_plotting.projection}'
+                  --components '${do_plotting.components}'
+    #if $do_plotting.color_by
+                  --color-by '${do_plotting.color_by}'
+    #end if
+    #if $do_plotting.groups
+                  --groups '${do_plotting.groups}'
+    #end if
+    #if $do_plotting.use_raw
+                  --use-raw
+    #end if
+    #if $do_plotting.palette
+                  --palette '${do_plotting.palette}'
+    #end if
+    #if $do_plotting.edges
+                  --edges
+    #end if
+    #if $do_plotting.arrows
+                  --arrows
+    #end if
+    #if not $do_plotting.sort_order
+                  --no-sort-order
+    #end if
+    #if $do_plotting.frameoff
+                  --frameoff
+    #end if
+#end if
+  </token>
+  <!-- Declares the scanpy-scripts 0.0.5 package; the yield lets an expanding tool add further requirement elements. -->
+  <xml name="requirements">
+    <requirements>
+      <requirement type="package" version="0.0.5">scanpy-scripts</requirement>
+      <yield/>
+    </requirements>
+  </xml>
+  <!-- Expands to the value of the export_mtx parameter (its truevalue when enabled, empty otherwise). -->
+  <token name="@EXPORT_MTX_OPTS@">
+      ${export_mtx}
+  </token>
+  <token name="@VERSION_HISTORY@"><![CDATA[
+**Version history**
+
+1.3.2+galaxy1: Normalise-data and filter-genes: Exposes ability to output 10x files.
+
+1.3.2+galaxy0: Initial contribution. Ni Huang and Pablo Moreno, Expression Atlas team https://www.ebi.ac.uk/gxa/home  at
+EMBL-EBI https://www.ebi.ac.uk/ and Teichmann Lab at Wellcome Sanger Institute.
+    ]]></token>
+  <!-- Citations shared by the tools: one DOI, then a BibTeX entry for the scanpy-scripts repository; the yield lets an expanding tool append more. -->
+  <xml name="citations">
+    <citations>
+      <citation type="doi">10.1186/s13059-017-1382-0</citation>
+      <citation type="bibtex">
+	@misc{githubscanpy-scripts,
+	author = {Ni Huang, EBI Gene Expression Team},
+	year = {2018},
+	title = {Scanpy-scripts: command line interface for Scanpy},
+	publisher = {GitHub},
+	journal = {GitHub repository},
+	url = {https://github.com/ebi-gene-expression-group/scanpy-scripts},
+      }</citation>
+      <yield />
+    </citations>
+  </xml>
+  <!-- Input dataset (h5) and a select for its format: AnnData (default) or Loom. -->
+  <xml name="input_object_params">
+    <param name="input_obj_file"
+           argument="--input-object-file"
+           type="data"
+           format="h5"
+           label="Input object in hdf5 format"/>
+    <param name="input_format"
+           argument="--input-format"
+           type="select"
+           label="Format of input object">
+      <option value="anndata" selected="true">AnnData format hdf5</option>
+      <option value="loom">Loom format hdf5, current support is incomplete</option>
+    </param>
+  </xml>
+  <!-- Select for the format of the output object: AnnData (default) or Loom. -->
+  <xml name="output_object_params">
+    <param name="output_format"
+           argument="--output-format"
+           type="select"
+           label="Format of output object">
+      <option value="anndata" selected="true">AnnData format hdf5</option>
+      <option value="loom">Loom format hdf5, current support is defective</option>
+    </param>
+  </xml>
+  <!-- Plot appearance parameters; their values are read by the PLOT_OPTS token through the do_plotting section. -->
+  <xml name="output_plot_params">
+    <param name="color_by" argument="--color-by" type="text" value="n_genes" label="Color by attributes, comma separated strings"/>
+    <param name="groups" argument="--groups" type="text" optional="true" label="Restrict plotting to named groups, comma separated strings"/>
+    <param name="projection" argument="--projection" type="select" label="Plot projection">
+      <option value="2d" selected="true">2D</option>
+      <option value="3d">3D</option>
+    </param>
+    <param name="components" argument="--components" type="text" value="1,2" label="Components to plot, comma separated integers"/>
+    <param name="palette" argument="--palette" type="text" optional="true" label="Palette"/>
+    <param name="use_raw" argument="--use-raw" type="boolean" checked="false" label="Use raw attributes if present"/>
+    <param name="edges" argument="--edges" type="boolean" checked="false" label="Show edges"/>
+    <param name="arrows" argument="--arrows" type="boolean" checked="false" label="Show arrows"/>
+    <param name="sort_order" argument="--no-sort-order" type="boolean" checked="true" label="Element with high color-by value plot on top"/>
+    <param name="frameoff" argument="--frameoff" type="boolean" checked="false" label="Omit frame"/>
+  </xml>
+  <!-- Boolean switch whose truevalue is the complete export option, so it can be dropped straight into a command line. -->
+  <xml name="export_mtx_params">
+    <param name="export_mtx"
+           argument="--export-mtx"
+           type="boolean"
+           truevalue="--export-mtx ./"
+           falsevalue=""
+           checked="false"
+           label="Save normalised data to 10x format"
+           help="If enabled, it will generate in addition to the main output in Loom or AnnData an export in 10x format of the normalised data."/>
+  </xml>
+  <!-- The three 10x files collected from the working directory; each is kept only when export_mtx is enabled. -->
+  <xml name="export_mtx_outputs">
+    <data name="matrix_10x"
+          format="txt"
+          from_work_dir="matrix.mtx"
+          label="${tool.name} on ${on_string}: 10x matrix">
+      <filter>export_mtx</filter>
+    </data>
+    <data name="genes_10x"
+          format="tsv"
+          from_work_dir="genes.tsv"
+          label="${tool.name} on ${on_string}: 10x genes">
+      <filter>export_mtx</filter>
+    </data>
+    <data name="barcodes_10x"
+          format="tsv"
+          from_work_dir="barcodes.tsv"
+          label="${tool.name} on ${on_string}: 10x barcodes">
+      <filter>export_mtx</filter>
+    </data>
+  </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scanpy_macros2.xml	Thu Jul 22 20:57:12 2021 +0000
@@ -0,0 +1,166 @@
+<macros>
+  <!-- Tool version, shared help footer and Galaxy profile tokens. -->
+  <token name="@TOOL_VERSION@">1.8.1</token>
+  <token name="@HELP@">More information can be found at https://scanpy.readthedocs.io</token>
+  <token name="@PROFILE@">18.01</token>
+  <token name="@VERSION_HISTORY@"><![CDATA[
+**Version history**
+
+1.8.1+galaxy0: Update to scanpy-scripts 1.0.1 (running scanpy ==1.8.1), including Scrublet integration.
+
+1.7.2+galaxy0: Update to scanpy-scripts 0.3.3 (running scanpy ==1.7.2) to incorporate fix for object output from PAGA plotting, to allow PAGA init of FDG.
+
+1.6.0+galaxy0: Update to scanpy-scripts 0.2.13 (running scanpy ==1.6.0) to incorporate new options, code simplifications, and batch integration methods. Jonathan Manning, Expression Atlas team https://www.ebi.ac.uk/gxa/home  at
+EMBL-EBI https://www.ebi.ac.uk/
+
+1.4.3+galaxy10: Update to scanpy-scripts 0.2.10 (running scanpy ==1.4.3) to address bugfixes in run-pca.
+
+1.4.3+galaxy10: Update to scanpy-scripts 0.2.9 (running scanpy ==1.4.3) to address bugfixes in find-variable-genes.
+
+1.4.3+galaxy10: Use profile 18.01 for modules.
+
+1.4.3+galaxy6: Update to scanpy-scripts 0.2.8 (running scanpy ==1.4.3) and wider compatibility with other Galaxy modules. Bug fixes in filtering and plotting improvements.
+
+1.4.3+galaxy0: Update to scanpy-scripts 0.2.5 (running scanpy ==1.4.3).
+
+1.4.2+galaxy0: Update to scanpy-scripts 0.2.4 (requires scanpy >=1.4.2).
+
+1.3.2+galaxy1: Normalise-data and filter-genes: Exposes ability to output 10x files.
+
+1.3.2+galaxy0: Initial contribution. Ni Huang and Pablo Moreno, Expression Atlas team https://www.ebi.ac.uk/gxa/home  at
+EMBL-EBI https://www.ebi.ac.uk/ and Teichmann Lab at Wellcome Sanger Institute.
+    ]]></token>
+  <!-- Passes the selected input format followed by the fixed file name input.h5. -->
+  <token name="@INPUT_OPTS@">
+    --input-format '${input_format}' input.h5
+  </token>
+  <!-- Flags for keeping a copy of the matrix before processing. Both parameters come from the save_matrix_params macro; the layer name is tested and emitted through the same variable, and is single-quoted. NOTE(review): assumes the macro is expanded at the top level of the tool inputs; confirm against the tools that use it. -->
+  <token name="@SAVE_MATRIX_OPTS@">
+    #if $save_raw
+      --save-raw
+    #end if
+    #if $save_layer
+      --save-layer '${save_layer}'
+    #end if
+  </token>
+  <!-- Writes output.h5 as AnnData when output_format starts with "anndata" and as Loom otherwise; show-obj stdout is passed in both cases. -->
+  <token name="@OUTPUT_OPTS@">
+    --show-obj stdout
+#if str($output_format).startswith('anndata')
+    --output-format anndata
+#else
+    --output-format loom
+#end if
+    output.h5
+  </token>
+  <!-- Figure options: the title is optional, while size, dpi, font size and the frame flag are always emitted, followed by the output path ./output.png. -->
+  <token name="@PLOT_OPTS@">
+#if $fig_title
+    --title '${fig_title}'
+#end if
+    --fig-size '${fig_size}'
+    --fig-dpi ${fig_dpi}
+    --fig-fontsize ${fig_fontsize}
+    ${fig_frame}
+    ./output.png
+  </token>
+  <!-- Expands to the value of the export_mtx parameter (its truevalue when enabled, empty otherwise). -->
+  <token name="@EXPORT_MTX_OPTS@">${export_mtx}</token>
+
+  <!-- Declares the scanpy-scripts package; the yield lets an expanding tool add further requirement elements. NOTE(review): the version history in this file mentions scanpy-scripts 1.0.1 while 1.1.0 is pinned here; confirm which is intended. -->
+  <xml name="requirements">
+    <requirements>
+      <requirement type="package" version="1.1.0">scanpy-scripts</requirement>
+      <yield/>
+    </requirements>
+  </xml>
+
+  <!-- Shared citations: tool-specific entries supplied through the yield come first, followed by three DOIs and a BibTeX entry for the scanpy-scripts repository. -->
+  <xml name="citations">
+    <citations>
+      <yield />
+      <citation type="doi">10.1186/s13059-017-1382-0</citation>
+      <citation type="bibtex">
+	@misc{githubscanpy-scripts,
+	author = {Ni Huang, EBI Gene Expression Team},
+	year = {2018},
+	title = {Scanpy-scripts: command line interface for Scanpy},
+	publisher = {GitHub},
+	journal = {GitHub repository},
+	url = {https://github.com/ebi-gene-expression-group/scanpy-scripts},
+      }</citation>
+      <citation type="doi">10.1101/2020.04.08.032698</citation>
+      <citation type="doi">10.1038/s41592-021-01102-w</citation>
+    </citations>
+  </xml>
+
+  <!-- Input dataset (h5 or h5ad) and a select for its format: AnnData (default) or Loom. -->
+  <xml name="input_object_params">
+    <param name="input_obj_file"
+           argument="input-object-file"
+           type="data"
+           format="h5,h5ad"
+           label="Input object in AnnData/Loom format"/>
+    <param name="input_format"
+           argument="--input-format"
+           type="select"
+           label="Format of input object">
+      <option value="anndata" selected="true">AnnData format hdf5</option>
+      <option value="loom">Loom format hdf5</option>
+    </param>
+  </xml>
+
+  <!-- Output format select; each option value is matched by a filter in the output_data_obj macro. -->
+  <xml name="output_object_params">
+    <param name="output_format"
+           argument="--output-format"
+           type="select"
+           label="Format of output object">
+      <option value="anndata_h5ad" selected="true">AnnData format</option>
+      <option value="anndata">AnnData format (h5 for older versions)</option>
+      <option value="loom">Loom format</option>
+      <option value="loom_legacy">Loom format (h5 for older versions)</option>
+    </param>
+  </xml>
+
+  <!-- Output format select restricted to the two AnnData variants. -->
+  <xml name="output_object_params_no_loom">
+    <param name="output_format"
+           argument="--output-format"
+           type="select"
+           label="Format of output object">
+      <option value="anndata_h5ad" selected="true">AnnData format</option>
+      <option value="anndata">AnnData format (h5 for older versions)</option>
+    </param>
+  </xml>
+
+  <!-- AnnData-only outputs: both read output.h5 from the working directory, and the output_format filter decides which one is created. -->
+  <xml name="output_data_obj_no_loom" token_description="operation">
+    <data name="output_h5ad"
+          format="h5ad"
+          from_work_dir="output.h5"
+          label="${tool.name} on ${on_string}: @DESCRIPTION@ AnnData">
+      <filter>output_format == 'anndata_h5ad'</filter>
+    </data>
+    <data name="output_h5"
+          format="h5"
+          from_work_dir="output.h5"
+          label="${tool.name} on ${on_string}: @DESCRIPTION@ AnnData">
+      <filter>output_format == 'anndata'</filter>
+    </data>
+  </xml>
+
+  <!-- One output per output_format option: all four read output.h5 from the working directory, and the filter decides which one is created. -->
+  <xml name="output_data_obj" token_description="operation">
+    <data name="output_h5ad"
+          format="h5ad"
+          from_work_dir="output.h5"
+          label="${tool.name} on ${on_string}: @DESCRIPTION@ AnnData">
+      <filter>output_format == 'anndata_h5ad'</filter>
+    </data>
+    <data name="output_h5"
+          format="h5"
+          from_work_dir="output.h5"
+          label="${tool.name} on ${on_string}: @DESCRIPTION@ AnnData">
+      <filter>output_format == 'anndata'</filter>
+    </data>
+    <data name="output_loom_legacy"
+          format="h5"
+          from_work_dir="output.h5"
+          label="${tool.name} on ${on_string}: @DESCRIPTION@ Loom">
+      <filter>output_format == 'loom_legacy'</filter>
+    </data>
+    <data name="output_loom"
+          format="loom"
+          from_work_dir="output.h5"
+          label="${tool.name} on ${on_string}: @DESCRIPTION@ Loom">
+      <filter>output_format == 'loom'</filter>
+    </data>
+  </xml>
+
+  <!-- Parameters for keeping a copy of the data before processing: a yes/no switch for adata.raw and an optional layer name. -->
+  <xml name="save_matrix_params">
+    <param name="save_raw"
+           argument="--save-raw"
+           type="boolean"
+           truevalue="yes"
+           falsevalue="no"
+           checked="false"
+           label="Save adata to adata.raw before processing?"/>
+    <param name="save_layer"
+           argument="--save-layer"
+           type="text"
+           optional="true"
+           label="Save adata.X to the specified layer before processing."/>
+  </xml>
+
+  <!-- Figure appearance parameters consumed by the PLOT_OPTS token: title, size, dpi, font size and frame switch. -->
+  <xml name="output_plot_params">
+    <param name="fig_title"
+           argument="--title"
+           type="text"
+           label="Figure title"/>
+    <param name="fig_size"
+           argument="--fig-size"
+           type="text"
+           value="4,4"
+           label="Figure size as 'width,height', e.g, '7,7'"/>
+    <param name="fig_dpi"
+           argument="--fig-dpi"
+           type="integer"
+           min="1"
+           value="80"
+           label="Figure dpi"/>
+    <param name="fig_fontsize"
+           argument="--fig-fontsize"
+           type="integer"
+           min="0"
+           value="10"
+           label="Figure font size"/>
+    <param name="fig_frame"
+           type="boolean"
+           truevalue="--frameon"
+           falsevalue="--frameoff"
+           checked="false"
+           label="Show plot frame"/>
+  </xml>
+
+  <!-- Boolean switch whose truevalue is the complete export option, so it can be dropped straight into a command line. -->
+  <xml name="export_mtx_params">
+    <param name="export_mtx"
+           argument="--export-mtx"
+           type="boolean"
+           truevalue="--export-mtx ./"
+           falsevalue=""
+           checked="false"
+           label="Save to 10x mtx format"
+           help="If enabled, it will generate in addition to the main output in Loom or AnnData an export in 10x format."/>
+  </xml>
+
+  <!-- The three 10x files collected from the working directory; each is kept only when export_mtx is enabled. -->
+  <xml name="export_mtx_outputs">
+    <data name="matrix_10x"
+          format="txt"
+          from_work_dir="matrix.mtx"
+          label="${tool.name} on ${on_string}: 10x matrix">
+      <filter>export_mtx</filter>
+    </data>
+    <data name="genes_10x"
+          format="tsv"
+          from_work_dir="genes.tsv"
+          label="${tool.name} on ${on_string}: 10x genes">
+      <filter>export_mtx</filter>
+    </data>
+    <data name="barcodes_10x"
+          format="tsv"
+          from_work_dir="barcodes.tsv"
+          label="${tool.name} on ${on_string}: 10x barcodes">
+      <filter>export_mtx</filter>
+    </data>
+  </xml>
+</macros>