Repository 'scanpy_integrate_bbknn'
hg clone https://toolshed.g2.bx.psu.edu/repos/ebi-gxa/scanpy_integrate_bbknn

Changeset 0:e6d5b3fed639 (2020-09-07)
Next changeset 1:c2be8a21eaa2 (2020-09-09)
Commit message:
"planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/tertiary-analysis/scanpy commit 62f47287c7e8449c59a1f1f454852ddc669b1b1e-dirty"
added:
scanpy-integrate-bbknn.xml
scanpy_macros.xml
scanpy_macros2.xml
b
diff -r 000000000000 -r e6d5b3fed639 scanpy-integrate-bbknn.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scanpy-integrate-bbknn.xml Mon Sep 07 13:05:34 2020 +0000
[
b'@@ -0,0 +1,146 @@\n+<?xml version="1.0" encoding="utf-8"?>\n+<tool id="scanpy_integrate_bbknn" name="Scanpy BBKNN" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">\n+  <description>batch-balanced K-nearest neighbours</description>\n+  <macros>\n+    <import>scanpy_macros2.xml</import>\n+  </macros>\n+  <expand macro="requirements"/>\n+  <command detect_errors="exit_code"><![CDATA[\n+#if $batch_key\n+  ln -s \'${input_obj_file}\' input.h5 &&\n+  PYTHONIOENCODING=utf-8 scanpy-integrate bbknn \n+  --batch-key \'${batch_key}\'\n+  #if $settings.default == "false"\n+    #if $settings.use_rep\n+      --use-rep \'${settings.use_rep}\'\n+    #end if\n+    #if $settings.key_added\n+      --key-added \'${settings.key_added}\'\n+    #end if\n+    #if $settings.n_pcs\n+      --n-pcs \'${settings.n_pcs}\'\n+    #end if\n+    #if not $settings.approx\n+        ${settings.approx}\n+    #end if\n+    #if $settings.metric\n+        --metric \'${settings.metric}\'\n+    #end if\n+    #if $settings.neighbors_within_batch\n+        --neighbors-within-batch \'${settings.neighbors_within_batch}\'\n+    #end if\n+    #if $settings.trim\n+        --trim \'${settings.trim}\'\n+    #end if\n+    #if $settings.n_trees\n+        --n-trees \'${settings.n_trees}\'\n+    #end if\n+    #if not $settings.use_faiss\n+        ${settings.use_faiss}\n+    #end if\n+    #if $settings.set_op_mix_ratio\n+        --set-op-mix-ratio \'${settings.set_op_mix_ratio}\'\n+    #end if\n+    #if $settings.local_connectivity\n+        --local-connectivity \'${settings.local_connectivity}\'\n+    #end if\n+  #end if\n+  @INPUT_OPTS@\n+  @OUTPUT_OPTS@\n+#else\n+  echo "No batch variables passed, simply passing original input as output unchanged.";\n+  cp \'${input_obj_file}\' \'${output_h5}\'\n+#end if\n+]]></command>\n+\n+  <inputs>\n+    <expand macro="input_object_params"/>\n+    <expand macro="output_object_params"/>\n+    <param name="batch_key" type="text" argument="--batch-key" label="The name of the 
column in adata.obs that differentiates among experiments/batches.">\n+      <sanitizer>\n+        <valid initial="string.printable"/>\n+      </sanitizer>\n+    </param>\n+    <conditional name="settings">\n+      <param name="default" type="boolean" checked="true" label="Use programme defaults"/>\n+      <when value="true"/>\n+      <when value="false">\n+        <param name="use_rep" argument="--use-rep" type="text" value=\'X_pca\' label="The dimensionality reduction in .obsm to use for neighbour detection.">\n+          <sanitizer>\n+            <valid initial="string.printable"/>\n+          </sanitizer>\n+        </param>\n+        <param name="key_added" argument="--key-added" type="text" optional="true" label="Key under which to add the computed results." help="If not specified, the neighbors data is stored in .uns[\xe2\x80\x98neighbors\xe2\x80\x99], distances and connectivities are stored in .obsp[\xe2\x80\x98distances\xe2\x80\x99] and .obsp[\xe2\x80\x98connectivities\xe2\x80\x99] respectively. If specified, the neighbors data is added to .uns[key_added], distances are stored in .obsp[key_added+\xe2\x80\x99_distances\xe2\x80\x99] and connectivities in .obsp[key_added+\xe2\x80\x99_connectivities\xe2\x80\x99].">\n+          <sanitizer>\n+            <valid initial="string.printable"/>\n+          </sanitizer>\n+        </param>\n+        <param name="n_pcs" argument="--n-pcs" type="integer" value="50" optional="true" label="Number of PCs to use"/>\n+        <param name="approx" argument="--no-approx" type="boolean" truevalue="" falsevalue="--no-approx" checked="True"\n+            label="Use annoy\xe2\x80\x99s approximate neighbour finding?" 
help="This results in a quicker run time for large datasets while also potentially increasing the degree of batch correction."/>\n+        <param name="metric" argument="--metric" type="select" label="A known metric\xe2\x80\x99s name">\n+          <option value="angular" selected="true">angular</option>\n+          <option value="euclidean">Euclidean</option>\n+          <option value="cityblock">cityblock</option>\n+          <option value="cosine">cosine</option>\n+          <option value="l1">l1</option>\n+          <option value="l2">l2</o'..b'urtis</option>\n+          <option value="canberra">canberra</option>\n+          <option value="chebyshev">chebyshev</option>\n+          <option value="correlation">correlation</option>\n+          <option value="dice">dice</option>\n+          <option value="hamming">hamming</option>\n+          <option value="jaccard">jaccard</option>\n+          <option value="kulsinski">kulsinski</option>\n+          <option value="mahalanobis">mahalanobis</option>\n+          <option value="minkowski">minkowski</option>\n+          <option value="rogerstanimoto">rogerstanimoto</option>\n+          <option value="russelrao">russelrao</option>\n+          <option value="seuclidan">seuclidian</option>\n+          <option value="sokalmichener">sokalmichener</option>\n+          <option value="sokalsneath">sokalsneath</option>\n+          <option value="sqeuclidean">sqeuclidean</option>\n+          <option value="yule">yule</option>\n+        </param>\n+        <param name="neighbors_within_batch" argument="--neighbors-within-batch" type="integer" value="3" optional="true" label="How many top neighbours to report for each batch" help="Total number of neighbours will be this number times the number of batches."/>\n+        <param name="trim" argument="--trim" type="integer" value="" optional="true" label="Trim the neighbours of each cell to these many top connectivities." 
help="May help with population independence and improve the tidiness of clustering. The lower the value the more independent the individual populations, at the cost of more conserved batch effect. If not set, sets the parameter value automatically to 10 times the total number of neighbours for each cell. Set to 0 to skip."/>\n+        <param name="n_trees" argument="--n-trees" type="integer" value="10" optional="true" label="The number of trees to construct in the annoy forest." help="More trees give higher precision when querying, at the cost of increased run time and resource intensity."/>\n+        <param name="use_faiss" argument="--no-use-faiss" type="boolean" truevalue="" falsevalue="--no-use-faiss" checked="True"\n+            label="Use the faiss package to compute nearest neighbours if installed" help="If approx=False and the metric is \'euclidean\' use the faiss package to compute nearest neighbours if installed. This improves performance at a minor cost to numerical precision as faiss operates on float32."/>\n+        <param name="set_op_mix_ratio" argument="--set-op-mix-ratio" type="float" value="1" min="0" max="1" label="UMAP connectivity computation parameter" help="Float between 0 and 1, controlling the blend between a connectivity matrix formed exclusively from mutual nearest neighbour pairs (0) and a union of all observed neighbour relationships with the mutual pairs emphasised (1)."/>\n+        <param name="local_connectivity" argument="--local-connectivity" type="integer" value="1" label="UMAP connectivity computation parameter, how many nearest neighbors of each cell are assumed to be fully connected (and given a connectivity value of 1)."/>\n+      </when>\n+    </conditional>    \n+  </inputs>\n+\n+  <outputs>\n+    <expand macro="output_data_obj" description="Batch-corrected for ${batch_key}"/>\n+  </outputs>\n+\n+  <tests>\n+    <test>\n+      <param name="input_obj_file" value="find_cluster.h5"/>\n+      <param name="input_format" 
value="anndata"/>\n+      <param name="output_format" value="anndata"/>\n+      <param name="batch_key" value="louvain"/>\n+      <output name="output_h5" file="bbknn.h5" ftype="h5" compare="sim_size"/>\n+    </test>\n+  </tests>\n+\n+  <help><![CDATA[\n+    .. class:: infomark\n+\n+    **What it does**\n+\n+    Batch balanced kNN alters the kNN procedure to identify each cell\xe2\x80\x99s top neighbours in each batch separately instead of the entire cell pool with no accounting for batch. Aligns batches in a quick and lightweight manner.\n+\n+    Use as an alternative to Scanpy ComputeGraph.\n+\n+    @HELP@\n+\n+    @VERSION_HISTORY@\n+]]></help>\n+  <expand macro="citations"/>\n+</tool>\n'
b
diff -r 000000000000 -r e6d5b3fed639 scanpy_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scanpy_macros.xml Mon Sep 07 13:05:34 2020 +0000
[
@@ -0,0 +1,109 @@
+<macros>
+  <token name="@TOOL_VERSION@">1.3.2</token> <!-- Version string substituted wherever a tool references this token. -->
+  <token name="@HELP@">More information can be found at https://scanpy.readthedocs.io</token> <!-- Shared help footer text. -->
+  <token name="@PLOT_OPTS@">
+#if $do_plotting.plot
+                  -P output.png
+                  --projection $do_plotting.projection
+                  --components $do_plotting.components
+    #if $do_plotting.color_by
+                  --color-by $do_plotting.color_by
+    #end if
+    #if $do_plotting.groups
+                  --groups $do_plotting.groups
+    #end if
+    #if $do_plotting.use_raw
+                  --use-raw
+    #end if
+    #if $do_plotting.palette
+                  --palette $do_plotting.palette
+    #end if
+    #if $do_plotting.edges
+                  --edges
+    #end if
+    #if $do_plotting.arrows
+                  --arrows
+    #end if
+    #if not $do_plotting.sort_order
+                  --no-sort-order
+    #end if
+    #if $do_plotting.frameoff
+                  --frameoff
+    #end if
+#end if
+  </token> <!-- Cheetah fragment: emits plotting flags only when do_plotting.plot is set. The projection and groups options are spelled in full, matching the argument attributes declared in output_plot_params, rather than relying on option-prefix abbreviation. -->
+  <xml name="requirements"> <!-- Pins scanpy-scripts 0.0.5; the yield slot lets an expanding tool append further requirements. -->
+    <requirements>
+      <requirement version="0.0.5" type="package">scanpy-scripts</requirement>
+      <yield />
+    </requirements>
+  </xml>
+  <token name="@EXPORT_MTX_OPTS@">
+      ${export_mtx}
+  </token> <!-- Expands to the value of the export_mtx parameter (its truevalue or its empty falsevalue). -->
+  <token name="@VERSION_HISTORY@"><![CDATA[
+**Version history**
+
+1.3.2+galaxy1: Normalise-data and filter-genes: Exposes ability to output 10x files.
+
+1.3.2+galaxy0: Initial contribution. Ni Huang and Pablo Moreno, Expression Atlas team https://www.ebi.ac.uk/gxa/home  at
+EMBL-EBI https://www.ebi.ac.uk/ and Teichmann Lab at Wellcome Sanger Institute.
+    ]]></token> <!-- Changelog text for inclusion in a tool's help section; newest entry first. -->
+  <xml name="citations"> <!-- Shared citations: one DOI entry and one BibTeX entry for scanpy-scripts; the trailing yield slot allows extra citations. -->
+    <citations>
+      <citation type="doi">10.1186/s13059-017-1382-0</citation>
+      <citation type="bibtex">
+ @misc{githubscanpy-scripts,
+ author = {Ni Huang, EBI Gene Expression Team},
+ year = {2018},
+ title = {Scanpy-scripts: command line interface for Scanpy},
+ publisher = {GitHub},
+ journal = {GitHub repository},
+ url = {https://github.com/ebi-gene-expression-group/scanpy-scripts},
+      }</citation>
+      <yield />
+    </citations>
+  </xml>
+  <xml name="input_object_params"> <!-- Input dataset (h5) plus a selector for how that file should be interpreted. -->
+    <param name="input_obj_file" type="data" format="h5" argument="--input-object-file" label="Input object in hdf5 format"/>
+    <param name="input_format" type="select" argument="--input-format" label="Format of input object">
+      <option selected="true" value="anndata">AnnData format hdf5</option>
+      <option value="loom">Loom format hdf5, current support is incomplete</option>
+    </param>
+  </xml>
+  <xml name="output_object_params"> <!-- Selector for the output object format; AnnData is the default choice. -->
+    <param name="output_format" type="select" argument="--output-format" label="Format of output object">
+      <option selected="true" value="anndata">AnnData format hdf5</option>
+      <option value="loom">Loom format hdf5, current support is defective</option>
+    </param>
+  </xml>
+  <xml name="output_plot_params"> <!-- Plot parameters; their names match the do_plotting.* values read by the PLOT_OPTS token in this file. -->
+    <param name="color_by" argument="--color-by" type="text" value="n_genes" label="Color by attributes, comma separated strings"/>
+    <param name="groups" argument="--groups" type="text" optional="true" label="Restrict plotting to named groups, comma separated strings"/>
+    <param name="projection" argument="--projection" type="select" label="Plot projection">
+      <option value="2d" selected="true">2D</option>
+      <option value="3d">3D</option>
+    </param>
+    <param name="components" argument="--components" type="text" value="1,2" label="Components to plot, comma separated integers"/>
+    <param name="palette" argument="--palette" type="text" optional="true" label="Palette"/>
+    <param name="use_raw" argument="--use-raw" type="boolean" checked="false" label="Use raw attributes if present"/>
+    <param name="edges" argument="--edges" type="boolean" checked="false" label="Show edges"/>
+    <param name="arrows" argument="--arrows" type="boolean" checked="false" label="Show arrows"/>
+    <param name="sort_order" argument="--no-sort-order" type="boolean" checked="true" label="Element with high color-by value plot on top"/>
+    <param name="frameoff" argument="--frameoff" type="boolean" checked="false" label="Omit frame"/>
+  </xml>
+  <xml name="export_mtx_params"> <!-- Boolean switch whose truevalue is the complete export flag with "./" as target; unchecked yields an empty string. -->
+    <param name="export_mtx" type="boolean" argument="--export-mtx" checked="false" truevalue="--export-mtx ./" falsevalue="" label="Save normalised data to 10x format" help="If enabled, it will generate in addition to the main output in Loom or AnnData an export in 10x format of the normalised data."/>
+  </xml>
+  <xml name="export_mtx_outputs"> <!-- Three 10x outputs (matrix, genes, barcodes) picked up from the working directory; each is filtered on export_mtx. -->
+    <data name="matrix_10x" from_work_dir="matrix.mtx" format="txt" label="${tool.name} on ${on_string}: 10x matrix">
+      <filter>export_mtx</filter>
+    </data>
+    <data name="genes_10x" from_work_dir="genes.tsv" format="tsv" label="${tool.name} on ${on_string}: 10x genes">
+      <filter>export_mtx</filter>
+    </data>
+    <data name="barcodes_10x" from_work_dir="barcodes.tsv" format="tsv" label="${tool.name} on ${on_string}: 10x barcodes">
+      <filter>export_mtx</filter>
+    </data>
+  </xml>
+</macros>
b
diff -r 000000000000 -r e6d5b3fed639 scanpy_macros2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scanpy_macros2.xml Mon Sep 07 13:05:34 2020 +0000
[
@@ -0,0 +1,146 @@
+<macros>
+  <token name="@TOOL_VERSION@">1.6.0</token> <!-- Version prefix; the bbknn tool in this changeset appends "+galaxy0" to it. -->
+  <token name="@HELP@">More information can be found at https://scanpy.readthedocs.io</token> <!-- Shared help footer text. -->
+  <token name="@PROFILE@">18.01</token> <!-- Value used for the tool element's profile attribute. -->
+  <token name="@VERSION_HISTORY@"><![CDATA[
+**Version history**
+
+1.6.0+galaxy0: Update to scanpy-scripts 0.2.13 (running scanpy ==1.6.0) to incorporate new options, code simplifications, and batch integration methods. Jonathan Manning, Expression Atlas team https://www.ebi.ac.uk/gxa/home  at
+EMBL-EBI https://www.ebi.ac.uk/
+
+1.4.3+galaxy10: Update to scanpy-scripts 0.2.10 (running scanpy ==1.4.3) to address bugfixes in run-pca.
+
+1.4.3+galaxy10: Update to scanpy-scripts 0.2.9 (running scanpy ==1.4.3) to address bugfixes in find-variable-genes.
+
+1.4.3+galaxy10: Use profile 18.01 for modules.
+
+1.4.3+galaxy6: Update to scanpy-scripts 0.2.8 (running scanpy ==1.4.3) and wider compatibility with other Galaxy modules. Bug fixes in filtering and plotting improvements.
+
+1.4.3+galaxy0: Update to scanpy-scripts 0.2.5 (running scanpy ==1.4.3).
+
+1.4.2+galaxy0: Update to scanpy-scripts 0.2.4 (requires scanpy >=1.4.2).
+
+1.3.2+galaxy1: Normalise-data and filter-genes: Exposes ability to output 10x files.
+
+1.3.2+galaxy0: Initial contribution. Ni Huang and Pablo Moreno, Expression Atlas team https://www.ebi.ac.uk/gxa/home  at
+EMBL-EBI https://www.ebi.ac.uk/ and Teichmann Lab at Wellcome Sanger Institute.
+    ]]></token> <!-- Changelog text included in tool help; newest entry first. NOTE(review): three entries share the label 1.4.3+galaxy10; confirm whether that is intended. -->
+  <token name="@INPUT_OPTS@">
+    --input-format '${input_format}' input.h5
+  </token> <!-- Passes the selected input format followed by input.h5 as the input path; the bbknn tool symlinks the dataset to that name before use. -->
+  <token name="@OUTPUT_OPTS@">
+#if str($output_format).startswith('anndata')
+    --show-obj stdout --output-format anndata output.h5
+#else
+    --show-obj stdout --output-format loom output.h5
+#end if
+  </token> <!-- Any output_format value starting with "anndata" selects anndata; every other value selects loom. The result is always written to output.h5. -->
+  <token name="@PLOT_OPTS@">
+#if $fig_title
+    --title '${fig_title}'
+#end if
+    --fig-size '${fig_size}'
+    --fig-dpi ${fig_dpi}
+    --fig-fontsize ${fig_fontsize}
+    ${fig_frame}
+    ./output.png
+  </token> <!-- Figure options: the title flag is emitted only when fig_title is non-empty, fig_frame expands to its truevalue or falsevalue, and ./output.png is passed as the final argument. -->
+  <token name="@EXPORT_MTX_OPTS@">${export_mtx}</token> <!-- Expands to the value of the export_mtx parameter (its truevalue or its empty falsevalue). -->
+
+  <xml name="requirements"> <!-- Pins scanpy-scripts 0.3.0; the yield slot lets an expanding tool append further requirements. -->
+    <requirements>
+      <requirement version="0.3.0" type="package">scanpy-scripts</requirement>
+      <yield />
+    </requirements>
+  </xml>
+
+  <xml name="citations"> <!-- Shared citations: caller-supplied entries (yield) come first, followed by two DOI entries and a BibTeX entry for scanpy-scripts. -->
+    <citations>
+      <yield />
+      <citation type="doi">10.1186/s13059-017-1382-0</citation>
+      <citation type="bibtex">
+ @misc{githubscanpy-scripts,
+ author = {Ni Huang, EBI Gene Expression Team},
+ year = {2018},
+ title = {Scanpy-scripts: command line interface for Scanpy},
+ publisher = {GitHub},
+ journal = {GitHub repository},
+ url = {https://github.com/ebi-gene-expression-group/scanpy-scripts},
+      }</citation>
+      <citation type="doi">10.1101/2020.04.08.032698</citation>
+    </citations>
+  </xml>
+
+  <xml name="input_object_params"> <!-- Input dataset (h5 or h5ad) plus a selector for how that file should be interpreted. -->
+    <param name="input_obj_file" type="data" format="h5,h5ad" argument="input-object-file" label="Input object in AnnData/Loom format"/>
+    <param name="input_format" type="select" argument="--input-format" label="Format of input object">
+      <option selected="true" value="anndata">AnnData format hdf5</option>
+      <option value="loom">Loom format hdf5</option>
+    </param>
+  </xml>
+
+  <xml name="output_object_params"> <!-- Output format selector with four choices; the values are matched by the filters in output_data_obj. -->
+    <param name="output_format" type="select" argument="--output-format" label="Format of output object">
+      <option selected="true" value="anndata_h5ad">AnnData format</option>
+      <option value="anndata">AnnData format (h5 for older versions)</option>
+      <option value="loom">Loom format</option>
+      <option value="loom_legacy">Loom format (h5 for older versions)</option>
+    </param>
+  </xml>
+
+  <xml name="output_object_params_no_loom"> <!-- Output format selector restricted to the two AnnData choices. -->
+    <param name="output_format" type="select" argument="--output-format" label="Format of output object">
+      <option selected="true" value="anndata_h5ad">AnnData format</option>
+      <option value="anndata">AnnData format (h5 for older versions)</option>
+    </param>
+  </xml>
+
+  <xml token_description="operation" name="output_data_obj_no_loom"> <!-- AnnData-only outputs: both read output.h5 from the working directory, and the output_format filter decides which one is created. -->
+    <data name="output_h5ad" from_work_dir="output.h5" format="h5ad" label="${tool.name} on ${on_string}: @DESCRIPTION@ AnnData">
+      <filter>output_format == 'anndata_h5ad'</filter>
+    </data>
+    <data name="output_h5" from_work_dir="output.h5" format="h5" label="${tool.name} on ${on_string}: @DESCRIPTION@ AnnData">
+      <filter>output_format == 'anndata'</filter>
+    </data>
+  </xml>
+
+  <xml token_description="operation" name="output_data_obj"> <!-- One output per output_format choice; all four read output.h5 from the working directory and differ only in name, datatype and filter. -->
+    <data name="output_h5ad" from_work_dir="output.h5" format="h5ad" label="${tool.name} on ${on_string}: @DESCRIPTION@ AnnData">
+      <filter>output_format == 'anndata_h5ad'</filter>
+    </data>
+    <data name="output_h5" from_work_dir="output.h5" format="h5" label="${tool.name} on ${on_string}: @DESCRIPTION@ AnnData">
+      <filter>output_format == 'anndata'</filter>
+    </data>
+    <data name="output_loom_legacy" from_work_dir="output.h5" format="h5" label="${tool.name} on ${on_string}: @DESCRIPTION@ Loom">
+      <filter>output_format == 'loom_legacy'</filter>
+    </data>
+    <data name="output_loom" from_work_dir="output.h5" format="loom" label="${tool.name} on ${on_string}: @DESCRIPTION@ Loom">
+      <filter>output_format == 'loom'</filter>
+    </data>
+  </xml>
+
+  <xml name="output_plot_params"> <!-- Figure parameters read by the PLOT_OPTS token in this file: title, size, dpi, font size and frame toggle. -->
+    <param name="fig_title" type="text" argument="--title" label="Figure title"/>
+    <param name="fig_size" type="text" argument="--fig-size" value="4,4" label="Figure size as 'width,height', e.g, '7,7'"/>
+    <param name="fig_dpi" type="integer" argument="--fig-dpi" value="80" min="1" label="Figure dpi"/>
+    <param name="fig_fontsize" type="integer" argument="--fig-fontsize" value="10" min="0" label="Figure font size"/>
+    <param name="fig_frame" type="boolean" checked="false" truevalue="--frameon" falsevalue="--frameoff"
+           label="Show plot frame"/>
+  </xml>
+
+  <xml name="export_mtx_params"> <!-- Boolean switch whose truevalue is the complete export flag with "./" as target; unchecked yields an empty string. -->
+    <param name="export_mtx" type="boolean" argument="--export-mtx" checked="false" truevalue="--export-mtx ./" falsevalue="" label="Save to 10x mtx format" help="If enabled, it will generate in addition to the main output in Loom or AnnData an export in 10x format."/>
+  </xml>
+
+  <xml name="export_mtx_outputs"> <!-- Three 10x outputs (matrix, genes, barcodes) picked up from the working directory; each is filtered on export_mtx. -->
+    <data name="matrix_10x" from_work_dir="matrix.mtx" format="txt" label="${tool.name} on ${on_string}: 10x matrix">
+      <filter>export_mtx</filter>
+    </data>
+    <data name="genes_10x" from_work_dir="genes.tsv" format="tsv" label="${tool.name} on ${on_string}: 10x genes">
+      <filter>export_mtx</filter>
+    </data>
+    <data name="barcodes_10x" from_work_dir="barcodes.tsv" format="tsv" label="${tool.name} on ${on_string}: 10x barcodes">
+      <filter>export_mtx</filter>
+    </data>
+  </xml>
+</macros>