Galaxy |

Changeset 0:af821711b356 (2024-05-16)

Next changeset 1:8f8bef61fd0b (2024-05-23)

Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snapatac2 commit be132b56781bede5dc6e020aa80ca315546666cd

added:
dimension_reduction_clustering.xml
macros.xml

diff -r 000000000000 -r af821711b356 dimension_reduction_clustering.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dimension_reduction_clustering.xml Thu May 16 13:15:57 2024 +0000

[

b'@@ -0,0 +1,579 @@\n+<tool id="snapatac2_clustering" name="SnapATAC2 Clustering" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">\n+ <description>and dimension reduction</description>\n+ <macros>\n+ <import>macros.xml</import>\n+ </macros>\n+ <requirements>\n+ <expand macro="requirements"/>\n+ </requirements>\n+ <command detect_errors="exit_code"><![CDATA[\n+export NUMBA_CACHE_DIR="\\${TEMP:-/tmp}";\n+@PREP_ADATA@\n+@CMD@\n+ ]]></command>\n+ <configfiles>\n+ <configfile name="script_file"><![CDATA[\n+\n+@CMD_imports@\n+@CMD_read_inputs@\n+\n+#if $method.method == \'tl.spectral\'\n+\t#if $method.features\n+with open(\'$method.features\') as f:\n+\tfeatures_mask = [x.lower().capitalize() == "True" for x in f.read().splitlines()]\n+\t#end if\n+sa.tl.spectral(\n+\tadata,\n+\tn_comps = $method.n_comps,\n+\t#if $method.features\n+\tfeatures = features_mask,\n+\t#end if\n+\trandom_state = $method.random_state,\n+\t#if $method.sample_size\n+\tsample_size = $method.sample_size,\n+\t#end if\n+\tchunk_size = $method.chunk_size,\n+\tdistance_metric = \'$method.distance_metric\',\n+\tweighted_by_sd = $method.weighted_by_sd,\n+\tinplace = True\n+)\n+\n+#else if $method.method == \'tl.umap\'\n+sa.tl.umap(\n+\tadata,\n+\tn_comps = $method.n_comps,\n+\t#if $method.use_dims != \'\'\n+\t #set $dims = ([x.strip() for x in str($method.use_dims).split(\',\')])\n+\tuse_dims=$dims,\n+\t#end if\n+\tuse_rep = \'$method.use_rep\',\n+\tkey_added = \'$method.key_added\',\n+\trandom_state = $method.random_state,\n+\tinplace = True\n+)\n+\n+#else if $method.method == \'pp.knn\'\n+sa.pp.knn(\n+\tadata,\n+\tn_neighbors = $method.n_neighbors,\n+\t#if $method.use_dims != \'\'\n+\t #set $dims = ([x.strip() for x in str($method.use_dims).split(\',\')])\n+\tuse_dims=$dims,\n+\t#end if\n+\tuse_rep = \'$method.use_rep\',\n+\tmethod = \'$method.algorithm\',\n+\tinplace = True,\n+\trandom_state = $method.random_state\n+)\n+\n+#else if $method.method == 
\'tl.dbscan\'\n+sa.tl.dbscan(\n+\tadata,\n+\teps = $method.eps,\n+\tmin_samples = $method.min_samples,\n+\tleaf_size = $method.leaf_size,\n+\tuse_rep = \'$method.use_rep\',\n+\tkey_added = \'$method.key_added\'\n+)\n+\n+#else if $method.method == \'tl.hdbscan\'\n+sa.tl.hdbscan(\n+\tadata,\n+\tmin_cluster_size = $method.min_cluster_size,\n+\t#if $method.min_samples\n+\tmin_samples = $method.min_samples,\n+\t#end if\n+\tcluster_selection_epsilon = $method.cluster_selection_epsilon,\n+\talpha = $method.alpha,\n+\tcluster_selection_method = \'$method.cluster_selection_method\',\n+\trandom_state = $method.random_state,\n+\tuse_rep = \'$method.use_rep\',\n+\tkey_added = \'$method.key_added\'\n+)\n+\n+#else if $method.method == \'tl.leiden\'\n+sa.tl.leiden(\n+\tadata,\n+\tresolution = $method.resolution,\n+\tobjective_function = \'$method.objective_function\',\n+\tmin_cluster_size = $method.min_cluster_size,\n+\tn_iterations = $method.n_iterations,\n+\trandom_state = $method.random_state,\n+\tkey_added = \'$method.key_added\',\n+\tweighted = $method.weighted,\n+\tinplace = True\n+)\n+\n+#else if $method.method == \'tl.kmeans\'\n+sa.tl.kmeans(\n+\tadata,\n+\tn_clusters = $method.n_clusters,\n+\tn_iterations = $method.n_iterations,\n+\trandom_state = $method.random_state,\n+\tuse_rep = \'$method.use_rep\',\n+\tkey_added = \'$method.key_added\'\n+)\n+\n+#else if $method.method == \'tl.aggregate_X\'\n+sa.tl.aggregate_X(\n+\tadata,\n+\t#if $method.groupby != \'\'\n+\tgroupby = \'$method.groupby\',\n+\t#end if\n+\tnormalize = \'$method.normalize\'\n+)\n+\n+#else if $method.method == \'tl.aggregate_cells\'\n+sa.tl.aggregate_cells(\n+\tadata,\n+\tuse_rep = \'$method.use_rep\',\n+\t#if $method.target_num_cells\n+\ttarget_num_cells = $method.target_num_cells,\n+\t#end if\n+\tmin_cluster_size = $method.min_cluster_size,\n+\trandom_state = $method.random_state,\n+\tkey_added = \'$method.key_added\',\n+\tinplace = True\n+)\n+#end 
if\n+\n+@CMD_anndata_write_outputs@\n+\t]]></configfile>\n+ </configfiles>\n+ <inputs>\n+ <conditional name="method">\n+ <param name="method" type="select" label="Dimension reduction and Clustering">\n+ <option value="tl.spectral">Perform dimension reduction using Laplacian Eigenmap, using \'tl.sp'..b'frac="0.1" location="https://zenodo.org/records/11199963/files/tl.aggregate_cells.pbmc_500_chr21.h5ad"/>\n+ </test>\n+ </tests>\n+ <help><![CDATA[\n+Perform dimension reduction using Laplacian Eigenmap, using `tl.spectral`\n+=========================================================================\n+\n+Perform dimension reduction using Laplacian Eigenmaps.\n+\n+Convert the cell-by-feature count matrix into lower dimensional representations using the spectrum of the normalized graph Laplacian defined by pairwise similarity between cells. This function utilizes the matrix-free spectral embedding algorithm to compute the embedding when `distance_metric` is \xe2\x80\x9ccosine\xe2\x80\x9d, which scales linearly with the number of cells. For other types of similarity metrics, the time and space complexity scale quadratically with the number of cells.\n+\n+More details on the `SnapATAC2 documentation\n+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.spectral.html>`__\n+\n+Compute Umap, using `tl.umap`\n+=============================\n+\n+Compute Umap\n+\n+More details on the `SnapATAC2 documentation\n+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.umap.html>`__\n+\n+Compute a neighborhood graph of observations, using `pp.knn`\n+============================================================\n+\n+Compute a neighborhood graph of observations.\n+\n+Computes a neighborhood graph of observations stored in adata using the method specified by method. 
The distance metric used is Euclidean.\n+\n+More details on the `SnapATAC2 documentation\n+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.knn.html>`__\n+\n+Cluster cells into subgroups, using `tl.leiden`\n+===============================================\n+\n+Cluster cells into subgroups.\n+\n+Cluster cells using the Leiden algorithm, an improved version of the Louvain algorithm. It has been proposed for single-cell analysis. This requires `pp.knn` to have been run first.\n+\n+More details on the `SnapATAC2 documentation\n+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.leiden.html>`__\n+\n+Cluster cells into subgroups using the K-means algorithm, using `tl.kmeans`\n+===========================================================================\n+\n+Cluster cells into subgroups using the K-means algorithm, a classical algorithm in data mining.\n+\n+More details on the `SnapATAC2 documentation\n+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.kmeans.html>`__\n+\n+Cluster cells into subgroups using the DBSCAN algorithm, using `tl.dbscan`\n+==========================================================================\n+\n+Cluster cells into subgroups using the DBSCAN algorithm.\n+\n+More details on the `SnapATAC2 documentation\n+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.dbscan.html>`__\n+\n+Cluster cells into subgroups using the HDBSCAN algorithm, using `tl.hdbscan`\n+============================================================================\n+\n+Cluster cells into subgroups using the HDBSCAN algorithm.\n+\n+More details on the `SnapATAC2 documentation\n+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.hdbscan.html>`__\n+\n+Aggregate values in adata.X in a row-wise fashion, using `tl.aggregate_X`\n+=========================================================================\n+\n+Aggregate values in adata.X in a row-wise fashion.\n+\n+Aggregate values in adata.X in a row-wise fashion. 
This is used to compute RPKM or RPM values stratified by user-provided groupings.\n+\n+More details on the `SnapATAC2 documentation\n+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.aggregate_X.html>`__\n+\n+Aggregate cells into pseudo-cells, using `tl.aggregate_cells`\n+=============================================================\n+\n+Aggregate cells into pseudo-cells.\n+\n+Aggregate cells into pseudo-cells by iterative clustering.\n+\n+More details on the `SnapATAC2 documentation\n+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.aggregate_cells.html>`__\n+ ]]></help>\n+ <expand macro="citations"/>\n+</tool>\n'

diff -r 000000000000 -r af821711b356 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu May 16 13:15:57 2024 +0000

[

b'@@ -0,0 +1,187 @@\n+<macros>\n+\t<token name="@TOOL_VERSION@">2.5.3</token>\n+ <token name="@VERSION_SUFFIX@">0</token>\n+ <token name="@PROFILE@">23.0</token>\n+ <xml name="requirements">\n+ <requirement type="package" version="@TOOL_VERSION@">snapatac2</requirement>\n+ <requirement type="package" version="5.18.0">plotly</requirement>\n+ <requirement type="package" version="0.2.1">python-kaleido</requirement>\n+ <requirement type="package" version="0.19.19">polars</requirement>\n+ <requirement type="package" version="14.0.1">pyarrow</requirement>\n+ <requirement type="package" version="0.11.3">python-igraph</requirement>\n+ <requirement type="package" version="0.8.33">hdbscan</requirement>\n+ <requirement type="package" version="0.0.9">harmonypy</requirement>\n+ <requirement type="package" version="1.7.4">scanorama</requirement>\n+ <yield />\n+ </xml>\n+\n+ <token name="@PREP_ADATA@"><![CDATA[\n+ cp \'$method.adata\' \'anndata.h5ad\' &&\n+ ]]>\n+ </token>\n+\n+ <token name="@CMD@"><![CDATA[\n+ cat \'$script_file\' > \'$hidden_output\' &&\n+ python \'$script_file\' >> \'$hidden_output\' &&\n+\t\ttouch \'anndata_info.txt\' &&\n+\t\tcat \'anndata_info.txt\' @CMD_prettify_stdout@\n+ ]]>\n+ </token>\n+\n+ <token name="@CMD_prettify_stdout@"><![CDATA[ | sed -r \'1 s|AnnData object with (.+) = (.*)\\s*|\\1: \\2|g\' | sed "s|\'||g" | sed -r \'s|^\\s*(.*):\\s(.*)|[\\1]\\n- \\2|g\' | sed \'s|, |\\n- |g\'\n+ ]]></token>\n+\n+ <token name="@CMD_imports@"><![CDATA[\n+import snapatac2 as sa\n+import os\n+ ]]>\n+ </token>\n+ <xml name="sanitize_query" token_validinitial="string.printable">\n+ <sanitizer>\n+ <valid initial="@VALIDINITIAL@">\n+ <remove value="'" />\n+ </valid>\n+ </sanitizer>\n+ </xml>\n+\n+ <xml name="inputs_anndata">\n+ <param name="adata" type="data" format="h5ad" label="Annotated data matrix"/>\n+ </xml>\n+\n+ <token name="@CMD_read_inputs@"><![CDATA[\n+\n+adata = sa.read(\'anndata.h5ad\', backed = None)\n+]]>\n+ </token>\n+\n+ <xml 
name="dimentions_plot">\n+ <param argument="width" type="integer" value="500" label="Width of the plot"/>\n+\t\t<param argument="height" type="integer" value="400" label="Height of the plot"/>\n+ </xml>\n+\n+ <xml name="param_groupby">\n+ <param argument="groupby" type="text" label="The key of the observation grouping to consider">\n+ <expand macro="sanitize_query" />\n+ </param>\n+ </xml>\n+\n+ <xml name="out_file">\n+ <param name="out_file" type="select" optional="true" label="Type of output file">\n+ <option value="png" selected="true">PNG</option>\n+ <option value="svg">SVG</option>\n+ <option value="pdf">PDF</option>\n+\t\t</param>\n+ </xml>\n+ <token name="@CMD_anndata_write_outputs@"><![CDATA[\n+adata.write(\'anndata.h5ad\')\n+with open(\'anndata_info.txt\',\'w\', encoding=\'utf-8\') as ainfo:\n+ print(adata, file=ainfo)\n+]]>\n+ </token>\n+ <xml name="inputs_common_advanced">\n+ <section name="advanced_common" title="Advanced Options" expanded="false">\n+ <param name="show_log" type="boolean" checked="false" label="Output Log?" 
/>\n+ </section>\n+ </xml>\n+ <xml name="params_render_plot">\n+ <param argument="width" type="integer" value="600" label="Width of the plot"/>\n+ <param argument="height" type="integer" value="400" label="Height of the plot"/>\n+ <expand macro="out_file"/>\n+ </xml>\n+ <xml name="param_shift">\n+ \t<param argument="shift_left" type="integer" value="4" label="Insertion site correction for the left end" help="Note this has no effect on single-end reads"/>\n+ \t<param argument="shift_right" type="integer" value="-5" label="Insertion site correction for the right end" help="Note this has no effect on single-end reads"/>\n+ </xml>\n+ <xml name="param_chunk_size" tokens="size">\n+ \t<param argument="chunk_size" ty'..b'nt="use_rep" type="text" value="X_spectral" label="The key for the matrix"/>\n+\t\t<param argument="use_dims" type="text" optional="true" value="" label="The dimensions used for computation">\n+\t\t\t<expand macro="sanitize_query"/>\n+\t\t</param>\n+\t\t<param argument="groupby" type="text" optional="true" value="" label="The key of the observation grouping to consider">\n+ <expand macro="sanitize_query" />\n+ </param>\n+\t\t<param argument="key_added" type="text" optional="true" value="" label="If specified, add the result to adata.obsm with this key"/>\n+\t</xml>\n+ <xml name="param_n_comps">\n+ <param argument="n_comps" type="integer" value="30" label="Number of dimensions to keep" help="The result is insensitive to this parameter when `weighted_by_sd` is set, as long as it is large enough, e.g. 
30."/>\n+ </xml>\n+ <xml name="param_random_state">\n+ <param argument="random_state" type="integer" value="0" label="Seed of the random state generator"/>\n+ </xml>\n+ <xml name="param_key_added" tokens="key_added">\n+ <param argument="key_added" type="text" value="@KEY_ADDED@" label="`adata.obs` key under which to add cluster labels"/>\n+ </xml>\n+ <xml name="param_use_rep">\n+ <param argument="use_rep" type="text" value="X_spectral" label="Use the indicated representation in `.obsm`"/>\n+ </xml>\n+ <xml name="genome_fasta">\n+ <param argument="genome_fasta" type="text" label="A fasta file containing the genome sequences or a Genome object"/>\n+ </xml>\n+ <xml name="background">\n+ <param argument="background" type="text" optional="true" value="" label="A list of regions to be used as the background">\n+\t\t\t<expand macro="sanitize_query"/>\n+\t\t</param>\n+ </xml>\n+ <xml name="mat">\n+ <param argument="peak_mat" type="data" format="h5ad" optional="true" label="AnnData or AnnDataSet object storing the cell by peak count matrix"/>\n+\t\t<param argument="gene_mat" type="data" format="h5ad" optional="true" label="AnnData or AnnDataSet object storing the cell by gene count matrix"/>\n+ </xml>\n+ <xml name="param_network">\n+ <param argument="network" type="text" label="network"/>\n+ </xml>\n+ <xml name="param_n_iterations">\n+ <param argument="n_iterations" type="integer" value="-1" label="How many iterations of the Leiden clustering algorithm to perform"\n+ help="Positive values above 2 define the total number of iterations to perform, -1 has the algorithm run until it reaches its optimal clustering."/>\n+ </xml>\n+\n+ <xml name="citations">\n+ <citations>\n+ <citation type="doi">10.1038/s41592-023-02139-9</citation>\n+ </citations>\n+ </xml>\n+ <xml name="render_plot_test">\n+ \t<param name="width" value="650"/>\n+ <param name="height" value="450"/>\n+ </xml>\n+ <xml name="render_plot_matching_text">\n+ \t<has_text_matching expression="width = 650"/>\n+ 
<has_text_matching expression="height = 450"/>\n+ </xml>\n+ <xml name="param_counting_strategy">\n+ <param argument="counting_strategy" type="select" label="The strategy to compute feature counts">\n+ <option value="fragment">fragment</option>\n+ <option value="insertion" selected="true">insertion</option>\n+ <option value="paired-insertion">paired-insertion</option>\n+ </param>\n+ </xml>\n+\n+ <token name="@CMD_params_data_integration@"><![CDATA[\n+use_rep = \'$method.use_rep\',\n+#if $method.use_dims != \'\'\n+#set $dims = ([x.strip() for x in str($method.use_dims).split(\',\')])\n+use_dims=$dims,\n+#end if\n+#if $method.groupby != \'\'\n+#set $groupby = ([x.strip() for x in str($method.groupby).split(\',\')])\n+groupby=$groupby,\n+#end if\n+#if $method.key_added != \'\'\n+key_added = \'$method.key_added\',\n+#end if\n+ ]]>\n+ </token>\n+\n+ <token name="@CMD_params_render_plot@"><![CDATA[\n+ width = $method.width,\n+ height = $method.height,\n+ out_file = \'plot.$method.out_file\',\n+ ]]>\n+ </token>\n+</macros>\n'