Mercurial > repos > ebi-gxa > seurat_find_clusters

--- a/seurat_find_clusters.xml	Wed Apr 03 11:17:04 2019 -0400
+++ b/seurat_find_clusters.xml	Mon Nov 25 06:09:15 2019 -0500
@@ -1,4 +1,4 @@
-<tool id="seurat_find_clusters" name="Seurat FindClusters" version="2.3.1+galaxy1">
+<tool id="seurat_find_clusters" name="Seurat FindClusters" version="@SEURAT_VERSION@_@VERSION@+galaxy0">
     <description>find clusters of cells</description>
     <macros>
         <import>seurat_macros.xml</import>
@@ -8,30 +8,10 @@
     <command detect_errors="exit_code"><![CDATA[
         seurat-find-clusters.R

-        --input-object-file '$input'
-        --output-object-file '$output'
+        @INPUT_OBJECT@
+        @OUTPUT_OBJECT@
         --output-text-file output_tab

-        #if $genes_use:
-          --genes-use '$genes_use'
-        #end if
-
-        #if str($adv.reduction_type):
-          --reduction-type '$adv.reduction_type'
-        #end if
-
-        #if str($adv.dims_use):
-          --dims-use \$(seq -s , 1 '$adv.dims_use')
-        #end if
-
-        #if str($adv.k_num_clusters):
-          --k-param '$adv.k_num_clusters'
-        #end if
-
-        #if str($adv.prune_snn):
-          --prune-snn '$adv.prune_snn'
-        #end if
-
         #if str($adv.resolution):
           --resolution '$adv.resolution'
         #end if
@@ -40,31 +20,58 @@
           --algorithm '$adv.algorithm'
         #end if

+        ## Optional clustering flags: each is passed only when its parameter has a value.
+        #if str($adv.modularity_fxn):
+          --modularity-fxn '$adv.modularity_fxn'
+        #end if
+        #if str($adv.method):
+          --method '$adv.method'
+        #end if
+        #if str($adv.graph_name):
+          --graph-name '$adv.graph_name'
+        #end if
+        #if str($adv.nrandom_starts):
+          --nrandom-starts '$adv.nrandom_starts'
+        #end if
+        ## Boolean input: expands to the bare flag when checked, to nothing otherwise.
+        $adv.group_singletons
+        ## The random_seed input is declared in the "adv" section and must be forwarded too.
+        #if str($adv.random_seed):
+          --random-seed '$adv.random_seed'
+        #end if
+
   ## TODO add pdf support as optional
   ]]></command>

       <inputs>
-        <param name="input" argument="--input-object-file" type="data" format="rdata" label="Seurat RDS object" help="Seurat object produced by Seurat run PCA or other." />
-        <expand macro="genes-use-input"/>
+        <expand macro="input_object_params"/>
+        <expand macro="output_object_params"/>
           <section name="adv" title="Advanced Options">
-            <param name="reduction_type" argument="--reduction-type" optional="true" type="select" label="Dimensional reduction type" help="dimensional reduction technique to use in construction of SNN graph. (e.g. 'pca', 'ica'). PCA by default.">
-              <option value="pca" selected="true">PCA</option>
-              <option value="ica">ICA</option>
-            </param>
-            <expand macro="dims-use-input"/>
-            <param name="k_num_clusters" argument="--k-param" optional="true" type="integer" label="Number of clusters (k) to compute" help="Defines k for the k-nearest neighbor algorithm."/>
-            <param name="prune_snn" argument="--prune-snn" optional="true" type="float" label="Prune SNN cutoff" help="Sets the cutoff for acceptable Jaccard distances when computing the neighborhood overlap for the SNN construction. Any edges with values less than or equal to this will be set to 0 and removed from the SNN graph. Essentially sets the strigency of pruning (0 — no pruning, 1 — prune everything). Defaults to 1/15."/>
             <param name="resolution" argument="--resolution" optional="true" type="float" label="Resolution" help="Value of the resolution parameter, use a value above (below) 1.0 if you want to obtain a larger (smaller) number of communities. Defaults to 0.8."/>
             <param name="algorithm" argument="--algorithm" optional="true" type="select" label="Modularity organization algorithm">
               <option value="1" selected="true">Louvain</option>
               <option value="2">Louvain algorithm with multilevel refinement</option>
               <option value="3">SLM algorithm</option>
+              <option value="4">Leiden</option>
             </param>
+            <param name="modularity_fxn" argument="--modularity-fxn" optional="true" type="select" label="Modularity function">
+              <option value="1" selected="true">Standard</option>
+              <option value="2">Alternative</option>
+            </param>
+            <param name="method" argument="--method" type="select" label="Method for Leiden" help="Method for Leiden (defaults to matrix, which is fast for small datasets). Select iGraph to avoid casting large data to a dense matrix.">
+              <option value="matrix" selected="true">Matrix</option>
+              <option value="igraph">iGraph</option>
+            </param>
+            <param name="graph_name" argument="--graph-name" type="text" value="RNA_nn" label="Graph Name" help="Name of graph to use for the clustering algorithm."/>
+            <param name="nrandom_starts" argument="--nrandom-starts" type="integer" optional="true" label="Random starts" help="Number of random starts, 10 by default."/>
+            <param name="group_singletons" argument="--group-singletons" type="boolean" truevalue="--group-singletons" falsevalue="" checked="false" label="Group singletons" help="Group singletons into nearest cluster. If FALSE, assign all singletons to a 'singleton' group."/>
+            <param name="random_seed" argument="--random-seed" type="integer" optional="true" label="Random seed" help="Seed of the random number generator"/>
           </section>
+
       </inputs>
       <outputs>
           <!-- <data name="out_pdf" format="pdf" from_work_dir="out.pdf" label="${tool.name} on ${on_string}: Plots" /> -->
-          <data name="output" format="rdata" from_work_dir="*.rds" label="${tool.name} on ${on_string}: Seurat RDS"/>
+          <expand macro="output_files"/>
           <data name="output_tab" format="csv" from_work_dir="output_tab" label="${tool.name} on ${on_string}: CSV Seurat Clusters"/>
       </outputs>

@@ -72,7 +79,7 @@
         <!-- Ensure count matrix input works -->
         <test>
             <param name="input" ftype="rdata" value="out_runpca.rds"/>
-            <output name="output" ftype="rdata" value="out_findclust.rds" compare="sim_size"/>
+            <output name="rds_seurat_file" ftype="rdata" value="out_findclust.rds" compare="sim_size"/>
         </test>
     </tests>
     <help><![CDATA[
@@ -80,16 +87,14 @@

 **What it does**

-Seurat_ is a toolkit for quality control, analysis, and exploration of single cell RNA sequencing data.
-It is developed and maintained by the `Satija Lab`_ at NYGC. Seurat aims to enable users to identify and
-interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse
-types of single cell data.
+Identify clusters of cells by a shared nearest neighbor (SNN) modularity optimization
+based clustering algorithm. First calculate k-nearest neighbors and construct
+the SNN graph (using Seurat find neighbours).
+Then optimize the modularity function to determine clusters.
+For a full description of the algorithms, see Waltman and van Eck (2013)
+The European Physical Journal B.

-Seurat clustering use SNN method to determine different clusters in your dataset. In order to construct a
-SNN graph, you must have perform a PCA before launch this tool (you can use Seurat dimensional reduction).
-It will search k (30) nearest neighbors for each cells and link cells to each other if they shared the
-same neighbors. You can modulate the resolution in order to get larger (resolution superior to 1) or smaller
-(inferior to 1) clusters.
+@SEURAT_INTRO@

 -----
--- a/seurat_macros.xml	Wed Apr 03 11:17:04 2019 -0400
+++ b/seurat_macros.xml	Mon Nov 25 06:09:15 2019 -0500
@@ -1,34 +1,110 @@
 <?xml version="1.0"?>
 <macros>
-
-    <token name="@VERSION@">0.0.5</token>
-
+    <token name="@VERSION@">0.0.6</token>
+    <token name="@SEURAT_VERSION@">3.1.1</token>
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@VERSION@">seurat-scripts</requirement>
         </requirements>
     </xml>
-
     <xml name="version">
     	<version_command><![CDATA[
 echo $(R --version | grep version | grep -v GNU)", seurat version" $(R --vanilla --slave -e "library(seurat); cat(sessionInfo()\$otherPkgs\$seurat\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
     ]]></version_command>
     </xml>

+   <xml name="input_object_params">
+     <conditional name="input" label="Input format">
+       <param type="select" name="format" label="Choose the format of the input" help="RData, Loom or AnnData">
+         <option value="rds_seurat" selected="true">RDS with a Seurat object</option>
+         <option value="loom">Loom</option>
+         <option value="anndata">AnnData</option>
+         <option value="rds_sce">RDS with a Single Cell Experiment object</option>
+       </param>
+       <when value="anndata">
+         <param type="data" name="anndata_file" label="AnnData file" help="The AnnData format provided by Scanpy" format="h5,h5ad"/>
+       </when>
+       <when value="loom">
+         <param type="data" name="loom_file" label="Loom file" help="Input as Loom v? file" format="h5,h5loom"/>
+       </when>
+       <when value="rds_seurat">
+         <param type="data" name="rds_seurat_file" label="RDS file" help="Input as RDS file with Seurat 3 object" format="rdata"/>
+       </when>
+       <when value="rds_sce">
+         <param type="data" name="rds_sce_file" label="RDS file" help="Input as RDS file with Single Cell Experiment object" format="rdata"/>
+       </when>
+     </conditional>
+   </xml>
+
+   <token name="@INPUT_OBJECT@">
+    #if $input.format == "anndata"
+        --input-object-file '$input.anndata_file' --input-format anndata
+    #else if $input.format == "loom"
+        --input-object-file '$input.loom_file' --input-format loom
+    #else if $input.format == "rds_seurat"
+        --input-object-file '$input.rds_seurat_file' --input-format seurat
+    #else if $input.format == "rds_sce"
+        --input-object-file '$input.rds_sce_file' --input-format singlecellexperiment
+    #end if
+   </token>
+
+   <xml name="output_object_params">
+     <param type="select" name="format" label="Choose the format of the output" help="Seurat, Single Cell Experiment or Loom">
+       <option value="rds_seurat" selected="true">RDS with a Seurat object</option>
+       <option value="loom">Loom</option>
+       <option value="rds_sce">RDS with a Single Cell Experiment object</option>
+     </param>
+   </xml>
+
+   <xml name="output_files">
+    <data name="loom_file" from_work_dir="seurat_obj.loom" format="h5" label="${tool.name} on ${on_string}: Seurat Loom">
+      <filter>format == 'loom'</filter>
+    </data>
+    <data name="rds_seurat_file" format="rdata" label="${tool.name} on ${on_string}: Seurat RDS">
+      <filter>format == 'rds_seurat'</filter>
+    </data>
+    <data name="rds_sce_file" format="rdata" label="${tool.name} on ${on_string}: Seurat Single Cell Experiment RDS">
+      <filter>format == 'rds_sce'</filter>
+    </data>
+   </xml>
+
+   <token name="@OUTPUT_OBJECT@">
+    #if $format == "anndata"
+        --output-object-file '$anndata_file' --output-format anndata
+    #else if $format == "loom"
+        --output-object-file seurat_obj.loom --output-format loom
+    #else if $format == "rds_seurat"
+        --output-object-file '$rds_seurat_file' --output-format seurat
+    #else if $format == "rds_sce"
+        --output-object-file '$rds_sce_file' --output-format singlecellexperiment
+    #end if
+   </token>
+
     <xml name="genes-use-input">
-      <param name="genes_use" argument="--genes-use" optional="true" type="data" format="tsv, txt" label="Genes to use" help="A file with gene names to use in construction of SNN graph if building directly based on expression data rather than a dimensionally reduced representation (i.e. PCs)."/>
+      <param name="genes_use" argument="--genes-use" optional="true" type="data" format="tsv,txt,tabular" label="Genes to use" help="A file with gene names to use in construction of SNN graph if building directly based on expression data rather than a dimensionally reduced representation (i.e. PCs)."/>
     </xml>
     <xml name="dims-use-input">
       <param name="dims_use" argument="--dims-use" min="1" optional="true" type="integer" label="PCA Dimensions to use" help="Number of PCs (dimensions) to use in construction of the SNN graph."/>
     </xml>

+    <token name="@SEURAT_INTRO@"><![CDATA[
+Seurat_ is a toolkit for quality control, analysis, and exploration of single cell RNA sequencing data.
+It is developed and maintained by the `Satija Lab`_ at NYGC. Seurat aims to enable users to identify and
+interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse
+types of single cell data.
+      ]]></token>
+
     <token name="@VERSION_HISTORY@"><![CDATA[
 **Version history**

-0.0.1: Initial contribution. Maria Doyle, https://github.com/mblue9.
+3.1.1_0.0.6+galaxy0: Moved to Seurat 3.
+
+  Find clusters: removed dims-use, k-param, prune-snn.

 2.3.1+galaxy0: Improved documentation and further exposition of all script's options. Pablo Moreno, Jonathan Manning and Ni Huang, Expression Atlas team https://www.ebi.ac.uk/gxa/home  at
 EMBL-EBI https://www.ebi.ac.uk/. Parts obtained from wrappers from Christophe Antoniewski(https://github.com/drosofff) and Lea Bellenger(https://github.com/bellenger-l).
+
+0.0.1: Initial contribution. Maria Doyle, https://github.com/mblue9.
       ]]></token>