Mercurial > repos > iuc > seurat_clustering

<tool id="seurat_clustering" name="Seurat Find Clusters" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>- Neighbors and Markers</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools"/>
    <expand macro="requirements"/>
    <expand macro="version_command"/>
    <command detect_errors="exit_code"><![CDATA[
@CMD@
    ]]></command>
    <configfiles>
        <configfile name="script_file"><![CDATA[
@CMD_imports@
@CMD_read_inputs@

#if $method.method == 'FindNeighbors'
## FindNeighbors branch: assembles one FindNeighbors call on seurat_obj and
## stores the result back into seurat_obj.
## Arguments wrapped in a conditional are omitted from the call when their
## parameter is empty.
seurat_obj<-FindNeighbors(
    seurat_obj,
    #if $method.reduction != ''
    reduction = '$method.reduction',
    #end if
    #if $method.dims != ''
    dims = 1:$method.dims,
    #end if
    k.param = $method.k_param,
    nn.method = '$method.nn_method.nn_method',
    ## Only the tuning argument of the selected search backend is emitted.
    #if $method.nn_method.nn_method == 'rann'
    nn.eps = $method.nn_method.nn_eps,
    #else if $method.nn_method.nn_method == 'annoy'
    annoy.metric = '$method.nn_method.annoy_metric',
    #end if
    ## R argument names are case-sensitive: Seurat spells these compute.SNN and prune.SNN.
    compute.SNN = $method.adv.compute_snn.compute_snn,
    #if $method.adv.compute_snn.compute_snn == 'TRUE'
        ## Compare the string form so an explicit 0 (no pruning) is still passed;
        ## a plain truth test would treat 0 the same as an empty field.
        #if str($method.adv.compute_snn.prune_snn) != ''
        prune.SNN = $method.adv.compute_snn.prune_snn,
        #end if
        ## In this case distance_matrix is a plain boolean parameter.
        distance.matrix = $method.adv.compute_snn.distance_matrix,
    #else if $method.adv.compute_snn.compute_snn == 'FALSE'
        ## In this case distance_matrix is a nested conditional, hence the doubled name.
        distance.matrix = $method.adv.compute_snn.distance_matrix.distance_matrix,
        #if $method.adv.compute_snn.distance_matrix.distance_matrix == 'FALSE'
        return.neighbor = $method.adv.compute_snn.distance_matrix.return_neighbor,
        #end if
    #end if
    l2.norm = $method.adv.l2_norm,
    n.trees = $method.adv.n_trees
)

#else if $method.method == 'FindMultiModalNeighbors'
## FindMultiModalNeighbors branch: assembles one call that combines two
## reductions and stores the result back into seurat_obj.
seurat_obj<-FindMultiModalNeighbors(
    seurat_obj,
    ## The two reduction names and their dimension ranges are passed as
    ## parallel lists: first modality first, second modality second.
    reduction.list = list('$method.reduction_1', '$method.reduction_2'),
    dims.list = list(1:$method.dims_1, 1:$method.dims_2),
    k.nn = $method.k_nn,
    knn.graph.name = '$method.adv.knn_graph_name',
    snn.graph.name = '$method.adv.snn_graph_name',
    weighted.nn.name = '$method.adv.weighted_nn_name',
    ## The modality weight name is optional and is left out when empty.
    #if $method.adv.modality_weight_name != ''
    modality.weight.name = '$method.adv.modality_weight_name',
    #end if
    ## Always-present final argument, so the call never ends on a comma.
    knn.range = $method.adv.knn_range
)

#else if $method.method == 'FindClusters'
## FindClusters branch: assembles one FindClusters call on seurat_obj and
## stores the result back into seurat_obj.
@reticulate_hack@
seurat_obj<-FindClusters(
    seurat_obj,
    modularity.fxn = $method.modularity_fxn,
    resolution = $method.resolution,
    algorithm = $method.algorithm.algorithm,
    ## The extra arguments below exist only for algorithm 4 (Leiden).
    #if $method.algorithm.algorithm == '4'
        #if $method.algorithm.initial_membership
        initial.membership = $method.algorithm.initial_membership,
        #end if
        #if $method.algorithm.node_sizes
        node.sizes = $method.algorithm.node_sizes,
        #end if
        method = '$method.algorithm.method_cluster',
    #end if
    n.start = $method.n_start,
    n.iter = $method.n_iter,
    random.seed = $method.random_seed,
    #if $method.graph_name != ''
    graph.name = '$method.graph_name',
    #end if
    #if $method.cluster_name != ''
    cluster.name = '$method.cluster_name',
    #end if
    ## Forward the group_singletons checkbox, which was declared but never used.
    ## Keeping an always-present argument last also means the call cannot end
    ## on a dangling comma when the optional names above are empty.
    group.singletons = $method.group_singletons
)

#else if $method.method == 'FindAllMarkers'
## FindAllMarkers branch: optionally reads a feature list, runs FindAllMarkers,
## optionally keeps the first N rows per cluster, then hands over to the
## marker-table writer macro.

    ## Optional feature file: lines are joined with commas here and split
    ## again into a character vector inside the call.
    #if $method.features
    features_list<-paste(readLines('$method.features'), collapse=",")
    #end if

seurat_obj<-FindAllMarkers(
    seurat_obj,
    #if $method.features
        features = c(unlist(strsplit(features_list, ","))),
    #end if
    logfc.threshold = $method.logfc_threshold,
    test.use = '$method.test_use.test_use',
    ## Test-specific arguments: latent.vars is a comma-separated text field
    ## with spaces removed; it is emitted only when non-empty.
    #if $method.test_use.test_use == 'negbinom'
        #if $method.test_use.latent_vars != ''
        latent.vars = c(unlist(strsplit(gsub(" ", "", '$method.test_use.latent_vars'), ","))),
        #end if
        min.cells.feature = $method.test_use.min_cells_feature,
    #else if $method.test_use.test_use == 'poisson'
        #if $method.test_use.latent_vars != ''
        latent.vars = c(unlist(strsplit(gsub(" ", "", '$method.test_use.latent_vars'), ","))),
        #end if
        min.cells.feature = $method.test_use.min_cells_feature,
    #else if $method.test_use.test_use =='LR'
        #if $method.test_use.latent_vars != ''
        latent.vars = c(unlist(strsplit(gsub(" ", "", '$method.test_use.latent_vars'), ","))),
        #end if
    #else if $method.test_use.test_use == 'MAST'
        #if $method.test_use.latent_vars != ''
        latent.vars = c(unlist(strsplit(gsub(" ", "", '$method.test_use.latent_vars'), ","))),
        #end if
    #else if $method.test_use.test_use == 'roc'
        return.thresh = $method.test_use.return_thresh,
    #end if
    slot = '$method.slot',
    #if $method.adv.assay != ''
    assay = '$method.adv.assay',
    #end if
    min.pct = $method.adv.min_pct,
    #if $method.adv.min_diff_pct
    min.diff.pct = $method.adv.min_diff_pct,
    #end if
    only.pos = $method.adv.only_pos,
    #if $method.adv.max_cells_per_ident
    max.cells.per.ident = $method.adv.max_cells_per_ident,
    #end if
    #if $method.adv.random_seed
    random.seed = $method.adv.random_seed,
    #end if
    min.cells.group = $method.adv.min_cells_group,
    ## The guard must test the same adv-section parameter that is emitted below.
    #if $method.adv.fc_name != ''
    fc.name = '$method.adv.fc_name',
    #end if
    base = $method.adv.base,
    densify = $method.adv.densify
)

    ## Optional truncation: keep the first N rows of each cluster group.
    #if $method.set_top_markers.set_top_markers == 'true'
    N = $method.set_top_markers.topN
    seurat_obj<-dplyr::slice_head(seurat_obj, n = N, by = cluster)
    #end if

@CMD_write_markers_tab@

#else if $method.method == 'FindMarkers'
## FindMarkers branch: optionally reads feature and cell lists from files,
## runs FindMarkers, stores the result back into seurat_obj, then hands over
## to the marker-table writer macro.

    ## Optional feature file: lines are joined with commas here and split
    ## again into a character vector inside the call.
    #if $method.features
    features_list<-paste(readLines('$method.features'), collapse=",")
    #end if
    ## Optional cell-name files for the two groups, handled the same way.
    #if $method.cells.cells == 'true'
    cell_1_list<-paste(readLines('$method.cells_1'), collapse=",")
    cell_2_list<-paste(readLines('$method.cells_2'), collapse=",")
    #end if

seurat_obj<-FindMarkers(
    seurat_obj,
    slot = '$method.slot',
    #if $method.cells.cells == 'true'
    cells.1 = c(unlist(strsplit(cell_1_list, ","))),
    cells.2 = c(unlist(strsplit(cell_2_list, ","))),
    #end if
    ## Regrouping: group.by is always emitted when enabled, subset.ident only
    ## when a value was entered.
    #if $method.regroup.regroup == 'true'
    group.by = '$method.regroup.group_by',
        #if $method.regroup.subset_ident != ''
        subset.ident = '$method.regroup.subset_ident',
        #end if
    #end if
    ## Identity comparison: ident.1 is a single quoted value; ident.2 is a
    ## comma-separated text field with spaces removed, split into a vector.
    #if $method.ident.ident == 'true'
    ident.1 = '$method.ident.ident_1',
        #if $method.ident.ident_2 != ''
        ident.2 = c(unlist(strsplit(gsub(" ", "", '$method.ident.ident_2'), ","))),
        #end if
    #end if
    #if $method.features
    features = c(unlist(strsplit(features_list, ","))),
    #end if
    logfc.threshold = $method.logfc_threshold,
    test.use = '$method.test_use.test_use',
    ## Test-specific arguments: latent.vars is emitted only when non-empty;
    ## min.cells.feature only for negbinom and poisson.
    #if $method.test_use.test_use == 'negbinom'
        #if $method.test_use.latent_vars != ''
        latent.vars = c(unlist(strsplit(gsub(" ", "", '$method.test_use.latent_vars'), ","))),
        #end if
        min.cells.feature = $method.test_use.min_cells_feature,
    #else if $method.test_use.test_use == 'poisson'
        #if $method.test_use.latent_vars != ''
        latent.vars = c(unlist(strsplit(gsub(" ", "", '$method.test_use.latent_vars'), ","))),
        #end if
        min.cells.feature = $method.test_use.min_cells_feature,
    #else if $method.test_use.test_use =='LR'
        #if $method.test_use.latent_vars != ''
        latent.vars = c(unlist(strsplit(gsub(" ", "", '$method.test_use.latent_vars'), ","))),
        #end if
    #else if $method.test_use.test_use == 'MAST'
        #if $method.test_use.latent_vars != ''
        latent.vars = c(unlist(strsplit(gsub(" ", "", '$method.test_use.latent_vars'), ","))),
        #end if
    #end if
    ## Advanced options: optional values are omitted from the call when unset.
    #if $method.adv.assay != ''
    assay = '$method.adv.assay',
    #end if
    min.pct = $method.adv.min_pct,
    #if $method.adv.min_diff_pct
    min.diff.pct = $method.adv.min_diff_pct,
    #end if
    only.pos = $method.adv.only_pos,
    #if $method.adv.max_cells_per_ident
    max.cells.per.ident = $method.adv.max_cells_per_ident,
    #end if
    #if $method.adv.random_seed
    random.seed = $method.adv.random_seed,
    #end if
    min.cells.group = $method.adv.min_cells_group,
    #if $method.adv.fc_name != ''
    fc.name = '$method.adv.fc_name',
    #end if
    ## Always-present final argument, so the call never ends on a comma.
    densify = $method.adv.densify
)

@CMD_write_markers_tab@

#else if $method.method == 'FindConservedMarkers'
## FindConservedMarkers branch: assembles one call on seurat_obj, stores the
## result back into seurat_obj, then hands over to the marker-table writer macro.
seurat_obj<-FindConservedMarkers(
    seurat_obj,
    ## Identity names come from free-text fields, so they are emitted as quoted
    ## R strings. Unquoted, a non-numeric name would be parsed as an R symbol.
    ident.1 = '$method.ident_1',
    #if $method.ident_2 != ''
    ident.2 = '$method.ident_2',
    #end if
    grouping.var = '$method.grouping_var',
    #if $method.assay != ''
    assay = '$method.assay',
    #end if
    slot = '$method.slot',
    min.cells.group = $method.min_cells_group
)

@CMD_write_markers_tab@

#end if

@CMD_rds_write_outputs@

]]></configfile>
    </configfiles>
    <inputs>
        <expand macro="input_rds"/>
        <conditional name="method">
            <param name="method" type="select" label="Method used">
                <option value="FindNeighbors">Compute nearest neighbors with 'FindNeighbors'</option>
                <option value="FindMultiModalNeighbors">Compute nearest neighbors for multimodal data with 'FindMultiModalNeighbors'</option>
                <option value="FindClusters">Identify cell clusters with 'FindClusters'</option>
                <option value="FindAllMarkers">Identify marker genes with 'FindAllMarkers'</option>
                <option value="FindMarkers">Identify marker genes for specific groups with 'FindMarkers'</option>
                <option value="FindConservedMarkers">Find markers conserved between groups with 'FindConservedMarkers'</option>
            </param>
            <when value="FindNeighbors">
                <expand macro="select_reduction_pca"/>
                <expand macro="set_dims"/>
                <param name="k_param" type="integer" value="20" label="Set k for k-nearest neighbors" help="(k.param)"/>
                <conditional name="nn_method">
                    <param name="nn_method" type="select" label="Method for finding nearest neighbors" help="(nn.method)">
                        <option value="rann">rann</option>
                        <option value="annoy" selected="true">annoy</option>
                    </param>
                    <when value="rann">
                        <param name="nn_eps" type="float" value="0.0" label="Set error bound for nearest neighbor search" help="(nn.eps)"/>
                    </when>
                    <when value="annoy">
                        <param name="annoy_metric" type="select" label="Distance metric for annoy method" help="(annoy.metric)">
                            <option value="euclidean" selected="true">euclidean</option>
                            <option value="cosine">cosine</option>
                            <option value="manhattan">manhattan</option>
                            <option value="hamming">hamming</option>
                        </param>
                    </when>
                </conditional>
                <section name="adv" title="Advanced Options">
                    <!-- Advanced FindNeighbors options. The script template reads these as method.adv.* -->
                    <param name="n_trees" type="integer" value="50" label="Number of trees for nearest neighbor search" help="(n.trees)"/>
                    <param name="l2_norm" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Take l2Norm of data" help="(l2.norm)"/>
                    <!-- The option values are the R literals TRUE and FALSE; the template inserts them verbatim. -->
                    <conditional name="compute_snn">
                        <param name="compute_snn" type="select" label="Compute the shared nearest neighbor (SNN) graph" help="(compute.snn)">
                            <option value="FALSE">No</option>
                            <option value="TRUE" selected="true">Yes</option>
                        </param>
                        <when value="FALSE">
                            <!-- Without SNN, distance_matrix is itself a conditional: return_neighbor is offered
                                 only when no distance matrix is used. The template therefore reads the value
                                 as compute_snn.distance_matrix.distance_matrix in this case. -->
                            <conditional name="distance_matrix">
                                <param name="distance_matrix" type="select" label="Use a distance matrix" help="(distance.matrix)">
                                    <option value="FALSE" selected="true">No</option>
                                    <option value="TRUE">Yes</option>
                                </param>
                                <when value="FALSE">
                                    <param name="return_neighbor" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Return result as neighbor object" help="(return.neighbor)"/>
                                </when>
                                <when value="TRUE"></when>
                            </conditional>
                        </when>
                        <when value="TRUE">
                            <!-- With SNN, distance_matrix is a plain boolean, read as compute_snn.distance_matrix.
                                 prune_snn is optional and has no default value here. -->
                            <param name="prune_snn" type="float" optional="true" value="" min="0" max="1" label="Set cutoff for Jaccard index when computing overlap for SNN" help="0 no pruning, 1 prune everything (prune.SNN)"/>
                            <param name="distance_matrix" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Use a distance matrix" help="(distance.matrix)"/>
                        </when>
                    </conditional>
                </section>
            </when>
            <when value="FindMultiModalNeighbors">
                <param name="reduction_1" type="text" value="pca" label="Reduction to use for first modality">
                    <expand macro="valid_name"/>
                </param>
                <param name="dims_1" type="integer" value="10" label="Number of dimensions to use from first reduction"/>
                <param name="reduction_2" type="text" value="apca" label="Reduction to use for second modality">
                    <expand macro="valid_name"/>
                </param>
                <param name="dims_2" type="integer" value="10" label="Number of dimensions to use from second reduction"/>
                <param name="k_nn" type="integer" value="20" label="Number of multimodal neighbors to compute" help="(k.nn)"/>
                <section name="adv" title="Advanced Options">
                    <param name="knn_graph_name" type="text" value="wknn" label="Name for multimodal knn graph" help="(knn.graph.name)">
                        <expand macro="valid_name"/>
                    </param>
                    <param name="snn_graph_name" type="text" value="wsnn" label="Name for multimodal snn graph" help="(snn.graph.name)">
                        <expand macro="valid_name"/>
                    </param>
                    <param name="weighted_nn_name" type="text" value="weighted.nn" label="Name for multimodal neighbor object" help="(weighted.nn.name)">
                        <expand macro="valid_name"/>
                    </param>
                    <param name="modality_weight_name" optional="true" type="text" value="" label="Name for storing modality weights in metadata" help="(modality.weight.name)">
                        <expand macro="valid_name"/>
                    </param>
                    <param name="knn_range" type="integer" value="200" label="Number of approximate neighbors to compute" help="(knn.range)"/>
                </section>
            </when>
            <when value="FindClusters">
                <!-- Parameters shown when the FindClusters method is selected. -->
                <param name="modularity_fxn" type="select" label="Select modularity function" help="(modularity.fxn)">
                    <option value="1" selected="true">standard</option>
                    <option value="2">alternative</option>
                </param>
                <param argument="resolution" type="float" value="0.8" label="Resolution"/>
                <!-- The option values 1 to 4 are inserted verbatim as the numeric algorithm argument. -->
                <conditional name="algorithm">
                    <param argument="algorithm" type="select" label="Algorithm for modularity optimization">
                        <option value="1" selected="true">1. Original Louvain</option>
                        <option value="2">2. Louvain with multilevel refinement</option>
                        <option value="3">3. SLM</option>
                        <option value="4">4. Leiden</option>
                    </param>
                    <!-- Only Leiden (4) has extra parameters; both integers are optional with no default. -->
                    <when value="4">
                        <param name="initial_membership" type="integer" optional="true" value="" label="Set initial membership when using Python leidenalg function" help="defaults to singleton partition (initial.membership)"/>
                        <param name="node_sizes" type="integer" optional="true" value="" label="Set node size when using Python leidenalg function" help="(node.sizes)"/>
                        <param name="method_cluster" type="select" label="Method for leiden" help="matrix is fast for small data, enable igraph for larger data (method.cluster)">
                            <option value="matrix" selected="true">matrix</option>
                            <option value="igraph">igraph</option>
                        </param>
                    </when>
                    <!-- Empty cases for the algorithms that take no extra parameters. -->
                    <when value="1">
                    </when>
                    <when value="2">
                    </when>
                    <when value="3">
                    </when>
                </conditional>
                <param name="n_start" type="integer" value="10" label="Number of random starts" help="(n.start)"/>
                <param name="n_iter" type="integer" value="10" label="Maximal number of iterations per random start" help="(n.iter)"/>
                <param name="random_seed" type="integer" value="0" label="Set random seed" help="(random.seed)"/>
                <param name="group_singletons" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Group singletons into nearest cluster" help="Set to false to create a cluster for all singletons (group.singletons)"/>
                <!-- Both names are optional text fields that default to empty. -->
                <param name="graph_name" type="text" optional="true" value="" label="Name of graph to use for the clustering algorithm" help="(graph.name)">
                    <expand macro="valid_name"/>
                </param>
                <param name="cluster_name" type="text" optional="true" value="" label="Name for output clusters" help="(cluster.name)">
                    <expand macro="valid_name"/>
                </param>
            </when>
            <when value="FindAllMarkers">
                <expand macro="markers_inputs"/>
                <conditional name="set_top_markers">
                    <param name="set_top_markers" type="select" label="Limit output to top N markers per cluster">
                        <option value="true">Yes</option>
                        <option value="false" selected="true">No</option>
                    </param>
                    <when value="true">
                        <expand macro="set_topN"/>
                    </when>
                    <when value="false">
                    </when>
                </conditional>
                <section name="adv" title="Advanced Options">
                    <param argument="base" type="integer" value="2" label="Base with respect to which logarithms are computed"/>
                    <expand macro="advanced_markers_inputs"/>
                </section>
            </when>
            <when value="FindMarkers">
                <conditional name="cells">
                    <param name="cells" type="select" label="Compare markers for two groups of cells">
                        <option value="true">Yes</option>
                        <option value="false" selected="true">No</option>
                    </param>
                    <when value="true">
                        <param name="cells_1" type="data" format="txt,tabular" label="List of cell names for group 1" help="text file with one cell on each line (cells.1)"/>
                        <param name="cells_2" type="data" format="txt,tabular" label="List of cell names for group 2" help="text file with one cell on each line (cells.2)"/>
                    </when>
                    <when value="false">
                    </when>
                </conditional>
                <conditional name="regroup">
                    <param name="regroup" type="select" label="Change cell identities before finding markers">
                        <option value="true">Yes</option>
                        <option value="false" selected="true">No</option>
                    </param>
                    <when value="true">
                        <param name="group_by" type="text" value="group" label="Name of identity class to regroup cells into" help="a group from the cell metadata to find markers for (group.by)"/>
                        <param name="subset_ident" type="text" optional="true" value="" label="Identity class to subset before regrouping" help="only include cells from this cluster/identity in each new group (subset.ident)"/>
                    </when>
                    <when value="false">
                    </when>
                </conditional>
                <conditional name="ident">
                    <param name="ident" type="select" label="Compare markers between clusters of cells">
                        <option value="true">Yes</option>
                        <option value="false" selected="true">No</option>
                    </param>
                    <when value="true">
                        <param name="ident_1" type="text" optional="true" value="" label="Identity class to define markers for" help="e.g. cluster number or ident group name (ident.1)"/>
                        <param name="ident_2" type="text" optional="true" value="" label="Second identity class to compare" help="e.g. comma-separated list of cluster numbers or idents, leave blank to compare ident.1 against all other clusters. (ident.2)">
                            <expand macro="valid_list"/>
                        </param>
                    </when>
                    <when value="false">
                    </when>
                </conditional>
                <expand macro="markers_inputs"/>
                <section name="adv" title="Advanced Options">
                    <expand macro="advanced_markers_inputs"/>
                </section>
            </when>
            <when value="FindConservedMarkers">
                <param name="ident_1" type="text" value="ident1" label="Identity class to define markers for" help="(ident.1)"/>
                <param name="ident_2" type="text" optional="true" value="" label="Second identity class for comparison" help="leave blank to compare ident.1 to all other cells (ident.2)"/>
                <param name="grouping_var" type="text" value="group" label="Grouping variable" help="(grouping.var)"/>
                <expand macro="select_assay_RNA"/>
                <expand macro="select_slot_data"/>
                <param name="min_cells_group" type="integer" value="3" label="Minimum number of cells in one group" help="(min.cells.group)"/>
            </when>
        </conditional>
        <expand macro="inputs_common_advanced"/>
    </inputs>
    <outputs>
        <expand macro="seurat_outputs"/>
        <expand macro="markers_out"/>
    </outputs>
    <tests>
        <test expect_num_outputs="2">
            <!-- test1: FindNeighbors -->
            <param name="seurat_rds" location="https://zenodo.org/records/13732784/files/pca.rds"/>
            <conditional name="method">
                <param name="method" value="FindNeighbors"/>
                <param name="dims" value="9"/>
                <conditional name="nn_method">
                    <param name="nn_method" value="annoy"/>
                    <param name="annoy_metric" value="euclidean"/>
                </conditional>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="FindNeighbors"/>
                </assert_contents>
            </output>
            <output name="rds_out" location="https://zenodo.org/records/13732784/files/neighbors.rds" ftype="rds" compare="sim_size"/>
        </test>
        <test expect_num_outputs="2">
            <!-- test2: FindMultiModalNeighbors -->
            <param name="seurat_rds" location="https://zenodo.org/records/13732784/files/citeseq_dims.rds"/>
            <conditional name="method">
                <param name="method" value="FindMultiModalNeighbors"/>
                <param name="reduction_1" value="pca"/>
                <param name="dims_1" value="8"/>
                <param name="reduction_2" value="apca"/>
                <param name="dims_2" value="8"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="FindMultiModalNeighbors"/>
                </assert_contents>
            </output>
            <output name="rds_out" location="https://zenodo.org/records/13732784/files/multimodalneighbors.rds" ftype="rds" compare="sim_size"/>
        </test>
        <test expect_num_outputs="2">
            <!-- test3: FindClusters -->
            <param name="seurat_rds" location="https://zenodo.org/records/13732784/files/neighbors.rds"/>
            <conditional name="method">
                <param name="method" value="FindClusters"/>
                <param name="resolution" value="0.8"/>
                <conditional name="algorithm">
                    <param name="algorithm" value="1"/>
                </conditional>
                <param name="n_start" value="10"/>
                <param name="n_iter" value="10"/>
                <param name="random_seed" value="0"/>
                <param name="group_singletons" value="TRUE"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="FindClusters"/>
                </assert_contents>
            </output>
            <output name="rds_out" location="https://zenodo.org/records/13732784/files/clusters.rds" ftype="rds" compare="sim_size"/>
        </test>
        <test expect_num_outputs="2">
            <!-- test4: FindClusters - leidenalg Installed -->
            <param name="seurat_rds" location="https://zenodo.org/records/13732784/files/neighbors.rds"/>
            <conditional name="method">
                <param name="method" value="FindClusters"/>
                <param name="modularity_fxn" value="1"/>
                <param name="resolution" value="0.5"/>
                <conditional name="algorithm">
                    <param name="algorithm" value="4"/>
                    <param name="method_cluster" value="matrix"/>
                </conditional>
                <param name="n_start" value="10"/>
                <param name="n_iter" value="10"/>
                <param name="random_seed" value="0"/>
                <param name="group_singletons" value="TRUE"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="FindClusters"/>
                </assert_contents>
            </output>
            <output name="rds_out" location="https://zenodo.org/records/13732784/files/clusters_leiden.rds" ftype="rds" compare="sim_size"/>
        </test>
        <test expect_num_outputs="3">
            <!-- test5: FindAllMarkers -->
            <param name="seurat_rds" location="https://zenodo.org/records/13732784/files/clusters.rds"/>
            <conditional name="method">
                <param name="method" value="FindAllMarkers"/>
                <param name="logfc_threshold" value="0.1"/>
                <param name="slot" value="data"/>
                <conditional name="test_use">
                    <param name="test_use" value="wilcox"/>
                </conditional>
                <conditional name="set_top_markers">
                    <param name="set_top_markers" value="true"/>
                </conditional>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="FindAllMarkers"/>
                </assert_contents>
            </output>
            <output name="rds_out" location="https://zenodo.org/records/13732784/files/allmarkers.rds" ftype="rds"/>
            <output name="markers_tabular" location="https://zenodo.org/records/13732784/files/allmarkers.csv" ftype="csv">
                <assert_contents>
                    <has_text_matching expression="avg_log2FC"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="3">
            <!-- test6: FindMarkers - Default -->
            <param name="seurat_rds" location="https://zenodo.org/records/13732784/files/clusters.rds"/>
            <conditional name="method">
                <param name="method" value="FindMarkers"/>
                <param name="slot" value="data"/>
                <conditional name="cells">
                    <param name="cells" value="false"/>
                </conditional>
                <conditional name="ident">
                    <param name="ident" value="true"/>
                    <param name="ident_1" value="0"/>
                    <param name="ident_2" value="1"/>
                </conditional>
                <param name="logfc_threshold" value="0.1"/>
                <conditional name="test_use">
                    <param name="test_use" value="wilcox"/>
                </conditional>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="FindMarkers"/>
                </assert_contents>
            </output>
            <output name="rds_out" location="https://zenodo.org/records/13732784/files/markers.rds" ftype="rds"/>
            <output name="markers_tabular" location="https://zenodo.org/records/13732784/files/markers.csv" ftype="csv">
                <assert_contents>
                    <has_text_matching expression="avg_log2FC"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="3">
            <!-- test7: FindMarkers - Limma Installed
                 Runs FindMarkers on clusters.rds using slot "data", comparing
                 identity 0 (ident_1) against identity 1 (ident_2), with
                 logfc_threshold=0.1 and test_use=wilcox_limma.
                 show_log is enabled; presumably this is what populates
                 hidden_output (confirm in macros.xml).
                 Checks: hidden_output contains the text "FindMarkers" and the
                 markers table contains the text "avg_log2FC". -->
            <param name="seurat_rds" location="https://zenodo.org/records/13732784/files/clusters.rds"/>
            <conditional name="method">
                <param name="method" value="FindMarkers"/>
                <param name="slot" value="data"/>
                <conditional name="cells">
                    <param name="cells" value="false"/>
                </conditional>
                <conditional name="ident">
                    <param name="ident" value="true"/>
                    <param name="ident_1" value="0"/>
                    <param name="ident_2" value="1"/>
                </conditional>
                <param name="logfc_threshold" value="0.1"/>
                <conditional name="test_use">
                    <param name="test_use" value="wilcox_limma"/>
                </conditional>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="FindMarkers"/>
                </assert_contents>
            </output>
            <output name="rds_out" location="https://zenodo.org/records/13732784/files/markersLimma.rds" ftype="rds"/>
            <output name="markers_tabular" location="https://zenodo.org/records/13732784/files/markersLimma.csv" ftype="csv">
                <assert_contents>
                    <has_text_matching expression="avg_log2FC"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="3">
            <!-- test8: FindMarkers - MAST Installed
                 Runs FindMarkers on clusters.rds using slot "data", comparing
                 identity 0 (ident_1) against identity 1 (ident_2), with
                 logfc_threshold=0.1 and test_use=MAST.
                 show_log is enabled; presumably this is what populates
                 hidden_output (confirm in macros.xml).
                 Checks: hidden_output contains the text "FindMarkers" and the
                 markers table contains the text "avg_log2FC". -->
            <param name="seurat_rds" location="https://zenodo.org/records/13732784/files/clusters.rds"/>
            <conditional name="method">
                <param name="method" value="FindMarkers"/>
                <param name="slot" value="data"/>
                <conditional name="cells">
                    <param name="cells" value="false"/>
                </conditional>
                <conditional name="ident">
                    <param name="ident" value="true"/>
                    <param name="ident_1" value="0"/>
                    <param name="ident_2" value="1"/>
                </conditional>
                <param name="logfc_threshold" value="0.1"/>
                <conditional name="test_use">
                    <param name="test_use" value="MAST"/>
                </conditional>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="FindMarkers"/>
                </assert_contents>
            </output>
            <output name="rds_out" location="https://zenodo.org/records/13732784/files/markersMAST.rds" ftype="rds"/>
            <output name="markers_tabular" location="https://zenodo.org/records/13732784/files/markersMAST.csv" ftype="csv">
                <assert_contents>
                    <has_text_matching expression="avg_log2FC"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="3">
            <!-- test9: FindMarkers - DESeq2 Installed
                 Runs FindMarkers on clusters.rds comparing identity 0 (ident_1)
                 against identity 1 (ident_2), with logfc_threshold=0.1 and
                 test_use=DESeq2.
                 This test selects slot "counts" rather than "data"; presumably
                 because DESeq2 models count data (confirm against the Seurat
                 documentation).
                 show_log is enabled; presumably this is what populates
                 hidden_output (confirm in macros.xml).
                 Checks: hidden_output contains the text "FindMarkers" and the
                 markers table contains the text "avg_log2FC". -->
            <param name="seurat_rds" location="https://zenodo.org/records/13732784/files/clusters.rds"/>
            <conditional name="method">
                <param name="method" value="FindMarkers"/>
                <param name="slot" value="counts"/>
                <conditional name="cells">
                    <param name="cells" value="false"/>
                </conditional>
                <conditional name="ident">
                    <param name="ident" value="true"/>
                    <param name="ident_1" value="0"/>
                    <param name="ident_2" value="1"/>
                </conditional>
                <param name="logfc_threshold" value="0.1"/>
                <conditional name="test_use">
                    <param name="test_use" value="DESeq2"/>
                </conditional>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="FindMarkers"/>
                </assert_contents>
            </output>
            <output name="rds_out" location="https://zenodo.org/records/13732784/files/markersDESeq2.rds" ftype="rds"/>
            <output name="markers_tabular" location="https://zenodo.org/records/13732784/files/markersDESeq2.csv" ftype="csv">
                <assert_contents>
                    <has_text_matching expression="avg_log2FC"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="3">
            <!-- test10: FindConservedMarkers
                 Runs FindConservedMarkers on integrated_umap.rds with
                 ident_1=0, ident_2=1 and grouping_var=Group.
                 show_log is enabled; presumably this is what populates
                 hidden_output (confirm in macros.xml).
                 Checks: hidden_output contains the text "FindConservedMarkers"
                 and the markers table contains the text "Group_B_avg_log2FC"
                 (presumably a per-group column for a level named "B" of the
                 Group metadata; verify against the test data). -->
            <param name="seurat_rds" location="https://zenodo.org/records/13732784/files/integrated_umap.rds"/>
            <conditional name="method">
                <param name="method" value="FindConservedMarkers"/>
                <param name="ident_1" value="0"/>
                <param name="ident_2" value="1"/>
                <param name="grouping_var" value="Group"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="FindConservedMarkers"/>
                </assert_contents>
            </output>
            <output name="rds_out" location="https://zenodo.org/records/13732784/files/conserved_markers.rds" ftype="rds"/>
            <output name="markers_tabular" location="https://zenodo.org/records/13732784/files/conserved_markers.csv" ftype="csv">
                <assert_contents>
                    <has_text_matching expression="Group_B_avg_log2FC"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
Seurat
======

Seurat is an R package designed for QC, analysis, and exploration of single-cell RNA-seq data.

Seurat aims to enable users to identify and interpret sources of heterogeneity from single-cell transcriptomic measurements, and to integrate diverse types of single-cell data.

FindNeighbors
=============

Compute the k.param nearest neighbors for a given dataset.

Can also optionally (via compute.SNN) construct a shared nearest neighbor graph by calculating the neighborhood overlap (Jaccard index) between every cell and its k.param nearest neighbors.

More details on the `seurat documentation
<https://satijalab.org/seurat/reference/findneighbors>`__

FindMultiModalNeighbors
=======================

This function will construct a weighted nearest neighbor (WNN) graph for two modalities (e.g. RNA-seq and CITE-seq). For each cell, we identify the nearest neighbors based on a weighted combination of two modalities.

Takes as input two dimensional reductions, one computed for each modality.

More details on the `seurat documentation
<https://satijalab.org/seurat/reference/findmultimodalneighbors>`__

FindClusters
============

Identify clusters of cells by a shared nearest neighbor (SNN) modularity optimization based clustering algorithm.

First calculate k-nearest neighbors and construct the SNN graph. Then optimize the modularity function to determine clusters.

More details on the `seurat documentation
<https://satijalab.org/seurat/reference/findclusters>`__


FindAllMarkers
==============

Find markers (differentially expressed genes) for each of the identity classes in a dataset.

Outputs a matrix containing a ranked list of putative markers, and associated statistics (p-values, ROC score, etc.)

Methods:

"wilcox" : Identifies differentially expressed genes between two groups of cells using a Wilcoxon Rank Sum test (default); will use a fast implementation by Presto if installed

"wilcox_limma" : Identifies differentially expressed genes between two groups of cells using the limma implementation of the Wilcoxon Rank Sum test; set this option to reproduce results from Seurat v4

"bimod" : Likelihood-ratio test for single cell gene expression, (McDavid et al., Bioinformatics, 2013)

"roc" : Identifies 'markers' of gene expression using ROC analysis. For each gene, evaluates (using AUC) a classifier built on that gene alone, to classify between two groups of cells. An AUC value of 1 means that expression values for this gene alone can perfectly classify the two groupings (i.e. each of the cells in cells.1 exhibit a higher level than each of the cells in cells.2). An AUC value of 0 also means there is perfect classification, but in the other direction. A value of 0.5 implies that the gene has no predictive power to classify the two groups. Returns a 'predictive power' (abs(AUC-0.5) * 2) ranked matrix of putative differentially expressed genes.

"t" : Identify differentially expressed genes between two groups of cells using Student's t-test.

"negbinom" : Identifies differentially expressed genes between two groups of cells using a negative binomial generalized linear model. Use only for UMI-based datasets.

"poisson" : Identifies differentially expressed genes between two groups of cells using a Poisson generalized linear model. Use only for UMI-based datasets.

"LR" : Uses a logistic regression framework to determine differentially expressed genes. Constructs a logistic regression model predicting group membership based on each feature individually and compares this to a null model with a likelihood ratio test.

"MAST" : Identifies differentially expressed genes between two groups of cells using a hurdle model tailored to scRNA-seq data. Utilizes the MAST package to run the DE testing.

"DESeq2" : Identifies differentially expressed genes between two groups of cells based on a model using DESeq2 which uses a negative binomial distribution (Love et al., Genome Biology, 2014). This test does not support pre-filtering of genes based on average difference (or percent detection rate) between cell groups. However, genes may be pre-filtered based on their minimum detection rate (min.pct) across both cell groups.

More details on the `seurat documentation
<https://satijalab.org/seurat/reference/findallmarkers>`__

FindMarkers
===========

Find markers (differentially expressed genes) for identity classes (clusters) or groups of cells.

Outputs a data.frame with a ranked list of putative markers as rows, and associated statistics as columns (p-values, ROC score, etc., depending on the test used (test.use)).

Methods - as for FindAllMarkers

More details on the `seurat documentation
<https://satijalab.org/seurat/reference/findmarkers>`__

FindConservedMarkers
====================

Finds markers that are conserved between the groups.

Uses metap::minimump as meta.method.

More details on the `seurat documentation
<https://satijalab.org/seurat/reference/findconservedmarkers>`__

    ]]></help>
    <expand macro="citations"/>
</tool>
author	iuc
date	Tue, 05 Nov 2024 11:54:58 +0000
parents	94f1b9c7286f
children