Mercurial > repos > ebi-gxa > run_sccaf
changeset 0:ca26d4b4b02c draft
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/tertiary-analysis/sccaf commit 68be7a6fdb93e8b59e80e5f16e7fecdaa16f288c
author | ebi-gxa |
---|---|
date | Mon, 14 Oct 2019 08:11:29 -0400 |
parents | |
children | 647d34f125bc |
files | run_sccaf.xml sccaf_macros.xml static/images/example_sccaf_workflow.png |
diffstat | 3 files changed, 268 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/run_sccaf.xml Mon Oct 14 08:11:29 2019 -0400 @@ -0,0 +1,171 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
  Galaxy wrapper around the "sccaf" command line tool.

  Two modes are exposed through the "mode" conditional:
    * assessment only (optimise = false): the chosen clustering is assessed
      and a ROC curve image is produced;
    * optimisation (optimise = true): sccaf is additionally asked to optimise
      the clustering; the log, plots and resulting object are collected, and
      optionally one small file per optimisation round is written so that
      assessments can be distributed downstream.

  Parameter names are part of the tool interface (saved workflows refer to
  them), so misspelt names such as "distribute_assesment" are kept as they are.
-->
<tool id="run_sccaf" name="Run SCCAF" version="@TOOL_VERSION@+galaxy0">
    <description>to assess and optimise clustering</description>
    <macros>
        <import>sccaf_macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
## Expose the input under a fixed name in the job working directory.
## NOTE(review): the input_format parameter declared by the input_object_params
## macro is not forwarded to sccaf here - confirm whether that is intended.
ln -s '${input_obj_file}' input.h5 &&
sccaf -i input.h5

--cores \${GALAXY_SLOTS:-1}

## Clustering to assess: an external table or a slot of the input object.
#if $cluster_source.use_tsv
    --external-clustering-tsv '${cluster_source.input_tsv}'
#else
    --slot-for-existing-clustering '${cluster_source.obj_attr}'
#end if

## skip_init_assessment is only defined inside the optimise branch of the
## conditional, so it must not be looked up unless optimise is set.
#if $mode.optimise and $mode.skip_init_assessment
    --skip-assessment
#end if

#if $mode.optimise
    #if not $mode.init.from_input
        --resolution ${mode.init.resolution}
    #end if
    --optimise
    #if $mode.optimisation_stop.condition == "accuracy_threshold"
        --min-accuracy ${mode.optimisation_stop.accuracy}
    #else
        --min-accuracy 0.955
        --undercluster-boundary '${mode.optimisation_stop.min_resolution_clustering_slot}'
    #end if
    --produce-rounds-summary

    --optimisation-plots-output '${opt_pdf_out}'

    #if $mode.citer
        --conf-sampling-iterations ${mode.citer}
    #end if

    > '${opt_text_out}'
#end if

## Write one file per optimisation round. Chained with && so that a failing
## sccaf run is reported through the exit code instead of being hidden by the
## exit status of the loop.
#if $mode.optimise and $mode.distribute_assesment
    && mkdir -p outputs
    && for round in \$(cat rounds.txt); do
        echo "Round: "\$round;
        echo \$round > outputs/round_\$round\.txt;
    done
#end if
]]></command>

    <inputs>
        <expand macro="input_object_params"/>

        <!-- Where the clustering to be assessed comes from. -->
        <conditional name="cluster_source">
            <param name="use_tsv"
                   type="boolean"
                   checked="true"
                   label="Use external cluster information"
                   help="If the provided AnnData/Loom file does not include the clustering, or if you want to use an external clustering assignment."/>
            <when value="true">
                <param name="input_tsv"
                       type="data"
                       argument="--external-clustering-tsv"
                       format="tsv"
                       label="Cluster table for assessment in tsv format"/>
            </when>
            <when value="false">
                <param name="obj_attr"
                       type="text"
                       argument="--slot-for-existing-clustering"
                       value="louvain"
                       label="Attribute in input object that contains cluster information"
                       help="If you are not using an external clustering, then you must specify the slot/index in the AnnData or Loom file where the clustering to be used for assessment (and potentially optimisation starting point) is saved."/>
            </when>
        </conditional>

        <!-- Assessment only (false) or assessment plus optimisation (true). -->
        <conditional name="mode">
            <param name="optimise"
                   type="boolean"
                   checked="false"
                   argument="--optimise"
                   label="Run clustering optimisation"
                   help="By default the tool only runs an assessment of the clustering quality. To further optimise the clustering, enable this option."/>
            <when value="true">
                <conditional name="init">
                    <param name="from_input"
                           type="boolean"
                           checked="true"
                           label="Use input clustering to initialise optimisation"
                           help="This option uses the previously specified clustering (either external file provided or internal index/slot specified) to serve as starting point for the optimisation process."/>
                    <when value="true">
                    </when>
                    <when value="false">
                        <param name="resolution"
                               type="float"
                               value="1.5"
                               argument="--resolution"
                               label="Resolution for initialising louvain clustering"
                               help="Perform an initial clustering and use this as a starting point."/>
                    </when>
                </conditional>
                <conditional name="optimisation_stop">
                    <param type="select"
                           name="condition"
                           help="How should the optimisation algorithm be stopped."
                           label="Stop condition">
                        <option value="accuracy_threshold" selected="true">Set a threshold for accuracy</option>
                        <option value="min_resolution_clustering">Use a slot with a low resolution clustering</option>
                    </param>
                    <when value="accuracy_threshold">
                        <param name="accuracy"
                               type="float"
                               value="0.90"
                               argument="--min-accuracy"
                               label="Accuracy for convergence of the optimisation process"
                               help="The SCCAF optimisation process will converge once this accuracy is achieved."/>
                    </when>
                    <when value="min_resolution_clustering">
                        <param name="min_resolution_clustering_slot"
                               type="text"
                               argument="--undercluster-boundary"
                               label="Underclustering boundary to use in the optimisation."
                               help="The slot inside the ann data object with the desired underclustering (low resolution) to be used as exit condition."/>
                    </when>
                </conditional>
                <param name="skip_init_assessment"
                       type="boolean"
                       argument="--skip-assessment"
                       checked="false"
                       label="Run only the optimisation and skip the initial assessment"
                       help="If you are running the optimisation, you can choose to skip the initial assessment to save time."/>
                <param name="distribute_assesment"
                       type="boolean"
                       checked="false"
                       label="Produce parameter walk for assessment distribution"
                       help="Will split rounds in files so that multiple assess processes can be distributed down the line"/>
                <param name="citer"
                       type="integer"
                       value="3"
                       argument="--conf-sampling-iterations"
                       label="Number of iterations of sampling for the confusion matrix."
                       help="By default is 3, higher numbers mean more stable results (number of rounds) but also longer execution times."
                       optional="true"/>
            </when>
            <when value="false">
            </when>
        </conditional>
    </inputs>

    <outputs>
        <!--
          The initial assessment plot is produced unless optimisation is on
          and the initial assessment was explicitly skipped. The optimise
          check comes first because skip_init_assessment does not exist in
          assessment-only mode.
        -->
        <data name="output_png" format="png" from_work_dir="roc-curve.png" label="${tool.name} on ${on_string} ROC Curve for initial assessment">
            <filter>not mode['optimise'] or not mode['skip_init_assessment']</filter>
        </data>
        <data name="output_h5" format="h5" from_work_dir="output.h5" label="${tool.name} on ${on_string} output.h5">
            <filter>mode['optimise']</filter>
        </data>
        <data name="optim_png" format="png" from_work_dir="optim.png" label="${tool.name} on ${on_string} ROC Curve for optimisation result">
            <filter>mode['optimise']</filter>
        </data>
        <!-- One element per file written to outputs/ by the command above. -->
        <collection name="rounds_walk" type="list" label="Rounds for assessment distribution">
            <filter>mode['optimise'] and mode['distribute_assesment']</filter>
            <discover_datasets pattern="__name_and_ext__" directory="outputs"/>
        </collection>
        <data name="opt_text_out" format="txt" label="${tool.name} on ${on_string} Optimisation process log">
            <filter>mode['optimise']</filter>
        </data>
        <data name="opt_pdf_out" format="pdf" label="${tool.name} on ${on_string} Optimisation process plots">
            <filter>mode['optimise']</filter>
        </data>
    </outputs>

    <tests>
        <!-- Assessment-only run using an external clustering table. -->
        <test>
            <param name="input_obj_file" value="find_cluster.h5"/>
            <param name="use_tsv" value="true"/>
            <param name="input_tsv" value="find_cluster.tsv"/>
            <output name="output_png" file="run_sccaf.png" ftype="png"/>
        </test>
    </tests>

    <help><![CDATA[
@SCCAF_INTRO@

Inputs
------

* AnnData object which contains the expression matrix and pre-calculated coordinates for UMAP. The AnnData object can already include clustering data, in which case the user will need to know in which AnnData slot/label it is contained.
* Optional external text file with mappings between cells and clusters (when no clustering is given inside the AnnData file).

Modes of operation
------------------

* Optimisation with exit condition based on accuracy cut-off. In this case the user provides a minimum cut-off for the accuracy to be achieved and the optimisation process will exit at that point.
* Optimisation with exit condition based on under-clustered scenario. In this case the AnnData object given must include a low resolution, under-clustered clustering, and its label must be known so that it can be specified.
* Assessment only (no optimisation), where an existing clustering is assessed.

This is resource intensive.

Distributed assessment
----------------------

If running the optimisation you can distribute assessments of the optimisation
results. For this, activate the "Produce parameter walk for assessment
distribution" option, which will generate a "Rounds for assessment distribution".
Then feed the AnnData output of the optimisation process and the rounds output
to the SCCAF Assesment module. Then merge all assessment results with
SCCAF Assesment Merger (this also receives the rounds output). The workflow
would look like this:

.. image:: example_sccaf_workflow.png
   :height: 400 px
   :width: 850 px
   :scale: 80 %


]]></help>
    <!-- <expand macro="citations"/> -->
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sccaf_macros.xml Mon Oct 14 08:11:29 2019 -0400 @@ -0,0 +1,97 @@
<!--
  Shared macros and tokens for the SCCAF Galaxy tools.

  xml macros are pulled into a tool with <expand macro="..."/>; tokens
  (@NAME@) are substituted textually wherever they appear in a tool file.
-->
<macros>
    <!-- Package requirement, pinned to the wrapped tool version. -->
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@TOOL_VERSION@">sccaf</requirement>
        </requirements>
    </xml>
    <token name="@TOOL_VERSION@">0.0.7</token>
    <!-- reStructuredText introduction shared by the tools' help sections. -->
    <token name="@SCCAF_INTRO@">
SCCAF explained
===============

Single Cell Clustering Assessment Framework (SCCAF) is a novel method for
automated identification of putative cell types from single cell RNA-seq
(scRNA-seq) data. By iteratively applying clustering and a machine learning
approach to gene expression profiles of a given set of cells, SCCAF
simultaneously identifies distinct cell groups and a weighted list of feature
genes for each group. The feature genes, which are overexpressed in the
particular cell group, jointly discriminate the given cell group from other
cells. Each such group of cells corresponds to a putative cell type or state,
characterised by the feature genes as markers.
    </token>
    <token name="@HELP@">More information can be found at https://github.com/SCCAF/SCCAF</token>
    <!--
      Cheetah fragment turning the parameters of a "do_plotting" section
      (see output_plot_params below) into command line options. Option
      names are spelt out in full so that they match the "argument"
      attributes declared on the corresponding parameters.
    -->
    <token name="@PLOT_OPTS@">
#if $do_plotting.plot
    -P output.png
    --projection $do_plotting.projection
    --components $do_plotting.components
    #if $do_plotting.color_by
        --color-by $do_plotting.color_by
    #end if
    #if $do_plotting.groups
        --groups $do_plotting.groups
    #end if
    #if $do_plotting.use_raw
        --use-raw
    #end if
    #if $do_plotting.palette
        --palette $do_plotting.palette
    #end if
    #if $do_plotting.show_edges
        --edges
    #end if
    #if $do_plotting.show_arrows
        --arrows
    #end if
    #if not $do_plotting.color_order
        --no-sort-order
    #end if
    #if $do_plotting.omit_frame
        --frameoff
    #end if
#end if
    </token>
    <!-- TODO: add a DOI citation once one is available; an empty DOI entry is not valid. -->
    <xml name="citations">
        <citations>
            <citation type="bibtex">
                @misc{githubsccaf,
                author = {Miao, Zhichao},
                year = {2018},
                title = {SCCAF},
                publisher = {GitHub},
                journal = {GitHub repository},
                url = {https://github.com/Functional-Genomics/SCCAF},
                }</citation>
            <yield />
        </citations>
    </xml>
    <!-- Input object and its on-disk format. -->
    <xml name="input_object_params">
        <param name="input_obj_file"
               argument="--input-object-file"
               type="data"
               format="h5"
               label="Input object in AnnData hdf5 format"
               help="Normally the result of Scanpy (or equivalent), which already has both a visualisation (either tSNE, UMAP or PCA - needed) and clustering (ideally) pre-computed."/>
        <param name="input_format" argument="--input-format" type="select" label="Format of input object">
            <option value="anndata" selected="true">AnnData format hdf5</option>
            <option value="loom">Loom format hdf5, current support is incomplete</option>
        </param>
    </xml>
    <!-- On-disk format of the output object. -->
    <xml name="output_object_params">
        <param name="output_format" argument="--output-format" type="select" label="Format of output object">
            <option value="anndata" selected="true">AnnData format hdf5</option>
            <option value="loom">Loom format hdf5, current support is defective</option>
        </param>
    </xml>
    <!-- Plot options consumed by the @PLOT_OPTS@ token. -->
    <xml name="output_plot_params">
        <param name="color_by" argument="--color-by" type="text" value="n_genes" label="Color by attributes, comma separated strings"/>
        <param name="groups" argument="--groups" type="text" optional="true" label="Restrict plotting to named groups, comma separated strings"/>
        <param name="projection" argument="--projection" type="select" label="Plot projection">
            <option value="2d" selected="true">2D</option>
            <option value="3d">3D</option>
        </param>
        <param name="components" argument="--components" type="text" value="1,2" label="Components to plot, comma separated integers"/>
        <param name="palette" argument="--palette" type="text" optional="true" label="Palette"/>
        <param name="use_raw" argument="--use-raw" type="boolean" checked="false" label="Use raw attributes if present"/>
        <param name="show_edges" argument="--edges" type="boolean" checked="false" label="Show edges"/>
        <param name="show_arrows" argument="--arrows" type="boolean" checked="false" label="Show arrows"/>
        <param name="color_order" argument="--no-sort-order" type="boolean" checked="true" label="Element with high color-by value plot on top"/>
        <param name="omit_frame" argument="--frameoff" type="boolean" checked="false" label="Omit frame"/>
    </xml>
</macros>