<!-- Mercurial > repos > iuc > baredsc_1d -->

<macros>
    <!-- Version of the wrapped package; reused below as the version of the baredsc requirement. -->
    <token name="@TOOL_VERSION@">1.1.2</token>
    <!-- Wrapper revision suffix. NOTE(review): presumably appended to @TOOL_VERSION@ in the version
         attribute of each tool file; confirm in the tool XML files. -->
    <token name="@VERSION_SUFFIX@">1</token>
    <!-- Shared dependency declaration: the baredsc package pinned to @TOOL_VERSION@ and gzip 1.12,
         together with the command used to report the installed tool version. -->
    <xml name="requirements">
        <requirements>
            <requirement version="@TOOL_VERSION@" type="package">baredsc</requirement>
            <requirement version="1.12" type="package">gzip</requirement>
        </requirements>
        <version_command>baredSC_1d --version</version_command>
    </xml>
    <!-- Shared help text (reStructuredText) and citation for all baredSC tools.
         In the help body a literal backslash has to be written as two backslashes,
         because reStructuredText treats a single backslash as an escape character. -->
    <xml name="helpcitations">
        <help><![CDATA[

            .. class:: infomark

            **BARED (Bayesian Approach to Retrieve Expression Distribution of) Single Cell**

            baredSC is a tool that uses a Monte-Carlo Markov Chain to estimate a confidence interval on the probability density function (PDF) of expression of one or two genes from single-cell RNA-seq data. It uses the raw counts and the total number of UMI for each cell. The PDF is approximated by a number of 1d or 2d gaussians provided by the user. The likelihood is estimated using the assumption that the raw counts follow a Poisson distribution of parameter equal to the proportion of mRNA for the gene in the cell multiplied by the total number of UMI identified in this cell.

            To get a description of outputs, please read the `Documentation <https://baredsc.readthedocs.io/en/latest/index.html>`_

            This is a description of the figure with the results.

            - When the 1d version is used, it displays the mean PDF in solid red line, the median in black dashed lines (/!\\ the integral of the median is not equal to 1) with the confidence interval of 1 sigma (68%), 2 sigma (95%) and 3 sigma (99.7%) as well as in green, the kernel density estimate of the input values, the detected expression (``log(1 + targetSum * raw / total UMI)``).

            - When the 2d version is used, it displays the PDF as a heatmap as well as a projection on the x and y axis. On the projection, the confidence interval 68% is indicated as a shaded area as well as the mean with a solid red line and the median with a dashed black line. On the top right corner, the correlation is indicated with the confidence interval 68% as well as a confidence interval on the one-sided p-value (the probability that the correlation is the opposite sign of the mean, one sigma confidence interval).

            Usually you should run baredSC_1d or baredSC_2d with 1 to 4 gaussians. Then you combine the different models with combineMultipleModels_1d or combineMultipleModels_2d.

        ]]></help>
        <citations>
            <citation type="doi">10.1186/s12859-021-04507-8</citation>
        </citations>
    </xml>
    <!-- EDAM ontology topic annotations shared by the tools.
         NOTE(review): the identifiers presumably map to RNA-Seq (3170), single-cell sequencing (4028)
         and statistics and probability (2269); confirm against the EDAM ontology. -->
    <xml name="edam_topics">
        <edam_topics>
            <edam_topic>topic_3170</edam_topic>
            <edam_topic>topic_4028</edam_topic>
            <edam_topic>topic_2269</edam_topic>
        </edam_topics>
    </xml>
    <!-- Input selector: the user chooses between a tabular count table and an AnnData file,
         and the dataset parameter matching that choice is displayed. -->
    <xml name="macro_input_counts">
        <conditional name="input_counts">
            <param name="filetype" label="Input type" type="select">
                <option value="tabular">Tabular</option>
                <option value="anndata">Anndata (for example from Scanpy)</option>
            </param>
            <when value="tabular">
                <param argument="--input"
                       type="data"
                       format="tabular"
                       label="Input table (with header)"
                       help="Expected format is one line per cell, columns with raw counts and one column 'nCount_RNA' with the total number of UMI per cell (optionally other meta data to filter)"/>
            </when>
            <when value="anndata">
                <param argument="--inputAnnData"
                       type="data"
                       format="h5ad"
                       label="AnnData containing raw counts"/>
            </when>
        </conditional>
    </xml>
    <!-- Single text parameter naming the column that holds the counts of the gene (1d tools). -->
    <xml name="macro_single_gene">
        <param argument="--geneColName"
               type="text"
               label="Name of the column with gene counts."
               value=""/>
    </xml>
    <!-- Two text parameters naming the count columns of the genes shown on the x and y axes (2d tools). -->
    <xml name="macro_two_genes">
        <param argument="--geneXColName"
               type="text"
               label="Name of the column with gene counts for gene in x."
               value=""/>
        <param argument="--geneYColName"
               type="text"
               label="Name of the column with gene counts for gene in y."
               value=""/>
    </xml>
    <!-- Optional cell filtering on up to three metadata columns. The select gives the number of
         filters; each branch exposes one column name and one list of accepted values per filter. -->
    <xml name="macro_filter_cells">
        <conditional name="filter">
            <param name="nb" label="How many filters columns do you need?" type="select">
                <option value="0">0 (keep all cells from file)</option>
                <option value="1">1</option>
                <option value="2">2</option>
                <option value="3">3</option>
            </param>
            <when value="0"/>
            <when value="1">
                <param argument="--metadata1ColName" label="Name of the column with first filter." type="text" value=""/>
                <param argument="--metadata1Values" label="Values accepted in this column (separated by comma)." type="text" value=""/>
            </when>
            <when value="2">
                <param argument="--metadata1ColName" label="Name of the column with first filter." type="text" value=""/>
                <param argument="--metadata1Values" label="Values accepted in this column (separated by comma)." type="text" value=""/>
                <param argument="--metadata2ColName" label="Name of the column with second filter." type="text" value=""/>
                <param argument="--metadata2Values" label="Values accepted in this column (separated by comma)." type="text" value=""/>
            </when>
            <when value="3">
                <param argument="--metadata1ColName" label="Name of the column with first filter." type="text" value=""/>
                <param argument="--metadata1Values" label="Values accepted in this column (separated by comma)." type="text" value=""/>
                <param argument="--metadata2ColName" label="Name of the column with second filter." type="text" value=""/>
                <param argument="--metadata2Values" label="Values accepted in this column (separated by comma)." type="text" value=""/>
                <param argument="--metadata3ColName" label="Name of the column with third filter." type="text" value=""/>
                <param argument="--metadata3Values" label="Values accepted in this column (separated by comma)." type="text" value=""/>
            </when>
        </conditional>
    </xml>
    <!-- Per-axis grid parameters (range, number of bins, minimal Gaussian scale).
         The axis letter is supplied through the "axis" macro token, which defaults to x. -->
    <xml name="macro_MCMC_params_common_axis" token_axis="x">
        <param argument="--@AXIS@min"
               type="float"
               value="0"
               label="Minimum value to consider in @AXIS@ axis."
               help="Choose value small enough to go below smallest value."/>
        <param argument="--@AXIS@max"
               type="float"
               value="2.5"
               label="Maximum value to consider in @AXIS@ axis."
               help="Choose value large enough to go above largest value."/>
        <param argument="--n@AXIS@"
               type="integer"
               min="1"
               value="100"
               label="Number of values in @AXIS@ to check how your evaluated PDF is compatible with the model."
               help="Larger values will increase computing time while smaller values will decrease the resolution of your PDF."/>
        <param argument="--minScale@AXIS@"
               type="float"
               value="0.1"
               label="Minimal value of the scale of Gaussians on @AXIS@"
               help="Cannot be smaller than max of twice the bin size of PDF evaluation and half the bin size on @AXIS@ axis."/>
    </xml>
    <!-- Expression scale selector and random seed.
         The Seurat branch exposes targetSum; the help text refers to the total UMI column by the
         same name used in the input table help ('nCount_RNA') so users see one consistent name. -->
    <xml name="macro_scale_seed">
        <conditional name="scale">
            <param name="type" type="select" label="Scale for gene expression">
                <option value="Seurat">Like in Seurat (log(1+targetSum*X))</option>
                <option value="log">simply log</option>
            </param>
            <when value="Seurat">
                <param argument="--targetSum" type="float" value="10000" label="targetSum" help="use 0 for the median of nCount_RNA"/>
            </when>
            <when value="log"/>
        </conditional>
        <param argument="--seed" type="integer" value="1" label="Seed value to control randomness." help="Change seed value to get new result"/>
    </xml>
    <!-- MCMC settings shared by the baredSC tools: number of Gaussians, number of samples and an
         optional automatic rerun driven by a minimum number of effective samples. -->
    <xml name="macro_MCMC_common_baredSC">
        <param argument="--nnorm"
               type="integer"
               min="1"
               value="2"
               label="Number of Gaussians to fit."/>
        <param argument="--nsampMCMC"
               type="integer"
               min="1"
               value="100000"
               label="Number of samplings (iterations) of MCMC."/>
        <conditional name="automaticRestart">
            <param name="set_minNeff" label="Auto-rerun in case of obvious non-convergence" type="select">
                <option value="yes">Yes (the job may never stop)</option>
                <option value="no">No</option>
            </param>
            <when value="yes">
                <param argument="--minNeff"
                       type="float"
                       value="200"
                       label="Minimum number of effective samples to output result."
                       help="If the number of effective samples is below this threshold, the MCMC is automatically rerun with 10 times more samples"/>
            </when>
            <when value="no"/>
        </conditional>
    </xml>
    <!-- Multiple-dataset input collecting the npz archives to combine.
         The "d" macro token (default 1) only changes the dimension shown in the label. -->
    <xml name="combine_outputs" token_d="1">
        <param argument="--outputs"
               type="data"
               format="npz"
               multiple="true"
               label="Numpy archives from baredSC_@D@d with different number of Gaussians."/>
    </xml>
    <!-- Plot options shared by all tools: image format, figure title, number of initial samples
         to discard and approximate number of samples drawn in the plots. -->
    <xml name="macro_plots">
        <param name="image_file_format" label="Image output format" type="select">
            <option value="png">png</option>
            <option value="svg">svg</option>
            <option value="pdf">pdf</option>
        </param>
        <param argument="--title"
               type="text"
               value=""
               label="Title to set to all figures."/>
        <param argument="--removeFirstSamples"
               type="integer"
               value="-1"
               label="Number of samples to ignore before making the plots"
               help="Use -1 to use a fourth of the number of samples"/>
        <param argument="--nsampInPlot"
               type="integer"
               min="1"
               value="100000"
               label="Approximate number of samples to use in plots"/>
    </xml>
    <!-- Number of bins used for plotting in the 1d tools; the value -1 is the sentinel meaning
         "reuse the number of bins of the MCMC". -->
    <xml name="macro_prettybins_1d">
        <param argument="--prettyBins"
               type="integer"
               min="-1"
               value="-1"
               label="Number of bins to use in plots."
               help="Use -1 to use the number of bins used in MCMC"/>
    </xml>
    <!-- Per-axis variant of the plotting bin count; the axis letter comes from the "axis" macro
         token (default x) and -1 again means "reuse the number of bins of the MCMC". -->
    <xml name="macro_prettybins_axis" token_axis="x">
        <param argument="--prettyBins@AXIS@"
               type="integer"
               min="-1"
               value="-1"
               label="Number of bins to use in @AXIS@ in plots."
               help="Use -1 to use the number of bins used in MCMC"/>
    </xml>
    <!-- Space separated threshold values used to split the y gene into categories.
         The validator is anchored at both ends: a regex validator only has to match at the start
         of the value, so the previous fully optional, unanchored pattern accepted any string.
         Each value may carry an optional decimal part because the thresholds are floats.
         The empty string stays valid and means "no split". -->
    <xml name="macro_splity">
        <param argument="--splity" type="text" value="" label="Threshold values separated by space to plot the density for genex for 2 categories in geney values" help="Leave empty if you don't need this type of analysis.">
            <validator type="regex">^(-?[0-9]+(\.[0-9]+)?( -?[0-9]+(\.[0-9]+)?)*)?$</validator>
        </param>
    </xml>
    <!-- Boolean switch whose true value is the literal command-line flag and whose false value
         is the empty string, so the value can be inserted directly into the command. -->
    <xml name="macro_colorscale">
        <param argument="--log1pColorScale" type="boolean" truevalue="--log1pColorScale" falsevalue="" checked="false" label="Enable to see regions in plot with low proportion of cells"/>
    </xml>
    <!-- Per-axis oversampling factors. The axis letter comes from the "axis" macro token and the
         default of the second factor from the "default_osamppdf" macro token.
         Both factors are bounded below by 1, so a zero or negative oversampling factor is
         rejected in the form instead of reaching the tool. -->
    <xml name="macro_advanced_common_axis" token_axis="x" token_default_osamppdf="5">
        <param argument="--osamp@AXIS@" type="integer" min="1" value="10" label="Oversampling factor of @AXIS@ values when evaluating PDF of Poisson distribution." />
        <param argument="--osamp@AXIS@pdf" type="integer" min="1" value="@DEFAULT_OSAMPPDF@" label="Oversampling factor of @AXIS@ values when evaluating PDF at each step of the MCMC."/>
    </xml>
    <!-- Advanced parameters of the log evidence evaluation: covariance scale factor and size of
         the random parameter sample. -->
    <xml name="macro_advanced_evidence">
        <param argument="--coviscale"
               type="float"
               value="1"
               label="Scale factor to apply to covariance of parameters to get random parameters in logevidence evaluation."/>
        <param argument="--nis"
               type="integer"
               value="1000"
               label="Size of sampling of random parameters in logevidence evaluation."/>
    </xml>
    <!-- Optional customisation of the burning phase of the MCMC; the two parameters are only
         shown when the user selects "Yes". -->
    <xml name="macro_advanced_common_baredSC">
        <conditional name="burn">
            <param name="custom" label="Custom parameters of the burning phase of MCMC" type="select">
                <option value="no">No</option>
                <option value="yes">Yes</option>
            </param>
            <when value="no"/>
            <when value="yes">
                <param argument="--nsampBurnMCMC"
                       type="integer"
                       value="-1"
                       label="Number of samplings (iterations) in the burning phase of mcmc (Set -1 for a fourth of total number of samples)"/>
                <param argument="--T0BurnMCMC"
                       type="float"
                       min="1"
                       value="100"
                       label="Initial temperature in the burning phase of MCMC"/>
            </when>
        </conditional>
    </xml>
    <!-- Scale of the truncated normal prior on the correlation.
         NOTE(review): presumably only expanded by the 2d tools, since the matching command token
         is the 2d complement; confirm in the tool XML files. -->
    <xml name="macro_scaleprior">
        <param argument="--scalePrior" type="float" value="0.3" label="Scale of the truncnorm used in the prior for the correlation."/>
    </xml>
    <!-- Command fragment for the mandatory inputs of the 1d tools: the count file option that
         matches the selected input type, followed by the gene column name. -->
    <token name="@REQUIRED_INPUTS_1D@"><![CDATA[
        #if str($input_counts.filetype) == 'tabular':
            --input '$input_counts.input'
        #elif str($input_counts.filetype) == 'anndata':
            --inputAnnData '$input_counts.inputAnnData'
        #end if
        --geneColName '$geneColName'
    ]]></token>
    <!-- Command fragment for the mandatory inputs of the 2d tools: the count file option that
         matches the selected input type, followed by the x and y gene column names. -->
    <token name="@REQUIRED_INPUTS_2D@"><![CDATA[
        #if str($input_counts.filetype) == 'tabular':
            --input '$input_counts.input'
        #elif str($input_counts.filetype) == 'anndata':
            --inputAnnData '$input_counts.inputAnnData'
        #end if
        --geneXColName '$geneXColName'
        --geneYColName '$geneYColName'
    ]]></token>
    <!-- Command fragment for the cell filters. Filters are cumulative: choosing N filters emits
         the options of filters 1 to N in that order, and choosing 0 emits nothing. -->
    <token name="@FILTER_CELLS@"><![CDATA[
        #if str($filter.nb) in ('1', '2', '3'):
            --metadata1ColName '$filter.metadata1ColName'
            --metadata1Values '$filter.metadata1Values'
        #end if
        #if str($filter.nb) in ('2', '3'):
            --metadata2ColName '$filter.metadata2ColName'
            --metadata2Values '$filter.metadata2Values'
        #end if
        #if str($filter.nb) == '3':
            --metadata3ColName '$filter.metadata3ColName'
            --metadata3Values '$filter.metadata3Values'
        #end if
    ]]></token>
    <!-- Command fragment with the x axis grid, scale and seed options of the 1d tools, read from
         the MCMC section. targetSum is only passed when the Seurat scale is selected.
         This token passes the scale as "xscale" and the minimal scale as "minScale", whereas the
         2d token uses "scale" and "minScalex". NOTE(review): confirm both spellings against the
         command-line help of the 1d and 2d executables. -->
    <token name="@MCMC_1D@" ><![CDATA[
        --xmin $MCMC.xmin
        --xmax $MCMC.xmax
        --xscale '$MCMC.scale.type'
        #if str( $MCMC.scale.type ) == "Seurat":
            --targetSum $MCMC.scale.targetSum
        #end if
        --nx $MCMC.nx
        --minScale $MCMC.minScalex
        --seed $MCMC.seed
    ]]></token>
    <!-- Command fragment with the x and y axis grid, scale and seed options of the 2d tools, read
         from the MCMC section. targetSum is only passed when the Seurat scale is selected. -->
    <token name="@MCMC_2D@" ><![CDATA[
        --xmin $MCMC.xmin
        --xmax $MCMC.xmax
        --nx $MCMC.nx
        --minScalex $MCMC.minScalex
        --ymin $MCMC.ymin
        --ymax $MCMC.ymax
        --ny $MCMC.ny
        --minScaley $MCMC.minScaley
        --scale '$MCMC.scale.type'
        #if str( $MCMC.scale.type ) == "Seurat":
            --targetSum $MCMC.scale.targetSum
        #end if
        --seed $MCMC.seed
    ]]></token>
    <!-- Command fragment with the number of Gaussians and MCMC samples; minNeff is only passed
         when the automatic rerun is enabled. -->
    <token name="@BAREDSC_COMMON@" ><![CDATA[
        --nnorm $MCMC.nnorm
        --nsampMCMC $MCMC.nsampMCMC
        #if str( $MCMC.automaticRestart.set_minNeff ) == "yes":
            --minNeff $MCMC.automaticRestart.minNeff
        #end if
    ]]></token>
    <!-- Command fragment with the plot options. The title is omitted when empty and
         removeFirstSamples is omitted when it holds the sentinel -1, so the tool default applies;
         nsampInPlot is always passed. -->
    <token name="@PLOTS@" ><![CDATA[
        #if str( $plots.title ) != '':
            --title '$plots.title'
        #end if
        #if $plots.removeFirstSamples != -1:
            --removeFirstSamples $plots.removeFirstSamples
        #end if
        --nsampInPlot $plots.nsampInPlot
    ]]></token>
    <!-- Command fragment passing prettyBins only when it differs from the sentinel -1. -->
    <token name="@PRETTYBINS_1D@" ><![CDATA[
        #if $plots.prettyBins != -1:
            --prettyBins $plots.prettyBins
        #end if
    ]]></token>
    <!-- Command fragment with the 2d plot extras: per-axis bin counts (skipped when -1), the
         split thresholds (skipped when empty) and the colour scale flag (skipped when empty).
         The split thresholds are inserted unquoted so that each space separated value becomes
         its own argument. NOTE(review): this relies on the validator of the splity parameter
         restricting the value to numbers and spaces; confirm the validator is strict. -->
    <token name="@PRETTYBINS_SPLITY_COLORSCALE_2D@" ><![CDATA[
        #if $plots.prettyBinsx != -1:
            --prettyBinsx $plots.prettyBinsx
        #end if
        #if $plots.prettyBinsy != -1:
            --prettyBinsy $plots.prettyBinsy
        #end if
        ## splity is space separated floats
        #if str($plots.splity) != '':
            --splity $plots.splity
        #end if
        #if str($plots.log1pColorScale) != '':
            '$plots.log1pColorScale'
        #end if
    ]]></token>
    <!-- Command fragment with the advanced options that exist for every tool: x axis
         oversampling factors and the log evidence parameters, read from the advanced section. -->
    <token name="@ADVANCED_COMMON_X@" ><![CDATA[
        --osampx $advanced.osampx
        --osampxpdf $advanced.osampxpdf
        --coviscale $advanced.coviscale
        --nis $advanced.nis
    ]]></token>
    <!-- Command fragment with the additional advanced options of the 2d tools: y axis
         oversampling factors and the scale of the correlation prior. -->
    <token name="@ADVANCED_COMMON_COMPLEMENT_2D@" ><![CDATA[
        --osampy $advanced.osampy
        --osampypdf $advanced.osampypdf
        --scalePrior $advanced.scalePrior
    ]]></token>
    <!-- Command fragment for the custom burning phase. Nothing is emitted unless the user enabled
         custom parameters; nsampBurnMCMC is skipped when it holds the sentinel -1, while
         T0BurnMCMC is always passed inside the custom branch. -->
    <token name="@ADVANCED_BAREDSC_COMMON@" ><![CDATA[
        #if str( $advanced.burn.custom ) == "yes":
            #if str( $advanced.burn.nsampBurnMCMC ) != "-1":
                --nsampBurnMCMC $advanced.burn.nsampBurnMCMC
            #end if
            --T0BurnMCMC $advanced.burn.T0BurnMCMC
        #end if
    ]]></token>
    <!-- Command fragment listing the npz archives to combine. Each selected dataset is passed as
         one quoted path after the outputs option. The loop emits the dataset itself; emitting
         the loop index would hand the tool the strings 0, 1, 2... instead of file paths. -->
    <token name="@COMBINE_OUTPUTS@"><![CDATA[
        --outputs
        #for $output in $MCMC.outputs:
            '$output'
        #end for
    ]]></token>
    <!-- Shell post-processing for the 1d tools: moves baredSC_pdf.txt to "output", renames the
         figure (whose extension is the selected image format) to "baredSC" and decompresses
         baredSC_means.txt.gz. NOTE(review): "output" is presumably a directory created earlier
         in the tool command; confirm in the tool XML files. -->
    <token name="@ORDER_OUTPUTS_1D@"><![CDATA[
        mv baredSC_pdf.txt output &&
        mv baredSC.$plots.image_file_format baredSC &&
        gunzip baredSC_means.txt.gz
    ]]></token>
    <!-- Shell post-processing for the 2d tools: moves the two pdf2d text files to "output" and
         renames the figure to "baredSC". When split thresholds were given, the text file of each
         threshold is renamed by appending "_pdf" before the extension; the threshold string is
         split on single spaces. NOTE(review): "output" is presumably a directory created earlier
         in the tool command; confirm in the tool XML files. -->
    <token name="@ORDER_OUTPUTS_2D@"><![CDATA[
        mv baredSC_pdf2d.txt output &&
        mv baredSC_pdf2d_flat.txt output &&
        mv baredSC.$plots.image_file_format baredSC
        #if str($plots.splity) != '':
            #for $value in str($plots.splity).split(' '):
                && mv baredSC_split'$value'.txt baredSC_split'$value'_pdf.txt
            #end for
        #end if
    ]]></token>
</macros>
<!--
author	iuc
date	Fri, 01 Dec 2023 11:54:49 +0000
parents	4fff5a293013
children	769d815e4b0d
-->