<!-- Mercurial > repos > iuc > cosg -->

<tool id="cosg" name="COSG" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@">
    <description>Cell marker gene identification</description>
    <!-- macros.xml is imported here; the @...@ tokens and the <expand> macros used in this file are expected to come from it. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Cross-reference to the bio.tools registry entry. -->
    <xrefs>
        <xref type="bio.tools">cosg</xref>
    </xrefs>
    <expand macro="requirements"/>
    <expand macro="version_command"/>
    <!-- The whole command line is the @CMD@ token; its definition is not visible in this file (presumably in the imported macros.xml). A non-zero exit code marks the job as failed. -->
    <command detect_errors="exit_code"><![CDATA[
@CMD@
      ]]></command>
    <configfiles>
        <!-- Cheetah template rendered into the Python script that calls cosg.cosg and writes marker.tsv. The @CMD_*@ tokens are defined outside this file. -->
        <configfile name="script_file"><![CDATA[
@CMD_imports@
@CMD_read_inputs@

## Turn the free-text 'groups' parameter into a Python value:
##   - 'all' (or an empty value) is passed through as the literal string 'all';
##   - anything else is treated as a comma-separated list of group names and
##     rendered as a Python list, because cosg.cosg expects a sequence of groups.
## Quote placeholders (__sq__ / __dq__) that Galaxy's default text sanitizer
## substitutes for typed quotes are dropped, so both g1,g2 and 'g1','g2' work.
#set $groups_raw = str($method_options.groups).strip()
#set $group_list = []
#if $groups_raw != 'all'
    #for $group_name in $groups_raw.split(',')
        #set $group_name = $group_name.replace('__sq__', '').replace('__dq__', '').strip()
        #if $group_name
            #silent $group_list.append($group_name)
        #end if
    #end for
#end if

cosg.cosg(adata,
        groupby='$method_options.groupby',
        #if $group_list
        groups=$group_list,
        #else
        groups='all',
        #end if
        n_genes_user=$method_options.n_genes_user,
        mu=$advanced_options.mu,
        remove_lowly_expressed=$advanced_options.filter_expression.remove_lowly_expressed,
        #if $advanced_options.filter_expression.remove_lowly_expressed == "True"
        expressed_pct=$advanced_options.filter_expression.expressed_pct,
        #end if
        key_added='$advanced_options.key_added',
        use_raw=$advanced_options.layer_selection.use_raw,
        #if $advanced_options.layer_selection.use_raw == "False"
        #if $advanced_options.layer_selection.layer
        layer='$advanced_options.layer_selection.layer',
        #end if
        #end if
        reference='$advanced_options.reference'
        )

## Read the results back from the key the user asked for (key_added), not a
## hard-coded 'cosg', so a custom key does not raise a KeyError.
df=pd.DataFrame(adata.uns['$advanced_options.key_added']['names']).T
df.to_csv('marker.tsv', sep='\t', index=True)

@CMD_anndata_write_outputs@
]]></configfile>
    </configfiles>
    <inputs>
        <!-- AnnData input; the macro is defined outside this file. -->
        <expand macro="inputs_anndata"/>
        <section name="method_options" title="Method Options" expanded="true">
            <!-- groupby is required: reject an empty value up front, as is done for key_added and reference. -->
            <param argument="groupby" type="text" value="" optional="false" label="The key of the cell groups in .obs">
                <validator type="empty_field"/>
            </param>
            <param argument="groups" type="text" value="all" optional="false" label="Subset of cell groups" help="Use 'all' to compare every group, or give a comma-separated list of group names, e.g. g1,g2,g3."/>
            <param argument="n_genes_user" type="integer" value="50" min="1" label="The number of genes that appear in the returned tables"/>
        </section>
        <section name="advanced_options" title="Advanced Options">
            <!-- No upper bound on mu: the documented constraint is only mu >= 0. -->
            <param argument="mu" type="float" value="1.0" min="0.0" label="The penalty restricting marker genes expressing in non-target cell groups" help="Larger value represents more strict restrictions. mu should be >= 0, and by default, mu = 1."/>
            <!-- expressed_pct is only offered (and only passed to cosg.cosg) when filtering is switched on. -->
            <conditional name="filter_expression">
                <param name="remove_lowly_expressed" type="select" label="Remove lowly expressed genes" help="If yes, genes that express a percentage of target cells smaller than a specific value (`expressed_pct`) are not considered as marker genes for the target cells.">
                    <option value="False">No</option>
                    <option value="True">Yes</option>
                </param>
                <when value="False"/>
                <when value="True">
                    <param argument="expressed_pct" type="float" value="0.1" min="0.01" max="1.0" label="Percentage of target cells" help="Genes that express a percentage of target cells smaller than a specific value (`expressed_pct`) are not considered as marker genes for the target cells."/>
                </when>
            </conditional>
            <param argument="key_added" type="text" value="cosg" optional="false" label="The key in adata.uns information is saved to.">
                <validator type="empty_field"/>
            </param>
            <!-- A layer can only be chosen when use_raw is False. -->
            <conditional name="layer_selection">
                <param name="use_raw" type="select" label="Use raw attribute of adata if present to perform tests on." help="If use_raw is set to True then adata.raw.X if it exists.">
                    <option value="False">No</option>
                    <option value="True">Yes</option>
                </param>
                <when value="False">
                    <param argument="layer" type="text" value="" label="Key from adata.layers whose value will be used to perform tests on." help="If empty then adata.X will be used. If use_raw is set to True then adata.raw.X. If layers specified then use adata.layers[layer]."/>
                </when>
                <when value="True"/>
            </conditional>
            <param argument="reference" type="text" value="rest" optional="false" label="If a group identifier, compare with respect to this group." help=" If you use the keyword 'rest', compare each group to the union of the rest of the group.">
                <validator type="empty_field"/>
            </param>
        </section>
        <!-- Shared advanced options; the tests below set show_log in an advanced_common section, presumably provided by this macro. -->
        <expand macro="inputs_common_advanced"/>
    </inputs>
    <outputs>
        <!-- Outputs declared by the anndata_outputs macro (defined outside this file). -->
        <expand macro="anndata_outputs"/>
        <!-- Marker table that the script writes to marker.tsv in the job working directory. -->
        <data name="marker_out"
              format="tabular"
              from_work_dir="marker.tsv"
              label="${tool.name} on ${on_string}: Markers"/>
    </outputs>
    <tests>
        <test expect_num_outputs="3">
            <!-- test 1: default options, cells grouped by bulk_labels -->
            <param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced.h5ad"/>
            <param name="groupby" value="bulk_labels"/>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <!-- hidden_output must contain the cosg.cosg call with the default arguments. -->
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="cosg.cosg"/>
                    <has_text_matching expression="groupby='bulk_labels'"/>
                    <has_text_matching expression="groups='all'"/>
                    <has_text_matching expression="n_genes_user=50"/>
                    <has_text_matching expression="mu=1.0"/>
                    <has_text_matching expression="remove_lowly_expressed=False"/>
                    <has_text_matching expression="key_added='cosg'"/>
                    <has_text_matching expression="use_raw=False"/>
                    <has_text_matching expression="reference='rest'"/>
                </assert_contents>
            </output>
            <output name="anndata_out"
                    file="cosg.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_1.h5ad"
                    ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs, var, uns"/>
                </assert_contents>
            </output>
            <!-- The marker table is compared by size only and must have 51 columns. -->
            <output name="marker_out"
                    file="marker_1.tsv"
                    ftype="tabular"
                    compare="sim_size">
                <assert_contents>
                    <has_n_columns n="51"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="3">
            <!-- test 2: grouped by louvain with lowly expressed genes removed -->
            <param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced.h5ad"/>
            <param name="groupby" value="louvain"/>
            <param name="remove_lowly_expressed" value="True"/>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <!-- With filtering enabled the call must also carry expressed_pct at its default of 0.1. -->
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="cosg.cosg"/>
                    <has_text_matching expression="groupby='louvain'"/>
                    <has_text_matching expression="groups='all'"/>
                    <has_text_matching expression="n_genes_user=50"/>
                    <has_text_matching expression="mu=1.0"/>
                    <has_text_matching expression="remove_lowly_expressed=True"/>
                    <has_text_matching expression="expressed_pct=0.1"/>
                    <has_text_matching expression="key_added='cosg'"/>
                    <has_text_matching expression="use_raw=False"/>
                    <has_text_matching expression="reference='rest'"/>
                </assert_contents>
            </output>
            <output name="anndata_out"
                    file="cosg.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_2.h5ad"
                    ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs, var, uns"/>
                </assert_contents>
            </output>
            <output name="marker_out"
                    file="marker_2.tsv"
                    ftype="tabular">
                <assert_contents>
                    <has_n_columns n="51"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="3">
            <!-- test 3: grouped by bulk_labels with use_raw switched on -->
            <param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced.h5ad"/>
            <param name="groupby" value="bulk_labels"/>
            <param name="use_raw" value="True"/>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <!-- The rendered call must pass use_raw=True; all other arguments stay at their defaults. -->
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="cosg.cosg"/>
                    <has_text_matching expression="groupby='bulk_labels'"/>
                    <has_text_matching expression="groups='all'"/>
                    <has_text_matching expression="n_genes_user=50"/>
                    <has_text_matching expression="mu=1.0"/>
                    <has_text_matching expression="remove_lowly_expressed=False"/>
                    <has_text_matching expression="key_added='cosg'"/>
                    <has_text_matching expression="use_raw=True"/>
                    <has_text_matching expression="reference='rest'"/>
                </assert_contents>
            </output>
            <output name="anndata_out"
                    file="cosg.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_3.h5ad"
                    ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs, var, uns"/>
                </assert_contents>
            </output>
            <output name="marker_out"
                    file="marker_3.tsv"
                    ftype="tabular">
                <assert_contents>
                    <has_n_columns n="51"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
Marker gene identification for single-cell sequencing data using COSG.
============================================================================================================

Accurate and fast cell marker gene identification with COSG

COSG is a cosine similarity-based method for more accurate and scalable marker gene identification.

- COSG is a general method for cell marker gene identification across different data modalities, e.g., scRNA-seq, scATAC-seq and spatially resolved transcriptome data.
- Marker genes or genomic regions identified by COSG are more indicative and with greater cell-type specificity.
- COSG is ultrafast for large-scale datasets, and is capable of identifying marker genes for one million cells in less than two minutes.

This tool uses the Python version of COSG, which is hosted at https://github.com/genecell/COSG; an R version is available at https://github.com/genecell/COSGR.

    ]]></help>
    <!-- Citation entries come from the "citations" macro, which is defined outside this file. -->
    <expand macro="citations"/>
</tool>
<!--
author	iuc
date	Tue, 05 Nov 2024 13:32:17 +0000
parents	cf880680fd0b
children
-->