diff cosg.xml @ 0:cf880680fd0b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/cosg/ commit 2e477dea2f014c265e6a2c6d25432d0e2bace733
author iuc
date Thu, 30 May 2024 11:10:21 +0000
parents
children fe5996e87b41
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cosg.xml	Thu May 30 11:10:21 2024 +0000
@@ -0,0 +1,202 @@
+<tool id="cosg" name="COSG" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@">
+    <description>Cell marker gene identification</description>
+    <!-- Macros and tokens used throughout this wrapper are imported from macros.xml. -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+    <!-- The whole command line is supplied by the @CMD@ token. -->
+    <command detect_errors="exit_code"><![CDATA[
+@CMD@
+      ]]></command>
+    <configfiles>
+        <!-- Cheetah template of the Python script that runs COSG and exports the marker table. -->
+        <configfile name="script_file"><![CDATA[
+@CMD_imports@
+@CMD_read_inputs@
+
+## Render `groups` as a Python literal: the keyword 'all' stays a string,
+## any other value is split on commas into a list of group names.
+## Quote characters (raw, or Galaxy-sanitized as __sq__/__dq__) are dropped
+## so that both g1,g2 and 'g1','g2' are accepted.
+#set $groups_text = str($method_options.groups).replace('__sq__', '').replace('__dq__', '').replace("'", '').replace('"', '')
+#set $group_names = [grp.strip() for grp in $groups_text.split(',') if grp.strip()]
+#if $group_names == ['all']
+    #set $groups_arg = repr('all')
+#else
+    #set $groups_arg = repr($group_names)
+#end if
+
+cosg.cosg(adata,
+        groupby='$method_options.groupby',
+        groups=$groups_arg,
+        n_genes_user=$method_options.n_genes_user,
+        mu=$advanced_options.mu,
+        remove_lowly_expressed=$advanced_options.filter_expression.remove_lowly_expressed,
+        #if $advanced_options.filter_expression.remove_lowly_expressed == "True"
+        expressed_pct=$advanced_options.filter_expression.expressed_pct,
+        #end if
+        key_added='$advanced_options.key_added',
+        use_raw=$advanced_options.layer_selection.use_raw,
+        #if $advanced_options.layer_selection.use_raw == "False"
+        #if $advanced_options.layer_selection.layer
+        layer='$advanced_options.layer_selection.layer',
+        #end if
+        #end if
+        reference='$advanced_options.reference'
+        )
+
+## Read the results back from the same key they were stored under (key_added),
+## not from a hard-coded 'cosg' key.
+df=pd.DataFrame(adata.uns['$advanced_options.key_added']['names']).T
+df.to_csv('marker.tsv', sep='\t', index=True)
+
+@CMD_anndata_write_outputs@
+]]></configfile>
+    </configfiles>
+    <inputs>
+        <expand macro="inputs_anndata"/>
+        <!-- Core COSG arguments: grouping column, optional subset of groups, size of the result table. -->
+        <section name="method_options" title="Method Options" expanded="true">
+            <!-- Both text fields are mandatory, so reject empty values up front like key_added/reference do. -->
+            <param argument="groupby" type="text" value="" optional="false" label="The key of the cell groups in .obs">
+                <validator type="empty_field"/>
+            </param>
+            <param argument="groups" type="text" value="all" optional="false" label="Subset of cell groups" help="e.g. 'g1','g2','g3'.">
+                <validator type="empty_field"/>
+            </param>
+            <param argument="n_genes_user" type="integer" value="50" min="1" label="The number of genes that appear in the returned tables"/>
+        </section>
+        <section name="advanced_options" title="Advanced Options">
+            <!-- No upper bound on mu: the documented constraint is only mu >= 0. -->
+            <param argument="mu" type="float" value="1.0" min="0.0" label="The penalty restricting marker genes expressing in non-target cell groups" help="Larger value represents more strict restrictions. mu should be >= 0, and by default, mu = 1."/>
+            <!-- expressed_pct is only offered when low-expression filtering is switched on. -->
+            <conditional name="filter_expression">
+                <param name="remove_lowly_expressed" type="select" label="Remove lowly expressed genes" help="If yes, genes that express a percentage of target cells smaller than a specific value (`expressed_pct`) are not considered as marker genes for the target cells.">
+                    <option value="False">No</option>
+                    <option value="True">Yes</option>
+                </param>
+                <when value="False"/>
+                <when value="True">
+                    <param argument="expressed_pct" type="float" value="0.1" min="0.01" max="1.0" label="Percentage of target cells" help="Genes that express a percentage of target cells smaller than a specific value (`expressed_pct`) are not considered as marker genes for the target cells."/>
+                </when>
+            </conditional>
+            <param argument="key_added" type="text" value="cosg" optional="false" label="The key in adata.uns information is saved to.">
+                <validator type="empty_field"/>
+            </param>
+            <!-- A layer can only be chosen when the raw attribute is not used. -->
+            <conditional name="layer_selection">
+                <param name="use_raw" type="select" label="Use raw attribute of adata if present to perform tests on." help="If use_raw is set to True then adata.raw.X if it exists.">
+                    <option value="False">No</option>
+                    <option value="True">Yes</option>
+                </param>
+                <when value="False">
+                    <param argument="layer" type="text" value="" label="Key from adata.layers whose value will be used to perform tests on." help="If empty then adata.X will be used. If use_raw is set to True then adata.raw.X. If layers specified then use adata.layers[layer]."/>
+                </when>
+                <when value="True"/>
+            </conditional>
+            <param argument="reference" type="text" value="rest" optional="false" label="If a group identifier, compare with respect to this group." help=" If you use the keyword 'rest', compare each group to the union of the rest of the group.">
+                <validator type="empty_field"/>
+            </param>
+        </section>
+        <expand macro="inputs_common_advanced"/>
+    </inputs>
+    <outputs>
+        <!-- AnnData-related outputs are declared by the shared macro. -->
+        <expand macro="anndata_outputs"/>
+        <!-- Marker table that the script writes to marker.tsv in the working directory. -->
+        <data name="marker_out"
+              format="tabular"
+              from_work_dir="marker.tsv"
+              label="${tool.name} on ${on_string}: Markers"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="3">
+            <!-- test 1: default options, grouping on bulk_labels -->
+            <param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced.h5ad"/>
+            <section name="method_options">
+                <param name="groupby" value="bulk_labels"/>
+            </section>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <!-- The logged script must contain the expected cosg.cosg() arguments. -->
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="cosg.cosg"/>
+                    <has_text_matching expression="groupby='bulk_labels'"/>
+                    <has_text_matching expression="groups='all'"/>
+                    <has_text_matching expression="n_genes_user=50"/>
+                    <has_text_matching expression="mu=1.0"/>
+                    <has_text_matching expression="remove_lowly_expressed=False"/>
+                    <has_text_matching expression="key_added='cosg'"/>
+                    <has_text_matching expression="use_raw=False"/>
+                    <has_text_matching expression="reference='rest'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" file="cosg.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_1.h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="obs, var, uns"/>
+                </assert_contents>
+            </output>
+            <output name="marker_out" ftype="tabular" file="marker_1.tsv" compare="sim_size">
+                <assert_contents>
+                    <has_n_columns n="51"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="3">
+            <!-- test 2: grouping on louvain with low-expression filtering enabled -->
+            <param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced.h5ad"/>
+            <section name="method_options">
+                <param name="groupby" value="louvain"/>
+            </section>
+            <section name="advanced_options">
+                <conditional name="filter_expression">
+                    <param name="remove_lowly_expressed" value="True"/>
+                </conditional>
+            </section>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <!-- The logged script must contain the expected cosg.cosg() arguments, including expressed_pct. -->
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="cosg.cosg"/>
+                    <has_text_matching expression="groupby='louvain'"/>
+                    <has_text_matching expression="groups='all'"/>
+                    <has_text_matching expression="n_genes_user=50"/>
+                    <has_text_matching expression="mu=1.0"/>
+                    <has_text_matching expression="remove_lowly_expressed=True"/>
+                    <has_text_matching expression="expressed_pct=0.1"/>
+                    <has_text_matching expression="key_added='cosg'"/>
+                    <has_text_matching expression="use_raw=False"/>
+                    <has_text_matching expression="reference='rest'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" file="cosg.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_2.h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="obs, var, uns"/>
+                </assert_contents>
+            </output>
+            <output name="marker_out" ftype="tabular" file="marker_2.tsv">
+                <assert_contents>
+                    <has_n_columns n="51"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="3">
+            <!-- test 3: grouping on bulk_labels using the raw attribute -->
+            <param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced.h5ad"/>
+            <section name="method_options">
+                <param name="groupby" value="bulk_labels"/>
+            </section>
+            <section name="advanced_options">
+                <conditional name="layer_selection">
+                    <param name="use_raw" value="True"/>
+                </conditional>
+            </section>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <!-- The logged script must contain the expected cosg.cosg() arguments, with use_raw switched on. -->
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="cosg.cosg"/>
+                    <has_text_matching expression="groupby='bulk_labels'"/>
+                    <has_text_matching expression="groups='all'"/>
+                    <has_text_matching expression="n_genes_user=50"/>
+                    <has_text_matching expression="mu=1.0"/>
+                    <has_text_matching expression="remove_lowly_expressed=False"/>
+                    <has_text_matching expression="key_added='cosg'"/>
+                    <has_text_matching expression="use_raw=True"/>
+                    <has_text_matching expression="reference='rest'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" file="cosg.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_3.h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="obs, var, uns"/>
+                </assert_contents>
+            </output>
+            <output name="marker_out" ftype="tabular" file="marker_3.tsv">
+                <assert_contents>
+                    <has_n_columns n="51"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+Marker gene identification for single-cell sequencing data using COSG.
+============================================================================================================
+
+Accurate and fast cell marker gene identification with COSG
+
+COSG is a cosine similarity-based method for more accurate and scalable marker gene identification.
+
+- COSG is a general method for cell marker gene identification across different data modalities, e.g., scRNA-seq, scATAC-seq and spatially resolved transcriptome data. 
+- Marker genes or genomic regions identified by COSG are more indicative and with greater cell-type specificity.
+- COSG is ultrafast for large-scale datasets, and is capable of identifying marker genes for one million cells in less than two minutes.
+
+This tool runs the Python version of COSG, which is hosted at https://github.com/genecell/COSG.
+
+    ]]></help>
+    <!-- Citation entries are provided by the `citations` macro. -->
+    <expand macro="citations"/>
+</tool>