comparison decoupler_pseudobulk.xml @ 0:59a7f3f83aec draft

planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
author ebi-gxa
date Sun, 24 Sep 2023 08:44:24 +0000
parents
children 046d8ff974ff
comparison
equal deleted inserted replaced
-1:000000000000 0:59a7f3f83aec
1 <tool id="decoupler_pseudobulk" name="Decoupler pseudo-bulk" version="1.4.0+galaxy0" profile="20.05">
2 <description>aggregates single cell RNA-seq data for running bulk RNA-seq methods</description>
3 <requirements> <!-- Pins the decoupler package at 1.4.0, the same upstream version that prefixes the tool's own version string. -->
4 <requirement version="1.4.0" type="package">decoupler</requirement>
5 </requirements>
6 <!-- Runs decoupler_pseudobulk.py on the input file. Text, select and dataset values are single-quoted so a value containing spaces stays one argument; the two figsize options are left unquoted on purpose because each carries two space-separated numbers. Optional integers are tested with str(...) != '' so an explicit 0 is still forwarded instead of being treated as unset. --> <command detect_errors="exit_code"><![CDATA[
7 mkdir deseq_output_dir &&
8 mkdir plots_output_dir &&
9 python '$__tool_directory__/decoupler_pseudobulk.py' '$input_file'
10 #if $adata_obs_fields_to_merge:
11 --adata_obs_fields_to_merge '$adata_obs_fields_to_merge'
12 #end if
13 --groupby '$groupby'
14 --sample_key '$sample_key'
15 #if $layer:
16 --layer '$layer'
17 #end if
18 --mode '$mode'
19 #if $use_raw:
20 --use_raw
21 #end if
22 #if str($min_cells) != '':
23 --min_cells $min_cells
24 #end if
25 #if $produce_plots:
26 --save_path plots_output_dir
27 #end if
28 #if str($min_counts) != '':
29 --min_counts $min_counts
30 #end if
31 #if str($min_total_counts) != '':
32 --min_total_counts $min_total_counts
33 #end if
34 #if $produce_anndata:
35 --anndata_output_path '$pbulk_anndata'
36 #end if
37 #if $filter_expr:
38 --filter_expr
39 #end if
40 #if $factor_fields:
41 --factor_fields '$factor_fields'
42 #end if
43 --deseq2_output_path deseq_output_dir
44 --plot_samples_figsize $plot_samples_figsize
45 --plot_filtering_figsize $plot_filtering_figsize
46 ]]></command>
47 <environment_variables> <!-- NUMBA_CACHE_DIR and MPLCONFIGDIR are both set to the job temporary directory variable. -->
48 <environment_variable name="MPLCONFIGDIR">\$_GALAXY_JOB_TMP_DIR</environment_variable>
49 <environment_variable name="NUMBA_CACHE_DIR">\$_GALAXY_JOB_TMP_DIR</environment_variable>
50 </environment_variables>
51 <inputs> <!-- Form parameters; each name matches the $variable of the same name used in the command template. -->
52 <param type="data" name="input_file" format="data" label="Input AnnData file"/>
53 <param type="text" name="adata_obs_fields_to_merge" label="Obs Fields to Merge" optional="true" help="Fields in adata.obs to merge, comma separated (optional). They will be available as field1_field2_field3 in the AnnData Obs dataframe."/>
54 <param type="text" name="groupby" label="Groupby column" help="The column in adata.obs that defines the groups. Merged columns in the above field are available here."/>
55 <param type="text" name="sample_key" label="Sample Key column" help="The column in adata.obs that defines the samples. Merged columns in the above field are available here."/>
56 <param type="text" name="layer" label="Layer" optional="true" help="The name of the layer of the AnnData object to use. It needs to be present in the AnnData object."/>
57 <param type="select" name="mode" label="Decoupler pseudobulk Mode" optional="true" help="Determines how counts are aggregated across cells within the specified groups: sum, mean or median.">
58 <option value="sum" selected="true">sum</option>
59 <option value="mean">mean</option>
60 <option value="median">median</option>
61 </param>
62 <param type="text" name="factor_fields" label="Factor Fields" optional="true" help="Fields in adata.obs to use as factors, comma separated (optional). For EdgeR make sure that the first field is the main contrast field desired and the rest of the fields are the covariates desired. Decoupler produces two fields in the intermediate AnnData (which can be added here if desired for covariates): psbulk_n_cells and psbulk_counts."/>
63 <param type="boolean" name="use_raw" label="Use Raw" optional="true"/>
64 <param type="integer" name="min_cells" label="Minimum Cells" optional="true"/>
65 <param type="boolean" name="produce_plots" label="Produce plots"/>
66 <param type="boolean" name="produce_anndata" label="Produce AnnData with Pseudo-bulk"/>
67 <param type="integer" name="min_counts" label="Minimum Counts" optional="true"/>
68 <param type="integer" name="min_total_counts" label="Minimum Total Counts" optional="true"/>
69 <param type="boolean" name="filter_expr" label="Enable Filtering by Expression"/>
70 <param type="text" name="plot_samples_figsize" label="Plot Samples Figsize" value="10 10" help="X and Y sizes in points separated by a space"/>
71 <param type="text" name="plot_filtering_figsize" label="Plot Filtering Figsize" value="10 10" help="X and Y sizes in points separated by a space"/>
72 </inputs>
73 <outputs> <!-- The three CSV files are always collected from deseq_output_dir; the AnnData and the two PNG plots are only declared when their boolean switches are on. -->
74 <data format="h5ad" name="pbulk_anndata" label="${tool.name} on ${on_string}: Pseudo-bulk AnnData">
75 <filter>produce_anndata</filter>
76 </data>
77 <data format="csv" name="count_matrix" from_work_dir="deseq_output_dir/counts_matrix.csv" label="${tool.name} on ${on_string}: Count Matrix"/>
78 <data format="csv" name="samples_metadata" from_work_dir="deseq_output_dir/col_metadata.csv" label="${tool.name} on ${on_string}: Samples Metadata (factors file)"/>
79 <data format="csv" name="genes_metadata" from_work_dir="deseq_output_dir/gene_metadata.csv" label="${tool.name} on ${on_string}: Genes Metadata"/>
80 <data format="png" name="plot_output" from_work_dir="plots_output_dir/pseudobulk_samples.png" label="${tool.name} on ${on_string}: Pseudobulk plot">
81 <filter>produce_plots</filter>
82 </data>
83 <data format="png" name="filter_by_expr_plot" from_work_dir="plots_output_dir/filter_by_expr.png" label="${tool.name} on ${on_string}: Filter by Expression plot">
84 <filter>produce_plots</filter>
85 </data>
86 </outputs>
87 <tests> <!-- One end-to-end test with both optional output switches enabled, hence six expected outputs. -->
88 <test expect_num_outputs="6">
89 <param name="input_file" value="mito_counted_anndata.h5ad"/>
90 <param name="adata_obs_fields_to_merge" value="batch,sex"/>
91 <param name="groupby" value="batch_sex"/>
92 <param name="sample_key" value="genotype"/>
93 <param name="mode" value="sum"/>
94 <param name="factor_fields" value="genotype,batch_sex"/>
95 <param name="min_cells" value="10"/>
96 <param name="min_counts" value="10"/>
97 <param name="min_total_counts" value="1000"/>
98 <param name="filter_expr" value="true"/>
99 <param name="produce_plots" value="true"/>
100 <param name="produce_anndata" value="true"/>
101 <param name="plot_samples_figsize" value="10 10"/>
102 <param name="plot_filtering_figsize" value="10 10"/>
103 <output ftype="h5ad" name="pbulk_anndata">
104 <assert_contents>
105 <has_h5_keys keys="obs/psbulk_n_cells"/>
106 </assert_contents>
107 </output>
108 <output ftype="csv" name="count_matrix">
109 <assert_contents>
110 <has_n_lines n="3620"/>
111 </assert_contents>
112 </output>
113 <output ftype="csv" name="samples_metadata">
114 <assert_contents>
115 <has_n_lines n="8"/>
116 </assert_contents>
117 </output>
118 <output ftype="csv" name="genes_metadata">
119 <assert_contents>
120 <has_n_lines n="3620"/>
121 </assert_contents>
122 </output>
123 <output ftype="png" name="plot_output">
124 <assert_contents>
125 <has_size delta="3000" value="31853"/>
126 </assert_contents>
127 </output>
128 <output ftype="png" name="filter_by_expr_plot">
129 <assert_contents>
130 <has_size delta="2000" value="21656"/>
131 </assert_contents>
132 </output>
133 </test>
134 </tests>
135 <help>
136 <![CDATA[
137 This tool performs pseudobulk analysis and filtering using Decoupler-py. Provide the input AnnData file and specify the necessary parameters.
138
139 - Input AnnData file: The input AnnData file to be processed.
140 - Obs Fields to Merge: Fields in adata.obs to merge, comma separated (optional).
141 - Groupby column: The column in adata.obs that defines the groups.
142 - Sample Key column: The column in adata.obs that defines the samples.
143 - Layer (optional): The name of the layer of the AnnData object to use.
144 - Mode: The mode for Decoupler pseudobulk analysis (sum, mean, median). Sum by default.
145 - Factor Fields (optional): Fields in adata.obs to use as factors, comma separated. For EdgeR make sure that the first field is the main contrast field desired and the rest of the fields are the covariates desired.
146 - Use Raw: Whether to use the raw part of the AnnData object.
147 - Minimum Cells: Minimum number of cells for pseudobulk analysis (optional).
148 - Minimum Counts: Minimum count threshold for filtering by expression (optional).
149 - Minimum Total Counts: Minimum total count threshold for filtering by expression (optional).
150 - Enable Filtering by Expression: Check this box to enable filtering by expression.
151 - Plot Samples Figsize: Size of the samples plot as a tuple (two arguments).
152 - Plot Filtering Figsize: Size of the filtering plot as a tuple (two arguments).
153
154 The tool will output the count matrix, samples metadata and genes metadata (in DESeq2 format), plus the filtered pseudo-bulk AnnData (if "Produce AnnData with Pseudo-bulk" is enabled) and the pseudobulk plot and filter by expression plot (if "Produce plots" is enabled).
155
156 ]]>
157 </help>
158 <citations> <!-- A citation of type "doi" takes the bare DOI, without a doi.org prefix. -->
159 <citation type="doi">10.1093/bioadv/vbac016</citation>
160 </citations>
161 </tool>