comparison music-deconvolution.xml @ 1:3ca0132c182a draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/music/ commit 683bb72ae92b5759a239b7e3bf4c5a229ed35b54"
author bgruening
date Fri, 26 Nov 2021 15:54:51 +0000
parents 224721e76869
children 1c4cf4b7debe
comparison
equal deleted inserted replaced
0:224721e76869 1:3ca0132c182a
1 <tool id="music_deconvolution" name="MuSiC" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" 1 <tool id="music_deconvolution" name="MuSiC" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"
2 profile="20.05" license="GPL-3.0-or-later" > 2 profile="21.09" license="GPL-3.0-or-later" >
3 <description>estimate cell type proportions in bulk RNA-seq data</description> 3 <description>estimate cell type proportions in bulk RNA-seq data</description>
4 <macros> 4 <macros>
5 <import>macros.xml</import> 5 <import>macros.xml</import>
6 </macros> 6 </macros>
7 <expand macro="requirements" /> 7 <expand macro="requirements" />
8 <command detect_errors="exit_code"><![CDATA[ 8 <command detect_errors="exit_code" ><![CDATA[
9 mkdir report_data && 9 mkdir report_data &&
10 Rscript --vanilla '$__tool_directory__/scripts/${do.method}.R' '$conf' 10 Rscript --vanilla '$__tool_directory__/scripts/${do.method}.R' '$conf'
11 ]]></command> 11 ]]></command>
12 <configfiles> 12 <configfiles>
13 <configfile name="conf" > 13 <configfile name="conf" >
27 scrna_eset = readRDS('$scrna_eset') 27 scrna_eset = readRDS('$scrna_eset')
28 28
29 #if str($do.method) == "estimateprops": 29 #if str($do.method) == "estimateprops":
30 30
31 phenotype_factors = null_str_vec('$do.phenotype_factors') 31 phenotype_factors = null_str_vec('$do.phenotype_factors')
32 phenotype_factors_always_exclude = null_str_vec('$do.phenotype_factors_always_exclude')
32 celltypes_label = null_str_vec('$do.celltypes_label') 33 celltypes_label = null_str_vec('$do.celltypes_label')
33 samples_label = null_str_vec('$do.samples_label') 34 samples_label = null_str_vec('$do.samples_label')
34 celltypes = null_str_vec('$do.celltypes') 35 celltypes = null_str_vec('$do.celltypes')
35 methods = null_str_vec('$do.methods') 36 methods = c("MuSiC", "NNLS")
36 phenotype_gene = null_str_vec('$do.phenotype_gene') 37 phenotype_target = null_str_vec('$do.phenotype_target')
37 sample_groups = null_str_vec('$do.sample_groups') 38 phenotype_target_threshold = as.numeric('$do.phenotype_target_threshold')
38 sample_disease_group = null_str_vec('$do.sample_disease_group') 39 sample_disease_group = null_str_vec('$do.sample_disease_group')
39 sample_disease_group_scale = as.integer('$do.sample_disease_group_scale') 40 sample_disease_group_scale = as.integer('$do.sample_disease_group_scale')
40 healthy_phenotype = null_str_vec('$do.healthy_phenotype')
41 compare_title = null_str_vec('$do.compare_title') 41 compare_title = null_str_vec('$do.compare_title')
42
42 outfile_pdf='$out_pdf' 43 outfile_pdf='$out_pdf'
43 44
44 #elif str($do.method) == "dendrogram": 45 #elif str($do.method) == "dendrogram":
45 46
46 celltypes_label = null_str_vec('$do.celltypes_label') 47 celltypes_label = null_str_vec('$do.celltypes_label')
89 <param name="samples_label" type="text" value="sampleID" 90 <param name="samples_label" type="text" value="sampleID"
90 label="Samples Identifier from scRNA dataset" > 91 label="Samples Identifier from scRNA dataset" >
91 <expand macro="validator_text" /> 92 <expand macro="validator_text" />
92 </param> 93 </param>
93 <expand macro="celltypes_macro" /> 94 <expand macro="celltypes_macro" />
94 <param name="methods" multiple="true" type="select" display="checkboxes" label="Cell Proportion Method" >
95 <option value="MuSiC" selected="true" />
96 <option value="NNLS" selected="true" />
97 </param>
98 <param name="phenotype_factors" type="text" 95 <param name="phenotype_factors" type="text"
99 label="List of phenotypes factors" help="If blank, then use all phenotypes." > 96 label="Phenotype factors"
97 help="List of phenotype factors to be used in the linear regression. Please make sure that each factor has more than one unique value. Names correspond to column names in the bulk RNA dataset phenotype table. If blank, then treat all bulk phenotype columns as factors." >
100 <expand macro="validator_index_identifiers" /> 98 <expand macro="validator_index_identifiers" />
101 </param> 99 </param>
102 <param name="phenotype_gene" type="text" label="Causative Gene" 100 <param name="phenotype_factors_always_exclude" type="text"
103 help="MUST exist in the phenotype factors above." > 101 label="Excluded phenotype factors"
104 <expand macro="validator_text" /> 102 help="List of phenotype factors to always exclude in the analysis"
105 </param> 103 value="sampleID,SubjectName" >
106 <param name="sample_groups" type="text" label="List of Sample Groups" >
107 <expand macro="validator_index_identifiers" /> 104 <expand macro="validator_index_identifiers" />
105 </param>
106 <param name="phenotype_target" type="text" label="Phenotype Target"
107 help="MUST exist in the bulk RNA dataset's phenotype factors, as above." >
108 <expand macro="validator_text" />
109 </param>
110 <param name="phenotype_target_threshold" type="float" label="Phenotype Target Threshold"
111 value="-99"
112 help="The (%) threshold at which the phenotype target manifests. Leave at -99 to select all." >
108 </param> 113 </param>
109 <param name="sample_disease_group" type="text" label="Sample Disease Group" 114 <param name="sample_disease_group" type="text" label="Sample Disease Group"
110 help="MUST exist in the sample_groups above." > 115 help="MUST exist in the sample_groups above." >
111 <expand macro="validator_text" /> 116 <expand macro="validator_text" />
112 </param> 117 </param>
113 <param name="sample_disease_group_scale" type="integer" 118 <param name="sample_disease_group_scale" type="integer"
114 label="Sample Disease Group (Scale)" value="5" 119 label="Sample Disease Group (Scale)" value="5"
115 help="Used to accentuate certain features in the plots. Increase this number to reduce the effect." /> 120 help="Used to accentuate certain features in the plots. Increase this number to reduce the effect." />
116 <param name="healthy_phenotype" type="text" label="Healthy Phenotype" >
117 <expand macro="validator_text" />
118 </param>
119 <param name="compare_title" type="text" label="Plot Title" > 121 <param name="compare_title" type="text" label="Plot Title" >
120 <expand macro="validator_text" /> 122 <expand macro="validator_text" />
121 </param> 123 </param>
122 </when> 124 </when>
123 <when value="dendrogram" > 125 <when value="dendrogram" >
124 <param name="celltypes_label" type="text" value="cellType" 126 <param name="celltypes_label" type="text" value="cellType"
125 label="Cell Types Label from scRNA dataset" > 127 label="Cell Types Label from scRNA dataset" >
126 <expand macro="validator_text" /> 128 <expand macro="validator_text" />
127 </param> 129 </param>
128 <param name="clustertype_label" type="text" value="clusterType" 130 <param name="clustertype_label" type="text" value="clusterType"
129 label="Cell Types Label from scRNA dataset" > 131 label="Cluster Types Label from scRNA dataset" >
130 <expand macro="validator_text" /> 132 <expand macro="validator_text" />
131 </param> 133 </param>
132 <param name="samples_label" type="text" value="sampleID" 134 <param name="samples_label" type="text" value="sampleID"
133 label="Samples Identifier from scRNA dataset" > 135 label="Samples Identifier from scRNA dataset" >
134 <expand macro="validator_text" /> 136 <expand macro="validator_text" />
135 </param> 137 </param>
136 <expand macro="celltypes_macro" /> 138 <expand macro="celltypes_macro" />
137 <repeat name="cluster_groups" title="Cluster Groups" min="2" > 139 <repeat name="cluster_groups" title="Cluster Groups" min="0"
140 help="Insert cell cluster groups based on a previous clustering." >
138 <param name="cluster_id" label="Cluster ID" type="text" value="" 141 <param name="cluster_id" label="Cluster ID" type="text" value=""
139 help="e.g. C1 or Cluster1, etc." /> 142 help="e.g. C1 or Cluster1, etc." />
140 <expand macro="celltypes_macro" /> 143 <expand macro="celltypes_macro" />
141 <param name="marker_name" label="Marker Gene Group Name" type="text" 144 <param name="marker_name" label="Marker Gene Group Name" type="text"
142 optional="true" value="" 145 optional="true" value=""
143 help="Name of the list of geme markers used to describe the marker list supplied below." > 146 help="Name of the list of gene markers used to describe the marker list supplied below." >
144 <expand macro="validator_text" /> 147 <expand macro="validator_text" />
145 </param> 148 </param>
146 <param name="marker_list" label="List of Gene Markers" type="data" format="txt,tabular" 149 <param name="marker_list" label="List of Gene Markers" type="data" format="txt,tabular"
147 optional="true" 150 optional="true"
148 help="A single column of marker genes" /> 151 help="A single column of marker genes" />
151 </conditional> 154 </conditional>
152 </inputs> 155 </inputs>
153 <outputs> 156 <outputs>
154 <data name="out_pdf" format="pdf" label="${tool.name} on ${on_string}: PDF Plots" /> 157 <data name="out_pdf" format="pdf" label="${tool.name} on ${on_string}: PDF Plots" />
155 <data name="out_tab" format="tabular" label="${tool.name} on ${on_string}: Cell Proportions by Sample" > 158 <data name="out_tab" format="tabular" label="${tool.name} on ${on_string}: Cell Proportions by Sample" >
156 <filter>do["method"] == "dendrogram"</filter> 159 <filter>do["method"] == "dendrogram" and len(do["cluster_groups"]) > 0</filter>
157 </data> 160 </data>
158 <collection name="summaries" type="list" label="${tool.name} on ${on_string}: Method Summaries"> 161 <collection name="props" type="list" label="${tool.name} on ${on_string}: Proportion Matrices" >
159 <filter>do["method"] == "estimateprops"</filter> 162 <filter>do["method"] == "estimateprops"</filter>
160 <discover_datasets pattern="summ_(?P&lt;designation&gt;.+)\.txt" format="txt" 163 <discover_datasets pattern="prop_(?P&lt;designation&gt;.+)\.tabular" format="tabular" directory="report_data" />
161 directory="report_data" /> 164 </collection>
165 <collection name="summaries" type="list" label="${tool.name} on ${on_string}: Summaries and Logs">
166 <filter>do["method"] == "estimateprops"</filter>
167 <discover_datasets pattern="summ_(?P&lt;designation&gt;.+)\.txt" format="txt" directory="report_data" />
168 <discover_datasets pattern="varprop_(?P&lt;designation&gt;.+)\.tabular" format="tabular" directory="report_data" />
169 <discover_datasets pattern="rsquared_(?P&lt;designation&gt;.+)\.tabular" format="tabular" directory="report_data" />
170 <discover_datasets pattern="weightgene_(?P&lt;designation&gt;.+)\.tabular" format="tabular" directory="report_data" />
162 </collection> 171 </collection>
163 </outputs> 172 </outputs>
164 <tests> 173 <tests>
165 <test expect_num_outputs="2" > 174 <test expect_num_outputs="1" >
166 <!-- Dendrogram test --> 175 <!-- Dendrogram test 1 -->
167 <param name="bulk_eset" value="Mousebulkeset.rds" /> 176 <param name="bulk_eset" value="Mousebulkeset.rds" />
168 <param name="scrna_eset" value="Mousesubeset.degenesonly2.half.rds" /> 177 <param name="scrna_eset" value="Mousesubeset.degenesonly2.half.rds" />
169 <conditional name="do" > 178 <conditional name="do" >
170 <param name="method" value="dendrogram" /> 179 <param name="method" value="dendrogram" />
171 <param name="celltypes_label" value="cellType" /> 180 <param name="celltypes_label" value="cellType" />
172 <param name="samples_label" value="sampleID" /> 181 <param name="samples_label" value="sampleID" />
173 <param name="celltypes" value="Endo,Podo,PT,LOH,DCT,CD-PC,CD-IC,Fib,Macro,Neutro,B lymph,T lymph,NK" /> 182 <param name="celltypes" value="Endo,Podo,PT,LOH,DCT,CD-PC,CD-IC,Fib,Macro,Neutro,B lymph,T lymph,NK" />
183 </conditional>
184 <output name="out_pdf" value="dendro_1.pdf" compare="sim_size" />
185 </test>
186 <test expect_num_outputs="2" >
187 <!-- Dendrogram test 2 -->
188 <param name="bulk_eset" value="Mousebulkeset.rds" />
189 <param name="scrna_eset" value="Mousesubeset.degenesonly2.half.rds" />
190 <conditional name="do" >
191 <param name="method" value="dendrogram" />
192 <param name="celltypes_label" value="cellType" />
193 <param name="samples_label" value="sampleID" />
194 <param name="celltypes" value="Endo,Podo,PT,LOH,DCT,CD-PC,CD-IC,Fib,Macro,Neutro,B lymph,T lymph,NK" />
174 <repeat name="cluster_groups" > 195 <repeat name="cluster_groups" >
175 <param name="cluster_id" value="C1" /> 196 <param name="cluster_id" value="C1" />
176 <param name="celltypes" value="Neutro" /> 197 <param name="celltypes" value="Neutro" />
177 </repeat> 198 </repeat>
178 <repeat name="cluster_groups" > 199 <repeat name="cluster_groups" >
193 </repeat> 214 </repeat>
194 </conditional> 215 </conditional>
195 <output name="out_pdf" value="dendro.pdf" compare="sim_size" /> 216 <output name="out_pdf" value="dendro.pdf" compare="sim_size" />
196 <output name="out_tab"> 217 <output name="out_tab">
197 <assert_contents> 218 <assert_contents>
198 <has_text_matching expression="^\s+Est\.prop\.weighted\.cluster\.Neutro\s+Est\.prop\.weighted\.cluster\.Podo\s+Est\.prop\.weighted\.cluster\.Endo" /> 219 <has_text_matching expression="^\s+Neutro\s+Podo\s+Endo" />
199 <has_text text="APOL1.GNA78M"/> 220 <has_text text="APOL1.GNA78M"/>
200 </assert_contents> 221 </assert_contents>
201 </output> 222 </output>
202 </test> 223 </test>
203 <test expect_num_outputs="2" > 224 <test expect_num_outputs="3" >
204 <!-- Estimate Proportions test --> 225 <!-- Estimate Proportions test -->
205 <param name="bulk_eset" value="GSE50244bulkeset.subset.rds" /> 226 <param name="bulk_eset" value="GSE50244bulkeset.subset.rds" />
206 <param name="scrna_eset" value="EMTABesethealthy.subset.rds" /> 227 <param name="scrna_eset" value="EMTABesethealthy.subset.rds" />
207 <conditional name="do" > 228 <conditional name="do" >
208 <param name="method" value="estimateprops" /> 229 <param name="method" value="estimateprops" />
209 <param name="celltypes_label" value="cellType" /> 230 <param name="celltypes_label" value="cellType" />
210 <param name="samples_label" value="sampleID" /> 231 <param name="samples_label" value="sampleID" />
211 <param name="celltypes" value="alpha,beta,delta,gamma,acinar,ductal" /> 232 <param name="celltypes" value="alpha,beta,delta,gamma,acinar,ductal" />
212 <param name="methods" value="MuSiC,NNLS" />
213 <param name="phenotype_factors" value="age,bmi,hba1c,gender" /> 233 <param name="phenotype_factors" value="age,bmi,hba1c,gender" />
214 <param name="phenotype_gene" value="hba1c" /> 234 <param name="phenotype_target" value="hba1c" />
215 <param name="sample_groups" value="Normal,T2D" /> 235 <param name="phenotype_target_threshold" value="6.5" />
216 <param name="sample_disease_group" value="T2D" /> 236 <param name="sample_disease_group" value="T2D" />
217 <param name="sample_disease_group_scale" value="5" /> 237 <param name="sample_disease_group_scale" value="5" />
218 <param name="healthy_phenotype" value="Normal" />
219 <param name="compare_title" value="HbA1c vs Beta Cell Type Proportion" /> 238 <param name="compare_title" value="HbA1c vs Beta Cell Type Proportion" />
220 </conditional> 239 </conditional>
221 <output name="out_pdf" value="default_output.pdf" compare="sim_size" /> 240 <output name="out_pdf" value="default_output.pdf" compare="sim_size" />
222 <output_collection name="summaries" count="2"> 241 <output_collection name="summaries" count="5">
223 <element name="MuSiC" ftype="txt"> 242 <element name="Log of MuSiC fitting" ftype="txt">
224 <assert_contents> 243 <assert_contents>
225 <has_text text="Residual standard error: 0.1662 on 72 degrees of freedom"/> 244 <has_text text="Residual standard error: 0.1704 on 72 degrees of freedom"/>
226 </assert_contents> 245 </assert_contents>
227 </element> 246 </element>
228 <element name="NNLS" ftype="txt"> 247 <element name="Log of NNLS fitting" ftype="txt">
229 <assert_contents> 248 <assert_contents>
230 <has_text text="Residual standard error: 0.06561 on 72 degrees of freedom"/> 249 <has_text text="Residual standard error: 0.0645 on 72 degrees of freedom"/>
231 </assert_contents> 250 </assert_contents>
232 </element> 251 </element>
233 </output_collection> 252 </output_collection>
234 </test> 253 </test>
235 </tests> 254 </tests>
236 <help><![CDATA[ 255 <help><![CDATA[
237 MuSiC utilizes cell-type specific gene expression from single-cell RNA sequencing (RNA-seq) data to characterize cell type compositions from bulk RNA-seq data in complex tissues. By appropriate weighting of genes showing cross-subject and cross-cell consistency, MuSiC enables the transfer of cell type-specific gene expression information from one dataset to another. 256 MuSiC utilizes cell-type specific gene expression from single-cell RNA sequencing (RNA-seq) data to characterize cell type compositions from bulk RNA-seq data in complex tissues. By appropriate weighting of genes showing cross-subject and cross-cell consistency, MuSiC enables the transfer of cell type-specific gene expression information from one dataset to another.
238 257
239 Solid tissues often contain closely related cell types which leads to collinearity. To deal with collinearity, MuSiC employs a tree-guided procedure that recursively zooms in on closely related cell types. Briefly, we first group similar cell types into the same cluster and estimate cluster proportions, then recursively repeat this procedure within each cluster. 258 Solid tissues often contain closely related cell types which leads to collinearity. To deal with collinearity, MuSiC employs a tree-guided procedure that recursively zooms in on closely related cell types. Briefly, we first group similar cell types into the same cluster and estimate cluster proportions, then recursively repeat this procedure within each cluster.
240 259
241 .. image:: https://xuranw.github.io/MuSiC/articles/images/FigureMethod.jpg 260 .. image:: $PATH_TO_IMAGES/FigureMethod.jpg
261
242 ]]></help> 262 ]]></help>
243 <citations> 263 <citations>
244 <citation type="doi">https://doi.org/10.1038/s41467-018-08023-x</citation> 264 <citation type="doi">https://doi.org/10.1038/s41467-018-08023-x</citation>
245 </citations> 265 </citations>
246 </tool> 266 </tool>