comparison bioconductor_scp.xml @ 0:cd2f3a280463 draft default tip

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/bioconductor-scp commit a0a1a3de5dd24b2aabe96ec3d6f89acdcf5e462b
author recetox
date Wed, 22 Jan 2025 07:44:00 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:cd2f3a280463
1 <tool id="bioconductor_scp" name="bioconductor-scp" version="@TOOL_VERSION@+galaxy0" profile="23.0">
2 <description>single cell proteomics data analysis workflow</description>
3 <macros> <!-- the version token and the macros expanded further down are not defined in this file; presumably they come from these two imports (confirm) -->
4 <import>macros.xml</import>
5 <import>help.xml</import>
6 </macros>
7 <xrefs>
8 <xref type="bio.tools">scp</xref>
9 </xrefs>
10 <requirements> <!-- main package follows the tool version token; the remaining packages are pinned to explicit versions -->
11 <requirement type="package" version="@TOOL_VERSION@">bioconductor-scp</requirement>
12 <requirement type="package" version="3.54.0">bioconductor-sva</requirement>
13 <requirement type="package" version="1.80.0">bioconductor-impute</requirement>
14 <requirement type="package" version="1.34.0">bioconductor-scater</requirement>
15 <requirement type="package" version="1.16.0">bioconductor-qfeatures</requirement>
16 <requirement type="package" version="3.62.1">bioconductor-limma</requirement>
17 <requirement type="package" version="3.5.1">r-ggplot2</requirement>
18 </requirements>
19 <required_files>
20 <include path="utils.r" /> <!-- helper script that the command block sources before the generated run_script -->
21 </required_files>
22 <expand macro="creator" />
23 <command detect_errors="exit_code"><![CDATA[
24 echo '${run_script}' && ## prints the path of the generated R script; quoted so spaces or shell metacharacters in the path stay literal
25 Rscript -e 'source("${__tool_directory__}/utils.r")' -e 'source("${run_script}")'
26 #if $data_export.export_R_script
27 && cat '${run_script}' >> '$script' ## copies the rendered script into the optional script output
28 #end if
29 ]]></command>
30 <configfiles>
31 <configfile name="run_script"><![CDATA[
32 data_input <- read.delim("$input_data", sep="\t") ## tab-separated feature table; its column names are looked up by position below
33 metadata <- read.delim("$input_annotations", sep="\t") ## tab-separated sample annotation table, passed to readSCP as colData
34 runCol <- colnames(data_input)[$runcol] ## the tool parameters are column positions; they are translated to column names here
35 fcol_aggregation_pep <- colnames(data_input)[${peptide_aggregation.column_aggregation_peptides}]
36 fcol_aggregation_prot <- colnames(data_input)[${protein_aggregation.column_aggregation_proteins}]
37
38 dir.create("plots") ## folder receiving every PDF written later; the plots output collection is discovered from it
39
40 scp <- scp::readSCP(
41 assayData = data_input,
42 colData = metadata,
43 runCol = runCol,
44 removeEmptyCols = $remove_empty_columns
45 )
46 number_of_assays <- length(scp) ## reused as the index range 1:number_of_assays in the following calls
47 scp <- QFeatures::zeroIsNA(scp, i = 1:number_of_assays) ## applied to every assay of the freshly read object
48
49 #if $filtering_data.filter_reverse
50 scp <- QFeatures::filterFeatures(scp,
51 ~ Reverse != "+") ## keeps rows whose Reverse column is not "+"
52 #end if
53
54 #if $filtering_data.filter_contaminants
55 scp <- QFeatures::filterFeatures(scp,
56 ~ Potential.contaminant != "+") ## keeps rows whose Potential.contaminant column is not "+"
57 #end if
58
59 scp <- QFeatures::filterFeatures(scp,
60 ~ !is.na(PIF) & PIF > ${filtering_data.PIF_threshold}) ## always applied: PIF must be present and above the threshold
61
62 keepAssay <- QFeatures::dims(scp)[1, ] > ${filtering_data.minimum_features} ## first row of dims is compared per assay against the minimum
63 scp <- scp[, , keepAssay]
64
65 number_of_assays <- length(scp) ## recomputed because the subset above may have dropped assays
66 single_cell_channels <- gsub(",", "|", "${filtering_data.single_cells}") ## comma-separated list turned into a regex alternation
67
68 scp <- scp::computeSCR(scp,
69 i = 1:number_of_assays,
70 colvar = "SampleType", ## the annotation table is expected to provide a SampleType column
71 carrierPattern = "Carrier",
72 samplePattern = single_cell_channels,
73 sampleFUN = "mean",
74 rowDataName = "MeanSCR") ## result column that the QC plots and the later filter read
75
76 #if $generate_QC_plots
77 QC_plot_SCR <- QFeatures::rbindRowData(scp, i = 1:number_of_assays) |> ## histogram of MeanSCR over the rows of all assays
78 data.frame() |>
79 ggplot2::ggplot(ggplot2::aes(x = MeanSCR)) +
80 ggplot2::geom_histogram() +
81 ggplot2::geom_vline(xintercept = c(1/$count_cell_carrier, 0.1), ## dashed line at 1 / carrier cell count, solid line at 0.1
82 lty = c(2, 1)) +
83 ggplot2::scale_x_log10()
84 ggplot2::ggsave(filename = file.path("plots", "QC_plot_SCR.pdf"), QC_plot_SCR)
85
86 QC_plot_SCR_col <- QFeatures::rbindRowData(scp, i = 1:number_of_assays) |> ## density of MeanSCR, one curve per run
87 data.frame() |>
88 ggplot2::ggplot(ggplot2::aes(x = MeanSCR, color = .data[[runCol]])) + ## runCol holds a column name, so it is looked up in the data; a bare runCol would map the constant string and merge all runs into one curve
89 ggplot2::geom_density() +
90 ggplot2::geom_vline(xintercept = 0.02, lty = 2) +
91 ggplot2::geom_vline(xintercept = 1, lty = 1)+
92 ggplot2::scale_x_log10()
93 ggplot2::ggsave(filename = file.path("plots", "QC_plot_SCR_col.pdf"), QC_plot_SCR_col)
94 #end if
95
96 scp <- QFeatures::filterFeatures(scp,
97 ~ !is.na(MeanSCR) &
98 MeanSCR < ${filtering_data.SCR_threshold}) ## keeps rows with a defined MeanSCR below the threshold
99
100 #if $filtering_data.qvalue_level == "PSM"
101 scp <- scp::pep2qvalue(scp,
102 i = 1:number_of_assays,
103 PEP = "dart_PEP", ## the input table is expected to carry a dart_PEP column
104 rowDataName = "qvalue") ## this branch passes no groupBy
105
106 scp <- QFeatures::filterFeatures(scp,
107 ~ qvalue < ${filtering_data.qvalue_threshold})
108 #else
109 scp <- scp::pep2qvalue(scp,
110 i = 1:number_of_assays,
111 PEP = "dart_PEP",
112 groupBy = "$filtering_data.qvalue_level", ## any other level is handed over as the grouping column name
113 rowDataName = "qvalue")
114
115 scp <- QFeatures::filterFeatures(scp,
116 ~ qvalue < ${filtering_data.qvalue_threshold}) ## same cut-off expression as in the PSM branch
117 #end if
118
119 #if $filtering_data.divide_reference
120 scp <- scp::divideByReference(scp,
121 i = 1:number_of_assays,
122 colvar = "SampleType",
123 samplePattern = ".", ## regular expression that matches any non-empty SampleType value
124 refPattern = "Reference")
125 #end if
126
127 scp <- scp::aggregateFeaturesOverAssays(
128 scp,
129 i = 1:number_of_assays,
130 fcol = fcol_aggregation_pep,
131 name = paste0("peptide_", names(scp)), ## new assay names are the current names prefixed with peptide_
132 fun = ${peptide_aggregation.aggregation_peptides}, ## inserted unquoted, so the parameter value must be R code that evaluates to a function
133 na.rm = TRUE
134 )
135
136 scp <- QFeatures::joinAssays(scp,
137 i = grep("peptide", names(scp)), ## selects by name; presumably only the assays created just above match (confirm for unusual run names)
138 name = "peptides")
139
140 keep_samples <- unlist(strsplit("${peptide_filtering.samples_to_keep}", split=",")) ## comma-separated parameter split into a character vector
141 scp <- scp[,SummarizedExperiment::colData(scp)[["SampleType"]] %in% keep_samples, ] ## column subset: only samples whose SampleType is listed
142
143 #if $peptide_filtering.filter_median_intensity.cut_median_intensity == "yes"
144 medians <- MatrixGenerics::colMedians(SummarizedExperiment::assay(scp[["peptides"]]), na.rm = TRUE) ## per-sample median of the joined peptide assay; namespace-qualified because this script never calls library()
145 SummarizedExperiment::colData(scp)[["MedianRI"]] <- medians
146
147 #if $generate_QC_plots
148 QC_medianRI <- SummarizedExperiment::colData(scp) |> ## sample annotations including the MedianRI column added above
149 data.frame() |>
150 ggplot2::ggplot() +
151 ggplot2::aes(x = MedianRI,
152 y = SampleType,
153 fill = SampleType) +
154 ggplot2::geom_boxplot() +
155 ggplot2::scale_x_log10()
156 ggplot2::ggsave(filename = file.path("plots", "QC_medianRI.pdf"), QC_medianRI)
157 #end if
158
159 scp <- scp[, !is.na(SummarizedExperiment::colData(scp)[["MedianRI"]]) & SummarizedExperiment::colData(scp)[["MedianRI"]] < ${peptide_filtering.filter_median_intensity.median_intensity_threshold}, ] ## keeps samples with a defined MedianRI below the threshold
160 #end if
161
162 #if $peptide_filtering.filter_median_CV.cut_median_CV == "yes"
163 number_of_observations <- ${peptide_filtering.filter_median_CV.minimum_peptides_CV}
164 CV_threshold <- ${peptide_filtering.filter_median_CV.median_CV_threshold}
165 scp <- scp::medianCVperCell(scp,
166 i = 1:number_of_assays, ## NOTE(review): presumably still addresses the original per-run assays, assuming new assays are appended after them; confirm
167 groupBy = "Leading.razor.protein", ## grouping column is fixed here, not a tool parameter
168 nobs = number_of_observations,
169 norm = "div.median",
170 na.rm = TRUE,
171 colDataName = "MedianCV") ## column read by the plot and the filter below
172
173 #if $generate_QC_plots
174 QC_medianCV <- MultiAssayExperiment::getWithColData(scp, "peptides") |>
175 SummarizedExperiment::colData() |>
176 data.frame() |>
177 ggplot2::ggplot(ggplot2::aes(x = MedianCV,
178 fill = SampleType)) +
179 ggplot2::geom_boxplot() +
180 ggplot2::geom_vline(xintercept = CV_threshold) ## marks the cut-off applied after the plot
181 ggplot2::ggsave(filename = file.path("plots", "QC_medianCV.pdf"), QC_medianCV)
182 #end if
183
184 scp <- scp[, !is.na(SummarizedExperiment::colData(scp)[["MedianCV"]]) & SummarizedExperiment::colData(scp)[["MedianCV"]] < CV_threshold, ] ## keeps samples with a defined MedianCV below the threshold
185 #end if
186
187 #if $peptide_filtering.remove_blank
188 scp <- scp[, SummarizedExperiment::colData(scp)[["SampleType"]] != "Blank", ] ## drops samples annotated as Blank
189 #end if
190
191 #if $peptide_processing.normalization_method.choose_normalization == "simple"
192 scp <- QFeatures::normalize(
193 scp,
194 i = "peptides",
195 name = "peptides_norm", ## both branches end with an assay of this name
196 method = "${peptide_processing.normalization_method.normalize_simple_method}"
197 )
198 #else
199 scp <- QFeatures::sweep(
200 scp,
201 i = "peptides",
202 MARGIN = 2, ## column-wise: each sample is divided by its own statistic
203 FUN = "/",
204 STATS = ${peptide_processing.normalization_method.normalize_columns}(SummarizedExperiment::assay(scp[["peptides"]]), na.rm = TRUE),
205 name = "peptides_norm_col"
206 )
207
208 scp <- QFeatures::sweep(
209 scp,
210 i = "peptides_norm_col",
211 MARGIN = 1, ## row-wise: each peptide is divided by its own statistic
212 FUN = "/",
213 STATS = ${peptide_processing.normalization_method.normalize_rows}(SummarizedExperiment::assay(scp[["peptides_norm_col"]]), na.rm = TRUE),
214 name = "peptides_norm"
215 )
216 #end if
217
218 scp <- QFeatures::logTransform(
219 scp,
220 base = ${peptide_processing.base},
221 i = "peptides_norm",
222 name = "peptides_log"
223 )
224
225 #if $generate_QC_plots
226 QC_boxplot_peptide <- create_boxplots(scp, "peptides", FALSE, "Peptides not normalized") ## create_boxplots is not defined in this script; presumably it comes from utils.r
227 QC_boxplot_peptide_norm <- create_boxplots(scp, "peptides_log", TRUE, "Peptides normalized")
228
229 ggplot2::ggsave(filename = file.path("plots", "QC_boxplot_peptide.pdf"), QC_boxplot_peptide)
230 ggplot2::ggsave(filename = file.path("plots", "QC_boxplot_peptide_norm.pdf"), QC_boxplot_peptide_norm)
231 #end if
232
233 #if $peptide_processing.remove_missing_peptides.remove_peptides == "yes"
234 pNA <- ${peptide_processing.remove_missing_peptides.pNA_peptides} / 100 ## the parameter is a percentage; it is converted to a fraction before use
235 scp <- QFeatures::filterNA(scp, i = "peptides_log", pNA = pNA)
236 #end if
237
238 scp <- scp::aggregateFeaturesOverAssays(
239 scp,
240 i = "peptides_log", ## proteins are built from the log-transformed peptide assay
241 fcol = fcol_aggregation_prot,
242 name = "proteins",
243 fun = ${protein_aggregation.aggregation_proteins}, ## inserted unquoted, so the parameter value must be R code that evaluates to a function
244 na.rm = TRUE
245 )
246
247 #if $protein_processing.normalization_method_protein.choose_normalization_protein == "simple_prot"
248 scp <- QFeatures::normalize(
249 scp,
250 i = "proteins",
251 name = "proteins_norm", ## both branches end with an assay of this name
252 method = "${protein_processing.normalization_method_protein.normalize_simple_method_prot}"
253 )
254 #else
255 scp <- QFeatures::sweep(
256 scp,
257 i = "proteins",
258 MARGIN = 2, ## column-wise step first
259 FUN = "/",
260 STATS = ${protein_processing.normalization_method_protein.normalize_columns_prot}(SummarizedExperiment::assay(scp[["proteins"]]), na.rm = TRUE),
261 name = "proteins_norm_col"
262 )
263
264 scp <- QFeatures::sweep(
265 scp,
266 i = "proteins_norm_col",
267 MARGIN = 1, ## then the row-wise step on the column-normalized assay
268 FUN = "/",
269 STATS = ${protein_processing.normalization_method_protein.normalize_rows_prot}(SummarizedExperiment::assay(scp[["proteins_norm_col"]]), na.rm = TRUE),
270 name = "proteins_norm"
271 )
272 #end if
273
274 #if $generate_QC_plots
275 QC_boxplot_protein <- create_boxplots(scp, "proteins", TRUE, "Proteins not normalized") ## create_boxplots and plot_heatmap are not defined in this script; presumably they come from utils.r
276 QC_boxplot_protein_norm <- create_boxplots(scp, "proteins_norm", TRUE, "Proteins normalized")
277
278 ggplot2::ggsave(filename = file.path("plots", "QC_boxplot_protein.pdf"), QC_boxplot_protein)
279 ggplot2::ggsave(filename = file.path("plots", "QC_boxplot_protein_norm.pdf"), QC_boxplot_protein_norm)
280
281 pdf(file = file.path("plots", "QC_heatmap_proteins.pdf")) ## base graphics device, opened around the heatmap call and closed right after
282 plot_heatmap(scp, "proteins_norm")
283 dev.off()
284 #end if
285
286 scp <- QFeatures::impute(
287 scp,
288 i = "proteins_norm",
289 name = "proteins_imptd", ## imputed assay that both batch correction branches start from
290 method = "knn",
291 k = ${protein_processing.impute_k},
292 rowmax = 1,
293 colmax= 1,
294 maxp = Inf,
295 rng.seed = 1234
296 )
297
298 batch_colname <- colnames(metadata)[${batch_correction.select_batch_correction.batch_col}] ## column position in the annotation table translated to its name
299 #if $batch_correction.select_batch_correction.batch_correction_method == "combat"
300 sce <- MultiAssayExperiment::getWithColData(scp, "proteins_imptd")
301 batch <- SummarizedExperiment::colData(sce)[[batch_colname]] ## read from sce, like the model below, so the vector matches the columns of the matrix given to ComBat
302 model <- stats::model.matrix(~ SampleType, data = SummarizedExperiment::colData(sce))
303
304 SummarizedExperiment::assay(sce) <- sva::ComBat(
305 dat = SummarizedExperiment::assay(sce),
306 batch = batch,
307 mod = model
308 )
309
310 scp <- QFeatures::addAssay(
311 scp,
312 y = sce,
313 name = "proteins_batchC" ## both branches store the corrected assay under this name
314 )
315
316 scp <- QFeatures::addAssayLinkOneToOne(
317 scp,
318 from = "proteins_imptd",
319 to = "proteins_batchC"
320 )
321 #else
322 sce <- MultiAssayExperiment::getWithColData(scp, "proteins_imptd")
323 preserve_colname <- colnames(metadata)[${batch_correction.select_batch_correction.preserve_col}]
324
325 SummarizedExperiment::assay(sce) <- limma::removeBatchEffect(
326 SummarizedExperiment::assay(sce),
327 group = sce[[preserve_colname]], ## grouping to preserve while the batch effect is removed
328 batch = sce[[batch_colname]]
329 )
330 scp <- QFeatures::addAssay(scp,
331 y = sce,
332 name = "proteins_batchC")
333 scp <- QFeatures::addAssayLinkOneToOne(scp,
334 from = "proteins_imptd",
335 to = "proteins_batchC")
336 #end if
337
338 #if $dimensionality_reduction.PCA_computation.run_PCA == "yes"
339 PCA_color <- colnames(metadata)[$dimensionality_reduction.PCA_computation.pca_coloring] ## column position in the annotation table translated to its name
340 scp[["proteins_batchC"]] <- scater::runPCA(
341 scp[["proteins_batchC"]],
342 ncomponents = ${dimensionality_reduction.PCA_computation.ncomponents_PCA},
343 ntop = Inf,
344 scale = TRUE,
345 exprs_values = 1,
346 name = "PCA" ## stored under this name; the plot and the UMAP step refer to it
347 )
348
349 pca <- scater::plotReducedDim(
350 scp[["proteins_batchC"]],
351 dimred = "PCA",
352 colour_by = PCA_color,
353 point_alpha = 1
354 )
355
356 ggplot2::ggsave(filename = file.path("plots", "PCA.pdf"), pca)
357
358 #if $dimensionality_reduction.PCA_computation.UMAP_computation.run_UMAP == "yes"
359 scp[["proteins_batchC"]] <- scater::runUMAP(
360 scp[["proteins_batchC"]],
361 ncomponents = ${dimensionality_reduction.PCA_computation.UMAP_computation.ncomponents_UMAP},
362 ntop = Inf,
363 scale = TRUE,
364 exprs_values = 1,
365 n_neighbors = 3, ## fixed value, not exposed as a tool parameter here
366 dimred = "PCA", ## UMAP is nested inside the PCA branch because it uses the stored PCA result
367 name = "UMAP"
368 )
369
370 umap <- scater::plotReducedDim(
371 scp[["proteins_batchC"]],
372 dimred = "UMAP",
373 colour_by = "SampleType", ## fixed to SampleType, unlike the PCA plot which uses PCA_color
374 point_alpha = 1
375 )
376 ggplot2::ggsave(filename = file.path("plots", "UMAP.pdf"), umap)
377 #end if
378 #end if
379
380 assay_df <- as.data.frame(SummarizedExperiment::assay(scp, "proteins_batchC")) ## batch-corrected protein matrix
381 row_metadata <- as.data.frame(SummarizedExperiment::rowData(scp[["proteins_batchC"]]))
382
383 export_data <- cbind(row_metadata, assay_df) ## feature annotations first, then one column per sample
384 write.table(export_data, file = '$Processed_data', sep = "\t", quote = FALSE)
385
386 #if $data_export.export_tables
387 export_all_assays(scp) ## not defined in this script; presumably provided by utils.r
388 #end if
389
390 #if $data_export.export_RData
391 saveRDS(scp, file='$scp_object') ## the output dataset is declared as rds, so a single serialized object is written and readRDS can load it
392 #end if
393 ]]></configfile>
394 </configfiles>
395 <inputs>
396 <expand macro="scp_param"/> <!-- every tool parameter comes from this macro, which is not defined in this file -->
397 </inputs>
398 <outputs>
399 <data name="Processed_data" format="tabular" label="Batch-corrected protein levels"/> <!-- always produced; written by write.table in the run script -->
400 <collection name="intermediate_outputs" type="list" label="Intermediate outputs" format="tabular">
401 <discover_datasets pattern="__name_and_ext__" directory="outputs" /> <!-- presumably filled by export_all_assays from utils.r (confirm) -->
402 <filter>data_export['export_tables']</filter>
403 </collection>
404 <data name="scp_object" format="rds" label="scp object as .rds">
405 <filter>data_export['export_RData']</filter>
406 </data>
407 <data name="script" format="txt" label="R script">
408 <filter>data_export['export_R_script']</filter>
409 </data>
410 <collection name="plots" type="list" label="Plots">
411 <discover_datasets pattern="__name_and_ext__" directory="plots" /> <!-- the run script creates this directory and saves all PDFs into it -->
412 <filter>generate_QC_plots or dimensionality_reduction['PCA_computation']['run_PCA'] == 'yes'</filter> <!-- UMAP is only reachable when PCA runs, so the PCA clause covers it -->
413 </collection>
414 </outputs>
415 <tests>
416 <test expect_num_outputs='2'> <!-- checks only the processed table; presumably the second output is the plots collection under default settings (confirm against the macros) -->
417 <param name="input_data" value="evidence_subset.txt"/>
418 <param name="input_annotations" value="sampleAnnotation.txt"/>
419 <param name="runcol" value="19"/>
420 <param name="single_cells" value="Macrophage,Monocyte"/>
421 <param name="samples_to_keep" value="Macrophage,Monocyte,Blank"/>
422 <param name="batch_col" value="2"/>
423 <param name="column_aggregation_peptides" value="6"/>
424 <param name="column_aggregation_proteins" value="17"/>
425 <param name="pca_coloring" value="4"/>
426 <output name="Processed_data">
427 <assert_contents>
428 <has_n_lines n="90"/>
429 <has_text text="E9PAV3"/>
430 <has_size size="625000" delta="20"/>
431 </assert_contents>
432 </output>
433 </test>
434 <test expect_num_outputs='2'> <!-- same parameters as the first test; additionally compares every plot in the collection by file size -->
435 <param name="input_data" value="evidence_subset.txt" />
436 <param name="input_annotations" value="sampleAnnotation.txt"/>
437 <param name="runcol" value="19"/>
438 <param name="single_cells" value="Macrophage,Monocyte"/>
439 <param name="samples_to_keep" value="Macrophage,Monocyte,Blank"/>
440 <param name="batch_col" value="2"/>
441 <param name="column_aggregation_peptides" value="6"/>
442 <param name="column_aggregation_proteins" value="17"/>
443 <param name="pca_coloring" value="4"/>
444 <output_collection name="plots" type="list">
445 <element name="PCA" file="PCA.pdf" ftype="pdf" compare="sim_size" delta="60"/>
446 <element name="QC_boxplot_peptide" file="QC_boxplot_peptide.pdf" ftype="pdf" compare="sim_size" delta="40"/>
447 <element name="QC_boxplot_peptide_norm" file="QC_boxplot_peptide_norm.pdf" ftype="pdf" compare="sim_size" delta="40"/>
448 <element name="QC_boxplot_protein" file="QC_boxplot.pdf" ftype="pdf" compare="sim_size" delta="40"/> <!-- NOTE(review): the reference file name differs from the element name, unlike every other element; confirm the test-data file -->
449 <element name="QC_boxplot_protein_norm" file="QC_boxplot_protein_norm.pdf" ftype="pdf" compare="sim_size" delta="40"/>
450 <element name="QC_heatmap_proteins" file="QC_heatmap_proteins.pdf" ftype="pdf" compare="sim_size" delta="40"/>
451 <element name="QC_medianCV" file="QC_medianCV.pdf" ftype="pdf" compare="sim_size" delta="40"/>
452 <element name="QC_plot_SCR" file="QC_plot_SCR.pdf" ftype="pdf" compare="sim_size" delta="40"/>
453 <element name="QC_plot_SCR_col" file="QC_plot_SCR_col.pdf" ftype="pdf" compare="sim_size" delta="40"/>
454 <element name="UMAP" file="UMAP.pdf" ftype="pdf" compare="sim_size" delta="200"/>
455 </output_collection>
456 <output name="Processed_data">
457 <assert_contents>
458 <has_size size="625000" delta="20"/>
459 <has_n_lines n="90"/>
460 <has_text text="E9PAV3"/>
461 </assert_contents>
462 </output>
463 </test>
464 <test expect_num_outputs='3'> <!-- enables export_tables, which adds the intermediate_outputs collection as a third output -->
465 <param name="input_data" value="evidence_subset.txt" />
466 <param name="input_annotations" value="sampleAnnotation.txt"/>
467 <param name="runcol" value="19"/>
468 <param name="single_cells" value="Macrophage,Monocyte"/>
469 <param name="samples_to_keep" value="Macrophage,Monocyte,Blank"/>
470 <param name="column_aggregation_peptides" value="6"/>
471 <param name="column_aggregation_proteins" value="17"/>
472 <param name="batch_col" value="2"/>
473 <param name="export_tables" value="true"/>
474 <param name="pca_coloring" value="4"/>
475 <output_collection name="intermediate_outputs" type="list" count="6"/> <!-- only the number of exported tables is checked, not their content -->
476 <output name="Processed_data">
477 <assert_contents>
478 <has_size size="625000" delta="20"/>
479 <has_n_lines n="90"/>
480 <has_text text="E9PAV3"/>
481 </assert_contents>
482 </output>
483 </test>
484 <test expect_num_outputs='4'> <!-- enables the serialized object and the script export, giving four outputs -->
485 <param name="input_data" value="evidence_subset.txt" />
486 <param name="input_annotations" value="sampleAnnotation.txt"/>
487 <param name="runcol" value="19"/>
488 <param name="single_cells" value="Macrophage,Monocyte"/>
489 <param name="samples_to_keep" value="Macrophage,Monocyte,Blank"/>
490 <param name="batch_col" value="2"/>
491 <param name="export_RData" value="TRUE"/>
492 <param name="export_R_script" value="TRUE"/>
493 <param name="column_aggregation_peptides" value="6"/>
494 <param name="column_aggregation_proteins" value="17"/>
495 <param name="pca_coloring" value="4"/>
496 <output name="Processed_data">
497 <assert_contents>
498 <has_size size="625000" delta="20"/>
499 <has_n_lines n="90"/>
500 <has_text text="E9PAV3"/>
501 </assert_contents>
502 </output>
503 <output name="script">
504 <assert_contents>
505 <has_n_lines n="271"/> <!-- line count of the rendered run script; changes whenever rendered lines are added to or removed from the configfile -->
506 <has_text text='ggplot2::ggsave(filename = file.path("plots", "PCA.pdf"), pca)'/>
507 </assert_contents>
508 </output>
509 </test>
510 </tests>
511 <help><![CDATA[
512 @GENERAL_HELP@
513 ]]></help>
514 <expand macro="citations" />
515 </tool>