Previous changeset 0:2cfd0db49bbc (2021-09-12) Next changeset 2:7902cd31b9b5 (2022-01-29) |
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/music/ commit 683bb72ae92b5759a239b7e3bf4c5a229ed35b54" |
modified:
construct_eset.xml macros.xml scripts/dendrogram.R scripts/estimateprops.R scripts/inspect.R test-data/EMTABesethealthy.subset.rds test-data/Mousebulkeset.rds test-data/Mousesubeset.degenesonly2.half.rds test-data/default_output.pdf test-data/dendro.pdf |
added:
test-data/dendro_1.pdf |
b |
diff -r 2cfd0db49bbc -r be91cb6f48e7 construct_eset.xml --- a/construct_eset.xml Sun Sep 12 19:49:12 2021 +0000 +++ b/construct_eset.xml Fri Nov 26 15:55:11 2021 +0000 |
b |
@@ -14,9 +14,12 @@ suppressWarnings(suppressPackageStartupMessages(library(xbioc))) suppressWarnings(suppressPackageStartupMessages(library(MuSiC))) -null_str_vec = function(gstr){ +null_str_vec = function(gstr, is.str=FALSE){ tokens = unlist(as.vector(strsplit(gstr, split=","))) if (length(tokens) == 0){ + if (is.str){ + return(character(0)) + } return(NULL) } if (length(tokens) == 1){ @@ -40,6 +43,9 @@ #end if ## Annotation and Feature Data, or just a string for type of chip used annotation = null_str_vec('$annotation') +if (is.null(annotation)){ + annotation = character(0) +} if (all(rownames(pdata) != colnames(exprs))) { stop("Number of Samples between phenotypes and assays are not the same") @@ -66,26 +72,27 @@ metadata\$lname = NULL if (nrow(metadata)==0) { - metadata = NULL + pheno_data = new("AnnotatedDataFrame", data = pdata) +} else { + pheno_data = new("AnnotatedDataFrame", data = pdata, varMetadata = metadata) } -pheno_data = new("AnnotatedDataFrame", data = pdata, varMetadata = metadata) ## Experiment Description -- using the MIAME object experiment_data = new( "MIAME", - name = null_str_vec('$expdata.name'), - lab = null_str_vec('$expdata.lab'), - contact = null_str_vec('$expdata.contact'), - title = null_str_vec('$expdata.title'), - abstract = null_str_vec('$expdata.abstract'), - url = null_str_vec('$expdata.url'), + name = null_str_vec('$expdata.name', is.str=T), + lab = null_str_vec('$expdata.lab', is.str=T), + contact = null_str_vec('$expdata.contact', is.str=T), + title = null_str_vec('$expdata.title', is.str=T), + abstract = null_str_vec('$expdata.abstract', is.str=T), + url = null_str_vec('$expdata.url', is.str=T), other = list( #for i, row in enumerate($expdata.other): #if i==0 - '$row.field' = null_str_vec('$row.comment') + '$row.field' = null_str_vec('$row.comment', is.str=T) #else - ,'$row.field' = null_str_vec('$row.comment') + ,'$row.field' = null_str_vec('$row.comment', is.str=T) #end if #end for )) @@ -95,7 +102,7 @@ experimentData = experiment_data, annotation = annotation) -capture.output(print(e_set), file = '$out_tab') +capture.output(print(e_set), file = '$out_txt') saveRDS(e_set, file= '$out_rds') </configfile> @@ -112,7 +119,8 @@ </param> <repeat name="metadata" title="Meta Data" min="0" max="15" > <!-- optional, so min=0 --> - <param name="row_names" label="Label" type="text" > + <param name="row_names" label="Label" type="text" + help="Metadata should correspond directly to the columns of the Phenotype Data" > <expand macro="validator_text_and_urls" /> </param> <param name="label_desc" label="Label Description" type="text" > @@ -149,7 +157,7 @@ </section> </inputs> <outputs> - <data name="out_tab" format="tabular" label="${tool.name} on ${on_string}: General Info" /> + <data name="out_txt" format="txt" label="${tool.name} on ${on_string}: General Info" /> <data name="out_rds" format="rdata.eset" label="${tool.name} on ${on_string}: RData ESet Object" /> </outputs> <tests> @@ -186,7 +194,7 @@ <param name="comment" value="Some other comment" /> </repeat> </section> - <output name="out_tab"> + <output name="out_txt"> <assert_contents> <has_text text="assayData: 3 features, 2 samples " /> </assert_contents> |
b |
diff -r 2cfd0db49bbc -r be91cb6f48e7 macros.xml --- a/macros.xml Sun Sep 12 19:49:12 2021 +0000 +++ b/macros.xml Fri Nov 26 15:55:11 2021 +0000 |
[ |
@@ -1,5 +1,5 @@ <macros> - <token name="@VERSION_SUFFIX@">0</token> + <token name="@VERSION_SUFFIX@">1</token> <!-- The ESet inspector/constructor and MuSiC tool can have independent Galaxy versions but should reference the same package version always. --> @@ -15,11 +15,11 @@ <validator type="regex" message="FORMAT terms separated by commas">^(([A-Za-z0-9+_ -]+)\s?,?)*$</validator> </xml> <xml name="validator_text" > - <validator type="regex" message="No commas allowed">^(([A-Za-z0-9+_ -]+)\s?)*$</validator> + <validator type="regex" message="No commas allowed">^(([A-Za-z0-9+_ -]+)\s?)+$</validator> </xml> <xml name="celltypes_macro" > <param name="celltypes" type="text" optional="true" value="" - label="Comma list of cell types to use from scRNA dataset" help="If NULL, then use all cell types." > + label="Comma list of cell types to use from scRNA dataset" help="If blank, then use all available cell types." > <expand macro="validator_index_identifiers" /> </param> </xml> |
b |
diff -r 2cfd0db49bbc -r be91cb6f48e7 scripts/dendrogram.R --- a/scripts/dendrogram.R Sun Sep 12 19:49:12 2021 +0000 +++ b/scripts/dendrogram.R Fri Nov 26 15:55:11 2021 +0000 |
[ |
@@ -17,31 +17,6 @@ args <- commandArgs(trailingOnly = TRUE) source(args[1]) -## We then perform bulk tissue cell type estimation with pre-grouping -## of cell types: C, list_of_cell_types, marker genes name, marker -## genes list. -## data.to.use = list( -## "C1" = list(cell.types = c("Neutro"), -## marker.names=NULL, -## marker.list=NULL), -## "C2" = list(cell.types = c("Podo"), -## marker.names=NULL, -## marker.list=NULL), -## "C3" = list(cell.types = c("Endo","CD-PC","LOH","CD-IC","DCT","PT"), -## marker.names = "Epithelial", -## marker.list = read_list("../test-data/epith.markers")), -## "C4" = list(cell.types = c("Macro","Fib","B lymph","NK","T lymph"), -## marker.names = "Immune", -## marker.list = read_list("../test-data/immune.markers")) -## ) -grouped_celltypes <- lapply(data.to.use, function(x) { - x$cell.types -}) -marker_groups <- lapply(data.to.use, function(x) { - x$marker.list -}) -names(marker_groups) <- names(data.to.use) - ## Perform the estimation ## Produce the first step information @@ -51,33 +26,107 @@ ## Plot the dendrogram of design matrix and cross-subject mean of ## realtive abundance -par(mfrow = c(1, 2)) -d <- dist(t(log(sub.basis$Disgn.mtx + 1e-6)), method = "euclidean") +## Hierarchical clustering using Complete Linkage +d1 <- dist(t(log(sub.basis$Disgn.mtx + 1e-6)), method = "euclidean") +hc1 <- hclust(d1, method = "complete") ## Hierarchical clustering using Complete Linkage -hc1 <- hclust(d, method = "complete") -## Plot the obtained dendrogram -plot(hc1, cex = 0.6, hang = -1, main = "Cluster log(Design Matrix)") -d <- dist(t(log(sub.basis$M.theta + 1e-8)), method = "euclidean") -## Hierarchical clustering using Complete Linkage -hc2 <- hclust(d, method = "complete") -## Plot the obtained dendrogram -pdf(file = outfile_pdf, width = 8, height = 8) -plot(hc2, cex = 0.6, hang = -1, main = "Cluster log(Mean of RA)") +d2 <- dist(t(log(sub.basis$M.theta + 1e-8)), method = "euclidean") +hc2 <- hclust(d2, method = "complete") + -cl_type <- as.character(scrna_eset[[celltypes_label]]) +if (length(data.to.use) > 0) { + ## We then perform bulk tissue cell type estimation with pre-grouping + ## of cell types: C, list_of_cell_types, marker genes name, marker + ## genes list. + ## data.to.use = list( + ## "C1" = list(cell.types = c("Neutro"), + ## marker.names=NULL, + ## marker.list=NULL), + ## "C2" = list(cell.types = c("Podo"), + ## marker.names=NULL, + ## marker.list=NULL), + ## "C3" = list(cell.types = c("Endo","CD-PC","LOH","CD-IC","DCT","PT"), + ## marker.names = "Epithelial", + ## marker.list = read_list("../test-data/epith.markers")), + ## "C4" = list(cell.types = c("Macro","Fib","B lymph","NK","T lymph"), + ## marker.names = "Immune", + ## marker.list = read_list("../test-data/immune.markers")) + ## ) + grouped_celltypes <- lapply(data.to.use, function(x) { + x$cell.types + }) + marker_groups <- lapply(data.to.use, function(x) { + x$marker.list + }) + names(marker_groups) <- names(data.to.use) + + + cl_type <- as.character(scrna_eset[[celltypes_label]]) + + for (cl in seq_len(length(grouped_celltypes))) { + cl_type[cl_type %in% + grouped_celltypes[[cl]]] <- names(grouped_celltypes)[cl] + } + pData(scrna_eset)[[clustertype_label]] <- factor( + cl_type, levels = c(names(grouped_celltypes), + "CD-Trans", "Novel1", "Novel2")) + + est_bulk <- music_prop.cluster( + bulk.eset = bulk_eset, sc.eset = scrna_eset, + group.markers = marker_groups, clusters = celltypes_label, + groups = clustertype_label, samples = samples_label, + clusters.type = grouped_celltypes + ) -for (cl in seq_len(length(grouped_celltypes))) { - cl_type[cl_type %in% grouped_celltypes[[cl]]] <- names(grouped_celltypes)[cl] -} -pData(scrna_eset)[[clustertype_label]] <- factor( - cl_type, levels = c(names(grouped_celltypes), - "CD-Trans", "Novel1", "Novel2")) + estimated_music_props <- est_bulk$Est.prop.weighted.cluster + ## NNLS is not calculated here + + ## Show different in estimation methods + ## Jitter plot of estimated cell type proportions + methods_list <- c("MuSiC") + + jitter_fig <- Jitter_Est( + list(data.matrix(estimated_music_props)), + method.name = methods_list, title = "Jitter plot of Est Proportions", + size = 2, alpha = 0.7) + + theme_minimal() + + labs(x = element_blank(), y = element_blank()) + + theme(axis.text = element_text(size = 6), + axis.text.x = element_blank(), + legend.position = "none") + + plot_box <- Boxplot_Est(list( + data.matrix(estimated_music_props)), + method.name = methods_list) + + theme_minimal() + + labs(x = element_blank(), y = element_blank()) + + theme(axis.text = element_text(size = 6), + axis.text.x = element_blank(), + legend.position = "none") -est_bulk <- music_prop.cluster( - bulk.eset = bulk_eset, sc.eset = scrna_eset, - group.markers = marker_groups, clusters = celltypes_label, - groups = clustertype_label, samples = samples_label, - clusters.type = grouped_celltypes) + plot_hmap <- Prop_heat_Est(list( + data.matrix(estimated_music_props)), + method.name = methods_list) + + labs(x = element_blank(), y = element_blank()) + + theme(axis.text.y = element_text(size = 6), + axis.text.x = element_text(angle = -90, size = 5), + plot.title = element_text(size = 9), + legend.key.width = unit(0.15, "cm"), + legend.text = element_text(size = 5), + legend.title = element_text(size = 5)) -write.table(est_bulk, file = outfile_tab, quote = F, col.names = NA, sep = "\t") -dev.off() +} + +pdf(file = outfile_pdf, width = 8, height = 8) +par(mfrow = c(1, 2)) +plot(hc1, cex = 0.6, hang = -1, main = "Cluster log(Design Matrix)") +plot(hc2, cex = 0.6, hang = -1, main = "Cluster log(Mean of RA)") +if (length(data.to.use) > 0) { + plot_grid(jitter_fig, plot_box, plot_hmap, ncol = 2, nrow = 2) +} +message(dev.off()) + +if (length(data.to.use) > 0) { + write.table(estimated_music_props, + file = outfile_tab, quote = F, col.names = NA, sep = "\t") +} |
b |
diff -r 2cfd0db49bbc -r be91cb6f48e7 scripts/estimateprops.R --- a/scripts/estimateprops.R Sun Sep 12 19:49:12 2021 +0000 +++ b/scripts/estimateprops.R Fri Nov 26 15:55:11 2021 +0000 |
[ |
b'@@ -14,36 +14,67 @@\n clusters = celltypes_label,\n samples = samples_label, select.ct = celltypes, verbose = T)\n \n+estimated_music_props <- est_prop$Est.prop.weighted\n+estimated_nnls_props <- est_prop$Est.prop.allgene\n \n ## Show different in estimation methods\n ## Jitter plot of estimated cell type proportions\n-jitter.fig <- Jitter_Est(\n- list(data.matrix(est_prop$Est.prop.weighted),\n- data.matrix(est_prop$Est.prop.allgene)),\n- method.name = methods, title = "Jitter plot of Est Proportions")\n+jitter_fig <- Jitter_Est(\n+ list(data.matrix(estimated_music_props),\n+ data.matrix(estimated_nnls_props)),\n+ method.name = methods, title = "Jitter plot of Est Proportions",\n+ size = 2, alpha = 0.7) + theme_minimal()\n \n \n ## Make a Plot\n ## A more sophisticated jitter plot is provided as below. We separated\n-## the T2D subjects and normal subjects by their HbA1c levels.\n-m_prop <- rbind(melt(est_prop$Est.prop.weighted),\n- melt(est_prop$Est.prop.allgene))\n+## the T2D subjects and normal subjects by their disease factor levels.\n+estimated_music_props_flat <- melt(estimated_music_props)\n+estimated_nnls_props_flat <- melt(estimated_nnls_props)\n+\n+m_prop <- rbind(estimated_music_props_flat,\n+ estimated_nnls_props_flat)\n+colnames(m_prop) <- c("Sub", "CellType", "Prop")\n+\n+if (is.null(celltypes)) {\n+ celltypes <- levels(m_prop$CellType)\n+ message("No celltypes declared, using:")\n+ message(celltypes)\n+}\n \n-colnames(m_prop) <- c("Sub", "CellType", "Prop")\n+if (phenotype_target_threshold == -99) {\n+ phenotype_target_threshold <- -Inf\n+ message("phenotype target threshold set to -Inf")\n+}\n+\n+if (is.null(phenotype_factors)) {\n+ phenotype_factors <- colnames(pData(bulk_eset))\n+}\n+## filter out unwanted factors like "sampleID" and "subjectName"\n+phenotype_factors <- phenotype_factors[\n+ !(phenotype_factors %in% phenotype_factors_always_exclude)]\n+message("Phenotype Factors to use:")\n+message(phenotype_factors)\n+\n \n m_prop$CellType <- factor(m_prop$CellType, levels = celltypes) # nolint\n-m_prop$Method <- factor(rep(methods, each = 89 * 6), levels = methods) # nolint\n-m_prop$HbA1c <- rep(bulk_eset$hba1c, 2 * 6) # nolint\n-m_prop <- m_prop[!is.na(m_prop$HbA1c), ]\n-m_prop$Disease <- factor(sample_groups[(m_prop$HbA1c > 6.5) + 1], # nolint\n+m_prop$Method <- factor(rep(methods, each = nrow(estimated_music_props_flat)), # nolint\n+ levels = methods)\n+m_prop$Disease_factor <- rep(bulk_eset[[phenotype_target]], 2 * length(celltypes)) # nolint\n+m_prop <- m_prop[!is.na(m_prop$Disease_factor), ]\n+## Generate a TRUE/FALSE table of Normal == 1 and Disease == 2\n+sample_groups <- c("Normal", sample_disease_group)\n+m_prop$Disease <- factor(sample_groups[(m_prop$Disease_factor > phenotype_target_threshold) + 1], # nolint\n levels = sample_groups)\n \n+## Binary to scale: e.g. TRUE / 5 = 0.2\n m_prop$D <- (m_prop$Disease == # nolint\n sample_disease_group) / sample_disease_group_scale\n-m_prop <- rbind(subset(m_prop, Disease == healthy_phenotype),\n- subset(m_prop, Disease != healthy_phenotype))\n+## NA\'s are not included in the comparison below\n+m_prop <- rbind(subset(m_prop, Disease != sample_disease_group),\n+ subset(m_prop, Disease == sample_disease_group))\n \n-jitter.new <- ggplot(m_prop, aes(Method, Prop)) +\n+jitter_new <- ggplot(m_prop, aes(Method, Prop)) +\n geom_point(aes(fill = Method, color = Disease, stroke = D, shape = Disease),\n size = 2, alpha = 0.7,\n position = position_jitter(width = 0.25, height = 0)) +\n@@ -52,20 +83,23 @@\n scale_shape_manual(values = c(21, 24)) + theme_minimal()\n \n ## Plot to compare method effectiveness\n-## Create dataframe for beta cell proportions and HbA1c levels\n-m_prop_ana <- data.frame(pData(bulk_eset)[rep(1:89, 2), phenotype_factors],\n- ct.prop = c(est_prop$Est.prop.weighted[, 2],\n- '..b'mooth(method = "lm", se = FALSE, col = "black", lwd = 0.25) +\n geom_point(aes(fill = Method, color = Disease, stroke = D, shape = Disease),\n size = 2, alpha = 0.7) + facet_wrap(~ Method) +\n@@ -73,21 +107,81 @@\n scale_colour_manual(values = c("white", "gray20")) +\n scale_shape_manual(values = c(21, 24))\n \n+## BoxPlot\n+plot_box <- Boxplot_Est(list(\n+ data.matrix(estimated_music_props),\n+ data.matrix(estimated_nnls_props)),\n+ method.name = c("MuSiC", "NNLS")) +\n+ theme(axis.text.x = element_text(angle = -90),\n+ axis.text.y = element_text(size = 8)) +\n+ ggtitle(element_blank()) + theme_minimal()\n+\n+## Heatmap\n+plot_hmap <- Prop_heat_Est(list(\n+ data.matrix(estimated_music_props),\n+ data.matrix(estimated_nnls_props)),\n+ method.name = c("MuSiC", "NNLS")) +\n+ theme(axis.text.x = element_text(angle = -90),\n+ axis.text.y = element_text(size = 6))\n \n pdf(file = outfile_pdf, width = 8, height = 8)\n-plot_grid(jitter.fig, jitter.new, labels = "auto", ncol = 1, nrow = 2)\n-jitt_compare\n-dev.off()\n+plot_grid(jitter_fig, plot_box, labels = "auto", ncol = 1, nrow = 2)\n+plot_grid(jitter_new, jitt_compare, labels = "auto", ncol = 1, nrow = 2)\n+plot_hmap\n+message(dev.off())\n+\n+## Output Proportions\n+\n+write.table(est_prop$Est.prop.weighted,\n+ file = paste0("report_data/prop_",\n+ "Music Estimated Proportions of Cell Types",\n+ ".tabular"),\n+ quote = F, sep = "\\t", col.names = NA)\n+write.table(est_prop$Est.prop.allgene,\n+ file = paste0("report_data/prop_",\n+ "NNLS Estimated Proportions of Cell Types",\n+ ".tabular"),\n+ quote = F, sep = "\\t", col.names = NA)\n+write.table(est_prop$Weight.gene,\n+ file = paste0("report_data/weightgene_",\n+ "Music Estimated Proportions of Cell Types (by Gene)",\n+ ".tabular"),\n+ quote = F, sep = "\\t", col.names = NA)\n+write.table(est_prop$r.squared.full,\n+ file = paste0("report_data/rsquared_",\n+ "Music R-sqr Estimated Proportions of Each Subject",\n+ ".tabular"),\n+ quote = F, sep = "\\t", col.names = NA)\n+write.table(est_prop$Var.prop,\n+ file = paste0("report_data/varprop_",\n+ "Matrix of Variance of MuSiC Estimates",\n+ ".tabular"),\n+ quote = F, sep = "\\t", col.names = NA)\n+\n \n ## Summary table\n for (meth in methods) {\n ##lm_beta_meth = lm(ct.prop ~ age + bmi + hba1c + gender, data =\n- ##subset(m_prop_ana, Method == meth))\n+ sub_data <- subset(m_prop_ana, Method == meth)\n+ ## We can only do regression where there are more than 1 factors\n+ ## so we must find and exclude the ones which are not\n+ gt1_facts <- sapply(phenotype_factors, function(facname) {\n+ return(length(unique(sort(sub_data[[facname]]))) == 1)\n+ })\n+ form_factors <- phenotype_factors\n+ exclude_facts <- names(gt1_facts)[gt1_facts]\n+ if (length(exclude_facts) > 0) {\n+ message("Factors with only one level will be excluded:")\n+ message(exclude_facts)\n+ form_factors <- phenotype_factors[\n+ !(phenotype_factors %in% exclude_facts)]\n+ }\n lm_beta_meth <- lm(as.formula(\n- paste("ct.prop", paste(phenotype_factors, collapse = " + "),\n- sep = " ~ ")),\n- data = subset(m_prop_ana, Method == meth))\n- print(paste0("Summary: ", meth))\n+ paste("ct.prop", paste(form_factors, collapse = " + "),\n+ sep = " ~ ")), data = sub_data)\n+ message(paste0("Summary: ", meth))\n capture.output(summary(lm_beta_meth),\n- file = paste0("report_data/summ_", meth, ".txt"))\n+ file = paste0("report_data/summ_Log of ",\n+ meth,\n+ " fitting.txt"))\n }\n' |
b |
diff -r 2cfd0db49bbc -r be91cb6f48e7 scripts/inspect.R --- a/scripts/inspect.R Sun Sep 12 19:49:12 2021 +0000 +++ b/scripts/inspect.R Fri Nov 26 15:55:11 2021 +0000 |
[ |
@@ -6,17 +6,19 @@ source(args[1]) printout <- function(text) { - if (typeof(text) %in% c("list", "vector")) { + if (typeof(text) %in% c("list", "vector", "integer", "double", "numeric")) { write.table(text, file = outfile_tab, quote = F, sep = "\t", col.names = NA) } else { ## text + print(typeof(text)) capture.output(text, file = outfile_tab) # nolint } } -if (inspector %in% c("print", "pData", "fData", "dims", "experimentData", - "exprs", "signature", "annotation", "abstract")) { +if (inspector %in% c("print", "pData", "fData", "dims", + "experimentData", "protocolData", "exprs", + "signature", "annotation", "abstract")) { op <- get(inspector) tab <- op(rds_eset) printout(tab) |
b |
diff -r 2cfd0db49bbc -r be91cb6f48e7 test-data/EMTABesethealthy.subset.rds |
b |
Binary file test-data/EMTABesethealthy.subset.rds has changed |
b |
diff -r 2cfd0db49bbc -r be91cb6f48e7 test-data/Mousebulkeset.rds |
b |
Binary file test-data/Mousebulkeset.rds has changed |
b |
diff -r 2cfd0db49bbc -r be91cb6f48e7 test-data/Mousesubeset.degenesonly2.half.rds |
b |
Binary file test-data/Mousesubeset.degenesonly2.half.rds has changed |
b |
diff -r 2cfd0db49bbc -r be91cb6f48e7 test-data/default_output.pdf |
b |
Binary file test-data/default_output.pdf has changed |
b |
diff -r 2cfd0db49bbc -r be91cb6f48e7 test-data/dendro.pdf |
b |
Binary file test-data/dendro.pdf has changed |
b |
diff -r 2cfd0db49bbc -r be91cb6f48e7 test-data/dendro_1.pdf |
b |
Binary file test-data/dendro_1.pdf has changed |