Galaxy |

Changeset 1:817eb707bbf4 (2021-11-26)

Previous changeset 0:2fed32b5aa02 (2021-09-12) Next changeset 2:577435e5e6b2 (2022-01-29)

Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/music/ commit 683bb72ae92b5759a239b7e3bf4c5a229ed35b54"

modified:
inspect_eset.xml
macros.xml
scripts/dendrogram.R
scripts/estimateprops.R
scripts/inspect.R
test-data/EMTABesethealthy.subset.rds
test-data/Mousebulkeset.rds
test-data/Mousesubeset.degenesonly2.half.rds
test-data/default_output.pdf
test-data/dendro.pdf

added:
test-data/dendro_1.pdf

diff -r 2fed32b5aa02 -r 817eb707bbf4 inspect_eset.xml
--- a/inspect_eset.xml Sun Sep 12 19:48:26 2021 +0000
+++ b/inspect_eset.xml Fri Nov 26 15:54:31 2021 +0000

@@ -38,14 +38,15 @@
             <option value="pData" >PhenoType Data Table</option>
             <option value="fData" >Feature Data Table</option>
             <option value="dims" >Dimension</option>
-            <option value="experimentData" />
-            <option value="signature" />
-            <option value="annotation" />
-            <option value="abstract" />
+            <option value="protocolData">Protocol Data</option>
+            <option value="experimentData" >Experiment Data</option>
+            <option value="annotation" >Annotation</option>
+            <option value="signature" >Signature (requires Annotation)</option>
+            <option value="abstract" >Abstract</option>
         </param>
     </inputs>
     <outputs>
-        <data name="out_tab" format="tabular" label="${tool.name} on ${on_string}: Inspection Result" />
+        <data name="out_tab" format="tabular" label="${tool.name} on ${on_string}: Inspection Result (${inspector})" />
     </outputs>
     <tests>
         <test expect_num_outputs="1" >
@@ -75,7 +76,6 @@
             <param name="inspector" value="dims" />
             <output name="out_tab" >
                 <assert_contents>
-                    <has_n_columns n="1" />
                     <has_text_matching expression="Samples\s+\d{1,4}"/>
                 </assert_contents>
             </output>

diff -r 2fed32b5aa02 -r 817eb707bbf4 macros.xml
--- a/macros.xml Sun Sep 12 19:48:26 2021 +0000
+++ b/macros.xml Fri Nov 26 15:54:31 2021 +0000

[

@@ -1,5 +1,5 @@
<macros>
-    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@VERSION_SUFFIX@">1</token>
     
@@ -15,11 +15,11 @@
         <validator type="regex" message="FORMAT terms separated by commas">^(([A-Za-z0-9+_ -]+)\s?,?)*$</validator>
     </xml>
     <xml name="validator_text" >
-        <validator type="regex" message="No commas allowed">^(([A-Za-z0-9+_ -]+)\s?)*$</validator>
+        <validator type="regex" message="No commas allowed">^(([A-Za-z0-9+_ -]+)\s?)+$</validator>
     </xml>
     <xml name="celltypes_macro" >
         <param name="celltypes" type="text" optional="true" value=""
-               label="Comma list of cell types to use from scRNA dataset" help="If NULL, then use all cell types." >
+               label="Comma list of cell types to use from scRNA dataset" help="If blank, then use all available cell types." >
             <expand macro="validator_index_identifiers" />
         </param>
     </xml>

diff -r 2fed32b5aa02 -r 817eb707bbf4 scripts/dendrogram.R
--- a/scripts/dendrogram.R Sun Sep 12 19:48:26 2021 +0000
+++ b/scripts/dendrogram.R Fri Nov 26 15:54:31 2021 +0000

[

@@ -17,31 +17,6 @@
args <- commandArgs(trailingOnly = TRUE)
source(args[1])

-## We then perform bulk tissue cell type estimation with pre-grouping
-## of cell types: C, list_of_cell_types, marker genes name, marker
-## genes list.
-## data.to.use = list(
-##     "C1" = list(cell.types = c("Neutro"),
-##                 marker.names=NULL,
-##                 marker.list=NULL),
-##     "C2" = list(cell.types = c("Podo"),
-##                 marker.names=NULL,
-##                 marker.list=NULL),
-##     "C3" = list(cell.types = c("Endo","CD-PC","LOH","CD-IC","DCT","PT"),
-##                 marker.names = "Epithelial",
-##                 marker.list = read_list("../test-data/epith.markers")),
-##     "C4" = list(cell.types = c("Macro","Fib","B lymph","NK","T lymph"),
-##                 marker.names = "Immune",
-##                 marker.list = read_list("../test-data/immune.markers"))
-## )
-grouped_celltypes <- lapply(data.to.use, function(x) {
-    x$cell.types
-})
-marker_groups <- lapply(data.to.use, function(x) {
-    x$marker.list
-})
-names(marker_groups) <- names(data.to.use)
-

## Perform the estimation
## Produce the first step information
@@ -51,33 +26,107 @@

## Plot the dendrogram of design matrix and cross-subject mean of
## realtive abundance
-par(mfrow = c(1, 2))
-d <- dist(t(log(sub.basis$Disgn.mtx + 1e-6)), method = "euclidean")
+## Hierarchical clustering using Complete Linkage
+d1 <- dist(t(log(sub.basis$Disgn.mtx + 1e-6)), method = "euclidean")
+hc1 <- hclust(d1, method = "complete")
## Hierarchical clustering using Complete Linkage
-hc1 <- hclust(d, method = "complete")
-## Plot the obtained dendrogram
-plot(hc1, cex = 0.6, hang = -1, main = "Cluster log(Design Matrix)")
-d <- dist(t(log(sub.basis$M.theta + 1e-8)), method = "euclidean")
-## Hierarchical clustering using Complete Linkage
-hc2 <- hclust(d, method = "complete")
-## Plot the obtained dendrogram
-pdf(file = outfile_pdf, width = 8, height = 8)
-plot(hc2, cex = 0.6, hang = -1, main = "Cluster log(Mean of RA)")
+d2 <- dist(t(log(sub.basis$M.theta + 1e-8)), method = "euclidean")
+hc2 <- hclust(d2, method = "complete")
+

-cl_type <- as.character(scrna_eset[[celltypes_label]])
+if (length(data.to.use) > 0) {
+    ## We then perform bulk tissue cell type estimation with pre-grouping
+    ## of cell types: C, list_of_cell_types, marker genes name, marker
+    ## genes list.
+    ## data.to.use = list(
+    ##     "C1" = list(cell.types = c("Neutro"),
+    ##                 marker.names=NULL,
+    ##                 marker.list=NULL),
+    ##     "C2" = list(cell.types = c("Podo"),
+    ##                 marker.names=NULL,
+    ##                 marker.list=NULL),
+    ##     "C3" = list(cell.types = c("Endo","CD-PC","LOH","CD-IC","DCT","PT"),
+    ##                 marker.names = "Epithelial",
+    ##                 marker.list = read_list("../test-data/epith.markers")),
+    ##     "C4" = list(cell.types = c("Macro","Fib","B lymph","NK","T lymph"),
+    ##                 marker.names = "Immune",
+    ##                 marker.list = read_list("../test-data/immune.markers"))
+    ## )
+    grouped_celltypes <- lapply(data.to.use, function(x) {
+        x$cell.types
+    })
+    marker_groups <- lapply(data.to.use, function(x) {
+        x$marker.list
+    })
+    names(marker_groups) <- names(data.to.use)
+
+
+    cl_type <- as.character(scrna_eset[[celltypes_label]])
+
+    for (cl in seq_len(length(grouped_celltypes))) {
+        cl_type[cl_type %in%
+                grouped_celltypes[[cl]]] <- names(grouped_celltypes)[cl]
+    }
+    pData(scrna_eset)[[clustertype_label]] <- factor(
+        cl_type, levels = c(names(grouped_celltypes),
+                            "CD-Trans", "Novel1", "Novel2"))
+
+    est_bulk <- music_prop.cluster(
+        bulk.eset = bulk_eset, sc.eset = scrna_eset,
+        group.markers = marker_groups, clusters = celltypes_label,
+        groups = clustertype_label, samples = samples_label,
+        clusters.type = grouped_celltypes
+    )

-for (cl in seq_len(length(grouped_celltypes))) {
-  cl_type[cl_type %in% grouped_celltypes[[cl]]] <- names(grouped_celltypes)[cl]
-}
-pData(scrna_eset)[[clustertype_label]] <- factor(
-    cl_type, levels = c(names(grouped_celltypes),
-                        "CD-Trans", "Novel1", "Novel2"))
+    estimated_music_props <- est_bulk$Est.prop.weighted.cluster
+    ## NNLS is not calculated here
+
+    ## Show different in estimation methods
+    ## Jitter plot of estimated cell type proportions
+    methods_list <- c("MuSiC")
+
+    jitter_fig <- Jitter_Est(
+        list(data.matrix(estimated_music_props)),
+        method.name = methods_list, title = "Jitter plot of Est Proportions",
+        size = 2, alpha = 0.7) +
+        theme_minimal() +
+        labs(x = element_blank(), y = element_blank()) +
+        theme(axis.text = element_text(size = 6),
+              axis.text.x = element_blank(),
+              legend.position = "none")
+
+    plot_box <- Boxplot_Est(list(
+        data.matrix(estimated_music_props)),
+        method.name = methods_list) +
+        theme_minimal() +
+        labs(x = element_blank(), y = element_blank()) +
+        theme(axis.text = element_text(size = 6),
+              axis.text.x = element_blank(),
+              legend.position = "none")

-est_bulk <- music_prop.cluster(
-    bulk.eset = bulk_eset, sc.eset = scrna_eset,
-    group.markers = marker_groups, clusters = celltypes_label,
-    groups = clustertype_label, samples = samples_label,
-    clusters.type = grouped_celltypes)
+    plot_hmap <- Prop_heat_Est(list(
+        data.matrix(estimated_music_props)),
+        method.name = methods_list) +
+        labs(x = element_blank(), y = element_blank()) +
+        theme(axis.text.y = element_text(size = 6),
+              axis.text.x = element_text(angle = -90, size = 5),
+              plot.title = element_text(size = 9),
+              legend.key.width = unit(0.15, "cm"),
+              legend.text = element_text(size = 5),
+              legend.title = element_text(size = 5))

-write.table(est_bulk, file = outfile_tab, quote = F, col.names = NA, sep = "\t")
-dev.off()
+}
+
+pdf(file = outfile_pdf, width = 8, height = 8)
+par(mfrow = c(1, 2))
+plot(hc1, cex = 0.6, hang = -1, main = "Cluster log(Design Matrix)")
+plot(hc2, cex = 0.6, hang = -1, main = "Cluster log(Mean of RA)")
+if (length(data.to.use) > 0) {
+    plot_grid(jitter_fig, plot_box, plot_hmap, ncol = 2, nrow = 2)
+}
+message(dev.off())
+
+if (length(data.to.use) > 0) {
+    write.table(estimated_music_props,
+                file = outfile_tab, quote = F, col.names = NA, sep = "\t")
+}

diff -r 2fed32b5aa02 -r 817eb707bbf4 scripts/estimateprops.R
--- a/scripts/estimateprops.R Sun Sep 12 19:48:26 2021 +0000
+++ b/scripts/estimateprops.R Fri Nov 26 15:54:31 2021 +0000

[

b'@@ -14,36 +14,67 @@\n clusters = celltypes_label,\n samples = samples_label, select.ct = celltypes, verbose = T)\n \n+estimated_music_props <- est_prop$Est.prop.weighted\n+estimated_nnls_props <- est_prop$Est.prop.allgene\n \n ## Show different in estimation methods\n ## Jitter plot of estimated cell type proportions\n-jitter.fig <- Jitter_Est(\n- list(data.matrix(est_prop$Est.prop.weighted),\n- data.matrix(est_prop$Est.prop.allgene)),\n- method.name = methods, title = "Jitter plot of Est Proportions")\n+jitter_fig <- Jitter_Est(\n+ list(data.matrix(estimated_music_props),\n+ data.matrix(estimated_nnls_props)),\n+ method.name = methods, title = "Jitter plot of Est Proportions",\n+ size = 2, alpha = 0.7) + theme_minimal()\n \n \n ## Make a Plot\n ## A more sophisticated jitter plot is provided as below. We separated\n-## the T2D subjects and normal subjects by their HbA1c levels.\n-m_prop <- rbind(melt(est_prop$Est.prop.weighted),\n- melt(est_prop$Est.prop.allgene))\n+## the T2D subjects and normal subjects by their disease factor levels.\n+estimated_music_props_flat <- melt(estimated_music_props)\n+estimated_nnls_props_flat <- melt(estimated_nnls_props)\n+\n+m_prop <- rbind(estimated_music_props_flat,\n+ estimated_nnls_props_flat)\n+colnames(m_prop) <- c("Sub", "CellType", "Prop")\n+\n+if (is.null(celltypes)) {\n+ celltypes <- levels(m_prop$CellType)\n+ message("No celltypes declared, using:")\n+ message(celltypes)\n+}\n \n-colnames(m_prop) <- c("Sub", "CellType", "Prop")\n+if (phenotype_target_threshold == -99) {\n+ phenotype_target_threshold <- -Inf\n+ message("phenotype target threshold set to -Inf")\n+}\n+\n+if (is.null(phenotype_factors)) {\n+ phenotype_factors <- colnames(pData(bulk_eset))\n+}\n+## filter out unwanted factors like "sampleID" and "subjectName"\n+phenotype_factors <- phenotype_factors[\n+ !(phenotype_factors %in% phenotype_factors_always_exclude)]\n+message("Phenotype Factors to use:")\n+message(phenotype_factors)\n+\n \n m_prop$CellType <- factor(m_prop$CellType, levels = celltypes) # nolint\n-m_prop$Method <- factor(rep(methods, each = 89 * 6), levels = methods) # nolint\n-m_prop$HbA1c <- rep(bulk_eset$hba1c, 2 * 6) # nolint\n-m_prop <- m_prop[!is.na(m_prop$HbA1c), ]\n-m_prop$Disease <- factor(sample_groups[(m_prop$HbA1c > 6.5) + 1], # nolint\n+m_prop$Method <- factor(rep(methods, each = nrow(estimated_music_props_flat)), # nolint\n+ levels = methods)\n+m_prop$Disease_factor <- rep(bulk_eset[[phenotype_target]], 2 * length(celltypes)) # nolint\n+m_prop <- m_prop[!is.na(m_prop$Disease_factor), ]\n+## Generate a TRUE/FALSE table of Normal == 1 and Disease == 2\n+sample_groups <- c("Normal", sample_disease_group)\n+m_prop$Disease <- factor(sample_groups[(m_prop$Disease_factor > phenotype_target_threshold) + 1], # nolint\n levels = sample_groups)\n \n+## Binary to scale: e.g. TRUE / 5 = 0.2\n m_prop$D <- (m_prop$Disease == # nolint\n sample_disease_group) / sample_disease_group_scale\n-m_prop <- rbind(subset(m_prop, Disease == healthy_phenotype),\n- subset(m_prop, Disease != healthy_phenotype))\n+## NA\'s are not included in the comparison below\n+m_prop <- rbind(subset(m_prop, Disease != sample_disease_group),\n+ subset(m_prop, Disease == sample_disease_group))\n \n-jitter.new <- ggplot(m_prop, aes(Method, Prop)) +\n+jitter_new <- ggplot(m_prop, aes(Method, Prop)) +\n geom_point(aes(fill = Method, color = Disease, stroke = D, shape = Disease),\n size = 2, alpha = 0.7,\n position = position_jitter(width = 0.25, height = 0)) +\n@@ -52,20 +83,23 @@\n scale_shape_manual(values = c(21, 24)) + theme_minimal()\n \n ## Plot to compare method effectiveness\n-## Create dataframe for beta cell proportions and HbA1c levels\n-m_prop_ana <- data.frame(pData(bulk_eset)[rep(1:89, 2), phenotype_factors],\n- ct.prop = c(est_prop$Est.prop.weighted[, 2],\n- '..b'mooth(method = "lm", se = FALSE, col = "black", lwd = 0.25) +\n geom_point(aes(fill = Method, color = Disease, stroke = D, shape = Disease),\n size = 2, alpha = 0.7) + facet_wrap(~ Method) +\n@@ -73,21 +107,81 @@\n scale_colour_manual(values = c("white", "gray20")) +\n scale_shape_manual(values = c(21, 24))\n \n+## BoxPlot\n+plot_box <- Boxplot_Est(list(\n+ data.matrix(estimated_music_props),\n+ data.matrix(estimated_nnls_props)),\n+ method.name = c("MuSiC", "NNLS")) +\n+ theme(axis.text.x = element_text(angle = -90),\n+ axis.text.y = element_text(size = 8)) +\n+ ggtitle(element_blank()) + theme_minimal()\n+\n+## Heatmap\n+plot_hmap <- Prop_heat_Est(list(\n+ data.matrix(estimated_music_props),\n+ data.matrix(estimated_nnls_props)),\n+ method.name = c("MuSiC", "NNLS")) +\n+ theme(axis.text.x = element_text(angle = -90),\n+ axis.text.y = element_text(size = 6))\n \n pdf(file = outfile_pdf, width = 8, height = 8)\n-plot_grid(jitter.fig, jitter.new, labels = "auto", ncol = 1, nrow = 2)\n-jitt_compare\n-dev.off()\n+plot_grid(jitter_fig, plot_box, labels = "auto", ncol = 1, nrow = 2)\n+plot_grid(jitter_new, jitt_compare, labels = "auto", ncol = 1, nrow = 2)\n+plot_hmap\n+message(dev.off())\n+\n+## Output Proportions\n+\n+write.table(est_prop$Est.prop.weighted,\n+ file = paste0("report_data/prop_",\n+ "Music Estimated Proportions of Cell Types",\n+ ".tabular"),\n+ quote = F, sep = "\\t", col.names = NA)\n+write.table(est_prop$Est.prop.allgene,\n+ file = paste0("report_data/prop_",\n+ "NNLS Estimated Proportions of Cell Types",\n+ ".tabular"),\n+ quote = F, sep = "\\t", col.names = NA)\n+write.table(est_prop$Weight.gene,\n+ file = paste0("report_data/weightgene_",\n+ "Music Estimated Proportions of Cell Types (by Gene)",\n+ ".tabular"),\n+ quote = F, sep = "\\t", col.names = NA)\n+write.table(est_prop$r.squared.full,\n+ file = paste0("report_data/rsquared_",\n+ "Music R-sqr Estimated Proportions of Each Subject",\n+ ".tabular"),\n+ quote = F, sep = "\\t", col.names = NA)\n+write.table(est_prop$Var.prop,\n+ file = paste0("report_data/varprop_",\n+ "Matrix of Variance of MuSiC Estimates",\n+ ".tabular"),\n+ quote = F, sep = "\\t", col.names = NA)\n+\n \n ## Summary table\n for (meth in methods) {\n ##lm_beta_meth = lm(ct.prop ~ age + bmi + hba1c + gender, data =\n- ##subset(m_prop_ana, Method == meth))\n+ sub_data <- subset(m_prop_ana, Method == meth)\n+ ## We can only do regression where there are more than 1 factors\n+ ## so we must find and exclude the ones which are not\n+ gt1_facts <- sapply(phenotype_factors, function(facname) {\n+ return(length(unique(sort(sub_data[[facname]]))) == 1)\n+ })\n+ form_factors <- phenotype_factors\n+ exclude_facts <- names(gt1_facts)[gt1_facts]\n+ if (length(exclude_facts) > 0) {\n+ message("Factors with only one level will be excluded:")\n+ message(exclude_facts)\n+ form_factors <- phenotype_factors[\n+ !(phenotype_factors %in% exclude_facts)]\n+ }\n lm_beta_meth <- lm(as.formula(\n- paste("ct.prop", paste(phenotype_factors, collapse = " + "),\n- sep = " ~ ")),\n- data = subset(m_prop_ana, Method == meth))\n- print(paste0("Summary: ", meth))\n+ paste("ct.prop", paste(form_factors, collapse = " + "),\n+ sep = " ~ ")), data = sub_data)\n+ message(paste0("Summary: ", meth))\n capture.output(summary(lm_beta_meth),\n- file = paste0("report_data/summ_", meth, ".txt"))\n+ file = paste0("report_data/summ_Log of ",\n+ meth,\n+ " fitting.txt"))\n }\n'

diff -r 2fed32b5aa02 -r 817eb707bbf4 scripts/inspect.R
--- a/scripts/inspect.R Sun Sep 12 19:48:26 2021 +0000
+++ b/scripts/inspect.R Fri Nov 26 15:54:31 2021 +0000

[

@@ -6,17 +6,19 @@
source(args[1])

printout <- function(text) {
-    if (typeof(text) %in% c("list", "vector")) {
+    if (typeof(text) %in% c("list", "vector", "integer", "double", "numeric")) {
         write.table(text, file = outfile_tab, quote = F, sep = "\t",
                     col.names = NA)
     } else {
         ## text
+        print(typeof(text))
         capture.output(text, file = outfile_tab)  # nolint
     }
}

-if (inspector %in% c("print", "pData", "fData", "dims", "experimentData",
-                     "exprs", "signature", "annotation", "abstract")) {
+if (inspector %in% c("print", "pData", "fData", "dims",
+                     "experimentData", "protocolData", "exprs",
+                     "signature", "annotation", "abstract")) {
     op <- get(inspector)
     tab <- op(rds_eset)
     printout(tab)

diff -r 2fed32b5aa02 -r 817eb707bbf4 test-data/EMTABesethealthy.subset.rds

Binary file test-data/EMTABesethealthy.subset.rds has changed

diff -r 2fed32b5aa02 -r 817eb707bbf4 test-data/Mousebulkeset.rds

Binary file test-data/Mousebulkeset.rds has changed

diff -r 2fed32b5aa02 -r 817eb707bbf4 test-data/Mousesubeset.degenesonly2.half.rds

Binary file test-data/Mousesubeset.degenesonly2.half.rds has changed

diff -r 2fed32b5aa02 -r 817eb707bbf4 test-data/default_output.pdf

Binary file test-data/default_output.pdf has changed

diff -r 2fed32b5aa02 -r 817eb707bbf4 test-data/dendro.pdf

Binary file test-data/dendro.pdf has changed

diff -r 2fed32b5aa02 -r 817eb707bbf4 test-data/dendro_1.pdf

Binary file test-data/dendro_1.pdf has changed