Repository 'music_deconvolution'
hg clone https://toolshed.g2.bx.psu.edu/repos/bgruening/music_deconvolution

Changeset 1:3ca0132c182a (2021-11-26)
Previous changeset 0:224721e76869 (2021-09-12) Next changeset 2:1c4cf4b7debe (2021-11-30)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/music/ commit 683bb72ae92b5759a239b7e3bf4c5a229ed35b54"
modified:
macros.xml
music-deconvolution.xml
scripts/dendrogram.R
scripts/estimateprops.R
scripts/inspect.R
test-data/EMTABesethealthy.subset.rds
test-data/Mousebulkeset.rds
test-data/Mousesubeset.degenesonly2.half.rds
test-data/default_output.pdf
test-data/dendro.pdf
added:
test-data/dendro_1.pdf
b
diff -r 224721e76869 -r 3ca0132c182a macros.xml
--- a/macros.xml Sun Sep 12 19:48:48 2021 +0000
+++ b/macros.xml Fri Nov 26 15:54:51 2021 +0000
[
@@ -1,5 +1,5 @@
 <macros>
-    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@VERSION_SUFFIX@">1</token>
     <!-- The ESet inspector/constructor and MuSiC tool can have
          independent Galaxy versions but should reference the same
          package version always. -->
@@ -15,11 +15,11 @@
         <validator type="regex" message="FORMAT terms separated by commas">^(([A-Za-z0-9+_ -]+)\s?,?)*$</validator>
     </xml>
     <xml name="validator_text" >
-        <validator type="regex" message="No commas allowed">^(([A-Za-z0-9+_ -]+)\s?)*$</validator>
+        <validator type="regex" message="No commas allowed">^(([A-Za-z0-9+_ -]+)\s?)+$</validator>
     </xml>
     <xml name="celltypes_macro" >
         <param name="celltypes" type="text" optional="true" value=""
-               label="Comma list of cell types to use from scRNA dataset" help="If NULL, then use all cell types." >
+               label="Comma list of cell types to use from scRNA dataset" help="If blank, then use all available cell types." >
             <expand macro="validator_index_identifiers" />
         </param>
     </xml>
b
diff -r 224721e76869 -r 3ca0132c182a music-deconvolution.xml
--- a/music-deconvolution.xml Sun Sep 12 19:48:48 2021 +0000
+++ b/music-deconvolution.xml Fri Nov 26 15:54:51 2021 +0000
[
b'@@ -1,11 +1,11 @@\n <tool id="music_deconvolution" name="MuSiC" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"\n-      profile="20.05" license="GPL-3.0-or-later" >\n+      profile="21.09" license="GPL-3.0-or-later" >\n     <description>estimate cell type proportions in bulk RNA-seq data</description>\n     <macros>\n         <import>macros.xml</import>\n     </macros>\n     <expand macro="requirements" />\n-    <command detect_errors="exit_code"><![CDATA[\n+    <command detect_errors="exit_code" ><![CDATA[\n mkdir report_data &&\n Rscript --vanilla \'$__tool_directory__/scripts/${do.method}.R\' \'$conf\'\n ]]></command>\n@@ -29,16 +29,17 @@\n #if str($do.method) == "estimateprops":\n \n phenotype_factors = null_str_vec(\'$do.phenotype_factors\')\n+phenotype_factors_always_exclude = null_str_vec(\'$do.phenotype_factors_always_exclude\')\n celltypes_label = null_str_vec(\'$do.celltypes_label\')\n samples_label = null_str_vec(\'$do.samples_label\')\n celltypes = null_str_vec(\'$do.celltypes\')\n-methods = null_str_vec(\'$do.methods\')\n-phenotype_gene = null_str_vec(\'$do.phenotype_gene\')\n-sample_groups = null_str_vec(\'$do.sample_groups\')\n+methods = c("MuSiC", "NNLS")\n+phenotype_target = null_str_vec(\'$do.phenotype_target\')\n+phenotype_target_threshold = as.numeric(\'$do.phenotype_target_threshold\')\n sample_disease_group = null_str_vec(\'$do.sample_disease_group\')\n sample_disease_group_scale = as.integer(\'$do.sample_disease_group_scale\')\n-healthy_phenotype = null_str_vec(\'$do.healthy_phenotype\')\n compare_title = null_str_vec(\'$do.compare_title\')\n+\n outfile_pdf=\'$out_pdf\'\n \n #elif str($do.method) == "dendrogram":\n@@ -91,20 +92,24 @@\n                     <expand macro="validator_text" />\n                 </param>\n                 <expand macro="celltypes_macro" />\n-                <param name="methods" multiple="true" type="select" display="checkboxes" label="Cell Proportion Method" >\n-                    <option value="MuSiC" selected="true" />\n-                    <option value="NNLS" selected="true" />\n-                </param>\n                 <param name="phenotype_factors" type="text"\n-                       label="List of phenotypes factors" help="If blank, then use all phenotypes." >\n+                       label="Phenotype factors"\n+                       help="List of phenotypes factors to be used in the linear regression. Please make sure that each factor has more than one unique value. Names correspond to column names in the bulk RNA dataset phenotype table. If blank, then treat all bulk phenotype columns as factors." >\n                     <expand macro="validator_index_identifiers" />\n                 </param>\n-                <param name="phenotype_gene" type="text" label="Causative Gene"\n-                       help="MUST exist in the phenotype factors above." >\n+                <param name="phenotype_factors_always_exclude" type="text"\n+                       label="Excluded phenotype factors"\n+                       help="List of phenotype factors to always exclude in the analysis"\n+                       value="sampleID,SubjectName" >\n+                    <expand macro="validator_index_identifiers" />\n+                </param>\n+                <param name="phenotype_target" type="text" label="Phenotype Target"\n+                       help="MUST exist in the bulk RNA datasets phenotype factors, as above." >\n                     <expand macro="validator_text" />\n                 </param>\n-                <param name="sample_groups" type="text" label="List of Sample Groups" >\n-                    <expand macro="validator_index_identifiers" />\n+                <param name="phenotype_target_threshold" type="float" label="Phenotype Target Threshold"\n+                       value="-99"\n+                       help="The (%) threshold at which the phenotype target manifests. Leave at -99 to select all." >\n                 </param>\n                 <param name="sample_disease_group" type="text" label="Sample Disease Group"\n                        help="MUST exist'..b'acro,Neutro,B lymph,T lymph,NK" />\n+            </conditional>\n+            <output name="out_pdf" value="dendro_1.pdf" compare="sim_size" />\n+        </test>\n         <test expect_num_outputs="2" >\n-            <!-- Dendrogram test -->\n+            <!-- Dendrogram test 2 -->\n             <param name="bulk_eset" value="Mousebulkeset.rds" />\n             <param name="scrna_eset" value="Mousesubeset.degenesonly2.half.rds" />\n             <conditional name="do" >\n@@ -195,12 +216,12 @@\n             <output name="out_pdf" value="dendro.pdf" compare="sim_size" />\n             <output name="out_tab">\n                 <assert_contents>\n-                    <has_text_matching expression="^\\s+Est\\.prop\\.weighted\\.cluster\\.Neutro\\s+Est\\.prop\\.weighted\\.cluster\\.Podo\\s+Est\\.prop\\.weighted\\.cluster\\.Endo" />\n+                    <has_text_matching expression="^\\s+Neutro\\s+Podo\\s+Endo" />\n                     <has_text text="APOL1.GNA78M"/>\n                 </assert_contents>\n             </output>\n         </test>\n-        <test expect_num_outputs="2" >\n+        <test expect_num_outputs="3" >\n             <!-- Estimate Proportions test -->\n             <param name="bulk_eset" value="GSE50244bulkeset.subset.rds" />\n             <param name="scrna_eset" value="EMTABesethealthy.subset.rds" />\n@@ -209,25 +230,23 @@\n                 <param name="celltypes_label" value="cellType" />\n                 <param name="samples_label" value="sampleID" />\n                 <param name="celltypes" value="alpha,beta,delta,gamma,acinar,ductal" />\n-                <param name="methods" value="MuSiC,NNLS" />\n                 <param name="phenotype_factors" value="age,bmi,hba1c,gender" />\n-                <param name="phenotype_gene" value="hba1c" />\n-                <param name="sample_groups" value="Normal,T2D" />\n+                <param name="phenotype_target" value="hba1c" />\n+                <param name="phenotype_target_threshold" value="6.5" />\n                 <param name="sample_disease_group" value="T2D" />\n                 <param name="sample_disease_group_scale" value="5" />\n-                <param name="healthy_phenotype" value="Normal" />\n                 <param name="compare_title" value="HbA1c vs Beta Cell Type Proportion" />\n             </conditional>\n             <output name="out_pdf" value="default_output.pdf" compare="sim_size" />\n-            <output_collection name="summaries" count="2">\n-                <element name="MuSiC" ftype="txt">\n+            <output_collection name="summaries" count="5">\n+                <element name="Log of MuSiC fitting" ftype="txt">\n                     <assert_contents>\n-                        <has_text text="Residual standard error: 0.1662 on 72 degrees of freedom"/>\n+                        <has_text text="Residual standard error: 0.1704 on 72 degrees of freedom"/>\n                     </assert_contents>\n                 </element>\n-                <element name="NNLS" ftype="txt">\n+                <element name="Log of NNLS fitting" ftype="txt">\n                     <assert_contents>\n-                        <has_text text="Residual standard error: 0.06561 on 72 degrees of freedom"/>\n+                        <has_text text="Residual standard error: 0.0645 on 72 degrees of freedom"/>\n                     </assert_contents>\n                 </element>\n             </output_collection>\n@@ -238,7 +257,8 @@\n \n Solid tissues often contain closely related cell types which leads to collinearity. To deal with collinearity, MuSiC employs a tree-guided procedure that recursively zooms in on closely related cell types. Briefly, we first group similar cell types into the same cluster and estimate cluster proportions, then recursively repeat this procedure within each cluster.\n \n-.. image:: https://xuranw.github.io/MuSiC/articles/images/FigureMethod.jpg\n+.. image:: $PATH_TO_IMAGES/FigureMethod.jpg\n+\n     ]]></help>\n     <citations>\n         <citation type="doi">https://doi.org/10.1038/s41467-018-08023-x</citation>\n'
b
diff -r 224721e76869 -r 3ca0132c182a scripts/dendrogram.R
--- a/scripts/dendrogram.R Sun Sep 12 19:48:48 2021 +0000
+++ b/scripts/dendrogram.R Fri Nov 26 15:54:51 2021 +0000
[
@@ -17,31 +17,6 @@
 args <- commandArgs(trailingOnly = TRUE)
 source(args[1])
 
-## We then perform bulk tissue cell type estimation with pre-grouping
-## of cell types: C, list_of_cell_types, marker genes name, marker
-## genes list.
-## data.to.use = list(
-##     "C1" = list(cell.types = c("Neutro"),
-##                 marker.names=NULL,
-##                 marker.list=NULL),
-##     "C2" = list(cell.types = c("Podo"),
-##                 marker.names=NULL,
-##                 marker.list=NULL),
-##     "C3" = list(cell.types = c("Endo","CD-PC","LOH","CD-IC","DCT","PT"),
-##                 marker.names = "Epithelial",
-##                 marker.list = read_list("../test-data/epith.markers")),
-##     "C4" = list(cell.types = c("Macro","Fib","B lymph","NK","T lymph"),
-##                 marker.names = "Immune",
-##                 marker.list = read_list("../test-data/immune.markers"))
-## )
-grouped_celltypes <- lapply(data.to.use, function(x) {
-    x$cell.types
-})
-marker_groups <- lapply(data.to.use, function(x) {
-    x$marker.list
-})
-names(marker_groups) <- names(data.to.use)
-
 
 ## Perform the estimation
 ## Produce the first step information
@@ -51,33 +26,107 @@
 
 ## Plot the dendrogram of design matrix and cross-subject mean of
 ## realtive abundance
-par(mfrow = c(1, 2))
-d <- dist(t(log(sub.basis$Disgn.mtx + 1e-6)), method = "euclidean")
+## Hierarchical clustering using Complete Linkage
+d1 <- dist(t(log(sub.basis$Disgn.mtx + 1e-6)), method = "euclidean")
+hc1 <- hclust(d1, method = "complete")
 ## Hierarchical clustering using Complete Linkage
-hc1 <- hclust(d, method = "complete")
-## Plot the obtained dendrogram
-plot(hc1, cex = 0.6, hang = -1, main = "Cluster log(Design Matrix)")
-d <- dist(t(log(sub.basis$M.theta + 1e-8)), method = "euclidean")
-## Hierarchical clustering using Complete Linkage
-hc2 <- hclust(d, method = "complete")
-## Plot the obtained dendrogram
-pdf(file = outfile_pdf, width = 8, height = 8)
-plot(hc2, cex = 0.6, hang = -1, main = "Cluster log(Mean of RA)")
+d2 <- dist(t(log(sub.basis$M.theta + 1e-8)), method = "euclidean")
+hc2 <- hclust(d2, method = "complete")
+
 
-cl_type <- as.character(scrna_eset[[celltypes_label]])
+if (length(data.to.use) > 0) {
+    ## We then perform bulk tissue cell type estimation with pre-grouping
+    ## of cell types: C, list_of_cell_types, marker genes name, marker
+    ## genes list.
+    ## data.to.use = list(
+    ##     "C1" = list(cell.types = c("Neutro"),
+    ##                 marker.names=NULL,
+    ##                 marker.list=NULL),
+    ##     "C2" = list(cell.types = c("Podo"),
+    ##                 marker.names=NULL,
+    ##                 marker.list=NULL),
+    ##     "C3" = list(cell.types = c("Endo","CD-PC","LOH","CD-IC","DCT","PT"),
+    ##                 marker.names = "Epithelial",
+    ##                 marker.list = read_list("../test-data/epith.markers")),
+    ##     "C4" = list(cell.types = c("Macro","Fib","B lymph","NK","T lymph"),
+    ##                 marker.names = "Immune",
+    ##                 marker.list = read_list("../test-data/immune.markers"))
+    ## )
+    grouped_celltypes <- lapply(data.to.use, function(x) {
+        x$cell.types
+    })
+    marker_groups <- lapply(data.to.use, function(x) {
+        x$marker.list
+    })
+    names(marker_groups) <- names(data.to.use)
+
+
+    cl_type <- as.character(scrna_eset[[celltypes_label]])
+
+    for (cl in seq_len(length(grouped_celltypes))) {
+        cl_type[cl_type %in%
+                grouped_celltypes[[cl]]] <- names(grouped_celltypes)[cl]
+    }
+    pData(scrna_eset)[[clustertype_label]] <- factor(
+        cl_type, levels = c(names(grouped_celltypes),
+                            "CD-Trans", "Novel1", "Novel2"))
+
+    est_bulk <- music_prop.cluster(
+        bulk.eset = bulk_eset, sc.eset = scrna_eset,
+        group.markers = marker_groups, clusters = celltypes_label,
+        groups = clustertype_label, samples = samples_label,
+        clusters.type = grouped_celltypes
+    )
 
-for (cl in seq_len(length(grouped_celltypes))) {
-  cl_type[cl_type %in% grouped_celltypes[[cl]]] <- names(grouped_celltypes)[cl]
-}
-pData(scrna_eset)[[clustertype_label]] <- factor(
-    cl_type, levels = c(names(grouped_celltypes),
-                        "CD-Trans", "Novel1", "Novel2"))
+    estimated_music_props <- est_bulk$Est.prop.weighted.cluster
+    ## NNLS is not calculated here
+
+    ## Show different in estimation methods
+    ## Jitter plot of estimated cell type proportions
+    methods_list <- c("MuSiC")
+
+    jitter_fig <- Jitter_Est(
+        list(data.matrix(estimated_music_props)),
+        method.name = methods_list, title = "Jitter plot of Est Proportions",
+        size = 2, alpha = 0.7) +
+        theme_minimal() +
+        labs(x = element_blank(), y = element_blank()) +
+        theme(axis.text = element_text(size = 6),
+              axis.text.x = element_blank(),
+              legend.position = "none")
+
+    plot_box <- Boxplot_Est(list(
+        data.matrix(estimated_music_props)),
+        method.name = methods_list) +
+        theme_minimal() +
+        labs(x = element_blank(), y = element_blank()) +
+        theme(axis.text = element_text(size = 6),
+              axis.text.x = element_blank(),
+              legend.position = "none")
 
-est_bulk <- music_prop.cluster(
-    bulk.eset = bulk_eset, sc.eset = scrna_eset,
-    group.markers = marker_groups, clusters = celltypes_label,
-    groups = clustertype_label, samples = samples_label,
-    clusters.type = grouped_celltypes)
+    plot_hmap <- Prop_heat_Est(list(
+        data.matrix(estimated_music_props)),
+        method.name = methods_list) +
+        labs(x = element_blank(), y = element_blank()) +
+        theme(axis.text.y = element_text(size = 6),
+              axis.text.x = element_text(angle = -90, size = 5),
+              plot.title = element_text(size = 9),
+              legend.key.width = unit(0.15, "cm"),
+              legend.text = element_text(size = 5),
+              legend.title = element_text(size = 5))
 
-write.table(est_bulk, file = outfile_tab, quote = F, col.names = NA, sep = "\t")
-dev.off()
+}
+    
+pdf(file = outfile_pdf, width = 8, height = 8)
+par(mfrow = c(1, 2))
+plot(hc1, cex = 0.6, hang = -1, main = "Cluster log(Design Matrix)")
+plot(hc2, cex = 0.6, hang = -1, main = "Cluster log(Mean of RA)")
+if (length(data.to.use) > 0) {
+    plot_grid(jitter_fig, plot_box, plot_hmap, ncol = 2, nrow = 2)
+}
+message(dev.off())
+
+if (length(data.to.use) > 0) {
+    write.table(estimated_music_props,
+                file = outfile_tab, quote = F, col.names = NA, sep = "\t")
+}
b
diff -r 224721e76869 -r 3ca0132c182a scripts/estimateprops.R
--- a/scripts/estimateprops.R Sun Sep 12 19:48:48 2021 +0000
+++ b/scripts/estimateprops.R Fri Nov 26 15:54:51 2021 +0000
[
b'@@ -14,36 +14,67 @@\n     clusters = celltypes_label,\n     samples = samples_label, select.ct = celltypes, verbose = T)\n \n+estimated_music_props <- est_prop$Est.prop.weighted\n+estimated_nnls_props <- est_prop$Est.prop.allgene\n \n ## Show different in estimation methods\n ## Jitter plot of estimated cell type proportions\n-jitter.fig <- Jitter_Est(\n-    list(data.matrix(est_prop$Est.prop.weighted),\n-         data.matrix(est_prop$Est.prop.allgene)),\n-    method.name = methods, title = "Jitter plot of Est Proportions")\n+jitter_fig <- Jitter_Est(\n+    list(data.matrix(estimated_music_props),\n+         data.matrix(estimated_nnls_props)),\n+    method.name = methods, title = "Jitter plot of Est Proportions",\n+    size = 2, alpha = 0.7) + theme_minimal()\n \n \n ## Make a Plot\n ## A more sophisticated jitter plot is provided as below. We separated\n-## the T2D subjects and normal subjects by their HbA1c levels.\n-m_prop <- rbind(melt(est_prop$Est.prop.weighted),\n-               melt(est_prop$Est.prop.allgene))\n+## the T2D subjects and normal subjects by their disease factor levels.\n+estimated_music_props_flat <- melt(estimated_music_props)\n+estimated_nnls_props_flat <- melt(estimated_nnls_props)\n+\n+m_prop <- rbind(estimated_music_props_flat,\n+                estimated_nnls_props_flat)\n+colnames(m_prop) <- c("Sub", "CellType", "Prop")\n+\n+if (is.null(celltypes)) {\n+    celltypes <- levels(m_prop$CellType)\n+    message("No celltypes declared, using:")\n+    message(celltypes)\n+}\n \n-colnames(m_prop) <- c("Sub", "CellType", "Prop")\n+if (phenotype_target_threshold == -99) {\n+    phenotype_target_threshold <- -Inf\n+    message("phenotype target threshold set to -Inf")\n+}\n+\n+if (is.null(phenotype_factors)) {\n+    phenotype_factors <- colnames(pData(bulk_eset))\n+}\n+## filter out unwanted factors like "sampleID" and "subjectName"\n+phenotype_factors <- phenotype_factors[\n+    !(phenotype_factors %in% phenotype_factors_always_exclude)]\n+message("Phenotype Factors to use:")\n+message(phenotype_factors)\n+\n \n m_prop$CellType <- factor(m_prop$CellType, levels = celltypes) # nolint\n-m_prop$Method <- factor(rep(methods, each = 89 * 6), levels = methods) # nolint\n-m_prop$HbA1c <- rep(bulk_eset$hba1c, 2 * 6) # nolint\n-m_prop <- m_prop[!is.na(m_prop$HbA1c), ]\n-m_prop$Disease <- factor(sample_groups[(m_prop$HbA1c > 6.5) + 1], # nolint\n+m_prop$Method <- factor(rep(methods, each = nrow(estimated_music_props_flat)), # nolint\n+                        levels = methods)\n+m_prop$Disease_factor <- rep(bulk_eset[[phenotype_target]], 2 * length(celltypes)) # nolint\n+m_prop <- m_prop[!is.na(m_prop$Disease_factor), ]\n+## Generate a TRUE/FALSE table of Normal == 1 and Disease == 2\n+sample_groups <- c("Normal", sample_disease_group)\n+m_prop$Disease <- factor(sample_groups[(m_prop$Disease_factor > phenotype_target_threshold) + 1], # nolint\n                          levels = sample_groups)\n \n+## Binary to scale: e.g. TRUE / 5 = 0.2\n m_prop$D <- (m_prop$Disease ==   # nolint\n              sample_disease_group) / sample_disease_group_scale\n-m_prop <- rbind(subset(m_prop, Disease == healthy_phenotype),\n-               subset(m_prop, Disease != healthy_phenotype))\n+## NA\'s are not included in the comparison below\n+m_prop <- rbind(subset(m_prop, Disease != sample_disease_group),\n+               subset(m_prop, Disease == sample_disease_group))\n \n-jitter.new <- ggplot(m_prop, aes(Method, Prop)) +\n+jitter_new <- ggplot(m_prop, aes(Method, Prop)) +\n     geom_point(aes(fill = Method, color = Disease, stroke = D, shape = Disease),\n                size = 2, alpha = 0.7,\n                position = position_jitter(width = 0.25, height = 0)) +\n@@ -52,20 +83,23 @@\n     scale_shape_manual(values = c(21, 24)) + theme_minimal()\n \n ## Plot to compare method effectiveness\n-## Create dataframe for beta cell proportions and HbA1c levels\n-m_prop_ana <- data.frame(pData(bulk_eset)[rep(1:89, 2), phenotype_factors],\n-                        ct.prop = c(est_prop$Est.prop.weighted[, 2],\n-                      '..b'mooth(method = "lm",  se = FALSE, col = "black", lwd = 0.25) +\n     geom_point(aes(fill = Method, color = Disease, stroke = D, shape = Disease),\n                size = 2, alpha = 0.7) +  facet_wrap(~ Method) +\n@@ -73,21 +107,81 @@\n     scale_colour_manual(values = c("white", "gray20")) +\n     scale_shape_manual(values = c(21, 24))\n \n+## BoxPlot\n+plot_box <- Boxplot_Est(list(\n+    data.matrix(estimated_music_props),\n+    data.matrix(estimated_nnls_props)),\n+    method.name = c("MuSiC", "NNLS")) +\n+    theme(axis.text.x = element_text(angle = -90),\n+          axis.text.y = element_text(size = 8)) +\n+    ggtitle(element_blank()) + theme_minimal()\n+\n+## Heatmap\n+plot_hmap <- Prop_heat_Est(list(\n+    data.matrix(estimated_music_props),\n+    data.matrix(estimated_nnls_props)),\n+    method.name = c("MuSiC", "NNLS")) +\n+    theme(axis.text.x = element_text(angle = -90),\n+          axis.text.y = element_text(size = 6))\n \n pdf(file = outfile_pdf, width = 8, height = 8)\n-plot_grid(jitter.fig, jitter.new, labels = "auto", ncol = 1, nrow = 2)\n-jitt_compare\n-dev.off()\n+plot_grid(jitter_fig, plot_box, labels = "auto", ncol = 1, nrow = 2)\n+plot_grid(jitter_new, jitt_compare, labels = "auto", ncol = 1, nrow = 2)\n+plot_hmap\n+message(dev.off())\n+\n+## Output Proportions\n+\n+write.table(est_prop$Est.prop.weighted,\n+            file = paste0("report_data/prop_",\n+                          "Music Estimated Proportions of Cell Types",\n+                          ".tabular"),\n+            quote = F, sep = "\\t", col.names = NA)\n+write.table(est_prop$Est.prop.allgene,\n+            file = paste0("report_data/prop_",\n+                          "NNLS Estimated Proportions of Cell Types",\n+                          ".tabular"),\n+            quote = F, sep = "\\t", col.names = NA)\n+write.table(est_prop$Weight.gene,\n+            file = paste0("report_data/weightgene_",\n+                          "Music Estimated Proportions of Cell Types (by Gene)",\n+                          ".tabular"),\n+            quote = F, sep = "\\t", col.names = NA)\n+write.table(est_prop$r.squared.full,\n+            file = paste0("report_data/rsquared_",\n+                          "Music R-sqr Estimated Proportions of Each Subject",\n+                          ".tabular"),\n+            quote = F, sep = "\\t", col.names = NA)\n+write.table(est_prop$Var.prop,\n+            file = paste0("report_data/varprop_",\n+                          "Matrix of Variance of MuSiC Estimates",\n+                          ".tabular"),\n+            quote = F, sep = "\\t", col.names = NA)\n+\n \n ## Summary table\n for (meth in methods) {\n     ##lm_beta_meth = lm(ct.prop ~ age + bmi + hba1c + gender, data =\n-    ##subset(m_prop_ana, Method == meth))\n+    sub_data <- subset(m_prop_ana, Method == meth)\n+    ## We can only do regression where there are more than 1 factors\n+    ## so we must find and exclude the ones which are not\n+    gt1_facts <- sapply(phenotype_factors, function(facname) {\n+        return(length(unique(sort(sub_data[[facname]]))) == 1)\n+    })\n+    form_factors <- phenotype_factors\n+    exclude_facts <- names(gt1_facts)[gt1_facts]\n+    if (length(exclude_facts) > 0) {\n+        message("Factors with only one level will be excluded:")\n+        message(exclude_facts)\n+        form_factors <- phenotype_factors[\n+            !(phenotype_factors %in% exclude_facts)]\n+    }\n     lm_beta_meth <- lm(as.formula(\n-        paste("ct.prop", paste(phenotype_factors, collapse = " + "),\n-              sep = " ~ ")),\n-        data = subset(m_prop_ana, Method == meth))\n-    print(paste0("Summary: ", meth))\n+        paste("ct.prop", paste(form_factors, collapse = " + "),\n+              sep = " ~ ")), data = sub_data)\n+    message(paste0("Summary: ", meth))\n     capture.output(summary(lm_beta_meth),\n-                   file = paste0("report_data/summ_", meth, ".txt"))\n+                   file = paste0("report_data/summ_Log of ",\n+                                 meth,\n+                                 " fitting.txt"))\n }\n'
b
diff -r 224721e76869 -r 3ca0132c182a scripts/inspect.R
--- a/scripts/inspect.R Sun Sep 12 19:48:48 2021 +0000
+++ b/scripts/inspect.R Fri Nov 26 15:54:51 2021 +0000
[
@@ -6,17 +6,19 @@
 source(args[1])
 
 printout <- function(text) {
-    if (typeof(text) %in% c("list", "vector")) {
+    if (typeof(text) %in% c("list", "vector", "integer", "double", "numeric")) {
         write.table(text, file = outfile_tab, quote = F, sep = "\t",
                     col.names = NA)
     } else {
         ## text
+        print(typeof(text))
         capture.output(text, file = outfile_tab)  # nolint
     }
 }
 
-if (inspector %in% c("print", "pData", "fData", "dims", "experimentData",
-                     "exprs", "signature", "annotation", "abstract")) {
+if (inspector %in% c("print", "pData", "fData", "dims",
+                     "experimentData", "protocolData", "exprs",
+                     "signature", "annotation", "abstract")) {
     op <- get(inspector)
     tab <- op(rds_eset)
     printout(tab)
b
diff -r 224721e76869 -r 3ca0132c182a test-data/EMTABesethealthy.subset.rds
b
Binary file test-data/EMTABesethealthy.subset.rds has changed
b
diff -r 224721e76869 -r 3ca0132c182a test-data/Mousebulkeset.rds
b
Binary file test-data/Mousebulkeset.rds has changed
b
diff -r 224721e76869 -r 3ca0132c182a test-data/Mousesubeset.degenesonly2.half.rds
b
Binary file test-data/Mousesubeset.degenesonly2.half.rds has changed
b
diff -r 224721e76869 -r 3ca0132c182a test-data/default_output.pdf
b
Binary file test-data/default_output.pdf has changed
b
diff -r 224721e76869 -r 3ca0132c182a test-data/dendro.pdf
b
Binary file test-data/dendro.pdf has changed
b
diff -r 224721e76869 -r 3ca0132c182a test-data/dendro_1.pdf
b
Binary file test-data/dendro_1.pdf has changed