Mercurial > repos > dlalgroup > pmids_to_pubtator_matrix
view simtext_app.R @ 0:3f4adc85ba5d draft
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
author | dlalgroup |
---|---|
date | Thu, 24 Sep 2020 02:01:50 +0000 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env Rscript ### SimText App ### # # The tool enables the exploration of data generated by text_to_wordmatrix or pmids_to_pubtator_matrix in a locally run ShinyApp. Features are word clouds for each initial search query, dimension reduction and hierarchical clustering of the binary matrix, and a table with words and their frequency among the search queries. # # Input: # # 1) Input 1: # Tab-delimited table with # - column with search queries starting with "ID_", e.g. "ID_gene" if initial search queries were genes # - column(s) with grouping factor(s) to compare pre-existing categories of the initial search queries with the grouping based on text. The column names should start with "GROUPING_". If the column name is "GROUPING_disorder", "disorder" will be shown as a grouping variable in the app. # 2) Input 2: # Output of text_to_wordmatrix or pmids_to_pubtator_matrix, or binary matrix. # # optional arguments: # -h, --help show help message # -i INPUT, --input INPUT input file name. add path if file is not in working directory # -m MATRIX, --matrix MATRIX matrix file name. add path if file is not in working directory # -p PORT, --port PORT specify port, otherwise randomly selected # #Output: #Shiny app with word clouds, dimensionality reduction plot, dendrogram of hierarchical clustering and table with words and their frequency among the entities. # #Packages if ( '--install_packages' %in% commandArgs()) { print('Installing packages') if (!require('shiny')) install.packages('shiny', repo="http://cran.rstudio.com/"); if (!require('plotly')) install.packages('plotly', repo="http://cran.rstudio.com/"); if (!require('DT')) install.packages('DT', repo="http://cran.rstudio.com/"); if (!require('shinycssloaders')) install.packages('shinycssloaders', repo="http://cran.rstudio.com/"); if (!require('shinythemes')) install.packages('shinythemes', repo="http://cran.rstudio.com/"); if (!require('tableHTML')) install.packages('tableHTML', repo="http://cran.rstudio.com/"); if (!require('argparse')) install.packages('argparse', repo="http://cran.rstudio.com/"); if (!require('PubMedWordcloud')) install.packages('PubMedWordcloud', repo="http://cran.rstudio.com/"); if (!require('ggplot2')) install.packages('ggplot2', repo="http://cran.rstudio.com/"); if (!require('stringr')) install.packages('stringr', repo="http://cran.rstudio.com/"); if (!require('tidyr')) install.packages('tidyr', repo="http://cran.rstudio.com/"); if (!require('magrittr')) install.packages('magrittr', repo="http://cran.rstudio.com/"); if (!require('plyr')) install.packages('plyr', repo="http://cran.rstudio.com/"); if (!require('ggpubr')) install.packages('ggpubr', repo="http://cran.rstudio.com/"); if (!require('rafalib')) install.packages('rafalib', repo="http://cran.rstudio.com/"); if (!require('RColorBrewer')) install.packages('RColorBrewer', repo="http://cran.rstudio.com/"); if (!require('dendextend')) install.packages('dendextend', repo="http://cran.rstudio.com/"); if (!require('Rtsne')) install.packages('Rtsne', repo="http://cran.rstudio.com/"); if (!require('umap')) install.packages('umap', repo="http://cran.rstudio.com/"); } suppressPackageStartupMessages(library("shiny")) suppressPackageStartupMessages(library("plotly")) suppressPackageStartupMessages(library("DT")) suppressPackageStartupMessages(library("shinycssloaders")) suppressPackageStartupMessages(library("shinythemes")) suppressPackageStartupMessages(library("tableHTML")) suppressPackageStartupMessages(library("argparse")) suppressPackageStartupMessages(library("PubMedWordcloud")) suppressPackageStartupMessages(library("ggplot2")) suppressPackageStartupMessages(library("stringr")) suppressPackageStartupMessages(library("tidyr")) suppressPackageStartupMessages(library("magrittr")) suppressPackageStartupMessages(library("plyr")) suppressPackageStartupMessages(library("ggpubr")) suppressPackageStartupMessages(library("rafalib")) suppressPackageStartupMessages(library("RColorBrewer")) suppressPackageStartupMessages(library("dendextend")) suppressPackageStartupMessages(library("Rtsne")) suppressPackageStartupMessages(library("umap")) #command arguments parser <- ArgumentParser() parser$add_argument("-i", "--input", help = "input file name. add path if file is not in working directory") parser$add_argument("-m", "--matrix", default= NULL, help = "matrix file name. add path if file is not in working directory") parser$add_argument("--host", default=NULL, help="Specify host") parser$add_argument("-p", "--port", type="integer", default=NULL, help="Specify port, otherwise randomly select") parser$add_argument("--install_packages", action="store_true", default=FALSE, help="If you want to auto install missing required packages.") args <- parser$parse_args() # Set host if(!is.null(args$host)){ options(shiny.host = args$host) } # Set port if(!is.null(args$port)){ options(shiny.port = args$port) } #load data data = read.delim(args$input, stringsAsFactors=FALSE) index_grouping = grep("GROUPING_", names(data)) names(data)[index_grouping] = sub(".*_", "",names(data)[index_grouping]) colindex_id = grep("^ID_", names(data)) matrix = read.delim(args$matrix, check.names = FALSE, header = TRUE, sep='\t') matrix = (as.matrix(matrix)>0) *1 #transform matrix to binary matrix ##### UI ###### ui <- shinyUI(fluidPage( navbarPage(theme = shinytheme("flatly"), id = "inTabset",selected = "panel1", title = "SimText", tabPanel("Home", value = "panel1", tabPanel("Results", value = "panel1", fluidRow(width=12, offset=0, column(width = 4, style = "padding-right: 0px", wellPanel(h5(strong("ID of interest")), style = "background-color:white; border-bottom: 2px solid #EEEEEE; border-top-color: white; border-right-color: white; border-left-color: white; box-shadow: 0px 0px 0px white; padding:3px; width: 100%"), selectInput('ID', 'Select ID:', paste0(data[[colindex_id]]," (",seq(1,length(data[[colindex_id]])),")"))), column(width = 3, style = "padding-right: 0px", wellPanel(h5(strong("Color settings")), style = "background-color:white; border-bottom: 2px solid #EEEEEE; border-top-color: white; border-right-color: white; border-left-color: white; box-shadow: 0px 0px 0px white; padding:3px; width: 100%"), radioButtons('colour', 'Color by:', c("Grouping variable", "Individual word")), selectInput("colour_select", "Select:", choices=c(names(data)[index_grouping]))) ), fluidRow(width = 12, offset = 0, column(width = 4, #style = "height:650px;", wellPanel(textOutput("ID"), style = "background-color: #333333; color: white; border-top-color: #333333; border-left-color: #333333; border-right-color: #333333; box-shadow: 3px 3px 3px #d8d8d8; margin-bottom: 0px; padding:5px"), wellPanel( fluidRow( column(width = 4, numericInput('fontsize', 'Font size:',value = 7, min=1, max=50)), column(width = 4, numericInput('nword', 'Word number:',value = 50, min=1, max=100)), column(width = 12, withSpinner(plotOutput("WordcloudPlot",height= "325px"))), column(width = 12, downloadLink("downloadWordcloud", "Download"))), style = "background-color: #ffffff; border-bottom-color: #333333; border-left-color: #333333; height: 470px; border-right-color: #333333; box-shadow: 3px 3px 3px #d8d8d8; margin-top: 0px"), wellPanel(textOutput("Table"), style = "background-color: #333333; color: white; border-top-color: #333333; border-left-color: #333333; border-right-color: #333333; box-shadow: 3px 3px 3px #d8d8d8; margin-bottom: 0px; padding:5px"), wellPanel(withSpinner(DT::dataTableOutput("datatable", height= "150px")), style = "background-color: #ffffff; border-bottom-color: #333333; border-left-color: #333333; border-right-color: #333333; height: 175px; box-shadow: 3px 3px 3px #d8d8d8; margin-top: 0px") ), column(width = 8, #style='padding:0px;', wellPanel("T-SNE plot of wordmatrix", style = "background-color: #333333; color: white; border-top-color: #333333; border-left-color: #333333; border-right-color: #333333; box-shadow: 3px 3px 3px #d8d8d8; margin-bottom: 0px; padding:5px"), wellPanel( fluidRow( column(width = 2, radioButtons('method', 'Method:',choices=c("t-SNE","UMAP"))), column(width = 2, numericInput('perplexity', 'Perplexity:',value = 2, min=1, max=nrow(data)-1)), column(width = 2, radioButtons('label', 'Labels:',choices=c("Index","IDs"))), column(width = 2, numericInput('labelsize', 'Label size:',value = 12, min=1, max=30)), column(width = 8, style='padding:0px;', withSpinner(plotlyOutput("TsnePlot",height=550))), column(width = 4, style='padding:0px;', withSpinner(plotOutput("TsnePlot_legend",height=550))), column(width=2, downloadLink("downloadPlotdata",label = "Download data"))), style = "background-color: white; border-bottom-color: #333333; border-left-color: #333333; border-right-color: #333333; box-shadow: 3px 3px 3px #d8d8d8; margin-top: 0px" #height=575px ))), fluidRow(column(width = 12, wellPanel("Hierarchical clustering of wordmatrix", style = "background-color: #333333; color: white; border-top-color: #333333; border-left-color: #333333; border-right-color: #333333; box-shadow: 3px 3px 3px #d8d8d8; margin-bottom: 0px; padding:5px") , wellPanel( fluidRow( column(width = 2, radioButtons('hcmethod', 'Method:',choices=c("ward.D2","average","complete","single"))), column(width = 2, numericInput('labelsize_hc', 'Label size:', value = 8, min=1, max=30)) ), fluidRow( column(width = 9, withSpinner(plotOutput("hclust"))), column(width = 3, withSpinner(plotOutput("hclust_legend"))) ), style = "background-color: #ffffff; border-bottom-color: #333333; border-left-color: #333333; border-right-color: #333333; box-shadow: 3px 3px 3px #d8d8d8; margin-top: 0px") , verbatimTextOutput("test") )) )) # , #tabPanel("About", value = "panel2", h3("")) ))) ###### SERVER ###### server <- function(input, output, session) { ##### Global ##### IDs = reactive(paste0(data[[colindex_id]]," (",seq(1,length(data[[colindex_id]])),")")) index_ID = reactive({which(IDs() == input$ID)}) ##### Wordcloud plot and download ###### output$ID <- renderText({ paste("Wordcloud of",data[[colindex_id]][index_ID()]) }) output$WordcloudPlot <- renderPlot({ ID_matrix = matrix[index_ID(),] ID_matrix = data.frame(word= as.character(names(ID_matrix)), freq= ID_matrix) colnames(ID_matrix) = c("word", "freq") ID_matrix = ID_matrix[ID_matrix$freq == 1,] plotWordCloud(ID_matrix, max.words = min(nrow(ID_matrix),input$nword), scale= c(input$fontsize/10, input$fontsize/10), colors= brewer.pal(8,"Greys")[4:8]) }) output$downloadWordcloud <- downloadHandler( filename = function() { paste0(paste0("Wordcloudof",data[[colindex_id]][index_ID()]),".pdf", sep="") }, content = function(file) { ID_matrix = matrix[index_ID(),] ID_matrix = data.frame(word= names(ID_matrix), freq= ID_matrix) colnames(ID_matrix) = c("word", "freq") ID_matrix = ID_matrix[ID_matrix$freq == 1,] pdf(file) plotWordCloud(ID_matrix, max.words = min(max(nrow(ID_matrix)),input$nword), scale= c(input$fontsize/10, input$fontsize/10), colors= brewer.pal(8,"Greys")[4:8]) dev.off() } ) ##### Table ##### output$Table <- renderText({ paste("Most occuring words among IDs") }) output$datatable <- DT::renderDataTable({ colsum_data= data.frame(word=colnames(matrix), freq=colSums(matrix)) colsum_data = colsum_data[order(colsum_data$freq, decreasing = T),] colnames(colsum_data) = c("Word", paste0("IDs (total n=", nrow(matrix),")")) DT::datatable(colsum_data, extensions = c("Buttons"), rownames = F, fillContainer = T, escape=FALSE, options = list(dom = "t", scrollY = min(nrow(colsum_data),500), scrollX= TRUE, scroller = TRUE, autoWidth = TRUE, pageLength = nrow(colsum_data), columnDefs = list(list(className = 'dt-center', targets = "_all"), list(width = '50%', targets = "_all"))) ) }) ##### Colour/Grouping ##### outVar <- reactive({ if(input$colour == "Grouping variable"){ return(names(data)[index_grouping]) } else { return(colnames(matrix)) } }) observe({ updateSelectInput(session, "colour_select", choices = outVar())}) colour_choice = reactive({ if(input$colour == "Grouping variable"){ return(as.factor(data[,input$colour_select])) } else { matrix = as.data.frame(matrix) colour_byword = matrix[[input$colour_select]] colour_byword = ifelse(colour_byword > 0,"Selected word associated with ID","Selected word not associated with ID") return(as.factor(colour_byword)) } }) color_palette = reactive({palette=c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", "#E31A1C", "#FDBF6F", "#FF7F00", "#CAB2D6", "#6A3D9A", "#00AFBB", "#E7B800", "#FC4E07", "#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00") return( palette[1:length(levels(colour_choice()))] ) }) ##### Dimension reduction plot and download ##### data.dimred = reactive({ if (input$method == "t-SNE"){ tsne_result <- Rtsne(matrix, perplexity = input$perplexity, check_duplicates=F) data["X_Coord"] = tsne_result$Y[,1] data["Y_Coord"] = tsne_result$Y[,2] return(data) } else if (input$method == "UMAP"){ umap_result = umap(matrix) data["X_Coord"] = umap_result$layout[,1] data["Y_Coord"] = umap_result$layout[,2] return(data) } }) output$TsnePlot <- renderPlotly({ if (input$label == "Index") { labeling = as.character(seq(1,nrow(data))) } else if (input$label == "IDs") { labeling= as.character(data[[colindex_id]]) } p = plot_ly(colors = color_palette()) %>% add_trace(type="scatter", mode = 'markers', x = data.dimred()$X_Coord[index_ID()], y = data.dimred()$Y_Coord[index_ID()], opacity=0.15, marker = list( color = "grey", size = 80)) %>% add_trace(x=data.dimred()$X_Coord, y=data.dimred()$Y_Coord, type="scatter", mode="text", text= labeling, textfont = list(size= input$labelsize), color = factor(colour_choice())) %>% add_trace(x=data.dimred()$X_Coord, y=data.dimred()$Y_Coord, type="scatter", mode="markers", opacity=0, text= paste0( "ID: ",data[[colindex_id]], "\n", "Index: ",seq(1,nrow(data)), "\n", "Grouping: ", paste(data[,index_grouping])), hoverinfo = "text", color = factor(colour_choice())) %>% layout(showlegend = FALSE, yaxis= list(title = "", zeroline = FALSE, linecolor = toRGB("black"), linewidth = 1, showticklabels = FALSE, showgrid = FALSE), xaxis = list(title = "", zeroline = FALSE, linecolor = toRGB("black"), linewidth = 1, showticklabels = FALSE, showgrid = FALSE), autosize = T) %>% config(modeBarButtonsToRemove = c("zoomIn2d", "zoomOut2d", "hoverClosestGeo", "hoverClosestGl2d", "toImage", "hoverClosestCartesian", "lasso2d", "select2d", "resetScale2d", "hoverCompareCartesian", "hoverClosestPie", "toggleSpikelines"), displaylogo = FALSE) %>% style(hoverinfo = "none", traces = c(1,2)) p }) #legend of plotly plot by ggplot output$TsnePlot_legend <- renderPlot({ p = ggplot(data, aes(x=1, y=1)) + geom_text(aes(label=seq(1,nrow(data)), colour=factor(colour_choice())), size=3.5, fontface = "bold") + theme_classic()+ scale_color_manual(values = color_palette())+ theme(legend.title = element_blank())+ theme(legend.position = "right")+ theme(legend.text=element_text(size=9)) leg <- get_legend(p) as_ggplot(leg) }) output$downloadPlotdata <- downloadHandler( filename = function() { paste0(input$method,"_coordinates.csv") }, content = function(file) { write.csv(data.dimred(), file, row.names = F) } ) ##### Hierarchical clustering ####### output$hclust <- renderPlot({ set.seed(42) clustering=hclust(dist(matrix), method=input$hcmethod) par(oma=c(3,3,3,3)) palette(color_palette()) par(mar = rep(0, 4)) myplclust(clustering, labels=paste(data[[colindex_id]]), lab.col=as.fumeric(as.character(colour_choice()), levels = sort(unique(as.character(colour_choice())))), cex=as.numeric(input$labelsize_hc/10), main="", yaxt="n", ylab= "") }) #legend output$hclust_legend <- renderPlot({ p = ggplot(data, aes(x=1, y=1)) + geom_text(aes(label=seq(1,nrow(data)), colour=factor(colour_choice())), fontface = "bold") + theme_classic()+ scale_color_manual(values = color_palette())+ theme(legend.title = element_blank())+ theme(legend.position = "right")+ theme(legend.text=element_text(size=9)) leg <- get_legend(p) as_ggplot(leg) }) ##### Test field for development ###### #output$test <- renderPrint({ #}) } ###### APP ###### shinyApp(ui, server)