view 2_per_base_N_content.Rmd @ 2:58f3c3128fdd draft

planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit 9285c2b8ad41a486dde2a87600a6b8267841c8b5-dirty
author mingchen0919
date Tue, 08 Aug 2017 10:40:31 -0400
parents d732d4526c6d
children
line wrap: on
line source

---
title: "Per Base N Content"
output: html_document
---

```{r setup, include=FALSE, warning=FALSE, message=FALSE}
# Set the default `echo` option for every later chunk in this document.
# NOTE(review): `ECHO` is not defined anywhere in this file; presumably it is a
# placeholder replaced with TRUE/FALSE before the document is rendered -- confirm
# against the tool wrapper before changing this line's text.
knitr::opts_chunk$set(echo = ECHO)
```

## Per Base N Content

```{r}
# Build PBNC_df: one long table of per-base N content across all samples.
#
# 'PBNC_file_paths.txt' is read as a CSV with a header row. Column 1 is used as
# the sample id and column 2 as the path to that sample's tab-separated table.
# Each table must provide an `X.Base` column (presumably read.csv's sanitised
# form of a '#Base' header -- confirm) holding either a single position ('7')
# or a range ('10-14').
PBNC_file_paths <- read.csv('PBNC_file_paths.txt',
                            header = TRUE, stringsAsFactors = FALSE)

# Collect the per-sample tables in a preallocated list and bind them once at
# the end, instead of growing a data frame with rbind() on every iteration.
pbnc_list <- vector('list', nrow(PBNC_file_paths))

# seq_len() rather than 1:nrow(): with an empty listing, 1:0 would iterate over
# c(1, 0) and try to read a non-existent row.
for (i in seq_len(nrow(PBNC_file_paths))) {
  # file_path = paste0('REPORT_OUTPUT_DIR/', PBNC_file_paths[i,2])
  file_path <- PBNC_file_paths[i, 2]
  pbnc_df <- read.csv(file_path,
                      sep = '\t', header = TRUE, stringsAsFactors = FALSE) %>%
    # Split 'start-end' labels; a single-position label has no second part, so
    # Base2 becomes NA for those rows.
    mutate(Base1 = as.numeric(str_split_fixed(X.Base, '-', 2)[, 1]),
           Base2 = as.numeric(str_split_fixed(X.Base, '-', 2)[, 2])) %>%
    (function(df) {
      # Emit one row for the start of every label ...
      df1 <- select(df, -Base2)
      # ... plus one row for the end of every ranged label. Base2 is numeric
      # here, so test for NA explicitly instead of comparing against ''.
      df2 <- select(df, -Base1) %>% filter(!is.na(Base2))
      # Both halves get the same third column name so they can be stacked.
      colnames(df1) <- c(colnames(df1)[1:2], 'Base')
      colnames(df2) <- c(colnames(df2)[1:2], 'Base')
      rbind(df1, df2) %>% arrange(Base)
    })
  pbnc_df$sample_id <- rep(PBNC_file_paths[i, 1], nrow(pbnc_df))
  pbnc_list[[i]] <- pbnc_df
}

# do.call(rbind, list()) would return NULL, so keep the empty-data-frame result
# when there were no files to read.
PBNC_df <- if (length(pbnc_list) > 0) {
  do.call(rbind, pbnc_list)
} else {
  data.frame()
}
```


```{r}
# Scale N.Count by 100 for the "N %" axis. This overwrites the column in place,
# so re-running this chunk without re-running the loading chunk scales it again.
# NOTE(review): assumes the input values are fractions; confirm they are not
# already percentages, in which case this multiplication should be dropped.
PBNC_df$N.Count <- PBNC_df$N.Count * 100

# Upper limit for the y axis: 5 above the largest observed value. The axis
# otherwise auto-scales to the data, which leaves the "N = 5%" reference line
# outside the plotted range whenever every sample stays below 5%.
# (The name `max_phred` is kept as-is in case other code refers to it.)
max_phred <- max(PBNC_df$N.Count) + 5

# One line per sample; Base is converted to character so positions are drawn
# as categories on the x axis.
hchart(PBNC_df, "line",
       hcaes(x = as.character(Base), y = N.Count, group = sample_id)) %>%
  hc_title(
    text = "Per Base N Content"
  ) %>%
  hc_xAxis(
    title = list(text = "Base Position")
  ) %>%
  hc_yAxis(
    title = list(text = "N %"),
    max = max_phred,
    # Dashed red reference line at 5%.
    plotLines = list(
      list(label = list(text = "N = 5%"),
           width = 2,
           dashStyle = "dash",
           color = "red",
           value = 5)
    )
  ) %>%
  hc_exporting(enabled = TRUE)
```