annotate 01_skewer_analysis.Rmd @ 0:a42e58c71e5b draft default tip

planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
author mingchen0919
date Sun, 30 Dec 2018 12:55:49 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
1 ---
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
2 title: 'Skewer Analysis'
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
3 output:
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
4 html_document:
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
5 highlight: pygments
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
6 ---
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
7
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
8 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
9 knitr::opts_chunk$set(error = TRUE, echo = FALSE)
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
10 ```
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
11
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
12
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
13 ## Job script
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
14
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
15 ```{bash echo=FALSE}
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
16 sh ${TOOL_INSTALL_DIR}/build-and-run-job-scripts.sh
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
17 ```
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
18
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
19 ```{r echo=FALSE,warning=FALSE,results='asis'}
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
20 # display content of the job-script.sh file.
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
21 cat('```bash\n')
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
22 cat(readLines(paste0(Sys.getenv('REPORT_FILES_PATH'), '/job-1-script.sh')), sep = '\n')
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
23 cat('\n```')
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
24 ```
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
25
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
26
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
27 # Results summary
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
28
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
29 ## Reads processing summary
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
30
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
31 ```{r echo=TRUE}
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
32 log = readLines(paste0(Sys.getenv('REPORT_FILES_PATH'), '/trim-trimmed.log'))
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
33 start_line = grep('read.+processed; of these:', log)
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
34 end_line = grep('untrimmed.+available after processing', log)
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
35 processing_summary = gsub('(\\d+) ', '\\1\t', log[start_line:end_line])
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
36 processing_summary_df = do.call(rbind, strsplit(processing_summary, '\t'))
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
37 colnames(processing_summary_df) = c('Total reads:', processing_summary_df[1,1])
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
38 knitr::kable(processing_summary_df[-1, ])
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
39 ```
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
40
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
41 ## Length distribution of reads after trimming
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
42
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
43 ```{r echo=TRUE, message=FALSE, warning=FALSE}
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
44 start_line = grep('length count percentage', log)
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
45 len_dist = log[(start_line):length(log)]
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
46 len_dist = do.call(rbind, strsplit(len_dist, '\t'))
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
47 columns = len_dist[1, ]
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
48 len_dist = as.data.frame(len_dist[-1, ])
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
49 colnames(len_dist) = columns
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
50
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
51 library(plotly)
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
52 library(ggplot2)
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
53 len_dist$count = as.numeric(len_dist$count)
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
54 labels = as.character(len_dist$length)
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
55 len_dist$length = 1:nrow(len_dist)
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
56 pp = ggplot(data = len_dist, aes(length, count)) +
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
57 geom_line(color='red') +
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
58 scale_x_continuous(name = 'Length',
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
59 breaks = 1:nrow(len_dist),
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
60 labels = labels) +
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
61 theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
62 ylab('Count') +
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
63 ggtitle('Length distribution')
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
64 ggplotly(pp)
a42e58c71e5b planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
mingchen0919
parents:
diff changeset
65 ```