annotate paired_fastq_filtering.R @ 14:62fefa284036 draft

Uploaded
author petr-novak
date Fri, 07 Feb 2020 06:06:47 -0500
parents 378565f5a875
children f1738f8649b0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
1 #!/usr/bin/env Rscript
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
2 # TODO - add triming from start and end, removing short reads
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
3 library(optparse, quiet = TRUE)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
4 matricesSum = function(m1,m2){
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
5 mfin = matrix(0, nrow = nrow(m2), ncol = max(ncol(m1), ncol(m2)))
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
6 rownames(mfin) = rownames(m2)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
7 if (sum(m1)>0){
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
8 mfin[,1:ncol(m1)] = mfin[,1:ncol(m1)] + m1
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
9 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
10 if (sum(m2)>0){
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
11 mfin[,1:ncol(m2)] = mfin[,1:ncol(m2)] + m2
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
12 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
13
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
14 return(mfin)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
15 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
16 megablast = function(fx,params="-F \"m D\" -D 4 -p 90 ",database){
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
17 # assume blastn and makeblastdb in $PATH
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
18 tmp_in = tempfile()
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
19 tmp_out = tempfile()
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
20 writeFasta(fx,file=tmp_in)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
21 ## check if db is present:
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
22 dbfiles = paste0(database, ".nhr")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
23 if (!file.exists(dbfiles)){
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
24 cat("running makeblastdb\n")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
25 system(paste0("makeblastdb -dbtype nucl -in ", database))
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
26 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
27 params = '-num_alignments 1 -num_threads 4 -outfmt "6 qseqid qcovs pident" '
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
28 cmd=paste("blastn", " -db", database, "-query", tmp_in,
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
29 "-out", tmp_out, " ", params)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
30 status=system(cmd,intern=TRUE)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
31 if (file.info(tmp_out)$size != 0){
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
32 results=read.table(tmp_out,sep="\t",as.is=TRUE,comment.char="")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
33 colnames(results) = c("qseqid", "qcovs", "pident")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
34 }else{
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
35 results=NULL
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
36 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
37 unlink(tmp_in)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
38 unlink(tmp_out)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
39 return(results)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
40 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
41
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
42
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
43 plotFrequencies = function(x){
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
44 par(xaxs = 'i', yaxs = 'i')
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
45 plot(NULL, xlim = c(1,ncol(x)), ylim = c(0,1),type ='n', xlab="position", ylab = "proportion")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
46 col = c(A="red", "T"="orange", C="blue", G="green")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
47 for (n in c("A","C", "T","G")){
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
48 points(x[n,]/colSums(x), type = 'l', col = col[n])
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
49 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
50 grid(ny = 50, nx = ncol(x) - 1 )
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
51 legend("topright", col = col, lwd = 2, legend = names(col))
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
52 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
53
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
54 option_list = list(make_option(c("-a", "--fastqA"), action = "store", type = "character",
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
55 help = "fastq file A", default = NA), make_option(c("-b", "--fastqB"), action = "store",
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
56 type = "character", help = "fastq file B", default = NA), make_option(c("-x",
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
57 "--fastqX"), action = "store", type = "character", help = "output fastq file X",
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
58 default = NA), make_option(c("-y", "--fastqY"), action = "store", type = "character",
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
59 help = "output fastq file Y", default = NA), make_option(c("-c", "--cut_off"),
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
60 action = "store", type = "numeric", help = "Quality cut-off value [default %default]",
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
61 default = 10), make_option(c("-r", "--rnd_seed"), action = "store", type = "numeric",
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
62 help = "seed for random number generator [default %default]", default = 123),
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
63 make_option(c("-G", "--png_file_output"), action = "store", type = "character", default=NA),
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
64 make_option(c("-p", "--percent_above"), action = "store", type = "numeric", help = "Percent of bases in sequence that must have quality equal to / higher than cut-off value [default %default]",
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
65 default = 95), make_option(c("-e", "--trim_end"), action = "store", type = "numeric",
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
66 help = "trimming - end position [no trimming by default]", default = NA),
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
67 make_option(c("-s", "--trim_start"), action = "store", type = "numeric", help = "triming position - start [no trimming by default]",
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
68 default = NA), make_option(c("-n", "--sample_size"), action = "store", type = "numeric",
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
69 help = "requested sample size (number of pairs)[no sampling by default]",
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
70 default = NULL), make_option(c("-N", "--Max_N"), action = "store", type = "integer",
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
71 help = "maximum number of Ns in sequences [default %default]", default = 0),
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
72 make_option(c("-R", "--rename"), action = "store_true", type = "logical", help = "Rename sequences",
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
73 default = FALSE), make_option(c("-f", "--format"), action = "store", type = "character",
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
74 help = "format of output - fastq or fasta [default %default] ", default = "fasta"),
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
75 make_option(c("-C", "--cutadapt_options"), action = "store", type = "character",
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
76 help = "file specifying cutadapt options", default = NULL), make_option(c("-j",
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
77 "--chunk_size"), action = "store", type = "numeric", help = "Number of sequences processed in single step. This option affect speed of processing and memory usage [default %default]",
5
378565f5a875 Uploaded
petr-novak
parents: 0
diff changeset
78 default = 1000000),
0
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
79 make_option(c('-F', '--filter_seq'),action='store',type='character',help='file specifying sequences for filtering (e.g. plastid DNA)',default=NULL)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
80 )
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
81
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
82 description = paste(strwrap("Script for filterinq fastq files generated from paired end sequencing
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
83 \t\t\t\t\t\t"),
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
84 collapse = "\n")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
85 # arguments whic hcould be modifed
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
86 cutadapt_arguments = paste(" --anywhere='AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT' ",
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
87 " --anywhere='AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT' ",
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
88 " --anywhere='GATCGGAAGAGCACACGTCTGAACTCCAGTCAC' --anywhere='ATCTCGTATGCCGTCTTCTGCTTG' ",
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
89 " --anywhere='CAAGCAGAAGACGGCATACGAGAT' --anywhere='GTGACTGGAGTTCAGACGTGTGCTCTTCCGATC' ",
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
90 "--error-rate=0.05 --times=1 --overlap=15 --discard ", " ", sep = "")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
91
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
92
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
93 epilogue = paste(strwrap(c("Description:\n
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
94 This tool is designed to make memory efficient preprocessing of two fastq files. Input files can be in GNU zipped archive (.gz extension).
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
95 Reads are filtered based on the quality, presence of N bases and adapters.
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
96 Two input fastq files are procesed in parallel. Cutaddapt program must be installed to use adapter filtering.
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
97 Only complete pair are kept. As the input files are process in chunks,
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
98 it is required that pair reads are complete and in the same order in both input files. All reads which pass the quality
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
99 filter fill be writen into ouytput files. If --sample size is specified, only sample of sequences will be returned.
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
100 By default cutadapt us run with this options:
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
101 \n ",
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
102 cutadapt_arguments, "\n\nIf you want to use different adapter sequences specify them in separate file and use -C --cutadapt options.
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
103 Consult cutadapt manual for usage. ")),
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
104 collapse = "\n")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
105
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
106 epilogue = paste(epilogue, "
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
107 fastqA fastqB
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
108 | |
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
109 V V
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
110 ----------------------------------
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
111 | Trimming (optional) |
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
112 ----------------------------------
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
113 | |
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
114 | |
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
115 V V
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
116 ----------------------------------
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
117 | Filter by quality |
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
118 ----------------------------------
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
119 | |
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
120 | |
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
121 V V
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
122 ---------------------------------
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
123 | Discard single reads |--->
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
124 | Keep complete pairs |
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
125 ---------------------------------
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
126 | |
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
127 V V
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
128 ----------------------------------
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
129 | cutadapt filtering |
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
130 ----------------------------------
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
131 | |
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
132 V V
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
133 ---------------------------------
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
134 | Discard single reads |--->
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
135 | Keep complete pairs |
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
136 ---------------------------------
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
137 | |
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
138 | |
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
139 V V
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
140 ----------------------------------
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
141 | sample (optional) |
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
142 ----------------------------------
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
143 | |
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
144 | |
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
145 V V
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
146 FastqX FastqY
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
147 (or fastaX) (or fastaY)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
148
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
149 # example:
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
150
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
151 ",
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
152 sep = "\n")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
153
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
154
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
155 parser = OptionParser(option_list = option_list, epilogue = epilogue, description = description,
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
156 )
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
157
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
158
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
159
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
160 opt = parse_args(parser, args = commandArgs(TRUE))
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
161 if (!(opt$format %in% c("fasta", "fastq"))) {
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
162 stop("wrong output format")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
163 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
164 if (any(is.na(c(opt$fastqA, opt$fastqB, opt$fastqX, opt$fastqY)))) {
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
165 cat("\nInput ond output files must be specified!\n\n")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
166 print_help(parser)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
167 q()
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
168 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
169
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
170
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
171 if (!is.null(opt$cutadapt_options)) {
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
172 if (file.exists(opt$cutadapt_options)) {
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
173 cutadapt_arguments = scan(opt$cutadapt_options, what = "character", comment.char = "#")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
174
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
175 } else {
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
176 cutadapt_arguments = opt$cutadapt_options
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
177 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
178 ## remove 'EOF'
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
179 cutadapt_arguments = paste(" ", cutadapt_arguments, " ", collapse = " ", sep = " ")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
180 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
181
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
182
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
183
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
184 cutadapt_cmd = function(input, output, cutadapt_arguments) {
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
185 cmds = paste("cutadapt --format=", "fastq", cutadapt_arguments, " ", c(input),
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
186 " -o ", c(output), sep = "")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
187 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
188
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
189 set.seed(opt$rnd_seed)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
190 PROP = opt$percent_above/100
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
191 MINQ = opt$cut_off
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
192 TRIM_END = opt$trim_end
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
193 TRIM_START = opt$trim_start
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
194
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
195 CHUNK_SIZE = opt$chunk_size
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
196
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
197 MIN_LENGTH = 1 + TRIM_END - ifelse(is.na(TRIM_START), 1, TRIM_START)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
198 if (is.na(MIN_LENGTH)) {
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
199 MIN_LENGTH = 1
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
200 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
201
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
202 suppressPackageStartupMessages(library(ShortRead))
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
203 Qfilter = srFilter(function(x) {
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
204 rowSums(as(quality(x), "matrix") > MINQ, na.rm = TRUE)/nchar(sread(x)) >= PROP
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
205 })
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
206
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
207 Min_Length_filter = srFilter(function(x) {
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
208 nchar(sread(x)) >= MIN_LENGTH
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
209 })
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
210
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
211
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
212
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
213
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
214 fin_filter = compose(Qfilter, nFilter(opt$Max_N), Min_Length_filter)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
215
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
216 if (opt$format == "fasta") {
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
217 writeFun = writeFasta
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
218 } else {
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
219 writeFun = writeFastq
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
220 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
221
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
222 f1out = opt$fastqX
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
223 f2out = opt$fastqY
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
224 # are output files empty?:
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
225
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
226
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
227 # find wich character specify pair: - it could be
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
228 lA = readLines(opt$fastqA, n = 1)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
229 lB = readLines(opt$fastqB, n = 1)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
230 pair_tag_position = which(!strsplit(lA, "")[[1]] == strsplit(lB, "")[[1]])
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
231
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
232 if ((length(pair_tag_position)) != 1 & !opt$rename) {
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
233 warning("unable to determine pair tag")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
234 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
235
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
236 # pair_regex=paste(substring(lA,pair_tag_position-1,pair_tag_position-1),'.+',sep='')
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
237 # assume 1/ same order 2/ all id must be same
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
238
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
239 # check size of sequences - must be same...
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
240 n1 = countLines(opt$fastqA)/4
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
241
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
242
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
243 number_of_chunks = round(n1/CHUNK_SIZE)
5
378565f5a875 Uploaded
petr-novak
parents: 0
diff changeset
244 ## adjust chunk size to make last chunk of the same size is all other
378565f5a875 Uploaded
petr-novak
parents: 0
diff changeset
245 ## this is to avoid small last chunk
378565f5a875 Uploaded
petr-novak
parents: 0
diff changeset
246 CHUNK_SIZE = round(n1/number_of_chunks)
378565f5a875 Uploaded
petr-novak
parents: 0
diff changeset
247
0
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
248 if (number_of_chunks == 0) {
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
249 CHUNK_SIZE = n1
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
250 number_of_chunks = 1
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
251 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
252 if (!is.null(opt$sample_size)) {
5
378565f5a875 Uploaded
petr-novak
parents: 0
diff changeset
253 sample_size_in_chunk = round(opt$sample_size/number_of_chunks)
378565f5a875 Uploaded
petr-novak
parents: 0
diff changeset
254 n_missing = opt$sample_size - sample_size_in_chunk * number_of_chunks
0
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
255 } else {
5
378565f5a875 Uploaded
petr-novak
parents: 0
diff changeset
256 sample_size_in_chunk = CHUNK_SIZE
378565f5a875 Uploaded
petr-novak
parents: 0
diff changeset
257 n_missing = 0
0
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
258 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
259
5
378565f5a875 Uploaded
petr-novak
parents: 0
diff changeset
260 cat("number chunks ", number_of_chunks, "\n")
378565f5a875 Uploaded
petr-novak
parents: 0
diff changeset
261 cat("chunks size ", CHUNK_SIZE, "\n")
0
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
262 # adjust the chunk size to get exact count of sequences:
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
263 CHUNK_SIZE = ceiling(n1/number_of_chunks)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
264 F_id = ifelse(opt$rename, "/1", "1")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
265 R_id = ifelse(opt$rename, "/2", "2")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
266
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
267 f1 <- FastqStreamer(opt$fastqA, CHUNK_SIZE)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
268 f2 <- FastqStreamer(opt$fastqB, CHUNK_SIZE)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
269 total = 0
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
270 chunk = 0
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
271 nucleotideFrequenciesForward = nucleotideFrequenciesReverse = matrix(0)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
272 while (TRUE) {
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
273 chunk = chunk + 1
5
378565f5a875 Uploaded
petr-novak
parents: 0
diff changeset
274 cat("chunk number ", chunk, "\n")
0
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
275 fq1 <- yield(f1)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
276 fq2 <- yield(f2)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
277 if (length(fq1) == 0) {
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
278 break
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
279 }
5
378565f5a875 Uploaded
petr-novak
parents: 0
diff changeset
280 cat("chunk number ", chunk, " imported\n")
378565f5a875 Uploaded
petr-novak
parents: 0
diff changeset
281 cat("chunk size", length(fq1), "\n")
0
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
282 ## rename
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
283 chunk_id = sprintf(paste0("%0", round(log10(number_of_chunks)) + 1, "d"), chunk)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
284 cat("chunk id ", chunk_id, "\n")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
285 fmt = paste0("%0", round(log10(length(fq1))) + 1, "d")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
286
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
287 ## either do not rename
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
288 if (opt$rename) {
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
289 ## or use index
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
290 fq1@id = fq2@id = BStringSet(paste0(chunk_id, sprintf(fmt, 1:length(fq1))))
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
291 } else {
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
292 ## or use largest common substring
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
293 ind = mapply(function(x, y) which(x != y)[1], strsplit(as.character(id(fq1)),
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
294 split = ""), strsplit(as.character(id(fq2)), split = ""))
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
295 fq1@id = fq2@id = subseq(id(fq1), 1, ind - 1)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
296
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
297 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
298
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
299 if (!is.na(TRIM_END)) {
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
300 fq1 = narrow(fq1, end = ifelse((TRIM_END) > width(fq1), width(fq1), TRIM_END))
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
301 fq2 = narrow(fq2, end = ifelse((TRIM_END) > width(fq2), width(fq2), TRIM_END))
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
302 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
303 if (!is.na(TRIM_START)) {
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
304 fq1 = narrow(fq1, start = ifelse((TRIM_START) < width(fq1), TRIM_START, width(fq1)))
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
305 fq2 = narrow(fq2, start = ifelse((TRIM_START) < width(fq2), TRIM_START, width(fq2)))
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
306
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
307 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
308
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
309
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
310 fqF1 = fq1[fin_filter(fq1)]
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
311 fqF2 = fq2[fin_filter(fq2)]
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
312
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
313 inc1 = id(fqF1) %in% id(fqF2)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
314 inc2 = id(fqF2) %in% id(fqF1)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
315
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
316 cat("running cutadapt on chunk\n")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
317 # cut addapt here: # filter parts:
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
318 tmp_in1 = tempfile(fileext = ".fastq")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
319 tmp_out1 = tempfile(fileext = ".fastq")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
320 tmp_in2 = tempfile(fileext = ".fastq")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
321 tmp_out2 = tempfile(fileext = ".fastq")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
322
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
323
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
324 if (is.null(formals(writeFastq)$compress)) {
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
325 writeFastq(fqF1[inc1], file = tmp_in1)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
326 writeFastq(fqF2[inc2], file = tmp_in2)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
327 } else {
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
328 writeFastq(fqF1[inc1], file = tmp_in1, compress = FALSE)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
329 writeFastq(fqF2[inc2], file = tmp_in2, compress = FALSE)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
330 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
331
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
332
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
333 cmd1 = cutadapt_cmd(tmp_in1, tmp_out1, cutadapt_arguments)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
334 cmd2 = cutadapt_cmd(tmp_in2, tmp_out2, cutadapt_arguments)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
335 # this should run cutadapt in parallel
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
336
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
337 status = system(paste(cmd1, "& \n", cmd2, "&\nwait"), intern = TRUE)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
338 # collect results
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
339 ftmp1 = FastqFile(tmp_out1)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
340
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
341 fqF1 = readFastq(ftmp1)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
342 close(ftmp1)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
343
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
344 ftmp2 = FastqFile(tmp_out2)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
345 fqF2 = readFastq(ftmp2)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
346 close(ftmp2)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
347 ## clean up
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
348 unlink(tmp_out1)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
349 unlink(tmp_in1)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
350 unlink(tmp_out2)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
351 unlink(tmp_in2)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
352
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
353
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
354
5
378565f5a875 Uploaded
petr-novak
parents: 0
diff changeset
355 ## remove sequences similar to filter database (e.g. plastid DNA)
0
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
356 if (!is.null(opt$filter_seq)){
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
357 blast_results1 = megablast(fqF1, database=opt$filter_seq)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
358 blast_results2 = megablast(fqF2, database=opt$filter_seq)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
359 if (!is.null(blast_results1) & !is.null(blast_results2)){
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
360 exclude1=with(blast_results1, unique(qseqid[pident >= 90 & qcovs >=90]))
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
361 exclude2=with(blast_results2, unique(qseqid[pident >= 90 & qcovs >=90]))
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
362 ## note - blast will truncate seq ids - anything beyond space is omitted
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
363 seq_to_exclude1 = gsub(" .*","",id(fqF1)) %in% exclude1
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
364 seq_to_exclude2 = gsub(" .*","",id(fqF2)) %in% exclude2
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
365 excluded_contamination1=signif(sum(seq_to_exclude1)/length(seq_to_exclude1)*100,3)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
366 excluded_contamination2=signif(sum(seq_to_exclude2)/length(seq_to_exclude2)*100,3)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
367 cat(excluded_contamination1,"% filtered out after blast - forward reads\n" )
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
368 cat(excluded_contamination2,"% filtered out after blast - reverse reads\n" )
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
369 fqF1 = fqF1[!seq_to_exclude1]
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
370 fqF2 = fqF2[!seq_to_exclude2]
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
371 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
372 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
373
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
374
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
375
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
376
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
377 # filter complete pairs again: id1=gsub(pair_regex,'',id(fqF1))
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
378 # id2=gsub(pair_regex,'',id(fqF2))
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
379 inc1 = id(fqF1) %in% id(fqF2)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
380 inc2 = id(fqF2) %in% id(fqF1)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
381 total = sum(inc1) + total
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
382 ## create new id - last character must differentiate pair - for interlacig
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
383
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
384 fqF1@id = BStringSet(paste0(id(fqF1), F_id))
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
385 fqF2@id = BStringSet(paste0(id(fqF2), R_id))
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
386
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
387
5
378565f5a875 Uploaded
petr-novak
parents: 0
diff changeset
388 if (sum(inc1) > (sample_size_in_chunk + n_missing)) {
378565f5a875 Uploaded
petr-novak
parents: 0
diff changeset
389 smp = sort(sample(sum(inc1), sample_size_in_chunk + n_missing))
378565f5a875 Uploaded
petr-novak
parents: 0
diff changeset
390 n_missing = 0 ## this was to correct rounding error
0
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
391 writeFun(fqF1[inc1][smp], file = f1out, mode = "a")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
392 writeFun(fqF2[inc2][smp], file = f2out, mode = "a")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
393 nfrq1 = alphabetByCycle(sread(fqF1[inc1][smp]))
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
394 nfrq2 = alphabetByCycle(sread(fqF2[inc2][smp]))
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
395 } else {
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
396 writeFun(fqF1[inc1], file = f1out, mode = "a")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
397 writeFun(fqF2[inc2], file = f2out, mode = "a")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
398 nfrq1 = alphabetByCycle(sread(fqF1[inc1]))
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
399 nfrq2 = alphabetByCycle(sread(fqF2[inc2]))
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
400 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
401 nucleotideFrequenciesForward = matricesSum(nucleotideFrequenciesForward, nfrq1)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
402 nucleotideFrequenciesReverse = matricesSum(nucleotideFrequenciesReverse, nfrq2)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
403
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
404 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
405 if( !is.na(opt$png_file_output)){
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
406 png(opt$png_file_output, width = 1000, height = 1000)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
407 par(mfrow=c(2,1))
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
408 plotFrequencies(nucleotideFrequenciesForward)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
409 mtext("forward reads")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
410 plotFrequencies(nucleotideFrequenciesReverse)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
411 mtext("reverse reads")
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
412 dev.off()
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
413 }
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
414 close(f1)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
415 close(f2)
a4cd8608ef6b Uploaded
petr-novak
parents:
diff changeset
416