Mercurial > repos > iuc > dada2_mergepairs
changeset 3:0b884b080bb6 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit 8533fe71d1d50f09348da2dc34941724407a1ffe"
author | iuc |
---|---|
date | Tue, 14 Jul 2020 07:41:00 -0400 |
parents | 69900ffd3b8e |
children | 306a40d4bb8f |
files | macros.xml static/images/pairpipe.png static/images/pairpipe.svg test-data/gentest.R test-data/learnErrors.pdf test-data/learnErrors_R1.pdf |
diffstat | 6 files changed, 206 insertions(+), 193 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Mon Mar 16 08:22:08 2020 -0400 +++ b/macros.xml Tue Jul 14 07:41:00 2020 -0400 @@ -7,7 +7,7 @@ </requirements> </xml> - <token name="@DADA2_VERSION@">1.14</token> + <token name="@DADA2_VERSION@">1.16</token> <token name="@WRAPPER_VERSION@">0</token> <xml name="version_command"> @@ -124,6 +124,9 @@ .. image:: pairpipe.png +Note: In particular for the analysis of paired collections the collections should be sorted lexicographically +before the analysis. + For single-end data the steps "Unzip collection" and "mergePairs" are not necessary. More information may be found on the dada2 homepage:: https://benjjneb.github.io/dada2/index.html (in particular tutorials) or the documentation of dada2's R package https://bioconductor.org/packages/release/bioc/html/dada2.html (in particular the pdf which contains the full documentation of all parameters)
--- a/static/images/pairpipe.svg Mon Mar 16 08:22:08 2020 -0400 +++ b/static/images/pairpipe.svg Tue Jul 14 07:41:00 2020 -0400 @@ -24,7 +24,7 @@ <dc:format>image/svg+xml</dc:format> <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage" /> - <dc:title></dc:title> + <dc:title /> </cc:Work> </rdf:RDF> </metadata> @@ -37,16 +37,16 @@ guidetolerance="10" inkscape:pageopacity="0" inkscape:pageshadow="2" - inkscape:window-width="1920" - inkscape:window-height="1016" + inkscape:window-width="1680" + inkscape:window-height="986" id="namedview386" showgrid="false" inkscape:snap-global="true" inkscape:snap-bbox="false" inkscape:object-paths="true" - inkscape:zoom="1" - inkscape:cx="650.80177" - inkscape:cy="176.12189" + inkscape:zoom="2" + inkscape:cx="336.68624" + inkscape:cy="192.12189" inkscape:window-x="0" inkscape:window-y="27" inkscape:window-maximized="1" @@ -443,22 +443,22 @@ </marker> </defs> <path - style="stroke:#000000;marker-end:url(#id2)" - d="m 117.55976,114.5 h 42" + style="stroke:#000000;stroke-width:0.93333333;marker-end:url(#id2);stroke-miterlimit:4;stroke-dasharray:none" + d="m 117.55976,114.5 h 34.86" id="line138" inkscape:connector-curvature="0" sodipodi:nodetypes="cc" /> <rect height="30.245491" width="116.46159" - x="165.14262" - y="0.37725419" + x="174.14262" + y="3.3772583" id="rect80" style="fill:#ebd9b2;stroke:#000000;stroke-width:0.75450838" /> <text id="text188" - y="20.499996" - x="170.26537" + y="23.5" + x="179.26537" style="font-size:14px;font-family:Helvetica, Arial, FreeSans, Sans, sans, sans-serif">plotQualityProfile</text> <rect height="30.184877" @@ -483,9 +483,9 @@ inkscape:connector-curvature="0" id="path5591" d="m 971.55976,116.5 h 42.00004" - style="stroke:#000000;marker-end:url(#id2)" /> + style="stroke:#000000;marker-end:url(#id2);stroke-width:0.93333333;stroke-miterlimit:4;stroke-dasharray:none" /> <path - style="stroke:#000000;marker-end:url(#id2)" + 
style="stroke:#000000;marker-end:url(#id2);stroke-width:0.93333333;stroke-miterlimit:4;stroke-dasharray:none" d="m 1167.5598,116.5 h 42" id="path5593" inkscape:connector-curvature="0" @@ -494,8 +494,8 @@ sodipodi:nodetypes="cc" inkscape:connector-curvature="0" id="path5557" - d="m 255.55976,114.5 h 42" - style="stroke:#000000;marker-end:url(#id2)" /> + d="m 283.77925,110.5 h 33.78051" + style="stroke:#000000;stroke-width:0.93333333;marker-end:url(#id2);stroke-miterlimit:4;stroke-dasharray:none" /> <g id="g5364" transform="translate(-1226.0934,-496.75423)"> @@ -524,7 +524,7 @@ sodipodi:role="line">and addSpecies</tspan></text> </g> <path - style="stroke:#000000;marker-end:url(#id2)" + style="stroke:#000000;marker-end:url(#id2);stroke-width:0.93333333;stroke-miterlimit:4;stroke-dasharray:none" d="m 545.55976,90.499996 h 42" id="path5573" inkscape:connector-curvature="0" @@ -534,21 +534,21 @@ inkscape:connector-curvature="0" id="path5575" d="m 545.55976,150.5 h 42" - style="stroke:#000000;marker-end:url(#id2)" /> + style="stroke:#000000;marker-end:url(#id2);stroke-width:0.93333333;stroke-miterlimit:4;stroke-dasharray:none" /> <path sodipodi:nodetypes="cc" inkscape:connector-curvature="0" id="path5597" d="m 645.55976,90.499996 555.87494,-54.3773" - style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#id2)" /> + style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.93333333;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#id2);stroke-miterlimit:4;stroke-dasharray:none" /> <path - style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#id2)" + style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.93333333;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#id2);stroke-miterlimit:4;stroke-dasharray:none" d="M 645.55976,150.5 
1201.4347,46.233626" id="path5599" inkscape:connector-curvature="0" sodipodi:nodetypes="cc" /> <path - style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#id2)" + style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.93333333;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#id2);stroke-miterlimit:4;stroke-dasharray:none" d="M 779.55976,116.5 1201.4347,40.196646" id="path5601" inkscape:connector-curvature="0" @@ -576,9 +576,9 @@ inkscape:connector-curvature="0" id="path5583" d="m 645.55976,90.499996 46,20.000004" - style="stroke:#000000;marker-end:url(#id2)" /> + style="stroke:#000000;marker-end:url(#id2);stroke-width:0.93333333;stroke-miterlimit:4;stroke-dasharray:none" /> <path - style="stroke:#000000;marker-end:url(#id2)" + style="stroke:#000000;marker-end:url(#id2);stroke-width:0.93333333;stroke-miterlimit:4;stroke-dasharray:none" d="M 645.55976,150.5 692.7316,122.10054" id="path5585" inkscape:connector-curvature="0" @@ -588,7 +588,7 @@ inkscape:connector-curvature="0" id="path5561" d="m 421.55976,124.5 37.062,24" - style="stroke:#000000;marker-end:url(#id2)" /> + style="stroke:#000000;marker-end:url(#id2);stroke-width:0.93333333;stroke-miterlimit:4;stroke-dasharray:none" /> <path sodipodi:nodetypes="cc" inkscape:connector-curvature="0" @@ -602,14 +602,14 @@ inkscape:connector-curvature="0" sodipodi:nodetypes="cc" /> <path - style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#id2)" + style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.93333333;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#id2);stroke-miterlimit:4;stroke-dasharray:none" d="M 257.55976,114.5 C 274.37974,61.579996 445.40374,44.875026 606.50218,44.875906 739.16074,44.87663 971.24276,28.390716 1201.4347,30.159676" id="path5595" 
inkscape:connector-curvature="0" sodipodi:nodetypes="csc" /> <g id="g5293" - transform="translate(-212.17332,-513.99998)"> + transform="translate(-52.17332,-509.99998)"> <rect style="fill:#ebd9b2;stroke:#000000;stroke-width:0.73455542" id="rect84" @@ -624,48 +624,12 @@ id="text194">filterAndTrim</text> </g> <circle - cx="257.11923" - cy="114" + cx="417.11923" + cy="110.5" r="5" id="circle5305" style="font-family:Helvetica, Arial, FreeSans, Sans, sans, sans-serif;fill:#ffffff;stroke:#000000" /> <rect - height="30.07983" - width="111.46966" - x="304.38879" - y="101.46008" - id="rect88" - style="fill:#ebd9b2;stroke:#000000;stroke-width:0.92017078" /> - <text - id="text202" - y="121.5" - x="309.42871" - style="font-size:14px;font-family:Helvetica, Arial, FreeSans, Sans, sans, sans-serif">Unzip Collection</text> - <circle - style="font-family:Helvetica, Arial, FreeSans, Sans, sans, sans-serif;fill:#ffffff;stroke:#000000" - id="circle320" - r="5" - cy="109.5" - cx="416.42871" /> - <circle - style="font-family:Helvetica, Arial, FreeSans, Sans, sans, sans-serif;fill:#ffffff;stroke:#000000" - id="circle324" - r="5" - cy="109.5" - cx="416.42871" /> - <circle - style="font-family:Helvetica, Arial, FreeSans, Sans, sans, sans-serif;fill:#ffffff;stroke:#000000" - id="circle328" - r="5" - cy="124.5" - cx="416.42871" /> - <circle - style="font-family:Helvetica, Arial, FreeSans, Sans, sans, sans-serif;fill:#ffffff;stroke:#000000" - id="circle332" - r="5" - cy="124.5" - cx="416.42871" /> - <rect height="30.25116" width="82.581673" x="463.61261" @@ -756,7 +720,7 @@ inkscape:connector-curvature="0" id="path5605" d="M 971.55976,116.5 1201.4347,52.270596" - style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#id2)" /> + 
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.93333333;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#id2);stroke-miterlimit:4;stroke-dasharray:none" /> <g id="g5353" transform="translate(-994.97942,-607.3983)"> @@ -815,34 +779,34 @@ sodipodi:nodetypes="cc" inkscape:connector-curvature="0" id="path5609" - d="M 117.55976,114.5 161.14262,20.499995" - style="stroke:#000000;marker-end:url(#id2)" /> + d="M 121.78486,109.34886 167.41518,39.34955" + style="stroke:#000000;stroke-width:0.93333334;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#id2)" /> <rect style="fill:#ebd9b2;stroke:#000000;stroke-width:0.74489039" id="rect5370" - y="92.711388" + y="95.906334" x="0.3724452" width="117.18732" height="42.187313" /> <text id="text5376" - y="107.53393" + y="111.53393" x="2.1782999" style="font-style:normal;font-weight:normal;font-size:20px;line-height:100%;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" xml:space="preserve"><tspan style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:14.66666698px;line-height:0.99999998%;font-family:Helvetica, Arial, FreeSans, Sans, sans, sans-serif;-inkscape-font-specification:'Helvetica, Arial, FreeSans, Sans, sans, sans-serif'" - y="107.53393" + y="111.53393" x="2.1782999" id="tspan5372" sodipodi:role="line">Paired input </tspan><tspan id="tspan5380" style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:14.66666698px;line-height:0.99999998%;font-family:Helvetica, Arial, FreeSans, Sans, sans, sans-serif;-inkscape-font-specification:'Helvetica, Arial, FreeSans, Sans, sans, sans-serif'" - y="127.53393" + y="131.53394" x="2.1782999" sodipodi:role="line">dataset collection</tspan><tspan 
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:14.66666698px;line-height:0.99999998%;font-family:Helvetica, Arial, FreeSans, Sans, sans, sans-serif;-inkscape-font-specification:'Helvetica, Arial, FreeSans, Sans, sans, sans-serif'" id="tspan5374" - y="147.53392" + y="151.53394" x="2.1782999" sodipodi:role="line" /></text> <circle @@ -866,15 +830,15 @@ inkscape:connector-curvature="0" id="path1289" d="m 421.55976,110.5 37.062,-21.830514" - style="stroke:#000000;marker-end:url(#id2)" /> + style="stroke:#000000;marker-end:url(#id2);stroke-width:0.93333333;stroke-miterlimit:4;stroke-dasharray:none" /> <path - style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" + style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.93333333;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;stroke-miterlimit:4;stroke-dasharray:none" d="m 421.55976,110.5 c 37.23584,-60.959159 95.4379,-71.01965 170,-20.000004" id="path1291" inkscape:connector-curvature="0" sodipodi:nodetypes="cc" /> <path - style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" + style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.93333333;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;stroke-miterlimit:4;stroke-dasharray:none" d="m 421.55976,124.5 c 26.21758,79.15744 93.37522,68.88106 170,26" id="path1293" inkscape:connector-curvature="0" @@ -886,26 +850,78 @@ inkscape:connector-curvature="0" sodipodi:nodetypes="cc" /> <path - style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" + style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.93333333;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;stroke-miterlimit:4;stroke-dasharray:none" d="m 421.55976,124.5 274.19828,-2.49999" id="path1297" 
inkscape:connector-curvature="0" /> <rect style="fill:#ebd9b2;stroke:#000000;stroke-width:0.70499283" id="rect1299" - y="40.352497" - x="165.11786" + y="43.352493" + x="174.11786" width="101.51111" height="30.295006" /> <text style="font-size:14px;font-family:Helvetica, Arial, FreeSans, Sans, sans, sans-serif" - x="170.26537" - y="60.499992" + x="179.26537" + y="63.5" id="text1301">plotComplexity</text> <path - style="stroke:#000000;marker-end:url(#id2)" - d="M 117.55976,114.5 161.14262,60.499995" + style="stroke:#000000;stroke-width:0.93333334;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#id2)" + d="M 121.78486,109.34886 168.30515,76.42082" id="path1307" inkscape:connector-curvature="0" sodipodi:nodetypes="cc" /> + <circle + style="font-family:Helvetica, Arial, FreeSans, Sans, sans, sans-serif;fill:#ffffff;stroke:#000000" + id="circle1296" + r="5" + cy="123.25001" + cx="417.11923" /> + <rect + height="42.187313" + width="117.18732" + x="160.37244" + y="95.906334" + id="rect1298" + style="fill:#ebd9b2;stroke:#000000;stroke-width:0.74489039" /> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:20px;line-height:100%;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" + x="162.1783" + y="111.53393" + id="text1306"><tspan + sodipodi:role="line" + id="tspan1300" + x="162.1783" + y="111.53393" + style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:14.66666698px;line-height:0.99999998%;font-family:Helvetica, Arial, FreeSans, Sans, sans, sans-serif;-inkscape-font-specification:'Helvetica, Arial, FreeSans, Sans, sans, sans-serif'">Unzip & Sort</tspan><tspan + sodipodi:role="line" + x="162.1783" + y="131.53394" + style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:14.66666698px;line-height:0.99999998%;font-family:Helvetica, 
Arial, FreeSans, Sans, sans, sans-serif;-inkscape-font-specification:'Helvetica, Arial, FreeSans, Sans, sans, sans-serif'" + id="tspan1302">dataset collection</tspan><tspan + sodipodi:role="line" + x="162.1783" + y="151.53394" + id="tspan1304" + style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:14.66666698px;line-height:0.99999998%;font-family:Helvetica, Arial, FreeSans, Sans, sans, sans-serif;-inkscape-font-specification:'Helvetica, Arial, FreeSans, Sans, sans, sans-serif'" /></text> + <circle + style="font-family:Helvetica, Arial, FreeSans, Sans, sans, sans-serif;fill:#ffffff;stroke:#000000" + id="circle1310" + r="5" + cy="110.5" + cx="277.11923" /> + <circle + cx="277.11923" + cy="123.25001" + r="5" + id="circle1312" + style="font-family:Helvetica, Arial, FreeSans, Sans, sans, sans-serif;fill:#ffffff;stroke:#000000" /> + <path + style="stroke:#000000;stroke-width:0.93333333;marker-end:url(#id2);stroke-miterlimit:4;stroke-dasharray:none" + d="m 283.77925,122.5 h 33.78051" + id="path1314" + inkscape:connector-curvature="0" + sodipodi:nodetypes="cc" /> </svg>
--- a/test-data/gentest.R Mon Mar 16 08:22:08 2020 -0400 +++ b/test-data/gentest.R Tue Jul 14 07:41:00 2020 -0400 @@ -1,192 +1,186 @@ -library(dada2, quietly=T) -library(ggplot2, quietly=T) +library(dada2, quietly = T) +library(ggplot2, quietly = T) -sample.names <- c('F3D0_S188_L001', 'F3D141_S207_L001') -fwd <- c('F3D0_S188_L001_R1_001.fastq.gz', 'F3D141_S207_L001_R1_001.fastq.gz') -rev <- c('F3D0_S188_L001_R2_001.fastq.gz', 'F3D141_S207_L001_R2_001.fastq.gz') +sample_names <- c("F3D0_S188_L001", "F3D141_S207_L001") +fwd <- c("F3D0_S188_L001_R1_001.fastq.gz", "F3D141_S207_L001_R1_001.fastq.gz") +rev <- c("F3D0_S188_L001_R2_001.fastq.gz", "F3D141_S207_L001_R2_001.fastq.gz") -filt.fwd <- c('filterAndTrim_F3D0_R1.fq.gz', 'filterAndTrim_F3D141_R1.fq.gz') -filt.rev <- c('filterAndTrim_F3D0_R2.fq.gz', 'filterAndTrim_F3D141_R2.fq.gz') +filt_fwd <- c("filterAndTrim_F3D0_R1.fq.gz", "filterAndTrim_F3D141_R1.fq.gz") +filt_rev <- c("filterAndTrim_F3D0_R2.fq.gz", "filterAndTrim_F3D141_R2.fq.gz") print("filterAndTrim") -for(i in 1:length(fwd)){ - ftout <- filterAndTrim(fwd[i], filt.fwd[i], rev[i], filt.rev[i]) - b <- paste(strsplit(fwd[i], ".", fixed=T)[[1]][1], "tab", sep=".") - write.table(ftout, b, quote=F, sep="\t", col.names=NA) +for (i in seq_len(fwd)) { + ftout <- dada2::filterAndTrim(fwd[i], filt_fwd[i], rev[i], filt_rev[i]) + b <- paste(strsplit(fwd[i], ".", fixed = T)[[1]][1], "tab", sep = ".") + write.table(ftout, b, quote = F, sep = "\t", col.names = NA) } # In the test only the 1st data set is used t <- data.frame() -t <- rbind(t, ftout[1,]) +t <- rbind(t, ftout[1, ]) colnames(t) <- colnames(ftout) rownames(t) <- rownames(ftout)[1] -write.table(t, "filterAndTrim.tab", quote=F, sep="\t", col.names=NA) +write.table(t, "filterAndTrim.tab", quote = F, sep = "\t", col.names = NA) -names(fwd) <- sample.names -names(rev) <- sample.names -names(filt.fwd) <- sample.names -names(filt.rev) <- sample.names +names(fwd) <- sample_names +names(rev) <- sample_names 
+names(filt_fwd) <- sample_names +names(filt_rev) <- sample_names # Plot quality profile (just for one file, Galaxy compares with sim_size) print("plots") -qp <- plotQualityProfile(fwd) -ggsave('qualityProfile_fwd.pdf', qp, width = 20,height = 15,units = c("cm")) -qp <- plotQualityProfile(rev) -ggsave('qualityProfile_rev.pdf', qp, width = 20,height = 15,units = c("cm")) -qp <- plotQualityProfile(fwd[1]) -ggsave('qualityProfile.pdf', qp, width = 20,height = 15,units = c("cm")) +qp <- dada2::plotQualityProfile(fwd) +ggsave("qualityProfile_fwd.pdf", qp, width = 20, height = 15, units = c("cm")) +qp <- dada2::plotQualityProfile(rev) +ggsave("qualityProfile_rev.pdf", qp, width = 20, height = 15, units = c("cm")) +qp <- dada2::plotQualityProfile(fwd[1]) +ggsave("qualityProfile.pdf", qp, width = 20, height = 15, units = c("cm")) # Plot complexity (just for one file, Galaxy compares with sim_size) -cp <- plotComplexity(fwd) -ggsave('complexity_fwd.pdf', cp, width = 20,height = 15,units = c("cm")) -cp <- plotComplexity(rev) -ggsave('complexity_rev.pdf', cp, width = 20,height = 15,units = c("cm")) -cp <- plotComplexity(fwd[1]) -ggsave('complexity.pdf', cp, width = 20,height = 15,units = c("cm")) +cp <- dada2::plotComplexity(fwd) +ggsave("complexity_fwd.pdf", cp, width = 20, height = 15, units = c("cm")) +cp <- dada2::plotComplexity(rev) +ggsave("complexity_rev.pdf", cp, width = 20, height = 15, units = c("cm")) +cp <- dada2::plotComplexity(fwd[1]) +ggsave("complexity.pdf", cp, width = 20, height = 15, units = c("cm")) # learn Errors print("learnErrors") -err.fwd <- learnErrors(filt.fwd) -saveRDS(err.fwd, file='learnErrors_R1.Rdata') -plot <- plotErrors(err.fwd) -ggsave('learnErrors_R1.pdf', plot, width = 20,height = 15,units = c("cm")) +err_fwd <- dada2::learnErrors(filt_fwd) +saveRDS(err_fwd, file = "learnErrors_R1.Rdata") +plot <- dada2::plotErrors(err_fwd) +ggsave("learnErrors_R1.pdf", plot, width = 20, height = 15, units = c("cm")) -err.rev <- learnErrors(filt.rev) 
-saveRDS(err.rev, file='learnErrors_R2.Rdata') -plot <- plotErrors(err.rev) -ggsave('learnErrors.pdf', plot, width = 20,height = 15,units = c("cm")) +err_rev <- dada2::learnErrors(filt_rev) +saveRDS(err_rev, file = "learnErrors_R2.Rdata") +plot <- dada2::plotErrors(err_rev) +ggsave("learnErrors.pdf", plot, width = 20, height = 15, units = c("cm")) # dada print("dada") -dada.fwd <- dada(filt.fwd, err.fwd) -dada.rev <- dada(filt.rev, err.rev) -for( id in sample.names ){ - saveRDS(dada.fwd[[id]], file=paste("dada_", id,"_R1.Rdata", sep="")) - saveRDS(dada.rev[[id]], file=paste("dada_", id,"_R2.Rdata", sep="")) +dada_fwd <- dada2::dada(filt_fwd, err_fwd) +dada_rev <- dada2::dada(filt_rev, err_rev) +for (id in sample_names) { + saveRDS(dada_fwd[[id]], file = paste("dada_", id, "_R1.Rdata", sep = "")) + saveRDS(dada_rev[[id]], file = paste("dada_", id, "_R2.Rdata", sep = "")) } # merge pairs print("mergePairs") -merged <- mergePairs(dada.fwd, filt.fwd, dada.rev, filt.rev) -for( id in sample.names ){ - saveRDS(merged[[id]], file=paste("mergePairs_", id,".Rdata", sep="")) +merged <- dada2::mergePairs(dada_fwd, filt_fwd, dada_rev, filt_rev) +for (id in sample_names) { + saveRDS(merged[[id]], file = paste("mergePairs_", id, ".Rdata", sep = "")) } # make sequence table print("makeSequenceTable") seqtab <- makeSequenceTable(merged) -write.table(t(seqtab), file="makeSequenceTable.tab", quote=F, sep="\t", row.names = T, col.names = NA) +write.table(t(seqtab), file = "makeSequenceTable.tab", quote = F, sep = "\t", row.names = T, col.names = NA) -reads.per.seqlen <- tapply(colSums(seqtab), factor(nchar(getSequences(seqtab))), sum) -df <- data.frame(length=as.numeric(names(reads.per.seqlen)), count=reads.per.seqlen) -pdf( 'makeSequenceTable.pdf' ) -ggplot(data=df, aes(x=length, y=count)) + +reads_per_seqlen <- tapply(colSums(seqtab), factor(nchar(getSequences(seqtab))), sum) +df <- data.frame(length = as.numeric(names(reads_per_seqlen)), count = reads_per_seqlen) 
+pdf("makeSequenceTable.pdf") +ggplot(data = df, aes(x = length, y = count)) + geom_col() + theme_bw() bequiet <- dev.off() # remove bimera print("removeBimera") -seqtab.nochim <- removeBimeraDenovo(seqtab) -write.table(t(seqtab), file="removeBimeraDenovo.tab", quote=F, sep="\t", row.names = T, col.names = NA) +seqtab_nochim <- dada2::removeBimeraDenovo(seqtab) +write.table(t(seqtab), file = "removeBimeraDenovo.tab", quote = F, sep = "\t", row.names = T, col.names = NA) # assign taxonomy/species -tl <- 'Level1,Level2,Level3,Level4,Level5' +tl <- "Level1,Level2,Level3,Level4,Level5" tl <- strsplit(tl, ",")[[1]] set.seed(42) print("assignTaxonomyAndSpecies") -taxa <- assignTaxonomy(seqtab.nochim, 'reference.fa.gz', outputBootstraps = T, taxLevels=tl, multithread = 1) +taxa <- dada2::assignTaxonomy(seqtab_nochim, "reference.fa.gz", outputBootstraps = T, taxLevels = tl, multithread = 1) -taxa$tax <- addSpecies(taxa$tax, 'reference_species.fa.gz') -write.table(taxa$tax, file = 'assignTaxonomyAddspecies.tab', quote = F, sep = "\t", row.names = T, col.names = NA) +taxa$tax <- dada2::addSpecies(taxa$tax, "reference_species.fa.gz") +write.table(taxa$tax, file = "assignTaxonomyAddspecies.tab", quote = F, sep = "\t", row.names = T, col.names = NA) -write.table(taxa$boot, file = 'assignTaxonomyAddspecies_boot.tab', quote = F, sep = "\t", row.names = T, col.names = NA) +write.table(taxa$boot, file = "assignTaxonomyAddspecies_boot.tab", quote = F, sep = "\t", row.names = T, col.names = NA) -## Generate extra test data for parameter testing +## Generate extra test data for parameter testing print("alternatives") -filterAndTrim(fwd, c('filterAndTrim_single_F3D0_R1.fq.gz', 'filterAndTrim_single_F3D141_R1.fq.gz'), rm.phix = T, orient.fwd = 'TACGG') +dada2::filterAndTrim(fwd, c("filterAndTrim_single_F3D0_R1.fq.gz", "filterAndTrim_single_F3D141_R1.fq.gz"), rm.phix = T, orient.fwd = "TACGG") -filterAndTrim(fwd, c('filterAndTrim_single_trimmers_F3D0_R1.fq.gz', 
'filterAndTrim_single_trimmers_F3D141_R1.fq.gz'), truncQ = 30, truncLen = 2, trimLeft = 150, trimRight = 2) +dada2::filterAndTrim(fwd, c("filterAndTrim_single_trimmers_F3D0_R1.fq.gz", "filterAndTrim_single_trimmers_F3D141_R1.fq.gz"), truncQ = 30, truncLen = 2, trimLeft = 150, trimRight = 2) -filterAndTrim(fwd, c('filterAndTrim_single_filters_F3D0_R1.fq.gz', 'filterAndTrim_single_filters_F3D141_R1.fq.gz'), maxLen = 255, minLen = 60, maxN = 100, minQ = 13, maxEE = 1) +dada2::filterAndTrim(fwd, c("filterAndTrim_single_filters_F3D0_R1.fq.gz", "filterAndTrim_single_filters_F3D141_R1.fq.gz"), maxLen = 255, minLen = 60, maxN = 100, minQ = 13, maxEE = 1) -merged_nondef <- mergePairs(dada.fwd, filt.fwd, dada.rev, filt.rev, minOverlap = 8, maxMismatch = 1, justConcatenate = TRUE, trimOverhang = TRUE) -for( id in sample.names ){ - saveRDS(merged_nondef[[id]], file=paste("mergePairs_", id,"_nondefault.Rdata", sep="")) +merged_nondef <- dada2::mergePairs(dada_fwd, filt_fwd, dada_rev, filt_rev, minOverlap = 8, maxMismatch = 1, justConcatenate = TRUE, trimOverhang = TRUE) +for (id in sample_names) { + saveRDS(merged_nondef[[id]], file = paste("mergePairs_", id, "_nondefault.Rdata", sep = "")) } -rb.dada.fwd <- removeBimeraDenovo(dada.fwd[["F3D0_S188_L001"]]) -write.table(rb.dada.fwd, file = 'removeBimeraDenovo_F3D0_dada_uniques.tab', quote = F, sep = "\t", row.names = T, col.names = F) +rb_dada_fwd <- dada2::removeBimeraDenovo(dada_fwd[["F3D0_S188_L001"]]) +write.table(rb_dada_fwd, file = "removeBimeraDenovo_F3D0_dada_uniques.tab", quote = F, sep = "\t", row.names = T, col.names = F) -rb.merged <- removeBimeraDenovo(merged, method="pooled") -saveRDS(rb.merged, file='removeBimeraDenovo_F3D0_mergepairs.Rdata') - +rb_merged <- dada2::removeBimeraDenovo(merged, method = "pooled") +saveRDS(rb_merged, file = "removeBimeraDenovo_F3D0_mergepairs.Rdata") + # SeqCounts -getN <- function(x){ sum(getUniques(x)) } - -read.uniques <- function ( fname ) { - p <- read.table(fname, header=F, 
sep="\t") - n <-x[,2] - names(n)<-x[,1] +get_n <- function(x) { + sum(dada2::getUniques(x)) } - print("seqCounts ft") -samples = list() -samples[["F3D0_S188_L001_R1_001.tab"]] <- read.table("F3D0_S188_L001_R1_001.tab", header=T, sep="\t", row.names=1) +samples <- list() +samples[["F3D0_S188_L001_R1_001.tab"]] <- read.table("F3D0_S188_L001_R1_001.tab", header = T, sep = "\t", row.names = 1) dname <- "filter" tdf <- samples[["F3D0_S188_L001_R1_001.tab"]] -names(tdf) <- paste( dname, names(tdf) ) -tdf <- cbind( data.frame(samples=names( samples )), tdf) -write.table(tdf, "seqCounts_filter.tab", quote=F, sep="\t", row.names = F, col.names = T) +names(tdf) <- paste(dname, names(tdf)) +tdf <- cbind(data.frame(samples = names(samples)), tdf) +write.table(tdf, "seqCounts_filter.tab", quote = F, sep = "\t", row.names = F, col.names = T) -samples = list() -samples[["F3D0_S188_L001_R1_001.tab"]] <- read.table("F3D0_S188_L001_R1_001.tab", header=T, sep="\t", row.names=1) -samples[["F3D141_S207_L001_R1_001.tab"]] <- read.table("F3D141_S207_L001_R1_001.tab", header=T, sep="\t", row.names=1) +samples <- list() +samples[["F3D0_S188_L001_R1_001.tab"]] <- read.table("F3D0_S188_L001_R1_001.tab", header = T, sep = "\t", row.names = 1) +samples[["F3D141_S207_L001_R1_001.tab"]] <- read.table("F3D141_S207_L001_R1_001.tab", header = T, sep = "\t", row.names = 1) dname <- "filter" tdf <- samples[["F3D0_S188_L001_R1_001.tab"]] tdf <- rbind(tdf, samples[["F3D141_S207_L001_R1_001.tab"]]) -names(tdf) <- paste( dname, names(tdf) ) -tdf <- cbind( data.frame(samples=names( samples )), tdf) -write.table(tdf, "seqCounts_filter_both.tab", quote=F, sep="\t", row.names = F, col.names = T) +names(tdf) <- paste(dname, names(tdf)) +tdf <- cbind(data.frame(samples = names(samples)), tdf) +write.table(tdf, "seqCounts_filter_both.tab", quote = F, sep = "\t", row.names = F, col.names = T) print("seqCounts dada") -samples = list() -samples[["dada_F3D0_S188_L001_R1.Rdata"]] <- 
readRDS('dada_F3D0_S188_L001_R1.Rdata') -samples[["dada_F3D141_S207_L001_R1.Rdata"]] <- readRDS('dada_F3D141_S207_L001_R1.Rdata') +samples <- list() +samples[["dada_F3D0_S188_L001_R1.Rdata"]] <- readRDS("dada_F3D0_S188_L001_R1.Rdata") +samples[["dada_F3D141_S207_L001_R1.Rdata"]] <- readRDS("dada_F3D141_S207_L001_R1.Rdata") dname <- "dadaF" -tdf <- data.frame( samples = names(samples) ) -tdf[[ dname ]] <- sapply(samples, getN) -write.table(tdf, "seqCounts_dadaF.tab", quote=F, sep="\t", row.names = F, col.names = T) +tdf <- data.frame(samples = names(samples)) +tdf[[dname]] <- sapply(samples, get_n) +write.table(tdf, "seqCounts_dadaF.tab", quote = F, sep = "\t", row.names = F, col.names = T) print("seqCounts mp") -samples = list() -samples[["mergePairs_F3D0_S188_L001.Rdata"]] <- readRDS('mergePairs_F3D0_S188_L001.Rdata') -samples[["mergePairs_F3D141_S207_L001.Rdata"]] <- readRDS('mergePairs_F3D141_S207_L001.Rdata') +samples <- list() +samples[["mergePairs_F3D0_S188_L001.Rdata"]] <- readRDS("mergePairs_F3D0_S188_L001.Rdata") +samples[["mergePairs_F3D141_S207_L001.Rdata"]] <- readRDS("mergePairs_F3D141_S207_L001.Rdata") dname <- "merge" -tdf <- data.frame( samples = names(samples) ) -tdf[[ dname ]] <- sapply(samples, getN) -write.table(tdf, "seqCounts_merge.tab", quote=F, sep="\t", row.names = F, col.names = T) +tdf <- data.frame(samples = names(samples)) +tdf[[dname]] <- sapply(samples, get_n) +write.table(tdf, "seqCounts_merge.tab", quote = F, sep = "\t", row.names = F, col.names = T) print("seqCounts st") -samples = list() -samples <- t(as.matrix( read.table("makeSequenceTable.tab", header=T, sep="\t", row.names=1) )) +samples <- list() +samples <- t(as.matrix(read.table("makeSequenceTable.tab", header = T, sep = "\t", row.names = 1))) dname <- "seqtab" -tdf <- data.frame( samples = row.names(samples) ) -tdf[[ dname ]] <- rowSums(samples) -write.table(tdf, "seqCounts_seqtab.tab", quote=F, sep="\t", row.names = F, col.names = T) +tdf <- data.frame(samples = 
row.names(samples)) +tdf[[dname]] <- rowSums(samples) +write.table(tdf, "seqCounts_seqtab.tab", quote = F, sep = "\t", row.names = F, col.names = T) print("seqCounts rb") -samples = list() -samples <- t(as.matrix( read.table("removeBimeraDenovo.tab", header=T, sep="\t", row.names=1) )) +samples <- list() +samples <- t(as.matrix(read.table("removeBimeraDenovo.tab", header = T, sep = "\t", row.names = 1))) dname <- "nochim" -tdf <- data.frame( samples = row.names(samples) ) -tdf[[ dname ]] <- rowSums(samples) -write.table(tdf, "seqCounts_nochim.tab", quote=F, sep="\t", row.names = F, col.names = T) - +tdf <- data.frame(samples = row.names(samples)) +tdf[[dname]] <- rowSums(samples) +write.table(tdf, "seqCounts_nochim.tab", quote = F, sep = "\t", row.names = F, col.names = T)