Repository 'dada2_filterandtrim'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/dada2_filterandtrim

Changeset 3:ca73bbac615a (2020-07-14)
Previous changeset 2:23fc35093b11 (2020-03-16) Next changeset 4:f732237754fc (2021-02-01)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit 8533fe71d1d50f09348da2dc34941724407a1ffe"
modified:
macros.xml
static/images/pairpipe.png
static/images/pairpipe.svg
test-data/gentest.R
removed:
test-data/learnErrors.pdf
test-data/learnErrors_R1.pdf
b
diff -r 23fc35093b11 -r ca73bbac615a macros.xml
--- a/macros.xml Mon Mar 16 07:33:38 2020 -0400
+++ b/macros.xml Tue Jul 14 07:38:55 2020 -0400
b
@@ -7,7 +7,7 @@
         </requirements>
     </xml>
 
-    <token name="@DADA2_VERSION@">1.14</token>
+    <token name="@DADA2_VERSION@">1.16</token>
     <token name="@WRAPPER_VERSION@">0</token>
 
     <xml name="version_command">
@@ -124,6 +124,9 @@
 
 .. image:: pairpipe.png
 
+Note: In particular for the analysis of paired collections, the collections should be sorted lexicographically
+before the analysis.
+
 For single end data the steps "Unzip collection" and "mergePairs" are not necessary.
 
 More information may be found on the dada2 homepage: https://benjjneb.github.io/dada2/index.html (in particular tutorials) or the documentation of dada2's R package https://bioconductor.org/packages/release/bioc/html/dada2.html (in particular the pdf which contains the full documentation of all parameters).
b
diff -r 23fc35093b11 -r ca73bbac615a static/images/pairpipe.png
b
Binary file static/images/pairpipe.png has changed
b
diff -r 23fc35093b11 -r ca73bbac615a static/images/pairpipe.svg
--- a/static/images/pairpipe.svg Mon Mar 16 07:33:38 2020 -0400
+++ b/static/images/pairpipe.svg Tue Jul 14 07:38:55 2020 -0400
b
b'@@ -24,7 +24,7 @@\n         <dc:format>image/svg+xml</dc:format>\n         <dc:type\n            rdf:resource="http://purl.org/dc/dcmitype/StillImage" />\n-        <dc:title></dc:title>\n+        <dc:title />\n       </cc:Work>\n     </rdf:RDF>\n   </metadata>\n@@ -37,16 +37,16 @@\n      guidetolerance="10"\n      inkscape:pageopacity="0"\n      inkscape:pageshadow="2"\n-     inkscape:window-width="1920"\n-     inkscape:window-height="1016"\n+     inkscape:window-width="1680"\n+     inkscape:window-height="986"\n      id="namedview386"\n      showgrid="false"\n      inkscape:snap-global="true"\n      inkscape:snap-bbox="false"\n      inkscape:object-paths="true"\n-     inkscape:zoom="1"\n-     inkscape:cx="650.80177"\n-     inkscape:cy="176.12189"\n+     inkscape:zoom="2"\n+     inkscape:cx="336.68624"\n+     inkscape:cy="192.12189"\n      inkscape:window-x="0"\n      inkscape:window-y="27"\n      inkscape:window-maximized="1"\n@@ -443,22 +443,22 @@\n     </marker>\n   </defs>\n   <path\n-     style="stroke:#000000;marker-end:url(#id2)"\n-     d="m 117.55976,114.5 h 42"\n+     style="stroke:#000000;stroke-width:0.93333333;marker-end:url(#id2);stroke-miterlimit:4;stroke-dasharray:none"\n+     d="m 117.55976,114.5 h 34.86"\n      id="line138"\n      inkscape:connector-curvature="0"\n      sodipodi:nodetypes="cc" />\n   <rect\n      height="30.245491"\n      width="116.46159"\n-     x="165.14262"\n-     y="0.37725419"\n+     x="174.14262"\n+     y="3.3772583"\n      id="rect80"\n      style="fill:#ebd9b2;stroke:#000000;stroke-width:0.75450838" />\n   <text\n      id="text188"\n-     y="20.499996"\n-     x="170.26537"\n+     y="23.5"\n+     x="179.26537"\n      style="font-size:14px;font-family:Helvetica, Arial, FreeSans, Sans, sans, sans-serif">plotQualityProfile</text>\n   <rect\n      height="30.184877"\n@@ -483,9 +483,9 @@\n      inkscape:connector-curvature="0"\n      id="path5591"\n      d="m 971.55976,116.5 h 42.00004"\n-     
style="stroke:#000000;marker-end:url(#id2)" />\n+     style="stroke:#000000;marker-end:url(#id2);stroke-width:0.93333333;stroke-miterlimit:4;stroke-dasharray:none" />\n   <path\n-     style="stroke:#000000;marker-end:url(#id2)"\n+     style="stroke:#000000;marker-end:url(#id2);stroke-width:0.93333333;stroke-miterlimit:4;stroke-dasharray:none"\n      d="m 1167.5598,116.5 h 42"\n      id="path5593"\n      inkscape:connector-curvature="0"\n@@ -494,8 +494,8 @@\n      sodipodi:nodetypes="cc"\n      inkscape:connector-curvature="0"\n      id="path5557"\n-     d="m 255.55976,114.5 h 42"\n-     style="stroke:#000000;marker-end:url(#id2)" />\n+     d="m 283.77925,110.5 h 33.78051"\n+     style="stroke:#000000;stroke-width:0.93333333;marker-end:url(#id2);stroke-miterlimit:4;stroke-dasharray:none" />\n   <g\n      id="g5364"\n      transform="translate(-1226.0934,-496.75423)">\n@@ -524,7 +524,7 @@\n          sodipodi:role="line">and addSpecies</tspan></text>\n   </g>\n   <path\n-     style="stroke:#000000;marker-end:url(#id2)"\n+     style="stroke:#000000;marker-end:url(#id2);stroke-width:0.93333333;stroke-miterlimit:4;stroke-dasharray:none"\n      d="m 545.55976,90.499996 h 42"\n      id="path5573"\n      inkscape:connector-curvature="0"\n@@ -534,21 +534,21 @@\n      inkscape:connector-curvature="0"\n      id="path5575"\n      d="m 545.55976,150.5 h 42"\n-     style="stroke:#000000;marker-end:url(#id2)" />\n+     style="stroke:#000000;marker-end:url(#id2);stroke-width:0.93333333;stroke-miterlimit:4;stroke-dasharray:none" />\n   <path\n      sodipodi:nodetypes="cc"\n      inkscape:connector-curvature="0"\n      id="path5597"\n      d="m 645.55976,90.499996 555.87494,-54.3773"\n-     style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#id2)" />\n+     
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.93333333;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#id2);stroke-miterlimit:4;stroke-dasharray:none" />\n   <path\n-     style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px'..b'd="path1293"\n      inkscape:connector-curvature="0"\n@@ -886,26 +850,78 @@\n      inkscape:connector-curvature="0"\n      sodipodi:nodetypes="cc" />\n   <path\n-     style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"\n+     style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.93333333;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;stroke-miterlimit:4;stroke-dasharray:none"\n      d="m 421.55976,124.5 274.19828,-2.49999"\n      id="path1297"\n      inkscape:connector-curvature="0" />\n   <rect\n      style="fill:#ebd9b2;stroke:#000000;stroke-width:0.70499283"\n      id="rect1299"\n-     y="40.352497"\n-     x="165.11786"\n+     y="43.352493"\n+     x="174.11786"\n      width="101.51111"\n      height="30.295006" />\n   <text\n      style="font-size:14px;font-family:Helvetica, Arial, FreeSans, Sans, sans, sans-serif"\n-     x="170.26537"\n-     y="60.499992"\n+     x="179.26537"\n+     y="63.5"\n      id="text1301">plotComplexity</text>\n   <path\n-     style="stroke:#000000;marker-end:url(#id2)"\n-     d="M 117.55976,114.5 161.14262,60.499995"\n+     style="stroke:#000000;stroke-width:0.93333334;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#id2)"\n+     d="M 121.78486,109.34886 168.30515,76.42082"\n      id="path1307"\n      inkscape:connector-curvature="0"\n      sodipodi:nodetypes="cc" />\n+  <circle\n+     style="font-family:Helvetica, Arial, FreeSans, Sans, sans, sans-serif;fill:#ffffff;stroke:#000000"\n+     id="circle1296"\n+     r="5"\n+     cy="123.25001"\n+     cx="417.11923" />\n+  <rect\n+     height="42.187313"\n+     width="117.18732"\n+     x="160.37244"\n+   
  y="95.906334"\n+     id="rect1298"\n+     style="fill:#ebd9b2;stroke:#000000;stroke-width:0.74489039" />\n+  <text\n+     xml:space="preserve"\n+     style="font-style:normal;font-weight:normal;font-size:20px;line-height:100%;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"\n+     x="162.1783"\n+     y="111.53393"\n+     id="text1306"><tspan\n+       sodipodi:role="line"\n+       id="tspan1300"\n+       x="162.1783"\n+       y="111.53393"\n+       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:14.66666698px;line-height:0.99999998%;font-family:Helvetica, Arial, FreeSans, Sans, sans, sans-serif;-inkscape-font-specification:\'Helvetica, Arial, FreeSans, Sans, sans, sans-serif\'">Unzip &amp; Sort</tspan><tspan\n+       sodipodi:role="line"\n+       x="162.1783"\n+       y="131.53394"\n+       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:14.66666698px;line-height:0.99999998%;font-family:Helvetica, Arial, FreeSans, Sans, sans, sans-serif;-inkscape-font-specification:\'Helvetica, Arial, FreeSans, Sans, sans, sans-serif\'"\n+       id="tspan1302">dataset collection</tspan><tspan\n+       sodipodi:role="line"\n+       x="162.1783"\n+       y="151.53394"\n+       id="tspan1304"\n+       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:14.66666698px;line-height:0.99999998%;font-family:Helvetica, Arial, FreeSans, Sans, sans, sans-serif;-inkscape-font-specification:\'Helvetica, Arial, FreeSans, Sans, sans, sans-serif\'" /></text>\n+  <circle\n+     style="font-family:Helvetica, Arial, FreeSans, Sans, sans, sans-serif;fill:#ffffff;stroke:#000000"\n+     id="circle1310"\n+     r="5"\n+     cy="110.5"\n+     cx="277.11923" />\n+  <circle\n+     cx="277.11923"\n+     cy="123.25001"\n+     r="5"\n+     
id="circle1312"\n+     style="font-family:Helvetica, Arial, FreeSans, Sans, sans, sans-serif;fill:#ffffff;stroke:#000000" />\n+  <path\n+     style="stroke:#000000;stroke-width:0.93333333;marker-end:url(#id2);stroke-miterlimit:4;stroke-dasharray:none"\n+     d="m 283.77925,122.5 h 33.78051"\n+     id="path1314"\n+     inkscape:connector-curvature="0"\n+     sodipodi:nodetypes="cc" />\n </svg>\n'
b
diff -r 23fc35093b11 -r ca73bbac615a test-data/gentest.R
--- a/test-data/gentest.R Mon Mar 16 07:33:38 2020 -0400
+++ b/test-data/gentest.R Tue Jul 14 07:38:55 2020 -0400
[
b'@@ -1,192 +1,186 @@\n-library(dada2, quietly=T)\n-library(ggplot2, quietly=T)\n+library(dada2, quietly = T)\n+library(ggplot2, quietly = T)\n \n-sample.names <- c(\'F3D0_S188_L001\', \'F3D141_S207_L001\')\n-fwd <- c(\'F3D0_S188_L001_R1_001.fastq.gz\', \'F3D141_S207_L001_R1_001.fastq.gz\')\n-rev <- c(\'F3D0_S188_L001_R2_001.fastq.gz\', \'F3D141_S207_L001_R2_001.fastq.gz\')\n+sample_names <- c("F3D0_S188_L001", "F3D141_S207_L001")\n+fwd <- c("F3D0_S188_L001_R1_001.fastq.gz", "F3D141_S207_L001_R1_001.fastq.gz")\n+rev <- c("F3D0_S188_L001_R2_001.fastq.gz", "F3D141_S207_L001_R2_001.fastq.gz")\n \n-filt.fwd <- c(\'filterAndTrim_F3D0_R1.fq.gz\', \'filterAndTrim_F3D141_R1.fq.gz\')\n-filt.rev <- c(\'filterAndTrim_F3D0_R2.fq.gz\', \'filterAndTrim_F3D141_R2.fq.gz\')\n+filt_fwd <- c("filterAndTrim_F3D0_R1.fq.gz", "filterAndTrim_F3D141_R1.fq.gz")\n+filt_rev <- c("filterAndTrim_F3D0_R2.fq.gz", "filterAndTrim_F3D141_R2.fq.gz")\n \n print("filterAndTrim")\n \n-for(i in 1:length(fwd)){\n-\tftout <- filterAndTrim(fwd[i], filt.fwd[i], rev[i], filt.rev[i])\n-    b <- paste(strsplit(fwd[i], ".", fixed=T)[[1]][1], "tab", sep=".")\n-    write.table(ftout, b, quote=F, sep="\\t", col.names=NA)\n+for (i in seq_len(fwd)) {\n+    ftout <- dada2::filterAndTrim(fwd[i], filt_fwd[i], rev[i], filt_rev[i])\n+    b <- paste(strsplit(fwd[i], ".", fixed = T)[[1]][1], "tab", sep = ".")\n+    write.table(ftout, b, quote = F, sep = "\\t", col.names = NA)\n }\n \n # In the test only the 1st data set is used\n t <- data.frame()\n-t <- rbind(t, ftout[1,])\n+t <- rbind(t, ftout[1, ])\n colnames(t) <- colnames(ftout)\n rownames(t) <- rownames(ftout)[1]\n-write.table(t, "filterAndTrim.tab", quote=F, sep="\\t", col.names=NA)\n+write.table(t, "filterAndTrim.tab", quote = F, sep = "\\t", col.names = NA)\n \n-names(fwd) <- sample.names\n-names(rev) <- sample.names\n-names(filt.fwd) <- sample.names\n-names(filt.rev) <- sample.names\n+names(fwd) <- sample_names\n+names(rev) <- sample_names\n+names(filt_fwd) <- 
sample_names\n+names(filt_rev) <- sample_names\n \n # Plot quality profile (just for one file, Galaxy compares with sim_size)\n print("plots")\n-qp <- plotQualityProfile(fwd)\n-ggsave(\'qualityProfile_fwd.pdf\', qp, width = 20,height = 15,units = c("cm"))\n-qp <- plotQualityProfile(rev)\n-ggsave(\'qualityProfile_rev.pdf\', qp, width = 20,height = 15,units = c("cm"))\n-qp <- plotQualityProfile(fwd[1])\n-ggsave(\'qualityProfile.pdf\', qp, width = 20,height = 15,units = c("cm"))\n+qp <- dada2::plotQualityProfile(fwd)\n+ggsave("qualityProfile_fwd.pdf", qp, width = 20, height = 15, units = c("cm"))\n+qp <- dada2::plotQualityProfile(rev)\n+ggsave("qualityProfile_rev.pdf", qp, width = 20, height = 15, units = c("cm"))\n+qp <- dada2::plotQualityProfile(fwd[1])\n+ggsave("qualityProfile.pdf", qp, width = 20, height = 15, units = c("cm"))\n \n # Plot complexity (just for one file, Galaxy compares with sim_size)\n \n-cp <- plotComplexity(fwd)\n-ggsave(\'complexity_fwd.pdf\', cp, width = 20,height = 15,units = c("cm"))\n-cp <- plotComplexity(rev)\n-ggsave(\'complexity_rev.pdf\', cp, width = 20,height = 15,units = c("cm"))\n-cp <- plotComplexity(fwd[1])\n-ggsave(\'complexity.pdf\', cp, width = 20,height = 15,units = c("cm"))\n+cp <- dada2::plotComplexity(fwd)\n+ggsave("complexity_fwd.pdf", cp, width = 20, height = 15, units = c("cm"))\n+cp <- dada2::plotComplexity(rev)\n+ggsave("complexity_rev.pdf", cp, width = 20, height = 15, units = c("cm"))\n+cp <- dada2::plotComplexity(fwd[1])\n+ggsave("complexity.pdf", cp, width = 20, height = 15, units = c("cm"))\n \n \n # learn Errors\n print("learnErrors")\n-err.fwd <- learnErrors(filt.fwd) \n-saveRDS(err.fwd, file=\'learnErrors_R1.Rdata\')\n-plot <- plotErrors(err.fwd)\n-ggsave(\'learnErrors_R1.pdf\', plot, width = 20,height = 15,units = c("cm"))\n+err_fwd <- dada2::learnErrors(filt_fwd)\n+saveRDS(err_fwd, file = "learnErrors_R1.Rdata")\n+plot <- dada2::plotErrors(err_fwd)\n+ggsave("learnErrors_R1.pdf", plot, width = 20, height = 15, 
units = c("cm"))\n \n-err.rev <- learnErrors(filt.rev) \n-saveRDS(err.rev, file=\'learnErrors_R2.Rdata\')\n-plot <- plotErrors(err.rev)\n-ggsav'..b' "seqCounts_filter.tab", quote = F, sep = "\\t", row.names = F, col.names = T)\n \n-samples = list()\n-samples[["F3D0_S188_L001_R1_001.tab"]] <- read.table("F3D0_S188_L001_R1_001.tab", header=T, sep="\\t", row.names=1)\n-samples[["F3D141_S207_L001_R1_001.tab"]] <- read.table("F3D141_S207_L001_R1_001.tab", header=T, sep="\\t", row.names=1)\n+samples <- list()\n+samples[["F3D0_S188_L001_R1_001.tab"]] <- read.table("F3D0_S188_L001_R1_001.tab", header = T, sep = "\\t", row.names = 1)\n+samples[["F3D141_S207_L001_R1_001.tab"]] <- read.table("F3D141_S207_L001_R1_001.tab", header = T, sep = "\\t", row.names = 1)\n dname <- "filter"\n tdf <- samples[["F3D0_S188_L001_R1_001.tab"]]\n tdf <- rbind(tdf, samples[["F3D141_S207_L001_R1_001.tab"]])\n-names(tdf) <- paste( dname, names(tdf) )\n-tdf <- cbind( data.frame(samples=names( samples )), tdf)\n-write.table(tdf, "seqCounts_filter_both.tab", quote=F, sep="\\t", row.names = F, col.names = T)\n+names(tdf) <- paste(dname, names(tdf))\n+tdf <- cbind(data.frame(samples = names(samples)), tdf)\n+write.table(tdf, "seqCounts_filter_both.tab", quote = F, sep = "\\t", row.names = F, col.names = T)\n \n print("seqCounts dada")\n-samples = list()\n-samples[["dada_F3D0_S188_L001_R1.Rdata"]] <- readRDS(\'dada_F3D0_S188_L001_R1.Rdata\')\n-samples[["dada_F3D141_S207_L001_R1.Rdata"]] <- readRDS(\'dada_F3D141_S207_L001_R1.Rdata\')\n+samples <- list()\n+samples[["dada_F3D0_S188_L001_R1.Rdata"]] <- readRDS("dada_F3D0_S188_L001_R1.Rdata")\n+samples[["dada_F3D141_S207_L001_R1.Rdata"]] <- readRDS("dada_F3D141_S207_L001_R1.Rdata")\n dname <- "dadaF"\n-tdf <- data.frame( samples = names(samples) )\n-tdf[[ dname ]] <- sapply(samples, getN)\n-write.table(tdf, "seqCounts_dadaF.tab", quote=F, sep="\\t", row.names = F, col.names = T)\n+tdf <- data.frame(samples = names(samples))\n+tdf[[dname]] <- 
sapply(samples, get_n)\n+write.table(tdf, "seqCounts_dadaF.tab", quote = F, sep = "\\t", row.names = F, col.names = T)\n \n print("seqCounts mp")\n-samples = list()\n-samples[["mergePairs_F3D0_S188_L001.Rdata"]] <- readRDS(\'mergePairs_F3D0_S188_L001.Rdata\')\n-samples[["mergePairs_F3D141_S207_L001.Rdata"]] <- readRDS(\'mergePairs_F3D141_S207_L001.Rdata\')\n+samples <- list()\n+samples[["mergePairs_F3D0_S188_L001.Rdata"]] <- readRDS("mergePairs_F3D0_S188_L001.Rdata")\n+samples[["mergePairs_F3D141_S207_L001.Rdata"]] <- readRDS("mergePairs_F3D141_S207_L001.Rdata")\n dname <- "merge"\n-tdf <- data.frame( samples = names(samples) )\n-tdf[[ dname ]] <- sapply(samples, getN)\n-write.table(tdf, "seqCounts_merge.tab", quote=F, sep="\\t", row.names = F, col.names = T)\n+tdf <- data.frame(samples = names(samples))\n+tdf[[dname]] <- sapply(samples, get_n)\n+write.table(tdf, "seqCounts_merge.tab", quote = F, sep = "\\t", row.names = F, col.names = T)\n \n print("seqCounts st")\n-samples = list()\n-samples <- t(as.matrix( read.table("makeSequenceTable.tab", header=T, sep="\\t", row.names=1) ))\n+samples <- list()\n+samples <- t(as.matrix(read.table("makeSequenceTable.tab", header = T, sep = "\\t", row.names = 1)))\n dname <- "seqtab"\n-tdf <- data.frame( samples = row.names(samples) )\n-tdf[[ dname ]] <- rowSums(samples)\n-write.table(tdf, "seqCounts_seqtab.tab", quote=F, sep="\\t", row.names = F, col.names = T)\n+tdf <- data.frame(samples = row.names(samples))\n+tdf[[dname]] <- rowSums(samples)\n+write.table(tdf, "seqCounts_seqtab.tab", quote = F, sep = "\\t", row.names = F, col.names = T)\n \n print("seqCounts rb")\n-samples = list()\n-samples <- t(as.matrix( read.table("removeBimeraDenovo.tab", header=T, sep="\\t", row.names=1) ))\n+samples <- list()\n+samples <- t(as.matrix(read.table("removeBimeraDenovo.tab", header = T, sep = "\\t", row.names = 1)))\n dname <- "nochim"\n-tdf <- data.frame( samples = row.names(samples) )\n-tdf[[ dname ]] <- 
rowSums(samples)\n-write.table(tdf, "seqCounts_nochim.tab", quote=F, sep="\\t", row.names = F, col.names = T)\n-\n+tdf <- data.frame(samples = row.names(samples))\n+tdf[[dname]] <- rowSums(samples)\n+write.table(tdf, "seqCounts_nochim.tab", quote = F, sep = "\\t", row.names = F, col.names = T)\n'
b
diff -r 23fc35093b11 -r ca73bbac615a test-data/learnErrors.pdf
b
Binary file test-data/learnErrors.pdf has changed
b
diff -r 23fc35093b11 -r ca73bbac615a test-data/learnErrors_R1.pdf
b
Binary file test-data/learnErrors_R1.pdf has changed