Repository 're_utils'
hg clone https://toolshed.g2.bx.psu.edu/repos/petr-novak/re_utils

Changeset 5:378565f5a875 (2019-11-22)
Previous changeset 4:d397f5a85464 (2019-09-18) Next changeset 6:f224513123a1 (2019-12-02)
Commit message:
Uploaded
modified:
ChipSeqRatioAnalysis.R
paired_fastq_filtering.R
paired_fastq_filtering.xml
single_fastq_filtering.R
single_fastq_filtering.xml
test_run1.sh
test_run2.sh
added:
ChipSeqRatioReport
fasta_tmp_single
tmp.RData
removed:
test_data/ERR215189_1_part.fastq.gz
test_data/ERR215189_2_part.fastq.gz
test_data/seq_C_10k
test_data/seq_I_10k
test_data/test_run1
b
diff -r d397f5a85464 -r 378565f5a875 ChipSeqRatioAnalysis.R
--- a/ChipSeqRatioAnalysis.R Wed Sep 18 06:30:04 2019 -0400
+++ b/ChipSeqRatioAnalysis.R Fri Nov 22 07:56:48 2019 -0500
[
@@ -3,82 +3,28 @@
 library(base64enc, quietly=T)
 
 
-htmlheader="
-  <html xmlns:mml=\"http://www.w3.org/1998/Math/MathML\">
+htmlheader=
+" <html xmlns:mml=\"http://www.w3.org/1998/Math/MathML\">
   <head>
   <title> ChIP-Seq Mapper Output </title>
-  <style>
-  <!--
-  table { background:#FFFFFF;
-  border:1px solid gray;
-  border-collapse:collapse;
-  color:#fff;
-  font:normal 13px verdana, arial, helvetica, sans-serif;
-    width: 100%;
+ <style>
+html,body{font-family:Verdana,sans-serif;font-size:15px;line-height:1.5}
 
-  }
-  caption { border:1px solid #5C443A;
-  color:#5C443A;
-  font-weight:bold;
-  font-size:20pt
-  padding:6px 4px 8px 0px;
-  text-align:center;
-  
-  }
-  td, th { color:#363636;
-  padding:.4em;
-  }
-  tr { border:1px dotted gray;
-  }
-  thead th, tfoot th { background:#5C443A;
-  color:#FFFFFF;
-  padding:3px 10px 3px 10px;
-  text-align:left;
-  text-transform:uppercase;
-  }
-  tbody td a { color:#3636FF;
-  text-decoration:underline;
-  }
-  tbody td a:visited { color:gray;
-  text-decoration:line-through;
-  }
-  tbody td a:hover { text-decoration:underline;
-  }
-  tbody th a { color:#3636FF;
-  font-weight:normal;
-  text-decoration:none;
-  }
-  tbody th a:hover { color:#363636;
-  }
-  tbody td+td+td+td a { background-image:url('bullet_blue.png');
-  background-position:left center;
-  background-repeat:no-repeat;
-  color:#FFFFFF;
-  padding-left:15px;
-  }
-  tbody td+td+td+td a:visited { background-image:url('bullet_white.png');
-  background-position:left center;
-  background-repeat:no-repeat;
-  }
-  tbody th, tbody td { text-align:left;
-  vertical-align:top;
-  }
-  tfoot td { background:#5C443A;
-  color:#FFFFFF;
-  padding-top:3px;
-  }
-  .odd { background:#fff;
-  }
-  tbody tr:hover { background:#EEEEEE;
-  border:1px solid #03476F;
-  color:#000000;
-  }
-  -->
-  </style>
+table {
+  border-collapse: collapse;
+  border: 1px solid black;
+  width: 1000pt
+}
+table, th, td {
+  border: 1px solid black;
+}
+</style>
   
   </head>
-  
-  "
+
+
+
+"
 
 
                                         #arguments
@@ -97,21 +43,20 @@
 df$"Ratio Chip/(Chip+Input)"=df$Chip_Hits/(df$Chip_Hits + df$Input_Hits)
 df$"Normalized ratio Chip/(Chip+Input)"=(df$Chip_Hits/chipN)/((df$Input_Hits/inputN)+(df$Chip_Hits/chipN))
 
-outputTable = df[df$"Normalized ratio Chip/(Chip+Input)" > threshld,]
-outputTable = outputTable[!is.na(outputTable$Cluster),]
+outputTable = df[df$"Normalized ratio Chip/(Chip+Input)" > threshld,
+                 ]
+outputTable = outputTable[!is.na(outputTable$Cluster),
+                          c('Cluster', 'Chip_Hits', 'Input_Hits',
+                            'Normalized ratio Chip/Input','Normalized ratio Chip/(Chip+Input)')]
 save.image("tmp.RData")                                        #Plot creation
 pngfile <- tempfile()
 png(pngfile, width = 1000, height = 1200, pointsize=20)
-par(mfrow=c(3,1))
+par(mfrow=c(2,1))
 lims=range(df$"Normalized ratio Chip/Input"[df$"Normalized ratio Chip/Input">0], finite = TRUE)
 suppressWarnings(plot(df$Cluster,df$"Normalized ratio Chip/Input", log="y", xlab="Cluster Nr.", ylab="Normalized ChiP/Seq ratio", pch=20, ylim=lims))
 abline(h=1,col='#00000080', lwd = 2)
 abline(h=2,col='#FF000080', lwd = 2)
 
-lims=range(df$"Normalized ratio Chip/Input", finite = TRUE)
-suppressWarnings(plot(df$Cluster,df$"Normalized ratio Chip/Input", xlab="Cluster Nr.", ylab="Normalize ChiP/Seq ratio", pch=20, ylim=lims))
-abline(h=1,col='#00000080', lwd = 2)
-abline(h=2,col='#FF000080', lwd = 2)
 
 suppressWarnings(plot(df$Cluster,df$"Normalized ratio Chip/(Chip+Input)", xlab="Cluster Nr.", ylab="Normalized Chip/(Chip+Input)", pch=20))
 abline(h=0.5,col='#00000080', lwd = 2)
@@ -133,7 +78,8 @@
 
 HTML(graph, file=filename)
 if (nrow(outputTable)>0){
-    HTML(outputTable, file=filename, classtable = "dataframe", row.names=FALSE, Align='left')
+  HTML(outputTable, file=filename, classtable = "dataframe",
+       row.names=FALSE, align='left', caption="Clusters with Normalized ChIP/Input ratio > 2", captionalign="top")
 }
 HTMLEndFile(filename) 
 file.rename(from=filename, to=HTMLfile)
b
diff -r d397f5a85464 -r 378565f5a875 ChipSeqRatioReport
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ChipSeqRatioReport Fri Nov 22 07:56:48 2019 -0500
b
b'@@ -0,0 +1,265 @@\n+\n+\t <html xmlns:mml="http://www.w3.org/1998/Math/MathML">\n+  <head>\n+  <title> ChIP-Seq Mapper Output </title>\n+  <style>\n+  <!--\n+  table { background:#FFFFFF;\n+  border:1px solid gray;\n+  border-collapse:collapse;\n+  color:#fff;\n+  font:normal 13px verdana, arial, helvetica, sans-serif;\n+    width: 100%;\n+\n+  }\n+  caption { border:1px solid #5C443A;\n+  color:#5C443A;\n+  font-weight:bold;\n+  font-size:20pt\n+  padding:6px 4px 8px 0px;\n+  text-align:center;\n+  \n+  }\n+  td, th { color:#363636;\n+  padding:.4em;\n+  }\n+  tr { border:1px dotted gray;\n+  }\n+  thead th, tfoot th { background:#5C443A;\n+  color:#FFFFFF;\n+  padding:3px 10px 3px 10px;\n+  text-align:left;\n+  text-transform:uppercase;\n+  }\n+  tbody td a { color:#3636FF;\n+  text-decoration:underline;\n+  }\n+  tbody td a:visited { color:gray;\n+  text-decoration:line-through;\n+  }\n+  tbody td a:hover { text-decoration:underline;\n+  }\n+  tbody th a { color:#3636FF;\n+  font-weight:normal;\n+  text-decoration:none;\n+  }\n+  tbody th a:hover { color:#363636;\n+  }\n+  tbody td+td+td+td a { background-image:url(\'bullet_blue.png\');\n+  background-position:left center;\n+  background-repeat:no-repeat;\n+  color:#FFFFFF;\n+  padding-left:15px;\n+  }\n+  tbody td+td+td+td a:visited { background-image:url(\'bullet_white.png\');\n+  background-position:left center;\n+  background-repeat:no-repeat;\n+  }\n+  tbody th, tbody td { text-align:left;\n+  vertical-align:top;\n+  }\n+  tfoot td { background:#5C443A;\n+  color:#FFFFFF;\n+  padding-top:3px;\n+  }\n+  .odd { background:#fff;\n+  }\n+  tbody tr:hover { background:#EEEEEE;\n+  border:1px solid #03476F;\n+  color:#000000;\n+  }\n+  -->\n+  </style>\n+  \n+  </head>\n+  \n+  \n+<p class=\'character\'><img src="data:image/png;base64 , 
iVBORw0KGgoAAAANSUhEUgAAA+gAAASwCAIAAADnu/enAAAgAElEQVR4nOzdeUBU1f//8TuKKCoiGsiYigoW2qIlbqAiaO4ZLrmlfnBNMs1Ks9JP7mlmaWkuLUYqWi5p5UctI0EgZVHUxCUJEEVQEGVnEJjfH/P5zXc+MwODOnPvXOb5+Gvm3Ms9L/b3nDn3HIVarRYAAAAAWLdaUgcAAAAAYBqFOwAAACADFO4AAACADFC4AwAAADJA4Q4AAADIAIU7AAAAIAMU7gAAAIAMULgDAAAAMkDhDgAAAMgAhTsAAAAgAxTuAAAAgAxQuAMAAAAyQOEOAAAAyACFOwAAACADFO4AAACADFC4AwAAADJA4Q4AAADIAIU7AAAAIAMU7gAAAIAMULgDAAAAMkDhDgAAAMgAhTsAAAAgAxTuAAAAgAxQuAMAAAAyQOEOAAAAyACFOwAAACADFO4AAACADFC4AwAAADJA4Q4AAADIAIU7AAAAIAMU7gAAAIAMULgDAAAAMkDhDgAAAMgAhTsAAAAgAxTuAAAAgAxQuAMAAAAyQOEOAAAAyACFOwAAACADFO4AAACADFC4AwAAADJA4Q4AAADIAIU7AAAAIAMU7gAAAIAMULgDAAAAMkDhDgAAAMgAhTsAAAAgAxTuAAAAgAxQuAMAAAAyQOEOAAAAyACFOwAAACADFO4AAACADFC4AwAAADJA4Q4AAADIAIU7AAAAIAMU7gAAAIAMULgDAAAAMkDhDgAAAMgAhTsAAAAgAxTuAAAAgAxQuAMAAAAyQOEOAAAAyACFOwAAACADFO4AAACADFC4AwAAADJA4Q4AAADIAIU7AAAAIAMU7gAAAIAMULgDAAAAMkDhDgAAAMgAhTsAAAAgAxTuAAAAgAxQuAMAAAAyQOEOAAAAyACFOwAAACADFO4AAACADFC4AwAAADJQowr3u3fvpqamSp0CAAAAMD85Fe6RkZGDBg1q3bq1t7f3Z599dv/+fb0TPvroozZt2kiSDQAAALAo2RTuZ8+e7du379GjR7Oyss6fPz937lx/f/+cnBypcwEAAABikE3hvmzZMkEQDhw4UFBQkJ+f/+mnn8bGxg4YMKCwsFDqaAAAAIDFyaZwj4+PHzt2bGBgoEKhqFu37ptvvnn06NHz58+PHTu2oqJC6nQAAACAZcmmcM/KytKbvx4QEPD1118fOnRo/vz5UqUCAAAAxGEndYDqcnd3P3v2rF7jxIkTL126tGrVqscff/ytt96SJBgAAAAgAtkU7oMHD16/fv2mTZtmzJhhZ/d/sVeuXHnz5s233347IyOjtLRUwoQAAACA5SjUarXUGaolOzu7a9euKSkpAwYMOHr0qO4htVo9d+7czz//XPtUioAAAACABclmjvtjjz0WFxf3+uuvP/XUU3qHFArFZ599tn//fg8PD0myAQAAAJYmmxF3AAAAwJbJZo67llqtTklJSU5Ozs/PFwTBycnJ09OzVatWUucCAAAALEhOhXt2dvaHH364a9euW7du6R1yd3efMmXK22+/3aBBA0myAQAAABYlm6kyGRkZPj4+qampHh4ePXv2dHd3b9iwoSAIeXl5ycnJ4eHhN2/e7NixY0REhJOTk9RhAQAAADOTzYj7okWL0tPTd+/ePXbsWMOj5eXlW7ZsmT179uLFi9evXy9+PAAAAMCiZDPirlQqBw0atG3btirOGTt27J9//pmWliZaKgAAAEAcslkOMicnx9PTs+pz2rdvbzj9HQAAAKgBZFO4K5XKhISEqs+Jj49v3ry5OHkAAAAAMcmmcB8xYsT+/fvXrl2rUqkMjxYUFCxatOjQoUNGZ8ADAAAAciebOe65ubkBAQFnzpxxdHT09vbWrCqjVqvz8/NTUlLi4+OLi4v9/PwOHz5cv359qcMCAAAAZiabwl0QhJKSks2bN4eGhiYkJFRUVGjb7ezsunbtGhQUNHnyZDs72ayTAwAAAFSfnAp3LZVKde3atfz8fIVC
0ahRI3d39zp16li0x1atWrm6ulq0CwAAAFiD7Ozs1NRUqVMYIcvx6bp16z7xxBNi9ujh4XH8+HExewQAAIAk/P39pY5gnGxuTtWTmZkZGBh48uRJqYMAAAAAYpBr4V5QUPDTTz+lp6dLHQQAAAAQg1wLdwAAAMCmULgDAAAAMiCzm1PnzZuneXDv3j1BELZv337q1Cnt0bVr1z7ENW/evFlSUlL1OSZPAAAAACxKZoX7vn37NA/KysoEQTh16tT58+e1Rx+'..b'vXP3jwYM+ePb179/b19e3Zs2dpaenx48cvX778/PPPp6SkPP3003XeEhoaum7dutmzZy9fvtzGxiY5OfnMmTMdOnSQk7G3t7e9vf20adOOHTvm6ekpSdKpU6e2bNni4+Pj6ekp/r1tfeXKlXl5eYMHD37ppZdiYmIOHDiwdOnSgoKCQYMGXbt2bf369SUlJbNmzXr8D2hvbx8fHz958uSCggIPD4/GpuXk5PTv39/f3z81NfXxdwoAqsYVdwBojdq1a7d79+6UlJSQkJC8vLxvvvkmLS2ta9eu69atO3nypKura/23BAQEbN68uby8fNSoUWPGjCkrK0tKSmrXrp1884xWq01PT58xY8bhw4ejoqI+/vjjkydPLlu27ODBg/JDGEeMGDF69Ojz58+vXr26oKBACOHo6JiZmTl9+vRDhw5Nmzbtyy+/9PT0PHz4sL+/f7N8xkmTJr366qvNsikAMAeapn9HCQAAAKA14Io7AAAAoAIEdwAAAEAFCO4AAACAChDcAQAAABUguAMAAAAqQHAHAAAAVIDgDgAAAKgAwR0AAABQAYI7AAAAoAIEdwAAAEAFCO4AAACAChDcAQAAABUguAMAAAAqQHAHAAAAVIDgDgAAAKgAwR0AAABQAYI7AAAAoAIEdwAAAEAFCO4AAACAChDcAQAAABUguAMAAAAqQHAHAAAAVIDgDgAAAKgAwR0AAABQAYI7AAAAoAIEdwAAAEAFCO4AAACAChDcAQAAABUguAMAAAAqQHAHAAAAVOA/TXVT6W1ggrUAAAAASUVORK5CYII= " alt="image" /></p>\n+\n+\n+<p align= center >\n+<table cellspacing=0 border=1>\n+<caption align=bottom class=captiondataframe></caption>\n+<tr><td>\n+\t<table border=0 class=dataframe>\n+\t<tbody> \n+\t<tr class= firstline > \n+\t\t<th>Cluster  </th>\n+\t\t<th>Chip_Hits  </th>\n+\t\t<th>Input_Hits  </th>\n+\t\t<th>Ratio Chip/Input  </th>\n+\t\t<th>Normalized ratio Chip/Input  </th>\n+\t\t<th>Ratio Chip/(Chip+Input)  </th>\n+\t\t<th>Normalized ratio Chip/(Chip+Input)</th> \n+\t</tr> \n+<tr> \n+<td class=cellinside> 73\n+</td>\n+<td class=cellinside> 5171\n+</td>\n+<td class=cellinside> 91\n+</td>\n+<td class=cellinside> 56.8\n+</td>\n+<td class=cellinside> 56.8\n+</td>\n+<td class=cellinside>0.98\n+</td>\n+<td class=cellinside>0.98\n+</td></tr>\n+ \n+<tr> \n+<td class=cellinside>112\n+</td>\n+<td class=cellinside>15274\n+</td>\n+<td class=cellinside>240\n+</td>\n+<td class=cellinside> 63.6\n+</td>\n+<td 
class=cellinside> 63.6\n+</td>\n+<td class=cellinside>0.98\n+</td>\n+<td class=cellinside>0.98\n+</td></tr>\n+ \n+<tr> \n+<td class=cellinside>160\n+</td>\n+<td class=cellinside>    1\n+</td>\n+<td class=cellinside>  0\n+</td>\n+<td class=cellinside>  Inf\n+</td>\n+<td class=cellinside>  Inf\n+</td>\n+<td class=cellinside>1.00\n+</td>\n+<td class=cellinside>1.00\n+</td></tr>\n+ \n+<tr> \n+<td class=cellinside>168\n+</td>\n+<td class=cellinside> 1306\n+</td>\n+<td class=cellinside> 25\n+</td>\n+<td class=cellinside> 52.2\n+</td>\n+<td class=cellinside> 52.2\n+</td>\n+<td class=cellinside>0.98\n+</td>\n+<td class=cellinside>0.98\n+</td></tr>\n+ \n+<tr> \n+<td class=cellinside>208\n+</td>\n+<td class=cellinside> 3134\n+</td>\n+<td class=cellinside> 25\n+</td>\n+<td class=cellinside>125.4\n+</td>\n+<td class=cellinside>125.4\n+</td>\n+<td class=cellinside>0.99\n+</td>\n+<td class=cellinside>0.99\n+</td></tr>\n+ \n+<tr> \n+<td class=cellinside>213\n+</td>\n+<td class=cellinside>    1\n+</td>\n+<td class=cellinside>  0\n+</td>\n+<td class=cellinside>  Inf\n+</td>\n+<td class=cellinside>  Inf\n+</td>\n+<td class=cellinside>1.00\n+</td>\n+<td class=cellinside>1.00\n+</td></tr>\n+ \n+<tr> \n+<td class=cellinside>225\n+</td>\n+<td class=cellinside>  409\n+</td>\n+<td class=cellinside>  1\n+</td>\n+<td class=cellinside>409.0\n+</td>\n+<td class=cellinside>409.0\n+</td>\n+<td class=cellinside>1.00\n+</td>\n+<td class=cellinside>1.00\n+</td></tr>\n+ \n+<tr> \n+<td class=cellinside>236\n+</td>\n+<td class=cellinside> 4638\n+</td>\n+<td class=cellinside> 55\n+</td>\n+<td class=cellinside> 84.3\n+</td>\n+<td class=cellinside> 84.3\n+</td>\n+<td class=cellinside>0.99\n+</td>\n+<td class=cellinside>0.99\n+</td></tr>\n+ \n+<tr> \n+<td class=cellinside>250\n+</td>\n+<td class=cellinside>    1\n+</td>\n+<td class=cellinside>  0\n+</td>\n+<td class=cellinside>  Inf\n+</td>\n+<td class=cellinside>  Inf\n+</td>\n+<td class=cellinside>1.00\n+</td>\n+<td 
class=cellinside>1.00\n+</td></tr>\n+ \n+<tr> \n+<td class=cellinside>294\n+</td>\n+<td class=cellinside>   11\n+</td>\n+<td class=cellinside>  5\n+</td>\n+<td class=cellinside>  2.2\n+</td>\n+<td class=cellinside>  2.2\n+</td>\n+<td class=cellinside>0.69\n+</td>\n+<td class=cellinside>0.69\n+</td></tr>\n+ \n+\t</tbody>\n+</table>\n+ </td></table>\n+ <br>\n+\n+<hr size=1>\n+<font size=-1>\n+\t Generated on: <i>Fri Nov 22 12:59:10 2019</i> - <b>R2HTML</b> \n+<hr size=1>\n+\t</body>\n+</html>\n\\ No newline at end of file\n'
b
diff -r d397f5a85464 -r 378565f5a875 paired_fastq_filtering.R
--- a/paired_fastq_filtering.R Wed Sep 18 06:30:04 2019 -0400
+++ b/paired_fastq_filtering.R Fri Nov 22 07:56:48 2019 -0500
[
@@ -75,7 +75,7 @@
     make_option(c("-C", "--cutadapt_options"), action = "store", type = "character", 
         help = "file specifying cutadapt options", default = NULL), make_option(c("-j", 
         "--chunk_size"), action = "store", type = "numeric", help = "Number of sequences processed in single step. This option affect speed of processing and memory usage [default %default]", 
-        default = 1e+06),
+        default = 1000000),
     make_option(c('-F', '--filter_seq'),action='store',type='character',help='file specifying sequences for filtering (e.g. plastid DNA)',default=NULL)
     )
 
@@ -241,16 +241,24 @@
 
 
 number_of_chunks = round(n1/CHUNK_SIZE)
+## adjust chunk size so that the last chunk is the same size as all the others;
+## this avoids ending with a small last chunk
+CHUNK_SIZE = round(n1/number_of_chunks)
+
 if (number_of_chunks == 0) {
     CHUNK_SIZE = n1
     number_of_chunks = 1
 }
 if (!is.null(opt$sample_size)) {
-    sample_size_in_chunk = opt$sample_size/number_of_chunks
+  sample_size_in_chunk = round(opt$sample_size/number_of_chunks)
+  n_missing = opt$sample_size - sample_size_in_chunk * number_of_chunks
 } else {
-    sample_size_in_chunk = CHUNK_SIZE
+  sample_size_in_chunk = CHUNK_SIZE
+  n_missing = 0
 }
 
+cat("number chunks ", number_of_chunks, "\n")
+cat("chunks size ", CHUNK_SIZE, "\n")
 # adjust the chunk size to get exact count of sequences:
 CHUNK_SIZE = ceiling(n1/number_of_chunks)
 F_id = ifelse(opt$rename, "/1", "1")
@@ -263,11 +271,14 @@
 nucleotideFrequenciesForward = nucleotideFrequenciesReverse = matrix(0)
 while (TRUE) {
     chunk = chunk + 1
+    cat("chunk number ", chunk, "\n")
     fq1 <- yield(f1)
     fq2 <- yield(f2)
     if (length(fq1) == 0) {
         break
     }
+    cat("chunk number ", chunk, " imported\n")
+    cat("chunk size", length(fq1), "\n")
     ## rename
     chunk_id = sprintf(paste0("%0", round(log10(number_of_chunks)) + 1, "d"), chunk)
     cat("chunk id ", chunk_id, "\n")
@@ -341,7 +352,7 @@
     
 
 
-                                        # remove sequences similar to filter database (e.g. plastid DNA)
+    ## remove sequences similar to filter database (e.g. plastid DNA)
     if (!is.null(opt$filter_seq)){
       blast_results1 =  megablast(fqF1, database=opt$filter_seq)
       blast_results2 =  megablast(fqF2, database=opt$filter_seq)
@@ -374,8 +385,9 @@
     fqF2@id = BStringSet(paste0(id(fqF2), R_id))
     
     
-    if (sum(inc1) > sample_size_in_chunk) {
-        smp = sort(sample(sum(inc1), sample_size_in_chunk))
+    if (sum(inc1) > (sample_size_in_chunk + n_missing)) {
+        smp = sort(sample(sum(inc1), sample_size_in_chunk + n_missing))
+        n_missing = 0  ## this was to correct rounding error
         writeFun(fqF1[inc1][smp], file = f1out, mode = "a")
         writeFun(fqF2[inc2][smp], file = f2out, mode = "a")
         nfrq1 = alphabetByCycle(sread(fqF1[inc1][smp]))
b
diff -r d397f5a85464 -r 378565f5a875 paired_fastq_filtering.xml
--- a/paired_fastq_filtering.xml Wed Sep 18 06:30:04 2019 -0400
+++ b/paired_fastq_filtering.xml Fri Nov 22 07:56:48 2019 -0500
b
@@ -1,4 +1,7 @@
 <tool id="paired_fastq_filtering" name="Preprocessing of fastq paired-reads">
+  <stdio>
+     <exit_code range="1:" level="fatal" description="Error" />
+  </stdio>
   <description>
     Preprocessing of paired reads fastq files
     including trimming, quality filtering, cutadapt filtering and interlacing. Broken
b
diff -r d397f5a85464 -r 378565f5a875 single_fastq_filtering.R
--- a/single_fastq_filtering.R Wed Sep 18 06:30:04 2019 -0400
+++ b/single_fastq_filtering.R Fri Nov 22 07:56:48 2019 -0500
[
@@ -201,25 +201,25 @@
 
 
 number_of_chunks=round(n1/CHUNK_SIZE)
+CHUNK_SIZE = round(n1/number_of_chunks)
+
 if (number_of_chunks==0){
  CHUNK_SIZE=n1
  number_of_chunks=1
 }
 if (!is.null(opt$sample_size)){
- sample_size_in_chunk=opt$sample_size/number_of_chunks
+ sample_size_in_chunk=round(opt$sample_size/number_of_chunks)
+  n_missing = opt$sample_size - sample_size_in_chunk * number_of_chunks
 }else{
  sample_size_in_chunk=CHUNK_SIZE
+  n_missing = 0
 }
 
 # adjust the chunk size to get exact count of sequences:
 CHUNK_SIZE = ceiling(n1/number_of_chunks)
 save.image("tmp.RData")
 
-print("--------------------------------")
-print (sample_size_in_chunk)
-print (opt$sample_size)
-print (CHUNK_SIZE)
-print("--------------------------------")
+
 f1 <- FastqStreamer(opt$fastqA, CHUNK_SIZE)
 total=0
 nucleotideFrequenciesForward = matrix(0)
@@ -291,8 +291,9 @@
  # filter complete pairs again:
 
 # create new id - last character must differentiate pair - for interlacing
- if (length(fqF1)>sample_size_in_chunk){
- smp=sort(sample(seq_along(fqF1),sample_size_in_chunk))
+ if (length(fqF1)>(sample_size_in_chunk + n_missing)){
+ smp=sort(sample(seq_along(fqF1),sample_size_in_chunk + n_missing))
+    n_missing = 0
  writeFun(fqF1[smp],file=f1out,mode='a')
     nfrq1 = alphabetByCycle(sread(fqF1[smp]))
 
b
diff -r d397f5a85464 -r 378565f5a875 single_fastq_filtering.xml
--- a/single_fastq_filtering.xml Wed Sep 18 06:30:04 2019 -0400
+++ b/single_fastq_filtering.xml Fri Nov 22 07:56:48 2019 -0500
b
@@ -1,4 +1,7 @@
 <tool id="single_fastq_filtering" name="Preprocessing of fastq reads">
+  <stdio>
+    <exit_code range="1:" level="fatal" description="Error" />
+  </stdio>
   <description>
     Preprocessing of fastq files
     including trimming, quality filtering, cutadapt filtering and sampling
b
diff -r d397f5a85464 -r 378565f5a875 test_data/ERR215189_1_part.fastq.gz
b
Binary file test_data/ERR215189_1_part.fastq.gz has changed
b
diff -r d397f5a85464 -r 378565f5a875 test_data/ERR215189_2_part.fastq.gz
b
Binary file test_data/ERR215189_2_part.fastq.gz has changed
b
diff -r d397f5a85464 -r 378565f5a875 test_data/seq_C_10k
--- a/test_data/seq_C_10k Wed Sep 18 06:30:04 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,30000 +0,0 @@\n->2000001\n-GTTTATCTGATTTGGCATGAGTATTTCATGGTGTACAATGCATTTGATGTATCTGAGAAT\n-TCTCTTAGCTACAAGTATAAATTGTATGATCTTGGTTCCT\n->2000002\n-TGATCTACCAAATTATCTAATGAGGCTTTATAGAAGCTAGCATATAGGAAGAAGATATTG\n-TTTCCTGAGAGATCTAGTTGGCAAGATCGGAAGAGCACAC\n->2000003\n-GTTCAATATGGTGAAGTATTGATATTTCTTTTCATCAATACCGTGAGGTATTGATCTTTC\n-TTTTTCTTCAAAACCGTGAAGTATTGATCTTCCCTAGATC\n->2000004\n-TGCAACACTAGATCTTTTACTTGTCATATTGCGGGCTTATTAAGCAGATAGATGGAGGAA\n-CCAAGATCATACAATTTATAGTTTGAGCTAAAAGATTCCT\n->2000005\n-CTTTTTGGTTTATTACGGTTTTGGATTTTGCTTACACGGGATTGACCTTTCAACGACTCA\n-TGTTATGTAATGTAGTTCTTTGTTCATCGAGTACTGGACC\n->2000006\n-GAATATTGGAAGACTGAGCTACAACTTGCAGATTTTCTAACCAAGCCATCAAAGAAAACA\n-AGACTTGAGGGTTTTAAAATACTTATGTAAATGAGAAGAC\n->2000007\n-TCCCGGTAGATTGAAAACCCGAAAGGGCAATCTAGGAAAAAGTTAGGGATTCATGGCAAG\n-TAACTGCATCATAAGACTTGATCGCCCGCAACGTCGAGAT\n->2000008\n-TAAAGAGAACTACGTTACATAACATGAGCAGTTGAAAGGTCAATCCCAAGTAAGCAAAAT\n-CAAAGCCATAATAAGCCAAAGAGAAAAGAAAGGAAGATCG\n->2000009\n-GACAAAGACTCAACCATAAAGAAGAAGTTGCAGAGAAAAGAAGTGCTTTACATACTATGA\n-GTAGTTGAAACGTACCTTATCTGTAGACCAAATAGCTAGT\n->2000010\n-TATAAAATCATAAAAACGTAACGATTTGTTATCTCGTTGTCATTCTGACTTCCTCTCAAA\n-CTCTGAATTGGAATAGCTAGATGCAAGATCGGAAGAGCAC\n->2000011\n-TGCCAAATAAGAAAAACTCAAGAAAGGGGAGGGAAATGCATATGTCTATTCTAATTCAGA\n-TATAAAAGAAAGACAACATGACATGTTGCAGCCTCTAGAT\n->2000012\n-CTTTCTATCGAAATCTTTACTTCATTGCTTTATCGCTTTTCTTAACCTTTTTTATTGTCA\n-AACTGTTCATAGATCGGAAGAGCACACGTCTGAACTCCAG\n->2000013\n-GGATGGGTAGTCGATTTATCCGCCAATTGCAATGTCATTTTTGTTGCTTTCATATCAATG\n-CTCCCTAATCTTTTCACAATCGACAATAGAATCAAGTTTA\n->2000014\n-GACTCGTAGATGGAACCTTGTACAAGCAGATCATCAATTCACTAAAGTACATATGCAACA\n-CGAGACATTATATTTGTCATAGTGTGGTCTTAGTAAGCAG\n->2000015\n-GTACTAGTATGATATCATGCTCGTTATTCTTGATGAAAAGAGTGTTATCAACCTGTCCTC\n-CATGTAACATTTTTTCAGGCGGGAATTTAATTAAAGTGGC\n->2000016\n-GAGTTTTTGAAAGGATCAATCCTTATCTACCAAAGACTCAACCATAAAGAAGAAGTTGCA\n-GAGAAAAGAAGTGCTTTACATACTATGAGTAGTTGAAGCG\n->2000017\n-GAGCCCAATTCCTATATTCCGATGAAGCTCTTCATTAGAGCTTTAATTGGAAGAGCTAAG\n-ACTCATCCTCTCTCTCAACACAGATAAAAA
ACCACACATT\n->2000018\n-CTCTTTTTAACCTTTGTAACACATTCACGTCATTCCAATAAACATTATTTTTACCTTGAT\n-TTCAGTCTAAGACATTATAAATTCCATGGAGTCTGTAAAA\n->2000019\n-ACTAAATGAAGAAAAGAAACTACATTACACAACATGAGTAGTTGAAAGGGCCAACCCTTG\n-TTAATTAAAGACTTAAGCCTAGTAAACCAAAAACAGAGAA\n->2000020\n-CCATTCATCAAGCTACGGTACATACTTTGACTTAGCTTATAGGTAAATTCATCAAGGTAA\n-CGTAGATATTTTGACTTAGTGTAAGGTCGAATCCTCAACG\n->2000021\n-CGTCAAAAAATGTACATTTCAACTACTCAAACTATGTAATGTACTTTCAATCCTCTTTAT\n-TTGTTTTCTTATTGTTAAGTCTTTGATCAACATCGGTCAG\n->2000022\n-GTGATTAGGGATTTCGTCGACAAGGAAGAGACGTTTCAACTAATAAAATTATGTAAGTCA\n-CTTATTTTCTCTTCATTTGCTTCTTTAAGTTTGAGTCTCT\n->2000023\n-TACTTAGAGTTAAGAAAATTCTCACATACATCAAATGCACTCTAGACCCTGAAATACTCA\n-TGCCAAATCTGATAAACTCAAGAAGGGAAGCAAATGCATC\n->2000024\n-CCATGACATGATGCACAGGGAAATTGAAGTGTATGTTGATGACATGATTGCCAAATCTCG\n-ATCTGAAGAGGGCCATCTTGATGATTTGTTAAAACTGTTT\n->2000025\n-CTGCTTGTACAAGGTTCCATCTACCCTTCTATCAATTGAGTCCTTATTCAGTTCGATTGA\n-TGTTTTAATGGAATTTATAACGTCTTAGAATGAAATCGAG\n->2000026\n-GCCATCAAAGAAAACAAGACTTGAGGGTTTTAAAATACTTATGTAAATGAGAAGACTGGA\n-TGATCCGAATTAAAATGAGTATTGGCAATGTAATTCAATT\n->2000027\n-AGGACATTTTGCAGGGCTTCCCGGTGGGCTTCTGAGTGCAGTAGCAAAGAGAGTATTGAT\n-ATTCTAGAAGGTGTTTGTAACAGTTGATAGATCGGAAGAG\n->2000028\n-GTGACTTTGTGGCTTTATTTCACAAATTGAGCCAACGGAAATTAGTGAAGCTATAGTCGA\n-CAAACATTGGTATCTTGCAAGATCGGAAGAGCACACGTCT\n->2000029\n-CTTCATTTTCTTTCTAATAATTATTAGGTCTTTCTTCATCAACATATGGATCTTTCAACT\n-ACTCAAAGTATGTAATGTACTCTCTTTTATTAGATCGGAA\n->2000030\n-GAGATGGTGAATTGACTGGTTATCTTGGGTGACAACAATTCACAATTTGGATTCGGCCGA\n-GTGAAAGCTTTGAAGAACGGTGATTTCTTGCAAAGGTGTA\n->2000031\n-AACTAGAGGAATGGCCTTGATAAATTTACCTTTACGTTATGTTAAAGTACATATTCTACA\n-TTGATGAAGATCGGAAGAGCACACGTCTGAACTCCAGTCA\n->2000032\n-GGTTTCTTTTATCTCAAAGTATAAATTGTATGATCTTGGTTCCTCCGTCTATCTGCTTAC\n-TAAGCCCGCATTATGATAAATAAAAGGTCTAGATCGGAAG\n->2000033\n-GGATAGGTCCATCCATTCTAAACCTAATCCTAAACTATAGGAAACCAAATGAAAAGAGAG\n-GAAATATTACACATGAAATGCGTAGTTGAAAGGATCAATC\n->2000034\n-GCTCATGGATCAGTTTGAACAGACTCAGAAGATGCTTAGGGACGAAGTAAATGTCATGTT\n-TGGTAAACTTGTGGAAGCTCTCTCGAGATCGGAAGAGCAC\
n->2000035\n-CCTAAAAGACGAAAGAAAGTACGTTACAAACTTGGAGTAGTTAAAAGGTTCATTCCTCGT\n-TGAACAAAGACCTAGTAATTAAAAAACAAA'..b'TAACCCAA\n->2009966\n-GTCATTTTGACTTTCTTTTAAATCTGAATGAGAATAGACATATGCATTTGCCTCCCTTCT\n-TGAGTTTTTCTTATTTGGCATGAGTATTTCAAGGTCTAGA\n->2009967\n-TCGGGGTTTTACAGGGAGCATCTCAATTTCCGGAGTCAATCCCAAATCACAAGGAGCATT\n-ACCTTTTTGTTGACAATGTGGAAGACATCGTTCGATTGGA\n->2009968\n-AGACTTAAGCCTAGTAAACCAAAAACAGAGAAAAATGTACATTGCATACTATGAGTAGTT\n-GAAATGTATGACCCTTGTTGACTAAAGACTTAACCATAAG\n->2009969\n-GGTTGATCTTTTGTTAGTTTTCATTCTTTTGGGAGATCTGCGTTTTCCTGTGGAGAGTAT\n-GGGATTTGTTGCTGAACTGATTCTTCTACTATGACTAGAT\n->2009970\n-AGTCACTTATTTTCTCTTCATTTTCTTCAAGTTTGAGTCTCTGGTTGATAAGAACTGATC\n-CTTTCAACTACTCATAGTATGTGAAATATTTCCTTTCTTT\n->2009971\n-TCTTACGATTAAGTCTTTGAGAAACAAGGGGTTCTCTTTTTAACCACTAATGTATTTTAT\n-GTACTCTTCATTTGATTTATATTACTTGGTCTTTGATCAT\n->2009972\n-GGTGAAGTTCTGAAGATCAAGTTCTTAGTTGAAAACAAATCAATAATTTTTAGAAGCAAG\n-CATATAGAAAGAAGATATTTTTTCCTGGGAGATCAAGTTG\n->2009973\n-GTTGACAACAAATCAAAAATTGATCTACCAAATTATCTAATGAGGCATGTTAGAAGCAAG\n-CATATAGAAAGAAGATATTGTTTCCTAAGAGATCAAGTTG\n->2009974\n-AAGGATGTAATGTACCTTGATGAATGGACCTTTACACTAAGTCAAAGTATGTACCGTACC\n-TTGAAGAATGCACGTGTACACTATGTCAAAGTATGTGCCT\n->2009975\n-GCAACCCAAGCAAACTGTGACCACTTAACCATTTAAAAATTTAGTAAGTTGACTAAATAT\n-TTGGCTATATATATACTCTTTTAAACCTTTGTAACACACA\n->2009976\n-GAAAAACTCATGAAAGGGGAGGGAAATGCATACGTCTATTCTAATTCAGATTTAAAAGAA\n-AGACAAAATGACAAGATCGGAAGAGCACACGTCTGAACTC\n->2009977\n-GGTGATTGGAGCTTATACCATACAGAGAGGCCCATTGAAAGTTTATTGAAAACATATAGA\n-AAAGATATTTATAAAAATGGTTGGGTCTTACACTCTATAT\n->2009978\n-GACGTTTCAACTAATAAAATTATGTAAGTCACTTATTTTCTCTTCATTTGCTTCTTTAAG\n-TTTGAGTCTCTGGTTGATAAGAACTGATCCTTTCAACTAC\n->2009979\n-GTCATAGTAGAAGAATCAGTTCAGGATCAAATCCCATACACTCCACAGGAAAACGCAGAT\n-CTCCCAAAAGAATGAAAACTAACAAAAGATCATCCGATAG\n->2009980\n-GCGGTTTTAGCGCAAAGGTTTTGAAAAGGTGGTAAAAGCAAGCAAACTAGCCTAAACTAA\n-TGCAAGAAATAAATTGGTCTCATTGTAAGGTAGCCCAAGA\n->2009981\n-GGAAGCAAATTGAAAAGAAAAACAAAGCGGGAAATTTACTTCTGCCAAGGAACTTAGGAA\n-GCAAATTGAAAAGAAAAACAAAGCGGGAAATTTACTTCTC\n->20099
82\n-GAGGTTTCTTTTATCGCAAAGTATAAATTGTATGATCTTGGTTCCTCCGTCTATCTGCTT\n-ACTAAGCCCGCCTTATGATAAATAAAAGGTCTAAGATCGG\n->2009983\n-TCTAATTCCTAGGTCATAGTTCATCAAGGAATGGACCTTTCAACTAATCAAAGTATGTAA\n-TGTACTTTATTTCCTCTTTATTTGGTTTCTTATGGTTAAG\n->2009984\n-GACTTGAGGGTTTTAAAATACTTATGTAAATGAGAAGACTGGATGATCCGAATTAAAATG\n-AGTATTGGCAATGTAATTCAAAGATCGGAAGAGCACACGT\n->2009985\n-CATAAAATACATTAGTGGTTAAAAAGAGAACCCCTTGTTTCTCAAAGACTTAATCGTAAG\n-AAACCAAATAAACAAATAAGAAGTACTTAGATCGGAAGAG\n->2009986\n-GTGTTGCAAGTATTATTGCAAGTCTAATACTTGCAACACGACTTGCAACACAACTAACTG\n-CTAATATAGTTGAATTATAGTGCCAATACTCATTTTATTT\n->2009987\n-ATGAAAAAAAAGAAACTACATTACACAACATGAGTAGTTGAAAGGGCCAACCCTTGTTAA\n-TTAAAGACTTAAGCCTAGTAAACCAAAAACAGAGAAAAAT\n->2009988\n-AGTTCCATTCATCAATGTAGAATATGTACTTTAACATAACGTAAAGGTCCATTTATCAAG\n-GCCATTCCTCTAGTAAGACCACATACATCAACTTAGTGAA\n->2009989\n-CCCTCAAGTCTTGTTTTCTTTGATGGCTTGGTTAGAAAATCTGCAAGTTGTAGCTCAGTC\n-TTCCAATATTCAATCTTCAAACTTCTTATTTGCCAACTTG\n->2009990\n-GGGGTTTTTTTTTACTAGAAGTAATATGCAAAAGATATCTTGAAGAAGCTCAAGAAGAGT\n-AAATTGCAACCCTGCAATTACTCCATTGAAACGTGAATCG\n->2009991\n-ACAACTCATTGTATGTAAAGCACTGAATTTCTCTTCAATTTCTTCTTCATGGTTGAGTCT\n-TTGGTCGAAGATCGGAAGAGCACACGTCTGAACTCCAGTC\n->2009992\n-GAGAAGGTGACCTGGTACTAAAAAACATTAATCTTTCCTCACAGACTCTAGGGGCAAATG\n-GACGCCTAATTATGATGGGCCATACGTCGTCAAGAAAGCC\n->2009993\n-GTCTTAGAATGAAATCAAGTTAAAAATAATGTTTATTGGAATGACGTGAATGTGTGTTAC\n-AAAGGTTTAAAAGAGTATATATATATAGCCCAATATTTAG\n->2009994\n-GGTTGAGTCTGTGGTCGATAAGAATTGATCATTTCAACTACTCATAGTATGTCTTAGTTT\n-TCCTTTCTTTTTATTCGGTTTATTAGTTTAATATGGTTTT\n->2009995\n-ATACATTACATATCATTAGTTTAAAGGTAAGAAGTACTTTACAGGTAAGAAGAAGTACGT\n-TACATAATATGAGTAGTTTAAAGGTCCATTCTTTGTTGAC\n->2009996\n-CTTTATTTGGATTTTAATGCCTACGTCTTTGGCCAAAGACGCCTGGACTTTTCAACTAAT\n-CATATTATGGAAACTACTTCGTACCTATTTATCTATTCAC\n->2009997\n-GATCCACTAACCAAATGTTTGCACATAAGACTTTTCATGGGTATGTTGGTCACACGGGTC\n-TTGGTTTTGAGATTGCCTTTGATTCGAACGAGTATTTTCC\n->2009998\n-CCCTAGCGGATTTTCTCTCTCGACATATTACATTCCTTAGCAAAATTTCTTTGCATTCAA\n-GGGAGAATCTCAGTTCCCAATGGACTAAATTCCCCAGTGA\n->2009999\n-TCACCA
ATATTCAACTCTTCAAATAACAATTCAATTCACAAGGCTTGGTAGTTTAAATAA\n-GCGGCCGCAACATATTCTACTTCACATGATGGATGATAAG\n->2010000\n-TGGTTGAGTCTGTGGTCGATAAGAATTGATCATTTCAACTACTCATAGTATGTCTTAGTT\n-TTCCTTTCTTTTTATTCGGTTTAGATCGGAAGAGCACACG\n'
b
diff -r d397f5a85464 -r 378565f5a875 test_data/seq_I_10k
--- a/test_data/seq_I_10k Wed Sep 18 06:30:04 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,30000 +0,0 @@\n->2000001\n-TCTTTCCATTCAAAGCACAAGTACATTTGAATCAGGGTTTATACTACAAAACGTCCAGGT\n-ACATAAGCTTAATCTCCAAGTTCCAATTCATAACTTTTCC\n->2000002\n-GGTCAATACTCCCTCTCTGCATGTATTCACAGAAATCCATTTACCTGAGGCTGAATGGTG\n-TCAAGATAGGTTTGACCAGCTTGATTCGGTCGAAGGAAAA\n->2000003\n-TTGTTAGGAGAAATAATTAAAACTCCAACACATGTTCCATCCCTATGACTAGAACCATCA\n-AAATATAACTTCCATGGCTCGAGCTCTAGATAGTTTTGGG\n->2000004\n-CACAAACCAAATCTTTAAAATTCCCTAAAAATAAACGTCATATGCTAAGAACATTTTTGA\n-TGCAAATGTATGCAATGTCATGTAATGCTTTAATCTCAGG\n->2000005\n-ATCTTTAATATTTAAAAATTAATTACAAGTGAATTCTACAAATTAATGTTTCAAATTGAT\n-GTTACAAATTAAAATACAAATTAAATAATTACAAATAATA\n->2000006\n-AGAGAAAAATCATATGTGTTTGAAGTCTTTAAAGATCTATGTCAAATATTACAAAGAGAG\n-AAAGTGGAATAATCAGAATCAGAAGTGACCATGGTAAGGA\n->2000007\n-GACGGAAGTATGAGCGGCAACCATCGATGAAGATAGCTCCTCCGACGTTACGATCCTTCC\n-AGGTATCTCATTTTGCTCTTTTTTCCTTTCTCCAATAAAA\n->2000008\n-CTCTTTTTTTTTATAATTGTCTAAAATTTAACAAATTTATAAATAGATAAAAGTTTGGTA\n-CAATTAAGGACAAAAATTGTACTCTCTAAAATTTATAAAA\n->2000009\n-GAGATTTGCCGGTGATGATGCACCGTTACCGCCTGTGTACGGTTGTGATTATAGCGAGGG\n-TGATCGGACGGACGAGATGGTTGGACTTGTTAGTCTTTTG\n->2000010\n-GGCAAAGGCCAAGGGTAAACAGGTCATGATTGACGATCAAGATTCTGCACCAGTAAATAT\n-CCCCAAGCAAAGTGCGATGCCCGAAGCTTCTTCGTCTCAA\n->2000011\n-GGACGAGTATTGAAAAAAGAATATTAACATGGACGAGTGTTGGAAATCGATACGAACGAG\n-TATTGGAAAAAGATAAGAATCAACACGGACGAGTGTCGGA\n->2000012\n-TCCTGTTATGCGGCATGGGAAAATTTTATATAGGTCTCTCATGTCCCTCAACGTGATTTG\n-TGGAGTATCCATCAACCATATTTATAGTCATCTTGCTACA\n->2000013\n-TGTTCAATGTCATATTTATCACAATATTCCTCAAAGAGGTGGTTTTCAAATTCACCTCCG\n-TGATCGGTTCAAATAGCAACTATTTTTAAACTAAACTTGT\n->2000014\n-GTACAACCTCTAGGTATAAGTTCGATATATCCTCCGCATCCTCTGCACTCAGACCTTGAG\n-ATCTATCAGACTTTGGAGATAATGCAGCTGCAATAATTGA\n->2000015\n-TGTTCTTCATTAATCTATGATCATGTTGCTGCAAATTACATGGATACGTATAACAACATC\n-CTTAATCTGTCAATATATGGCCCAATGCTGCAAACCAGAC\n->2000016\n-AGGACCCTCGATGTCCCTCGGATAGCCTTCTCTTGGGCTCAAAATACAAGGACCCTCGAT\n-GTCCCTCGGATAGCCTCCTCTTGGGCTTCATACAAGGACC\n->2000017\n-TCTCCCGTGGCGAGGATCGGGGACGGGGACGGGGAATAATTTGGGGGACGGGGCGGAGAA\n-CGCGGAAGCATCCTCCGCAGATTCCCCGCC
CCGTTGACAT\n->2000018\n-AACATGTCGAGATATTTCTAGAAAAAATCTGGTTGCATATGATTTTACAAATTGTTTCAA\n-TTACATTTACTATAAATATATTTCCTAAAATAAAAAAAAA\n->2000019\n-GAATCGATGCCCATTTGAAGCAGTGGGCTCGATGCGTCTGATCATGTCAATGCCCCACAT\n-TGCGAAGGGCCAAGGAGAAGTCAGAACATTCAAAGGTACA\n->2000020\n-AAGTCCACAGTCCAAACTCCAGATGCTTAGGATAATCAAAACAACTCCAAAAGAGATTAT\n-CAATTTTTTTAGAGTTTTTGATATTATTTATTGTTTTTAG\n->2000021\n-CACGATCTCAAATATGTAAACAATTTTTATTATGACCAAACATATAACAATAAATTTGAG\n-CATGAAGGTTGAGATCTACAAAATTCAGATCATCATGAAA\n->2000022\n-GACATGTAAGTTGTATCTCTCAGAAAGAAAACATATTTAGAGAGTCTGTGTACCTCACGA\n-GTTTGTAGTGAAGTTGTGTGTGTTCTCATCCATGAGCTTT\n->2000023\n-AGAAACAAAATATAAATAAAAGAGAGTACATTACATACTTTGAGTAGTTGAAAGATCCAT\n-ATGTTGATGAACAAAGACCTAATAATTATTAGAAAGAAAA\n->2000024\n-GATTTTAGGCCTTAGGCCCATGTTTCCATTTACTCCTTGGAACCCCCATTTACTCTTTGC\n-ACTCCCCTTGAGTTTTTATTTATTTTATGCTTTCAAATAT\n->2000025\n-GCTCATGGAAAACATCATTGTGCGCAATTTCAACATGTCGGTGAAGGTTGAGATGTCTCT\n-TAATCAGCAAGAAGAAAACAAATCCATAATATTTCAAATG\n->2000026\n-GAAAAATAACTGGCGAAAACAACAATTGTAAGCACTCTGAGACGACTCTCAAACCGGTCA\n-ATTAAAGTTTTCAAAGTCTAAAGAGTAAAAGTTGATTCAA\n->2000027\n-TGCTTTTTAGGGGGAGCGTGGTTCTATTTTGATTGGTTGTTGTGTTGTTGTATGGTTGCA\n-TGGCATACCTGATGTCCTGACATCCTGACTATGATAATTT\n->2000028\n-AAGTTCATCACCATCACCATCTAGAATTTCACATTGAGATTCTAGATTGGTGGAACTAGA\n-CCTTCATCAATTCAATTCACATTCATCATCAAAGTATCAT\n->2000029\n-GGACTGTTGCAAATTTTCATATCTCAAGCCAATTAAATGTTGCAGTACTATACCGTGTAT\n-ATAACTACATCATTTATTCATTTTCCGATTAATATTTAAT\n->2000030\n-GGAGCTTGCTTCAACCCATATAAGCTCTTTCTCAATCTACACACAAGATCTTCTTTTCCT\n-TTAACTTGAAAACCATCAGGTTGTTTCATGTAGATATCTT\n->2000031\n-GTTTCTCCTCTTGAGATATCAGATTATTATTAGGACTATCGTTTCTTCTTCACCTTAAAT\n-CTACCAAAAGGACGTGAACATTGTCTTCCTCAACGGATAC\n->2000032\n-CAGGCCCTAGCGATTGTGTGTGTTTGTGTGTGTTTGAAAGAAATAATAGACAAATATACT\n-TGTTTAGTGAGCACAACCATCTAAACTACAATCGCTACCT\n->2000033\n-GAACACAAGATTATGGCATGATGATGATGATGATGTGATAAAGAAAAGATGAGAGTGAAG\n-TTAGAATATTTATACCAACTAGTGCCACTTGGTTTGGTGA\n->2000034\n-TCACCCCTAAGATCCCACATATATCTCAATACATAGCATGCATCTCATATGTTTGATTGA\n-TGTCTTTGCTTATTAATCTTCCCCAAGGTTTCCTCACTTT\
n->2000035\n-CCTTTGTAACACATTCACATCATTCCAATAAACGTTATTTGTACTGTAATTTAAACATTC\n-TAAGCCACTATAAATTCCATGGAGTGTGTC'..b'TGAGGGCT\n->2009966\n-CAACAGATTCCTGGGCAGATGTGAAGAAGAACTCCCTGAATTGGAGGTCTCAGATAATGT\n-TTTGTGTAAGGAGATAACTTCAAGGCAAGTGAAGATCGGA\n->2009967\n-GATTTGAGTAGTTTTTAGTTTAATGGTGGTTGTTTATAAAGAACCAAAACTTGACCCCAC\n-ATTTTATATTTTTTATTTGTAGTTTAATATTATGTTGCAG\n->2009968\n-CCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCT\n-AAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACC\n->2009969\n-GAAACTAATCAATCCTTTTGAAAATGGATAGAATCATTTATCCTTAAATCATGCGCCACA\n-TGGAATAACAGAATATTCTCAAATATTCTGCAATAGAATT\n->2009970\n-TCCTTGGTTGATAAGGGAAACTAGTATAATTCCCTTGTGTTCTTTACTTTTATGCATTTT\n-ATTTTCTTAATTTCATTATTGTAAGAACATAATATTCAAT\n->2009971\n-GGGAGCTCAACCTTTGAAAAGTTAAACAAAAAAGGGAATGCAACTAAATCTAGATATCCA\n-TATTTTTCAGTAATAAGAGCTAATATTTTCTGTAAACAAT\n->2009972\n-GCTTGAGATCAGATTTCACTAACCGGTTACCATACCATGAGCATGTAAAGATAACGGGCT\n-TAATGTGATTGCTTTAGAATGAAAAAGGATTTGAAAAGAG\n->2009973\n-TTCTCAGCATAAAAGAAATATTTTGCAATTCTCCTTTTTCTCTATATACCTTCTTGAGGA\n-GGCAACTCAGGCCTTATAATTCTGAGATATAATATCTTAA\n->2009974\n-CAAGTATTGGTCTCGATTTCACTTCCGTCGACCCCTTTCTTCTTACCACACTTCTGTCCT\n-CCGAGGAGGTAAAAATGAAGAAGGATTTGTGTGCTTGGTA\n->2009975\n-GTGAATCCTGGGAACACTATGAAGATCAATGTTGACAGACCAAATCCCTCCATTCAACCA\n-AGGTTTGGGTCTTTTTATTTTTGTTTTGATGGTTGTAAAA\n->2009976\n-GCTTCCACCTAAACTGACTAATCCTGGTAGATTCACCATCCCTTGTTCTATTGGGCCTGT\n-AAAATTTGGCCAAGCTCTTTGTAATTTGGGGGCAAGCATT\n->2009977\n-AATTATAACTATTCCACCTTTTTGTTGCACATAACGACATATTAATCATCTACTCGAATG\n-AGTATCTACTAATTAGATTTTAGGAGATTTAAGAATAGTA\n->2009978\n-GGCTCCAGATGAAAAGGGAAAGAAGAATTTTCAACAAAAAATGAAGGTAGGAAAGGGATA\n-AGTGGCGGACGTGTTCCTGCTGCAACACCCTACTACCCCA\n->2009979\n-CTACGTGCACCTATTGGGTCAAAATTGTTTTTATCACCTTCATCTCTTTCTTTTTATTGA\n-ACAAGTGACAAGATCATAGTCTCTACTATGCATTTAACAC\n->2009980\n-CCGAACATTCCGAGAGGAAAAAACTATATGGCGAGGACTGGGATCCAGGTGCTACTTTTT\n-TATTTAAAAAATTAAACTCGTGTTGTGCAGATAACATGAA\n->2009981\n-GAGGGGAAACTAAATATTATTTGGAATCTCTTAGATAACAAAAAAACTAAGACTAAAAAG\n-GGGGTGAGATAAGGAATGGGTGTATGGGCCTAAAATTAAA\n->20099
82\n-GTCCTTTTGAAAAGAAAAAAAGAGAAGAATAAAAGGAAAAAGAGAAGAGAAATAAGTTGT\n-GAGGGTGTTGTTTGAAAGAAATTGAGGTATCTTATAGTGC\n->2009983\n-CCCTCTGCACCCATGCAATACACTCTACTTTTGGAGCTTTACAGCTAACTCTGCTTTTGG\n-AATATGAAAATCATTAACAGCTAACTCTGCTTTTAGAGCT\n->2009984\n-GTTCTAATAACCTCCTTTTCCCCACGATAAGCGCATTTGGCAGTTCCCTGGGTGCATTCC\n-TAACAGAGCTCCTTTGTGATGTTACCAGCAATGTTGCATG\n->2009985\n-GGGGAATCACCGTCAAAGAAATCCCTATAGTCTTCATCCTCATCTTCTAGCTTATCAAAA\n-GCTGTTTGAAACTTCTCAGCCGCATCTAACATCAAATATA\n->2009986\n-GAAATATAAGGTCATGCCACTCAAAAGTACTTCTTACACCTCACTATTATCTGTAAGCTA\n-TTCAACCTTTCTCATATTTTTTTTCTTCTTTTTACTTTCA\n->2009987\n-TGAATAATTGCATACTTAGTTGAAAAAAAAGACTTAGTAAATTTTTACTAAGTGTTGAAT\n-TTCAATAAGTGAGAAAAATCCATTTTTAAAACAACATAAT\n->2009988\n-GCGTCAACATGTTTTGTCCAGATTCAAATGACGTTACATATCATTTCTTCAGATTTAGAA\n-CTTGTAAGTTCAACCGCACCCTAGATAAAACAATTAGGGT\n->2009989\n-ACACCACAATCATCACGATTTCATCAAAACACACAATAATCCTCACAACACACTAATTAT\n-CACAACATCATAATTTTCCTCCAAGTACGCCATAAACGTC\n->2009990\n-CTCTTCAACATCCTTTGAAGGTTGCAAATCGAGTCTAACAGTTGCCAGATCTACCACCAT\n-ATTCATTTCATTCATGTTACTTCCAATTTAAGCAATATTT\n->2009991\n-GGTACATGTTGTTGCAAGTTGTCTAATTTCGACCTACCATGGCCAACCTGAACTTGCAGA\n-ATTTGCATTTTCACCCTTAGGCCCATATCCCTTTTTTTTT\n->2009992\n-GTTAGTTTTCATTCTTTTGGGAGATCTGCGTTTTCCTGTGGAGTGTATGGGATTTGATCC\n-TGAACTGATTCTTCTACTATGACTTGATTTCCCCTTAACC\n->2009993\n-CTCTTGTTGCAGGACTGAAGGTCCTGGACAAAGGATATTCTATTGCAGATCATGTAAAGA\n-AGATTATTAGAAGTCTTCATAAGAAATGGAGACCTATGGT\n->2009994\n-GTTAGTTGAGTTGTAATTCAACATCTTTAATATGTTGAAAGCATATATCACTAGGGTAGT\n-GATTGAGAGAATGTGAGAAGGGTTCACATATTTATGGGGA\n->2009995\n-AGGGTTTCGATTTTGGCATTTCGGAACAACAACCAAAAAAATGGGAGGTGGTGCAGCAGA\n-TCACGGGAATGGCGGCAATGGAGATTTCAGATACAAGGTT\n->2009996\n-TGGCATGTCTTGTCTCCTTTAGTTTAAACTCTATCAAATGGACGTGAAAATTGTCTTTAT\n-CAATGAGTACTTGAATAAGGAAGTCTATGTTGAGAAAACA\n->2009997\n-TTCCTTCTCCTTCAATTGCCTTTGTAGTTCTAACTTCTTCGTATTCAAGGCATGAAACTT\n-GTTTTCCCAAGCATCTCTATCTTTCTTTATCCGAGCTAGA\n->2009998\n-GAAGAATCGGATTTTTATCGTTTTAGTTTCGGTTTCGATTTCATTCGGTTTTGTAAATAA\n-TTTTAGCTTAGGGTTTTGTTTGCTTGTTTGCTTGGTTTGT\n->2009999\n-GGTTAT
GGCACAAAAGAATTGCACATATTCACATGGAGCACTTAAACAAGTTAGTGAAGC\n-ATGACCTTGTTATCGGCCTACCAAAGATGAAGTTCCTCAA\n->2010000\n-AATTAAAGTCATATTTTGATATCTCCTTGGACATCGGTTATTTTAAAGTTTGTGTTATTA\n-AATATTTTTCAAAACTCACTACACACATGACTTCAATGGT\n'
b
diff -r d397f5a85464 -r 378565f5a875 test_data/test_run1
--- a/test_data/test_run1 Wed Sep 18 06:30:04 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,202 +0,0 @@
-"Cluster" "Chip_Hits" "Input_Hits" "Ratio"
-0 7945058 1071746 7.41319118522486
-1 278875 215538 1.2938553758502
-2 43504 94919 0.458327626713303
-3 37000 82538 0.448278368751363
-4 37253 104022 0.358126165618811
-5 32188 94960 0.338963774220724
-6 41039 81447 0.503873684727491
-7 39166 81623 0.479840241108511
-8 19339 66583 0.290449514140246
-9 29697 61497 0.482901604956339
-10 34273 82270 0.416591710222438
-11 27681 96899 0.285668582751112
-12 40545 96101 0.421899876171944
-13 31935 83895 0.380654389415341
-14 33932 90760 0.373865138827677
-15 29663 63240 0.469054395951929
-16 29084 91679 0.317237317160964
-17 17457 55788 0.312916756291676
-18 27249 55273 0.49298934380258
-19 21490 77952 0.275682471264368
-20 25851 67348 0.383842133396686
-21 27818 76020 0.365930018416206
-22 23859 72563 0.328803935890192
-23 85592 65619 1.30437830506408
-24 20936 59046 0.354571012430986
-25 21352 79958 0.267040196102954
-26 28586 62558 0.456951948591707
-27 24346 56413 0.4315671919593
-28 31336 64221 0.487940081904673
-29 16956 47383 0.357849861764768
-30 25372 59508 0.426362841970827
-31 19306 74537 0.259012302614809
-32 12498 50658 0.24671325358285
-33 22528 57473 0.391975362344057
-34 11578 38760 0.298710010319917
-35 17775 53446 0.332578677543689
-36 20979 71877 0.291873617429776
-37 24357 43553 0.559249649851905
-38 19401 65495 0.296221085579052
-39 23919 53615 0.446125151543411
-40 17944 37992 0.472309959991577
-41 21754 49360 0.440721231766613
-42 19911 59247 0.336067648994886
-43 17630 44057 0.400163424654425
-44 14639 47108 0.3107540120574
-45 10675 40313 0.26480291717312
-46 40498 55429 0.730628371430118
-47 15591 32845 0.474684122393058
-48 13229 45448 0.291079915507833
-49 15226 41012 0.371257193016678
-50 23851 49910 0.477880184331797
-51 11010 42552 0.258742244782854
-52 14233 41167 0.345738091189545
-53 17944 35795 0.5012990641151
-54 20288 38659 0.524793709097493
-55 13152 38394 0.342553523988123
-56 16631 38914 0.427378321426736
-57 249976 205389 1.21708562776001
-58 13347 28879 0.462169742719623
-59 16808 65546 0.256430598358405
-60 13355 42455 0.314568366505712
-61 16294 29082 0.560277835086995
-62 9790 52857 0.185216716801937
-63 17801 47726 0.372983279554121
-64 17157 32211 0.532644127782435
-65 20927 47539 0.440206987946738
-66 14999 49875 0.300731829573935
-67 16260 36851 0.441236330086022
-68 15925 42157 0.377754584054843
-69 11127 28004 0.397336094843594
-70 7739 25074 0.308646406636356
-71 6621 29947 0.221090593381641
-72 10923 40365 0.270605722779636
-73 9914675 84502 117.330654895742
-74 8758 38412 0.228001666145996
-75 9310 24364 0.382121162370711
-76 10614 47205 0.224849062599301
-77 13063 32597 0.400742399607326
-78 13404 29084 0.460871957089809
-79 9299 25667 0.362293996181868
-80 10109 26945 0.375171645945444
-81 15658 31730 0.493476205483769
-82 7483 32182 0.232521285190479
-83 16345 30634 0.533557485147222
-84 13293 27484 0.483663222238393
-85 5322 38749 0.137345479883352
-86 21424 33627 0.637107086567342
-87 11548 29314 0.393941461417753
-88 7762 29456 0.263511678435633
-89 12153 46433 0.261731957874787
-90 13081 22936 0.570326124869201
-91 6183 33349 0.185402860655492
-92 6357 35098 0.181121431420594
-93 48995 31410 1.5598535498249
-94 8371 31091 0.269241902801454
-95 13228 20080 0.658764940239044
-96 8453 27056 0.312426079243051
-97 9568 19505 0.490540886952064
-98 7938 32634 0.243243243243243
-99 7824 16977 0.460858808976851
-100 76514 28367 2.6972891035358
-101 10176 25158 0.404483663248271
-102 11009 30853 0.356821054678637
-103 6696 20379 0.328573531576623
-104 6767 26109 0.25918265732123
-105 12572 66266 0.189720218513265
-106 9522 20128 0.473072337042925
-107 10249 29304 0.349747474747475
-108 10362 34525 0.300130340333092
-109 9815 35803 0.274139038628048
-110 4387 16229 0.27031856553084
-111 10448 17881 0.584307365359879
-112 8117443 57680 140.732368238558
-113 9645 19854 0.485796313085524
-114 6879 27269 0.252264476145073
-115 8197 18651 0.439493860918986
-116 7111 21775 0.326567164179104
-117 16273 33720 0.482591933570581
-118 10242 29605 0.345955075156224
-119 12252 37892 0.323340019001372
-120 10751 27590 0.389670170351577
-121 6448 18451 0.349466153596011
-122 9191 25908 0.354755287941948
-123 5235 24699 0.211951900886676
-124 6465 20952 0.308562428407789
-125 7721 17814 0.4334231503312
-126 6593 26577 0.248071640892501
-127 12142 18977 0.639827159192707
-128 6505 18595 0.349825221833826
-129 3710 21375 0.173567251461988
-130 6348 16298 0.389495643637256
-131 4208 19672 0.213908092720618
-132 4136 9478 0.436378982907786
-133 7285 17145 0.424905220180811
-134 7642 14034 0.544534701439362
-135 3953 13049 0.302935090811556
-136 7778 14820 0.524831309041835
-137 7588 20607 0.368224389770466
-138 1208 3015 0.400663349917081
-139 7826 20711 0.377866834049539
-140 7551 13678 0.552054393917239
-141 4938 15140 0.326155878467635
-142 3788 16800 0.22547619047619
-143 4537 15722 0.288576516982572
-144 20198 10665 1.8938584153774
-145 28177 18344 1.53603358046228
-146 5743 19920 0.288303212851406
-147 3763 14154 0.265861240638689
-148 56754 17007 3.33709648968072
-149 5988 11740 0.510051107325383
-150 784 1172 0.668941979522184
-151 2718 13509 0.201199200532978
-152 3807 14812 0.25702133405347
-153 8382 45786 0.183069060411479
-154 3647 15198 0.239965784971707
-155 8500 32566 0.261008413682982
-156 6815 14389 0.473625686288137
-157 2307 13795 0.167234505255527
-158 2912 11803 0.246716936372109
-159 4316 11848 0.364280891289669
-160 992 2170 0.457142857142857
-161 5305 13186 0.402320643106325
-162 4569 17654 0.258808202107171
-163 3935 16460 0.23906439854192
-164 2725 13244 0.205753548776805
-165 3772 11890 0.317241379310345
-166 3081 15437 0.199585411673253
-167 837 2591 0.323041296796604
-168 2438412 16297 149.62336626373
-169 6131 16203 0.378386718508918
-170 3283 13179 0.249108430078155
-171 3106 12624 0.246039290240811
-172 9370 45868 0.204281852271736
-173 3537 15712 0.225114562118126
-174 747 2267 0.329510366122629
-175 4326 13004 0.332666871731775
-176 6665 9325 0.714745308310992
-177 4657 12573 0.370396882207906
-178 2538 10016 0.253394568690096
-179 2662 9086 0.292978208232446
-180 2441 10297 0.237059337671166
-181 544 1658 0.32810615199035
-182 664 1018 0.652259332023576
-183 31045 22110 1.4041157847128
-184 1365 4387 0.311146569409619
-185 2159 11375 0.189802197802198
-186 1933 7834 0.246744957875925
-187 2062 6753 0.305345772249371
-188 3962 11049 0.358584487283917
-189 2234 9201 0.242799695685252
-190 834 6610 0.126172465960666
-191 520 1629 0.319214241866176
-192 2677 10891 0.245799283812322
-193 509 805 0.632298136645963
-194 2155 4528 0.475927561837456
-195 2477 7482 0.331061213579257
-196 14445 15443 0.935375250922748
-197 3703 8763 0.42257217847769
-198 2873 28533 0.100690428626503
-199 1727 9618 0.179559159908505
-200 3448 6234 0.553095925569458
b
diff -r d397f5a85464 -r 378565f5a875 test_run1.sh
--- a/test_run1.sh Wed Sep 18 06:30:04 2019 -0400
+++ b/test_run1.sh Fri Nov 22 07:56:48 2019 -0500
b
@@ -9,7 +9,17 @@
 ./single_fastq_filtering_wrapper.sh -a test_data/ERR215189_1_part.fastq.gz -o tmp/test1.fasta -G tmp/test1.png -c 10 -N 0
 
 echo "single fastq filtering with with sampling"
-./single_fastq_filtering_wrapper.sh -a test_data/ERR215189_1_part.fastq.gz -o tmp/test2.fasta -G tmp/test2.png -c 10 -N 0 -n 500
+./single_fastq_filtering_wrapper.sh -a test_data/ERR215189_1_part.fastq.gz -o tmp/test2a.fasta -G tmp/test2a.png -c 10 -N 0 -n 500
+
+echo "single fastq filtering with with sampling"
+./single_fastq_filtering_wrapper.sh -a test_data/ERR215189_1_part.fastq.gz -o tmp/test2b.fasta -G tmp/test2b.png -c 10 -N 0 -n 647
+
+echo "single fastq filtering with with sampling"
+./single_fastq_filtering_wrapper.sh -a test_data/ERR215189_1_part.fastq.gz -o tmp/test2c.fasta -G tmp/test2c.png -c 10 -N 0 -n 839
+
+echo "single fastq filtering with with sampling"
+./single_fastq_filtering_wrapper.sh -a test_data/ERR215189_1_part.fastq.gz -o tmp/test2d.fasta -G tmp/test2d.png -c 10 -N 0 -n 911
+
 
 echo "single fastq filtering with contaminant removing"
 ./single_fastq_filtering_wrapper.sh -F tool_data/organele_ref_and_phi-X174.fasta -a test_data/ERR215189_1_part.fastq.gz -o tmp/test3.fasta -G tmp/test3.png -c 10 -N 0 
b
diff -r d397f5a85464 -r 378565f5a875 test_run2.sh
--- a/test_run2.sh Wed Sep 18 06:30:04 2019 -0400
+++ b/test_run2.sh Fri Nov 22 07:56:48 2019 -0500
b
@@ -6,7 +6,16 @@
 ./paired_fastq_filtering_wrapper.sh -b test_data/ERR215189_2_part.fastq.gz -a test_data/ERR215189_1_part.fastq.gz -o tmp/test2.1.fasta -G tmp/test2.1.png -c 10 -N 0
 
 echo "paired fastq filtering with with sampling"
-./paired_fastq_filtering_wrapper.sh -b test_data/ERR215189_2_part.fastq.gz -a test_data/ERR215189_1_part.fastq.gz -o tmp/test2.2.fasta -G tmp/test2.2.png -c 10 -N 0 -n 500
+./paired_fastq_filtering_wrapper.sh -b test_data/ERR215189_2_part.fastq.gz -a test_data/ERR215189_1_part.fastq.gz -o tmp/test2.a2.fasta -G tmp/test2.2.png -c 10 -N 0 -n 500
+
+echo "paired fastq filtering with with sampling"
+./paired_fastq_filtering_wrapper.sh -b test_data/ERR215189_2_part.fastq.gz -a test_data/ERR215189_1_part.fastq.gz -o tmp/test2.a3.fasta -G tmp/test2.2.png -c 10 -N 0 -n 653
+
+echo "paired fastq filtering with with sampling"
+./paired_fastq_filtering_wrapper.sh -b test_data/ERR215189_2_part.fastq.gz -a test_data/ERR215189_1_part.fastq.gz -o tmp/test2.a4.fasta -G tmp/test2.2.png -c 10 -N 0 -n 547
+
+echo "paired fastq filtering with with sampling"
+./paired_fastq_filtering_wrapper.sh -b test_data/ERR215189_2_part.fastq.gz -a test_data/ERR215189_1_part.fastq.gz -o tmp/test2.a5.fasta -G tmp/test2.2.png -c 10 -N 0 -n 839
 
 echo "paired fastq filtering with contaminant removing"
 ./paired_fastq_filtering_wrapper.sh -F tool_data/organele_ref_and_phi-X174.fasta -b test_data/ERR215189_2_part.fastq.gz -a test_data/ERR215189_1_part.fastq.gz -o tmp/test2.3.fasta -G tmp/test2.3.png -c 10 -N 0
b
diff -r d397f5a85464 -r 378565f5a875 tmp.RData
b
Binary file tmp.RData has changed