Previous changeset 4:d397f5a85464 (2019-09-18) Next changeset 6:f224513123a1 (2019-12-02) |
Commit message:
Uploaded |
modified:
ChipSeqRatioAnalysis.R paired_fastq_filtering.R paired_fastq_filtering.xml single_fastq_filtering.R single_fastq_filtering.xml test_run1.sh test_run2.sh |
added:
ChipSeqRatioReport fasta_tmp_single tmp.RData |
removed:
test_data/ERR215189_1_part.fastq.gz test_data/ERR215189_2_part.fastq.gz test_data/seq_C_10k test_data/seq_I_10k test_data/test_run1 |
b |
diff -r d397f5a85464 -r 378565f5a875 ChipSeqRatioAnalysis.R --- a/ChipSeqRatioAnalysis.R Wed Sep 18 06:30:04 2019 -0400 +++ b/ChipSeqRatioAnalysis.R Fri Nov 22 07:56:48 2019 -0500 |
[ |
@@ -3,82 +3,28 @@ library(base64enc, quietly=T) -htmlheader=" - <html xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"> +htmlheader= +" <html xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"> <head> <title> ChIP-Seq Mapper Output </title> - <style> - <!-- - table { background:#FFFFFF; - border:1px solid gray; - border-collapse:collapse; - color:#fff; - font:normal 13px verdana, arial, helvetica, sans-serif; - width: 100%; + <style> +html,body{font-family:Verdana,sans-serif;font-size:15px;line-height:1.5} - } - caption { border:1px solid #5C443A; - color:#5C443A; - font-weight:bold; - font-size:20pt - padding:6px 4px 8px 0px; - text-align:center; - - } - td, th { color:#363636; - padding:.4em; - } - tr { border:1px dotted gray; - } - thead th, tfoot th { background:#5C443A; - color:#FFFFFF; - padding:3px 10px 3px 10px; - text-align:left; - text-transform:uppercase; - } - tbody td a { color:#3636FF; - text-decoration:underline; - } - tbody td a:visited { color:gray; - text-decoration:line-through; - } - tbody td a:hover { text-decoration:underline; - } - tbody th a { color:#3636FF; - font-weight:normal; - text-decoration:none; - } - tbody th a:hover { color:#363636; - } - tbody td+td+td+td a { background-image:url('bullet_blue.png'); - background-position:left center; - background-repeat:no-repeat; - color:#FFFFFF; - padding-left:15px; - } - tbody td+td+td+td a:visited { background-image:url('bullet_white.png'); - background-position:left center; - background-repeat:no-repeat; - } - tbody th, tbody td { text-align:left; - vertical-align:top; - } - tfoot td { background:#5C443A; - color:#FFFFFF; - padding-top:3px; - } - .odd { background:#fff; - } - tbody tr:hover { background:#EEEEEE; - border:1px solid #03476F; - color:#000000; - } - --> - </style> +table { + border-collapse: collapse; + border: 1px solid black; + width: 1000pt +} +table, th, td { + border: 1px solid black; +} +</style> </head> - - " + + + +" #arguments @@ -97,21 +43,20 @@ df$"Ratio Chip/(Chip+Input)"=df$Chip_Hits/(df$Chip_Hits + df$Input_Hits) df$"Normalized ratio Chip/(Chip+Input)"=(df$Chip_Hits/chipN)/((df$Input_Hits/inputN)+(df$Chip_Hits/chipN)) -outputTable = df[df$"Normalized ratio Chip/(Chip+Input)" > threshld,] -outputTable = outputTable[!is.na(outputTable$Cluster),] +outputTable = df[df$"Normalized ratio Chip/(Chip+Input)" > threshld, + ] +outputTable = outputTable[!is.na(outputTable$Cluster), + c('Cluster', 'Chip_Hits', 'Input_Hits', + 'Normalized ratio Chip/Input','Normalized ratio Chip/(Chip+Input)')] save.image("tmp.RData") #Plot creation pngfile <- tempfile() png(pngfile, width = 1000, height = 1200, pointsize=20) -par(mfrow=c(3,1)) +par(mfrow=c(2,1)) lims=range(df$"Normalized ratio Chip/Input"[df$"Normalized ratio Chip/Input">0], finite = TRUE) suppressWarnings(plot(df$Cluster,df$"Normalized ratio Chip/Input", log="y", xlab="Cluster Nr.", ylab="Normalized ChiP/Seq ratio", pch=20, ylim=lims)) abline(h=1,col='#00000080', lwd = 2) abline(h=2,col='#FF000080', lwd = 2) -lims=range(df$"Normalized ratio Chip/Input", finite = TRUE) -suppressWarnings(plot(df$Cluster,df$"Normalized ratio Chip/Input", xlab="Cluster Nr.", ylab="Normalize ChiP/Seq ratio", pch=20, ylim=lims)) -abline(h=1,col='#00000080', lwd = 2) -abline(h=2,col='#FF000080', lwd = 2) suppressWarnings(plot(df$Cluster,df$"Normalized ratio Chip/(Chip+Input)", xlab="Cluster Nr.", ylab="Normalized Chip/(Chip+Input)", pch=20)) abline(h=0.5,col='#00000080', lwd = 2) @@ -133,7 +78,8 @@ HTML(graph, file=filename) if (nrow(outputTable)>0){ - HTML(outputTable, file=filename, classtable = "dataframe", row.names=FALSE, Align='left') + HTML(outputTable, file=filename, classtable = "dataframe", + row.names=FALSE, align='left', caption="Clusters with Normalized ChIP/Input ratio > 2", captionalign="top") } HTMLEndFile(filename) file.rename(from=filename, to=HTMLfile) |
b |
diff -r d397f5a85464 -r 378565f5a875 ChipSeqRatioReport --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ChipSeqRatioReport Fri Nov 22 07:56:48 2019 -0500 |
b |
b'@@ -0,0 +1,265 @@\n+\n+\t <html xmlns:mml="http://www.w3.org/1998/Math/MathML">\n+ <head>\n+ <title> ChIP-Seq Mapper Output </title>\n+ <style>\n+ <!--\n+ table { background:#FFFFFF;\n+ border:1px solid gray;\n+ border-collapse:collapse;\n+ color:#fff;\n+ font:normal 13px verdana, arial, helvetica, sans-serif;\n+ width: 100%;\n+\n+ }\n+ caption { border:1px solid #5C443A;\n+ color:#5C443A;\n+ font-weight:bold;\n+ font-size:20pt\n+ padding:6px 4px 8px 0px;\n+ text-align:center;\n+ \n+ }\n+ td, th { color:#363636;\n+ padding:.4em;\n+ }\n+ tr { border:1px dotted gray;\n+ }\n+ thead th, tfoot th { background:#5C443A;\n+ color:#FFFFFF;\n+ padding:3px 10px 3px 10px;\n+ text-align:left;\n+ text-transform:uppercase;\n+ }\n+ tbody td a { color:#3636FF;\n+ text-decoration:underline;\n+ }\n+ tbody td a:visited { color:gray;\n+ text-decoration:line-through;\n+ }\n+ tbody td a:hover { text-decoration:underline;\n+ }\n+ tbody th a { color:#3636FF;\n+ font-weight:normal;\n+ text-decoration:none;\n+ }\n+ tbody th a:hover { color:#363636;\n+ }\n+ tbody td+td+td+td a { background-image:url(\'bullet_blue.png\');\n+ background-position:left center;\n+ background-repeat:no-repeat;\n+ color:#FFFFFF;\n+ padding-left:15px;\n+ }\n+ tbody td+td+td+td a:visited { background-image:url(\'bullet_white.png\');\n+ background-position:left center;\n+ background-repeat:no-repeat;\n+ }\n+ tbody th, tbody td { text-align:left;\n+ vertical-align:top;\n+ }\n+ tfoot td { background:#5C443A;\n+ color:#FFFFFF;\n+ padding-top:3px;\n+ }\n+ .odd { background:#fff;\n+ }\n+ tbody tr:hover { background:#EEEEEE;\n+ border:1px solid #03476F;\n+ color:#000000;\n+ }\n+ -->\n+ </style>\n+ \n+ </head>\n+ \n+ \n+<p class=\'character\'><img src="data:image/png;base64 , iVBORw0KGgoAAAANSUhEUgAAA+gAAASwCAIAAADnu/enAAAgAElEQVR4nOzdeUBU1f//8TuKKCoiGsiYigoW2qIlbqAiaO4ZLrmlfnBNMs1Ks9JP7mlmaWkuLUYqWi5p5UctI0EgZVHUxCUJEEVQEGVnEJjfH/P5zXc+MwODOnPvXOb5+Gvm3Ms9L/b3nDn3HIVarRYAAAAAWLdaUgcAAAAAYBqFOwAAACADFO4AAACADFC4AwAAADJA4Q4AAADIAIU7AAAAIAMU7gAAAIAMULgDAAAAMkDhDgAAAMgAhTsAAAAgAxTuAAAAgAxQuAMAAAAyQOEOAAAAyACFOwAAACADFO4AAACADFC4AwAAADJA4Q4AAADIAIU7AAAAIAMU7gAAAIAMULgDAAAAMkDhDgAAAMgAhTsAAAAgAxTuAAAAgAxQuAMAAAAyQOEOAAAAyACFOwAAACADFO4AAACADFC4AwAAADJA4Q4AAADIAIU7AAAAIAMU7gAAAIAMULgDAAAAMkDhDgAAAMgAhTsAAAAgAxTuAAAAgAxQuAMAAAAyQOEOAAAAyACFOwAAACADFO4AAACADFC4AwAAADJA4Q4AAADIAIU7AAAAIAMU7gAAAIAMULgDAAAAMkDhDgAAAMgAhTsAAAAgAxTuAAAAgAxQuAMAAAAyQOEOAAAAyACFOwAAACADFO4AAACADFC4AwAAADJA4Q4AAADIAIU7AAAAIAMU7gAAAIAMULgDAAAAMkDhDgAAAMgAhTsAAAAgAxTuAAAAgAxQuAMAAAAyQOEOAAAAyACFOwAAACADFO4AAACADFC4AwAAADJA4Q4AAADIAIU7AAAAIAMU7gAAAIAMULgDAAAAMkDhDgAAAMgAhTsAAAAgAxTuAAAAgAxQuAMAAAAyQOEOAAAAyACFOwAAACADFO4AAACADFC4AwAAADJQowr3u3fvpqamSp0CAAAAMD85Fe6RkZGDBg1q3bq1t7f3Z599dv/+fb0TPvroozZt2kiSDQAAALAo2RTuZ8+e7du379GjR7Oyss6fPz937lx/f/+cnBypcwEAAABikE3hvmzZMkEQDhw4UFBQkJ+f/+mnn8bGxg4YMKCwsFDqaAAAAIDFyaZwj4+PHzt2bGBgoEKhqFu37ptvvnn06NHz58+PHTu2oqJC6nQAAACAZcmmcM/KytKbvx4QEPD1118fOnRo/vz5UqUCAAAAxGEndYDqcnd3P3v2rF7jxIkTL126tGrVqscff/ytt96SJBgAAAAgAtkU7oMHD16/fv2mTZtmzJhhZ/d/sVeuXHnz5s233347IyOjtLRUwoQAAACA5SjUarXUGaolOzu7a9euKSkpAwYMOHr0qO4htVo9d+7czz//XPtUioAAAACABclmjvtjjz0WFxf3+uuvP/XUU3qHFArFZ599tn//fg8PD0myAQAAAJYmmxF3AAAAwJbJZo67llqtTklJSU5Ozs/PFwTBycnJ09OzVatWUucCAAAALEhOhXt2dvaHH364a9euW7du6R1yd3efMmXK22+/3aBBA0myAQAAABYlm6kyGRkZPj4+qampHh4ePXv2dHd3b9iwoSAIeXl5ycnJ4eHhN2/e7NixY0REhJOTk9RhAQAAADOTzYj7okWL0tPTd+/ePXbsWMOj5eXlW7ZsmT179uLFi9evXy9+PAAAAMCiZDPirlQqBw0atG3btirOGTt27J9//pmWliZaKgAAAEAcslkOMicnx9PTs+pz2rdvbzj9HQAAAKgBZFO4K5XKhISEqs+Jj49v3ry5OHkAAAAAMcmmcB8xYsT+/fvXrl2rUqkMjxYUFCxatOjQoUNGZ8ADAAAAciebOe65ubkBAQFnzpxxdHT09vbWrCqjVqvz8/NTUlLi4+OLi4v9/PwOHz5cv359qcMCAAAAZiabwl0QhJKSks2bN4eGhiYkJFRUVGjb7ezsunbtGhQUNHnyZDs72ayTAwAAAFSfnAp3LZVKde3atfz8fIVC0ahRI3d39zp16li0x1atWrm6ulq0CwAAAFiD7Ozs1NRUqVMYIcvx6bp16z7xxBNi9ujh4XH8+HExewQAAIAk/P39pY5gnGxuTtWTmZkZGBh48uRJqYMAAAAAYpBr4V5QUPDTTz+lp6dLHQQAAAAQg1wLdwAAAMCmULgDAAAAMiCzm1PnzZuneXDv3j1BELZv337q1Cnt0bVr1z7ENW/evFlSUlL1OSZPAAAAACxKZoX7vn37NA/KysoEQTh16tT58+e1Rx+'..b'vXP3jwYM+ePb179/b19e3Zs2dpaenx48cvX778/PPPp6SkPP3003XeEhoaum7dutmzZy9fvtzGxiY5OfnMmTMdOnSQk7G3t7e9vf20adOOHTvm6ekpSdKpU6e2bNni4+Pj6ekp/r1tfeXKlXl5eYMHD37ppZdiYmIOHDiwdOnSgoKCQYMGXbt2bf369SUlJbNmzXr8D2hvbx8fHz958uSCggIPD4/GpuXk5PTv39/f3z81NfXxdwoAqsYVdwBojdq1a7d79+6UlJSQkJC8vLxvvvkmLS2ta9eu69atO3nypKura/23BAQEbN68uby8fNSoUWPGjCkrK0tKSmrXrp1884xWq01PT58xY8bhw4ejoqI+/vjjkydPLlu27ODBg/JDGEeMGDF69Ojz58+vXr26oKBACOHo6JiZmTl9+vRDhw5Nmzbtyy+/9PT0PHz4sL+/f7N8xkmTJr366qvNsikAMAeapn9HCQAAAKA14Io7AAAAoAIEdwAAAEAFCO4AAACAChDcAQAAABUguAMAAAAqQHAHAAAAVIDgDgAAAKgAwR0AAABQAYI7AAAAoAIEdwAAAEAFCO4AAACAChDcAQAAABUguAMAAAAqQHAHAAAAVIDgDgAAAKgAwR0AAABQAYI7AAAAoAIEdwAAAEAFCO4AAACAChDcAQAAABUguAMAAAAqQHAHAAAAVIDgDgAAAKgAwR0AAABQAYI7AAAAoAIEdwAAAEAFCO4AAACAChDcAQAAABUguAMAAAAqQHAHAAAAVOA/TXVT6W1ggrUAAAAASUVORK5CYII= " alt="image" /></p>\n+\n+\n+<p align= center >\n+<table cellspacing=0 border=1>\n+<caption align=bottom class=captiondataframe></caption>\n+<tr><td>\n+\t<table border=0 class=dataframe>\n+\t<tbody> \n+\t<tr class= firstline > \n+\t\t<th>Cluster </th>\n+\t\t<th>Chip_Hits </th>\n+\t\t<th>Input_Hits </th>\n+\t\t<th>Ratio Chip/Input </th>\n+\t\t<th>Normalized ratio Chip/Input </th>\n+\t\t<th>Ratio Chip/(Chip+Input) </th>\n+\t\t<th>Normalized ratio Chip/(Chip+Input)</th> \n+\t</tr> \n+<tr> \n+<td class=cellinside> 73\n+</td>\n+<td class=cellinside> 5171\n+</td>\n+<td class=cellinside> 91\n+</td>\n+<td class=cellinside> 56.8\n+</td>\n+<td class=cellinside> 56.8\n+</td>\n+<td class=cellinside>0.98\n+</td>\n+<td class=cellinside>0.98\n+</td></tr>\n+ \n+<tr> \n+<td class=cellinside>112\n+</td>\n+<td class=cellinside>15274\n+</td>\n+<td class=cellinside>240\n+</td>\n+<td class=cellinside> 63.6\n+</td>\n+<td class=cellinside> 63.6\n+</td>\n+<td class=cellinside>0.98\n+</td>\n+<td class=cellinside>0.98\n+</td></tr>\n+ \n+<tr> \n+<td class=cellinside>160\n+</td>\n+<td class=cellinside> 1\n+</td>\n+<td class=cellinside> 0\n+</td>\n+<td class=cellinside> Inf\n+</td>\n+<td class=cellinside> Inf\n+</td>\n+<td class=cellinside>1.00\n+</td>\n+<td class=cellinside>1.00\n+</td></tr>\n+ \n+<tr> \n+<td class=cellinside>168\n+</td>\n+<td class=cellinside> 1306\n+</td>\n+<td class=cellinside> 25\n+</td>\n+<td class=cellinside> 52.2\n+</td>\n+<td class=cellinside> 52.2\n+</td>\n+<td class=cellinside>0.98\n+</td>\n+<td class=cellinside>0.98\n+</td></tr>\n+ \n+<tr> \n+<td class=cellinside>208\n+</td>\n+<td class=cellinside> 3134\n+</td>\n+<td class=cellinside> 25\n+</td>\n+<td class=cellinside>125.4\n+</td>\n+<td class=cellinside>125.4\n+</td>\n+<td class=cellinside>0.99\n+</td>\n+<td class=cellinside>0.99\n+</td></tr>\n+ \n+<tr> \n+<td class=cellinside>213\n+</td>\n+<td class=cellinside> 1\n+</td>\n+<td class=cellinside> 0\n+</td>\n+<td class=cellinside> Inf\n+</td>\n+<td class=cellinside> Inf\n+</td>\n+<td class=cellinside>1.00\n+</td>\n+<td class=cellinside>1.00\n+</td></tr>\n+ \n+<tr> \n+<td class=cellinside>225\n+</td>\n+<td class=cellinside> 409\n+</td>\n+<td class=cellinside> 1\n+</td>\n+<td class=cellinside>409.0\n+</td>\n+<td class=cellinside>409.0\n+</td>\n+<td class=cellinside>1.00\n+</td>\n+<td class=cellinside>1.00\n+</td></tr>\n+ \n+<tr> \n+<td class=cellinside>236\n+</td>\n+<td class=cellinside> 4638\n+</td>\n+<td class=cellinside> 55\n+</td>\n+<td class=cellinside> 84.3\n+</td>\n+<td class=cellinside> 84.3\n+</td>\n+<td class=cellinside>0.99\n+</td>\n+<td class=cellinside>0.99\n+</td></tr>\n+ \n+<tr> \n+<td class=cellinside>250\n+</td>\n+<td class=cellinside> 1\n+</td>\n+<td class=cellinside> 0\n+</td>\n+<td class=cellinside> Inf\n+</td>\n+<td class=cellinside> Inf\n+</td>\n+<td class=cellinside>1.00\n+</td>\n+<td class=cellinside>1.00\n+</td></tr>\n+ \n+<tr> \n+<td class=cellinside>294\n+</td>\n+<td class=cellinside> 11\n+</td>\n+<td class=cellinside> 5\n+</td>\n+<td class=cellinside> 2.2\n+</td>\n+<td class=cellinside> 2.2\n+</td>\n+<td class=cellinside>0.69\n+</td>\n+<td class=cellinside>0.69\n+</td></tr>\n+ \n+\t</tbody>\n+</table>\n+ </td></table>\n+ <br>\n+\n+<hr size=1>\n+<font size=-1>\n+\t Generated on: <i>Fri Nov 22 12:59:10 2019</i> - <b>R2HTML</b> \n+<hr size=1>\n+\t</body>\n+</html>\n\\ No newline at end of file\n' |
b |
diff -r d397f5a85464 -r 378565f5a875 paired_fastq_filtering.R --- a/paired_fastq_filtering.R Wed Sep 18 06:30:04 2019 -0400 +++ b/paired_fastq_filtering.R Fri Nov 22 07:56:48 2019 -0500 |
[ |
@@ -75,7 +75,7 @@ make_option(c("-C", "--cutadapt_options"), action = "store", type = "character", help = "file specifying cutadapt options", default = NULL), make_option(c("-j", "--chunk_size"), action = "store", type = "numeric", help = "Number of sequences processed in single step. This option affect speed of processing and memory usage [default %default]", - default = 1e+06), + default = 1000000), make_option(c('-F', '--filter_seq'),action='store',type='character',help='file specifying sequences for filtering (e.g. plastid DNA)',default=NULL) ) @@ -241,16 +241,24 @@ number_of_chunks = round(n1/CHUNK_SIZE) +## adjust chunk size to make last chunk of the same size is all other +## this is to avoid small last chunk +CHUNK_SIZE = round(n1/number_of_chunks) + if (number_of_chunks == 0) { CHUNK_SIZE = n1 number_of_chunks = 1 } if (!is.null(opt$sample_size)) { - sample_size_in_chunk = opt$sample_size/number_of_chunks + sample_size_in_chunk = round(opt$sample_size/number_of_chunks) + n_missing = opt$sample_size - sample_size_in_chunk * number_of_chunks } else { - sample_size_in_chunk = CHUNK_SIZE + sample_size_in_chunk = CHUNK_SIZE + n_missing = 0 } +cat("number chunks ", number_of_chunks, "\n") +cat("chunks size ", CHUNK_SIZE, "\n") # adjust the chunk size to get exact count of sequences: CHUNK_SIZE = ceiling(n1/number_of_chunks) F_id = ifelse(opt$rename, "/1", "1") @@ -263,11 +271,14 @@ nucleotideFrequenciesForward = nucleotideFrequenciesReverse = matrix(0) while (TRUE) { chunk = chunk + 1 + cat("chunk number ", chunk, "\n") fq1 <- yield(f1) fq2 <- yield(f2) if (length(fq1) == 0) { break } + cat("chunk number ", chunk, " imported\n") + cat("chunk size", length(fq1), "\n") ## rename chunk_id = sprintf(paste0("%0", round(log10(number_of_chunks)) + 1, "d"), chunk) cat("chunk id ", chunk_id, "\n") @@ -341,7 +352,7 @@ - # remove sequences similar to filter database (e.g. plastid DNA) + ## remove sequences similar to filter database (e.g. plastid DNA) if (!is.null(opt$filter_seq)){ blast_results1 = megablast(fqF1, database=opt$filter_seq) blast_results2 = megablast(fqF2, database=opt$filter_seq) @@ -374,8 +385,9 @@ fqF2@id = BStringSet(paste0(id(fqF2), R_id)) - if (sum(inc1) > sample_size_in_chunk) { - smp = sort(sample(sum(inc1), sample_size_in_chunk)) + if (sum(inc1) > (sample_size_in_chunk + n_missing)) { + smp = sort(sample(sum(inc1), sample_size_in_chunk + n_missing)) + n_missing = 0 ## this was to correct rounding error writeFun(fqF1[inc1][smp], file = f1out, mode = "a") writeFun(fqF2[inc2][smp], file = f2out, mode = "a") nfrq1 = alphabetByCycle(sread(fqF1[inc1][smp])) |
b |
diff -r d397f5a85464 -r 378565f5a875 paired_fastq_filtering.xml --- a/paired_fastq_filtering.xml Wed Sep 18 06:30:04 2019 -0400 +++ b/paired_fastq_filtering.xml Fri Nov 22 07:56:48 2019 -0500 |
b |
@@ -1,4 +1,7 @@ <tool id="paired_fastq_filtering" name="Preprocessing of fastq paired-reads"> + <stdio> + <exit_code range="1:" level="fatal" description="Error" /> + </stdio> <description> Preprocessing of paired reads fastq files including trimming, quality filtering, cutadapt filtering and interlacing. Broken |
b |
diff -r d397f5a85464 -r 378565f5a875 single_fastq_filtering.R --- a/single_fastq_filtering.R Wed Sep 18 06:30:04 2019 -0400 +++ b/single_fastq_filtering.R Fri Nov 22 07:56:48 2019 -0500 |
[ |
@@ -201,25 +201,25 @@ number_of_chunks=round(n1/CHUNK_SIZE) +CHUNK_SIZE = round(n1/number_of_chunks) + if (number_of_chunks==0){ CHUNK_SIZE=n1 number_of_chunks=1 } if (!is.null(opt$sample_size)){ - sample_size_in_chunk=opt$sample_size/number_of_chunks + sample_size_in_chunk=round(opt$sample_size/number_of_chunks) + n_missing = opt$sample_size - sample_size_in_chunk * number_of_chunks }else{ sample_size_in_chunk=CHUNK_SIZE + n_missing = 0 } # adjust the chunk size to get exact count of sequences: CHUNK_SIZE = ceiling(n1/number_of_chunks) save.image("tmp.RData") -print("--------------------------------") -print (sample_size_in_chunk) -print (opt$sample_size) -print (CHUNK_SIZE) -print("--------------------------------") + f1 <- FastqStreamer(opt$fastqA, CHUNK_SIZE) total=0 nucleotideFrequenciesForward = matrix(0) @@ -291,8 +291,9 @@ # filter complete pairs again: # create new id - last character must differentiate pair - for interlacig - if (length(fqF1)>sample_size_in_chunk){ - smp=sort(sample(seq_along(fqF1),sample_size_in_chunk)) + if (length(fqF1)>(sample_size_in_chunk + n_missing)){ + smp=sort(sample(seq_along(fqF1),sample_size_in_chunk + n_missing)) + n_missing = 0 writeFun(fqF1[smp],file=f1out,mode='a') nfrq1 = alphabetByCycle(sread(fqF1[smp])) |
b |
diff -r d397f5a85464 -r 378565f5a875 single_fastq_filtering.xml --- a/single_fastq_filtering.xml Wed Sep 18 06:30:04 2019 -0400 +++ b/single_fastq_filtering.xml Fri Nov 22 07:56:48 2019 -0500 |
b |
@@ -1,4 +1,7 @@ <tool id="single_fastq_filtering" name="Preprocessing of fastq reads"> + <stdio> + <exit_code range="1:" level="fatal" description="Error" /> + </stdio> <description> Preprocessing of fastq files including trimming, quality filtering, cutadapt filtering and sampling |
b |
diff -r d397f5a85464 -r 378565f5a875 test_data/ERR215189_1_part.fastq.gz |
b |
Binary file test_data/ERR215189_1_part.fastq.gz has changed |
b |
diff -r d397f5a85464 -r 378565f5a875 test_data/ERR215189_2_part.fastq.gz |
b |
Binary file test_data/ERR215189_2_part.fastq.gz has changed |
b |
diff -r d397f5a85464 -r 378565f5a875 test_data/seq_C_10k --- a/test_data/seq_C_10k Wed Sep 18 06:30:04 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,30000 +0,0 @@\n->2000001\n-GTTTATCTGATTTGGCATGAGTATTTCATGGTGTACAATGCATTTGATGTATCTGAGAAT\n-TCTCTTAGCTACAAGTATAAATTGTATGATCTTGGTTCCT\n->2000002\n-TGATCTACCAAATTATCTAATGAGGCTTTATAGAAGCTAGCATATAGGAAGAAGATATTG\n-TTTCCTGAGAGATCTAGTTGGCAAGATCGGAAGAGCACAC\n->2000003\n-GTTCAATATGGTGAAGTATTGATATTTCTTTTCATCAATACCGTGAGGTATTGATCTTTC\n-TTTTTCTTCAAAACCGTGAAGTATTGATCTTCCCTAGATC\n->2000004\n-TGCAACACTAGATCTTTTACTTGTCATATTGCGGGCTTATTAAGCAGATAGATGGAGGAA\n-CCAAGATCATACAATTTATAGTTTGAGCTAAAAGATTCCT\n->2000005\n-CTTTTTGGTTTATTACGGTTTTGGATTTTGCTTACACGGGATTGACCTTTCAACGACTCA\n-TGTTATGTAATGTAGTTCTTTGTTCATCGAGTACTGGACC\n->2000006\n-GAATATTGGAAGACTGAGCTACAACTTGCAGATTTTCTAACCAAGCCATCAAAGAAAACA\n-AGACTTGAGGGTTTTAAAATACTTATGTAAATGAGAAGAC\n->2000007\n-TCCCGGTAGATTGAAAACCCGAAAGGGCAATCTAGGAAAAAGTTAGGGATTCATGGCAAG\n-TAACTGCATCATAAGACTTGATCGCCCGCAACGTCGAGAT\n->2000008\n-TAAAGAGAACTACGTTACATAACATGAGCAGTTGAAAGGTCAATCCCAAGTAAGCAAAAT\n-CAAAGCCATAATAAGCCAAAGAGAAAAGAAAGGAAGATCG\n->2000009\n-GACAAAGACTCAACCATAAAGAAGAAGTTGCAGAGAAAAGAAGTGCTTTACATACTATGA\n-GTAGTTGAAACGTACCTTATCTGTAGACCAAATAGCTAGT\n->2000010\n-TATAAAATCATAAAAACGTAACGATTTGTTATCTCGTTGTCATTCTGACTTCCTCTCAAA\n-CTCTGAATTGGAATAGCTAGATGCAAGATCGGAAGAGCAC\n->2000011\n-TGCCAAATAAGAAAAACTCAAGAAAGGGGAGGGAAATGCATATGTCTATTCTAATTCAGA\n-TATAAAAGAAAGACAACATGACATGTTGCAGCCTCTAGAT\n->2000012\n-CTTTCTATCGAAATCTTTACTTCATTGCTTTATCGCTTTTCTTAACCTTTTTTATTGTCA\n-AACTGTTCATAGATCGGAAGAGCACACGTCTGAACTCCAG\n->2000013\n-GGATGGGTAGTCGATTTATCCGCCAATTGCAATGTCATTTTTGTTGCTTTCATATCAATG\n-CTCCCTAATCTTTTCACAATCGACAATAGAATCAAGTTTA\n->2000014\n-GACTCGTAGATGGAACCTTGTACAAGCAGATCATCAATTCACTAAAGTACATATGCAACA\n-CGAGACATTATATTTGTCATAGTGTGGTCTTAGTAAGCAG\n->2000015\n-GTACTAGTATGATATCATGCTCGTTATTCTTGATGAAAAGAGTGTTATCAACCTGTCCTC\n-CATGTAACATTTTTTCAGGCGGGAATTTAATTAAAGTGGC\n->2000016\n-GAGTTTTTGAAAGGATCAATCCTTATCTACCAAAGACTCAACCATAAAGAAGAAGTTGCA\n-GAGAAAAGAAGTGCTTTACATACTATGAGTAGTTGAAGCG\n->2000017\n-GAGCCCAATTCCTATATTCCGATGAAGCTCTTCATTAGAGCTTTAATTGGAAGAGCTAAG\n-ACTCATCCTCTCTCTCAACACAGATAAAAAACCACACATT\n->2000018\n-CTCTTTTTAACCTTTGTAACACATTCACGTCATTCCAATAAACATTATTTTTACCTTGAT\n-TTCAGTCTAAGACATTATAAATTCCATGGAGTCTGTAAAA\n->2000019\n-ACTAAATGAAGAAAAGAAACTACATTACACAACATGAGTAGTTGAAAGGGCCAACCCTTG\n-TTAATTAAAGACTTAAGCCTAGTAAACCAAAAACAGAGAA\n->2000020\n-CCATTCATCAAGCTACGGTACATACTTTGACTTAGCTTATAGGTAAATTCATCAAGGTAA\n-CGTAGATATTTTGACTTAGTGTAAGGTCGAATCCTCAACG\n->2000021\n-CGTCAAAAAATGTACATTTCAACTACTCAAACTATGTAATGTACTTTCAATCCTCTTTAT\n-TTGTTTTCTTATTGTTAAGTCTTTGATCAACATCGGTCAG\n->2000022\n-GTGATTAGGGATTTCGTCGACAAGGAAGAGACGTTTCAACTAATAAAATTATGTAAGTCA\n-CTTATTTTCTCTTCATTTGCTTCTTTAAGTTTGAGTCTCT\n->2000023\n-TACTTAGAGTTAAGAAAATTCTCACATACATCAAATGCACTCTAGACCCTGAAATACTCA\n-TGCCAAATCTGATAAACTCAAGAAGGGAAGCAAATGCATC\n->2000024\n-CCATGACATGATGCACAGGGAAATTGAAGTGTATGTTGATGACATGATTGCCAAATCTCG\n-ATCTGAAGAGGGCCATCTTGATGATTTGTTAAAACTGTTT\n->2000025\n-CTGCTTGTACAAGGTTCCATCTACCCTTCTATCAATTGAGTCCTTATTCAGTTCGATTGA\n-TGTTTTAATGGAATTTATAACGTCTTAGAATGAAATCGAG\n->2000026\n-GCCATCAAAGAAAACAAGACTTGAGGGTTTTAAAATACTTATGTAAATGAGAAGACTGGA\n-TGATCCGAATTAAAATGAGTATTGGCAATGTAATTCAATT\n->2000027\n-AGGACATTTTGCAGGGCTTCCCGGTGGGCTTCTGAGTGCAGTAGCAAAGAGAGTATTGAT\n-ATTCTAGAAGGTGTTTGTAACAGTTGATAGATCGGAAGAG\n->2000028\n-GTGACTTTGTGGCTTTATTTCACAAATTGAGCCAACGGAAATTAGTGAAGCTATAGTCGA\n-CAAACATTGGTATCTTGCAAGATCGGAAGAGCACACGTCT\n->2000029\n-CTTCATTTTCTTTCTAATAATTATTAGGTCTTTCTTCATCAACATATGGATCTTTCAACT\n-ACTCAAAGTATGTAATGTACTCTCTTTTATTAGATCGGAA\n->2000030\n-GAGATGGTGAATTGACTGGTTATCTTGGGTGACAACAATTCACAATTTGGATTCGGCCGA\n-GTGAAAGCTTTGAAGAACGGTGATTTCTTGCAAAGGTGTA\n->2000031\n-AACTAGAGGAATGGCCTTGATAAATTTACCTTTACGTTATGTTAAAGTACATATTCTACA\n-TTGATGAAGATCGGAAGAGCACACGTCTGAACTCCAGTCA\n->2000032\n-GGTTTCTTTTATCTCAAAGTATAAATTGTATGATCTTGGTTCCTCCGTCTATCTGCTTAC\n-TAAGCCCGCATTATGATAAATAAAAGGTCTAGATCGGAAG\n->2000033\n-GGATAGGTCCATCCATTCTAAACCTAATCCTAAACTATAGGAAACCAAATGAAAAGAGAG\n-GAAATATTACACATGAAATGCGTAGTTGAAAGGATCAATC\n->2000034\n-GCTCATGGATCAGTTTGAACAGACTCAGAAGATGCTTAGGGACGAAGTAAATGTCATGTT\n-TGGTAAACTTGTGGAAGCTCTCTCGAGATCGGAAGAGCAC\n->2000035\n-CCTAAAAGACGAAAGAAAGTACGTTACAAACTTGGAGTAGTTAAAAGGTTCATTCCTCGT\n-TGAACAAAGACCTAGTAATTAAAAAACAAA'..b'TAACCCAA\n->2009966\n-GTCATTTTGACTTTCTTTTAAATCTGAATGAGAATAGACATATGCATTTGCCTCCCTTCT\n-TGAGTTTTTCTTATTTGGCATGAGTATTTCAAGGTCTAGA\n->2009967\n-TCGGGGTTTTACAGGGAGCATCTCAATTTCCGGAGTCAATCCCAAATCACAAGGAGCATT\n-ACCTTTTTGTTGACAATGTGGAAGACATCGTTCGATTGGA\n->2009968\n-AGACTTAAGCCTAGTAAACCAAAAACAGAGAAAAATGTACATTGCATACTATGAGTAGTT\n-GAAATGTATGACCCTTGTTGACTAAAGACTTAACCATAAG\n->2009969\n-GGTTGATCTTTTGTTAGTTTTCATTCTTTTGGGAGATCTGCGTTTTCCTGTGGAGAGTAT\n-GGGATTTGTTGCTGAACTGATTCTTCTACTATGACTAGAT\n->2009970\n-AGTCACTTATTTTCTCTTCATTTTCTTCAAGTTTGAGTCTCTGGTTGATAAGAACTGATC\n-CTTTCAACTACTCATAGTATGTGAAATATTTCCTTTCTTT\n->2009971\n-TCTTACGATTAAGTCTTTGAGAAACAAGGGGTTCTCTTTTTAACCACTAATGTATTTTAT\n-GTACTCTTCATTTGATTTATATTACTTGGTCTTTGATCAT\n->2009972\n-GGTGAAGTTCTGAAGATCAAGTTCTTAGTTGAAAACAAATCAATAATTTTTAGAAGCAAG\n-CATATAGAAAGAAGATATTTTTTCCTGGGAGATCAAGTTG\n->2009973\n-GTTGACAACAAATCAAAAATTGATCTACCAAATTATCTAATGAGGCATGTTAGAAGCAAG\n-CATATAGAAAGAAGATATTGTTTCCTAAGAGATCAAGTTG\n->2009974\n-AAGGATGTAATGTACCTTGATGAATGGACCTTTACACTAAGTCAAAGTATGTACCGTACC\n-TTGAAGAATGCACGTGTACACTATGTCAAAGTATGTGCCT\n->2009975\n-GCAACCCAAGCAAACTGTGACCACTTAACCATTTAAAAATTTAGTAAGTTGACTAAATAT\n-TTGGCTATATATATACTCTTTTAAACCTTTGTAACACACA\n->2009976\n-GAAAAACTCATGAAAGGGGAGGGAAATGCATACGTCTATTCTAATTCAGATTTAAAAGAA\n-AGACAAAATGACAAGATCGGAAGAGCACACGTCTGAACTC\n->2009977\n-GGTGATTGGAGCTTATACCATACAGAGAGGCCCATTGAAAGTTTATTGAAAACATATAGA\n-AAAGATATTTATAAAAATGGTTGGGTCTTACACTCTATAT\n->2009978\n-GACGTTTCAACTAATAAAATTATGTAAGTCACTTATTTTCTCTTCATTTGCTTCTTTAAG\n-TTTGAGTCTCTGGTTGATAAGAACTGATCCTTTCAACTAC\n->2009979\n-GTCATAGTAGAAGAATCAGTTCAGGATCAAATCCCATACACTCCACAGGAAAACGCAGAT\n-CTCCCAAAAGAATGAAAACTAACAAAAGATCATCCGATAG\n->2009980\n-GCGGTTTTAGCGCAAAGGTTTTGAAAAGGTGGTAAAAGCAAGCAAACTAGCCTAAACTAA\n-TGCAAGAAATAAATTGGTCTCATTGTAAGGTAGCCCAAGA\n->2009981\n-GGAAGCAAATTGAAAAGAAAAACAAAGCGGGAAATTTACTTCTGCCAAGGAACTTAGGAA\n-GCAAATTGAAAAGAAAAACAAAGCGGGAAATTTACTTCTC\n->2009982\n-GAGGTTTCTTTTATCGCAAAGTATAAATTGTATGATCTTGGTTCCTCCGTCTATCTGCTT\n-ACTAAGCCCGCCTTATGATAAATAAAAGGTCTAAGATCGG\n->2009983\n-TCTAATTCCTAGGTCATAGTTCATCAAGGAATGGACCTTTCAACTAATCAAAGTATGTAA\n-TGTACTTTATTTCCTCTTTATTTGGTTTCTTATGGTTAAG\n->2009984\n-GACTTGAGGGTTTTAAAATACTTATGTAAATGAGAAGACTGGATGATCCGAATTAAAATG\n-AGTATTGGCAATGTAATTCAAAGATCGGAAGAGCACACGT\n->2009985\n-CATAAAATACATTAGTGGTTAAAAAGAGAACCCCTTGTTTCTCAAAGACTTAATCGTAAG\n-AAACCAAATAAACAAATAAGAAGTACTTAGATCGGAAGAG\n->2009986\n-GTGTTGCAAGTATTATTGCAAGTCTAATACTTGCAACACGACTTGCAACACAACTAACTG\n-CTAATATAGTTGAATTATAGTGCCAATACTCATTTTATTT\n->2009987\n-ATGAAAAAAAAGAAACTACATTACACAACATGAGTAGTTGAAAGGGCCAACCCTTGTTAA\n-TTAAAGACTTAAGCCTAGTAAACCAAAAACAGAGAAAAAT\n->2009988\n-AGTTCCATTCATCAATGTAGAATATGTACTTTAACATAACGTAAAGGTCCATTTATCAAG\n-GCCATTCCTCTAGTAAGACCACATACATCAACTTAGTGAA\n->2009989\n-CCCTCAAGTCTTGTTTTCTTTGATGGCTTGGTTAGAAAATCTGCAAGTTGTAGCTCAGTC\n-TTCCAATATTCAATCTTCAAACTTCTTATTTGCCAACTTG\n->2009990\n-GGGGTTTTTTTTTACTAGAAGTAATATGCAAAAGATATCTTGAAGAAGCTCAAGAAGAGT\n-AAATTGCAACCCTGCAATTACTCCATTGAAACGTGAATCG\n->2009991\n-ACAACTCATTGTATGTAAAGCACTGAATTTCTCTTCAATTTCTTCTTCATGGTTGAGTCT\n-TTGGTCGAAGATCGGAAGAGCACACGTCTGAACTCCAGTC\n->2009992\n-GAGAAGGTGACCTGGTACTAAAAAACATTAATCTTTCCTCACAGACTCTAGGGGCAAATG\n-GACGCCTAATTATGATGGGCCATACGTCGTCAAGAAAGCC\n->2009993\n-GTCTTAGAATGAAATCAAGTTAAAAATAATGTTTATTGGAATGACGTGAATGTGTGTTAC\n-AAAGGTTTAAAAGAGTATATATATATAGCCCAATATTTAG\n->2009994\n-GGTTGAGTCTGTGGTCGATAAGAATTGATCATTTCAACTACTCATAGTATGTCTTAGTTT\n-TCCTTTCTTTTTATTCGGTTTATTAGTTTAATATGGTTTT\n->2009995\n-ATACATTACATATCATTAGTTTAAAGGTAAGAAGTACTTTACAGGTAAGAAGAAGTACGT\n-TACATAATATGAGTAGTTTAAAGGTCCATTCTTTGTTGAC\n->2009996\n-CTTTATTTGGATTTTAATGCCTACGTCTTTGGCCAAAGACGCCTGGACTTTTCAACTAAT\n-CATATTATGGAAACTACTTCGTACCTATTTATCTATTCAC\n->2009997\n-GATCCACTAACCAAATGTTTGCACATAAGACTTTTCATGGGTATGTTGGTCACACGGGTC\n-TTGGTTTTGAGATTGCCTTTGATTCGAACGAGTATTTTCC\n->2009998\n-CCCTAGCGGATTTTCTCTCTCGACATATTACATTCCTTAGCAAAATTTCTTTGCATTCAA\n-GGGAGAATCTCAGTTCCCAATGGACTAAATTCCCCAGTGA\n->2009999\n-TCACCAATATTCAACTCTTCAAATAACAATTCAATTCACAAGGCTTGGTAGTTTAAATAA\n-GCGGCCGCAACATATTCTACTTCACATGATGGATGATAAG\n->2010000\n-TGGTTGAGTCTGTGGTCGATAAGAATTGATCATTTCAACTACTCATAGTATGTCTTAGTT\n-TTCCTTTCTTTTTATTCGGTTTAGATCGGAAGAGCACACG\n' |
b |
diff -r d397f5a85464 -r 378565f5a875 test_data/seq_I_10k --- a/test_data/seq_I_10k Wed Sep 18 06:30:04 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,30000 +0,0 @@\n->2000001\n-TCTTTCCATTCAAAGCACAAGTACATTTGAATCAGGGTTTATACTACAAAACGTCCAGGT\n-ACATAAGCTTAATCTCCAAGTTCCAATTCATAACTTTTCC\n->2000002\n-GGTCAATACTCCCTCTCTGCATGTATTCACAGAAATCCATTTACCTGAGGCTGAATGGTG\n-TCAAGATAGGTTTGACCAGCTTGATTCGGTCGAAGGAAAA\n->2000003\n-TTGTTAGGAGAAATAATTAAAACTCCAACACATGTTCCATCCCTATGACTAGAACCATCA\n-AAATATAACTTCCATGGCTCGAGCTCTAGATAGTTTTGGG\n->2000004\n-CACAAACCAAATCTTTAAAATTCCCTAAAAATAAACGTCATATGCTAAGAACATTTTTGA\n-TGCAAATGTATGCAATGTCATGTAATGCTTTAATCTCAGG\n->2000005\n-ATCTTTAATATTTAAAAATTAATTACAAGTGAATTCTACAAATTAATGTTTCAAATTGAT\n-GTTACAAATTAAAATACAAATTAAATAATTACAAATAATA\n->2000006\n-AGAGAAAAATCATATGTGTTTGAAGTCTTTAAAGATCTATGTCAAATATTACAAAGAGAG\n-AAAGTGGAATAATCAGAATCAGAAGTGACCATGGTAAGGA\n->2000007\n-GACGGAAGTATGAGCGGCAACCATCGATGAAGATAGCTCCTCCGACGTTACGATCCTTCC\n-AGGTATCTCATTTTGCTCTTTTTTCCTTTCTCCAATAAAA\n->2000008\n-CTCTTTTTTTTTATAATTGTCTAAAATTTAACAAATTTATAAATAGATAAAAGTTTGGTA\n-CAATTAAGGACAAAAATTGTACTCTCTAAAATTTATAAAA\n->2000009\n-GAGATTTGCCGGTGATGATGCACCGTTACCGCCTGTGTACGGTTGTGATTATAGCGAGGG\n-TGATCGGACGGACGAGATGGTTGGACTTGTTAGTCTTTTG\n->2000010\n-GGCAAAGGCCAAGGGTAAACAGGTCATGATTGACGATCAAGATTCTGCACCAGTAAATAT\n-CCCCAAGCAAAGTGCGATGCCCGAAGCTTCTTCGTCTCAA\n->2000011\n-GGACGAGTATTGAAAAAAGAATATTAACATGGACGAGTGTTGGAAATCGATACGAACGAG\n-TATTGGAAAAAGATAAGAATCAACACGGACGAGTGTCGGA\n->2000012\n-TCCTGTTATGCGGCATGGGAAAATTTTATATAGGTCTCTCATGTCCCTCAACGTGATTTG\n-TGGAGTATCCATCAACCATATTTATAGTCATCTTGCTACA\n->2000013\n-TGTTCAATGTCATATTTATCACAATATTCCTCAAAGAGGTGGTTTTCAAATTCACCTCCG\n-TGATCGGTTCAAATAGCAACTATTTTTAAACTAAACTTGT\n->2000014\n-GTACAACCTCTAGGTATAAGTTCGATATATCCTCCGCATCCTCTGCACTCAGACCTTGAG\n-ATCTATCAGACTTTGGAGATAATGCAGCTGCAATAATTGA\n->2000015\n-TGTTCTTCATTAATCTATGATCATGTTGCTGCAAATTACATGGATACGTATAACAACATC\n-CTTAATCTGTCAATATATGGCCCAATGCTGCAAACCAGAC\n->2000016\n-AGGACCCTCGATGTCCCTCGGATAGCCTTCTCTTGGGCTCAAAATACAAGGACCCTCGAT\n-GTCCCTCGGATAGCCTCCTCTTGGGCTTCATACAAGGACC\n->2000017\n-TCTCCCGTGGCGAGGATCGGGGACGGGGACGGGGAATAATTTGGGGGACGGGGCGGAGAA\n-CGCGGAAGCATCCTCCGCAGATTCCCCGCCCCGTTGACAT\n->2000018\n-AACATGTCGAGATATTTCTAGAAAAAATCTGGTTGCATATGATTTTACAAATTGTTTCAA\n-TTACATTTACTATAAATATATTTCCTAAAATAAAAAAAAA\n->2000019\n-GAATCGATGCCCATTTGAAGCAGTGGGCTCGATGCGTCTGATCATGTCAATGCCCCACAT\n-TGCGAAGGGCCAAGGAGAAGTCAGAACATTCAAAGGTACA\n->2000020\n-AAGTCCACAGTCCAAACTCCAGATGCTTAGGATAATCAAAACAACTCCAAAAGAGATTAT\n-CAATTTTTTTAGAGTTTTTGATATTATTTATTGTTTTTAG\n->2000021\n-CACGATCTCAAATATGTAAACAATTTTTATTATGACCAAACATATAACAATAAATTTGAG\n-CATGAAGGTTGAGATCTACAAAATTCAGATCATCATGAAA\n->2000022\n-GACATGTAAGTTGTATCTCTCAGAAAGAAAACATATTTAGAGAGTCTGTGTACCTCACGA\n-GTTTGTAGTGAAGTTGTGTGTGTTCTCATCCATGAGCTTT\n->2000023\n-AGAAACAAAATATAAATAAAAGAGAGTACATTACATACTTTGAGTAGTTGAAAGATCCAT\n-ATGTTGATGAACAAAGACCTAATAATTATTAGAAAGAAAA\n->2000024\n-GATTTTAGGCCTTAGGCCCATGTTTCCATTTACTCCTTGGAACCCCCATTTACTCTTTGC\n-ACTCCCCTTGAGTTTTTATTTATTTTATGCTTTCAAATAT\n->2000025\n-GCTCATGGAAAACATCATTGTGCGCAATTTCAACATGTCGGTGAAGGTTGAGATGTCTCT\n-TAATCAGCAAGAAGAAAACAAATCCATAATATTTCAAATG\n->2000026\n-GAAAAATAACTGGCGAAAACAACAATTGTAAGCACTCTGAGACGACTCTCAAACCGGTCA\n-ATTAAAGTTTTCAAAGTCTAAAGAGTAAAAGTTGATTCAA\n->2000027\n-TGCTTTTTAGGGGGAGCGTGGTTCTATTTTGATTGGTTGTTGTGTTGTTGTATGGTTGCA\n-TGGCATACCTGATGTCCTGACATCCTGACTATGATAATTT\n->2000028\n-AAGTTCATCACCATCACCATCTAGAATTTCACATTGAGATTCTAGATTGGTGGAACTAGA\n-CCTTCATCAATTCAATTCACATTCATCATCAAAGTATCAT\n->2000029\n-GGACTGTTGCAAATTTTCATATCTCAAGCCAATTAAATGTTGCAGTACTATACCGTGTAT\n-ATAACTACATCATTTATTCATTTTCCGATTAATATTTAAT\n->2000030\n-GGAGCTTGCTTCAACCCATATAAGCTCTTTCTCAATCTACACACAAGATCTTCTTTTCCT\n-TTAACTTGAAAACCATCAGGTTGTTTCATGTAGATATCTT\n->2000031\n-GTTTCTCCTCTTGAGATATCAGATTATTATTAGGACTATCGTTTCTTCTTCACCTTAAAT\n-CTACCAAAAGGACGTGAACATTGTCTTCCTCAACGGATAC\n->2000032\n-CAGGCCCTAGCGATTGTGTGTGTTTGTGTGTGTTTGAAAGAAATAATAGACAAATATACT\n-TGTTTAGTGAGCACAACCATCTAAACTACAATCGCTACCT\n->2000033\n-GAACACAAGATTATGGCATGATGATGATGATGATGTGATAAAGAAAAGATGAGAGTGAAG\n-TTAGAATATTTATACCAACTAGTGCCACTTGGTTTGGTGA\n->2000034\n-TCACCCCTAAGATCCCACATATATCTCAATACATAGCATGCATCTCATATGTTTGATTGA\n-TGTCTTTGCTTATTAATCTTCCCCAAGGTTTCCTCACTTT\n->2000035\n-CCTTTGTAACACATTCACATCATTCCAATAAACGTTATTTGTACTGTAATTTAAACATTC\n-TAAGCCACTATAAATTCCATGGAGTGTGTC'..b'TGAGGGCT\n->2009966\n-CAACAGATTCCTGGGCAGATGTGAAGAAGAACTCCCTGAATTGGAGGTCTCAGATAATGT\n-TTTGTGTAAGGAGATAACTTCAAGGCAAGTGAAGATCGGA\n->2009967\n-GATTTGAGTAGTTTTTAGTTTAATGGTGGTTGTTTATAAAGAACCAAAACTTGACCCCAC\n-ATTTTATATTTTTTATTTGTAGTTTAATATTATGTTGCAG\n->2009968\n-CCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCT\n-AAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACC\n->2009969\n-GAAACTAATCAATCCTTTTGAAAATGGATAGAATCATTTATCCTTAAATCATGCGCCACA\n-TGGAATAACAGAATATTCTCAAATATTCTGCAATAGAATT\n->2009970\n-TCCTTGGTTGATAAGGGAAACTAGTATAATTCCCTTGTGTTCTTTACTTTTATGCATTTT\n-ATTTTCTTAATTTCATTATTGTAAGAACATAATATTCAAT\n->2009971\n-GGGAGCTCAACCTTTGAAAAGTTAAACAAAAAAGGGAATGCAACTAAATCTAGATATCCA\n-TATTTTTCAGTAATAAGAGCTAATATTTTCTGTAAACAAT\n->2009972\n-GCTTGAGATCAGATTTCACTAACCGGTTACCATACCATGAGCATGTAAAGATAACGGGCT\n-TAATGTGATTGCTTTAGAATGAAAAAGGATTTGAAAAGAG\n->2009973\n-TTCTCAGCATAAAAGAAATATTTTGCAATTCTCCTTTTTCTCTATATACCTTCTTGAGGA\n-GGCAACTCAGGCCTTATAATTCTGAGATATAATATCTTAA\n->2009974\n-CAAGTATTGGTCTCGATTTCACTTCCGTCGACCCCTTTCTTCTTACCACACTTCTGTCCT\n-CCGAGGAGGTAAAAATGAAGAAGGATTTGTGTGCTTGGTA\n->2009975\n-GTGAATCCTGGGAACACTATGAAGATCAATGTTGACAGACCAAATCCCTCCATTCAACCA\n-AGGTTTGGGTCTTTTTATTTTTGTTTTGATGGTTGTAAAA\n->2009976\n-GCTTCCACCTAAACTGACTAATCCTGGTAGATTCACCATCCCTTGTTCTATTGGGCCTGT\n-AAAATTTGGCCAAGCTCTTTGTAATTTGGGGGCAAGCATT\n->2009977\n-AATTATAACTATTCCACCTTTTTGTTGCACATAACGACATATTAATCATCTACTCGAATG\n-AGTATCTACTAATTAGATTTTAGGAGATTTAAGAATAGTA\n->2009978\n-GGCTCCAGATGAAAAGGGAAAGAAGAATTTTCAACAAAAAATGAAGGTAGGAAAGGGATA\n-AGTGGCGGACGTGTTCCTGCTGCAACACCCTACTACCCCA\n->2009979\n-CTACGTGCACCTATTGGGTCAAAATTGTTTTTATCACCTTCATCTCTTTCTTTTTATTGA\n-ACAAGTGACAAGATCATAGTCTCTACTATGCATTTAACAC\n->2009980\n-CCGAACATTCCGAGAGGAAAAAACTATATGGCGAGGACTGGGATCCAGGTGCTACTTTTT\n-TATTTAAAAAATTAAACTCGTGTTGTGCAGATAACATGAA\n->2009981\n-GAGGGGAAACTAAATATTATTTGGAATCTCTTAGATAACAAAAAAACTAAGACTAAAAAG\n-GGGGTGAGATAAGGAATGGGTGTATGGGCCTAAAATTAAA\n->2009982\n-GTCCTTTTGAAAAGAAAAAAAGAGAAGAATAAAAGGAAAAAGAGAAGAGAAATAAGTTGT\n-GAGGGTGTTGTTTGAAAGAAATTGAGGTATCTTATAGTGC\n->2009983\n-CCCTCTGCACCCATGCAATACACTCTACTTTTGGAGCTTTACAGCTAACTCTGCTTTTGG\n-AATATGAAAATCATTAACAGCTAACTCTGCTTTTAGAGCT\n->2009984\n-GTTCTAATAACCTCCTTTTCCCCACGATAAGCGCATTTGGCAGTTCCCTGGGTGCATTCC\n-TAACAGAGCTCCTTTGTGATGTTACCAGCAATGTTGCATG\n->2009985\n-GGGGAATCACCGTCAAAGAAATCCCTATAGTCTTCATCCTCATCTTCTAGCTTATCAAAA\n-GCTGTTTGAAACTTCTCAGCCGCATCTAACATCAAATATA\n->2009986\n-GAAATATAAGGTCATGCCACTCAAAAGTACTTCTTACACCTCACTATTATCTGTAAGCTA\n-TTCAACCTTTCTCATATTTTTTTTCTTCTTTTTACTTTCA\n->2009987\n-TGAATAATTGCATACTTAGTTGAAAAAAAAGACTTAGTAAATTTTTACTAAGTGTTGAAT\n-TTCAATAAGTGAGAAAAATCCATTTTTAAAACAACATAAT\n->2009988\n-GCGTCAACATGTTTTGTCCAGATTCAAATGACGTTACATATCATTTCTTCAGATTTAGAA\n-CTTGTAAGTTCAACCGCACCCTAGATAAAACAATTAGGGT\n->2009989\n-ACACCACAATCATCACGATTTCATCAAAACACACAATAATCCTCACAACACACTAATTAT\n-CACAACATCATAATTTTCCTCCAAGTACGCCATAAACGTC\n->2009990\n-CTCTTCAACATCCTTTGAAGGTTGCAAATCGAGTCTAACAGTTGCCAGATCTACCACCAT\n-ATTCATTTCATTCATGTTACTTCCAATTTAAGCAATATTT\n->2009991\n-GGTACATGTTGTTGCAAGTTGTCTAATTTCGACCTACCATGGCCAACCTGAACTTGCAGA\n-ATTTGCATTTTCACCCTTAGGCCCATATCCCTTTTTTTTT\n->2009992\n-GTTAGTTTTCATTCTTTTGGGAGATCTGCGTTTTCCTGTGGAGTGTATGGGATTTGATCC\n-TGAACTGATTCTTCTACTATGACTTGATTTCCCCTTAACC\n->2009993\n-CTCTTGTTGCAGGACTGAAGGTCCTGGACAAAGGATATTCTATTGCAGATCATGTAAAGA\n-AGATTATTAGAAGTCTTCATAAGAAATGGAGACCTATGGT\n->2009994\n-GTTAGTTGAGTTGTAATTCAACATCTTTAATATGTTGAAAGCATATATCACTAGGGTAGT\n-GATTGAGAGAATGTGAGAAGGGTTCACATATTTATGGGGA\n->2009995\n-AGGGTTTCGATTTTGGCATTTCGGAACAACAACCAAAAAAATGGGAGGTGGTGCAGCAGA\n-TCACGGGAATGGCGGCAATGGAGATTTCAGATACAAGGTT\n->2009996\n-TGGCATGTCTTGTCTCCTTTAGTTTAAACTCTATCAAATGGACGTGAAAATTGTCTTTAT\n-CAATGAGTACTTGAATAAGGAAGTCTATGTTGAGAAAACA\n->2009997\n-TTCCTTCTCCTTCAATTGCCTTTGTAGTTCTAACTTCTTCGTATTCAAGGCATGAAACTT\n-GTTTTCCCAAGCATCTCTATCTTTCTTTATCCGAGCTAGA\n->2009998\n-GAAGAATCGGATTTTTATCGTTTTAGTTTCGGTTTCGATTTCATTCGGTTTTGTAAATAA\n-TTTTAGCTTAGGGTTTTGTTTGCTTGTTTGCTTGGTTTGT\n->2009999\n-GGTTATGGCACAAAAGAATTGCACATATTCACATGGAGCACTTAAACAAGTTAGTGAAGC\n-ATGACCTTGTTATCGGCCTACCAAAGATGAAGTTCCTCAA\n->2010000\n-AATTAAAGTCATATTTTGATATCTCCTTGGACATCGGTTATTTTAAAGTTTGTGTTATTA\n-AATATTTTTCAAAACTCACTACACACATGACTTCAATGGT\n' |
b |
diff -r d397f5a85464 -r 378565f5a875 test_data/test_run1 --- a/test_data/test_run1 Wed Sep 18 06:30:04 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,202 +0,0 @@ -"Cluster" "Chip_Hits" "Input_Hits" "Ratio" -0 7945058 1071746 7.41319118522486 -1 278875 215538 1.2938553758502 -2 43504 94919 0.458327626713303 -3 37000 82538 0.448278368751363 -4 37253 104022 0.358126165618811 -5 32188 94960 0.338963774220724 -6 41039 81447 0.503873684727491 -7 39166 81623 0.479840241108511 -8 19339 66583 0.290449514140246 -9 29697 61497 0.482901604956339 -10 34273 82270 0.416591710222438 -11 27681 96899 0.285668582751112 -12 40545 96101 0.421899876171944 -13 31935 83895 0.380654389415341 -14 33932 90760 0.373865138827677 -15 29663 63240 0.469054395951929 -16 29084 91679 0.317237317160964 -17 17457 55788 0.312916756291676 -18 27249 55273 0.49298934380258 -19 21490 77952 0.275682471264368 -20 25851 67348 0.383842133396686 -21 27818 76020 0.365930018416206 -22 23859 72563 0.328803935890192 -23 85592 65619 1.30437830506408 -24 20936 59046 0.354571012430986 -25 21352 79958 0.267040196102954 -26 28586 62558 0.456951948591707 -27 24346 56413 0.4315671919593 -28 31336 64221 0.487940081904673 -29 16956 47383 0.357849861764768 -30 25372 59508 0.426362841970827 -31 19306 74537 0.259012302614809 -32 12498 50658 0.24671325358285 -33 22528 57473 0.391975362344057 -34 11578 38760 0.298710010319917 -35 17775 53446 0.332578677543689 -36 20979 71877 0.291873617429776 -37 24357 43553 0.559249649851905 -38 19401 65495 0.296221085579052 -39 23919 53615 0.446125151543411 -40 17944 37992 0.472309959991577 -41 21754 49360 0.440721231766613 -42 19911 59247 0.336067648994886 -43 17630 44057 0.400163424654425 -44 14639 47108 0.3107540120574 -45 10675 40313 0.26480291717312 -46 40498 55429 0.730628371430118 -47 15591 32845 0.474684122393058 -48 13229 45448 0.291079915507833 -49 15226 41012 0.371257193016678 -50 23851 49910 0.477880184331797 -51 11010 42552 0.258742244782854 -52 14233 41167 0.345738091189545 -53 17944 35795 0.5012990641151 -54 20288 38659 0.524793709097493 -55 13152 38394 0.342553523988123 -56 16631 38914 0.427378321426736 -57 249976 205389 1.21708562776001 -58 13347 28879 0.462169742719623 -59 16808 65546 0.256430598358405 -60 13355 42455 0.314568366505712 -61 16294 29082 0.560277835086995 -62 9790 52857 0.185216716801937 -63 17801 47726 0.372983279554121 -64 17157 32211 0.532644127782435 -65 20927 47539 0.440206987946738 -66 14999 49875 0.300731829573935 -67 16260 36851 0.441236330086022 -68 15925 42157 0.377754584054843 -69 11127 28004 0.397336094843594 -70 7739 25074 0.308646406636356 -71 6621 29947 0.221090593381641 -72 10923 40365 0.270605722779636 -73 9914675 84502 117.330654895742 -74 8758 38412 0.228001666145996 -75 9310 24364 0.382121162370711 -76 10614 47205 0.224849062599301 -77 13063 32597 0.400742399607326 -78 13404 29084 0.460871957089809 -79 9299 25667 0.362293996181868 -80 10109 26945 0.375171645945444 -81 15658 31730 0.493476205483769 -82 7483 32182 0.232521285190479 -83 16345 30634 0.533557485147222 -84 13293 27484 0.483663222238393 -85 5322 38749 0.137345479883352 -86 21424 33627 0.637107086567342 -87 11548 29314 0.393941461417753 -88 7762 29456 0.263511678435633 -89 12153 46433 0.261731957874787 -90 13081 22936 0.570326124869201 -91 6183 33349 0.185402860655492 -92 6357 35098 0.181121431420594 -93 48995 31410 1.5598535498249 -94 8371 31091 0.269241902801454 -95 13228 20080 0.658764940239044 -96 8453 27056 0.312426079243051 -97 9568 19505 0.490540886952064 -98 7938 32634 0.243243243243243 -99 7824 16977 0.460858808976851 -100 76514 28367 2.6972891035358 -101 10176 25158 0.404483663248271 -102 11009 30853 0.356821054678637 -103 6696 20379 0.328573531576623 -104 6767 26109 0.25918265732123 -105 12572 66266 0.189720218513265 -106 9522 20128 0.473072337042925 -107 10249 29304 0.349747474747475 -108 10362 34525 0.300130340333092 -109 9815 35803 0.274139038628048 -110 4387 16229 0.27031856553084 -111 10448 17881 0.584307365359879 -112 8117443 57680 140.732368238558 -113 9645 19854 0.485796313085524 -114 6879 27269 0.252264476145073 -115 8197 18651 0.439493860918986 -116 7111 21775 0.326567164179104 -117 16273 33720 0.482591933570581 -118 10242 29605 0.345955075156224 -119 12252 37892 0.323340019001372 -120 10751 27590 0.389670170351577 -121 6448 18451 0.349466153596011 -122 9191 25908 0.354755287941948 -123 5235 24699 0.211951900886676 -124 6465 20952 0.308562428407789 -125 7721 17814 0.4334231503312 -126 6593 26577 0.248071640892501 -127 12142 18977 0.639827159192707 -128 6505 18595 0.349825221833826 -129 3710 21375 0.173567251461988 -130 6348 16298 0.389495643637256 -131 4208 19672 0.213908092720618 -132 4136 9478 0.436378982907786 -133 7285 17145 0.424905220180811 -134 7642 14034 0.544534701439362 -135 3953 13049 0.302935090811556 -136 7778 14820 0.524831309041835 -137 7588 20607 0.368224389770466 -138 1208 3015 0.400663349917081 -139 7826 20711 0.377866834049539 -140 7551 13678 0.552054393917239 -141 4938 15140 0.326155878467635 -142 3788 16800 0.22547619047619 -143 4537 15722 0.288576516982572 -144 20198 10665 1.8938584153774 -145 28177 18344 1.53603358046228 -146 5743 19920 0.288303212851406 -147 3763 14154 0.265861240638689 -148 56754 17007 3.33709648968072 -149 5988 11740 0.510051107325383 -150 784 1172 0.668941979522184 -151 2718 13509 0.201199200532978 -152 3807 14812 0.25702133405347 -153 8382 45786 0.183069060411479 -154 3647 15198 0.239965784971707 -155 8500 32566 0.261008413682982 -156 6815 14389 0.473625686288137 -157 2307 13795 0.167234505255527 -158 2912 11803 0.246716936372109 -159 4316 11848 0.364280891289669 -160 992 2170 0.457142857142857 -161 5305 13186 0.402320643106325 -162 4569 17654 0.258808202107171 -163 3935 16460 0.23906439854192 -164 2725 13244 0.205753548776805 -165 3772 11890 0.317241379310345 -166 3081 15437 0.199585411673253 -167 837 2591 0.323041296796604 -168 2438412 16297 149.62336626373 -169 6131 16203 0.378386718508918 -170 3283 13179 0.249108430078155 -171 3106 12624 0.246039290240811 -172 9370 45868 0.204281852271736 -173 3537 15712 0.225114562118126 -174 747 2267 0.329510366122629 -175 4326 13004 0.332666871731775 -176 6665 9325 0.714745308310992 -177 4657 12573 0.370396882207906 -178 2538 10016 0.253394568690096 -179 2662 9086 0.292978208232446 -180 2441 10297 0.237059337671166 -181 544 1658 0.32810615199035 -182 664 1018 0.652259332023576 -183 31045 22110 1.4041157847128 -184 1365 4387 0.311146569409619 -185 2159 11375 0.189802197802198 -186 1933 7834 0.246744957875925 -187 2062 6753 0.305345772249371 -188 3962 11049 0.358584487283917 -189 2234 9201 0.242799695685252 -190 834 6610 0.126172465960666 -191 520 1629 0.319214241866176 -192 2677 10891 0.245799283812322 -193 509 805 0.632298136645963 -194 2155 4528 0.475927561837456 -195 2477 7482 0.331061213579257 -196 14445 15443 0.935375250922748 -197 3703 8763 0.42257217847769 -198 2873 28533 0.100690428626503 -199 1727 9618 0.179559159908505 -200 3448 6234 0.553095925569458 |
b |
diff -r d397f5a85464 -r 378565f5a875 test_run1.sh --- a/test_run1.sh Wed Sep 18 06:30:04 2019 -0400 +++ b/test_run1.sh Fri Nov 22 07:56:48 2019 -0500 |
b |
@@ -9,7 +9,17 @@ ./single_fastq_filtering_wrapper.sh -a test_data/ERR215189_1_part.fastq.gz -o tmp/test1.fasta -G tmp/test1.png -c 10 -N 0 echo "single fastq filtering with with sampling" -./single_fastq_filtering_wrapper.sh -a test_data/ERR215189_1_part.fastq.gz -o tmp/test2.fasta -G tmp/test2.png -c 10 -N 0 -n 500 +./single_fastq_filtering_wrapper.sh -a test_data/ERR215189_1_part.fastq.gz -o tmp/test2a.fasta -G tmp/test2a.png -c 10 -N 0 -n 500 + +echo "single fastq filtering with with sampling" +./single_fastq_filtering_wrapper.sh -a test_data/ERR215189_1_part.fastq.gz -o tmp/test2b.fasta -G tmp/test2b.png -c 10 -N 0 -n 647 + +echo "single fastq filtering with with sampling" +./single_fastq_filtering_wrapper.sh -a test_data/ERR215189_1_part.fastq.gz -o tmp/test2c.fasta -G tmp/test2c.png -c 10 -N 0 -n 839 + +echo "single fastq filtering with with sampling" +./single_fastq_filtering_wrapper.sh -a test_data/ERR215189_1_part.fastq.gz -o tmp/test2d.fasta -G tmp/test2d.png -c 10 -N 0 -n 911 + echo "single fastq filtering with contaminant removing" ./single_fastq_filtering_wrapper.sh -F tool_data/organele_ref_and_phi-X174.fasta -a test_data/ERR215189_1_part.fastq.gz -o tmp/test3.fasta -G tmp/test3.png -c 10 -N 0 |
b |
diff -r d397f5a85464 -r 378565f5a875 test_run2.sh --- a/test_run2.sh Wed Sep 18 06:30:04 2019 -0400 +++ b/test_run2.sh Fri Nov 22 07:56:48 2019 -0500 |
b |
@@ -6,7 +6,16 @@ ./paired_fastq_filtering_wrapper.sh -b test_data/ERR215189_2_part.fastq.gz -a test_data/ERR215189_1_part.fastq.gz -o tmp/test2.1.fasta -G tmp/test2.1.png -c 10 -N 0 echo "paired fastq filtering with with sampling" -./paired_fastq_filtering_wrapper.sh -b test_data/ERR215189_2_part.fastq.gz -a test_data/ERR215189_1_part.fastq.gz -o tmp/test2.2.fasta -G tmp/test2.2.png -c 10 -N 0 -n 500 +./paired_fastq_filtering_wrapper.sh -b test_data/ERR215189_2_part.fastq.gz -a test_data/ERR215189_1_part.fastq.gz -o tmp/test2.a2.fasta -G tmp/test2.2.png -c 10 -N 0 -n 500 + +echo "paired fastq filtering with with sampling" +./paired_fastq_filtering_wrapper.sh -b test_data/ERR215189_2_part.fastq.gz -a test_data/ERR215189_1_part.fastq.gz -o tmp/test2.a3.fasta -G tmp/test2.2.png -c 10 -N 0 -n 653 + +echo "paired fastq filtering with with sampling" +./paired_fastq_filtering_wrapper.sh -b test_data/ERR215189_2_part.fastq.gz -a test_data/ERR215189_1_part.fastq.gz -o tmp/test2.a4.fasta -G tmp/test2.2.png -c 10 -N 0 -n 547 + +echo "paired fastq filtering with with sampling" +./paired_fastq_filtering_wrapper.sh -b test_data/ERR215189_2_part.fastq.gz -a test_data/ERR215189_1_part.fastq.gz -o tmp/test2.a5.fasta -G tmp/test2.2.png -c 10 -N 0 -n 839 echo "paired fastq filtering with contaminant removing" ./paired_fastq_filtering_wrapper.sh -F tool_data/organele_ref_and_phi-X174.fasta -b test_data/ERR215189_2_part.fastq.gz -a test_data/ERR215189_1_part.fastq.gz -o tmp/test2.3.fasta -G tmp/test2.3.png -c 10 -N 0 |
b |
diff -r d397f5a85464 -r 378565f5a875 tmp.RData |
b |
Binary file tmp.RData has changed |