Repository 'umi_tools_count'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/umi_tools_count

Changeset 0:8db56d2f8b72 (2018-06-21)
Next changeset 1:3c932ad4a174 (2018-07-14)
Commit message:
planemo upload commit c79a5f4a05156bb2a6035a844aa9ad8f0e59ecb5
added:
macros.xml
test-data/chr19_gene_tags.bam
test-data/count_single_cells_gene_tag.tsv
test-data/count_single_cells_gene_tag_wide.tsv
test-data/count_single_gene_tag.tsv
test-data/dedup_out1.bam
test-data/dedup_out2.bam
test-data/dedup_out3.bam
test-data/dedup_out4.bam
test-data/dedup_out5.bam
test-data/dedup_out6.bam
test-data/group_in1.sam
test-data/group_in2.bam
test-data/group_in3.bam
test-data/group_in4.bam
test-data/group_in5.bam
test-data/group_in6.bam
test-data/group_out2.bam
test-data/group_out3.bam
test-data/group_out3.tab
test-data/group_out4.bam
test-data/group_out4.tab
test-data/group_out5.bam
test-data/group_out6.bam
test-data/out_R1.fastq.gz
test-data/out_R2.fastq.gz
test-data/out_SE.fastq
test-data/out_paired.log
test-data/out_single.log
test-data/out_wl_paired.html
test-data/out_wl_paired.log
test-data/out_wl_paired.tresh.tab
test-data/out_wl_paired.txt
test-data/out_wl_single.html
test-data/out_wl_single.tresh.tab
test-data/out_wl_single.txt
test-data/scrb_extract.fastq.gz
test-data/scrb_seq_barcodes
test-data/scrb_seq_fastq.1.gz
test-data/scrb_seq_fastq.2.gz
test-data/t_R1.fastq
test-data/t_R1.fastq.gz
test-data/t_R2.fastq
test-data/t_R2.fastq.gz
umi-tools_counts.xml
b
diff -r 000000000000 -r 8db56d2f8b72 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Jun 21 15:20:14 2018 -0400
[
@@ -0,0 +1,88 @@
+<?xml version="1.0"?>
+<macros>
+    <!-- Conditional asking whether the barcode/UMI is on the first read only or on
+         both reads; in the latter case the user supplies the pattern for the
+         second read (the bc-pattern2 option). Declared with the xml shortcut like
+         the other macros in this file. -->
+    <xml name="barcode2_conditional">
+        <conditional name="barcode">
+            <param name="barcode_select" argument="--split-barcode" type="select" label="Barcode on both reads?">
+                <option value="first_read_only">Barcode on first read only</option>
+                <option value="both_reads">Barcode on both reads</option>
+            </param>
+            <when value="first_read_only"/>
+            <when value="both_reads">
+                <!-- help is kept on one line: a line break inside an attribute value is
+                     normalized to a space and the indentation would end up in the text -->
+                <param name="bc_pattern2" argument="--bc-pattern2" type="text" value="" label="Barcode pattern for second read"
+                       help="Use this option to specify the format of the UMI/barcode for the second read pair if required." />
+            </when>
+        </conditional>
+    </xml>
+    <!-- Library-type selector: one single-end dataset, two separate paired-end
+         datasets, or a paired dataset collection. Both paired layouts also expand
+         barcode2_conditional. The two paired-end inputs carry distinct labels so
+         they can be told apart in the tool form. -->
+    <xml name="input_types">
+        <conditional name="input_type">
+            <param name="type" type="select" label="Library type">
+                <option value="single">Single-end</option>
+                <option value="paired">Paired-end</option>
+                <option value="paired_collection">Paired-end Dataset Collection</option>
+            </param>
+            <when value="single">
+                <param name="input_single" type="data" format="fastq,fastq.gz" label="Reads in FASTQ format" />
+            </when>
+            <when value="paired">
+                <param name="input_read1" type="data" format="fastq,fastq.gz" label="Read 1 (forward) in FASTQ format" />
+                <param name="input_read2" type="data" format="fastq,fastq.gz" label="Read 2 (reverse) in FASTQ format" />
+                <expand macro="barcode2_conditional" />
+            </when>
+            <when value="paired_collection">
+                <param name="input_readpair" type="data_collection" collection_type="paired" format="fastq,fastq.gz" label="Reads in FASTQ format" />
+                <expand macro="barcode2_conditional" />
+            </when>
+        </conditional>
+    </xml>
+    <!-- Citations block: one DOI citation plus a BibTeX @misc entry that points
+         at the UMI-tools GitHub repository. -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1101/gr.209601.116</citation>
+            <citation type="bibtex">
+                @misc{githubUMI-tools,
+                title = {UMI-tools},
+                publisher = {GitHub},
+                journal = {GitHub repository},
+                url = {https://github.com/CGATOxford/UMI-tools},
+                }
+            </citation>
+        </citations>
+    </xml>
+    <!-- Requirements block: declares the umi_tools package at the version held in
+         the @VERSION@ token. Per Galaxy macro semantics, the yield element marks
+         where child elements of the expanding tag are inserted, so a tool can
+         append further requirements. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@VERSION@">umi_tools</requirement>
+            <yield />
+        </requirements>
+    </xml>
+    <!-- umi_tools version string; referenced by the requirements macro above. -->
+    <token name="@VERSION@">0.5.3</token>
+    <!-- Cheetah snippet that symlinks the selected input dataset(s) into the
+         current working directory under fixed names: input_single for single-end,
+         input_read1 and input_read2 for both paired layouts. The link name gets a
+         .gz suffix and $gz is set to True when the tested dataset is of type
+         fastq.gz or fastqsanger.gz; otherwise the suffix is .txt and $gz stays
+         False. Every emitted ln command ends with the shell AND operator, so the
+         token has to be followed by another command.
+         NOTE(review): in the paired and paired_collection branches only read 1 /
+         the forward dataset is tested for gzip; read 2 / reverse is linked with
+         the same suffix. This presumably assumes both mates share the same
+         compression; confirm against the tools that consume $gz. -->
+    <token name="@COMMAND_LINK@"><![CDATA[
+        #set $gz = False
+        #if $input_type.type == 'single':
+            #if $input_type.input_single.is_of_type("fastq.gz", "fastqsanger.gz"):
+                ln -s '$input_type.input_single' input_single.gz &&
+                #set $gz = True
+            #else
+                ln -s '$input_type.input_single' input_single.txt &&
+            #end if
+        #elif $input_type.type == 'paired':
+            #if $input_type.input_read1.is_of_type("fastq.gz", "fastqsanger.gz"):
+                ln -s '$input_type.input_read1' input_read1.gz &&
+                ln -s '$input_type.input_read2' input_read2.gz &&
+                #set $gz = True
+            #else
+                ln -s '$input_type.input_read1' input_read1.txt &&
+                ln -s '$input_type.input_read2' input_read2.txt &&
+            #end if
+        #else  ## paired_collection
+            #if $input_type.input_readpair.forward.is_of_type("fastq.gz", "fastqsanger.gz"):
+                ln -s '$input_type.input_readpair.forward' input_read1.gz &&
+                ln -s '$input_type.input_readpair.reverse' input_read2.gz &&
+                #set $gz = True
+            #else
+                ln -s '$input_type.input_readpair.forward' input_read1.txt &&
+                ln -s '$input_type.input_readpair.reverse' input_read2.txt &&
+            #end if
+        #end if
+    ]]></token>
+</macros>
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/chr19_gene_tags.bam
b
Binary file test-data/chr19_gene_tags.bam has changed
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/count_single_cells_gene_tag.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/count_single_cells_gene_tag.tsv Thu Jun 21 15:20:14 2018 -0400
b
@@ -0,0 +1,23 @@
+gene cell count
+ENSG00000011304.18 ACAAGG 33
+ENSG00000011304.18 TTCACG 24
+ENSG00000065268.10 ACAAGG 4
+ENSG00000065268.10 TTCACG 11
+ENSG00000070404.9 TTCACG 1
+ENSG00000070423.17 ACAAGG 2
+ENSG00000070423.17 TTCACG 4
+ENSG00000099804.8 ACAAGG 5
+ENSG00000099804.8 TTCACG 4
+ENSG00000099821.13 ACAAGG 6
+ENSG00000099821.13 TTCACG 1
+ENSG00000099864.17 TTCACG 2
+ENSG00000105556.11 ACAAGG 2
+ENSG00000105556.11 TTCACG 3
+ENSG00000116017.10 ACAAGG 7
+ENSG00000116017.10 TTCACG 18
+ENSG00000172270.18 ACAAGG 9
+ENSG00000172270.18 TTCACG 3
+ENSG00000175221.14 ACAAGG 1
+ENSG00000175221.14 TTCACG 3
+ENSG00000198858.9 ACAAGG 1
+ENSG00000267751.5 TTCACG 1
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/count_single_cells_gene_tag_wide.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/count_single_cells_gene_tag_wide.tsv Thu Jun 21 15:20:14 2018 -0400
b
@@ -0,0 +1,14 @@
+gene ACAAGG TTCACG
+ENSG00000011304.18 33 24
+ENSG00000065268.10 4 11
+ENSG00000070404.9 0 1
+ENSG00000070423.17 2 4
+ENSG00000099804.8 5 4
+ENSG00000099821.13 6 1
+ENSG00000099864.17 0 2
+ENSG00000105556.11 2 3
+ENSG00000116017.10 7 18
+ENSG00000172270.18 9 3
+ENSG00000175221.14 1 3
+ENSG00000198858.9 1 0
+ENSG00000267751.5 0 1
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/count_single_gene_tag.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/count_single_gene_tag.tsv Thu Jun 21 15:20:14 2018 -0400
b
@@ -0,0 +1,14 @@
+gene count
+ENSG00000011304.18 54
+ENSG00000065268.10 15
+ENSG00000070404.9 1
+ENSG00000070423.17 6
+ENSG00000099804.8 9
+ENSG00000099821.13 7
+ENSG00000099864.17 2
+ENSG00000105556.11 5
+ENSG00000116017.10 24
+ENSG00000172270.18 12
+ENSG00000175221.14 4
+ENSG00000198858.9 1
+ENSG00000267751.5 1
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/dedup_out1.bam
b
Binary file test-data/dedup_out1.bam has changed
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/dedup_out2.bam
b
Binary file test-data/dedup_out2.bam has changed
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/dedup_out3.bam
b
Binary file test-data/dedup_out3.bam has changed
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/dedup_out4.bam
b
Binary file test-data/dedup_out4.bam has changed
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/dedup_out5.bam
b
Binary file test-data/dedup_out5.bam has changed
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/dedup_out6.bam
b
Binary file test-data/dedup_out6.bam has changed
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/group_in1.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/group_in1.sam Thu Jun 21 15:20:14 2018 -0400
b
b'@@ -0,0 +1,666 @@\n+@HD\tVN:1.0\tSO:coordinate\n+@PG\tID:Bowtie\tVN:1.1.2\tCL:"bowtie --wrapper basic-0 --threads 4 -v 2 -m 10 -a /ifs/mirror/genomes/bowtie/mm9 /dev/fd/63 --sam"\n+@SQ\tSN:chr10\tLN:129993255\n+@SQ\tSN:chr11\tLN:121843856\n+@SQ\tSN:chr12\tLN:121257530\n+@SQ\tSN:chr13\tLN:120284312\n+@SQ\tSN:chr14\tLN:125194864\n+@SQ\tSN:chr15\tLN:103494974\n+@SQ\tSN:chr16\tLN:98319150\n+@SQ\tSN:chr17\tLN:95272651\n+@SQ\tSN:chr18\tLN:90772031\n+@SQ\tSN:chr19\tLN:61342430\n+@SQ\tSN:chr1\tLN:197195432\n+@SQ\tSN:chr2\tLN:181748087\n+@SQ\tSN:chr3\tLN:159599783\n+@SQ\tSN:chr4\tLN:155630120\n+@SQ\tSN:chr5\tLN:152537259\n+@SQ\tSN:chr6\tLN:149517037\n+@SQ\tSN:chr7\tLN:152524553\n+@SQ\tSN:chr8\tLN:131738871\n+@SQ\tSN:chr9\tLN:124076172\n+@SQ\tSN:chrM\tLN:16299\n+@SQ\tSN:chrX\tLN:166650296\n+@SQ\tSN:chrY\tLN:15902555\n+SRR2057595.10000392_AAGTA\t16\tchr19\t31326011\t255\t57M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:23A33\tNM:i:1\tUG:i:512\tBX:Z:AAGTA\n+SRR2057595.10000468_GCTAT\t16\tchr19\t61240267\t255\t28M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:11C16\tNM:i:1\tUG:i:706\tBX:Z:GCTAT\n+SRR2057595.10000704_TGATG\t16\tchr19\t3544146\t255\t39M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:39\tNM:i:0\tUG:i:1\tBX:Z:TGATG\n+SRR2057595.10000838_TAGTA\t0\tchr19\t10274608\t255\t34M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:34\tNM:i:0\tUG:i:324\tBX:Z:TAGTA\n+SRR2057595.10001118_TCTCA\t16\tchr19\t4078298\t255\t50M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:43A6\tNM:i:1\tUG:i:36\tBX:Z:TCTCA\n+SRR2057595.10001329_CGGTC\t16\tchr19\t28083193\t255\t60M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:60\tNM:i:0\tUG:i:497\tBX:Z:CGGTC\n+SRR2057595.10001382_CTGCC\t16\tchr19\t3957282\t255\t22M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:17C1G2\tNM:i:2\tUG:i:9\tBX:Z:CTGCC\n+SRR2057595.10001382_CTGCC\t16\tchr19\t3970897\t255\t22M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:17C1G2\tNM:i:2\tUG:i:10\tBX:Z:CTGCC\n+SRR2057595.10002047_ATTAT\t16\tchr19\t7546213\t255\t27M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:27\tNM:i:0\tUG:i:278\tBX:Z:ATTAT\n+SRR2057595.10002219_ATCAG\t16\tchr19\t5796783\t255\t34M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:
34\tNM:i:0\tUG:i:234\tBX:Z:ATCAG\n+SRR2057595.10002597_AATTA\t16\tchr19\t50733887\t255\t64M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:58C0C4\tNM:i:2\tUG:i:645\tBX:Z:AATTA\n+SRR2057595.10003084_GTCTA\t16\tchr19\t4078420\t255\t67M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:2T19A44\tNM:i:2\tUG:i:124\tBX:Z:GTCTA\n+SRR2057595.10003230_ACATT\t0\tchr19\t8799370\t255\t64M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:64\tNM:i:0\tUG:i:297\tBX:Z:ACATT\n+SRR2057595.10003317_CGGCC\t16\tchr19\t52641725\t255\t20M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:1C13A4\tNM:i:2\tUG:i:648\tBX:Z:CGGCC\n+SRR2057595.10003567_ATGTC\t0\tchr19\t37052811\t255\t64M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:11C52\tNM:i:1\tUG:i:542\tBX:Z:ATGTC\n+SRR2057595.10003576_AATCG\t16\tchr19\t4078303\t255\t67M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:38A21A6\tNM:i:2\tUG:i:88\tBX:Z:AATCG\n+SRR2057595.10003630_GTGTT\t16\tchr19\t4078298\t255\t52M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:42G0A8\tNM:i:2\tUG:i:45\tBX:Z:GTGTT\n+SRR2057595.10003869_TCTTG\t16\tchr19\t4078423\t255\t67M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:19A47\tNM:i:1\tUG:i:111\tBX:Z:TCTTG\n+SRR2057595.1000404_AACTT\t16\tchr19\t4078420\t255\t64M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:2T19A41\tNM:i:2\tUG:i:55\tBX:Z:AACTT\n+SRR2057595.10004161_TTCGC\t16\tchr19\t4078428\t255\t67M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:14A24T27\tNM:i:2\tUG:i:74\tBX:Z:TTCGC\n+SRR2057595.1000475_TCTGG\t0\tchr19\t13129234\t255\t28M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:22T5\tNM:i:1\tUG:i:383\tBX:Z:TCTGG\n+SRR2057595.10004845_GGATA\t0\tchr19\t24394180\t255\t29M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:11C15T1\tNM:i:2\tUG:i:481\tBX:Z:GGATA\n+SRR2057595.10005100_TTAAA\t16\tchr19\t4785463\t255\t21M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:21\tNM:i:0\tUG:i:196\tBX:Z:TTAAA\n+SRR2057595.10005761_CTAGA\t16\tchr19\t4078299\t255\t50M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:41G0A7\tNM:i:2\tUG:i:43\tBX:Z:CTAGA\n+SRR2057595.10005808_CAGTA\t16\tchr19\t7545872\t255\t24M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:24\tNM:i:0\tUG:i:276\tBX:Z:CAGTA\n+SRR2057595.10005955_GACAT\t16\tchr19\t4078302\t255\t67M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:39A21A5\tNM:i:2\tUG:i:82\tBX:Z:GACAT
\n+SRR2057595.10006011_TCTCA\t16\tchr19\t4078298\t255\t50M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:43A6\tNM:i:1\tUG:i:36\tBX:Z:TCTCA\n+SRR2057595.10006546_GCTAC\t16\tchr19\t4078446\t255\t67M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:67\tNM:i:0\tUG:i:97\tBX:Z:GCTAC\n+SRR2057595.1000663_ACGCA\t16\tchr19\t51217013\t255\t67M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:3A46A16\tNM:i:2\tUG:i:647\tBX:Z:ACGCA\n+SRR2057595.10006870_GTCCA\t16\tchr19\t4078431\t255\t67M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:11A55\tNM:i:1\tUG:i:107\tBX:Z:GTCCA\n+SRR2057595.10006926_GGATA\t0\tchr19\t24394180\t255\t29M\t*\t0\t0\t*\t*\tXA:'..b'\tchr19\t55501486\t255\t31M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:8A22\tNM:i:1\tUG:i:659\tBX:Z:GATTG\n+SRR2057595.1013616_ATAGA\t0\tchr19\t38795805\t255\t62M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:46A15\tNM:i:1\tUG:i:553\tBX:Z:ATAGA\n+SRR2057595.10136267_TTCGC\t16\tchr19\t4078428\t255\t67M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:14A24T27\tNM:i:2\tUG:i:74\tBX:Z:TTCGC\n+SRR2057595.10136714_ATCCG\t16\tchr19\t37369602\t255\t35M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:35\tNM:i:0\tUG:i:544\tBX:Z:ATCCG\n+SRR2057595.10136765_GCTAT\t0\tchr19\t57701750\t255\t67M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:34T1T30\tNM:i:2\tUG:i:677\tBX:Z:GCTAT\n+SRR2057595.10137151_ATTCT\t0\tchr19\t3576314\t255\t23M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:16T5T0\tNM:i:2\tUG:i:5\tBX:Z:ATTCT\n+SRR2057595.10137262_ACAAA\t16\tchr19\t4078298\t255\t49M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:43A5\tNM:i:1\tUG:i:27\tBX:Z:ACAAA\n+SRR2057595.10137317_GATTG\t0\tchr19\t8799830\t255\t67M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:67\tNM:i:0\tUG:i:314\tBX:Z:GATTG\n+SRR2057595.10137603_GCTAT\t16\tchr19\t61240267\t255\t28M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:11C16\tNM:i:1\tUG:i:706\tBX:Z:GCTAT\n+SRR2057595.10137788_TAGTA\t0\tchr19\t10274608\t255\t34M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:34\tNM:i:0\tUG:i:324\tBX:Z:TAGTA\n+SRR2057595.1013793_GCGCA\t0\tchr19\t8798523\t255\t30M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:30\tNM:i:0\tUG:i:295\tBX:Z:GCGCA\n+SRR2057595.10138029_GTTGT\t16\tchr19\t12564367\t255\t21M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:0A12A7\tNM:i:2\tUG:i:380\tBX:Z:GT
TGT\n+SRR2057595.10138656_CTAGC\t16\tchr19\t4078298\t255\t62M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:43A18\tNM:i:1\tUG:i:63\tBX:Z:CTAGC\n+SRR2057595.10138697_GCCGT\t0\tchr19\t30861987\t255\t20M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:3C2T13\tNM:i:2\tUG:i:511\tBX:Z:GCCGT\n+SRR2057595.10138764_GATGG\t0\tchr19\t44417644\t255\t23M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:15T4C2\tNM:i:2\tUG:i:601\tBX:Z:GATGG\n+SRR2057595.10138897_TGATT\t16\tchr19\t4078299\t255\t50M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:41G0A7\tNM:i:2\tUG:i:41\tBX:Z:TGATT\n+SRR2057595.10139239_GAGTG\t0\tchr19\t8798502\t255\t51M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:51\tNM:i:0\tUG:i:289\tBX:Z:GAGTG\n+SRR2057595.10139499_AGTCT\t0\tchr19\t13129558\t255\t37M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:21A15\tNM:i:1\tUG:i:398\tBX:Z:AGTCT\n+SRR2057595.10139596_GCTAC\t16\tchr19\t4078446\t255\t67M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:67\tNM:i:0\tUG:i:97\tBX:Z:GCTAC\n+SRR2057595.10139652_GGATG\t0\tchr19\t13129322\t255\t27M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:27\tNM:i:0\tUG:i:402\tBX:Z:GGATG\n+SRR2057595.10139674_ATGAA\t16\tchr19\t4078298\t255\t37M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:37\tNM:i:0\tUG:i:175\tBX:Z:ATAAA\n+SRR2057595.10139952_TAGTA\t16\tchr19\t61240268\t255\t27M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:10C16\tNM:i:1\tUG:i:702\tBX:Z:TAGTA\n+SRR2057595.10140058_ATATA\t0\tchr19\t13129330\t255\t22M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:0T21\tNM:i:1\tUG:i:422\tBX:Z:ATATA\n+SRR2057595.10141122_GTCCC\t0\tchr19\t12347657\t255\t32M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:32\tNM:i:0\tUG:i:372\tBX:Z:GTCCC\n+SRR2057595.10141122_GTCCC\t0\tchr19\t12348043\t255\t32M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:32\tNM:i:0\tUG:i:374\tBX:Z:GTCCC\n+SRR2057595.10141122_GTCCC\t16\tchr19\t15035071\t255\t32M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:29T2\tNM:i:1\tUG:i:428\tBX:Z:GTCCC\n+SRR2057595.10141295_TAGTA\t0\tchr19\t10274608\t255\t34M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:34\tNM:i:0\tUG:i:324\tBX:Z:TAGTA\n+SRR2057595.10142441_GTCTA\t16\tchr19\t4078420\t255\t67M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:2T19A44\tNM:i:2\tUG:i:124\tBX:Z:GTCTA\n+SRR2057595.10142482_GCCGA\t16\tchr19\t7389994\
t255\t48M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:47C0\tNM:i:1\tUG:i:274\tBX:Z:GCCGA\n+SRR2057595.10143102_AAGTA\t16\tchr19\t31326011\t255\t57M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:23A33\tNM:i:1\tUG:i:512\tBX:Z:AAGTA\n+SRR2057595.10143387_GCATA\t16\tchr19\t4078298\t255\t57M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:42G0A13\tNM:i:2\tUG:i:53\tBX:Z:GCATA\n+SRR2057595.10143631_ATTTT\t0\tchr19\t45095257\t255\t57M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:57\tNM:i:0\tUG:i:606\tBX:Z:ATTTT\n+SRR2057595.10143687_CGCTT\t0\tchr19\t13129322\t255\t26M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:26\tNM:i:0\tUG:i:406\tBX:Z:CGCTT\n+SRR2057595.10143772_TGATT\t16\tchr19\t4078299\t255\t50M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:41G0A7\tNM:i:2\tUG:i:41\tBX:Z:TGATT\n+SRR2057595.10143909_CAGAC\t0\tchr19\t5493782\t255\t41M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:41\tNM:i:0\tUG:i:218\tBX:Z:CAGAC\n+SRR2057595.10143972_CTTAC\t16\tchr19\t4078298\t255\t54M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:42G0A10\tNM:i:2\tUG:i:49\tBX:Z:CTTAC\n+SRR2057595.10144216_CATAA\t0\tchr19\t13129322\t255\t35M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:33C1\tNM:i:1\tUG:i:401\tBX:Z:CATAA\n+SRR2057595.10144367_ATGAT\t16\tchr19\t61240306\t255\t36M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:16A4T14\tNM:i:2\tUG:i:716\tBX:Z:ATGAT\n'
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/group_in2.bam
b
Binary file test-data/group_in2.bam has changed
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/group_in3.bam
b
Binary file test-data/group_in3.bam has changed
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/group_in4.bam
b
Binary file test-data/group_in4.bam has changed
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/group_in5.bam
b
Binary file test-data/group_in5.bam has changed
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/group_in6.bam
b
Binary file test-data/group_in6.bam has changed
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/group_out2.bam
b
Binary file test-data/group_out2.bam has changed
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/group_out3.bam
b
Binary file test-data/group_out3.bam has changed
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/group_out3.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/group_out3.tab Thu Jun 21 15:20:14 2018 -0400
b
b'@@ -0,0 +1,498 @@\n+read_id\tcontig\tposition\tgene\tumi\tumi_count\tfinal_umi\tfinal_umi_count\tunique_id\n+chrM_81_583_3:0:0_2:0:0_f4/1_TT\tchrM\t80\tNA\tTT\t1\tTT\t1\t0\n+chrM_110_732_3:0:0_2:0:0_160/1_CC\tchrM\t109\tNA\tCC\t1\tCC\t1\t1\n+chrM_118_613_6:0:0_4:0:0_169/1_AG\tchrM\t117\tNA\tAG\t1\tAG\t1\t2\n+chrM_149_684_2:0:0_2:0:0_6e/1_TA\tchrM\t148\tNA\tTA\t1\tTA\t1\t3\n+chrM_152_616_6:0:0_2:0:0_84/1_GC\tchrM\t151\tNA\tGC\t1\tGC\t1\t4\n+chrM_247_748_8:0:0_1:0:0_1b7/1_GC\tchrM\t246\tNA\tGC\t1\tGC\t1\t5\n+chrM_280_772_1:0:0_9:0:0_31/1_CG\tchrM\t279\tNA\tCG\t1\tCG\t1\t6\n+chrM_292_819_2:0:0_1:0:0_18d/1_CA\tchrM\t291\tNA\tCA\t1\tCA\t1\t7\n+chrM_390_890_3:0:0_6:0:0_bf/1_CA\tchrM\t389\tNA\tCA\t1\tCA\t1\t8\n+chrM_447_921_2:0:0_3:0:0_1cc/1_AC\tchrM\t446\tNA\tAC\t1\tAC\t1\t9\n+chrM_469_983_2:0:0_1:0:0_121/1_AC\tchrM\t468\tNA\tAC\t1\tAC\t1\t10\n+chrM_541_1074_3:0:0_8:1:0_22/1_AC\tchrM\t540\tNA\tAC\t1\tAC\t1\t11\n+chrM_8_556_3:0:0_2:0:0_1b1/1_AC\tchrM\t556\tNA\tAC\t1\tAC\t1\t12\n+chrM_112_577_2:0:0_4:0:0_17b/1_CG\tchrM\t577\tNA\tCG\t1\tCG\t1\t13\n+chrM_627_1063_6:0:0_4:1:0_12a/1_AA\tchrM\t626\tNA\tAA\t1\tAA\t1\t14\n+chrM_164_650_5:0:0_3:0:0_164/1_AC\tchrM\t650\tNA\tAC\t1\tAC\t1\t15\n+chrM_200_695_6:0:0_2:0:0_5a/1_TA\tchrM\t695\tNA\tTA\t3\tTA\t3\t16\n+chrM_168_695_5:0:0_2:0:0_1af/1_TA\tchrM\t695\tNA\tTA\t3\tTA\t3\t16\n+chrM_170_695_1:0:0_6:0:0_1e3/1_TA\tchrM\t695\tNA\tTA\t3\tTA\t3\t16\n+chrM_209_705_3:0:0_2:0:0_1b3/1_TA\tchrM\t705\tNA\tTA\t1\tTA\t1\t17\n+chrM_288_807_3:0:0_5:0:0_8e/1_AG\tchrM\t807\tNA\tAG\t1\tAG\t1\t18\n+chrM_818_1274_1:1:0_4:0:0_124/1_TC\tchrM\t817\tNA\tTC\t1\tTC\t1\t19\n+chrM_442_872_4:0:0_6:0:0_146/1_AT\tchrM\t872\tNA\tAT\t1\tAT\t1\t20\n+chrM_460_919_3:0:0_1:0:0_0/1_GA\tchrM\t919\tNA\tGA\t1\tGA\t1\t21\n+chrM_384_950_5:0:0_2:0:0_34/1_GG\tchrM\t950\tNA\tGG\t1\tGG\t1\t22\n+chrM_476_980_5:0:0_2:0:0_133/1_TA\tchrM\t980\tNA\tTA\t1\tTA\t1\t23\n+chrM_552_999_4:0:0_5:0:0_bb/1_AT\tchrM\t999\tNA\tAT\t1\tAT\t1\t24\n+chrM_512_1030_5:0:0_7:1:0_a5/1_AC\tchrM\t1030\tNA
\tAC\t1\tAC\t1\t25\n+chrM_1103_1634_5:0:0_3:0:0_36/1_TA\tchrM\t1102\tNA\tTA\t1\tTA\t1\t26\n+chrM_648_1148_3:0:0_3:1:0_1d1/1_TT\tchrM\t1148\tNA\tTT\t1\tTT\t1\t27\n+chrM_1167_1797_8:0:0_2:1:0_14e/1_GT\tchrM\t1166\tNA\tGT\t1\tGT\t1\t28\n+chrM_619_1169_3:0:0_5:1:0_33/1_CC\tchrM\t1169\tNA\tCC\t1\tCC\t1\t29\n+chrM_668_1248_0:0:0_4:0:0_fa/1_TT\tchrM\t1248\tNA\tTT\t1\tTT\t1\t30\n+chrM_1288_1759_4:0:0_2:1:0_a1/1_TA\tchrM\t1287\tNA\tTA\t1\tTA\t1\t31\n+chrM_1327_1786_4:0:0_3:1:0_1ad/1_CA\tchrM\t1326\tNA\tCA\t1\tCA\t1\t32\n+chrM_866_1351_5:1:0_6:0:0_1c9/1_TA\tchrM\t1351\tNA\tTA\t1\tTA\t1\t33\n+chrM_853_1366_6:1:0_4:0:0_13b/1_TC\tchrM\t1366\tNA\tTC\t1\tTC\t1\t34\n+chrM_1399_1851_2:0:0_11:1:0_85/1_AT\tchrM\t1398\tNA\tAT\t1\tAT\t1\t35\n+chrM_946_1444_4:1:0_6:0:0_113/1_TT\tchrM\t1444\tNA\tTT\t1\tTT\t1\t36\n+chrM_943_1485_2:1:0_5:0:0_1e2/1_AT\tchrM\t1485\tNA\tAT\t1\tAT\t1\t37\n+chrM_1022_1501_4:0:0_5:0:0_132/1_GA\tchrM\t1501\tNA\tGA\t1\tGA\t1\t38\n+chrM_1504_2013_4:1:0_3:0:0_10b/1_AT\tchrM\t1503\tNA\tAT\t1\tAT\t1\t39\n+chrM_1505_1934_1:1:0_5:0:0_157/1_TC\tchrM\t1504\tNA\tTC\t1\tTC\t1\t40\n+chrM_997_1511_2:1:0_1:0:0_1d8/1_AT\tchrM\t1511\tNA\tAT\t1\tAT\t1\t41\n+chrM_1521_2070_5:1:0_3:0:0_1a/1_TA\tchrM\t1520\tNA\tTA\t1\tTA\t1\t42\n+chrM_1575_2126_5:1:0_2:1:0_18c/1_TA\tchrM\t1574\tNA\tTA\t1\tTA\t1\t43\n+chrM_1063_1598_5:0:0_5:0:0_f5/1_TG\tchrM\t1598\tNA\tTG\t1\tTG\t1\t44\n+chrM_1605_2128_3:1:0_4:1:0_1ea/1_TT\tchrM\t1604\tNA\tTT\t1\tTT\t1\t45\n+chrM_1065_1609_3:0:0_5:0:0_18e/1_TA\tchrM\t1609\tNA\tTA\t1\tTA\t1\t46\n+chrM_1140_1619_1:0:0_4:0:0_74/1_TT\tchrM\t1619\tNA\tTT\t1\tTT\t1\t47\n+chrM_1111_1626_4:0:0_2:0:0_186/1_AC\tchrM\t1626\tNA\tAC\t1\tAC\t1\t48\n+chrM_1664_2135_0:1:0_3:1:0_179/1_TT\tchrM\t1663\tNA\tTT\t1\tTT\t1\t49\n+chrM_1699_2147_1:0:0_7:0:0_10c/1_AA\tchrM\t1698\tNA\tAA\t1\tAA\t1\t50\n+chrM_1706_2240_3:0:0_6:0:0_99/1_GT\tchrM\t1705\tNA\tGT\t1\tGT\t1\t51\n+chrM_1756_2309_0:0:0_3:0:0_c3/1_AT\tchrM\t1755\tNA\tAT\t1\tAT\t1\t52\n+chrM_1344_1758_4:0:0_3:1:0_75/1_TT\tchrM\t1758\tNA\t
TT\t1\tTT\t1\t53\n+chrM_1223_1777_5:0:0_6:1:0_42/1_TG\tchrM\t1777\tNA\tTG\t1\tTG\t1\t54\n+chrM_1790_2351_4:0:0_4:0:0_f3/1_CC\tchrM\t1789\tNA\tCC\t1\tCC\t1\t55\n+chrM_1308_1807_4:0:0_4:1:0_45/1_TA\tchrM\t1807\tNA\tTA\t1\tTA\t1\t56\n+chrM_1814_2315_6:0:0_3:0:0_12d/1_AA\tchrM\t1813\tNA\tAA\t1\tAA\t1\t57\n+chrM_1862_2304_4:0:0_1:0:0_19d/1_AA\tchrM\t1861\tNA\tAA\t1\tAA\t1\t58\n+chrM_1363_1869_5:0:0_3:1:0_aa/1_TA\tchrM\t1869\tNA\tTA\t1\tTA\t1\t59\n+chrM_1363_1887_2:0:0_3:0:0_d3/1_CA\tchrM\t1887\tNA\tCA\t1\tCA\t1\t60\n+chrM_1428_1904_7:0:0_1:0:0_1b0/1_AC\tchrM\t1904\tNA\tAC\t1\tAC\t1\t6'..b'\tTA\t1\t431\n+chrM_14135_14581_5:0:0_5:0:0_1de/1_AT\tchrM\t14581\tNA\tAT\t1\tAT\t1\t432\n+chrM_14612_15169_2:0:0_5:0:0_e4/1_TT\tchrM\t14611\tNA\tTT\t1\tTT\t1\t433\n+chrM_14668_15109_2:0:0_3:0:0_3d/1_TG\tchrM\t14667\tNA\tTG\t1\tTG\t1\t434\n+chrM_14696_15126_2:0:0_2:0:0_a7/1_AA\tchrM\t14695\tNA\tAA\t1\tAA\t1\t435\n+chrM_14727_15156_4:0:0_4:0:0_178/1_GA\tchrM\t14726\tNA\tGA\t1\tGA\t1\t436\n+chrM_14245_14783_3:0:0_1:0:0_9d/1_GG\tchrM\t14783\tNA\tGG\t1\tGG\t1\t437\n+chrM_14314_14802_5:0:0_3:0:0_191/1_GG\tchrM\t14802\tNA\tGG\t1\tGG\t1\t438\n+chrM_14817_15261_5:0:0_4:0:0_65/1_AT\tchrM\t14816\tNA\tAT\t1\tAT\t1\t439\n+chrM_14863_15320_2:0:0_4:0:0_d8/1_AA\tchrM\t14862\tNA\tAA\t1\tAA\t1\t440\n+chrM_14869_15387_2:0:0_7:0:0_1f/1_AC\tchrM\t14868\tNA\tAC\t1\tAC\t1\t441\n+chrM_14888_15340_3:0:0_3:0:0_112/1_GG\tchrM\t14887\tNA\tGG\t1\tGG\t1\t442\n+chrM_14368_14894_6:0:0_5:0:0_40/1_TC\tchrM\t14894\tNA\tTC\t1\tTC\t1\t443\n+chrM_14410_14941_6:0:0_3:0:0_e5/1_TA\tchrM\t14941\tNA\tTA\t1\tTA\t1\t444\n+chrM_14962_15543_5:0:0_8:0:0_46/1_CA\tchrM\t14961\tNA\tCA\t1\tCA\t1\t445\n+chrM_14456_14987_3:0:0_5:0:0_ab/1_GT\tchrM\t14987\tNA\tGT\t1\tGT\t1\t446\n+chrM_15070_15569_5:0:0_5:0:0_cf/1_CA\tchrM\t15069\tNA\tCA\t1\tCA\t1\t447\n+chrM_15140_15686_2:0:0_5:0:0_185/1_CC\tchrM\t15139\tNA\tCC\t1\tCC\t1\t448\n+chrM_15142_15661_7:0:0_3:0:0_11e/1_CT\tchrM\t15141\tNA\tCT\t1\tCT\t1\t449\n+chrM_15192_15694_5:0:0_2:0:0_f7/1_CA\t
chrM\t15191\tNA\tCA\t1\tCA\t1\t450\n+chrM_15211_15685_5:0:0_2:0:0_1d7/1_AC\tchrM\t15210\tNA\tAC\t1\tAC\t1\t451\n+chrM_15225_15786_3:0:0_6:0:0_17e/1_TT\tchrM\t15224\tNA\tTT\t1\tTT\t1\t452\n+chrM_15258_15810_4:0:0_6:0:0_5f/1_TT\tchrM\t15257\tNA\tTT\t1\tTT\t1\t453\n+chrM_14817_15317_5:0:0_2:0:0_59/1_GC\tchrM\t15317\tNA\tGC\t1\tGC\t1\t454\n+chrM_15324_15836_4:0:0_3:0:0_94/1_AC\tchrM\t15323\tNA\tAC\t1\tAC\t1\t455\n+chrM_15365_15880_4:1:0_3:0:0_80/1_CA\tchrM\t15364\tNA\tCA\t1\tCA\t1\t456\n+chrM_15408_15863_2:1:0_3:0:0_1e6/1_AG\tchrM\t15407\tNA\tAG\t1\tAG\t1\t457\n+chrM_15439_15924_0:0:0_4:0:0_172/1_TG\tchrM\t15438\tNA\tTG\t1\tTG\t1\t458\n+chrM_14931_15457_2:0:0_4:1:0_1a1/1_AA\tchrM\t15457\tNA\tAA\t1\tAA\t1\t459\n+chrM_15547_16054_5:0:0_1:0:0_af/1_GA\tchrM\t15546\tNA\tGA\t1\tGA\t1\t460\n+chrM_15134_15560_3:0:0_4:0:0_1c4/1_GG\tchrM\t15560\tNA\tGG\t1\tGG\t1\t461\n+chrM_15575_16118_5:0:0_4:0:0_1aa/1_GC\tchrM\t15574\tNA\tGC\t1\tGC\t1\t462\n+chrM_15052_15629_7:0:0_9:0:0_15f/1_GA\tchrM\t15629\tNA\tGA\t1\tGA\t1\t463\n+chrM_15698_16224_5:0:0_6:0:0_138/1_AC\tchrM\t15697\tNA\tAC\t1\tAC\t1\t464\n+chrM_15247_15721_2:1:0_6:0:0_17/1_TC\tchrM\t15721\tNA\tTC\t1\tTC\t1\t465\n+chrM_15218_15763_2:0:0_2:0:0_171/1_AG\tchrM\t15763\tNA\tAG\t1\tAG\t1\t466\n+chrM_15860_16419_5:0:0_8:0:0_53/1_TA\tchrM\t15859\tNA\tTA\t1\tTA\t1\t467\n+chrM_15864_16329_3:0:0_3:0:0_18b/1_CG\tchrM\t15863\tNA\tCG\t1\tCG\t1\t468\n+chrM_15507_15962_3:0:0_0:0:0_5b/1_GT\tchrM\t15962\tNA\tGT\t1\tGT\t1\t469\n+chrM_15430_15985_5:0:0_3:0:0_2c/1_AT\tchrM\t15985\tNA\tAT\t1\tAT\t1\t470\n+chrM_15706_16238_6:0:0_7:0:0_7b/1_AG\tchrM\t16238\tNA\tAG\t1\tAG\t1\t471\n+chrM_16252_16701_2:1:0_5:0:0_147/1_TA\tchrM\t16251\tNA\tTA\t1\tTA\t1\t472\n+chrM_15753_16280_5:0:0_6:0:0_180/1_TG\tchrM\t16280\tNA\tTG\t1\tTG\t1\t473\n+chrM_15777_16347_2:0:0_6:0:0_16/1_AT\tchrM\t16347\tNA\tAT\t1\tAT\t1\t474\n+chrM_16366_16911_6:1:0_6:1:1_168/1_CC\tchrM\t16365\tNA\tCC\t1\tCC\t1\t475\n+chrM_16370_16833_6:1:0_6:0:1_1bb/1_AC\tchrM\t16369\tNA\tAC\t1\tAC\t1\t476\
n+chrM_16402_16876_2:0:0_1:0:1_98/1_CA\tchrM\t16401\tNA\tCA\t1\tCA\t1\t477\n+chrM_16426_16953_3:0:0_2:2:0_60/1_AA\tchrM\t16425\tNA\tAA\t1\tAA\t1\t478\n+chrM_15986_16442_3:0:0_7:0:0_15d/1_TG\tchrM\t16442\tNA\tTG\t1\tTG\t1\t479\n+chrM_16030_16460_6:0:0_4:1:0_20/1_AT\tchrM\t16460\tNA\tAT\t1\tAT\t1\t480\n+chrM_15909_16463_3:0:0_7:1:0_106/1_GT\tchrM\t16463\tNA\tGT\t1\tGT\t1\t481\n+chrM_15984_16500_4:0:0_1:0:0_6b/1_GT\tchrM\t16500\tNA\tGT\t1\tGT\t1\t482\n+chrM_16070_16513_3:0:0_7:1:0_1e7/1_TA\tchrM\t16513\tNA\tTA\t1\tTA\t1\t483\n+chrM_16064_16572_4:0:0_6:1:0_f1/1_TG\tchrM\t16572\tNA\tTG\t1\tTG\t1\t484\n+chrM_16245_16740_4:1:0_2:0:1_127/1_TA\tchrM\t16742\tNA\tTA\t1\tTA\t1\t485\n+chrM_16315_16834_5:0:0_3:0:1_18/1_TT\tchrM\t16834\tNA\tTT\t1\tTT\t1\t486\n+chrM_16229_16843_2:1:0_2:0:1_c4/1_GC\tchrM\t16843\tNA\tGC\t1\tGC\t1\t487\n+chrM_16420_16905_5:0:0_2:0:1_159/1_GT\tchrM\t16905\tNA\tGT\t2\tGT\t2\t488\n+chrM_16420_16905_5:0:0_2:0:1_159/1_GT\tchrM\t16905\tNA\tGT\t2\tGT\t2\t488\n+chrM_16419_16923_3:0:0_3:0:1_183/1_CG\tchrM\t16923\tNA\tCG\t1\tCG\t1\t489\n+chrM_16585_16993_5:0:1_5:1:0_1b9/1_GA\tchrM\t16993\tNA\tGA\t1\tGA\t1\t490\n'
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/group_out4.bam
b
Binary file test-data/group_out4.bam has changed
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/group_out5.bam
b
Binary file test-data/group_out5.bam has changed
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/group_out6.bam
b
Binary file test-data/group_out6.bam has changed
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/out_R1.fastq.gz
b
Binary file test-data/out_R1.fastq.gz has changed
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/out_R2.fastq.gz
b
Binary file test-data/out_R2.fastq.gz has changed
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/out_SE.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_SE.fastq Thu Jun 21 15:20:14 2018 -0400
b
b"@@ -0,0 +1,288 @@\n+@HISEQ:105:C2UE1ACXX:3:1101:11160:2245_TTA 1:N:0:CAGATC\n+AAAAGTAGTTAATATATTAGATTTGTTTGATAGTGGTAGTATATATTTTTTATTTTAGTATTTAGGAGGTAGAGGTAGATGAATTTTTGAGTTTAAAG\n++\n+BBBFFFFBFFFFFIIIIIIBIIIIBIFFBFIIBBFFFFFFFFIIIIIIIIIIIIIIIBFIIIIIB7BBBFBBFFFF77<F7BFFFFFF7B7BBFFFF7\n+@HISEQ:105:C2UE1ACXX:3:1101:19467:2281_TTT 1:N:0:CATATC\n+TTGGTTAGGGTGAGATGTATAGTTTGGATTTTAGTGATTTTTGTAAAGGGGGAAAAGAATGGAGTTTTGGGTGTAGTGAGAGGTTATAGGAGTAGGGA\n++\n+<B<<FFFBBFFFFFFFFFIFIBFFI<<BFFFIIBBBFFIIIIBFFIIBBF7BBFFFBFFF77BBFBFF777BBBBBB<<<B<7<7BBF77<7<70000\n+@HISEQ:105:C2UE1ACXX:3:1101:7009:2740_TAG 1:N:0:CAGATC\n+AAGTTTTGTTTTTTATTTGGAGGTTATGGAATGTTAAGTAAGGTTTTTTTGGGTTTTGTTATTTATTTGATAATTGTGATTGTAATGTTAATAAGGGA\n++\n+BBBFFFFFFFFFFIIIIIB<FFFFFFIBBFIIFFFIIFIIIFFIFFIIII<0<BBFFBFFFFFFFFFF<BBFFFF<BBBBF7BFFF<BBFFFFF<00<\n+@HISEQ:105:C2UE1ACXX:3:1101:19067:2707_TTT 1:N:0:CAGATC\n+GTTTTTTTATTTGATATTTTAAAGGTTTTTTTTTTTTTTTTAGAAAATTTTTTTTAGTAAGATAGATTTTAAAGGGTTTGTTTTTTTTTTTTTTTTTT\n++\n+BBBFFFFFFFFFBBFFFFIIIFIFFIFFIIIIIIIFFFFFB7'0<B0<BBFF'7<0'0<<''0<'0'0<BBB<<B'7'0'0'0<BFFFFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:4999:3182_AGG 1:N:0:CAGATC\n+GTTTTATGAGGATTTTAGGGGAGTGATTGTTTAAAGTTTATAAGATTTATGATTTATATATAGTTAGAATAGTATGTGTTAAATAAATATAAAGGGAG\n++\n+<BBFFFFBFFFFFFIIFBBBFFFIIFFIBFFIFIIBFFIIIIIFFIIIIIBFFIIIIIIIIIBFFIBFIII<FFFBFBFFFFFFFFFFFFFFF<07<7\n+@HISEQ:105:C2UE1ACXX:3:1101:2300:3263_GTT 1:N:0:TAGATC\n+TTATTTTTAATAAAATTTTTATTATTTAATTTATTAGTTAATATTTAGGAGTTTTATGTTGTGGTAAAATTTTGTTAGAGAGATAGAGAAAGTATTTA\n++\n+BBBFFFFFFFFBFFIIIIIIBIBBFFI0<FFFBFF70B00<'0<FFFBBBBFF''0B0BB000'7BBFFFFII'<BF0<'<''07'70BBF'7BBBFB\n+@HISEQ:105:C2UE1ACXX:3:1101:5605:3427_AGA 1:N:0:CAGATC\n+AGATGAGAGGTATAGGATGTGGGGAGTTTTAGTAAGATTTATAGATAAGAAGTGGTTCGGTTATAGGATTTGTTTTAGATTTTTAGATTTTTTTGTGT\n++\n+BBBFBFFFFFFFFFFBFFFFIIIIIIIBFFFBFFIBFFIIIIIFFIIIFFIBF7BFBFBBFFIFF<<FFFFBBFFBF0<BFFFFF7<BFFFFFF0B7B\n+@HISEQ:105:C2UE1ACXX:3:1101:8129:3589_GAT 
1:N:0:CAGATC\n+TTTTAGTTTTTAGTTAGGATTATACGTTTATTGTGATAAAAGAGTTTTTTGATTTATCGGGTTATGTTAGGGTTTATTGATATTAGGGAATTTGAAGA\n++\n+BBBFFBFFFFFFBFFFBBFIIFIIFBFFFFII<BBFFFIIIBFFIFIIIIBBFFIIII<7<FBFF<FBF<7<BBFFFF7<BFFFF700<BBFF0<B'<\n+@HISEQ:105:C2UE1ACXX:3:1101:14304:3866_TAA 1:N:0:CAGATC\n+GTGTTTATATAGGGGATTTTTGAGTTTGATAGGTTGTTTTTGTAGAGGGTAGAATTTTGTGGAAATGTTGGTATTGGTTAAGGGGTTTTAGTGAAGAA\n++\n+<BBFFFFFFFFFFFFFFFFII<BBFFFBFFIFBFFBFFFII7FFFFFFFIF<BFIIII<F<BBFIFBFB7<BFFFBBBBBF777<B7BBF7B<BB7BB\n+@HISEQ:105:C2UE1ACXX:3:1101:12720:4398_AGT 1:N:0:CAGATC\n+GTATGTGTGTGTGTGTGTGTATTTAATTGAAGTTGGGTTTGGTGATATATATGTTTAATTTTAGTATTTTAGTGGTAGAGGTAGGTTAATTTTTGTTG\n++\n+<BBFBFFFFFFFFIFIFIFFFFFIIFII0BFBFFB7BFFF<BFFFFIIIIIIBFIIIIFIIII<FIFIIIFBFBBFB7B<<BB7<B<BFFFFFF0<B0\n+@HISEQ:105:C2UE1ACXX:3:1101:14945:4439_ATT 1:N:0:CAGATC\n+AGTGTTGAGTGGAGTATTAGAGAAGAGAAATAAGATAATAAAGTAATAGTTGTGATTAGGAGGTTTTTATAAGTTGATGGTTTATGTTAAGTAAGTTT\n++\n+BBBBFFBFFFFFFFFFFIIBIFFIFFBFFIIIIBFFIIIIIIBFFIIIBFFBFFFIIIFBFFBFBFFIIIIIFFFBBF77BBFFF7BBFF<BBF<<BF\n+@HISEQ:105:C2UE1ACXX:3:1101:8616:4508_AGG 1:N:0:CAGATC\n+AGAAATTTTGGGGGTGTAGGAGTGGTAGGATAGGAGTGTTGTTTTGTAATAGTTTTTTTTGAGGTTTAATAGGTAGGGTAGTTATTTTTAGTATTGTA\n++\n+BBBFFFFFFBBBFFFIIFFFIFFFIIFFIIFIFBFFFIIF<FBFI<FFIIIBFFFFFFFF077<B<BBBFF<<BBBBBB<7B<BFFFFFF0BBFF0<B\n+@HISEQ:105:C2UE1ACXX:3:1101:18975:4834_GAG 1:N:0:CAGATC\n+TATAAATGGTAATTTTGTAATTTAAAGATTTAAAAGTAATTATTGGTAATAGTTATTTGTGGGAGGTTGAGGTAGGGGGATTTTTGTAGAGATCGGAA\n++\n+BBBFFFFBBFFFFIIIBFIIIIIFIIFFFFIFIIIBFIIIIIIIB<FFIFIBFFFIIIBFFBFFFII<<BFBFFFFIFF<BBBFF0BB7<<<BF707B\n+@HISEQ:105:C2UE1ACXX:3:1101:4984:5374_TTG 1:N:0:CAGATC\n+TGTATCGAGGTTTGAATGAGAGTGGTATTTTTGTTATTTGTTAGTTAATGGTTTTGAGTATTAGTTTGGAAAATGATAATAAGTATTAGTTGAGGTGT\n++\n+B<BFFFBFFFFBF<FFFBFFFFFFFIFFFIII<BFFIIIBFFFFFFIIIBBFFFIBBBFFFIIBFFF<<BFFFFBBFFFFFF<BBFFF<BB7<7<BBB\n+@HISEQ:105:C2UE1ACXX:3:1101:12336:6058_TTT 
1:N:0:CAGATC\n+ATTTAGATGATGGTTTTTTTATTTGATATTTTAAAGGTTTTTTTTTTTTTTTTTTAAAAAATTTTTTTTATAAAAATAAATTTTAAAGGGTTTTTTTT\n++\n+BBBFFBFFBFFBBFFFIIIIIIIIBFFIIIIIIIIBFFFFIIIIIFFFFFFFFF700<BF0BBFFF0<70''00'0'000'<<'<B<00'0'00007<\n+@HISEQ:105:C2UE1ACXX:3:1101:5999:6265_ATG 1:N:0:CAGATC\n+TATATTGTTATATATTTGTGTTTTTTTTGAA"..b"FFFIBBFFIIIIFIBFFIBBFFFIFIFIIFFIFFIBFIIIFBFFFIIIIIBFBBFFFBFFF<<FFFFI70<7BFF<BFFFFFFBBBF7\n+@HISEQ:105:C2UE1ACXX:3:1101:13060:22287_TGT 1:N:0:CAGATC\n+TAGTGAAGAATAAATTTTTATGTTGTATATTATTTTTTTTTAGTTCGTATATATCGGTATACGTGTTAGGATTTATAAAGATAGTTATTATTTTTTGT\n++\n+BBBFFFFFFFFFFIIIIIIIIBFIFFIIIIIIIIIIIIIIII<FFIBFFIIIIIIB<BFFFFBBBBBF<<BBFFFBFFF<BBF7<BFFFFFFFFFF7B\n+@HISEQ:105:C2UE1ACXX:3:1101:7272:22581_TTA 1:N:0:CAGATC\n+TTAATGATATTAAGAATTTTTTAAAGAATTTTATTTTTTTTAGGAATAGAAGGAGGAGGAGTATTTTGATCGATTTTTTAGGTTTTTTATAGGTGGAG\n++\n+BBBFFBFFFFFFF<BFIIIIIIFIIBFFFFIIFIIIIIIIII<<FFFIBFIBBFFBFFFFFFBBFFF<BBF<BBFFFFFF00<BBFFFFFF000<777\n+@HISEQ:105:C2UE1ACXX:3:1101:10060:23020_TTT 1:N:0:CAGATC\n+TTTTAGATTATTTAAGAAGGTATTAGGTTTTTAAGAGGAAAGGGTAGTCTTATAGTTTTGAGTATTTTTTTTAAAAGGAAGTAAGGATGGTGTTTTTA\n++\n+BBBFFBFFFFFFFIIBFIFFFFFIBBBFFFFIIIBFFFIFIBFFIFFFFIIFFFBFFII7B<FFFIIIIIFFFFBB77<B<BBF<7BB<<BBBBBFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:14440:23104_TTT 1:N:0:CAGATC\n+TTTAAATTTAAGTTAAGGTTTGGGGAGTTGATTTTTGTTTTGTGGGTTGTTTTTTTTGTAGGAGTTGGTTTTTAGAGGTTTTTAGGAATTTTTGGTGT\n++\n+BBBFFFFFFFFFFFIIBBIFIBBBFFFFFBFFIIIIBFFIIBFBFFFFFFIIIIFFF7BB<<BBFB<BBBBFFB7<00B<BBFF00<BBFFFF''77B\n+@HISEQ:105:C2UE1ACXX:3:1101:6941:23338_GTT 1:N:0:CAGATC\n+AGAAAGGTTTTAAGTTGGTTGGGAATATAGGGGTTTTTTAGAGTTTTTATTAGGAGTTATAGTGTGTTGAATTTGGTTTTGGGTGTTGATTATAGGTT\n++\n+BBBFFFFFFFFFFBFFBBFFBBBFFFFIIBFFFIFFIIIIBFFFFFIIIIIIB<FFFFFFFBFBFFFF<BBFFF<<BBBB700<BBB0BBFFFF00<B\n+@HISEQ:105:C2UE1ACXX:3:1101:10069:23622_TTT 
1:N:0:CAGATC\n+TTTTGTTTTAGGGTTTTATTTTTGTGTTTTATTTTTATTTTCGTATTATTAGTTTTTTTTATACGTTATTTGTAGAAGGTTAGTTTTTTTAATTTAGG\n++\n+BBBFBFFFFFBBBFFFIFIIIIIBFBFFIIBFIIIIIIIIIIBFFFIIIIIFFIIIIIIFFFFF<BFFFFF<BF<<B77BBB7BBFFFFFFFFFFF00\n+@HISEQ:105:C2UE1ACXX:3:1101:14079:24078_AGT 1:N:0:CAGATC\n+TTTAAAGTTTTTAGTTTTGAGTGGAATTTTAAGAATATTAGTGCGTTTTAAGCTTAGGTAGTTTTGGTAGTTTGAAAGTAATAGGGTGTATTTTGTAA\n++\n+BBBFFFFFFFFFFBFFFI<BBFFFFFFIIIIIBFFFIIIIBFFFFFFIIIIBFFIIBBFFFFFFI<7BFBFFF<BBF<BBFFF<07BBBBBFFF<BFF\n+@HISEQ:105:C2UE1ACXX:3:1101:12064:24631_TTT 1:N:0:CCGATC\n+TTATAGTGTATTTATATATATGAAATGAATTAATGAATTTTAAAAAAAAAGAAAGTAAGTTGTTTTTAGGATTGATATTTAGAGTTAATTTTTTGAGT\n++\n+BBBFFBFFFFFFFIIIIIIIIBFIFF0BFIIIIIBFFIIIIIIIIIIIIIBFIIFFFIBFFBFFFFFFB<BFF7BBFFFFF<B<BBBFFFFFFF'70<\n+@HISEQ:105:C2UE1ACXX:3:1101:11630:24964_GAT 1:N:0:TAGATC\n+TTAGTTTTTTTAGTGTTTTTTATTTATTTCGTTTTATTATTGGAGTTTGTTAAGAAAATTAGGGTTTGATTTGGATGTTAAGGATTGGTTTTTTTTTT\n++\n+BBBBFFFFFFFFBFFFFIIIIIIIIIIIIIBFFFIFIIIIIB7BFFFFBFFIIBFIIIIIFB<BFBFBFFFF<7BB<BBFF<7<BB77BBFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:12594:24878_AAT 1:N:0:CCGATC\n+TTTAATAGGATATGATATTATTTAATTTATAGATTATGGAAATTTTTTATATTTAATGAAGAAAGTTGGAATGTTTTGGGAGGTGTTTAGAATAAATA\n++\n+BBBFFFFB0BFFF<FFIIIFIIFIIIFIFIF<BFFFF<<BFFFIIIIIIIIIIIIIIBFFBFFIBFB7'<FF<<BBF0''77<BFB<BB0<BBBBBFF\n+@HISEQ:105:C2UE1ACXX:3:1101:4483:25030_ATT 1:N:0:CAGATC\n+AGGATGGTGTTTTTATTTTTAGATTTATATTATTTTGTTATATTTGTATTTGAGTAAGTTTATGGGTTTTTTAAAGAGGTAGGAGGAAGTTTTTTGTT\n++\n+B<BFFFFFFFFFFIIIIIIIIBBFIIFIIIIFIIII<BFFIFFIIBFIIII<BBFFIFIFIIIB7<FFFIIIFBF<<77BB00<70<B7BBBFFF0BB\n+@HISEQ:105:C2UE1ACXX:3:1101:12198:25235_TTA 1:N:0:CAGATC\n+ATATATGTAGTTTGTATTATTTTTGTTATAGTATATAAAGGTTAAAGAGTAGTTGTTTTAATTTTAGAGGTGGAGATTGGGTTGTATAGTTTTGGTTT\n++\n+BBBFFFBFFFFFFBFFIIFIIIII0BF<FF0BFFFIFFIBBFBFII<FBFBBFF7FFFIIIIIIII<F7<B'<BB7<B''0<<0<BBB77<BB'0<<B\n+@HISEQ:105:C2UE1ACXX:3:1101:20477:25084_TGT 
1:N:0:CATATC\n+AGAGTTTATTGAGAAGTAAAGTATTAATTTTATGGGAGAAATGGGATAGAGGTAGTAGAAGTTGTTATGGAATGGGATTAATTAGGAAGTTAATTAAG\n++\n+BBBFFFFFFF0BBFFBFFIIFFIIIIIIIIIFIBB7FFFFIIFFBFFIFIBFFFFIIFIIFFFBFFFF77BFF<<B<FFFFFFF70<B7BBFFFFFF7\n+@HISEQ:105:C2UE1ACXX:3:1101:5725:25359_TAG 1:N:0:CAGATC\n+GAGAAATAAGATAATAAAGTAATAGTTGTGATTAGGAGGTTTTTTATAAGTTGATGGTTTATGTTAAGTAAGTTTATTAAGAAGTATAGTATTATATA\n++\n+BBBFFFFFFBFFFIIIIIIIIIIIBIFBFFFFIIBBFFFFFFFIIIIIIFFFBFFBBFFFIIBFFIIBFIIFIFIFFFFF<BF<BBFF7<<FFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:5502:25591_AGA 1:N:0:CAGATC\n+ATATGATTTTATTTTTAGGGATAATATTTTTTAAGTGAATTTTGATTTTTTGGTTAGTTATTTTGATGATGTGTAGAGGGTGTATAGTTTTTGGATAT\n++\n+BBBFBFFFFFFFFIIIIBBBFFFIIIIIIIIIIIBFFFIIIIIBFFIIIII<7BFFBFFFIIII<FF<FFBFFFFFB<<<BFBBBF<<BFFF00<BFB\n"
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/out_paired.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_paired.log Thu Jun 21 15:20:14 2018 -0400
b
@@ -0,0 +1,37 @@
+# output generated by extract --bc-pattern=NNNXXX --stdin=input_read1.gz --read2-in=input_read2.gz --stdout out1.gz --read2-out=out2.gz --log=/tmp/tmpAvWtMd/files/000/dataset_20.dat
+# job started at Wed Jan  3 17:06:39 2018 on tzk-ThinkPad-T450s -- 9eecaab6-9562-4b29-8a2e-ae6c1034a14c
+# pid: 22903, system: Linux 4.10.0-42-generic #46~16.04.1-Ubuntu SMP Mon Dec 4 15:57:59 UTC 2017 x86_64
+# blacklist                               : None
+# compresslevel                           : 6
+# error_correct_cell                      : False
+# extract_method                          : string
+# filter_cell_barcode                     : None
+# filter_cell_barcodes                    : False
+# log2stderr                              : False
+# loglevel                                : 1
+# pattern                                 : NNNXXX
+# pattern2                                : None
+# prime3                                  : None
+# quality_encoding                        : None
+# quality_filter_mask                     : None
+# quality_filter_threshold                : None
+# random_seed                             : None
+# read2_in                                : input_read2.gz
+# read2_out                               : out2.gz
+# read2_stdout                            : False
+# reads_subset                            : None
+# reconcile                               : False
+# retain_umi                              : None
+# short_help                              : None
+# stderr                                  : <_io.TextIOWrapper name='<stderr>' mode='w' encoding='UTF-8'>
+# stdin                                   : <_io.TextIOWrapper name='input_read1.gz' encoding='ascii'>
+# stdlog                                  : <_io.TextIOWrapper name='/tmp/tmpAvWtMd/files/000/dataset_20.dat' mode='a' encoding='UTF-8'>
+# stdout                                  : <_io.TextIOWrapper name='out1.gz' encoding='ascii'>
+# timeit_file                             : None
+# timeit_header                           : None
+# timeit_name                             : all
+# whitelist                               : None
+2018-01-03 17:06:39,365 INFO Starting barcode extraction
+2018-01-03 17:06:39,377 INFO Input Reads: 100
+2018-01-03 17:06:39,377 INFO Reads output: 100
+# job finished in 0 seconds at Wed Jan  3 17:06:39 2018 --  0.60  0.05  0.00  0.00 -- 9eecaab6-9562-4b29-8a2e-ae6c1034a14c
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/out_single.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_single.log Thu Jun 21 15:20:14 2018 -0400
b
@@ -0,0 +1,38 @@
+# output generated by extract --bc-pattern=XXXNNN --stdin=/tmp/tmpAvWtMd/files/000/dataset_13.dat --stdout /tmp/tmpAvWtMd/files/000/dataset_14.dat --3prime --quality-filter-threshold 10 --quality-encoding phred33 --log=/tmp/tmpAvWtMd/files/000/dataset_15.dat
+# job started at Wed Jan  3 17:06:20 2018 on tzk-ThinkPad-T450s -- 8897b3b4-c6e2-4ee3-b6c0-71044a63b7e1
+# pid: 22794, system: Linux 4.10.0-42-generic #46~16.04.1-Ubuntu SMP Mon Dec 4 15:57:59 UTC 2017 x86_64
+# blacklist                               : None
+# compresslevel                           : 6
+# error_correct_cell                      : False
+# extract_method                          : string
+# filter_cell_barcode                     : None
+# filter_cell_barcodes                    : False
+# log2stderr                              : False
+# loglevel                                : 1
+# pattern                                 : XXXNNN
+# pattern2                                : None
+# prime3                                  : True
+# quality_encoding                        : phred33
+# quality_filter_mask                     : None
+# quality_filter_threshold                : 10
+# random_seed                             : None
+# read2_in                                : None
+# read2_out                               : False
+# read2_stdout                            : False
+# reads_subset                            : None
+# reconcile                               : False
+# retain_umi                              : None
+# short_help                              : None
+# stderr                                  : <_io.TextIOWrapper name='<stderr>' mode='w' encoding='UTF-8'>
+# stdin                                   : <_io.TextIOWrapper name='/tmp/tmpAvWtMd/files/000/dataset_13.dat' mode='r' encoding='UTF-8'>
+# stdlog                                  : <_io.TextIOWrapper name='/tmp/tmpAvWtMd/files/000/dataset_15.dat' mode='a' encoding='UTF-8'>
+# stdout                                  : <_io.TextIOWrapper name='/tmp/tmpAvWtMd/files/000/dataset_14.dat' mode='w' encoding='UTF-8'>
+# timeit_file                             : None
+# timeit_header                           : None
+# timeit_name                             : all
+# whitelist                               : None
+2018-01-03 17:06:20,360 INFO Starting barcode extraction
+2018-01-03 17:06:20,362 INFO Input Reads: 100
+2018-01-03 17:06:20,362 INFO Reads output: 72
+2018-01-03 17:06:20,362 INFO filtered: umi quality: 28
+# job finished in 0 seconds at Wed Jan  3 17:06:20 2018 --  0.58  0.06  0.00  0.00 -- 8897b3b4-c6e2-4ee3-b6c0-71044a63b7e1
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/out_wl_paired.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_wl_paired.html Thu Jun 21 15:20:14 2018 -0400
b
@@ -0,0 +1,1 @@
+<html> <head></head><body> <h1>Cell and Count Metrics</h1> <img src="OUT_cell_barcode_count_density.png" ><br /> <img src="OUT_cell_barcode_knee.png" ><br /> <img src="OUT_cell_barcode_counts.png" ><br /> </body></html>
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/out_wl_paired.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_wl_paired.log Thu Jun 21 15:20:14 2018 -0400
b
@@ -0,0 +1,40 @@
+# output generated by whitelist --bc-pattern=CCCNNNNNNNNXXXXX --subset-reads=0 --stdin=/tmp/tmpibtvD6/files/000/dataset_5.dat --read2-in=/tmp/tmpibtvD6/files/000/dataset_6.dat --expect-cells=5 --error-correct-threshold=3 --method=reads --plot-prefix=OUT --log=/tmp/tmpibtvD6/files/000/dataset_8.dat
+# job started at Sun Feb 25 10:50:16 2018 on bag -- e78e4e5b-e99e-426a-8a92-c8b3beeadf18
+# pid: 2385, system: Linux 4.13.0-32-generic #35-Ubuntu SMP Thu Jan 25 09:13:46 UTC 2018 x86_64
+# blacklist_tsv                           : None
+# cell_number                             : False
+# compresslevel                           : 6
+# error_correct_threshold                 : 3
+# expect_cells                            : 5
+# extract_method                          : string
+# filter_cell_barcodes                    : False
+# log2stderr                              : False
+# loglevel                                : 1
+# method                                  : reads
+# pattern                                 : CCCNNNNNNNNXXXXX
+# pattern2                                : None
+# plot_prefix                             : OUT
+# prime3                                  : None
+# random_seed                             : None
+# read2_in                                : /tmp/tmpibtvD6/files/000/dataset_6.dat
+# short_help                              : None
+# stderr                                  : <_io.TextIOWrapper name='<stderr>' mode='w' encoding='UTF-8'>
+# stdin                                   : <_io.TextIOWrapper name='/tmp/tmpibtvD6/files/000/dataset_5.dat' mode='r' encoding='UTF-8'>
+# stdlog                                  : <_io.TextIOWrapper name='/tmp/tmpibtvD6/files/000/dataset_8.dat' mode='a' encoding='UTF-8'>
+# stdout                                  : <_io.TextIOWrapper name='<stdout>' mode='w' encoding='UTF-8'>
+# subset_reads                            : 0
+# timeit_file                             : None
+# timeit_header                           : None
+# timeit_name                             : all
+# whitelist_tsv                           : None
+2018-02-25 10:50:16,016 INFO Starting barcode extraction
+2018-02-25 10:50:16,017 INFO Parsed 0 reads
+2018-02-25 10:50:16,019 INFO Starting - whitelist determination
+2018-02-25 10:50:17,208 INFO Finished - whitelist determination
+2018-02-25 10:50:17,208 INFO Starting - finding putative error cell barcodes
+2018-02-25 10:50:17,208 INFO Finished - finding putative error cell barcodes
+2018-02-25 10:50:17,208 INFO Writing out whitelist
+2018-02-25 10:50:17,208 INFO Parsed 100 reads
+2018-02-25 10:50:17,208 INFO 100 reads matched the barcode pattern
+2018-02-25 10:50:17,208 INFO Found 23 unique cell barcodes
+# job finished in 1 seconds at Sun Feb 25 10:50:17 2018 --  2.35  0.08  0.00  0.00 -- e78e4e5b-e99e-426a-8a92-c8b3beeadf18
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/out_wl_paired.tresh.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_wl_paired.tresh.tab Thu Jun 21 15:20:14 2018 -0400
b
@@ -0,0 +1,7 @@
+count action
+1 Selected
+5 Rejected
+8 Rejected
+11 Rejected
+14 Rejected
+20 Rejected
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/out_wl_paired.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_wl_paired.txt Thu Jun 21 15:20:14 2018 -0400
b
@@ -0,0 +1,1 @@
+TTT AAA,AAG,AAT,AGA,AGG,AGT,ATA,ATT,GAG,GGT,GTA,GTG,GTT,TAA,TAG,TAT,TCT,TGA,TGG,TGT,TTA,TTG 15 2,4,1,6,3,4,6,8,3,2,2,2,8,2,3,6,1,1,2,4,8,7
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/out_wl_single.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_wl_single.html Thu Jun 21 15:20:14 2018 -0400
b
@@ -0,0 +1,1 @@
+<html> <head></head><body> <h1>Cell and Count Metrics</h1> <img src="OUT_cell_barcode_count_density.png" ><br /> <img src="OUT_cell_barcode_knee.png" ><br /> <img src="OUT_cell_barcode_counts.png" ><br /> </body></html>
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/out_wl_single.tresh.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_wl_single.tresh.tab Thu Jun 21 15:20:14 2018 -0400
b
@@ -0,0 +1,3 @@
+count action
+1 Rejected
+4 Selected
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/out_wl_single.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_wl_single.txt Thu Jun 21 15:20:14 2018 -0400
b
@@ -0,0 +1,44 @@
+# output generated by whitelist --bc-pattern=CCCCCCCCNNNNNNNN --subset-reads=0 --stdin=/tmp/tmpibtvD6/files/000/dataset_1.dat --method=reads --plot-prefix=OUT --3prime
+# job started at Sun Feb 25 10:49:56 2018 on bag -- cb0db520-8a4e-4040-aa88-93efc0718fa8
+# pid: 2217, system: Linux 4.13.0-32-generic #35-Ubuntu SMP Thu Jan 25 09:13:46 UTC 2018 x86_64
+# blacklist_tsv                           : None
+# cell_number                             : False
+# compresslevel                           : 6
+# error_correct_threshold                 : 1
+# expect_cells                            : False
+# extract_method                          : string
+# filter_cell_barcodes                    : False
+# log2stderr                              : False
+# loglevel                                : 1
+# method                                  : reads
+# pattern                                 : CCCCCCCCNNNNNNNN
+# pattern2                                : None
+# plot_prefix                             : OUT
+# prime3                                  : True
+# random_seed                             : None
+# read2_in                                : None
+# short_help                              : None
+# stderr                                  : <_io.TextIOWrapper name='<stderr>' mode='w' encoding='UTF-8'>
+# stdin                                   : <_io.TextIOWrapper name='/tmp/tmpibtvD6/files/000/dataset_1.dat' mode='r' encoding='UTF-8'>
+# stdlog                                  : <_io.TextIOWrapper name='<stdout>' mode='w' encoding='UTF-8'>
+# stdout                                  : <_io.TextIOWrapper name='<stdout>' mode='w' encoding='UTF-8'>
+# subset_reads                            : 0
+# timeit_file                             : None
+# timeit_header                           : None
+# timeit_name                             : all
+# whitelist_tsv                           : None
+## 2018-02-25 10:49:56,061 INFO Starting barcode extraction
+## 2018-02-25 10:49:56,061 INFO Parsed 0 reads
+## 2018-02-25 10:49:56,062 INFO Starting - whitelist determination
+## 2018-02-25 10:49:57,383 INFO Finished - whitelist determination
+## 2018-02-25 10:49:57,383 INFO Starting - finding putative error cell barcodes
+## 2018-02-25 10:49:57,383 INFO Finished - finding putative error cell barcodes
+## 2018-02-25 10:49:57,383 INFO Writing out whitelist
+AAAAAAAA AAAAAACA,AAACAAAA,AATAAAAA 3 1,1,1
+ACAAAAAC 2
+ACAACAAA 2
+TTACTTAA TTACTAAA 2 1
+## 2018-02-25 10:49:57,383 INFO Parsed 100 reads
+## 2018-02-25 10:49:57,383 INFO 100 reads matched the barcode pattern
+## 2018-02-25 10:49:57,383 INFO Found 95 unique cell barcodes
+# job finished in 1 seconds at Sun Feb 25 10:49:57 2018 --  2.25  0.06  0.00  0.00 -- cb0db520-8a4e-4040-aa88-93efc0718fa8
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/scrb_extract.fastq.gz
b
Binary file test-data/scrb_extract.fastq.gz has changed
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/scrb_seq_barcodes
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/scrb_seq_barcodes Thu Jun 21 15:20:14 2018 -0400
b
@@ -0,0 +1,384 @@
+AAAACT
+GCTAGA
+AAAATC
+GCTTAC
+AAACAT
+GGACAT
+AAACTA
+GGCAAT
+AAAGTT
+GGGATT
+AAATAC
+GTACAC
+AAATCA
+GTCAAG
+AAATGT
+GTGACT
+AAATTG
+GTTCGA
+AACAAT
+TAGTGG
+AACATA
+TCCAAC
+AACTAA
+TCGAAG
+AAGATT
+TCTGCA
+AAGTAT
+TTCCTC
+AAGTTA
+TTGTCC
+AATAAC
+TTTGGC
+AATACA
+CCAACC
+AATAGT
+CCTTCC
+AATATG
+CTCTCC
+AATCAA
+GGACCA
+AATCTT
+GTACCG
+AATGAT
+ACCCCC
+AATGTA
+ACCCGG
+AATTAG
+ACCGCG
+AATTCT
+ACCGGC
+AATTGA
+ACGCCG
+AATTTC
+ACGCGC
+ACAAAT
+ACGGCC
+ACAATA
+ACGGGG
+ACATAA
+AGCCCG
+ACTAAA
+AGCCGC
+ACTATT
+AGCGCC
+ACTTAT
+AGCGGG
+ACTTTA
+AGGCCC
+AGAATT
+AGGCGG
+AGATAT
+AGGGCG
+AGATTA
+AGGGGC
+AGTAAT
+CACCCC
+AGTATA
+CACCGG
+AGTTAA
+CACGCG
+ATAAAC
+CACGGC
+ATAACA
+CAGCCG
+ATAAGT
+CAGCGC
+ATAATG
+CAGGCC
+ATACAA
+CAGGGG
+ATACTT
+CCACCG
+ATAGAT
+CCACGC
+ATAGTA
+CCAGGG
+ATATAG
+CCCACG
+ATATCT
+CCCAGC
+ATATGA
+CCCCAC
+ATATTC
+CCCCCA
+ATCAAA
+CCCCGT
+ATCATT
+CCCCTG
+ATCTAT
+CCCGAG
+ATCTTA
+CCCGGA
+ATGAAT
+CCCTGG
+ATGATA
+CCGAGG
+ATGTAA
+CCGCAG
+ATTAAG
+CCGCGA
+ATTACT
+CCGGAC
+ATTAGA
+CCGGCA
+ATTATC
+CCGGGT
+ATTCAT
+CCGGTG
+ATTCTA
+CCGTCG
+ATTGAA
+CCGTGC
+ATTGTT
+CCTCGG
+ATTTAC
+CCTGCG
+ATTTCA
+CCTGGC
+ATTTGT
+CGACCC
+ATTTTG
+CGACGG
+CAAAAT
+CGAGCG
+CAAATA
+CGAGGC
+CAATAA
+CGCACC
+CATAAA
+CGCAGG
+CATATT
+CGCCAG
+CATTAT
+CGCCCT
+CATTTA
+CGCCGA
+CTAAAA
+CGCCTC
+CTAATT
+CGCGAC
+CTATAT
+CGCGCA
+CTATTA
+CGCGGT
+CTTAAT
+CGCGTG
+CTTATA
+CGCTCG
+CTTTAA
+CGCTGC
+GAAATT
+CGGACG
+GAATAT
+CGGAGC
+GAATTA
+CGGCAC
+GATAAT
+CGGCCA
+GATATA
+CGGCGT
+GATTAA
+CGGCTG
+GTAAAT
+CGGGAG
+GTAATA
+CGGGCT
+GTATAA
+CGGGGA
+GTTAAA
+CGGGTC
+GTTATT
+CGGTCC
+GTTTAT
+CGGTGG
+GTTTTA
+CGTCCG
+TAAAAC
+CGTCGC
+TAAACA
+CGTGCC
+TAAAGT
+CGTGGG
+TAAATG
+CTCCCG
+TAACAA
+CTCCGC
+TAACTT
+CTCGGG
+TAAGAT
+CTGCGG
+TAAGTA
+CTGGCG
+TAATAG
+CTGGGC
+TAATCT
+GACCCG
+TAATGA
+GACCGC
+TAATTC
+GACGCC
+TACAAA
+GACGGG
+TACATT
+GAGCCC
+TACTAT
+GAGCGG
+TACTTA
+GAGGCG
+TAGAAT
+GAGGGC
+TAGATA
+GCACCC
+TAGTAA
+GCACGG
+TAGTTT
+GCAGCG
+TATAAG
+GCAGGC
+TATACT
+GCCACC
+TATAGA
+GCCAGG
+TATATC
+GCCCAG
+TATCAT
+GCCCCT
+TATCTA
+GCCCGA
+TATGAA
+GCCCTC
+TATGTT
+GCCGAC
+TATTAC
+GCCGCA
+TATTCA
+GCCGGT
+TATTGT
+GCCGTG
+TATTTG
+GCCTCG
+TCAAAA
+GCCTGC
+TCAATT
+GCGACG
+TCATAT
+GCGAGC
+TCATTA
+GCGCAC
+TCTAAT
+GCGCCA
+TCTATA
+GCGCGT
+TCTTAA
+GCGCTG
+TGAAAT
+GCGGAG
+TGAATA
+GCGGCT
+TGATAA
+GCGGGA
+TGATTT
+GCGGTC
+TGTAAA
+GCGTCC
+TGTATT
+GCGTGG
+TGTTAT
+GCTCCG
+TGTTTA
+GCTCGC
+TTAAAG
+GCTGCC
+TTAACT
+GCTGGG
+TTAAGA
+GGACGC
+TTAATC
+GGAGCC
+TTACAT
+GGAGGG
+TTACTA
+GGCACG
+TTAGAA
+GGCAGC
+TTAGTT
+GGCCAC
+TTATAC
+GGCGAG
+TTATCA
+GGCGCT
+TTATGT
+GGCGGA
+TTATTG
+GGCGTC
+TTCAAT
+GGCTCC
+TTCATA
+GGGACC
+TTCTAA
+GGGAGG
+TTGAAA
+GGGCAG
+TTGATT
+GGGCCT
+TTGTTA
+GGGCGA
+TTTAAC
+GGGCTC
+TTTACA
+GGGGAC
+TTTAGT
+GGGGCA
+TTTATG
+GGGGGT
+TTTCAA
+GGGGTG
+TTTCTT
+GGGTCG
+TTTGTA
+GGGTGC
+TTTTAG
+GGTCCC
+TTTTCT
+GGTGCG
+TTTTGA
+GGTGGC
+TCTTTC
+GTCCCC
+TTGGAT
+GTCGCG
+ACCGTA
+GTCGGC
+AGACCT
+GTGCGC
+AGGGAT
+GTGGCC
+ATCGAG
+GTGGGG
+CAAGCT
+TCCCCG
+CACCAA
+TCCCGC
+CAGTCA
+TCCGGG
+CATCAG
+TCGCGG
+CATGGT
+TCGGCG
+CCACAT
+TCGGGC
+CCGATT
+TGCCCC
+CGACTT
+TGCGCG
+CGATTG
+TGCGGC
+CTAGTG
+TGGCCG
+CTTCTG
+TGGCGC
+GAAGAC
+TGGGCC
+GATCGT
+TGGGGG
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/scrb_seq_fastq.1.gz
b
Binary file test-data/scrb_seq_fastq.1.gz has changed
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/scrb_seq_fastq.2.gz
b
Binary file test-data/scrb_seq_fastq.2.gz has changed
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/t_R1.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/t_R1.fastq Thu Jun 21 15:20:14 2018 -0400
b
b"@@ -0,0 +1,400 @@\n+@HISEQ:105:C2UE1ACXX:3:1101:11160:2245 1:N:0:CAGATC\n+AAAAGTAGTTAATATATTAGATTTGTTTGATAGTGGTAGTATATATTTTTTATTTTAGTATTTAGGAGGTAGAGGTAGATGAATTTTTGAGTTTAAAGTTA\n++\n+BBBFFFFBFFFFFIIIIIIBIIIIBIFFBFIIBBFFFFFFFFIIIIIIIIIIIIIIIBFIIIIIB7BBBFBBFFFF77<F7BFFFFFF7B7BBFFFF7<BF\n+@HISEQ:105:C2UE1ACXX:3:1101:19338:2197 1:N:0:CAGATC\n+TTTTTTTTTAGAGGGATTAGTTTTTTTTATTGAGGTTTTTGAAAGTTGTTGTATGTTAATTGTTTTTAGAATGTTGGGTATAAGTAGGATTTAGGTCTATT\n++\n+BBBFFFFFFB0<BF7BBBF7BFFIIIII7BF'0<0BBFFF'<BB'<B7<B07<B7<BFBBF0<BBBBB0<<B0BB<<000<BF00<<'0<BBB0'00BBF#\n+@HISEQ:105:C2UE1ACXX:3:1101:19467:2281 1:N:0:CATATC\n+TTGGTTAGGGTGAGATGTATAGTTTGGATTTTAGTGATTTTTGTAAAGGGGGAAAAGAATGGAGTTTTGGGTGTAGTGAGAGGTTATAGGAGTAGGGATTT\n++\n+<B<<FFFBBFFFFFFFFFIFIBFFI<<BFFFIIBBBFFIIIIBFFIIBBF7BBFFFBFFF77BBFBFF777BBBBBB<<<B<7<7BBF77<7<700007BB\n+@HISEQ:105:C2UE1ACXX:3:1101:7009:2740 1:N:0:CAGATC\n+AAGTTTTGTTTTTTATTTGGAGGTTATGGAATGTTAAGTAAGGTTTTTTTGGGTTTTGTTATTTATTTGATAATTGTGATTGTAATGTTAATAAGGGATAG\n++\n+BBBFFFFFFFFFFIIIIIB<FFFFFFIBBFIIFFFIIFIIIFFIFFIIII<0<BBFFBFFFFFFFFFF<BBFFFF<BBBBF7BFFF<BBFFFFF<00<BB0\n+@HISEQ:105:C2UE1ACXX:3:1101:13708:2613 1:N:0:CAGATC\n+ATATAATAGATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAAAGATAATTAATTTTTAAAATTTTTTTTTTTTTAATAAAA\n++\n+BBBFFFFFBFFFFIIIIIIIIIIIFFFFFB7B007BB0''''''0077BFF7'077BF0''0''''''00''00''''00'<BBBFBBFFFFF########\n+@HISEQ:105:C2UE1ACXX:3:1101:19067:2707 1:N:0:CAGATC\n+GTTTTTTTATTTGATATTTTAAAGGTTTTTTTTTTTTTTTTAGAAAATTTTTTTTAGTAAGATAGATTTTAAAGGGTTTGTTTTTTTTTTTTTTTTTTTTT\n++\n+BBBFFFFFFFFFBBFFFFIIIFIFFIFFIIIIIIIFFFFFB7'0<B0<BBFF'7<0'0<<''0<'0'0<BBB<<B'7'0'0'0<BFFFFFFFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:4999:3182 1:N:0:CAGATC\n+GTTTTATGAGGATTTTAGGGGAGTGATTGTTTAAAGTTTATAAGATTTATGATTTATATATAGTTAGAATAGTATGTGTTAAATAAATATAAAGGGAGAGG\n++\n+<BBFFFFBFFFFFFIIFBBBFFFIIFFIBFFIFIIBFFIIIIIFFIIIIIBFFIIIIIIIIIBFFIBFIII<FFFBFBFFFFFFFFFFFFFFF<07<7<<7\n+@HISEQ:105:C2UE1ACXX:3:1101:16790:3145 
1:N:0:CAGATC\n+TATAGAGGTATTTTGTTATTTTGTTTTAGTTATTGCGGGTTAGAGTAGATGGTTATTTTTAGTAGAGTATTGTTTGTTGTTTTTTATATGTGGTATAGAGG\n++\n+BBBF<BBFFFFFFI<FFFIFIIBBFFIIBFFFFI<BBBFFFF<FFFF7FF70<BFFIFIIF7BFBB<FFBF'BBF'<<0<BFFBBBBBF'<70<<<B####\n+@HISEQ:105:C2UE1ACXX:3:1101:18065:3106 1:N:0:CAGATC\n+AAGTATTTGTTATATATATTTTAAAGTTTTTTTTTTTTTTAGGAATTTTTTTTTATAATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGGTTTTTTT\n++\n+BBBFFFFFBFFFFIIIIFIIIIIIIFIFIIIIIIIIIIFF''00<<BB'<<'7''000<BFFF77<77BFF77BFBB07<7<BFFFFFFFF##########\n+@HISEQ:105:C2UE1ACXX:3:1101:2300:3263 1:N:0:TAGATC\n+TTATTTTTAATAAAATTTTTATTATTTAATTTATTAGTTAATATTTAGGAGTTTTATGTTGTGGTAAAATTTTGTTAGAGAGATAGAGAAAGTATTTAGTT\n++\n+BBBFFFFFFFFBFFIIIIIIBIBBFFI0<FFFBFF70B00<'0<FFFBBBBFF''0B0BB000'7BBFFFFII'<BF0<'<''07'70BBF'7BBBFB7<B\n+@HISEQ:105:C2UE1ACXX:3:1101:5605:3427 1:N:0:CAGATC\n+AGATGAGAGGTATAGGATGTGGGGAGTTTTAGTAAGATTTATAGATAAGAAGTGGTTCGGTTATAGGATTTGTTTTAGATTTTTAGATTTTTTTGTGTAGA\n++\n+BBBFBFFFFFFFFFFBFFFFIIIIIIIBFFFBFFIBFFIIIIIFFIIIFFIBF7BFBFBBFFIFF<<FFFFBBFFBF0<BFFFFF7<BFFFFFF0B7BF0<\n+@HISEQ:105:C2UE1ACXX:3:1101:8129:3589 1:N:0:CAGATC\n+TTTTAGTTTTTAGTTAGGATTATACGTTTATTGTGATAAAAGAGTTTTTTGATTTATCGGGTTATGTTAGGGTTTATTGATATTAGGGAATTTGAAGAGAT\n++\n+BBBFFBFFFFFFBFFFBBFIIFIIFBFFFFII<BBFFFIIIBFFIFIIIIBBFFIIII<7<FBFF<FBF<7<BBFFFF7<BFFFF700<BBFF0<B'<07B\n+@HISEQ:105:C2UE1ACXX:3:1101:14304:3866 1:N:0:CAGATC\n+GTGTTTATATAGGGGATTTTTGAGTTTGATAGGTTGTTTTTGTAGAGGGTAGAATTTTGTGGAAATGTTGGTATTGGTTAAGGGGTTTTAGTGAAGAATAA\n++\n+<BBFFFFFFFFFFFFFFFFII<BBFFFBFFIFBFFBFFFII7FFFFFFFIF<BFIIII<F<BBFIFBFB7<BFFFBBBBBF777<B7BBF7B<BB7BBFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:12720:4398 1:N:0:CAGATC\n+GTATGTGTGTGTGTGTGTGTATTTAATTGAAGTTGGGTTTGGTGATATATATGTTTAATTTTAGTATTTTAGTGGTAGAGGTAGGTTAATTTTTGTTGAGT\n++\n+<BBFBFFFFFFFFIFIFIFFFFFIIFII0BFBFFB7BFFF<BFFFFIIIIIIBFIIIIFIIII<FIFIIIFBFBBFB7B<<BB7<B<BFFFFFF0<B0<7B\n+@HISEQ:105:C2UE1ACXX:3:1101:14945:4439 
1:N:0:CAGATC\n+AGTGTTGAGTGGAGTATTAGAGAAGAGAAATAAGATAATAAAGTAATAGTTGTGATTAGGAGGTTTTTATAAGTTGATGGTTTATGTTAAGTAAGTTTATT\n++\n+BBBBFFBFFFFFFFFFFIIBIFFIFFBFFIIIIBFFIIIIIIBFFIIIBFFBFFFIIIFBFFBFBFFIIIIIFFFBBF77BBFFF7BBFF<BBF<<BFFFB\n+@HISEQ:105:C2UE1ACXX:3:1101:8616:4508 1:N:0:CAGATC\n+AGA"..b"0<BFFBFFFBF7FBFIIFIF7BFFI7<7B<BBFB'<BF'7BBFF7BBBBBBB#######\n+@HISEQ:105:C2UE1ACXX:3:1101:7272:22581 1:N:0:CAGATC\n+TTAATGATATTAAGAATTTTTTAAAGAATTTTATTTTTTTTAGGAATAGAAGGAGGAGGAGTATTTTGATCGATTTTTTAGGTTTTTTATAGGTGGAGTTA\n++\n+BBBFFBFFFFFFF<BFIIIIIIFIIBFFFFIIFIIIIIIIII<<FFFIBFIBBFFBFFFFFFBBFFF<BBF<BBFFFFFF00<BBFFFFFF000<777B<B\n+@HISEQ:105:C2UE1ACXX:3:1101:10060:23020 1:N:0:CAGATC\n+TTTTAGATTATTTAAGAAGGTATTAGGTTTTTAAGAGGAAAGGGTAGTCTTATAGTTTTGAGTATTTTTTTTAAAAGGAAGTAAGGATGGTGTTTTTATTT\n++\n+BBBFFBFFFFFFFIIBFIFFFFFIBBBFFFFIIIBFFFIFIBFFIFFFFIIFFFBFFII7B<FFFIIIIIFFFFBB77<B<BBF<7BB<<BBBBBFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:14440:23104 1:N:0:CAGATC\n+TTTAAATTTAAGTTAAGGTTTGGGGAGTTGATTTTTGTTTTGTGGGTTGTTTTTTTTGTAGGAGTTGGTTTTTAGAGGTTTTTAGGAATTTTTGGTGTTTT\n++\n+BBBFFFFFFFFFFFIIBBIFIBBBFFFFFBFFIIIIBFFIIBFBFFFFFFIIIIFFF7BB<<BBFB<BBBBFFB7<00B<BBFF00<BBFFFF''77BBBF\n+@HISEQ:105:C2UE1ACXX:3:1101:6941:23338 1:N:0:CAGATC\n+AGAAAGGTTTTAAGTTGGTTGGGAATATAGGGGTTTTTTAGAGTTTTTATTAGGAGTTATAGTGTGTTGAATTTGGTTTTGGGTGTTGATTATAGGTTGTT\n++\n+BBBFFFFFFFFFFBFFBBFFBBBFFFFIIBFFFIFFIIIIBFFFFFIIIIIIB<FFFFFFFBFBFFFF<BBFFF<<BBBB700<BBB0BBFFFF00<B7<B\n+@HISEQ:105:C2UE1ACXX:3:1101:10069:23622 1:N:0:CAGATC\n+TTTTGTTTTAGGGTTTTATTTTTGTGTTTTATTTTTATTTTCGTATTATTAGTTTTTTTTATACGTTATTTGTAGAAGGTTAGTTTTTTTAATTTAGGTTT\n++\n+BBBFBFFFFFBBBFFFIFIIIIIBFBFFIIBFIIIIIIIIIIBFFFIIIIIFFIIIIIIFFFFF<BFFFFF<BF<<B77BBB7BBFFFFFFFFFFF00<BB\n+@HISEQ:105:C2UE1ACXX:3:1101:14079:24078 1:N:0:CAGATC\n+TTTAAAGTTTTTAGTTTTGAGTGGAATTTTAAGAATATTAGTGCGTTTTAAGCTTAGGTAGTTTTGGTAGTTTGAAAGTAATAGGGTGTATTTTGTAAAGT\n++\n+BBBFFFFFFFFFFBFFFI<BBFFFFFFIIIIIBFFFIIIIBFFFFFFIIIIBFFIIBBFFFFFFI<7BFBFFF<BBF<BBFFF<07BBBBBFFF<BFFF<B\n+@HISEQ:105:C2UE1ACXX:3:1101:12064:24631 
1:N:0:CCGATC\n+TTATAGTGTATTTATATATATGAAATGAATTAATGAATTTTAAAAAAAAAGAAAGTAAGTTGTTTTTAGGATTGATATTTAGAGTTAATTTTTTGAGTTTT\n++\n+BBBFFBFFFFFFFIIIIIIIIBFIFF0BFIIIIIBFFIIIIIIIIIIIIIBFIIFFFIBFFBFFFFFFB<BFF7BBFFFFF<B<BBBFFFFFFF'70<<BF\n+@HISEQ:105:C2UE1ACXX:3:1101:6662:24968 1:N:0:CAGATC\n+AGGTGTCGTTTAATTGTTTAGGTTTATGGTATTGTGTTTCGTTTTTTTGGTATTTGTGAGGGTAGAATTGTTTTTGGGTTTTAATTTTTTTAAGTATGGGA\n++\n+BBBFFFFFFFFFFIIBFFIFBBFFFFIB<FFFI<FBFFFFBFFIIIII77BFFIIBFBFBBBFBBFFFF<BBFFF'07BBBBFFFFFFFFFFB0<BF####\n+@HISEQ:105:C2UE1ACXX:3:1101:11630:24964 1:N:0:TAGATC\n+TTAGTTTTTTTAGTGTTTTTTATTTATTTCGTTTTATTATTGGAGTTTGTTAAGAAAATTAGGGTTTGATTTGGATGTTAAGGATTGGTTTTTTTTTTGAT\n++\n+BBBBFFFFFFFFBFFFFIIIIIIIIIIIIIBFFFIFIIIIIB7BFFFFBFFIIBFIIIIIFB<BFBFBFFFF<7BB<BBFF<7<BB77BBFFFFFFFF0<B\n+@HISEQ:105:C2UE1ACXX:3:1101:12594:24878 1:N:0:CCGATC\n+TTTAATAGGATATGATATTATTTAATTTATAGATTATGGAAATTTTTTATATTTAATGAAGAAAGTTGGAATGTTTTGGGAGGTGTTTAGAATAAATAAAT\n++\n+BBBFFFFB0BFFF<FFIIIFIIFIIIFIFIF<BFFFF<<BFFFIIIIIIIIIIIIIIBFFBFFIBFB7'<FF<<BBF0''77<BFB<BB0<BBBBBFFBBF\n+@HISEQ:105:C2UE1ACXX:3:1101:4483:25030 1:N:0:CAGATC\n+AGGATGGTGTTTTTATTTTTAGATTTATATTATTTTGTTATATTTGTATTTGAGTAAGTTTATGGGTTTTTTAAAGAGGTAGGAGGAAGTTTTTTGTTATT\n++\n+B<BFFFFFFFFFFIIIIIIIIBBFIIFIIIIFIIII<BFFIFFIIBFIIII<BBFFIFIFIIIB7<FFFIIIFBF<<77BB00<70<B7BBBFFF0BBFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:12198:25235 1:N:0:CAGATC\n+ATATATGTAGTTTGTATTATTTTTGTTATAGTATATAAAGGTTAAAGAGTAGTTGTTTTAATTTTAGAGGTGGAGATTGGGTTGTATAGTTTTGGTTTTTA\n++\n+BBBFFFBFFFFFFBFFIIFIIIII0BF<FF0BFFFIFFIBBFBFII<FBFBBFF7FFFIIIIIIII<F7<B'<BB7<B''0<<0<BBB77<BB'0<<BBFB\n+@HISEQ:105:C2UE1ACXX:3:1101:20477:25084 1:N:0:CATATC\n+AGAGTTTATTGAGAAGTAAAGTATTAATTTTATGGGAGAAATGGGATAGAGGTAGTAGAAGTTGTTATGGAATGGGATTAATTAGGAAGTTAATTAAGTGT\n++\n+BBBFFFFFFF0BBFFBFFIIFFIIIIIIIIIFIBB7FFFFIIFFBFFIFIBFFFFIIFIIFFFBFFFF77BFF<<B<FFFFFFF70<B7BBFFFFFF7B<B\n+@HISEQ:105:C2UE1ACXX:3:1101:5725:25359 
1:N:0:CAGATC\n+GAGAAATAAGATAATAAAGTAATAGTTGTGATTAGGAGGTTTTTTATAAGTTGATGGTTTATGTTAAGTAAGTTTATTAAGAAGTATAGTATTATATATAG\n++\n+BBBFFFFFFBFFFIIIIIIIIIIIBIFBFFFFIIBBFFFFFFFIIIIIIFFFBFFBBFFFIIBFFIIBFIIFIFIFFFFF<BF<BBFF7<<FFFFFFFFF0\n+@HISEQ:105:C2UE1ACXX:3:1101:5502:25591 1:N:0:CAGATC\n+ATATGATTTTATTTTTAGGGATAATATTTTTTAAGTGAATTTTGATTTTTTGGTTAGTTATTTTGATGATGTGTAGAGGGTGTATAGTTTTTGGATATAGA\n++\n+BBBFBFFFFFFFFIIIIBBBFFFIIIIIIIIIIIBFFFIIIIIBFFIIIII<7BFFBFFFIIII<FF<FFBFFFFFB<<<BFBBBF<<BFFF00<BFBF7<\n"
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/t_R1.fastq.gz
b
Binary file test-data/t_R1.fastq.gz has changed
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/t_R2.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/t_R2.fastq Thu Jun 21 15:20:14 2018 -0400
b
b"@@ -0,0 +1,400 @@\n+@HISEQ:105:C2UE1ACXX:3:1101:11160:2245 2:N:0:CAGATC\n+CATAAAAACCAAAACTAACTAAACCCCAAATAAAAAACAACCTAACCTCTAACAAAAACAACAACAACTAACACCTCAAAATCAACTCTAAATAAAAACTA\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:19338:2197 2:N:0:CAGATC\n+CTAATTTCTATTACCTACCTAACAACTATAACTATAATACTAACAAAAAACAAACAACATAGACCTAAATCCTACTTATACCCAACATTCTAAAAACAATT\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII0<BFFIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFB\n+@HISEQ:105:C2UE1ACXX:3:1101:19467:2281 2:N:0:CATATC\n+ACACAACAAAATCCCTACTCCTATAACCTCTCACTACACCCAAAACTCCATTCTTTTCCCCCTTTACAAAAATCACTAAAATCCAAACTATACATCTCACC\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIFIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIFFIIIIFFFFBBFFFFFFFBBFFFFFBBBFFFBFFFFBF\n+@HISEQ:105:C2UE1ACXX:3:1101:7009:2740 2:N:0:CAGATC\n+TAAATAAAACCCAAACCCACACTATCTATCCCTTATTAACATTACAATCACAATTATCAAATAAATAACAAAACCCAAAAAAACCTTACTTAACATTCCAT\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIFIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFB\n+@HISEQ:105:C2UE1ACXX:3:1101:13708:2613 2:N:0:CAGATC\n+TTCAAAAACTCCATAACAAACACAAATAAAAAATAAAAAACTCCTAAATCTCACCTTAAAAACTTATCTAACTGCAACTATTATCTTACTTAAAAAAAAAC\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:19067:2707 2:N:0:CAGATC\n+ATCACTCTTCCAAAAATCACTCGAATCCACAAATACAAAAACTTTCTAACCACACACCTAAAAAAAAAAAAAAAAAAAAAAAAACCCCCCCTTTAAAACCT\n++\n+BBBFFFFFFFFFFIIIIIIIIIIFFIIIIIIIIIIIIIIIFIIIIIIIIIFIIIIIIIFIIIIIFFFFFFFFFFFFFFFBB####################\n+@HISEQ:105:C2UE1ACXX:3:1101:4999:3182 2:N:0:CAGATC\n+CTTATAATTCAAATTTCTAAACTCCTACTCCCTCTCCCTTTATATTTATTTAACACATACTATTCTAACTATATATAAATCATAAATCTTATAAACTTTAA\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:16790:3145 
2:N:0:CAGATC\n+AACATACCTATAAAAACGCACTACTTTTATATACAAAATCCGTCTCTACTAATACCATAACCAACCTCTATACCACATATAAAAAACAACAAACAATACTC\n++\n+BBBFFFFFFFFBFIIIIF<FFFFFIIIIIFFFIIIIIFIFFFIIIIIIFIFFFFIIIIIIIIIIIIFBBBBFFFFFFBBFFFFFFFFFFBBFFFFFFFFBB\n+@HISEQ:105:C2UE1ACXX:3:1101:18065:3106 2:N:0:CAGATC\n+AAAAACTAAACCCAAAAAAAAAACAATAAAAATAAAATAATAAAAATTATCATAATAAATTCCTAAAAAAAAAAAAAACTTTAAAAAAAAAAAAAAAAAAA\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIFIIIIIBFIIIFFFFFIIIFFFFFFFBFFFFFFFFFFBFFFFFFFFFFFFFF'0<<<BFB###############\n+@HISEQ:105:C2UE1ACXX:3:1101:2300:3263 2:N:0:TAGATC\n+AAAAATACAAAAAATAAAAAAAAAAAAAAACATTAAATTTAAAAAAAATTTATTTTTTATTTTATTTTATTTTTATTTTTTTAAATTAAAATAAAAAAAAA\n++\n+BBBFFFFBFFFFFIFFIIIIIIIIIIIFF<'<<<BBBFF0<FFBBBBFB7'0<BB000<'<F<0BBFB'<<BBB0<B<B<B7B<<0'<B'00BBFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:5605:3427 2:N:0:CAGATC\n+AACCTCTACACAAAAAAATCTAAAAATCTAAAACAAATCCTATAACCGAACCACTTCTTATCTATAAATCTTACTAAAACTCCCCACATCCTATACCTCTC\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFIFIIFFFFFFFFFFFFFFFFFFFFFFFFFFBBFFFFFFFFFFFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:8129:3589 2:N:0:CAGATC\n+TCTTATCTCTTCAAATTCCCTAATATCAATAAACCCTAACATAACCCGATAAATCAAAAAACTCTTTTATCACAATAAACGTATAATCCTAACTAAAAACT\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFIIIIIIIIIFFFFFFFFFFFFFFFFFFFFBBBFFFFFFFFFFFFFFFF<\n+@HISEQ:105:C2UE1ACXX:3:1101:14304:3866 2:N:0:CAGATC\n+AATTTATTCTTCACTAAAACCCCTTAACCAATACCAACATTTCCACAAAATTCTACCCTCTACAAAAACAACCTATCAAACTCAAAAATCCCCTATATAAA\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFBBBFFBFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:12720:4398 2:N:0:CAGATC\n+CTAATTTAACCTTTAAACTCAACAAAAATTAACCTACCTCTACCACTAAAATACTAAAATTAAACATATATATCACCAAACCCAACTTCAATTAAATACAC\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIIIIIIIFIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFBFFFFFFB\n+@HISEQ:105:C2UE1ACXX:3:1101:14945:4439 
2:N:0:CAGATC\n+ACTCTCTCCTATATTCTTTACCAACATATATAACTTAACTCTCTAATAACCTTAACTATTCCTCACCCTAACCTCCACAAATACTATATATAATACTATAC\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIBFIIIIIFIIFFFFFFFFFFFFFFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:8616:4508 2:N:0:CAGATC\n+CTA"..b'IIIIIIIIFFIIIFIFFFFFFFFFFFFFFF<<BBFBBFFFFFBFBBBBFFFFFFFFFBF\n+@HISEQ:105:C2UE1ACXX:3:1101:7272:22581 2:N:0:CAGATC\n+CTAACTCCACCTATAAAAAACCTAAAAAATCGATCAAAATACTCCTCCTCCTTCTATTCCTAAAAAAAATAAAATTCTTTAAAAAATTCTTAATATCATTA\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIFFIIFIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:10060:23020 2:N:0:CAGATC\n+CAAAATAATATAAATCTAAAAATAAAAACACCATCCTTACTTCCTTTTAAAAAAAATACTCAAAACTATAAGACTACCCTTTCCTCTTAAAAACCTAATAC\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFBBFFFFFFFFFFFFFFFFFFFFFFBFFBFF\n+@HISEQ:105:C2UE1ACXX:3:1101:14440:23104 2:N:0:CAGATC\n+ATATTCTATAAAACACCAAAAATTCCTAAAAACCTCTAAAAACCAACTCCTACAAAAAAAACAACCCACAAAACAAAAATCAACTCCCCAAACCTTAACTT\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFB\n+@HISEQ:105:C2UE1ACXX:3:1101:6941:23338 2:N:0:CAGATC\n+CTTATTCATTCCATCGCCTATAACATAAAAACAAAAATAAACATTATCATAACAACCTATAATCAACACCCAAAACCAAATTCAACACACTATAACTCCTA\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:10069:23622 2:N:0:CAGATC\n+CTTAAAATTCACTTCACATTAACTTTTAAATATTATTCCAATATATTTAAAATAACACTTTAAAAAAAACCTAAATTAAAAAAACTAACCTTCTACAAATA\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:14079:24078 2:N:0:CAGATC\n+ATAAAACTAAACTTTCATATATTACTCTTAAATTTTTTTCCTAACTATAAAAAACTTTACAAAATACACCCTATTACTTTCAAACTACCAAAACTACCTAA\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:12064:24631 
2:N:0:CCGATC\n+AAAAAAACCACCATTCTACAATCCCTTAAAAAAACCCCCAATAAAAAATAAACCCAAACCTAAACAAACAAAAAAACTCAAAAAATTAACTCTAAATATCA\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFBBBFFBFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:6662:24968 2:N:0:CAGATC\n+CTCCCATACTTAAAAAAATTAAAACCCAAAAACAATTCTACCCTCACAAATACCAAAAAAACGAAACACAATACCATAAACCTAAACAATTAAACGACACC\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFBFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:11630:24964 2:N:0:TAGATC\n+AAAAAAACATACCAACAACCTTTCAAATACAAAACTCCGTAAAATCTATACTATCTAACAATACCCTAAATCAAAAAAAAAACCAATCCTTAACATCCAAA\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIFFIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFBFBFBBFBFFFFFBFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:12594:24878 2:N:0:CCGATC\n+ATATAATATTACCTATTTATTTTTATTATTATTATTATTATTATTTATTTATTTATTCTAAACACCTCCCAAAACATTCCAACTTTCTTCATTAAATATAA\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIFFFFFFIIIFFIFIIIFIIIIIIIIIIIFFFFIIIFFFFIIIIIIIIIIFFIIIIFIIFFFFFFFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:4483:25030 2:N:0:CAGATC\n+CCTAATACTCTATACTAAACAAATAACAAAAAACTTCCTCCTACCTCTTTAAAAAACCCATAAACTTACTCAAATACAAATATAACAAAATAATATAAATC\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIFIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFB\n+@HISEQ:105:C2UE1ACXX:3:1101:12198:25235 2:N:0:CAGATC\n+CACAAAAACAACATACCTCTTAATAATCTTTCAAACTTACACCAAATCTATTTCTATCCTCATCTTCCATTTATAAAAACCAAAACTATACAACCCAATCT\n++\n+BBBFFFFFFFFFFFIFFFFFFBFBFFBBFF<FIBFFIF<BFIIIIIIIIFFFFFBFBFBBFF<B<BFFFBBFIIIIFIIFFFFFFFFFBFBFFBBBBBBB<\n+@HISEQ:105:C2UE1ACXX:3:1101:20477:25084 2:N:0:CATATC\n+AAAACCCAACTCTTCCACCAAAAAAATTACTTTATCTCTAATACTCTTAAAATACCCTATATTATCCTTATACAAAAACACTTAATTAACTTCCTAATTAA\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIFIIFIIIIIIIIFIIIIIIIIIIIIIF0<BFFFFFFFFFFFFBBBFFFFFFFFFBBB<<BB70\n+@HISEQ:105:C2UE1ACXX:3:1101:5725:25359 
2:N:0:CAGATC\n+ATAACTTAACTCTCTAATAACCTTAACTATTCCTCTCCCTAACCTCCACAAATACTATATATAATACTATACTTCTTAATAAACTTACTTAACATAAACCA\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:5502:25591 2:N:0:CAGATC\n+TTCTTAATTCCATCTTTAAACAATAATATTAAAATATTACTTCCCAAAATACAAACCCATATCCATATCCAAAAACTAAACAACCTCTACACCTCAACAAA\n++\n+<<<BBBBBBBBBBBBBBBBB70B00B000<0BBB###################################################################\n'
b
diff -r 000000000000 -r 8db56d2f8b72 test-data/t_R2.fastq.gz
b
Binary file test-data/t_R2.fastq.gz has changed
b
diff -r 000000000000 -r 8db56d2f8b72 umi-tools_counts.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/umi-tools_counts.xml Thu Jun 21 15:20:14 2018 -0400
[
b'@@ -0,0 +1,188 @@\n+<tool id="umi_tools_count" name="UMI-tools count" version="@VERSION@.0">\n+    <description>Count UMIs from BAM files</description>\n+    <macros>\n+        <import>macros.xml</import>\n+        <xml name="sanitize_tag" >\n+            <sanitizer invalid_char="">\n+                <valid initial="string.letters,string.digits" />\n+            </sanitizer>\n+        </xml>\n+    </macros>\n+    <expand macro="requirements" />\n+    <command detect_errors="exit_code"><![CDATA[\n+\n+    ln -s \'${input_bam}\' \'input.bam\' &&\n+    ln -s \'${input_bam.metadata.bam_index}\' \'input.bam.bai\' &&\n+    \n+    umi_tools count\n+        -I input.bam\n+        \'$bam_paired\'\n+        --extract-umi-method=\'$barcodes.extract_umi_method.value\'\n+        #if $barcodes.extract_umi_method == \'read_id\':\n+            --umi-separator=\'$barcodes.delimiter\'\n+        #else if $barcodes.extract_umi_method == \'tag\':\n+            --umi-tag=\'$barcodes.umi_tag\'\n+            --cell-tag=\'$barcodes.cell_tag\'\n+        #end if\n+        --method=\'$grouping_method.value\'\n+        --edit-distance-threshold=\'$hamming_distance\'\n+        --mapping-quality=\'$advanced.mapping_quality\'\n+        --per-gene\n+        $wide_format_cell_counts\n+        $advanced.per_contig\n+        \'$advanced.per_cell\'\n+        #if $advanced.gene_tag:\n+            --gene-tag=\'$advanced.gene_tag\'\n+        #end if\n+        #if $advanced.skip_tags_regex.value:\n+            --skip-tags-regex=\'$advanced.skip_tags_regex\'\n+        #end if\n+        #if $advanced.random_seed != 0:\n+            --random-seed=\'$advanced.random_seed\'\n+        #end if\n+        -S \'$out_counts\'\n+        -L \'$out_log\'\n+    ]]></command>\n+    <inputs>\n+        <param name="input_bam" type="data" format="bam" label="Sorted BAM file" help="Please use the samtools sort tool to ensure a correct BAM input" />\n+\n+        <param name="bam_paired" type="boolean" truevalue="--paired" 
falsevalue="" checked="false"\n+               label="Bam is paired-end"\n+               help="both read pairs will be output. This will also force the use of the template length to determine \n+reads with the same mapping coordinates." />\n+\n+        <conditional name="barcodes" >\n+            <param name="extract_umi_method" type="select" label="Umi Extract Method" help="How are the barcodes encoded in the read?" >\n+                <option value="read_id" selected="true">Barcodes are contained at the end of the read seperated by a delimiter</option>\n+                <option value="tag" >Barcodes are contained in tags</option>\n+                <option value="umis" >Barcodes were extracted using umis</option>\n+            </param>\n+            <when value="read_id" >\n+                <param name="delimiter" type="text" label="Delimiter between read id and the UMI" value="_" >\n+                    <expand macro="sanitize_tag" />\n+                </param>\n+            </when>\n+            <when value="tag" >\n+                <param name="umi_tag" type="text" label="Tag which contains the UMI" >\n+                    <expand macro="sanitize_tag" />\n+                </param>\n+                <param name="cell_tag" type="text" label="Tag which contains the cell barcode" >\n+                    <expand macro="sanitize_tag" />\n+                </param>\n+            </when>\n+            <when value="umis"></when>\n+        </conditional>\n+\n+        <param name="grouping_method" type="select" label="Method to identify group of reads" help="UMIs with the same (or similar) codes can be grouped together. The simplest methods \'unique\' and \'percentile\' group identical \n+UMIs, however \'cluster\', \'adjacency\', and \'directional\' can group similar umis with edit distances less than some threshold. Unique: Reads group share the exact same UMI. 
Percentile: Reads group share the same UMI, and UMIs with \n+counts &lt; 1% of the median counts for UMIs at the same position are ignored. Cluster: Identify clusters of connected UMIs (based on hamming distance threshold). Adjacency: Same as c'..b'       <add value="&#91;"/> <!-- left square bracket -->\n+                        <add value="&#93;"/> <!-- right square bracket -->\n+                        <add value="&#94;"/> <!-- caret -->\n+                        <add value="&#123;"/> <!-- left curly -->\n+                        <add value="&#125;"/> <!-- right curly -->\n+                        <add value="&#40;"/> <!-- left parenthesis -->\n+                        <add value="&#41;"/> <!-- right parenthesis -->\n+                    </valid>\n+                </sanitizer>\n+            </param>\n+            <param name="per_contig" type="boolean" truevalue="--per-contig" falsevalue="" checked="false"\n+                label="Deduplicate per contig (field 3 in BAM; RNAME)"\n+                help="All reads with the same contig will be considered to have the same alignment position. This is useful if you have aligned to a reference transcriptome with one transcript per gene." />\n+            <param name="per_cell" type="boolean" truevalue="--per-cell" falsevalue="" checked="false"\n+                label="Group reads only if they have the same cell barcode." 
/>\n+            <param name="random_seed" type="integer" min="0" value="0" label="Random Seed" />\n+        </section>        \n+    </inputs>\n+    <outputs>\n+        <data name="out_counts" format="tsv" />\n+        <data name="out_log" format="txt" />\n+    </outputs>\n+    <tests>\n+        <test><!--count_single_gene_tag:-->\n+            <param name="input_bam" value="chr19_gene_tags.bam" />\n+            <param name="random_seed" value="123456789" />\n+            <param name="grouping_method" value="directional" />\n+            <param name="gene_tag" value="XF" />\n+            <param name="skip_tags_regex" value="^[__|Unassigned]" />\n+            <param name="extract_umi_method" value="umis" />\n+            <output name="out_counts" value="count_single_gene_tag.tsv" />\n+        </test>\n+        <test><!--count_single_cells_gene_tag:-->\n+            <param name="input_bam" value="chr19_gene_tags.bam" />\n+            <param name="random_seed" value="123456789" />\n+            <param name="grouping_method" value="directional" />\n+            <param name="gene_tag" value="XF" />\n+            <param name="skip_tags_regex" value="^[__|Unassigned]" />\n+            <param name="per_cell" value="true" /><!-- new -->\n+            <param name="extract_umi_method" value="umis" />\n+            <output name="out_counts" value="count_single_cells_gene_tag.tsv" />\n+        </test>\n+        <test><!--count_single_cells_wide_gene_tag:-->\n+            <param name="input_bam" value="chr19_gene_tags.bam" />\n+            <param name="random_seed" value="123456789" />\n+            <param name="grouping_method" value="directional" />\n+            <param name="gene_tag" value="XF" />\n+            <param name="skip_tags_regex" value="^[__|Unassigned]" />\n+            <param name="per_cell" value="true" /><!-- new -->\n+            <param name="extract_umi_method" value="umis" />\n+            <param name="wide_format_cell_counts" value="true" />\n+            
<output name="out_counts" value="count_single_cells_gene_tag_wide.tsv" />\n+        </test>\n+    </tests>\n+    <help><![CDATA[\n+\n+UMI Tools count - Count reads per gene from BAM using UMIs\n+----------------------------------------------------------\n+\n+Purpose\n+-------\n+\n+The purpose of this command is to count the number of reads per gene based\n+on the mapping co-ordinate and the UMI attached to the read.\n+\n+\n+It is assumed that the FASTQ files were processed with extract_umi.py\n+before mapping and thus the UMI is the last word of the read name. e.g:\n+\n+@HISEQ:87:00000000_AATT\n+\n+where AATT is the UMI sequeuence.\n+\n+If you have used an alternative method which does not separate the\n+read id and UMI with a "_", such as bcl2fastq which uses ":", you can\n+specify the separator, or if your UMIs are encoded in a tag you can also specify this.\n+\n+    ]]></help>\n+    <expand macro="citations" />\n+</tool>\n'