Repository 'umi_tools_extract'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/umi_tools_extract

Changeset 1:79436b3019e9 (2017-08-29)
Previous changeset 0:418b961e0576 (2017-08-10) Next changeset 2:d1015c2516b7 (2018-01-10)
Commit message:
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
modified:
test-data/out_R1.fastq.gz
test-data/out_R2.fastq.gz
test-data/out_paired.log
test-data/out_single.log
umi-tools_extract.xml
added:
macros.xml
test-data/group_in1.sam
test-data/group_in2.bam
test-data/group_in3.bam
test-data/group_in4.bam
test-data/group_in5.bam
test-data/group_in6.bam
test-data/group_out1.bam
test-data/group_out2.bam
test-data/group_out3.bam
test-data/group_out3.tab
test-data/group_out4.bam
test-data/group_out4.tab
test-data/group_out5.bam
test-data/group_out6.bam
b
diff -r 418b961e0576 -r 79436b3019e9 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Tue Aug 29 17:37:07 2017 -0400
b
@@ -0,0 +1,23 @@
+<?xml version="1.0"?>
+<macros>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1101/gr.209601.116</citation>
+            <citation type="bibtex">
+                @misc{githubUMI-tools,
+                title = {UMI-tools},
+                publisher = {GitHub},
+                journal = {GitHub repository},
+                url = {https://github.com/CGATOxford/UMI-tools},
+                }
+            </citation>
+        </citations>
+    </xml>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@VERSION@">umi_tools</requirement>
+            <yield />
+        </requirements>
+    </xml>
+    <token name="@VERSION@">0.5.0</token>
+</macros>
b
diff -r 418b961e0576 -r 79436b3019e9 test-data/group_in1.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/group_in1.sam Tue Aug 29 17:37:07 2017 -0400
b
b'@@ -0,0 +1,666 @@\n+@HD\tVN:1.0\tSO:coordinate\n+@PG\tID:Bowtie\tVN:1.1.2\tCL:"bowtie --wrapper basic-0 --threads 4 -v 2 -m 10 -a /ifs/mirror/genomes/bowtie/mm9 /dev/fd/63 --sam"\n+@SQ\tSN:chr10\tLN:129993255\n+@SQ\tSN:chr11\tLN:121843856\n+@SQ\tSN:chr12\tLN:121257530\n+@SQ\tSN:chr13\tLN:120284312\n+@SQ\tSN:chr14\tLN:125194864\n+@SQ\tSN:chr15\tLN:103494974\n+@SQ\tSN:chr16\tLN:98319150\n+@SQ\tSN:chr17\tLN:95272651\n+@SQ\tSN:chr18\tLN:90772031\n+@SQ\tSN:chr19\tLN:61342430\n+@SQ\tSN:chr1\tLN:197195432\n+@SQ\tSN:chr2\tLN:181748087\n+@SQ\tSN:chr3\tLN:159599783\n+@SQ\tSN:chr4\tLN:155630120\n+@SQ\tSN:chr5\tLN:152537259\n+@SQ\tSN:chr6\tLN:149517037\n+@SQ\tSN:chr7\tLN:152524553\n+@SQ\tSN:chr8\tLN:131738871\n+@SQ\tSN:chr9\tLN:124076172\n+@SQ\tSN:chrM\tLN:16299\n+@SQ\tSN:chrX\tLN:166650296\n+@SQ\tSN:chrY\tLN:15902555\n+SRR2057595.10000392_AAGTA\t16\tchr19\t31326011\t255\t57M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:23A33\tNM:i:1\tUG:i:512\tBX:Z:AAGTA\n+SRR2057595.10000468_GCTAT\t16\tchr19\t61240267\t255\t28M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:11C16\tNM:i:1\tUG:i:706\tBX:Z:GCTAT\n+SRR2057595.10000704_TGATG\t16\tchr19\t3544146\t255\t39M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:39\tNM:i:0\tUG:i:1\tBX:Z:TGATG\n+SRR2057595.10000838_TAGTA\t0\tchr19\t10274608\t255\t34M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:34\tNM:i:0\tUG:i:324\tBX:Z:TAGTA\n+SRR2057595.10001118_TCTCA\t16\tchr19\t4078298\t255\t50M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:43A6\tNM:i:1\tUG:i:36\tBX:Z:TCTCA\n+SRR2057595.10001329_CGGTC\t16\tchr19\t28083193\t255\t60M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:60\tNM:i:0\tUG:i:497\tBX:Z:CGGTC\n+SRR2057595.10001382_CTGCC\t16\tchr19\t3957282\t255\t22M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:17C1G2\tNM:i:2\tUG:i:9\tBX:Z:CTGCC\n+SRR2057595.10001382_CTGCC\t16\tchr19\t3970897\t255\t22M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:17C1G2\tNM:i:2\tUG:i:10\tBX:Z:CTGCC\n+SRR2057595.10002047_ATTAT\t16\tchr19\t7546213\t255\t27M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:27\tNM:i:0\tUG:i:278\tBX:Z:ATTAT\n+SRR2057595.10002219_ATCAG\t16\tchr19\t5796783\t255\t34M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:34\tNM:i:0\tUG:i:234\tBX:Z:ATCAG\n+SRR2057595.10002597_AATTA\t16\tchr19\t50733887\t255\t64M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:58C0C4\tNM:i:2\tUG:i:645\tBX:Z:AATTA\n+SRR2057595.10003084_GTCTA\t16\tchr19\t4078420\t255\t67M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:2T19A44\tNM:i:2\tUG:i:124\tBX:Z:GTCTA\n+SRR2057595.10003230_ACATT\t0\tchr19\t8799370\t255\t64M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:64\tNM:i:0\tUG:i:297\tBX:Z:ACATT\n+SRR2057595.10003317_CGGCC\t16\tchr19\t52641725\t255\t20M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:1C13A4\tNM:i:2\tUG:i:648\tBX:Z:CGGCC\n+SRR2057595.10003567_ATGTC\t0\tchr19\t37052811\t255\t64M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:11C52\tNM:i:1\tUG:i:542\tBX:Z:ATGTC\n+SRR2057595.10003576_AATCG\t16\tchr19\t4078303\t255\t67M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:38A21A6\tNM:i:2\tUG:i:88\tBX:Z:AATCG\n+SRR2057595.10003630_GTGTT\t16\tchr19\t4078298\t255\t52M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:42G0A8\tNM:i:2\tUG:i:45\tBX:Z:GTGTT\n+SRR2057595.10003869_TCTTG\t16\tchr19\t4078423\t255\t67M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:19A47\tNM:i:1\tUG:i:111\tBX:Z:TCTTG\n+SRR2057595.1000404_AACTT\t16\tchr19\t4078420\t255\t64M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:2T19A41\tNM:i:2\tUG:i:55\tBX:Z:AACTT\n+SRR2057595.10004161_TTCGC\t16\tchr19\t4078428\t255\t67M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:14A24T27\tNM:i:2\tUG:i:74\tBX:Z:TTCGC\n+SRR2057595.1000475_TCTGG\t0\tchr19\t13129234\t255\t28M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:22T5\tNM:i:1\tUG:i:383\tBX:Z:TCTGG\n+SRR2057595.10004845_GGATA\t0\tchr19\t24394180\t255\t29M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:11C15T1\tNM:i:2\tUG:i:481\tBX:Z:GGATA\n+SRR2057595.10005100_TTAAA\t16\tchr19\t4785463\t255\t21M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:21\tNM:i:0\tUG:i:196\tBX:Z:TTAAA\n+SRR2057595.10005761_CTAGA\t16\tchr19\t4078299\t255\t50M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:41G0A7\tNM:i:2\tUG:i:43\tBX:Z:CTAGA\n+SRR2057595.10005808_CAGTA\t16\tchr19\t7545872\t255\t24M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:24\tNM:i:0\tUG:i:276\tBX:Z:CAGTA\n+SRR2057595.10005955_GACAT\t16\tchr19\t4078302\t255\t67M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:39A21A5\tNM:i:2\tUG:i:82\tBX:Z:GACAT\n+SRR2057595.10006011_TCTCA\t16\tchr19\t4078298\t255\t50M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:43A6\tNM:i:1\tUG:i:36\tBX:Z:TCTCA\n+SRR2057595.10006546_GCTAC\t16\tchr19\t4078446\t255\t67M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:67\tNM:i:0\tUG:i:97\tBX:Z:GCTAC\n+SRR2057595.1000663_ACGCA\t16\tchr19\t51217013\t255\t67M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:3A46A16\tNM:i:2\tUG:i:647\tBX:Z:ACGCA\n+SRR2057595.10006870_GTCCA\t16\tchr19\t4078431\t255\t67M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:11A55\tNM:i:1\tUG:i:107\tBX:Z:GTCCA\n+SRR2057595.10006926_GGATA\t0\tchr19\t24394180\t255\t29M\t*\t0\t0\t*\t*\tXA:'..b'\tchr19\t55501486\t255\t31M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:8A22\tNM:i:1\tUG:i:659\tBX:Z:GATTG\n+SRR2057595.1013616_ATAGA\t0\tchr19\t38795805\t255\t62M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:46A15\tNM:i:1\tUG:i:553\tBX:Z:ATAGA\n+SRR2057595.10136267_TTCGC\t16\tchr19\t4078428\t255\t67M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:14A24T27\tNM:i:2\tUG:i:74\tBX:Z:TTCGC\n+SRR2057595.10136714_ATCCG\t16\tchr19\t37369602\t255\t35M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:35\tNM:i:0\tUG:i:544\tBX:Z:ATCCG\n+SRR2057595.10136765_GCTAT\t0\tchr19\t57701750\t255\t67M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:34T1T30\tNM:i:2\tUG:i:677\tBX:Z:GCTAT\n+SRR2057595.10137151_ATTCT\t0\tchr19\t3576314\t255\t23M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:16T5T0\tNM:i:2\tUG:i:5\tBX:Z:ATTCT\n+SRR2057595.10137262_ACAAA\t16\tchr19\t4078298\t255\t49M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:43A5\tNM:i:1\tUG:i:27\tBX:Z:ACAAA\n+SRR2057595.10137317_GATTG\t0\tchr19\t8799830\t255\t67M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:67\tNM:i:0\tUG:i:314\tBX:Z:GATTG\n+SRR2057595.10137603_GCTAT\t16\tchr19\t61240267\t255\t28M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:11C16\tNM:i:1\tUG:i:706\tBX:Z:GCTAT\n+SRR2057595.10137788_TAGTA\t0\tchr19\t10274608\t255\t34M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:34\tNM:i:0\tUG:i:324\tBX:Z:TAGTA\n+SRR2057595.1013793_GCGCA\t0\tchr19\t8798523\t255\t30M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:30\tNM:i:0\tUG:i:295\tBX:Z:GCGCA\n+SRR2057595.10138029_GTTGT\t16\tchr19\t12564367\t255\t21M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:0A12A7\tNM:i:2\tUG:i:380\tBX:Z:GTTGT\n+SRR2057595.10138656_CTAGC\t16\tchr19\t4078298\t255\t62M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:43A18\tNM:i:1\tUG:i:63\tBX:Z:CTAGC\n+SRR2057595.10138697_GCCGT\t0\tchr19\t30861987\t255\t20M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:3C2T13\tNM:i:2\tUG:i:511\tBX:Z:GCCGT\n+SRR2057595.10138764_GATGG\t0\tchr19\t44417644\t255\t23M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:15T4C2\tNM:i:2\tUG:i:601\tBX:Z:GATGG\n+SRR2057595.10138897_TGATT\t16\tchr19\t4078299\t255\t50M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:41G0A7\tNM:i:2\tUG:i:41\tBX:Z:TGATT\n+SRR2057595.10139239_GAGTG\t0\tchr19\t8798502\t255\t51M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:51\tNM:i:0\tUG:i:289\tBX:Z:GAGTG\n+SRR2057595.10139499_AGTCT\t0\tchr19\t13129558\t255\t37M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:21A15\tNM:i:1\tUG:i:398\tBX:Z:AGTCT\n+SRR2057595.10139596_GCTAC\t16\tchr19\t4078446\t255\t67M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:67\tNM:i:0\tUG:i:97\tBX:Z:GCTAC\n+SRR2057595.10139652_GGATG\t0\tchr19\t13129322\t255\t27M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:27\tNM:i:0\tUG:i:402\tBX:Z:GGATG\n+SRR2057595.10139674_ATGAA\t16\tchr19\t4078298\t255\t37M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:37\tNM:i:0\tUG:i:175\tBX:Z:ATAAA\n+SRR2057595.10139952_TAGTA\t16\tchr19\t61240268\t255\t27M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:10C16\tNM:i:1\tUG:i:702\tBX:Z:TAGTA\n+SRR2057595.10140058_ATATA\t0\tchr19\t13129330\t255\t22M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:0T21\tNM:i:1\tUG:i:422\tBX:Z:ATATA\n+SRR2057595.10141122_GTCCC\t0\tchr19\t12347657\t255\t32M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:32\tNM:i:0\tUG:i:372\tBX:Z:GTCCC\n+SRR2057595.10141122_GTCCC\t0\tchr19\t12348043\t255\t32M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:32\tNM:i:0\tUG:i:374\tBX:Z:GTCCC\n+SRR2057595.10141122_GTCCC\t16\tchr19\t15035071\t255\t32M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:29T2\tNM:i:1\tUG:i:428\tBX:Z:GTCCC\n+SRR2057595.10141295_TAGTA\t0\tchr19\t10274608\t255\t34M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:34\tNM:i:0\tUG:i:324\tBX:Z:TAGTA\n+SRR2057595.10142441_GTCTA\t16\tchr19\t4078420\t255\t67M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:2T19A44\tNM:i:2\tUG:i:124\tBX:Z:GTCTA\n+SRR2057595.10142482_GCCGA\t16\tchr19\t7389994\t255\t48M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:47C0\tNM:i:1\tUG:i:274\tBX:Z:GCCGA\n+SRR2057595.10143102_AAGTA\t16\tchr19\t31326011\t255\t57M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:23A33\tNM:i:1\tUG:i:512\tBX:Z:AAGTA\n+SRR2057595.10143387_GCATA\t16\tchr19\t4078298\t255\t57M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:42G0A13\tNM:i:2\tUG:i:53\tBX:Z:GCATA\n+SRR2057595.10143631_ATTTT\t0\tchr19\t45095257\t255\t57M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:57\tNM:i:0\tUG:i:606\tBX:Z:ATTTT\n+SRR2057595.10143687_CGCTT\t0\tchr19\t13129322\t255\t26M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:26\tNM:i:0\tUG:i:406\tBX:Z:CGCTT\n+SRR2057595.10143772_TGATT\t16\tchr19\t4078299\t255\t50M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:41G0A7\tNM:i:2\tUG:i:41\tBX:Z:TGATT\n+SRR2057595.10143909_CAGAC\t0\tchr19\t5493782\t255\t41M\t*\t0\t0\t*\t*\tXA:i:0\tMD:Z:41\tNM:i:0\tUG:i:218\tBX:Z:CAGAC\n+SRR2057595.10143972_CTTAC\t16\tchr19\t4078298\t255\t54M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:42G0A10\tNM:i:2\tUG:i:49\tBX:Z:CTTAC\n+SRR2057595.10144216_CATAA\t0\tchr19\t13129322\t255\t35M\t*\t0\t0\t*\t*\tXA:i:1\tMD:Z:33C1\tNM:i:1\tUG:i:401\tBX:Z:CATAA\n+SRR2057595.10144367_ATGAT\t16\tchr19\t61240306\t255\t36M\t*\t0\t0\t*\t*\tXA:i:2\tMD:Z:16A4T14\tNM:i:2\tUG:i:716\tBX:Z:ATGAT\n'
b
diff -r 418b961e0576 -r 79436b3019e9 test-data/group_in2.bam
b
Binary file test-data/group_in2.bam has changed
b
diff -r 418b961e0576 -r 79436b3019e9 test-data/group_in3.bam
b
Binary file test-data/group_in3.bam has changed
b
diff -r 418b961e0576 -r 79436b3019e9 test-data/group_in4.bam
b
Binary file test-data/group_in4.bam has changed
b
diff -r 418b961e0576 -r 79436b3019e9 test-data/group_in5.bam
b
Binary file test-data/group_in5.bam has changed
b
diff -r 418b961e0576 -r 79436b3019e9 test-data/group_in6.bam
b
Binary file test-data/group_in6.bam has changed
b
diff -r 418b961e0576 -r 79436b3019e9 test-data/group_out1.bam
b
Binary file test-data/group_out1.bam has changed
b
diff -r 418b961e0576 -r 79436b3019e9 test-data/group_out2.bam
b
Binary file test-data/group_out2.bam has changed
b
diff -r 418b961e0576 -r 79436b3019e9 test-data/group_out3.bam
b
Binary file test-data/group_out3.bam has changed
b
diff -r 418b961e0576 -r 79436b3019e9 test-data/group_out3.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/group_out3.tab Tue Aug 29 17:37:07 2017 -0400
b
b'@@ -0,0 +1,498 @@\n+read_id\tcontig\tposition\tgene\tumi\tumi_count\tfinal_umi\tfinal_umi_count\tunique_id\n+chrM_81_583_3:0:0_2:0:0_f4/1_TT\tchrM\t80\tNA\tTT\t1\tTT\t1\t0\n+chrM_110_732_3:0:0_2:0:0_160/1_CC\tchrM\t109\tNA\tCC\t1\tCC\t1\t1\n+chrM_118_613_6:0:0_4:0:0_169/1_AG\tchrM\t117\tNA\tAG\t1\tAG\t1\t2\n+chrM_149_684_2:0:0_2:0:0_6e/1_TA\tchrM\t148\tNA\tTA\t1\tTA\t1\t3\n+chrM_152_616_6:0:0_2:0:0_84/1_GC\tchrM\t151\tNA\tGC\t1\tGC\t1\t4\n+chrM_247_748_8:0:0_1:0:0_1b7/1_GC\tchrM\t246\tNA\tGC\t1\tGC\t1\t5\n+chrM_280_772_1:0:0_9:0:0_31/1_CG\tchrM\t279\tNA\tCG\t1\tCG\t1\t6\n+chrM_292_819_2:0:0_1:0:0_18d/1_CA\tchrM\t291\tNA\tCA\t1\tCA\t1\t7\n+chrM_390_890_3:0:0_6:0:0_bf/1_CA\tchrM\t389\tNA\tCA\t1\tCA\t1\t8\n+chrM_447_921_2:0:0_3:0:0_1cc/1_AC\tchrM\t446\tNA\tAC\t1\tAC\t1\t9\n+chrM_469_983_2:0:0_1:0:0_121/1_AC\tchrM\t468\tNA\tAC\t1\tAC\t1\t10\n+chrM_541_1074_3:0:0_8:1:0_22/1_AC\tchrM\t540\tNA\tAC\t1\tAC\t1\t11\n+chrM_8_556_3:0:0_2:0:0_1b1/1_AC\tchrM\t556\tNA\tAC\t1\tAC\t1\t12\n+chrM_112_577_2:0:0_4:0:0_17b/1_CG\tchrM\t577\tNA\tCG\t1\tCG\t1\t13\n+chrM_627_1063_6:0:0_4:1:0_12a/1_AA\tchrM\t626\tNA\tAA\t1\tAA\t1\t14\n+chrM_164_650_5:0:0_3:0:0_164/1_AC\tchrM\t650\tNA\tAC\t1\tAC\t1\t15\n+chrM_200_695_6:0:0_2:0:0_5a/1_TA\tchrM\t695\tNA\tTA\t3\tTA\t3\t16\n+chrM_168_695_5:0:0_2:0:0_1af/1_TA\tchrM\t695\tNA\tTA\t3\tTA\t3\t16\n+chrM_170_695_1:0:0_6:0:0_1e3/1_TA\tchrM\t695\tNA\tTA\t3\tTA\t3\t16\n+chrM_209_705_3:0:0_2:0:0_1b3/1_TA\tchrM\t705\tNA\tTA\t1\tTA\t1\t17\n+chrM_288_807_3:0:0_5:0:0_8e/1_AG\tchrM\t807\tNA\tAG\t1\tAG\t1\t18\n+chrM_818_1274_1:1:0_4:0:0_124/1_TC\tchrM\t817\tNA\tTC\t1\tTC\t1\t19\n+chrM_442_872_4:0:0_6:0:0_146/1_AT\tchrM\t872\tNA\tAT\t1\tAT\t1\t20\n+chrM_460_919_3:0:0_1:0:0_0/1_GA\tchrM\t919\tNA\tGA\t1\tGA\t1\t21\n+chrM_384_950_5:0:0_2:0:0_34/1_GG\tchrM\t950\tNA\tGG\t1\tGG\t1\t22\n+chrM_476_980_5:0:0_2:0:0_133/1_TA\tchrM\t980\tNA\tTA\t1\tTA\t1\t23\n+chrM_552_999_4:0:0_5:0:0_bb/1_AT\tchrM\t999\tNA\tAT\t1\tAT\t1\t24\n+chrM_512_1030_5:0:0_7:1:0_a5/1_AC\tchrM\t1030\tNA\tAC\t1\tAC\t1\t25\n+chrM_1103_1634_5:0:0_3:0:0_36/1_TA\tchrM\t1102\tNA\tTA\t1\tTA\t1\t26\n+chrM_648_1148_3:0:0_3:1:0_1d1/1_TT\tchrM\t1148\tNA\tTT\t1\tTT\t1\t27\n+chrM_1167_1797_8:0:0_2:1:0_14e/1_GT\tchrM\t1166\tNA\tGT\t1\tGT\t1\t28\n+chrM_619_1169_3:0:0_5:1:0_33/1_CC\tchrM\t1169\tNA\tCC\t1\tCC\t1\t29\n+chrM_668_1248_0:0:0_4:0:0_fa/1_TT\tchrM\t1248\tNA\tTT\t1\tTT\t1\t30\n+chrM_1288_1759_4:0:0_2:1:0_a1/1_TA\tchrM\t1287\tNA\tTA\t1\tTA\t1\t31\n+chrM_1327_1786_4:0:0_3:1:0_1ad/1_CA\tchrM\t1326\tNA\tCA\t1\tCA\t1\t32\n+chrM_866_1351_5:1:0_6:0:0_1c9/1_TA\tchrM\t1351\tNA\tTA\t1\tTA\t1\t33\n+chrM_853_1366_6:1:0_4:0:0_13b/1_TC\tchrM\t1366\tNA\tTC\t1\tTC\t1\t34\n+chrM_1399_1851_2:0:0_11:1:0_85/1_AT\tchrM\t1398\tNA\tAT\t1\tAT\t1\t35\n+chrM_946_1444_4:1:0_6:0:0_113/1_TT\tchrM\t1444\tNA\tTT\t1\tTT\t1\t36\n+chrM_943_1485_2:1:0_5:0:0_1e2/1_AT\tchrM\t1485\tNA\tAT\t1\tAT\t1\t37\n+chrM_1022_1501_4:0:0_5:0:0_132/1_GA\tchrM\t1501\tNA\tGA\t1\tGA\t1\t38\n+chrM_1504_2013_4:1:0_3:0:0_10b/1_AT\tchrM\t1503\tNA\tAT\t1\tAT\t1\t39\n+chrM_1505_1934_1:1:0_5:0:0_157/1_TC\tchrM\t1504\tNA\tTC\t1\tTC\t1\t40\n+chrM_997_1511_2:1:0_1:0:0_1d8/1_AT\tchrM\t1511\tNA\tAT\t1\tAT\t1\t41\n+chrM_1521_2070_5:1:0_3:0:0_1a/1_TA\tchrM\t1520\tNA\tTA\t1\tTA\t1\t42\n+chrM_1575_2126_5:1:0_2:1:0_18c/1_TA\tchrM\t1574\tNA\tTA\t1\tTA\t1\t43\n+chrM_1063_1598_5:0:0_5:0:0_f5/1_TG\tchrM\t1598\tNA\tTG\t1\tTG\t1\t44\n+chrM_1605_2128_3:1:0_4:1:0_1ea/1_TT\tchrM\t1604\tNA\tTT\t1\tTT\t1\t45\n+chrM_1065_1609_3:0:0_5:0:0_18e/1_TA\tchrM\t1609\tNA\tTA\t1\tTA\t1\t46\n+chrM_1140_1619_1:0:0_4:0:0_74/1_TT\tchrM\t1619\tNA\tTT\t1\tTT\t1\t47\n+chrM_1111_1626_4:0:0_2:0:0_186/1_AC\tchrM\t1626\tNA\tAC\t1\tAC\t1\t48\n+chrM_1664_2135_0:1:0_3:1:0_179/1_TT\tchrM\t1663\tNA\tTT\t1\tTT\t1\t49\n+chrM_1699_2147_1:0:0_7:0:0_10c/1_AA\tchrM\t1698\tNA\tAA\t1\tAA\t1\t50\n+chrM_1706_2240_3:0:0_6:0:0_99/1_GT\tchrM\t1705\tNA\tGT\t1\tGT\t1\t51\n+chrM_1756_2309_0:0:0_3:0:0_c3/1_AT\tchrM\t1755\tNA\tAT\t1\tAT\t1\t52\n+chrM_1344_1758_4:0:0_3:1:0_75/1_TT\tchrM\t1758\tNA\tTT\t1\tTT\t1\t53\n+chrM_1223_1777_5:0:0_6:1:0_42/1_TG\tchrM\t1777\tNA\tTG\t1\tTG\t1\t54\n+chrM_1790_2351_4:0:0_4:0:0_f3/1_CC\tchrM\t1789\tNA\tCC\t1\tCC\t1\t55\n+chrM_1308_1807_4:0:0_4:1:0_45/1_TA\tchrM\t1807\tNA\tTA\t1\tTA\t1\t56\n+chrM_1814_2315_6:0:0_3:0:0_12d/1_AA\tchrM\t1813\tNA\tAA\t1\tAA\t1\t57\n+chrM_1862_2304_4:0:0_1:0:0_19d/1_AA\tchrM\t1861\tNA\tAA\t1\tAA\t1\t58\n+chrM_1363_1869_5:0:0_3:1:0_aa/1_TA\tchrM\t1869\tNA\tTA\t1\tTA\t1\t59\n+chrM_1363_1887_2:0:0_3:0:0_d3/1_CA\tchrM\t1887\tNA\tCA\t1\tCA\t1\t60\n+chrM_1428_1904_7:0:0_1:0:0_1b0/1_AC\tchrM\t1904\tNA\tAC\t1\tAC\t1\t6'..b'\tTA\t1\t431\n+chrM_14135_14581_5:0:0_5:0:0_1de/1_AT\tchrM\t14581\tNA\tAT\t1\tAT\t1\t432\n+chrM_14612_15169_2:0:0_5:0:0_e4/1_TT\tchrM\t14611\tNA\tTT\t1\tTT\t1\t433\n+chrM_14668_15109_2:0:0_3:0:0_3d/1_TG\tchrM\t14667\tNA\tTG\t1\tTG\t1\t434\n+chrM_14696_15126_2:0:0_2:0:0_a7/1_AA\tchrM\t14695\tNA\tAA\t1\tAA\t1\t435\n+chrM_14727_15156_4:0:0_4:0:0_178/1_GA\tchrM\t14726\tNA\tGA\t1\tGA\t1\t436\n+chrM_14245_14783_3:0:0_1:0:0_9d/1_GG\tchrM\t14783\tNA\tGG\t1\tGG\t1\t437\n+chrM_14314_14802_5:0:0_3:0:0_191/1_GG\tchrM\t14802\tNA\tGG\t1\tGG\t1\t438\n+chrM_14817_15261_5:0:0_4:0:0_65/1_AT\tchrM\t14816\tNA\tAT\t1\tAT\t1\t439\n+chrM_14863_15320_2:0:0_4:0:0_d8/1_AA\tchrM\t14862\tNA\tAA\t1\tAA\t1\t440\n+chrM_14869_15387_2:0:0_7:0:0_1f/1_AC\tchrM\t14868\tNA\tAC\t1\tAC\t1\t441\n+chrM_14888_15340_3:0:0_3:0:0_112/1_GG\tchrM\t14887\tNA\tGG\t1\tGG\t1\t442\n+chrM_14368_14894_6:0:0_5:0:0_40/1_TC\tchrM\t14894\tNA\tTC\t1\tTC\t1\t443\n+chrM_14410_14941_6:0:0_3:0:0_e5/1_TA\tchrM\t14941\tNA\tTA\t1\tTA\t1\t444\n+chrM_14962_15543_5:0:0_8:0:0_46/1_CA\tchrM\t14961\tNA\tCA\t1\tCA\t1\t445\n+chrM_14456_14987_3:0:0_5:0:0_ab/1_GT\tchrM\t14987\tNA\tGT\t1\tGT\t1\t446\n+chrM_15070_15569_5:0:0_5:0:0_cf/1_CA\tchrM\t15069\tNA\tCA\t1\tCA\t1\t447\n+chrM_15140_15686_2:0:0_5:0:0_185/1_CC\tchrM\t15139\tNA\tCC\t1\tCC\t1\t448\n+chrM_15142_15661_7:0:0_3:0:0_11e/1_CT\tchrM\t15141\tNA\tCT\t1\tCT\t1\t449\n+chrM_15192_15694_5:0:0_2:0:0_f7/1_CA\tchrM\t15191\tNA\tCA\t1\tCA\t1\t450\n+chrM_15211_15685_5:0:0_2:0:0_1d7/1_AC\tchrM\t15210\tNA\tAC\t1\tAC\t1\t451\n+chrM_15225_15786_3:0:0_6:0:0_17e/1_TT\tchrM\t15224\tNA\tTT\t1\tTT\t1\t452\n+chrM_15258_15810_4:0:0_6:0:0_5f/1_TT\tchrM\t15257\tNA\tTT\t1\tTT\t1\t453\n+chrM_14817_15317_5:0:0_2:0:0_59/1_GC\tchrM\t15317\tNA\tGC\t1\tGC\t1\t454\n+chrM_15324_15836_4:0:0_3:0:0_94/1_AC\tchrM\t15323\tNA\tAC\t1\tAC\t1\t455\n+chrM_15365_15880_4:1:0_3:0:0_80/1_CA\tchrM\t15364\tNA\tCA\t1\tCA\t1\t456\n+chrM_15408_15863_2:1:0_3:0:0_1e6/1_AG\tchrM\t15407\tNA\tAG\t1\tAG\t1\t457\n+chrM_15439_15924_0:0:0_4:0:0_172/1_TG\tchrM\t15438\tNA\tTG\t1\tTG\t1\t458\n+chrM_14931_15457_2:0:0_4:1:0_1a1/1_AA\tchrM\t15457\tNA\tAA\t1\tAA\t1\t459\n+chrM_15547_16054_5:0:0_1:0:0_af/1_GA\tchrM\t15546\tNA\tGA\t1\tGA\t1\t460\n+chrM_15134_15560_3:0:0_4:0:0_1c4/1_GG\tchrM\t15560\tNA\tGG\t1\tGG\t1\t461\n+chrM_15575_16118_5:0:0_4:0:0_1aa/1_GC\tchrM\t15574\tNA\tGC\t1\tGC\t1\t462\n+chrM_15052_15629_7:0:0_9:0:0_15f/1_GA\tchrM\t15629\tNA\tGA\t1\tGA\t1\t463\n+chrM_15698_16224_5:0:0_6:0:0_138/1_AC\tchrM\t15697\tNA\tAC\t1\tAC\t1\t464\n+chrM_15247_15721_2:1:0_6:0:0_17/1_TC\tchrM\t15721\tNA\tTC\t1\tTC\t1\t465\n+chrM_15218_15763_2:0:0_2:0:0_171/1_AG\tchrM\t15763\tNA\tAG\t1\tAG\t1\t466\n+chrM_15860_16419_5:0:0_8:0:0_53/1_TA\tchrM\t15859\tNA\tTA\t1\tTA\t1\t467\n+chrM_15864_16329_3:0:0_3:0:0_18b/1_CG\tchrM\t15863\tNA\tCG\t1\tCG\t1\t468\n+chrM_15507_15962_3:0:0_0:0:0_5b/1_GT\tchrM\t15962\tNA\tGT\t1\tGT\t1\t469\n+chrM_15430_15985_5:0:0_3:0:0_2c/1_AT\tchrM\t15985\tNA\tAT\t1\tAT\t1\t470\n+chrM_15706_16238_6:0:0_7:0:0_7b/1_AG\tchrM\t16238\tNA\tAG\t1\tAG\t1\t471\n+chrM_16252_16701_2:1:0_5:0:0_147/1_TA\tchrM\t16251\tNA\tTA\t1\tTA\t1\t472\n+chrM_15753_16280_5:0:0_6:0:0_180/1_TG\tchrM\t16280\tNA\tTG\t1\tTG\t1\t473\n+chrM_15777_16347_2:0:0_6:0:0_16/1_AT\tchrM\t16347\tNA\tAT\t1\tAT\t1\t474\n+chrM_16366_16911_6:1:0_6:1:1_168/1_CC\tchrM\t16365\tNA\tCC\t1\tCC\t1\t475\n+chrM_16370_16833_6:1:0_6:0:1_1bb/1_AC\tchrM\t16369\tNA\tAC\t1\tAC\t1\t476\n+chrM_16402_16876_2:0:0_1:0:1_98/1_CA\tchrM\t16401\tNA\tCA\t1\tCA\t1\t477\n+chrM_16426_16953_3:0:0_2:2:0_60/1_AA\tchrM\t16425\tNA\tAA\t1\tAA\t1\t478\n+chrM_15986_16442_3:0:0_7:0:0_15d/1_TG\tchrM\t16442\tNA\tTG\t1\tTG\t1\t479\n+chrM_16030_16460_6:0:0_4:1:0_20/1_AT\tchrM\t16460\tNA\tAT\t1\tAT\t1\t480\n+chrM_15909_16463_3:0:0_7:1:0_106/1_GT\tchrM\t16463\tNA\tGT\t1\tGT\t1\t481\n+chrM_15984_16500_4:0:0_1:0:0_6b/1_GT\tchrM\t16500\tNA\tGT\t1\tGT\t1\t482\n+chrM_16070_16513_3:0:0_7:1:0_1e7/1_TA\tchrM\t16513\tNA\tTA\t1\tTA\t1\t483\n+chrM_16064_16572_4:0:0_6:1:0_f1/1_TG\tchrM\t16572\tNA\tTG\t1\tTG\t1\t484\n+chrM_16245_16740_4:1:0_2:0:1_127/1_TA\tchrM\t16742\tNA\tTA\t1\tTA\t1\t485\n+chrM_16315_16834_5:0:0_3:0:1_18/1_TT\tchrM\t16834\tNA\tTT\t1\tTT\t1\t486\n+chrM_16229_16843_2:1:0_2:0:1_c4/1_GC\tchrM\t16843\tNA\tGC\t1\tGC\t1\t487\n+chrM_16420_16905_5:0:0_2:0:1_159/1_GT\tchrM\t16905\tNA\tGT\t2\tGT\t2\t488\n+chrM_16420_16905_5:0:0_2:0:1_159/1_GT\tchrM\t16905\tNA\tGT\t2\tGT\t2\t488\n+chrM_16419_16923_3:0:0_3:0:1_183/1_CG\tchrM\t16923\tNA\tCG\t1\tCG\t1\t489\n+chrM_16585_16993_5:0:1_5:1:0_1b9/1_GA\tchrM\t16993\tNA\tGA\t1\tGA\t1\t490\n'
b
diff -r 418b961e0576 -r 79436b3019e9 test-data/group_out4.bam
b
Binary file test-data/group_out4.bam has changed
b
diff -r 418b961e0576 -r 79436b3019e9 test-data/group_out5.bam
b
Binary file test-data/group_out5.bam has changed
b
diff -r 418b961e0576 -r 79436b3019e9 test-data/group_out6.bam
b
Binary file test-data/group_out6.bam has changed
b
diff -r 418b961e0576 -r 79436b3019e9 test-data/out_R1.fastq.gz
b
Binary file test-data/out_R1.fastq.gz has changed
b
diff -r 418b961e0576 -r 79436b3019e9 test-data/out_R2.fastq.gz
b
Binary file test-data/out_R2.fastq.gz has changed
b
diff -r 418b961e0576 -r 79436b3019e9 test-data/out_paired.log
--- a/test-data/out_paired.log Thu Aug 10 06:37:09 2017 -0400
+++ b/test-data/out_paired.log Tue Aug 29 17:37:07 2017 -0400
b
@@ -1,116 +1,36 @@
-# output generated by extract --bc-pattern=NNNXXX --stdin=input_read1.gz --read2-in=input_read2.gz --stdout out1.gz --read2-out=out2.gz --log=/home/ubuntu/mount/git/temp/galaxy/database/files/000/dataset_863.dat
-# job started at Tue Aug  8 15:14:12 2017 on packer-test -- cba9bf6d-f8cd-49c4-8184-3f4a090cbc3e
-# pid: 32709, system: Linux 4.4.0-83-generic #106-Ubuntu SMP Mon Jun 26 17:54:43 UTC 2017 x86_64
+# output generated by extract --bc-pattern=NNNXXX --stdin=input_read1.gz --read2-in=input_read2.gz --stdout out1.gz --read2-out=out2.gz --log=/tmp/tmpREDhtd/files/003/dataset_3773.dat
+# job started at Tue Aug 29 14:20:44 2017 on gaius -- 5913ff4c-b1d1-4f65-abec-66ca6684bdbd
+# pid: 21437, system: Linux 4.10.0-33-generic #37-Ubuntu SMP Fri Aug 11 10:55:28 UTC 2017 x86_64
+# blacklist                               : None
+# compresslevel                           : 6
+# error_correct_cell                      : False
+# extract_method                          : string
+# filter_cell_barcode                     : None
+# filter_cell_barcodes                    : False
 # log2stderr                              : False
 # loglevel                                : 1
 # pattern                                 : NNNXXX
 # pattern2                                : None
-# prime3                                  : False
+# prime3                                  : None
 # quality_encoding                        : None
+# quality_filter_mask                     : None
 # quality_filter_threshold                : None
 # random_seed                             : None
 # read2_in                                : input_read2.gz
 # read2_out                               : out2.gz
+# read2_stdout                            : False
+# reads_subset                            : None
+# reconcile                               : False
 # short_help                              : None
-# split                                   : False
-# stats                                   : True
 # stderr                                  : <_io.TextIOWrapper name='<stderr>' mode='w' encoding='UTF-8'>
 # stdin                                   : <_io.TextIOWrapper name='input_read1.gz' encoding='ascii'>
-# stdlog                                  : <_io.TextIOWrapper name='/home/ubuntu/mount/git/temp/galaxy/database/files/000/dataset_863.dat' mode='a' encoding='UTF-8'>
+# stdlog                                  : <_io.TextIOWrapper name='/tmp/tmpREDhtd/files/003/dataset_3773.dat' mode='a' encoding='UTF-8'>
 # stdout                                  : <_io.TextIOWrapper name='out1.gz' encoding='ascii'>
 # timeit_file                             : None
 # timeit_header                           : None
 # timeit_name                             : all
-Barcode UMI Sample Count
-AAAAGT AAA AGT 1
-TTTTTT TTT TTT 1
-TTGGTT TTG GTT 1
-AAGTTT AAG TTT 1
-ATATAA ATA TAA 1
-GTTTTT GTT TTT 4
-GTTTTA GTT TTA 1
-TATAGA TAT AGA 1
-AAGTAT AAG TAT 1
-TTATTT TTA TTT 1
-AGATGA AGA TGA 1
-TTTTAG TTT TAG 2
-GTGTTT GTG TTT 1
-GTATGT GTA TGT 1
-AGTGTT AGT GTT 1
-AGAAAT AGA AAT 1
-TATAAA TAT AAA 1
-TGTATC TGT ATC 1
-TAAAAT TAA AAT 1
-ATTATT ATT ATT 1
-ATTTAG ATT TAG 1
-TATATT TAT ATT 1
-ATTTGG ATT TGG 1
-AAAATA AAA ATA 1
-AAGTTA AAG TTA 1
-GTTGTA GTT GTA 1
-TTTGAA TTT GAA 1
-ATTTGA ATT TGA 1
-TTTATA TTT ATA 2
-ATACGT ATA CGT 1
-GGTTAG GGT TAG 1
-GGTGTT GGT GTT 1
-TGGTTG TGG TTG 1
-AGTTAT AGT TAT 1
-GAGTAG GAG TAG 1
-TGTGTT TGT GTT 1
-TGAATT TGA ATT 1
-TTTAGG TTT AGG 2
-TAGATG TAG ATG 1
-TTGGAT TTG GAT 1
-TTGGAC TTG GAC 1
-TGTATG TGT ATG 1
-AGAGTG AGA GTG 1
-AGAATA AGA ATA 1
-TTGTTT TTG TTT 1
-AATATA AAT ATA 1
-TTATTA TTA TTA 1
-TTAAAG TTA AAG 2
-AGGAGT AGG AGT 1
-ATAAGT ATA AGT 1
-TTGTGG TTG TGG 1
-GTATTA GTA TTA 1
-AGTGGA AGT GGA 1
-TTTGGG TTT GGG 1
-TAAGAG TAA GAG 1
-TTGGGA TTG GGA 1
-TATAGG TAT AGG 1
-TTTTTG TTT TTG 1
-AAGAGG AAG AGG 1
-GAGAGA GAG AGA 1
-TTGGTG TTG GTG 1
-TAGAGG TAG AGG 1
-ATTTTG ATT TTG 1
-TGGATG TGG ATG 1
-ATTTTT ATT TTT 1
-TTATAT TTA TAT 1
-TGTTTG TGT TTG 1
-GTTGGT GTT GGT 1
-TCTTTA TCT TTA 1
-ATTGTT ATT GTT 2
-ATAAGA ATA AGA 1
-TATCGA TAT CGA 1
-TTTGTT TTT GTT 1
-GTGTAT GTG TAT 1
-AGTGAA AGT GAA 1
-GTTGTG GTT GTG 1
-TAGTGA TAG TGA 1
-TATTGA TAT TGA 1
-TTAATG TTA ATG 1
-TTTAAA TTT AAA 2
-AGAAAG AGA AAG 1
-TTTTGT TTT TGT 1
-TTATAG TTA TAG 1
-AGGTGT AGG TGT 1
-TTAGTT TTA GTT 1
-TTTAAT TTT AAT 1
-AGGATG AGG ATG 1
-ATATAT ATA TAT 1
-AGAGTT AGA GTT 1
-GAGAAA GAG AAA 1
-ATATGA ATA TGA 1
-# job finished in 0 seconds at Tue Aug  8 15:14:12 2017 --  0.41  0.02  0.00  0.00 -- cba9bf6d-f8cd-49c4-8184-3f4a090cbc3e
+# whitelist                               : None
+2017-08-29 14:20:44,451 INFO Starting barcode extraction
+2017-08-29 14:20:44,461 INFO Input Reads: 100
+2017-08-29 14:20:44,461 INFO Reads output: 100
+# job finished in 0 seconds at Tue Aug 29 14:20:44 2017 --  0.73  0.05  0.00  0.00 -- 5913ff4c-b1d1-4f65-abec-66ca6684bdbd
b
diff -r 418b961e0576 -r 79436b3019e9 test-data/out_single.log
--- a/test-data/out_single.log Thu Aug 10 06:37:09 2017 -0400
+++ b/test-data/out_single.log Tue Aug 29 17:37:07 2017 -0400
b
@@ -1,92 +1,37 @@
-# output generated by extract --bc-pattern=XXXNNN --stdin=/home/ubuntu/mount/git/temp/galaxy/database/files/000/dataset_867.dat --stdout /home/ubuntu/mount/git/temp/galaxy/database/files/000/dataset_886.dat --3prime --quality-filter-threshold 10 --quality-encoding phred33 --log=/home/ubuntu/mount/git/temp/galaxy/database/files/000/dataset_887.dat
-# job started at Wed Aug  9 09:51:05 2017 on packer-test -- e5896848-87b1-4bf4-a96c-b7a4a83a0b8b
-# pid: 7652, system: Linux 4.4.0-83-generic #106-Ubuntu SMP Mon Jun 26 17:54:43 UTC 2017 x86_64
+# output generated by extract --bc-pattern=XXXNNN --stdin=/tmp/tmpREDhtd/files/003/dataset_3766.dat --stdout /tmp/tmpREDhtd/files/003/dataset_3767.dat --3prime --quality-filter-threshold 10 --quality-encoding phred33 --log=/tmp/tmpREDhtd/files/003/dataset_3768.dat
+# job started at Tue Aug 29 14:20:26 2017 on gaius -- 1cfd46e7-5d74-4c25-8cdd-89cf8f41a393
+# pid: 21338, system: Linux 4.10.0-33-generic #37-Ubuntu SMP Fri Aug 11 10:55:28 UTC 2017 x86_64
+# blacklist                               : None
+# compresslevel                           : 6
+# error_correct_cell                      : False
+# extract_method                          : string
+# filter_cell_barcode                     : None
+# filter_cell_barcodes                    : False
 # log2stderr                              : False
 # loglevel                                : 1
 # pattern                                 : XXXNNN
 # pattern2                                : None
 # prime3                                  : True
 # quality_encoding                        : phred33
+# quality_filter_mask                     : None
 # quality_filter_threshold                : 10
 # random_seed                             : None
 # read2_in                                : None
-# read2_out                               : None
+# read2_out                               : False
+# read2_stdout                            : False
+# reads_subset                            : None
+# reconcile                               : False
 # short_help                              : None
-# split                                   : False
-# stats                                   : True
 # stderr                                  : <_io.TextIOWrapper name='<stderr>' mode='w' encoding='UTF-8'>
-# stdin                                   : <_io.TextIOWrapper name='/home/ubuntu/mount/git/temp/galaxy/database/files/000/dataset_867.dat' mode='r' encoding='UTF-8'>
-# stdlog                                  : <_io.TextIOWrapper name='/home/ubuntu/mount/git/temp/galaxy/database/files/000/dataset_887.dat' mode='a' encoding='UTF-8'>
-# stdout                                  : <_io.TextIOWrapper name='/home/ubuntu/mount/git/temp/galaxy/database/files/000/dataset_886.dat' mode='w' encoding='UTF-8'>
+# stdin                                   : <_io.TextIOWrapper name='/tmp/tmpREDhtd/files/003/dataset_3766.dat' mode='r' encoding='UTF-8'>
+# stdlog                                  : <_io.TextIOWrapper name='/tmp/tmpREDhtd/files/003/dataset_3768.dat' mode='a' encoding='UTF-8'>
+# stdout                                  : <_io.TextIOWrapper name='/tmp/tmpREDhtd/files/003/dataset_3767.dat' mode='w' encoding='UTF-8'>
 # timeit_file                             : None
 # timeit_header                           : None
 # timeit_name                             : all
-Barcode UMI Sample Count
-AAGTTA TTA AAG 1
-GGATTT TTT GGA 1
-GGATAG TAG GGA 1
-TTTTTT TTT TTT 4
-GAGAGG AGG GAG 1
-TTAGTT GTT TTA 1
-TGTAGA AGA TGT 1
-AGAGAT GAT AGA 1
-GAATAA TAA GAA 1
-TTGAGT AGT TTG 1
-TTTATT ATT TTT 1
-GTAAGG AGG GTA 1
-GAAGAG GAG GAA 1
-TGTTTG TTG TGT 1
-ATAATG ATG ATA 1
-TTAGAT GAT TTA 1
-GTCTGA TGA GTC 1
-ATCATC ATC ATC 1
-GTCACC ACC GTC 1
-AACTCC TCC AAC 1
-TTATTA TTA TTA 1
-TGTTGT TGT TGT 2
-GTTTAT TAT GTT 1
-ATGAGT AGT ATG 1
-ATAATT ATT ATA 1
-TTGGTT GTT TTG 1
-GTTTTG TTG GTT 1
-CACGTC GTC CAC 1
-TTGTGT TGT TTG 1
-TGAATT ATT TGA 1
-GGATGT TGT GGA 1
-AGTCAC CAC AGT 1
-GTTTTT TTT GTT 1
-TATAGT AGT TAT 1
-TTTTAT TAT TTT 1
-TAAAAA AAA TAA 1
-ATAGGT GGT ATA 1
-ATTTTA TTA ATT 1
-TGTATA ATA TGT 1
-TGTTTT TTT TGT 2
-AGATTT TTT AGA 1
-TAGAAT AAT TAG 1
-GAAGGA GGA GAA 1
-GTTTGA TGA GTT 1
-TTTTGT TGT TTT 1
-ATTTTT TTT ATT 1
-TGTTAT TAT TGT 1
-CAGGAT GAT CAG 1
-GTATTT TTT GTA 1
-TGGGTA GTA TGG 1
-AAAGTT GTT AAA 1
-GGTGTG GTG GGT 1
-AAGATC ATC AAG 1
-TAGGTT GTT TAG 1
-GAGTTA TTA GAG 1
-TTATTT TTT TTA 1
-GTTGTT GTT GTT 1
-AGGTTT TTT AGG 1
-TAAAGT AGT TAA 1
-AGTTTT TTT AGT 1
-TTTGAT GAT TTT 1
-ATAAAT AAT ATA 1
-GTTATT ATT GTT 1
-TTTTTA TTA TTT 1
-AAGTGT TGT AAG 1
-ATATAG TAG ATA 1
-TATAGA AGA TAT 1
-# job finished in 0 seconds at Wed Aug  9 09:51:05 2017 --  0.36  0.02  0.00  0.00 -- e5896848-87b1-4bf4-a96c-b7a4a83a0b8b
+# whitelist                               : None
+2017-08-29 14:20:26,493 INFO Starting barcode extraction
+2017-08-29 14:20:26,495 INFO Input Reads: 100
+2017-08-29 14:20:26,495 INFO Reads output: 72
+2017-08-29 14:20:26,495 INFO filtered: umi quality: 28
+# job finished in 0 seconds at Tue Aug 29 14:20:26 2017 --  0.75  0.02  0.00  0.00 -- 1cfd46e7-5d74-4c25-8cdd-89cf8f41a393
b
diff -r 418b961e0576 -r 79436b3019e9 umi-tools_extract.xml
--- a/umi-tools_extract.xml Thu Aug 10 06:37:09 2017 -0400
+++ b/umi-tools_extract.xml Tue Aug 29 17:37:07 2017 -0400
[
@@ -1,8 +1,9 @@
-<tool id="umi_tools_extract" name="UMI-tools extract" version="0.4.4.0">
+<tool id="umi_tools_extract" name="UMI-tools extract" version="@VERSION@.0">
     <description>Extract UMI from fastq files</description>
-    <requirements>
-        <requirement type="package" version="0.4.4">umi_tools</requirement>
-    </requirements>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
     <command detect_errors="exit_code"><![CDATA[
         #set $gz = False
         #if $input_type.type == 'single':
@@ -150,16 +151,16 @@
             <param name="quality_filter_threshold" value="10" />
             <param name="quality_encoding" value="phred33" />
             <output name="out" file="out_SE.fastq" />
-            <output name="out_log" file="out_single.log" lines_diff="15"/>
+            <output name="out_log" file="out_single.log" lines_diff="22"/>
         </test>
         <test>
             <param name="type" value="paired" />
             <param name="input_read1" value="t_R1.fastq.gz" ftype="fastq.gz" />
             <param name="input_read2" value="t_R2.fastq.gz" ftype="fastq.gz" />
             <param name="bc_pattern" value="NNNXXX" />
-            <output name="out1" file="out_R1.fastq.gz" decompress="true" />
-            <output name="out2" file="out_R2.fastq.gz" decompress="true" />
-            <output name="out_log" file="out_paired.log" lines_diff="10"/>
+            <output name="out1" file="out_R1.fastq.gz" decompress="true" lines_diff="2" />
+            <output name="out2" file="out_R2.fastq.gz" decompress="true" lines_diff="2" />
+            <output name="out_log" file="out_paired.log" lines_diff="16"/>
         </test>
     </tests>
     <help><![CDATA[
@@ -241,15 +242,5 @@
 and end two to FASTQOUT.
 
     ]]></help>
-    <citations>
-        <citation type="doi">10.1101/gr.209601.116</citation>
-        <citation type="bibtex">
-            @misc{githubUMI-tools,
-            title = {UMI-tools},
-            publisher = {GitHub},
-            journal = {GitHub repository},
-            url = {https://github.com/CGATOxford/UMI-tools},
-            }
-        </citation>
-    </citations>
+    <expand macro="citations" />
 </tool>