Repository 'umi_tools_extract'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/umi_tools_extract

Changeset 15:27ac32a22ad2 (2021-09-13)
Previous changeset 14:9fa7803d1c51 (2021-06-02) Next changeset 16:7accf7407811 (2021-10-23)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
modified:
macros.xml
test-data/dedup_out1.bam
test-data/dedup_out2.bam
test-data/dedup_out3.bam
test-data/dedup_out4.bam
test-data/dedup_out5.bam
test-data/dedup_out6.bam
test-data/group_out4.bam
test-data/group_out4.tab
test-data/out_wl_paired.log
umi-tools_extract.xml
added:
test-data/chr19_gene_tags.sam
test-data/group_in2.sam
b
diff -r 9fa7803d1c51 -r 27ac32a22ad2 macros.xml
--- a/macros.xml Wed Jun 02 18:27:33 2021 +0000
+++ b/macros.xml Mon Sep 13 14:52:06 2021 +0000
[
b'@@ -1,5 +1,43 @@\n <?xml version="1.0"?>\n <macros>\n+\n+    <!-- macros applying to all umi_tools -->\n+\n+    <token name="@TOOL_VERSION@">1.1.2</token>\n+    <token name="@VERSION_SUFFIX@">0</token>\n+    <token name="@PROFILE@">21.01</token>\n+    <xml name="requirements">\n+        <requirements>\n+            <requirement type="package" version="@TOOL_VERSION@">umi_tools</requirement>\n+            <yield />\n+        </requirements>\n+    </xml>\n+    <xml name="citations">\n+        <citations>\n+            <citation type="doi">10.1101/gr.209601.116</citation>\n+            <citation type="bibtex">\n+                @misc{githubUMI-tools,\n+                title = {UMI-tools},\n+                publisher = {GitHub},\n+                journal = {GitHub repository},\n+                url = {https://github.com/CGATOxford/UMI-tools},\n+                }\n+            </citation>\n+        </citations>\n+    </xml>\n+    <xml name="advanced_options_macro">\n+        <section name="advanced" title="Extra parameters" expanded="false">\n+            <param argument="--random-seed" type="integer" min="0" optional="true" label="Random Seed" />\n+        </section>\n+    </xml>\n+    <token name="@ADVANCED_OPTIONS@"><![CDATA[\n+        #if str($advanced.random_seed) != \'\'\n+            --random-seed=\'$advanced.random_seed\'\n+        #end if\n+    ]]></token>\n+    \n+    <!-- macros for extract and whitelist-->\n+    \n     <macro name="barcode_sanitizer" >\n         <sanitizer invalid_char="">\n             <valid initial="string.letters,string.digits">\n@@ -23,90 +61,510 @@\n             </valid>\n         </sanitizer>\n     </macro>\n-    <macro name="barcode2_conditional" >\n-        <conditional name="barcode">\n-            <param name="barcode_select" argument="--split-barcode" type="select" label="Barcode on both reads?">\n-                <option value="first_read_only">Barcode on first read only</option>\n-                <option value="both_reads">Barcode on both reads</option>\n-            </param>\n-            <when value="first_read_only"/>\n-            <when value="both_reads">\n-                <param name="bc_pattern2" argument="--bc-pattern2" type="text" value="" label="Barcode pattern for second read"\n-                       help="Use this option to specify the format of the UMI/barcode for\n-                             the second read pair if required." >\n-                    <expand macro="barcode_sanitizer" />\n-                </param>\n-            </when>\n-        </conditional>\n+    <xml name="sanitize_tag" >\n+        <sanitizer invalid_char="">\n+            <valid initial="string.letters,string.digits" />\n+        </sanitizer>\n+    </xml>\n+    <macro name="barcode1_macro" >\n+        <param argument="--bc-pattern" type="text" label="Barcode pattern for first read"\n+            help="Use this option to specify the format of the UMI/barcode. Use Ns to\n+                    represent the random positions and Xs to indicate the bc positions.\n+                    Bases with Ns will be extracted and added to the read name. Remaining\n+                    bases, marked with an X will be reattached to the read">\n+            <validator type="empty_field" /> \n+            <expand macro="barcode_sanitizer" />\n+        </param>\n     </macro>\n+    <macro name="barcode2_macro" >\n+        <param argument="--bc-pattern2" type="text" value="" label="Barcode pattern for second read"\n+            help="Use this option to specify the format of the UMI/barcode for\n+                the second read pair if required" >\n+            <expand macro="barcode_sanitizer" />\n+        </param>\n+    </macro>\n+    <!-- not just fastq because this would allow also fastqcsanger -->\n+    <token name="@FASTQ_FORMATS@">fastqsanger,fastqsanger.gz,fastqillumina,fastqillumina.gz,fastqsolexa,fastqsolexa.gz</token>\n+    <xml name="bio_tools">\n+        <xrefs>\n+            <xref type="bio.tools">umi-tools</xref>\n+        </xrefs>\n+    </xml>\n     <xml name="input_types">\n-        <conditional name="in'..b'tag specified so you do not need to supply the mapping file">\n+                <expand macro="sanitize_tag" />\n+            </param>\n+            <param argument="--assigned-status-tag" type="text" optional="true" label="Bam tag describing whether read is assigned to a gene" help="By default, this is set as the same tag as --gene-tag">\n+                <expand macro="sanitize_tag" />\n+            </param>\n+            <param argument="--skip-tags-regex" name="skip_tags_regex" type="text" label="Skip any reads where the gene matches this tag" value="" >\n+                <expand macro="barcode_sanitizer" />\n+            </param>\n+            <param argument="--per-contig" type="boolean" truevalue="--per-contig" falsevalue="" label="Deduplicate per contig" help="Field 3 in BAM; RNAME. All reads with the same contig will be considered to have the same alignment position. This is useful if your library prep generates PCR duplicates with non identical alignment positions such as CEL-Seq. In this case, you would align to a reference transcriptome with one transcript per gene" />\n+            <param argument="--gene-transcript-map" type="data" format="tabular" optional="true" label="Tabular file mapping genes to transripts" />\n+            <param argument="--per-cell" name="per_cell" type="boolean" truevalue="--per-cell" falsevalue="" label="Group reads only if they have the same cell barcode" />\n+        </section>\n+    </xml>\n+    <token name="@SC_OPTIONS@"><![CDATA[\n+            #if str($sc.gene_tag) != "":\n+                --gene-tag \'$sc.gene_tag\'\n+            #end if\n+            #if str($sc.assigned_status_tag) != "":\n+                --assigned-status-tag \'$sc.assigned_status_tag\'\n+            #end if\n+            #if str($sc.skip_tags_regex) != "":\n+                --skip-tags-regex \'$sc.skip_tags_regex\'\n+            #end if\n+            $sc.per_contig\n+            #if $sc.gene_transcript_map:\n+                --gene-transcript-map \'$sc.gene_transcript_map\'\n+            #end if\n+            $sc.per_cell\n+    ]]></token>\n+\n+    <xml name="groupdedup_options_macro">\n+        <section name="gd" title="group/dedup specific options">\n+            <param argument="--buffer-whole-contig" type="boolean" truevalue="--buffer-whole-contig" falsevalue="" label="Read whole contig before outputting bundles" help="Guarantees that no reads are missed, but increases memory usage" />\n+            <!-- TODO this option is hidden on the CLI. Should we expose it? -->\n+            <param argument="--whole-contig" type="boolean" truevalue="--whole-contig" falsevalue="" label="Consider all alignments to a single contig together" help="This is useful if you have aligned to a transcriptome multi-fasta" />\n+            <param argument="--multimapping-detection-method" type="select" optional="true" label="BAM Tag indicating multimapping " help="Some aligners identify multimapping using bam tags. Setting this option to NH, X0 or XT will use these tags when selecting the best read amongst reads with the same position and umi">\n+                <option value="NH">NH</option>\n+                <option value="X0">X0</option>\n+                <option value="XT">XT</option>\n+            </param>\n+        </section>\n+    </xml>\n+    <token name="@GROUPDEDUP_OPTIONS@"><![CDATA[\n+        $gd.buffer_whole_contig\n+        $gd.whole_contig\n+        $gd.multimapping_detection_method\n+    ]]></token>\n+    \n+    <xml name="log_input_macro">\n+        <param argument="--log" type="boolean" label="Output log?" truevalue="--log" falsevalue="" help="Choose if you want to generate a text file containing logging information" />\n+    </xml>\n+    <xml name="log_output_macro">\n+        <data name="out_log" format="txt" label="${tool.name} on ${on_string}: logfile" >\n+            <filter>log</filter>\n+        </data>\n+    </xml>\n+    <token name="@LOG@"><![CDATA[\n+        #if $log:\n+            --log=\'$out_log\'\n+        #end if\n+        --log2stderr\n+    ]]></token>\n+\n </macros>\n'
b
diff -r 9fa7803d1c51 -r 27ac32a22ad2 test-data/chr19_gene_tags.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chr19_gene_tags.sam Mon Sep 13 14:52:06 2021 +0000
b
b'@@ -0,0 +1,1492 @@\n+@HD\tVN:1.4\tSO:queryname\n+@SQ\tSN:chr1\tLN:248956422\n+@SQ\tSN:chr2\tLN:242193529\n+@SQ\tSN:chr3\tLN:198295559\n+@SQ\tSN:chr4\tLN:190214555\n+@SQ\tSN:chr5\tLN:181538259\n+@SQ\tSN:chr6\tLN:170805979\n+@SQ\tSN:chr7\tLN:159345973\n+@SQ\tSN:chr8\tLN:145138636\n+@SQ\tSN:chr9\tLN:138394717\n+@SQ\tSN:chr10\tLN:133797422\n+@SQ\tSN:chr11\tLN:135086622\n+@SQ\tSN:chr12\tLN:133275309\n+@SQ\tSN:chr13\tLN:114364328\n+@SQ\tSN:chr14\tLN:107043718\n+@SQ\tSN:chr15\tLN:101991189\n+@SQ\tSN:chr16\tLN:90338345\n+@SQ\tSN:chr17\tLN:83257441\n+@SQ\tSN:chr18\tLN:80373285\n+@SQ\tSN:chr19\tLN:58617616\n+@SQ\tSN:chr20\tLN:64444167\n+@SQ\tSN:chr21\tLN:46709983\n+@SQ\tSN:chr22\tLN:50818468\n+@SQ\tSN:chrX\tLN:156040895\n+@SQ\tSN:chrY\tLN:57227415\n+@SQ\tSN:chrM\tLN:16569\n+@SQ\tSN:GL000008.2\tLN:209709\n+@SQ\tSN:GL000009.2\tLN:201709\n+@SQ\tSN:GL000194.1\tLN:191469\n+@SQ\tSN:GL000195.1\tLN:182896\n+@SQ\tSN:GL000205.2\tLN:185591\n+@SQ\tSN:GL000208.1\tLN:92689\n+@SQ\tSN:GL000213.1\tLN:164239\n+@SQ\tSN:GL000214.1\tLN:137718\n+@SQ\tSN:GL000216.2\tLN:176608\n+@SQ\tSN:GL000218.1\tLN:161147\n+@SQ\tSN:GL000219.1\tLN:179198\n+@SQ\tSN:GL000220.1\tLN:161802\n+@SQ\tSN:GL000221.1\tLN:155397\n+@SQ\tSN:GL000224.1\tLN:179693\n+@SQ\tSN:GL000225.1\tLN:211173\n+@SQ\tSN:GL000226.1\tLN:15008\n+@SQ\tSN:KI270302.1\tLN:2274\n+@SQ\tSN:KI270303.1\tLN:1942\n+@SQ\tSN:KI270304.1\tLN:2165\n+@SQ\tSN:KI270305.1\tLN:1472\n+@SQ\tSN:KI270310.1\tLN:1201\n+@SQ\tSN:KI270311.1\tLN:12399\n+@SQ\tSN:KI270312.1\tLN:998\n+@SQ\tSN:KI270315.1\tLN:2276\n+@SQ\tSN:KI270316.1\tLN:1444\n+@SQ\tSN:KI270317.1\tLN:37690\n+@SQ\tSN:KI270320.1\tLN:4416\n+@SQ\tSN:KI270322.1\tLN:21476\n+@SQ\tSN:KI270329.1\tLN:1040\n+@SQ\tSN:KI270330.1\tLN:1652\n+@SQ\tSN:KI270333.1\tLN:2699\n+@SQ\tSN:KI270334.1\tLN:1368\n+@SQ\tSN:KI270335.1\tLN:1048\n+@SQ\tSN:KI270336.1\tLN:1026\n+@SQ\tSN:KI270337.1\tLN:1121\n+@SQ\tSN:KI270338.1\tLN:1428\n+@SQ\tSN:KI270340.1\tLN:1428\n+@SQ\tSN:KI270362.1\tLN:3530\n+@SQ\tSN:KI270363.1\tLN:1803\n+@SQ\tSN:KI270364.1\tLN:2855\n+@SQ\tSN:KI270366.1\tLN:8320\n+@SQ\tSN:KI270371.1\tLN:2805\n+@SQ\tSN:KI270372.1\tLN:1650\n+@SQ\tSN:KI270373.1\tLN:1451\n+@SQ\tSN:KI270374.1\tLN:2656\n+@SQ\tSN:KI270375.1\tLN:2378\n+@SQ\tSN:KI270376.1\tLN:1136\n+@SQ\tSN:KI270378.1\tLN:1048\n+@SQ\tSN:KI270379.1\tLN:1045\n+@SQ\tSN:KI270381.1\tLN:1930\n+@SQ\tSN:KI270382.1\tLN:4215\n+@SQ\tSN:KI270383.1\tLN:1750\n+@SQ\tSN:KI270384.1\tLN:1658\n+@SQ\tSN:KI270385.1\tLN:990\n+@SQ\tSN:KI270386.1\tLN:1788\n+@SQ\tSN:KI270387.1\tLN:1537\n+@SQ\tSN:KI270388.1\tLN:1216\n+@SQ\tSN:KI270389.1\tLN:1298\n+@SQ\tSN:KI270390.1\tLN:2387\n+@SQ\tSN:KI270391.1\tLN:1484\n+@SQ\tSN:KI270392.1\tLN:971\n+@SQ\tSN:KI270393.1\tLN:1308\n+@SQ\tSN:KI270394.1\tLN:970\n+@SQ\tSN:KI270395.1\tLN:1143\n+@SQ\tSN:KI270396.1\tLN:1880\n+@SQ\tSN:KI270411.1\tLN:2646\n+@SQ\tSN:KI270412.1\tLN:1179\n+@SQ\tSN:KI270414.1\tLN:2489\n+@SQ\tSN:KI270417.1\tLN:2043\n+@SQ\tSN:KI270418.1\tLN:2145\n+@SQ\tSN:KI270419.1\tLN:1029\n+@SQ\tSN:KI270420.1\tLN:2321\n+@SQ\tSN:KI270422.1\tLN:1445\n+@SQ\tSN:KI270423.1\tLN:981\n+@SQ\tSN:KI270424.1\tLN:2140\n+@SQ\tSN:KI270425.1\tLN:1884\n+@SQ\tSN:KI270429.1\tLN:1361\n+@SQ\tSN:KI270435.1\tLN:92983\n+@SQ\tSN:KI270438.1\tLN:112505\n+@SQ\tSN:KI270442.1\tLN:392061\n+@SQ\tSN:KI270448.1\tLN:7992\n+@SQ\tSN:KI270465.1\tLN:1774\n+@SQ\tSN:KI270466.1\tLN:1233\n+@SQ\tSN:KI270467.1\tLN:3920\n+@SQ\tSN:KI270468.1\tLN:4055\n+@SQ\tSN:KI270507.1\tLN:5353\n+@SQ\tSN:KI270508.1\tLN:1951\n+@SQ\tSN:KI270509.1\tLN:2318\n+@SQ\tSN:KI270510.1\tLN:2415\n+@SQ\tSN:KI270511.1\tLN:8127\n+@SQ\tSN:KI270512.1\tLN:22689\n+@SQ\tSN:KI270515.1\tLN:6361\n+@SQ\tSN:KI270516.1\tLN:1300\n+@SQ\tSN:KI270517.1\tLN:3253\n+@SQ\tSN:KI270518.1\tLN:2186\n+@SQ\tSN:KI270519.1\tLN:138126\n+@SQ\tSN:KI270521.1\tLN:7642\n+@SQ\tSN:KI270522.1\tLN:5674\n+@SQ\tSN:KI270528.1\tLN:2983\n+@SQ\tSN:KI270529.1\tLN:1899\n+@SQ\tSN:KI270530.1\tLN:2168\n+@SQ\tSN:KI270538.1\tLN:91309\n+@SQ\tSN:KI270539.1\tLN:993\n+@SQ\tSN:KI270544.1\tLN:1202\n+@SQ\tSN:KI270548.1\tLN:1599\n+@SQ\tSN:KI270579.1\tLN:31033\n+@SQ\tSN:KI270580.1\tLN:1553\n+@SQ\tSN:KI270581.1\tLN:7046\n+@SQ\tSN:KI270582.1\tLN:6504\n+@SQ\tSN:KI270583.1\tLN:1400\n+@SQ\tSN:KI270584.1\tLN:4513\n+@SQ\tSN:KI270587.1\tLN:2969\n+@SQ\tSN:KI270588.1\tLN:6158\n+@SQ\tSN:KI270589.1\tLN:44474\n+@SQ\tSN:KI270590.1\tLN:4685\n+@SQ\tSN:KI270591.1\tLN:5796\n+@SQ\tSN:KI270593.1\tLN:3041\n+@SQ\tSN:KI270706.1\tLN:175055\n+@SQ\tSN:KI270707.1\tLN:32032\n+@SQ\tSN:KI270708.1\tLN:127682\n+@SQ\tSN:KI270709.1\tLN'..b'44:H5FCJBGXY:4:23606:6087:6093:CELL_ACAAGG:UMI_GCACAA:SAMPLE_CGATGT:UID_CGATGTACAAGGGCACAA\t16\tchr19\t647844\t255\t60M\t*\t0\t0\tTAATTTAAAATTATAAAAATCTTTCCACCGCTGAACGTTTAGAGGGTGAGGTTAGACAGA\t/<A<E<AAEE/AE6EEEEEAE/AEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEAAAAA\tNH:i:1\tHI:i:1\tAS:i:59\tnM:i:0\tXF:Z:ENSG00000070423.17\n+NS500668:144:H5FCJBGXY:4:23606:19406:18847:CELL_ACAAGG:UMI_ACGTAT:SAMPLE_CGATGT:UID_CGATGTACAAGGACGTAT\t0\tchr19\t807553\t255\t60M\t*\t0\t0\tCCTTCCCCTTTCCCTATTTTTTTTCTTGCCCTGATCCGGAATTTCTTTGCCAACTGACTG\tAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEAEE/EEEEEEEEEEEEEEEEEEEEEA<AA/\tNH:i:1\tHI:i:1\tAS:i:59\tnM:i:0\tXF:Z:ENSG00000011304.18\n+NS500668:144:H5FCJBGXY:4:23606:22946:6130:CELL_ACAAGG:UMI_GCTAGG:SAMPLE_CGATGT:UID_CGATGTACAAGGGCTAGG\t0\tchr19\t812039\t255\t60M\t*\t0\t0\tTGTTGTGAGACCCGAGGGGCGGCGGCGCGGTTTTTTATGGTGACACAAATGTATATTTTG\tAAAAA//EEEEE<<E///EAEEEEEEEEEA/<E//EEE/E//EEEEEEEA/EEEEE/<A/\tNH:i:1\tHI:i:1\tAS:i:59\tnM:i:0\tXF:Z:ENSG00000011304.18\n+NS500668:144:H5FCJBGXY:4:23607:1822:13398:CELL_ACAAGG:UMI_GCCGAA:SAMPLE_CGATGT:UID_CGATGTACAAGGGCCGAA\t16\tchr19\t938931\t255\t13S47M\t*\t0\t0\tGCCAAAAAATGGTTTTTTTTTTTTTTTTTTTGAGACGGAGTCTTGTTTTGTCACCCAGGC\t/<//EA/A////<AEEEEEEAEEEEEEEEEEEEEEEEEEE/EEEEEEEEE/EEEEAAAAA\tNH:i:1\tHI:i:1\tAS:i:46\tnM:i:0\tXF:Z:Unassigned_NoFeatures\n+NS500668:144:H5FCJBGXY:4:23607:10971:2167:CELL_ACAAGG:UMI_GCTAGG:SAMPLE_CGATGT:UID_CGATGTACAAGGGCTAGG\t0\tchr19\t812062\t255\t58M2S\t*\t0\t0\tGGCGCGCTTTTTTAAGGAGACACAAATATATATTTTGCTAACAGCAATTCCAAGCTCACA\t//A/A6/EEEEEEE/EE/E6EEEEEA//E/E///A/E<///</<6/E//A<E///<////\tNH:i:1\tHI:i:1\tAS:i:47\tnM:i:5\tXF:Z:ENSG00000011304.18\n+NS500668:144:H5FCJBGXY:4:23608:13283:1095:CELL_ACAAGG:UMI_AGTTTA:SAMPLE_CGATGT:UID_CGATGTACAAGGAGTTTA\t0\tchr19\t812062\t255\t60M\t*\t0\t0\tGGCGCCGTTTTTTATGGTGACACAAATGTATATTTTGCTAACAGCAATTCCATGCTCAGT\t//A/A6/E//E</AA//E/<E/E6EE///E/</A//E/AAA///EEAA/E/<////EA/<\tNH:i:1\tHI:i:1\tAS:i:55\tnM:i:2\tXF:Z:ENSG00000011304.18\n+NS500668:144:H5FCJBGXY:4:23609:5585:17562:CELL_ACAAGG:UMI_GCTAGG:SAMPLE_CGATGT:UID_CGATGTACAAGGGCTAGG\t0\tchr19\t812089\t255\t60M\t*\t0\t0\tGTATATTTTGCTAACAGCAATTCCAGGCTCAGTATTGTGACCGCGGAACCACAGGGGACC\tAAA/AA////E/EEE/EEEE//A/E//EEE6E/EE/</6/EA<//E//E/EEEA////A<\tNH:i:1\tHI:i:1\tAS:i:57\tnM:i:1\tXF:Z:ENSG00000011304.18\n+NS500668:144:H5FCJBGXY:4:23611:19058:11533:CELL_TTCACG:UMI_GCCTTA:SAMPLE_CGATGT:UID_CGATGTTTCACGGCCTTA\t0\tchr19\t811797\t255\t60M\t*\t0\t0\tCCTGCAGTCGCCTAGAAAACTTGCTCTCAAACTTCAGGGTTTTTTCTTCCTTCAAATTTT\tAAAAAEEEEAEEAEEEE/AEEEEE/EE/EA/EEAAEEEEEEEAEEEEEE<<AEEEEEEEE\tNH:i:1\tHI:i:1\tAS:i:59\tnM:i:0\tXF:Z:ENSG00000011304.18\n+NS500668:144:H5FCJBGXY:4:23611:23053:18525:CELL_ACAAGG:UMI_CCAGCA:SAMPLE_CGATGT:UID_CGATGTACAAGGCCAGCA\t0\tchr19\t812062\t255\t60M\t*\t0\t0\tGGCGCGGTTTTTTATTTTTACAAAAATTTATATTTTGCTAACAGCAATTCCAGGCTCAGT\t/AA/A6E/E/EEE/E//E6////AAEE6E/EEEEEAE////AEAEEEAEE/E/AEEE<EE\tNH:i:1\tHI:i:1\tAS:i:49\tnM:i:5\tXF:Z:ENSG00000011304.18\n+NS500668:144:H5FCJBGXY:4:23611:23963:1840:CELL_TTCACG:UMI_GCTTTT:SAMPLE_CGATGT:UID_CGATGTTTCACGGCTTTT\t16\tchr19\t647540\t255\t50M10S\t*\t0\t0\tAGTGTATTTTAAATAGCTTTCAAGATACACATATTTTTTCCTTTAAAAAAAAAGTCTGGT\t/EE<E/EEE//EAEEA<EEE//EEEEEEAEAEEEEEE/E6EEEEEEEEEEAEEEEAAAAA\tNH:i:1\tHI:i:1\tAS:i:49\tnM:i:0\tXF:Z:ENSG00000070423.17\n+NS500668:144:H5FCJBGXY:4:23612:8565:12159:CELL_ACAAGG:UMI_ACATAG:SAMPLE_CGATGT:UID_CGATGTACAAGGACATAG\t0\tchr19\t812062\t255\t2S58M\t*\t0\t0\tAGGGCGCGGTTTTTTATGGTGACACAAATGTATATTTTGCTAACAGCAATTCCAGGCTCA\tA///A/AE/E/E//E/EEE/EEAEEEEEAEEAE/AAA/66EEEEEA<EE/<AEAE/E/AE\tNH:i:1\tHI:i:1\tAS:i:57\tnM:i:0\tXF:Z:ENSG00000011304.18\n+NS500668:144:H5FCJBGXY:4:23612:14819:6245:CELL_TTCACG:UMI_CTCATT:SAMPLE_CGATGT:UID_CGATGTTTCACGCTCATT\t0\tchr19\t994331\t255\t60M\t*\t0\t0\tGGCCGGCCAAGTGAGGCCCGGAGACCCCGGCCCGAGGCGCCCAGGCCTGAGCCCCATGCC\tAAAAAEEEEEEEE/EEEEEEE/E/EEEAE<EEAE<</6EEEE/</<EE/AAAEEA<A/<A\tNH:i:1\tHI:i:1\tAS:i:59\tnM:i:0\tXF:Z:ENSG00000065268.10\n+NS500668:144:H5FCJBGXY:4:23612:25822:12295:CELL_TTCACG:UMI_GATTGT:SAMPLE_CGATGT:UID_CGATGTTTCACGGATTGT\t0\tchr19\t812059\t255\t60M\t*\t0\t0\tGGCGGCGCGGTTTTTTATGGTGACACAAATGTATATTTTGCTAACAGCAATTCCAGGCTC\tAAAAAEEEEEEEEEEEEEEEEEAEEEAAAEAEAE/EE//AEA/EEEEEEEEAE6A<<AA<\tNH:i:1\tHI:i:1\tAS:i:59\tnM:i:0\tXF:Z:ENSG00000011304.18\n'
b
diff -r 9fa7803d1c51 -r 27ac32a22ad2 test-data/dedup_out1.bam
b
Binary file test-data/dedup_out1.bam has changed
b
diff -r 9fa7803d1c51 -r 27ac32a22ad2 test-data/dedup_out2.bam
b
Binary file test-data/dedup_out2.bam has changed
b
diff -r 9fa7803d1c51 -r 27ac32a22ad2 test-data/dedup_out3.bam
b
Binary file test-data/dedup_out3.bam has changed
b
diff -r 9fa7803d1c51 -r 27ac32a22ad2 test-data/dedup_out4.bam
b
Binary file test-data/dedup_out4.bam has changed
b
diff -r 9fa7803d1c51 -r 27ac32a22ad2 test-data/dedup_out5.bam
b
Binary file test-data/dedup_out5.bam has changed
b
diff -r 9fa7803d1c51 -r 27ac32a22ad2 test-data/dedup_out6.bam
b
Binary file test-data/dedup_out6.bam has changed
b
diff -r 9fa7803d1c51 -r 27ac32a22ad2 test-data/group_in2.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/group_in2.sam Mon Sep 13 14:52:06 2021 +0000
b
b'@@ -0,0 +1,403 @@\n+@HD\tVN:1.0\tSO:coordinate\n+@SQ\tSN:chrM\tLN:17009\n+@PG\tID:hisat2\tPN:hisat2\tVN:2.0.5\tCL:"/var/galaxy/tool_dependencies/_conda/envs/mulled-v1-2bb67013a57cac1e35f407d06d1f347baae35159f498496f1e36f84784069212/bin/hisat2-align-s --wrapper basic-0 -p 4 -x genome -1 input_f.fastq -2 input_r.fastq"\n+chrM_111_723_3:2:0_4:1:0_c3/1_CT\t163\tchrM\t114\t60\t1S197M\t=\t524\t611\tCGACTCAGTCAAATATGTGGTTGCTGGGCTTATTCTCTATGCGGGTTCTCCACACGCACAGACAGTCAGGGTGCTATTCAGTCAATGGTCACAGGACATATACTTAAATTCCTATTGTTCCACAGGACACGGGATGCGCGCACCCAGGTTTGCGTGCACACGTGTACACGTACACACGTAGACACGTACACACGTACA\t222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tAS:i:-16\tZS:i:-28\tXN:i:0\tXM:i:5\tXO:i:0\tXG:i:0\tNM:i:5\tMD:Z:15A1T26G100C33C17\tYS:i:-15\tYT:Z:CP\tNH:i:1\n+chrM_513_1000_4:1:0_4:2:0_60/1_GT\t163\tchrM\t516\t60\t1S197M\t=\t801\t486\tCACACACGTGTACACGTACACACGTACACACGTACACACGTACACACTTATCCACGCGAACGCTTTAATTTAAGTAAATAACTCGCTTAATCAAACCCCCCTTACCCCCCGTTAACCTTATTTCTAATAATACGTGCCTATTTATGTCTTGCCCAACCCCACAAACAAGACTAGACCGTACCTAAATATAAGGCCTAA\t222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tAS:i:-16\tXN:i:0\tXM:i:5\tXO:i:0\tXG:i:0\tNM:i:5\tMD:Z:46G3A31A39A29A44\tYS:i:-18\tYT:Z:CP\tNH:i:1\n+chrM_518_1106_2:1:0_5:2:0_44/1_CA\t163\tchrM\t520\t60\t198M\t=\t907\t587\tACGTGTACACGTACACACTTACACACGTACACACGTACACACTTATACACGCGAACGCTTTAATTTAAGTAAATAACTAGCTTAATCAAACCCCCCTTACCCCCCGTAAACCTTATTTATAATAATACGTGCCTATTTATGTCTTGCCAAACCCCACAAACAAGACTAGACCGTACCTAAATATAAGGCCTAAGAAAA\t222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tAS:i:-9\tXN:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tNM:i:3\tMD:Z:18G23G64T90\tYS:i:-21\tYT:Z:CP\tNH:i:1\n+chrM_111_723_3:2:0_4:1:0_c3/2_CT\t83\tchrM\t524\t60\t200M\t=\t114\t-611\tGTACACGTACACACGTACACACGTACACACGTACACACTTATACACCCGAACGCTTTATTTTAAGTAAATAACTAGCTTAATCAAACCCCCCTTACCCCCCGTTAACCTTATTTATAATAATACGTGCCTATTTATGTCTTGCCAAACCCCACAAACAAGACTAGACCGTACCTAAATATTAGGCCTAAGATAACGCTTA\t22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tAS:i:-15\tXN:i:0\tXM:i:5\tXO:i:0\tXG:i:0\tNM:i:5\tMD:Z:38G7G11A121A10A8\tYS:i:-16\tYT:Z:CP\tNH:i:1\n+chrM_578_1113_4:1:0_4:2:0_19/1_TT\t163\tchrM\t580\t60\t198M\t=\t914\t534\tTAATTTAAGTAAATAACTAGCTTAATCAAACCCCCCTTACCCCCCGTTAACCTTATTTATAATAATCCGTGCCTATTTATGTCTTGCCAAACCCCACAAACAAGACTAGACCGTACCTAAATATAAGGCCTACGAAAACGCTTATAAGCTTACCAATCCCCTATTATTACTAGCTACTACGCCTAAATCATAACTCTG\t222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tAS:i:-15\tZS:i:-24\tXN:i:0\tXM:i:5\tXO:i:0\tXG:i:0\tNM:i:5\tMD:Z:66A65A46A0T0A16\tYS:i:-18\tYT:Z:CP\tNH:i:1\n+chrM_637_1116_4:2:0_2:5:0_8f/2_AT\t99\tchrM\t637\t60\t200M\t=\t917\t478\tTATCATAATACGTGCCTATTTATGTCATGCCAAACCCCACAAACAAGACTAGACCGTACCTAAATATAAGGCCTAAGACAACGCTTATAAGCTTACCAATCCCCTATTATTACTAGCTACTAAGCCTAAATCATAACTCTGTTCGCAGTTATCTATAGATATACCGACCTGACTCTAATTCGACCCTATCGAACAACATT\t22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tAS:i:-18\tZS:i:-19\tXN:i:0\tXM:i:6\tXO:i:0\tXG:i:0\tNM:i:6\tMD:Z:3A22T51A44T0A57T17\tYS:i:-21\tYT:Z:CP\tNH:i:1\n+chrM_513_1000_4:1:0_4:2:0_60/2_GT\t83\tchrM\t801\t60\t200M\t=\t516\t-486\tCGACCTGACTCTAATTCGTCCCTATCGAACAACATTTTACATGTCTACGTTAGCACCACATCCCAGTTAATGTAGCGTAAACCTATAAAGCAAGGCACTGAAAATGCCTAGATGAGTAGCCAGACTCCATAAACACAAAAGTTTGGTCCTGGCCTTTCCATTAGTTATTAATAAGATTACACATGCAAGCCTCCGCAGCC\t22222222222222222222222222222222222222222222222222222222222222222222'..b'TACATAAGACATACTATGTATATCGTGCATTAATTGCTAGTCCCCATAAATATTAAGCATGTACAGTAGTTTATATATATTACATAAGACATAATAGTGCTTAATCGGGCATTCACCTTAATTCTAGGACAGTCTTCTATGGAACTCAACTATTCCAAAGAT\t222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tAS:i:-16\tZS:i:-36\tXN:i:0\tXM:i:5\tXO:i:0\tXG:i:0\tNM:i:5\tMD:Z:18A6T57G59T35C17\tYS:i:-15\tYT:Z:CP\tNH:i:1\n+chrM_16373_16837_7:2:0_4:1:0_9/2_GT\t83\tchrM\t16638\t60\t200M\t=\t16375\t-463\tATTAAGCATGTACAGTAGTTTATATATATTACATAACACATACTATGTATTTCGTGCATTAATTGCTAGTCCCAATAAATATTAAGCATGTACAGTTGTTTATATATATTACATAAGACATAATAGTGCTTAATCGTGCATTCACCTTAATTCTAGGACAGTCTTCTATGGACCTCAACTATTCCAAAGAGCTTAATCAC\t22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tAS:i:-15\tXN:i:0\tXM:i:5\tXO:i:0\tXG:i:0\tNM:i:5\tMD:Z:36G13A22C2G19A103\tYS:i:-27\tYT:Z:CP\tNH:i:1\n+chrM_16363_16864_3:2:1_3:2:0_ba/2_TA\t83\tchrM\t16665\t60\t200M\t=\t16365\t-500\tATTACATAAGACATACTATGTATATCGTACATTAATTGCTAGTCCCCATAAATATTAAGCATGTACAGTAGTTTATATATATTAAATAAGACATAATAGTGCTTTATAGTGCATTCACCTTAATTCTAGGACAGTCTTCTATGGACCTCAACTATTCCAAAGAGCTTAATCACCTGGCCTCGAGAAACCAGCAATCCTTG\t22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tAS:i:-15\tXN:i:0\tXM:i:5\tXO:i:0\tXG:i:0\tNM:i:5\tMD:Z:28G20G34C19A2C92\tYS:i:-23\tYT:Z:CP\tNH:i:1\n+chrM_16484_16992_6:0:0_3:0:0_2c/1_AG\t147\tchrM\t16793\t60\t198M\t=\t16484\t-507\tGGACAGTCTTCTATGGACCTCAACGATTCCAAAGAGCTTAATCACCTGGCCTCGAGAAACCAGCAATCCTTGCTCGAACGTGTACCTCTTCTCGCTCCGGGCCCATTTCAACGTGGGGGTTTCTATAACGGAACTATAACTGGCATATGGTTCTTACTTCAGGGCCATAAAATCCTTGAAACCAATCCTTCAGTTCTC\t222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tAS:i:-9\tXN:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tNM:i:3\tMD:Z:24T113C7C51\tYS:i:-18\tYT:Z:CP\tNH:i:1\n+chrM_12782_13288_5:1:0_6:0:0_3/2_TA\t77\t*\t0\t0\t*\t*\t0\t0\tCAACTATAATATTTATCTCCTCAGGAGAACAAGCAATTATCTCAAACTGACACTGACTATCAATCCAAACTCTCAAGCTATCACTAAGCTTTAAAATAGATTAATTCTCAACCATCATTATCCGTGTAGCGCTTTTCGTCACATGGTCCATCATAGAATTCTCAATGTGGTACATGCACTCCGACCCATACATCAACCGA\t22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tYT:Z:UP\n+chrM_12782_13288_5:1:0_6:0:0_3/1_TA\t141\t*\t0\t0\t*\t*\t0\t0\tCATCGAAGCCCTAGTAATGGAATATTTAGGTTCTCGTGTTGGGTGATAAAGATTTGTTGGAAGTCCCATGCGTTTGAGTTGGTTAGAAATCCTGCTATGGCTATGATTAAGCCTACGTCTCCAATTCGGTTGTAGAGGATTGCTTGTAGGGCGGCAGTGTTTGCATCTGCTCGGCCATATCATCATCCGATAAGTCGA\t222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tYT:Z:UP\n+chrM_15422_15911_4:2:0_5:1:0_99/2_GA\t77\t*\t0\t0\t*\t*\t0\t0\tATGGGATACGTCCTGCCATGAGGCCAAATGTCCTTCTGAGGAGCAACGGTAATCCCTAACCTGCTGTCAGCAATTCCATACCTCGGGACTGAACTAGTAGAATGAATCTGAGGGGGGTTCTCAGTAGACAAAGCCACCCTAACACGATTCTTTGGCTTCCACTTCATTCTTTCATTCATTATCTCAGCCTTAGCAGGAGT\t22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tYT:Z:UP\n+chrM_15422_15911_4:2:0_5:1:0_99/1_GA\t141\t*\t0\t0\t*\t*\t0\t0\tGCTGCCCCTAGTTTGTTAGGGATGGATCGGAGAATTGCGTATGCGAATAGGAAGTATCATTCAGGTTAAATATGGGGAGGTGAATTTAAAGGGTTGGCTGGGATGTAGTTGTGTGGGTCTCCTAGCAGGTCTGGTGAAAATAGGACGAGTAGTATGAGTGTTAAAACTAGTACTAGAAGACCTAGGATTTCTTTGATT\t222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222\tYT:Z:UP\n'
b
diff -r 9fa7803d1c51 -r 27ac32a22ad2 test-data/group_out4.bam
b
Binary file test-data/group_out4.bam has changed
b
diff -r 9fa7803d1c51 -r 27ac32a22ad2 test-data/group_out4.tab
--- a/test-data/group_out4.tab Wed Jun 02 18:27:33 2021 +0000
+++ b/test-data/group_out4.tab Mon Sep 13 14:52:06 2021 +0000
b
b'@@ -0,0 +1,498 @@\n+read_id\tcontig\tposition\tgene\tumi\tumi_count\tfinal_umi\tfinal_umi_count\tunique_id\n+chrM_81_583_3:0:0_2:0:0_f4/1_TT\tchrM\t80\tNA\tTT\t1\tTT\t1\t0\n+chrM_110_732_3:0:0_2:0:0_160/1_CC\tchrM\t109\tNA\tCC\t1\tCC\t1\t1\n+chrM_118_613_6:0:0_4:0:0_169/1_AG\tchrM\t117\tNA\tAG\t1\tAG\t1\t2\n+chrM_149_684_2:0:0_2:0:0_6e/1_TA\tchrM\t148\tNA\tTA\t1\tTA\t1\t3\n+chrM_152_616_6:0:0_2:0:0_84/1_GC\tchrM\t151\tNA\tGC\t1\tGC\t1\t4\n+chrM_247_748_8:0:0_1:0:0_1b7/1_GC\tchrM\t246\tNA\tGC\t1\tGC\t1\t5\n+chrM_280_772_1:0:0_9:0:0_31/1_CG\tchrM\t279\tNA\tCG\t1\tCG\t1\t6\n+chrM_292_819_2:0:0_1:0:0_18d/1_CA\tchrM\t291\tNA\tCA\t1\tCA\t1\t7\n+chrM_390_890_3:0:0_6:0:0_bf/1_CA\tchrM\t389\tNA\tCA\t1\tCA\t1\t8\n+chrM_447_921_2:0:0_3:0:0_1cc/1_AC\tchrM\t446\tNA\tAC\t1\tAC\t1\t9\n+chrM_469_983_2:0:0_1:0:0_121/1_AC\tchrM\t468\tNA\tAC\t1\tAC\t1\t10\n+chrM_541_1074_3:0:0_8:1:0_22/1_AC\tchrM\t540\tNA\tAC\t1\tAC\t1\t11\n+chrM_8_556_3:0:0_2:0:0_1b1/1_AC\tchrM\t556\tNA\tAC\t1\tAC\t1\t12\n+chrM_112_577_2:0:0_4:0:0_17b/1_CG\tchrM\t577\tNA\tCG\t1\tCG\t1\t13\n+chrM_627_1063_6:0:0_4:1:0_12a/1_AA\tchrM\t626\tNA\tAA\t1\tAA\t1\t14\n+chrM_164_650_5:0:0_3:0:0_164/1_AC\tchrM\t650\tNA\tAC\t1\tAC\t1\t15\n+chrM_200_695_6:0:0_2:0:0_5a/1_TA\tchrM\t695\tNA\tTA\t3\tTA\t3\t16\n+chrM_168_695_5:0:0_2:0:0_1af/1_TA\tchrM\t695\tNA\tTA\t3\tTA\t3\t16\n+chrM_170_695_1:0:0_6:0:0_1e3/1_TA\tchrM\t695\tNA\tTA\t3\tTA\t3\t16\n+chrM_209_705_3:0:0_2:0:0_1b3/1_TA\tchrM\t705\tNA\tTA\t1\tTA\t1\t17\n+chrM_288_807_3:0:0_5:0:0_8e/1_AG\tchrM\t807\tNA\tAG\t1\tAG\t1\t18\n+chrM_818_1274_1:1:0_4:0:0_124/1_TC\tchrM\t817\tNA\tTC\t1\tTC\t1\t19\n+chrM_442_872_4:0:0_6:0:0_146/1_AT\tchrM\t872\tNA\tAT\t1\tAT\t1\t20\n+chrM_460_919_3:0:0_1:0:0_0/1_GA\tchrM\t919\tNA\tGA\t1\tGA\t1\t21\n+chrM_384_950_5:0:0_2:0:0_34/1_GG\tchrM\t950\tNA\tGG\t1\tGG\t1\t22\n+chrM_476_980_5:0:0_2:0:0_133/1_TA\tchrM\t980\tNA\tTA\t1\tTA\t1\t23\n+chrM_552_999_4:0:0_5:0:0_bb/1_AT\tchrM\t999\tNA\tAT\t1\tAT\t1\t24\n+chrM_512_1030_5:0:0_7:1:0_a5/1_AC\tchrM\t1030\tNA\tAC\t1\tAC\t1\t25\n+chrM_1103_1634_5:0:0_3:0:0_36/1_TA\tchrM\t1102\tNA\tTA\t1\tTA\t1\t26\n+chrM_648_1148_3:0:0_3:1:0_1d1/1_TT\tchrM\t1148\tNA\tTT\t1\tTT\t1\t27\n+chrM_1167_1797_8:0:0_2:1:0_14e/1_GT\tchrM\t1166\tNA\tGT\t1\tGT\t1\t28\n+chrM_619_1169_3:0:0_5:1:0_33/1_CC\tchrM\t1169\tNA\tCC\t1\tCC\t1\t29\n+chrM_668_1248_0:0:0_4:0:0_fa/1_TT\tchrM\t1248\tNA\tTT\t1\tTT\t1\t30\n+chrM_1288_1759_4:0:0_2:1:0_a1/1_TA\tchrM\t1287\tNA\tTA\t1\tTA\t1\t31\n+chrM_1327_1786_4:0:0_3:1:0_1ad/1_CA\tchrM\t1326\tNA\tCA\t1\tCA\t1\t32\n+chrM_866_1351_5:1:0_6:0:0_1c9/1_TA\tchrM\t1351\tNA\tTA\t1\tTA\t1\t33\n+chrM_853_1366_6:1:0_4:0:0_13b/1_TC\tchrM\t1366\tNA\tTC\t1\tTC\t1\t34\n+chrM_1399_1851_2:0:0_11:1:0_85/1_AT\tchrM\t1398\tNA\tAT\t1\tAT\t1\t35\n+chrM_946_1444_4:1:0_6:0:0_113/1_TT\tchrM\t1444\tNA\tTT\t1\tTT\t1\t36\n+chrM_943_1485_2:1:0_5:0:0_1e2/1_AT\tchrM\t1485\tNA\tAT\t1\tAT\t1\t37\n+chrM_1022_1501_4:0:0_5:0:0_132/1_GA\tchrM\t1501\tNA\tGA\t1\tGA\t1\t38\n+chrM_1504_2013_4:1:0_3:0:0_10b/1_AT\tchrM\t1503\tNA\tAT\t1\tAT\t1\t39\n+chrM_1505_1934_1:1:0_5:0:0_157/1_TC\tchrM\t1504\tNA\tTC\t1\tTC\t1\t40\n+chrM_997_1511_2:1:0_1:0:0_1d8/1_AT\tchrM\t1511\tNA\tAT\t1\tAT\t1\t41\n+chrM_1521_2070_5:1:0_3:0:0_1a/1_TA\tchrM\t1520\tNA\tTA\t1\tTA\t1\t42\n+chrM_1575_2126_5:1:0_2:1:0_18c/1_TA\tchrM\t1574\tNA\tTA\t1\tTA\t1\t43\n+chrM_1063_1598_5:0:0_5:0:0_f5/1_TG\tchrM\t1598\tNA\tTG\t1\tTG\t1\t44\n+chrM_1605_2128_3:1:0_4:1:0_1ea/1_TT\tchrM\t1604\tNA\tTT\t1\tTT\t1\t45\n+chrM_1065_1609_3:0:0_5:0:0_18e/1_TA\tchrM\t1609\tNA\tTA\t1\tTA\t1\t46\n+chrM_1140_1619_1:0:0_4:0:0_74/1_TT\tchrM\t1619\tNA\tTT\t1\tTT\t1\t47\n+chrM_1111_1626_4:0:0_2:0:0_186/1_AC\tchrM\t1626\tNA\tAC\t1\tAC\t1\t48\n+chrM_1664_2135_0:1:0_3:1:0_179/1_TT\tchrM\t1663\tNA\tTT\t1\tTT\t1\t49\n+chrM_1699_2147_1:0:0_7:0:0_10c/1_AA\tchrM\t1698\tNA\tAA\t1\tAA\t1\t50\n+chrM_1706_2240_3:0:0_6:0:0_99/1_GT\tchrM\t1705\tNA\tGT\t1\tGT\t1\t51\n+chrM_1756_2309_0:0:0_3:0:0_c3/1_AT\tchrM\t1755\tNA\tAT\t1\tAT\t1\t52\n+chrM_1344_1758_4:0:0_3:1:0_75/1_TT\tchrM\t1758\tNA\tTT\t1\tTT\t1\t53\n+chrM_1223_1777_5:0:0_6:1:0_42/1_TG\tchrM\t1777\tNA\tTG\t1\tTG\t1\t54\n+chrM_1790_2351_4:0:0_4:0:0_f3/1_CC\tchrM\t1789\tNA\tCC\t1\tCC\t1\t55\n+chrM_1308_1807_4:0:0_4:1:0_45/1_TA\tchrM\t1807\tNA\tTA\t1\tTA\t1\t56\n+chrM_1814_2315_6:0:0_3:0:0_12d/1_AA\tchrM\t1813\tNA\tAA\t1\tAA\t1\t57\n+chrM_1862_2304_4:0:0_1:0:0_19d/1_AA\tchrM\t1861\tNA\tAA\t1\tAA\t1\t58\n+chrM_1363_1869_5:0:0_3:1:0_aa/1_TA\tchrM\t1869\tNA\tTA\t1\tTA\t1\t59\n+chrM_1363_1887_2:0:0_3:0:0_d3/1_CA\tchrM\t1887\tNA\tCA\t1\tCA\t1\t60\n+chrM_1428_1904_7:0:0_1:0:0_1b0/1_AC\tchrM\t1904\tNA\tAC\t1\tAC\t1\t6'..b'\tTA\t1\t431\n+chrM_14135_14581_5:0:0_5:0:0_1de/1_AT\tchrM\t14581\tNA\tAT\t1\tAT\t1\t432\n+chrM_14612_15169_2:0:0_5:0:0_e4/1_TT\tchrM\t14611\tNA\tTT\t1\tTT\t1\t433\n+chrM_14668_15109_2:0:0_3:0:0_3d/1_TG\tchrM\t14667\tNA\tTG\t1\tTG\t1\t434\n+chrM_14696_15126_2:0:0_2:0:0_a7/1_AA\tchrM\t14695\tNA\tAA\t1\tAA\t1\t435\n+chrM_14727_15156_4:0:0_4:0:0_178/1_GA\tchrM\t14726\tNA\tGA\t1\tGA\t1\t436\n+chrM_14245_14783_3:0:0_1:0:0_9d/1_GG\tchrM\t14783\tNA\tGG\t1\tGG\t1\t437\n+chrM_14314_14802_5:0:0_3:0:0_191/1_GG\tchrM\t14802\tNA\tGG\t1\tGG\t1\t438\n+chrM_14817_15261_5:0:0_4:0:0_65/1_AT\tchrM\t14816\tNA\tAT\t1\tAT\t1\t439\n+chrM_14863_15320_2:0:0_4:0:0_d8/1_AA\tchrM\t14862\tNA\tAA\t1\tAA\t1\t440\n+chrM_14869_15387_2:0:0_7:0:0_1f/1_AC\tchrM\t14868\tNA\tAC\t1\tAC\t1\t441\n+chrM_14888_15340_3:0:0_3:0:0_112/1_GG\tchrM\t14887\tNA\tGG\t1\tGG\t1\t442\n+chrM_14368_14894_6:0:0_5:0:0_40/1_TC\tchrM\t14894\tNA\tTC\t1\tTC\t1\t443\n+chrM_14410_14941_6:0:0_3:0:0_e5/1_TA\tchrM\t14941\tNA\tTA\t1\tTA\t1\t444\n+chrM_14962_15543_5:0:0_8:0:0_46/1_CA\tchrM\t14961\tNA\tCA\t1\tCA\t1\t445\n+chrM_14456_14987_3:0:0_5:0:0_ab/1_GT\tchrM\t14987\tNA\tGT\t1\tGT\t1\t446\n+chrM_15070_15569_5:0:0_5:0:0_cf/1_CA\tchrM\t15069\tNA\tCA\t1\tCA\t1\t447\n+chrM_15140_15686_2:0:0_5:0:0_185/1_CC\tchrM\t15139\tNA\tCC\t1\tCC\t1\t448\n+chrM_15142_15661_7:0:0_3:0:0_11e/1_CT\tchrM\t15141\tNA\tCT\t1\tCT\t1\t449\n+chrM_15192_15694_5:0:0_2:0:0_f7/1_CA\tchrM\t15191\tNA\tCA\t1\tCA\t1\t450\n+chrM_15211_15685_5:0:0_2:0:0_1d7/1_AC\tchrM\t15210\tNA\tAC\t1\tAC\t1\t451\n+chrM_15225_15786_3:0:0_6:0:0_17e/1_TT\tchrM\t15224\tNA\tTT\t1\tTT\t1\t452\n+chrM_15258_15810_4:0:0_6:0:0_5f/1_TT\tchrM\t15257\tNA\tTT\t1\tTT\t1\t453\n+chrM_14817_15317_5:0:0_2:0:0_59/1_GC\tchrM\t15317\tNA\tGC\t1\tGC\t1\t454\n+chrM_15324_15836_4:0:0_3:0:0_94/1_AC\tchrM\t15323\tNA\tAC\t1\tAC\t1\t455\n+chrM_15365_15880_4:1:0_3:0:0_80/1_CA\tchrM\t15364\tNA\tCA\t1\tCA\t1\t456\n+chrM_15408_15863_2:1:0_3:0:0_1e6/1_AG\tchrM\t15407\tNA\tAG\t1\tAG\t1\t457\n+chrM_15439_15924_0:0:0_4:0:0_172/1_TG\tchrM\t15438\tNA\tTG\t1\tTG\t1\t458\n+chrM_14931_15457_2:0:0_4:1:0_1a1/1_AA\tchrM\t15457\tNA\tAA\t1\tAA\t1\t459\n+chrM_15547_16054_5:0:0_1:0:0_af/1_GA\tchrM\t15546\tNA\tGA\t1\tGA\t1\t460\n+chrM_15134_15560_3:0:0_4:0:0_1c4/1_GG\tchrM\t15560\tNA\tGG\t1\tGG\t1\t461\n+chrM_15575_16118_5:0:0_4:0:0_1aa/1_GC\tchrM\t15574\tNA\tGC\t1\tGC\t1\t462\n+chrM_15052_15629_7:0:0_9:0:0_15f/1_GA\tchrM\t15629\tNA\tGA\t1\tGA\t1\t463\n+chrM_15698_16224_5:0:0_6:0:0_138/1_AC\tchrM\t15697\tNA\tAC\t1\tAC\t1\t464\n+chrM_15247_15721_2:1:0_6:0:0_17/1_TC\tchrM\t15721\tNA\tTC\t1\tTC\t1\t465\n+chrM_15218_15763_2:0:0_2:0:0_171/1_AG\tchrM\t15763\tNA\tAG\t1\tAG\t1\t466\n+chrM_15860_16419_5:0:0_8:0:0_53/1_TA\tchrM\t15859\tNA\tTA\t1\tTA\t1\t467\n+chrM_15864_16329_3:0:0_3:0:0_18b/1_CG\tchrM\t15863\tNA\tCG\t1\tCG\t1\t468\n+chrM_15507_15962_3:0:0_0:0:0_5b/1_GT\tchrM\t15962\tNA\tGT\t1\tGT\t1\t469\n+chrM_15430_15985_5:0:0_3:0:0_2c/1_AT\tchrM\t15985\tNA\tAT\t1\tAT\t1\t470\n+chrM_15706_16238_6:0:0_7:0:0_7b/1_AG\tchrM\t16238\tNA\tAG\t1\tAG\t1\t471\n+chrM_16252_16701_2:1:0_5:0:0_147/1_TA\tchrM\t16251\tNA\tTA\t1\tTA\t1\t472\n+chrM_15753_16280_5:0:0_6:0:0_180/1_TG\tchrM\t16280\tNA\tTG\t1\tTG\t1\t473\n+chrM_15777_16347_2:0:0_6:0:0_16/1_AT\tchrM\t16347\tNA\tAT\t1\tAT\t1\t474\n+chrM_16366_16911_6:1:0_6:1:1_168/1_CC\tchrM\t16365\tNA\tCC\t1\tCC\t1\t475\n+chrM_16370_16833_6:1:0_6:0:1_1bb/1_AC\tchrM\t16369\tNA\tAC\t1\tAC\t1\t476\n+chrM_16402_16876_2:0:0_1:0:1_98/1_CA\tchrM\t16401\tNA\tCA\t1\tCA\t1\t477\n+chrM_16426_16953_3:0:0_2:2:0_60/1_AA\tchrM\t16425\tNA\tAA\t1\tAA\t1\t478\n+chrM_15986_16442_3:0:0_7:0:0_15d/1_TG\tchrM\t16442\tNA\tTG\t1\tTG\t1\t479\n+chrM_16030_16460_6:0:0_4:1:0_20/1_AT\tchrM\t16460\tNA\tAT\t1\tAT\t1\t480\n+chrM_15909_16463_3:0:0_7:1:0_106/1_GT\tchrM\t16463\tNA\tGT\t1\tGT\t1\t481\n+chrM_15984_16500_4:0:0_1:0:0_6b/1_GT\tchrM\t16500\tNA\tGT\t1\tGT\t1\t482\n+chrM_16070_16513_3:0:0_7:1:0_1e7/1_TA\tchrM\t16513\tNA\tTA\t1\tTA\t1\t483\n+chrM_16064_16572_4:0:0_6:1:0_f1/1_TG\tchrM\t16572\tNA\tTG\t1\tTG\t1\t484\n+chrM_16245_16740_4:1:0_2:0:1_127/1_TA\tchrM\t16742\tNA\tTA\t1\tTA\t1\t485\n+chrM_16315_16834_5:0:0_3:0:1_18/1_TT\tchrM\t16834\tNA\tTT\t1\tTT\t1\t486\n+chrM_16229_16843_2:1:0_2:0:1_c4/1_GC\tchrM\t16843\tNA\tGC\t1\tGC\t1\t487\n+chrM_16420_16905_5:0:0_2:0:1_159/1_GT\tchrM\t16905\tNA\tGT\t2\tGT\t2\t488\n+chrM_16420_16905_5:0:0_2:0:1_159/1_GT\tchrM\t16905\tNA\tGT\t2\tGT\t2\t488\n+chrM_16419_16923_3:0:0_3:0:1_183/1_CG\tchrM\t16923\tNA\tCG\t1\tCG\t1\t489\n+chrM_16585_16993_5:0:1_5:1:0_1b9/1_GA\tchrM\t16993\tNA\tGA\t1\tGA\t1\t490\n'
b
diff -r 9fa7803d1c51 -r 27ac32a22ad2 test-data/out_wl_paired.log
--- a/test-data/out_wl_paired.log Wed Jun 02 18:27:33 2021 +0000
+++ b/test-data/out_wl_paired.log Mon Sep 13 14:52:06 2021 +0000
b
@@ -1,14 +1,20 @@
-# output generated by whitelist --bc-pattern=CCCNNNNNNNNXXXXX --subset-reads=0 --stdin=/tmp/tmpibtvD6/files/000/dataset_5.dat --read2-in=/tmp/tmpibtvD6/files/000/dataset_6.dat --expect-cells=5 --error-correct-threshold=3 --method=reads --plot-prefix=OUT --log=/tmp/tmpibtvD6/files/000/dataset_8.dat
-# job started at Sun Feb 25 10:50:16 2018 on bag -- e78e4e5b-e99e-426a-8a92-c8b3beeadf18
-# pid: 2385, system: Linux 4.13.0-32-generic #35-Ubuntu SMP Thu Jan 25 09:13:46 UTC 2018 x86_64
+# UMI-tools version: 1.1.2
+# output generated by whitelist --bc-pattern=CCCNNNNNNNNXXXXX --extract-method=string --subset-reads=0 --knee-method=density --stdin=input_read1.gz --read2-in=input_read2.gz --expect-cells=5 --error-correct-threshold=3 --method=reads --plot-prefix=OUT --log=/tmp/tmpcx2d26we/files/0/0/8/dataset_008b1843-bfa2-44fb-9d3c-52695bd9ce74.dat --log2stderr
+# job started at Tue Jul 13 15:21:12 2021 on MOLSYB009 -- ba3841c0-b2d5-4188-88ca-4ee241163293
+# pid: 1155608, system: Linux 5.4.0-74-generic #83-Ubuntu SMP Sat May 8 02:35:39 UTC 2021 x86_64
+# allow_threshold_error                   : False
 # blacklist_tsv                           : None
 # cell_number                             : False
 # compresslevel                           : 6
+# ed_above_threshold                      : None
 # error_correct_threshold                 : 3
 # expect_cells                            : 5
 # extract_method                          : string
-# filter_cell_barcodes                    : False
-# log2stderr                              : False
+# filtered_out                            : None
+# filtered_out2                           : None
+# ignore_suffix                           : False
+# knee_method                             : density
+# log2stderr                              : True
 # loglevel                                : 1
 # method                                  : reads
 # pattern                                 : CCCNNNNNNNNXXXXX
@@ -16,25 +22,31 @@
 # plot_prefix                             : OUT
 # prime3                                  : None
 # random_seed                             : None
-# read2_in                                : /tmp/tmpibtvD6/files/000/dataset_6.dat
+# read2_in                                : input_read2.gz
 # short_help                              : None
-# stderr                                  : <_io.TextIOWrapper name='<stderr>' mode='w' encoding='UTF-8'>
-# stdin                                   : <_io.TextIOWrapper name='/tmp/tmpibtvD6/files/000/dataset_5.dat' mode='r' encoding='UTF-8'>
-# stdlog                                  : <_io.TextIOWrapper name='/tmp/tmpibtvD6/files/000/dataset_8.dat' mode='a' encoding='UTF-8'>
-# stdout                                  : <_io.TextIOWrapper name='<stdout>' mode='w' encoding='UTF-8'>
+# stderr                                  : <_io.TextIOWrapper name='<stderr>' mode='w' encoding='utf-8'>
+# stdin                                   : <_io.TextIOWrapper name='input_read1.gz' encoding='ascii'>
+# stdlog                                  : <_io.TextIOWrapper name='/tmp/tmpcx2d26we/files/0/0/8/dataset_008b1843-bfa2-44fb-9d3c-52695bd9ce74.dat' mode='a' encoding='UTF-8'>
+# stdout                                  : <_io.TextIOWrapper name='<stdout>' mode='w' encoding='utf-8'>
 # subset_reads                            : 0
 # timeit_file                             : None
 # timeit_header                           : None
 # timeit_name                             : all
+# tmpdir                                  : None
 # whitelist_tsv                           : None
-2018-02-25 10:50:16,016 INFO Starting barcode extraction
-2018-02-25 10:50:16,017 INFO Parsed 0 reads
-2018-02-25 10:50:16,019 INFO Starting - whitelist determination
-2018-02-25 10:50:17,208 INFO Finished - whitelist determination
-2018-02-25 10:50:17,208 INFO Starting - finding putative error cell barcodes
-2018-02-25 10:50:17,208 INFO Finished - finding putative error cell barcodes
-2018-02-25 10:50:17,208 INFO Writing out whitelist
-2018-02-25 10:50:17,208 INFO Parsed 100 reads
-2018-02-25 10:50:17,208 INFO 100 reads matched the barcode pattern
-2018-02-25 10:50:17,208 INFO Found 23 unique cell barcodes
-# job finished in 1 seconds at Sun Feb 25 10:50:17 2018 --  2.35  0.08  0.00  0.00 -- e78e4e5b-e99e-426a-8a92-c8b3beeadf18
+2021-07-13 15:21:12,587 INFO Starting barcode extraction
+2021-07-13 15:21:12,588 INFO Parsed 0 reads
+2021-07-13 15:21:12,590 INFO Starting - whitelist determination
+2021-07-13 15:21:14,249 INFO Finished - whitelist determination
+2021-07-13 15:21:14,249 INFO Starting - finding putative error cell barcodes
+2021-07-13 15:21:14,249 INFO building bktree
+2021-07-13 15:21:14,249 INFO done building bktree
+2021-07-13 15:21:14,249 INFO Finished - finding putative error cell barcodes
+2021-07-13 15:21:14,249 INFO Top 1 cell barcodes passed the selected threshold
+2021-07-13 15:21:14,249 INFO Writing out whitelist
+2021-07-13 15:21:14,249 INFO Parsed 100 reads
+2021-07-13 15:21:14,249 INFO 100 reads matched the barcode pattern
+2021-07-13 15:21:14,249 INFO Found 23 unique cell barcodes
+2021-07-13 15:21:14,249 INFO Found 15 total reads matching the selected cell barcodes
+2021-07-13 15:21:14,249 INFO Found 85 total reads which can be error corrected to the selected cell barcodes
+# job finished in 1 seconds at Tue Jul 13 15:21:14 2021 --  7.19  0.62  0.08  0.02 -- ba3841c0-b2d5-4188-88ca-4ee241163293
b
diff -r 9fa7803d1c51 -r 27ac32a22ad2 umi-tools_extract.xml
--- a/umi-tools_extract.xml Wed Jun 02 18:27:33 2021 +0000
+++ b/umi-tools_extract.xml Mon Sep 13 14:52:06 2021 +0000
[
b'@@ -1,118 +1,96 @@\n-<tool id="umi_tools_extract" name="UMI-tools extract" version="@VERSION@.2">\n+<tool id="umi_tools_extract" name="UMI-tools extract" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">\n     <description>Extract UMI from fastq files</description>\n+    <expand macro="bio_tools"/>\n     <macros>\n         <import>macros.xml</import>\n-        <macro name="out_conditional">\n-            <actions>\n-                <conditional name="input_type.type">\n-                    <when value="paired_collection" >\n-                        <action type="format">\n-                            <option type="from_param" name="input_type.input_readpair" param_attribute="forward.ext" />\n-                        </action>\n-                    </when>\n-                    <when value="paired" >\n-                        <action type="format">\n-                            <option type="from_param" name="input_type.input_read1"  param_attribute="ext" />\n-                        </action>\n-                    </when>\n-                </conditional>\n-            </actions>\n-        </macro>\n     </macros>\n     <expand macro="requirements" />\n     <command detect_errors="exit_code"><![CDATA[\n     @COMMAND_LINK@\n \n     umi_tools extract\n-            --extract-method=\'$extract_method.value\'\n-            --bc-pattern=\'$bc_pattern\'\n-\n-            #if $input_type.type == \'single\':\n-                #if $gz:\n-                    --stdin=input_single.gz\n-                    --stdout out.gz\n-                #else\n-                    --stdin=input_single.txt\n-                    --stdout \'$out\'\n-                #end if\n+        \n+        @FASTQ_BARCODE_EXTRACTION_OPTIONS@\n+        #if $input_type_cond.input_type == \'single\':\n+            #if $gz:\n+                --stdin=input_single.gz\n+                --stdout out.gz\n+            #else\n+                --stdin=input_single.txt\n+                --stdout \'$out\'\n+            #end if\n+        #else:\n+            #if $gz:\n+                --stdin=input_read1.gz\n+                --read2-in=input_read2.gz\n+                --stdout out1.gz\n+                --read2-out=out2.gz\n             #else:\n-                #if $gz:\n-                    --stdin=input_read1.gz\n-                    --read2-in=input_read2.gz\n-                    --stdout out1.gz\n-                    --read2-out=out2.gz\n-                #else:\n-                    --stdin=input_read1.txt\n-                    --read2-in=input_read2.txt\n-                    --stdout \'$out1\'\n+                --stdin=input_read1.txt\n+                --read2-in=input_read2.txt\n+                #if $input_type_cond.input_type == \'paired\'\n+                    --stdout \'$out\'\n                     --read2-out=\'$out2\'\n-                #end if\n-                #if $input_type.barcode.barcode_select == "both_reads":\n-                    --split-barcode\n-                    --bc-pattern2=\'$input_type.barcode.bc_pattern2\'\n+                #else\n+                    --stdout \'$out_paired_collection.forward\'\n+                    --read2-out=\'$out_paired_collection.reverse\'\n                 #end if\n             #end if\n+            $input_type_cond.reconcile_pairs\n+        #end if\n \n-            #if $barcodes.use_barcodes.value == \'yes\':\n-                --filter-cell-barcode\n-                --whitelist=\'$barcodes.filter_barcode_file\'\n-                \'$barcodes.filter_correct.value\'\n-            #end if\n+        #if $whitelist\n+            --whitelist=\'$whitelist\'\n+        #end if\n+        #if $blacklist\n+            --blacklist=\'$blacklist\'\n+        #end if\n+        $error_correct_cell.value\n \n-            #if not $prime3:\n-                --3prime\n-            #end if\n-            #if $quality.quality_selector ==\'true\':\n+        #if $quality.quality_selector ==\'true\':\n+            #if str($quality.quality_filter_threshold) != \'\'\n                 --quality-filter-threshold \'$quality.quality_filter_threshold\'\n-                --quality-encoding \'$qual'..b'\n \n-\n-UMI-tools extract.py - Extract UMI from fastq\n-=============================================\n-\n-Purpose\n--------\n+extract - Extract UMI from fastq\n+================================\n \n Extract UMI barcode from a read and add it to the read name, leaving\n-any sample barcode in place. Can deal with paired end reads and UMIs\n-split across the paired ends\n-\n-Options\n--------\n+any sample barcode in place\n \n---split-barcode\n-       By default the UMI is assumed to be on the first read. Use this\n-       option if the UMI is contained on both reads and specify the\n-       pattern of the barcode/UMI on the second read using the option\n-       ``--bc-pattern2``\n+Can deal with paired end reads and UMIs\n+split across the paired ends. Can also optionally extract cell\n+barcodes and append these to the read name also. See the section below\n+for an explanation for how to encode the barcode pattern(s) to\n+specficy the position of the UMI +/- cell barcode.\n+\n \n---bc-pattern\n-       Use this option to specify the format of the UMI/barcode. Use Ns to\n-       represent the random positions and Xs to indicate the bc positions.\n-       Bases with Ns will be extracted and added to the read name. Remaining\n-       bases, marked with an X will be reattached to the read.\n-\n-       E.g. If the pattern is NNXXNN,\n-       Then the read:\n+Filtering and correcting cell barcodes\n+--------------------------------------\n \n-       @HISEQ:87:00000000 read1\n-       AAGGTTGCTGATTGGATGGGCTAG\n-       DA1AEBFGGCG01DFH00B1FF0B\n-       +\n-\n-       will become:\n-       @HISEQ:87:00000000_AATT read1\n-       GGGCTGATTGGATGGGCTAG\n-       1AFGGCG01DFH00B1FF0B\n-       +\n+``umi_tools extract`` can optionally filter cell barcodes against a user-supplied\n+whitelist (``--whitelist``). If a whitelist is not available for your data,\n+e.g\n+if you have performed droplet-based scRNA-Seq, you can use the\n+whitelist tool.\n \n---bc-pattern2\n-       Use this option to specify the format of the UMI/barcode for\n-       the second read pair if required. If --bc-pattern2 is not\n-       supplied, this defaults to the same pattern as --bc-pattern\n+Cell barcodes which do not match the whitelist (user-generated or\n+automatically generated) can also be optionally corrected using the\n+``--error-correct-cell`` option.\n \n---3prime\n-       By default the barcode is assumed to be on the 5\' end of the read, but\n-       use this option to sepecify that it is on the 3\' end instead\n-\n--L\n-       Specify a log file to retain logging information and final statistics\n-\n---split-barcode\n-       barcode is split across read pair\n+The whitelist should be in  the following format (tab-separated)::\n \n---quality-filter-threshold=QUALITY_FILTER_THRESHOLD\n-       Remove reads where any UMI base quality score falls\n-       below this threshold\n---quality-encoding=QUALITY_ENCODING\n-       Quality score encoding. Choose from phred33[33-77]\n-       phred64 [64-106] or solexa [59-106]\n-\n-Usage:\n-------\n+        AAAAAA    AGAAAA\n+        AAAATC\n+        AAACAT\n+        AAACTA    AAACTN,GAACTA\n+        AAATAC\n+        AAATCA    GAATCA\n+        AAATGT    AAAGGT,CAATGT\n \n-For single ended reads:\n-        umi_tools extract --bc-pattern=[PATTERN] -L extract.log [OPTIONS]\n-\n-reads from stdin and outputs to stdout.\n+Where column 1 is the whitelisted cell barcodes and column 2 is\n+the list (comma-separated) of other cell barcodes which should be\n+corrected to the barcode in column 1. If the ``--error-correct-cell``\n+option is not used, this column will be ignored. Any additional columns\n+in the whitelist input, such as the counts columns from the output of\n+umi_tools whitelist, will be ignored.\n \n-For paired end reads:\n-        umi_tools extract --bc-pattern=[PATTERN] --read2-in=[FASTQIN] --read2-out=[FASTQOUT] -L extract.log [OPTIONS]\n-\n-reads end one from stdin and end two from FASTQIN and outputs end one to stdin\n-and end two to FASTQOUT.\n+@FASTQ_BARCODE_EXTRACTION_HELP@\n \n     ]]></help>\n     <expand macro="citations" />\n'