Repository 'umi_tools_extract'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/umi_tools_extract

Changeset 5:f77bc14eba31 (2018-06-05)
Previous changeset 4:e73a22ff585c (2018-04-16) Next changeset 6:6417d5ed05c6 (2018-06-21)
Commit message:
planemo upload commit 57e3e460a740aa7aad217c8365527c49e88c9a30
modified:
umi-tools_extract.xml
added:
test-data/scrb_extract.fastq.gz
test-data/scrb_seq_barcodes
test-data/scrb_seq_fastq.1.gz
test-data/scrb_seq_fastq.2.gz
test-data/t_R2.fastq
b
diff -r e73a22ff585c -r f77bc14eba31 test-data/scrb_extract.fastq.gz
b
Binary file test-data/scrb_extract.fastq.gz has changed
b
diff -r e73a22ff585c -r f77bc14eba31 test-data/scrb_seq_barcodes
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/scrb_seq_barcodes Tue Jun 05 19:44:38 2018 -0400
b
@@ -0,0 +1,384 @@
+AAAACT
+GCTAGA
+AAAATC
+GCTTAC
+AAACAT
+GGACAT
+AAACTA
+GGCAAT
+AAAGTT
+GGGATT
+AAATAC
+GTACAC
+AAATCA
+GTCAAG
+AAATGT
+GTGACT
+AAATTG
+GTTCGA
+AACAAT
+TAGTGG
+AACATA
+TCCAAC
+AACTAA
+TCGAAG
+AAGATT
+TCTGCA
+AAGTAT
+TTCCTC
+AAGTTA
+TTGTCC
+AATAAC
+TTTGGC
+AATACA
+CCAACC
+AATAGT
+CCTTCC
+AATATG
+CTCTCC
+AATCAA
+GGACCA
+AATCTT
+GTACCG
+AATGAT
+ACCCCC
+AATGTA
+ACCCGG
+AATTAG
+ACCGCG
+AATTCT
+ACCGGC
+AATTGA
+ACGCCG
+AATTTC
+ACGCGC
+ACAAAT
+ACGGCC
+ACAATA
+ACGGGG
+ACATAA
+AGCCCG
+ACTAAA
+AGCCGC
+ACTATT
+AGCGCC
+ACTTAT
+AGCGGG
+ACTTTA
+AGGCCC
+AGAATT
+AGGCGG
+AGATAT
+AGGGCG
+AGATTA
+AGGGGC
+AGTAAT
+CACCCC
+AGTATA
+CACCGG
+AGTTAA
+CACGCG
+ATAAAC
+CACGGC
+ATAACA
+CAGCCG
+ATAAGT
+CAGCGC
+ATAATG
+CAGGCC
+ATACAA
+CAGGGG
+ATACTT
+CCACCG
+ATAGAT
+CCACGC
+ATAGTA
+CCAGGG
+ATATAG
+CCCACG
+ATATCT
+CCCAGC
+ATATGA
+CCCCAC
+ATATTC
+CCCCCA
+ATCAAA
+CCCCGT
+ATCATT
+CCCCTG
+ATCTAT
+CCCGAG
+ATCTTA
+CCCGGA
+ATGAAT
+CCCTGG
+ATGATA
+CCGAGG
+ATGTAA
+CCGCAG
+ATTAAG
+CCGCGA
+ATTACT
+CCGGAC
+ATTAGA
+CCGGCA
+ATTATC
+CCGGGT
+ATTCAT
+CCGGTG
+ATTCTA
+CCGTCG
+ATTGAA
+CCGTGC
+ATTGTT
+CCTCGG
+ATTTAC
+CCTGCG
+ATTTCA
+CCTGGC
+ATTTGT
+CGACCC
+ATTTTG
+CGACGG
+CAAAAT
+CGAGCG
+CAAATA
+CGAGGC
+CAATAA
+CGCACC
+CATAAA
+CGCAGG
+CATATT
+CGCCAG
+CATTAT
+CGCCCT
+CATTTA
+CGCCGA
+CTAAAA
+CGCCTC
+CTAATT
+CGCGAC
+CTATAT
+CGCGCA
+CTATTA
+CGCGGT
+CTTAAT
+CGCGTG
+CTTATA
+CGCTCG
+CTTTAA
+CGCTGC
+GAAATT
+CGGACG
+GAATAT
+CGGAGC
+GAATTA
+CGGCAC
+GATAAT
+CGGCCA
+GATATA
+CGGCGT
+GATTAA
+CGGCTG
+GTAAAT
+CGGGAG
+GTAATA
+CGGGCT
+GTATAA
+CGGGGA
+GTTAAA
+CGGGTC
+GTTATT
+CGGTCC
+GTTTAT
+CGGTGG
+GTTTTA
+CGTCCG
+TAAAAC
+CGTCGC
+TAAACA
+CGTGCC
+TAAAGT
+CGTGGG
+TAAATG
+CTCCCG
+TAACAA
+CTCCGC
+TAACTT
+CTCGGG
+TAAGAT
+CTGCGG
+TAAGTA
+CTGGCG
+TAATAG
+CTGGGC
+TAATCT
+GACCCG
+TAATGA
+GACCGC
+TAATTC
+GACGCC
+TACAAA
+GACGGG
+TACATT
+GAGCCC
+TACTAT
+GAGCGG
+TACTTA
+GAGGCG
+TAGAAT
+GAGGGC
+TAGATA
+GCACCC
+TAGTAA
+GCACGG
+TAGTTT
+GCAGCG
+TATAAG
+GCAGGC
+TATACT
+GCCACC
+TATAGA
+GCCAGG
+TATATC
+GCCCAG
+TATCAT
+GCCCCT
+TATCTA
+GCCCGA
+TATGAA
+GCCCTC
+TATGTT
+GCCGAC
+TATTAC
+GCCGCA
+TATTCA
+GCCGGT
+TATTGT
+GCCGTG
+TATTTG
+GCCTCG
+TCAAAA
+GCCTGC
+TCAATT
+GCGACG
+TCATAT
+GCGAGC
+TCATTA
+GCGCAC
+TCTAAT
+GCGCCA
+TCTATA
+GCGCGT
+TCTTAA
+GCGCTG
+TGAAAT
+GCGGAG
+TGAATA
+GCGGCT
+TGATAA
+GCGGGA
+TGATTT
+GCGGTC
+TGTAAA
+GCGTCC
+TGTATT
+GCGTGG
+TGTTAT
+GCTCCG
+TGTTTA
+GCTCGC
+TTAAAG
+GCTGCC
+TTAACT
+GCTGGG
+TTAAGA
+GGACGC
+TTAATC
+GGAGCC
+TTACAT
+GGAGGG
+TTACTA
+GGCACG
+TTAGAA
+GGCAGC
+TTAGTT
+GGCCAC
+TTATAC
+GGCGAG
+TTATCA
+GGCGCT
+TTATGT
+GGCGGA
+TTATTG
+GGCGTC
+TTCAAT
+GGCTCC
+TTCATA
+GGGACC
+TTCTAA
+GGGAGG
+TTGAAA
+GGGCAG
+TTGATT
+GGGCCT
+TTGTTA
+GGGCGA
+TTTAAC
+GGGCTC
+TTTACA
+GGGGAC
+TTTAGT
+GGGGCA
+TTTATG
+GGGGGT
+TTTCAA
+GGGGTG
+TTTCTT
+GGGTCG
+TTTGTA
+GGGTGC
+TTTTAG
+GGTCCC
+TTTTCT
+GGTGCG
+TTTTGA
+GGTGGC
+TCTTTC
+GTCCCC
+TTGGAT
+GTCGCG
+ACCGTA
+GTCGGC
+AGACCT
+GTGCGC
+AGGGAT
+GTGGCC
+ATCGAG
+GTGGGG
+CAAGCT
+TCCCCG
+CACCAA
+TCCCGC
+CAGTCA
+TCCGGG
+CATCAG
+TCGCGG
+CATGGT
+TCGGCG
+CCACAT
+TCGGGC
+CCGATT
+TGCCCC
+CGACTT
+TGCGCG
+CGATTG
+TGCGGC
+CTAGTG
+TGGCCG
+CTTCTG
+TGGCGC
+GAAGAC
+TGGGCC
+GATCGT
+TGGGGG
b
diff -r e73a22ff585c -r f77bc14eba31 test-data/scrb_seq_fastq.1.gz
b
Binary file test-data/scrb_seq_fastq.1.gz has changed
b
diff -r e73a22ff585c -r f77bc14eba31 test-data/scrb_seq_fastq.2.gz
b
Binary file test-data/scrb_seq_fastq.2.gz has changed
b
diff -r e73a22ff585c -r f77bc14eba31 test-data/t_R2.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/t_R2.fastq Tue Jun 05 19:44:38 2018 -0400
b
b"@@ -0,0 +1,400 @@\n+@HISEQ:105:C2UE1ACXX:3:1101:11160:2245 2:N:0:CAGATC\n+CATAAAAACCAAAACTAACTAAACCCCAAATAAAAAACAACCTAACCTCTAACAAAAACAACAACAACTAACACCTCAAAATCAACTCTAAATAAAAACTA\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:19338:2197 2:N:0:CAGATC\n+CTAATTTCTATTACCTACCTAACAACTATAACTATAATACTAACAAAAAACAAACAACATAGACCTAAATCCTACTTATACCCAACATTCTAAAAACAATT\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII0<BFFIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFB\n+@HISEQ:105:C2UE1ACXX:3:1101:19467:2281 2:N:0:CATATC\n+ACACAACAAAATCCCTACTCCTATAACCTCTCACTACACCCAAAACTCCATTCTTTTCCCCCTTTACAAAAATCACTAAAATCCAAACTATACATCTCACC\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIFIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIFFIIIIFFFFBBFFFFFFFBBFFFFFBBBFFFBFFFFBF\n+@HISEQ:105:C2UE1ACXX:3:1101:7009:2740 2:N:0:CAGATC\n+TAAATAAAACCCAAACCCACACTATCTATCCCTTATTAACATTACAATCACAATTATCAAATAAATAACAAAACCCAAAAAAACCTTACTTAACATTCCAT\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIFIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFB\n+@HISEQ:105:C2UE1ACXX:3:1101:13708:2613 2:N:0:CAGATC\n+TTCAAAAACTCCATAACAAACACAAATAAAAAATAAAAAACTCCTAAATCTCACCTTAAAAACTTATCTAACTGCAACTATTATCTTACTTAAAAAAAAAC\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:19067:2707 2:N:0:CAGATC\n+ATCACTCTTCCAAAAATCACTCGAATCCACAAATACAAAAACTTTCTAACCACACACCTAAAAAAAAAAAAAAAAAAAAAAAAACCCCCCCTTTAAAACCT\n++\n+BBBFFFFFFFFFFIIIIIIIIIIFFIIIIIIIIIIIIIIIFIIIIIIIIIFIIIIIIIFIIIIIFFFFFFFFFFFFFFFBB####################\n+@HISEQ:105:C2UE1ACXX:3:1101:4999:3182 2:N:0:CAGATC\n+CTTATAATTCAAATTTCTAAACTCCTACTCCCTCTCCCTTTATATTTATTTAACACATACTATTCTAACTATATATAAATCATAAATCTTATAAACTTTAA\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:16790:3145 2:N:0:CAGATC\n+AACATACCTATAAAAACGCACTACTTTTATATACAAAATCCGTCTCTACTAATACCATAACCAACCTCTATACCACATATAAAAAACAACAAACAATACTC\n++\n+BBBFFFFFFFFBFIIIIF<FFFFFIIIIIFFFIIIIIFIFFFIIIIIIFIFFFFIIIIIIIIIIIIFBBBBFFFFFFBBFFFFFFFFFFBBFFFFFFFFBB\n+@HISEQ:105:C2UE1ACXX:3:1101:18065:3106 2:N:0:CAGATC\n+AAAAACTAAACCCAAAAAAAAAACAATAAAAATAAAATAATAAAAATTATCATAATAAATTCCTAAAAAAAAAAAAAACTTTAAAAAAAAAAAAAAAAAAA\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIFIIIIIBFIIIFFFFFIIIFFFFFFFBFFFFFFFFFFBFFFFFFFFFFFFFF'0<<<BFB###############\n+@HISEQ:105:C2UE1ACXX:3:1101:2300:3263 2:N:0:TAGATC\n+AAAAATACAAAAAATAAAAAAAAAAAAAAACATTAAATTTAAAAAAAATTTATTTTTTATTTTATTTTATTTTTATTTTTTTAAATTAAAATAAAAAAAAA\n++\n+BBBFFFFBFFFFFIFFIIIIIIIIIIIFF<'<<<BBBFF0<FFBBBBFB7'0<BB000<'<F<0BBFB'<<BBB0<B<B<B7B<<0'<B'00BBFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:5605:3427 2:N:0:CAGATC\n+AACCTCTACACAAAAAAATCTAAAAATCTAAAACAAATCCTATAACCGAACCACTTCTTATCTATAAATCTTACTAAAACTCCCCACATCCTATACCTCTC\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFIFIIFFFFFFFFFFFFFFFFFFFFFFFFFFBBFFFFFFFFFFFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:8129:3589 2:N:0:CAGATC\n+TCTTATCTCTTCAAATTCCCTAATATCAATAAACCCTAACATAACCCGATAAATCAAAAAACTCTTTTATCACAATAAACGTATAATCCTAACTAAAAACT\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFIIIIIIIIIFFFFFFFFFFFFFFFFFFFFBBBFFFFFFFFFFFFFFFF<\n+@HISEQ:105:C2UE1ACXX:3:1101:14304:3866 2:N:0:CAGATC\n+AATTTATTCTTCACTAAAACCCCTTAACCAATACCAACATTTCCACAAAATTCTACCCTCTACAAAAACAACCTATCAAACTCAAAAATCCCCTATATAAA\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFBBBFFBFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:12720:4398 2:N:0:CAGATC\n+CTAATTTAACCTTTAAACTCAACAAAAATTAACCTACCTCTACCACTAAAATACTAAAATTAAACATATATATCACCAAACCCAACTTCAATTAAATACAC\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIIIIIIIFIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFBFFFFFFB\n+@HISEQ:105:C2UE1ACXX:3:1101:14945:4439 2:N:0:CAGATC\n+ACTCTCTCCTATATTCTTTACCAACATATATAACTTAACTCTCTAATAACCTTAACTATTCCTCACCCTAACCTCCACAAATACTATATATAATACTATAC\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIBFIIIIIFIIFFFFFFFFFFFFFFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:8616:4508 2:N:0:CAGATC\n+CTA"..b'IIIIIIIIFFIIIFIFFFFFFFFFFFFFFF<<BBFBBFFFFFBFBBBBFFFFFFFFFBF\n+@HISEQ:105:C2UE1ACXX:3:1101:7272:22581 2:N:0:CAGATC\n+CTAACTCCACCTATAAAAAACCTAAAAAATCGATCAAAATACTCCTCCTCCTTCTATTCCTAAAAAAAATAAAATTCTTTAAAAAATTCTTAATATCATTA\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIFFIIFIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:10060:23020 2:N:0:CAGATC\n+CAAAATAATATAAATCTAAAAATAAAAACACCATCCTTACTTCCTTTTAAAAAAAATACTCAAAACTATAAGACTACCCTTTCCTCTTAAAAACCTAATAC\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFBBFFFFFFFFFFFFFFFFFFFFFFBFFBFF\n+@HISEQ:105:C2UE1ACXX:3:1101:14440:23104 2:N:0:CAGATC\n+ATATTCTATAAAACACCAAAAATTCCTAAAAACCTCTAAAAACCAACTCCTACAAAAAAAACAACCCACAAAACAAAAATCAACTCCCCAAACCTTAACTT\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFB\n+@HISEQ:105:C2UE1ACXX:3:1101:6941:23338 2:N:0:CAGATC\n+CTTATTCATTCCATCGCCTATAACATAAAAACAAAAATAAACATTATCATAACAACCTATAATCAACACCCAAAACCAAATTCAACACACTATAACTCCTA\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:10069:23622 2:N:0:CAGATC\n+CTTAAAATTCACTTCACATTAACTTTTAAATATTATTCCAATATATTTAAAATAACACTTTAAAAAAAACCTAAATTAAAAAAACTAACCTTCTACAAATA\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:14079:24078 2:N:0:CAGATC\n+ATAAAACTAAACTTTCATATATTACTCTTAAATTTTTTTCCTAACTATAAAAAACTTTACAAAATACACCCTATTACTTTCAAACTACCAAAACTACCTAA\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:12064:24631 2:N:0:CCGATC\n+AAAAAAACCACCATTCTACAATCCCTTAAAAAAACCCCCAATAAAAAATAAACCCAAACCTAAACAAACAAAAAAACTCAAAAAATTAACTCTAAATATCA\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFBBBFFBFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:6662:24968 2:N:0:CAGATC\n+CTCCCATACTTAAAAAAATTAAAACCCAAAAACAATTCTACCCTCACAAATACCAAAAAAACGAAACACAATACCATAAACCTAAACAATTAAACGACACC\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFBFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:11630:24964 2:N:0:TAGATC\n+AAAAAAACATACCAACAACCTTTCAAATACAAAACTCCGTAAAATCTATACTATCTAACAATACCCTAAATCAAAAAAAAAACCAATCCTTAACATCCAAA\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIFFIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFBFBFBBFBFFFFFBFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:12594:24878 2:N:0:CCGATC\n+ATATAATATTACCTATTTATTTTTATTATTATTATTATTATTATTTATTTATTTATTCTAAACACCTCCCAAAACATTCCAACTTTCTTCATTAAATATAA\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIFFFFFFIIIFFIFIIIFIIIIIIIIIIIFFFFIIIFFFFIIIIIIIIIIFFIIIIFIIFFFFFFFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:4483:25030 2:N:0:CAGATC\n+CCTAATACTCTATACTAAACAAATAACAAAAAACTTCCTCCTACCTCTTTAAAAAACCCATAAACTTACTCAAATACAAATATAACAAAATAATATAAATC\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIFIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFB\n+@HISEQ:105:C2UE1ACXX:3:1101:12198:25235 2:N:0:CAGATC\n+CACAAAAACAACATACCTCTTAATAATCTTTCAAACTTACACCAAATCTATTTCTATCCTCATCTTCCATTTATAAAAACCAAAACTATACAACCCAATCT\n++\n+BBBFFFFFFFFFFFIFFFFFFBFBFFBBFF<FIBFFIF<BFIIIIIIIIFFFFFBFBFBBFF<B<BFFFBBFIIIIFIIFFFFFFFFFBFBFFBBBBBBB<\n+@HISEQ:105:C2UE1ACXX:3:1101:20477:25084 2:N:0:CATATC\n+AAAACCCAACTCTTCCACCAAAAAAATTACTTTATCTCTAATACTCTTAAAATACCCTATATTATCCTTATACAAAAACACTTAATTAACTTCCTAATTAA\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIFIIFIIIIIIIIFIIIIIIIIIIIIIF0<BFFFFFFFFFFFFBBBFFFFFFFFFBBB<<BB70\n+@HISEQ:105:C2UE1ACXX:3:1101:5725:25359 2:N:0:CAGATC\n+ATAACTTAACTCTCTAATAACCTTAACTATTCCTCTCCCTAACCTCCACAAATACTATATATAATACTATACTTCTTAATAAACTTACTTAACATAAACCA\n++\n+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFF\n+@HISEQ:105:C2UE1ACXX:3:1101:5502:25591 2:N:0:CAGATC\n+TTCTTAATTCCATCTTTAAACAATAATATTAAAATATTACTTCCCAAAATACAAACCCATATCCATATCCAAAAACTAAACAACCTCTACACCTCAACAAA\n++\n+<<<BBBBBBBBBBBBBBBBB70B00B000<0BBB###################################################################\n'
b
diff -r e73a22ff585c -r f77bc14eba31 umi-tools_extract.xml
--- a/umi-tools_extract.xml Mon Apr 16 16:38:40 2018 -0400
+++ b/umi-tools_extract.xml Tue Jun 05 19:44:38 2018 -0400
[
@@ -8,13 +8,15 @@
     @COMMAND_LINK@
 
     umi_tools extract
+            --extract-method='$extract_method.value'
             --bc-pattern='$bc_pattern'
+
             #if $input_type.type == 'single':
                 #if $gz:
                     --stdin=input_single.gz
                     --stdout out.gz
                 #else
-                    --stdin='$input_type.input_single'
+                    --stdin=input_single.txt
                     --stdout '$out'
                 #end if
             #else:
@@ -24,8 +26,8 @@
                     --stdout out1.gz
                     --read2-out=out2.gz
                 #else:
-                    --stdin='$input_type.input_read1'
-                    --read2-in='$input_type.input_read2'
+                    --stdin=input_read1.txt
+                    --read2-in=input_read2.txt
                     --stdout '$out1'
                     --read2-out='$out2'
                 #end if
@@ -34,6 +36,13 @@
                     --bc-pattern2='$input_type.barcode.bc_pattern2'
                 #end if
             #end if
+
+            #if $barcodes.use_barcodes.value == 'yes':
+                --filter-cell-barcode
+                --whitelist='$barcodes.filter_barcode_file'
+                '$barcodes.filter_correct.value'
+            #end if
+
             #if not $prime3:
                 --3prime
             #end if
@@ -57,12 +66,49 @@
     ]]></command>
     <inputs>
         <expand macro="input_types" />
+
+        <conditional name="barcodes" >
+            <param name="use_barcodes" argument="--filter-cell-barcode" type="select" label="Use Known Barcodes?" >
+                <option value="yes">Yes</option>
+                <option value="no" selected="true" >No</option>
+            </param>
+            <when value="no" />
+            <when value="yes" >
+                <param name="filter_barcode_file" type="data" format="tsv" label="Barcode File" />
+                <param name="filter_correct" argument="--error-correct-cell" type="boolean" truevalue="--error-correct-cell" falsevalue="" checked="false" label="Apply correction to cell barcodes?" help="This only applies if your barcode file has two columns output from the umi_tools whitelist command." />
+            </when>
+        </conditional>
+
+        <param name="extract_method" type="select" label="Method to extract barcodes" >
+            <option value="regex">Regular Expressions</option>
+            <option value="string" selected="true">String</option>
+        </param>
+            
         <param name="bc_pattern" argument="--bc-pattern" type="text" label="Barcode pattern for first read"
             help="Use this option to specify the format of the UMI/barcode. Use Ns to
                     represent the random positions and Xs to indicate the bc positions.
                     Bases with Ns will be extracted and added to the read name. Remaining
                     bases, marked with an X will be reattached to the read.">
+            <sanitizer invalid_char="">
+                <valid initial="string.letters,string.digits">
+                    <add value="!="/>
+                    <add value="-"/>
+                    <add value="_"/>
+                    <add value="."/>
+                    <add value="?"/>
+                    <add value="&lt;"/><!-- left triangle bracket -->
+                    <add value="&gt;"/><!-- right triangle bracket -->
+                    <add value="&#91;"/> <!-- left square bracket -->
+                    <add value="&#93;"/> <!-- right square bracket -->
+                    <add value="&#94;"/> <!-- caret -->
+                    <add value="&#123;"/> <!-- left curly -->
+                    <add value="&#125;"/> <!-- right curly -->
+                    <add value="&#40;"/> <!-- left parenthesis -->
+                    <add value="&#41;"/> <!-- right parenthesis -->
+                </valid>
+            </sanitizer>
         </param>
+
         <param name="prime3" argument="--3prime" type="boolean" label="Is the barcode at the 5' end?"
             truevalue="1" falsevalue="0" checked="true"
             help="By default the barcode is assumed to be on the 5' end of the read, but
@@ -126,17 +172,37 @@
             <output name="out_log" file="out_paired.log" lines_diff="16"/>
         </test>
         <test>
-            <param name="type" value="paired_collection" />
+            <param name="type" value="paired_collection" /> <!-- same as before, but uncompressed -->
             <param name="input_readpair" >
                 <collection type="paired">
-                    <element name="forward" ftype="fastq.gz" value="t_R1.fastq.gz" />
-                    <element name="reverse" ftype="fastq.gz" value="t_R2.fastq.gz" />
+                    <element name="forward" ftype="fastq" value="t_R1.fastq" />
+                    <element name="reverse" ftype="fastq" value="t_R2.fastq" />
                 </collection>
             </param>
             <param name="bc_pattern" value="NNNXXX" />
             <output name="out1" file="out_R1.fastq.gz" decompress="true" lines_diff="2" />
             <output name="out2" file="out_R2.fastq.gz" decompress="true" lines_diff="2" />
-            <output name="out_log" file="out_paired.log" lines_diff="16"/>
+            <output name="out_log" file="out_paired.log" lines_diff="25" />
+        </test>
+        <test>
+            <param name="type" value="paired" />
+            <param name="input_read1" value="scrb_seq_fastq.1.gz" ftype="fastq.gz" />
+            <param name="input_read2" value="scrb_seq_fastq.2.gz" ftype="fastq.gz" />
+            <param name="extract_method" value="string" />
+            <param name="bc_pattern" value="CCCCCCNNNNNNNNNN" />
+            <param name="use_barcodes" value="yes" />
+            <param name="filter_barcode_file" value="scrb_seq_barcodes" />
+            <output name="out2" file="scrb_extract.fastq.gz" decompress="true" />
+        </test>
+        <test><!-- same as above but with regex barcode-->
+            <param name="type" value="paired" />
+            <param name="input_read1" value="scrb_seq_fastq.1.gz" ftype="fastq.gz" />
+            <param name="input_read2" value="scrb_seq_fastq.2.gz" ftype="fastq.gz" />
+            <param name="extract_method" value="regex" />
+            <param name="bc_pattern" value="^(?P&lt;cell_1&gt;.{6})(?P&lt;umi_1&gt;.{10})" />
+            <param name="use_barcodes" value="yes" />
+            <param name="filter_barcode_file" value="scrb_seq_barcodes" />
+            <output name="out2" file="scrb_extract.fastq.gz" decompress="true" />
         </test>
     </tests>
     <help><![CDATA[