changeset 5:f77bc14eba31 draft

planemo upload commit 57e3e460a740aa7aad217c8365527c49e88c9a30
author iuc
date Tue, 05 Jun 2018 19:44:38 -0400
parents e73a22ff585c
children 6417d5ed05c6
files test-data/scrb_extract.fastq.gz test-data/scrb_seq_barcodes test-data/scrb_seq_fastq.1.gz test-data/scrb_seq_fastq.2.gz test-data/t_R2.fastq umi-tools_extract.xml
diffstat 6 files changed, 857 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
Binary file test-data/scrb_extract.fastq.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/scrb_seq_barcodes	Tue Jun 05 19:44:38 2018 -0400
@@ -0,0 +1,384 @@
+AAAACT
+GCTAGA
+AAAATC
+GCTTAC
+AAACAT
+GGACAT
+AAACTA
+GGCAAT
+AAAGTT
+GGGATT
+AAATAC
+GTACAC
+AAATCA
+GTCAAG
+AAATGT
+GTGACT
+AAATTG
+GTTCGA
+AACAAT
+TAGTGG
+AACATA
+TCCAAC
+AACTAA
+TCGAAG
+AAGATT
+TCTGCA
+AAGTAT
+TTCCTC
+AAGTTA
+TTGTCC
+AATAAC
+TTTGGC
+AATACA
+CCAACC
+AATAGT
+CCTTCC
+AATATG
+CTCTCC
+AATCAA
+GGACCA
+AATCTT
+GTACCG
+AATGAT
+ACCCCC
+AATGTA
+ACCCGG
+AATTAG
+ACCGCG
+AATTCT
+ACCGGC
+AATTGA
+ACGCCG
+AATTTC
+ACGCGC
+ACAAAT
+ACGGCC
+ACAATA
+ACGGGG
+ACATAA
+AGCCCG
+ACTAAA
+AGCCGC
+ACTATT
+AGCGCC
+ACTTAT
+AGCGGG
+ACTTTA
+AGGCCC
+AGAATT
+AGGCGG
+AGATAT
+AGGGCG
+AGATTA
+AGGGGC
+AGTAAT
+CACCCC
+AGTATA
+CACCGG
+AGTTAA
+CACGCG
+ATAAAC
+CACGGC
+ATAACA
+CAGCCG
+ATAAGT
+CAGCGC
+ATAATG
+CAGGCC
+ATACAA
+CAGGGG
+ATACTT
+CCACCG
+ATAGAT
+CCACGC
+ATAGTA
+CCAGGG
+ATATAG
+CCCACG
+ATATCT
+CCCAGC
+ATATGA
+CCCCAC
+ATATTC
+CCCCCA
+ATCAAA
+CCCCGT
+ATCATT
+CCCCTG
+ATCTAT
+CCCGAG
+ATCTTA
+CCCGGA
+ATGAAT
+CCCTGG
+ATGATA
+CCGAGG
+ATGTAA
+CCGCAG
+ATTAAG
+CCGCGA
+ATTACT
+CCGGAC
+ATTAGA
+CCGGCA
+ATTATC
+CCGGGT
+ATTCAT
+CCGGTG
+ATTCTA
+CCGTCG
+ATTGAA
+CCGTGC
+ATTGTT
+CCTCGG
+ATTTAC
+CCTGCG
+ATTTCA
+CCTGGC
+ATTTGT
+CGACCC
+ATTTTG
+CGACGG
+CAAAAT
+CGAGCG
+CAAATA
+CGAGGC
+CAATAA
+CGCACC
+CATAAA
+CGCAGG
+CATATT
+CGCCAG
+CATTAT
+CGCCCT
+CATTTA
+CGCCGA
+CTAAAA
+CGCCTC
+CTAATT
+CGCGAC
+CTATAT
+CGCGCA
+CTATTA
+CGCGGT
+CTTAAT
+CGCGTG
+CTTATA
+CGCTCG
+CTTTAA
+CGCTGC
+GAAATT
+CGGACG
+GAATAT
+CGGAGC
+GAATTA
+CGGCAC
+GATAAT
+CGGCCA
+GATATA
+CGGCGT
+GATTAA
+CGGCTG
+GTAAAT
+CGGGAG
+GTAATA
+CGGGCT
+GTATAA
+CGGGGA
+GTTAAA
+CGGGTC
+GTTATT
+CGGTCC
+GTTTAT
+CGGTGG
+GTTTTA
+CGTCCG
+TAAAAC
+CGTCGC
+TAAACA
+CGTGCC
+TAAAGT
+CGTGGG
+TAAATG
+CTCCCG
+TAACAA
+CTCCGC
+TAACTT
+CTCGGG
+TAAGAT
+CTGCGG
+TAAGTA
+CTGGCG
+TAATAG
+CTGGGC
+TAATCT
+GACCCG
+TAATGA
+GACCGC
+TAATTC
+GACGCC
+TACAAA
+GACGGG
+TACATT
+GAGCCC
+TACTAT
+GAGCGG
+TACTTA
+GAGGCG
+TAGAAT
+GAGGGC
+TAGATA
+GCACCC
+TAGTAA
+GCACGG
+TAGTTT
+GCAGCG
+TATAAG
+GCAGGC
+TATACT
+GCCACC
+TATAGA
+GCCAGG
+TATATC
+GCCCAG
+TATCAT
+GCCCCT
+TATCTA
+GCCCGA
+TATGAA
+GCCCTC
+TATGTT
+GCCGAC
+TATTAC
+GCCGCA
+TATTCA
+GCCGGT
+TATTGT
+GCCGTG
+TATTTG
+GCCTCG
+TCAAAA
+GCCTGC
+TCAATT
+GCGACG
+TCATAT
+GCGAGC
+TCATTA
+GCGCAC
+TCTAAT
+GCGCCA
+TCTATA
+GCGCGT
+TCTTAA
+GCGCTG
+TGAAAT
+GCGGAG
+TGAATA
+GCGGCT
+TGATAA
+GCGGGA
+TGATTT
+GCGGTC
+TGTAAA
+GCGTCC
+TGTATT
+GCGTGG
+TGTTAT
+GCTCCG
+TGTTTA
+GCTCGC
+TTAAAG
+GCTGCC
+TTAACT
+GCTGGG
+TTAAGA
+GGACGC
+TTAATC
+GGAGCC
+TTACAT
+GGAGGG
+TTACTA
+GGCACG
+TTAGAA
+GGCAGC
+TTAGTT
+GGCCAC
+TTATAC
+GGCGAG
+TTATCA
+GGCGCT
+TTATGT
+GGCGGA
+TTATTG
+GGCGTC
+TTCAAT
+GGCTCC
+TTCATA
+GGGACC
+TTCTAA
+GGGAGG
+TTGAAA
+GGGCAG
+TTGATT
+GGGCCT
+TTGTTA
+GGGCGA
+TTTAAC
+GGGCTC
+TTTACA
+GGGGAC
+TTTAGT
+GGGGCA
+TTTATG
+GGGGGT
+TTTCAA
+GGGGTG
+TTTCTT
+GGGTCG
+TTTGTA
+GGGTGC
+TTTTAG
+GGTCCC
+TTTTCT
+GGTGCG
+TTTTGA
+GGTGGC
+TCTTTC
+GTCCCC
+TTGGAT
+GTCGCG
+ACCGTA
+GTCGGC
+AGACCT
+GTGCGC
+AGGGAT
+GTGGCC
+ATCGAG
+GTGGGG
+CAAGCT
+TCCCCG
+CACCAA
+TCCCGC
+CAGTCA
+TCCGGG
+CATCAG
+TCGCGG
+CATGGT
+TCGGCG
+CCACAT
+TCGGGC
+CCGATT
+TGCCCC
+CGACTT
+TGCGCG
+CGATTG
+TGCGGC
+CTAGTG
+TGGCCG
+CTTCTG
+TGGCGC
+GAAGAC
+TGGGCC
+GATCGT
+TGGGGG
Binary file test-data/scrb_seq_fastq.1.gz has changed
Binary file test-data/scrb_seq_fastq.2.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/t_R2.fastq	Tue Jun 05 19:44:38 2018 -0400
@@ -0,0 +1,400 @@
+@HISEQ:105:C2UE1ACXX:3:1101:11160:2245 2:N:0:CAGATC
+CATAAAAACCAAAACTAACTAAACCCCAAATAAAAAACAACCTAACCTCTAACAAAAACAACAACAACTAACACCTCAAAATCAACTCTAAATAAAAACTA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:19338:2197 2:N:0:CAGATC
+CTAATTTCTATTACCTACCTAACAACTATAACTATAATACTAACAAAAAACAAACAACATAGACCTAAATCCTACTTATACCCAACATTCTAAAAACAATT
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII0<BFFIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFB
+@HISEQ:105:C2UE1ACXX:3:1101:19467:2281 2:N:0:CATATC
+ACACAACAAAATCCCTACTCCTATAACCTCTCACTACACCCAAAACTCCATTCTTTTCCCCCTTTACAAAAATCACTAAAATCCAAACTATACATCTCACC
++
+BBBFFFFFFFFFFIIIIIIIIIIIIFIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIFFIIIIFFFFBBFFFFFFFBBFFFFFBBBFFFBFFFFBF
+@HISEQ:105:C2UE1ACXX:3:1101:7009:2740 2:N:0:CAGATC
+TAAATAAAACCCAAACCCACACTATCTATCCCTTATTAACATTACAATCACAATTATCAAATAAATAACAAAACCCAAAAAAACCTTACTTAACATTCCAT
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIFIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFB
+@HISEQ:105:C2UE1ACXX:3:1101:13708:2613 2:N:0:CAGATC
+TTCAAAAACTCCATAACAAACACAAATAAAAAATAAAAAACTCCTAAATCTCACCTTAAAAACTTATCTAACTGCAACTATTATCTTACTTAAAAAAAAAC
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:19067:2707 2:N:0:CAGATC
+ATCACTCTTCCAAAAATCACTCGAATCCACAAATACAAAAACTTTCTAACCACACACCTAAAAAAAAAAAAAAAAAAAAAAAAACCCCCCCTTTAAAACCT
++
+BBBFFFFFFFFFFIIIIIIIIIIFFIIIIIIIIIIIIIIIFIIIIIIIIIFIIIIIIIFIIIIIFFFFFFFFFFFFFFFBB####################
+@HISEQ:105:C2UE1ACXX:3:1101:4999:3182 2:N:0:CAGATC
+CTTATAATTCAAATTTCTAAACTCCTACTCCCTCTCCCTTTATATTTATTTAACACATACTATTCTAACTATATATAAATCATAAATCTTATAAACTTTAA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:16790:3145 2:N:0:CAGATC
+AACATACCTATAAAAACGCACTACTTTTATATACAAAATCCGTCTCTACTAATACCATAACCAACCTCTATACCACATATAAAAAACAACAAACAATACTC
++
+BBBFFFFFFFFBFIIIIF<FFFFFIIIIIFFFIIIIIFIFFFIIIIIIFIFFFFIIIIIIIIIIIIFBBBBFFFFFFBBFFFFFFFFFFBBFFFFFFFFBB
+@HISEQ:105:C2UE1ACXX:3:1101:18065:3106 2:N:0:CAGATC
+AAAAACTAAACCCAAAAAAAAAACAATAAAAATAAAATAATAAAAATTATCATAATAAATTCCTAAAAAAAAAAAAAACTTTAAAAAAAAAAAAAAAAAAA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIFIIIIIBFIIIFFFFFIIIFFFFFFFBFFFFFFFFFFBFFFFFFFFFFFFFF'0<<<BFB###############
+@HISEQ:105:C2UE1ACXX:3:1101:2300:3263 2:N:0:TAGATC
+AAAAATACAAAAAATAAAAAAAAAAAAAAACATTAAATTTAAAAAAAATTTATTTTTTATTTTATTTTATTTTTATTTTTTTAAATTAAAATAAAAAAAAA
++
+BBBFFFFBFFFFFIFFIIIIIIIIIIIFF<'<<<BBBFF0<FFBBBBFB7'0<BB000<'<F<0BBFB'<<BBB0<B<B<B7B<<0'<B'00BBFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:5605:3427 2:N:0:CAGATC
+AACCTCTACACAAAAAAATCTAAAAATCTAAAACAAATCCTATAACCGAACCACTTCTTATCTATAAATCTTACTAAAACTCCCCACATCCTATACCTCTC
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFIFIIFFFFFFFFFFFFFFFFFFFFFFFFFFBBFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:8129:3589 2:N:0:CAGATC
+TCTTATCTCTTCAAATTCCCTAATATCAATAAACCCTAACATAACCCGATAAATCAAAAAACTCTTTTATCACAATAAACGTATAATCCTAACTAAAAACT
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFIIIIIIIIIFFFFFFFFFFFFFFFFFFFFBBBFFFFFFFFFFFFFFFF<
+@HISEQ:105:C2UE1ACXX:3:1101:14304:3866 2:N:0:CAGATC
+AATTTATTCTTCACTAAAACCCCTTAACCAATACCAACATTTCCACAAAATTCTACCCTCTACAAAAACAACCTATCAAACTCAAAAATCCCCTATATAAA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFBBBFFBFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:12720:4398 2:N:0:CAGATC
+CTAATTTAACCTTTAAACTCAACAAAAATTAACCTACCTCTACCACTAAAATACTAAAATTAAACATATATATCACCAAACCCAACTTCAATTAAATACAC
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIIIIIIIFIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFBFFFFFFB
+@HISEQ:105:C2UE1ACXX:3:1101:14945:4439 2:N:0:CAGATC
+ACTCTCTCCTATATTCTTTACCAACATATATAACTTAACTCTCTAATAACCTTAACTATTCCTCACCCTAACCTCCACAAATACTATATATAATACTATAC
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIBFIIIIIFIIFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:8616:4508 2:N:0:CAGATC
+CTAATAAAAACCCTAAAAAATAAATCCTATCCTAATTCATTACCCACCATACCTTACAATACTAAAAATAACTACCCTACCTATTAAACCTCAAAAAAAAC
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIBFIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:18975:4834 2:N:0:CAGATC
+CTACAAAAATCCCCCTACCTCAACCTCCCACAAATAACTATTACCAATAATTACTTTTAAATCTTTAAATTACAAAATTACCATTTATAAGATCGGAAGAG
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFBFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:4984:5374 2:N:0:CAGATC
+AAAACACTTTTCATAAACGACCCACTCTAAAAACAAACACCTCAACTAATACTTATTATCATTTTCCAAACTAATACTCAAAACCATTAACTAACAAATAA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:14432:5660 2:N:0:CAGATC
+TTACAAAAAAAAATTTTTTAATTAAATAAAAATAAAAATAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATCCTTTTTTTTTTTTTTTTC
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFB<BBBFFB777<B######################
+@HISEQ:105:C2UE1ACXX:3:1101:17410:5545 2:N:0:CAGATC
+ATATCCAAACCTCTAATAAAATAAAACCTAAATAAAACTAACATCTAAAATAATTAAATATACTCAAATTTCACTAAATAACCCTAACAACTTCAAAAAAA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIFIIIIIIIIIIIIFFFFFFFFFFBFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:12336:6058 2:N:0:CAGATC
+TAAAAAAAATCACTCTTCCAAAAATCACTCAAATCCACAAATACAAAAACTTTCTAACCACACACCTAAAAAAAAAAAAAAAAAAAAAAAACACACACCCT
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFB##########
+@HISEQ:105:C2UE1ACXX:3:1101:5999:6265 2:N:0:CAGATC
+ATAAAACATTATATATAAAAACAACCCAATATCCAAAAACCCTATCAACCACAAAAACTAATAAACAAAAAATAAATTCAAAAAAAACACAAATATATAAC
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:5836:6568 2:N:0:CAGATC
+AAAACTACCTATAAAAAAAACATACCAACAACCTTTCAAATACAAAACTCCGTAAAATCTATACTATCTAACAATACCCTAAATCAAAAAAAAAACCAATC
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBBFBB
+@HISEQ:105:C2UE1ACXX:3:1101:15437:6678 2:N:0:CAGATC
+CACCATAAACATACTCTACATCTCTACCTCCACTATTCAATTTTCAAAAACAAAAACTACCAACATTTAAAACTCAAATATCATAAAAAAATCTAAACATA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIIIIIIIIIIFIIIIFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:12432:6996 2:N:0:CAGATC
+TAAAAAATAAAACTATATAAAATATACAAAATAAAATAAAAAAAAAACAACCTTTAAAAATAACACTTATCTAAAATTACAACTTCAAATATTACTAAAAT
++
+BBBFFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFBFFBFFFFFBFFFBBFFFFFFFFFFFFFFFFFBFFFFFFFFFF<
+@HISEQ:105:C2UE1ACXX:3:1101:19878:7022 2:N:0:CATATC
+TACAAACGCAACGTAAACTCAATATACCAAAAAACCCTACGCCCAACCACACCCCTAAATCGAACATAAATCTACACAAACTAATAAACAAAACACAAACT
++
+BBBFFFF'BFFB'<BFIIFFIIIIIFFFIIIIIIIIIIII07BFIIIIFFFFFFBBBBBBB'7B7B<BBBFBBBB00<BBBBBBBBBBB00BB7BB'0BB0
+@HISEQ:105:C2UE1ACXX:3:1101:6837:7574 2:N:0:CAGATC
+AAATCTTTTCATTCAATATAACATTTTAACAAAACTATATTATTACACCCATAAAAACCAAAACTAACTAAACCCCAAATAAAAAACAACCTAACCTCTAA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:15853:7717 2:N:0:CAGATC
+CTCAAAAAAATAAAAAAAAAATCCTTAACAAAAAACAATATACTAAACACTAACACCCACAACTACGATTAATTCTACCTTCACCCACCTTAAACACTTAT
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFF<BFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFB
+@HISEQ:105:C2UE1ACXX:3:1101:2440:7931 2:N:0:CAGATC
+TATAAATCTTACTAAAACTCCCCACATCCTATACCTCTCATCTCCATACCTTTCTAATATCTATAACACTTTTCAAATAGATCGGAAGAGCGTCGTGTAGG
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIBFFFIFFFFFFFFFFFFFBBBBB<BBF
+@HISEQ:105:C2UE1ACXX:3:1101:6407:7896 2:N:0:CAGATC
+CTCCTAACTATAACCCTCTAATAACTCCCAATACATATCCTCAACCCAAACCTATAAAAGATCGGAAGAGCGTCGTTAGGGAAAGAGTGTAGATCTCGGTG
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIBFFIFFIIBFBBBFBFBBFFF<BFFBF<00770<BBFFB007
+@HISEQ:105:C2UE1ACXX:3:1101:19497:7952 2:N:0:CAGATC
+TCCACTTAACCATAAACCTCATAAACCTCTAAATACATTTATTTAAACTTTTAAACATCTACGTATATATCGGAAGAGCGTCATGTATGGAAAAAATGTAA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII'7FFI'7BF0'0B##########################
+@HISEQ:105:C2UE1ACXX:3:1101:3853:8232 2:N:0:CAGATC
+CTACCGACAAACTAAATAAACAACAACTCAAAAACAACTCCTAAAAACCTCTAAATACAACAACAACCTAACCAGATCGGAAGAGCGTCGTGTAGGGAAAG
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFBFFFBBFFFFFFFBFFBBBBF<<7BB#
+@HISEQ:105:C2UE1ACXX:3:1101:7442:8079 2:N:0:CAGATC
+CAAAATATATCAATCAAAAAAACATCCAAAACCGAAACTCCAAACAACAAAAAAATAATAAAATACTACCCATAAAACCTAACCCTCAATATACCCCGACA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFBFFF
+@HISEQ:105:C2UE1ACXX:3:1101:14324:8171 2:N:0:CAGATC
+AATAACCCTAAATTCCTCTTCCTCCCTCACCATAACTAACATAACACGTCCTACCAAAAACAATCCCCAACAAACTATCACTCACACACACACACACACAC
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIFFFFIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:3147:8365 2:N:0:CAGATC
+AATCTTTAAAAAAAACTTTCAAAACAAAATATAAAATTTCAAACTAAAAACAATAAAAACTAATAAATAAATATCTAAAAACGAAAACCAAAGAACAAAAA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIFIIIIIFIIIFIIFFIIIIIIIIIIIIIIIIIIIFFBBBBFFFFFFFFFFFFFFFFFBBFFFFFFFFBFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:7150:8384 2:N:0:CAGATC
+TCTAAAATAAAAATACCACTCACTCTATCCTAAATCCTTAAATACTTTCCTTCCAAATACTAAAAACCCTCCTAAACAATCCACTATAATAATAAACAATA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:11512:8610 2:N:0:CAGATC
+AACCTTACGCACGCCCTACTCGCACTTCCACGCATAAACCAAAACCTTATAAATAAACAACATCTCCTACACTTCTACAAAAAAAAACGAAAACCCCGACC
++
+BBBFFFFFFFF<BBFFIFIIIFFFIIIIIIIIIFFIIIIIIIIIFFFFFFFFFFFFFBFFFFBBBFFFFFFFFFFFFFFFFFFFFFF<<BFFF77B<BBB7
+@HISEQ:105:C2UE1ACXX:3:1101:18380:8734 2:N:0:CAGATC
+ATAAAAAAAAAAAAAAAATTTTTTAAATAAAAAAAAATAAACAAAAACAACTAAACAACAACACAAATCCAAACATCCAAACTAATACCTAACTCTATAAA
++
+BBBFFFFFFFFFFIIIIBFFFFFBFFBBFFFFFFFFB0BBB0BBFFF7<BB'7BB0BB0B<770BBB0<<BFB<B'<0BBBB0BB0<BB<B<BBB<B<BBF
+@HISEQ:105:C2UE1ACXX:3:1101:18629:8512 2:N:0:CAGATC
+CCCCCAATAAAAAATAAACCCAAACCTAAACAAACAAAAAAACTCAAAAAATTAACTCTAAATATCAATCCTAAAAACAACTTACTTTCTTTTTTTTTAAA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFIIFF<FFFB<B<BBF<BBBBFBBBBBFFFFBBBBBBBFBFFFFFFFFFFFFBBF
+@HISEQ:105:C2UE1ACXX:3:1101:5146:8981 2:N:0:CAGATC
+ATCAATACAACTCATACTAAACTCACCCCAAACCTTCTAAAAACAACAAAATCAACAAAAAAATTATAACTACACTAATACAAAACAAACAACAACAACTA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:3344:9104 2:N:0:CAGATC
+CCAAAATAAAAAAACAAATAAATACTAAAATTCATATACCTCTTATCTCACTATAAATCACCTAAATTATTATCGTCCGCTTACAACTATACTCTCTCCTA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:8326:9445 2:N:0:CAGATC
+CTAAAATAATATCATCCTCCAACATCAAACCTACCAAAAACATAACTTCATTTTCAAAAACAATACCTTCCAAATAAACTTTAATTTCCTAACCAATCACT
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:2097:9536 2:N:0:CAGATC
+ATAACAAAAAATAATTTAAAAACAATAACAACAAAAACTCCCACCCCATCCCTATCCCAAAATTCCCAAAAAACCAAACTTAAATAAAATCCCATTAACTA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:16351:9944 2:N:0:CAGATC
+TATCAATTCATAATCACTCAAAATAAAAAATAACTAATTTAACAACCAACAAACTTAATCCCAACACAACCAAAACAAACAATAAAAATAAATCCCACAAT
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFB
+@HISEQ:105:C2UE1ACXX:3:1101:17628:10309 2:N:0:CAGATC
+CCCGAACACACGAAAAAACAAAAAATAATAAAAAATAATCAACAACCAAAAACGCAAATTCAAAACAAATCTAAACATACAAGATCGGAAGAGCGTCGTGT
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFBFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFBBBBF<
+@HISEQ:105:C2UE1ACXX:3:1101:1440:10515 2:N:0:CAGATC
+ATATTATTAATCATAATATTTATCACAAAAATAAAAACCTACCTAACTCACCAAACACAAAAAATTAAACTAAAATAAAACATAAATCATAACCTAAATAA
++
+BBBFFFFFFFFFFIIIIIFIIIIIIIIIIIIIIIIIFIIIIIIIIIIIIIFIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:11787:10735 2:N:0:CAGATC
+ACTAACTAACACACTTACTAAATACCAAACACAATACCCCATAAAAAAAATATTCTACTTAAAAATTCAAAAAAATCTAATAACTACTTATATCTAAAAAT
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFB
+@HISEQ:105:C2UE1ACXX:3:1101:10231:11124 2:N:0:CAGATC
+CTCTCTAACCCAAAAATCCTTATATCCATCTACAAACCTCCTACACCCTCTAATAACACAAAATAATACTATCTCCTAACATCCTACAAAAACATTTCACA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:12958:11220 2:N:0:CAGATC
+TTTAATTATAAACTTAACGAAAACCTAAATTTCGAATTACCTAAATCAAACTAACCTATTAAAAAACGAAAAAAAAAAAAAAAACAATTAATAAAATTAAT
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIIIIIFFFFFFFFFFFFFFF####################
+@HISEQ:105:C2UE1ACXX:3:1101:9833:11396 2:N:0:CAGATC
+CCTATATCATTCTTACTATAACACACAAAAATCAAAAAACAACTACCTTAATCCCAAAAATAAAAACAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFBBFBFBBBFFBFFBBB####
+@HISEQ:105:C2UE1ACXX:3:1101:13437:11566 2:N:0:CAGATC
+AAACTCCTAAAATAACCTAAACCAACAAACTATTCTCAATAACAAAATCTAAAAAAAAACCAAAAAATAAAAAAATAAAAAAAACAAAAAAACAAAAATCA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:12514:13081 2:N:0:CAGATC
+AAAAATTCCTAAAAACCTCTAAAAACCAACTCCTACAAAAAAAACAACCCACAAAACAAAAATCAACTCCCCAAACCTTAACTTAAATTTAAAATCAAAAC
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF<
+@HISEQ:105:C2UE1ACXX:3:1101:18902:13084 2:N:0:CAGATC
+ACCAAAACTATCAAAATAAAAAAAAACTAAAAATCTCCATTCCAAAACCAACACCTATCTCTACTATAATATTTCACAAAACCCGACTCCTAGATCGGAAG
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFB
+@HISEQ:105:C2UE1ACXX:3:1101:3574:13312 2:N:0:GAGATC
+TACCTCTCTCTCCAATAAAACCAAAAAATTCCAACAAACATTACAATACCCATTCCATTCTAACCAACTCAAATTATAAAACAAAAACCTGATTACCTAAC
++
+BBBFFFFFFFFFFIIIIIIIFFFIIIIIIIIIFIFFFIBFFFIFFIFIIFFFBFFFIIFFFBFFIFFIFFFFFB<BBFFFFFFFFFF<<<7BBBFBBBBF<
+@HISEQ:105:C2UE1ACXX:3:1101:5163:13594 2:N:0:CAGATC
+ATAAAAAAAAAATAAATACTTAAACTAATAAACTAAAAATAACCATTACCATATTAACTATATATTCCTCTAAATAACCCATTTCTAACCCACAAAGATCG
++
+BBBFFFFFFFFFFIIIFIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFBFFBFFFFFFFFFFFFFFFFFFFFFBBBFB
+@HISEQ:105:C2UE1ACXX:3:1101:10809:14594 2:N:0:CAGATC
+ATATAATACTATACTTCTTAATAAACTTACTTAACATAAACCATCAACTTATAAAAAACCTCCTAATCACAACTATTACTTTATTATCTTATTTCTCTTCT
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:7778:14963 2:N:0:CAGATC
+AAAATCCTCATAAAACACCCTCAAATAACCCAAAATAAAAAAACAAATAAATACTAAAATTCATATACCTCTTATCTCACTATAAATCACCTAAATTATTA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:2575:15554 2:N:0:CAGATC
+TTAAAATAAAAATTTCATACATATTCAATATCTTAATAAAATTATAAAAAAAACCAAAACCCTATAACCTCTCTAAAAAAAAAAAAAACCAACCCAAAAAA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFB7'77<B'7BBB##
+@HISEQ:105:C2UE1ACXX:3:1101:20234:16099 2:N:0:CATATC
+ACTTTTAACCAAACACTAAATCTCCCACACTACCCAAATATAATACCACATACCTATAATTCCCAAAAATTAAACCATTAACCTCAACTATCATTTTTACC
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:20842:16102 2:N:0:CAGATC
+CATCTAATTTAAAATAACCCAAACTACCTCGTTTCCTTAACTAAAATAAATTTTAAAAATTTCCTACAAATTCCATATTCTAAAAATAAACAACACATTAA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIII<FFIIIIIIIIIIIIIIIIIIIIIIIIIIFFIFIIIIIIIFFFFIIIFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:21194:16434 2:N:0:CAAATC
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
++
+BBBFFFFFFFFFFIIIFFFFFFFFFFFFB77BFFFFFFFFFFFFFFFFFFFFBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFBBFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:7164:16848 2:N:0:CAGATC
+AAAAAAAATAGTATACAACATAAAAATTTATTCTTCACTAAAACCCCTTAACCAATACCAACGTTTCCACAAAATTCTACCCTCTACAAAAACAACCTATC
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFIIIIIIIFFFFBFFFFFFFFFFBBFFFFFFFFFFFFFFFFFFBFFBBBB
+@HISEQ:105:C2UE1ACXX:3:1101:2125:17112 2:N:0:CAGATC
+AAACTCTCTACTAAACAACAATCCCAACCCCACAACACCTTAAACATACTCTACTTCTCTGCCTCCACTATTCAATTTTCAAAAACAAAAACTACCAACAT
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIFFIIIIIIFFFIIIFFFFFFFFBBBFFFFFFFFFFFFFFFFFFFFFFFB
+@HISEQ:105:C2UE1ACXX:3:1101:6615:17184 2:N:0:CAGATC
+AAAAATAAATTTATTTCAACTACTATTATTCATATACCTCTATAAAAAAACGTATTTTTATAACTTATAACTTATCCTTATAATTAAACACCTTACTCATA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:12454:17532 2:N:0:CAGATC
+CAACTCAAACTCATTTTAACTTCTCCTCCACCTAAAAAAAAACAAAACCCCTAACTCTCAACTTAATACAAAAATCACATTTAACCAATAAAACACGAATA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFBFFBB
+@HISEQ:105:C2UE1ACXX:3:1101:2584:17964 2:N:0:CAGATC
+AATTCTCTCTCACTCAACTTCTTAACAAACTAACTCCAAAAAAAAACCCTACTTCTAAAACACTAATAACCTAAATAAACACCACAAAAACCCAAAAAGAT
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF<
+@HISEQ:105:C2UE1ACXX:3:1101:9624:17911 2:N:0:CAGATC
+CACAACAACAATAATTCCTCCTACAAATACCCTCATAAACATTTAAAAAAATAAATATATACTACCCCCTAATAATCATTACAAAATCAATACTAAACTAA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFBB'<BFBBBFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:19795:18124 2:N:0:CAGATC
+CCAAAACTCCATCCCTATACCTCACCCCCACCCCCGCACTATCAACTTCCTTCATACGCCATTTATAAAAAACTAATCTCTTCAATTCAAATTTCTCTTAA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIII0BFFIIIIIIIIIIFFFFFFFF'7<BBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:2293:18893 2:N:0:CAGATC
+CATCAAACAACCGAATCTTTAAAAAAAACTTTCAAAACAAAATATAAAATTTCAAACTAAAAACAATAAAAACTAATAAATAAATATCTAAAAACGAAAAC
++
+BBBFFFFFFFFFFIIIFIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFIIFFIFFFFFFFFFFFFBFBFFFFFFB<<<BBFBFFFFFFFFFFFFFF<BFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:12741:20220 2:N:0:CAGATC
+AAAAAAAAATAATATACAACATAAAAATTTATTCTTCACTAAAACCCCTTAACCAATACCAACGTTTCCACAAAATTCTACCCTCTACAAAAACAACCTAT
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIFFFFFFFFFFBFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFB
+@HISEQ:105:C2UE1ACXX:3:1101:18080:20193 2:N:0:CAGATC
+CTCTACCAAACAAACCAATAATTAATTAATCTTACCTACCAACAAAAAAATTCAACCAACTTCTTAAAACCTACAAAAATCTACATACATCATTAATCATT
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:18680:20134 2:N:0:CAGATC
+CAAAAAATTTCCAAAACCTACCCCAACCAAAAAAACAAAATAATCTAAATCATTTCAAACTAAATAACCAAAAACCTCTTAAAAAACACCATACTTCCAAA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFB
+@HISEQ:105:C2UE1ACXX:3:1101:19451:20037 2:N:0:CATATC
+ATTTACAACACGAATTACACACGTTTTTCCGAAACACTTTTCATAAACAACCCACTCTAAAAACAAACACCTCAACTAATACTTATTATCATTTTCCAAAC
++
+BBBFFFFFFFF'BFFFIIIIFI'BFFIIFF'BFFFFBFFIIIFIIIFFIIFFFIIBFBFFFFFFFFFBBBB7<BBBBBBBBBBBFBFFFBFFFFFFBFBBB
+@HISEQ:105:C2UE1ACXX:3:1101:14848:20360 2:N:0:CAGATC
+AACTACCGACATCCTGTATCTATACCTCAATCTATATACTCCTCTATAAAACAAACATAACAAAAACACCCTCGATCCTCCCTAATATCTTTTTTTATATA
++
+BBBFFFFFFFFFFIIIFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFB
+@HISEQ:105:C2UE1ACXX:3:1101:20622:20397 2:N:0:CAAATC
+CTCCAAATCCCCAAATTTAAACTAATCAAACACAAAAAAATCCCCAAAATATTCTTAATACATATTAAACTATACAAAACTCAATTTACTAAAATCTTTAA
++
+<BBFFFFFFFFFFIIIIIIFIBFFIFIIBFFFFIIIIFIIFFBFFIIIIIIIIIFFFIIIIFFFFFFFFFBFBFFFFFBB<BBBBBBBFBFBBBFBBBBBB
+@HISEQ:105:C2UE1ACXX:3:1101:12295:20565 2:N:0:CAGATC
+ATATCTAATAAAAAAACTTTCATACGTATACAAATACTCTAAAAATCCAAAACATTCAATCCCCTAAAACTAAAATAACAAATACTAAAAATCAAACCTAA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIFIIIIIIIIIIIIIIIIIIIIIIIFFIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:8987:20904 2:N:0:CAGATC
+TACACACACACGCCAAACACACAATCCACCACCACCAAAACTCCATCGATAACACGCAAAACAACTATCACCTCTAAAAAAAAATCCACATAACCGAAAAA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIFFFIBFFIFFIIIIIIFFFIIFFIIFFFFFBFFFBFBBFFBB<BBBBBBBFFFFFFFFF<BBBFFBFBBB7<BFFF
+@HISEQ:105:C2UE1ACXX:3:1101:11915:21129 2:N:0:CAGATC
+CAAAAAATACTTCTTAACAAACAAATCCTAAAATTACCCACTCCTCATTATAAAACCAAAAATTACTTCACTATTTAACACATACTATTTCTAAAACGAAC
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:12442:21376 2:N:0:CAGATC
+AACCTACCAAAAACAAACCACGCTAAAAAACCATAATAACTAATATAACTTTCACCTTATATTTTTTTATTTACTTACTTACTTTTTTATATTTTATTCCA
++
+BBBFFFFFFFFFFIIIIIIIIFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:10225:21919 2:N:0:CAGATC
+ATAAAAAAATAAAAAAAAAAAAATAACTATACAAAATAACCCTCTAACCTCCTCATAAATATCATAACATATACACAACTACACACCACACACAAACTTAA
++
+BBBFFFFFFFFFFIIIIIIIIIIFFFB<<<B<BFFB<BBBFBBBBFFFFBBB7BBBBFF<B<<B<BBBB<B0<BBBBBBBBFFB<BBBB7<B<BBB<B<BB
+@HISEQ:105:C2UE1ACXX:3:1101:12240:21986 2:N:0:CAGATC
+TCCTAATTTCAAACTCTCCCTCCTACACAAAAAATAATTCTCCTATAATTCCTCCCTCAATCCCAAACCAAAACTACCCCCTATATTAATAACAATAGATC
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:2199:22213 2:N:0:CAGATC
+AATCAAAAAACCAAAAACCTCTAATACCCACCCCTAAACAAACCTAAAAAATAACGCCGATATAAAACAATCCAATCAATAAAAAAATAAAAACAATAATA
++
+BBBFFFFFFFFFFIIIFIIIIIIIIFFIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:9023:22492 2:N:0:CAGATC
+TAATAAAAAAATTCTTTCTTAATAACCCTAAATTCCTCTTCCTCCCTCACCATAACTAACATAACACGTCCTACCAAAAACAATCCCCAACAAACTATCAC
++
+BBBFFFFFFFFFFIIIIIIIIIIIIFIIFFIFFFFFBFFFIIIIIIFFFFFIFFIFFIFFIFFIIIF7<BB<BBFFFFBFFBBBBBBBFFFBFFFBF<<<<
+@HISEQ:105:C2UE1ACXX:3:1101:13060:22287 2:N:0:CAGATC
+ACTCATCACTAAAACGACCAAAAAATACATAAAATCACACCCGTAACTCCTTTATATTATAACTAAACTAACGCAATTAAAAATTACAACAAAAAATAATA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIFFFIFIIIIIIIFIIIIIIIIIIIFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:16084:22385 2:N:0:CAGATC
+TTAAAATATATATCGTCAACACCAAAAAAACAAAATCAAACAAAACTCTCAAAACTATAAAAACACAAACAATCCCATTTAAAAAATCTTCCATAAAACTA
++
+BBBFFFFFFFFFFIIFIIIIIIIIIIIIIIFIIIIIIIIIIIIIIIIIIIFFIIIFIFFFFFFFFFFFFFFF<<BBFBBFFFFFBFBBBBFFFFFFFFFBF
+@HISEQ:105:C2UE1ACXX:3:1101:7272:22581 2:N:0:CAGATC
+CTAACTCCACCTATAAAAAACCTAAAAAATCGATCAAAATACTCCTCCTCCTTCTATTCCTAAAAAAAATAAAATTCTTTAAAAAATTCTTAATATCATTA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIFFIIFIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:10060:23020 2:N:0:CAGATC
+CAAAATAATATAAATCTAAAAATAAAAACACCATCCTTACTTCCTTTTAAAAAAAATACTCAAAACTATAAGACTACCCTTTCCTCTTAAAAACCTAATAC
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFBBFFFFFFFFFFFFFFFFFFFFFFBFFBFF
+@HISEQ:105:C2UE1ACXX:3:1101:14440:23104 2:N:0:CAGATC
+ATATTCTATAAAACACCAAAAATTCCTAAAAACCTCTAAAAACCAACTCCTACAAAAAAAACAACCCACAAAACAAAAATCAACTCCCCAAACCTTAACTT
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFB
+@HISEQ:105:C2UE1ACXX:3:1101:6941:23338 2:N:0:CAGATC
+CTTATTCATTCCATCGCCTATAACATAAAAACAAAAATAAACATTATCATAACAACCTATAATCAACACCCAAAACCAAATTCAACACACTATAACTCCTA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:10069:23622 2:N:0:CAGATC
+CTTAAAATTCACTTCACATTAACTTTTAAATATTATTCCAATATATTTAAAATAACACTTTAAAAAAAACCTAAATTAAAAAAACTAACCTTCTACAAATA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:14079:24078 2:N:0:CAGATC
+ATAAAACTAAACTTTCATATATTACTCTTAAATTTTTTTCCTAACTATAAAAAACTTTACAAAATACACCCTATTACTTTCAAACTACCAAAACTACCTAA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:12064:24631 2:N:0:CCGATC
+AAAAAAACCACCATTCTACAATCCCTTAAAAAAACCCCCAATAAAAAATAAACCCAAACCTAAACAAACAAAAAAACTCAAAAAATTAACTCTAAATATCA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFBBBFFBFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:6662:24968 2:N:0:CAGATC
+CTCCCATACTTAAAAAAATTAAAACCCAAAAACAATTCTACCCTCACAAATACCAAAAAAACGAAACACAATACCATAAACCTAAACAATTAAACGACACC
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFBFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:11630:24964 2:N:0:TAGATC
+AAAAAAACATACCAACAACCTTTCAAATACAAAACTCCGTAAAATCTATACTATCTAACAATACCCTAAATCAAAAAAAAAACCAATCCTTAACATCCAAA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIFFIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFBFBFBBFBFFFFFBFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:12594:24878 2:N:0:CCGATC
+ATATAATATTACCTATTTATTTTTATTATTATTATTATTATTATTTATTTATTTATTCTAAACACCTCCCAAAACATTCCAACTTTCTTCATTAAATATAA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIFFFFFFIIIFFIFIIIFIIIIIIIIIIIFFFFIIIFFFFIIIIIIIIIIFFIIIIFIIFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:4483:25030 2:N:0:CAGATC
+CCTAATACTCTATACTAAACAAATAACAAAAAACTTCCTCCTACCTCTTTAAAAAACCCATAAACTTACTCAAATACAAATATAACAAAATAATATAAATC
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIFIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFB
+@HISEQ:105:C2UE1ACXX:3:1101:12198:25235 2:N:0:CAGATC
+CACAAAAACAACATACCTCTTAATAATCTTTCAAACTTACACCAAATCTATTTCTATCCTCATCTTCCATTTATAAAAACCAAAACTATACAACCCAATCT
++
+BBBFFFFFFFFFFFIFFFFFFBFBFFBBFF<FIBFFIF<BFIIIIIIIIFFFFFBFBFBBFF<B<BFFFBBFIIIIFIIFFFFFFFFFBFBFFBBBBBBB<
+@HISEQ:105:C2UE1ACXX:3:1101:20477:25084 2:N:0:CATATC
+AAAACCCAACTCTTCCACCAAAAAAATTACTTTATCTCTAATACTCTTAAAATACCCTATATTATCCTTATACAAAAACACTTAATTAACTTCCTAATTAA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIFIIFIIIIIIIIFIIIIIIIIIIIIIF0<BFFFFFFFFFFFFBBBFFFFFFFFFBBB<<BB70
+@HISEQ:105:C2UE1ACXX:3:1101:5725:25359 2:N:0:CAGATC
+ATAACTTAACTCTCTAATAACCTTAACTATTCCTCTCCCTAACCTCCACAAATACTATATATAATACTATACTTCTTAATAAACTTACTTAACATAAACCA
++
+BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFF
+@HISEQ:105:C2UE1ACXX:3:1101:5502:25591 2:N:0:CAGATC
+TTCTTAATTCCATCTTTAAACAATAATATTAAAATATTACTTCCCAAAATACAAACCCATATCCATATCCAAAAACTAAACAACCTCTACACCTCAACAAA
++
+<<<BBBBBBBBBBBBBBBBB70B00B000<0BBB###################################################################
--- a/umi-tools_extract.xml	Mon Apr 16 16:38:40 2018 -0400
+++ b/umi-tools_extract.xml	Tue Jun 05 19:44:38 2018 -0400
@@ -8,13 +8,15 @@
     @COMMAND_LINK@
 
     umi_tools extract
+            --extract-method='$extract_method.value'
             --bc-pattern='$bc_pattern'
+
             #if $input_type.type == 'single':
                 #if $gz:
                     --stdin=input_single.gz
                     --stdout out.gz
                 #else
-                    --stdin='$input_type.input_single'
+                    --stdin=input_single.txt
                     --stdout '$out'
                 #end if
             #else:
@@ -24,8 +26,8 @@
                     --stdout out1.gz
                     --read2-out=out2.gz
                 #else:
-                    --stdin='$input_type.input_read1'
-                    --read2-in='$input_type.input_read2'
+                    --stdin=input_read1.txt
+                    --read2-in=input_read2.txt
                     --stdout '$out1'
                     --read2-out='$out2'
                 #end if
@@ -34,6 +36,13 @@
                     --bc-pattern2='$input_type.barcode.bc_pattern2'
                 #end if
             #end if
+
+            #if $barcodes.use_barcodes.value == 'yes':
+                --filter-cell-barcode
+                --whitelist='$barcodes.filter_barcode_file'
+                '$barcodes.filter_correct.value'
+            #end if
+
             #if not $prime3:
                 --3prime
             #end if
@@ -57,12 +66,49 @@
     ]]></command>
     <inputs>
         <expand macro="input_types" />
+
+        <conditional name="barcodes" ><!-- optional filtering of reads against a user-supplied list of known barcodes -->
+            <param name="use_barcodes" argument="--filter-cell-barcode" type="select" label="Use Known Barcodes?" >
+                <option value="yes">Yes</option>
+                <option value="no" selected="true" >No</option>
+            </param>
+            <when value="no" /><!-- nothing to configure; the command template adds no barcode options in this case -->
+            <when value="yes" ><!-- the command template adds the filter-cell-barcode flag and the whitelist file -->
+                <param name="filter_barcode_file" type="data" format="tsv" label="Barcode File" /><!-- passed to the whitelist option -->
+                <param name="filter_correct" argument="--error-correct-cell" type="boolean" truevalue="--error-correct-cell" falsevalue="" checked="false" label="Apply correction to cell barcodes?" help="This only applies if your barcode file has two columns output from the umi_tools whitelist command." />
+            </when>
+        </conditional>
+
+        <param name="extract_method" type="select" label="Method to extract barcodes" ><!-- selected value is passed to the extract-method option in the command template -->
+            <option value="regex">Regular Expressions</option>
+            <option value="string" selected="true">String</option>
+        </param>
+            
         <param name="bc_pattern" argument="--bc-pattern" type="text" label="Barcode pattern for first read"
             help="Use this option to specify the format of the UMI/barcode. Use Ns to
                     represent the random positions and Xs to indicate the bc positions.
                     Bases with Ns will be extracted and added to the read name. Remaining
                     bases, marked with an X will be reattached to the read.">
+            <sanitizer invalid_char=""><!-- NOTE(review): empty invalid_char presumably drops disallowed characters instead of substituting them; confirm against the Galaxy sanitizer docs -->
+                <valid initial="string.letters,string.digits"><!-- start from letters and digits, then allow the extra characters added below (needed for regex patterns) -->
+                    <add value="!="/><!-- exclamation mark, equals sign -->
+                    <add value="-"/>
+                    <add value="_"/>
+                    <add value="."/>
+                    <add value="?"/>
+                    <add value="&lt;"/><!-- left triangle bracket -->
+                    <add value="&gt;"/><!-- right triangle bracket -->
+                    <add value="&#91;"/> <!-- left square bracket -->
+                    <add value="&#93;"/> <!-- right square bracket -->
+                    <add value="&#94;"/> <!-- caret -->
+                    <add value="&#123;"/> <!-- left curly -->
+                    <add value="&#125;"/> <!-- right curly -->
+                    <add value="&#40;"/> <!-- left parenthesis -->
+                    <add value="&#41;"/> <!-- right parenthesis -->
+                </valid>
+            </sanitizer>
         </param>
+
         <param name="prime3" argument="--3prime" type="boolean" label="Is the barcode at the 5' end?"
             truevalue="1" falsevalue="0" checked="true"
             help="By default the barcode is assumed to be on the 5' end of the read, but
@@ -126,17 +172,37 @@
             <output name="out_log" file="out_paired.log" lines_diff="16"/>
         </test>
         <test>
-            <param name="type" value="paired_collection" />
+            <param name="type" value="paired_collection" /> <!-- same as before, but uncompressed -->
             <param name="input_readpair" >
                 <collection type="paired">
-                    <element name="forward" ftype="fastq.gz" value="t_R1.fastq.gz" />
-                    <element name="reverse" ftype="fastq.gz" value="t_R2.fastq.gz" />
+                    <element name="forward" ftype="fastq" value="t_R1.fastq" />
+                    <element name="reverse" ftype="fastq" value="t_R2.fastq" />
                 </collection>
             </param>
             <param name="bc_pattern" value="NNNXXX" />
             <output name="out1" file="out_R1.fastq.gz" decompress="true" lines_diff="2" />
             <output name="out2" file="out_R2.fastq.gz" decompress="true" lines_diff="2" />
-            <output name="out_log" file="out_paired.log" lines_diff="16"/>
+            <output name="out_log" file="out_paired.log" lines_diff="25" />
+        </test>
+        <test><!-- paired gzipped input, string barcode pattern, reads filtered against the known-barcode file; only read 2 output is compared -->
+            <param name="type" value="paired" />
+            <param name="input_read1" value="scrb_seq_fastq.1.gz" ftype="fastq.gz" />
+            <param name="input_read2" value="scrb_seq_fastq.2.gz" ftype="fastq.gz" />
+            <param name="extract_method" value="string" />
+            <param name="bc_pattern" value="CCCCCCNNNNNNNNNN" />
+            <param name="use_barcodes" value="yes" />
+            <param name="filter_barcode_file" value="scrb_seq_barcodes" />
+            <output name="out2" file="scrb_extract.fastq.gz" decompress="true" />
+        </test>
+        <test><!-- same inputs and expected output as the previous test, but the barcode is given as a regular expression with named groups cell_1 and umi_1 -->
+            <param name="type" value="paired" />
+            <param name="input_read1" value="scrb_seq_fastq.1.gz" ftype="fastq.gz" />
+            <param name="input_read2" value="scrb_seq_fastq.2.gz" ftype="fastq.gz" />
+            <param name="extract_method" value="regex" />
+            <param name="bc_pattern" value="^(?P&lt;cell_1&gt;.{6})(?P&lt;umi_1&gt;.{10})" />
+            <param name="use_barcodes" value="yes" />
+            <param name="filter_barcode_file" value="scrb_seq_barcodes" />
+            <output name="out2" file="scrb_extract.fastq.gz" decompress="true" />
         </test>
     </tests>
     <help><![CDATA[