# HG changeset patch # User devteam # Date 1447261998 18000 # Node ID de2db1bdfbf8fc07b304a7d4c569b6f70bc0064a # Parent d8cc2c8eef14aa240eed2e8ad77fdc06306aec32 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_compute_length commit a1517c9d22029095120643bbe2c8fa53754dd2b7 diff -r d8cc2c8eef14 -r de2db1bdfbf8 fasta_compute_length.py --- a/fasta_compute_length.py Tue Jul 21 14:18:55 2015 -0400 +++ b/fasta_compute_length.py Wed Nov 11 12:13:18 2015 -0500 @@ -6,4 +6,4 @@ import sys from utils.fasta_to_len import compute_fasta_length -compute_fasta_length( sys.argv[1], sys.argv[2], sys.argv[3], False ) \ No newline at end of file +compute_fasta_length( sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4] == 'id_only' ) diff -r d8cc2c8eef14 -r de2db1bdfbf8 fasta_compute_length.xml --- a/fasta_compute_length.xml Tue Jul 21 14:18:55 2015 -0400 +++ b/fasta_compute_length.xml Wed Nov 11 12:13:18 2015 -0500 @@ -1,51 +1,78 @@ - - - fasta_compute_length.py $input $output $keep_first - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + fasta_compute_length.py $input $output $keep_first $keep_first_word + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + **What it does** -This tool counts the length of each fasta sequence in the file. The output file has two columns per line (separated by tab): fasta titles and lengths of the sequences. The option *How many characters to keep?* allows to select a specified number of letters from the beginning of each FASTA entry. +This tool counts the length of each fasta sequence in the file. The output file has two columns per line (separated by tab): fasta titles and lengths of the sequences. The option *How many characters to keep?* allows to select a specified number of letters from the beginning of each FASTA entry. ------ +----- **Example** Suppose you have the following FASTA formatted sequences from a Roche (454) FLX sequencing run:: - >EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_ TCCGCGCCGAGCATGCCCATCTTGGATTCCGGCGCGATGACCATCGCCCGCTCCACCACG TTCGGCCGGCCCTTCTCGTCGAGGAATGACACCAGCGCTTCGCCCACG >EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_ AATAAAACTAAATCAGCAAAGACTGGCAAATACTCACAGGCTTATACAATACAAATGTAAfa + >EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_ + TCCGCGCCGAGCATGCCCATCTTGGATTCCGGCGCGATGACCATCGCCCGCTCCACCACG + TTCGGCCGGCCCTTCTCGTCGAGGAATGACACCAGCGCTTCGCCCACG + >EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_ + AATAAAACTAAATCAGCAAAGACTGGCAAATACTCACAGGCTTATACAATACAAATGTAAfa Running this tool while setting **How many characters to keep?** to **14** will produce this:: - - EYKX4VC02EQLO5 108 - EYKX4VC02D4GS2 60 + + EYKX4VC02EQLO5 108 + EYKX4VC02D4GS2 60 + +However, if your IDs are not all the same length, you may wish to just keep the fasta ID, and not the description:: + + >EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_ + TCCGCGCCGAGCATGCCCATCTTGGATTCCGGCGCGATGACCATCGCCCGCTCCACCACG + TTCGGCCGGCCCTTCTCGTCGAGGAATGACACCAGCGCTTCGCCCACG + >EYKX4VC length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_ + AATAAAACTAAATCAGCAAAGACTGGCAAATACTCACAGGCTTATACAATACAAATGTAAfa + +Running this tool with **Strip fasta description from header** set to **True** and **How many characters to keep?** set to **0** will produce:: + + EYKX4VC02EQLO5 108 + EYKX4VC 60 - + + + 10.1093/bioinformatics/btq281 + diff -r d8cc2c8eef14 -r de2db1bdfbf8 test-data/454.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/454.fasta Wed Nov 11 12:13:18 2015 -0500 @@ -0,0 +1,52 @@ +>EYKX4VC01B65GS length=54 xy=0784_1754 region=1 run=R_2007_11_07_16_15_57_ +CCGGTATCCGGGTGCCGTGATGAGCGCCACCGGAACGAATTCGACTATGCCGAA +>EYKX4VC01BNCSP length=187 xy=0558_3831 region=1 run=R_2007_11_07_16_15_57_ +CTTACCGGTCACCACCGTGCCTTCAGGATTGATCGCCAGATCGGTCGGTGCGTCAGGCGG +GGTGACATCGCCCACCACGGTACTCACTGGCTGGCTCTGGTTCCCGGCGGCATCGGAGGC +CACCACGTTGAGGGTATTCCCCTCGGTTTGTGGCTCGGTGAGAACCACGTTGTAGTCGCC +ATTGGTC +>EYKX4VC01CD9FT length=115 xy=0865_1719 region=1 run=R_2007_11_07_16_15_57_ +GGGGGCTTTGGCCTGTCGTCCGGCACCTCGCAAGAGCTACAGCAGGCGCGGCTGGCGATC +ATCGGCGGCACGCCGGCCTATATGTCGCCGGAACACACCACCCGCACCCAACGCG +>EYKX4VC01B8FW0 length=95 xy=0799_0514 region=1 run=R_2007_11_07_16_15_57_ +TAAATTTCAAGGAATGCAAATCAGGGTCGTGTGTTTAGACTTCGGCTTTAGAGACCTGAA +TACGTCAAAAACATAACTTCATGATATCTTGCAGT +>EYKX4VC01BCGYW length=115 xy=0434_3926 region=1 run=R_2007_11_07_16_15_57_ +GGCCAGCCGGGACAGCGTTGTTGGGCTGCATGGCGACGAGCTAAAAGTCGCCATCACCGC +CCCGCCGGTTGATGGGCAGGCTAATGCCCATCTGGTAAAAACTTTCTCGCCAAAC +>EYKX4VC01AZXC6 length=116 xy=0292_0280 region=1 run=R_2007_11_07_16_15_57_ +GGGGGCGTTTGGCCTGTCGTCCGGCACCTCGCAAGAGCTACAGCAGGCGCGGCTGGCGAT +CATCGGCGGCACGCCGGCCTATATGTCGCCGGAACACACCACCCGCACCCAACGCG +>EYKX4VC01CATH5 length=82 xy=0826_0843 region=1 run=R_2007_11_07_16_15_57_ +CGAAATTGCACATTCTCGGCCATATCTCTGGACCTACATGACCGATTTGATCATCTTCGA +ACTTAGCCTTCCTTTNTTAACG +>EYKX4VC01BCEIV length=47 xy=0434_0757 region=1 run=R_2007_11_07_16_15_57_ +TGACGTCGTGCCGAGCTACGACAATGCCGACATGGTGATCGTTAACA +>EYKX4VC01BWERM length=83 xy=0662_0304 region=1 run=R_2007_11_07_16_15_57_ +CGGTCGGCCTCACCATGGAGAAGATCCCGCCCCGGCCGAGGTCATGGTGGATCTCGGCCA +GGGCGTGCTGATGAAGTTCAAAT +>EYKX4VC01BT2O7 length=69 xy=0635_1945 region=1 run=R_2007_11_07_16_15_57_ +AGCGTTTCTCCAGCCGGTCGGCTACGCCGTTTGCCCCTGAAAGACGCTGTTCAGACCGAA +CGCGGTAAA +>EYKX4VC01BO0UO length=222 xy=0577_3838 region=1 run=R_2007_11_07_16_15_57_ +AGACCTGGGACAGCGGCGGGCTGCTGAAGCCGCAGGCGATAGAGGACAAACTGCAGTACC +GCTTCTGGCTGCACTATGCCGAAGGCTCGCTGATGCCGCTGCTGTTAATGAAGCTGGTGT +TCGCCAGCCTGGGTAAACCCCCTGTGCCCTTTGGCGTCCGCTCGCTGGGCGCCCTGCTGG +GCAAGGGCATTCAGAAAGCGTGGCTGGATCCCCAGCTGGCCA +>EYKX4VC01CBCPK length=83 xy=0832_1158 region=1 run=R_2007_11_07_16_15_57_ +CGGTCGGCCTCACCATGGAGAAGATCCCGCCCCGGCCGAGGTCATGGTGGATCTCGGCCA +GGGCGTGCTGATGAAGTTCAAAT +>EYKX4VC01B474S length=54 xy=0762_2010 region=1 run=R_2007_11_07_16_15_57_ +AGCAGTTTTCCAGCGCTTTCGAAGAGCGCTGGCGCGCGCGGGCTTCCAGCATAT +>EYKX4VC01BB4QL length=57 xy=0431_0363 region=1 run=R_2007_11_07_16_15_57_ +GGGGAGGAGCTAATAATATGCTCTTGGGGAGGAGCTAATTATATGCTCTTGGGGAGG +>EYKX4VC01BJ37M length=64 xy=0522_0192 region=1 run=R_2007_11_07_16_15_57_ +TCGAGTATGTATCAAGGACTACATACAAATTTGCCAAAAGAGATTATGCACTATCCCGAC +TTCC +>EYKX4VC01BV9R8 length=54 xy=0660_2038 region=1 run=R_2007_11_07_16_15_57_ +AAAACTCGGAGAAACTATTCAGCAGCACTGCGTTTCGCTGAATTTTAGACCGTT +>EYKX4VC01CEPP8 length=60 xy=0870_2350 region=1 run=R_2007_11_07_16_15_57_ +CTGGGTGGGTGCACTACAGGAACGTCATTTGTTCAATCCTCACGTTGTTGTTAGTGTCAG +>EYKX4VC01BTLME length=78 xy=0630_0292 region=1 run=R_2007_11_07_16_15_57_ +TTATCCACACGCTGTCCGGATCCAGCGCCAGGCGCCGACGCTGGACTTCCGCCGCCTGCG +CCCAGTTGCCCTGACTTC diff -r d8cc2c8eef14 -r de2db1bdfbf8 test-data/extract_genomic_dna_out1.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/extract_genomic_dna_out1.fasta Wed Nov 11 12:13:18 2015 -0500 @@ -0,0 +1,456 @@ +>hg17_chr1_147962192_147962580_- +ACTTGATCCTGCTCCCTCGGTGTCTGCATTGACTCCTCATGCTGGGACTG +GACCCGTCAACCCCCCTGCTCGCTGCTCACGTACCTTCATCACTTTTAGT +GATGATGCAACTTTCGAGGAATGGTTCCCCCAAGGGCGGCCCCCAAAAGT +CCCTGTTCGTGAGGTCTGTCCAGTGACCCATCGTCCAGCCCTATACCGGG +ACCCTGTTACAGACATACCCTATGCCACTGCTCGAGCCTTCAAGATCATT +CGTGAGGCTTACAAGAAGTACATTACTGCCCATGGACTGCCGCCCACTGC +CTCAGCCCTGGGCCCCGGCCCGCCACCTCCTGAGCCCCTCCCTGGCTCTG +GGCCCCGAGCCTTGCGCCAGAAAATTGTCATTAAATGA +>hg17_chr1_147984545_147984630_+ +ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTT +TGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG +>hg17_chr1_148078400_148078582_- +GTTCTCAGCTTCCTTGCTTCCATGGCTCCAGCACCATTCGAAACCTCAAA +GAGAGGTTCCACATGAGCATGACTGAGGAGCAGCTGCAGCTGCTGGTGGA +GCAGATGGTGGATGGCAGTATGCGGTCTATCACCACCAAACTCTATGACG +GCTTCCAGTACCTCACCAACGGCATCATGTGA +>hg17_chr1_148185136_148185276_+ +ATGGAAGCGTTTTTGGGGTCGCGGTCCGGACTTTGGGCGGGGGGTCCGGC +CCCAGGACAGTTTTACCGCATTCCGTCCACTCCCGATTCCTTCATGGATC +CGGCGTCTGCACTTTACAGAGGTCCAATCACGCGGACCCA +>hg17_chr10_55251623_55253124_- +TCTTTTCCTTCTCTACCATTTTCAACAAAGCAGGGGAAATAACTCAGTCT +CAGAAGACAGGAAACATCAACAAGTTGTGATGCCCTTTTCTTCCAATACT +ATTGAGGCTCACAAGTCAGCTCATGTAGACGGATCACTTAAGAGCAACAA +ACTGAAGTCTGCAAGAAAATTCACATTTCTATCTGATGAGGATGACTTAA +GTGCCCATAATCCCCTTTATAAGGAAAACATAAGTCAAGTATCAACAAAT +TCAGACATTTCACAGAGAACAGATTTTGTAGACCCATTTTCACCCAAAAT +ACAAGCCAAGAGTAAGTCTCTGAGGGGCCCAAGAGAAAAGATTCAGAGGC +TGTGGAGTCAGTCAGTCAGCTTACCCAGGAGGCTGATGAGGAAAGTTCCA +AATAGACCAGAGATCATAGATCTGCAGCAGTGGCAAGGCACCAGGCAGAA +AGCTGAAAATGAAAACACTGGAATCTGTACAAACAAAAGAGGTAGCAGCA +ATCCATTGCTTACAACTGAAGAGGCAAATTTGACAGAGAAAGAGGAAATA +AGGCAAGGTGAAACACTGATGATAGAAGGAACAGAACAGTTGAAATCTCT +CTCTTCAGACTCTTCATTTTGCTTTCCCAGGCCTCACTTCTCATTCTCCA +CTTTGCCAACTGTTTCAAGAACTGTGGAACTCAAATCAGAACCTAATGTC +ATCAGTTCTCCTGCTGAGTGTTCCTTGGAACTTTCTCCTTCAAGGCCTTG +TGTTTTACATTCTTCACTCTCTAGGAGAGAGACACCTATTTGTATGTTAC +CTATTGAAACCGAAAGAAATATTTTTGAAAATTTTGCCCATCCACCAAAC +ATCTCTCCTTCTGCCTGtccccttccccctcctcctcctatttctcctcc +ttctcctcctcctgctcctgctcctcttgctcctcctcctgacatttctc +ctttttctcttttttgtcctcctccctctcctccttctatccctcttcct +cttcctcctcctACATTTTTTCCACTTTCCGTTTCAACGTCTGGTCCCCC +AACAccacctcttctacctccatttccaactcctcttcctccaccacctc +cttctattccttgccctccacctccttcAGCTTCATTTCTGTCCACAGAG +TGTGTCTGTATAACAGGTGTTAAATGCACGACCAACTTGATGCCTGCCGA +GAAAATTAAGTCCTCTATGACACAGCTATCAACAACGACAGTGTGTAAAA +CAGACCCTCAGAGAGAACCAAAAGGCATCCTCAGACACGTTAAAAACTTA +GCAGAACTTGAAAAATCAGTAGCTAACATGTACAGTCAAATAGAAAAAAA +CTATCTACGCACAAATGTTTCAGAACTTCAAACTATGTGCCCTTCAGAAG +TAACAAATATGGAAATCACATCTGAACAAAACAAGGGGAGTTTGAACAAT +ATTGTCGAGGGAACTGAAAAACAATCTCACAGTCAATCTACTTCACTGTA +A +>hg17_chr11_116124407_116124501_- +ATCCAATGGATTTGAACAGAAGCGCTTTGCCAGGCTTGCCAGCAAGAAGG +CAGTGGAGGAACTTGCCTACAAATGGAGTGTTGAGGATATGTAA +>hg17_chr11_116206508_116206563_+ +ATGCAGCCCCGGGTACTCCTTGTTGTTGCCCTCCTGGCGCTCCTGGCCTC +TGCCC +>hg17_chr11_116211733_116212337_- +CCTAAAGCTCCTTGACAACTGGGACAGCGTGACCTCCACCTTCAGCAAGC +TGCGCGAACAGCTCGGCCCTGTGACCCAGGAGTTCTGGGATAACCTGGAA +AAGGAGACAGAGGGCCTGAGGCAGGAGATGAGCAAGGATCTGGAGGAGGT +GAAGGCCAAGGTGCAGCCCTACCTGGACGACTTCCAGAAGAAGTGGCAGG +AGGAGATGGAGCTCTACCGCCAGAAGGTGGAGCCGCTGCGCGCAGAGCTC +CAAGAGGGCGCGCGCCAGAAGCTGCACGAGCTGCAAGAGAAGCTGAGCCC +ACTGGGCGAGGAGATGCGCGACCGCGCGCGCGCCCATGTGGACGCGCTGC +GCACGCATCTGGCCCCCTACAGCGACGAGCTGCGCCAGCGCTTGGCCGCG +CGCCTTGAGGCTCTCAAGGAGAACGGCGGCGCCAGACTGGCCGAGTACCA +CGCCAAGGCCACCGAGCATCTGAGCACGCTCAGCGAGAAGGCCAAGCCCG +CGCTCGAGGACCTCCGCCAAGGCCTGCTGCCCGTGCTGGAGAGCTTCAAG +GTCAGCTTCCTGAGCGCTCTCGAGGAGTACACTAAGAAGCTCAACACCCA +GTGA +>hg17_chr11_1812377_1812407_+ +ATGCTCCACCTGCATGGCTGGCAAACCATG +>hg17_chr12_38440094_38440321_- +GAGCTTTCTTCCTCTATGCTGGATTTGCTGCTGTGGGACTCCTTTTCATC +TATGGCTGTCTTCCTGAGACCAAAGGCAAAAAATTAGAGGAAATTGAATC +ACTCTTTGACAACAGGCTATGTACATGTGGCACTTCAGATTCTGATGAAG +GGAGATATATTGAATATATTCGGGTAAAGGGAAGTAACTATCATCTTTCT +GACAATGATGCTTCTGATGTGGAATAA +>hg17_chr13_112381694_112381953_+ +ATGAACTCACCAGAGGCGAGGCTCTGCGTTGCTCAATGCAGAGACTCTTA +CCCAGGGTGTCAGCCTCTGAAAGATACACGTGCCTGGGCCTCTTCCCTGA +AGATGGACCCGGCAGGTCTGGAGGGAGGCCCCCGTGATGAATCCCGTGAT +GAGCCGCCGATCCGAGCTCAGGCTGCGTCATGGGACCAGCCACAAGGTTG +CCTGACCTATAAAGGTCGCAGGAGTGCCTCAGGGACACAGAAGCAGTTAC +AGCTGCCAG +>hg17_chr14_98710240_98712285_- +GTAAAGATGAGCCTTCCAGCTACATTTGCACAACATGCAAGCAGCCCTTC +AACAGCGCGTGGTTCCTGCTGCAGCACGCGCAGAACACGCACGGCTTCCG +CATCTACCTGGAGCCCGGGCCGGCCAGCAGCTCGCTCACGCCGCGGCTCA +CCATCCCGCCGCCGCTCGGGCCGGAGGCCGTGGCGCAGTCCCCGCTCATG +AATTTCCTGGGCGACAGCAACCCCTTCAACCTGCTGCGCATGACGGGCCC +CATCCTGCGGGACCACCCGGGCTTCGGCGAGGGCCGCCTGCCGGGCACGC +CGCCTCTCTTCAGTCCCCCGCCGCGCCACCACCTGGACCCGCACCGCCTC +AGTGCCGAGGAGATGGGGCTCGTCGCCCAGCACCCCAGTGCCTTCGACCG +AGTCATGCGCCTGAACCCCATGGCCATCGACTCGCCCGCCATGGACTTCT +CGCGGCGGCTCCGCGAGCTGGCGGGCAACAGCTCCACGCCGCCGCCCGTG +TCCCCGGGCCGCGGCAACCCTATGCACCGGCTCCTGAACCCCTTCCAGCC +CAGCCCCAAGTCCCCGTTCCTGAGCACGCCGCCGCTGCCGCCCATGCCCC +CTGGCGGCACGCCGCCCCCGCAGCCGCCAGCCAAGAGCAAGTCGTGCGAG +TTCTGCGGCAAGACCTTCAAGTTCCAGAGCAATCTCATCGTGCACCGGCG +CAGTCACACGGGCGAGAAGCCCTACAAGTGCCAGCTGTGCGACCACGCGT +GCTCGCAGGCCAGCAAGCTCAAGCGCCACATGAAGACGCACATGCACAAG +GCCGGCTCGCTGGCCGGCCGCTCCGACGACGGGCTCTCGGCCGCCAGCTC +CCCCGAGCCCGGCACCAGCGAGCTGGCGGGCGAGGGCCTCAAGGCGGCCG +ACGGTGACTTCCGCCACCACGAGAGCGACCCGTCGCTGGGCCACGAGCCg +gaggaggaggacgaggaggaggaggaggaggaggaggagCTGCTACTGGA +GAACGAGAGCCGGCCCGAGTCGAGCTTCAGCATGGACTCGGAGCTGAGCC +GCAACCGCGAGAACGGCGGTGGTGGGGTgcccggggtcccgggcgcgggg +ggcggcgcggccAAGGCGCTGGCTGACGAGAAGGCGCTGGTGCTGGGCAA +GGTCATGGAGAACGTGGGCCTAGGCGCACTGCCGCAGTACGGCGAGCTCC +TGGCCGACAAGCAGAAGCGCGGCGCCTTCCTGAAGCGTGCggcgggcggc +ggggacgcgggcgacgacgacgacgcgggcggctgcggggacgcgggcgc +gggcggcgcggtcaacgggcgcgggggcggCTTCGCGCCAGGCACCGAGC +CCTTCCCCGGGCTCTTCCCGCGCAAGCCCGCGCCGCTGCCCAGCCCCGGG +CTCAACAGCGCCGCCAAGCGCATCAAGGTGGAGAAGGACCTGGAGCTGCC +GCCCGCCGCGCTCATCCCGTCCGAGAACGTGTACTCGCAGTGGCTGGTGG +GCTACGCGGCGTCGCGGCACTTCATGAAGGACCCCTTCCTGGGCTTCACG +GACGCACGACAGTCGCCCTTCGCCACGTCGTCCGAGCACTCGTCCGAGAA +CGGCAGCCTGCGCTTCTCCACGCCGCCCGGGGACCTGCTGGACGGCGGCC +TCTCGGGCCGCAGCGGCACGGCCAGCGGAGGCAGCACCCCGCACCTgggc +ggcccgggccccgggcggcccAGCTCCAAGGAGGGCCGCCGCAGCGACAC +GTGCGAGTACTGCGGCAAGGTGTTCAAGAACTGCAGCAACTTGACGGTGC +ACCGGCGGAGCCACACCGGCGAGCGGCCTTACAAGTGCGAGCTGTGCAAC +TACGCGTGCGCGCAGAGCAGCAAGCTCACGCGCCACATGAAGACGCACGG +GCAGATCGGCAAGGAGGTGTACCGCTGCGACATCTGCCAGATGCCCTTCA +GCGTCTACAGCACCCTGGAGAAACACATGAAAAAGTGGCACGGCGAGCAC +TTGCTGACTAACGACGTCAAAATCGAGCAGGCCGAGAGGAGCTAA +>hg17_chr15_41486872_41487060_- +ATATTGCTTTAGGGGTATTTGATGTGGTGGTGACGGACCCCTCATGCCCA +GCCTCGGTGCTGAAGTGTGCTGAAGCATTGCAGCTGCCTGTGGTGTCACA +AGAGTGGGTGATCCAGTGCCTCATTGTTGGGGAGAGAATTGGATTCAAGC +AGCATCCAAAATATAAACACGATTATGTTTCTCACTAA +>hg17_chr15_41673708_41673857_+ +ATGGCTGGTCCCTTCTCCCGTCTGCTGTCCGCCCGCCCGGGACTCAGGCT +CCTGGCTTTGGCCGGAGCGGGGTCTCTAGCCGCTGGGTTTCTGCTCCGAC +CGGAACCTGTACGAGCTGCCAGTGAACGACGGAGGCTGTATCCCCCGAG +>hg17_chr15_41679161_41679250_- +GTCGAAGTACAGCCTGGGGCCTCCAGGACTGGTCACGACCTTCCTGGTCC +CTGGTATTGACTATCAGCTTCCTTGGCCACCTGCTATGA +>hg17_chr15_41826029_41826196_+ +ATGCGCCTCCGCCGCCTAGCGCTGTTCCCGGGTGTGGCGCTGCTTCTTGC +CGCGGCCCGCCTCGCCGCTGCCTCCGACGTGCTAGAACTCACGGACGACA +ACTTCGAGAGTCGCATCTCCGACACGGGCTCTGCGGGCCTCATGCTCGTC +GAGTTCTTCGCCCCCTG +>hg17_chr16_142908_143003_+ +ATGTCTCTGACCAAGACTGAGAGGACCATCATTGTGTCCATGTGGGCCAA +GATCTCCACGCAGGCCGACACCATCGGCACCGAGACTCTGGAGAG +>hg17_chr16_179963_180135_- +GTCACGCTCCCGGGATCGGCGTCGGAGGCGGTCAAGATCTACCTCCCGAG +AGCGACGGAAATTGTCCCGGTCCCGGTCCCGAGATAGACATCGGCGCCAC +CGCAGCCGTTCCCGGAGCCACAGCCGGGGACATCGTCGGGCTTCCCGGGA +CCGAAGTGCGAAATACAAGTAA +>hg17_chr16_244413_244681_+ +ATGTTGGACCACAAGGACTTAGAGGCCGAAATCCACCCCTTGAAAAATGA +AGAAAGAAAATCGCAGGAAAATCTGGGAAATCCATCAAAAAATGAGGATA +ACGTGAAAAGCGCGCCTCCACAGTCCCGGCTCTCCCGGTGCCGAGCGGCG +GCGTTTTTTCTTTCATTGTTTCTCTGCCTTTTTGTGGTGTTCGTCGTCTC +ATTCGTCATCCCGTGTCCAGACCGGCCGGCGTCACAGCGAATGTGGAGGA +TAGACTACAGTGCCGCTG +>hg17_chr16_259268_259383_- +CGTGTTCCCGTTTACGTGGAGGCCACGGCACTCGAGCCCCAGCCCTGCAC +TCCTTCCCACCCCTGTGGAGCCCACAGCGGCTTGTGGCCCTGGGGGTGGA +GATGGGGTGGCCTAG +>hg17_chr18_23786114_23786321_- +GGCCTTAAAGCGGCTGACAATGACCCCACAGCTCCACCATATGACTCCCT +GTTAGTGTTTGACTATGAAGGCAGTGGCTCCACTGCTGGGTCCTTGAGCT +CCCTTAATTCCTCAAGTAGTGGTGGTGAGCAGGACTATGATTACCTGAAC +GACTGGGGGCCACGGTTCAAGAAACTTGCTGACATGTATGGTGGAGGTGA +TGACTGA +>hg17_chr18_59406881_59407046_+ +ATGGATTCACTTGGCGCCGTCAGCACTCGACTTGGGTTTGATCTTTTCAA +AGAGCTGAAGAAAACAAATGATGGCAACATCTTCTTTTCCCCTGTGGGCA +TCTTGACTGCAATTGGCATGGTCCTCCTGGGGACCCGAGGAGCCACCGCT +TCCCAGTTGGAGGAG +>hg17_chr18_59455932_59456337_- +CTTGAAGAGAAACTCACTGCTGAGAAATTGATGGAATGGACAAGTTTGCA +GAATATGAGAGAGACATGTGTCGATTTACACTTACCTCGGTTCAAAATGG +AAGAGAGCTATGACCTCAAGGACACGTTGAGAACCATGGGAATGGTGAAT +ATCTTCAATGGGGATGCAGACCTCTCAGGCATGACCTGGAGCCACGGTCT +CTCAGTATCTAAAGTCCTACACAAGGCCTTTGTGGAGGTCACTGAGGAGG +GAGTGGAAGCTGCAGCTGCCACCGCTGTAGTAGTAGTCGAATTATCATCT +CCTTCAACTAATGAAGAGTTCTGTTGTAATCACCCTTTCCTATTCTTCAT +AAGGCAAAATAAGACCAACAGCATCCTCTTCTATGGCAGATTCTCATCCC +CATAG +>hg17_chr18_59600586_59600754_+ +ATGGCCTCCCTTGCTGCAGCAAATGCAGAGTTTTGCTTCAACCTGTTCAG +AGAGATGGATGACAATCAAGGAAATGGAAATGTGTTCTTTTCCTCTCTGA +GCCTCTTCGCTGCCCTGGCCCTGGTCCGCTTGGGCGCTCAAGATGACTCC +CTCTCTCAGATTGATAAG +>hg17_chr19_59068595_59069564_+ +ATGCCAGTGACGGTAACCCGCACCACCATCACAACCACCACGACGTCATC +TTCGGGCCTGGGGTCCCCCATGATCGTGGGGTCCCCTCGGGCCCTGACAC +AGCCCCTGGGTCTCCTTCGCCTGCTGCAGCTGGTGTCTACCTGCGTGGCC +TTCTCGCTGGTGGCTAGCGTGGGCGCCTGGACGGGGTCCATGGGCAACTG +GTCCATGTTCACCTGGTGCTTCTGCTTCTCCGTGACCCTGATCATCCTCA +TCGTGGAGCTGTGCGGGCTCCAGGCCCGCTTCCCCCTGTCTTGGCGCAAC +TTCCCCATCACCTTCGCCTGCTATGCGGCCCTCTTCTGCCTCTCGGCCTC +CATCATCTACCCCACCACCTATGTCCAGTTCCTGTCCCACGGCCGTTCGC +GGGACCACGCCATCGCCGCCACCTTCTTCTCCTGCATCGCGTGTGTGGCT +TACGCCACCGAAGTGGCCTGGACCCGGGCCCGGCCCGGCGAGATCACTGG +CTATATGGCCACCGTACCCGGGCTGCTGAAGGTGCTGGAGACCTTCGTTG +CCTGCATCATCTTCGCGTTCATCAGCGACCCCAACCTGTACCAGCACCAG +CCGGCCCTGGAGTGGTGCGTGGCGGTGTACGCCATCTGCTTCATCCTAGC +GGCCATCGCCATCCTGCTGAACCTGGGGGAGTGCACCAACGTGCTACCCA +TCCCCTTCCCCAGCTTCCTGTCGGGGCTGGCCTTGCTGTCTGTCCTCCTC +TATGCCACCGCCCTTGTTCTCTGGCCCCTCTACCAGTTCGATGAGAAGTA +TGGCGGCCAGCCTCGGCGCTCGAGAGATGTAAGCTGCAGCCGCAGCCATG +CCTACTACGTGTGTGCCTGGGACCGCCGACTGGCTGTGGCCATCCTGACG +GCCATCAACCTACTGGCGTATGTGGCTGACCTGGTGCACTCTGCCCACCT +GGTTTTTGTCAAGGTCTAA +>hg17_chr19_59236026_59236146_- +ACGGCAGACCCCCAAGGAGTGACCTATGCTGAGCTAAGCACCAGCGCCCT +GTCTGAGGCAGCTTCAGACACCACCCAGGAGCCCCCAGGATCTCATGAAT +ATGCGGCACTGAAAGTGTAG +>hg17_chr19_59297998_59298008_+ +ATGGCTGCGA +>hg17_chr19_59302168_59302288_- +ATTAAGGTTGAGGAAGACTTTGGCTTTGAAGCAGATGAGGCCCTGGATTC +CAGTTGGGTTTCTCGGGGTCCAGACAAACTGCTGCCCTACCCGACCCTGG +CCAGCCCAGCCTCTGACTGA +>hg17_chr2_118288583_118288668_+ +ATGTCACACCTGCCGATGAAACTCCTGCGTAAGAAGATCGAGAAGCGGAA +CCTCAAATTGCGGCAGCGGAACCTAAAGTTTCAGG +>hg17_chr2_118394148_118394202_- +GAGGGCCGCAAGAACGAGATGCTGCTGTCCAAGGTGAAAGCGAAGGCCTC +CTGA +>hg17_chr2_220190202_220190242_+ +ATGCTCAAAGCGGTGATCCTGATTGGAGGCCCTCAAAAGG +>hg17_chr2_220229609_220230869_- +TGGGAGATCCAGAATACCAGCCATCTGGCCGTTGATGGGGACCAGGCAGC +TGCTTGGCCCGTGGGTATTCCAGCACCATCCCGCCCGGCCTCCCGCTTTG +AGGTGCTGCGCTGGGACTACTTCACGGAGCAGCACGCTTTCTCCTGCGCC +GATGGCTCACCCCGCTGCCCACTGCGTGGGGCTGACCGGGCTGATGTGGC +CGATGTTCTGGGGACAGCTCTAGAGGAGCTGAACCGCCGCTACCACCCGG +CCTTGCGGCTCCAGAAGCAGCAGCTGGTGAATGGCTACCGACGCTTTGAT +CCGGCCCGGGGTATGGAATACACGCTGGACTTGCAGCTGGAGGCACTGAC +CCCCCAGGGAGGCCGCCGGCCCCTCACTCGCCGAGTGCAGCTGCTCCGGC +CGCTGAGCCGCGTGGAGATCTTGCCTGTGCCCTATGTCACTGAGGCCTCA +CGTCTCACTGTGCTGCTGCCTCTAGCTGCGGCTGAGCGTGACCTGGCCCC +TGGCTTCTTGGAGGCCTTTGCCACTGCAGCACTGGAGCCTGGTGATGCTG +CGGCAGCCCTGACCCTGCTGCTACTGTATGAGCCGCGCCAGGCCCAGCGC +GTGGCCCATGCAGATGTCTTCGCACCTGTCAAGGCCCACGTGGCAGAGCT +GGAGCGGCGTTTCCCCGGTGCCCGGGTGCCATGGCTCAGTGTGCAGACAG +CCGCACCCTCACCACTGCGCCTCATGGATCTACTCTCCAAGAAGCACCCG +CTGGACACACTGTTCCTGCTGGCCGGGCCAGACACGGTGCTCACGCCTGA +CTTCCTGAACCGCTGCCGCATGCATGCCATCTCCGGCTGGCAGGCCTTCT +TTCCCATGCATTTCCAAGCCTTCCACCCAGCTGTGGCCCCACCACAAGGG +CCTGGGCCCCCAGAGCTGGGCCGTGACACTGGCCGCTTTGATCGCCAGGC +AGCCAGCGAGGCCTGCTTCTACAACTCCGACTATGTGGCAGCCCGTGGGC +GCCTGGCGGCAGCCTCAGAACAAGAAGAGGAGCTGCTGGAGAGCCTGGAT +GTGTACGAGCTGTTCCTCCACTTCTCCAGTCTGCATGTGCTGCGGGCGGT +GGAGCCGGCGCTGCTGCAGCGCTACCGGGCCCAGACGTGCAGCGCGAGGC +TCAGTGAGGACCTGTACCACCGCTGCCTCCAGAGCGTGCTTGAGGGCCTC +GGCTCCCGAACCCAGCTGGCCATGCTACTCTTTGAACAGGAGCAGGGCAA +CAGCACCTGA +>hg17_chr20_33330413_33330423_- +CCTCACCTGA +>hg17_chr20_33513606_33513792_+ +ATGGAGACAAGAAGCCCTGGGTTGAACAACATGAAGCCCCAGTCACTGCA +GCTGGTACTGGAAGAGCAGGTGCTGGCACTACAGCAGCAGATGGCAGAGA +ATCAGGCAGCCTCCTGGCGGAAGCTGAAGAACTCCCAGGAGGCCCAGCAG +AGACAAGCAACCCTTGTGAGGAAGCTGCAGGCCAAG +>hg17_chr20_33579500_33579527_- +ATTTTGGAAGATGGTCTGGTTCCCTAG +>hg17_chr20_33593260_33593348_+ +ATGGAGGCGCTGGGGAAGCTGAAGCAGTTCGATGCCTACCCCAAGACTTT +GGAGGACTTCCGGGTCAAGACCTGCGGGGGCGCCACCG +>hg17_chr21_32707032_32707192_+ +ATGCTTCTGCCGGGACGCGCACGCCAACCGCCGACGCCCCAGCCCGTGCA +GCATCCCGGCCTCCGCCGGCAGGTAGAGCCGCCGGGGCAGCTCCTGCGCC +TCTTCTACTGCACTGTCCTGGTCTGCTCCAAAGAGATCTCAGCGCTCACC +GACTTCTCTG +>hg17_chr21_32869641_32870022_- +ATGGAGCGCCCTCTCATCTGGCACCTTCCTGGCCTCTTTCCCAGGCCCCA +GTTCTGTCCATGCAGCTGTGGGTGCTTCCTGCATTGCGGGTCTCACGGGG +AGGAGACGAGAGTGCCCCTGGTTGAGTCAGGAAAGAATTCTATCTTCACG +TCGCTGCCAGCAAATGACCACAGCAGCTTCACGACCTCTGCAGGAACCTA +TCTTGGTAAAGAAACGGGGCCTATGTGGTGGCCGAGCCTCAGGTGTGGCC +GAGCTTCAGGTGTGGCCCTTATGCACAGCACAGCCCAAGCCTGTGGGCAC +CACTCGCCCTGGGCTGCCTGGCACCTGGACTCCTTCCCATCCTTGGCCGA +GGTCTGCGTGGCCCTTCAGGGCCGAATCTGA +>hg17_chr21_33321040_33322012_+ +ATGGACTCGGACGCCAGCCTGGTGTCCAGCCGCCCGTCGTCGCCAGAGCC +CGATGACCTTTTTCTGCCGGCCCGGAGTAAGGGCAGCAGCGGCAGCGCCT +TCACTGGGGGCACCGTGTCCTCGTCCACCCCGAGTGACTGCCCGCCGGAG +CTGAGCGCCGAGCTGCGCGGCGCTATGGGCTCTGCGGGCGCGCATCCTGG +GGACAAGCTAGGAGGCAGTGGCTTCAAGTCATCCTCGTCCAGCACCTCGT +CGTCTACGTCGTCGGCGGCTGCGTCGTCCACCAAGAAGGACAAGAAGCAA +ATGACAGAGCCGGAGCTGCAGCAGCTGCGTCTCAAGATCAACAGCCGCGA +GCGCAAGCGCATGCACGACCTCAACATCGCCATGGATGGCCTCCGCGAGG +TCATGCCGTACGCACACGGCCCTTCGGTGCGCAAGCTTTCCAAGATCGCC +ACGCTGCTGCTGGCGCGCAACTACATCCTCATGCTCACCAACTCGCTGGA +GGAGATGAAGCGACTGGTGAGCGAGATCTACGGGGGCCACCACGCTGGCT +TCCACCCGTCGGCCTGCGGCGGCCTGGCGCACTCCGCGCCCCTGCCCGCC +GCCACCGCGCACCCGGCAGCAGCAGCGCACGCCGCACATCACCCCGCGGT +GCACCACCCCATCctgccgcccgccgccgcagcggctgctgccgccgctg +cagccgcggctgTGTCCAGCGCCTCTCTGCCCGGATCCGGGCTGCCGTCG +GTCGGCTCCATCCGTCCACCGCACGGCCTACTCAAGTCTCCGTCTGCTgc +cgcggccgccccgctggggggcgggggcggcggcAGTGGGGCGAGCGGGG +GCTTCCAGCACTGGGGCGGCATGCCCTGCCCCTGCAGCATGTGCCAGGTG +CCGCCGCCGCACCACCACGTGTCGGCTATGGGCGCCGGCAGCCTGCCGCG +CCTCACCTCCGACGCCAAGTGA +>hg17_chr21_33744994_33745040_- +CACTCTGATCTACAAATTTGGAAGAACCGAAGAGCTATGGACCTGA +>hg17_chr22_30120223_30120265_+ +ATGAGCAGCACCTTAGCTAAGATCGCGGAGATAGAAGCAGAG +>hg17_chr22_30160419_30160661_- +TTCTGCATCCTCCAGGCTCTGGTTCCCATGCAGCAGCTGTCAGCGTTCAG +ACAACCCCTCAGAACGTGCCCAGCCGGTCAGGCCTGCCCCACATGCACTC +CCAGCTGGAGCATCGCCCCAGCCAGAGGAGCAGCTCCCCTGTGGGCCTTG +CCAAATGGTTTGGCTCAGATGTGCTACAGCAACCCCTGCCCTCCATGCCC +GCCAAAGTTATCAGTGTAGATGAATTGGAATACCGACAGTGA +>hg17_chr22_30665273_30665360_+ +ATGGGGGACCGGGAGCAGCTGCTGCAGCGGGCGCGGCTGGCCGAGCAGGC +GGAGCGCTACGACGACATGGCCTCCGCTATGAAGGCG +>hg17_chr22_30939054_30939266_- +ATTATCCTGAGAAATCACGTGGATGCCTCAAGAAAGCTTATGACTTGTTC +TGCGGTTTGCAGAAGGGACCCAAGCTAACCAAGGAGGAGGAGGAAGCCTT +GAGCAAGAAGCTCACAGACACGTCTGAGAGGCCCTCGTGGAGGACAATAG +TGAACATCAACGCCATCCTCCTCCTGGCTGTGGTGGTCTTTATTCACGGC +TACTATGCCTGA +>hg17_chr5_131424298_131424460_+ +ATGAGCCGCCTGCCCGTCCTGCTCCTGCTCCAACTCCTGGTCCGCCCCGG +ACTCCAAGCTCCCATGACCCAGACAACGCCCTTGAAGACAAGCTGGGTTA +ACTGCTCTAACATGATCGATGAAATTATAACACACTTAAAGCAGCCACCT +TTGCCTTTGCTG +>hg17_chr5_131556601_131556672_- +TCTCCAATAAGTGGTTCCATGAACGAGGACAGGAGTTCTTGAGACCTTGT +GGATCAACAGAAGTTGACTGA +>hg17_chr5_131621326_131621419_+ +ATGCCCCATTCCGTGACCCTGCGCGGGCCTTCGCCCTGGGGCTTCCGCCT +GGTGGGCGGCCGGGACTTCAGCGCGCCCCTCACCATCTCACGG +>hg17_chr5_131847541_131847666_- +GGGATATTGGGCTGAGTCTACAGCGTGTCTTCACAGATCTGAAGAACATG +GATGCCACCTGGCTGGACAGCCTGCTGACCCCAGTCCGGTTGCCCTCCAT +CCAGGCCATTCCCTGTGCACCGTAG +>hg17_chr6_108299600_108299744_- +TTGGAAGTTCATGAGGCTAAGCCTGTGCCAGAAAATCACCCACAGTGGGA +TACAGCAATAGAGGGGGATGAAGACCAGGAGGACAGTGAGGGCTTTGAAG +ATAGCTTTGAGGAAGAAGAGGAGGAAGAAGAAGATGATGACTAA +>hg17_chr6_108594662_108594687_+ +ATGAGCAAGCCAGCCGGATCAACAA +>hg17_chr6_108640045_108640151_- +GGTCGCTGGTCATCCTCTGGCACAGAACGAACGTTGTCTTCACATGTTTT +TACAAGATGAAATAATAGATAAAAGCTATACTCCATCTAAAATAAGACAT +GCCTGA +>hg17_chr6_108722976_108723115_+ +ATGGCGGCCTCCTGGTCGCTCTTGGTTACCCTGCGCCCCTTAGCACAGAG +CCCGCTGAGAGGGAGATGTGTTGGGTGCGGGGCCTGGGCCGCCGCTCTCG +CTCCTCTGGCCACCGCCCCTGGGAAGCCCTTTTGGAAAG +>hg17_chr7_113660517_113660685_+ +ATGATGCAGGAATCTGCGACAGAGACAATAAGCAACAGTTCAATGAATCA +AAATGGAATGAGCACTCTAAGCAGCCAATTAGATGCTGGCAGCAGAGATG +GAAGATCAAGTGGTGACACCAGCTCTGAAGTAAGCACAGTAGAACTGCTG +CATCTGCAACAACAGCAG +>hg17_chr7_116512159_116512389_- +GCTCCCTGGGTACAGCAGGCCGTGTGTGCAACCTGACTTCCCGGGGCATG +GACAGCTGTGAAGTCATGTGCTGTGGGAGAGGCTACGACACCTCCCATGT +CACCCGGATGACCAAGTGTGGGTGTAAGTTCCACTGGTGCTGCGCCGTGC +GCTGTCAGGACTGCCTGGAAGCTCTGGATGTGCACACATGCAAGGCCCCC +AAGAACGCTGACTGGACAACCGCTACATGA +>hg17_chr7_116714099_116714152_+ +ATGCAGAGGTCGCCTCTGGAAAAGGCCAGCGTTGTCTCCAAACTTTTTTT +CAG +>hg17_chr7_116945541_116945787_- +GAGGTCAGTCCTCTCAGCAGCCATCAAACTACTGAATGCAGCAACAGTAA +ATCAAAGACTGAGTTGGGTGTTTCAAGAGTTAAATCTTTTCTTCCTGTTC +CTAGAAGTAAAGTCACCCAGTGTTCCCAGAACACCAAAAGAAGCAGCAGC +AGCAGTAATACAAGGCAAATAGAAATCAACAACAACTCAAAAGAAGTGAA +TTGGAACTTACACAAAAATGAACACCTAGAAAAACCTAACAAATAG +>hg17_chr8_118881131_118881317_- +ACTTCTCGGGCTTCCCGTTGGGCTGACCCTGACCACTTTGCCCAGCGACA +GAGCTGCATGAATACGTTTGCCAGCTGGTTTGGCTACATGCCGCTGATCC +ACTCTCAGATGAGGCTCGACCCCGTCCTCTTTAAAGACCAGGTCTCTATT +TTGAGGAAGAAATACCGAGACATTGAGCGACTTTGA +>hg17_chr9_128764156_128764189_+ +ATGGCCTGCCTGAGCCCCTCGCAGCTCCAGAAG +>hg17_chr9_128787519_128789136_- +ATGACCCGAGAGTGCCCATCTCCGGCCCCGGGGCCTGGGGCTCCGCTGAG +TGGATCGGTGCTGGCAGAGGCGGCAGTAGTGTTTGCAGTGGTGCTGAGCA +TCCACGCAACCGTATGGGACCGATACTCGTGGTGCGCCGTGGCCCTCGCA +GTGCAGGCCTTCTACGTCCAATACAAGTGGGACCGGCTGCTACAGCAGGG +AAGCGCCGTCTTCCAGTTCCGAATGTCCGCAAACAGTGGCCTATTGCCCG +CCTCCATGGTCATGCCTTTGCTTGGACTAGTCATGAAGGAGCGGTGCCAG +ACTGCTGGGAACCCGTTCTTTGAGCGTTTTGGCATTGTGGTGGCAGCCAC +TGGCATGGCAGTGGCCCTCTTCTCATCAGTGTTGGCGCTCGGCATCACTC +GCCCAGTGCCAACCAACACTTGTGTCATCTTGGGCTTGGCTGGAGGTGTT +ATCATTTATATCATGAAGCACTCGTTGAGCGTGGGGGAGGTGATCGAAGT +CCTGGAAGTCCTTCTGATCTTCGTTTATCTCAACATGATCCTGCTGTACC +TGCTGCCCCGCTGCTTCACCCCTGGTGAGGCACTGCTGGTATTGGGTGGC +ATTAGCTTTGTCCTCAACCAGCTCATCAAGCGCTCTCTGACACTGGTGGA +AAGTCAGGGGGACCCAGTGGACTTCTTCCTGCTGGTGGTGGTAGTAGGGA +TGGTACTCATGGGCATTTTCTTCAGCACTCTGTTTGTCTTCATGGACTCA +GGCACCTGGGCCTCCTCCATCTTCTTCCACCTCATGACCTGTGTGCTGAG +CCTTGGTGTGGTCCTACCCTGGCTGCACCGGCTCATCCGCAGGAATCCCC +TGCTCTGGCTTCTTCAGTTTCTCTTCCAGACAGACACCCGCATCTACCTC +CTAGCCTATTGGTCTCTGCTGGCCACCTTGGCCTGCCTGGTGGTGCTGTA +CCAGAATGCCAAGCGGTCATCTTCCGAGTCCAAGAAGCACCAGGCCCCCA +CCATCGCCCGAAAGTATTTCCACCTCATTGTGGTAGCCACCTACATCCCA +GGTATCATCTTTGACCGGCCACTGCTCTATGTAGCCGCCACTGTATGCCT +GGCGGTCTTCATCTTCCTGGAGTATGTGCGCTACTTCCGCATCAAGCCTT +TGGGTCACACTCTACGGAGCTTCCTGTCCCTTTTTCTGGATGAACGAGAC +AGTGGACCACTCATTCTGACACACATCTACCTGCTCCTGGGCATGTCTCT +TCCCATCTGGCTGATCCCCAGACCCTGCACACAGAAGGGTAGCCTGGGAG +GAGCCAGGGCCCTCGTCCCCTATGCCGGTGTCCTGGCTGTGGGTGTGGGT +GATACTGTGGCCTCCATCTTCGGTAGCACCATGGGGGAGATCCGCTGGCC +TGGAACCAAAAAGACTTTTGAGGGGACCATGACATCTATATTTGCGCAGA +TCATTTCTGTAGCTCTGATCTTAATCTTTGACAGTGGAGTGGACCTAAAC +TACAGTTATGCTTGGATTTTGGGGTCCATCAGCACTGTGTCCCTCCTGGA +AGCATACACTACACAGATAGACAATCTCCTTCTGCCTCTCTACCTCCTGA +TATTGCTGATGGCCTAG +>hg17_chr9_128882427_128882523_+ +ATGGCGTTCCGGAGGGCCGAGGGCACGTCTATGATCCAGGCCCTGGCCAT +GACGGTGGCCGAGATCCCCGTGTTCCTGTACACGACGTTTGGGCAG +>hg17_chr9_128937229_128937445_- +GTCCCTGCCAAGACAGACTGTGTCATGTTCTTCGGGCCCGTGGTCCCCGA +CGGCTACGGTGTCTGCTATAACCCCATGGAGGCCCACATCAACTTCTCCC +TGTCGGCCTACAACAGCTGCGCGGAGACCAACGCCGCCCGCCTGGCGCAT +TACCTGGAGAAGGCGCTCCTGGACATGCGTGCCCTGCTGCAGAGCCACCC +CCGGGCCAAGCTCTGA +>hg17_chrX_122745047_122745924_+ +ATGACTTTTAACAGTTTTGAAGGATCTAAAACTTGTGTACCTGCAGACAT +CAATAAGGAAGAAGAATTTGTAGAAGAGTTTAATAGATTAAAAACTTTTG +CTAATTTTCCAAGTGGTAGTCCTGTTTCAGCATCAACACTGGCACGAGCA +GGGTTTCTTTATACTGGTGAAGGAGATACCGTGCGGTGCTTTAGTTGTCA +TGCAGCTGTAGATAGATGGCAATATGGAGACTCAGCAGTTGGAAGACACA +GGAAAGTATCCCCAAATTGCAGATTTATCAACGGCTTTTATCTTGAAAAT +AGTGCCACGCAGTCTACAAATTCTGGTATCCAGAATGGTCAGTACAAAGT +TGAAAACTATCTGGGAAGCAGAGATCATTTTGCCTTAGACAGGCCATCTG +AGACACATGCAGACTATCTTTTGAGAACTGGGCAGGTTGTAGATATATCA +GACACCATATACCCGAGGAACCCTGCCATGTATAGTGAAGAAGCTAGATT +AAAGTCCTTTCAGAACTGGCCAGACTATGCTCACCTAACCCCAAGAGAGT +TAGCAAGTGCTGGACTCTACTACACAGGTATTGGTGACCAAGTGCAGTGC +TTTTGTTGTGGTGGAAAACTGAAAAATTGGGAACCTTGTGATCGTGCCTG +GTCAGAACACAGGCGACACTTTCCTAATTGCTTCTTTGTTTTGGGCCGGA +ATCTTAATATTCGAAGTGAATCTGATGCTGTGAGTTCTGATAGGAATTTC +CCAAATTCAACAAATCTTCCAAGAAATCCATCCATGGCAGATTATGAAGC +ACGGATCTTTACTTTTGGGACATGGATATACTCAGTTAACAAGGAGCAGC +TTGCAAGAGCTGGATTTTATGCTTTAG +>hg17_chrX_152648964_152649196_- +TGACAACGAGGAGAAGGCCTTTGGCAGCAGCCAGCCATCGCTCAACGGGG +ACATCAAGCCCCTGGGCAGTGACGACAGCCTGGCCGATTATGGGGGCAGC +GTGGATGTTCAGTTCAACGAGGATGGTTCGTTCATTGGCCAGTACAGTGG +CAAGAAGGAGAAGGAGGCGGCAGGGGGCAATGACAGCTCAGGGGCCACTT +CCCCCATCAACCCTGCCGTGGCCCTAGAATAG +>hg17_chrX_152691446_152691471_+ +ATGCTCATGGCGTCCACCACTTCCG +>hg17_chrX_152694029_152694263_- +GCTGTGGCACAGAACATGGACTCTGTGTTTAAGGAGCTCTTGGGAAAGAC +CTCTGTCCGCCAGGGCCTTGGGCCAGCATCTACCACCTCTCCCAGTCCTG +GGCCCCGAAGCCCAAAGGCCCCGCCCAGCAGCCGCCTGGGCAGGAACAAA +GGCTTCTCCCGGGGCCCTGGGGCCCCAGCCTCACCCTCAGCTTCCCACCC +CCAGGGCCTAGACACGACCCCCAAGCCACACTGA diff -r d8cc2c8eef14 -r de2db1bdfbf8 test-data/fasta_tool_compute_length_1.out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fasta_tool_compute_length_1.out Wed Nov 11 12:13:18 2015 -0500 @@ -0,0 +1,18 @@ +EYKX4VC01B65GS length=54 xy=0784_1754 region=1 run=R_2007_11_07_16_15_57_ 54 +EYKX4VC01BNCSP length=187 xy=0558_3831 region=1 run=R_2007_11_07_16_15_57_ 187 +EYKX4VC01CD9FT length=115 xy=0865_1719 region=1 run=R_2007_11_07_16_15_57_ 115 +EYKX4VC01B8FW0 length=95 xy=0799_0514 region=1 run=R_2007_11_07_16_15_57_ 95 +EYKX4VC01BCGYW length=115 xy=0434_3926 region=1 run=R_2007_11_07_16_15_57_ 115 +EYKX4VC01AZXC6 length=116 xy=0292_0280 region=1 run=R_2007_11_07_16_15_57_ 116 +EYKX4VC01CATH5 length=82 xy=0826_0843 region=1 run=R_2007_11_07_16_15_57_ 82 +EYKX4VC01BCEIV length=47 xy=0434_0757 region=1 run=R_2007_11_07_16_15_57_ 47 +EYKX4VC01BWERM length=83 xy=0662_0304 region=1 run=R_2007_11_07_16_15_57_ 83 +EYKX4VC01BT2O7 length=69 xy=0635_1945 region=1 run=R_2007_11_07_16_15_57_ 69 +EYKX4VC01BO0UO length=222 xy=0577_3838 region=1 run=R_2007_11_07_16_15_57_ 222 +EYKX4VC01CBCPK length=83 xy=0832_1158 region=1 run=R_2007_11_07_16_15_57_ 83 +EYKX4VC01B474S length=54 xy=0762_2010 region=1 run=R_2007_11_07_16_15_57_ 54 +EYKX4VC01BB4QL length=57 xy=0431_0363 region=1 run=R_2007_11_07_16_15_57_ 57 +EYKX4VC01BJ37M length=64 xy=0522_0192 region=1 run=R_2007_11_07_16_15_57_ 64 +EYKX4VC01BV9R8 length=54 xy=0660_2038 region=1 run=R_2007_11_07_16_15_57_ 54 +EYKX4VC01CEPP8 length=60 xy=0870_2350 region=1 run=R_2007_11_07_16_15_57_ 60 +EYKX4VC01BTLME length=78 xy=0630_0292 region=1 run=R_2007_11_07_16_15_57_ 78 diff -r d8cc2c8eef14 -r de2db1bdfbf8 test-data/fasta_tool_compute_length_2.out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fasta_tool_compute_length_2.out Wed Nov 11 12:13:18 2015 -0500 @@ -0,0 +1,65 @@ +hg17_chr1_147962192_147962580_- 388 +hg17_chr1_147984545_147984630_+ 85 +hg17_chr1_148078400_148078582_- 182 +hg17_chr1_148185136_148185276_+ 140 +hg17_chr10_55251623_55253124_- 1501 +hg17_chr11_116124407_116124501_- 94 +hg17_chr11_116206508_116206563_+ 55 +hg17_chr11_116211733_116212337_- 604 +hg17_chr11_1812377_1812407_+ 30 +hg17_chr12_38440094_38440321_- 227 +hg17_chr13_112381694_112381953_+ 259 +hg17_chr14_98710240_98712285_- 2045 +hg17_chr15_41486872_41487060_- 188 +hg17_chr15_41673708_41673857_+ 149 +hg17_chr15_41679161_41679250_- 89 +hg17_chr15_41826029_41826196_+ 167 +hg17_chr16_142908_143003_+ 95 +hg17_chr16_179963_180135_- 172 +hg17_chr16_244413_244681_+ 268 +hg17_chr16_259268_259383_- 115 +hg17_chr18_23786114_23786321_- 207 +hg17_chr18_59406881_59407046_+ 165 +hg17_chr18_59455932_59456337_- 405 +hg17_chr18_59600586_59600754_+ 168 +hg17_chr19_59068595_59069564_+ 969 +hg17_chr19_59236026_59236146_- 120 +hg17_chr19_59297998_59298008_+ 10 +hg17_chr19_59302168_59302288_- 120 +hg17_chr2_118288583_118288668_+ 85 +hg17_chr2_118394148_118394202_- 54 +hg17_chr2_220190202_220190242_+ 40 +hg17_chr2_220229609_220230869_- 1260 +hg17_chr20_33330413_33330423_- 10 +hg17_chr20_33513606_33513792_+ 186 +hg17_chr20_33579500_33579527_- 27 +hg17_chr20_33593260_33593348_+ 88 +hg17_chr21_32707032_32707192_+ 160 +hg17_chr21_32869641_32870022_- 381 +hg17_chr21_33321040_33322012_+ 972 +hg17_chr21_33744994_33745040_- 46 +hg17_chr22_30120223_30120265_+ 42 +hg17_chr22_30160419_30160661_- 242 +hg17_chr22_30665273_30665360_+ 87 +hg17_chr22_30939054_30939266_- 212 +hg17_chr5_131424298_131424460_+ 162 +hg17_chr5_131556601_131556672_- 71 +hg17_chr5_131621326_131621419_+ 93 +hg17_chr5_131847541_131847666_- 125 +hg17_chr6_108299600_108299744_- 144 +hg17_chr6_108594662_108594687_+ 25 +hg17_chr6_108640045_108640151_- 106 +hg17_chr6_108722976_108723115_+ 139 +hg17_chr7_113660517_113660685_+ 168 +hg17_chr7_116512159_116512389_- 230 +hg17_chr7_116714099_116714152_+ 53 +hg17_chr7_116945541_116945787_- 246 +hg17_chr8_118881131_118881317_- 186 +hg17_chr9_128764156_128764189_+ 33 +hg17_chr9_128787519_128789136_- 1617 +hg17_chr9_128882427_128882523_+ 96 +hg17_chr9_128937229_128937445_- 216 +hg17_chrX_122745047_122745924_+ 877 +hg17_chrX_152648964_152649196_- 232 +hg17_chrX_152691446_152691471_+ 25 +hg17_chrX_152694029_152694263_- 234 diff -r d8cc2c8eef14 -r de2db1bdfbf8 test-data/fasta_tool_compute_length_3.out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fasta_tool_compute_length_3.out Wed Nov 11 12:13:18 2015 -0500 @@ -0,0 +1,18 @@ +EYKX4VC01B65GS 54 +EYKX4VC01BNCSP 187 +EYKX4VC01CD9FT 115 +EYKX4VC01B8FW0 95 +EYKX4VC01BCGYW 115 +EYKX4VC01AZXC6 116 +EYKX4VC01CATH5 82 +EYKX4VC01BCEIV 47 +EYKX4VC01BWERM 83 +EYKX4VC01BT2O7 69 +EYKX4VC01BO0UO 222 +EYKX4VC01CBCPK 83 +EYKX4VC01B474S 54 +EYKX4VC01BB4QL 57 +EYKX4VC01BJ37M 64 +EYKX4VC01BV9R8 54 +EYKX4VC01CEPP8 60 +EYKX4VC01BTLME 78