Mercurial > repos > artbio > blast_to_scaffold
changeset 0:7d96b28eec49 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
author | artbio |
---|---|
date | Sun, 15 Oct 2017 12:52:40 -0400 |
parents | |
children | be61d0d0d622 |
files | blast_to_scaffold.py blast_to_scaffold.xml test-data/assembly.fa test-data/blast.tab test-data/contigs.fa test-data/guideSequence.fa |
diffstat | 6 files changed, 797 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# Original diff header:
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/blast_to_scaffold.py Sun Oct 15 12:52:40 2017 -0400
# @@ -0,0 +1,140 @@
#!/usr/bin/env python
"""Generate a DNA scaffold from a blastn/tblastx alignment of contigs.

The guide sequence is loaded position by position (in lowercase), every
aligned contig segment overwrites the guide positions it covers, and the
result is written as a single fasta record.
"""
import argparse


# Complement of each IUPAC nucleotide code; lowercase variants are added
# below so that soft-masked sequences can be reverse-complemented too.
_COMPLEMENT = {
    "A": "T", "C": "G", "G": "C", "T": "A",
    "R": "Y", "Y": "R", "S": "S", "W": "W", "K": "M", "M": "K",
    "B": "V", "V": "B", "D": "H", "H": "D", "N": "N",
}
_COMPLEMENT.update(
    {base.lower(): comp.lower() for base, comp in list(_COMPLEMENT.items())})


def insert_newlines(string, every=60):
    """Return *string* split into chunks of *every* characters joined by
    newlines (no trailing newline; an empty string stays empty)."""
    return '\n'.join(
        string[i:i + every] for i in range(0, len(string), every))


def _reverse_complement(seq):
    """Return the reverse complement of *seq*.

    Characters that are not IUPAC nucleotide codes are kept unchanged
    instead of raising, so unexpected symbols do not abort the run.
    """
    return "".join(_COMPLEMENT.get(base, base) for base in reversed(seq))


def getseq(fastadict, transcript, up, down, orientation="direct"):
    """Extract the 1-based, inclusive interval [up, down] of a sequence.

    fastadict   -- dict mapping sequence names to sequence strings
    transcript  -- name of the sequence to slice
    up, down    -- 1-based start and end coordinates (inclusive)
    orientation -- "direct" returns the slice as is; any other value
                   returns its reverse complement

    Raises KeyError if *transcript* is not in *fastadict*.
    """
    pickseq = fastadict[transcript][up - 1:down]
    if orientation == "direct":
        return pickseq
    return _reverse_complement(pickseq)


def Parser():
    """Parse the command line and return the argparse namespace."""
    the_parser = argparse.ArgumentParser(
        description="Generate DNA scaffold from blastn or tblastx "
                    "alignment of Contigs")
    # The four file arguments are mandatory: without them the script
    # would only fail later, when trying to open None.
    the_parser.add_argument('--sequences', action="store", type=str,
                            required=True,
                            help="input sequence file in fasta format")
    the_parser.add_argument('--guideSequence', action="store", type=str,
                            required=True,
                            help="the reference sequence to guide the "
                                 "scaffold assembly in fasta format")
    the_parser.add_argument('--blast-tab', dest="blast_tab", action="store",
                            type=str, required=True,
                            help="13-columns tabular blastn or tblastx "
                                 "output")
    the_parser.add_argument('--output', action="store", type=str,
                            required=True,
                            help="output file path, fasta format")
    the_parser.add_argument('--scaffold_prefix', action="store", type=str,
                            help="the prefix that will be used for the "
                                 "header of the fasta scaffold")
    the_parser.add_argument('--scaffold_suffix', action="store", type=str,
                            help="the suffix that will be used for the "
                                 "header of the fasta scaffold")
    args = the_parser.parse_args()
    return args


def blatnInfo(file):
    """Read a tabular blast file and return the alignment coordinates.

    Each returned item is a list of strings:
    [query name, queryStart, queryStop, subjectStart, subjectStop]
    (columns 1 and 7-10 of the blast table).  Blank lines and lines
    starting with '#' are skipped.

    Raises ValueError if a data line has fewer than 10 columns.
    """
    blastlist = []
    with open(file, "r") as f:
        for line_number, line in enumerate(f, 1):
            fields = line.split()
            if not fields or fields[0].startswith("#"):
                continue
            if len(fields) < 10:
                raise ValueError(
                    "%s, line %d: expected at least 10 columns, found %d"
                    % (file, line_number, len(fields)))
            blastlist.append([fields[0]] + fields[6:10])
    # NOTE(review): subjectStart is still a string here, so this is a
    # descending *lexicographic* sort ("377" sorts before "12333").  The
    # order decides which contig wins where alignments overlap, so it is
    # kept unchanged to preserve existing outputs -- confirm whether a
    # numeric sort was intended.
    blastlist.sort(key=lambda x: x[3], reverse=True)
    return blastlist


def myContigs(file):
    """Load a fasta file into a dict {sequence name: sequence string}.

    The name is the first whitespace-delimited word of the header line,
    i.e. the same token blatnInfo() reads from the first blast column;
    any description after it is ignored.  Line endings (LF or CRLF) are
    stripped, and a final line without newline is read completely.

    Raises ValueError if sequence data appears before the first header.
    """
    chunks = {}
    header = None
    with open(file, "r") as f:
        for line in f:
            line = line.rstrip()
            if line.startswith(">"):
                tokens = line[1:].split()
                header = tokens[0] if tokens else ""
                # a repeated name restarts the record, as before
                chunks[header] = []
            elif header is None:
                if line:
                    raise ValueError(
                        "%s: sequence data found before the first fasta "
                        "header" % file)
            else:
                chunks[header].append(line)
    return {name: "".join(parts) for name, parts in chunks.items()}


def myGuide(file):
    """Load the guide fasta as a dict {1-based position: nucleotide}.

    Header lines are ignored, so all sequence lines of the file are
    numbered consecutively.  Nucleotides are stored in lowercase; they
    stay lowercase in the final scaffold wherever no contig replaces
    them.
    """
    Guide = {}
    coordinate = 0
    with open(file, "r") as f:
        for line in f:
            if line.startswith(">"):
                continue
            # rstrip() instead of [:-1]: keeps the last nucleotide of a
            # file without trailing newline and drops '\r' of CRLF files
            for nucleotide in line.rstrip():
                coordinate += 1
                Guide[coordinate] = nucleotide.lower()
    return Guide


def updateGuide(blastlist, GuideDict, ContigsDict):
    '''
    Overwrite, in place, the guide positions covered by each alignment.

    the blastlist object is a list of list with
    element [0] : name of the blasted Contig
    element [1] : queryStart of the alignment to the reference
    element [2] = queryStop of the alignment to the reference
    element [3] : subjectStart of the alignment to the reference
    element [4] = subjectStop of the alignment to the reference

    When subjectStart > subjectStop the contig segment is reverse
    complemented before being written.  Alignments are applied in list
    order, so later items overwrite earlier ones where they overlap.
    '''
    for fields in blastlist:
        seqHeader = fields[0]
        queryStart = int(fields[1])
        queryStop = int(fields[2])
        subjectStart = int(fields[3])
        subjectStop = int(fields[4])
        if subjectStart > subjectStop:
            subjectStart, subjectStop = subjectStop, subjectStart
            orientation = "reverse"
        else:
            orientation = "direct"
        sequence = getseq(ContigsDict, seqHeader, queryStart, queryStop,
                          orientation)
        # drop the covered guide positions (already missing ones are fine)
        for i in range(subjectStart, subjectStop + 1):
            GuideDict.pop(i, None)
        # then write the contig segment starting at subjectStart; the
        # segment length may differ from the subject interval length
        for i, nucleotide in enumerate(sequence):
            GuideDict[i + subjectStart] = nucleotide


def finalAssembly(GuideDict, outputfile, prefix, suffix):
    """Write the scaffold held in *GuideDict* to *outputfile* as fasta.

    Nucleotides are concatenated in increasing position order and wrapped
    at 60 characters per line.  The header is
    ">Scaffold_from_<prefix>_guided_by_<suffix>".
    """
    finalSequence = insert_newlines(
        "".join(GuideDict[key] for key in sorted(GuideDict)))
    with open(outputfile, "w") as Out:
        Out.write(">Scaffold_from_%s_guided_by_%s\n" % (prefix, suffix))
        Out.write("%s\n" % finalSequence)


def __main__():
    """Run the whole pipeline from the command-line arguments."""
    args = Parser()
    ContigsDict = myContigs(args.sequences)
    GuideDict = myGuide(args.guideSequence)
    blastlist = blatnInfo(args.blast_tab)
    updateGuide(blastlist, GuideDict, ContigsDict)
    finalAssembly(GuideDict, args.output, args.scaffold_prefix,
                  args.scaffold_suffix)


if __name__ == "__main__":
    __main__()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/blast_to_scaffold.xml Sun Oct 15 12:52:40 2017 -0400 @@ -0,0 +1,49 @@ +<tool id="blast2scaffold" name="blast_to_scaffold" version="1.0.0"> +<description>Generate DNA scaffold from blastn or tblastx alignment of Contigs</description> +<requirements> +</requirements> +<command interpreter="python"> + blast_to_scaffold.py --sequences "$sequences" + --guideSequence "$guideSequence" + --blast-tab "$blast_tab" + --output "$output" + --scaffold_prefix "$sequences.element_identifier" + --scaffold_suffix "$guideSequence.element_identifier" +</command> +<inputs> +<param name="sequences" type="data" format="fasta" label="Select a fasta contigs file"/> +<param name="guideSequence" type="data" format="fasta" label="Select the fasta guide sequence for scaffolding"/> +<param name="blast_tab" type="data" format="tabular" label="Select a blastn or tblastx output from your history" help="must have 13 columns with column 13 containing the subject length, other columns are standard"/> + +</inputs> +<outputs> + <data format="fasta" name="output"/> +</outputs> + + +<tests> + <test> + <param name="sequences" value="contigs.fa" ftype="fasta"/> + <param name="blast_tab" value="blast.tab" ftype="tabular"/> + <param name="guideSequence" value="guideSequence.fa" ftype="tabular"/> + <output name="output" file="assembly.fa" ftype="fasta"/> + </test> +</tests> + + +<help> + + +**What it Does** +This tool starts from DNA contigs that aligned to a subject DNA sequence through blastn or tblastx. +The contigs must be provided in fasta format. The blastn or tblastx output must be tabular, the 12 standard columns plus column 13 with the length of the blastn or tblastx subject. +The sequence used to BLAST (blastn or tblastx) the contigs must be provided to serve as a guide to the final assembly. +The final assembly is a DNA sequence. +Nucleotides of the guide sequence which were not covered by contigs are in lowercase letters in the output assembly. 
+ + +**Attribution** +This Galaxy tool was created by drosofff@gmail.com on 5/01/2016 +</help> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/assembly.fa Sun Oct 15 12:52:40 2017 -0400 @@ -0,0 +1,207 @@ +>Scaffold_from_contigs.fa_guided_by_guideSequence.fa +gcaaaaaggcccctgggGGGGGGTTAATGAGTACTGGAAAAAGAAGCGCGAGATACCACT +TCGCTGATTATGCTGATTCTGGTATTAAGGATTAGTAGAGTATCGGAAGTGCCGAAGGGA +ATAATGCTGGTCCGTTGTAAACAGGATACTTATGAGACCTCTTAACAGAAACTTTGAGCA +AACAACAAATAAACGAACAAAAAGAGTTATACATTGAACTTGAAAAAAATCAATGTCTTT +TATTTAGACTGATTTATATTGGTCTTTATAATCGTAAGGAATAATTCTCATCTAATGATA +TCGATGTAAAATGTATCATTCTAGCTATTCAATTATATTAATTGAGCTTAGCATCTGATT +AAATTTATTATAAAATAATATTTAATCCCCATCTTTGCAAGATGGAAACTCTCAATAAAT +CAGGAGTATAATAGATTTAAACCCTCACACTGAATCACGAGTATAAACAGATTCAAATAT +GATTAACAATCAAACAAACAAAAAGGGACCACAACTAGAGAGAGTACATTTTGGTAGTGC +GCAGGTTGTGGGAAAAAGTACCAAACGACGACAACGCGGAACCAAATTTGACATTGAATA +TACTGTTAAAAGGAACGATGCACCAAAAGAGCAGAAATTCTTAGTTTCAGAAATTTTTGA +TGAAAAGTTGGATAAACAAATAAAATATGAGAAGAAGCAAAATCATACTTTTATTAAACC +GAAATTAAATTTAGTTACTAGAGAAGAACAACACGTGACTAAGAAGGTTTTAAGAGGTAA +AGAACGAGCTGCAACTCATGCTTTTATGAAGGAAATGGTTGAATCTAACAAGATACAACC +TAGTTGGAATGTCGAATACGAAAAAGAAATAGATGAGGTTGATCTATTTTTTATGAAGAA +GAAAACCAAACCTTTCTCAGGTTTTTCTATTAAGGAATTAAGAGATAGTCTAATTGTGCA +GTCAGACGATAAAAACATGGCACAGCCAACCGTGATGAGTTCAACCAATGAAATTGTTAC +ACCCCGTGAGGAGATAAGCGTTTCTGCTATCTCTGAACAACTGGCATCCTTGATGGAGAG +AGTTGATAAACTCGAGAAGATGAATGCTGCTTTGGAGGAAGAAAACAAGCAGCTAAAGAA +AGAGAGAGAGGCGACTATTAAGTCAGTTAAGAAAGAGGCAAAGAGGACTAAACAAGAGAA +GCCTCAGATTGCGAAGAAAACGCAACACAAGAGTTTAGGAGTAAATCTTAAAATCACCAA +GACCAAAGTAGTTGGTCAGGAACAATGTTTGGAAATTGAAAATACTCAGCATAAGAAATT +TGTTGAGAAGCCAAGCATGCCATCCAAAGTGAGCAAGAAGATGAAGGGACAACAGTTGAA +AAAGACTATTCGTACTTGGTATGAATTTGATCCCTCTAAACTCGTTCAGCATCAAAAAGA +AGTGTTGAACAGTGTTGTTACCAACACAACCTTCGCAGATAAAGTCCGTGAAACTGGTAT +ACCTAAACAAAAGATTAGGTATACTGCAAAACCACCAGCAGAGGAGAAGAGGAGTATCCA +TTTCTATGGTTATAAGCCAAAAGGAATCCCTAACAAAGTTTGGTGGAATTGGGTCACCAC +TGGCACAGCTATGGACGCTTATGAAAAAGCTGACCATTATCTGTATCACCAATTTAAACG +AGAAATGATGGTATACAGAAATAAATGGGTCAAGTTTAGTAAGGAGTTCAATCCGTACCT 
+ATCGGAACCGAAAATGGTATGGGAAGAGAATACATGGGAATATGAATATAAAACAGACGT +TCCCTACAATTTTATTCTCAAATGGCGCCAGTTAGTGCAGACCTACAAGCCTAACACACC +AATCCAGGCTGATTGGTACAAAATCTCGCAGAAACAACAATGTTAATTGAAGTTTTCATT +AATTCTTTGTTGCAAAATCTAGGTATCATGATGTCTTTCCGTGACCTAGTGGCGAGCCCA +TGGATATTGCTAGTAATAGCTATACCCTTGTGTGCATTTGCCAGTTCAGCGTCTATGGTT +AGGGAGATGCTTTTCCGTCATAAAATTACAGAAAATATTTTAAAAGGAACAGGAGTAGAA +GAATTGTTTAatccattcgggataattattaaataTTTCCTTTATTTTGCAATTTTGTAT +GCTTTTATTAAATATATTAGAAATAACATTAACGTAATAACAGAGAAAGTTAATTTTATA +CGGAGAATAGTATCGAACCCAACTGGAACAACAGGACGTAGAGGTGTGTTAGGGCGATGT +GTAGAACAAATCATAGAATATCCAACATTCTTTATCACGATGGTCTACGAACTACAGCAA +ATTAAGAACAAAAAAGATCTTATCTCGAAAATTACGATGATAAGTAGTATTCTCAAGTTA +CCACTTGGTATTTGGGAAAGTACTGTAGGACGGATGCTAGATCGACCGGCGATAGAAGGA +ACAGAAGAAATGTTGGAAGATGTTCTACCTATGGTAGCAATGGGATTGACGATTACAAAG +ACTCAAATTGGAGATGTTCCAGTTGAAAGTTTTCTTGTGAATTTGGACCGTAATCAAAAG +GCTTGCGAAAATATAATAAAACGTATGCAGCCGTTGATGATTAAAATGGGAATGATGAAA +GATAGTTCATATGATACTATTTTGCAAGTTGCAAAAGAAGTTAATGAATTATCGGAAGCG +GAAACATGGATGAAAACAACGCTCAAATTAAACCCAAATGAGTTCTTACAAACACAAGGG +GCTGTAAGAGTTGGCGAAATCAGGGAAAAAGTTGCAACTTTGAGAAATAAGTTAAACACT +TTGCAAACGAAAGAACTTCGATCAGATAAAGTAGTAACAGAGTGCCAAAAACATCTGGCA +TCACTCGAAGTGCTACTTATTGAAGTTAAGGTGTTAGAGAACAGTAACCAAACCCGAGTA +AAACCCGTAGGAGTTACCATACAAGGCGAAAAACAAATCGGAAAGACAAACCTCGTAGCA +ATCCTCTCAAGAAAAGTGTGCGAGTATGTTCAGGAACATGGCGATATTTCTTTTAGAAAC +GCTACTAAATGGACAACTTGGTCTAGGCAGTGCAGAGATGAATTTGATACAGGATACACC +GGACAAGAAATAACTTACGTAGATGATGCCTTTCAACAAAAAGACAACAAGGATCATTTG +ATGTGGTTTACTTTTATTTCTAATACAGCAGTGGGAACAAACCAAGCTGATTTGAAACAA +AAAGGTTTACCATACAGAAGTAAGTTGGTGTTTACGACCTGTAACAAGTTGCCGGACAAA +AGCGTAACGATTGAGGATATCGAAGCATTACATGCTCGATTCCCTCATACAATTTGCTTG +AGGAGGAACAAGAACAAAATGCCAAAACGAGGAGCGATTGACGAGAGCTATGACTGGGTC +GATTTTTATTACGGACCAATGTCGAAAGCCGTCAGCGCTATTGGAAGCAACACGACCAGC +ACGCTAAAAACCATGTCTTTGAGTGAAATAGTGAAGATAATTGGAGATGATTTGATTATT +CAAAATAATTTCTATAACTCCACTATTAAAGATGTGGGAATCACCGGACAAGAACAAATG +GATGGAGCGCAATTAGAAAGAAGACAACGAATGCGCGAATTGAGGGATCACTTGTTGAGA 
+ATCCGCCCAGGAGACGAAAACATGCCTTTCCTTGATGAAACATTTGAGTTGAATAGCCGA +CCTATCCAGACAGATGAGAAATTTATACCGCTCAAAGATAACTTAGATGAAGAAGTTATG +TATGGCGGTATTTCAGATCAGTTGTTAACTAGATTTGATAATATCATCGAAAGGAGTTTA +GAAGGCTATGATGTTGAGTCGCGAGAGCTAGGTGTTGAACCTTTAACTACACTAAATCAC +GTGAGAAGTAATATGTTGAGTTATAGAGCGTGGAATCTCATTAATTCCATGTGTATTAAC +AAAACAGAAACTTTCAGTGCTTGGTTAGGAAGGTACATCACCGAATGTGTAGAAGGAGCA +GCAGAAAATCTAGTTACGACGAAAGTCAAAATTAGAGTTAATCCTTTTACAGGTTTGCAG +TTGATCGCGGCCAAACGAATGTTGCAAGAGAATAAATTGATCGATATGGATGAAATTCCA +TCGACATCAGCGAATTCATATGAAACAGTTTACGATCAAATCAAAAATTTCGTGAACGAT +GAACTGAGTCTGATGGAAACAGATATAGTTGATTTAGCTTTAGCAAAAATTAGCTTATCG +CAAATCCGAGGCAACATCAAGAGATCAACCTGGCTGGACGTAAATGATTGGATATTAGCT +TTGAAACATAAAATTTCAGGAAAAAGCTTTGCCAAACATATGGATCTGTATCCTAGCTCG +CTAGATTCTTTTCTTCTTACACTCAAAGATTGGGAGGTTGAAGACCGTATAAAGTTTAAC +TCTATTTACAAGCAAAAGGTATTGTTTGTTCAATCTAGATTTTCACTTTACTGTTGGTCT +CCTTTTATATCTCGTGGAACACGATTTGTGAAAGTGACTTCTCAGTTTAGAGAGTTAGTG +GATAAGTTAGAAACTGGCATTCTATTCCATGAAATAAAATCGGTTACAAATGGAATTAGA +TGGTTAGGAGGAGCAGGGAACAATGGACACGTTGGAGAAAGAGTAAGAGTTATTGCTCAT +ACAGCCCAATTTCCGAAAAAGAGTTACCCTCAGAATGGATTCCCTATAAACGAAGAACTA +CATAGGGAATGGATCCAACTTGTTATGAACTCGGACTATAAGTATCACTCACTTATAGGA +GAAGAAAAAGTAAATATTCTTTGGAACCTTATTAGGCTCCAACCTCAACATGAGGTGGAG +AATTTTAAGGTTTACTTAGAAGATTTGCAGGCGTCCCCGCCGAAAACTGGGACAATTTGT +GCAAAAGTAGTGAATGATATAAAAGCCGAAGTAACTTCAAGTTATAGGCAATTTAACAAT +TACTACACTCGTTTAACAAAAGATGGTATGCACACACTTTTATCAATGCTTTCTAGGATA +GGTGTTCCAATTTCAGATTATTGGAATGACCTTCTTGTCGACAAAGCGCCAGCTATTACA +GCGGTTACTGTTGGAGCAATAACAAGTTtagctattattacaatAGTAAAAACTTTTCAA +TATGCTATAGCTGGAGAAGAGCAAAGTAAAGGTGAAAAACGAGCCAAACAGAAGAACATC +GCAACAACTAAGCTTCAAAAATTGAAGTTTACACTTGGCAAAGAACAAGCTGAAGGGGAT +AGTATCGAACATGTTAAAGAATTTGACGGTGATGTTAAGTTCGAAACAATTGAGAAACTA +TTTGATCACATTGATGAACACCCCAACCTCAACATAGTAGGCTTAAACTTGGTAGCACCA +GAAAACCCAATTGCTATCTATGCCGCAAGAGAAGAATCGTACGACTTTTCATTTTCCGAA +CCGCGCCCGCCTCAATGGAAAAAGGTGGTAACGTTTAAGGAAGACAGTAAACGAATAATC +TCTCTACAGTTGCGAGGAGAAGATACTGAAGATAACATCCTTGATGAGATAGAACACGCT 
+ATAAAAGTCTCACATGGAATGCCGTATGCTGAATGGATATTCAATGGATGGTTTAAGAAA +GAGAGCAACGACAATATATTATATTGTGTCGAGTTGGACCTGGTGACCGCAAAAACCCAA +TCAGGCCCAGTTGGATGGACACGTGCTCAAACGAAAAACCTGAAAGATCTTGAAATTCAG +CTAAATAAAGGCAAACCGATCGATGTTAAATCAGTGGTTTTAGGTGCACCGCAAGCATCT +ACGCAGGCGACCGATACTATGGACGTGCTAGTGAATAAGCATTTAGTTAAAGTTCATTGT +CTGAGTTATGAAAATTTGAACAACTTAGCTTTGAACGGAACACAAGTGTTTGCTTTAGCA +TCCGACAATATATTGATCGTGCCTGCACATGCAGCCAGACAGAACAAGTGGATCCGATTT +AGTCGCGCAACACAAACTGGTCATTATGGAGTAGCGAAAGTTGATGAACGTAGAATTGAT +TTTACACGTGACATCGCTATAGCCGTTATTTTGACTAGAACCGAAGCAGAACAAAAATTG +TGCGAATTAGATTATTCGATCCAGTTGACGAATATTAGTAAAGAGAAATTCCACTTCCCT +CTTATTACGAAATATTTGTTAACCGCTGACCAGTCAGAAGTAGAATGGATGAATTGTACG +ACCCTACATTATTTTGCAAAGAATAGAACCGTGGGGTTAGGAAGAACAACATCATTCCAA +GTTTCTGAATTTCTATGTGGAAACGAGTACATATCCAAGAAACTGGTCGCATGCGCGCAA +GGACTCCAGTCAAGTGTCGAACTAAGCCGACTTGGTGATTGTGGAAGTCCAATTGTCTTG +GCGTCGGGAAAGAAGGCAGGAAAACTGATAGGTTTTCACGGTTATCACTCTCCAAATCTA +CAAACGTGGTATGGAGCAATGTTGACTGTTGAGGACTTGGGAATCATCAACGGCGTAGAA +GAACATTTTGATGACCCATGGGCCAAACTTATTACACAAGGATTACCTGTCGATTTGCCA +ATTGGACCAGAAGTTGAGTATGTTGGTAATCTAATAAGACCTAGTTTACCTGTGACAAAT +GACTCATTGGACCATTGGCACAAATCACCATTTGCTGATCAATTTGAAGAACAACTAGCA +CCTGGTCGATTGAATCCATATGATTCATATATTGAAGGAGATTTGCCAACTAATCTTGAG +GGCCGAAAAAGTTTAATCTTAGGCCCGAATAGTGAAATGGCAAAAACTCTTCCAGAATTG +GATCAAGGAATTATCGACTGGATTGTAGATCAGTTGGTGGTGGAACAAGTTGCAACTTTC +AAAGCAGAAAACCTTTTAACGAAAGTTAGTGACGATATTGACGAAATGCTTGATTATGCC +CTGAATGGAAATGTAGATAACACATATGTTAGGGGTATGGAAGTCAACAAAGCATCAGGA +TTACCTTGGAGTCTTTCAGGTAGTCCGAAAAAGAGTGACTTTATCGATGTAGATGAAGCT +ACTGGAGTCAGATCGTTTAAGGTTAATGCTAATGGAGACGCTCTTAAAAATAGAGTTATC +CTGAAGTTGCAACAGGCGAAGATGGGAAATAGAATCTTGAGTTTTTCAAGTTCGAAACTA +AAGGACCAACCCATCAAAATAGCGCAAGCGAAGAGTGGAAGGACGAGAGTATTCCATTGT +ATCCCAGTGGACTTAATCTTGTTTTCGGGAGCGCTGTACGGCCCGTACAAAGAAGCATAC +ACAAAGGCTGGACTGAAATGCTATCATGCTGTAGGAATAGATCCGAAATCAGTTGGTTGG +CAACAGTTGGCTACGTATATGACGAAGCATCCTAATTATTTTGATGCTGATTATAAGAAT +TACGATAAGTATTTGCATAGGCAGGTATTTAAAGCAGTTCGAAAAATTCAGCGATCAGTG 
+ATTCAACAGATGTGCCCAGATAAATGGGATAAAGCAAGAGCTGTTGAAGAATTAGATGCT +ATTGACACGTATGTAGTTGACTATCAAACAGTCTACAAAACCAGTCGTGGTAACAAAAGT +GGTAGTTATACTACTACAATTGATAATTGCCTGGCGAACGATATTTATGGTTTGTATGCG +TGGGTGAAGACAACCGGCCTAAGATCACTGTGGGATTACCGACAGAATGTCTCGAGCGTC +GCGTTTGGTGATGATATTATAAAAAGCGTTAGCGATGAGTATAAGGATAAGTATAATTAT +TGTACTTATCGAGATGTACTAAATGCTACAGGACATATCATGACACCAGGTTCAAAGGAT +GGAGAAGAGAAGCCATTTACTTCTTTTGAAAACCTACAATTCTTGAAAAGAGGATTTAAG +TTAGAAAACGGTATGGTTTTAGCTCCATTACTACAACGATCTATTGAAGGACCGTTTGTA +TGGACTGATATCCGCGAAGATCAGATAACTGTGTGGGTAAATCTCGTGCAAGAACAGTTG +ATCGAAGCCGCTCTTTGGGGTGAGGAGTATTACAATGAGCTTTGTCAAAAGCTAAAATGT +GGTACAAATAGAACCTTGAATGGAGCATTAGCAGTATTGTTGAATACGAGCTGGGAAGTT +ACTTTCCAAAAATTCTGTAATCGTTATTATGGCATTAAAAGAGGAGATCTTTGATCAGAA +CACCACTCTTTTCACCGTTTTAGACGAGAACGAGGTTACTGAGATTAAGTCAATTCAATC +TTCAGTAACAGCAGTAAAGACCCAGCTCGATCAACAAAAACTGCAACTTGACGGTTTAGC +TAAAGTAGTTGACAACAATCAAGCTCGAAATGAAGAACAATTCGTTAATATCAATACAGC +ATTGGTAGAAATGAATTCAGAAGTTGACAAATTAACCACAACAACAAGTCAACAGGCAAA +ACAGATTAACACCCTTGCCACAACATTAAACGAGCTTGATCAAACTACGAAAGACTCCTT +AGACACGTTGAACACAACAACAGAATCTCTTAGTAAACAAGTATTATTTAATACTGATGA +GATTACCGTGTTAAAGGTAGACGTAGCAACCGTCACACAAAAACAACAGGACGTAGAACA +TTCACTTGTGACAATGAAAGATGAGATAGGAGAATTGCACATATCAGTGAATGCCAATGC +TAACTCCATTGAGGCATTACGCACCAGAATTGCCGCGCTAGAGGTTAGAGATGTAGGACC +GTGGGTTTTGAAAAACAGAATTTACAAATTCGTGATCAATATGCCAAATGGAACTACTCG +CTATACTACAATATACTTTTTCGCCGATGTATATTATAGCACTGGAGTAAGAGCAGCACC +TACAAATGCGGGAACAACTACAAGCATATTGACGATTACTTCGTTGACAACATCATATAG +TTTGGCTAACGTCCCCGTTTTAAAAGGTGTACCTTATAGAGTCAATGGTTACTTTGCTAA +CGGAAATAGTATCGAAGATATAACCGGAAGCACGTCAGTGATTTACGACTCTATGTAAAC +CGGACATGACATGTCTATGACACGACATTAAACTGTCAGAACCTGTTTGGTTAACACAGA +GAGATTAACCGCAAGACGAGTATGCAGAATCCAACACAAACCATGCATATATACGACATG +CCCCTACGCGTCATCGCTGGCCTGTCAACCCTTGCCAAAACAACTGAAGAAGACGACAAC +ACCTCAACTGGAATAGTAGTTAGTGAAGTAGGAGAGCCACAAGTGGTCGACCATCCAGCA +TGGATTGATCCCTTTGTTGCTTATCAATTGCGAGCTCCACGTAAAAACATCACACCAGAT +TTTATATTTGGTCGAGCCGATATTGGTAATGCGTTTAGTGCTTTCTTACCGCGCCGCTTT 
+TCTGCTCCAGCAGTAGGAACCCGACTTGTGATAGACCCTGTTTTCACTTACCAACAGAAA +ACGGTGCTAGGACTATATAATTATTTCCATGCGGATTTTTATTATATAGTGCATGTTCCA +GCACCCTTGGGAACAGGTATCTATCTGAAGATCTATGCTCCTGAATTTGACACTACAACC +GTAACACGAGGAATTCGGTTTAAGCCAAGTGCATCTCCAACAATTGCACTTTCAGTCCCT +TGGAGCAACGATCTATCGACCGTAGAAACATCCGTAGGTCGAGTCGGACAGAGTGGAGGG +AGTATTGTTATCGAAACTATCGAAGATAACAGTAACGAGACGGTCAACACCCCACTCAGC +ATCACCGTTTGGTGTTGCATGGCGAATATTAAAGCCACAGGCTACAAAAATGCGGATACG +TCAGCTTACAACGAAAAAGGCATGAACTTTGTCCCAGTTCCAGTGCCAAAGCCGCCCGTT +CCCCCAACAAAACCAATTATGGGCGAGGAACAAGCTGACAATGAAGTTACAGCCGAAGGT +GGTAAACTTGTACAGGAATTGGTTTACGACCATTCTGCGATTCCTGTAGCGCCAGTCGTC +GAAACACAAGCAGAACAGCCGGAAGTCCCAGTTTCATCAGTGGCAACGCGAAAGAACGAT +ACGGGACATTTGGCAACAAAGTGGTATGATTTCGCCAAAATCAGTCTGTCAAACCCAGCT +AACATGAACTGGACCACGCTAACCATAGACCCGTACAACAATGTTACATTGTCTAGAGAT +GGTGAGTCGATGGTCCTACCATGGAGGCGAAATGTTTGGACAACCGGATCGAAAAGTATT +GGATATATCCGAACGATGGTTGCACAAATTAACATACCACGCCCGCCGCAGATCAGTGGA +GTGCTCGAAGTTAAAGATTCAATCAATAACTCAAGTATTTCACTGGTAGAATTTGGAGGA +AAAGTAGAGATTCCAATTATTCCGAAGGTTATGAACGGACTAGTAACAAGTGCTAGTTTG +CCAAGGCATAGACTAAACCCATGGATGAGAACCGCCGAAAGTAAGGTTGAATTGCAATAT +CGAATTATTGCTTTTAATCGAACTAGCGACATTGCTGACCTTAACGTTAGCGTTTTGTTG +CGACCTGGCGATTCGCAATTCCAATTGCCGATGAAACCTGACAATAGTGTGGATACACGT +CATTTTGAGCTTGTTGAAGCTTTAATGTACCACTACGATAGCCTCCGAATTCGAGGAGAA +GAACAGAGTCTGCCAGAAAATGCACCTAATGCAGTTTCAAACCCTCAGCAGTTCATTACA +CCCGCAACCGCTCTAAGTGCCGAAGAATATAATGTGCACGAGGCGTTGGGTGAAACTGAG +GAGTTGGAGCTGGATGAATTTCCGGTTCTGGTGTTCAAGGGAAATGTTCCCGTTGACTCA +GTGACGTCCATTCCTTTGGACCTCGCAACTATATACGACTTTGCTTGGGACGGAGAGCAG +AATGCAATTTCTCAGAAATTTCAGCGTTTTGCTCATCTGATACCGAAAAGCGCAGGTGGT +TTTGGCCCAGTGATTGGTAATTATACTATCACGGCTAACCTCCCCACCGGTGTAGCAGGT +CGTATTCTGCACAATTGTCTCCCAGGAGATTGTGTAGATCTAGCAGTATCGAGAATTTTT +GGCTTGAAGAGCCTTCTCGGTGTTGCAGGAACAGCAGTTTCAGCCATTGGTGGCCCACTC +CTTAACGGTTTGGTTAACACCGCAGCGCCTATCCTTAGTGGAGCAGCGCACGCCATTGGT +GGAAATGTTGTAGGAGGACTGGCTGATGCAGTGATCGACATTGGATCTAATTTGCTGACG +CCAAAAGAAAAAGAACAACCAAGCGCAAATTCAAGCGCGATTTCTGGAGATATTCCGATT 
+TCGCGTTTTGTAGAGATGCTCAAGTATGTCAAGGAGAATTACCAGGATAATCCTGTGTTC +CCGACTTTACTTGTTGAACCACAAAACTTCATCTCAAACGCAATGACCGCACTCAAAACA +ATTCCAATTGAGGTCTTTGCCAACATGCGCAATGTGAAAGTTGAACGAAATTTGTTCGAC +CGAACGGTTGTTCCAACTGTGAAGGAAGCTACTCTAGCTGATATTGTCATACCTAATCAT +ATGTATGGCTATATTCTCCGAGACTTCCTCCAAAACAAACGAGCGTTTCAATCTGGGACC +AAACAAAATGTGTACTTTCAGCAGTTTCTAACTGTTCTATCTCAACGTAATACCCGTACA +CATATTACGCTCAACGACATCACAAGTTGCTCTATTGATAGTGAGTCGATTGCAAACAAA +ATAGAGAGAGTAAAACGCTACTTGAGTGCAAACTCGAGTGGGGAGACCACGGAAGAATTT +TCGCGAACAGACACCGGTCTGCTACCTACTACGACCCGCAAAATCGTATTAGGTGAAAGT +AAACGCAGGACGGAACGAAACGTAGCAGAAACTGTTTTCCCTTCCGTGAGGCAGTAGATT +AAATCCAAAACTCGCCATGTGCGCGTCTCAGAAACTAATTTTTAAAATTGGTTTGCGCGT +ATGGTTAAAAAGTCACATTAAATATGAGGATGGTCGCTATAATTTTCTAGCAGAAGATTA +TAAAGGACGGTGTGATGATGTTTTGGAGCCCGCACTGGGTGTGGGTTCGTAGTGCTAGCA +ATTGCAACTTTTCCTGAAAGTTAATACCTATAGTTTATAGGAGAAAAATTAACATTTATT +GAGAATATCTGTGCAAACGACTCCCCACGCGGGGAGTAGTGGAATCTTGATAAACAGGTT +TTTTAAGCTTTTGCGTTTCCGAAAGGGTTACTCTGGCTGAACCAGCAGTTTTTATAACTG +TGGGTAGCCCCTCGGAAAATCAGGTTTGC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blast.tab Sun Oct 15 12:52:40 2017 -0400 @@ -0,0 +1,7 @@ +Contig1 gi|402295620|gb|JX220408.1|_Nora_virus_isolate_FR1,_complete_genome 100.00 2113 0 0 1 2113 4248 2136 0.0 3811 12333 +Contig2 gi|402295620|gb|JX220408.1|_Nora_virus_isolate_FR1,_complete_genome 99.89 1745 2 0 1 1745 366 2110 0.0 3139 12333 +Contig3 gi|402295620|gb|JX220408.1|_Nora_virus_isolate_FR1,_complete_genome 98.78 164 0 1 1 164 377 216 4e-80 284 12333 +Contig4 gi|402295620|gb|JX220408.1|_Nora_virus_isolate_FR1,_complete_genome 99.92 2476 2 0 1 2476 5145 7620 0.0 4457 12333 +Contig5 gi|402295620|gb|JX220408.1|_Nora_virus_isolate_FR1,_complete_genome 99.89 4714 1 2 6 4715 12333 7620 0.0 8473 12333 +Contig6 gi|402295620|gb|JX220408.1|_Nora_virus_isolate_FR1,_complete_genome 99.89 894 1 0 1 894 4235 5128 0.0 1608 12333 +Contig7 gi|402295620|gb|JX220408.1|_Nora_virus_isolate_FR1,_complete_genome 99.10 222 0 1 1 222 237 18 2e-111 389 12333
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/contigs.fa Sun Oct 15 12:52:40 2017 -0400 @@ -0,0 +1,216 @@ +>Contig1 +TTTTGCTAAAGCTAAATCAACTATATCTGTTTCCATCAGACTCAGTTCATCGTTCACGAA +ATTTTTGATTTGATCGTAAACTGTTTCATATGAATTCGCTGATGTCGATGGAATTTCATC +CATATCGATCAATTTATTCTCTTGCAACATTCGTTTGGCCGCGATCAACTGCAAACCTGT +AAAAGGATTAACTCTAATTTTGACTTTCGTCGTAACTAGATTTTCTGCTGCTCCTTCTAC +ACATTCGGTGATGTACCTTCCTAACCAAGCACTGAAAGTTTCTGTTTTGTTAATACACAT +GGAATTAATGAGATTCCACGCTCTATAACTCAACATATTACTTCTCACGTGATTTAGTGT +AGTTAAAGGTTCAACACCTAGCTCTCGCGACTCAACATCATAGCCTTCTAAACTCCTTTC +GATGATATTATCAAATCTAGTTAACAACTGATCTGAAATACCGCCATACATAACTTCTTC +ATCTAAGTTATCTTTGAGCGGTATAAATTTCTCATCTGTCTGGATAGGTCGGCTATTCAA +CTCAAATGTTTCATCAAGGAAAGGCATGTTTTCGTCTCCTGGGCGGATTCTCAACAAGTG +ATCCCTCAATTCGCGCATTCGTTGTCTTCTTTCTAATTGCGCTCCATCCATTTGTTCTTG +TCCGGTGATTCCCACATCTTTAATAGTGGAGTTATAGAAATTATTTTGAATAATCAAATC +ATCTCCAATTATCTTCACTATTTCACTCAAAGACATGGTTTTTAGCGTGCTGGTCGTGTT +GCTTCCAATAGCGCTGACGGCTTTCGACATTGGTCCGTAATAAAAATCGACCCAGTCATA +GCTCTCGTCAATCGCTCCTCGTTTTGGCATTTTGTTCTTGTTCCTCCTCAAGCAAATTGT +ATGAGGGAATCGAGCATGTAATGCTTCGATATCCTCAATCGTTACGCTTTTGTCCGGCAA +CTTGTTACAGGTCGTAAACACCAACTTACTTCTGTATGGTAAACCTTTTTGTTTCAAATC +AGCTTGGTTTGTTCCCACTGCTGTATTAGAAATAAAAGTAAACCACATCAAATGATCCTT +GTTGTCTTTTTGTTGAAAGGCATCATCTACGTAAGTTATTTCTTGTCCGGTGTATCCTGT +ATCAAATTCATCTCTGCACTGCCTAGACCAAGTTGTCCATTTAGTAGCGTTTCTAAAAGA +AATATCGCCATGTTCCTGAACATACTCGCACACTTTTCTTGAGAGGATTGCTACGAGGTT +TGTCTTTCCGATTTGTTTTTCGCCTTGTATGGTAACTCCTACGGGTTTTACTCGGGTTTG +GTTACTGTTCTCTAACACCTTAACTTCAATAAGTAGCACTTCGAGTGATGCCAGATGTTT +TTGGCACTCTGTTACTACTTTATCTGATCGAAGTTCTTTCGTTTGCAAAGTGTTTAACTT +ATTTCTCAAAGTTGCAACTTTTTCCCTGATTTCGCCAACTCTTACAGCCCCTTGTGTTTG +TAAGAACTCATTTGGGTTTAATTTGAGCGTTGTTTTCATCCATGTTTCCGCTTCCGATAA +TTCATTAACTTCTTTTGCAACTTGCAAAATAGTATCATATGAACTATCTTTCATCATTCC +CATTTTAATCATCAACGGCTGCATACGTTTTATTATATTTTCGCAAGCCTTTTGATTACG +GTCCAAATTCACAAGAAAACTTTCAACTGGAACATCTCCAATTTGAGTCTTTGTAATCGT +CAATCCCATTGCTACCATAGGTAGAACATCTTCCAACATTTCTTCTGTTCCTTCTATCGC 
+CGGTCGATCTAGCATCCGTCCTACAGTACTTTCCCAAATACCAAGTGGTAACTTGAGAAT +ACTACTTATCATCGTAATTTTCGAGATAAGATCTTTTTTGTTCTTAATTTGCTGTAGTTC +GTAGACCATCGTGATAAAGAATGTTGGATATTCTATGATTTGTTCTACACATCGCCCTAA +CACACCTCTACGTCCTGTTGTTCCAGTTGGGTTCGATACTATTCTCCGTATAAAATTAAC +TTTCTCTGTTATTACGTTAATGTTATTTCTAATATATTTAATAAAAGCATACAAAATTGC +AAAATAAAGGAAA +>Contig2 +TATTATAAAATAATATTTAATCCCCATCTTTGCAAGATGGAAACTCTCAATAAATCAGGA +GTATAATAGATTTAAACCCTCACACTGAATCACGAGTATAAACAGATTCAAATATGATTA +ACAATCAAACAAACAAAAAGGGACCACAACTAGAGAGAGTACATTTTGGTAGTGCGCAGG +TTGTGGGAAAAAGTACCAAACGACGACAACGCGGAACCAAATTTGACATTGAATATACTG +TTAAAAGGAACGATGCACCAAAAGAGCAGAAATTCTTAGTTTCAGAAATTTTTGATGAAA +AGTTGGATAAACAAATAAAATATGAGAAGAAGCAAAATCATACTTTTATTAAACCGAAAT +TAAATTTAGTTACTAGAGAAGAACAACACGTGACTAAGAAGGTTTTAAGAGGTAAAGAAC +GAGCTGCAACTCATGCTTTTATGAAGGAAATGGTTGAATCTAACAAGATACAACCTAGTT +GGAATGTCGAATACGAAAAAGAAATAGATGAGGTTGATCTATTTTTTATGAAGAAGAAAA +CCAAACCTTTCTCAGGTTTTTCTATTAAGGAATTAAGAGATAGTCTAATTGTGCAGTCAG +ACGATAAAAACATGGCACAGCCAACCGTGATGAGTTCAACCAATGAAATTGTTACACCCC +GTGAGGAGATAAGCGTTTCTGCTATCTCTGAACAACTGGCATCCTTGATGGAGAGAGTTG +ATAAACTCGAGAAGATGAATGCTGCTTTGGAGGAAGAAAACAAGCAGCTAAAGAAAGAGA +GAGAGGCGACTATTAAGTCAGTTAAGAAAGAGGCAAAGAGGACTAAACAAGAGAAGCCTC +AGATTGCGAAGAAAACGCAACACAAGAGTTTAGGAGTAAATCTTAAAATCACCAAGACCA +AAGTAGTTGGTCAGGAACAATGTTTGGAAATTGAAAATACTCAGCATAAGAAATTTGTTG +AGAAGCCAAGCATGCCATCCAAAGTGAGCAAGAAGATGAAGGGACAACAGTTGAAAAAGA +CTATTCGTACTTGGTATGAATTTGATCCCTCTAAACTCGTTCAGCATCAAAAAGAAGTGT +TGAACAGTGTTGTTACCAACACAACCTTCGCAGATAAAGTCCGTGAAACTGGTATACCTA +AACAAAAGATTAGGTATACTGCAAAACCACCAGCAGAGGAGAAGAGGAGTATCCATTTCT +ATGGTTATAAGCCAAAAGGAATCCCTAACAAAGTTTGGTGGAATTGGGTCACCACTGGCA +CAGCTATGGACGCTTATGAAAAAGCTGACCATTATCTGTATCACCAATTTAAACGAGAAA +TGATGGTATACAGAAATAAATGGGTCAAGTTTAGTAAGGAGTTCAATCCGTACCTATCGG +AACCGAAAATGGTATGGGAAGAGAATACATGGGAATATGAATATAAAACAGACGTTCCCT +ACAATTTTATTCTCAAATGGCGCCAGTTAGTGCAGACCTACAAGCCTAACACACCAATCC +AGGCTGATTGGTACAAAATCTCGCAGAAACAACAATGTTAATTGAAGTTTTCATTAATTC 
+TTTGTTGCAAAATCTAGGTATCATGATGTCTTTCCGTGACCTAGTGGCGAGCCCATGGAT +ATTGCTAGTAATAGCTATACCCTTGTGTGCATTTGCCAGTTCAGCGTCTATGGTTAGGGA +GATGCTTTTCCGTCATAAAATTACAGAAAATATTTTAAAAGGAACAGGAGTAGAAGAATT +GTTTA +>Contig3 +TATTTTATAATAATAATTTAATCAGATGCTAAGCTCAATTAATATAATTGAATAGCTAGA +ATGATACATTTTACATCGATATCATTAGATGAGAATTATTCCTTACGATTATAAAGACCA +ATATAAATCAGTCTAAATAAAAGACATTGATTTTTTTCAAGTTC +>Contig4 +AGTAAAAACTTTTCAATATGCTATAGCTGGAGAAGAGCAAAGTAAAGGTGAAAAACGAGC +CAAACAGAAGAACATCGCAACAACTAAGCTTCAAAAATTGAAGTTTACACTTGGCAAAGA +ACAAGCTGAAGGGGATAGTATCGAACATGTTAAAGAATTTGACGGTGATGTTAAGTTCGA +AACAATTGAGAAACTATTTGATCACATTGATGAACACCCCAACCTCAACATAGTAGGCTT +AAACTTGGTAGCACCAGAAAACCCAATTGCTATCTATGCCGCAAGAGAAGAATCGTACGA +CTTTTCATTTTCCGAACCGCGCCCGCCTCAATGGAAAAAGGTGGTAACGTTTAAGGAAGA +CAGTAAACGAATAATCTCTCTACAGTTGCGAGGAGAAGATACTGAAGATAACATCCTTGA +TGAGATAGAACACGCTATAAAAGTCTCACATGGAATGCCGTATGCTGAATGGATATTCAA +TGGATGGTTTAAGAAAGAGAGCAACGACAATATATTATATTGTGTCGAGTTGGACCTGGT +GACCGCAAAAACCCAATCAGGCCCAGTTGGATGGACACGTGCTCAAACGAAAAACCTGAA +AGATCTTGAAATTCAGCTAAATAAAGGCAAACCGATCGATGTTAAATCAGTGGTTTTAGG +TGCACCGCAAGCATCTACGCAGGCGACCGATACTATGGACGTGCTAGTGAATAAGCATTT +AGTTAAAGTTCATTGTCTGAGTTATGAAAATTTGAACAACTTAGCTTTGAACGGAACACA +AGTGTTTGCTTTAGCATCCGACAATATATTGATCGTGCCTGCACATGCAGCCAGACAGAA +CAAGTGGATCCGATTTAGTCGCGCAACACAAACTGGTCATTATGGAGTAGCGAAAGTTGA +TGAACGTAGAATTGATTTTACACGTGACATCGCTATAGCCGTTATTTTGACTAGAACCGA +AGCAGAACAAAAATTGTGCGAATTAGATTATTCGATCCAGTTGACGAATATTAGTAAAGA +GAAATTCCACTTCCCTCTTATTACGAAATATTTGTTAACCGCTGACCAGTCAGAAGTAGA +ATGGATGAATTGTACGACCCTACATTATTTTGCAAAGAATAGAACCGTGGGGTTAGGAAG +AACAACATCATTCCAAGTTTCTGAATTTCTATGTGGAAACGAGTACATATCCAAGAAACT +GGTCGCATGCGCGCAAGGACTCCAGTCAAGTGTCGAACTAAGCCGACTTGGTGATTGTGG +AAGTCCAATTGTCTTGGCGTCGGGAAAGAAGGCAGGAAAACTGATAGGTTTTCACGGTTA +TCACTCTCCAAATCTACAAACGTGGTATGGAGCAATGTTGACTGTTGAGGACTTGGGAAT +CATCAACGGCGTAGAAGAACATTTTGATGACCCATGGGCCAAACTTATTACACAAGGATT +ACCTGTCGATTTGCCAATTGGACCAGAAGTTGAGTATGTTGGTAATCTAATAAGACCTAG +TTTACCTGTGACAAATGACTCATTGGACCATTGGCACAAATCACCATTTGCTGATCAATT 
+TGAAGAACAACTAGCACCTGGTCGATTGAATCCATATGATTCATATATTGAAGGAGATTT +GCCAACTAATCTTGAGGGCCGAAAAAGTTTAATCTTAGGCCCGAATAGTGAAATGGCAAA +AACTCTTCCAGAATTGGATCAAGGAATTATCGACTGGATTGTAGATCAGTTGGTGGTGGA +ACAAGTTGCAACTTTCAAAGCAGAAAACCTTTTAACGAAAGTTAGTGACGATATTGACGA +AATGCTTGATTATGCCCTGAATGGAAATGTAGATAACACATATGTTAGGGGTATGGAAGT +CAACAAAGCATCAGGATTACCTTGGAGTCTTTCAGGTAGTCCGAAAAAGAGTGACTTTAT +CGATGTAGATGAAGCTACTGGAGTCAGATCGTTTAAGGTTAATGCTAATGGAGACGCTCT +TAAAAATAGAGTTATCCTGAAGTTGCAACAGGCGAAGATGGGAAATAGAATCTTGAGTTT +TTCAAGTTCGAAACTAAAGGACCAACCCATCAAAATAGCGCAAGCGAAGAGTGGAAGGAC +GAGAGTATTCCATTGTATCCCAGTGGACTTAATCTTGTTTTCGGGAGCGCTGTACGGCCC +GTACAAAGAAGCATACACAAAGGCTGGACTGAAATGCTATCATGCTGTAGGAATAGATCC +GAAATCAGTTGGTTGGCAACAGTTGGCTACGTATATGACGAAGCATCCTAATTATTTTGA +TGCTGATTATAAGAATTACGATAAGTATTTGCATAGGCAGGTATTTAAAGCAGTTCGAAA +AATTCAGCGATCAGTGATTCAACAGATGTGCCCAGATAAATGGGATAAAGCAAGAGCTGT +TGAAGAATTAGATGCTATTGACACGTATGTAGTTGACTATCAAACAGTCTACAAAACCAG +TCGTGGTAACAAAAGT +>Contig5 +TTTTTGCAAACCTGATTTTCCGAGGGGCTACCCACAGTTATAAAAACTGCTGGTTCAGCC +AGAGTAACCCTTTCGGAAACGCAAAAGCTTAAAAAACCTGTTTATCAAGATTCCACTACT +CCCCGCGTGGGGAGTCGTTTGCACAGATATTCTCAATAAATGTTAATTTTTCTCCTATAA +ACTATAGGTATTAACTTTCAGGAAAAGTTGCAATTGCTAGCACTACGAACCCACACCCAG +TGCGGGCTCCAAAACATCATCACACCGTCCTTTATAATCTTCTGCTAGAAAATTATAGCG +ACCATCCTCATATTTAATGTGACTTTTTAACCATACGCGCAAACCAATTTTAAAAATTAG +TTTCTGAGACGCGCACATGGCGAGTTTTGGATTTAATCTACTGCCTCACGGAAGGGAAAA +CAGTTTCTGCTACGTTTCGTTCCGTCCTGCGTTTACTTTCACCTAATACGATTTTGCGGG +TCGTAGTAGGTAGCAGACCGGTGTCTGTTCGCGAAAATTCTTCCGTGGTCTCCCCACTCG +AGTTTGCACTCAAGTAGCGTTTTACTCTCTCTATTTTGTTTGCAATCGACTCACTATCAA +TAGAGCAACTTGTGATGTCGTTGAGCGTAATATGTGTACGGGTATTACGTTGAGATAGAA +CAGTTAGAAACTGCTGAAAGTACACATTTTGTTTGGTCCCAGATTGAAACGCTCGTTTGT +TTTGGAGGAAGTCTCGGAGAATATAGCCATACATATGATTAGGTATGACAATATCAGCTA +GAGTAGCTTCCTTCACAGTTGGAACAACCGTTCGGTCGAACAAATTTCGTTCAACTTTCA +CATTGCGCATGTTGGCAAAGACCTCAATTGGAATTGTTTTGAGTGCGGTCATTGCGTTTG +AGATGAAGTTTTGTGGTTCAACAAGTAAAGTCGGGAACACAGGATTATCCTGGTAATTCT 
+CCTTGACATACTTGAGCATCTCTACAAAACGCGAAATCGGAATATCTCCAGAAATCGCGC +TTGAATTTGCGCTTGGTTGTTCTTTTTCTTTTGGCGTCAGCAAATTAGATCCAATGTCGA +TCACTGCATCAGCCAGTCCTCCTACAACATTTCCACCAATGGCGTGCGCTGCTCCACTAA +GGATAGGCGCTGCGGTGTTAACCAAACCGTTAAGGAGTGGGCCACCAATGGCTGAAACTG +CTGTTCCTGCAACACCGAGAAGGCTCTTCAAGCCAAAAATTCTCGATACTGCTAGATCTA +CACAATCTCCTGGGAGACAATTGTGCAGAATACGACCTGCTACACCGGTGGGGAGGTTAG +CCGTGATAGTATAATTACCAATCACTGGGCCAAAACCACCTGCGCTTTTCGGTATCAGAT +GAGCAAAACGCTGAAATTTCTGAGAAATTGCATTCTGCTCTCCGTCCCAAGCAAAGTCGT +ATATAGTTGCGAGGTCCAAAGGAATGGACGTCACTGAGTCAACGGGAACATTTCCCTTGA +ACACCAGAACCGGAAATTCATCCAGCTCCAACTCCTCAGTTTCACCCAACGCCTCGTGCA +CATTATATTCTTCGGCACTTAGAGCGGTTGCGGGTGTAATGAACTGCTGAGGGTTTGAAA +CTGCATTAGGTGCATTTTCTGGCAGACTCTGTTCTTCTCCTCGAATTCGGAGGCTATCGT +AGTGGTACATTAAAGCTTCAACAAGCTCAAAATGACGTGTATCCACACTATTGTCAGGTT +TCATCGGCAATTGGAATTGCGAATCGCCAGGTCGCAACAAAACGCTAACGTTAAGGTCAG +CAATGTCGCTAGTTCGATTAAAAGCAATAATTCGATATTGCAATTCAACCTTACTTTCGG +CGGTTCTCATCCATGGGTTTAGTCTATGCCTTGGCAAACTAGCACTTGTTACTAGTCCGT +TCATAACCTTCGGAATAATTGGAATCTCTACTTTTCCTCCAAATTCTACCAGTGAAATAC +TTGAGTTATTGATTGAATCTTTAACTTCGAGCACTCCACTGATCTGCGGCGGGCGTGGTA +TGTTAATTTGTGCAACCATCGTTCGGATATATCCAATACTTTTCGATCCGGTTGTCCAAA +CATTTCGCCTCCATGGTAGGACCATCGACTCACCATCTCTAGACAATGTAACATTGTTGT +ACGGGTCTATGGTTAGCGTGGTCCAGTTCATGTTAGCTGGGTTTGACAGACTGATTTTGG +CGAAATCATACCACTTTGTTGCCAAATGTCCCGTATCGTTCTTTCGCGTTGCCACTGATG +AAACTGGGACTTCCGGCTGTTCTGCTTGTGTTTCGACGACTGGCGCTACAGGAATCGCAG +AATGGTCGTAAACCAATTCCTGTACAAGTTTACCACCTTCGGCTGTAACTTCATTGTCAG +CTTGTTCCTCGCCCATAATTGGTTTTGTTGGGGGAACGGGCGGCTTTGGCACTGGAACTG +GGACAAAGTTCATGCCTTTTTCGTTGTAAGCTGACGTATCCGCATTTTTGTAGCCTGTGG +CTTTAATATTCGCCATGCAACACCAAACGGTGATGCTGAGTGGGGTGTTGACCGTCTCGT +TACTGTTATCTTCGATAGTTTCGATAACAATACTCCCTCCACTCTGTCCGACTCGACCTA +CGGATGTTTCTACGGTCGATAGATCGTTGCTCCAAGGGACTGAAAGTGCAATTGTTGGAG +ATGCACTTGGCTTAAACCGAATTCCTCGTGTTACGGTTGTAGTGTCAAATTCAGGAGCAT +AGATCTTCAGATAGATACCTGTTCCCAAGGGTGCTGGAACATGCACTATATAATAAAAAT +CCGCATGGAAATAATTATATAGTCCTAGCACCGTTTTCTGTTGGTAAGTGAAAACAGGGT 
+CTATCACAAGTCGGGTTCCTACTGCTGGAGCAGAAAAGCGGCGCGGTAAGAAAGCACTAA +ACGCATTACCAATATCGGCTCGACCAAATATAAAATCTGGTGTGATGTTTTTACGTGGAG +CTCGCAATTGATAAGCAACAAAGGGATCAATCCATGCTGGATGGTCGACCACTTGTGGCT +CTCCTACTTCACTAACTACTATTCCAGTTGAGGTGTTGTCGTCTTCTTCAGTTGTTTTGG +CAAGGGTTGACAGGCCAGCGATGACGCGTAGGGGCATGTCGTATATATGCATGGTTTGTG +TTGGATTCTGCATACTCGTCTTGCGGTTAATCTCTCTGTGTTAACCAAACAGGTTCTGAC +AGTTTAATGTCGTGTCATAGACATGTCATGTCCGGTTTACATAGAGTCGTAAATCACTGA +CGTGCTTCCGGTTATATCTTCGATACTATTTCCGTTAGCAAAGTAACCATTGACTCTATA +AGGTACACCTTTTAAAACGGGGACGTTAGCCAAACTATATGATGTTGTCAACGAAGTAAT +CGTCAATATGCTTGTAGTTGTTCCCGCATTTGTAGGTGCTGCTCTTACTCCAGTGCTATA +ATATACATCGGCGAAAAAGTATATTGTAGTATAGCGAGTAGTTCCATTTGGCATATTGAT +CACGAATTTGTAAATTCTGTTTTTCAAAACCCACGGTCCTACATCTCTAACCTCTAGCGC +GGCAATTCTGGTGCGTAATGCCTCAATGGAGTTAGCATTGGCATTCACTGATATGTGCAA +TTCTCCTATCTCATCTTTCATTGTCACAAGTGAATGTTCTACGTCCTGTTGTTTTTGTGT +GACGGTTGCTACGTCTACCTTTAACACGGTAATCTCATCAGTATTAAATAATACTTGTTT +ACTAAGAGATTCTGTTGTTGTGTTCAACGTGTCTAAGGAGTCTTTCGTAGTTTGATCAAG +CTCGTTTAATGTTGTGGCAAGGGTGTTAATCTGTTTTGCCTGTTGACTTGTTGTTGTGGT +TAATTTGTCAACTTCTGAATTCATTTCTACCAATGCTGTATTGATATTAACGAATTGTTC +TTCATTTCGAGCTTGATTGTTGTCAACTACTTTAGCTAAACCGTCAAGTTGCAGTTTTTG +TTGATCGAGCTGGGTCTTTACTGCTGTTACTGAAGATTGAATTGACTTAATCTCAGTAAC +CTCGTTCTCGTCTAAAACGGTGAAAAGAGTGGTGTTCTGATCAAAGATCTCCTCTTTTAA +TGCCATAATAACGATTACAGAATTTTTGGAAAGTAACTTCCCAGCTCGTATTCAACAATA +CTGCTAATGCTCCATTCAAGGTTCTATTTGTACCACATTTTAGCTTTTGACAAAGCTCAT +TGTAATACTCCTCACCCCAAAGAGCGGCTTCGATCAACTGTTCTTGCACGAGATTTACCC +ACACAGTTATCTGATCTTCGCGGATATCAGTCCATACAAACGGTCCTTCAATAGATCGTT +GTAGTAATGGAGCTAAAACCATACCGTTTTCTAACTTAAATCCTCTTTTCAAGAATTGTA +GGTTTTCAAAAGAAGTAAATGGCTTCTCTTCTCCATCCTTTGAACCTGGTGTCATGATAT +GTCCTGTAGCATTTAGTACATCTCGATAAGTACAATAATTATACTTATCCTTATACTCAT +CGCTAACGCTTTTTATAATATCATCACCAAACGCGACGCTCGAGACATTCTGTCGGTAAT +CCCACAGTGATCTTAGGCCGGTTGTCTTCACCCACGCATACAAACCATAAATATCGTTCG +CCAGGCAATTATCAATTGTAGTAGTATAACTACCA +>Contig6 +TAGCTTTAGCAAAAATTAGCTTATCGCAAATCCGAGGCAACATCAAGAGATCAACCTGGC 
+TGGACGTAAATGATTGGATATTAGCTTTGAAACATAAAATTTCAGGAAAAAGCTTTGCCA +AACATATGGATCTGTATCCTAGCTCGCTAGATTCTTTTCTTCTTACACTCAAAGATTGGG +AGGTTGAAGACCGTATAAAGTTTAACTCTATTTACAAGCAAAAGGTATTGTTTGTTCAAT +CTAGATTTTCACTTTACTGTTGGTCTCCTTTTATATCTCGTGGAACACGATTTGTGAAAG +TGACTTCTCAGTTTAGAGAGTTAGTGGATAAGTTAGAAACTGGCATTCTATTCCATGAAA +TAAAATCGGTTACAAATGGAATTAGATGGTTAGGAGGAGCAGGGAACAATGGACACGTTG +GAGAAAGAGTAAGAGTTATTGCTCATACAGCCCAATTTCCGAAAAAGAGTTACCCTCAGA +ATGGATTCCCTATAAACGAAGAACTACATAGGGAATGGATCCAACTTGTTATGAACTCGG +ACTATAAGTATCACTCACTTATAGGAGAAGAAAAAGTAAATATTCTTTGGAACCTTATTA +GGCTCCAACCTCAACATGAGGTGGAGAATTTTAAGGTTTACTTAGAAGATTTGCAGGCGT +CCCCGCCGAAAACTGGGACAATTTGTGCAAAAGTAGTGAATGATATAAAAGCCGAAGTAA +CTTCAAGTTATAGGCAATTTAACAATTACTACACTCGTTTAACAAAAGATGGTATGCACA +CACTTTTATCAATGCTTTCTAGGATAGGTGTTCCAATTTCAGATTATTGGAATGACCTTC +TTGTCGACAAAGCGCCAGCTATTACAGCGGTTACTGTTGGAGCAATAACAAGTT +>Contig7 +AAGACATTGATTTTTTTCAAGTTCAATGTATAACTCTTTTTGTTCGTTTATTTGTTGTTT +GCTCAAAGTTTCTGTTAAGAGGTCTCATAAGTATCCTGTTTACAACGGACCAGCATTATT +CCCTTCGGCACTTCCGATACTCTACTAATCCTTAATACCAGAATCAGCATAATCAGCGAA +GTGGTATCTCGCGCTTCTTTTTCCAGTACTCATTAACCCCCC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/guideSequence.fa Sun Oct 15 12:52:40 2017 -0400 @@ -0,0 +1,178 @@ +>gi|402295620|gb|JX220408.1|_Nora_virus_isolate_FR1,_complete_genome +GCAAAAAGGCCCCTGGGGGGGGGTTAATGAGTACTGGAAAAAGAAGCGCGAGATACCACTTCGCTGATTA +TGCTGATTCTGGTATTAAGGATTAGTAGAGTATCGGAAGTGCCGAAGGGAATAATGCTGGTCCGTTGTAA +ACAGGATACTTATGAGACCTCTTAACAGAAACTTTGAGCAAACAACAAATAAACGAACAAAAAGAGTTAT +ACATTGAACTTGAAAAATCAATGTCTTTTATTTAGACTGATTTATATTGGTCTTTATAATCGTAAGGAAT +AATTCTCATCTAATGATATCGATGTAAAATGTATCATTCTAGCTATTCAATTATATTAATTGAGCTTAGC +ATCTGATTAAATTATTATTATAAAATAATATTTAATCCCCATCTTTGCAAGATGGAAACTCTCAATAAAT +CAGGAGTATAATAGATTTAAACCCTCACACTGAATCACGAGTATAAACAGATTCAAATATGATTAACAAT +CAAACAAACAAAAAGGGACCACAACTAGAGAGAGTACATTTTGGTAGTGCGCAGGTTGTGGGAAAAAGTA +CCAAACGACGACAACGCGGAACCAAATTTGACATTGAATATACTGTTAAAAGGAACGATGCACCAAAAGA +GCAGAAATTCTTAGTTTCAGAAATTTTTGATGAAAAGTTGGATAAACAAATAAAATATGAGAAGAAGCAA +AATCATACTTTTATTAAACCGAAATTAAATTTAGTTACTAGAGAAGAACAACACGTGACTAAGAAGGTTT +TAAGAGGTAAAGAACGAGCTGCAACTCATGCTTTTATGAAGGAAATGGTTGAATCTAACAAGATACAACC +TAGTTGGAATGTCGAATACGAAAAAGAAATAGATGAGGTTGATCTATTTTTTATGAAGAAGAAAACCAAA +CCTTTCTCAGGTTTTTCTATTAAGGAATTAAGAGATAGTCTAATTGTGCAGTCAGACGATAAAAACATGG +CACAGCCAACCGTGATGAGTTCAACCAATGAAATTGTTACACCCCGTGAGGAGATAAGCGTTTCTGCTAT +CTCTGAACAACTGGCATCCTTGATGGAGAGAGTTGATAAACTCGAGAAGATGAATGCTGCTTTGGAGGAA +GAAAACAAGCAGCTAAAGAAAGAGAGAGAGGCGACTATTAAGTCAGTTAAGAAAGAGGCAAAGAGGACTA +AACAAGAGAAGCCTCAGATTGCGAAGAAAACGCAACACAAGAGTTTAGGAGTAAATCTTAAAATCACCAA +GACCAAAGTAGTTGGTCAGGAACAATGTTTGGAAATTGAAAATACTCAGCATAAGAAATTTGTTGAGAAG +CCAAGCATGCCATCCAAAGTGAGCAAGAAGATGAAGGGACAACAGTTGAAAAAGACTATTCGTACTTGGT +ATGAATTTGATCCCTCTAAACTCGTTCAGCATCAAAAAGAAGTGTTGAACAGTGTTGTTACTAACACAAC +CTTCGCAGATAAAGTCCGTGAAACTGGTATACCTAAACAAAAGATTAGGTATACTGCAAAACCACCAGCA +GAGGAGAAGAGGAGTATCCATTTCTATGGTTATAAGCCAAAAGGAATCCCTAACAAAGTTTGGTGGAATT +GGGTCACCACTGGCACAGCTATGGACGCTTATGAAAAAGCTGACCATTATCTGTATCACCAATTTAAACG +AGAAATGATGGTATACAGAAATAAATGGGTCAAGTTTAGTAAGGAGTTCAATCCGTACCTATCGGAACCG 
+AAAATGGTATGGGAAGAGAATACATGGGAATATGAATATAAAACAGACGTTCCCTACAATTTTATTCTCA +AATGGCGCCAGTTAGTGCAGACCTACAAGCCTAACACACCAATCCAGGCTGATTGGTACAAAATCTCGCA +GAAACAACAATGTTAATTGAAGTTTTCATTAATTCTTTGTTGCAAAATCTAGGTATCATGATGTCTTTCC +GTGACCTAGTGGCGAGCCCATGGATACTGCTAGTAATAGCTATACCCTTGTGTGCATTTGCCAGTTCAGC +GTCTATGGTTAGGGAGATGCTTTTCCGTCATAAAATTACAGAAAATATTTTAAAAGGAACAGGAGTAGAA +GAATTGTTTAATCCATTCGGGATAATTATTAAATATTTCCTTTATTTTGCAATTTTGTATGCTTTTATTA +AATATATTAGAAATAACATTAACGTAATAACAGAGAAAGTTAATTTTATACGGAGAATAGTATCGAACCC +AACTGGAACAACAGGACGTAGAGGTGTGTTAGGGCGATGTGTAGAACAAATCATAGAATATCCAACATTC +TTTATCACGATGGTCTACGAACTACAGCAAATTAAGAACAAAAAAGATCTTATCTCGAAAATTACGATGA +TAAGTAGTATTCTCAAGTTACCACTTGGTATTTGGGAAAGTACTGTAGGACGGATGCTAGATCGACCGGC +GATAGAAGGAACAGAAGAAATGTTGGAAGATGTTCTACCTATGGTAGCAATGGGATTGACGATTACAAAG +ACTCAAATTGGAGATGTTCCAGTTGAAAGTTTTCTTGTGAATTTGGACCGTAATCAAAAGGCTTGCGAAA +ATATAATAAAACGTATGCAGCCGTTGATGATTAAAATGGGAATGATGAAAGATAGTTCATATGATACTAT +TTTGCAAGTTGCAAAAGAAGTTAATGAATTATCGGAAGCGGAAACATGGATGAAAACAACGCTCAAATTA +AACCCAAATGAGTTCTTACAAACACAAGGGGCTGTAAGAGTTGGCGAAATCAGGGAAAAAGTTGCAACTT +TGAGAAATAAGTTAAACACTTTGCAAACGAAAGAACTTCGATCAGATAAAGTAGTAACAGAGTGCCAAAA +ACATCTGGCATCACTCGAAGTGCTACTTATTGAAGTTAAGGTGTTAGAGAACAGTAACCAAACCCGAGTA +AAACCCGTAGGAGTTACCATACAAGGCGAAAAACAAATCGGAAAGACAAACCTCGTAGCAATCCTCTCAA +GAAAAGTGTGCGAGTATGTTCAGGAACATGGCGATATTTCTTTTAGAAACGCTACTAAATGGACAACTTG +GTCTAGGCAGTGCAGAGATGAATTTGATACAGGATACACCGGACAAGAAATAACTTACGTAGATGATGCC +TTTCAACAAAAAGACAACAAGGATCATTTGATGTGGTTTACTTTTATTTCTAATACAGCAGTGGGAACAA +ACCAAGCTGATTTGAAACAAAAAGGTTTACCATACAGAAGTAAGTTGGTGTTTACGACCTGTAACAAGTT +GCCGGACAAAAGCGTAACGATTGAGGATATCGAAGCATTACATGCTCGATTCCCTCATACAATTTGCTTG +AGGAGGAACAAGAACAAAATGCCAAAACGAGGAGCGATTGACGAGAGCTATGACTGGGTCGATTTTTATT +ACGGACCAATGTCGAAAGCCGTCAGCGCTATTGGAAGCAACACGACCAGCACGCTAAAAACCATGTCTTT +GAGTGAAATAGTGAAGATAATTGGAGATGATTTGATTATTCAAAATAATTTCTATAACTCCACTATTAAA +GATGTGGGAATCACCGGACAAGAACAAATGGATGGAGCGCAATTAGAAAGAAGACAACGAATGCGCGAAT 
+TGAGGGATCACTTGTTGAGAATCCGCCCAGGAGACGAAAACATGCCTTTCCTTGATGAAACATTTGAGTT +GAATAGCCGACCTATCCAGACAGATGAGAAATTTATACCGCTCAAAGATAACTTAGATGAAGAAGTTATG +TATGGCGGTATTTCAGATCAGTTGTTAACTAGATTTGATAATATCATCGAAAGGAGTTTAGAAGGCTATG +ATGTTGAGTCGCGAGAGCTAGGTGTTGAACCTTTAACTACACTAAATCACGTGAGAAGTAATATGTTGAG +TTATAGAGCGTGGAATCTCATTAATTCCATGTGTATTAACAAAACAGAAACTTTCAGTGCTTGGTTAGGA +AGGTACATCACCGAATGTGTAGAAGGAGCAGCAGAAAATCTAGTTACGACGAAAGTCAAAATTAGAGTTA +ATCCTTTTACAGGTTTGCAGTTGATCGCGGCCAAACGAATGTTGCAAGAGAATAAATTGATCGATATGGA +TGAAATTCCATCGACATCAGCGAATTCATATGAAACAGTTTACGATCAAATCAAAAATTTCGTGAACGAT +GAACTGAGTCTGATGGAAACAGATATAGTTGATTTAGCTTTAGCAAAAATTAGCTTATCGCAAATCCGAG +GCAACATCAAAAGATCAACCTGGCTGGACGTAAATGATTGGATATTAGCTTTGAAACATAAAATTTCAGG +AAAAAGCTTTGCCAAACATATGGATCTGTATCCTAGCTCGCTAGATTCTTTTCTTCTTACACTCAAAGAT +TGGGAGGTTGAAGACCGTATAAAGTTTAACTCTATTTACAAGCAAAAGGTATTGTTTGTTCAATCTAGAT +TTTCACTTTACTGTTGGTCTCCTTTTATATCTCGTGGAACACGATTTGTGAAAGTGACTTCTCAGTTTAG +AGAGTTAGTGGATAAGTTAGAAACTGGCATTCTATTCCATGAAATAAAATCGGTTACAAATGGAATTAGA +TGGTTAGGAGGAGCAGGGAACAATGGACACGTTGGAGAAAGAGTAAGAGTTATTGCTCATACAGCCCAAT +TTCCGAAAAAGAGTTACCCTCAGAATGGATTCCCTATAAACGAAGAACTACATAGGGAATGGATCCAACT +TGTTATGAACTCGGACTATAAGTATCACTCACTTATAGGAGAAGAAAAAGTAAATATTCTTTGGAACCTT +ATTAGGCTCCAACCTCAACATGAGGTGGAGAATTTTAAGGTTTACTTAGAAGATTTGCAGGCGTCCCCGC +CGAAAACTGGGACAATTTGTGCAAAAGTAGTGAATGATATAAAAGCCGAAGTAACTTCAAGTTATAGGCA +ATTTAACAATTACTACACTCGTTTAACAAAAGATGGTATGCACACACTTTTATCAATGCTTTCTAGGATA +GGTGTTCCAATTTCAGATTATTGGAATGACCTTCTTGTCGACAAAGCGCCAGCTATTACAGCGGTTACTG +TTGGAGCAATAACAAGTTTAGCTATTATTACAATAGTAAAAACTTTTCAATATGCTATAGCTGGAGAAGA +GCAAAGTAAAGGTGAAAAACGAGCCAAACAGAAGAACATCGCAACAACTAAGCTTCAAAAATTGAAGTTT +ACACTTGGCAAAGAACAAGCTGAAGGGGATAGTATCGAACATGTTAAAGAATTTGACGGTGATGTTAAGT +TCGAAACAATTGAGAAACTATTTGATCACATTGATGAACACCCCAACCTCAACATAGTAGGCTTAAACTT +GGTAGCACCAGAAAACCCAATTGCTATCTATGCCGCAAGAGAAGAATCGTACGACTTTTCATTTTCCGAA +CCGCGCCCGCCTCAATGGAAAAAGGTGGTAACGTTTAAGGAAGACAGTAAACGAATAATCTCTCTACAGT 
+TGCGAGGAGAAGATACTGAAGATAACATCCTTGATGAGATAGAACACGCTATAAAAGTCTCACATGGAAT +GCCGTATGCTGAATGGATATTCAATGGATGGTTTAAGAAAGAGAGCAACGACAATATATTATATTGTGTC +GAGTTGGACCTGGTGACCGCAAAAACCCAATCAGGCCCAGTTGGATGGACACGTGCTCAAACGAAAAACC +TGAAAGATCTTGAAATTCAGCTAAATAAAGGCAAACCGATCGATGTTAAATCAGTGGTTTTAGGTGCACC +GCAAGCATCTACGCAGGCGACCGATACTATGGACGTGCTAGTGAATAAGCATTTAGTTAAAGTTCATTGT +CTGAGTTATGAAAATTTGAACAACTTAGCTTTGAACGGAACACAAGTGTTTGCTTTAGCATCCGACAATA +TATTGATCGTGCCTGCACATGCAGCCAGACAGAACAAGTGGATCCGATTTAGTCGCGCAACACAAACTGG +TCATTATGGAGTAGCGAAAGTTGATGAACGTAGAATTGATTTTACACGTGACATCGCTATAGCCGTTATT +TTGACTAGAACCGAAGCAGAACAAAAATTGTGCGAATTAGATTATTCGATCCAGTTGACGAATATTAGTA +AAGAGAAATTCCACTTCCCTCTTATTACGAAATATTTGTTAACCGCTGACCAGTCAGAAGTAGAATGGAT +GAATTGTACGACCCTACATTATTTTGCAAAGAATAGAACCGTGGGGTTAGGAAGAACAACATCATTCCAA +GTTTCTGAATTTCTATGTGGAAACGAGTACATATCCAAGAAACTGGTCGCATGCGCGCAAGGACTCCAGT +CAAGTGTCGAACTAAGCCGACTTGGTGATTGTGGAAGTCCAATTGTCTTGGCGTCGGGAAAGAAGGCAGG +AAAACTGATAGGTTTTCACGGTTATCACTCTCCAAATCTACAAACGTGGTATGGAGCAATGTTGACTGTT +GAGGACTTGGGAATCATCAACGGCGTAGAAGAACATTTTGATGACCCATGGGCCAAACTTATTACACAAG +GATTACCTGTCGATTTGCCAATTGGACCAGAAGTTGAGTATGTTGGTAATCTAATAAGACCTAGTTTACC +TGTGACAAATGACTCATTGGACCATTGGCACAAATCACCATTTGCTGATCAATTTGAAGAACAACTAGCA +CCTGGTCGATTGAATCCATATGATTCATATATTGAAGGAGATTTGCCAACTAATCTTGAGGGCCGAAAAA +GTTTAATCTTAGGCCCGAATAGTGAAATGGCAAAAACTCTTCCAGAATTGGATCAAGGAATTATCGACTG +GATTGTAGATCAGTTGGTGGTGGAACAAGTTGCAACTTTCAAAGCAGAAAACCTTTTAACGAAAGTTAGT +GACGATATTGACGAAATGCTTGATTATGCCCTGAATGGAAATGTAGATAACACATATGTTAGGGGTATGG +AAGTCAACAAAGCATCAGGATTACCTTGGAGTCTTTCAGGTAGTCCGAAAAAGAGTGACTTTATCGATGT +AGATGAAGCTACTGGAGTCAGATCGTTTAAGGTTAATGCTAATGGAGACGCTCTTAAAAATAGAGTTATC +CTGAAGTTGCAACAGGCGAAGATGGGAAATAGAATCTTGAGTTTTTCAAGTTCGAAACTAAAGGACCAAC +CCATCAAAATAGCGCAAGCGAAGAGTGGAAGGACGAGAGTATTCCATTGTATCCCAGTGGACTTAATCTT +GTTTTCGGGAGCGCTGTACGGCCCGTACAAAGAAGCATACACAAAGGCTGGACTGAAATGCTATCATGCT +GTAGGAATAGATCCGAAATCAGTTGGTTGGCAACAGTTGGCTACGTATATGACGAAGCATCCTAATTATT 
+TTGATGCTGATTATAAGAATTACGATAAGTATTTGCATAGGCAGGTATTTAAAGCAGTTCGAAAAATTCA +GCGATCAGTGATTCAACAGATGTGCCCAGATAAATGGGATAAAGCAAGAGCTGTTGAAGAATTAGATGCT +ATTGACACGTATGTAGTTGACTATCAAACAGTCTACAAAACCAATCGTGGCAACAAAAGTGGTAGTTATA +CTACTACAATTGATAATTGCCTGGCGAACGATATTTATGGTTTGTATGCGTGGGTGAAGACAACCGGCCT +AAGATCACTGTGGGATTACCGACAGAATGTCTCGAGCGTCGCGTTTGGTGATGATATTATAAAAAGCGTT +AGCGATGAGTATAAGGATAAGTATAATTATTGTACTTATCGAGATGTACTAAATGCTACAGGACATATCA +TGACACCAGGTTCAAAGGATGGAGAAGAGAAGCCATTTACTTCTTTTGAAAACCTACAATTCTTGAAAAG +AGGATTTAAGTTAGAAAACGGTATGGTTTTAGCTCCATTACTACAACGATCTATTGAAGGACCGTTTGTA +TGGACTGATATCCGCGAAGATCAGATAACTGTGTGGGTAAATCTCGTGCAAGAACAGTTGATCGAAGCCG +CTCTTTGGGGTGAGGAGTATTACAATGAGCTTTGTCAAAAGCTAAAATGTGGTACAAATAGAACCTTGAA +TGGAGCATTAGCAGTATTGTTGAATACGAGCTGGGAAGTTACTTTCCAAAAATTCTGTAATCGTTATTAT +GGCATTAAAAGAGGAGATCTTTGATCAGAACACCACTCTTTTCACCGTTTTAGACGAGAACGAGGTTACT +GAGATTAAGTCAATTCAATCTTCAGTAACAGCAGTAAAGACCCAGCTCGATCAACAAAAACTGCAACTTG +ACGGTTTAGCTAAAGTAGTTGACAACAATCAAGCTCGAAATGAAGAACAATTCGTTAATATCAATACAGC +ATTGGTAGAAATGAATTCAGAAGTTGACAAATTAACCACAACAACAAGTCAACAGGCAAAACAGATTAAC +ACCCTTGCCACAACATTAAACGAGCTTGATCAAACTACGAAAGACTCCTTAGACACGTTGAACACAACAA +CAGAATCTCTTAGTAAACAAGTATTATTTAATACTGATGAGATTACCGTGTTAAAGGTAGACGTAGCAAC +CGTCACACAAAAACAACAGGACGTAGAACATTCACTTGTGACAATGAAAGATGAGATAGGAGAATTGCAC +ATATCAGTGAATGCCAATGCTAACTCCATTGAGGCATTACGCACCAGAATTGCCGCGCTAGAGGTTAGAG +ATGTAGGACCGTGGGTTTTGAAAAACAGAATTTACAAATTCGTGATCAATATGCCAAATGGAACTACTCG +CTATACTACAATATACTTTTTCGCCGATGTATATTATAGCACTGGAGTAAGAGCAGCACCTACAAATGCG +GGAACAACTACAAGCATATTGACGATTACTTCGTTGACAACATCATATAGTTTGGCTAACGTCCCCGTTT +TAAAAGGTGTACCTTATAGAGTCAATGGTTACTTTGCTAACGGAAATAGTATCGAAGATATAACCGGAAG +CACGTCAGTGATTTACGACTCTATGTAAAAACCGGACATGACATGTCTATGACACGACATTAAACTGTCA +GAACCTGTTTGGTTAACACAGAGAGATTAACCGCAAGACGAGTATGCAGAATCCAACACAAACCATGCAT +ATATACGACATGCCCCTACGCGTCATCGCTGGCCTGTCAACCCTTGCCAAAACAACTGAAGAAGACGACA +ACACCTCAACTGGAATAGTAGTTAGTGAAGTAGGAGAGCCACAAGTGGTCGACCATCCAGCATGGATTGA 
+TCCCTTTGTTGCTTATCAATTGCGAGCTCCACGTAAAAACATCACACCAGATTTTATATTTGGTCGAGCC +GATATTGGTAATGCGTTTAGTGCTTTCTTACCGCGCCGCTTTTCTGCTCCAGCAGTAGGAACCCGACTTG +TGATAGACCCTGTTTTCACTTACCAACAGAAAACGGTGCTAGGACTATATAATTATTTCCATGCGGATTT +TTATTATATAGTGCATGTTCCAGCACCCTTGGGAACAGGTATCTATCTGAAGATCTATGCTCCTGAATTT +GACACTACAACCGTAACACGAGGAATTCGGTTTAAGCCAAGTGCATCTCCAACAATTGCACTTTCAGTCC +CTTGGAGCAACGATCTATCGACCGTAGAAACATCCGTAGGTCGAGTCGGACAGAGTGGAGGGAGTATTGT +TATCGAAACTATCGAAGATAACAGTAACGAGACGGTCAACACCCCACTCAGCATCACCGTTTGGTGTTGC +ATGGCGAACATTAAAGCCACAGGCTACAAAAATGCGGATACGTCAGCTTACAACGAAAAAGGCATGAACT +TTGTCCCAGTTCCAGTGCCAAAGCCGCCCGTTCCCCCAACAAAACCAATTATGGGCGAGGAACAAGCTGA +CAATGAAGTTACAGCCGAAGGTGGTAAACTTGTACAGGAATTGGTTTACGACCATTCTGCGATTCCTGTA +GCGCCAGTCGTCGAAACACAAGCAGAACAGCCGGAAGTCCCAGTTTCATCAGTGGCAACGCGAAAGAACG +ATACGGGACATTTGGCAACAAAGTGGTATGATTTCGCCAAAATCAGTCTGTCAAACCCAGCTAACATGAA +CTGGACCACGCTAACCATAGACCCGTACAACAATGTTACATTGTCTAGAGATGGTGAGTCGATGGTCCTA +CCATGGAGGCGAAATGTTTGGACAACCGGATCGAAAAGTATTGGATATATCCGAACGATGGTTGCACAAA +TTAACATACCACGCCCGCCGCAGATCAGTGGAGTGCTCGAAGTTAAAGATTCAATCAATAACTCAAGTAT +TTCACTGGTAGAATTTGGAGGAAAAGTAGAGATTCCAATTATTCCGAAGGTTATGAACGGACTAGTAACA +AGTGCTAGTTTGCCAAGGCATAGACTAAACCCATGGATGAGAACCGCCGAAAGTAAGGTTGAATTGCAAT +ATCGAATTATTGCTTTTAATCGAACTAGCGACATTGCTGACCTTAACGTTAGCGTTTTGTTGCGACCTGG +CGATTCGCAATTCCAATTGCCGATGAAACCTGACAATAGTGTGGATACACGTCATTTTGAGCTTGTTGAA +GCTTTAATGTACCACTACGATAGCCTCCGAATTCGAGGAGAAGAACAGAGTCTGCCAGAAAATGCACCTA +ATGCAGTTTCAAACCCTCAGCAGTTCATTACACCCGCAACCGCTCTAAGTGCCGAAGAATATAATGTGCA +CGAGGCGTTGGGTGAAACTGAGGAGTTGGAGCTGGATGAATTTCCGGTTCTGGTGTTCAAGGGAAATGTT +CCCGTTGACTCAGTGACGTCCATTCCTTTGGACCTCGCAACTATATACGACTTTGCTTGGGACGGAGAGC +AGAATGCAATTTCTCAGAAATTTCAGCGTTTTGCTCATCTGATACCGAAAAGCGCAGGTGGTTTTGGCCC +AGTGATTGGTAATTATACTATCACGGCTAACCTCCCCACCGGTGTAGCAGGTCGTATTCTGCACAATTGT +CTCCCAGGAGATTGTGTAGATCTAGCAGTATCGAGAATTTTTGGCTTGAAGAGCCTTCTCGGTGTTGCAG +GAACAGCAGTTTCAGCCATTGGTGGCCCACTCCTTAACGGTTTGGTTAACACCGCAGCGCCTATCCTTAG 
+TGGAGCAGCGCACGCCATTGGTGGAAATGTTGTAGGAGGACTGGCTGATGCAGTGATCGACATTGGATCT +AATTTGCTGACGCCAAAAGAAAAAGAACAACCAAGCGCAAATTCAAGCGCGATTTCTGGAGATATTCCGA +TTTCGCGTTTTGTAGAGATGCTCAAGTATGTCAAGGAGAATTACCAGGATAATCCTGTGTTCCCGACTTT +ACTTGTTGAACCACAAAACTTCATCTCAAACGCAATGACCGCACTCAAAACAATTCCAATTGAGGTCTTT +GCCAACATGCGCAATGTGAAAGTTGAACGAAATTTGTTCGACCGAACGGTTGTTCCAACTGTGAAGGAAG +CTACTCTAGCTGATATTGTCATACCTAATCATATGTATGGCTATATTCTCCGAGACTTCCTCCAAAACAA +ACGAGCGTTTCAATCTGGGACCAAACAAAATGTGTACTTTCAGCAGTTTCTAACTGTTCTATCTCAACGT +AATACCCGTACACATATTACGCTCAACGACATCACAAGTTGCTCTATTGATAGTGAGTCGATTGCAAACA +AAATAGAGAGAGTAAAACGCTACTTGAGTGCAAACTCGAGTGGGGAGACCACGGAAGAATTTTCGCGAAC +AGACACCGGTCTGCTACCTACTACGACCCGCAAAATCGTATTAGGTGAAAGTAAACGCAGGACGGAACGA +AACGTAGCAGAAACTGTTTTCCCTTCCGTGAGGCAGTAGATTAAATCCAAAACTCGCCATGTGCGCGTCT +CAGAAACTAATTTTTAAAATTGGTTTGCGCGTATGGTTAAAAAGAGTCACATTAAATATGAGGATGGTCG +CTATAATTTTCTAGCAGAAGATTATAAAGGACGGTGTGATGATGTTTTGGAGCCCGCACTGGGTGTGGGT +TCGTAGTGCTAGCAATTGCAACTTTTCCTGAAAGTTAATACCTATAGTTTATAGGAGAAAAATTAACATT +TATTGAGAATATCTGTGCAAACGACTCCCCACGCGGGGAGTAGTGGAATCTTGATAAACAGGTTTTTTAA +GCTTTTGCGTTTCCGAAAGGGTTACTCTGGCTGAACCAGCAGTTTTTATAACTGTGGGTAGCCCCTCGGA +AAATCAGGTTTGC