Repository 'blastparser_and_hits'
hg clone https://toolshed.g2.bx.psu.edu/repos/artbio/blastparser_and_hits

Changeset 0:9dfb65ebb02e (2017-10-15)
Next changeset 1:9beb85dba280 (2018-02-16)
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
added:
BlastParser_and_hits.py
BlastParser_and_hits.xml
test-data/al_sequences.fa
test-data/blast.tab
test-data/input.fa
test-data/output.fa
test-data/output.tab
test-data/un_sequences.fa
b
diff -r 000000000000 -r 9dfb65ebb02e BlastParser_and_hits.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BlastParser_and_hits.py Sun Oct 15 18:43:37 2017 -0400
[
b'@@ -0,0 +1,342 @@\n+#!/usr/bin/python\n+import argparse\n+from collections import defaultdict\n+\n+\n+def Parser():\n+    the_parser = argparse.ArgumentParser()\n+    the_parser.add_argument(\'--blast\', action="store", type=str,\n+                            help="Path to the blast output\\\n+                                  (tabular format, 12 column)")\n+    the_parser.add_argument(\'--sequences\', action="store", type=str,\n+                            help="Path to the fasta file with blasted\\\n+                                  sequences")\n+    the_parser.add_argument(\'--fastaOutput\', action="store", type=str,\n+                            help="fasta output file of blast hits")\n+    the_parser.add_argument(\'--tabularOutput\', action="store", type=str,\n+                            help="tabular output file of blast analysis")\n+    the_parser.add_argument(\'--flanking\', action="store", type=int,\n+                            help="number of flanking nucleotides\\\n+                                  added to the hit sequences")\n+    the_parser.add_argument(\'--mode\', action="store",\n+                            choices=["verbose", "short"], type=str,\n+                            help="reporting (verbose) or not reporting (short)\\\n+                                  oases contigs")\n+    the_parser.add_argument(\'--filter_relativeCov\', action="store", type=float,\n+                            default=0,\n+                            help="filter out relative coverages\\\n+                                  below the specified ratio (float number)")\n+    the_parser.add_argument(\'--filter_maxScore\', action="store", type=float,\n+                            default=0, help="filter out best BitScores below\\\n+                                             the specified float number")\n+    the_parser.add_argument(\'--filter_meanScore\', action="store", type=float,\n+                            default=0,\n+                            help="filter 
out mean BitScores below the\\\n+                                  specified float number")\n+    the_parser.add_argument(\'--filter_term_in\', action="store", type=str,\n+                            default="",\n+                            help="select the specified term in the\\\n+                                  subject list")\n+    the_parser.add_argument(\'--filter_term_out\', action="store", type=str,\n+                            default="",\n+                            help="exclude the specified term from\\\n+                                  the subject list")\n+    the_parser.add_argument(\'--al_sequences\', action="store", type=str,\n+                            help="sequences that have been blast aligned")\n+    the_parser.add_argument(\'--un_sequences\', action="store", type=str,\n+                            help="sequences that have not been blast aligned")\n+    the_parser.add_argument(\'--dataset_name\', action="store", type=str,\n+                            default="",\n+                            help="the name of the dataset that has been parsed,\\\n+                                  to be reported in the output")\n+    args = the_parser.parse_args()\n+    if not all((args.sequences, args.blast, args.fastaOutput,\n+                args.tabularOutput)):\n+        the_parser.error(\'argument(s) missing, call the\\\n+                         -h option of the script\')\n+    if not args.flanking:\n+        args.flanking = 0\n+    return args\n+\n+\n+def median(lst):\n+    lst = sorted(lst)\n+    if len(lst) < 1:\n+            return None\n+    if len(lst) % 2 == 1:\n+            return lst[((len(lst)+1)/2)-1]\n+    if len(lst) % 2 == 0:\n+            return float(sum(lst[(len(lst)/2)-1:(len(lst)/2)+1]))/2.0\n+\n+\n+def mean(lst):\n+    if len(lst) < 1:\n+        return 0\n+    return sum(lst) / float(len(lst))\n+\n+\n+def getfasta(fastafile):\n+    fastadic = {}\n+    for line in open(fastafile):\n+        if line[0] == ">":\n+            
header = line[1:-1]\n+            fastadic[header] = ""\n+        else:\n+            fastadic[header] += line\n+    for header in fa'..b't]))\n+                for hit in Xblastdict[subject][transcript]:\n+                    percentIdentity = hit[0]\n+                    alignLenght = hit[1]\n+                    subjectStart = hit[6]\n+                    subjectEnd = hit[7]\n+                    queryCov = "%.1f" % (abs(hit[5]-hit[4])/transcriptSize*100)\n+                    Eval, BitScore = hit[8], hit[9]\n+                    info = [transcript] + [percentIdentity, alignLenght,\n+                                           subjectStart, subjectEnd, queryCov,\n+                                           Eval, BitScore]\n+                    info = [str(i) for i in info]\n+                    info = "\\t".join(info)\n+                    F.write("%s\\n" % info)\n+    else:\n+        F.write("--- %s ---\\n" % dataset_name)\n+        F.write("# subject\\tsubject length\\tTotal Subject Coverage\\tRelative\\\n+                 Subject Coverage\\tBest Bit Score\\tMean Bit Score\\n")\n+        for subject in sorted(results,\n+                              key=lambda x: results[x]["meanBitScores"],\n+                              reverse=True):\n+            line = []\n+            line.append(subject)\n+            line.append(results[subject]["subjectLength"])\n+            line.append(results[subject]["TotalCoverage"])\n+            line.append(results[subject]["RelativeSubjectCoverage"])\n+            line.append(results[subject]["maxBitScores"])\n+            line.append(results[subject]["meanBitScores"])\n+            line = [str(i) for i in line]\n+            F.write("%s\\n" % "\\t".join(line))\n+            for header in results[subject]["HitDic"]:\n+                Fasta.write(">%s\\n%s\\n" % (header,\n+                                           insert_newlines(\n+                                               results[subject][\n+                             
                          "HitDic"][header])))\n+            Fasta.write("\\n")  # final carriage return for the sequence\n+    F.close()\n+    Fasta.close()\n+    return blasted_transcripts\n+\n+\n+def dispatch_sequences(fastadict, blasted_transcripts, matched_sequences,\n+                       unmatched_sequences):\n+    \'\'\'to output the sequences that matched and did not matched in the blast\'\'\'\n+    F_matched = open(matched_sequences, "w")\n+    F_unmatched = open(unmatched_sequences, "w")\n+    for transcript in fastadict:\n+        if transcript in blasted_transcripts:\n+            \'\'\'\'list of blasted_transcripts is generated\n+            by the outputParsing function\'\'\'\n+            F_matched.write(">%s\\n%s\\n" % (transcript, insert_newlines(\n+                                           fastadict[transcript])))\n+        else:\n+            F_unmatched.write(">%s\\n%s\\n" % (transcript, insert_newlines(\n+                                             fastadict[transcript])))\n+    F_matched.close()\n+    F_unmatched.close()\n+    return\n+\n+\n+def __main__():\n+    args = Parser()\n+    fastadict = getfasta(args.sequences)\n+    Xblastdict = getblast(args.blast)\n+    results = defaultdict(dict)\n+    for subject in Xblastdict:\n+        results[subject]["HitDic"], results[subject]["subjectLength"], results[\n+            subject]["TotalCoverage"], results[subject][\n+            "RelativeSubjectCoverage"], results[subject][\n+            "maxBitScores"], results[subject][\n+            "meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject,\n+                                               args.flanking)\n+    blasted_transcripts = outputParsing(\n+        args.dataset_name, args.tabularOutput,\n+        args.fastaOutput, results, Xblastdict, fastadict,\n+        filter_relativeCov=args.filter_relativeCov,\n+        filter_maxScore=args.filter_maxScore,\n+        filter_meanScore=args.filter_meanScore,\n+        
filter_term_in=args.filter_term_in,\n+        filter_term_out=args.filter_term_out, mode=args.mode)\n+    dispatch_sequences(fastadict, blasted_transcripts, args.al_sequences,\n+                       args.un_sequences)\n+\n+\n+if __name__ == "__main__":\n+    __main__()\n'
b
diff -r 000000000000 -r 9dfb65ebb02e BlastParser_and_hits.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BlastParser_and_hits.xml Sun Oct 15 18:43:37 2017 -0400
[
@@ -0,0 +1,94 @@
+<tool id="BlastParser_and_hits" name="Parse blast output and compile hits" version="2.5.0">
+<description>for virus discovery</description>
+<requirements></requirements>
+<command><![CDATA[
+    python '$__tool_directory__'/BlastParser_and_hits.py
+ --sequences '$sequences'
+ --blast '$blast'
+ --tabularOutput '$tabularOutput'
+ --fastaOutput '$fastaOutput'
+ --flanking $flanking
+ --mode $mode
+ ## Additional parameters.
+    #if $additional_filters.use_filters == "yes":
+        --filter_relativeCov $additional_filters.filter_relativeCov
+        --filter_maxScore $additional_filters.filter_maxScore
+        --filter_meanScore $additional_filters.filter_meanScore
+        --filter_term_in "$additional_filters.filter_term_in"
+        --filter_term_out "$additional_filters.filter_term_out"
+    #end if
+    --al_sequences '$al_sequences'
+    --un_sequences '$un_sequences'
+    --dataset_name "$blast.element_identifier"
+
+    ]]></command>
+<inputs>
+ <param name="sequences" type="data" format="fasta"  label="fasta sequences that have been blasted" />
+ <param name="blast" type="data" format="tabular" label="The blast output you wish to parse">
+            <validator type="expression" message="Blast file must have 13 columns">value.metadata.columns == 13</validator>
+        </param>
+ <param name="flanking" type="integer" value= "5" label="Number of flanking nucleotides to add to hits for CAP3 assembly"/>
+ <param name="mode" type="select" label="Extensive or compact  reporting mode" help="display (extensive)  or not (compact) the oases contigs">
+     <option value="verbose" selected="true">extensive</option>
+     <option value="short">compact</option>
+ </param>
+    <conditional name="additional_filters">
+            <param name="use_filters" type="select" label="Use Additional Filters?">
+                <option value="no">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="no">
+            </when>
+            <when value="yes">            
+                <param name="filter_relativeCov" type="float" value="0" max="1" label="Minimum Relative Subject Coverage" help=""/>
+                <param name="filter_maxScore" type="float" value="0" label="Minimum maximum BitScore" help=""/>
+                <param name="filter_meanScore" type="float" value="0" label="Minimum mean BitScore" help=""/>
+                <param name="filter_term_in" type="text" value="" label="filter the subject list with a keyword" help=""/>
+                <param name="filter_term_out" type="text" value="" label="filter the subject list excluding a keyword" help=""/>
+            </when>
+    </conditional>
+</inputs>
+<outputs>
+ <data name="tabularOutput" format="tabular" label="blast analysis, by subjects"/>
+ <data name="fastaOutput" format="fasta" label="hits"/>
+ <data name="al_sequences" format="fasta" label="Blast aligned sequences"/>
+ <data name="un_sequences" format="fasta" label="Blast unaligned sequences"/>
+</outputs>
+
+  <tests>
+    <test>
+        <param ftype="fasta" name="sequences" value="input.fa" />
+        <param ftype="tabular" name="blast" value="blast.tab" />
+        <param name="flanking" value="5" />
+        <param name="use_filters" value="no" />
+        <param name="mode" value="verbose" />
+        <output name="tabularOutput" ftype="tabular" file="output.tab" />
+        <output name="fastaOutput" ftype="fasta" file="output.fa" />
+        <output name="al_sequences" ftype="fasta" file="al_sequences.fa" />
+        <output name="un_sequences" ftype="fasta" file="un_sequences.fa" />
+    </test>
+  </tests>
+
+<help>
+
+**What it does**
+
+Parses blast output for virus genome assembly.
+
+Takes as inputs
+
+ - 1. the fasta sequences that have been submitted to blast
+ - 2. a blast alignment in a tabular format. **Importantly** this tabular output must contain the 12 standard columns (see blast documentation), **plus a column 13** that will report the length of the subject sequence (slen). When you use blast tools prior to using this tool, remember to **check the appropriate box** to get the 13th column in the blast tabular output.
+ - 3. the number of flanking nucleotides to be recovered at the ends of blast hit sequences
+
+The tool returns 4 datasets
+
+ - 1. the fasta input sequences that produced significant blast hits
+ - 2. the fasta sequences that did not produce significant blast hits
+ - 3. the sequences of the blast hits, plus the flanking sequences (as specified in the tool form). This dataset may be further used in metavisitor workflows to produce contigs of hits.
+ - 4. and the parsing of the blast alignments which summarizes the blast results by "subject" sequences (blast analysis, by subjects)
+
+This latter parsing dataset may be customized by tuning the reporting mode and/or using filters
+
+</help>
+</tool>
b
diff -r 000000000000 -r 9dfb65ebb02e test-data/al_sequences.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/al_sequences.fa Sun Oct 15 18:43:37 2017 -0400
b
b'@@ -0,0 +1,1228 @@\n+>Locus_81_Transcript_1/1_Confidence_1.000_Length_155\n+GAGGTCCCTGCGCACGTTGTTCGGTGCGTCCTGATAGCTCATTCCCATTGATTAGCGCGA\n+GTTTGCTTGGCATCGTTAAGCTTAGGTAACGATTCATCCACGGAGCCCGGCATAACGCCG\n+TCTTAATCTTCTCCCAGCATGATCCGTGTAACTCG\n+>Locus_10_Transcript_6/8_Confidence_0.333_Length_1121\n+CGTTATCCAGCTGGATTATGTTGTTATTGTGTAGAAAATATAGGAGTCTCGAAGACTCTT\n+TAGAAAGCATACATGTAGTTGCCTCCCTCGTTCTGGATGTACTGAACGGCCTGCTTGCAT\n+ACTGATGTTAAGTGCAGAGCTATGTTTTCTCTTCCTCGCCAAAAAGAAAAACCACTTTTG\n+ACGACTATCTTACTTATCCAGGGTGTTAGTATTGGATTCAATATCCCTGTTGGTTCAAGA\n+GGGACTTTACCTGTTGCAATGTTAATTGCATCAGTCCTATGCCATCCTTTCCTTTCATAT\n+TCCGTCATAGTTTTGTAAGCTTCAGGATACCATTTGGCCAAACATTCTTTTAGACTAGGT\n+ATCTCAATCTTAGCTTTCTTCGCGATTTGATGTTGTCTTATAAATTCGCTGAATATTGGC\n+ATTCCGTTTGGACCAGCCTCATGAGATTGAATCATACGTCTAGGCCAAATGTTATTTTCG\n+CACGTTGGTCCTTCTGTTTTGTAAAACCTGTATGTGGGTACAACCGTCTTCTCCCATACC\n+GGTTTCAACTTCCTTAAATTTTTGATATAATTGCTGGAAATGTGTTTTTGCGGTCCAGGA\n+ATGTCGTCGGCTGCTATTTTCATTGAAAATTCGACTTGTTGATAAGCCGTCTTTTCCTCA\n+TCTGTCATTTGCTTCCACGTCAAGTTGATGTTTTCACTAAACAGTCCAGGAACATCGAAC\n+GTTGGGATGTCAGCAAGTGGTAGCTTGCTAGTAGTTTTCCACCCTTTCCATTCGTATATT\n+CCGAAACCACCCAGTCGTTTAGGCAGGTGCAGCCAGTGATAGCTCTGTCCCGTGTATTTA\n+CTCCACTTAATTTTGTTTGCTTGATGTAACCAATCCAATTGTTTCTTGCTTCTCCTTTCT\n+AATAAATAGATATTATTAGCGGTTGTCTCAACTTGTTGATTTGCAGTCCATGGTTGTGGG\n+TTCCACGGTTTCCTTTGTGTCACGCTAGGGATAGCCCGATTTGTCCAGCCTCTGACGCCT\n+TGAGTAGATATTTCTGTCCTTAAGAATTCGCAACAATTTTGCATAATTCCAAACTTGCTG\n+TTTTCTCCAACGGCGTTGATTGCCTGATAGCTATACCTAAA\n+>Locus_20_Transcript_1/1_Confidence_0.000_Length_191\n+CCCCCCCCACCTTCGCAATCACAGACAAATTAAACGTAAAACAAACGACATGGATGCATC\n+CAACCCTATTATTTCAAGCGATAGGAGCACTGTGAATCAGGTTTATGCGAAGATGGTGAG\n+GAGAAAACGGCAAGATCTTGCCTATCTCCAGGAGTTGGTGAGGACAGCCCATCGAGAGAT\n+TCTCCCCCAAG\n+>Locus_1_Transcript_5/7_Confidence_0.231_Length_163\n+AATAATGAGGCGGAATCGCCGTCGTGGACGTGGCAAGGCAAAGCTAACGGTAAGCGAAGG\n+AAGGGCCCCCGAGGAGGCAAAATCAGCACTCGAAGAACGCCTCCGAAAGCTGGAGCTCAG\n+CCACAGCCTTCCAACAACCGGAAGTGACCCCCCACCCGCTAAA\n+>Locus_15_Transcript_1/1_Confidence_0.00
0_Length_436\n+AGCCAGAGCATCCTGTCTAGCACCCCTCTGCGTCCCTGGTGCATCTCGGTGTACATTCTT\n+AACTTACCTATTTAATCACCCATGCGAGAACGATACTACGAAGATACTGAACTCAATAAC\n+CTCAGCTACAGTGAACAAGATAAACAGCTAAAACAAATATGTCAAACGAGTACCTTAGAT\n+CAATCCTCATGCCTGAGAGAGGTCCCTCCAGTATACCAGACGACAACGTCCGCCGTCATT\n+GCGTACGACAAGAAACAATCACTGCGAACATTGTGGTCGGATCTTCTGGAAAAGGGGCCT\n+TTGTTCTGTTCCCTAACAATCCTAGCAGCCTTATTGGCGCTCATTTTAAGTATGATGACC\n+AGGGCAAATCTTACAAATACACCCAATCACTCGTTGTCGCCCAACGCCTCAATGAGTCCT\n+ATAATTACGGAAGAAA\n+>Locus_58_Transcript_1/2_Confidence_0.333_Length_476\n+CCGAGTCTCCTCGGAGATTGCCTTTGCAGCGTTTCTTGGTATCGGTTGGTCCAACTCTTC\n+CAGGAATGGGTTCGTGTTCATGAATGGGTTCATGCTCGCGGACCCGGCTGTCCAGTGCGG\n+GAGGGTCTGTTTTGGGGTTCTTCTCTTTACGTCTCCCATATCTTTTATCTTTTTGTCCAG\n+AGATGTCAGGGTCTTTACTTGAACTAATCCTGGTGAGTTCCCAAGTAGTGGAATCCCAAG\n+AGAGTCCGTCAATTGGAGTTTCAGGTTGATGCCAAAAACTTCTCCCACACTGGACCCGAC\n+AACGGATCCTGTGAAAGCTGCAAATGGTGCCTTCCCCAAAACTCCACTCTTTAGTAAGTC\n+GTCTGCTGCATAAATGGCAAGCTGGTGGGACGTTCCCGTTACCTTGTTTGGTGATGTCGA\n+GGATGCGAGTCTTCCTTCTAATAATTGATAGCATGGTAGGAGGATGTAATCGCTTC\n+>Locus_48_Transcript_1/1_Confidence_0.000_Length_372\n+ATTGACGCGGCTCACCTCCGATTATCTCGAAAACCGCCCTAAGTTGTAGTTGTTGGGACG\n+GCTTGTTCAGACGCTCAATCGTTTCCTCGATTGACAATGGGTCAAGGTTGTTGAAAGGCC\n+CGTTCATCAACCGTACGAAAGTGTCGGCGATCCTCGCGATACGGTCGCTTGGTTTCTTGT\n+CGTTGGCGACAAAGGTTACCCTGCGTTCAATTGATTCAGACATTGTCTCCCAGCGTTTAA\n+TCATTGGCATCATCATACAGTCACTCACGATAGGCAAGGTGTATTGGCGTGCGCTCACTT\n+CTGGTACATCTGCGTCACTGGTTACTGGCCAATGGACGCGTGGCATTGTAGGTTTATACA\n+CAGTTGGACTAA\n+>Locus_4_Transcript_4/5_Confidence_0.333_Length_1170\n+GTCTTTTTCTTTCTCATTTGGTTCATGCAATTTTTCACCTGCGCAGGTATTTTTCTGTCT\n+TGTTGTTGATCTTGTTGTTGCCTTTGGTATCTGTCTATTTATTGCTTTAAGTTCTCCGAT\n+TGTTATTCGGGACAATGCGTCTGCGACATGATTATCCCTCCCCTTGAGATATTCTACTGT\n+GAATTCAAACTCCTCCAAGTCTAGTCTCATTCTGGTTAATTTTGAACTGGGGTTTCTCAT\n+TGAAAAGAGATGTGAACGCGATTAAGACTCATGGGAATGGTTTTGGCATCATGAAACCTG\n+ATAGTAGTTGGGAAATTGCTCCACCTCAACCCAAGGAAAAATACCTCAGATATTACGCAA\n+ATGGTGAATTCGTTGATATGAAAAACCTCGTTAACGAGAAACACCCCGTCATCGTTAACG\n+ATTATTGTGAATTTGCCCTGGAACAT
GAAATGTATCGTATTCTCCAACCTATGGACCCTT\n+CCAATTTTGCACCTCCACGGGAAACGGGAACAA'..b'AGAAAATTTAAGACAGAAGTACTCGATGAGCTTTGGGGAAGTGGTTGGC\n+AGGAACGAAATAAGATGAACCAGTATGAATGGTTGTCTTACTGCTGGGCAAATAATGTCA\n+CTAAGGTCGATACTCAAACCGTGCTTCTATCTTATGACATCAAATGGCAACAACTACCTG\n+CTGATATGAAAATGGCTATTCTCGGCGATTCGCGAGCTGATCTTGAAGCTCAAAAAACTC\n+ACAATAAAGTGATGCATGCATACAACGGTAACCCTTTGTGTCAGGGATTTCAAGAAGTTG\n+AAGCTTCAAAAACCTTCCTCAACATCGCGGAAGAGAGTAATTCAGTTCTGAAACCATATA\n+CTGGACTGGAAGCTGAGAAATACATCACCAACATTGTAGGAGACATGAATCCGAATCAAT\n+CAAGGATCTTCGATCAGGACAGGCTTAGAGGTAACCAATACAATGCCAATGGGGCTGTGG\n+TTCATAATGCTGTATCAACTATTCCGTTTACAAACCTCATTCCTAGGACGATTCGATCTG\n+ATGATGACGTCCTTGAGAAGTCGGCCAACAGATTACAGGTTACAGAGACAAACGTTACGG\n+ATTACTACGTTAATCCGATTGAGCCAACTGAATTATCCAAAACAATAAGTGACCAGATCA\n+AAAACAATCAATCATCTAACTGGCGACGAGATAACACGTCATTGGCTGGTTTCAATAGTT\n+TCGACATTGCGACAGTCAACACTGCACTAATTGCAAGAGGTCTAAGCACTGAATCAATGA\n+CTCTCAAGTTAGAGCTATTGCACGGAATAATGGCTATGCAGGTTGAAGCACCAATGATCA\n+ATTCCAGCACTTATTCGATCGTAGATAATCATACAATCCCGACCGTAACTGACAGGGCCG\n+TCATAGGCATCAATGACTCGCCTGTGTTTGGCGAGGACTGTGGTGGTGATCTTCCTGAAT\n+ATCCTTTCGGCGGCGGAACCGGTACAATTGCCTTTCACCTAACATTGCAAACTGTTCCTG\n+AAGAGAGGAGAGATAAGGCAATCTTCTGCCCTCCTGGTTTGTTGCAAGCAGCCCGAGATG\n+GAGCAGAGGCATTGGCCCTATTTGTTTTGTCGATGTCTGAATGGCCTTTCGGTATTTATA\n+CTGTCACCAAGAGAACAACCGATGAGAAGGGACTGAATCCTGCGGATCAGGTTTACGTGC\n+CGATGGAGACCATAACCCGTGTAGGTGGAGATAGAGTATTGGACGTTGTACTTCCTCGAA\n+GGTATGCGGTTGCAAATCCAACGACTCAAGGAAATGCTAATGCTCTAGCAGTTATACAGC\n+CTCAAGCTGGGCCTTTAGATAACGGTGCGGACGGATTAGCCGCTGGTGAATTATTGGATG\n+TCAATTTCATCGGCGCCGACGGCATTACTGAATATCCATTGACATATTACTTGTATACCT\n+GGGCACTTCGATTTGATATAACGACGATTAGGCAATACATTGGTAGAATGGCAGCGTTAA\n+TTGGAGTGAAACACCAACTATGGGCTAGTCATGAAATCAGAGTAGCTTTGTGTCAAGTTG\n+CACCCAAAATGGTGGTCGGAGTTACAGGTTCGGGAGACCTGCCAAGAGGATCAGCCGCTG\n+CAAGCGAGGTATGTTACTCAAGCTTATTGGAGGTCTCACGCTCTGAAGAAGATTTTCCGC\n+TGCTTGGTCAGGTTCAAGCCGATTTCAGAGTCTTTGAAACCAATACAAGCACATGGAATA\n+AAGTAGTTTTGGGATTGGCAACAGCACCAAACGTGACAAGTGAACAAAACATGCATGTAC\n+CATTCGTTGTTGGCGATCCGAGATCTAACGCGTGGGACCGACTCGA
AGCAGTACCAATTG\n+CTGCTGCTTGGCAGATGTACTACCATTCAAGGGGCGTAACTACTGCCGCTTGGAATGATG\n+CGTACACTAACGTAAATAACGTTTGGTTGCAAAAGATGGCCCGTGATAGCTTCTCAACGA\n+CCCAAAGCACTGGGACGATACTGCCTGCCAGATATGGTAAGATAGTCAAAAACCTGATGA\n+GAAACATGTTTGAAAGAGAACCTGCCAAAGTAGTAACAAGCGTGGGAGGCGATGAATATG\n+AGATAACCCATTTTGAGCGCTGGTTACCGGGTAATAGATATGCTTCCGTGTTTGAACAAG\n+ATGAAACTGAAGTTAATCTGTTTCCTCCAACTTTATTACCGGATATTTGGGTTCAATATC\n+CAGCGACTCACACCCCAATCATGTGTGCTTCGTTCCCACCCGTTTTCGGCCAAGACTCAA\n+CACAAGGGTTCGGCAAAGAATCACAACTTATACCCTTTCGAAACGCAAACAACAATCTTG\n+TAGCACCATACGTTGAAGCTTTCGTTGCCAATCAAGCTTATTTCCCGATAGGATCGGGCC\n+CAAACATCAACGACAAGGTTCTGTGGAATAGTAGATTGTGGATGACAAGTGGTTTCGTTC\n+AGTACTTGGATTACGCTGGCAACGCAATCAACGAAGTGGTTCCTGCAGCAGGGCTACCTT\n+TGGGTAGGTCAATCCCATTACTGCCAGGGGAAGTTCAGCCGGTTGGTAACACCAACATGA\n+GCACAAGCTGCGTTCCTCGTTATTCTGTGGACGGTCGCCGAATTTTTACTTATGTCAACA\n+CAGCTCAATCCGTTCCTTTGATACAAGCGTGTAATAGAGCCAATAGATTGGCCAGATCGG\n+CATGGCTATTATTGCATGTCTACATCGAACCAGAGTTGCAGCTATTGAGCGATGAAGTGG\n+TGGACATATTCGACCAACTGACAAGCAAGACTTTTTTAGATGTAGCAAAATCGGCTGCGG\n+ACAGTGCGGAGGGCAACATTCCGGCAACGAAGGTATTGACAGACCTCCAGGCAGTGGATT\n+CAGCAACGCTGCCGAGTACTTTGGATCCATCCACAAATATGCTCCAACCAGCTCCTTTAC\n+TCGGCGAACCTACGACAAATTAACTCGATACATTCATGATGGGGTAAGTCGAAGTAGTCA\n+GAGAGAAATTTCATCATTGTCGGATGATTTAGTTAGATTTAGTACTCTCACAGATTTTAC\n+TGTCATCGATTTTATTACTAAGAAATTAGATTTAGAACGTCCGATTAGTACTCAAGGAGA\n+TTTATTGGCGTTAGAACCTAGATGCAAGGGTGACCTCGCTGTAGCAAGACTTAAGATTAA\n+GGACATAATAGGTAGGATTGATAAGGATATTAGAACGTGGTGTGAAGCAAATTTATGCCA\n+TTTGGATGCAATCTTGGTAACGAACTTGATAATCTGGGGACAAATCTGGGGGTTGGAAAT\n+TCTGAAAGCTTTACATTCAACAGGAATATTAAACGATTTCGATACTTTCGCAACAAAAGG\n+AAGCAAGATTAGTGCATTTGTCAAGCGTTTTCCCTTCGACAAGGATGATGCCAAAGCTAG\n+ATGGGCAGAGATCAACACGCTGACGGGCTATTTGCAGAATGATTTTGGAAACTTCGATTA\n+CGATAAGGAATTTGAAGCTTTAGCTACCGGAGATAGTAACCACCCCGCTTGGTGGGAAGA\n+AGTATTCACTAAGAAAATAAAAGAGTTGATGACACATCAAGAACACAAAAAGTATATCAG\n+TTTTGAAAAATATGTTAAGGAAGGATATTGGATAACATCTGGCAGCAGCAGCATCGGCAA\n+AGTCAATTGGTCATATGACGGAGACTTGGGCAAATTTAAGGCTAGAAAGAACATGTTATT\n+AGATCTATATACGCCGGACGAAATCTACAA
AATGGCTGTAGAATGGGACGGGAAACTAGA\n+GAATAGAGTGTTCATCAAAGATGAATTGGCAAAGAGAAGATTGGCAGTGGCAAGCAATAT\n+TGAAGCATATTTGAATCAAGGGTATATATTTTATCTATTTG\n'
b
diff -r 000000000000 -r 9dfb65ebb02e test-data/blast.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blast.tab Sun Oct 15 18:43:37 2017 -0400
b
b'@@ -0,0 +1,695 @@\n+Locus_1_Transcript_1/7_Confidence_0.231_Length_224\tgi|262225312|gb|GQ342966.1|_Drosophila_melanogaster_American_nodavirus_(ANV)_SW-2009a_segment_RNA2\t98.47\t196\t3\t0\t3\t198\t517\t712\t9e-92\t  340\t1416\n+Locus_1_Transcript_1/7_Confidence_0.231_Length_224\tgi|351066204|gb|JF461542.1|_Flock_house_virus_isolate_IP-VIA-022011_coat_protein_precursor,_gene,_c\t95.41\t196\t9\t0\t3\t198\t517\t712\t1e-83\t  313\t1365\n+Locus_1_Transcript_1/7_Confidence_0.231_Length_224\tgi|152143030|gb|EF690538.1|_Flock_house_virus_isolate_TNCL_segment_RNA2_protein_alpha_mRNA,_complet\t94.39\t196\t11\t0\t3\t198\t518\t713\t7e-81\t  304\t1383\n+Locus_1_Transcript_1/7_Confidence_0.231_Length_224\tgi|22711883|ref|NC_004144.1|_Flock_house_virus,_complete_genome\t89.18\t194\t21\t0\t3\t196\t518\t711\t3e-66\t  255\t1400\n+Locus_1_Transcript_1/7_Confidence_0.231_Length_224\tgi|9632343|ref|NC_002037.1|_Black_beetle_virus_RNA_2,_complete_sequence\t87.63\t194\t24\t0\t3\t196\t518\t711\t2e-62\t  242\t1399\n+Locus_1_Transcript_2/7_Confidence_0.231_Length_143\tgi|262225312|gb|GQ342966.1|_Drosophila_melanogaster_American_nodavirus_(ANV)_SW-2009a_segment_RNA2\t97.44\t117\t3\t0\t1\t117\t596\t712\t4e-49\t  197\t1416\n+Locus_1_Transcript_2/7_Confidence_0.231_Length_143\tgi|351066204|gb|JF461542.1|_Flock_house_virus_isolate_IP-VIA-022011_coat_protein_precursor,_gene,_c\t92.31\t117\t9\t0\t1\t117\t596\t712\t6e-41\t  170\t1365\n+Locus_1_Transcript_2/7_Confidence_0.231_Length_143\tgi|152143030|gb|EF690538.1|_Flock_house_virus_isolate_TNCL_segment_RNA2_protein_alpha_mRNA,_complet\t91.45\t117\t10\t0\t1\t117\t597\t713\t7e-40\t  167\t1383\n+Locus_1_Transcript_2/7_Confidence_0.231_Length_143\tgi|9632343|ref|NC_002037.1|_Black_beetle_virus_RNA_2,_complete_sequence\t84.35\t115\t18\t0\t1\t115\t597\t711\t6e-28\t  127\t1399\n+Locus_1_Transcript_2/7_Confidence_0.231_Length_143\tgi|22711883|ref|NC_004144.1|_Flock_house_virus,_complete_genome\t83.48\t115\t19\t0\t1\t115\t597\t711\t3e-26\t  
122\t1400\n+Locus_1_Transcript_3/7_Confidence_0.231_Length_407\tgi|262225312|gb|GQ342966.1|_Drosophila_melanogaster_American_nodavirus_(ANV)_SW-2009a_segment_RNA2\t97.67\t258\t6\t0\t1\t258\t455\t712\t2e-121\t  439\t1416\n+Locus_1_Transcript_3/7_Confidence_0.231_Length_407\tgi|262225312|gb|GQ342966.1|_Drosophila_melanogaster_American_nodavirus_(ANV)_SW-2009a_segment_RNA2\t87.95\t166\t1\t1\t259\t405\t1250\t1415\t3e-56\t  223\t1416\n+Locus_1_Transcript_3/7_Confidence_0.231_Length_407\tgi|262225312|gb|GQ342966.1|_Drosophila_melanogaster_American_nodavirus_(ANV)_SW-2009a_segment_RNA2\t85.71\t42\t6\t0\t339\t380\t112\t153\t4e-04\t50.0\t1416\n+Locus_1_Transcript_3/7_Confidence_0.231_Length_407\tgi|351066204|gb|JF461542.1|_Flock_house_virus_isolate_IP-VIA-022011_coat_protein_precursor,_gene,_c\t94.96\t258\t13\t0\t1\t258\t455\t712\t1e-111\t  407\t1365\n+Locus_1_Transcript_3/7_Confidence_0.231_Length_407\tgi|351066204|gb|JF461542.1|_Flock_house_virus_isolate_IP-VIA-022011_coat_protein_precursor,_gene,_c\t77.08\t96\t3\t1\t261\t337\t1252\t1347\t2e-15\t87.8\t1365\n+Locus_1_Transcript_3/7_Confidence_0.231_Length_407\tgi|351066204|gb|JF461542.1|_Flock_house_virus_isolate_IP-VIA-022011_coat_protein_precursor,_gene,_c\t85.71\t42\t6\t0\t339\t380\t112\t153\t4e-04\t50.0\t1365\n+Locus_1_Transcript_3/7_Confidence_0.231_Length_407\tgi|152143030|gb|EF690538.1|_Flock_house_virus_isolate_TNCL_segment_RNA2_protein_alpha_mRNA,_complet\t93.02\t258\t18\t0\t1\t258\t456\t713\t5e-105\t  385\t1383\n+Locus_1_Transcript_3/7_Confidence_0.231_Length_407\tgi|152143030|gb|EF690538.1|_Flock_house_virus_isolate_TNCL_segment_RNA2_protein_alpha_mRNA,_complet\t94.87\t78\t4\t0\t260\t337\t1252\t1329\t2e-26\t  
123\t1383\n+Locus_1_Transcript_3/7_Confidence_0.231_Length_407\tgi|152143030|gb|EF690538.1|_Flock_house_virus_isolate_TNCL_segment_RNA2_protein_alpha_mRNA,_complet\t85.42\t48\t7\t0\t339\t386\t113\t160\t1e-05\t55.4\t1383\n+Locus_1_Transcript_3/7_Confidence_0.231_Length_407\tgi|22711883|ref|NC_004144.1|_Flock_house_virus,_complete_genome\t88.67\t256\t29\t0\t1\t256\t456\t711\t9e-89\t  331\t1400\n+Locus_1_Transcript_3/7_Confidence_0.231_Length_407\tgi|22711883|ref|NC_004144.1|_Flock_house_virus,_complete_genome\t86.54\t52\t7\t0\t286\t337\t1295\t1346\t7e-08\t62.6\t1400\n+Locus_1_Transcript_3/7_Co'..b'|13195547|gb|AF329080.1|_Boolarra_virus_segment_RNA1_protein_A_and_B2_genes,_complete_cds\t85.90\t78\t11\t0\t51\t128\t2747\t2670\t5e-17\t91.5\t3096\n+Locus_81_Transcript_1/1_Confidence_1.000_Length_155\tgi|13195547|gb|AF329080.1|_Boolarra_virus_segment_RNA1_protein_A_and_B2_genes,_complete_cds\t87.23\t47\t6\t0\t1\t47\t2838\t2792\t3e-07\t59.0\t3096\n+Locus_81_Transcript_1/1_Confidence_1.000_Length_155\tgi|22681027|ref|NC_004142.1|_Boolarra_virus_RNA1,_complete_genome\t85.90\t78\t11\t0\t51\t128\t2747\t2670\t5e-17\t91.5\t3096\n+Locus_81_Transcript_1/1_Confidence_1.000_Length_155\tgi|22681027|ref|NC_004142.1|_Boolarra_virus_RNA1,_complete_genome\t87.23\t47\t6\t0\t1\t47\t2838\t2792\t3e-07\t59.0\t3096\n+Locus_81_Transcript_1/1_Confidence_1.000_Length_155\tgi|210673|gb|M33065.1|BBVRNA3_Black_beetle_virus_RNA3_proteins_B1_and_B2_genes,_complete_cds\t95.83\t48\t2\t0\t1\t48\t133\t86\t3e-13\t78.8\t389\n+Locus_81_Transcript_1/1_Confidence_1.000_Length_155\tgi|210673|gb|M33065.1|BBVRNA3_Black_beetle_virus_RNA3_proteins_B1_and_B2_genes,_complete_cds\t90.70\t43\t4\t0\t50\t92\t43\t1\t8e-08\t60.8\t389\n+Locus_82_Transcript_1/1_Confidence_1.000_Length_165\tgi|262225309|gb|GQ342965.1|_Drosophila_melanogaster_American_nodavirus_(ANV)_SW-2009a_segment_RNA1\t100.00\t165\t0\t0\t1\t165\t2851\t2687\t2e-79\t  
298\t3107\n+Locus_82_Transcript_1/1_Confidence_1.000_Length_165\tgi|351066201|gb|JF461541.1|_Flock_house_virus_isolate_IP-VIA-022011_segment_RNA1,_complete_sequence\t98.18\t165\t3\t0\t1\t165\t2851\t2687\t4e-75\t  284\t3107\n+Locus_82_Transcript_1/1_Confidence_1.000_Length_165\tgi|152143028|gb|EF690537.1|_Flock_house_virus_isolate_TNCL_segment_RNA1_protein_A_mRNA,_complete_cd\t95.15\t165\t8\t0\t1\t165\t2851\t2687\t1e-68\t  262\t3107\n+Locus_82_Transcript_1/1_Confidence_1.000_Length_165\tgi|22681055|ref|NC_004146.1|_Flock_house_virus_RNA_1,_complete_sequence\t92.12\t165\t13\t0\t1\t165\t2851\t2687\t2e-61\t  239\t3107\n+Locus_82_Transcript_1/1_Confidence_1.000_Length_165\tgi|51014188|ref|NC_001411.2|_Black_beetle_virus,_complete_genome\t92.12\t165\t13\t0\t1\t165\t2850\t2686\t2e-61\t  239\t3106\n+Locus_82_Transcript_1/1_Confidence_1.000_Length_165\tgi|210673|gb|M33065.1|BBVRNA3_Black_beetle_virus_RNA3_proteins_B1_and_B2_genes,_complete_cds\t93.23\t133\t9\t0\t1\t133\t133\t1\t1e-49\t  199\t389\n+Locus_82_Transcript_1/1_Confidence_1.000_Length_165\tgi|13195547|gb|AF329080.1|_Boolarra_virus_segment_RNA1_protein_A_and_B2_genes,_complete_cds\t84.85\t165\t25\t0\t1\t165\t2838\t2674\t3e-45\t  185\t3096\n+Locus_82_Transcript_1/1_Confidence_1.000_Length_165\tgi|22681027|ref|NC_004142.1|_Boolarra_virus_RNA1,_complete_genome\t84.85\t165\t25\t0\t1\t165\t2838\t2674\t3e-45\t  185\t3096\n+Locus_83_Transcript_1/1_Confidence_1.000_Length_196\tgi|262225309|gb|GQ342965.1|_Drosophila_melanogaster_American_nodavirus_(ANV)_SW-2009a_segment_RNA1\t99.41\t169\t1\t0\t1\t169\t2851\t2683\t7e-80\t  300\t3107\n+Locus_83_Transcript_1/1_Confidence_1.000_Length_196\tgi|351066201|gb|JF461541.1|_Flock_house_virus_isolate_IP-VIA-022011_segment_RNA1,_complete_sequence\t97.63\t169\t4\t0\t1\t169\t2851\t2683\t4e-76\t  
288\t3107\n+Locus_83_Transcript_1/1_Confidence_1.000_Length_196\tgi|152143028|gb|EF690537.1|_Flock_house_virus_isolate_TNCL_segment_RNA1_protein_A_mRNA,_complete_cd\t94.67\t169\t9\t0\t1\t169\t2851\t2683\t5e-69\t  264\t3107\n+Locus_83_Transcript_1/1_Confidence_1.000_Length_196\tgi|22681055|ref|NC_004146.1|_Flock_house_virus_RNA_1,_complete_sequence\t91.72\t169\t14\t0\t1\t169\t2851\t2683\t2e-62\t  242\t3107\n+Locus_83_Transcript_1/1_Confidence_1.000_Length_196\tgi|51014188|ref|NC_001411.2|_Black_beetle_virus,_complete_genome\t91.72\t169\t14\t0\t1\t169\t2850\t2682\t2e-62\t  242\t3106\n+Locus_83_Transcript_1/1_Confidence_1.000_Length_196\tgi|210673|gb|M33065.1|BBVRNA3_Black_beetle_virus_RNA3_proteins_B1_and_B2_genes,_complete_cds\t93.23\t133\t9\t0\t1\t133\t133\t1\t2e-49\t  199\t389\n+Locus_83_Transcript_1/1_Confidence_1.000_Length_196\tgi|13195547|gb|AF329080.1|_Boolarra_virus_segment_RNA1_protein_A_and_B2_genes,_complete_cds\t84.62\t169\t26\t0\t1\t169\t2838\t2670\t3e-46\t  188\t3096\n+Locus_83_Transcript_1/1_Confidence_1.000_Length_196\tgi|22681027|ref|NC_004142.1|_Boolarra_virus_RNA1,_complete_genome\t84.62\t169\t26\t0\t1\t169\t2838\t2670\t3e-46\t  188\t3096\n'
b
diff -r 000000000000 -r 9dfb65ebb02e test-data/input.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.fa Sun Oct 15 18:43:37 2017 -0400
b
b'@@ -0,0 +1,1712 @@\n+>Locus_1_Transcript_1/7_Confidence_0.231_Length_224\n+CTTTCAGGTACGCTTCCATGAACGTGGGCATTTACCCAACGTCGAACTTGATGCAGTTTA\n+CCGGAAGCATAACTGTTTGGAAATGTCCTATAAAGCTGAGCACTGTACAATTCCCGGTTG\n+CTACAGAGCAAGCCACCTCTTCACTAGTACACATTCTTGCTGGTTTAAATGGTGTATTAG\n+CGGTGGGACCAGACAATTAAAACGGATGCCAACATAAATGGGCA\n+>Locus_1_Transcript_2/7_Confidence_0.231_Length_143\n+AATGGCCTATAAAGCTGAGCACTGTACAATTCCCGGTTGCTACAGAGCAAGCCACCTCTT\n+CACTAGTACACATTCTTGCTGGTTTAAATGGTGTATTAGCGGTGGGACCAGACAATTAAA\n+ACGGATGCCAACATAAATGGGCA\n+>Locus_1_Transcript_3/7_Confidence_0.231_Length_407\n+ATCCGGGCTTTACATCGATGTTCGGTACTACTGCAACATCTAGGTCCGACCAAGTGTCCT\n+ATTTCAGGTACGCTTCCATGAACGTGGGCATTTACCCAACGTCGAACTTGATGCAGTTTA\n+CCGGAAGCATAACTGTTTGGAAATGTCCTATAAAGCTGAGCACTGTACAATTCCCGGTTG\n+CTACAGAGCAAGCCACCTCTTCACTAGTACACATTCTTGCTGGTTTAAATGGTGTATTAG\n+CGGTGGGACCAGACAATTCAAACCGGATGCCAACCTAACTGGGCAATTAGGCCACAATAA\n+GCCCAACTTGGTTGATGATCGAAACAGTGAGCCCCCTCCCATGTAATGGAAGACGTCGAC\n+GCAATCGTGCGAGACGTAATGACAGTTTAAGTCAGCCGACTAAGGGG\n+>Locus_1_Transcript_4/7_Confidence_0.077_Length_591\n+GTAAACAATTCCAAGTTCCAAATGGTTAACAACATCAAACCAAGACGGCAACGATCCCAA\n+CGTGTTGCCGTAACAACAACCCAAACAGCGCCTATTCCACAGCGAAACGTACCACGTAAT\n+CGGAGTGAGCCGCGTCAATAATGAGGCGGAATCGCCGTCGTGTACGTGGCATGAATATGG\n+CGGCGCTTTCCCGGCTTAGTCAGCCTGGGTTAGCGTTTCTCAAGTGTGCATTCGCACCAC\n+CTGACTTTCAGGTACGCTTCCATGAACGTGGGCATTTACCCAACGTCGAACTTGATGCAG\n+TTTACCGGAAGCATAACTGTTTGGAAATGTCCTATAAAGCTGAGCACTGTACAATTCCCG\n+GTTGCTACAGAGCAAGCCACCTCTTCACTAGTACACATTCTTGCTGGTTTAAATGGTGTA\n+TTAGCGGTGGGACCAGACAATTCAAACCGGATGCCAACCTAACTGGGCAATTAGGCCACA\n+ATAAGCCCAACTTGGTTGATGATCGAAACAGTGAGCCCCCTCCCATGTAATGGAAGACGT\n+CGACGCAATCGTGCGAGACGTAATGACAGTTTAAGTCAGCCGACTAAGGGG\n+>Locus_1_Transcript_5/7_Confidence_0.231_Length_163\n+AATAATGAGGCGGAATCGCCGTCGTGGACGTGGCAAGGCAAAGCTAACGGTAAGCGAAGG\n+AAGGGCCCCCGAGGAGGCAAAATCAGCACTCGAAGAACGCCTCCGAAAGCTGGAGCTCAG\n+CCACAGCCTTCCAACAACCGGAAGTGACCCCCCACCCGCTAAA\n+>Locus_1_Transcript_6/7_Confidence_0.077_Length_216\n+GTAAACAATTCCAAGTTCCAAATGGTTAACAACATCAAACCAAG
ACGGCAACGATCCCAA\n+CGTGTTGCCGTAACAACAACCCAAACAGCGCCTATTCCACAGCGAAACGTACCACGTAAT\n+CGGAGTGAGCCGCGTCAATAATGAGGCGGAATCGCCGTCGTGGACGTGGCAAGGCAAAGC\n+TAACGGTAAGCGAAGGAAGGGCCCCCGAGGAGGCAA\n+>Locus_1_Transcript_7/7_Confidence_0.231_Length_184\n+AATAATGAGGCGGAATCGCCGTCGTGGACGTGGCAAGGCAAAGCTAACGGTAAGCGAAGG\n+AAGGGCCCCCGAGGAGGCAAAATCAGCACTCGAAGAACGCCTCCGAAAGCTGGAGCTCAG\n+CCCCCCACCCGCTAAACCGTAGCTGACTCCTAGGAGCACCTACACCCGTTCTAGCCCGAA\n+AGGG\n+>Locus_2_Transcript_1/5_Confidence_0.125_Length_327\n+TTCCTGCATCGATTTGTGGTCTTCGCACGCGCCTCGAACCGAAAACTAATGCATAAAGTA\n+GATCATAGAGCAACGACTGCCTTTTAAAGAAATTTCTTTAGAGAAAGGACGAAAATCACG\n+AGACAATCTTTGGTTGGATTGTAACGTCCTGACTCGTAGAGGATTTCCCGACCATGAAGT\n+AGCAAACCCAAAACTCTTGATATCTCAACATATGTATACGTAATGGATGCATCAAACCCA\n+ACTAATTCGTCTGAAAGGTTTACCTTGGAGGACTTGCGGTACGAACATGTGAAGAGAAAG\n+CGTCAGGACCTTGCTGCCCTCCATACC\n+>Locus_2_Transcript_2/5_Confidence_0.250_Length_809\n+TTCCTGCATCGATTTGTGGTCTTCGCACGCGCCTCGAACCGAAAACTAATGCATAAAGTA\n+GATCATAGAGCAACGACTGCCTTTTAAAGAAATTTCTTTAGAGAAAGGACGAAAATCACG\n+AGACAATCTTTGGTTGGATTGTAACGTCCTGACTCGTAGAGGATTTCCCGACCATGAAGT\n+AGCAAACCCAAAACTCTTGATATCTCAACATATGTATACGTAATGGATGCATCAAACCCA\n+ACTAATTCGTCTGAAAGGTTTACCTTGGAGGACTTGCGGTACGAACATGTGAAGAGAAAG\n+CGTCAGGACCTTGCTGCCCTCCATACCATGGCCAAAGCCTCCACACGAACATATGTTGGT\n+GGTGCGCTCCCTGACCGTGACAACATCATGGAGGTGTTCAAACAGTTGGACGCAGCTACT\n+CAAGCGGTTGACCCGACACCAATGTCTGACTTCTCATACCTAAATTCTCATCCGGCAATT\n+CCGGTATGGAGGGCAGCAAGGTCCTGACGCTTTCTCTTCACATGTTCGTACCGCAAGTCC\n+TCCAAGGTAAACCTTTCAGACGAATTAGTTGGGTTTGATGCATCCATTACGTATACATAT\n+GTTGAGATATCAAGAGTTTTGGGTTTGCTACTTCATGGTCGGGAAATCCTCTACGAGTCA\n+GGACGTTACAATCCAACCAAAGATTGTCTCGTGATTTTCGTCCTTTCTCTAAAGAAATTT\n+CTTTAAAAGGCAGTCGTTGCTCTATGATCTACTTTATGCATTAGTTTTCGGTTCGAGGCG\n+CGTGCGAAGACCACAAATCGATGCAGGAA\n+>Locus_2_Transcript_3/5_Confidence_0.250_Length_207\n+GTCAGGACCTTGCTGCCCTCCATACCATGGCCAAAGCCTCCACACGAACATATGTTGGTG\n+GTGCGCTCCCTGACCGTGACAACATCATGGAGGTGTTCAAACAGTTGGACGCAGCTACTC\n+AAGCGGTTGACCCGACACCAATGTCTGACTTCTCATACCTAAATTCTCATCCGGCAATTC\n+CGGTATGGAGGGCAGCAAG
GTCCTGAC\n+>Locus_2_Transcript_4/5_Confidence_0.125_Length_605\n+GTCAGGA'..b'GGCGTCTTAGAAACGCTTA\n+>Locus_67_Transcript_1/2_Confidence_0.333_Length_210\n+TGCAACAACTTTAATATACGCTATTGGAGCTGGAATTACCGCGGCTGCTGGCACCAGACT\n+TGCCCTCCAATTGGTCCTTGTTAAATGCGCTGGATGGTGGTGGTTTTTGGCAAAACACAG\n+TGGTGTCTCGATAGCGGGGCTACCAGTCACATGTGCTGTGACAGAGGTGTTTTTACTGAG\n+TATGAAGAGCACACTGAAAAAATTAGTCTT\n+>Locus_67_Transcript_2/2_Confidence_0.333_Length_155\n+AGCGACACAAAAACAATGCTCCTTGTTAAATGCGCTGGATGGTGGTGGTTTTTGGCAAAA\n+CACAGTGGTGTCTCGATAGCGGGGCTACCAGTCACATGTGCTGTGACAGAGGTGTTTTTA\n+CTGAGTATGAAGAGCACACTGAAAAAATTAGTCTT\n+>Locus_68_Transcript_1/1_Confidence_0.000_Length_211\n+TGAAAACTCTTTTAAACCCAGCACAATTTATCTCAGACATTCCTGATGATATAATGATCC\n+GACACGTAAACAGGGCCCAGACCATCACCTACAACTTGAAGTCAGGGTCCTCTGGCACCG\n+GCCTGATCGTGGTCTATCCAAACACCCCGTCGAGTATTAGCGGCTTCCATTACATATGGG\n+ATTCCGCTACCTCGAATTGGGTGTTTGATCA\n+>Locus_69_Transcript_1/1_Confidence_0.000_Length_164\n+CCAGAGGATTGGATGTTATTGACAGGATCCTATCGTAAGAGTAGTCAGACACTTCACTCA\n+AGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCATAAACACCCGCAGGTA\n+AGGTGCTGGACTTAATGCTTAGCGAGCCTGAAATCAGTCTGCCA\n+>Locus_70_Transcript_1/1_Confidence_0.000_Length_147\n+AAAAGTTAATTTTGTTAGTAAGAAAATTTGATAAAACTTACAGGAAAGGCGATAAAACTT\n+TCCTTTTAACAAGGTTCGATTAGTACACACGAGAAGAACCAAGAAGAAACTCGAGTGACA\n+TTAATGACTACAATAAGATGTTAGTAA\n+>Locus_71_Transcript_1/2_Confidence_0.333_Length_170\n+GATTAATGAAAACATCTTTGGCAAATGCTTTCGCAGTCGGACGTCTCGCTACGGTCCAAG\n+AATTTCACCTCTCGCGTCGTAATACTAATGCCCCCAAACTGCTTCTATTAATCATTACCT\n+CTTGATCTGAAAACCAATGAAAGCAGAACAGAGGTCTTATTTCATTATCC\n+>Locus_71_Transcript_2/2_Confidence_0.333_Length_106\n+ACACCTCTCGCGTCGTAATACTAATGCCCCCAAACTGCTTCTATTAATCATTACCTCTTG\n+ATCTGAAAACCAATGAAAGCAGAACAGAGGTCTTATTTCATTATCC\n+>Locus_72_Transcript_1/1_Confidence_0.000_Length_108\n+AAATGATCTCATCTCTGTATTCTGGCCGGAATGCTTGAACCATCGCCTTTTGAGCGATAT\n+TTCGTTGCATCCAGCCAGAAACCCGGCCATCAAGGTTGGAAAAATCCG\n+>Locus_73_Transcript_1/1_Confidence_0.000_Length_164\n+TCGGATAATATCCGGCTTTCCTGACATCCTTTTCATCCTGAAGGTCTCCAGATACACATT\n+
AGCGTACTCGGATATCGTTCTACATGCCGAGCATAATCAACATTGGTATTATCCTGGAAG\n+GAACCCAACTGAGATCGCCGATGGGGTTTGTGAATTTGTCAGTG\n+>Locus_74_Transcript_1/2_Confidence_0.333_Length_232\n+ATCATATCCAACAGCTCTTCAGCTCTGCTCCTCGTTATCTTCGGAAATGGTTGCACATGG\n+ATTTTTGCCAGCATGCACGTCTTGCATACTGCGTCTGGTTTGAAAACGACCTTTTCAACA\n+CCGTACACCATCCTCTTCCTCACCATCTCCTGTAGGCTGCTTGTATTCAAATGGCCATAC\n+CTTTTATGCCATAGTGAACCATCAGCATCAACGGCCGCAAAACAACTGTTAT\n+>Locus_74_Transcript_2/2_Confidence_0.333_Length_109\n+CATTCGCTCTTCCTCCTCACCATCTCCTGTAGGCTGCTTGTATTCAAATGGCCATACCTT\n+TTATGCCATAGTGAACCATCAGCATCAACGGCCGCAAAACAACTGTTAT\n+>Locus_76_Transcript_1/1_Confidence_0.500_Length_196\n+TCAGTTTCAGCTTTGTTTTTCAGCCTGCTGCTACCATTCAGGTTCCACCTCCTTTTCAGT\n+TTAAAGTGGGTTTCTCGGCCAGAGTTGGGCAATTCCATTACTCTATTAGGGGGAAAAAAT\n+GGTAATTTGCTGCTCGATTATGACGAAGTCATTTGATCCGCTTCACATGTGACGTCGTCT\n+TTTTGTTGGGCCTGTC\n+>Locus_78_Transcript_1/1_Confidence_1.000_Length_154\n+AACGTGCGCAGGGACCTCATACCTCGAAGGAAGGGCCCCCCGAGGAGGCAAATCAGCACT\n+CGAAGAACGCCTCCGAAAGCTGGAGCTCAGCCCCCCACCCGCTAAACCGTAGGTGTCTCC\n+TAGGAGCACCCACACACGTTCTAGCCCGAAAGGG\n+>Locus_79_Transcript_1/1_Confidence_1.000_Length_154\n+AACGTGCGCAGGGACCTCATACCTCGAAGGAAGGGCCCCCCGAGGAGGCAAATCAGCACT\n+CGAAGAACGCCTCCGAAAGCTGGAGCTCAGCCCCCCACCCGCTAAACCGTAGCTGACTCC\n+TAGGAGCACCTACACCCGTTCTAGCCCGAAAGGG\n+>Locus_80_Transcript_1/1_Confidence_1.000_Length_101\n+GAGGTCCCTGCGCACGTTGTTCGGTGCGTCCTGATAGCTCATTCCCATTGCTGCTTCCGC\n+CGCCGTTTGAATGCGGTCGGGAAGTTCCTGGATTAGCGCAA\n+>Locus_81_Transcript_1/1_Confidence_1.000_Length_155\n+GAGGTCCCTGCGCACGTTGTTCGGTGCGTCCTGATAGCTCATTCCCATTGATTAGCGCGA\n+GTTTGCTTGGCATCGTTAAGCTTAGGTAACGATTCATCCACGGAGCCCGGCATAACGCCG\n+TCTTAATCTTCTCCCAGCATGATCCGTGTAACTCG\n+>Locus_82_Transcript_1/1_Confidence_1.000_Length_165\n+GAGGTCCCTGCGCACGTTGTTCGGTGCGTCCTGATAGCTCATTCCCATTGCTGCTTCCGC\n+CGCCGTTTGAATGCGGTCGGGAAGTTCCTGGATTAGCGCGAGTTTGCTTGGCATCGTTAA\n+GCTTAGGTAACGATTCATCCACGGAGCCCGGCATAACGCCGTCTT\n+>Locus_83_Transcript_1/1_Confidence_1.000_Length_196\n+GAGGTCCCTGCGCACGTTGTTCGGTGCGTCCTGATAGCTCATTCCCATTGCTGCTTC
CGC\n+CGCCGTTTGAATGCGGTCGGGAAGTTCCTGGATTAGCGCGAGTTTGCTTGGCATCGTTAA\n+GCTTAGGTAACGATTCATCCACGGAGCCCGGCATAACGCCGTCTTAATCTTCTCCCAGCA\n+TGATCCGTGTAACTCG\n'
b
diff -r 000000000000 -r 9dfb65ebb02e test-data/output.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.fa Sun Oct 15 18:43:37 2017 -0400
b
b'@@ -0,0 +1,5087 @@\n+>gi|262225307|gb|GQ342964.1|_Drosophila_melanogaster_tetravirus_SW-2009a_strain_DTRV_putative_RNA-de--Locus_42_Transcript_2/2_Confidence_0.333_Length_597_hit1_IdMatch=100.0,AligLength=593,E-val=0.0\n+TTCTGAGGTACGGCCAAATGCTGCCAATCCTGTTGGAGTTAGGCTCTATGCCCACAGCCA\n+CCATACCCAGGACATGAAGCTGTTGGGAGAAGCCTGGTCCTCCTCTGCCAAACTGGCTAA\n+GGAATACATCTGTGCAGCTATTTTGGAGGTTAACAACATCCCATACAAGAAGTCTAGCTT\n+TGCGACCCCTAATGTAGTGCCTCCCCCGCCTGAGTCTTCCTGGGCTCGGCAGGTTGAATA\n+CGCAACTGCCCCTAAAGCTGTGCCACAGATTGTTCCCGTTCATGAAACCCTGGATTTGGA\n+GCTGTTTCACAGTATAATGCAGCAATTCCCAGGGCACTGTCCCCGCCAGGTGCAGATGCT\n+TATTAGAGAGGTAAGTCATAAAAATCCTGATGACTACCGTGTGAGAATTCAAGACTTATT\n+CTCCCGGTTTCCACCTCCTAAAGTAGCATCGAAGAGATCACTGCTGAGCGCAGAACAAAG\n+GACGAGACTCAACAAAAAGACTCTCGACCGCAAAAAGAGGAAGAAAATTGCAAGTAAGCT\n+CTTGCCAACCCTATAACATCAAACAACCCAAACCTCAAATCAAGAATACAATTT\n+>gi|262225307|gb|GQ342964.1|_Drosophila_melanogaster_tetravirus_SW-2009a_strain_DTRV_putative_RNA-de--Locus_42_Transcript_1/2_Confidence_0.333_Length_3138_hit1_IdMatch=100.0,AligLength=3001,E-val=0.0\n+CCCAAGTATCCCTGTTTGTGGGATTATGGATGCTGTTGGTGTCCCGTTGCACCCCCCATC\n+CATTAACAATCGGGGTGGGCATACTCATGTCACTCGCCAGATGGGCGGAGTACCCCACCC\n+TCCTCTTGGTCTAATACCCATAGCTCAGAAGTATGGGGTAGAGATCTTGGACCAGAAACA\n+TGTCTACACAGGAGGCACTTGGGATGGCTTTCTTAATAGACTCGCCCAACAGATGGGCCG\n+GACAACGGTCCCCCTGGCAAGAACAGTTTCAGCAGGCAAAAAGAATGCCTCTGATAGGGC\n+AATCATAAAACTGTTAAATAAATACATGCCTAGGACGGGAAATAAGAACTGGCCTGGTGT\n+CCATACAACCCCGGAAGAAGGCATACTTGATGGTATCAAGATAACGGCCAAATCCAGTGC\n+AGGCGCTCCCTATTGGAGACACAAGGGGGAATGCCTAGACCATATAATTGATACTGGACT\n+TCCTGTTGTGCTAAAGCATATAAAGGAAGGAACCCTCAACCAGTTGTGGCGGGAAAACCC\n+TGAGATGTTCCTCGTTGAAGTTAAGAACAAACTTGATAGATACGAAGTCTCGAAGCTTAA\n+AGAGAAGACTAGACCCTATGTGTGTGTACCTGCGCACTGGGCTCTCCTATTCTCGTGCTT\n+GACTCAAGGATTCCAAGAAGGACTTCAAGTTTTTAGTAACTTGGACCCCAGTACAGAATG\n+CTCGAATGCATATGGATTTTCATCCATTGCTGGAGGGCTTACTAGGATGGTCGACTGGAT\n+GTATGCCTGCCCAAAGCGGCGGGGACGGGTTGTCTGTTACGGTGACGATGCATGTATAAC\n+ATTTTGGAGCCAGGGCGTCCTCTATCGGGTGGATCCAGATTTTAAACAAATGGACGGGTC\n+CATAGA
TAGAGAGGATGCAAGAATCACCATTGAGTGGGTCCTCCACCATCTCAGAAAGGA\n+TTTAGGTGTAGAGGAGACCCCTGCTTTTTGGAAGACAGTTGCAGCAGTGTGGCTTGATAT\n+GGCCATTGACCCCCACTTTATCGTGGATGGTAAGACTGTTTACAGGAAGAAGAACCCCCA\n+TGGACTCATGACTGGAGTTCCAGGAACAACCTTATTTGACACTGTGAAATCTGTAATAGT\n+CTGGAACGAAATGTTGGATCAGGCTAGTGCAGGTTCCATAGACCTTTTAAATGAAGCTCA\n+AGTAGTTAAATGGATGAAGAGACAAGGCTTGGTTGTCAAAGAGGGAACTTGGAGTCCGGT\n+TGCACTCCCTGCAAGGGACACAGAGGGTCTAATTACGGACCACAAATTTCTTGGTGTACA\n+AATTATGGGAGTGTACCACAGACACCGTGTGATACATGTCCCCACAATGCCTGAGAGCGA\n+TGCTCTTGAGATGATGCTCTGCCAGAAGGATAACCCCTTTGAAAAGGCAGTATCTAGAAC\n+TGCACACCAAAGAACTCTGTATGACCGTATGAGGGGTTTAATGATAACAATGGGATTCAG\n+CATACCTCGGATCGAAGAGACAATACACGCTGTGGTTAATACAATCCCCGGCGAGATTAT\n+TGTCATGCAAACGCAAGAGCAAACCGGAACGAAGCCAGAACATATAACCCTCCAGGACTT\n+TGAATATCCAGATTCGTCTGGATTCCCCTCTCGGGACTTCTGTCTAGACCTCTATTCAGA\n+TGGAGGGGATGACAAAGCGGGATGGATTAATCTGTTCCCCACATTGTCAGGATTCCTTGA\n+CGAGTTTAAGAGGGAACAGAGAGTGGCTGTGAGGCAGATAAATTTGACAGTCCAATCCAA\n+CGACTATGATGTCAAGGAGGTTGTAGGATGTCCTCCTCCTCCTGAGGCGAATCTGAATGA\n+TGAGTACAAAGTGTTTGAGGCTCTTAAACCTCAGCAAGTCCAGTATTCAGAACCGAACCC\n+CAGACCCAAAGTAGTTCGGATTACAGAAAGTGGTGACATCCCTGAGAAATTCCTACCAAA\n+TATGGCTCAAGCCGTCGTTAGATGGCTTACCTCAGTTGGAGGTGTTTCTCAGGTTGGTAC\n+TGTTGCCGATAAAGTTGGAGCCAGTGCCTACCAAATTGTTGTGGGTGCTGCTAAAGGGGG\n+CTATTTCACTACTGGAGATGAACTTGGAGACTTGATTTCCCTATACCCACTAGTGACTCC\n+CTTCCCTACATTGCAGGACAGTCAAAGAGAAGAAATGGAGGAAAATCGTAATCTGATTGA\n+TAGGACTACTGCAGCCAGAACGTCAGCTTTGAGACGGGGGATTGTAAAAACCCAACCTGA\n+GCTCATTAATCTGGACGTTGCAGGAGTATCCAACCTCCATCCACCTCCCTACGATATAAA\n+TACAGCAGAGGATGCCATGGCATATGTCCATGCAGTGGTCAGCGGGAGATTTTCAGGATT\n+CACTAAATGGATATCTGAGGTACGGCCAAATGCTGCCAATCCTGTTGGAGTTAGGCTCTA\n+TGCCCACAGCCACCATACCCAGGACATGAAGCTGTTGGGAGAAGCCTGGTCCTCCTCTGC\n+CAAACTGGCTAAGGAATACATCTGTGCAGCTATTTTGGAGGTTAACAACATCCCATACAA\n+GAAGTCTAGCTTTGCGACCCCTAATGTAGTGCCTCCCCCGCCTGAGTCTTCCTGGGCTCG\n+GCAGGTTGAATACGCAACTGCCCCTAAAGCTGTGCCACAGATTGTTCCCGTTCATGAAAC\n+CCTGGATTTGGAGCTGTTTCACAGTATAATGCAGCAATTCCCAGGGCACTGTCCCCGCCA\n+GGTGCAGATGCTTATTAGAGAGGTAAGTCATAAAAATCCTGATGACTACCGTG
TGAGAAT\n+TCAAGACTTATTCTCCCGGTTTCCACCTCCTAAAGTAGCATCGAAGAGATC'..b'Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n+>gi|350543519|gb|JF907703.1|_Infectious_bursal_disease_virus_isolate_2009CAH495-SESW_polyprotein_gen--Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n+>gi|157886466|emb|AM111353.1|_Infectious_bursal_disease_virus_segment_A,_complete_sequence,_genomic_--Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n+>gi|667756318|gb|KF569805.1|_Infectious_bursal_disease_virus_isolate_HuB-1_segment_A,_complete_seque--Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n+>gi|89112096|gb|AY444873.3|_Infectious_bursal_disease_virus_VP5_protein_and_structural_polyprotein_g--Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n+>gi|395863892|gb|JN982256.1|_Infectious_bursal_disease_virus_isolate_SP33_VP2_protein_gene,_complete--Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n+>gi|630375502|gb|KJ547673.1|_Infectious_bursal_disease_virus_isolate_VRDC-IBDV-WZ_polyprotein_mRNA,_--Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATT
CAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n+>gi|452029935|gb|KC189836.1|_Infectious_bursal_disease_virus_strain_3529/92_polyprotein_mRNA,_comple--Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n+>gi|630375496|gb|KJ547670.1|_Infectious_bursal_disease_virus_isolate_Ventri-IBDV-Plus_polyprotein_mR--Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n+>gi|395863906|gb|JN982263.1|_Infectious_bursal_disease_virus_isolate_SC6_VP2_protein_gene,_complete_--Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n+>gi|395863910|gb|JN982265.1|_Infectious_bursal_disease_virus_isolate_SP21_VP2_protein_gene,_complete--Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n+>gi|367057603|gb|JN585293.1|_Infectious_bursal_disease_virus_isolate_CAHFS_K669_segment_A,_complete_--Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n+>gi|395863918|gb|JN982269.1|_Infectious_bursal_disease_virus_isolate_SC12_VP2_protein_gene,_complete--Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n+>gi|24306007|gb|AF322444.1|_Infectious_bursal_disease_virus_segment_A_VP5_protein_and_polyprotein_ge--Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,Al
igLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n'
b
diff -r 000000000000 -r 9dfb65ebb02e test-data/output.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.tab Sun Oct 15 18:43:37 2017 -0400
b
b'@@ -0,0 +1,1810 @@\n+--- blast.tab ---\n+# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n+ \n+# gi|262225307|gb|GQ342964.1|_Drosophila_melanogaster_tetravirus_SW-2009a_strain_DTRV_putative_RNA-de\n+# Suject Length: 3005\n+# Total Subject Coverage: 3001\n+# Relative Subject Coverage: 0.998668885191\n+# Best Bit Score: 5413.0\n+# Mean Bit Score: 3241.5\n+Locus_42_Transcript_2/2_Confidence_0.333_Length_597\t100.0\t593\t2409\t3001\t99.2\t0.0\t1070.0\n+Locus_42_Transcript_1/2_Confidence_0.333_Length_3138\t100.0\t3001\t1\t3001\t95.6\t0.0\t5413.0\n+ \n+# gi|268053723|ref|NC_013499.1|_Drosophila_melanogaster_totivirus_SW-2009a,_complete_genome\n+# Suject Length: 6780\n+# Total Subject Coverage: 6765\n+# Relative Subject Coverage: 0.997787610619\n+# Best Bit Score: 8001.0\n+# Mean Bit Score: 2184.97272727\n+Locus_10_Transcript_7/8_Confidence_0.111_Length_255\t100.0\t26\t6549\t6524\t9.8\t0.001\t48.2\n+Locus_7_Transcript_10/11_Confidence_0.154_Length_4093\t99.66\t4093\t542\t4628\t100.0\t0.0\t7319.0\n+Locus_7_Transcript_5/11_Confidence_0.308_Length_4519\t99.67\t3956\t679\t4628\t87.5\t0.0\t7077.0\n+Locus_7_Transcript_5/11_Confidence_0.308_Length_4519\t98.06\t566\t1\t563\t12.5\t0.0\t967.0\n+Locus_10_Transcript_4/8_Confidence_0.444_Length_1097\t99.91\t1087\t6731\t5645\t99.0\t0.0\t1956.0\n+Locus_7_Transcript_4/11_Confidence_0.154_Length_571\t99.82\t571\t3429\t3999\t99.8\t0.0\t1025.0\n+Locus_10_Transcript_3/8_Confidence_0.222_Length_529\t99.43\t529\t6170\t5645\t99.8\t0.0\t939.0\n+Locus_10_Transcript_6/8_Confidence_0.333_Length_1121\t99.91\t1121\t6765\t5645\t99.9\t0.0\t2017.0\n+Locus_7_Transcript_11/11_Confidence_0.000_Length_1206\t99.17\t1206\t542\t1741\t99.9\t0.0\t2131.0\n+Locus_10_Transcript_8/8_Confidence_0.000_Length_1134\t99.89\t905\t6549\t5645\t79.7\t0.0\t1627.0\n+Locus_10_Transcript_1/8_Confidence_0.222_Length_565\t99.81\t530\t5670\t5141\t93.6\t0.0\t951.0\n+Locus_7_Transcript_9/11_Confidence_0.154_Length_552\t99.09\t551\t4603\t515
3\t99.6\t0.0\t971.0\n+Locus_7_Transcript_1/11_Confidence_0.154_Length_1632\t99.16\t1069\t679\t1741\t65.4\t0.0\t1889.0\n+Locus_7_Transcript_1/11_Confidence_0.154_Length_1632\t98.06\t566\t1\t563\t34.5\t0.0\t967.0\n+Locus_7_Transcript_7/11_Confidence_0.462_Length_4481\t99.6\t4481\t679\t5153\t100.0\t0.0\t8001.0\n+Locus_10_Transcript_5/8_Confidence_0.333_Length_628\t99.84\t628\t6765\t6138\t99.8\t0.0\t1128.0\n+Locus_8_Transcript_2/4_Confidence_0.200_Length_117\t100.0\t26\t5645\t5670\t21.4\t4e-04\t48.2\n+Locus_7_Transcript_8/11_Confidence_0.154_Length_552\t99.28\t552\t4602\t5153\t99.8\t0.0\t978.0\n+Locus_7_Transcript_6/11_Confidence_0.154_Length_656\t99.69\t655\t3974\t4628\t99.7\t0.0\t1173.0\n+Locus_10_Transcript_2/8_Confidence_0.111_Length_1023\t99.9\t1023\t6163\t5141\t99.9\t0.0\t1840.0\n+Locus_7_Transcript_2/11_Confidence_0.077_Length_1069\t99.06\t1069\t679\t1741\t99.9\t0.0\t1884.0\n+Locus_7_Transcript_3/11_Confidence_0.154_Length_1743\t99.89\t1742\t1716\t3457\t99.9\t0.0\t3133.0\n+ \n+# gi|262225299|gb|GQ342961.1|_Drosophila_melanogaster_totivirus_SW-2009a_strain_DTV,_complete_genome\n+# Suject Length: 6780\n+# Total Subject Coverage: 6765\n+# Relative Subject Coverage: 0.997787610619\n+# Best Bit Score: 8001.0\n+# Mean Bit Score: 
2184.97272727\n+Locus_10_Transcript_7/8_Confidence_0.111_Length_255\t100.0\t26\t6549\t6524\t9.8\t0.001\t48.2\n+Locus_7_Transcript_10/11_Confidence_0.154_Length_4093\t99.66\t4093\t542\t4628\t100.0\t0.0\t7319.0\n+Locus_7_Transcript_5/11_Confidence_0.308_Length_4519\t99.67\t3956\t679\t4628\t87.5\t0.0\t7077.0\n+Locus_7_Transcript_5/11_Confidence_0.308_Length_4519\t98.06\t566\t1\t563\t12.5\t0.0\t967.0\n+Locus_10_Transcript_4/8_Confidence_0.444_Length_1097\t99.91\t1087\t6731\t5645\t99.0\t0.0\t1956.0\n+Locus_7_Transcript_4/11_Confidence_0.154_Length_571\t99.82\t571\t3429\t3999\t99.8\t0.0\t1025.0\n+Locus_10_Transcript_3/8_Confidence_0.222_Length_529\t99.43\t529\t6170\t5645\t99.8\t0.0\t939.0\n+Locus_10_Transcript_6/8_Confidence_0.333_Length_1121\t99.91\t1121\t6765\t5645\t99.9\t0.0\t2017.0\n+Locus_7_Transcript_11/11_Confidence_0.000_Length_1206\t99.17\t1206\t542\t1741\t99.9\t0.0\t2131.0\n+Locus_10_Transcript_8/8_Confidence_0.000_Length_1134\t99.89\t905\t6549\t5645\t79.7\t0.0\t1627.0\n+Locus_10_Transcript_1/8_Confidence_0.222_Length_565\t99.81\t530\t5670\t5'..b'egment_A,_complete_sequence,_genomic_\n+# Suject Length: 3262\n+# Total Subject Coverage: 82\n+# Relative Subject Coverage: 0.0251379521766\n+# Best Bit Score: 50.0\n+# Mean Bit Score: 50.0\n+Locus_69_Transcript_1/1_Confidence_0.000_Length_164\t73.17\t82\t538\t457\t49.4\t2e-04\t50.0\n+ \n+# gi|667756318|gb|KF569805.1|_Infectious_bursal_disease_virus_isolate_HuB-1_segment_A,_complete_seque\n+# Suject Length: 3260\n+# Total Subject Coverage: 82\n+# Relative Subject Coverage: 0.0251533742331\n+# Best Bit Score: 50.0\n+# Mean Bit Score: 50.0\n+Locus_69_Transcript_1/1_Confidence_0.000_Length_164\t73.17\t82\t538\t457\t49.4\t2e-04\t50.0\n+ \n+# gi|89112096|gb|AY444873.3|_Infectious_bursal_disease_virus_VP5_protein_and_structural_polyprotein_g\n+# Suject Length: 3260\n+# Total Subject Coverage: 82\n+# Relative Subject Coverage: 0.0251533742331\n+# Best Bit Score: 50.0\n+# Mean Bit Score: 
50.0\n+Locus_69_Transcript_1/1_Confidence_0.000_Length_164\t73.17\t82\t538\t457\t49.4\t2e-04\t50.0\n+ \n+# gi|395863892|gb|JN982256.1|_Infectious_bursal_disease_virus_isolate_SP33_VP2_protein_gene,_complete\n+# Suject Length: 1356\n+# Total Subject Coverage: 82\n+# Relative Subject Coverage: 0.0604719764012\n+# Best Bit Score: 50.0\n+# Mean Bit Score: 50.0\n+Locus_69_Transcript_1/1_Confidence_0.000_Length_164\t73.17\t82\t408\t327\t49.4\t2e-04\t50.0\n+ \n+# gi|630375502|gb|KJ547673.1|_Infectious_bursal_disease_virus_isolate_VRDC-IBDV-WZ_polyprotein_mRNA,_\n+# Suject Length: 3084\n+# Total Subject Coverage: 82\n+# Relative Subject Coverage: 0.026588845655\n+# Best Bit Score: 50.0\n+# Mean Bit Score: 50.0\n+Locus_69_Transcript_1/1_Confidence_0.000_Length_164\t73.17\t82\t446\t365\t49.4\t2e-04\t50.0\n+ \n+# gi|452029935|gb|KC189836.1|_Infectious_bursal_disease_virus_strain_3529/92_polyprotein_mRNA,_comple\n+# Suject Length: 3039\n+# Total Subject Coverage: 82\n+# Relative Subject Coverage: 0.0269825600526\n+# Best Bit Score: 50.0\n+# Mean Bit Score: 50.0\n+Locus_69_Transcript_1/1_Confidence_0.000_Length_164\t73.17\t82\t408\t327\t49.4\t2e-04\t50.0\n+ \n+# gi|630375496|gb|KJ547670.1|_Infectious_bursal_disease_virus_isolate_Ventri-IBDV-Plus_polyprotein_mR\n+# Suject Length: 3040\n+# Total Subject Coverage: 82\n+# Relative Subject Coverage: 0.0269736842105\n+# Best Bit Score: 50.0\n+# Mean Bit Score: 50.0\n+Locus_69_Transcript_1/1_Confidence_0.000_Length_164\t73.17\t82\t409\t328\t49.4\t2e-04\t50.0\n+ \n+# gi|395863906|gb|JN982263.1|_Infectious_bursal_disease_virus_isolate_SC6_VP2_protein_gene,_complete_\n+# Suject Length: 1356\n+# Total Subject Coverage: 82\n+# Relative Subject Coverage: 0.0604719764012\n+# Best Bit Score: 50.0\n+# Mean Bit Score: 50.0\n+Locus_69_Transcript_1/1_Confidence_0.000_Length_164\t73.17\t82\t408\t327\t49.4\t2e-04\t50.0\n+ \n+# gi|395863910|gb|JN982265.1|_Infectious_bursal_disease_virus_isolate_SP21_VP2_protein_gene,_complete\n+# Suject Length: 
1356\n+# Total Subject Coverage: 82\n+# Relative Subject Coverage: 0.0604719764012\n+# Best Bit Score: 50.0\n+# Mean Bit Score: 50.0\n+Locus_69_Transcript_1/1_Confidence_0.000_Length_164\t73.17\t82\t408\t327\t49.4\t2e-04\t50.0\n+ \n+# gi|367057603|gb|JN585293.1|_Infectious_bursal_disease_virus_isolate_CAHFS_K669_segment_A,_complete_\n+# Suject Length: 3184\n+# Total Subject Coverage: 82\n+# Relative Subject Coverage: 0.0257537688442\n+# Best Bit Score: 50.0\n+# Mean Bit Score: 50.0\n+Locus_69_Transcript_1/1_Confidence_0.000_Length_164\t73.17\t82\t532\t451\t49.4\t2e-04\t50.0\n+ \n+# gi|395863918|gb|JN982269.1|_Infectious_bursal_disease_virus_isolate_SC12_VP2_protein_gene,_complete\n+# Suject Length: 1356\n+# Total Subject Coverage: 82\n+# Relative Subject Coverage: 0.0604719764012\n+# Best Bit Score: 50.0\n+# Mean Bit Score: 50.0\n+Locus_69_Transcript_1/1_Confidence_0.000_Length_164\t73.17\t82\t408\t327\t49.4\t2e-04\t50.0\n+ \n+# gi|24306007|gb|AF322444.1|_Infectious_bursal_disease_virus_segment_A_VP5_protein_and_polyprotein_ge\n+# Suject Length: 3085\n+# Total Subject Coverage: 82\n+# Relative Subject Coverage: 0.0265802269044\n+# Best Bit Score: 50.0\n+# Mean Bit Score: 50.0\n+Locus_69_Transcript_1/1_Confidence_0.000_Length_164\t73.17\t82\t454\t373\t49.4\t2e-04\t50.0\n'
b
diff -r 000000000000 -r 9dfb65ebb02e test-data/un_sequences.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/un_sequences.fa Sun Oct 15 18:43:37 2017 -0400
b
b'@@ -0,0 +1,484 @@\n+>Locus_8_Transcript_1/4_Confidence_0.400_Length_658\n+TTTGCTTTTTTCTTTTATTCTCTGCCTTCCATCTTCACTATGACCATCTCATCATTAAGG\n+CAAATCCGAAGAGGCTCTAAGACAATCGGATCGATTTATGCAAATGATACTCCAATCTTC\n+GAGCGTTTCTTCCTGTGCCTCTCCGCCACCACGAAGAATAGCAACTCGTCATTATGGCAA\n+ACAAAGATCGAAGACCAGTTTTCACCTCAATCTAATGATGTTTGCCTCATTTATCCTGAT\n+CCGCTGTGATCCGCTTGTAATCAATCCCCATCCATCCTCAGCCAGGGGCAAGTTCAAGTT\n+GCCAGCTCTCCGCAGCGATCTCCTCCACGGTTCCTTTATGATCTCGAGTTTCATCCTTTC\n+CACTCCACCTGGCTTGAGTGGGTGGCTCAACATAATGGGGGGATCTGCCGTTATTCGGTT\n+GTATCCCGATCGCAGCCGCAGCTGCAAATAAAATAAGAGTCGACTTCAACACTCGACTTG\n+GCTTAATGCAAAGTGCCGCTGAGGAGAGATGCGTGGGTGGGCCCACATTGGGAAGCCAAC\n+TGGGTCTGCTCTAAGTCGCAGAGTGCAGATAAACACTGCCAAAAAATATGTGACTAAAAT\n+CGAATTTATTTCCTGGGTCGTCGATTAGGAAATAAGAATGAGAGGTAAATGCGCAGTC\n+>Locus_32_Transcript_4/5_Confidence_0.286_Length_242\n+GGTGGTGCAATTTGGAGTCTGGCCGCGCGATTAGCTAAGCGCTGCGACCATATAATCGCA\n+TTGACAACTCCGATTTGTGTGCGGGAGTCGAGGAAGTGCTCCACTTTATGGCAGTCACAA\n+TTTCATGTTTGAATGTTTCGCCTCGATAGGATTCGGAATGGGGATTTTCGAATTGCGATT\n+TGGTCCGCTGGCCTGGCCTGGACTGGCTGGCCGCTTGTCCGATTCTATCGAATGCGTGTC\n+TC\n+>Locus_5_Transcript_3/6_Confidence_0.222_Length_630\n+CGCCTGCATTAAAGCACTCCCACAGATTTACGGTCTCACCTTCCCCCTTCAAAAGATTGC\n+AGGCAACTTCTGCCAAGTGTTCAGCTCCTTTTTAGAGCATCGCAATGGATCTTGGGGCAG\n+CTGAGACTTTTCCGGTATTCCACTCGACAGCAACTTTCATGTTCTCGGCAGATGCGCGGC\n+AAATGATAACATAAAATGCGTTTTCCATTTCGAGGCATAATTTCGTGGCGCGGGCAAAAC\n+CATTTTCCGCATTTTCTCGGCCAATTTTCGTTGCCTCTGCGTGCTGTTCACAACTTGGCG\n+GTGAAAGAGGCACTATGAAAGAATCAAAAGCAGTCAACTCCTGTTCAGCAGCAAACAAAC\n+AGCAAAGAGGCGGGAAAAACAATATTGTATTTTGCGGAAGGGGAAAGGTACGAGGTCCCC\n+CCCGGGCTCCAGAAGTCTGCTGGAGCAATAAAAATAATAATCGAACTGCAGCCGCATTGC\n+GTTGGCCGCAAGGTCATTGCCCATTGGCTCTGATCGATTGGGCCAACAAGGTGGGCAAGA\n+GGGGCGATTCGAGCCAAGAGAAATGAGAGAGGGCAGAAACTGCCGTTGGTTAGCGATGGT\n+AATTTGATAATTGGCAAAACTGTCTGCTCG\n+>Locus_3_Transcript_3/5_Confidence_0.375_Length_130\n+TCCCATCGGCGGGGGATGGGAGGAAGGATGCCGGTGATGCGCCTTACGACACCTTAAGGT\n+GTCATAGACACGGTGAGGTGGCAATTAGGGCGCACCGTGGGGCACCCTTCACCGTCAACC\n+CTTCTTATCC\n+>Locus_8_Tr
anscript_4/4_Confidence_0.200_Length_135\n+GCAACGAATATCTTTAATTACGCTCTATAATCAAATATAGAGAGGGTCTTAAATATTCCC\n+AGAAGAGGCCATTTCAAGTCACTAACAATTTGACTTTGCTTTTTTCTTTTATTCTCTGCC\n+CTCCATCTTCACCAT\n+>Locus_32_Transcript_2/5_Confidence_0.286_Length_124\n+CGCAGGAAAGGTGAAGGTGGTGCAATTTGGAGTCTGGCCGCGCGATTAGCTAAGCGCTGC\n+GACCATATAATCGCATTGACAACTCCGATTTGTGTGCGGGAGTCGAGGAAGTGCTCCACT\n+TTAT\n+>Locus_39_Transcript_1/2_Confidence_0.667_Length_187\n+ATTAAAACAGAGGAGACAGATGGCCCTTGCGAGCGTTGACTGTCTCTGATTTCTGCCCAG\n+TGCTCTGAATGTCAAAGTGAAGAAATTCAAGTAAGCGCGGGTCAACGGCGGGAGTAACTA\n+TGACTCTCTTAAGGTAGCCAAATGCCTCTTCATCTTGGAGACCAGCTGCGGATATTGGTA\n+CGGCCTG\n+>Locus_11_Transcript_2/2_Confidence_0.333_Length_1110\n+GCTTATCTTTCATATGGTCCATAGTAAATACATTAAAAACTAACGACCTTGGGACACGGC\n+CGCTGACACTGCGAAGGATGTCAAAAAAAAGAACCTGATTTTTAATTTCGACTGGACGAC\n+AGTTGAAATAGTGGGAGCGGTTTCCATCCCAGTATTCAAAACGGCGGTTTGTGATGATGT\n+TTACATAATCCTCAAATTTGTGCTCGTATCCTGCTTGTGTGTCGTTCTCAAACCAAAAGC\n+GTATAAAGACAGTGCCGGAACGCTTGAATTTCTTGAAATAACATTCCTGTTCTGAAAGGT\n+ATCCGTCGGTTTCGACAAGCACTACAGGATCAAAAATCATACACCCATATGCGATATTTG\n+CTCGTGAAATATGCATTATTTTTCCAATGTCTGGTGTAGAAATGTCATAGATAGAATGCA\n+AGAACATCAATCGTTCTGAAGTTACTGTACAATATTGAGCCGGTTTACGGCAAATGACTT\n+TCGGGTCTTCATTGAGATGCAGTTTTAGAACTTCCGAATGGTGTTTGGTGAGACCCTCTT\n+GTTTGTATGATCGAAGTTCAAATAAGTAACGACTGTGACGGTAATCGTCGTTAGCATCGA\n+GTAAGGGGTAACATGTATGGGTATATTTCTCTGCTGCATTCAGATGGGTCAAAGGGTTAC\n+CCCCAATATCTTTTAGTACACAATCAAATCCGTTGATTGGTACAGTGCCTCGTTCAATCC\n+TCATTCGATACTTCATGTAACGTTCGCTCAATAAGCGATGTGCACGTGCGAATGCATGAG\n+GTGCACTATCCGTAGCTTTCGAATAATCTAGGTTGAACATGCTGAATGATTTATTCAGTT\n+TGGATTCTTGATTCGATGTGAGATGTTGCTTGATGTATACTTTTTGGAAGTTGCGCTTCG\n+CAAGTATTTCTTGCTTAGCTTTGCAAATTCTTTTGTGTGCATAGGCTTCTTGGAACAATG\n+CATCACTGCCCAGACTCTTTGCAAGTTGAGCTTGTGTGACAGCATGTGTTGATGGAAATT\n+CAGAAGCGACGTTATCAAGAACAAGGTCAATTGCGGCACTATTTTCAATCGAATCAAAAG\n+TGACGTCTGAACGATCAACAGACGTCAAAA\n+>Locus_25_Transcript_2/3_Confidence_0.200_Length_363\n+ATTACTTGAATTTTTCAACACTTCAATACAAGATTCCTGACAACCACATTGTACCTTCTC\n+CACTAAGTGGTTGCAATCCTATGGTAATCG
ACGATCAACTCTTTAACAAGCACATGAACT\n+GATGGACTAACTAATGGGCAAGCAAAGTGTTTTAGGCAT'..b'GGCATTTTGTTTTGCATTATCCAAGATTCTGTGC\n+TGGCGCAAACAACAAGCAACTCATCTGCCACTTTTCACACCCTTGAAGCGGAGTTTTCAC\n+TTCTCTTTGGGTGAGCTTTAGCCGCAAATGGAGGAGCCGCCTCATAAATCATCGCCCAGG\n+CCACCGGTGGGCGTCGTCCAACGTCCAATGCTAGAAGTGTGCGCAGGCGCAGCTGCTGCT\n+CACCCCCTCCTCCCCTTAATTTCCCCAACTCATTGTCAGCTGCAAAGGTGCCAAAGAAGT\n+GTACAAACTTCGCGACTGATTTGGGGGGATTTGGCCAGCCGTA\n+>Locus_6_Transcript_1/3_Confidence_0.400_Length_598\n+AAATGCGCAGTCTGCTCCAATATCTTATGGTTTGAAACAAAACAAAAGCAGTATACCATC\n+GACTATATATTAAGTAGATAAAACTATTTTCCAGTGAGAGATTGTGCTTCATTAACACTC\n+GCCGCTCGTTCGCCTGAGTTTCCTTTTGTTTGCCGCAGTTTTTGACACCAAACTTGGCGT\n+CACTTCAAAGCCCTGGCGATTAATCTTGAGTGCTGGTGGGGAAGGAGTGGGGCTTCTCCA\n+GTGCCAGATCTTCCAGATCCGGTCGTGTCTGCAATTTGCAGGCATTTTGTTTTGCATTAT\n+CCAAGATTCTGTGCTGGCGCAAACAACAAGCAACTCATCTGCCACTTTTCACACCCTTGA\n+AGCGGAGTTTTCACTTCTCTTTGGGTGAGCTTTAGCCGCAAATGGAGGAGCCGCCTCATA\n+AATCATCGCCCAGGCCACCGGTGGGCGTCGTCCAACGTCCAATGCTAGAAGTGTGCGCAG\n+GCGCAGCTGCTGCTCACCCCCTCCTCCCCTTAATTTCCCCAACTCATTGTCAGCTGCAAA\n+GGTGCCAAAGAAGTGTACAAACTTCGCGACTGATTTGGGGGGATTTGGCCAGCCGTAC\n+>Locus_22_Transcript_1/1_Confidence_0.000_Length_312\n+ATCGAAAAATTTGTCAAATCTGCTAAGAAAAAACTAACAGATGAATGTTTCCCAGTCGAA\n+GCTTGCAACGAACATGAACCTGAATTCGATGAATCAGATTTAGGTACAGGAATAACCTAT\n+TCACCCATTTATGCAGTCGTCAAAGTACAAAAATGTGAACTTCCTGCAACTCCCGTGCCG\n+TTTGATGAACCTGTCGAGAAGGATAAACCAGATACAGAAAGGATAGAGGTTGGTGATATA\n+CGCAAATCTATGGATGAATTCACACGCTACCTTAAATTCACACATGATTCAGAAATTAAC\n+AATATGAAATCA\n+>Locus_37_Transcript_2/3_Confidence_0.400_Length_145\n+TTATTTAGGTCACAAGTGTACTGACAAAGGTATATTGCCAGATGACTCCAAATATGAGGT\n+AATAAAGAACTGCCCCAAACCAGTAAACGCAGACGAAGCTAGACGCTTCGTGGCATTTTG\n+CAATTATTACAGAGGATTTATTAAG\n+>Locus_3_Transcript_1/5_Confidence_0.125_Length_388\n+CCACATAATACAAATAAATTTCAGGCATCGGAAAAATATATAAGGAGATTGCAGCCAGCA\n+CCTTGGGGAACCAGCAAAGATCTCAGCTCGTTATGGCCATCGACATTGTCCGGCATCTGA\n+CAAATTTTTGGATTTTGCTTCACGGCGATCATCTCTCAATCTCAATTTTGGACCCAATCT\n+CGATCGCCATCTTCCCCGGCTGCCAATCCATTTGGTCTGTGGCCAGTAGCTGTGGGGCTT\n+GATTGC
GTCGATCGGCCTGTGTGGAATTGGAATTGGAAGTGGAGCTCTAGTTAACAGTTG\n+AGACCTGGACACCGAATGCGTGTCTCCGCCCCCGGGAACTGCAGAAGCAACAACTGCAGC\n+CGCAAACAATTGACAGAGACAAGCGCCT\n+>Locus_13_Transcript_2/3_Confidence_0.600_Length_223\n+CTTCTGAGAAATCAAAGCCGGTGGTCTCGAAAGGCGGAAGAGTGAGGATTTTGGTGGGCG\n+ATAGGTGCCAAGTGTTAAGGGTTGTCGTGGGTTGATAGTCGTTAAACGTCAGCGAGGGTC\n+AGTGGCTTACCAGGGTCTAAGGATATTTGAGATGTGGACTTTTGCTTAGAGAGGAAAATG\n+CAAGTGAAAGAGTCAAGCTATAATTGTAAATGGTGAACTACAA\n+>Locus_23_Transcript_1/1_Confidence_0.000_Length_197\n+AATTGAGATTAAATGACAAAACTCCGGTCTATATCAAAAACTATAGAATGCCAGAAAGTC\n+AAAAACCAGAAATTCAAAGGCAAGTTGACAAATTAATAAAAGATGGCATCGTCGAACCAT\n+CTATTTCAGAATATAATAGCCCTCTTCTCTTGGTACCCAAGAAATCACTGCCTAACTCGG\n+AGGAAAAGAGATGGCGA\n+>Locus_41_Transcript_1/2_Confidence_0.667_Length_155\n+CAACTGGACTCCAATTTCGACTCGGATGCGGCCCAGGTGGCCAGCTGCAGTTGCTGGGCG\n+GCAATAAAACATTTACCACCGAATTAGCCCAGTCGGAGAGTAGTTCAAGTAGTTTAAGTG\n+CAAGACCACTTAAAATTCAGTTACGACTGCTGCCC\n+>Locus_71_Transcript_1/2_Confidence_0.333_Length_170\n+GATTAATGAAAACATCTTTGGCAAATGCTTTCGCAGTCGGACGTCTCGCTACGGTCCAAG\n+AATTTCACCTCTCGCGTCGTAATACTAATGCCCCCAAACTGCTTCTATTAATCATTACCT\n+CTTGATCTGAAAACCAATGAAAGCAGAACAGAGGTCTTATTTCATTATCC\n+>Locus_64_Transcript_1/1_Confidence_0.000_Length_172\n+AGGGATGTTGTTTCCGTAAAGCGCCACGGTTCCTGTGGTGTCTCGTGCGCTCTATTCGGC\n+CCTTGAAAAACCGAGGGAGGCTATTTGAATTTCGTGCCAGGCCGTACCGATATCCGCAGC\n+AGGTCTCCAAGGTGAACAGCCTCTAGTCGATAGAATAATGTAGGTAAGGGAA\n+>Locus_5_Transcript_6/6_Confidence_0.111_Length_262\n+AGGGATTAATCCCTAATCAAAGATCCATTCTTACACTGAAGTTTGGTTAAGATTGAAGCT\n+ATAGCTTGAGCGGAGTTAAGCCATCTTCAAGGAAACTTCTTCTTACGAATCGCAGCGATC\n+CCTAAACCTATTTCGAGTTGACCCCAATTTCCAGACCTCCAATGGTTATTACGGTTAACC\n+AGAGATTACCCAACCGACTAACCGACTAACATTTGCCACTTCATTGCCCTGCCACCTTCC\n+GCCGAACGCCGAACGCCGAACG\n+>Locus_50_Transcript_1/1_Confidence_0.000_Length_132\n+TATAATGGATCATATCCGCGAATATATCACTGACATGACCCATTTTTAAGTTGTTGGTGA\n+ACAAGTCATGTTGAAGAATTTCCGCGCTGACAAAGCAAATAAGCACGAGGAACACAAACA\n+TTTTGATATGAC\n+>Locus_61_Transcript_1/2_Confidence_0.667_Length_164\n+CACCGTCCTGCTGTCTATATCAAC
CAACGCCTTTCATGGGGTCTCATGAGCGGGAAGTTT\n+GGCACTTTAACCCGACGTTTGGTTCATCCCACAGCGCCAGTTCTGCTTACCAAAAGTGGC\n+CCACTGGGCACATTATATCATAACCTTGAACTTCATATCAGGAA\n'