Next changeset 1:1964514aabde (2015-09-14) |
Commit message:
planemo upload for repository https://bitbucket.org/drosofff/gedtools/ |
added:
BlastParser_and_hits.py BlastParser_and_hits.xml test-data/blast.tab test-data/input.fa test-data/output.fa test-data/output.tab |
b |
diff -r 000000000000 -r 69ea2a13947f BlastParser_and_hits.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BlastParser_and_hits.py Sun Jun 21 14:31:29 2015 -0400 |
[ |
b'@@ -0,0 +1,177 @@\n+#!/usr/bin/python\n+# blastn blastx parser revised debugged: 3-4-2015. Commit issue.\n+# drosofff@gmail.com\n+\n+import sys\n+import argparse\n+from collections import defaultdict\n+\n+def Parser():\n+ the_parser = argparse.ArgumentParser()\n+ the_parser.add_argument(\'--blast\', action="store", type=str, help="Path to the blast output (tabular format, 12 column)")\n+ the_parser.add_argument(\'--sequences\', action="store", type=str, help="Path to the fasta file with blasted sequences")\n+ the_parser.add_argument(\'--fastaOutput\', action="store", type=str, help="fasta output file of blast hits")\n+ the_parser.add_argument(\'--tabularOutput\', action="store", type=str, help="tabular output file of blast analysis")\n+ the_parser.add_argument(\'--flanking\', action="store", type=int, help="number of flanking nucleotides added to the hit sequences") \n+ the_parser.add_argument(\'--mode\', action="store", choices=["verbose", "short"], type=str, help="reporting (verbose) or not reporting (short) oases contigs")\n+ args = the_parser.parse_args()\n+ if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ):\n+ the_parser.error(\'argument(s) missing, call the -h option of the script\')\n+ if not args.flanking:\n+ args.flanking = 0\n+ return args\n+\n+def median(lst):\n+ lst = sorted(lst)\n+ if len(lst) < 1:\n+ return None\n+ if len(lst) %2 == 1:\n+ return lst[((len(lst)+1)/2)-1]\n+ if len(lst) %2 == 0:\n+ return float(sum(lst[(len(lst)/2)-1:(len(lst)/2)+1]))/2.0\n+\n+def mean(lst):\n+ if len(lst) < 1:\n+ return 0\n+ return sum(lst) / float(len(lst))\n+\n+def getfasta (fastafile):\n+ fastadic = {}\n+ for line in open (fastafile):\n+ if line[0] == ">":\n+ header = line[1:-1]\n+ fastadic[header] = ""\n+ else:\n+ fastadic[header] += line\n+ for header in fastadic:\n+ fastadic[header] = "".join(fastadic[header].split("\\n"))\n+ return fastadic\n+\n+def insert_newlines(string, every=60):\n+ lines = []\n+ for i in xrange(0, len(string), every):\n+ lines.append(string[i:i+every])\n+ return \'\\n\'.join(lines)\n+ \n+def getblast (blastfile):\n+ \'\'\'blastinfo [0]\tPercentage of identical matches\n+ blastinfo [1]\tAlignment length\n+ blastinfo [2]\tNumber of mismatches\n+ blastinfo [3]\tNumber of gap openings\n+ blastinfo [4]\tStart of alignment in query\n+ blastinfo [5]\tEnd of alignment in query\n+ blastinfo [6]\tStart of alignment in subject (database hit)\n+ blastinfo [7]\tEnd of alignment in subject (database hit)\n+ blastinfo [8]\tExpectation value (E-value)\n+ blastinfo [9]\tBit score\n+ blastinfo [10]\tSubject length (NEED TO BE SPECIFIED WHEN RUNNING BLAST) \'\'\'\n+ blastdic = defaultdict (dict) \n+ for line in open (blastfile):\n+ fields = line[:-1].split("\\t")\n+ transcript = fields[0]\n+ subject = fields[1]\n+ blastinfo = [float(fields[2]) ] # blastinfo[0]\n+ blastinfo = blastinfo + [int(i) for i in fields[3:10] ] # blastinfo[1:8] insets 1 to 7\n+ blastinfo.append(fields[10]) # blastinfo[8] E-value remains as a string type\n+ blastinfo.append(float(fields[11])) # blastinfo[9] Bit score\n+ blastinfo.append(int(fields[12])) # blastinfo[10] Subject length MUST BE RETRIEVED THROUGH A 13 COLUMN BLAST OUTPUT\n+ try:\n+ blastdic[subject][transcript].append(blastinfo)\n+ except:\n+ blastdic[subject][transcript] = [ blastinfo ]\n+ return blastdic\n+\n+def getseq (fastadict, transcript, up, down, orientation="direct"):\n+ def reverse (seq):\n+ revdict = {"A":"T","T":"A","G":"C","C":"G","N":"N"}\n+ revseq = [revdict[i] for i in seq[::-1]]\n+ return "".join(revseq)\n+ pickseq = fastadict[transcript][up-1:down]\n+ if orientation == "direct":\n+ return pickseq\n+ else:\n+ return reverse(pickseq)\n+\n+def subjectCoverage (fastadict, b'..b'(subjectLength)\n+ return HitDic, subjectLength, TotalSubjectCoverage, RelativeSubjectCoverage, max(bitScores), mean(bitScores)\n+ \n+def GetHitSequence (fastadict, FastaHeader, leftCoordinate, rightCoordinate, FlankingValue):\n+ if rightCoordinate > leftCoordinate:\n+ polarity = "direct"\n+ else:\n+ polarity = "reverse"\n+ leftCoordinate, rightCoordinate = rightCoordinate, leftCoordinate\n+ if leftCoordinate - FlankingValue > 0:\n+ leftCoordinate -= FlankingValue\n+ else:\n+ leftCoordinate = 1\n+ return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity)\n+ \n+def outputParsing (F, Fasta, results, Xblastdict, fastadict, mode="verbose"):\n+ F= open(F, "w")\n+ Fasta=open(Fasta, "w")\n+ if mode == "verbose":\n+ print >>F, "# SeqId\\t%Identity\\tAlignLength\\tStartSubject\\tEndSubject\\t%QueryHitCov\\tE-value\\tBitScore\\n"\n+ for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True):\n+ print >> F, "#\\n# %s" % subject\n+ print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"])\n+ print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"])\n+ print >> F, "# Relative Subject Coverage: %s" % (results[subject]["RelativeSubjectCoverage"])\n+ print >> F, "# Maximum Bit Score: %s" % (results[subject]["maxBitScores"])\n+ print >> F, "# Mean Bit Score: %s" % (results[subject]["meanBitScores"])\n+ for header in results[subject]["HitDic"]:\n+ print >> Fasta, ">%s\\n%s" % (header, insert_newlines(results[subject]["HitDic"][header]) )\n+ print >> Fasta, "" # final carriage return for the sequence\n+ for transcript in Xblastdict[subject]:\n+ transcriptSize = float(len(fastadict[transcript]))\n+ for hit in Xblastdict[subject][transcript]:\n+ percentIdentity, alignLenght, subjectStart, subjectEnd, queryCov = hit[0], hit[1], hit[6], hit[7], "%.1f" % (abs(hit[5]-hit[4])/transcriptSize*100)\n+ Eval, BitScore = hit[8], hit[9]\n+ info = [transcript] + [percentIdentity, alignLenght, subjectStart, subjectEnd, queryCov, Eval, BitScore]\n+ info = [str(i) for i in info]\n+ info = "\\t".join(info)\n+ print >> F, info\n+ else:\n+ print >>F, "# subject\\tsubject length\\tTotal Subject Coverage\\tRelative Subject Coverage\\tMaximum Bit Score\\tMean Bit Score"\n+ for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True):\n+ line = []\n+ line.append(subject)\n+ line.append(results[subject]["subjectLength"])\n+ line.append(results[subject]["TotalCoverage"])\n+ line.append(results[subject]["RelativeSubjectCoverage"])\n+ line.append(results[subject]["maxBitScores"])\n+ line.append(results[subject]["meanBitScores"])\n+ line = [str(i) for i in line]\n+ print >> F, "\\t".join(line)\n+ for header in results[subject]["HitDic"]:\n+ print >> Fasta, ">%s\\n%s" % (header, insert_newlines(results[subject]["HitDic"][header]) )\n+ print >> Fasta, "" # final carriage return for the sequence\n+ F.close()\n+ Fasta.close()\n+ \n+ \n+\n+def __main__ ():\n+ args = Parser()\n+ fastadict = getfasta (args.sequences)\n+ Xblastdict = getblast (args.blast)\n+ results = defaultdict(dict)\n+ for subject in Xblastdict:\n+ results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking)\n+ outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, args.mode)\n+if __name__=="__main__": __main__()\n' |
b |
diff -r 000000000000 -r 69ea2a13947f BlastParser_and_hits.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BlastParser_and_hits.xml Sun Jun 21 14:31:29 2015 -0400 |
b |
@@ -0,0 +1,45 @@ +<tool id="BlastParser_and_hits" name="Parse blast output and compile hits" version="2.1.0"> +<description>for virus discovery</description> +<requirements></requirements> +<command interpreter="python"> +BlastParser_and_hits.py + --sequences $sequences + --blast $blast + --tabularOutput $tabularOutput + --fastaOutput $fastaOutput + --flanking $flanking + --mode $mode +</command> +<inputs> + <param name="sequences" type="data" format="fasta" label="fasta sequences that have been blasted" /> + <param name="blast" type="data" format="tabular" label="The blast output you wish to parse" /> + <param name="flanking" type="text" size="5" value= "5" label="Number of flanking nucleotides to add to hits for CAP3 assembly"/> + <param name="mode" type="select" label="Verbose or short reporting mode" help="display or not the oases contigs"> + <option value="verbose" default="true">verbose</option> + <option value="short">do not report oases contigs</option> + </param> +</inputs> +<outputs> + <data name="tabularOutput" format="tabular" label="blast analysis, by subjects"/> + <data name="fastaOutput" format="fasta" label="hits"/> +</outputs> + + <tests> + <test> + <param ftype="fasta" name="sequences" value="input.fa" /> + <param ftype="tabular" name="blast" value="blast.tab" /> + <param name="flanking" value="5" /> + <param name="mode" value="verbose" /> + <output name="tabularOutput" ftype="tabular" file="output.tab" /> + <output name="fastaOutput" ftype="fasta" file="output.fa" /> + </test> + </tests> + +<help> + +**What it does** + +Parse blast outputs for viruses genome assembly. Outputs analysis and hit sequences for further assembly + +</help> +</tool> |
b |
diff -r 000000000000 -r 69ea2a13947f test-data/blast.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blast.tab Sun Jun 21 14:31:29 2015 -0400 |
b |
b'@@ -0,0 +1,695 @@\n+Locus_1_Transcript_1/7_Confidence_0.231_Length_224\tgi|262225312|gb|GQ342966.1|_Drosophila_melanogaster_American_nodavirus_(ANV)_SW-2009a_segment_RNA2\t98.47\t196\t3\t0\t3\t198\t517\t712\t9e-92\t 340\t1416\n+Locus_1_Transcript_1/7_Confidence_0.231_Length_224\tgi|351066204|gb|JF461542.1|_Flock_house_virus_isolate_IP-VIA-022011_coat_protein_precursor,_gene,_c\t95.41\t196\t9\t0\t3\t198\t517\t712\t1e-83\t 313\t1365\n+Locus_1_Transcript_1/7_Confidence_0.231_Length_224\tgi|152143030|gb|EF690538.1|_Flock_house_virus_isolate_TNCL_segment_RNA2_protein_alpha_mRNA,_complet\t94.39\t196\t11\t0\t3\t198\t518\t713\t7e-81\t 304\t1383\n+Locus_1_Transcript_1/7_Confidence_0.231_Length_224\tgi|22711883|ref|NC_004144.1|_Flock_house_virus,_complete_genome\t89.18\t194\t21\t0\t3\t196\t518\t711\t3e-66\t 255\t1400\n+Locus_1_Transcript_1/7_Confidence_0.231_Length_224\tgi|9632343|ref|NC_002037.1|_Black_beetle_virus_RNA_2,_complete_sequence\t87.63\t194\t24\t0\t3\t196\t518\t711\t2e-62\t 242\t1399\n+Locus_1_Transcript_2/7_Confidence_0.231_Length_143\tgi|262225312|gb|GQ342966.1|_Drosophila_melanogaster_American_nodavirus_(ANV)_SW-2009a_segment_RNA2\t97.44\t117\t3\t0\t1\t117\t596\t712\t4e-49\t 197\t1416\n+Locus_1_Transcript_2/7_Confidence_0.231_Length_143\tgi|351066204|gb|JF461542.1|_Flock_house_virus_isolate_IP-VIA-022011_coat_protein_precursor,_gene,_c\t92.31\t117\t9\t0\t1\t117\t596\t712\t6e-41\t 170\t1365\n+Locus_1_Transcript_2/7_Confidence_0.231_Length_143\tgi|152143030|gb|EF690538.1|_Flock_house_virus_isolate_TNCL_segment_RNA2_protein_alpha_mRNA,_complet\t91.45\t117\t10\t0\t1\t117\t597\t713\t7e-40\t 167\t1383\n+Locus_1_Transcript_2/7_Confidence_0.231_Length_143\tgi|9632343|ref|NC_002037.1|_Black_beetle_virus_RNA_2,_complete_sequence\t84.35\t115\t18\t0\t1\t115\t597\t711\t6e-28\t 127\t1399\n+Locus_1_Transcript_2/7_Confidence_0.231_Length_143\tgi|22711883|ref|NC_004144.1|_Flock_house_virus,_complete_genome\t83.48\t115\t19\t0\t1\t115\t597\t711\t3e-26\t 122\t1400\n+Locus_1_Transcript_3/7_Confidence_0.231_Length_407\tgi|262225312|gb|GQ342966.1|_Drosophila_melanogaster_American_nodavirus_(ANV)_SW-2009a_segment_RNA2\t97.67\t258\t6\t0\t1\t258\t455\t712\t2e-121\t 439\t1416\n+Locus_1_Transcript_3/7_Confidence_0.231_Length_407\tgi|262225312|gb|GQ342966.1|_Drosophila_melanogaster_American_nodavirus_(ANV)_SW-2009a_segment_RNA2\t87.95\t166\t1\t1\t259\t405\t1250\t1415\t3e-56\t 223\t1416\n+Locus_1_Transcript_3/7_Confidence_0.231_Length_407\tgi|262225312|gb|GQ342966.1|_Drosophila_melanogaster_American_nodavirus_(ANV)_SW-2009a_segment_RNA2\t85.71\t42\t6\t0\t339\t380\t112\t153\t4e-04\t50.0\t1416\n+Locus_1_Transcript_3/7_Confidence_0.231_Length_407\tgi|351066204|gb|JF461542.1|_Flock_house_virus_isolate_IP-VIA-022011_coat_protein_precursor,_gene,_c\t94.96\t258\t13\t0\t1\t258\t455\t712\t1e-111\t 407\t1365\n+Locus_1_Transcript_3/7_Confidence_0.231_Length_407\tgi|351066204|gb|JF461542.1|_Flock_house_virus_isolate_IP-VIA-022011_coat_protein_precursor,_gene,_c\t77.08\t96\t3\t1\t261\t337\t1252\t1347\t2e-15\t87.8\t1365\n+Locus_1_Transcript_3/7_Confidence_0.231_Length_407\tgi|351066204|gb|JF461542.1|_Flock_house_virus_isolate_IP-VIA-022011_coat_protein_precursor,_gene,_c\t85.71\t42\t6\t0\t339\t380\t112\t153\t4e-04\t50.0\t1365\n+Locus_1_Transcript_3/7_Confidence_0.231_Length_407\tgi|152143030|gb|EF690538.1|_Flock_house_virus_isolate_TNCL_segment_RNA2_protein_alpha_mRNA,_complet\t93.02\t258\t18\t0\t1\t258\t456\t713\t5e-105\t 385\t1383\n+Locus_1_Transcript_3/7_Confidence_0.231_Length_407\tgi|152143030|gb|EF690538.1|_Flock_house_virus_isolate_TNCL_segment_RNA2_protein_alpha_mRNA,_complet\t94.87\t78\t4\t0\t260\t337\t1252\t1329\t2e-26\t 123\t1383\n+Locus_1_Transcript_3/7_Confidence_0.231_Length_407\tgi|152143030|gb|EF690538.1|_Flock_house_virus_isolate_TNCL_segment_RNA2_protein_alpha_mRNA,_complet\t85.42\t48\t7\t0\t339\t386\t113\t160\t1e-05\t55.4\t1383\n+Locus_1_Transcript_3/7_Confidence_0.231_Length_407\tgi|22711883|ref|NC_004144.1|_Flock_house_virus,_complete_genome\t88.67\t256\t29\t0\t1\t256\t456\t711\t9e-89\t 331\t1400\n+Locus_1_Transcript_3/7_Confidence_0.231_Length_407\tgi|22711883|ref|NC_004144.1|_Flock_house_virus,_complete_genome\t86.54\t52\t7\t0\t286\t337\t1295\t1346\t7e-08\t62.6\t1400\n+Locus_1_Transcript_3/7_Co'..b'|13195547|gb|AF329080.1|_Boolarra_virus_segment_RNA1_protein_A_and_B2_genes,_complete_cds\t85.90\t78\t11\t0\t51\t128\t2747\t2670\t5e-17\t91.5\t3096\n+Locus_81_Transcript_1/1_Confidence_1.000_Length_155\tgi|13195547|gb|AF329080.1|_Boolarra_virus_segment_RNA1_protein_A_and_B2_genes,_complete_cds\t87.23\t47\t6\t0\t1\t47\t2838\t2792\t3e-07\t59.0\t3096\n+Locus_81_Transcript_1/1_Confidence_1.000_Length_155\tgi|22681027|ref|NC_004142.1|_Boolarra_virus_RNA1,_complete_genome\t85.90\t78\t11\t0\t51\t128\t2747\t2670\t5e-17\t91.5\t3096\n+Locus_81_Transcript_1/1_Confidence_1.000_Length_155\tgi|22681027|ref|NC_004142.1|_Boolarra_virus_RNA1,_complete_genome\t87.23\t47\t6\t0\t1\t47\t2838\t2792\t3e-07\t59.0\t3096\n+Locus_81_Transcript_1/1_Confidence_1.000_Length_155\tgi|210673|gb|M33065.1|BBVRNA3_Black_beetle_virus_RNA3_proteins_B1_and_B2_genes,_complete_cds\t95.83\t48\t2\t0\t1\t48\t133\t86\t3e-13\t78.8\t389\n+Locus_81_Transcript_1/1_Confidence_1.000_Length_155\tgi|210673|gb|M33065.1|BBVRNA3_Black_beetle_virus_RNA3_proteins_B1_and_B2_genes,_complete_cds\t90.70\t43\t4\t0\t50\t92\t43\t1\t8e-08\t60.8\t389\n+Locus_82_Transcript_1/1_Confidence_1.000_Length_165\tgi|262225309|gb|GQ342965.1|_Drosophila_melanogaster_American_nodavirus_(ANV)_SW-2009a_segment_RNA1\t100.00\t165\t0\t0\t1\t165\t2851\t2687\t2e-79\t 298\t3107\n+Locus_82_Transcript_1/1_Confidence_1.000_Length_165\tgi|351066201|gb|JF461541.1|_Flock_house_virus_isolate_IP-VIA-022011_segment_RNA1,_complete_sequence\t98.18\t165\t3\t0\t1\t165\t2851\t2687\t4e-75\t 284\t3107\n+Locus_82_Transcript_1/1_Confidence_1.000_Length_165\tgi|152143028|gb|EF690537.1|_Flock_house_virus_isolate_TNCL_segment_RNA1_protein_A_mRNA,_complete_cd\t95.15\t165\t8\t0\t1\t165\t2851\t2687\t1e-68\t 262\t3107\n+Locus_82_Transcript_1/1_Confidence_1.000_Length_165\tgi|22681055|ref|NC_004146.1|_Flock_house_virus_RNA_1,_complete_sequence\t92.12\t165\t13\t0\t1\t165\t2851\t2687\t2e-61\t 239\t3107\n+Locus_82_Transcript_1/1_Confidence_1.000_Length_165\tgi|51014188|ref|NC_001411.2|_Black_beetle_virus,_complete_genome\t92.12\t165\t13\t0\t1\t165\t2850\t2686\t2e-61\t 239\t3106\n+Locus_82_Transcript_1/1_Confidence_1.000_Length_165\tgi|210673|gb|M33065.1|BBVRNA3_Black_beetle_virus_RNA3_proteins_B1_and_B2_genes,_complete_cds\t93.23\t133\t9\t0\t1\t133\t133\t1\t1e-49\t 199\t389\n+Locus_82_Transcript_1/1_Confidence_1.000_Length_165\tgi|13195547|gb|AF329080.1|_Boolarra_virus_segment_RNA1_protein_A_and_B2_genes,_complete_cds\t84.85\t165\t25\t0\t1\t165\t2838\t2674\t3e-45\t 185\t3096\n+Locus_82_Transcript_1/1_Confidence_1.000_Length_165\tgi|22681027|ref|NC_004142.1|_Boolarra_virus_RNA1,_complete_genome\t84.85\t165\t25\t0\t1\t165\t2838\t2674\t3e-45\t 185\t3096\n+Locus_83_Transcript_1/1_Confidence_1.000_Length_196\tgi|262225309|gb|GQ342965.1|_Drosophila_melanogaster_American_nodavirus_(ANV)_SW-2009a_segment_RNA1\t99.41\t169\t1\t0\t1\t169\t2851\t2683\t7e-80\t 300\t3107\n+Locus_83_Transcript_1/1_Confidence_1.000_Length_196\tgi|351066201|gb|JF461541.1|_Flock_house_virus_isolate_IP-VIA-022011_segment_RNA1,_complete_sequence\t97.63\t169\t4\t0\t1\t169\t2851\t2683\t4e-76\t 288\t3107\n+Locus_83_Transcript_1/1_Confidence_1.000_Length_196\tgi|152143028|gb|EF690537.1|_Flock_house_virus_isolate_TNCL_segment_RNA1_protein_A_mRNA,_complete_cd\t94.67\t169\t9\t0\t1\t169\t2851\t2683\t5e-69\t 264\t3107\n+Locus_83_Transcript_1/1_Confidence_1.000_Length_196\tgi|22681055|ref|NC_004146.1|_Flock_house_virus_RNA_1,_complete_sequence\t91.72\t169\t14\t0\t1\t169\t2851\t2683\t2e-62\t 242\t3107\n+Locus_83_Transcript_1/1_Confidence_1.000_Length_196\tgi|51014188|ref|NC_001411.2|_Black_beetle_virus,_complete_genome\t91.72\t169\t14\t0\t1\t169\t2850\t2682\t2e-62\t 242\t3106\n+Locus_83_Transcript_1/1_Confidence_1.000_Length_196\tgi|210673|gb|M33065.1|BBVRNA3_Black_beetle_virus_RNA3_proteins_B1_and_B2_genes,_complete_cds\t93.23\t133\t9\t0\t1\t133\t133\t1\t2e-49\t 199\t389\n+Locus_83_Transcript_1/1_Confidence_1.000_Length_196\tgi|13195547|gb|AF329080.1|_Boolarra_virus_segment_RNA1_protein_A_and_B2_genes,_complete_cds\t84.62\t169\t26\t0\t1\t169\t2838\t2670\t3e-46\t 188\t3096\n+Locus_83_Transcript_1/1_Confidence_1.000_Length_196\tgi|22681027|ref|NC_004142.1|_Boolarra_virus_RNA1,_complete_genome\t84.62\t169\t26\t0\t1\t169\t2838\t2670\t3e-46\t 188\t3096\n' |
b |
diff -r 000000000000 -r 69ea2a13947f test-data/input.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input.fa Sun Jun 21 14:31:29 2015 -0400 |
b |
b'@@ -0,0 +1,1712 @@\n+>Locus_1_Transcript_1/7_Confidence_0.231_Length_224\n+CTTTCAGGTACGCTTCCATGAACGTGGGCATTTACCCAACGTCGAACTTGATGCAGTTTA\n+CCGGAAGCATAACTGTTTGGAAATGTCCTATAAAGCTGAGCACTGTACAATTCCCGGTTG\n+CTACAGAGCAAGCCACCTCTTCACTAGTACACATTCTTGCTGGTTTAAATGGTGTATTAG\n+CGGTGGGACCAGACAATTAAAACGGATGCCAACATAAATGGGCA\n+>Locus_1_Transcript_2/7_Confidence_0.231_Length_143\n+AATGGCCTATAAAGCTGAGCACTGTACAATTCCCGGTTGCTACAGAGCAAGCCACCTCTT\n+CACTAGTACACATTCTTGCTGGTTTAAATGGTGTATTAGCGGTGGGACCAGACAATTAAA\n+ACGGATGCCAACATAAATGGGCA\n+>Locus_1_Transcript_3/7_Confidence_0.231_Length_407\n+ATCCGGGCTTTACATCGATGTTCGGTACTACTGCAACATCTAGGTCCGACCAAGTGTCCT\n+ATTTCAGGTACGCTTCCATGAACGTGGGCATTTACCCAACGTCGAACTTGATGCAGTTTA\n+CCGGAAGCATAACTGTTTGGAAATGTCCTATAAAGCTGAGCACTGTACAATTCCCGGTTG\n+CTACAGAGCAAGCCACCTCTTCACTAGTACACATTCTTGCTGGTTTAAATGGTGTATTAG\n+CGGTGGGACCAGACAATTCAAACCGGATGCCAACCTAACTGGGCAATTAGGCCACAATAA\n+GCCCAACTTGGTTGATGATCGAAACAGTGAGCCCCCTCCCATGTAATGGAAGACGTCGAC\n+GCAATCGTGCGAGACGTAATGACAGTTTAAGTCAGCCGACTAAGGGG\n+>Locus_1_Transcript_4/7_Confidence_0.077_Length_591\n+GTAAACAATTCCAAGTTCCAAATGGTTAACAACATCAAACCAAGACGGCAACGATCCCAA\n+CGTGTTGCCGTAACAACAACCCAAACAGCGCCTATTCCACAGCGAAACGTACCACGTAAT\n+CGGAGTGAGCCGCGTCAATAATGAGGCGGAATCGCCGTCGTGTACGTGGCATGAATATGG\n+CGGCGCTTTCCCGGCTTAGTCAGCCTGGGTTAGCGTTTCTCAAGTGTGCATTCGCACCAC\n+CTGACTTTCAGGTACGCTTCCATGAACGTGGGCATTTACCCAACGTCGAACTTGATGCAG\n+TTTACCGGAAGCATAACTGTTTGGAAATGTCCTATAAAGCTGAGCACTGTACAATTCCCG\n+GTTGCTACAGAGCAAGCCACCTCTTCACTAGTACACATTCTTGCTGGTTTAAATGGTGTA\n+TTAGCGGTGGGACCAGACAATTCAAACCGGATGCCAACCTAACTGGGCAATTAGGCCACA\n+ATAAGCCCAACTTGGTTGATGATCGAAACAGTGAGCCCCCTCCCATGTAATGGAAGACGT\n+CGACGCAATCGTGCGAGACGTAATGACAGTTTAAGTCAGCCGACTAAGGGG\n+>Locus_1_Transcript_5/7_Confidence_0.231_Length_163\n+AATAATGAGGCGGAATCGCCGTCGTGGACGTGGCAAGGCAAAGCTAACGGTAAGCGAAGG\n+AAGGGCCCCCGAGGAGGCAAAATCAGCACTCGAAGAACGCCTCCGAAAGCTGGAGCTCAG\n+CCACAGCCTTCCAACAACCGGAAGTGACCCCCCACCCGCTAAA\n+>Locus_1_Transcript_6/7_Confidence_0.077_Length_216\n+GTAAACAATTCCAAGTTCCAAATGGTTAACAACATCAAACCAAGACGGCAACGATCCCAA\n+CGTGTTGCCGTAACAACAACCCAAACAGCGCCTATTCCACAGCGAAACGTACCACGTAAT\n+CGGAGTGAGCCGCGTCAATAATGAGGCGGAATCGCCGTCGTGGACGTGGCAAGGCAAAGC\n+TAACGGTAAGCGAAGGAAGGGCCCCCGAGGAGGCAA\n+>Locus_1_Transcript_7/7_Confidence_0.231_Length_184\n+AATAATGAGGCGGAATCGCCGTCGTGGACGTGGCAAGGCAAAGCTAACGGTAAGCGAAGG\n+AAGGGCCCCCGAGGAGGCAAAATCAGCACTCGAAGAACGCCTCCGAAAGCTGGAGCTCAG\n+CCCCCCACCCGCTAAACCGTAGCTGACTCCTAGGAGCACCTACACCCGTTCTAGCCCGAA\n+AGGG\n+>Locus_2_Transcript_1/5_Confidence_0.125_Length_327\n+TTCCTGCATCGATTTGTGGTCTTCGCACGCGCCTCGAACCGAAAACTAATGCATAAAGTA\n+GATCATAGAGCAACGACTGCCTTTTAAAGAAATTTCTTTAGAGAAAGGACGAAAATCACG\n+AGACAATCTTTGGTTGGATTGTAACGTCCTGACTCGTAGAGGATTTCCCGACCATGAAGT\n+AGCAAACCCAAAACTCTTGATATCTCAACATATGTATACGTAATGGATGCATCAAACCCA\n+ACTAATTCGTCTGAAAGGTTTACCTTGGAGGACTTGCGGTACGAACATGTGAAGAGAAAG\n+CGTCAGGACCTTGCTGCCCTCCATACC\n+>Locus_2_Transcript_2/5_Confidence_0.250_Length_809\n+TTCCTGCATCGATTTGTGGTCTTCGCACGCGCCTCGAACCGAAAACTAATGCATAAAGTA\n+GATCATAGAGCAACGACTGCCTTTTAAAGAAATTTCTTTAGAGAAAGGACGAAAATCACG\n+AGACAATCTTTGGTTGGATTGTAACGTCCTGACTCGTAGAGGATTTCCCGACCATGAAGT\n+AGCAAACCCAAAACTCTTGATATCTCAACATATGTATACGTAATGGATGCATCAAACCCA\n+ACTAATTCGTCTGAAAGGTTTACCTTGGAGGACTTGCGGTACGAACATGTGAAGAGAAAG\n+CGTCAGGACCTTGCTGCCCTCCATACCATGGCCAAAGCCTCCACACGAACATATGTTGGT\n+GGTGCGCTCCCTGACCGTGACAACATCATGGAGGTGTTCAAACAGTTGGACGCAGCTACT\n+CAAGCGGTTGACCCGACACCAATGTCTGACTTCTCATACCTAAATTCTCATCCGGCAATT\n+CCGGTATGGAGGGCAGCAAGGTCCTGACGCTTTCTCTTCACATGTTCGTACCGCAAGTCC\n+TCCAAGGTAAACCTTTCAGACGAATTAGTTGGGTTTGATGCATCCATTACGTATACATAT\n+GTTGAGATATCAAGAGTTTTGGGTTTGCTACTTCATGGTCGGGAAATCCTCTACGAGTCA\n+GGACGTTACAATCCAACCAAAGATTGTCTCGTGATTTTCGTCCTTTCTCTAAAGAAATTT\n+CTTTAAAAGGCAGTCGTTGCTCTATGATCTACTTTATGCATTAGTTTTCGGTTCGAGGCG\n+CGTGCGAAGACCACAAATCGATGCAGGAA\n+>Locus_2_Transcript_3/5_Confidence_0.250_Length_207\n+GTCAGGACCTTGCTGCCCTCCATACCATGGCCAAAGCCTCCACACGAACATATGTTGGTG\n+GTGCGCTCCCTGACCGTGACAACATCATGGAGGTGTTCAAACAGTTGGACGCAGCTACTC\n+AAGCGGTTGACCCGACACCAATGTCTGACTTCTCATACCTAAATTCTCATCCGGCAATTC\n+CGGTATGGAGGGCAGCAAGGTCCTGAC\n+>Locus_2_Transcript_4/5_Confidence_0.125_Length_605\n+GTCAGGA'..b'GGCGTCTTAGAAACGCTTA\n+>Locus_67_Transcript_1/2_Confidence_0.333_Length_210\n+TGCAACAACTTTAATATACGCTATTGGAGCTGGAATTACCGCGGCTGCTGGCACCAGACT\n+TGCCCTCCAATTGGTCCTTGTTAAATGCGCTGGATGGTGGTGGTTTTTGGCAAAACACAG\n+TGGTGTCTCGATAGCGGGGCTACCAGTCACATGTGCTGTGACAGAGGTGTTTTTACTGAG\n+TATGAAGAGCACACTGAAAAAATTAGTCTT\n+>Locus_67_Transcript_2/2_Confidence_0.333_Length_155\n+AGCGACACAAAAACAATGCTCCTTGTTAAATGCGCTGGATGGTGGTGGTTTTTGGCAAAA\n+CACAGTGGTGTCTCGATAGCGGGGCTACCAGTCACATGTGCTGTGACAGAGGTGTTTTTA\n+CTGAGTATGAAGAGCACACTGAAAAAATTAGTCTT\n+>Locus_68_Transcript_1/1_Confidence_0.000_Length_211\n+TGAAAACTCTTTTAAACCCAGCACAATTTATCTCAGACATTCCTGATGATATAATGATCC\n+GACACGTAAACAGGGCCCAGACCATCACCTACAACTTGAAGTCAGGGTCCTCTGGCACCG\n+GCCTGATCGTGGTCTATCCAAACACCCCGTCGAGTATTAGCGGCTTCCATTACATATGGG\n+ATTCCGCTACCTCGAATTGGGTGTTTGATCA\n+>Locus_69_Transcript_1/1_Confidence_0.000_Length_164\n+CCAGAGGATTGGATGTTATTGACAGGATCCTATCGTAAGAGTAGTCAGACACTTCACTCA\n+AGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCATAAACACCCGCAGGTA\n+AGGTGCTGGACTTAATGCTTAGCGAGCCTGAAATCAGTCTGCCA\n+>Locus_70_Transcript_1/1_Confidence_0.000_Length_147\n+AAAAGTTAATTTTGTTAGTAAGAAAATTTGATAAAACTTACAGGAAAGGCGATAAAACTT\n+TCCTTTTAACAAGGTTCGATTAGTACACACGAGAAGAACCAAGAAGAAACTCGAGTGACA\n+TTAATGACTACAATAAGATGTTAGTAA\n+>Locus_71_Transcript_1/2_Confidence_0.333_Length_170\n+GATTAATGAAAACATCTTTGGCAAATGCTTTCGCAGTCGGACGTCTCGCTACGGTCCAAG\n+AATTTCACCTCTCGCGTCGTAATACTAATGCCCCCAAACTGCTTCTATTAATCATTACCT\n+CTTGATCTGAAAACCAATGAAAGCAGAACAGAGGTCTTATTTCATTATCC\n+>Locus_71_Transcript_2/2_Confidence_0.333_Length_106\n+ACACCTCTCGCGTCGTAATACTAATGCCCCCAAACTGCTTCTATTAATCATTACCTCTTG\n+ATCTGAAAACCAATGAAAGCAGAACAGAGGTCTTATTTCATTATCC\n+>Locus_72_Transcript_1/1_Confidence_0.000_Length_108\n+AAATGATCTCATCTCTGTATTCTGGCCGGAATGCTTGAACCATCGCCTTTTGAGCGATAT\n+TTCGTTGCATCCAGCCAGAAACCCGGCCATCAAGGTTGGAAAAATCCG\n+>Locus_73_Transcript_1/1_Confidence_0.000_Length_164\n+TCGGATAATATCCGGCTTTCCTGACATCCTTTTCATCCTGAAGGTCTCCAGATACACATT\n+AGCGTACTCGGATATCGTTCTACATGCCGAGCATAATCAACATTGGTATTATCCTGGAAG\n+GAACCCAACTGAGATCGCCGATGGGGTTTGTGAATTTGTCAGTG\n+>Locus_74_Transcript_1/2_Confidence_0.333_Length_232\n+ATCATATCCAACAGCTCTTCAGCTCTGCTCCTCGTTATCTTCGGAAATGGTTGCACATGG\n+ATTTTTGCCAGCATGCACGTCTTGCATACTGCGTCTGGTTTGAAAACGACCTTTTCAACA\n+CCGTACACCATCCTCTTCCTCACCATCTCCTGTAGGCTGCTTGTATTCAAATGGCCATAC\n+CTTTTATGCCATAGTGAACCATCAGCATCAACGGCCGCAAAACAACTGTTAT\n+>Locus_74_Transcript_2/2_Confidence_0.333_Length_109\n+CATTCGCTCTTCCTCCTCACCATCTCCTGTAGGCTGCTTGTATTCAAATGGCCATACCTT\n+TTATGCCATAGTGAACCATCAGCATCAACGGCCGCAAAACAACTGTTAT\n+>Locus_76_Transcript_1/1_Confidence_0.500_Length_196\n+TCAGTTTCAGCTTTGTTTTTCAGCCTGCTGCTACCATTCAGGTTCCACCTCCTTTTCAGT\n+TTAAAGTGGGTTTCTCGGCCAGAGTTGGGCAATTCCATTACTCTATTAGGGGGAAAAAAT\n+GGTAATTTGCTGCTCGATTATGACGAAGTCATTTGATCCGCTTCACATGTGACGTCGTCT\n+TTTTGTTGGGCCTGTC\n+>Locus_78_Transcript_1/1_Confidence_1.000_Length_154\n+AACGTGCGCAGGGACCTCATACCTCGAAGGAAGGGCCCCCCGAGGAGGCAAATCAGCACT\n+CGAAGAACGCCTCCGAAAGCTGGAGCTCAGCCCCCCACCCGCTAAACCGTAGGTGTCTCC\n+TAGGAGCACCCACACACGTTCTAGCCCGAAAGGG\n+>Locus_79_Transcript_1/1_Confidence_1.000_Length_154\n+AACGTGCGCAGGGACCTCATACCTCGAAGGAAGGGCCCCCCGAGGAGGCAAATCAGCACT\n+CGAAGAACGCCTCCGAAAGCTGGAGCTCAGCCCCCCACCCGCTAAACCGTAGCTGACTCC\n+TAGGAGCACCTACACCCGTTCTAGCCCGAAAGGG\n+>Locus_80_Transcript_1/1_Confidence_1.000_Length_101\n+GAGGTCCCTGCGCACGTTGTTCGGTGCGTCCTGATAGCTCATTCCCATTGCTGCTTCCGC\n+CGCCGTTTGAATGCGGTCGGGAAGTTCCTGGATTAGCGCAA\n+>Locus_81_Transcript_1/1_Confidence_1.000_Length_155\n+GAGGTCCCTGCGCACGTTGTTCGGTGCGTCCTGATAGCTCATTCCCATTGATTAGCGCGA\n+GTTTGCTTGGCATCGTTAAGCTTAGGTAACGATTCATCCACGGAGCCCGGCATAACGCCG\n+TCTTAATCTTCTCCCAGCATGATCCGTGTAACTCG\n+>Locus_82_Transcript_1/1_Confidence_1.000_Length_165\n+GAGGTCCCTGCGCACGTTGTTCGGTGCGTCCTGATAGCTCATTCCCATTGCTGCTTCCGC\n+CGCCGTTTGAATGCGGTCGGGAAGTTCCTGGATTAGCGCGAGTTTGCTTGGCATCGTTAA\n+GCTTAGGTAACGATTCATCCACGGAGCCCGGCATAACGCCGTCTT\n+>Locus_83_Transcript_1/1_Confidence_1.000_Length_196\n+GAGGTCCCTGCGCACGTTGTTCGGTGCGTCCTGATAGCTCATTCCCATTGCTGCTTCCGC\n+CGCCGTTTGAATGCGGTCGGGAAGTTCCTGGATTAGCGCGAGTTTGCTTGGCATCGTTAA\n+GCTTAGGTAACGATTCATCCACGGAGCCCGGCATAACGCCGTCTTAATCTTCTCCCAGCA\n+TGATCCGTGTAACTCG\n' |
b |
diff -r 000000000000 -r 69ea2a13947f test-data/output.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.fa Sun Jun 21 14:31:29 2015 -0400 |
b |
b'@@ -0,0 +1,5087 @@\n+>gi|262225307|gb|GQ342964.1|_Drosophila_melanogaster_tetravirus_SW-2009a_strain_DTRV_putative_RNA-de--Locus_42_Transcript_2/2_Confidence_0.333_Length_597_hit1_IdMatch=100.0,AligLength=593,E-val=0.0\n+TTCTGAGGTACGGCCAAATGCTGCCAATCCTGTTGGAGTTAGGCTCTATGCCCACAGCCA\n+CCATACCCAGGACATGAAGCTGTTGGGAGAAGCCTGGTCCTCCTCTGCCAAACTGGCTAA\n+GGAATACATCTGTGCAGCTATTTTGGAGGTTAACAACATCCCATACAAGAAGTCTAGCTT\n+TGCGACCCCTAATGTAGTGCCTCCCCCGCCTGAGTCTTCCTGGGCTCGGCAGGTTGAATA\n+CGCAACTGCCCCTAAAGCTGTGCCACAGATTGTTCCCGTTCATGAAACCCTGGATTTGGA\n+GCTGTTTCACAGTATAATGCAGCAATTCCCAGGGCACTGTCCCCGCCAGGTGCAGATGCT\n+TATTAGAGAGGTAAGTCATAAAAATCCTGATGACTACCGTGTGAGAATTCAAGACTTATT\n+CTCCCGGTTTCCACCTCCTAAAGTAGCATCGAAGAGATCACTGCTGAGCGCAGAACAAAG\n+GACGAGACTCAACAAAAAGACTCTCGACCGCAAAAAGAGGAAGAAAATTGCAAGTAAGCT\n+CTTGCCAACCCTATAACATCAAACAACCCAAACCTCAAATCAAGAATACAATTT\n+>gi|262225307|gb|GQ342964.1|_Drosophila_melanogaster_tetravirus_SW-2009a_strain_DTRV_putative_RNA-de--Locus_42_Transcript_1/2_Confidence_0.333_Length_3138_hit1_IdMatch=100.0,AligLength=3001,E-val=0.0\n+CCCAAGTATCCCTGTTTGTGGGATTATGGATGCTGTTGGTGTCCCGTTGCACCCCCCATC\n+CATTAACAATCGGGGTGGGCATACTCATGTCACTCGCCAGATGGGCGGAGTACCCCACCC\n+TCCTCTTGGTCTAATACCCATAGCTCAGAAGTATGGGGTAGAGATCTTGGACCAGAAACA\n+TGTCTACACAGGAGGCACTTGGGATGGCTTTCTTAATAGACTCGCCCAACAGATGGGCCG\n+GACAACGGTCCCCCTGGCAAGAACAGTTTCAGCAGGCAAAAAGAATGCCTCTGATAGGGC\n+AATCATAAAACTGTTAAATAAATACATGCCTAGGACGGGAAATAAGAACTGGCCTGGTGT\n+CCATACAACCCCGGAAGAAGGCATACTTGATGGTATCAAGATAACGGCCAAATCCAGTGC\n+AGGCGCTCCCTATTGGAGACACAAGGGGGAATGCCTAGACCATATAATTGATACTGGACT\n+TCCTGTTGTGCTAAAGCATATAAAGGAAGGAACCCTCAACCAGTTGTGGCGGGAAAACCC\n+TGAGATGTTCCTCGTTGAAGTTAAGAACAAACTTGATAGATACGAAGTCTCGAAGCTTAA\n+AGAGAAGACTAGACCCTATGTGTGTGTACCTGCGCACTGGGCTCTCCTATTCTCGTGCTT\n+GACTCAAGGATTCCAAGAAGGACTTCAAGTTTTTAGTAACTTGGACCCCAGTACAGAATG\n+CTCGAATGCATATGGATTTTCATCCATTGCTGGAGGGCTTACTAGGATGGTCGACTGGAT\n+GTATGCCTGCCCAAAGCGGCGGGGACGGGTTGTCTGTTACGGTGACGATGCATGTATAAC\n+ATTTTGGAGCCAGGGCGTCCTCTATCGGGTGGATCCAGATTTTAAACAAATGGACGGGTC\n+CATAGATAGAGAGGATGCAAGAATCACCATTGAGTGGGTCCTCCACCATCTCAGAAAGGA\n+TTTAGGTGTAGAGGAGACCCCTGCTTTTTGGAAGACAGTTGCAGCAGTGTGGCTTGATAT\n+GGCCATTGACCCCCACTTTATCGTGGATGGTAAGACTGTTTACAGGAAGAAGAACCCCCA\n+TGGACTCATGACTGGAGTTCCAGGAACAACCTTATTTGACACTGTGAAATCTGTAATAGT\n+CTGGAACGAAATGTTGGATCAGGCTAGTGCAGGTTCCATAGACCTTTTAAATGAAGCTCA\n+AGTAGTTAAATGGATGAAGAGACAAGGCTTGGTTGTCAAAGAGGGAACTTGGAGTCCGGT\n+TGCACTCCCTGCAAGGGACACAGAGGGTCTAATTACGGACCACAAATTTCTTGGTGTACA\n+AATTATGGGAGTGTACCACAGACACCGTGTGATACATGTCCCCACAATGCCTGAGAGCGA\n+TGCTCTTGAGATGATGCTCTGCCAGAAGGATAACCCCTTTGAAAAGGCAGTATCTAGAAC\n+TGCACACCAAAGAACTCTGTATGACCGTATGAGGGGTTTAATGATAACAATGGGATTCAG\n+CATACCTCGGATCGAAGAGACAATACACGCTGTGGTTAATACAATCCCCGGCGAGATTAT\n+TGTCATGCAAACGCAAGAGCAAACCGGAACGAAGCCAGAACATATAACCCTCCAGGACTT\n+TGAATATCCAGATTCGTCTGGATTCCCCTCTCGGGACTTCTGTCTAGACCTCTATTCAGA\n+TGGAGGGGATGACAAAGCGGGATGGATTAATCTGTTCCCCACATTGTCAGGATTCCTTGA\n+CGAGTTTAAGAGGGAACAGAGAGTGGCTGTGAGGCAGATAAATTTGACAGTCCAATCCAA\n+CGACTATGATGTCAAGGAGGTTGTAGGATGTCCTCCTCCTCCTGAGGCGAATCTGAATGA\n+TGAGTACAAAGTGTTTGAGGCTCTTAAACCTCAGCAAGTCCAGTATTCAGAACCGAACCC\n+CAGACCCAAAGTAGTTCGGATTACAGAAAGTGGTGACATCCCTGAGAAATTCCTACCAAA\n+TATGGCTCAAGCCGTCGTTAGATGGCTTACCTCAGTTGGAGGTGTTTCTCAGGTTGGTAC\n+TGTTGCCGATAAAGTTGGAGCCAGTGCCTACCAAATTGTTGTGGGTGCTGCTAAAGGGGG\n+CTATTTCACTACTGGAGATGAACTTGGAGACTTGATTTCCCTATACCCACTAGTGACTCC\n+CTTCCCTACATTGCAGGACAGTCAAAGAGAAGAAATGGAGGAAAATCGTAATCTGATTGA\n+TAGGACTACTGCAGCCAGAACGTCAGCTTTGAGACGGGGGATTGTAAAAACCCAACCTGA\n+GCTCATTAATCTGGACGTTGCAGGAGTATCCAACCTCCATCCACCTCCCTACGATATAAA\n+TACAGCAGAGGATGCCATGGCATATGTCCATGCAGTGGTCAGCGGGAGATTTTCAGGATT\n+CACTAAATGGATATCTGAGGTACGGCCAAATGCTGCCAATCCTGTTGGAGTTAGGCTCTA\n+TGCCCACAGCCACCATACCCAGGACATGAAGCTGTTGGGAGAAGCCTGGTCCTCCTCTGC\n+CAAACTGGCTAAGGAATACATCTGTGCAGCTATTTTGGAGGTTAACAACATCCCATACAA\n+GAAGTCTAGCTTTGCGACCCCTAATGTAGTGCCTCCCCCGCCTGAGTCTTCCTGGGCTCG\n+GCAGGTTGAATACGCAACTGCCCCTAAAGCTGTGCCACAGATTGTTCCCGTTCATGAAAC\n+CCTGGATTTGGAGCTGTTTCACAGTATAATGCAGCAATTCCCAGGGCACTGTCCCCGCCA\n+GGTGCAGATGCTTATTAGAGAGGTAAGTCATAAAAATCCTGATGACTACCGTGTGAGAAT\n+TCAAGACTTATTCTCCCGGTTTCCACCTCCTAAAGTAGCATCGAAGAGATC'..b'Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n+>gi|350543519|gb|JF907703.1|_Infectious_bursal_disease_virus_isolate_2009CAH495-SESW_polyprotein_gen--Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n+>gi|157886466|emb|AM111353.1|_Infectious_bursal_disease_virus_segment_A,_complete_sequence,_genomic_--Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n+>gi|667756318|gb|KF569805.1|_Infectious_bursal_disease_virus_isolate_HuB-1_segment_A,_complete_seque--Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n+>gi|89112096|gb|AY444873.3|_Infectious_bursal_disease_virus_VP5_protein_and_structural_polyprotein_g--Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n+>gi|395863892|gb|JN982256.1|_Infectious_bursal_disease_virus_isolate_SP33_VP2_protein_gene,_complete--Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n+>gi|630375502|gb|KJ547673.1|_Infectious_bursal_disease_virus_isolate_VRDC-IBDV-WZ_polyprotein_mRNA,_--Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n+>gi|452029935|gb|KC189836.1|_Infectious_bursal_disease_virus_strain_3529/92_polyprotein_mRNA,_comple--Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n+>gi|630375496|gb|KJ547670.1|_Infectious_bursal_disease_virus_isolate_Ventri-IBDV-Plus_polyprotein_mR--Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n+>gi|395863906|gb|JN982263.1|_Infectious_bursal_disease_virus_isolate_SC6_VP2_protein_gene,_complete_--Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n+>gi|395863910|gb|JN982265.1|_Infectious_bursal_disease_virus_isolate_SP21_VP2_protein_gene,_complete--Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n+>gi|367057603|gb|JN585293.1|_Infectious_bursal_disease_virus_isolate_CAHFS_K669_segment_A,_complete_--Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n+>gi|395863918|gb|JN982269.1|_Infectious_bursal_disease_virus_isolate_SC12_VP2_protein_gene,_complete--Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n+>gi|24306007|gb|AF322444.1|_Infectious_bursal_disease_virus_segment_A_VP5_protein_and_polyprotein_ge--Locus_69_Transcript_1/1_Confidence_0.000_Length_164_hit1_IdMatch=73.17,AligLength=82,E-val=2e-04\n+TCAGACACTTCACTCAAGGTCCCTTGGAACCAGACTGCATTGAATGTTCCATTCAGTGCA\n+TAAACACCCGCAGGTAAGGTGCTGGAC\n+\n' |
b |
diff -r 000000000000 -r 69ea2a13947f test-data/output.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.tab Sun Jun 21 14:31:29 2015 -0400 |
b |
b'@@ -0,0 +1,1810 @@\n+# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n+\n+#\n+# gi|262225307|gb|GQ342964.1|_Drosophila_melanogaster_tetravirus_SW-2009a_strain_DTRV_putative_RNA-de\n+# Suject Length: 3005\n+# Total Subject Coverage: 3001\n+# Relative Subject Coverage: 0.998668885191\n+# Maximum Bit Score: 5413.0\n+# Mean Bit Score: 3241.5\n+Locus_42_Transcript_2/2_Confidence_0.333_Length_597\t100.0\t593\t2409\t3001\t99.2\t0.0\t1070.0\n+Locus_42_Transcript_1/2_Confidence_0.333_Length_3138\t100.0\t3001\t1\t3001\t95.6\t0.0\t5413.0\n+#\n+# gi|268053723|ref|NC_013499.1|_Drosophila_melanogaster_totivirus_SW-2009a,_complete_genome\n+# Suject Length: 6780\n+# Total Subject Coverage: 6765\n+# Relative Subject Coverage: 0.997787610619\n+# Maximum Bit Score: 8001.0\n+# Mean Bit Score: 2184.97272727\n+Locus_10_Transcript_7/8_Confidence_0.111_Length_255\t100.0\t26\t6549\t6524\t9.8\t0.001\t48.2\n+Locus_7_Transcript_10/11_Confidence_0.154_Length_4093\t99.66\t4093\t542\t4628\t100.0\t0.0\t7319.0\n+Locus_7_Transcript_5/11_Confidence_0.308_Length_4519\t99.67\t3956\t679\t4628\t87.5\t0.0\t7077.0\n+Locus_7_Transcript_5/11_Confidence_0.308_Length_4519\t98.06\t566\t1\t563\t12.5\t0.0\t967.0\n+Locus_10_Transcript_4/8_Confidence_0.444_Length_1097\t99.91\t1087\t6731\t5645\t99.0\t0.0\t1956.0\n+Locus_7_Transcript_4/11_Confidence_0.154_Length_571\t99.82\t571\t3429\t3999\t99.8\t0.0\t1025.0\n+Locus_10_Transcript_3/8_Confidence_0.222_Length_529\t99.43\t529\t6170\t5645\t99.8\t0.0\t939.0\n+Locus_10_Transcript_6/8_Confidence_0.333_Length_1121\t99.91\t1121\t6765\t5645\t99.9\t0.0\t2017.0\n+Locus_7_Transcript_11/11_Confidence_0.000_Length_1206\t99.17\t1206\t542\t1741\t99.9\t0.0\t2131.0\n+Locus_10_Transcript_8/8_Confidence_0.000_Length_1134\t99.89\t905\t6549\t5645\t79.7\t0.0\t1627.0\n+Locus_10_Transcript_1/8_Confidence_0.222_Length_565\t99.81\t530\t5670\t5141\t93.6\t0.0\t951.0\n+Locus_7_Transcript_9/11_Confidence_0.154_Length_552\t99.09\t551\t4603\t5153\t99.6\t0.0\t971.0\n+Locus_7_Transcript_1/11_Confidence_0.154_Length_1632\t99.16\t1069\t679\t1741\t65.4\t0.0\t1889.0\n+Locus_7_Transcript_1/11_Confidence_0.154_Length_1632\t98.06\t566\t1\t563\t34.5\t0.0\t967.0\n+Locus_7_Transcript_7/11_Confidence_0.462_Length_4481\t99.6\t4481\t679\t5153\t100.0\t0.0\t8001.0\n+Locus_10_Transcript_5/8_Confidence_0.333_Length_628\t99.84\t628\t6765\t6138\t99.8\t0.0\t1128.0\n+Locus_8_Transcript_2/4_Confidence_0.200_Length_117\t100.0\t26\t5645\t5670\t21.4\t4e-04\t48.2\n+Locus_7_Transcript_8/11_Confidence_0.154_Length_552\t99.28\t552\t4602\t5153\t99.8\t0.0\t978.0\n+Locus_7_Transcript_6/11_Confidence_0.154_Length_656\t99.69\t655\t3974\t4628\t99.7\t0.0\t1173.0\n+Locus_10_Transcript_2/8_Confidence_0.111_Length_1023\t99.9\t1023\t6163\t5141\t99.9\t0.0\t1840.0\n+Locus_7_Transcript_2/11_Confidence_0.077_Length_1069\t99.06\t1069\t679\t1741\t99.9\t0.0\t1884.0\n+Locus_7_Transcript_3/11_Confidence_0.154_Length_1743\t99.89\t1742\t1716\t3457\t99.9\t0.0\t3133.0\n+#\n+# gi|262225299|gb|GQ342961.1|_Drosophila_melanogaster_totivirus_SW-2009a_strain_DTV,_complete_genome\n+# Suject Length: 6780\n+# Total Subject Coverage: 6765\n+# Relative Subject Coverage: 0.997787610619\n+# Maximum Bit Score: 8001.0\n+# Mean Bit Score: 2184.97272727\n+Locus_10_Transcript_7/8_Confidence_0.111_Length_255\t100.0\t26\t6549\t6524\t9.8\t0.001\t48.2\n+Locus_7_Transcript_10/11_Confidence_0.154_Length_4093\t99.66\t4093\t542\t4628\t100.0\t0.0\t7319.0\n+Locus_7_Transcript_5/11_Confidence_0.308_Length_4519\t99.67\t3956\t679\t4628\t87.5\t0.0\t7077.0\n+Locus_7_Transcript_5/11_Confidence_0.308_Length_4519\t98.06\t566\t1\t563\t12.5\t0.0\t967.0\n+Locus_10_Transcript_4/8_Confidence_0.444_Length_1097\t99.91\t1087\t6731\t5645\t99.0\t0.0\t1956.0\n+Locus_7_Transcript_4/11_Confidence_0.154_Length_571\t99.82\t571\t3429\t3999\t99.8\t0.0\t1025.0\n+Locus_10_Transcript_3/8_Confidence_0.222_Length_529\t99.43\t529\t6170\t5645\t99.8\t0.0\t939.0\n+Locus_10_Transcript_6/8_Confidence_0.333_Length_1121\t99.91\t1121\t6765\t5645\t99.9\t0.0\t2017.0\n+Locus_7_Transcript_11/11_Confidence_0.000_Length_1206\t99.17\t1206\t542\t1741\t99.9\t0.0\t2131.0\n+Locus_10_Transcript_8/8_Confidence_0.000_Length_1134\t99.89\t905\t6549\t5645\t79.7\t0.0\t1627.0\n+Locus_10_Transcript_1/8_Confidence_0.222_Length_565\t99.81\t530\t5670\t5141\t93.6'..b'_\n+# Suject Length: 3262\n+# Total Subject Coverage: 82\n+# Relative Subject Coverage: 0.0251379521766\n+# Maximum Bit Score: 50.0\n+# Mean Bit Score: 50.0\n+Locus_69_Transcript_1/1_Confidence_0.000_Length_164\t73.17\t82\t538\t457\t49.4\t2e-04\t50.0\n+#\n+# gi|667756318|gb|KF569805.1|_Infectious_bursal_disease_virus_isolate_HuB-1_segment_A,_complete_seque\n+# Suject Length: 3260\n+# Total Subject Coverage: 82\n+# Relative Subject Coverage: 0.0251533742331\n+# Maximum Bit Score: 50.0\n+# Mean Bit Score: 50.0\n+Locus_69_Transcript_1/1_Confidence_0.000_Length_164\t73.17\t82\t538\t457\t49.4\t2e-04\t50.0\n+#\n+# gi|89112096|gb|AY444873.3|_Infectious_bursal_disease_virus_VP5_protein_and_structural_polyprotein_g\n+# Suject Length: 3260\n+# Total Subject Coverage: 82\n+# Relative Subject Coverage: 0.0251533742331\n+# Maximum Bit Score: 50.0\n+# Mean Bit Score: 50.0\n+Locus_69_Transcript_1/1_Confidence_0.000_Length_164\t73.17\t82\t538\t457\t49.4\t2e-04\t50.0\n+#\n+# gi|395863892|gb|JN982256.1|_Infectious_bursal_disease_virus_isolate_SP33_VP2_protein_gene,_complete\n+# Suject Length: 1356\n+# Total Subject Coverage: 82\n+# Relative Subject Coverage: 0.0604719764012\n+# Maximum Bit Score: 50.0\n+# Mean Bit Score: 50.0\n+Locus_69_Transcript_1/1_Confidence_0.000_Length_164\t73.17\t82\t408\t327\t49.4\t2e-04\t50.0\n+#\n+# gi|630375502|gb|KJ547673.1|_Infectious_bursal_disease_virus_isolate_VRDC-IBDV-WZ_polyprotein_mRNA,_\n+# Suject Length: 3084\n+# Total Subject Coverage: 82\n+# Relative Subject Coverage: 0.026588845655\n+# Maximum Bit Score: 50.0\n+# Mean Bit Score: 50.0\n+Locus_69_Transcript_1/1_Confidence_0.000_Length_164\t73.17\t82\t446\t365\t49.4\t2e-04\t50.0\n+#\n+# gi|452029935|gb|KC189836.1|_Infectious_bursal_disease_virus_strain_3529/92_polyprotein_mRNA,_comple\n+# Suject Length: 3039\n+# Total Subject Coverage: 82\n+# Relative Subject Coverage: 0.0269825600526\n+# Maximum Bit Score: 50.0\n+# Mean Bit Score: 50.0\n+Locus_69_Transcript_1/1_Confidence_0.000_Length_164\t73.17\t82\t408\t327\t49.4\t2e-04\t50.0\n+#\n+# gi|630375496|gb|KJ547670.1|_Infectious_bursal_disease_virus_isolate_Ventri-IBDV-Plus_polyprotein_mR\n+# Suject Length: 3040\n+# Total Subject Coverage: 82\n+# Relative Subject Coverage: 0.0269736842105\n+# Maximum Bit Score: 50.0\n+# Mean Bit Score: 50.0\n+Locus_69_Transcript_1/1_Confidence_0.000_Length_164\t73.17\t82\t409\t328\t49.4\t2e-04\t50.0\n+#\n+# gi|395863906|gb|JN982263.1|_Infectious_bursal_disease_virus_isolate_SC6_VP2_protein_gene,_complete_\n+# Suject Length: 1356\n+# Total Subject Coverage: 82\n+# Relative Subject Coverage: 0.0604719764012\n+# Maximum Bit Score: 50.0\n+# Mean Bit Score: 50.0\n+Locus_69_Transcript_1/1_Confidence_0.000_Length_164\t73.17\t82\t408\t327\t49.4\t2e-04\t50.0\n+#\n+# gi|395863910|gb|JN982265.1|_Infectious_bursal_disease_virus_isolate_SP21_VP2_protein_gene,_complete\n+# Suject Length: 1356\n+# Total Subject Coverage: 82\n+# Relative Subject Coverage: 0.0604719764012\n+# Maximum Bit Score: 50.0\n+# Mean Bit Score: 50.0\n+Locus_69_Transcript_1/1_Confidence_0.000_Length_164\t73.17\t82\t408\t327\t49.4\t2e-04\t50.0\n+#\n+# gi|367057603|gb|JN585293.1|_Infectious_bursal_disease_virus_isolate_CAHFS_K669_segment_A,_complete_\n+# Suject Length: 3184\n+# Total Subject Coverage: 82\n+# Relative Subject Coverage: 0.0257537688442\n+# Maximum Bit Score: 50.0\n+# Mean Bit Score: 50.0\n+Locus_69_Transcript_1/1_Confidence_0.000_Length_164\t73.17\t82\t532\t451\t49.4\t2e-04\t50.0\n+#\n+# gi|395863918|gb|JN982269.1|_Infectious_bursal_disease_virus_isolate_SC12_VP2_protein_gene,_complete\n+# Suject Length: 1356\n+# Total Subject Coverage: 82\n+# Relative Subject Coverage: 0.0604719764012\n+# Maximum Bit Score: 50.0\n+# Mean Bit Score: 50.0\n+Locus_69_Transcript_1/1_Confidence_0.000_Length_164\t73.17\t82\t408\t327\t49.4\t2e-04\t50.0\n+#\n+# gi|24306007|gb|AF322444.1|_Infectious_bursal_disease_virus_segment_A_VP5_protein_and_polyprotein_ge\n+# Suject Length: 3085\n+# Total Subject Coverage: 82\n+# Relative Subject Coverage: 0.0265802269044\n+# Maximum Bit Score: 50.0\n+# Mean Bit Score: 50.0\n+Locus_69_Transcript_1/1_Confidence_0.000_Length_164\t73.17\t82\t454\t373\t49.4\t2e-04\t50.0\n' |