Next changeset 1:5da5dcc5e13a (2014-10-08) |
Commit message:
Uploaded |
added:
blastxml_to_tabular_selectable.py blastxml_to_tabular_selectable.xml test-data/._blastp_rhodopsin_gibberish.xml test-data/._blastp_rhodopsin_proteins.xml test-data/blastp_rhodopsin_gibberish.xml test-data/blastp_rhodopsin_proteins.xml test-data/blastp_rhodopsin_proteins_ext.tabular test-data/blastp_rhodopsin_proteins_ext_allhits.tabular test-data/blastp_rhodopsin_proteins_ext_allqueries.tabular test-data/blastp_rhodopsin_proteins_selcol.tabular test-data/blastp_rhodopsin_proteins_std.tabular test-data/unmatched_queries.tabular |
b |
diff -r 000000000000 -r 2bd0cbccb3c6 blastxml_to_tabular_selectable.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/blastxml_to_tabular_selectable.py Wed Oct 08 19:38:28 2014 -0400 |
[ |
b'@@ -0,0 +1,329 @@\n+#!/usr/bin/env python\n+"""Convert a BLAST XML file to 12 column tabular output\n+\n+Takes three command line options, input BLAST XML filename, output tabular\n+BLAST filename, output format (std for standard 12 columns, or ext for the\n+extended 25 columns offered in the BLAST+ wrappers).\n+\n+The 12 columns output are \'qseqid sseqid pident length mismatch gapopen qstart\n+qend sstart send evalue bitscore\' or \'std\' at the BLAST+ command line, which\n+mean:\n+ \n+====== ========= ============================================\n+Column NCBI name Description\n+------ --------- --------------------------------------------\n+ 1 qseqid Query Seq-id (ID of your sequence)\n+ 2 sseqid Subject Seq-id (ID of the database hit)\n+ 3 pident Percentage of identical matches\n+ 4 length Alignment length\n+ 5 mismatch Number of mismatches\n+ 6 gapopen Number of gap openings\n+ 7 qstart Start of alignment in query\n+ 8 qend End of alignment in query\n+ 9 sstart Start of alignment in subject (database hit)\n+ 10 send End of alignment in subject (database hit)\n+ 11 evalue Expectation value (E-value)\n+ 12 bitscore Bit score\n+====== ========= ============================================\n+\n+The additional columns offered in the Galaxy BLAST+ wrappers are:\n+\n+====== ============= ===========================================\n+Column NCBI name Description\n+------ ------------- -------------------------------------------\n+ 13 sallseqid All subject Seq-id(s), separated by a \';\'\n+ 14 score Raw score\n+ 15 nident Number of identical matches\n+ 16 positive Number of positive-scoring matches\n+ 17 gaps Total number of gaps\n+ 18 ppos Percentage of positive-scoring matches\n+ 19 qframe Query frame\n+ 20 sframe Subject frame\n+ 21 qseq Aligned part of query sequence\n+ 22 sseq Aligned part of subject sequence\n+ 23 qlen Query sequence length\n+ 24 slen Subject sequence length\n+ 25 salltitles All subject titles, separated by \'<>\'\n+====== ============= ===========================================\n+\n+Most of these fields are given explicitly in the XML file, others some like\n+the percentage identity and the number of gap openings must be calculated.\n+\n+Be aware that the sequence in the extended tabular output or XML direct from\n+BLAST+ may or may not use XXXX masking on regions of low complexity. This\n+can throw the off the calculation of percentage identity and gap openings.\n+[In fact, both BLAST 2.2.24+ and 2.2.25+ have a subtle bug in this regard,\n+with these numbers changing depending on whether or not the low complexity\n+filter is used.]\n+\n+This script attempts to produce identical output to what BLAST+ would have done.\n+However, check this with "diff -b ..." since BLAST+ sometimes includes an extra\n+space character (probably a bug).\n+"""\n+import sys\n+import re\n+import os\n+from optparse import OptionParser\n+\n+if "-v" in sys.argv or "--version" in sys.argv:\n+ print "v0.0.12"\n+ sys.exit(0)\n+\n+if sys.version_info[:2] >= ( 2, 5 ):\n+ try:\n+ from xml.etree import cElementTree as ElementTree\n+ except ImportError:\n+ from xml.etree import ElementTree as ElementTree\n+else:\n+ from galaxy import eggs\n+ import pkg_resources; pkg_resources.require( "elementtree" )\n+ from elementtree import ElementTree\n+\n+def stop_err( msg ):\n+ sys.stderr.write("%s\\n" % msg)\n+ sys.exit(1)\n+\n+usage = "usage: %prog [options] blastxml[,...]"\n+parser = OptionParser(usage=usage)\n+parser.add_option(\'-o\',\'--output\', dest=\'output\', default = None, help=\'output file path\', metavar="FILE")\n+parser.add_option("-c", "--columns", dest="columns", default=\'std\', help="[std|ext|colname[,colname,...]] std: 12 column, ext: 25 column, or user specified columns")\n+parser.add_option("-a", "--allqueries", action="store_true", dest="allqueries", default=False, help='..b' expected_identity + q_seq.count("X")))\n+ \n+\n+ evalue = hsp.findtext("Hsp_evalue")\n+ if evalue == "0":\n+ evalue = "0.0"\n+ else:\n+ evalue = "%0.0e" % float(evalue)\n+ \n+ bitscore = float(hsp.findtext("Hsp_bit-score"))\n+ if bitscore < 100:\n+ #Seems to show one decimal place for lower scores\n+ bitscore = "%0.1f" % bitscore\n+ else:\n+ #Note BLAST does not round to nearest int, it truncates\n+ bitscore = "%i" % bitscore\n+\n+ values = [qseqid,\n+ sseqid,\n+ pident,\n+ length, #hsp.findtext("Hsp_align-len")\n+ str(mismatch),\n+ gapopen,\n+ hsp.findtext("Hsp_query-from"), #qstart,\n+ hsp.findtext("Hsp_query-to"), #qend,\n+ hsp.findtext("Hsp_hit-from"), #sstart,\n+ hsp.findtext("Hsp_hit-to"), #send,\n+ evalue, #hsp.findtext("Hsp_evalue") in scientific notation\n+ bitscore, #hsp.findtext("Hsp_bit-score") rounded\n+ ]\n+\n+ if extended:\n+ sallseqid = ";".join(name.split(None,1)[0] for name in hit_def.split(">"))\n+ salltitles = "<>".join(name.split(None,1)[1] for name in hit_def.split(" >"))\n+ #print hit_def, "-->", sallseqid\n+ positive = hsp.findtext("Hsp_positive")\n+ ppos = "%0.2f" % (100*float(positive)/float(length))\n+ qframe = hsp.findtext("Hsp_query-frame")\n+ sframe = hsp.findtext("Hsp_hit-frame")\n+ if blast_program == "blastp":\n+ #Probably a bug in BLASTP that they use 0 or 1 depending on format\n+ if qframe == "0": qframe = "1"\n+ if sframe == "0": sframe = "1"\n+ slen = int(hit.findtext("Hit_len"))\n+ values.extend([sallseqid,\n+ hsp.findtext("Hsp_score"), #score,\n+ nident,\n+ positive,\n+ hsp.findtext("Hsp_gaps"), #gaps,\n+ ppos,\n+ qframe,\n+ sframe,\n+ #NOTE - for blastp, XML shows original seq, tabular uses XXX masking\n+ q_seq,\n+ h_seq,\n+ str(qlen),\n+ str(slen),\n+ salltitles,\n+ ])\n+ if out_fmt == \'cols\':\n+ if columns and len(columns) > 0:\n+ v = []\n+ for name in columns:\n+ v.append(values[colnames.index(name)])\n+ values = v\n+ #print "\\t".join(values) \n+ outfile.write("\\t".join(values) + "\\n")\n+ # prevents ElementTree from growing large datastructure\n+ root.clear()\n+ elem.clear()\n+\n+\n+for in_file in args:\n+ # get an iterable\n+ try: \n+ context = ElementTree.iterparse(in_file, events=("start", "end"))\n+ except:\n+ stop_err("Invalid data format.")\n+ # turn it into an iterator\n+ context = iter(context)\n+ # get the root element\n+ try:\n+ event, root = context.next()\n+ except:\n+ stop_err( "Invalid data format." )\n+ for event, elem in context:\n+ handle_event(event, elem)\n+\n+if unhitfile:\n+ unhitfile.close()\n+if options.output:\n+ outfile.close()\n' |
b |
diff -r 000000000000 -r 2bd0cbccb3c6 blastxml_to_tabular_selectable.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/blastxml_to_tabular_selectable.xml Wed Oct 08 19:38:28 2014 -0400 |
b |
b'@@ -0,0 +1,178 @@\n+<tool id="blastxml_to_tabular_selectable" name="BLAST XML to selected tabular columns" version="0.0.9">\n+ <description>Convert BLAST XML output to tabular</description>\n+ <command interpreter="python">\n+ blastxml_to_tabular_selectable.py -o $tabular_file \n+ #if $output.out_format == \'cols\' and $output.columns:\n+ -c \'$output.columns\'\n+ #else\n+ -c \'$output.out_format\' \n+ #end if\n+ $qdef\n+ $allqueries\n+ #if $unmatched:\n+ -u $unmatched_file\n+ #end if\n+ #if $maxhits.__str__ != \'\':\n+ --maxhits $maxhits\n+ #end if\n+ #if $maxhsps.__str__ != \'\':\n+ --maxhsps $maxhsps\n+ #end if\n+ #for i in $blastxml_file#${i} #end for#\n+ ## $blastxml_file \n+ </command>\n+ <inputs>\n+ <param name="blastxml_file" type="data" format="blastxml" multiple="true" label="BLAST results as XML"/> \n+ <param name="qdef" type="boolean" truevalue="-d" falsevalue="" checked="False" label="Use Iteration_query-def value for qseqid"/>\n+ <param name="allqueries" type="boolean" truevalue="-a" falsevalue="" checked="False" label="Output all queries including those with no hits"/>\n+ <param name="unmatched" type="boolean" truevalue="-u" falsevalue="" checked="False" label="Output a list with queries having no hits"/>\n+ <param name="maxhits" type="integer" value="1" optional="true" label="Maximum number of Hits to display for a query">\n+ <validator type="in_range" min="1" />\n+ </param>\n+ <param name="maxhsps" type="integer" value="1" optional="true" label="Maximum number of HSPs to display for a Hit">\n+ <validator type="in_range" min="1" />\n+ </param>\n+ \n+ <conditional name="output">\n+ <param name="out_format" type="select" label="Output format">\n+ <option value="std" selected="True">Tabular (standard 12 columns)</option>\n+ <option value="ext">Tabular (extended 24 columns)</option>\n+ <option value="cols">Tabular (select columns to output)</option>\n+ </param>\n+ <when value="std"/>\n+ <when value="ext"/>\n+ <when value="cols">\n+ <param name="columns" type="select" multiple="true" display="checkboxes" label="Output columns">\n+ <option value="qseqid"> 1 qseqid Query Seq-id (ID of your sequence)</option>\n+ <option value="sseqid"> 2 sseqid Subject Seq-id (ID of the database hit)</option>\n+ <option value="pident"> 3 pident Percentage of identical matches</option>\n+ <option value="length"> 4 length Alignment length</option>\n+ <option value="mismatch"> 5 mismatch Number of mismatches</option>\n+ <option value="gapopen"> 6 gapopen Number of gap openings</option>\n+ <option value="qstart"> 7 qstart Start of alignment in query</option>\n+ <option value="qend"> 8 qend End of alignment in query</option>\n+ <option value="sstart"> 9 sstart Start of alignment in subject (database hit)</option>\n+ <option value="send">10 send End of alignment in subject (database hit)</option>\n+ <option value="evalue">11 evalue Expectation value (E-value)</option>\n+ <option value="bitscore">12 bitscore Bit score</option>\n+ <option value="sallseqid">13 sallseqid All subject Seq-id(s), separated by a \';\'</option>\n+ <option value="score">14 score Raw score</option>\n+ <option value="nident">15 nident Number of identical matches</option>\n+ <option value="positive">16 positive Number of positive-scoring matches</option>\n+ <option value="gaps">17 gaps Total number of gaps</option>\n+ <option value="ppos">18 ppos Percentage of positive-scoring matches</option>\n+ <option value="qframe">19 qframe Query frame</'..b' <param name="out_format" value="cols" />\n+ <param name="columns" value="qseqid,sseqid,length,bitscore" />\n+ <output name="tabular_file" file="blastp_rhodopsin_proteins_selcol.tabular" ftype="tabular" />\n+ </test>\n+ <test>\n+ <param name="blastxml_file" value="blastp_rhodopsin_proteins.xml" ftype="blastxml" />\n+ <param name="out_format" value="ext" />\n+ <param name="maxhits" value="10" />\n+ <param name="maxhsps" value="10" />\n+ <output name="tabular_file" file="blastp_rhodopsin_proteins_ext_allhits.tabular" ftype="tabular" />\n+ </test>\n+ <test>\n+ <param name="blastxml_file" value="blastp_rhodopsin_proteins.xml" ftype="blastxml" />\n+ <param name="out_format" value="ext" />\n+ <param name="maxhits" value="1" />\n+ <param name="maxhsps" value="1" />\n+ <param name="unmatched" value="True" />\n+ <param name="allqueries" value="True" />\n+ <output name="tabular_file" file="blastp_rhodopsin_proteins_ext_allqueries.tabular" ftype="tabular" />\n+ <output name="unmatched_file" file="unmatched_queries.tabular" ftype="tabular" />\n+ </test>\n+ </tests>\n+ <help>\n+ \n+**What it does**\n+\n+NCBI BLAST+ (and the older NCBI \'legacy\' BLAST) can output in a range of\n+formats including tabular and a more detailed XML format. A complex workflow\n+may need both the XML and the tabular output - but running BLAST twice is\n+slow and wasteful.\n+\n+This tool takes the BLAST XML output and by default converts it into the\n+standard 12 column tabular equivalent:\n+\n+====== ========= ============================================\n+Column NCBI name Description\n+------ --------- --------------------------------------------\n+ 1 qseqid Query Seq-id (ID of your sequence)\n+ 2 sseqid Subject Seq-id (ID of the database hit)\n+ 3 pident Percentage of identical matches\n+ 4 length Alignment length\n+ 5 mismatch Number of mismatches\n+ 6 gapopen Number of gap openings\n+ 7 qstart Start of alignment in query\n+ 8 qend End of alignment in query\n+ 9 sstart Start of alignment in subject (database hit)\n+ 10 send End of alignment in subject (database hit)\n+ 11 evalue Expectation value (E-value)\n+ 12 bitscore Bit score\n+====== ========= ============================================\n+\n+The BLAST+ tools can optionally output additional columns of information,\n+but this takes longer to calculate. Most (but not all) of these columns are\n+included by selecting the extended tabular output. The extra columns are\n+included *after* the standard 12 columns. This is so that you can write\n+workflow filtering steps that accept either the 12 or 22 column tabular\n+BLAST output.\n+\n+====== ============= ===========================================\n+Column NCBI name Description\n+------ ------------- -------------------------------------------\n+ 13 sallseqid All subject Seq-id(s), separated by a \';\'\n+ 14 score Raw score\n+ 15 nident Number of identical matches\n+ 16 positive Number of positive-scoring matches\n+ 17 gaps Total number of gaps\n+ 18 ppos Percentage of positive-scoring matches\n+ 19 qframe Query frame\n+ 20 sframe Subject frame\n+ 21 qseq Aligned part of query sequence\n+ 22 sseq Aligned part of subject sequence\n+ 23 qlen Query sequence length\n+ 24 slen Subject sequence length\n+ 25 salltitles All subject title(s), separated by a \'<>\'\n+====== ============= ===========================================\n+\n+Beware that the XML file (and thus the conversion) and the tabular output\n+direct from BLAST+ may differ in the presence of XXXX masking on regions\n+low complexity (columns 21 and 22), and thus also calculated figures like\n+the percentage idenity (column 3).\n+\n+ </help>\n+</tool>\n' |
b |
diff -r 000000000000 -r 2bd0cbccb3c6 test-data/._blastp_rhodopsin_gibberish.xml |
b |
Binary file test-data/._blastp_rhodopsin_gibberish.xml has changed |
b |
diff -r 000000000000 -r 2bd0cbccb3c6 test-data/._blastp_rhodopsin_proteins.xml |
b |
Binary file test-data/._blastp_rhodopsin_proteins.xml has changed |
b |
diff -r 000000000000 -r 2bd0cbccb3c6 test-data/blastp_rhodopsin_gibberish.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastp_rhodopsin_gibberish.xml Wed Oct 08 19:38:28 2014 -0400 |
[ |
b'@@ -0,0 +1,493 @@\n+<?xml version="1.0"?>\n+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">\n+<BlastOutput>\n+ <BlastOutput_program>blastp</BlastOutput_program>\n+ <BlastOutput_version>BLASTP 2.2.29+</BlastOutput_version>\n+ <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>\n+ <BlastOutput_db>/panfs/roc/rissdb/blast/current/nr</BlastOutput_db>\n+ <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>\n+ <BlastOutput_query-def>gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus]</BlastOutput_query-def>\n+ <BlastOutput_query-len>348</BlastOutput_query-len>\n+ <BlastOutput_param>\n+ <Parameters>\n+ <Parameters_matrix>BLOSUM62</Parameters_matrix>\n+ <Parameters_expect>0.001</Parameters_expect>\n+ <Parameters_gap-open>11</Parameters_gap-open>\n+ <Parameters_gap-extend>1</Parameters_gap-extend>\n+ <Parameters_filter>F</Parameters_filter>\n+ </Parameters>\n+ </BlastOutput_param>\n+<BlastOutput_iterations>\n+<Iteration>\n+ <Iteration_iter-num>1</Iteration_iter-num>\n+ <Iteration_query-ID>Query_1</Iteration_query-ID>\n+ <Iteration_query-def>gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus]</Iteration_query-def>\n+ <Iteration_query-len>348</Iteration_query-len>\n+<Iteration_hits>\n+<Hit>\n+ <Hit_num>1</Hit_num>\n+ <Hit_id>gi|57163783|ref|NP_001009242.1|</Hit_id>\n+ <Hit_def>rhodopsin [Felis catus] >gi|75073790|sp|Q95KU1.1|OPSD_FELCA RecName: Full=Rhodopsin [Felis catus] >gi|16516829|emb|CAD10144.1| opsin [Felis catus]</Hit_def>\n+ <Hit_accession>NP_001009242</Hit_accession>\n+ <Hit_len>348</Hit_len>\n+ <Hit_hsps>\n+ <Hsp>\n+ <Hsp_num>1</Hsp_num>\n+ <Hsp_bit-score>717.613</Hsp_bit-score>\n+ <Hsp_score>1851</Hsp_score>\n+ <Hsp_evalue>0</Hsp_evalue>\n+ <Hsp_query-from>1</Hsp_query-from>\n+ <Hsp_query-to>348</Hsp_query-to>\n+ <Hsp_hit-from>1</Hsp_hit-from>\n+ <Hsp_hit-to>348</Hsp_hit-to>\n+ <Hsp_query-frame>0</Hsp_query-frame>\n+ <Hsp_hit-frame>0</Hsp_hit-frame>\n+ <Hsp_identity>348</Hsp_identity>\n+ <Hsp_positive>348</Hsp_positive>\n+ <Hsp_gaps>0</Hsp_gaps>\n+ <Hsp_align-len>348</Hsp_align-len>\n+ <Hsp_qseq>MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA</Hsp_qseq>\n+ <Hsp_hseq>MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA</Hsp_hseq>\n+ <Hsp_midline>MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA</Hsp_midline>\n+ </Hsp>\n+ </Hit_hsps>\n+</Hit>\n+<Hit>\n+ <Hit_num>2</Hit_num>\n+ <Hit_id>gi|591296700|ref|XP_007075496.1|</Hit_id>\n+ <Hit_def>PREDICTED: rhodopsin [Panthera tigris altaica]</Hit_def>\n+ <Hit_accession>XP_007075496</Hit_accession>\n+ <Hit_len>348</Hit_len>\n+ <Hit_hsps>\n+ <Hsp>\n+ <Hsp_num>1</Hsp_num>\n+ <Hsp_bit-score>714.146</Hsp_bit-score>\n+ <Hsp_score>1842</Hsp_score>\n+ <Hsp_evalue>0</Hsp_eval'..b'\n+ <Hsp_query-to>354</Hsp_query-to>\n+ <Hsp_hit-from>1</Hsp_hit-from>\n+ <Hsp_hit-to>354</Hsp_hit-to>\n+ <Hsp_query-frame>0</Hsp_query-frame>\n+ <Hsp_hit-frame>0</Hsp_hit-frame>\n+ <Hsp_identity>354</Hsp_identity>\n+ <Hsp_positive>354</Hsp_positive>\n+ <Hsp_gaps>0</Hsp_gaps>\n+ <Hsp_align-len>354</Hsp_align-len>\n+ <Hsp_qseq>MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTEASSVSSSSVSPA</Hsp_qseq>\n+ <Hsp_hseq>MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTEASSVSSSSVSPA</Hsp_hseq>\n+ <Hsp_midline>MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTEASSVSSSSVSPA</Hsp_midline>\n+ </Hsp>\n+ </Hit_hsps>\n+</Hit>\n+<Hit>\n+ <Hit_num>2</Hit_num>\n+ <Hit_id>gi|5931796|emb|CAB56646.1|</Hit_id>\n+ <Hit_def>rod opsin [Anguilla japonica]</Hit_def>\n+ <Hit_accession>CAB56646</Hit_accession>\n+ <Hit_len>352</Hit_len>\n+ <Hit_hsps>\n+ <Hsp>\n+ <Hsp_num>1</Hsp_num>\n+ <Hsp_bit-score>663.685</Hsp_bit-score>\n+ <Hsp_score>1711</Hsp_score>\n+ <Hsp_evalue>0</Hsp_evalue>\n+ <Hsp_query-from>1</Hsp_query-from>\n+ <Hsp_query-to>343</Hsp_query-to>\n+ <Hsp_hit-from>1</Hsp_hit-from>\n+ <Hsp_hit-to>343</Hsp_hit-to>\n+ <Hsp_query-frame>0</Hsp_query-frame>\n+ <Hsp_hit-frame>0</Hsp_hit-frame>\n+ <Hsp_identity>315</Hsp_identity>\n+ <Hsp_positive>331</Hsp_positive>\n+ <Hsp_gaps>0</Hsp_gaps>\n+ <Hsp_align-len>343</Hsp_align-len>\n+ <Hsp_qseq>MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTEA</Hsp_qseq>\n+ <Hsp_hseq>MNGTEGPNFYVPMSNATGVVRSPFEYPQYYLAEPWAYSALAAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWVVVCKPMSNFRFGENHAIMGVAFTWLMALACAAPPLFGWSRYIPEGMQCSCGIDYYTPNPETYNESFVIYMFVCHFTIPLTVVSFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSSFGPIFMTIPAFFAKSSALYNPLIYICMNKQFRNCMITTLCCGKNPFEEEEGASTTASKTEA</Hsp_hseq>\n+ <Hsp_midline>MNGTEGPNFY+PMSNATGVVRSPFEYPQYYLAEPWA+SAL+AYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERW+VVCKP++NFRFGE+HAIMGV TW MALACA PPLFGWSRYIPEG+QCSCGIDYYT P NESFVIYMF CHF+IPL V+SFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGS+FGPIFMTIP+FFAKSSALYNP+IYICMNKQFR+CMITTLCCGKNPFEEE+GAS T+SKTEA</Hsp_midline>\n+ </Hsp>\n+ </Hit_hsps>\n+</Hit>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>49886901</Statistics_db-num>\n+ <Statistics_db-len>17905752166</Statistics_db-len>\n+ <Statistics_hsp-len>149</Statistics_hsp-len>\n+ <Statistics_eff-space>2146883802985</Statistics_eff-space>\n+ <Statistics_kappa>0.041</Statistics_kappa>\n+ <Statistics_lambda>0.267</Statistics_lambda>\n+ <Statistics_entropy>0.14</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+</Iteration>\n+</BlastOutput_iterations>\n+</BlastOutput>\n+\n' |
b |
diff -r 000000000000 -r 2bd0cbccb3c6 test-data/blastp_rhodopsin_proteins.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastp_rhodopsin_proteins.xml Wed Oct 08 19:38:28 2014 -0400 |
[ |
b'@@ -0,0 +1,641 @@\n+<?xml version="1.0"?>\n+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">\n+<BlastOutput>\n+ <BlastOutput_program>blastp</BlastOutput_program>\n+ <BlastOutput_version>BLASTP 2.2.29+</BlastOutput_version>\n+ <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>\n+ <BlastOutput_db>/panfs/roc/rissdb/blast/current/nr</BlastOutput_db>\n+ <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>\n+ <BlastOutput_query-def>gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus]</BlastOutput_query-def>\n+ <BlastOutput_query-len>348</BlastOutput_query-len>\n+ <BlastOutput_param>\n+ <Parameters>\n+ <Parameters_matrix>BLOSUM62</Parameters_matrix>\n+ <Parameters_expect>0.001</Parameters_expect>\n+ <Parameters_gap-open>11</Parameters_gap-open>\n+ <Parameters_gap-extend>1</Parameters_gap-extend>\n+ <Parameters_filter>F</Parameters_filter>\n+ </Parameters>\n+ </BlastOutput_param>\n+<BlastOutput_iterations>\n+<Iteration>\n+ <Iteration_iter-num>1</Iteration_iter-num>\n+ <Iteration_query-ID>Query_1</Iteration_query-ID>\n+ <Iteration_query-def>gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus]</Iteration_query-def>\n+ <Iteration_query-len>348</Iteration_query-len>\n+<Iteration_hits>\n+<Hit>\n+ <Hit_num>1</Hit_num>\n+ <Hit_id>gi|57163783|ref|NP_001009242.1|</Hit_id>\n+ <Hit_def>rhodopsin [Felis catus] >gi|75073790|sp|Q95KU1.1|OPSD_FELCA RecName: Full=Rhodopsin [Felis catus] >gi|16516829|emb|CAD10144.1| opsin [Felis catus]</Hit_def>\n+ <Hit_accession>NP_001009242</Hit_accession>\n+ <Hit_len>348</Hit_len>\n+ <Hit_hsps>\n+ <Hsp>\n+ <Hsp_num>1</Hsp_num>\n+ <Hsp_bit-score>717.613</Hsp_bit-score>\n+ <Hsp_score>1851</Hsp_score>\n+ <Hsp_evalue>0</Hsp_evalue>\n+ <Hsp_query-from>1</Hsp_query-from>\n+ <Hsp_query-to>348</Hsp_query-to>\n+ <Hsp_hit-from>1</Hsp_hit-from>\n+ <Hsp_hit-to>348</Hsp_hit-to>\n+ <Hsp_query-frame>0</Hsp_query-frame>\n+ <Hsp_hit-frame>0</Hsp_hit-frame>\n+ <Hsp_identity>348</Hsp_identity>\n+ <Hsp_positive>348</Hsp_positive>\n+ <Hsp_gaps>0</Hsp_gaps>\n+ <Hsp_align-len>348</Hsp_align-len>\n+ <Hsp_qseq>MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA</Hsp_qseq>\n+ <Hsp_hseq>MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA</Hsp_hseq>\n+ <Hsp_midline>MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA</Hsp_midline>\n+ </Hsp>\n+ </Hit_hsps>\n+</Hit>\n+<Hit>\n+ <Hit_num>2</Hit_num>\n+ <Hit_id>gi|591296700|ref|XP_007075496.1|</Hit_id>\n+ <Hit_def>PREDICTED: rhodopsin [Panthera tigris altaica]</Hit_def>\n+ <Hit_accession>XP_007075496</Hit_accession>\n+ <Hit_len>348</Hit_len>\n+ <Hit_hsps>\n+ <Hsp>\n+ <Hsp_num>1</Hsp_num>\n+ <Hsp_bit-score>714.146</Hsp_bit-score>\n+ <Hsp_score>1842</Hsp_score>\n+ <Hsp_evalue>0</Hsp_eval'..b' <Hsp_query-frame>0</Hsp_query-frame>\n+ <Hsp_hit-frame>0</Hsp_hit-frame>\n+ <Hsp_identity>315</Hsp_identity>\n+ <Hsp_positive>331</Hsp_positive>\n+ <Hsp_gaps>0</Hsp_gaps>\n+ <Hsp_align-len>343</Hsp_align-len>\n+ <Hsp_qseq>MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTEA</Hsp_qseq>\n+ <Hsp_hseq>MNGTEGPNFYVPMSNATGVVRSPFEYPQYYLAEPWAYSALAAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWVVVCKPMSNFRFGENHAIMGVAFTWLMALACAAPPLFGWSRYIPEGMQCSCGIDYYTPNPETYNESFVIYMFVCHFTIPLTVVSFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSSFGPIFMTIPAFFAKSSALYNPLIYICMNKQFRNCMITTLCCGKNPFEEEEGASTTASKTEA</Hsp_hseq>\n+ <Hsp_midline>MNGTEGPNFY+PMSNATGVVRSPFEYPQYYLAEPWA+SAL+AYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERW+VVCKP++NFRFGE+HAIMGV TW MALACA PPLFGWSRYIPEG+QCSCGIDYYT P NESFVIYMF CHF+IPL V+SFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGS+FGPIFMTIP+FFAKSSALYNP+IYICMNKQFR+CMITTLCCGKNPFEEE+GAS T+SKTEA</Hsp_midline>\n+ </Hsp>\n+ </Hit_hsps>\n+</Hit>\n+<Hit>\n+ <Hit_num>3</Hit_num>\n+ <Hit_id>gi|3914251|sp|Q90215.1|OPSD2_ANGAN</Hit_id>\n+ <Hit_def>RecName: Full=Rhodopsin, freshwater form [Anguilla anguilla] >gi|1311520|gb|AAA99200.1| rhodopsin, partial [Anguilla anguilla] >gi|1587493|prf||2206482A rod visual pigment</Hit_def>\n+ <Hit_accession>Q90215</Hit_accession>\n+ <Hit_len>352</Hit_len>\n+ <Hit_hsps>\n+ <Hsp>\n+ <Hsp_num>1</Hsp_num>\n+ <Hsp_bit-score>652.899</Hsp_bit-score>\n+ <Hsp_score>1683</Hsp_score>\n+ <Hsp_evalue>0</Hsp_evalue>\n+ <Hsp_query-from>1</Hsp_query-from>\n+ <Hsp_query-to>343</Hsp_query-to>\n+ <Hsp_hit-from>1</Hsp_hit-from>\n+ <Hsp_hit-to>343</Hsp_hit-to>\n+ <Hsp_query-frame>0</Hsp_query-frame>\n+ <Hsp_hit-frame>0</Hsp_hit-frame>\n+ <Hsp_identity>308</Hsp_identity>\n+ <Hsp_positive>327</Hsp_positive>\n+ <Hsp_gaps>0</Hsp_gaps>\n+ <Hsp_align-len>343</Hsp_align-len>\n+ <Hsp_qseq>MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTEA</Hsp_qseq>\n+ <Hsp_hseq>MNGTEGPNFYVPMSNVTGVVRSPFEYPQYYLAEPWAYSALAAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAVERWMVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLFGWSRYIPEGMQCSCGMDHYAPNPETYNESFVIYMFICHFTIPLTVISFCYGRLVCTVKEATAQQQESETTQRAEREVTRMVIIMVISFLVCWVPYASVAWYIFTHQGSSFGPIFMTIPAFFAKSSSLYNPLIYICMNKQSRNCMITTLCCGKNPFEEEEGASTTASKTEA</Hsp_hseq>\n+ <Hsp_midline>MNGTEGPNFY+PMSN TGVVRSPFEYPQYYLAEPWA+SAL+AYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLA+ERWMVVCKP++NFRFGE+HAIMGV TW MALACA PPLFGWSRYIPEG+QCSCG+D+Y P NESFVIYMF CHF+IPL VISFCYGRLVCTVKEA AQQQESETTQRAEREVTRMV+IMVISFLVCWVPYASVAWYIFTHQGS+FGPIFMTIP+FFAKSS+LYNP+IYICMNKQ R+CMITTLCCGKNPFEEE+GAS T+SKTEA</Hsp_midline>\n+ </Hsp>\n+ </Hit_hsps>\n+</Hit>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>49886901</Statistics_db-num>\n+ <Statistics_db-len>17905752166</Statistics_db-len>\n+ <Statistics_hsp-len>149</Statistics_hsp-len>\n+ <Statistics_eff-space>2146883802985</Statistics_eff-space>\n+ <Statistics_kappa>0.041</Statistics_kappa>\n+ <Statistics_lambda>0.267</Statistics_lambda>\n+ <Statistics_entropy>0.14</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+</Iteration>\n+</BlastOutput_iterations>\n+</BlastOutput>\n+\n' |
b |
diff -r 000000000000 -r 2bd0cbccb3c6 test-data/blastp_rhodopsin_proteins_ext.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastp_rhodopsin_proteins_ext.tabular Wed Oct 08 19:38:28 2014 -0400 |
[ |
@@ -0,0 +1,7 @@ +#qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles +gi|57163783|ref|NP_001009242.1| gi|57163783|ref|NP_001009242.1| 100.00 348 0 0 1 348 1 348 0.0 717 gi|57163783|ref|NP_001009242.1|;gi|75073790|sp|Q95KU1.1|OPSD_FELCA;gi|16516829|emb|CAD10144.1| 1851 348 348 0 100.00 0 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 348 348 rhodopsin [Felis catus]<>RecName: Full=Rhodopsin [Felis catus]<>opsin [Felis catus] +gi|3024260|sp|P56514.1|OPSD_BUFBU gi|3024260|sp|P56514.1|OPSD_BUFBU 100.00 354 0 0 1 354 1 354 0.0 729 gi|3024260|sp|P56514.1|OPSD_BUFBU;gi|2734706|gb|AAB93704.1| 1881 354 354 0 100.00 0 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTEASSVSSSQVSPA MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTEASSVSSSQVSPA 354 354 RecName: Full=Rhodopsin<>rhodopsin [Bufo bufo] +gi|283855846|gb|ADB45242.1| gi|283855846|gb|ADB45242.1| 100.00 328 0 0 1 328 1 328 0.0 677 gi|283855846|gb|ADB45242.1| 1747 328 328 0 100.00 0 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 328 328 rhodopsin [Cynopterus brachyotis] +gi|283855823|gb|ADB45229.1| gi|283855823|gb|ADB45229.1| 100.00 328 0 0 1 328 1 328 0.0 676 gi|283855823|gb|ADB45229.1| 1745 328 328 0 100.00 0 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 328 328 rhodopsin [Myotis ricketti] +gi|223523|prf||0811197A gi|223523|prf||0811197A 100.00 347 0 0 1 347 1 347 0.0 717 gi|223523|prf||0811197A 1850 347 347 0 100.00 0 0 MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 347 347 rhodopsin +gi|12583665|dbj|BAB21486.1| gi|12583665|dbj|BAB21486.1| 100.00 354 0 0 1 354 1 354 0.0 731 gi|12583665|dbj|BAB21486.1| 1886 354 354 0 100.00 0 0 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTEASSVSSSSVSPA MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTEASSVSSSSVSPA 354 354 fresh water form rod opsin [Conger myriaster] |
b |
diff -r 000000000000 -r 2bd0cbccb3c6 test-data/blastp_rhodopsin_proteins_ext_allhits.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastp_rhodopsin_proteins_ext_allhits.tabular Wed Oct 08 19:38:28 2014 -0400 |
[ |
b'@@ -0,0 +1,19 @@\n+#qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore\tsallseqid\tscore\tnident\tpositive\tgaps\tppos\tqframe\tsframe\tqseq\tsseq\tqlen\tslen\tsalltitles\n+gi|57163783|ref|NP_001009242.1|\tgi|57163783|ref|NP_001009242.1|\t100.00\t348\t0\t0\t1\t348\t1\t348\t0.0\t717\tgi|57163783|ref|NP_001009242.1|;gi|75073790|sp|Q95KU1.1|OPSD_FELCA;gi|16516829|emb|CAD10144.1|\t1851\t348\t348\t0\t100.00\t0\t0\tMNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA\tMNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA\t348\t348\trhodopsin [Felis catus]<>RecName: Full=Rhodopsin [Felis catus]<>opsin [Felis catus]\n+gi|57163783|ref|NP_001009242.1|\tgi|591296700|ref|XP_007075496.1|\t99.43\t348\t2\t0\t1\t348\t1\t348\t0.0\t714\tgi|591296700|ref|XP_007075496.1|\t1842\t346\t347\t0\t99.71\t0\t0\tMNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA\tMNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTASKTETSQVAPA\t348\t348\tPREDICTED: rhodopsin [Panthera tigris altaica]\n+gi|57163783|ref|NP_001009242.1|\tgi|586525511|ref|XP_006917708.1|\t98.28\t348\t6\t0\t1\t348\t1\t348\t0.0\t709\tgi|586525511|ref|XP_006917708.1|;gi|431899951|gb|ELK07898.1|\t1829\t342\t345\t0\t99.14\t0\t0\tMNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA\tMNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGEDEASTTASKTETSQVAPA\t348\t348\tPREDICTED: rhodopsin [Pteropus alecto]<>Rhodopsin [Pteropus alecto]\n+gi|3024260|sp|P56514.1|OPSD_BUFBU\tgi|3024260|sp|P56514.1|OPSD_BUFBU\t100.00\t354\t0\t0\t1\t354\t1\t354\t0.0\t729\tgi|3024260|sp|P56514.1|OPSD_BUFBU;gi|2734706|gb|AAB93704.1|\t1881\t354\t354\t0\t100.00\t0\t0\tMNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTEASSVSSSQVSPA\tMNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTEASSVSSSQVSPA\t354\t354\tRecName: Full=Rhod'..b'A, Crystal Structure Of Bovine Rhodopsin<>Chain B, Crystal Structure Of Bovine Rhodopsin<>Chain A, Crystal Structure Of Trigonal Crystal Form Of Ground-State Rhodopsin<>Chain B, Crystal Structure Of Trigonal Crystal Form Of Ground-State Rhodopsin<>Chain C, Crystal Structure Of Trigonal Crystal Form Of Ground-State Rhodopsin<>Chain A, Crystal Structure Of Rhombohedral Crystal Form Of Ground-State Rhodopsin<>Chain A, Crystal Structure Of A Photoactivated Rhodopsin<>Chain B, Crystal Structure Of A Photoactivated Rhodopsin<>Chain C, Crystal Structure Of A Photoactivated Rhodopsin<>Chain A, Crystallographic Model Of Lumirhodopsin<>Chain B, Crystallographic Model Of Lumirhodopsin<>Chain A, Crystallographic Model Of 9-Cis-Rhodopsin<>Chain B, Crystallographic Model Of 9-Cis-Rhodopsin<>Chain A, Crystallographic Model Of Bathorhodopsin<>Chain B, Crystallographic Model Of Bathorhodopsin<>Chain A, Crystal Structure Of Bovine Rhodopsin At 2.6 Angstroms Resolution<>Chain B, Crystal Structure Of Bovine Rhodopsin At 2.6 Angstroms Resolution<>Chain A, Crystal Structure Of Bovine Rhodopsin At 2.2 Angstroms Resolution<>Chain B, Crystal Structure Of Bovine Rhodopsin At 2.2 Angstroms Resolution\n+gi|12583665|dbj|BAB21486.1|\tgi|12583665|dbj|BAB21486.1|\t100.00\t354\t0\t0\t1\t354\t1\t354\t0.0\t731\tgi|12583665|dbj|BAB21486.1|\t1886\t354\t354\t0\t100.00\t0\t0\tMNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTEASSVSSSSVSPA\tMNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTEASSVSSSSVSPA\t354\t354\tfresh water form rod opsin [Conger myriaster]\n+gi|12583665|dbj|BAB21486.1|\tgi|5931796|emb|CAB56646.1|\t91.84\t343\t28\t0\t1\t343\t1\t343\t0.0\t663\tgi|5931796|emb|CAB56646.1|\t1711\t315\t331\t0\t96.50\t0\t0\tMNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTEA\tMNGTEGPNFYVPMSNATGVVRSPFEYPQYYLAEPWAYSALAAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWVVVCKPMSNFRFGENHAIMGVAFTWLMALACAAPPLFGWSRYIPEGMQCSCGIDYYTPNPETYNESFVIYMFVCHFTIPLTVVSFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSSFGPIFMTIPAFFAKSSALYNPLIYICMNKQFRNCMITTLCCGKNPFEEEEGASTTASKTEA\t354\t352\trod opsin [Anguilla japonica]\n+gi|12583665|dbj|BAB21486.1|\tgi|3914251|sp|Q90215.1|OPSD2_ANGAN\t89.80\t343\t35\t0\t1\t343\t1\t343\t0.0\t652\tgi|3914251|sp|Q90215.1|OPSD2_ANGAN;gi|1311520|gb|AAA99200.1|;gi|1587493|prf||2206482A\t1683\t308\t327\t0\t95.34\t0\t0\tMNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTEA\tMNGTEGPNFYVPMSNVTGVVRSPFEYPQYYLAEPWAYSALAAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAVERWMVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLFGWSRYIPEGMQCSCGMDHYAPNPETYNESFVIYMFICHFTIPLTVISFCYGRLVCTVKEATAQQQESETTQRAEREVTRMVIIMVISFLVCWVPYASVAWYIFTHQGSSFGPIFMTIPAFFAKSSSLYNPLIYICMNKQSRNCMITTLCCGKNPFEEEEGASTTASKTEA\t354\t352\tRecName: Full=Rhodopsin, freshwater form [Anguilla anguilla]<>rhodopsin, partial [Anguilla anguilla]<>rod visual pigment\n' |
b |
diff -r 000000000000 -r 2bd0cbccb3c6 test-data/blastp_rhodopsin_proteins_ext_allqueries.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastp_rhodopsin_proteins_ext_allqueries.tabular Wed Oct 08 19:38:28 2014 -0400 |
[ |
@@ -0,0 +1,8 @@ +#qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles +gi|57163783|ref|NP_001009242.1| gi|57163783|ref|NP_001009242.1| 100.00 348 0 0 1 348 1 348 0.0 717 gi|57163783|ref|NP_001009242.1|;gi|75073790|sp|Q95KU1.1|OPSD_FELCA;gi|16516829|emb|CAD10144.1| 1851 348 348 0 100.00 0 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 348 348 rhodopsin [Felis catus]<>RecName: Full=Rhodopsin [Felis catus]<>opsin [Felis catus] +gi|3024260|sp|P56514.1|OPSD_BUFBU gi|3024260|sp|P56514.1|OPSD_BUFBU 100.00 354 0 0 1 354 1 354 0.0 729 gi|3024260|sp|P56514.1|OPSD_BUFBU;gi|2734706|gb|AAB93704.1| 1881 354 354 0 100.00 0 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTEASSVSSSQVSPA MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTEASSVSSSQVSPA 354 354 RecName: Full=Rhodopsin<>rhodopsin [Bufo bufo] +gibberish 75 +gi|283855846|gb|ADB45242.1| gi|283855846|gb|ADB45242.1| 100.00 328 0 0 1 328 1 328 0.0 677 gi|283855846|gb|ADB45242.1| 1747 328 328 0 100.00 0 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 328 328 rhodopsin [Cynopterus brachyotis] +gi|283855823|gb|ADB45229.1| gi|283855823|gb|ADB45229.1| 100.00 328 0 0 1 328 1 328 0.0 676 gi|283855823|gb|ADB45229.1| 1745 328 328 0 100.00 0 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 328 328 rhodopsin [Myotis ricketti] +gi|223523|prf||0811197A gi|223523|prf||0811197A 100.00 347 0 0 1 347 1 347 0.0 717 gi|223523|prf||0811197A 1850 347 347 0 100.00 0 0 MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 347 347 rhodopsin +gi|12583665|dbj|BAB21486.1| gi|12583665|dbj|BAB21486.1| 100.00 354 0 0 1 354 1 354 0.0 731 gi|12583665|dbj|BAB21486.1| 1886 354 354 0 100.00 0 0 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTEASSVSSSSVSPA MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTEASSVSSSSVSPA 354 354 fresh water form rod opsin [Conger myriaster] |
b |
diff -r 000000000000 -r 2bd0cbccb3c6 test-data/blastp_rhodopsin_proteins_selcol.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastp_rhodopsin_proteins_selcol.tabular Wed Oct 08 19:38:28 2014 -0400 |
b |
@@ -0,0 +1,7 @@ +#qseqid sseqid length bitscore +gi|57163783|ref|NP_001009242.1| gi|57163783|ref|NP_001009242.1| 348 717 +gi|3024260|sp|P56514.1|OPSD_BUFBU gi|3024260|sp|P56514.1|OPSD_BUFBU 354 729 +gi|283855846|gb|ADB45242.1| gi|283855846|gb|ADB45242.1| 328 677 +gi|283855823|gb|ADB45229.1| gi|283855823|gb|ADB45229.1| 328 676 +gi|223523|prf||0811197A gi|223523|prf||0811197A 347 717 +gi|12583665|dbj|BAB21486.1| gi|12583665|dbj|BAB21486.1| 354 731 |
b |
diff -r 000000000000 -r 2bd0cbccb3c6 test-data/blastp_rhodopsin_proteins_std.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastp_rhodopsin_proteins_std.tabular Wed Oct 08 19:38:28 2014 -0400 |
b |
@@ -0,0 +1,7 @@ +#qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore +gi|57163783|ref|NP_001009242.1| gi|57163783|ref|NP_001009242.1| 100.00 348 0 0 1 348 1 348 0.0 717 +gi|3024260|sp|P56514.1|OPSD_BUFBU gi|3024260|sp|P56514.1|OPSD_BUFBU 100.00 354 0 0 1 354 1 354 0.0 729 +gi|283855846|gb|ADB45242.1| gi|283855846|gb|ADB45242.1| 100.00 328 0 0 1 328 1 328 0.0 677 +gi|283855823|gb|ADB45229.1| gi|283855823|gb|ADB45229.1| 100.00 328 0 0 1 328 1 328 0.0 676 +gi|223523|prf||0811197A gi|223523|prf||0811197A 100.00 347 0 0 1 347 1 347 0.0 717 +gi|12583665|dbj|BAB21486.1| gi|12583665|dbj|BAB21486.1| 100.00 354 0 0 1 354 1 354 0.0 731 |
b |
diff -r 000000000000 -r 2bd0cbccb3c6 test-data/unmatched_queries.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unmatched_queries.tabular Wed Oct 08 19:38:28 2014 -0400 |
b |
@@ -0,0 +1,1 @@ +gibberish |