# HG changeset patch # User si-datascience # Date 1527188250 14400 # Node ID 0da2847fc10868f240300737b98ac23b3f5ba4a2 Uploaded diff -r 000000000000 -r 0da2847fc108 interproscan5/create_html_index.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/interproscan5/create_html_index.py Thu May 24 14:57:30 2018 -0400 @@ -0,0 +1,112 @@ +#!/usr/bin/env python + +import os +import re +import sys + +START = ''' + + + + +

InterProScan result summary page

+''' + +END = ''' + + +''' + + +def raw_mode(html_file, directory): + with open(html_file, 'w') as h: + h.write(START) + h.write('') + h.write(END) + + +def fix_name(name): + return re.sub('[&/]', '_', name) + + +def cooked_mode(orfed_ids, tsv_file, html_file, directory): + name_freq = {} + with open(tsv_file) as f: + for line in f: + name = line.split("\t", 1)[0] + if orfed_ids: + deorfed_name = re.sub('_\\d+$', '', name) + else: + deorfed_name = name + + data = name_freq.get(deorfed_name, []) + if data: + data[0] += 1 + data[1][name] = data[1].get(name, 0) + 1 + else: + data = [1, {name: 1}] + name_freq[deorfed_name] = data + + name_freq_sorted = [(x[0], (x[1][0], sorted(x[1][1].items(), key=lambda t: t[1], reverse=True))) + for x in sorted(name_freq.items(), key=lambda t: t[1][0], reverse=True) + ] + + filename_dict = {} + for filename in [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]: + filename_dict[os.path.splitext(filename)[0]] = filename + + with open(html_file, 'w') as h: + h.write(START) + h.write('
    ') + for don, (freq, members) in name_freq_sorted: + h.write('\n
  1. ') + if len(members) == 1: + fn = fix_name(members[0][0]) + if fn in filename_dict: + h.write(' %s (%d features)' % + (filename_dict[fn], members[0][0], members[0][1])) + del filename_dict[fn] + else: + h.write(' %s (%d features, broken link!)' % + (members[0][0], members[0][0], members[0][1])) + else: + h.write('%s (%d features)' % (don, freq)) + h.write('\n') + h.write('
  2. ') + + if len(filename_dict) > 0: + h.write('

    Sequences without any features

    ') + h.write('\n') + h.write('
') + h.write(END) + + +def main(): + if len(sys.argv) == 3: + raw_mode(sys.argv[1], sys.argv[2]) + elif len(sys.argv) == 5: + cooked_mode(sys.argv[1] != '0', sys.argv[2], sys.argv[3], sys.argv[4]) + else: + print('Args must be "html_file directory" or "[0|1] tsv_file html_file directory"') + sys.exit(1) + + +main() diff -r 000000000000 -r 0da2847fc108 interproscan5/create_index.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/interproscan5/create_index.py Thu May 24 14:57:30 2018 -0400 @@ -0,0 +1,14 @@ +#!/usr/bin/env python + +import os +import sys + +o = open(sys.argv[1], 'w+') + +o.write('

InterProScan result summary page

') +o.close() diff -r 000000000000 -r 0da2847fc108 interproscan5/data-n/__base__.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/interproscan5/data-n/__base__.gff3 Thu May 24 14:57:30 2018 -0400 @@ -0,0 +1,377 @@ +##gff-version 3 +##feature-ontology http://song.cvs.sourceforge.net/viewvc/song/ontology/sofa.obo?revision=1.269 +##sequence-region P00363 1 602 +P00363 . polypeptide 1 602 . + . md5=0d2c0f1acdd08ab0157f2308531a58e4;ID=P00363 +P00363 SUPERFAMILY protein_match 361 415 2.27E-71 + . Name=SSF51905;Target=P00363 361 415;status=T;ID=match$1_361_415;Ontology_term="GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR023753" +P00363 SUPERFAMILY protein_match 2 250 2.27E-71 + . Name=SSF51905;Target=P00363 2 250;status=T;ID=match$1_2_250;Ontology_term="GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR023753" +P00363 TIGRFAM protein_match 7 581 4.7E-250 + . Name=TIGR01812;signature_desc=sdhA_frdA_Gneg: succinate dehydrogenase or fumarate reductase, flavoprotein subunit;Target=P00363 7 581;status=T;ID=match$2_7_581;Ontology_term="GO:0016627","GO:0022900","GO:0050660","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR014006" +P00363 Gene3D protein_match 234 352 3.6E-37 + . Name=G3DSA:3.90.700.10;Target=P00363 234 352;status=T;ID=match$3_234_352;date=24-01-2016;Dbxref="InterPro:IPR027477" +P00363 PRINTS protein_match 375 382 2.5E-5 + . Name=PR00411;signature_desc=Pyridine nucleotide disulphide reductase class-I signature;Target=P00363 375 382;status=T;ID=match$4_375_382;date=24-01-2016 +P00363 PRINTS protein_match 7 29 2.5E-5 + . Name=PR00411;signature_desc=Pyridine nucleotide disulphide reductase class-I signature;Target=P00363 7 29;status=T;ID=match$4_7_29;date=24-01-2016 +P00363 Gene3D protein_match 426 537 1.5E-38 + . Name=G3DSA:1.20.58.100;Target=P00363 426 537;status=T;ID=match$5_426_537;Ontology_term="GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR015939" +P00363 Coils protein_match 423 443 . + . 
Name=Coil;Target=P00363 423 443;status=T;ID=match$6_423_443;date=24-01-2016 +P00363 Gene3D protein_match 353 416 5.9E-106 + . Name=G3DSA:3.50.50.60;Target=P00363 353 416;status=T;ID=match$7_353_416;Ontology_term="GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR023753" +P00363 Gene3D protein_match 2 233 5.9E-106 + . Name=G3DSA:3.50.50.60;Target=P00363 2 233;status=T;ID=match$7_2_233;Ontology_term="GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR023753" +P00363 PANTHER protein_match 1 584 0.0 + . Name=PTHR11632;Target=P00363 1 584;status=T;ID=match$8_1_584;date=24-01-2016 +P00363 SUPERFAMILY protein_match 444 590 3.01E-46 + . Name=SSF46977;Target=P00363 444 590;status=T;ID=match$9_444_590;Ontology_term="GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR015939" +P00363 PANTHER protein_match 1 584 0.0 + . Name=PTHR11632:SF50;Target=P00363 1 584;status=T;ID=match$10_1_584;date=24-01-2016 +P00363 PIRSF protein_match 1 575 2.0E-42 + . Name=PIRSF000171;Target=P00363 1 575;status=T;ID=match$11_1_575;date=24-01-2016;Dbxref="InterPro:IPR030664" +P00363 TIGRFAM protein_match 3 582 0.0 + . Name=TIGR01176;signature_desc=fum_red_Fp: fumarate reductase (quinol), flavoprotein subunit;Target=P00363 3 582;status=T;ID=match$12_3_582;Ontology_term="GO:0009061","GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR005884" +P00363 Pfam protein_match 453 581 7.0E-39 + . Name=PF02910;signature_desc=Fumarate reductase flavoprotein C-term;Target=P00363 453 581;status=T;ID=match$13_453_581;Ontology_term="GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR015939" +P00363 Coils protein_match 473 493 . + . Name=Coil;Target=P00363 473 493;status=T;ID=match$14_473_493;date=24-01-2016 +P00363 ProSitePatterns protein_match 43 52 . + . 
Name=PS00504;signature_desc=Fumarate reductase / succinate dehydrogenase FAD-binding site.;Target=P00363 43 52;status=T;ID=match$15_43_52;Ontology_term="GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR003952" +P00363 Gene3D protein_match 543 577 5.5E-18 + . Name=G3DSA:4.10.80.40;Target=P00363 543 577;status=T;ID=match$16_543_577;date=24-01-2016 +P00363 SUPERFAMILY protein_match 227 358 6.88E-43 + . Name=SSF56425;Target=P00363 227 358;status=T;ID=match$17_227_358;date=24-01-2016;Dbxref="InterPro:IPR027477" +P00363 Pfam protein_match 7 397 1.1E-115 + . Name=PF00890;signature_desc=FAD binding domain;Target=P00363 7 397;status=T;ID=match$18_7_397;date=24-01-2016;Dbxref="InterPro:IPR003953" +P00363 PRINTS protein_match 8 27 3.0E-10 + . Name=PR00368;signature_desc=FAD-dependent pyridine nucleotide reductase signature;Target=P00363 8 27;status=T;ID=match$19_8_27;date=24-01-2016 +P00363 PRINTS protein_match 360 382 3.0E-10 + . Name=PR00368;signature_desc=FAD-dependent pyridine nucleotide reductase signature;Target=P00363 360 382;status=T;ID=match$19_360_382;date=24-01-2016 +##sequence-region P00350 1 468 +P00350 . polypeptide 1 468 . + . md5=1776f92beb74ff0ccd9ac47a65663644;ID=P00350 +P00350 SUPERFAMILY protein_match 4 173 4.86E-50 + . Name=SSF51735;Target=P00350 4 173;status=T;ID=match$20_4_173;date=24-01-2016;Dbxref="InterPro:IPR016040" +P00350 TIGRFAM protein_match 5 467 1.1E-232 + . Name=TIGR00873;signature_desc=gnd: 6-phosphogluconate dehydrogenase (decarboxylating);Target=P00350 5 467;status=T;ID=match$21_5_467;Ontology_term="GO:0004616","GO:0006098","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006113" +P00350 Gene3D protein_match 1 180 1.9E-61 + . Name=G3DSA:3.40.50.720;Target=P00350 1 180;status=T;ID=match$22_1_180;date=24-01-2016;Dbxref="InterPro:IPR016040" +P00350 Pfam protein_match 3 174 1.8E-52 + . 
Name=PF03446;signature_desc=NAD binding domain of 6-phosphogluconate dehydrogenase;Target=P00350 3 174;status=T;ID=match$23_3_174;Ontology_term="GO:0004616","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006115" +P00350 PIRSF protein_match 1 468 1.9E-243 + . Name=PIRSF000109;Target=P00350 1 468;status=T;ID=match$24_1_468;Ontology_term="GO:0004616","GO:0006098","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006113" +P00350 ProSitePatterns protein_match 253 265 . + . Name=PS00461;signature_desc=6-phosphogluconate dehydrogenase signature.;Target=P00350 253 265;status=T;ID=match$25_253_265;Ontology_term="GO:0004616","GO:0006098","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006184" +P00350 Gene3D protein_match 181 433 1.0E-119 + . Name=G3DSA:1.10.1040.10;Target=P00350 181 433;status=T;ID=match$26_181_433;Ontology_term="GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR013328" +P00350 Gene3D protein_match 436 467 1.4E-22 + . Name=G3DSA:1.20.5.320;Target=P00350 436 467;status=T;ID=match$27_436_467;date=24-01-2016;Dbxref="InterPro:IPR012284" +P00350 PRINTS protein_match 249 276 1.521867E-89 + . Name=PR00076;signature_desc=6-phosphogluconate dehydrogenase signature;Target=P00350 249 276;status=T;ID=match$28_249_276;Ontology_term="GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006183" +P00350 PRINTS protein_match 168 196 1.521867E-89 + . Name=PR00076;signature_desc=6-phosphogluconate dehydrogenase signature;Target=P00350 168 196;status=T;ID=match$28_168_196;Ontology_term="GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006183" +P00350 PRINTS protein_match 66 95 1.521867E-89 + . Name=PR00076;signature_desc=6-phosphogluconate dehydrogenase signature;Target=P00350 66 95;status=T;ID=match$28_66_95;Ontology_term="GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006183" +P00350 PRINTS protein_match 119 144 1.521867E-89 + . 
Name=PR00076;signature_desc=6-phosphogluconate dehydrogenase signature;Target=P00350 119 144;status=T;ID=match$28_119_144;Ontology_term="GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006183" +P00350 PRINTS protein_match 4 27 1.521867E-89 + . Name=PR00076;signature_desc=6-phosphogluconate dehydrogenase signature;Target=P00350 4 27;status=T;ID=match$28_4_27;Ontology_term="GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006183" +P00350 PRINTS protein_match 356 378 1.521867E-89 + . Name=PR00076;signature_desc=6-phosphogluconate dehydrogenase signature;Target=P00350 356 378;status=T;ID=match$28_356_378;Ontology_term="GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006183" +P00350 PANTHER protein_match 1 466 7.6E-305 + . Name=PTHR11811:SF25;Target=P00350 1 466;status=T;ID=match$29_1_466;date=24-01-2016 +P00350 PANTHER protein_match 1 466 7.6E-305 + . Name=PTHR11811;Target=P00350 1 466;status=T;ID=match$30_1_466;date=24-01-2016 +P00350 Pfam protein_match 179 466 2.0E-132 + . Name=PF00393;signature_desc=6-phosphogluconate dehydrogenase, C-terminal domain;Target=P00350 179 466;status=T;ID=match$31_179_466;Ontology_term="GO:0004616","GO:0006098","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006114" +P00350 SUPERFAMILY protein_match 177 467 6.8E-132 + . Name=SSF48179;Target=P00350 177 467;status=T;ID=match$32_177_467;Ontology_term="GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR008927" +##sequence-region P00370 1 447 +P00370 . polypeptide 1 447 . + . md5=9ec8e26fcf5c160f533bf9a69dec0212;ID=P00370 +P00370 PRINTS protein_match 372 383 2.5E-33 + . Name=PR00082;signature_desc=Glutamate/leucine/phenylalanine/valine dehydrogenase signature;Target=P00370 372 383;status=T;ID=match$33_372_383;Ontology_term="GO:0006520","GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006095" +P00370 PRINTS protein_match 114 128 2.5E-33 + . 
Name=PR00082;signature_desc=Glutamate/leucine/phenylalanine/valine dehydrogenase signature;Target=P00370 114 128;status=T;ID=match$33_114_128;Ontology_term="GO:0006520","GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006095" +P00370 PRINTS protein_match 235 255 2.5E-33 + . Name=PR00082;signature_desc=Glutamate/leucine/phenylalanine/valine dehydrogenase signature;Target=P00370 235 255;status=T;ID=match$33_235_255;Ontology_term="GO:0006520","GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006095" +P00370 PRINTS protein_match 193 215 2.5E-33 + . Name=PR00082;signature_desc=Glutamate/leucine/phenylalanine/valine dehydrogenase signature;Target=P00370 193 215;status=T;ID=match$33_193_215;Ontology_term="GO:0006520","GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006095" +P00370 SUPERFAMILY protein_match 6 196 6.72E-74 + . Name=SSF53223;Target=P00370 6 196;status=T;ID=match$34_6_196;date=24-01-2016 +P00370 Gene3D protein_match 68 188 1.0E-60 + . Name=G3DSA:3.40.192.10;Target=P00370 68 188;status=T;ID=match$35_68_188;date=24-01-2016 +P00370 Pfam protein_match 202 445 1.3E-87 + . Name=PF00208;signature_desc=Glutamate/Leucine/Phenylalanine/Valine dehydrogenase;Target=P00370 202 445;status=T;ID=match$36_202_445;Ontology_term="GO:0006520","GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006096" +P00370 PIRSF protein_match 22 447 1.8E-114 + . Name=PIRSF000185;Target=P00370 22 447;status=T;ID=match$37_22_447;Ontology_term="GO:0016639","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR014362" +P00370 SMART protein_match 204 445 2.0E-93 + . Name=SM00839;signature_desc=Glutamate/Leucine/Phenylalanine/Valine dehydrogenase;Target=P00370 204 445;status=T;ID=match$38_204_445;Ontology_term="GO:0006520","GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006096" +P00370 Gene3D protein_match 209 372 1.6E-63 + . 
Name=G3DSA:3.40.50.720;Target=P00370 209 372;status=T;ID=match$39_209_372;date=24-01-2016;Dbxref="InterPro:IPR016040" +P00370 SUPERFAMILY protein_match 202 446 4.96E-77 + . Name=SSF51735;Target=P00370 202 446;status=T;ID=match$40_202_446;date=24-01-2016;Dbxref="InterPro:IPR016040" +P00370 Gene3D protein_match 7 67 8.2E-28 + . Name=G3DSA:1.10.285.10;Target=P00370 7 67;status=T;ID=match$41_7_67;date=24-01-2016 +P00370 Gene3D protein_match 373 446 2.0E-26 + . Name=G3DSA:1.10.285.10;Target=P00370 373 446;status=T;ID=match$41_373_446;date=24-01-2016 +P00370 ProSitePatterns protein_match 122 135 . + . Name=PS00074;signature_desc=Glu / Leu / Phe / Val dehydrogenases active site.;Target=P00370 122 135;status=T;ID=match$42_122_135;Ontology_term="GO:0006520","GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006095" +P00370 Pfam protein_match 57 184 6.3E-49 + . Name=PF02812;signature_desc=Glu/Leu/Phe/Val dehydrogenase, dimerisation domain;Target=P00370 57 184;status=T;ID=match$43_57_184;Ontology_term="GO:0006520","GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006097" +P00370 PANTHER protein_match 1 447 3.2E-250 + . Name=PTHR11606;Target=P00370 1 447;status=T;ID=match$44_1_447;date=24-01-2016 +P00370 PANTHER protein_match 1 447 3.2E-250 + . Name=PTHR11606:SF4;Target=P00370 1 447;status=T;ID=match$45_1_447;date=24-01-2016 +##sequence-region C1P5Z7 1 43 +C1P5Z7 . polypeptide 1 43 . + . md5=abfa044baa298f169ea62ac6b48e1185;ID=C1P5Z7 +C1P5Z7 Pfam protein_match 2 37 1.0E-11 + . 
Name=PF15894;signature_desc=Inhibitor of glucose uptake transporter SgrT;Target=C1P5Z7 2 37;status=T;ID=match$46_2_37;Ontology_term="GO:0046325";date=24-01-2016;Dbxref="InterPro:IPR031767" +##FASTA +>C1P5Z7 +MRQFYQHYFTATAKLCWLRWLSVPQRLTMLEGLMQWDDRNSES +>P00350 +MSKQQIGVVGMAVMGRNLALNIESRGYTVSIFNRSREKTEEVIAENPGKKLVPYYTVKEF +VESLETPRRILLMVKAGAGTDAAIDSLKPYLDKGDIIIDGGNTFFQDTIRRNRELSAEGF +NFIGTGVSGGEEGALKGPSIMPGGQKEAYELVAPILTKIAAVAEDGEPCVTYIGADGAGH +YVKMVHNGIEYGDMQLIAEAYSLLKGGLNLTNEELAQTFTEWNNGELSSYLIDITKDIFT +KKDEDGNYLVDVILDEAANKGTGKWTSQSALDLGEPLSLITESVFARYISSLKDQRVAAS +KVLSGPQAQPAGDKAEFIEKVRRALYLGKIVSYAQGFSQLRAASEEYNWDLNYGEIAKIF +RAGCIIRAQFLQKITDAYAENPQIANLLLAPYFKQIADDYQQALRDVVAYAVQNGIPVPT +FSAAVAYYDSYRAAVLPANLIQAQRDYFGAHTYKRIDKEGVFHTEWLD +>P00363 +MQTFQADLAIVGAGGAGLRAAIAAAQANPNAKIALISKVYPMRSHTVAAEGGSAAVAQDH +DSFEYHFHDTVAGGDWLCEQDVVDYFVHHCPTEMTQLELWGCPWSRRPDGSVNVRRFGGM +KIERTWFAADKTGFHMLHTLFQTSLQFPQIQRFDEHFVLDILVDDGHVRGLVAMNMMEGT +LVQIRANAVVMATGGAGRVYRYNTNGGIVTGDGMGMALSHGVPLRDMEFVQYHPTGLPGS +GILMTEGCRGEGGILVNKNGYRYLQDYGMGPETPLGEPKNKYMELGPRDKVSQAFWHEWR +KGNTISTPRGDVVYLDLRHLGEKKLHERLPFICELAKAYVGVDPVKEPIPVRPTAHYTMG +GIETDQNCETRIKGLFAVGECSSVGLHGANRLGSNSLAELVVFGRLAGEQATERAATAGN +GNEAAIEAQAAGVEQRLKDLVNQDGGENWAKIRDEMGLAMEEGCGIYRTPELMQKTIDKL +AELQERFKRVRITDTSSVFNTDLLYTIELGHGLNVAECMAHSAMARKESRGAHQRLDEGC +TERDDVNFLKHTLAFRDADGTTRLEYSDVKITTLPPAKRVYGGEADAADKAEAANKKEKA +NG +>P00370 +MDQTYSLESFLNHVQKRDPNQTEFAQAVREVMTTLWPFLEQNPKYRQMSLLERLVEPERV +IQFRVVWVDDRNQIQVNRAWRVQFSSAIGPYKGGMRFHPSVNLSILKFLGFEQTFKNALT +TLPMGGGKGGSDFDPKGKSEGEVMRFCQALMTELYRHLGADTDVPAGDIGVGGREVGFMA +GMMKKLSNNTACVFTGKGLSFGGSLIRPEATGYGLVYFTEAMLKRHGMGFEGMRVSVSGS +GNVAQYAIEKAMEFGARVITASDSSGTVVDESGFTKEKLARLIEIKASRDGRVADYAKEF +GLVYLEGQQPWSLPVDIALPCATQNELDVDAAHQLIANGVKAVAEGANMPTTIEATELFQ +QAGVLFAPGKAANAGGVATSGLEMAQNAARLGWKAEKVDARLHHIMLDIHHACVEHGGEG +EQTNYVQGANIAGFVKVADAMLAQGVI +>match$1_2_250 +QTFQADLAIVGAGGAGLRAAIAAAQANPNAKIALISKVYPMRSHTVAAEGGSAAVAQDHD +SFEYHFHDTVAGGDWLCEQDVVDYFVHHCPTEMTQLELWGCPWSRRPDGSVNVRRFGGMK 
+IERTWFAADKTGFHMLHTLFQTSLQFPQIQRFDEHFVLDILVDDGHVRGLVAMNMMEGTL +VQIRANAVVMATGGAGRVYRYNTNGGIVTGDGMGMALSHGVPLRDMEFVQYHPTGLPGSG +ILMTEGCRG +>match$1_361_415 +GIETDQNCETRIKGLFAVGECSSVGLHGANRLGSNSLAELVVFGRLAGEQATERA +>match$2_7_581 +DLAIVGAGGAGLRAAIAAAQANPNAKIALISKVYPMRSHTVAAEGGSAAVAQDHDSFEYH +FHDTVAGGDWLCEQDVVDYFVHHCPTEMTQLELWGCPWSRRPDGSVNVRRFGGMKIERTW +FAADKTGFHMLHTLFQTSLQFPQIQRFDEHFVLDILVDDGHVRGLVAMNMMEGTLVQIRA +NAVVMATGGAGRVYRYNTNGGIVTGDGMGMALSHGVPLRDMEFVQYHPTGLPGSGILMTE +GCRGEGGILVNKNGYRYLQDYGMGPETPLGEPKNKYMELGPRDKVSQAFWHEWRKGNTIS +TPRGDVVYLDLRHLGEKKLHERLPFICELAKAYVGVDPVKEPIPVRPTAHYTMGGIETDQ +NCETRIKGLFAVGECSSVGLHGANRLGSNSLAELVVFGRLAGEQATERAATAGNGNEAAI +EAQAAGVEQRLKDLVNQDGGENWAKIRDEMGLAMEEGCGIYRTPELMQKTIDKLAELQER +FKRVRITDTSSVFNTDLLYTIELGHGLNVAECMAHSAMARKESRGAHQRLDEGCTERDDV +NFLKHTLAFRDADGTTRLEYSDVKITTLPPAKRVY +>match$3_234_352 +PTGLPGSGILMTEGCRGEGGILVNKNGYRYLQDYGMGPETPLGEPKNKYMELGPRDKVSQ +AFWHEWRKGNTISTPRGDVVYLDLRHLGEKKLHERLPFICELAKAYVGVDPVKEPIPVR +>match$4_7_29 +DLAIVGAGGAGLRAAIAAAQANP +>match$4_375_382 +LFAVGECS +>match$5_426_537 +IEAQAAGVEQRLKDLVNQDGGENWAKIRDEMGLAMEEGCGIYRTPELMQKTIDKLAELQE +RFKRVRITDTSSVFNTDLLYTIELGHGLNVAECMAHSAMARKESRGAHQRLD +>match$6_423_443 +EAAIEAQAAGVEQRLKDLVNQ +>match$7_2_233 +QTFQADLAIVGAGGAGLRAAIAAAQANPNAKIALISKVYPMRSHTVAAEGGSAAVAQDHD +SFEYHFHDTVAGGDWLCEQDVVDYFVHHCPTEMTQLELWGCPWSRRPDGSVNVRRFGGMK +IERTWFAADKTGFHMLHTLFQTSLQFPQIQRFDEHFVLDILVDDGHVRGLVAMNMMEGTL +VQIRANAVVMATGGAGRVYRYNTNGGIVTGDGMGMALSHGVPLRDMEFVQYH +>match$7_353_416 +PTAHYTMGGIETDQNCETRIKGLFAVGECSSVGLHGANRLGSNSLAELVVFGRLAGEQAT +ERAA +>match$8_1_584 +MQTFQADLAIVGAGGAGLRAAIAAAQANPNAKIALISKVYPMRSHTVAAEGGSAAVAQDH +DSFEYHFHDTVAGGDWLCEQDVVDYFVHHCPTEMTQLELWGCPWSRRPDGSVNVRRFGGM +KIERTWFAADKTGFHMLHTLFQTSLQFPQIQRFDEHFVLDILVDDGHVRGLVAMNMMEGT +LVQIRANAVVMATGGAGRVYRYNTNGGIVTGDGMGMALSHGVPLRDMEFVQYHPTGLPGS +GILMTEGCRGEGGILVNKNGYRYLQDYGMGPETPLGEPKNKYMELGPRDKVSQAFWHEWR +KGNTISTPRGDVVYLDLRHLGEKKLHERLPFICELAKAYVGVDPVKEPIPVRPTAHYTMG +GIETDQNCETRIKGLFAVGECSSVGLHGANRLGSNSLAELVVFGRLAGEQATERAATAGN 
+GNEAAIEAQAAGVEQRLKDLVNQDGGENWAKIRDEMGLAMEEGCGIYRTPELMQKTIDKL +AELQERFKRVRITDTSSVFNTDLLYTIELGHGLNVAECMAHSAMARKESRGAHQRLDEGC +TERDDVNFLKHTLAFRDADGTTRLEYSDVKITTLPPAKRVYGGE +>match$9_444_590 +DGGENWAKIRDEMGLAMEEGCGIYRTPELMQKTIDKLAELQERFKRVRITDTSSVFNTDL +LYTIELGHGLNVAECMAHSAMARKESRGAHQRLDEGCTERDDVNFLKHTLAFRDADGTTR +LEYSDVKITTLPPAKRVYGGEADAADK +>match$10_1_584 +MQTFQADLAIVGAGGAGLRAAIAAAQANPNAKIALISKVYPMRSHTVAAEGGSAAVAQDH +DSFEYHFHDTVAGGDWLCEQDVVDYFVHHCPTEMTQLELWGCPWSRRPDGSVNVRRFGGM +KIERTWFAADKTGFHMLHTLFQTSLQFPQIQRFDEHFVLDILVDDGHVRGLVAMNMMEGT +LVQIRANAVVMATGGAGRVYRYNTNGGIVTGDGMGMALSHGVPLRDMEFVQYHPTGLPGS +GILMTEGCRGEGGILVNKNGYRYLQDYGMGPETPLGEPKNKYMELGPRDKVSQAFWHEWR +KGNTISTPRGDVVYLDLRHLGEKKLHERLPFICELAKAYVGVDPVKEPIPVRPTAHYTMG +GIETDQNCETRIKGLFAVGECSSVGLHGANRLGSNSLAELVVFGRLAGEQATERAATAGN +GNEAAIEAQAAGVEQRLKDLVNQDGGENWAKIRDEMGLAMEEGCGIYRTPELMQKTIDKL +AELQERFKRVRITDTSSVFNTDLLYTIELGHGLNVAECMAHSAMARKESRGAHQRLDEGC +TERDDVNFLKHTLAFRDADGTTRLEYSDVKITTLPPAKRVYGGE +>match$11_1_575 +MQTFQADLAIVGAGGAGLRAAIAAAQANPNAKIALISKVYPMRSHTVAAEGGSAAVAQDH +DSFEYHFHDTVAGGDWLCEQDVVDYFVHHCPTEMTQLELWGCPWSRRPDGSVNVRRFGGM +KIERTWFAADKTGFHMLHTLFQTSLQFPQIQRFDEHFVLDILVDDGHVRGLVAMNMMEGT +LVQIRANAVVMATGGAGRVYRYNTNGGIVTGDGMGMALSHGVPLRDMEFVQYHPTGLPGS +GILMTEGCRGEGGILVNKNGYRYLQDYGMGPETPLGEPKNKYMELGPRDKVSQAFWHEWR +KGNTISTPRGDVVYLDLRHLGEKKLHERLPFICELAKAYVGVDPVKEPIPVRPTAHYTMG +GIETDQNCETRIKGLFAVGECSSVGLHGANRLGSNSLAELVVFGRLAGEQATERAATAGN +GNEAAIEAQAAGVEQRLKDLVNQDGGENWAKIRDEMGLAMEEGCGIYRTPELMQKTIDKL +AELQERFKRVRITDTSSVFNTDLLYTIELGHGLNVAECMAHSAMARKESRGAHQRLDEGC +TERDDVNFLKHTLAFRDADGTTRLEYSDVKITTLP +>match$12_3_582 +TFQADLAIVGAGGAGLRAAIAAAQANPNAKIALISKVYPMRSHTVAAEGGSAAVAQDHDS +FEYHFHDTVAGGDWLCEQDVVDYFVHHCPTEMTQLELWGCPWSRRPDGSVNVRRFGGMKI +ERTWFAADKTGFHMLHTLFQTSLQFPQIQRFDEHFVLDILVDDGHVRGLVAMNMMEGTLV +QIRANAVVMATGGAGRVYRYNTNGGIVTGDGMGMALSHGVPLRDMEFVQYHPTGLPGSGI +LMTEGCRGEGGILVNKNGYRYLQDYGMGPETPLGEPKNKYMELGPRDKVSQAFWHEWRKG +NTISTPRGDVVYLDLRHLGEKKLHERLPFICELAKAYVGVDPVKEPIPVRPTAHYTMGGI 
+ETDQNCETRIKGLFAVGECSSVGLHGANRLGSNSLAELVVFGRLAGEQATERAATAGNGN +EAAIEAQAAGVEQRLKDLVNQDGGENWAKIRDEMGLAMEEGCGIYRTPELMQKTIDKLAE +LQERFKRVRITDTSSVFNTDLLYTIELGHGLNVAECMAHSAMARKESRGAHQRLDEGCTE +RDDVNFLKHTLAFRDADGTTRLEYSDVKITTLPPAKRVYG +>match$13_453_581 +RDEMGLAMEEGCGIYRTPELMQKTIDKLAELQERFKRVRITDTSSVFNTDLLYTIELGHG +LNVAECMAHSAMARKESRGAHQRLDEGCTERDDVNFLKHTLAFRDADGTTRLEYSDVKIT +TLPPAKRVY +>match$14_473_493 +MQKTIDKLAELQERFKRVRIT +>match$15_43_52 +RSHTVAAEGG +>match$16_543_577 +RDDVNFLKHTLAFRDADGTTRLEYSDVKITTLPPA +>match$17_227_358 +MEFVQYHPTGLPGSGILMTEGCRGEGGILVNKNGYRYLQDYGMGPETPLGEPKNKYMELG +PRDKVSQAFWHEWRKGNTISTPRGDVVYLDLRHLGEKKLHERLPFICELAKAYVGVDPVK +EPIPVRPTAHYT +>match$18_7_397 +DLAIVGAGGAGLRAAIAAAQANPNAKIALISKVYPMRSHTVAAEGGSAAVAQDHDSFEYH +FHDTVAGGDWLCEQDVVDYFVHHCPTEMTQLELWGCPWSRRPDGSVNVRRFGGMKIERTW +FAADKTGFHMLHTLFQTSLQFPQIQRFDEHFVLDILVDDGHVRGLVAMNMMEGTLVQIRA +NAVVMATGGAGRVYRYNTNGGIVTGDGMGMALSHGVPLRDMEFVQYHPTGLPGSGILMTE +GCRGEGGILVNKNGYRYLQDYGMGPETPLGEPKNKYMELGPRDKVSQAFWHEWRKGNTIS +TPRGDVVYLDLRHLGEKKLHERLPFICELAKAYVGVDPVKEPIPVRPTAHYTMGGIETDQ +NCETRIKGLFAVGECSSVGLHGANRLGSNSL +>match$19_8_27 +LAIVGAGGAGLRAAIAAAQA +>match$19_360_382 +GGIETDQNCETRIKGLFAVGECS +>match$20_4_173 +QQIGVVGMAVMGRNLALNIESRGYTVSIFNRSREKTEEVIAENPGKKLVPYYTVKEFVES +LETPRRILLMVKAGAGTDAAIDSLKPYLDKGDIIIDGGNTFFQDTIRRNRELSAEGFNFI +GTGVSGGEEGALKGPSIMPGGQKEAYELVAPILTKIAAVAEDGEPCVTYI +>match$21_5_467 +QIGVVGMAVMGRNLALNIESRGYTVSIFNRSREKTEEVIAENPGKKLVPYYTVKEFVESL +ETPRRILLMVKAGAGTDAAIDSLKPYLDKGDIIIDGGNTFFQDTIRRNRELSAEGFNFIG +TGVSGGEEGALKGPSIMPGGQKEAYELVAPILTKIAAVAEDGEPCVTYIGADGAGHYVKM +VHNGIEYGDMQLIAEAYSLLKGGLNLTNEELAQTFTEWNNGELSSYLIDITKDIFTKKDE +DGNYLVDVILDEAANKGTGKWTSQSALDLGEPLSLITESVFARYISSLKDQRVAASKVLS +GPQAQPAGDKAEFIEKVRRALYLGKIVSYAQGFSQLRAASEEYNWDLNYGEIAKIFRAGC +IIRAQFLQKITDAYAENPQIANLLLAPYFKQIADDYQQALRDVVAYAVQNGIPVPTFSAA +VAYYDSYRAAVLPANLIQAQRDYFGAHTYKRIDKEGVFHTEWL +>match$22_1_180 +MSKQQIGVVGMAVMGRNLALNIESRGYTVSIFNRSREKTEEVIAENPGKKLVPYYTVKEF +VESLETPRRILLMVKAGAGTDAAIDSLKPYLDKGDIIIDGGNTFFQDTIRRNRELSAEGF 
+NFIGTGVSGGEEGALKGPSIMPGGQKEAYELVAPILTKIAAVAEDGEPCVTYIGADGAGH +>match$23_3_174 +KQQIGVVGMAVMGRNLALNIESRGYTVSIFNRSREKTEEVIAENPGKKLVPYYTVKEFVE +SLETPRRILLMVKAGAGTDAAIDSLKPYLDKGDIIIDGGNTFFQDTIRRNRELSAEGFNF +IGTGVSGGEEGALKGPSIMPGGQKEAYELVAPILTKIAAVAEDGEPCVTYIG +>match$24_1_468 +MSKQQIGVVGMAVMGRNLALNIESRGYTVSIFNRSREKTEEVIAENPGKKLVPYYTVKEF +VESLETPRRILLMVKAGAGTDAAIDSLKPYLDKGDIIIDGGNTFFQDTIRRNRELSAEGF +NFIGTGVSGGEEGALKGPSIMPGGQKEAYELVAPILTKIAAVAEDGEPCVTYIGADGAGH +YVKMVHNGIEYGDMQLIAEAYSLLKGGLNLTNEELAQTFTEWNNGELSSYLIDITKDIFT +KKDEDGNYLVDVILDEAANKGTGKWTSQSALDLGEPLSLITESVFARYISSLKDQRVAAS +KVLSGPQAQPAGDKAEFIEKVRRALYLGKIVSYAQGFSQLRAASEEYNWDLNYGEIAKIF +RAGCIIRAQFLQKITDAYAENPQIANLLLAPYFKQIADDYQQALRDVVAYAVQNGIPVPT +FSAAVAYYDSYRAAVLPANLIQAQRDYFGAHTYKRIDKEGVFHTEWLD +>match$25_253_265 +ILDEAANKGTGKW +>match$26_181_433 +YVKMVHNGIEYGDMQLIAEAYSLLKGGLNLTNEELAQTFTEWNNGELSSYLIDITKDIFT +KKDEDGNYLVDVILDEAANKGTGKWTSQSALDLGEPLSLITESVFARYISSLKDQRVAAS +KVLSGPQAQPAGDKAEFIEKVRRALYLGKIVSYAQGFSQLRAASEEYNWDLNYGEIAKIF +RAGCIIRAQFLQKITDAYAENPQIANLLLAPYFKQIADDYQQALRDVVAYAVQNGIPVPT +FSAAVAYYDSYRA +>match$27_436_467 +LPANLIQAQRDYFGAHTYKRIDKEGVFHTEWL +>match$28_4_27 +QQIGVVGMAVMGRNLALNIESRGY +>match$28_66_95 +TPRRILLMVKAGAGTDAAIDSLKPYLDKGD +>match$28_119_144 +GFNFIGTGVSGGEEGALKGPSIMPGG +>match$28_168_196 +PCVTYIGADGAGHYVKMVHNGIEYGDMQL +>match$28_249_276 +LVDVILDEAANKGTGKWTSQSALDLGEP +>match$28_356_378 +IAKIFRAGCIIRAQFLQKITDAY +>match$29_1_466 +MSKQQIGVVGMAVMGRNLALNIESRGYTVSIFNRSREKTEEVIAENPGKKLVPYYTVKEF +VESLETPRRILLMVKAGAGTDAAIDSLKPYLDKGDIIIDGGNTFFQDTIRRNRELSAEGF +NFIGTGVSGGEEGALKGPSIMPGGQKEAYELVAPILTKIAAVAEDGEPCVTYIGADGAGH +YVKMVHNGIEYGDMQLIAEAYSLLKGGLNLTNEELAQTFTEWNNGELSSYLIDITKDIFT +KKDEDGNYLVDVILDEAANKGTGKWTSQSALDLGEPLSLITESVFARYISSLKDQRVAAS +KVLSGPQAQPAGDKAEFIEKVRRALYLGKIVSYAQGFSQLRAASEEYNWDLNYGEIAKIF +RAGCIIRAQFLQKITDAYAENPQIANLLLAPYFKQIADDYQQALRDVVAYAVQNGIPVPT +FSAAVAYYDSYRAAVLPANLIQAQRDYFGAHTYKRIDKEGVFHTEW +>match$30_1_466 +MSKQQIGVVGMAVMGRNLALNIESRGYTVSIFNRSREKTEEVIAENPGKKLVPYYTVKEF 
+VESLETPRRILLMVKAGAGTDAAIDSLKPYLDKGDIIIDGGNTFFQDTIRRNRELSAEGF +NFIGTGVSGGEEGALKGPSIMPGGQKEAYELVAPILTKIAAVAEDGEPCVTYIGADGAGH +YVKMVHNGIEYGDMQLIAEAYSLLKGGLNLTNEELAQTFTEWNNGELSSYLIDITKDIFT +KKDEDGNYLVDVILDEAANKGTGKWTSQSALDLGEPLSLITESVFARYISSLKDQRVAAS +KVLSGPQAQPAGDKAEFIEKVRRALYLGKIVSYAQGFSQLRAASEEYNWDLNYGEIAKIF +RAGCIIRAQFLQKITDAYAENPQIANLLLAPYFKQIADDYQQALRDVVAYAVQNGIPVPT +FSAAVAYYDSYRAAVLPANLIQAQRDYFGAHTYKRIDKEGVFHTEW +>match$31_179_466 +GHYVKMVHNGIEYGDMQLIAEAYSLLKGGLNLTNEELAQTFTEWNNGELSSYLIDITKDI +FTKKDEDGNYLVDVILDEAANKGTGKWTSQSALDLGEPLSLITESVFARYISSLKDQRVA +ASKVLSGPQAQPAGDKAEFIEKVRRALYLGKIVSYAQGFSQLRAASEEYNWDLNYGEIAK +IFRAGCIIRAQFLQKITDAYAENPQIANLLLAPYFKQIADDYQQALRDVVAYAVQNGIPV +PTFSAAVAYYDSYRAAVLPANLIQAQRDYFGAHTYKRIDKEGVFHTEW +>match$32_177_467 +GAGHYVKMVHNGIEYGDMQLIAEAYSLLKGGLNLTNEELAQTFTEWNNGELSSYLIDITK +DIFTKKDEDGNYLVDVILDEAANKGTGKWTSQSALDLGEPLSLITESVFARYISSLKDQR +VAASKVLSGPQAQPAGDKAEFIEKVRRALYLGKIVSYAQGFSQLRAASEEYNWDLNYGEI +AKIFRAGCIIRAQFLQKITDAYAENPQIANLLLAPYFKQIADDYQQALRDVVAYAVQNGI +PVPTFSAAVAYYDSYRAAVLPANLIQAQRDYFGAHTYKRIDKEGVFHTEWL +>match$33_114_128 +TFKNALTTLPMGGGK +>match$33_193_215 +VFTGKGLSFGGSLIRPEATGYGL +>match$33_235_255 +VSVSGSGNVAQYAIEKAMEFG +>match$33_372_383 +ANAGGVATSGLE +>match$34_6_196 +SLESFLNHVQKRDPNQTEFAQAVREVMTTLWPFLEQNPKYRQMSLLERLVEPERVIQFRV +VWVDDRNQIQVNRAWRVQFSSAIGPYKGGMRFHPSVNLSILKFLGFEQTFKNALTTLPMG +GGKGGSDFDPKGKSEGEVMRFCQALMTELYRHLGADTDVPAGDIGVGGREVGFMAGMMKK +LSNNTACVFTG +>match$35_68_188 +VDDRNQIQVNRAWRVQFSSAIGPYKGGMRFHPSVNLSILKFLGFEQTFKNALTTLPMGGG +KGGSDFDPKGKSEGEVMRFCQALMTELYRHLGADTDVPAGDIGVGGREVGFMAGMMKKLS +N +>match$36_202_445 +GGSLIRPEATGYGLVYFTEAMLKRHGMGFEGMRVSVSGSGNVAQYAIEKAMEFGARVITA +SDSSGTVVDESGFTKEKLARLIEIKASRDGRVADYAKEFGLVYLEGQQPWSLPVDIALPC +ATQNELDVDAAHQLIANGVKAVAEGANMPTTIEATELFQQAGVLFAPGKAANAGGVATSG +LEMAQNAARLGWKAEKVDARLHHIMLDIHHACVEHGGEGEQTNYVQGANIAGFVKVADAM +LAQG +>match$37_22_447 +TEFAQAVREVMTTLWPFLEQNPKYRQMSLLERLVEPERVIQFRVVWVDDRNQIQVNRAWR +VQFSSAIGPYKGGMRFHPSVNLSILKFLGFEQTFKNALTTLPMGGGKGGSDFDPKGKSEG 
+EVMRFCQALMTELYRHLGADTDVPAGDIGVGGREVGFMAGMMKKLSNNTACVFTGKGLSF +GGSLIRPEATGYGLVYFTEAMLKRHGMGFEGMRVSVSGSGNVAQYAIEKAMEFGARVITA +SDSSGTVVDESGFTKEKLARLIEIKASRDGRVADYAKEFGLVYLEGQQPWSLPVDIALPC +ATQNELDVDAAHQLIANGVKAVAEGANMPTTIEATELFQQAGVLFAPGKAANAGGVATSG +LEMAQNAARLGWKAEKVDARLHHIMLDIHHACVEHGGEGEQTNYVQGANIAGFVKVADAM +LAQGVI +>match$38_204_445 +SLIRPEATGYGLVYFTEAMLKRHGMGFEGMRVSVSGSGNVAQYAIEKAMEFGARVITASD +SSGTVVDESGFTKEKLARLIEIKASRDGRVADYAKEFGLVYLEGQQPWSLPVDIALPCAT +QNELDVDAAHQLIANGVKAVAEGANMPTTIEATELFQQAGVLFAPGKAANAGGVATSGLE +MAQNAARLGWKAEKVDARLHHIMLDIHHACVEHGGEGEQTNYVQGANIAGFVKVADAMLA +QG +>match$39_209_372 +EATGYGLVYFTEAMLKRHGMGFEGMRVSVSGSGNVAQYAIEKAMEFGARVITASDSSGTV +VDESGFTKEKLARLIEIKASRDGRVADYAKEFGLVYLEGQQPWSLPVDIALPCATQNELD +VDAAHQLIANGVKAVAEGANMPTTIEATELFQQAGVLFAPGKAA +>match$40_202_446 +GGSLIRPEATGYGLVYFTEAMLKRHGMGFEGMRVSVSGSGNVAQYAIEKAMEFGARVITA +SDSSGTVVDESGFTKEKLARLIEIKASRDGRVADYAKEFGLVYLEGQQPWSLPVDIALPC +ATQNELDVDAAHQLIANGVKAVAEGANMPTTIEATELFQQAGVLFAPGKAANAGGVATSG +LEMAQNAARLGWKAEKVDARLHHIMLDIHHACVEHGGEGEQTNYVQGANIAGFVKVADAM +LAQGV +>match$41_7_67 +LESFLNHVQKRDPNQTEFAQAVREVMTTLWPFLEQNPKYRQMSLLERLVEPERVIQFRVV +W +>match$41_373_446 +NAGGVATSGLEMAQNAARLGWKAEKVDARLHHIMLDIHHACVEHGGEGEQTNYVQGANIA +GFVKVADAMLAQGV +>match$42_122_135 +LPMGGGKGGSDFDP +>match$43_57_184 +PERVIQFRVVWVDDRNQIQVNRAWRVQFSSAIGPYKGGMRFHPSVNLSILKFLGFEQTFK +NALTTLPMGGGKGGSDFDPKGKSEGEVMRFCQALMTELYRHLGADTDVPAGDIGVGGREV +GFMAGMMK +>match$44_1_447 +MDQTYSLESFLNHVQKRDPNQTEFAQAVREVMTTLWPFLEQNPKYRQMSLLERLVEPERV +IQFRVVWVDDRNQIQVNRAWRVQFSSAIGPYKGGMRFHPSVNLSILKFLGFEQTFKNALT +TLPMGGGKGGSDFDPKGKSEGEVMRFCQALMTELYRHLGADTDVPAGDIGVGGREVGFMA +GMMKKLSNNTACVFTGKGLSFGGSLIRPEATGYGLVYFTEAMLKRHGMGFEGMRVSVSGS +GNVAQYAIEKAMEFGARVITASDSSGTVVDESGFTKEKLARLIEIKASRDGRVADYAKEF +GLVYLEGQQPWSLPVDIALPCATQNELDVDAAHQLIANGVKAVAEGANMPTTIEATELFQ +QAGVLFAPGKAANAGGVATSGLEMAQNAARLGWKAEKVDARLHHIMLDIHHACVEHGGEG +EQTNYVQGANIAGFVKVADAMLAQGVI +>match$45_1_447 +MDQTYSLESFLNHVQKRDPNQTEFAQAVREVMTTLWPFLEQNPKYRQMSLLERLVEPERV 
+IQFRVVWVDDRNQIQVNRAWRVQFSSAIGPYKGGMRFHPSVNLSILKFLGFEQTFKNALT +TLPMGGGKGGSDFDPKGKSEGEVMRFCQALMTELYRHLGADTDVPAGDIGVGGREVGFMA +GMMKKLSNNTACVFTGKGLSFGGSLIRPEATGYGLVYFTEAMLKRHGMGFEGMRVSVSGS +GNVAQYAIEKAMEFGARVITASDSSGTVVDESGFTKEKLARLIEIKASRDGRVADYAKEF +GLVYLEGQQPWSLPVDIALPCATQNELDVDAAHQLIANGVKAVAEGANMPTTIEATELFQ +QAGVLFAPGKAANAGGVATSGLEMAQNAARLGWKAEKVDARLHHIMLDIHHACVEHGGEG +EQTNYVQGANIAGFVKVADAMLAQGVI +>match$46_2_37 +RQFYQHYFTATAKLCWLRWLSVPQRLTMLEGLMQWD diff -r 000000000000 -r 0da2847fc108 interproscan5/data-n/__base__.html.tar.gz Binary file interproscan5/data-n/__base__.html.tar.gz has changed diff -r 000000000000 -r 0da2847fc108 interproscan5/data-n/__base__.svg.tar.gz Binary file interproscan5/data-n/__base__.svg.tar.gz has changed diff -r 000000000000 -r 0da2847fc108 interproscan5/data-n/__base__.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/interproscan5/data-n/__base__.tsv Thu May 24 14:57:30 2018 -0400 @@ -0,0 +1,59 @@ +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 SUPERFAMILY SSF51905 361 415 2.27E-71 T 24-01-2016 IPR023753 FAD/NAD(P)-binding domain GO:0016491|GO:0055114 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 SUPERFAMILY SSF51905 2 250 2.27E-71 T 24-01-2016 IPR023753 FAD/NAD(P)-binding domain GO:0016491|GO:0055114 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 TIGRFAM TIGR01812 sdhA_frdA_Gneg: succinate dehydrogenase or fumarate reductase, flavoprotein subunit 7 581 4.7E-250 T 24-01-2016 IPR014006 Succinate dehydrogenase/fumarate reductase, flavoprotein subunit GO:0016627|GO:0022900|GO:0050660|GO:0055114 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 Gene3D G3DSA:3.90.700.10 234 352 3.6E-37 T 24-01-2016 IPR027477 Succinate dehydrogenase/fumarate reductase flavoprotein, catalytic domain +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 PRINTS PR00411 Pyridine nucleotide disulphide reductase class-I signature 375 382 2.5E-5 T 24-01-2016 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 PRINTS PR00411 Pyridine nucleotide disulphide reductase class-I signature 7 
29 2.5E-5 T 24-01-2016 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 Gene3D G3DSA:1.20.58.100 426 537 1.5E-38 T 24-01-2016 IPR015939 Fumarate reductase/succinate dehydrogenase flavoprotein-like, C-terminal GO:0016491|GO:0055114 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 Coils Coil 423 443 - T 24-01-2016 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 Gene3D G3DSA:3.50.50.60 353 416 5.9E-106 T 24-01-2016 IPR023753 FAD/NAD(P)-binding domain GO:0016491|GO:0055114 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 Gene3D G3DSA:3.50.50.60 2 233 5.9E-106 T 24-01-2016 IPR023753 FAD/NAD(P)-binding domain GO:0016491|GO:0055114 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 PANTHER PTHR11632 1 584 0.0 T 24-01-2016 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 SUPERFAMILY SSF46977 444 590 3.01E-46 T 24-01-2016 IPR015939 Fumarate reductase/succinate dehydrogenase flavoprotein-like, C-terminal GO:0016491|GO:0055114 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 PANTHER PTHR11632:SF50 1 584 0.0 T 24-01-2016 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 PIRSF PIRSF000171 1 575 2.0E-42 T 24-01-2016 IPR030664 Succinate dehydrogenase/fumarate reductase, alpha/adenylylsulphate reductase subunit +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 TIGRFAM TIGR01176 fum_red_Fp: fumarate reductase (quinol), flavoprotein subunit 3 582 0.0 T 24-01-2016 IPR005884 Fumarate reductase, flavoprotein subunit GO:0009061|GO:0016491|GO:0055114 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 Pfam PF02910 Fumarate reductase flavoprotein C-term 453 581 7.0E-39 T 24-01-2016 IPR015939 Fumarate reductase/succinate dehydrogenase flavoprotein-like, C-terminal GO:0016491|GO:0055114 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 Coils Coil 473 493 - T 24-01-2016 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 ProSitePatterns PS00504 Fumarate reductase / succinate dehydrogenase FAD-binding site. 
43 52 - T 24-01-2016 IPR003952 Fumarate reductase/succinate dehydrogenase, FAD-binding site GO:0016491|GO:0055114 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 Gene3D G3DSA:4.10.80.40 543 577 5.5E-18 T 24-01-2016 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 SUPERFAMILY SSF56425 227 358 6.88E-43 T 24-01-2016 IPR027477 Succinate dehydrogenase/fumarate reductase flavoprotein, catalytic domain +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 Pfam PF00890 FAD binding domain 7 397 1.1E-115 T 24-01-2016 IPR003953 FAD-dependent oxidoreductase 2, FAD binding domain +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 PRINTS PR00368 FAD-dependent pyridine nucleotide reductase signature 8 27 3.0E-10 T 24-01-2016 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 PRINTS PR00368 FAD-dependent pyridine nucleotide reductase signature 360 382 3.0E-10 T 24-01-2016 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 SUPERFAMILY SSF51735 4 173 4.86E-50 T 24-01-2016 IPR016040 NAD(P)-binding domain +P00350 1776f92beb74ff0ccd9ac47a65663644 468 TIGRFAM TIGR00873 gnd: 6-phosphogluconate dehydrogenase (decarboxylating) 5 467 1.1E-232 T 24-01-2016 IPR006113 6-phosphogluconate dehydrogenase, decarboxylating GO:0004616|GO:0006098|GO:0055114 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 Gene3D G3DSA:3.40.50.720 1 180 1.9E-61 T 24-01-2016 IPR016040 NAD(P)-binding domain +P00350 1776f92beb74ff0ccd9ac47a65663644 468 Pfam PF03446 NAD binding domain of 6-phosphogluconate dehydrogenase 3 174 1.8E-52 T 24-01-2016 IPR006115 6-phosphogluconate dehydrogenase, NADP-binding GO:0004616|GO:0055114 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 PIRSF PIRSF000109 1 468 1.9E-243 T 24-01-2016 IPR006113 6-phosphogluconate dehydrogenase, decarboxylating GO:0004616|GO:0006098|GO:0055114 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 ProSitePatterns PS00461 6-phosphogluconate dehydrogenase signature. 
253 265 - T 24-01-2016 IPR006184 6-phosphogluconate-binding site GO:0004616|GO:0006098|GO:0055114 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 Gene3D G3DSA:1.10.1040.10 181 433 1.0E-119 T 24-01-2016 IPR013328 6-phosphogluconate dehydrogenase, domain 2 GO:0016491|GO:0055114 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 Gene3D G3DSA:1.20.5.320 436 467 1.4E-22 T 24-01-2016 IPR012284 6-phosphogluconate dehydrogenase, domain 3 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 PRINTS PR00076 6-phosphogluconate dehydrogenase signature 249 276 1.521867E-89 T 24-01-2016 IPR006183 6-phosphogluconate dehydrogenase GO:0055114 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 PRINTS PR00076 6-phosphogluconate dehydrogenase signature 168 196 1.521867E-89 T 24-01-2016 IPR006183 6-phosphogluconate dehydrogenase GO:0055114 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 PRINTS PR00076 6-phosphogluconate dehydrogenase signature 66 95 1.521867E-89 T 24-01-2016 IPR006183 6-phosphogluconate dehydrogenase GO:0055114 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 PRINTS PR00076 6-phosphogluconate dehydrogenase signature 119 144 1.521867E-89 T 24-01-2016 IPR006183 6-phosphogluconate dehydrogenase GO:0055114 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 PRINTS PR00076 6-phosphogluconate dehydrogenase signature 4 27 1.521867E-89 T 24-01-2016 IPR006183 6-phosphogluconate dehydrogenase GO:0055114 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 PRINTS PR00076 6-phosphogluconate dehydrogenase signature 356 378 1.521867E-89 T 24-01-2016 IPR006183 6-phosphogluconate dehydrogenase GO:0055114 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 PANTHER PTHR11811:SF25 1 466 7.6E-305 T 24-01-2016 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 PANTHER PTHR11811 1 466 7.6E-305 T 24-01-2016 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 Pfam PF00393 6-phosphogluconate dehydrogenase, C-terminal domain 179 466 2.0E-132 T 24-01-2016 IPR006114 6-phosphogluconate dehydrogenase, C-terminal GO:0004616|GO:0006098|GO:0055114 +P00350 
1776f92beb74ff0ccd9ac47a65663644 468 SUPERFAMILY SSF48179 177 467 6.8E-132 T 24-01-2016 IPR008927 6-phosphogluconate dehydrogenase C-terminal domain-like GO:0055114 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 PRINTS PR00082 Glutamate/leucine/phenylalanine/valine dehydrogenase signature 372 383 2.5E-33 T 24-01-2016 IPR006095 Glutamate/phenylalanine/leucine/valine dehydrogenase GO:0006520|GO:0016491|GO:0055114 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 PRINTS PR00082 Glutamate/leucine/phenylalanine/valine dehydrogenase signature 114 128 2.5E-33 T 24-01-2016 IPR006095 Glutamate/phenylalanine/leucine/valine dehydrogenase GO:0006520|GO:0016491|GO:0055114 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 PRINTS PR00082 Glutamate/leucine/phenylalanine/valine dehydrogenase signature 235 255 2.5E-33 T 24-01-2016 IPR006095 Glutamate/phenylalanine/leucine/valine dehydrogenase GO:0006520|GO:0016491|GO:0055114 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 PRINTS PR00082 Glutamate/leucine/phenylalanine/valine dehydrogenase signature 193 215 2.5E-33 T 24-01-2016 IPR006095 Glutamate/phenylalanine/leucine/valine dehydrogenase GO:0006520|GO:0016491|GO:0055114 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 SUPERFAMILY SSF53223 6 196 6.72E-74 T 24-01-2016 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 Gene3D G3DSA:3.40.192.10 68 188 1.0E-60 T 24-01-2016 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 Pfam PF00208 Glutamate/Leucine/Phenylalanine/Valine dehydrogenase 202 445 1.3E-87 T 24-01-2016 IPR006096 Glutamate/phenylalanine/leucine/valine dehydrogenase, C-terminal GO:0006520|GO:0016491|GO:0055114 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 PIRSF PIRSF000185 22 447 1.8E-114 T 24-01-2016 IPR014362 Glutamate dehydrogenase GO:0016639|GO:0055114 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 SMART SM00839 Glutamate/Leucine/Phenylalanine/Valine dehydrogenase 204 445 2.0E-93 T 24-01-2016 IPR006096 Glutamate/phenylalanine/leucine/valine dehydrogenase, C-terminal GO:0006520|GO:0016491|GO:0055114 
+P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 Gene3D G3DSA:3.40.50.720 209 372 1.6E-63 T 24-01-2016 IPR016040 NAD(P)-binding domain +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 SUPERFAMILY SSF51735 202 446 4.96E-77 T 24-01-2016 IPR016040 NAD(P)-binding domain +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 Gene3D G3DSA:1.10.285.10 7 67 8.2E-28 T 24-01-2016 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 Gene3D G3DSA:1.10.285.10 373 446 2.0E-26 T 24-01-2016 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 ProSitePatterns PS00074 Glu / Leu / Phe / Val dehydrogenases active site. 122 135 - T 24-01-2016 IPR006095 Glutamate/phenylalanine/leucine/valine dehydrogenase GO:0006520|GO:0016491|GO:0055114 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 Pfam PF02812 Glu/Leu/Phe/Val dehydrogenase, dimerisation domain 57 184 6.3E-49 T 24-01-2016 IPR006097 Glutamate/phenylalanine/leucine/valine dehydrogenase, dimerisation domain GO:0006520|GO:0016491|GO:0055114 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 PANTHER PTHR11606 1 447 3.2E-250 T 24-01-2016 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 PANTHER PTHR11606:SF4 1 447 3.2E-250 T 24-01-2016 +C1P5Z7 abfa044baa298f169ea62ac6b48e1185 43 Pfam PF15894 Inhibitor of glucose uptake transporter SgrT 2 37 1.0E-11 T 24-01-2016 IPR031767 Inhibitor of glucose uptake transporter SgrT GO:0046325 diff -r 000000000000 -r 0da2847fc108 interproscan5/data-n/__base__.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/interproscan5/data-n/__base__.xml Thu May 24 14:57:30 2018 -0400 @@ -0,0 +1,2204 @@ + + + + 
MQTFQADLAIVGAGGAGLRAAIAAAQANPNAKIALISKVYPMRSHTVAAEGGSAAVAQDHDSFEYHFHDTVAGGDWLCEQDVVDYFVHHCPTEMTQLELWGCPWSRRPDGSVNVRRFGGMKIERTWFAADKTGFHMLHTLFQTSLQFPQIQRFDEHFVLDILVDDGHVRGLVAMNMMEGTLVQIRANAVVMATGGAGRVYRYNTNGGIVTGDGMGMALSHGVPLRDMEFVQYHPTGLPGSGILMTEGCRGEGGILVNKNGYRYLQDYGMGPETPLGEPKNKYMELGPRDKVSQAFWHEWRKGNTISTPRGDVVYLDLRHLGEKKLHERLPFICELAKAYVGVDPVKEPIPVRPTAHYTMGGIETDQNCETRIKGLFAVGECSSVGLHGANRLGSNSLAELVVFGRLAGEQATERAATAGNGNEAAIEAQAAGVEQRLKDLVNQDGGENWAKIRDEMGLAMEEGCGIYRTPELMQKTIDKLAELQERFKRVRITDTSSVFNTDLLYTIELGHGLNVAECMAHSAMARKESRGAHQRLDEGCTERDDVNFLKHTLAFRDADGTTRLEYSDVKITTLPPAKRVYGGEADAADKAEAANKKEKANG + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RSHTvaAeGG + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MRQFYQHYFTATAKLCWLRWLSVPQRLTMLEGLMQWDDRNSES + + + + + + + + + + + + + + + + + + + + 
MDQTYSLESFLNHVQKRDPNQTEFAQAVREVMTTLWPFLEQNPKYRQMSLLERLVEPERVIQFRVVWVDDRNQIQVNRAWRVQFSSAIGPYKGGMRFHPSVNLSILKFLGFEQTFKNALTTLPMGGGKGGSDFDPKGKSEGEVMRFCQALMTELYRHLGADTDVPAGDIGVGGREVGFMAGMMKKLSNNTACVFTGKGLSFGGSLIRPEATGYGLVYFTEAMLKRHGMGFEGMRVSVSGSGNVAQYAIEKAMEFGARVITASDSSGTVVDESGFTKEKLARLIEIKASRDGRVADYAKEFGLVYLEGQQPWSLPVDIALPCATQNELDVDAAHQLIANGVKAVAEGANMPTTIEATELFQQAGVLFAPGKAANAGGVATSGLEMAQNAARLGWKAEKVDARLHHIMLDIHHACVEHGGEGEQTNYVQGANIAGFVKVADAMLAQGVI + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + LpmGGGKgGsdfDP + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
MSKQQIGVVGMAVMGRNLALNIESRGYTVSIFNRSREKTEEVIAENPGKKLVPYYTVKEFVESLETPRRILLMVKAGAGTDAAIDSLKPYLDKGDIIIDGGNTFFQDTIRRNRELSAEGFNFIGTGVSGGEEGALKGPSIMPGGQKEAYELVAPILTKIAAVAEDGEPCVTYIGADGAGHYVKMVHNGIEYGDMQLIAEAYSLLKGGLNLTNEELAQTFTEWNNGELSSYLIDITKDIFTKKDEDGNYLVDVILDEAANKGTGKWTSQSALDLGEPLSLITESVFARYISSLKDQRVAASKVLSGPQAQPAGDKAEFIEKVRRALYLGKIVSYAQGFSQLRAASEEYNWDLNYGEIAKIFRAGCIIRAQFLQKITDAYAENPQIANLLLAPYFKQIADDYQQALRDVVAYAVQNGIPVPTFSAAVAYYDSYRAAVLPANLIQAQRDYFGAHTYKRIDKEGVFHTEWLD + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + IlDeaANKGTGkW + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 0da2847fc108 interproscan5/data-n/input.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/interproscan5/data-n/input.fasta Thu May 24 14:57:30 2018 -0400 @@ -0,0 +1,32 @@ +>sp|C1P5Z7|SGRT_ECOLI Putative inhibitor of glucose uptake transporter SgrT OS=Escherichia coli (strain K12) GN=sgrT PE=1 SV=1 +MRQFYQHYFTATAKLCWLRWLSVPQRLTMLEGLMQWDDRNSES +>sp|P00350|6PGD_ECOLI 6-phosphogluconate dehydrogenase, decarboxylating OS=Escherichia coli (strain K12) GN=gnd PE=1 SV=2 +MSKQQIGVVGMAVMGRNLALNIESRGYTVSIFNRSREKTEEVIAENPGKKLVPYYTVKEF +VESLETPRRILLMVKAGAGTDAAIDSLKPYLDKGDIIIDGGNTFFQDTIRRNRELSAEGF +NFIGTGVSGGEEGALKGPSIMPGGQKEAYELVAPILTKIAAVAEDGEPCVTYIGADGAGH +YVKMVHNGIEYGDMQLIAEAYSLLKGGLNLTNEELAQTFTEWNNGELSSYLIDITKDIFT +KKDEDGNYLVDVILDEAANKGTGKWTSQSALDLGEPLSLITESVFARYISSLKDQRVAAS +KVLSGPQAQPAGDKAEFIEKVRRALYLGKIVSYAQGFSQLRAASEEYNWDLNYGEIAKIF +RAGCIIRAQFLQKITDAYAENPQIANLLLAPYFKQIADDYQQALRDVVAYAVQNGIPVPT +FSAAVAYYDSYRAAVLPANLIQAQRDYFGAHTYKRIDKEGVFHTEWLD +>sp|P00363|FRDA_ECOLI Fumarate reductase flavoprotein subunit OS=Escherichia coli (strain K12) GN=frdA PE=1 SV=3 +MQTFQADLAIVGAGGAGLRAAIAAAQANPNAKIALISKVYPMRSHTVAAEGGSAAVAQDH +DSFEYHFHDTVAGGDWLCEQDVVDYFVHHCPTEMTQLELWGCPWSRRPDGSVNVRRFGGM +KIERTWFAADKTGFHMLHTLFQTSLQFPQIQRFDEHFVLDILVDDGHVRGLVAMNMMEGT +LVQIRANAVVMATGGAGRVYRYNTNGGIVTGDGMGMALSHGVPLRDMEFVQYHPTGLPGS +GILMTEGCRGEGGILVNKNGYRYLQDYGMGPETPLGEPKNKYMELGPRDKVSQAFWHEWR +KGNTISTPRGDVVYLDLRHLGEKKLHERLPFICELAKAYVGVDPVKEPIPVRPTAHYTMG +GIETDQNCETRIKGLFAVGECSSVGLHGANRLGSNSLAELVVFGRLAGEQATERAATAGN +GNEAAIEAQAAGVEQRLKDLVNQDGGENWAKIRDEMGLAMEEGCGIYRTPELMQKTIDKL +AELQERFKRVRITDTSSVFNTDLLYTIELGHGLNVAECMAHSAMARKESRGAHQRLDEGC +TERDDVNFLKHTLAFRDADGTTRLEYSDVKITTLPPAKRVYGGEADAADKAEAANKKEKA +NG +>sp|P00370|DHE4_ECOLI NADP-specific glutamate dehydrogenase OS=Escherichia coli (strain K12) GN=gdhA PE=1 SV=1 +MDQTYSLESFLNHVQKRDPNQTEFAQAVREVMTTLWPFLEQNPKYRQMSLLERLVEPERV 
+IQFRVVWVDDRNQIQVNRAWRVQFSSAIGPYKGGMRFHPSVNLSILKFLGFEQTFKNALT +TLPMGGGKGGSDFDPKGKSEGEVMRFCQALMTELYRHLGADTDVPAGDIGVGGREVGFMA +GMMKKLSNNTACVFTGKGLSFGGSLIRPEATGYGLVYFTEAMLKRHGMGFEGMRVSVSGS +GNVAQYAIEKAMEFGARVITASDSSGTVVDESGFTKEKLARLIEIKASRDGRVADYAKEF +GLVYLEGQQPWSLPVDIALPCATQNELDVDAAHQLIANGVKAVAEGANMPTTIEATELFQ +QAGVLFAPGKAANAGGVATSGLEMAQNAARLGWKAEKVDARLHHIMLDIHHACVEHGGEG +EQTNYVQGANIAGFVKVADAMLAQGVI diff -r 000000000000 -r 0da2847fc108 interproscan5/data-p/__base__.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/interproscan5/data-p/__base__.gff3 Thu May 24 14:57:30 2018 -0400 @@ -0,0 +1,377 @@ +##gff-version 3 +##feature-ontology http://song.cvs.sourceforge.net/viewvc/song/ontology/sofa.obo?revision=1.269 +##sequence-region P00363 1 602 +P00363 . polypeptide 1 602 . + . md5=0d2c0f1acdd08ab0157f2308531a58e4;ID=P00363 +P00363 SUPERFAMILY protein_match 361 415 2.27E-71 + . Name=SSF51905;Target=P00363 361 415;status=T;ID=match$1_361_415;Ontology_term="GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR023753" +P00363 SUPERFAMILY protein_match 2 250 2.27E-71 + . Name=SSF51905;Target=P00363 2 250;status=T;ID=match$1_2_250;Ontology_term="GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR023753" +P00363 TIGRFAM protein_match 7 581 4.7E-250 + . Name=TIGR01812;signature_desc=sdhA_frdA_Gneg: succinate dehydrogenase or fumarate reductase, flavoprotein subunit;Target=P00363 7 581;status=T;ID=match$2_7_581;Ontology_term="GO:0016627","GO:0022900","GO:0050660","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR014006" +P00363 Gene3D protein_match 234 352 3.6E-37 + . Name=G3DSA:3.90.700.10;Target=P00363 234 352;status=T;ID=match$3_234_352;date=24-01-2016;Dbxref="InterPro:IPR027477" +P00363 PRINTS protein_match 375 382 2.5E-5 + . Name=PR00411;signature_desc=Pyridine nucleotide disulphide reductase class-I signature;Target=P00363 375 382;status=T;ID=match$4_375_382;date=24-01-2016 +P00363 PRINTS protein_match 7 29 2.5E-5 + . 
Name=PR00411;signature_desc=Pyridine nucleotide disulphide reductase class-I signature;Target=P00363 7 29;status=T;ID=match$4_7_29;date=24-01-2016 +P00363 Gene3D protein_match 426 537 1.5E-38 + . Name=G3DSA:1.20.58.100;Target=P00363 426 537;status=T;ID=match$5_426_537;Ontology_term="GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR015939" +P00363 Coils protein_match 423 443 . + . Name=Coil;Target=P00363 423 443;status=T;ID=match$6_423_443;date=24-01-2016 +P00363 Gene3D protein_match 353 416 5.9E-106 + . Name=G3DSA:3.50.50.60;Target=P00363 353 416;status=T;ID=match$7_353_416;Ontology_term="GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR023753" +P00363 Gene3D protein_match 2 233 5.9E-106 + . Name=G3DSA:3.50.50.60;Target=P00363 2 233;status=T;ID=match$7_2_233;Ontology_term="GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR023753" +P00363 PANTHER protein_match 1 584 0.0 + . Name=PTHR11632;Target=P00363 1 584;status=T;ID=match$8_1_584;date=24-01-2016 +P00363 SUPERFAMILY protein_match 444 590 3.01E-46 + . Name=SSF46977;Target=P00363 444 590;status=T;ID=match$9_444_590;Ontology_term="GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR015939" +P00363 PANTHER protein_match 1 584 0.0 + . Name=PTHR11632:SF50;Target=P00363 1 584;status=T;ID=match$10_1_584;date=24-01-2016 +P00363 PIRSF protein_match 1 575 2.0E-42 + . Name=PIRSF000171;Target=P00363 1 575;status=T;ID=match$11_1_575;date=24-01-2016;Dbxref="InterPro:IPR030664" +P00363 TIGRFAM protein_match 3 582 0.0 + . Name=TIGR01176;signature_desc=fum_red_Fp: fumarate reductase (quinol), flavoprotein subunit;Target=P00363 3 582;status=T;ID=match$12_3_582;Ontology_term="GO:0009061","GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR005884" +P00363 Pfam protein_match 453 581 7.0E-39 + . 
Name=PF02910;signature_desc=Fumarate reductase flavoprotein C-term;Target=P00363 453 581;status=T;ID=match$13_453_581;Ontology_term="GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR015939" +P00363 Coils protein_match 473 493 . + . Name=Coil;Target=P00363 473 493;status=T;ID=match$14_473_493;date=24-01-2016 +P00363 ProSitePatterns protein_match 43 52 . + . Name=PS00504;signature_desc=Fumarate reductase / succinate dehydrogenase FAD-binding site.;Target=P00363 43 52;status=T;ID=match$15_43_52;Ontology_term="GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR003952" +P00363 Gene3D protein_match 543 577 5.5E-18 + . Name=G3DSA:4.10.80.40;Target=P00363 543 577;status=T;ID=match$16_543_577;date=24-01-2016 +P00363 SUPERFAMILY protein_match 227 358 6.88E-43 + . Name=SSF56425;Target=P00363 227 358;status=T;ID=match$17_227_358;date=24-01-2016;Dbxref="InterPro:IPR027477" +P00363 Pfam protein_match 7 397 1.1E-115 + . Name=PF00890;signature_desc=FAD binding domain;Target=P00363 7 397;status=T;ID=match$18_7_397;date=24-01-2016;Dbxref="InterPro:IPR003953" +P00363 PRINTS protein_match 8 27 3.0E-10 + . Name=PR00368;signature_desc=FAD-dependent pyridine nucleotide reductase signature;Target=P00363 8 27;status=T;ID=match$19_8_27;date=24-01-2016 +P00363 PRINTS protein_match 360 382 3.0E-10 + . Name=PR00368;signature_desc=FAD-dependent pyridine nucleotide reductase signature;Target=P00363 360 382;status=T;ID=match$19_360_382;date=24-01-2016 +##sequence-region P00350 1 468 +P00350 . polypeptide 1 468 . + . md5=1776f92beb74ff0ccd9ac47a65663644;ID=P00350 +P00350 SUPERFAMILY protein_match 4 173 4.86E-50 + . Name=SSF51735;Target=P00350 4 173;status=T;ID=match$20_4_173;date=24-01-2016;Dbxref="InterPro:IPR016040" +P00350 TIGRFAM protein_match 5 467 1.1E-232 + . 
Name=TIGR00873;signature_desc=gnd: 6-phosphogluconate dehydrogenase (decarboxylating);Target=P00350 5 467;status=T;ID=match$21_5_467;Ontology_term="GO:0004616","GO:0006098","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006113" +P00350 Gene3D protein_match 1 180 1.9E-61 + . Name=G3DSA:3.40.50.720;Target=P00350 1 180;status=T;ID=match$22_1_180;date=24-01-2016;Dbxref="InterPro:IPR016040" +P00350 Pfam protein_match 3 174 1.8E-52 + . Name=PF03446;signature_desc=NAD binding domain of 6-phosphogluconate dehydrogenase;Target=P00350 3 174;status=T;ID=match$23_3_174;Ontology_term="GO:0004616","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006115" +P00350 PIRSF protein_match 1 468 1.9E-243 + . Name=PIRSF000109;Target=P00350 1 468;status=T;ID=match$24_1_468;Ontology_term="GO:0004616","GO:0006098","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006113" +P00350 ProSitePatterns protein_match 253 265 . + . Name=PS00461;signature_desc=6-phosphogluconate dehydrogenase signature.;Target=P00350 253 265;status=T;ID=match$25_253_265;Ontology_term="GO:0004616","GO:0006098","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006184" +P00350 Gene3D protein_match 181 433 1.0E-119 + . Name=G3DSA:1.10.1040.10;Target=P00350 181 433;status=T;ID=match$26_181_433;Ontology_term="GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR013328" +P00350 Gene3D protein_match 436 467 1.4E-22 + . Name=G3DSA:1.20.5.320;Target=P00350 436 467;status=T;ID=match$27_436_467;date=24-01-2016;Dbxref="InterPro:IPR012284" +P00350 PRINTS protein_match 249 276 1.521867E-89 + . Name=PR00076;signature_desc=6-phosphogluconate dehydrogenase signature;Target=P00350 249 276;status=T;ID=match$28_249_276;Ontology_term="GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006183" +P00350 PRINTS protein_match 168 196 1.521867E-89 + . 
Name=PR00076;signature_desc=6-phosphogluconate dehydrogenase signature;Target=P00350 168 196;status=T;ID=match$28_168_196;Ontology_term="GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006183" +P00350 PRINTS protein_match 66 95 1.521867E-89 + . Name=PR00076;signature_desc=6-phosphogluconate dehydrogenase signature;Target=P00350 66 95;status=T;ID=match$28_66_95;Ontology_term="GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006183" +P00350 PRINTS protein_match 119 144 1.521867E-89 + . Name=PR00076;signature_desc=6-phosphogluconate dehydrogenase signature;Target=P00350 119 144;status=T;ID=match$28_119_144;Ontology_term="GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006183" +P00350 PRINTS protein_match 4 27 1.521867E-89 + . Name=PR00076;signature_desc=6-phosphogluconate dehydrogenase signature;Target=P00350 4 27;status=T;ID=match$28_4_27;Ontology_term="GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006183" +P00350 PRINTS protein_match 356 378 1.521867E-89 + . Name=PR00076;signature_desc=6-phosphogluconate dehydrogenase signature;Target=P00350 356 378;status=T;ID=match$28_356_378;Ontology_term="GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006183" +P00350 PANTHER protein_match 1 466 7.6E-305 + . Name=PTHR11811:SF25;Target=P00350 1 466;status=T;ID=match$29_1_466;date=24-01-2016 +P00350 PANTHER protein_match 1 466 7.6E-305 + . Name=PTHR11811;Target=P00350 1 466;status=T;ID=match$30_1_466;date=24-01-2016 +P00350 Pfam protein_match 179 466 2.0E-132 + . Name=PF00393;signature_desc=6-phosphogluconate dehydrogenase, C-terminal domain;Target=P00350 179 466;status=T;ID=match$31_179_466;Ontology_term="GO:0004616","GO:0006098","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006114" +P00350 SUPERFAMILY protein_match 177 467 6.8E-132 + . Name=SSF48179;Target=P00350 177 467;status=T;ID=match$32_177_467;Ontology_term="GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR008927" +##sequence-region P00370 1 447 +P00370 . polypeptide 1 447 . + . 
md5=9ec8e26fcf5c160f533bf9a69dec0212;ID=P00370 +P00370 PRINTS protein_match 372 383 2.5E-33 + . Name=PR00082;signature_desc=Glutamate/leucine/phenylalanine/valine dehydrogenase signature;Target=P00370 372 383;status=T;ID=match$33_372_383;Ontology_term="GO:0006520","GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006095" +P00370 PRINTS protein_match 114 128 2.5E-33 + . Name=PR00082;signature_desc=Glutamate/leucine/phenylalanine/valine dehydrogenase signature;Target=P00370 114 128;status=T;ID=match$33_114_128;Ontology_term="GO:0006520","GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006095" +P00370 PRINTS protein_match 235 255 2.5E-33 + . Name=PR00082;signature_desc=Glutamate/leucine/phenylalanine/valine dehydrogenase signature;Target=P00370 235 255;status=T;ID=match$33_235_255;Ontology_term="GO:0006520","GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006095" +P00370 PRINTS protein_match 193 215 2.5E-33 + . Name=PR00082;signature_desc=Glutamate/leucine/phenylalanine/valine dehydrogenase signature;Target=P00370 193 215;status=T;ID=match$33_193_215;Ontology_term="GO:0006520","GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006095" +P00370 SUPERFAMILY protein_match 6 196 6.72E-74 + . Name=SSF53223;Target=P00370 6 196;status=T;ID=match$34_6_196;date=24-01-2016 +P00370 Gene3D protein_match 68 188 1.0E-60 + . Name=G3DSA:3.40.192.10;Target=P00370 68 188;status=T;ID=match$35_68_188;date=24-01-2016 +P00370 Pfam protein_match 202 445 1.3E-87 + . Name=PF00208;signature_desc=Glutamate/Leucine/Phenylalanine/Valine dehydrogenase;Target=P00370 202 445;status=T;ID=match$36_202_445;Ontology_term="GO:0006520","GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006096" +P00370 PIRSF protein_match 22 447 1.8E-114 + . Name=PIRSF000185;Target=P00370 22 447;status=T;ID=match$37_22_447;Ontology_term="GO:0016639","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR014362" +P00370 SMART protein_match 204 445 2.0E-93 + . 
Name=SM00839;signature_desc=Glutamate/Leucine/Phenylalanine/Valine dehydrogenase;Target=P00370 204 445;status=T;ID=match$38_204_445;Ontology_term="GO:0006520","GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006096" +P00370 Gene3D protein_match 209 372 1.6E-63 + . Name=G3DSA:3.40.50.720;Target=P00370 209 372;status=T;ID=match$39_209_372;date=24-01-2016;Dbxref="InterPro:IPR016040" +P00370 SUPERFAMILY protein_match 202 446 4.96E-77 + . Name=SSF51735;Target=P00370 202 446;status=T;ID=match$40_202_446;date=24-01-2016;Dbxref="InterPro:IPR016040" +P00370 Gene3D protein_match 7 67 8.2E-28 + . Name=G3DSA:1.10.285.10;Target=P00370 7 67;status=T;ID=match$41_7_67;date=24-01-2016 +P00370 Gene3D protein_match 373 446 2.0E-26 + . Name=G3DSA:1.10.285.10;Target=P00370 373 446;status=T;ID=match$41_373_446;date=24-01-2016 +P00370 ProSitePatterns protein_match 122 135 . + . Name=PS00074;signature_desc=Glu / Leu / Phe / Val dehydrogenases active site.;Target=P00370 122 135;status=T;ID=match$42_122_135;Ontology_term="GO:0006520","GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006095" +P00370 Pfam protein_match 57 184 6.3E-49 + . Name=PF02812;signature_desc=Glu/Leu/Phe/Val dehydrogenase, dimerisation domain;Target=P00370 57 184;status=T;ID=match$43_57_184;Ontology_term="GO:0006520","GO:0016491","GO:0055114";date=24-01-2016;Dbxref="InterPro:IPR006097" +P00370 PANTHER protein_match 1 447 3.2E-250 + . Name=PTHR11606;Target=P00370 1 447;status=T;ID=match$44_1_447;date=24-01-2016 +P00370 PANTHER protein_match 1 447 3.2E-250 + . Name=PTHR11606:SF4;Target=P00370 1 447;status=T;ID=match$45_1_447;date=24-01-2016 +##sequence-region C1P5Z7 1 43 +C1P5Z7 . polypeptide 1 43 . + . md5=abfa044baa298f169ea62ac6b48e1185;ID=C1P5Z7 +C1P5Z7 Pfam protein_match 2 37 1.0E-11 + . 
Name=PF15894;signature_desc=Inhibitor of glucose uptake transporter SgrT;Target=C1P5Z7 2 37;status=T;ID=match$46_2_37;Ontology_term="GO:0046325";date=24-01-2016;Dbxref="InterPro:IPR031767" +##FASTA +>C1P5Z7 +MRQFYQHYFTATAKLCWLRWLSVPQRLTMLEGLMQWDDRNSES +>P00350 +MSKQQIGVVGMAVMGRNLALNIESRGYTVSIFNRSREKTEEVIAENPGKKLVPYYTVKEF +VESLETPRRILLMVKAGAGTDAAIDSLKPYLDKGDIIIDGGNTFFQDTIRRNRELSAEGF +NFIGTGVSGGEEGALKGPSIMPGGQKEAYELVAPILTKIAAVAEDGEPCVTYIGADGAGH +YVKMVHNGIEYGDMQLIAEAYSLLKGGLNLTNEELAQTFTEWNNGELSSYLIDITKDIFT +KKDEDGNYLVDVILDEAANKGTGKWTSQSALDLGEPLSLITESVFARYISSLKDQRVAAS +KVLSGPQAQPAGDKAEFIEKVRRALYLGKIVSYAQGFSQLRAASEEYNWDLNYGEIAKIF +RAGCIIRAQFLQKITDAYAENPQIANLLLAPYFKQIADDYQQALRDVVAYAVQNGIPVPT +FSAAVAYYDSYRAAVLPANLIQAQRDYFGAHTYKRIDKEGVFHTEWLD +>P00363 +MQTFQADLAIVGAGGAGLRAAIAAAQANPNAKIALISKVYPMRSHTVAAEGGSAAVAQDH +DSFEYHFHDTVAGGDWLCEQDVVDYFVHHCPTEMTQLELWGCPWSRRPDGSVNVRRFGGM +KIERTWFAADKTGFHMLHTLFQTSLQFPQIQRFDEHFVLDILVDDGHVRGLVAMNMMEGT +LVQIRANAVVMATGGAGRVYRYNTNGGIVTGDGMGMALSHGVPLRDMEFVQYHPTGLPGS +GILMTEGCRGEGGILVNKNGYRYLQDYGMGPETPLGEPKNKYMELGPRDKVSQAFWHEWR +KGNTISTPRGDVVYLDLRHLGEKKLHERLPFICELAKAYVGVDPVKEPIPVRPTAHYTMG +GIETDQNCETRIKGLFAVGECSSVGLHGANRLGSNSLAELVVFGRLAGEQATERAATAGN +GNEAAIEAQAAGVEQRLKDLVNQDGGENWAKIRDEMGLAMEEGCGIYRTPELMQKTIDKL +AELQERFKRVRITDTSSVFNTDLLYTIELGHGLNVAECMAHSAMARKESRGAHQRLDEGC +TERDDVNFLKHTLAFRDADGTTRLEYSDVKITTLPPAKRVYGGEADAADKAEAANKKEKA +NG +>P00370 +MDQTYSLESFLNHVQKRDPNQTEFAQAVREVMTTLWPFLEQNPKYRQMSLLERLVEPERV +IQFRVVWVDDRNQIQVNRAWRVQFSSAIGPYKGGMRFHPSVNLSILKFLGFEQTFKNALT +TLPMGGGKGGSDFDPKGKSEGEVMRFCQALMTELYRHLGADTDVPAGDIGVGGREVGFMA +GMMKKLSNNTACVFTGKGLSFGGSLIRPEATGYGLVYFTEAMLKRHGMGFEGMRVSVSGS +GNVAQYAIEKAMEFGARVITASDSSGTVVDESGFTKEKLARLIEIKASRDGRVADYAKEF +GLVYLEGQQPWSLPVDIALPCATQNELDVDAAHQLIANGVKAVAEGANMPTTIEATELFQ +QAGVLFAPGKAANAGGVATSGLEMAQNAARLGWKAEKVDARLHHIMLDIHHACVEHGGEG +EQTNYVQGANIAGFVKVADAMLAQGVI +>match$1_2_250 +QTFQADLAIVGAGGAGLRAAIAAAQANPNAKIALISKVYPMRSHTVAAEGGSAAVAQDHD +SFEYHFHDTVAGGDWLCEQDVVDYFVHHCPTEMTQLELWGCPWSRRPDGSVNVRRFGGMK 
+IERTWFAADKTGFHMLHTLFQTSLQFPQIQRFDEHFVLDILVDDGHVRGLVAMNMMEGTL +VQIRANAVVMATGGAGRVYRYNTNGGIVTGDGMGMALSHGVPLRDMEFVQYHPTGLPGSG +ILMTEGCRG +>match$1_361_415 +GIETDQNCETRIKGLFAVGECSSVGLHGANRLGSNSLAELVVFGRLAGEQATERA +>match$2_7_581 +DLAIVGAGGAGLRAAIAAAQANPNAKIALISKVYPMRSHTVAAEGGSAAVAQDHDSFEYH +FHDTVAGGDWLCEQDVVDYFVHHCPTEMTQLELWGCPWSRRPDGSVNVRRFGGMKIERTW +FAADKTGFHMLHTLFQTSLQFPQIQRFDEHFVLDILVDDGHVRGLVAMNMMEGTLVQIRA +NAVVMATGGAGRVYRYNTNGGIVTGDGMGMALSHGVPLRDMEFVQYHPTGLPGSGILMTE +GCRGEGGILVNKNGYRYLQDYGMGPETPLGEPKNKYMELGPRDKVSQAFWHEWRKGNTIS +TPRGDVVYLDLRHLGEKKLHERLPFICELAKAYVGVDPVKEPIPVRPTAHYTMGGIETDQ +NCETRIKGLFAVGECSSVGLHGANRLGSNSLAELVVFGRLAGEQATERAATAGNGNEAAI +EAQAAGVEQRLKDLVNQDGGENWAKIRDEMGLAMEEGCGIYRTPELMQKTIDKLAELQER +FKRVRITDTSSVFNTDLLYTIELGHGLNVAECMAHSAMARKESRGAHQRLDEGCTERDDV +NFLKHTLAFRDADGTTRLEYSDVKITTLPPAKRVY +>match$3_234_352 +PTGLPGSGILMTEGCRGEGGILVNKNGYRYLQDYGMGPETPLGEPKNKYMELGPRDKVSQ +AFWHEWRKGNTISTPRGDVVYLDLRHLGEKKLHERLPFICELAKAYVGVDPVKEPIPVR +>match$4_7_29 +DLAIVGAGGAGLRAAIAAAQANP +>match$4_375_382 +LFAVGECS +>match$5_426_537 +IEAQAAGVEQRLKDLVNQDGGENWAKIRDEMGLAMEEGCGIYRTPELMQKTIDKLAELQE +RFKRVRITDTSSVFNTDLLYTIELGHGLNVAECMAHSAMARKESRGAHQRLD +>match$6_423_443 +EAAIEAQAAGVEQRLKDLVNQ +>match$7_2_233 +QTFQADLAIVGAGGAGLRAAIAAAQANPNAKIALISKVYPMRSHTVAAEGGSAAVAQDHD +SFEYHFHDTVAGGDWLCEQDVVDYFVHHCPTEMTQLELWGCPWSRRPDGSVNVRRFGGMK +IERTWFAADKTGFHMLHTLFQTSLQFPQIQRFDEHFVLDILVDDGHVRGLVAMNMMEGTL +VQIRANAVVMATGGAGRVYRYNTNGGIVTGDGMGMALSHGVPLRDMEFVQYH +>match$7_353_416 +PTAHYTMGGIETDQNCETRIKGLFAVGECSSVGLHGANRLGSNSLAELVVFGRLAGEQAT +ERAA +>match$8_1_584 +MQTFQADLAIVGAGGAGLRAAIAAAQANPNAKIALISKVYPMRSHTVAAEGGSAAVAQDH +DSFEYHFHDTVAGGDWLCEQDVVDYFVHHCPTEMTQLELWGCPWSRRPDGSVNVRRFGGM +KIERTWFAADKTGFHMLHTLFQTSLQFPQIQRFDEHFVLDILVDDGHVRGLVAMNMMEGT +LVQIRANAVVMATGGAGRVYRYNTNGGIVTGDGMGMALSHGVPLRDMEFVQYHPTGLPGS +GILMTEGCRGEGGILVNKNGYRYLQDYGMGPETPLGEPKNKYMELGPRDKVSQAFWHEWR +KGNTISTPRGDVVYLDLRHLGEKKLHERLPFICELAKAYVGVDPVKEPIPVRPTAHYTMG +GIETDQNCETRIKGLFAVGECSSVGLHGANRLGSNSLAELVVFGRLAGEQATERAATAGN 
+GNEAAIEAQAAGVEQRLKDLVNQDGGENWAKIRDEMGLAMEEGCGIYRTPELMQKTIDKL +AELQERFKRVRITDTSSVFNTDLLYTIELGHGLNVAECMAHSAMARKESRGAHQRLDEGC +TERDDVNFLKHTLAFRDADGTTRLEYSDVKITTLPPAKRVYGGE +>match$9_444_590 +DGGENWAKIRDEMGLAMEEGCGIYRTPELMQKTIDKLAELQERFKRVRITDTSSVFNTDL +LYTIELGHGLNVAECMAHSAMARKESRGAHQRLDEGCTERDDVNFLKHTLAFRDADGTTR +LEYSDVKITTLPPAKRVYGGEADAADK +>match$10_1_584 +MQTFQADLAIVGAGGAGLRAAIAAAQANPNAKIALISKVYPMRSHTVAAEGGSAAVAQDH +DSFEYHFHDTVAGGDWLCEQDVVDYFVHHCPTEMTQLELWGCPWSRRPDGSVNVRRFGGM +KIERTWFAADKTGFHMLHTLFQTSLQFPQIQRFDEHFVLDILVDDGHVRGLVAMNMMEGT +LVQIRANAVVMATGGAGRVYRYNTNGGIVTGDGMGMALSHGVPLRDMEFVQYHPTGLPGS +GILMTEGCRGEGGILVNKNGYRYLQDYGMGPETPLGEPKNKYMELGPRDKVSQAFWHEWR +KGNTISTPRGDVVYLDLRHLGEKKLHERLPFICELAKAYVGVDPVKEPIPVRPTAHYTMG +GIETDQNCETRIKGLFAVGECSSVGLHGANRLGSNSLAELVVFGRLAGEQATERAATAGN +GNEAAIEAQAAGVEQRLKDLVNQDGGENWAKIRDEMGLAMEEGCGIYRTPELMQKTIDKL +AELQERFKRVRITDTSSVFNTDLLYTIELGHGLNVAECMAHSAMARKESRGAHQRLDEGC +TERDDVNFLKHTLAFRDADGTTRLEYSDVKITTLPPAKRVYGGE +>match$11_1_575 +MQTFQADLAIVGAGGAGLRAAIAAAQANPNAKIALISKVYPMRSHTVAAEGGSAAVAQDH +DSFEYHFHDTVAGGDWLCEQDVVDYFVHHCPTEMTQLELWGCPWSRRPDGSVNVRRFGGM +KIERTWFAADKTGFHMLHTLFQTSLQFPQIQRFDEHFVLDILVDDGHVRGLVAMNMMEGT +LVQIRANAVVMATGGAGRVYRYNTNGGIVTGDGMGMALSHGVPLRDMEFVQYHPTGLPGS +GILMTEGCRGEGGILVNKNGYRYLQDYGMGPETPLGEPKNKYMELGPRDKVSQAFWHEWR +KGNTISTPRGDVVYLDLRHLGEKKLHERLPFICELAKAYVGVDPVKEPIPVRPTAHYTMG +GIETDQNCETRIKGLFAVGECSSVGLHGANRLGSNSLAELVVFGRLAGEQATERAATAGN +GNEAAIEAQAAGVEQRLKDLVNQDGGENWAKIRDEMGLAMEEGCGIYRTPELMQKTIDKL +AELQERFKRVRITDTSSVFNTDLLYTIELGHGLNVAECMAHSAMARKESRGAHQRLDEGC +TERDDVNFLKHTLAFRDADGTTRLEYSDVKITTLP +>match$12_3_582 +TFQADLAIVGAGGAGLRAAIAAAQANPNAKIALISKVYPMRSHTVAAEGGSAAVAQDHDS +FEYHFHDTVAGGDWLCEQDVVDYFVHHCPTEMTQLELWGCPWSRRPDGSVNVRRFGGMKI +ERTWFAADKTGFHMLHTLFQTSLQFPQIQRFDEHFVLDILVDDGHVRGLVAMNMMEGTLV +QIRANAVVMATGGAGRVYRYNTNGGIVTGDGMGMALSHGVPLRDMEFVQYHPTGLPGSGI +LMTEGCRGEGGILVNKNGYRYLQDYGMGPETPLGEPKNKYMELGPRDKVSQAFWHEWRKG +NTISTPRGDVVYLDLRHLGEKKLHERLPFICELAKAYVGVDPVKEPIPVRPTAHYTMGGI 
+ETDQNCETRIKGLFAVGECSSVGLHGANRLGSNSLAELVVFGRLAGEQATERAATAGNGN +EAAIEAQAAGVEQRLKDLVNQDGGENWAKIRDEMGLAMEEGCGIYRTPELMQKTIDKLAE +LQERFKRVRITDTSSVFNTDLLYTIELGHGLNVAECMAHSAMARKESRGAHQRLDEGCTE +RDDVNFLKHTLAFRDADGTTRLEYSDVKITTLPPAKRVYG +>match$13_453_581 +RDEMGLAMEEGCGIYRTPELMQKTIDKLAELQERFKRVRITDTSSVFNTDLLYTIELGHG +LNVAECMAHSAMARKESRGAHQRLDEGCTERDDVNFLKHTLAFRDADGTTRLEYSDVKIT +TLPPAKRVY +>match$14_473_493 +MQKTIDKLAELQERFKRVRIT +>match$15_43_52 +RSHTVAAEGG +>match$16_543_577 +RDDVNFLKHTLAFRDADGTTRLEYSDVKITTLPPA +>match$17_227_358 +MEFVQYHPTGLPGSGILMTEGCRGEGGILVNKNGYRYLQDYGMGPETPLGEPKNKYMELG +PRDKVSQAFWHEWRKGNTISTPRGDVVYLDLRHLGEKKLHERLPFICELAKAYVGVDPVK +EPIPVRPTAHYT +>match$18_7_397 +DLAIVGAGGAGLRAAIAAAQANPNAKIALISKVYPMRSHTVAAEGGSAAVAQDHDSFEYH +FHDTVAGGDWLCEQDVVDYFVHHCPTEMTQLELWGCPWSRRPDGSVNVRRFGGMKIERTW +FAADKTGFHMLHTLFQTSLQFPQIQRFDEHFVLDILVDDGHVRGLVAMNMMEGTLVQIRA +NAVVMATGGAGRVYRYNTNGGIVTGDGMGMALSHGVPLRDMEFVQYHPTGLPGSGILMTE +GCRGEGGILVNKNGYRYLQDYGMGPETPLGEPKNKYMELGPRDKVSQAFWHEWRKGNTIS +TPRGDVVYLDLRHLGEKKLHERLPFICELAKAYVGVDPVKEPIPVRPTAHYTMGGIETDQ +NCETRIKGLFAVGECSSVGLHGANRLGSNSL +>match$19_8_27 +LAIVGAGGAGLRAAIAAAQA +>match$19_360_382 +GGIETDQNCETRIKGLFAVGECS +>match$20_4_173 +QQIGVVGMAVMGRNLALNIESRGYTVSIFNRSREKTEEVIAENPGKKLVPYYTVKEFVES +LETPRRILLMVKAGAGTDAAIDSLKPYLDKGDIIIDGGNTFFQDTIRRNRELSAEGFNFI +GTGVSGGEEGALKGPSIMPGGQKEAYELVAPILTKIAAVAEDGEPCVTYI +>match$21_5_467 +QIGVVGMAVMGRNLALNIESRGYTVSIFNRSREKTEEVIAENPGKKLVPYYTVKEFVESL +ETPRRILLMVKAGAGTDAAIDSLKPYLDKGDIIIDGGNTFFQDTIRRNRELSAEGFNFIG +TGVSGGEEGALKGPSIMPGGQKEAYELVAPILTKIAAVAEDGEPCVTYIGADGAGHYVKM +VHNGIEYGDMQLIAEAYSLLKGGLNLTNEELAQTFTEWNNGELSSYLIDITKDIFTKKDE +DGNYLVDVILDEAANKGTGKWTSQSALDLGEPLSLITESVFARYISSLKDQRVAASKVLS +GPQAQPAGDKAEFIEKVRRALYLGKIVSYAQGFSQLRAASEEYNWDLNYGEIAKIFRAGC +IIRAQFLQKITDAYAENPQIANLLLAPYFKQIADDYQQALRDVVAYAVQNGIPVPTFSAA +VAYYDSYRAAVLPANLIQAQRDYFGAHTYKRIDKEGVFHTEWL +>match$22_1_180 +MSKQQIGVVGMAVMGRNLALNIESRGYTVSIFNRSREKTEEVIAENPGKKLVPYYTVKEF +VESLETPRRILLMVKAGAGTDAAIDSLKPYLDKGDIIIDGGNTFFQDTIRRNRELSAEGF 
+NFIGTGVSGGEEGALKGPSIMPGGQKEAYELVAPILTKIAAVAEDGEPCVTYIGADGAGH +>match$23_3_174 +KQQIGVVGMAVMGRNLALNIESRGYTVSIFNRSREKTEEVIAENPGKKLVPYYTVKEFVE +SLETPRRILLMVKAGAGTDAAIDSLKPYLDKGDIIIDGGNTFFQDTIRRNRELSAEGFNF +IGTGVSGGEEGALKGPSIMPGGQKEAYELVAPILTKIAAVAEDGEPCVTYIG +>match$24_1_468 +MSKQQIGVVGMAVMGRNLALNIESRGYTVSIFNRSREKTEEVIAENPGKKLVPYYTVKEF +VESLETPRRILLMVKAGAGTDAAIDSLKPYLDKGDIIIDGGNTFFQDTIRRNRELSAEGF +NFIGTGVSGGEEGALKGPSIMPGGQKEAYELVAPILTKIAAVAEDGEPCVTYIGADGAGH +YVKMVHNGIEYGDMQLIAEAYSLLKGGLNLTNEELAQTFTEWNNGELSSYLIDITKDIFT +KKDEDGNYLVDVILDEAANKGTGKWTSQSALDLGEPLSLITESVFARYISSLKDQRVAAS +KVLSGPQAQPAGDKAEFIEKVRRALYLGKIVSYAQGFSQLRAASEEYNWDLNYGEIAKIF +RAGCIIRAQFLQKITDAYAENPQIANLLLAPYFKQIADDYQQALRDVVAYAVQNGIPVPT +FSAAVAYYDSYRAAVLPANLIQAQRDYFGAHTYKRIDKEGVFHTEWLD +>match$25_253_265 +ILDEAANKGTGKW +>match$26_181_433 +YVKMVHNGIEYGDMQLIAEAYSLLKGGLNLTNEELAQTFTEWNNGELSSYLIDITKDIFT +KKDEDGNYLVDVILDEAANKGTGKWTSQSALDLGEPLSLITESVFARYISSLKDQRVAAS +KVLSGPQAQPAGDKAEFIEKVRRALYLGKIVSYAQGFSQLRAASEEYNWDLNYGEIAKIF +RAGCIIRAQFLQKITDAYAENPQIANLLLAPYFKQIADDYQQALRDVVAYAVQNGIPVPT +FSAAVAYYDSYRA +>match$27_436_467 +LPANLIQAQRDYFGAHTYKRIDKEGVFHTEWL +>match$28_4_27 +QQIGVVGMAVMGRNLALNIESRGY +>match$28_66_95 +TPRRILLMVKAGAGTDAAIDSLKPYLDKGD +>match$28_119_144 +GFNFIGTGVSGGEEGALKGPSIMPGG +>match$28_168_196 +PCVTYIGADGAGHYVKMVHNGIEYGDMQL +>match$28_249_276 +LVDVILDEAANKGTGKWTSQSALDLGEP +>match$28_356_378 +IAKIFRAGCIIRAQFLQKITDAY +>match$29_1_466 +MSKQQIGVVGMAVMGRNLALNIESRGYTVSIFNRSREKTEEVIAENPGKKLVPYYTVKEF +VESLETPRRILLMVKAGAGTDAAIDSLKPYLDKGDIIIDGGNTFFQDTIRRNRELSAEGF +NFIGTGVSGGEEGALKGPSIMPGGQKEAYELVAPILTKIAAVAEDGEPCVTYIGADGAGH +YVKMVHNGIEYGDMQLIAEAYSLLKGGLNLTNEELAQTFTEWNNGELSSYLIDITKDIFT +KKDEDGNYLVDVILDEAANKGTGKWTSQSALDLGEPLSLITESVFARYISSLKDQRVAAS +KVLSGPQAQPAGDKAEFIEKVRRALYLGKIVSYAQGFSQLRAASEEYNWDLNYGEIAKIF +RAGCIIRAQFLQKITDAYAENPQIANLLLAPYFKQIADDYQQALRDVVAYAVQNGIPVPT +FSAAVAYYDSYRAAVLPANLIQAQRDYFGAHTYKRIDKEGVFHTEW +>match$30_1_466 +MSKQQIGVVGMAVMGRNLALNIESRGYTVSIFNRSREKTEEVIAENPGKKLVPYYTVKEF 
+VESLETPRRILLMVKAGAGTDAAIDSLKPYLDKGDIIIDGGNTFFQDTIRRNRELSAEGF +NFIGTGVSGGEEGALKGPSIMPGGQKEAYELVAPILTKIAAVAEDGEPCVTYIGADGAGH +YVKMVHNGIEYGDMQLIAEAYSLLKGGLNLTNEELAQTFTEWNNGELSSYLIDITKDIFT +KKDEDGNYLVDVILDEAANKGTGKWTSQSALDLGEPLSLITESVFARYISSLKDQRVAAS +KVLSGPQAQPAGDKAEFIEKVRRALYLGKIVSYAQGFSQLRAASEEYNWDLNYGEIAKIF +RAGCIIRAQFLQKITDAYAENPQIANLLLAPYFKQIADDYQQALRDVVAYAVQNGIPVPT +FSAAVAYYDSYRAAVLPANLIQAQRDYFGAHTYKRIDKEGVFHTEW +>match$31_179_466 +GHYVKMVHNGIEYGDMQLIAEAYSLLKGGLNLTNEELAQTFTEWNNGELSSYLIDITKDI +FTKKDEDGNYLVDVILDEAANKGTGKWTSQSALDLGEPLSLITESVFARYISSLKDQRVA +ASKVLSGPQAQPAGDKAEFIEKVRRALYLGKIVSYAQGFSQLRAASEEYNWDLNYGEIAK +IFRAGCIIRAQFLQKITDAYAENPQIANLLLAPYFKQIADDYQQALRDVVAYAVQNGIPV +PTFSAAVAYYDSYRAAVLPANLIQAQRDYFGAHTYKRIDKEGVFHTEW +>match$32_177_467 +GAGHYVKMVHNGIEYGDMQLIAEAYSLLKGGLNLTNEELAQTFTEWNNGELSSYLIDITK +DIFTKKDEDGNYLVDVILDEAANKGTGKWTSQSALDLGEPLSLITESVFARYISSLKDQR +VAASKVLSGPQAQPAGDKAEFIEKVRRALYLGKIVSYAQGFSQLRAASEEYNWDLNYGEI +AKIFRAGCIIRAQFLQKITDAYAENPQIANLLLAPYFKQIADDYQQALRDVVAYAVQNGI +PVPTFSAAVAYYDSYRAAVLPANLIQAQRDYFGAHTYKRIDKEGVFHTEWL +>match$33_114_128 +TFKNALTTLPMGGGK +>match$33_193_215 +VFTGKGLSFGGSLIRPEATGYGL +>match$33_235_255 +VSVSGSGNVAQYAIEKAMEFG +>match$33_372_383 +ANAGGVATSGLE +>match$34_6_196 +SLESFLNHVQKRDPNQTEFAQAVREVMTTLWPFLEQNPKYRQMSLLERLVEPERVIQFRV +VWVDDRNQIQVNRAWRVQFSSAIGPYKGGMRFHPSVNLSILKFLGFEQTFKNALTTLPMG +GGKGGSDFDPKGKSEGEVMRFCQALMTELYRHLGADTDVPAGDIGVGGREVGFMAGMMKK +LSNNTACVFTG +>match$35_68_188 +VDDRNQIQVNRAWRVQFSSAIGPYKGGMRFHPSVNLSILKFLGFEQTFKNALTTLPMGGG +KGGSDFDPKGKSEGEVMRFCQALMTELYRHLGADTDVPAGDIGVGGREVGFMAGMMKKLS +N +>match$36_202_445 +GGSLIRPEATGYGLVYFTEAMLKRHGMGFEGMRVSVSGSGNVAQYAIEKAMEFGARVITA +SDSSGTVVDESGFTKEKLARLIEIKASRDGRVADYAKEFGLVYLEGQQPWSLPVDIALPC +ATQNELDVDAAHQLIANGVKAVAEGANMPTTIEATELFQQAGVLFAPGKAANAGGVATSG +LEMAQNAARLGWKAEKVDARLHHIMLDIHHACVEHGGEGEQTNYVQGANIAGFVKVADAM +LAQG +>match$37_22_447 +TEFAQAVREVMTTLWPFLEQNPKYRQMSLLERLVEPERVIQFRVVWVDDRNQIQVNRAWR +VQFSSAIGPYKGGMRFHPSVNLSILKFLGFEQTFKNALTTLPMGGGKGGSDFDPKGKSEG 
+EVMRFCQALMTELYRHLGADTDVPAGDIGVGGREVGFMAGMMKKLSNNTACVFTGKGLSF +GGSLIRPEATGYGLVYFTEAMLKRHGMGFEGMRVSVSGSGNVAQYAIEKAMEFGARVITA +SDSSGTVVDESGFTKEKLARLIEIKASRDGRVADYAKEFGLVYLEGQQPWSLPVDIALPC +ATQNELDVDAAHQLIANGVKAVAEGANMPTTIEATELFQQAGVLFAPGKAANAGGVATSG +LEMAQNAARLGWKAEKVDARLHHIMLDIHHACVEHGGEGEQTNYVQGANIAGFVKVADAM +LAQGVI +>match$38_204_445 +SLIRPEATGYGLVYFTEAMLKRHGMGFEGMRVSVSGSGNVAQYAIEKAMEFGARVITASD +SSGTVVDESGFTKEKLARLIEIKASRDGRVADYAKEFGLVYLEGQQPWSLPVDIALPCAT +QNELDVDAAHQLIANGVKAVAEGANMPTTIEATELFQQAGVLFAPGKAANAGGVATSGLE +MAQNAARLGWKAEKVDARLHHIMLDIHHACVEHGGEGEQTNYVQGANIAGFVKVADAMLA +QG +>match$39_209_372 +EATGYGLVYFTEAMLKRHGMGFEGMRVSVSGSGNVAQYAIEKAMEFGARVITASDSSGTV +VDESGFTKEKLARLIEIKASRDGRVADYAKEFGLVYLEGQQPWSLPVDIALPCATQNELD +VDAAHQLIANGVKAVAEGANMPTTIEATELFQQAGVLFAPGKAA +>match$40_202_446 +GGSLIRPEATGYGLVYFTEAMLKRHGMGFEGMRVSVSGSGNVAQYAIEKAMEFGARVITA +SDSSGTVVDESGFTKEKLARLIEIKASRDGRVADYAKEFGLVYLEGQQPWSLPVDIALPC +ATQNELDVDAAHQLIANGVKAVAEGANMPTTIEATELFQQAGVLFAPGKAANAGGVATSG +LEMAQNAARLGWKAEKVDARLHHIMLDIHHACVEHGGEGEQTNYVQGANIAGFVKVADAM +LAQGV +>match$41_7_67 +LESFLNHVQKRDPNQTEFAQAVREVMTTLWPFLEQNPKYRQMSLLERLVEPERVIQFRVV +W +>match$41_373_446 +NAGGVATSGLEMAQNAARLGWKAEKVDARLHHIMLDIHHACVEHGGEGEQTNYVQGANIA +GFVKVADAMLAQGV +>match$42_122_135 +LPMGGGKGGSDFDP +>match$43_57_184 +PERVIQFRVVWVDDRNQIQVNRAWRVQFSSAIGPYKGGMRFHPSVNLSILKFLGFEQTFK +NALTTLPMGGGKGGSDFDPKGKSEGEVMRFCQALMTELYRHLGADTDVPAGDIGVGGREV +GFMAGMMK +>match$44_1_447 +MDQTYSLESFLNHVQKRDPNQTEFAQAVREVMTTLWPFLEQNPKYRQMSLLERLVEPERV +IQFRVVWVDDRNQIQVNRAWRVQFSSAIGPYKGGMRFHPSVNLSILKFLGFEQTFKNALT +TLPMGGGKGGSDFDPKGKSEGEVMRFCQALMTELYRHLGADTDVPAGDIGVGGREVGFMA +GMMKKLSNNTACVFTGKGLSFGGSLIRPEATGYGLVYFTEAMLKRHGMGFEGMRVSVSGS +GNVAQYAIEKAMEFGARVITASDSSGTVVDESGFTKEKLARLIEIKASRDGRVADYAKEF +GLVYLEGQQPWSLPVDIALPCATQNELDVDAAHQLIANGVKAVAEGANMPTTIEATELFQ +QAGVLFAPGKAANAGGVATSGLEMAQNAARLGWKAEKVDARLHHIMLDIHHACVEHGGEG +EQTNYVQGANIAGFVKVADAMLAQGVI +>match$45_1_447 +MDQTYSLESFLNHVQKRDPNQTEFAQAVREVMTTLWPFLEQNPKYRQMSLLERLVEPERV 
+IQFRVVWVDDRNQIQVNRAWRVQFSSAIGPYKGGMRFHPSVNLSILKFLGFEQTFKNALT +TLPMGGGKGGSDFDPKGKSEGEVMRFCQALMTELYRHLGADTDVPAGDIGVGGREVGFMA +GMMKKLSNNTACVFTGKGLSFGGSLIRPEATGYGLVYFTEAMLKRHGMGFEGMRVSVSGS +GNVAQYAIEKAMEFGARVITASDSSGTVVDESGFTKEKLARLIEIKASRDGRVADYAKEF +GLVYLEGQQPWSLPVDIALPCATQNELDVDAAHQLIANGVKAVAEGANMPTTIEATELFQ +QAGVLFAPGKAANAGGVATSGLEMAQNAARLGWKAEKVDARLHHIMLDIHHACVEHGGEG +EQTNYVQGANIAGFVKVADAMLAQGVI +>match$46_2_37 +RQFYQHYFTATAKLCWLRWLSVPQRLTMLEGLMQWD diff -r 000000000000 -r 0da2847fc108 interproscan5/data-p/__base__.html.tar.gz Binary file interproscan5/data-p/__base__.html.tar.gz has changed diff -r 000000000000 -r 0da2847fc108 interproscan5/data-p/__base__.svg.tar.gz Binary file interproscan5/data-p/__base__.svg.tar.gz has changed diff -r 000000000000 -r 0da2847fc108 interproscan5/data-p/__base__.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/interproscan5/data-p/__base__.tsv Thu May 24 14:57:30 2018 -0400 @@ -0,0 +1,59 @@ +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 SUPERFAMILY SSF51905 361 415 2.27E-71 T 24-01-2016 IPR023753 FAD/NAD(P)-binding domain GO:0016491|GO:0055114 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 SUPERFAMILY SSF51905 2 250 2.27E-71 T 24-01-2016 IPR023753 FAD/NAD(P)-binding domain GO:0016491|GO:0055114 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 TIGRFAM TIGR01812 sdhA_frdA_Gneg: succinate dehydrogenase or fumarate reductase, flavoprotein subunit 7 581 4.7E-250 T 24-01-2016 IPR014006 Succinate dehydrogenase/fumarate reductase, flavoprotein subunit GO:0016627|GO:0022900|GO:0050660|GO:0055114 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 Gene3D G3DSA:3.90.700.10 234 352 3.6E-37 T 24-01-2016 IPR027477 Succinate dehydrogenase/fumarate reductase flavoprotein, catalytic domain +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 PRINTS PR00411 Pyridine nucleotide disulphide reductase class-I signature 375 382 2.5E-5 T 24-01-2016 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 PRINTS PR00411 Pyridine nucleotide disulphide reductase class-I signature 7 
29 2.5E-5 T 24-01-2016 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 Gene3D G3DSA:1.20.58.100 426 537 1.5E-38 T 24-01-2016 IPR015939 Fumarate reductase/succinate dehydrogenase flavoprotein-like, C-terminal GO:0016491|GO:0055114 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 Coils Coil 423 443 - T 24-01-2016 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 Gene3D G3DSA:3.50.50.60 353 416 5.9E-106 T 24-01-2016 IPR023753 FAD/NAD(P)-binding domain GO:0016491|GO:0055114 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 Gene3D G3DSA:3.50.50.60 2 233 5.9E-106 T 24-01-2016 IPR023753 FAD/NAD(P)-binding domain GO:0016491|GO:0055114 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 PANTHER PTHR11632 1 584 0.0 T 24-01-2016 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 SUPERFAMILY SSF46977 444 590 3.01E-46 T 24-01-2016 IPR015939 Fumarate reductase/succinate dehydrogenase flavoprotein-like, C-terminal GO:0016491|GO:0055114 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 PANTHER PTHR11632:SF50 1 584 0.0 T 24-01-2016 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 PIRSF PIRSF000171 1 575 2.0E-42 T 24-01-2016 IPR030664 Succinate dehydrogenase/fumarate reductase, alpha/adenylylsulphate reductase subunit +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 TIGRFAM TIGR01176 fum_red_Fp: fumarate reductase (quinol), flavoprotein subunit 3 582 0.0 T 24-01-2016 IPR005884 Fumarate reductase, flavoprotein subunit GO:0009061|GO:0016491|GO:0055114 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 Pfam PF02910 Fumarate reductase flavoprotein C-term 453 581 7.0E-39 T 24-01-2016 IPR015939 Fumarate reductase/succinate dehydrogenase flavoprotein-like, C-terminal GO:0016491|GO:0055114 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 Coils Coil 473 493 - T 24-01-2016 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 ProSitePatterns PS00504 Fumarate reductase / succinate dehydrogenase FAD-binding site. 
43 52 - T 24-01-2016 IPR003952 Fumarate reductase/succinate dehydrogenase, FAD-binding site GO:0016491|GO:0055114 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 Gene3D G3DSA:4.10.80.40 543 577 5.5E-18 T 24-01-2016 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 SUPERFAMILY SSF56425 227 358 6.88E-43 T 24-01-2016 IPR027477 Succinate dehydrogenase/fumarate reductase flavoprotein, catalytic domain +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 Pfam PF00890 FAD binding domain 7 397 1.1E-115 T 24-01-2016 IPR003953 FAD-dependent oxidoreductase 2, FAD binding domain +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 PRINTS PR00368 FAD-dependent pyridine nucleotide reductase signature 8 27 3.0E-10 T 24-01-2016 +P00363 0d2c0f1acdd08ab0157f2308531a58e4 602 PRINTS PR00368 FAD-dependent pyridine nucleotide reductase signature 360 382 3.0E-10 T 24-01-2016 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 SUPERFAMILY SSF51735 4 173 4.86E-50 T 24-01-2016 IPR016040 NAD(P)-binding domain +P00350 1776f92beb74ff0ccd9ac47a65663644 468 TIGRFAM TIGR00873 gnd: 6-phosphogluconate dehydrogenase (decarboxylating) 5 467 1.1E-232 T 24-01-2016 IPR006113 6-phosphogluconate dehydrogenase, decarboxylating GO:0004616|GO:0006098|GO:0055114 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 Gene3D G3DSA:3.40.50.720 1 180 1.9E-61 T 24-01-2016 IPR016040 NAD(P)-binding domain +P00350 1776f92beb74ff0ccd9ac47a65663644 468 Pfam PF03446 NAD binding domain of 6-phosphogluconate dehydrogenase 3 174 1.8E-52 T 24-01-2016 IPR006115 6-phosphogluconate dehydrogenase, NADP-binding GO:0004616|GO:0055114 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 PIRSF PIRSF000109 1 468 1.9E-243 T 24-01-2016 IPR006113 6-phosphogluconate dehydrogenase, decarboxylating GO:0004616|GO:0006098|GO:0055114 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 ProSitePatterns PS00461 6-phosphogluconate dehydrogenase signature. 
253 265 - T 24-01-2016 IPR006184 6-phosphogluconate-binding site GO:0004616|GO:0006098|GO:0055114 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 Gene3D G3DSA:1.10.1040.10 181 433 1.0E-119 T 24-01-2016 IPR013328 6-phosphogluconate dehydrogenase, domain 2 GO:0016491|GO:0055114 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 Gene3D G3DSA:1.20.5.320 436 467 1.4E-22 T 24-01-2016 IPR012284 6-phosphogluconate dehydrogenase, domain 3 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 PRINTS PR00076 6-phosphogluconate dehydrogenase signature 249 276 1.521867E-89 T 24-01-2016 IPR006183 6-phosphogluconate dehydrogenase GO:0055114 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 PRINTS PR00076 6-phosphogluconate dehydrogenase signature 168 196 1.521867E-89 T 24-01-2016 IPR006183 6-phosphogluconate dehydrogenase GO:0055114 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 PRINTS PR00076 6-phosphogluconate dehydrogenase signature 66 95 1.521867E-89 T 24-01-2016 IPR006183 6-phosphogluconate dehydrogenase GO:0055114 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 PRINTS PR00076 6-phosphogluconate dehydrogenase signature 119 144 1.521867E-89 T 24-01-2016 IPR006183 6-phosphogluconate dehydrogenase GO:0055114 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 PRINTS PR00076 6-phosphogluconate dehydrogenase signature 4 27 1.521867E-89 T 24-01-2016 IPR006183 6-phosphogluconate dehydrogenase GO:0055114 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 PRINTS PR00076 6-phosphogluconate dehydrogenase signature 356 378 1.521867E-89 T 24-01-2016 IPR006183 6-phosphogluconate dehydrogenase GO:0055114 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 PANTHER PTHR11811:SF25 1 466 7.6E-305 T 24-01-2016 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 PANTHER PTHR11811 1 466 7.6E-305 T 24-01-2016 +P00350 1776f92beb74ff0ccd9ac47a65663644 468 Pfam PF00393 6-phosphogluconate dehydrogenase, C-terminal domain 179 466 2.0E-132 T 24-01-2016 IPR006114 6-phosphogluconate dehydrogenase, C-terminal GO:0004616|GO:0006098|GO:0055114 +P00350 
1776f92beb74ff0ccd9ac47a65663644 468 SUPERFAMILY SSF48179 177 467 6.8E-132 T 24-01-2016 IPR008927 6-phosphogluconate dehydrogenase C-terminal domain-like GO:0055114 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 PRINTS PR00082 Glutamate/leucine/phenylalanine/valine dehydrogenase signature 372 383 2.5E-33 T 24-01-2016 IPR006095 Glutamate/phenylalanine/leucine/valine dehydrogenase GO:0006520|GO:0016491|GO:0055114 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 PRINTS PR00082 Glutamate/leucine/phenylalanine/valine dehydrogenase signature 114 128 2.5E-33 T 24-01-2016 IPR006095 Glutamate/phenylalanine/leucine/valine dehydrogenase GO:0006520|GO:0016491|GO:0055114 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 PRINTS PR00082 Glutamate/leucine/phenylalanine/valine dehydrogenase signature 235 255 2.5E-33 T 24-01-2016 IPR006095 Glutamate/phenylalanine/leucine/valine dehydrogenase GO:0006520|GO:0016491|GO:0055114 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 PRINTS PR00082 Glutamate/leucine/phenylalanine/valine dehydrogenase signature 193 215 2.5E-33 T 24-01-2016 IPR006095 Glutamate/phenylalanine/leucine/valine dehydrogenase GO:0006520|GO:0016491|GO:0055114 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 SUPERFAMILY SSF53223 6 196 6.72E-74 T 24-01-2016 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 Gene3D G3DSA:3.40.192.10 68 188 1.0E-60 T 24-01-2016 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 Pfam PF00208 Glutamate/Leucine/Phenylalanine/Valine dehydrogenase 202 445 1.3E-87 T 24-01-2016 IPR006096 Glutamate/phenylalanine/leucine/valine dehydrogenase, C-terminal GO:0006520|GO:0016491|GO:0055114 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 PIRSF PIRSF000185 22 447 1.8E-114 T 24-01-2016 IPR014362 Glutamate dehydrogenase GO:0016639|GO:0055114 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 SMART SM00839 Glutamate/Leucine/Phenylalanine/Valine dehydrogenase 204 445 2.0E-93 T 24-01-2016 IPR006096 Glutamate/phenylalanine/leucine/valine dehydrogenase, C-terminal GO:0006520|GO:0016491|GO:0055114 
+P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 Gene3D G3DSA:3.40.50.720 209 372 1.6E-63 T 24-01-2016 IPR016040 NAD(P)-binding domain +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 SUPERFAMILY SSF51735 202 446 4.96E-77 T 24-01-2016 IPR016040 NAD(P)-binding domain +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 Gene3D G3DSA:1.10.285.10 7 67 8.2E-28 T 24-01-2016 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 Gene3D G3DSA:1.10.285.10 373 446 2.0E-26 T 24-01-2016 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 ProSitePatterns PS00074 Glu / Leu / Phe / Val dehydrogenases active site. 122 135 - T 24-01-2016 IPR006095 Glutamate/phenylalanine/leucine/valine dehydrogenase GO:0006520|GO:0016491|GO:0055114 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 Pfam PF02812 Glu/Leu/Phe/Val dehydrogenase, dimerisation domain 57 184 6.3E-49 T 24-01-2016 IPR006097 Glutamate/phenylalanine/leucine/valine dehydrogenase, dimerisation domain GO:0006520|GO:0016491|GO:0055114 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 PANTHER PTHR11606 1 447 3.2E-250 T 24-01-2016 +P00370 9ec8e26fcf5c160f533bf9a69dec0212 447 PANTHER PTHR11606:SF4 1 447 3.2E-250 T 24-01-2016 +C1P5Z7 abfa044baa298f169ea62ac6b48e1185 43 Pfam PF15894 Inhibitor of glucose uptake transporter SgrT 2 37 1.0E-11 T 24-01-2016 IPR031767 Inhibitor of glucose uptake transporter SgrT GO:0046325 diff -r 000000000000 -r 0da2847fc108 interproscan5/data-p/__base__.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/interproscan5/data-p/__base__.xml Thu May 24 14:57:30 2018 -0400 @@ -0,0 +1,2204 @@ + + + + 
MQTFQADLAIVGAGGAGLRAAIAAAQANPNAKIALISKVYPMRSHTVAAEGGSAAVAQDHDSFEYHFHDTVAGGDWLCEQDVVDYFVHHCPTEMTQLELWGCPWSRRPDGSVNVRRFGGMKIERTWFAADKTGFHMLHTLFQTSLQFPQIQRFDEHFVLDILVDDGHVRGLVAMNMMEGTLVQIRANAVVMATGGAGRVYRYNTNGGIVTGDGMGMALSHGVPLRDMEFVQYHPTGLPGSGILMTEGCRGEGGILVNKNGYRYLQDYGMGPETPLGEPKNKYMELGPRDKVSQAFWHEWRKGNTISTPRGDVVYLDLRHLGEKKLHERLPFICELAKAYVGVDPVKEPIPVRPTAHYTMGGIETDQNCETRIKGLFAVGECSSVGLHGANRLGSNSLAELVVFGRLAGEQATERAATAGNGNEAAIEAQAAGVEQRLKDLVNQDGGENWAKIRDEMGLAMEEGCGIYRTPELMQKTIDKLAELQERFKRVRITDTSSVFNTDLLYTIELGHGLNVAECMAHSAMARKESRGAHQRLDEGCTERDDVNFLKHTLAFRDADGTTRLEYSDVKITTLPPAKRVYGGEADAADKAEAANKKEKANG + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RSHTvaAeGG + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MRQFYQHYFTATAKLCWLRWLSVPQRLTMLEGLMQWDDRNSES + + + + + + + + + + + + + + + + + + + + 
MDQTYSLESFLNHVQKRDPNQTEFAQAVREVMTTLWPFLEQNPKYRQMSLLERLVEPERVIQFRVVWVDDRNQIQVNRAWRVQFSSAIGPYKGGMRFHPSVNLSILKFLGFEQTFKNALTTLPMGGGKGGSDFDPKGKSEGEVMRFCQALMTELYRHLGADTDVPAGDIGVGGREVGFMAGMMKKLSNNTACVFTGKGLSFGGSLIRPEATGYGLVYFTEAMLKRHGMGFEGMRVSVSGSGNVAQYAIEKAMEFGARVITASDSSGTVVDESGFTKEKLARLIEIKASRDGRVADYAKEFGLVYLEGQQPWSLPVDIALPCATQNELDVDAAHQLIANGVKAVAEGANMPTTIEATELFQQAGVLFAPGKAANAGGVATSGLEMAQNAARLGWKAEKVDARLHHIMLDIHHACVEHGGEGEQTNYVQGANIAGFVKVADAMLAQGVI + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + LpmGGGKgGsdfDP + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
MSKQQIGVVGMAVMGRNLALNIESRGYTVSIFNRSREKTEEVIAENPGKKLVPYYTVKEFVESLETPRRILLMVKAGAGTDAAIDSLKPYLDKGDIIIDGGNTFFQDTIRRNRELSAEGFNFIGTGVSGGEEGALKGPSIMPGGQKEAYELVAPILTKIAAVAEDGEPCVTYIGADGAGHYVKMVHNGIEYGDMQLIAEAYSLLKGGLNLTNEELAQTFTEWNNGELSSYLIDITKDIFTKKDEDGNYLVDVILDEAANKGTGKWTSQSALDLGEPLSLITESVFARYISSLKDQRVAASKVLSGPQAQPAGDKAEFIEKVRRALYLGKIVSYAQGFSQLRAASEEYNWDLNYGEIAKIFRAGCIIRAQFLQKITDAYAENPQIANLLLAPYFKQIADDYQQALRDVVAYAVQNGIPVPTFSAAVAYYDSYRAAVLPANLIQAQRDYFGAHTYKRIDKEGVFHTEWLD + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + IlDeaANKGTGkW + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 0da2847fc108 interproscan5/data-p/input.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/interproscan5/data-p/input.fasta Thu May 24 14:57:30 2018 -0400 @@ -0,0 +1,32 @@ +>sp|C1P5Z7|SGRT_ECOLI Putative inhibitor of glucose uptake transporter SgrT OS=Escherichia coli (strain K12) GN=sgrT PE=1 SV=1 +MRQFYQHYFTATAKLCWLRWLSVPQRLTMLEGLMQWDDRNSES +>sp|P00350|6PGD_ECOLI 6-phosphogluconate dehydrogenase, decarboxylating OS=Escherichia coli (strain K12) GN=gnd PE=1 SV=2 +MSKQQIGVVGMAVMGRNLALNIESRGYTVSIFNRSREKTEEVIAENPGKKLVPYYTVKEF +VESLETPRRILLMVKAGAGTDAAIDSLKPYLDKGDIIIDGGNTFFQDTIRRNRELSAEGF +NFIGTGVSGGEEGALKGPSIMPGGQKEAYELVAPILTKIAAVAEDGEPCVTYIGADGAGH +YVKMVHNGIEYGDMQLIAEAYSLLKGGLNLTNEELAQTFTEWNNGELSSYLIDITKDIFT +KKDEDGNYLVDVILDEAANKGTGKWTSQSALDLGEPLSLITESVFARYISSLKDQRVAAS +KVLSGPQAQPAGDKAEFIEKVRRALYLGKIVSYAQGFSQLRAASEEYNWDLNYGEIAKIF +RAGCIIRAQFLQKITDAYAENPQIANLLLAPYFKQIADDYQQALRDVVAYAVQNGIPVPT +FSAAVAYYDSYRAAVLPANLIQAQRDYFGAHTYKRIDKEGVFHTEWLD +>sp|P00363|FRDA_ECOLI Fumarate reductase flavoprotein subunit OS=Escherichia coli (strain K12) GN=frdA PE=1 SV=3 +MQTFQADLAIVGAGGAGLRAAIAAAQANPNAKIALISKVYPMRSHTVAAEGGSAAVAQDH +DSFEYHFHDTVAGGDWLCEQDVVDYFVHHCPTEMTQLELWGCPWSRRPDGSVNVRRFGGM +KIERTWFAADKTGFHMLHTLFQTSLQFPQIQRFDEHFVLDILVDDGHVRGLVAMNMMEGT +LVQIRANAVVMATGGAGRVYRYNTNGGIVTGDGMGMALSHGVPLRDMEFVQYHPTGLPGS +GILMTEGCRGEGGILVNKNGYRYLQDYGMGPETPLGEPKNKYMELGPRDKVSQAFWHEWR +KGNTISTPRGDVVYLDLRHLGEKKLHERLPFICELAKAYVGVDPVKEPIPVRPTAHYTMG +GIETDQNCETRIKGLFAVGECSSVGLHGANRLGSNSLAELVVFGRLAGEQATERAATAGN +GNEAAIEAQAAGVEQRLKDLVNQDGGENWAKIRDEMGLAMEEGCGIYRTPELMQKTIDKL +AELQERFKRVRITDTSSVFNTDLLYTIELGHGLNVAECMAHSAMARKESRGAHQRLDEGC +TERDDVNFLKHTLAFRDADGTTRLEYSDVKITTLPPAKRVYGGEADAADKAEAANKKEKA +NG +>sp|P00370|DHE4_ECOLI NADP-specific glutamate dehydrogenase OS=Escherichia coli (strain K12) GN=gdhA PE=1 SV=1 +MDQTYSLESFLNHVQKRDPNQTEFAQAVREVMTTLWPFLEQNPKYRQMSLLERLVEPERV 
"""Stand-in for ``interproscan.sh`` that replays canned results.

The script accepts the InterProScan command-line options, ignores the ones
that would only influence a real analysis, and copies the pre-computed
``__base__.<ext>`` fixture for every requested output format from the
``data-<seqtype>`` directory located next to this file to
``<output-file-base>.<ext>``.
"""

__author__ = 'mkh'

import argparse
import os
from shutil import copyfile

# Formats whose fixture is stored as a gzipped tarball instead of a plain file.
ARCHIVED_FORMATS = ('html', 'svg')


def result_suffix(ext):
    """Return the file suffix (without leading dot) used for format *ext*."""
    if ext in ARCHIVED_FORMATS:
        return ext + '.tar.gz'
    return ext


def parse_formats(spec):
    """Split the comma-separated ``-f`` value *spec* into format names.

    Surrounding whitespace is removed and blank entries are dropped, so a
    stray or trailing comma (e.g. ``"tsv,"``) does not become a request for
    the non-existent fixture ``__base__.``.
    """
    return [token.strip() for token in spec.split(',') if token.strip()]


def source_data_dir(script_path, seqtype):
    """Return the fixture directory for *seqtype* next to *script_path*.

    The script path is made absolute first: for a bare ``fake_ips.py``
    ``os.path.dirname`` returns ``''``, and plain string concatenation would
    then point at ``/data-<seqtype>`` in the filesystem root.
    """
    script_dir = os.path.dirname(os.path.abspath(script_path))
    return os.path.join(script_dir, 'data-' + seqtype)


def build_parser():
    """Build the argument parser mirroring the accepted InterProScan options.

    Only ``--seqtype``, ``-f`` and ``--output-file-base`` affect what the
    script does; every other option is parsed and then ignored.
    """
    parser = argparse.ArgumentParser(description='Fake IPS.')

    parser.add_argument('-dp', action='store_true')
    parser.add_argument('--input')
    parser.add_argument('--seqtype', default='p')
    parser.add_argument('-f', dest='formats', default='tsv')
    parser.add_argument('--applications')
    parser.add_argument('--tempdir')
    parser.add_argument('--pathways', action='store_true')
    parser.add_argument('--goterms', action='store_true')
    parser.add_argument('--iprlookup', action='store_true')
    parser.add_argument('--mode')
    parser.add_argument('--clusterrunid')
    parser.add_argument('--output-file-base', dest='output_file_base')
    parser.add_argument('-o', dest='output_file', default="/tmp/junk")
    return parser


def main(argv=None):
    """Copy the canned result file of every requested format.

    *argv* is the argument list to parse; ``None`` means ``sys.argv[1:]``.
    Exits through ``parser.error`` when ``--output-file-base`` is missing,
    because no destination path can be built without it.
    """
    parser = build_parser()
    args = parser.parse_args(argv)
    if args.output_file_base is None:
        parser.error('--output-file-base is required')

    src_data_dir = source_data_dir(__file__, args.seqtype)
    for ext in parse_formats(args.formats):
        suffix = result_suffix(ext)
        copyfile(os.path.join(src_data_dir, '__base__.' + suffix),
                 args.output_file_base + '.' + suffix)


if __name__ == '__main__':
    main()
+ + INTERPROSCAN_SCRIPT_PATH + + + + cp $fasta_file $output + + + + + + + + + + + + + + + + + + + $url_paste + + + + + + + + diff -r 000000000000 -r 0da2847fc108 interproscan5/ips5.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/interproscan5/ips5.xml Thu May 24 14:57:30 2018 -0400 @@ -0,0 +1,319 @@ + + Interproscan functional predictions of ORFs. + + + INTERPROSCAN_SCRIPT_PATH + + + + #import os + echo "Start timestamp: \$(date)"; + \$INTERPROSCAN_SCRIPT_PATH/interproscan.sh + ## disables the precalculated lookup service, all calculation will be run locally + -dp + --input $infile + --seqtype $seqtype + -f tsv,$output_types + --applications $appl + + $pathways + $goterms + $iprlookup + $mode + --output-file-base __base__ + 2>&1; + + mv __base__.tsv $tsv_file; + + #if 'gff3' in str($output_types): + mv __base__.gff3 $gff3_file; + #end if + + #if 'xml' in str($output_types): + mv __base__.xml $xml_file; + #end if + + #if 'html' in str($output_types): + mkdir -p $html_file.files_path; + #set temp_archive_file = '__base__.html.tar.gz' + tar -C $html_file.files_path -xvmzf $temp_archive_file; + #if str($seqtype) == 'p' and not str($getorfed): + python \$INTERPROSCAN_SCRIPT_PATH/interproscan5/create_html_index.py 0 $tsv_file $html_file $html_file.files_path; + #else + python \$INTERPROSCAN_SCRIPT_PATH/interproscan5/create_html_index.py 1 $tsv_file $html_file $html_file.files_path; + #end if + rm $temp_archive_file; + #end if + + #if 'svg' in str($output_types): + mkdir -p $svg_file.files_path; + #set temp_archive_file = '__base__.svg.tar.gz' + tar -C $svg_file.files_path -xvmzf $temp_archive_file; + #if str($seqtype) == 'p' and not str($getorfed): + python \$INTERPROSCAN_SCRIPT_PATH/interproscan5/create_html_index.py 0 $tsv_file $svg_file $svg_file.files_path; + #else + python \$INTERPROSCAN_SCRIPT_PATH/interproscan5/create_html_index.py 1 $tsv_file $svg_file $svg_file.files_path; + #end if + rm $temp_archive_file; + #end if + + echo "End timestamp: \$(date)" + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 'html' in output_types + + + 'svg' in output_types + + + 'gff3' in output_types + + + 'xml' in output_types + + + + + + + + +**What it does** + +Interproscan is a batch tool to query the Interpro database. It provides annotations based on multiple searches of profile and other functional databases. + + +##### +Input +##### + +Required is a FASTA file containing protein or nucleotide sequences. + + +###### +Output +###### + +In this version of InterProScan_, you can retrieve output in any of the following five formats: + + * TSV: a simple tab-delimited file format + * XML: the new "IMPACT" XML format (XSD available here_). + * GFF: The `GFF 3.0`_ format + * HTML: An HTML representation of the protein matches + * SVG: An Scalable Vector Graphics representation of the protein matches + + +.. _`GFF 3.0`: http://gmod.org/wiki/GFF#GFF3_Format +.. _here: http://www.ebi.ac.uk/interpro/resources/schemas/interproscan5 + + + +Tab-separated values format (TSV) +================================= + +Basic tab delimited format. + + +Example Output +-------------- + +:: + + P51587 14086411a2cdf1c4cba63020e1622579 3418 Pfam PF09103 BRCA2, oligonucleotide/oligosaccharide-binding, domain 1 2670 2799 7.9E-43 T 15-03-2013 + P51587 14086411a2cdf1c4cba63020e1622579 3418 ProSiteProfiles PS50138 BRCA2 repeat profile. 1002 1036 0.0 T 18-03-2013 IPR002093 BRCA2 repeat GO:0005515|GO:0006302 + P51587 14086411a2cdf1c4cba63020e1622579 3418 Gene3D G3DSA:2.40.50.140 2966 3051 3.1E-52 T 15-03-2013 + ... + + +The TSV format presents the match data in columns as follows: + + - Protein Accession (e.g. P51587) + - Sequence MD5 digest (e.g. 14086411a2cdf1c4cba63020e1622579) + - Sequence Length (e.g. 3418) + - Analysis (e.g. Pfam / PRINTS / Gene3D) + - Signature Accession (e.g. PF09103 / G3DSA:2.40.50.140) + - Signature Description (e.g. 
BRCA2 repeat profile) + - Start location + - Stop location + - Score - is the e-value of the match reported by member database method (e.g. 3.1E-52) + - Status - is the status of the match (T: true) + - Date - is the date of the run + - (InterProScan_ annotations - accession (e.g. IPR002093) - optional column; only displayed if --iprlookup option is switched on) + - (InterProScan_ annotations - description (e.g. BRCA2 repeat) - optional column; only displayed if --iprlookup option is switched on) + - (GO annotations (e.g. GO:0005515) - optional column; only displayed if --goterms option is switched on) + - (Pathways annotations (e.g. REACT_71) - optional column; only displayed if --pathways option is switched on) + + +Extensible Markup Language (XML) +================================ + +XML representation of the matches - this is the richest form of the data. The XML Schema Definition (XSD) is available here_. + +Example Output +-------------- + +.. image:: $PATH_TO_IMAGES/example_xml_output.png + + + +Generic Feature Format Version 3 (GFF3) +======================================= + +The GFF3 format is a flat tab-delimited file, which is much richer than the TSV output format. It allows you to trace back from matches to predicted proteins and to nucleic acid sequences. It also contains a FASTA format representation of the predicted protein sequences and their matches. You will find documentation of all the columns and attributes used at http://www.sequenceontology.org/gff3.shtml. + +Example Output +-------------- + +:: + + ##gff-version 3 + ##feature-ontology http://song.cvs.sourceforge.net/viewvc/song/ontology/sofa.obo?revision=1.269 + ##sequence-region AACH01000027 1 1347 + ##seqid|source|type|start|end|score|strand|phase|attributes + AACH01000027 provided_by_user nucleic_acid 1 1347 . + . Name=AACH01000027;md5=b2a7416cb92565c004becb7510f46840;ID=AACH01000027 + AACH01000027 getorf ORF 1 1347 . + . 
Name=AACH01000027.2_21;Target=pep_AACH01000027_1_1347 1 449;md5=b2a7416cb92565c004becb7510f46840;ID=orf_AACH01000027_1_1347 + AACH01000027 getorf polypeptide 1 449 . + . md5=fd0743a673ac69fb6e5c67a48f264dd5;ID=pep_AACH01000027_1_1347 + AACH01000027 Pfam protein_match 84 314 1.2E-45 + . Name=PF00696;signature_desc=Amino acid kinase family;Target=null 84 314;status=T;ID=match$8_84_314;Ontology_term="GO:0008652";date=15-04-2013;Dbxref="InterPro:IPR001048","Reactome:REACT_13" + ##sequence-region 2 + ... + >pep_AACH01000027_1_1347 + LVLLAAFDCIDDTKLVKQIIISEIINSLPNIVNDKYGRKVLLYLLSPRDPAHTVREIIEV + LQKGDGNAHSKKDTEIRRREMKYKRIVFKVGTSSLTNEDGSLSRSKVKDITQQLAMLHEA + GHELILVSSGAIAAGFGALGFKKRPTKIADKQASAAVGQGLLLEEYTTNLLLRQIVSAQI + LLTQDDFVDKRRYKNAHQALSVLLNRGAIPIINENDSVVIDELKVGDNDTLSAQVAAMVQ + ADLLVFLTDVDGLYTGNPNSDPRAKRLERIETINREIIDMAGGAGSSNGTGGMLTKIKAA + TIATESGVPVYICSSLKSDSMIEAAEETEDGSYFVAQEKGLRTQKQWLAFYAQSQGSIWV + DKGAAEALSQYGKSLLLSGIVEAEGVFSYGDIVTVFDKESGKSLGKGRVQFGASALEDML + RSQKAKGVLIYRDDWISITPEIQLLFTEF + ... + >match$8_84_314 + KRIVFKVGTSSLTNEDGSLSRSKVKDITQQLAMLHEAGHELILVSSGAIAAGFGALGFKK + RPTKIADKQASAAVGQGLLLEEYTTNLLLRQIVSAQILLTQDDFVDKRRYKNAHQALSVL + LNRGAIPIINENDSVVIDELKVGDNDTLSAQVAAMVQADLLVFLTDVDGLYTGNPNSDPR + AKRLERIETINREIIDMAGGAGSSNGTGGMLTKIKAATIATESGVPVYICS + + +Scalable Vector Graphics (SVG) and HyperText Markup Language (HTML) +==================================================================== + +InterProScan_ 5 outputs a single HTML/SVG file for each protein sequence analysed. + + +Example Output +-------------- + +.. image:: $PATH_TO_IMAGES/P51587.svg.png + +.. _InterProScan: http://www.ebi.ac.uk/interpro + + +---------- +References +---------- + + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers: + +Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). +Galaxy tools and workflows for sequence analysis with applications +in molecular plant pathology. 
PeerJ 1:e167 +http://dx.doi.org/10.7717/peerj.167 + +Zdobnov EM, Apweiler R (2001) +InterProScan an integration platform for the signature-recognition methods in InterPro. +Bioinformatics 17, 847-848. +http://dx.doi.org/10.1093/bioinformatics/17.9.847 + +Quevillon E, Silventoinen V, Pillai S, Harte N, Mulder N, Apweiler R, Lopez R (2005) +InterProScan: protein domains identifier. +Nucleic Acids Research 33 (Web Server issue), W116-W120. +http://dx.doi.org/10.1093/nar/gki442 + +Hunter S, Apweiler R, Attwood TK, Bairoch A, Bateman A, Binns D, Bork P, Das U, Daugherty L, Duquenne L, Finn RD, Gough J, Haft D, Hulo N, Kahn D, Kelly E, Laugraud A, Letunic I, Lonsdale D, Lopez R, Madera M, Maslen J, McAnulla C, McDowall J, Mistry J, Mitchell A, Mulder N, Natale D, Orengo C, Quinn AF, Selengut JD, Sigrist CJ, Thimma M, Thomas PD, Valentin F, Wilson D, Wu CH, Yeats C. (2009) +InterPro: the integrative protein signature database. +Nucleic Acids Research 37 (Database Issue), D224-228. +http://dx.doi.org/10.1093/nar/gkn785 + + +This wrapper is available to install into other Galaxy Instances via the Galaxy Tool Shed at +http://toolshed.g2.bx.psu.edu/view/bgruening/interproscan5 + + +**Galaxy Wrapper Author**:: + + * Bjoern Gruening, University of Freiburg + * Konrad Paszkiewicz, University of Exeter + + + diff -r 000000000000 -r 0da2847fc108 interproscan5/ipsfaux.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/interproscan5/ipsfaux.xml Thu May 24 14:57:30 2018 -0400 @@ -0,0 +1,320 @@ + + Interproscan functional predictions of ORFs (faux). Input file and computation options are + ignored (other than the types of outputs). The same precomputed results are produced every time. 
+ + + INTERPROSCAN_SCRIPT_PATH + + + + #import os + echo "Start timestamp: \$(date)"; + python \$INTERPROSCAN_SCRIPT_PATH/interproscan5/fake_ips.py + ## disables the precalculated lookup service, all calculation will be run locally + -dp + --input $infile + --seqtype $seqtype + -f tsv,$output_types + --applications $appl + + $pathways + $goterms + $iprlookup + $mode + --output-file-base __base__ + 2>&1; + + mv __base__.tsv $tsv_file; + + #if 'gff3' in str($output_types): + mv __base__.gff3 $gff3_file; + #end if + + #if 'xml' in str($output_types): + mv __base__.xml $xml_file; + #end if + + #if 'html' in str($output_types): + mkdir -p $html_file.files_path; + #set temp_archive_file = '__base__.html.tar.gz' + tar -C $html_file.files_path -xvmzf $temp_archive_file; + #if str($seqtype) == 'p' and not str($getorfed): + python \$INTERPROSCAN_SCRIPT_PATH/interproscan5/create_html_index.py 0 $tsv_file $html_file $html_file.files_path; + #else + python \$INTERPROSCAN_SCRIPT_PATH/interproscan5/create_html_index.py 1 $tsv_file $html_file $html_file.files_path; + #end if + rm $temp_archive_file; + #end if + + #if 'svg' in str($output_types): + mkdir -p $svg_file.files_path; + #set temp_archive_file = '__base__.svg.tar.gz' + tar -C $svg_file.files_path -xvmzf $temp_archive_file; + #if str($seqtype) == 'p' and not str($getorfed): + python \$INTERPROSCAN_SCRIPT_PATH/interproscan5/create_html_index.py 0 $tsv_file $svg_file $svg_file.files_path; + #else + python \$INTERPROSCAN_SCRIPT_PATH/interproscan5/create_html_index.py 1 $tsv_file $svg_file $svg_file.files_path; + #end if + rm $temp_archive_file; + #end if + + echo "End timestamp: \$(date)" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 'html' in output_types + + + 'svg' in output_types + + + 'gff3' in output_types + + + 'xml' in output_types + + + + + + + + +**What it does** + +Interproscan is a batch tool to query the Interpro database. 
It provides annotations based on multiple searches of profile and other functional databases. + + +##### +Input +##### + +Required is a FASTA file containing protein or nucleotide sequences. + + +###### +Output +###### + +In this version of InterProScan_, you can retrieve output in any of the following five formats: + + * TSV: a simple tab-delimited file format + * XML: the new "IMPACT" XML format (XSD available here_). + * GFF: The `GFF 3.0`_ format + * HTML: An HTML representation of the protein matches + * SVG: A Scalable Vector Graphics representation of the protein matches + + +.. _`GFF 3.0`: http://gmod.org/wiki/GFF#GFF3_Format +.. _here: http://www.ebi.ac.uk/interpro/resources/schemas/interproscan5 + + + +Tab-separated values format (TSV) +================================= + +Basic tab delimited format. + + +Example Output +-------------- + +:: + + P51587 14086411a2cdf1c4cba63020e1622579 3418 Pfam PF09103 BRCA2, oligonucleotide/oligosaccharide-binding, domain 1 2670 2799 7.9E-43 T 15-03-2013 + P51587 14086411a2cdf1c4cba63020e1622579 3418 ProSiteProfiles PS50138 BRCA2 repeat profile. 1002 1036 0.0 T 18-03-2013 IPR002093 BRCA2 repeat GO:0005515|GO:0006302 + P51587 14086411a2cdf1c4cba63020e1622579 3418 Gene3D G3DSA:2.40.50.140 2966 3051 3.1E-52 T 15-03-2013 + ... + + +The TSV format presents the match data in columns as follows: + + - Protein Accession (e.g. P51587) + - Sequence MD5 digest (e.g. 14086411a2cdf1c4cba63020e1622579) + - Sequence Length (e.g. 3418) + - Analysis (e.g. Pfam / PRINTS / Gene3D) + - Signature Accession (e.g. PF09103 / G3DSA:2.40.50.140) + - Signature Description (e.g. BRCA2 repeat profile) + - Start location + - Stop location + - Score - is the e-value of the match reported by member database method (e.g. 3.1E-52) + - Status - is the status of the match (T: true) + - Date - is the date of the run + - (InterProScan_ annotations - accession (e.g. 
IPR002093) - optional column; only displayed if --iprlookup option is switched on) + - (InterProScan_ annotations - description (e.g. BRCA2 repeat) - optional column; only displayed if --iprlookup option is switched on) + - (GO annotations (e.g. GO:0005515) - optional column; only displayed if --goterms option is switched on) + - (Pathways annotations (e.g. REACT_71) - optional column; only displayed if --pathways option is switched on) + + +Extensible Markup Language (XML) +================================ + +XML representation of the matches - this is the richest form of the data. The XML Schema Definition (XSD) is available here_. + +Example Output +-------------- + +.. image:: $PATH_TO_IMAGES/example_xml_output.png + + + +Generic Feature Format Version 3 (GFF3) +======================================= + +The GFF3 format is a flat tab-delimited file, which is much richer than the TSV output format. It allows you to trace back from matches to predicted proteins and to nucleic acid sequences. It also contains a FASTA format representation of the predicted protein sequences and their matches. You will find documentation of all the columns and attributes used at http://www.sequenceontology.org/gff3.shtml. + +Example Output +-------------- + +:: + + ##gff-version 3 + ##feature-ontology http://song.cvs.sourceforge.net/viewvc/song/ontology/sofa.obo?revision=1.269 + ##sequence-region AACH01000027 1 1347 + ##seqid|source|type|start|end|score|strand|phase|attributes + AACH01000027 provided_by_user nucleic_acid 1 1347 . + . Name=AACH01000027;md5=b2a7416cb92565c004becb7510f46840;ID=AACH01000027 + AACH01000027 getorf ORF 1 1347 . + . Name=AACH01000027.2_21;Target=pep_AACH01000027_1_1347 1 449;md5=b2a7416cb92565c004becb7510f46840;ID=orf_AACH01000027_1_1347 + AACH01000027 getorf polypeptide 1 449 . + . md5=fd0743a673ac69fb6e5c67a48f264dd5;ID=pep_AACH01000027_1_1347 + AACH01000027 Pfam protein_match 84 314 1.2E-45 + . 
Name=PF00696;signature_desc=Amino acid kinase family;Target=null 84 314;status=T;ID=match$8_84_314;Ontology_term="GO:0008652";date=15-04-2013;Dbxref="InterPro:IPR001048","Reactome:REACT_13" + ##sequence-region 2 + ... + >pep_AACH01000027_1_1347 + LVLLAAFDCIDDTKLVKQIIISEIINSLPNIVNDKYGRKVLLYLLSPRDPAHTVREIIEV + LQKGDGNAHSKKDTEIRRREMKYKRIVFKVGTSSLTNEDGSLSRSKVKDITQQLAMLHEA + GHELILVSSGAIAAGFGALGFKKRPTKIADKQASAAVGQGLLLEEYTTNLLLRQIVSAQI + LLTQDDFVDKRRYKNAHQALSVLLNRGAIPIINENDSVVIDELKVGDNDTLSAQVAAMVQ + ADLLVFLTDVDGLYTGNPNSDPRAKRLERIETINREIIDMAGGAGSSNGTGGMLTKIKAA + TIATESGVPVYICSSLKSDSMIEAAEETEDGSYFVAQEKGLRTQKQWLAFYAQSQGSIWV + DKGAAEALSQYGKSLLLSGIVEAEGVFSYGDIVTVFDKESGKSLGKGRVQFGASALEDML + RSQKAKGVLIYRDDWISITPEIQLLFTEF + ... + >match$8_84_314 + KRIVFKVGTSSLTNEDGSLSRSKVKDITQQLAMLHEAGHELILVSSGAIAAGFGALGFKK + RPTKIADKQASAAVGQGLLLEEYTTNLLLRQIVSAQILLTQDDFVDKRRYKNAHQALSVL + LNRGAIPIINENDSVVIDELKVGDNDTLSAQVAAMVQADLLVFLTDVDGLYTGNPNSDPR + AKRLERIETINREIIDMAGGAGSSNGTGGMLTKIKAATIATESGVPVYICS + + +Scalable Vector Graphics (SVG) and HyperText Markup Language (HTML) +==================================================================== + +InterProScan_ 5 outputs a single HTML/SVG file for each protein sequence analysed. + + +Example Output +-------------- + +.. image:: $PATH_TO_IMAGES/P51587.svg.png + +.. _InterProScan: http://www.ebi.ac.uk/interpro + + +---------- +References +---------- + + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers: + +Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). +Galaxy tools and workflows for sequence analysis with applications +in molecular plant pathology. PeerJ 1:e167 +http://dx.doi.org/10.7717/peerj.167 + +Zdobnov EM, Apweiler R (2001) +InterProScan an integration platform for the signature-recognition methods in InterPro. +Bioinformatics 17, 847-848. 
+http://dx.doi.org/10.1093/bioinformatics/17.9.847 + +Quevillon E, Silventoinen V, Pillai S, Harte N, Mulder N, Apweiler R, Lopez R (2005) +InterProScan: protein domains identifier. +Nucleic Acids Research 33 (Web Server issue), W116-W120. +http://dx.doi.org/10.1093/nar/gki442 + +Hunter S, Apweiler R, Attwood TK, Bairoch A, Bateman A, Binns D, Bork P, Das U, Daugherty L, Duquenne L, Finn RD, Gough J, Haft D, Hulo N, Kahn D, Kelly E, Laugraud A, Letunic I, Lonsdale D, Lopez R, Madera M, Maslen J, McAnulla C, McDowall J, Mistry J, Mitchell A, Mulder N, Natale D, Orengo C, Quinn AF, Selengut JD, Sigrist CJ, Thimma M, Thomas PD, Valentin F, Wilson D, Wu CH, Yeats C. (2009) +InterPro: the integrative protein signature database. +Nucleic Acids Research 37 (Database Issue), D224-228. +http://dx.doi.org/10.1093/nar/gkn785 + + +This wrapper is available to install into other Galaxy Instances via the Galaxy Tool Shed at +http://toolshed.g2.bx.psu.edu/view/bgruening/interproscan5 + + +**Galaxy Wrapper Author**:: + + * Bjoern Gruening, University of Freiburg + * Konrad Paszkiewicz, University of Exeter + + + diff -r 000000000000 -r 0da2847fc108 interproscan5/readme.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/interproscan5/readme.rst Thu May 24 14:57:30 2018 -0400 @@ -0,0 +1,83 @@ +================================================== +Galaxy wrapper for InterProScan 5 prediction tools +================================================== + +InterProScan is a tool that combines different protein signature recognition methods native to the InterPro +member databases into one resource with look up of corresponding InterPro and GO annotation. + +This wrapper is copyright 2013 by: + * Bjoern Gruening + * Konrad Paszkiewicz + + +This repository contains a wrapper for the InterProScan_ command line tool. + +.. _InterProScan: http://www.ebi.ac.uk/interpro/interproscan.html + + +Quevillon E., Silventoinen V., Pillai S., Harte N., Mulder N., Apweiler R., Lopez R. (2005). 
InterProScan: protein domains identifier. Nucleic Acids Res. 33 (Web Server issue): W116-W120 + + +============ +Installation +============ + +Please download and install InterProScan according to: + +https://code.google.com/p/interproscan/wiki/HowToDownload + + +======== +Citation +======== + +If you use this Galaxy tool in work leading to a scientific +publication, in addition to citing the individual underlying tools, please cite: + +Peter Cock, Bjoern Gruening, Konrad Paszkiewicz and Leighton Pritchard (2013). +Galaxy tools and workflows for sequence analysis with applications +in molecular plant pathology. PeerJ 1:e167 +http://dx.doi.org/10.7717/peerj.167 + +Full reference information is included in the help text. + + +============= +Input formats +============= + +The standard interproscan input is either genomic or protein sequences. +In the case of genomic sequences Interproscan will run an ORF prediction tool. + + +======= +History +======= + +interproscan: + + - v5.0: Initial public release of version 5.0 + + +============= +Licence (MIT) +============= + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + diff -r 000000000000 -r 0da2847fc108 interproscan5/static/images/P51587.svg.png Binary file interproscan5/static/images/P51587.svg.png has changed diff -r 000000000000 -r 0da2847fc108 interproscan5/static/images/example_xml_output.png Binary file interproscan5/static/images/example_xml_output.png has changed diff -r 000000000000 -r 0da2847fc108 interproscan5/tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/interproscan5/tool_dependencies.xml Thu May 24 14:57:30 2018 -0400 @@ -0,0 +1,7 @@ + + + + $REPOSITORY_INSTALL_DIR + +