Repository 'spring_cross'
hg clone https://toolshed.g2.bx.psu.edu/repos/guerler/spring_cross

Changeset 0:dbbcc7cd889f (2021-03-23)
Next changeset 1:3175a61346e8 (2021-03-29)
Commit message:
"planemo upload commit 6158473dbced09024b0a805a7df2c93d47705d87"
added:
macros.xml
spring_cross.xml
test-data/cross/pdb.ffdata
test-data/cross/pdb.ffindex
test-data/cross/reference.tabular
test-data/ffindex_indices.loc
test-data/map/chains.tabular
test-data/map/mapped.reference.tabular
test-data/mcc/biogrid_fret.txt
test-data/mcc/human_hv1h2.png
test-data/mcc/human_hv1h2.txt
test-data/minz/NP_000282.1.hhr
test-data/minz/NP_000290.2.hhr
test-data/minz/NP_000548.2.hhr
test-data/minz/NP_000836.2.hhr
test-data/minz/pdb70_random.txt
test-data/minz/pdb70_result.0.txt
test-data/minz/pdb70_result.1.txt
test-data/model/ACE2_HUMAN.hhr
test-data/model/SPIKE_SARS2.hhr
test-data/model/crossreference.txt
test-data/model/hhr.ffdata
test-data/model/hhr.ffindex
test-data/model/log.txt
test-data/model/pairs.txt
test-data/model/pdb_structures.ffdata
test-data/model/pdb_structures.ffindex
test-data/model/sp|Q9BYF1|ACE2_HUMAN.sp|P0DTC2|SPIKE_SARS2.pdb
tool-data/ffindex_indices.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 000000000000 -r dbbcc7cd889f macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Tue Mar 23 13:55:42 2021 +0000
b
@@ -0,0 +1,116 @@
+<macros>
+    <!-- Version tokens; spring_cross.xml builds its version attribute from them. -->
+    <token name="@TOOL_VERSION@">0.1</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+
+    <!-- Package requirement for springsuite, pinned to the TOOL_VERSION token.
+         The yield slot lets an expanding tool append further requirements. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement version="@TOOL_VERSION@" type="package">springsuite</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+
+    <!-- Citation block (one DOI). -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1021/ci300579r</citation>
+        </citations>
+    </xml>
+
+    <!-- Boolean "logfile" switch, unchecked by default. -->
+    <xml name="logfile">
+        <param name="logfile" type="boolean" checked="false"
+               truevalue="true" falsevalue="false"
+               label="Output log file"/>
+    </xml>
+
+    <!-- History datasets for a PDB database: the data file and its index. -->
+    <xml name="ffindex_single_inputs">
+        <param name="ffdata" type="data" format="ffdata"
+               label="PDB Database" help="Database Data file."/>
+        <param name="ffindex" type="data" format="ffindex"
+               label="PDB Database Index" help="Database Index file."/>
+    </xml>
+
+    <!-- Same pair of history datasets, labelled for an HHR database. -->
+    <xml name="ffindex_single_inputs_hhr">
+        <param name="ffdata" type="data" format="ffdata"
+               label="HHR Database" help="Database Data file."/>
+        <param name="ffindex" type="data" format="ffindex"
+               label="HHR Database Index" help="Database Index file."/>
+    </xml>
+
+    <!-- PDB database chooser: either a row of the ffindex_indices data table
+         (column 3 equal to "pdb", sorted by column 0) or datasets from the history. -->
+    <xml name="pdb_source">
+        <conditional name="pdb_source">
+            <param name="pdb_source_selector" type="select"
+                   label="Custom or built-in PDB"
+                   help="Built-ins have been indexed using ffindex">
+                <option selected="true" value="indexed">Use a built-in index</option>
+                <option value="history">Use a PDB index from history</option>
+            </param>
+            <when value="indexed">
+                <param name="pdb" type="select" help="" label="Select PDB database">
+                    <options from_data_table="ffindex_indices">
+                        <filter column="0" type="sort_by"/>
+                        <filter column="3" type="static_value" value="pdb"/>
+                        <validator message="No indices are available" type="no_options"/>
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <expand macro="ffindex_single_inputs"/>
+            </when>
+        </conditional>
+    </xml>
+
+    <!-- HHR database chooser; mirrors pdb_source but keeps rows whose column 3 is "hhr". -->
+    <xml name="hhr_source">
+        <conditional name="hhr_source">
+            <param name="hhr_source_selector" type="select"
+                   label="Custom or built-in HHR index"
+                   help="Built-ins have been indexed using ffindex">
+                <option selected="true" value="indexed">Use a built-in index</option>
+                <option value="history">Use a HHR index from history</option>
+            </param>
+            <when value="indexed">
+                <param name="hhr" type="select" help="" label="Select HHR database">
+                    <options from_data_table="ffindex_indices">
+                        <filter column="0" type="sort_by"/>
+                        <filter column="3" type="static_value" value="hhr"/>
+                        <validator message="No indices are available" type="no_options"/>
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <expand macro="ffindex_single_inputs_hhr"/>
+            </when>
+        </conditional>
+    </xml>
+
+    <!-- Command fragment for the PDB database: -d/-i take the table path plus
+         ".ffdata"/".ffindex" for a built-in entry, else the two history datasets. -->
+    <token name="@pdb_source@">
+        #if $pdb_source.pdb_source_selector == 'indexed':
+            -d '${pdb_source.pdb.fields.path}.ffdata'
+            -i '${pdb_source.pdb.fields.path}.ffindex'
+        #else
+            -d '$pdb_source.ffdata'
+            -i '$pdb_source.ffindex'
+        #end if
+    </token>
+
+    <!-- Command fragment for the HHR database; same shape using -dh/-ih. -->
+    <token name="@hhr_source@">
+        #if $hhr_source.hhr_source_selector == 'indexed':
+            -dh '${hhr_source.hhr.fields.path}.ffdata'
+            -ih '${hhr_source.hhr.fields.path}.ffindex'
+        #else
+            -dh '$hhr_source.ffdata'
+            -ih '$hhr_source.ffindex'
+        #end if
+    </token>
+</macros>
b
diff -r 000000000000 -r dbbcc7cd889f spring_cross.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/spring_cross.xml Tue Mar 23 13:55:42 2021 +0000
[
@@ -0,0 +1,62 @@
+<tool id="spring_cross"
+      name="SPRING Cross"
+      version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"
+      profile="20.01"
+      license="MIT">
+    <description>reference builder</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <!-- Runs spring_cross.py: database flags come from the pdb_source token,
+         -o receives the tabular output and -g the log dataset.
+         NOTE(review): -g is passed even when the log output is filtered out by
+         the logfile switch; confirm what $log resolves to in that case. -->
+    <command detect_errors="exit_code"><![CDATA[
+    spring_cross.py
+        @pdb_source@
+        -o '$output'
+        -g '$log'
+    ]]></command>
+    <inputs>
+        <expand macro="pdb_source"/>
+        <expand macro="logfile"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="tabular" label="SPRING Cross Reference"/>
+        <!-- Kept only when the logfile parameter is true. -->
+        <data name="log" format="txt" label="SPRING Cross Log">
+            <filter>logfile</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- History database, logging off: only the cross reference is expected. -->
+        <test expect_num_outputs="1">
+            <conditional name="pdb_source">
+                <param name="pdb_source_selector" value="history"/>
+                <param name="ffdata" value="cross/pdb.ffdata"/>
+                <param name="ffindex" value="cross/pdb.ffindex"/>
+            </conditional>
+            <param name="logfile" value="false"/>
+            <output name="output" file="cross/reference.tabular"/>
+        </test>
+        <!-- Built-in entry pdb01_cross, logging on: two outputs are expected. -->
+        <test expect_num_outputs="2">
+            <conditional name="pdb_source">
+                <param name="pdb_source_selector" value="indexed"/>
+                <param name="pdb" value="pdb01_cross"/>
+            </conditional>
+            <param name="logfile" value="true"/>
+            <output name="output" file="cross/reference.tabular"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+**What it does**
+
+Creates a 2-column cross reference between a list of input chains and all interacting chains found in the provided PDB database.
+This reference is required as input for the SPRING min-Z calculator.
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r dbbcc7cd889f test-data/cross/pdb.ffdata
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cross/pdb.ffdata Tue Mar 23 13:55:42 2021 +0000
b
b'@@ -0,0 +1,107645 @@\n+HEADER    VIRAL PROTEIN                           25-FEB-20   6VYB              \n+TITLE     SARS-COV-2 SPIKE ECTODOMAIN STRUCTURE (OPEN STATE)                    \n+COMPND    MOL_ID: 1;                                                            \n+COMPND   2 MOLECULE: SPIKE GLYCOPROTEIN;                                        \n+COMPND   3 CHAIN: A, B, C;                                                      \n+COMPND   4 FRAGMENT: ECTODOMAIN;                                                \n+COMPND   5 SYNONYM: S GLYCOPROTEIN,E2,PEPLOMER PROTEIN;                         \n+COMPND   6 ENGINEERED: YES                                                      \n+SOURCE    MOL_ID: 1;                                                            \n+SOURCE   2 ORGANISM_SCIENTIFIC: SEVERE ACUTE RESPIRATORY SYNDROME CORONAVIRUS   \n+SOURCE   3 2;                                                                   \n+SOURCE   4 ORGANISM_COMMON: 2019-NCOV;                                          \n+SOURCE   5 ORGANISM_TAXID: 2697049;                                             \n+SOURCE   6 GENE: S, 2;                                                          \n+SOURCE   7 EXPRESSION_SYSTEM: HOMO SAPIENS;                                     \n+SOURCE   8 EXPRESSION_SYSTEM_COMMON: HUMAN;                                     \n+SOURCE   9 EXPRESSION_SYSTEM_TAXID: 9606                                        \n+KEYWDS    CORONAVIRUS, SARS-COV-2, SARS-COV, SPIKE GLYCOPROTEIN, FUSION         \n+KEYWDS   2 PROTEIN, STRUCTURAL GENOMICS, SEATTLE STRUCTURAL GENOMICS CENTER FOR \n+KEYWDS   3 INFECTIOUS DISEASE, SSGCID, VIRAL PROTEIN                            \n+EXPDTA    ELECTRON MICROSCOPY                                                   \n+AUTHOR    A.C.WALLS,Y.J.PARK,M.A.TORTORICI,A.WALL,SEATTLE STRUCTURAL GENOMICS   \n+AUTHOR   2 CENTER FOR INFECTIOUS DISEASE (SSGCID),A.T.MCGUIRE,D.VEESLER         \n+REVDAT   6   29-JUL-20 6VYB    1       COMPND REMARK HETNAM LINK  
              \n+REVDAT   6 2                   1       SITE   ATOM                              \n+REVDAT   5   06-MAY-20 6VYB    1       COMPND SOURCE DBREF  SEQADV              \n+REVDAT   4   29-APR-20 6VYB    1       JRNL                                     \n+REVDAT   3   01-APR-20 6VYB    1       COMPND                                   \n+REVDAT   2   25-MAR-20 6VYB    1       JRNL                                     \n+REVDAT   1   11-MAR-20 6VYB    0                                                \n+JRNL        AUTH   A.C.WALLS,Y.J.PARK,M.A.TORTORICI,A.WALL,A.T.MCGUIRE,         \n+JRNL        AUTH 2 D.VEESLER                                                    \n+JRNL        TITL   STRUCTURE, FUNCTION, AND ANTIGENICITY OF THE SARS-COV-2      \n+JRNL        TITL 2 SPIKE GLYCOPROTEIN.                                          \n+JRNL        REF    CELL                          V. 181   281 2020              \n+JRNL        REFN                   ISSN 1097-4172                               \n+JRNL        PMID   32155444                                                     \n+JRNL        DOI    10.1016/J.CELL.2020.02.058                                   \n+REMARK   2                                                                      \n+REMARK   2 RESOLUTION.    3.20 ANGSTROMS.                                       \n+REMARK   3                                                                      \n+REMARK   3 REFINEMENT.                                                          
\n+REMARK   3   SOFTWARE PACKAGES      : LEGINON, RELION, RELION                   \n+REMARK   3   RECONSTRUCTION SCHEMA  : NULL                                      \n+REMARK   3                                                                      \n+REMARK   3 EM MAP-MODEL FITTING AND REFINEMENT                                  \n+REMARK   3   PDB ENTRY                    : NULL                                \n+REMARK   3   REFINEMENT SPACE             : NULL                                \n+REMARK   3   REFINEMENT PROTOCOL         '..b'                                                              \n+CONECT 1816 8594                                                                \n+CONECT 1857 8594                                                                \n+CONECT 1889 8594                                                                \n+CONECT 3297 8595                                                                \n+CONECT 4527 8595                                                                \n+CONECT 4550 8595                                                                \n+CONECT 4556 8595                                                                \n+CONECT 8283 8606                                                                \n+CONECT 8290 8630                                                                \n+CONECT 8594 1771 1816 1857 1889                                                 \n+CONECT 8595 3297 4527 4550 4556                                                 \n+CONECT 8596 8597 8598 8599 8600                                                 \n+CONECT 8597 8596                                                                \n+CONECT 8598 8596                                                                \n+CONECT 8599 8596                                                                \n+CONECT 8600 8596 8601 8605                                                      \n+CONECT 8601 8600 8602 8603 8604                             
                    \n+CONECT 8602 8601                                                                \n+CONECT 8603 8601                                                                \n+CONECT 8604 8601                                                                \n+CONECT 8605 8600                                                                \n+CONECT 8606 8283                                                                \n+CONECT 8607 8612 8620                                                           \n+CONECT 8608 8610 8618 8620                                                      \n+CONECT 8609 8613 8616                                                           \n+CONECT 8610 8608 8615 8625                                                      \n+CONECT 8611 8614 8620 8624                                                      \n+CONECT 8612 8607                                                                \n+CONECT 8613 8609 8624 8627                                                      \n+CONECT 8614 8611                                                                \n+CONECT 8615 8610 8623                                                           \n+CONECT 8616 8609 8630                                                           \n+CONECT 8617 8622 8623                                                           \n+CONECT 8618 8608 8621                                                           \n+CONECT 8619 8630                                                                \n+CONECT 8620 8607 8608 8611 8627                                                 \n+CONECT 8621 8618 8625                                                           \n+CONECT 8622 8617 8625 8626                                                      \n+CONECT 8623 8615 8617                                                           \n+CONECT 8624 8611 8613 8628                                                      \n+CONECT 8625 8610 8621 8622                                          
            \n+CONECT 8626 8622                                                                \n+CONECT 8627 8613 8620                                                           \n+CONECT 8628 8624                                                                \n+CONECT 8629 8630                                                                \n+CONECT 8630 8290 8616 8619 8629                                                 \n+MASTER      498    0    6   55   31    0    8    6 8630    5   47  103          \n+END                                                                             \n'
b
diff -r 000000000000 -r dbbcc7cd889f test-data/cross/pdb.ffindex
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cross/pdb.ffindex Tue Mar 23 13:55:42 2021 +0000
b
@@ -0,0 +1,11 @@
+6vyb.pdb 0 2088828
+6vyo.pdb 2088829 758727
+6w37.pdb 2847557 66582
+6w4h.pdb 2914140 675378
+6w9c.pdb 3589519 1250964
+6w9q.pdb 4840484 182574
+6wey.pdb 5023059 436995
+6wji.pdb 5460055 983583
+6wlc.pdb 6443639 1054296
+7bqy.pdb 7497936 448173
+7bv2.pdb 7946110 773145
b
diff -r 000000000000 -r dbbcc7cd889f test-data/cross/reference.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cross/reference.tabular Tue Mar 23 13:55:42 2021 +0000
b
@@ -0,0 +1,44 @@
+6VYB_A 6VYB_B
+6VYB_A 6VYB_C
+6VYB_B 6VYB_A
+6VYB_B 6VYB_C
+6VYB_C 6VYB_A
+6VYB_C 6VYB_B
+6VYO_A 6VYO_B
+6VYO_A 6VYO_D
+6VYO_B 6VYO_A
+6VYO_B 6VYO_C
+6VYO_C 6VYO_B
+6VYO_C 6VYO_D
+6VYO_D 6VYO_A
+6VYO_D 6VYO_C
+6W4H_A 6W4H_B
+6W4H_B 6W4H_A
+6W9C_A 6W9C_B
+6W9C_A 6W9C_C
+6W9C_B 6W9C_A
+6W9C_B 6W9C_C
+6W9C_C 6W9C_A
+6W9C_C 6W9C_B
+6WJI_A 6WJI_B
+6WJI_B 6WJI_A
+6WJI_B 6WJI_D
+6WJI_B 6WJI_E
+6WJI_C 6WJI_D
+6WJI_C 6WJI_E
+6WJI_D 6WJI_B
+6WJI_D 6WJI_C
+6WJI_E 6WJI_B
+6WJI_E 6WJI_C
+6WJI_E 6WJI_F
+6WJI_F 6WJI_E
+6WLC_A 6WLC_B
+6WLC_B 6WLC_A
+7BQY_A 7BQY_C
+7BQY_C 7BQY_A
+7BV2_A 7BV2_B
+7BV2_A 7BV2_C
+7BV2_B 7BV2_A
+7BV2_B 7BV2_C
+7BV2_C 7BV2_A
+7BV2_C 7BV2_B
b
diff -r 000000000000 -r dbbcc7cd889f test-data/ffindex_indices.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ffindex_indices.loc Tue Mar 23 13:55:42 2021 +0000
b
@@ -0,0 +1,5 @@
+##ffindex indices
+#unique_id display name path type
+pdb01_model PDB 2021-03-17-model ${__HERE__}/model/pdb_structures pdb
+pdb01_cross PDB 2021-03-17-cross ${__HERE__}/cross/pdb pdb
+hhr_model HHR model - cross ${__HERE__}/model/hhr hhr
b
diff -r 000000000000 -r dbbcc7cd889f test-data/map/chains.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/map/chains.tabular Tue Mar 23 13:55:42 2021 +0000
b
@@ -0,0 +1,16 @@
+6VYB_A
+6VYB_B
+6VYB_C
+6VYO_A
+6W4H_A
+6W4H_B
+6W9C_A
+6W9Q_A
+6W37_A
+6WEY_A
+6WJI_A
+6WLC_A
+7BQY_A
+7BQY_C
+7BV2_A
+7BV2_B
\ No newline at end of file
b
diff -r 000000000000 -r dbbcc7cd889f test-data/map/mapped.reference.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/map/mapped.reference.tabular Tue Mar 23 13:55:42 2021 +0000
b
@@ -0,0 +1,20 @@
+6VYB_A 6VYB_B 6VYB_A 6VYB_B
+6VYB_A 6VYB_C 6VYB_A 6VYB_C
+6VYB_B 6VYB_A 6VYB_B 6VYB_A
+6VYB_B 6VYB_C 6VYB_B 6VYB_C
+6VYB_C 6VYB_A 6VYB_C 6VYB_A
+6VYB_C 6VYB_B 6VYB_C 6VYB_B
+6W4H_A 6W4H_B 6W4H_A 6W4H_B
+6W4H_B 6W4H_A 6W4H_B 6W4H_A
+6W9C_A 6W9C_A 6W9C_A 6W9C_B
+6W9C_A 6W9C_A 6W9C_A 6W9C_C
+6W9C_A 6W9C_A 6W9C_B 6W9C_A
+6W9C_A 6W9C_A 6W9C_B 6W9C_C
+6W9C_A 6W9C_A 6W9C_C 6W9C_A
+6W9C_A 6W9C_A 6W9C_C 6W9C_B
+6WLC_A 6WLC_A 6WLC_A 6WLC_B
+6WLC_A 6WLC_A 6WLC_B 6WLC_A
+7BQY_A 7BQY_C 7BQY_A 7BQY_C
+7BQY_C 7BQY_A 7BQY_C 7BQY_A
+7BV2_A 7BV2_B 7BV2_A 7BV2_B
+7BV2_B 7BV2_A 7BV2_B 7BV2_A
b
diff -r 000000000000 -r dbbcc7cd889f test-data/mcc/biogrid_fret.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mcc/biogrid_fret.txt Tue Mar 23 13:55:42 2021 +0000
[
b'@@ -0,0 +1,3458 @@\n+140012\t852276\t850450\t32692\t31060\tYBL007C\tYCR088W\tSLA1\tABP1\tcytoskeletal protein-binding protein SLA1|L000001912\tL000000013\tFRET\tphysical\tWarren DT (2002)\tPUBMED:11950888\t559292\t559292\tLow Throughput\t-\t-\t-\t-\tBIOGRID\tP32790\t-\tNP_009546\tP15891\t-\tNP_010012\t-\t-\t-\t-\t-\t-\tSaccharomyces cerevisiae (S288c)\tSaccharomyces cerevisiae (S288c)\n+143270\t855828\t856499\t35915\t36532\tYPL248C\tYHR099W\tGAL4\tTRA1\tGAL81|galactose-responsive transcription factor GAL4|L000000661\thistone acetyltransferase TRA1|L000003945\tFRET\tphysical\tBhaumik SR (2004)\tPUBMED:14871930\t559292\t559292\tLow Throughput\t-\t-\t-\t-\tBIOGRID\tP04386\t-\tNP_015076\tP38811\t-\tNP_011967\t-\t-\t-\t-\t-\t-\tSaccharomyces cerevisiae (S288c)\tSaccharomyces cerevisiae (S288c)\n+145346\t853385\t853808\t33693\t34076\tYJL061W\tYKL057C\tNUP82\tNUP120\tHRB187|linker nucleoporin NUP82|L000002941\tRAT2|L000003138\tFRET\tphysical\tDamelin M (2002)\tPUBMED:12496130\t559292\t559292\tLow Throughput\t-\t-\t-\t-\tBIOGRID\tP40368\t-\tNP_012474\tP35729\t-\tNP_012866\t-\t-\t-\t-\t-\t-\tSaccharomyces cerevisiae (S288c)\tSaccharomyces cerevisiae (S288c)\n+145347\t853808\t853385\t34076\t33693\tYKL057C\tYJL061W\tNUP120\tNUP82\tRAT2|L000003138\tHRB187|linker nucleoporin NUP82|L000002941\tFRET\tphysical\tDamelin M (2002)\tPUBMED:12496130\t559292\t559292\tLow Throughput\t-\t-\t-\t-\tBIOGRID\tP35729\t-\tNP_012866\tP40368\t-\tNP_012474\t-\t-\t-\t-\t-\t-\tSaccharomyces cerevisiae (S288c)\tSaccharomyces cerevisiae (S288c)\n+145348\t853385\t855066\t33693\t35221\tYJL061W\tYMR047C\tNUP82\tNUP116\tHRB187|linker nucleoporin NUP82|L000002941\tNSP116|FG-nucleoporin NUP116|L000001293\tFRET\tphysical\tDamelin M (2002)\tPUBMED:12496130\t559292\t559292\tLow Throughput\t-\t-\t-\t-\tBIOGRID\tP40368\t-\tNP_012474\tQ02630\t-\tNP_013762\t-\t-\t-\t-\t-\t-\tSaccharomyces cerevisiae (S288c)\tSaccharomyces cerevisiae 
(S288c)\n+145349\t855066\t853385\t35221\t33693\tYMR047C\tYJL061W\tNUP116\tNUP82\tNSP116|FG-nucleoporin NUP116|L000001293\tHRB187|linker nucleoporin NUP82|L000002941\tFRET\tphysical\tDamelin M (2002)\tPUBMED:12496130\t559292\t559292\tLow Throughput\t-\t-\t-\t-\tBIOGRID\tQ02630\t-\tNP_013762\tP40368\t-\tNP_012474\t-\t-\t-\t-\t-\t-\tSaccharomyces cerevisiae (S288c)\tSaccharomyces cerevisiae (S288c)\n+145350\t855184\t850552\t35328\t31150\tYMR153W\tYFR002W\tNUP53\tNIC96\tFG-nucleoporin NUP53\tlinker nucleoporin NIC96|L000001250\tFRET\tphysical\tDamelin M (2002)\tPUBMED:12496130\t559292\t559292\tLow Throughput\t-\t-\t-\t-\tBIOGRID\tQ03790\t-\tNP_013873\tP34077\t-\tNP_116657\t-\t-\t-\t-\t-\t-\tSaccharomyces cerevisiae (S288c)\tSaccharomyces cerevisiae (S288c)\n+145351\t850552\t855184\t31150\t35328\tYFR002W\tYMR153W\tNIC96\tNUP53\tlinker nucleoporin NIC96|L000001250\tFG-nucleoporin NUP53\tFRET\tphysical\tDamelin M (2002)\tPUBMED:12496130\t559292\t559292\tLow Throughput\t-\t-\t-\t-\tBIOGRID\tP34077\t-\tNP_116657\tQ03790\t-\tNP_013873\t-\t-\t-\t-\t-\t-\tSaccharomyces cerevisiae (S288c)\tSaccharomyces cerevisiae (S288c)\n+145352\t854265\t854868\t34496\t35040\tYOR098C\tYML103C\tNUP1\tNUP188\tFG-nucleoporin NUP1|L000001288\tL000003099\tFRET\tphysical\tDamelin M (2002)\tPUBMED:12496130\t559292\t559292\tLow Throughput\t-\t-\t-\t-\tBIOGRID\tP20676\t-\tNP_014741\tP52593\t-\tNP_013604\t-\t-\t-\t-\t-\t-\tSaccharomyces cerevisiae (S288c)\tSaccharomyces cerevisiae (S288c)\n+145353\t854868\t854265\t35040\t34496\tYML103C\tYOR098C\tNUP188\tNUP1\tL000003099\tFG-nucleoporin NUP1|L000001288\tFRET\tphysical\tDamelin M (2002)\tPUBMED:12496130\t559292\t559292\tLow Throughput\t-\t-\t-\t-\tBIOGRID\tP52593\t-\tNP_013604\tP20676\t-\tNP_014741\t-\t-\t-\t-\t-\t-\tSaccharomyces cerevisiae (S288c)\tSaccharomyces cerevisiae (S288c)\n+145354\t854868\t853957\t35040\t34213\tYML103C\tYKR082W\tNUP188\tNUP133\tL000003099\tRAT3|L000002620\tFRET\tphysical\tDamelin M (2002)\tPUBMED:12496130\t559292\t559292\tLow 
Throughput\t-\t-\t-\t-\tBIOGRID\tP52593\t-\tNP_013604\tP36161\t-\tNP_013008\t-\t-\t-\t-\t-\t-\tSaccharomyces cerevisiae (S288c)\tSaccharomyces cerevisiae (S288c)\n+145355\t853957\t854868\t34213\t35040\tYKR082W\tYML103C\tNUP133\tNUP188\tRAT3|L000002620\tL000003099\tFRET\tphysical\tDamelin M (2002)\tPUBMED:12496130\t559292\t559292\tLow Throughput\t-\t-\t-\t-\tBIOGRID\tP36161\t-\tNP_013008\tP52593\t-\tNP_013604\t-\t-\t-\t-\t-\t-\tSaccharomyces cerevisiae (S288c)\tSaccharomyces cerevisiae (S288c)\n+145356\t854868\t855066\t35040\t3'..b'hila melanogaster\n+2870679\t38611\t39332\t64077\t64693\tDmel_CG10642\tDmel_CG7293\tKlp64D\tKlp68D\tCG10642|DmKlp64D|DmelCG10642|KIF 3A|KIF3A|KLP4|KLP64Ddm|Klp 64D\tCG7293|DmKlp68D|DmelCG7293|KIF 3B|KIF3B|KLP-5|KLP5|KLP68Ddm|KLP[[64D/68D]]\tFRET\tphysical\tAhmed Z (2019)\tPUBMED:31420166\t7227\t7227\tLow Throughput\t-\t-\t-\t-\tFLYBASE\t-\tQ9VRK9\tNP_523934\tP46867\tM9PF68\tNP_524029|NP_001261726\t-\t-\t-\t-\t-\t-\tDrosophila melanogaster\tDrosophila melanogaster\n+2870680\t38611\t39332\t64077\t64693\tDmel_CG10642\tDmel_CG7293\tKlp64D\tKlp68D\tCG10642|DmKlp64D|DmelCG10642|KIF 3A|KIF3A|KLP4|KLP64Ddm|Klp 64D\tCG7293|DmKlp68D|DmelCG7293|KIF 3B|KIF3B|KLP-5|KLP5|KLP68Ddm|KLP[[64D/68D]]\tFRET\tphysical\tAhmed Z (2020)\tPUBMED:31784087\t7227\t7227\tLow Throughput\t-\t-\t-\t-\tFLYBASE\t-\tQ9VRK9\tNP_523934\tP46867\tM9PF68\tNP_524029|NP_001261726\t-\t-\t-\t-\t-\t-\tDrosophila melanogaster\tDrosophila melanogaster\n+2870745\t39018\t39018\t64423\t64423\tDmel_CG43782\tDmel_CG43782\torb2\torb2\tCG43113|CG43782|CG5735|DmelCG43782|Dmel_CG43113|Dmel_CG5735|Dmel_CG5741|anon-WO0140519.222\tCG43113|CG43782|CG5735|DmelCG43782|Dmel_CG43113|Dmel_CG5735|Dmel_CG5741|anon-WO0140519.222\tFRET\tphysical\tNil Z (2019)\tPUBMED:31491385\t7227\t7227\tLow Throughput\t-\t-\t-\t-\tFLYBASE\tQ9VSR3\tA4V1P2\tNP_648266|NP_729427|NP_729429|NP_729428|NP_001261608\tQ9VSR3\tA4V1P2\tNP_648266|NP_729427|NP_729429|NP_729428|NP_001261608\t-\t-\t-\t-\t-\t-\tDrosophila 
melanogaster\tDrosophila melanogaster\n+2870798\t39409\t53554\t64761\t72787\tDmel_CG10698\tDmel_CG1487\tCrzR\tkrz\tCG10698|Crz-R|DCR|DGRHRII|DmelCG10698|GRH-RII|GRHRII|GRHRII_Dro|anon-WO0170980.196|anon-WO0170980.197\t0952/14|CG1487|DmelCG1487|Kurz|l(3)S095214\tFRET\tphysical\tChai F (2019)\tPUBMED:31325455\t7227\t7227\tLow Throughput\t-\t-\t-\t-\tFLYBASE\t-\tM9PF00|Q9VTW7\tNP_001261755|NP_648571\t-\tQ9V393\tNP_524988|NP_001247400\t-\t-\t-\t-\t-\t-\tDrosophila melanogaster\tDrosophila melanogaster\n+2870884\t39748\t39748\t65059\t65059\tDmel_CG5830\tDmel_CG5830\tCG5830\tCG5830\tDmelCG5830\tDmelCG5830\tFRET\tphysical\tNil Z (2019)\tPUBMED:31491385\t7227\t7227\tLow Throughput\t-\t-\t-\t-\tFLYBASE\t-\tQ9VUX0|M9PFN0\tNP_001261912|NP_648825\t-\tQ9VUX0|M9PFN0\tNP_001261912|NP_648825\t-\t-\t-\t-\t-\t-\tDrosophila melanogaster\tDrosophila melanogaster\n+2871052\t40616\t40616\t65830\t65830\tDmel_CG2530\tDmel_CG2530\tcorto\tcorto\t7128|CG2530|CP-1|Ccf|DmelCG2530|anon-WO02059370.59|l(3)07128|l(3)neo31|l(3)neo32\t7128|CG2530|CP-1|Ccf|DmelCG2530|anon-WO02059370.59|l(3)07128|l(3)neo31|l(3)neo32\tFRET\tphysical\tNil Z (2019)\tPUBMED:31491385\t7227\t7227\tLow Throughput\t-\t-\t-\t-\tFLYBASE\tP41046\t-\tNP_001287166|NP_524231|NP_001246921|NP_001246923|NP_001246922\tP41046\t-\tNP_001287166|NP_524231|NP_001246921|NP_001246923|NP_001246922\t-\t-\t-\t-\t-\t-\tDrosophila melanogaster\tDrosophila melanogaster\n+2871859\t43767\t41565\t68605\t66658\tDmel_CG2125\tDmel_CG6054\tci\tSu(fu)\tCG2125|CID|Ce|Ci/GLI|Ci155|Ci[D]|DmelCG2125|Gli|Siah|ci-D|l(4)102ABc|l(4)13|l(4)17\tCG6054|DmelCG6054|SUFU|dSufu\tFRET\tphysical\tHan Y (2019)\tPUBMED:31279575\t7227\t7227\tLow Throughput\t-\t-\t-\t-\tFLYBASE\tP19538\tH9XVL6|H5V858|H9XVL7\tNP_524617|NP_001245402|NP_001245401\t-\tQ9VG38\tNP_536750\t-\t-\t-\t-\t-\t-\tDrosophila melanogaster\tDrosophila melanogaster\n+2871874\t43832\t37007\t68658\t62693\tDmel_CG11081\tDmel_CG6446\tPlexA\tSema-1b\tBcDNA:GM05237|CG11081|D-Plex 
A|DPlexA|DmelCG11081|Plex1|PlexA1|lincRNA.927|plex|plex A\tBcDNA:GH03186|CG6446|D-semaIII|DmelCG6446|Sema 1b|Sema III|Sema1b|sema-III\tFRET\tphysical\tRozbesky D (2020)\tPUBMED:32500924\t7227\t7227\tLow Throughput\t-\t-\t-\t-\tFLYBASE\t-\tQ0KIF1|Q9V491|H9XVP3\tNP_524637|NP_726627|NP_726628|NP_001245428\t-\tQ7KK54\tNP_001163178|NP_611244|NP_995879\t-\t-\t-\t-\t-\t-\tDrosophila melanogaster\tDrosophila melanogaster\n+2871937\t44353\t38218\t68999\t63751\tDmel_CG18247\tDmel_CG2086\tshark\tdrpr\tCG18247|DmelCG18247|Dtk7|SYK/SHARK|Syk|Tk7|l(2)W4|l(2R)W4\tBcDNA:GH03529|CG18172|CG2086|CT41022|CT6730|DmelCG2086\tFRET\tphysical\tWilliamson AP (2018)\tPUBMED:30139739\t7227\t7227\tLow Throughput\t-\t-\t-\t-\tFLYBASE\tQ24145\t-\tNP_524743\t-\tQ9W0A1|Q9W0A0|M9PDW5|M9PGU6|M9NEX8|M9PBI3\tNP_001246549|NP_728660|NP_001261276|NP_001261277|NP_001261275|NP_477450\t-\t-\t-\t-\t-\t-\tDrosophila melanogaster\tDrosophila melanogaster\n'
b
diff -r 000000000000 -r dbbcc7cd889f test-data/mcc/human_hv1h2.png
b
Binary file test-data/mcc/human_hv1h2.png has changed
b
diff -r 000000000000 -r dbbcc7cd889f test-data/mcc/human_hv1h2.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mcc/human_hv1h2.txt Tue Mar 23 13:55:42 2021 +0000
b
b'@@ -0,0 +1,20796 @@\n+sp|P04601|NEF_HV1H2\tsp|Q96CW1|AP2M1_HUMAN\t495.9\t3TB8_A\t5FPI_A\t6OWT_N\t6QH5_N\n+sp|P04601|NEF_HV1H2\tsp|Q9BXS5|AP1M1_HUMAN\t474.5\t3TB8_A\t4P6Z_M\t4EN2_B\t6CRI_M\n+sp|P04601|NEF_HV1H2\tsp|Q9Y6Q5|AP1M2_HUMAN\t442.4\t3TB8_A\t4P6Z_M\t4EN2_B\t6CRI_M\n+sp|P69723|VIF_HV1H2\tsp|Q13951|PEBB_HUMAN\t412.1\t4N9F_b\t6P59_C\t4N9F_b\t6P59_A\n+sp|P04601|NEF_HV1H2\tsp|P53677|AP3M2_HUMAN\t406.8\t3TB8_A\t2XA7_M\t6OWT_N\t6QH5_N\n+sp|P04601|NEF_HV1H2\tsp|Q9Y2T2|AP3M1_HUMAN\t404.8\t3TB8_A\t6QH5_N\t6OWT_N\t6QH5_N\n+sp|P04601|NEF_HV1H2\tsp|O00189|AP4M1_HUMAN\t353.7\t3TB8_A\t2XA7_M\t6OWT_N\t6QH5_N\n+sp|P69723|VIF_HV1H2\tsp|Q13616|CUL1_HUMAN\t328.9\t4N9F_b\t1LDJ_A\t4N9F_b\t4JGH_D\n+sp|P69723|VIF_HV1H2\tsp|Q13620|CUL4B_HUMAN\t295.0\t4N9F_b\t2HYE_C\t4N9F_b\t4JGH_D\n+sp|P69726|VPR_HV1H2\tsp|P13051|UNG_HUMAN\t291.6\t1ESX_A\t3FCI_A\t5JK7_F\t3FCI_A\n+sp|P69726|VPR_HV1H2\tsp|Q10570|CPSF1_HUMAN\t291.6\t1ESX_A\t6F9N_A\t5JK7_F\t5JK7_B\n+sp|P69726|VPR_HV1H2\tsp|Q15393|SF3B3_HUMAN\t291.6\t1ESX_A\t6FF7_v\t5JK7_F\t5JK7_B\n+sp|P69726|VPR_HV1H2\tsp|Q16531|DDB1_HUMAN\t291.6\t1ESX_A\t5JK7_B\t5JK7_F\t5JK7_B\n+sp|P69723|VIF_HV1H2\tsp|Q9NRW3|ABC3C_HUMAN\t286.5\t4N9F_b\t5CQH_A\t6NIL_F\t5HX4_A\n+sp|P69723|VIF_HV1H2\tsp|Q93034|CUL5_HUMAN\t286.1\t4N9F_b\t6V9I_C\t4N9F_b\t4JGH_D\n+sp|P69723|VIF_HV1H2\tsp|Q13619|CUL4A_HUMAN\t281.7\t4N9F_b\t2HYE_C\t4N9F_b\t4JGH_D\n+sp|P69723|VIF_HV1H2\tsp|Q13618|CUL3_HUMAN\t278.9\t4N9F_b\t2HYE_C\t4N9F_b\t4JGH_D\n+sp|P69723|VIF_HV1H2\tsp|Q8IUX4|ABC3F_HUMAN\t275.2\t4N9F_b\t6P3X_B\t6NIL_F\t5HX4_A\n+sp|P04608|TAT_HV1H2\tsp|A0A0B4J2F2|SIK1B_HUMAN\t264.2\t3MI9_C\t3FE3_B\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|O00444|PLK4_HUMAN\t264.2\t3MI9_C\t6N45_A\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|O14965|AURKA_HUMAN\t264.2\t3MI9_C\t4DEE_A\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|O15264|MK13_HUMAN\t264.2\t3MI9_C\t3COI_A\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|O76039|CDKL5_HUMAN\t264.2\t3MI9_C\t4BGQ_A\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|O95747|OXSR1_HUMAN\t264.2
\t3MI9_C\t5DBX_A\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|O96017|CHK2_HUMAN\t264.2\t3MI9_C\t3I6U_A\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|P06493|CDK1_HUMAN\t264.2\t3MI9_C\t6GU2_A\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|P11801|KPSH1_HUMAN\t264.2\t3MI9_C\t4B99_A\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|P20794|MAK_HUMAN\t264.2\t3MI9_C\t6O9L_8\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|P21127|CD11B_HUMAN\t264.2\t3MI9_C\t6O9L_8\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|P24941|CDK2_HUMAN\t264.2\t3MI9_C\t1FVV_C\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|P42685|FRK_HUMAN\t264.2\t3MI9_C\t1AD5_B\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|P50613|CDK7_HUMAN\t264.2\t3MI9_C\t6O9L_8\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|P50750|CDK9_HUMAN\t264.2\t3MI9_C\t3MI9_A\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|P53350|PLK1_HUMAN\t264.2\t3MI9_C\t3FC2_A\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|P53778|MK12_HUMAN\t264.2\t3MI9_C\t1CM8_B\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|P54646|AAPK2_HUMAN\t264.2\t3MI9_C\t5ISO_A\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|P57059|SIK1_HUMAN\t264.2\t3MI9_C\t3FE3_B\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|Q00526|CDK3_HUMAN\t264.2\t3MI9_C\t6Q4G_A\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|Q00532|CDKL1_HUMAN\t264.2\t3MI9_C\t4AGU_B\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|Q00534|CDK6_HUMAN\t264.2\t3MI9_C\t1BLX_A\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|Q00535|CDK5_HUMAN\t264.2\t3MI9_C\t1UNL_B\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|Q00536|CDK16_HUMAN\t264.2\t3MI9_C\t3MTL_A\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|Q00537|CDK17_HUMAN\t264.2\t3MI9_C\t3MTL_A\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|Q12851|M4K2_HUMAN\t264.2\t3MI9_C\t5J5T_A\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|Q13554|KCC2B_HUMAN\t264.2\t3MI9_C\t5U6Y_C\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|Q13555|KCC2G_HUMAN\t264.2\t3MI9_C\t5U6Y_C\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|Q13557|KCC2D_HUMAN\t264.2\t3MI9_C\t5U6Y_C\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|Q14004|CDK13_HUMAN\t264.2
\t3MI9_C\t5EFQ_A\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|Q14012|KCC1A_HUMAN\t264.2\t3MI9_C\t6QP5_A\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|Q15131|CDK10_HUMAN\t264.2\t3MI9_C\t4B99_A\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|Q16539|MK14_HUMAN\t264.2\t3MI9_C\t4LOP_C\t3MI9_C\t3MI9_A\n+sp|P04608|TAT_HV1H2\tsp|Q5MAI5|CDKL4_HUMAN\t264.2\t3MI9_C\t4AGU_B\t3MI9_C\t3MI9'..b'\t3J70_P\n+sp|P04585|POL_HV1H2\ttr|A0A075B6Z9|A0A075B6Z9_HUMAN\t14.4\t1L6N_A\t5U64_B\t2XT1_A\t6FPV_A\n+sp|P04591|GAG_HV1H2\ttr|A0A075B6Z9|A0A075B6Z9_HUMAN\t14.4\t1L6N_A\t5U64_B\t2XT1_A\t6FPV_A\n+tr|A0A0J9YWJ2|A0A0J9YWJ2_HUMAN\tsp|P04585|POL_HV1H2\t14.4\t2DL1_A\t1L6N_A\t6BK8_D\t6BK8_O\n+tr|A0A0J9YWJ2|A0A0J9YWJ2_HUMAN\tsp|P04591|GAG_HV1H2\t14.4\t2DL1_A\t1L6N_A\t6BK8_D\t6BK8_O\n+tr|A0A075B6U7|A0A075B6U7_HUMAN\tsp|P04578|ENV_HV1H2\t14.3\t5OVW_L\t6PWU_E\t6VN0_G\t6MTJ_B\n+tr|A0A075B6Y0|A0A075B6Y0_HUMAN\tsp|P04578|ENV_HV1H2\t14.3\t6GKD_B\t6PWU_E\t6NIJ_H\t3J70_P\n+tr|A0A075B6W2|A0A075B6W2_HUMAN\tsp|P04578|ENV_HV1H2\t14.2\t6GKD_B\t6PWU_E\t6VN0_I\t6MTJ_B\n+tr|A0A0A0MT92|A0A0A0MT92_HUMAN\tsp|P04578|ENV_HV1H2\t14.1\t5VM4_B\t6PWU_E\t6VPX_K\t3J70_P\n+tr|A0A075B6X3|A0A075B6X3_HUMAN\tsp|P04578|ENV_HV1H2\t14.1\t6GJS_B\t6PWU_E\t6VPX_K\t3J70_P\n+tr|A0A075B6Z2|A0A075B6Z2_HUMAN\tsp|P04578|ENV_HV1H2\t14.1\t6GKD_B\t6PWU_E\t6NIJ_H\t3J70_P\n+sp|P04585|POL_HV1H2\ttr|A0N4Z8|A0N4Z8_HUMAN\t14.0\t1L6N_A\t4HGM_A\t2XT1_A\t6FPV_A\n+sp|P04591|GAG_HV1H2\ttr|A0N4Z8|A0N4Z8_HUMAN\t14.0\t1L6N_A\t4HGM_A\t2XT1_A\t6FPV_A\n+tr|A0A075B6V6|A0A075B6V6_HUMAN\tsp|P04578|ENV_HV1H2\t14.0\t3ZHK_A\t6PWU_E\t6VPX_K\t3J70_P\n+tr|A0A075B6Y8|A0A075B6Y8_HUMAN\tsp|P04578|ENV_HV1H2\t14.0\t3ZHK_A\t6PWU_E\t6NIJ_H\t3J70_P\n+sp|P04585|POL_HV1H2\ttr|A0A087WU04|A0A087WU04_HUMAN\t13.9\t1L6N_A\t6GJS_B\t2XT1_A\t6FPV_A\n+sp|P04591|GAG_HV1H2\ttr|A0A087WU04|A0A087WU04_HUMAN\t13.9\t1L6N_A\t6GJS_B\t2XT1_A\t6FPV_A\n+sp|A0A0A0MTA4|TJB25_HUMAN\tsp|P04578|ENV_HV1H2\t13.9\t3ZHK_A\t6PWU_E\t6VN0_I\t6MTJ_B\n+tr|A0A075B6V9|A0A075B6V9_HUMAN\tsp|P04578|ENV_HV1H2\t13.9\t1ZMY_A\t6PWU_E\t6
VPX_K\t3J70_P\n+tr|A0A075B6X9|A0A075B6X9_HUMAN\tsp|P04578|ENV_HV1H2\t13.9\t5FOJ_A\t6PWU_E\t6NIJ_H\t3J70_P\n+sp|P04585|POL_HV1H2\tsp|A0A0A0MT94|TJB22_HUMAN\t13.8\t1L6N_A\t6TYL_J\t2XT1_A\t6FPV_A\n+sp|P04591|GAG_HV1H2\tsp|A0A0A0MT94|TJB22_HUMAN\t13.8\t1L6N_A\t6TYL_J\t2XT1_A\t6FPV_A\n+sp|P04591|GAG_HV1H2\tsp|A0A0C5B5G6|MOTSC_HUMAN\t13.8\t1L6N_A\t5AOQ_M\t6SW9_8\t5JB3_Y\n+tr|A0A075B6U9|A0A075B6U9_HUMAN\tsp|P04578|ENV_HV1H2\t13.8\t5FOJ_A\t6PWU_E\t6VN0_I\t6MTJ_B\n+sp|P04585|POL_HV1H2\tsp|A0A0A0MT89|KJ01_HUMAN\t13.7\t1L6N_A\t4HGM_A\t2XT1_A\t6FPV_A\n+sp|P04591|GAG_HV1H2\tsp|A0A0A0MT89|KJ01_HUMAN\t13.7\t1L6N_A\t4HGM_A\t2XT1_A\t6FPV_A\n+sp|A0A0J9YXG5|TJB14_HUMAN\tsp|P04578|ENV_HV1H2\t13.6\t6VI4_D\t6PWU_E\t6VPX_K\t3J70_P\n+tr|A0A075B702|A0A075B702_HUMAN\tsp|P04578|ENV_HV1H2\t13.6\t1ZMY_A\t6PWU_E\t6NIJ_H\t3J70_P\n+sp|P04585|POL_HV1H2\tsp|A0A0A0MT70|TJB26_HUMAN\t13.5\t1L6N_A\t3ZHK_A\t2XT1_A\t6FPV_A\n+sp|P04585|POL_HV1H2\ttr|A0A0A0MTA1|A0A0A0MTA1_HUMAN\t13.5\t1L6N_A\t5VM4_B\t2XT1_A\t6FPV_A\n+sp|P04591|GAG_HV1H2\tsp|A0A0A0MT70|TJB26_HUMAN\t13.5\t1L6N_A\t3ZHK_A\t2XT1_A\t6FPV_A\n+sp|P04591|GAG_HV1H2\ttr|A0A0A0MTA1|A0A0A0MTA1_HUMAN\t13.5\t1L6N_A\t5VM4_B\t2XT1_A\t6FPV_A\n+sp|A0A0A0MT87|TJB24_HUMAN\tsp|P04578|ENV_HV1H2\t13.5\t5OVW_L\t6PWU_E\t6VPX_K\t3J70_P\n+tr|A0A075B6Z4|A0A075B6Z4_HUMAN\tsp|P04578|ENV_HV1H2\t13.5\t5NGV_H\t6PWU_E\t6NIJ_H\t3J70_P\n+sp|P04585|POL_HV1H2\ttr|A0A0A0MT85|A0A0A0MT85_HUMAN\t13.4\t1L6N_A\t3GKZ_A\t2XT1_A\t6FPV_A\n+sp|P04591|GAG_HV1H2\ttr|A0A0A0MT85|A0A0A0MT85_HUMAN\t13.4\t1L6N_A\t3GKZ_A\t2XT1_A\t6FPV_A\n+sp|A0A0A0MT70|TJB26_HUMAN\tsp|P04578|ENV_HV1H2\t13.4\t3ZHK_A\t6PWU_E\t6VPX_K\t3J70_P\n+sp|P04601|NEF_HV1H2\ttr|A0A0A0MT93|A0A0A0MT93_HUMAN\t13.3\t3TB8_A\t6APO_A\t4ORZ_B\t5MZV_D\n+tr|A0A075B6V0|A0A075B6V0_HUMAN\tsp|P04578|ENV_HV1H2\t13.3\t5UJR_A\t6PWU_E\t6VPX_K\t3J70_P\n+tr|A0A0A0MTA3|A0A0A0MTA3_HUMAN\tsp|P04578|ENV_HV1H2\t13.2\t4KV5_E\t6PWU_E\t6NIJ_H\t3J70_P\n+sp|A0A0A0MT94|TJB22_HUMAN\tsp|P04578|ENV_HV1H2\t13.1\t6TYL_J\t6PWU_E\t6NIJ_H\t3J70_P\n+tr|A0A075B6W8|A0A
075B6W8_HUMAN\tsp|P04578|ENV_HV1H2\t13.1\t6GJS_B\t6PWU_E\t6VN0_I\t6MTJ_B\n+sp|P04578|ENV_HV1H2\tsp|A0A0A0MT89|KJ01_HUMAN\t13.0\t6PWU_E\t4HGM_A\t6DCQ_D\t6VPX_P\n+tr|A0A0A0MT85|A0A0A0MT85_HUMAN\tsp|P04578|ENV_HV1H2\t12.5\t3GKZ_A\t6PWU_E\t6NIJ_H\t3J70_P\n+sp|P04608|TAT_HV1H2\ttr|A0A0J9YWD0|A0A0J9YWD0_HUMAN\t12.3\t3MI9_C\t3C5T_B\t6N4Y_C\t4NBX_B\n+sp|A0A0J9YXM7|TJB15_HUMAN\tsp|P04578|ENV_HV1H2\t12.1\t6GKD_B\t6PWU_E\t6VO0_I\t4TVP_G\n+tr|A0A0A0MT96|A0A0A0MT96_HUMAN\tsp|P04578|ENV_HV1H2\t11.7\t2I24_N\t6PWU_E\t6VO0_I\t4TVP_G\n+tr|A0A0A0MTA1|A0A0A0MTA1_HUMAN\tsp|P04578|ENV_HV1H2\t11.7\t5VM4_B\t6PWU_E\t6VO0_G\t4TVP_G\n+tr|A0A0A0MT69|A0A0A0MT69_HUMAN\tsp|P04578|ENV_HV1H2\t11.3\t4HGM_A\t6PWU_E\t6VO0_I\t4TVP_G\n'
b
diff -r 000000000000 -r dbbcc7cd889f test-data/minz/NP_000282.1.hhr
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/minz/NP_000282.1.hhr Tue Mar 23 13:55:42 2021 +0000
b
b'@@ -0,0 +1,1458 @@\n+Query         NP_000282.1\n+Match_columns 417\n+No_of_seqs    604 out of 4515\n+Neff          6.98956\n+Searched_HMMs 787\n+Date          Fri Jul 24 20:35:21 2020\n+Command       /home/guerler/hh-suite/build/bin/hhblits -i /home/guerler/human/fasta/NP_00/NP_000282.1.fasta -d /home/guerler/pdb70/pdb70 -o /home/guerler/human/hhr/NP_00/NP_000282.1.hhr \n+\n+ No Hit                             Prob E-value P-value  Score    SS Cols Query HMM  Template HMM\n+  1 2WZB_A PHOSPHOGLYCERATE KINASE 100.0  9E-114  7E-118  875.1   0.0  415    3-417     2-416 (416)\n+  2 1HDI_A PHOSPHOGLYCERATE KINASE 100.0  4E-113  3E-117  868.7   0.0  412    6-417     2-413 (413)\n+  3 6Y3A_A Phosphoglycerate kinase 100.0  3E-112  2E-116  865.4   0.0  414    2-416     9-423 (424)\n+  4 3UWD_A Phosphoglycerate kinase 100.0  7E-112  6E-116  855.6   0.0  391    4-416     1-393 (394)\n+  5 1QPG_A 3-PHOSPHOGLYCERATE KINA 100.0  7E-112  6E-116  861.3   0.0  413    2-416     1-413 (415)\n+  6 3PGK_A PHOSPHOGLYCERATE KINASE 100.0  8E-112  7E-116  861.3   0.0  413    2-416     2-414 (416)\n+  7 3Q3V_A Phosphoglycerate kinase 100.0  3E-111  3E-115  852.3   0.0  392    2-416     3-399 (403)\n+  8 1PHP_A 3-PHOSPHOGLYCERATE KINA 100.0  1E-110  9E-115  847.6   0.0  387    8-416     5-393 (394)\n+  9 1VPE_A PHOSPHOGLYCERATE KINASE 100.0  1E-110  1E-114  848.4   0.0  392    6-416     2-395 (398)\n+ 10 4FEY_A Phosphoglycerate kinase 100.0  6E-110  5E-114  842.0   0.0  388    2-416     2-391 (395)\n+ 11 1V6S_B Phosphoglycerate kinase 100.0  1E-109  1E-113  838.8   0.0  386    7-416     2-390 (390)\n+ 12 3ZLB_A PHOSPHOGLYCERATE KINASE 100.0  2E-109  1E-113  840.0   0.0  394    4-416     1-397 (398)\n+ 13 4EHJ_A Phosphoglycerate kinase 100.0  2E-109  2E-113  837.1   0.0  386    4-416     1-388 (392)\n+ 14 16PK_A 3-PHOSPHOGLYCERATE KINA 100.0  4E-109  4E-113  840.3   0.0  395    6-416     2-414 (415)\n+ 15 3OZ7_B Phosphoglycerate kinase 100.0  4E-109  4E-113  840.5   0.0  413    3-416     
4-417 (417)\n+ 16 4DG5_A Phosphoglycerate kinase 100.0  5E-109  4E-113  836.1   0.0  392    5-416     9-402 (403)\n+ 17 6HXE_A Phosphoglycerate kinase 100.0  4E-108  4E-112  826.1   0.0  381    4-416     1-383 (387)\n+ 18 4NG4_B Phosphoglycerate kinase 100.0  5E-108  4E-112  830.0   0.0  389    1-416     9-399 (404)\n+ 19 4NG4_C Phosphoglycerate kinase 100.0  5E-108  4E-112  830.0   0.0  389    1-416     9-399 (404)\n+ 20 6I06_A Phosphoglycerate kinase 100.0  1E-107  9E-112  823.4   0.0  381    4-416     1-383 (387)\n+ 21 2CUN_A Phosphoglycerate kinase 100.0  3E-105  3E-109  811.0   0.0  387    8-416     3-400 (410)\n+ 22 1FW8_A PHOSPHOGLYCERATE KINASE 100.0 8.8E-90 7.6E-94  698.3   0.0  342   74-416     2-343 (416)\n+ 23 1FW8_A PHOSPHOGLYCERATE KINASE  99.3 3.2E-16 2.7E-20  156.7   0.0   68    3-70    347-414 (416)\n+ 24 2Q33_B D-MONELLIN CHAIN A, D-M  34.3     5.9 0.00042   24.6   0.0   11  310-320     2-12  (48)\n+\n+No 1\n+>2WZB_A PHOSPHOGLYCERATE KINASE 1 (E.C.2.7.2.3); HEREDITARY HEMOLYTIC ANEMIA, TRANSFERASE, PHOSPHOPROTEIN; HET: ADP, 3PG; 1.47A {HOMO SAPIENS}\n+Probab=100.00  E-value=8.6e-114  Score=875.06  Aligned_cols=415  Identities=100%  Similarity=1.449  Sum_probs=363.4  Template_Neff=6.400\n+\n+Q NP_000282.1       3 LSNKLTLDKLDVKGKRVVMRVDFNVPMKNNQITNNQRIKAAVPSIKFCLDNGAKSVVLMSHLGRPDGVPMPDKYSLEPVA   82 (417)\n+Q Consensus         3 ~~~~~~i~~~~l~gK~VlvRvD~NVPl~~~~i~d~~RI~~~lpTI~~Ll~~gak~vil~SHlGRP~g~~~~~~~Sl~~va   82 (417)\n+                      +..|++++|.|++|||||||+|+|||+++++|.|++||++++|||+||+++||++|||+||+|||+++..++.+||+||+\n+T Consensus         2 ~~~~~~l~~~~l~gK~vlvR~D~NVPl~~~~i~dd~RI~~~lpTI~~Ll~~gak~vil~sHlGrP~g~~~~~~~Sl~pv~   81 (416)\n+T 2WZB_A            2 LSNKLTLDKLDVKGKRVVMRVDFNVPMKNNQITNNQRIKAAVPSIKFCLDNGAKSVVLMSHLGRPDGVPMPDKYSLEPVA   81 (416)\n+T ss_dssp             CTTBCBGGGCCCTTCEEEEECCCCCCEETTEESCCHHHHHHHHHHHHHHHTTCSEEEEECCCSCCTTSCCHHHHCSHHHH\n+T ss_pred             
CcccccccccccCCCEEEEEeEccccccCCccCCChHHHHHHHHHHHHHHCCCCEEEEEeeCCCCCCCCCCcccChHHHH\n+Confidence            457888988899999999999'..b'CSCSTTTHHHHHHHHTTCSE\n+T ss_pred             HHhhCCeEeecccccCCccccCcccCCCchhhhcHHHHHHHHHHHHHhhCCCCCEEEEEcCCCHHHHHHHHHHHHHhcCE\n+Confidence            99999999999999999999999999976589999999999999999999999999999999999999999999999999\n+\n+\n+Q NP_000282.1     234 MIIGGGMAFTFLKVLNNMEIGTSLFDEEGAKIVKDLMSKAEKNGVKITLPVDFVTADKFDENAKTGQATVASGIPAGWMG  313 (417)\n+Q Consensus       234 iligG~~a~tfl~a~~g~~ig~s~~e~~~~~~a~~il~~a~~~~~~i~lP~D~~v~~~~~~~~~~~~~~~~~~i~~~~~~  313 (417)\n+                      |++||+|||+||+|++++++|+|++|++..+.|++++++|+.++++|+||+|++|.+.+..+.....+...+.+|++|++\n+T Consensus       161 IligG~~a~tfL~a~~~~~iG~S~~e~~~~~~a~~il~~a~~~~~ki~LP~D~vv~~~~~~~~~~~~~~~~~~i~~~~~i  240 (416)\n+T 1FW8_A          161 IIIGGGMAFTFKKVLENTEIGDSIFDKAGAEIVPKLMEKAKAKGVEVVLPVDFIIADAFSADANTKTVTDKEGIPAGWQG  240 (416)\n+T ss_dssp             EEEEGGGHHHHHHHHSCCCCCSCCCCHHHHHHHHHHHHHHHHHTCEEECCSEEEEESSSSTTCCEEEEETTTCCCTTCEE\n+T ss_pred             EEeCchHHHHHHHHhCCCccCccccchhhhhHHHHHHHHHHHcCCeEEeeeeEEEeccccCCCCCceeeccCCCCCCCcc\n+Confidence            99999999999999755689999999988999999999988999999999999998865322211111122378899999\n+\n+\n+Q NP_000282.1     314 LDCGPESSKKYAEAVTRAKQIVWNGPVGVFEWEAFARGTKALMDEVVKATSRGCITIIGGGDTATCCAKWNTEDKVSHVS  393 (417)\n+Q Consensus       314 ~DIGp~Ti~~~~~~I~~aktI~WnGp~G~~E~~~f~~GT~~l~~al~~~~~~~~~~ivGGGdT~~~~~~~g~~~~~~~vS  393 (417)\n+                      +||||+|++.|.+.|++|||||||||||+||.++|++||+++++++++++.+++++|+|||||+++++++|+.++++|||\n+T Consensus       241 lDIGp~Ti~~~~~~I~~aktI~wnGP~G~~E~~~f~~GT~~i~~ai~~~~~~~a~~ivGGGdT~~~~~~~g~~~~~s~vS  320 (416)\n+T 1FW8_A          241 LDNGPESRKLFAATVAKAKTIVWNGPPGVFEFEKFAAGTKALLDEVVKSSAAGNTVIIGGGDTATVAKKYGVTDKISHVS  320 (416)\n+T ss_dssp             EEECHHHHHHHHHHHHHCSEEEEESCSSCTTSGGGCHHHHHHHHHHHHHHHTTCEEEECTTHHHHHHHHTTCGGGSSEEC\n+T ss_pred             
cccCHHHHHHHHHHHHcCCEEEEcCCCCcccccccChHHHHHHHHHHHHhcCCCeEEEechhHHHHHHHhCCCCCceEEe\n+Confidence            99999999999999999999999999999998889999999999997632236899999999999998889878899999\n+\n+\n+Q NP_000282.1     394 TGGGASLELLEGKVLPGVDALSN  416 (417)\n+Q Consensus       394 tgGgA~Le~L~G~~LPgl~aL~~  416 (417)\n+                      |||||+||||+|+.||||++|++\n+T Consensus       321 TGGGA~Le~L~Gk~LPgieaL~~  343 (416)\n+T 1FW8_A          321 TGGGASLELLEGKELPGVAFLSE  343 (416)\n+T ss_dssp             SCSHHHHHHHTTCCCHHHHTSCS\n+T ss_pred             cCchHHHHHHcCCCCCceeehhh\n+Confidence            99999999999999999999985\n+\n+\n+No 23\n+>1FW8_A PHOSPHOGLYCERATE KINASE (E.C.2.7.2.3); phosphotransferase, kinase, phosphoglycerate kinase, glycolysis; HET: GOL; 2.3A {Saccharomyces cerevisiae} SCOP: c.86.1.1\n+Probab=99.28  E-value=3.2e-16  Score=156.69  Aligned_cols=68  Identities=57%  Similarity=0.940  Sum_probs=61.4  Template_Neff=6.400\n+\n+Q NP_000282.1       3 LSNKLTLDKLDVKGKRVVMRVDFNVPMKNNQITNNQRIKAAVPSIKFCLDNGAKSVVLMSHLGRPDGV   70 (417)\n+Q Consensus         3 ~~~~~~i~~~~l~gK~VlvRvD~NVPl~~~~i~d~~RI~~~lpTI~~Ll~~gak~vil~SHlGRP~g~   70 (417)\n+                      ++.++++++.+++|||||||+|+|||+++++|.|++||++++|||+||+++||++||++||+|||+++\n+T Consensus       347 ~~~~~~~~~~~~~~k~vl~R~D~nvp~~~~~i~d~~Ri~~~~~ti~~l~~~~~~~vii~sH~grp~~~  414 (416)\n+T 1FW8_A          347 LSSKLSVQDLDLKDKRVFIRVDFNVPLDGKKITSNQRIVAALPTIKYVLEHHPRYVVLASHLGRPNGE  414 (416)\n+T ss_dssp             SSCSCBGGGSCCTTCEEEEECCCCCCBSSSSBSCTHHHHHHHHHHHHHHHTCCSEEEEECCCSCCCSS\n+T ss_pred             ccccccccccccCCCEEEEEeecCCCCCCCCcCCCHHHHhHHHHHHHHHHcCCCEEEEeeccCCCCCC\n+Confidence            45567777788999999999999999988899999999999999999999998449999999999875\n+\n+\n+No 24\n+>2Q33_B D-MONELLIN CHAIN A, D-MONELLIN CHAIN; ALPHA/BETA, ALL-D PROTEIN, DE NOVO; 1.8A {N/A}\n+Probab=34.33  E-value=5.9  Score=24.60  Aligned_cols=11  Identities=36%  Similarity=0.761  Sum_probs=9.2  Template_Neff=1.000\n+\n+Q NP_000282.1     310 
GWMGLDCGPES  320 (417)\n+Q Consensus       310 ~~~~~DIGp~T  320 (417)\n+                      .|.++||||-|\n+T Consensus         2 eweiidigpft   12 (48)\n+T 2Q33_B            2 EWEIIDIGPFT   12 (48)\n+Confidence            58899999955\n+\n+\n'
b
diff -r 000000000000 -r dbbcc7cd889f test-data/minz/NP_000290.2.hhr
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/minz/NP_000290.2.hhr Tue Mar 23 13:55:42 2021 +0000
b
b"@@ -0,0 +1,17574 @@\n+Query         NP_000290.2\n+Match_columns 747\n+No_of_seqs    3849 out of 40250\n+Neff          6.23432\n+Searched_HMMs 998\n+Date          Fri Jul 24 20:45:58 2020\n+Command       /home/guerler/hh-suite/build/bin/hhblits -i /home/guerler/human/fasta/NP_00/NP_000290.2.fasta -d /home/guerler/pdb70/pdb70 -o /home/guerler/human/hhr/NP_00/NP_000290.2.hhr \n+\n+ No Hit                             Prob E-value P-value  Score    SS Cols Query HMM  Template HMM\n+  1 1XM9_A plakophilin 1  Plakophi  99.7 6.3E-22 7.3E-26  206.9   0.0  453  246-719     3-455 (457)\n+  2 3L6X_A Catenin delta-1, E-cadh  99.4 1.1E-17 1.2E-21  185.5   0.0  438  245-729    48-492 (584)\n+  3 5D5K_C Importin subunit alpha-  98.6 1.1E-11 1.4E-15  126.2   0.0  369  243-713    11-386 (466)\n+  4 3NMZ_A APC variant protein, Rh  98.6 1.3E-11 1.4E-15  129.0   0.0  381  238-715    25-452 (458)\n+  5 4UAF_B Importin alpha 1 import  98.5 1.3E-11 1.6E-15  125.7   0.0  369  243-713    11-386 (466)\n+  6 4U5L_A deltaIBB-importin-alpha  98.5 1.7E-11   2E-15  123.0   0.0  360  246-712    49-425 (426)\n+  7 6SA7_B DARPin-Armadillo fusion  98.5 2.5E-11 2.9E-15  127.0   0.0  136  244-380   172-309 (510)\n+  8 4RV1_D Engineered Protein OR49  98.5 2.7E-11 3.2E-15  120.8   0.0  362  247-715     4-369 (420)\n+  9 6S9O_F designed Armadillo repe  98.5 2.8E-11 3.3E-15  117.9   0.0  167  545-724   170-338 (344)\n+ 10 5MFD_C YIIIM''6AII, Capsid dec  98.5   3E-11 3.5E-15  116.2   0.0  318  246-712     5-326 (328)\n+ 11 4MZ6_E Deoxyuridine 5'-triphos  98.5 3.5E-11   4E-15  125.0   0.0  368  244-713    56-430 (509)\n+ 12 5UMZ_B Importin subunit alpha-  98.5 3.5E-11   4E-15  125.0   0.0  368  244-713    56-430 (509)\n+ 13 2JDQ_A IMPORTIN ALPHA-1 SUBUNI  98.4 3.6E-11 4.2E-15  122.1   0.0  367  244-716    19-393 (450)\n+ 14 4E4V_A Importin subunit alpha-  98.4 3.6E-11 4.2E-15  124.3   0.0  364  246-716    76-456 (485)\n+ 15 4B18_A IMPORTIN SUBUNIT ALPHA-  98.4   4E-11 4.7E-15  121.6   0.0  368  244-717    
16-391 (447)\n+ 16 4XZR_B Heh1-NLS, Kap60; karyop  98.4 4.3E-11   5E-15  119.8   0.0  363  248-717     4-375 (423)\n+ 17 5XZX_A Importin subunit alpha-  98.4 4.3E-11 5.1E-15  119.5   0.0  133  247-380     4-140 (416)\n+ 18 1JDH_A BETA-CATENIN, hTcf-4; B  98.4 4.4E-11 5.2E-15  123.9   0.0  378  245-726   101-502 (529)\n+ 19 3IFQ_A plakoglobin, E-cadherin  98.4 4.5E-11 5.3E-15  124.5   0.0  381  246-726   104-503 (553)\n+ 20 4UAD_A Importin alpha import a  98.4   6E-11   7E-15  121.8   0.0  372  246-723    68-456 (479)\n+ 21 5ZHX_D Rap1 GTPase-GDP dissoci  98.4 6.3E-11 7.5E-15  120.9   0.0  407  246-727    19-458 (487)\n+ 22 1EE4_A KARYOPHERIN ALPHA; ARM   98.4 8.2E-11 9.6E-15  117.7   0.0  364  247-717     3-375 (423)\n+ 23 1EE4_B KARYOPHERIN ALPHA; ARM   98.4 8.2E-11 9.6E-15  117.7   0.0  364  247-717     3-375 (423)\n+ 24 4EV8_A Catenin beta-1; mouse c  98.3 9.8E-11 1.2E-14  121.5   0.0  382  246-727   103-504 (538)\n+ 25 4RXH_B Importin subunit alpha,  98.3 1.1E-10 1.2E-14  121.7   0.0  365  246-716    72-457 (495)\n+ 26 4BQK_A IMPORTIN SUBUNIT ALPHA-  98.3 1.2E-10 1.3E-14  118.5   0.0  366  246-716    46-427 (456)\n+ 27 6BW9_A Importin subunit alpha-  98.3 1.3E-10 1.5E-14  118.5   0.0  366  246-717    53-428 (459)\n+ 28 1WA5_B GTP-BINDING NUCLEAR PRO  98.3 1.9E-10 2.2E-14  121.4   0.0  134  245-379   173-308 (530)\n+ 29 5XGC_A Rap1 GTPase-GDP dissoci  98.3   2E-10 2.3E-14  117.7   0.0  409  246-729    35-476 (503)\n+ 30 4BPL_A IMPORTIN SUBUNIT ALPHA-  98.2 2.4E-10 2.9E-14  115.9   0.0  367  246-717    44-426 (454)\n+ 31 4B8J_A IMPORTIN SUBUNIT ALPHA-  98.2 2.6E-10 2.9E-14  120.0   0.0  367  246-717   118-500 (528)\n+ 32 5T94_B Guanine nucleotide exch  98.2 2.9E-10 3.3E-14  120.5   0.0  364  246-716   131-513 (542)\n+ 33 4TNM_A Importin-alpha3 / MOS6;  98.2 3.2E-10 3.7E-14  119.6   0.0  369  246-719   119-504 (531)\n+ 34 5TBK_C Importin subunit alpha-  98.2 3.4E-10 3.8E-14  119.3   0.0  366  245-716    71-443 (521)\n+ 35 5TBK_D Importin subunit alpha-  98.2 
3.4E-10 3.8E-14  119.3   0.0  366  245-716    71-443"..b'CCHHHHHHHHHHHHHHHHH\n+Confidence               1234555555564 45677888888888877653\n+\n+\n+No 498\n+>2QK1_A Protein STU2; stu2, Stu2p, XMAP215, Dis1, TOG; 1.7A {Saccharomyces cerevisiae}\n+Probab=67.88  E-value=0.42  Score=40.93  Aligned_cols=116  Identities=3%  Similarity=-0.016  Sum_probs=68.4  Template_Neff=12.900\n+\n+Q NP_000290.2     244 GLTIPKAVQYLSSQDEKYQAIGAYYIQHTCFQDESAKQQVYQLGGICKLVDLLRSPNQNVQQAAAGALRNLVFRSTTN--  321 (747)\n+Q Consensus       244 ~~iL~~Ll~lL~ssd~eVr~sAL~aLsnLs~~~~~~~~~li~~~IL~~Ll~lL~s~d~eVr~~AL~aLs~La~~~~~~--  321 (747)\n+                      ..+++.+...+.+.+..++..++.++..+...............+++.+...+.+.++.++..++.++..++......  \n+T Consensus       103 ~~~~~~l~~~l~~~~~~v~~~a~~~l~~~~~~~~~~~~~~~~~~~~~~l~~~l~~~~~~vr~~~~~~l~~~~~~~~~~~~  182 (249)\n+T 2QK1_A          103 SLVFTPLLDRTKEKKPSVIEAIRKALLTICKYYDPLASSGRNEDMLKDILEHMKHKTPQIRMECTQLFNASMKEEKDGYS  182 (249)\n+T ss_dssp             HHHHHHHHHGGGCCCHHHHHHHHHHHHHHHHHSCTTCTTCTTHHHHHHHHHHTTCSSHHHHHHHHHHHHHHHHHCCSCSH\n+T ss_pred             HHHHHHHHHHhcCCCHHHHHHHHHHHHHHHHHhChhcccCCHHHHHHHHHHHhCCCCHHHHHHHHHHHHHHHHhccccch\n+Confidence            345556666666666777888888888776543210000111245566666666667778888888888776543310  \n+\n+\n+Q NP_000290.2     322 -KLETRRQNGIREAVSLLRRTGNAEIQKQLTGLLWNLSST  360 (747)\n+Q Consensus       322 -~~~ll~~~IL~~Ll~lL~ss~d~eVr~~AL~aLsnLas~  360 (747)\n+                       ........+++.+...+. 
+.++.++..++.++..+...\n+T Consensus       183 ~~~~~~~~~~~~~l~~~l~-~~~~~vr~~a~~~l~~l~~~  221 (249)\n+T 2QK1_A          183 TLQRYLKDEVVPIVIQIVN-DTQPAIRTIGFESFAILIKI  221 (249)\n+T ss_dssp             HHHHHCCCCCHHHHHHHHT-CSSHHHHHHHHHHHHHHHHH\n+T ss_pred             hhhHHHHHHHHHHHHHHHc-CCCHHHHHHHHHHHHHHHHH\n+Confidence             000111235555666665 55677888888888877653\n+\n+\n+No 499\n+>4D4Z_A DEOXYHYPUSINE HYDROXYLASE (E.C.1.14.99.29); OXIDOREDUCTASE, EIF-5A, HYPUSINE; HET: B3P, GOL; 1.7A {HOMO SAPIENS}\n+Probab=67.63  E-value=0.43  Score=42.31  Aligned_cols=60  Identities=10%  Similarity=0.123  Sum_probs=34.5  Template_Neff=12.700\n+\n+Q NP_000290.2     547 DAIRTYLNLMGKSK--KDATLEACAGALQNLTASKGLMSSGMSQLIGLKEKGLPQIARLLQSGNSDVVRSGASLLSNMS  623 (747)\n+Q Consensus       547 G~I~~LL~LL~ss~--d~eVr~~AL~aL~nLs~~s~~~s~~~~~~llie~giI~~Ll~LL~s~d~eVr~~AL~aLsnLa  623 (747)\n+                      ..++.|...+. +.  ++.++..++.+|..+...                ..++.+...+.+.++.++..++.+|..+.\n+T Consensus        74 ~~~~~l~~~l~-~~~~~~~vr~~a~~~l~~~~~~----------------~~~~~l~~~l~~~~~~v~~~a~~~l~~~~  135 (294)\n+T 4D4Z_A           74 RAIPMLVDVLQ-DTRQEPMVRHEAGEALGAIGDP----------------EVLEILKQYSSDPVIEVAETCQLAVRRLE  135 (294)\n+T ss_dssp             GGHHHHHHHHH-CTTSCHHHHHHHHHHHHHHCCG----------------GGHHHHHHHTTCSSHHHHHHHHHHHHHHH\n+T ss_pred             chHHHHHHHhh-CCCCCHHHHHHHHHHHHHhCCH----------------HHHHHHHHHccCCCchHHHHHHHHHHHHH\n+Confidence            34556666666 34  566777777776655421                12334455555556666666666666553\n+\n+\n+No 500\n+>4D50_A DEOXYHYPUSINE HYDROXYLASE (E.C.1.14.99.29); OXIDOREDUCTASE; HET: GAI; 1.7A {HOMO SAPIENS}\n+Probab=67.63  E-value=0.43  Score=42.31  Aligned_cols=60  Identities=10%  Similarity=0.123  Sum_probs=34.5  Template_Neff=12.700\n+\n+Q NP_000290.2     547 DAIRTYLNLMGKSK--KDATLEACAGALQNLTASKGLMSSGMSQLIGLKEKGLPQIARLLQSGNSDVVRSGASLLSNMS  623 (747)\n+Q Consensus       547 G~I~~LL~LL~ss~--d~eVr~~AL~aL~nLs~~s~~~s~~~~~~llie~giI~~Ll~LL~s~d~eVr~~AL~aLsnLa  623 (747)\n+   
                   ..++.|...+. +.  ++.++..++.+|..+...                ..++.+...+.+.++.++..++.+|..+.\n+T Consensus        74 ~~~~~l~~~l~-~~~~~~~vr~~a~~~l~~~~~~----------------~~~~~l~~~l~~~~~~v~~~a~~~l~~~~  135 (294)\n+T 4D50_A           74 RAIPMLVDVLQ-DTRQEPMVRHEAGEALGAIGDP----------------EVLEILKQYSSDPVIEVAETCQLAVRRLE  135 (294)\n+T ss_dssp             GGHHHHHHHHH-CTTSCHHHHHHHHHHHHHHCCG----------------GGHHHHHHHTTCSSHHHHHHHHHHHHHHH\n+T ss_pred             chHHHHHHHhh-CCCCCHHHHHHHHHHHHHhCCH----------------HHHHHHHHHccCCCchHHHHHHHHHHHHH\n+Confidence            34556666666 34  566777777776655421                12334455555556666666666666553\n+\n+\n'
b
diff -r 000000000000 -r dbbcc7cd889f test-data/minz/NP_000548.2.hhr
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/minz/NP_000548.2.hhr Tue Mar 23 13:55:42 2021 +0000
b
b'@@ -0,0 +1,1626 @@\n+Query         NP_000548.2\n+Match_columns 501\n+No_of_seqs    1691 out of 7065\n+Neff          7.55495\n+Searched_HMMs 400\n+Date          Sat Jul 25 00:10:29 2020\n+Command       /home/guerler/hh-suite/build/bin/hhblits -i /home/guerler/human/fasta/NP_00/NP_000548.2.fasta -d /home/guerler/pdb70/pdb70 -o /home/guerler/human/hhr/NP_00/NP_000548.2.hhr \n+\n+ No Hit                             Prob E-value P-value  Score    SS Cols Query HMM  Template HMM\n+  1 5FFO_H Integrin alpha-V, Integ 100.0 2.3E-42 2.1E-46  353.3   0.0  315  143-501    24-363 (363)\n+  2 3RJR_C Transforming growth fac 100.0 3.4E-42 3.1E-46  351.8   0.0  315  143-501    24-363 (363)\n+  3 5VQF_C Transforming growth fac 100.0 3.4E-42 3.2E-46  351.8   0.0  315  143-501    24-363 (363)\n+  4 5NTU_A Growth/differentiation  100.0 9.5E-42 8.8E-46  343.8   0.0  309  142-501    17-335 (335)\n+  5 5HLY_A Inhibin beta A chain; G 100.0   3E-39 2.7E-43  332.7   0.0  307  142-501    40-383 (383)\n+  6 6SF3_B Serine/threonine-protei  99.8 1.5E-26 1.3E-30  194.7   0.0  105  397-501     4-108 (108)\n+  7 4MPL_A Growth/differentiation   99.8 2.3E-26   2E-30  196.3   0.0  106  396-501    10-116 (116)\n+  8 6Z3J_B Growth/differentiation   99.8 2.6E-26 2.3E-30  196.2   0.0  107  395-501    11-117 (117)\n+  9 5I05_A Growth/differentiation   99.8   4E-26 3.5E-30  192.9   0.0  106  396-501     4-110 (110)\n+ 10 4N1D_A Bone morphogenetic prot  99.8 4.5E-26 3.9E-30  194.5   0.0  106  396-501    11-116 (116)\n+ 11 2QCQ_B Bone morphogenetic prot  99.8 5.7E-26   5E-30  191.9   0.0  105  397-501     5-110 (110)\n+ 12 1LXI_A BONE MORPHOGENETIC PROT  99.8   9E-26 7.8E-30  198.7   0.0  111  391-501    29-139 (139)\n+ 13 6OMN_G Bone morphogenetic prot  99.8 9.8E-26 8.7E-30  189.8   0.0  105  396-501     4-108 (108)\n+ 14 2R53_A Bone morphogenetic prot  99.8 9.9E-26 8.7E-30  192.3   0.0  106  396-501    11-116 (116)\n+ 15 2R53_B Bone morphogenetic prot  99.8 9.9E-26 8.7E-30  192.3   0.0  106  396-501    
11-116 (116)\n+ 16 1REU_A bone morphogenetic prot  99.8 1.1E-25   1E-29  187.7   0.0  102  399-501     2-103 (103)\n+ 17 1M4U_L Bone Morphogenetic Prot  99.8 1.2E-25 1.1E-29  197.8   0.0  111  391-501    29-139 (139)\n+ 18 2H62_B Bone morphogenetic prot  99.8 2.1E-25 1.9E-29  189.6   0.0  105  396-501    10-114 (114)\n+ 19 6Q2J_B Growth/differentiation   99.8 3.8E-25 3.2E-29  193.7   0.0  102  396-501    34-135 (135)\n+ 20 1KTZ_A TRANSFORMING GROWTH FAC  99.8 7.5E-25 6.6E-29  185.6   0.0  102  395-501    10-112 (112)\n+ 21 5TY4_B TGF-beta receptor type-  99.8 9.9E-25 8.8E-29  179.9   0.0   96  400-500     1-97  (97)\n+ 22 5VT2_B Growth/differentiation   99.8 1.1E-24 9.5E-29  184.6   0.0  102  396-501    11-112 (112)\n+ 23 5VZ3_A Growth/differentiation   99.8 1.1E-24 9.5E-29  184.6   0.0  102  396-501    11-112 (112)\n+ 24 2ARP_A Inhibin beta A chain, F  99.8 1.3E-24 1.2E-28  184.9   0.0  105  397-501     8-116 (116)\n+ 25 2ARV_A Inhibin beta A chain; h  99.8 1.3E-24 1.2E-28  184.9   0.0  105  397-501     8-116 (116)\n+ 26 2P6A_B Activin A, Follistatin   99.8 1.3E-24 1.2E-28  184.9   0.0  105  397-501     8-116 (116)\n+ 27 2TGI_A TRANSFORMING GROWTH FAC  99.8 1.5E-24 1.3E-28  183.7   0.0  101  396-501    11-112 (112)\n+ 28 3HH2_B Growth/differentiation   99.8 2.4E-24 2.1E-28  181.5   0.0   99  396-501    11-109 (109)\n+ 29 5F3B_C RK35 Chimeric antibody   99.8 2.4E-24 2.1E-28  181.5   0.0   99  396-501    11-109 (109)\n+ 30 3KFD_D Transforming growth fac  99.8 3.7E-24 3.2E-28  181.3   0.0  102  395-501    10-112 (112)\n+ 31 5E4G_A Growth/differentiation   99.8 1.2E-23 1.1E-27  177.0   0.0   99  396-501    11-109 (109)\n+ 32 5NMZ_B Neurturin; cystine knot  99.7 2.2E-22 1.9E-26  167.0   0.0   93  397-501     4-101 (101)\n+ 33 5NMZ_C Neurturin; cystine knot  99.7 2.2E-22 1.9E-26  167.0   0.0   93  397-501     4-101 (101)\n+ 34 2GH0_C artemin, GDNF family re  99.7 2.9E-22 2.6E-26  166.0   0.0   95  398-501     2-100 (101)\n+ 35 5MR4_B Neurturin, GDNF family   99.7 
4.4E-22 3.9E-26  165.4   0.0   94  396-501     4-102 (1'..b'piens}\n+Probab=57.82  E-value=1.2  Score=34.98  Aligned_cols=72  Identities=21%  Similarity=0.280  Sum_probs=38.3  Template_Neff=7.700\n+\n+Q NP_000548.2     422 LEYEAFHCEGLCEFPLRSHLEPTNHAVIQTLMNSMDPESTPPTCCVPTRLSPISILFIDSANNVVYKQYEDMVVESCGCR  501 (501)\n+Q Consensus       422 ~~~~a~yC~G~C~~~~~~~~~~~~h~~i~~~~~~~~~~~~~~pCC~P~~~~~l~ily~d~~~~~~~~~~~~mvv~~CgC~  501 (501)\n+                      ..+..++|.|.|.......    ........  ........-.||.|.++....+...=.++..+.  ..-.++.+|.|.\n+T Consensus        37 ~~v~~~~C~G~C~S~~~~~----p~~~~~~~--~~~~~~~~C~CC~p~~~~~~~v~l~C~~g~~~~--~~~~~i~~C~C~  108 (122)\n+T 5BPU_A           37 KMVLLARCEGHCSQASRSE----PLVSFSTV--LKQPFRSSCHCCRPQTSKLKALRLRCSGGMRLT--ATYRYILSCHCE  108 (122)\n+T ss_pred             CcEeceeeeccccceeecC----CCcccCCC--CCCcceeeeecCcCeEeEEEEEEEECCCCCeEE--EEEEEEEeceee\n+Confidence            6678899999998753200    00000000  000012345899999988766533323332222  333478899994\n+\n+\n+No 58\n+>5BQ8_C Norrin; Wnt signalling pathway, Norrie disease; HET: MLY; 2.0A {Homo sapiens}\n+Probab=53.96  E-value=1.5  Score=34.27  Aligned_cols=93  Identities=19%  Similarity=0.268  Sum_probs=46.5  Template_Neff=7.800\n+\n+Q NP_000548.2     398 ARCSRKALHVNFKDMGWDDWIIAPLEYEAFHCEGLCEFPLRSHLEPTNHAVIQTLMNSMDPESTPPTCCVPTRLSPISIL  477 (501)\n+Q Consensus       398 ~~Cc~~~l~V~F~dlGW~~wIiaP~~~~a~yC~G~C~~~~~~~~~~~~h~~i~~~~~~~~~~~~~~pCC~P~~~~~l~il  477 (501)\n+                      ..|....+...+..=.. .- .. ..+..++|.|.|.......    ........  
........-.||.|.++....|.\n+T Consensus        16 ~~C~~~~~~~~i~~~~~-gC-~s-~~v~~~~C~G~C~S~~~~~----p~~~~~~~--~~~~~~~~C~CC~p~~~~~~~V~   86 (122)\n+T 5BQ8_C           16 RRCMRHHYVDSISHPLY-KC-SS-KMVLLARCEGHCSQASRSE----PLVSFSTV--LKQPFRSSCHCCRPQTSKLKALR   86 (122)\n+T ss_dssp             TSSEEEEEEEEECCSSS-CB-CC-EEEEEEEEEECCSSCEEEE----ECCEETTB--CCCSEEEECEEEEEEEEEEEEEE\n+T ss_pred             CCeEEEeeEEEEEcCCC-Cc-ee-ceeEeeeEEEecccceecC----CccccCCC--CCCCceeEceecCCeEeEEEEEE\n+Confidence            45666555444432100 11 22 6678899999998753200    00000000  00001234589999998876653\n+\n+\n+Q NP_000548.2     478 FIDSANNVVYKQYEDMVVESCGCR  501 (501)\n+Q Consensus       478 y~d~~~~~~~~~~~~mvv~~CgC~  501 (501)\n+                      ..=.++..+  ...-+++.+|.|.\n+T Consensus        87 l~C~~g~~~--~~~~~~i~~C~C~  108 (122)\n+T 5BQ8_C           87 LRCSGGMRL--TATYRYILSCHCE  108 (122)\n+T ss_dssp             EEETTTEEE--EEEEEEEEEEEEE\n+T ss_pred             EECCCCceE--EEEEEEEEeceee\n+Confidence            332333322  2334578999994\n+\n+\n+No 59\n+>5BQE_A Frizzled-4; Wnt signalling pathway, Norrie disease; HET: MLY, NAG, PG0; 2.3A {Homo sapiens}\n+Probab=53.96  E-value=1.5  Score=34.27  Aligned_cols=93  Identities=19%  Similarity=0.268  Sum_probs=46.5  Template_Neff=7.800\n+\n+Q NP_000548.2     398 ARCSRKALHVNFKDMGWDDWIIAPLEYEAFHCEGLCEFPLRSHLEPTNHAVIQTLMNSMDPESTPPTCCVPTRLSPISIL  477 (501)\n+Q Consensus       398 ~~Cc~~~l~V~F~dlGW~~wIiaP~~~~a~yC~G~C~~~~~~~~~~~~h~~i~~~~~~~~~~~~~~pCC~P~~~~~l~il  477 (501)\n+                      ..|....+...+..=.. .- .. ..+..++|.|.|.......    ........  
........-.||.|.++....|.\n+T Consensus        16 ~~C~~~~~~~~i~~~~~-gC-~s-~~v~~~~C~G~C~S~~~~~----p~~~~~~~--~~~~~~~~C~CC~p~~~~~~~V~   86 (122)\n+T 5BQE_A           16 RRCMRHHYVDSISHPLY-KC-SS-KMVLLARCEGHCSQASRSE----PLVSFSTV--LKQPFRSSCHCCRPQTSKLKALR   86 (122)\n+T ss_dssp             TSSEEEEEEEEECCSSS-CB-CC-EEEEEEEEEECCSSCEEEE----ECCBCSSS--CSSCEEEECEEEEEEEEEEEEEE\n+T ss_pred             CCeEEEeeEEEEEcCCC-Cc-ee-ceeEeeeEEEecccceecC----CccccCCC--CCCCceeEceecCCeEeEEEEEE\n+Confidence            45666555444432100 11 22 6678899999998753200    00000000  00001234589999998876653\n+\n+\n+Q NP_000548.2     478 FIDSANNVVYKQYEDMVVESCGCR  501 (501)\n+Q Consensus       478 y~d~~~~~~~~~~~~mvv~~CgC~  501 (501)\n+                      ..=.++..+  ...-+++.+|.|.\n+T Consensus        87 l~C~~g~~~--~~~~~~i~~C~C~  108 (122)\n+T 5BQE_A           87 LRCSGGMRL--TATYRYILSCHCE  108 (122)\n+T ss_dssp             EEBSSSCEE--EEEEEEEEEEEEE\n+T ss_pred             EECCCCceE--EEEEEEEEeceee\n+Confidence            332333322  2334578999994\n+\n+\n'
b
diff -r 000000000000 -r dbbcc7cd889f test-data/minz/NP_000836.2.hhr
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/minz/NP_000836.2.hhr Tue Mar 23 13:55:42 2021 +0000
b
b'@@ -0,0 +1,19542 @@\n+Query         NP_000836.2\n+Match_columns 908\n+No_of_seqs    3628 out of 54127\n+Neff          10.7604\n+Searched_HMMs 7012\n+Date          Sun Jul 26 14:02:06 2020\n+Command       /home/guerler/hh-suite/build/bin/hhblits -i /home/guerler/human/fasta/NP_00/NP_000836.2.fasta -d /home/guerler/pdb70/pdb70 -o /home/guerler/human/hhr/NP_00/NP_000836.2.hhr \n+\n+ No Hit                             Prob E-value P-value  Score    SS Cols Query HMM  Template HMM\n+  1 6N4X_A Metabotropic glutamate  100.0 2.5E-77 1.8E-81  724.0   0.0  801   35-855    30-845 (877)\n+  2 6N52_B Metabotropic glutamate  100.0 1.2E-76 8.4E-81  718.1   0.0  803   34-856    29-846 (871)\n+  3 6N51_B Metabotropic glutamate  100.0 6.7E-76 4.8E-80  706.5   0.0  786   37-842     4-804 (804)\n+  4 6W2Y_A Gamma-aminobutyric acid 100.0 3.8E-66 2.8E-70  620.7   0.0  685   36-853    22-734 (829)\n+  5 6W2X_B Gamma-aminobutyric acid 100.0 8.6E-65 6.2E-69  611.4   0.0  678   36-852    13-725 (908)\n+  6 6UO8_A Gamma-aminobutyric acid 100.0 9.3E-65 6.7E-69  605.5   0.0  682   42-856     3-712 (762)\n+  7 6UO8_B Gamma-aminobutyric acid 100.0 2.2E-64 1.6E-68  602.9   0.0  675   39-852     9-718 (779)\n+  8 7C7Q_A Gamma-aminobutyric acid 100.0 3.4E-62 2.5E-66  592.4   0.0  685   40-855   166-876 (879)\n+  9 2E4U_A Metabotropic glutamate  100.0 1.2E-43 8.6E-48  406.8   0.0  535   40-584     9-553 (555)\n+ 10 2E4X_B Metabotropic glutamate  100.0 1.2E-43 8.6E-48  406.8   0.0  535   40-584     9-553 (555)\n+ 11 6N50_A Metabotropic glutamate  100.0 4.1E-43   3E-47  405.6   0.0  536   36-576    42-589 (596)\n+ 12 6N4Y_C Metabotropic glutamate  100.0 4.4E-43 3.2E-47  405.3   0.0  536   36-576    42-589 (596)\n+ 13 5K5S_B Extracellular calcium-s 100.0 1.3E-42 9.6E-47  402.9   0.0  534   39-580    25-609 (615)\n+ 14 5KZQ_A Metabotropic glutamate  100.0 1.7E-42 1.2E-46  398.2   0.0  529   38-580    24-564 (570)\n+ 15 5K5T_A Extracellular calcium-s 100.0 2.4E-42 1.8E-46  400.9   0.0  534   39-580    
25-609 (615)\n+ 16 6FFH_A Metabotropic glutamate  100.0 9.6E-38 6.9E-42  309.0   0.0  266  573-852     4-431 (444)\n+ 17 6FFI_A Metabotropic glutamate  100.0 9.6E-38 6.9E-42  309.0   0.0  266  573-852     4-431 (444)\n+ 18 4OR2_B Chimera of Soluble cyto 100.0 1.8E-37 1.3E-41  330.2   0.0  271  570-854   108-380 (389)\n+ 19 6BT5_B Metabotropic glutamate   99.9 2.3E-27 1.7E-31  265.1   0.0  468   40-507     4-471 (479)\n+ 20 6BSZ_A Metabotropic glutamate   99.9 3.3E-27 2.4E-31  263.8   0.0  468   40-507     4-471 (479)\n+ 21 3KS9_B Metabotropic glutamate   99.8 1.4E-26   1E-30  259.8   0.0  465   37-506     9-489 (496)\n+ 22 4XAR_A Metabotropic glutamate   99.8 1.6E-26 1.2E-30  260.6   0.0  462   37-506    32-503 (517)\n+ 23 6B7H_A Metabotropic glutamate   99.8 1.6E-26 1.2E-30  260.6   0.0  462   37-506    32-503 (517)\n+ 24 1EWK_B METABOTROPIC GLUTAMATE   99.8 1.8E-26 1.3E-30  258.4   0.0  464   38-506     5-484 (490)\n+ 25 1EWT_A METABOTROPIC GLUTAMATE   99.8 1.8E-26 1.3E-30  258.4   0.0  464   38-506     5-484 (490)\n+ 26 5C5C_A Metabotropic glutamate   99.8   2E-26 1.4E-30  257.7   0.0  468   38-505     9-480 (481)\n+ 27 3SM9_A Metabotropic glutamate   99.8 2.4E-26 1.7E-30  256.8   0.0  459   40-506     8-476 (479)\n+ 28 4XAQ_A Metabotropic glutamate   99.8 9.3E-26 6.8E-30  253.4   0.0  453   40-506    28-491 (503)\n+ 29 5CNI_A Metabotropic glutamate   99.8 9.3E-26 6.8E-30  253.4   0.0  453   40-506    28-491 (503)\n+ 30 5CNJ_A Metabotropic glutamate   99.8 1.1E-25 7.8E-30  252.8   0.0  454   39-506    27-491 (503)\n+ 31 5FBK_A Extracellular calcium-s  99.8 3.8E-25 2.8E-29  252.0   0.0  460   40-507    53-560 (568)\n+ 32 5FBK_B Extracellular calcium-s  99.8 4.7E-25 3.4E-29  251.3   0.0  460   40-507    53-560 (568)\n+ 33 1DP4_A ATRIAL NATRIURETIC PEPT  99.8 2.8E-24   2E-28  236.6   0.0  369   44-494     1-397 (435)\n+ 34 1DP4_C ATRIAL NATRIURETIC PEPT  99.8 2.8E-24   2E-28  236.6   0.0  369   44-494     1-397 (435)\n+ 35 5X2M_A Taste receptor, type 1,  99.8 
4.3E-24 3.1E-28  237.0   0.0  419   35-494     4-43'..b'10 (331)\n+T ss_dssp             -CTTHHHHHHHHHHHHHH\n+T ss_pred             chhHHHHHHHHHHHHHHH\n+Confidence            456787776665544443\n+\n+\n+No 467\n+>5DHG_B Nociceptin receptor-Cytochrome b562 Chimera; Nociceptin/orphanin FQ peptide receptor, NOP; HET: DGV, OLA, OLC; 3.0A {Escherichia coli}\n+Probab=23.42  E-value=12  Score=36.65  Aligned_cols=34  Identities=18%  Similarity=0.097  Sum_probs=17.4  Template_Neff=12.100\n+\n+Q NP_000836.2     644 DTIICSFRRVFLGLGMCFSYAALLTK-TNRIHRIF  677 (908)\n+Q Consensus       644 ~~~~C~~~~~~~~~gf~l~~~~l~~K-~~ri~~if  677 (908)\n+                      ....|.+..++..+++...+-.++.- ..|...+.\n+T Consensus       193 ~~~~c~~~~~~~~~~~~~s~~~~~~iai~R~~~i~  227 (424)\n+T 5DHG_B          193 GNALCKTVIAIDYYNMFTSTFTLTAMSVDRYVAIC  227 (424)\n+T ss_dssp             CHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n+T ss_pred             HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHh\n+Confidence            35678877776655544433333332 33444443\n+\n+\n+No 468\n+>5DHH_B GPCR-BRIL Chimera; Nociceptin/orphanin FQ peptide receptor, NOP; HET: OLA, OLC, DGW; 3.004A {Homo sapiens}\n+Probab=23.42  E-value=12  Score=36.65  Aligned_cols=34  Identities=18%  Similarity=0.097  Sum_probs=17.4  Template_Neff=12.100\n+\n+Q NP_000836.2     644 DTIICSFRRVFLGLGMCFSYAALLTK-TNRIHRIF  677 (908)\n+Q Consensus       644 ~~~~C~~~~~~~~~gf~l~~~~l~~K-~~ri~~if  677 (908)\n+                      ....|.+..++..+++...+-.++.- ..|...+.\n+T Consensus       193 ~~~~c~~~~~~~~~~~~~s~~~~~~iai~R~~~i~  227 (424)\n+T 5DHH_B          193 GNALCKTVIAIDYYNMFTSTFTLTAMSVDRYVAIC  227 (424)\n+T ss_dssp             HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n+T ss_pred             HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHh\n+Confidence            35678877776655544433333332 33444443\n+\n+\n+No 469\n+>4DKL_A Mu-type opioid receptor, lysozyme chimera; G-protein coupled receptor, 7 transmembrane; HET: SO4, CLR, MPG, BF0, 1PE; 2.8A {Mus musculus, Enterobacteria phage 
T4}\n+Probab=20.35  E-value=15  Score=36.47  Aligned_cols=17  Identities=18%  Similarity=0.227  Sum_probs=10.1  Template_Neff=11.900\n+\n+Q NP_000836.2     644 DTIICSFRRVFLGLGMC  660 (908)\n+Q Consensus       644 ~~~~C~~~~~~~~~gf~  660 (908)\n+                      ....|.+..++..+++.\n+T Consensus        85 ~~~~C~~~~~~~~~~~~  101 (464)\n+T 4DKL_A           85 GNILCKIVISIDYYNMF  101 (464)\n+T ss_dssp             CSHHHHHHHHHHHHHHH\n+T ss_pred             cchHHHHHHHHHHHHHH\n+Confidence            35678777766554433\n+\n+\n+No 470\n+>5NX2_A Glucagon-like peptide 1 receptor, truncated; 7TM, GPCR, signalling protein, membrane; HET: SOG, 9DQ, 9DZ, 9DT, 9DW, 9DK, NAG; 3.7A {Homo sapiens}\n+Probab=20.13  E-value=16  Score=36.14  Aligned_cols=33  Identities=15%  Similarity=0.316  Sum_probs=19.8  Template_Neff=10.600\n+\n+Q NP_000836.2     645 TIICSFRRVFLGLGMCFSYAALLTKTNRIHRIF  677 (908)\n+Q Consensus       645 ~~~C~~~~~~~~~gf~l~~~~l~~K~~ri~~if  677 (908)\n+                      +..|.+..++........+.-+++-.+-+++..\n+T Consensus       204 ~~~C~~~~~~~~~~~las~~w~~~~~~~~~~~i  236 (422)\n+T 5NX2_A          204 SLSCRLVFLFMQYCVAANYYWLLVEGVYLYTLL  236 (422)\n+T ss_dssp             SHHHHHHHHHHHHHHHHHHHHHHHHHHHHHCCC\n+T ss_pred             cHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n+Confidence            478888777766665555555555554444443\n+\n+\n+No 471\n+>6LN2_A Glucagon-like peptide 1 receptor,Rubredoxin,Glucagon-like peptide; Full length Human GLP1 receptor; HET: 97Y, NAG; 3.2A {Homo sapiens}\n+Probab=20.07  E-value=16  Score=36.87  Aligned_cols=35  Identities=14%  Similarity=0.264  Sum_probs=21.7  Template_Neff=8.200\n+\n+Q NP_000836.2     644 DTIICSFRRVFLGLGMCFSYAALLTKTNRIHRIFE  678 (908)\n+Q Consensus       644 ~~~~C~~~~~~~~~gf~l~~~~l~~K~~ri~~if~  678 (908)\n+                      ....|.+..++....+...+.-+++-.+-+++.+.\n+T Consensus       199 ~~~~C~~~~~l~~y~~las~~W~~~~a~~l~~~i~  233 (469)\n+T 6LN2_A          199 DSLACRLVFLLCQYCVAANYYWLLVEGVYLYTLLA  233 (469)\n+T ss_dssp             
HHTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n+T ss_pred             CcHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n+Confidence            35778888777766666666666555555444443\n+\n+\n'
b
diff -r 000000000000 -r dbbcc7cd889f test-data/minz/pdb70_random.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/minz/pdb70_random.txt Tue Mar 23 13:55:42 2021 +0000
b
@@ -0,0 +1,24 @@
+6W4H_B  1XM9_A
+6YZ1_B  3L6X_A
+6SPB_2  5D5K_C
+3UWD_A  3NMZ_A
+5FFO_H  6N4X_A
+3RJR_C  6N52_B
+5VQF_C  6N51_B
+5NTU_A  6W2Y_A
+2WZB_A  2WZB_A
+1HDI_A  1HDI_A
+6Y3A_A  6Y3A_A
+3UWD_A  3UWD_A
+5FFO_H  5FFO_H
+3RJR_C  3RJR_C
+5VQF_C  5VQF_C
+5NTU_A  5NTU_A
+1XM9_A  6W4H_B
+3L6X_A  6YZ1_B
+5D5K_C  6SPB_2
+3NMZ_A  3UWD_A
+6N4X_A  5FFO_H
+6N52_B  3RJR_C
+6N51_B  5VQF_C
+6W2Y_A  5NTU_A
\ No newline at end of file
b
diff -r 000000000000 -r dbbcc7cd889f test-data/minz/pdb70_result.0.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/minz/pdb70_result.0.txt Tue Mar 23 13:55:42 2021 +0000
b
@@ -0,0 +1,4 @@
+NP_000282.1.hhr NP_000282.1.hhr 875.1 2WZB_A 2WZB_A 2WZB_A 2WZB_A
+NP_000548.2.hhr NP_000548.2.hhr 353.3 5FFO_H 5FFO_H 5FFO_H 5FFO_H
+NP_000548.2.hhr NP_000836.2.hhr 353.3 5FFO_H 6N4X_A 5FFO_H 6N4X_A
+NP_000282.1.hhr NP_000290.2.hhr 86.5 2WZB_A 1XM9_A 3UWD_A 3NMZ_A
b
diff -r 000000000000 -r dbbcc7cd889f test-data/minz/pdb70_result.1.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/minz/pdb70_result.1.txt Tue Mar 23 13:55:42 2021 +0000
b
@@ -0,0 +1,2 @@
+NP_000282.1.hhr NP_000282.1.hhr 875.1 2WZB_A 2WZB_A 2WZB_A 2WZB_A
+NP_000282.1.hhr NP_000290.2.hhr 86.5 2WZB_A 1XM9_A 3UWD_A 3NMZ_A
b
diff -r 000000000000 -r dbbcc7cd889f test-data/model/ACE2_HUMAN.hhr
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/model/ACE2_HUMAN.hhr Tue Mar 23 13:55:42 2021 +0000
b
b'@@ -0,0 +1,3879 @@\n+Query         sp|Q9BYF1|ACE2_HUMAN Angiotensin-converting enzyme 2 OS=Homo sapiens (Human) OX=9606 GN=ACE2 PE=1 SV=2\n+Match_columns 805\n+No_of_seqs    1 out of 1\n+Neff          1\n+Searched_HMMs 82491\n+Date          Sat Oct 10 23:55:36 2020\n+Command       hhsearch -e 0.001 -i /galaxy-repl/main/files/046/047/dataset_46047478.dat -d hhdb -o /galaxy-repl/main/files/046/091/dataset_46091622.dat \n+\n+ No Hit                             Prob E-value P-value  Score    SS Cols Query HMM  Template HMM\n+  1 6M18_B Sodium-dependent neutra 100.0  4E-194  5E-199 1504.8   0.0  804    2-805     3-814 (814)\n+  2 1R42_A angiotensin I convertin 100.0 3.4E-78 4.2E-83  582.0   0.0  615    1-615     1-615 (615)\n+  3 6LZG_A Angiotensin-converting  100.0 5.4E-74 6.6E-79  545.9   0.0  597   19-615     1-597 (597)\n+  4 3D0I_B Crystal structure of sp 100.0 1.2E-73 1.5E-78  545.2   0.0  596   20-615     2-597 (597)\n+  5 5AMB_A ANGIOTENSIN-CONVERTING  100.0 1.2E-69 1.5E-74  524.2   0.0  595   20-625    12-614 (629)\n+  6 6H5W_A Angiotensin-converting  100.0 4.4E-61 5.4E-66  455.7   0.0  579   22-613     5-588 (591)\n+  7 6S1Y_A Angiotensin-converting  100.0 4.6E-59 5.5E-64  450.3   0.0  579   21-610    11-600 (621)\n+  8 4CA5_A ANGIOTENSIN-CONVERTING  100.0 3.9E-58 4.7E-63  435.5   0.0  578   22-612     5-587 (589)\n+  9 5A2R_A ANGIOTENSIN-CONVERTING  100.0 2.8E-56 3.4E-61  425.1   0.0  579   22-611     5-594 (598)\n+ 10 4ASR_A ANGIOTENSIN-CONVERTING  100.0 2.9E-56 3.5E-61  426.4   0.0  578   22-611     6-595 (598)\n+ 11 4CA7_A ANGIOTENSIN-CONVERTING  100.0 3.2E-56 3.9E-61  426.0   0.0  578   22-611     6-595 (598)\n+ 12 2C6F_A ANGIOTENSIN-CONVERTING  100.0 2.3E-52 2.7E-57  400.7   0.0  579   22-611    14-599 (612)\n+ 13 5GIV_E Carboxypeptidase; M32 c 100.0 4.3E-52 5.2E-57  399.6   0.0  474   19-597     3-502 (503)\n+ 14 5GIV_F Carboxypeptidase; M32 c 100.0 4.3E-52 5.2E-57  399.6   0.0  474   19-597     3-502 (503)\n+ 15 3HQ2_B Bacillus subtilis M32 c 100.0 
1.1E-47 1.3E-52  361.2   0.0  473   22-597     3-500 (501)\n+ 16 1K9X_B m32 carboxypeptidase; H 100.0 1.1E-46 1.3E-51  357.8   0.0  470   20-593     4-499 (499)\n+ 17 1KA2_A M32 carboxypeptidase; H 100.0 1.1E-46 1.3E-51  357.8   0.0  470   20-593     4-499 (499)\n+ 18 5E3X_A Thermostable carboxypep 100.0 2.4E-46 2.9E-51  356.7   0.0  456   27-597     2-486 (489)\n+ 19 3HOA_A Thermostable carboxypep 100.0 1.1E-45 1.3E-50  356.9   0.0  468   22-594     3-507 (509)\n+ 20 3DWC_B Metallocarboxypeptidase 100.0   2E-44 2.4E-49  343.9   0.0  459   23-593     3-501 (505)\n+ 21 5WVU_C Thermostable carboxypep 100.0 8.4E-44   1E-48  344.0   0.0  469   22-595     3-508 (510)\n+ 22 3SKS_A Putative Oligoendopepti 100.0 1.7E-38 2.1E-43  300.7   0.0  506   22-600    36-564 (567)\n+ 23 3AHN_A Oligopeptidase; HYDROLA 100.0 2.6E-38 3.2E-43  299.3   0.0  505   22-601    33-562 (564)\n+ 24 2QR4_A Peptidase M3B, oligoend 100.0   2E-33 2.4E-38  267.6   0.0  506   23-603    34-578 (587)\n+ 25 2O36_A Thimet oligopeptidase (  99.9   5E-30 6.1E-35  254.5   0.0  518   26-588    47-660 (674)\n+ 26 3CE2_A Crystal structure of pu  99.9 1.1E-29 1.4E-34  244.3   0.0  507   23-601    67-609 (618)\n+ 27 2O3E_A Neurolysin (E.C.3.4.24.  
99.9 9.4E-29 1.1E-33  246.4   0.0  515   31-588    68-676 (678)\n+ 28 5LV0_A Neurolysin, mitochondri  99.9 6.2E-28 7.6E-33  240.2   0.0  504   41-589    83-682 (686)\n+ 29 1S4B_P Thimet oligopeptidase (  99.9 1.6E-27   2E-32  236.8   0.0  516   26-588    47-660 (674)\n+ 30 5L44_A K-26 dipeptidyl carboxy  99.8 4.9E-23   6E-28  205.4   0.0  512   28-589    76-681 (683)\n+ 31 5L43_B K-26 dipeptidyl carboxy  99.8 4.9E-23   6E-28  205.4   0.0  512   28-589    76-681 (683)\n+ 32 1Y79_1 Peptidyl-Dipeptidase Dc  99.4 4.5E-17 5.4E-22  163.2   0.0  501   41-588    70-676 (680)\n+ 33 4KA7_A Oligopeptidase A, short  99.4 5.8E-17 7.1E-22  163.7   0.0  407  144-589   239-706 (714)\n+ 34 2KNC_B Integrin alpha-IIb, Int  92.7   0.036 4.4E-07   45.3   0.0   31  733-763     2-32  (79)\n+ 35 2L8S_A Integrin alpha-1; Integ  87.6    0.29 '..b'l~~~~~~~~l~i~v~V~~l~~~~~~rYR~~~   50 (296)\n+T 6KOE_F           11 GPVAEQQSDLILLSIGFMLFIVGVVFVLFTIILVKYRDRK   50 (296)\n+T ss_pred             CHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHchhcC\n+Confidence            4444333333444444444443333444444556655554\n+\n+\n+No 97\n+>6KOB_B AA3-600 quinol oxidase subunit I; Menaquinol oxidase, Complex, Proton pumping; HET: MQ7, HEA; 3.6A {Bacillus subtilis}\n+Probab=22.61  E-value=76  Score=33.75  Aligned_cols=40  Identities=20%  Similarity=0.404  Sum_probs=15.7  Template_Neff=6.100\n+\n+Q sp|Q9BYF1|ACE2  732 GPPNQPPVSIWLIVFGVVMGVIVVGIVILIFTGIRDRKKK  771 (805)\n+Q Consensus       732 gppnqppvsiwlivfgvvmgvivvgiviliftgirdrkkk  771 (805)\n+                      ||..+.-..++++.+++.+.|+++.++++++..+|-|+++\n+T Consensus        11 gp~a~~~~~l~~~~~~~~l~i~v~V~~l~~~~~~rYR~~~   50 (296)\n+T 6KOB_B           11 GPVAEQQSDLILLSIGFMLFIVGVVFVLFTIILVKYRDRK   50 (296)\n+T ss_dssp             -----CTTHHHHHHHHHHHHHHHHHHHHTTTSSSSSSCCC\n+T ss_pred             CHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHchhcC\n+Confidence            4444333333444444444443333444444556655554\n+\n+\n+No 98\n+>1Q90_G Apocytochrome f, Cytochrome b6, Cytochrome; MEMBRANE 
PROTEIN COMPLEX, PHOTOSYNTHESIS, ELECTRON; HET: LFA, CLA, BCR, SQD, LMG, HEM, TDS; 3.1A {Chlamydomonas reinhardtii} SCOP: f.23.26.1\n+Probab=22.36  E-value=78  Score=26.34  Aligned_cols=29  Identities=24%  Similarity=0.430  Sum_probs=22.3  Template_Neff=2.400\n+\n+Q sp|Q9BYF1|ACE2  744 IVFGVVMGVIVVGIVILIFTGIRDRKKKN  772 (805)\n+Q Consensus       744 ivfgvvmgvivvgiviliftgirdrkkkn  772 (805)\n+                      +.+|+|.|.|.|.++-|.++....-++.+\n+T Consensus         5 lL~GIVlGli~iTlaGLfv~AY~QyrRg~   33 (37)\n+T 1Q90_G            5 LLCGIVLGLVPVTIAGLFVTAYLQYLRGD   33 (37)\n+T ss_pred             hHHHHHHHhHHHHHHHHHHHHHHHHHhcc\n+Confidence            57899999999998888888765544433\n+\n+\n+No 99\n+>2GSM_B Cytochrome c oxidase subunit 1; transmembrane protein complex, OXIDOREDUCTASE; HET: TRD, DMU, HEA; 2.0A {Rhodobacter sphaeroides} SCOP: f.17.2.1, b.6.1.2\n+Probab=21.90  E-value=80  Score=31.85  Aligned_cols=6  Identities=67%  Similarity=1.121  Sum_probs=2.5  Template_Neff=7.600\n+\n+Q sp|Q9BYF1|ACE2  721 SLEFLG  726 (805)\n+Q Consensus       721 sleflg  726 (805)\n+                      ||++.|\n+T Consensus         4 ~~~~~~    9 (262)\n+T 2GSM_B            4 SLEIIG    9 (262)\n+T ss_dssp             -CCEEE\n+T ss_pred             ccccCC\n+Confidence            455544\n+\n+\n+No 100\n+>3HB3_B Cytochrome c oxidase subunit 1-beta; Electron transfer, Proton transfer, Proton; HET: HEA, LDA, LMT; 2.25A {Paracoccus denitrificans}\n+Probab=21.53  E-value=83  Score=32.92  Aligned_cols=47  Identities=15%  Similarity=0.280  Sum_probs=18.3  Template_Neff=6.900\n+\n+Q sp|Q9BYF1|ACE2  725 LGIQPTLGPPNQPPVSIWLIVFGVVMGVIVVGIV-ILIFTGIRDRKKKN  772 (805)\n+Q Consensus       725 lgiqptlgppnqppvsiwlivfgvvmgvivvgiv-iliftgirdrkkkn  772 (805)\n+                      ++.|+..+|-.+.-...+-+++++. .+|++++. 
++++.-+|-|++++\n+T Consensus        47 ~~~~~~aS~~a~~i~~L~~~~~~i~-~~Ifv~V~~ll~~~i~ryr~~~~   94 (298)\n+T 3HB3_B           47 MNFQPASSPLAHDQQWLDHFVLYII-TAVTIFVCLLLLICIVRFNRRAN   94 (298)\n+T ss_dssp             CSCSCCCSHHHHHHHHHHHHHHHHH-HHHHHHHHHHHHHHHHHSBTTTS\n+T ss_pred             CCCCCCCCHHHHHHHHHHHHHHHHH-HHHHHHHHHHHHHHHHHccccCC\n+Confidence            3444444444443333333333332 22222222 23333355555443\n+\n+\n+No 101\n+>5X5V_A GD; herpes simplex virus, pseudorabies virus; 1.5A {Suid herpesvirus 1}\n+Probab=20.11  E-value=93  Score=35.59  Aligned_cols=28  Identities=32%  Similarity=0.647  Sum_probs=0.0  Template_Neff=3.400\n+\n+Q sp|Q9BYF1|ACE2  742 WLIVFGVVMGVIVVGIVILIFTGIRDRK  769 (805)\n+Q Consensus       742 wlivfgvvmgvivvgiviliftgirdrk  769 (805)\n+                      ..+-.|+.||.|+||+.+.||..+|..|\n+T Consensus       355 v~vgvg~~~~~~~~~~~~~~~~~~~~~~  382 (402)\n+T 5X5V_A          355 VIVGTGTAMGALLVGVCVYIFFRLRGAK  382 (402)\n+T ss_dssp             ----------------------------\n+T ss_pred             eehhHHHHHHHHHHHHHHHHHHHccccc\n+Confidence            3456799999999999999998877654\n+\n+\n'
b
diff -r 000000000000 -r dbbcc7cd889f test-data/model/SPIKE_SARS2.hhr
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/model/SPIKE_SARS2.hhr Tue Mar 23 13:55:42 2021 +0000
b
b'@@ -0,0 +1,5096 @@\n+Query         sp|P0DTC2|SPIKE_SARS2 Spike glycoprotein OS=Severe acute respiratory syndrome coronavirus 2 OX=2697049 GN=S PE=1 SV=1\n+Match_columns 1273\n+No_of_seqs    1 out of 1\n+Neff          1\n+Searched_HMMs 82491\n+Date          Thu Nov 12 20:44:30 2020\n+Command       hhsearch -e 0.001 -i /galaxy-repl/main/files/047/416/dataset_47416633.dat -d hhdb -o /galaxy-repl/main/files/047/416/dataset_47416669.dat \n+\n+ No Hit                             Prob E-value P-value  Score    SS Cols Query HMM  Template HMM\n+  1 6X29_A Spike glycoprotein; Tri 100.0  3E-269  4E-274 2251.1   0.0 1198   16-1213    1-1198(1273)\n+  2 6CS2_B Spike glycoprotein,Enve 100.0  3E-221  4E-226 1857.5   0.0 1161   31-1213   22-1182(1215)\n+  3 6Q04_B Spike glycoprotein; Cor 100.0  2E-204  2E-209 1723.9   0.0 1150   31-1224   65-1321(1359)\n+  4 5I08_C Spike glycoprotein; cor 100.0  2E-199  2E-204 1685.1   0.0 1142   30-1223   29-1293(1299)\n+  5 5X5F_B S protein; MERS-CoV, sp 100.0  6E-198  7E-203 1670.1   0.0 1163   14-1220   12-1286(1323)\n+  6 6VSJ_A Spike glycoprotein, Car 100.0  2E-195  3E-200 1649.5   0.0 1145    9-1204   22-1267(1275)\n+  7 6VSJ_C Spike glycoprotein, Car 100.0  2E-195  3E-200 1649.5   0.0 1145    9-1204   22-1267(1275)\n+  8 6OHW_A Spike surface glycoprot 100.0  7E-190  9E-195 1602.1   0.0 1144    2-1204   13-1299(1322)\n+  9 6NZK_A Spike surface glycoprot 100.0  7E-190  9E-195 1602.1   0.0 1144    2-1204   13-1299(1322)\n+ 10 6M15_A Spike glycoprotein; VIR 100.0  6E-154  7E-159 1294.0   0.0  832  273-1215  216-1070(1118)\n+ 11 6M15_B Spike glycoprotein; VIR 100.0  6E-154  7E-159 1294.0   0.0  832  273-1215  216-1070(1118)\n+ 12 6JX7_A Feline Infectious Perit 100.0  2E-132  2E-137 1149.2   0.0  664  525-1216  674-1394(1468)\n+ 13 5SZS_A Spike glycoprotein; cor 100.0  7E-131  9E-136 1128.9   0.0  662  505-1215  575-1283(1325)\n+ 14 6IXB_B Spike glycoprotein; Alp 100.0  8E-129  1E-133 1101.7   0.0  647  518-1214  426-1116(1116)\n+ 15 6CV0_A 
Spike protein; infectio 100.0  3E-125  3E-130 1068.9   0.0  651  524-1212  399-1074(1105)\n+ 16 6VV5_C Spike glycoprotein; gly 100.0  2E-124  2E-129 1078.6   0.0  641  521-1213  614-1309(1356)\n+ 17 6M3W_B Spike glycoprotein,Spik 100.0  2E-124  2E-129 1000.4   0.0  491  706-1196    1-491 (491)\n+ 18 6U7K_C Spike glycoprotein; PED 100.0  1E-122  1E-127 1065.5   0.0  641  521-1213  629-1324(1399)\n+ 19 6BFU_C Spike protein; coronavi 100.0  4E-119  4E-124 1017.5   0.0  746  267-1148  240-1022(1024)\n+ 20 6B3O_B Spike glycoprotein; Cor 100.0  7E-117  9E-122  954.9   0.0  497  703-1203   45-560 (605)\n+ 21 6PZ8_A S protein, G2 heavy cha 100.0  8E-112  1E-116  897.4   0.0  445  694-1140   12-470 (472)\n+ 22 6PZ8_F S protein, G2 heavy cha 100.0 3.1E-74 3.8E-79  636.3   0.0  621   14-676    12-724 (726)\n+ 23 7BZ5_A Spike protein S1, Heavy 100.0 3.5E-52 4.2E-57  416.5   0.0  225  319-543     1-225 (229)\n+ 24 6YZ5_E Spike glycoprotein, Nan 100.0 3.5E-51 4.3E-56  405.6   0.0  205  330-534     1-205 (210)\n+ 25 6YLA_E SARS-CoV-2 RBD, Heavy C 100.0 3.8E-51 4.6E-56  406.0   0.0  207  328-534     2-208 (213)\n+ 26 6ZFO_A SARS-CoV-2 RBD, Nanobod 100.0 2.3E-47 2.8E-52  375.2   0.0  194  333-526     1-194 (194)\n+ 27 2GHV_C Spike glycoprotein; SAR 100.0 3.9E-37 4.7E-42  305.0   0.0  197  330-527     4-200 (203)\n+ 28 1WYY_A E2 Glycoprotein; membra 100.0 2.3E-36 2.8E-41  288.5   0.0   99  903-1001    3-101 (149)\n+ 29 1WYY_B E2 Glycoprotein; membra 100.0 2.3E-36 2.8E-41  288.5   0.0   99  903-1001    3-101 (149)\n+ 30 5X4S_A Spike glycoprotein; SAR 100.0 1.1E-34 1.4E-39  298.0   0.0  261   30-307    21-281 (285)\n+ 31 5KWB_A Spike glycoprotein; Cor 100.0 1.2E-33 1.4E-38  297.9   0.0  271  316-592     1-370 (371)\n+ 32 6LXT_F Spike protein S2; 2019-  99.9 2.8E-28 3.4E-33  230.5   0.0  112  910-1021    1-113 (132)\n+ 33 6LXT_C 2019-nCoV S2 subunit; 2  99.9 2.8E-28 3.4E-33  230.5   0.0  112  910-1021    1-113 (132)\n+ 34 6C6Z_A Spike protein, CDC2-C2   99.9 6.9E-26 8.3E-31  228.8   
0.0  148  319-472     1-148 (231)\n+ 35 5XGR_H Spi'..b'sp             TTCHHHHHHHHHHHHHHHHHHHHHHHGGGTCSSCCHHHHHHHHHHHHHHHHHHHHHHHHHHHHSTTC\n+T ss_pred             HHHHHHHHHHHHHHHHHHHHHHHHHHHHhhcchHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n+Confidence            4555666666666666555555554432222 23445666666666666666666666666665543\n+\n+\n+No 128\n+>6W08_B NSP15 endoribnuclease; Tripartite pore forming toxin, Cytotoxin; HET: EDO; 1.75A {Vibrio cholerae O1 biovar El Tor str. N16961}\n+Probab=20.88  E-value=87  Score=32.75  Aligned_cols=55  Identities=15%  Similarity=0.317  Sum_probs=27.7  Template_Neff=9.500\n+\n+Q sp|P0DTC2|SPIK 1139 DPLQPELDSFKEELDKYFKNHTSPDVDLGD--------ISGINASVVNIQKEIDRLNEVAKNL 1193 (1273)\n+Q Consensus      1139 dplqpeldsfkeeldkyfknhtspdvdlgd--------isginasvvniqkeidrlnevaknl 1193 (1273)\n+                      +.++.+++.|+..|..+...-..-.-++..        ..+.++.+..++++|++|++-.+.+\n+T Consensus       122 ~~~~~~~~~~~~~l~~f~~~~~~~~~~l~~~~~~~~~~~~~~~~~i~~L~~~I~~l~~~i~~~  184 (359)\n+T 6W08_B          122 QSLTSNIKRYDEGLNAWARQVEDAHNTLQQTIAQIQQEEVSIQAEIIATNAQIDLMKQQIAAF  184 (359)\n+T ss_dssp             HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHCGGGHHHHHHHHHHHHHHHHHHHHH\n+T ss_pred             HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n+Confidence            456666667777666554442222222322        1334445556666666665544433\n+\n+\n+No 129\n+>3OGI_C Putative ESAT-6-like protein 6, Putative; Structural Genomics, PSI-2, Protein Structure; HET: MSE; 2.549A {Mycobacterium tuberculosis}\n+Probab=20.43  E-value=90  Score=26.79  Aligned_cols=66  Identities=14%  Similarity=0.137  Sum_probs=31.7  Template_Neff=9.500\n+\n+Q sp|P0DTC2|SPIK  921 KLIANQFNSAIGKIQDSLSSTASALGKLQDV-VNQNAQALNTLVKQLSSNFGAISSVLNDILSRLDK  986 (1273)\n+Q Consensus       921 klianqfnsaigkiqdslsstasalgklqdv-vnqnaqalntlvkqlssnfgaissvlndilsrldk  986 (1273)\n+                      .-.++.+....+.|++.++.....+..|.+. 
....+++......++...+..+...|+++-..|+.\n+T Consensus        11 ~~~a~~l~~~~~~l~~~~~~l~~~~~~l~~~W~G~a~~af~~~~~~~~~~~~~~~~~l~~~~~~l~~   77 (101)\n+T 3OGI_C           11 DAHGAMIRAQAGLLEAEHQAIVRDVLAAGDFWGGAGSVACQEFITQLGRNFQVIYEQANAHGQKVQA   77 (101)\n+T ss_dssp             --CHHHHHHHHHHHHHHHHHHHHHHHHTGGGTTCTTSHHHHHHHHHHHHHHHHHTTCC---------\n+T ss_pred             HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHhhccHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n+Confidence            3455666666666666666555555555332 22345555566666666666666666666555544\n+\n+\n+No 130\n+>5HJ3_L Envelope glycoprotein, KZ52 Antibody Fragment; Ebola virus, EOV, Ebolavirus, GP; HET: MAN, NAG, BMA; 3.3A {Ebola virus sp.}\n+Probab=20.10  E-value=93  Score=31.85  Aligned_cols=35  Identities=17%  Similarity=0.174  Sum_probs=23.0  Template_Neff=5.300\n+\n+Q sp|P0DTC2|SPIK  930 AIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVK  964 (1273)\n+Q Consensus       930 aigkiqdslsstasalgklqdvvnqnaqalntlvk  964 (1273)\n+                      .+....++++.....+..+..+|-||..||.-|.-\n+T Consensus        60 ~~n~t~~al~~L~~~l~sl~~vvLQNR~aLD~Lla   94 (136)\n+T 5HJ3_L           60 LANETTQALQLFLRATTELRTFSILNRKAIDFLLQ   94 (136)\n+T ss_pred             HHHHHHHHHHHHHHHHHHHHHHHhhchHHHHHHHH\n+Confidence            33444455555555566677888999999887653\n+\n+\n+No 131\n+>6RX1_A Syncytin-1; HUMAN PLACENTAL PROTEIN, MEMBRANE FUSION; 2.1A {Homo sapiens}\n+Probab=20.03  E-value=93  Score=30.99  Aligned_cols=53  Identities=26%  Similarity=0.386  Sum_probs=30.7  Template_Neff=5.000\n+\n+Q sp|P0DTC2|SPIK  908 GIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVK  964 (1273)\n+Q Consensus       908 gigvtqnvlyenqklianqfnsaigkiqdslsstasalgklqdvvnqnaqalntlvk  964 (1273)\n+                      |.|+++.-+.    
.+..+....+....++++.....+..+..+|-||..||.-|.-\n+T Consensus        12 ~~~~~~~~l~----~L~~~l~~~~n~t~~al~~L~~~l~sl~~vvLQNR~aLD~Ll~   64 (108)\n+T 6RX1_A           12 FQSTSTQFYY----KLSQELNGDMERVADSLVTLQDQLNSLAAVVLQNRRALDLLTA   64 (108)\n+T ss_dssp             ------CHHH----HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHTG\n+T ss_pred             ccchhhHHHH----HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHhhchhHHHHHHH\n+Confidence            5555553222    2333444445556666777777777788899999999887653\n+\n+\n'
b
diff -r 000000000000 -r dbbcc7cd889f test-data/model/crossreference.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/model/crossreference.txt Tue Mar 23 13:55:42 2021 +0000
b
@@ -0,0 +1,33 @@
+1R42_A 1R42_A 2AJF_A 2AJF_B
+1R42_A 6CS2_B 2AJF_A 2AJF_E
+1R42_A 6CS2_B 2AJF_A 2AJF_F
+1R42_A 1R42_A 2AJF_B 2AJF_A
+1R42_A 6CS2_B 2AJF_B 2AJF_E
+1R42_A 6CS2_B 2AJF_B 2AJF_F
+6CS2_B 1R42_A 2AJF_E 2AJF_A
+6CS2_B 1R42_A 2AJF_E 2AJF_B
+6CS2_B 6CS2_B 2AJF_E 2AJF_F
+6CS2_B 1R42_A 2AJF_F 2AJF_A
+6CS2_B 1R42_A 2AJF_F 2AJF_B
+6CS2_B 6CS2_B 2AJF_F 2AJF_E
+6CS2_B 6CS2_B 6CS2_A 6CS2_B
+6CS2_B 6CS2_B 6CS2_A 6CS2_C
+6CS2_B 6LZG_A 6CS2_A 6CS2_D
+6CS2_B 6CS2_B 6CS2_B 6CS2_A
+6CS2_B 6CS2_B 6CS2_B 6CS2_C
+6CS2_B 6LZG_A 6CS2_B 6CS2_D
+6CS2_B 6CS2_B 6CS2_C 6CS2_A
+6CS2_B 6CS2_B 6CS2_C 6CS2_B
+6CS2_B 6LZG_A 6CS2_C 6CS2_D
+6LZG_A 6CS2_B 6CS2_D 6CS2_A
+6LZG_A 6CS2_B 6CS2_D 6CS2_B
+6LZG_A 6CS2_B 6CS2_D 6CS2_C
+6LZG_A 6YLA_E 6LZG_A 6LZG_B
+6YLA_E 6LZG_A 6LZG_B 6LZG_A
+1R42_A 6YLA_E 6M0J_A 6M0J_E
+6YLA_E 1R42_A 6M0J_E 6M0J_A
+6AT5_A 5EJB_D 6ILF_A 6ILF_C
+4L8S_C 6AT5_A 6ILF_B 6ILF_A
+4L8S_C 5EJB_D 6ILF_B 6ILF_C
+5EJB_D 6AT5_A 6ILF_C 6ILF_A
+5EJB_D 4L8S_C 6ILF_C 6ILF_B
\ No newline at end of file
b
diff -r 000000000000 -r dbbcc7cd889f test-data/model/hhr.ffdata
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/model/hhr.ffdata Tue Mar 23 13:55:42 2021 +0000
b
b'@@ -0,0 +1,8976 @@\n+Query         sp|Q9BYF1|ACE2_HUMAN Angiotensin-converting enzyme 2 OS=Homo sapiens (Human) OX=9606 GN=ACE2 PE=1 SV=2\n+Match_columns 805\n+No_of_seqs    1 out of 1\n+Neff          1\n+Searched_HMMs 82491\n+Date          Sat Oct 10 23:55:36 2020\n+Command       hhsearch -e 0.001 -i /galaxy-repl/main/files/046/047/dataset_46047478.dat -d hhdb -o /galaxy-repl/main/files/046/091/dataset_46091622.dat \n+\n+ No Hit                             Prob E-value P-value  Score    SS Cols Query HMM  Template HMM\n+  1 6M18_B Sodium-dependent neutra 100.0  4E-194  5E-199 1504.8   0.0  804    2-805     3-814 (814)\n+  2 1R42_A angiotensin I convertin 100.0 3.4E-78 4.2E-83  582.0   0.0  615    1-615     1-615 (615)\n+  3 6LZG_A Angiotensin-converting  100.0 5.4E-74 6.6E-79  545.9   0.0  597   19-615     1-597 (597)\n+  4 3D0I_B Crystal structure of sp 100.0 1.2E-73 1.5E-78  545.2   0.0  596   20-615     2-597 (597)\n+  5 5AMB_A ANGIOTENSIN-CONVERTING  100.0 1.2E-69 1.5E-74  524.2   0.0  595   20-625    12-614 (629)\n+  6 6H5W_A Angiotensin-converting  100.0 4.4E-61 5.4E-66  455.7   0.0  579   22-613     5-588 (591)\n+  7 6S1Y_A Angiotensin-converting  100.0 4.6E-59 5.5E-64  450.3   0.0  579   21-610    11-600 (621)\n+  8 4CA5_A ANGIOTENSIN-CONVERTING  100.0 3.9E-58 4.7E-63  435.5   0.0  578   22-612     5-587 (589)\n+  9 5A2R_A ANGIOTENSIN-CONVERTING  100.0 2.8E-56 3.4E-61  425.1   0.0  579   22-611     5-594 (598)\n+ 10 4ASR_A ANGIOTENSIN-CONVERTING  100.0 2.9E-56 3.5E-61  426.4   0.0  578   22-611     6-595 (598)\n+ 11 4CA7_A ANGIOTENSIN-CONVERTING  100.0 3.2E-56 3.9E-61  426.0   0.0  578   22-611     6-595 (598)\n+ 12 2C6F_A ANGIOTENSIN-CONVERTING  100.0 2.3E-52 2.7E-57  400.7   0.0  579   22-611    14-599 (612)\n+ 13 5GIV_E Carboxypeptidase; M32 c 100.0 4.3E-52 5.2E-57  399.6   0.0  474   19-597     3-502 (503)\n+ 14 5GIV_F Carboxypeptidase; M32 c 100.0 4.3E-52 5.2E-57  399.6   0.0  474   19-597     3-502 (503)\n+ 15 3HQ2_B Bacillus subtilis M32 c 100.0 
1.1E-47 1.3E-52  361.2   0.0  473   22-597     3-500 (501)\n+ 16 1K9X_B m32 carboxypeptidase; H 100.0 1.1E-46 1.3E-51  357.8   0.0  470   20-593     4-499 (499)\n+ 17 1KA2_A M32 carboxypeptidase; H 100.0 1.1E-46 1.3E-51  357.8   0.0  470   20-593     4-499 (499)\n+ 18 5E3X_A Thermostable carboxypep 100.0 2.4E-46 2.9E-51  356.7   0.0  456   27-597     2-486 (489)\n+ 19 3HOA_A Thermostable carboxypep 100.0 1.1E-45 1.3E-50  356.9   0.0  468   22-594     3-507 (509)\n+ 20 3DWC_B Metallocarboxypeptidase 100.0   2E-44 2.4E-49  343.9   0.0  459   23-593     3-501 (505)\n+ 21 5WVU_C Thermostable carboxypep 100.0 8.4E-44   1E-48  344.0   0.0  469   22-595     3-508 (510)\n+ 22 3SKS_A Putative Oligoendopepti 100.0 1.7E-38 2.1E-43  300.7   0.0  506   22-600    36-564 (567)\n+ 23 3AHN_A Oligopeptidase; HYDROLA 100.0 2.6E-38 3.2E-43  299.3   0.0  505   22-601    33-562 (564)\n+ 24 2QR4_A Peptidase M3B, oligoend 100.0   2E-33 2.4E-38  267.6   0.0  506   23-603    34-578 (587)\n+ 25 2O36_A Thimet oligopeptidase (  99.9   5E-30 6.1E-35  254.5   0.0  518   26-588    47-660 (674)\n+ 26 3CE2_A Crystal structure of pu  99.9 1.1E-29 1.4E-34  244.3   0.0  507   23-601    67-609 (618)\n+ 27 2O3E_A Neurolysin (E.C.3.4.24.  
99.9 9.4E-29 1.1E-33  246.4   0.0  515   31-588    68-676 (678)\n+ 28 5LV0_A Neurolysin, mitochondri  99.9 6.2E-28 7.6E-33  240.2   0.0  504   41-589    83-682 (686)\n+ 29 1S4B_P Thimet oligopeptidase (  99.9 1.6E-27   2E-32  236.8   0.0  516   26-588    47-660 (674)\n+ 30 5L44_A K-26 dipeptidyl carboxy  99.8 4.9E-23   6E-28  205.4   0.0  512   28-589    76-681 (683)\n+ 31 5L43_B K-26 dipeptidyl carboxy  99.8 4.9E-23   6E-28  205.4   0.0  512   28-589    76-681 (683)\n+ 32 1Y79_1 Peptidyl-Dipeptidase Dc  99.4 4.5E-17 5.4E-22  163.2   0.0  501   41-588    70-676 (680)\n+ 33 4KA7_A Oligopeptidase A, short  99.4 5.8E-17 7.1E-22  163.7   0.0  407  144-589   239-706 (714)\n+ 34 2KNC_B Integrin alpha-IIb, Int  92.7   0.036 4.4E-07   45.3   0.0   31  733-763     2-32  (79)\n+ 35 2L8S_A Integrin alpha-1; Integ  87.6    0.29 '..b'sp             TTCHHHHHHHHHHHHHHHHHHHHHHHGGGTCSSCCHHHHHHHHHHHHHHHHHHHHHHHHHHHHSTTC\n+T ss_pred             HHHHHHHHHHHHHHHHHHHHHHHHHHHHhhcchHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n+Confidence            4555666666666666555555554432222 23445666666666666666666666666665543\n+\n+\n+No 128\n+>6W08_B NSP15 endoribnuclease; Tripartite pore forming toxin, Cytotoxin; HET: EDO; 1.75A {Vibrio cholerae O1 biovar El Tor str. N16961}\n+Probab=20.88  E-value=87  Score=32.75  Aligned_cols=55  Identities=15%  Similarity=0.317  Sum_probs=27.7  Template_Neff=9.500\n+\n+Q sp|P0DTC2|SPIK 1139 DPLQPELDSFKEELDKYFKNHTSPDVDLGD--------ISGINASVVNIQKEIDRLNEVAKNL 1193 (1273)\n+Q Consensus      1139 dplqpeldsfkeeldkyfknhtspdvdlgd--------isginasvvniqkeidrlnevaknl 1193 (1273)\n+                      +.++.+++.|+..|..+...-..-.-++..        
..+.++.+..++++|++|++-.+.+\n+T Consensus       122 ~~~~~~~~~~~~~l~~f~~~~~~~~~~l~~~~~~~~~~~~~~~~~i~~L~~~I~~l~~~i~~~  184 (359)\n+T 6W08_B          122 QSLTSNIKRYDEGLNAWARQVEDAHNTLQQTIAQIQQEEVSIQAEIIATNAQIDLMKQQIAAF  184 (359)\n+T ss_dssp             HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHCGGGHHHHHHHHHHHHHHHHHHHHH\n+T ss_pred             HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n+Confidence            456666667777666554442222222322        1334445556666666665544433\n+\n+\n+No 129\n+>3OGI_C Putative ESAT-6-like protein 6, Putative; Structural Genomics, PSI-2, Protein Structure; HET: MSE; 2.549A {Mycobacterium tuberculosis}\n+Probab=20.43  E-value=90  Score=26.79  Aligned_cols=66  Identities=14%  Similarity=0.137  Sum_probs=31.7  Template_Neff=9.500\n+\n+Q sp|P0DTC2|SPIK  921 KLIANQFNSAIGKIQDSLSSTASALGKLQDV-VNQNAQALNTLVKQLSSNFGAISSVLNDILSRLDK  986 (1273)\n+Q Consensus       921 klianqfnsaigkiqdslsstasalgklqdv-vnqnaqalntlvkqlssnfgaissvlndilsrldk  986 (1273)\n+                      .-.++.+....+.|++.++.....+..|.+. 
....+++......++...+..+...|+++-..|+.\n+T Consensus        11 ~~~a~~l~~~~~~l~~~~~~l~~~~~~l~~~W~G~a~~af~~~~~~~~~~~~~~~~~l~~~~~~l~~   77 (101)\n+T 3OGI_C           11 DAHGAMIRAQAGLLEAEHQAIVRDVLAAGDFWGGAGSVACQEFITQLGRNFQVIYEQANAHGQKVQA   77 (101)\n+T ss_dssp             --CHHHHHHHHHHHHHHHHHHHHHHHHTGGGTTCTTSHHHHHHHHHHHHHHHHHTTCC---------\n+T ss_pred             HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHhhccHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n+Confidence            3455666666666666666555555555332 22345555566666666666666666666555544\n+\n+\n+No 130\n+>5HJ3_L Envelope glycoprotein, KZ52 Antibody Fragment; Ebola virus, EOV, Ebolavirus, GP; HET: MAN, NAG, BMA; 3.3A {Ebola virus sp.}\n+Probab=20.10  E-value=93  Score=31.85  Aligned_cols=35  Identities=17%  Similarity=0.174  Sum_probs=23.0  Template_Neff=5.300\n+\n+Q sp|P0DTC2|SPIK  930 AIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVK  964 (1273)\n+Q Consensus       930 aigkiqdslsstasalgklqdvvnqnaqalntlvk  964 (1273)\n+                      .+....++++.....+..+..+|-||..||.-|.-\n+T Consensus        60 ~~n~t~~al~~L~~~l~sl~~vvLQNR~aLD~Lla   94 (136)\n+T 5HJ3_L           60 LANETTQALQLFLRATTELRTFSILNRKAIDFLLQ   94 (136)\n+T ss_pred             HHHHHHHHHHHHHHHHHHHHHHHhhchHHHHHHHH\n+Confidence            33444455555555566677888999999887653\n+\n+\n+No 131\n+>6RX1_A Syncytin-1; HUMAN PLACENTAL PROTEIN, MEMBRANE FUSION; 2.1A {Homo sapiens}\n+Probab=20.03  E-value=93  Score=30.99  Aligned_cols=53  Identities=26%  Similarity=0.386  Sum_probs=30.7  Template_Neff=5.000\n+\n+Q sp|P0DTC2|SPIK  908 GIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVK  964 (1273)\n+Q Consensus       908 gigvtqnvlyenqklianqfnsaigkiqdslsstasalgklqdvvnqnaqalntlvk  964 (1273)\n+                      |.|+++.-+.    
.+..+....+....++++.....+..+..+|-||..||.-|.-\n+T Consensus        12 ~~~~~~~~l~----~L~~~l~~~~n~t~~al~~L~~~l~sl~~vvLQNR~aLD~Ll~   64 (108)\n+T 6RX1_A           12 FQSTSTQFYY----KLSQELNGDMERVADSLVTLQDQLNSLAAVVLQNRRALDLLTA   64 (108)\n+T ss_dssp             ------CHHH----HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHTG\n+T ss_pred             ccchhhHHHH----HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHhhchhHHHHHHH\n+Confidence            5555553222    2333444445556666777777777788899999999887653\n+\n+\n'
b
diff -r 000000000000 -r dbbcc7cd889f test-data/model/hhr.ffindex
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/model/hhr.ffindex Tue Mar 23 13:55:42 2021 +0000
b
@@ -0,0 +1,2 @@
+sp|Q9BYF1|ACE2_HUMAN    0   287957
+sp|P0DTC2|SPIKE_SARS2   287958  391533
\ No newline at end of file
b
diff -r 000000000000 -r dbbcc7cd889f test-data/model/log.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/model/log.txt Tue Mar 23 13:55:42 2021 +0000
b
@@ -0,0 +1,2 @@
+#namea  nameb  springscore  tmscore  energy  clashes  zscore  templatea  templateb
+sp|Q9BYF1|ACE2_HUMAN  sp|P0DTC2|SPIKE_SARS2   0.91   0.82  -8.92   0.00  406.00  6LZG_A  6LZG_B
b
diff -r 000000000000 -r dbbcc7cd889f test-data/model/pairs.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/model/pairs.txt Tue Mar 23 13:55:42 2021 +0000
b
@@ -0,0 +1,1 @@
+sp|Q9BYF1|ACE2_HUMAN    sp|P0DTC2|SPIKE_SARS2
\ No newline at end of file
b
diff -r 000000000000 -r dbbcc7cd889f test-data/model/pdb_structures.ffdata
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/model/pdb_structures.ffdata Tue Mar 23 13:55:42 2021 +0000
b
b'@@ -0,0 +1,122458 @@\n+HEADER    HYDROLASE/VIRAL PROTEIN                 01-AUG-05   2AJF              \n+TITLE     STRUCTURE OF SARS CORONAVIRUS SPIKE RECEPTOR-BINDING DOMAIN COMPLEXED \n+TITLE    2 WITH ITS RECEPTOR                                                    \n+COMPND    MOL_ID: 1;                                                            \n+COMPND   2 MOLECULE: ANGIOTENSIN-CONVERTING ENZYME-RELATED CARBOXYPEPTIDASE     \n+COMPND   3 (ACE2);                                                              \n+COMPND   4 CHAIN: A, B;                                                         \n+COMPND   5 FRAGMENT: RESIDUES 19-615;                                           \n+COMPND   6 SYNONYM: ACE-RELATED CARBOXYPEPTIDASE, ANGIOTENSIN-CONVERTING ENZYME \n+COMPND   7 HOMOLOG, ACEH;                                                       \n+COMPND   8 EC: 3.4.17.-;                                                        \n+COMPND   9 ENGINEERED: YES;                                                     \n+COMPND  10 MOL_ID: 2;                                                           \n+COMPND  11 MOLECULE: SARS-CORONAVIRUS SPIKE PROTEIN;                            \n+COMPND  12 CHAIN: E, F;                                                         \n+COMPND  13 FRAGMENT: RECEPTOR-BINDING DOMAIN, RESIDUES 323-502;                 \n+COMPND  14 SYNONYM: SPIKE GLYCOPROTEIN, PEPLOMER PROTEIN, E2;                   \n+COMPND  15 ENGINEERED: YES                                                      \n+SOURCE    MOL_ID: 1;                                                            \n+SOURCE   2 ORGANISM_SCIENTIFIC: HOMO SAPIENS;                                   \n+SOURCE   3 ORGANISM_COMMON: HUMAN;                                              \n+SOURCE   4 ORGANISM_TAXID: 9606;                                                \n+SOURCE   5 GENE: ACE2;                                                          \n+SOURCE   6 EXPRESSION_SYSTEM: SPODOPTERA FRUGIPERDA;              
              \n+SOURCE   7 EXPRESSION_SYSTEM_COMMON: FALL ARMYWORM;                             \n+SOURCE   8 EXPRESSION_SYSTEM_TAXID: 7108;                                       \n+SOURCE   9 EXPRESSION_SYSTEM_CELL_LINE: SF9;                                    \n+SOURCE  10 EXPRESSION_SYSTEM_VECTOR_TYPE: BACULOVIRUS;                          \n+SOURCE  11 EXPRESSION_SYSTEM_PLASMID: PFASTBAC 1;                               \n+SOURCE  12 MOL_ID: 2;                                                           \n+SOURCE  13 ORGANISM_SCIENTIFIC: SARS CORONAVIRUS;                               \n+SOURCE  14 ORGANISM_TAXID: 227859;                                              \n+SOURCE  15 STRAIN: SARS;                                                        \n+SOURCE  16 GENE: S;                                                             \n+SOURCE  17 EXPRESSION_SYSTEM: SPODOPTERA FRUGIPERDA;                            \n+SOURCE  18 EXPRESSION_SYSTEM_COMMON: FALL ARMYWORM;                             \n+SOURCE  19 EXPRESSION_SYSTEM_TAXID: 7108;                                       \n+SOURCE  20 EXPRESSION_SYSTEM_CELL_LINE: SF9;                                    \n+SOURCE  21 EXPRESSION_SYSTEM_VECTOR_TYPE: BACULOVIRUS;                          \n+SOURCE  22 EXPRESSION_SYSTEM_PLASMID: PFASTBAC 1                                \n+KEYWDS    ANTIPARALLEL BETA SHEET, EXTENDED LOOP, HYDROLASE-VIRAL PROTEIN       \n+KEYWDS   2 COMPLEX                                                              \n+EXPDTA    X-RAY DIFFRACTION                                                     \n+AUTHOR    F.LI,W.LI,M.FARZAN,S.C.HARRISON                                       \n+REVDAT   4   29-JUL-20 2AJF    1       COMPND REMARK HETNAM LINK                \n+REVDAT   4 2                   1       SITE   ATOM                              \n+REVDAT   3   13-JUL-11 2AJF    1       VERSN                                    \n+REVDAT   2   24-FEB-09 2AJF    1       VERSN                              
      \n+REVDAT   1   20-SEP-05 2AJF    0         '..b'                                                              \n+CONECT 996410377                                                                \n+CONECT 999321543                                                                \n+CONECT1005410901                                                                \n+CONECT10377 9964                                                                \n+CONECT1090110054                                                                \n+CONECT1099811403                                                                \n+CONECT1140310998                                                                \n+CONECT1159611687                                                                \n+CONECT1168711596                                                                \n+CONECT1178711845                                                                \n+CONECT1184511787                                                                \n+CONECT1225012417                                                                \n+CONECT1229112331                                                                \n+CONECT1233112291                                                                \n+CONECT1241712250                                                                \n+CONECT13940 2390                                                                \n+CONECT1430114387                                                                \n+CONECT1438714301                                                                \n+CONECT1468615044                                                                \n+CONECT1504414686                                                                \n+CONECT1597216085                                                                \n+CONECT1608515972                                                                \n+CONECT1686416937                                      
                          \n+CONECT1693716864                                                                \n+CONECT1722017427                                                                \n+CONECT1742717220                                                                \n+CONECT1756717980                                                                \n+CONECT17596 6337                                                                \n+CONECT1765718504                                                                \n+CONECT1798017567                                                                \n+CONECT1850417657                                                                \n+CONECT1860119006                                                                \n+CONECT1900618601                                                                \n+CONECT1919919290                                                                \n+CONECT1929019199                                                                \n+CONECT1939019448                                                                \n+CONECT1944819390                                                                \n+CONECT1985320020                                                                \n+CONECT1989419934                                                                \n+CONECT1993419894                                                                \n+CONECT2002019853                                                                \n+CONECT21543 9993                                                                \n+CONECT2190421990                                                                \n+CONECT2199021904                                                                \n+CONECT2228922647                                                                \n+CONECT2264722289                                                                \n+MASTER     1140    0    0   66  180    0    0    622806    3  
 78  294          \n+END                                                                             \n'
b
diff -r 000000000000 -r dbbcc7cd889f test-data/model/pdb_structures.ffindex
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/model/pdb_structures.ffindex Tue Mar 23 13:55:42 2021 +0000
b
@@ -0,0 +1,7 @@
+2ajf.pdb 0 1135053
+6cs2.pdb 1135053 2523474
+6ilf.pdb 3658527 548208
+6lzg.pdb 4206735 1136349
+6m0j.pdb 5343084 583362
+6m18.pdb 5926446 1974699
+6x29.pdb 7901145 2017953
b
diff -r 000000000000 -r dbbcc7cd889f test-data/model/sp|Q9BYF1|ACE2_HUMAN.sp|P0DTC2|SPIKE_SARS2.pdb
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/model/sp|Q9BYF1|ACE2_HUMAN.sp|P0DTC2|SPIKE_SARS2.pdb Tue Mar 23 13:55:42 2021 +0000
b
b'@@ -0,0 +1,20118 @@\n+ATOM      1  N   THR 0  15     -27.319  51.435  -1.110  0.00  0.00\n+ATOM      2  CA  THR 0  15     -28.530  51.067  -0.470  0.00  0.00\n+ATOM      3  CB  THR 0  15     -29.354  52.407  -0.327  0.00  0.00\n+ATOM      4  OG1 THR 0  15     -28.715  53.127   0.778  0.00  0.00\n+ATOM      5  CG2 THR 0  15     -30.852  52.264  -0.069  0.00  0.00\n+ATOM      6  C   THR 0  15     -29.362  50.054  -1.295  0.00  0.00\n+ATOM      7  O   THR 0  15     -29.898  49.095  -0.751  0.00  0.00\n+ATOM      8  N   ILE 0  21     -29.424  50.247  -2.611  0.00  0.00\n+ATOM      9  CA  ILE 0  21     -30.123  49.325  -3.448  0.00  0.00\n+ATOM     10  CB  ILE 0  21     -30.114  49.825  -4.911  0.00  0.00\n+ATOM     11  CG1 ILE 0  21     -30.880  51.152  -5.038  0.00  0.00\n+ATOM     12  CG2 ILE 0  21     -30.700  48.731  -5.781  0.00  0.00\n+ATOM     13  CD1 ILE 0  21     -30.867  51.814  -6.394  0.00  0.00\n+ATOM     14  C   ILE 0  21     -29.475  47.880  -3.367  0.00  0.00\n+ATOM     15  O   ILE 0  21     -30.167  46.873  -3.263  0.00  0.00\n+ATOM     16  N   GLU 0  22     -28.145  47.821  -3.403  0.00  0.00\n+ATOM     17  CA  GLU 0  22     -27.493  46.566  -3.298  0.00  0.00\n+ATOM     18  CB  GLU 0  22     -25.991  46.780  -3.509  0.00  0.00\n+ATOM     19  CG  GLU 0  22     -25.673  47.438  -4.846  0.00  0.00\n+ATOM     20  CD  GLU 0  22     -24.187  47.737  -5.079  0.00  0.00\n+ATOM     21  OE1 GLU 0  22     -23.418  47.883  -4.097  0.00  0.00\n+ATOM     22  OE2 GLU 0  22     -23.798  47.857  -6.266  0.00  0.00\n+ATOM     23  C   GLU 0  22     -27.751  45.870  -1.935  0.00  0.00\n+ATOM     24  O   GLU 0  22     -27.999  44.670  -1.884  0.00  0.00\n+ATOM     25  N   GLU 0  23     -27.738  46.637  -0.847  0.00  0.00\n+ATOM     26  CA  GLU 0  23     -28.030  46.081   0.439  0.00  0.00\n+ATOM     27  CB  GLU 0  23     -27.820  47.163   1.504  0.00  0.00\n+ATOM     28  CG  GLU 0  23     -26.410  47.741   1.483  0.00  0.00\n+ATOM     29  CD  GLU 0  23     -26.166  
48.875   2.484  0.00  0.00\n+ATOM     30  OE1 GLU 0  23     -27.131  49.565   2.898  0.00  0.00\n+ATOM     31  OE2 GLU 0  23     -24.979  49.096   2.835  0.00  0.00\n+ATOM     32  C   GLU 0  23     -29.425  45.478   0.476  0.00  0.00\n+ATOM     33  O   GLU 0  23     -29.645  44.349   0.936  0.00  0.00\n+ATOM     34  N   GLN 0  24     -30.375  46.236  -0.068  0.00  0.00\n+ATOM     35  CA  GLN 0  24     -31.780  45.738  -0.095  0.00  0.00\n+ATOM     36  CB  GLN 0  24     -32.710  46.859  -0.568  0.00  0.00\n+ATOM     37  CG  GLN 0  24     -32.659  48.088   0.347  0.00  0.00\n+ATOM     38  CD  GLN 0  24     -33.536  49.233  -0.121  0.00  0.00\n+ATOM     39  OE1 GLN 0  24     -34.759  49.106  -0.202  0.00  0.00\n+ATOM     40  NE2 GLN 0  24     -32.912  50.366  -0.430  0.00  0.00\n+ATOM     41  C   GLN 0  24     -31.878  44.524  -0.995  0.00  0.00\n+ATOM     42  O   GLN 0  24     -32.633  43.594  -0.716  0.00  0.00\n+ATOM     43  N   ALA 0  25     -31.076  44.510  -2.059  0.00  0.00\n+ATOM     44  CA  ALA 0  25     -31.058  43.363  -2.984  0.00  0.00\n+ATOM     45  CB  ALA 0  25     -30.239  43.619  -4.246  0.00  0.00\n+ATOM     46  C   ALA 0  25     -30.612  42.107  -2.275  0.00  0.00\n+ATOM     47  O   ALA 0  25     -31.222  41.070  -2.511  0.00  0.00\n+ATOM     48  N   LYS 0  26     -29.646  42.164  -1.359  0.00  0.00\n+ATOM     49  CA  LYS 0  26     -29.220  41.049  -0.581  0.00  0.00\n+ATOM     50  CB  LYS 0  26     -27.942  41.271   0.211  0.00  0.00\n+ATOM     51  CG  LYS 0  26     -26.740  41.494  -0.714  0.00  0.00\n+ATOM     52  CD  LYS 0  26     -25.449  41.501   0.065  0.00  0.00\n+ATOM     53  CE  LYS 0  26     -24.207  41.790  -0.767  0.00  0.00\n+ATOM     54  NZ  LYS 0  26     -23.295  42.725  -0.035  0.00  0.00\n+ATOM     55  C   LYS 0  26     -30.366  40.551   0.269  0.00  0.00\n+ATOM     56  O   LYS 0  26     -30.498  39.332   0.340  0.00  0.00\n+ATOM     57  N   THR 0  27     -31.223  41.403   0.815  0.00  0.00\n+ATOM     58  CA  THR 0  27     -32.383 
 41.146   1.544  0.00  0.00\n+ATOM     59  CB  THR 0  27     -33'..b'LEU B 518     -27.092  35.368  39.831  1.00  1.00\n+ATOM   6355  N   HIS B 519     -24.490  34.792  44.136  1.00  1.00\n+ATOM   6356  CA  HIS B 519     -24.133  34.802  45.555  1.00  1.00\n+ATOM   6357  C   HIS B 519     -25.434  34.562  46.317  1.00  1.00\n+ATOM   6358  O   HIS B 519     -26.069  35.478  46.846  1.00  1.00\n+ATOM   6359  CB  HIS B 519     -23.472  36.116  45.960  1.00  1.00\n+ATOM   6360  CG  HIS B 519     -22.571  36.702  44.913  1.00  1.00\n+ATOM   6361  ND1 HIS B 519     -22.447  38.062  44.723  1.00  1.00\n+ATOM   6362  CD2 HIS B 519     -21.734  36.121  44.020  1.00  1.00\n+ATOM   6363  CE1 HIS B 519     -21.585  38.293  43.749  1.00  1.00\n+ATOM   6364  NE2 HIS B 519     -21.138  37.132  43.305  1.00  1.00\n+ATOM   6365  N   ALA B 520     -25.864  33.306  46.220  1.00  1.00\n+ATOM   6366  CA  ALA B 520     -27.143  32.918  46.841  1.00  1.00\n+ATOM   6367  C   ALA B 520     -27.192  31.422  47.130  1.00  1.00\n+ATOM   6368  O   ALA B 520     -26.193  30.724  46.949  1.00  1.00\n+ATOM   6369  CB  ALA B 520     -28.299  33.350  45.973  1.00  1.00\n+ATOM   6370  N   PRO B 521     -28.397  30.923  47.428  1.00  1.00\n+ATOM   6371  CA  PRO B 521     -28.665  29.559  47.872  1.00  1.00\n+ATOM   6372  C   PRO B 521     -27.834  28.356  47.421  1.00  1.00\n+ATOM   6373  O   PRO B 521     -27.348  27.705  48.296  1.00  1.00\n+ATOM   6374  CB  PRO B 521     -30.088  29.297  47.361  1.00  1.00\n+ATOM   6375  CG  PRO B 521     -30.406  30.509  46.518  1.00  1.00\n+ATOM   6376  CD  PRO B 521     -29.634  31.618  47.193  1.00  1.00\n+ATOM   6377  N   ALA B 522     -27.700  28.151  46.112  1.00  1.00\n+ATOM   6378  CA  ALA B 522     -27.109  26.958  45.451  1.00  1.00\n+ATOM   6379  C   ALA B 522     -28.060  25.806  45.747  1.00  1.00\n+ATOM   6380  O   ALA B 522     -27.901  25.149  46.754  1.00  1.00\n+ATOM   6381  CB  ALA B 522     -25.671  26.664  45.789  1.00  1.00\n+ATOM   6382  N  
 THR B 523     -29.050  25.637  44.878  1.00  1.00\n+ATOM   6383  CA  THR B 523     -30.113  24.648  45.141  1.00  1.00\n+ATOM   6384  C   THR B 523     -30.027  23.457  44.203  1.00  1.00\n+ATOM   6385  O   THR B 523     -30.712  22.475  44.517  1.00  1.00\n+ATOM   6386  CB  THR B 523     -31.507  25.262  45.014  1.00  1.00\n+ATOM   6387  OG1 THR B 523     -31.448  26.305  44.050  1.00  1.00\n+ATOM   6388  CG2 THR B 523     -32.047  25.787  46.324  1.00  1.00\n+ATOM   6389  N   VAL B 524     -29.323  23.554  43.070  1.00  1.00\n+ATOM   6390  CA  VAL B 524     -29.279  22.285  42.337  1.00  1.00\n+ATOM   6391  C   VAL B 524     -27.894  21.661  42.482  1.00  1.00\n+ATOM   6392  O   VAL B 524     -26.978  22.017  41.725  1.00  1.00\n+ATOM   6393  CB  VAL B 524     -29.651  22.494  40.857  1.00  1.00\n+ATOM   6394  CG1 VAL B 524     -29.511  21.199  40.054  1.00  1.00\n+ATOM   6395  CG2 VAL B 524     -31.058  23.051  40.743  1.00  1.00\n+ATOM   6396  N   CYS B 525     -27.684  20.766  43.453  1.00  1.00\n+ATOM   6397  CA  CYS B 525     -26.359  20.149  43.477  1.00  1.00\n+ATOM   6398  C   CYS B 525     -26.224  18.882  42.631  1.00  1.00\n+ATOM   6399  O   CYS B 525     -25.431  18.840  41.686  1.00  1.00\n+ATOM   6400  CB  CYS B 525     -25.982  19.850  44.930  1.00  1.00\n+ATOM   6401  SG  CYS B 525     -25.751  21.332  45.937  1.00  1.00\n+ATOM   6402  N   GLY B 526     -27.018  17.888  43.011  1.00  1.00\n+ATOM   6403  CA  GLY B 526     -27.110  16.586  42.340  1.00  1.00\n+ATOM   6404  C   GLY B 526     -25.875  15.708  42.445  1.00  1.00\n+ATOM   6405  O   GLY B 526     -24.991  15.861  41.632  1.00  1.00\n+ATOM   6406  N   PRO B 527     -25.747  14.782  43.409  1.00  1.00\n+ATOM   6407  CA  PRO B 527     -24.645  13.836  43.380  1.00  1.00\n+ATOM   6408  C   PRO B 527     -24.736  13.069  42.055  1.00  1.00\n+ATOM   6409  O   PRO B 527     -25.756  12.483  41.857  1.00  1.00\n+ATOM   6410  CB  PRO B 527     -24.973  12.903  44.547  1.00  1.00\n+ATOM   6411  
CG  PRO B 527     -25.641  13.812  45.523  1.00  1.00\n+ATOM   6412  CD  PRO B 527     -26.500  14.705  44.646  1.00  1.00\n+TER\n'
b
diff -r 000000000000 -r dbbcc7cd889f tool-data/ffindex_indices.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/ffindex_indices.loc.sample Tue Mar 23 13:55:42 2021 +0000
b
@@ -0,0 +1,7 @@
+# ffindex collection: you can add multiple indices here and separate them via the type (last column).
+# The path should point to a directory followed by the file prefix ('pdb_prefix').
+# The directory needs to contain two files: pdb_prefix.ffindex and pdb_prefix.ffdata
+#
+#identifier description from the PDB set /mnt/pdb_indices/pdb/pdb_prefix pdb
+#identifier description from the HHR set /mnt/hhr/hhr_prefix hhr
+
b
diff -r 000000000000 -r dbbcc7cd889f tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Tue Mar 23 13:55:42 2021 +0000
b
@@ -0,0 +1,6 @@
+<tables>
+    <table name="ffindex_indices" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, path, type</columns>
+        <file path="tool-data/ffindex_indices.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r dbbcc7cd889f tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Tue Mar 23 13:55:42 2021 +0000
b
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Location of ffindex indexes for testing -->
+    <table name="ffindex_indices" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, path, type</columns>
+        <file path="${__HERE__}/test-data/ffindex_indices.loc" />
+    </table>
+</tables>