Repository 'interproscan'
hg clone https://toolshed.g2.bx.psu.edu/repos/bgruening/interproscan

Changeset 4:fcedfe919603 (2021-11-15)
Previous changeset 3:8ee90fc5fe11 (2013-10-08) Next changeset 5:2e1cf7d85dbc (2021-11-19)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/interproscan commit 2f5d27a375fcc2e8d77914b3d9e402a9e2df2d97"
modified:
interproscan.xml
added:
README.md
macros.xml
test-data/interproscan.loc
test-data/prot_out/out.gff3
test-data/prot_out/out.json
test-data/prot_out/out.tsv
test-data/prot_out/out.xml
test-data/prots.fa
test-data/transcript_out/out.gff3
test-data/transcript_out/out.json
test-data/transcript_out/out.tsv
test-data/transcript_out/out.xml
test-data/transcripts.fa
tool-data/interproscan.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
removed:
readme.rst
b
diff -r 8ee90fc5fe11 -r fcedfe919603 README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md Mon Nov 15 17:53:24 2021 +0000
b
@@ -0,0 +1,12 @@
+# InterProScan
+
+## Licensed software/data
+
+The standard install of InterProScan does not include the following components, because their licenses do not allow redistribution:
+
+- SignalP
+- TMHMM
+- Phobius
+- SMART data (THRESHOLD file)
+
+As a Galaxy instance admin, you can install those manually if needed, as long as you (and the Galaxy users) respect the respective licenses.
b
diff -r 8ee90fc5fe11 -r fcedfe919603 interproscan.xml
--- a/interproscan.xml Tue Oct 08 10:22:52 2013 -0400
+++ b/interproscan.xml Mon Nov 15 17:53:24 2021 +0000
[
b'@@ -1,243 +1,332 @@\n-<tool id="interproscan" name="Interproscan functional predictions of ORFs"  version="1.2">\n-    <description>Interproscan functional predictions of ORFs</description>\n-    <command>\n-## The command is a Cheetah template which allows some Python based syntax.\n-## Lines starting hash hash are comments. Galaxy will turn newlines into spaces\n+<tool id="interproscan" name="InterProScan" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.09">\n+    <description>functional annotation</description>\n+    <macros>\n+        <import>macros.xml</import>\n+    </macros>\n+    <xrefs>\n+        <xref type="bio.tools">interproscan_4</xref>\n+    </xrefs>\n+    <requirements>\n+        <requirement type="package" version="@TOOL_VERSION@">interproscan</requirement>\n+    </requirements>\n+    <version_command>interproscan.sh --version</version_command>\n+    <command><![CDATA[\n+## Adapt properties file to use data from data table\n+mkdir -p \\$HOME/.interproscan-5\n+&&\n+sed \'s|^\\(data.directory=\\).*$|\\1${database.fields.path}|\' \\$(dirname \\$(readlink -f \\$(command -v interproscan.sh)))/interproscan.properties > \\$HOME/.interproscan-5/interproscan.properties\n+&&\n+\n+## Now run interproscan\n+interproscan.sh\n \n-## create temp directory\n-#import tempfile, os\n-#set $tfile = tempfile.mkstemp()[1]\n+## disables the precalculated lookup service, all calculation will be run locally\n+-dp\n+--input \'$input\'\n+--seqtype $seqtype\n+-f ${\',\'.join($oformat)}\n+\n+#if $licensed.use == \'true\' and $licensed.applications_licensed:\n+    --applications ${\',\'.join($applications)},${\',\'.join($licensed.applications_licensed)}\n+#else:\n+    --applications ${\',\'.join($applications)}\n+#end if\n+--tempdir \\$TEMP\n \n-sed \'s/ /_/g\' $input > $tfile;\n+$pathways\n+$goterms\n+$iprlookup\n+\n+--cpu \\${GALAXY_SLOTS:-4}\n+\n+--output-file-base \'output\'\n+    ]]></command>\n+    <inputs>\n+        <param argument="--input" type="data" 
format="fasta" label="Protein FASTA File"/>\n+\n+        <param argument="--seqtype" type="select" label="Type of the input sequences" help="">\n+            <option value="p" selected="true">Protein</option>\n+            <option value="n">DNA / RNA</option>\n+        </param>\n \n-## Hack, because interproscan does not seem to produce gff output even if it is configured\n-#if str($oformat) == "gff":\n-    #set $tfile2 = tempfile.mkstemp()[1]\n-    iprscan -cli -nocrc -i $tfile -o $tfile2 -goterms -seqtype p -altjobs -format raw -appl $appl 2>&#38;1;\n-    converter.pl -format gff3 -input $tfile2 -output $output;\n-    rm $tfile2;\n-#else\n-    iprscan -cli -nocrc -i $tfile -o $output -goterms -seqtype p -altjobs -format $oformat -appl $appl 2>&#38;1;\n-#end if\n+        <param name="database" label="InterProScan database" type="select">\n+            <options from_data_table="interproscan">\n+                <column name="value" index="0" />\n+                <column name="name" index="1" />\n+                <column name="path" index="3" />\n+                <filter type="sort_by" column="0" />\n+                <filter type="static_value" column="2" value="@TOOL_VERSION@" />\n+            </options>\n+        </param>\n+\n+        <param name="applications" type="select" multiple="True" label="Applications to run" help="Select your program">\n+            <option value="TIGRFAM" selected="true">TIGRFAM: protein families based on hidden Markov models (HMMs)</option>\n+            <option value="SFLD" selected="true">SFLD: a database of protein families based on hidden Markov models (HMMs)</option>\n+            <option value="SUPERFAMILY" selected="true">SUPERFAMILY: database of structural and functional annotation for all proteins and genomes</option>\n+            <option value="PANTHER" selected="true">PANTHER: Protein ANalysis THrough Evolutionary Relationships</option>\n+            <option value="Gene3D" selected="true">Gene3d: Structural assignment for 
whole genes and genomes using the CATH domain structure database</option>\n+            <option value="Hamap" selected="tru'..b'93/nar/gkn785\n+  - Protein Accession (e.g. P51587)\n+  - Sequence MD5 digest (e.g. 14086411a2cdf1c4cba63020e1622579)\n+  - Sequence Length (e.g. 3418)\n+  - Analysis (e.g. Pfam / PRINTS / Gene3D)\n+  - Signature Accession (e.g. PF09103 / G3DSA:2.40.50.140)\n+  - Signature Description (e.g. BRCA2 repeat profile)\n+  - Start location\n+  - Stop location\n+  - Score - is the e-value of the match reported by member database method (e.g. 3.1E-52)\n+  - Status - is the status of the match (T: true)\n+  - Date - is the date of the run\n+  - (InterProScan annotations - accession (e.g. IPR002093) - optional column; only displayed if -iprscan option is switched on)\n+  - (InterProScan annotations - description (e.g. BRCA2 repeat) - optional column; only displayed if -iprscan option is switched on)\n+  - (GO annotations (e.g. GO:0005515) - optional column; only displayed if --goterms option is switched on)\n+  - (Pathways annotations (e.g. REACT_71) - optional column; only displayed if --pathways option is switched on)\n \n \n-This wrapper is available to install into other Galaxy Instances via the Galaxy Tool Shed at\n-http://toolshed.g2.bx.psu.edu/view/bgruening/interproscan\n+**Extensible Markup Language (XML)**\n+\n+XML representation of the matches - this is the richest form of the data. The XML Schema Definition (XSD) is available [http://www.ebi.ac.uk/interpro/resources/schemas/interproscan5 here].\n+\n+**Generic Feature Format Version 3 (GFF3)**\n+\n+The GFF3 format is a flat tab-delimited file, which is much richer then the TSV output format. It allows you to trace back from matches to predicted proteins and to nucleic acid sequences. It also contains a FASTA format representation of the predicted protein sequences and their matches. 
You will find a documentation of all the columns and attributes used on [https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md].\n+\n+**Example Output**\n \n \n-**Galaxy Wrapper Author**::\n+::\n \n-    *  Bjoern Gruening, Pharmaceutical Bioinformatics, University of Freiburg\n-    *  Konrad Paszkiewicz, Exeter Sequencing Service, University of Exeter\n+  ##gff-version 3\n+  ##feature-ontology http://song.cvs.sourceforge.net/viewvc/song/ontology/sofa.obo?revision=1.269\n+  ##sequence-region AACH01000027 1 1347\n+  ##seqid|source|type|start|end|score|strand|phase|attributes\n+  AACH01000027    provided_by_user        nucleic_acid    1       1347    .       +       .       Name=AACH01000027;md5=b2a7416cb92565c004becb7510f46840;ID=AACH01000027\n+  AACH01000027    getorf  ORF     1       1347    .       +       .       Name=AACH01000027.2_21;Target=pep_AACH01000027_1_1347 1 449;md5=b2a7416cb92565c004becb7510f46840;ID=orf_AACH01000027_1_1347\n+  AACH01000027    getorf  polypeptide     1       449     .       +       .       md5=fd0743a673ac69fb6e5c67a48f264dd5;ID=pep_AACH01000027_1_1347\n+  AACH01000027    Pfam    protein_match   84      314     1.2E-45 +       .       
Name=PF00696;signature_desc=Amino acid kinase family;Target=null 84 314;status=T;ID=match$8_84_314;Ontology_term="GO:0008652";date=15-04-2013;Dbxref="InterPro:IPR001048","Reactome:REACT_13"\n+  ##sequence-region 2\n+  ...\n+  >pep_AACH01000027_1_1347\n+  LVLLAAFDCIDDTKLVKQIIISEIINSLPNIVNDKYGRKVLLYLLSPRDPAHTVREIIEV\n+  LQKGDGNAHSKKDTEIRRREMKYKRIVFKVGTSSLTNEDGSLSRSKVKDITQQLAMLHEA\n+  GHELILVSSGAIAAGFGALGFKKRPTKIADKQASAAVGQGLLLEEYTTNLLLRQIVSAQI\n+  LLTQDDFVDKRRYKNAHQALSVLLNRGAIPIINENDSVVIDELKVGDNDTLSAQVAAMVQ\n+  ADLLVFLTDVDGLYTGNPNSDPRAKRLERIETINREIIDMAGGAGSSNGTGGMLTKIKAA\n+  TIATESGVPVYICSSLKSDSMIEAAEETEDGSYFVAQEKGLRTQKQWLAFYAQSQGSIWV\n+  DKGAAEALSQYGKSLLLSGIVEAEGVFSYGDIVTVFDKESGKSLGKGRVQFGASALEDML\n+  RSQKAKGVLIYRDDWISITPEIQLLFTEF\n+  ...\n+  >match$8_84_314\n+  KRIVFKVGTSSLTNEDGSLSRSKVKDITQQLAMLHEAGHELILVSSGAIAAGFGALGFKK\n+  RPTKIADKQASAAVGQGLLLEEYTTNLLLRQIVSAQILLTQDDFVDKRRYKNAHQALSVL\n+  LNRGAIPIINENDSVVIDELKVGDNDTLSAQVAAMVQADLLVFLTDVDGLYTGNPNSDPR\n+  AKRLERIETINREIIDMAGGAGSSNGTGGMLTKIKAATIATESGVPVYICS\n \n-    </help>\n+]]></help>\n+\n+    <expand macro="citations" />\n </tool>\n'
b
diff -r 8ee90fc5fe11 -r fcedfe919603 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Nov 15 17:53:24 2021 +0000
b
@@ -0,0 +1,15 @@
+<?xml version="1.0"?>
+<macros>
+    <token name="@TOOL_VERSION@">5.52-86.0</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1093/bioinformatics/btu031</citation>
+            <citation type="doi">10.7717/peerj.167</citation>
+            <citation type="doi">10.1093/bioinformatics/17.9.847</citation>
+            <citation type="doi">10.1093/nar/gki442</citation>
+            <citation type="doi">10.1093/nar/gkn785</citation>
+        </citations>
+    </xml>
+</macros>
b
diff -r 8ee90fc5fe11 -r fcedfe919603 readme.rst
--- a/readme.rst Tue Oct 08 10:22:52 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,101 +0,0 @@
-==================================================
-Galaxy wrapper for InterProScan 4 prediction tools
-==================================================
-
-**Note**:
-
-This wrapper is for InterProScan 4.x if you want to use InterProScan 5 please have a look at:
-http://toolshed.g2.bx.psu.edu/view/bgruening/interproscan5 
-
------
-
-InterProScan is a tool that combines different protein signature recognition methods native to the InterPro 
-member databases into one resource with look up of corresponding InterPro and GO annotation.
-
-This wrapper is copyright 2012-2013 by:
- *  Bjoern Gruening, Pharmaceutical Bioinformatics, University of Freiburg
- *  Konrad Paszkiewicz, Exeter Sequencing Service, University of Exeter
-
-
-This repository contains a wrapper for the InterProScan_ command line tool.
-
-.. _InterProScan: http://www.ebi.ac.uk/interpro/
-
-
-Zdobnov E.M. and Apweiler R. "InterProScan - an integration platform for the signature-recognition methods in InterPro" Bioinformatics, 2001, 17(9): p. 847-8.
-
-
-============
-Installation
-============
-
-Please download and install InterProScan according to:
-
-ftp://ftp.ebi.ac.uk/pub/software/unix/iprscan/Installing_InterProScan.txt
-
-Please see also:
-ftp://ftp.ebi.ac.uk/pub/software/unix/iprscan/index.html
-
-And rebuild the indices if necessary
-
-.. code:: 
-
- index_data.pl -f interpro.xml -inx -v -bin -bforce
- index_data.pl -f match_complete.xml -inx -v -bin -bforce
- index_data.pl -f Pfam-A.seed -inx -v -bin -bforce
- index_data.pl -f Pfam-C -inx -v -bin -bforce
- index_data.pl -f prints.pval -inx -v -bin -bforce
- index_data.pl -f sf.seq -inx -v -bin -bforce
- index_data.pl -f sf_hmm -inx -v -bin -bforce
- index_data.pl -f smart.HMMs -inx -v -bin -bforce
- index_data.pl -f superfamily.hmm -inx -v -bin -bforce
- index_data.pl -f TIGRFAMs_HMM.LIB -inx -v -bin -bforce
-
-
-Add the tool definition to your tool_conf.xml file under Galaxy root:
-.. code::
-
- <tool file="iprscan/interproscan.xml" />
-
-=============
-Input formats
-=============
-
-The standard interproscan input is either genomic or protein sequences. In the case of genomic sequences Interproscan will run an ORF 
-prediction tool. However this tends to lose the ORF information (e.g. start/end co-ordinates) from the header. As such the requirement here is to input ORF 
-sequences (e.g. from EMBOSS getorf) and to then replace any spaces in the FASTA header with underscores. This workaround generally preserves the relevant 
-positional information. 
-
-
-=======
-History
-=======
-
-interproscan:
-
- - v1.1: Initial public release
- - v1.2: Merge with Konrad Paszkiewicz repository
-
-
-=============
-Licence (MIT)
-=============
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-
b
diff -r 8ee90fc5fe11 -r fcedfe919603 test-data/interproscan.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/interproscan.loc Mon Nov 15 17:53:24 2021 +0000
b
@@ -0,0 +1,8 @@
+# this is a tab separated file describing the location of interproscan databases used for the
+# interproscan annotation tool
+#
+# the columns are:
+# value description interproscan_version path
+#
+# for example
+5.52-86.0 InterProScan 5.52-86.0 5.52-86.0 ${__HERE__}/fake_db/
b
diff -r 8ee90fc5fe11 -r fcedfe919603 test-data/prot_out/out.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/prot_out/out.gff3 Mon Nov 15 17:53:24 2021 +0000
b
b'@@ -0,0 +1,418 @@\n+##gff-version 3\n+##feature-ontology http://song.cvs.sourceforge.net/viewvc/song/ontology/sofa.obo?revision=1.269\n+##interproscan-version 5.52-86.0\n+##sequence-region FUN_000011-T1 1 923\n+FUN_000011-T1\t.\tpolypeptide\t1\t923\t.\t+\t.\tID=FUN_000011-T1;md5=ea9924e11f7decc417e8d9ed8b9c682e\n+FUN_000011-T1\tMobiDBLite\tprotein_match\t49\t74\t.\t+\t.\tdate=09-11-2021;Target=FUN_000011-T1 49 74;ID=match$1_49_74;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000011-T1\tMobiDBLite\tprotein_match\t1\t74\t.\t+\t.\tdate=09-11-2021;Target=FUN_000011-T1 1 74;ID=match$2_1_74;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000011-T1\tMobiDBLite\tprotein_match\t1\t18\t.\t+\t.\tdate=09-11-2021;Target=FUN_000011-T1 1 18;ID=match$3_1_18;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000011-T1\tMobiDBLite\tprotein_match\t770\t789\t.\t+\t.\tdate=09-11-2021;Target=FUN_000011-T1 770 789;ID=match$4_770_789;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000011-T1\tMobiDBLite\tprotein_match\t771\t789\t.\t+\t.\tdate=09-11-2021;Target=FUN_000011-T1 771 789;ID=match$5_771_789;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000011-T1\tMobiDBLite\tprotein_match\t188\t259\t.\t+\t.\tdate=09-11-2021;Target=FUN_000011-T1 188 259;ID=match$6_188_259;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000011-T1\tMobiDBLite\tprotein_match\t20\t46\t.\t+\t.\tdate=09-11-2021;Target=FUN_000011-T1 20 46;ID=match$7_20_46;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+##sequence-region FUN_000012-T1 1 3977\n+FUN_000012-T1\t.\tpolypeptide\t1\t3977\t.\t+\t.\tID=FUN_000012-T1;md5=01beedc2fbf8012cba37f0c0d39aa071\n+FUN_000012-T1\tMobiDBLite\tprotein_match\t2637\t2661\t.\t+\t.\tdate=09-11-2021;Target=FUN_000012-T1 2637 2661;ID=match$8_2637_2661;signature_desc=consensus disorder 
prediction;Name=mobidb-lite;status=T\n+FUN_000012-T1\tMobiDBLite\tprotein_match\t520\t541\t.\t+\t.\tdate=09-11-2021;Target=FUN_000012-T1 520 541;ID=match$9_520_541;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000012-T1\tMobiDBLite\tprotein_match\t2396\t2471\t.\t+\t.\tdate=09-11-2021;Target=FUN_000012-T1 2396 2471;ID=match$10_2396_2471;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000012-T1\tMobiDBLite\tprotein_match\t1762\t1784\t.\t+\t.\tdate=09-11-2021;Target=FUN_000012-T1 1762 1784;ID=match$11_1762_1784;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000012-T1\tMobiDBLite\tprotein_match\t1520\t1558\t.\t+\t.\tdate=09-11-2021;Target=FUN_000012-T1 1520 1558;ID=match$12_1520_1558;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000012-T1\tMobiDBLite\tprotein_match\t3633\t3682\t.\t+\t.\tdate=09-11-2021;Target=FUN_000012-T1 3633 3682;ID=match$13_3633_3682;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000012-T1\tMobiDBLite\tprotein_match\t3502\t3704\t.\t+\t.\tdate=09-11-2021;Target=FUN_000012-T1 3502 3704;ID=match$14_3502_3704;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000012-T1\tMobiDBLite\tprotein_match\t2538\t2562\t.\t+\t.\tdate=09-11-2021;Target=FUN_000012-T1 2538 2562;ID=match$15_2538_2562;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000012-T1\tMobiDBLite\tprotein_match\t3537\t3551\t.\t+\t.\tdate=09-11-2021;Target=FUN_000012-T1 3537 3551;ID=match$16_3537_3551;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000012-T1\tMobiDBLite\tprotein_match\t2753\t2799\t.\t+\t.\tdate=09-11-2021;Target=FUN_000012-T1 2753 2799;ID=match$17_2753_2799;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000012-T1\tMobiDBLite\tprotein_match\t3133\t3158\t.\t+\t.\tdate=09-11-2021;Target=FUN_000012-T1 3133 
3158;ID=match$18_3133_3158;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000012-T1\tMobiDBLite\tprotein_match\t2423\t2442\t.\t+\t.\tdate=09-11-2021;Target=FUN_000012-T1 2423 2442;ID=match$19_2423_24'..b'8_2562\n+PIEERIAKILDRGGARTEDSESSSG\n+>match$16_3537_3551\n+AKHHSYDDRTLSKTQ\n+>match$17_2753_2799\n+ERSTASEKSSMEDDVSKASTHSVSMRDESVGSTDHQPSLSSDSRSNT\n+>match$18_3133_3158\n+ASCKKCSHSSHSEEETSSLGTDLDGT\n+>match$19_2423_2442\n+TSGEKDTSPQRQQDWSLSVG\n+>match$20_1483_1502\n+QIESDSATSSRTESMQQQKL\n+>match$21_1762_1787\n+VSLQRQKATQQQSPTTERRTKSLDTP\n+>match$22_2235_2261\n+RKPSTAESLDSYVSVDESHSAASKSPV\n+>match$23_3324_3375\n+GNKTSLSSPSEEKRCALPQVHRRSGSNGNQARAAQEHADSQTGPETSAAARK\n+>match$24_2590_2630\n+GKSGSDRSSQESKSSFDSKGSLSVESRGSFETESSSGSLGA\n+>match$25_2679_2705\n+NPLRDSSDSDVKEASPDDQKDASDASY\n+>match$26_1086_1105\n+DDESVSEGHQQHKYRSEMDV\n+>match$27_3816_3830\n+LPAPANHPMPPATGQ\n+>match$28_2152_2318\n+VVHKKIDGKSSSLERPAEHHYLGPDVKARSLDDKRQATEAAKKNEEKPAPVVRSAIGDQR\n+RSLEILKRSLPSEDARDSEGAFSRKPSTAESLDSYVSVDESHSAASKSPVPGTGGGTEGY\n+PHRVPTIECEEPSIEEDENSSERRHLKVGGQDTNRLSLDRSRSDETG\n+>match$29_3504_3532\n+AAKPPPPPVPTVPPIVTPSDLPGDAVAPP\n+>match$30_2152_2200\n+VVHKKIDGKSSSLERPAEHHYLGPDVKARSLDDKRQATEAAKKNEEKPA\n+>match$31_912_931\n+SAHYMEDEEEEEEDEGENIY\n+>match$32_1527_1541\n+PRVIPKTKSAGKSKK\n+>match$33_1090_1105\n+VSEGHQQHKYRSEMDV\n+>match$34_559_581\n+HQHQHAQQQHQPQHYAQHQHSGS\n+>match$35_3426_3463\n+PPASHLASSTTSQIQTQQPQQVRQKPRAPQPPTKPKPA\n+>match$36_1362_1410\n+RHIMNGTAPSPSEKKHTKPKSPKSPKSPKSPKSPKSPKSPKASSGGLGP\n+>match$37_3812_3938\n+NPVQLPAPANHPMPPATGQPVKPARTKKRQFKREDSTAAGTSGHSTAAPQVRPYHGRSYC\n+NPEESEYETRGGGLSDELANSSEDSCSGFGGDAGASGSGTIRRGTTKGAGQDQEQGTGGQ\n+ARHVPYP\n+>match$38_1920_2025\n+YSSANSSRKTSPGVSRASSIESTGKGKLKTSEPYKGKPVSRSRPESRRTSADDIKARDGR\n+GSKKSSPKERPRSIEMRRLSKDKSKSQEETEADIAKRKERQQKLYE\n+>match$39_836_859\n+TPSLDSQSSRHTHSLPRNSELSGV\n+>match$40_3429_3449\n+SHLASSTTSQIQTQQPQQVRQ\n+>match$41_3552_3583\n+IREFKTTSKQLRQSSSFHEHMLSKSQQSSQEL\n+>match$42_2492_253
1\n+EITPTPSNEQTPTNEPKILVSKKPSTPTLEKQSPIDLGTS\n+>match$43_1920_1945\n+YSSANSSRKTSPGVSRASSIESTGKG\n+>match$44_915_931\n+YMEDEEEEEEDEGENIY\n+>match$45_2757_2773\n+ASEKSSMEDDVSKASTH\n+>match$46_3213_3252\n+SCAKSEKSNQTKSSISSINKSAESLNEQSGGSSFSHKFSG\n+>match$47_3345_3371\n+RRSGSNGNQARAAQEHADSQTGPETSA\n+>match$48_3718_3778\n+EQAESSECSEGYVPEVDSGSSEYSSCQRDDQYLEFDEELERDQTDDYEDYPQYSGKFESL\n+D\n+>match$49_3596_3621\n+TSSATNTTTTTNTLNSESTEPNSPQM\n+>match$50_2095_2134\n+FDEGRSPDKLDKANRSFEDRNKSFEDSEKSDAPEDMLIKS\n+>match$51_486_508\n+YAADSPTSQEEAECSAAGAERQH\n+>match$52_447_467\n+TPSLTKKAADTLDRRRDNPIG\n+>match$53_3040_3060\n+NSSPEVIVAQPTRSPAPRSPL\n+>match$54_542_558\n+QRGPDPARVHPGDHNTA\n+>match$55_1947_2024\n+LKTSEPYKGKPVSRSRPESRRTSADDIKARDGRGSKKSSPKERPRSIEMRRLSKDKSKSQ\n+EETEADIAKRKERQQKLY\n+>match$56_520_581\n+NRSLSPQGPQSWTSPSHSSHHQQRGPDPARVHPGDHNTAHQHQHAQQQHQPQHYAQHQHS\n+GS\n+>match$57_2398_2422\n+GSSDTGKKKNTLDKQSDRSKSSDSW\n+>match$58_2774_2799\n+SVSMRDESVGSTDHQPSLSSDSRSNT\n+>match$59_49_68\n+STPSGVDGTPSTPRHRGGKK\n+>match$60_3910_3929\n+GTIRRGTTKGAGQDQEQGTG\n+>match$61_2488_2710\n+VGKEEITPTPSNEQTPTNEPKILVSKKPSTPTLEKQSPIDLGTSTESYLEPIEERIAKIL\n+DRGGARTEDSESSSGGSRKPPRIEKPARANAGKKLSVTRADAGKSGSDRSSQESKSSFDS\n+KGSLSVESRGSFETESSSGSLGAAQRRGELAQKEQQSTWRPFPIESSNSSSTDDPWHHVE\n+TDGGYERYDAQNPLRDSSDSDVKEASPDDQKDASDASYQDELN\n+>match$62_2211_2234\n+RRSLEILKRSLPSEDARDSEGAFS\n+>match$63_326_352\n+AKRSRVASVQNDTVLTATSTTSADSIR\n+>match$64_160_194\n+KFVPAGNPDSEDDEDNDEDGTVRKPKPSNLTLPTI\n+>match$65_215_238\n+TTTTNSGSRSRSLNTHTSGSAQAI\n+>match$66_215_396\n+TTTTNSGSRSRSLNTHTSGSAQAITKPKRRRRAPMLTAKRRRKALDTELTTSADAGTEDK\n+APAVRKATAARGGSKRSRGESITAPTPAEPIKSPVAIKAPTKRKSTSRSEAAKRSRVASV\n+QNDTVLTATSTTSADSIRKAATKRIAANEKVAKRSRGSAALSARPSPPMTRQRARQQISA\n+ST\n+>match$67_33_61\n+MAASSHAPESDRRAQRLRTQSNWNPPDHS\n+>match$68_47_112\n+PATGASASSVGNTSAVGASSSSNSSAGQAASSNSNNVTATGSGSAPGGGPTSTGTTSGTQ\n+HGSGSG\n+>match$69_39_123\n+GGSAGGATPATGASASSVGNTSAVGASSSSNSSAGQAASSNSNNVTATGSGSAPGGGPTS\n+TGTTSGTQHGSGSGAAAAVDSESDD\n+>match$70_299_3
31\n+PTTSIEISPPSTQSTTTPKEESTSMTETTILTT\n+>match$71_54_76\n+QKDVGLKSTSSSEELRKLPKTRG\n+>match$72_54_73\n+QKDVGLKSTSSSEELRKLPK\n+>match$73_1_25\n+MPPTINNSAVNSAAEKRPQRQTERK\n+>match$74_375_431\n+EEEIVEEVREEEQMQIIGETEKTSEDAAVGAQAASGADSPAQVARDRQSRSRSRTRS\n+>match$75_1_16\n+MPPTINNSAVNSAAEK\n+>match$76_15_31\n+RYDLTLGSDKSSSLSRS\n+>match$77_1_31\n+MSLDRRGEITTPPTRYDLTLGSDKSSSLSRS\n'
b
diff -r 8ee90fc5fe11 -r fcedfe919603 test-data/prot_out/out.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/prot_out/out.json Mon Nov 15 17:53:24 2021 +0000
[
b'@@ -0,0 +1,1842 @@\n+{\n+ "interproscan-version": "5.52-86.0",\n+"results": [ {\n+  "sequence" : "MKFRALMQDPLYMKEFQAIVATLTKLAKDCVMILGSRQMHFIVNEDQSSAASPLVWAGITAEEYFPEYRMEAAHPDQEYIVLGVSSANLGRALSVLRGGGVNSCKLKLQRIQFPCISVIASVLTSSSTEAREVVHDVPVTIIPGSDWSAYVVPRVPNSQLALGLPSLRLLKSLIDKLKNISPSLEFQVNVDGELNVIATSEMSTVTSRFQKLLIRTVSGSQQEASCSVDSRKASAFFGALQLPNEELTIGIDREHSIHLQIDVRQDVVLHSILPAVCM",\n+  "md5" : "5803f8be5816d8c362fa0a4bff18f8a3",\n+  "matches" : [ ],\n+  "xref" : [ {\n+    "name" : "FUN_000006-T1 FUN_000006",\n+    "id" : "FUN_000006-T1"\n+  } ]\n+},{\n+  "sequence" : "MEVDLDESIKEAAPEKKVHSITRSSPSPKRAKNSSPEPPKPKSTKSKATTPRVKKEKPAADLESSVLTDEERHERKRASAVLYQKYKNRSSCLNPGSKEIPKGSPDCLSGLTFVVTGVLESMEREEAESVIKEYGGKVMTVVGKKLKYLVVGEEAGPKKLAVAEELNIPILSEDGLFDLIREKSGIAKQVKEEKKSPKKEHSSEEKGKKEVKTSRRSSDKKEKEATKLKYGEKHDIAKHKVKEEHTSPKETKDKLNDVPAVTLKVKKEPSSQKEHPPSPRTADLKTLDVVGMAWVDKHKPTSIKEIPQRPNPWAKNDDGSFYKAALLSGPPGIGKTTTATLVVKELGFDAVEFNASDTRSKRLLKDEVSTLLSNKSLSGYFTGQGQAVSRKHVLIMDEVDGMAGNEDRGGMQELIALIKDSSIPIICMCNDRNHPKIRSLVNYCYDLRFQRPRLEQIKGKIMSICFKEKVKISPAKVEEIIAATNNDIRQSINHIALLSAKEDASQKSGQQVATKDLKLGPWEVVRKVFTADEHKHMSFADKSDLFFHDYSLAPLFVQQNYLQVLPQGNKKDVLAKVAATADALSLGDLVEKRIRANSAWSLLPTQAFFSSVLPGEHMCGHFTGQINFPGWLGKNSKSGKRARLAQELHDHTRVCTSGSRLSVRLDYAPFLLDNIVRPLAKDGQEGVPAALDVMKDYHLLREDLDSLVELTSWPGKKSPLDAVDGRVKAALTRSYNKEVMAYSYSAQAGIKKKKSEAAGADDDYLDEGPGEEDGAGGHLSSEEDEDKDNLELDSLIKAKKRTTTSKASDSDSLPHPLPKHPPHCCCCWARPGQSQGHSQPPSSRIHAHAREPGFCAPLPFPFPLRSVNRCRALYRYFACLRRASCCVFWLHMFTFQHSPKRHCSECESIQGSSGGSVVNEGFE",\n+  "md5" : "ea9924e11f7decc417e8d9ed8b9c682e",\n+  "matches" : [ {\n+    "signature" : {\n+      "accession" : "mobidb-lite",\n+      "name" : "disorder_prediction",\n+      "description" : "consensus disorder prediction",\n+      "signatureLibraryRelease" : {\n+        "library" : "MOBIDB_LITE",\n+        "version" : "2.0"\n+      },\n+      "entry" : null\n+    },\n+    "locations" : [ {\n+      "start" : 49,\n+      "end" : 74,\n+      "location-fragments" : [ {\n+        "start" : 49,\n+      
  "end" : 74,\n+        "dc-status" : "CONTINUOUS"\n+      } ],\n+      "sequence-feature" : "Polyampholyte"\n+    } ],\n+    "model-ac" : "mobidb-lite"\n+  }, {\n+    "signature" : {\n+      "accession" : "mobidb-lite",\n+      "name" : "disorder_prediction",\n+      "description" : "consensus disorder prediction",\n+      "signatureLibraryRelease" : {\n+        "library" : "MOBIDB_LITE",\n+        "version" : "2.0"\n+      },\n+      "entry" : null\n+    },\n+    "locations" : [ {\n+      "start" : 1,\n+      "end" : 74,\n+      "location-fragments" : [ {\n+        "start" : 1,\n+        "end" : 74,\n+        "dc-status" : "CONTINUOUS"\n+      } ],\n+      "sequence-feature" : ""\n+    } ],\n+    "model-ac" : "mobidb-lite"\n+  }, {\n+    "signature" : {\n+      "accession" : "mobidb-lite",\n+      "name" : "disorder_prediction",\n+      "description" : "consensus disorder prediction",\n+      "signatureLibraryRelease" : {\n+        "library" : "MOBIDB_LITE",\n+        "version" : "2.0"\n+      },\n+      "entry" : null\n+    },\n+    "locations" : [ {\n+      "start" : 1,\n+      "end" : 18,\n+      "location-fragments" : [ {\n+        "start" : 1,\n+        "end" : 18,\n+        "dc-status" : "CONTINUOUS"\n+      } ],\n+      "sequence-feature" : "Polyampholyte"\n+    } ],\n+    "model-ac" : "mobidb-lite"\n+  }, {\n+    "signature" : {\n+      "accession" : "mobidb-lite",\n+      "name" : "disorder_prediction",\n+      "description" : "consensus disorder prediction",\n+      "signatureLibraryRelease" : {\n+        "library" : "MOBIDB_LITE",\n+        "version" : "2.0"\n+      },\n+      "entry" : null\n+    },\n+    "locations" : [ {\n+      "start" : 770,\n+      "end" : 789,\n+      "location-fragments" : [ {\n+        "start" : 770,\n+        "end" : 789,\n+        "dc-status" : "CONTINUOUS"\n+      } ],\n+      "sequence-feature" : ""\n+    } ],\n+    "model-ac" : "mobidb-lite"\n+  }, {\n+    "signature" : {\n+      "accession" : "mobidb-lite",\n+      
"name" : "disorder_prediction",\n+      "description" : "consensus disorder prediction",\n+      "signatureLibrary'..b'36c8539c0f5b069cce1b285",\n+  "matches" : [ {\n+    "signature" : {\n+      "accession" : "mobidb-lite",\n+      "name" : "disorder_prediction",\n+      "description" : "consensus disorder prediction",\n+      "signatureLibraryRelease" : {\n+        "library" : "MOBIDB_LITE",\n+        "version" : "2.0"\n+      },\n+      "entry" : null\n+    },\n+    "locations" : [ {\n+      "start" : 1,\n+      "end" : 25,\n+      "location-fragments" : [ {\n+        "start" : 1,\n+        "end" : 25,\n+        "dc-status" : "CONTINUOUS"\n+      } ],\n+      "sequence-feature" : ""\n+    } ],\n+    "model-ac" : "mobidb-lite"\n+  }, {\n+    "signature" : {\n+      "accession" : "mobidb-lite",\n+      "name" : "disorder_prediction",\n+      "description" : "consensus disorder prediction",\n+      "signatureLibraryRelease" : {\n+        "library" : "MOBIDB_LITE",\n+        "version" : "2.0"\n+      },\n+      "entry" : null\n+    },\n+    "locations" : [ {\n+      "start" : 375,\n+      "end" : 431,\n+      "location-fragments" : [ {\n+        "start" : 375,\n+        "end" : 431,\n+        "dc-status" : "CONTINUOUS"\n+      } ],\n+      "sequence-feature" : ""\n+    } ],\n+    "model-ac" : "mobidb-lite"\n+  }, {\n+    "signature" : {\n+      "accession" : "mobidb-lite",\n+      "name" : "disorder_prediction",\n+      "description" : "consensus disorder prediction",\n+      "signatureLibraryRelease" : {\n+        "library" : "MOBIDB_LITE",\n+        "version" : "2.0"\n+      },\n+      "entry" : null\n+    },\n+    "locations" : [ {\n+      "start" : 1,\n+      "end" : 16,\n+      "location-fragments" : [ {\n+        "start" : 1,\n+        "end" : 16,\n+        "dc-status" : "CONTINUOUS"\n+      } ],\n+      "sequence-feature" : "Polar"\n+    } ],\n+    "model-ac" : "mobidb-lite"\n+  } ],\n+  "xref" : [ {\n+    "name" : "FUN_000018-T1 FUN_000018",\n+    "id" : 
"FUN_000018-T1"\n+  } ]\n+},{\n+  "sequence" : "MDFIHEAVGTSAAPSANPGPMPLCQPVRLELPFSPRHSFALGHFPFQLCPCLYAVWVQTMGLVLLLLIVFAPFLFRVLFKPCNPKRYLTT",\n+  "md5" : "5f4207144e447f181edd730fcff61ef2",\n+  "matches" : [ ],\n+  "xref" : [ {\n+    "name" : "FUN_000010-T1 FUN_000010",\n+    "id" : "FUN_000010-T1"\n+  } ]\n+},{\n+  "sequence" : "MSLDRRGEITTPPTRYDLTLGSDKSSSLSRSEAGTYDVIQAEIQHAKRQELATGVATASHQNGNGNGNGHTLSTQHDIEAEVKKRKWPTEPSYFLAKELLMTERTYKKDLDVLNTTFRQVLSLGDVEQLQPLFELLDSLAQHHNLFLRDIEHRMVQWEGRGGHEAHRIGDVMMKHMAALPIYDEYVQTHLDILHCMNDMYEGDERFRQVYKEFEQQKVCYLPIGELLLKPLNRLLHYQLILERLCDYYGEEHIDYADAMAVHHLLVRSTKGIRSQLPDSANFVELCELQRDINFEQLVQPHRRLIRQGCLLKHSKRGLQQRMFFLFSDLLLYGSKSPLDQSFRILGHVPVRSLLTENAEHNTFSIFGGQCAITVSAGTTAEKTLWLAELSKAAADIKNRPPNMQLQLTTLKNCSSSEEGLDLFGLSNGNNSSLNSSVNGGGPLTTQQQKLQLQQQQQNRTQPSRSNTALHVCWHRGATVGLGDHLIAAEHQLSGYLLRKFKNSSGWQKLWVVFTSFCLYFYKSYQDEFALASLPLLGYTVGPPGHQDAVQKEFVFKLSFKNHVYFFRAESAHTYNR",\n+  "md5" : "039e9dcd47be3a3db8b02494ffde402e",\n+  "matches" : [ {\n+    "signature" : {\n+      "accession" : "mobidb-lite",\n+      "name" : "disorder_prediction",\n+      "description" : "consensus disorder prediction",\n+      "signatureLibraryRelease" : {\n+        "library" : "MOBIDB_LITE",\n+        "version" : "2.0"\n+      },\n+      "entry" : null\n+    },\n+    "locations" : [ {\n+      "start" : 15,\n+      "end" : 31,\n+      "location-fragments" : [ {\n+        "start" : 15,\n+        "end" : 31,\n+        "dc-status" : "CONTINUOUS"\n+      } ],\n+      "sequence-feature" : "Polar"\n+    } ],\n+    "model-ac" : "mobidb-lite"\n+  }, {\n+    "signature" : {\n+      "accession" : "mobidb-lite",\n+      "name" : "disorder_prediction",\n+      "description" : "consensus disorder prediction",\n+      "signatureLibraryRelease" : {\n+        "library" : "MOBIDB_LITE",\n+        "version" : "2.0"\n+      },\n+      "entry" : null\n+    },\n+    "locations" : [ {\n+      "start" : 1,\n+      "end" : 31,\n+      "location-fragments" : [ {\n+        "start" : 1,\n+      
  "end" : 31,\n+        "dc-status" : "CONTINUOUS"\n+      } ],\n+      "sequence-feature" : ""\n+    } ],\n+    "model-ac" : "mobidb-lite"\n+  } ],\n+  "xref" : [ {\n+    "name" : "FUN_000013-T1 FUN_000013",\n+    "id" : "FUN_000013-T1"\n+  } ]\n+} ]\n+}\n'
b
diff -r 8ee90fc5fe11 -r fcedfe919603 test-data/prot_out/out.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/prot_out/out.tsv Mon Nov 15 17:53:24 2021 +0000
b
b'@@ -0,0 +1,77 @@\n+FUN_000011-T1\tea9924e11f7decc417e8d9ed8b9c682e\t923\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t49\t74\t-\tT\t09-11-2021\t-\t-\n+FUN_000011-T1\tea9924e11f7decc417e8d9ed8b9c682e\t923\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t1\t74\t-\tT\t09-11-2021\t-\t-\n+FUN_000011-T1\tea9924e11f7decc417e8d9ed8b9c682e\t923\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t1\t18\t-\tT\t09-11-2021\t-\t-\n+FUN_000011-T1\tea9924e11f7decc417e8d9ed8b9c682e\t923\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t770\t789\t-\tT\t09-11-2021\t-\t-\n+FUN_000011-T1\tea9924e11f7decc417e8d9ed8b9c682e\t923\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t771\t789\t-\tT\t09-11-2021\t-\t-\n+FUN_000011-T1\tea9924e11f7decc417e8d9ed8b9c682e\t923\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t188\t259\t-\tT\t09-11-2021\t-\t-\n+FUN_000011-T1\tea9924e11f7decc417e8d9ed8b9c682e\t923\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t20\t46\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t2637\t2661\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t520\t541\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t2396\t2471\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t1762\t1784\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t1520\t1558\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder 
prediction\t3633\t3682\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t3502\t3704\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t2538\t2562\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t3537\t3551\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t2753\t2799\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t3133\t3158\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t2423\t2442\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t1483\t1502\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t1762\t1787\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t2235\t2261\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t3324\t3375\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t2590\t2630\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t2679\t2705\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder 
prediction\t1086\t1105\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t3816\t3830\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t2152\t2318\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t3504\t3532\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder predi'..b'\t3371\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t3718\t3778\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t3596\t3621\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t2095\t2134\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t486\t508\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t447\t467\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t3040\t3060\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t542\t558\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t1947\t2024\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder 
prediction\t520\t581\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t2398\t2422\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t2774\t2799\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t49\t68\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t3910\t3929\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t2488\t2710\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t2211\t2234\t-\tT\t09-11-2021\t-\t-\n+FUN_000008-T1\t85417aa859c7206bc79b01bf4d7e605e\t396\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t326\t352\t-\tT\t09-11-2021\t-\t-\n+FUN_000008-T1\t85417aa859c7206bc79b01bf4d7e605e\t396\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t160\t194\t-\tT\t09-11-2021\t-\t-\n+FUN_000008-T1\t85417aa859c7206bc79b01bf4d7e605e\t396\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t215\t238\t-\tT\t09-11-2021\t-\t-\n+FUN_000008-T1\t85417aa859c7206bc79b01bf4d7e605e\t396\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t215\t396\t-\tT\t09-11-2021\t-\t-\n+FUN_000009-T1\t275f31fdcd1e48bab0e9176407572897\t130\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t33\t61\t-\tT\t09-11-2021\t-\t-\n+FUN_000005-T1\t43d29b4b17225ce880c77b3dbac06f82\t662\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t47\t112\t-\tT\t09-11-2021\t-\t-\n+FUN_000005-T1\t43d29b4b17225ce880c77b3dbac06f82\t662\tMobiDBLite\tmobidb-lite\tconsensus disorder 
prediction\t39\t123\t-\tT\t09-11-2021\t-\t-\n+FUN_000016-T1\t805cdbb649e54f1527088be8a18adb3d\t478\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t299\t331\t-\tT\t09-11-2021\t-\t-\n+FUN_000017-T1\t5f92e26b5d87a35b1b6c91cf1c65ceb4\t124\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t54\t76\t-\tT\t09-11-2021\t-\t-\n+FUN_000017-T1\t5f92e26b5d87a35b1b6c91cf1c65ceb4\t124\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t54\t73\t-\tT\t09-11-2021\t-\t-\n+FUN_000018-T1\t5ca2cbf7936c8539c0f5b069cce1b285\t431\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t1\t25\t-\tT\t09-11-2021\t-\t-\n+FUN_000018-T1\t5ca2cbf7936c8539c0f5b069cce1b285\t431\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t375\t431\t-\tT\t09-11-2021\t-\t-\n+FUN_000018-T1\t5ca2cbf7936c8539c0f5b069cce1b285\t431\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t1\t16\t-\tT\t09-11-2021\t-\t-\n+FUN_000013-T1\t039e9dcd47be3a3db8b02494ffde402e\t576\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t15\t31\t-\tT\t09-11-2021\t-\t-\n+FUN_000013-T1\t039e9dcd47be3a3db8b02494ffde402e\t576\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t1\t31\t-\tT\t09-11-2021\t-\t-\n'
b
diff -r 8ee90fc5fe11 -r fcedfe919603 test-data/prot_out/out.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/prot_out/out.xml Mon Nov 15 17:53:24 2021 +0000
b
b'@@ -0,0 +1,1102 @@\n+<?xml version="1.0" encoding="UTF-8"?><protein-matches xmlns="http://www.ebi.ac.uk/interpro/resources/schemas/interproscan5" interproscan-version="5.52-86.0">\n+  <protein>\n+    <sequence md5="5803f8be5816d8c362fa0a4bff18f8a3">MKFRALMQDPLYMKEFQAIVATLTKLAKDCVMILGSRQMHFIVNEDQSSAASPLVWAGITAEEYFPEYRMEAAHPDQEYIVLGVSSANLGRALSVLRGGGVNSCKLKLQRIQFPCISVIASVLTSSSTEAREVVHDVPVTIIPGSDWSAYVVPRVPNSQLALGLPSLRLLKSLIDKLKNISPSLEFQVNVDGELNVIATSEMSTVTSRFQKLLIRTVSGSQQEASCSVDSRKASAFFGALQLPNEELTIGIDREHSIHLQIDVRQDVVLHSILPAVCM</sequence>\n+    <xref id="FUN_000006-T1" name="FUN_000006-T1 FUN_000006"/>\n+    <matches/>\n+  </protein>\n+  <protein>\n+    <sequence md5="ea9924e11f7decc417e8d9ed8b9c682e">MEVDLDESIKEAAPEKKVHSITRSSPSPKRAKNSSPEPPKPKSTKSKATTPRVKKEKPAADLESSVLTDEERHERKRASAVLYQKYKNRSSCLNPGSKEIPKGSPDCLSGLTFVVTGVLESMEREEAESVIKEYGGKVMTVVGKKLKYLVVGEEAGPKKLAVAEELNIPILSEDGLFDLIREKSGIAKQVKEEKKSPKKEHSSEEKGKKEVKTSRRSSDKKEKEATKLKYGEKHDIAKHKVKEEHTSPKETKDKLNDVPAVTLKVKKEPSSQKEHPPSPRTADLKTLDVVGMAWVDKHKPTSIKEIPQRPNPWAKNDDGSFYKAALLSGPPGIGKTTTATLVVKELGFDAVEFNASDTRSKRLLKDEVSTLLSNKSLSGYFTGQGQAVSRKHVLIMDEVDGMAGNEDRGGMQELIALIKDSSIPIICMCNDRNHPKIRSLVNYCYDLRFQRPRLEQIKGKIMSICFKEKVKISPAKVEEIIAATNNDIRQSINHIALLSAKEDASQKSGQQVATKDLKLGPWEVVRKVFTADEHKHMSFADKSDLFFHDYSLAPLFVQQNYLQVLPQGNKKDVLAKVAATADALSLGDLVEKRIRANSAWSLLPTQAFFSSVLPGEHMCGHFTGQINFPGWLGKNSKSGKRARLAQELHDHTRVCTSGSRLSVRLDYAPFLLDNIVRPLAKDGQEGVPAALDVMKDYHLLREDLDSLVELTSWPGKKSPLDAVDGRVKAALTRSYNKEVMAYSYSAQAGIKKKKSEAAGADDDYLDEGPGEEDGAGGHLSSEEDEDKDNLELDSLIKAKKRTTTSKASDSDSLPHPLPKHPPHCCCCWARPGQSQGHSQPPSSRIHAHAREPGFCAPLPFPFPLRSVNRCRALYRYFACLRRASCCVFWLHMFTFQHSPKRHCSECESIQGSSGGSVVNEGFE</sequence>\n+    <xref id="FUN_000011-T1" name="FUN_000011-T1 FUN_000011"/>\n+    <matches>\n+      <mobidblite-match>\n+        <signature ac="mobidb-lite" desc="consensus disorder prediction" name="disorder_prediction">\n+          <signature-library-release library="MOBIDB_LITE" version="2.0"/>\n+        </signature>\n+        <model-ac>mobidb-lite</model-ac>\n+  
      <locations>\n+          <mobidblite-location sequence-feature="Polyampholyte" start="49" end="74">\n+            <location-fragments>\n+              <mobidblite-location-fragment start="49" end="74" dc-status="CONTINUOUS"/>\n+            </location-fragments>\n+          </mobidblite-location>\n+        </locations>\n+      </mobidblite-match>\n+      <mobidblite-match>\n+        <signature ac="mobidb-lite" desc="consensus disorder prediction" name="disorder_prediction">\n+          <signature-library-release library="MOBIDB_LITE" version="2.0"/>\n+        </signature>\n+        <model-ac>mobidb-lite</model-ac>\n+        <locations>\n+          <mobidblite-location sequence-feature="" start="1" end="74">\n+            <location-fragments>\n+              <mobidblite-location-fragment start="1" end="74" dc-status="CONTINUOUS"/>\n+            </location-fragments>\n+          </mobidblite-location>\n+        </locations>\n+      </mobidblite-match>\n+      <mobidblite-match>\n+        <signature ac="mobidb-lite" desc="consensus disorder prediction" name="disorder_prediction">\n+          <signature-library-release library="MOBIDB_LITE" version="2.0"/>\n+        </signature>\n+        <model-ac>mobidb-lite</model-ac>\n+        <locations>\n+          <mobidblite-location sequence-feature="Polyampholyte" start="1" end="18">\n+            <location-fragments>\n+              <mobidblite-location-fragment start="1" end="18" dc-status="CONTINUOUS"/>\n+            </location-fragments>\n+          </mobidblite-location>\n+        </locations>\n+      </mobidblite-match>\n+      <mobidblite-match>\n+        <signature ac="mobidb-lite" desc="consensus disorder prediction" name="disorder_prediction">\n+          <signature-library-release library="MOBIDB_LITE" version="2.0"/>\n+        </signature>\n+        <model-ac>mobidb-lite</model-ac>\n+        <locations>\n+          <mobidblite-location sequence-feature="" start="770" end="789">\n+            
<location-fragments>\n+              <mobidblite-location-fragment start="770"'..b'disorder_prediction">\n+          <signature-library-release library="MOBIDB_LITE" version="2.0"/>\n+        </signature>\n+        <model-ac>mobidb-lite</model-ac>\n+        <locations>\n+          <mobidblite-location sequence-feature="" start="1" end="25">\n+            <location-fragments>\n+              <mobidblite-location-fragment start="1" end="25" dc-status="CONTINUOUS"/>\n+            </location-fragments>\n+          </mobidblite-location>\n+        </locations>\n+      </mobidblite-match>\n+      <mobidblite-match>\n+        <signature ac="mobidb-lite" desc="consensus disorder prediction" name="disorder_prediction">\n+          <signature-library-release library="MOBIDB_LITE" version="2.0"/>\n+        </signature>\n+        <model-ac>mobidb-lite</model-ac>\n+        <locations>\n+          <mobidblite-location sequence-feature="" start="375" end="431">\n+            <location-fragments>\n+              <mobidblite-location-fragment start="375" end="431" dc-status="CONTINUOUS"/>\n+            </location-fragments>\n+          </mobidblite-location>\n+        </locations>\n+      </mobidblite-match>\n+      <mobidblite-match>\n+        <signature ac="mobidb-lite" desc="consensus disorder prediction" name="disorder_prediction">\n+          <signature-library-release library="MOBIDB_LITE" version="2.0"/>\n+        </signature>\n+        <model-ac>mobidb-lite</model-ac>\n+        <locations>\n+          <mobidblite-location sequence-feature="Polar" start="1" end="16">\n+            <location-fragments>\n+              <mobidblite-location-fragment start="1" end="16" dc-status="CONTINUOUS"/>\n+            </location-fragments>\n+          </mobidblite-location>\n+        </locations>\n+      </mobidblite-match>\n+    </matches>\n+  </protein>\n+  <protein>\n+    <sequence 
md5="5f4207144e447f181edd730fcff61ef2">MDFIHEAVGTSAAPSANPGPMPLCQPVRLELPFSPRHSFALGHFPFQLCPCLYAVWVQTMGLVLLLLIVFAPFLFRVLFKPCNPKRYLTT</sequence>\n+    <xref id="FUN_000010-T1" name="FUN_000010-T1 FUN_000010"/>\n+    <matches/>\n+  </protein>\n+  <protein>\n+    <sequence md5="039e9dcd47be3a3db8b02494ffde402e">MSLDRRGEITTPPTRYDLTLGSDKSSSLSRSEAGTYDVIQAEIQHAKRQELATGVATASHQNGNGNGNGHTLSTQHDIEAEVKKRKWPTEPSYFLAKELLMTERTYKKDLDVLNTTFRQVLSLGDVEQLQPLFELLDSLAQHHNLFLRDIEHRMVQWEGRGGHEAHRIGDVMMKHMAALPIYDEYVQTHLDILHCMNDMYEGDERFRQVYKEFEQQKVCYLPIGELLLKPLNRLLHYQLILERLCDYYGEEHIDYADAMAVHHLLVRSTKGIRSQLPDSANFVELCELQRDINFEQLVQPHRRLIRQGCLLKHSKRGLQQRMFFLFSDLLLYGSKSPLDQSFRILGHVPVRSLLTENAEHNTFSIFGGQCAITVSAGTTAEKTLWLAELSKAAADIKNRPPNMQLQLTTLKNCSSSEEGLDLFGLSNGNNSSLNSSVNGGGPLTTQQQKLQLQQQQQNRTQPSRSNTALHVCWHRGATVGLGDHLIAAEHQLSGYLLRKFKNSSGWQKLWVVFTSFCLYFYKSYQDEFALASLPLLGYTVGPPGHQDAVQKEFVFKLSFKNHVYFFRAESAHTYNR</sequence>\n+    <xref id="FUN_000013-T1" name="FUN_000013-T1 FUN_000013"/>\n+    <matches>\n+      <mobidblite-match>\n+        <signature ac="mobidb-lite" desc="consensus disorder prediction" name="disorder_prediction">\n+          <signature-library-release library="MOBIDB_LITE" version="2.0"/>\n+        </signature>\n+        <model-ac>mobidb-lite</model-ac>\n+        <locations>\n+          <mobidblite-location sequence-feature="Polar" start="15" end="31">\n+            <location-fragments>\n+              <mobidblite-location-fragment start="15" end="31" dc-status="CONTINUOUS"/>\n+            </location-fragments>\n+          </mobidblite-location>\n+        </locations>\n+      </mobidblite-match>\n+      <mobidblite-match>\n+        <signature ac="mobidb-lite" desc="consensus disorder prediction" name="disorder_prediction">\n+          <signature-library-release library="MOBIDB_LITE" version="2.0"/>\n+        </signature>\n+        <model-ac>mobidb-lite</model-ac>\n+        <locations>\n+          <mobidblite-location sequence-feature="" start="1" end="31">\n+            
<location-fragments>\n+              <mobidblite-location-fragment start="1" end="31" dc-status="CONTINUOUS"/>\n+            </location-fragments>\n+          </mobidblite-location>\n+        </locations>\n+      </mobidblite-match>\n+    </matches>\n+  </protein>\n+</protein-matches>\n'
b
diff -r 8ee90fc5fe11 -r fcedfe919603 test-data/prots.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/prots.fa Mon Nov 15 17:53:24 2021 +0000
b
b'@@ -0,0 +1,155 @@\n+>FUN_000001-T1 FUN_000001\n+YPFDSHRFVQYNPTSLERNFKYDVLTEHDLGVTVDLINRELYQADSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSW\n+LRKSEYISTEQTRFQPQNLENIEAKVGYNVKKSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFP\n+DFTNWKFPCAQVIFDSDPAPAGKNVPAQLEEMSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFINGELYKEEEEYEY\n+KIAREYNWNVKTKASKGYEENYFFVMRQDGIYYNELETRVRLNKRRVKVGQQPNNTKLVSIFMRIHL\n+>FUN_000002-T1 FUN_000002\n+MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPNPSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGN\n+NSTCTASQPYALSGALSMLQQSPSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCEVCGEYRSNPLISRPLFC\n+>FUN_000003-T1 FUN_000003\n+MIHSAKTTLVEWTEAMEDGNKGYQLIEKYYLDDQQKARELNIKRQLLQADIDKRRKQVVLLYDEQMTLEKNLERTACLYR\n+SAHAERRQMVETWKSAVNQMTQREHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELAIESLNEESSDMK\n+NQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRDEKAKELENFASVMEKVNARLKSVQNKALNAEQRL\n+QILEEMMQAEETALRNLDKEQEKVNEMLYRTQRQVIELQDEEKVLKVQNDSLNSNLAAINRNQQQVNNELKRQTEIHYSL\n+SFKCLEAERRLAEIKGLADDPEVEATNMARLNTLEQEYEKLQRLIATTEAQNKKLNYNMNNLVVQYNADEKELEMVRFKI\n+KEAQVYCEGTVKRLRQIRYENSELIVDLNMVKMRCSDLEVGIGGCEQGTYDLEQHRLAFRRAIKDRTVELRSQEDVLLLK\n+KKHLNEELSTLRADLGERKKQIEAMKARFELTAQLLGKNEDGSIMTSTQLKVVSAQERQMLADEGDALNKKVLKAEKEVV\n+ALENTLRQFDKSNDNYRKTFRSVDENSKGEL\n+>FUN_000004-T1 FUN_000004\n+MRCVFCGSGSEQQHSRWEIKMLQESCRTDHGFHQDSQAIQYLYEILASYNRDEQRAFLQFVTGSPRLPTGGFKALTPPLT\n+IVRKTLDENQNPNDYLPSVMTCVNYLKLPDYSSREVMRQKLKVAANEGSMSFHLS\n+>FUN_000005-T1 
FUN_000005\n+MSFCDFLRDAALGANSANLSIVAAALSAARDVGGGSDGGGSAGGATPATGASASSVGNTSAVGASSSSNSSAGQAASSNS\n+NNVTATGSGSAPGGGPTSTGTTSGTQHGSGSGAAAAVDSESDDSEVGRLQALLEARGLPPHLFGALGPRVTHILHRTIGN\n+SSSSKANQLLQGLQSHDESQQLQAAIEMCQMLVMGNEDTLAGFPIKQVVPALIQLLRMEHNFDIMNNACRALAYMLEALP\n+RSSGTVVEAVPVFLEKLQVIQCMDVAEQSLSALEILSRRHNKAILQANGISACLTYLDFFSIVAQRAALAIAANCCLNMH\n+PEEFHFVAESLPLLARLLSQQDKKCIESVCSAFCRLVESFQHDGQRLQQIASPDLLKNCQQLLLVTPAILNTGTFTAVVR\n+MLSLMCCSCPDLAISLLRNDIAATLLYLLTGNAEPAAASATHVELISRSPSELYELTCLIGELMPRLPLDGIFAVDSLLD\n+RPTLNTQDQVHWQWRDDRGSWHNYSTIDSRLIEAANQSSEDEISLSTFGRTYTVDFHAMQQINEDTGTTRPVQRRLNHNY\n+VAPMSAGQDLTTTSAGSAAAGGASTSAAAAAASSNNNNNNNNNPPGNSVNLNQVKRRPSLDARIACLKVRNKMFCLFTRY\n+TLKLNNLPNYNVFFMKYFRRHS\n+>FUN_000006-T1 FUN_000006\n+MKFRALMQDPLYMKEFQAIVATLTKLAKDCVMILGSRQMHFIVNEDQSSAASPLVWAGITAEEYFPEYRMEAAHPDQEYI\n+VLGVSSANLGRALSVLRGGGVNSCKLKLQRIQFPCISVIASVLTSSSTEAREVVHDVPVTIIPGSDWSAYVVPRVPNSQL\n+ALGLPSLRLLKSLIDKLKNISPSLEFQVNVDGELNVIATSEMSTVTSRFQKLLIRTVSGSQQEASCSVDSRKASAFFGAL\n+QLPNEELTIGIDREHSIHLQIDVRQDVVLHSILPAVCM\n+>FUN_000007-T1 FUN_000007\n+MCGNPAVGNGTRALILVGGYGTRLRPLTLSTPKPLVEFANKPILLHQLEALVDAGCRQVILAVSYRAEQMEKELKVEAKK\n+LGVELIFSHETEPLGTAGPLALAKTILAASSEPFFVLNSDVICDFPFKQLVQFHCNHGKEGTIVVTKVEEPSKYGVVLYD\n+ENGCIKNFIEKPQEFVSNKINAGIYIFNPSVLDRIEVKPTSIEKEVFPEMTQQQELYAMDLTGFWMDIGQPKDFLTGMCL\n+YLSSLRQKQSPKLYTGPGVVGNVLVDPTAKIGEGCRIGPNVTIGPDVVIEDGVCIKRSTILKGAIVRSHSWLDSCIVGWR\n+STVGRWVRIEGITVLGEDVIVKDELYINGGQVLPHKSIAASLRGAIVQAGQLVILPDEEVFSHVQGVWNLSSDQGNLGSF\n+VVTNIRLVWFADANETFNISLPYLQIESSKYGPALVIQTAETGGGYVLGFRVDPAERLNELFKELSSLHTVYGEHPNFGI\n+QYNANDARRRLEAASEEAAQASQIKVDNFEELDERQEREINTKLNSYLAEGCLGKVPSQGERAPVYCKELGFAMEPIGDG\n+YKLQDLWNVMPTKMETME\n+>FUN_000008-T1 
FUN_000008\n+MKTLSVRLHRGTEFIKDTVHKALVMSAPTPVAPATAPAPKIVDHSLKRKLSGAGGLMGCSSIGSMTSSIAGSSRSHHYAL\n+TSQVASSQVIPLPSQVPTAAFLRTYTVAPTALHRSAAARKRNPSTDSLLMDLCLFKPIRPMPITPIKIHKFRGFEVKKPK\n+FVPAGNPDSEDDEDNDEDGTVRKPKPSNLTLPTISDSAFVPMPYIETTNTAINATTTTNSGSRSRSLNTHTSGSAQAITK\n+PKRRRRAPMLTAKRRRKALDTELTTSADAGTEDKAPAVRKATAARGGSKRSRGESITAPTPAEPIKSPVAIKAPTKRKST\n+SRSEAAKRSRVASVQNDTVLTATSTTSADSIRKAATKRIAANEKVAKRSRGSAALSARPSPPMTRQRARQQISAST\n+>FUN_000009-T1 FUN_000009\n+MVTLRLPWCIRHKPPLCRIGLSHGCECDNSKKMAASSHAPESDRRAQRLRTQSNWNPPDHSALSLGKLVSRKLTPTAVGH\n+WVVGRQRAACACAGGPNADWTDGQPIESSRGCIFQPAPHCHGGRIARHFG\n+>FUN_000010-T1 FUN_000010\n+MDFIHEAVGTSAAPSANPGPMPLCQPVRLELPFSPRHSFALGHFPFQLCPCLYAVWVQTMGLVLLLLIVFAPFLFRVLFK\n+PCNPKRYLTT\n+>FUN_000011-T1 FUN_000011\n+MEVDLDESIKEAAPEKKVHSITRSSPSPKRAKNSSPEPPKPKSTKSKATTPRVKKEKPAADLESSVLTDEERHERKRASA\n+VLYQKYKNRSSCLNPGSKEIPKGSPDCLSGLTFVVTGVLESMEREEAESVIKEYGGKVMTVVGKKLKYLVVGEEAGPKKL\n+AVAEELNIPILSEDGLFDLIREKSGIAKQVKEEKKSPK'..b'YVSVDESHSAASKSPVPGTGGGTEGYPHRVPTIECEEPSIEEDENSSERRHLKVGGQDTNRLSLDRSRSDETGSW\n+MTVECDEFIGSDTSDNEPRTLEPDRNVLETQATLEDANPLEYSNCATPTSDLNILLTPPNASPQIEKSVLETFEKYTGSS\n+DTGKKKNTLDKQSDRSKSSDSWTSGEKDTSPQRQQDWSLSVGKEKSSVEEESSVSCSIARPLGISQDFGKEEARKCQELK\n+QRMLQLEVGKEEITPTPSNEQTPTNEPKILVSKKPSTPTLEKQSPIDLGTSTESYLEPIEERIAKILDRGGARTEDSESS\n+SGGSRKPPRIEKPARANAGKKLSVTRADAGKSGSDRSSQESKSSFDSKGSLSVESRGSFETESSSGSLGAAQRRGELAQK\n+EQQSTWRPFPIESSNSSSTDDPWHHVETDGGYERYDAQNPLRDSSDSDVKEASPDDQKDASDASYQDELNDFPATFGYPA\n+MTSSLGGIGVNPTDIIGYSTGFTLGRTLSRISERSTASEKSSMEDDVSKASTHSVSMRDESVGSTDHQPSLSSDSRSNTN\n+LAYISDADRRTSAEMPEIPCDSATGDRLSSFGSLNEPKSPTLVTGRFSVTHVDEQQGDDVERHTLMCLSNAGSQDSEDWP\n+LPEIPFDHVPVKPADSLYAMPDLDKPVPKSFCWKASLSFQQSQDSLDWPSPPSSAIGAPIIVENIETYYASEVQSADKVI\n+LDEEMAVGPPDVAKVLPYEDTAYLMSAAFDDNDFGNEQLQPDTVSCLSSTLSAASCLSSSLNVSCTTSSTQATARALRKN\n+SSPEVIVAQPTRSPAPRSPLSEDELFSSDDVFMPGTIKVQLSPDAQLRKLSKGSNNSDTSIDDILSGSTTYLEDQTTVRK\n+NYEARLSSGGGGASCKKCSHSSHSEEETSSLGTDLDGTVRMGGLQQKKCTHSSHSEDTSIGLSISEWSTGTNTVRQYANL\n+SGSDSLSAVSTHSCAKSEKSNQTKSSISSINKSAESLNEQSGGSSFSHKFSGDNGSSDGL
RYDMLSNSETDKLSEATSAT\n+RSDDTTLTLTEMAHTISEWSTSSSRTLVGVAPGEYLPLKQALSGNKTSLSSPSEEKRCALPQVHRRSGSNGNQARAAQEH\n+ADSQTGPETSAAARKRRSLEMMSKLYQSQEICSESESPFVERLYAHSEKLTERYQSQEFVPLHGGPPASHLASSTTSQIQ\n+TQQPQQVRQKPRAPQPPTKPKPAVTRPIMQALLNKMKQPGLAEQAAEAAEAEEKKAMIAASAVAAKPPPPPVPTVPPIVT\n+PSDLPGDAVAPPPKPLAKHHSYDDRTLSKTQIREFKTTSKQLRQSSSFHEHMLSKSQQSSQELPMRIDEERDPHSTSSAT\n+NTTTTTNTLNSESTEPNSPQMPQRADKLVRCSPYYSSSLSSESPPNQLLQKPPRKTATQLSAGAVAASLKSPPSGNDTDS\n+SLDVRGQEAKMRSRGYRKKRQLPVKRMRANLTAAALLEQAESSECSEGYVPEVDSGSSEYSSCQRDDQYLEFDEELERDQ\n+TDDYEDYPQYSGKFESLDMSDNVDEMGFPRYDRLSHITKPMYHQALVMERPNPVQLPAPANHPMPPATGQPVKPARTKKR\n+QFKREDSTAAGTSGHSTAAPQVRPYHGRSYCNPEESEYETRGGGLSDELANSSEDSCSGFGGDAGASGSGTIRRGTTKGA\n+GQDQEQGTGGQARHVPYPDFLSDYESEPIEYERYACGLDIRVDPPPKFHDSDELSDQ\n+>FUN_000013-T1 FUN_000013\n+MSLDRRGEITTPPTRYDLTLGSDKSSSLSRSEAGTYDVIQAEIQHAKRQELATGVATASHQNGNGNGNGHTLSTQHDIEA\n+EVKKRKWPTEPSYFLAKELLMTERTYKKDLDVLNTTFRQVLSLGDVEQLQPLFELLDSLAQHHNLFLRDIEHRMVQWEGR\n+GGHEAHRIGDVMMKHMAALPIYDEYVQTHLDILHCMNDMYEGDERFRQVYKEFEQQKVCYLPIGELLLKPLNRLLHYQLI\n+LERLCDYYGEEHIDYADAMAVHHLLVRSTKGIRSQLPDSANFVELCELQRDINFEQLVQPHRRLIRQGCLLKHSKRGLQQ\n+RMFFLFSDLLLYGSKSPLDQSFRILGHVPVRSLLTENAEHNTFSIFGGQCAITVSAGTTAEKTLWLAELSKAAADIKNRP\n+PNMQLQLTTLKNCSSSEEGLDLFGLSNGNNSSLNSSVNGGGPLTTQQQKLQLQQQQQNRTQPSRSNTALHVCWHRGATVG\n+LGDHLIAAEHQLSGYLLRKFKNSSGWQKLWVVFTSFCLYFYKSYQDEFALASLPLLGYTVGPPGHQDAVQKEFVFKLSFK\n+NHVYFFRAESAHTYNR\n+>FUN_000014-T1 FUN_000014\n+MSTPARRRLMRDFKRLQEDPPTGVSGAPTDNNIMIWNAVIFGPHDTPFEDGTFKLTIEFTEEYPNKPPTVRFVSKVFHPN\n+VYADGGICLDILQNRWSPTYDVSAILTSIQSLLSDPNPNSPANSTAAQLYKENRREYEKRVKACVEQSFID\n+>FUN_000015-T1 FUN_000015\n+MNKAVCLVIVIQALRMVQAETPPYIKQCHRNDPKLVDCFIGAIEHLKPYLANGIPDIQLPSVEPFKMDTLALQLTEGPQG\n+YKITLKNMEAFGASNFKVTSLKLSEGSEPFKAKIVMPKLKIEAKYTSSGVLLILPASGGGDFHANFEGVSADLTGKTSIH\n+AFKGANYLHIDALSLVLDVKDVKMSISGAFNNNRILLEATNLFLRENSQVVLEAMQAQLQKKLASEFGKLANQLLKNVPV\n+EQFYVD\n+>FUN_000016-T1 
FUN_000016\n+MASKSFDLVIEEKTKKPERLYQPRRMRWLKYIILPAVFSFALLLILVNVDFSDNSEDSTHLGNDTSLIISGYGFENNTLR\n+RGFFSGGIALHSLVIENCTIVHINDAAFNQESTVNITSLQLINVQLENLTESALEGLQKLQNFTLVNENNHFRPFGFLSA\n+VAESLVSAEIHQSLAAAISYSVCDFLGSRNFPQLKYLDLSGTHLDKSLIKESFDNLPALEQLLLRNCGLGNIEWEIVRPR\n+LKLLHYLDLGGAQKTGNYEHQLDVSAFSPETTTNAEEISTILAKRAMAPEVVGTTTLGPTTSIEISPPSTQSTTTPKEES\n+TSMTETTILTTPSPKCEEELCQDLECSRITTDTVASADLGKSSCQDGLLVEICESTCTTPTFFCVILGENFTSASNCCSH\n+HTMRCVVSAQVSWFEDHSGLVIGLGVGLLFIGSFLGMLIVFGTLRLNPSWLRGNKRRESNTIGLIQGRFEKDPYEQVG\n+>FUN_000017-T1 FUN_000017\n+MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAGESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKR\n+FIRNPNYVKANEFYDKMLSSEYVSKRYKDLPPPHPGFGADQPPA\n+>FUN_000018-T1 FUN_000018\n+MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFDLKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLG\n+VTVDLINRELYQADSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK\n+KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPCAQVIFDSDPAPAGKNVPAQLEE\n+MSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFINGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGI\n+YYNELETRVRLNKRRVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIGETEKTSED\n+AAVGAQAASGADSPAQVARDRQSRSRSRTRS\n'
b
diff -r 8ee90fc5fe11 -r fcedfe919603 test-data/transcript_out/out.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcript_out/out.gff3 Mon Nov 15 17:53:24 2021 +0000
b
b'@@ -0,0 +1,400 @@\n+##gff-version 3\n+##feature-ontology http://song.cvs.sourceforge.net/viewvc/song/ontology/sofa.obo?revision=1.269\n+##interproscan-version 5.52-86.0\n+##sequence-region FUN_000018-T1 1 1295\n+FUN_000018-T1\tprovided_by_user\tnucleic_acid\t1\t1295\t.\t+\t.\tID=FUN_000018-T1;Name=FUN_000018-T1;md5=1e5f0b711475708f1f8b964633355652\n+FUN_000018-T1_orf336\tgetorf\tORF\t312\t668\t.\t+\t.\tTarget=pep_FUN_000018-T1_312_668 1 119;ID=orf_FUN_000018-T1_312_668;Name=FUN_000018-T1_orf336;md5=1e5f0b711475708f1f8b964633355652\n+FUN_000018-T1_orf336\tgetorf\tpolypeptide\t1\t119\t.\t+\t.\tID=pep_FUN_000018-T1_312_668;md5=0b28fe115d4cc09260b038b19fb0b21d\n+FUN_000018-T1_orf336\tMobiDBLite\tprotein_match\t70\t84\t.\t+\t.\tdate=09-11-2021;Target=null 70 84;ID=match$1_70_84;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000018-T1_orf336\tMobiDBLite\tprotein_match\t37\t53\t.\t+\t.\tdate=09-11-2021;Target=null 37 53;ID=match$2_37_53;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000018-T1_orf336\tMobiDBLite\tprotein_match\t37\t87\t.\t+\t.\tdate=09-11-2021;Target=null 37 87;ID=match$3_37_87;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+##sequence-region FUN_000012-T1 1 11934\n+FUN_000012-T1\tprovided_by_user\tnucleic_acid\t1\t11934\t.\t+\t.\tID=FUN_000012-T1;Name=FUN_000012-T1;md5=bb41314e271ff246f69dd1a3a2cba23a\n+FUN_000012-T1_orf133\tgetorf\tORF\t1\t11931\t.\t+\t.\tTarget=pep_FUN_000012-T1_1_11931 1 3977;ID=orf_FUN_000012-T1_1_11931;Name=FUN_000012-T1_orf133;md5=bb41314e271ff246f69dd1a3a2cba23a\n+FUN_000012-T1_orf133\tgetorf\tpolypeptide\t1\t3977\t.\t+\t.\tID=pep_FUN_000012-T1_1_11931;md5=01beedc2fbf8012cba37f0c0d39aa071\n+FUN_000012-T1_orf133\tMobiDBLite\tprotein_match\t1090\t1105\t.\t+\t.\tdate=09-11-2021;Target=null 1090 1105;ID=match$4_1090_1105;signature_desc=consensus disorder 
prediction;Name=mobidb-lite;status=T\n+FUN_000012-T1_orf133\tMobiDBLite\tprotein_match\t1527\t1541\t.\t+\t.\tdate=09-11-2021;Target=null 1527 1541;ID=match$5_1527_1541;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000012-T1_orf133\tMobiDBLite\tprotein_match\t912\t931\t.\t+\t.\tdate=09-11-2021;Target=null 912 931;ID=match$6_912_931;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000012-T1_orf133\tMobiDBLite\tprotein_match\t559\t581\t.\t+\t.\tdate=09-11-2021;Target=null 559 581;ID=match$7_559_581;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000012-T1_orf133\tMobiDBLite\tprotein_match\t1920\t2025\t.\t+\t.\tdate=09-11-2021;Target=null 1920 2025;ID=match$8_1920_2025;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000012-T1_orf133\tMobiDBLite\tprotein_match\t3426\t3463\t.\t+\t.\tdate=09-11-2021;Target=null 3426 3463;ID=match$9_3426_3463;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000012-T1_orf133\tMobiDBLite\tprotein_match\t3552\t3583\t.\t+\t.\tdate=09-11-2021;Target=null 3552 3583;ID=match$10_3552_3583;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000012-T1_orf133\tMobiDBLite\tprotein_match\t3910\t3929\t.\t+\t.\tdate=09-11-2021;Target=null 3910 3929;ID=match$11_3910_3929;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000012-T1_orf133\tMobiDBLite\tprotein_match\t3429\t3449\t.\t+\t.\tdate=09-11-2021;Target=null 3429 3449;ID=match$12_3429_3449;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000012-T1_orf133\tMobiDBLite\tprotein_match\t2398\t2422\t.\t+\t.\tdate=09-11-2021;Target=null 2398 2422;ID=match$13_2398_2422;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000012-T1_orf133\tMobiDBLite\tprotein_match\t836\t859\t.\t+\t.\tdate=09-11-2021;Target=null 836 859;ID=match$14_836_859;signature_desc=consensus 
disorder prediction;Name=mobidb-lite;status=T\n+FUN_000012-T1_orf133\tMobiDBLite\tprotein_match\t3812\t3938\t.\t+\t.\tdate=09-11-2021;Target=null 3812 3938;ID=match$15_3812_3938;signature_desc=consensus disorder prediction;Name=mobidb-lite;status=T\n+FUN_000012-T1_orf133\tMobiDBLite\tprotein_match'..b'RASSIESTGKG\n+>match$17_2590_2630\n+GKSGSDRSSQESKSSFDSKGSLSVESRGSFETESSSGSLGA\n+>match$18_1362_1410\n+RHIMNGTAPSPSEKKHTKPKSPKSPKSPKSPKSPKSPKSPKASSGGLGP\n+>match$19_2492_2531\n+EITPTPSNEQTPTNEPKILVSKKPSTPTLEKQSPIDLGTS\n+>match$20_3596_3621\n+TSSATNTTTTTNTLNSESTEPNSPQM\n+>match$21_2757_2773\n+ASEKSSMEDDVSKASTH\n+>match$22_3213_3252\n+SCAKSEKSNQTKSSISSINKSAESLNEQSGGSSFSHKFSG\n+>match$23_3133_3158\n+ASCKKCSHSSHSEEETSSLGTDLDGT\n+>match$24_3040_3060\n+NSSPEVIVAQPTRSPAPRSPL\n+>match$25_1947_2024\n+LKTSEPYKGKPVSRSRPESRRTSADDIKARDGRGSKKSSPKERPRSIEMRRLSKDKSKSQ\n+EETEADIAKRKERQQKLY\n+>match$26_2774_2799\n+SVSMRDESVGSTDHQPSLSSDSRSNT\n+>match$27_3345_3371\n+RRSGSNGNQARAAQEHADSQTGPETSA\n+>match$28_486_508\n+YAADSPTSQEEAECSAAGAERQH\n+>match$29_447_467\n+TPSLTKKAADTLDRRRDNPIG\n+>match$30_3718_3778\n+EQAESSECSEGYVPEVDSGSSEYSSCQRDDQYLEFDEELERDQTDDYEDYPQYSGKFESL\n+D\n+>match$31_2488_2710\n+VGKEEITPTPSNEQTPTNEPKILVSKKPSTPTLEKQSPIDLGTSTESYLEPIEERIAKIL\n+DRGGARTEDSESSSGGSRKPPRIEKPARANAGKKLSVTRADAGKSGSDRSSQESKSSFDS\n+KGSLSVESRGSFETESSSGSLGAAQRRGELAQKEQQSTWRPFPIESSNSSSTDDPWHHVE\n+TDGGYERYDAQNPLRDSSDSDVKEASPDDQKDASDASYQDELN\n+>match$32_49_68\n+STPSGVDGTPSTPRHRGGKK\n+>match$33_2211_2234\n+RRSLEILKRSLPSEDARDSEGAFS\n+>match$34_520_581\n+NRSLSPQGPQSWTSPSHSSHHQQRGPDPARVHPGDHNTAHQHQHAQQQHQPQHYAQHQHS\n+GS\n+>match$35_2095_2134\n+FDEGRSPDKLDKANRSFEDRNKSFEDSEKSDAPEDMLIKS\n+>match$36_542_558\n+QRGPDPARVHPGDHNTA\n+>match$37_2637_2661\n+LAQKEQQSTWRPFPIESSNSSSTDD\n+>match$38_3816_3830\n+LPAPANHPMPPATGQ\n+>match$39_1762_1784\n+VSLQRQKATQQQSPTTERRTKSL\n+>match$40_3537_3551\n+AKHHSYDDRTLSKTQ\n+>match$41_2753_2799\n+ERSTASEKSSMEDDVSKASTHSVSMRDESVGSTDHQPSLSSDSRSNT\n+>match$42_3502_3704\n+AVAAKPPPPPVPTVPPIVTPSDL
PGDAVAPPPKPLAKHHSYDDRTLSKTQIREFKTTSKQ\n+LRQSSSFHEHMLSKSQQSSQELPMRIDEERDPHSTSSATNTTTTTNTLNSESTEPNSPQM\n+PQRADKLVRCSPYYSSSLSSESPPNQLLQKPPRKTATQLSAGAVAASLKSPPSGNDTDSS\n+LDVRGQEAKMRSRGYRKKRQLPV\n+>match$43_2538_2562\n+PIEERIAKILDRGGARTEDSESSSG\n+>match$44_520_541\n+NRSLSPQGPQSWTSPSHSSHHQ\n+>match$45_1520_1558\n+NARKANKPRVIPKTKSAGKSKKGKTRITSFSSDDESLDS\n+>match$46_3633_3682\n+PYYSSSLSSESPPNQLLQKPPRKTATQLSAGAVAASLKSPPSGNDTDSSL\n+>match$47_3324_3375\n+GNKTSLSSPSEEKRCALPQVHRRSGSNGNQARAAQEHADSQTGPETSAAARK\n+>match$48_915_931\n+YMEDEEEEEEDEGENIY\n+>match$49_1762_1787\n+VSLQRQKATQQQSPTTERRTKSLDTP\n+>match$50_1483_1502\n+QIESDSATSSRTESMQQQKL\n+>match$51_3504_3532\n+AAKPPPPPVPTVPPIVTPSDLPGDAVAPP\n+>match$52_2423_2442\n+TSGEKDTSPQRQQDWSLSVG\n+>match$53_1086_1105\n+DDESVSEGHQQHKYRSEMDV\n+>match$54_2235_2261\n+RKPSTAESLDSYVSVDESHSAASKSPV\n+>match$55_2679_2705\n+NPLRDSSDSDVKEASPDDQKDASDASY\n+>match$56_2152_2318\n+VVHKKIDGKSSSLERPAEHHYLGPDVKARSLDDKRQATEAAKKNEEKPAPVVRSAIGDQR\n+RSLEILKRSLPSEDARDSEGAFSRKPSTAESLDSYVSVDESHSAASKSPVPGTGGGTEGY\n+PHRVPTIECEEPSIEEDENSSERRHLKVGGQDTNRLSLDRSRSDETG\n+>match$57_2152_2200\n+VVHKKIDGKSSSLERPAEHHYLGPDVKARSLDDKRQATEAAKKNEEKPA\n+>match$58_2396_2471\n+YTGSSDTGKKKNTLDKQSDRSKSSDSWTSGEKDTSPQRQQDWSLSVGKEKSSVEEESSVS\n+CSIARPLGISQDFGKE\n+>match$59_113_138\n+TSSSTNSSSSSSPTGSPSSSSVSAVP\n+>match$60_39_66\n+GGDRGRSEGRGANANHWRDGEDERGRSC\n+>match$61_1_22\n+GWPATQQHQAGCQASSIGQHGA\n+>match$62_1_95\n+GWPATQQHQAGCQASSIGQHGASYAALSRAPARSSWRGGGDRGRSEGRGANANHWRDGED\n+ERGRSCWRTGSIWSGLTRPGSPRSTVSFSESNSQR\n+>match$63_70_95\n+GSIWSGLTRPGSPRSTVSFSESNSQR\n+>match$64_67_169\n+DLNSSMELEEEAEDPGTADTEEEEGEPVGEEEEEELVEEEVDSDLTASTINRETMGAASA\n+AAASASALGSGLASAHRPQLRSQESHACSTTPMPTRPKLKRTT\n+>match$65_142_169\n+HRPQLRSQESHACSTTPMPTRPKLKRTT\n+>match$66_72_108\n+MELEEEAEDPGTADTEEEEGEPVGEEEEEELVEEEVD\n+>match$67_17_78\n+GCANPSTSPPSRRASSPRTWRTSRPRSVTTSRSRFGRRLSTWTAKPRSKPSRRPSATPRA\n+KL\n+>match$68_20_66\n+NPSTSPPSRRASSPRTWRTSRPRSVTTSRSRFGRRLSTWTAKPRSKP\n+>match$69_38_72\n+GANAQSLWASESPPPAVFHRRG
PAASWSRPDRAPG\n+>match$70_1_20\n+RCNRPWPRCSPGSGAGSASA\n+>match$71_375_431\n+EEEIVEEVREEEQMQIIGETEKTSEDAAVGAQAASGADSPAQVARDRQSRSRSRTRS\n+>match$72_1_25\n+MPPTINNSAVNSAAEKRPQRQTERK\n+>match$73_1_16\n+MPPTINNSAVNSAAEK\n+>match$74_1_57\n+HCGMGLSQSMQRLDDPFSDEEGSGYLGSPTRQSYRPHAFSSHSSPGGDLSDEELCSG\n+>match$75_98_138\n+VVDFAQSQQPPPTARSRSGSCASRRSQYSSSASTRPAAAPT\n+>match$76_25_47\n+YLGSPTRQSYRPHAFSSHSSPGG\n+>match$77_1_20\n+SRSCGQERARPAGGDVAGHD\n'
b
diff -r 8ee90fc5fe11 -r fcedfe919603 test-data/transcript_out/out.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcript_out/out.json Mon Nov 15 17:53:24 2021 +0000
[
b'@@ -0,0 +1,1958 @@\n+{\n+ "interproscan-version": "5.52-86.0",\n+"results": [ {\n+  "id" : 2,\n+  "sequence" : "atgccacccacgatcaacaattcggcggtaaacagtgccgccgaaaagcgaccccagcggcaaacggagcgcaaatccgagatcatttgccgcgtgaagtatggaaacaacctgccggatataccatttgatctgaagtttctgcagtaccccttcgacagccaccgcttcgtgcagtacaacccaacgtcgctagagcgtaacttcaagtatgacgtgctgacggaacacgatttgggtgtcacggtggacctgattaaccgggagctctatcaggccgactccatgacgctgctggaccccgccgatgaaaaactgctggaggaggagactctgacgcccacagactctgtgcgttcgcgccagcattcgaggacggtgtcatggttgcgcaaatccgagtacatctccaccgagcagacgcgcttccagccccagaacctggagaacatcgaggccaaggtcggttacaacgtcaagaagtcgcttcgggaggagactctctacctggaccgcgaagcccagatcaaagccatcgagaagaccttcagcgacaccaagagcgaaattaccaagcactattccaagcccaatgtggtgccagtggaggtactgcctatcttccccgacttcaccaactggaagttcccgtgcgcccaggtcatatttgacagtgatcccgctcctgcgggcaagaacgtgcccgcccagctggaggagatgtcgcaggccatgattcgtggtgtgatggacgagagcggcgaacagtttgtcgcctacttcctgcccacagagcagacgctggagaaacgccgtacagacttcatcaatggcgagctgtacaaggaggaggaggagtacgagtacaagatcgctcgagagtacaactggaacgtgaagaccaaagcttccaagggctacgaagaaaactacttcttcgtgatgcgtcaggacggcatctactacaacgagctagaaacccgtgtgcgccttaacaagcgtcgcgttaaggttggccagcaacccaacaacaccaagctggttgtcaagcatcgtccattggacagcatggagcatcgtatgcagcgctatcgcgagcgccagctagaagttcctggcgaggaggaggagatcgtggaagaagtgagggaagaggagcaaatgcaaatcattggcgagacggagaagacgagcgaggacgcagctgttggcgcacaggcagcatctggagcggactcacccgcccaggtagcccgcgatcgacagtctcgttctcggagtcgaactcgcagcgg",\n+  "md5" : "1e5f0b711475708f1f8b964633355652",\n+  "crossReferences" : [ {\n+    "name" : "FUN_000018-T1 FUN_000018",\n+    "nucleotideSequence" : 2,\n+    "id" : "FUN_000018-T1"\n+  } ],\n+  "openReadingFrames" : [ {\n+    "id" : 5,\n+    "start" : 385,\n+    "end" : 972,\n+    "strand" : "ANTISENSE",\n+    "protein" : {\n+      "sequence" : "LVVVDAVLTHHEEVVFFVALGSFGLHVPVVLSSDLVLVLLLLLVQLAIDEVCTAFLQRLLCGQEVGDKLFAALVHHTTNHGLRHLLQLGGHVLARRSGITVKYDLGARELPVGEVGEDRQYLHWHHIGLGIVLGNFALGVAEGLLDGFDLGFAVQVESLLPKRLLDVVTDLGLDVLQVLGLEARLLGGDVLGFAQP",\n+      "md5" : 
"3b7c99de3345df5d6c3300ca9b357d7f",\n+      "matches" : [ ],\n+      "xref" : [ {\n+        "name" : "orf355 source=FUN_000018-T1 coords=972..385 length=196 frame=6 desc=FUN_000018",\n+        "id" : "orf355"\n+      } ]\n+    },\n+    "nucleotideSequence" : 2\n+  }, {\n+    "id" : 1,\n+    "start" : 312,\n+    "end" : 668,\n+    "strand" : "SENSE",\n+    "protein" : {\n+      "sequence" : "KTAGGGDSDAHRLCAFAPAFEDGVMVAQIRVHLHRADALPAPEPGEHRGQGRLQRQEVASGGDSLPGPRSPDQSHREDLQRHQERNYQALFQAQCGASGGTAYLPRLHQLEVPVRPGHI",\n+      "md5" : "0b28fe115d4cc09260b038b19fb0b21d",\n+      "matches" : [ {\n+        "signature" : {\n+          "accession" : "mobidb-lite",\n+          "name" : "disorder_prediction",\n+          "description" : "consensus disorder prediction",\n+          "signatureLibraryRelease" : {\n+            "library" : "MOBIDB_LITE",\n+            "version" : "2.0"\n+          },\n+          "entry" : null\n+        },\n+        "locations" : [ {\n+          "start" : 70,\n+          "end" : 84,\n+          "location-fragments" : [ {\n+            "start" : 70,\n+            "end" : 84,\n+            "dc-status" : "CONTINUOUS"\n+          } ],\n+          "sequence-feature" : "Polyampholyte"\n+        } ],\n+        "model-ac" : "mobidb-lite"\n+      }, {\n+        "signature" : {\n+          "accession" : "mobidb-lite",\n+          "name" : "disorder_prediction",\n+          "description" : "consensus disorder prediction",\n+          "signatureLibraryRelease" : {\n+            "library" : "MOBIDB_LITE",\n+            "version" : "2.0"\n+          },\n+          "entry" : null\n+        },\n+        "locations" : [ {\n+          "start" : 37,\n+          "end" : 53,\n+          "location-fragments" : [ {\n+            "start" : 37,\n+            "end" : 53,\n+            "dc-status" : "CONTINUOUS"\n+          } ],\n+          "sequence-feature" : "Polyampholyte"\n+        } ],\n+        "model-ac" : "mobidb-lite"\n+      }, {\n+        "signature" : {\n+          
"accession" : "mobidb-lite",\n+          "name" : "disorder_prediction",\n+          "description" : "consensu'..b'ibraryRelease" : {\n+            "library" : "MOBIDB_LITE",\n+            "version" : "2.0"\n+          },\n+          "entry" : null\n+        },\n+        "locations" : [ {\n+          "start" : 1086,\n+          "end" : 1105,\n+          "location-fragments" : [ {\n+            "start" : 1086,\n+            "end" : 1105,\n+            "dc-status" : "CONTINUOUS"\n+          } ],\n+          "sequence-feature" : ""\n+        } ],\n+        "model-ac" : "mobidb-lite"\n+      }, {\n+        "signature" : {\n+          "accession" : "mobidb-lite",\n+          "name" : "disorder_prediction",\n+          "description" : "consensus disorder prediction",\n+          "signatureLibraryRelease" : {\n+            "library" : "MOBIDB_LITE",\n+            "version" : "2.0"\n+          },\n+          "entry" : null\n+        },\n+        "locations" : [ {\n+          "start" : 2235,\n+          "end" : 2261,\n+          "location-fragments" : [ {\n+            "start" : 2235,\n+            "end" : 2261,\n+            "dc-status" : "CONTINUOUS"\n+          } ],\n+          "sequence-feature" : "Polar"\n+        } ],\n+        "model-ac" : "mobidb-lite"\n+      }, {\n+        "signature" : {\n+          "accession" : "mobidb-lite",\n+          "name" : "disorder_prediction",\n+          "description" : "consensus disorder prediction",\n+          "signatureLibraryRelease" : {\n+            "library" : "MOBIDB_LITE",\n+            "version" : "2.0"\n+          },\n+          "entry" : null\n+        },\n+        "locations" : [ {\n+          "start" : 2679,\n+          "end" : 2705,\n+          "location-fragments" : [ {\n+            "start" : 2679,\n+            "end" : 2705,\n+            "dc-status" : "CONTINUOUS"\n+          } ],\n+          "sequence-feature" : "Polyampholyte"\n+        } ],\n+        "model-ac" : "mobidb-lite"\n+      }, {\n+        "signature" 
: {\n+          "accession" : "mobidb-lite",\n+          "name" : "disorder_prediction",\n+          "description" : "consensus disorder prediction",\n+          "signatureLibraryRelease" : {\n+            "library" : "MOBIDB_LITE",\n+            "version" : "2.0"\n+          },\n+          "entry" : null\n+        },\n+        "locations" : [ {\n+          "start" : 2152,\n+          "end" : 2318,\n+          "location-fragments" : [ {\n+            "start" : 2152,\n+            "end" : 2318,\n+            "dc-status" : "CONTINUOUS"\n+          } ],\n+          "sequence-feature" : ""\n+        } ],\n+        "model-ac" : "mobidb-lite"\n+      }, {\n+        "signature" : {\n+          "accession" : "mobidb-lite",\n+          "name" : "disorder_prediction",\n+          "description" : "consensus disorder prediction",\n+          "signatureLibraryRelease" : {\n+            "library" : "MOBIDB_LITE",\n+            "version" : "2.0"\n+          },\n+          "entry" : null\n+        },\n+        "locations" : [ {\n+          "start" : 2152,\n+          "end" : 2200,\n+          "location-fragments" : [ {\n+            "start" : 2152,\n+            "end" : 2200,\n+            "dc-status" : "CONTINUOUS"\n+          } ],\n+          "sequence-feature" : "Polyampholyte"\n+        } ],\n+        "model-ac" : "mobidb-lite"\n+      }, {\n+        "signature" : {\n+          "accession" : "mobidb-lite",\n+          "name" : "disorder_prediction",\n+          "description" : "consensus disorder prediction",\n+          "signatureLibraryRelease" : {\n+            "library" : "MOBIDB_LITE",\n+            "version" : "2.0"\n+          },\n+          "entry" : null\n+        },\n+        "locations" : [ {\n+          "start" : 2396,\n+          "end" : 2471,\n+          "location-fragments" : [ {\n+            "start" : 2396,\n+            "end" : 2471,\n+            "dc-status" : "CONTINUOUS"\n+          } ],\n+          "sequence-feature" : ""\n+        } ],\n+        
"model-ac" : "mobidb-lite"\n+      } ],\n+      "xref" : [ {\n+        "name" : "orf133 source=FUN_000012-T1 coords=1..11931 length=3977 frame=1 desc=FUN_000012",\n+        "id" : "orf133"\n+      } ]\n+    },\n+    "nucleotideSequence" : 1\n+  } ]\n+} ]\n+}\n'
b
diff -r 8ee90fc5fe11 -r fcedfe919603 test-data/transcript_out/out.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcript_out/out.tsv Mon Nov 15 17:53:24 2021 +0000
b
b'@@ -0,0 +1,77 @@\n+FUN_000018-T1_orf336\t0b28fe115d4cc09260b038b19fb0b21d\t119\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t70\t84\t-\tT\t09-11-2021\t-\t-\n+FUN_000018-T1_orf336\t0b28fe115d4cc09260b038b19fb0b21d\t119\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t37\t53\t-\tT\t09-11-2021\t-\t-\n+FUN_000018-T1_orf336\t0b28fe115d4cc09260b038b19fb0b21d\t119\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t37\t87\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t1090\t1105\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t1527\t1541\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t912\t931\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t559\t581\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t1920\t2025\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t3426\t3463\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t3552\t3583\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t3910\t3929\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t3429\t3449\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder 
prediction\t2398\t2422\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t836\t859\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t3812\t3938\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t1920\t1945\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t2590\t2630\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t1362\t1410\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t2492\t2531\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t3596\t3621\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t2757\t2773\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t3213\t3252\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t3133\t3158\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t3040\t3060\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder 
prediction\t1947\t2024\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t2774\t2799\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t3345\t3371\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t486\t508\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc'..b'_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t1762\t1787\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t1483\t1502\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t3504\t3532\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t2423\t2442\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t1086\t1105\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t2235\t2261\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t2679\t2705\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t2152\t2318\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder 
prediction\t2152\t2200\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf133\t01beedc2fbf8012cba37f0c0d39aa071\t3977\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t2396\t2471\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf304\t796ae4d6bd379b343d45415308fabd86\t214\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t113\t138\t-\tT\t09-11-2021\t-\t-\n+FUN_000018-T1_orf343\t3b79c550933dca29ee431283750e0521\t95\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t39\t66\t-\tT\t09-11-2021\t-\t-\n+FUN_000018-T1_orf343\t3b79c550933dca29ee431283750e0521\t95\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t1\t22\t-\tT\t09-11-2021\t-\t-\n+FUN_000018-T1_orf343\t3b79c550933dca29ee431283750e0521\t95\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t1\t95\t-\tT\t09-11-2021\t-\t-\n+FUN_000018-T1_orf343\t3b79c550933dca29ee431283750e0521\t95\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t70\t95\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf20\t26b17402a3132f16013f57a340e1455e\t169\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t67\t169\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf20\t26b17402a3132f16013f57a340e1455e\t169\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t142\t169\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf20\t26b17402a3132f16013f57a340e1455e\t169\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t72\t108\t-\tT\t09-11-2021\t-\t-\n+FUN_000018-T1_orf337\te1712547dc145b28c8846d277417cf4d\t132\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t17\t78\t-\tT\t09-11-2021\t-\t-\n+FUN_000018-T1_orf337\te1712547dc145b28c8846d277417cf4d\t132\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t20\t66\t-\tT\t09-11-2021\t-\t-\n+FUN_000018-T1_orf357\taafbe3c8b4e6aa9c5ae6f9f19b280990\t72\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t38\t72\t-\tT\t09-11-2021\t-\t-\n+FUN_000018-T1_orf357\taafbe3c8b4e6aa9c5ae6f9f19b280990\t72\tMobiDBLite\tmobidb-lite\tconsensus disorder 
prediction\t1\t20\t-\tT\t09-11-2021\t-\t-\n+FUN_000018-T1_orf342\t5ca2cbf7936c8539c0f5b069cce1b285\t431\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t375\t431\t-\tT\t09-11-2021\t-\t-\n+FUN_000018-T1_orf342\t5ca2cbf7936c8539c0f5b069cce1b285\t431\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t1\t25\t-\tT\t09-11-2021\t-\t-\n+FUN_000018-T1_orf342\t5ca2cbf7936c8539c0f5b069cce1b285\t431\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t1\t16\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf18\ta35b46a97905a750e8fc70890d827e9b\t187\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t1\t57\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf18\ta35b46a97905a750e8fc70890d827e9b\t187\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t98\t138\t-\tT\t09-11-2021\t-\t-\n+FUN_000012-T1_orf18\ta35b46a97905a750e8fc70890d827e9b\t187\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t25\t47\t-\tT\t09-11-2021\t-\t-\n+FUN_000018-T1_orf339\t477d6280d797e71d06bba76c8f4b0a95\t105\tMobiDBLite\tmobidb-lite\tconsensus disorder prediction\t1\t20\t-\tT\t09-11-2021\t-\t-\n'
b
diff -r 8ee90fc5fe11 -r fcedfe919603 test-data/transcript_out/out.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcript_out/out.xml Mon Nov 15 17:53:24 2021 +0000
b
b'@@ -0,0 +1,1133 @@\n+<?xml version="1.0" encoding="UTF-8"?><nucleotide-sequence-matches xmlns="http://www.ebi.ac.uk/interpro/resources/schemas/interproscan5" interproscan-version="5.52-86.0">\n+  <nucleotide-sequence>\n+    <sequence md5="1e5f0b711475708f1f8b964633355652">atgccacccacgatcaacaattcggcggtaaacagtgccgccgaaaagcgaccccagcggcaaacggagcgcaaatccgagatcatttgccgcgtgaagtatggaaacaacctgccggatataccatttgatctgaagtttctgcagtaccccttcgacagccaccgcttcgtgcagtacaacccaacgtcgctagagcgtaacttcaagtatgacgtgctgacggaacacgatttgggtgtcacggtggacctgattaaccgggagctctatcaggccgactccatgacgctgctggaccccgccgatgaaaaactgctggaggaggagactctgacgcccacagactctgtgcgttcgcgccagcattcgaggacggtgtcatggttgcgcaaatccgagtacatctccaccgagcagacgcgcttccagccccagaacctggagaacatcgaggccaaggtcggttacaacgtcaagaagtcgcttcgggaggagactctctacctggaccgcgaagcccagatcaaagccatcgagaagaccttcagcgacaccaagagcgaaattaccaagcactattccaagcccaatgtggtgccagtggaggtactgcctatcttccccgacttcaccaactggaagttcccgtgcgcccaggtcatatttgacagtgatcccgctcctgcgggcaagaacgtgcccgcccagctggaggagatgtcgcaggccatgattcgtggtgtgatggacgagagcggcgaacagtttgtcgcctacttcctgcccacagagcagacgctggagaaacgccgtacagacttcatcaatggcgagctgtacaaggaggaggaggagtacgagtacaagatcgctcgagagtacaactggaacgtgaagaccaaagcttccaagggctacgaagaaaactacttcttcgtgatgcgtcaggacggcatctactacaacgagctagaaacccgtgtgcgccttaacaagcgtcgcgttaaggttggccagcaacccaacaacaccaagctggttgtcaagcatcgtccattggacagcatggagcatcgtatgcagcgctatcgcgagcgccagctagaagttcctggcgaggaggaggagatcgtggaagaagtgagggaagaggagcaaatgcaaatcattggcgagacggagaagacgagcgaggacgcagctgttggcgcacaggcagcatctggagcggactcacccgcccaggtagcccgcgatcgacagtctcgttctcggagtcgaactcgcagcgg</sequence>\n+    <xref id="FUN_000018-T1" name="FUN_000018-T1 FUN_000018"/>\n+    <orf end="972" start="385" strand="ANTISENSE">\n+      <protein>\n+        <sequence md5="3b7c99de3345df5d6c3300ca9b357d7f">LVVVDAVLTHHEEVVFFVALGSFGLHVPVVLSSDLVLVLLLLLVQLAIDEVCTAFLQRLLCGQEVGDKLFAALVHHTTNHGLRHLLQLGGHVLARRSGITVKYDLGARELPVGEVGEDRQYLHWHHIGLGIVLGNFALGVAEGLLDGFDLGFAVQVESLLPKRLLDVVTDLGLDVLQVLGLEARLLGGDVLGFAQP</sequence>\n+        
<xref id="orf355" name="orf355 source=FUN_000018-T1 coords=972..385 length=196 frame=6 desc=FUN_000018"/>\n+        <matches/>\n+      </protein>\n+    </orf>\n+    <orf end="668" start="312" strand="SENSE">\n+      <protein>\n+        <sequence md5="0b28fe115d4cc09260b038b19fb0b21d">KTAGGGDSDAHRLCAFAPAFEDGVMVAQIRVHLHRADALPAPEPGEHRGQGRLQRQEVASGGDSLPGPRSPDQSHREDLQRHQERNYQALFQAQCGASGGTAYLPRLHQLEVPVRPGHI</sequence>\n+        <xref id="orf336" name="orf336 source=FUN_000018-T1 coords=312..668 length=119 frame=3 desc=FUN_000018"/>\n+        <matches>\n+          <mobidblite-match>\n+            <signature ac="mobidb-lite" desc="consensus disorder prediction" name="disorder_prediction">\n+              <signature-library-release library="MOBIDB_LITE" version="2.0"/>\n+            </signature>\n+            <model-ac>mobidb-lite</model-ac>\n+            <locations>\n+              <mobidblite-location sequence-feature="Polyampholyte" start="70" end="84">\n+                <location-fragments>\n+                  <mobidblite-location-fragment start="70" end="84" dc-status="CONTINUOUS"/>\n+                </location-fragments>\n+              </mobidblite-location>\n+            </locations>\n+          </mobidblite-match>\n+          <mobidblite-match>\n+            <signature ac="mobidb-lite" desc="consensus disorder prediction" name="disorder_prediction">\n+              <signature-library-release library="MOBIDB_LITE" version="2.0"/>\n+            </signature>\n+            <model-ac>mobidb-lite</model-ac>\n+            <locations>\n+              <mobidblite-location sequence-feature="Polyampholyte" start="37" end="53">\n+                <location-fragments>\n+                  <mobidblite-location-fragment start="37" end="53" dc-status="CONTINUOUS"/>\n+                </location-fragments>\n+              </mobidblite-location>\n+            </locations>\n+          </mobidblite-match>\n+          <mobidblite-match>\n+            <signature ac="mobidb-lite" 
desc="consensus disorder prediction" name="disorder_predictio'..b'rediction">\n+              <signature-library-release library="MOBIDB_LITE" version="2.0"/>\n+            </signature>\n+            <model-ac>mobidb-lite</model-ac>\n+            <locations>\n+              <mobidblite-location sequence-feature="" start="1086" end="1105">\n+                <location-fragments>\n+                  <mobidblite-location-fragment start="1086" end="1105" dc-status="CONTINUOUS"/>\n+                </location-fragments>\n+              </mobidblite-location>\n+            </locations>\n+          </mobidblite-match>\n+          <mobidblite-match>\n+            <signature ac="mobidb-lite" desc="consensus disorder prediction" name="disorder_prediction">\n+              <signature-library-release library="MOBIDB_LITE" version="2.0"/>\n+            </signature>\n+            <model-ac>mobidb-lite</model-ac>\n+            <locations>\n+              <mobidblite-location sequence-feature="Polar" start="2235" end="2261">\n+                <location-fragments>\n+                  <mobidblite-location-fragment start="2235" end="2261" dc-status="CONTINUOUS"/>\n+                </location-fragments>\n+              </mobidblite-location>\n+            </locations>\n+          </mobidblite-match>\n+          <mobidblite-match>\n+            <signature ac="mobidb-lite" desc="consensus disorder prediction" name="disorder_prediction">\n+              <signature-library-release library="MOBIDB_LITE" version="2.0"/>\n+            </signature>\n+            <model-ac>mobidb-lite</model-ac>\n+            <locations>\n+              <mobidblite-location sequence-feature="Polyampholyte" start="2679" end="2705">\n+                <location-fragments>\n+                  <mobidblite-location-fragment start="2679" end="2705" dc-status="CONTINUOUS"/>\n+                </location-fragments>\n+              </mobidblite-location>\n+            </locations>\n+          </mobidblite-match>\n+         
 <mobidblite-match>\n+            <signature ac="mobidb-lite" desc="consensus disorder prediction" name="disorder_prediction">\n+              <signature-library-release library="MOBIDB_LITE" version="2.0"/>\n+            </signature>\n+            <model-ac>mobidb-lite</model-ac>\n+            <locations>\n+              <mobidblite-location sequence-feature="" start="2152" end="2318">\n+                <location-fragments>\n+                  <mobidblite-location-fragment start="2152" end="2318" dc-status="CONTINUOUS"/>\n+                </location-fragments>\n+              </mobidblite-location>\n+            </locations>\n+          </mobidblite-match>\n+          <mobidblite-match>\n+            <signature ac="mobidb-lite" desc="consensus disorder prediction" name="disorder_prediction">\n+              <signature-library-release library="MOBIDB_LITE" version="2.0"/>\n+            </signature>\n+            <model-ac>mobidb-lite</model-ac>\n+            <locations>\n+              <mobidblite-location sequence-feature="Polyampholyte" start="2152" end="2200">\n+                <location-fragments>\n+                  <mobidblite-location-fragment start="2152" end="2200" dc-status="CONTINUOUS"/>\n+                </location-fragments>\n+              </mobidblite-location>\n+            </locations>\n+          </mobidblite-match>\n+          <mobidblite-match>\n+            <signature ac="mobidb-lite" desc="consensus disorder prediction" name="disorder_prediction">\n+              <signature-library-release library="MOBIDB_LITE" version="2.0"/>\n+            </signature>\n+            <model-ac>mobidb-lite</model-ac>\n+            <locations>\n+              <mobidblite-location sequence-feature="" start="2396" end="2471">\n+                <location-fragments>\n+                  <mobidblite-location-fragment start="2396" end="2471" dc-status="CONTINUOUS"/>\n+                </location-fragments>\n+              </mobidblite-location>\n+            
</locations>\n+          </mobidblite-match>\n+        </matches>\n+      </protein>\n+    </orf>\n+  </nucleotide-sequence>\n+</nucleotide-sequence-matches>\n'
b
diff -r 8ee90fc5fe11 -r fcedfe919603 test-data/transcripts.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcripts.fa Mon Nov 15 17:53:24 2021 +0000
b
b'@@ -0,0 +1,169 @@\n+>FUN_000012-T1 FUN_000012\n+ATGTCCCTGGCCGACATGGGCACAGCCTCTCGATCCGCCGGCGGAGAAGGTGGTAGACACTATGATCTAGCCACGGGCGG\n+AGCTGGAAGTGGGGGTCATCCAGTGGGCGGCGGTCTGCCCGGAGGCCGCATGACGCACTCCCTCAGCACTCCTTCCGGGG\n+TGGATGGCACGCCCTCCACACCCCGACATCGCGGCGGCAAAAAGCTGACTGTTCGCATCCAGATGCTGGACGACTCGATC\n+ACCATGTTCCAAGTACAGGCTAAAGCACTGGGCCGCGTGCTGTTTGAACAGGTATGCCGTCAGCTGAATTTACTGGAGGC\n+CGACTACTTCGGCTTGGAGTACCAGGAAGTCTCCACACATACCAAATACTGGCTGGATCTGGAGAAGCCAATGAACCGCC\n+AGGTGGGCCTCTCTCTTATCGATCCAGTGCTGCGCTTCTGCATCAAGTTCTATACACCCGATCCAGCGCAATTGGAAGAG\n+GAGTACACCAGGTATTTGTTCTGCTTGCAAATCAAACGAGACCTGGCCACAGGCAGTCTGCAGTGCAACGATAACACGGC\n+GGCCTTAATGGCAAGCTACATTGTACAGGCGTCATGCGGCGACTTCGTTCCGGAGGATTATCCCGACCATACTTATCTGT\n+CCTCGTATCGCTTTGTGCCCAACCAGGACGCCACCATGCAGCGAAAGATTATGGAGAACCACAAAAAGCATGTTGGTCAA\n+TCCCCGGCGGAGGCGGACCTTAACCTCTTGGAGACGGCTCGGAGGTGTGAGCTGTATGGCATGAAAATGCATCCGGCGAA\n+GGATGTGGAAGGGGTGCCGCTTAATCTGGCTGTGGCCCACATGGGCATCACAGTCTTCCAGAACATCACGCGGATCAACA\n+CCTTCTCGTGGGCTAAGATACGCAAGATTTCTTTCAAGCGCAAGCGATTCCTGGTCAAACTGCATCCGGAGGGATATGGA\n+TATTACAAGGATACCGTGGAGTTCTTCTTCGAGGGTCGCAACGAGTGCAAAAACTTCTGGAAAAAATGCGTCGAAAATCA\n+CGGATTCTTCCGATGCACTGCCGTACAAAATACGCCCAGGCGCAAAACTCGCGTTCTCTCGCGGGGTAGTTCATTCCGCT\n+ATAGCGGAAAAACCCAGAAGCAGATTATCGAGTTCGTTCGCGAAAATTATGTGAAGCGCCAAAACTTCCAAAGGTCTCAG\n+TCATTCCGGCAAGGGCCACTGAATGCTAGTAGCCGAAGTCAATCGCATACGTATGTGAATTCCAGCATTTCAGCCAATCC\n+CCTTCTCCCAATTGACACTGCGGCATGGGACTATCGCAATCAATGCAGCGACTCGATGACCCCTTCTCTGACGAAGAAGG\n+CAGCGGATACCTTGGATCGCCGACGCGACAATCCTATCGGCCACATGCGTTCTCAAGTCACAGCAGCCCAGGTGGAGATC\n+TATCAGACGAAGAACTATGCAGCGGACTCGCCAACCTCCCAGGAGGAAGCGGAATGCTCGGCGGCGGGGGCGGAACGGCA\n+ACACCACTCGGCAGTGGCTATGGACAAACTGAATTCGAACCGCTCACTTTCGCCCCAGGGACCGCAGTCGTGGACTTCGC\n+CCAGTCACAGCagccaccaccaacAGCGCGGTCCCGATCCGGCTCGTGTGCATCCAGGCGATCACAATACAGctcatcag\n+catcaacacgcccagcagcagcaccaaccacaacaCTATGCACAGCACCAGCACAGTGGTTCGTTCGGTGGATCTCAACA\n+GCTCTATGGAGTTggaggaggaggcggaggATCCGGGAACTGCGGACACGgaggaggaggagggggagcCTGTGGGggag\n+gaggaggaggaggaattagtcgaggaggaagtggaTTCGGA
CTTGACGGCATCTACCATCAATCGGGAGACTATGGGGGC\n+AGCCTCAgcggcggcagcatcggcctcggctttgggctcgggcttggcCAGTGCCCATCGTCCGCAGCTTCGGTCACAGG\n+AGTCCCATGCATGCTCAACAACGCCaatgccaactcgaccgaaactgaaacggACAACATAATCTACACCGATATCAACG\n+ACATGGGGCACTACAAGTATCCGGACTTTCACAGCTCTGCACACCAGGAGCACAAGATCTCGAATAGCGAGCACCTCAAT\n+CTTAACGAACGCAACGACATATATGCCACTGTAAATCGCAAAGCCAAGTCAAAGATTCGAGAGAAGCATTTTAGCGATGA\n+GTTCATCGATCAGTCTATACTCCAGTACACGAGGGCCAAGCAAATGGGACTGAGTCAAGTTCCTGTTCTCCTGCAACAAC\n+CAAGCACCTCTGCCTATGCGGCTGCCCTTCAAGAAAGAAGTTACAACAGCGGAAGTTACCAGTATCATAACGATCTGAAT\n+CACCCCTCAGAGTCCTCatcctcctcctacTTTGGGACTGGATTCGGCACTAAGCGGTATGGTCAGAGCGAGCGAACCAA\n+GCTTGCCCAATCTGAAAATTACCTGACGCCTTCTCTGGACTCGCAAAGTTCCCGGCACACTCACTCACTTCCCCGGAACT\n+CGGAGTTGTCTGGAGTTGATTCAGTAGATTCGTACGACAGCCACTACATCACCCATCATTCAGTAGGAATTCTTCCCCCC\n+GGTCATCTGTCTTCTTCGTCAGAGAAACTTGATTTCCAGTTACCCTCTGGTAATCTCTATTCAACGACTCAATTTGTCAA\n+GCCTGAAGATGCTTCTGCCCACTACATGGAAGATgaagaggaagaagaggaagatgaaggCGAGAATATTTACGACCAGG\n+TGGAGCGTGATTCGTGGCTGAAGAGTAGCTCTTGCAAAGATCTCTTCGAGCGCTACAGTAGCACATACTACGATCGAGAG\n+CATTCTAAGCCAATGACTGCATTTGACAAAGAGTTTCTTTCCTCCAATCGTATTGACTCGCCAGTGACGCGATACGACGA\n+TTCCCGTCACTCATTGTATGTGGCCAAAAACCACTCGATCGATGCGCACCAAGGCTTCTCTCCATTGCCGCAGAGCAAGG\n+TGTACTCATCCAGTTATCCTGGCAGTACTTACAATACCCAGGAGCACAAGCGCCAGCATGGTCACCATATGGACTATTCA\n+GGTTCGCAAGACGACATCTCGCAGGACAGTTACGAACTTCTGGAGAAGTACGATATTGACTTCTTTGGAGGACGTGGCCG\n+CTCCGAGGACCACATGCGCTCTTGTTCGCTTGATTCGGGCAACTACATTCCGAGCGACGACGAGTCGGTATCCGAGGGCC\n+ATCAGCAGCACAAGTACCGCTCGGAAATGGACGTCAAGTCAAAGTCAGCAGCGGATATTATGAATGAAATTTGGGATGCT\n+GAGGATCCACGCTATCTGCCGCGCGAGAAGCTGTCTGTCAAGTCAGATGGGAACTTCTACAAGAAGATTCAGGAGGCTCT\n+GATCTCTGCTTTCCGTCGGCGATCTGATAGTGAGAATATCTTCAGCTTCGACCAGGATCGCATTGAGTGCATCCAAAAGT\n+ACACCAAACAGTGGGAGGGTCAGCAGCCCAAGGACGAAAAGAGCAAGCAGAGTCCGGAGTATCCACTCACTCCCGAACTT\n+GTGTCGGAGTTTGAGAAGCAGCAGGCGCGCCTAATGGCCCACCAAAAGATAACACGTAAGGAGGAGCTAATTGCCAAAAC\n+GCACTTTGAGGAATATAACCCCATTATGGTGCCACTTAATTACTCTTCTCATGCTACGGTGGAGCGCACCAAAAGCGATC\n+CGAACTCGCTGGCCAACCGTGCTCGTTTAGGAAGTAATTCTCGCCGGCA
TGTGCTTATGCATCAGAAGTCTATTGACTTA\n+ACCCCCGCAGACTCGAA'..b'AA\n+AACTACGAAGCTCGTTTGAGCAgcggtggaggaggagCAAGCTGCAAGAAGTGCAGTCATTCCAGCCACTCCGAGGAGGA\n+AACGAGTTCCCTTGGTACGGATCTTGATGGAACGGTCAGAATGGGTGGATTACAGCAGAAGAAGTGCACCCATAGCTCTC\n+ACTCGGAAGACACCTCAATTGGACTAAGCATCTCGGAATGGTCCACGGGAACCAACACAGTACGCCAGTATGCCAATCTA\n+TCCGGATCGGACAGCCTTTCCGCCGTGTCCACACACTCGTGTGCTAAGAGCGAGAAATCTAATCAGACTAAATCAAGTAT\n+AAGTTCGATTAACAAATCAGCTGAAAGCCTAAATGAGCAGAGTGGTGGAAGTAGCTTCTCGCACAAGTTTAGCGGGGATA\n+ATGGTTCATCCGATGGCCTGCGTTATGATATGCTCTCCAATTCCGAGACCGATAAGCTAAGTGAGGCCACCTCAGCCACT\n+AGGAGTGATGACACTACCTTGACTCTTACCGAAATGGCTCACACCATTAGTGAGTGGTCAACATCCAGTAGTCGCACACT\n+TGTTGGCGTGGCACCTGGAGAGTATCTTCCTCTCAAGCAGGCACTGTCGGGAAATAAAACTAGTTTAAGCTCTCCCAGTG\n+AGGAAAAGCGTTGCGCGCTCCCACAAGTTCATCGAAGGAGTGGTAGCAATGGCAATCAGGCGAGAGCTGCACAGGAGCAC\n+GCTGACAGTCAAACTGGTCCGGAGACAAGTGCGGCTGCTCGGAAACGTCGGTCGCTGGAGATGATGTCTAAATTGTATCA\n+GAGCCAGGAAATATGCTCTGAGTCAGAGTCGCCGTTTGTCGAGCGTTTGTATGCGCACAGCGAGAAGTTAACCGAGCGCT\n+ATCAAAGCCAGGAGTTTGTGCCGCTCCATGGTGGACCTCCAGCATCCCACCTCGCTTCGTCAACCACTAGTCAGATTCAA\n+ACCCAGCAACCCCAACAGGTCCGCCAAAAGCCAAGGGCGCCTCAACCACCAACGAAACCAAAACCTGCAGTCACTCGCCC\n+AATCATGCAGGCTTTGCTCAACAAAATGAAGCAACCTGGACTGGCGGAGCAAGCAGCCGAAGCTGCTGAGGCAGAGGAAA\n+aaaaaGCAATGATTgctgcttctgctgtggcggcgaagcccccaccaccgccAGTACCCACTGTACCGCccattgtgacg\n+cccAGCGATTTACCTGGCGACGCAGTGGCTCCCCCACCCAAGCCGCTGGCGAAGCACCACAGCTATGATGACAGAACTCT\n+GTCCAAGACCCAGATACGTGAGTTCAAGACCACTAGCAAGCAATTGCGTCAGTCAAGCTCCTTCCATGAGCACATGCTCA\n+GCAAATCGCAGCAGTCGTCCCAGGAGCTGCCCATGAGAATCGACGAAGAGAGGGATCCGCACTCCACGTCTTCAGCGACC\n+AATACAaccaccaccaccaacaCTCTGAACAGCGAGAGTACAGAACCGAATTCTCCACAAATGCCTCAGCGGGCGGACAA\n+GTTGGTCCGCTGTTCACCTTACTATTCCAGCAGCTTAAGCTCGGAATCACCGCCGAATCAGTTGCTACAGAAGCCCCCTA\n+GGAAAACGGCCACCCAGCTGAGCGCTGGAGCTGTAGCGGCTTCCTTGAAAAGTCCCCCCAGTGGCAACGATACGGACAGC\n+TCGCTGGACGTGCGAGGTCAGGAGGCAAAGATGAGAAGCAGAGGCTATCGCAAGAAACGTCAGTTACCCGTCAAAAGAAT\n+GAGGGCTAATTTAACAGCTGCAGCTCTGTTGGAGCAAGCGGAGAGTTCAGAGTGCTCCGAAGGCTACGTACCGGAAGTTG\n+ATTCAGGTAGCTCTGAGTATTCGTCTTGCC
AGCGGGATGACCAGTACCTTGAGTTCGACGAAGAACTGGAGAGGGATCAA\n+ACTGACGACTATGAGGACTATCCCCAATATAGTGGCAAATTTGAAAGTCTGGATATGAGCGACAATGTGGACGAGATGGG\n+TTTTCCCCGGTACGACCGCCTTAGTCACATCACCAAGCCCATGTatcaccaggcccttgtaATGGAGCGTCCTAATCCGG\n+TGCAGCTTCCAGCCCCCGCTAATCATCCAATGCCACCGGCCACAGGACAGCCGGTGAAACCAGCTCGGACCAAGAAACGG\n+CAATTCAAGCGGGAGGATTCCACTGCAGCAGGAACTTCCGGACATTCGACTGCAGCTCCCCAGGTTCGACCCTATCATGG\n+ACGCAGTTACTGCAACCCAGAGGAGAGCGAGTACGAAACCAGAGGTGGCGGATTGTCCGATGAGTTGGCTAACTCTAGCG\n+AGGATAGCTGTAGTGGATTCGGGGGTGACGCGGGAGCATCAGGATCGGGTACTATAAGGAGGGGTACGACCAAAGGAGCG\n+GGACAGGATCAGGAGCAAGGAACTGGAGGCCAGGCTCGACATGTGCCGTATCCCGATTTTTTGTCTGACTACGAATCCGA\n+ACCCATTGAATACGAACGATATGCCTGCGGACTGGACATACGTGTGGATCCCCCACCCAAGTTTCATGATTCTGATGAGC\n+TAAGTGACCAGTAA\n+>FUN_000018-T1 FUN_000018\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGA
GGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCGATCGACAGTCTCGTTCTCGGAG\n+TCGAACTCGCAGCGG\n'
b
diff -r 8ee90fc5fe11 -r fcedfe919603 tool-data/interproscan.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/interproscan.loc.sample Mon Nov 15 17:53:24 2021 +0000
b
@@ -0,0 +1,8 @@
+# This is a tab-separated file describing the location of the InterProScan databases used by the
+# InterProScan annotation tool.
+#
+# the columns are:
+# value description interproscan_version path
+#
+# for example (fields are separated by single tab characters):
+# 5.52-86.0 InterProScan 5.52-86.0 5.52-86.0 /tmp/database/interproscan/5.52-86.0/
b
diff -r 8ee90fc5fe11 -r fcedfe919603 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Mon Nov 15 17:53:24 2021 +0000
b
@@ -0,0 +1,6 @@
+<tables>
+    <table name="interproscan" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, description, interproscan_version, path</columns>
+        <file path="tool-data/interproscan.loc" />
+    </table>
+</tables>
b
diff -r 8ee90fc5fe11 -r fcedfe919603 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Mon Nov 15 17:53:24 2021 +0000
b
@@ -0,0 +1,6 @@
+<tables>
+    <table name="interproscan" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, description, interproscan_version, path</columns>
+        <file path="${__HERE__}/test-data/interproscan.loc" />
+    </table>
+</tables>