Repository 'gecco'
hg clone https://toolshed.g2.bx.psu.edu/repos/althonos/gecco

Changeset 14:56b924f62165 (2022-04-05)
Previous changeset 13:fde43648cba0 (2022-03-31) Next changeset 15:64528877558f (2022-04-11)
Commit message:
"Update tests files for Galaxy tool wrapper"
modified:
CHANGELOG.md
gecco.xml
test-data/BGC0001866.1_cluster_1.gbk
test-data/clusters.tsv
test-data/features.tsv
test-data/sideload.json
added:
test-data/genes.tsv
b
diff -r fde43648cba0 -r 56b924f62165 CHANGELOG.md
--- a/CHANGELOG.md Thu Mar 31 18:00:15 2022 +0000
+++ b/CHANGELOG.md Tue Apr 05 23:18:49 2022 +0000
[
@@ -5,7 +5,14 @@
 and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
-[Unreleased]: https://git.embl.de/grp-zeller/GECCO/compare/v0.9.1-alpha4...master
+[Unreleased]: https://git.embl.de/grp-zeller/GECCO/compare/v0.9.1...master
+
+## [v0.9.1] - 2022-04-05
+[v0.9.1]: https://git.embl.de/grp-zeller/GECCO/compare/v0.9.1-alpha4...v0.9.1
+
+### Changed
+- Make the `genes.tsv` and `features.tsv` tables contain all genes even when they come from a contig too short to be processed by the CRF sliding window.
+- Replaced the `--force-clusters-tsv` flag with a `--force-tsv` flag to force writing TSV tables even when no genes or clusters were found in `gecco run` or `gecco annotate`.
 
 ## [v0.9.1-alpha4] - 2022-03-31
 [v0.9.1-alpha4]: https://git.embl.de/grp-zeller/GECCO/compare/v0.9.1-alpha3...v0.9.1-alpha4
@@ -15,7 +22,7 @@
 $ python -m gecco -vv train --c1 0.4 --c2 0 --select 0.25 --window-size 20 \
          -f mibig-2.0.proG2.Pfam-v35.0.features.tsv \
          -c mibig-2.0.proG2.clusters.tsv \
-         -g GECCO-data/data/embeddings/mibig-2.0.proG2.genes.gff \
+         -g GECCO-data/data/embeddings/mibig-2.0.proG2.genes.tsv \
          -o models/v0.9.1-alpha4
 ```
 
b
diff -r fde43648cba0 -r 56b924f62165 gecco.xml
--- a/gecco.xml Thu Mar 31 18:00:15 2022 +0000
+++ b/gecco.xml Tue Apr 05 23:18:49 2022 +0000
[
@@ -1,8 +1,8 @@
 <?xml version='1.0' encoding='utf-8'?>
-<tool id="gecco" name="GECCO" version="0.8.10" python_template_version="3.5">
+<tool id="gecco" name="GECCO" version="0.9.1" python_template_version="3.5">
     <description>is a fast and scalable method for identifying putative novel Biosynthetic Gene Clusters (BGCs) in genomic and metagenomic data using Conditional Random Fields (CRFs).</description>
     <requirements>
-        <requirement type="package" version="0.8.10">gecco</requirement>
+        <requirement type="package" version="0.9.1">gecco</requirement>
     </requirements>
     <version_command>gecco --version</version_command>
     <command detect_errors="aggressive"><![CDATA[
@@ -18,8 +18,10 @@
         --format $input.ext
         --genome input_tempfile.$file_extension
         --postproc $postproc
-        --edge-distance $edge_distance
-        --force-clusters-tsv
+        --force-tsv
+        #if $edge_distance
+            --edge-distance $edge_distance
+        #end if
         #if $mask
             --mask
         #end if
@@ -33,6 +35,7 @@
             --antismash-sideload
         #end if
 
+        && mv input_tempfile.genes.tsv '$genes'
         && mv input_tempfile.features.tsv '$features'
         && mv input_tempfile.clusters.tsv '$clusters'
         #if $antismash_sideload
@@ -49,13 +52,14 @@
             <option value="antismash">antiSMASH</option>
             <option value="gecco" selected="true">GECCO</option>
         </param>
-        <param argument="--edge-distance" type="integer" min="0" value="10" label="Number of genes from the contig edges to filter out"/>
+        <param argument="--edge-distance" type="integer" min="0" optional="true" value="" label="Number of genes from the contig edges to filter out"/>
         <param argument="--antismash-sideload" type="boolean" checked="false" label="Generate an antiSMASH v6 sideload JSON file"/>
     </inputs>
     <outputs>
         <collection name="records" type="list" label="${tool.name} detected Biosynthetic Gene Clusters on ${on_string} (GenBank)">
             <discover_datasets pattern="(?P&lt;designation&gt;.*)\.gbk" ext="genbank" visible="false" />
         </collection>
+        <data name="genes" format="tabular" label="${tool.name} summary of detected genes on ${on_string} (TSV)"/>
         <data name="features" format="tabular" label="${tool.name} summary of detected features on ${on_string} (TSV)"/>
         <data name="clusters" format="tabular" label="${tool.name} summary of detected BGCs on ${on_string} (TSV)"/>
         <data name="sideload" format="json" label="antiSMASH v6 sideload file with ${tool.name} detected BGCs on ${on_string} (JSON)">
@@ -66,12 +70,14 @@
         <test>
             <param name="input" value="BGC0001866.fna"/>
             <output name="features" file="features.tsv"/>
+            <output name="genes" file="genes.tsv"/>
             <output name="clusters" file="clusters.tsv"/>
         </test>
         <test>
             <param name="input" value="BGC0001866.fna"/>
             <param name="edge_distance" value="0"/>
             <output name="features" file="features.tsv"/>
+            <output name="genes" file="genes.tsv"/>
             <output name="clusters" file="clusters.tsv"/>
             <output_collection name="records" type="list">
                 <element name="BGC0001866.1_cluster_1" file="BGC0001866.1_cluster_1.gbk" ftype="genbank" compare="diff" lines_diff="4"/>
@@ -82,6 +88,7 @@
             <param name="antismash_sideload" value="True"/>
             <param name="edge_distance" value="0"/>
             <output name="features" file="features.tsv"/>
+            <output name="genes" file="genes.tsv"/>
             <output name="clusters" file="clusters.tsv"/>
             <output name="sideload" file="sideload.json"/>
             <output_collection name="records" type="list">
@@ -107,8 +114,9 @@
 
 GECCO will create the following files once done (using the same prefix as the input file):
 
-- ``features.tsv``: The features file, containing the identified proteins and domains in the input sequences.
-- ``clusters.tsv``: If any were found, a clusters file, containing the coordinates of the predicted clusters, along their putative biosynthetic type.
+- ``genes.tsv``: The genes file, containing the genes identified in the input sequences.
+- ``features.tsv``: The features file, containing the protein domains identified in the input sequences.
+- ``clusters.tsv``: A clusters file, containing the coordinates of the predicted clusters, along with their putative biosynthetic type.
 - ``{sequence}_cluster_{N}.gbk``: If any BGCs were found, a GenBank file per cluster, containing the cluster sequence annotated with its member proteins and domains.
 
 Contact
b
diff -r fde43648cba0 -r 56b924f62165 test-data/BGC0001866.1_cluster_1.gbk
--- a/test-data/BGC0001866.1_cluster_1.gbk Thu Mar 31 18:00:15 2022 +0000
+++ b/test-data/BGC0001866.1_cluster_1.gbk Tue Apr 05 23:18:49 2022 +0000
b
b'@@ -1,4 +1,4 @@\n-LOCUS       BGC0001866.1_cluster_1 32633 bp    DNA     linear   UNK 21-NOV-2021\n+LOCUS       BGC0001866.1_cluster_1 32633 bp    DNA     linear   UNK 06-APR-2022\n DEFINITION  BGC0001866.1 Byssochlamys spectabilis strain CBS 101075 chromosome\n             Unknown C8Q69scaffold_14, whole genome shotgun sequence.\n ACCESSION   BGC0001866.1_cluster_1\n@@ -15,15 +15,15 @@\n   JOURNAL   bioRxiv (2021.05.03.442509)\n   REMARK    doi:10.1101/2021.05.03.442509\n COMMENT     ##GECCO-Data-START##\n-            version                :: GECCO v0.8.10\n-            creation_date          :: 2021-11-21T16:33:58.470847\n+            version                :: GECCO v0.9.1\n+            creation_date          :: 2022-04-06T01:08:36.965708\n             biosyn_class           :: Polyketide\n-            alkaloid_probability   :: 0.0\n-            polyketide_probability :: 0.98\n+            alkaloid_probability   :: 0.010000000000000009\n+            polyketide_probability :: 0.96\n             ripp_probability       :: 0.0\n             saccharide_probability :: 0.0\n-            terpene_probability    :: 0.0\n-            nrp_probability        :: 0.09999999999999998\n+            terpene_probability    :: 0.010000000000000009\n+            nrp_probability        :: 0.14\n             ##GECCO-Data-END##\n FEATURES             Location/Qualifiers\n      CDS             complement(1..1143)\n@@ -41,7 +41,7 @@\n                      /inference="protein motif"\n                      /db_xref="PFAM:PF00394"\n                      /db_xref="InterPro:IPR001117"\n-                     /note="e-value: 2.1941888078432915e-08"\n+                     /note="e-value: 2.262067179461254e-08"\n                      /note="p-value: 8.178117062405111e-12"\n                      /function="Multicopper oxidase"\n                      /standard_name="PF00394"\n@@ -49,7 +49,7 @@\n                      /inference="protein motif"\n                      /db_xref="PFAM:PF07731"\n       
               /db_xref="InterPro:IPR011706"\n-                     /note="e-value: 3.9374169295176556e-23"\n+                     /note="e-value: 4.059222969454281e-23"\n                      /note="p-value: 1.467542649838858e-26"\n                      /function="Multicopper oxidase"\n                      /standard_name="PF07731"\n@@ -93,7 +93,7 @@\n                      /inference="protein motif"\n                      /db_xref="PFAM:PF00891"\n                      /db_xref="InterPro:IPR001077"\n-                     /note="e-value: 4.743887678074703e-16"\n+                     /note="e-value: 4.890642309934635e-16"\n                      /note="p-value: 1.7681280946979883e-19"\n                      /function="O-methyltransferase domain"\n                      /standard_name="PF00891"\n@@ -108,7 +108,7 @@\n                      /inference="protein motif"\n                      /db_xref="PFAM:PF00135"\n                      /db_xref="InterPro:IPR002018"\n-                     /note="e-value: 4.674605664377319e-21"\n+                     /note="e-value: 4.819217021121008e-21"\n                      /note="p-value: 1.7423055029360116e-24"\n                      /function="Carboxylesterase family"\n                      /standard_name="PF00135"\n@@ -123,7 +123,7 @@\n                      /inference="protein motif"\n                      /db_xref="PFAM:PF00135"\n                      /db_xref="InterPro:IPR002018"\n-                     /note="e-value: 3.9706994470948554e-30"\n+                     /note="e-value: 4.0935350990176556e-30"\n                      /note="p-value: 1.4799476135277136e-33"\n                      /function="Carboxylesterase family"\n                      /standard_name="PF00135"\n@@ -140,7 +140,7 @@\n                      /inference="protein motif"\n                      /db_xref="PFAM:PF00135"\n                      /db_xref="InterPro:IPR002018"\n-                     /note="e-value: 1.4185801852307574e-15"\n+                     
/note="e-value: 1.4624647008379705e-15"\n                      /note="p-value: 5.287291037'..b'815692371726e-82"\n                      /note="p-value: 9.942088102809735e-86"\n                      /function="Beta-ketoacyl synthase, N-terminal domain"\n                      /standard_name="PF00109"\n@@ -476,7 +476,7 @@\n                      /inference="protein motif"\n                      /db_xref="PFAM:PF02801"\n                      /db_xref="InterPro:IPR014031"\n-                     /note="e-value: 2.4031043351141288e-34"\n+                     /note="e-value: 2.4774456171918303e-34"\n                      /note="p-value: 8.956780973217029e-38"\n                      /function="Beta-ketoacyl synthase, C-terminal domain"\n                      /standard_name="PF02801"\n@@ -484,15 +484,15 @@\n                      /inference="protein motif"\n                      /db_xref="PFAM:PF16197"\n                      /db_xref="InterPro:IPR032821"\n-                     /note="e-value: 2.535893425129411e-07"\n-                     /note="p-value: 9.451708628883381e-11"\n+                     /note="e-value: 8.475099126640419e-07"\n+                     /note="p-value: 3.0640271607521397e-10"\n                      /function="Ketoacyl-synthetase C-terminal extension"\n                      /standard_name="PF16197"\n      misc_feature    28322..29233\n                      /inference="protein motif"\n                      /db_xref="PFAM:PF00698"\n                      /db_xref="InterPro:IPR014043"\n-                     /note="e-value: 4.597134671955754e-38"\n+                     /note="e-value: 4.739349423268586e-38"\n                      /note="p-value: 1.7134307387088164e-41"\n                      /function="Acyl transferase domain"\n                      /standard_name="PF00698"\n@@ -509,7 +509,7 @@\n                      /inference="protein motif"\n                      /db_xref="PFAM:PF14765"\n                      /db_xref="InterPro:IPR020807"\n-                 
    /note="e-value: 7.778696660229127e-11"\n+                     /note="e-value: 8.019334685871699e-11"\n                      /note="p-value: 2.8992533209948296e-14"\n                      /function="Polyketide synthase dehydratase"\n                      /standard_name="PF14765"\n@@ -533,7 +533,7 @@\n                      /inference="protein motif"\n                      /db_xref="PFAM:PF00550"\n                      /db_xref="InterPro:IPR009081"\n-                     /note="e-value: 5.884377030377924e-14"\n+                     /note="e-value: 6.066413293337807e-14"\n                      /note="p-value: 2.193207987468477e-17"\n                      /function="Phosphopantetheine attachment site"\n                      /standard_name="PF00550"\n@@ -541,7 +541,7 @@\n                      /inference="protein motif"\n                      /db_xref="PFAM:PF00550"\n                      /db_xref="InterPro:IPR009081"\n-                     /note="e-value: 3.9212317886052276e-10"\n+                     /note="e-value: 4.042537132792419e-10"\n                      /note="p-value: 1.461510170930014e-13"\n                      /function="Phosphopantetheine attachment site"\n                      /standard_name="PF00550"\n@@ -549,7 +549,7 @@\n                      /inference="protein motif"\n                      /db_xref="PFAM:PF00550"\n                      /db_xref="InterPro:IPR009081"\n-                     /note="e-value: 1.367829688372301e-08"\n+                     /note="e-value: 1.4101442109719659e-08"\n                      /note="p-value: 5.098135252971677e-12"\n                      /function="Phosphopantetheine attachment site"\n                      /standard_name="PF00550"\n@@ -557,7 +557,7 @@\n                      /inference="protein motif"\n                      /db_xref="PFAM:PF00975"\n                      /db_xref="InterPro:IPR001031"\n-                     /note="e-value: 6.711355516947163e-24"\n+                     /note="e-value: 
6.91897478936856e-24"\n                      /note="p-value: 2.5014370171252933e-27"\n                      /function="Thioesterase domain"\n                      /standard_name="PF00975"\n'
b
diff -r fde43648cba0 -r 56b924f62165 test-data/clusters.tsv
--- a/test-data/clusters.tsv Thu Mar 31 18:00:15 2022 +0000
+++ b/test-data/clusters.tsv Tue Apr 05 23:18:49 2022 +0000
b
@@ -1,2 +1,2 @@
 sequence_id bgc_id start end average_p max_p type alkaloid_probability polyketide_probability ripp_probability saccharide_probability terpene_probability nrp_probability proteins domains
-BGC0001866.1 BGC0001866.1_cluster_1 347 32979 0.9969495815733557 0.9999999447224028 Polyketide 0.0 0.98 0.0 0.0 0.0 0.09999999999999998 BGC0001866.1_1;BGC0001866.1_2;BGC0001866.1_3;BGC0001866.1_4;BGC0001866.1_5;BGC0001866.1_6;BGC0001866.1_7;BGC0001866.1_8;BGC0001866.1_9;BGC0001866.1_10;BGC0001866.1_11;BGC0001866.1_12;BGC0001866.1_13;BGC0001866.1_14;BGC0001866.1_15;BGC0001866.1_16;BGC0001866.1_17;BGC0001866.1_18;BGC0001866.1_19;BGC0001866.1_20;BGC0001866.1_21;BGC0001866.1_22;BGC0001866.1_23 PF00106;PF00107;PF00109;PF00135;PF00394;PF00550;PF00698;PF00743;PF00891;PF00975;PF02801;PF06609;PF07690;PF07731;PF08241;PF08242;PF08493;PF08659;PF13434;PF13489;PF13649;PF13847;PF14765;PF16073;PF16197
+BGC0001866.1 BGC0001866.1_cluster_1 347 32979 0.9958958770931704 0.9999999976946022 Polyketide 0.010000000000000009 0.96 0.0 0.0 0.010000000000000009 0.14 BGC0001866.1_1;BGC0001866.1_2;BGC0001866.1_3;BGC0001866.1_4;BGC0001866.1_5;BGC0001866.1_6;BGC0001866.1_7;BGC0001866.1_8;BGC0001866.1_9;BGC0001866.1_10;BGC0001866.1_11;BGC0001866.1_12;BGC0001866.1_13;BGC0001866.1_14;BGC0001866.1_15;BGC0001866.1_16;BGC0001866.1_17;BGC0001866.1_18;BGC0001866.1_19;BGC0001866.1_20;BGC0001866.1_21;BGC0001866.1_22;BGC0001866.1_23 PF00106;PF00107;PF00109;PF00135;PF00394;PF00550;PF00698;PF00743;PF00891;PF00975;PF02801;PF06609;PF07690;PF07731;PF08241;PF08242;PF08493;PF08659;PF13434;PF13489;PF13649;PF13847;PF14765;PF16073;PF16197
b
diff -r fde43648cba0 -r 56b924f62165 test-data/features.tsv
--- a/test-data/features.tsv Thu Mar 31 18:00:15 2022 +0000
+++ b/test-data/features.tsv Tue Apr 05 23:18:49 2022 +0000
b
b'@@ -1,38 +1,38 @@\n sequence_id\tprotein_id\tstart\tend\tstrand\tdomain\thmm\ti_evalue\tpvalue\tdomain_start\tdomain_end\tbgc_probability\r\n-BGC0001866.1\tBGC0001866.1_1\t347\t1489\t-\tPF00394\tPfam\t2.1941888078432915e-08\t8.178117062405111e-12\t1\t63\t0.9852038761627908\r\n-BGC0001866.1\tBGC0001866.1_1\t347\t1489\t-\tPF07731\tPfam\t3.9374169295176556e-23\t1.467542649838858e-26\t150\t281\t0.9852038761627908\r\n-BGC0001866.1\tBGC0001866.1_6\t3946\t4389\t+\tPF00891\tPfam\t4.743887678074703e-16\t1.7681280946979883e-19\t17\t121\t0.9910535094227727\r\n-BGC0001866.1\tBGC0001866.1_7\t4683\t5138\t+\tPF00135\tPfam\t4.674605664377319e-21\t1.7423055029360116e-24\t48\t140\t0.9913598896683397\r\n-BGC0001866.1\tBGC0001866.1_8\t5384\t5812\t+\tPF00135\tPfam\t3.9706994470948554e-30\t1.4799476135277136e-33\t2\t114\t0.9925093258822111\r\n-BGC0001866.1\tBGC0001866.1_9\t5823\t6599\t+\tPF00135\tPfam\t1.4185801852307574e-15\t5.287291037013632e-19\t2\t209\t0.9946019708257335\r\n-BGC0001866.1\tBGC0001866.1_10\t7758\t9029\t+\tPF13434\tPfam\t5.777178703900199e-08\t2.153253337271785e-11\t13\t124\t0.9978201609931655\r\n-BGC0001866.1\tBGC0001866.1_10\t7758\t9029\t+\tPF00743\tPfam\t5.089108077410868e-07\t1.8967976434628658e-10\t36\t102\t0.9978201609931655\r\n-BGC0001866.1\tBGC0001866.1_13\t11550\t12662\t+\tPF07690\tPfam\t5.839871260376694e-37\t2.1766199255969786e-40\t1\t362\t0.9990971143689635\r\n-BGC0001866.1\tBGC0001866.1_13\t11550\t12662\t+\tPF06609\tPfam\t9.543170598318239e-09\t3.55690294383833e-12\t17\t244\t0.9990971143689635\r\n-BGC0001866.1\tBGC0001866.1_15\t14920\t15912\t+\tPF08493\tPfam\t2.6165794251055913e-17\t9.752439154325723e-21\t139\t224\t0.9999977987864139\r\n-BGC0001866.1\tBGC0001866.1_16\t17173\t19143\t+\tPF00109\tPfam\t9.025888536170949e-60\t3.364103069761815e-63\t2\t248\t0.9999994272691842\r\n-BGC0001866.1\tBGC0001866.1_16\t17173\t19143\t+\tPF02801\tPfam\t2.2171445990751238e-35\t8.263677223537547e-39\t257\t368\t0.9999994272691842\r\n-BGC0001866.1\tBGC0001866.1_16\t17173\t19
143\t+\tPF16197\tPfam\t3.8698172759236842e-25\t1.4423471024687604e-28\t371\t487\t0.9999994272691842\r\n-BGC0001866.1\tBGC0001866.1_16\t17173\t19143\t+\tPF00698\tPfam\t1.0799913424517567e-26\t4.025312495161225e-30\t512\t648\t0.9999994272691842\r\n-BGC0001866.1\tBGC0001866.1_17\t19152\t22424\t+\tPF00698\tPfam\t2.639223271303753e-16\t9.836836642950999e-20\t2\t151\t0.9999940983719267\r\n-BGC0001866.1\tBGC0001866.1_17\t19152\t22424\t+\tPF14765\tPfam\t2.520598829779557e-60\t9.394703055458656e-64\t228\t504\t0.9999940983719267\r\n-BGC0001866.1\tBGC0001866.1_17\t19152\t22424\t+\tPF13489\tPfam\t1.0131254482174088e-12\t3.776091868123029e-16\t661\t817\t0.9999940983719267\r\n-BGC0001866.1\tBGC0001866.1_17\t19152\t22424\t+\tPF13847\tPfam\t8.939870258494623e-11\t3.332042586095648e-14\t666\t776\t0.9999940983719267\r\n-BGC0001866.1\tBGC0001866.1_17\t19152\t22424\t+\tPF13649\tPfam\t2.319131521369124e-13\t8.643799930559537e-17\t667\t764\t0.9999940983719267\r\n-BGC0001866.1\tBGC0001866.1_17\t19152\t22424\t+\tPF08242\tPfam\t3.6288099491186147e-22\t1.3525195486837923e-25\t668\t766\t0.9999940983719267\r\n-BGC0001866.1\tBGC0001866.1_17\t19152\t22424\t+\tPF08241\tPfam\t5.245291385894328e-12\t1.9550098344742185e-15\t668\t767\t0.9999940983719267\r\n-BGC0001866.1\tBGC0001866.1_18\t22762\t23235\t+\tPF00107\tPfam\t1.0960342036668699e-15\t4.085106983476965e-19\t12\t117\t0.9999176675645223\r\n-BGC0001866.1\tBGC0001866.1_19\t23268\t24623\t+\tPF08659\tPfam\t1.5141662612831146e-61\t5.643556695054471e-65\t65\t239\t0.9999724741067139\r\n-BGC0001866.1\tBGC0001866.1_19\t23268\t24623\t+\tPF00106\tPfam\t1.1379002942545491e-07\t4.2411490654288077e-11\t68\t221\t0.9999724741067139\r\n-BGC0001866.1\tBGC0001866.1_19\t23268\t24623\t+\tPF00550\tPfam\t3.359618716013185e-10\t1.2521873708584363e-13\t384\t437\t0.9999724741067139\r\n-BGC0001866.1\tBGC0001866.1_20\t25769\t26056\t+\tPF16073\tPfam\t1.3071857188363548e-23\t4.872104803713585e-27\t8\t94\t0.999988513111687\r\n-BGC0001866.1\tBGC0001866.1_21\t26544\t29999\t+\t
PF16073\tPfam\t8.208876065249628e-11\t3.059588544632735e-14\t2\t47\t0.9999999447224028\r\n-BGC0001866.1\tBGC0001866.1_21\t26544\t29999\t+\tPF00109\tPfam\t2.667462237983852e-82\t9.942088102809735e-86\t178\t426\t0.9999999447224028\r\n-BGC0001866.1\tBGC0001866.1_21\t26544\t29999\t+\tPF02801\tPfam\t2.4031043351141288e-34\t8.956780973217029e-38\t434\t555\t0.9999999447224028\r\n-BGC0001866.1\tBGC0001866.1_21\t'..b'511\r\n+BGC0001866.1\tBGC0001866.1_10\t7758\t9029\t+\tPF13434\tPfam\t5.955898730893757e-08\t2.153253337271785e-11\t13\t124\t0.9986351193337516\r\n+BGC0001866.1\tBGC0001866.1_10\t7758\t9029\t+\tPF00743\tPfam\t5.246542281818287e-07\t1.8967976434628658e-10\t36\t102\t0.9986351193337516\r\n+BGC0001866.1\tBGC0001866.1_13\t11550\t12662\t+\tPF07690\tPfam\t6.020530714201243e-37\t2.1766199255969786e-40\t1\t362\t0.9994485509803548\r\n+BGC0001866.1\tBGC0001866.1_13\t11550\t12662\t+\tPF06609\tPfam\t9.83839354265682e-09\t3.55690294383833e-12\t17\t244\t0.9994485509803548\r\n+BGC0001866.1\tBGC0001866.1_15\t14920\t15912\t+\tPF08493\tPfam\t2.686865976406516e-17\t9.713904470016327e-21\t139\t224\t0.9999999296901834\r\n+BGC0001866.1\tBGC0001866.1_16\t17173\t19143\t+\tPF00109\tPfam\t9.30510909096118e-60\t3.364103069761815e-63\t2\t248\t0.9999998571963613\r\n+BGC0001866.1\tBGC0001866.1_16\t17173\t19143\t+\tPF02801\tPfam\t2.2857331200304854e-35\t8.263677223537547e-39\t257\t368\t0.9999998571963613\r\n+BGC0001866.1\tBGC0001866.1_16\t17173\t19143\t+\tPF16197\tPfam\t4.800730099641783e-25\t1.7356218726109122e-28\t371\t488\t0.9999998571963613\r\n+BGC0001866.1\tBGC0001866.1_16\t17173\t19143\t+\tPF00698\tPfam\t1.113401436161595e-26\t4.025312495161225e-30\t512\t648\t0.9999998571963613\r\n+BGC0001866.1\tBGC0001866.1_17\t19152\t22424\t+\tPF00698\tPfam\t2.7208690154402465e-16\t9.836836642950999e-20\t2\t151\t0.9999990994944158\r\n+BGC0001866.1\tBGC0001866.1_17\t19152\t22424\t+\tPF14765\tPfam\t2.598574865139864e-60\t9.394703055458656e-64\t228\t504\t0.9999990994944158\r\n+BGC0001866.1\tBGC0001866.1_17
\t19152\t22424\t+\tPF13489\tPfam\t1.04446701072283e-12\t3.776091868123029e-16\t661\t817\t0.9999990994944158\r\n+BGC0001866.1\tBGC0001866.1_17\t19152\t22424\t+\tPF13847\tPfam\t8.752004453621267e-11\t3.1641375465008194e-14\t666\t776\t0.9999990994944158\r\n+BGC0001866.1\tBGC0001866.1_17\t19152\t22424\t+\tPF13649\tPfam\t2.4253465299984994e-13\t8.76842563267715e-17\t667\t764\t0.9999990994944158\r\n+BGC0001866.1\tBGC0001866.1_17\t19152\t22424\t+\tPF08242\tPfam\t3.7410690716593694e-22\t1.3525195486837923e-25\t668\t766\t0.9999990994944158\r\n+BGC0001866.1\tBGC0001866.1_17\t19152\t22424\t+\tPF08241\tPfam\t5.4075572021556884e-12\t1.9550098344742185e-15\t668\t767\t0.9999990994944158\r\n+BGC0001866.1\tBGC0001866.1_18\t22762\t23235\t+\tPF00107\tPfam\t1.1299405916297285e-15\t4.085106983476965e-19\t12\t117\t0.9999802025553775\r\n+BGC0001866.1\tBGC0001866.1_19\t23268\t24623\t+\tPF08659\tPfam\t1.5610077818520667e-61\t5.643556695054471e-65\t65\t239\t0.9999913868972266\r\n+BGC0001866.1\tBGC0001866.1_19\t23268\t24623\t+\tPF00106\tPfam\t1.1731018314976082e-07\t4.2411490654288077e-11\t68\t221\t0.9999913868972266\r\n+BGC0001866.1\tBGC0001866.1_19\t23268\t24623\t+\tPF00550\tPfam\t3.463550267794435e-10\t1.2521873708584363e-13\t384\t437\t0.9999913868972266\r\n+BGC0001866.1\tBGC0001866.1_20\t25769\t26056\t+\tPF16073\tPfam\t9.422238725791962e-24\t3.406449286258844e-27\t8\t94\t0.9999994733759681\r\n+BGC0001866.1\tBGC0001866.1_21\t26544\t29999\t+\tPF16073\tPfam\t4.380197593141013e-11\t1.5835855362042708e-14\t2\t47\t0.9999999976946022\r\n+BGC0001866.1\tBGC0001866.1_21\t26544\t29999\t+\tPF00109\tPfam\t2.7499815692371726e-82\t9.942088102809735e-86\t178\t426\t0.9999999976946022\r\n+BGC0001866.1\tBGC0001866.1_21\t26544\t29999\t+\tPF02801\tPfam\t2.4774456171918303e-34\t8.956780973217029e-38\t434\t555\t0.9999999976946022\r\n+BGC0001866.1\tBGC0001866.1_21\t26544\t29999\t+\tPF16197\tPfam\t8.475099126640419e-07\t3.0640271607521397e-10\t567\t673\t0.9999999976946022\r\n+BGC0001866.1\tBGC0001866.1_21\t26544\
t29999\t+\tPF00698\tPfam\t4.739349423268586e-38\t1.7134307387088164e-41\t709\t1012\t0.9999999976946022\r\n+BGC0001866.1\tBGC0001866.1_22\t30150\t30890\t+\tPF14765\tPfam\t8.019334685871699e-11\t2.8992533209948296e-14\t39\t244\t0.9999912059124727\r\n+BGC0001866.1\tBGC0001866.1_23\t30937\t32979\t+\tPF00550\tPfam\t6.066413293337807e-14\t2.193207987468477e-17\t67\t128\t0.9998703656415205\r\n+BGC0001866.1\tBGC0001866.1_23\t30937\t32979\t+\tPF00550\tPfam\t4.042537132792419e-10\t1.461510170930014e-13\t174\t238\t0.9998703656415205\r\n+BGC0001866.1\tBGC0001866.1_23\t30937\t32979\t+\tPF00550\tPfam\t1.4101442109719659e-08\t5.098135252971677e-12\t299\t360\t0.9998703656415205\r\n+BGC0001866.1\tBGC0001866.1_23\t30937\t32979\t+\tPF00975\tPfam\t6.91897478936856e-24\t2.5014370171252933e-27\t443\t550\t0.9998703656415205\r\n'
b
diff -r fde43648cba0 -r 56b924f62165 test-data/genes.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genes.tsv Tue Apr 05 23:18:49 2022 +0000
b
@@ -0,0 +1,24 @@
+sequence_id protein_id start end strand average_p max_p
+BGC0001866.1 BGC0001866.1_1 347 1489 - 0.9791890143072265 0.9791890143072265
+BGC0001866.1 BGC0001866.1_2 1525 2016 + 0.9816626269970528 0.9816626269970528
+BGC0001866.1 BGC0001866.1_3 2513 2722 - 0.9844997726878899 0.9844997726878899
+BGC0001866.1 BGC0001866.1_4 2905 3378 + 0.9877300777686966 0.9877300777686966
+BGC0001866.1 BGC0001866.1_5 3353 3922 + 0.9913872741253911 0.9913872741253911
+BGC0001866.1 BGC0001866.1_6 3946 4389 + 0.9955095513800687 0.9955095513800687
+BGC0001866.1 BGC0001866.1_7 4683 5138 + 0.995982045872177 0.995982045872177
+BGC0001866.1 BGC0001866.1_8 5384 5812 + 0.9966491071789748 0.9966491071789748
+BGC0001866.1 BGC0001866.1_9 5823 6599 + 0.9975265367646511 0.9975265367646511
+BGC0001866.1 BGC0001866.1_10 7758 9029 + 0.9986351193337516 0.9986351193337516
+BGC0001866.1 BGC0001866.1_11 9800 10384 + 0.9988029392597757 0.9988029392597757
+BGC0001866.1 BGC0001866.1_12 11109 11537 + 0.999073142625125 0.999073142625125
+BGC0001866.1 BGC0001866.1_13 11550 12662 + 0.9994485509803548 0.9994485509803548
+BGC0001866.1 BGC0001866.1_14 12681 13127 + 0.9996778954036583 0.9996778954036583
+BGC0001866.1 BGC0001866.1_15 14920 15912 + 0.9999999296901834 0.9999999296901834
+BGC0001866.1 BGC0001866.1_16 17173 19143 + 0.9999998571963613 0.9999998571963613
+BGC0001866.1 BGC0001866.1_17 19152 22424 + 0.9999990994944158 0.9999990994944158
+BGC0001866.1 BGC0001866.1_18 22762 23235 + 0.9999802025553775 0.9999802025553775
+BGC0001866.1 BGC0001866.1_19 23268 24623 + 0.9999913868972266 0.9999913868972266
+BGC0001866.1 BGC0001866.1_20 25769 26056 + 0.9999994733759681 0.9999994733759681
+BGC0001866.1 BGC0001866.1_21 26544 29999 + 0.9999999976946022 0.9999999976946022
+BGC0001866.1 BGC0001866.1_22 30150 30890 + 0.9999912059124727 0.9999912059124727
+BGC0001866.1 BGC0001866.1_23 30937 32979 + 0.9998703656415205 0.9998703656415205
b
diff -r fde43648cba0 -r 56b924f62165 test-data/sideload.json
--- a/test-data/sideload.json Thu Mar 31 18:00:15 2022 +0000
+++ b/test-data/sideload.json Tue Apr 05 23:18:49 2022 +0000
[
@@ -5,14 +5,14 @@
             "subregions": [
                 {
                     "details": {
-                        "alkaloid_probability": "0.000",
-                        "average_p": "0.997",
+                        "alkaloid_probability": "0.010",
+                        "average_p": "0.996",
                         "max_p": "1.000",
-                        "nrp_probability": "0.100",
-                        "polyketide_probability": "0.980",
+                        "nrp_probability": "0.140",
+                        "polyketide_probability": "0.960",
                         "ripp_probability": "0.000",
                         "saccharide_probability": "0.000",
-                        "terpene_probability": "0.000"
+                        "terpene_probability": "0.010"
                     },
                     "end": 32979,
                     "label": "Polyketide",
@@ -25,11 +25,13 @@
         "configuration": {
             "cds": "3",
             "e-filter": "None",
+            "edge-distance": "0",
+            "mask": "False",
             "postproc": "'gecco'",
-            "threshold": "0.3"
+            "threshold": "0.8"
         },
         "description": "Biosynthetic Gene Cluster prediction with Conditional Random Fields.",
         "name": "GECCO",
-        "version": "0.8.10"
+        "version": "0.9.1"
     }
 }
\ No newline at end of file