Previous changeset 13:fde43648cba0 (2022-03-31) Next changeset 15:64528877558f (2022-04-11) |
Commit message:
"Update tests files for Galaxy tool wrapper" |
modified:
CHANGELOG.md gecco.xml test-data/BGC0001866.1_cluster_1.gbk test-data/clusters.tsv test-data/features.tsv test-data/sideload.json |
added:
test-data/genes.tsv |
b |
diff -r fde43648cba0 -r 56b924f62165 CHANGELOG.md --- a/CHANGELOG.md Thu Mar 31 18:00:15 2022 +0000 +++ b/CHANGELOG.md Tue Apr 05 23:18:49 2022 +0000 |
[ |
@@ -5,7 +5,14 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). ## [Unreleased] -[Unreleased]: https://git.embl.de/grp-zeller/GECCO/compare/v0.9.1-alpha4...master +[Unreleased]: https://git.embl.de/grp-zeller/GECCO/compare/v0.9.1...master + +## [v0.9.1] - 2022-04-05 +[v0.9.1]: https://git.embl.de/grp-zeller/GECCO/compare/v0.9.1-alpha4...v0.9.1 + +### Changed +- Make the `genes.tsv` and `features.tsv` table contain all genes even when they come from a contig too short to be processed by the CRF sliding window. +- Replaced the `--force-clusters-tsv` flag with a `--force-tsv` flag to force writing TSV tables even when no genes or clusters were found in `gecco run` or `gecco annotate`. ## [v0.9.1-alpha4] - 2022-03-31 [v0.9.1-alpha4]: https://git.embl.de/grp-zeller/GECCO/compare/v0.9.1-alpha3...v0.9.1-alpha4 @@ -15,7 +22,7 @@ $ python -m gecco -vv train --c1 0.4 --c2 0 --select 0.25 --window-size 20 \ -f mibig-2.0.proG2.Pfam-v35.0.features.tsv \ -c mibig-2.0.proG2.clusters.tsv \ - -g GECCO-data/data/embeddings/mibig-2.0.proG2.genes.gff \ + -g GECCO-data/data/embeddings/mibig-2.0.proG2.genes.tsv \ -o models/v0.9.1-alpha4 ``` |
b |
diff -r fde43648cba0 -r 56b924f62165 gecco.xml --- a/gecco.xml Thu Mar 31 18:00:15 2022 +0000 +++ b/gecco.xml Tue Apr 05 23:18:49 2022 +0000 |
[ |
@@ -1,8 +1,8 @@ <?xml version='1.0' encoding='utf-8'?> -<tool id="gecco" name="GECCO" version="0.8.10" python_template_version="3.5"> +<tool id="gecco" name="GECCO" version="0.9.1" python_template_version="3.5"> <description>is a fast and scalable method for identifying putative novel Biosynthetic Gene Clusters (BGCs) in genomic and metagenomic data using Conditional Random Fields (CRFs).</description> <requirements> - <requirement type="package" version="0.8.10">gecco</requirement> + <requirement type="package" version="0.9.1">gecco</requirement> </requirements> <version_command>gecco --version</version_command> <command detect_errors="aggressive"><![CDATA[ @@ -18,8 +18,10 @@ --format $input.ext --genome input_tempfile.$file_extension --postproc $postproc - --edge-distance $edge_distance - --force-clusters-tsv + --force-tsv + #if $edge_distance + --edge-distance $edge_distance + #end if #if $mask --mask #end if @@ -33,6 +35,7 @@ --antismash-sideload #end if + && mv input_tempfile.genes.tsv '$genes' && mv input_tempfile.features.tsv '$features' && mv input_tempfile.clusters.tsv '$clusters' #if $antismash_sideload @@ -49,13 +52,14 @@ <option value="antismash">antiSMASH</option> <option value="gecco" selected="true">GECCO</option> </param> - <param argument="--edge-distance" type="integer" min="0" value="10" label="Number of genes from the contig edges to filter out"/> + <param argument="--edge-distance" type="integer" min="0" optional="true" value="" label="Number of genes from the contig edges to filter out"/> <param argument="--antismash-sideload" type="boolean" checked="false" label="Generate an antiSMASH v6 sideload JSON file"/> </inputs> <outputs> <collection name="records" type="list" label="${tool.name} detected Biosynthetic Gene Clusters on ${on_string} (GenBank)"> <discover_datasets pattern="(?P<designation>.*)\.gbk" ext="genbank" visible="false" /> </collection> + <data name="genes" format="tabular" label="${tool.name} summary of detected genes on 
${on_string} (TSV)"/> <data name="features" format="tabular" label="${tool.name} summary of detected features on ${on_string} (TSV)"/> <data name="clusters" format="tabular" label="${tool.name} summary of detected BGCs on ${on_string} (TSV)"/> <data name="sideload" format="json" label="antiSMASH v6 sideload file with ${tool.name} detected BGCs on ${on_string} (JSON)"> @@ -66,12 +70,14 @@ <test> <param name="input" value="BGC0001866.fna"/> <output name="features" file="features.tsv"/> + <output name="genes" file="genes.tsv"/> <output name="clusters" file="clusters.tsv"/> </test> <test> <param name="input" value="BGC0001866.fna"/> <param name="edge_distance" value="0"/> <output name="features" file="features.tsv"/> + <output name="genes" file="genes.tsv"/> <output name="clusters" file="clusters.tsv"/> <output_collection name="records" type="list"> <element name="BGC0001866.1_cluster_1" file="BGC0001866.1_cluster_1.gbk" ftype="genbank" compare="diff" lines_diff="4"/> @@ -82,6 +88,7 @@ <param name="antismash_sideload" value="True"/> <param name="edge_distance" value="0"/> <output name="features" file="features.tsv"/> + <output name="genes" file="genes.tsv"/> <output name="clusters" file="clusters.tsv"/> <output name="sideload" file="sideload.json"/> <output_collection name="records" type="list"> @@ -107,8 +114,9 @@ GECCO will create the following files once done (using the same prefix as the input file): -- ``features.tsv``: The features file, containing the identified proteins and domains in the input sequences. -- ``clusters.tsv``: If any were found, a clusters file, containing the coordinates of the predicted clusters, along their putative biosynthetic type. +- ``genes.tsv``: The genes file, containing the genes identified in the input sequences. +- ``features.tsv``: The features file, containing the protein domains identified in the input sequences. 
+- ``clusters.tsv``: A clusters file, containing the coordinates of the predicted clusters, along their putative biosynthetic type. - ``{sequence}_cluster_{N}.gbk``: If any BGCs were found, a GenBank file per cluster, containing the cluster sequence annotated with its member proteins and domains. Contact |
b |
diff -r fde43648cba0 -r 56b924f62165 test-data/BGC0001866.1_cluster_1.gbk --- a/test-data/BGC0001866.1_cluster_1.gbk Thu Mar 31 18:00:15 2022 +0000 +++ b/test-data/BGC0001866.1_cluster_1.gbk Tue Apr 05 23:18:49 2022 +0000 |
b |
b'@@ -1,4 +1,4 @@\n-LOCUS BGC0001866.1_cluster_1 32633 bp DNA linear UNK 21-NOV-2021\n+LOCUS BGC0001866.1_cluster_1 32633 bp DNA linear UNK 06-APR-2022\n DEFINITION BGC0001866.1 Byssochlamys spectabilis strain CBS 101075 chromosome\n Unknown C8Q69scaffold_14, whole genome shotgun sequence.\n ACCESSION BGC0001866.1_cluster_1\n@@ -15,15 +15,15 @@\n JOURNAL bioRxiv (2021.05.03.442509)\n REMARK doi:10.1101/2021.05.03.442509\n COMMENT ##GECCO-Data-START##\n- version :: GECCO v0.8.10\n- creation_date :: 2021-11-21T16:33:58.470847\n+ version :: GECCO v0.9.1\n+ creation_date :: 2022-04-06T01:08:36.965708\n biosyn_class :: Polyketide\n- alkaloid_probability :: 0.0\n- polyketide_probability :: 0.98\n+ alkaloid_probability :: 0.010000000000000009\n+ polyketide_probability :: 0.96\n ripp_probability :: 0.0\n saccharide_probability :: 0.0\n- terpene_probability :: 0.0\n- nrp_probability :: 0.09999999999999998\n+ terpene_probability :: 0.010000000000000009\n+ nrp_probability :: 0.14\n ##GECCO-Data-END##\n FEATURES Location/Qualifiers\n CDS complement(1..1143)\n@@ -41,7 +41,7 @@\n /inference="protein motif"\n /db_xref="PFAM:PF00394"\n /db_xref="InterPro:IPR001117"\n- /note="e-value: 2.1941888078432915e-08"\n+ /note="e-value: 2.262067179461254e-08"\n /note="p-value: 8.178117062405111e-12"\n /function="Multicopper oxidase"\n /standard_name="PF00394"\n@@ -49,7 +49,7 @@\n /inference="protein motif"\n /db_xref="PFAM:PF07731"\n /db_xref="InterPro:IPR011706"\n- /note="e-value: 3.9374169295176556e-23"\n+ /note="e-value: 4.059222969454281e-23"\n /note="p-value: 1.467542649838858e-26"\n /function="Multicopper oxidase"\n /standard_name="PF07731"\n@@ -93,7 +93,7 @@\n /inference="protein motif"\n /db_xref="PFAM:PF00891"\n /db_xref="InterPro:IPR001077"\n- /note="e-value: 4.743887678074703e-16"\n+ /note="e-value: 4.890642309934635e-16"\n /note="p-value: 1.7681280946979883e-19"\n /function="O-methyltransferase domain"\n /standard_name="PF00891"\n@@ -108,7 +108,7 @@\n /inference="protein motif"\n 
/db_xref="PFAM:PF00135"\n /db_xref="InterPro:IPR002018"\n- /note="e-value: 4.674605664377319e-21"\n+ /note="e-value: 4.819217021121008e-21"\n /note="p-value: 1.7423055029360116e-24"\n /function="Carboxylesterase family"\n /standard_name="PF00135"\n@@ -123,7 +123,7 @@\n /inference="protein motif"\n /db_xref="PFAM:PF00135"\n /db_xref="InterPro:IPR002018"\n- /note="e-value: 3.9706994470948554e-30"\n+ /note="e-value: 4.0935350990176556e-30"\n /note="p-value: 1.4799476135277136e-33"\n /function="Carboxylesterase family"\n /standard_name="PF00135"\n@@ -140,7 +140,7 @@\n /inference="protein motif"\n /db_xref="PFAM:PF00135"\n /db_xref="InterPro:IPR002018"\n- /note="e-value: 1.4185801852307574e-15"\n+ /note="e-value: 1.4624647008379705e-15"\n /note="p-value: 5.287291037'..b'815692371726e-82"\n /note="p-value: 9.942088102809735e-86"\n /function="Beta-ketoacyl synthase, N-terminal domain"\n /standard_name="PF00109"\n@@ -476,7 +476,7 @@\n /inference="protein motif"\n /db_xref="PFAM:PF02801"\n /db_xref="InterPro:IPR014031"\n- /note="e-value: 2.4031043351141288e-34"\n+ /note="e-value: 2.4774456171918303e-34"\n /note="p-value: 8.956780973217029e-38"\n /function="Beta-ketoacyl synthase, C-terminal domain"\n /standard_name="PF02801"\n@@ -484,15 +484,15 @@\n /inference="protein motif"\n /db_xref="PFAM:PF16197"\n /db_xref="InterPro:IPR032821"\n- /note="e-value: 2.535893425129411e-07"\n- /note="p-value: 9.451708628883381e-11"\n+ /note="e-value: 8.475099126640419e-07"\n+ /note="p-value: 3.0640271607521397e-10"\n /function="Ketoacyl-synthetase C-terminal extension"\n /standard_name="PF16197"\n misc_feature 28322..29233\n /inference="protein motif"\n /db_xref="PFAM:PF00698"\n /db_xref="InterPro:IPR014043"\n- /note="e-value: 4.597134671955754e-38"\n+ /note="e-value: 4.739349423268586e-38"\n /note="p-value: 1.7134307387088164e-41"\n /function="Acyl transferase domain"\n /standard_name="PF00698"\n@@ -509,7 +509,7 @@\n /inference="protein motif"\n /db_xref="PFAM:PF14765"\n 
/db_xref="InterPro:IPR020807"\n- /note="e-value: 7.778696660229127e-11"\n+ /note="e-value: 8.019334685871699e-11"\n /note="p-value: 2.8992533209948296e-14"\n /function="Polyketide synthase dehydratase"\n /standard_name="PF14765"\n@@ -533,7 +533,7 @@\n /inference="protein motif"\n /db_xref="PFAM:PF00550"\n /db_xref="InterPro:IPR009081"\n- /note="e-value: 5.884377030377924e-14"\n+ /note="e-value: 6.066413293337807e-14"\n /note="p-value: 2.193207987468477e-17"\n /function="Phosphopantetheine attachment site"\n /standard_name="PF00550"\n@@ -541,7 +541,7 @@\n /inference="protein motif"\n /db_xref="PFAM:PF00550"\n /db_xref="InterPro:IPR009081"\n- /note="e-value: 3.9212317886052276e-10"\n+ /note="e-value: 4.042537132792419e-10"\n /note="p-value: 1.461510170930014e-13"\n /function="Phosphopantetheine attachment site"\n /standard_name="PF00550"\n@@ -549,7 +549,7 @@\n /inference="protein motif"\n /db_xref="PFAM:PF00550"\n /db_xref="InterPro:IPR009081"\n- /note="e-value: 1.367829688372301e-08"\n+ /note="e-value: 1.4101442109719659e-08"\n /note="p-value: 5.098135252971677e-12"\n /function="Phosphopantetheine attachment site"\n /standard_name="PF00550"\n@@ -557,7 +557,7 @@\n /inference="protein motif"\n /db_xref="PFAM:PF00975"\n /db_xref="InterPro:IPR001031"\n- /note="e-value: 6.711355516947163e-24"\n+ /note="e-value: 6.91897478936856e-24"\n /note="p-value: 2.5014370171252933e-27"\n /function="Thioesterase domain"\n /standard_name="PF00975"\n' |
b |
diff -r fde43648cba0 -r 56b924f62165 test-data/clusters.tsv --- a/test-data/clusters.tsv Thu Mar 31 18:00:15 2022 +0000 +++ b/test-data/clusters.tsv Tue Apr 05 23:18:49 2022 +0000 |
b |
@@ -1,2 +1,2 @@ sequence_id bgc_id start end average_p max_p type alkaloid_probability polyketide_probability ripp_probability saccharide_probability terpene_probability nrp_probability proteins domains -BGC0001866.1 BGC0001866.1_cluster_1 347 32979 0.9969495815733557 0.9999999447224028 Polyketide 0.0 0.98 0.0 0.0 0.0 0.09999999999999998 BGC0001866.1_1;BGC0001866.1_2;BGC0001866.1_3;BGC0001866.1_4;BGC0001866.1_5;BGC0001866.1_6;BGC0001866.1_7;BGC0001866.1_8;BGC0001866.1_9;BGC0001866.1_10;BGC0001866.1_11;BGC0001866.1_12;BGC0001866.1_13;BGC0001866.1_14;BGC0001866.1_15;BGC0001866.1_16;BGC0001866.1_17;BGC0001866.1_18;BGC0001866.1_19;BGC0001866.1_20;BGC0001866.1_21;BGC0001866.1_22;BGC0001866.1_23 PF00106;PF00107;PF00109;PF00135;PF00394;PF00550;PF00698;PF00743;PF00891;PF00975;PF02801;PF06609;PF07690;PF07731;PF08241;PF08242;PF08493;PF08659;PF13434;PF13489;PF13649;PF13847;PF14765;PF16073;PF16197 +BGC0001866.1 BGC0001866.1_cluster_1 347 32979 0.9958958770931704 0.9999999976946022 Polyketide 0.010000000000000009 0.96 0.0 0.0 0.010000000000000009 0.14 BGC0001866.1_1;BGC0001866.1_2;BGC0001866.1_3;BGC0001866.1_4;BGC0001866.1_5;BGC0001866.1_6;BGC0001866.1_7;BGC0001866.1_8;BGC0001866.1_9;BGC0001866.1_10;BGC0001866.1_11;BGC0001866.1_12;BGC0001866.1_13;BGC0001866.1_14;BGC0001866.1_15;BGC0001866.1_16;BGC0001866.1_17;BGC0001866.1_18;BGC0001866.1_19;BGC0001866.1_20;BGC0001866.1_21;BGC0001866.1_22;BGC0001866.1_23 PF00106;PF00107;PF00109;PF00135;PF00394;PF00550;PF00698;PF00743;PF00891;PF00975;PF02801;PF06609;PF07690;PF07731;PF08241;PF08242;PF08493;PF08659;PF13434;PF13489;PF13649;PF13847;PF14765;PF16073;PF16197 |
b |
diff -r fde43648cba0 -r 56b924f62165 test-data/features.tsv --- a/test-data/features.tsv Thu Mar 31 18:00:15 2022 +0000 +++ b/test-data/features.tsv Tue Apr 05 23:18:49 2022 +0000 |
b |
b'@@ -1,38 +1,38 @@\n sequence_id\tprotein_id\tstart\tend\tstrand\tdomain\thmm\ti_evalue\tpvalue\tdomain_start\tdomain_end\tbgc_probability\r\n-BGC0001866.1\tBGC0001866.1_1\t347\t1489\t-\tPF00394\tPfam\t2.1941888078432915e-08\t8.178117062405111e-12\t1\t63\t0.9852038761627908\r\n-BGC0001866.1\tBGC0001866.1_1\t347\t1489\t-\tPF07731\tPfam\t3.9374169295176556e-23\t1.467542649838858e-26\t150\t281\t0.9852038761627908\r\n-BGC0001866.1\tBGC0001866.1_6\t3946\t4389\t+\tPF00891\tPfam\t4.743887678074703e-16\t1.7681280946979883e-19\t17\t121\t0.9910535094227727\r\n-BGC0001866.1\tBGC0001866.1_7\t4683\t5138\t+\tPF00135\tPfam\t4.674605664377319e-21\t1.7423055029360116e-24\t48\t140\t0.9913598896683397\r\n-BGC0001866.1\tBGC0001866.1_8\t5384\t5812\t+\tPF00135\tPfam\t3.9706994470948554e-30\t1.4799476135277136e-33\t2\t114\t0.9925093258822111\r\n-BGC0001866.1\tBGC0001866.1_9\t5823\t6599\t+\tPF00135\tPfam\t1.4185801852307574e-15\t5.287291037013632e-19\t2\t209\t0.9946019708257335\r\n-BGC0001866.1\tBGC0001866.1_10\t7758\t9029\t+\tPF13434\tPfam\t5.777178703900199e-08\t2.153253337271785e-11\t13\t124\t0.9978201609931655\r\n-BGC0001866.1\tBGC0001866.1_10\t7758\t9029\t+\tPF00743\tPfam\t5.089108077410868e-07\t1.8967976434628658e-10\t36\t102\t0.9978201609931655\r\n-BGC0001866.1\tBGC0001866.1_13\t11550\t12662\t+\tPF07690\tPfam\t5.839871260376694e-37\t2.1766199255969786e-40\t1\t362\t0.9990971143689635\r\n-BGC0001866.1\tBGC0001866.1_13\t11550\t12662\t+\tPF06609\tPfam\t9.543170598318239e-09\t3.55690294383833e-12\t17\t244\t0.9990971143689635\r\n-BGC0001866.1\tBGC0001866.1_15\t14920\t15912\t+\tPF08493\tPfam\t2.6165794251055913e-17\t9.752439154325723e-21\t139\t224\t0.9999977987864139\r\n-BGC0001866.1\tBGC0001866.1_16\t17173\t19143\t+\tPF00109\tPfam\t9.025888536170949e-60\t3.364103069761815e-63\t2\t248\t0.9999994272691842\r\n-BGC0001866.1\tBGC0001866.1_16\t17173\t19143\t+\tPF02801\tPfam\t2.2171445990751238e-35\t8.263677223537547e-39\t257\t368\t0.9999994272691842\r\n-BGC0001866.1\tBGC0001866.1_16\t17173\t19
143\t+\tPF16197\tPfam\t3.8698172759236842e-25\t1.4423471024687604e-28\t371\t487\t0.9999994272691842\r\n-BGC0001866.1\tBGC0001866.1_16\t17173\t19143\t+\tPF00698\tPfam\t1.0799913424517567e-26\t4.025312495161225e-30\t512\t648\t0.9999994272691842\r\n-BGC0001866.1\tBGC0001866.1_17\t19152\t22424\t+\tPF00698\tPfam\t2.639223271303753e-16\t9.836836642950999e-20\t2\t151\t0.9999940983719267\r\n-BGC0001866.1\tBGC0001866.1_17\t19152\t22424\t+\tPF14765\tPfam\t2.520598829779557e-60\t9.394703055458656e-64\t228\t504\t0.9999940983719267\r\n-BGC0001866.1\tBGC0001866.1_17\t19152\t22424\t+\tPF13489\tPfam\t1.0131254482174088e-12\t3.776091868123029e-16\t661\t817\t0.9999940983719267\r\n-BGC0001866.1\tBGC0001866.1_17\t19152\t22424\t+\tPF13847\tPfam\t8.939870258494623e-11\t3.332042586095648e-14\t666\t776\t0.9999940983719267\r\n-BGC0001866.1\tBGC0001866.1_17\t19152\t22424\t+\tPF13649\tPfam\t2.319131521369124e-13\t8.643799930559537e-17\t667\t764\t0.9999940983719267\r\n-BGC0001866.1\tBGC0001866.1_17\t19152\t22424\t+\tPF08242\tPfam\t3.6288099491186147e-22\t1.3525195486837923e-25\t668\t766\t0.9999940983719267\r\n-BGC0001866.1\tBGC0001866.1_17\t19152\t22424\t+\tPF08241\tPfam\t5.245291385894328e-12\t1.9550098344742185e-15\t668\t767\t0.9999940983719267\r\n-BGC0001866.1\tBGC0001866.1_18\t22762\t23235\t+\tPF00107\tPfam\t1.0960342036668699e-15\t4.085106983476965e-19\t12\t117\t0.9999176675645223\r\n-BGC0001866.1\tBGC0001866.1_19\t23268\t24623\t+\tPF08659\tPfam\t1.5141662612831146e-61\t5.643556695054471e-65\t65\t239\t0.9999724741067139\r\n-BGC0001866.1\tBGC0001866.1_19\t23268\t24623\t+\tPF00106\tPfam\t1.1379002942545491e-07\t4.2411490654288077e-11\t68\t221\t0.9999724741067139\r\n-BGC0001866.1\tBGC0001866.1_19\t23268\t24623\t+\tPF00550\tPfam\t3.359618716013185e-10\t1.2521873708584363e-13\t384\t437\t0.9999724741067139\r\n-BGC0001866.1\tBGC0001866.1_20\t25769\t26056\t+\tPF16073\tPfam\t1.3071857188363548e-23\t4.872104803713585e-27\t8\t94\t0.999988513111687\r\n-BGC0001866.1\tBGC0001866.1_21\t26544\t29999\t+\t
PF16073\tPfam\t8.208876065249628e-11\t3.059588544632735e-14\t2\t47\t0.9999999447224028\r\n-BGC0001866.1\tBGC0001866.1_21\t26544\t29999\t+\tPF00109\tPfam\t2.667462237983852e-82\t9.942088102809735e-86\t178\t426\t0.9999999447224028\r\n-BGC0001866.1\tBGC0001866.1_21\t26544\t29999\t+\tPF02801\tPfam\t2.4031043351141288e-34\t8.956780973217029e-38\t434\t555\t0.9999999447224028\r\n-BGC0001866.1\tBGC0001866.1_21\t'..b'511\r\n+BGC0001866.1\tBGC0001866.1_10\t7758\t9029\t+\tPF13434\tPfam\t5.955898730893757e-08\t2.153253337271785e-11\t13\t124\t0.9986351193337516\r\n+BGC0001866.1\tBGC0001866.1_10\t7758\t9029\t+\tPF00743\tPfam\t5.246542281818287e-07\t1.8967976434628658e-10\t36\t102\t0.9986351193337516\r\n+BGC0001866.1\tBGC0001866.1_13\t11550\t12662\t+\tPF07690\tPfam\t6.020530714201243e-37\t2.1766199255969786e-40\t1\t362\t0.9994485509803548\r\n+BGC0001866.1\tBGC0001866.1_13\t11550\t12662\t+\tPF06609\tPfam\t9.83839354265682e-09\t3.55690294383833e-12\t17\t244\t0.9994485509803548\r\n+BGC0001866.1\tBGC0001866.1_15\t14920\t15912\t+\tPF08493\tPfam\t2.686865976406516e-17\t9.713904470016327e-21\t139\t224\t0.9999999296901834\r\n+BGC0001866.1\tBGC0001866.1_16\t17173\t19143\t+\tPF00109\tPfam\t9.30510909096118e-60\t3.364103069761815e-63\t2\t248\t0.9999998571963613\r\n+BGC0001866.1\tBGC0001866.1_16\t17173\t19143\t+\tPF02801\tPfam\t2.2857331200304854e-35\t8.263677223537547e-39\t257\t368\t0.9999998571963613\r\n+BGC0001866.1\tBGC0001866.1_16\t17173\t19143\t+\tPF16197\tPfam\t4.800730099641783e-25\t1.7356218726109122e-28\t371\t488\t0.9999998571963613\r\n+BGC0001866.1\tBGC0001866.1_16\t17173\t19143\t+\tPF00698\tPfam\t1.113401436161595e-26\t4.025312495161225e-30\t512\t648\t0.9999998571963613\r\n+BGC0001866.1\tBGC0001866.1_17\t19152\t22424\t+\tPF00698\tPfam\t2.7208690154402465e-16\t9.836836642950999e-20\t2\t151\t0.9999990994944158\r\n+BGC0001866.1\tBGC0001866.1_17\t19152\t22424\t+\tPF14765\tPfam\t2.598574865139864e-60\t9.394703055458656e-64\t228\t504\t0.9999990994944158\r\n+BGC0001866.1\tBGC0001866.1_17
\t19152\t22424\t+\tPF13489\tPfam\t1.04446701072283e-12\t3.776091868123029e-16\t661\t817\t0.9999990994944158\r\n+BGC0001866.1\tBGC0001866.1_17\t19152\t22424\t+\tPF13847\tPfam\t8.752004453621267e-11\t3.1641375465008194e-14\t666\t776\t0.9999990994944158\r\n+BGC0001866.1\tBGC0001866.1_17\t19152\t22424\t+\tPF13649\tPfam\t2.4253465299984994e-13\t8.76842563267715e-17\t667\t764\t0.9999990994944158\r\n+BGC0001866.1\tBGC0001866.1_17\t19152\t22424\t+\tPF08242\tPfam\t3.7410690716593694e-22\t1.3525195486837923e-25\t668\t766\t0.9999990994944158\r\n+BGC0001866.1\tBGC0001866.1_17\t19152\t22424\t+\tPF08241\tPfam\t5.4075572021556884e-12\t1.9550098344742185e-15\t668\t767\t0.9999990994944158\r\n+BGC0001866.1\tBGC0001866.1_18\t22762\t23235\t+\tPF00107\tPfam\t1.1299405916297285e-15\t4.085106983476965e-19\t12\t117\t0.9999802025553775\r\n+BGC0001866.1\tBGC0001866.1_19\t23268\t24623\t+\tPF08659\tPfam\t1.5610077818520667e-61\t5.643556695054471e-65\t65\t239\t0.9999913868972266\r\n+BGC0001866.1\tBGC0001866.1_19\t23268\t24623\t+\tPF00106\tPfam\t1.1731018314976082e-07\t4.2411490654288077e-11\t68\t221\t0.9999913868972266\r\n+BGC0001866.1\tBGC0001866.1_19\t23268\t24623\t+\tPF00550\tPfam\t3.463550267794435e-10\t1.2521873708584363e-13\t384\t437\t0.9999913868972266\r\n+BGC0001866.1\tBGC0001866.1_20\t25769\t26056\t+\tPF16073\tPfam\t9.422238725791962e-24\t3.406449286258844e-27\t8\t94\t0.9999994733759681\r\n+BGC0001866.1\tBGC0001866.1_21\t26544\t29999\t+\tPF16073\tPfam\t4.380197593141013e-11\t1.5835855362042708e-14\t2\t47\t0.9999999976946022\r\n+BGC0001866.1\tBGC0001866.1_21\t26544\t29999\t+\tPF00109\tPfam\t2.7499815692371726e-82\t9.942088102809735e-86\t178\t426\t0.9999999976946022\r\n+BGC0001866.1\tBGC0001866.1_21\t26544\t29999\t+\tPF02801\tPfam\t2.4774456171918303e-34\t8.956780973217029e-38\t434\t555\t0.9999999976946022\r\n+BGC0001866.1\tBGC0001866.1_21\t26544\t29999\t+\tPF16197\tPfam\t8.475099126640419e-07\t3.0640271607521397e-10\t567\t673\t0.9999999976946022\r\n+BGC0001866.1\tBGC0001866.1_21\t26544\
t29999\t+\tPF00698\tPfam\t4.739349423268586e-38\t1.7134307387088164e-41\t709\t1012\t0.9999999976946022\r\n+BGC0001866.1\tBGC0001866.1_22\t30150\t30890\t+\tPF14765\tPfam\t8.019334685871699e-11\t2.8992533209948296e-14\t39\t244\t0.9999912059124727\r\n+BGC0001866.1\tBGC0001866.1_23\t30937\t32979\t+\tPF00550\tPfam\t6.066413293337807e-14\t2.193207987468477e-17\t67\t128\t0.9998703656415205\r\n+BGC0001866.1\tBGC0001866.1_23\t30937\t32979\t+\tPF00550\tPfam\t4.042537132792419e-10\t1.461510170930014e-13\t174\t238\t0.9998703656415205\r\n+BGC0001866.1\tBGC0001866.1_23\t30937\t32979\t+\tPF00550\tPfam\t1.4101442109719659e-08\t5.098135252971677e-12\t299\t360\t0.9998703656415205\r\n+BGC0001866.1\tBGC0001866.1_23\t30937\t32979\t+\tPF00975\tPfam\t6.91897478936856e-24\t2.5014370171252933e-27\t443\t550\t0.9998703656415205\r\n' |
b |
diff -r fde43648cba0 -r 56b924f62165 test-data/genes.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genes.tsv Tue Apr 05 23:18:49 2022 +0000 |
b |
@@ -0,0 +1,24 @@ +sequence_id protein_id start end strand average_p max_p +BGC0001866.1 BGC0001866.1_1 347 1489 - 0.9791890143072265 0.9791890143072265 +BGC0001866.1 BGC0001866.1_2 1525 2016 + 0.9816626269970528 0.9816626269970528 +BGC0001866.1 BGC0001866.1_3 2513 2722 - 0.9844997726878899 0.9844997726878899 +BGC0001866.1 BGC0001866.1_4 2905 3378 + 0.9877300777686966 0.9877300777686966 +BGC0001866.1 BGC0001866.1_5 3353 3922 + 0.9913872741253911 0.9913872741253911 +BGC0001866.1 BGC0001866.1_6 3946 4389 + 0.9955095513800687 0.9955095513800687 +BGC0001866.1 BGC0001866.1_7 4683 5138 + 0.995982045872177 0.995982045872177 +BGC0001866.1 BGC0001866.1_8 5384 5812 + 0.9966491071789748 0.9966491071789748 +BGC0001866.1 BGC0001866.1_9 5823 6599 + 0.9975265367646511 0.9975265367646511 +BGC0001866.1 BGC0001866.1_10 7758 9029 + 0.9986351193337516 0.9986351193337516 +BGC0001866.1 BGC0001866.1_11 9800 10384 + 0.9988029392597757 0.9988029392597757 +BGC0001866.1 BGC0001866.1_12 11109 11537 + 0.999073142625125 0.999073142625125 +BGC0001866.1 BGC0001866.1_13 11550 12662 + 0.9994485509803548 0.9994485509803548 +BGC0001866.1 BGC0001866.1_14 12681 13127 + 0.9996778954036583 0.9996778954036583 +BGC0001866.1 BGC0001866.1_15 14920 15912 + 0.9999999296901834 0.9999999296901834 +BGC0001866.1 BGC0001866.1_16 17173 19143 + 0.9999998571963613 0.9999998571963613 +BGC0001866.1 BGC0001866.1_17 19152 22424 + 0.9999990994944158 0.9999990994944158 +BGC0001866.1 BGC0001866.1_18 22762 23235 + 0.9999802025553775 0.9999802025553775 +BGC0001866.1 BGC0001866.1_19 23268 24623 + 0.9999913868972266 0.9999913868972266 +BGC0001866.1 BGC0001866.1_20 25769 26056 + 0.9999994733759681 0.9999994733759681 +BGC0001866.1 BGC0001866.1_21 26544 29999 + 0.9999999976946022 0.9999999976946022 +BGC0001866.1 BGC0001866.1_22 30150 30890 + 0.9999912059124727 0.9999912059124727 +BGC0001866.1 BGC0001866.1_23 30937 32979 + 0.9998703656415205 0.9998703656415205 |
b |
diff -r fde43648cba0 -r 56b924f62165 test-data/sideload.json --- a/test-data/sideload.json Thu Mar 31 18:00:15 2022 +0000 +++ b/test-data/sideload.json Tue Apr 05 23:18:49 2022 +0000 |
[ |
@@ -5,14 +5,14 @@ "subregions": [ { "details": { - "alkaloid_probability": "0.000", - "average_p": "0.997", + "alkaloid_probability": "0.010", + "average_p": "0.996", "max_p": "1.000", - "nrp_probability": "0.100", - "polyketide_probability": "0.980", + "nrp_probability": "0.140", + "polyketide_probability": "0.960", "ripp_probability": "0.000", "saccharide_probability": "0.000", - "terpene_probability": "0.000" + "terpene_probability": "0.010" }, "end": 32979, "label": "Polyketide", @@ -25,11 +25,13 @@ "configuration": { "cds": "3", "e-filter": "None", + "edge-distance": "0", + "mask": "False", "postproc": "'gecco'", - "threshold": "0.3" + "threshold": "0.8" }, "description": "Biosynthetic Gene Cluster prediction with Conditional Random Fields.", "name": "GECCO", - "version": "0.8.10" + "version": "0.9.1" } } \ No newline at end of file |