Repository 'augustus_training'
hg clone https://toolshed.g2.bx.psu.edu/repos/bgruening/augustus_training

Changeset 2:0d425a4b6896 (2019-05-23)
Previous changeset 1:1fbb1135da16 (2019-05-10) Next changeset 3:ccf8d0bbebe9 (2019-05-30)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/augustus commit 0fed5bb024a096dcb5b2858520ba191da7798b6d
modified:
augustus_training.xml
added:
test-data/augustus.hints.output.gtf
test-data/augustus.hints_and_range.output.gtf
test-data/chr2R.truncated.fa
test-data/extrinsic.truncated.cfg
test-data/hints.truncated.adjusted.gff
b
diff -r 1fbb1135da16 -r 0d425a4b6896 augustus_training.xml
--- a/augustus_training.xml Fri May 10 08:52:20 2019 -0400
+++ b/augustus_training.xml Thu May 23 18:17:22 2019 -0400
[
@@ -1,5 +1,5 @@
 <?xml version="1.0"?>
-<tool id="augustus_training" name="Train Augustus" profile="16.04" version="@VERSION@+galaxy1">
+<tool id="augustus_training" name="Train Augustus" profile="16.04" version="@VERSION@+galaxy2">
     <description>ab-initio gene predictor</description>
     <macros>
         <import>macros.xml</import>
@@ -8,7 +8,7 @@
         <requirement type="package" version="2.31.10">maker</requirement>
     </expand>
     <command><![CDATA[
-        cp -r "\$AUGUSTUS_CONFIG_PATH/" augustus_dir/ &&
+        cp -r "\$AUGUSTUS_CONFIG_PATH" augustus_dir/ &&
 
         export AUGUSTUS_CONFIG_PATH=`pwd`/augustus_dir/ &&
 
b
diff -r 1fbb1135da16 -r 0d425a4b6896 test-data/augustus.hints.output.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/augustus.hints.output.gtf Thu May 23 18:17:22 2019 -0400
b
b'@@ -0,0 +1,195 @@\n+# This output was generated with AUGUSTUS (version 3.2.3).\n+# AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de),\n+# O. Keller, S. K\xc3\xb6nig, L. Gerischer and L. Romoth.\n+# Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008),\n+# Using native and syntenically mapped cDNA alignments to improve de novo gene finding\n+# Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013\n+# Sources of extrinsic information: M RM E W \n+# Setting CDSpart local malus: 0.985\n+# Setting UTRpart local malus: 0.973\n+# reading in the file /private/var/folders/2d/g1vdzd1n6fv1fgxtlx8vrb189nw7dq/T/tmpsUsEy3/files/000/dataset_14.dat ...\n+# Have extrinsic information about 1 sequences (in the specified range). \n+# Initialising the parameters using config directory /Users/sargentl/miniconda3/envs/__augustus@3.2.3/config/ ...\n+# fly version. Using default transition matrix.\n+# Looks like /private/var/folders/2d/g1vdzd1n6fv1fgxtlx8vrb189nw7dq/T/tmpsUsEy3/files/000/dataset_13.dat is in fasta format.\n+# We have hints for 1 sequence and for 1 of the sequences in the input set.\n+#\n+# ----- prediction on sequence number 1 (length = 9950, name = chr2R) -----\n+#\n+# Delete group HintGroup gi|2701440, 8630-8864, mult= 1, priority= 4 4 features\n+# Delete group HintGroup gi|4203815, 8630-9250, mult= 1, priority= 4 6 features\n+# Delete group HintGroup gi|4245769, 8630-9250, mult= 1, priority= 4 6 features\n+# Delete group HintGroup gi|4245770, 8630-9250, mult= 1, priority= 4 6 features\n+# Delete group HintGroup gi|13769068, 8630-9250, mult= 1, priority= 4 6 features\n+# Delete group HintGroup gi|14693753, 8630-9250, mult= 1, priority= 4 6 features\n+# Delete group HintGroup gi|14695912, 8630-9250, mult= 1, priority= 4 6 features\n+# Delete group HintGroup gi|14699170, 8630-9250, mult= 1, priority= 4 6 features\n+# Delete group HintGroup gi|14700619, 8630-9250, mult= 1, priority= 4 6 features\n+# Delete group HintGroup gi|15539951, 8630-9250, mult= 1, priority= 4 6 features\n+# Delete group HintGroup gi|15543927, 8630-9250, mult= 1, priority= 4 6 features\n+# Delete group HintGroup gi|38623822, 8630-9250, mult= 1, priority= 4 6 features\n+# Delete group HintGroup SRR023546.8642467/1, 8630-8693, mult= 1, priority= 4 2 features\n+# Delete group HintGroup gi|2871896, 8811-9250, mult= 1, priority= 4 4 features\n+# Delete group HintGroup gi|15543159, 8811-9250, mult= 1, priority= 4 4 features\n+# Delete group HintGroup , 8811-8864, mult= 8, priority= 4 1 features\n+# Delete group HintGroup , 8811-8864, mult= 8, priority= 4 1 features\n+# Delete group HintGroup gi|2700091, 9191-9250, mult= 1, priority= 4 2 features\n+# Delete group HintGroup gi|2701304, 9191-9250, mult= 1, priority= 4 2 features\n+# Delete group HintGroup gi|2701309, 9191-9250, mult= 1, priority= 4 2 features\n+# Delete group HintGroup gi|3101873, 9191-9250, mult= 1, priority= 4 2 features\n+# Delete group HintGroup , 9191-9250, mult= 3, priority= 4 1 features\n+# Delete group HintGroup , 9191-9250, mult= 3, priority= 4 1 features\n+# Deleted 23 groups because some hint was not satisfiable.\n+# Constraints/Hints:\n+chr2R\tw2h\tep\t7551\t7560\t5.3\t.\t.\tmult=5;src=W "1.03743;0.992;9:0"\n+chr2R\tw2h\tep\t7561\t7570\t7.4\t.\t.\tmult=7;src=W "1.05617;0.992;13:0"\n+chr2R\tw2h\tep\t7571\t7580\t9.7\t.\t.\tmult=9;src=W "1.07589;0.992;17:0"\n+chr2R\tw2h\tep\t7581\t7590\t10.2\t.\t.\tmult=10;src=W "1.08606;0.992;19:0"\n+chr2R\tw2h\tep\t7591\t7600\t9\t.\t.\tmult=9;src=W "1.07589;0.992;17:0"\n+chr2R\tw2h\tep\t7601\t7610\t9.7\t.\t.\tmult=9;src=W "1.07589;0.992;17:0"\n+chr2R\tw2h\tep\t7611\t7620\t13\t.\t.\tmult=13;src=W "1.11756;0.992;25:0"\n+chr2R\tw2h\tep\t7621\t7630\t15.3\t.\t.\tmult=15;src=W "1.13933;0.992;29:0"\n+chr2R\tw2h\tep\t7631\t7640\t13.4\t.\t.\tmult=13;src=W "1.11756;0.992;25:0"\n+chr2R\tw2h\tep\t7641\t7650\t13.7\t.\t.\tmult=13;src=W "1.11756;0.992;25:0"\n+chr2R\tw2h\tep\t7651\t7660\t14.4\t.\t.\tmult=14;src=W "1.12838;0.992;27:0"\n+chr2R\tw2h\tep\t7661\t7670\t14.6\t.\t.\tmult=14;src=W "1.12838;0.992;27:0"\n+chr2R\tw2h\tep\t7671\t7680\t16.8\t.\t.\tmult=16;src=W'..b'2857;0.992;7:0"\n+chr2R\tw2h\tep\t8749\t8758\t5.7\t.\t.\tmult=5;src=W "1.03743;0.992;9:0"\n+chr2R\tw2h\tep\t8759\t8768\t8\t.\t.\tmult=8;src=W "1.06592;0.992;15:0"\n+chr2R\tw2h\tep\t8769\t8778\t8\t.\t.\tmult=8;src=W "1.06592;0.992;15:0"\n+chr2R\tw2h\tep\t8779\t8788\t8.6\t.\t.\tmult=8;src=W "1.06592;0.992;15:0"\n+chr2R\tw2h\tep\t8789\t8798\t8.7\t.\t.\tmult=8;src=W "1.06592;0.992;15:0"\n+chr2R\tw2h\tep\t8880\t8889\t12.7\t.\t.\tmult=12;src=W "1.1069;0.992;23:0"\n+chr2R\tw2h\tep\t8890\t8899\t15.3\t.\t.\tmult=15;src=W "1.13933;0.992;29:0"\n+chr2R\tw2h\tep\t8900\t8909\t17.6\t.\t.\tmult=17;src=W "1.16167;0.992;33:0"\n+chr2R\tw2h\tep\t8910\t8919\t17.9\t.\t.\tmult=17;src=W "1.16167;0.992;33:0"\n+chr2R\tw2h\tep\t8920\t8929\t17.2\t.\t.\tmult=17;src=W "1.16167;0.992;33:0"\n+chr2R\tw2h\tep\t8930\t8939\t18.4\t.\t.\tmult=18;src=W "1.17304;0.992;35:0"\n+chr2R\tw2h\tep\t8940\t8949\t19.9\t.\t.\tmult=19;src=W "1.18455;0.992;37:0"\n+chr2R\tw2h\tep\t8950\t8959\t19.1\t.\t.\tmult=19;src=W "1.18455;0.992;37:0"\n+chr2R\tw2h\tep\t8960\t8969\t15.2\t.\t.\tmult=15;src=W "1.13933;0.992;29:0"\n+chr2R\tw2h\tep\t8970\t8979\t13\t.\t.\tmult=13;src=W "1.11756;0.992;25:0"\n+chr2R\tw2h\tep\t8980\t8989\t14.7\t.\t.\tmult=14;src=W "1.12838;0.992;27:0"\n+chr2R\tw2h\tep\t8990\t8999\t14.5\t.\t.\tmult=14;src=W "1.12838;0.992;27:0"\n+chr2R\tw2h\tep\t9000\t9009\t14.5\t.\t.\tmult=14;src=W "1.12838;0.992;27:0"\n+chr2R\tw2h\tep\t9010\t9019\t12.5\t.\t.\tmult=12;src=W "1.1069;0.992;23:0"\n+chr2R\tw2h\tep\t9020\t9029\t11.9\t.\t.\tmult=11;src=W "1.0964;0.992;21:0"\n+chr2R\tw2h\tep\t9030\t9039\t12.1\t.\t.\tmult=12;src=W "1.1069;0.992;23:0"\n+chr2R\tw2h\tep\t9040\t9049\t10\t.\t.\tmult=10;src=W "1.08606;0.992;19:0"\n+chr2R\tw2h\tep\t9050\t9059\t9.7\t.\t.\tmult=9;src=W "1.07589;0.992;17:0"\n+chr2R\tw2h\tep\t9060\t9069\t10.6\t.\t.\tmult=10;src=W "1.08606;0.992;19:0"\n+chr2R\tw2h\tep\t9070\t9079\t12.3\t.\t.\tmult=12;src=W "1.1069;0.992;23:0"\n+chr2R\tw2h\tep\t9080\t9089\t13.1\t.\t.\tmult=13;src=W "1.11756;0.992;25:0"\n+chr2R\tw2h\tep\t9090\t9099\t13.4\t.\t.\tmult=13;src=W "1.11756;0.992;25:0"\n+chr2R\tw2h\tep\t9100\t9109\t12\t.\t.\tmult=12;src=W "1.1069;0.992;23:0"\n+chr2R\tw2h\tep\t9110\t9119\t10.7\t.\t.\tmult=10;src=W "1.08606;0.992;19:0"\n+chr2R\tw2h\tep\t9120\t9129\t11\t.\t.\tmult=11;src=W "1.0964;0.992;21:0"\n+chr2R\tw2h\tep\t9130\t9139\t10.9\t.\t.\tmult=10;src=W "1.08606;0.992;19:0"\n+chr2R\tw2h\tep\t9140\t9149\t7.9\t.\t.\tmult=7;src=W "1.05617;0.992;13:0"\n+chr2R\tw2h\tep\t9150\t9159\t4.9\t.\t.\tmult=4;src=W "1.02857;0.992;7:0"\n+chr2R\tw2h\tep\t9160\t9169\t4.4\t.\t.\tmult=4;src=W "1.02857;0.992;7:0"\n+chr2R\tw2h\tep\t9265\t9274\t11.4\t.\t.\tmult=11;src=W "1.0964;0.992;21:0"\n+chr2R\tw2h\tep\t9275\t9284\t12.8\t.\t.\tmult=12;src=W "1.1069;0.992;23:0"\n+chr2R\tw2h\tep\t9285\t9294\t13.1\t.\t.\tmult=13;src=W "1.11756;0.992;25:0"\n+chr2R\tw2h\tep\t9295\t9304\t13.6\t.\t.\tmult=13;src=W "1.11756;0.992;25:0"\n+chr2R\tw2h\tep\t9305\t9314\t13\t.\t.\tmult=13;src=W "1.11756;0.992;25:0"\n+chr2R\tw2h\tep\t9315\t9324\t12.4\t.\t.\tmult=12;src=W "1.1069;0.992;23:0"\n+chr2R\tw2h\tep\t9325\t9334\t9\t.\t.\tmult=9;src=W "1.07589;0.992;17:0"\n+chr2R\tw2h\tep\t9335\t9344\t5.2\t.\t.\tmult=5;src=W "1.03743;0.992;9:0"\n+# Predicted genes for sequence number 1 on both strands\n+# start gene chr2R.g1\n+chr2R\tAUGUSTUS\tgene\t7560\t9303\t0.79\t-\t.\tchr2R.g1\n+chr2R\tAUGUSTUS\ttranscript\t7560\t9303\t0.79\t-\t.\tchr2R.g1.t1\n+chr2R\tAUGUSTUS\tstart_codon\t9301\t9303\t.\t-\t0\ttranscript_id "chr2R.g1.t1"; gene_id "chr2R.g1";\n+# Evidence for and against this transcript:\n+# % of transcript supported by hints (any source): 57.1\n+# CDS exons: 4/4\n+#      W:   4 \n+# CDS introns: 0/3\n+# 5\'UTR exons and introns: 0/0\n+# 3\'UTR exons and introns: 0/0\n+# hint groups fully obeyed: 0\n+# incompatible hint groups: 129\n+#      W: 129 \n+# end gene chr2R.g1\n+###\n+# command line:\n+# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=off --codingseq=off --introns=off --stop=off --stop=off --cds=off --singlestrand=false /private/var/folders/2d/g1vdzd1n6fv1fgxtlx8vrb189nw7dq/T/tmpsUsEy3/files/000/dataset_13.dat --UTR=off --genemodel=complete --hintsfile=/private/var/folders/2d/g1vdzd1n6fv1fgxtlx8vrb189nw7dq/T/tmpsUsEy3/files/000/dataset_14.dat --extrinsicCfgFile=/private/var/folders/2d/g1vdzd1n6fv1fgxtlx8vrb189nw7dq/T/tmpsUsEy3/files/000/dataset_15.dat --species=fly\n\\ No newline at end of file\n'
b
diff -r 1fbb1135da16 -r 0d425a4b6896 test-data/augustus.hints_and_range.output.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/augustus.hints_and_range.output.gtf Thu May 23 18:17:22 2019 -0400
b
b'@@ -0,0 +1,165 @@\n+# This output was generated with AUGUSTUS (version 3.2.3).\n+# AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de),\n+# O. Keller, S. K\xc3\xb6nig, L. Gerischer and L. Romoth.\n+# Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008),\n+# Using native and syntenically mapped cDNA alignments to improve de novo gene finding\n+# Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013\n+# Sources of extrinsic information: M RM E W \n+# Setting CDSpart local malus: 0.985\n+# Setting UTRpart local malus: 0.973\n+# reading in the file /private/var/folders/2d/g1vdzd1n6fv1fgxtlx8vrb189nw7dq/T/tmpsUsEy3/files/000/dataset_18.dat ...\n+# Have extrinsic information about 1 sequences (in the specified range). \n+# Initialising the parameters using config directory /Users/sargentl/miniconda3/envs/__augustus@3.2.3/config/ ...\n+# fly version. Using default transition matrix.\n+# Looks like /private/var/folders/2d/g1vdzd1n6fv1fgxtlx8vrb189nw7dq/T/tmpsUsEy3/files/000/dataset_17.dat is in fasta format.\n+# We have hints for 1 sequence and for 1 of the sequences in the input set.\n+#\n+# ----- prediction on sequence number 1 (length = 2001, name = chr2R) -----\n+#\n+# Delete group HintGroup gi|2701440, 1631-1865, mult= 1, priority= 4 4 features\n+# Delete group HintGroup gi|4203815, 1631-1865, mult= 1, priority= 4 4 features\n+# Delete group HintGroup gi|4245769, 1631-1865, mult= 1, priority= 4 4 features\n+# Delete group HintGroup gi|4245770, 1631-1865, mult= 1, priority= 4 4 features\n+# Delete group HintGroup gi|13769068, 1631-1865, mult= 1, priority= 4 4 features\n+# Delete group HintGroup gi|14693753, 1631-1865, mult= 1, priority= 4 4 features\n+# Delete group HintGroup gi|14695912, 1631-1865, mult= 1, priority= 4 4 features\n+# Delete group HintGroup gi|14699170, 1631-1865, mult= 1, priority= 4 4 features\n+# Delete group HintGroup gi|14700619, 1631-1865, mult= 1, priority= 4 4 features\n+# Delete group HintGroup gi|15539951, 1631-1865, mult= 1, priority= 4 4 features\n+# Delete group HintGroup gi|15543927, 1631-1865, mult= 1, priority= 4 4 features\n+# Delete group HintGroup gi|38623822, 1631-1865, mult= 1, priority= 4 4 features\n+# Delete group HintGroup SRR023546.8642467/1, 1631-1694, mult= 1, priority= 4 2 features\n+# Delete group HintGroup gi|2871896, 1812-1865, mult= 1, priority= 4 2 features\n+# Delete group HintGroup gi|15543159, 1812-1865, mult= 1, priority= 4 2 features\n+# Delete group HintGroup , 1812-1865, mult= 8, priority= 4 1 features\n+# Delete group HintGroup , 1812-1865, mult= 8, priority= 4 1 features\n+# Deleted 17 groups because some hint was not satisfiable.\n+# Constraints/Hints:\n+chr2R\tw2h\tep\t7551\t7560\t5.3\t.\t.\tmult=5;src=W "1.03743;0.992;9:0"\n+chr2R\tw2h\tep\t7561\t7570\t7.4\t.\t.\tmult=7;src=W "1.05617;0.992;13:0"\n+chr2R\tw2h\tep\t7571\t7580\t9.7\t.\t.\tmult=9;src=W "1.07589;0.992;17:0"\n+chr2R\tw2h\tep\t7581\t7590\t10.2\t.\t.\tmult=10;src=W "1.08606;0.992;19:0"\n+chr2R\tw2h\tep\t7591\t7600\t9\t.\t.\tmult=9;src=W "1.07589;0.992;17:0"\n+chr2R\tw2h\tep\t7601\t7610\t9.7\t.\t.\tmult=9;src=W "1.07589;0.992;17:0"\n+chr2R\tw2h\tep\t7611\t7620\t13\t.\t.\tmult=13;src=W "1.11756;0.992;25:0"\n+chr2R\tw2h\tep\t7621\t7630\t15.3\t.\t.\tmult=15;src=W "1.13933;0.992;29:0"\n+chr2R\tw2h\tep\t7631\t7640\t13.4\t.\t.\tmult=13;src=W "1.11756;0.992;25:0"\n+chr2R\tw2h\tep\t7641\t7650\t13.7\t.\t.\tmult=13;src=W "1.11756;0.992;25:0"\n+chr2R\tw2h\tep\t7651\t7660\t14.4\t.\t.\tmult=14;src=W "1.12838;0.992;27:0"\n+chr2R\tw2h\tep\t7661\t7670\t14.6\t.\t.\tmult=14;src=W "1.12838;0.992;27:0"\n+chr2R\tw2h\tep\t7671\t7680\t16.8\t.\t.\tmult=16;src=W "1.15043;0.992;31:0"\n+chr2R\tw2h\tep\t7681\t7690\t16.7\t.\t.\tmult=16;src=W "1.15043;0.992;31:0"\n+chr2R\tw2h\tep\t7691\t7700\t17.6\t.\t.\tmult=17;src=W "1.16167;0.992;33:0"\n+chr2R\tw2h\tep\t7701\t7710\t20.3\t.\t.\tmult=20;src=W "1.19619;0.992;39:0"\n+chr2R\tw2h\tep\t7711\t7720\t20.5\t.\t.\tmult=20;src=W "1.19619;0.992;39:0"\n+chr2R\tw2h\tep\t7721\t7730\t18.8\t.\t.\tmult=18;src=W "1.17304;0.992;35:0"\n+chr2R\tw2h\tep\t7731\t7740\t16.7\t.\t.\tmult=16;src=W "1.15043;0.992;31:0"\n+chr2R\tw2h\tep\t7741\t7750\t16.8\t.\t.\tmul'..b'\t8309\t7\t.\t.\tmult=7;src=W "1.05617;0.992;13:0"\n+chr2R\tw2h\tep\t8310\t8319\t7.3\t.\t.\tmult=7;src=W "1.05617;0.992;13:0"\n+chr2R\tw2h\tep\t8320\t8329\t9.6\t.\t.\tmult=9;src=W "1.07589;0.992;17:0"\n+chr2R\tw2h\tep\t8330\t8339\t11.6\t.\t.\tmult=11;src=W "1.0964;0.992;21:0"\n+chr2R\tw2h\tep\t8340\t8349\t10.8\t.\t.\tmult=10;src=W "1.08606;0.992;19:0"\n+chr2R\tw2h\tep\t8350\t8359\t10.1\t.\t.\tmult=10;src=W "1.08606;0.992;19:0"\n+chr2R\tw2h\tep\t8360\t8369\t11\t.\t.\tmult=11;src=W "1.0964;0.992;21:0"\n+chr2R\tw2h\tep\t8370\t8379\t13.4\t.\t.\tmult=13;src=W "1.11756;0.992;25:0"\n+chr2R\tw2h\tep\t8380\t8389\t13\t.\t.\tmult=13;src=W "1.11756;0.992;25:0"\n+chr2R\tw2h\tep\t8390\t8399\t11.8\t.\t.\tmult=11;src=W "1.0964;0.992;21:0"\n+chr2R\tw2h\tep\t8400\t8409\t9\t.\t.\tmult=9;src=W "1.07589;0.992;17:0"\n+chr2R\tw2h\tep\t8410\t8419\t7.7\t.\t.\tmult=7;src=W "1.05617;0.992;13:0"\n+chr2R\tw2h\tep\t8420\t8429\t8.9\t.\t.\tmult=8;src=W "1.06592;0.992;15:0"\n+chr2R\tw2h\tep\t8430\t8439\t11.5\t.\t.\tmult=11;src=W "1.0964;0.992;21:0"\n+chr2R\tw2h\tep\t8440\t8449\t11.4\t.\t.\tmult=11;src=W "1.0964;0.992;21:0"\n+chr2R\tw2h\tep\t8450\t8459\t9.8\t.\t.\tmult=9;src=W "1.07589;0.992;17:0"\n+chr2R\tw2h\tep\t8460\t8469\t10.2\t.\t.\tmult=10;src=W "1.08606;0.992;19:0"\n+chr2R\tw2h\tep\t8470\t8479\t10.8\t.\t.\tmult=10;src=W "1.08606;0.992;19:0"\n+chr2R\tw2h\tep\t8480\t8489\t10\t.\t.\tmult=10;src=W "1.08606;0.992;19:0"\n+chr2R\tw2h\tep\t8490\t8499\t8.2\t.\t.\tmult=8;src=W "1.06592;0.992;15:0"\n+chr2R\tw2h\tep\t8500\t8509\t7.1\t.\t.\tmult=7;src=W "1.05617;0.992;13:0"\n+chr2R\tw2h\tep\t8510\t8519\t4.2\t.\t.\tmult=4;src=W "1.02857;0.992;7:0"\n+chr2R\tw2h\tep\t8520\t8529\t4.7\t.\t.\tmult=4;src=W "1.02857;0.992;7:0"\n+chr2R\tw2h\tep\t8739\t8748\t4.3\t.\t.\tmult=4;src=W "1.02857;0.992;7:0"\n+chr2R\tw2h\tep\t8749\t8758\t5.7\t.\t.\tmult=5;src=W "1.03743;0.992;9:0"\n+chr2R\tw2h\tep\t8759\t8768\t8\t.\t.\tmult=8;src=W "1.06592;0.992;15:0"\n+chr2R\tw2h\tep\t8769\t8778\t8\t.\t.\tmult=8;src=W "1.06592;0.992;15:0"\n+chr2R\tw2h\tep\t8779\t8788\t8.6\t.\t.\tmult=8;src=W "1.06592;0.992;15:0"\n+chr2R\tw2h\tep\t8789\t8798\t8.7\t.\t.\tmult=8;src=W "1.06592;0.992;15:0"\n+chr2R\tw2h\tep\t8880\t8889\t12.7\t.\t.\tmult=12;src=W "1.1069;0.992;23:0"\n+chr2R\tw2h\tep\t8890\t8899\t15.3\t.\t.\tmult=15;src=W "1.13933;0.992;29:0"\n+chr2R\tw2h\tep\t8900\t8909\t17.6\t.\t.\tmult=17;src=W "1.16167;0.992;33:0"\n+chr2R\tw2h\tep\t8910\t8919\t17.9\t.\t.\tmult=17;src=W "1.16167;0.992;33:0"\n+chr2R\tw2h\tep\t8920\t8929\t17.2\t.\t.\tmult=17;src=W "1.16167;0.992;33:0"\n+chr2R\tw2h\tep\t8930\t8939\t18.4\t.\t.\tmult=18;src=W "1.17304;0.992;35:0"\n+chr2R\tw2h\tep\t8940\t8949\t19.9\t.\t.\tmult=19;src=W "1.18455;0.992;37:0"\n+chr2R\tw2h\tep\t8950\t8959\t19.1\t.\t.\tmult=19;src=W "1.18455;0.992;37:0"\n+chr2R\tw2h\tep\t8960\t8969\t15.2\t.\t.\tmult=15;src=W "1.13933;0.992;29:0"\n+chr2R\tw2h\tep\t8970\t8979\t13\t.\t.\tmult=13;src=W "1.11756;0.992;25:0"\n+chr2R\tw2h\tep\t8980\t8989\t14.7\t.\t.\tmult=14;src=W "1.12838;0.992;27:0"\n+chr2R\tw2h\tep\t8990\t8999\t14.5\t.\t.\tmult=14;src=W "1.12838;0.992;27:0"\n+chr2R\tw2h\tep\t9000\t9009\t14.5\t.\t.\tmult=14;src=W "1.12838;0.992;27:0"\n+# Predicted genes for sequence number 1 on both strands\n+# start gene chr2R.g1\n+chr2R\tAUGUSTUS\tgene\t7560\t8931\t0.83\t-\t.\tchr2R.g1\n+chr2R\tAUGUSTUS\ttranscript\t7560\t8931\t0.83\t-\t.\tchr2R.g1.t1\n+chr2R\tAUGUSTUS\tstart_codon\t8929\t8931\t.\t-\t0\ttranscript_id "chr2R.g1.t1"; gene_id "chr2R.g1";\n+# Evidence for and against this transcript:\n+# % of transcript supported by hints (any source): 60\n+# CDS exons: 3/3\n+#      W:   3 \n+# CDS introns: 0/2\n+# 5\'UTR exons and introns: 0/0\n+# 3\'UTR exons and introns: 0/0\n+# hint groups fully obeyed: 0\n+# incompatible hint groups: 102\n+#      W: 102 \n+# end gene chr2R.g1\n+###\n+# command line:\n+# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=off --codingseq=off --introns=off --stop=off --stop=off --cds=off --singlestrand=false /private/var/folders/2d/g1vdzd1n6fv1fgxtlx8vrb189nw7dq/T/tmpsUsEy3/files/000/dataset_17.dat --UTR=off --genemodel=complete --hintsfile=/private/var/folders/2d/g1vdzd1n6fv1fgxtlx8vrb189nw7dq/T/tmpsUsEy3/files/000/dataset_18.dat --extrinsicCfgFile=/private/var/folders/2d/g1vdzd1n6fv1fgxtlx8vrb189nw7dq/T/tmpsUsEy3/files/000/dataset_19.dat --predictionStart=7000 --predictionEnd=9000 --species=fly\n\\ No newline at end of file\n'
b
diff -r 1fbb1135da16 -r 0d425a4b6896 test-data/chr2R.truncated.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chr2R.truncated.fa Thu May 23 18:17:22 2019 -0400
b
b'@@ -0,0 +1,200 @@\n+>chr2R\n+TTGCAATTTGCACTTTTCAATTGCCCCAAAAGATACGCCAACGAAATGCT\n+TATTAAAGTATCAAAAACAAATACCAGAAAGCTGTCACCAATCCCGCCCC\n+ATAGAAAACATGCCCACTTTGCCGCAGAATTTGCATATTTGTCTCCGTCG\n+GCGGTCGTCATTTTAATAAAAAAAAAATGTTTGTATAATTTCAGGTCGTC\n+GTGGGATGCGGCGGCTGCTTGCttataaaattaattttaaattaaacaat\n+tttaattatGATTCAATAATCAACAAACACACTGCAGCTTCGTCAATGTT\n+GTCGCAGACTTTCAACAATGCAAATATTGGTTTACGGCCATAAAGGCCCA\n+GCGCGACCAACCAAACCATTAATCCAACTCCGCTCGACTATTGCAAATTG\n+TTTAATAGCCGCCTTTCTTTCTTTTATGTTTGCAAAAAGAATGTATTTCC\n+ATAATATTGCCGCTTTATGTTTCATCTGGAACCACTTAACTTTGCCTCTG\n+CTAATTCAATTAAAAGGAGAATTTAAAAACTTGAATAGCTTGTAAAGTGA\n+CAAAAAATTCAGTACTTCTTTTATTGAGTTAGTAACTAATATCAATGCTA\n+ATTCAATTCGCAGATGCCCTTGTTTCCACCAAAAATTTAAATTACTACAT\n+TGAAAAAAAAAATAATTCTAATAAACTATATTGATTTGCAATAGTATATT\n+CCCAATGCTCCATAAACTTGCTTCACATGACttaaaaaataaattaaaat\n+ttataGAAAAAACTTGATTTTTGAATTTGTTCCTTGCTTGAAACTTGCTG\n+AATTTTTTTATGTGCAGAAATATTTTCGATGGTAATTGAATTAACTCTTA\n+ATTTGGTATTTTGCCTGTGTGGTCATAAAATTTAAAGAGCACATTAATTG\n+TCTAATTGAAAAAACGGTTTGGCTGCATAAATTGCTCGACGACTTGCTCA\n+TTTTTCTCATGATTATTATAGTTATGACAACCATTTGTTTAAGCTAGTTA\n+CAGTGCCTCTTGGGAAGACTTACTAAGGGAATGCACCTTTAGTCAAATGC\n+AACAGATATTAATTAAATATTCAGTGCTTAATCACTTGCAACAAGTTAAA\n+TGTTGATTTTTGCCCAGAGCCCTCTGTGACTTTATTCAATTAGCGTTTGT\n+TAACTCCCGTTCCCGAACTTCTCCAGATTTTTTGCCAATCTTTTTCGATG\n+CCGGCGGGCACATCATTACATATGCCACGCCCACCTCGCCATTGCCTGCC\n+CCAAGCTTTATGCTTGGTGTACGCAAAAATTTATTTACACGCATAATTTA\n+AAAAACCTTTTGCGCTTTGCTTAATGGCCATTCATTTATTGTAACACACA\n+CACACACTCGCTGGGTGACACTTGCATTATTGAATCTGCCACGCCCACCA\n+AAAAGGCGGAGGCACATAATGGGCTGATTTCGTTAGTTGACAGCCACATG\n+TTTGCGGTACTCGTTAAGCTGAAAACATGCCTTTTCAAGGGGTTTTGGCA\n+TTTATTTTATAATTTAAGTATCTATGCCCATGAAGCTTTTAATATAAAAC\n+CACTCAAAACATTTTAAAATACTTTTAAAATATACATTACGTTACATTAA\n+ATAATCAAATCAATTGATAGTACGAAACTGACATATAAACAATATTATAG\n+GAATGAACACATTTCACACTTACAAAATGTAGAATTTAATTTAAAGGCTA\n+TGAAGTATAAACGTATAAAATATATCCAATTATTAAAATATTAAAACAAA\n+ATATCAAGTTGAGGCCGCCACCTTTGTTTAAATCTATAAATTGGGAAACA\n+GTTTGTTTATTTGTGCTGGACATTAATTGTCTGTTCGAAGAAAGGTAATT\n+AAAACAAATCCACATTGTTATCCTGCGCTATCAATATGCCAGCAAATTAA\n+TTATGATGTGTTAGACAGATCAATGACATTCTTGCTGAGAGCCGAGTTGC\n+TGCAACTCGCTTGCCAAAGTTAAACGCACTCACTTAACTCGCACCATTCA\n+GCTTTAGTCACCTGGATTTGATTCGATTTTGTCATCTGCCCGGGCAACAa\n+cacatacatatatactacacatgcaaacatatacatatatgcccatatct\n+acatGGCCAACTACTTCGATTCCTGGCCTTTTGTACTGGCCCCTTTCGCC\n+TGTGGCAATGCTTCCTGTCTGATGTTGCACGAGTAGATGTTGCCgctgtt\n+gctgttgctgttgctgttgctTGTGTGCCTTGAGTTGGCAGGCCACGTTC\n+TGGTTTCGGTTGCGGTTTCGGTTTGCTGGCTGCGATGGGCGCCTGTTGAC\n+GCTTTCGGCTACCCAGACATGGGTATCAGCCAGGTACCGACTTGGCTCTC\n+CAGTTTATGGCCGTTTACATGCTCACCGCCCGTTTCTCATATCACTTTAT\n+TTGCCAGCGGAATTCGCACGCAACTCCCTCGGTAATGATGCTCAAGAATT\n+TTTAATAAAAAACTGCCAGCCAAGCAGGAGGAGCTTTGCTCTTTTATTTT\n+TGTGCTGCGACTGATGAAGGCAATTGGAAGGACCTTAATGACAGGCAGCC\n+TTTGTGGGTTAAGCAATATATTAATCAGCATATTAATTATGCAGCAAGGT\n+AGTTTTGCCCGAGTGCGGGCGGGCATGGCCAGTGTCGAAGGTCAACGAAC\n+TGCAGCGGCCGCATGCATTGTTGGGGCATATAAATTGTTTAAACAACTTT\n+ACAGACAACATCCGTTGGGCCAGGCAAAATGGCCCGAAGGCGATGGGAAT\n+TCGAGTACCAGGCTgccagccagccagccagccagccagccagccagcca\n+gtcagccaTGTGACAGGCAGTCGGTCATTATTTCGCACTTTTGTTGCCGC\n+TGCGGAAATTTCACAGCAAGAGGCAGCGGCAAACATGCGACACGTGCAAC\n+ACCGAAAGTCGAGAGTCTGGGCGATACAGTGTGCCATAACAATAATGCCA\n+CAACAAAGATAGAGTCCCTGCCAGTAGAGAGCAGGCTGGGCGAAAAATGT\n+GTGCGGGAGTCAGGTGGAAAAGATGCACAAGTCGGTGGCAGACAAAATGG\n+AATTTTAATGATATGAGAAGGCGCGCCTTCTGAATATTTTTGCAGGTGAA\n+ACTGCTTACCAGCCATCCACTTGGCCACCTCCCCCTCCACCAAACAGCAC\n+CAAACTCTTACCCTTTGAACTCGCGTAGTACAACCGGCACTTGAACTACG\n+CCGCTTACTACGCTCTTGGCGCTCTTTTCAGTTTGTTTTTGCTGCGTTAT\n+AGTGCCCGTTTTTTTTCTGTCCTGACTCAGGCGCAGGCGAGTTGGCCCAG\n+AAGAGCGCAAGAGAACCGCTAAGAGCGCATTTACGATTGGCAAGCCGGTA\n+AAAATAGAAAAATAAGTAGTTTGAACACAGTTTGTATGAAGGTATGCTGT\n+TTCGCTTGAAATTTAATGCAGTTTATATTACAGAAATAGAAAACTAACCA\n+ATTACTTTGTTTATTTTGGTATTAAATGAATGAGCTGCACTTACACTTTT\n+CAAAATAGTTTATCCAAAGCACTTGACATTTTGGTGGTAGTTATTGCATA\n+GTTCAGGTGCTTTTTGTAGTTTAAAACATTTTGAAACCCCTTTTAAGATA\n+TCACAAGCACAAAGCACAAAGGCGCACTCACACAATTGCGAACGGCAAGC\n+GCCATGTGCGCCTGGCGGTATGCAATAGTTTCTGTTTCTCTTTCGTTTCC\n+GTAAATGGTAGTGCGCGTATAGTAgtgtgcgtttgcgtatgcgtgtgtgt\n+gtgtgtgtgtgtgtgcgtTTTAGCCGACAAAGTGCAACACACAGCGCAAT\n+GACCGACGTACTCGAGCGTT'..b'TGACTCATTTACGCGCTGCACTCGAGTTTCAACCGGCAGCCGGCGA\n+AGGAAAAGCGGGACTCCGAGAACTTTTCCAAGCTATTTCTGCGCCCAAAT\n+GACAGCGAAATGTAATAATGAAAAACTTTTTCTTTGTCAACTTGATGAGT\n+ATGAAATTTTTCACaaaaaaaaaaaaagaaaaaagattaaataaaataaa\n+agaCGAGCGGCAAAGGAACGGGTTGAGCTTGGGTGGCAGGGCGGTGAGCT\n+AGAAATATGAAACGTCAGGACTGGTCCTTAGTCTTAGGTTGGCCTTAGTC\n+TTGGTTGGCCTTCGAGATTAGTTATGGCAAAGGAATTTCCAAGCGAAGGC\n+GCACGCAAATGATTGCATGCCGCATTACGCATACGCCCGGTTGTGCGTGG\n+CCGGTTTTAAAATTGAATTTTAATATACAGCAGACACAAAGAGATGTACA\n+TGACCATATGTATATGTATATAGCTGATCCCTGGCCATGGCTGCTATAAG\n+CCAGGTTATCCAGCCACCCCGAAAAAACACATGTTCTGCATTTTTACTTT\n+GTACTGGACCAGGACCCACGGTCCATCATCCCTACCATTTTGCTGCTGCA\n+CCGCATCGGGGATTGCGGAATTCATAAGCTATGTGCGTTTGCTACAAAAG\n+TGGAATTTTCGCCACGCCAAAGGATACCCGCTTGCAGGCTCCCAGCTCAC\n+AGTTCCCACTTCCCGTTTCCTAGATCCTGAGCATGCCATTAACCCACTCG\n+CAAAAAGCGCAAAATCGCTTAAATTATTGTTACAAGCTGCCAGTGCCAAG\n+TGGCAGGCACAGCAACAGCAAAAACAAGAACAAGACCAGGGGggcaggtg\n+gcaggtggcaggtggcaCAGGGCAAACCACCTGAGCTTTTGGCCGCGCTT\n+TGCTTTGAGTGCCAGTTAGCATAACTCCGCAGGCCGCAGGCGAATATGTc\n+tgttgttgctgcttttgccgctttgctactctcctgatgttgggtgctgc\n+tcctgattttgctgttgccgttAGAAGTATTTGTGGCGCGGCAAAGGACA\n+TCCTCTCGGCAGCCCGTTCTCACCTGCCTGCCAAGCTGCAAGCTCCAGCT\n+GAAAGTGTCTTTGGTGAGCGCTACCCACCACCTAAATATACAAAGTGTGG\n+GGGAAAAACAAGAAAAATAACAACCTACAATAGTCTCTTCTTTGTTACCT\n+AAAAATATAAGAAAAGCACACCTCGCAATTGTGTTGAAAAAAACAAATTG\n+ATACGAAAAAAAGCGATAGGGATGCCACAACAAAGCGCAATCTTATCAAA\n+CGTATTAAACATTTTGCTTATCAAAACAGCGAGGTTTGCACAACAATGCG\n+AAATAGGAGGAGGCAAAATTCTTATCTGCATTTCATCAGTCAAACATTTA\n+CTGTAATCAAAAATCGCAACTATAAGGAACACAAGTAAAGGCCACAAATG\n+AGAAGCAAATTATTGTTCGATATCCGTGAATTTGAAGGGGAACGTAATGG\n+CCGGTTCCATGACGAACATGGTCTTGAATGGACGGCTGGCATCCCGATTG\n+AACTCCACATGGAAATTGCGAATCAACTTGGCCACTGTGGTCTCCATTTC\n+CAGATCCACCACCCGTTTGCCAATGCACATGCGGGGTCCAAAGCCAAAGG\n+GAAGGAACGTGAAGGGGCTGACCTGCATCTTCTTTCCGGTCTCCGGATCC\n+CTTAACCAGCGCTCCGGCAGAAATTCATCTGGTCGTGGATAATATGTAGC\n+TTCCTTCATCAGCACATTTGAGCCGAGCAGGACAGTCGTTCCCTTGGGCA\n+CCCGGTAACCCGAAAGTATCACATCATTTTGGCAGGTTCTCATGGTTCCC\n+AAGCCATTGGGATAGTATCGCAGTGTCTCCTTGATCACAGCCCTCAAATA\n+TGGCATATCCTTCATGTTCTCCTCGTTGAGTAGGGAATCCTTTGTGGGCA\n+TGATGCTCAAGAGCTCCTCCCGCAGTTTGGCCTGCTTATCCGGATGCTTT\n+GAGAGGCAGAGTAAAACAGCCGACAGAAGCGTGGCTGTGGCATCCACACC\n+AGCGAATAGGATGTCCAGACTCATAATCACCGCCACCTTGGGATCGATCT\n+CCATTAATCTCTCCAGCATACTGTTACTATTTATTTTTTCGCCAGCCTGG\n+CGGCGCTTTTCCAGTGCATCCTGATTTTCCTTCAGCATTTTCTGGGCCAC\n+ATTCAAACTGTCATTGAGAGTTCGCTTCATTTTCCTGTAGGTGGGCGTCG\n+ATATGATCTTCCACATGGAGGGCTGAATGTCTAGCTTAAATGTGAGACGG\n+AAAATATCCCTCGAGGTCTGGAAGAGGGTCAATGCATCGGAATTATCGCG\n+GTTTTTTCTAATCAGACCCATTTGCCGATCGAAAGCCACCAGGCCGAGTG\n+ACTCGAAAACAAGCCGGCTTATTTCATCCGTAAAATCTTCGGGGACTTCT\n+AGAGTTTTTGGATCGCGGATTTCCTTAATGCTGCACAGTAAAATGGAATT\n+TATTGCTATATATAAATAACAATTCAATAACAAAAATTTCTTACCGCTCT\n+ATAAACTCATTATTGATATTAGACAATGGTTCATAATACATTCTCAAGCC\n+CCTGGGTTGCATAAAGATGGGATTAATGGCTGATCGTAGTTTTCCCCATG\n+CCTCATTTTGTCTGAAATATTCAAATTCAATTGTTTCCTTGCTACTTATT\n+AGATTTTATACTCACGAAGCCACCAATCCTTGAACCTCACCGTAAACATC\n+TGGTCGAACGTGTTCGCGGAAATATACAATGGAATCCAGGCCATCACGAC\n+GTGGCCAGATACCCTCGTTGCGGAATACCATCTCAATGTCCTTTGTGTTG\n+AAAGTGGTGACCCAATCCTTGCGGCCAAACATTCCGGGCATTACATAGAT\n+ATCTCCATAGCGCTTTCGCATGGCACTGGTGTATTCCGTAATCGATGCAT\n+TTTGGAATTCACCACCGGGCATGAAAGCCCTCATAAATTTGAATTTGTTG\n+GGCCGCGGAATCTCATCGTAGGTCTTGTGCTCCTCCGTTATCTATAATTG\n+GAAACTAATAACATGATAAATCTATTTCGACTGGATTTTGTGCATACTAA\n+CACTAGATTTAGCTTGTTCATGTGCCAAAACGGAGGCTGATCTTGAAGAT\n+CGAACGGGACCCACGTAAATCGCCACAGATCGCGCACTGCTCAATGTATT\n+CATTCTATTCCTTTTGGTGATCAACTGCAACTAAAAACAGTTCCACTTTC\n+GAGAACGTTCTTATACCAAAGGCTTATCTCTAAAACGTAGAAAAGAGAGG\n+AGAGCGAATAACTTGAAATGCTTCAAAATGCAAAAATCTCAAACAAAAAC\n+AAAAGCCAGCAAACAAAGCATGAGTTTCATTTTCTTATCAGCTGAACGGT\n+ATAATCGCCGTGAATTGCCTACCTCACTCAACTGAAAAAATAAACAAATG\n+AACGTGACTGGCTAGAAAAACAAATTGAGAGCGTTGGAACTTACAATAAA\n+TACACCTCAGAGAATTTGATACTACTATTACGACTATTACATACCCGTTA\n+CTCATTTAGTGGGATCGCTAGaaaaaaatctaaaatatttaaatattttt\n+taaaaatttataaGGATTCTAGTTCAACGGTAGACAATTTCAAGATAAAT\n+AACCAAATTAAAGAAAAATAAAAACAATTCTTAGAGTTGATCCTTGTTCT\n+CTTTGCATACTGatattttatttattttttattttatttatttaCTGGCG\n+CCGCTTTTCCACAGAGTCCTCGGTGTCCTTTATCATTTTCTGTGAGAACA\n+TTAAGATGTCATCGAGAAGACGCATCATCATCCGAAAGTTTGGAGTGGAG\n'
b
diff -r 1fbb1135da16 -r 0d425a4b6896 test-data/extrinsic.truncated.cfg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/extrinsic.truncated.cfg Thu May 23 18:17:22 2019 -0400
[
b"@@ -0,0 +1,183 @@\n+==# extrinsic information configuration file for AUGUSTUS\n+# \n+# protein hints\n+# include with --extrinsicCfgFile=filename\n+# date: 16.10.2007\n+# Mario Stanke (mstanke@gwdg.de)\n+\n+\n+# source of extrinsic information:\n+# M manual anchor (required)\n+# P protein database hit\n+# E EST/cDNA database hit\n+# C combined est/protein database hit\n+# D Dialign\n+# R retroposed genes\n+# T transMapped refSeqs\n+# W wiggle track coverage info from RNA-Seq\n+\n+[SOURCES]\n+M RM E W\n+\n+#\n+# individual_liability: Only unsatisfiable hints are disregarded. By default this flag is not set\n+# and the whole hint group is disregarded when one hint in it is unsatisfiable.\n+# 1group1gene: Try to predict a single gene that covers all hints of a given group. This is relevant for\n+# hint groups with gaps, e.g. when two ESTs, say 5' and 3', from the same clone align nearby.\n+#\n+[SOURCE-PARAMETERS]\n+\n+\n+#   feature        bonus         malus   gradelevelcolumns\n+#\t\tr+/r-\n+#\n+# the gradelevel colums have the following format for each source\n+# sourcecharacter numscoreclasses boundary    ...  boundary    gradequot  ...  gradequot\n+# \n+\n+[GENERAL]\n+      start        1        1  M    1  1e+100  RM  1     1    E 1    1    W 1    1\n+       stop        1        1  M    1  1e+100  RM  1     1    E 1    1    W 1    1\n+        tss        1        1  M    1  1e+100  RM  1     1    E 1    1    W 1    1\n+        tts        1        1  M    1  1e+100  RM  1     1    E 1    1    W 1    1\n+        ass        1        1  M    1  1e+100  RM  1     1    E 1    1    W 1    1\n+        dss        1        1  M    1  1e+100  RM  1     1    E 1    1    W 1    1\n+   exonpart        1     .992  M    1  1e+100  RM  1     1    E 1    1    W 1  1.005\n+       exon        1        1  M    1  1e+100  RM  1     1    E 1    1    W 1    1\n+ intronpart        1        1  M    1  1e+100  RM  1     1    E 1    1    W 1    1\n+     intron        1       .8  M    1  1e+100  RM  1     1    E 1    1000 W 1    1\n+    CDSpart        1  1 0.985  M    1  1e+100  RM  1     1    E 1    1\t  W 1    1\n+        CDS        1        1  M    1  1e+100  RM  1     1    E 1    1    W 1    1\n+    UTRpart        1   1 .973  M    1  1e+100  RM  1     1    E 1    1    W 1    1\n+        UTR        1        1  M    1  1e+100  RM  1     1    E 1    1    W 1    1\n+     irpart        1        1  M    1  1e+100  RM  1     1    E 1    1    W 1    1\n+nonexonpart        1        1  M    1  1e+100  RM  1     1.01 E 1    1    W 1    1\n+  genicpart        1        1  M    1  1e+100  RM  1     1    E 1    1    W 1    1\n+\n+#\n+# Explanation: \n+# \n+# The gff/gtf file containint the hints must contain somewhere in the last\n+# column an entry source=?, where ? is one of the source characters listed in\n+# the line after [SOURCES] above. You can use different sources when you have\n+# hints of different reliability of the same type, e.g. exon hints from ESTs\n+# and exon hints from evolutionary conservation information.\n+# \n+# In the [GENERAL] section the entries second column specify a bonus for obeying\n+# a hint and the entry in the third column specify a malus (penalty) for\n+# predicting a feature that is not supported by any hint. The bonus and the\n+# malus is a factor that is multiplied to the posterior probability of gene\n+# structueres. \n+# Example: \n+#   CDS     1000  0.7  ....\n+# means that, when AUGUSTUS is searching for the most likely gene structure,\n+# every gene structure that has a CDS exactly as given in a hint gets\n+# a bonus factor of 1000. Also, for every CDS that is not supported the\n+# probability of the gene structure gets a malus of 0.7. Increase the bonus to\n+# make AUGUSTUS obey more hints, decrease the malus to make AUGUSTUS predict few\n+# features that are not supported by hints. The malus helps increasing\n+# specificity, e.g. when the exons predicted by AUGUSTUS are suspicious because\n+# there is no evidence from ESTs, mRNAs, protein databases, sequence\n+# conservation, transMapped expressed sequ"..b"region in the\n+#              specified interval. This is useful if you want to tell AUGUSTUS\n+#              that two distant exons belong to the same gene, when AUGUSTUS\n+#              tends to split that gene into smaller genes.\n+# nonexonpart: intergenic region or intron. The bonus applies to very non-exon\n+#              base that overlaps with the interval from the hint. It is\n+#              geometric in the length of that overlap, so choose it close to\n+#              1.0. This is useful as a weak kind of masking, e.g. when it is\n+#              unlikely that a retroposed gene contains a coding region but you\n+#              do not want to completely forbid exons.\n+#   genicpart: everything that is not intergenic region, i.e. intron or exon or UTR if\n+#              applicable. The bonus applies to every genic base that overlaps with the\n+#              interval from the hint. This can be used in particular to make Augustus\n+#              predict one gene between positions a and b if a and b are experimentally\n+#              confirmed to be part of the same gene, e.g. through ESTs from the same clone.\n+#              alias: nonirpart\n+#\n+# Any hints of types dss, intron, exon, CDS, UTR that (implicitly) suggest a donor splice\n+# site allow AUGUSTUS to predict a donor splice site that has a GC instead of the much more common GT.\n+# AUGUSTUS does not predict a GC donor splice site unless there is a hint for one.\n+# \n+# Starting in column number 4 you can tell AUGUSTUS how to modify the bonus \n+# depending on the source of the hint and the score of the hint. \n+# The score of the hints is specified in the 6th column of the hint gff/gtf.\n+# If the score is used at all, the score is not used directly through some\n+# conversion formula but by distinguishing different classes of scores, e.g. low\n+# score, medium score, high score. The format is the following:\n+# First, you specify the source character, then the number of classes (say n), then you\n+# specify the score boundaries that separate the classes (n-1 thresholds) and then you specify\n+# for each score class the multiplicative modifier to the bonus (n factors). \n+# \n+# Examples:\n+# \n+# M 1 1e+100\n+# means for the manual hint there is only one score class, the bonus for this\n+# type of hint is multiplied by 10^100. This practically forces AUGUSTUS to obey\n+# all manual hints.\n+# \n+# T    2       1.5 1 5e29\n+# For the transMap hints distinguish 2 classes. Those with a score below 1.5 and\n+# with a score above 1.5. The bonus if the lower score hints is unchanged and\n+# the bonus of the higher score hints is multiplied by 5x10^29.\n+# \n+# D    8     1.5  2.5  3.5  4.5  5.5  6.5  7.5  0.58  0.4  0.2  2.9  0.87  0.44 0.31  7.3\n+# Use 8 score classes for the DIALIGN hints. DIALIGN hints give a score, a strand and\n+# reading frame information for CDSpart hints. The strand and reading frame are often correct but not\n+# often enough to rely on them. To account for that I generated hints for all\n+# 6 combinations of a strand and reading frame and then used 2x2x2=8 different\n+# score classes:\n+# {low score, high score} x {DIALIGN strand, opposite strand} x {DIALIGN reading frame, other reading frame}\n+# This example shows that scores don't have to be monotonous. A higher score\n+# does not have to mean a higher bonus. They are merely a way of classifying the\n+# hints into categories as you wish. In particular, you could get the effect of\n+# having different sources by having just hints of one source and then distinguishing\n+# more scores classes.\n+# \n+# \n+# Future plans:\n+# - Add fuzzy intron hints. Introns get a bonus only when they approximately\n+# have the same boundaries as in the hint.\n+# - Make the splice site hints fuzzy also. Allow a hint interval that contains a\n+# likely splice site, as opposed to only an individual position.\n+# - Write a program that automatically optimizes the boni and mali given an\n+# annotated test set of genes and hints for that set of sequences.\n+\n"
b
diff -r 1fbb1135da16 -r 0d425a4b6896 test-data/hints.truncated.adjusted.gff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hints.truncated.adjusted.gff Thu May 23 18:17:22 2019 -0400
b
b'@@ -0,0 +1,482 @@\n+chr2R\tb2h\tep\t8895\t9191\t0\t.\t.\tgrp=gi|2700091;pri=4;src=E\n+chr2R\tb2h\tep\t8895\t9191\t0\t.\t.\tgrp=gi|2700091;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9338\t0\t.\t.\tgrp=gi|2700091;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9338\t0\t.\t.\tgrp=gi|2700091;pri=4;src=E\n+chr2R\tb2h\tep\t8878\t9191\t0\t.\t.\tgrp=gi|2701304;pri=4;src=E\n+chr2R\tb2h\tep\t8878\t9191\t0\t.\t.\tgrp=gi|2701304;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9338\t0\t.\t.\tgrp=gi|2701304;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9338\t0\t.\t.\tgrp=gi|2701304;pri=4;src=E\n+chr2R\tb2h\tep\t8880\t9191\t0\t.\t.\tgrp=gi|2701309;pri=4;src=E\n+chr2R\tb2h\tep\t8880\t9191\t0\t.\t.\tgrp=gi|2701309;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9338\t0\t.\t.\tgrp=gi|2701309;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9338\t0\t.\t.\tgrp=gi|2701309;pri=4;src=E\n+chr2R\tb2h\tep\t8559\t8630\t0\t.\t.\tgrp=gi|2701440;pri=4;src=E\n+chr2R\tb2h\tep\t8559\t8630\t0\t.\t.\tgrp=gi|2701440;pri=4;src=E\n+chr2R\tb2h\tep\t8866\t9172\t0\t.\t.\tgrp=gi|2701440;pri=4;src=E\n+chr2R\tb2h\tep\t8866\t9172\t0\t.\t.\tgrp=gi|2701440;pri=4;src=E\n+chr2R\tb2h\tep\t8782\t8811\t0\t.\t.\tgrp=gi|2871896;pri=4;src=E\n+chr2R\tb2h\tep\t8782\t8811\t0\t.\t.\tgrp=gi|2871896;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9360\t0\t.\t.\tgrp=gi|2871896;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9360\t0\t.\t.\tgrp=gi|2871896;pri=4;src=E\n+chr2R\tb2h\tep\t9077\t9191\t0\t.\t.\tgrp=gi|3101873;pri=4;src=E\n+chr2R\tb2h\tep\t9077\t9191\t0\t.\t.\tgrp=gi|3101873;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9337\t0\t.\t.\tgrp=gi|3101873;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9337\t0\t.\t.\tgrp=gi|3101873;pri=4;src=E\n+chr2R\tb2h\tep\t8489\t8630\t0\t.\t.\tgrp=gi|4203815;pri=4;src=E\n+chr2R\tb2h\tep\t8489\t8630\t0\t.\t.\tgrp=gi|4203815;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9339\t0\t.\t.\tgrp=gi|4203815;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9339\t0\t.\t.\tgrp=gi|4203815;pri=4;src=E\n+chr2R\tb2h\tep\t8575\t8630\t0\t.\t.\tgrp=gi|4245769;pri=4;src=E\n+chr2R\tb2h\tep\t8575\t8630\t0\t.\t.\tgrp=gi|4245769;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9340\t0\t.\t.\tgrp=gi|4245769;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9340\t0\t.\t.\tgrp=gi|4245769;pri=4;src=E\n+chr2R\tb2h\tep\t8563\t8630\t0\t.\t.\tgrp=gi|4245770;pri=4;src=E\n+chr2R\tb2h\tep\t8563\t8630\t0\t.\t.\tgrp=gi|4245770;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9340\t0\t.\t.\tgrp=gi|4245770;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9340\t0\t.\t.\tgrp=gi|4245770;pri=4;src=E\n+chr2R\tb2h\tep\t8487\t8630\t0\t.\t.\tgrp=gi|13769068;pri=4;src=E\n+chr2R\tb2h\tep\t8487\t8630\t0\t.\t.\tgrp=gi|13769068;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9340\t0\t.\t.\tgrp=gi|13769068;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9340\t0\t.\t.\tgrp=gi|13769068;pri=4;src=E\n+chr2R\tb2h\tep\t8550\t8630\t0\t.\t.\tgrp=gi|14693753;pri=4;src=E\n+chr2R\tb2h\tep\t8550\t8630\t0\t.\t.\tgrp=gi|14693753;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9372\t0\t.\t.\tgrp=gi|14693753;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9372\t0\t.\t.\tgrp=gi|14693753;pri=4;src=E\n+chr2R\tb2h\tep\t8613\t8630\t0\t.\t.\tgrp=gi|14695912;pri=4;src=E\n+chr2R\tb2h\tep\t8613\t8630\t0\t.\t.\tgrp=gi|14695912;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9372\t0\t.\t.\tgrp=gi|14695912;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9372\t0\t.\t.\tgrp=gi|14695912;pri=4;src=E\n+chr2R\tb2h\tep\t8551\t8630\t0\t.\t.\tgrp=gi|14699170;pri=4;src=E\n+chr2R\tb2h\tep\t8551\t8630\t0\t.\t.\tgrp=gi|14699170;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9372\t0\t.\t.\tgrp=gi|14699170;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9372\t0\t.\t.\tgrp=gi|14699170;pri=4;src=E\n+chr2R\tb2h\tep\t8614\t8630\t0\t.\t.\tgrp=gi|14700619;pri=4;src=E\n+chr2R\tb2h\tep\t8614\t8630\t0\t.\t.\tgrp=gi|14700619;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9372\t0\t.\t.\tgrp=gi|14700619;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9372\t0\t.\t.\tgrp=gi|14700619;pri=4;src=E\n+chr2R\tb2h\tep\t8536\t8630\t0\t.\t.\tgrp=gi|15539951;pri=4;src=E\n+chr2R\tb2h\tep\t8536\t8630\t0\t.\t.\tgrp=gi|15539951;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9346\t0\t.\t.\tgrp=gi|15539951;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9346\t0\t.\t.\tgrp=gi|15539951;pri=4;src=E\n+chr2R\tb2h\tep\t7756\t8449\t0\t.\t.\tgrp=gi|15542574;pri=4;src=E\n+chr2R\tb2h\tep\t7756\t8449\t0\t.\t.\tgrp=gi|15542574;pri=4;src=E\n+chr2R\tb2h\tep\t8705\t8811\t0\t.\t.\tgrp=gi|15543159;pri=4;src=E\n+chr2R\tb2h\tep\t8705\t8811\t0\t.\t.\tgrp=gi|15543159;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9342\t0\t.\t.\tgrp=gi|15543159;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9342\t0\t.\t.\tgrp=gi|15543159;pri=4;src=E\n+chr2R\tb2h\tep\t8492\t8630\t0\t.\t.\tgrp=gi|15543927;pri=4;src=E\n+chr2R\tb2h\tep\t8492\t8630\t0\t.\t.\tgrp=gi|15543927;pri=4;src=E\n+chr2R\tb2h\tep\t9252\t9359\t0\t.\t.\tgrp=gi|15543927;pri=4;src=E\n+chr2R\tb2h\tep\t9'..b'\t8759\t8768\t8.000\t.\t.\tsrc=W;mult=8;\n+chr2R\tw2h\tep\t8769\t8778\t8.000\t.\t.\tsrc=W;mult=8;\n+chr2R\tw2h\tep\t8769\t8778\t8.000\t.\t.\tsrc=W;mult=8;\n+chr2R\tw2h\tep\t8779\t8788\t8.600\t.\t.\tsrc=W;mult=8;\n+chr2R\tw2h\tep\t8779\t8788\t8.600\t.\t.\tsrc=W;mult=8;\n+chr2R\tw2h\tep\t8789\t8798\t8.700\t.\t.\tsrc=W;mult=8;\n+chr2R\tw2h\tep\t8789\t8798\t8.700\t.\t.\tsrc=W;mult=8;\n+chr2R\tw2h\tep\t8880\t8889\t12.700\t.\t.\tsrc=W;mult=12;\n+chr2R\tw2h\tep\t8880\t8889\t12.700\t.\t.\tsrc=W;mult=12;\n+chr2R\tw2h\tep\t8890\t8899\t15.300\t.\t.\tsrc=W;mult=15;\n+chr2R\tw2h\tep\t8890\t8899\t15.300\t.\t.\tsrc=W;mult=15;\n+chr2R\tw2h\tep\t8900\t8909\t17.600\t.\t.\tsrc=W;mult=17;\n+chr2R\tw2h\tep\t8900\t8909\t17.600\t.\t.\tsrc=W;mult=17;\n+chr2R\tw2h\tep\t8910\t8919\t17.900\t.\t.\tsrc=W;mult=17;\n+chr2R\tw2h\tep\t8910\t8919\t17.900\t.\t.\tsrc=W;mult=17;\n+chr2R\tw2h\tep\t8920\t8929\t17.200\t.\t.\tsrc=W;mult=17;\n+chr2R\tw2h\tep\t8920\t8929\t17.200\t.\t.\tsrc=W;mult=17;\n+chr2R\tw2h\tep\t8930\t8939\t18.400\t.\t.\tsrc=W;mult=18;\n+chr2R\tw2h\tep\t8930\t8939\t18.400\t.\t.\tsrc=W;mult=18;\n+chr2R\tw2h\tep\t8940\t8949\t19.900\t.\t.\tsrc=W;mult=19;\n+chr2R\tw2h\tep\t8940\t8949\t19.900\t.\t.\tsrc=W;mult=19;\n+chr2R\tw2h\tep\t8950\t8959\t19.100\t.\t.\tsrc=W;mult=19;\n+chr2R\tw2h\tep\t8950\t8959\t19.100\t.\t.\tsrc=W;mult=19;\n+chr2R\tw2h\tep\t8960\t8969\t15.200\t.\t.\tsrc=W;mult=15;\n+chr2R\tw2h\tep\t8960\t8969\t15.200\t.\t.\tsrc=W;mult=15;\n+chr2R\tw2h\tep\t8970\t8979\t13.000\t.\t.\tsrc=W;mult=13;\n+chr2R\tw2h\tep\t8970\t8979\t13.000\t.\t.\tsrc=W;mult=13;\n+chr2R\tw2h\tep\t8980\t8989\t14.700\t.\t.\tsrc=W;mult=14;\n+chr2R\tw2h\tep\t8980\t8989\t14.700\t.\t.\tsrc=W;mult=14;\n+chr2R\tw2h\tep\t8990\t8999\t14.500\t.\t.\tsrc=W;mult=14;\n+chr2R\tw2h\tep\t8990\t8999\t14.500\t.\t.\tsrc=W;mult=14;\n+chr2R\tw2h\tep\t9000\t9009\t14.500\t.\t.\tsrc=W;mult=14;\n+chr2R\tw2h\tep\t9000\t9009\t14.500\t.\t.\tsrc=W;mult=14;\n+chr2R\tw2h\tep\t9010\t9019\t12.500\t.\t.\tsrc=W;mult=12;\n+chr2R\tw2h\tep\t9010\t9019\t12.500\t.\t.\tsrc=W;mult=12;\n+chr2R\tw2h\tep\t9020\t9029\t11.900\t.\t.\tsrc=W;mult=11;\n+chr2R\tw2h\tep\t9020\t9029\t11.900\t.\t.\tsrc=W;mult=11;\n+chr2R\tw2h\tep\t9030\t9039\t12.100\t.\t.\tsrc=W;mult=12;\n+chr2R\tw2h\tep\t9030\t9039\t12.100\t.\t.\tsrc=W;mult=12;\n+chr2R\tw2h\tep\t9040\t9049\t10.000\t.\t.\tsrc=W;mult=10;\n+chr2R\tw2h\tep\t9040\t9049\t10.000\t.\t.\tsrc=W;mult=10;\n+chr2R\tw2h\tep\t9050\t9059\t9.700\t.\t.\tsrc=W;mult=9;\n+chr2R\tw2h\tep\t9050\t9059\t9.700\t.\t.\tsrc=W;mult=9;\n+chr2R\tw2h\tep\t9060\t9069\t10.600\t.\t.\tsrc=W;mult=10;\n+chr2R\tw2h\tep\t9060\t9069\t10.600\t.\t.\tsrc=W;mult=10;\n+chr2R\tw2h\tep\t9070\t9079\t12.300\t.\t.\tsrc=W;mult=12;\n+chr2R\tw2h\tep\t9070\t9079\t12.300\t.\t.\tsrc=W;mult=12;\n+chr2R\tw2h\tep\t9080\t9089\t13.100\t.\t.\tsrc=W;mult=13;\n+chr2R\tw2h\tep\t9080\t9089\t13.100\t.\t.\tsrc=W;mult=13;\n+chr2R\tw2h\tep\t9090\t9099\t13.400\t.\t.\tsrc=W;mult=13;\n+chr2R\tw2h\tep\t9090\t9099\t13.400\t.\t.\tsrc=W;mult=13;\n+chr2R\tw2h\tep\t9100\t9109\t12.000\t.\t.\tsrc=W;mult=12;\n+chr2R\tw2h\tep\t9100\t9109\t12.000\t.\t.\tsrc=W;mult=12;\n+chr2R\tw2h\tep\t9110\t9119\t10.700\t.\t.\tsrc=W;mult=10;\n+chr2R\tw2h\tep\t9110\t9119\t10.700\t.\t.\tsrc=W;mult=10;\n+chr2R\tw2h\tep\t9120\t9129\t11.000\t.\t.\tsrc=W;mult=11;\n+chr2R\tw2h\tep\t9120\t9129\t11.000\t.\t.\tsrc=W;mult=11;\n+chr2R\tw2h\tep\t9130\t9139\t10.900\t.\t.\tsrc=W;mult=10;\n+chr2R\tw2h\tep\t9130\t9139\t10.900\t.\t.\tsrc=W;mult=10;\n+chr2R\tw2h\tep\t9140\t9149\t7.900\t.\t.\tsrc=W;mult=7;\n+chr2R\tw2h\tep\t9140\t9149\t7.900\t.\t.\tsrc=W;mult=7;\n+chr2R\tw2h\tep\t9150\t9159\t4.900\t.\t.\tsrc=W;mult=4;\n+chr2R\tw2h\tep\t9150\t9159\t4.900\t.\t.\tsrc=W;mult=4;\n+chr2R\tw2h\tep\t9160\t9169\t4.400\t.\t.\tsrc=W;mult=4;\n+chr2R\tw2h\tep\t9160\t9169\t4.400\t.\t.\tsrc=W;mult=4;\n+chr2R\tw2h\tep\t9265\t9274\t11.400\t.\t.\tsrc=W;mult=11;\n+chr2R\tw2h\tep\t9265\t9274\t11.400\t.\t.\tsrc=W;mult=11;\n+chr2R\tw2h\tep\t9275\t9284\t12.800\t.\t.\tsrc=W;mult=12;\n+chr2R\tw2h\tep\t9275\t9284\t12.800\t.\t.\tsrc=W;mult=12;\n+chr2R\tw2h\tep\t9285\t9294\t13.100\t.\t.\tsrc=W;mult=13;\n+chr2R\tw2h\tep\t9285\t9294\t13.100\t.\t.\tsrc=W;mult=13;\n+chr2R\tw2h\tep\t9295\t9304\t13.600\t.\t.\tsrc=W;mult=13;\n+chr2R\tw2h\tep\t9295\t9304\t13.600\t.\t.\tsrc=W;mult=13;\n+chr2R\tw2h\tep\t9305\t9314\t13.000\t.\t.\tsrc=W;mult=13;\n+chr2R\tw2h\tep\t9305\t9314\t13.000\t.\t.\tsrc=W;mult=13;\n+chr2R\tw2h\tep\t9315\t9324\t12.400\t.\t.\tsrc=W;mult=12;\n+chr2R\tw2h\tep\t9315\t9324\t12.400\t.\t.\tsrc=W;mult=12;\n+chr2R\tw2h\tep\t9325\t9334\t9.000\t.\t.\tsrc=W;mult=9;\n+chr2R\tw2h\tep\t9325\t9334\t9.000\t.\t.\tsrc=W;mult=9;\n+chr2R\tw2h\tep\t9335\t9344\t5.200\t.\t.\tsrc=W;mult=5;\n+chr2R\tw2h\tep\t9335\t9344\t5.200\t.\t.\tsrc=W;mult=5;\n'