changeset 9:2fdc599f2814 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/augustus commit b8194e4d746760fff7a7c5bb9c41df28089d9782
author iuc
date Thu, 17 Jul 2025 09:06:11 +0000
parents aada0deea587
children
files augustus_training.xml macros.xml test-data/augustus.hints.output.gtf test-data/augustus.hints_and_range.output.gtf test-data/human_augustus.fa.gz test-data/human_augustus_protein_codingseq_introns_cds_main.gtf test-data/human_augustus_utr-on.gff test-data/human_augustus_utr-on.gtf test-data/human_augustus_utr-on_softmasking.gtf
diffstat 9 files changed, 30 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/augustus_training.xml	Sun Dec 15 09:09:49 2024 +0000
+++ b/augustus_training.xml	Thu Jul 17 09:06:11 2025 +0000
@@ -8,7 +8,7 @@
         <xref type="bio.tools">augustus</xref>
     </xrefs>
     <expand macro="requirements">
-        <requirement type="package" version="2.31.10">maker</requirement>
+        <requirement type="package" version="3.01.03">maker</requirement>
     </expand>
     <command><![CDATA[
         cp -r `command -v augustus | xargs dirname`/../config/ augustus_dir/ &&
--- a/macros.xml	Sun Dec 15 09:09:49 2024 +0000
+++ b/macros.xml	Thu Jul 17 09:06:11 2025 +0000
@@ -7,8 +7,8 @@
         </requirements>
     </xml>
 
-    <token name="@TOOL_VERSION@">3.4.0</token>
-    <token name="@VERSION_SUFFIX@">3</token>
+    <token name="@TOOL_VERSION@">3.5.0</token>
+    <token name="@VERSION_SUFFIX@">0</token>
     <token name="@PROFILE@">23.1</token>
 
 
--- a/test-data/augustus.hints.output.gtf	Sun Dec 15 09:09:49 2024 +0000
+++ b/test-data/augustus.hints.output.gtf	Thu Jul 17 09:06:11 2025 +0000
@@ -1,17 +1,17 @@
-# This output was generated with AUGUSTUS (version 3.4.0).
+# This output was generated with AUGUSTUS (version 3.5.0).
 # AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de),
-# O. Keller, S. König, L. Gerischer, L. Romoth and Katharina Hoff.
+# O. Keller, S. König, L. Gerischer, L. Romoth, Katharina Hoff, Henry Mehlan and Daniel Honsel.
 # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008),
 # Using native and syntenically mapped cDNA alignments to improve de novo gene finding
 # Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013
 # Sources of extrinsic information: M RM E W 
 # Setting CDSpart local malus: 0.985
 # Setting UTRpart local malus: 0.973
-# reading in the file /tmp/tmpb49zmbej/files/6/4/3/dataset_64360fd3-ce82-407d-a499-79ac51decbd9.dat ...
+# reading in the file /tmp/tmpjsgft19_/files/a/6/2/dataset_a62f021d-9ecd-4331-83ac-119ad4aac229.dat ...
 # Have extrinsic information about 1 sequences (in the specified range). 
 # Initializing the parameters using config directory /usr/local/config/ ...
 # fly version. Using default transition matrix.
-# Looks like /tmp/tmpb49zmbej/files/0/c/6/dataset_0c6b001d-370e-42cf-be92-b3435bd212c5.dat is in fasta format.
+# Looks like /tmp/tmpjsgft19_/files/7/b/a/dataset_7ba46cfd-42e9-4f8e-8875-e339c6728906.dat is in fasta format.
 # We have hints for 1 sequence and for 1 of the sequences in the input set.
 #
 # ----- prediction on sequence number 1 (length = 9950, name = chr2R) -----
@@ -44,7 +44,6 @@
 # start gene chr2R.g1
 chr2R	AUGUSTUS	gene	7560	9303	0.84	-	.	chr2R.g1
 chr2R	AUGUSTUS	transcript	7560	9303	0.84	-	.	chr2R.g1.t1
-chr2R	AUGUSTUS	start_codon	9301	9303	.	-	0	transcript_id "chr2R.g1.t1"; gene_id "chr2R.g1";
 # Evidence for and against this transcript:
 # % of transcript supported by hints (any source): 57.1
 # CDS exons: 4/4
@@ -59,4 +58,4 @@
 # end gene chr2R.g1
 ###
 # command line:
-# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=off --codingseq=off --introns=off --stop=off --stop=off --cds=off --singlestrand=false /tmp/tmpb49zmbej/files/0/c/6/dataset_0c6b001d-370e-42cf-be92-b3435bd212c5.dat --UTR=off --genemodel=complete --softmasking=0 --hintsfile=/tmp/tmpb49zmbej/files/6/4/3/dataset_64360fd3-ce82-407d-a499-79ac51decbd9.dat --extrinsicCfgFile=/tmp/tmpb49zmbej/files/8/6/b/dataset_86b0a149-1d37-4615-9915-2c48586e3ca1.dat --species=fly
+# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=off --codingseq=off --introns=off --start=off --stop=off --cds=off --singlestrand=false /tmp/tmpjsgft19_/files/7/b/a/dataset_7ba46cfd-42e9-4f8e-8875-e339c6728906.dat --UTR=off --genemodel=complete --softmasking=0 --hintsfile=/tmp/tmpjsgft19_/files/a/6/2/dataset_a62f021d-9ecd-4331-83ac-119ad4aac229.dat --extrinsicCfgFile=/tmp/tmpjsgft19_/files/3/4/e/dataset_34e31244-f096-4c30-9f0d-5ca9e518e4bb.dat --species=fly
--- a/test-data/augustus.hints_and_range.output.gtf	Sun Dec 15 09:09:49 2024 +0000
+++ b/test-data/augustus.hints_and_range.output.gtf	Thu Jul 17 09:06:11 2025 +0000
@@ -1,17 +1,17 @@
-# This output was generated with AUGUSTUS (version 3.4.0).
+# This output was generated with AUGUSTUS (version 3.5.0).
 # AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de),
-# O. Keller, S. König, L. Gerischer, L. Romoth and Katharina Hoff.
+# O. Keller, S. König, L. Gerischer, L. Romoth, Katharina Hoff, Henry Mehlan and Daniel Honsel.
 # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008),
 # Using native and syntenically mapped cDNA alignments to improve de novo gene finding
 # Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013
 # Sources of extrinsic information: M RM E W 
 # Setting CDSpart local malus: 0.985
 # Setting UTRpart local malus: 0.973
-# reading in the file /tmp/tmpb49zmbej/files/f/a/8/dataset_fa8684ad-0602-4c00-9999-b998db931a6e.dat ...
+# reading in the file /tmp/tmpjsgft19_/files/2/8/2/dataset_2822734d-a594-477d-a8ef-82ffb21dcccf.dat ...
 # Have extrinsic information about 1 sequences (in the specified range). 
 # Initializing the parameters using config directory /usr/local/config/ ...
 # fly version. Using default transition matrix.
-# Looks like /tmp/tmpb49zmbej/files/4/c/8/dataset_4c80a809-791b-4afe-a497-698a7460ac31.dat is in fasta format.
+# Looks like /tmp/tmpjsgft19_/files/6/d/9/dataset_6d9a0d6d-a406-4749-8cb1-1d06374faa38.dat is in fasta format.
 # We have hints for 1 sequence and for 1 of the sequences in the input set.
 #
 # ----- prediction on sequence number 1 (length = 2001, name = chr2R) -----
@@ -38,7 +38,6 @@
 # start gene chr2R.g1
 chr2R	AUGUSTUS	gene	7560	8931	0.84	-	.	chr2R.g1
 chr2R	AUGUSTUS	transcript	7560	8931	0.84	-	.	chr2R.g1.t1
-chr2R	AUGUSTUS	start_codon	8929	8931	.	-	0	transcript_id "chr2R.g1.t1"; gene_id "chr2R.g1";
 # Evidence for and against this transcript:
 # % of transcript supported by hints (any source): 60
 # CDS exons: 3/3
@@ -53,4 +52,4 @@
 # end gene chr2R.g1
 ###
 # command line:
-# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=off --codingseq=off --introns=off --stop=off --stop=off --cds=off --singlestrand=false /tmp/tmpb49zmbej/files/4/c/8/dataset_4c80a809-791b-4afe-a497-698a7460ac31.dat --UTR=off --genemodel=complete --softmasking=0 --hintsfile=/tmp/tmpb49zmbej/files/f/a/8/dataset_fa8684ad-0602-4c00-9999-b998db931a6e.dat --extrinsicCfgFile=/tmp/tmpb49zmbej/files/3/3/5/dataset_335e9fec-9340-42e6-97ce-af35d5220fcc.dat --predictionStart=7000 --predictionEnd=9000 --species=fly
+# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=off --codingseq=off --introns=off --start=off --stop=off --cds=off --singlestrand=false /tmp/tmpjsgft19_/files/6/d/9/dataset_6d9a0d6d-a406-4749-8cb1-1d06374faa38.dat --UTR=off --genemodel=complete --softmasking=0 --hintsfile=/tmp/tmpjsgft19_/files/2/8/2/dataset_2822734d-a594-477d-a8ef-82ffb21dcccf.dat --extrinsicCfgFile=/tmp/tmpjsgft19_/files/7/6/7/dataset_7673297f-b12d-48f2-b40c-2fcd8e18239d.dat --predictionStart=7000 --predictionEnd=9000 --species=fly
Binary file test-data/human_augustus.fa.gz has changed
--- a/test-data/human_augustus_protein_codingseq_introns_cds_main.gtf	Sun Dec 15 09:09:49 2024 +0000
+++ b/test-data/human_augustus_protein_codingseq_introns_cds_main.gtf	Thu Jul 17 09:06:11 2025 +0000
@@ -1,13 +1,13 @@
-# This output was generated with AUGUSTUS (version 3.4.0).
+# This output was generated with AUGUSTUS (version 3.5.0).
 # AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de),
-# O. Keller, S. König, L. Gerischer, L. Romoth and Katharina Hoff.
+# O. Keller, S. König, L. Gerischer, L. Romoth, Katharina Hoff, Henry Mehlan and Daniel Honsel.
 # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008),
 # Using native and syntenically mapped cDNA alignments to improve de novo gene finding
 # Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013
 # No extrinsic information on sequences given.
 # Initializing the parameters using config directory /usr/local/config/ ...
 # human version. Using default transition matrix.
-# Looks like /tmp/tmpb49zmbej/files/c/d/6/dataset_cd6650af-fd36-4176-b9f1-e38bb118655f.dat is in fasta format.
+# Looks like /tmp/tmpjsgft19_/files/b/3/0/dataset_b30bd3b8-32ff-49ce-be3d-bdb824bd72df.dat is in fasta format.
 # We have hints for 0 sequences and for 0 of the sequences in the input set.
 #
 # ----- prediction on sequence number 1 (length = 9453, name = HS04636) -----
@@ -98,4 +98,4 @@
 # end gene HS08198.g2
 ###
 # command line:
-# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=on --codingseq=on --introns=on --start=on --stop=on --cds=on --singlestrand=false /tmp/tmpb49zmbej/files/c/d/6/dataset_cd6650af-fd36-4176-b9f1-e38bb118655f.dat --UTR=off --genemodel=complete --softmasking=0 --species=human
+# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=on --codingseq=on --introns=on --start=on --stop=on --cds=on --singlestrand=false /tmp/tmpjsgft19_/files/b/3/0/dataset_b30bd3b8-32ff-49ce-be3d-bdb824bd72df.dat --UTR=off --genemodel=complete --softmasking=0 --species=human
--- a/test-data/human_augustus_utr-on.gff	Sun Dec 15 09:09:49 2024 +0000
+++ b/test-data/human_augustus_utr-on.gff	Thu Jul 17 09:06:11 2025 +0000
@@ -1,14 +1,14 @@
 ##gff-version 3
-# This output was generated with AUGUSTUS (version 3.4.0).
+# This output was generated with AUGUSTUS (version 3.5.0).
 # AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de),
-# O. Keller, S. König, L. Gerischer, L. Romoth and Katharina Hoff.
+# O. Keller, S. König, L. Gerischer, L. Romoth, Katharina Hoff, Henry Mehlan and Daniel Honsel.
 # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008),
 # Using native and syntenically mapped cDNA alignments to improve de novo gene finding
 # Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013
 # No extrinsic information on sequences given.
 # Initializing the parameters using config directory /usr/local/config/ ...
 # human version. Using default transition matrix.
-# Looks like /tmp/tmpb49zmbej/files/e/0/1/dataset_e0109fd4-ac59-4275-90d3-25691349bc0c.dat is in fasta format.
+# Looks like /tmp/tmpjsgft19_/files/2/a/c/dataset_2acd78b6-6b4d-4592-91d1-ec3a4dc4a8f0.dat is in fasta format.
 # We have hints for 0 sequences and for 0 of the sequences in the input set.
 #
 # ----- prediction on sequence number 1 (length = 9453, name = HS04636) -----
@@ -19,7 +19,6 @@
 HS04636	AUGUSTUS	transcript	836	8857	.	+	.	ID=HS04636.g1.t1;Parent=HS04636.g1
 HS04636	AUGUSTUS	transcription_start_site	836	836	.	+	.	Parent=HS04636.g1.t1
 HS04636	AUGUSTUS	exon	836	1017	.	+	.	Parent=HS04636.g1.t1
-HS04636	AUGUSTUS	start_codon	966	968	.	+	0	Parent=HS04636.g1.t1
 HS04636	AUGUSTUS	CDS	966	1017	.	+	0	ID=HS04636.g1.t1.cds;Parent=HS04636.g1.t1
 HS04636	AUGUSTUS	CDS	1818	1934	.	+	2	ID=HS04636.g1.t1.cds;Parent=HS04636.g1.t1
 HS04636	AUGUSTUS	exon	1818	1934	.	+	.	Parent=HS04636.g1.t1
@@ -77,7 +76,6 @@
 HS08198	AUGUSTUS	transcript	86	2105	.	+	.	ID=HS08198.g2.t1;Parent=HS08198.g2
 HS08198	AUGUSTUS	transcription_start_site	86	86	.	+	.	Parent=HS08198.g2.t1
 HS08198	AUGUSTUS	exon	86	582	.	+	.	Parent=HS08198.g2.t1
-HS08198	AUGUSTUS	start_codon	445	447	.	+	0	Parent=HS08198.g2.t1
 HS08198	AUGUSTUS	CDS	445	582	.	+	0	ID=HS08198.g2.t1.cds;Parent=HS08198.g2.t1
 HS08198	AUGUSTUS	CDS	812	894	.	+	0	ID=HS08198.g2.t1.cds;Parent=HS08198.g2.t1
 HS08198	AUGUSTUS	exon	812	894	.	+	.	Parent=HS08198.g2.t1
@@ -103,4 +101,4 @@
 # end gene HS08198.g2
 ###
 # command line:
-# augustus --strand=both --noInFrameStop=false --gff3=on --uniqueGeneId=true --protein=on --codingseq=on --introns=off --stop=off --stop=off --cds=on --singlestrand=false /tmp/tmpb49zmbej/files/e/0/1/dataset_e0109fd4-ac59-4275-90d3-25691349bc0c.dat --UTR=on --genemodel=complete --softmasking=0 --species=human
+# augustus --strand=both --noInFrameStop=false --gff3=on --uniqueGeneId=true --protein=on --codingseq=on --introns=off --start=off --stop=off --cds=on --singlestrand=false /tmp/tmpjsgft19_/files/2/a/c/dataset_2acd78b6-6b4d-4592-91d1-ec3a4dc4a8f0.dat --UTR=on --genemodel=complete --softmasking=0 --species=human
--- a/test-data/human_augustus_utr-on.gtf	Sun Dec 15 09:09:49 2024 +0000
+++ b/test-data/human_augustus_utr-on.gtf	Thu Jul 17 09:06:11 2025 +0000
@@ -1,13 +1,14 @@
-# This output was generated with AUGUSTUS (version 3.4.0).
+# Looks like /tmp/tmpjsgft19_/files/a/4/8/dataset_a48be2bb-95b9-461d-aa42-cc5ff0ce9360.dat is in gzip format. Deflating...
+# This output was generated with AUGUSTUS (version 3.5.0).
 # AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de),
-# O. Keller, S. König, L. Gerischer, L. Romoth and Katharina Hoff.
+# O. Keller, S. König, L. Gerischer, L. Romoth, Katharina Hoff, Henry Mehlan and Daniel Honsel.
 # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008),
 # Using native and syntenically mapped cDNA alignments to improve de novo gene finding
 # Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013
 # No extrinsic information on sequences given.
 # Initializing the parameters using config directory /usr/local/config/ ...
 # human version. Using default transition matrix.
-# Looks like /tmp/tmpb49zmbej/files/6/0/5/dataset_605b6f62-4302-4e11-b378-848be921c4e4.dat is in fasta format.
+# Looks like /tmp/tmpjsgft19_/files/a/4/8/dataset_a48be2bb-95b9-461d-aa42-cc5ff0ce9360.dat is in fasta format.
 # We have hints for 0 sequences and for 0 of the sequences in the input set.
 #
 # ----- prediction on sequence number 1 (length = 9453, name = HS04636) -----
@@ -18,7 +19,6 @@
 HS04636	AUGUSTUS	transcript	836	8857	.	+	.	HS04636.g1.t1
 HS04636	AUGUSTUS	tss	836	836	.	+	.	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
 HS04636	AUGUSTUS	exon	836	1017	.	+	.	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
-HS04636	AUGUSTUS	start_codon	966	968	.	+	0	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
 HS04636	AUGUSTUS	CDS	966	1017	.	+	0	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
 HS04636	AUGUSTUS	CDS	1818	1934	.	+	2	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
 HS04636	AUGUSTUS	exon	1818	1934	.	+	.	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
@@ -76,7 +76,6 @@
 HS08198	AUGUSTUS	transcript	86	2105	.	+	.	HS08198.g2.t1
 HS08198	AUGUSTUS	tss	86	86	.	+	.	transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2";
 HS08198	AUGUSTUS	exon	86	582	.	+	.	transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2";
-HS08198	AUGUSTUS	start_codon	445	447	.	+	0	transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2";
 HS08198	AUGUSTUS	CDS	445	582	.	+	0	transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2";
 HS08198	AUGUSTUS	CDS	812	894	.	+	0	transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2";
 HS08198	AUGUSTUS	exon	812	894	.	+	.	transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2";
@@ -102,4 +101,4 @@
 # end gene HS08198.g2
 ###
 # command line:
-# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=on --codingseq=on --introns=off --stop=off --stop=off --cds=on --singlestrand=false /tmp/tmpb49zmbej/files/6/0/5/dataset_605b6f62-4302-4e11-b378-848be921c4e4.dat --UTR=on --genemodel=complete --softmasking=0 --species=human
+# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=on --codingseq=on --introns=off --start=off --stop=off --cds=on --singlestrand=false /tmp/tmpjsgft19_/files/a/4/8/dataset_a48be2bb-95b9-461d-aa42-cc5ff0ce9360.dat --UTR=on --genemodel=complete --softmasking=0 --species=human
--- a/test-data/human_augustus_utr-on_softmasking.gtf	Sun Dec 15 09:09:49 2024 +0000
+++ b/test-data/human_augustus_utr-on_softmasking.gtf	Thu Jul 17 09:06:11 2025 +0000
@@ -1,6 +1,6 @@
-# This output was generated with AUGUSTUS (version 3.4.0).
+# This output was generated with AUGUSTUS (version 3.5.0).
 # AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de),
-# O. Keller, S. König, L. Gerischer, L. Romoth and Katharina Hoff.
+# O. Keller, S. König, L. Gerischer, L. Romoth, Katharina Hoff, Henry Mehlan and Daniel Honsel.
 # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008),
 # Using native and syntenically mapped cDNA alignments to improve de novo gene finding
 # Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013
@@ -8,7 +8,7 @@
 # Sources of extrinsic information: M RM 
 # Initializing the parameters using config directory /usr/local/config/ ...
 # human version. Using default transition matrix.
-# Looks like /tmp/tmpb49zmbej/files/1/1/4/dataset_11447207-979d-4b84-a63d-14dd1e776f0e.dat is in fasta format.
+# Looks like /tmp/tmpjsgft19_/files/0/e/e/dataset_0ee7b75a-3ef6-4edb-811b-e06e2e5fae27.dat is in fasta format.
 # We have hints for 0 sequences and for 0 of the sequences in the input set.
 #
 # ----- prediction on sequence number 1 (length = 9453, name = HS04636) -----
@@ -21,4 +21,4 @@
 # Predicted genes for sequence number 2 on both strands
 # (none)
 # command line:
-# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=on --codingseq=on --introns=off --stop=off --stop=off --cds=on --singlestrand=false /tmp/tmpb49zmbej/files/1/1/4/dataset_11447207-979d-4b84-a63d-14dd1e776f0e.dat --UTR=on --genemodel=complete --softmasking=1 --species=human
+# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=off --codingseq=off --introns=off --start=off --stop=off --cds=off --singlestrand=false /tmp/tmpjsgft19_/files/0/e/e/dataset_0ee7b75a-3ef6-4edb-811b-e06e2e5fae27.dat --UTR=on --genemodel=complete --softmasking=1 --species=human