diff test-data/human_augustus_utr-on.gtf @ 3:f5075dee9d6b draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/augustus commit cf04d83d615ff09c4458982282d422fbef7d83ac
author iuc
date Fri, 22 May 2015 04:55:25 -0400
parents af307d3285c5
children 4de31938431b
line wrap: on
line diff
--- a/test-data/human_augustus_utr-on.gtf	Wed May 06 14:31:02 2015 -0400
+++ b/test-data/human_augustus_utr-on.gtf	Fri May 22 04:55:25 2015 -0400
@@ -1,13 +1,13 @@
-# This output was generated with AUGUSTUS (version 2.7).
-# AUGUSTUS is a gene prediction tool for eukaryotes written by Mario Stanke (mario.stanke@uni-greifswald.de)
-# and Oliver Keller (keller@cs.uni-goettingen.de).
+# This output was generated with AUGUSTUS (version 3.1.0).
+# AUGUSTUS is a gene prediction tool written by Mario Stanke (mario.stanke@uni-greifswald.de),
+# Oliver Keller, Stefanie König and Lizzy Gerischer.
 # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008),
 # Using native and syntenically mapped cDNA alignments to improve de novo gene finding
 # Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013
 # No extrinsic information on sequences given.
-# Initialising the parameters ...
-# human version. Using species specific transition matrix: /home/bag/Downloads/augustus.2.7/config/species/human/human_trans_shadow_partial_utr.pbl
-# Looks like ./examples/example.fa is in fasta format.
+# Initialising the parameters using config directory /home/bag/projects/code/galaxy/tool_deps/augustus/3.1/iuc/package_augustus_3_1/820bf3789c44/config/ ...
+# human version. Using default transition matrix.
+# Looks like /tmp/tmpboMLLQ/job_working_directory/000/2/task_0/dataset_1.dat is in fasta format.
 # We have hints for 0 sequences and for 0 of the sequences in the input set.
 #
 # ----- prediction on sequence number 1 (length = 9453, name = HS04636) -----
@@ -15,33 +15,51 @@
 # Constraints/Hints:
 # (none)
 # Predicted genes for sequence number 1 on both strands
-# start gene g1
-HS04636	AUGUSTUS	gene	836	8857	1	+	.	g1
-HS04636	AUGUSTUS	transcript	836	8857	.	+	.	g1.t1
-HS04636	AUGUSTUS	tss	836	836	.	+	.	transcript_id "g1.t1"; gene_id "g1";
-HS04636	AUGUSTUS	exon	836	1017	.	+	.	transcript_id "g1.t1"; gene_id "g1";
-HS04636	AUGUSTUS	start_codon	966	968	.	+	0	transcript_id "g1.t1"; gene_id "g1";
-HS04636	AUGUSTUS	CDS	966	1017	.	+	0	transcript_id "g1.t1"; gene_id "g1";
-HS04636	AUGUSTUS	CDS	1818	1934	.	+	2	transcript_id "g1.t1"; gene_id "g1";
-HS04636	AUGUSTUS	exon	1818	1934	.	+	.	transcript_id "g1.t1"; gene_id "g1";
-HS04636	AUGUSTUS	CDS	2055	2198	.	+	2	transcript_id "g1.t1"; gene_id "g1";
-HS04636	AUGUSTUS	exon	2055	2198	.	+	.	transcript_id "g1.t1"; gene_id "g1";
-HS04636	AUGUSTUS	CDS	2852	2995	.	+	2	transcript_id "g1.t1"; gene_id "g1";
-HS04636	AUGUSTUS	exon	2852	2995	.	+	.	transcript_id "g1.t1"; gene_id "g1";
-HS04636	AUGUSTUS	CDS	3426	3607	.	+	2	transcript_id "g1.t1"; gene_id "g1";
-HS04636	AUGUSTUS	exon	3426	3607	.	+	.	transcript_id "g1.t1"; gene_id "g1";
-HS04636	AUGUSTUS	CDS	4340	4423	.	+	0	transcript_id "g1.t1"; gene_id "g1";
-HS04636	AUGUSTUS	exon	4340	4423	.	+	.	transcript_id "g1.t1"; gene_id "g1";
-HS04636	AUGUSTUS	CDS	4543	4789	.	+	0	transcript_id "g1.t1"; gene_id "g1";
-HS04636	AUGUSTUS	exon	4543	4789	.	+	.	transcript_id "g1.t1"; gene_id "g1";
-HS04636	AUGUSTUS	CDS	5072	5358	.	+	2	transcript_id "g1.t1"; gene_id "g1";
-HS04636	AUGUSTUS	exon	5072	5358	.	+	.	transcript_id "g1.t1"; gene_id "g1";
-HS04636	AUGUSTUS	CDS	5860	6007	.	+	0	transcript_id "g1.t1"; gene_id "g1";
-HS04636	AUGUSTUS	exon	5860	6007	.	+	.	transcript_id "g1.t1"; gene_id "g1";
-HS04636	AUGUSTUS	CDS	6494	6903	.	+	2	transcript_id "g1.t1"; gene_id "g1";
-HS04636	AUGUSTUS	exon	6494	8857	.	+	.	transcript_id "g1.t1"; gene_id "g1";
-HS04636	AUGUSTUS	stop_codon	6901	6903	.	+	0	transcript_id "g1.t1"; gene_id "g1";
-HS04636	AUGUSTUS	tts	8857	8857	.	+	.	transcript_id "g1.t1"; gene_id "g1";
+# start gene HS04636.g1
+HS04636	AUGUSTUS	gene	836	8857	1	+	.	HS04636.g1
+HS04636	AUGUSTUS	transcript	836	8857	.	+	.	HS04636.g1.t1
+HS04636	AUGUSTUS	tss	836	836	.	+	.	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
+HS04636	AUGUSTUS	exon	836	1017	.	+	.	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
+HS04636	AUGUSTUS	start_codon	966	968	.	+	0	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
+HS04636	AUGUSTUS	CDS	966	1017	.	+	0	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
+HS04636	AUGUSTUS	CDS	1818	1934	.	+	2	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
+HS04636	AUGUSTUS	exon	1818	1934	.	+	.	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
+HS04636	AUGUSTUS	CDS	2055	2198	.	+	2	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
+HS04636	AUGUSTUS	exon	2055	2198	.	+	.	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
+HS04636	AUGUSTUS	CDS	2852	2995	.	+	2	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
+HS04636	AUGUSTUS	exon	2852	2995	.	+	.	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
+HS04636	AUGUSTUS	CDS	3426	3607	.	+	2	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
+HS04636	AUGUSTUS	exon	3426	3607	.	+	.	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
+HS04636	AUGUSTUS	CDS	4340	4423	.	+	0	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
+HS04636	AUGUSTUS	exon	4340	4423	.	+	.	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
+HS04636	AUGUSTUS	CDS	4543	4789	.	+	0	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
+HS04636	AUGUSTUS	exon	4543	4789	.	+	.	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
+HS04636	AUGUSTUS	CDS	5072	5358	.	+	2	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
+HS04636	AUGUSTUS	exon	5072	5358	.	+	.	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
+HS04636	AUGUSTUS	CDS	5860	6007	.	+	0	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
+HS04636	AUGUSTUS	exon	5860	6007	.	+	.	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
+HS04636	AUGUSTUS	CDS	6494	6903	.	+	2	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
+HS04636	AUGUSTUS	exon	6494	8857	.	+	.	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
+HS04636	AUGUSTUS	tts	8857	8857	.	+	.	transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1";
+# coding sequence = [atgctcgcccgcgccctgctgctgtgcgcggtcctggcgctcagccatacagcaaatccttgctgttcccacccatgtc
+# aaaaccgaggtgtatgtatgagtgtgggatttgaccagtataagtgcgattgtacccggacaggattctatggagaaaactgctcaacaccggaattt
+# ttgacaagaataaaattatttctgaaacccactccaaacacagtgcactacatacttacccacttcaagggattttggaacgttgtgaataacattcc
+# cttccttcgaaatgcaattatgagttatgtcttgacatccagatcacatttgattgacagtccaccaacttacaatgctgactatggctacaaaagct
+# gggaagccttctctaacctctcctattatactagagcccttcctcctgtgcctgatgattgcccgactcccttgggtgtcaaaggtaaaaagcagctt
+# cctgattcaaatgagattgtggaaaaattgcttctaagaagaaagttcatccctgatccccagggctcaaacatgatgtttgcattctttgcccagca
+# cttcacgcatcagtttttcaagacagatcataagcgagggccagctttcaccaacgggctgggccatggggtggacttaaatcatatttacggtgaaa
+# ctctggctagacagcgtaaactgcgccttttcaaggatggaaaaatgaaatatcagataattgatggagagatgtatcctcccacagtcaaagatact
+# caggcagagatgatctaccctcctcaagtccctgagcatctacggtttgctgtggggcaggaggtctttggtctggtgcctggtctgatgatgtatgc
+# cacaatctggctgcgggaacacaacagagtatgcgatgtgcttaaacaggagcatcctgaatggggtgatgagcagttgttccagacaagcaggctaa
+# tactgataggagagactattaagattgtgattgaagattatgtgcaacacttgagtggctatcacttcaaactgaaatttgacccagaactacttttc
+# aacaaacaattccagtaccaaaatcgtattgctgctgaatttaacaccctctatcactggcatccccttctgcctgacacctttcaaattcatgacca
+# gaaatacaactatcaacagtttatctacaacaactctatattgctggaacatggaattacccagtttgttgaatcattcaccaggcaaattgctggca
+# gggttgctggtggtaggaatgttccacccgcagtacagaaagtatcacaggcttccattgaccagagcaggcagatgaaataccagtcttttaatgag
+# taccgcaaacgctttatgctgaagccctatgaatcatttgaagaacttacaggagaaaaggaaatgtctgcagagttggaagcactctatggtgacat
+# cgatgctgtggagctgtatcctgcccttctggtagaaaagcctcggccagatgccatctttggtgaaaccatggtagaagttggagcaccattctcct
+# tgaaaggacttatgggtaatgttatatgttctcctgcctactggaagccaagcacttttggtggagaagtgggttttcaaatcatcaacactgcctca
+# attcagtctctcatctgcaataacgtgaagggctgtccctttacttcattcagtgttccagatccagagctcattaaaacagtcaccatcaatgcaag
+# ttcttcccgctccggactagatgatatcaatcccacagtactactaaaagaacgttcgactgaactgtag]
 # protein sequence = [MLARALLLCAVLALSHTANPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYIL
 # THFKGFWNVVNNIPFLRNAIMSYVLTSRSHLIDSPPTYNADYGYKSWEAFSNLSYYTRALPPVPDDCPTPLGVKGKKQLPDSNEIVEKLLLRRKFIPD
 # PQGSNMMFAFFAQHFTHQFFKTDHKRGPAFTNGLGHGVDLNHIYGETLARQRKLRLFKDGKMKYQIIDGEMYPPTVKDTQAEMIYPPQVPEHLRFAVG
@@ -49,7 +67,7 @@
 # WHPLLPDTFQIHDQKYNYQQFIYNNSILLEHGITQFVESFTRQIAGRVAGGRNVPPAVQKVSQASIDQSRQMKYQSFNEYRKRFMLKPYESFEELTGE
 # KEMSAELEALYGDIDAVELYPALLVEKPRPDAIFGETMVEVGAPFSLKGLMGNVICSPAYWKPSTFGGEVGFQIINTASIQSLICNNVKGCPFTSFSV
 # PDPELIKTVTINASSSRSGLDDINPTVLLKERSTEL]
-# end gene g1
+# end gene HS04636.g1
 ###
 #
 # ----- prediction on sequence number 2 (length = 2344, name = HS08198) -----
@@ -57,24 +75,35 @@
 # Constraints/Hints:
 # (none)
 # Predicted genes for sequence number 2 on both strands
-# start gene g2
-HS08198	AUGUSTUS	gene	86	2344	1	+	.	g2
-HS08198	AUGUSTUS	transcript	86	2344	.	+	.	g2.t1
-HS08198	AUGUSTUS	tss	86	86	.	+	.	transcript_id "g2.t1"; gene_id "g2";
-HS08198	AUGUSTUS	exon	86	582	.	+	.	transcript_id "g2.t1"; gene_id "g2";
-HS08198	AUGUSTUS	start_codon	445	447	.	+	0	transcript_id "g2.t1"; gene_id "g2";
-HS08198	AUGUSTUS	CDS	445	582	.	+	0	transcript_id "g2.t1"; gene_id "g2";
-HS08198	AUGUSTUS	CDS	812	894	.	+	0	transcript_id "g2.t1"; gene_id "g2";
-HS08198	AUGUSTUS	exon	812	894	.	+	.	transcript_id "g2.t1"; gene_id "g2";
-HS08198	AUGUSTUS	CDS	1053	1123	.	+	1	transcript_id "g2.t1"; gene_id "g2";
-HS08198	AUGUSTUS	exon	1053	1123	.	+	.	transcript_id "g2.t1"; gene_id "g2";
-HS08198	AUGUSTUS	CDS	1208	1315	.	+	2	transcript_id "g2.t1"; gene_id "g2";
-HS08198	AUGUSTUS	exon	1208	1315	.	+	.	transcript_id "g2.t1"; gene_id "g2";
-HS08198	AUGUSTUS	CDS	1587	1688	.	+	2	transcript_id "g2.t1"; gene_id "g2";
-HS08198	AUGUSTUS	exon	1587	1688	.	+	.	transcript_id "g2.t1"; gene_id "g2";
+# start gene HS08198.g2
+HS08198	AUGUSTUS	gene	86	2105	1	+	.	HS08198.g2
+HS08198	AUGUSTUS	transcript	86	2105	.	+	.	HS08198.g2.t1
+HS08198	AUGUSTUS	tss	86	86	.	+	.	transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2";
+HS08198	AUGUSTUS	exon	86	582	.	+	.	transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2";
+HS08198	AUGUSTUS	start_codon	445	447	.	+	0	transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2";
+HS08198	AUGUSTUS	CDS	445	582	.	+	0	transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2";
+HS08198	AUGUSTUS	CDS	812	894	.	+	0	transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2";
+HS08198	AUGUSTUS	exon	812	894	.	+	.	transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2";
+HS08198	AUGUSTUS	CDS	1053	1123	.	+	1	transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2";
+HS08198	AUGUSTUS	exon	1053	1123	.	+	.	transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2";
+HS08198	AUGUSTUS	CDS	1208	1315	.	+	2	transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2";
+HS08198	AUGUSTUS	exon	1208	1315	.	+	.	transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2";
+HS08198	AUGUSTUS	CDS	1587	1688	.	+	2	transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2";
+HS08198	AUGUSTUS	exon	1587	1688	.	+	.	transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2";
+HS08198	AUGUSTUS	CDS	1772	1848	.	+	2	transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2";
+HS08198	AUGUSTUS	exon	1772	2105	.	+	.	transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2";
+HS08198	AUGUSTUS	tts	2105	2105	.	+	.	transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2";
+# coding sequence = [atgctgccccctgggactgcgaccctcttgactctgctcctggcagctggctcgctgggccagaagcctcagaggccac
+# gccggcccgcatcccccatcagcaccatccagcccaaggccaattttgatgcgcagcaggagcagggccaccgggccgaggccaccacactgcatgtg
+# gctccccagggcacagccatggctgtcagtaccttccgaaagctggatgggatctgctggcaggtgcgccagctctatggagacacaggggtcctcgg
+# ccgcttcctgcttcaagcccgaggcgcccgaggggctgtgcacgtggttgtcgctgagaccgactaccagagtttcgctgtcctgtacctggagcggg
+# cggggcagctgtcagtgaagctctacgcccgctcgctccctgtgagcgactcggtcctgagtgggtttgagcagcgggtccaggaggcccacctgact
+# gaggaccagatcttctacttccccaagtacggcttctgcgaggctgcagaccagttccacgtcctggacggtgagtgcacagcgggggcaagcatggc
+# ggcgtggtga]
 # protein sequence = [MLPPGTATLLTLLLAAGSLGQKPQRPRRPASPISTIQPKANFDAQQEQGHRAEATTLHVAPQGTAMAVSTFRKLDGIC
-# WQVRQLYGDTGVLGRFLLQARGARGAVHVVVAETDYQSFAVLYLERAGQLSVKLYARSLPVSDSVLSGFEQRVQEAHLTEDQIFYFPKY]
-# end gene g2
+# WQVRQLYGDTGVLGRFLLQARGARGAVHVVVAETDYQSFAVLYLERAGQLSVKLYARSLPVSDSVLSGFEQRVQEAHLTEDQIFYFPKYGFCEAADQF
+# HVLDGECTAGASMAAW]
+# end gene HS08198.g2
 ###
 # command line:
-# ./bin/augustus --species=human --UTR=on ./examples/example.fa
+# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=on --codingseq=on --introns=off --stop=off --stop=off --cds=on --singlestrand=false /tmp/tmpboMLLQ/job_working_directory/000/2/task_0/dataset_1.dat --UTR=on --genemodel=complete --species=human