diff test-data/human_augustus_protein_codingseq_introns_cds_main.gtf @ 0:af307d3285c5 draft

Uploaded
author bgruening
date Sat, 06 Jul 2013 10:07:41 -0400
parents
children f5075dee9d6b
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/human_augustus_protein_codingseq_introns_cds_main.gtf	Sat Jul 06 10:07:41 2013 -0400
@@ -0,0 +1,101 @@
+# This output was generated with AUGUSTUS (version 2.7).
+# AUGUSTUS is a gene prediction tool for eukaryotes written by Mario Stanke (mario.stanke@uni-greifswald.de)
+# and Oliver Keller (keller@cs.uni-goettingen.de).
+# Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008),
+# Using native and syntenically mapped cDNA alignments to improve de novo gene finding
+# Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013
+# No extrinsic information on sequences given.
+# Initialising the parameters ...
+# human version. Using default transition matrix.
+# Looks like /home/bag/projects/galaxy/galaxy-central/database/files/001/dataset_1149.dat is in fasta format.
+# We have hints for 0 sequences and for 0 of the sequences in the input set.
+#
+# ----- prediction on sequence number 1 (length = 9453, name = HS04636) -----
+#
+# Constraints/Hints:
+# (none)
+# Predicted genes for sequence number 1 on both strands
+# start gene g1
+HS04636	AUGUSTUS	gene	966	6903	1	+	.	g1
+HS04636	AUGUSTUS	transcript	966	6903	.	+	.	g1.t1
+HS04636	AUGUSTUS	intron	1018	1817	.	+	.	transcript_id "g1.t1"; gene_id "g1";
+HS04636	AUGUSTUS	intron	1935	2054	.	+	.	transcript_id "g1.t1"; gene_id "g1";
+HS04636	AUGUSTUS	intron	2199	2851	.	+	.	transcript_id "g1.t1"; gene_id "g1";
+HS04636	AUGUSTUS	intron	2996	3425	.	+	.	transcript_id "g1.t1"; gene_id "g1";
+HS04636	AUGUSTUS	intron	3608	4339	.	+	.	transcript_id "g1.t1"; gene_id "g1";
+HS04636	AUGUSTUS	intron	4424	4542	.	+	.	transcript_id "g1.t1"; gene_id "g1";
+HS04636	AUGUSTUS	intron	4790	5071	.	+	.	transcript_id "g1.t1"; gene_id "g1";
+HS04636	AUGUSTUS	intron	5359	5859	.	+	.	transcript_id "g1.t1"; gene_id "g1";
+HS04636	AUGUSTUS	intron	6008	6493	.	+	.	transcript_id "g1.t1"; gene_id "g1";
+HS04636	AUGUSTUS	CDS	966	1017	.	+	0	transcript_id "g1.t1"; gene_id "g1";
+HS04636	AUGUSTUS	CDS	1818	1934	.	+	2	transcript_id "g1.t1"; gene_id "g1";
+HS04636	AUGUSTUS	CDS	2055	2198	.	+	2	transcript_id "g1.t1"; gene_id "g1";
+HS04636	AUGUSTUS	CDS	2852	2995	.	+	2	transcript_id "g1.t1"; gene_id "g1";
+HS04636	AUGUSTUS	CDS	3426	3607	.	+	2	transcript_id "g1.t1"; gene_id "g1";
+HS04636	AUGUSTUS	CDS	4340	4423	.	+	0	transcript_id "g1.t1"; gene_id "g1";
+HS04636	AUGUSTUS	CDS	4543	4789	.	+	0	transcript_id "g1.t1"; gene_id "g1";
+HS04636	AUGUSTUS	CDS	5072	5358	.	+	2	transcript_id "g1.t1"; gene_id "g1";
+HS04636	AUGUSTUS	CDS	5860	6007	.	+	0	transcript_id "g1.t1"; gene_id "g1";
+HS04636	AUGUSTUS	CDS	6494	6903	.	+	2	transcript_id "g1.t1"; gene_id "g1";
+# coding sequence = [atgctcgcccgcgccctgctgctgtgcgcggtcctggcgctcagccatacagcaaatccttgctgttcccacccatgtc
+# aaaaccgaggtgtatgtatgagtgtgggatttgaccagtataagtgcgattgtacccggacaggattctatggagaaaactgctcaacaccggaattt
+# ttgacaagaataaaattatttctgaaacccactccaaacacagtgcactacatacttacccacttcaagggattttggaacgttgtgaataacattcc
+# cttccttcgaaatgcaattatgagttatgtcttgacatccagatcacatttgattgacagtccaccaacttacaatgctgactatggctacaaaagct
+# gggaagccttctctaacctctcctattatactagagcccttcctcctgtgcctgatgattgcccgactcccttgggtgtcaaaggtaaaaagcagctt
+# cctgattcaaatgagattgtggaaaaattgcttctaagaagaaagttcatccctgatccccagggctcaaacatgatgtttgcattctttgcccagca
+# cttcacgcatcagtttttcaagacagatcataagcgagggccagctttcaccaacgggctgggccatggggtggacttaaatcatatttacggtgaaa
+# ctctggctagacagcgtaaactgcgccttttcaaggatggaaaaatgaaatatcagataattgatggagagatgtatcctcccacagtcaaagatact
+# caggcagagatgatctaccctcctcaagtccctgagcatctacggtttgctgtggggcaggaggtctttggtctggtgcctggtctgatgatgtatgc
+# cacaatctggctgcgggaacacaacagagtatgcgatgtgcttaaacaggagcatcctgaatggggtgatgagcagttgttccagacaagcaggctaa
+# tactgataggagagactattaagattgtgattgaagattatgtgcaacacttgagtggctatcacttcaaactgaaatttgacccagaactacttttc
+# aacaaacaattccagtaccaaaatcgtattgctgctgaatttaacaccctctatcactggcatccccttctgcctgacacctttcaaattcatgacca
+# gaaatacaactatcaacagtttatctacaacaactctatattgctggaacatggaattacccagtttgttgaatcattcaccaggcaaattgctggca
+# gggttgctggtggtaggaatgttccacccgcagtacagaaagtatcacaggcttccattgaccagagcaggcagatgaaataccagtcttttaatgag
+# taccgcaaacgctttatgctgaagccctatgaatcatttgaagaacttacaggagaaaaggaaatgtctgcagagttggaagcactctatggtgacat
+# cgatgctgtggagctgtatcctgcccttctggtagaaaagcctcggccagatgccatctttggtgaaaccatggtagaagttggagcaccattctcct
+# tgaaaggacttatgggtaatgttatatgttctcctgcctactggaagccaagcacttttggtggagaagtgggttttcaaatcatcaacactgcctca
+# attcagtctctcatctgcaataacgtgaagggctgtccctttacttcattcagtgttccagatccagagctcattaaaacagtcaccatcaatgcaag
+# ttcttcccgctccggactagatgatatcaatcccacagtactactaaaagaacgttcgactgaactgtag]
+# protein sequence = [MLARALLLCAVLALSHTANPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYIL
+# THFKGFWNVVNNIPFLRNAIMSYVLTSRSHLIDSPPTYNADYGYKSWEAFSNLSYYTRALPPVPDDCPTPLGVKGKKQLPDSNEIVEKLLLRRKFIPD
+# PQGSNMMFAFFAQHFTHQFFKTDHKRGPAFTNGLGHGVDLNHIYGETLARQRKLRLFKDGKMKYQIIDGEMYPPTVKDTQAEMIYPPQVPEHLRFAVG
+# QEVFGLVPGLMMYATIWLREHNRVCDVLKQEHPEWGDEQLFQTSRLILIGETIKIVIEDYVQHLSGYHFKLKFDPELLFNKQFQYQNRIAAEFNTLYH
+# WHPLLPDTFQIHDQKYNYQQFIYNNSILLEHGITQFVESFTRQIAGRVAGGRNVPPAVQKVSQASIDQSRQMKYQSFNEYRKRFMLKPYESFEELTGE
+# KEMSAELEALYGDIDAVELYPALLVEKPRPDAIFGETMVEVGAPFSLKGLMGNVICSPAYWKPSTFGGEVGFQIINTASIQSLICNNVKGCPFTSFSV
+# PDPELIKTVTINASSSRSGLDDINPTVLLKERSTEL]
+# end gene g1
+###
+#
+# ----- prediction on sequence number 2 (length = 2344, name = HS08198) -----
+#
+# Constraints/Hints:
+# (none)
+# Predicted genes for sequence number 2 on both strands
+# start gene g2
+HS08198	AUGUSTUS	gene	445	1848	1	+	.	g2
+HS08198	AUGUSTUS	transcript	445	1848	.	+	.	g2.t1
+HS08198	AUGUSTUS	intron	583	811	.	+	.	transcript_id "g2.t1"; gene_id "g2";
+HS08198	AUGUSTUS	intron	895	1052	.	+	.	transcript_id "g2.t1"; gene_id "g2";
+HS08198	AUGUSTUS	intron	1124	1207	.	+	.	transcript_id "g2.t1"; gene_id "g2";
+HS08198	AUGUSTUS	intron	1316	1586	.	+	.	transcript_id "g2.t1"; gene_id "g2";
+HS08198	AUGUSTUS	intron	1689	1771	.	+	.	transcript_id "g2.t1"; gene_id "g2";
+HS08198	AUGUSTUS	CDS	445	582	.	+	0	transcript_id "g2.t1"; gene_id "g2";
+HS08198	AUGUSTUS	CDS	812	894	.	+	0	transcript_id "g2.t1"; gene_id "g2";
+HS08198	AUGUSTUS	CDS	1053	1123	.	+	1	transcript_id "g2.t1"; gene_id "g2";
+HS08198	AUGUSTUS	CDS	1208	1315	.	+	2	transcript_id "g2.t1"; gene_id "g2";
+HS08198	AUGUSTUS	CDS	1587	1688	.	+	2	transcript_id "g2.t1"; gene_id "g2";
+HS08198	AUGUSTUS	CDS	1772	1848	.	+	2	transcript_id "g2.t1"; gene_id "g2";
+# coding sequence = [atgctgccccctgggactgcgaccctcttgactctgctcctggcagctggctcgctgggccagaagcctcagaggccac
+# gccggcccgcatcccccatcagcaccatccagcccaaggccaattttgatgcgcagcaggagcagggccaccgggccgaggccaccacactgcatgtg
+# gctccccagggcacagccatggctgtcagtaccttccgaaagctggatgggatctgctggcaggtgcgccagctctatggagacacaggggtcctcgg
+# ccgcttcctgcttcaagcccgaggcgcccgaggggctgtgcacgtggttgtcgctgagaccgactaccagagtttcgctgtcctgtacctggagcggg
+# cggggcagctgtcagtgaagctctacgcccgctcgctccctgtgagcgactcggtcctgagtgggtttgagcagcgggtccaggaggcccacctgact
+# gaggaccagatcttctacttccccaagtacggcttctgcgaggctgcagaccagttccacgtcctggacggtgagtgcacagcgggggcaagcatggc
+# ggcgtggtga]
+# protein sequence = [MLPPGTATLLTLLLAAGSLGQKPQRPRRPASPISTIQPKANFDAQQEQGHRAEATTLHVAPQGTAMAVSTFRKLDGIC
+# WQVRQLYGDTGVLGRFLLQARGARGAVHVVVAETDYQSFAVLYLERAGQLSVKLYARSLPVSDSVLSGFEQRVQEAHLTEDQIFYFPKYGFCEAADQF
+# HVLDGECTAGASMAAW]
+# end gene g2
+###
+# command line:
+# augustus --strand=both --noInFrameStop=false --gff3=off --protein=on --introns=on --start=off --stop=off --cds=on --codingseq=on --singlestrand=false /home/bag/projects/galaxy/galaxy-central/database/files/001/dataset_1149.dat --UTR=off --genemodel=complete --species=human