Mercurial > repos > bgruening > augustus_training
view test-data/human_augustus_protein_codingseq_introns_cds_main.gtf @ 6:dc027fa56d55 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/augustus commit a5c4068d1515fc946d1ee7caf66cc59c9938a124"
author | iuc |
---|---|
date | Thu, 26 Aug 2021 20:35:53 +0000 |
parents | 7be22100e5e1 |
children |
line wrap: on
line source
# This output was generated with AUGUSTUS (version 3.4.0). # AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de), # O. Keller, S. König, L. Gerischer, L. Romoth and Katharina Hoff. # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008), # Using native and syntenically mapped cDNA alignments to improve de novo gene finding # Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013 # No extrinsic information on sequences given. # Initializing the parameters using config directory /usr/local/config/ ... # human version. Using default transition matrix. # Looks like /tmp/tmpb49zmbej/files/c/d/6/dataset_cd6650af-fd36-4176-b9f1-e38bb118655f.dat is in fasta format. # We have hints for 0 sequences and for 0 of the sequences in the input set. # # ----- prediction on sequence number 1 (length = 9453, name = HS04636) ----- # # Predicted genes for sequence number 1 on both strands # start gene HS04636.g1 HS04636 AUGUSTUS gene 966 6903 1 + . HS04636.g1 HS04636 AUGUSTUS transcript 966 6903 . + . HS04636.g1.t1 HS04636 AUGUSTUS start_codon 966 968 . + 0 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; HS04636 AUGUSTUS intron 1018 1817 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; HS04636 AUGUSTUS intron 1935 2054 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; HS04636 AUGUSTUS intron 2199 2851 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; HS04636 AUGUSTUS intron 2996 3425 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; HS04636 AUGUSTUS intron 3608 4339 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; HS04636 AUGUSTUS intron 4424 4542 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; HS04636 AUGUSTUS intron 4790 5071 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; HS04636 AUGUSTUS intron 5359 5859 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; HS04636 AUGUSTUS intron 6008 6493 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; HS04636 AUGUSTUS CDS 966 1017 . + 0 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; HS04636 AUGUSTUS CDS 1818 1934 . + 2 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; HS04636 AUGUSTUS CDS 2055 2198 . + 2 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; HS04636 AUGUSTUS CDS 2852 2995 . + 2 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; HS04636 AUGUSTUS CDS 3426 3607 . + 2 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; HS04636 AUGUSTUS CDS 4340 4423 . + 0 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; HS04636 AUGUSTUS CDS 4543 4789 . + 0 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; HS04636 AUGUSTUS CDS 5072 5358 . + 2 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; HS04636 AUGUSTUS CDS 5860 6007 . + 0 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; HS04636 AUGUSTUS CDS 6494 6903 . + 2 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; HS04636 AUGUSTUS stop_codon 6901 6903 . + 0 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; # coding sequence = [atgctcgcccgcgccctgctgctgtgcgcggtcctggcgctcagccatacagcaaatccttgctgttcccacccatgtc # aaaaccgaggtgtatgtatgagtgtgggatttgaccagtataagtgcgattgtacccggacaggattctatggagaaaactgctcaacaccggaattt # ttgacaagaataaaattatttctgaaacccactccaaacacagtgcactacatacttacccacttcaagggattttggaacgttgtgaataacattcc # cttccttcgaaatgcaattatgagttatgtcttgacatccagatcacatttgattgacagtccaccaacttacaatgctgactatggctacaaaagct # gggaagccttctctaacctctcctattatactagagcccttcctcctgtgcctgatgattgcccgactcccttgggtgtcaaaggtaaaaagcagctt # cctgattcaaatgagattgtggaaaaattgcttctaagaagaaagttcatccctgatccccagggctcaaacatgatgtttgcattctttgcccagca # cttcacgcatcagtttttcaagacagatcataagcgagggccagctttcaccaacgggctgggccatggggtggacttaaatcatatttacggtgaaa # ctctggctagacagcgtaaactgcgccttttcaaggatggaaaaatgaaatatcagataattgatggagagatgtatcctcccacagtcaaagatact # caggcagagatgatctaccctcctcaagtccctgagcatctacggtttgctgtggggcaggaggtctttggtctggtgcctggtctgatgatgtatgc # cacaatctggctgcgggaacacaacagagtatgcgatgtgcttaaacaggagcatcctgaatggggtgatgagcagttgttccagacaagcaggctaa # tactgataggagagactattaagattgtgattgaagattatgtgcaacacttgagtggctatcacttcaaactgaaatttgacccagaactacttttc # aacaaacaattccagtaccaaaatcgtattgctgctgaatttaacaccctctatcactggcatccccttctgcctgacacctttcaaattcatgacca # gaaatacaactatcaacagtttatctacaacaactctatattgctggaacatggaattacccagtttgttgaatcattcaccaggcaaattgctggca # gggttgctggtggtaggaatgttccacccgcagtacagaaagtatcacaggcttccattgaccagagcaggcagatgaaataccagtcttttaatgag # taccgcaaacgctttatgctgaagccctatgaatcatttgaagaacttacaggagaaaaggaaatgtctgcagagttggaagcactctatggtgacat # cgatgctgtggagctgtatcctgcccttctggtagaaaagcctcggccagatgccatctttggtgaaaccatggtagaagttggagcaccattctcct # tgaaaggacttatgggtaatgttatatgttctcctgcctactggaagccaagcacttttggtggagaagtgggttttcaaatcatcaacactgcctca # attcagtctctcatctgcaataacgtgaagggctgtccctttacttcattcagtgttccagatccagagctcattaaaacagtcaccatcaatgcaag # ttcttcccgctccggactagatgatatcaatcccacagtactactaaaagaacgttcgactgaactgtag] # protein sequence = [MLARALLLCAVLALSHTANPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYIL # THFKGFWNVVNNIPFLRNAIMSYVLTSRSHLIDSPPTYNADYGYKSWEAFSNLSYYTRALPPVPDDCPTPLGVKGKKQLPDSNEIVEKLLLRRKFIPD # PQGSNMMFAFFAQHFTHQFFKTDHKRGPAFTNGLGHGVDLNHIYGETLARQRKLRLFKDGKMKYQIIDGEMYPPTVKDTQAEMIYPPQVPEHLRFAVG # QEVFGLVPGLMMYATIWLREHNRVCDVLKQEHPEWGDEQLFQTSRLILIGETIKIVIEDYVQHLSGYHFKLKFDPELLFNKQFQYQNRIAAEFNTLYH # WHPLLPDTFQIHDQKYNYQQFIYNNSILLEHGITQFVESFTRQIAGRVAGGRNVPPAVQKVSQASIDQSRQMKYQSFNEYRKRFMLKPYESFEELTGE # KEMSAELEALYGDIDAVELYPALLVEKPRPDAIFGETMVEVGAPFSLKGLMGNVICSPAYWKPSTFGGEVGFQIINTASIQSLICNNVKGCPFTSFSV # PDPELIKTVTINASSSRSGLDDINPTVLLKERSTEL] # end gene HS04636.g1 ### # # ----- prediction on sequence number 2 (length = 2344, name = HS08198) ----- # # Predicted genes for sequence number 2 on both strands # start gene HS08198.g2 HS08198 AUGUSTUS gene 445 1848 1 + . HS08198.g2 HS08198 AUGUSTUS transcript 445 1848 . + . HS08198.g2.t1 HS08198 AUGUSTUS start_codon 445 447 . + 0 transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; HS08198 AUGUSTUS intron 583 811 . + . transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; HS08198 AUGUSTUS intron 895 1052 . + . transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; HS08198 AUGUSTUS intron 1124 1207 . + . transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; HS08198 AUGUSTUS intron 1316 1586 . + . transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; HS08198 AUGUSTUS intron 1689 1771 . + . transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; HS08198 AUGUSTUS CDS 445 582 . + 0 transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; HS08198 AUGUSTUS CDS 812 894 . + 0 transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; HS08198 AUGUSTUS CDS 1053 1123 . + 1 transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; HS08198 AUGUSTUS CDS 1208 1315 . + 2 transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; HS08198 AUGUSTUS CDS 1587 1688 . + 2 transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; HS08198 AUGUSTUS CDS 1772 1848 . + 2 transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; HS08198 AUGUSTUS stop_codon 1846 1848 . + 0 transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; # coding sequence = [atgctgccccctgggactgcgaccctcttgactctgctcctggcagctggctcgctgggccagaagcctcagaggccac # gccggcccgcatcccccatcagcaccatccagcccaaggccaattttgatgcgcagcaggagcagggccaccgggccgaggccaccacactgcatgtg # gctccccagggcacagccatggctgtcagtaccttccgaaagctggatgggatctgctggcaggtgcgccagctctatggagacacaggggtcctcgg # ccgcttcctgcttcaagcccgaggcgcccgaggggctgtgcacgtggttgtcgctgagaccgactaccagagtttcgctgtcctgtacctggagcggg # cggggcagctgtcagtgaagctctacgcccgctcgctccctgtgagcgactcggtcctgagtgggtttgagcagcgggtccaggaggcccacctgact # gaggaccagatcttctacttccccaagtacggcttctgcgaggctgcagaccagttccacgtcctggacggtgagtgcacagcgggggcaagcatggc # ggcgtggtga] # protein sequence = [MLPPGTATLLTLLLAAGSLGQKPQRPRRPASPISTIQPKANFDAQQEQGHRAEATTLHVAPQGTAMAVSTFRKLDGIC # WQVRQLYGDTGVLGRFLLQARGARGAVHVVVAETDYQSFAVLYLERAGQLSVKLYARSLPVSDSVLSGFEQRVQEAHLTEDQIFYFPKYGFCEAADQF # HVLDGECTAGASMAAW] # end gene HS08198.g2 ### # command line: # augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=on --codingseq=on --introns=on --start=on --stop=on --cds=on --singlestrand=false /tmp/tmpb49zmbej/files/c/d/6/dataset_cd6650af-fd36-4176-b9f1-e38bb118655f.dat --UTR=off --genemodel=complete --softmasking=0 --species=human