# HG changeset patch # User iuc # Date 1432284925 14400 # Node ID f5075dee9d6b4e250f0894579082c341173bbf4f # Parent a77a4e9921e0e0c20b94686348d56c3f1ca49b0b planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/augustus commit cf04d83d615ff09c4458982282d422fbef7d83ac diff -r a77a4e9921e0 -r f5075dee9d6b augustus.xml --- a/augustus.xml Wed May 06 14:31:02 2015 -0400 +++ b/augustus.xml Fri May 22 04:55:25 2015 -0400 @@ -1,8 +1,8 @@ - + gene prediction for eukaryotic genomes - augustus + augustus @@ -12,14 +12,14 @@ - - - - + + + + @@ -94,7 +102,7 @@ - + @@ -105,18 +113,19 @@ - - + + - - - + + + + - + @@ -127,9 +136,9 @@ - - - + + + @@ -143,25 +152,62 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - - - + @@ -174,12 +220,11 @@ - - + @@ -193,28 +238,21 @@ - - + + - - - - - - - - - - + + + - - + + diff -r a77a4e9921e0 -r f5075dee9d6b test-data/arabidopsis_augustus_utr-off_singlestrand-on_mea-on.gtf --- a/test-data/arabidopsis_augustus_utr-off_singlestrand-on_mea-on.gtf Wed May 06 14:31:02 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,85 +0,0 @@ -# This output was generated with AUGUSTUS (version 2.7). -# AUGUSTUS is a gene prediction tool for eukaryotes written by Mario Stanke (mario.stanke@uni-greifswald.de) -# and Oliver Keller (keller@cs.uni-goettingen.de). -# Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008), -# Using native and syntenically mapped cDNA alignments to improve de novo gene finding -# Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013 -# No extrinsic information on sequences given. -# arabidopsis version. Using default transition matrix. -# We have hints for 0 sequences and for 0 of the sequences in the input set. -# -# ----- prediction on sequence number 1 (length = 2066, name = arabidopsis) ----- -# -# Constraints/Hints: -# (none) -# Predicted genes for sequence number 1 on both strands -# start gene g1 -arabidopsis AUGUSTUS gene 775 1851 0 + . g1 -arabidopsis AUGUSTUS transcript 775 1851 . + . g1.t1 -arabidopsis AUGUSTUS start_codon 775 777 . + 0 transcript_id "g1.t1"; gene_id "g1"; -arabidopsis AUGUSTUS CDS 775 1851 0.99 + 0 transcript_id "g1.t1"; gene_id "g1"; -arabidopsis AUGUSTUS stop_codon 1849 1851 . + 0 transcript_id "g1.t1"; gene_id "g1"; -# protein sequence = [MDLSLAPTTTTSSDQEQDRDQELTSNIGASSSSGPSGNNNNLPMMMIPPPEKEHMFDKVVTPSDVGKLNRLVIPKQHA -# ERYFPLDSSNNQNGTLLNFQDRNGKMWRFRYSYWNSSQSYVMTKGWSRFVKEKKLDAGDIVSFQRGIGDESERSKLYIDWRHRPDMSLVQAHQFGNFG -# FNFNFPTTSQYSNRFHPLPEYNSVPIHRGLNIGNHQRSYYNTQRQEFVGYGYGNLAGRCYYTGSPLDHRNIVGSEPLVIDSVPVVPGRLTPVMLPPLP -# PPPSTAGKRLRLFGVNMECGNDYNQQEESWLVPRGEIGASSSSSSALRLNLSTDHDDDNDDGDDGDDDQFAKKGKSSLSLNFNP] -# end gene g1 -### -# start gene g2 -arabidopsis AUGUSTUS gene 841 1661 . - . g2 -arabidopsis AUGUSTUS transcript 841 1661 . - . g2.t1 -arabidopsis AUGUSTUS stop_codon 841 843 . - 0 transcript_id "g2.t1"; gene_id "g2"; -arabidopsis AUGUSTUS intron 1024 1101 0.75 - . transcript_id "g2.t1"; gene_id "g2"; -arabidopsis AUGUSTUS intron 1193 1325 0.03 - . transcript_id "g2.t1"; gene_id "g2"; -arabidopsis AUGUSTUS intron 1416 1512 0.85 - . transcript_id "g2.t1"; gene_id "g2"; -arabidopsis AUGUSTUS CDS 841 1023 0.87 - 0 transcript_id "g2.t1"; gene_id "g2"; -arabidopsis AUGUSTUS CDS 1102 1192 0.78 - 1 transcript_id "g2.t1"; gene_id "g2"; -arabidopsis AUGUSTUS CDS 1326 1415 0.05 - 1 transcript_id "g2.t1"; gene_id "g2"; -arabidopsis AUGUSTUS CDS 1513 1661 0.35 - 0 transcript_id "g2.t1"; gene_id "g2"; -# protein sequence = [SLPHSIFTPKSLSLFPAVEGGGGSGGNITGVNLPGTTGTESITNGSDPTMNVGDFRCLSPGESGRSYILAMDEICWNI -# EKDNVSCIELFLFDETAPSFGHNVALARVPIRREIPLSVLFRDHESVEFSDVAWCYHFVEHVFFLRRWNHHHRKVVVVSAGTGAAACSDVGG] -# end gene g2 -### -# -# ----- prediction on sequence number 2 (length = 1802, name = arabidopsis2) ----- -# -# Constraints/Hints: -# (none) -# Predicted genes for sequence number 2 on both strands -# start gene g3 -arabidopsis2 AUGUSTUS gene 97 1600 . - . g3 -arabidopsis2 AUGUSTUS transcript 97 1600 . - . g3.t1 -arabidopsis2 AUGUSTUS stop_codon 97 99 . - 0 transcript_id "g3.t1"; gene_id "g3"; -arabidopsis2 AUGUSTUS intron 349 521 0.73 - . transcript_id "g3.t1"; gene_id "g3"; -arabidopsis2 AUGUSTUS intron 1210 1333 0.36 - . transcript_id "g3.t1"; gene_id "g3"; -arabidopsis2 AUGUSTUS intron 1452 1560 0.54 - . transcript_id "g3.t1"; gene_id "g3"; -arabidopsis2 AUGUSTUS CDS 97 348 0.73 - 0 transcript_id "g3.t1"; gene_id "g3"; -arabidopsis2 AUGUSTUS CDS 522 1209 0.6 - 1 transcript_id "g3.t1"; gene_id "g3"; -arabidopsis2 AUGUSTUS CDS 1334 1451 0.44 - 2 transcript_id "g3.t1"; gene_id "g3"; -arabidopsis2 AUGUSTUS CDS 1561 1600 0.42 - 0 transcript_id "g3.t1"; gene_id "g3"; -arabidopsis2 AUGUSTUS start_codon 1598 1600 . - 0 transcript_id "g3.t1"; gene_id "g3"; -# protein sequence = [MVKLVFSDLSCAFASANSTASSTNSSIFFSIDFSSASFASLFSIKYFFNAAIGITELIGISQGVSSESISLTFNQGWT -# FSSSSSSDSFFSDFPDLQDIHTVNMNTRHPKCRTFLPNLGVRSGSQVRHPNGPQVILDNKDNRQFIESSHVETLEELSVVTSSISEKGDSDIITILFQ -# DFPPVLGAKGSTSGDRNAFTNKSKATKHVVFFGEHVHGSTLASAASSNLAEELAHNSTSRDTFAEGMDMVTVGTNDRVRLGKELDKASRNSLLAIVQV -# NKTKHLASAGIIGGLLLLDNFLDRDDGRSTGGVGVIESAKGEGTGGSEERRRRSQSDREQETILMGSMQRNRGSELSGSESRRHCYQWKPQ] -# end gene g3 -### -# start gene g4 -arabidopsis2 AUGUSTUS gene 121 1582 0 + . g4 -arabidopsis2 AUGUSTUS transcript 121 1582 . + . g4.t1 -arabidopsis2 AUGUSTUS start_codon 121 123 . + 0 transcript_id "g4.t1"; gene_id "g4"; -arabidopsis2 AUGUSTUS intron 352 434 0.97 + . transcript_id "g4.t1"; gene_id "g4"; -arabidopsis2 AUGUSTUS intron 1213 1304 1 + . transcript_id "g4.t1"; gene_id "g4"; -arabidopsis2 AUGUSTUS CDS 121 351 0.72 + 0 transcript_id "g4.t1"; gene_id "g4"; -arabidopsis2 AUGUSTUS CDS 435 1212 0.97 + 0 transcript_id "g4.t1"; gene_id "g4"; -arabidopsis2 AUGUSTUS CDS 1305 1582 1 + 2 transcript_id "g4.t1"; gene_id "g4"; -arabidopsis2 AUGUSTUS stop_codon 1580 1582 . + 0 transcript_id "g4.t1"; gene_id "g4"; -# protein sequence = [MATAFAPTKLTATVPLHGSHENRLLLPIRLAPPSSFLGSTRSLSLRRLNHSNATRRSPVVSVQEVVKEKQSTNNTSLL -# ITKEEGLELYEDMILGRSFEDMCAQMYYRGKMFGFVHLYNGQEAVSTGFIKLLTKSDSVVSTYRDHVHALSKGVSARAVMSELFGKVTGCCRGQGGSM -# HMFSKEHNMLGGFAFIGEGIPVATGAAFSSKYRREVLKQDCDDVTVAFFGDGTCNNGQFFECLNMAALYKLPIIFVVENNLWAIGMSHLRATSDPEIW -# KKGPAFGMPGVHVDGMDVLKVREVAKEAVTRARRGEGPTLVECETYRFRGHSLADPDELRDAAEKAKYAARDPIAALKKYLIENKLAKEAELKSIEKK -# IDELVEEAVEFADASPQPGRSQLLENVFADPKGFGIGPDGRYRCEDPKFTEGTAQV] -# end gene g4 -### -# command line: -# ./bin/augustus --species=arabidopsis --UTR=off --singlestrand=true --mea=1 /home/bag/projects/github/galaxytools/augustus/test-data/arabidopsis_augustus.fa diff -r a77a4e9921e0 -r f5075dee9d6b test-data/human_augustus_protein_codingseq_introns_cds_codingseq.fasta --- a/test-data/human_augustus_protein_codingseq_introns_cds_codingseq.fasta Wed May 06 14:31:02 2015 -0400 +++ b/test-data/human_augustus_protein_codingseq_introns_cds_codingseq.fasta Fri May 22 04:55:25 2015 -0400 @@ -1,4 +1,4 @@ ->g1 +>HS04636.g1 atgctcgcccgcgccctgctgctgtgcgcggtcctggcgctcagccatacagcaaatccttgctgttcccacccatgtca tgctcgcccgcgccctgctgctgtgcgcggtcctggcgctcagccatacagcaaatccttgctgttcccacccatgtcaa aaccgaggtgtatgtatgagtgtgggatttgaccagtataagtgcgattgtacccggacaggattctatggagaaaactg @@ -23,7 +23,7 @@ cacttttggtggagaagtgggttttcaaatcatcaacactgcctcaattcagtctctcatctgcaataacgtgaagggct gtccctttacttcattcagtgttccagatccagagctcattaaaacagtcaccatcaatgcaagttcttcccgctccgga ctagatgatatcaatcccacagtactactaaaagaacgttcgactgaactgtag ->g2 +>HS08198.g2 atgctgccccctgggactgcgaccctcttgactctgctcctggcagctggctcgctgggccagaagcctcagaggccaca tgctgccccctgggactgcgaccctcttgactctgctcctggcagctggctcgctgggccagaagcctcagaggccacgc cggcccgcatcccccatcagcaccatccagcccaaggccaattttgatgcgcagcaggagcagggccaccgggccgaggc diff -r a77a4e9921e0 -r f5075dee9d6b test-data/human_augustus_protein_codingseq_introns_cds_main.gtf --- a/test-data/human_augustus_protein_codingseq_introns_cds_main.gtf Wed May 06 14:31:02 2015 -0400 +++ b/test-data/human_augustus_protein_codingseq_introns_cds_main.gtf Fri May 22 04:55:25 2015 -0400 @@ -1,13 +1,13 @@ -# This output was generated with AUGUSTUS (version 2.7). -# AUGUSTUS is a gene prediction tool for eukaryotes written by Mario Stanke (mario.stanke@uni-greifswald.de) -# and Oliver Keller (keller@cs.uni-goettingen.de). +# This output was generated with AUGUSTUS (version 3.1.0). +# AUGUSTUS is a gene prediction tool written by Mario Stanke (mario.stanke@uni-greifswald.de), +# Oliver Keller, Stefanie König and Lizzy Gerischer. # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008), # Using native and syntenically mapped cDNA alignments to improve de novo gene finding # Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013 # No extrinsic information on sequences given. -# Initialising the parameters ... +# Initialising the parameters using config directory /home/bag/projects/code/galaxy/tool_deps/augustus/3.1/iuc/package_augustus_3_1/820bf3789c44/config/ ... # human version. Using default transition matrix. -# Looks like /home/bag/projects/galaxy/galaxy-central/database/files/001/dataset_1149.dat is in fasta format. +# Looks like /tmp/tmpboMLLQ/job_working_directory/000/6/task_0/dataset_9.dat is in fasta format. # We have hints for 0 sequences and for 0 of the sequences in the input set. # # ----- prediction on sequence number 1 (length = 9453, name = HS04636) ----- @@ -15,28 +15,30 @@ # Constraints/Hints: # (none) # Predicted genes for sequence number 1 on both strands -# start gene g1 -HS04636 AUGUSTUS gene 966 6903 1 + . g1 -HS04636 AUGUSTUS transcript 966 6903 . + . g1.t1 -HS04636 AUGUSTUS intron 1018 1817 . + . transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS intron 1935 2054 . + . transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS intron 2199 2851 . + . transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS intron 2996 3425 . + . transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS intron 3608 4339 . + . transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS intron 4424 4542 . + . transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS intron 4790 5071 . + . transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS intron 5359 5859 . + . transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS intron 6008 6493 . + . transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS CDS 966 1017 . + 0 transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS CDS 1818 1934 . + 2 transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS CDS 2055 2198 . + 2 transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS CDS 2852 2995 . + 2 transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS CDS 3426 3607 . + 2 transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS CDS 4340 4423 . + 0 transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS CDS 4543 4789 . + 0 transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS CDS 5072 5358 . + 2 transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS CDS 5860 6007 . + 0 transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS CDS 6494 6903 . + 2 transcript_id "g1.t1"; gene_id "g1"; +# start gene HS04636.g1 +HS04636 AUGUSTUS gene 966 6903 1 + . HS04636.g1 +HS04636 AUGUSTUS transcript 966 6903 . + . HS04636.g1.t1 +HS04636 AUGUSTUS start_codon 966 968 . + 0 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS intron 1018 1817 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS intron 1935 2054 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS intron 2199 2851 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS intron 2996 3425 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS intron 3608 4339 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS intron 4424 4542 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS intron 4790 5071 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS intron 5359 5859 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS intron 6008 6493 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS CDS 966 1017 . + 0 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS CDS 1818 1934 . + 2 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS CDS 2055 2198 . + 2 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS CDS 2852 2995 . + 2 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS CDS 3426 3607 . + 2 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS CDS 4340 4423 . + 0 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS CDS 4543 4789 . + 0 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS CDS 5072 5358 . + 2 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS CDS 5860 6007 . + 0 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS CDS 6494 6903 . + 2 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS stop_codon 6901 6903 . + 0 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; # coding sequence = [atgctcgcccgcgccctgctgctgtgcgcggtcctggcgctcagccatacagcaaatccttgctgttcccacccatgtc # aaaaccgaggtgtatgtatgagtgtgggatttgaccagtataagtgcgattgtacccggacaggattctatggagaaaactgctcaacaccggaattt # ttgacaagaataaaattatttctgaaacccactccaaacacagtgcactacatacttacccacttcaagggattttggaacgttgtgaataacattcc @@ -63,7 +65,7 @@ # WHPLLPDTFQIHDQKYNYQQFIYNNSILLEHGITQFVESFTRQIAGRVAGGRNVPPAVQKVSQASIDQSRQMKYQSFNEYRKRFMLKPYESFEELTGE # KEMSAELEALYGDIDAVELYPALLVEKPRPDAIFGETMVEVGAPFSLKGLMGNVICSPAYWKPSTFGGEVGFQIINTASIQSLICNNVKGCPFTSFSV # PDPELIKTVTINASSSRSGLDDINPTVLLKERSTEL] -# end gene g1 +# end gene HS04636.g1 ### # # ----- prediction on sequence number 2 (length = 2344, name = HS08198) ----- @@ -71,20 +73,22 @@ # Constraints/Hints: # (none) # Predicted genes for sequence number 2 on both strands -# start gene g2 -HS08198 AUGUSTUS gene 445 1848 1 + . g2 -HS08198 AUGUSTUS transcript 445 1848 . + . g2.t1 -HS08198 AUGUSTUS intron 583 811 . + . transcript_id "g2.t1"; gene_id "g2"; -HS08198 AUGUSTUS intron 895 1052 . + . transcript_id "g2.t1"; gene_id "g2"; -HS08198 AUGUSTUS intron 1124 1207 . + . transcript_id "g2.t1"; gene_id "g2"; -HS08198 AUGUSTUS intron 1316 1586 . + . transcript_id "g2.t1"; gene_id "g2"; -HS08198 AUGUSTUS intron 1689 1771 . + . transcript_id "g2.t1"; gene_id "g2"; -HS08198 AUGUSTUS CDS 445 582 . + 0 transcript_id "g2.t1"; gene_id "g2"; -HS08198 AUGUSTUS CDS 812 894 . + 0 transcript_id "g2.t1"; gene_id "g2"; -HS08198 AUGUSTUS CDS 1053 1123 . + 1 transcript_id "g2.t1"; gene_id "g2"; -HS08198 AUGUSTUS CDS 1208 1315 . + 2 transcript_id "g2.t1"; gene_id "g2"; -HS08198 AUGUSTUS CDS 1587 1688 . + 2 transcript_id "g2.t1"; gene_id "g2"; -HS08198 AUGUSTUS CDS 1772 1848 . + 2 transcript_id "g2.t1"; gene_id "g2"; +# start gene HS08198.g2 +HS08198 AUGUSTUS gene 445 1848 1 + . HS08198.g2 +HS08198 AUGUSTUS transcript 445 1848 . + . HS08198.g2.t1 +HS08198 AUGUSTUS start_codon 445 447 . + 0 transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +HS08198 AUGUSTUS intron 583 811 . + . transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +HS08198 AUGUSTUS intron 895 1052 . + . transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +HS08198 AUGUSTUS intron 1124 1207 . + . transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +HS08198 AUGUSTUS intron 1316 1586 . + . transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +HS08198 AUGUSTUS intron 1689 1771 . + . transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +HS08198 AUGUSTUS CDS 445 582 . + 0 transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +HS08198 AUGUSTUS CDS 812 894 . + 0 transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +HS08198 AUGUSTUS CDS 1053 1123 . + 1 transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +HS08198 AUGUSTUS CDS 1208 1315 . + 2 transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +HS08198 AUGUSTUS CDS 1587 1688 . + 2 transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +HS08198 AUGUSTUS CDS 1772 1848 . + 2 transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +HS08198 AUGUSTUS stop_codon 1846 1848 . + 0 transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; # coding sequence = [atgctgccccctgggactgcgaccctcttgactctgctcctggcagctggctcgctgggccagaagcctcagaggccac # gccggcccgcatcccccatcagcaccatccagcccaaggccaattttgatgcgcagcaggagcagggccaccgggccgaggccaccacactgcatgtg # gctccccagggcacagccatggctgtcagtaccttccgaaagctggatgggatctgctggcaggtgcgccagctctatggagacacaggggtcctcgg @@ -95,7 +99,7 @@ # protein sequence = [MLPPGTATLLTLLLAAGSLGQKPQRPRRPASPISTIQPKANFDAQQEQGHRAEATTLHVAPQGTAMAVSTFRKLDGIC # WQVRQLYGDTGVLGRFLLQARGARGAVHVVVAETDYQSFAVLYLERAGQLSVKLYARSLPVSDSVLSGFEQRVQEAHLTEDQIFYFPKYGFCEAADQF # HVLDGECTAGASMAAW] -# end gene g2 +# end gene HS08198.g2 ### # command line: -# augustus --strand=both --noInFrameStop=false --gff3=off --protein=on --introns=on --start=off --stop=off --cds=on --codingseq=on --singlestrand=false /home/bag/projects/galaxy/galaxy-central/database/files/001/dataset_1149.dat --UTR=off --genemodel=complete --species=human +# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=on --codingseq=on --introns=on --start=on --stop=on --cds=on --singlestrand=false /tmp/tmpboMLLQ/job_working_directory/000/6/task_0/dataset_9.dat --UTR=off --genemodel=complete --species=human diff -r a77a4e9921e0 -r f5075dee9d6b test-data/human_augustus_protein_codingseq_introns_cds_protein.fasta --- a/test-data/human_augustus_protein_codingseq_introns_cds_protein.fasta Wed May 06 14:31:02 2015 -0400 +++ b/test-data/human_augustus_protein_codingseq_introns_cds_protein.fasta Fri May 22 04:55:25 2015 -0400 @@ -1,4 +1,4 @@ ->g1 +>HS04636.g1 MLARALLLCAVLALSHTANPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYILML ARALLLCAVLALSHTANPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYILTHFK GFWNVVNNIPFLRNAIMSYVLTSRSHLIDSPPTYNADYGYKSWEAFSNLSYYTRALPPVPDDCPTPLGVKGKKQLPDSNE @@ -8,7 +8,7 @@ HGITQFVESFTRQIAGRVAGGRNVPPAVQKVSQASIDQSRQMKYQSFNEYRKRFMLKPYESFEELTGEKEMSAELEALYG DIDAVELYPALLVEKPRPDAIFGETMVEVGAPFSLKGLMGNVICSPAYWKPSTFGGEVGFQIINTASIQSLICNNVKGCP FTSFSVPDPELIKTVTINASSSRSGLDDINPTVLLKERSTEL ->g2 +>HS08198.g2 MLPPGTATLLTLLLAAGSLGQKPQRPRRPASPISTIQPKANFDAQQEQGHRAEATTLHVAPQGTAMAVSTFRKLDGICML PPGTATLLTLLLAAGSLGQKPQRPRRPASPISTIQPKANFDAQQEQGHRAEATTLHVAPQGTAMAVSTFRKLDGICWQVR QLYGDTGVLGRFLLQARGARGAVHVVVAETDYQSFAVLYLERAGQLSVKLYARSLPVSDSVLSGFEQRVQEAHLTEDQIF diff -r a77a4e9921e0 -r f5075dee9d6b test-data/human_augustus_utr-on.gff --- a/test-data/human_augustus_utr-on.gff Wed May 06 14:31:02 2015 -0400 +++ b/test-data/human_augustus_utr-on.gff Fri May 22 04:55:25 2015 -0400 @@ -1,46 +1,64 @@ ##gff-version 3 -# This output was generated with AUGUSTUS (version 2.7). -# AUGUSTUS is a gene prediction tool for eukaryotes written by Mario Stanke (mario.stanke@uni-greifswald.de) -# and Oliver Keller (keller@cs.uni-goettingen.de). +# This output was generated with AUGUSTUS (version 3.1.0). +# AUGUSTUS is a gene prediction tool written by Mario Stanke (mario.stanke@uni-greifswald.de), +# Oliver Keller, Stefanie König and Lizzy Gerischer. # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008), # Using native and syntenically mapped cDNA alignments to improve de novo gene finding # Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013 # No extrinsic information on sequences given. -# Initialising the parameters ... -# human version. Using species specific transition matrix: /home/bag/Downloads/augustus.2.7/config/species/human/human_trans_shadow_partial_utr.pbl -# Looks like ./examples/example.fa is in fasta format. +# Initialising the parameters using config directory /home/bag/projects/code/galaxy/tool_deps/augustus/3.1/iuc/package_augustus_3_1/820bf3789c44/config/ ... +# human version. Using default transition matrix. +# Looks like /tmp/tmpboMLLQ/job_working_directory/000/4/task_0/dataset_5.dat is in fasta format. # We have hints for 0 sequences and for 0 of the sequences in the input set. # # ----- prediction on sequence number 1 (length = 9453, name = HS04636) ----- # # Predicted genes for sequence number 1 on both strands -# start gene g1 -HS04636 AUGUSTUS gene 836 8857 1 + . ID=g1 -HS04636 AUGUSTUS transcript 836 8857 . + . ID=g1.t1;Parent=g1 -HS04636 AUGUSTUS transcription_start_site 836 836 . + . Parent=g1.t1 -HS04636 AUGUSTUS exon 836 1017 . + . Parent=g1.t1 -HS04636 AUGUSTUS start_codon 966 968 . + 0 Parent=g1.t1 -HS04636 AUGUSTUS CDS 966 1017 . + 0 ID=g1.t1.cds;Parent=g1.t1 -HS04636 AUGUSTUS CDS 1818 1934 . + 2 ID=g1.t1.cds;Parent=g1.t1 -HS04636 AUGUSTUS exon 1818 1934 . + . Parent=g1.t1 -HS04636 AUGUSTUS CDS 2055 2198 . + 2 ID=g1.t1.cds;Parent=g1.t1 -HS04636 AUGUSTUS exon 2055 2198 . + . Parent=g1.t1 -HS04636 AUGUSTUS CDS 2852 2995 . + 2 ID=g1.t1.cds;Parent=g1.t1 -HS04636 AUGUSTUS exon 2852 2995 . + . Parent=g1.t1 -HS04636 AUGUSTUS CDS 3426 3607 . + 2 ID=g1.t1.cds;Parent=g1.t1 -HS04636 AUGUSTUS exon 3426 3607 . + . Parent=g1.t1 -HS04636 AUGUSTUS CDS 4340 4423 . + 0 ID=g1.t1.cds;Parent=g1.t1 -HS04636 AUGUSTUS exon 4340 4423 . + . Parent=g1.t1 -HS04636 AUGUSTUS CDS 4543 4789 . + 0 ID=g1.t1.cds;Parent=g1.t1 -HS04636 AUGUSTUS exon 4543 4789 . + . Parent=g1.t1 -HS04636 AUGUSTUS CDS 5072 5358 . + 2 ID=g1.t1.cds;Parent=g1.t1 -HS04636 AUGUSTUS exon 5072 5358 . + . Parent=g1.t1 -HS04636 AUGUSTUS CDS 5860 6007 . + 0 ID=g1.t1.cds;Parent=g1.t1 -HS04636 AUGUSTUS exon 5860 6007 . + . Parent=g1.t1 -HS04636 AUGUSTUS CDS 6494 6903 . + 2 ID=g1.t1.cds;Parent=g1.t1 -HS04636 AUGUSTUS exon 6494 8857 . + . Parent=g1.t1 -HS04636 AUGUSTUS stop_codon 6901 6903 . + 0 Parent=g1.t1 -HS04636 AUGUSTUS transcription_end_site 8857 8857 . + . Parent=g1.t1 +# start gene HS04636.g1 +HS04636 AUGUSTUS gene 836 8857 1 + . ID=HS04636.g1 +HS04636 AUGUSTUS transcript 836 8857 . + . ID=HS04636.g1.t1;Parent=HS04636.g1 +HS04636 AUGUSTUS transcription_start_site 836 836 . + . Parent=HS04636.g1.t1 +HS04636 AUGUSTUS exon 836 1017 . + . Parent=HS04636.g1.t1 +HS04636 AUGUSTUS start_codon 966 968 . + 0 Parent=HS04636.g1.t1 +HS04636 AUGUSTUS CDS 966 1017 . + 0 ID=HS04636.g1.t1.cds;Parent=HS04636.g1.t1 +HS04636 AUGUSTUS CDS 1818 1934 . + 2 ID=HS04636.g1.t1.cds;Parent=HS04636.g1.t1 +HS04636 AUGUSTUS exon 1818 1934 . + . Parent=HS04636.g1.t1 +HS04636 AUGUSTUS CDS 2055 2198 . + 2 ID=HS04636.g1.t1.cds;Parent=HS04636.g1.t1 +HS04636 AUGUSTUS exon 2055 2198 . + . Parent=HS04636.g1.t1 +HS04636 AUGUSTUS CDS 2852 2995 . + 2 ID=HS04636.g1.t1.cds;Parent=HS04636.g1.t1 +HS04636 AUGUSTUS exon 2852 2995 . + . Parent=HS04636.g1.t1 +HS04636 AUGUSTUS CDS 3426 3607 . + 2 ID=HS04636.g1.t1.cds;Parent=HS04636.g1.t1 +HS04636 AUGUSTUS exon 3426 3607 . + . Parent=HS04636.g1.t1 +HS04636 AUGUSTUS CDS 4340 4423 . + 0 ID=HS04636.g1.t1.cds;Parent=HS04636.g1.t1 +HS04636 AUGUSTUS exon 4340 4423 . + . Parent=HS04636.g1.t1 +HS04636 AUGUSTUS CDS 4543 4789 . + 0 ID=HS04636.g1.t1.cds;Parent=HS04636.g1.t1 +HS04636 AUGUSTUS exon 4543 4789 . + . Parent=HS04636.g1.t1 +HS04636 AUGUSTUS CDS 5072 5358 . + 2 ID=HS04636.g1.t1.cds;Parent=HS04636.g1.t1 +HS04636 AUGUSTUS exon 5072 5358 . + . Parent=HS04636.g1.t1 +HS04636 AUGUSTUS CDS 5860 6007 . + 0 ID=HS04636.g1.t1.cds;Parent=HS04636.g1.t1 +HS04636 AUGUSTUS exon 5860 6007 . + . Parent=HS04636.g1.t1 +HS04636 AUGUSTUS CDS 6494 6903 . + 2 ID=HS04636.g1.t1.cds;Parent=HS04636.g1.t1 +HS04636 AUGUSTUS exon 6494 8857 . + . Parent=HS04636.g1.t1 +HS04636 AUGUSTUS transcription_end_site 8857 8857 . + . Parent=HS04636.g1.t1 +# coding sequence = [atgctcgcccgcgccctgctgctgtgcgcggtcctggcgctcagccatacagcaaatccttgctgttcccacccatgtc +# aaaaccgaggtgtatgtatgagtgtgggatttgaccagtataagtgcgattgtacccggacaggattctatggagaaaactgctcaacaccggaattt +# ttgacaagaataaaattatttctgaaacccactccaaacacagtgcactacatacttacccacttcaagggattttggaacgttgtgaataacattcc +# cttccttcgaaatgcaattatgagttatgtcttgacatccagatcacatttgattgacagtccaccaacttacaatgctgactatggctacaaaagct +# gggaagccttctctaacctctcctattatactagagcccttcctcctgtgcctgatgattgcccgactcccttgggtgtcaaaggtaaaaagcagctt +# cctgattcaaatgagattgtggaaaaattgcttctaagaagaaagttcatccctgatccccagggctcaaacatgatgtttgcattctttgcccagca +# cttcacgcatcagtttttcaagacagatcataagcgagggccagctttcaccaacgggctgggccatggggtggacttaaatcatatttacggtgaaa +# ctctggctagacagcgtaaactgcgccttttcaaggatggaaaaatgaaatatcagataattgatggagagatgtatcctcccacagtcaaagatact +# caggcagagatgatctaccctcctcaagtccctgagcatctacggtttgctgtggggcaggaggtctttggtctggtgcctggtctgatgatgtatgc +# cacaatctggctgcgggaacacaacagagtatgcgatgtgcttaaacaggagcatcctgaatggggtgatgagcagttgttccagacaagcaggctaa +# tactgataggagagactattaagattgtgattgaagattatgtgcaacacttgagtggctatcacttcaaactgaaatttgacccagaactacttttc +# aacaaacaattccagtaccaaaatcgtattgctgctgaatttaacaccctctatcactggcatccccttctgcctgacacctttcaaattcatgacca +# gaaatacaactatcaacagtttatctacaacaactctatattgctggaacatggaattacccagtttgttgaatcattcaccaggcaaattgctggca +# gggttgctggtggtaggaatgttccacccgcagtacagaaagtatcacaggcttccattgaccagagcaggcagatgaaataccagtcttttaatgag +# taccgcaaacgctttatgctgaagccctatgaatcatttgaagaacttacaggagaaaaggaaatgtctgcagagttggaagcactctatggtgacat +# cgatgctgtggagctgtatcctgcccttctggtagaaaagcctcggccagatgccatctttggtgaaaccatggtagaagttggagcaccattctcct +# tgaaaggacttatgggtaatgttatatgttctcctgcctactggaagccaagcacttttggtggagaagtgggttttcaaatcatcaacactgcctca +# attcagtctctcatctgcaataacgtgaagggctgtccctttacttcattcagtgttccagatccagagctcattaaaacagtcaccatcaatgcaag +# ttcttcccgctccggactagatgatatcaatcccacagtactactaaaagaacgttcgactgaactgtag] # protein sequence = [MLARALLLCAVLALSHTANPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYIL # THFKGFWNVVNNIPFLRNAIMSYVLTSRSHLIDSPPTYNADYGYKSWEAFSNLSYYTRALPPVPDDCPTPLGVKGKKQLPDSNEIVEKLLLRRKFIPD # PQGSNMMFAFFAQHFTHQFFKTDHKRGPAFTNGLGHGVDLNHIYGETLARQRKLRLFKDGKMKYQIIDGEMYPPTVKDTQAEMIYPPQVPEHLRFAVG @@ -48,30 +66,41 @@ # WHPLLPDTFQIHDQKYNYQQFIYNNSILLEHGITQFVESFTRQIAGRVAGGRNVPPAVQKVSQASIDQSRQMKYQSFNEYRKRFMLKPYESFEELTGE # KEMSAELEALYGDIDAVELYPALLVEKPRPDAIFGETMVEVGAPFSLKGLMGNVICSPAYWKPSTFGGEVGFQIINTASIQSLICNNVKGCPFTSFSV # PDPELIKTVTINASSSRSGLDDINPTVLLKERSTEL] -# end gene g1 +# end gene HS04636.g1 ### # # ----- prediction on sequence number 2 (length = 2344, name = HS08198) ----- # # Predicted genes for sequence number 2 on both strands -# start gene g2 -HS08198 AUGUSTUS gene 86 2344 1 + . ID=g2 -HS08198 AUGUSTUS transcript 86 2344 . + . ID=g2.t1;Parent=g2 -HS08198 AUGUSTUS transcription_start_site 86 86 . + . Parent=g2.t1 -HS08198 AUGUSTUS exon 86 582 . + . Parent=g2.t1 -HS08198 AUGUSTUS start_codon 445 447 . + 0 Parent=g2.t1 -HS08198 AUGUSTUS CDS 445 582 . + 0 ID=g2.t1.cds;Parent=g2.t1 -HS08198 AUGUSTUS CDS 812 894 . + 0 ID=g2.t1.cds;Parent=g2.t1 -HS08198 AUGUSTUS exon 812 894 . + . Parent=g2.t1 -HS08198 AUGUSTUS CDS 1053 1123 . + 1 ID=g2.t1.cds;Parent=g2.t1 -HS08198 AUGUSTUS exon 1053 1123 . + . Parent=g2.t1 -HS08198 AUGUSTUS CDS 1208 1315 . + 2 ID=g2.t1.cds;Parent=g2.t1 -HS08198 AUGUSTUS exon 1208 1315 . + . Parent=g2.t1 -HS08198 AUGUSTUS CDS 1587 1688 . + 2 ID=g2.t1.cds;Parent=g2.t1 -HS08198 AUGUSTUS exon 1587 1688 . + . Parent=g2.t1 +# start gene HS08198.g2 +HS08198 AUGUSTUS gene 86 2105 1 + . ID=HS08198.g2 +HS08198 AUGUSTUS transcript 86 2105 . + . ID=HS08198.g2.t1;Parent=HS08198.g2 +HS08198 AUGUSTUS transcription_start_site 86 86 . + . Parent=HS08198.g2.t1 +HS08198 AUGUSTUS exon 86 582 . + . Parent=HS08198.g2.t1 +HS08198 AUGUSTUS start_codon 445 447 . + 0 Parent=HS08198.g2.t1 +HS08198 AUGUSTUS CDS 445 582 . + 0 ID=HS08198.g2.t1.cds;Parent=HS08198.g2.t1 +HS08198 AUGUSTUS CDS 812 894 . + 0 ID=HS08198.g2.t1.cds;Parent=HS08198.g2.t1 +HS08198 AUGUSTUS exon 812 894 . + . Parent=HS08198.g2.t1 +HS08198 AUGUSTUS CDS 1053 1123 . + 1 ID=HS08198.g2.t1.cds;Parent=HS08198.g2.t1 +HS08198 AUGUSTUS exon 1053 1123 . + . Parent=HS08198.g2.t1 +HS08198 AUGUSTUS CDS 1208 1315 . + 2 ID=HS08198.g2.t1.cds;Parent=HS08198.g2.t1 +HS08198 AUGUSTUS exon 1208 1315 . + . Parent=HS08198.g2.t1 +HS08198 AUGUSTUS CDS 1587 1688 . + 2 ID=HS08198.g2.t1.cds;Parent=HS08198.g2.t1 +HS08198 AUGUSTUS exon 1587 1688 . + . Parent=HS08198.g2.t1 +HS08198 AUGUSTUS CDS 1772 1848 . + 2 ID=HS08198.g2.t1.cds;Parent=HS08198.g2.t1 +HS08198 AUGUSTUS exon 1772 2105 . + . Parent=HS08198.g2.t1 +HS08198 AUGUSTUS transcription_end_site 2105 2105 . + . Parent=HS08198.g2.t1 +# coding sequence = [atgctgccccctgggactgcgaccctcttgactctgctcctggcagctggctcgctgggccagaagcctcagaggccac +# gccggcccgcatcccccatcagcaccatccagcccaaggccaattttgatgcgcagcaggagcagggccaccgggccgaggccaccacactgcatgtg +# gctccccagggcacagccatggctgtcagtaccttccgaaagctggatgggatctgctggcaggtgcgccagctctatggagacacaggggtcctcgg +# ccgcttcctgcttcaagcccgaggcgcccgaggggctgtgcacgtggttgtcgctgagaccgactaccagagtttcgctgtcctgtacctggagcggg +# cggggcagctgtcagtgaagctctacgcccgctcgctccctgtgagcgactcggtcctgagtgggtttgagcagcgggtccaggaggcccacctgact +# gaggaccagatcttctacttccccaagtacggcttctgcgaggctgcagaccagttccacgtcctggacggtgagtgcacagcgggggcaagcatggc +# ggcgtggtga] # protein sequence = [MLPPGTATLLTLLLAAGSLGQKPQRPRRPASPISTIQPKANFDAQQEQGHRAEATTLHVAPQGTAMAVSTFRKLDGIC -# WQVRQLYGDTGVLGRFLLQARGARGAVHVVVAETDYQSFAVLYLERAGQLSVKLYARSLPVSDSVLSGFEQRVQEAHLTEDQIFYFPKY] -# end gene g2 +# WQVRQLYGDTGVLGRFLLQARGARGAVHVVVAETDYQSFAVLYLERAGQLSVKLYARSLPVSDSVLSGFEQRVQEAHLTEDQIFYFPKYGFCEAADQF +# HVLDGECTAGASMAAW] +# end gene HS08198.g2 ### # command line: -# ./bin/augustus --species=human --UTR=on --gff3=on ./examples/example.fa +# augustus --strand=both --noInFrameStop=false --gff3=on --uniqueGeneId=true --protein=on --codingseq=on --introns=off --stop=off --stop=off --cds=on --singlestrand=false /tmp/tmpboMLLQ/job_working_directory/000/4/task_0/dataset_5.dat --UTR=on --genemodel=complete --species=human diff -r a77a4e9921e0 -r f5075dee9d6b test-data/human_augustus_utr-on.gtf --- a/test-data/human_augustus_utr-on.gtf Wed May 06 14:31:02 2015 -0400 +++ b/test-data/human_augustus_utr-on.gtf Fri May 22 04:55:25 2015 -0400 @@ -1,13 +1,13 @@ -# This output was generated with AUGUSTUS (version 2.7). -# AUGUSTUS is a gene prediction tool for eukaryotes written by Mario Stanke (mario.stanke@uni-greifswald.de) -# and Oliver Keller (keller@cs.uni-goettingen.de). +# This output was generated with AUGUSTUS (version 3.1.0). +# AUGUSTUS is a gene prediction tool written by Mario Stanke (mario.stanke@uni-greifswald.de), +# Oliver Keller, Stefanie König and Lizzy Gerischer. # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008), # Using native and syntenically mapped cDNA alignments to improve de novo gene finding # Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013 # No extrinsic information on sequences given. -# Initialising the parameters ... -# human version. Using species specific transition matrix: /home/bag/Downloads/augustus.2.7/config/species/human/human_trans_shadow_partial_utr.pbl -# Looks like ./examples/example.fa is in fasta format. +# Initialising the parameters using config directory /home/bag/projects/code/galaxy/tool_deps/augustus/3.1/iuc/package_augustus_3_1/820bf3789c44/config/ ... +# human version. Using default transition matrix. +# Looks like /tmp/tmpboMLLQ/job_working_directory/000/2/task_0/dataset_1.dat is in fasta format. # We have hints for 0 sequences and for 0 of the sequences in the input set. # # ----- prediction on sequence number 1 (length = 9453, name = HS04636) ----- @@ -15,33 +15,51 @@ # Constraints/Hints: # (none) # Predicted genes for sequence number 1 on both strands -# start gene g1 -HS04636 AUGUSTUS gene 836 8857 1 + . g1 -HS04636 AUGUSTUS transcript 836 8857 . + . g1.t1 -HS04636 AUGUSTUS tss 836 836 . + . transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS exon 836 1017 . + . transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS start_codon 966 968 . + 0 transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS CDS 966 1017 . + 0 transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS CDS 1818 1934 . + 2 transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS exon 1818 1934 . + . transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS CDS 2055 2198 . + 2 transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS exon 2055 2198 . + . transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS CDS 2852 2995 . + 2 transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS exon 2852 2995 . + . transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS CDS 3426 3607 . + 2 transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS exon 3426 3607 . + . transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS CDS 4340 4423 . + 0 transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS exon 4340 4423 . + . transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS CDS 4543 4789 . + 0 transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS exon 4543 4789 . + . transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS CDS 5072 5358 . + 2 transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS exon 5072 5358 . + . transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS CDS 5860 6007 . + 0 transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS exon 5860 6007 . + . transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS CDS 6494 6903 . + 2 transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS exon 6494 8857 . + . transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS stop_codon 6901 6903 . + 0 transcript_id "g1.t1"; gene_id "g1"; -HS04636 AUGUSTUS tts 8857 8857 . + . transcript_id "g1.t1"; gene_id "g1"; +# start gene HS04636.g1 +HS04636 AUGUSTUS gene 836 8857 1 + . HS04636.g1 +HS04636 AUGUSTUS transcript 836 8857 . + . HS04636.g1.t1 +HS04636 AUGUSTUS tss 836 836 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS exon 836 1017 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS start_codon 966 968 . + 0 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS CDS 966 1017 . + 0 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS CDS 1818 1934 . + 2 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS exon 1818 1934 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS CDS 2055 2198 . + 2 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS exon 2055 2198 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS CDS 2852 2995 . + 2 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS exon 2852 2995 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS CDS 3426 3607 . + 2 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS exon 3426 3607 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS CDS 4340 4423 . + 0 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS exon 4340 4423 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS CDS 4543 4789 . + 0 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS exon 4543 4789 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS CDS 5072 5358 . + 2 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS exon 5072 5358 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS CDS 5860 6007 . + 0 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS exon 5860 6007 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS CDS 6494 6903 . + 2 transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS exon 6494 8857 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +HS04636 AUGUSTUS tts 8857 8857 . + . transcript_id "HS04636.g1.t1"; gene_id "HS04636.g1"; +# coding sequence = [atgctcgcccgcgccctgctgctgtgcgcggtcctggcgctcagccatacagcaaatccttgctgttcccacccatgtc +# aaaaccgaggtgtatgtatgagtgtgggatttgaccagtataagtgcgattgtacccggacaggattctatggagaaaactgctcaacaccggaattt +# ttgacaagaataaaattatttctgaaacccactccaaacacagtgcactacatacttacccacttcaagggattttggaacgttgtgaataacattcc +# cttccttcgaaatgcaattatgagttatgtcttgacatccagatcacatttgattgacagtccaccaacttacaatgctgactatggctacaaaagct +# gggaagccttctctaacctctcctattatactagagcccttcctcctgtgcctgatgattgcccgactcccttgggtgtcaaaggtaaaaagcagctt +# cctgattcaaatgagattgtggaaaaattgcttctaagaagaaagttcatccctgatccccagggctcaaacatgatgtttgcattctttgcccagca +# cttcacgcatcagtttttcaagacagatcataagcgagggccagctttcaccaacgggctgggccatggggtggacttaaatcatatttacggtgaaa +# ctctggctagacagcgtaaactgcgccttttcaaggatggaaaaatgaaatatcagataattgatggagagatgtatcctcccacagtcaaagatact +# caggcagagatgatctaccctcctcaagtccctgagcatctacggtttgctgtggggcaggaggtctttggtctggtgcctggtctgatgatgtatgc +# cacaatctggctgcgggaacacaacagagtatgcgatgtgcttaaacaggagcatcctgaatggggtgatgagcagttgttccagacaagcaggctaa +# tactgataggagagactattaagattgtgattgaagattatgtgcaacacttgagtggctatcacttcaaactgaaatttgacccagaactacttttc +# aacaaacaattccagtaccaaaatcgtattgctgctgaatttaacaccctctatcactggcatccccttctgcctgacacctttcaaattcatgacca +# gaaatacaactatcaacagtttatctacaacaactctatattgctggaacatggaattacccagtttgttgaatcattcaccaggcaaattgctggca +# gggttgctggtggtaggaatgttccacccgcagtacagaaagtatcacaggcttccattgaccagagcaggcagatgaaataccagtcttttaatgag +# taccgcaaacgctttatgctgaagccctatgaatcatttgaagaacttacaggagaaaaggaaatgtctgcagagttggaagcactctatggtgacat +# cgatgctgtggagctgtatcctgcccttctggtagaaaagcctcggccagatgccatctttggtgaaaccatggtagaagttggagcaccattctcct +# tgaaaggacttatgggtaatgttatatgttctcctgcctactggaagccaagcacttttggtggagaagtgggttttcaaatcatcaacactgcctca +# attcagtctctcatctgcaataacgtgaagggctgtccctttacttcattcagtgttccagatccagagctcattaaaacagtcaccatcaatgcaag +# ttcttcccgctccggactagatgatatcaatcccacagtactactaaaagaacgttcgactgaactgtag] # protein sequence = [MLARALLLCAVLALSHTANPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYIL # THFKGFWNVVNNIPFLRNAIMSYVLTSRSHLIDSPPTYNADYGYKSWEAFSNLSYYTRALPPVPDDCPTPLGVKGKKQLPDSNEIVEKLLLRRKFIPD # PQGSNMMFAFFAQHFTHQFFKTDHKRGPAFTNGLGHGVDLNHIYGETLARQRKLRLFKDGKMKYQIIDGEMYPPTVKDTQAEMIYPPQVPEHLRFAVG @@ -49,7 +67,7 @@ # WHPLLPDTFQIHDQKYNYQQFIYNNSILLEHGITQFVESFTRQIAGRVAGGRNVPPAVQKVSQASIDQSRQMKYQSFNEYRKRFMLKPYESFEELTGE # KEMSAELEALYGDIDAVELYPALLVEKPRPDAIFGETMVEVGAPFSLKGLMGNVICSPAYWKPSTFGGEVGFQIINTASIQSLICNNVKGCPFTSFSV # PDPELIKTVTINASSSRSGLDDINPTVLLKERSTEL] -# end gene g1 +# end gene HS04636.g1 ### # # ----- prediction on sequence number 2 (length = 2344, name = HS08198) ----- @@ -57,24 +75,35 @@ # Constraints/Hints: # (none) # Predicted genes for sequence number 2 on both strands -# start gene g2 -HS08198 AUGUSTUS gene 86 2344 1 + . g2 -HS08198 AUGUSTUS transcript 86 2344 . + . g2.t1 -HS08198 AUGUSTUS tss 86 86 . + . transcript_id "g2.t1"; gene_id "g2"; -HS08198 AUGUSTUS exon 86 582 . + . transcript_id "g2.t1"; gene_id "g2"; -HS08198 AUGUSTUS start_codon 445 447 . + 0 transcript_id "g2.t1"; gene_id "g2"; -HS08198 AUGUSTUS CDS 445 582 . + 0 transcript_id "g2.t1"; gene_id "g2"; -HS08198 AUGUSTUS CDS 812 894 . + 0 transcript_id "g2.t1"; gene_id "g2"; -HS08198 AUGUSTUS exon 812 894 . + . transcript_id "g2.t1"; gene_id "g2"; -HS08198 AUGUSTUS CDS 1053 1123 . + 1 transcript_id "g2.t1"; gene_id "g2"; -HS08198 AUGUSTUS exon 1053 1123 . + . transcript_id "g2.t1"; gene_id "g2"; -HS08198 AUGUSTUS CDS 1208 1315 . + 2 transcript_id "g2.t1"; gene_id "g2"; -HS08198 AUGUSTUS exon 1208 1315 . + . transcript_id "g2.t1"; gene_id "g2"; -HS08198 AUGUSTUS CDS 1587 1688 . + 2 transcript_id "g2.t1"; gene_id "g2"; -HS08198 AUGUSTUS exon 1587 1688 . + . transcript_id "g2.t1"; gene_id "g2"; +# start gene HS08198.g2 +HS08198 AUGUSTUS gene 86 2105 1 + . HS08198.g2 +HS08198 AUGUSTUS transcript 86 2105 . + . HS08198.g2.t1 +HS08198 AUGUSTUS tss 86 86 . + . transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +HS08198 AUGUSTUS exon 86 582 . + . transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +HS08198 AUGUSTUS start_codon 445 447 . + 0 transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +HS08198 AUGUSTUS CDS 445 582 . + 0 transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +HS08198 AUGUSTUS CDS 812 894 . + 0 transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +HS08198 AUGUSTUS exon 812 894 . + . transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +HS08198 AUGUSTUS CDS 1053 1123 . + 1 transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +HS08198 AUGUSTUS exon 1053 1123 . + . transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +HS08198 AUGUSTUS CDS 1208 1315 . + 2 transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +HS08198 AUGUSTUS exon 1208 1315 . + . transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +HS08198 AUGUSTUS CDS 1587 1688 . + 2 transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +HS08198 AUGUSTUS exon 1587 1688 . + . transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +HS08198 AUGUSTUS CDS 1772 1848 . + 2 transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +HS08198 AUGUSTUS exon 1772 2105 . + . transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +HS08198 AUGUSTUS tts 2105 2105 . + . transcript_id "HS08198.g2.t1"; gene_id "HS08198.g2"; +# coding sequence = [atgctgccccctgggactgcgaccctcttgactctgctcctggcagctggctcgctgggccagaagcctcagaggccac +# gccggcccgcatcccccatcagcaccatccagcccaaggccaattttgatgcgcagcaggagcagggccaccgggccgaggccaccacactgcatgtg +# gctccccagggcacagccatggctgtcagtaccttccgaaagctggatgggatctgctggcaggtgcgccagctctatggagacacaggggtcctcgg +# ccgcttcctgcttcaagcccgaggcgcccgaggggctgtgcacgtggttgtcgctgagaccgactaccagagtttcgctgtcctgtacctggagcggg +# cggggcagctgtcagtgaagctctacgcccgctcgctccctgtgagcgactcggtcctgagtgggtttgagcagcgggtccaggaggcccacctgact +# gaggaccagatcttctacttccccaagtacggcttctgcgaggctgcagaccagttccacgtcctggacggtgagtgcacagcgggggcaagcatggc +# ggcgtggtga] # protein sequence = [MLPPGTATLLTLLLAAGSLGQKPQRPRRPASPISTIQPKANFDAQQEQGHRAEATTLHVAPQGTAMAVSTFRKLDGIC -# WQVRQLYGDTGVLGRFLLQARGARGAVHVVVAETDYQSFAVLYLERAGQLSVKLYARSLPVSDSVLSGFEQRVQEAHLTEDQIFYFPKY] -# end gene g2 +# WQVRQLYGDTGVLGRFLLQARGARGAVHVVVAETDYQSFAVLYLERAGQLSVKLYARSLPVSDSVLSGFEQRVQEAHLTEDQIFYFPKYGFCEAADQF +# HVLDGECTAGASMAAW] +# end gene HS08198.g2 ### # command line: -# ./bin/augustus --species=human --UTR=on ./examples/example.fa +# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=on --codingseq=on --introns=off --stop=off --stop=off --cds=on --singlestrand=false /tmp/tmpboMLLQ/job_working_directory/000/2/task_0/dataset_1.dat --UTR=on --genemodel=complete --species=human diff -r a77a4e9921e0 -r f5075dee9d6b tool_conf.xml --- a/tool_conf.xml Wed May 06 14:31:02 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ - - -

- -

- diff -r a77a4e9921e0 -r f5075dee9d6b tool_dependencies.xml --- a/tool_dependencies.xml Wed May 06 14:31:02 2015 -0400 +++ b/tool_dependencies.xml Fri May 22 04:55:25 2015 -0400 @@ -1,33 +1,6 @@ - - - - http://bioinf.uni-greifswald.de/augustus/binaries/old/augustus.2.7.tar.gz - make - - bin - $INSTALL_DIR/bin - - - scripts - $INSTALL_DIR/scripts - - - config - $INSTALL_DIR/config - - - $INSTALL_DIR/scripts - $INSTALL_DIR/bin - $INSTALL_DIR/config - - - - AUGUSTUS is a gene prediction program for eukaryotes written by Mario Stanke and Oliver Keller. -It can be used as an ab initio program, which means it bases its prediction purely on the -sequence. AUGUSTUS may also incorporate hints on the gene structure coming from extrinsic sources -such as EST, MS/MS, protein alignments and synthenic genomic alignments. -http://augustus.gobics.de/binaries/README.TXT + +