Previous changeset 6:ca6d970d931c (2019-12-20) Next changeset 8:28433faa6e42 (2021-08-26) |
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/augustus commit bba7f5df059fcbeb06e89cf689e9a04d4f22cb76" |
modified:
augustus.xml extract_features.py macros.xml test-data/augustus.hints.output.gtf test-data/augustus.hints_and_range.output.gtf test-data/human_augustus_protein_codingseq_introns_cds_main.gtf test-data/human_augustus_utr-on.gff test-data/human_augustus_utr-on.gtf |
added:
test-data/human_augustus_utr-on_softmasking.gtf |
b |
diff -r ca6d970d931c -r 09855551d713 augustus.xml --- a/augustus.xml Fri Dec 20 14:08:53 2019 -0500 +++ b/augustus.xml Thu Jul 15 17:16:12 2021 +0000 |
[ |
b'@@ -1,4 +1,4 @@\n-<tool id="augustus" name="Augustus" profile="16.04" version="@VERSION@">\n+<tool id="augustus" name="Augustus" profile="20.01" version="@VERSION@+galaxy@SUFFIX_VERSION@">\n <description>gene prediction for prokaryotic and eukaryotic genomes</description>\n <macros>\n <import>macros.xml</import>\n@@ -68,7 +68,7 @@\n $input_genome\n $utr\n --genemodel=$genemodel\n-\n+ --softmasking=$softmasking\n #if $hints.usehints == \'T\'\n --hintsfile=\'$hints.hintsfile\' --extrinsicCfgFile=\'$hints.extrinsiccfg\'\n #end if\n@@ -125,11 +125,13 @@\n <!-- If you update this list, please also update it in maker and busco tools (../maker/maker.xml and ../busco/busco.xml) -->\n <option value="human">Homo sapiens</option>\n <option value="fly">Drosophila melanogaster</option>\n+ <option value="maker2_dmel1">Drosophila melanogaster (maker2_dmel1)</option>\n <option value="arabidopsis">Arabidopsis thaliana</option>\n <option value="brugia ">Brugia malayi</option>\n <option value="aedes">Aedes aegypti</option>\n <option value="tribolium2012">Tribolium castaneum</option>\n <option value="schistosoma">Schistosoma mansoni</option>\n+ <option value="schistosoma2">Schistosoma mansoni (schistosoma2)</option>\n <option value="tetrahymena">Tetrahymena thermophila</option>\n <option value="galdieria">Galdieria sulphuraria</option>\n <option value="maize">Zea mays</option>\n@@ -183,13 +185,13 @@\n <option value="cacao">Theobroma cacao (cacao)</option>\n <option value="heliconius_melpomene1">Heliconius melpomene</option>\n <option value="xenoturbella">Xenoturbella</option>\n- <option value="E_coli_K12">E coli K12</option>\n+ <option value="E_coli_K12">Escherichia coli K12</option>\n <option value="c_elegans_trsk">c elegans trsk</option>\n <option value="camponotus_floridanus">Camponotus floridanus</option>\n <option value="coyote_tobacco">Coyote tobacco</option>\n <option value="s_aureus">Staphylococcus aureus</option>\n <option value="thermoanaerobacter_tengcongensis">Thermoanaerobacter tengcongensis</option>\n- <option value="wheat">wheat</option>\n+ <option value="wheat">Triticum aestivum</option>\n <option value="zebrafish">Danio rerio</option>\n <option value="anidulans">Aspergillus nidulans</option>\n <option value="bombus_impatiens1">Bombus impatiens1</option>\n@@ -205,23 +207,55 @@\n <option value="pea_aphid">Acyrthosiphon pisum</option>\n <option value="rhodnius_prolixus">Rhodnius prolixus</option>\n <option value="ustilago_maydis">Ustilago maydis</option>\n- <option value="verticillium_albo_atrum1">Verticillium albo atrum1</option>\n- <option value="verticillium_longisporum1">Verticillium longisporum1</option>\n- <option value="Xipophorus_maculatus">Xipophorus_maculatus</option>\n- <option value="adorsata">adorsata</option>\n- <option value="ancylostoma_ceylanicum">ancylostoma_ceylanicum</option>\n- <option value="maker2_athal1">maker2_athal1</option>\n- <option value="maker2_c_elegans1">maker2_c_elegans1</option>\n- <option value="maker2_dmel1">maker2_dmel1</option>\n- <option value="maker2_spomb1">maker2_spomb1</option>\n- <option value="parasteatoda">parasteatoda</option>\n- <option va'..b' <option value="Pseudo-nitzschia_multistriata">Pseudo-nitzschia multistriata</option>\n+ <option value="Phaeodactylum_tricornutum">Phaeodactylum tricornutum</option>\n+ <option value="Fragilariopsis_cylindrus">Fragilariopsis cylindrus</option>\n+ <option value="Fistulifera_solaris">Fistulifera solaris</option>\n+ <option value="Bathycoccus_prasinos">Bathycoccus prasinos</option>\n+ <option value="Chloropicon_primus">Chloropicon primus</option>\n </param>\n </when>\n </conditional>\n \n+ <param name="softmasking" type="boolean" argument="--softmasking"\n+ label="Softmasking"\n+ truevalue="1" falsevalue="0" checked="true"\n+ help="If this option is enabled, lowercase letters are considered as repeated regions." />\n+\n+\n <param name="strand" type="select" argument="--strand"\n label="Predict genes on specific strands">\n <option value="both">both</option>\n@@ -299,6 +333,7 @@\n <param name="input_genome" value="human_augustus.fa" ftype="fasta" />\n <param name="organism" value="human" />\n <param name="utr" value="True" />\n+ <param name="softmasking" value="False"/>\n <output name="output" file="human_augustus_utr-on.gtf" ftype="gtf" lines_diff="6"/>\n </test>\n <test>\n@@ -306,12 +341,14 @@\n <param name="organism" value="human" />\n <param name="utr" value="True" />\n <param name="gff" value="True" />\n+ <param name="softmasking" value="False"/>\n <output name="output" file="human_augustus_utr-on.gff" ftype="gff3" lines_diff="6"/>\n </test>\n <test>\n <param name="input_genome" value="human_augustus.fa" ftype="fasta" />\n <param name="organism" value="human" />\n <param name="outputs" value="protein,codingseq,introns,cds,start,stop" />\n+ <param name="softmasking" value="False"/>\n <output name="output" file="human_augustus_protein_codingseq_introns_cds_main.gtf" ftype="gtf" lines_diff="6"/>\n <output name="codingseq_output" file="human_augustus_protein_codingseq_introns_cds_codingseq.fasta" ftype="fasta" />\n <output name="protein_output" file="human_augustus_protein_codingseq_introns_cds_protein.fasta" ftype="fasta" />\n@@ -323,6 +360,7 @@\n <param name="hintsfile" value="hints.truncated.adjusted.gff" />\n <param name="extrinsiccfg" value="extrinsic.truncated.cfg" />\n <param name="outputs" value="" />\n+ <param name="softmasking" value="False"/>\n <output name="output" file="augustus.hints.output.gtf" ftype="gtf" lines_diff="12">\n <assert_contents>\n <has_text_matching expression="chr2R\\tAUGUSTUS\\tgene\\t7560\\t9303\\t0\\.(7[8-9]|8[0-5])\\t-\\t.\\tchr2R.g1" />\n@@ -341,6 +379,7 @@\n <param name="start" value="7000" />\n <param name="stop" value="9000" />\n <param name="outputs" value="" />\n+ <param name="softmasking" value="False"/>\n <output name="output" file="augustus.hints_and_range.output.gtf" ftype="gtf" lines_diff="12">\n <assert_contents>\n <has_text_matching expression="chr2R\\tAUGUSTUS\\tgene\\t7560\\t8931\\t0.8[2-5]\\t-\\t.\\tchr2R.g1" />\n@@ -348,6 +387,14 @@\n </assert_contents>\n </output>\n </test>\n+ <!-- Test softmasking parameter-->\n+ <test>\n+ <param name="input_genome" value="human_augustus.fa" ftype="fasta" />\n+ <param name="organism" value="human" />\n+ <param name="utr" value="True" />\n+ <param name="softmasking" value="True"/>\n+ <output name="output" file="human_augustus_utr-on_softmasking.gtf" ftype="gtf" lines_diff="6"/>\n+ </test>\n \n </tests>\n <help>\n' |
b |
diff -r ca6d970d931c -r 09855551d713 extract_features.py --- a/extract_features.py Fri Dec 20 14:08:53 2019 -0500 +++ b/extract_features.py Thu Jul 15 17:16:12 2021 +0000 |
[ |
@@ -5,76 +5,88 @@ import textwrap -def main( args ): +def main(args): """ - Extract the protein and coding section from an augustus gff, gtf file - Example file: -HS04636 AUGUSTUS stop_codon 6901 6903 . + 0 Parent=g1.t1 -HS04636 AUGUSTUS transcription_end_site 8857 8857 . + . Parent=g1.t1 -# protein sequence = [MLARALLLCAVLALSHTANPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYIL -# THFKGFWNVVNNIPFLRNAIMSYVLTSRSHLIDSPPTYNADYGYKSWEAFSNLSYYTRALPPVPDDCPTPLGVKGKKQLPDSNEIVEKLLLRRKFIPD -# PQGSNMMFAFFAQHFTHQFFKTDHKRGPAFTNGLGHGVDLNHIYGETLARQRKLRLFKDGKMKYQIIDGEMYPPTVKDTQAEMIYPPQVPEHLRFAVG -# QEVFGLVPGLMMYATIWLREHNRVCDVLKQEHPEWGDEQLFQTSRLILIGETIKIVIEDYVQHLSGYHFKLKFDPELLFNKQFQYQNRIAAEFNTLYH -# WHPLLPDTFQIHDQKYNYQQFIYNNSILLEHGITQFVESFTRQIAGRVAGGRNVPPAVQKVSQASIDQSRQMKYQSFNEYRKRFMLKPYESFEELTGE -# KEMSAELEALYGDIDAVELYPALLVEKPRPDAIFGETMVEVGAPFSLKGLMGNVICSPAYWKPSTFGGEVGFQIINTASIQSLICNNVKGCPFTSFSV -# PDPELIKTVTINASSSRSGLDDINPTVLLKERSTEL] -# end gene g1 -### -# -# ----- prediction on sequence number 2 (length = 2344, name = HS08198) ----- -# -# Predicted genes for sequence number 2 on both strands -# start gene g2 -HS08198 AUGUSTUS gene 86 2344 1 + . ID=g2 -HS08198 AUGUSTUS transcript 86 2344 . + . ID=g2.t1;Parent=g2 -HS08198 AUGUSTUS transcription_start_site 86 86 . + . Parent=g2.t1 -HS08198 AUGUSTUS exon 86 582 . + . Parent=g2.t1 -HS08198 AUGUSTUS start_codon 445 447 . + 0 Parent=g2.t1 + Extract the protein and coding section from an augustus gff, gtf file + Example file: + HS04636 AUGUSTUS stop_codon 6901 6903 . + 0 Parent=g1.t1 + HS04636 AUGUSTUS transcription_end_site 8857 8857 . + . Parent=g1.t1 + # protein sequence = [MLARALLLCAVLALSHTANPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYIL + # THFKGFWNVVNNIPFLRNAIMSYVLTSRSHLIDSPPTYNADYGYKSWEAFSNLSYYTRALPPVPDDCPTPLGVKGKKQLPDSNEIVEKLLLRRKFIPD + # PQGSNMMFAFFAQHFTHQFFKTDHKRGPAFTNGLGHGVDLNHIYGETLARQRKLRLFKDGKMKYQIIDGEMYPPTVKDTQAEMIYPPQVPEHLRFAVG + # QEVFGLVPGLMMYATIWLREHNRVCDVLKQEHPEWGDEQLFQTSRLILIGETIKIVIEDYVQHLSGYHFKLKFDPELLFNKQFQYQNRIAAEFNTLYH + # WHPLLPDTFQIHDQKYNYQQFIYNNSILLEHGITQFVESFTRQIAGRVAGGRNVPPAVQKVSQASIDQSRQMKYQSFNEYRKRFMLKPYESFEELTGE + # KEMSAELEALYGDIDAVELYPALLVEKPRPDAIFGETMVEVGAPFSLKGLMGNVICSPAYWKPSTFGGEVGFQIINTASIQSLICNNVKGCPFTSFSV + # PDPELIKTVTINASSSRSGLDDINPTVLLKERSTEL] + # end gene g1 + ### + # + # ----- prediction on sequence number 2 (length = 2344, name = HS08198) ----- + # + # Predicted genes for sequence number 2 on both strands + # start gene g2 + HS08198 AUGUSTUS gene 86 2344 1 + . ID=g2 + HS08198 AUGUSTUS transcript 86 2344 . + . ID=g2.t1;Parent=g2 + HS08198 AUGUSTUS transcription_start_site 86 86 . + . Parent=g2.t1 + HS08198 AUGUSTUS exon 86 582 . + . Parent=g2.t1 + HS08198 AUGUSTUS start_codon 445 447 . + 0 Parent=g2.t1 """ - protein_seq = '' - coding_seq = '' + protein_seq = "" + coding_seq = "" if args.protein: - po = open( args.protein, 'w+' ) + po = open(args.protein, "w+") if args.codingseq: - co = open( args.codingseq, 'w+' ) + co = open(args.codingseq, "w+") for line in sys.stdin: # protein- and coding-sequence are stored as comments - if line.startswith('#'): + if line.startswith("#"): line = line[2:].strip() - if line.startswith('start gene'): + if line.startswith("start gene"): gene_name = line[11:].strip() if protein_seq: - if line.endswith(']'): + if line.endswith("]"): protein_seq += line[:-1] - po.write( '>%s\n%s\n' % (gene_name, '\n'.join( textwrap.wrap( protein_seq, 80 ) ) ) ) - protein_seq = '' + po.write( + ">%s\n%s\n" + % (gene_name, "\n".join(textwrap.wrap(protein_seq, 80))) + ) + protein_seq = "" else: protein_seq += line if coding_seq: - if line.endswith(']'): + if line.endswith("]"): coding_seq += line[:-1] - co.write( '>%s\n%s\n' % (gene_name, '\n'.join( textwrap.wrap( coding_seq, 80 ) ) ) ) - coding_seq = '' + co.write( + ">%s\n%s\n" + % (gene_name, "\n".join(textwrap.wrap(coding_seq, 80))) + ) + coding_seq = "" else: coding_seq += line - if args.protein and line.startswith('protein sequence = ['): - if line.endswith(']'): + if args.protein and line.startswith("protein sequence = ["): + if line.endswith("]"): protein_seq = line[20:-1] - po.write( '>%s\n%s\n' % (gene_name, '\n'.join( textwrap.wrap( protein_seq, 80 ) ) ) ) - protein_seq = '' + po.write( + ">%s\n%s\n" + % (gene_name, "\n".join(textwrap.wrap(protein_seq, 80))) + ) + protein_seq = "" else: line = line[20:] protein_seq = line - if args.codingseq and line.startswith('coding sequence = ['): - if line.endswith(']'): + if args.codingseq and line.startswith("coding sequence = ["): + if line.endswith("]"): coding_seq = line[19:-1] - co.write( '>%s\n%s\n' % (gene_name, '\n'.join( textwrap.wrap( coding_seq, 80 ) ) ) ) - coding_seq = '' + co.write( + ">%s\n%s\n" + % (gene_name, "\n".join(textwrap.wrap(coding_seq, 80))) + ) + coding_seq = "" else: line = line[19:] coding_seq = line @@ -85,10 +97,10 @@ po.close() -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument('-p', '--protein', help='Path to the protein file.') - parser.add_argument('-c', '--codingseq', help='Path to the coding file.') + parser.add_argument("-p", "--protein", help="Path to the protein file.") + parser.add_argument("-c", "--codingseq", help="Path to the coding file.") args = parser.parse_args() - main( args ) + main(args) |
b |
diff -r ca6d970d931c -r 09855551d713 macros.xml --- a/macros.xml Fri Dec 20 14:08:53 2019 -0500 +++ b/macros.xml Thu Jul 15 17:16:12 2021 +0000 |
b |
@@ -7,7 +7,9 @@ </requirements> </xml> - <token name="@VERSION@">3.3.3</token> + <token name="@VERSION@">3.4.0</token> + <token name="@SUFFIX_VERSION@">0</token> + <xml name="citations"> <citations> |
b |
diff -r ca6d970d931c -r 09855551d713 test-data/augustus.hints.output.gtf --- a/test-data/augustus.hints.output.gtf Fri Dec 20 14:08:53 2019 -0500 +++ b/test-data/augustus.hints.output.gtf Thu Jul 15 17:16:12 2021 +0000 |
b |
@@ -1,4 +1,4 @@ -# This output was generated with AUGUSTUS (version 3.3.3). +# This output was generated with AUGUSTUS (version 3.4.0). # AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de), # O. Keller, S. König, L. Gerischer, L. Romoth and Katharina Hoff. # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008), @@ -7,11 +7,11 @@ # Sources of extrinsic information: M RM E W # Setting CDSpart local malus: 0.985 # Setting UTRpart local malus: 0.973 -# reading in the file /tmp/tmpTS0N1X/files/c/a/3/dataset_ca3a4696-dc20-430a-bb74-705b0b347333.dat ... +# reading in the file /tmp/tmpb49zmbej/files/6/4/3/dataset_64360fd3-ce82-407d-a499-79ac51decbd9.dat ... # Have extrinsic information about 1 sequences (in the specified range). -# Initializing the parameters using config directory /home/abretaud/miniconda3/envs/__augustus@3.3.3/config/ ... +# Initializing the parameters using config directory /usr/local/config/ ... # fly version. Using default transition matrix. -# Looks like /tmp/tmpTS0N1X/files/9/3/7/dataset_93735de8-5fb1-4086-a035-4b3adc372f30.dat is in fasta format. +# Looks like /tmp/tmpb49zmbej/files/0/c/6/dataset_0c6b001d-370e-42cf-be92-b3435bd212c5.dat is in fasta format. # We have hints for 1 sequence and for 1 of the sequences in the input set. # # ----- prediction on sequence number 1 (length = 9950, name = chr2R) ----- @@ -52,10 +52,11 @@ # CDS introns: 0/3 # 5'UTR exons and introns: 0/0 # 3'UTR exons and introns: 0/0 -# hint groups fully obeyed: 0 -# incompatible hint groups: 129 -# W: 129 +# hint groups fully obeyed: 120 +# W: 120 +# incompatible hint groups: 9 +# W: 9 # end gene chr2R.g1 ### # command line: -# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=off --codingseq=off --introns=off --stop=off --stop=off --cds=off --singlestrand=false /tmp/tmpTS0N1X/files/9/3/7/dataset_93735de8-5fb1-4086-a035-4b3adc372f30.dat --UTR=off --genemodel=complete --hintsfile=/tmp/tmpTS0N1X/files/c/a/3/dataset_ca3a4696-dc20-430a-bb74-705b0b347333.dat --extrinsicCfgFile=/tmp/tmpTS0N1X/files/8/9/2/dataset_8920e51c-dd38-4e36-abf4-14825237cd41.dat --species=fly +# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=off --codingseq=off --introns=off --stop=off --stop=off --cds=off --singlestrand=false /tmp/tmpb49zmbej/files/0/c/6/dataset_0c6b001d-370e-42cf-be92-b3435bd212c5.dat --UTR=off --genemodel=complete --softmasking=0 --hintsfile=/tmp/tmpb49zmbej/files/6/4/3/dataset_64360fd3-ce82-407d-a499-79ac51decbd9.dat --extrinsicCfgFile=/tmp/tmpb49zmbej/files/8/6/b/dataset_86b0a149-1d37-4615-9915-2c48586e3ca1.dat --species=fly |
b |
diff -r ca6d970d931c -r 09855551d713 test-data/augustus.hints_and_range.output.gtf --- a/test-data/augustus.hints_and_range.output.gtf Fri Dec 20 14:08:53 2019 -0500 +++ b/test-data/augustus.hints_and_range.output.gtf Thu Jul 15 17:16:12 2021 +0000 |
b |
@@ -1,4 +1,4 @@ -# This output was generated with AUGUSTUS (version 3.3.3). +# This output was generated with AUGUSTUS (version 3.4.0). # AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de), # O. Keller, S. König, L. Gerischer, L. Romoth and Katharina Hoff. # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008), @@ -7,11 +7,11 @@ # Sources of extrinsic information: M RM E W # Setting CDSpart local malus: 0.985 # Setting UTRpart local malus: 0.973 -# reading in the file /tmp/tmpTS0N1X/files/1/8/8/dataset_188fcc93-3347-4fd8-953a-6344470a71f8.dat ... +# reading in the file /tmp/tmpb49zmbej/files/f/a/8/dataset_fa8684ad-0602-4c00-9999-b998db931a6e.dat ... # Have extrinsic information about 1 sequences (in the specified range). -# Initializing the parameters using config directory /home/abretaud/miniconda3/envs/__augustus@3.3.3/config/ ... +# Initializing the parameters using config directory /usr/local/config/ ... # fly version. Using default transition matrix. -# Looks like /tmp/tmpTS0N1X/files/d/2/6/dataset_d26f480c-5e9f-4f42-82e8-b23f92904b45.dat is in fasta format. +# Looks like /tmp/tmpb49zmbej/files/4/c/8/dataset_4c80a809-791b-4afe-a497-698a7460ac31.dat is in fasta format. # We have hints for 1 sequence and for 1 of the sequences in the input set. # # ----- prediction on sequence number 1 (length = 2001, name = chr2R) ----- @@ -46,10 +46,11 @@ # CDS introns: 0/2 # 5'UTR exons and introns: 0/0 # 3'UTR exons and introns: 0/0 -# hint groups fully obeyed: 0 -# incompatible hint groups: 102 -# W: 102 +# hint groups fully obeyed: 96 +# W: 96 +# incompatible hint groups: 6 +# W: 6 # end gene chr2R.g1 ### # command line: -# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=off --codingseq=off --introns=off --stop=off --stop=off --cds=off --singlestrand=false /tmp/tmpTS0N1X/files/d/2/6/dataset_d26f480c-5e9f-4f42-82e8-b23f92904b45.dat --UTR=off --genemodel=complete --hintsfile=/tmp/tmpTS0N1X/files/1/8/8/dataset_188fcc93-3347-4fd8-953a-6344470a71f8.dat --extrinsicCfgFile=/tmp/tmpTS0N1X/files/1/1/0/dataset_1106f730-ecae-4b8f-917b-47cb5fc7334d.dat --predictionStart=7000 --predictionEnd=9000 --species=fly +# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=off --codingseq=off --introns=off --stop=off --stop=off --cds=off --singlestrand=false /tmp/tmpb49zmbej/files/4/c/8/dataset_4c80a809-791b-4afe-a497-698a7460ac31.dat --UTR=off --genemodel=complete --softmasking=0 --hintsfile=/tmp/tmpb49zmbej/files/f/a/8/dataset_fa8684ad-0602-4c00-9999-b998db931a6e.dat --extrinsicCfgFile=/tmp/tmpb49zmbej/files/3/3/5/dataset_335e9fec-9340-42e6-97ce-af35d5220fcc.dat --predictionStart=7000 --predictionEnd=9000 --species=fly |
b |
diff -r ca6d970d931c -r 09855551d713 test-data/human_augustus_protein_codingseq_introns_cds_main.gtf --- a/test-data/human_augustus_protein_codingseq_introns_cds_main.gtf Fri Dec 20 14:08:53 2019 -0500 +++ b/test-data/human_augustus_protein_codingseq_introns_cds_main.gtf Thu Jul 15 17:16:12 2021 +0000 |
b |
@@ -1,13 +1,13 @@ -# This output was generated with AUGUSTUS (version 3.3.3). +# This output was generated with AUGUSTUS (version 3.4.0). # AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de), # O. Keller, S. König, L. Gerischer, L. Romoth and Katharina Hoff. # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008), # Using native and syntenically mapped cDNA alignments to improve de novo gene finding # Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013 # No extrinsic information on sequences given. -# Initializing the parameters using config directory /home/abretaud/miniconda3/envs/__augustus@3.3.3/config/ ... +# Initializing the parameters using config directory /usr/local/config/ ... # human version. Using default transition matrix. -# Looks like /tmp/tmpTS0N1X/files/b/e/5/dataset_be5ee0f6-7fb0-4170-9543-17032878de48.dat is in fasta format. +# Looks like /tmp/tmpb49zmbej/files/c/d/6/dataset_cd6650af-fd36-4176-b9f1-e38bb118655f.dat is in fasta format. # We have hints for 0 sequences and for 0 of the sequences in the input set. # # ----- prediction on sequence number 1 (length = 9453, name = HS04636) ----- @@ -98,4 +98,4 @@ # end gene HS08198.g2 ### # command line: -# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=on --codingseq=on --introns=on --start=on --stop=on --cds=on --singlestrand=false /tmp/tmpTS0N1X/files/b/e/5/dataset_be5ee0f6-7fb0-4170-9543-17032878de48.dat --UTR=off --genemodel=complete --species=human +# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=on --codingseq=on --introns=on --start=on --stop=on --cds=on --singlestrand=false /tmp/tmpb49zmbej/files/c/d/6/dataset_cd6650af-fd36-4176-b9f1-e38bb118655f.dat --UTR=off --genemodel=complete --softmasking=0 --species=human |
b |
diff -r ca6d970d931c -r 09855551d713 test-data/human_augustus_utr-on.gff --- a/test-data/human_augustus_utr-on.gff Fri Dec 20 14:08:53 2019 -0500 +++ b/test-data/human_augustus_utr-on.gff Thu Jul 15 17:16:12 2021 +0000 |
b |
@@ -1,14 +1,14 @@ ##gff-version 3 -# This output was generated with AUGUSTUS (version 3.3.3). +# This output was generated with AUGUSTUS (version 3.4.0). # AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de), # O. Keller, S. König, L. Gerischer, L. Romoth and Katharina Hoff. # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008), # Using native and syntenically mapped cDNA alignments to improve de novo gene finding # Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013 # No extrinsic information on sequences given. -# Initializing the parameters using config directory /home/abretaud/miniconda3/envs/__augustus@3.3.3/config/ ... +# Initializing the parameters using config directory /usr/local/config/ ... # human version. Using default transition matrix. -# Looks like /tmp/tmpTS0N1X/files/c/2/9/dataset_c29ecb21-0612-4152-8d35-994815171865.dat is in fasta format. +# Looks like /tmp/tmpb49zmbej/files/e/0/1/dataset_e0109fd4-ac59-4275-90d3-25691349bc0c.dat is in fasta format. # We have hints for 0 sequences and for 0 of the sequences in the input set. # # ----- prediction on sequence number 1 (length = 9453, name = HS04636) ----- @@ -103,4 +103,4 @@ # end gene HS08198.g2 ### # command line: -# augustus --strand=both --noInFrameStop=false --gff3=on --uniqueGeneId=true --protein=on --codingseq=on --introns=off --stop=off --stop=off --cds=on --singlestrand=false /tmp/tmpTS0N1X/files/c/2/9/dataset_c29ecb21-0612-4152-8d35-994815171865.dat --UTR=on --genemodel=complete --species=human +# augustus --strand=both --noInFrameStop=false --gff3=on --uniqueGeneId=true --protein=on --codingseq=on --introns=off --stop=off --stop=off --cds=on --singlestrand=false /tmp/tmpb49zmbej/files/e/0/1/dataset_e0109fd4-ac59-4275-90d3-25691349bc0c.dat --UTR=on --genemodel=complete --softmasking=0 --species=human |
b |
diff -r ca6d970d931c -r 09855551d713 test-data/human_augustus_utr-on.gtf --- a/test-data/human_augustus_utr-on.gtf Fri Dec 20 14:08:53 2019 -0500 +++ b/test-data/human_augustus_utr-on.gtf Thu Jul 15 17:16:12 2021 +0000 |
b |
@@ -1,13 +1,13 @@ -# This output was generated with AUGUSTUS (version 3.3.3). +# This output was generated with AUGUSTUS (version 3.4.0). # AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de), # O. Keller, S. König, L. Gerischer, L. Romoth and Katharina Hoff. # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008), # Using native and syntenically mapped cDNA alignments to improve de novo gene finding # Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013 # No extrinsic information on sequences given. -# Initializing the parameters using config directory /home/abretaud/miniconda3/envs/__augustus@3.3.3/config/ ... +# Initializing the parameters using config directory /usr/local/config/ ... # human version. Using default transition matrix. -# Looks like /tmp/tmpTS0N1X/files/7/3/d/dataset_73d41293-49eb-4cbc-b881-ddc4c9faf952.dat is in fasta format. +# Looks like /tmp/tmpb49zmbej/files/6/0/5/dataset_605b6f62-4302-4e11-b378-848be921c4e4.dat is in fasta format. # We have hints for 0 sequences and for 0 of the sequences in the input set. # # ----- prediction on sequence number 1 (length = 9453, name = HS04636) ----- @@ -102,4 +102,4 @@ # end gene HS08198.g2 ### # command line: -# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=on --codingseq=on --introns=off --stop=off --stop=off --cds=on --singlestrand=false /tmp/tmpTS0N1X/files/7/3/d/dataset_73d41293-49eb-4cbc-b881-ddc4c9faf952.dat --UTR=on --genemodel=complete --species=human +# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=on --codingseq=on --introns=off --stop=off --stop=off --cds=on --singlestrand=false /tmp/tmpb49zmbej/files/6/0/5/dataset_605b6f62-4302-4e11-b378-848be921c4e4.dat --UTR=on --genemodel=complete --softmasking=0 --species=human |
b |
diff -r ca6d970d931c -r 09855551d713 test-data/human_augustus_utr-on_softmasking.gtf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/human_augustus_utr-on_softmasking.gtf Thu Jul 15 17:16:12 2021 +0000 |
b |
@@ -0,0 +1,24 @@ +# This output was generated with AUGUSTUS (version 3.4.0). +# AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de), +# O. Keller, S. König, L. Gerischer, L. Romoth and Katharina Hoff. +# Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008), +# Using native and syntenically mapped cDNA alignments to improve de novo gene finding +# Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013 +# No extrinsic information on sequences given. +# Sources of extrinsic information: M RM +# Initializing the parameters using config directory /usr/local/config/ ... +# human version. Using default transition matrix. +# Looks like /tmp/tmpb49zmbej/files/1/1/4/dataset_11447207-979d-4b84-a63d-14dd1e776f0e.dat is in fasta format. +# We have hints for 0 sequences and for 0 of the sequences in the input set. +# +# ----- prediction on sequence number 1 (length = 9453, name = HS04636) ----- +# +# Predicted genes for sequence number 1 on both strands +# (none) +# +# ----- prediction on sequence number 2 (length = 2344, name = HS08198) ----- +# +# Predicted genes for sequence number 2 on both strands +# (none) +# command line: +# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=on --codingseq=on --introns=off --stop=off --stop=off --cds=on --singlestrand=false /tmp/tmpb49zmbej/files/1/1/4/dataset_11447207-979d-4b84-a63d-14dd1e776f0e.dat --UTR=on --genemodel=complete --softmasking=1 --species=human |