changeset 7:09855551d713 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/augustus commit bba7f5df059fcbeb06e89cf689e9a04d4f22cb76"
author iuc
date Thu, 15 Jul 2021 17:16:12 +0000
parents ca6d970d931c
children 28433faa6e42
files augustus.xml extract_features.py macros.xml test-data/augustus.hints.output.gtf test-data/augustus.hints_and_range.output.gtf test-data/human_augustus_protein_codingseq_introns_cds_main.gtf test-data/human_augustus_utr-on.gff test-data/human_augustus_utr-on.gtf test-data/human_augustus_utr-on_softmasking.gtf
diffstat 9 files changed, 181 insertions(+), 94 deletions(-) [+]
line wrap: on
line diff
--- a/augustus.xml	Fri Dec 20 14:08:53 2019 -0500
+++ b/augustus.xml	Thu Jul 15 17:16:12 2021 +0000
@@ -1,4 +1,4 @@
-<tool id="augustus" name="Augustus" profile="16.04" version="@VERSION@">
+<tool id="augustus" name="Augustus" profile="20.01" version="@VERSION@+galaxy@SUFFIX_VERSION@">
     <description>gene prediction for prokaryotic and eukaryotic genomes</description>
     <macros>
         <import>macros.xml</import>
@@ -68,7 +68,7 @@
             $input_genome
             $utr
             --genemodel=$genemodel
-
+            --softmasking=$softmasking
             #if $hints.usehints == 'T'
                 --hintsfile='$hints.hintsfile' --extrinsicCfgFile='$hints.extrinsiccfg'
             #end if
@@ -125,11 +125,13 @@
                     <!-- If you update this list, please also update it in maker and busco tools (../maker/maker.xml and ../busco/busco.xml) -->
                     <option value="human">Homo sapiens</option>
                     <option value="fly">Drosophila melanogaster</option>
+                    <option value="maker2_dmel1">Drosophila melanogaster (maker2_dmel1)</option>
                     <option value="arabidopsis">Arabidopsis thaliana</option>
                     <option value="brugia ">Brugia malayi</option>
                     <option value="aedes">Aedes aegypti</option>
                     <option value="tribolium2012">Tribolium castaneum</option>
                     <option value="schistosoma">Schistosoma mansoni</option>
+                    <option value="schistosoma2">Schistosoma mansoni (schistosoma2)</option>
                     <option value="tetrahymena">Tetrahymena thermophila</option>
                     <option value="galdieria">Galdieria sulphuraria</option>
                     <option value="maize">Zea mays</option>
@@ -183,13 +185,13 @@
                     <option value="cacao">Theobroma cacao (cacao)</option>
                     <option value="heliconius_melpomene1">Heliconius melpomene</option>
                     <option value="xenoturbella">Xenoturbella</option>
-                    <option value="E_coli_K12">E coli K12</option>
+                    <option value="E_coli_K12">Escherichia coli K12</option>
                     <option value="c_elegans_trsk">c elegans trsk</option>
                     <option value="camponotus_floridanus">Camponotus floridanus</option>
                     <option value="coyote_tobacco">Coyote tobacco</option>
                     <option value="s_aureus">Staphylococcus aureus</option>
                     <option value="thermoanaerobacter_tengcongensis">Thermoanaerobacter tengcongensis</option>
-                    <option value="wheat">wheat</option>
+                    <option value="wheat">Triticum aestivum</option>
                     <option value="zebrafish">Danio rerio</option>
                     <option value="anidulans">Aspergillus nidulans</option>
                     <option value="bombus_impatiens1">Bombus impatiens1</option>
@@ -205,23 +207,55 @@
                     <option value="pea_aphid">Acyrthosiphon pisum</option>
                     <option value="rhodnius_prolixus">Rhodnius prolixus</option>
                     <option value="ustilago_maydis">Ustilago maydis</option>
-                    <option value="verticillium_albo_atrum1">Verticillium albo atrum1</option>
-                    <option value="verticillium_longisporum1">Verticillium longisporum1</option>
-                    <option value="Xipophorus_maculatus">Xipophorus_maculatus</option>
-                    <option value="adorsata">adorsata</option>
-                    <option value="ancylostoma_ceylanicum">ancylostoma_ceylanicum</option>
-                    <option value="maker2_athal1">maker2_athal1</option>
-                    <option value="maker2_c_elegans1">maker2_c_elegans1</option>
-                    <option value="maker2_dmel1">maker2_dmel1</option>
-                    <option value="maker2_spomb1">maker2_spomb1</option>
-                    <option value="parasteatoda">parasteatoda</option>
-                    <option value="rice">rice</option>
-                    <option value="schistosoma2">schistosoma2</option>
-                    <option value="sulfolobus_solfataricus">sulfolobus_solfataricus</option>
+                    <option value="verticillium_albo_atrum1">Verticillium albo-atrum</option>
+                    <option value="verticillium_longisporum1">Verticillium longisporum</option>
+                    <option value="Xipophorus_maculatus">Xiphophorus maculatus</option>
+                    <option value="adorsata">Apis dorsata</option>
+                    <option value="ancylostoma_ceylanicum">Ancylostoma ceylanicum</option>
+                    <option value="maker2_athal1">Arabidopsis thaliana (maker2_athal1)</option>
+                    <option value="maker2_c_elegans1">Caenorhabditis elegans (maker2_c_elegans1)</option>
+                    <option value="maker2_spomb1">Schizosaccharomyces pombe (maker2_spomb1)</option>
+                    <option value="parasteatoda">Parasteatoda sp.</option>
+                    <option value="rice">Oryza sp.</option>
+                    <option value="Cassiopea_xamachana">Cassiopea xamachana</option>
+                    <option value="Ptychodera_flava">Ptychodera flava</option>
+                    <option value="Argopecten_irridians">Argopecten irradians</option>
+                    <option value="Nemopilema_nomurai">Nemopilema nomurai</option>
+                    <option value="Notospermus_geniculatus">Notospermus geniculatus</option>
+                    <option value="Chrysaora_chesapeakeij">Chrysaora chesapeakei</option>
+                    <option value="Ectocarpus_siliculosus">Ectocarpus siliculosus</option>
+                    <option value="Trichoplax_adhaerens">Trichoplax adhaerens</option>
+                    <option value="Aurelia_aurita">Aurelia aurita</option>
+                    <option value="Rhopilema_esculentum">Rhopilema esculentum</option>
+                    <option value="Encephalitozoon_cuniculi">Encephalitozoon cuniculi</option>
+                    <option value="Gonapodya_prolifera">Gonapodya prolifera</option>
+                    <option value="Sordaria_macrospora">Sordaria macrospora</option>
+                    <option value="Sphaceloma_murrayae">Sphaceloma murrayae</option>
+                    <option value="Vitrella_brassicaformis">Vitrella brassicaformis</option>
+                    <option value="Monoraphidium_neglectum">Monoraphidium neglectum</option>
+                    <option value="Raphidocelis_subcapita">Raphidocelis subcapitata</option>
+                    <option value="Ostreococcus_tauri">Ostreococcus tauri</option>
+                    <option value="Ostreococcus_sp_lucimarinus">Ostreococcus sp. lucimarinus</option>
+                    <option value="Micromonas_pusilla">Micromonas pusilla</option>
+                    <option value="Micromonas_commoda">Micromonas commoda</option>
+                    <option value="Chlamydomonas_eustigma">Chlamydomonas eustigma</option>
+                    <option value="Thalassiosira_pseudonana">Thalassiosira pseudonana</option>
+                    <option value="Pseudo-nitzschia_multistriata">Pseudo-nitzschia multistriata</option>
+                    <option value="Phaeodactylum_tricornutum">Phaeodactylum tricornutum</option>
+                    <option value="Fragilariopsis_cylindrus">Fragilariopsis cylindrus</option>
+                    <option value="Fistulifera_solaris">Fistulifera solaris</option>
+                    <option value="Bathycoccus_prasinos">Bathycoccus prasinos</option>
+                    <option value="Chloropicon_primus">Chloropicon primus</option>
                 </param>
             </when>
         </conditional>
 
+        <param name="softmasking" type="boolean" argument="--softmasking"
+        label="Softmasking"
+        truevalue="1" falsevalue="0" checked="true"
+        help="If this option is enabled, lowercase letters are considered as repeated regions." />
+
+
         <param name="strand" type="select" argument="--strand"
             label="Predict genes on specific strands">
             <option value="both">both</option>
@@ -299,6 +333,7 @@
             <param name="input_genome" value="human_augustus.fa" ftype="fasta" />
             <param name="organism" value="human" />
             <param name="utr" value="True" />
+            <param name="softmasking" value="False"/>
             <output name="output" file="human_augustus_utr-on.gtf" ftype="gtf" lines_diff="6"/>
         </test>
         <test>
@@ -306,12 +341,14 @@
             <param name="organism" value="human" />
             <param name="utr" value="True" />
             <param name="gff" value="True" />
+            <param name="softmasking" value="False"/>
             <output name="output" file="human_augustus_utr-on.gff" ftype="gff3" lines_diff="6"/>
         </test>
         <test>
             <param name="input_genome" value="human_augustus.fa" ftype="fasta" />
             <param name="organism" value="human" />
             <param name="outputs" value="protein,codingseq,introns,cds,start,stop" />
+            <param name="softmasking" value="False"/>
             <output name="output" file="human_augustus_protein_codingseq_introns_cds_main.gtf" ftype="gtf" lines_diff="6"/>
             <output name="codingseq_output" file="human_augustus_protein_codingseq_introns_cds_codingseq.fasta" ftype="fasta" />
             <output name="protein_output" file="human_augustus_protein_codingseq_introns_cds_protein.fasta" ftype="fasta" />
@@ -323,6 +360,7 @@
             <param name="hintsfile" value="hints.truncated.adjusted.gff" />
             <param name="extrinsiccfg" value="extrinsic.truncated.cfg" />
             <param name="outputs" value="" />
+            <param name="softmasking" value="False"/>
             <output name="output" file="augustus.hints.output.gtf" ftype="gtf" lines_diff="12">
                 <assert_contents>
                     <has_text_matching expression="chr2R\tAUGUSTUS\tgene\t7560\t9303\t0\.(7[8-9]|8[0-5])\t-\t.\tchr2R.g1" />
@@ -341,6 +379,7 @@
             <param name="start" value="7000" />
             <param name="stop" value="9000" />
             <param name="outputs" value="" />
+            <param name="softmasking" value="False"/>
             <output name="output" file="augustus.hints_and_range.output.gtf" ftype="gtf" lines_diff="12">
                 <assert_contents>
                     <has_text_matching expression="chr2R\tAUGUSTUS\tgene\t7560\t8931\t0.8[2-5]\t-\t.\tchr2R.g1" />
@@ -348,6 +387,14 @@
                 </assert_contents>
             </output>
         </test>
+        <!-- Test softmasking parameter-->
+        <test>
+            <param name="input_genome" value="human_augustus.fa" ftype="fasta" />
+            <param name="organism" value="human" />
+            <param name="utr" value="True" />
+            <param name="softmasking" value="True"/>
+            <output name="output" file="human_augustus_utr-on_softmasking.gtf" ftype="gtf" lines_diff="6"/>
+        </test>
 
     </tests>
     <help>
--- a/extract_features.py	Fri Dec 20 14:08:53 2019 -0500
+++ b/extract_features.py	Thu Jul 15 17:16:12 2021 +0000
@@ -5,76 +5,88 @@
 import textwrap
 
 
-def main( args ):
+def main(args):
     """
-    Extract the protein and coding section from an augustus gff, gtf file
-    Example file:
-HS04636	AUGUSTUS	stop_codon	6901	6903	.	+	0	Parent=g1.t1
-HS04636	AUGUSTUS	transcription_end_site	8857	8857	.	+	.	Parent=g1.t1
-# protein sequence = [MLARALLLCAVLALSHTANPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYIL
-# THFKGFWNVVNNIPFLRNAIMSYVLTSRSHLIDSPPTYNADYGYKSWEAFSNLSYYTRALPPVPDDCPTPLGVKGKKQLPDSNEIVEKLLLRRKFIPD
-# PQGSNMMFAFFAQHFTHQFFKTDHKRGPAFTNGLGHGVDLNHIYGETLARQRKLRLFKDGKMKYQIIDGEMYPPTVKDTQAEMIYPPQVPEHLRFAVG
-# QEVFGLVPGLMMYATIWLREHNRVCDVLKQEHPEWGDEQLFQTSRLILIGETIKIVIEDYVQHLSGYHFKLKFDPELLFNKQFQYQNRIAAEFNTLYH
-# WHPLLPDTFQIHDQKYNYQQFIYNNSILLEHGITQFVESFTRQIAGRVAGGRNVPPAVQKVSQASIDQSRQMKYQSFNEYRKRFMLKPYESFEELTGE
-# KEMSAELEALYGDIDAVELYPALLVEKPRPDAIFGETMVEVGAPFSLKGLMGNVICSPAYWKPSTFGGEVGFQIINTASIQSLICNNVKGCPFTSFSV
-# PDPELIKTVTINASSSRSGLDDINPTVLLKERSTEL]
-# end gene g1
-###
-#
-# ----- prediction on sequence number 2 (length = 2344, name = HS08198) -----
-#
-# Predicted genes for sequence number 2 on both strands
-# start gene g2
-HS08198	AUGUSTUS	gene	86	2344	1	+	.	ID=g2
-HS08198	AUGUSTUS	transcript	86	2344	.	+	.	ID=g2.t1;Parent=g2
-HS08198	AUGUSTUS	transcription_start_site	86	86	.	+	.	Parent=g2.t1
-HS08198	AUGUSTUS	exon	86	582	.	+	.	Parent=g2.t1
-HS08198	AUGUSTUS	start_codon	445	447	.	+	0	Parent=g2.t1
+        Extract the protein and coding section from an augustus gff, gtf file
+        Example file:
+    HS04636	AUGUSTUS	stop_codon	6901	6903	.	+	0	Parent=g1.t1
+    HS04636	AUGUSTUS	transcription_end_site	8857	8857	.	+	.	Parent=g1.t1
+    # protein sequence = [MLARALLLCAVLALSHTANPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYIL
+    # THFKGFWNVVNNIPFLRNAIMSYVLTSRSHLIDSPPTYNADYGYKSWEAFSNLSYYTRALPPVPDDCPTPLGVKGKKQLPDSNEIVEKLLLRRKFIPD
+    # PQGSNMMFAFFAQHFTHQFFKTDHKRGPAFTNGLGHGVDLNHIYGETLARQRKLRLFKDGKMKYQIIDGEMYPPTVKDTQAEMIYPPQVPEHLRFAVG
+    # QEVFGLVPGLMMYATIWLREHNRVCDVLKQEHPEWGDEQLFQTSRLILIGETIKIVIEDYVQHLSGYHFKLKFDPELLFNKQFQYQNRIAAEFNTLYH
+    # WHPLLPDTFQIHDQKYNYQQFIYNNSILLEHGITQFVESFTRQIAGRVAGGRNVPPAVQKVSQASIDQSRQMKYQSFNEYRKRFMLKPYESFEELTGE
+    # KEMSAELEALYGDIDAVELYPALLVEKPRPDAIFGETMVEVGAPFSLKGLMGNVICSPAYWKPSTFGGEVGFQIINTASIQSLICNNVKGCPFTSFSV
+    # PDPELIKTVTINASSSRSGLDDINPTVLLKERSTEL]
+    # end gene g1
+    ###
+    #
+    # ----- prediction on sequence number 2 (length = 2344, name = HS08198) -----
+    #
+    # Predicted genes for sequence number 2 on both strands
+    # start gene g2
+    HS08198	AUGUSTUS	gene	86	2344	1	+	.	ID=g2
+    HS08198	AUGUSTUS	transcript	86	2344	.	+	.	ID=g2.t1;Parent=g2
+    HS08198	AUGUSTUS	transcription_start_site	86	86	.	+	.	Parent=g2.t1
+    HS08198	AUGUSTUS	exon	86	582	.	+	.	Parent=g2.t1
+    HS08198	AUGUSTUS	start_codon	445	447	.	+	0	Parent=g2.t1
     """
-    protein_seq = ''
-    coding_seq = ''
+    protein_seq = ""
+    coding_seq = ""
     if args.protein:
-        po = open( args.protein, 'w+' )
+        po = open(args.protein, "w+")
     if args.codingseq:
-        co = open( args.codingseq, 'w+' )
+        co = open(args.codingseq, "w+")
 
     for line in sys.stdin:
         # protein- and coding-sequence are stored as comments
-        if line.startswith('#'):
+        if line.startswith("#"):
             line = line[2:].strip()
-            if line.startswith('start gene'):
+            if line.startswith("start gene"):
                 gene_name = line[11:].strip()
 
             if protein_seq:
-                if line.endswith(']'):
+                if line.endswith("]"):
                     protein_seq += line[:-1]
-                    po.write( '>%s\n%s\n' % (gene_name, '\n'.join( textwrap.wrap( protein_seq, 80 ) ) ) )
-                    protein_seq = ''
+                    po.write(
+                        ">%s\n%s\n"
+                        % (gene_name, "\n".join(textwrap.wrap(protein_seq, 80)))
+                    )
+                    protein_seq = ""
                 else:
                     protein_seq += line
 
             if coding_seq:
-                if line.endswith(']'):
+                if line.endswith("]"):
                     coding_seq += line[:-1]
-                    co.write( '>%s\n%s\n' % (gene_name, '\n'.join( textwrap.wrap( coding_seq, 80 ) ) ) )
-                    coding_seq = ''
+                    co.write(
+                        ">%s\n%s\n"
+                        % (gene_name, "\n".join(textwrap.wrap(coding_seq, 80)))
+                    )
+                    coding_seq = ""
                 else:
                     coding_seq += line
 
-            if args.protein and line.startswith('protein sequence = ['):
-                if line.endswith(']'):
+            if args.protein and line.startswith("protein sequence = ["):
+                if line.endswith("]"):
                     protein_seq = line[20:-1]
-                    po.write( '>%s\n%s\n' % (gene_name, '\n'.join( textwrap.wrap( protein_seq, 80 ) ) ) )
-                    protein_seq = ''
+                    po.write(
+                        ">%s\n%s\n"
+                        % (gene_name, "\n".join(textwrap.wrap(protein_seq, 80)))
+                    )
+                    protein_seq = ""
                 else:
                     line = line[20:]
                     protein_seq = line
 
-            if args.codingseq and line.startswith('coding sequence = ['):
-                if line.endswith(']'):
+            if args.codingseq and line.startswith("coding sequence = ["):
+                if line.endswith("]"):
                     coding_seq = line[19:-1]
-                    co.write( '>%s\n%s\n' % (gene_name, '\n'.join( textwrap.wrap( coding_seq, 80 ) ) ) )
-                    coding_seq = ''
+                    co.write(
+                        ">%s\n%s\n"
+                        % (gene_name, "\n".join(textwrap.wrap(coding_seq, 80)))
+                    )
+                    coding_seq = ""
                 else:
                     line = line[19:]
                     coding_seq = line
@@ -85,10 +97,10 @@
         po.close()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument('-p', '--protein', help='Path to the protein file.')
-    parser.add_argument('-c', '--codingseq', help='Path to the coding file.')
+    parser.add_argument("-p", "--protein", help="Path to the protein file.")
+    parser.add_argument("-c", "--codingseq", help="Path to the coding file.")
 
     args = parser.parse_args()
-    main( args )
+    main(args)
--- a/macros.xml	Fri Dec 20 14:08:53 2019 -0500
+++ b/macros.xml	Thu Jul 15 17:16:12 2021 +0000
@@ -7,7 +7,9 @@
         </requirements>
     </xml>
 
-    <token name="@VERSION@">3.3.3</token>
+    <token name="@VERSION@">3.4.0</token>
+    <token name="@SUFFIX_VERSION@">0</token>
+
 
     <xml name="citations">
         <citations>
--- a/test-data/augustus.hints.output.gtf	Fri Dec 20 14:08:53 2019 -0500
+++ b/test-data/augustus.hints.output.gtf	Thu Jul 15 17:16:12 2021 +0000
@@ -1,4 +1,4 @@
-# This output was generated with AUGUSTUS (version 3.3.3).
+# This output was generated with AUGUSTUS (version 3.4.0).
 # AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de),
 # O. Keller, S. König, L. Gerischer, L. Romoth and Katharina Hoff.
 # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008),
@@ -7,11 +7,11 @@
 # Sources of extrinsic information: M RM E W 
 # Setting CDSpart local malus: 0.985
 # Setting UTRpart local malus: 0.973
-# reading in the file /tmp/tmpTS0N1X/files/c/a/3/dataset_ca3a4696-dc20-430a-bb74-705b0b347333.dat ...
+# reading in the file /tmp/tmpb49zmbej/files/6/4/3/dataset_64360fd3-ce82-407d-a499-79ac51decbd9.dat ...
 # Have extrinsic information about 1 sequences (in the specified range). 
-# Initializing the parameters using config directory /home/abretaud/miniconda3/envs/__augustus@3.3.3/config/ ...
+# Initializing the parameters using config directory /usr/local/config/ ...
 # fly version. Using default transition matrix.
-# Looks like /tmp/tmpTS0N1X/files/9/3/7/dataset_93735de8-5fb1-4086-a035-4b3adc372f30.dat is in fasta format.
+# Looks like /tmp/tmpb49zmbej/files/0/c/6/dataset_0c6b001d-370e-42cf-be92-b3435bd212c5.dat is in fasta format.
 # We have hints for 1 sequence and for 1 of the sequences in the input set.
 #
 # ----- prediction on sequence number 1 (length = 9950, name = chr2R) -----
@@ -52,10 +52,11 @@
 # CDS introns: 0/3
 # 5'UTR exons and introns: 0/0
 # 3'UTR exons and introns: 0/0
-# hint groups fully obeyed: 0
-# incompatible hint groups: 129
-#      W: 129 
+# hint groups fully obeyed: 120
+#      W: 120 
+# incompatible hint groups: 9
+#      W:   9 
 # end gene chr2R.g1
 ###
 # command line:
-# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=off --codingseq=off --introns=off --stop=off --stop=off --cds=off --singlestrand=false /tmp/tmpTS0N1X/files/9/3/7/dataset_93735de8-5fb1-4086-a035-4b3adc372f30.dat --UTR=off --genemodel=complete --hintsfile=/tmp/tmpTS0N1X/files/c/a/3/dataset_ca3a4696-dc20-430a-bb74-705b0b347333.dat --extrinsicCfgFile=/tmp/tmpTS0N1X/files/8/9/2/dataset_8920e51c-dd38-4e36-abf4-14825237cd41.dat --species=fly
+# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=off --codingseq=off --introns=off --stop=off --stop=off --cds=off --singlestrand=false /tmp/tmpb49zmbej/files/0/c/6/dataset_0c6b001d-370e-42cf-be92-b3435bd212c5.dat --UTR=off --genemodel=complete --softmasking=0 --hintsfile=/tmp/tmpb49zmbej/files/6/4/3/dataset_64360fd3-ce82-407d-a499-79ac51decbd9.dat --extrinsicCfgFile=/tmp/tmpb49zmbej/files/8/6/b/dataset_86b0a149-1d37-4615-9915-2c48586e3ca1.dat --species=fly
--- a/test-data/augustus.hints_and_range.output.gtf	Fri Dec 20 14:08:53 2019 -0500
+++ b/test-data/augustus.hints_and_range.output.gtf	Thu Jul 15 17:16:12 2021 +0000
@@ -1,4 +1,4 @@
-# This output was generated with AUGUSTUS (version 3.3.3).
+# This output was generated with AUGUSTUS (version 3.4.0).
 # AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de),
 # O. Keller, S. König, L. Gerischer, L. Romoth and Katharina Hoff.
 # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008),
@@ -7,11 +7,11 @@
 # Sources of extrinsic information: M RM E W 
 # Setting CDSpart local malus: 0.985
 # Setting UTRpart local malus: 0.973
-# reading in the file /tmp/tmpTS0N1X/files/1/8/8/dataset_188fcc93-3347-4fd8-953a-6344470a71f8.dat ...
+# reading in the file /tmp/tmpb49zmbej/files/f/a/8/dataset_fa8684ad-0602-4c00-9999-b998db931a6e.dat ...
 # Have extrinsic information about 1 sequences (in the specified range). 
-# Initializing the parameters using config directory /home/abretaud/miniconda3/envs/__augustus@3.3.3/config/ ...
+# Initializing the parameters using config directory /usr/local/config/ ...
 # fly version. Using default transition matrix.
-# Looks like /tmp/tmpTS0N1X/files/d/2/6/dataset_d26f480c-5e9f-4f42-82e8-b23f92904b45.dat is in fasta format.
+# Looks like /tmp/tmpb49zmbej/files/4/c/8/dataset_4c80a809-791b-4afe-a497-698a7460ac31.dat is in fasta format.
 # We have hints for 1 sequence and for 1 of the sequences in the input set.
 #
 # ----- prediction on sequence number 1 (length = 2001, name = chr2R) -----
@@ -46,10 +46,11 @@
 # CDS introns: 0/2
 # 5'UTR exons and introns: 0/0
 # 3'UTR exons and introns: 0/0
-# hint groups fully obeyed: 0
-# incompatible hint groups: 102
-#      W: 102 
+# hint groups fully obeyed: 96
+#      W:  96 
+# incompatible hint groups: 6
+#      W:   6 
 # end gene chr2R.g1
 ###
 # command line:
-# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=off --codingseq=off --introns=off --stop=off --stop=off --cds=off --singlestrand=false /tmp/tmpTS0N1X/files/d/2/6/dataset_d26f480c-5e9f-4f42-82e8-b23f92904b45.dat --UTR=off --genemodel=complete --hintsfile=/tmp/tmpTS0N1X/files/1/8/8/dataset_188fcc93-3347-4fd8-953a-6344470a71f8.dat --extrinsicCfgFile=/tmp/tmpTS0N1X/files/1/1/0/dataset_1106f730-ecae-4b8f-917b-47cb5fc7334d.dat --predictionStart=7000 --predictionEnd=9000 --species=fly
+# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=off --codingseq=off --introns=off --stop=off --stop=off --cds=off --singlestrand=false /tmp/tmpb49zmbej/files/4/c/8/dataset_4c80a809-791b-4afe-a497-698a7460ac31.dat --UTR=off --genemodel=complete --softmasking=0 --hintsfile=/tmp/tmpb49zmbej/files/f/a/8/dataset_fa8684ad-0602-4c00-9999-b998db931a6e.dat --extrinsicCfgFile=/tmp/tmpb49zmbej/files/3/3/5/dataset_335e9fec-9340-42e6-97ce-af35d5220fcc.dat --predictionStart=7000 --predictionEnd=9000 --species=fly
--- a/test-data/human_augustus_protein_codingseq_introns_cds_main.gtf	Fri Dec 20 14:08:53 2019 -0500
+++ b/test-data/human_augustus_protein_codingseq_introns_cds_main.gtf	Thu Jul 15 17:16:12 2021 +0000
@@ -1,13 +1,13 @@
-# This output was generated with AUGUSTUS (version 3.3.3).
+# This output was generated with AUGUSTUS (version 3.4.0).
 # AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de),
 # O. Keller, S. König, L. Gerischer, L. Romoth and Katharina Hoff.
 # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008),
 # Using native and syntenically mapped cDNA alignments to improve de novo gene finding
 # Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013
 # No extrinsic information on sequences given.
-# Initializing the parameters using config directory /home/abretaud/miniconda3/envs/__augustus@3.3.3/config/ ...
+# Initializing the parameters using config directory /usr/local/config/ ...
 # human version. Using default transition matrix.
-# Looks like /tmp/tmpTS0N1X/files/b/e/5/dataset_be5ee0f6-7fb0-4170-9543-17032878de48.dat is in fasta format.
+# Looks like /tmp/tmpb49zmbej/files/c/d/6/dataset_cd6650af-fd36-4176-b9f1-e38bb118655f.dat is in fasta format.
 # We have hints for 0 sequences and for 0 of the sequences in the input set.
 #
 # ----- prediction on sequence number 1 (length = 9453, name = HS04636) -----
@@ -98,4 +98,4 @@
 # end gene HS08198.g2
 ###
 # command line:
-# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=on --codingseq=on --introns=on --start=on --stop=on --cds=on --singlestrand=false /tmp/tmpTS0N1X/files/b/e/5/dataset_be5ee0f6-7fb0-4170-9543-17032878de48.dat --UTR=off --genemodel=complete --species=human
+# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=on --codingseq=on --introns=on --start=on --stop=on --cds=on --singlestrand=false /tmp/tmpb49zmbej/files/c/d/6/dataset_cd6650af-fd36-4176-b9f1-e38bb118655f.dat --UTR=off --genemodel=complete --softmasking=0 --species=human
--- a/test-data/human_augustus_utr-on.gff	Fri Dec 20 14:08:53 2019 -0500
+++ b/test-data/human_augustus_utr-on.gff	Thu Jul 15 17:16:12 2021 +0000
@@ -1,14 +1,14 @@
 ##gff-version 3
-# This output was generated with AUGUSTUS (version 3.3.3).
+# This output was generated with AUGUSTUS (version 3.4.0).
 # AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de),
 # O. Keller, S. König, L. Gerischer, L. Romoth and Katharina Hoff.
 # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008),
 # Using native and syntenically mapped cDNA alignments to improve de novo gene finding
 # Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013
 # No extrinsic information on sequences given.
-# Initializing the parameters using config directory /home/abretaud/miniconda3/envs/__augustus@3.3.3/config/ ...
+# Initializing the parameters using config directory /usr/local/config/ ...
 # human version. Using default transition matrix.
-# Looks like /tmp/tmpTS0N1X/files/c/2/9/dataset_c29ecb21-0612-4152-8d35-994815171865.dat is in fasta format.
+# Looks like /tmp/tmpb49zmbej/files/e/0/1/dataset_e0109fd4-ac59-4275-90d3-25691349bc0c.dat is in fasta format.
 # We have hints for 0 sequences and for 0 of the sequences in the input set.
 #
 # ----- prediction on sequence number 1 (length = 9453, name = HS04636) -----
@@ -103,4 +103,4 @@
 # end gene HS08198.g2
 ###
 # command line:
-# augustus --strand=both --noInFrameStop=false --gff3=on --uniqueGeneId=true --protein=on --codingseq=on --introns=off --stop=off --stop=off --cds=on --singlestrand=false /tmp/tmpTS0N1X/files/c/2/9/dataset_c29ecb21-0612-4152-8d35-994815171865.dat --UTR=on --genemodel=complete --species=human
+# augustus --strand=both --noInFrameStop=false --gff3=on --uniqueGeneId=true --protein=on --codingseq=on --introns=off --stop=off --stop=off --cds=on --singlestrand=false /tmp/tmpb49zmbej/files/e/0/1/dataset_e0109fd4-ac59-4275-90d3-25691349bc0c.dat --UTR=on --genemodel=complete --softmasking=0 --species=human
--- a/test-data/human_augustus_utr-on.gtf	Fri Dec 20 14:08:53 2019 -0500
+++ b/test-data/human_augustus_utr-on.gtf	Thu Jul 15 17:16:12 2021 +0000
@@ -1,13 +1,13 @@
-# This output was generated with AUGUSTUS (version 3.3.3).
+# This output was generated with AUGUSTUS (version 3.4.0).
 # AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de),
 # O. Keller, S. König, L. Gerischer, L. Romoth and Katharina Hoff.
 # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008),
 # Using native and syntenically mapped cDNA alignments to improve de novo gene finding
 # Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013
 # No extrinsic information on sequences given.
-# Initializing the parameters using config directory /home/abretaud/miniconda3/envs/__augustus@3.3.3/config/ ...
+# Initializing the parameters using config directory /usr/local/config/ ...
 # human version. Using default transition matrix.
-# Looks like /tmp/tmpTS0N1X/files/7/3/d/dataset_73d41293-49eb-4cbc-b881-ddc4c9faf952.dat is in fasta format.
+# Looks like /tmp/tmpb49zmbej/files/6/0/5/dataset_605b6f62-4302-4e11-b378-848be921c4e4.dat is in fasta format.
 # We have hints for 0 sequences and for 0 of the sequences in the input set.
 #
 # ----- prediction on sequence number 1 (length = 9453, name = HS04636) -----
@@ -102,4 +102,4 @@
 # end gene HS08198.g2
 ###
 # command line:
-# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=on --codingseq=on --introns=off --stop=off --stop=off --cds=on --singlestrand=false /tmp/tmpTS0N1X/files/7/3/d/dataset_73d41293-49eb-4cbc-b881-ddc4c9faf952.dat --UTR=on --genemodel=complete --species=human
+# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=on --codingseq=on --introns=off --stop=off --stop=off --cds=on --singlestrand=false /tmp/tmpb49zmbej/files/6/0/5/dataset_605b6f62-4302-4e11-b378-848be921c4e4.dat --UTR=on --genemodel=complete --softmasking=0 --species=human
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/human_augustus_utr-on_softmasking.gtf	Thu Jul 15 17:16:12 2021 +0000
@@ -0,0 +1,24 @@
+# This output was generated with AUGUSTUS (version 3.4.0).
+# AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de),
+# O. Keller, S. König, L. Gerischer, L. Romoth and Katharina Hoff.
+# Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008),
+# Using native and syntenically mapped cDNA alignments to improve de novo gene finding
+# Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013
+# No extrinsic information on sequences given.
+# Sources of extrinsic information: M RM 
+# Initializing the parameters using config directory /usr/local/config/ ...
+# human version. Using default transition matrix.
+# Looks like /tmp/tmpb49zmbej/files/1/1/4/dataset_11447207-979d-4b84-a63d-14dd1e776f0e.dat is in fasta format.
+# We have hints for 0 sequences and for 0 of the sequences in the input set.
+#
+# ----- prediction on sequence number 1 (length = 9453, name = HS04636) -----
+#
+# Predicted genes for sequence number 1 on both strands
+# (none)
+#
+# ----- prediction on sequence number 2 (length = 2344, name = HS08198) -----
+#
+# Predicted genes for sequence number 2 on both strands
+# (none)
+# command line:
+# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=on --codingseq=on --introns=off --stop=off --stop=off --cds=on --singlestrand=false /tmp/tmpb49zmbej/files/1/1/4/dataset_11447207-979d-4b84-a63d-14dd1e776f0e.dat --UTR=on --genemodel=complete --softmasking=1 --species=human