Repository 'funannotate_predict'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/funannotate_predict

Changeset 0:40b87aef5241 (2021-08-26)
Next changeset 1:1a59958c1f76 (2021-10-04)
Commit message:
"planemo upload commit 9613152729099079c7465c3d5d42005ef22ca91e"
added:
README.md
funannotate_predict.xml
macros.xml
test-data/SRR7458692.bam
test-data/cleaned.fa
test-data/cleaned_ident.fa
test-data/funannotate.loc
test-data/funannotate_db/funannotate-db-info.txt
test-data/funannotate_db/insecta/ancestral
test-data/funannotate_db/insecta/dataset.cfg
test-data/funannotate_db/insecta/hmms/EOG090W03A6.hmm
test-data/funannotate_db/insecta/hmms/EOG090W06A3.hmm
test-data/funannotate_db/insecta/hmms/EOG090W0GYE.hmm
test-data/funannotate_db/insecta/hmms/EOG090W0T3K.hmm
test-data/funannotate_db/insecta/lengths_cutoff
test-data/funannotate_db/insecta/prfl/EOG090W03A6.prfl
test-data/funannotate_db/insecta/prfl/EOG090W06A3.prfl
test-data/funannotate_db/insecta/prfl/EOG090W0GYE.prfl
test-data/funannotate_db/insecta/prfl/EOG090W0T3K.prfl
test-data/funannotate_db/insecta/scores_cutoff
test-data/funannotate_db/repeats.dmnd
test-data/funannotate_db/trained_species/fly/augustus/fly_exon_probs.pbl
test-data/funannotate_db/trained_species/fly/augustus/fly_igenic_probs.pbl
test-data/funannotate_db/trained_species/fly/augustus/fly_intron_probs.pbl
test-data/funannotate_db/trained_species/fly/augustus/fly_metapars.cfg
test-data/funannotate_db/trained_species/fly/augustus/fly_metapars.utr.cfg
test-data/funannotate_db/trained_species/fly/augustus/fly_parameters.cfg
test-data/funannotate_db/trained_species/fly/augustus/fly_utr_probs.pbl
test-data/funannotate_db/trained_species/fly/augustus/fly_weightmatrix.txt
test-data/funannotate_db/trained_species/fly/info.json
test-data/funannotate_db/uniprot_sprot.fasta
test-data/genome.fa
test-data/genome_masked.fa
test-data/predict_augustus/Genus_species.cds-transcripts.fa
test-data/predict_augustus/Genus_species.discrepency.report.txt
test-data/predict_augustus/Genus_species.error.summary.txt
test-data/predict_augustus/Genus_species.gbk
test-data/predict_augustus/Genus_species.gff3
test-data/predict_augustus/Genus_species.mrna-transcripts.fa
test-data/predict_augustus/Genus_species.proteins.fa
test-data/predict_augustus/Genus_species.scaffolds.fa
test-data/predict_augustus/Genus_species.stats.json
test-data/predict_augustus/Genus_species.tbl
test-data/predict_augustus/Genus_species.validation.txt
test-data/predict_augustus/fly.parameters.json
test-data/predict_bam/Genus_species.cds-transcripts.fa
test-data/predict_bam/Genus_species.discrepency.report.txt
test-data/predict_bam/Genus_species.error.summary.txt
test-data/predict_bam/Genus_species.gbk
test-data/predict_bam/Genus_species.gff3
test-data/predict_bam/Genus_species.mrna-transcripts.fa
test-data/predict_bam/Genus_species.proteins.fa
test-data/predict_bam/Genus_species.stats.json
test-data/predict_bam/Genus_species.tbl
test-data/predict_bam/Genus_species.validation.txt
test-data/predict_bam/fly.parameters.json
test-data/predict_scratch/Genus_species.cds-transcripts.fa
test-data/predict_scratch/Genus_species.discrepency.report.txt
test-data/predict_scratch/Genus_species.error.summary.txt
test-data/predict_scratch/Genus_species.gbk
test-data/predict_scratch/Genus_species.gff3
test-data/predict_scratch/Genus_species.mrna-transcripts.fa
test-data/predict_scratch/Genus_species.proteins.fa
test-data/predict_scratch/Genus_species.scaffolds.fa
test-data/predict_scratch/Genus_species.stats.json
test-data/predict_scratch/Genus_species.tbl
test-data/predict_scratch/Genus_species.validation.txt
test-data/predict_scratch/fly.parameters.json
tool-data/funannotate.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 000000000000 -r 40b87aef5241 README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md Thu Aug 26 06:55:33 2021 +0000
b
@@ -0,0 +1,5 @@
+# Funannotate
+
+Funannotate can use GeneMark to predict genes, but due to licensing issues, we are not allowed to distribute GeneMark automatically.
+
+If you want to use it, the Galaxy administrator needs to install GeneMark following the instructions on https://github.com/nextgenusfs/funannotate, and set the `GENEMARK_PATH` environment variable on the job destination.
b
diff -r 000000000000 -r 40b87aef5241 funannotate_predict.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/funannotate_predict.xml Thu Aug 26 06:55:33 2021 +0000
[
b'@@ -0,0 +1,487 @@\n+<tool id="funannotate_predict" name="Funannotate predict annotation" profile="20.01" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">\n+    <description></description>\n+    <macros>\n+        <import>macros.xml</import>\n+    </macros>\n+    <requirements>\n+        <expand macro="requirements" />\n+    </requirements>\n+    <version_command><![CDATA[funannotate check --show-versions]]></version_command>\n+    <command><![CDATA[\n+#if $genemark.genemark_license:\n+    if [ -z "\\$GENEMARK_PATH" ] ; then echo "GeneMark is not installed on this Galaxy server." >&2 ; exit 1 ; fi &&\n+    if [ ! -f "\\$GENEMARK_PATH/gmes_petap.pl" ] ; then echo "GeneMark is not installed properly on this Galaxy server." >&2 ; exit 1 ; fi &&\n+    ## GeneMark only search for license in ~/.gm_key\n+    cp \'${genemark.genemark_license}\' ~/.gm_key &&\n+#end if\n+\n+#if $uglyTestingHack == "true":\n+    ## funannotate_db contains some hard coded path, need to rewrite one for tests (not in real life when using data manager)\n+    ## Need to copy too as the test_data is read only on CI\n+    cp -r \'${database.fields.path}\' \'./hacked_database\' &&\n+    sed -i.bak \'s|/tmp/prout|\'`pwd`\'/hacked_database|\' \'./hacked_database/trained_species/fly/info.json\' &&\n+#end if\n+\n+funannotate predict\n+--input \'${input}\'\n+--out output\n+\n+#if $uglyTestingHack == "true":\n+    --database `pwd`\'/hacked_database\'\n+#else\n+    --database \'$database.fields.path\'\n+#end if\n+\n+--species \'${organism.species}\'\n+--isolate \'${organism.isolate}\'\n+--strain \'${organism.strain}\'\n+--organism \'${organism.organism}\'\n+--ploidy ${organism.ploidy}\n+--SeqCenter \'${organism.SeqCenter}\'\n+--SeqAccession \'${organism.SeqAccession}\'\n+--name \'${organism.name}\'\n+--numbering ${organism.numbering}\n+\n+#if $parameters:\n+    --parameters \'${parameters}\'\n+#end if\n+\n+#if $evidences.rna_bam:\n+    --rna_bam ${evidences.rna_bam}\n+#end if\n+\n+#set est_list = ""\n+#if 
len($evidences.transcript_evidence) > 0:\n+    #for $estev in $evidences.transcript_evidence:\n+        #if $estev:\n+        #set est_list += " \'" + str($estev) + "\'"\n+        #end if\n+    #end for\n+#end if\n+#if $est_list:\n+    --transcript_evidence $est_list\n+#end if\n+\n+#if $evidences.prot_evidence == \'custom\':\n+    --protein_evidence\n+    #for $protev in $evidences.protein_evidence:\n+        \'${protev}\'\n+    #end for\n+#end if\n+--p2g_pident ${evidences.p2g_pident}\n+--p2g_prefilter ${evidences.p2g_prefilter}\n+\n+#if $augustus.augustus_species != \'none\':\n+    --augustus_species \'${augustus.augustus_species}\'\n+#end if\n+--min_training_models ${augustus.min_training_models}\n+${augustus.optimize_augustus}\n+\n+#if $genemark.genemark_license:\n+    --genemark_mode \'${genemark.genemark_mode}\'\n+    #if $genemark.genemark_mod:\n+        --genemark_mod \'${genemark.genemark_mod}\'\n+    #end if\n+    --soft_mask ${genemark.soft_mask}\n+#end if\n+\n+--busco_seed_species \'${busco.busco_seed_species}\'\n+--busco_db \'${busco.busco_db}\'\n+\n+$evm.repeats2evm\n+#if $evm.evm_partitioning.evm_partition == "yes":\n+--evm-partition-interval ${evm.evm_partitioning.evm_partition_interval}\n+#else:\n+--no-evm-partitions\n+#end if\n+#if $evm.weights:\n+    --weights \'${evm.weights}\'\n+#end if\n+\n+#if $other_predictors.stringtie:\n+    --stringtie \'${other_predictors.stringtie}\'\n+#end if\n+#if $other_predictors.maker_gff:\n+    --maker_gff \'${other_predictors.maker_gff}\'\n+#end if\n+#if $other_predictors.pasa_gff:\n+    --pasa_gff \'${other_predictors.pasa_gff}:${other_predictors.pasa_gff_weight}\'\n+#end if\n+#if $other_predictors.other_gff:\n+    --other_gff \'${other_predictors.other_gff}:${other_predictors.other_gff_weight}\'\n+#end if\n+\n+--min_intronlen ${filtering.min_intronlen}\n+--max_intronlen ${filtering.max_intronlen}\n+--min_protlen ${filtering.min_protlen}\n+${filtering.keep_no_stops}\n+--repeat_filter 
${filtering.repeat_filter}\n+\n+--cpus \\${GALAXY_SLOTS:-2}\n+\n+&&\n+\n+mv output/predict_results/*.gbk out.gbk &&\n+mv output/predict_results/*.tbl out.tbl &&\n+mv output/predict_results/*.gff3 out.gff3 &&\n+mv output/pre'..b'section>\n+            <section name="evidences">\n+                <param name="rna_bam" value="SRR7458692.bam" />\n+                <param name="transcript_evidence" value="predict_scratch/Genus_species.mrna-transcripts.fa" />\n+                <conditional name="prot_evidence">\n+                    <param name="prot_evidence_source" value="custom" />\n+                    <param name="protein_evidence" value="predict_scratch/Genus_species.proteins.fa" />\n+                </conditional>\n+            </section>\n+            <section name="augustus">\n+                <param name="min_training_models" value="3" />\n+            </section>\n+            <section name="busco">\n+                <param name="busco_seed_species" value="fly" />\n+                <param name="busco_db" value="insecta" />\n+            </section>\n+            <!-- non deterministic results, so can\'t be more precise here -->\n+            <output name="annot_gbk">\n+                <assert_contents>\n+                    <has_text text="  TITLE     Direct Submission" />\n+                    <has_text text="/locus_tag=&quot;FUN_000001&quot;" />\n+                </assert_contents>\n+            </output>\n+            <output name="annot_tbl">\n+                <assert_contents>\n+                    <has_text text=">Feature sample" />\n+                    <has_text text="gnl|ncbi|FUN_000001-T1_mrna" />\n+                </assert_contents>\n+            </output>\n+            <output name="annot_gff3">\n+                <assert_contents>\n+                    <has_text text="##gff-version 3" />\n+                    <has_text text="ID=FUN_000001-T1;Parent=FUN_000001;product=hypothetical protein;" />\n+                </assert_contents>\n+            
</output>\n+            <output name="fasta_proteins">\n+                <assert_contents>\n+                    <has_text text=">FUN_000001-T1 FUN_000001" />\n+                </assert_contents>\n+            </output>\n+            <output name="fasta_transcripts_mrna">\n+                <assert_contents>\n+                    <has_text text=">FUN_000001-T1 FUN_000001" />\n+                </assert_contents>\n+            </output>\n+            <output name="fasta_transcripts_cds">\n+                <assert_contents>\n+                    <has_text text=">FUN_000001-T1 FUN_000001" />\n+                </assert_contents>\n+            </output>\n+            <assert_stderr>\n+                <has_text text="augustus     busco"/>\n+                <has_text text="glimmerhmm   busco"/>\n+                <has_text text="snap         busco"/>\n+                <has_text text="Running BUSCO to find conserved gene models for training ab-initio predictors"/>\n+                <not_has_text text="Skipping CodingQuarry as no --rna_bam passed"/>\n+                <has_text text="Running Augustus gene prediction using genus_species parameters"/>\n+                <has_text text="Training Augustus using BUSCO gene models"/>\n+                <has_text text="Aligning transcript evidence to genome with minimap2"/>\n+                <has_text text="Found 16 alignments, wrote GFF3 and Augustus hints to file"/>\n+                <has_text text="Extracting hints from RNA-seq BAM file using bam2hints"/>\n+                <has_text text="Mapping 13 proteins to genome using diamond and exonerate"/>\n+                <has_text text="Found 4 preliminary alignments --> aligning with exonerate"/>\n+            </assert_stderr>\n+        </test>\n+    </tests>\n+    <help><![CDATA[\n+Funannotate_ predict\n+--------------------\n+\n+Funannotate_ is a pipeline for genome annotation (built specifically for fungi, but will also work with higher eukaryotes).\n+\n+Script takes genome 
multi-fasta file and a variety of inputs to do a comprehensive whole\n+genome gene prediction.  Uses AUGUSTUS, GeneMark, Snap, GlimmerHMM, BUSCO, EVidence Modeler,\n+tbl2asn, tRNAScan-SE, Exonerate, minimap2.\n+\n+.. _Funannotate: http://funannotate.readthedocs.io\n+    ]]></help>\n+    <expand macro="citations" />\n+</tool>\n'
b
diff -r 000000000000 -r 40b87aef5241 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Aug 26 06:55:33 2021 +0000
b
b'@@ -0,0 +1,159 @@\n+<?xml version="1.0"?>\n+<macros>\n+    <token name="@TOOL_VERSION@">1.8.9</token>\n+    <token name="@VERSION_SUFFIX@">0</token>\n+\n+    <xml name="requirements">\n+        <requirement type="package" version="@TOOL_VERSION@">funannotate</requirement>\n+    </xml>\n+\n+    <xml name="citations">\n+        <citations>\n+            <citation type="doi">10.5281/zenodo.4054262</citation>\n+        </citations>\n+    </xml>\n+\n+    <xml name="augustus_species">\n+        <!-- list generated from a Funannotate database directory, listing trained_species/* -->\n+        <option value="adorsata">adorsata</option>\n+        <option value="aedes">aedes</option>\n+        <option value="amphimedon">amphimedon</option>\n+        <option value="ancylostoma_ceylanicum">ancylostoma_ceylanicum</option>\n+        <option value="anidulans">anidulans</option>\n+        <option value="arabidopsis">arabidopsis</option>\n+        <option value="aspergillus_fumigatus">aspergillus_fumigatus</option>\n+        <option value="aspergillus_nidulans">aspergillus_nidulans</option>\n+        <option value="aspergillus_oryzae">aspergillus_oryzae</option>\n+        <option value="aspergillus_terreus">aspergillus_terreus</option>\n+        <option value="bombus_impatiens1">bombus_impatiens1</option>\n+        <option value="bombus_terrestris2">bombus_terrestris2</option>\n+        <option value="botrytis_cinerea">botrytis_cinerea</option>\n+        <option value="b_pseudomallei">b_pseudomallei</option>\n+        <option value="brugia">brugia</option>\n+        <option value="cacao">cacao</option>\n+        <option value="caenorhabditis">caenorhabditis</option>\n+        <option value="camponotus_floridanus">camponotus_floridanus</option>\n+        <option value="candida_albicans">candida_albicans</option>\n+        <option value="candida_guilliermondii">candida_guilliermondii</option>\n+        <option value="candida_tropicalis">candida_tropicalis</option>\n+        <option 
value="c_elegans_trsk">c_elegans_trsk</option>\n+        <option value="chaetomium_globosum">chaetomium_globosum</option>\n+        <option value="chicken">chicken</option>\n+        <option value="chiloscyllium">chiloscyllium</option>\n+        <option value="chlamy2011">chlamy2011</option>\n+        <option value="chlamydomonas">chlamydomonas</option>\n+        <option value="chlorella">chlorella</option>\n+        <option value="ciona">ciona</option>\n+        <option value="coccidioides_immitis">coccidioides_immitis</option>\n+        <option value="Conidiobolus_coronatus">Conidiobolus_coronatus</option>\n+        <option value="coprinus">coprinus</option>\n+        <option value="coprinus_cinereus">coprinus_cinereus</option>\n+        <option value="coyote_tobacco">coyote_tobacco</option>\n+        <option value="cryptococcus">cryptococcus</option>\n+        <option value="cryptococcus_neoformans_gattii">cryptococcus_neoformans_gattii</option>\n+        <option value="cryptococcus_neoformans_neoformans_B">cryptococcus_neoformans_neoformans_B</option>\n+        <option value="cryptococcus_neoformans_neoformans_JEC21">cryptococcus_neoformans_neoformans_JEC21</option>\n+        <option value="culex">culex</option>\n+        <option value="debaryomyces_hansenii">debaryomyces_hansenii</option>\n+        <option value="E_coli_K12">E_coli_K12</option>\n+        <option value="elephant_shark">elephant_shark</option>\n+        <option value="encephalitozoon_cuniculi_GB">encephalitozoon_cuniculi_GB</option>\n+        <option value="eremothecium_gossypii">eremothecium_gossypii</option>\n+        <option value="fly">fly</option>\n+        <option value="fly_exp">fly_exp</option>\n+        <option value="fusarium">fusarium</option>\n+        <option value="fusarium_graminearum">fusarium_graminearum</option>\n+        <option value="galdieria">galdieria</option>\n+        <option value="generic">generic</option>\n+        <option 
value="heliconius_melpomene1">heliconius_melpomene1</option>\n+        <option value="histoplasma">histoplasma</opt'..b'lue="pneumocystis">pneumocystis</option>\n+        <option value="rhincodon">rhincodon</option>\n+        <option value="rhizopus_oryzae">rhizopus_oryzae</option>\n+        <option value="rhodnius">rhodnius</option>\n+        <option value="rice">rice</option>\n+        <option value="saccharomyces">saccharomyces</option>\n+        <option value="saccharomyces_cerevisiae_rm11-1a_1">saccharomyces_cerevisiae_rm11-1a_1</option>\n+        <option value="saccharomyces_cerevisiae_S288C">saccharomyces_cerevisiae_S288C</option>\n+        <option value="s_aureus">s_aureus</option>\n+        <option value="schistosoma">schistosoma</option>\n+        <option value="schistosoma2">schistosoma2</option>\n+        <option value="schizosaccharomyces_pombe">schizosaccharomyces_pombe</option>\n+        <option value="scyliorhinus">scyliorhinus</option>\n+        <option value="sealamprey">sealamprey</option>\n+        <option value="s_pneumoniae">s_pneumoniae</option>\n+        <option value="strongylocentrotus_purpuratus">strongylocentrotus_purpuratus</option>\n+        <option value="sulfolobus_solfataricus">sulfolobus_solfataricus</option>\n+        <option value="template_prokaryotic">template_prokaryotic</option>\n+        <option value="tetrahymena">tetrahymena</option>\n+        <option value="thermoanaerobacter_tengcongensis">thermoanaerobacter_tengcongensis</option>\n+        <option value="tomato">tomato</option>\n+        <option value="toxoplasma">toxoplasma</option>\n+        <option value="tribolium2012">tribolium2012</option>\n+        <option value="trichinella">trichinella</option>\n+        <option value="ustilago">ustilago</option>\n+        <option value="ustilago_maydis">ustilago_maydis</option>\n+        <option value="verticillium_albo_atrum1">verticillium_albo_atrum1</option>\n+        <option 
value="verticillium_longisporum1">verticillium_longisporum1</option>\n+        <option value="volvox">volvox</option>\n+        <option value="wheat">wheat</option>\n+        <option value="Xipophorus_maculatus">Xipophorus_maculatus</option>\n+        <option value="yarrowia_lipolytica">yarrowia_lipolytica</option>\n+        <option value="zebrafish">zebrafish</option>\n+    </xml>\n+\n+    <xml name="busco_species">\n+        <!-- list generated from a Funannotate database directory, with the "funannotate database -show-buscos command" -->\n+        <option value="eukaryota">eukaryota</option>\n+        <option value="metazoa">metazoa</option>\n+        <option value="nematoda">nematoda</option>\n+        <option value="arthropoda">arthropoda</option>\n+        <option value="insecta">insecta</option>\n+        <option value="endopterygota">endopterygota</option>\n+        <option value="hymenoptera">hymenoptera</option>\n+        <option value="diptera">diptera</option>\n+        <option value="vertebrata">vertebrata</option>\n+        <option value="actinopterygii">actinopterygii</option>\n+        <option value="tetrapoda">tetrapoda</option>\n+        <option value="aves">aves</option>\n+        <option value="mammalia">mammalia</option>\n+        <option value="euarchontoglires">euarchontoglires</option>\n+        <option value="laurasiatheria">laurasiatheria</option>\n+        <option value="fungi">fungi</option>\n+        <option value="dikarya">dikarya</option>\n+        <option value="ascomycota">ascomycota</option>\n+        <option value="pezizomycotina">pezizomycotina</option>\n+        <option value="eurotiomycetes">eurotiomycetes</option>\n+        <option value="sordariomycetes">sordariomycetes</option>\n+        <option value="saccharomycetes">saccharomycetes</option>\n+        <option value="saccharomycetales">saccharomycetales</option>\n+        <option value="basidiomycota">basidiomycota</option>\n+        <option 
value="microsporidia">microsporidia</option>\n+        <option value="embryophyta">embryophyta</option>\n+        <option value="protists">protists</option>\n+        <option value="alveolata_stramenophiles">alveolata_stramenophiles</option>\n+    </xml>\n+</macros>\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/SRR7458692.bam
b
Binary file test-data/SRR7458692.bam has changed
b
diff -r 000000000000 -r 40b87aef5241 test-data/cleaned.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cleaned.fa Thu Aug 26 06:55:33 2021 +0000
b
b'@@ -0,0 +1,2698 @@\n+>sample\n+GTGGCCGGCTGATTTCACGTCCTAACTTTGGGCTTAACTGGTTCGCCAGTTGACTTTCTTCGCCATCATGTGATGCATTA\n+ATTAAACAATAATTACTAATTGACAGTAATTAATAATTGTGGCAAAAAGCGCGACACGTTTTTTCGGCAAACTCCTCGGA\n+AGACCGATTGTTTAAAGACGTAGGGAAAAGTAGTTCCCAAGCATTTTAAAAAGATACCTATGACATGTGACACCTTTAAA\n+GTGCAATACAAGTTTTCATCTCTTTATATCCTTTTACTCCCTAATTTGAATATAAAAGGAATCGCATTGAGAGTATAAAG\n+GCTTTAGTTCTTATCGATAGATAGTTAGTGATGAAATAAAATTATAACCGTGGTTTTAGTTTGAAATGTTGTAAAGACTT\n+TCTTTAAATTTAACCAAATTTATGTGATAAAATGGATATTCCATAGATAAGACATTTAAGTTAAATGTTTTTATACATCA\n+AAAAGGAAACATTGTGCACGCTATCAAATGGTATTCTTAAAATCGAGTCAGTTAGGTAAGTTATTAATTAAATGGTAACT\n+TTTTAACGTGCGTCAAATAATCTAGAAATTCTTCTTCAACTCATCCAAAACATTCTCAACACCACAATATCTATGCTCAG\n+CGATGACAAATTTCTCCTGATTTCTTAATTTTCTATCTATGCTATGCGATCAATCAACGAATGTGTGCTAATTTCTTGTG\n+ACGATTATTTTGCAAAGTCGTCTCCGCGTTAATATCCGATGTAAATAAACCTATGAAAATCGCAAAGATCTATTCCTTTG\n+CGCTTAACCTTGTTATTGAACTCCCTCCCGCCCGGATTTTCGCAGCTTCCAACTAAGTGATACCTTTTAAACAAACGCCA\n+CAACAAAACAGGTGACAATCATATTTTATCAACAAAAGAAAAGAGAGATAATATCTGCTGCTAATTCAATTTATTGGGCT\n+TTGTGTTTATTTGCATTGGGAATCCGTGGAGCTGATATTGTTTACTTTGGCAATTTCCCAGTCATTATGGTCGCCGTATA\n+ATGTGAATGGCCAACAGGAAAATTTCACAGATTCCACTGGTTATTCACTGTTCATGCTGGCGGTTGATCCGCTCCAAATC\n+CCGGATGAGGCGGCGGAAGATCCTTATACCGCTTACTTACGTATTCACTGCTTAACATCTTATCATAGAATTCGTTAGCT\n+TTCACATAGTTTGGATTCCGAATGAATCTCTTCTGTCGACCTCGCGTTTTTGGCAATTTTCGGAGCTCTTCGCTACTGCT\n+AGTTGATTTAAGGCCTACATCTTTCTGATCCTTAATCATCCTTTTAGGTGATTCTCCTGCTACCATGGGATCCACCAGTT\n+TCATATATAATGGATCGGGCTCGTTTGCAAGTGGGTTTCCAGTTTCAGAAGACTCTGACGTTCTAAGCGCCAATAAAATG\n+AAAATAACCAAAAGGAAACTAGACATGTTTCGTTACAGACAGATATAGATTGGATATTATTGAAAGAAAATGAAAATAAA\n+CAGCGATAATGATCTGTGACTTATTGGAAATTAGATGGCTTATGGATGATGGGGCGATAAATTCGAACAAACACTGAGAG\n+CATTTTTGGGAGCATTGTGGGAGCTTTTCTATTATCCAGTACTTTATATACATATATCATTTATATACTAATCATTTCTG\n+GTAGCCGTTCGTAATCAGGATCGGATCCTTTTTTACCCGTTAGTCAGCTAGAAGAAACGAAAAATTAAAATAGTAAAATC\n+TAAAAGTATACAAAAATTCAAATAGTAAAACCAAAAAGTATTAAAAAAAATATCAATCGTTTTTAAACGTTGATTTTTCA\n+GCTTGTGGGGTGATTTATCGCTAACTTGGAAAATGATAATAAAGCATTATCCATAATA
TTAGTTGTGGAAATGAAATTCA\n+AATAGATGTTGTGTTATATACGATGAGGATGTTGCATTTGAGTCCCCGGAAATATAGTATTTTTTTTACCGAAGGTATTA\n+TCGTACCGGTCAAGTACGGTCACACTGCCAAGCGCAGATTTGAGGATTTCTAGATTTGGCCTCTTGATGGACTAGAAGCG\n+CTACCAAAACTGGGGCTTGAGTTGAATTACCTGTTGGAAGACACAATGCCACCCACGATCAACAATTCGGCGGTAAACAG\n+TGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAGTAAGTGAACAGATCCCTAAACAGACGCCAGATACTCAGAC\n+TGATGTGTACCTTGCAGATCCGAGATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAA\n+GTTTCTGCAGTACCCCTTCGACAGCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACG\n+TGCTGACGGAACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTG\n+GACCCCGCCGATGAAAAACTGCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGAC\n+GGTGTCATGGTTGCGCAAATCCGAGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGG\n+CCAAGGTCGGTTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATC\n+GAGAAGACCTTCAGCGACACCAAGAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCC\n+TATCTTCCCCGACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGA\n+ACGTGCCCGCCCAGCTGGAGGAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCC\n+TACTTCCTGCCCACAGAGCAGACGCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGAGGAGGA\n+GTACGAGTACAAGATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCT\n+TCGTGATGCGTCAGGACGGCATCTACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGC\n+CAGCAACCCAACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCTATAGCGAGCTTTACTTTGTATTATTTCTAC\n+CAGGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGG\n+CGAGGAGGAGGAGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGG\n+ACGCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCGATCGACAGTCTCGTTCTCGG\n+AGTCGAACTCGCAGCGGGTCCAGTTCAGGATCTGGATCTGGCTCCGGCTCTCGGGCCAGCAGCCGCTCAAAGTCTGGTTC\n+TCGGTCTGGTAGCGGCTCCAGATCACGCACAAATTCGCCGGCAGGATCCCAGAAATCCGGATCCAGATCGAGATCGGTAT\n+CACGTTCCCGATCCCGTTCCAAGTCCGGCTCTCGGTCGCGTTCTAGGTCGAGATCCAAGTCCGGTTCCCGATCACGTTCG\n+GGCTCCAGATCTGGCTCTGGGTCGCGATCGCCCAGCCGGTCTCGCAGTGGCTCGCCTTCTGGTTCA
GGATCCAGCTCTGG\n+AAGCGCCTCAGATGAATGATTAATTACAAAAAAC'..b'\n+ATCGGCGACTGTCTGTCATTGTATCCTTCTGCATTCCATTCGTATGTCCGTTTGTCTGTTCATTAGTCCGTCCGTTTGTC\n+CGCCCGTAACTCCGTCCCTGTGTCCTTTTTTTCCGTCCGTTTTCCTGATAAATACTTTTTAAGGAATCCAGCTTACCCTT\n+TTGCACTACAGGTAGCGTGAATAAAAATAAAATAAAAGAGCTAATTTTAAATTAAAATAAAAACAAAAACATCTCTTTGT\n+GTTTATATTTTCCCACTGTGCAGCACAAACACCCCTTTTGCCCACTTAAGCTTGCCACGTTTTCCCTTACTTATAACAGA\n+ACTTACAAACATGCGTTTGGTTTTCGTGGGTTGAGTTTGGTGCTCTCCGTTTACTTTTGCGGTTTTGTTCAGCGCTGCAT\n+ATATTTCCATATTAATTCCCCTGATTATGGGGAATCGTCATCGTCTGCGTTCTGTTCCCTGACGGTTTTGCCCAAATCCA\n+AATCCAAATCCAAATCCAAATCCAAATGCGAATGCGAATATCTGAATGCAGACCACAATTCGACGATGATGTTTCTGTTT\n+CAGAATAATCTAAATCGGCATTATTTATGCATTCAGTTCTTGCATTCATCACAACCACTTAGCGGTTCCACTTCAACGAA\n+CCCACAGATACACAATACATACATATTCATTTATGTATGTATGTACATACCGTCGTGTATATAATTAGTATGCATAGAAG\n+ATACATACGTATGCATTTTAATGGACCACAATTCCCTCGGCCAAAAGAGTGCTCCGATAATAAATATTAATTTTAATAAA\n+TGCTGATGCAGGTCAGCTGATTTCAAACGACCCTTTGTGTCACGGGGCAATGACTTTTGCTCAGTATATGTAGTATTCAA\n+TTTTCAATTTTCCGAAAATGGTATTACAATTACAATTTTTTAAATATTAGATCTCAAAAAATTGCTTGGCATACTTAGCA\n+ACATATCTTTAACTCTCAAGAGTCATATACACCCGATTTTGTTTCAAGTCCCCCTGGGACTTAAAAACCGACAATTACGC\n+CCAACTTGCAATCATTTGGAAAAAAAAACACCAGCTACGAACTTATCAAAACTTATATATCAGCAAAAAATAAATGGGTC\n+TCTATGCGCAGTTATCGGCCCACAAAATATATACAAAAGAAATGACATGTCATTAACTCCGGCCAAGGGCGTGCTTTTTG\n+GTGCCATCGCATTTGGGGGTAACTAGTCGCGGGTCGACCCAGAATCACATCTCCAGGAAGTGGATGTCCAGGGCGTGGTG\n+GTCCACATCGACGGGAGGGTGCAGCTGCAGGAACTTGACGTAGGCCAGGAAGGTGGTCCAGCACATGCTGAAGAAGGAGG\n+TAAACACGACCTGGTTTCGCGCCGGAACAAAGGCGAAGTTCACCGTCTGCACGCAGGGCCAATAAATAACGCCCACCTTG\n+TAGGCGTCCAGGAATTTATCGCTGACCTGGGAACGAGAGGGGGCAACGGGGTGGGTGAGTGCAGTGACATAATGCCCCCA\n+GTAGCAGTCGGAAAATGGAAATGGAAAATGCCAGCGCGGAAAAATTGTAATTAAGTGCCGCCGTCCAGCAGCGGTAGGAG\n+ACGTTTAATATTTTATTTATGGCCTGATCTCTTCTCCTCTTTTAAGGGGCGCTTGACTGGGGCGTGGCATTCAGTTAATG\n+TTGTTAATTAAAAGGCAACGCCTGGCTAGAAAAATTATCATCCAGGCACAGACTGTGTAACAAATGTAACATAGAGCACT\n+TTGGCACATTTTCAATTGGCATGAGAACTTCATTTAACTACAAAGACTATCCTTGTGCCATAAAACTTTCTTTTGTGGAT\n+CTATAGAAGTTGAATCGTTTTACAATCCTCAC
ATAAAATATAGAACTTCGATGGCGAACGCATAGCTATTCCGAAAGCGA\n+TCTTGAAACTTGTACTGCCTTCTACCCGCTTCCAATTTGAAACTACTTTTAGGTGAGGGTCGAGGAACTTACGTCTCTAT\n+CGGCCACGCACACAACTCTTTTACGGCCCAAACCTGAGCCTATCTCAACCAAATCGCTCAGAATAGTCGAGCGATATATC\n+TTTGGAGAACGCATGCAAGGCAAATGAGACGACGCACGTTTGACACAAATATATAGCAAAACATTTACATTTATCAAGAT\n+AAATGGTTTCTAATGATATGGAAGTCAGCCATCGAACCGAGTTTGGCCATGGAGCACAGTGCCTGAAAGTATCTCGCAGT\n+ATGGCCGCAGAGCCAACAAATCAATGTTGACAGCTCGAAACGGAGGACCAGACGCAGAAATTGGGAATGGAGGGAGGTGA\n+GCGACAGACAGGTGATATTTATGCATGCTTACATTGCAGCCTCTGTCTCCGCCACAGATGCAGATTCATAGATACAGATA\n+CAAAGATGGGGATACTGCCTCTGAATGTGTCGTGAGAAAATGGATTCCGAAATCAAAAACTACAGACCGAAAACCGATTT\n+CCAAATAAATAACAATGCATACTGGGCACACACTTAGTAATGAGCACATCTGCGAAATGAAAGACCTTACCAGATAACTG\n+TCAACATTTTAAAATCGTTAAAAGTTGATTCAGGATTTGGGGTAGCCGTGCCAACGGTGTGAATGGGCATGAATAATATG\n+ACATATTCCTTTCCCGAGTAATGAAAAATGTTTTCAGCGAATCTATCCACGAATACCGTACATAAAATAGAGACTCTTCT\n+GCTTGTTAGATATCGTTGGCCCCCGACAAAATGTGGTTCTTTGAAATGAAATTTGAAATAAGTTTGTTGTGCTTAGGCCT\n+TAGTGACTTTGGGATGGGAATATACTTCACCTCCCGCTTGGCCTCCGCGTACGAGTTGCCCTCCATCAAGGTCATGAAGA\n+AGAGGAACGAGCTGATGGCCATCGGATCATAAGCGGTCTGCTCGGTGATCGCCTTGCAGAGCGATGACTTAATGTCGGTG\n+CGCGGCCACATAACGCTGGCCAATCTGATCCACACATATATGGTGGGCCCCATAAAGAAGAAGCCGAATAAGCTGAACCT\n+GCAATGGAATTAGCCAAACCATATCGGGTCCAATTAGCAATCGAATCGGGCCAAGACCCGTGCAGATTGCTCATCACCTA\n+AGGCACTTCATCCAGTCGTACGTCCGGAATGTCTTCTTCTCGATCATGGTCTGCTCGATGAGGGAGCCGCAGGGCCAGAG\n+GGTGCCATACGATATCATGCCGCGTAGGACTTTGTATTTGCTTGTAATATTCACCAAGCTACGAAACATTTTCAACGGTC\n+TCTTGTGATGATCTCGTCGAATGTCAGTGATAGTTTCACTTCCACAAAAGTGCTATAAACACACGCTGAAAGATAAATTG\n+TTTTAGATCAATACGGTGGGCTTTACATGGCTGAGTTCGCTTGGGTTAGTTTTATTAGGTGCCATATTTGTCTTGGCGGA\n+TTTCACTTTTGCAAAAAGTTCGGTTTTAACTCATACGACCTAGAAAGTTATCGAGTACTGATTGTCTTGGGTGGCCATTC\n+CAAATTCAACCGCATTATTCTCCCTCGACAGTGCCTAACTTAGCCACAGAATTCCAGCTCAAGACAATTGCAAATAAGTG\n+TCGGCCAAGCACAGCTGGCACTTCTGGACTCTTCTCTGAACTGATGGATGGCTAAGAGGTGGGTTGGTCTTTGGAAATGG\n+AGAGGGGGTGGTGCCCGCCATCCAGTAACACATGTCAAGTATTATTATCGAGCAACTACAAACGAAATACAAACATACAT\n+TTAATTGCCAAGAATGTTCCTAGCCAAACAAATGTCGAAA
TAATGTCGGGCCACGGCCAAAGATTTATCATATTCACTCG\n+ACTGTCATAAAAAGCAATTAGGCCCGGGCGAAAGATACACGACCGGCAGTCCCGAAATGT\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/cleaned_ident.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cleaned_ident.fa Thu Aug 26 06:55:33 2021 +0000
b
b'@@ -0,0 +1,2849 @@\n+>sample\n+GTGGCCGGCTGATTTCACGTCCTAACTTTGGGCTTAACTGGTTCGCCAGTTGACTTTCTTCGCCATCATGTGATGCATTA\n+ATTAAACAATAATTACTAATTGACAGTAATTAATAATTGTGGCAAAAAGCGCGACACGTTTTTTCGGCAAACTCCTCGGA\n+AGACCGATTGTTTAAAGACGTAGGGAAAAGTAGTTCCCAAGCATTTTAAAAAGATACCTATGACATGTGACACCTTTAAA\n+GTGCAATACAAGTTTTCATCTCTTTATATCCTTTTACTCCCTAATTTGAATATAAAAGGAATCGCATTGAGAGTATAAAG\n+GCTTTAGTTCTTATCGATAGATAGTTAGTGATGAAATAAAATTATAACCGTGGTTTTAGTTTGAAATGTTGTAAAGACTT\n+TCTTTAAATTTAACCAAATTTATGTGATAAAATGGATATTCCATAGATAAGACATTTAAGTTAAATGTTTTTATACATCA\n+AAAAGGAAACATTGTGCACGCTATCAAATGGTATTCTTAAAATCGAGTCAGTTAGGTAAGTTATTAATTAAATGGTAACT\n+TTTTAACGTGCGTCAAATAATCTAGAAATTCTTCTTCAACTCATCCAAAACATTCTCAACACCACAATATCTATGCTCAG\n+CGATGACAAATTTCTCCTGATTTCTTAATTTTCTATCTATGCTATGCGATCAATCAACGAATGTGTGCTAATTTCTTGTG\n+ACGATTATTTTGCAAAGTCGTCTCCGCGTTAATATCCGATGTAAATAAACCTATGAAAATCGCAAAGATCTATTCCTTTG\n+CGCTTAACCTTGTTATTGAACTCCCTCCCGCCCGGATTTTCGCAGCTTCCAACTAAGTGATACCTTTTAAACAAACGCCA\n+CAACAAAACAGGTGACAATCATATTTTATCAACAAAAGAAAAGAGAGATAATATCTGCTGCTAATTCAATTTATTGGGCT\n+TTGTGTTTATTTGCATTGGGAATCCGTGGAGCTGATATTGTTTACTTTGGCAATTTCCCAGTCATTATGGTCGCCGTATA\n+ATGTGAATGGCCAACAGGAAAATTTCACAGATTCCACTGGTTATTCACTGTTCATGCTGGCGGTTGATCCGCTCCAAATC\n+CCGGATGAGGCGGCGGAAGATCCTTATACCGCTTACTTACGTATTCACTGCTTAACATCTTATCATAGAATTCGTTAGCT\n+TTCACATAGTTTGGATTCCGAATGAATCTCTTCTGTCGACCTCGCGTTTTTGGCAATTTTCGGAGCTCTTCGCTACTGCT\n+AGTTGATTTAAGGCCTACATCTTTCTGATCCTTAATCATCCTTTTAGGTGATTCTCCTGCTACCATGGGATCCACCAGTT\n+TCATATATAATGGATCGGGCTCGTTTGCAAGTGGGTTTCCAGTTTCAGAAGACTCTGACGTTCTAAGCGCCAATAAAATG\n+AAAATAACCAAAAGGAAACTAGACATGTTTCGTTACAGACAGATATAGATTGGATATTATTGAAAGAAAATGAAAATAAA\n+CAGCGATAATGATCTGTGACTTATTGGAAATTAGATGGCTTATGGATGATGGGGCGATAAATTCGAACAAACACTGAGAG\n+CATTTTTGGGAGCATTGTGGGAGCTTTTCTATTATCCAGTACTTTATATACATATATCATTTATATACTAATCATTTCTG\n+GTAGCCGTTCGTAATCAGGATCGGATCCTTTTTTACCCGTTAGTCAGCTAGAAGAAACGAAAAATTAAAATAGTAAAATC\n+TAAAAGTATACAAAAATTCAAATAGTAAAACCAAAAAGTATTAAAAAAAATATCAATCGTTTTTAAACGTTGATTTTTCA\n+GCTTGTGGGGTGATTTATCGCTAACTTGGAAAATGATAATAAAGCATTATCCATAATA
TTAGTTGTGGAAATGAAATTCA\n+AATAGATGTTGTGTTATATACGATGAGGATGTTGCATTTGAGTCCCCGGAAATATAGTATTTTTTTTACCGAAGGTATTA\n+TCGTACCGGTCAAGTACGGTCACACTGCCAAGCGCAGATTTGAGGATTTCTAGATTTGGCCTCTTGATGGACTAGAAGCG\n+CTACCAAAACTGGGGCTTGAGTTGAATTACCTGTTGGAAGACACAATGCCACCCACGATCAACAATTCGGCGGTAAACAG\n+TGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAGTAAGTGAACAGATCCCTAAACAGACGCCAGATACTCAGAC\n+TGATGTGTACCTTGCAGATCCGAGATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAA\n+GTTTCTGCAGTACCCCTTCGACAGCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACG\n+TGCTGACGGAACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTG\n+GACCCCGCCGATGAAAAACTGCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGAC\n+GGTGTCATGGTTGCGCAAATCCGAGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGG\n+CCAAGGTCGGTTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATC\n+GAGAAGACCTTCAGCGACACCAAGAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCC\n+TATCTTCCCCGACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGA\n+ACGTGCCCGCCCAGCTGGAGGAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCC\n+TACTTCCTGCCCACAGAGCAGACGCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGAGGAGGA\n+GTACGAGTACAAGATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCT\n+TCGTGATGCGTCAGGACGGCATCTACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGC\n+CAGCAACCCAACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCTATAGCGAGCTTTACTTTGTATTATTTCTAC\n+CAGGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGG\n+CGAGGAGGAGGAGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGG\n+ACGCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCGATCGACAGTCTCGTTCTCGG\n+AGTCGAACTCGCAGCGGGTCCAGTTCAGGATCTGGATCTGGCTCCGGCTCTCGGGCCAGCAGCCGCTCAAAGTCTGGTTC\n+TCGGTCTGGTAGCGGCTCCAGATCACGCACAAATTCGCCGGCAGGATCCCAGAAATCCGGATCCAGATCGAGATCGGTAT\n+CACGTTCCCGATCCCGTTCCAAGTCCGGCTCTCGGTCGCGTTCTAGGTCGAGATCCAAGTCCGGTTCCCGATCACGTTCG\n+GGCTCCAGATCTGGCTCTGGGTCGCGATCGCCCAGCCGGTCTCGCAGTGGCTCGCCTTCTGGTTCA
GGATCCAGCTCTGG\n+AAGCGCCTCAGATGAATGATTAATTACAAAAAAC'..b'ATCCAGATCGAGATCGGTAT\n+CACGTTCCCGATCCCGTTCCAAGTCCGGCTCTCGGTCGCGTTCTAGGTCGAGATCCAAGTCCGGTTCCCGATCACGTTCG\n+GGCTCCAGATCTGGCTCTGGGTCGCGATCGCCCAGCCGGTCTCGCAGTGGCTCGCCTTCTGGTTCAGGATCCAGCTCTGG\n+AAGCGCCTCAGATGAATGATTAATTACAAAAAACGGCGTTCATAATAAATAAGTTTATAATCAACCAAGTACATTTGAAA\n+ACTGAACTAACTCGATTTAATATCATTTTCGCCTCAACTCAGCGCTCGGGTTCGTTGCCCAGAATAGTTTTAAATAAAAT\n+CGGCAGTTTAACATAATTTATATTAGATGTTGTTGTTGTATTGCAAACAAGTCGGGTCCTAGTCGATTTACACTTGGCTG\n+AGATAAAACAACTAAGATTCAAATAATATCCTCATAAGAAGATGTAATTAAGACGTTTTTCTTAGGGGGTGCTTAGGAAT\n+TGATTAGATCGCCTTTGGGGAAGTGCAAACAATGTAAAATGATATAAAAGGGTATAAATTAAGTGGATATATGCATCTTC\n+GTTCCAACTACGTGGCGTCCATCAAAAAGCGCTGGAAGACTTCGCCATCGGAACTAGGTAGCTCTGTTTGTTGCGGTGAG\n+TAGATTCTCAAGTTCTGGAGTTGCTGCAGCGGAGAGCCATTCCCGCTGAAGTGTACCACCGCAACCGGCTGTAGGGATAT\n+GAGCTGTCCCTCCTCCCGCGGCTCCACACCACAGATGCCCAGCTTTTGGCACTGCTCAACGACAATGTCGTCGATTGACT\n+GCGAAAGCAGTGCCTCCTGCTCAGGATCCACAATGGAGCTGCTGTTGATGGCAGCTATTTCGGCGCTCGCTGGTGAAGGC\n+TGAGTATGCGAGTCGTTGCCCTCCAGGAACGCCTTTATCAGCTGCTCCGGTGTCTGGGCCTCCTCGGTGGGACATCGATG\n+GGTCCTTTGACTGACCTTGTACCTGAACGTCTTTTGGCAGAGCTCGCACTTGTAGGGCATCACTCCCGTGTGGATGCGCG\n+TGTGGACAAGGAAAGAGACTCGCTGCCGGAAACACTTGCCTGTGGACGAGTCCGAAATACGAAAAGGTTAGACATGGAGT\n+GACCCGGAAAAGAAGGTATACCTCTCCTTCATTTAAAGTAAATAGGGCAAATCGCAATGGAGTATGCTCATTTATAAGCT\n+GGCTAACAAAATAAGGGGCGGCTAATTAAGGGGTTTGATCGATACTCACCGCAGACTTCGCACTTGAAGGGCTTTTCGCC\n+GCTGTGAATCCTCTGGTGGTTATGCAGCGTAGACAGTTCCTTGAAGGCGCGTCCACAAACCCCGCAGACATGGGGCTTTA\n+CCTCGCTGTGGTAGAGCAAATGCTTGTGGTACGACTGCTGGAAGGTGAAGGTCCTGGCGCAGATCTCGCATGTGTACGGC\n+ATCTCGCCGGTGTGGAGCCGCTTGTGCTTCTTCAGAAAGTACTTGGTGGTGAAGGACTTGCTGCACACATCGCACTCCCA\n+CAGCTTCGGGGTGGCCGTGCCCGACTCCGGCGAACTGGGGGACTGCTGGAGCATGCTCAGCGCCCCACTCAGCGCGTATG\n+GCTGGGATGCGGTGCACGTGGAGTTATTTCCGTTGCCAAGGGCTCCAGGCTCTGTGTGTCGAATGCGATCGCAAATGCTC\n+AGCTTGGGCGTGGCAGTAACTGCACTGGTGGTAGAAGCGGGTGTAGGACTGGGATTAGGATTTGGATTGGGATTGGAGCA\n+GGGCACGCCCATCATGTGCACTACTTTCAGGTGGATTCGAAGGGAGCCCTTCATTCGGAACGTCTTGGAGCAGAGATGGC\n+ACTTGTAGGGCT
CCTGGTCCTGTATAAAGCAATAATCGGAATTTCACTTATATTTATCAATTCATCAATATGCCCTCATG\n+GCCAAATATTCCATTACATTACCGTCTGTCTCTCAGTTTCAAATTTATGCACAAAAATCATTCACTTTCATTCACTATAT\n+CACAAAGTTGCCATGGTTTTAAATTGATCAAAAACAAATTAATATCTATCATATATATACATAGTCATATGAACAGTTGA\n+AAAATTAATTGAAAATAATGGGAACGATATACGTACATACATCAGTTGTTTTTAAAATATAAGGGTATATAGATTTCTTT\n+CTTGTTGTTGTTGATTTTAATTACGTCAAACTTTTGTTTCAGATTCAATGTAAATGGTCTAGCTTTTTAAGTATGATTTT\n+TTTTTGCTGCCAGTGAGCATAGAAAAAAAAAATCAAAATCGATATAAGAATATGCGAAAGTGCATTACGAAACTCTTTAG\n+ATAATAGCACTTAATATATGTACATAGCCAATAGTTACCGGTTCCTTCTGTTGGGGTTCCTTTTGCTTGGGTTCTCCCTC\n+CGCATTTTCGTGGACTAAGCGGACGTGCATGTCCCTCAGCTCGGTATTCCGGAAACTGAACTCGCAAATGTCGCAGTGGG\n+CGGGCGGGGTGGTCCGCACAGGCGGTGGGGTTGGGACGACGGGCTTGGACCTGGTTCGCTTGGCCCTCCGTTTGGGAGGA\n+GCTGCGGCAAGGAAGCCCCGGGACGAGGCGGGTTGGCCATTCGCTGGACTCTCGTTTCCCTCCTGAGCCATCAGACTTGT\n+GTGCGAGAACAGGTGGATGGTGAGCTTGTCCAGCCCCAGGAAGAGCTCCTTGCAGTTGGCAAAGGGACAGGCCAGTGGGC\n+CGTTGGCCGCCTTGATCAGCCTCTGCTGCAGTGCGTCAATACTGCCGAAACTGGGCACCGCGCAGAGCGGACACAGCACC\n+GAGGTGGAACACATTTCGCCAGTGCACTCAATCGAATCTTATGCAAATGCTTCACCTCCTATTGGGATTATCCTCCTGTT\n+CGGTCTGTGATCATCTATTCAGGAGTCCATTCCCAGACTGCCTAGTCTTTCTGCTTTCAAAATTTTCTAAAAATATCAGC\n+AAGTGAAGATTTTTGAAAACTTTGGGCCCAGCAATCTGACTTCTCGGCACCGATGCCAGCTAACGAAATAATGAAAAATA\n+ATGAAATGCCCGGCGCGGATCGTCGAATCGTCAAGAAGACTTTCGGAAACACTCGCAGCACCGAAATCCCATCTCTCGAA\n+CAAGGCAGTCTCTTTTCTCCGTGTCTCTGGGTAGCTCATTTCGAAATATAGCTCTGAGCACGGCTATATACTATATGTAT\n+GTAGAATTATTTCTGGCCGATATATGTTGCACTGGCGGCCATATAGCCTTCGTTCTAGTCTTTGTAACGCACGATGCGCA\n+GGAGCAATTCGCTGAGATGACCACATGCGATTTGCGGGACTTATCTAGAGATCTATCATTATCGCCAGATTGGTTTAATA\n+ATTGGCTTTTCCGCCAATATCCAATTGGAATATGGTTGGTTACTGCAATTGTCGCTCCATTTTTTAAGCACTCCATAAAA\n+AGTAAACACATTAATATGTACTCTTATTGGAGATTTCTTCTTTCGATTTTAGTTTCGGACCAGTGAAAATCATTCGTTTC\n+ATTTTCGTAAATAAGAACTGAGAAAATATTATTATTATATATATTTCTTTATTAGGAAAATACGAAGATTGAGTATTTCA\n+GATTGAATTAGCATATCCGTCTAAATCTTAATGCTGTAATGAGCTTACTTGAGATCTGATCAAAACCAATACAAAACCCA\n+CACCAAAGGTGGTAGCTAATATACATATTTTGTGTAATACTTTTGTAGAGTATTTACTATTCAGCGATTTAAACAAGCAA\n+TCGCCTAGACACACACATTT
GTCCGCCTATGTGTATGTGCACCGAGCTATACCCCCACTGAATCGCTGTGTGCTATTTTT\n+ATGGCCGCGATGCTCTCTTGTTTTGACCCGCTTGGGCAAC\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate.loc Thu Aug 26 06:55:33 2021 +0000
b
@@ -0,0 +1,7 @@
+# this is a tab separated file describing the location of funannotate databases used for the
+# funannotate annotation tool
+#
+# the columns are:
+# value  description format_version path
+#
+2021-07-20-120000 Funannotate database 2021-07-20-120000 1.0 ${__HERE__}/funannotate_db
b
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/funannotate-db-info.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/funannotate-db-info.txt Thu Aug 26 06:55:33 2021 +0000
b
@@ -0,0 +1,10 @@
+merops diamond /tmp/prout/merops.dmnd 12.0 2017-10-04 5009 a6dd76907896708f3ca5335f58560356
+uniprot diamond /tmp/prout/uniprot.dmnd 2021_03 2021-06-02 565254 68ed1e475d13bb3d5574c53822d11cd3
+dbCAN hmmer3 /tmp/prout/dbCAN.hmm 9.0 2020-08-04 641 04696dfba1c3bb82ff9b72cfbb3e4a65
+pfam hmmer3 /tmp/prout/Pfam-A.hmm 34.0 2021-03 19179 f83c0d00445257fd9c066ad3e9e10568
+repeats diamond /tmp/prout/repeats.dmnd 1.0 2021-07-19 11950 4e8cafc3eea47ec7ba505bb1e3465d21
+go text /tmp/prout/go.obo 2021-07-02 2021-07-02 47228 f5b79fe1a6d6a67c542e39da5d4661dc
+mibig diamond /tmp/prout/mibig.dmnd 1.4 2021-07-19 31023 118f2c11edde36c81bdea030a0228492
+interpro xml /tmp/prout/interpro.xml 86.0 2021-06-03 38913 0d8c575f88f397397b9491520b38db1e
+busco_outgroups outgroups /tmp/prout/outgroups 1.0 2021-07-19 8 6795b1d4545850a4226829c7ae8ef058
+gene2product text /tmp/prout/ncbi_cleaned_gene_products.txt 1.70 2021-06-15 34039 e93924259b8294255def54097bdab07b
b
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/insecta/ancestral
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/insecta/ancestral Thu Aug 26 06:55:33 2021 +0000
b
b'@@ -0,0 +1,10899 @@\n+>EOG090W0028\n+VPLKDNQDVACFLVTKHSWKGKYKRIFSIGTAGITTYNPDKLEVTNKWLYSDVISVAPEF\n+VLTLKKDKKVDSLKFSSEHRAELLTEALKYFAEKPKRYEAYKLHWSDTRLPVVLEVTPAS\n+LDQLDPATNTVLASYAYKDIEGIGGFVIVVGGFSRLHLFEIKKKILESALGIEIKVITLE\n+EFEEQRLGKYSGDEHQTSLSEFTVEKVRHKEPVRRLLCLSETCLLERDPQTYSIVTLRPL\n+SDVFALVRIEYLNGQVRSYLATDRDSLLASLLDGVRASGNRDVHVKIKKTERGKRLGPLV\n+DEEVEALLLKLLQNEVLERFNANVPYSGLLYSVTQDGLFAENKEKLILEALQALVQKELE\n+AQFHALRRLVASKVGFAAFTKLSGFREAIGKKVVKALKRNDEAVTQAAIDLICALMQPMD\n+LDLRQEQLNKSSLLSSKKFLESLLDMWTEHVSKGTGALVVSAMLDLLTFALCVPYSETTD\n+GKQFDALLELVAERGRVLFKLFQHPSLAIVKGAGLVMRAIIEEGVAAKMQELALAEGALP\n+RHLLAALYTRLLTHRQLSRHLVGLWVTAMELLKRILPAGLLAFLESEEKVPEEEKLNVRD\n+NLKLAQDHASKKKVEKHLEALKHWGAKVEKIKERPVVLRKRRERKKSKLFYYKFNKDHAL\n+PNLIWNHKTREELREALENELRAFESDKELAGLVAWNYAEFEVKYQCLADEVKIGDYYLR\n+LLLEKDDSDSLIRKSYELFNDLYHRFLLTTKVELKVLCLQALAIVYGRYYEDIGPFSDTK\n+YIVQLLDRCLDRDRLVLFLKKLILHKRNVKEILDVRILVDLLTLAHLHTSRAEKEWYYNE\n+RKGPVSFKELKELYKKGKITAKTKVWAQGLDGWRSLQQVPQLKWTLVAKGSPVLNESELA\n+ALILDILIKLTEYFPSRAVIRPLPRVKRLLSELACLPHIVQLLLTFDPVLVEKVATLLLE\n+IMKDNPEVSKLYLTGVFYFILLYTGSNVLPIARFLKLTHTKQAFRSDESDIMQRSILGQL\n+LPEAMVSYLENHGAEKFAEIFLGEFDTPEAIWNSEMRRLLIEKIAAHIADFTPRLRSHTR\n+ARYQYLAIPAVRYPQLEKELFCNIFYLRHLCDTAKFPDWPIADPVKLLKDVLEAWKKEVE\n+KKPPAMTVEEAYKELGLDEAAVRKAYYKLAQKYHPDKNPEGRDKFEAVNKAYEFLCSRSS\n+WSGPNPNNIVLILRTQSILFERYSELRPYKYAGYPQLIKTIKLETKDEQLFSKLLAAASE\n+LAYHTVKCSALNAEELRREEGLEVLLEAYSRCVSVLSKSSKEEDQVCLNITRCFAVAAKF\n+EACRDKIVELPQLVKDLVRVLKFKHLAADSELQLQLVKAGVLWSLLLFLFEYDYTLEESG\n+VERSEEENKQEVANKLAKLAVKACAALAGYLEKLLTPYLARKLILKILTSNTENPYLIWD\n+NGTRAELLEFLEEKRFKYSAHKDELKIGEVFIRIYNEQPTFPINPKEFVLDLLEFLKHVV\n+MALEALANVIKNNKGVEIQCIGKFKLLFGLLSIKKAALEVISLVSRNKECVEDIAASEVL\n+VKLLLLLKVLDTLSALLKIVKEALAKGAVLYLLDLFCNSIREAAAELLAKLSADKLSGPK\n+VRLTLSKFLPKLLADALRDSPVQLFESKHENPELIWDDEARKRVNELVVGGVYLRLFVAN\n+PAWTLRKPKEFLSDLLDTVLELLSKLELATTALVALLRAQPALADAVPSLGHIPKLVRQL\n+KSALLVLHQLALSEICVSAISQTECISPLKRDLIAVACETLSRLFDKLVKQALEAELVKY\n+LLELLESRTKAQIVKALKAMSRSGEKVKAILEKSSVWAEYKDQKHDLFISAAGYLTAGPS\n+TSPPPVD\
n+>EOG090W002U\n+MTTDISVVEYDGGNSSSRLFERSRIKAERESVQKKTFQKWVNSHLVRRIGDLLRDGKKLI\n+KLLEVLSGERLPRPTKGKMRIHCLENVDKALQFLREQRVHLENLGSHDIVDGNARLSLGL\n+IWTIILRFQIQDITIEETDNKETKSAKDALLLWCQMKTAGYHNVNVRNFTTSWRDGLAFN\n+AIIHKHRPDLIQFEKLSKSNAIYNLNNAFNVAEDKLGLTKLLDAEDVFVEQPDEKSIITY\n+VVTYYHYFSKLKQETVQGKRIGKVVGIAMENDRMIKEYESLTSDLLKWIEATIEALGDRK\n+FANSLVGVQQQLAQFSNYRTVEKPPKFVEKGNLEVLLFTLQSKMRANNQKPYTPKEGKMI\n+SDINKAWERLEKAEHERELALREELIRQEKLEQLAARFNRKASMRETWLSENQRLVSQDN\n+FGFDLAAVEAAAKKHEAIETDIFAYEERVQAVVAVSQELEAENYHDIERINARKDNVLRL\n+WNYLLELLRARRLRLELSLQLQQNFQEMLYILDSMEELKLRLLTDDYGKHLMGVEDLLQK\n+HSLVEADINVLGERVKAVVQQSQRFLYKPCDPAIIVERVQQLEDAYAELVKLAVERRARL\n+EESRKLWQFYWDMADEENWIKEKEQIVSTADIGHDLTTVNLLLSKHKALENEIQSHEPQL\n+MSVVAVGDELVHFGADRIQERLKEILAKWNHLLDLRRKRLEAVDYHQLFADADDVDIWML\n+DTLRLVSSEDVGRDEANVQSLLKKHKDVTDELKNYALHQQAEELERLASIDSRYKELLEL\n+AKLRKQRLLDALSLYKLLSESDGVEQWIGEKDRMLDTMVPAKDIEDVEILKHRYDGFDKE\n+MNANASRVAVVNQLARQLLHVEHPNSEQIVARQNELNQKWAELREKAEAKRDELNSAHGV\n+QTFYIECRETVSWIEDKKRILQETDSLEMDLTGVMTLQRRLSGMERDLAAIQAKLDSLEK\n+EAEAIHPEEAALIRERIAQIELIWEQLTQMLKERDAKLEEAGDLHRFLRDLDHFQAWLTK\n+TQTDVASEDTPTSLAEAEKLLSQHQSIKEEIDNYTDDYKKMMEYGERLTAEPSTQDDPQY\n+MFLRERLKALKDGWEELHQMWENRQQLLSQSLNLQLFNRDARQAEVLLSQQEHVLAKDET\n+PVNLEQAENLLKRHEAFLTTMEANDDKINSVVQFAERLVDEEHFAADKVKKKAENIEERA\n+NREKAEKLKDQEFLQDLEELSEWVQEKKITAQDETYRSAKTVHSKWTRHQAFEAEIASNK\n+ERLKPELAEIIEPKLKELADQFEELETTTKEKGERLFDANREVLIHQTCDDIDSWLNELE\n+KQIESEDTGSDLASVNILMQKQQLIETQMAVKAKQVEELEKQAEYLQKTVPVKKEKVEER\n+FEKLKAPLLERQRQLEKKKEAFQFRRDVEDEKLWIAEKLPLATSTEYGNSLFNVHVLKKK\n+NQSLKTEIDNHEPRIKAVCNNGQKLIDEGHEDAKEFEKLIEELWKELKDAVEEREKAQQY\n+LFDASEAESWMSEQELYMMVEDRGKDEISAQNLMKKHESLEKAVEDYAETIRQLGETARQ\n+LDQIAVKQSQVDKLYAGLKDLAGERRAKLDEALQLFMLNREVDDLEQWIAEREVVAGSHE\n+LGQDYDHVTLLWERFKEFARDTEAVGSERVAAVNEIADELIAAGHSDSATIAEWKDGLNE\n+AWQDLLELIETRTQMLAASRELHKFFHDCKDVLGRILEKQSDELGRDAGSVSALQRKHQN\n+FLQDLSTLQSQVQQIQEESAKLQASYAGDKAKEITNREAEVVAAWANLQALCDARKAKLA\n+DTGDLFFFNLVRTLLLWLDDVVRQMNTSEKPRDVSGVELLMNNHQSLKAEIDAREDNFSA\n+CISLGKELLARNHYASIKEKLLALTNQRNA
LLKRWEERWENLQLILEVYQFARDAAVAEA\n+WLIAQEPYLLSQELGHTIDEVENLIKKHEAFEKSA'..b'OG090W0MK4\n+DAEQIKSFKDFLLSYNKLSELCFVDCISDFTSREVEEKCALNCLEKYLKMNQRISQRFQE\n+FQLIANENALAAAKK\n+>EOG090W0MLJ\n+QKKLQELDKYKQVQKEYKKAVKQRQQLDGQLNENKVVELDLLKEDNEVYKLIGPVLVKQE\n+LEEAKQNVSKRIEYISKELKRVEDLIASLEKKQEKHRENLEKLQQQLQ\n+>EOG090W0MM4\n+LYEPDYLKPKIPLYDVLNVQIKGYDYAVLESYQKLIHKIAEALDLDVEDSWALPAQELKV\n+QRYKPKSTVVEAEYKLKVYERNVQISDVSSPILLRVLEAALPEGVTLEVEEHEEEKEEKR\n+YVPDKELLDLKQELDEL\n+>EOG090W0MNZ\n+KIEEYETFINDVLKEDLKKLEKKLEKLNEEIAEYVQLKSTIETLDGLKTKVDIGCNFFVQ\n+AKVEDSKILVNIGLGVYLELTLEEALKFIDVRIKLLEKQIEKLRKESAKTKAHIKLVLLA\n+IEELQ\n+>EOG090W0MYQ\n+NPFEKEKKKCILCKLNIEPDYKNVKLLSQFQSPYTGRIYGRHITGLCKKKQEKVEKEILK\n+AQFLKDPKLFDPEKPLRPHK\n+>EOG090W0MZQ\n+PPINQKRLLAFINHFIISTVSFLNKFAKSCEEKLLEFEKKLQKVEASLVILEAKLSSIPE\n+LEEDPEYKKYFKMVQVGVPKEAVKLKMQQEGLDPSLLD\n+>EOG090W0N0N\n+LSKKEKLKKAVKDYGSTVVVFHVGISLISLGALYLLVSSGLDVLLEKLEASTFVVAYAVH\n+KVLAPVRISITLAATPLIVRYLRKIGLLK\n+>EOG090W0N4N\n+MDLSKVKNEKKLELCKLYFGFALLPFLWAVNAVWFFKEAFKKPEYEEQKQIKKYVILSAI\n+GALIWAWIVIFQLKRAEWGELADEISFIIPLG\n+>EOG090W0N5S\n+MKAVTAVCATGASVPAVASGRVKRRRDLENEEIQMYLSKLKDLVPFMPKNRKLSKLEVIQ\n+HVIDYICDLQTALEEHPAAAALARQPLGVLPNTIL\n+>EOG090W0N7H\n+MKLSHETVTIELKNGTQVHGTITGVDVAMNTHLKAVKLTIKNRLETLSIRGNNIRYYILP\n+DSLPLETLLIDDTPKAKAKKK\n+>EOG090W0N7U\n+SSTSQKHREPMGDKPVTDLAGVGEVLGKRLVVLGQYLVLKKDKELFKEWMKDTCSANSKQ\n+SSDCYQCLSDWCEEF\n+>EOG090W0NCE\n+VNKTVSIITDGRNFIGTLKGFDQTINLILDESHERVYSTTQGVEQVVLGLHIIRGDNVAI\n+VGELDDSRLDLSSIRAEPLSSVVH\n+>EOG090W0NFV\n+DPELEAIRAQRLAQLQSQYKGQKAQEEKKREQEEMKNSILSQVLDQSARARLNTLKLGKP\n+EKGKMVENLLIRMAQRGQIKGKLGEKELIKLLESVNQQTTVKFDRRRAALDSDDD\n+>EOG090W0NJA\n+TRVYVGGLTEKVKKEDLEAEFEKYGKLNSVWVAFNPPGFAFIEFENKDEAEKACDNLNGT\n+ELLGSKLRVEISRGRGRKGGRGKRGSRFRSRSPVGR\n+>EOG090W0NJU\n+YLKSWEEFEKAAERLYLQDPLKRYTMKYVHSKGLLVLKLTDNCLQYKTEDLKKIEKFISN\n+LMRHMASKE\n+>EOG090W0NK3\n+VNVPKQRRTFCKKCKVHKLHKVTQYKKSKEGRRRYDRKQQGFGGQTKPIFRKKAKTTKKI\n+VLRLECTECKYRKPLKRCKHFELGGDKKRK\n+>EOG090W0NO8\n+MGKVKCSELRTKDKKELLKQLEELKTELTNLRVAKVTGGAASKLSKIRVVRKAIARVYIV\n+
LHQKQKENLRKNKKYKPLDLRPKKTRALRRALTTLKEIRKRKYAVKA\n+>EOG090W0NRT\n+RKEALSQFIQQIHGRPVVVKLNSGVDYRGVLACLDGYMNIALEQTEEYVNGQLKNKYGDA\n+FIRGNNVLY\n+>EOG090W0NTV\n+EEWLEKEVIGLRVWQLLLLVLSILLSLVILLCCCIRFRIPRTKQEIEADYERKKLTKKFR\n+KRLKKIKNSEMDELDLKKAEAESLE\n+>EOG090W0O4V\n+MPKYYCDYCDTYLTHDSPSVRKTHCQGRKHKDNVKFYYQKWMEEQAQHLIDATTAAFKAG\n+KIASNPFAGVAIPPPGPGLAAPPGMPMMMGPHGPMPPMMMRPLMKPKGPMAPMGPLGALG\n+PVRPPL\n+>EOG090W0O82\n+MLEITCNDRLGKKVRVKCNPDDTIGDLKKLIAAQTGTKIVLKKWYTIFKDHIKLQD\n+>EOG090W0O88\n+AKRTKKVGITGKYGTRYGASLRKMVKKMEITQHSKYTCSFCGKAMKRSVVGIWSCKRCKR\n+TVAGGAWVYSTTAAASVRSAVRRLRE\n+>EOG090W0ODH\n+MEEKLAEYRAKKRREELLEKVKEKLKEVYLLYFLLWATLYIIAIELEFGAVYLVLSALVF\n+IYLNTRTGPKKKGEVSAYSVFNKNCEAIDGTLKAEQFEREIRYG\n+>EOG090W0OM7\n+LGRSRSPSPRRRRKERRDRRRRRSRERRRRSRDRERSLSRSRSRSEERERPVITEADLEG\n+KSPEEQEMLKLMGFCGFDTTKGKKVEGNDVGEVHVILKRKYRQYMNRKGGFNRPLDFV\n+>EOG090W0ORD\n+DEYALVAKGKLKLKSDKKKKKKKKRTKAELAFKMQEKMQKERIKEKASMTHKQRVEEFNR\n+HLDSLTEHFDIPKVSWTK\n+>EOG090W0ORX\n+PREIKEIKDFLLKARRKDAKSKIKKNAENVKFKVRCSRFLYTLVITDKEKAEKLKQSLPP\n+G\n+>EOG090W0OS5\n+KELEKLEEAKLKAKYPEGHSAFLQKRLAKGQKYFDSGDYQMAKQKTGEAIPTPETVPVRK\n+TSIIQP\n+>EOG090W0PDB\n+FAKDSIRLVKRCTKPDREFQKIAIATAIGFCIMGFIGFFVKLIHIPINNIIV\n+>EOG090W0PQO\n+LLLLAVALAAAQLFLAQALEASLAHPAVVENAEAEAQLPEELRNPFYKNPRIAAALAKES\n+WFTNKEMQVIDREAEKIPREKIYKILKNAGLVRRR\n+>EOG090W0PW0\n+EEKELKAGHPPAVKAGGMRITQHKTPSPPKTISGAPVKGNEAVQVFHEKKPPTIQQPRK\n+>EOG090W0PZH\n+KPIDSKREEFRKYLERAGVLDALTKVLVSLYEEPEKPEDALEYLRKNLGLKKELEEAKAE\n+IAELE\n+>EOG090W0Q9X\n+VIGGAVVGLLCAILVVMFIVYRLRKKDEGSYALEPKKRSPNREFYA\n+>EOG090W0QXM\n+PAAPSSTSVGSGSRSPSKQRKTTGSGGMWRFYTDDSPGIKVGPVPVLVMSLLFIASVFML\n+HIWGKYTRS\n+>EOG090W0R2X\n+MKRTKEKVEKEEGEELYSNEITEEMKKFIIEPSYVLCEKLIEGRLSFGGMNPEIEKLMEE\n+EEKDVSDEEMA\n+>EOG090W0RGQ\n+DVLDSWEEIDESEALEKKLKKLVIIKEEDELRSQLVPPEPTVKILKRPEKSSNGESKPKQ\n+PIKTLKQREQEYAEARLRILGEAKSPEENVLRLPRGPDGTKGFNVRR\n+>EOG090W0S6D\n+RVNGSLLKQFIGKKVSILGKVKKKSSNGKSFLKTTDNQKVTVELKEPLDEPLEGWVEVHG\n+VVKSSTISCDEYIEFPEETENFDAEAYNKLLNTVKNPWK\n+>EOG090W0T3K\n+MREFTNIVTTLSKLSKECVLRLTKDKLVFIVPL
VWCELDQKFFSEYNMEGVSNEIYLELS\n+AEMLSRSLSSLKAKSVKIKLTNKQSPCLTVEIELSSESRQVVHDIPVTVIPRKEWSEYEE\n+PSIELPSLKKLRKVVDRMKNLSPSLTISATLKIETDTATVSTHFKNLKVSARVDIKKLSA\n+FLEVICSIEKLIKLELVKLHYFLPAV\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/insecta/dataset.cfg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/insecta/dataset.cfg Thu Aug 26 06:55:33 2021 +0000
b
@@ -0,0 +1,6 @@
+name=insecta_odb9
+species=fly
+domain=eukaryota
+creation_date=2016-02-13
+number_of_BUSCOs=1658
+number_of_species=42
b
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/insecta/hmms/EOG090W03A6.hmm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/insecta/hmms/EOG090W03A6.hmm Thu Aug 26 06:55:33 2021 +0000
[
b'@@ -0,0 +1,892 @@\n+HMMER3/f [3.1b1 | May 2013]\n+NAME  EOG090W03A6\n+LENG  290\n+ALPH  amino\n+RF    no\n+MM    no\n+CONS  yes\n+CS    no\n+MAP   yes\n+DATE  Tue Jul 19 12:14:12 2016\n+NSEQ  41\n+EFFN  0.573059\n+CKSUM 4199501958\n+STATS LOCAL MSV      -11.0137  0.70159\n+STATS LOCAL VITERBI  -11.8914  0.70159\n+STATS LOCAL FORWARD   -5.4517  0.70159\n+HMM          A        C        D        E        F        G        H        I        K        L        M        N        P        Q        R        S        T        V        W        Y   \n+            m->m     m->i     m->d     i->m     i->i     d->m     d->d\n+  COMPO   2.61260  4.34648  2.94292  2.63725  3.00827  2.97272  3.70379  2.92379  2.63311  2.34924  3.71976  3.18648  3.40267  3.05104  2.84850  2.73289  2.87560  2.67151  4.50628  3.28193\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.06535  3.91193  3.14059  0.61958  0.77255  0.00000        *\n+      1   3.00919  4.52311  4.14821  3.62922  3.09723  4.03596  4.32783  2.27940  3.34231  1.08727  2.86803  3.94138  4.37623  3.72340  3.44120  3.39248  3.26003  2.31137  4.98326  3.74489      1 l - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03123  3.87781  4.60016  0.61958  0.77255  0.51713  0.90691\n+      2   2.36218  4.33870  3.13011  2.73889  4.02232  3.10957  3.85645  3.40174  2.64759  3.07800  3.92900  3.09783  3.59930  3.07936  2.94839  1.61731  2.43809  2.94535  5.34618  4.08403      2 s - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03123  3.87781  4.60016  0.61958  0.77255 
 0.51713  0.90691\n+      3   2.67521  4.27814  3.24960  2.71563  4.05115  3.41144  3.69906  3.32637  1.42497  3.01802  3.94128  3.12968  3.88008  2.88045  2.51411  2.76334  2.94671  2.94958  5.28612  4.02052      3 k - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03123  3.87781  4.60016  0.61958  0.77255  0.51713  0.90691\n+      4   2.80665  4.86651  3.06170  2.65102  4.29941  3.41912  3.68135  3.61311  1.29697  3.22463  4.14220  3.08300  3.90692  2.84934  2.39258  2.85149  2.85896  3.30620  5.40498  4.16189      4 k - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.04394  3.87781  3.80347  0.61958  0.77255  0.51713  0.90691\n+      5   1.15269  4.13250  3.42805  3.19131  4.05298  2.97131  4.22552  3.11818  3.19502  3.02891  4.02623  3.33501  3.71301  3.53350  3.46107  2.45402  2.62633  2.76070  5.51002  4.28558      5 a - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03162  3.86549  4.58784  0.61958  0.77255  0.52796  0.89112\n+      6   3.14746  4.61180  4.17828  3.81812  3.13308  3.97327  4.47605  2.33676  3.58594  0.88383  3.15824  4.09427  4.40984  3.95594  3.76314  3.54334  3.44203  2.37228  5.00981  3.73151      6 l - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03162  3.86549  4.58784  0.61958  0.77255  0.52796  0.89112\n+      7   3.23704  4.72886  3.82795  3.55604  2.29363  3.82202  3.61532  3.24723  3.41375  
2.70296  3.92299  3.71488  4.29603  3.72147  3'..b'.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03197  3.85485  4.57719  0.61958  0.77255  0.54899  0.86164\n+    283   2.86715  4.85199  2.89168  2.64635  4.01816  3.35460  3.80443  3.64641  2.46840  3.15049  4.17905  3.10585  3.90794  1.26571  2.75284  2.91574  3.16423  3.37724  5.29340  3.98605    284 q - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03197  3.85485  4.57719  0.61958  0.77255  0.54899  0.86164\n+    284   2.30662  4.22669  3.19139  3.00653  4.05883  2.95692  4.11678  3.57914  3.07910  3.30080  4.22848  3.23401  3.69430  3.42983  3.36556  1.11793  2.78547  3.11811  5.43277  4.14231    285 s - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03197  3.85485  4.57719  0.61958  0.77255  0.54899  0.86164\n+    285   2.97610  4.79637  3.10901  2.86734  3.27278  3.43295  1.18022  3.67628  2.71089  3.17239  4.21410  3.27732  3.98459  3.23340  2.97758  3.04699  3.27893  3.41176  4.73803  3.22416    286 h - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03197  3.85485  4.57719  0.61958  0.77255  0.49247  0.94447\n+    286   3.02629  4.38931  4.55440  4.02788  3.38969  4.29987  4.75221  1.66570  3.90157  1.73351  3.22011  4.29643  4.60414  4.16932  4.12143  3.65546  3.28153  1.24068  5.32317  4.12520    287 v - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  
3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03072  3.89395  4.61629  0.61958  0.77255  0.51483  0.91031\n+    287   3.12120  4.54522  4.46798  3.93436  3.10940  4.26644  4.61200  2.09702  3.74039  1.03668  2.81140  4.23014  4.55041  4.00262  3.93893  3.62030  3.36876  2.00221  5.11579  3.95042    288 l - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03072  3.89395  4.61629  0.61958  0.77255  0.51483  0.91031\n+    288   2.79129  4.78468  3.27333  2.77748  4.23368  3.42379  3.72210  3.67372  2.18570  3.22145  4.12764  3.17710  3.22448  2.90441  1.31319  2.86068  3.06244  3.35113  5.34061  4.12949    289 r - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03072  3.89395  4.61629  0.61958  0.77255  0.51483  0.91031\n+    289   2.87748  5.28398  2.07313  1.23939  4.58665  3.21010  3.73813  4.06161  2.69686  3.62602  4.49991  2.71592  3.81535  2.91427  3.22357  2.79830  3.16561  3.67987  5.77966  4.34719    290 e - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03072  3.89395  4.61629  0.61958  0.77255  0.51483  0.91031\n+    290   2.83903  4.94076  3.19058  2.65002  4.36201  3.09854  3.60817  3.74025  1.86467  3.26108  4.11943  3.08356  3.91775  2.75841  1.52389  2.86394  3.05522  3.41435  5.37419  4.15539    291 r - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  
3.61503\n+          0.02078  3.88401        *  0.61958  0.77255  0.00000        *\n+//\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/insecta/hmms/EOG090W06A3.hmm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/insecta/hmms/EOG090W06A3.hmm Thu Aug 26 06:55:33 2021 +0000
[
b'@@ -0,0 +1,973 @@\n+HMMER3/f [3.1b1 | May 2013]\n+NAME  EOG090W06A3\n+LENG  317\n+ALPH  amino\n+RF    no\n+MM    no\n+CONS  yes\n+CS    no\n+MAP   yes\n+DATE  Tue Jul 19 12:14:23 2016\n+NSEQ  42\n+EFFN  0.505005\n+CKSUM 2173114792\n+STATS LOCAL MSV      -11.1177  0.70082\n+STATS LOCAL VITERBI  -11.8237  0.70082\n+STATS LOCAL FORWARD   -5.8436  0.70082\n+HMM          A        C        D        E        F        G        H        I        K        L        M        N        P        Q        R        S        T        V        W        Y   \n+            m->m     m->i     m->d     i->m     i->i     d->m     d->d\n+  COMPO   2.60078  4.20386  3.09081  2.72939  3.29201  2.73750  3.80596  2.60287  2.76439  2.32806  3.62751  3.16496  3.24412  3.25571  3.03713  2.74208  2.91733  2.41359  4.52910  3.56529\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03242  3.84097  4.56332  0.61958  0.77255  0.00000        *\n+      1   2.88548  4.98203  3.26484  2.66893  4.37797  3.54234  3.56403  3.73430  1.73193  3.23652  4.10452  3.09116  3.93137  2.65462  1.56967  2.90213  2.96175  3.42446  5.34579  4.14426      1 r - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03242  3.84097  4.56332  0.61958  0.77255  0.48576  0.95510\n+      2   1.06014  4.17714  3.43127  3.22164  4.02163  3.00241  4.24732  3.14248  3.23710  3.02549  4.06118  3.37901  3.73871  3.58127  3.48951  2.52791  2.80311  2.80236  5.45195  4.24880      2 a - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03242  3.84097  4.56332  0.61958  0.77255 
 0.48576  0.95510\n+      3   3.05279  4.55050  4.19258  3.77702  3.08275  4.01645  4.45173  2.21046  3.53641  1.02046  3.06577  4.04994  4.42083  3.89443  3.73294  3.46036  3.34301  2.19383  5.01051  3.71888      3 l - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03242  3.84097  4.56332  0.61958  0.77255  0.48576  0.95510\n+      4   3.00664  4.43473  4.29003  3.90501  3.35346  4.03179  4.63929  1.11780  3.73250  1.94647  3.29426  4.15480  4.46644  4.08432  3.93271  3.52442  3.30654  1.84634  5.23744  3.97271      4 i - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03242  3.84097  4.56332  0.61958  0.77255  0.48576  0.95510\n+      5   3.12038  4.59225  4.13872  3.77890  3.12451  3.94175  4.44453  2.32740  3.54694  0.91499  3.15816  4.05737  4.38285  3.92282  3.72797  3.50962  3.41625  2.35646  4.99189  3.70778      5 l - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03242  3.84097  4.56332  0.61958  0.77255  0.48576  0.95510\n+      6   2.78335  4.33516  4.08134  3.74395  3.52758  3.69083  4.55127  2.05344  3.61123  2.22041  3.49797  3.93870  4.25342  3.97351  3.82669  3.19274  3.14588  1.06562  5.30654  4.03600      6 v - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03242  3.84097  4.56332  0.61958  0.77255  0.48576  0.95510\n+      7   2.62697  4.40500  3.29425  3.18992  4.33181  0.77941  4.31170  3.92417  3.36686  
3.58217  4.55801  3.45867  3.79512  3.69359  3'..b'.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03203  3.85293  4.57528  0.61958  0.77255  0.49328  0.94320\n+    310   2.30574  4.22577  3.18979  3.00487  4.05694  2.95603  4.11516  3.57701  3.07731  3.29883  4.22670  3.23264  3.69326  3.42820  3.36380  1.12089  2.78445  3.11640  5.43104  4.14042    310 s - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03203  3.85293  4.57528  0.61958  0.77255  0.49328  0.94320\n+    311   2.79273  4.34096  4.09811  3.76084  3.53604  3.70346  4.56601  2.05196  3.62832  2.22661  3.50436  3.95375  4.26521  3.98962  3.84267  3.20593  3.15484  1.04991  5.31711  4.04742    311 v - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03203  3.85293  4.57528  0.61958  0.77255  0.49328  0.94320\n+    312   2.52063  4.46247  3.14399  2.66329  3.61752  3.33464  3.73235  3.11172  2.55088  2.69643  3.68563  3.09303  2.21417  2.90819  2.81318  2.64840  2.75793  2.77252  5.11552  3.82308    312 p - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03203  3.85293  4.57528  0.61958  0.77255  0.49328  0.94320\n+    313   2.83516  5.28588  2.02146  1.37746  4.57342  3.19313  3.68741  4.04429  2.57588  3.59161  4.44614  2.67503  3.78602  2.85457  3.15755  2.75155  3.11440  3.65636  5.76541  4.31525    313 e - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  
3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03203  3.85293  4.57528  0.61958  0.77255  0.49328  0.94320\n+    314   2.42177  4.37033  3.17444  2.98491  4.08142  3.08409  4.11584  3.50342  3.03789  3.18538  4.19247  3.28514  1.20231  3.42206  3.32349  2.65489  2.93898  3.13658  5.40418  4.19997    314 p - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03203  3.85293  4.57528  0.61958  0.77255  0.49328  0.94320\n+    315   2.63050  4.69517  2.96852  2.54738  3.99565  3.34796  3.67351  3.38755  2.34178  3.00253  3.70071  3.00925  3.82818  1.82403  2.67315  2.61986  2.77726  3.09208  5.27729  3.96986    315 q - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03203  3.85293  4.57528  0.61958  0.77255  0.49328  0.94320\n+    316   2.99195  4.39606  4.46052  3.99700  3.37863  4.17130  4.73529  1.20074  3.83833  1.90438  3.24173  4.24154  4.54901  4.14877  4.05130  3.57249  3.27663  1.64463  5.32209  4.08731    316 i - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03203  3.85293  4.57528  0.61958  0.77255  0.49328  0.94320\n+    317   3.01592  4.44036  4.30752  3.92275  3.36125  4.04511  4.65464  1.10243  3.75049  1.95152  3.29989  4.17084  4.47839  4.10092  3.94931  3.53870  3.31551  1.84594  5.24789  3.98436    317 i - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  
3.61503\n+          0.02167  3.84258        *  0.61958  0.77255  0.00000        *\n+//\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/insecta/hmms/EOG090W0GYE.hmm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/insecta/hmms/EOG090W0GYE.hmm Thu Aug 26 06:55:33 2021 +0000
[
b'@@ -0,0 +1,352 @@\n+HMMER3/f [3.1b1 | May 2013]\n+NAME  EOG090W0GYE\n+LENG  110\n+ALPH  amino\n+RF    no\n+MM    no\n+CONS  yes\n+CS    no\n+MAP   yes\n+DATE  Tue Jul 19 12:02:11 2016\n+NSEQ  42\n+EFFN  0.417847\n+CKSUM 96359631\n+STATS LOCAL MSV       -9.8588  0.71539\n+STATS LOCAL VITERBI  -10.5314  0.71539\n+STATS LOCAL FORWARD   -4.2136  0.71539\n+HMM          A        C        D        E        F        G        H        I        K        L        M        N        P        Q        R        S        T        V        W        Y   \n+            m->m     m->i     m->d     i->m     i->i     d->m     d->d\n+  COMPO   2.58050  4.26382  2.85365  2.80613  3.15035  2.89189  3.81397  2.73428  2.84682  2.50149  3.63880  3.04623  2.92146  3.23736  2.97958  2.69288  2.75694  2.61051  4.37217  3.44918\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.11208  3.80830  2.47881  0.61958  0.77255  0.00000        *\n+      1   2.86563  4.43941  3.90030  3.48825  3.15849  3.72815  4.25618  2.32070  3.26187  1.77984  1.67728  3.78149  4.19270  3.65614  3.48266  3.19769  3.17068  2.30899  4.97739  3.72347      1 m - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03621  3.73242  4.45477  0.61958  0.77255  0.51831  0.90517\n+      2   2.32088  4.22631  3.23062  2.88208  3.75054  3.06494  3.91305  3.12034  2.80280  2.86950  3.46740  3.17206  3.71214  3.20009  3.11255  1.58694  2.72892  2.79566  5.19560  3.87469      2 s - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03541  3.75430  4.47665  0.61958  0.77255  
0.52979  0.88850\n+      3   2.16583  4.14387  3.31164  3.04023  3.91928  2.99832  4.08555  2.96050  2.99484  2.87006  3.88524  3.25429  3.70169  3.37677  3.27365  2.46801  1.56963  2.64620  5.38268  4.14256      3 t - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03541  3.75430  4.47665  0.61958  0.77255  0.52979  0.88850\n+      4   2.50339  4.36552  3.18021  2.87748  3.70569  3.17426  3.92862  3.09513  2.80524  2.51701  3.83182  3.22048  1.72512  3.22934  3.10382  2.66637  2.88472  2.83864  5.15020  3.85399      4 p - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03541  3.75430  4.47665  0.61958  0.77255  0.52979  0.88850\n+      5   1.37966  4.07220  3.30932  3.05735  4.00851  2.90363  4.11036  3.10631  3.06226  3.01691  3.96738  3.22700  3.63731  3.40459  3.34236  2.32349  2.50171  2.72896  5.44141  4.20867      5 a - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03541  3.75430  4.47665  0.61958  0.77255  0.52979  0.88850\n+      6   2.79738  4.71128  3.35305  2.81859  3.86956  3.45352  3.66728  3.34124  2.12422  2.73535  3.92040  3.19573  3.91394  2.88067  1.48782  2.88950  3.04990  3.10526  5.13081  3.85640      6 r - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03541  3.75430  4.47665  0.61958  0.77255  0.52979  0.88850\n+      7   2.79738  4.71128  3.35305  2.81859  3.86956  3.45352  3.66728  3.34124  2.12422  
2.73535  3.92040  3.19573  3.91394  2.88067  1.4'..b'.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03352  3.80830  4.53064  0.61958  0.77255  0.48576  0.95510\n+    103   2.28595  4.20469  3.15305  2.96732  4.01306  2.93587  4.07824  3.52766  3.03667  3.25343  4.18597  3.20146  3.66970  3.39139  3.32369  1.19215  2.76154  3.07690  5.39105  4.09660    103 s - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03352  3.80830  4.53064  0.61958  0.77255  0.48576  0.95510\n+    104   1.11047  4.16262  3.39942  3.18942  3.99027  2.98852  4.21878  3.10639  3.20405  2.99074  4.03096  3.35463  3.72182  3.55149  3.45809  2.51413  2.78712  2.77202  5.42393  4.21734    104 a - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03352  3.80830  4.53064  0.61958  0.77255  0.48576  0.95510\n+    105   2.98123  4.41976  4.24147  3.85605  3.33251  3.99469  4.59708  1.16125  3.68281  1.93347  3.27967  4.11045  4.43341  4.03863  3.88681  3.48501  3.28213  1.84863  5.20897  3.94068    105 i - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03352  3.80830  4.53064  0.61958  0.77255  0.48576  0.95510\n+    106   3.08459  4.56673  4.08567  3.72675  3.11378  3.89922  4.40266  2.31619  3.49506  0.95858  3.15957  4.00826  4.34667  3.87883  3.68103  3.46477  3.38236  2.33656  4.96826  3.67627    106 l - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  
3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03352  3.80830  4.53064  0.61958  0.77255  0.48576  0.95510\n+    107   2.38084  4.22527  3.37365  3.12444  3.91461  3.06621  4.14632  3.03402  3.06612  2.89972  3.95563  3.34312  3.76545  3.46456  3.32701  2.58155  1.28564  2.73588  5.35817  4.13168    107 t - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03352  3.80830  4.53064  0.61958  0.77255  0.48576  0.95510\n+    108   2.28595  4.20469  3.15305  2.96732  4.01306  2.93587  4.07824  3.52766  3.03667  3.25343  4.18597  3.20146  3.66970  3.39139  3.32369  1.19215  2.76154  3.07690  5.39105  4.09660    108 s - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03352  3.80830  4.53064  0.61958  0.77255  0.48576  0.95510\n+    109   2.98123  4.41976  4.24147  3.85605  3.33251  3.99469  4.59708  1.16125  3.68281  1.93347  3.27967  4.11045  4.43341  4.03863  3.88681  3.48501  3.28213  1.84863  5.20897  3.94068    109 i - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03352  3.80830  4.53064  0.61958  0.77255  0.48576  0.95510\n+    110   2.83281  4.81376  2.86370  2.61910  3.97332  3.32524  3.77627  3.59570  2.44478  3.10430  4.13561  3.07764  3.87883  1.35288  2.72955  2.88405  3.13061  3.32988  5.25759  3.94755    110 q - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  
3.61503\n+          0.02268  3.79747        *  0.61958  0.77255  0.00000        *\n+//\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/insecta/hmms/EOG090W0T3K.hmm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/insecta/hmms/EOG090W0T3K.hmm Thu Aug 26 06:55:33 2021 +0000
[
b'@@ -0,0 +1,640 @@\n+HMMER3/f [3.1b1 | May 2013]\n+NAME  EOG090W0T3K\n+LENG  206\n+ALPH  amino\n+RF    no\n+MM    no\n+CONS  yes\n+CS    no\n+MAP   yes\n+DATE  Tue Jul 19 12:12:21 2016\n+NSEQ  42\n+EFFN  1.961060\n+CKSUM 265104873\n+STATS LOCAL MSV      -10.5526  0.70500\n+STATS LOCAL VITERBI  -11.3203  0.70500\n+STATS LOCAL FORWARD   -5.0917  0.70500\n+HMM          A        C        D        E        F        G        H        I        K        L        M        N        P        Q        R        S        T        V        W        Y   \n+            m->m     m->i     m->d     i->m     i->i     d->m     d->d\n+  COMPO   2.66939  4.05673  3.22993  2.79099  3.15365  3.55719  3.71237  2.54173  2.57440  2.22720  3.33451  3.21043  3.51162  3.12964  3.00991  2.70726  2.80134  2.41473  4.61383  3.55842\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.16208  4.61100  1.96838  0.61958  0.77255  0.00000        *\n+      1   2.93600  4.38428  4.46198  3.90104  3.40267  3.46509  4.45615  1.93047  3.75500  2.17341  1.33550  4.09987  4.43304  3.98673  3.92700  3.38467  3.18624  2.18032  5.08252  3.90751      1 m - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01722  4.46614  5.18849  0.61958  0.77255  0.71506  0.67170\n+      2   3.51610  5.57440  4.08521  3.23432  5.16373  4.05621  3.85036  4.44109  1.24790  3.78589  4.67516  3.56930  4.38323  2.97693  1.02813  3.49054  3.62846  4.13070  5.74164  4.65992      2 r - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01722  4.46614  5.18849  0.61958  0.77255  
0.71506  0.67170\n+      3   2.71675  4.18007  2.20863  1.78454  4.48942  3.47153  3.68161  3.95962  2.43737  2.96771  4.22140  2.70561  3.87879  2.38271  2.92830  2.68696  2.95012  3.54764  5.61847  4.21897      3 e - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01722  4.46614  5.18849  0.61958  0.77255  0.71506  0.67170\n+      4   3.79955  5.05528  5.24619  4.82928  0.91393  4.84708  4.43636  2.76588  4.65616  1.39209  3.35680  4.77841  5.06654  4.62904  4.66340  4.24935  4.02525  2.91694  4.56452  2.97666      4 f - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01722  4.46614  5.18849  0.61958  0.77255  0.71506  0.67170\n+      5   2.33881  4.63133  3.04986  2.72039  3.81134  3.58086  3.80889  3.19122  2.69933  2.40831  3.28616  3.19757  3.96766  2.62497  3.12129  2.40087  2.24400  2.60449  5.18249  3.90340      5 t - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01722  4.46614  5.18849  0.61958  0.77255  0.71506  0.67170\n+      6   2.53534  4.98255  2.88571  2.48098  3.55645  3.22634  3.11506  3.68024  2.49296  3.25497  4.05057  1.83222  3.89751  2.84319  2.96725  2.38521  2.93102  3.33084  5.46784  3.79390      6 n - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01722  4.46614  5.18849  0.61958  0.77255  0.71506  0.67170\n+      7   3.33815  4.58635  5.28231  4.77386  3.86630  4.81962  5.40260  1.04591  4.67983  
2.15439  3.65019  4.93514  5.08026  4.90950  4.'..b'.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01619  4.52743  5.24978  0.61958  0.77255  0.62832  0.76248\n+    199   3.17292  4.49948  5.07350  4.47897  2.19664  4.41569  4.75303  1.67850  4.30714  1.34256  2.66830  4.55216  4.68996  4.38672  4.33103  3.74557  3.18632  2.02062  5.12680  4.01423    203 l - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01619  4.52743  5.24978  0.61958  0.77255  0.62832  0.76248\n+    200   2.82721  5.23462  3.00978  2.50556  4.56561  3.55698  1.89081  4.01902  2.34647  3.51731  4.29242  2.45019  3.96371  2.18070  2.67888  2.52972  3.05315  3.62288  5.65577  4.29113    204 h - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01619  4.52743  5.24978  0.61958  0.77255  0.62832  0.76248\n+    201   2.79753  3.26426  4.38245  3.80576  2.13306  3.90254  4.09723  2.46777  3.65960  2.40542  3.34950  3.92355  4.27118  3.84123  3.78510  2.46701  3.03179  2.48318  4.61601  1.76843    205 y - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01619  4.52743  5.24978  0.61958  0.77255  0.62832  0.76248\n+    202   2.72610  3.77026  4.37676  3.78412  1.86105  3.85380  3.70355  1.91248  3.39011  2.25745  3.26975  3.64215  4.21973  3.81329  3.74151  3.15148  2.95801  2.40389  4.70415  2.65981    206 f - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  
3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01619  4.52743  5.24978  0.61958  0.77255  0.62832  0.76248\n+    203   3.15910  4.51063  4.85177  3.56044  3.48065  4.41056  4.77828  1.50858  4.14418  1.18189  3.34602  4.46100  4.70781  4.33071  4.27364  3.73860  3.39432  1.87237  5.27969  4.12987    207 l - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01619  4.52743  5.24978  0.61958  0.77255  0.62832  0.76248\n+    204   2.54223  4.83441  3.15835  2.33659  4.10387  3.53825  3.79739  3.50371  2.61506  2.95960  3.75295  3.12487  1.86716  2.96533  3.06273  2.60399  2.36144  3.19033  5.39703  4.08002    208 p - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01619  4.52743  5.24978  0.61958  0.77255  0.62832  0.76248\n+    205   1.87617  5.09761  2.66753  2.36334  4.39589  2.74795  3.18023  3.84868  2.46564  3.38501  4.15665  2.99165  3.89862  2.67910  2.94944  2.53808  2.60969  3.21059  5.56524  4.18537    209 a - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01619  4.52743  5.24978  0.61958  0.77255  0.62832  0.76248\n+    206   3.02518  4.38844  4.11409  4.15378  3.60416  4.28092  4.67591  1.38646  4.03415  2.26037  3.36491  4.32477  4.61541  4.25349  4.19108  3.60271  2.71238  1.26673  5.26692  4.07016    210 v - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  
3.61503\n+          0.01092  4.52217        *  0.61958  0.77255  0.00000        *\n+//\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/insecta/lengths_cutoff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/insecta/lengths_cutoff Thu Aug 26 06:55:33 2021 +0000
b
b'@@ -0,0 +1,1658 @@\n+EOG090W0HFV\t0\t26.009834120677418\t151\n+EOG090W0427\t0\t14.787662575205127\t119\n+EOG090W09K7\t0\t48.74476058088982\t218\n+EOG090W0B5K\t0\t8.436399156361812\t117\n+EOG090W0153\t0\t110.0106535860915\t535\n+EOG090W051T\t0\t48.42670503702809\t341\n+EOG090W01WI\t0\t66.27955761081256\t599\n+EOG090W01A3\t0\t147.49150864355283\t636\n+EOG090W067A\t0\t24.752819851328745\t239\n+EOG090W0IUR\t0\t33.027556860826415\t176\n+EOG090W09BV\t0\t59.827044564511475\t249\n+EOG090W0AIA\t0\t62.4605436431282\t296\n+EOG090W05D8\t0\t120.959030700539\t437\n+EOG090W01IP\t0\t203.56155914702887\t709\n+EOG090W02C3\t0\t123.60839779175912\t441\n+EOG090W0B1Y\t0\t14.92967253328818\t221\n+EOG090W01VD\t0\t137.07676494930246\t506\n+EOG090W035W\t0\t58.49371845525309\t413\n+EOG090W0DWN\t0\t41.156486855129046\t178\n+EOG090W028U\t0\t103.86567310127292\t544\n+EOG090W02TI\t0\t98.41080498838295\t449\n+EOG090W00PB\t0\t106.20423261052379\t448\n+EOG090W038B\t0\t77.2556129538091\t369\n+EOG090W0BUR\t0\t61.86700358352731\t260\n+EOG090W0F0L\t0\t46.56222745575028\t243\n+EOG090W08IZ\t0\t62.201306771663276\t286\n+EOG090W0BEB\t0\t24.453706055266927\t238\n+EOG090W0I37\t0\t18.215830819664493\t138\n+EOG090W0CQ9\t0\t22.121097366946984\t209\n+EOG090W04BS\t0\t57.09128334646491\t340\n+EOG090W0FCL\t0\t16.012152397171466\t130\n+EOG090W0AXJ\t0\t28.151048076823233\t222\n+EOG090W0FEP\t0\t12.631376967230036\t126\n+EOG090W0FVI\t0\t15.398685404771472\t141\n+EOG090W0GP3\t0\t19.401812121264605\t132\n+EOG090W01NH\t0\t109.56816301330042\t599\n+EOG090W0IEE\t0\t13.648148270718346\t133\n+EOG090W0IP7\t0\t16.863805057639492\t174\n+EOG090W038Z\t0\t59.028806526983075\t372\n+EOG090W0B8O\t0\t25.131143830790958\t193\n+EOG090W0KP0\t0\t31.31312208007784\t139\n+EOG090W064P\t0\t35.99213522298181\t305\n+EOG090W0A3V\t0\t29.293708718757674\t271\n+EOG090W0BOZ\t0\t11.373911097706742\t158\n+EOG090W0H6T\t0\t40.78248368613364\t174\n+EOG090W0EDI\t0\t25.598027290426895\t161\n+EOG090W0EFH\t0\t32.47988529410085\t158\n+EOG090W04BN\t0\t42.69662
448203931\t429\n+EOG090W00SP\t0\t347.84066824884763\t1158\n+EOG090W01ZN\t0\t35.63683451653604\t248\n+EOG090W014R\t0\t303.558641419558\t908\n+EOG090W0FJE\t0\t11.554345658688966\t128\n+EOG090W00EP\t0\t90.98598088688564\t391\n+EOG090W07MY\t0\t26.394659474607508\t276\n+EOG090W00LR\t0\t47.803732742449085\t428\n+EOG090W06QR\t0\t45.20103500608554\t255\n+EOG090W06J7\t0\t48.950477475341124\t349\n+EOG090W04G6\t0\t20.747524095601406\t271\n+EOG090W06IG\t0\t83.91320663964761\t392\n+EOG090W036Y\t0\t90.1813890269454\t405\n+EOG090W0CMO\t0\t27.111085843428626\t212\n+EOG090W0BFE\t0\t43.83422206895252\t292\n+EOG090W0JYN\t0\t8.6240352353082\t95\n+EOG090W054P\t0\t42.0493667617466\t242\n+EOG090W0753\t0\t88.63736820874865\t365\n+EOG090W0FKG\t0\t36.02492014973623\t238\n+EOG090W0B6L\t0\t37.512128120083524\t178\n+EOG090W0C2I\t0\t39.9957024404497\t248\n+EOG090W057Z\t0\t39.37529750270147\t336\n+EOG090W01H1\t0\t167.89279528460568\t626\n+EOG090W07A3\t0\t38.193049881273545\t207\n+EOG090W0D5E\t0\t36.11275078460196\t174\n+EOG090W07PK\t0\t44.50703007278868\t242\n+EOG090W0GKW\t0\t13.60945220480054\t126\n+EOG090W0J9P\t0\t72.21231118153511\t276\n+EOG090W0A69\t0\t38.463468950032976\t247\n+EOG090W0DRQ\t0\t7.927491529558358\t71\n+EOG090W04G1\t0\t70.1561152864883\t382\n+EOG090W08L6\t0\t34.61950687705208\t168\n+EOG090W02QT\t0\t126.78937306395682\t573\n+EOG090W0K04\t0\t13.698118905107428\t123\n+EOG090W02UI\t0\t75.33983172202097\t569\n+EOG090W00MS\t0\t222.74918630603344\t865\n+EOG090W0HXZ\t0\t14.672974042346928\t111\n+EOG090W09LF\t0\t49.42509730797137\t225\n+EOG090W060L\t0\t48.49481993961427\t407\n+EOG090W0I0Q\t0\t8.898446275006405\t87\n+EOG090W0B0M\t0\t19.378137039507983\t192\n+EOG090W0EY0\t0\t25.633665074147427\t177\n+EOG090W019L\t0\t161.9212108198199\t655\n+EOG090W0ALV\t0\t33.567421363630004\t327\n+EOG090W0F9J\t0\t11.9066247090783\t195\n+EOG090W0BZ2\t0\t13.511577175520067\t123\n+EOG090W0B5T\t0\t12.744268370786148\t181\n+EOG090W0JBN\t0\t8.059235823034468\t94\n+EOG090W06CO\t0\t44.76325256823732\t283\n+EOG090
W06Y4\t0\t58.56965239084716\t304\n+EOG090W00D0\t0\t173.60012223217007\t969\n+EOG090W0N7U\t0\t7.090271589968341\t73\n+EOG090W02H5\t0\t187.43347716555155\t645\n+EOG090W04DH\t0\t97.51184745911475\t425\n+EOG090W01HI\t0\t141.17028909316608\t668\n+EOG090W02JZ\t0\t106.2021542200327\t499\n+EOG090W0ANA\t0\t43.80791670031081\t242\n+EOG090W055F\t0\t25.315135740520443\t269\n+EOG090W0KMC\t0\t20.563083052290207\t149\n+EOG090W063Z\t0\t20.0'..b'656909\t232\n+EOG090W050K\t0\t77.93871889144076\t376\n+EOG090W0CL8\t0\t25.966747028064802\t218\n+EOG090W0JJQ\t0\t13.62856576746243\t117\n+EOG090W06W8\t0\t48.24622893316924\t267\n+EOG090W0EPV\t0\t35.10902433482864\t239\n+EOG090W00WM\t0\t260.8164746511948\t897\n+EOG090W005S\t0\t611.4917595121128\t1630\n+EOG090W02UQ\t0\t51.49464074592311\t285\n+EOG090W03FA\t0\t53.973113494178904\t344\n+EOG090W02B7\t0\t115.3009159526159\t425\n+EOG090W06DJ\t0\t77.792516412288\t338\n+EOG090W08FE\t0\t36.68154855844776\t279\n+EOG090W06P2\t0\t41.670994647017906\t299\n+EOG090W0C7S\t0\t29.611469746269982\t147\n+EOG090W0C4Z\t0\t26.43060073195963\t145\n+EOG090W00ZP\t0\t161.60550467275013\t697\n+EOG090W0C7Z\t0\t37.684920128988004\t192\n+EOG090W06AN\t0\t42.662308774978584\t289\n+EOG090W0FYR\t0\t13.508876473998257\t128\n+EOG090W015U\t0\t42.296492199255404\t276\n+EOG090W02LX\t0\t53.129566825639934\t495\n+EOG090W0DZ4\t0\t9.599076852633498\t186\n+EOG090W050Y\t0\t60.226966189955085\t368\n+EOG090W08GU\t0\t53.02740180911049\t280\n+EOG090W09LK\t0\t91.72851994063471\t372\n+EOG090W077G\t0\t76.76324145120006\t331\n+EOG090W0B8P\t0\t19.196863497747415\t150\n+EOG090W0A73\t0\t27.406826637563558\t161\n+EOG090W0B3U\t0\t43.48966796277244\t264\n+EOG090W0LL3\t0\t13.321975417932757\t120\n+EOG090W09R9\t0\t36.65014937864991\t212\n+EOG090W0A58\t0\t48.47060965162291\t255\n+EOG090W06VZ\t0\t29.419164232825445\t317\n+EOG090W080B\t0\t79.75592849488558\t305\n+EOG090W0ALP\t0\t13.883838648218155\t145\n+EOG090W0EJV\t0\t5.956771852621479\t160\n+EOG090W0BI6\t0\t30.97683170635923\t224\n+EOG090W05KO\t0\t95.43
3274449961\t425\n+EOG090W03K0\t0\t72.48414460771538\t334\n+EOG090W0JFZ\t0\t14.305445143137689\t92\n+EOG090W09RO\t0\t29.48406784028039\t267\n+EOG090W00ZV\t0\t61.02240025870282\t345\n+EOG090W005V\t0\t262.52686163098906\t1059\n+EOG090W0F9A\t0\t10.246423950758501\t105\n+EOG090W0IKC\t0\t14.947429244710978\t76\n+EOG090W04QG\t0\t49.56877927900001\t361\n+EOG090W00U5\t0\t71.63448419048026\t402\n+EOG090W0JS6\t0\t8.421979153901846\t87\n+EOG090W06X4\t0\t32.054695199308284\t297\n+EOG090W0H7U\t0\t24.51177799464656\t186\n+EOG090W02LH\t0\t31.4090320424614\t301\n+EOG090W06AU\t0\t21.750918119113738\t153\n+EOG090W0L6N\t0\t11.596992881882745\t94\n+EOG090W0028\t0\t588.5016762903192\t1601\n+EOG090W05ZG\t0\t33.671801910536935\t269\n+EOG090W0DSQ\t0\t53.83672675323885\t204\n+EOG090W0CIU\t0\t10.5996734768809\t188\n+EOG090W09DT\t0\t42.16152158590845\t238\n+EOG090W0883\t0\t43.82515148221423\t331\n+EOG090W08IL\t0\t31.073523765582504\t244\n+EOG090W07HX\t0\t43.87674109168712\t264\n+EOG090W0ADL\t0\t74.64253566218997\t270\n+EOG090W07E5\t0\t31.219065990205618\t342\n+EOG090W0CHN\t0\t36.32844749725724\t170\n+EOG090W0F27\t0\t28.270070111541965\t150\n+EOG090W05FW\t0\t36.389874917560235\t332\n+EOG090W061C\t0\t33.972962707452396\t322\n+EOG090W023I\t0\t6.011474071541897\t134\n+EOG090W09Y9\t0\t20.77931440362711\t167\n+EOG090W029L\t0\t129.73510535308327\t545\n+EOG090W078A\t0\t54.90487338981139\t401\n+EOG090W0C83\t0\t38.32093295366156\t262\n+EOG090W015Z\t0\t176.6733585172255\t684\n+EOG090W05IA\t0\t51.778721543176026\t345\n+EOG090W06HO\t0\t71.48257830521672\t342\n+EOG090W0E6K\t0\t76.78984701084445\t299\n+EOG090W032M\t0\t41.865258362423724\t238\n+EOG090W04ZL\t0\t64.14590294096341\t349\n+EOG090W0A4U\t0\t17.926850008284003\t178\n+EOG090W0G0Z\t0\t34.8181704466556\t189\n+EOG090W012F\t0\t210.2707396955778\t725\n+EOG090W08ME\t0\t14.38099468500289\t138\n+EOG090W090H\t0\t89.63598147718064\t390\n+EOG090W0C7T\t0\t14.33867361284194\t189\n+EOG090W0AUB\t0\t1.1004433696270324\t217\n+EOG090W094H\t0\t21.30754829636339\t247\n+E
OG090W00HE\t0\t188.33841438573634\t778\n+EOG090W0HKZ\t0\t15.56285848109415\t128\n+EOG090W02KK\t0\t67.63366170127085\t626\n+EOG090W0828\t0\t24.44301723915799\t140\n+EOG090W07PH\t0\t52.049033834231\t266\n+EOG090W01XB\t0\t112.40728428787705\t527\n+EOG090W02C5\t0\t174.99547032465253\t515\n+EOG090W00WO\t0\t160.60058592597883\t618\n+EOG090W0140\t0\t77.3058731633741\t335\n+EOG090W01QT\t0\t54.84769853734253\t240\n+EOG090W0FQ4\t0\t21.524963224401723\t140\n+EOG090W0CAH\t0\t18.306986664749655\t190\n+EOG090W080Z\t0\t42.915151411519396\t247\n+EOG090W02AU\t0\t90.99510841851644\t457\n+EOG090W096X\t0\t37.173612523675914\t248\n+EOG090W04OJ\t0\t60.586185145045505\t253\n+EOG090W09UY\t0\t59.0061956628029\t262\n+EOG090W07CG\t0\t39.79244204205305\t320\n+EOG090W0KFZ\t0\t5.208228204067198\t77\n+EOG090W0LWB\t0\t5.161494516225474\t78\n+EOG090W0F00\t0\t36.17643723361296\t196\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/insecta/prfl/EOG090W03A6.prfl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/insecta/prfl/EOG090W03A6.prfl Thu Aug 26 06:55:33 2021 +0000
[
b'@@ -0,0 +1,495 @@\n+[name]\n+unknown\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t11\n+\n+[block]\n+# block no. 0 follows, 26 sequences, length 14\n+# corresponding to MSA columns:\n+# 26-39\n+name=unknown_A\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\tY\tW\tH\tM\tC\tP\n+0\t0.01222\t0.00873\t0.01222\t0.01397\t0.01572\t0.00873\t0.01222\t0.01572\t0.01746\t0.02270\t0.04016\t0.08556\t0.04365\t0.02095\t0.01048\t0.00349\t0.00698\t0.63506\t0.00698\t0.00698\n+1\t0.00956\t0.00674\t0.00871\t0.01015\t0.01051\t0.00637\t0.00660\t0.01065\t0.01390\t0.01853\t0.03861\t0.60723\t0.04613\t0.15052\t0.01485\t0.00384\t0.00493\t0.01968\t0.00660\t0.00588\n+2\t0.01493\t0.01410\t0.02384\t0.62468\t0.13806\t0.01701\t0.02136\t0.01991\t0.01545\t0.02011\t0.01359\t0.02003\t0.01037\t0.00747\t0.00757\t0.00249\t0.00996\t0.00674\t0.00342\t0.00891\n+3\t0.01103\t0.00735\t0.00827\t0.00827\t0.00827\t0.00735\t0.00460\t0.01103\t0.01103\t0.01471\t0.02390\t0.04964\t0.02758\t0.73343\t0.03861\t0.00735\t0.00735\t0.01103\t0.00460\t0.00460\n+4\t0.00924\t0.00660\t0.00880\t0.01056\t0.01100\t0.00616\t0.00704\t0.01056\t0.01452\t0.01936\t0.04181\t0.72848\t0.05017\t0.02376\t0.00968\t0.00308\t0.00440\t0.02156\t0.00704\t0.00616\n+5\t0.02748\t0.02058\t0.02200\t0.01718\t0.02301\t0.02289\t0.01409\t0.57682\t0.12187\t0.04563\t0.01992\t0.01958\t0.01429\t0.00927\t0.00761\t0.00232\t0.00803\t0.00707\t0.00761\t0.01278\n+6\t0.15256\t0.01392\t0.02108\t0.55605\t0.07699\t0.01694\t0.01888\t0.02009\t0.01487\t0.02226\t0.01300\t0.01874\t0.00987\t0.00745\t0.00706\t0.00248\t0.00930\t0.00627\t0.00363\t0.00857\n+7\t0.11917\t0.01388\t0.02161\t0.58414\t0.08089\t0.01694\t0.01944\t0.01998\t0.01497\t0.02165\t0.01313\t0.01908\t0.00996\t0.00747\t0.00719\t0.00249\t0.00948\t0.00639\t0.00357\t0.00859\n+8\t0.07699\t0.01719\t0.02511\t0.32900\t0.31130\t0.06966\t0.02051\t0.02206\t0.01635\t0.02270\t0.01345\t0.01851\t0.01073\t0.00722\t0.00727\t0.00236\t0.00960\t0.00640\t0.00367\t0.00993\
n+9\t0.22837\t0.01515\t0.01649\t0.12060\t0.02277\t0.05359\t0.01194\t0.11110\t0.02019\t0.19029\t0.11508\t0.02414\t0.02104\t0.00925\t0.00698\t0.00235\t0.00710\t0.00718\t0.00650\t0.00989\n+10\t0.01881\t0.01720\t0.02326\t0.46321\t0.09186\t0.07067\t0.01933\t0.15694\t0.02017\t0.02577\t0.01441\t0.01919\t0.01082\t0.00786\t0.00752\t0.00241\t0.00992\t0.00662\t0.00434\t0.00968\n+11\t0.05197\t0.02202\t0.02168\t0.07042\t0.02458\t0.14127\t0.01479\t0.34418\t0.08557\t0.03496\t0.01658\t0.01808\t0.01254\t0.00931\t0.00949\t0.00240\t0.09660\t0.00652\t0.00601\t0.01103\n+12\t0.04828\t0.01906\t0.02153\t0.02065\t0.11087\t0.13727\t0.06274\t0.12273\t0.06954\t0.09870\t0.14622\t0.02634\t0.02452\t0.00986\t0.00865\t0.00235\t0.04619\t0.00808\t0.00615\t0.01027\n+13\t0.08658\t0.02046\t0.07459\t0.03874\t0.02561\t0.07075\t0.13650\t0.20915\t0.09128\t0.05154\t0.01799\t0.02037\t0.01366\t0.01401\t0.09115\t0.00355\t0.01079\t0.00712\t0.00554\t0.01060\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t30\n+\n+[block]\n+# block no. 
1 follows, 26 sequences, length 8\n+# corresponding to MSA columns:\n+# 71-78\n+name=unknown_B\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\tY\tW\tH\tM\tC\tP\n+0\t0.02194\t0.06459\t0.02353\t0.16038\t0.11196\t0.02016\t0.01601\t0.31327\t0.06090\t0.03449\t0.06104\t0.02133\t0.01644\t0.00872\t0.00744\t0.00227\t0.00824\t0.00711\t0.00593\t0.03426\n+1\t0.02052\t0.05093\t0.07159\t0.02726\t0.10655\t0.20206\t0.25692\t0.02480\t0.01765\t0.02167\t0.01318\t0.01732\t0.01070\t0.00789\t0.01035\t0.00243\t0.11776\t0.00689\t0.00371\t0.00981\n+2\t0.01568\t0.10472\t0.01891\t0.01780\t0.14957\t0.01459\t0.01164\t0.12310\t0.02011\t0.02557\t0.04608\t0.04525\t0.35102\t0.01341\t0.00792\t0.00233\t0.00617\t0.01110\t0.00599\t0.00904\n+3\t0.10891\t0.01808\t0.02949\t0.10245\t0.33046\t0.01872\t0.14796\t0.10391\t0.01901\t0.02814\t0.01447\t0.01843\t0.01139\t0.00710\t0.00749\t0.00236\t0.00943\t0.00690\t0.00433\t0.01097\n+4\t0.08080\t0.02151\t0.01983\t0.01626\t0.02095\t0.19381\t0.01303\t0.17602\t0.02227\t0.15089\t0.01650\t0.01750\t0.01226\t0.00877\t0.00914\t0.00220\t0.09913\t0.00596\t0.00583\t0.10734\n+5\t0.01885\t0.01934\t0.03543\t0.02641\t0.03208\t0.02036\t0.40079\t0.08844\t0.08944\t0.02629\t0.01628\t0.02034\t0.01261\t0.00841\t0.01174\t0.00268\t0.14739\t0.00825\t0.00453\t0.01032\n+6\t0.01577\t0.01496\t0.02536\t0.49795\t0.25820\t0.01723\t0.02178\t0.02065\t0.01585\t0.02113\t0.01374\t0.01975\t0.01073\t0.00732\t0.00756\t0.00244\t0.00975\t0.00675\t0.00350\t0.0095'..b'1\t0.00639\t0.00895\t0.00256\t0.01279\t0.00895\t0.00384\t0.01023\n+73\t0.01076\t0.00724\t0.00835\t0.00861\t0.00868\t0.00718\t0.00496\t0.01096\t0.01155\t0.01540\t0.02656\t0.15039\t0.03093\t0.62810\t0.03431\t0.00672\t0.00692\t0.01259\t0.00496\t0.00483\n+74\t0.01521\t0.03923\t0.69413\t0.02162\t0.03283\t0.01762\t0.02802\t0.02402\t0.01601\t0.02402\t0.01361\t0.01601\t0.00961\t0.00721\t0.00721\t0.00240\t0.01121\t0.00560\t0.00320\t0.01121\n+75\t0.01432\t0.01348\t0.02275\t0.71520\t0.05224\t0.01685\t0.0210
7\t0.01938\t0.01517\t0.01938\t0.01348\t0.02022\t0.01011\t0.00758\t0.00758\t0.00253\t0.01011\t0.00674\t0.00337\t0.00843\n+76\t0.01474\t0.01390\t0.02349\t0.65337\t0.11086\t0.01696\t0.02127\t0.01974\t0.01536\t0.01988\t0.01355\t0.02009\t0.01029\t0.00751\t0.00758\t0.00250\t0.01001\t0.00674\t0.00341\t0.00876\n+77\t0.01573\t0.01348\t0.01573\t0.01123\t0.01798\t0.01011\t0.00899\t0.01910\t0.01573\t0.02472\t0.01348\t0.01573\t0.01123\t0.00562\t0.00562\t0.00112\t0.00562\t0.00449\t0.00449\t0.77980\n+78\t0.02883\t0.02125\t0.02276\t0.01745\t0.02352\t0.02352\t0.01442\t0.66082\t0.03566\t0.04780\t0.01821\t0.01821\t0.01290\t0.00911\t0.00759\t0.00228\t0.00835\t0.00683\t0.00759\t0.01290\n+79\t0.01790\t0.02046\t0.04476\t0.03197\t0.03964\t0.01918\t0.65857\t0.02430\t0.01790\t0.02430\t0.01535\t0.02046\t0.01151\t0.00639\t0.00895\t0.00256\t0.01279\t0.00895\t0.00384\t0.01023\n+80\t0.01432\t0.01348\t0.02275\t0.71520\t0.05224\t0.01685\t0.02107\t0.01938\t0.01517\t0.01938\t0.01348\t0.02022\t0.01011\t0.00758\t0.00758\t0.00253\t0.01011\t0.00674\t0.00337\t0.00843\n+81\t0.01091\t0.00784\t0.01074\t0.01074\t0.01159\t0.00869\t0.00767\t0.01261\t0.01176\t0.01653\t0.02165\t0.03717\t0.02216\t0.30815\t0.45648\t0.01039\t0.01550\t0.00921\t0.00426\t0.00596\n+82\t0.14579\t0.01317\t0.01664\t0.01297\t0.01867\t0.01202\t0.01071\t0.03479\t0.02040\t0.59380\t0.02705\t0.02381\t0.01721\t0.00904\t0.00719\t0.00235\t0.00637\t0.00711\t0.00869\t0.01221\n+83\t0.01705\t0.01494\t0.02275\t0.58420\t0.04685\t0.01810\t0.01982\t0.13981\t0.01901\t0.02472\t0.01437\t0.01984\t0.01063\t0.00787\t0.00758\t0.00248\t0.00978\t0.00676\t0.00416\t0.00927\n+84\t0.01432\t0.01348\t0.02275\t0.71520\t0.05224\t0.01685\t0.02107\t0.01938\t0.01517\t0.01938\t0.01348\t0.02022\t0.01011\t0.00758\t0.00758\t0.00253\t0.01011\t0.00674\t0.00337\t0.00843\n+85\t0.01790\t0.02046\t0.04476\t0.03197\t0.03964\t0.01918\t0.65857\t0.02430\t0.01790\t0.02430\t0.01535\t0.02046\t0.01151\t0.00639\t0.00895\t0.00256\t0.01279\t0.00895\t0.00384\t0.01023\n+86\t0.02883\t0.02125\t0.02276
\t0.01745\t0.02352\t0.02352\t0.01442\t0.66082\t0.03566\t0.04780\t0.01821\t0.01821\t0.01290\t0.00911\t0.00759\t0.00228\t0.00835\t0.00683\t0.00759\t0.01290\n+87\t0.01659\t0.01659\t0.02323\t0.01991\t0.01991\t0.02323\t0.01659\t0.01825\t0.01162\t0.01825\t0.00996\t0.01659\t0.00996\t0.01328\t0.02489\t0.00332\t0.71955\t0.00664\t0.00332\t0.00830\n+88\t0.01039\t0.00753\t0.00980\t0.00967\t0.01123\t0.00694\t0.00708\t0.01348\t0.02001\t0.02797\t0.53948\t0.18340\t0.09020\t0.01719\t0.00908\t0.00252\t0.00374\t0.01528\t0.00806\t0.00694\n+89\t0.00949\t0.00678\t0.00909\t0.01084\t0.01139\t0.00637\t0.00747\t0.01099\t0.01477\t0.01964\t0.04167\t0.67500\t0.04963\t0.02353\t0.00975\t0.00311\t0.00462\t0.07259\t0.00704\t0.00623\n+90\t0.01432\t0.01348\t0.02275\t0.71520\t0.05224\t0.01685\t0.02107\t0.01938\t0.01517\t0.01938\t0.01348\t0.02022\t0.01011\t0.00758\t0.00758\t0.00253\t0.01011\t0.00674\t0.00337\t0.00843\n+91\t0.01579\t0.11910\t0.61934\t0.02063\t0.03130\t0.01903\t0.02630\t0.02387\t0.01595\t0.02332\t0.01326\t0.01557\t0.00962\t0.00712\t0.00694\t0.00231\t0.01086\t0.00543\t0.00321\t0.01104\n+92\t0.01499\t0.01416\t0.02396\t0.61489\t0.14734\t0.01703\t0.02140\t0.01996\t0.01548\t0.02019\t0.01360\t0.02000\t0.01040\t0.00746\t0.00757\t0.00249\t0.00995\t0.00674\t0.00343\t0.00896\n+93\t0.01755\t0.02345\t0.14323\t0.02872\t0.03641\t0.05046\t0.49780\t0.02386\t0.01761\t0.02374\t0.01621\t0.05431\t0.01303\t0.00744\t0.00862\t0.00253\t0.01217\t0.00886\t0.00390\t0.01012\n+94\t0.01667\t0.01591\t0.02684\t0.34869\t0.37232\t0.01770\t0.02201\t0.02131\t0.01609\t0.02207\t0.01374\t0.01934\t0.01107\t0.00739\t0.00823\t0.00242\t0.03764\t0.00675\t0.00356\t0.01024\n+95\t0.02037\t0.02306\t0.14580\t0.02556\t0.03296\t0.06905\t0.37543\t0.02613\t0.01817\t0.12499\t0.01723\t0.02046\t0.01252\t0.00947\t0.04096\t0.00291\t0.01198\t0.00784\t0.00459\t0.01053\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t774\n+\n+# created by:\n+# /home/cegg/simao/soft/augustus-3.2.1/scripts/msa2prfl.pl ./align_prep/EOG090W03A6.fa\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/insecta/prfl/EOG090W06A3.prfl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/insecta/prfl/EOG090W06A3.prfl Thu Aug 26 06:55:33 2021 +0000
[
b'@@ -0,0 +1,451 @@\n+[name]\n+unknown\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t78\n+\n+[block]\n+# block no. 0 follows, 35 sequences, length 97\n+# corresponding to MSA columns:\n+# 78-174\n+name=unknown_A\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\tY\tW\tH\tM\tC\tP\n+0\t0.01132\t0.00850\t0.01136\t0.01285\t0.05849\t0.00867\t0.00997\t0.01593\t0.10286\t0.03809\t0.03330\t0.08023\t0.10035\t0.01684\t0.04478\t0.00301\t0.00605\t0.42505\t0.00569\t0.00668\n+1\t0.01291\t0.01226\t0.02066\t0.57532\t0.15656\t0.01442\t0.04607\t0.01778\t0.04826\t0.01763\t0.01217\t0.01705\t0.00931\t0.00630\t0.00638\t0.00208\t0.00823\t0.00579\t0.00307\t0.00775\n+2\t0.02846\t0.01080\t0.01472\t0.01129\t0.01619\t0.00932\t0.00932\t0.03092\t0.01816\t0.74187\t0.02503\t0.02159\t0.01570\t0.00785\t0.00638\t0.00196\t0.00540\t0.00638\t0.00785\t0.01080\n+3\t0.00778\t0.00556\t0.00741\t0.00880\t0.00921\t0.00519\t0.00589\t0.00897\t0.01240\t0.01660\t0.06578\t0.74094\t0.04275\t0.01957\t0.00807\t0.00255\t0.00365\t0.01775\t0.00594\t0.00519\n+4\t0.00750\t0.00643\t0.00643\t0.00643\t0.00857\t0.00536\t0.00482\t0.00910\t0.01446\t0.01714\t0.06427\t0.06105\t0.73490\t0.01607\t0.00750\t0.00214\t0.00321\t0.01339\t0.00589\t0.00536\n+5\t0.00773\t0.00552\t0.00736\t0.00883\t0.00920\t0.00515\t0.00589\t0.00883\t0.01215\t0.01619\t0.03497\t0.77291\t0.04196\t0.01987\t0.00810\t0.00258\t0.00368\t0.01803\t0.00589\t0.00515\n+6\t0.00898\t0.00648\t0.00848\t0.00798\t0.00948\t0.00599\t0.00599\t0.01197\t0.01796\t0.02544\t0.73413\t0.04739\t0.05986\t0.01297\t0.00748\t0.00200\t0.00299\t0.01147\t0.00698\t0.00599\n+7\t0.82186\t0.01227\t0.00932\t0.00834\t0.01227\t0.01423\t0.00687\t0.01865\t0.01080\t0.02846\t0.00883\t0.01031\t0.00687\t0.00589\t0.00393\t0.00196\t0.00491\t0.00344\t0.00393\t0.00687\n+8\t0.82186\t0.01227\t0.00932\t0.00834\t0.01227\t0.01423\t0.00687\t0.01865\t0.01080\t0.02846\t0.00883\t0.01031\t0.00687\t0.00589\t0.00393\t0.00196\t0.00491\t0.00344\t0.00393\t0.00687
\n+9\t0.00907\t0.00677\t0.01003\t0.01003\t0.01111\t0.00784\t0.00773\t0.01122\t0.01015\t0.01461\t0.01714\t0.02574\t0.01621\t0.08357\t0.71641\t0.01000\t0.01646\t0.00692\t0.00342\t0.00557\n+10\t0.82186\t0.01227\t0.00932\t0.00834\t0.01227\t0.01423\t0.00687\t0.01865\t0.01080\t0.02846\t0.00883\t0.01031\t0.00687\t0.00589\t0.00393\t0.00196\t0.00491\t0.00344\t0.00393\t0.00687\n+11\t0.01578\t0.01363\t0.01434\t0.01291\t0.01650\t0.01578\t0.01004\t0.03371\t0.72602\t0.02654\t0.02582\t0.02367\t0.01937\t0.00861\t0.00646\t0.00215\t0.00502\t0.00717\t0.00646\t0.01004\n+12\t0.01198\t0.01128\t0.01903\t0.76180\t0.04369\t0.01409\t0.01762\t0.01621\t0.01268\t0.01621\t0.01128\t0.01691\t0.00846\t0.00634\t0.00634\t0.00211\t0.00846\t0.00564\t0.00282\t0.00705\n+13\t0.00773\t0.00552\t0.00736\t0.00883\t0.00920\t0.00515\t0.00589\t0.00883\t0.01215\t0.01619\t0.03497\t0.77291\t0.04196\t0.01987\t0.00810\t0.00258\t0.00368\t0.01803\t0.00589\t0.00515\n+14\t0.01198\t0.01128\t0.01903\t0.76180\t0.04369\t0.01409\t0.01762\t0.01621\t0.01268\t0.01621\t0.01128\t0.01691\t0.00846\t0.00634\t0.00634\t0.00211\t0.00846\t0.00564\t0.00282\t0.00705\n+15\t0.01315\t0.01128\t0.01315\t0.00940\t0.01503\t0.00846\t0.00752\t0.01597\t0.01315\t0.02067\t0.01128\t0.01315\t0.00940\t0.00470\t0.00470\t0.00094\t0.00470\t0.00376\t0.00376\t0.81583\n+16\t0.00773\t0.00552\t0.00736\t0.00883\t0.00920\t0.00515\t0.00589\t0.00883\t0.01215\t0.01619\t0.03497\t0.77291\t0.04196\t0.01987\t0.00810\t0.00258\t0.00368\t0.01803\t0.00589\t0.00515\n+17\t0.01578\t0.01363\t0.01434\t0.01291\t0.01650\t0.01578\t0.01004\t0.03371\t0.72602\t0.02654\t0.02582\t0.02367\t0.01937\t0.00861\t0.00646\t0.00215\t0.00502\t0.00717\t0.00646\t0.01004\n+18\t0.00773\t0.00552\t0.00736\t0.00883\t0.00920\t0.00515\t0.00589\t0.00883\t0.01215\t0.01619\t0.03497\t0.77291\t0.04196\t0.01987\t0.00810\t0.00258\t0.00368\t0.01803\t0.00589\t0.00515\n+19\t0.02412\t0.01777\t0.01904\t0.01460\t0.01967\t0.01967\t0.01206\t0.71633\t0.02983\t0.03998\t0.01523\t0.01523\t0.01079\t0.00762\t0.00635\t0.00190\t
0.00698\t0.00571\t0.00635\t0.01079\n+20\t0.01276\t0.01176\t0.01807\t0.60873\t0.03813\t0.01444\t0.01607\t0.01979\t0.15849\t0.01832\t0.01425\t0.01829\t0.01069\t0.00681\t0.00637\t0.00212\t0.00775\t0.00595\t0.00356\t0.00766\n+21\t0.01315\t0.01128\t0.01315\t0.00940\t0.01503\t0.00846\t0.00752\t0.01597\t0.01315\t0.02067\t0.01128\t0.01315\t0.00940\t0.00470\t0.00470\t0.00094\t0.00470\t0.00376\t0.00376\t0.81583\n+22\t0.01570\t0.01507\t0.02575\t0.03894\t0.73748\t0.01507\t0.01947\t0.01947\t0.01444\t0.02073\t0.01193\t0.0'..b'0.70975\t0.00996\t0.01636\t0.00694\t0.00342\t0.00556\n+89\t0.00785\t0.00644\t0.00692\t0.00680\t0.00879\t0.00551\t0.00510\t0.00979\t0.01530\t0.01913\t0.22483\t0.05778\t0.57309\t0.01532\t0.00749\t0.00211\t0.00316\t0.01293\t0.00615\t0.00551\n+90\t0.02370\t0.03023\t0.01798\t0.01634\t0.01961\t0.75158\t0.01226\t0.02533\t0.01798\t0.01553\t0.00981\t0.01144\t0.00817\t0.00654\t0.00572\t0.00163\t0.01144\t0.00409\t0.00327\t0.00735\n+91\t0.82186\t0.01227\t0.00932\t0.00834\t0.01227\t0.01423\t0.00687\t0.01865\t0.01080\t0.02846\t0.00883\t0.01031\t0.00687\t0.00589\t0.00393\t0.00196\t0.00491\t0.00344\t0.00393\t0.00687\n+92\t0.82186\t0.01227\t0.00932\t0.00834\t0.01227\t0.01423\t0.00687\t0.01865\t0.01080\t0.02846\t0.00883\t0.01031\t0.00687\t0.00589\t0.00393\t0.00196\t0.00491\t0.00344\t0.00393\t0.00687\n+93\t0.01501\t0.01702\t0.03689\t0.02731\t0.06618\t0.01600\t0.68185\t0.02028\t0.01495\t0.02034\t0.01279\t0.01705\t0.00965\t0.00536\t0.00743\t0.00213\t0.01055\t0.00740\t0.00321\t0.00863\n+94\t0.00891\t0.00648\t0.00838\t0.00791\t0.00943\t0.00596\t0.00593\t0.01183\t0.01779\t0.02504\t0.70181\t0.04805\t0.09243\t0.01312\t0.00748\t0.00200\t0.00300\t0.01157\t0.00693\t0.00596\n+95\t0.00773\t0.00552\t0.00736\t0.00883\t0.00920\t0.00515\t0.00589\t0.00883\t0.01215\t0.01619\t0.03497\t0.77291\t0.04196\t0.01987\t0.00810\t0.00258\t0.00368\t0.01803\t0.00589\t0.00515\n+96\t0.01315\t0.01128\t0.01315\t0.00940\t0.01503\t0.00846\t0.00752\t0.01597\t0.01315\t0.02067\t0.01128\t0.01315\t0.00940\t0.00470\t0.
00470\t0.00094\t0.00470\t0.00376\t0.00376\t0.81583\n+97\t0.01426\t0.01451\t0.01937\t0.01664\t0.01677\t0.04764\t0.01382\t0.01565\t0.01003\t0.01528\t0.00838\t0.01379\t0.00832\t0.01093\t0.02024\t0.00273\t0.73639\t0.00550\t0.00279\t0.00696\n+98\t0.01570\t0.01507\t0.02575\t0.03894\t0.73748\t0.01507\t0.01947\t0.01947\t0.01444\t0.02073\t0.01193\t0.01570\t0.01005\t0.00565\t0.00628\t0.00188\t0.00754\t0.00565\t0.00314\t0.01005\n+99\t0.02399\t0.02158\t0.01871\t0.01513\t0.01965\t0.24313\t0.01212\t0.50536\t0.02621\t0.03251\t0.01357\t0.01407\t0.00999\t0.00729\t0.00616\t0.00182\t0.00834\t0.00522\t0.00541\t0.00974\n+100\t0.00750\t0.00643\t0.00643\t0.00643\t0.00857\t0.00536\t0.00482\t0.00910\t0.01446\t0.01714\t0.06427\t0.06105\t0.73490\t0.01607\t0.00750\t0.00214\t0.00321\t0.01339\t0.00589\t0.00536\n+101\t0.02536\t0.01255\t0.01545\t0.01209\t0.01676\t0.01228\t0.00990\t0.19825\t0.08366\t0.47295\t0.05595\t0.02143\t0.01689\t0.00810\t0.00643\t0.00197\t0.00564\t0.00652\t0.00732\t0.01050\n+102\t0.01980\t0.04080\t0.05751\t0.01220\t0.01625\t0.04709\t0.01024\t0.14781\t0.13568\t0.24600\t0.06686\t0.13103\t0.02095\t0.00976\t0.00657\t0.00204\t0.00578\t0.00810\t0.00631\t0.00921\n+103\t0.02412\t0.01777\t0.01904\t0.01460\t0.01967\t0.01967\t0.01206\t0.71633\t0.02983\t0.03998\t0.01523\t0.01523\t0.01079\t0.00762\t0.00635\t0.00190\t0.00698\t0.00571\t0.00635\t0.01079\n+104\t0.00898\t0.00648\t0.00848\t0.00798\t0.00948\t0.00599\t0.00599\t0.01197\t0.01796\t0.02544\t0.73413\t0.04739\t0.05986\t0.01297\t0.00748\t0.00200\t0.00299\t0.01147\t0.00698\t0.00599\n+105\t0.01281\t0.01107\t0.01383\t0.06016\t0.01698\t0.00905\t0.02996\t0.01609\t0.03517\t0.02003\t0.01323\t0.05154\t0.01172\t0.03363\t0.00611\t0.00136\t0.00515\t0.00499\t0.00387\t0.64326\n+106\t0.01318\t0.09360\t0.65981\t0.01824\t0.05223\t0.01560\t0.02226\t0.01998\t0.01339\t0.01969\t0.01119\t0.01321\t0.00812\t0.00596\t0.00588\t0.00195\t0.00910\t0.00462\t0.00270\t0.00930\n+107\t0.01361\t0.01126\t0.01320\t0.00945\t0.01507\t0.00848\t0.00757\t0.01642\t0.01330\t0.04
218\t0.01169\t0.01341\t0.00958\t0.00479\t0.00475\t0.00097\t0.00472\t0.00384\t0.00388\t0.79183\n+108\t0.01557\t0.01687\t0.03452\t0.02493\t0.03104\t0.01623\t0.61740\t0.06216\t0.07226\t0.02197\t0.01400\t0.01752\t0.01047\t0.00574\t0.00734\t0.00213\t0.01003\t0.00736\t0.00365\t0.00880\n+109\t0.00762\t0.00643\t0.00660\t0.00656\t0.00864\t0.00541\t0.00492\t0.00934\t0.01475\t0.01783\t0.11991\t0.05992\t0.67883\t0.01581\t0.00750\t0.00213\t0.00319\t0.01323\t0.00598\t0.00541\n+110\t0.00750\t0.00643\t0.00643\t0.00643\t0.00857\t0.00536\t0.00482\t0.00910\t0.01446\t0.01714\t0.06427\t0.06105\t0.73490\t0.01607\t0.00750\t0.00214\t0.00321\t0.01339\t0.00589\t0.00536\n+111\t0.01022\t0.00730\t0.01022\t0.01168\t0.01314\t0.00730\t0.01022\t0.01314\t0.01460\t0.01899\t0.03359\t0.07156\t0.03651\t0.01752\t0.00876\t0.00292\t0.00584\t0.69478\t0.00584\t0.00584\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t0\n+\n+# created by:\n+# /home/cegg/simao/soft/augustus-3.2.1/scripts/msa2prfl.pl ./align_prep/EOG090W06A3.fa\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/insecta/prfl/EOG090W0GYE.prfl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/insecta/prfl/EOG090W0GYE.prfl Thu Aug 26 06:55:33 2021 +0000
[
b'@@ -0,0 +1,133 @@\n+[name]\n+unknown\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t1\n+\n+[block]\n+# block no. 0 follows, 37 sequences, length 109\n+# corresponding to MSA columns:\n+# 1-109\n+name=unknown_A\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\tY\tW\tH\tM\tC\tP\n+0\t0.02227\t0.01639\t0.01774\t0.01388\t0.01851\t0.01810\t0.01150\t0.67323\t0.02769\t0.03707\t0.01601\t0.01874\t0.01226\t0.00806\t0.00630\t0.00191\t0.00665\t0.05755\t0.00609\t0.01006\n+1\t0.01614\t0.01295\t0.01387\t0.01234\t0.01590\t0.01476\t0.00964\t0.03233\t0.68224\t0.07953\t0.02486\t0.02269\t0.01842\t0.00825\t0.00622\t0.00206\t0.00487\t0.00686\t0.00633\t0.00974\n+2\t0.01215\t0.01030\t0.01211\t0.00901\t0.01392\t0.00783\t0.00709\t0.01470\t0.01259\t0.01950\t0.01326\t0.09266\t0.01234\t0.00606\t0.00487\t0.00107\t0.00443\t0.00506\t0.00384\t0.73721\n+3\t0.02643\t0.01082\t0.01430\t0.01110\t0.01575\t0.00976\t0.00913\t0.05075\t0.07124\t0.67581\t0.02393\t0.02080\t0.01527\t0.00762\t0.00616\t0.00191\t0.00523\t0.00619\t0.00743\t0.01036\n+4\t0.01125\t0.01047\t0.01752\t0.71354\t0.03969\t0.01296\t0.01616\t0.01511\t0.01220\t0.01564\t0.01258\t0.07316\t0.01056\t0.00709\t0.00625\t0.00207\t0.00782\t0.00633\t0.00294\t0.00666\n+5\t0.01125\t0.01047\t0.01752\t0.71354\t0.03969\t0.01296\t0.01616\t0.01511\t0.01220\t0.01564\t0.01258\t0.07316\t0.01056\t0.00709\t0.00625\t0.00207\t0.00782\t0.00633\t0.00294\t0.00666\n+6\t0.01156\t0.01088\t0.01836\t0.77016\t0.04216\t0.01360\t0.01700\t0.01564\t0.01224\t0.01564\t0.01088\t0.01632\t0.00816\t0.00612\t0.00612\t0.00204\t0.00816\t0.00544\t0.00272\t0.00680\n+7\t0.00895\t0.00571\t0.00763\t0.00870\t0.00938\t0.00527\t0.00593\t0.01011\t0.01215\t0.07029\t0.03303\t0.72437\t0.03860\t0.01831\t0.00769\t0.00244\t0.00367\t0.01657\t0.00582\t0.00538\n+8\t0.00969\t0.00692\t0.00966\t0.01107\t0.01240\t0.00689\t0.00955\t0.01237\t0.01392\t0.01812\t0.03251\t0.12197\t0.03562\t0.01708\t0.00841\t0.00279\t0.00548\t0.65433\t0.00564\t0.00559\
n+9\t0.01156\t0.01088\t0.01836\t0.77016\t0.04216\t0.01360\t0.01700\t0.01564\t0.01224\t0.01564\t0.01088\t0.01632\t0.00816\t0.00612\t0.00612\t0.00204\t0.00816\t0.00544\t0.00272\t0.00680\n+10\t0.01570\t0.73033\t0.03022\t0.01033\t0.01520\t0.02279\t0.01012\t0.01760\t0.01238\t0.01449\t0.01039\t0.06715\t0.01028\t0.00627\t0.00422\t0.00140\t0.00632\t0.00432\t0.00285\t0.00764\n+11\t0.00937\t0.00647\t0.00721\t0.00711\t0.00736\t0.00663\t0.00415\t0.01066\t0.06293\t0.01289\t0.01971\t0.03878\t0.02199\t0.72714\t0.02930\t0.00565\t0.00585\t0.00875\t0.00390\t0.00415\n+12\t0.01575\t0.01474\t0.02436\t0.03583\t0.69259\t0.01487\t0.01825\t0.07139\t0.01504\t0.02138\t0.01175\t0.01512\t0.00975\t0.00559\t0.00606\t0.00182\t0.00723\t0.00546\t0.00326\t0.00975\n+13\t0.01183\t0.01105\t0.01802\t0.71383\t0.04021\t0.01372\t0.01646\t0.01690\t0.06602\t0.01638\t0.01192\t0.01680\t0.00894\t0.00628\t0.00613\t0.00204\t0.00791\t0.00555\t0.00298\t0.00701\n+14\t0.00746\t0.00533\t0.00710\t0.00852\t0.00888\t0.00497\t0.00568\t0.00852\t0.01172\t0.01563\t0.03374\t0.78088\t0.04049\t0.01918\t0.00781\t0.00249\t0.00355\t0.01740\t0.00568\t0.00497\n+15\t0.01445\t0.01651\t0.03612\t0.02580\t0.03199\t0.01548\t0.72446\t0.01961\t0.01445\t0.01961\t0.01238\t0.01651\t0.00929\t0.00516\t0.00722\t0.00206\t0.01032\t0.00722\t0.00310\t0.00826\n+16\t0.01228\t0.03166\t0.75316\t0.01745\t0.02649\t0.01422\t0.02262\t0.01939\t0.01292\t0.01939\t0.01099\t0.01292\t0.00775\t0.00582\t0.00582\t0.00194\t0.00905\t0.00452\t0.00258\t0.00905\n+17\t0.01637\t0.78856\t0.03208\t0.01047\t0.01571\t0.02422\t0.01047\t0.01833\t0.01244\t0.01440\t0.00851\t0.00982\t0.00786\t0.00524\t0.00393\t0.00131\t0.00655\t0.00327\t0.00262\t0.00786\n+18\t0.01269\t0.01088\t0.01269\t0.00907\t0.01451\t0.00816\t0.00725\t0.01541\t0.01269\t0.01995\t0.01088\t0.01269\t0.00907\t0.00453\t0.00453\t0.00091\t0.00453\t0.00363\t0.00363\t0.82229\n+19\t0.01269\t0.01088\t0.01269\t0.00907\t0.01451\t0.00816\t0.00725\t0.01541\t0.01269\t0.01995\t0.01088\t0.01269\t0.00907\t0.00453\t0.00453\t0.00091\t0
.00453\t0.00363\t0.00363\t0.82229\n+20\t0.01523\t0.01315\t0.01384\t0.01246\t0.01592\t0.01523\t0.00969\t0.03253\t0.73563\t0.02561\t0.02491\t0.02284\t0.01869\t0.00830\t0.00623\t0.00208\t0.00484\t0.00692\t0.00623\t0.00969\n+21\t0.82811\t0.01184\t0.00900\t0.00805\t0.01184\t0.01373\t0.00663\t0.01799\t0.01042\t0.02746\t0.00852\t0.00994\t0.00663\t0.00568\t0.00379\t0.00189\t0.00474\t0.00331\t0.00379\t0.00663\n+22\t0.00866\t0.00626\t0.00818\t0.00770\t0.00914\t0.00578\t0.00578\t0.01155\t0.01733\t0.02455\t0.74346\t0.04'..b'0.00723\t0.00207\t0.00310\t0.01292\t0.00568\t0.00517\n+86\t0.01141\t0.00571\t0.00571\t0.00571\t0.00713\t0.00571\t0.00428\t0.01426\t0.01284\t0.02282\t0.01997\t0.02282\t0.01569\t0.00713\t0.00428\t0.00143\t0.00285\t0.00571\t0.81886\t0.00571\n+87\t0.00746\t0.00533\t0.00710\t0.00852\t0.00888\t0.00497\t0.00568\t0.00852\t0.01172\t0.01563\t0.03374\t0.78088\t0.04049\t0.01918\t0.00781\t0.00249\t0.00355\t0.01740\t0.00568\t0.00497\n+88\t0.01637\t0.78856\t0.03208\t0.01047\t0.01571\t0.02422\t0.01047\t0.01833\t0.01244\t0.01440\t0.00851\t0.00982\t0.00786\t0.00524\t0.00393\t0.00131\t0.00655\t0.00327\t0.00262\t0.00786\n+89\t0.00723\t0.00620\t0.00620\t0.00620\t0.00827\t0.00517\t0.00465\t0.00878\t0.01395\t0.01654\t0.06201\t0.05891\t0.74421\t0.01550\t0.00723\t0.00207\t0.00310\t0.01292\t0.00568\t0.00517\n+90\t0.00746\t0.00533\t0.00710\t0.00852\t0.00888\t0.00497\t0.00568\t0.00852\t0.01172\t0.01563\t0.03374\t0.78088\t0.04049\t0.01918\t0.00781\t0.00249\t0.00355\t0.01740\t0.00568\t0.00497\n+91\t0.01445\t0.01651\t0.03612\t0.02580\t0.03199\t0.01548\t0.72446\t0.01961\t0.01445\t0.01961\t0.01238\t0.01651\t0.00929\t0.00516\t0.00722\t0.00206\t0.01032\t0.00722\t0.00310\t0.00826\n+92\t0.02287\t0.02917\t0.01735\t0.01577\t0.01892\t0.76030\t0.01183\t0.02444\t0.01735\t0.01498\t0.00946\t0.01104\t0.00788\t0.00631\t0.00552\t0.00158\t0.01104\t0.00394\t0.00315\t0.00710\n+93\t0.01156\t0.01088\t0.01836\t0.77016\t0.04216\t0.01360\t0.01700\t0.01564\t0.01224\t0.01564\t0.01088\t0.01632\t0.00816\t0.00612\t0.
00612\t0.00204\t0.00816\t0.00544\t0.00272\t0.00680\n+94\t0.01063\t0.00532\t0.00797\t0.00797\t0.00797\t0.00532\t0.00532\t0.00797\t0.00797\t0.01063\t0.01063\t0.01861\t0.01063\t0.02127\t0.02392\t0.82190\t0.00532\t0.00532\t0.00266\t0.00266\n+95\t0.02327\t0.01715\t0.01837\t0.01408\t0.01898\t0.01898\t0.01163\t0.72628\t0.02878\t0.03858\t0.01470\t0.01470\t0.01041\t0.00735\t0.00612\t0.00184\t0.00674\t0.00551\t0.00612\t0.01041\n+96\t0.01269\t0.01088\t0.01269\t0.00907\t0.01451\t0.00816\t0.00725\t0.01541\t0.01269\t0.01995\t0.01088\t0.01269\t0.00907\t0.00453\t0.00453\t0.00091\t0.00453\t0.00363\t0.00363\t0.82229\n+97\t0.01523\t0.01315\t0.01384\t0.01246\t0.01592\t0.01523\t0.00969\t0.03253\t0.73563\t0.02561\t0.02491\t0.02284\t0.01869\t0.00830\t0.00623\t0.00208\t0.00484\t0.00692\t0.00623\t0.00969\n+98\t0.00874\t0.00656\t0.00984\t0.00984\t0.01093\t0.00765\t0.00765\t0.01093\t0.00984\t0.01421\t0.01640\t0.02405\t0.01530\t0.04591\t0.76062\t0.00984\t0.01640\t0.00656\t0.00328\t0.00547\n+99\t0.01637\t0.78856\t0.03208\t0.01047\t0.01571\t0.02422\t0.01047\t0.01833\t0.01244\t0.01440\t0.00851\t0.00982\t0.00786\t0.00524\t0.00393\t0.00131\t0.00655\t0.00327\t0.00262\t0.00786\n+100\t0.00866\t0.00626\t0.00818\t0.00770\t0.00914\t0.00578\t0.00578\t0.01155\t0.01733\t0.02455\t0.74346\t0.04572\t0.05776\t0.01251\t0.00722\t0.00193\t0.00289\t0.01107\t0.00674\t0.00578\n+101\t0.02327\t0.01715\t0.01837\t0.01408\t0.01898\t0.01898\t0.01163\t0.72628\t0.02878\t0.03858\t0.01470\t0.01470\t0.01041\t0.00735\t0.00612\t0.00184\t0.00674\t0.00551\t0.00612\t0.01041\n+102\t0.02746\t0.01042\t0.01421\t0.01089\t0.01563\t0.00900\t0.00900\t0.02983\t0.01752\t0.75093\t0.02415\t0.02083\t0.01515\t0.00758\t0.00616\t0.00189\t0.00521\t0.00616\t0.00758\t0.01042\n+103\t0.00723\t0.00620\t0.00620\t0.00620\t0.00827\t0.00517\t0.00465\t0.00878\t0.01395\t0.01654\t0.06201\t0.05891\t0.74421\t0.01550\t0.00723\t0.00207\t0.00310\t0.01292\t0.00568\t0.00517\n+104\t0.00746\t0.00533\t0.00710\t0.00852\t0.00888\t0.00497\t0.00568\t0.00852\t0.01172\t0.01563
\t0.03374\t0.78088\t0.04049\t0.01918\t0.00781\t0.00249\t0.00355\t0.01740\t0.00568\t0.00497\n+105\t0.01523\t0.01315\t0.01384\t0.01246\t0.01592\t0.01523\t0.00969\t0.03253\t0.73563\t0.02561\t0.02491\t0.02284\t0.01869\t0.00830\t0.00623\t0.00208\t0.00484\t0.00692\t0.00623\t0.00969\n+106\t0.02327\t0.01715\t0.01837\t0.01408\t0.01898\t0.01898\t0.01163\t0.72628\t0.02878\t0.03858\t0.01470\t0.01470\t0.01041\t0.00735\t0.00612\t0.00184\t0.00674\t0.00551\t0.00612\t0.01041\n+107\t0.00723\t0.00620\t0.00620\t0.00620\t0.00827\t0.00517\t0.00465\t0.00878\t0.01395\t0.01654\t0.06201\t0.05891\t0.74421\t0.01550\t0.00723\t0.00207\t0.00310\t0.01292\t0.00568\t0.00517\n+108\t0.01445\t0.01651\t0.03612\t0.02580\t0.03199\t0.01548\t0.72446\t0.01961\t0.01445\t0.01961\t0.01238\t0.01651\t0.00929\t0.00516\t0.00722\t0.00206\t0.01032\t0.00722\t0.00310\t0.00826\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+11\t120\n+\n+# created by:\n+# /home/cegg/simao/soft/augustus-3.2.1/scripts/msa2prfl.pl ./align_prep/EOG090W0GYE.fa\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/insecta/prfl/EOG090W0T3K.prfl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/insecta/prfl/EOG090W0T3K.prfl Thu Aug 26 06:55:33 2021 +0000
[
b'@@ -0,0 +1,429 @@\n+[name]\n+unknown\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t16\n+\n+[block]\n+# block no. 0 follows, 30 sequences, length 21\n+# corresponding to MSA columns:\n+# 93-113\n+name=unknown_A\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\tY\tW\tH\tM\tC\tP\n+0\t0.01057\t0.00773\t0.01034\t0.01163\t0.01327\t0.00750\t0.00987\t0.01344\t0.01601\t0.02067\t0.06875\t0.11927\t0.14101\t0.01901\t0.00933\t0.00302\t0.00572\t0.50008\t0.00649\t0.00630\n+1\t0.01546\t0.06663\t0.01682\t0.01327\t0.01666\t0.04776\t0.06572\t0.02349\t0.22653\t0.06835\t0.14524\t0.05426\t0.08041\t0.04384\t0.04676\t0.00294\t0.00678\t0.04435\t0.00614\t0.00858\n+2\t0.01789\t0.53823\t0.20660\t0.01426\t0.02119\t0.06285\t0.01549\t0.02158\t0.01462\t0.01788\t0.01047\t0.01216\t0.00893\t0.00620\t0.00512\t0.00169\t0.00846\t0.00413\t0.00301\t0.00925\n+3\t0.12683\t0.01280\t0.01355\t0.01162\t0.01527\t0.03360\t0.00908\t0.13641\t0.11546\t0.07711\t0.12786\t0.08985\t0.04344\t0.01130\t0.00812\t0.05341\t0.00558\t0.00834\t0.00613\t0.09425\n+4\t0.14326\t0.01153\t0.03546\t0.01195\t0.01515\t0.01117\t0.05274\t0.04592\t0.11102\t0.02467\t0.09562\t0.20004\t0.11112\t0.01336\t0.00744\t0.00240\t0.00540\t0.05331\t0.00593\t0.04251\n+5\t0.02165\t0.01301\t0.01394\t0.01176\t0.01544\t0.09116\t0.00934\t0.08063\t0.01834\t0.27268\t0.05014\t0.02373\t0.01776\t0.01554\t0.12344\t0.00343\t0.00824\t0.00676\t0.16082\t0.04219\n+6\t0.01092\t0.00790\t0.01079\t0.01213\t0.01380\t0.00777\t0.01047\t0.01401\t0.01627\t0.02115\t0.08035\t0.07615\t0.09660\t0.01884\t0.00944\t0.00308\t0.00600\t0.57143\t0.00651\t0.00639\n+7\t0.01490\t0.01416\t0.02404\t0.44524\t0.32734\t0.01596\t0.02024\t0.01934\t0.01477\t0.01992\t0.01271\t0.01804\t0.01004\t0.00666\t0.00695\t0.00222\t0.00888\t0.00621\t0.00325\t0.00914\n+8\t0.01741\t0.27337\t0.28936\t0.01871\t0.02621\t0.10378\t0.13668\t0.02242\t0.01545\t0.01966\t0.01166\t0.01403\t0.00921\t0.00634\t0.00616\t0.00195\t0.00989\t0.00515\t0.00314\t0.00944
\n+9\t0.00978\t0.00661\t0.00772\t0.00809\t0.00818\t0.00652\t0.00474\t0.01005\t0.01087\t0.01450\t0.02570\t0.20443\t0.03005\t0.58960\t0.02952\t0.00588\t0.00615\t0.01233\t0.00474\t0.00455\n+10\t0.01976\t0.01393\t0.01804\t0.01508\t0.01922\t0.01460\t0.07980\t0.16972\t0.21138\t0.14459\t0.02649\t0.13414\t0.02277\t0.01140\t0.00766\t0.00241\t0.00656\t0.06562\t0.00682\t0.01001\n+11\t0.02375\t0.05431\t0.01867\t0.01574\t0.01930\t0.45877\t0.01197\t0.11245\t0.01988\t0.05951\t0.01404\t0.01807\t0.01203\t0.09670\t0.03288\t0.00273\t0.01088\t0.00560\t0.00438\t0.00832\n+12\t0.00905\t0.00728\t0.00811\t0.00811\t0.01012\t0.00650\t0.00600\t0.01186\t0.04448\t0.02139\t0.22165\t0.14724\t0.43929\t0.01702\t0.00828\t0.00238\t0.00362\t0.01457\t0.00680\t0.00624\n+13\t0.05667\t0.00906\t0.01074\t0.00966\t0.01254\t0.00852\t0.00747\t0.01900\t0.07503\t0.17135\t0.27144\t0.08776\t0.20624\t0.01356\t0.00772\t0.00228\t0.00426\t0.01152\t0.00729\t0.00791\n+14\t0.04127\t0.01698\t0.01775\t0.01504\t0.01907\t0.12772\t0.06970\t0.11033\t0.24965\t0.08074\t0.06339\t0.07501\t0.06234\t0.01012\t0.00723\t0.00224\t0.00714\t0.00849\t0.00621\t0.00958\n+15\t0.08182\t0.01473\t0.01498\t0.01335\t0.01725\t0.03410\t0.01044\t0.06184\t0.51550\t0.04834\t0.05791\t0.02708\t0.05309\t0.00969\t0.00695\t0.00231\t0.00561\t0.00790\t0.00682\t0.01031\n+16\t0.00861\t0.00647\t0.00778\t0.00870\t0.00970\t0.00586\t0.00593\t0.00996\t0.01406\t0.01808\t0.06850\t0.45850\t0.24625\t0.08430\t0.01101\t0.00301\t0.00410\t0.01721\t0.00632\t0.00564\n+17\t0.02657\t0.01690\t0.01962\t0.01639\t0.05856\t0.01788\t0.01271\t0.41742\t0.08333\t0.22637\t0.02048\t0.01947\t0.01409\t0.00842\t0.00700\t0.00213\t0.00706\t0.00661\t0.00725\t0.01176\n+18\t0.05960\t0.01505\t0.02492\t0.26659\t0.45727\t0.01618\t0.01999\t0.02020\t0.01504\t0.02166\t0.01270\t0.01737\t0.01029\t0.00648\t0.00679\t0.00216\t0.00846\t0.00608\t0.00339\t0.00979\n+19\t0.00917\t0.00665\t0.00857\t0.00985\t0.01070\t0.00627\t0.00710\t0.01074\t0.01393\t0.01819\t0.04060\t0.47254\t0.12255\t0.08385\t0.01126\t0.00319\t
0.00470\t0.14815\t0.00627\t0.00572\n+20\t0.05581\t0.01600\t0.01854\t0.01474\t0.01969\t0.01718\t0.04419\t0.31554\t0.18190\t0.16248\t0.02140\t0.02064\t0.01518\t0.00846\t0.00684\t0.00216\t0.00669\t0.00680\t0.05469\t0.01106\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t1\n+\n+[block]\n+# block no. 1 follows, 30 sequences, length 20\n+# corresponding to MSA columns:\n+# 116-135\n+name=unknown_B\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\t'..b'33\t0.01572\t0.00916\t0.09893\t0.05443\t0.01661\t0.06048\t0.08914\t0.29528\t0.05706\t0.01621\t0.00853\t0.00266\t0.00586\t0.17199\t0.04059\t0.00718\n+1\t0.01008\t0.00819\t0.01089\t0.01291\t0.10367\t0.00753\t0.00812\t0.01261\t0.01684\t0.02246\t0.28461\t0.17280\t0.27194\t0.01565\t0.00818\t0.00234\t0.00417\t0.01372\t0.00650\t0.00679\n+2\t0.01697\t0.01675\t0.01923\t0.11577\t0.11874\t0.14866\t0.01441\t0.02029\t0.04384\t0.04284\t0.01635\t0.04987\t0.03525\t0.01258\t0.05954\t0.00298\t0.21250\t0.00690\t0.03830\t0.00823\n+3\t0.00988\t0.00761\t0.01041\t0.10716\t0.01594\t0.00766\t0.00898\t0.01213\t0.01462\t0.01901\t0.07698\t0.32024\t0.13860\t0.05909\t0.01016\t0.00296\t0.00532\t0.16117\t0.00596\t0.00612\n+4\t0.01458\t0.01504\t0.06713\t0.05603\t0.04708\t0.08021\t0.08114\t0.03845\t0.04180\t0.02064\t0.04067\t0.07262\t0.02074\t0.08999\t0.05974\t0.00347\t0.08364\t0.09565\t0.06377\t0.00762\n+5\t0.01213\t0.00727\t0.00944\t0.00982\t0.01117\t0.00674\t0.00703\t0.01376\t0.01473\t0.11171\t0.09387\t0.39223\t0.07980\t0.12326\t0.01241\t0.00325\t0.00481\t0.07357\t0.00654\t0.00645\n+6\t0.01895\t0.19040\t0.13613\t0.01559\t0.02078\t0.13609\t0.04289\t0.12433\t0.05084\t0.02399\t0.06629\t0.01812\t0.01490\t0.00799\t0.00736\t0.00200\t0.05856\t0.00587\t0.04971\t0.00919\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t1\n+\n+[block]\n+# block no. 
11 follows, 30 sequences, length 15\n+# corresponding to MSA columns:\n+# 432-446\n+name=unknown_L\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\tY\tW\tH\tM\tC\tP\n+0\t0.06236\t0.06510\t0.08912\t0.16991\t0.02482\t0.07478\t0.01410\t0.04641\t0.01563\t0.11644\t0.01478\t0.01743\t0.01120\t0.00780\t0.00831\t0.00210\t0.10327\t0.00563\t0.08136\t0.06945\n+1\t0.14285\t0.23985\t0.21370\t0.01521\t0.02151\t0.06006\t0.05814\t0.06360\t0.01556\t0.02266\t0.01380\t0.07797\t0.01226\t0.00785\t0.00593\t0.00202\t0.00820\t0.00604\t0.00382\t0.00898\n+2\t0.07880\t0.07716\t0.01983\t0.01623\t0.04251\t0.14432\t0.02899\t0.04272\t0.07521\t0.09621\t0.01563\t0.04745\t0.01326\t0.01265\t0.06429\t0.00300\t0.20210\t0.00644\t0.00454\t0.00868\n+3\t0.01021\t0.00702\t0.00876\t0.00889\t0.01028\t0.00644\t0.00636\t0.01227\t0.01612\t0.05081\t0.28848\t0.24278\t0.15953\t0.10544\t0.01168\t0.00297\t0.00416\t0.03488\t0.00674\t0.00616\n+4\t0.01544\t0.03846\t0.06976\t0.05110\t0.11266\t0.03479\t0.07755\t0.07899\t0.08991\t0.04165\t0.09492\t0.11646\t0.02418\t0.10979\t0.01117\t0.00287\t0.00724\t0.00932\t0.00518\t0.00856\n+5\t0.00969\t0.00759\t0.00884\t0.00922\t0.01095\t0.00717\t0.00684\t0.01318\t0.07550\t0.02076\t0.14049\t0.24513\t0.29586\t0.04121\t0.00932\t0.00269\t0.00420\t0.07837\t0.00662\t0.00638\n+6\t0.01690\t0.01784\t0.02771\t0.06260\t0.05360\t0.07422\t0.22649\t0.04652\t0.01448\t0.01995\t0.01149\t0.01650\t0.00979\t0.00915\t0.01485\t0.00262\t0.35666\t0.00667\t0.00342\t0.00854\n+7\t0.01238\t0.00894\t0.01140\t0.01072\t0.01252\t0.00986\t0.00819\t0.10557\t0.01468\t0.02065\t0.02318\t0.03131\t0.07093\t0.13342\t0.42455\t0.00794\t0.01322\t0.00815\t0.06564\t0.00674\n+8\t0.01167\t0.00997\t0.01048\t0.01060\t0.02926\t0.07115\t0.00719\t0.01294\t0.01273\t0.01635\t0.02939\t0.06880\t0.15102\t0.31387\t0.14024\t0.00557\t0.04995\t0.00999\t0.03309\t0.00573\n+9\t0.00925\t0.00996\t0.08892\t0.00991\t0.01224\t0.00707\t0.00833\t0.01173\t0.01525\t0.02012\t0.14372\t0.35023\t0.25207\t0.01770\t0
.00834\t0.00252\t0.00449\t0.01551\t0.00625\t0.00640\n+10\t0.01655\t0.01534\t0.07694\t0.01239\t0.01839\t0.01227\t0.01075\t0.05370\t0.16343\t0.06417\t0.01692\t0.01762\t0.01303\t0.00657\t0.00590\t0.00154\t0.00585\t0.00530\t0.00505\t0.47830\n+11\t0.11634\t0.01390\t0.04532\t0.01352\t0.01857\t0.01281\t0.05510\t0.08091\t0.08585\t0.36568\t0.09789\t0.02439\t0.02065\t0.00880\t0.00690\t0.00219\t0.00624\t0.00728\t0.00717\t0.01048\n+12\t0.01105\t0.07400\t0.01185\t0.00947\t0.01187\t0.00952\t0.00741\t0.01565\t0.09949\t0.02356\t0.31299\t0.10422\t0.22936\t0.01467\t0.00787\t0.00229\t0.00417\t0.03677\t0.00673\t0.00706\n+13\t0.01971\t0.07456\t0.14709\t0.01386\t0.01908\t0.07351\t0.01304\t0.16242\t0.06634\t0.11664\t0.08820\t0.02331\t0.03910\t0.00872\t0.00659\t0.00203\t0.00708\t0.00685\t0.10223\t0.00964\n+14\t0.01454\t0.01364\t0.08113\t0.01206\t0.01537\t0.03959\t0.01025\t0.10345\t0.10294\t0.04920\t0.09273\t0.11455\t0.12668\t0.07330\t0.06551\t0.00337\t0.00701\t0.04199\t0.02468\t0.00801\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t46\n+\n+# created by:\n+# /home/cegg/simao/soft/augustus-3.2.1/scripts/msa2prfl.pl ./align_prep/EOG090W0T3K.fa\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/insecta/scores_cutoff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/insecta/scores_cutoff Thu Aug 26 06:55:33 2021 +0000
b
b'@@ -0,0 +1,1658 @@\n+EOG090W0HFV\t20.79\n+EOG090W0427\t76.86\n+EOG090W09K7\t70.14\n+EOG090W0B5K\t128.73\n+EOG090W0153\t102.48\n+EOG090W051T\t215.17999999999998\n+EOG090W01WI\t383.17999999999995\n+EOG090W01A3\t180.95\n+EOG090W067A\t151.13\n+EOG090W0IUR\t68.03999999999999\n+EOG090W09BV\t69.41\n+EOG090W0AIA\t45.01\n+EOG090W05D8\t52.35999999999999\n+EOG090W01IP\t193.27\n+EOG090W02C3\t62.79\n+EOG090W0B1Y\t200.97\n+EOG090W01VD\t37.519999999999996\n+EOG090W035W\t263.76\n+EOG090W0DWN\t50.489999999999995\n+EOG090W028U\t132.57999999999998\n+EOG090W02TI\t99.96000000000001\n+EOG090W00PB\t71.53999999999999\n+EOG090W038B\t138.32\n+EOG090W0BUR\t78.89\n+EOG090W0F0L\t27.65\n+EOG090W08IZ\t79.59\n+EOG090W0BEB\t171.71\n+EOG090W0I37\t40.10999999999999\n+EOG090W0CQ9\t196.62999999999997\n+EOG090W04BS\t134.26\n+EOG090W0FCL\t64.96\n+EOG090W0AXJ\t159.88\n+EOG090W0FEP\t79.44999999999999\n+EOG090W0FVI\t120.18999999999998\n+EOG090W0GP3\t64.75\n+EOG090W01NH\t132.65\n+EOG090W0IEE\t90.64999999999999\n+EOG090W0IP7\t102.33999999999999\n+EOG090W038Z\t104.78999999999999\n+EOG090W0B8O\t113.39999999999999\n+EOG090W0KP0\t37.309999999999995\n+EOG090W064P\t142.79999999999998\n+EOG090W0A3V\t166.73999999999998\n+EOG090W0BOZ\t106.18999999999998\n+EOG090W0H6T\t34.089999999999996\n+EOG090W0EDI\t55.36999999999999\n+EOG090W0EFH\t43.89\n+EOG090W04BN\t196.48999999999998\n+EOG090W00SP\t246.04999999999998\n+EOG090W01ZN\t189.64000000000001\n+EOG090W014R\t24.29\n+EOG090W0FJE\t84.41999999999999\n+EOG090W00EP\t16.59\n+EOG090W07MY\t127.67999999999999\n+EOG090W00LR\t281.75\n+EOG090W06QR\t84.84\n+EOG090W06J7\t114.8\n+EOG090W04G6\t264.99\n+EOG090W06IG\t86.66\n+EOG090W036Y\t274.78000000000003\n+EOG090W0CMO\t61.53\n+EOG090W0BFE\t67.34\n+EOG090W0JYN\t62.71999999999999\n+EOG090W054P\t125.64999999999999\n+EOG090W0753\t80.64\n+EOG090W0FKG\t73.42999999999999\n+EOG090W0B6L\t51.66\n+EOG090W0C2I\t135.07999999999998\n+EOG090W057Z\t83.44\n+EOG090W01H1\t66.00999999999999\n+EOG090W07A3\t61.31999999999999\n+EOG090W0D5E\t42.20999999999999
4\n+EOG090W07PK\t54.809999999999995\n+EOG090W0GKW\t93.86999999999999\n+EOG090W0J9P\t43.47\n+EOG090W0A69\t93.61\n+EOG090W0DRQ\t38.15\n+EOG090W04G1\t107.31\n+EOG090W08L6\t41.019999999999996\n+EOG090W02QT\t95.68999999999998\n+EOG090W0K04\t62.92\n+EOG090W02UI\t134.11999999999998\n+EOG090W00MS\t211.54\n+EOG090W0HXZ\t69.86\n+EOG090W09LF\t17.71\n+EOG090W060L\t301.84\n+EOG090W0I0Q\t47.39\n+EOG090W0B0M\t95.33999999999999\n+EOG090W0EY0\t90.64999999999999\n+EOG090W019L\t107.66\n+EOG090W0ALV\t164.29\n+EOG090W0F9J\t138.67\n+EOG090W0BZ2\t67.19999999999999\n+EOG090W0B5T\t160.85999999999999\n+EOG090W0JBN\t66.43\n+EOG090W06CO\t91.41999999999999\n+EOG090W06Y4\t45.35999999999999\n+EOG090W00D0\t558.88\n+EOG090W0N7U\t66.08\n+EOG090W02H5\t136.95\n+EOG090W04DH\t85.61\n+EOG090W01HI\t194.52999999999997\n+EOG090W02JZ\t297.21999999999997\n+EOG090W0ANA\t126.07\n+EOG090W055F\t99.82\n+EOG090W0KMC\t57.68\n+EOG090W063Z\t241.49999999999997\n+EOG090W0BC3\t72.38\n+EOG090W08A5\t68.03999999999999\n+EOG090W0BMW\t135.1\n+EOG090W08CW\t75.46\n+EOG090W04FE\t98.28\n+EOG090W0FH5\t58.38\n+EOG090W00RS\t269.64\n+EOG090W0EG7\t91.21000000000001\n+EOG090W06OD\t170.1\n+EOG090W00PL\t78.05\n+EOG090W0AV1\t163.45\n+EOG090W015K\t58.239999999999995\n+EOG090W07NX\t129.22\n+EOG090W0J8V\t91.69999999999999\n+EOG090W0FQ8\t58.239999999999995\n+EOG090W0JN2\t78.75\n+EOG090W029M\t83.02\n+EOG090W00TM\t273.90999999999997\n+EOG090W046G\t122.14999999999999\n+EOG090W06J5\t163.23999999999998\n+EOG090W0GWR\t32.34\n+EOG090W063H\t165.48\n+EOG090W01MU\t39.48\n+EOG090W0K07\t75.80999999999999\n+EOG090W02IA\t214.68999999999997\n+EOG090W0HTD\t76.78999999999999\n+EOG090W0F78\t69.36999999999999\n+EOG090W0GR5\t118.58\n+EOG090W04O1\t20.72\n+EOG090W0CN5\t108.57\n+EOG090W00X5\t129.36\n+EOG090W09PQ\t73.5\n+EOG090W06OE\t182.91\n+EOG090W0G1I\t35.769999999999996\n+EOG090W0GZA\t79.53\n+EOG090W07X1\t60.48\n+EOG090W08E9\t146.93\n+EOG090W054N\t226.82\n+EOG090W02JO\t52.849999999999994\n+EOG090W0AKG\t103.81\n+EOG090W04DG\t148.47\n+EOG090W00L3\t195.16\n+EOG090W
03O0\t98.56\n+EOG090W0HX7\t36.739999999999995\n+EOG090W08N5\t76.64999999999999\n+EOG090W00BP\t168.21\n+EOG090W0AY7\t48.019999999999996\n+EOG090W051U\t222.67000000000002\n+EOG090W09PJ\t166.18\n+EOG090W03TV\t286.92999999999995\n+EOG090W08A9\t100.44999999999999\n+EOG090W0IBV\t35.629999999999995\n+EOG090W04NQ\t64.05\n+EOG090'..b'92999999999999\n+EOG090W05HI\t39.76\n+EOG090W0DJI\t49.49\n+EOG090W03WV\t73.00999999999999\n+EOG090W0ESV\t140.14\n+EOG090W004H\t39.6\n+EOG090W0BJR\t77.91\n+EOG090W0BPH\t198.37999999999997\n+EOG090W0AH5\t97.86\n+EOG090W0BKY\t96.58\n+EOG090W07XK\t114.38\n+EOG090W0KJ3\t76.58\n+EOG090W0DEY\t74.61999999999999\n+EOG090W0I7M\t15.189999999999998\n+EOG090W05GY\t169.26\n+EOG090W0IF2\t144.41\n+EOG090W09IF\t75.03999999999999\n+EOG090W04XG\t28.49\n+EOG090W06TC\t53.480000000000004\n+EOG090W00ZZ\t305.54999999999995\n+EOG090W05ZP\t55.58\n+EOG090W09AW\t96.46000000000001\n+EOG090W0FFP\t109.76\n+EOG090W0GQZ\t48.51\n+EOG090W0DYP\t29.33\n+EOG090W0GI3\t63.349999999999994\n+EOG090W04OX\t73.36\n+EOG090W0C66\t73.91999999999999\n+EOG090W04IF\t24.29\n+EOG090W04PI\t196.35\n+EOG090W0GPQ\t36.33\n+EOG090W06OY\t207.68999999999997\n+EOG090W0PW0\t15.26\n+EOG090W0BM0\t39.199999999999996\n+EOG090W08QR\t84.91\n+EOG090W00VU\t337.46999999999997\n+EOG090W06PP\t58.169999999999995\n+EOG090W0EIQ\t158.61999999999998\n+EOG090W0KXF\t61.10999999999999\n+EOG090W019B\t225.72\n+EOG090W05XP\t95.61999999999999\n+EOG090W01V1\t162.39999999999998\n+EOG090W0K88\t52.43\n+EOG090W0ITI\t40.10999999999999\n+EOG090W08AN\t158.54999999999998\n+EOG090W0PZH\t39.199999999999996\n+EOG090W0F7U\t63.14\n+EOG090W0CNN\t69.09\n+EOG090W08FZ\t140.35\n+EOG090W0FGQ\t103.38999999999999\n+EOG090W05BJ\t77.77\n+EOG090W0A4R\t40.04\n+EOG090W09QT\t40.10999999999999\n+EOG090W0GDE\t43.05\n+EOG090W050K\t148.72\n+EOG090W0CL8\t70.07\n+EOG090W0JJQ\t77.35\n+EOG090W06W8\t103.72999999999999\n+EOG090W0EPV\t35.559999999999995\n+EOG090W00WM\t140.63\n+EOG090W005S\t210.07\n+EOG090W02UQ\t98.0\n+EOG090W03FA\t114.72999999999999\n+EOG090W02B7\t11.
97\n+EOG090W06DJ\t35.209999999999994\n+EOG090W08FE\t122.63999999999999\n+EOG090W06P2\t125.72999999999999\n+EOG090W0C7S\t146.51999999999998\n+EOG090W0C4Z\t55.660000000000004\n+EOG090W00ZP\t144.33999999999997\n+EOG090W0C7Z\t30.52\n+EOG090W06AN\t199.35999999999999\n+EOG090W0FYR\t77.98\n+EOG090W015U\t27.44\n+EOG090W02LX\t267.67999999999995\n+EOG090W0DZ4\t191.17000000000002\n+EOG090W050Y\t215.67000000000002\n+EOG090W08GU\t50.81999999999999\n+EOG090W09LK\t108.36\n+EOG090W077G\t64.61\n+EOG090W0B8P\t58.51999999999999\n+EOG090W0A73\t39.059999999999995\n+EOG090W0B3U\t47.739999999999995\n+EOG090W0LL3\t81.69\n+EOG090W09R9\t63.06999999999999\n+EOG090W0A58\t69.64999999999999\n+EOG090W06VZ\t445.71999999999997\n+EOG090W080B\t61.88\n+EOG090W0ALP\t115.01\n+EOG090W0EJV\t163.1\n+EOG090W0BI6\t112.76999999999998\n+EOG090W05KO\t149.66\n+EOG090W03K0\t120.11999999999999\n+EOG090W0JFZ\t39.199999999999996\n+EOG090W09RO\t93.1\n+EOG090W00ZV\t161.98\n+EOG090W005V\t363.79\n+EOG090W0F9A\t109.55\n+EOG090W0IKC\t25.2\n+EOG090W04QG\t181.85999999999999\n+EOG090W00U5\t126.17\n+EOG090W0JS6\t59.64\n+EOG090W06X4\t243.24999999999997\n+EOG090W0H7U\t18.759999999999998\n+EOG090W02LH\t101.28999999999999\n+EOG090W06AU\t54.10999999999999\n+EOG090W0L6N\t74.13\n+EOG090W0028\t38.39\n+EOG090W05ZG\t117.24999999999999\n+EOG090W0DSQ\t52.36\n+EOG090W0CIU\t151.13\n+EOG090W09DT\t96.46000000000001\n+EOG090W0883\t125.22999999999999\n+EOG090W08IL\t186.41\n+EOG090W07HX\t141.11999999999998\n+EOG090W0ADL\t42.14\n+EOG090W07E5\t140.69\n+EOG090W0CHN\t36.19\n+EOG090W0F27\t66.08\n+EOG090W05FW\t224.20999999999998\n+EOG090W061C\t309.87\n+EOG090W023I\t166.65\n+EOG090W09Y9\t86.59\n+EOG090W029L\t280.28000000000003\n+EOG090W078A\t137.13\n+EOG090W0C83\t33.39\n+EOG090W015Z\t231.07\n+EOG090W05IA\t174.85999999999999\n+EOG090W06HO\t108.71000000000001\n+EOG090W0E6K\t68.25\n+EOG090W032M\t17.009999999999998\n+EOG090W04ZL\t137.82999999999998\n+EOG090W0A4U\t92.11999999999999\n+EOG090W0G0Z\t34.37\n+EOG090W012F\t376.75\n+EOG090W08ME\t86.66\n+EOG090W09
0H\t203.07\n+EOG090W0C7T\t133.28\n+EOG090W0AUB\t233.79999999999998\n+EOG090W094H\t153.51\n+EOG090W00HE\t202.16\n+EOG090W0HKZ\t63.76999999999999\n+EOG090W02KK\t178.78\n+EOG090W0828\t35.349999999999994\n+EOG090W07PH\t84.84\n+EOG090W01XB\t80.36\n+EOG090W02C5\t70.14\n+EOG090W00WO\t96.25\n+EOG090W0140\t17.849999999999998\n+EOG090W01QT\t99.46999999999998\n+EOG090W0FQ4\t61.669999999999995\n+EOG090W0CAH\t116.61999999999999\n+EOG090W080Z\t53.76\n+EOG090W02AU\t96.03999999999999\n+EOG090W096X\t86.03\n+EOG090W04OJ\t69.36999999999999\n+EOG090W09UY\t66.43\n+EOG090W07CG\t158.41\n+EOG090W0KFZ\t64.75\n+EOG090W0LWB\t66.33\n+EOG090W0F00\t32.269999999999996\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/repeats.dmnd
b
Binary file test-data/funannotate_db/repeats.dmnd has changed
b
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/trained_species/fly/augustus/fly_exon_probs.pbl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_exon_probs.pbl Thu Aug 26 06:55:33 2021 +0000
[
b'@@ -0,0 +1,10659 @@\n+#exon model parameters\n+# begin of content independent part\n+\n+# Length distributions\n+[LENGTH]\n+# maximal individually stored length probability =\n+3000\n+# slope of smoothing bandwidth =\n+0.3\n+# smoothing minwindowcount =\n+8\n+# length single  initial  internal  terminal\n+# total number of exons of above types\n+       1959           8219          22997           8219\n+# number of exons exceeding length d\n+       27             19            136             55\n+# 1000 P(len=k), k=0,1,..., 3000\n+0\t0\t1.95\t0.0374\t0.249\n+1\t0\t2.18\t0.0414\t0.284\n+2\t0\t2.4\t0.0456\t0.321\n+3\t0\t2.62\t0.0501\t0.359\n+4\t0\t2.84\t0.0548\t0.397\n+5\t0\t3.04\t0.0598\t0.435\n+6\t0\t3.23\t0.065\t0.473\n+7\t0\t3.41\t0.0706\t0.509\n+8\t0\t3.57\t0.0765\t0.543\n+9\t0\t3.71\t0.0828\t0.576\n+10\t0\t3.82\t0.0894\t0.606\n+11\t0\t3.92\t0.0965\t0.633\n+12\t0\t3.99\t0.104\t0.658\n+13\t0\t4.05\t0.112\t0.68\n+14\t0\t4.08\t0.121\t0.699\n+15\t0\t4.1\t0.13\t0.716\n+16\t0\t4.1\t0.139\t0.73\n+17\t0\t4.08\t0.15\t0.743\n+18\t0\t4.06\t0.16\t0.754\n+19\t0\t4.02\t0.172\t0.763\n+20\t0\t3.98\t0.184\t0.771\n+21\t0\t3.94\t0.198\t0.778\n+22\t0\t3.89\t0.212\t0.785\n+23\t0\t3.84\t0.226\t0.791\n+24\t0\t3.79\t0.242\t0.797\n+25\t0\t3.75\t0.259\t0.803\n+26\t0\t3.71\t0.277\t0.809\n+27\t0\t3.67\t0.295\t0.816\n+28\t0\t3.65\t0.315\t0.823\n+29\t0\t3.62\t0.335\t0.831\n+30\t0\t3.61\t0.357\t0.841\n+31\t0\t3.6\t0.379\t0.851\n+32\t0\t3.6\t0.401\t0.862\n+33\t0\t3.61\t0.425\t0.874\n+34\t0\t3.62\t0.449\t0.888\n+35\t0\t3.64\t0.473\t0.902\n+36\t0\t3.67\t0.498\t0.918\n+37\t0\t3.7\t0.524\t0.935\n+38\t0\t3.74\t0.549\t0.953\n+39\t0\t3.77\t0.575\t0.971\n+40\t0\t3.82\t0.601\t0.991\n+41\t0\t3.86\t0.628\t1.01\n+42\t0\t3.91\t0.654\t1.03\n+43\t0\t3.96\t0.68\t1.06\n+44\t0\t4.01\t0.706\t1.08\n+45\t0\t4.06\t0.732\t1.11\n+46\t0\t4.11\t0.759\t1.13\n+47\t0\t4.17\t0.785\t1.16\n+48\t0\t4.22\t0.811\t1.18\n+49\t0\t4.27\t0.838\t1.21\n+50\t0\t4.32\t0.864\t1.23\n+51\t0\t4.37\t0.892\t1.26\n+52\t0\t4.42\t0.92\t1.29\n+
53\t0\t4.47\t0.949\t1.32\n+54\t0\t4.51\t0.979\t1.34\n+55\t0\t4.55\t1.01\t1.37\n+56\t0\t4.58\t1.04\t1.4\n+57\t0\t4.61\t1.08\t1.43\n+58\t0\t4.63\t1.12\t1.45\n+59\t0\t4.65\t1.15\t1.48\n+60\t0\t4.65\t1.2\t1.51\n+61\t0\t4.65\t1.24\t1.53\n+62\t0\t4.65\t1.28\t1.56\n+63\t0\t4.63\t1.33\t1.58\n+64\t0\t4.6\t1.37\t1.61\n+65\t0\t4.57\t1.42\t1.63\n+66\t0\t4.53\t1.47\t1.64\n+67\t0\t4.48\t1.52\t1.66\n+68\t0\t4.43\t1.56\t1.67\n+69\t0\t4.37\t1.61\t1.69\n+70\t0\t4.31\t1.66\t1.7\n+71\t0\t4.24\t1.7\t1.7\n+72\t0\t4.17\t1.74\t1.71\n+73\t0\t4.1\t1.79\t1.72\n+74\t0\t4.03\t1.83\t1.72\n+75\t0\t3.96\t1.86\t1.73\n+76\t0\t3.89\t1.9\t1.73\n+77\t0\t3.83\t1.94\t1.74\n+78\t0\t3.76\t1.97\t1.75\n+79\t0\t3.71\t2.01\t1.76\n+80\t0\t3.65\t2.05\t1.77\n+81\t0\t3.61\t2.08\t1.78\n+82\t0\t3.57\t2.12\t1.8\n+83\t0\t3.53\t2.15\t1.81\n+84\t0\t3.5\t2.19\t1.83\n+85\t0\t3.47\t2.23\t1.84\n+86\t0\t3.45\t2.27\t1.86\n+87\t0\t3.43\t2.31\t1.88\n+88\t0\t3.41\t2.35\t1.89\n+89\t0\t3.4\t2.39\t1.91\n+90\t0\t3.38\t2.44\t1.92\n+91\t0\t3.37\t2.48\t1.93\n+92\t0\t3.36\t2.53\t1.95\n+93\t0\t3.35\t2.57\t1.96\n+94\t0\t3.33\t2.62\t1.97\n+95\t0\t3.31\t2.66\t1.98\n+96\t0\t3.29\t2.71\t2\n+97\t0\t3.27\t2.76\t2.01\n+98\t0\t3.25\t2.81\t2.02\n+99\t0\t3.22\t2.85\t2.04\n+100\t0\t3.2\t2.9\t2.05\n+101\t0\t3.17\t2.95\t2.07\n+102\t0\t3.14\t3\t2.08\n+103\t0\t3.12\t3.05\t2.1\n+104\t0\t3.09\t3.1\t2.11\n+105\t0\t3.07\t3.15\t2.13\n+106\t0\t3.04\t3.2\t2.14\n+107\t0\t3.03\t3.24\t2.16\n+108\t0\t3.01\t3.29\t2.17\n+109\t0\t3\t3.33\t2.18\n+110\t0\t2.99\t3.37\t2.2\n+111\t0\t2.98\t3.41\t2.21\n+112\t0\t2.98\t3.45\t2.22\n+113\t0\t2.97\t3.49\t2.22\n+114\t0\t2.97\t3.52\t2.23\n+115\t0\t2.97\t3.56\t2.24\n+116\t0\t2.97\t3.59\t2.24\n+117\t0\t2.96\t3.62\t2.25\n+118\t0\t2.96\t3.65\t2.25\n+119\t0\t2.95\t3.68\t2.25\n+120\t0\t2.94\t3.71\t2.26\n+121\t0\t2.93\t3.74\t2.26\n+122\t0\t2.91\t3.77\t2.27\n+123\t0\t2.9\t3.8\t2.28\n+124\t0\t2.88\t3.84\t2.29\n+125\t0\t2.86\t3.87\t2.29\n+126\t0\t2.84\t3.9\t2.3\n+127\t0\t2.81\t3.93\t2.32\n+128\t0\t2.79\t3.96\t2.33\n+129\t0\t2.76\t3.99\t2.3
4\n+130\t0\t2.74\t4.01\t2.35\n+131\t0\t2.71\t4.04\t2.36\n+132\t0\t2.68\t4.06\t2.37\n+133\t0\t2.66\t4.08\t2.38\n+134\t0\t2.63\t4.1\t2.39\n+135\t0\t2.61\t4.12\t2.39\n+136\t0\t2.58\t4.14\t2.4\n+137\t0\t2.56\t4.15\t2.41\n+138\t0\t2.53\t4.16\t2.41\n+139\t0\t2.51\t4.17\t2.42\n+140\t0\t2.49\t4.17\t2.43\n+141\t0\t2.47\t4.18\t2.44\n+142\t0\t2.44\t4.18\t2.45\n+143\t0\t2.43\t4.18\t2.46\n+144\t0\t2.41\t4.18\t2.48\n+145\t0\t2.39\t4.18\t2.49\n+146\t0\t2.37\t4.18\t2.51\n+147\t0\t2.35\t4.17\t2.52\n+148\t0\t2.33\t4.17\t2.54\n+149\t0\t2.32\t4.16\t2.56\n+150\t0\t2.3\t4.15\t2.58\n+151\t0\t2.28\t4.15\t2.6\n+152\t0\t2.27\t4.13\t2.62\n+153\t0\t2.25\t4.12\t2.63\n+154\t0\t2.24\t4.1\t2.65\n+155\t0\t2.23\t4.09\t2.66\n+156\t0\t2.21\t4.07\t2.67\n+157\t0\t2.2\t4.04\t2.67\n+158\t0\t2.19\t4.02\t2.68\n+159\t0\t2.18\t4\t2.68\n+160\t0\t2.16\t3.9'..b'25\n+tgcca     \t0.25     \t0.25     \t0.25\n+tgccc     \t0.25     \t0.25     \t0.25\n+tgccg     \t0.25     \t0.25     \t0.25\n+tgcct     \t0.25     \t0.25     \t0.25\n+tgcga     \t0.25     \t0.25     \t0.25\n+tgcgc     \t0.25     \t0.25     \t0.25\n+tgcgg     \t0.25     \t0.25     \t0.25\n+tgcgt     \t0.25     \t0.25     \t0.25\n+tgcta     \t0.25     \t0.25     \t0.25\n+tgctc     \t0.25     \t0.25     \t0.25\n+tgctg     \t0.25     \t0.25     \t0.25\n+tgctt     \t0.25     \t0.25     \t0.25\n+tggaa     \t0.25     \t0.25     \t0.25\n+tggac     \t0.25     \t0.25     \t0.25\n+tggag     \t0.25     \t0.25     \t0.25\n+tggat     \t0.25     \t0.25     \t0.25\n+tggca     \t0.25     \t0.25     \t0.25\n+tggcc     \t0.25     \t0.25     \t0.25\n+tggcg     \t0.25     \t0.25     \t0.25\n+tggct     \t0.25     \t0.25     \t0.25\n+tggga     \t0.25     \t0.25     \t0.25\n+tgggc     \t0.25     \t0.25     \t0.25\n+tgggg     \t0.25     \t0.25     \t0.25\n+tgggt     \t0.25     \t0.25     \t0.25\n+tggta     \t0.25     \t0.25     \t0.25\n+tggtc     \t0.25     \t0.25     \t0.25\n+tggtg     \t0.25     \t0.25     \t0.25\n+tggtt     \t0.25     \t0.25     \t0.25\n+tgtaa     \t0.25    
 \t0.25     \t0.25\n+tgtac     \t0.25     \t0.25     \t0.25\n+tgtag     \t0.25     \t0.25     \t0.25\n+tgtat     \t0.25     \t0.25     \t0.25\n+tgtca     \t0.25     \t0.25     \t0.25\n+tgtcc     \t0.25     \t0.25     \t0.25\n+tgtcg     \t0.25     \t0.25     \t0.25\n+tgtct     \t0.25     \t0.25     \t0.25\n+tgtga     \t0.25     \t0.25     \t0.25\n+tgtgc     \t0.25     \t0.25     \t0.25\n+tgtgg     \t0.25     \t0.25     \t0.25\n+tgtgt     \t0.25     \t0.25     \t0.25\n+tgtta     \t0.25     \t0.25     \t0.25\n+tgttc     \t0.25     \t0.25     \t0.25\n+tgttg     \t0.25     \t0.25     \t0.25\n+tgttt     \t0.25     \t0.25     \t0.25\n+ttaaa     \t0.25     \t0.25     \t0.25\n+ttaac     \t0.25     \t0.25     \t0.25\n+ttaag     \t0.25     \t0.25     \t0.25\n+ttaat     \t0.25     \t0.25     \t0.25\n+ttaca     \t0.25     \t0.25     \t0.25\n+ttacc     \t0.25     \t0.25     \t0.25\n+ttacg     \t0.25     \t0.25     \t0.25\n+ttact     \t0.25     \t0.25     \t0.25\n+ttaga     \t0.25     \t0.25     \t0.25\n+ttagc     \t0.25     \t0.25     \t0.25\n+ttagg     \t0.25     \t0.25     \t0.25\n+ttagt     \t0.25     \t0.25     \t0.25\n+ttata     \t0.25     \t0.25     \t0.25\n+ttatc     \t0.25     \t0.25     \t0.25\n+ttatg     \t0.25     \t0.25     \t0.25\n+ttatt     \t0.25     \t0.25     \t0.25\n+ttcaa     \t0.25     \t0.25     \t0.25\n+ttcac     \t0.25     \t0.25     \t0.25\n+ttcag     \t0.25     \t0.25     \t0.25\n+ttcat     \t0.25     \t0.25     \t0.25\n+ttcca     \t0.25     \t0.25     \t0.25\n+ttccc     \t0.25     \t0.25     \t0.25\n+ttccg     \t0.25     \t0.25     \t0.25\n+ttcct     \t0.25     \t0.25     \t0.25\n+ttcga     \t0.25     \t0.25     \t0.25\n+ttcgc     \t0.25     \t0.25     \t0.25\n+ttcgg     \t0.25     \t0.25     \t0.25\n+ttcgt     \t0.25     \t0.25     \t0.25\n+ttcta     \t0.25     \t0.25     \t0.25\n+ttctc     \t0.25     \t0.25     \t0.25\n+ttctg     \t0.25     \t0.25     \t0.25\n+ttctt     \t0.25     \t0.25     \t0.25\n+ttgaa     \t0.25     \t0.25     \t0.25\n+ttgac     
\t0.25     \t0.25     \t0.25\n+ttgag     \t0.25     \t0.25     \t0.25\n+ttgat     \t0.25     \t0.25     \t0.25\n+ttgca     \t0.25     \t0.25     \t0.25\n+ttgcc     \t0.25     \t0.25     \t0.25\n+ttgcg     \t0.25     \t0.25     \t0.25\n+ttgct     \t0.25     \t0.25     \t0.25\n+ttgga     \t0.25     \t0.25     \t0.25\n+ttggc     \t0.25     \t0.25     \t0.25\n+ttggg     \t0.25     \t0.25     \t0.25\n+ttggt     \t0.25     \t0.25     \t0.25\n+ttgta     \t0.25     \t0.25     \t0.25\n+ttgtc     \t0.25     \t0.25     \t0.25\n+ttgtg     \t0.25     \t0.25     \t0.25\n+ttgtt     \t0.25     \t0.25     \t0.25\n+tttaa     \t0.25     \t0.25     \t0.25\n+tttac     \t0.25     \t0.25     \t0.25\n+tttag     \t0.25     \t0.25     \t0.25\n+tttat     \t0.25     \t0.25     \t0.25\n+tttca     \t0.25     \t0.25     \t0.25\n+tttcc     \t0.25     \t0.25     \t0.25\n+tttcg     \t0.25     \t0.25     \t0.25\n+tttct     \t0.25     \t0.25     \t0.25\n+tttga     \t0.25     \t0.25     \t0.25\n+tttgc     \t0.25     \t0.25     \t0.25\n+tttgg     \t0.25     \t0.25     \t0.25\n+tttgt     \t0.25     \t0.25     \t0.25\n+tttta     \t0.25     \t0.25     \t0.25\n+ttttc     \t0.25     \t0.25     \t0.25\n+ttttg     \t0.25     \t0.25     \t0.25\n+ttttt     \t0.25     \t0.25     \t0.25\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/trained_species/fly/augustus/fly_igenic_probs.pbl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_igenic_probs.pbl Thu Aug 26 06:55:33 2021 +0000
[
b"@@ -0,0 +1,3445 @@\n+[1]\n+# (a,c,g,t)= (0.295, 0.205, 0.205, 0.295)\n+#\n+# Probabilities file for the intergenic region model\n+#\n+\n+# k =\n+4\n+\n+# The P_l's\n+[P_ls]\n+# l=\n+0\n+# Values\n+A\t0.304\n+C\t0.196\n+G\t0.196\n+T\t0.304\n+# l=\n+1\n+# Values\n+AA\t0.112\n+AC\t0.0524\n+AG\t0.0515\n+AT\t0.088\n+CA\t0.0665\n+CC\t0.0404\n+CG\t0.0378\n+CT\t0.0515\n+GA\t0.0524\n+GC\t0.051\n+GG\t0.0404\n+GT\t0.0524\n+TA\t0.0729\n+TC\t0.0525\n+TG\t0.0665\n+TT\t0.112\n+# l=\n+2\n+# Values\n+AAA\t0.0446\n+AAC\t0.0182\n+AAG\t0.0173\n+AAT\t0.0319\n+ACA\t0.0197\n+ACC\t0.00915\n+ACG\t0.00876\n+ACT\t0.0148\n+AGA\t0.0141\n+AGC\t0.0136\n+AGG\t0.00903\n+AGT\t0.0148\n+ATA\t0.0251\n+ATC\t0.0139\n+ATG\t0.0171\n+ATT\t0.0319\n+CAA\t0.0228\n+CAC\t0.0135\n+CAG\t0.013\n+CAT\t0.0171\n+CCA\t0.0146\n+CCC\t0.00889\n+CCG\t0.00783\n+CCT\t0.00903\n+CGA\t0.0118\n+CGC\t0.00949\n+CGG\t0.00783\n+CGT\t0.00876\n+CTA\t0.0105\n+CTC\t0.0106\n+CTG\t0.013\n+CTT\t0.0173\n+GAA\t0.0192\n+GAC\t0.00881\n+GAG\t0.0106\n+GAT\t0.0139\n+GCA\t0.0166\n+GCC\t0.0113\n+GCG\t0.00949\n+GCT\t0.0136\n+GGA\t0.0111\n+GGC\t0.0113\n+GGG\t0.00889\n+GGT\t0.00915\n+GTA\t0.0118\n+GTC\t0.00881\n+GTG\t0.0135\n+GTT\t0.0182\n+TAA\t0.0254\n+TAC\t0.0118\n+TAG\t0.0105\n+TAT\t0.0251\n+TCA\t0.0156\n+TCC\t0.0111\n+TCG\t0.0118\n+TCT\t0.0141\n+TGA\t0.0156\n+TGC\t0.0166\n+TGG\t0.0146\n+TGT\t0.0197\n+TTA\t0.0254\n+TTC\t0.0192\n+TTG\t0.0228\n+TTT\t0.0446\n+# l=\n+3\n+# 
Values\n+AAAA\t0.0172\n+AAAC\t0.00735\n+AAAG\t0.00683\n+AAAT\t0.0133\n+AACA\t0.00675\n+AACC\t0.00305\n+AACG\t0.00312\n+AACT\t0.00531\n+AAGA\t0.00461\n+AAGC\t0.0042\n+AAGG\t0.00313\n+AAGT\t0.00541\n+AATA\t0.00908\n+AATC\t0.00494\n+AATG\t0.00662\n+AATT\t0.0113\n+ACAA\t0.00692\n+ACAC\t0.00428\n+ACAG\t0.00328\n+ACAT\t0.00519\n+ACCA\t0.00329\n+ACCC\t0.00206\n+ACCG\t0.00177\n+ACCT\t0.00202\n+ACGA\t0.00278\n+ACGC\t0.00217\n+ACGG\t0.00174\n+ACGT\t0.00208\n+ACTA\t0.00318\n+ACTC\t0.00281\n+ACTG\t0.00337\n+ACTT\t0.00541\n+AGAA\t0.00511\n+AGAC\t0.00223\n+AGAG\t0.00305\n+AGAT\t0.00366\n+AGCA\t0.00446\n+AGCC\t0.00283\n+AGCG\t0.00259\n+AGCT\t0.00373\n+AGGA\t0.00284\n+AGGC\t0.00226\n+AGGG\t0.0019\n+AGGT\t0.00202\n+AGTA\t0.0032\n+AGTC\t0.00248\n+AGTG\t0.00379\n+AGTT\t0.00531\n+ATAA\t0.00847\n+ATAC\t0.00397\n+ATAG\t0.00317\n+ATAT\t0.00947\n+ATCA\t0.00424\n+ATCC\t0.00279\n+ATCG\t0.00317\n+ATCT\t0.00366\n+ATGA\t0.00422\n+ATGC\t0.00407\n+ATGG\t0.00365\n+ATGT\t0.00518\n+ATTA\t0.00767\n+ATTC\t0.00495\n+ATTG\t0.00602\n+ATTT\t0.0133\n+CAAA\t0.00896\n+CAAC\t0.00433\n+CAAG\t0.00353\n+CAAT\t0.00602\n+CACA\t0.00525\n+CACC\t0.00243\n+CACG\t0.002\n+CACT\t0.00379\n+CAGA\t0.00344\n+CAGC\t0.00407\n+CAGG\t0.00215\n+CAGT\t0.00338\n+CATA\t0.00464\n+CATC\t0.00309\n+CATG\t0.00278\n+CATT\t0.00662\n+CCAA\t0.00486\n+CCAC\t0.00325\n+CCAG\t0.00288\n+CCAT\t0.00365\n+CCCA\t0.00319\n+CCCC\t0.00225\n+CCCG\t0.00154\n+CCCT\t0.0019\n+CCGA\t0.00238\n+CCGC\t0.00225\n+CCGG\t0.00146\n+CCGT\t0.00174\n+CCTA\t0.00173\n+CCTC\t0.00201\n+CCTG\t0.00215\n+CCTT\t0.00313\n+CGAA\t0.00419\n+CGAC\t0.00209\n+CGAG\t0.00231\n+CGAT\t0.00317\n+CGCA\t0.00305\n+CGCC\t0.00236\n+CGCG\t0.00149\n+CGCT\t0.00259\n+CGGA\t0.0022\n+CGGC\t0.00232\n+CGGG\t0.00154\n+CGGT\t0.00178\n+CGTA\t0.00197\n+CGTC\t0.00167\n+CGTG\t0.002\n+CGTT\t0.00312\n+CTAA\t0.00374\n+CTAC\t0.00193\n+CTAG\t0.00168\n+CTAT\t0.00317\n+CTCA\t0.00284\n+CTCC\t0.00236\n+CTCG\t0.00231\n+CTCT\t0.00305\n+CTGA\t0.00304\n+CTGC\t0.00383\n+CTGG\t0.00288\n+CTGT\t0.00328\n+CTTA\t0.00383\n+CTTC
\t0.00316\n+CTTG\t0.00353\n+CTTT\t0.00683\n+GAAA\t0.00815\n+GAAC\t0.00296\n+GAAG\t0.00316\n+GAAT\t0.00495\n+GACA\t0.003\n+GACC\t0.00166\n+GACG\t0.00167\n+GACT\t0.00248\n+GAGA\t0.003\n+GAGC\t0.00274\n+GAGG\t0.00201\n+GAGT\t0.00281\n+GATA\t0.00357\n+GATC\t0.00227\n+GATG\t0.00309\n+GATT\t0.00494\n+GCAA\t0.00554\n+GCAC\t0.00316\n+GCAG\t0.00383\n+GCAT\t0.00407\n+GCCA\t0.00448\n+GCCC\t0.00221\n+GCCG\t0.00232\n+GCCT\t0.00226\n+GCGA\t0.00295\n+GCGC\t0.00212\n+GCGG\t0.00225\n+GCGT\t0.00217\n+GCTA\t0.00261\n+GCTC\t0.00274\n+GCTG\t0.00407\n+GCTT\t0.0042\n+GGAA\t0.00414\n+GGAC\t0.00178\n+GGAG\t0.00236\n+GGAT\t0.00279\n+GGCA\t0.0037\n+GGCC\t0.00239\n+GGCG\t0.00236\n+GGCT\t0.00282\n+GGGA\t0.00237\n+GGGC\t0.00221\n+GGGG\t0.00225\n+GGGT\t0.00206\n+GGTA\t0.00201\n+GGTC\t0.00166\n+GGTG\t0.00243\n+GGTT\t0.00305\n+GTAA\t0.0039\n+GTAC\t0.00205\n+GTAG\t0.00193\n+GTAT\t0.00397\n+GTCA\t0.00271\n+GTCC\t0.00178\n+GTCG\t0.00209\n+GTCT\t0.00223\n+GTGA\t0.00279\n+GTGC\t0.00315\n+GTGG\t0.00325\n+GTGT\t0.00428\n+GTTA\t0.00359\n+GTTC\t0.00296\n+GTTG\t0.00433\n+GTTT\t0.00735\n+TAAA\t0.0103\n+TAAC\t0.00359\n+TAAG\t0.00383\n+TAAT\t0.00768\n+TACA\t0.00467\n+TACC\t0.00201\n+TACG\t0.00197\n+TACT\t0."..b'1188\n+GTGCA\t415919\n+GTGCC\t261766\n+GTGCG\t295620\n+GTGCT\t323411\n+GTGGA\t332993\n+GTGGC\t385960\n+GTGGG\t323106\n+GTGGT\t292234\n+GTGTA\t359272\n+GTGTC\t243286\n+GTGTG\t676471\n+GTGTT\t478535\n+GTTAA\t529553\n+GTTAC\t237009\n+GTTAG\t244930\n+GTTAT\t462774\n+GTTCA\t351705\n+GTTCC\t242639\n+GTTCG\t284705\n+GTTCT\t337027\n+GTTGA\t395847\n+GTTGC\t467863\n+GTTGG\t398583\n+GTTGT\t518738\n+GTTTA\t656320\n+GTTTC\t525254\n+GTTTG\t635106\n+GTTTT\t1204266\n+TAAAA\t1550931\n+TAAAC\t656320\n+TAAAG\t584278\n+TAAAT\t1453686\n+TAACA\t514469\n+TAACC\t257738\n+TAACG\t228098\n+TAACT\t473671\n+TAAGA\t411688\n+TAAGC\t412400\n+TAAGG\t243072\n+TAAGT\t505986\n+TAATA\t887022\n+TAATC\t429688\n+TAATG\t557972\n+TAATT\t1279151\n+TACAA\t638171\n+TACAC\t359272\n+TACAG\t269554\n+TACAT\t651698\n+TACCA\t294521\n+TACCC\t186072\n+TACC
G\t134084\n+TACCT\t210051\n+TACGA\t255187\n+TACGC\t188583\n+TACGG\t144135\n+TACGT\t222807\n+TACTA\t299145\n+TACTC\t243515\n+TACTG\t242385\n+TACTT\t529461\n+TAGAA\t445486\n+TAGAC\t192718\n+TAGAG\t225170\n+TAGAT\t371662\n+TAGCA\t338376\n+TAGCC\t240530\n+TAGCG\t157045\n+TAGCT\t334860\n+TAGGA\t194399\n+TAGGC\t179285\n+TAGGG\t138443\n+TAGGT\t199383\n+TAGTA\t299145\n+TAGTC\t203900\n+TAGTG\t245329\n+TAGTT\t560260\n+TATAA\t905114\n+TATAC\t478377\n+TATAG\t374869\n+TATAT\t1443312\n+TATCA\t432085\n+TATCC\t266505\n+TATCG\t299809\n+TATCT\t467523\n+TATGA\t432243\n+TATGC\t441619\n+TATGG\t339799\n+TATGT\t693461\n+TATTA\t887022\n+TATTC\t522722\n+TATTG\t602062\n+TATTT\t1720154\n+TCAAA\t830733\n+TCAAC\t395847\n+TCAAG\t362165\n+TCAAT\t678967\n+TCACA\t402109\n+TCACC\t212946\n+TCACG\t164762\n+TCACT\t367070\n+TCAGA\t303978\n+TCAGC\t373449\n+TCAGG\t185559\n+TCAGT\t385818\n+TCATA\t432243\n+TCATC\t340433\n+TCATG\t254268\n+TCATT\t707017\n+TCCAA\t464453\n+TCCAC\t332993\n+TCCAG\t285196\n+TCCAT\t425248\n+TCCCA\t332570\n+TCCCC\t246063\n+TCCCG\t164879\n+TCCCT\t229526\n+TCCGA\t253087\n+TCCGC\t252341\n+TCCGG\t163286\n+TCCGT\t233416\n+TCCTA\t194399\n+TCCTC\t267623\n+TCCTG\t279201\n+TCCTT\t425709\n+TCGAA\t507221\n+TCGAC\t242629\n+TCGAG\t284084\n+TCGAT\t460551\n+TCGCA\t375343\n+TCGCC\t294688\n+TCGCG\t173249\n+TCGCT\t370922\n+TCGGA\t253087\n+TCGGC\t265278\n+TCGGG\t190433\n+TCGGT\t269143\n+TCGTA\t255187\n+TCGTC\t227718\n+TCGTG\t218373\n+TCGTT\t441393\n+TCTAA\t393086\n+TCTAC\t220356\n+TCTAG\t208283\n+TCTAT\t413312\n+TCTCA\t305326\n+TCTCC\t255587\n+TCTCG\t248235\n+TCTCT\t424487\n+TCTGA\t303978\n+TCTGC\t389381\n+TCTGG\t307187\n+TCTGT\t414129\n+TCTTA\t411688\n+TCTTC\t375086\n+TCTTG\t371605\n+TCTTT\t735509\n+TGAAA\t979910\n+TGAAC\t351705\n+TGAAG\t365491\n+TGAAT\t675983\n+TGACA\t381622\n+TGACC\t222424\n+TGACG\t180167\n+TGACT\t330719\n+TGAGA\t305326\n+TGAGC\t300824\n+TGAGG\t195709\n+TGAGT\t363535\n+TGATA\t432085\n+TGATC\t244669\n+TGATG\t377889\n+TGATT\t689516\n+TGCAA\t739059\n+TGCAC\t415919\n+TGCAG\t455674\n+TG
CAT\t610206\n+TGCCA\t582755\n+TGCCC\t291844\n+TGCCG\t300708\n+TGCCT\t343894\n+TGCGA\t375343\n+TGCGC\t275852\n+TGCGG\t278984\n+TGCGT\t321845\n+TGCTA\t338376\n+TGCTC\t373175\n+TGCTG\t560268\n+TGCTT\t560387\n+TGGAA\t571498\n+TGGAC\t232424\n+TGGAG\t307866\n+TGGAT\t396414\n+TGGCA\t582755\n+TGGCC\t458889\n+TGGCG\t310066\n+TGGCT\t490513\n+TGGGA\t332570\n+TGGGC\t341654\n+TGGGG\t289650\n+TGGGT\t347866\n+TGGTA\t294521\n+TGGTC\t249199\n+TGGTG\t330244\n+TGGTT\t479937\n+TGTAA\t596325\n+TGTAC\t339181\n+TGTAG\t275425\n+TGTAT\t708010\n+TGTCA\t381622\n+TGTCC\t255633\n+TGTCG\t254663\n+TGTCT\t341705\n+TGTGA\t402109\n+TGTGC\t487257\n+TGTGG\t435188\n+TGTGT\t832228\n+TGTTA\t514469\n+TGTTC\t395511\n+TGTTG\t687960\n+TGTTT\t1174890\n+TTAAA\t1506984\n+TTAAC\t529553\n+TTAAG\t603193\n+TTAAT\t1184370\n+TTACA\t596325\n+TTACC\t289904\n+TTACG\t255155\n+TTACT\t462537\n+TTAGA\t393086\n+TTAGC\t402136\n+TTAGG\t254810\n+TTAGT\t488881\n+TTATA\t905114\n+TTATC\t500716\n+TTATG\t638944\n+TTATT\t1435871\n+TTCAA\t845058\n+TTCAC\t412587\n+TTCAG\t438454\n+TTCAT\t676983\n+TTCCA\t571498\n+TTCCC\t388709\n+TTCCG\t320193\n+TTCCT\t422559\n+TTCGA\t507221\n+TTCGC\t428272\n+TTCGG\t349863\n+TTCGT\t436061\n+TTCTA\t445486\n+TTCTC\t421020\n+TTCTG\t474000\n+TTCTT\t759053\n+TTGAA\t845058\n+TTGAC\t370823\n+TTGAG\t388608\n+TTGAT\t662989\n+TTGCA\t739059\n+TTGCC\t538214\n+TTGCG\t352393\n+TTGCT\t647156\n+TTGGA\t464453\n+TTGGC\t630591\n+TTGGG\t411508\n+TTGGT\t490945\n+TTGTA\t638171\n+TTGTC\t422058\n+TTGTG\t611423\n+TTGTT\t1171948\n+TTTAA\t1506984\n+TTTAC\t638916\n+TTTAG\t573057\n+TTTAT\t1526553\n+TTTCA\t979910\n+TTTCC\t759290\n+TTTCG\t714479\n+TTTCT\t897347\n+TTTGA\t830733\n+TTTGC\t883669\n+TTTGG\t776539\n+TTTGT\t1190742\n+TTTTA\t1550931\n+TTTTC\t1313857\n+TTTTG\t1382023\n+TTTTT\t2802550\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/trained_species/fly/augustus/fly_intron_probs.pbl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_intron_probs.pbl Thu Aug 26 06:55:33 2021 +0000
[
b'@@ -0,0 +1,20382 @@\n+#intron model parameters\n+# begin of content independent part\n+#\n+# ASS probabilities\n+#only nonpseudocount values are shown\n+[ASS]\n+# Size of vector\n+1024\n+# c_ass (ASS count)\n+31180\n+# asspseudocount (added to all possible patterns, no matter if they occur)\n+0.01\n+# Probabilities * 1000\n+aaaaa\t0.321\n+aaaac\t0.289\n+aaaag\t0.0324\n+aaaat\t0.289\n+aaaca\t0.161\n+aaacc\t0.257\n+aaacg\t0.0644\n+aaact\t0.257\n+aaaga\t0.0324\n+aaagc\t0.0644\n+aaagg\t0.0324\n+aaagt\t0.0965\n+aaata\t0.161\n+aaatc\t0.193\n+aaatg\t0.129\n+aaatt\t0.161\n+aacaa\t0.193\n+aacag\t0.129\n+aacat\t0.129\n+aacca\t0.129\n+aaccc\t0.193\n+aaccg\t0.193\n+aacct\t0.129\n+aacga\t0.161\n+aacgc\t0.0644\n+aacgg\t0.0965\n+aacta\t0.0644\n+aactc\t0.0965\n+aactg\t0.129\n+aactt\t0.289\n+aagaa\t0.129\n+aagag\t0.0324\n+aagat\t0.161\n+aagca\t0.0324\n+aagcc\t0.0965\n+aagcg\t0.193\n+aagct\t0.129\n+aagga\t0.0324\n+aaggc\t0.0324\n+aaggg\t0.0965\n+aaggt\t0.0644\n+aagta\t0.0324\n+aagtc\t0.0965\n+aagtg\t0.129\n+aagtt\t0.193\n+aataa\t0.193\n+aatac\t0.353\n+aatag\t0.0644\n+aatat\t0.353\n+aatca\t0.417\n+aatcc\t0.513\n+aatcg\t0.385\n+aatct\t0.321\n+aatga\t0.417\n+aatgc\t0.449\n+aatgg\t0.289\n+aatgt\t0.321\n+aatta\t0.193\n+aattc\t0.417\n+aattg\t0.385\n+aattt\t0.257\n+acaaa\t0.161\n+acaac\t0.193\n+acaag\t0.0965\n+acaat\t0.193\n+acaca\t0.129\n+acacc\t0.257\n+acacg\t0.0324\n+acact\t0.129\n+acagc\t0.161\n+acagg\t0.129\n+acagt\t0.0324\n+acata\t0.0324\n+acatc\t0.161\n+acatg\t0.0965\n+acatt\t0.193\n+accaa\t0.161\n+accac\t0.129\n+accag\t0.0644\n+accat\t0.193\n+accca\t0.0965\n+acccc\t0.0644\n+acccg\t0.193\n+accct\t0.129\n+accga\t0.129\n+accgg\t0.0324\n+accgt\t0.161\n+accta\t0.129\n+acctc\t0.129\n+acctg\t0.193\n+acctt\t0.193\n+acgaa\t0.0324\n+acgac\t0.161\n+acgag\t0.0965\n+acgat\t0.0644\n+acgca\t0.129\n+acgcc\t0.129\n+acgcg\t0.0324\n+acgct\t0.129\n+acgga\t0.161\n+acggc\t0.0644\n+acggt\t0.0644\n+acgtc\t0.129\n+acgtg\t0.0965\n+acgtt\t0.129\n+actaa\t0.129\n+actac\t0.417\n+actat\t0.257\n+actca\t0.257\n+a
ctcc\t0.193\n+actcg\t0.0965\n+actct\t0.321\n+actga\t0.257\n+actgc\t0.385\n+actgg\t0.225\n+actgt\t0.289\n+actta\t0.129\n+acttc\t0.193\n+acttg\t0.0965\n+acttt\t0.289\n+agaaa\t0.513\n+agaac\t0.77\n+agaag\t0.0965\n+agaat\t0.545\n+agaca\t0.353\n+agacc\t0.353\n+agacg\t0.385\n+agact\t0.513\n+agaga\t0.193\n+agagc\t0.385\n+agagg\t0.129\n+agagt\t0.289\n+agata\t0.321\n+agatc\t0.449\n+agatg\t0.417\n+agatt\t0.353\n+agcaa\t0.449\n+agcac\t0.257\n+agcag\t0.0644\n+agcat\t0.642\n+agcca\t0.353\n+agccc\t0.161\n+agccg\t0.353\n+agcct\t0.257\n+agcga\t0.385\n+agcgc\t0.225\n+agcgg\t0.129\n+agcgt\t0.225\n+agcta\t0.161\n+agctc\t0.161\n+agctg\t0.577\n+agctt\t0.417\n+aggaa\t0.225\n+aggac\t0.193\n+aggag\t0.129\n+aggat\t0.353\n+aggca\t0.417\n+aggcc\t0.161\n+aggcg\t0.129\n+aggct\t0.225\n+aggga\t0.129\n+agggc\t0.129\n+agggg\t0.0324\n+agggt\t0.193\n+aggta\t0.161\n+aggtc\t0.321\n+aggtg\t0.193\n+aggtt\t0.257\n+agtaa\t0.609\n+agtac\t0.706\n+agtag\t0.0324\n+agtat\t0.77\n+agtca\t0.866\n+agtcc\t0.577\n+agtcg\t0.385\n+agtct\t0.898\n+agtga\t0.706\n+agtgc\t0.738\n+agtgg\t0.994\n+agtgt\t0.449\n+agtta\t0.577\n+agttc\t0.417\n+agttg\t0.449\n+agttt\t0.802\n+ataaa\t0.193\n+ataac\t0.129\n+ataat\t0.161\n+ataca\t0.0324\n+atacc\t0.0644\n+atacg\t0.0324\n+atact\t0.129\n+atata\t0.0644\n+atatc\t0.0644\n+atatg\t0.129\n+atatt\t0.0644\n+atcaa\t0.225\n+atcac\t0.0965\n+atcat\t0.0644\n+atcca\t0.0965\n+atccc\t0.0965\n+atccg\t0.0965\n+atcct\t0.0965\n+atcga\t0.0965\n+atcgc\t0.129\n+atcgt\t0.0965\n+atcta\t0.129\n+atctc\t0.0644\n+atctg\t0.193\n+atctt\t0.0644\n+atgaa\t0.0644\n+atgac\t0.0965\n+atgat\t0.161\n+atgca\t0.161\n+atgcc\t0.225\n+atgcg\t0.129\n+atgct\t0.321\n+atgga\t0.161\n+atggc\t0.0965\n+atggg\t0.0644\n+atggt\t0.161\n+atgta\t0.0324\n+atgtc\t0.193\n+atgtg\t0.0965\n+atgtt\t0.129\n+attaa\t0.0965\n+attac\t0.225\n+attag\t0.0324\n+attat\t0.0644\n+attca\t0.0965\n+attcc\t0.257\n+attcg\t0.193\n+attct\t0.0644\n+attga\t0.0965\n+attgc\t0.257\n+attgg\t0.0644\n+attgt\t0.161\n+attta\t0.129\n+atttc\t0.289\n+atttg\t0.161\n+atttt\t0.289\n+caa
aa\t6.77\n+caaac\t5.48\n+caaag\t2.47\n+caaat\t5.26\n+caaca\t3.88\n+caacc\t3.85\n+caacg\t3.5\n+caact\t4.2\n+caaga\t1.64\n+caagc\t1.51\n+caagg\t1.31\n+caagt\t1.28\n+caata\t2.5\n+caatc\t3.59\n+caatg\t5.23\n+caatt\t3.27\n+cacaa\t4.91\n+cacac\t2.63\n+cacag\t0.545\n+cacat\t3.21\n+cacca\t2.66\n+caccc\t2.47\n+caccg\t1.41\n+cacct\t2.79\n+cacga\t3.05\n+cacgc\t2.82\n+cacgg\t1.09\n+cacgt\t1.03\n+cacta\t1.83\n+cactc\t2.79\n+cactg\t3.11\n+cactt\t2.85\n+cagaa\t3.27\n+cagac\t1.44\n+cagag\t1.76\n+cagat\t2.44\n+cagca\t2.89\n+cagcc\t2.79\n+cagcg\t2.31\n+cagct\t3.62\n+cagga\t2.76\n+caggc\t1.'..b'855631\n+#\tttcgg\t698837\n+#\tttcgt\t871342\n+#\tttcta\t890087\n+#\tttctc\t841394\n+#\tttctg\t946845\n+#\tttctt\t1516968\n+#\tttgaa\t1688548\n+#\tttgac\t741091\n+#\tttgag\t776440\n+#\tttgat\t1325067\n+#\tttgca\t1476775\n+#\tttgcc\t1075827\n+#\tttgcg\t704154\n+#\tttgct\t1293450\n+#\tttgga\t927788\n+#\tttggc\t1260242\n+#\tttggg\t822345\n+#\tttggt\t980857\n+#\tttgta\t1275361\n+#\tttgtc\t843496\n+#\tttgtg\t1221327\n+#\tttgtt\t2341971\n+#\ttttaa\t3011593\n+#\ttttac\t1276443\n+#\ttttag\t1145019\n+#\ttttat\t3051195\n+#\ttttca\t1957941\n+#\ttttcc\t1517354\n+#\ttttcg\t1427298\n+#\ttttct\t1793145\n+#\ttttga\t1659999\n+#\ttttgc\t1765755\n+#\ttttgg\t1551540\n+#\ttttgt\t2379188\n+#\ttttta\t3099452\n+#\tttttc\t2625305\n+#\tttttg\t2761348\n+#\tttttt\t5600229\n+\n+# motif upstream of acceptor splice site\n+[ASSMOTIF]\n+# width of motif, n=\n+32\n+# order of markov model, k=\n+1\n+# markov chain emission probabilities\n+ 0  0.358\t0.167\t0.126\t0.35\t0.329\t0.218\t0.131\t0.323\t0.326\t0.213\t0.151\t0.31\t0.298\t0.172\t0.17\t0.36\n+ 1  0.358\t0.168\t0.125\t0.35\t0.328\t0.215\t0.13\t0.327\t0.328\t0.212\t0.151\t0.309\t0.299\t0.171\t0.169\t0.36\n+ 2  0.358\t0.168\t0.122\t0.352\t0.327\t0.217\t0.128\t0.329\t0.329\t0.214\t0.148\t0.309\t0.301\t0.169\t0.167\t0.362\n+ 3  0.358\t0.17\t0.119\t0.353\t0.326\t0.216\t0.126\t0.331\t0.328\t0.216\t0.146\t0.31\t0.303\t0.169\t0.165\t0.362\n+ 4  
0.358\t0.172\t0.116\t0.355\t0.324\t0.216\t0.124\t0.336\t0.325\t0.218\t0.145\t0.312\t0.305\t0.17\t0.163\t0.363\n+ 5  0.358\t0.174\t0.11\t0.358\t0.322\t0.215\t0.12\t0.343\t0.324\t0.219\t0.143\t0.314\t0.308\t0.169\t0.159\t0.364\n+ 6  0.358\t0.176\t0.104\t0.361\t0.319\t0.216\t0.117\t0.349\t0.323\t0.221\t0.139\t0.317\t0.313\t0.169\t0.155\t0.363\n+ 7  0.359\t0.178\t0.0971\t0.365\t0.315\t0.216\t0.114\t0.355\t0.322\t0.222\t0.134\t0.322\t0.316\t0.168\t0.151\t0.365\n+ 8  0.362\t0.179\t0.0893\t0.37\t0.31\t0.216\t0.111\t0.363\t0.321\t0.223\t0.129\t0.328\t0.32\t0.168\t0.146\t0.367\n+ 9  0.362\t0.182\t0.0832\t0.372\t0.305\t0.218\t0.106\t0.371\t0.319\t0.225\t0.124\t0.332\t0.323\t0.167\t0.142\t0.369\n+10  0.364\t0.184\t0.0758\t0.376\t0.301\t0.219\t0.103\t0.377\t0.314\t0.228\t0.119\t0.339\t0.325\t0.167\t0.139\t0.37\n+11  0.364\t0.185\t0.0701\t0.38\t0.298\t0.218\t0.102\t0.382\t0.309\t0.229\t0.116\t0.346\t0.324\t0.167\t0.137\t0.372\n+12  0.366\t0.185\t0.0646\t0.385\t0.294\t0.22\t0.101\t0.385\t0.306\t0.228\t0.114\t0.352\t0.32\t0.168\t0.137\t0.375\n+13  0.367\t0.183\t0.0603\t0.389\t0.293\t0.22\t0.102\t0.385\t0.305\t0.228\t0.111\t0.356\t0.314\t0.169\t0.137\t0.38\n+14  0.364\t0.184\t0.0574\t0.394\t0.29\t0.223\t0.104\t0.383\t0.301\t0.228\t0.111\t0.36\t0.305\t0.171\t0.138\t0.386\n+15  0.359\t0.186\t0.0542\t0.4\t0.286\t0.227\t0.105\t0.382\t0.295\t0.23\t0.11\t0.365\t0.292\t0.175\t0.138\t0.395\n+16  0.355\t0.187\t0.0513\t0.407\t0.279\t0.232\t0.106\t0.382\t0.287\t0.234\t0.11\t0.37\t0.278\t0.18\t0.138\t0.404\n+17  0.347\t0.187\t0.05\t0.415\t0.275\t0.236\t0.108\t0.382\t0.277\t0.238\t0.11\t0.376\t0.261\t0.184\t0.14\t0.414\n+18  0.339\t0.187\t0.047\t0.426\t0.268\t0.239\t0.11\t0.382\t0.268\t0.239\t0.11\t0.383\t0.244\t0.189\t0.14\t0.426\n+19  0.329\t0.188\t0.0445\t0.438\t0.262\t0.242\t0.113\t0.383\t0.262\t0.239\t0.11\t0.39\t0.229\t0.194\t0.139\t0.437\n+20  0.32\t0.189\t0.0399\t0.451\t0.252\t0.246\t0.112\t0.389\t0.256\t0.239\t0.107\t0.398\t0.214\t0.2\t0.137\t0.449\n+21  
0.307\t0.192\t0.0368\t0.464\t0.244\t0.253\t0.111\t0.392\t0.245\t0.245\t0.106\t0.404\t0.201\t0.206\t0.133\t0.46\n+22  0.294\t0.198\t0.032\t0.475\t0.235\t0.266\t0.11\t0.389\t0.236\t0.253\t0.103\t0.407\t0.192\t0.218\t0.13\t0.46\n+23  0.286\t0.201\t0.0264\t0.487\t0.233\t0.273\t0.111\t0.383\t0.231\t0.261\t0.0999\t0.408\t0.186\t0.225\t0.127\t0.461\n+24  0.276\t0.207\t0.0212\t0.495\t0.228\t0.282\t0.107\t0.383\t0.225\t0.272\t0.0963\t0.406\t0.179\t0.233\t0.121\t0.466\n+25  0.258\t0.204\t0.0171\t0.521\t0.213\t0.286\t0.103\t0.398\t0.214\t0.271\t0.0937\t0.421\t0.166\t0.236\t0.114\t0.484\n+26  0.255\t0.208\t0.0131\t0.524\t0.207\t0.3\t0.0997\t0.393\t0.214\t0.274\t0.0902\t0.421\t0.148\t0.223\t0.102\t0.527\n+27  0.27\t0.208\t0.0141\t0.508\t0.22\t0.291\t0.116\t0.372\t0.225\t0.275\t0.0979\t0.403\t0.147\t0.217\t0.127\t0.509\n+28  0.269\t0.21\t0.0127\t0.508\t0.219\t0.296\t0.116\t0.369\t0.221\t0.282\t0.096\t0.402\t0.143\t0.217\t0.127\t0.514\n+29  0.267\t0.214\t0.0126\t0.507\t0.221\t0.301\t0.118\t0.36\t0.219\t0.29\t0.0977\t0.393\t0.139\t0.217\t0.128\t0.516\n+30  0.27\t0.214\t0.0126\t0.504\t0.223\t0.302\t0.12\t0.354\t0.224\t0.292\t0.097\t0.387\t0.135\t0.215\t0.13\t0.52\n+31  0.273\t0.209\t0.0136\t0.504\t0.224\t0.299\t0.123\t0.354\t0.23\t0.285\t0.0997\t0.385\t0.126\t0.203\t0.132\t0.539\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/trained_species/fly/augustus/fly_metapars.cfg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_metapars.cfg Thu Aug 26 06:55:33 2021 +0000
b
@@ -0,0 +1,48 @@
+# This file contains the list of meta parameters for the coding regions (CDS) which are subject to optimization. 
+# All other meta parameters are chosen as given in the species parameter file. The order 
+# of the parameters determines the order in the optimization process.
+# Basically, different values for these meta parameters are tried out and the ones
+# giving best performance in a cross-validation on the training set are chosen.
+# For each parameter the range of possible values is specified after the parameter
+# name and at least one white space.
+# 3 cases are possible for the range:
+# - an explicit list is given, e.g. protein "on" "off"
+# - it is an integer range, e.g. window_size "1"-"5"
+# - it is a range of floating point numbers, e.g. pseudocount "0.3"_"1.8"
+#
+# 
+# Mario Stanke, 19.12.2006
+#
+
+/Constant/dss_end "1"-"4"
+/Constant/dss_start "1"-"3"
+/Constant/ass_start "1"-"3"
+/Constant/ass_end "0"-"4"
+/Constant/ass_upwindow_size "1"-"50"
+/IntronModel/d                  "100"-"950"
+/IntronModel/ass_motif_memory "0"-"3"
+/IntronModel/ass_motif_radius "0"-"4"
+/ExonModel/tis_motif_memory "0"-"3"
+/ExonModel/tis_motif_radius "0"-"3"
+/Constant/trans_init_window "0"-"25"
+/Constant/init_coding_len "0"-"18"
+/ExonModel/patpseudocount "0.5"_"5"
+/ExonModel/etpseudocount "0"-"10"
+/ExonModel/etorder "0"-"3"
+/Constant/intterm_coding_len "0"-"13"
+/ExonModel/slope_of_bandwidth "0.05"_"0.6"
+/ExonModel/minwindowcount "1"-"15"
+/IGenicModel/patpseudocount "0.5"_"7"
+/IntronModel/patpseudocount "0.5"_"7"
+/IntronModel/slope_of_bandwidth "0.05"_"0.6"
+/IntronModel/minwindowcount "1"-"8"
+/IntronModel/asspseudocount "0.0005"_"0.03"
+/IntronModel/dsspseudocount "0.0002"_"0.04"
+/IntronModel/dssneighborfactor  "0.0001"_"0.01"
+/ExonModel/minPatSum "100"_"600"
+/Constant/probNinCoding         "0.15"_".25"
+/Constant/decomp_num_steps "1"-"5"
+# comment out the parameters that you do not want to be subject to optimization
+#/IGenicModel/k                  "4" "3" "5"
+#/IntronModel/k                  "4" "3" "5"
+#/ExonModel/k                    "4" "3" "5"
b
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/trained_species/fly/augustus/fly_metapars.utr.cfg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_metapars.utr.cfg Thu Aug 26 06:55:33 2021 +0000
b
@@ -0,0 +1,30 @@
+# This file contains the list of meta parameters for the Untranslated Regions (UTRs), which are subject to optimization. 
+# All other parameters are chosen as given in the species parameter file. The order 
+# of the parameters determines the order in the optimization process.
+# Basically, different values for these meta parameters are tried out and the ones
+# giving best performance in a cross-validation on the training set are chosen.
+# For each parameter the range of possible values is specified after the parameter
+# name and at least one white space.
+# 3 cases are possible for the range:
+# - an explicit list is given, e.g. protein "on" "off"
+# - it is an integer range, e.g. window_size "1"-"5"
+# - it is a range of floating point numbers, e.g. pseudocount "0.3"_"1.8"
+#
+# 
+# Mario Stanke, 9.5.2008
+#
+
+/UtrModel/prob_polya            "0.0"_"1.0"
+/UtrModel/d_polya_cleavage_min  "6"-"14"
+/UtrModel/d_polya_cleavage_max  "17"-"27"
+/UtrModel/tss_start             "0"-"12"
+/UtrModel/tss_end               "0"-"8"
+/UtrModel/tts_motif_memory      "0"-"2"
+/UtrModel/utr5patternweight     "0.1"_"1.0"
+/UtrModel/utr3patternweight     "0.1"_"1.0"
+/UtrModel/patpseudocount        "1"_"3"
+/UtrModel/tssup_k               "0"-"2"
+/UtrModel/slope_of_bandwidth    "0.2"_"0.4"
+/UtrModel/minwindowcount        "1"-"4"
+#/UtrModel/k                     "2"-"4"
+
b
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/trained_species/fly/augustus/fly_parameters.cfg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_parameters.cfg Thu Aug 26 06:55:33 2021 +0000
b
@@ -0,0 +1,144 @@
+#
+# parameters for all Drosophila versions
+# 
+# date : 11.8.2009
+#
+
+#
+# Properties for augustus
+#------------------------------------
+/augustus/verbosity 3     # 0-3, 0: only print the necessary
+maxDNAPieceSize    200000 # maximum segment that is predicted in one piece
+stopCodonExcludedFromCDS false # make this 'true' if the CDS includes the stop codon (training and prediction)
+
+# gff output options:
+protein             on    # output predicted protein sequence
+codingseq           off   # output the coding sequence
+cds                 on    # output 'cds' as feature for exons
+start               on    # output start codons (translation start)
+stop                on    # output stop codons  (translation stop)
+introns             on    # output introns
+tss                 on    # output transcription start site
+tts                 on    # output transcription termination site
+print_utr           off   # output 5'UTR and 3'UTR lines in addition to exon lines
+
+checkExAcc          off   # internal parameter for extrinsic accuracy
+
+# alternative transcripts and posterior probabilities
+sample                      100   # the number of sampling iterations
+alternatives-from-sampling  false # output alternative transcripts
+minexonintronprob           0.08  # minimal posterior probability of all (coding) exons
+minmeanexonintronprob       0.4   # minimal geometric mean of the posterior probs of introns and exons
+maxtracks                   -1    # maximum number of reported transcripts per gene (-1: no limit)
+keep_viterbi                true  # set to true if all Viterbi transcripts should be reported
+uniqueCDS                   true  # don't report transcripts that differ only in the UTR
+UTR                         on    # predict untranslated regions
+
+#
+# 
+# The rest of the file contains mainly meta parameters used for training.
+#
+
+# global constants
+# ----------------------------
+
+/Constant/trans_init_window           25
+/Constant/ass_upwindow_size           32
+/Constant/ass_start                   1
+/Constant/ass_end                     4
+/Constant/dss_start                   3
+/Constant/dss_end                     4
+/Constant/init_coding_len       9
+/Constant/intterm_coding_len       0
+/Constant/tss_upwindow_size           45
+/Constant/decomp_num_at               1
+/Constant/decomp_num_gc               1
+/Constant/gc_range_min       0.32   # This range has an effect only when decomp_num_steps>1. 
+/Constant/gc_range_max                0.50   # States the minimal and maximal percentage of c or g
+/Constant/decomp_num_steps            1      # I recommend keeping this to 1 for most species.
+/Constant/min_coding_len              201    # no gene with a coding sequence shorter than this is predicted
+/Constant/probNinCoding               0.23   # divide this by .25 to get a malus for making one masked letter part of the coding sequence
+/Constant/amberprob                   0.34   # Prob(stop codon = tag), if 0 tag is assumed to code for amino acid
+/Constant/ochreprob                   0.41   # Prob(stop codon = taa), if 0 taa is assumed to code for amino acid
+/Constant/opalprob                    0.25   # Prob(stop codon = tga), if 0 tga is assumed to code for amino acid
+/Constant/subopt_transcript_threshold 0.7
+/Constant/almost_identical_maxdiff    10
+
+# type of weighing, one of  1 = equalWeights, 2 = gcContentClasses, 3 = multiNormalKernel
+/BaseCount/weighingType    3
+# file with the weight matrix (only for multiNormalKernel type weighing)
+/BaseCount/weightMatrixFile   fly_weightmatrix.txt # change this to your species if at all necessary
+
+# Properties for IGenicModel
+# ----------------------------
+/IGenicModel/verbosity      0
+/IGenicModel/infile         fly_igenic_probs.pbl   # change this and the other five filenames *_probs.pbl below to your species
+/IGenicModel/outfile        fly_igenic_probs.pbl
+/IGenicModel/patpseudocount 5.0
+/IGenicModel/k              4        # order of the Markov chain for content model, keep equal to /ExonModel/k
+
+# Properties for ExonModel
+# ----------------------------
+/ExonModel/verbosity          3
+/ExonModel/infile             fly_exon_probs.pbl
+/ExonModel/outfile            fly_exon_probs.pbl
+/ExonModel/patpseudocount     5.0
+/ExonModel/minPatSum          350
+/ExonModel/k                  4       # order of the Markov chain for content model
+/ExonModel/etorder       2
+/ExonModel/etpseudocount      3
+/ExonModel/exonlengthD        3000    # beyond this the distribution is geometric
+/ExonModel/maxexonlength      15000
+/ExonModel/slope_of_bandwidth 0.3
+/ExonModel/minwindowcount     8
+/ExonModel/tis_motif_memory   3
+/ExonModel/tis_motif_radius   2
+
+# Properties for IntronModel
+# ----------------------------
+/IntronModel/verbosity          0
+/IntronModel/infile             fly_intron_probs.pbl
+/IntronModel/outfile            fly_intron_probs.pbl
+/IntronModel/patpseudocount     5.0
+/IntronModel/k                  4     # order of the Markov chain for content model, keep equal to /ExonModel/k
+/IntronModel/slope_of_bandwidth 0.4
+/IntronModel/minwindowcount     3
+/IntronModel/asspseudocount     0.01
+/IntronModel/dsspseudocount     0.01015
+/IntronModel/dssneighborfactor  0.001
+#/IntronModel/splicefile         fly_splicefile.txt # this optional file contains additional windows around splice sites for training, uncomment if you have one
+/IntronModel/sf_with_motif false           # if true the splice file is also used to train the branch point region
+/IntronModel/d                  929  # constraint: this must be larger than 4 + /Constant/dss_end + /Constant/ass_upwindow_size + /Constant/ass_start
+/IntronModel/ass_motif_memory   1
+/IntronModel/ass_motif_radius   4
+
+# Properties for UtrModel
+# ----------------------------
+/UtrModel/verbosity             3
+/UtrModel/infile                fly_utr_probs.pbl
+/UtrModel/outfile               fly_utr_probs.pbl
+/UtrModel/k                     4
+/UtrModel/utr5patternweight     0.3    #0.7625
+/UtrModel/utr3patternweight     0.3   #0.5
+/UtrModel/patpseudocount        1
+/UtrModel/tssup_k               1
+/UtrModel/tssup_patpseudocount  1
+/UtrModel/slope_of_bandwidth    0.25
+/UtrModel/minwindowcount        1
+/UtrModel/exonlengthD           800
+/UtrModel/maxexonlength         1200
+/UtrModel/max3singlelength      2000    # excludes roughly 1%
+/UtrModel/max3termlength        1200    # excludes ~ 0.3%
+/UtrModel/tss_start             8
+/UtrModel/tss_end               5
+/UtrModel/tata_start            2
+/UtrModel/tata_end              10
+/UtrModel/tata_pseudocount      2
+/UtrModel/d_tss_tata_min        26      # minimal distance between start of tata box (if existent) and tss 
+/UtrModel/d_tss_tata_max        37      # maximal distance between start of tata box (if existent) and tss
+/UtrModel/polyasig_consensus    aataaa  # polyadenylation signal training not fully automated yet
+/UtrModel/d_polyasig_cleavage   14      # the transcription end is predicted this many bases after the polyadenylation signal
+/UtrModel/d_polya_cleavage_min  9
+/UtrModel/d_polya_cleavage_max  35
+/UtrModel/prob_polya            0.95
+/UtrModel/tts_motif_memory      1
b
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/trained_species/fly/augustus/fly_utr_probs.pbl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_utr_probs.pbl Thu Aug 26 06:55:33 2021 +0000
[
b"@@ -0,0 +1,7137 @@\n+# UTR model parameters\n+# begin of content independent part\n+\n+# Length distributions\n+[UTRLENGTH]\n+# maximal individually stored length probability d=\n+800\n+# slope of smoothing bandwidth =\n+0.25\n+# smoothing minwindowcount =\n+1\n+# length 5' sing  5' init  5' int  5' term  3' sing  3' init  3' int  3' term \n+# total number of exons of above types\n+      6188      1995       342      1995      7966       178        69       178\n+# number of exons exceeding length d=800\n+        81        32         6        27       823         7         7        48\n+# 1000 P(len=k), k=0,1,..., 800\n+0\t0.681\t5.07e-15\t4.35e-06\t4.62\t0.00284\t5.75\t3.2e-18\t0\n+1\t1.82\t9.16e-12\t0.000391\t10.5\t0.0358\t10.4\t1.04e-17\t0\n+2\t2.76\t6.1e-09\t0.013\t13\t0.178\t8.35\t3.54e-17\t0\n+3\t3.13\t1.49e-06\t0.158\t12.5\t0.405\t6.03\t1.32e-16\t0\n+4\t3.51\t0.000135\t0.708\t12.6\t0.599\t9.23\t5.19e-16\t2.83e-20\n+5\t3.55\t0.00451\t1.17\t13.8\t0.682\t15.3\t2.13e-15\t2.89e-18\n+6\t2.95\t0.0564\t0.708\t15\t0.512\t20.4\t8.93e-15\t2.29e-16\n+7\t2.5\t0.272\t0.158\t15.4\t0.296\t18.5\t3.83e-14\t1.42e-14\n+8\t2.56\t0.548\t0.013\t15.3\t0.27\t12.9\t1.67e-13\t6.84e-13\n+9\t3.12\t0.566\t0.000391\t14.5\t0.332\t11.6\t7.41e-13\t2.57e-11\n+10\t3.83\t0.407\t4.35e-06\t13.6\t0.267\t11.3\t3.35e-12\t7.5e-10\n+11\t4.27\t0.328\t1.78e-08\t12.2\t0.252\t7.9\t1.54e-11\t1.71e-08\n+12\t4.37\t0.472\t5.42e-11\t9.93\t0.347\t6.71\t7.12e-11\t3.03e-07\n+13\t4.42\t0.528\t8.34e-10\t8.9\t0.549\t8.5\t3.31e-10\t4.18e-06\n+14\t4.28\t0.326\t3.63e-08\t9.1\t0.796\t6.71\t1.53e-09\t4.49e-05\n+15\t4.04\t0.333\t4.68e-06\t9.88\t0.875\t4.29\t7.03e-09\t0.000376\n+16\t4.33\t0.612\t0.000396\t11.4\t0.938\t6.01\t3.21e-08\t0.00245\n+17\t4.51\t0.867\t0.013\t12.8\t1.03\t9.28\t1.46e-07\t0.0124\n+18\t4.07\t1.1\t0.159\t12.8\t1.03\t11\t6.62e-07\t0.0492\n+19\t3.76\t1.22\t0.723\t11.3\t0.904\t11.6\t2.99e-06\t0.152\n+20\t3.96\t1.21\t1.34\t10.7\t0.878\t12.3\t1.32e-05\t0.364\n+21\t4.31\t1.12\t1.51\t11\t1.11\t13.6\t5.6e-05\
t0.68\n+22\t4.45\t1.4\t1.86\t10.3\t1.38\t12.8\t0.000224\t0.989\n+23\t4.75\t2.06\t2.7\t8.37\t1.36\t8.55\t0.000833\t1.12\n+24\t4.76\t2.19\t3.68\t6.7\t1.25\t5.92\t0.00284\t0.994\n+25\t4.68\t2.04\t3.45\t6\t1.2\t6.15\t0.00881\t0.7\n+26\t4.47\t2.06\t3.12\t6.21\t1.14\t7.01\t0.0248\t0.434\n+27\t4.28\t2.35\t3.29\t6.62\t1.1\t7.47\t0.0629\t0.352\n+28\t4.83\t3.68\t3.35\t6.53\t0.986\t7.33\t0.144\t0.514\n+29\t5.98\t4.63\t3.25\t5.68\t0.939\t6.87\t0.296\t0.878\n+30\t6.15\t4.11\t3.2\t4.95\t1.15\t6.45\t0.549\t1.3\n+31\t5.15\t3.31\t3.23\t5.2\t1.46\t6.14\t0.918\t1.57\n+32\t4.9\t2.94\t3.22\t5.98\t1.72\t5.79\t1.39\t1.59\n+33\t5.97\t2.5\t3.03\t6.3\t1.94\t5.34\t1.9\t1.39\n+34\t7.15\t2.36\t2.73\t6.51\t2.02\t4.96\t2.37\t1.11\n+35\t7.24\t2.86\t2.53\t7.51\t2.19\t4.8\t2.73\t0.859\n+36\t6.12\t2.78\t2.52\t7.26\t2.39\t4.86\t2.94\t0.648\n+37\t4.95\t2.05\t2.64\t5.47\t2.29\t5.02\t3.02\t0.466\n+38\t4.34\t1.81\t2.72\t4.59\t2.15\t5.19\t3.05\t0.311\n+39\t4.11\t2.08\t2.69\t4.74\t2.11\t5.32\t3.09\t0.189\n+40\t4.19\t2.33\t2.7\t4.96\t2\t5.39\t3.19\t0.107\n+41\t4.29\t2.47\t2.95\t4.98\t1.85\t5.44\t3.36\t0.0591\n+42\t4.22\t2.58\t3.52\t4.91\t1.87\t5.48\t3.59\t0.0393\n+43\t4.26\t2.73\t4.35\t4.84\t2.07\t5.58\t3.84\t0.0391\n+44\t4.69\t2.92\t5.33\t4.86\t2.23\t5.78\t4.08\t0.0537\n+45\t4.43\t3.1\t6.32\t4.93\t2.3\t6.09\t4.29\t0.0815\n+46\t3.6\t3.22\t7.22\t4.96\t2.25\t6.51\t4.48\t0.123\n+47\t3.46\t3.29\t7.94\t4.92\t2\t6.99\t4.64\t0.18\n+48\t3.87\t3.39\t8.42\t4.86\t2\t7.48\t4.78\t0.254\n+49\t4.21\t3.6\t8.62\t4.87\t2.27\t7.91\t4.91\t0.344\n+50\t4.36\t4.01\t8.54\t4.94\t2.5\t8.23\t5.04\t0.448\n+51\t4.41\t4.55\t8.25\t4.98\t2.62\t8.41\t5.18\t0.562\n+52\t4.46\t5.04\t7.86\t4.89\t2.68\t8.41\t5.31\t0.679\n+53\t4.62\t5.25\t7.47\t4.66\t2.73\t8.23\t5.42\t0.79\n+54\t4.92\t5.08\t7.17\t4.36\t2.8\t7.87\t5.5\t0.887\n+55\t5.31\t4.64\t6.98\t4.12\t2.9\t7.36\t5.54\t0.963\n+56\t5.61\t4.19\t6.89\t4\t3.07\t6.75\t5.53\t1.01\n+57\t5.7\t3.93\t6.85\t4.03\t3.3\t6.11\t5.46\t1.03\n+58\t5.54\t3.92\t6.83\t4.14\t3.53\t5.48\t5.33\t1.03\n+59\t5.29\t4.03\t6
.8\t4.25\t3.69\t4.93\t5.17\t1.01\n+60\t5.13\t4.16\t6.75\t4.3\t3.76\t4.48\t4.97\t0.984\n+61\t5.16\t4.24\t6.66\t4.28\t3.79\t4.14\t4.74\t0.955\n+62\t5.35\t4.23\t6.53\t4.19\t3.8\t3.89\t4.52\t0.934\n+63\t5.63\t4.14\t6.37\t4.05\t3.79\t3.7\t4.29\t0.927\n+64\t5.92\t4\t6.18\t3.88\t3.77\t3.55\t4.1\t0.937\n+65\t6.16\t3.84\t5.96\t3.72\t3.79\t3.4\t3.93\t0.965\n+66\t6.28\t3.71\t5.74\t3.62\t3.87\t3.24\t3.8\t1.01\n+67\t6.2\t3.64\t5.51\t3.59\t3.99\t3.07\t3.72\t1.06\n+68\t5.92\t3.63\t5.3\t3.64\t4.07\t2.88\t3.68\t1.13\n+69\t5.57\t3.68\t5.09\t3.72\t4.04\t2.7\t3.68\t1.19\n+70\t5.32\t3.74\t4.91\t3.79\t3.89\t2.52\t3.73\t1.25\n+71\t5.27\t3.79\t4.73\t3.8\t3.7\t2.35\t3.8\t1.31\n+72\t5."..b'tcgc\t14965\n+#\tttcgg\t11310\n+#\tttcgt\t20239\n+#\tttcta\t28351\n+#\tttctc\t16356\n+#\tttctg\t18660\n+#\tttctt\t32422\n+#\tttgaa\t47499\n+#\tttgac\t14608\n+#\tttgag\t17968\n+#\tttgat\t36555\n+#\tttgca\t31476\n+#\tttgcc\t17850\n+#\tttgcg\t12080\n+#\tttgct\t22771\n+#\tttgga\t20302\n+#\tttggc\t17146\n+#\tttggg\t11267\n+#\tttggt\t16683\n+#\tttgta\t64460\n+#\tttgtc\t18109\n+#\tttgtg\t29654\n+#\tttgtt\t61476\n+#\ttttaa\t101164\n+#\ttttac\t38483\n+#\ttttag\t41333\n+#\ttttat\t89923\n+#\ttttca\t39758\n+#\ttttcc\t27803\n+#\ttttcg\t26520\n+#\ttttct\t36657\n+#\ttttga\t41172\n+#\ttttgc\t30519\n+#\ttttgg\t24062\n+#\ttttgt\t71057\n+#\ttttta\t94640\n+#\tttttc\t46611\n+#\tttttg\t62333\n+#\tttttt\t128813\n+\n+#\n+# The emission probabilities of the tss upwindow\n+#\n+[EMISSION-TSSUPWIN]\n+# size of the emission vector\n+16\n+#tssup_k=\n+1\n+# patpseudo : pseudocount for sequence patterns\n+1\n+aa\t0.326\n+ac\t0.195\n+ag\t0.204\n+at\t0.275\n+ca\t0.306\n+cc\t0.2\n+cg\t0.248\n+ct\t0.245\n+ga\t0.245\n+gc\t0.298\n+gg\t0.193\n+gt\t0.264\n+ta\t0.212\n+tc\t0.219\n+tg\t0.221\n+tt\t0.347\n+\n+# motif around the TSS of TATA-less promoters\n+[TSSMOTIF]\n+# width of motif, n=\n+13\n+# order of markov model, k=\n+0\n+# markov chain emission probabilities\n+ 0  0.268\t0.227\t0.222\t0.283\n+ 1  0.257\t0.23\t0.22\t0.293\n+ 2  
0.279\t0.234\t0.216\t0.27\n+ 3  0.271\t0.238\t0.214\t0.278\n+ 4  0.285\t0.237\t0.216\t0.261\n+ 5  0.263\t0.214\t0.229\t0.295\n+ 6  0.249\t0.223\t0.195\t0.332\n+ 7  0.162\t0.309\t0.241\t0.288\n+ 8  0.406\t0.252\t0.193\t0.15\n+ 9  0.275\t0.199\t0.233\t0.293\n+10  0.275\t0.194\t0.165\t0.365\n+11  0.294\t0.215\t0.182\t0.308\n+12  0.283\t0.207\t0.228\t0.282\n+\n+# motif around the TSS of TATA promoters\n+[TSSMOTIFTATA]\n+# width of motif, n=\n+13\n+# order of markov model, k=\n+0\n+# markov chain emission probabilities\n+ 0  0.293\t0.245\t0.261\t0.201\n+ 1  0.32\t0.188\t0.298\t0.193\n+ 2  0.3\t0.216\t0.248\t0.237\n+ 3  0.301\t0.216\t0.213\t0.271\n+ 4  0.314\t0.254\t0.221\t0.211\n+ 5  0.293\t0.174\t0.264\t0.269\n+ 6  0.235\t0.172\t0.213\t0.38\n+ 7  0.15\t0.422\t0.116\t0.312\n+ 8  0.501\t0.174\t0.208\t0.118\n+ 9  0.256\t0.211\t0.229\t0.304\n+10  0.264\t0.225\t0.113\t0.398\n+11  0.329\t0.222\t0.121\t0.329\n+12  0.312\t0.195\t0.213\t0.28\n+\n+# tata box motif \n+[TATAMOTIF]\n+# width of motif, n=\n+12\n+# order of markov model, k=\n+0\n+# markov chain emission probabilities\n+ 0  0.21\t0.237\t0.381\t0.173\n+ 1  0.19\t0.341\t0.336\t0.133\n+ 2  0.0032\t0.0032\t0.0032\t0.99\n+ 3  0.99\t0.0032\t0.0032\t0.0032\n+ 4  0.0032\t0.0032\t0.0032\t0.99\n+ 5  0.99\t0.0032\t0.0032\t0.0032\n+ 6  0.637\t0.0432\t0.0272\t0.293\n+ 7  0.99\t0.0032\t0.0032\t0.0032\n+ 8  0.602\t0.0592\t0.0576\t0.282\n+ 9  0.37\t0.0976\t0.386\t0.147\n+10  0.202\t0.315\t0.312\t0.171\n+11  0.254\t0.283\t0.278\t0.184\n+\n+# motif after polyA signal\n+[TTSMOTIF]\n+# width of motif, n=\n+14\n+# order of markov model, k=\n+1\n+# markov chain emission probabilities\n+ 0  0.368\t0.167\t0.147\t0.317\t0.414\t0.134\t0.181\t0.27\t0.365\t0.182\t0.115\t0.337\t0.342\t0.126\t0.224\t0.308\n+ 1  0.368\t0.17\t0.139\t0.323\t0.41\t0.145\t0.177\t0.268\t0.338\t0.199\t0.106\t0.358\t0.323\t0.136\t0.21\t0.33\n+ 2  0.397\t0.173\t0.114\t0.316\t0.425\t0.149\t0.173\t0.252\t0.35\t0.19\t0.106\t0.353\t0.335\t0.13\t0.198\t0.337\n+ 3  
0.42\t0.159\t0.102\t0.319\t0.437\t0.152\t0.165\t0.246\t0.359\t0.191\t0.102\t0.348\t0.329\t0.124\t0.176\t0.371\n+ 4  0.452\t0.148\t0.104\t0.296\t0.446\t0.147\t0.159\t0.248\t0.386\t0.187\t0.1\t0.328\t0.337\t0.114\t0.167\t0.382\n+ 5  0.455\t0.141\t0.107\t0.297\t0.435\t0.143\t0.159\t0.263\t0.375\t0.194\t0.0979\t0.333\t0.324\t0.115\t0.162\t0.399\n+ 6  0.453\t0.144\t0.11\t0.292\t0.421\t0.15\t0.156\t0.274\t0.362\t0.2\t0.104\t0.334\t0.323\t0.118\t0.158\t0.4\n+ 7  0.453\t0.145\t0.11\t0.292\t0.422\t0.158\t0.145\t0.275\t0.365\t0.194\t0.109\t0.333\t0.325\t0.121\t0.154\t0.401\n+ 8  0.454\t0.145\t0.109\t0.292\t0.427\t0.165\t0.132\t0.276\t0.372\t0.197\t0.112\t0.319\t0.329\t0.119\t0.158\t0.394\n+ 9  0.454\t0.153\t0.106\t0.288\t0.435\t0.156\t0.131\t0.278\t0.371\t0.197\t0.107\t0.325\t0.331\t0.118\t0.166\t0.385\n+10  0.451\t0.155\t0.104\t0.29\t0.424\t0.154\t0.145\t0.276\t0.361\t0.194\t0.105\t0.34\t0.336\t0.116\t0.169\t0.379\n+11  0.457\t0.157\t0.102\t0.284\t0.427\t0.156\t0.147\t0.271\t0.359\t0.199\t0.105\t0.337\t0.343\t0.117\t0.165\t0.376\n+12  0.461\t0.149\t0.103\t0.287\t0.432\t0.168\t0.145\t0.255\t0.364\t0.21\t0.106\t0.32\t0.348\t0.12\t0.162\t0.37\n+13  0.467\t0.144\t0.101\t0.287\t0.447\t0.171\t0.135\t0.247\t0.364\t0.225\t0.111\t0.3\t0.347\t0.122\t0.162\t0.369\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/trained_species/fly/augustus/fly_weightmatrix.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_weightmatrix.txt Thu Aug 26 06:55:33 2021 +0000
b
@@ -0,0 +1,23 @@
+# 
+# This file contains a matrix used for weighing the training sequences
+# when given an input sequence. Let z = (da, dc, dg, dt) be the vector
+# containing the differences in the relative nucleotide frequencies of
+# two sequences, the input sequence and a training sequence.
+# Then the training sequence has weight proportional to 
+#
+# exp ( - z M z^t)
+#
+# with M being the matrix specified below.
+# If M is nonsingular, then (apart from a two normalizing factors) M
+# is the inverse of the covariance matrix of a multinormal
+# distribution - the kernel for the estimation.
+
+
+# this matrix is gc-content only, i.e. 
+# weight = 10 * exp (-200 * (dc + dg))^2)
+# in particular weight <= 10
+0      0      0             0
+0      200    0             0
+0      0      200           0
+0      0      0             0
+
b
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/trained_species/fly/info.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/trained_species/fly/info.json Thu Aug 26 06:55:33 2021 +0000
[
@@ -0,0 +1,1 @@
+{"augustus": [{"version": "AUGUSTUS (3.3.3)", "source": "augustus pre-trained", "date": "2021-07-19", "path": "/tmp/prout/trained_species/fly/augustus"}], "genemark": [{}], "codingquarry": [{}], "snap": [{}], "glimmerhmm": [{}]}
\ No newline at end of file
b
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/uniprot_sprot.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/uniprot_sprot.fasta Thu Aug 26 06:55:33 2021 +0000
b
@@ -0,0 +1,83 @@
+>sp|Q6GZX4|001R_FRG3G Putative transcription factor 001R OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-001R PE=4 SV=1
+MAFSAEDVLKEYDRRRRMEALLLSLYYPNDRKLLDYKEWSPPRVQVECPKAPVEWNNPPS
+EKGLIVGHFSGIKYKGEKAQASEVDVNKMCCWVSKFKDAMRRYQGIQTCKIPGKVLSDLD
+AKIKAYNLTVEGVEGFVRYSRVTKQHVAAFLKELRHSKQYENVNLIHYILTDKRVDIQHL
+EKDLVKDFKALVESAHRMRQGHMINVKYILYQLLKKHGHGPDGPDILTVKTGSKGVLYDD
+SFRKIYTDLGWKFTPL
+>sp|Q6GZX3|002L_FRG3G Uncharacterized protein 002L OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-002L PE=4 SV=1
+MSIIGATRLQNDKSDTYSAGPCYAGGCSAFTPRGTCGKDWDLGEQTCASGFCTSQPLCAR
+IKKTQVCGLRYSSKGKDPLVSAEWDSRGAPYVRCTYDADLIDTQAQVDQFVSMFGESPSL
+AERYCMRGVKNTAGELVSRVSSDADPAGGWCRKWYSAHRGPDQDAALGSFCIKNPGAADC
+KCINRASDPVYQKVKTLHAYPDQCWYVPCAADVGELKMGTQRDTPTNCPTQVCQIVFNML
+DDGSVTMDDVKNTINCDFSKYVPPPPPPKPTPPTPPTPPTPPTPPTPPTPPTPRPVHNRK
+VMFFVAGAVLVAILISTVRW
+>sp|Q197F8|002R_IIV3 Uncharacterized protein 002R OS=Invertebrate iridescent virus 3 OX=345201 GN=IIV3-002R PE=4 SV=1
+MASNTVSAQGGSNRPVRDFSNIQDVAQFLLFDPIWNEQPGSIVPWKMNREQALAERYPEL
+QTSEPSEDYSGPVESLELLPLEIKLDIMQYLSWEQISWCKHPWLWTRWYKDNVVRVSAIT
+FEDFQREYAFPEKIQEIHFTDTRAEEIKAILETTPNVTRLVIRRIDDMNYNTHGDLGLDD
+LEFLTHLMVEDACGFTDFWAPSLTHLTIKNLDMHPRWFGPVMDGIKSMQSTLKYLYIFET
+YGVNKPFVQWCTDNIETFYCTNSYRYENVPRPIYVWVLFQEDEWHGYRVEDNKFHRRYMY
+STILHKRDTDWVENNPLKTPAQVEMYKFLLRISQLNRDGTGYESDSDPENEHFDDESFSS
+GEEDSSDEDDPTWAPDSDDSDWETETEEEPSVAARILEKGKLTITNLMKSLGFKPKPKKI
+QSIDRYFCSLDSNYNSEDEDFEYDSDSEDDDSDSEDDC
+>sp|Q197F7|003L_IIV3 Uncharacterized protein 003L OS=Invertebrate iridescent virus 3 OX=345201 GN=IIV3-003L PE=4 SV=1
+MYQAINPCPQSWYGSPQLEREIVCKMSGAPHYPNYYPVHPNALGGAWFDTSLNARSLTTT
+PSLTTCTPPSLAACTPPTSLGMVDSPPHINPPRRIGTLCFDFGSAKSPQRCECVASDRPS
+TTSNTAPDTYRLLITNSKTRKNNYGTCRLEPLTYGI
+>sp|Q6GZX2|003R_FRG3G Uncharacterized protein 3R OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-003R PE=3 SV=1
+MARPLLGKTSSVRRRLESLSACSIFFFLRKFCQKMASLVFLNSPVYQMSNILLTERRQVD
+RAMGGSDDDGVMVVALSPSDFKTVLGSALLAVERDMVHVVPKYLQTPGILHDMLVLLTPI
+FGEALSVDMSGATDVMVQQIATAGFVDVDPLHSSVSWKDNVSCPVALLAVSNAVRTMMGQ
+PCQVTLIIDVGTQNILRDLVNLPVEMSGDLQVMAYTKDPLGKVPAVGVSVFDSGSVQKGD
+AHSVGAPDGLVSFHTHPVSSAVELNYHAGWPSNVDMSSLLTMKNLMHVVVAEEGLWTMAR
+TLSMQRLTKVLTDAEKDVMRAAAFNLFLPLNELRVMGTKDSNNKSLKTYFEVFETFTIGA
+LMKHSGVTPTAFVDRRWLDNTIYHMGFIPWGRDMRFVVEYDLDGTNPFLNTVPTLMSVKR
+KAKIQEMFDNMVSRMVTS
+>sp|Q6GZX1|004R_FRG3G Uncharacterized protein 004R OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-004R PE=4 SV=1
+MNAKYDTDQGVGRMLFLGTIGLAVVVGGLMAYGYYYDGKTPSSGTSFHTASPSFSSRYRY
+>sp|Q197F5|005L_IIV3 Uncharacterized protein 005L OS=Invertebrate iridescent virus 3 OX=345201 GN=IIV3-005L PE=3 SV=1
+MRYTVLIALQGALLLLLLIDDGQGQSPYPYPGMPCNSSRQCGLGTCVHSRCAHCSSDGTL
+CSPEDPTMVWPCCPESSCQLVVGLPSLVNHYNCLPNQCTDSSQCPGGFGCMTRRSKCELC
+KADGEACNSPYLDWRKDKECCSGYCHTEARGLEGVCIDPKKIFCTPKNPWQLAPYPPSYH
+QPTTLRPPTSLYDSWLMSGFLVKSTTAPSTQEEEDDY
+>sp|Q6GZX0|005R_FRG3G Uncharacterized protein 005R OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-005R PE=4 SV=1
+MQNPLPEVMSPEHDKRTTTPMSKEANKFIRELDKKPGDLAVVSDFVKRNTGKRLPIGKRS
+NLYVRICDLSGTIYMGETFILESWEELYLPEPTKMEVLGTLESCCGIPPFPEWIVMVGED
+QCVYAYGDEEILLFAYSVKQLVEEGIQETGISYKYPDDISDVDEEVLQQDEEIQKIRKKT
+REFVDKDAQEFQDFLNSLDASLLS
+>sp|Q91G88|006L_IIV6 Putative KilA-N domain-containing protein 006L OS=Invertebrate iridescent virus 6 OX=176652 GN=IIV6-006L PE=3 SV=1
+MDSLNEVCYEQIKGTFYKGLFGDFPLIVDKKTGCFNATKLCVLGGKRFVDWNKTLRSKKL
+IQYYETRCDIKTESLLYEIKGDNNDEITKQITGTYLPKEFILDIASWISVEFYDKCNNII
+>fcresfdr
+MLARALLLCAVLALSHTANPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYILTHFKGFWNVVNNIPFLRNAIMSYVLTSRSHLIDSPPTYNADYGYKSWEAFSNLSYYTRALPPVPDDCPTPLGVKGKKQLPDSNEIVEKLLLRRKFIPDPQGSNMMFAFFAQHFTHQFFKTDHKRGPAFTNGLGHGVDLNHIYGETLARQRKLRLFKDGKMKYQIIDGEMYPPTVKDTQAEMIYPPQVPEHLRFAVGQEVFGLVPGLMMYATIWLREHNRVCDVLKQEHPEWGDEQLFQTSRLILIGETIKIVIEDYVQHLSGYHFKLKFDPELLFNKQFQYQNRIAAEFNTLYHWHPLLPDTFQIHDQKYNYQQFIYNNSILLEHGITQFVESFTRQIAGRVAGGRNVPPAVQKVSQASIDQSRQMKYQSFNEYRKRFMLKPYESFEELTGEKEMSAELEALYGDIDAVELYPALLVEKPRPDAIFGETMVEVGAPFSLKGLMGNVICSPAYWKPSTFGGEVGFQIINTASIQSLICNNVKGCPFTSFSVPDPELIKTVTINASSSRSGLDDINPTVLLKERSTEL
+>BUSCOaEOG7B0HST
+MAADQAQFQQLLVSLLSTDNEVRKQAEEAYNNLPVESKVTFLLGAIANGQLSEEVRQLAA
+VLLRRLFSSEFLEFYKKLPAEAQAQLKEQILLAVQQEVSEQLRRKVCEVVAEVARNLIDE
+DGNNQWPEFLQFLFQCANSPSPQLKESALRIFTSVPGIFGNQEAQYLDLIKQMLAKSLED
+TEDAEVRLQAVRAVGAFILLHDKEKEIQKHFADLLPALLQVVAESIEKQDDDALLKVLID
+LAEATPKFLRPQLETILELCLKVLSEEDVEDSWRHLALEVLVTLAETAPAMVRKRAEKYI
+VALVPLVLKMMTDLEEDEDWSVADEITEDDNDSNNVVAESALDRLACGLGGKVVLPLVVE
+AIPAMLSSSDWKKRHAALMAISAIGEGCHKQMEALLDQVLDGVLKYLQDPHPRVRYAACN
+AIGQMSTDFAPIFEKKFHDKVIPGLLLLLDDEANPRVQAHAGAALVNFSEDCPKNILTRY
+LDAIMAKLEAILTSKFKELVEKGTKLVLEQVVTTIASVADTAEEEFVAYYDRLMPCLKYI
+IQNANSEELKLLRGKTIECVSLIGLAVGREKFIADASEVMDLLLKTHTEGAELPDDDPQT
+SYLISAWARICKILGKQFEQYLPLVMGPVLRTASLKPEVALLDNEDLEDIEGDVDWQFVS
+LGEQQNFGIRTAGLEDKASACEMLVCYARELKEGFAEYAEEVVRLMVPLLKFYFHDGVRT
+AAAESLPYLLDCAKIKGPQYLEGMWAYICPELLKAIDTEPEKEVLSELLSSLAKCIETLG
+AGCLSEEALKELLRILDKLLKEHFERAEKRLEKRKDEDYDEVVEEELAEEDDEDVYILSK
+VADILHALFATYKEAFLPAFDQVVPHFVKLLEPERPLADRQWALCVFDDVIEFGGPACVK
+>FBpp0306926
+MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAG
+ESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKRFIRNPNYVKANEFYDKMLSSEY
+VSKRYKDLPPPHPGFGADQPPA
+>FBpp0078508
+MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFD
+LKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLGVTVDLINRELYQADSMTLLDPA
+DEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK
+KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPC
+AQVIFDSDPAPAGKNVPAQLEEMSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFI
+NGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGIYYNELETRVRLNKR
+RVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIG
+ETEKTSEDAAVGAQAASGADSPAQVARDRQSRSRSRTRSGSSSGSGSGSGSRASSRSK
+SGSRSGSGSRSRTNSPAGSQKSGSRSRSVSRSRSRSKSGSRSRSRSRSKSGSRSRSGS
+RSGSGSRSPSRSRSGSPSGSGSSSGSASDE
b
diff -r 000000000000 -r 40b87aef5241 test-data/genome.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome.fa Thu Aug 26 06:55:33 2021 +0000
b
b'@@ -0,0 +1,3253 @@\n+>sample\n+GTGGCCGGCTGATTTCACGTCCTAACTTTGGGCTTAACTGGTTCGCCAGTTGACTTTCTTCGCCATCATG\n+TGATGCATTAATTAAACAATAATTACTAATTGACAGTAATTAATAATTGTGGCAAAAAGCGCGACACGTT\n+TTTTCGGCAAACTCCTCGGAAGACCGATTGTTTAAAGACGTAGGGAAAAGTAGTTCCCAAGCATTTTAAA\n+AAGATACCTATGACATGTGACACCTTTAAAGTGCAATACAAGTTTTCATCTCTTTATATCCTTTTACTCC\n+CTAATTTGAATATAAAAGGAATCGCATTGAGAGTATAAAGGCTTTAGTTCTTATCGATAGATAGTTAGTG\n+ATGAAATAAAATTATAACCGTGGTTTTAGTTTGAAATGTTGTAAAGACTTTCTTTAAATTTAACCAAATT\n+TATGTGATAAAATGGATATTCCATAGATAAGACATTTAAGTTAAATGTTTTTATACATCAAAAAGGAAAC\n+ATTGTGCACGCTATCAAATGGTATTCTTAAAATCGAGTCAGTTAGGTAAGTTATTAATTAAATGGTAACT\n+TTTTAACGTGCGTCAAATAATCTAGAAATTCTTCTTCAACTCATCCAAAACATTCTCAACACCACAATAT\n+CTATGCTCAGCGATGACAAATTTCTCCTGATTTCTTAATTTTCTATCTATGCTATGCGATCAATCAACGA\n+ATGTGTGCTAATTTCTTGTGACGATTATTTTGCAAAGTCGTCTCCGCGTTAATATCCGATGTAAATAAAC\n+CTATGAAAATCGCAAAGATCTATTCCTTTGCGCTTAACCTTGTTATTGAACTCCCTCCCGCCCGGATTTT\n+CGCAGCTTCCAACTAAGTGATACCTTTTAAACAAACGCCACAACAAAACAGGTGACAATCATATTTTATC\n+AACAAAAGAAAAGAGAGATAATATCTGCTGCTAATTCAATTTATTGGGCTTTGTGTTTATTTGCATTGGG\n+AATCCGTGGAGCTGATATTGTTTACTTTGGCAATTTCCCAGTCATTATGGTCGCCGTATAATGTGAATGG\n+CCAACAGGAAAATTTCACAGATTCCACTGGTTATTCACTGTTCATGCTGGCGGTTGATCCGCTCCAAATC\n+CCGGATGAGGCGGCGGAAGATCCTTATACCGCTTACTTACGTATTCACTGCTTAACATCTTATCATAGAA\n+TTCGTTAGCTTTCACATAGTTTGGATTCCGAATGAATCTCTTCTGTCGACCTCGCGTTTTTGGCAATTTT\n+CGGAGCTCTTCGCTACTGCTAGTTGATTTAAGGCCTACATCTTTCTGATCCTTAATCATCCTTTTAGGTG\n+ATTCTCCTGCTACCATGGGATCCACCAGTTTCATATATAATGGATCGGGCTCGTTTGCAAGTGGGTTTCC\n+AGTTTCAGAAGACTCTGACGTTCTAAGCGCCAATAAAATGAAAATAACCAAAAGGAAACTAGACATGTTT\n+CGTTACAGACAGATATAGATTGGATATTATTGAAAGAAAATGAAAATAAACAGCGATAATGATCTGTGAC\n+TTATTGGAAATTAGATGGCTTATGGATGATGGGGCGATAAATTCGAACAAACACTGAGAGCATTTTTGGG\n+AGCATTGTGGGAGCTTTTCTATTATCCAGTACTTTATATACATATATCATTTATATACTAATCATTTCTG\n+GTAGCCGTTCGTAATCAGGATCGGATCCTTTTTTACCCGTTAGTCAGCTAGAAGAAACGAAAAATTAAAA\n+TAGTAAAATCTAAAAGTATACAAAAATTCAAATAGTAAAACCAAAAAGTATTAAAAAAAATATCAATCGT\n+TTTTAAACGTTGATTTTTCAGCTTGTGGGGTGATTTATCGCTAACTTGGAAAATGATAATAAAGCATTA
T\n+CCATAATATTAGTTGTGGAAATGAAATTCAAATAGATGTTGTGTTATATACGATGAGGATGTTGCATTTG\n+AGTCCCCGGAAATATAGTATTTTTTTTACCGAAGGTATTATCGTACCGGTCAAGTACGGTCACACTGCCA\n+AGCGCAGATTTGAGGATTTCTAGATTTGGCCTCTTGATGGACTAGAAGCGCTACCAAAACTGGGGCTTGA\n+GTTGAATTACCTGTTGGAAGACACAATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAA\n+AAGCGACCCCAGCGGCAAACGGAGCGCAAGTAAGTGAACAGATCCCTAAACAGACGCCAGATACTCAGAC\n+TGATGTGTACCTTGCAGATCCGAGATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCAT\n+TTGATCTGAAGTTTCTGCAGTACCCCTTCGACAGCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGA\n+GCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAG\n+CTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCTGGAGGAGGAGACTCTGA\n+CGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCGAGTACAT\n+CTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTC\n+AAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCT\n+TCAGCGACACCAAGAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCC\n+TATCTTCCCCGACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCT\n+GCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGA\n+GCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGACGCTGGAGAAACGCCGTACAGACTTCAT\n+CAATGGCGAGCTGTACAAGGAGGAGGAGGAGTACGAGTACAAGATCGCTCGAGAGTACAACTGGAACGTG\n+AAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATCTACTACA\n+ACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAA\n+GCTGGTAAGTATATTTATGCGCATACATCTATAGCGAGCTTTACTTTGTATTATTTCTACCAGGTTGTCA\n+AGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGG\n+CGAGGAGGAGGAGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAG\n+ACGAGCGAGGACGCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCG\n+ATCGACAGTCTCGTTCTCGGAGTCGAACTCGCAGCGGGTCCAGTTCAGGATCTGGATCTGGCTCCGGCTC\n+TCGGGCCAGCAGCCGCTCAAAGTCTGGTTCTCGGTCTGGTAGCGGCTCCAGATCACGCACAAATTCGCCG\n+GCAGGATCCCAGAAATCCGGATCCAGATCGAGATCGGTATCACGTTCCCGATCCCGTTCCAAGTCCGGCT\n+CTCGGTCGCGTTCTAGGTCGAGATCCAAGTCCGGTTCCCGATCACGTTCGGGCTCCAGATCTGGCTCTGG\n+GTCGCGATCGCCCAGCCGGTCTCGC
AGTGGCTCGCCTTCTGGTTCAGGATCCAGCTCTGGAAGCGCCTCA\n+GATGAATGAT'..b'ATCGGTATCACGTTCCCGATCCCGTTCCAAGTCCGGCT\n+CTCGGTCGCGTTCTAGGTCGAGATCCAAGTCCGGTTCCCGATCACGTTCGGGCTCCAGATCTGGCTCTGG\n+GTCGCGATCGCCCAGCCGGTCTCGCAGTGGCTCGCCTTCTGGTTCAGGATCCAGCTCTGGAAGCGCCTCA\n+GATGAATGATTAATTACAAAAAACGGCGTTCATAATAAATAAGTTTATAATCAACCAAGTACATTTGAAA\n+ACTGAACTAACTCGATTTAATATCATTTTCGCCTCAACTCAGCGCTCGGGTTCGTTGCCCAGAATAGTTT\n+TAAATAAAATCGGCAGTTTAACATAATTTATATTAGATGTTGTTGTTGTATTGCAAACAAGTCGGGTCCT\n+AGTCGATTTACACTTGGCTGAGATAAAACAACTAAGATTCAAATAATATCCTCATAAGAAGATGTAATTA\n+AGACGTTTTTCTTAGGGGGTGCTTAGGAATTGATTAGATCGCCTTTGGGGAAGTGCAAACAATGTAAAAT\n+GATATAAAAGGGTATAAATTAAGTGGATATATGCATCTTCGTTCCAACTACGTGGCGTCCATCAAAAAGC\n+GCTGGAAGACTTCGCCATCGGAACTAGGTAGCTCTGTTTGTTGCGGTGAGTAGATTCTCAAGTTCTGGAG\n+TTGCTGCAGCGGAGAGCCATTCCCGCTGAAGTGTACCACCGCAACCGGCTGTAGGGATATGAGCTGTCCC\n+TCCTCCCGCGGCTCCACACCACAGATGCCCAGCTTTTGGCACTGCTCAACGACAATGTCGTCGATTGACT\n+GCGAAAGCAGTGCCTCCTGCTCAGGATCCACAATGGAGCTGCTGTTGATGGCAGCTATTTCGGCGCTCGC\n+TGGTGAAGGCTGAGTATGCGAGTCGTTGCCCTCCAGGAACGCCTTTATCAGCTGCTCCGGTGTCTGGGCC\n+TCCTCGGTGGGACATCGATGGGTCCTTTGACTGACCTTGTACCTGAACGTCTTTTGGCAGAGCTCGCACT\n+TGTAGGGCATCACTCCCGTGTGGATGCGCGTGTGGACAAGGAAAGAGACTCGCTGCCGGAAACACTTGCC\n+TGTGGACGAGTCCGAAATACGAAAAGGTTAGACATGGAGTGACCCGGAAAAGAAGGTATACCTCTCCTTC\n+ATTTAAAGTAAATAGGGCAAATCGCAATGGAGTATGCTCATTTATAAGCTGGCTAACAAAATAAGGGGCG\n+GCTAATTAAGGGGTTTGATCGATACTCACCGCAGACTTCGCACTTGAAGGGCTTTTCGCCGCTGTGAATC\n+CTCTGGTGGTTATGCAGCGTAGACAGTTCCTTGAAGGCGCGTCCACAAACCCCGCAGACATGGGGCTTTA\n+CCTCGCTGTGGTAGAGCAAATGCTTGTGGTACGACTGCTGGAAGGTGAAGGTCCTGGCGCAGATCTCGCA\n+TGTGTACGGCATCTCGCCGGTGTGGAGCCGCTTGTGCTTCTTCAGAAAGTACTTGGTGGTGAAGGACTTG\n+CTGCACACATCGCACTCCCACAGCTTCGGGGTGGCCGTGCCCGACTCCGGCGAACTGGGGGACTGCTGGA\n+GCATGCTCAGCGCCCCACTCAGCGCGTATGGCTGGGATGCGGTGCACGTGGAGTTATTTCCGTTGCCAAG\n+GGCTCCAGGCTCTGTGTGTCGAATGCGATCGCAAATGCTCAGCTTGGGCGTGGCAGTAACTGCACTGGTG\n+GTAGAAGCGGGTGTAGGACTGGGATTAGGATTTGGATTGGGATTGGAGCAGGGCACGCCCATCATGTGCA\n+CTACTTTCAGGTGGATTCGAAGGGAGCCCTTCATTCGGAACGTCTTGGAGCAGAGATGGCACTTGTAGGG\
n+CTCCTGGTCCTGTATAAAGCAATAATCGGAATTTCACTTATATTTATCAATTCATCAATATGCCCTCATG\n+GCCAAATATTCCATTACATTACCGTCTGTCTCTCAGTTTCAAATTTATGCACAAAAATCATTCACTTTCA\n+TTCACTATATCACAAAGTTGCCATGGTTTTAAATTGATCAAAAACAAATTAATATCTATCATATATATAC\n+ATAGTCATATGAACAGTTGAAAAATTAATTGAAAATAATGGGAACGATATACGTACATACATCAGTTGTT\n+TTTAAAATATAAGGGTATATAGATTTCTTTCTTGTTGTTGTTGATTTTAATTACGTCAAACTTTTGTTTC\n+AGATTCAATGTAAATGGTCTAGCTTTTTAAGTATGATTTTTTTTTGCTGCCAGTGAGCATAGAAAAAAAA\n+AATCAAAATCGATATAAGAATATGCGAAAGTGCATTACGAAACTCTTTAGATAATAGCACTTAATATATG\n+TACATAGCCAATAGTTACCGGTTCCTTCTGTTGGGGTTCCTTTTGCTTGGGTTCTCCCTCCGCATTTTCG\n+TGGACTAAGCGGACGTGCATGTCCCTCAGCTCGGTATTCCGGAAACTGAACTCGCAAATGTCGCAGTGGG\n+CGGGCGGGGTGGTCCGCACAGGCGGTGGGGTTGGGACGACGGGCTTGGACCTGGTTCGCTTGGCCCTCCG\n+TTTGGGAGGAGCTGCGGCAAGGAAGCCCCGGGACGAGGCGGGTTGGCCATTCGCTGGACTCTCGTTTCCC\n+TCCTGAGCCATCAGACTTGTGTGCGAGAACAGGTGGATGGTGAGCTTGTCCAGCCCCAGGAAGAGCTCCT\n+TGCAGTTGGCAAAGGGACAGGCCAGTGGGCCGTTGGCCGCCTTGATCAGCCTCTGCTGCAGTGCGTCAAT\n+ACTGCCGAAACTGGGCACCGCGCAGAGCGGACACAGCACCGAGGTGGAACACATTTCGCCAGTGCACTCA\n+ATCGAATCTTATGCAAATGCTTCACCTCCTATTGGGATTATCCTCCTGTTCGGTCTGTGATCATCTATTC\n+AGGAGTCCATTCCCAGACTGCCTAGTCTTTCTGCTTTCAAAATTTTCTAAAAATATCAGCAAGTGAAGAT\n+TTTTGAAAACTTTGGGCCCAGCAATCTGACTTCTCGGCACCGATGCCAGCTAACGAAATAATGAAAAATA\n+ATGAAATGCCCGGCGCGGATCGTCGAATCGTCAAGAAGACTTTCGGAAACACTCGCAGCACCGAAATCCC\n+ATCTCTCGAACAAGGCAGTCTCTTTTCTCCGTGTCTCTGGGTAGCTCATTTCGAAATATAGCTCTGAGCA\n+CGGCTATATACTATATGTATGTAGAATTATTTCTGGCCGATATATGTTGCACTGGCGGCCATATAGCCTT\n+CGTTCTAGTCTTTGTAACGCACGATGCGCAGGAGCAATTCGCTGAGATGACCACATGCGATTTGCGGGAC\n+TTATCTAGAGATCTATCATTATCGCCAGATTGGTTTAATAATTGGCTTTTCCGCCAATATCCAATTGGAA\n+TATGGTTGGTTACTGCAATTGTCGCTCCATTTTTTAAGCACTCCATAAAAAGTAAACACATTAATATGTA\n+CTCTTATTGGAGATTTCTTCTTTCGATTTTAGTTTCGGACCAGTGAAAATCATTCGTTTCATTTTCGTAA\n+ATAAGAACTGAGAAAATATTATTATTATATATATTTCTTTATTAGGAAAATACGAAGATTGAGTATTTCA\n+GATTGAATTAGCATATCCGTCTAAATCTTAATGCTGTAATGAGCTTACTTGAGATCTGATCAAAACCAAT\n+ACAAAACCCACACCAAAGGTGGTAGCTAATATACATATTTTGTGTAATACTTTTGTAGAGTATTTACTAT\n+TCAGCGATTTAAACAAGCAATCGCCTA
GACACACACATTTGTCCGCCTATGTGTATGTGCACCGAGCTAT\n+ACCCCCACTGAATCGCTGTGTGCTATTTTTATGGCCGCGATGCTCTCTTGTTTTGACCCGCTTGGGCAAC\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/genome_masked.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome_masked.fa Thu Aug 26 06:55:33 2021 +0000
b
b'@@ -0,0 +1,4554 @@\n+>sample\n+GTGGCCGGCTGATTTCACGTCCTAACTTTGGGCTTAACTGGTTCGCCAGT\n+TGACTTTCTTCGCCATCATGTGATGCATTAATTAAACAATAATTACTAAT\n+TGacagtaattaataattgTGGCAAAAAGCGCGACACGTTTTTTCGGCAA\n+ACTCCTCGGAAGACCGATTGTTTAAAGACGTAGGGAAAAGTAGTTCCCAA\n+GCATTTTAAAAAGATACCTATGACATGTGACACCTTTAAAGTGCAATACA\n+AGTTTTCATCTCTTTATATCCTTTTACTCCCTAATTTGAATATAAAAGGA\n+ATCGCATTGAGAGTATAAAGGCTTTAGTTCTTATCGATagatagttagtG\n+ATGAAATAAAATTATAACCGTGGTTTTAGTTTGAAATGTTGTAAAGACTT\n+TCTTTAAATTTAACCAAATTTATGTGATAAAATGGATATTCCATAGATAA\n+GACATTTAAGTTAAATGTTTTTATACATCAAAAAGGAAACATTGTGCACG\n+CTATCAAATGGTATTCTTAAAATCGAGTCAgttaggtaagttattaatta\n+aATGGTAACTTTTTAACGTGCGTCAAATAATCTAGAAATTCTTCTTCAAC\n+TCATCCAAAACATTCTCAACACCACAATATCTATGCTCAGCGATGACAAA\n+TTTCTCCTGATTTCTTAATTTTCTATCTATGctatgcGATCAATCAACGA\n+ATGTGTGCTAATTTCTTGTGACGATTATTTTGCAAAGTCGTCTCCGCGTT\n+AATATCCGATGTAAATAAACCTATGAAAATCGCAAAGATCTATTCCTTTG\n+CGCTTAACCTTGTTATTGAACTCCCTCCCGCCCGGATTTTCGCAGCTTCC\n+AACTAAGTGATACCTTTTAAACAAACGCCACAACAAAACAGGTGACAATC\n+ATATTTTATCAACAAAAGAAAAGAGAGATAATATCTGCTGCTAATTCAAT\n+TTATTGGGCTTTGTGTTTATTTGCATTGGGAATCCGTGGAGCTGATATTG\n+TTTACTTTGGCAATTTCCCAGTCATTATGGTCGCCGTATAATGTGAATGG\n+CCAACAGGAAAATTTCACAGATTCCACTGGTTATTCACTGTTCATGCTGG\n+CGGTTGATCCGCTCCAAATCCCGGATGAGGCGGCGGAAGATCCTTATACC\n+GCTTACTTACGTATTCACTGCTTAACATCTTATCATAGAATTCGTTAGCT\n+TTCACATAGTTTGGATTCCGAATGAATCTCTTCTGTCGACCTCGCGTTTT\n+TGGCAATTTTCGGAGCTCTTCGCTACTGCTAGTTGATTTAAGGCCTACAT\n+CTTTCTGATCCTTAATCATCCTTTTAGGTGATTCTCCTGCTACCATGGGA\n+TCCACCAGTTTCATATATAATGGATCGGGCTCGTTTGCAAGTGGGTTTCC\n+AGTTTCAGAAGACTCTGACGTTCTAAGCGCCAATAAAATGAAAATAACCA\n+AAAGGAAACTAGACATGTTTCGTTACAGACAGATATAGATTGGATATTAT\n+TGAAAGAAAATgaaaataAACAGCGATAATGATCTGTGACTTATTGGAAA\n+TTAGATGGCTTATGGATGATGGGGCGATAAATTCGAACAAACACTGAGAG\n+CATTTTTGGGAGCATTGTGGGAGCTTTTCTATTATCCAGTACTTTATATA\n+CATATATCATTTATATACTAATCATTTCTGGTAGCCGTTCGTAATCAGGA\n+TCGGATCCTTTTTTACCCGTTAGTCAGCTAGAAGAAACGAAAAATTAAAA\n+TAGTAAAatctaaaagtatacaaaaattcaaatagtaaaaccaaaaagta\n+ttaaaaaaAATATCAATCGTTTTTAAACGTTGATTTTTCAGCTTGTGGGG\n+TGATTT
ATCGCTAACTTGGAAAATGATAATAAAGCATTATCCATAATATT\n+AGTTGTGGAAATGAAATTCAAATAGATGTTGTGTTATATACGATGAGGAT\n+GTTGCATTTGAGTCCCCGGAAATATAGTATTTTTTTTACCGAAGGTATTA\n+TCGTACCGGTCAAGTACGGTCACACTGCCAAGCGCAGATTTGAGGATTTC\n+TAGATTTGGCCTCTTGATGGACTAGAAGCGCTACCAAAACTGGGGCTTGA\n+GTTGAATTACCTGTTGGAAGACACAATGCCACCCACGATCAACAATTCGG\n+CGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAG\n+TAAGTGAACAGATCCCTAAACAGACGCCAGATACTCAGACTGATGTGTAC\n+CTTGCAGATCCGAGATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCG\n+GATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACAGCCACCG\n+CTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACG\n+TGCTGACGGAACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAG\n+CTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACT\n+GCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGC\n+ATTCGAGGACGGTGTCATGGTTGCGCAAATCCGAGTACATCTCCACCGAG\n+CAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGG\n+TTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCG\n+AAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAAGAGCGAA\n+ATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCC\n+TATCTTCCCCGACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATAT\n+TTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAG\n+GAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACA\n+GTTTGTCGCCTACTTCCTGCCCACAGAGCAGACGCTGGAGAAACGCCGTA\n+CAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtac\n+aagATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGG\n+CTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATCTACTACA\n+ACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGC\n+CAGCAACCCAACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCT\n+ATAGCGAGCTTTACTTTGTATTATTTCTACCAGGTTGTCAAGCATCGTCC\n+ATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAG\n+AAGTTCCTGGCGAGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAG\n+CAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGACGCAGCTGT\n+TGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCG\n+ATCGACAGTCTCGTTCTCGGAGTCGAACTCGCAGCGGGTCCAgttcagga\n+tctggatctggctccggctcTCGGGCCAgcagccgctcaaagtctggttc\n+tcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCC\n+AGAAATCCGGatccagatcgagatcggtatcacgttcccgatcccgttcc\n+aagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagtccggt
tcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTT'..b'cgcgttctaggtcgagatccaagtccggttcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTTCTGGTTCAGGATCCAGCTCTGGAAGCGCCTCA\n+GATGAATGATTAATTACAAAAAACGGCGTTCATAATAAATAAGTTTATAA\n+TCAACCAAGTACATTTGAAAACTGAACTAACTCGATTTAATATCATTTTC\n+GCCTCAACTCAGCGCTCGGGTTCGTTGCCCAGAATAGTTTTAAATAAAAT\n+CGGCAGTTTAACATAATTTATATTAGATGTtgttgttgtATTGCAAACAA\n+GTCGGGTCCTAGTCGATTTACACTTGGCTGAGATAAAACAACTAAGATTC\n+AAATAATATCCTCATAAGAAGATGTAATTAAGACGTTTTTCTTAGGGGGT\n+GCTTAGGAATTGATTAGATCGCCTTTGGGGAAGTGCAAACAATGTAAaat\n+gatataaaagggtataaaTTAAGTGGATATATGCATCTTCGTTCCAACTA\n+CGTGGCGTCCATCAAAAAGCGCTGGAAGACTTCGCCATCGGAACTAGGTA\n+GCTCTGTTTGTTGCGGTGAGTAGATTCTCAAGTTCTGGAGTTGCTGCAGC\n+GGAGAGCCATTCCCGCTGAAGTGTACCACCGCAACCGGCTGTAGGGATAT\n+GAGCTGTCCCTCCTCCCGCGGCTCCACACCACAGATGCCCAGCTTTTGGC\n+ACTGCTCAACGACAATGTCGTCGATTGACTGCGAAAGCAGTGCCTCCTGC\n+TCAGGATCCACAATGGAGCTGCTGTTGATGGCAGCTATTTCGGCGCTCGC\n+TGGTGAAGGCTGAGTATGCGAGTCGTTGCCCTCCAGGAACGCCTTTATCA\n+GCTGCTCCGGTGTCTGGGCCTCCTCGGTGGGACATCGATGGGTCCTTTGA\n+CTGACCTTGTACCTGAACGTCTTTTGGCAGAGCTCGCACTTGTAGGGCAT\n+CACTCCCGTGTGGATGCGCGTGTGGACAAGGAAAGAGACTCGCTGCCGGA\n+AACACTTGCCTGTGGACGAGTCCGAAATACGAAAAGGTTAGACATGGAGT\n+GACCCGGAAAAGAAGGTATACCTCTCCTTCATTTAAAGTAAATAGGGCAA\n+ATCGCAATGGAGTATGCTCATTTATAAGCTGGCTAACAAAATAAGGGGCG\n+GCTAATTAAGGGGTTTGATCGATACTCACCGCAGACTTCGCACTTGAAGG\n+GCTTTTCGCCGCTGTGAATCCTCTGGTGGTTATGCAGCGTAGACAGTTCC\n+TTGAAGGCGCGTCCACAAACCCCGCAGACATGGGGCTTTACCTCGCTGTG\n+GTAGAGCAAATGCTTGTGGTACGACTGCTGGAAGGTGAAGGTCCTGGCGC\n+AGATCTCGCATGTGTACGGCATCTCGCCGGTGTGGAGCCGCTTGTGCTTC\n+TTCAGAAAGTACTTGGTGGTGAAGGACTTGCTGCACACATCGCACTCCCA\n+CAGCTTCGGGGTGGCCGTGCCCGACTCCGGCGAACTGGGGGACTGCTGGA\n+GCATGCTCAGCGCCCCACTCAGCGCGTATGGCTGGGATGCGGTGCACGTG\n+GAGTTATTTCCGTTGCCAAGGGCTCCAGGCTCTGTGTGTCGAATGCGATC\n+GCAAATGCTCAGCTTGGGCGTGGCAGTAACTGCACTGGTGGTAGAAGCGG\n+GTGTAGGACtgggattaggatttggattgggattggAGCAGGGCACGCCC\n+ATCATGTGCACTACTTTCAGGTGGATTCGAAGGGAGCCCTTCATTCGGAA\n+CGTCTTGGAGCAGAGATGGCACTT
GTAGGGCTCCTGGTCCTGTATAAAGC\n+AATAATCGGAATTTCACTTATATTTATCAATTCATCAATATGCCCTCATG\n+GCCAAATATTCCATTACATTACCGTCTGTCTCTCAGTTTCAAATTTATGC\n+ACAAAAATCATTCACTTTCATTCACTATATCACAAAGTTGCCATGGTTTT\n+AAATTGATCAAAAACAAATTAATATCTATCATATATaTACATAGTCATAT\n+GAACAGTTGAAAAATTAATTGAAAATAATGGGAACGATATACGTACATAC\n+ATCAGTTGTTTTTAAAATATAAGGGTATATAGATTTCTTTCttgttgttg\n+ttgatTTTAATTACGTCAAACTTTTGTTTCAGATTCAATGTAAATGGTCT\n+AGCTTTTTAAGTATGATTTTTTTTTGCTGCCAGTGAGCATAGAAaaaaaa\n+aatCaaaATCGATATAAGAATATGCGAAAGTGCATTACGAAACTCTTTAG\n+ATAATAGCACTTAATATATGTACATAGCCAATAGTTACCGGTTCCTTCTG\n+TTGGggttccttttgcttgggttcTCCCTCCGCATTTTCGTGGACTAAGC\n+GGACGTGCATGTCCCTCAGCTCGGTATTCCGGAAACTGAACTCGCAAATG\n+TCGCAGTGGGCGGGCGGGGTGGTCCGCACAGGCGGTGGGGTTGGGACGAC\n+GGGCTTGGACCTGGTTCGCTTGGCCCTCCGTTTGGGAGGAGCTGCGGCAA\n+GGAAGCCCCGGGACGAGGCGGGTTGGCCATTCGCTGGACTCTCGTTTCCC\n+TCCTGAGCCATCAGACTTGTGTGCGAGAACAGGTGGATGGTGAGCTTGTC\n+CAGCCCCAGGAAGAGCTCCTTGCAGTTGGCAAAGGGACAGGCCAGTGGGC\n+CGTTGGCCGCCTTGATCAGCCTCTGCTGCAGTGCGTCAATACTGCCGAAA\n+CTGGGCACCGCGCAGAGCGGACACAGCACCGAGGTGGAACACATTTCGCC\n+AGTGCACTCAATCGAATCTTATGCAAATGCTTCACCTCCTATTGGGATTA\n+TCCTCCTGTTCGGTCTGTGATCATCTATTCAGGAGTCCATTCCCAGACTG\n+CCTAGTCTTTCTGCTTTCAAAATTTTCTAAAAATATCAGCAAGTGAAGAT\n+TTTTGAAAACTTTGGGCCCAGCAATCTGACTTCTCGGCACCGATGCCAGC\n+TAACGAAATAATGAAaaataatgaaaTGCCCGGCGCGGATCGTCGAATCG\n+TCAAGAAGACTTTCGGAAACACTCGCAGCACCGAAATCCCATCTCTCGAA\n+CAAGGCAGTCTCTTTTCTCCGTGTCTCTGGGTAGCTCATTTCGAAATATA\n+GCTCTGAGCACGGCTATATACTATATGTATGTAGAATTATTTCTGGCCGA\n+TATATGTTGCACTGGCGGCCATATAGCCTTCGTTCTAGTCTTTGTAACGC\n+ACGATGCGCAGGAGCAATTCGCTGAGATGACCACATGCGATTTGCGGGAC\n+TTATCTAGAGATCTATCATTATCGCCAGATTGGTTTAATAATTGGCTTTT\n+CCGCCAATATCCAATTGGAATATGGTTGGTTACTGCAATTGTCGCTCCAT\n+TTTTTAAGCACTCCATAAAAAGTAAACACATTAATATGTACTCTTATTGG\n+AGATTTCTTCTTTCGATTTTAGTTTCGGACCAGTGAAAATCATTCGTTTC\n+ATTTTCGTAAATAAGAACTGAGAAAATATtattattatATATATTTCTTT\n+ATTAGGAAAATACGAAGATTGAGTATTTCAGATTGAATTAGCATATCCGT\n+CTAAATCTTAATGCTGTAATGAGCTTACTTGAGATCTGATCAAAACCAAT\n+ACaaaacccacacCAAAGGTGGTAGCTAATATACATATTTTGTGTAATAC\n+TTTTGTAGAG
TATTTACTATTCAGCGATTTAAACAAGCAATCGCCTAGAC\n+ACACACATTTGTCCGCCTATGTGTATGTGCACCGAGCTATACCCCCACTG\n+AATCGCTGTGTGCTATTTTTATGGCCGCGATGCTCTCTTGTTTTGACCCG\n+CTTGGGCAAC\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_augustus/Genus_species.cds-transcripts.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_augustus/Genus_species.cds-transcripts.fa Thu Aug 26 06:55:33 2021 +0000
b
b'@@ -0,0 +1,421 @@\n+>FUN_000001-T1 FUN_000001\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000002-T1 FUN_000002\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGgatctggatctggctccggctcTCGGGCCAgc\n+agccgctcaaagtctggttctcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCCAGAAATCCGG\n+atccagatcgagatcggtatcacgttcccgatcccgttccaagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagt\n+ccggttcccgatcacgttcgggctccagatctggctct
gggtcgcgatcgCCCAGCCGGTCTCGCAGTGGCTCGCCTTCT\n+GGTTCAGGATCCAGCTCTGGAAGCGCCTCAGATGAATGATTAA\n+>FUN_000003-T1 FUN_000003\n+ATGTATGACCAGGAGCCCTACAAGTGCCATCTCTGCTCCAAGACGTTCCGAATGAAGGGCTCCCTTCGAATCCACCTGAA\n+AGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAATCCCAGTCCTACACCCGCTTCTACCACCA\n+GTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACACAGAGCCTGGAGCCCTTGGCAACGGAAAT\n+AACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGCATGCTCCAGCAGTCCCCCAGTTCGCCGGA\n+GTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTCCTTCACCACCAAGTACTTTCTGAAGAAGC\n+ACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCAGGACCTTCACCTTCCAGCAGTCGTACCAC\n+AAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGTGGACGCGCCTTCAAGGAACTGTCTACGCT\n+GCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAAGTCTGCGGTGAGTATCGATCAAACCCCTTAA\n+TTAGCCGCCCCTTATTTTGTTAG\n+>FUN_000004-T1 FUN_000004\n+ATGATACATTCGGCTAAGACTACCTTGGTGGAGTGGACAGAGGCCATGGAAGACGGAAACAAGGGCTACCAGCTAATCGA\n+AAAGTACTACCTCGATGACCAGCAAAAGGCACGGGAGCTGAACATTAAGCGTCAGCTCTTGCAGGCGGACATCGACAAGC\n+GGCGCAAGCAGGTGGTGCTCCTCTATGACGAGCAAATGACGCTGGAGAAGAATCTGGAGCGAACCGCTTGTCTGTACAGG\n+TCTGCCCACGCGGAACGCCGCCAGATGGTGGAGACGTGGAAGAGCGCCGTGAACCAGATGACCCAGCGAGAGCACGACAT\n+TCAGCGCAGCGAGATAGAGTGTGCGGAGCTGGCCCAGAAAGCCCAGCAGACGGCTCAGACCTACAAGGAGTACGACAACC\n+AGCTAACCGAGGTCATCGAGAACAACCGCCAGGTGGAGCTCGCGATCGAGTCCCTCAACGAGGAAAGCTCCGACATGAAA\n+AACCAAATCCAGATCCTGATTGATGCCACATTACTGAAGGAGCGCGAGATCGACGGACTGCGTCGGGAACTGGAGAACCT\n+TTCGAACCGGGTTCACCTGCAGCGCATGGATAACCGTAGTCAGATGAAAAAGCGCGATGAGAAGGCCAAGGAGCTGGAGA\n+ACTTCGCTTCGGTGATGGAGAAGGTTAACGCTCGGCTGAAGTCTGTACAGAACAAGGCCCTCAATGCGGAGCAGCGGCTG\n+CAAATTCTGGAGGAGATGATGCAGGCGGAGGAGACGGCTCTGCGAAACCTGGACAAGGAGCAGGAGAAGGTCAACGAGAT\n+GTTGTACCGCACCCAGAGGCAGGTCATCGAGCTGCAGGACGAGGAGAAGGTCTTGAAGGTGCAGAACGACTCGCTCAACT\n+CCAACCTGGCAGCGATCAATCGCAATCAGCAGCAGGTCAATAACGAACTCAAGCGGCAGACGGAGATCCATTACAGCCTG\n+TCCTTTAAGTGCTTGGAGGCGGAGCGGCGTCTCGCCGAGATCAAGGGTCTGGCAGATGACCCCGAGGTAGAGGCCACAAA\n+CATGGCCCGGCTTAACACTCTGGAGCAGGAGTATGAAAAGTTGCAACGCCTCATCGCCACCACGGAGGCGCAGAATAAGA\n+A
GCTAAACTATAACATGAACAACCTGGTCGTCCAGTACAACGCCGACGAGAAGGA'..b'TCCTTTG\n+ACAATCTGCCCGCGTTGGAACAACTACTTCTAAGGAACTGTGGGTTGGGCAACATCGAGTGGGAAATCGTGAGGCCAAGA\n+CTCAAATTGTTGCACTATTTGGACTTGGGTGGAGCTCAAAAGACGGGCAACTATGAACATCAGTTGGACGTATCCGCGTT\n+CTCTCCTGAGACAACTACTAACGCTGAAGAGATATCTACTATCCTAGCGAAGAGAGCGATGGCTCCAGAAGTAGTGGGCA\n+CCACTACACTAGGACCTACTACTTCCATAGAAATTTCACCACCATCAACACAAAGTACAACAACACCCAAAGAAGAAAGT\n+ACATCTATGACAGAAACAACGATATTAACAACACCATCGCCAAAATGCGAAGAGGAACTTTGCCAGGATCTAGAGTGCTC\n+CAGAATTACCACCGATACGGTTGCATCTGCAGATCTGGGAAAATCGTCGTGCCAGGATGGTTTACTGGTGGAGATCTGCG\n+AATCGACTTGCACCACTCCCACATTCTTCTGTGTGATATTAGGCGAGAACTTTACCTCCGCATCCAACTGTTGTTCCCAC\n+CATACCATGCGATGTGTGGTCTCTGCACAGGTCTCCTGGTTCGAGGACCACAGCGGACTGGTCATTGGCTTGGGAGTGGG\n+TCTCCTCTTCATCGGCAGCTTCCTCGGCATGCTCATCGTCTTCGGAACCCTGCGCCTGAATCCATCCTGGTTGCGAGGCA\n+ATAAGCGGCGGGAGTCGAATACGATAGGACTGATTCAGGGAAGATTTGAGAAGGACCCGTATGAGCAAGTGGGGTAG\n+>FUN_000016-T1 FUN_000016\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000017-T1 
FUN_000017\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGgatctggatctggctccggctcTCGGGCCAgc\n+agccgctcaaagtctggttctcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCCAGAAATCCGG\n+atccagatcgagatcggtatcacgttcccgatcccgttccaagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagt\n+ccggttcccgatcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGTCTCGCAGTGGCTCGCCTTCT\n+GGTTCAGGATCCAGCTCTGGAAGCGCCTCAGATGAATGATTAA\n+>FUN_000018-T1 
FUN_000018\n+ATGAAGGGCTCCCTTCGAATCCACCTGAAAGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAA\n+TCCCAGTCCTACACCCGCTTCTACCACCAGTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACA\n+CAGAGCCTGGAGCCCTTGGCAACGGAAATAACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGC\n+ATGCTCCAGCAGTCCCCCAGTTCGCCGGAGTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTC\n+CTTCACCACCAAGTACTTTCTGAAGAAGCACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCA\n+GGACCTTCACCTTCCAGCAGTCGTACCACAAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGT\n+GGACGCGCCTTCAAGGAACTGTCTACGCTGCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAAGT\n+CTGCGGCAAGTGTTTCCGGCAGCGAGTCTCTTTCCTTGTCCACACGCGCATCCACACGGGAGTGATGCCCTACAAGTGCG\n+AGCTCTGCCAAAAGACGTTCAGGTACAAGGTCAGTCAAAGGACCCATCGATGTCCCACCGAGGAGGCCCAGACACCGGAG\n+CAGCTGATAAAGGCGTTCCTGGAGGGCAACGACTCGCATACTCAGCCTTCACCAGCGAGCGCCGAAATAGCTGCCATCAA\n+CAGCAGCTCCATTGTGGATCCTGAGCAGGAGGCACTGCTTTCGCAGTCAATCGACGACATTGTCGTTGAGCAGTGCCAAA\n+AGCTGGGCATCTGTGGTGTGGAGCCGCGGGAGGAGGGACAGCTCATATCCCTACAGCCGGTTGCGGTGGTACACTTCAGC\n+GGGAATGGCTCTCCGCTGCAGCAACTCCAGAACTTGAGAATCTACTCACCGCAACAAACAGAGCTACCTAGTTCCGATGG\n+CGAAGTCTTCCAGCGCTTTTTGATGGACGCCACGTAG\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_augustus/Genus_species.discrepency.report.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_augustus/Genus_species.discrepency.report.txt Thu Aug 26 06:55:33 2021 +0000
b
@@ -0,0 +1,139 @@
+Discrepancy Report Results
+
+Summary
+DISC_PROTEIN_NAMES:All proteins have same name "hypothetical protein"
+DISC_SOURCE_QUALS_ASNDISC:taxname (all present, all same)
+DISC_FEATURE_COUNT:gene: 18 present
+DISC_FEATURE_COUNT:CDS: 18 present
+DISC_FEATURE_COUNT:mRNA: 18 present
+DISC_COUNT_NUCLEOTIDES:4 nucleotide Bioseqs are present
+JOINED_FEATURES:32 features have joined locations.
+NO_ANNOTATION:2 bioseqs have no features
+DISC_QUALITY_SCORES:Quality scores are missing on all sequences.
+ONCALLER_COMMENT_PRESENT:4 comment descriptors were found (all same)
+MISSING_GENOMEASSEMBLY_COMMENTS:4 bioseqs are missing GenomeAssembly structured comments
+MOLTYPE_NOT_MRNA:4 molecule types are not set as mRNA.
+TECHNIQUE_NOT_TSA:4 technique are not set as TSA
+MISSING_STRUCTURED_COMMENT:4 sequences do not include structured comments.
+MISSING_PROJECT:22 sequences do not include project.
+DISC_INCONSISTENT_MOLINFO_TECH:Molinfo Technique Report (some missing, all same)
+
+
+Detailed Report
+
+DiscRep_ALL:DISC_PROTEIN_NAMES::All proteins have same name "hypothetical protein"
+
+DiscRep_ALL:DISC_SOURCE_QUALS_ASNDISC::taxname (all present, all same)
+DiscRep_SUB:DISC_SOURCE_QUALS_ASNDISC::4 sources have 'Genus species' for taxname
+DiscRep_ALL:DISC_FEATURE_COUNT::gene: 18 present
+DiscRep_ALL:DISC_FEATURE_COUNT::CDS: 18 present
+DiscRep_ALL:DISC_FEATURE_COUNT::mRNA: 18 present
+DiscRep_ALL:DISC_COUNT_NUCLEOTIDES::4 nucleotide Bioseqs are present
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:JOINED_FEATURES::32 features have joined locations.
+DiscRep_SUB:JOINED_FEATURES::32 features have joined location but no exception
+genome:CDS hypothetical protein (sample:2126-2199, 2258-3224, 3284-3490, 3549-3863) FUN_000002
+genome:mRNA hypothetical protein (sample4:2126-2199, 2258-3224, 3284-3490, 3549-3863) FUN_000017
+genome:CDS hypothetical protein (sample4:2126-2199, 2258-3224, 3284-3490, 3549-3863) FUN_000017
+genome:mRNA hypothetical protein (sample:2126-2199, 2258-3224, 3284-3490, 3549-3863) FUN_000002
+genome:CDS hypothetical protein (sample4:c5494-4930, c4759-4248) FUN_000018
+genome:mRNA hypothetical protein (sample4:c5494-4930, c4759-4248) FUN_000018
+genome:mRNA hypothetical protein (sample:c5802-5797, c5539-4883) FUN_000003
+genome:CDS hypothetical protein (sample:c5802-5797, c5539-4883) FUN_000003
+genome:CDS hypothetical protein (sample:c10557-10549, c10462-8696) FUN_000004
+genome:mRNA hypothetical protein (sample:c10557-10549, c10462-8696) FUN_000004
+genome:mRNA hypothetical protein (sample:c15214-15209, c14648-14247) FUN_000005
+genome:CDS hypothetical protein (sample:c15214-15209, c14648-14247) FUN_000005
+genome:CDS hypothetical protein (sample:c21705-21700, c21515-19533) FUN_000006
+genome:mRNA hypothetical protein (sample:c21705-21700, c21515-19533) FUN_000006
+genome:CDS hypothetical protein (sample:c35679-35675, c35655-35648, c35594-34843) FUN_000007
+genome:mRNA hypothetical protein (sample:c35679-35675, c35655-35648, c35594-34843) FUN_000007
+genome:CDS hypothetical protein (sample:40223-40396, 40659-41234) FUN_000008
+genome:mRNA hypothetical protein (sample:40223-40396, 40659-41234) FUN_000008
+genome:mRNA hypothetical protein (sample:41267-41274, 41437-41444, 41707-42107) FUN_000009
+genome:CDS hypothetical protein (sample:41267-41274, 41437-41444, 41707-42107) FUN_000009
+genome:CDS hypothetical protein (sample:87202-87207, 88054-88320) FUN_000010
+genome:mRNA hypothetical protein (sample:87202-87207, 88054-88320) FUN_000010
+genome:CDS hypothetical protein (sample:94727-94732, 94873-95016, 95449-95583) FUN_000011
+genome:mRNA hypothetical protein (sample:94727-94732, 94873-95016, 95449-95583) FUN_000011
+genome:CDS hypothetical protein (sample:133134-133142, 133209-134539, 134668-135510, 135569-136346) FUN_000012
+genome:mRNA hypothetical protein (sample:133134-133142, 133209-134539, 134668-135510, 135569-136346) FUN_000012
+genome:CDS hypothetical protein (sample:144294-144551, 149012-149244, 149367-149588, 149654-149897, 149952-150112, 150174-150248, 151966-152072, 152314-152429, 152496-152751, 153651-159010, 159150-164491, 167135-168360, 168722-169208, 169350-169416) FUN_000013
+genome:mRNA hypothetical protein (sample:144294-144551, 149012-149244, 149367-149588, 149654-149897, 149952-150112, 150174-150248, 151966-152072, 152314-152429, 152496-152751, 153651-159010, 159150-164491, 167135-168360, 168722-169208, 169350-169416) FUN_000013
+genome:CDS hypothetical protein (sample:192049-192067, 193549-193658, 194041-194455, 194518-194669) FUN_000014
+genome:mRNA hypothetical protein (sample:192049-192067, 193549-193658, 194041-194455, 194518-194669) FUN_000014
+genome:CDS hypothetical protein (sample:c210553-210548, c210474-209044) FUN_000015
+genome:mRNA hypothetical protein (sample:c210553-210548, c210474-209044) FUN_000015
+
+DiscRep_ALL:NO_ANNOTATION::2 bioseqs have no features
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+
+DiscRep_ALL:DISC_QUALITY_SCORES::Quality scores are missing on all sequences.
+
+DiscRep_ALL:ONCALLER_COMMENT_PRESENT::4 comment descriptors were found (all same)
+genome:sample:"Annotated using 1.8.7"
+genome:sample2:"Annotated using 1.8.7"
+genome:sample3:"Annotated using 1.8.7"
+genome:sample4:"Annotated using 1.8.7"
+
+DiscRep_ALL:MISSING_GENOMEASSEMBLY_COMMENTS::4 bioseqs are missing GenomeAssembly structured comments
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:MOLTYPE_NOT_MRNA::4 molecule types are not set as mRNA.
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:TECHNIQUE_NOT_TSA::4 technique are not set as TSA
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:MISSING_STRUCTURED_COMMENT::4 sequences do not include structured comments.
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:MISSING_PROJECT::22 sequences do not include project.
+genome:sample (length 215740)
+genome:ncbi:FUN_000001-T1 (length 124)
+genome:ncbi:FUN_000002-T1 (length 520)
+genome:ncbi:FUN_000003-T1 (length 220)
+genome:ncbi:FUN_000004-T1 (length 591)
+genome:ncbi:FUN_000005-T1 (length 135)
+genome:ncbi:FUN_000006-T1 (length 662)
+genome:ncbi:FUN_000007-T1 (length 254)
+genome:ncbi:FUN_000008-T1 (length 249)
+genome:ncbi:FUN_000009-T1 (length 138)
+genome:ncbi:FUN_000010-T1 (length 90)
+genome:ncbi:FUN_000011-T1 (length 94)
+genome:ncbi:FUN_000012-T1 (length 986)
+genome:ncbi:FUN_000013-T1 (length 4717)
+genome:ncbi:FUN_000014-T1 (length 231)
+genome:ncbi:FUN_000015-T1 (length 478)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+genome:ncbi:FUN_000016-T1 (length 124)
+genome:ncbi:FUN_000017-T1 (length 520)
+genome:ncbi:FUN_000018-T1 (length 358)
+
+DiscRep_ALL:DISC_INCONSISTENT_MOLINFO_TECH::Molinfo Technique Report (some missing, all same)
+DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::technique (all missing)
+DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::4 Molinfos are missing field technique
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_augustus/Genus_species.error.summary.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_augustus/Genus_species.error.summary.txt Thu Aug 26 06:55:33 2021 +0000
b
@@ -0,0 +1,1 @@
+     2 WARNING: SEQ_FEAT.ShortExon
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_augustus/Genus_species.gbk
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_augustus/Genus_species.gbk Thu Aug 26 06:55:33 2021 +0000
b
b'@@ -0,0 +1,4258 @@\n+LOCUS       sample                215740 bp    DNA     linear       21-JUL-2021\n+DEFINITION  Genus species.\n+ACCESSION   \n+VERSION\n+KEYWORDS    .\n+SOURCE      Genus species\n+  ORGANISM  Genus species\n+            Unclassified.\n+REFERENCE   1  (bases 1 to 215740)\n+  AUTHORS   Palmer,J.M.\n+  TITLE     Direct Submission\n+  JOURNAL   Submitted (21-JUL-2021) CFMR, USDA Forest Service, 1 Gifford\n+            Pinchot Drive, Madison, WI 53726, USA\n+COMMENT     \'Annotated using 1.8.7\'.\n+FEATURES             Location/Qualifiers\n+     source          1..215740\n+                     /organism="Genus species"\n+                     /mol_type="genomic DNA"\n+     gene            complement(1092..1466)\n+                     /locus_tag="FUN_000001"\n+     mRNA            complement(1092..1466)\n+                     /locus_tag="FUN_000001"\n+                     /product="hypothetical protein"\n+     CDS             complement(1092..1466)\n+                     /locus_tag="FUN_000001"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUN_000001-T1"\n+                     /translation="MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAG\n+                     ESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKRFIRNPNYVKANEFYDKMLSSEY\n+                     VSKRYKDLPPPHPGFGADQPPA"\n+     gene            2126..3863\n+                     /locus_tag="FUN_000002"\n+     mRNA            join(2126..2199,2258..3224,3284..3490,3549..3863)\n+                     /locus_tag="FUN_000002"\n+                     /product="hypothetical protein"\n+     CDS             join(2126..2199,2258..3224,3284..3490,3549..3863)\n+                     /locus_tag="FUN_000002"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUN_000002-T1"\n+                     /translation="MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFD\n+     
                LKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLGVTVDLINRELYQADSMTLLDPA\n+                     DEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK\n+                     KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPC\n+                     AQVIFDSDPAPAGKNVPAQLEEMSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFI\n+                     NGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGIYYNELETRVRLNKR\n+                     RVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIG\n+                     ETEKTSEDAAVGAQAASGADSPAQDLDLAPALGPAAAQSLVLGLVAAPDHAQIRRQDP\n+                     RNPDPDRDRYHVPDPVPSPALGRVLGRDPSPVPDHVRAPDLALGRDRPAGLAVARLLV\n+                     QDPALEAPQMND"\n+     gene            complement(4883..5802)\n+                     /locus_tag="FUN_000003"\n+     mRNA            complement(join(4883..5539,5797..5802))\n+                     /locus_tag="FUN_000003"\n+                     /product="hypothetical protein"\n+     CDS             complement(join(4883..5539,5797..5802))\n+                     /locus_tag="FUN_000003"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUN_000003-T1"\n+                     /translation="MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPN\n+                     PSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGNNSTCTASQPYALSGALSMLQQS\n+                     PSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+                     KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCEVCGEYRSNPLISRPL\n+                     FC"\n+     gene            complement(8696..10557)\n+                     /locus_tag="FUN_000004"\n+     mRNA            complement(join(8696..10462,10549..10557))\n+                     /locus_tag="FUN_000004"\n+                     /product="hypothetical protein"\n+     CDS             complement(join(8696..10462,10549..10557))\n+                     /locus_tag="FUN_000004"\n+                     /codon_start=1\n+                     
/product="hypothetical protein"\n+                     /protein_id="ncbi:FUN_000004-T'..b'1 agcttttggc actgctcaac gacaatgtcg tcgattgact gcgaaagcag tgcctcctgc\n+     4501 tcaggatcca caatggagct gctgttgatg gcagctattt cggcgctcgc tggtgaaggc\n+     4561 tgagtatgcg agtcgttgcc ctccaggaac gcctttatca gctgctccgg tgtctgggcc\n+     4621 tcctcggtgg gacatcgatg ggtcctttga ctgaccttgt acctgaacgt cttttggcag\n+     4681 agctcgcact tgtagggcat cactcccgtg tggatgcgcg tgtggacaag gaaagagact\n+     4741 cgctgccgga aacacttgcc tgtggacgag tccgaaatac gaaaaggtta gacatggagt\n+     4801 gacccggaaa agaaggtata cctctccttc atttaaagta aatagggcaa atcgcaatgg\n+     4861 agtatgctca tttataagct ggctaacaaa ataaggggcg gctaattaag gggtttgatc\n+     4921 gatactcacc gcagacttcg cacttgaagg gcttttcgcc gctgtgaatc ctctggtggt\n+     4981 tatgcagcgt agacagttcc ttgaaggcgc gtccacaaac cccgcagaca tggggcttta\n+     5041 cctcgctgtg gtagagcaaa tgcttgtggt acgactgctg gaaggtgaag gtcctggcgc\n+     5101 agatctcgca tgtgtacggc atctcgccgg tgtggagccg cttgtgcttc ttcagaaagt\n+     5161 acttggtggt gaaggacttg ctgcacacat cgcactccca cagcttcggg gtggccgtgc\n+     5221 ccgactccgg cgaactgggg gactgctgga gcatgctcag cgccccactc agcgcgtatg\n+     5281 gctgggatgc ggtgcacgtg gagttatttc cgttgccaag ggctccaggc tctgtgtgtc\n+     5341 gaatgcgatc gcaaatgctc agcttgggcg tggcagtaac tgcactggtg gtagaagcgg\n+     5401 gtgtaggact gggattagga tttggattgg gattggagca gggcacgccc atcatgtgca\n+     5461 ctactttcag gtggattcga agggagccct tcattcggaa cgtcttggag cagagatggc\n+     5521 acttgtaggg ctcctggtcc tgtataaagc aataatcgga atttcactta tatttatcaa\n+     5581 ttcatcaata tgccctcatg gccaaatatt ccattacatt accgtctgtc tctcagtttc\n+     5641 aaatttatgc acaaaaatca ttcactttca ttcactatat cacaaagttg ccatggtttt\n+     5701 aaattgatca aaaacaaatt aatatctatc atatatatac atagtcatat gaacagttga\n+     5761 aaaattaatt gaaaataatg ggaacgatat acgtacatac atcagttgtt tttaaaatat\n+     5821 aagggtatat agatttcttt cttgttgttg ttgattttaa ttacgtcaaa cttttgtttc\n+     5881 agattcaatg taaatggtct agctttttaa 
gtatgatttt tttttgctgc cagtgagcat\n+     5941 agaaaaaaaa aatcaaaatc gatataagaa tatgcgaaag tgcattacga aactctttag\n+     6001 ataatagcac ttaatatatg tacatagcca atagttaccg gttccttctg ttggggttcc\n+     6061 ttttgcttgg gttctccctc cgcattttcg tggactaagc ggacgtgcat gtccctcagc\n+     6121 tcggtattcc ggaaactgaa ctcgcaaatg tcgcagtggg cgggcggggt ggtccgcaca\n+     6181 ggcggtgggg ttgggacgac gggcttggac ctggttcgct tggccctccg tttgggagga\n+     6241 gctgcggcaa ggaagccccg ggacgaggcg ggttggccat tcgctggact ctcgtttccc\n+     6301 tcctgagcca tcagacttgt gtgcgagaac aggtggatgg tgagcttgtc cagccccagg\n+     6361 aagagctcct tgcagttggc aaagggacag gccagtgggc cgttggccgc cttgatcagc\n+     6421 ctctgctgca gtgcgtcaat actgccgaaa ctgggcaccg cgcagagcgg acacagcacc\n+     6481 gaggtggaac acatttcgcc agtgcactca atcgaatctt atgcaaatgc ttcacctcct\n+     6541 attgggatta tcctcctgtt cggtctgtga tcatctattc aggagtccat tcccagactg\n+     6601 cctagtcttt ctgctttcaa aattttctaa aaatatcagc aagtgaagat ttttgaaaac\n+     6661 tttgggccca gcaatctgac ttctcggcac cgatgccagc taacgaaata atgaaaaata\n+     6721 atgaaatgcc cggcgcggat cgtcgaatcg tcaagaagac tttcggaaac actcgcagca\n+     6781 ccgaaatccc atctctcgaa caaggcagtc tcttttctcc gtgtctctgg gtagctcatt\n+     6841 tcgaaatata gctctgagca cggctatata ctatatgtat gtagaattat ttctggccga\n+     6901 tatatgttgc actggcggcc atatagcctt cgttctagtc tttgtaacgc acgatgcgca\n+     6961 ggagcaattc gctgagatga ccacatgcga tttgcgggac ttatctagag atctatcatt\n+     7021 atcgccagat tggtttaata attggctttt ccgccaatat ccaattggaa tatggttggt\n+     7081 tactgcaatt gtcgctccat tttttaagca ctccataaaa agtaaacaca ttaatatgta\n+     7141 ctcttattgg agatttcttc tttcgatttt agtttcggac cagtgaaaat cattcgtttc\n+     7201 attttcgtaa ataagaactg agaaaatatt attattatat atatttcttt attaggaaaa\n+     7261 tacgaagatt gagtatttca gattgaatta gcatatccgt ctaaatctta atgctgtaat\n+     7321 gagcttactt gagatctgat caaaaccaat acaaaaccca caccaaaggt ggtagctaat\n+     7381 atacatattt tgtgtaatac ttttgtagag tatttactat tcagcgattt aaacaagcaa\n+     7441 
tcgcctagac acacacattt gtccgcctat gtgtatgtgc accgagctat acccccactg\n+     7501 aatcgctgtg tgctattttt atggccgcga tgctctcttg ttttgacccg cttgggcaac\n+//\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_augustus/Genus_species.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_augustus/Genus_species.gff3 Thu Aug 26 06:55:33 2021 +0000
b
b'@@ -0,0 +1,151 @@\n+##gff-version 3\n+sample\tfunannotate\tgene\t1092\t1466\t.\t-\t.\tID=FUN_000001;\n+sample\tfunannotate\tmRNA\t1092\t1466\t.\t-\t.\tID=FUN_000001-T1;Parent=FUN_000001;product=hypothetical protein;\n+sample\tfunannotate\texon\t1092\t1466\t.\t-\t.\tID=FUN_000001-T1.exon1;Parent=FUN_000001-T1;\n+sample\tfunannotate\tCDS\t1092\t1466\t.\t-\t0\tID=FUN_000001-T1.cds;Parent=FUN_000001-T1;\n+sample\tfunannotate\tgene\t2126\t3863\t.\t+\t.\tID=FUN_000002;\n+sample\tfunannotate\tmRNA\t2126\t3863\t.\t+\t.\tID=FUN_000002-T1;Parent=FUN_000002;product=hypothetical protein;\n+sample\tfunannotate\texon\t2126\t2199\t.\t+\t.\tID=FUN_000002-T1.exon1;Parent=FUN_000002-T1;\n+sample\tfunannotate\texon\t2258\t3224\t.\t+\t.\tID=FUN_000002-T1.exon2;Parent=FUN_000002-T1;\n+sample\tfunannotate\texon\t3284\t3490\t.\t+\t.\tID=FUN_000002-T1.exon3;Parent=FUN_000002-T1;\n+sample\tfunannotate\texon\t3549\t3863\t.\t+\t.\tID=FUN_000002-T1.exon4;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t2126\t2199\t.\t+\t0\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t2258\t3224\t.\t+\t1\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t3284\t3490\t.\t+\t0\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t3549\t3863\t.\t+\t0\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tgene\t4883\t5802\t.\t-\t.\tID=FUN_000003;\n+sample\tfunannotate\tmRNA\t4883\t5802\t.\t-\t.\tID=FUN_000003-T1;Parent=FUN_000003;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t5797\t5802\t.\t-\t.\tID=FUN_000003-T1.exon1;Parent=FUN_000003-T1;\n+sample\tfunannotate\texon\t4883\t5539\t.\t-\t.\tID=FUN_000003-T1.exon2;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t5797\t5802\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t4883\t5539\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tgene\t8696\t10557\t.\t-\t.\tID=FUN_000004;\n+sample\tfunannotate\tmRNA\t8696\t10557\t.\t-\t.\tID=FUN_000004-T1;Parent=FUN_000004;product=hypothetical protein;\n+sample\tfunannotate\texon\t10549\t10557\t.\t-\t.\tID=FUN_000004-T1.exon1;Parent=FUN_000004-T1;\n+sample\tfunannotate\texon\t8696\t10462\t.\t-\t.\tID=FUN_000004-T1.exon2;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t10549\t10557\t.\t-\t0\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t8696\t10462\t.\t-\t0\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tgene\t14247\t15214\t.\t-\t.\tID=FUN_000005;\n+sample\tfunannotate\tmRNA\t14247\t15214\t.\t-\t.\tID=FUN_000005-T1;Parent=FUN_000005;product=hypothetical protein;\n+sample\tfunannotate\texon\t15209\t15214\t.\t-\t.\tID=FUN_000005-T1.exon1;Parent=FUN_000005-T1;\n+sample\tfunannotate\texon\t14247\t14648\t.\t-\t.\tID=FUN_000005-T1.exon2;Parent=FUN_000005-T1;\n+sample\tfunannotate\tCDS\t15209\t15214\t.\t-\t0\tID=FUN_000005-T1.cds;Parent=FUN_000005-T1;\n+sample\tfunannotate\tCDS\t14247\t14648\t.\t-\t0\tID=FUN_000005-T1.cds;Parent=FUN_000005-T1;\n+sample\tfunannotate\tgene\t19533\t21705\t.\t-\t.\tID=FUN_000006;\n+sample\tfunannotate\tmRNA\t19533\t21705\t.\t-\t.\tID=FUN_000006-T1;Parent=FUN_000006;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t21700\t21705\t.\t-\t.\tID=FUN_000006-T1.exon1;Parent=FUN_000006-T1;\n+sample\tfunannotate\texon\t19533\t21515\t.\t-\t.\tID=FUN_000006-T1.exon2;Parent=FUN_000006-T1;\n+sample\tfunannotate\tCDS\t21700\t21705\t.\t-\t0\tID=FUN_000006-T1.cds;Parent=FUN_000006-T1;\n+sample\tfunannotate\tCDS\t19533\t21515\t.\t-\t0\tID=FUN_000006-T1.cds;Parent=FUN_000006-T1;\n+sample\tfunannotate\tgene\t34843\t35679\t.\t-\t.\tID=FUN_000007;\n+sample\tfunannotate\tmRNA\t34843\t35679\t.\t-\t.\tID=FUN_000007-T1;Parent=FUN_000007;product=hypothetical protein;\n+sample\tfunannotate\texon\t35675\t35679\t.\t-\t.\tID=FUN_000007-T1.exon1;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t35648\t35655\t.\t-\t.\tID=FUN_000007-T1.exon2;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t34843\t35594\t.\t-\t.\tID=FUN_000007-T1.exon3;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t35675\t35679\t.\t-\t0\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t35648\t35655\t.\t-\t1\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t34843\t35594\t.\t-\t2\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tgene\t40223\t41234\t.\t+\t.\tID=FUN_000008;\n+sample\tfunannotate\t'..b'N_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t149952\t150112\t.\t+\t0\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t150174\t150248\t.\t+\t1\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t151966\t152072\t.\t+\t1\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t152314\t152429\t.\t+\t2\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t152496\t152751\t.\t+\t0\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t153651\t159010\t.\t+\t2\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t159150\t164491\t.\t+\t0\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t167135\t168360\t.\t
+\t1\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t168722\t169208\t.\t+\t2\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t169350\t169416\t.\t+\t1\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tgene\t192049\t194669\t.\t+\t.\tID=FUN_000014;\n+sample\tfunannotate\tmRNA\t192049\t194669\t.\t+\t.\tID=FUN_000014-T1;Parent=FUN_000014;product=hypothetical protein;\n+sample\tfunannotate\texon\t192049\t192067\t.\t+\t.\tID=FUN_000014-T1.exon1;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t193549\t193658\t.\t+\t.\tID=FUN_000014-T1.exon2;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t194041\t194455\t.\t+\t.\tID=FUN_000014-T1.exon3;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t194518\t194669\t.\t+\t.\tID=FUN_000014-T1.exon4;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t192049\t192067\t.\t+\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t193549\t193658\t.\t+\t2\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t194041\t194455\t.\t+\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t194518\t194669\t.\t+\t2\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tgene\t209044\t210553\t.\t-\t.\tID=FUN_000015;\n+sample\tfunannotate\tmRNA\t209044\t210553\t.\t-\t.\tID=FUN_000015-T1;Parent=FUN_000015;product=hypothetical protein;\n+sample\tfunannotate\texon\t210548\t210553\t.\t-\t.\tID=FUN_000015-T1.exon1;Parent=FUN_000015-T1;\n+sample\tfunannotate\texon\t209044\t210474\t.\t-\t.\tID=FUN_000015-T1.exon2;Parent=FUN_000015-T1;\n+sample\tfunannotate\tCDS\t210548\t210553\t.\t-\t0\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample\tfunannotate\tCDS\t209044\t210474\t.\t-\t0\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample4\tfunannotate\tgene\t1092\t1466\t.\t-\t.\tID=FUN_000016;\n+sample4\tfunannotate\tmRNA\t1092\t1466\t.\t-\t.\tID=FUN_000016-T1;Parent=FUN_000016;product=hypothetical 
protein;\n+sample4\tfunannotate\texon\t1092\t1466\t.\t-\t.\tID=FUN_000016-T1.exon1;Parent=FUN_000016-T1;\n+sample4\tfunannotate\tCDS\t1092\t1466\t.\t-\t0\tID=FUN_000016-T1.cds;Parent=FUN_000016-T1;\n+sample4\tfunannotate\tgene\t2126\t3863\t.\t+\t.\tID=FUN_000017;\n+sample4\tfunannotate\tmRNA\t2126\t3863\t.\t+\t.\tID=FUN_000017-T1;Parent=FUN_000017;product=hypothetical protein;\n+sample4\tfunannotate\texon\t2126\t2199\t.\t+\t.\tID=FUN_000017-T1.exon1;Parent=FUN_000017-T1;\n+sample4\tfunannotate\texon\t2258\t3224\t.\t+\t.\tID=FUN_000017-T1.exon2;Parent=FUN_000017-T1;\n+sample4\tfunannotate\texon\t3284\t3490\t.\t+\t.\tID=FUN_000017-T1.exon3;Parent=FUN_000017-T1;\n+sample4\tfunannotate\texon\t3549\t3863\t.\t+\t.\tID=FUN_000017-T1.exon4;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tCDS\t2126\t2199\t.\t+\t0\tID=FUN_000017-T1.cds;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tCDS\t2258\t3224\t.\t+\t1\tID=FUN_000017-T1.cds;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tCDS\t3284\t3490\t.\t+\t0\tID=FUN_000017-T1.cds;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tCDS\t3549\t3863\t.\t+\t0\tID=FUN_000017-T1.cds;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tgene\t4248\t5494\t.\t-\t.\tID=FUN_000018;\n+sample4\tfunannotate\tmRNA\t4248\t5494\t.\t-\t.\tID=FUN_000018-T1;Parent=FUN_000018;product=hypothetical protein;\n+sample4\tfunannotate\texon\t4930\t5494\t.\t-\t.\tID=FUN_000018-T1.exon1;Parent=FUN_000018-T1;\n+sample4\tfunannotate\texon\t4248\t4759\t.\t-\t.\tID=FUN_000018-T1.exon2;Parent=FUN_000018-T1;\n+sample4\tfunannotate\tCDS\t4930\t5494\t.\t-\t0\tID=FUN_000018-T1.cds;Parent=FUN_000018-T1;\n+sample4\tfunannotate\tCDS\t4248\t4759\t.\t-\t2\tID=FUN_000018-T1.cds;Parent=FUN_000018-T1;\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_augustus/Genus_species.mrna-transcripts.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_augustus/Genus_species.mrna-transcripts.fa Thu Aug 26 06:55:33 2021 +0000
b
b'@@ -0,0 +1,421 @@\n+>FUN_000001-T1 FUN_000001\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000002-T1 FUN_000002\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGgatctggatctggctccggctcTCGGGCCAgc\n+agccgctcaaagtctggttctcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCCAGAAATCCGG\n+atccagatcgagatcggtatcacgttcccgatcccgttccaagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagt\n+ccggttcccgatcacgttcgggctccagatctggctct
gggtcgcgatcgCCCAGCCGGTCTCGCAGTGGCTCGCCTTCT\n+GGTTCAGGATCCAGCTCTGGAAGCGCCTCAGATGAATGATTAA\n+>FUN_000003-T1 FUN_000003\n+ATGTATGACCAGGAGCCCTACAAGTGCCATCTCTGCTCCAAGACGTTCCGAATGAAGGGCTCCCTTCGAATCCACCTGAA\n+AGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAATCCCAGTCCTACACCCGCTTCTACCACCA\n+GTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACACAGAGCCTGGAGCCCTTGGCAACGGAAAT\n+AACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGCATGCTCCAGCAGTCCCCCAGTTCGCCGGA\n+GTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTCCTTCACCACCAAGTACTTTCTGAAGAAGC\n+ACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCAGGACCTTCACCTTCCAGCAGTCGTACCAC\n+AAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGTGGACGCGCCTTCAAGGAACTGTCTACGCT\n+GCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAAGTCTGCGGTGAGTATCGATCAAACCCCTTAA\n+TTAGCCGCCCCTTATTTTGTTAG\n+>FUN_000004-T1 FUN_000004\n+ATGATACATTCGGCTAAGACTACCTTGGTGGAGTGGACAGAGGCCATGGAAGACGGAAACAAGGGCTACCAGCTAATCGA\n+AAAGTACTACCTCGATGACCAGCAAAAGGCACGGGAGCTGAACATTAAGCGTCAGCTCTTGCAGGCGGACATCGACAAGC\n+GGCGCAAGCAGGTGGTGCTCCTCTATGACGAGCAAATGACGCTGGAGAAGAATCTGGAGCGAACCGCTTGTCTGTACAGG\n+TCTGCCCACGCGGAACGCCGCCAGATGGTGGAGACGTGGAAGAGCGCCGTGAACCAGATGACCCAGCGAGAGCACGACAT\n+TCAGCGCAGCGAGATAGAGTGTGCGGAGCTGGCCCAGAAAGCCCAGCAGACGGCTCAGACCTACAAGGAGTACGACAACC\n+AGCTAACCGAGGTCATCGAGAACAACCGCCAGGTGGAGCTCGCGATCGAGTCCCTCAACGAGGAAAGCTCCGACATGAAA\n+AACCAAATCCAGATCCTGATTGATGCCACATTACTGAAGGAGCGCGAGATCGACGGACTGCGTCGGGAACTGGAGAACCT\n+TTCGAACCGGGTTCACCTGCAGCGCATGGATAACCGTAGTCAGATGAAAAAGCGCGATGAGAAGGCCAAGGAGCTGGAGA\n+ACTTCGCTTCGGTGATGGAGAAGGTTAACGCTCGGCTGAAGTCTGTACAGAACAAGGCCCTCAATGCGGAGCAGCGGCTG\n+CAAATTCTGGAGGAGATGATGCAGGCGGAGGAGACGGCTCTGCGAAACCTGGACAAGGAGCAGGAGAAGGTCAACGAGAT\n+GTTGTACCGCACCCAGAGGCAGGTCATCGAGCTGCAGGACGAGGAGAAGGTCTTGAAGGTGCAGAACGACTCGCTCAACT\n+CCAACCTGGCAGCGATCAATCGCAATCAGCAGCAGGTCAATAACGAACTCAAGCGGCAGACGGAGATCCATTACAGCCTG\n+TCCTTTAAGTGCTTGGAGGCGGAGCGGCGTCTCGCCGAGATCAAGGGTCTGGCAGATGACCCCGAGGTAGAGGCCACAAA\n+CATGGCCCGGCTTAACACTCTGGAGCAGGAGTATGAAAAGTTGCAACGCCTCATCGCCACCACGGAGGCGCAGAATAAGA\n+A
GCTAAACTATAACATGAACAACCTGGTCGTCCAGTACAACGCCGACGAGAAGGA'..b'TCCTTTG\n+ACAATCTGCCCGCGTTGGAACAACTACTTCTAAGGAACTGTGGGTTGGGCAACATCGAGTGGGAAATCGTGAGGCCAAGA\n+CTCAAATTGTTGCACTATTTGGACTTGGGTGGAGCTCAAAAGACGGGCAACTATGAACATCAGTTGGACGTATCCGCGTT\n+CTCTCCTGAGACAACTACTAACGCTGAAGAGATATCTACTATCCTAGCGAAGAGAGCGATGGCTCCAGAAGTAGTGGGCA\n+CCACTACACTAGGACCTACTACTTCCATAGAAATTTCACCACCATCAACACAAAGTACAACAACACCCAAAGAAGAAAGT\n+ACATCTATGACAGAAACAACGATATTAACAACACCATCGCCAAAATGCGAAGAGGAACTTTGCCAGGATCTAGAGTGCTC\n+CAGAATTACCACCGATACGGTTGCATCTGCAGATCTGGGAAAATCGTCGTGCCAGGATGGTTTACTGGTGGAGATCTGCG\n+AATCGACTTGCACCACTCCCACATTCTTCTGTGTGATATTAGGCGAGAACTTTACCTCCGCATCCAACTGTTGTTCCCAC\n+CATACCATGCGATGTGTGGTCTCTGCACAGGTCTCCTGGTTCGAGGACCACAGCGGACTGGTCATTGGCTTGGGAGTGGG\n+TCTCCTCTTCATCGGCAGCTTCCTCGGCATGCTCATCGTCTTCGGAACCCTGCGCCTGAATCCATCCTGGTTGCGAGGCA\n+ATAAGCGGCGGGAGTCGAATACGATAGGACTGATTCAGGGAAGATTTGAGAAGGACCCGTATGAGCAAGTGGGGTAG\n+>FUN_000016-T1 FUN_000016\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000017-T1 
FUN_000017\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGgatctggatctggctccggctcTCGGGCCAgc\n+agccgctcaaagtctggttctcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCCAGAAATCCGG\n+atccagatcgagatcggtatcacgttcccgatcccgttccaagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagt\n+ccggttcccgatcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGTCTCGCAGTGGCTCGCCTTCT\n+GGTTCAGGATCCAGCTCTGGAAGCGCCTCAGATGAATGATTAA\n+>FUN_000018-T1 
FUN_000018\n+ATGAAGGGCTCCCTTCGAATCCACCTGAAAGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAA\n+TCCCAGTCCTACACCCGCTTCTACCACCAGTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACA\n+CAGAGCCTGGAGCCCTTGGCAACGGAAATAACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGC\n+ATGCTCCAGCAGTCCCCCAGTTCGCCGGAGTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTC\n+CTTCACCACCAAGTACTTTCTGAAGAAGCACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCA\n+GGACCTTCACCTTCCAGCAGTCGTACCACAAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGT\n+GGACGCGCCTTCAAGGAACTGTCTACGCTGCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAAGT\n+CTGCGGCAAGTGTTTCCGGCAGCGAGTCTCTTTCCTTGTCCACACGCGCATCCACACGGGAGTGATGCCCTACAAGTGCG\n+AGCTCTGCCAAAAGACGTTCAGGTACAAGGTCAGTCAAAGGACCCATCGATGTCCCACCGAGGAGGCCCAGACACCGGAG\n+CAGCTGATAAAGGCGTTCCTGGAGGGCAACGACTCGCATACTCAGCCTTCACCAGCGAGCGCCGAAATAGCTGCCATCAA\n+CAGCAGCTCCATTGTGGATCCTGAGCAGGAGGCACTGCTTTCGCAGTCAATCGACGACATTGTCGTTGAGCAGTGCCAAA\n+AGCTGGGCATCTGTGGTGTGGAGCCGCGGGAGGAGGGACAGCTCATATCCCTACAGCCGGTTGCGGTGGTACACTTCAGC\n+GGGAATGGCTCTCCGCTGCAGCAACTCCAGAACTTGAGAATCTACTCACCGCAACAAACAGAGCTACCTAGTTCCGATGG\n+CGAAGTCTTCCAGCGCTTTTTGATGGACGCCACGTAG\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_augustus/Genus_species.proteins.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_augustus/Genus_species.proteins.fa Thu Aug 26 06:55:33 2021 +0000
b
b'@@ -0,0 +1,158 @@\n+>FUN_000001-T1 FUN_000001\n+MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAGESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKR\n+FIRNPNYVKANEFYDKMLSSEYVSKRYKDLPPPHPGFGADQPPA\n+>FUN_000002-T1 FUN_000002\n+MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFDLKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLG\n+VTVDLINRELYQADSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK\n+KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPCAQVIFDSDPAPAGKNVPAQLEE\n+MSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFINGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGI\n+YYNELETRVRLNKRRVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIGETEKTSED\n+AAVGAQAASGADSPAQDLDLAPALGPAAAQSLVLGLVAAPDHAQIRRQDPRNPDPDRDRYHVPDPVPSPALGRVLGRDPS\n+PVPDHVRAPDLALGRDRPAGLAVARLLVQDPALEAPQMND\n+>FUN_000003-T1 FUN_000003\n+MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPNPSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGN\n+NSTCTASQPYALSGALSMLQQSPSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCEVCGEYRSNPLISRPLFC\n+>FUN_000004-T1 FUN_000004\n+MIHSAKTTLVEWTEAMEDGNKGYQLIEKYYLDDQQKARELNIKRQLLQADIDKRRKQVVLLYDEQMTLEKNLERTACLYR\n+SAHAERRQMVETWKSAVNQMTQREHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELAIESLNEESSDMK\n+NQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRDEKAKELENFASVMEKVNARLKSVQNKALNAEQRL\n+QILEEMMQAEETALRNLDKEQEKVNEMLYRTQRQVIELQDEEKVLKVQNDSLNSNLAAINRNQQQVNNELKRQTEIHYSL\n+SFKCLEAERRLAEIKGLADDPEVEATNMARLNTLEQEYEKLQRLIATTEAQNKKLNYNMNNLVVQYNADEKELEMVRFKI\n+KEAQVYCEGTVKRLRQIRYENSELIVDLNMVKMRCSDLEVGIGGCEQGTYDLEQHRLAFRRAIKDRTVELRSQEDVLLLK\n+KKHLNEELSTLRADLGERKKQIEAMKARFELTAQLLGKNEDGSIMTSTQLKVVSAQERQMLADEGDALNKKVLKAEKEVV\n+ALENTLRQFDKSNDNYRKTFRSVDENSKGEL\n+>FUN_000005-T1 FUN_000005\n+MRCVFCGSGSEQQHSRWEIKMLQESCRTDHGFHQDSQAIQYLYEILASYNRDEQRAFLQFVTGSPRLPTGGFKALTPPLT\n+IVRKTLDENQNPNDYLPSVMTCVNYLKLPDYSSREVMRQKLKVAANEGSMSFHLS\n+>FUN_000006-T1 
FUN_000006\n+MSFCDFLRDAALGANSANLSIVAAALSAARDVGGGSDGGGSAGGATPATGASASSVGNTSAVGASSSSNSSAGQAASSNS\n+NNVTATGSGSAPGGGPTSTGTTSGTQHGSGSGAAAAVDSESDDSEVGRLQALLEARGLPPHLFGALGPRVTHILHRTIGN\n+SSSSKANQLLQGLQSHDESQQLQAAIEMCQMLVMGNEDTLAGFPIKQVVPALIQLLRMEHNFDIMNNACRALAYMLEALP\n+RSSGTVVEAVPVFLEKLQVIQCMDVAEQSLSALEILSRRHNKAILQANGISACLTYLDFFSIVAQRAALAIAANCCLNMH\n+PEEFHFVAESLPLLARLLSQQDKKCIESVCSAFCRLVESFQHDGQRLQQIASPDLLKNCQQLLLVTPAILNTGTFTAVVR\n+MLSLMCCSCPDLAISLLRNDIAATLLYLLTGNAEPAAASATHVELISRSPSELYELTCLIGELMPRLPLDGIFAVDSLLD\n+RPTLNTQDQVHWQWRDDRGSWHNYSTIDSRLIEAANQSSEDEISLSTFGRTYTVDFHAMQQINEDTGTTRPVQRRLNHNY\n+VAPMSAGQDLTTTSAGSAAAGGASTSAAAAAASSNNNNNNNNNPPGNSVNLNQVKRRPSLDARIACLKVRNKMFCLFTRY\n+TLKLNNLPNYNVFFMKYFRRHS\n+>FUN_000007-T1 FUN_000007\n+MKIRYCVMILGSRQMHFIVNEDQSSAASPLVWAGITAEEYFPEYRMEAAHPDQEYIVLGVSSANLGRALSVLRGGGVNSC\n+KLKLQRIQFPCISVIASVLTSSSTEAREVVHDVPVTIIPGSDWSAYVVPRVPNSQLALGLPSLRLLKSLIDKLKNISPSL\n+EFQVNVDGELNVIATSEMSTVTSRFQKLLIRTVSGSQQEASCSVDSRKASAFFGALQLPNEELTIGIDREHSIHLQIDVR\n+QDVVLHSILPAVCM\n+>FUN_000008-T1 FUN_000008\n+MCGNPAVGNGTRALILVGGYGTRLRPLTLSTPKPLVEFANKPILLHQLEALVDAGCRQVILAVSYRAEQMEKELKVEAKK\n+LGVELIFSHETEPLGTAGPLALAKTILAASSEPFFVLNSDVICDFPFKQLVQFHCNHGKEGTIVVTKVEEPSKYGVVLYD\n+ENGCIKNFIEKPQEFVSNKINAGIYIFNPSVLDRIEVKPTSIEKEVFPEMTQQQELYAMDLTGFWMDIGQPKDFLTGTTD\n+RITRLLAQS\n+>FUN_000009-T1 FUN_000009\n+MWIVNCMCLYLSSLRQKQSPKLYTGPGVVGNVLVDPTAKIGEGCRIGPNVTIGPDVVIEDGVCIKRSTILKGAIVRSHSW\n+LDSCIVGWRSTVGRWVRIEGITVLGEDVIVKDELYINGGQVLPHKSIAASVPEPQIIM\n+>FUN_000010-T1 FUN_000010\n+MDFIHEAVGTSAAPSANPGPMPLCQPVRLELPFSPRHSFALGHFPFQLCPCLYAVWVQTMGLVLLLLIVFAPFLFRVLFK\n+PCNPKRYLTT\n+>FUN_000011-T1 FUN_000011\n+MKVHGNVDEKSPSHGYDSEGEESSSSSIITGGAQTPPSTRLDGSAGSSSGHHPPSDWYHTTAPSGSAEAMNPLNHFGHHH\n+HHHHLMHPGAATAY\n+>FUN_000012-T1 
FUN_000012\n+MQRGIDSFFKRLPAKAKSAEAENGETPSKAPKRRKAVIISSDEDEVVSPPETKKRKASKTASSEDDVVAATPEPIAKKAR\n+NGQKPALSKLKRHVDPTELFGGETKRVIVPKPKTKAVLEFENEDIDRSLMEVDLDESIKEAAPEKKVHSITRSSPSPKRA\n+KNSSPEPPKPKSTKSKATTPRVKKEKPAADLESSVLTDEERHERKRASAVLYQKYKNRSSCLNPGSKEIPKGSPDCLSGL\n+TFVVTGVLESMEREEAESVIKEYGGKVMTVVGKKLKYLVVGEEAGPKKLAVAEELNIPILSEDGLFDLIREKSGIAKQVK\n+EEKKSPKKEHSSEEKGKKEVKTSRRSSDKKEKEATKLKYGEKHDIAKHKVKEEHTSPKETKDKLNDVPAVTLKVKKEPSS\n+QKEHPPSPRTADLKTLDVVGMAWVDKHKPTSIKEIVGQAGAASNVTKLMNWLSKWYVNHDGNKKPQRPNPWAKNDDGSF'..b'KPSTPTLEKQSPIDLGTSTESYLEPIEERIAKILDRGGARTEDSESSSGGSRKPPRIEK\n+PARANAGKKLSVTRADAGKSGSDRSSQESKSSFDSKGSLSVESRGSFETESSSGSLGAAQRRGELAQKEQQSTWRPFPIE\n+SSNSSSTDDPWHHVETDGGYERYDAQNPLRDSSDSDVKEASPDDQKDASDASYQDELNDFPATFGYPAMTSSLGGIGVNP\n+TDIIGYSTGFTLGRTLSRISERSTASEKSSMEDDVSKASTHSVSMRDESVGSTDHQPSLSSDSRSNTNLAYISDADRRTS\n+AEMPEIPCDSATGDRLSSFGSLNEPKSPTLVTGRFSVTHVDEQQGDDVERHTLMCLSNAGSQDSEDWPLPEIPFDHVPVK\n+PADSLYAMPDLDKPVPKSFCWKASLSFQQSQDSLDWPSPPSSAIGAPIIVENIETYYASEVQSADKVILDEEMAVGPPDV\n+AKVLPYEDTAYLMSAAFDDNDFGNEQLQPDTVSCLSSTLSAASCLSSSLNVSCTTSSTQATARALRKNSSPEVIVAQPTR\n+SPAPRSPLSEDELFSSDDVFMPGTIKVQLSPDAQLRKLSKGSNNSDTSIDDILSGSTTYLEDQTTVRKNYEARLSSGGGG\n+ASCKKCSHSSHSEEETSSLGTDLDGTVRMGGLQQKKCTHSSHSEDTSIGLSISEWSTGTNTVRQYANLSGSDSLSAVSTH\n+SCAKSEKSNQTKSSISSINKSAESLNEQSGGSSFSHKFSGDNGSSDGLRYDMLSNSETDKLSEATSATRSDDTTLTLTEM\n+AHTISEWSTSSSRTLVGVAPGEYLPLKQALSGNKTSLSSPSEEKRCALPQVHRRSGSNGNQARAAQEHADSQTGPETSAA\n+ARKRRSLEMMSKLYQSQEICSESESPFVERLYAHSEKLTERYQSQEFVPLHGGPPASHLASSTTSQIQTQQPQQVRQKPR\n+APQPPTKPKPAVTRPIMQALLNKMKQPGLAEQAAEAAEAEEKKAMIAASAVAAKPPPPPVPTVPPIVTPSDLPGDAVAPP\n+PKPLAKHHSYDDRTLSKTQIREFKTTSKQLRQSSSFHEHMLSKSQQSSQELPMRIDEERDPHSTSSATNTTTTTNTLNSE\n+STEPNSPQMPQRADKLVRCSPYYSSSLSSESPPNQLLQKPPRKTATQLSAGAVAASLKSPPSGNDTDSSLDVRGQEAKMR\n+SRGYRKKRQLPVKRMRANLTAAALLEQAESSECSEGYVPEVDSGSSEYSSCQRDDQYLEFDEELERDQTDDYEDYPQYSG\n+KFESLDMSDNVDEMGFPRYDRLSHITKPMYHQALVMERPNPVQLPAPANHPMPPATGQPVKPARTKKRQFKREDSTAAGT\n+SGHSTAAPQVRPYHGRSYCNPEESEYETRGGGLSDELANSSEDSCSGFGGDAGASGSGTIRRGTTKGAGQDQEQGTGGQA\n+RHVPYPDFLSDYESE
PIEYERYACGLDIRVDPPPKFHDSDELSDQRGEITTPPTRYDLTLGSDKSSSLSRSEAGTYDVIQ\n+AEIQHAKRQELATGVATASHQNGNGNGNGHTLSTQHDIEAEVKKRKWPTEPSYFLAKELLMTERTYKKDLDVLNTTFRQV\n+LSLGDVEQLQPLFELLDSLAQHHNLFLRDIEHRMVQWEGRGGHEAHRIGDVMMKHMAALPIYDEYVQTHLDILHCMNDMY\n+EGDERFRQVYKEFEQQKVCYLPIGELLLKPLNRLLHYQLILERLCDYYGEEHIDYADAMAVHHLLVRSTKGIRSQLPDSA\n+NFVELCELQRDINFEQLVQPHRRLIRQGCLLKHSKRGLQQRMFFLFSDLLLYGSKSPLDQSFRILGHVPVRSLLTENAEH\n+NTFSIFGGQCAITVSAGTTAEKTLWLAELSKAAADIKNRPPNMQLQLTTLKNCSSSEEGLDLFGLSNGNNSSLNSSVNGG\n+GPLTTQQQKLQLQQQQQNRTQPSRSNTALHVCWHRGATVGLGDHLIAAEHQLSGYLLRKFKNSSGWQKLWVVFTSFCLYF\n+YKSYQDEFALASLPLLGYTVGPPGHQDAVQKEFVFKLSFKNHVYFFRAESAHTYNRITCRVDLDLCSARYVQCRSTE\n+>FUN_000014-T1 FUN_000014\n+MVQAETPPYIKQCHRNDPKLVDCFIGAIEHLKPYLANGIPDIQLPSVEPFKMDTLALQLTEGPQGYKITLKNMEAFGASN\n+FKVTSLKLSEGSEPFKAKIVMPKLKIEAKYTSSGVLLILPASGGGDFHANFEGVSADLTGKTSIHAFKGANYLHIDALSL\n+VLDVKDVKMSISGAFNNNRILLEATNLFLRENSQVVLEAMQAQLQKKLASEFGKLANQLLKNVPVEQFYVD\n+>FUN_000015-T1 FUN_000015\n+MASKSFDLVIEEKTKKPERLYQPRRMRWLKYIILPAVFSFALLLILVNVDFSDNSEDSTHLGNDTSLIISGYGFENNTLR\n+RGFFSGGIALHSLVIENCTIVHINDAAFNQESTVNITSLQLINVQLENLTESALEGLQKLQNFTLVNENNHFRPFGFLSA\n+VAESLVSAEIHQSLAAAISYSVCDFLGSRNFPQLKYLDLSGTHLDKSLIKESFDNLPALEQLLLRNCGLGNIEWEIVRPR\n+LKLLHYLDLGGAQKTGNYEHQLDVSAFSPETTTNAEEISTILAKRAMAPEVVGTTTLGPTTSIEISPPSTQSTTTPKEES\n+TSMTETTILTTPSPKCEEELCQDLECSRITTDTVASADLGKSSCQDGLLVEICESTCTTPTFFCVILGENFTSASNCCSH\n+HTMRCVVSAQVSWFEDHSGLVIGLGVGLLFIGSFLGMLIVFGTLRLNPSWLRGNKRRESNTIGLIQGRFEKDPYEQVG\n+>FUN_000016-T1 FUN_000016\n+MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAGESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKR\n+FIRNPNYVKANEFYDKMLSSEYVSKRYKDLPPPHPGFGADQPPA\n+>FUN_000017-T1 
FUN_000017\n+MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFDLKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLG\n+VTVDLINRELYQADSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK\n+KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPCAQVIFDSDPAPAGKNVPAQLEE\n+MSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFINGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGI\n+YYNELETRVRLNKRRVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIGETEKTSED\n+AAVGAQAASGADSPAQDLDLAPALGPAAAQSLVLGLVAAPDHAQIRRQDPRNPDPDRDRYHVPDPVPSPALGRVLGRDPS\n+PVPDHVRAPDLALGRDRPAGLAVARLLVQDPALEAPQMND\n+>FUN_000018-T1 FUN_000018\n+MKGSLRIHLKVVHMMGVPCSNPNPNPNPSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGNNSTCTASQPYALSGALS\n+MLQQSPSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYHKHLLYHSEVKPHVCGVC\n+GRAFKELSTLHNHQRIHSGEKPFKCEVCGKCFRQRVSFLVHTRIHTGVMPYKCELCQKTFRYKVSQRTHRCPTEEAQTPE\n+QLIKAFLEGNDSHTQPSPASAEIAAINSSSIVDPEQEALLSQSIDDIVVEQCQKLGICGVEPREEGQLISLQPVAVVHFS\n+GNGSPLQQLQNLRIYSPQQTELPSSDGEVFQRFLMDAT\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_augustus/Genus_species.scaffolds.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_augustus/Genus_species.scaffolds.fa Thu Aug 26 06:55:33 2021 +0000
b
b'@@ -0,0 +1,4554 @@\n+>sample\n+GTGGCCGGCTGATTTCACGTCCTAACTTTGGGCTTAACTGGTTCGCCAGT\n+TGACTTTCTTCGCCATCATGTGATGCATTAATTAAACAATAATTACTAAT\n+TGacagtaattaataattgTGGCAAAAAGCGCGACACGTTTTTTCGGCAA\n+ACTCCTCGGAAGACCGATTGTTTAAAGACGTAGGGAAAAGTAGTTCCCAA\n+GCATTTTAAAAAGATACCTATGACATGTGACACCTTTAAAGTGCAATACA\n+AGTTTTCATCTCTTTATATCCTTTTACTCCCTAATTTGAATATAAAAGGA\n+ATCGCATTGAGAGTATAAAGGCTTTAGTTCTTATCGATagatagttagtG\n+ATGAAATAAAATTATAACCGTGGTTTTAGTTTGAAATGTTGTAAAGACTT\n+TCTTTAAATTTAACCAAATTTATGTGATAAAATGGATATTCCATAGATAA\n+GACATTTAAGTTAAATGTTTTTATACATCAAAAAGGAAACATTGTGCACG\n+CTATCAAATGGTATTCTTAAAATCGAGTCAgttaggtaagttattaatta\n+aATGGTAACTTTTTAACGTGCGTCAAATAATCTAGAAATTCTTCTTCAAC\n+TCATCCAAAACATTCTCAACACCACAATATCTATGCTCAGCGATGACAAA\n+TTTCTCCTGATTTCTTAATTTTCTATCTATGctatgcGATCAATCAACGA\n+ATGTGTGCTAATTTCTTGTGACGATTATTTTGCAAAGTCGTCTCCGCGTT\n+AATATCCGATGTAAATAAACCTATGAAAATCGCAAAGATCTATTCCTTTG\n+CGCTTAACCTTGTTATTGAACTCCCTCCCGCCCGGATTTTCGCAGCTTCC\n+AACTAAGTGATACCTTTTAAACAAACGCCACAACAAAACAGGTGACAATC\n+ATATTTTATCAACAAAAGAAAAGAGAGATAATATCTGCTGCTAATTCAAT\n+TTATTGGGCTTTGTGTTTATTTGCATTGGGAATCCGTGGAGCTGATATTG\n+TTTACTTTGGCAATTTCCCAGTCATTATGGTCGCCGTATAATGTGAATGG\n+CCAACAGGAAAATTTCACAGATTCCACTGGTTATTCACTGTTCATGCTGG\n+CGGTTGATCCGCTCCAAATCCCGGATGAGGCGGCGGAAGATCCTTATACC\n+GCTTACTTACGTATTCACTGCTTAACATCTTATCATAGAATTCGTTAGCT\n+TTCACATAGTTTGGATTCCGAATGAATCTCTTCTGTCGACCTCGCGTTTT\n+TGGCAATTTTCGGAGCTCTTCGCTACTGCTAGTTGATTTAAGGCCTACAT\n+CTTTCTGATCCTTAATCATCCTTTTAGGTGATTCTCCTGCTACCATGGGA\n+TCCACCAGTTTCATATATAATGGATCGGGCTCGTTTGCAAGTGGGTTTCC\n+AGTTTCAGAAGACTCTGACGTTCTAAGCGCCAATAAAATGAAAATAACCA\n+AAAGGAAACTAGACATGTTTCGTTACAGACAGATATAGATTGGATATTAT\n+TGAAAGAAAATgaaaataAACAGCGATAATGATCTGTGACTTATTGGAAA\n+TTAGATGGCTTATGGATGATGGGGCGATAAATTCGAACAAACACTGAGAG\n+CATTTTTGGGAGCATTGTGGGAGCTTTTCTATTATCCAGTACTTTATATA\n+CATATATCATTTATATACTAATCATTTCTGGTAGCCGTTCGTAATCAGGA\n+TCGGATCCTTTTTTACCCGTTAGTCAGCTAGAAGAAACGAAAAATTAAAA\n+TAGTAAAatctaaaagtatacaaaaattcaaatagtaaaaccaaaaagta\n+ttaaaaaaAATATCAATCGTTTTTAAACGTTGATTTTTCAGCTTGTGGGG\n+TGATTT
ATCGCTAACTTGGAAAATGATAATAAAGCATTATCCATAATATT\n+AGTTGTGGAAATGAAATTCAAATAGATGTTGTGTTATATACGATGAGGAT\n+GTTGCATTTGAGTCCCCGGAAATATAGTATTTTTTTTACCGAAGGTATTA\n+TCGTACCGGTCAAGTACGGTCACACTGCCAAGCGCAGATTTGAGGATTTC\n+TAGATTTGGCCTCTTGATGGACTAGAAGCGCTACCAAAACTGGGGCTTGA\n+GTTGAATTACCTGTTGGAAGACACAATGCCACCCACGATCAACAATTCGG\n+CGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAG\n+TAAGTGAACAGATCCCTAAACAGACGCCAGATACTCAGACTGATGTGTAC\n+CTTGCAGATCCGAGATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCG\n+GATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACAGCCACCG\n+CTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACG\n+TGCTGACGGAACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAG\n+CTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACT\n+GCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGC\n+ATTCGAGGACGGTGTCATGGTTGCGCAAATCCGAGTACATCTCCACCGAG\n+CAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGG\n+TTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCG\n+AAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAAGAGCGAA\n+ATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCC\n+TATCTTCCCCGACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATAT\n+TTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAG\n+GAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACA\n+GTTTGTCGCCTACTTCCTGCCCACAGAGCAGACGCTGGAGAAACGCCGTA\n+CAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtac\n+aagATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGG\n+CTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATCTACTACA\n+ACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGC\n+CAGCAACCCAACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCT\n+ATAGCGAGCTTTACTTTGTATTATTTCTACCAGGTTGTCAAGCATCGTCC\n+ATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAG\n+AAGTTCCTGGCGAGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAG\n+CAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGACGCAGCTGT\n+TGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCG\n+ATCGACAGTCTCGTTCTCGGAGTCGAACTCGCAGCGGGTCCAgttcagga\n+tctggatctggctccggctcTCGGGCCAgcagccgctcaaagtctggttc\n+tcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCC\n+AGAAATCCGGatccagatcgagatcggtatcacgttcccgatcccgttcc\n+aagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagtccggt
tcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTT'..b'cgcgttctaggtcgagatccaagtccggttcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTTCTGGTTCAGGATCCAGCTCTGGAAGCGCCTCA\n+GATGAATGATTAATTACAAAAAACGGCGTTCATAATAAATAAGTTTATAA\n+TCAACCAAGTACATTTGAAAACTGAACTAACTCGATTTAATATCATTTTC\n+GCCTCAACTCAGCGCTCGGGTTCGTTGCCCAGAATAGTTTTAAATAAAAT\n+CGGCAGTTTAACATAATTTATATTAGATGTtgttgttgtATTGCAAACAA\n+GTCGGGTCCTAGTCGATTTACACTTGGCTGAGATAAAACAACTAAGATTC\n+AAATAATATCCTCATAAGAAGATGTAATTAAGACGTTTTTCTTAGGGGGT\n+GCTTAGGAATTGATTAGATCGCCTTTGGGGAAGTGCAAACAATGTAAaat\n+gatataaaagggtataaaTTAAGTGGATATATGCATCTTCGTTCCAACTA\n+CGTGGCGTCCATCAAAAAGCGCTGGAAGACTTCGCCATCGGAACTAGGTA\n+GCTCTGTTTGTTGCGGTGAGTAGATTCTCAAGTTCTGGAGTTGCTGCAGC\n+GGAGAGCCATTCCCGCTGAAGTGTACCACCGCAACCGGCTGTAGGGATAT\n+GAGCTGTCCCTCCTCCCGCGGCTCCACACCACAGATGCCCAGCTTTTGGC\n+ACTGCTCAACGACAATGTCGTCGATTGACTGCGAAAGCAGTGCCTCCTGC\n+TCAGGATCCACAATGGAGCTGCTGTTGATGGCAGCTATTTCGGCGCTCGC\n+TGGTGAAGGCTGAGTATGCGAGTCGTTGCCCTCCAGGAACGCCTTTATCA\n+GCTGCTCCGGTGTCTGGGCCTCCTCGGTGGGACATCGATGGGTCCTTTGA\n+CTGACCTTGTACCTGAACGTCTTTTGGCAGAGCTCGCACTTGTAGGGCAT\n+CACTCCCGTGTGGATGCGCGTGTGGACAAGGAAAGAGACTCGCTGCCGGA\n+AACACTTGCCTGTGGACGAGTCCGAAATACGAAAAGGTTAGACATGGAGT\n+GACCCGGAAAAGAAGGTATACCTCTCCTTCATTTAAAGTAAATAGGGCAA\n+ATCGCAATGGAGTATGCTCATTTATAAGCTGGCTAACAAAATAAGGGGCG\n+GCTAATTAAGGGGTTTGATCGATACTCACCGCAGACTTCGCACTTGAAGG\n+GCTTTTCGCCGCTGTGAATCCTCTGGTGGTTATGCAGCGTAGACAGTTCC\n+TTGAAGGCGCGTCCACAAACCCCGCAGACATGGGGCTTTACCTCGCTGTG\n+GTAGAGCAAATGCTTGTGGTACGACTGCTGGAAGGTGAAGGTCCTGGCGC\n+AGATCTCGCATGTGTACGGCATCTCGCCGGTGTGGAGCCGCTTGTGCTTC\n+TTCAGAAAGTACTTGGTGGTGAAGGACTTGCTGCACACATCGCACTCCCA\n+CAGCTTCGGGGTGGCCGTGCCCGACTCCGGCGAACTGGGGGACTGCTGGA\n+GCATGCTCAGCGCCCCACTCAGCGCGTATGGCTGGGATGCGGTGCACGTG\n+GAGTTATTTCCGTTGCCAAGGGCTCCAGGCTCTGTGTGTCGAATGCGATC\n+GCAAATGCTCAGCTTGGGCGTGGCAGTAACTGCACTGGTGGTAGAAGCGG\n+GTGTAGGACtgggattaggatttggattgggattggAGCAGGGCACGCCC\n+ATCATGTGCACTACTTTCAGGTGGATTCGAAGGGAGCCCTTCATTCGGAA\n+CGTCTTGGAGCAGAGATGGCACTT
GTAGGGCTCCTGGTCCTGTATAAAGC\n+AATAATCGGAATTTCACTTATATTTATCAATTCATCAATATGCCCTCATG\n+GCCAAATATTCCATTACATTACCGTCTGTCTCTCAGTTTCAAATTTATGC\n+ACAAAAATCATTCACTTTCATTCACTATATCACAAAGTTGCCATGGTTTT\n+AAATTGATCAAAAACAAATTAATATCTATCATATATaTACATAGTCATAT\n+GAACAGTTGAAAAATTAATTGAAAATAATGGGAACGATATACGTACATAC\n+ATCAGTTGTTTTTAAAATATAAGGGTATATAGATTTCTTTCttgttgttg\n+ttgatTTTAATTACGTCAAACTTTTGTTTCAGATTCAATGTAAATGGTCT\n+AGCTTTTTAAGTATGATTTTTTTTTGCTGCCAGTGAGCATAGAAaaaaaa\n+aatCaaaATCGATATAAGAATATGCGAAAGTGCATTACGAAACTCTTTAG\n+ATAATAGCACTTAATATATGTACATAGCCAATAGTTACCGGTTCCTTCTG\n+TTGGggttccttttgcttgggttcTCCCTCCGCATTTTCGTGGACTAAGC\n+GGACGTGCATGTCCCTCAGCTCGGTATTCCGGAAACTGAACTCGCAAATG\n+TCGCAGTGGGCGGGCGGGGTGGTCCGCACAGGCGGTGGGGTTGGGACGAC\n+GGGCTTGGACCTGGTTCGCTTGGCCCTCCGTTTGGGAGGAGCTGCGGCAA\n+GGAAGCCCCGGGACGAGGCGGGTTGGCCATTCGCTGGACTCTCGTTTCCC\n+TCCTGAGCCATCAGACTTGTGTGCGAGAACAGGTGGATGGTGAGCTTGTC\n+CAGCCCCAGGAAGAGCTCCTTGCAGTTGGCAAAGGGACAGGCCAGTGGGC\n+CGTTGGCCGCCTTGATCAGCCTCTGCTGCAGTGCGTCAATACTGCCGAAA\n+CTGGGCACCGCGCAGAGCGGACACAGCACCGAGGTGGAACACATTTCGCC\n+AGTGCACTCAATCGAATCTTATGCAAATGCTTCACCTCCTATTGGGATTA\n+TCCTCCTGTTCGGTCTGTGATCATCTATTCAGGAGTCCATTCCCAGACTG\n+CCTAGTCTTTCTGCTTTCAAAATTTTCTAAAAATATCAGCAAGTGAAGAT\n+TTTTGAAAACTTTGGGCCCAGCAATCTGACTTCTCGGCACCGATGCCAGC\n+TAACGAAATAATGAAaaataatgaaaTGCCCGGCGCGGATCGTCGAATCG\n+TCAAGAAGACTTTCGGAAACACTCGCAGCACCGAAATCCCATCTCTCGAA\n+CAAGGCAGTCTCTTTTCTCCGTGTCTCTGGGTAGCTCATTTCGAAATATA\n+GCTCTGAGCACGGCTATATACTATATGTATGTAGAATTATTTCTGGCCGA\n+TATATGTTGCACTGGCGGCCATATAGCCTTCGTTCTAGTCTTTGTAACGC\n+ACGATGCGCAGGAGCAATTCGCTGAGATGACCACATGCGATTTGCGGGAC\n+TTATCTAGAGATCTATCATTATCGCCAGATTGGTTTAATAATTGGCTTTT\n+CCGCCAATATCCAATTGGAATATGGTTGGTTACTGCAATTGTCGCTCCAT\n+TTTTTAAGCACTCCATAAAAAGTAAACACATTAATATGTACTCTTATTGG\n+AGATTTCTTCTTTCGATTTTAGTTTCGGACCAGTGAAAATCATTCGTTTC\n+ATTTTCGTAAATAAGAACTGAGAAAATATtattattatATATATTTCTTT\n+ATTAGGAAAATACGAAGATTGAGTATTTCAGATTGAATTAGCATATCCGT\n+CTAAATCTTAATGCTGTAATGAGCTTACTTGAGATCTGATCAAAACCAAT\n+ACaaaacccacacCAAAGGTGGTAGCTAATATACATATTTTGTGTAATAC\n+TTTTGTAGAG
TATTTACTATTCAGCGATTTAAACAAGCAATCGCCTAGAC\n+ACACACATTTGTCCGCCTATGTGTATGTGCACCGAGCTATACCCCCACTG\n+AATCGCTGTGTGCTATTTTTATGGCCGCGATGCTCTCTTGTTTTGACCCG\n+CTTGGGCAAC\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_augustus/Genus_species.stats.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_augustus/Genus_species.stats.json Thu Aug 26 06:55:33 2021 +0000
b
@@ -0,0 +1,119 @@
+{
+    "format": "annotation",
+    "command": "/home/abretaud/miniconda3/envs/__funannotate@1.8.7/bin/funannotate predict --input /tmp/tmpkgev4773/files/c/e/8/dataset_ce85d0fa-1534-47df-8c1e-5f0a5c1b82f0.dat --out output --database /home/abretaud/.planemo/planemo_tmp__fmxm4ll/test-data/funannotate_db --species Genus species --isolate  --strain  --organism other --ploidy 1 --SeqCenter CFMR --SeqAccession 12345 --name FUN_ --numbering 1 --p2g_pident 80 --p2g_prefilter diamond --augustus_species fly --min_training_models 200 --busco_seed_species fly --busco_db insecta --evm-partition-interval 1500 --min_intronlen 10 --max_intronlen 3000 --min_protlen 50 --repeat_filter overlap blast --cpus 1",
+    "organism": "Genus_species",
+    "software": {
+        "name": "funannotate",
+        "version": "1.8.7",
+        "date": "2021-07-21",
+        "resources": {
+            "merops": {
+                "type": "diamond",
+                "version": "12.0",
+                "date": "2017-10-04",
+                "num-records": "5009"
+            },
+            "uniprot": {
+                "type": "diamond",
+                "version": "2021_03",
+                "date": "2021-06-02",
+                "num-records": "565254"
+            },
+            "dbCAN": {
+                "type": "hmmer3",
+                "version": "9.0",
+                "date": "2020-08-04",
+                "num-records": "641"
+            },
+            "pfam": {
+                "type": "hmmer3",
+                "version": "34.0",
+                "date": "2021-03",
+                "num-records": "19179"
+            },
+            "repeats": {
+                "type": "diamond",
+                "version": "1.0",
+                "date": "2021-07-19",
+                "num-records": "11950"
+            },
+            "go": {
+                "type": "text",
+                "version": "2021-07-02",
+                "date": "2021-07-02",
+                "num-records": "47228"
+            },
+            "mibig": {
+                "type": "diamond",
+                "version": "1.4",
+                "date": "2021-07-19",
+                "num-records": "31023"
+            },
+            "interpro": {
+                "type": "xml",
+                "version": "86.0",
+                "date": "2021-06-03",
+                "num-records": "38913"
+            },
+            "busco_outgroups": {
+                "type": "outgroups",
+                "version": "1.0",
+                "date": "2021-07-19",
+                "num-records": "8"
+            },
+            "gene2product": {
+                "type": "text",
+                "version": "1.70",
+                "date": "2021-06-15",
+                "num-records": "34039"
+            }
+        }
+    },
+    "assembly": {
+        "num_contigs": 4,
+        "length": 227430,
+        "mean_length": 56857.5,
+        "N50": 215740,
+        "L50": 1,
+        "N90": 215740,
+        "L90": 1,
+        "GC_content": 42.86
+    },
+    "annotation": {
+        "genes": 18,
+        "common_name": 0,
+        "mRNA": 18,
+        "tRNA": 0,
+        "ncRNA": 0,
+        "rRNA": 0,
+        "avg_gene_length": 2695.06,
+        "transcript-level": {
+            "CDS_transcripts": 18,
+            "CDS_five_utr": 0,
+            "CDS_three_utr": 0,
+            "CDS_no_utr": 18,
+            "CDS_five_three_utr": 0,
+            "CDS_complete": 18,
+            "CDS_no-start": 0,
+            "CDS_no-stop": 0,
+            "CDS_no-start_no-stop": 0,
+            "total_exons": 57,
+            "total_cds_exons": 57,
+            "multiple_exon_transcript": 16,
+            "single_exon_transcript": 2,
+            "avg_exon_length": 558.58,
+            "avg_protein_length": 582.83,
+            "functional": {
+                "go_terms": 0,
+                "interproscan": 0,
+                "eggnog": 0,
+                "pfam": 0,
+                "cazyme": 0,
+                "merops": 0,
+                "busco": 0,
+                "secretion": 0
+            },
+            "pct_exon_overlap_protein_evidence": 17.54
+        }
+    }
+}
\ No newline at end of file
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_augustus/Genus_species.tbl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_augustus/Genus_species.tbl Thu Aug 26 06:55:33 2021 +0000
b
@@ -0,0 +1,282 @@
+>Feature sample
+1 215740 REFERENCE
+ CFMR 12345
+1466 1092 gene
+ locus_tag FUN_000001
+1466 1092 mRNA
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000001-T1_mrna
+ protein_id gnl|ncbi|FUN_000001-T1
+1466 1092 CDS
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000001-T1_mrna
+ protein_id gnl|ncbi|FUN_000001-T1
+2126 3863 gene
+ locus_tag FUN_000002
+2126 2199 mRNA
+2258 3224
+3284 3490
+3549 3863
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000002-T1_mrna
+ protein_id gnl|ncbi|FUN_000002-T1
+2126 2199 CDS
+2258 3224
+3284 3490
+3549 3863
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000002-T1_mrna
+ protein_id gnl|ncbi|FUN_000002-T1
+5802 4883 gene
+ locus_tag FUN_000003
+5802 5797 mRNA
+5539 4883
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000003-T1_mrna
+ protein_id gnl|ncbi|FUN_000003-T1
+5802 5797 CDS
+5539 4883
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000003-T1_mrna
+ protein_id gnl|ncbi|FUN_000003-T1
+10557 8696 gene
+ locus_tag FUN_000004
+10557 10549 mRNA
+10462 8696
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000004-T1_mrna
+ protein_id gnl|ncbi|FUN_000004-T1
+10557 10549 CDS
+10462 8696
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000004-T1_mrna
+ protein_id gnl|ncbi|FUN_000004-T1
+15214 14247 gene
+ locus_tag FUN_000005
+15214 15209 mRNA
+14648 14247
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000005-T1_mrna
+ protein_id gnl|ncbi|FUN_000005-T1
+15214 15209 CDS
+14648 14247
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000005-T1_mrna
+ protein_id gnl|ncbi|FUN_000005-T1
+21705 19533 gene
+ locus_tag FUN_000006
+21705 21700 mRNA
+21515 19533
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000006-T1_mrna
+ protein_id gnl|ncbi|FUN_000006-T1
+21705 21700 CDS
+21515 19533
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000006-T1_mrna
+ protein_id gnl|ncbi|FUN_000006-T1
+35679 34843 gene
+ locus_tag FUN_000007
+35679 35675 mRNA
+35655 35648
+35594 34843
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000007-T1_mrna
+ protein_id gnl|ncbi|FUN_000007-T1
+35679 35675 CDS
+35655 35648
+35594 34843
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000007-T1_mrna
+ protein_id gnl|ncbi|FUN_000007-T1
+40223 41234 gene
+ locus_tag FUN_000008
+40223 40396 mRNA
+40659 41234
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000008-T1_mrna
+ protein_id gnl|ncbi|FUN_000008-T1
+40223 40396 CDS
+40659 41234
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000008-T1_mrna
+ protein_id gnl|ncbi|FUN_000008-T1
+41267 42107 gene
+ locus_tag FUN_000009
+41267 41274 mRNA
+41437 41444
+41707 42107
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000009-T1_mrna
+ protein_id gnl|ncbi|FUN_000009-T1
+41267 41274 CDS
+41437 41444
+41707 42107
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000009-T1_mrna
+ protein_id gnl|ncbi|FUN_000009-T1
+87202 88320 gene
+ locus_tag FUN_000010
+87202 87207 mRNA
+88054 88320
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000010-T1_mrna
+ protein_id gnl|ncbi|FUN_000010-T1
+87202 87207 CDS
+88054 88320
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000010-T1_mrna
+ protein_id gnl|ncbi|FUN_000010-T1
+94727 95583 gene
+ locus_tag FUN_000011
+94727 94732 mRNA
+94873 95016
+95449 95583
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000011-T1_mrna
+ protein_id gnl|ncbi|FUN_000011-T1
+94727 94732 CDS
+94873 95016
+95449 95583
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000011-T1_mrna
+ protein_id gnl|ncbi|FUN_000011-T1
+133134 136346 gene
+ locus_tag FUN_000012
+133134 133142 mRNA
+133209 134539
+134668 135510
+135569 136346
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000012-T1_mrna
+ protein_id gnl|ncbi|FUN_000012-T1
+133134 133142 CDS
+133209 134539
+134668 135510
+135569 136346
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000012-T1_mrna
+ protein_id gnl|ncbi|FUN_000012-T1
+144294 169416 gene
+ locus_tag FUN_000013
+144294 144551 mRNA
+149012 149244
+149367 149588
+149654 149897
+149952 150112
+150174 150248
+151966 152072
+152314 152429
+152496 152751
+153651 159010
+159150 164491
+167135 168360
+168722 169208
+169350 169416
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000013-T1_mrna
+ protein_id gnl|ncbi|FUN_000013-T1
+144294 144551 CDS
+149012 149244
+149367 149588
+149654 149897
+149952 150112
+150174 150248
+151966 152072
+152314 152429
+152496 152751
+153651 159010
+159150 164491
+167135 168360
+168722 169208
+169350 169416
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000013-T1_mrna
+ protein_id gnl|ncbi|FUN_000013-T1
+192049 194669 gene
+ locus_tag FUN_000014
+192049 192067 mRNA
+193549 193658
+194041 194455
+194518 194669
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000014-T1_mrna
+ protein_id gnl|ncbi|FUN_000014-T1
+192049 192067 CDS
+193549 193658
+194041 194455
+194518 194669
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000014-T1_mrna
+ protein_id gnl|ncbi|FUN_000014-T1
+210553 209044 gene
+ locus_tag FUN_000015
+210553 210548 mRNA
+210474 209044
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000015-T1_mrna
+ protein_id gnl|ncbi|FUN_000015-T1
+210553 210548 CDS
+210474 209044
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000015-T1_mrna
+ protein_id gnl|ncbi|FUN_000015-T1
+>Feature sample4
+1 7560 REFERENCE
+ CFMR 12345
+1466 1092 gene
+ locus_tag FUN_000016
+1466 1092 mRNA
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000016-T1_mrna
+ protein_id gnl|ncbi|FUN_000016-T1
+1466 1092 CDS
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000016-T1_mrna
+ protein_id gnl|ncbi|FUN_000016-T1
+2126 3863 gene
+ locus_tag FUN_000017
+2126 2199 mRNA
+2258 3224
+3284 3490
+3549 3863
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000017-T1_mrna
+ protein_id gnl|ncbi|FUN_000017-T1
+2126 2199 CDS
+2258 3224
+3284 3490
+3549 3863
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000017-T1_mrna
+ protein_id gnl|ncbi|FUN_000017-T1
+5494 4248 gene
+ locus_tag FUN_000018
+5494 4930 mRNA
+4759 4248
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000018-T1_mrna
+ protein_id gnl|ncbi|FUN_000018-T1
+5494 4930 CDS
+4759 4248
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000018-T1_mrna
+ protein_id gnl|ncbi|FUN_000018-T1
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_augustus/Genus_species.validation.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_augustus/Genus_species.validation.txt Thu Aug 26 06:55:33 2021 +0000
[
@@ -0,0 +1,2 @@
+WARNING: valid [SEQ_FEAT.ShortExon] Internal coding region exon is too short FEATURE: CDS: hypothetical protein [(lcl|sample:c35679-35675, c35655-35648, c35594-34843)] [lcl|sample: raw, dna len= 215740] -> [gnl|ncbi|FUN_000007-T1]
+WARNING: valid [SEQ_FEAT.ShortExon] Internal coding region exon is too short FEATURE: CDS: hypothetical protein [(lcl|sample:41267-41274, 41437-41444, 41707-42107)] [lcl|sample: raw, dna len= 215740] -> [gnl|ncbi|FUN_000009-T1]
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_augustus/fly.parameters.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_augustus/fly.parameters.json Thu Aug 26 06:55:33 2021 +0000
[
@@ -0,0 +1,1 @@
+{"augustus": [{"version": "AUGUSTUS (3.3.3)", "source": "augustus pre-trained", "date": "2021-07-19", "path": "/tmp/prout/trained_species/fly/augustus"}], "genemark": [{}], "codingquarry": [{}], "snap": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-21", "path": "/tmp/tmpkgev4773/job_working_directory/000/2/working/output/predict_misc/ab_initio_parameters/fly.snap.hmm"}], "glimmerhmm": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-21", "path": "/tmp/tmpkgev4773/job_working_directory/000/2/working/output/predict_misc/ab_initio_parameters/glimmerhmm"}]}
\ No newline at end of file
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_bam/Genus_species.cds-transcripts.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_bam/Genus_species.cds-transcripts.fa Thu Aug 26 06:55:33 2021 +0000
b
b'@@ -0,0 +1,413 @@\n+>FUN_000001-T1 FUN_000001\n+TACCCCTTCGACAGCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGA\n+ACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCG\n+ATGAAAAACTGCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGG\n+TTGCGCAAATCCGAGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGG\n+TTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCT\n+TCAGCGACACCAAGAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCC\n+GACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGC\n+CCAGCTGGAGGAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGC\n+CCACAGAGCAGACGCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtac\n+aagATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCG\n+TCAGGACGGCATCTACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCA\n+ACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCTATAG\n+>FUN_000002-T1 FUN_000002\n+ATGTATGACCAGGAGCCCTACAAGTGCCATCTCTGCTCCAAGACGTTCCGAATGAAGGGCTCCCTTCGAATCCACCTGAA\n+AGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAATCCCAGTCCTACACCCGCTTCTACCACCA\n+GTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACACAGAGCCTGGAGCCCTTGGCAACGGAAAT\n+AACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGCATGCTCCAGCAGTCCCCCAGTTCGCCGGA\n+GTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTCCTTCACCACCAAGTACTTTCTGAAGAAGC\n+ACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCAGGACCTTCACCTTCCAGCAGTCGTACCAC\n+AAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGTGGACGCGCCTTCAAGGAACTGTCTACGCT\n+GCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAAGTCTGCGGTGAGTATCGATCAAACCCCTTAA\n+TTAGCCGCCCCTTATTTTGTTAG\n+>FUN_000003-T1 
FUN_000003\n+ATGATACATTCGGCTAAGACTACCTTGGTGGAGTGGACAGAGGCCATGGAAGACGGAAACAAGGGCTACCAGCTAATCGA\n+AAAGTACTACCTCGATGACCAGCAAAAGGCACGGGAGCTGAACATTAAGCGTCAGCTCTTGCAGGCGGACATCGACAAGC\n+GGCGCAAGCAGGTGGTGCTCCTCTATGACGAGCAAATGACGCTGGAGAAGAATCTGGAGCGAACCGCTTGTCTGTACAGG\n+TCTGCCCACGCGGAACGCCGCCAGATGGTGGAGACGTGGAAGAGCGCCGTGAACCAGATGACCCAGCGAGAGCACGACAT\n+TCAGCGCAGCGAGATAGAGTGTGCGGAGCTGGCCCAGAAAGCCCAGCAGACGGCTCAGACCTACAAGGAGTACGACAACC\n+AGCTAACCGAGGTCATCGAGAACAACCGCCAGGTGGAGCTCGCGATCGAGTCCCTCAACGAGGAAAGCTCCGACATGAAA\n+AACCAAATCCAGATCCTGATTGATGCCACATTACTGAAGGAGCGCGAGATCGACGGACTGCGTCGGGAACTGGAGAACCT\n+TTCGAACCGGGTTCACCTGCAGCGCATGGATAACCGTAGTCAGATGAAAAAGCGCGATGAGAAGGCCAAGGAGCTGGAGA\n+ACTTCGCTTCGGTGATGGAGAAGGTTAACGCTCGGCTGAAGTCTGTACAGAACAAGGCCCTCAATGCGGAGCAGCGGCTG\n+CAAATTCTGGAGGAGATGATGCAGGCGGAGGAGACGGCTCTGCGAAACCTGGACAAGGAGCAGGAGAAGGTCAACGAGAT\n+GTTGTACCGCACCCAGAGGCAGGTCATCGAGCTGCAGGACGAGGAGAAGGTCTTGAAGGTGCAGAACGACTCGCTCAACT\n+CCAACCTGGCAGCGATCAATCGCAATCAGCAGCAGGTCAATAACGAACTCAAGCGGCAGACGGAGATCCATTACAGCCTG\n+TCCTTTAAGTGCTTGGAGGCGGAGCGGCGTCTCGCCGAGATCAAGGGTCTGGCAGATGACCCCGAGGTAGAGGCCACAAA\n+CATGGCCCGGCTTAACACTCTGGAGCAGGAGTATGAAAAGTTGCAACGCCTCATCGCCACCACGGAGGCGCAGAATAAGA\n+AGCTAAACTATAACATGAACAACCTGGTCGTCCAGTACAACGCCGACGAGAAGGAGCTCGAAATGGTCAGGTTCAAGATT\n+AAAGAGGCCCAGGTGTACTGCGAGGGCACCGTCAAGAGACTGCGCCAGATTCGATACGAGAATTCCGAGCTCATAGTCGA\n+CCTCAACATGGTCAAGATGCGCTGCAGCGACCTAGAGGTTGGCATCGGAGGCTGCGAGCAGGGCACCTACGACCTAGAGC\n+AGCACCGCCTTGCCTTCCGGCGCGCCATCAAAGATCGCACCGTTGAGCTTCGCAGCCAAGAGGATGTGCTCCTCCTCAAG\n+AAGAAGCATCTTAACGAGGAACTAAGCACCCTGCGAGCCGACCTCGGGGAGCGGAAGAAGCAGATTGAGGCGATGAAGGC\n+GCGCTTTGAACTTACCGCCCAATTGCTAGGCAAAAACGAGGACGGCTCCATCATGACCAGCACACAGCTGAAGGTGGTGA\n+GCGCCCAGGAACGGCAGATGCTGGCCGACGAGGGCGATGCCCTCAACAAAAAGGTTCTCAAGGCTGAGAAGGAAGTGGTC\n+GCCCTGGAAAACACACTGCGCCAGTTCGACAAATCGAACGATAACTACCGAAAAACATTCCGATCTGTGGACGAAAACTC\n+AAAGGGTGAGTTATAA\n+>FUN_000004-T1 
FUN_000004\n+ATGCGTTGCGTTTTCTGTGGATCGGGCAGCGAGCAGCAGCATTCGCGATGGGAAATCAAGATGCTGCAGGAGAGCTGCCG\n+CACTGATCATGGATTCCACCAGGACTCGCAAGCTATCCAATATCTGTATGAGATCCTGGCCTCTTACAACCGCGACGAAC\n+AGCGCGCCTTCTTACAGTTTGTGACTGGATCACCACGCCTTCCGACTGGAGGATTCAAGGCCCTTACGCCACCACTGACT\n+ATTGTACGCAAGACGTTGGATGAGAACCAAAACCCTAACGATTACTTACCATCTGTGATGACCTGTGTCAACTATCTAAA\n+GTTGCCCGACTACTCTAGTCGCGAGGTGATGAGGCAGAAGCTGAAAGTGGCTGCTAACGAAGGCAGCATGTCTTTCCACC\n+TCTCATAA\n+>'..b'GATCCAAGCATTGCGGATGGTGCAGGCCGAGACCCCGCCCTATATTAAACA\n+ATGTCATAGGAACGACCCGAAATTGGTGGACTGCTTTATCGGAGCTATTGAACACCTAAAGCCATATTTGGCCAATGGCA\n+TTCCTGATATTCAGCTGCCCTCTGTGGAGCCCTTTAAGATGGACACCCTTGCCCTGCAGTTAACAGAGGGTCCCCAGGGG\n+TATAAGATCACGCTGAAGAACATGGAGGCCTTCGGGGCCAGCAACTTCAAGGTGACATCCCTGAAACTGAGCGAAGGAAG\n+CGAGCCCTTCAAGGCGAAGATCGTGATGCCCAAGCTAAAGATTGAGGCTAAATACACGAGCTCCGGGGTCCTGCTGATCC\n+TGCCCGCCTCCGGAGGTGGGGACTTCCATGCTAACTTCGAGGGTGTGAGTGCCGATCTCACAGGAAAGACATCCATTCAC\n+GCCTTCAAGGGCGCTAACTACCTCCACATCGATGCTCTCAGCTTGGTTCTGGATGTGAAGGATGTGAAAATGAGCATCTC\n+AGGTGCCTTCAACAACAATCGAATTCTGCTGGAGGCCACCAATCTGTTTCTGCGGGAAAACTCTCAAGTCGTTTTGGAGG\n+CTATGCAGGCTCAATTGCAGAAAAAATTGGCTAGCGAGTTCGGCAAACTCGCCAACCAGCTCCTGAAGAATGTTCCTGTA\n+GAGCAATTCTACGTGGACTAG\n+>FUN_000016-T1 
FUN_000016\n+ATGGCATCCAAGTCCTTCGATTTGGTCATTGAGGAAAAGACAAAGAAACCGGAACGGCTCTACCAACCTCGTCGGATGCG\n+ATGGCTTAAATATATCATCCTGCCGGCAGTCTTCTCCTTTGCGCTTCTGCTGATCCTGGTCAATGTGGACTTCTCCGATA\n+ATAGTGAGGATTCCACGCATCTGGGCAATGACACATCGCTCATTATATCTGGCTATGGATTTGAAAATAACACACTGCGC\n+CGGGGATTTTTCTCCGGAGGAATTGCATTGCATAGCCTTGTCATCGAGAACTGCACCATAGTTCATATCAACGACGCAGC\n+TTTTAATCAGGAAAGCACTGTGAACATAACAAGCCTGCAGCTCATTAATGTGCAATTGGAAAACTTGACTGAATCTGCCC\n+TGGAAGGTCTTCAAAAGCTTCAGAACTTTACTCTAGTCAATGAAAATAATCACTTCAGGCCATTTGGATTCCTCTCAGCT\n+GTGGCTGAATCGTTAGTCAGTGCGGAAATCCACCAGTCACTAGCCGCGGCAATATCGTATTCAGTGTGTGATTTTTTGGG\n+ATCCCGGAATTTCCCACAGTTGAAATATTTAGATCTGAGTGGCACACACTTGGACAAAAGCCTTATTAAAGAGTCCTTTG\n+ACAATCTGCCCGCGTTGGAACAACTACTTCTAAGGAACTGTGGGTTGGGCAACATCGAGTGGGAAATCGTGAGGCCAAGA\n+CTCAAATTGTTGCACTATTTGGACTTGGGTGGAGCTCAAAAGACGGGCAACTATGAACATCAGTTGGACGTATCCGCGTT\n+CTCTCCTGAGACAACTACTAACGCTGAAGAGATATCTACTATCCTAGCGAAGAGAGCGATGGCTCCAGAAGTAGTGGGCA\n+CCACTACACTAGGACCTACTACTTCCATAGAAATTTCACCACCATCAACACAAAGTACAACAACACCCAAAGAAGAAAGT\n+ACATCTATGACAGAAACAACGATATTAACAACACCATCGCCAAAATGCGAAGAGGAACTTTGCCAGGATCTAGAGTGCTC\n+CAGAATTACCACCGATACGGTTGCATCTGCAGATCTGGGAAAATCGTCGTGCCAGGATGGTTTACTGGTGGAGATCTGCG\n+AATCGACTTGCACCACTCCCACATTCTTCTGTGTGATATTAGGCGAGAACTTTACCTCCGCATCCAACTGTTGTTCCCAC\n+CATACCATGCGATGTGTGGTCTCTGCACAGGTCTCCTGGTTCGAGGACCACAGCGGACTGGTCATTGGCTTGGGAGTGGG\n+TCTCCTCTTCATCGGCAGCTTCCTCGGCATGCTCATCGTCTTCGGAACCCTGCGCCTGAATCCATCCTGGTTGCGAGGCA\n+ATAAGCGGCGGGAGTCGAATACGATAGGACTGATTCAGGGAAGATTTGAGAAGGACCCGTATGAGCAAGTGGGGTAG\n+>FUN_000017-T1 FUN_000017\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000018-T1 
FUN_000018\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCGATCGACAGTCTCGTTCTCGGAG\n+TCGAACTCGCAGCGG\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_bam/Genus_species.discrepency.report.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_bam/Genus_species.discrepency.report.txt Thu Aug 26 06:55:33 2021 +0000
b
@@ -0,0 +1,142 @@
+Discrepancy Report Results
+
+Summary
+DISC_PROTEIN_NAMES:All proteins have same name "hypothetical protein"
+DISC_SOURCE_QUALS_ASNDISC:taxname (all present, all same)
+DISC_FEATURE_COUNT:gene: 18 present
+DISC_FEATURE_COUNT:CDS: 18 present
+DISC_FEATURE_COUNT:mRNA: 18 present
+DISC_COUNT_NUCLEOTIDES:4 nucleotide Bioseqs are present
+JOINED_FEATURES:30 features have joined locations.
+NO_ANNOTATION:2 bioseqs have no features
+DISC_QUALITY_SCORES:Quality scores are missing on all sequences.
+FATAL: DISC_BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS:2 features have partial ends that do not abut the end of the sequence or a gap, and cannot be extended by 3 or fewer nucleotides to do so
+ONCALLER_COMMENT_PRESENT:4 comment descriptors were found (all same)
+MISSING_GENOMEASSEMBLY_COMMENTS:4 bioseqs are missing GenomeAssembly structured comments
+MOLTYPE_NOT_MRNA:4 molecule types are not set as mRNA.
+TECHNIQUE_NOT_TSA:4 technique are not set as TSA
+MISSING_STRUCTURED_COMMENT:4 sequences do not include structured comments.
+MISSING_PROJECT:22 sequences do not include project.
+DISC_INCONSISTENT_MOLINFO_TECH:Molinfo Technique Report (some missing, all same)
+
+
+Detailed Report
+
+DiscRep_ALL:DISC_PROTEIN_NAMES::All proteins have same name "hypothetical protein"
+
+DiscRep_ALL:DISC_SOURCE_QUALS_ASNDISC::taxname (all present, all same)
+DiscRep_SUB:DISC_SOURCE_QUALS_ASNDISC::4 sources have 'Genus species' for taxname
+DiscRep_ALL:DISC_FEATURE_COUNT::gene: 18 present
+DiscRep_ALL:DISC_FEATURE_COUNT::CDS: 18 present
+DiscRep_ALL:DISC_FEATURE_COUNT::mRNA: 18 present
+DiscRep_ALL:DISC_COUNT_NUCLEOTIDES::4 nucleotide Bioseqs are present
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:JOINED_FEATURES::30 features have joined locations.
+DiscRep_SUB:JOINED_FEATURES::30 features have joined location but no exception
+genome:mRNA hypothetical protein (sample4:2126-2199, 2258-3224, 3284->3537) FUN_000018
+genome:CDS hypothetical protein (sample4:2126-2199, 2258-3224, 3284->3537) FUN_000018
+genome:mRNA hypothetical protein (sample:c5802-5797, c5539-4883) FUN_000002
+genome:CDS hypothetical protein (sample:c5802-5797, c5539-4883) FUN_000002
+genome:mRNA hypothetical protein (sample:c10557-10549, c10462-8696) FUN_000003
+genome:CDS hypothetical protein (sample:c10557-10549, c10462-8696) FUN_000003
+genome:CDS hypothetical protein (sample:c15214-15209, c14648-14247) FUN_000004
+genome:mRNA hypothetical protein (sample:c15214-15209, c14648-14247) FUN_000004
+genome:mRNA hypothetical protein (sample:c21705-21700, c21515-19533) FUN_000005
+genome:CDS hypothetical protein (sample:c21705-21700, c21515-19533) FUN_000005
+genome:CDS hypothetical protein (sample:40223-40396, 40659-41193, 41707-42080, 43409-43609, 43678-44130) FUN_000007
+genome:mRNA hypothetical protein (sample:40223-40396, 40659-41193, 41707-42080, 43409-43609, 43678-44130) FUN_000007
+genome:CDS hypothetical protein (sample:c47195-46753, c46330-46214, c46157-45527) FUN_000008
+genome:mRNA hypothetical protein (sample:c47195-46753, c46330-46214, c46157-45527) FUN_000008
+genome:CDS hypothetical protein (sample:c79527-79519, c79068-78685) FUN_000009
+genome:mRNA hypothetical protein (sample:c79527-79519, c79068-78685) FUN_000009
+genome:mRNA hypothetical protein (sample:87202-87207, 88054-88320) FUN_000010
+genome:CDS hypothetical protein (sample:87202-87207, 88054-88320) FUN_000010
+genome:CDS hypothetical protein (sample:133587-134504, 134720-135510, 135569-136284, 137516-137862) FUN_000011
+genome:mRNA hypothetical protein (sample:133587-134504, 134720-135510, 135569-136284, 137516-137862) FUN_000011
+genome:CDS hypothetical protein (sample:144294-144551, 149012-149244, 149367-149588, 149654-149897, 149952-150112, 150174-150248, 151966-152072, 152314-152429, 152496-152751, 153296-153630, 153689-155122, 155789-158975, 159190-164495) FUN_000012
+genome:mRNA hypothetical protein (sample:144294-144551, 149012-149244, 149367-149588, 149654-149897, 149952-150112, 150174-150248, 151966-152072, 152314-152429, 152496-152751, 153296-153630, 153689-155122, 155789-158975, 159190-164495) FUN_000012
+genome:CDS hypothetical protein (sample:167121-168360, 168722-169212) FUN_000013
+genome:mRNA hypothetical protein (sample:167121-168360, 168722-169212) FUN_000013
+genome:CDS hypothetical protein (sample:c178916-178873, c177172-176887, c176824-176699) FUN_000014
+genome:mRNA hypothetical protein (sample:c178916-178873, c177172-176887, c176824-176699) FUN_000014
+genome:CDS hypothetical protein (sample:192004-192067, 193549-193658, 194041-194455, 194518-194669) FUN_000015
+genome:mRNA hypothetical protein (sample:192004-192067, 193549-193658, 194041-194455, 194518-194669) FUN_000015
+genome:CDS hypothetical protein (sample:c210553-210548, c210474-209044) FUN_000016
+genome:mRNA hypothetical protein (sample:c210553-210548, c210474-209044) FUN_000016
+
+DiscRep_ALL:NO_ANNOTATION::2 bioseqs have no features
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+
+DiscRep_ALL:DISC_QUALITY_SCORES::Quality scores are missing on all sequences.
+
+FATAL: DiscRep_ALL:DISC_BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS::2 features have partial ends that do not abut the end of the sequence or a gap, and cannot be extended by 3 or fewer nucleotides to do so
+genome:CDS hypothetical protein sample:<2331-3254 FUN_000001
+genome:CDS hypothetical protein (sample4:2126-2199, 2258-3224, 3284->3537) FUN_000018
+
+DiscRep_ALL:ONCALLER_COMMENT_PRESENT::4 comment descriptors were found (all same)
+genome:sample:"Annotated using 1.8.7"
+genome:sample2:"Annotated using 1.8.7"
+genome:sample3:"Annotated using 1.8.7"
+genome:sample4:"Annotated using 1.8.7"
+
+DiscRep_ALL:MISSING_GENOMEASSEMBLY_COMMENTS::4 bioseqs are missing GenomeAssembly structured comments
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:MOLTYPE_NOT_MRNA::4 molecule types are not set as mRNA.
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:TECHNIQUE_NOT_TSA::4 technique are not set as TSA
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:MISSING_STRUCTURED_COMMENT::4 sequences do not include structured comments.
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:MISSING_PROJECT::22 sequences do not include project.
+genome:sample (length 215740)
+genome:ncbi:FUN_000001-T1 (length 307)
+genome:ncbi:FUN_000002-T1 (length 220)
+genome:ncbi:FUN_000003-T1 (length 591)
+genome:ncbi:FUN_000004-T1 (length 135)
+genome:ncbi:FUN_000005-T1 (length 662)
+genome:ncbi:FUN_000006-T1 (length 278)
+genome:ncbi:FUN_000007-T1 (length 578)
+genome:ncbi:FUN_000008-T1 (length 396)
+genome:ncbi:FUN_000009-T1 (length 130)
+genome:ncbi:FUN_000010-T1 (length 90)
+genome:ncbi:FUN_000011-T1 (length 923)
+genome:ncbi:FUN_000012-T1 (length 3977)
+genome:ncbi:FUN_000013-T1 (length 576)
+genome:ncbi:FUN_000014-T1 (length 151)
+genome:ncbi:FUN_000015-T1 (length 246)
+genome:ncbi:FUN_000016-T1 (length 478)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+genome:ncbi:FUN_000017-T1 (length 124)
+genome:ncbi:FUN_000018-T1 (length 432)
+
+DiscRep_ALL:DISC_INCONSISTENT_MOLINFO_TECH::Molinfo Technique Report (some missing, all same)
+DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::technique (all missing)
+DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::4 Molinfos are missing field technique
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_bam/Genus_species.gbk
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_bam/Genus_species.gbk Thu Aug 26 06:55:33 2021 +0000
b
b'@@ -0,0 +1,4259 @@\n+LOCUS       sample                215740 bp    DNA     linear       22-JUL-2021\n+DEFINITION  Genus species.\n+ACCESSION   \n+VERSION\n+KEYWORDS    .\n+SOURCE      Genus species\n+  ORGANISM  Genus species\n+            Unclassified.\n+REFERENCE   1  (bases 1 to 215740)\n+  AUTHORS   Palmer,J.M.\n+  TITLE     Direct Submission\n+  JOURNAL   Submitted (22-JUL-2021) CFMR, USDA Forest Service, 1 Gifford\n+            Pinchot Drive, Madison, WI 53726, USA\n+COMMENT     \'Annotated using 1.8.7\'.\n+FEATURES             Location/Qualifiers\n+     source          1..215740\n+                     /organism="Genus species"\n+                     /mol_type="genomic DNA"\n+     gene            <2331..3254\n+                     /locus_tag="FUN_000001"\n+     mRNA            <2331..3254\n+                     /locus_tag="FUN_000001"\n+                     /product="hypothetical protein"\n+     CDS             <2331..3254\n+                     /locus_tag="FUN_000001"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUN_000001-T1"\n+                     /translation="YPFDSHRFVQYNPTSLERNFKYDVLTEHDLGVTVDLINRELYQA\n+                     DSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENI\n+                     EAKVGYNVKKSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFP\n+                     DFTNWKFPCAQVIFDSDPAPAGKNVPAQLEEMSQAMIRGVMDESGEQFVAYFLPTEQT\n+                     LEKRRTDFINGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGIYYNEL\n+                     ETRVRLNKRRVKVGQQPNNTKLVSIFMRIHL"\n+     gene            complement(4883..5802)\n+                     /locus_tag="FUN_000002"\n+     mRNA            complement(join(4883..5539,5797..5802))\n+                     /locus_tag="FUN_000002"\n+                     /product="hypothetical protein"\n+     CDS             complement(join(4883..5539,5797..5802))\n+                     /locus_tag="FUN_000002"\n+                     
/codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUN_000002-T1"\n+                     /translation="MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPN\n+                     PSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGNNSTCTASQPYALSGALSMLQQS\n+                     PSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+                     KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCEVCGEYRSNPLISRPL\n+                     FC"\n+     gene            complement(8696..10557)\n+                     /locus_tag="FUN_000003"\n+     mRNA            complement(join(8696..10462,10549..10557))\n+                     /locus_tag="FUN_000003"\n+                     /product="hypothetical protein"\n+     CDS             complement(join(8696..10462,10549..10557))\n+                     /locus_tag="FUN_000003"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUN_000003-T1"\n+                     /translation="MIHSAKTTLVEWTEAMEDGNKGYQLIEKYYLDDQQKARELNIKR\n+                     QLLQADIDKRRKQVVLLYDEQMTLEKNLERTACLYRSAHAERRQMVETWKSAVNQMTQ\n+                     REHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELAIESLNEESSDMK\n+                     NQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRDEKAKELENFASV\n+                     MEKVNARLKSVQNKALNAEQRLQILEEMMQAEETALRNLDKEQEKVNEMLYRTQRQVI\n+                     ELQDEEKVLKVQNDSLNSNLAAINRNQQQVNNELKRQTEIHYSLSFKCLEAERRLAEI\n+                     KGLADDPEVEATNMARLNTLEQEYEKLQRLIATTEAQNKKLNYNMNNLVVQYNADEKE\n+                     LEMVRFKIKEAQVYCEGTVKRLRQIRYENSELIVDLNMVKMRCSDLEVGIGGCEQGTY\n+                     DLEQHRLAFRRAIKDRTVELRSQEDVLLLKKKHLNEELSTLRADLGERKKQIEAMKAR\n+                     FELTAQLLGKNEDGSIMTSTQLKVVSAQERQMLADEGDALNKKVLKAEKEVVALENTL\n+                     RQFDKSNDNYRKTFRSVDENSKGEL"\n+     gene            complement(14247..15214)\n+                     /locus_tag="FUN_000004"\n+     mRNA            
complement(join(14247..14648,15209..15214))\n+                     /locus_tag="FUN_00'..b'1 agcttttggc actgctcaac gacaatgtcg tcgattgact gcgaaagcag tgcctcctgc\n+     4501 tcaggatcca caatggagct gctgttgatg gcagctattt cggcgctcgc tggtgaaggc\n+     4561 tgagtatgcg agtcgttgcc ctccaggaac gcctttatca gctgctccgg tgtctgggcc\n+     4621 tcctcggtgg gacatcgatg ggtcctttga ctgaccttgt acctgaacgt cttttggcag\n+     4681 agctcgcact tgtagggcat cactcccgtg tggatgcgcg tgtggacaag gaaagagact\n+     4741 cgctgccgga aacacttgcc tgtggacgag tccgaaatac gaaaaggtta gacatggagt\n+     4801 gacccggaaa agaaggtata cctctccttc atttaaagta aatagggcaa atcgcaatgg\n+     4861 agtatgctca tttataagct ggctaacaaa ataaggggcg gctaattaag gggtttgatc\n+     4921 gatactcacc gcagacttcg cacttgaagg gcttttcgcc gctgtgaatc ctctggtggt\n+     4981 tatgcagcgt agacagttcc ttgaaggcgc gtccacaaac cccgcagaca tggggcttta\n+     5041 cctcgctgtg gtagagcaaa tgcttgtggt acgactgctg gaaggtgaag gtcctggcgc\n+     5101 agatctcgca tgtgtacggc atctcgccgg tgtggagccg cttgtgcttc ttcagaaagt\n+     5161 acttggtggt gaaggacttg ctgcacacat cgcactccca cagcttcggg gtggccgtgc\n+     5221 ccgactccgg cgaactgggg gactgctgga gcatgctcag cgccccactc agcgcgtatg\n+     5281 gctgggatgc ggtgcacgtg gagttatttc cgttgccaag ggctccaggc tctgtgtgtc\n+     5341 gaatgcgatc gcaaatgctc agcttgggcg tggcagtaac tgcactggtg gtagaagcgg\n+     5401 gtgtaggact gggattagga tttggattgg gattggagca gggcacgccc atcatgtgca\n+     5461 ctactttcag gtggattcga agggagccct tcattcggaa cgtcttggag cagagatggc\n+     5521 acttgtaggg ctcctggtcc tgtataaagc aataatcgga atttcactta tatttatcaa\n+     5581 ttcatcaata tgccctcatg gccaaatatt ccattacatt accgtctgtc tctcagtttc\n+     5641 aaatttatgc acaaaaatca ttcactttca ttcactatat cacaaagttg ccatggtttt\n+     5701 aaattgatca aaaacaaatt aatatctatc atatatatac atagtcatat gaacagttga\n+     5761 aaaattaatt gaaaataatg ggaacgatat acgtacatac atcagttgtt tttaaaatat\n+     5821 aagggtatat agatttcttt cttgttgttg ttgattttaa ttacgtcaaa cttttgtttc\n+     5881 agattcaatg taaatggtct agctttttaa 
gtatgatttt tttttgctgc cagtgagcat\n+     5941 agaaaaaaaa aatcaaaatc gatataagaa tatgcgaaag tgcattacga aactctttag\n+     6001 ataatagcac ttaatatatg tacatagcca atagttaccg gttccttctg ttggggttcc\n+     6061 ttttgcttgg gttctccctc cgcattttcg tggactaagc ggacgtgcat gtccctcagc\n+     6121 tcggtattcc ggaaactgaa ctcgcaaatg tcgcagtggg cgggcggggt ggtccgcaca\n+     6181 ggcggtgggg ttgggacgac gggcttggac ctggttcgct tggccctccg tttgggagga\n+     6241 gctgcggcaa ggaagccccg ggacgaggcg ggttggccat tcgctggact ctcgtttccc\n+     6301 tcctgagcca tcagacttgt gtgcgagaac aggtggatgg tgagcttgtc cagccccagg\n+     6361 aagagctcct tgcagttggc aaagggacag gccagtgggc cgttggccgc cttgatcagc\n+     6421 ctctgctgca gtgcgtcaat actgccgaaa ctgggcaccg cgcagagcgg acacagcacc\n+     6481 gaggtggaac acatttcgcc agtgcactca atcgaatctt atgcaaatgc ttcacctcct\n+     6541 attgggatta tcctcctgtt cggtctgtga tcatctattc aggagtccat tcccagactg\n+     6601 cctagtcttt ctgctttcaa aattttctaa aaatatcagc aagtgaagat ttttgaaaac\n+     6661 tttgggccca gcaatctgac ttctcggcac cgatgccagc taacgaaata atgaaaaata\n+     6721 atgaaatgcc cggcgcggat cgtcgaatcg tcaagaagac tttcggaaac actcgcagca\n+     6781 ccgaaatccc atctctcgaa caaggcagtc tcttttctcc gtgtctctgg gtagctcatt\n+     6841 tcgaaatata gctctgagca cggctatata ctatatgtat gtagaattat ttctggccga\n+     6901 tatatgttgc actggcggcc atatagcctt cgttctagtc tttgtaacgc acgatgcgca\n+     6961 ggagcaattc gctgagatga ccacatgcga tttgcgggac ttatctagag atctatcatt\n+     7021 atcgccagat tggtttaata attggctttt ccgccaatat ccaattggaa tatggttggt\n+     7081 tactgcaatt gtcgctccat tttttaagca ctccataaaa agtaaacaca ttaatatgta\n+     7141 ctcttattgg agatttcttc tttcgatttt agtttcggac cagtgaaaat cattcgtttc\n+     7201 attttcgtaa ataagaactg agaaaatatt attattatat atatttcttt attaggaaaa\n+     7261 tacgaagatt gagtatttca gattgaatta gcatatccgt ctaaatctta atgctgtaat\n+     7321 gagcttactt gagatctgat caaaaccaat acaaaaccca caccaaaggt ggtagctaat\n+     7381 atacatattt tgtgtaatac ttttgtagag tatttactat tcagcgattt aaacaagcaa\n+     7441 
tcgcctagac acacacattt gtccgcctat gtgtatgtgc accgagctat acccccactg\n+     7501 aatcgctgtg tgctattttt atggccgcga tgctctcttg ttttgacccg cttgggcaac\n+//\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_bam/Genus_species.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_bam/Genus_species.gff3 Thu Aug 26 06:55:33 2021 +0000
b
b'@@ -0,0 +1,145 @@\n+##gff-version 3\n+sample\tfunannotate\tgene\t2331\t3254\t.\t+\t.\tID=FUN_000001;\n+sample\tfunannotate\tmRNA\t2331\t3254\t.\t+\t.\tID=FUN_000001-T1;Parent=FUN_000001;product=hypothetical protein;\n+sample\tfunannotate\texon\t2331\t3254\t.\t+\t.\tID=FUN_000001-T1.exon1;Parent=FUN_000001-T1;\n+sample\tfunannotate\tCDS\t2331\t3254\t.\t+\t0\tID=FUN_000001-T1.cds;Parent=FUN_000001-T1;\n+sample\tfunannotate\tgene\t4883\t5802\t.\t-\t.\tID=FUN_000002;\n+sample\tfunannotate\tmRNA\t4883\t5802\t.\t-\t.\tID=FUN_000002-T1;Parent=FUN_000002;product=hypothetical protein;\n+sample\tfunannotate\texon\t5797\t5802\t.\t-\t.\tID=FUN_000002-T1.exon1;Parent=FUN_000002-T1;\n+sample\tfunannotate\texon\t4883\t5539\t.\t-\t.\tID=FUN_000002-T1.exon2;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t5797\t5802\t.\t-\t0\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t4883\t5539\t.\t-\t0\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tgene\t8696\t10557\t.\t-\t.\tID=FUN_000003;\n+sample\tfunannotate\tmRNA\t8696\t10557\t.\t-\t.\tID=FUN_000003-T1;Parent=FUN_000003;product=hypothetical protein;\n+sample\tfunannotate\texon\t10549\t10557\t.\t-\t.\tID=FUN_000003-T1.exon1;Parent=FUN_000003-T1;\n+sample\tfunannotate\texon\t8696\t10462\t.\t-\t.\tID=FUN_000003-T1.exon2;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t10549\t10557\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t8696\t10462\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tgene\t14247\t15214\t.\t-\t.\tID=FUN_000004;\n+sample\tfunannotate\tmRNA\t14247\t15214\t.\t-\t.\tID=FUN_000004-T1;Parent=FUN_000004;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t15209\t15214\t.\t-\t.\tID=FUN_000004-T1.exon1;Parent=FUN_000004-T1;\n+sample\tfunannotate\texon\t14247\t14648\t.\t-\t.\tID=FUN_000004-T1.exon2;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t15209\t15214\t.\t-\t0\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t14247\t14648\t.\t-\t0\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tgene\t19533\t21705\t.\t-\t.\tID=FUN_000005;\n+sample\tfunannotate\tmRNA\t19533\t21705\t.\t-\t.\tID=FUN_000005-T1;Parent=FUN_000005;product=hypothetical protein;\n+sample\tfunannotate\texon\t21700\t21705\t.\t-\t.\tID=FUN_000005-T1.exon1;Parent=FUN_000005-T1;\n+sample\tfunannotate\texon\t19533\t21515\t.\t-\t.\tID=FUN_000005-T1.exon2;Parent=FUN_000005-T1;\n+sample\tfunannotate\tCDS\t21700\t21705\t.\t-\t0\tID=FUN_000005-T1.cds;Parent=FUN_000005-T1;\n+sample\tfunannotate\tCDS\t19533\t21515\t.\t-\t0\tID=FUN_000005-T1.cds;Parent=FUN_000005-T1;\n+sample\tfunannotate\tgene\t34843\t35679\t.\t-\t.\tID=FUN_000006;\n+sample\tfunannotate\tmRNA\t34843\t35679\t.\t-\t.\tID=FUN_000006-T1;Parent=FUN_000006;product=hypothetical protein;\n+sample\tfunannotate\texon\t34843\t35679\t.\t-\t.\tID=FUN_000006-T1.exon1;Parent=FUN_000006-T1;\n+sample\tfunannotate\tCDS\t34843\t35679\t.\t-\t0\tID=FUN_000006-T1.cds;Parent=FUN_000006-T1;\n+sample\tfunannotate\tgene\t40223\t44130\t.\t+\t.\tID=FUN_000007;\n+sample\tfunannotate\tmRNA\t40223\t44130\t.\t+\t.\tID=FUN_000007-T1;Parent=FUN_000007;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t40223\t40396\t.\t+\t.\tID=FUN_000007-T1.exon1;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t40659\t41193\t.\t+\t.\tID=FUN_000007-T1.exon2;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t41707\t42080\t.\t+\t.\tID=FUN_000007-T1.exon3;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t43409\t43609\t.\t+\t.\tID=FUN_000007-T1.exon4;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t43678\t44130\t.\t+\t.\tID=FUN_000007-T1.exon5;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t40223\t40396\t.\t+\t0\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t40659\t41193\t.\t+\t0\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t41707\t42080\t.\t+\t2\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t43409\t43609\t.\t+\t0\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t43678\t44130\t.\t+\t0\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tgene\t45527\t47195\t.\t-\t.\tID=FUN_000008;\n+sample\tfunannotate\tmRNA\t45527\t47195\t.\t-\t.\tID=FUN_000008-T1;Parent=FUN_000008;product=hypothetical protein;\n+sample\tfunannotate\texon\t46753\t47195\t.\t-\t.\tID=FUN_000008-T1.exon1'..b'_000012-T1;\n+sample\tfunannotate\tCDS\t153296\t153630\t.\t+\t2\tID=FUN_000012-T1.cds;Parent=FUN_000012-T1;\n+sample\tfunannotate\tCDS\t153689\t155122\t.\t+\t0\tID=FUN_000012-T1.cds;Parent=FUN_000012-T1;\n+sample\tfunannotate\tCDS\t155789\t158975\t.\t+\t0\tID=FUN_000012-T1.cds;Parent=FUN_000012-T1;\n+sample\tfunannotate\tCDS\t159190\t164495\t.\t+\t2\tID=FUN_000012-T1.cds;Parent=FUN_000012-T1;\n+sample\tfunannotate\tgene\t167121\t169212\t.\t+\t.\tID=FUN_000013;\n+sample\tfunannotate\tmRNA\t167121\t169212\t.\t+\t.\tID=FUN_000013-T1;Parent=FUN_000013;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t167121\t168360\t.\t+\t.\tID=FUN_000013-T1.exon1;Parent=FUN_000013-T1;\n+sample\tfunannotate\texon\t168722\t169212\t.\t+\t.\tID=FUN_000013-T1.exon2;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t167121\t168360\t.\t+\t0\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t168722\t169212\t.\t+\t2\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tgene\t176699\t178916\t.\t-\t.\tID=FUN_000014;\n+sample\tfunannotate\tmRNA\t176699\t178916\t.\t-\t.\tID=FUN_000014-T1;Parent=FUN_000014;product=hypothetical protein;\n+sample\tfunannotate\texon\t178873\t178916\t.\t-\t.\tID=FUN_000014-T1.exon1;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t176887\t177172\t.\t-\t.\tID=FUN_000014-T1.exon2;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t176699\t176824\t.\t-\t.\tID=FUN_000014-T1.exon3;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t178873\t178916\t.\t-\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t176887\t177172\t.\t-\t1\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t176699\t176824\t.\t-\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tgene\t192004\t194669\t.\t+\t.\tID=FUN_000015;\n+sample\tfunannotate\tmRNA\t192004\t194669\t.\t+\t.\tID=FUN_000015-T1;Parent=FUN_000015;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t192004\t192067\t.\t+\t.\tID=FUN_000015-T1.exon1;Parent=FUN_000015-T1;\n+sample\tfunannotate\texon\t193549\t193658\t.\t+\t.\tID=FUN_000015-T1.exon2;Parent=FUN_000015-T1;\n+sample\tfunannotate\texon\t194041\t194455\t.\t+\t.\tID=FUN_000015-T1.exon3;Parent=FUN_000015-T1;\n+sample\tfunannotate\texon\t194518\t194669\t.\t+\t.\tID=FUN_000015-T1.exon4;Parent=FUN_000015-T1;\n+sample\tfunannotate\tCDS\t192004\t192067\t.\t+\t0\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample\tfunannotate\tCDS\t193549\t193658\t.\t+\t2\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample\tfunannotate\tCDS\t194041\t194455\t.\t+\t0\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample\tfunannotate\tCDS\t194518\t194669\t.\t+\t2\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample\tfunannotate\tgene\t209044\t210553\t.\t-\t.\tID=FUN_000016;\n+sample\tfunannotate\tmRNA\t209044\t210553\t.\t-\t.\tID=FUN_000016-T1;Parent=FUN_000016;product=hypothetical protein;\n+sample\tfunannotate\texon\t210548\t210553\t.\t-\t.\tID=FUN_000016-T1.exon1;Parent=FUN_000016-T1;\n+sample\tfunannotate\texon\t209044\t210474\t.\t-\t.\tID=FUN_000016-T1.exon2;Parent=FUN_000016-T1;\n+sample\tfunannotate\tCDS\t210548\t210553\t.\t-\t0\tID=FUN_000016-T1.cds;Parent=FUN_000016-T1;\n+sample\tfunannotate\tCDS\t209044\t210474\t.\t-\t0\tID=FUN_000016-T1.cds;Parent=FUN_000016-T1;\n+sample4\tfunannotate\tgene\t1092\t1466\t.\t-\t.\tID=FUN_000017;\n+sample4\tfunannotate\tmRNA\t1092\t1466\t.\t-\t.\tID=FUN_000017-T1;Parent=FUN_000017;product=hypothetical protein;\n+sample4\tfunannotate\texon\t1092\t1466\t.\t-\t.\tID=FUN_000017-T1.exon1;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tCDS\t1092\t1466\t.\t-\t0\tID=FUN_000017-T1.cds;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tgene\t2126\t3537\t.\t+\t.\tID=FUN_000018;\n+sample4\tfunannotate\tmRNA\t2126\t3537\t.\t+\t.\tID=FUN_000018-T1;Parent=FUN_000018;product=hypothetical 
protein;\n+sample4\tfunannotate\texon\t2126\t2199\t.\t+\t.\tID=FUN_000018-T1.exon1;Parent=FUN_000018-T1;\n+sample4\tfunannotate\texon\t2258\t3224\t.\t+\t.\tID=FUN_000018-T1.exon2;Parent=FUN_000018-T1;\n+sample4\tfunannotate\texon\t3284\t3537\t.\t+\t.\tID=FUN_000018-T1.exon3;Parent=FUN_000018-T1;\n+sample4\tfunannotate\tCDS\t2126\t2199\t.\t+\t0\tID=FUN_000018-T1.cds;Parent=FUN_000018-T1;\n+sample4\tfunannotate\tCDS\t2258\t3224\t.\t+\t1\tID=FUN_000018-T1.cds;Parent=FUN_000018-T1;\n+sample4\tfunannotate\tCDS\t3284\t3537\t.\t+\t0\tID=FUN_000018-T1.cds;Parent=FUN_000018-T1;\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_bam/Genus_species.mrna-transcripts.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_bam/Genus_species.mrna-transcripts.fa Thu Aug 26 06:55:33 2021 +0000
b
b'@@ -0,0 +1,413 @@\n+>FUN_000001-T1 FUN_000001\n+TACCCCTTCGACAGCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGA\n+ACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCG\n+ATGAAAAACTGCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGG\n+TTGCGCAAATCCGAGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGG\n+TTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCT\n+TCAGCGACACCAAGAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCC\n+GACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGC\n+CCAGCTGGAGGAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGC\n+CCACAGAGCAGACGCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtac\n+aagATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCG\n+TCAGGACGGCATCTACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCA\n+ACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCTATAG\n+>FUN_000002-T1 FUN_000002\n+ATGTATGACCAGGAGCCCTACAAGTGCCATCTCTGCTCCAAGACGTTCCGAATGAAGGGCTCCCTTCGAATCCACCTGAA\n+AGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAATCCCAGTCCTACACCCGCTTCTACCACCA\n+GTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACACAGAGCCTGGAGCCCTTGGCAACGGAAAT\n+AACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGCATGCTCCAGCAGTCCCCCAGTTCGCCGGA\n+GTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTCCTTCACCACCAAGTACTTTCTGAAGAAGC\n+ACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCAGGACCTTCACCTTCCAGCAGTCGTACCAC\n+AAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGTGGACGCGCCTTCAAGGAACTGTCTACGCT\n+GCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAAGTCTGCGGTGAGTATCGATCAAACCCCTTAA\n+TTAGCCGCCCCTTATTTTGTTAG\n+>FUN_000003-T1 
FUN_000003\n+ATGATACATTCGGCTAAGACTACCTTGGTGGAGTGGACAGAGGCCATGGAAGACGGAAACAAGGGCTACCAGCTAATCGA\n+AAAGTACTACCTCGATGACCAGCAAAAGGCACGGGAGCTGAACATTAAGCGTCAGCTCTTGCAGGCGGACATCGACAAGC\n+GGCGCAAGCAGGTGGTGCTCCTCTATGACGAGCAAATGACGCTGGAGAAGAATCTGGAGCGAACCGCTTGTCTGTACAGG\n+TCTGCCCACGCGGAACGCCGCCAGATGGTGGAGACGTGGAAGAGCGCCGTGAACCAGATGACCCAGCGAGAGCACGACAT\n+TCAGCGCAGCGAGATAGAGTGTGCGGAGCTGGCCCAGAAAGCCCAGCAGACGGCTCAGACCTACAAGGAGTACGACAACC\n+AGCTAACCGAGGTCATCGAGAACAACCGCCAGGTGGAGCTCGCGATCGAGTCCCTCAACGAGGAAAGCTCCGACATGAAA\n+AACCAAATCCAGATCCTGATTGATGCCACATTACTGAAGGAGCGCGAGATCGACGGACTGCGTCGGGAACTGGAGAACCT\n+TTCGAACCGGGTTCACCTGCAGCGCATGGATAACCGTAGTCAGATGAAAAAGCGCGATGAGAAGGCCAAGGAGCTGGAGA\n+ACTTCGCTTCGGTGATGGAGAAGGTTAACGCTCGGCTGAAGTCTGTACAGAACAAGGCCCTCAATGCGGAGCAGCGGCTG\n+CAAATTCTGGAGGAGATGATGCAGGCGGAGGAGACGGCTCTGCGAAACCTGGACAAGGAGCAGGAGAAGGTCAACGAGAT\n+GTTGTACCGCACCCAGAGGCAGGTCATCGAGCTGCAGGACGAGGAGAAGGTCTTGAAGGTGCAGAACGACTCGCTCAACT\n+CCAACCTGGCAGCGATCAATCGCAATCAGCAGCAGGTCAATAACGAACTCAAGCGGCAGACGGAGATCCATTACAGCCTG\n+TCCTTTAAGTGCTTGGAGGCGGAGCGGCGTCTCGCCGAGATCAAGGGTCTGGCAGATGACCCCGAGGTAGAGGCCACAAA\n+CATGGCCCGGCTTAACACTCTGGAGCAGGAGTATGAAAAGTTGCAACGCCTCATCGCCACCACGGAGGCGCAGAATAAGA\n+AGCTAAACTATAACATGAACAACCTGGTCGTCCAGTACAACGCCGACGAGAAGGAGCTCGAAATGGTCAGGTTCAAGATT\n+AAAGAGGCCCAGGTGTACTGCGAGGGCACCGTCAAGAGACTGCGCCAGATTCGATACGAGAATTCCGAGCTCATAGTCGA\n+CCTCAACATGGTCAAGATGCGCTGCAGCGACCTAGAGGTTGGCATCGGAGGCTGCGAGCAGGGCACCTACGACCTAGAGC\n+AGCACCGCCTTGCCTTCCGGCGCGCCATCAAAGATCGCACCGTTGAGCTTCGCAGCCAAGAGGATGTGCTCCTCCTCAAG\n+AAGAAGCATCTTAACGAGGAACTAAGCACCCTGCGAGCCGACCTCGGGGAGCGGAAGAAGCAGATTGAGGCGATGAAGGC\n+GCGCTTTGAACTTACCGCCCAATTGCTAGGCAAAAACGAGGACGGCTCCATCATGACCAGCACACAGCTGAAGGTGGTGA\n+GCGCCCAGGAACGGCAGATGCTGGCCGACGAGGGCGATGCCCTCAACAAAAAGGTTCTCAAGGCTGAGAAGGAAGTGGTC\n+GCCCTGGAAAACACACTGCGCCAGTTCGACAAATCGAACGATAACTACCGAAAAACATTCCGATCTGTGGACGAAAACTC\n+AAAGGGTGAGTTATAA\n+>FUN_000004-T1 
FUN_000004\n+ATGCGTTGCGTTTTCTGTGGATCGGGCAGCGAGCAGCAGCATTCGCGATGGGAAATCAAGATGCTGCAGGAGAGCTGCCG\n+CACTGATCATGGATTCCACCAGGACTCGCAAGCTATCCAATATCTGTATGAGATCCTGGCCTCTTACAACCGCGACGAAC\n+AGCGCGCCTTCTTACAGTTTGTGACTGGATCACCACGCCTTCCGACTGGAGGATTCAAGGCCCTTACGCCACCACTGACT\n+ATTGTACGCAAGACGTTGGATGAGAACCAAAACCCTAACGATTACTTACCATCTGTGATGACCTGTGTCAACTATCTAAA\n+GTTGCCCGACTACTCTAGTCGCGAGGTGATGAGGCAGAAGCTGAAAGTGGCTGCTAACGAAGGCAGCATGTCTTTCCACC\n+TCTCATAA\n+>'..b'GATCCAAGCATTGCGGATGGTGCAGGCCGAGACCCCGCCCTATATTAAACA\n+ATGTCATAGGAACGACCCGAAATTGGTGGACTGCTTTATCGGAGCTATTGAACACCTAAAGCCATATTTGGCCAATGGCA\n+TTCCTGATATTCAGCTGCCCTCTGTGGAGCCCTTTAAGATGGACACCCTTGCCCTGCAGTTAACAGAGGGTCCCCAGGGG\n+TATAAGATCACGCTGAAGAACATGGAGGCCTTCGGGGCCAGCAACTTCAAGGTGACATCCCTGAAACTGAGCGAAGGAAG\n+CGAGCCCTTCAAGGCGAAGATCGTGATGCCCAAGCTAAAGATTGAGGCTAAATACACGAGCTCCGGGGTCCTGCTGATCC\n+TGCCCGCCTCCGGAGGTGGGGACTTCCATGCTAACTTCGAGGGTGTGAGTGCCGATCTCACAGGAAAGACATCCATTCAC\n+GCCTTCAAGGGCGCTAACTACCTCCACATCGATGCTCTCAGCTTGGTTCTGGATGTGAAGGATGTGAAAATGAGCATCTC\n+AGGTGCCTTCAACAACAATCGAATTCTGCTGGAGGCCACCAATCTGTTTCTGCGGGAAAACTCTCAAGTCGTTTTGGAGG\n+CTATGCAGGCTCAATTGCAGAAAAAATTGGCTAGCGAGTTCGGCAAACTCGCCAACCAGCTCCTGAAGAATGTTCCTGTA\n+GAGCAATTCTACGTGGACTAG\n+>FUN_000016-T1 
FUN_000016\n+ATGGCATCCAAGTCCTTCGATTTGGTCATTGAGGAAAAGACAAAGAAACCGGAACGGCTCTACCAACCTCGTCGGATGCG\n+ATGGCTTAAATATATCATCCTGCCGGCAGTCTTCTCCTTTGCGCTTCTGCTGATCCTGGTCAATGTGGACTTCTCCGATA\n+ATAGTGAGGATTCCACGCATCTGGGCAATGACACATCGCTCATTATATCTGGCTATGGATTTGAAAATAACACACTGCGC\n+CGGGGATTTTTCTCCGGAGGAATTGCATTGCATAGCCTTGTCATCGAGAACTGCACCATAGTTCATATCAACGACGCAGC\n+TTTTAATCAGGAAAGCACTGTGAACATAACAAGCCTGCAGCTCATTAATGTGCAATTGGAAAACTTGACTGAATCTGCCC\n+TGGAAGGTCTTCAAAAGCTTCAGAACTTTACTCTAGTCAATGAAAATAATCACTTCAGGCCATTTGGATTCCTCTCAGCT\n+GTGGCTGAATCGTTAGTCAGTGCGGAAATCCACCAGTCACTAGCCGCGGCAATATCGTATTCAGTGTGTGATTTTTTGGG\n+ATCCCGGAATTTCCCACAGTTGAAATATTTAGATCTGAGTGGCACACACTTGGACAAAAGCCTTATTAAAGAGTCCTTTG\n+ACAATCTGCCCGCGTTGGAACAACTACTTCTAAGGAACTGTGGGTTGGGCAACATCGAGTGGGAAATCGTGAGGCCAAGA\n+CTCAAATTGTTGCACTATTTGGACTTGGGTGGAGCTCAAAAGACGGGCAACTATGAACATCAGTTGGACGTATCCGCGTT\n+CTCTCCTGAGACAACTACTAACGCTGAAGAGATATCTACTATCCTAGCGAAGAGAGCGATGGCTCCAGAAGTAGTGGGCA\n+CCACTACACTAGGACCTACTACTTCCATAGAAATTTCACCACCATCAACACAAAGTACAACAACACCCAAAGAAGAAAGT\n+ACATCTATGACAGAAACAACGATATTAACAACACCATCGCCAAAATGCGAAGAGGAACTTTGCCAGGATCTAGAGTGCTC\n+CAGAATTACCACCGATACGGTTGCATCTGCAGATCTGGGAAAATCGTCGTGCCAGGATGGTTTACTGGTGGAGATCTGCG\n+AATCGACTTGCACCACTCCCACATTCTTCTGTGTGATATTAGGCGAGAACTTTACCTCCGCATCCAACTGTTGTTCCCAC\n+CATACCATGCGATGTGTGGTCTCTGCACAGGTCTCCTGGTTCGAGGACCACAGCGGACTGGTCATTGGCTTGGGAGTGGG\n+TCTCCTCTTCATCGGCAGCTTCCTCGGCATGCTCATCGTCTTCGGAACCCTGCGCCTGAATCCATCCTGGTTGCGAGGCA\n+ATAAGCGGCGGGAGTCGAATACGATAGGACTGATTCAGGGAAGATTTGAGAAGGACCCGTATGAGCAAGTGGGGTAG\n+>FUN_000017-T1 FUN_000017\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000018-T1 
FUN_000018\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCGATCGACAGTCTCGTTCTCGGAG\n+TCGAACTCGCAGCGG\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_bam/Genus_species.proteins.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_bam/Genus_species.proteins.fa Thu Aug 26 06:55:33 2021 +0000
b
b'@@ -0,0 +1,155 @@\n+>FUN_000001-T1 FUN_000001\n+YPFDSHRFVQYNPTSLERNFKYDVLTEHDLGVTVDLINRELYQADSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSW\n+LRKSEYISTEQTRFQPQNLENIEAKVGYNVKKSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFP\n+DFTNWKFPCAQVIFDSDPAPAGKNVPAQLEEMSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFINGELYKEEEEYEY\n+KIAREYNWNVKTKASKGYEENYFFVMRQDGIYYNELETRVRLNKRRVKVGQQPNNTKLVSIFMRIHL\n+>FUN_000002-T1 FUN_000002\n+MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPNPSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGN\n+NSTCTASQPYALSGALSMLQQSPSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCEVCGEYRSNPLISRPLFC\n+>FUN_000003-T1 FUN_000003\n+MIHSAKTTLVEWTEAMEDGNKGYQLIEKYYLDDQQKARELNIKRQLLQADIDKRRKQVVLLYDEQMTLEKNLERTACLYR\n+SAHAERRQMVETWKSAVNQMTQREHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELAIESLNEESSDMK\n+NQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRDEKAKELENFASVMEKVNARLKSVQNKALNAEQRL\n+QILEEMMQAEETALRNLDKEQEKVNEMLYRTQRQVIELQDEEKVLKVQNDSLNSNLAAINRNQQQVNNELKRQTEIHYSL\n+SFKCLEAERRLAEIKGLADDPEVEATNMARLNTLEQEYEKLQRLIATTEAQNKKLNYNMNNLVVQYNADEKELEMVRFKI\n+KEAQVYCEGTVKRLRQIRYENSELIVDLNMVKMRCSDLEVGIGGCEQGTYDLEQHRLAFRRAIKDRTVELRSQEDVLLLK\n+KKHLNEELSTLRADLGERKKQIEAMKARFELTAQLLGKNEDGSIMTSTQLKVVSAQERQMLADEGDALNKKVLKAEKEVV\n+ALENTLRQFDKSNDNYRKTFRSVDENSKGEL\n+>FUN_000004-T1 FUN_000004\n+MRCVFCGSGSEQQHSRWEIKMLQESCRTDHGFHQDSQAIQYLYEILASYNRDEQRAFLQFVTGSPRLPTGGFKALTPPLT\n+IVRKTLDENQNPNDYLPSVMTCVNYLKLPDYSSREVMRQKLKVAANEGSMSFHLS\n+>FUN_000005-T1 
FUN_000005\n+MSFCDFLRDAALGANSANLSIVAAALSAARDVGGGSDGGGSAGGATPATGASASSVGNTSAVGASSSSNSSAGQAASSNS\n+NNVTATGSGSAPGGGPTSTGTTSGTQHGSGSGAAAAVDSESDDSEVGRLQALLEARGLPPHLFGALGPRVTHILHRTIGN\n+SSSSKANQLLQGLQSHDESQQLQAAIEMCQMLVMGNEDTLAGFPIKQVVPALIQLLRMEHNFDIMNNACRALAYMLEALP\n+RSSGTVVEAVPVFLEKLQVIQCMDVAEQSLSALEILSRRHNKAILQANGISACLTYLDFFSIVAQRAALAIAANCCLNMH\n+PEEFHFVAESLPLLARLLSQQDKKCIESVCSAFCRLVESFQHDGQRLQQIASPDLLKNCQQLLLVTPAILNTGTFTAVVR\n+MLSLMCCSCPDLAISLLRNDIAATLLYLLTGNAEPAAASATHVELISRSPSELYELTCLIGELMPRLPLDGIFAVDSLLD\n+RPTLNTQDQVHWQWRDDRGSWHNYSTIDSRLIEAANQSSEDEISLSTFGRTYTVDFHAMQQINEDTGTTRPVQRRLNHNY\n+VAPMSAGQDLTTTSAGSAAAGGASTSAAAAAASSNNNNNNNNNPPGNSVNLNQVKRRPSLDARIACLKVRNKMFCLFTRY\n+TLKLNNLPNYNVFFMKYFRRHS\n+>FUN_000006-T1 FUN_000006\n+MKFRALMQDPLYMKEFQAIVATLTKLAKDCVMILGSRQMHFIVNEDQSSAASPLVWAGITAEEYFPEYRMEAAHPDQEYI\n+VLGVSSANLGRALSVLRGGGVNSCKLKLQRIQFPCISVIASVLTSSSTEAREVVHDVPVTIIPGSDWSAYVVPRVPNSQL\n+ALGLPSLRLLKSLIDKLKNISPSLEFQVNVDGELNVIATSEMSTVTSRFQKLLIRTVSGSQQEASCSVDSRKASAFFGAL\n+QLPNEELTIGIDREHSIHLQIDVRQDVVLHSILPAVCM\n+>FUN_000007-T1 FUN_000007\n+MCGNPAVGNGTRALILVGGYGTRLRPLTLSTPKPLVEFANKPILLHQLEALVDAGCRQVILAVSYRAEQMEKELKVEAKK\n+LGVELIFSHETEPLGTAGPLALAKTILAASSEPFFVLNSDVICDFPFKQLVQFHCNHGKEGTIVVTKVEEPSKYGVVLYD\n+ENGCIKNFIEKPQEFVSNKINAGIYIFNPSVLDRIEVKPTSIEKEVFPEMTQQQELYAMDLTGFWMDIGQPKDFLTGMCL\n+YLSSLRQKQSPKLYTGPGVVGNVLVDPTAKIGEGCRIGPNVTIGPDVVIEDGVCIKRSTILKGAIVRSHSWLDSCIVGWR\n+STVGRWVRIEGITVLGEDVIVKDELYINGGQVLPHKSIAASLRGAIVQAGQLVILPDEEVFSHVQGVWNLSSDQGNLGSF\n+VVTNIRLVWFADANETFNISLPYLQIESSKYGPALVIQTAETGGGYVLGFRVDPAERLNELFKELSSLHTVYGEHPNFGI\n+QYNANDARRRLEAASEEAAQASQIKVDNFEELDERQEREINTKLNSYLAEGCLGKVPSQGERAPVYCKELGFAMEPIGDG\n+YKLQDLWNVMPTKMETME\n+>FUN_000008-T1 
FUN_000008\n+MKTLSVRLHRGTEFIKDTVHKALVMSAPTPVAPATAPAPKIVDHSLKRKLSGAGGLMGCSSIGSMTSSIAGSSRSHHYAL\n+TSQVASSQVIPLPSQVPTAAFLRTYTVAPTALHRSAAARKRNPSTDSLLMDLCLFKPIRPMPITPIKIHKFRGFEVKKPK\n+FVPAGNPDSEDDEDNDEDGTVRKPKPSNLTLPTISDSAFVPMPYIETTNTAINATTTTNSGSRSRSLNTHTSGSAQAITK\n+PKRRRRAPMLTAKRRRKALDTELTTSADAGTEDKAPAVRKATAARGGSKRSRGESITAPTPAEPIKSPVAIKAPTKRKST\n+SRSEAAKRSRVASVQNDTVLTATSTTSADSIRKAATKRIAANEKVAKRSRGSAALSARPSPPMTRQRARQQISAST\n+>FUN_000009-T1 FUN_000009\n+MVTLRLPWCIRHKPPLCRIGLSHGCECDNSKKMAASSHAPESDRRAQRLRTQSNWNPPDHSALSLGKLVSRKLTPTAVGH\n+WVVGRQRAACACAGGPNADWTDGQPIESSRGCIFQPAPHCHGGRIARHFG\n+>FUN_000010-T1 FUN_000010\n+MDFIHEAVGTSAAPSANPGPMPLCQPVRLELPFSPRHSFALGHFPFQLCPCLYAVWVQTMGLVLLLLIVFAPFLFRVLFK\n+PCNPKRYLTT\n+>FUN_000011-T1 FUN_000011\n+MEVDLDESIKEAAPEKKVHSITRSSPSPKRAKNSSPEPPKPKSTKSKATTPRVKKEKPAADLESSVLTDEERHERKRASA\n+VLYQKYKNRSSCLNPGSKEIPKGSPDCLSGLTFVVTGVLESMEREEAESVIKEYGGKVMTVVGKKLKYLVVGEEAGPKKL\n+AVAEELNIPILSEDGLFDLIREKSGIAKQVKEEKKSPK'..b'YVSVDESHSAASKSPVPGTGGGTEGYPHRVPTIECEEPSIEEDENSSERRHLKVGGQDTNRLSLDRSRSDETGSW\n+MTVECDEFIGSDTSDNEPRTLEPDRNVLETQATLEDANPLEYSNCATPTSDLNILLTPPNASPQIEKSVLETFEKYTGSS\n+DTGKKKNTLDKQSDRSKSSDSWTSGEKDTSPQRQQDWSLSVGKEKSSVEEESSVSCSIARPLGISQDFGKEEARKCQELK\n+QRMLQLEVGKEEITPTPSNEQTPTNEPKILVSKKPSTPTLEKQSPIDLGTSTESYLEPIEERIAKILDRGGARTEDSESS\n+SGGSRKPPRIEKPARANAGKKLSVTRADAGKSGSDRSSQESKSSFDSKGSLSVESRGSFETESSSGSLGAAQRRGELAQK\n+EQQSTWRPFPIESSNSSSTDDPWHHVETDGGYERYDAQNPLRDSSDSDVKEASPDDQKDASDASYQDELNDFPATFGYPA\n+MTSSLGGIGVNPTDIIGYSTGFTLGRTLSRISERSTASEKSSMEDDVSKASTHSVSMRDESVGSTDHQPSLSSDSRSNTN\n+LAYISDADRRTSAEMPEIPCDSATGDRLSSFGSLNEPKSPTLVTGRFSVTHVDEQQGDDVERHTLMCLSNAGSQDSEDWP\n+LPEIPFDHVPVKPADSLYAMPDLDKPVPKSFCWKASLSFQQSQDSLDWPSPPSSAIGAPIIVENIETYYASEVQSADKVI\n+LDEEMAVGPPDVAKVLPYEDTAYLMSAAFDDNDFGNEQLQPDTVSCLSSTLSAASCLSSSLNVSCTTSSTQATARALRKN\n+SSPEVIVAQPTRSPAPRSPLSEDELFSSDDVFMPGTIKVQLSPDAQLRKLSKGSNNSDTSIDDILSGSTTYLEDQTTVRK\n+NYEARLSSGGGGASCKKCSHSSHSEEETSSLGTDLDGTVRMGGLQQKKCTHSSHSEDTSIGLSISEWSTGTNTVRQYANL\n+SGSDSLSAVSTHSCAKSEKSNQTKSSISSINKSAESLNEQSGGSSFSHKFSGDNGSSDGL
RYDMLSNSETDKLSEATSAT\n+RSDDTTLTLTEMAHTISEWSTSSSRTLVGVAPGEYLPLKQALSGNKTSLSSPSEEKRCALPQVHRRSGSNGNQARAAQEH\n+ADSQTGPETSAAARKRRSLEMMSKLYQSQEICSESESPFVERLYAHSEKLTERYQSQEFVPLHGGPPASHLASSTTSQIQ\n+TQQPQQVRQKPRAPQPPTKPKPAVTRPIMQALLNKMKQPGLAEQAAEAAEAEEKKAMIAASAVAAKPPPPPVPTVPPIVT\n+PSDLPGDAVAPPPKPLAKHHSYDDRTLSKTQIREFKTTSKQLRQSSSFHEHMLSKSQQSSQELPMRIDEERDPHSTSSAT\n+NTTTTTNTLNSESTEPNSPQMPQRADKLVRCSPYYSSSLSSESPPNQLLQKPPRKTATQLSAGAVAASLKSPPSGNDTDS\n+SLDVRGQEAKMRSRGYRKKRQLPVKRMRANLTAAALLEQAESSECSEGYVPEVDSGSSEYSSCQRDDQYLEFDEELERDQ\n+TDDYEDYPQYSGKFESLDMSDNVDEMGFPRYDRLSHITKPMYHQALVMERPNPVQLPAPANHPMPPATGQPVKPARTKKR\n+QFKREDSTAAGTSGHSTAAPQVRPYHGRSYCNPEESEYETRGGGLSDELANSSEDSCSGFGGDAGASGSGTIRRGTTKGA\n+GQDQEQGTGGQARHVPYPDFLSDYESEPIEYERYACGLDIRVDPPPKFHDSDELSDQ\n+>FUN_000013-T1 FUN_000013\n+MSLDRRGEITTPPTRYDLTLGSDKSSSLSRSEAGTYDVIQAEIQHAKRQELATGVATASHQNGNGNGNGHTLSTQHDIEA\n+EVKKRKWPTEPSYFLAKELLMTERTYKKDLDVLNTTFRQVLSLGDVEQLQPLFELLDSLAQHHNLFLRDIEHRMVQWEGR\n+GGHEAHRIGDVMMKHMAALPIYDEYVQTHLDILHCMNDMYEGDERFRQVYKEFEQQKVCYLPIGELLLKPLNRLLHYQLI\n+LERLCDYYGEEHIDYADAMAVHHLLVRSTKGIRSQLPDSANFVELCELQRDINFEQLVQPHRRLIRQGCLLKHSKRGLQQ\n+RMFFLFSDLLLYGSKSPLDQSFRILGHVPVRSLLTENAEHNTFSIFGGQCAITVSAGTTAEKTLWLAELSKAAADIKNRP\n+PNMQLQLTTLKNCSSSEEGLDLFGLSNGNNSSLNSSVNGGGPLTTQQQKLQLQQQQQNRTQPSRSNTALHVCWHRGATVG\n+LGDHLIAAEHQLSGYLLRKFKNSSGWQKLWVVFTSFCLYFYKSYQDEFALASLPLLGYTVGPPGHQDAVQKEFVFKLSFK\n+NHVYFFRAESAHTYNR\n+>FUN_000014-T1 FUN_000014\n+MSTPARRRLMRDFKRLQEDPPTGVSGAPTDNNIMIWNAVIFGPHDTPFEDGTFKLTIEFTEEYPNKPPTVRFVSKVFHPN\n+VYADGGICLDILQNRWSPTYDVSAILTSIQSLLSDPNPNSPANSTAAQLYKENRREYEKRVKACVEQSFID\n+>FUN_000015-T1 FUN_000015\n+MNKAVCLVIVIQALRMVQAETPPYIKQCHRNDPKLVDCFIGAIEHLKPYLANGIPDIQLPSVEPFKMDTLALQLTEGPQG\n+YKITLKNMEAFGASNFKVTSLKLSEGSEPFKAKIVMPKLKIEAKYTSSGVLLILPASGGGDFHANFEGVSADLTGKTSIH\n+AFKGANYLHIDALSLVLDVKDVKMSISGAFNNNRILLEATNLFLRENSQVVLEAMQAQLQKKLASEFGKLANQLLKNVPV\n+EQFYVD\n+>FUN_000016-T1 
FUN_000016\n+MASKSFDLVIEEKTKKPERLYQPRRMRWLKYIILPAVFSFALLLILVNVDFSDNSEDSTHLGNDTSLIISGYGFENNTLR\n+RGFFSGGIALHSLVIENCTIVHINDAAFNQESTVNITSLQLINVQLENLTESALEGLQKLQNFTLVNENNHFRPFGFLSA\n+VAESLVSAEIHQSLAAAISYSVCDFLGSRNFPQLKYLDLSGTHLDKSLIKESFDNLPALEQLLLRNCGLGNIEWEIVRPR\n+LKLLHYLDLGGAQKTGNYEHQLDVSAFSPETTTNAEEISTILAKRAMAPEVVGTTTLGPTTSIEISPPSTQSTTTPKEES\n+TSMTETTILTTPSPKCEEELCQDLECSRITTDTVASADLGKSSCQDGLLVEICESTCTTPTFFCVILGENFTSASNCCSH\n+HTMRCVVSAQVSWFEDHSGLVIGLGVGLLFIGSFLGMLIVFGTLRLNPSWLRGNKRRESNTIGLIQGRFEKDPYEQVG\n+>FUN_000017-T1 FUN_000017\n+MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAGESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKR\n+FIRNPNYVKANEFYDKMLSSEYVSKRYKDLPPPHPGFGADQPPA\n+>FUN_000018-T1 FUN_000018\n+MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFDLKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLG\n+VTVDLINRELYQADSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK\n+KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPCAQVIFDSDPAPAGKNVPAQLEE\n+MSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFINGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGI\n+YYNELETRVRLNKRRVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIGETEKTSED\n+AAVGAQAASGADSPAQVARDRQSRSRSRTRS\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_bam/Genus_species.stats.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_bam/Genus_species.stats.json Thu Aug 26 06:55:33 2021 +0000
b
@@ -0,0 +1,120 @@
+{
+    "format": "annotation",
+    "command": "/home/abretaud/miniconda3/envs/__funannotate@1.8.7/bin/funannotate predict --input /tmp/tmp2z22js7e/files/c/c/3/dataset_cc3f66b5-ec9b-4669-93d6-4ddeac0e33c1.dat --out output --database /home/abretaud/.planemo/planemo_tmp_z_14xthq/test-data/funannotate_db --species Genus species --isolate  --strain  --organism other --ploidy 1 --SeqCenter CFMR --SeqAccession 12345 --name FUN_ --numbering 1 --rna_bam /tmp/tmp2z22js7e/files/9/6/5/dataset_965b5091-b838-4f4a-8ec8-9fb84c12cdc5.dat --transcript_evidence /tmp/tmp2z22js7e/files/d/a/e/dataset_daea4ce7-3191-40eb-ad83-b35e9e058d46.dat --p2g_pident 80 --p2g_prefilter diamond --min_training_models 3 --busco_seed_species fly --busco_db insecta --evm-partition-interval 1500 --min_intronlen 10 --max_intronlen 3000 --min_protlen 50 --repeat_filter overlap blast --cpus 1",
+    "organism": "Genus_species",
+    "software": {
+        "name": "funannotate",
+        "version": "1.8.7",
+        "date": "2021-07-22",
+        "resources": {
+            "merops": {
+                "type": "diamond",
+                "version": "12.0",
+                "date": "2017-10-04",
+                "num-records": "5009"
+            },
+            "uniprot": {
+                "type": "diamond",
+                "version": "2021_03",
+                "date": "2021-06-02",
+                "num-records": "565254"
+            },
+            "dbCAN": {
+                "type": "hmmer3",
+                "version": "9.0",
+                "date": "2020-08-04",
+                "num-records": "641"
+            },
+            "pfam": {
+                "type": "hmmer3",
+                "version": "34.0",
+                "date": "2021-03",
+                "num-records": "19179"
+            },
+            "repeats": {
+                "type": "diamond",
+                "version": "1.0",
+                "date": "2021-07-19",
+                "num-records": "11950"
+            },
+            "go": {
+                "type": "text",
+                "version": "2021-07-02",
+                "date": "2021-07-02",
+                "num-records": "47228"
+            },
+            "mibig": {
+                "type": "diamond",
+                "version": "1.4",
+                "date": "2021-07-19",
+                "num-records": "31023"
+            },
+            "interpro": {
+                "type": "xml",
+                "version": "86.0",
+                "date": "2021-06-03",
+                "num-records": "38913"
+            },
+            "busco_outgroups": {
+                "type": "outgroups",
+                "version": "1.0",
+                "date": "2021-07-19",
+                "num-records": "8"
+            },
+            "gene2product": {
+                "type": "text",
+                "version": "1.70",
+                "date": "2021-06-15",
+                "num-records": "34039"
+            }
+        }
+    },
+    "assembly": {
+        "num_contigs": 4,
+        "length": 227430,
+        "mean_length": 56857.5,
+        "N50": 215740,
+        "L50": 1,
+        "N90": 215740,
+        "L90": 1,
+        "GC_content": 42.86
+    },
+    "annotation": {
+        "genes": 18,
+        "common_name": 0,
+        "mRNA": 18,
+        "tRNA": 0,
+        "ncRNA": 0,
+        "rRNA": 0,
+        "avg_gene_length": 2775.33,
+        "transcript-level": {
+            "CDS_transcripts": 18,
+            "CDS_five_utr": 0,
+            "CDS_three_utr": 0,
+            "CDS_no_utr": 18,
+            "CDS_five_three_utr": 0,
+            "CDS_complete": 16,
+            "CDS_no-start": 1,
+            "CDS_no-stop": 1,
+            "CDS_no-start_no-stop": 0,
+            "total_exons": 54,
+            "total_cds_exons": 54,
+            "multiple_exon_transcript": 15,
+            "single_exon_transcript": 3,
+            "avg_exon_length": 563.63,
+            "avg_protein_length": 571.83,
+            "functional": {
+                "go_terms": 0,
+                "interproscan": 0,
+                "eggnog": 0,
+                "pfam": 0,
+                "cazyme": 0,
+                "merops": 0,
+                "busco": 0,
+                "secretion": 0
+            },
+            "pct_exon_overlap_transcript_evidence": 31.48,
+            "pct_exon_overlap_protein_evidence": 9.26
+        }
+    }
+}
\ No newline at end of file
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_bam/Genus_species.tbl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_bam/Genus_species.tbl Thu Aug 26 06:55:33 2021 +0000
b
@@ -0,0 +1,276 @@
+>Feature sample
+1 215740 REFERENCE
+ CFMR 12345
+<2331 3254 gene
+ locus_tag FUN_000001
+<2331 3254 mRNA
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000001-T1_mrna
+ protein_id gnl|ncbi|FUN_000001-T1
+<2331 3254 CDS
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000001-T1_mrna
+ protein_id gnl|ncbi|FUN_000001-T1
+5802 4883 gene
+ locus_tag FUN_000002
+5802 5797 mRNA
+5539 4883
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000002-T1_mrna
+ protein_id gnl|ncbi|FUN_000002-T1
+5802 5797 CDS
+5539 4883
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000002-T1_mrna
+ protein_id gnl|ncbi|FUN_000002-T1
+10557 8696 gene
+ locus_tag FUN_000003
+10557 10549 mRNA
+10462 8696
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000003-T1_mrna
+ protein_id gnl|ncbi|FUN_000003-T1
+10557 10549 CDS
+10462 8696
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000003-T1_mrna
+ protein_id gnl|ncbi|FUN_000003-T1
+15214 14247 gene
+ locus_tag FUN_000004
+15214 15209 mRNA
+14648 14247
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000004-T1_mrna
+ protein_id gnl|ncbi|FUN_000004-T1
+15214 15209 CDS
+14648 14247
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000004-T1_mrna
+ protein_id gnl|ncbi|FUN_000004-T1
+21705 19533 gene
+ locus_tag FUN_000005
+21705 21700 mRNA
+21515 19533
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000005-T1_mrna
+ protein_id gnl|ncbi|FUN_000005-T1
+21705 21700 CDS
+21515 19533
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000005-T1_mrna
+ protein_id gnl|ncbi|FUN_000005-T1
+35679 34843 gene
+ locus_tag FUN_000006
+35679 34843 mRNA
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000006-T1_mrna
+ protein_id gnl|ncbi|FUN_000006-T1
+35679 34843 CDS
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000006-T1_mrna
+ protein_id gnl|ncbi|FUN_000006-T1
+40223 44130 gene
+ locus_tag FUN_000007
+40223 40396 mRNA
+40659 41193
+41707 42080
+43409 43609
+43678 44130
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000007-T1_mrna
+ protein_id gnl|ncbi|FUN_000007-T1
+40223 40396 CDS
+40659 41193
+41707 42080
+43409 43609
+43678 44130
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000007-T1_mrna
+ protein_id gnl|ncbi|FUN_000007-T1
+47195 45527 gene
+ locus_tag FUN_000008
+47195 46753 mRNA
+46330 46214
+46157 45527
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000008-T1_mrna
+ protein_id gnl|ncbi|FUN_000008-T1
+47195 46753 CDS
+46330 46214
+46157 45527
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000008-T1_mrna
+ protein_id gnl|ncbi|FUN_000008-T1
+79527 78685 gene
+ locus_tag FUN_000009
+79527 79519 mRNA
+79068 78685
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000009-T1_mrna
+ protein_id gnl|ncbi|FUN_000009-T1
+79527 79519 CDS
+79068 78685
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000009-T1_mrna
+ protein_id gnl|ncbi|FUN_000009-T1
+87202 88320 gene
+ locus_tag FUN_000010
+87202 87207 mRNA
+88054 88320
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000010-T1_mrna
+ protein_id gnl|ncbi|FUN_000010-T1
+87202 87207 CDS
+88054 88320
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000010-T1_mrna
+ protein_id gnl|ncbi|FUN_000010-T1
+133587 137862 gene
+ locus_tag FUN_000011
+133587 134504 mRNA
+134720 135510
+135569 136284
+137516 137862
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000011-T1_mrna
+ protein_id gnl|ncbi|FUN_000011-T1
+133587 134504 CDS
+134720 135510
+135569 136284
+137516 137862
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000011-T1_mrna
+ protein_id gnl|ncbi|FUN_000011-T1
+144294 164495 gene
+ locus_tag FUN_000012
+144294 144551 mRNA
+149012 149244
+149367 149588
+149654 149897
+149952 150112
+150174 150248
+151966 152072
+152314 152429
+152496 152751
+153296 153630
+153689 155122
+155789 158975
+159190 164495
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000012-T1_mrna
+ protein_id gnl|ncbi|FUN_000012-T1
+144294 144551 CDS
+149012 149244
+149367 149588
+149654 149897
+149952 150112
+150174 150248
+151966 152072
+152314 152429
+152496 152751
+153296 153630
+153689 155122
+155789 158975
+159190 164495
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000012-T1_mrna
+ protein_id gnl|ncbi|FUN_000012-T1
+167121 169212 gene
+ locus_tag FUN_000013
+167121 168360 mRNA
+168722 169212
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000013-T1_mrna
+ protein_id gnl|ncbi|FUN_000013-T1
+167121 168360 CDS
+168722 169212
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000013-T1_mrna
+ protein_id gnl|ncbi|FUN_000013-T1
+178916 176699 gene
+ locus_tag FUN_000014
+178916 178873 mRNA
+177172 176887
+176824 176699
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000014-T1_mrna
+ protein_id gnl|ncbi|FUN_000014-T1
+178916 178873 CDS
+177172 176887
+176824 176699
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000014-T1_mrna
+ protein_id gnl|ncbi|FUN_000014-T1
+192004 194669 gene
+ locus_tag FUN_000015
+192004 192067 mRNA
+193549 193658
+194041 194455
+194518 194669
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000015-T1_mrna
+ protein_id gnl|ncbi|FUN_000015-T1
+192004 192067 CDS
+193549 193658
+194041 194455
+194518 194669
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000015-T1_mrna
+ protein_id gnl|ncbi|FUN_000015-T1
+210553 209044 gene
+ locus_tag FUN_000016
+210553 210548 mRNA
+210474 209044
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000016-T1_mrna
+ protein_id gnl|ncbi|FUN_000016-T1
+210553 210548 CDS
+210474 209044
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000016-T1_mrna
+ protein_id gnl|ncbi|FUN_000016-T1
+>Feature sample4
+1 7560 REFERENCE
+ CFMR 12345
+1466 1092 gene
+ locus_tag FUN_000017
+1466 1092 mRNA
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000017-T1_mrna
+ protein_id gnl|ncbi|FUN_000017-T1
+1466 1092 CDS
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000017-T1_mrna
+ protein_id gnl|ncbi|FUN_000017-T1
+2126 >3537 gene
+ locus_tag FUN_000018
+2126 2199 mRNA
+2258 3224
+3284 >3537
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000018-T1_mrna
+ protein_id gnl|ncbi|FUN_000018-T1
+2126 2199 CDS
+2258 3224
+3284 >3537
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000018-T1_mrna
+ protein_id gnl|ncbi|FUN_000018-T1
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_bam/fly.parameters.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_bam/fly.parameters.json Thu Aug 26 06:55:33 2021 +0000
[
@@ -0,0 +1,1 @@
+{"augustus": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-22", "path": "/tmp/tmp2z22js7e/job_working_directory/000/5/working/output/predict_misc/ab_initio_parameters/augustus/species/genus_species"}], "genemark": [{}], "codingquarry": [{}], "snap": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-22", "path": "/tmp/tmp2z22js7e/job_working_directory/000/5/working/output/predict_misc/ab_initio_parameters/genus_species.snap.hmm"}], "glimmerhmm": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-22", "path": "/tmp/tmp2z22js7e/job_working_directory/000/5/working/output/predict_misc/ab_initio_parameters/glimmerhmm"}]}
\ No newline at end of file
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_scratch/Genus_species.cds-transcripts.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_scratch/Genus_species.cds-transcripts.fa Thu Aug 26 06:55:33 2021 +0000
b
b'@@ -0,0 +1,241 @@\n+>FUN_000001-T1 FUN_000001\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000002-T1 FUN_000002\n+ATGCCTCTGTACGGCGTTTCTCCAGCGTCTGCTCTGTGGGCAGGAAGTAGGCGACAAACTGTTCGCCGCTCTCGTCCATC\n+ACACCACGAATCATGGCCTGCGACATCTCCTCCAGCTGGGCGGGCACAGAGTCTCCTCCCGAAGCGACTTCTTGACGTTG\n+TAACCGACCTTGGCCTCGATGTTCTCCAGGTTCTGGGGCTGGAAGCGCGTCTGCTCGGTGGAGATGTACTCGGATTTGCG\n+CAACCATGA\n+>FUN_000003-T1 FUN_000003\n+ATGTATGACCAGGAGCCCTACAAGTGCCATCTCTGCTCCAAGACGTTCCGAATGAAGGGCTCCCTTCGAATCCACCTGAA\n+AGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAATCCCAGTCCTACACCCGCTTCTACCACCA\n+GTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACACAGAGCCTGGAGCCCTTGGCAACGGAAAT\n+AACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGCATGCTCCAGCAGTCCCCCAGTTCGCCGGA\n+GTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTCCTTCACCACCAAGTACTTTCTGAAGAAGC\n+ACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCAGGACCTTCACCTTCCAGCAGTCGTACCAC\n+AAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGTGGACGCGCCTTCAAGGAACTGTCTACGCT\n+GCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAACGAGTCTCTTTCCTTGTCCACACGCGCATCC\n+ACACGGGAGTGATGCCCTACAAGTGCGAGCTCTGCCAAAAGACGTTCAGGTACAAGGTCAGTCAAAGGACCCATCGATGT\n+CCCACCGAGGAGGCCCAGACACCGGAGCAGCTGATAAAGGCGTTCCTGGAGGGCAACGACTCGCATACTCAGCCTTCACC\n+AGCGAGCGCCGAAATAGCTGCCATCAACAGCAGCTCCATTGTGGATCCTGAGCAGGAGGCACTGCTTTCGCAGTCAATCG\n+ACGACATTGTCGTTGAGCAGTGCCAAAAGCTGGGCATCTGTGGTGTGGAGCCGCGGGAGGAGGGACAGCTCATATCCCTA\n+CAGCCGGTTGCGGTGGTACACTTCAGCGGGAATGGCTCTCCGCTGCAGCAACTCCAGAACTTGAGAATCTACTCACCGCA\n+ACAAACAGAGCTACCTAGTTCCGATGGCGAAGTCTTCCAGCGCTTTTTGATGGACGCCACGTAG\n+>FUN_000004-T1 
FUN_000004\n+ATGTCAAACAAGCGCGAGATAGATGAGCTGACTTCCCGCATCAAGTCGGCTAAGACTACCTTGGTGGAGTGGACAGAGGC\n+CATGGAAGACGGAAACAAGGGCTACCAGCTAATCGAAAAGTACTACCTCGATGACCAGCAAAAGGCACGGGAGCTGAACA\n+TTAAGCGTCAGCTCTTGCAGGCGGACATCGACAAGCGGCGCAAGCAGGTGGTGCTCCTCTATGACGAGCAAATGACGCTG\n+GAGAAGAATCTGGAGCGAACCGCTTGTCTGTACAGGTCTGCCCACGCGGAACGCCGCCAGATGGTGGAGACGTGGAAGAG\n+CGCCGTGAACCAGATGACCCAGCGAGAGCACGACATTCAGCGCAGCGAGATAGAGTGTGCGGAGCTGGCCCAGAAAGCCC\n+AGCAGACGGCTCAGACCTACAAGGAGTACGACAACCAGCTAACCGAGGTCATCGAGAACAACCGCCAGGTGGAGCTCGCG\n+ATCGAGTCCCTCAACGAGGAAAGCTCCGACATGAAAAACCAAATCCAGATCCTGATTGATGCCACATTACTGAAGGAGCG\n+CGAGATCGACGGACTGCGTCGGGAACTGGAGAACCTTTCGAACCGGGTTCACCTGCAGCGCATGGATAACCGTAGTCAGA\n+TGAAAAAGCGCGATGAGAAGGCCAAGGAGCTGGAGAACTTCGCTTCGGTGATGGAGAAGGTTAACGCTCGGCTGAAGTCT\n+GTACAGAACAAGGCCCTCAATGCGGAGCAGCGGCTGCAAATTCTGGAGGAGATGATGCAGGCGGAGGAGACGGCTCTGCG\n+AAACCTGGACAAGGAGCAGGAGAAGGTCAACGAGATGTTGTACCGCACCCAGAGGCAGGTCATCGAGCTGCAGGACGAGG\n+AGAAGGTCTTGAAGGTGCAGAACGACTCGCTCAACTCCAACCTGGCAGCGATCAATCGCAATCAGCAGCAGGTCAATAAC\n+GAACTCAAGCGGCAGACGGAGATCCATTACAGCCTGTCCTTTAAGTGCTTGGAGGCGGAGCGGCGTCTCGCCGAGATCAA\n+GGGTCTGGCAGATGACCCCGAGGTAGAGGCCACAAACATGGCCCGGCTTAACACTCTGGAGCAGGAGTATGAAAAGTTGC\n+AACGCCTCATCGCCACCACGGAGGCGCAGAATAAGAAGCTAAACTATAACATGAACAACCTGGTCGTCCAGTACAACGCC\n+GACGAGAAGGAGCTCGAAATGGTCAGGTTCAAGATTAAAGAGGCCCAGGTGTACTGCGAGGGCACCGTCAAGAGACTGCG\n+CCAGATTCGATACGAGAATTCCGAGCTCATAGTCGACCTCAACATGGTCAAGATGCGCTGCAGCGACCTAGAGGTTGGCA\n+TCGGAGGCTGCGAGCAGGGCACCTACGACCTAGAGCAGCACCGCCTTGCCTTCCGGCGCGCCATCAAAGATCGCACCGTT\n+GAGCTTCGCAGCCAAGAGGATGTGCTCCTCCTCAAGAAGAAGCATCTTAACGAGGAACTAAGCACCCTGCGAGCCGACCT\n+CGGGGAGCGGAAGAAGCAGATTGAGGCGATGAAGGCGCGCTTTGAACTTACCGCCCAATTGCTAGGCAAAAACGAGGACG\n+GCTCCATCATGACCAGCACACAGCTGAAGGTGGTGAGCGCCCAGGAACGGCAGATGCTGGCCGACGAGGGCGATGCCCTC\n+AACAAAAAGGTTCTCAAGGCTGAGAAGGAAGTGGTCGCCCTGGAAAACACACTGCGCCAGTTCGACAAATCGAACGATAA\n+CTACCGAAAAACATTCCGATCTGTGGACGAAAACTCAAAGGATCGCGAGCGGGCCGAACTGGAGCTGAAGGAACTTGAGG\n+CGGCCTACTGCCGCGAGCTGGAAAAACTGAAGGTTCTCAGGTGCAAGGCACAGCACTACCAGCAGAAGCACGCAGCCC
AG\n+CGAGCTGAGGAGGAAGACCTGATTTCCAAAATAGAGAAGGCGAAGGCTAGTCGGGCGGAGCACTCTGCAGTTCTGGAGAA\n+GATCGAGCGCGAACTAGATGACCAGCGAATGAAGCTGGACAGGGCCAA'..b'+GAGGGCCTGGACCTATTCGGTCTGAGCAACGGGAACAACAGCAGCCTGaacagcagtgtgaacggcgGCGGTCCGCTGAC\n+TACGCAgcagcagaagttgcagctgcagcaacagcagcaAAACAGGACGCAGCCATCCCGAAGCAATACGGCTCTGCACG\n+TCTGTTGGCATCGTGGCGCCACCGTAGGACTGGGCGATCACCTGATAGCCGCCGAGCACCAGTTGTCCGGTTATCTGCTG\n+CGAAAGTTCAAGAATAGCTCCGGCTGGCAGAAGCTCTGGGTGGTGTTCACGTCCTTTTGTCTGTACTTTTACAAGAGCTA\n+CCAGGACGAGTTCGCACTGGCTAGTCTGCCGCTATTGGGTTACACGGTGGGTCCTCCTGGTCACCAAGATGCCGTTCAGA\n+AGGAGTTCGTCTTCAAGCTTTCCTTTAAGAACCACGTCTACTTCTTCCGCGCGGAAAGTGCGCACACCTACAACAGGTAA\n+>FUN_000013-T1 FUN_000013\n+ATGCAGCCCATCGCCATGGAGATGGGGCAGAATCAACTCGAAGTCAAAGCGTCGGTTTTGGGTGGAAGCCCGCAGCTTGG\n+TGATCTCAAAGTTGGAGGCGCCCAGGATGTTCAGCTTCTTGGCTTTTACCGTGAGCCCGGCGGATCCATCCAGAATGCTC\n+AGATCTCCAATGTAGAGGGGCTCTAG\n+>FUN_000014-T1 FUN_000014\n+ATGGCATCCAAGTCCTTCGATTTGGTCATTGAGGAAAAGACAAAGAAACCGGAACGGCTCTACCAACCTCGTCGGATGCG\n+ATGGCTTAAATATATCATCCTGCCGGCAGTCTTCTCCTTTGCGCTTCTGCTGATCCTGGTCAATGTGGACTTCTCCGATA\n+ATAGTGAGGATTCCACGCATCTGGGCAATGACACATCGCTCATTATATCTGGCTATGGATTTGAAAATAACACACTGCGC\n+CGGGGATTTTTCTCCGGAGGAATTGCATTGCATAGCCTTGTCATCGAGAACTGCACCATAGTTCATATCAACGACGCAGC\n+TTTTAATCAGGAAAGCACTGTGAACATAACAAGCCTGCAGCTCATTAATGTGCAATTGGAAAACTTGACTGAATCTGCCC\n+TGGAAGGTCTTCAAAAGCTTCAGAACTTTACTCTAGTCAATGAAAATAATCACTTCAGGCCATTTGGATTCCTCTCAGCT\n+GTGGCTGAATCGTTAGTCAGTGCGGAAATCCACCAGTCACTAGCCGCGGCAATATCGTATTCAGTGTGTGATTTTTTGGG\n+ATCCCGGAATTTCCCACAGTTGAAATATTTAGATCTGAGTGGCACACACTTGGACAAAAGCCTTATTAAAGAGTCCTTTG\n+ACAATCTGCCCGCGTTGGAACAACTACTTCTAAGGAACTGTGGGTTGGGCAACATCGAGTGGGAAATCGTGAGGCCAAGA\n+CTCAAATTGTTGCACTATTTGGACTTGGGTGGAGCTCAAAAGACGGGCAACTATGAACATCAGTTGGACGTATCCGCGTT\n+CTCTCCTGAGACAACTACTAACGCTGAAGAGATATCTACTATCCTAGCGAAGAGAGCGATGGCTCCAGAAGTAGTGGGCA\n+CCACTACACTAGGACCTACTACTTCCATAGAAATTTCACCACCATCAACACAAAGTACAACAACACCCAAAGAAGAAAGT\n+ACATCTATGACAGAAACAACGATATTAACAACACCATCGCCAAAATGCGAAGAGGAACTTTGCCAGGATCTAGAGTGCTC\n+CAGAATTACCACCGATACGGTTGCATCTGC
AGATCTGGGAAAATCGTCGTGCCAGGATGGTTTACTGGTGGAGATCTGCG\n+AATCGACTTGCACCACTCCCACATTCTTCTGTGTGATATTAGGCGAGAACTTTACCTCCGCATCCAACTGTTGTTCCCAC\n+CATACCATGCGATGTGTGGTCTCTGCACAGGTCTCCTGGTTCGAGGACCACAGCGGACTGGTCATTGGCTTGGGAGTGGG\n+TCTCCTCTTCATCGGCAGCTTCCTCGGCATGCTCATCGTCTTCGGAACCCTGCGCCTGAATCCATCCTGGTTGCGAGGCA\n+ATAAGCGGCGGGAGTCGAATACGATAGGACTGATTCAGGGAAGATTTGAGAAGGACCCGTATGAGCAAGAATTGCAACGT\n+ATACGAGAGCTGTGA\n+>FUN_000015-T1 FUN_000015\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000016-T1 FUN_000016\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGT
TCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCGATCGACAGTCTCGTTCTCGGAG\n+TCGAACTCGCAGCGG\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_scratch/Genus_species.discrepency.report.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_scratch/Genus_species.discrepency.report.txt Thu Aug 26 06:55:33 2021 +0000
b
@@ -0,0 +1,135 @@
+Discrepancy Report Results
+
+Summary
+DISC_PROTEIN_NAMES:All proteins have same name "hypothetical protein"
+DISC_SOURCE_QUALS_ASNDISC:taxname (all present, all same)
+DISC_FEATURE_COUNT:gene: 16 present
+DISC_FEATURE_COUNT:CDS: 16 present
+DISC_FEATURE_COUNT:mRNA: 16 present
+DISC_COUNT_NUCLEOTIDES:4 nucleotide Bioseqs are present
+JOINED_FEATURES:26 features have joined locations.
+NO_ANNOTATION:2 bioseqs have no features
+DISC_QUALITY_SCORES:Quality scores are missing on all sequences.
+FATAL: DISC_BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS:1 features have partial ends that do not abut the end of the sequence or a gap, and cannot be extended by 3 or fewer nucleotides to do so
+ONCALLER_COMMENT_PRESENT:4 comment descriptors were found (all same)
+MISSING_GENOMEASSEMBLY_COMMENTS:4 bioseqs are missing GenomeAssembly structured comments
+MOLTYPE_NOT_MRNA:4 molecule types are not set as mRNA.
+TECHNIQUE_NOT_TSA:4 technique are not set as TSA
+MISSING_STRUCTURED_COMMENT:4 sequences do not include structured comments.
+MISSING_PROJECT:20 sequences do not include project.
+DISC_INCONSISTENT_MOLINFO_TECH:Molinfo Technique Report (some missing, all same)
+
+
+Detailed Report
+
+DiscRep_ALL:DISC_PROTEIN_NAMES::All proteins have same name "hypothetical protein"
+
+DiscRep_ALL:DISC_SOURCE_QUALS_ASNDISC::taxname (all present, all same)
+DiscRep_SUB:DISC_SOURCE_QUALS_ASNDISC::4 sources have 'Genus species' for taxname
+DiscRep_ALL:DISC_FEATURE_COUNT::gene: 16 present
+DiscRep_ALL:DISC_FEATURE_COUNT::CDS: 16 present
+DiscRep_ALL:DISC_FEATURE_COUNT::mRNA: 16 present
+DiscRep_ALL:DISC_COUNT_NUCLEOTIDES::4 nucleotide Bioseqs are present
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:JOINED_FEATURES::26 features have joined locations.
+DiscRep_SUB:JOINED_FEATURES::26 features have joined location but no exception
+genome:CDS hypothetical protein (sample4:2126-2199, 2258-3224, 3284->3537) FUN_000016
+genome:mRNA hypothetical protein (sample4:2126-2199, 2258-3224, 3284->3537) FUN_000016
+genome:mRNA hypothetical protein (sample:c3142-3138, c3004-2883, c2686-2565) FUN_000002
+genome:CDS hypothetical protein (sample:c3142-3138, c3004-2883, c2686-2565) FUN_000002
+genome:mRNA hypothetical protein (sample:c5802-5797, c5539-4937, c4742-4248) FUN_000003
+genome:CDS hypothetical protein (sample:c5802-5797, c5539-4937, c4742-4248) FUN_000003
+genome:CDS hypothetical protein (sample:c10664-10657, c10499-8707, c8385-7691) FUN_000004
+genome:mRNA hypothetical protein (sample:c10664-10657, c10499-8707, c8385-7691) FUN_000004
+genome:mRNA hypothetical protein (sample:c15214-15209, c14648-14247) FUN_000005
+genome:CDS hypothetical protein (sample:c15214-15209, c14648-14247) FUN_000005
+genome:CDS hypothetical protein (sample:15539-15543, 15646-15919, 16485-16619) FUN_000006
+genome:mRNA hypothetical protein (sample:15539-15543, 15646-15919, 16485-16619) FUN_000006
+genome:CDS hypothetical protein (sample:c21705-21700, c21515-19638, c19482-18358) FUN_000007
+genome:mRNA hypothetical protein (sample:c21705-21700, c21515-19638, c19482-18358) FUN_000007
+genome:CDS hypothetical protein (sample:40223-40396, 40659-41193, 41707-42080, 43409-43609, 43678-44130) FUN_000009
+genome:mRNA hypothetical protein (sample:40223-40396, 40659-41193, 41707-42080, 43409-43609, 43678-44130) FUN_000009
+genome:mRNA hypothetical protein (sample:87202-87207, 88054-88320) FUN_000010
+genome:CDS hypothetical protein (sample:87202-87207, 88054-88320) FUN_000010
+genome:CDS hypothetical protein (sample:c106221-106216, c104632-104258, c103947-103696, c103618-103229, c103151-102510) FUN_000011
+genome:mRNA hypothetical protein (sample:c106221-106216, c104632-104258, c103947-103696, c103618-103229, c103151-102510) FUN_000011
+genome:CDS hypothetical protein (sample:167121-168069, 168722-169212) FUN_000012
+genome:mRNA hypothetical protein (sample:167121-168069, 168722-169212) FUN_000012
+genome:CDS hypothetical protein (sample:180262-180267, 180400-180579) FUN_000013
+genome:mRNA hypothetical protein (sample:180262-180267, 180400-180579) FUN_000013
+genome:CDS hypothetical protein (sample:c210553-210548, c210474-209053, c208645-208619) FUN_000014
+genome:mRNA hypothetical protein (sample:c210553-210548, c210474-209053, c208645-208619) FUN_000014
+
+DiscRep_ALL:NO_ANNOTATION::2 bioseqs have no features
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+
+DiscRep_ALL:DISC_QUALITY_SCORES::Quality scores are missing on all sequences.
+
+FATAL: DiscRep_ALL:DISC_BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS::1 featurepartial ends thands that do not abut the end of the sequence or a gap, and cannot be extended by 3 or fewer nucleotides to do so
+genome:CDS hypothetical protein (sample4:2126-2199, 2258-3224, 3284->3537) FUN_000016
+
+DiscRep_ALL:ONCALLER_COMMENT_PRESENT::4 comment descriptors were found (all same)
+genome:sample:"Annotated using 1.8.7"
+genome:sample2:"Annotated using 1.8.7"
+genome:sample3:"Annotated using 1.8.7"
+genome:sample4:"Annotated using 1.8.7"
+
+DiscRep_ALL:MISSING_GENOMEASSEMBLY_COMMENTS::4 bioseqs are missing GenomeAssembly structured comments
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:MOLTYPE_NOT_MRNA::4 molecule types are not set as mRNA.
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:TECHNIQUE_NOT_TSA::4 technique are not set as TSA
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:MISSING_STRUCTURED_COMMENT::4 sequences do not include structured comments.
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:MISSING_PROJECT::20 sequences do not include project.
+genome:sample (length 215740)
+genome:ncbi:FUN_000001-T1 (length 124)
+genome:ncbi:FUN_000002-T1 (length 82)
+genome:ncbi:FUN_000003-T1 (length 367)
+genome:ncbi:FUN_000004-T1 (length 831)
+genome:ncbi:FUN_000005-T1 (length 135)
+genome:ncbi:FUN_000006-T1 (length 137)
+genome:ncbi:FUN_000007-T1 (length 1002)
+genome:ncbi:FUN_000008-T1 (length 278)
+genome:ncbi:FUN_000009-T1 (length 578)
+genome:ncbi:FUN_000010-T1 (length 90)
+genome:ncbi:FUN_000011-T1 (length 554)
+genome:ncbi:FUN_000012-T1 (length 479)
+genome:ncbi:FUN_000013-T1 (length 61)
+genome:ncbi:FUN_000014-T1 (length 484)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+genome:ncbi:FUN_000015-T1 (length 124)
+genome:ncbi:FUN_000016-T1 (length 432)
+
+DiscRep_ALL:DISC_INCONSISTENT_MOLINFO_TECH::Molinfo Technique Report (some missing, all same)
+DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::technique (all missing)
+DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::4 Molinfos are missing field technique
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_scratch/Genus_species.gbk
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_scratch/Genus_species.gbk Thu Aug 26 06:55:33 2021 +0000
b
b'@@ -0,0 +1,4148 @@\n+LOCUS       sample                215740 bp    DNA     linear       21-JUL-2021\n+DEFINITION  Genus species.\n+ACCESSION   \n+VERSION\n+KEYWORDS    .\n+SOURCE      Genus species\n+  ORGANISM  Genus species\n+            Unclassified.\n+REFERENCE   1  (bases 1 to 215740)\n+  AUTHORS   Palmer,J.M.\n+  TITLE     Direct Submission\n+  JOURNAL   Submitted (21-JUL-2021) CFMR, USDA Forest Service, 1 Gifford\n+            Pinchot Drive, Madison, WI 53726, USA\n+COMMENT     \'Annotated using 1.8.7\'.\n+FEATURES             Location/Qualifiers\n+     source          1..215740\n+                     /organism="Genus species"\n+                     /mol_type="genomic DNA"\n+     gene            complement(1092..1466)\n+                     /locus_tag="FUN_000001"\n+     mRNA            complement(1092..1466)\n+                     /locus_tag="FUN_000001"\n+                     /product="hypothetical protein"\n+     CDS             complement(1092..1466)\n+                     /locus_tag="FUN_000001"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUN_000001-T1"\n+                     /translation="MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAG\n+                     ESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKRFIRNPNYVKANEFYDKMLSSEY\n+                     VSKRYKDLPPPHPGFGADQPPA"\n+     gene            complement(2565..3142)\n+                     /locus_tag="FUN_000002"\n+     mRNA            complement(join(2565..2686,2883..3004,3138..3142))\n+                     /locus_tag="FUN_000002"\n+                     /product="hypothetical protein"\n+     CDS             complement(join(2565..2686,2883..3004,3138..3142))\n+                     /locus_tag="FUN_000002"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUN_000002-T1"\n+                     
/translation="MPLYGVSPASALWAGSRRQTVRRSRPSHHESWPATSPPAGRAQS\n+                     LLPKRLLDVVTDLGLDVLQVLGLEARLLGGDVLGFAQP"\n+     gene            complement(4248..5802)\n+                     /locus_tag="FUN_000003"\n+     mRNA            complement(join(4248..4742,4937..5539,5797..5802))\n+                     /locus_tag="FUN_000003"\n+                     /product="hypothetical protein"\n+     CDS             complement(join(4248..4742,4937..5539,5797..5802))\n+                     /locus_tag="FUN_000003"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUN_000003-T1"\n+                     /translation="MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPN\n+                     PSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGNNSTCTASQPYALSGALSMLQQS\n+                     PSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+                     KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCERVSFLVHTRIHTGVM\n+                     PYKCELCQKTFRYKVSQRTHRCPTEEAQTPEQLIKAFLEGNDSHTQPSPASAEIAAIN\n+                     SSSIVDPEQEALLSQSIDDIVVEQCQKLGICGVEPREEGQLISLQPVAVVHFSGNGSP\n+                     LQQLQNLRIYSPQQTELPSSDGEVFQRFLMDAT"\n+     gene            complement(7691..10664)\n+                     /locus_tag="FUN_000004"\n+     mRNA            complement(join(7691..8385,8707..10499,10657..10664))\n+                     /locus_tag="FUN_000004"\n+                     /product="hypothetical protein"\n+     CDS             complement(join(7691..8385,8707..10499,10657..10664))\n+                     /locus_tag="FUN_000004"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUN_000004-T1"\n+                     /translation="MSNKREIDELTSRIKSAKTTLVEWTEAMEDGNKGYQLIEKYYLD\n+                     DQQKARELNIKRQLLQADIDKRRKQVVLLYDEQMTLEKNLERTACLYRSAHAERRQMV\n+                     
ETWKSAVNQMTQREHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELA\n+                     IESLNEESSDMKNQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRD\n+                     EKAKELENFASVMEKVNARLKS'..b'1 agcttttggc actgctcaac gacaatgtcg tcgattgact gcgaaagcag tgcctcctgc\n+     4501 tcaggatcca caatggagct gctgttgatg gcagctattt cggcgctcgc tggtgaaggc\n+     4561 tgagtatgcg agtcgttgcc ctccaggaac gcctttatca gctgctccgg tgtctgggcc\n+     4621 tcctcggtgg gacatcgatg ggtcctttga ctgaccttgt acctgaacgt cttttggcag\n+     4681 agctcgcact tgtagggcat cactcccgtg tggatgcgcg tgtggacaag gaaagagact\n+     4741 cgctgccgga aacacttgcc tgtggacgag tccgaaatac gaaaaggtta gacatggagt\n+     4801 gacccggaaa agaaggtata cctctccttc atttaaagta aatagggcaa atcgcaatgg\n+     4861 agtatgctca tttataagct ggctaacaaa ataaggggcg gctaattaag gggtttgatc\n+     4921 gatactcacc gcagacttcg cacttgaagg gcttttcgcc gctgtgaatc ctctggtggt\n+     4981 tatgcagcgt agacagttcc ttgaaggcgc gtccacaaac cccgcagaca tggggcttta\n+     5041 cctcgctgtg gtagagcaaa tgcttgtggt acgactgctg gaaggtgaag gtcctggcgc\n+     5101 agatctcgca tgtgtacggc atctcgccgg tgtggagccg cttgtgcttc ttcagaaagt\n+     5161 acttggtggt gaaggacttg ctgcacacat cgcactccca cagcttcggg gtggccgtgc\n+     5221 ccgactccgg cgaactgggg gactgctgga gcatgctcag cgccccactc agcgcgtatg\n+     5281 gctgggatgc ggtgcacgtg gagttatttc cgttgccaag ggctccaggc tctgtgtgtc\n+     5341 gaatgcgatc gcaaatgctc agcttgggcg tggcagtaac tgcactggtg gtagaagcgg\n+     5401 gtgtaggact gggattagga tttggattgg gattggagca gggcacgccc atcatgtgca\n+     5461 ctactttcag gtggattcga agggagccct tcattcggaa cgtcttggag cagagatggc\n+     5521 acttgtaggg ctcctggtcc tgtataaagc aataatcgga atttcactta tatttatcaa\n+     5581 ttcatcaata tgccctcatg gccaaatatt ccattacatt accgtctgtc tctcagtttc\n+     5641 aaatttatgc acaaaaatca ttcactttca ttcactatat cacaaagttg ccatggtttt\n+     5701 aaattgatca aaaacaaatt aatatctatc atatatatac atagtcatat gaacagttga\n+     5761 aaaattaatt gaaaataatg ggaacgatat acgtacatac atcagttgtt tttaaaatat\n+     5821 aagggtatat 
agatttcttt cttgttgttg ttgattttaa ttacgtcaaa cttttgtttc\n+     5881 agattcaatg taaatggtct agctttttaa gtatgatttt tttttgctgc cagtgagcat\n+     5941 agaaaaaaaa aatcaaaatc gatataagaa tatgcgaaag tgcattacga aactctttag\n+     6001 ataatagcac ttaatatatg tacatagcca atagttaccg gttccttctg ttggggttcc\n+     6061 ttttgcttgg gttctccctc cgcattttcg tggactaagc ggacgtgcat gtccctcagc\n+     6121 tcggtattcc ggaaactgaa ctcgcaaatg tcgcagtggg cgggcggggt ggtccgcaca\n+     6181 ggcggtgggg ttgggacgac gggcttggac ctggttcgct tggccctccg tttgggagga\n+     6241 gctgcggcaa ggaagccccg ggacgaggcg ggttggccat tcgctggact ctcgtttccc\n+     6301 tcctgagcca tcagacttgt gtgcgagaac aggtggatgg tgagcttgtc cagccccagg\n+     6361 aagagctcct tgcagttggc aaagggacag gccagtgggc cgttggccgc cttgatcagc\n+     6421 ctctgctgca gtgcgtcaat actgccgaaa ctgggcaccg cgcagagcgg acacagcacc\n+     6481 gaggtggaac acatttcgcc agtgcactca atcgaatctt atgcaaatgc ttcacctcct\n+     6541 attgggatta tcctcctgtt cggtctgtga tcatctattc aggagtccat tcccagactg\n+     6601 cctagtcttt ctgctttcaa aattttctaa aaatatcagc aagtgaagat ttttgaaaac\n+     6661 tttgggccca gcaatctgac ttctcggcac cgatgccagc taacgaaata atgaaaaata\n+     6721 atgaaatgcc cggcgcggat cgtcgaatcg tcaagaagac tttcggaaac actcgcagca\n+     6781 ccgaaatccc atctctcgaa caaggcagtc tcttttctcc gtgtctctgg gtagctcatt\n+     6841 tcgaaatata gctctgagca cggctatata ctatatgtat gtagaattat ttctggccga\n+     6901 tatatgttgc actggcggcc atatagcctt cgttctagtc tttgtaacgc acgatgcgca\n+     6961 ggagcaattc gctgagatga ccacatgcga tttgcgggac ttatctagag atctatcatt\n+     7021 atcgccagat tggtttaata attggctttt ccgccaatat ccaattggaa tatggttggt\n+     7081 tactgcaatt gtcgctccat tttttaagca ctccataaaa agtaaacaca ttaatatgta\n+     7141 ctcttattgg agatttcttc tttcgatttt agtttcggac cagtgaaaat cattcgtttc\n+     7201 attttcgtaa ataagaactg agaaaatatt attattatat atatttcttt attaggaaaa\n+     7261 tacgaagatt gagtatttca gattgaatta gcatatccgt ctaaatctta atgctgtaat\n+     7321 gagcttactt gagatctgat caaaaccaat acaaaaccca caccaaaggt 
ggtagctaat\n+     7381 atacatattt tgtgtaatac ttttgtagag tatttactat tcagcgattt aaacaagcaa\n+     7441 tcgcctagac acacacattt gtccgcctat gtgtatgtgc accgagctat acccccactg\n+     7501 aatcgctgtg tgctattttt atggccgcga tgctctcttg ttttgacccg cttgggcaac\n+//\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_scratch/Genus_species.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_scratch/Genus_species.gff3 Thu Aug 26 06:55:33 2021 +0000
b
b'@@ -0,0 +1,117 @@\n+##gff-version 3\n+sample\tfunannotate\tgene\t1092\t1466\t.\t-\t.\tID=FUN_000001;\n+sample\tfunannotate\tmRNA\t1092\t1466\t.\t-\t.\tID=FUN_000001-T1;Parent=FUN_000001;product=hypothetical protein;\n+sample\tfunannotate\texon\t1092\t1466\t.\t-\t.\tID=FUN_000001-T1.exon1;Parent=FUN_000001-T1;\n+sample\tfunannotate\tCDS\t1092\t1466\t.\t-\t0\tID=FUN_000001-T1.cds;Parent=FUN_000001-T1;\n+sample\tfunannotate\tgene\t2565\t3142\t.\t-\t.\tID=FUN_000002;\n+sample\tfunannotate\tmRNA\t2565\t3142\t.\t-\t.\tID=FUN_000002-T1;Parent=FUN_000002;product=hypothetical protein;\n+sample\tfunannotate\texon\t3138\t3142\t.\t-\t.\tID=FUN_000002-T1.exon1;Parent=FUN_000002-T1;\n+sample\tfunannotate\texon\t2883\t3004\t.\t-\t.\tID=FUN_000002-T1.exon2;Parent=FUN_000002-T1;\n+sample\tfunannotate\texon\t2565\t2686\t.\t-\t.\tID=FUN_000002-T1.exon3;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t3138\t3142\t.\t-\t0\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t2883\t3004\t.\t-\t1\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t2565\t2686\t.\t-\t2\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tgene\t4248\t5802\t.\t-\t.\tID=FUN_000003;\n+sample\tfunannotate\tmRNA\t4248\t5802\t.\t-\t.\tID=FUN_000003-T1;Parent=FUN_000003;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t5797\t5802\t.\t-\t.\tID=FUN_000003-T1.exon1;Parent=FUN_000003-T1;\n+sample\tfunannotate\texon\t4937\t5539\t.\t-\t.\tID=FUN_000003-T1.exon2;Parent=FUN_000003-T1;\n+sample\tfunannotate\texon\t4248\t4742\t.\t-\t.\tID=FUN_000003-T1.exon3;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t5797\t5802\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t4937\t5539\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t4248\t4742\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tgene\t7691\t10664\t.\t-\t.\tID=FUN_000004;\n+sample\tfunannotate\tmRNA\t7691\t10664\t.\t-\t.\tID=FUN_000004-T1;Parent=FUN_000004;product=hypothetical protein;\n+sample\tfunannotate\texon\t10657\t10664\t.\t-\t.\tID=FUN_000004-T1.exon1;Parent=FUN_000004-T1;\n+sample\tfunannotate\texon\t8707\t10499\t.\t-\t.\tID=FUN_000004-T1.exon2;Parent=FUN_000004-T1;\n+sample\tfunannotate\texon\t7691\t8385\t.\t-\t.\tID=FUN_000004-T1.exon3;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t10657\t10664\t.\t-\t0\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t8707\t10499\t.\t-\t1\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t7691\t8385\t.\t-\t2\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tgene\t14247\t15214\t.\t-\t.\tID=FUN_000005;\n+sample\tfunannotate\tmRNA\t14247\t15214\t.\t-\t.\tID=FUN_000005-T1;Parent=FUN_000005;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t15209\t15214\t.\t-\t.\tID=FUN_000005-T1.exon1;Parent=FUN_000005-T1;\n+sample\tfunannotate\texon\t14247\t14648\t.\t-\t.\tID=FUN_000005-T1.exon2;Parent=FUN_000005-T1;\n+sample\tfunannotate\tCDS\t15209\t15214\t.\t-\t0\tID=FUN_000005-T1.cds;Parent=FUN_000005-T1;\n+sample\tfunannotate\tCDS\t14247\t14648\t.\t-\t0\tID=FUN_000005-T1.cds;Parent=FUN_000005-T1;\n+sample\tfunannotate\tgene\t15539\t16619\t.\t+\t.\tID=FUN_000006;\n+sample\tfunannotate\tmRNA\t15539\t16619\t.\t+\t.\tID=FUN_000006-T1;Parent=FUN_000006;product=hypothetical protein;\n+sample\tfunannotate\texon\t15539\t15543\t.\t+\t.\tID=FUN_000006-T1.exon1;Parent=FUN_000006-T1;\n+sample\tfunannotate\texon\t15646\t15919\t.\t+\t.\tID=FUN_000006-T1.exon2;Parent=FUN_000006-T1;\n+sample\tfunannotate\texon\t16485\t16619\t.\t+\t.\tID=FUN_000006-T1.exon3;Parent=FUN_000006-T1;\n+sample\tfunannotate\tCDS\t15539\t15543\t.\t+\t0\tID=FUN_000006-T1.cds;Parent=FUN_000006-T1;\n+sample\tfunannotate\tCDS\t15646\t15919\t.\t+\t1\tID=FUN_000006-T1.cds;Parent=FUN_000006-T1;\n+sample\tfunannotate\tCDS\t16485\t16619\t.\t+\t0\tID=FUN_000006-T1.cds;Parent=FUN_000006-T1;\n+sample\tfunannotate\tgene\t18358\t21705\t.\t-\t.\tID=FUN_000007;\n+sample\tfunannotate\tmRNA\t18358\t21705\t.\t-\t.\tID=FUN_000007-T1;Parent=FUN_000007;product=hypothetical protein;\n+sample\tfunannotate\texon\t21700\t21705\t.\t-\t.\tID=FUN_000007-T1.exon1;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t19638\t21515\t.\t-\t.\tID=FUN_000007-T1.exon2;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t18358\t19482\t.\t-\t.\tID=FUN_000007-T1.exon3;Parent=FUN_00'..b'N_000010-T1;\n+sample\tfunannotate\tCDS\t87202\t87207\t.\t+\t0\tID=FUN_000010-T1.cds;Parent=FUN_000010-T1;\n+sample\tfunannotate\tCDS\t88054\t88320\t.\t+\t0\tID=FUN_000010-T1.cds;Parent=FUN_000010-T1;\n+sample\tfunannotate\tgene\t102510\t106221\t.\t-\t.\tID=FUN_000011;\n+sample\tfunannotate\tmRNA\t102510\t106221\t.\t-\t.\tID=FUN_000011-T1;Parent=FUN_000011;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t106216\t106221\t.\t-\t.\tID=FUN_000011-T1.exon1;Parent=FUN_000011-T1;\n+sample\tfunannotate\texon\t104258\t104632\t.\t-\t.\tID=FUN_000011-T1.exon2;Parent=FUN_000011-T1;\n+sample\tfunannotate\texon\t103696\t103947\t.\t-\t.\tID=FUN_000011-T1.exon3;Parent=FUN_000011-T1;\n+sample\tfunannotate\texon\t103229\t103618\t.\t-\t.\tID=FUN_000011-T1.exon4;Parent=FUN_000011-T1;\n+sample\tfunannotate\texon\t102510\t103151\t.\t-\t.\tID=FUN_000011-T1.exon5;Parent=FUN_000011-T1;\n+sample\tfunannotate\tCDS\t106216\t106221\t.\t-\t0\tID=FUN_000011-T1.cds;Parent=FUN_000011-T1;\n+sample\tfunannotate\tCDS\t104258\t104632\t.\t-\t0\tID=FUN_000011-T1.cds;Parent=FUN_000011-T1;\n+sample\tfunannotate\tCDS\t103696\t103947\t.\t-\t0\tID=FUN_000011-T1.cds;Parent=FUN_000011-T1;\n+sample\tfunannotate\tCDS\t103229\t103618\t.\t-\t0\tID=FUN_000011-T1.cds;Parent=FUN_000011-T1;\n+sample\tfunannotate\tCDS\t102510\t103151\t.\t-\t0\tID=FUN_000011-T1.cds;Parent=FUN_000011-T1;\n+sample\tfunannotate\tgene\t167121\t169212\t.\t+\t.\tID=FUN_000012;\n+sample\tfunannotate\tmRNA\t167121\t169212\t.\t+\t.\tID=FUN_000012-T1;Parent=FUN_000012;product=hypothetical protein;\n+sample\tfunannotate\texon\t167121\t168069\t.\t+\t.\tID=FUN_000012-T1.exon1;Parent=FUN_000012-T1;\n+sample\tfunannotate\texon\t168722\t169212\t.\t+\t.\tID=FUN_000012-T1.exon2;Parent=FUN_000012-T1;\n+sample\tfunannotate\tCDS\t167121\t168069\t.\t+\t0\tID=FUN_000012-T1.cds;Parent=FUN_000012-T1;\n+sample\tfunannotate\tCDS\t168722\t169212\t.\t+\t2\tID=FUN_000012-T1.cds;Parent=FUN_000012-T1;\n+sample\tfunannotate\tgene\t180262\t180579\t.\t+\t.\tID=FUN_000013;\n+sample\tfunannotate\tmRNA\t180262\t180579\t.\t+\t.\tID=FUN_000013-T1;Parent=FUN_000013;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t180262\t180267\t.\t+\t.\tID=FUN_000013-T1.exon1;Parent=FUN_000013-T1;\n+sample\tfunannotate\texon\t180400\t180579\t.\t+\t.\tID=FUN_000013-T1.exon2;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t180262\t180267\t.\t+\t0\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t180400\t180579\t.\t+\t0\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tgene\t208619\t210553\t.\t-\t.\tID=FUN_000014;\n+sample\tfunannotate\tmRNA\t208619\t210553\t.\t-\t.\tID=FUN_000014-T1;Parent=FUN_000014;product=hypothetical protein;\n+sample\tfunannotate\texon\t210548\t210553\t.\t-\t.\tID=FUN_000014-T1.exon1;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t209053\t210474\t.\t-\t.\tID=FUN_000014-T1.exon2;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t208619\t208645\t.\t-\t.\tID=FUN_000014-T1.exon3;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t210548\t210553\t.\t-\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t209053\t210474\t.\t-\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t208619\t208645\t.\t-\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample4\tfunannotate\tgene\t1092\t1466\t.\t-\t.\tID=FUN_000015;\n+sample4\tfunannotate\tmRNA\t1092\t1466\t.\t-\t.\tID=FUN_000015-T1;Parent=FUN_000015;product=hypothetical protein;\n+sample4\tfunannotate\texon\t1092\t1466\t.\t-\t.\tID=FUN_000015-T1.exon1;Parent=FUN_000015-T1;\n+sample4\tfunannotate\tCDS\t1092\t1466\t.\t-\t0\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample4\tfunannotate\tgene\t2126\t3537\t.\t+\t.\tID=FUN_000016;\n+sample4\tfunannotate\tmRNA\t2126\t3537\t.\t+\t.\tID=FUN_000016-T1;Parent=FUN_000016;product=hypothetical 
protein;\n+sample4\tfunannotate\texon\t2126\t2199\t.\t+\t.\tID=FUN_000016-T1.exon1;Parent=FUN_000016-T1;\n+sample4\tfunannotate\texon\t2258\t3224\t.\t+\t.\tID=FUN_000016-T1.exon2;Parent=FUN_000016-T1;\n+sample4\tfunannotate\texon\t3284\t3537\t.\t+\t.\tID=FUN_000016-T1.exon3;Parent=FUN_000016-T1;\n+sample4\tfunannotate\tCDS\t2126\t2199\t.\t+\t0\tID=FUN_000016-T1.cds;Parent=FUN_000016-T1;\n+sample4\tfunannotate\tCDS\t2258\t3224\t.\t+\t1\tID=FUN_000016-T1.cds;Parent=FUN_000016-T1;\n+sample4\tfunannotate\tCDS\t3284\t3537\t.\t+\t0\tID=FUN_000016-T1.cds;Parent=FUN_000016-T1;\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_scratch/Genus_species.mrna-transcripts.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_scratch/Genus_species.mrna-transcripts.fa Thu Aug 26 06:55:33 2021 +0000
b
b'@@ -0,0 +1,241 @@\n+>FUN_000001-T1 FUN_000001\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000002-T1 FUN_000002\n+ATGCCTCTGTACGGCGTTTCTCCAGCGTCTGCTCTGTGGGCAGGAAGTAGGCGACAAACTGTTCGCCGCTCTCGTCCATC\n+ACACCACGAATCATGGCCTGCGACATCTCCTCCAGCTGGGCGGGCACAGAGTCTCCTCCCGAAGCGACTTCTTGACGTTG\n+TAACCGACCTTGGCCTCGATGTTCTCCAGGTTCTGGGGCTGGAAGCGCGTCTGCTCGGTGGAGATGTACTCGGATTTGCG\n+CAACCATGA\n+>FUN_000003-T1 FUN_000003\n+ATGTATGACCAGGAGCCCTACAAGTGCCATCTCTGCTCCAAGACGTTCCGAATGAAGGGCTCCCTTCGAATCCACCTGAA\n+AGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAATCCCAGTCCTACACCCGCTTCTACCACCA\n+GTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACACAGAGCCTGGAGCCCTTGGCAACGGAAAT\n+AACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGCATGCTCCAGCAGTCCCCCAGTTCGCCGGA\n+GTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTCCTTCACCACCAAGTACTTTCTGAAGAAGC\n+ACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCAGGACCTTCACCTTCCAGCAGTCGTACCAC\n+AAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGTGGACGCGCCTTCAAGGAACTGTCTACGCT\n+GCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAACGAGTCTCTTTCCTTGTCCACACGCGCATCC\n+ACACGGGAGTGATGCCCTACAAGTGCGAGCTCTGCCAAAAGACGTTCAGGTACAAGGTCAGTCAAAGGACCCATCGATGT\n+CCCACCGAGGAGGCCCAGACACCGGAGCAGCTGATAAAGGCGTTCCTGGAGGGCAACGACTCGCATACTCAGCCTTCACC\n+AGCGAGCGCCGAAATAGCTGCCATCAACAGCAGCTCCATTGTGGATCCTGAGCAGGAGGCACTGCTTTCGCAGTCAATCG\n+ACGACATTGTCGTTGAGCAGTGCCAAAAGCTGGGCATCTGTGGTGTGGAGCCGCGGGAGGAGGGACAGCTCATATCCCTA\n+CAGCCGGTTGCGGTGGTACACTTCAGCGGGAATGGCTCTCCGCTGCAGCAACTCCAGAACTTGAGAATCTACTCACCGCA\n+ACAAACAGAGCTACCTAGTTCCGATGGCGAAGTCTTCCAGCGCTTTTTGATGGACGCCACGTAG\n+>FUN_000004-T1 
FUN_000004\n+ATGTCAAACAAGCGCGAGATAGATGAGCTGACTTCCCGCATCAAGTCGGCTAAGACTACCTTGGTGGAGTGGACAGAGGC\n+CATGGAAGACGGAAACAAGGGCTACCAGCTAATCGAAAAGTACTACCTCGATGACCAGCAAAAGGCACGGGAGCTGAACA\n+TTAAGCGTCAGCTCTTGCAGGCGGACATCGACAAGCGGCGCAAGCAGGTGGTGCTCCTCTATGACGAGCAAATGACGCTG\n+GAGAAGAATCTGGAGCGAACCGCTTGTCTGTACAGGTCTGCCCACGCGGAACGCCGCCAGATGGTGGAGACGTGGAAGAG\n+CGCCGTGAACCAGATGACCCAGCGAGAGCACGACATTCAGCGCAGCGAGATAGAGTGTGCGGAGCTGGCCCAGAAAGCCC\n+AGCAGACGGCTCAGACCTACAAGGAGTACGACAACCAGCTAACCGAGGTCATCGAGAACAACCGCCAGGTGGAGCTCGCG\n+ATCGAGTCCCTCAACGAGGAAAGCTCCGACATGAAAAACCAAATCCAGATCCTGATTGATGCCACATTACTGAAGGAGCG\n+CGAGATCGACGGACTGCGTCGGGAACTGGAGAACCTTTCGAACCGGGTTCACCTGCAGCGCATGGATAACCGTAGTCAGA\n+TGAAAAAGCGCGATGAGAAGGCCAAGGAGCTGGAGAACTTCGCTTCGGTGATGGAGAAGGTTAACGCTCGGCTGAAGTCT\n+GTACAGAACAAGGCCCTCAATGCGGAGCAGCGGCTGCAAATTCTGGAGGAGATGATGCAGGCGGAGGAGACGGCTCTGCG\n+AAACCTGGACAAGGAGCAGGAGAAGGTCAACGAGATGTTGTACCGCACCCAGAGGCAGGTCATCGAGCTGCAGGACGAGG\n+AGAAGGTCTTGAAGGTGCAGAACGACTCGCTCAACTCCAACCTGGCAGCGATCAATCGCAATCAGCAGCAGGTCAATAAC\n+GAACTCAAGCGGCAGACGGAGATCCATTACAGCCTGTCCTTTAAGTGCTTGGAGGCGGAGCGGCGTCTCGCCGAGATCAA\n+GGGTCTGGCAGATGACCCCGAGGTAGAGGCCACAAACATGGCCCGGCTTAACACTCTGGAGCAGGAGTATGAAAAGTTGC\n+AACGCCTCATCGCCACCACGGAGGCGCAGAATAAGAAGCTAAACTATAACATGAACAACCTGGTCGTCCAGTACAACGCC\n+GACGAGAAGGAGCTCGAAATGGTCAGGTTCAAGATTAAAGAGGCCCAGGTGTACTGCGAGGGCACCGTCAAGAGACTGCG\n+CCAGATTCGATACGAGAATTCCGAGCTCATAGTCGACCTCAACATGGTCAAGATGCGCTGCAGCGACCTAGAGGTTGGCA\n+TCGGAGGCTGCGAGCAGGGCACCTACGACCTAGAGCAGCACCGCCTTGCCTTCCGGCGCGCCATCAAAGATCGCACCGTT\n+GAGCTTCGCAGCCAAGAGGATGTGCTCCTCCTCAAGAAGAAGCATCTTAACGAGGAACTAAGCACCCTGCGAGCCGACCT\n+CGGGGAGCGGAAGAAGCAGATTGAGGCGATGAAGGCGCGCTTTGAACTTACCGCCCAATTGCTAGGCAAAAACGAGGACG\n+GCTCCATCATGACCAGCACACAGCTGAAGGTGGTGAGCGCCCAGGAACGGCAGATGCTGGCCGACGAGGGCGATGCCCTC\n+AACAAAAAGGTTCTCAAGGCTGAGAAGGAAGTGGTCGCCCTGGAAAACACACTGCGCCAGTTCGACAAATCGAACGATAA\n+CTACCGAAAAACATTCCGATCTGTGGACGAAAACTCAAAGGATCGCGAGCGGGCCGAACTGGAGCTGAAGGAACTTGAGG\n+CGGCCTACTGCCGCGAGCTGGAAAAACTGAAGGTTCTCAGGTGCAAGGCACAGCACTACCAGCAGAAGCACGCAGCCC
AG\n+CGAGCTGAGGAGGAAGACCTGATTTCCAAAATAGAGAAGGCGAAGGCTAGTCGGGCGGAGCACTCTGCAGTTCTGGAGAA\n+GATCGAGCGCGAACTAGATGACCAGCGAATGAAGCTGGACAGGGCCAA'..b'+GAGGGCCTGGACCTATTCGGTCTGAGCAACGGGAACAACAGCAGCCTGaacagcagtgtgaacggcgGCGGTCCGCTGAC\n+TACGCAgcagcagaagttgcagctgcagcaacagcagcaAAACAGGACGCAGCCATCCCGAAGCAATACGGCTCTGCACG\n+TCTGTTGGCATCGTGGCGCCACCGTAGGACTGGGCGATCACCTGATAGCCGCCGAGCACCAGTTGTCCGGTTATCTGCTG\n+CGAAAGTTCAAGAATAGCTCCGGCTGGCAGAAGCTCTGGGTGGTGTTCACGTCCTTTTGTCTGTACTTTTACAAGAGCTA\n+CCAGGACGAGTTCGCACTGGCTAGTCTGCCGCTATTGGGTTACACGGTGGGTCCTCCTGGTCACCAAGATGCCGTTCAGA\n+AGGAGTTCGTCTTCAAGCTTTCCTTTAAGAACCACGTCTACTTCTTCCGCGCGGAAAGTGCGCACACCTACAACAGGTAA\n+>FUN_000013-T1 FUN_000013\n+ATGCAGCCCATCGCCATGGAGATGGGGCAGAATCAACTCGAAGTCAAAGCGTCGGTTTTGGGTGGAAGCCCGCAGCTTGG\n+TGATCTCAAAGTTGGAGGCGCCCAGGATGTTCAGCTTCTTGGCTTTTACCGTGAGCCCGGCGGATCCATCCAGAATGCTC\n+AGATCTCCAATGTAGAGGGGCTCTAG\n+>FUN_000014-T1 FUN_000014\n+ATGGCATCCAAGTCCTTCGATTTGGTCATTGAGGAAAAGACAAAGAAACCGGAACGGCTCTACCAACCTCGTCGGATGCG\n+ATGGCTTAAATATATCATCCTGCCGGCAGTCTTCTCCTTTGCGCTTCTGCTGATCCTGGTCAATGTGGACTTCTCCGATA\n+ATAGTGAGGATTCCACGCATCTGGGCAATGACACATCGCTCATTATATCTGGCTATGGATTTGAAAATAACACACTGCGC\n+CGGGGATTTTTCTCCGGAGGAATTGCATTGCATAGCCTTGTCATCGAGAACTGCACCATAGTTCATATCAACGACGCAGC\n+TTTTAATCAGGAAAGCACTGTGAACATAACAAGCCTGCAGCTCATTAATGTGCAATTGGAAAACTTGACTGAATCTGCCC\n+TGGAAGGTCTTCAAAAGCTTCAGAACTTTACTCTAGTCAATGAAAATAATCACTTCAGGCCATTTGGATTCCTCTCAGCT\n+GTGGCTGAATCGTTAGTCAGTGCGGAAATCCACCAGTCACTAGCCGCGGCAATATCGTATTCAGTGTGTGATTTTTTGGG\n+ATCCCGGAATTTCCCACAGTTGAAATATTTAGATCTGAGTGGCACACACTTGGACAAAAGCCTTATTAAAGAGTCCTTTG\n+ACAATCTGCCCGCGTTGGAACAACTACTTCTAAGGAACTGTGGGTTGGGCAACATCGAGTGGGAAATCGTGAGGCCAAGA\n+CTCAAATTGTTGCACTATTTGGACTTGGGTGGAGCTCAAAAGACGGGCAACTATGAACATCAGTTGGACGTATCCGCGTT\n+CTCTCCTGAGACAACTACTAACGCTGAAGAGATATCTACTATCCTAGCGAAGAGAGCGATGGCTCCAGAAGTAGTGGGCA\n+CCACTACACTAGGACCTACTACTTCCATAGAAATTTCACCACCATCAACACAAAGTACAACAACACCCAAAGAAGAAAGT\n+ACATCTATGACAGAAACAACGATATTAACAACACCATCGCCAAAATGCGAAGAGGAACTTTGCCAGGATCTAGAGTGCTC\n+CAGAATTACCACCGATACGGTTGCATCTGC
AGATCTGGGAAAATCGTCGTGCCAGGATGGTTTACTGGTGGAGATCTGCG\n+AATCGACTTGCACCACTCCCACATTCTTCTGTGTGATATTAGGCGAGAACTTTACCTCCGCATCCAACTGTTGTTCCCAC\n+CATACCATGCGATGTGTGGTCTCTGCACAGGTCTCCTGGTTCGAGGACCACAGCGGACTGGTCATTGGCTTGGGAGTGGG\n+TCTCCTCTTCATCGGCAGCTTCCTCGGCATGCTCATCGTCTTCGGAACCCTGCGCCTGAATCCATCCTGGTTGCGAGGCA\n+ATAAGCGGCGGGAGTCGAATACGATAGGACTGATTCAGGGAAGATTTGAGAAGGACCCGTATGAGCAAGAATTGCAACGT\n+ATACGAGAGCTGTGA\n+>FUN_000015-T1 FUN_000015\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000016-T1 FUN_000016\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGT
TCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCGATCGACAGTCTCGTTCTCGGAG\n+TCGAACTCGCAGCGG\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_scratch/Genus_species.proteins.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_scratch/Genus_species.proteins.fa Thu Aug 26 06:55:33 2021 +0000
b
@@ -0,0 +1,96 @@
+>FUN_000001-T1 FUN_000001
+MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAGESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKR
+FIRNPNYVKANEFYDKMLSSEYVSKRYKDLPPPHPGFGADQPPA
+>FUN_000002-T1 FUN_000002
+MPLYGVSPASALWAGSRRQTVRRSRPSHHESWPATSPPAGRAQSLLPKRLLDVVTDLGLDVLQVLGLEARLLGGDVLGFA
+QP
+>FUN_000003-T1 FUN_000003
+MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPNPSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGN
+NSTCTASQPYALSGALSMLQQSPSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH
+KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCERVSFLVHTRIHTGVMPYKCELCQKTFRYKVSQRTHRC
+PTEEAQTPEQLIKAFLEGNDSHTQPSPASAEIAAINSSSIVDPEQEALLSQSIDDIVVEQCQKLGICGVEPREEGQLISL
+QPVAVVHFSGNGSPLQQLQNLRIYSPQQTELPSSDGEVFQRFLMDAT
+>FUN_000004-T1 FUN_000004
+MSNKREIDELTSRIKSAKTTLVEWTEAMEDGNKGYQLIEKYYLDDQQKARELNIKRQLLQADIDKRRKQVVLLYDEQMTL
+EKNLERTACLYRSAHAERRQMVETWKSAVNQMTQREHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELA
+IESLNEESSDMKNQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRDEKAKELENFASVMEKVNARLKS
+VQNKALNAEQRLQILEEMMQAEETALRNLDKEQEKVNEMLYRTQRQVIELQDEEKVLKVQNDSLNSNLAAINRNQQQVNN
+ELKRQTEIHYSLSFKCLEAERRLAEIKGLADDPEVEATNMARLNTLEQEYEKLQRLIATTEAQNKKLNYNMNNLVVQYNA
+DEKELEMVRFKIKEAQVYCEGTVKRLRQIRYENSELIVDLNMVKMRCSDLEVGIGGCEQGTYDLEQHRLAFRRAIKDRTV
+ELRSQEDVLLLKKKHLNEELSTLRADLGERKKQIEAMKARFELTAQLLGKNEDGSIMTSTQLKVVSAQERQMLADEGDAL
+NKKVLKAEKEVVALENTLRQFDKSNDNYRKTFRSVDENSKDRERAELELKELEAAYCRELEKLKVLRCKAQHYQQKHAAQ
+RAEEEDLISKIEKAKASRAEHSAVLEKIERELDDQRMKLDRANREIRTQLREIKARPFSEEYLAQFERDLSLQELEARNT
+KALNMITDLANSDESGTDIIGILLRKGIKLPMHLKRTCSRVSWNSSSSGKSSQGQDTASYLNVKGKKFSCDGASARSSVS
+DMSSLKDDTSSTTSHSGLSIISLELPLPKKK
+>FUN_000005-T1 FUN_000005
+MRCVFCGSGSEQQHSRWEIKMLQESCRTDHGFHQDSQAIQYLYEILASYNRDEQRAFLQFVTGSPRLPTGGFKALTPPLT
+IVRKTLDENQNPNDYLPSVMTCVNYLKLPDYSSREVMRQKLKVAANEGSMSFHLS
+>FUN_000006-T1 FUN_000006
+MLILIGAGARGAVLHRLLASSATQDVRTTARACGRMLVLLCGCCCSGGAAAHKRRSCYCWSVLVRGGCLKSILDVQHSAV
+GLDYVYNGHRVLFASCTVTTSAGRELLRKGFRAEITTGAIPSCHSSVFLRMNLLLDL
+>FUN_000007-T1 FUN_000007
+MSFCDFLRDAALGANSANLSIVAAALSAARDVGGGSDGGGSAGGATPATGASASSVGNTSAVGASSSSNSSAGQAASSNS
+NNVTATGSGSAPGGGPTSTGTTSGTQHGSGSGAAAAVDSESDDSEVGRLQALLEARGLPPHLFGALGPRVTHILHRTIGN
+SSSSKANQLLQGLQSHDESQQLQAAIEMCQMLVMGNEDTLAGFPIKQVVPALIQLLRMEHNFDIMNNACRALAYMLEALP
+RSSGTVVEAVPVFLEKLQVIQCMDVAEQSLSALEILSRRHNKAILQANGISACLTYLDFFSIVAQRAALAIAANCCLNMH
+PEEFHFVAESLPLLARLLSQQDKKCIESVCSAFCRLVESFQHDGQRLQQIASPDLLKNCQQLLLVTPAILNTGTFTAVVR
+MLSLMCCSCPDLAISLLRNDIAATLLYLLTGNAEPAAASATHVELISRSPSELYELTCLIGELMPRLPLDGIFAVDSLLD
+RPTLNTQDQVHWQWRDDRGSWHNYSTIDSRLIEAANQSSEDEISLSTFGRTYTVDFHAMQQINEDTGTTRPVQRRLNHNY
+VAPMSAGQDLTTTSAGSAAAGGASTSAAAAAASSNNNNNNNNNPPGNSVNLNQVKRRPSLDARIACLKEERGLAADFIKH
+IFNVLYEVYSSSAGPNVRYKCLRALLRMVYYATPELLRQVLKYQLVSSHIAGMLGSNDLRIVVGALQMAEILMRQLPDVF
+GTHFRREGVIYQFTQLTDPNNPICANPSPKPLSATATPTANAGGSQSAPASANSLQVNPFFMDSAPGLSSASTTPSSSKH
+QSYSVKSFSHAMNALTASAKGTPSGALDATSSSTTAGGYNYSSSAPSSSSGAPAAYFVTQQGDPRQYVHFQQPAVPAPPP
+QQELLPSGVQQQGQQVPQVIYQPHHQQPAHLVLASTSSGAASSSSSSSSSSSASALQHKMTDMLKRKAPPKRKSQSGGRA
+KSRQEDAAVAPAGSGPGGAPPSSSGSAMHELLSRATSKYIFQ
+>FUN_000008-T1 FUN_000008
+MKFRALMQDPLYMKEFQAIVATLTKLAKDCVMILGSRQMHFIVNEDQSSAASPLVWAGITAEEYFPEYRMEAAHPDQEYI
+VLGVSSANLGRALSVLRGGGVNSCKLKLQRIQFPCISVIASVLTSSSTEAREVVHDVPVTIIPGSDWSAYVVPRVPNSQL
+ALGLPSLRLLKSLIDKLKNISPSLEFQVNVDGELNVIATSEMSTVTSRFQKLLIRTVSGSQQEASCSVDSRKASAFFGAL
+QLPNEELTIGIDREHSIHLQIDVRQDVVLHSILPAVCM
+>FUN_000009-T1 FUN_000009
+MCGNPAVGNGTRALILVGGYGTRLRPLTLSTPKPLVEFANKPILLHQLEALVDAGCRQVILAVSYRAEQMEKELKVEAKK
+LGVELIFSHETEPLGTAGPLALAKTILAASSEPFFVLNSDVICDFPFKQLVQFHCNHGKEGTIVVTKVEEPSKYGVVLYD
+ENGCIKNFIEKPQEFVSNKINAGIYIFNPSVLDRIEVKPTSIEKEVFPEMTQQQELYAMDLTGFWMDIGQPKDFLTGMCL
+YLSSLRQKQSPKLYTGPGVVGNVLVDPTAKIGEGCRIGPNVTIGPDVVIEDGVCIKRSTILKGAIVRSHSWLDSCIVGWR
+STVGRWVRIEGITVLGEDVIVKDELYINGGQVLPHKSIAASLRGAIVQAGQLVILPDEEVFSHVQGVWNLSSDQGNLGSF
+VVTNIRLVWFADANETFNISLPYLQIESSKYGPALVIQTAETGGGYVLGFRVDPAERLNELFKELSSLHTVYGEHPNFGI
+QYNANDARRRLEAASEEAAQASQIKVDNFEELDERQEREINTKLNSYLAEGCLGKVPSQGERAPVYCKELGFAMEPIGDG
+YKLQDLWNVMPTKMETME
+>FUN_000010-T1 FUN_000010
+MDFIHEAVGTSAAPSANPGPMPLCQPVRLELPFSPRHSFALGHFPFQLCPCLYAVWVQTMGLVLLLLIVFAPFLFRVLFK
+PCNPKRYLTT
+>FUN_000011-T1 FUN_000011
+MCHSKDNLHSGNEDGGMPKDTEYISSDHDDSPSWSQQSLLSSDRSKSYSQICSEILEESKERQEKAECAFRVYNINRSKL
+RRSHQQSLSRGPGSGSYGSSMASEYSSKSEAGYQDYDSPSTDPSREHTAEVTFLQLRHRNRAHKEIIFRAAAHAIVIILI
+IIARGVCQRHIKIVEIVPLTSRRGARRATTRNLTSHFAPRKWRRRFSSADQGDRQFKGHDGDCLRSTEKKRSISNEQSPI
+TLRNTNAKDVDIPDCFGSFAMNKHLSVITEDASQHHKDPDEDMIDSQLSNSVLLETYDEGEKYAYSYQYSYKPEICNNNQ
+FVSDESDLKVSSKEGYQMDQEDYVMDKQELVHEGGSDASLSEVAKSKSFLSLKIYDADEALMEIPEDFEGPAIVLDDDAD
+FLDITLTDDEEKIRAKLMAAALTTRKTTSSISPNISLRTRSPIEPSSLSYKPNVIFTRRSEVIKDNYTPRPDDRVALLAE
+KFLQSFSESAPNDYGWKPSKQEVTSAVSISHLFNENGVTRRGGDTPLCGDRQLLSVEFNRKLQRQLKVIVESFQ
+>FUN_000012-T1 FUN_000012
+MSLDRRGEITTPPTRYDLTLGSDKSSSLSRSEAGTYDVIQAEIQHAKRQELATGVATASHQNGNGNGNGHTLSTQHDIEA
+EVKKRKWPTEPSYFLAKELLMTERTYKKDLDVLNTTFRQVLSLGDVEQLQPLFELLDSLAQHHNLFLRDIEHRMVQWEGR
+GGHEAHRIGDVMMKHMAALPIYDEYVQTHLDILHCMNDMYEGDERFRQVYKEFEQQKVCYLPIGELLLKPLNRLLHYQLI
+LERLCDYYGEEHIDYADAMAVHHLLVRSTKGIRSQLPDSANFVELCELQRDINFEQLVQPHRRLIRQGCLLKHSKRGSSE
+EGLDLFGLSNGNNSSLNSSVNGGGPLTTQQQKLQLQQQQQNRTQPSRSNTALHVCWHRGATVGLGDHLIAAEHQLSGYLL
+RKFKNSSGWQKLWVVFTSFCLYFYKSYQDEFALASLPLLGYTVGPPGHQDAVQKEFVFKLSFKNHVYFFRAESAHTYNR
+>FUN_000013-T1 FUN_000013
+MQPIAMEMGQNQLEVKASVLGGSPQLGDLKVGGAQDVQLLGFYREPGGSIQNAQISNVEGL
+>FUN_000014-T1 FUN_000014
+MASKSFDLVIEEKTKKPERLYQPRRMRWLKYIILPAVFSFALLLILVNVDFSDNSEDSTHLGNDTSLIISGYGFENNTLR
+RGFFSGGIALHSLVIENCTIVHINDAAFNQESTVNITSLQLINVQLENLTESALEGLQKLQNFTLVNENNHFRPFGFLSA
+VAESLVSAEIHQSLAAAISYSVCDFLGSRNFPQLKYLDLSGTHLDKSLIKESFDNLPALEQLLLRNCGLGNIEWEIVRPR
+LKLLHYLDLGGAQKTGNYEHQLDVSAFSPETTTNAEEISTILAKRAMAPEVVGTTTLGPTTSIEISPPSTQSTTTPKEES
+TSMTETTILTTPSPKCEEELCQDLECSRITTDTVASADLGKSSCQDGLLVEICESTCTTPTFFCVILGENFTSASNCCSH
+HTMRCVVSAQVSWFEDHSGLVIGLGVGLLFIGSFLGMLIVFGTLRLNPSWLRGNKRRESNTIGLIQGRFEKDPYEQELQR
+IREL
+>FUN_000015-T1 FUN_000015
+MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAGESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKR
+FIRNPNYVKANEFYDKMLSSEYVSKRYKDLPPPHPGFGADQPPA
+>FUN_000016-T1 FUN_000016
+MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFDLKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLG
+VTVDLINRELYQADSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK
+KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPCAQVIFDSDPAPAGKNVPAQLEE
+MSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFINGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGI
+YYNELETRVRLNKRRVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIGETEKTSED
+AAVGAQAASGADSPAQVARDRQSRSRSRTRS
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_scratch/Genus_species.scaffolds.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_scratch/Genus_species.scaffolds.fa Thu Aug 26 06:55:33 2021 +0000
b
b'@@ -0,0 +1,4554 @@\n+>sample\n+GTGGCCGGCTGATTTCACGTCCTAACTTTGGGCTTAACTGGTTCGCCAGT\n+TGACTTTCTTCGCCATCATGTGATGCATTAATTAAACAATAATTACTAAT\n+TGacagtaattaataattgTGGCAAAAAGCGCGACACGTTTTTTCGGCAA\n+ACTCCTCGGAAGACCGATTGTTTAAAGACGTAGGGAAAAGTAGTTCCCAA\n+GCATTTTAAAAAGATACCTATGACATGTGACACCTTTAAAGTGCAATACA\n+AGTTTTCATCTCTTTATATCCTTTTACTCCCTAATTTGAATATAAAAGGA\n+ATCGCATTGAGAGTATAAAGGCTTTAGTTCTTATCGATagatagttagtG\n+ATGAAATAAAATTATAACCGTGGTTTTAGTTTGAAATGTTGTAAAGACTT\n+TCTTTAAATTTAACCAAATTTATGTGATAAAATGGATATTCCATAGATAA\n+GACATTTAAGTTAAATGTTTTTATACATCAAAAAGGAAACATTGTGCACG\n+CTATCAAATGGTATTCTTAAAATCGAGTCAgttaggtaagttattaatta\n+aATGGTAACTTTTTAACGTGCGTCAAATAATCTAGAAATTCTTCTTCAAC\n+TCATCCAAAACATTCTCAACACCACAATATCTATGCTCAGCGATGACAAA\n+TTTCTCCTGATTTCTTAATTTTCTATCTATGctatgcGATCAATCAACGA\n+ATGTGTGCTAATTTCTTGTGACGATTATTTTGCAAAGTCGTCTCCGCGTT\n+AATATCCGATGTAAATAAACCTATGAAAATCGCAAAGATCTATTCCTTTG\n+CGCTTAACCTTGTTATTGAACTCCCTCCCGCCCGGATTTTCGCAGCTTCC\n+AACTAAGTGATACCTTTTAAACAAACGCCACAACAAAACAGGTGACAATC\n+ATATTTTATCAACAAAAGAAAAGAGAGATAATATCTGCTGCTAATTCAAT\n+TTATTGGGCTTTGTGTTTATTTGCATTGGGAATCCGTGGAGCTGATATTG\n+TTTACTTTGGCAATTTCCCAGTCATTATGGTCGCCGTATAATGTGAATGG\n+CCAACAGGAAAATTTCACAGATTCCACTGGTTATTCACTGTTCATGCTGG\n+CGGTTGATCCGCTCCAAATCCCGGATGAGGCGGCGGAAGATCCTTATACC\n+GCTTACTTACGTATTCACTGCTTAACATCTTATCATAGAATTCGTTAGCT\n+TTCACATAGTTTGGATTCCGAATGAATCTCTTCTGTCGACCTCGCGTTTT\n+TGGCAATTTTCGGAGCTCTTCGCTACTGCTAGTTGATTTAAGGCCTACAT\n+CTTTCTGATCCTTAATCATCCTTTTAGGTGATTCTCCTGCTACCATGGGA\n+TCCACCAGTTTCATATATAATGGATCGGGCTCGTTTGCAAGTGGGTTTCC\n+AGTTTCAGAAGACTCTGACGTTCTAAGCGCCAATAAAATGAAAATAACCA\n+AAAGGAAACTAGACATGTTTCGTTACAGACAGATATAGATTGGATATTAT\n+TGAAAGAAAATgaaaataAACAGCGATAATGATCTGTGACTTATTGGAAA\n+TTAGATGGCTTATGGATGATGGGGCGATAAATTCGAACAAACACTGAGAG\n+CATTTTTGGGAGCATTGTGGGAGCTTTTCTATTATCCAGTACTTTATATA\n+CATATATCATTTATATACTAATCATTTCTGGTAGCCGTTCGTAATCAGGA\n+TCGGATCCTTTTTTACCCGTTAGTCAGCTAGAAGAAACGAAAAATTAAAA\n+TAGTAAAatctaaaagtatacaaaaattcaaatagtaaaaccaaaaagta\n+ttaaaaaaAATATCAATCGTTTTTAAACGTTGATTTTTCAGCTTGTGGGG\n+TGATTT
ATCGCTAACTTGGAAAATGATAATAAAGCATTATCCATAATATT\n+AGTTGTGGAAATGAAATTCAAATAGATGTTGTGTTATATACGATGAGGAT\n+GTTGCATTTGAGTCCCCGGAAATATAGTATTTTTTTTACCGAAGGTATTA\n+TCGTACCGGTCAAGTACGGTCACACTGCCAAGCGCAGATTTGAGGATTTC\n+TAGATTTGGCCTCTTGATGGACTAGAAGCGCTACCAAAACTGGGGCTTGA\n+GTTGAATTACCTGTTGGAAGACACAATGCCACCCACGATCAACAATTCGG\n+CGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAG\n+TAAGTGAACAGATCCCTAAACAGACGCCAGATACTCAGACTGATGTGTAC\n+CTTGCAGATCCGAGATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCG\n+GATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACAGCCACCG\n+CTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACG\n+TGCTGACGGAACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAG\n+CTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACT\n+GCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGC\n+ATTCGAGGACGGTGTCATGGTTGCGCAAATCCGAGTACATCTCCACCGAG\n+CAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGG\n+TTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCG\n+AAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAAGAGCGAA\n+ATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCC\n+TATCTTCCCCGACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATAT\n+TTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAG\n+GAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACA\n+GTTTGTCGCCTACTTCCTGCCCACAGAGCAGACGCTGGAGAAACGCCGTA\n+CAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtac\n+aagATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGG\n+CTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATCTACTACA\n+ACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGC\n+CAGCAACCCAACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCT\n+ATAGCGAGCTTTACTTTGTATTATTTCTACCAGGTTGTCAAGCATCGTCC\n+ATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAG\n+AAGTTCCTGGCGAGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAG\n+CAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGACGCAGCTGT\n+TGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCG\n+ATCGACAGTCTCGTTCTCGGAGTCGAACTCGCAGCGGGTCCAgttcagga\n+tctggatctggctccggctcTCGGGCCAgcagccgctcaaagtctggttc\n+tcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCC\n+AGAAATCCGGatccagatcgagatcggtatcacgttcccgatcccgttcc\n+aagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagtccggt
tcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTT'..b'cgcgttctaggtcgagatccaagtccggttcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTTCTGGTTCAGGATCCAGCTCTGGAAGCGCCTCA\n+GATGAATGATTAATTACAAAAAACGGCGTTCATAATAAATAAGTTTATAA\n+TCAACCAAGTACATTTGAAAACTGAACTAACTCGATTTAATATCATTTTC\n+GCCTCAACTCAGCGCTCGGGTTCGTTGCCCAGAATAGTTTTAAATAAAAT\n+CGGCAGTTTAACATAATTTATATTAGATGTtgttgttgtATTGCAAACAA\n+GTCGGGTCCTAGTCGATTTACACTTGGCTGAGATAAAACAACTAAGATTC\n+AAATAATATCCTCATAAGAAGATGTAATTAAGACGTTTTTCTTAGGGGGT\n+GCTTAGGAATTGATTAGATCGCCTTTGGGGAAGTGCAAACAATGTAAaat\n+gatataaaagggtataaaTTAAGTGGATATATGCATCTTCGTTCCAACTA\n+CGTGGCGTCCATCAAAAAGCGCTGGAAGACTTCGCCATCGGAACTAGGTA\n+GCTCTGTTTGTTGCGGTGAGTAGATTCTCAAGTTCTGGAGTTGCTGCAGC\n+GGAGAGCCATTCCCGCTGAAGTGTACCACCGCAACCGGCTGTAGGGATAT\n+GAGCTGTCCCTCCTCCCGCGGCTCCACACCACAGATGCCCAGCTTTTGGC\n+ACTGCTCAACGACAATGTCGTCGATTGACTGCGAAAGCAGTGCCTCCTGC\n+TCAGGATCCACAATGGAGCTGCTGTTGATGGCAGCTATTTCGGCGCTCGC\n+TGGTGAAGGCTGAGTATGCGAGTCGTTGCCCTCCAGGAACGCCTTTATCA\n+GCTGCTCCGGTGTCTGGGCCTCCTCGGTGGGACATCGATGGGTCCTTTGA\n+CTGACCTTGTACCTGAACGTCTTTTGGCAGAGCTCGCACTTGTAGGGCAT\n+CACTCCCGTGTGGATGCGCGTGTGGACAAGGAAAGAGACTCGCTGCCGGA\n+AACACTTGCCTGTGGACGAGTCCGAAATACGAAAAGGTTAGACATGGAGT\n+GACCCGGAAAAGAAGGTATACCTCTCCTTCATTTAAAGTAAATAGGGCAA\n+ATCGCAATGGAGTATGCTCATTTATAAGCTGGCTAACAAAATAAGGGGCG\n+GCTAATTAAGGGGTTTGATCGATACTCACCGCAGACTTCGCACTTGAAGG\n+GCTTTTCGCCGCTGTGAATCCTCTGGTGGTTATGCAGCGTAGACAGTTCC\n+TTGAAGGCGCGTCCACAAACCCCGCAGACATGGGGCTTTACCTCGCTGTG\n+GTAGAGCAAATGCTTGTGGTACGACTGCTGGAAGGTGAAGGTCCTGGCGC\n+AGATCTCGCATGTGTACGGCATCTCGCCGGTGTGGAGCCGCTTGTGCTTC\n+TTCAGAAAGTACTTGGTGGTGAAGGACTTGCTGCACACATCGCACTCCCA\n+CAGCTTCGGGGTGGCCGTGCCCGACTCCGGCGAACTGGGGGACTGCTGGA\n+GCATGCTCAGCGCCCCACTCAGCGCGTATGGCTGGGATGCGGTGCACGTG\n+GAGTTATTTCCGTTGCCAAGGGCTCCAGGCTCTGTGTGTCGAATGCGATC\n+GCAAATGCTCAGCTTGGGCGTGGCAGTAACTGCACTGGTGGTAGAAGCGG\n+GTGTAGGACtgggattaggatttggattgggattggAGCAGGGCACGCCC\n+ATCATGTGCACTACTTTCAGGTGGATTCGAAGGGAGCCCTTCATTCGGAA\n+CGTCTTGGAGCAGAGATGGCACTT
GTAGGGCTCCTGGTCCTGTATAAAGC\n+AATAATCGGAATTTCACTTATATTTATCAATTCATCAATATGCCCTCATG\n+GCCAAATATTCCATTACATTACCGTCTGTCTCTCAGTTTCAAATTTATGC\n+ACAAAAATCATTCACTTTCATTCACTATATCACAAAGTTGCCATGGTTTT\n+AAATTGATCAAAAACAAATTAATATCTATCATATATaTACATAGTCATAT\n+GAACAGTTGAAAAATTAATTGAAAATAATGGGAACGATATACGTACATAC\n+ATCAGTTGTTTTTAAAATATAAGGGTATATAGATTTCTTTCttgttgttg\n+ttgatTTTAATTACGTCAAACTTTTGTTTCAGATTCAATGTAAATGGTCT\n+AGCTTTTTAAGTATGATTTTTTTTTGCTGCCAGTGAGCATAGAAaaaaaa\n+aatCaaaATCGATATAAGAATATGCGAAAGTGCATTACGAAACTCTTTAG\n+ATAATAGCACTTAATATATGTACATAGCCAATAGTTACCGGTTCCTTCTG\n+TTGGggttccttttgcttgggttcTCCCTCCGCATTTTCGTGGACTAAGC\n+GGACGTGCATGTCCCTCAGCTCGGTATTCCGGAAACTGAACTCGCAAATG\n+TCGCAGTGGGCGGGCGGGGTGGTCCGCACAGGCGGTGGGGTTGGGACGAC\n+GGGCTTGGACCTGGTTCGCTTGGCCCTCCGTTTGGGAGGAGCTGCGGCAA\n+GGAAGCCCCGGGACGAGGCGGGTTGGCCATTCGCTGGACTCTCGTTTCCC\n+TCCTGAGCCATCAGACTTGTGTGCGAGAACAGGTGGATGGTGAGCTTGTC\n+CAGCCCCAGGAAGAGCTCCTTGCAGTTGGCAAAGGGACAGGCCAGTGGGC\n+CGTTGGCCGCCTTGATCAGCCTCTGCTGCAGTGCGTCAATACTGCCGAAA\n+CTGGGCACCGCGCAGAGCGGACACAGCACCGAGGTGGAACACATTTCGCC\n+AGTGCACTCAATCGAATCTTATGCAAATGCTTCACCTCCTATTGGGATTA\n+TCCTCCTGTTCGGTCTGTGATCATCTATTCAGGAGTCCATTCCCAGACTG\n+CCTAGTCTTTCTGCTTTCAAAATTTTCTAAAAATATCAGCAAGTGAAGAT\n+TTTTGAAAACTTTGGGCCCAGCAATCTGACTTCTCGGCACCGATGCCAGC\n+TAACGAAATAATGAAaaataatgaaaTGCCCGGCGCGGATCGTCGAATCG\n+TCAAGAAGACTTTCGGAAACACTCGCAGCACCGAAATCCCATCTCTCGAA\n+CAAGGCAGTCTCTTTTCTCCGTGTCTCTGGGTAGCTCATTTCGAAATATA\n+GCTCTGAGCACGGCTATATACTATATGTATGTAGAATTATTTCTGGCCGA\n+TATATGTTGCACTGGCGGCCATATAGCCTTCGTTCTAGTCTTTGTAACGC\n+ACGATGCGCAGGAGCAATTCGCTGAGATGACCACATGCGATTTGCGGGAC\n+TTATCTAGAGATCTATCATTATCGCCAGATTGGTTTAATAATTGGCTTTT\n+CCGCCAATATCCAATTGGAATATGGTTGGTTACTGCAATTGTCGCTCCAT\n+TTTTTAAGCACTCCATAAAAAGTAAACACATTAATATGTACTCTTATTGG\n+AGATTTCTTCTTTCGATTTTAGTTTCGGACCAGTGAAAATCATTCGTTTC\n+ATTTTCGTAAATAAGAACTGAGAAAATATtattattatATATATTTCTTT\n+ATTAGGAAAATACGAAGATTGAGTATTTCAGATTGAATTAGCATATCCGT\n+CTAAATCTTAATGCTGTAATGAGCTTACTTGAGATCTGATCAAAACCAAT\n+ACaaaacccacacCAAAGGTGGTAGCTAATATACATATTTTGTGTAATAC\n+TTTTGTAGAG
TATTTACTATTCAGCGATTTAAACAAGCAATCGCCTAGAC\n+ACACACATTTGTCCGCCTATGTGTATGTGCACCGAGCTATACCCCCACTG\n+AATCGCTGTGTGCTATTTTTATGGCCGCGATGCTCTCTTGTTTTGACCCG\n+CTTGGGCAAC\n'
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_scratch/Genus_species.stats.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_scratch/Genus_species.stats.json Thu Aug 26 06:55:33 2021 +0000
b
@@ -0,0 +1,119 @@
+{
+    "format": "annotation",
+    "command": "/home/abretaud/miniconda3/envs/__funannotate@1.8.7/bin/funannotate predict --input /tmp/tmpm833xrq1/files/e/1/c/dataset_e1c34c74-e579-4cab-b0ed-5ce938ce4e4b.dat --out output --database /home/abretaud/.planemo/planemo_tmp_yntx6ieu/test-data/funannotate_db --species Genus species --isolate  --strain  --organism other --ploidy 1 --SeqCenter CFMR --SeqAccession 12345 --name FUN_ --numbering 1 --p2g_pident 80 --p2g_prefilter diamond --min_training_models 3 --busco_seed_species fly --busco_db insecta --evm-partition-interval 1500 --min_intronlen 10 --max_intronlen 3000 --min_protlen 50 --repeat_filter overlap blast --cpus 1",
+    "organism": "Genus_species",
+    "software": {
+        "name": "funannotate",
+        "version": "1.8.7",
+        "date": "2021-07-21",
+        "resources": {
+            "merops": {
+                "type": "diamond",
+                "version": "12.0",
+                "date": "2017-10-04",
+                "num-records": "5009"
+            },
+            "uniprot": {
+                "type": "diamond",
+                "version": "2021_03",
+                "date": "2021-06-02",
+                "num-records": "565254"
+            },
+            "dbCAN": {
+                "type": "hmmer3",
+                "version": "9.0",
+                "date": "2020-08-04",
+                "num-records": "641"
+            },
+            "pfam": {
+                "type": "hmmer3",
+                "version": "34.0",
+                "date": "2021-03",
+                "num-records": "19179"
+            },
+            "repeats": {
+                "type": "diamond",
+                "version": "1.0",
+                "date": "2021-07-19",
+                "num-records": "11950"
+            },
+            "go": {
+                "type": "text",
+                "version": "2021-07-02",
+                "date": "2021-07-02",
+                "num-records": "47228"
+            },
+            "mibig": {
+                "type": "diamond",
+                "version": "1.4",
+                "date": "2021-07-19",
+                "num-records": "31023"
+            },
+            "interpro": {
+                "type": "xml",
+                "version": "86.0",
+                "date": "2021-06-03",
+                "num-records": "38913"
+            },
+            "busco_outgroups": {
+                "type": "outgroups",
+                "version": "1.0",
+                "date": "2021-07-19",
+                "num-records": "8"
+            },
+            "gene2product": {
+                "type": "text",
+                "version": "1.70",
+                "date": "2021-06-15",
+                "num-records": "34039"
+            }
+        }
+    },
+    "assembly": {
+        "num_contigs": 4,
+        "length": 227430,
+        "mean_length": 56857.5,
+        "N50": 215740,
+        "L50": 1,
+        "N90": 215740,
+        "L90": 1,
+        "GC_content": 42.86
+    },
+    "annotation": {
+        "genes": 16,
+        "common_name": 0,
+        "mRNA": 16,
+        "tRNA": 0,
+        "ncRNA": 0,
+        "rRNA": 0,
+        "avg_gene_length": 1660.69,
+        "transcript-level": {
+            "CDS_transcripts": 16,
+            "CDS_five_utr": 0,
+            "CDS_three_utr": 0,
+            "CDS_no_utr": 16,
+            "CDS_five_three_utr": 0,
+            "CDS_complete": 15,
+            "CDS_no-start": 0,
+            "CDS_no-stop": 1,
+            "CDS_no-start_no-stop": 0,
+            "total_exons": 42,
+            "total_cds_exons": 42,
+            "multiple_exon_transcript": 13,
+            "single_exon_transcript": 3,
+            "avg_exon_length": 402.36,
+            "avg_protein_length": 359.81,
+            "functional": {
+                "go_terms": 0,
+                "interproscan": 0,
+                "eggnog": 0,
+                "pfam": 0,
+                "cazyme": 0,
+                "merops": 0,
+                "busco": 0,
+                "secretion": 0
+            },
+            "pct_exon_overlap_protein_evidence": 11.9
+        }
+    }
+}
\ No newline at end of file
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_scratch/Genus_species.tbl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_scratch/Genus_species.tbl Thu Aug 26 06:55:33 2021 +0000
b
@@ -0,0 +1,234 @@
+>Feature sample
+1 215740 REFERENCE
+ CFMR 12345
+1466 1092 gene
+ locus_tag FUN_000001
+1466 1092 mRNA
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000001-T1_mrna
+ protein_id gnl|ncbi|FUN_000001-T1
+1466 1092 CDS
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000001-T1_mrna
+ protein_id gnl|ncbi|FUN_000001-T1
+3142 2565 gene
+ locus_tag FUN_000002
+3142 3138 mRNA
+3004 2883
+2686 2565
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000002-T1_mrna
+ protein_id gnl|ncbi|FUN_000002-T1
+3142 3138 CDS
+3004 2883
+2686 2565
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000002-T1_mrna
+ protein_id gnl|ncbi|FUN_000002-T1
+5802 4248 gene
+ locus_tag FUN_000003
+5802 5797 mRNA
+5539 4937
+4742 4248
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000003-T1_mrna
+ protein_id gnl|ncbi|FUN_000003-T1
+5802 5797 CDS
+5539 4937
+4742 4248
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000003-T1_mrna
+ protein_id gnl|ncbi|FUN_000003-T1
+10664 7691 gene
+ locus_tag FUN_000004
+10664 10657 mRNA
+10499 8707
+8385 7691
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000004-T1_mrna
+ protein_id gnl|ncbi|FUN_000004-T1
+10664 10657 CDS
+10499 8707
+8385 7691
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000004-T1_mrna
+ protein_id gnl|ncbi|FUN_000004-T1
+15214 14247 gene
+ locus_tag FUN_000005
+15214 15209 mRNA
+14648 14247
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000005-T1_mrna
+ protein_id gnl|ncbi|FUN_000005-T1
+15214 15209 CDS
+14648 14247
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000005-T1_mrna
+ protein_id gnl|ncbi|FUN_000005-T1
+15539 16619 gene
+ locus_tag FUN_000006
+15539 15543 mRNA
+15646 15919
+16485 16619
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000006-T1_mrna
+ protein_id gnl|ncbi|FUN_000006-T1
+15539 15543 CDS
+15646 15919
+16485 16619
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000006-T1_mrna
+ protein_id gnl|ncbi|FUN_000006-T1
+21705 18358 gene
+ locus_tag FUN_000007
+21705 21700 mRNA
+21515 19638
+19482 18358
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000007-T1_mrna
+ protein_id gnl|ncbi|FUN_000007-T1
+21705 21700 CDS
+21515 19638
+19482 18358
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000007-T1_mrna
+ protein_id gnl|ncbi|FUN_000007-T1
+35679 34843 gene
+ locus_tag FUN_000008
+35679 34843 mRNA
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000008-T1_mrna
+ protein_id gnl|ncbi|FUN_000008-T1
+35679 34843 CDS
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000008-T1_mrna
+ protein_id gnl|ncbi|FUN_000008-T1
+40223 44130 gene
+ locus_tag FUN_000009
+40223 40396 mRNA
+40659 41193
+41707 42080
+43409 43609
+43678 44130
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000009-T1_mrna
+ protein_id gnl|ncbi|FUN_000009-T1
+40223 40396 CDS
+40659 41193
+41707 42080
+43409 43609
+43678 44130
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000009-T1_mrna
+ protein_id gnl|ncbi|FUN_000009-T1
+87202 88320 gene
+ locus_tag FUN_000010
+87202 87207 mRNA
+88054 88320
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000010-T1_mrna
+ protein_id gnl|ncbi|FUN_000010-T1
+87202 87207 CDS
+88054 88320
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000010-T1_mrna
+ protein_id gnl|ncbi|FUN_000010-T1
+106221 102510 gene
+ locus_tag FUN_000011
+106221 106216 mRNA
+104632 104258
+103947 103696
+103618 103229
+103151 102510
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000011-T1_mrna
+ protein_id gnl|ncbi|FUN_000011-T1
+106221 106216 CDS
+104632 104258
+103947 103696
+103618 103229
+103151 102510
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000011-T1_mrna
+ protein_id gnl|ncbi|FUN_000011-T1
+167121 169212 gene
+ locus_tag FUN_000012
+167121 168069 mRNA
+168722 169212
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000012-T1_mrna
+ protein_id gnl|ncbi|FUN_000012-T1
+167121 168069 CDS
+168722 169212
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000012-T1_mrna
+ protein_id gnl|ncbi|FUN_000012-T1
+180262 180579 gene
+ locus_tag FUN_000013
+180262 180267 mRNA
+180400 180579
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000013-T1_mrna
+ protein_id gnl|ncbi|FUN_000013-T1
+180262 180267 CDS
+180400 180579
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000013-T1_mrna
+ protein_id gnl|ncbi|FUN_000013-T1
+210553 208619 gene
+ locus_tag FUN_000014
+210553 210548 mRNA
+210474 209053
+208645 208619
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000014-T1_mrna
+ protein_id gnl|ncbi|FUN_000014-T1
+210553 210548 CDS
+210474 209053
+208645 208619
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000014-T1_mrna
+ protein_id gnl|ncbi|FUN_000014-T1
+>Feature sample4
+1 7560 REFERENCE
+ CFMR 12345
+1466 1092 gene
+ locus_tag FUN_000015
+1466 1092 mRNA
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000015-T1_mrna
+ protein_id gnl|ncbi|FUN_000015-T1
+1466 1092 CDS
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000015-T1_mrna
+ protein_id gnl|ncbi|FUN_000015-T1
+2126 >3537 gene
+ locus_tag FUN_000016
+2126 2199 mRNA
+2258 3224
+3284 >3537
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000016-T1_mrna
+ protein_id gnl|ncbi|FUN_000016-T1
+2126 2199 CDS
+2258 3224
+3284 >3537
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000016-T1_mrna
+ protein_id gnl|ncbi|FUN_000016-T1
b
diff -r 000000000000 -r 40b87aef5241 test-data/predict_scratch/fly.parameters.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_scratch/fly.parameters.json Thu Aug 26 06:55:33 2021 +0000
[
@@ -0,0 +1,1 @@
+{"augustus": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-21", "path": "/tmp/tmpm833xrq1/job_working_directory/000/2/working/output/predict_misc/ab_initio_parameters/augustus/species/genus_species"}], "genemark": [{}], "codingquarry": [{}], "snap": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-21", "path": "/tmp/tmpm833xrq1/job_working_directory/000/2/working/output/predict_misc/ab_initio_parameters/genus_species.snap.hmm"}], "glimmerhmm": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-21", "path": "/tmp/tmpm833xrq1/job_working_directory/000/2/working/output/predict_misc/ab_initio_parameters/glimmerhmm"}]}
\ No newline at end of file
b
diff -r 000000000000 -r 40b87aef5241 tool-data/funannotate.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/funannotate.loc.sample Thu Aug 26 06:55:33 2021 +0000
b
@@ -0,0 +1,8 @@
+# This is a tab-separated file describing the location of the funannotate databases used by the
+# funannotate annotation tool.
+#
+# the columns are:
+# value  description format_version path
+#
+# for example
+# 2021-07-20-120000 Funannotate database 2021-07-20-120000 1.0 /tmp/database/funannotate/funannotate/2021-07-20-120000
b
diff -r 000000000000 -r 40b87aef5241 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Thu Aug 26 06:55:33 2021 +0000
b
@@ -0,0 +1,6 @@
+<tables>
+    <table name="funannotate" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, description, format_version, path</columns>
+        <file path="tool-data/funannotate.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r 40b87aef5241 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Thu Aug 26 06:55:33 2021 +0000
b
@@ -0,0 +1,6 @@
+<tables>
+    <table name="funannotate" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, description, format_version, path</columns>
+        <file path="${__HERE__}/test-data/funannotate.loc" />
+    </table>
+</tables>