Repository 'funannotate_compare'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/funannotate_compare

Changeset 0:857f7ac611e1 (2021-10-04)
Next changeset 1:e2e43248dd18 (2021-11-18)
Commit message:
"planemo upload commit 87560553f1dbbd3e0ab7d7157fa5a7f32f61dca1"
added:
README.md
funannotate_compare.xml
macros.xml
test-data/SRR7458692.bam
test-data/cleaned.fa
test-data/cleaned_ident.fa
test-data/compare/Genus_species.gbk
test-data/compare/Other_beast.gbk
test-data/compare/Other_species.gbk
test-data/compare/Yet_another.gbk
test-data/funannotate.loc
test-data/funannotate_db/Pfam-A.clans.tsv
test-data/funannotate_db/Pfam-A.hmm
test-data/funannotate_db/Pfam-A.hmm.h3f
test-data/funannotate_db/Pfam-A.hmm.h3i
test-data/funannotate_db/Pfam-A.hmm.h3m
test-data/funannotate_db/Pfam-A.hmm.h3p
test-data/funannotate_db/dbCAN.hmm
test-data/funannotate_db/dbCAN.hmm.h3f
test-data/funannotate_db/dbCAN.hmm.h3i
test-data/funannotate_db/dbCAN.hmm.h3m
test-data/funannotate_db/dbCAN.hmm.h3p
test-data/funannotate_db/funannotate-db-info.txt
test-data/funannotate_db/go.obo
test-data/funannotate_db/insecta/ancestral
test-data/funannotate_db/insecta/dataset.cfg
test-data/funannotate_db/insecta/hmms/EOG090W03A6.hmm
test-data/funannotate_db/insecta/hmms/EOG090W06A3.hmm
test-data/funannotate_db/insecta/hmms/EOG090W0GYE.hmm
test-data/funannotate_db/insecta/hmms/EOG090W0T3K.hmm
test-data/funannotate_db/insecta/lengths_cutoff
test-data/funannotate_db/insecta/prfl/EOG090W03A6.prfl
test-data/funannotate_db/insecta/prfl/EOG090W06A3.prfl
test-data/funannotate_db/insecta/prfl/EOG090W0GYE.prfl
test-data/funannotate_db/insecta/prfl/EOG090W0T3K.prfl
test-data/funannotate_db/insecta/scores_cutoff
test-data/funannotate_db/interpro.tsv
test-data/funannotate_db/merops.dmnd
test-data/funannotate_db/merops.formatted.fa
test-data/funannotate_db/ncbi_cleaned_gene_products.txt
test-data/funannotate_db/repeats.dmnd
test-data/funannotate_db/trained_species/fly/augustus/fly_exon_probs.pbl
test-data/funannotate_db/trained_species/fly/augustus/fly_igenic_probs.pbl
test-data/funannotate_db/trained_species/fly/augustus/fly_intron_probs.pbl
test-data/funannotate_db/trained_species/fly/augustus/fly_metapars.cfg
test-data/funannotate_db/trained_species/fly/augustus/fly_metapars.utr.cfg
test-data/funannotate_db/trained_species/fly/augustus/fly_parameters.cfg
test-data/funannotate_db/trained_species/fly/augustus/fly_utr_probs.pbl
test-data/funannotate_db/trained_species/fly/augustus/fly_weightmatrix.txt
test-data/funannotate_db/trained_species/fly/info.json
test-data/funannotate_db/uniprot.dmnd
test-data/funannotate_db/uniprot_sprot.fasta
test-data/genome.fa
test-data/genome_masked.fa
test-data/predict_augustus/Genus_species.cds-transcripts.fa
test-data/predict_augustus/Genus_species.discrepency.report.txt
test-data/predict_augustus/Genus_species.error.summary.txt
test-data/predict_augustus/Genus_species.gbk
test-data/predict_augustus/Genus_species.gff3
test-data/predict_augustus/Genus_species.mrna-transcripts.fa
test-data/predict_augustus/Genus_species.proteins.fa
test-data/predict_augustus/Genus_species.scaffolds.fa
test-data/predict_augustus/Genus_species.stats.json
test-data/predict_augustus/Genus_species.tbl
test-data/predict_augustus/Genus_species.validation.txt
test-data/predict_augustus/fly.parameters.json
test-data/predict_bam/Genus_species.cds-transcripts.fa
test-data/predict_bam/Genus_species.discrepency.report.txt
test-data/predict_bam/Genus_species.error.summary.txt
test-data/predict_bam/Genus_species.gbk
test-data/predict_bam/Genus_species.gff3
test-data/predict_bam/Genus_species.mrna-transcripts.fa
test-data/predict_bam/Genus_species.proteins.fa
test-data/predict_bam/Genus_species.stats.json
test-data/predict_bam/Genus_species.tbl
test-data/predict_bam/Genus_species.validation.txt
test-data/predict_bam/fly.parameters.json
test-data/predict_scratch/Genus_species.cds-transcripts.fa
test-data/predict_scratch/Genus_species.discrepency.report.txt
test-data/predict_scratch/Genus_species.error.summary.txt
test-data/predict_scratch/Genus_species.gbk
test-data/predict_scratch/Genus_species.gff3
test-data/predict_scratch/Genus_species.mrna-transcripts.fa
test-data/predict_scratch/Genus_species.proteins.fa
test-data/predict_scratch/Genus_species.scaffolds.fa
test-data/predict_scratch/Genus_species.stats.json
test-data/predict_scratch/Genus_species.tbl
test-data/predict_scratch/Genus_species.validation.txt
test-data/predict_scratch/fly.parameters.json
tool-data/funannotate.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 000000000000 -r 857f7ac611e1 README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md Mon Oct 04 19:38:37 2021 +0000
b
@@ -0,0 +1,5 @@
+# Funannotate
+
+Funannotate can use GeneMark to predict genes, but due to licensing issues, we are not allowed to distribute GeneMark automatically.
+
+If you want to use it, the Galaxy administrator needs to install GeneMark following the instructions at https://github.com/nextgenusfs/funannotate, and set the `GENEMARK_PATH` variable on the job destination.
b
diff -r 000000000000 -r 857f7ac611e1 funannotate_compare.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/funannotate_compare.xml Mon Oct 04 19:38:37 2021 +0000
[
@@ -0,0 +1,124 @@
+<tool id="funannotate_compare" name="Funannotate compare" profile="20.01" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+    <description>annotations</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <expand macro="requirements" />
+    </requirements>
+    <version_command>funannotate check --show-versions</version_command>
+    <command><![CDATA[
+## Symlink each GenBank input to inputN.gbk in the working directory and collect those names for --input
+#set $inputs = ""
+#for i, f in enumerate($input):
+    #set $inputs = $inputs + " input" + str($i) + ".gbk"
+    ln -s '$f' input${i}.gbk &&
+#end for
+
+funannotate compare
+--input
+$inputs
+--out output
+--database '$database.fields.path'
+
+## The "no" choice of the select is expressed by leaving the flag out entirely
+#if $run_dnds in ['estimate', 'full']:
+    --run_dnds ${run_dnds}
+#end if
+
+## TODO add --outgroup option some day (hard to get an up to date/customizable list)
+
+--go_fdr ${go_fdr}
+--heatmap_stdev ${heatmap_stdev}
+--num_orthos ${num_orthos}
+
+## Optional dataset: the flag is only emitted when a file was supplied
+#if $proteinortho:
+    --proteinortho '${proteinortho}'
+#end if
+
+--ml_method ${ml.ml_method}
+## The bootstrap parameter only exists in the raxml branch of the "ml" conditional
+#if $ml.ml_method == 'raxml':
+    --bootstrap '${ml.bootstrap}'
+#end if
+
+--cpus \${GALAXY_SLOTS:-2}
+&&
+
+## Publish the report: the whole output directory as extra files, index.html as the HTML dataset itself
+mkdir -p '$output.files_path' &&
+cp -R output/* '$output.files_path' &&
+cp output/index.html '$output'
+    ]]></command>
+    <inputs>
+        <param argument="--input" type="data" format="genbank" multiple="true" label="Genome annotations in genbank format" help="Outputs from 'Funannotate predict annotation' tool" />
+
+        <!-- Choices come from the "funannotate" data table: sorted on column 0 and limited to rows whose column 2 is "1.0" -->
+        <param name="database" label="Funannotate database" type="select">
+            <options from_data_table="funannotate">
+                <column name="value" index="0" />
+                <column name="name" index="1" />
+                <column name="path" index="3" />
+                <filter type="sort_by" column="0" />
+                <filter type="static_value" column="2" value="1.0" />
+            </options>
+        </param>
+
+        <!-- "no" is a form-only value: the command template passes the flag through only for estimate/full -->
+        <param argument="--run_dnds" type="select" label="Calculate dN/dS ratio on all orthologs">
+            <option value="no" selected="True">No</option>
+            <option value="estimate">estimate</option>
+            <option value="full">full</option>
+        </param>
+
+        <param argument="--go_fdr" type="float" value="0.05" label="P-value for FDR GO-enrichment" />
+        <param argument="--heatmap_stdev" type="float" value="1.0" label="Cut-off for heatmap" />
+        <param argument="--num_orthos" type="integer" value="500" label="Number of Single-copy orthologs to use for ML" />
+        <param argument="--proteinortho" type="data" format="tabular" optional="true" label="Proteinortho POFF results" help="TSV format, if available"/>
+
+        <conditional name="ml">
+            <param argument="--ml_method" type="select" label="Maximum Likelihood method">
+                <option value="iqtree" selected="True">iqtree</option>
+                <option value="raxml">raxml</option>
+            </param>
+            <when value="iqtree" />
+            <when value="raxml">
+                <param argument="--bootstrap" type="integer" value="100" label="Number of bootstrap replicates to run with RAxML" />
+            </when>
+        </conditional>
+
+        <!-- Need this to change path in the test funannotate_db; the command template above does not reference it -->
+        <param type="hidden" name="uglyTestingHack" value="" />
+    </inputs>
+    <outputs>
+        <data name='output' format='html' label="${tool.name} on ${on_string}: report" />
+    </outputs>
+    <tests>
+        <!-- Compares four GenBank annotations using the raxml method and checks that the HTML report was rendered -->
+        <test>
+            <param name="input" value="compare/Genus_species.gbk,compare/Other_species.gbk,compare/Other_beast.gbk,compare/Yet_another.gbk" />
+            <param name="database" value="2021-07-20-120000" />
+            <conditional name="ml">
+                <param name="ml_method" value="raxml" />
+            </conditional>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="Report generated on:" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+Funannotate_ compare
+--------------------
+
+Funannotate_ is a pipeline for genome annotation (built specifically for fungi, but will also work with higher eukaryotes).
+
+This script does light-weight comparative genomics between funannotated genomes.  Output
+is graphs, phylogeny, CSV files, etc --> visualized in web-browser.
+
+.. _Funannotate: http://funannotate.readthedocs.io
+    ]]></help>
+    <expand macro="citations" />
+</tool>
b
diff -r 000000000000 -r 857f7ac611e1 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,159 @@\n+<?xml version="1.0"?>\n+<macros>\n+    <token name="@TOOL_VERSION@">1.8.9</token>\n+    <token name="@VERSION_SUFFIX@">1</token>\n+\n+    <xml name="requirements">\n+        <requirement type="package" version="@TOOL_VERSION@">funannotate</requirement>\n+    </xml>\n+\n+    <xml name="citations">\n+        <citations>\n+            <citation type="doi">10.5281/zenodo.4054262</citation>\n+        </citations>\n+    </xml>\n+\n+    <xml name="augustus_species">\n+        <!-- list generated from a Funannotate database directory, listing trained_species/* -->\n+        <option value="adorsata">adorsata</option>\n+        <option value="aedes">aedes</option>\n+        <option value="amphimedon">amphimedon</option>\n+        <option value="ancylostoma_ceylanicum">ancylostoma_ceylanicum</option>\n+        <option value="anidulans">anidulans</option>\n+        <option value="arabidopsis">arabidopsis</option>\n+        <option value="aspergillus_fumigatus">aspergillus_fumigatus</option>\n+        <option value="aspergillus_nidulans">aspergillus_nidulans</option>\n+        <option value="aspergillus_oryzae">aspergillus_oryzae</option>\n+        <option value="aspergillus_terreus">aspergillus_terreus</option>\n+        <option value="bombus_impatiens1">bombus_impatiens1</option>\n+        <option value="bombus_terrestris2">bombus_terrestris2</option>\n+        <option value="botrytis_cinerea">botrytis_cinerea</option>\n+        <option value="b_pseudomallei">b_pseudomallei</option>\n+        <option value="brugia">brugia</option>\n+        <option value="cacao">cacao</option>\n+        <option value="caenorhabditis">caenorhabditis</option>\n+        <option value="camponotus_floridanus">camponotus_floridanus</option>\n+        <option value="candida_albicans">candida_albicans</option>\n+        <option value="candida_guilliermondii">candida_guilliermondii</option>\n+        <option value="candida_tropicalis">candida_tropicalis</option>\n+        <option 
value="c_elegans_trsk">c_elegans_trsk</option>\n+        <option value="chaetomium_globosum">chaetomium_globosum</option>\n+        <option value="chicken">chicken</option>\n+        <option value="chiloscyllium">chiloscyllium</option>\n+        <option value="chlamy2011">chlamy2011</option>\n+        <option value="chlamydomonas">chlamydomonas</option>\n+        <option value="chlorella">chlorella</option>\n+        <option value="ciona">ciona</option>\n+        <option value="coccidioides_immitis">coccidioides_immitis</option>\n+        <option value="Conidiobolus_coronatus">Conidiobolus_coronatus</option>\n+        <option value="coprinus">coprinus</option>\n+        <option value="coprinus_cinereus">coprinus_cinereus</option>\n+        <option value="coyote_tobacco">coyote_tobacco</option>\n+        <option value="cryptococcus">cryptococcus</option>\n+        <option value="cryptococcus_neoformans_gattii">cryptococcus_neoformans_gattii</option>\n+        <option value="cryptococcus_neoformans_neoformans_B">cryptococcus_neoformans_neoformans_B</option>\n+        <option value="cryptococcus_neoformans_neoformans_JEC21">cryptococcus_neoformans_neoformans_JEC21</option>\n+        <option value="culex">culex</option>\n+        <option value="debaryomyces_hansenii">debaryomyces_hansenii</option>\n+        <option value="E_coli_K12">E_coli_K12</option>\n+        <option value="elephant_shark">elephant_shark</option>\n+        <option value="encephalitozoon_cuniculi_GB">encephalitozoon_cuniculi_GB</option>\n+        <option value="eremothecium_gossypii">eremothecium_gossypii</option>\n+        <option value="fly">fly</option>\n+        <option value="fly_exp">fly_exp</option>\n+        <option value="fusarium">fusarium</option>\n+        <option value="fusarium_graminearum">fusarium_graminearum</option>\n+        <option value="galdieria">galdieria</option>\n+        <option value="generic">generic</option>\n+        <option 
value="heliconius_melpomene1">heliconius_melpomene1</option>\n+        <option value="histoplasma">histoplasma</opt'..b'lue="pneumocystis">pneumocystis</option>\n+        <option value="rhincodon">rhincodon</option>\n+        <option value="rhizopus_oryzae">rhizopus_oryzae</option>\n+        <option value="rhodnius">rhodnius</option>\n+        <option value="rice">rice</option>\n+        <option value="saccharomyces">saccharomyces</option>\n+        <option value="saccharomyces_cerevisiae_rm11-1a_1">saccharomyces_cerevisiae_rm11-1a_1</option>\n+        <option value="saccharomyces_cerevisiae_S288C">saccharomyces_cerevisiae_S288C</option>\n+        <option value="s_aureus">s_aureus</option>\n+        <option value="schistosoma">schistosoma</option>\n+        <option value="schistosoma2">schistosoma2</option>\n+        <option value="schizosaccharomyces_pombe">schizosaccharomyces_pombe</option>\n+        <option value="scyliorhinus">scyliorhinus</option>\n+        <option value="sealamprey">sealamprey</option>\n+        <option value="s_pneumoniae">s_pneumoniae</option>\n+        <option value="strongylocentrotus_purpuratus">strongylocentrotus_purpuratus</option>\n+        <option value="sulfolobus_solfataricus">sulfolobus_solfataricus</option>\n+        <option value="template_prokaryotic">template_prokaryotic</option>\n+        <option value="tetrahymena">tetrahymena</option>\n+        <option value="thermoanaerobacter_tengcongensis">thermoanaerobacter_tengcongensis</option>\n+        <option value="tomato">tomato</option>\n+        <option value="toxoplasma">toxoplasma</option>\n+        <option value="tribolium2012">tribolium2012</option>\n+        <option value="trichinella">trichinella</option>\n+        <option value="ustilago">ustilago</option>\n+        <option value="ustilago_maydis">ustilago_maydis</option>\n+        <option value="verticillium_albo_atrum1">verticillium_albo_atrum1</option>\n+        <option 
value="verticillium_longisporum1">verticillium_longisporum1</option>\n+        <option value="volvox">volvox</option>\n+        <option value="wheat">wheat</option>\n+        <option value="Xipophorus_maculatus">Xipophorus_maculatus</option>\n+        <option value="yarrowia_lipolytica">yarrowia_lipolytica</option>\n+        <option value="zebrafish">zebrafish</option>\n+    </xml>\n+\n+    <xml name="busco_species">\n+        <!-- list generated from a Funannotate database directory, with the "funannotate database -show-buscos command" -->\n+        <option value="eukaryota">eukaryota</option>\n+        <option value="metazoa">metazoa</option>\n+        <option value="nematoda">nematoda</option>\n+        <option value="arthropoda">arthropoda</option>\n+        <option value="insecta">insecta</option>\n+        <option value="endopterygota">endopterygota</option>\n+        <option value="hymenoptera">hymenoptera</option>\n+        <option value="diptera">diptera</option>\n+        <option value="vertebrata">vertebrata</option>\n+        <option value="actinopterygii">actinopterygii</option>\n+        <option value="tetrapoda">tetrapoda</option>\n+        <option value="aves">aves</option>\n+        <option value="mammalia">mammalia</option>\n+        <option value="euarchontoglires">euarchontoglires</option>\n+        <option value="laurasiatheria">laurasiatheria</option>\n+        <option value="fungi">fungi</option>\n+        <option value="dikarya">dikarya</option>\n+        <option value="ascomycota">ascomycota</option>\n+        <option value="pezizomycotina">pezizomycotina</option>\n+        <option value="eurotiomycetes">eurotiomycetes</option>\n+        <option value="sordariomycetes">sordariomycetes</option>\n+        <option value="saccharomycetes">saccharomycetes</option>\n+        <option value="saccharomycetales">saccharomycetales</option>\n+        <option value="basidiomycota">basidiomycota</option>\n+        <option 
value="microsporidia">microsporidia</option>\n+        <option value="embryophyta">embryophyta</option>\n+        <option value="protists">protists</option>\n+        <option value="alveolata_stramenophiles">alveolata_stramenophiles</option>\n+    </xml>\n+</macros>\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/SRR7458692.bam
b
Binary file test-data/SRR7458692.bam has changed
b
diff -r 000000000000 -r 857f7ac611e1 test-data/cleaned.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cleaned.fa Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,2698 @@\n+>sample\n+GTGGCCGGCTGATTTCACGTCCTAACTTTGGGCTTAACTGGTTCGCCAGTTGACTTTCTTCGCCATCATGTGATGCATTA\n+ATTAAACAATAATTACTAATTGACAGTAATTAATAATTGTGGCAAAAAGCGCGACACGTTTTTTCGGCAAACTCCTCGGA\n+AGACCGATTGTTTAAAGACGTAGGGAAAAGTAGTTCCCAAGCATTTTAAAAAGATACCTATGACATGTGACACCTTTAAA\n+GTGCAATACAAGTTTTCATCTCTTTATATCCTTTTACTCCCTAATTTGAATATAAAAGGAATCGCATTGAGAGTATAAAG\n+GCTTTAGTTCTTATCGATAGATAGTTAGTGATGAAATAAAATTATAACCGTGGTTTTAGTTTGAAATGTTGTAAAGACTT\n+TCTTTAAATTTAACCAAATTTATGTGATAAAATGGATATTCCATAGATAAGACATTTAAGTTAAATGTTTTTATACATCA\n+AAAAGGAAACATTGTGCACGCTATCAAATGGTATTCTTAAAATCGAGTCAGTTAGGTAAGTTATTAATTAAATGGTAACT\n+TTTTAACGTGCGTCAAATAATCTAGAAATTCTTCTTCAACTCATCCAAAACATTCTCAACACCACAATATCTATGCTCAG\n+CGATGACAAATTTCTCCTGATTTCTTAATTTTCTATCTATGCTATGCGATCAATCAACGAATGTGTGCTAATTTCTTGTG\n+ACGATTATTTTGCAAAGTCGTCTCCGCGTTAATATCCGATGTAAATAAACCTATGAAAATCGCAAAGATCTATTCCTTTG\n+CGCTTAACCTTGTTATTGAACTCCCTCCCGCCCGGATTTTCGCAGCTTCCAACTAAGTGATACCTTTTAAACAAACGCCA\n+CAACAAAACAGGTGACAATCATATTTTATCAACAAAAGAAAAGAGAGATAATATCTGCTGCTAATTCAATTTATTGGGCT\n+TTGTGTTTATTTGCATTGGGAATCCGTGGAGCTGATATTGTTTACTTTGGCAATTTCCCAGTCATTATGGTCGCCGTATA\n+ATGTGAATGGCCAACAGGAAAATTTCACAGATTCCACTGGTTATTCACTGTTCATGCTGGCGGTTGATCCGCTCCAAATC\n+CCGGATGAGGCGGCGGAAGATCCTTATACCGCTTACTTACGTATTCACTGCTTAACATCTTATCATAGAATTCGTTAGCT\n+TTCACATAGTTTGGATTCCGAATGAATCTCTTCTGTCGACCTCGCGTTTTTGGCAATTTTCGGAGCTCTTCGCTACTGCT\n+AGTTGATTTAAGGCCTACATCTTTCTGATCCTTAATCATCCTTTTAGGTGATTCTCCTGCTACCATGGGATCCACCAGTT\n+TCATATATAATGGATCGGGCTCGTTTGCAAGTGGGTTTCCAGTTTCAGAAGACTCTGACGTTCTAAGCGCCAATAAAATG\n+AAAATAACCAAAAGGAAACTAGACATGTTTCGTTACAGACAGATATAGATTGGATATTATTGAAAGAAAATGAAAATAAA\n+CAGCGATAATGATCTGTGACTTATTGGAAATTAGATGGCTTATGGATGATGGGGCGATAAATTCGAACAAACACTGAGAG\n+CATTTTTGGGAGCATTGTGGGAGCTTTTCTATTATCCAGTACTTTATATACATATATCATTTATATACTAATCATTTCTG\n+GTAGCCGTTCGTAATCAGGATCGGATCCTTTTTTACCCGTTAGTCAGCTAGAAGAAACGAAAAATTAAAATAGTAAAATC\n+TAAAAGTATACAAAAATTCAAATAGTAAAACCAAAAAGTATTAAAAAAAATATCAATCGTTTTTAAACGTTGATTTTTCA\n+GCTTGTGGGGTGATTTATCGCTAACTTGGAAAATGATAATAAAGCATTATCCATAATA
TTAGTTGTGGAAATGAAATTCA\n+AATAGATGTTGTGTTATATACGATGAGGATGTTGCATTTGAGTCCCCGGAAATATAGTATTTTTTTTACCGAAGGTATTA\n+TCGTACCGGTCAAGTACGGTCACACTGCCAAGCGCAGATTTGAGGATTTCTAGATTTGGCCTCTTGATGGACTAGAAGCG\n+CTACCAAAACTGGGGCTTGAGTTGAATTACCTGTTGGAAGACACAATGCCACCCACGATCAACAATTCGGCGGTAAACAG\n+TGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAGTAAGTGAACAGATCCCTAAACAGACGCCAGATACTCAGAC\n+TGATGTGTACCTTGCAGATCCGAGATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAA\n+GTTTCTGCAGTACCCCTTCGACAGCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACG\n+TGCTGACGGAACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTG\n+GACCCCGCCGATGAAAAACTGCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGAC\n+GGTGTCATGGTTGCGCAAATCCGAGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGG\n+CCAAGGTCGGTTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATC\n+GAGAAGACCTTCAGCGACACCAAGAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCC\n+TATCTTCCCCGACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGA\n+ACGTGCCCGCCCAGCTGGAGGAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCC\n+TACTTCCTGCCCACAGAGCAGACGCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGAGGAGGA\n+GTACGAGTACAAGATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCT\n+TCGTGATGCGTCAGGACGGCATCTACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGC\n+CAGCAACCCAACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCTATAGCGAGCTTTACTTTGTATTATTTCTAC\n+CAGGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGG\n+CGAGGAGGAGGAGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGG\n+ACGCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCGATCGACAGTCTCGTTCTCGG\n+AGTCGAACTCGCAGCGGGTCCAGTTCAGGATCTGGATCTGGCTCCGGCTCTCGGGCCAGCAGCCGCTCAAAGTCTGGTTC\n+TCGGTCTGGTAGCGGCTCCAGATCACGCACAAATTCGCCGGCAGGATCCCAGAAATCCGGATCCAGATCGAGATCGGTAT\n+CACGTTCCCGATCCCGTTCCAAGTCCGGCTCTCGGTCGCGTTCTAGGTCGAGATCCAAGTCCGGTTCCCGATCACGTTCG\n+GGCTCCAGATCTGGCTCTGGGTCGCGATCGCCCAGCCGGTCTCGCAGTGGCTCGCCTTCTGGTTCA
GGATCCAGCTCTGG\n+AAGCGCCTCAGATGAATGATTAATTACAAAAAAC'..b'\n+ATCGGCGACTGTCTGTCATTGTATCCTTCTGCATTCCATTCGTATGTCCGTTTGTCTGTTCATTAGTCCGTCCGTTTGTC\n+CGCCCGTAACTCCGTCCCTGTGTCCTTTTTTTCCGTCCGTTTTCCTGATAAATACTTTTTAAGGAATCCAGCTTACCCTT\n+TTGCACTACAGGTAGCGTGAATAAAAATAAAATAAAAGAGCTAATTTTAAATTAAAATAAAAACAAAAACATCTCTTTGT\n+GTTTATATTTTCCCACTGTGCAGCACAAACACCCCTTTTGCCCACTTAAGCTTGCCACGTTTTCCCTTACTTATAACAGA\n+ACTTACAAACATGCGTTTGGTTTTCGTGGGTTGAGTTTGGTGCTCTCCGTTTACTTTTGCGGTTTTGTTCAGCGCTGCAT\n+ATATTTCCATATTAATTCCCCTGATTATGGGGAATCGTCATCGTCTGCGTTCTGTTCCCTGACGGTTTTGCCCAAATCCA\n+AATCCAAATCCAAATCCAAATCCAAATGCGAATGCGAATATCTGAATGCAGACCACAATTCGACGATGATGTTTCTGTTT\n+CAGAATAATCTAAATCGGCATTATTTATGCATTCAGTTCTTGCATTCATCACAACCACTTAGCGGTTCCACTTCAACGAA\n+CCCACAGATACACAATACATACATATTCATTTATGTATGTATGTACATACCGTCGTGTATATAATTAGTATGCATAGAAG\n+ATACATACGTATGCATTTTAATGGACCACAATTCCCTCGGCCAAAAGAGTGCTCCGATAATAAATATTAATTTTAATAAA\n+TGCTGATGCAGGTCAGCTGATTTCAAACGACCCTTTGTGTCACGGGGCAATGACTTTTGCTCAGTATATGTAGTATTCAA\n+TTTTCAATTTTCCGAAAATGGTATTACAATTACAATTTTTTAAATATTAGATCTCAAAAAATTGCTTGGCATACTTAGCA\n+ACATATCTTTAACTCTCAAGAGTCATATACACCCGATTTTGTTTCAAGTCCCCCTGGGACTTAAAAACCGACAATTACGC\n+CCAACTTGCAATCATTTGGAAAAAAAAACACCAGCTACGAACTTATCAAAACTTATATATCAGCAAAAAATAAATGGGTC\n+TCTATGCGCAGTTATCGGCCCACAAAATATATACAAAAGAAATGACATGTCATTAACTCCGGCCAAGGGCGTGCTTTTTG\n+GTGCCATCGCATTTGGGGGTAACTAGTCGCGGGTCGACCCAGAATCACATCTCCAGGAAGTGGATGTCCAGGGCGTGGTG\n+GTCCACATCGACGGGAGGGTGCAGCTGCAGGAACTTGACGTAGGCCAGGAAGGTGGTCCAGCACATGCTGAAGAAGGAGG\n+TAAACACGACCTGGTTTCGCGCCGGAACAAAGGCGAAGTTCACCGTCTGCACGCAGGGCCAATAAATAACGCCCACCTTG\n+TAGGCGTCCAGGAATTTATCGCTGACCTGGGAACGAGAGGGGGCAACGGGGTGGGTGAGTGCAGTGACATAATGCCCCCA\n+GTAGCAGTCGGAAAATGGAAATGGAAAATGCCAGCGCGGAAAAATTGTAATTAAGTGCCGCCGTCCAGCAGCGGTAGGAG\n+ACGTTTAATATTTTATTTATGGCCTGATCTCTTCTCCTCTTTTAAGGGGCGCTTGACTGGGGCGTGGCATTCAGTTAATG\n+TTGTTAATTAAAAGGCAACGCCTGGCTAGAAAAATTATCATCCAGGCACAGACTGTGTAACAAATGTAACATAGAGCACT\n+TTGGCACATTTTCAATTGGCATGAGAACTTCATTTAACTACAAAGACTATCCTTGTGCCATAAAACTTTCTTTTGTGGAT\n+CTATAGAAGTTGAATCGTTTTACAATCCTCAC
ATAAAATATAGAACTTCGATGGCGAACGCATAGCTATTCCGAAAGCGA\n+TCTTGAAACTTGTACTGCCTTCTACCCGCTTCCAATTTGAAACTACTTTTAGGTGAGGGTCGAGGAACTTACGTCTCTAT\n+CGGCCACGCACACAACTCTTTTACGGCCCAAACCTGAGCCTATCTCAACCAAATCGCTCAGAATAGTCGAGCGATATATC\n+TTTGGAGAACGCATGCAAGGCAAATGAGACGACGCACGTTTGACACAAATATATAGCAAAACATTTACATTTATCAAGAT\n+AAATGGTTTCTAATGATATGGAAGTCAGCCATCGAACCGAGTTTGGCCATGGAGCACAGTGCCTGAAAGTATCTCGCAGT\n+ATGGCCGCAGAGCCAACAAATCAATGTTGACAGCTCGAAACGGAGGACCAGACGCAGAAATTGGGAATGGAGGGAGGTGA\n+GCGACAGACAGGTGATATTTATGCATGCTTACATTGCAGCCTCTGTCTCCGCCACAGATGCAGATTCATAGATACAGATA\n+CAAAGATGGGGATACTGCCTCTGAATGTGTCGTGAGAAAATGGATTCCGAAATCAAAAACTACAGACCGAAAACCGATTT\n+CCAAATAAATAACAATGCATACTGGGCACACACTTAGTAATGAGCACATCTGCGAAATGAAAGACCTTACCAGATAACTG\n+TCAACATTTTAAAATCGTTAAAAGTTGATTCAGGATTTGGGGTAGCCGTGCCAACGGTGTGAATGGGCATGAATAATATG\n+ACATATTCCTTTCCCGAGTAATGAAAAATGTTTTCAGCGAATCTATCCACGAATACCGTACATAAAATAGAGACTCTTCT\n+GCTTGTTAGATATCGTTGGCCCCCGACAAAATGTGGTTCTTTGAAATGAAATTTGAAATAAGTTTGTTGTGCTTAGGCCT\n+TAGTGACTTTGGGATGGGAATATACTTCACCTCCCGCTTGGCCTCCGCGTACGAGTTGCCCTCCATCAAGGTCATGAAGA\n+AGAGGAACGAGCTGATGGCCATCGGATCATAAGCGGTCTGCTCGGTGATCGCCTTGCAGAGCGATGACTTAATGTCGGTG\n+CGCGGCCACATAACGCTGGCCAATCTGATCCACACATATATGGTGGGCCCCATAAAGAAGAAGCCGAATAAGCTGAACCT\n+GCAATGGAATTAGCCAAACCATATCGGGTCCAATTAGCAATCGAATCGGGCCAAGACCCGTGCAGATTGCTCATCACCTA\n+AGGCACTTCATCCAGTCGTACGTCCGGAATGTCTTCTTCTCGATCATGGTCTGCTCGATGAGGGAGCCGCAGGGCCAGAG\n+GGTGCCATACGATATCATGCCGCGTAGGACTTTGTATTTGCTTGTAATATTCACCAAGCTACGAAACATTTTCAACGGTC\n+TCTTGTGATGATCTCGTCGAATGTCAGTGATAGTTTCACTTCCACAAAAGTGCTATAAACACACGCTGAAAGATAAATTG\n+TTTTAGATCAATACGGTGGGCTTTACATGGCTGAGTTCGCTTGGGTTAGTTTTATTAGGTGCCATATTTGTCTTGGCGGA\n+TTTCACTTTTGCAAAAAGTTCGGTTTTAACTCATACGACCTAGAAAGTTATCGAGTACTGATTGTCTTGGGTGGCCATTC\n+CAAATTCAACCGCATTATTCTCCCTCGACAGTGCCTAACTTAGCCACAGAATTCCAGCTCAAGACAATTGCAAATAAGTG\n+TCGGCCAAGCACAGCTGGCACTTCTGGACTCTTCTCTGAACTGATGGATGGCTAAGAGGTGGGTTGGTCTTTGGAAATGG\n+AGAGGGGGTGGTGCCCGCCATCCAGTAACACATGTCAAGTATTATTATCGAGCAACTACAAACGAAATACAAACATACAT\n+TTAATTGCCAAGAATGTTCCTAGCCAAACAAATGTCGAAA
TAATGTCGGGCCACGGCCAAAGATTTATCATATTCACTCG\n+ACTGTCATAAAAAGCAATTAGGCCCGGGCGAAAGATACACGACCGGCAGTCCCGAAATGT\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/cleaned_ident.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cleaned_ident.fa Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,2849 @@\n+>sample\n+GTGGCCGGCTGATTTCACGTCCTAACTTTGGGCTTAACTGGTTCGCCAGTTGACTTTCTTCGCCATCATGTGATGCATTA\n+ATTAAACAATAATTACTAATTGACAGTAATTAATAATTGTGGCAAAAAGCGCGACACGTTTTTTCGGCAAACTCCTCGGA\n+AGACCGATTGTTTAAAGACGTAGGGAAAAGTAGTTCCCAAGCATTTTAAAAAGATACCTATGACATGTGACACCTTTAAA\n+GTGCAATACAAGTTTTCATCTCTTTATATCCTTTTACTCCCTAATTTGAATATAAAAGGAATCGCATTGAGAGTATAAAG\n+GCTTTAGTTCTTATCGATAGATAGTTAGTGATGAAATAAAATTATAACCGTGGTTTTAGTTTGAAATGTTGTAAAGACTT\n+TCTTTAAATTTAACCAAATTTATGTGATAAAATGGATATTCCATAGATAAGACATTTAAGTTAAATGTTTTTATACATCA\n+AAAAGGAAACATTGTGCACGCTATCAAATGGTATTCTTAAAATCGAGTCAGTTAGGTAAGTTATTAATTAAATGGTAACT\n+TTTTAACGTGCGTCAAATAATCTAGAAATTCTTCTTCAACTCATCCAAAACATTCTCAACACCACAATATCTATGCTCAG\n+CGATGACAAATTTCTCCTGATTTCTTAATTTTCTATCTATGCTATGCGATCAATCAACGAATGTGTGCTAATTTCTTGTG\n+ACGATTATTTTGCAAAGTCGTCTCCGCGTTAATATCCGATGTAAATAAACCTATGAAAATCGCAAAGATCTATTCCTTTG\n+CGCTTAACCTTGTTATTGAACTCCCTCCCGCCCGGATTTTCGCAGCTTCCAACTAAGTGATACCTTTTAAACAAACGCCA\n+CAACAAAACAGGTGACAATCATATTTTATCAACAAAAGAAAAGAGAGATAATATCTGCTGCTAATTCAATTTATTGGGCT\n+TTGTGTTTATTTGCATTGGGAATCCGTGGAGCTGATATTGTTTACTTTGGCAATTTCCCAGTCATTATGGTCGCCGTATA\n+ATGTGAATGGCCAACAGGAAAATTTCACAGATTCCACTGGTTATTCACTGTTCATGCTGGCGGTTGATCCGCTCCAAATC\n+CCGGATGAGGCGGCGGAAGATCCTTATACCGCTTACTTACGTATTCACTGCTTAACATCTTATCATAGAATTCGTTAGCT\n+TTCACATAGTTTGGATTCCGAATGAATCTCTTCTGTCGACCTCGCGTTTTTGGCAATTTTCGGAGCTCTTCGCTACTGCT\n+AGTTGATTTAAGGCCTACATCTTTCTGATCCTTAATCATCCTTTTAGGTGATTCTCCTGCTACCATGGGATCCACCAGTT\n+TCATATATAATGGATCGGGCTCGTTTGCAAGTGGGTTTCCAGTTTCAGAAGACTCTGACGTTCTAAGCGCCAATAAAATG\n+AAAATAACCAAAAGGAAACTAGACATGTTTCGTTACAGACAGATATAGATTGGATATTATTGAAAGAAAATGAAAATAAA\n+CAGCGATAATGATCTGTGACTTATTGGAAATTAGATGGCTTATGGATGATGGGGCGATAAATTCGAACAAACACTGAGAG\n+CATTTTTGGGAGCATTGTGGGAGCTTTTCTATTATCCAGTACTTTATATACATATATCATTTATATACTAATCATTTCTG\n+GTAGCCGTTCGTAATCAGGATCGGATCCTTTTTTACCCGTTAGTCAGCTAGAAGAAACGAAAAATTAAAATAGTAAAATC\n+TAAAAGTATACAAAAATTCAAATAGTAAAACCAAAAAGTATTAAAAAAAATATCAATCGTTTTTAAACGTTGATTTTTCA\n+GCTTGTGGGGTGATTTATCGCTAACTTGGAAAATGATAATAAAGCATTATCCATAATA
TTAGTTGTGGAAATGAAATTCA\n+AATAGATGTTGTGTTATATACGATGAGGATGTTGCATTTGAGTCCCCGGAAATATAGTATTTTTTTTACCGAAGGTATTA\n+TCGTACCGGTCAAGTACGGTCACACTGCCAAGCGCAGATTTGAGGATTTCTAGATTTGGCCTCTTGATGGACTAGAAGCG\n+CTACCAAAACTGGGGCTTGAGTTGAATTACCTGTTGGAAGACACAATGCCACCCACGATCAACAATTCGGCGGTAAACAG\n+TGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAGTAAGTGAACAGATCCCTAAACAGACGCCAGATACTCAGAC\n+TGATGTGTACCTTGCAGATCCGAGATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAA\n+GTTTCTGCAGTACCCCTTCGACAGCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACG\n+TGCTGACGGAACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTG\n+GACCCCGCCGATGAAAAACTGCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGAC\n+GGTGTCATGGTTGCGCAAATCCGAGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGG\n+CCAAGGTCGGTTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATC\n+GAGAAGACCTTCAGCGACACCAAGAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCC\n+TATCTTCCCCGACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGA\n+ACGTGCCCGCCCAGCTGGAGGAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCC\n+TACTTCCTGCCCACAGAGCAGACGCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGAGGAGGA\n+GTACGAGTACAAGATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCT\n+TCGTGATGCGTCAGGACGGCATCTACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGC\n+CAGCAACCCAACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCTATAGCGAGCTTTACTTTGTATTATTTCTAC\n+CAGGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGG\n+CGAGGAGGAGGAGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGG\n+ACGCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCGATCGACAGTCTCGTTCTCGG\n+AGTCGAACTCGCAGCGGGTCCAGTTCAGGATCTGGATCTGGCTCCGGCTCTCGGGCCAGCAGCCGCTCAAAGTCTGGTTC\n+TCGGTCTGGTAGCGGCTCCAGATCACGCACAAATTCGCCGGCAGGATCCCAGAAATCCGGATCCAGATCGAGATCGGTAT\n+CACGTTCCCGATCCCGTTCCAAGTCCGGCTCTCGGTCGCGTTCTAGGTCGAGATCCAAGTCCGGTTCCCGATCACGTTCG\n+GGCTCCAGATCTGGCTCTGGGTCGCGATCGCCCAGCCGGTCTCGCAGTGGCTCGCCTTCTGGTTCA
GGATCCAGCTCTGG\n+AAGCGCCTCAGATGAATGATTAATTACAAAAAAC'..b'ATCCAGATCGAGATCGGTAT\n+CACGTTCCCGATCCCGTTCCAAGTCCGGCTCTCGGTCGCGTTCTAGGTCGAGATCCAAGTCCGGTTCCCGATCACGTTCG\n+GGCTCCAGATCTGGCTCTGGGTCGCGATCGCCCAGCCGGTCTCGCAGTGGCTCGCCTTCTGGTTCAGGATCCAGCTCTGG\n+AAGCGCCTCAGATGAATGATTAATTACAAAAAACGGCGTTCATAATAAATAAGTTTATAATCAACCAAGTACATTTGAAA\n+ACTGAACTAACTCGATTTAATATCATTTTCGCCTCAACTCAGCGCTCGGGTTCGTTGCCCAGAATAGTTTTAAATAAAAT\n+CGGCAGTTTAACATAATTTATATTAGATGTTGTTGTTGTATTGCAAACAAGTCGGGTCCTAGTCGATTTACACTTGGCTG\n+AGATAAAACAACTAAGATTCAAATAATATCCTCATAAGAAGATGTAATTAAGACGTTTTTCTTAGGGGGTGCTTAGGAAT\n+TGATTAGATCGCCTTTGGGGAAGTGCAAACAATGTAAAATGATATAAAAGGGTATAAATTAAGTGGATATATGCATCTTC\n+GTTCCAACTACGTGGCGTCCATCAAAAAGCGCTGGAAGACTTCGCCATCGGAACTAGGTAGCTCTGTTTGTTGCGGTGAG\n+TAGATTCTCAAGTTCTGGAGTTGCTGCAGCGGAGAGCCATTCCCGCTGAAGTGTACCACCGCAACCGGCTGTAGGGATAT\n+GAGCTGTCCCTCCTCCCGCGGCTCCACACCACAGATGCCCAGCTTTTGGCACTGCTCAACGACAATGTCGTCGATTGACT\n+GCGAAAGCAGTGCCTCCTGCTCAGGATCCACAATGGAGCTGCTGTTGATGGCAGCTATTTCGGCGCTCGCTGGTGAAGGC\n+TGAGTATGCGAGTCGTTGCCCTCCAGGAACGCCTTTATCAGCTGCTCCGGTGTCTGGGCCTCCTCGGTGGGACATCGATG\n+GGTCCTTTGACTGACCTTGTACCTGAACGTCTTTTGGCAGAGCTCGCACTTGTAGGGCATCACTCCCGTGTGGATGCGCG\n+TGTGGACAAGGAAAGAGACTCGCTGCCGGAAACACTTGCCTGTGGACGAGTCCGAAATACGAAAAGGTTAGACATGGAGT\n+GACCCGGAAAAGAAGGTATACCTCTCCTTCATTTAAAGTAAATAGGGCAAATCGCAATGGAGTATGCTCATTTATAAGCT\n+GGCTAACAAAATAAGGGGCGGCTAATTAAGGGGTTTGATCGATACTCACCGCAGACTTCGCACTTGAAGGGCTTTTCGCC\n+GCTGTGAATCCTCTGGTGGTTATGCAGCGTAGACAGTTCCTTGAAGGCGCGTCCACAAACCCCGCAGACATGGGGCTTTA\n+CCTCGCTGTGGTAGAGCAAATGCTTGTGGTACGACTGCTGGAAGGTGAAGGTCCTGGCGCAGATCTCGCATGTGTACGGC\n+ATCTCGCCGGTGTGGAGCCGCTTGTGCTTCTTCAGAAAGTACTTGGTGGTGAAGGACTTGCTGCACACATCGCACTCCCA\n+CAGCTTCGGGGTGGCCGTGCCCGACTCCGGCGAACTGGGGGACTGCTGGAGCATGCTCAGCGCCCCACTCAGCGCGTATG\n+GCTGGGATGCGGTGCACGTGGAGTTATTTCCGTTGCCAAGGGCTCCAGGCTCTGTGTGTCGAATGCGATCGCAAATGCTC\n+AGCTTGGGCGTGGCAGTAACTGCACTGGTGGTAGAAGCGGGTGTAGGACTGGGATTAGGATTTGGATTGGGATTGGAGCA\n+GGGCACGCCCATCATGTGCACTACTTTCAGGTGGATTCGAAGGGAGCCCTTCATTCGGAACGTCTTGGAGCAGAGATGGC\n+ACTTGTAGGGCT
CCTGGTCCTGTATAAAGCAATAATCGGAATTTCACTTATATTTATCAATTCATCAATATGCCCTCATG\n+GCCAAATATTCCATTACATTACCGTCTGTCTCTCAGTTTCAAATTTATGCACAAAAATCATTCACTTTCATTCACTATAT\n+CACAAAGTTGCCATGGTTTTAAATTGATCAAAAACAAATTAATATCTATCATATATATACATAGTCATATGAACAGTTGA\n+AAAATTAATTGAAAATAATGGGAACGATATACGTACATACATCAGTTGTTTTTAAAATATAAGGGTATATAGATTTCTTT\n+CTTGTTGTTGTTGATTTTAATTACGTCAAACTTTTGTTTCAGATTCAATGTAAATGGTCTAGCTTTTTAAGTATGATTTT\n+TTTTTGCTGCCAGTGAGCATAGAAAAAAAAAATCAAAATCGATATAAGAATATGCGAAAGTGCATTACGAAACTCTTTAG\n+ATAATAGCACTTAATATATGTACATAGCCAATAGTTACCGGTTCCTTCTGTTGGGGTTCCTTTTGCTTGGGTTCTCCCTC\n+CGCATTTTCGTGGACTAAGCGGACGTGCATGTCCCTCAGCTCGGTATTCCGGAAACTGAACTCGCAAATGTCGCAGTGGG\n+CGGGCGGGGTGGTCCGCACAGGCGGTGGGGTTGGGACGACGGGCTTGGACCTGGTTCGCTTGGCCCTCCGTTTGGGAGGA\n+GCTGCGGCAAGGAAGCCCCGGGACGAGGCGGGTTGGCCATTCGCTGGACTCTCGTTTCCCTCCTGAGCCATCAGACTTGT\n+GTGCGAGAACAGGTGGATGGTGAGCTTGTCCAGCCCCAGGAAGAGCTCCTTGCAGTTGGCAAAGGGACAGGCCAGTGGGC\n+CGTTGGCCGCCTTGATCAGCCTCTGCTGCAGTGCGTCAATACTGCCGAAACTGGGCACCGCGCAGAGCGGACACAGCACC\n+GAGGTGGAACACATTTCGCCAGTGCACTCAATCGAATCTTATGCAAATGCTTCACCTCCTATTGGGATTATCCTCCTGTT\n+CGGTCTGTGATCATCTATTCAGGAGTCCATTCCCAGACTGCCTAGTCTTTCTGCTTTCAAAATTTTCTAAAAATATCAGC\n+AAGTGAAGATTTTTGAAAACTTTGGGCCCAGCAATCTGACTTCTCGGCACCGATGCCAGCTAACGAAATAATGAAAAATA\n+ATGAAATGCCCGGCGCGGATCGTCGAATCGTCAAGAAGACTTTCGGAAACACTCGCAGCACCGAAATCCCATCTCTCGAA\n+CAAGGCAGTCTCTTTTCTCCGTGTCTCTGGGTAGCTCATTTCGAAATATAGCTCTGAGCACGGCTATATACTATATGTAT\n+GTAGAATTATTTCTGGCCGATATATGTTGCACTGGCGGCCATATAGCCTTCGTTCTAGTCTTTGTAACGCACGATGCGCA\n+GGAGCAATTCGCTGAGATGACCACATGCGATTTGCGGGACTTATCTAGAGATCTATCATTATCGCCAGATTGGTTTAATA\n+ATTGGCTTTTCCGCCAATATCCAATTGGAATATGGTTGGTTACTGCAATTGTCGCTCCATTTTTTAAGCACTCCATAAAA\n+AGTAAACACATTAATATGTACTCTTATTGGAGATTTCTTCTTTCGATTTTAGTTTCGGACCAGTGAAAATCATTCGTTTC\n+ATTTTCGTAAATAAGAACTGAGAAAATATTATTATTATATATATTTCTTTATTAGGAAAATACGAAGATTGAGTATTTCA\n+GATTGAATTAGCATATCCGTCTAAATCTTAATGCTGTAATGAGCTTACTTGAGATCTGATCAAAACCAATACAAAACCCA\n+CACCAAAGGTGGTAGCTAATATACATATTTTGTGTAATACTTTTGTAGAGTATTTACTATTCAGCGATTTAAACAAGCAA\n+TCGCCTAGACACACACATTT
GTCCGCCTATGTGTATGTGCACCGAGCTATACCCCCACTGAATCGCTGTGTGCTATTTTT\n+ATGGCCGCGATGCTCTCTTGTTTTGACCCGCTTGGGCAAC\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/compare/Genus_species.gbk
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/compare/Genus_species.gbk Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,4260 @@\n+LOCUS       sample                215740 bp    DNA     linear       28-SEP-2021\n+DEFINITION  Genus species.\n+ACCESSION   \n+VERSION\n+KEYWORDS    .\n+SOURCE      Genus species\n+  ORGANISM  Genus species\n+            Unclassified.\n+REFERENCE   1  (bases 1 to 215740)\n+  AUTHORS   Palmer,J.M.\n+  TITLE     Direct Submission\n+  JOURNAL   Submitted (28-SEP-2021) CFMR, USDA Forest Service, 1 Gifford\n+            Pinchot Drive, Madison, WI 53726, USA\n+COMMENT     \'Annotated using 1.8.9\'.\n+FEATURES             Location/Qualifiers\n+     source          1..215740\n+                     /organism="Genus species"\n+                     /mol_type="genomic DNA"\n+     gene            complement(1092..1466)\n+                     /locus_tag="FUN_000001"\n+     mRNA            complement(1092..1466)\n+                     /locus_tag="FUN_000001"\n+                     /product="hypothetical protein"\n+     CDS             complement(1092..1466)\n+                     /locus_tag="FUN_000001"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUN_000001-T1"\n+                     /translation="MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAG\n+                     ESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKRFIRNPNYVKANEFYDKMLSSEY\n+                     VSKRYKDLPPPHPGFGADQPPA"\n+     gene            2126..3863\n+                     /locus_tag="FUN_000002"\n+     mRNA            join(2126..2199,2258..3224,3284..3490,3549..3863)\n+                     /locus_tag="FUN_000002"\n+                     /product="hypothetical protein"\n+     CDS             join(2126..2199,2258..3224,3284..3490,3549..3863)\n+                     /locus_tag="FUN_000002"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUN_000002-T1"\n+                     /translation="MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFD\n+     
                LKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLGVTVDLINRELYQADSMTLLDPA\n+                     DEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK\n+                     KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPC\n+                     AQVIFDSDPAPAGKNVPAQLEEMSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFI\n+                     NGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGIYYNELETRVRLNKR\n+                     RVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIG\n+                     ETEKTSEDAAVGAQAASGADSPAQDLDLAPALGPAAAQSLVLGLVAAPDHAQIRRQDP\n+                     RNPDPDRDRYHVPDPVPSPALGRVLGRDPSPVPDHVRAPDLALGRDRPAGLAVARLLV\n+                     QDPALEAPQMND"\n+     gene            complement(4883..5802)\n+                     /locus_tag="FUN_000003"\n+     mRNA            complement(join(4883..5539,5797..5802))\n+                     /locus_tag="FUN_000003"\n+                     /product="hypothetical protein"\n+     CDS             complement(join(4883..5539,5797..5802))\n+                     /locus_tag="FUN_000003"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUN_000003-T1"\n+                     /translation="MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPN\n+                     PSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGNNSTCTASQPYALSGALSMLQQS\n+                     PSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+                     KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCEVCGEYRSNPLISRPL\n+                     FC"\n+     gene            complement(8696..10557)\n+                     /locus_tag="FUN_000004"\n+     mRNA            complement(join(8696..10462,10549..10557))\n+                     /locus_tag="FUN_000004"\n+                     /product="hypothetical protein"\n+     CDS             complement(join(8696..10462,10549..10557))\n+                     /locus_tag="FUN_000004"\n+                     /codon_start=1\n+                     
/product="hypothetical protein"\n+                     /protein_id="ncbi:FUN_000004-T'..b'1 agcttttggc actgctcaac gacaatgtcg tcgattgact gcgaaagcag tgcctcctgc\n+     4501 tcaggatcca caatggagct gctgttgatg gcagctattt cggcgctcgc tggtgaaggc\n+     4561 tgagtatgcg agtcgttgcc ctccaggaac gcctttatca gctgctccgg tgtctgggcc\n+     4621 tcctcggtgg gacatcgatg ggtcctttga ctgaccttgt acctgaacgt cttttggcag\n+     4681 agctcgcact tgtagggcat cactcccgtg tggatgcgcg tgtggacaag gaaagagact\n+     4741 cgctgccgga aacacttgcc tgtggacgag tccgaaatac gaaaaggtta gacatggagt\n+     4801 gacccggaaa agaaggtata cctctccttc atttaaagta aatagggcaa atcgcaatgg\n+     4861 agtatgctca tttataagct ggctaacaaa ataaggggcg gctaattaag gggtttgatc\n+     4921 gatactcacc gcagacttcg cacttgaagg gcttttcgcc gctgtgaatc ctctggtggt\n+     4981 tatgcagcgt agacagttcc ttgaaggcgc gtccacaaac cccgcagaca tggggcttta\n+     5041 cctcgctgtg gtagagcaaa tgcttgtggt acgactgctg gaaggtgaag gtcctggcgc\n+     5101 agatctcgca tgtgtacggc atctcgccgg tgtggagccg cttgtgcttc ttcagaaagt\n+     5161 acttggtggt gaaggacttg ctgcacacat cgcactccca cagcttcggg gtggccgtgc\n+     5221 ccgactccgg cgaactgggg gactgctgga gcatgctcag cgccccactc agcgcgtatg\n+     5281 gctgggatgc ggtgcacgtg gagttatttc cgttgccaag ggctccaggc tctgtgtgtc\n+     5341 gaatgcgatc gcaaatgctc agcttgggcg tggcagtaac tgcactggtg gtagaagcgg\n+     5401 gtgtaggact gggattagga tttggattgg gattggagca gggcacgccc atcatgtgca\n+     5461 ctactttcag gtggattcga agggagccct tcattcggaa cgtcttggag cagagatggc\n+     5521 acttgtaggg ctcctggtcc tgtataaagc aataatcgga atttcactta tatttatcaa\n+     5581 ttcatcaata tgccctcatg gccaaatatt ccattacatt accgtctgtc tctcagtttc\n+     5641 aaatttatgc acaaaaatca ttcactttca ttcactatat cacaaagttg ccatggtttt\n+     5701 aaattgatca aaaacaaatt aatatctatc atatatatac atagtcatat gaacagttga\n+     5761 aaaattaatt gaaaataatg ggaacgatat acgtacatac atcagttgtt tttaaaatat\n+     5821 aagggtatat agatttcttt cttgttgttg ttgattttaa ttacgtcaaa cttttgtttc\n+     5881 agattcaatg taaatggtct agctttttaa 
gtatgatttt tttttgctgc cagtgagcat\n+     5941 agaaaaaaaa aatcaaaatc gatataagaa tatgcgaaag tgcattacga aactctttag\n+     6001 ataatagcac ttaatatatg tacatagcca atagttaccg gttccttctg ttggggttcc\n+     6061 ttttgcttgg gttctccctc cgcattttcg tggactaagc ggacgtgcat gtccctcagc\n+     6121 tcggtattcc ggaaactgaa ctcgcaaatg tcgcagtggg cgggcggggt ggtccgcaca\n+     6181 ggcggtgggg ttgggacgac gggcttggac ctggttcgct tggccctccg tttgggagga\n+     6241 gctgcggcaa ggaagccccg ggacgaggcg ggttggccat tcgctggact ctcgtttccc\n+     6301 tcctgagcca tcagacttgt gtgcgagaac aggtggatgg tgagcttgtc cagccccagg\n+     6361 aagagctcct tgcagttggc aaagggacag gccagtgggc cgttggccgc cttgatcagc\n+     6421 ctctgctgca gtgcgtcaat actgccgaaa ctgggcaccg cgcagagcgg acacagcacc\n+     6481 gaggtggaac acatttcgcc agtgcactca atcgaatctt atgcaaatgc ttcacctcct\n+     6541 attgggatta tcctcctgtt cggtctgtga tcatctattc aggagtccat tcccagactg\n+     6601 cctagtcttt ctgctttcaa aattttctaa aaatatcagc aagtgaagat ttttgaaaac\n+     6661 tttgggccca gcaatctgac ttctcggcac cgatgccagc taacgaaata atgaaaaata\n+     6721 atgaaatgcc cggcgcggat cgtcgaatcg tcaagaagac tttcggaaac actcgcagca\n+     6781 ccgaaatccc atctctcgaa caaggcagtc tcttttctcc gtgtctctgg gtagctcatt\n+     6841 tcgaaatata gctctgagca cggctatata ctatatgtat gtagaattat ttctggccga\n+     6901 tatatgttgc actggcggcc atatagcctt cgttctagtc tttgtaacgc acgatgcgca\n+     6961 ggagcaattc gctgagatga ccacatgcga tttgcgggac ttatctagag atctatcatt\n+     7021 atcgccagat tggtttaata attggctttt ccgccaatat ccaattggaa tatggttggt\n+     7081 tactgcaatt gtcgctccat tttttaagca ctccataaaa agtaaacaca ttaatatgta\n+     7141 ctcttattgg agatttcttc tttcgatttt agtttcggac cagtgaaaat cattcgtttc\n+     7201 attttcgtaa ataagaactg agaaaatatt attattatat atatttcttt attaggaaaa\n+     7261 tacgaagatt gagtatttca gattgaatta gcatatccgt ctaaatctta atgctgtaat\n+     7321 gagcttactt gagatctgat caaaaccaat acaaaaccca caccaaaggt ggtagctaat\n+     7381 atacatattt tgtgtaatac ttttgtagag tatttactat tcagcgattt aaacaagcaa\n+     7441 
tcgcctagac acacacattt gtccgcctat gtgtatgtgc accgagctat acccccactg\n+     7501 aatcgctgtg tgctattttt atggccgcga tgctctcttg ttttgacccg cttgggcaac\n+//\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/compare/Other_beast.gbk
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/compare/Other_beast.gbk Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,4148 @@\n+LOCUS       sample                215740 bp    DNA     linear       21-JUL-2021\n+DEFINITION  Other beast.\n+ACCESSION   \n+VERSION\n+KEYWORDS    .\n+SOURCE      Other beast\n+  ORGANISM  Other beast\n+            Unclassified.\n+REFERENCE   1  (bases 1 to 215740)\n+  AUTHORS   Palmer,J.M.\n+  TITLE     Direct Submission\n+  JOURNAL   Submitted (21-JUL-2021) CFMR, USDA Forest Service, 1 Gifford\n+            Pinchot Drive, Madison, WI 53726, USA\n+COMMENT     \'Annotated using 1.8.7\'.\n+FEATURES             Location/Qualifiers\n+     source          1..215740\n+                     /organism="Other beast"\n+                     /mol_type="genomic DNA"\n+     gene            complement(1092..1466)\n+                     /locus_tag="FUNC_000001"\n+     mRNA            complement(1092..1466)\n+                     /locus_tag="FUNC_000001"\n+                     /product="hypothetical protein"\n+     CDS             complement(1092..1466)\n+                     /locus_tag="FUNC_000001"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUNC_000001-T1"\n+                     /translation="MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAG\n+                     ESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKRFIRNPNYVKANEFYDKMLSSEY\n+                     VSKRYKDLPPPHPGFGADQPPA"\n+     gene            complement(2565..3142)\n+                     /locus_tag="FUNC_000002"\n+     mRNA            complement(join(2565..2686,2883..3004,3138..3142))\n+                     /locus_tag="FUNC_000002"\n+                     /product="hypothetical protein"\n+     CDS             complement(join(2565..2686,2883..3004,3138..3142))\n+                     /locus_tag="FUNC_000002"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUNC_000002-T1"\n+                     
/translation="MPLYGVSPASALWAGSRRQTVRRSRPSHHESWPATSPPAGRAQS\n+                     LLPKRLLDVVTDLGLDVLQVLGLEARLLGGDVLGFAQP"\n+     gene            complement(4248..5802)\n+                     /locus_tag="FUNC_000003"\n+     mRNA            complement(join(4248..4742,4937..5539,5797..5802))\n+                     /locus_tag="FUNC_000003"\n+                     /product="hypothetical protein"\n+     CDS             complement(join(4248..4742,4937..5539,5797..5802))\n+                     /locus_tag="FUNC_000003"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUNC_000003-T1"\n+                     /translation="MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPN\n+                     PSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGNNSTCTASQPYALSGALSMLQQS\n+                     PSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+                     KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCERVSFLVHTRIHTGVM\n+                     PYKCELCQKTFRYKVSQRTHRCPTEEAQTPEQLIKAFLEGNDSHTQPSPASAEIAAIN\n+                     SSSIVDPEQEALLSQSIDDIVVEQCQKLGICGVEPREEGQLISLQPVAVVHFSGNGSP\n+                     LQQLQNLRIYSPQQTELPSSDGEVFQRFLMDAT"\n+     gene            complement(7691..10664)\n+                     /locus_tag="FUNC_000004"\n+     mRNA            complement(join(7691..8385,8707..10499,10657..10664))\n+                     /locus_tag="FUNC_000004"\n+                     /product="hypothetical protein"\n+     CDS             complement(join(7691..8385,8707..10499,10657..10664))\n+                     /locus_tag="FUNC_000004"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUNC_000004-T1"\n+                     /translation="MSNKREIDELTSRIKSAKTTLVEWTEAMEDGNKGYQLIEKYYLD\n+                     DQQKARELNIKRQLLQADIDKRRKQVVLLYDEQMTLEKNLERTACLYRSAHAERRQMV\n+                     
ETWKSAVNQMTQREHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELA\n+                     IESLNEESSDMKNQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRD\n+                     EKAKELENFASVME'..b'1 agcttttggc actgctcaac gacaatgtcg tcgattgact gcgaaagcag tgcctcctgc\n+     4501 tcaggatcca caatggagct gctgttgatg gcagctattt cggcgctcgc tggtgaaggc\n+     4561 tgagtatgcg agtcgttgcc ctccaggaac gcctttatca gctgctccgg tgtctgggcc\n+     4621 tcctcggtgg gacatcgatg ggtcctttga ctgaccttgt acctgaacgt cttttggcag\n+     4681 agctcgcact tgtagggcat cactcccgtg tggatgcgcg tgtggacaag gaaagagact\n+     4741 cgctgccgga aacacttgcc tgtggacgag tccgaaatac gaaaaggtta gacatggagt\n+     4801 gacccggaaa agaaggtata cctctccttc atttaaagta aatagggcaa atcgcaatgg\n+     4861 agtatgctca tttataagct ggctaacaaa ataaggggcg gctaattaag gggtttgatc\n+     4921 gatactcacc gcagacttcg cacttgaagg gcttttcgcc gctgtgaatc ctctggtggt\n+     4981 tatgcagcgt agacagttcc ttgaaggcgc gtccacaaac cccgcagaca tggggcttta\n+     5041 cctcgctgtg gtagagcaaa tgcttgtggt acgactgctg gaaggtgaag gtcctggcgc\n+     5101 agatctcgca tgtgtacggc atctcgccgg tgtggagccg cttgtgcttc ttcagaaagt\n+     5161 acttggtggt gaaggacttg ctgcacacat cgcactccca cagcttcggg gtggccgtgc\n+     5221 ccgactccgg cgaactgggg gactgctgga gcatgctcag cgccccactc agcgcgtatg\n+     5281 gctgggatgc ggtgcacgtg gagttatttc cgttgccaag ggctccaggc tctgtgtgtc\n+     5341 gaatgcgatc gcaaatgctc agcttgggcg tggcagtaac tgcactggtg gtagaagcgg\n+     5401 gtgtaggact gggattagga tttggattgg gattggagca gggcacgccc atcatgtgca\n+     5461 ctactttcag gtggattcga agggagccct tcattcggaa cgtcttggag cagagatggc\n+     5521 acttgtaggg ctcctggtcc tgtataaagc aataatcgga atttcactta tatttatcaa\n+     5581 ttcatcaata tgccctcatg gccaaatatt ccattacatt accgtctgtc tctcagtttc\n+     5641 aaatttatgc acaaaaatca ttcactttca ttcactatat cacaaagttg ccatggtttt\n+     5701 aaattgatca aaaacaaatt aatatctatc atatatatac atagtcatat gaacagttga\n+     5761 aaaattaatt gaaaataatg ggaacgatat acgtacatac atcagttgtt tttaaaatat\n+     5821 aagggtatat 
agatttcttt cttgttgttg ttgattttaa ttacgtcaaa cttttgtttc\n+     5881 agattcaatg taaatggtct agctttttaa gtatgatttt tttttgctgc cagtgagcat\n+     5941 agaaaaaaaa aatcaaaatc gatataagaa tatgcgaaag tgcattacga aactctttag\n+     6001 ataatagcac ttaatatatg tacatagcca atagttaccg gttccttctg ttggggttcc\n+     6061 ttttgcttgg gttctccctc cgcattttcg tggactaagc ggacgtgcat gtccctcagc\n+     6121 tcggtattcc ggaaactgaa ctcgcaaatg tcgcagtggg cgggcggggt ggtccgcaca\n+     6181 ggcggtgggg ttgggacgac gggcttggac ctggttcgct tggccctccg tttgggagga\n+     6241 gctgcggcaa ggaagccccg ggacgaggcg ggttggccat tcgctggact ctcgtttccc\n+     6301 tcctgagcca tcagacttgt gtgcgagaac aggtggatgg tgagcttgtc cagccccagg\n+     6361 aagagctcct tgcagttggc aaagggacag gccagtgggc cgttggccgc cttgatcagc\n+     6421 ctctgctgca gtgcgtcaat actgccgaaa ctgggcaccg cgcagagcgg acacagcacc\n+     6481 gaggtggaac acatttcgcc agtgcactca atcgaatctt atgcaaatgc ttcacctcct\n+     6541 attgggatta tcctcctgtt cggtctgtga tcatctattc aggagtccat tcccagactg\n+     6601 cctagtcttt ctgctttcaa aattttctaa aaatatcagc aagtgaagat ttttgaaaac\n+     6661 tttgggccca gcaatctgac ttctcggcac cgatgccagc taacgaaata atgaaaaata\n+     6721 atgaaatgcc cggcgcggat cgtcgaatcg tcaagaagac tttcggaaac actcgcagca\n+     6781 ccgaaatccc atctctcgaa caaggcagtc tcttttctcc gtgtctctgg gtagctcatt\n+     6841 tcgaaatata gctctgagca cggctatata ctatatgtat gtagaattat ttctggccga\n+     6901 tatatgttgc actggcggcc atatagcctt cgttctagtc tttgtaacgc acgatgcgca\n+     6961 ggagcaattc gctgagatga ccacatgcga tttgcgggac ttatctagag atctatcatt\n+     7021 atcgccagat tggtttaata attggctttt ccgccaatat ccaattggaa tatggttggt\n+     7081 tactgcaatt gtcgctccat tttttaagca ctccataaaa agtaaacaca ttaatatgta\n+     7141 ctcttattgg agatttcttc tttcgatttt agtttcggac cagtgaaaat cattcgtttc\n+     7201 attttcgtaa ataagaactg agaaaatatt attattatat atatttcttt attaggaaaa\n+     7261 tacgaagatt gagtatttca gattgaatta gcatatccgt ctaaatctta atgctgtaat\n+     7321 gagcttactt gagatctgat caaaaccaat acaaaaccca caccaaaggt 
ggtagctaat\n+     7381 atacatattt tgtgtaatac ttttgtagag tatttactat tcagcgattt aaacaagcaa\n+     7441 tcgcctagac acacacattt gtccgcctat gtgtatgtgc accgagctat acccccactg\n+     7501 aatcgctgtg tgctattttt atggccgcga tgctctcttg ttttgacccg cttgggcaac\n+//\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/compare/Other_species.gbk
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/compare/Other_species.gbk Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,4259 @@\n+LOCUS       sample                215740 bp    DNA     linear       22-JUL-2021\n+DEFINITION  Other species.\n+ACCESSION   \n+VERSION\n+KEYWORDS    .\n+SOURCE      Other species\n+  ORGANISM  Other species\n+            Unclassified.\n+REFERENCE   1  (bases 1 to 215740)\n+  AUTHORS   Palmer,J.M.\n+  TITLE     Direct Submission\n+  JOURNAL   Submitted (22-JUL-2021) CFMR, USDA Forest Service, 1 Gifford\n+            Pinchot Drive, Madison, WI 53726, USA\n+COMMENT     \'Annotated using 1.8.7\'.\n+FEATURES             Location/Qualifiers\n+     source          1..215740\n+                     /organism="Other species"\n+                     /mol_type="genomic DNA"\n+     gene            <2331..3254\n+                     /locus_tag="FUNB_000001"\n+     mRNA            <2331..3254\n+                     /locus_tag="FUNB_000001"\n+                     /product="hypothetical protein"\n+     CDS             <2331..3254\n+                     /locus_tag="FUNB_000001"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUNB_000001-T1"\n+                     /translation="YPFDSHRFVQYNPTSLERNFKYDVLTEHDLGVTVDLINRELYQA\n+                     DSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENI\n+                     EAKVGYNVKKSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFP\n+                     DFTNWKFPCAQVIFDSDPAPAGKNVPAQLEEMSQAMIRGVMDESGEQFVAYFLPTEQT\n+                     LEKRRTDFINGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGIYYNEL\n+                     ETRVRLNKRRVKVGQQPNNTKLVSIFMRIHL"\n+     gene            complement(4883..5802)\n+                     /locus_tag="FUNB_000002"\n+     mRNA            complement(join(4883..5539,5797..5802))\n+                     /locus_tag="FUNB_000002"\n+                     /product="hypothetical protein"\n+     CDS             complement(join(4883..5539,5797..5802))\n+                     /locus_tag="FUNB_000002"\n+               
      /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUNB_000002-T1"\n+                     /translation="MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPN\n+                     PSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGNNSTCTASQPYALSGALSMLQQS\n+                     PSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+                     KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCEVCGEYRSNPLISRPL\n+                     FC"\n+     gene            complement(8696..10557)\n+                     /locus_tag="FUNB_000003"\n+     mRNA            complement(join(8696..10462,10549..10557))\n+                     /locus_tag="FUNB_000003"\n+                     /product="hypothetical protein"\n+     CDS             complement(join(8696..10462,10549..10557))\n+                     /locus_tag="FUNB_000003"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUNB_000003-T1"\n+                     /translation="MIHSAKTTLVEWTEAMEDGNKGYQLIEKYYLDDQQKARELNIKR\n+                     QLLQADIDKRRKQVVLLYDEQMTLEKNLERTACLYRSAHAERRQMVETWKSAVNQMTQ\n+                     REHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELAIESLNEESSDMK\n+                     NQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRDEKAKELENFASV\n+                     MEKVNARLKSVQNKALNAEQRLQILEEMMQAEETALRNLDKEQEKVNEMLYRTQRQVI\n+                     ELQDEEKVLKVQNDSLNSNLAAINRNQQQVNNELKRQTEIHYSLSFKCLEAERRLAEI\n+                     KGLADDPEVEATNMARLNTLEQEYEKLQRLIATTEAQNKKLNYNMNNLVVQYNADEKE\n+                     LEMVRFKIKEAQVYCEGTVKRLRQIRYENSELIVDLNMVKMRCSDLEVGIGGCEQGTY\n+                     DLEQHRLAFRRAIKDRTVELRSQEDVLLLKKKHLNEELSTLRADLGERKKQIEAMKAR\n+                     FELTAQLLGKNEDGSIMTSTQLKVVSAQERQMLADEGDALNKKVLKAEKEVVALENTL\n+                     RQFDKSNDNYRKTFRSVDENSKGEL"\n+     gene            complement(14247..15214)\n+                     /locus_tag="FUNB_000004"\n+     mRNA       
     complement(join(14247..14648,15209..15214))\n+                     /locu'..b'1 agcttttggc actgctcaac gacaatgtcg tcgattgact gcgaaagcag tgcctcctgc\n+     4501 tcaggatcca caatggagct gctgttgatg gcagctattt cggcgctcgc tggtgaaggc\n+     4561 tgagtatgcg agtcgttgcc ctccaggaac gcctttatca gctgctccgg tgtctgggcc\n+     4621 tcctcggtgg gacatcgatg ggtcctttga ctgaccttgt acctgaacgt cttttggcag\n+     4681 agctcgcact tgtagggcat cactcccgtg tggatgcgcg tgtggacaag gaaagagact\n+     4741 cgctgccgga aacacttgcc tgtggacgag tccgaaatac gaaaaggtta gacatggagt\n+     4801 gacccggaaa agaaggtata cctctccttc atttaaagta aatagggcaa atcgcaatgg\n+     4861 agtatgctca tttataagct ggctaacaaa ataaggggcg gctaattaag gggtttgatc\n+     4921 gatactcacc gcagacttcg cacttgaagg gcttttcgcc gctgtgaatc ctctggtggt\n+     4981 tatgcagcgt agacagttcc ttgaaggcgc gtccacaaac cccgcagaca tggggcttta\n+     5041 cctcgctgtg gtagagcaaa tgcttgtggt acgactgctg gaaggtgaag gtcctggcgc\n+     5101 agatctcgca tgtgtacggc atctcgccgg tgtggagccg cttgtgcttc ttcagaaagt\n+     5161 acttggtggt gaaggacttg ctgcacacat cgcactccca cagcttcggg gtggccgtgc\n+     5221 ccgactccgg cgaactgggg gactgctgga gcatgctcag cgccccactc agcgcgtatg\n+     5281 gctgggatgc ggtgcacgtg gagttatttc cgttgccaag ggctccaggc tctgtgtgtc\n+     5341 gaatgcgatc gcaaatgctc agcttgggcg tggcagtaac tgcactggtg gtagaagcgg\n+     5401 gtgtaggact gggattagga tttggattgg gattggagca gggcacgccc atcatgtgca\n+     5461 ctactttcag gtggattcga agggagccct tcattcggaa cgtcttggag cagagatggc\n+     5521 acttgtaggg ctcctggtcc tgtataaagc aataatcgga atttcactta tatttatcaa\n+     5581 ttcatcaata tgccctcatg gccaaatatt ccattacatt accgtctgtc tctcagtttc\n+     5641 aaatttatgc acaaaaatca ttcactttca ttcactatat cacaaagttg ccatggtttt\n+     5701 aaattgatca aaaacaaatt aatatctatc atatatatac atagtcatat gaacagttga\n+     5761 aaaattaatt gaaaataatg ggaacgatat acgtacatac atcagttgtt tttaaaatat\n+     5821 aagggtatat agatttcttt cttgttgttg ttgattttaa ttacgtcaaa cttttgtttc\n+     5881 agattcaatg taaatggtct agctttttaa gtatgatttt 
tttttgctgc cagtgagcat\n+     5941 agaaaaaaaa aatcaaaatc gatataagaa tatgcgaaag tgcattacga aactctttag\n+     6001 ataatagcac ttaatatatg tacatagcca atagttaccg gttccttctg ttggggttcc\n+     6061 ttttgcttgg gttctccctc cgcattttcg tggactaagc ggacgtgcat gtccctcagc\n+     6121 tcggtattcc ggaaactgaa ctcgcaaatg tcgcagtggg cgggcggggt ggtccgcaca\n+     6181 ggcggtgggg ttgggacgac gggcttggac ctggttcgct tggccctccg tttgggagga\n+     6241 gctgcggcaa ggaagccccg ggacgaggcg ggttggccat tcgctggact ctcgtttccc\n+     6301 tcctgagcca tcagacttgt gtgcgagaac aggtggatgg tgagcttgtc cagccccagg\n+     6361 aagagctcct tgcagttggc aaagggacag gccagtgggc cgttggccgc cttgatcagc\n+     6421 ctctgctgca gtgcgtcaat actgccgaaa ctgggcaccg cgcagagcgg acacagcacc\n+     6481 gaggtggaac acatttcgcc agtgcactca atcgaatctt atgcaaatgc ttcacctcct\n+     6541 attgggatta tcctcctgtt cggtctgtga tcatctattc aggagtccat tcccagactg\n+     6601 cctagtcttt ctgctttcaa aattttctaa aaatatcagc aagtgaagat ttttgaaaac\n+     6661 tttgggccca gcaatctgac ttctcggcac cgatgccagc taacgaaata atgaaaaata\n+     6721 atgaaatgcc cggcgcggat cgtcgaatcg tcaagaagac tttcggaaac actcgcagca\n+     6781 ccgaaatccc atctctcgaa caaggcagtc tcttttctcc gtgtctctgg gtagctcatt\n+     6841 tcgaaatata gctctgagca cggctatata ctatatgtat gtagaattat ttctggccga\n+     6901 tatatgttgc actggcggcc atatagcctt cgttctagtc tttgtaacgc acgatgcgca\n+     6961 ggagcaattc gctgagatga ccacatgcga tttgcgggac ttatctagag atctatcatt\n+     7021 atcgccagat tggtttaata attggctttt ccgccaatat ccaattggaa tatggttggt\n+     7081 tactgcaatt gtcgctccat tttttaagca ctccataaaa agtaaacaca ttaatatgta\n+     7141 ctcttattgg agatttcttc tttcgatttt agtttcggac cagtgaaaat cattcgtttc\n+     7201 attttcgtaa ataagaactg agaaaatatt attattatat atatttcttt attaggaaaa\n+     7261 tacgaagatt gagtatttca gattgaatta gcatatccgt ctaaatctta atgctgtaat\n+     7321 gagcttactt gagatctgat caaaaccaat acaaaaccca caccaaaggt ggtagctaat\n+     7381 atacatattt tgtgtaatac ttttgtagag tatttactat tcagcgattt aaacaagcaa\n+     7441 tcgcctagac 
acacacattt gtccgcctat gtgtatgtgc accgagctat acccccactg\n+     7501 aatcgctgtg tgctattttt atggccgcga tgctctcttg ttttgacccg cttgggcaac\n+//\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/compare/Yet_another.gbk
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/compare/Yet_another.gbk Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,4148 @@\n+LOCUS       sample                215740 bp    DNA     linear       21-JUL-2021\n+DEFINITION  Yet another.\n+ACCESSION   \n+VERSION\n+KEYWORDS    .\n+SOURCE      Yet another\n+  ORGANISM  Yet another\n+            Unclassified.\n+REFERENCE   1  (bases 1 to 215740)\n+  AUTHORS   Palmer,J.M.\n+  TITLE     Direct Submission\n+  JOURNAL   Submitted (21-JUL-2021) CFMR, USDA Forest Service, 1 Gifford\n+            Pinchot Drive, Madison, WI 53726, USA\n+COMMENT     \'Annotated using 1.8.7\'.\n+FEATURES             Location/Qualifiers\n+     source          1..215740\n+                     /organism="Yet another"\n+                     /mol_type="genomic DNA"\n+     gene            complement(1092..1466)\n+                     /locus_tag="FUND_000001"\n+     mRNA            complement(1092..1466)\n+                     /locus_tag="FUND_000001"\n+                     /product="hypothetical protein"\n+     CDS             complement(1092..1466)\n+                     /locus_tag="FUND_000001"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUND_000001-T1"\n+                     /translation="MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAG\n+                     ESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKRFIRNPNYVKANEFYDKMLSSEY\n+                     VSKRYKDLPPPHPGFGADQPPA"\n+     gene            complement(2565..3142)\n+                     /locus_tag="FUND_000002"\n+     mRNA            complement(join(2565..2686,2883..3004,3138..3142))\n+                     /locus_tag="FUND_000002"\n+                     /product="hypothetical protein"\n+     CDS             complement(join(2565..2686,2883..3004,3138..3142))\n+                     /locus_tag="FUND_000002"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUND_000002-T1"\n+                     
/translation="MPLYGVSPASALWAGSRRQTVRRSRPSHHESWPATSPPAGRAQS\n+                     LLPKRLLDVVTDLGLDVLQVLGLEARLLGGDVLGFAQP"\n+     gene            complement(4248..5802)\n+                     /locus_tag="FUND_000003"\n+     mRNA            complement(join(4248..4742,4937..5539,5797..5802))\n+                     /locus_tag="FUND_000003"\n+                     /product="hypothetical protein"\n+     CDS             complement(join(4248..4742,4937..5539,5797..5802))\n+                     /locus_tag="FUND_000003"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUND_000003-T1"\n+                     /translation="MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPN\n+                     PSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGNNSTCTASQPYALSGALSMLQQS\n+                     PSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+                     KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCERVSFLVHTRIHTGVM\n+                     PYKCELCQKTFRYKVSQRTHRCPTEEAQTPEQLIKAFLEGNDSHTQPSPASAEIAAIN\n+                     SSSIVDPEQEALLSQSIDDIVVEQCQKLGICGVEPREEGQLISLQPVAVVHFSGNGSP\n+                     LQQLQNLRIYSPQQTELPSSDGEVFQRFLMDAT"\n+     gene            complement(7691..10664)\n+                     /locus_tag="FUND_000004"\n+     mRNA            complement(join(7691..8385,8707..10499,10657..10664))\n+                     /locus_tag="FUND_000004"\n+                     /product="hypothetical protein"\n+     CDS             complement(join(7691..8385,8707..10499,10657..10664))\n+                     /locus_tag="FUND_000004"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUND_000004-T1"\n+                     /translation="MSNKREIDELTSRIKSAKTTLVEWTEAMEDGNKGYQLIEKYYLD\n+                     DQQKARELNIKRQLLQADIDKRRKQVVLLYDEQMTLEKNLERTACLYRSAHAERRQMV\n+                     
ETWKSAVNQMTQREHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELA\n+                     IESLNEESSDMKNQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRD\n+                     EKAKELENFASVME'..b'1 agcttttggc actgctcaac gacaatgtcg tcgattgact gcgaaagcag tgcctcctgc\n+     4501 tcaggatcca caatggagct gctgttgatg gcagctattt cggcgctcgc tggtgaaggc\n+     4561 tgagtatgcg agtcgttgcc ctccaggaac gcctttatca gctgctccgg tgtctgggcc\n+     4621 tcctcggtgg gacatcgatg ggtcctttga ctgaccttgt acctgaacgt cttttggcag\n+     4681 agctcgcact tgtagggcat cactcccgtg tggatgcgcg tgtggacaag gaaagagact\n+     4741 cgctgccgga aacacttgcc tgtggacgag tccgaaatac gaaaaggtta gacatggagt\n+     4801 gacccggaaa agaaggtata cctctccttc atttaaagta aatagggcaa atcgcaatgg\n+     4861 agtatgctca tttataagct ggctaacaaa ataaggggcg gctaattaag gggtttgatc\n+     4921 gatactcacc gcagacttcg cacttgaagg gcttttcgcc gctgtgaatc ctctggtggt\n+     4981 tatgcagcgt agacagttcc ttgaaggcgc gtccacaaac cccgcagaca tggggcttta\n+     5041 cctcgctgtg gtagagcaaa tgcttgtggt acgactgctg gaaggtgaag gtcctggcgc\n+     5101 agatctcgca tgtgtacggc atctcgccgg tgtggagccg cttgtgcttc ttcagaaagt\n+     5161 acttggtggt gaaggacttg ctgcacacat cgcactccca cagcttcggg gtggccgtgc\n+     5221 ccgactccgg cgaactgggg gactgctgga gcatgctcag cgccccactc agcgcgtatg\n+     5281 gctgggatgc ggtgcacgtg gagttatttc cgttgccaag ggctccaggc tctgtgtgtc\n+     5341 gaatgcgatc gcaaatgctc agcttgggcg tggcagtaac tgcactggtg gtagaagcgg\n+     5401 gtgtaggact gggattagga tttggattgg gattggagca gggcacgccc atcatgtgca\n+     5461 ctactttcag gtggattcga agggagccct tcattcggaa cgtcttggag cagagatggc\n+     5521 acttgtaggg ctcctggtcc tgtataaagc aataatcgga atttcactta tatttatcaa\n+     5581 ttcatcaata tgccctcatg gccaaatatt ccattacatt accgtctgtc tctcagtttc\n+     5641 aaatttatgc acaaaaatca ttcactttca ttcactatat cacaaagttg ccatggtttt\n+     5701 aaattgatca aaaacaaatt aatatctatc atatatatac atagtcatat gaacagttga\n+     5761 aaaattaatt gaaaataatg ggaacgatat acgtacatac atcagttgtt tttaaaatat\n+     5821 aagggtatat 
agatttcttt cttgttgttg ttgattttaa ttacgtcaaa cttttgtttc\n+     5881 agattcaatg taaatggtct agctttttaa gtatgatttt tttttgctgc cagtgagcat\n+     5941 agaaaaaaaa aatcaaaatc gatataagaa tatgcgaaag tgcattacga aactctttag\n+     6001 ataatagcac ttaatatatg tacatagcca atagttaccg gttccttctg ttggggttcc\n+     6061 ttttgcttgg gttctccctc cgcattttcg tggactaagc ggacgtgcat gtccctcagc\n+     6121 tcggtattcc ggaaactgaa ctcgcaaatg tcgcagtggg cgggcggggt ggtccgcaca\n+     6181 ggcggtgggg ttgggacgac gggcttggac ctggttcgct tggccctccg tttgggagga\n+     6241 gctgcggcaa ggaagccccg ggacgaggcg ggttggccat tcgctggact ctcgtttccc\n+     6301 tcctgagcca tcagacttgt gtgcgagaac aggtggatgg tgagcttgtc cagccccagg\n+     6361 aagagctcct tgcagttggc aaagggacag gccagtgggc cgttggccgc cttgatcagc\n+     6421 ctctgctgca gtgcgtcaat actgccgaaa ctgggcaccg cgcagagcgg acacagcacc\n+     6481 gaggtggaac acatttcgcc agtgcactca atcgaatctt atgcaaatgc ttcacctcct\n+     6541 attgggatta tcctcctgtt cggtctgtga tcatctattc aggagtccat tcccagactg\n+     6601 cctagtcttt ctgctttcaa aattttctaa aaatatcagc aagtgaagat ttttgaaaac\n+     6661 tttgggccca gcaatctgac ttctcggcac cgatgccagc taacgaaata atgaaaaata\n+     6721 atgaaatgcc cggcgcggat cgtcgaatcg tcaagaagac tttcggaaac actcgcagca\n+     6781 ccgaaatccc atctctcgaa caaggcagtc tcttttctcc gtgtctctgg gtagctcatt\n+     6841 tcgaaatata gctctgagca cggctatata ctatatgtat gtagaattat ttctggccga\n+     6901 tatatgttgc actggcggcc atatagcctt cgttctagtc tttgtaacgc acgatgcgca\n+     6961 ggagcaattc gctgagatga ccacatgcga tttgcgggac ttatctagag atctatcatt\n+     7021 atcgccagat tggtttaata attggctttt ccgccaatat ccaattggaa tatggttggt\n+     7081 tactgcaatt gtcgctccat tttttaagca ctccataaaa agtaaacaca ttaatatgta\n+     7141 ctcttattgg agatttcttc tttcgatttt agtttcggac cagtgaaaat cattcgtttc\n+     7201 attttcgtaa ataagaactg agaaaatatt attattatat atatttcttt attaggaaaa\n+     7261 tacgaagatt gagtatttca gattgaatta gcatatccgt ctaaatctta atgctgtaat\n+     7321 gagcttactt gagatctgat caaaaccaat acaaaaccca caccaaaggt 
ggtagctaat\n+     7381 atacatattt tgtgtaatac ttttgtagag tatttactat tcagcgattt aaacaagcaa\n+     7441 tcgcctagac acacacattt gtccgcctat gtgtatgtgc accgagctat acccccactg\n+     7501 aatcgctgtg tgctattttt atggccgcga tgctctcttg ttttgacccg cttgggcaac\n+//\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate.loc Mon Oct 04 19:38:37 2021 +0000
b
@@ -0,0 +1,7 @@
+# this is a tab separated file describing the location of funannotate databases used for the
+# funannotate annotation tool
+#
+# the columns are:
+# value  description format_version path
+#
+2021-07-20-120000 Funannotate database 2021-07-20-120000 1.0 ${__HERE__}/funannotate_db
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/Pfam-A.clans.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/Pfam-A.clans.tsv Mon Oct 04 19:38:37 2021 +0000
b
@@ -0,0 +1,31 @@
+PF00001 CL0192 GPCR_A 7tm_1 7 transmembrane receptor (rhodopsin family)
+PF00002 CL0192 GPCR_A 7tm_2 7 transmembrane receptor (Secretin family)
+PF00003 CL0192 GPCR_A 7tm_3 7 transmembrane sweet-taste receptor of 3 GCPR
+PF00004 CL0023 P-loop_NTPase AAA ATPase family associated with various cellular activities (AAA)
+PF00005 CL0023 P-loop_NTPase ABC_tran ABC transporter
+PF00006 CL0023 P-loop_NTPase ATP-synt_ab ATP synthase alpha/beta family, nucleotide-binding domain
+PF00007 CL0079 Cystine-knot Cys_knot Cystine-knot domain
+PF00008 CL0001 EGF EGF EGF-like domain
+PF00009 CL0023 P-loop_NTPase GTP_EFTU Elongation factor Tu GTP binding domain
+PF00010 HLH Helix-loop-helix DNA-binding domain
+PF00011 CL0190 HSP20 HSP20 Hsp20/alpha crystallin family
+PF00012 CL0108 Actin_ATPase HSP70 Hsp70 protein
+PF00013 CL0007 KH KH_1 KH domain
+PF00014 Kunitz_BPTI Kunitz/Bovine pancreatic trypsin inhibitor domain
+PF00015 MCPsignal Methyl-accepting chemotaxis protein (MCP) signalling domain
+PF00016 RuBisCO_large Ribulose bisphosphate carboxylase large chain, catalytic domain
+PF00017 CL0541 SH2-like SH2 SH2 domain
+PF00018 CL0010 SH3 SH3_1 SH3 domain
+PF00019 CL0079 Cystine-knot TGF_beta Transforming growth factor beta like domain
+PF00020 CL0607 TNF_receptor TNFR_c6 TNFR/NGFR cysteine-rich region
+PF00021 CL0117 uPAR_Ly6_toxin UPAR_LY6 u-PAR/Ly-6 domain
+PF00022 CL0108 Actin_ATPase Actin Actin
+PF00023 CL0465 Ank Ank Ankyrin repeat
+PF00024 CL0168 PAN PAN_1 PAN domain
+PF00025 CL0023 P-loop_NTPase Arf ADP-ribosylation factor family
+PF00026 CL0129 Peptidase_AA Asp Eukaryotic aspartyl protease
+PF00027 CL0029 Cupin cNMP_binding Cyclic nucleotide-binding domain
+PF00028 CL0159 E-set Cadherin Cadherin domain
+PF00029 CL0375 Transporter Connexin Connexin
+PF00030 CL0333 gCrystallin Crystall Beta/Gamma crystallin
+PF00031 CL0121 Cystatin Cystatin Cystatin domain
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/Pfam-A.hmm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/Pfam-A.hmm Mon Oct 04 19:38:37 2021 +0000
[
b'@@ -0,0 +1,2268 @@\n+HMMER3/f [3.1b2 | February 2015]\n+NAME  1-cysPrx_C\n+ACC   PF10417.11\n+DESC  C-terminal domain of 1-Cys peroxiredoxin\n+LENG  40\n+ALPH  amino\n+RF    no\n+MM    no\n+CONS  yes\n+CS    yes\n+MAP   yes\n+DATE  Wed Feb 24 18:37:46 2021\n+NSEQ  40\n+EFFN  17.426758\n+CKSUM 4086680297\n+GA    21.10 21.10;\n+TC    21.10 21.10;\n+NC    21.00 21.00;\n+BM    hmmbuild HMM.ann SEED.ann\n+SM    hmmsearch -Z 57096847 -E 1000 --cpu 4 HMM pfamseq\n+STATS LOCAL MSV       -7.5463  0.71948\n+STATS LOCAL VITERBI   -7.8624  0.71948\n+STATS LOCAL FORWARD   -4.3303  0.71948\n+HMM          A        C        D        E        F        G        H        I        K        L        M        N        P        Q        R        S        T        V        W        Y   \n+            m->m     m->i     m->d     i->m     i->i     d->m     d->d\n+  COMPO   2.28046  4.31208  2.83393  2.63913  3.90855  2.69988  3.89812  3.33401  2.56310  2.85023  3.99954  3.22924  2.52123  2.90328  3.31238  2.94055  2.70512  2.59551  3.49266  3.82715\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.00226  6.48754  7.20989  0.61958  0.77255  0.00000        *\n+      1   0.29666  6.14436  6.78514  6.79783  7.06332  2.55785  7.22049  6.57837  6.66651  6.27638  3.28757  5.91223  5.83978  6.69238  6.58162  2.20136  4.83343  5.59959  8.41086  7.43107      1 A - - H\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.00226  6.48754  7.20989  0.61958  0.77255  0.48576  0.95510\n+      2   4.59591  5.92009  6.57211  5.96147  1.92899  5.81035  6.10135  2.33093  5.75927  0.69439  2.86149  5.97820  6.07717  5.78793  5.72916  5.13924  4.81708  2.59612  3.18569  3.35842      2 l - - H\n+          
2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.00226  6.48754  7.20989  0.61958  0.77255  0.48576  0.95510\n+      3   4.81290  7.05274  3.71696  4.47757  6.60126  5.41623  3.72993  5.92180  2.06538  3.59487  6.10993  4.89014  5.75663  0.42291  2.54802  4.76779  4.95656  5.56452  7.24472  6.08615      3 Q - - H\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.00226  6.48754  7.20989  0.61958  0.77255  0.48576  0.95510\n+      4   2.64100  5.28125  5.84007  3.33362  1.80025  5.06896  2.72827  3.71332  5.01717  1.75203  2.65498  5.22280  5.43290  5.15329  5.03455  4.37913  1.80041  2.31249  5.90246  2.63298      4 l - - H\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.00226  6.48754  7.20989  0.61958  0.77255  0.48576  0.95510\n+      5   1.41804  5.70097  4.57246  4.01393  4.89018  2.99141  3.06166  4.26400  3.98607  3.95402  4.82703  3.11439  5.16775  4.30619  4.38279  2.19082  1.83510  1.66601  6.29409  3.75702      5 a - - H\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.00226  6.48754  7.20989  0.61958  0.77255  0.48576  0.95510\n+      6   2.40940  6.27006  0.95419  1.88136  5.61364  4.53669  4.72417  5.09838  3.04943  4.57453  3.59812  4.00473  4.93049  3.33824  2.90687  3.72620  2.65150  4.65993  3.45464  5.29359      6 d - - H\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739 
 3.18146  2.89801  2.37887  2.77519  2.98518  4.5847'..b'.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01335  4.71894  5.44129  0.61958  0.77255  0.48576  0.95510\n+    442   2.29864  4.36166  2.78326  3.38999  2.53742  2.77187  4.12835  1.90087  3.31519  2.56018  3.47681  3.70473  4.21526  3.57901  3.58783  3.10825  3.02383  2.10914  4.97533  3.76638    497 i - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01335  4.71894  5.44129  0.61958  0.77255  0.48576  0.95510\n+    443   1.63203  4.34013  4.06646  3.49075  2.53230  3.86648  4.15865  2.76897  3.39829  2.04499  2.63095  2.72733  4.24075  3.64956  3.64097  3.14318  3.03596  2.57834  4.93886  3.73074    498 a - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01335  4.71894  5.44129  0.61958  0.77255  0.48576  0.95510\n+    444   3.45776  4.79585  5.18920  4.63265  3.52372  4.70258  5.10103  1.90217  4.47095  0.76832  3.34020  4.80189  4.96861  4.60775  4.56613  4.05504  2.50816  2.41399  5.48117  4.38492    499 l - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01335  4.71894  5.44129  0.61958  0.77255  0.48576  0.95510\n+    445   2.36957  4.45811  5.03009  4.44127  2.41688  4.36160  4.72344  1.58595  4.26807  1.49345  3.37957  4.49747  4.67797  4.39767  4.31200  3.68800  3.35368  1.84206  5.17229  4.01590    500 l - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  
3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01335  4.71894  5.44129  0.61958  0.77255  0.48576  0.95510\n+    446   2.96533  4.42430  4.42687  3.84923  3.41907  4.08405  4.39794  2.61936  3.71705  1.40335  1.95988  2.61322  4.43548  3.93974  3.89559  3.38106  3.19904  2.03697  5.04359  3.86701    501 l - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01335  4.71894  5.44129  0.61958  0.77255  0.48576  0.95510\n+    447   2.78421  4.21512  4.43112  3.83593  2.56381  3.91375  4.21971  2.14171  3.68350  2.40586  3.33147  3.96427  4.28054  3.87190  2.63279  2.49740  3.01625  2.02770  2.95875  2.66709    502 v - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01335  4.71894  5.44129  0.61958  0.77255  0.48576  0.95510\n+    448   2.82824  5.21690  3.11861  2.17918  2.94314  3.60476  3.77516  3.98203  1.59578  3.49916  4.27124  3.09125  3.99567  2.88856  2.31346  2.30812  3.05474  3.59349  5.65674  4.29044    503 k - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01335  4.71894  5.44129  0.61958  0.77255  0.48576  0.95510\n+    449   3.39753  5.46518  3.96355  3.19882  4.85160  4.06610  3.96064  4.17905  1.03571  1.93276  4.52854  3.58771  4.40326  3.09886  1.88259  3.41029  3.55223  3.89272  5.74795  4.58563    504 k - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477 
 3.61503\n+          0.00900  4.71460        *  0.61958  0.77255  0.00000        *\n+//\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/Pfam-A.hmm.h3f
b
Binary file test-data/funannotate_db/Pfam-A.hmm.h3f has changed
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/Pfam-A.hmm.h3i
b
Binary file test-data/funannotate_db/Pfam-A.hmm.h3i has changed
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/Pfam-A.hmm.h3m
b
Binary file test-data/funannotate_db/Pfam-A.hmm.h3m has changed
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/Pfam-A.hmm.h3p
b
Binary file test-data/funannotate_db/Pfam-A.hmm.h3p has changed
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/dbCAN.hmm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/dbCAN.hmm Mon Oct 04 19:38:37 2021 +0000
[
b'@@ -0,0 +1,613 @@\n+HMMER3/b [3.0 | March 2010]\n+NAME  CBM10\n+LENG  28\n+ALPH  amino\n+RF    no\n+CS    no\n+MAP   yes\n+DATE  Thu Apr 21 15:04:19 2011\n+NSEQ  84\n+EFFN  8.697876\n+CKSUM 1939305542\n+STATS LOCAL MSV       -7.3395  0.71998\n+STATS LOCAL VITERBI   -7.4498  0.71998\n+STATS LOCAL FORWARD   -3.9737  0.71998\n+HMM          A        C        D        E        F        G        H        I        K        L        M        N        P        Q        R        S        T        V        W        Y   \n+            m->m     m->i     m->d     i->m     i->i     d->m     d->d\n+  COMPO   3.06033  2.42663  2.85747  2.77283  4.58046  2.24214  4.56942  3.29197  3.49682  3.34028  4.33944  2.39813  3.09771  2.95262  3.60586  2.44839  2.84569  3.24117  2.72301  3.28669\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.59367  5.66823  0.81137  0.61958  0.77255  0.00000        *\n+      1   2.74340  3.47729  3.44643  2.64352  4.17985  3.79353  4.02282  3.57300  2.87097  3.22136  2.03109  3.38316  4.18747  1.79120  3.30841  2.03216  2.98385  3.28775  5.51287  4.21451      1 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.00925  5.08381  5.80616  0.61958  0.77255  0.72961  0.65797\n+      2   3.21366  0.33490  5.23672  5.17809  5.69922  3.92522  5.89105  5.22956  5.14397  4.94899  5.73770  3.11988  4.78715  5.32569  5.20447  2.46009  3.79312  4.43668  7.07761  5.95722      2 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.00690  5.37599  6.09834  0.61958  0.77255  0.73167  0.65606\n+      3   3.46517 
 6.03524  2.03286  2.67508  5.36194  2.25828  4.36563  4.86858  2.77875  4.33291  5.09631  1.07839  4.52524  2.62719  3.73072  2.75509  3.71412  4.41890  6.46315  5.00841      3 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.04091  5.49877  3.32435  0.61958  0.77255  0.52775  0.89143\n+      4   2.51597  5.24536  3.21395  3.79383  4.81190  3.30725  4.89534  4.21717  3.89295  3.90352  4.76570  4.13904  4.73005  4.17210  4.26805  3.51068  1.58681  3.86875  0.87084  4.95297      4 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.00570  5.56595  6.28829  0.61958  0.77255  0.68571  0.70065\n+      5   1.88947  5.00491  4.00042  3.04471  4.13869  4.17114  3.84034  3.52228  3.40249  3.22374  4.10809  3.86994  4.55537  3.71667  3.77844  2.67165  2.19530  3.28758  3.58338  1.27239      5 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.00552  5.59725  6.31960  0.61958  0.77255  0.60266  0.79264\n+      6   3.27926  5.74099  2.79040  2.94678  5.08062  1.27592  4.24831  3.41153  3.01359  4.05572  4.81107  3.21498  2.15563  2.32440  3.50951  2.31938  3.51838  4.13470  6.20511  4.79969      6 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.00538  5.62391  6.34625  0.61958  0.77255  0.69965  0.68669\n+      7   2.98451  5.68149  2.03356  2.71108  5.01893  3.09684  4.15179  4.49988  2.89056  3.98355  4.72438  1.82442  4.35665  2.68802  3.38251  
2.74393  1.71463  4.06807  3.52699  3.90983 '..b' 4.13890    203 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01622  4.52558  5.24792  0.61958  0.77255  0.61781  0.77462\n+    156   3.23629  4.53295  5.17116  4.60062  2.56767  4.55401  4.94436  1.53611  4.45009  1.93006  2.67230  4.68873  4.82232  4.56190  4.49974  3.89871  3.47038  1.24322  5.32187  4.18538    204 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01622  4.52558  5.24792  0.61958  0.77255  0.61781  0.77462\n+    157   3.35666  5.86974  0.68984  2.46426  4.92782  3.56196  2.78368  4.75794  3.22196  4.22981  5.12544  3.04989  4.21061  3.34715  3.81641  3.23366  3.66181  4.32892  6.19564  4.65728    205 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01622  4.52558  5.24792  0.61958  0.77255  0.61781  0.77462\n+    158   2.67298  5.45036  2.13159  2.39143  4.78238  2.86513  3.84529  4.27738  2.69136  3.76387  4.54115  1.53088  3.99325  2.24761  3.21918  2.86652  3.18624  3.84376  5.91068  4.46667    206 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01622  4.52558  5.24792  0.61958  0.77255  0.61781  0.77462\n+    159   3.47622  4.69232  5.49800  5.03902  4.03274  5.04172  5.78958  1.10648  4.96863  2.46343  3.77576  5.20399  5.28896  5.22948  5.14151  4.47780  3.74214  0.89149  6.13647  4.91519    207 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  
2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01622  4.52558  5.24792  0.61958  0.77255  0.61781  0.77462\n+    160   2.97740  5.38053  3.16559  1.47840  4.78122  3.66390  3.76082  4.20817  1.84043  3.64747  4.43358  3.13556  4.05699  2.52986  2.01786  2.93673  3.18400  3.80690  5.72537  4.41283    208 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01622  4.52558  5.24792  0.61958  0.77255  0.61781  0.77462\n+    161   3.57702  4.81880  5.48209  4.93951  3.50997  4.99824  5.42819  1.75624  4.81911  0.87415  3.27296  5.12067  5.15674  4.87874  4.88006  4.38284  3.80904  1.60841  5.63134  4.57121    209 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01622  4.52558  5.24792  0.61958  0.77255  0.61781  0.77462\n+    162   2.76975  4.18299  4.46129  3.86923  2.59055  3.91437  4.21294  2.01754  3.71166  2.30234  3.27364  3.97872  4.27520  3.88975  3.81544  3.21651  2.25828  2.00412  4.76988  2.10687    210 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.21940  4.52558  1.68107  0.61958  0.77255  0.61781  0.77462\n+    163   2.75776  4.82375  2.92091  2.81848  4.73679  1.07105  4.24317  4.29296  3.20401  3.89127  4.73515  1.86580  4.00371  3.46690  3.61880  2.84293  3.19250  3.75461  5.98100  4.66726    211 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  
3.61503\n+          0.01339  4.31957        *  0.61958  0.77255  0.00000        *\n+//\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/dbCAN.hmm.h3f
b
Binary file test-data/funannotate_db/dbCAN.hmm.h3f has changed
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/dbCAN.hmm.h3i
b
Binary file test-data/funannotate_db/dbCAN.hmm.h3i has changed
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/dbCAN.hmm.h3m
b
Binary file test-data/funannotate_db/dbCAN.hmm.h3m has changed
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/dbCAN.hmm.h3p
b
Binary file test-data/funannotate_db/dbCAN.hmm.h3p has changed
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/funannotate-db-info.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/funannotate-db-info.txt Mon Oct 04 19:38:37 2021 +0000
b
@@ -0,0 +1,10 @@
+merops diamond /tmp/prout/merops.dmnd 12.0 2017-10-04 5009 a6dd76907896708f3ca5335f58560356
+uniprot diamond /tmp/prout/uniprot.dmnd 2021_03 2021-06-02 565254 68ed1e475d13bb3d5574c53822d11cd3
+dbCAN hmmer3 /tmp/prout/dbCAN.hmm 9.0 2020-08-04 641 04696dfba1c3bb82ff9b72cfbb3e4a65
+pfam hmmer3 /tmp/prout/Pfam-A.hmm 34.0 2021-03 19179 f83c0d00445257fd9c066ad3e9e10568
+repeats diamond /tmp/prout/repeats.dmnd 1.0 2021-07-19 11950 4e8cafc3eea47ec7ba505bb1e3465d21
+go text /tmp/prout/go.obo 2021-07-02 2021-07-02 47228 f5b79fe1a6d6a67c542e39da5d4661dc
+mibig diamond /tmp/prout/mibig.dmnd 1.4 2021-07-19 31023 118f2c11edde36c81bdea030a0228492
+interpro xml /tmp/prout/interpro.xml 86.0 2021-06-03 38913 0d8c575f88f397397b9491520b38db1e
+busco_outgroups outgroups /tmp/prout/outgroups 1.0 2021-07-19 8 6795b1d4545850a4226829c7ae8ef058
+gene2product text /tmp/prout/ncbi_cleaned_gene_products.txt 1.70 2021-06-15 34039 e93924259b8294255def54097bdab07b
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/go.obo
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/go.obo Mon Oct 04 19:38:37 2021 +0000
[
@@ -0,0 +1,34 @@
+format-version: 1.2
+data-version: releases/2021-08-18
+subsetdef: chebi_ph7_3 "Rhea list of ChEBI terms representing the major species at pH 7.3."
+subsetdef: gocheck_do_not_annotate "Term not to be used for direct annotation"
+subsetdef: gocheck_do_not_manually_annotate "Term not to be used for direct manual annotation"
+subsetdef: goslim_agr "AGR slim"
+subsetdef: goslim_aspergillus "Aspergillus GO slim"
+subsetdef: goslim_candida "Candida GO slim"
+subsetdef: goslim_chembl "ChEMBL protein targets summary"
+subsetdef: goslim_drosophila "Drosophila GO slim"
+subsetdef: goslim_flybase_ribbon "FlyBase Drosophila GO ribbon slim"
+subsetdef: goslim_generic "Generic GO slim"
+subsetdef: goslim_metagenomics "Metagenomics GO slim"
+subsetdef: goslim_mouse "Mouse GO slim"
+subsetdef: goslim_pir "PIR GO slim"
+subsetdef: goslim_plant "Plant GO slim"
+subsetdef: goslim_pombe "Fission yeast GO slim"
+subsetdef: goslim_synapse "synapse GO slim"
+subsetdef: goslim_yeast "Yeast GO slim"
+synonymtypedef: syngo_official_label "label approved by the SynGO project"
+synonymtypedef: systematic_synonym "Systematic synonym" EXACT
+default-namespace: gene_ontology
+ontology: go
+property_value: http://purl.org/dc/elements/1.1/description "The Gene Ontology (GO) provides a framework and set of concepts for describing the functions of gene products from all organisms." xsd:string
+property_value: http://purl.org/dc/elements/1.1/title "Gene Ontology" xsd:string
+property_value: http://purl.org/dc/terms/license http://creativecommons.org/licenses/by/4.0/
+property_value: owl:versionInfo "2021-08-18" xsd:string
+
+[Term]
+id: GO:0000001
+name: mitochondrion inheritance
+namespace: biological_process
+def: "The distribution of mitochondria, including the mitochondrial genome, into daughter cells after mitosis or meiosis, mediated by interactions between mitochondria and the cytoskeleton." [GOC:mcc, PMID:10873824, PMID:11389764]
+synonym: "mitochondrial inheritance" EXACT []
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/insecta/ancestral
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/insecta/ancestral Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,10899 @@\n+>EOG090W0028\n+VPLKDNQDVACFLVTKHSWKGKYKRIFSIGTAGITTYNPDKLEVTNKWLYSDVISVAPEF\n+VLTLKKDKKVDSLKFSSEHRAELLTEALKYFAEKPKRYEAYKLHWSDTRLPVVLEVTPAS\n+LDQLDPATNTVLASYAYKDIEGIGGFVIVVGGFSRLHLFEIKKKILESALGIEIKVITLE\n+EFEEQRLGKYSGDEHQTSLSEFTVEKVRHKEPVRRLLCLSETCLLERDPQTYSIVTLRPL\n+SDVFALVRIEYLNGQVRSYLATDRDSLLASLLDGVRASGNRDVHVKIKKTERGKRLGPLV\n+DEEVEALLLKLLQNEVLERFNANVPYSGLLYSVTQDGLFAENKEKLILEALQALVQKELE\n+AQFHALRRLVASKVGFAAFTKLSGFREAIGKKVVKALKRNDEAVTQAAIDLICALMQPMD\n+LDLRQEQLNKSSLLSSKKFLESLLDMWTEHVSKGTGALVVSAMLDLLTFALCVPYSETTD\n+GKQFDALLELVAERGRVLFKLFQHPSLAIVKGAGLVMRAIIEEGVAAKMQELALAEGALP\n+RHLLAALYTRLLTHRQLSRHLVGLWVTAMELLKRILPAGLLAFLESEEKVPEEEKLNVRD\n+NLKLAQDHASKKKVEKHLEALKHWGAKVEKIKERPVVLRKRRERKKSKLFYYKFNKDHAL\n+PNLIWNHKTREELREALENELRAFESDKELAGLVAWNYAEFEVKYQCLADEVKIGDYYLR\n+LLLEKDDSDSLIRKSYELFNDLYHRFLLTTKVELKVLCLQALAIVYGRYYEDIGPFSDTK\n+YIVQLLDRCLDRDRLVLFLKKLILHKRNVKEILDVRILVDLLTLAHLHTSRAEKEWYYNE\n+RKGPVSFKELKELYKKGKITAKTKVWAQGLDGWRSLQQVPQLKWTLVAKGSPVLNESELA\n+ALILDILIKLTEYFPSRAVIRPLPRVKRLLSELACLPHIVQLLLTFDPVLVEKVATLLLE\n+IMKDNPEVSKLYLTGVFYFILLYTGSNVLPIARFLKLTHTKQAFRSDESDIMQRSILGQL\n+LPEAMVSYLENHGAEKFAEIFLGEFDTPEAIWNSEMRRLLIEKIAAHIADFTPRLRSHTR\n+ARYQYLAIPAVRYPQLEKELFCNIFYLRHLCDTAKFPDWPIADPVKLLKDVLEAWKKEVE\n+KKPPAMTVEEAYKELGLDEAAVRKAYYKLAQKYHPDKNPEGRDKFEAVNKAYEFLCSRSS\n+WSGPNPNNIVLILRTQSILFERYSELRPYKYAGYPQLIKTIKLETKDEQLFSKLLAAASE\n+LAYHTVKCSALNAEELRREEGLEVLLEAYSRCVSVLSKSSKEEDQVCLNITRCFAVAAKF\n+EACRDKIVELPQLVKDLVRVLKFKHLAADSELQLQLVKAGVLWSLLLFLFEYDYTLEESG\n+VERSEEENKQEVANKLAKLAVKACAALAGYLEKLLTPYLARKLILKILTSNTENPYLIWD\n+NGTRAELLEFLEEKRFKYSAHKDELKIGEVFIRIYNEQPTFPINPKEFVLDLLEFLKHVV\n+MALEALANVIKNNKGVEIQCIGKFKLLFGLLSIKKAALEVISLVSRNKECVEDIAASEVL\n+VKLLLLLKVLDTLSALLKIVKEALAKGAVLYLLDLFCNSIREAAAELLAKLSADKLSGPK\n+VRLTLSKFLPKLLADALRDSPVQLFESKHENPELIWDDEARKRVNELVVGGVYLRLFVAN\n+PAWTLRKPKEFLSDLLDTVLELLSKLELATTALVALLRAQPALADAVPSLGHIPKLVRQL\n+KSALLVLHQLALSEICVSAISQTECISPLKRDLIAVACETLSRLFDKLVKQALEAELVKY\n+LLELLESRTKAQIVKALKAMSRSGEKVKAILEKSSVWAEYKDQKHDLFISAAGYLTAGPS\n+TSPPPVD\
n+>EOG090W002U\n+MTTDISVVEYDGGNSSSRLFERSRIKAERESVQKKTFQKWVNSHLVRRIGDLLRDGKKLI\n+KLLEVLSGERLPRPTKGKMRIHCLENVDKALQFLREQRVHLENLGSHDIVDGNARLSLGL\n+IWTIILRFQIQDITIEETDNKETKSAKDALLLWCQMKTAGYHNVNVRNFTTSWRDGLAFN\n+AIIHKHRPDLIQFEKLSKSNAIYNLNNAFNVAEDKLGLTKLLDAEDVFVEQPDEKSIITY\n+VVTYYHYFSKLKQETVQGKRIGKVVGIAMENDRMIKEYESLTSDLLKWIEATIEALGDRK\n+FANSLVGVQQQLAQFSNYRTVEKPPKFVEKGNLEVLLFTLQSKMRANNQKPYTPKEGKMI\n+SDINKAWERLEKAEHERELALREELIRQEKLEQLAARFNRKASMRETWLSENQRLVSQDN\n+FGFDLAAVEAAAKKHEAIETDIFAYEERVQAVVAVSQELEAENYHDIERINARKDNVLRL\n+WNYLLELLRARRLRLELSLQLQQNFQEMLYILDSMEELKLRLLTDDYGKHLMGVEDLLQK\n+HSLVEADINVLGERVKAVVQQSQRFLYKPCDPAIIVERVQQLEDAYAELVKLAVERRARL\n+EESRKLWQFYWDMADEENWIKEKEQIVSTADIGHDLTTVNLLLSKHKALENEIQSHEPQL\n+MSVVAVGDELVHFGADRIQERLKEILAKWNHLLDLRRKRLEAVDYHQLFADADDVDIWML\n+DTLRLVSSEDVGRDEANVQSLLKKHKDVTDELKNYALHQQAEELERLASIDSRYKELLEL\n+AKLRKQRLLDALSLYKLLSESDGVEQWIGEKDRMLDTMVPAKDIEDVEILKHRYDGFDKE\n+MNANASRVAVVNQLARQLLHVEHPNSEQIVARQNELNQKWAELREKAEAKRDELNSAHGV\n+QTFYIECRETVSWIEDKKRILQETDSLEMDLTGVMTLQRRLSGMERDLAAIQAKLDSLEK\n+EAEAIHPEEAALIRERIAQIELIWEQLTQMLKERDAKLEEAGDLHRFLRDLDHFQAWLTK\n+TQTDVASEDTPTSLAEAEKLLSQHQSIKEEIDNYTDDYKKMMEYGERLTAEPSTQDDPQY\n+MFLRERLKALKDGWEELHQMWENRQQLLSQSLNLQLFNRDARQAEVLLSQQEHVLAKDET\n+PVNLEQAENLLKRHEAFLTTMEANDDKINSVVQFAERLVDEEHFAADKVKKKAENIEERA\n+NREKAEKLKDQEFLQDLEELSEWVQEKKITAQDETYRSAKTVHSKWTRHQAFEAEIASNK\n+ERLKPELAEIIEPKLKELADQFEELETTTKEKGERLFDANREVLIHQTCDDIDSWLNELE\n+KQIESEDTGSDLASVNILMQKQQLIETQMAVKAKQVEELEKQAEYLQKTVPVKKEKVEER\n+FEKLKAPLLERQRQLEKKKEAFQFRRDVEDEKLWIAEKLPLATSTEYGNSLFNVHVLKKK\n+NQSLKTEIDNHEPRIKAVCNNGQKLIDEGHEDAKEFEKLIEELWKELKDAVEEREKAQQY\n+LFDASEAESWMSEQELYMMVEDRGKDEISAQNLMKKHESLEKAVEDYAETIRQLGETARQ\n+LDQIAVKQSQVDKLYAGLKDLAGERRAKLDEALQLFMLNREVDDLEQWIAEREVVAGSHE\n+LGQDYDHVTLLWERFKEFARDTEAVGSERVAAVNEIADELIAAGHSDSATIAEWKDGLNE\n+AWQDLLELIETRTQMLAASRELHKFFHDCKDVLGRILEKQSDELGRDAGSVSALQRKHQN\n+FLQDLSTLQSQVQQIQEESAKLQASYAGDKAKEITNREAEVVAAWANLQALCDARKAKLA\n+DTGDLFFFNLVRTLLLWLDDVVRQMNTSEKPRDVSGVELLMNNHQSLKAEIDAREDNFSA\n+CISLGKELLARNHYASIKEKLLALTNQRNA
LLKRWEERWENLQLILEVYQFARDAAVAEA\n+WLIAQEPYLLSQELGHTIDEVENLIKKHEAFEKSA'..b'OG090W0MK4\n+DAEQIKSFKDFLLSYNKLSELCFVDCISDFTSREVEEKCALNCLEKYLKMNQRISQRFQE\n+FQLIANENALAAAKK\n+>EOG090W0MLJ\n+QKKLQELDKYKQVQKEYKKAVKQRQQLDGQLNENKVVELDLLKEDNEVYKLIGPVLVKQE\n+LEEAKQNVSKRIEYISKELKRVEDLIASLEKKQEKHRENLEKLQQQLQ\n+>EOG090W0MM4\n+LYEPDYLKPKIPLYDVLNVQIKGYDYAVLESYQKLIHKIAEALDLDVEDSWALPAQELKV\n+QRYKPKSTVVEAEYKLKVYERNVQISDVSSPILLRVLEAALPEGVTLEVEEHEEEKEEKR\n+YVPDKELLDLKQELDEL\n+>EOG090W0MNZ\n+KIEEYETFINDVLKEDLKKLEKKLEKLNEEIAEYVQLKSTIETLDGLKTKVDIGCNFFVQ\n+AKVEDSKILVNIGLGVYLELTLEEALKFIDVRIKLLEKQIEKLRKESAKTKAHIKLVLLA\n+IEELQ\n+>EOG090W0MYQ\n+NPFEKEKKKCILCKLNIEPDYKNVKLLSQFQSPYTGRIYGRHITGLCKKKQEKVEKEILK\n+AQFLKDPKLFDPEKPLRPHK\n+>EOG090W0MZQ\n+PPINQKRLLAFINHFIISTVSFLNKFAKSCEEKLLEFEKKLQKVEASLVILEAKLSSIPE\n+LEEDPEYKKYFKMVQVGVPKEAVKLKMQQEGLDPSLLD\n+>EOG090W0N0N\n+LSKKEKLKKAVKDYGSTVVVFHVGISLISLGALYLLVSSGLDVLLEKLEASTFVVAYAVH\n+KVLAPVRISITLAATPLIVRYLRKIGLLK\n+>EOG090W0N4N\n+MDLSKVKNEKKLELCKLYFGFALLPFLWAVNAVWFFKEAFKKPEYEEQKQIKKYVILSAI\n+GALIWAWIVIFQLKRAEWGELADEISFIIPLG\n+>EOG090W0N5S\n+MKAVTAVCATGASVPAVASGRVKRRRDLENEEIQMYLSKLKDLVPFMPKNRKLSKLEVIQ\n+HVIDYICDLQTALEEHPAAAALARQPLGVLPNTIL\n+>EOG090W0N7H\n+MKLSHETVTIELKNGTQVHGTITGVDVAMNTHLKAVKLTIKNRLETLSIRGNNIRYYILP\n+DSLPLETLLIDDTPKAKAKKK\n+>EOG090W0N7U\n+SSTSQKHREPMGDKPVTDLAGVGEVLGKRLVVLGQYLVLKKDKELFKEWMKDTCSANSKQ\n+SSDCYQCLSDWCEEF\n+>EOG090W0NCE\n+VNKTVSIITDGRNFIGTLKGFDQTINLILDESHERVYSTTQGVEQVVLGLHIIRGDNVAI\n+VGELDDSRLDLSSIRAEPLSSVVH\n+>EOG090W0NFV\n+DPELEAIRAQRLAQLQSQYKGQKAQEEKKREQEEMKNSILSQVLDQSARARLNTLKLGKP\n+EKGKMVENLLIRMAQRGQIKGKLGEKELIKLLESVNQQTTVKFDRRRAALDSDDD\n+>EOG090W0NJA\n+TRVYVGGLTEKVKKEDLEAEFEKYGKLNSVWVAFNPPGFAFIEFENKDEAEKACDNLNGT\n+ELLGSKLRVEISRGRGRKGGRGKRGSRFRSRSPVGR\n+>EOG090W0NJU\n+YLKSWEEFEKAAERLYLQDPLKRYTMKYVHSKGLLVLKLTDNCLQYKTEDLKKIEKFISN\n+LMRHMASKE\n+>EOG090W0NK3\n+VNVPKQRRTFCKKCKVHKLHKVTQYKKSKEGRRRYDRKQQGFGGQTKPIFRKKAKTTKKI\n+VLRLECTECKYRKPLKRCKHFELGGDKKRK\n+>EOG090W0NO8\n+MGKVKCSELRTKDKKELLKQLEELKTELTNLRVAKVTGGAASKLSKIRVVRKAIARVYIV\n+
LHQKQKENLRKNKKYKPLDLRPKKTRALRRALTTLKEIRKRKYAVKA\n+>EOG090W0NRT\n+RKEALSQFIQQIHGRPVVVKLNSGVDYRGVLACLDGYMNIALEQTEEYVNGQLKNKYGDA\n+FIRGNNVLY\n+>EOG090W0NTV\n+EEWLEKEVIGLRVWQLLLLVLSILLSLVILLCCCIRFRIPRTKQEIEADYERKKLTKKFR\n+KRLKKIKNSEMDELDLKKAEAESLE\n+>EOG090W0O4V\n+MPKYYCDYCDTYLTHDSPSVRKTHCQGRKHKDNVKFYYQKWMEEQAQHLIDATTAAFKAG\n+KIASNPFAGVAIPPPGPGLAAPPGMPMMMGPHGPMPPMMMRPLMKPKGPMAPMGPLGALG\n+PVRPPL\n+>EOG090W0O82\n+MLEITCNDRLGKKVRVKCNPDDTIGDLKKLIAAQTGTKIVLKKWYTIFKDHIKLQD\n+>EOG090W0O88\n+AKRTKKVGITGKYGTRYGASLRKMVKKMEITQHSKYTCSFCGKAMKRSVVGIWSCKRCKR\n+TVAGGAWVYSTTAAASVRSAVRRLRE\n+>EOG090W0ODH\n+MEEKLAEYRAKKRREELLEKVKEKLKEVYLLYFLLWATLYIIAIELEFGAVYLVLSALVF\n+IYLNTRTGPKKKGEVSAYSVFNKNCEAIDGTLKAEQFEREIRYG\n+>EOG090W0OM7\n+LGRSRSPSPRRRRKERRDRRRRRSRERRRRSRDRERSLSRSRSRSEERERPVITEADLEG\n+KSPEEQEMLKLMGFCGFDTTKGKKVEGNDVGEVHVILKRKYRQYMNRKGGFNRPLDFV\n+>EOG090W0ORD\n+DEYALVAKGKLKLKSDKKKKKKKKRTKAELAFKMQEKMQKERIKEKASMTHKQRVEEFNR\n+HLDSLTEHFDIPKVSWTK\n+>EOG090W0ORX\n+PREIKEIKDFLLKARRKDAKSKIKKNAENVKFKVRCSRFLYTLVITDKEKAEKLKQSLPP\n+G\n+>EOG090W0OS5\n+KELEKLEEAKLKAKYPEGHSAFLQKRLAKGQKYFDSGDYQMAKQKTGEAIPTPETVPVRK\n+TSIIQP\n+>EOG090W0PDB\n+FAKDSIRLVKRCTKPDREFQKIAIATAIGFCIMGFIGFFVKLIHIPINNIIV\n+>EOG090W0PQO\n+LLLLAVALAAAQLFLAQALEASLAHPAVVENAEAEAQLPEELRNPFYKNPRIAAALAKES\n+WFTNKEMQVIDREAEKIPREKIYKILKNAGLVRRR\n+>EOG090W0PW0\n+EEKELKAGHPPAVKAGGMRITQHKTPSPPKTISGAPVKGNEAVQVFHEKKPPTIQQPRK\n+>EOG090W0PZH\n+KPIDSKREEFRKYLERAGVLDALTKVLVSLYEEPEKPEDALEYLRKNLGLKKELEEAKAE\n+IAELE\n+>EOG090W0Q9X\n+VIGGAVVGLLCAILVVMFIVYRLRKKDEGSYALEPKKRSPNREFYA\n+>EOG090W0QXM\n+PAAPSSTSVGSGSRSPSKQRKTTGSGGMWRFYTDDSPGIKVGPVPVLVMSLLFIASVFML\n+HIWGKYTRS\n+>EOG090W0R2X\n+MKRTKEKVEKEEGEELYSNEITEEMKKFIIEPSYVLCEKLIEGRLSFGGMNPEIEKLMEE\n+EEKDVSDEEMA\n+>EOG090W0RGQ\n+DVLDSWEEIDESEALEKKLKKLVIIKEEDELRSQLVPPEPTVKILKRPEKSSNGESKPKQ\n+PIKTLKQREQEYAEARLRILGEAKSPEENVLRLPRGPDGTKGFNVRR\n+>EOG090W0S6D\n+RVNGSLLKQFIGKKVSILGKVKKKSSNGKSFLKTTDNQKVTVELKEPLDEPLEGWVEVHG\n+VVKSSTISCDEYIEFPEETENFDAEAYNKLLNTVKNPWK\n+>EOG090W0T3K\n+MREFTNIVTTLSKLSKECVLRLTKDKLVFIVPL
VWCELDQKFFSEYNMEGVSNEIYLELS\n+AEMLSRSLSSLKAKSVKIKLTNKQSPCLTVEIELSSESRQVVHDIPVTVIPRKEWSEYEE\n+PSIELPSLKKLRKVVDRMKNLSPSLTISATLKIETDTATVSTHFKNLKVSARVDIKKLSA\n+FLEVICSIEKLIKLELVKLHYFLPAV\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/insecta/dataset.cfg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/insecta/dataset.cfg Mon Oct 04 19:38:37 2021 +0000
b
@@ -0,0 +1,6 @@
+name=insecta_odb9
+species=fly
+domain=eukaryota
+creation_date=2016-02-13
+number_of_BUSCOs=1658
+number_of_species=42
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/insecta/hmms/EOG090W03A6.hmm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/insecta/hmms/EOG090W03A6.hmm Mon Oct 04 19:38:37 2021 +0000
[
b'@@ -0,0 +1,892 @@\n+HMMER3/f [3.1b1 | May 2013]\n+NAME  EOG090W03A6\n+LENG  290\n+ALPH  amino\n+RF    no\n+MM    no\n+CONS  yes\n+CS    no\n+MAP   yes\n+DATE  Tue Jul 19 12:14:12 2016\n+NSEQ  41\n+EFFN  0.573059\n+CKSUM 4199501958\n+STATS LOCAL MSV      -11.0137  0.70159\n+STATS LOCAL VITERBI  -11.8914  0.70159\n+STATS LOCAL FORWARD   -5.4517  0.70159\n+HMM          A        C        D        E        F        G        H        I        K        L        M        N        P        Q        R        S        T        V        W        Y   \n+            m->m     m->i     m->d     i->m     i->i     d->m     d->d\n+  COMPO   2.61260  4.34648  2.94292  2.63725  3.00827  2.97272  3.70379  2.92379  2.63311  2.34924  3.71976  3.18648  3.40267  3.05104  2.84850  2.73289  2.87560  2.67151  4.50628  3.28193\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.06535  3.91193  3.14059  0.61958  0.77255  0.00000        *\n+      1   3.00919  4.52311  4.14821  3.62922  3.09723  4.03596  4.32783  2.27940  3.34231  1.08727  2.86803  3.94138  4.37623  3.72340  3.44120  3.39248  3.26003  2.31137  4.98326  3.74489      1 l - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03123  3.87781  4.60016  0.61958  0.77255  0.51713  0.90691\n+      2   2.36218  4.33870  3.13011  2.73889  4.02232  3.10957  3.85645  3.40174  2.64759  3.07800  3.92900  3.09783  3.59930  3.07936  2.94839  1.61731  2.43809  2.94535  5.34618  4.08403      2 s - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03123  3.87781  4.60016  0.61958  0.77255 
 0.51713  0.90691\n+      3   2.67521  4.27814  3.24960  2.71563  4.05115  3.41144  3.69906  3.32637  1.42497  3.01802  3.94128  3.12968  3.88008  2.88045  2.51411  2.76334  2.94671  2.94958  5.28612  4.02052      3 k - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03123  3.87781  4.60016  0.61958  0.77255  0.51713  0.90691\n+      4   2.80665  4.86651  3.06170  2.65102  4.29941  3.41912  3.68135  3.61311  1.29697  3.22463  4.14220  3.08300  3.90692  2.84934  2.39258  2.85149  2.85896  3.30620  5.40498  4.16189      4 k - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.04394  3.87781  3.80347  0.61958  0.77255  0.51713  0.90691\n+      5   1.15269  4.13250  3.42805  3.19131  4.05298  2.97131  4.22552  3.11818  3.19502  3.02891  4.02623  3.33501  3.71301  3.53350  3.46107  2.45402  2.62633  2.76070  5.51002  4.28558      5 a - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03162  3.86549  4.58784  0.61958  0.77255  0.52796  0.89112\n+      6   3.14746  4.61180  4.17828  3.81812  3.13308  3.97327  4.47605  2.33676  3.58594  0.88383  3.15824  4.09427  4.40984  3.95594  3.76314  3.54334  3.44203  2.37228  5.00981  3.73151      6 l - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03162  3.86549  4.58784  0.61958  0.77255  0.52796  0.89112\n+      7   3.23704  4.72886  3.82795  3.55604  2.29363  3.82202  3.61532  3.24723  3.41375  
2.70296  3.92299  3.71488  4.29603  3.72147  3'..b'.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03197  3.85485  4.57719  0.61958  0.77255  0.54899  0.86164\n+    283   2.86715  4.85199  2.89168  2.64635  4.01816  3.35460  3.80443  3.64641  2.46840  3.15049  4.17905  3.10585  3.90794  1.26571  2.75284  2.91574  3.16423  3.37724  5.29340  3.98605    284 q - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03197  3.85485  4.57719  0.61958  0.77255  0.54899  0.86164\n+    284   2.30662  4.22669  3.19139  3.00653  4.05883  2.95692  4.11678  3.57914  3.07910  3.30080  4.22848  3.23401  3.69430  3.42983  3.36556  1.11793  2.78547  3.11811  5.43277  4.14231    285 s - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03197  3.85485  4.57719  0.61958  0.77255  0.54899  0.86164\n+    285   2.97610  4.79637  3.10901  2.86734  3.27278  3.43295  1.18022  3.67628  2.71089  3.17239  4.21410  3.27732  3.98459  3.23340  2.97758  3.04699  3.27893  3.41176  4.73803  3.22416    286 h - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03197  3.85485  4.57719  0.61958  0.77255  0.49247  0.94447\n+    286   3.02629  4.38931  4.55440  4.02788  3.38969  4.29987  4.75221  1.66570  3.90157  1.73351  3.22011  4.29643  4.60414  4.16932  4.12143  3.65546  3.28153  1.24068  5.32317  4.12520    287 v - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  
3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03072  3.89395  4.61629  0.61958  0.77255  0.51483  0.91031\n+    287   3.12120  4.54522  4.46798  3.93436  3.10940  4.26644  4.61200  2.09702  3.74039  1.03668  2.81140  4.23014  4.55041  4.00262  3.93893  3.62030  3.36876  2.00221  5.11579  3.95042    288 l - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03072  3.89395  4.61629  0.61958  0.77255  0.51483  0.91031\n+    288   2.79129  4.78468  3.27333  2.77748  4.23368  3.42379  3.72210  3.67372  2.18570  3.22145  4.12764  3.17710  3.22448  2.90441  1.31319  2.86068  3.06244  3.35113  5.34061  4.12949    289 r - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03072  3.89395  4.61629  0.61958  0.77255  0.51483  0.91031\n+    289   2.87748  5.28398  2.07313  1.23939  4.58665  3.21010  3.73813  4.06161  2.69686  3.62602  4.49991  2.71592  3.81535  2.91427  3.22357  2.79830  3.16561  3.67987  5.77966  4.34719    290 e - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03072  3.89395  4.61629  0.61958  0.77255  0.51483  0.91031\n+    290   2.83903  4.94076  3.19058  2.65002  4.36201  3.09854  3.60817  3.74025  1.86467  3.26108  4.11943  3.08356  3.91775  2.75841  1.52389  2.86394  3.05522  3.41435  5.37419  4.15539    291 r - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  
3.61503\n+          0.02078  3.88401        *  0.61958  0.77255  0.00000        *\n+//\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/insecta/hmms/EOG090W06A3.hmm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/insecta/hmms/EOG090W06A3.hmm Mon Oct 04 19:38:37 2021 +0000
[
b'@@ -0,0 +1,973 @@\n+HMMER3/f [3.1b1 | May 2013]\n+NAME  EOG090W06A3\n+LENG  317\n+ALPH  amino\n+RF    no\n+MM    no\n+CONS  yes\n+CS    no\n+MAP   yes\n+DATE  Tue Jul 19 12:14:23 2016\n+NSEQ  42\n+EFFN  0.505005\n+CKSUM 2173114792\n+STATS LOCAL MSV      -11.1177  0.70082\n+STATS LOCAL VITERBI  -11.8237  0.70082\n+STATS LOCAL FORWARD   -5.8436  0.70082\n+HMM          A        C        D        E        F        G        H        I        K        L        M        N        P        Q        R        S        T        V        W        Y   \n+            m->m     m->i     m->d     i->m     i->i     d->m     d->d\n+  COMPO   2.60078  4.20386  3.09081  2.72939  3.29201  2.73750  3.80596  2.60287  2.76439  2.32806  3.62751  3.16496  3.24412  3.25571  3.03713  2.74208  2.91733  2.41359  4.52910  3.56529\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03242  3.84097  4.56332  0.61958  0.77255  0.00000        *\n+      1   2.88548  4.98203  3.26484  2.66893  4.37797  3.54234  3.56403  3.73430  1.73193  3.23652  4.10452  3.09116  3.93137  2.65462  1.56967  2.90213  2.96175  3.42446  5.34579  4.14426      1 r - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03242  3.84097  4.56332  0.61958  0.77255  0.48576  0.95510\n+      2   1.06014  4.17714  3.43127  3.22164  4.02163  3.00241  4.24732  3.14248  3.23710  3.02549  4.06118  3.37901  3.73871  3.58127  3.48951  2.52791  2.80311  2.80236  5.45195  4.24880      2 a - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03242  3.84097  4.56332  0.61958  0.77255 
 0.48576  0.95510\n+      3   3.05279  4.55050  4.19258  3.77702  3.08275  4.01645  4.45173  2.21046  3.53641  1.02046  3.06577  4.04994  4.42083  3.89443  3.73294  3.46036  3.34301  2.19383  5.01051  3.71888      3 l - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03242  3.84097  4.56332  0.61958  0.77255  0.48576  0.95510\n+      4   3.00664  4.43473  4.29003  3.90501  3.35346  4.03179  4.63929  1.11780  3.73250  1.94647  3.29426  4.15480  4.46644  4.08432  3.93271  3.52442  3.30654  1.84634  5.23744  3.97271      4 i - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03242  3.84097  4.56332  0.61958  0.77255  0.48576  0.95510\n+      5   3.12038  4.59225  4.13872  3.77890  3.12451  3.94175  4.44453  2.32740  3.54694  0.91499  3.15816  4.05737  4.38285  3.92282  3.72797  3.50962  3.41625  2.35646  4.99189  3.70778      5 l - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03242  3.84097  4.56332  0.61958  0.77255  0.48576  0.95510\n+      6   2.78335  4.33516  4.08134  3.74395  3.52758  3.69083  4.55127  2.05344  3.61123  2.22041  3.49797  3.93870  4.25342  3.97351  3.82669  3.19274  3.14588  1.06562  5.30654  4.03600      6 v - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03242  3.84097  4.56332  0.61958  0.77255  0.48576  0.95510\n+      7   2.62697  4.40500  3.29425  3.18992  4.33181  0.77941  4.31170  3.92417  3.36686  
3.58217  4.55801  3.45867  3.79512  3.69359  3'..b'.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03203  3.85293  4.57528  0.61958  0.77255  0.49328  0.94320\n+    310   2.30574  4.22577  3.18979  3.00487  4.05694  2.95603  4.11516  3.57701  3.07731  3.29883  4.22670  3.23264  3.69326  3.42820  3.36380  1.12089  2.78445  3.11640  5.43104  4.14042    310 s - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03203  3.85293  4.57528  0.61958  0.77255  0.49328  0.94320\n+    311   2.79273  4.34096  4.09811  3.76084  3.53604  3.70346  4.56601  2.05196  3.62832  2.22661  3.50436  3.95375  4.26521  3.98962  3.84267  3.20593  3.15484  1.04991  5.31711  4.04742    311 v - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03203  3.85293  4.57528  0.61958  0.77255  0.49328  0.94320\n+    312   2.52063  4.46247  3.14399  2.66329  3.61752  3.33464  3.73235  3.11172  2.55088  2.69643  3.68563  3.09303  2.21417  2.90819  2.81318  2.64840  2.75793  2.77252  5.11552  3.82308    312 p - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03203  3.85293  4.57528  0.61958  0.77255  0.49328  0.94320\n+    313   2.83516  5.28588  2.02146  1.37746  4.57342  3.19313  3.68741  4.04429  2.57588  3.59161  4.44614  2.67503  3.78602  2.85457  3.15755  2.75155  3.11440  3.65636  5.76541  4.31525    313 e - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  
3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03203  3.85293  4.57528  0.61958  0.77255  0.49328  0.94320\n+    314   2.42177  4.37033  3.17444  2.98491  4.08142  3.08409  4.11584  3.50342  3.03789  3.18538  4.19247  3.28514  1.20231  3.42206  3.32349  2.65489  2.93898  3.13658  5.40418  4.19997    314 p - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03203  3.85293  4.57528  0.61958  0.77255  0.49328  0.94320\n+    315   2.63050  4.69517  2.96852  2.54738  3.99565  3.34796  3.67351  3.38755  2.34178  3.00253  3.70071  3.00925  3.82818  1.82403  2.67315  2.61986  2.77726  3.09208  5.27729  3.96986    315 q - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03203  3.85293  4.57528  0.61958  0.77255  0.49328  0.94320\n+    316   2.99195  4.39606  4.46052  3.99700  3.37863  4.17130  4.73529  1.20074  3.83833  1.90438  3.24173  4.24154  4.54901  4.14877  4.05130  3.57249  3.27663  1.64463  5.32209  4.08731    316 i - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03203  3.85293  4.57528  0.61958  0.77255  0.49328  0.94320\n+    317   3.01592  4.44036  4.30752  3.92275  3.36125  4.04511  4.65464  1.10243  3.75049  1.95152  3.29989  4.17084  4.47839  4.10092  3.94931  3.53870  3.31551  1.84594  5.24789  3.98436    317 i - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  
3.61503\n+          0.02167  3.84258        *  0.61958  0.77255  0.00000        *\n+//\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/insecta/hmms/EOG090W0GYE.hmm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/insecta/hmms/EOG090W0GYE.hmm Mon Oct 04 19:38:37 2021 +0000
[
b'@@ -0,0 +1,352 @@\n+HMMER3/f [3.1b1 | May 2013]\n+NAME  EOG090W0GYE\n+LENG  110\n+ALPH  amino\n+RF    no\n+MM    no\n+CONS  yes\n+CS    no\n+MAP   yes\n+DATE  Tue Jul 19 12:02:11 2016\n+NSEQ  42\n+EFFN  0.417847\n+CKSUM 96359631\n+STATS LOCAL MSV       -9.8588  0.71539\n+STATS LOCAL VITERBI  -10.5314  0.71539\n+STATS LOCAL FORWARD   -4.2136  0.71539\n+HMM          A        C        D        E        F        G        H        I        K        L        M        N        P        Q        R        S        T        V        W        Y   \n+            m->m     m->i     m->d     i->m     i->i     d->m     d->d\n+  COMPO   2.58050  4.26382  2.85365  2.80613  3.15035  2.89189  3.81397  2.73428  2.84682  2.50149  3.63880  3.04623  2.92146  3.23736  2.97958  2.69288  2.75694  2.61051  4.37217  3.44918\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.11208  3.80830  2.47881  0.61958  0.77255  0.00000        *\n+      1   2.86563  4.43941  3.90030  3.48825  3.15849  3.72815  4.25618  2.32070  3.26187  1.77984  1.67728  3.78149  4.19270  3.65614  3.48266  3.19769  3.17068  2.30899  4.97739  3.72347      1 m - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03621  3.73242  4.45477  0.61958  0.77255  0.51831  0.90517\n+      2   2.32088  4.22631  3.23062  2.88208  3.75054  3.06494  3.91305  3.12034  2.80280  2.86950  3.46740  3.17206  3.71214  3.20009  3.11255  1.58694  2.72892  2.79566  5.19560  3.87469      2 s - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03541  3.75430  4.47665  0.61958  0.77255  
0.52979  0.88850\n+      3   2.16583  4.14387  3.31164  3.04023  3.91928  2.99832  4.08555  2.96050  2.99484  2.87006  3.88524  3.25429  3.70169  3.37677  3.27365  2.46801  1.56963  2.64620  5.38268  4.14256      3 t - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03541  3.75430  4.47665  0.61958  0.77255  0.52979  0.88850\n+      4   2.50339  4.36552  3.18021  2.87748  3.70569  3.17426  3.92862  3.09513  2.80524  2.51701  3.83182  3.22048  1.72512  3.22934  3.10382  2.66637  2.88472  2.83864  5.15020  3.85399      4 p - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03541  3.75430  4.47665  0.61958  0.77255  0.52979  0.88850\n+      5   1.37966  4.07220  3.30932  3.05735  4.00851  2.90363  4.11036  3.10631  3.06226  3.01691  3.96738  3.22700  3.63731  3.40459  3.34236  2.32349  2.50171  2.72896  5.44141  4.20867      5 a - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03541  3.75430  4.47665  0.61958  0.77255  0.52979  0.88850\n+      6   2.79738  4.71128  3.35305  2.81859  3.86956  3.45352  3.66728  3.34124  2.12422  2.73535  3.92040  3.19573  3.91394  2.88067  1.48782  2.88950  3.04990  3.10526  5.13081  3.85640      6 r - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03541  3.75430  4.47665  0.61958  0.77255  0.52979  0.88850\n+      7   2.79738  4.71128  3.35305  2.81859  3.86956  3.45352  3.66728  3.34124  2.12422  
2.73535  3.92040  3.19573  3.91394  2.88067  1.4'..b'.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03352  3.80830  4.53064  0.61958  0.77255  0.48576  0.95510\n+    103   2.28595  4.20469  3.15305  2.96732  4.01306  2.93587  4.07824  3.52766  3.03667  3.25343  4.18597  3.20146  3.66970  3.39139  3.32369  1.19215  2.76154  3.07690  5.39105  4.09660    103 s - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03352  3.80830  4.53064  0.61958  0.77255  0.48576  0.95510\n+    104   1.11047  4.16262  3.39942  3.18942  3.99027  2.98852  4.21878  3.10639  3.20405  2.99074  4.03096  3.35463  3.72182  3.55149  3.45809  2.51413  2.78712  2.77202  5.42393  4.21734    104 a - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03352  3.80830  4.53064  0.61958  0.77255  0.48576  0.95510\n+    105   2.98123  4.41976  4.24147  3.85605  3.33251  3.99469  4.59708  1.16125  3.68281  1.93347  3.27967  4.11045  4.43341  4.03863  3.88681  3.48501  3.28213  1.84863  5.20897  3.94068    105 i - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03352  3.80830  4.53064  0.61958  0.77255  0.48576  0.95510\n+    106   3.08459  4.56673  4.08567  3.72675  3.11378  3.89922  4.40266  2.31619  3.49506  0.95858  3.15957  4.00826  4.34667  3.87883  3.68103  3.46477  3.38236  2.33656  4.96826  3.67627    106 l - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  
3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03352  3.80830  4.53064  0.61958  0.77255  0.48576  0.95510\n+    107   2.38084  4.22527  3.37365  3.12444  3.91461  3.06621  4.14632  3.03402  3.06612  2.89972  3.95563  3.34312  3.76545  3.46456  3.32701  2.58155  1.28564  2.73588  5.35817  4.13168    107 t - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03352  3.80830  4.53064  0.61958  0.77255  0.48576  0.95510\n+    108   2.28595  4.20469  3.15305  2.96732  4.01306  2.93587  4.07824  3.52766  3.03667  3.25343  4.18597  3.20146  3.66970  3.39139  3.32369  1.19215  2.76154  3.07690  5.39105  4.09660    108 s - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03352  3.80830  4.53064  0.61958  0.77255  0.48576  0.95510\n+    109   2.98123  4.41976  4.24147  3.85605  3.33251  3.99469  4.59708  1.16125  3.68281  1.93347  3.27967  4.11045  4.43341  4.03863  3.88681  3.48501  3.28213  1.84863  5.20897  3.94068    109 i - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.03352  3.80830  4.53064  0.61958  0.77255  0.48576  0.95510\n+    110   2.83281  4.81376  2.86370  2.61910  3.97332  3.32524  3.77627  3.59570  2.44478  3.10430  4.13561  3.07764  3.87883  1.35288  2.72955  2.88405  3.13061  3.32988  5.25759  3.94755    110 q - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  
3.61503\n+          0.02268  3.79747        *  0.61958  0.77255  0.00000        *\n+//\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/insecta/hmms/EOG090W0T3K.hmm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/insecta/hmms/EOG090W0T3K.hmm Mon Oct 04 19:38:37 2021 +0000
[
b'@@ -0,0 +1,640 @@\n+HMMER3/f [3.1b1 | May 2013]\n+NAME  EOG090W0T3K\n+LENG  206\n+ALPH  amino\n+RF    no\n+MM    no\n+CONS  yes\n+CS    no\n+MAP   yes\n+DATE  Tue Jul 19 12:12:21 2016\n+NSEQ  42\n+EFFN  1.961060\n+CKSUM 265104873\n+STATS LOCAL MSV      -10.5526  0.70500\n+STATS LOCAL VITERBI  -11.3203  0.70500\n+STATS LOCAL FORWARD   -5.0917  0.70500\n+HMM          A        C        D        E        F        G        H        I        K        L        M        N        P        Q        R        S        T        V        W        Y   \n+            m->m     m->i     m->d     i->m     i->i     d->m     d->d\n+  COMPO   2.66939  4.05673  3.22993  2.79099  3.15365  3.55719  3.71237  2.54173  2.57440  2.22720  3.33451  3.21043  3.51162  3.12964  3.00991  2.70726  2.80134  2.41473  4.61383  3.55842\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.16208  4.61100  1.96838  0.61958  0.77255  0.00000        *\n+      1   2.93600  4.38428  4.46198  3.90104  3.40267  3.46509  4.45615  1.93047  3.75500  2.17341  1.33550  4.09987  4.43304  3.98673  3.92700  3.38467  3.18624  2.18032  5.08252  3.90751      1 m - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01722  4.46614  5.18849  0.61958  0.77255  0.71506  0.67170\n+      2   3.51610  5.57440  4.08521  3.23432  5.16373  4.05621  3.85036  4.44109  1.24790  3.78589  4.67516  3.56930  4.38323  2.97693  1.02813  3.49054  3.62846  4.13070  5.74164  4.65992      2 r - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01722  4.46614  5.18849  0.61958  0.77255  
0.71506  0.67170\n+      3   2.71675  4.18007  2.20863  1.78454  4.48942  3.47153  3.68161  3.95962  2.43737  2.96771  4.22140  2.70561  3.87879  2.38271  2.92830  2.68696  2.95012  3.54764  5.61847  4.21897      3 e - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01722  4.46614  5.18849  0.61958  0.77255  0.71506  0.67170\n+      4   3.79955  5.05528  5.24619  4.82928  0.91393  4.84708  4.43636  2.76588  4.65616  1.39209  3.35680  4.77841  5.06654  4.62904  4.66340  4.24935  4.02525  2.91694  4.56452  2.97666      4 f - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01722  4.46614  5.18849  0.61958  0.77255  0.71506  0.67170\n+      5   2.33881  4.63133  3.04986  2.72039  3.81134  3.58086  3.80889  3.19122  2.69933  2.40831  3.28616  3.19757  3.96766  2.62497  3.12129  2.40087  2.24400  2.60449  5.18249  3.90340      5 t - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01722  4.46614  5.18849  0.61958  0.77255  0.71506  0.67170\n+      6   2.53534  4.98255  2.88571  2.48098  3.55645  3.22634  3.11506  3.68024  2.49296  3.25497  4.05057  1.83222  3.89751  2.84319  2.96725  2.38521  2.93102  3.33084  5.46784  3.79390      6 n - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01722  4.46614  5.18849  0.61958  0.77255  0.71506  0.67170\n+      7   3.33815  4.58635  5.28231  4.77386  3.86630  4.81962  5.40260  1.04591  4.67983  
2.15439  3.65019  4.93514  5.08026  4.90950  4.'..b'.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01619  4.52743  5.24978  0.61958  0.77255  0.62832  0.76248\n+    199   3.17292  4.49948  5.07350  4.47897  2.19664  4.41569  4.75303  1.67850  4.30714  1.34256  2.66830  4.55216  4.68996  4.38672  4.33103  3.74557  3.18632  2.02062  5.12680  4.01423    203 l - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01619  4.52743  5.24978  0.61958  0.77255  0.62832  0.76248\n+    200   2.82721  5.23462  3.00978  2.50556  4.56561  3.55698  1.89081  4.01902  2.34647  3.51731  4.29242  2.45019  3.96371  2.18070  2.67888  2.52972  3.05315  3.62288  5.65577  4.29113    204 h - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01619  4.52743  5.24978  0.61958  0.77255  0.62832  0.76248\n+    201   2.79753  3.26426  4.38245  3.80576  2.13306  3.90254  4.09723  2.46777  3.65960  2.40542  3.34950  3.92355  4.27118  3.84123  3.78510  2.46701  3.03179  2.48318  4.61601  1.76843    205 y - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01619  4.52743  5.24978  0.61958  0.77255  0.62832  0.76248\n+    202   2.72610  3.77026  4.37676  3.78412  1.86105  3.85380  3.70355  1.91248  3.39011  2.25745  3.26975  3.64215  4.21973  3.81329  3.74151  3.15148  2.95801  2.40389  4.70415  2.65981    206 f - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  
3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01619  4.52743  5.24978  0.61958  0.77255  0.62832  0.76248\n+    203   3.15910  4.51063  4.85177  3.56044  3.48065  4.41056  4.77828  1.50858  4.14418  1.18189  3.34602  4.46100  4.70781  4.33071  4.27364  3.73860  3.39432  1.87237  5.27969  4.12987    207 l - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01619  4.52743  5.24978  0.61958  0.77255  0.62832  0.76248\n+    204   2.54223  4.83441  3.15835  2.33659  4.10387  3.53825  3.79739  3.50371  2.61506  2.95960  3.75295  3.12487  1.86716  2.96533  3.06273  2.60399  2.36144  3.19033  5.39703  4.08002    208 p - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01619  4.52743  5.24978  0.61958  0.77255  0.62832  0.76248\n+    205   1.87617  5.09761  2.66753  2.36334  4.39589  2.74795  3.18023  3.84868  2.46564  3.38501  4.15665  2.99165  3.89862  2.67910  2.94944  2.53808  2.60969  3.21059  5.56524  4.18537    209 a - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.01619  4.52743  5.24978  0.61958  0.77255  0.62832  0.76248\n+    206   3.02518  4.38844  4.11409  4.15378  3.60416  4.28092  4.67591  1.38646  4.03415  2.26037  3.36491  4.32477  4.61541  4.25349  4.19108  3.60271  2.71238  1.26673  5.26692  4.07016    210 v - - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  
3.61503\n+          0.01092  4.52217        *  0.61958  0.77255  0.00000        *\n+//\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/insecta/lengths_cutoff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/insecta/lengths_cutoff Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,1658 @@\n+EOG090W0HFV\t0\t26.009834120677418\t151\n+EOG090W0427\t0\t14.787662575205127\t119\n+EOG090W09K7\t0\t48.74476058088982\t218\n+EOG090W0B5K\t0\t8.436399156361812\t117\n+EOG090W0153\t0\t110.0106535860915\t535\n+EOG090W051T\t0\t48.42670503702809\t341\n+EOG090W01WI\t0\t66.27955761081256\t599\n+EOG090W01A3\t0\t147.49150864355283\t636\n+EOG090W067A\t0\t24.752819851328745\t239\n+EOG090W0IUR\t0\t33.027556860826415\t176\n+EOG090W09BV\t0\t59.827044564511475\t249\n+EOG090W0AIA\t0\t62.4605436431282\t296\n+EOG090W05D8\t0\t120.959030700539\t437\n+EOG090W01IP\t0\t203.56155914702887\t709\n+EOG090W02C3\t0\t123.60839779175912\t441\n+EOG090W0B1Y\t0\t14.92967253328818\t221\n+EOG090W01VD\t0\t137.07676494930246\t506\n+EOG090W035W\t0\t58.49371845525309\t413\n+EOG090W0DWN\t0\t41.156486855129046\t178\n+EOG090W028U\t0\t103.86567310127292\t544\n+EOG090W02TI\t0\t98.41080498838295\t449\n+EOG090W00PB\t0\t106.20423261052379\t448\n+EOG090W038B\t0\t77.2556129538091\t369\n+EOG090W0BUR\t0\t61.86700358352731\t260\n+EOG090W0F0L\t0\t46.56222745575028\t243\n+EOG090W08IZ\t0\t62.201306771663276\t286\n+EOG090W0BEB\t0\t24.453706055266927\t238\n+EOG090W0I37\t0\t18.215830819664493\t138\n+EOG090W0CQ9\t0\t22.121097366946984\t209\n+EOG090W04BS\t0\t57.09128334646491\t340\n+EOG090W0FCL\t0\t16.012152397171466\t130\n+EOG090W0AXJ\t0\t28.151048076823233\t222\n+EOG090W0FEP\t0\t12.631376967230036\t126\n+EOG090W0FVI\t0\t15.398685404771472\t141\n+EOG090W0GP3\t0\t19.401812121264605\t132\n+EOG090W01NH\t0\t109.56816301330042\t599\n+EOG090W0IEE\t0\t13.648148270718346\t133\n+EOG090W0IP7\t0\t16.863805057639492\t174\n+EOG090W038Z\t0\t59.028806526983075\t372\n+EOG090W0B8O\t0\t25.131143830790958\t193\n+EOG090W0KP0\t0\t31.31312208007784\t139\n+EOG090W064P\t0\t35.99213522298181\t305\n+EOG090W0A3V\t0\t29.293708718757674\t271\n+EOG090W0BOZ\t0\t11.373911097706742\t158\n+EOG090W0H6T\t0\t40.78248368613364\t174\n+EOG090W0EDI\t0\t25.598027290426895\t161\n+EOG090W0EFH\t0\t32.47988529410085\t158\n+EOG090W04BN\t0\t42.69662
448203931\t429\n+EOG090W00SP\t0\t347.84066824884763\t1158\n+EOG090W01ZN\t0\t35.63683451653604\t248\n+EOG090W014R\t0\t303.558641419558\t908\n+EOG090W0FJE\t0\t11.554345658688966\t128\n+EOG090W00EP\t0\t90.98598088688564\t391\n+EOG090W07MY\t0\t26.394659474607508\t276\n+EOG090W00LR\t0\t47.803732742449085\t428\n+EOG090W06QR\t0\t45.20103500608554\t255\n+EOG090W06J7\t0\t48.950477475341124\t349\n+EOG090W04G6\t0\t20.747524095601406\t271\n+EOG090W06IG\t0\t83.91320663964761\t392\n+EOG090W036Y\t0\t90.1813890269454\t405\n+EOG090W0CMO\t0\t27.111085843428626\t212\n+EOG090W0BFE\t0\t43.83422206895252\t292\n+EOG090W0JYN\t0\t8.6240352353082\t95\n+EOG090W054P\t0\t42.0493667617466\t242\n+EOG090W0753\t0\t88.63736820874865\t365\n+EOG090W0FKG\t0\t36.02492014973623\t238\n+EOG090W0B6L\t0\t37.512128120083524\t178\n+EOG090W0C2I\t0\t39.9957024404497\t248\n+EOG090W057Z\t0\t39.37529750270147\t336\n+EOG090W01H1\t0\t167.89279528460568\t626\n+EOG090W07A3\t0\t38.193049881273545\t207\n+EOG090W0D5E\t0\t36.11275078460196\t174\n+EOG090W07PK\t0\t44.50703007278868\t242\n+EOG090W0GKW\t0\t13.60945220480054\t126\n+EOG090W0J9P\t0\t72.21231118153511\t276\n+EOG090W0A69\t0\t38.463468950032976\t247\n+EOG090W0DRQ\t0\t7.927491529558358\t71\n+EOG090W04G1\t0\t70.1561152864883\t382\n+EOG090W08L6\t0\t34.61950687705208\t168\n+EOG090W02QT\t0\t126.78937306395682\t573\n+EOG090W0K04\t0\t13.698118905107428\t123\n+EOG090W02UI\t0\t75.33983172202097\t569\n+EOG090W00MS\t0\t222.74918630603344\t865\n+EOG090W0HXZ\t0\t14.672974042346928\t111\n+EOG090W09LF\t0\t49.42509730797137\t225\n+EOG090W060L\t0\t48.49481993961427\t407\n+EOG090W0I0Q\t0\t8.898446275006405\t87\n+EOG090W0B0M\t0\t19.378137039507983\t192\n+EOG090W0EY0\t0\t25.633665074147427\t177\n+EOG090W019L\t0\t161.9212108198199\t655\n+EOG090W0ALV\t0\t33.567421363630004\t327\n+EOG090W0F9J\t0\t11.9066247090783\t195\n+EOG090W0BZ2\t0\t13.511577175520067\t123\n+EOG090W0B5T\t0\t12.744268370786148\t181\n+EOG090W0JBN\t0\t8.059235823034468\t94\n+EOG090W06CO\t0\t44.76325256823732\t283\n+EOG090
W06Y4\t0\t58.56965239084716\t304\n+EOG090W00D0\t0\t173.60012223217007\t969\n+EOG090W0N7U\t0\t7.090271589968341\t73\n+EOG090W02H5\t0\t187.43347716555155\t645\n+EOG090W04DH\t0\t97.51184745911475\t425\n+EOG090W01HI\t0\t141.17028909316608\t668\n+EOG090W02JZ\t0\t106.2021542200327\t499\n+EOG090W0ANA\t0\t43.80791670031081\t242\n+EOG090W055F\t0\t25.315135740520443\t269\n+EOG090W0KMC\t0\t20.563083052290207\t149\n+EOG090W063Z\t0\t20.0'..b'656909\t232\n+EOG090W050K\t0\t77.93871889144076\t376\n+EOG090W0CL8\t0\t25.966747028064802\t218\n+EOG090W0JJQ\t0\t13.62856576746243\t117\n+EOG090W06W8\t0\t48.24622893316924\t267\n+EOG090W0EPV\t0\t35.10902433482864\t239\n+EOG090W00WM\t0\t260.8164746511948\t897\n+EOG090W005S\t0\t611.4917595121128\t1630\n+EOG090W02UQ\t0\t51.49464074592311\t285\n+EOG090W03FA\t0\t53.973113494178904\t344\n+EOG090W02B7\t0\t115.3009159526159\t425\n+EOG090W06DJ\t0\t77.792516412288\t338\n+EOG090W08FE\t0\t36.68154855844776\t279\n+EOG090W06P2\t0\t41.670994647017906\t299\n+EOG090W0C7S\t0\t29.611469746269982\t147\n+EOG090W0C4Z\t0\t26.43060073195963\t145\n+EOG090W00ZP\t0\t161.60550467275013\t697\n+EOG090W0C7Z\t0\t37.684920128988004\t192\n+EOG090W06AN\t0\t42.662308774978584\t289\n+EOG090W0FYR\t0\t13.508876473998257\t128\n+EOG090W015U\t0\t42.296492199255404\t276\n+EOG090W02LX\t0\t53.129566825639934\t495\n+EOG090W0DZ4\t0\t9.599076852633498\t186\n+EOG090W050Y\t0\t60.226966189955085\t368\n+EOG090W08GU\t0\t53.02740180911049\t280\n+EOG090W09LK\t0\t91.72851994063471\t372\n+EOG090W077G\t0\t76.76324145120006\t331\n+EOG090W0B8P\t0\t19.196863497747415\t150\n+EOG090W0A73\t0\t27.406826637563558\t161\n+EOG090W0B3U\t0\t43.48966796277244\t264\n+EOG090W0LL3\t0\t13.321975417932757\t120\n+EOG090W09R9\t0\t36.65014937864991\t212\n+EOG090W0A58\t0\t48.47060965162291\t255\n+EOG090W06VZ\t0\t29.419164232825445\t317\n+EOG090W080B\t0\t79.75592849488558\t305\n+EOG090W0ALP\t0\t13.883838648218155\t145\n+EOG090W0EJV\t0\t5.956771852621479\t160\n+EOG090W0BI6\t0\t30.97683170635923\t224\n+EOG090W05KO\t0\t95.43
3274449961\t425\n+EOG090W03K0\t0\t72.48414460771538\t334\n+EOG090W0JFZ\t0\t14.305445143137689\t92\n+EOG090W09RO\t0\t29.48406784028039\t267\n+EOG090W00ZV\t0\t61.02240025870282\t345\n+EOG090W005V\t0\t262.52686163098906\t1059\n+EOG090W0F9A\t0\t10.246423950758501\t105\n+EOG090W0IKC\t0\t14.947429244710978\t76\n+EOG090W04QG\t0\t49.56877927900001\t361\n+EOG090W00U5\t0\t71.63448419048026\t402\n+EOG090W0JS6\t0\t8.421979153901846\t87\n+EOG090W06X4\t0\t32.054695199308284\t297\n+EOG090W0H7U\t0\t24.51177799464656\t186\n+EOG090W02LH\t0\t31.4090320424614\t301\n+EOG090W06AU\t0\t21.750918119113738\t153\n+EOG090W0L6N\t0\t11.596992881882745\t94\n+EOG090W0028\t0\t588.5016762903192\t1601\n+EOG090W05ZG\t0\t33.671801910536935\t269\n+EOG090W0DSQ\t0\t53.83672675323885\t204\n+EOG090W0CIU\t0\t10.5996734768809\t188\n+EOG090W09DT\t0\t42.16152158590845\t238\n+EOG090W0883\t0\t43.82515148221423\t331\n+EOG090W08IL\t0\t31.073523765582504\t244\n+EOG090W07HX\t0\t43.87674109168712\t264\n+EOG090W0ADL\t0\t74.64253566218997\t270\n+EOG090W07E5\t0\t31.219065990205618\t342\n+EOG090W0CHN\t0\t36.32844749725724\t170\n+EOG090W0F27\t0\t28.270070111541965\t150\n+EOG090W05FW\t0\t36.389874917560235\t332\n+EOG090W061C\t0\t33.972962707452396\t322\n+EOG090W023I\t0\t6.011474071541897\t134\n+EOG090W09Y9\t0\t20.77931440362711\t167\n+EOG090W029L\t0\t129.73510535308327\t545\n+EOG090W078A\t0\t54.90487338981139\t401\n+EOG090W0C83\t0\t38.32093295366156\t262\n+EOG090W015Z\t0\t176.6733585172255\t684\n+EOG090W05IA\t0\t51.778721543176026\t345\n+EOG090W06HO\t0\t71.48257830521672\t342\n+EOG090W0E6K\t0\t76.78984701084445\t299\n+EOG090W032M\t0\t41.865258362423724\t238\n+EOG090W04ZL\t0\t64.14590294096341\t349\n+EOG090W0A4U\t0\t17.926850008284003\t178\n+EOG090W0G0Z\t0\t34.8181704466556\t189\n+EOG090W012F\t0\t210.2707396955778\t725\n+EOG090W08ME\t0\t14.38099468500289\t138\n+EOG090W090H\t0\t89.63598147718064\t390\n+EOG090W0C7T\t0\t14.33867361284194\t189\n+EOG090W0AUB\t0\t1.1004433696270324\t217\n+EOG090W094H\t0\t21.30754829636339\t247\n+E
OG090W00HE\t0\t188.33841438573634\t778\n+EOG090W0HKZ\t0\t15.56285848109415\t128\n+EOG090W02KK\t0\t67.63366170127085\t626\n+EOG090W0828\t0\t24.44301723915799\t140\n+EOG090W07PH\t0\t52.049033834231\t266\n+EOG090W01XB\t0\t112.40728428787705\t527\n+EOG090W02C5\t0\t174.99547032465253\t515\n+EOG090W00WO\t0\t160.60058592597883\t618\n+EOG090W0140\t0\t77.3058731633741\t335\n+EOG090W01QT\t0\t54.84769853734253\t240\n+EOG090W0FQ4\t0\t21.524963224401723\t140\n+EOG090W0CAH\t0\t18.306986664749655\t190\n+EOG090W080Z\t0\t42.915151411519396\t247\n+EOG090W02AU\t0\t90.99510841851644\t457\n+EOG090W096X\t0\t37.173612523675914\t248\n+EOG090W04OJ\t0\t60.586185145045505\t253\n+EOG090W09UY\t0\t59.0061956628029\t262\n+EOG090W07CG\t0\t39.79244204205305\t320\n+EOG090W0KFZ\t0\t5.208228204067198\t77\n+EOG090W0LWB\t0\t5.161494516225474\t78\n+EOG090W0F00\t0\t36.17643723361296\t196\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/insecta/prfl/EOG090W03A6.prfl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/insecta/prfl/EOG090W03A6.prfl Mon Oct 04 19:38:37 2021 +0000
[
b'@@ -0,0 +1,495 @@\n+[name]\n+unknown\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t11\n+\n+[block]\n+# block no. 0 follows, 26 sequences, length 14\n+# corresponding to MSA columns:\n+# 26-39\n+name=unknown_A\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\tY\tW\tH\tM\tC\tP\n+0\t0.01222\t0.00873\t0.01222\t0.01397\t0.01572\t0.00873\t0.01222\t0.01572\t0.01746\t0.02270\t0.04016\t0.08556\t0.04365\t0.02095\t0.01048\t0.00349\t0.00698\t0.63506\t0.00698\t0.00698\n+1\t0.00956\t0.00674\t0.00871\t0.01015\t0.01051\t0.00637\t0.00660\t0.01065\t0.01390\t0.01853\t0.03861\t0.60723\t0.04613\t0.15052\t0.01485\t0.00384\t0.00493\t0.01968\t0.00660\t0.00588\n+2\t0.01493\t0.01410\t0.02384\t0.62468\t0.13806\t0.01701\t0.02136\t0.01991\t0.01545\t0.02011\t0.01359\t0.02003\t0.01037\t0.00747\t0.00757\t0.00249\t0.00996\t0.00674\t0.00342\t0.00891\n+3\t0.01103\t0.00735\t0.00827\t0.00827\t0.00827\t0.00735\t0.00460\t0.01103\t0.01103\t0.01471\t0.02390\t0.04964\t0.02758\t0.73343\t0.03861\t0.00735\t0.00735\t0.01103\t0.00460\t0.00460\n+4\t0.00924\t0.00660\t0.00880\t0.01056\t0.01100\t0.00616\t0.00704\t0.01056\t0.01452\t0.01936\t0.04181\t0.72848\t0.05017\t0.02376\t0.00968\t0.00308\t0.00440\t0.02156\t0.00704\t0.00616\n+5\t0.02748\t0.02058\t0.02200\t0.01718\t0.02301\t0.02289\t0.01409\t0.57682\t0.12187\t0.04563\t0.01992\t0.01958\t0.01429\t0.00927\t0.00761\t0.00232\t0.00803\t0.00707\t0.00761\t0.01278\n+6\t0.15256\t0.01392\t0.02108\t0.55605\t0.07699\t0.01694\t0.01888\t0.02009\t0.01487\t0.02226\t0.01300\t0.01874\t0.00987\t0.00745\t0.00706\t0.00248\t0.00930\t0.00627\t0.00363\t0.00857\n+7\t0.11917\t0.01388\t0.02161\t0.58414\t0.08089\t0.01694\t0.01944\t0.01998\t0.01497\t0.02165\t0.01313\t0.01908\t0.00996\t0.00747\t0.00719\t0.00249\t0.00948\t0.00639\t0.00357\t0.00859\n+8\t0.07699\t0.01719\t0.02511\t0.32900\t0.31130\t0.06966\t0.02051\t0.02206\t0.01635\t0.02270\t0.01345\t0.01851\t0.01073\t0.00722\t0.00727\t0.00236\t0.00960\t0.00640\t0.00367\t0.00993\
n+9\t0.22837\t0.01515\t0.01649\t0.12060\t0.02277\t0.05359\t0.01194\t0.11110\t0.02019\t0.19029\t0.11508\t0.02414\t0.02104\t0.00925\t0.00698\t0.00235\t0.00710\t0.00718\t0.00650\t0.00989\n+10\t0.01881\t0.01720\t0.02326\t0.46321\t0.09186\t0.07067\t0.01933\t0.15694\t0.02017\t0.02577\t0.01441\t0.01919\t0.01082\t0.00786\t0.00752\t0.00241\t0.00992\t0.00662\t0.00434\t0.00968\n+11\t0.05197\t0.02202\t0.02168\t0.07042\t0.02458\t0.14127\t0.01479\t0.34418\t0.08557\t0.03496\t0.01658\t0.01808\t0.01254\t0.00931\t0.00949\t0.00240\t0.09660\t0.00652\t0.00601\t0.01103\n+12\t0.04828\t0.01906\t0.02153\t0.02065\t0.11087\t0.13727\t0.06274\t0.12273\t0.06954\t0.09870\t0.14622\t0.02634\t0.02452\t0.00986\t0.00865\t0.00235\t0.04619\t0.00808\t0.00615\t0.01027\n+13\t0.08658\t0.02046\t0.07459\t0.03874\t0.02561\t0.07075\t0.13650\t0.20915\t0.09128\t0.05154\t0.01799\t0.02037\t0.01366\t0.01401\t0.09115\t0.00355\t0.01079\t0.00712\t0.00554\t0.01060\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t30\n+\n+[block]\n+# block no. 
1 follows, 26 sequences, length 8\n+# corresponding to MSA columns:\n+# 71-78\n+name=unknown_B\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\tY\tW\tH\tM\tC\tP\n+0\t0.02194\t0.06459\t0.02353\t0.16038\t0.11196\t0.02016\t0.01601\t0.31327\t0.06090\t0.03449\t0.06104\t0.02133\t0.01644\t0.00872\t0.00744\t0.00227\t0.00824\t0.00711\t0.00593\t0.03426\n+1\t0.02052\t0.05093\t0.07159\t0.02726\t0.10655\t0.20206\t0.25692\t0.02480\t0.01765\t0.02167\t0.01318\t0.01732\t0.01070\t0.00789\t0.01035\t0.00243\t0.11776\t0.00689\t0.00371\t0.00981\n+2\t0.01568\t0.10472\t0.01891\t0.01780\t0.14957\t0.01459\t0.01164\t0.12310\t0.02011\t0.02557\t0.04608\t0.04525\t0.35102\t0.01341\t0.00792\t0.00233\t0.00617\t0.01110\t0.00599\t0.00904\n+3\t0.10891\t0.01808\t0.02949\t0.10245\t0.33046\t0.01872\t0.14796\t0.10391\t0.01901\t0.02814\t0.01447\t0.01843\t0.01139\t0.00710\t0.00749\t0.00236\t0.00943\t0.00690\t0.00433\t0.01097\n+4\t0.08080\t0.02151\t0.01983\t0.01626\t0.02095\t0.19381\t0.01303\t0.17602\t0.02227\t0.15089\t0.01650\t0.01750\t0.01226\t0.00877\t0.00914\t0.00220\t0.09913\t0.00596\t0.00583\t0.10734\n+5\t0.01885\t0.01934\t0.03543\t0.02641\t0.03208\t0.02036\t0.40079\t0.08844\t0.08944\t0.02629\t0.01628\t0.02034\t0.01261\t0.00841\t0.01174\t0.00268\t0.14739\t0.00825\t0.00453\t0.01032\n+6\t0.01577\t0.01496\t0.02536\t0.49795\t0.25820\t0.01723\t0.02178\t0.02065\t0.01585\t0.02113\t0.01374\t0.01975\t0.01073\t0.00732\t0.00756\t0.00244\t0.00975\t0.00675\t0.00350\t0.0095'..b'1\t0.00639\t0.00895\t0.00256\t0.01279\t0.00895\t0.00384\t0.01023\n+73\t0.01076\t0.00724\t0.00835\t0.00861\t0.00868\t0.00718\t0.00496\t0.01096\t0.01155\t0.01540\t0.02656\t0.15039\t0.03093\t0.62810\t0.03431\t0.00672\t0.00692\t0.01259\t0.00496\t0.00483\n+74\t0.01521\t0.03923\t0.69413\t0.02162\t0.03283\t0.01762\t0.02802\t0.02402\t0.01601\t0.02402\t0.01361\t0.01601\t0.00961\t0.00721\t0.00721\t0.00240\t0.01121\t0.00560\t0.00320\t0.01121\n+75\t0.01432\t0.01348\t0.02275\t0.71520\t0.05224\t0.01685\t0.0210
7\t0.01938\t0.01517\t0.01938\t0.01348\t0.02022\t0.01011\t0.00758\t0.00758\t0.00253\t0.01011\t0.00674\t0.00337\t0.00843\n+76\t0.01474\t0.01390\t0.02349\t0.65337\t0.11086\t0.01696\t0.02127\t0.01974\t0.01536\t0.01988\t0.01355\t0.02009\t0.01029\t0.00751\t0.00758\t0.00250\t0.01001\t0.00674\t0.00341\t0.00876\n+77\t0.01573\t0.01348\t0.01573\t0.01123\t0.01798\t0.01011\t0.00899\t0.01910\t0.01573\t0.02472\t0.01348\t0.01573\t0.01123\t0.00562\t0.00562\t0.00112\t0.00562\t0.00449\t0.00449\t0.77980\n+78\t0.02883\t0.02125\t0.02276\t0.01745\t0.02352\t0.02352\t0.01442\t0.66082\t0.03566\t0.04780\t0.01821\t0.01821\t0.01290\t0.00911\t0.00759\t0.00228\t0.00835\t0.00683\t0.00759\t0.01290\n+79\t0.01790\t0.02046\t0.04476\t0.03197\t0.03964\t0.01918\t0.65857\t0.02430\t0.01790\t0.02430\t0.01535\t0.02046\t0.01151\t0.00639\t0.00895\t0.00256\t0.01279\t0.00895\t0.00384\t0.01023\n+80\t0.01432\t0.01348\t0.02275\t0.71520\t0.05224\t0.01685\t0.02107\t0.01938\t0.01517\t0.01938\t0.01348\t0.02022\t0.01011\t0.00758\t0.00758\t0.00253\t0.01011\t0.00674\t0.00337\t0.00843\n+81\t0.01091\t0.00784\t0.01074\t0.01074\t0.01159\t0.00869\t0.00767\t0.01261\t0.01176\t0.01653\t0.02165\t0.03717\t0.02216\t0.30815\t0.45648\t0.01039\t0.01550\t0.00921\t0.00426\t0.00596\n+82\t0.14579\t0.01317\t0.01664\t0.01297\t0.01867\t0.01202\t0.01071\t0.03479\t0.02040\t0.59380\t0.02705\t0.02381\t0.01721\t0.00904\t0.00719\t0.00235\t0.00637\t0.00711\t0.00869\t0.01221\n+83\t0.01705\t0.01494\t0.02275\t0.58420\t0.04685\t0.01810\t0.01982\t0.13981\t0.01901\t0.02472\t0.01437\t0.01984\t0.01063\t0.00787\t0.00758\t0.00248\t0.00978\t0.00676\t0.00416\t0.00927\n+84\t0.01432\t0.01348\t0.02275\t0.71520\t0.05224\t0.01685\t0.02107\t0.01938\t0.01517\t0.01938\t0.01348\t0.02022\t0.01011\t0.00758\t0.00758\t0.00253\t0.01011\t0.00674\t0.00337\t0.00843\n+85\t0.01790\t0.02046\t0.04476\t0.03197\t0.03964\t0.01918\t0.65857\t0.02430\t0.01790\t0.02430\t0.01535\t0.02046\t0.01151\t0.00639\t0.00895\t0.00256\t0.01279\t0.00895\t0.00384\t0.01023\n+86\t0.02883\t0.02125\t0.02276
\t0.01745\t0.02352\t0.02352\t0.01442\t0.66082\t0.03566\t0.04780\t0.01821\t0.01821\t0.01290\t0.00911\t0.00759\t0.00228\t0.00835\t0.00683\t0.00759\t0.01290\n+87\t0.01659\t0.01659\t0.02323\t0.01991\t0.01991\t0.02323\t0.01659\t0.01825\t0.01162\t0.01825\t0.00996\t0.01659\t0.00996\t0.01328\t0.02489\t0.00332\t0.71955\t0.00664\t0.00332\t0.00830\n+88\t0.01039\t0.00753\t0.00980\t0.00967\t0.01123\t0.00694\t0.00708\t0.01348\t0.02001\t0.02797\t0.53948\t0.18340\t0.09020\t0.01719\t0.00908\t0.00252\t0.00374\t0.01528\t0.00806\t0.00694\n+89\t0.00949\t0.00678\t0.00909\t0.01084\t0.01139\t0.00637\t0.00747\t0.01099\t0.01477\t0.01964\t0.04167\t0.67500\t0.04963\t0.02353\t0.00975\t0.00311\t0.00462\t0.07259\t0.00704\t0.00623\n+90\t0.01432\t0.01348\t0.02275\t0.71520\t0.05224\t0.01685\t0.02107\t0.01938\t0.01517\t0.01938\t0.01348\t0.02022\t0.01011\t0.00758\t0.00758\t0.00253\t0.01011\t0.00674\t0.00337\t0.00843\n+91\t0.01579\t0.11910\t0.61934\t0.02063\t0.03130\t0.01903\t0.02630\t0.02387\t0.01595\t0.02332\t0.01326\t0.01557\t0.00962\t0.00712\t0.00694\t0.00231\t0.01086\t0.00543\t0.00321\t0.01104\n+92\t0.01499\t0.01416\t0.02396\t0.61489\t0.14734\t0.01703\t0.02140\t0.01996\t0.01548\t0.02019\t0.01360\t0.02000\t0.01040\t0.00746\t0.00757\t0.00249\t0.00995\t0.00674\t0.00343\t0.00896\n+93\t0.01755\t0.02345\t0.14323\t0.02872\t0.03641\t0.05046\t0.49780\t0.02386\t0.01761\t0.02374\t0.01621\t0.05431\t0.01303\t0.00744\t0.00862\t0.00253\t0.01217\t0.00886\t0.00390\t0.01012\n+94\t0.01667\t0.01591\t0.02684\t0.34869\t0.37232\t0.01770\t0.02201\t0.02131\t0.01609\t0.02207\t0.01374\t0.01934\t0.01107\t0.00739\t0.00823\t0.00242\t0.03764\t0.00675\t0.00356\t0.01024\n+95\t0.02037\t0.02306\t0.14580\t0.02556\t0.03296\t0.06905\t0.37543\t0.02613\t0.01817\t0.12499\t0.01723\t0.02046\t0.01252\t0.00947\t0.04096\t0.00291\t0.01198\t0.00784\t0.00459\t0.01053\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t774\n+\n+# created by:\n+# /home/cegg/simao/soft/augustus-3.2.1/scripts/msa2prfl.pl ./align_prep/EOG090W03A6.fa\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/insecta/prfl/EOG090W06A3.prfl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/insecta/prfl/EOG090W06A3.prfl Mon Oct 04 19:38:37 2021 +0000
[
b'@@ -0,0 +1,451 @@\n+[name]\n+unknown\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t78\n+\n+[block]\n+# block no. 0 follows, 35 sequences, length 97\n+# corresponding to MSA columns:\n+# 78-174\n+name=unknown_A\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\tY\tW\tH\tM\tC\tP\n+0\t0.01132\t0.00850\t0.01136\t0.01285\t0.05849\t0.00867\t0.00997\t0.01593\t0.10286\t0.03809\t0.03330\t0.08023\t0.10035\t0.01684\t0.04478\t0.00301\t0.00605\t0.42505\t0.00569\t0.00668\n+1\t0.01291\t0.01226\t0.02066\t0.57532\t0.15656\t0.01442\t0.04607\t0.01778\t0.04826\t0.01763\t0.01217\t0.01705\t0.00931\t0.00630\t0.00638\t0.00208\t0.00823\t0.00579\t0.00307\t0.00775\n+2\t0.02846\t0.01080\t0.01472\t0.01129\t0.01619\t0.00932\t0.00932\t0.03092\t0.01816\t0.74187\t0.02503\t0.02159\t0.01570\t0.00785\t0.00638\t0.00196\t0.00540\t0.00638\t0.00785\t0.01080\n+3\t0.00778\t0.00556\t0.00741\t0.00880\t0.00921\t0.00519\t0.00589\t0.00897\t0.01240\t0.01660\t0.06578\t0.74094\t0.04275\t0.01957\t0.00807\t0.00255\t0.00365\t0.01775\t0.00594\t0.00519\n+4\t0.00750\t0.00643\t0.00643\t0.00643\t0.00857\t0.00536\t0.00482\t0.00910\t0.01446\t0.01714\t0.06427\t0.06105\t0.73490\t0.01607\t0.00750\t0.00214\t0.00321\t0.01339\t0.00589\t0.00536\n+5\t0.00773\t0.00552\t0.00736\t0.00883\t0.00920\t0.00515\t0.00589\t0.00883\t0.01215\t0.01619\t0.03497\t0.77291\t0.04196\t0.01987\t0.00810\t0.00258\t0.00368\t0.01803\t0.00589\t0.00515\n+6\t0.00898\t0.00648\t0.00848\t0.00798\t0.00948\t0.00599\t0.00599\t0.01197\t0.01796\t0.02544\t0.73413\t0.04739\t0.05986\t0.01297\t0.00748\t0.00200\t0.00299\t0.01147\t0.00698\t0.00599\n+7\t0.82186\t0.01227\t0.00932\t0.00834\t0.01227\t0.01423\t0.00687\t0.01865\t0.01080\t0.02846\t0.00883\t0.01031\t0.00687\t0.00589\t0.00393\t0.00196\t0.00491\t0.00344\t0.00393\t0.00687\n+8\t0.82186\t0.01227\t0.00932\t0.00834\t0.01227\t0.01423\t0.00687\t0.01865\t0.01080\t0.02846\t0.00883\t0.01031\t0.00687\t0.00589\t0.00393\t0.00196\t0.00491\t0.00344\t0.00393\t0.00687
\n+9\t0.00907\t0.00677\t0.01003\t0.01003\t0.01111\t0.00784\t0.00773\t0.01122\t0.01015\t0.01461\t0.01714\t0.02574\t0.01621\t0.08357\t0.71641\t0.01000\t0.01646\t0.00692\t0.00342\t0.00557\n+10\t0.82186\t0.01227\t0.00932\t0.00834\t0.01227\t0.01423\t0.00687\t0.01865\t0.01080\t0.02846\t0.00883\t0.01031\t0.00687\t0.00589\t0.00393\t0.00196\t0.00491\t0.00344\t0.00393\t0.00687\n+11\t0.01578\t0.01363\t0.01434\t0.01291\t0.01650\t0.01578\t0.01004\t0.03371\t0.72602\t0.02654\t0.02582\t0.02367\t0.01937\t0.00861\t0.00646\t0.00215\t0.00502\t0.00717\t0.00646\t0.01004\n+12\t0.01198\t0.01128\t0.01903\t0.76180\t0.04369\t0.01409\t0.01762\t0.01621\t0.01268\t0.01621\t0.01128\t0.01691\t0.00846\t0.00634\t0.00634\t0.00211\t0.00846\t0.00564\t0.00282\t0.00705\n+13\t0.00773\t0.00552\t0.00736\t0.00883\t0.00920\t0.00515\t0.00589\t0.00883\t0.01215\t0.01619\t0.03497\t0.77291\t0.04196\t0.01987\t0.00810\t0.00258\t0.00368\t0.01803\t0.00589\t0.00515\n+14\t0.01198\t0.01128\t0.01903\t0.76180\t0.04369\t0.01409\t0.01762\t0.01621\t0.01268\t0.01621\t0.01128\t0.01691\t0.00846\t0.00634\t0.00634\t0.00211\t0.00846\t0.00564\t0.00282\t0.00705\n+15\t0.01315\t0.01128\t0.01315\t0.00940\t0.01503\t0.00846\t0.00752\t0.01597\t0.01315\t0.02067\t0.01128\t0.01315\t0.00940\t0.00470\t0.00470\t0.00094\t0.00470\t0.00376\t0.00376\t0.81583\n+16\t0.00773\t0.00552\t0.00736\t0.00883\t0.00920\t0.00515\t0.00589\t0.00883\t0.01215\t0.01619\t0.03497\t0.77291\t0.04196\t0.01987\t0.00810\t0.00258\t0.00368\t0.01803\t0.00589\t0.00515\n+17\t0.01578\t0.01363\t0.01434\t0.01291\t0.01650\t0.01578\t0.01004\t0.03371\t0.72602\t0.02654\t0.02582\t0.02367\t0.01937\t0.00861\t0.00646\t0.00215\t0.00502\t0.00717\t0.00646\t0.01004\n+18\t0.00773\t0.00552\t0.00736\t0.00883\t0.00920\t0.00515\t0.00589\t0.00883\t0.01215\t0.01619\t0.03497\t0.77291\t0.04196\t0.01987\t0.00810\t0.00258\t0.00368\t0.01803\t0.00589\t0.00515\n+19\t0.02412\t0.01777\t0.01904\t0.01460\t0.01967\t0.01967\t0.01206\t0.71633\t0.02983\t0.03998\t0.01523\t0.01523\t0.01079\t0.00762\t0.00635\t0.00190\t
0.00698\t0.00571\t0.00635\t0.01079\n+20\t0.01276\t0.01176\t0.01807\t0.60873\t0.03813\t0.01444\t0.01607\t0.01979\t0.15849\t0.01832\t0.01425\t0.01829\t0.01069\t0.00681\t0.00637\t0.00212\t0.00775\t0.00595\t0.00356\t0.00766\n+21\t0.01315\t0.01128\t0.01315\t0.00940\t0.01503\t0.00846\t0.00752\t0.01597\t0.01315\t0.02067\t0.01128\t0.01315\t0.00940\t0.00470\t0.00470\t0.00094\t0.00470\t0.00376\t0.00376\t0.81583\n+22\t0.01570\t0.01507\t0.02575\t0.03894\t0.73748\t0.01507\t0.01947\t0.01947\t0.01444\t0.02073\t0.01193\t0.0'..b'0.70975\t0.00996\t0.01636\t0.00694\t0.00342\t0.00556\n+89\t0.00785\t0.00644\t0.00692\t0.00680\t0.00879\t0.00551\t0.00510\t0.00979\t0.01530\t0.01913\t0.22483\t0.05778\t0.57309\t0.01532\t0.00749\t0.00211\t0.00316\t0.01293\t0.00615\t0.00551\n+90\t0.02370\t0.03023\t0.01798\t0.01634\t0.01961\t0.75158\t0.01226\t0.02533\t0.01798\t0.01553\t0.00981\t0.01144\t0.00817\t0.00654\t0.00572\t0.00163\t0.01144\t0.00409\t0.00327\t0.00735\n+91\t0.82186\t0.01227\t0.00932\t0.00834\t0.01227\t0.01423\t0.00687\t0.01865\t0.01080\t0.02846\t0.00883\t0.01031\t0.00687\t0.00589\t0.00393\t0.00196\t0.00491\t0.00344\t0.00393\t0.00687\n+92\t0.82186\t0.01227\t0.00932\t0.00834\t0.01227\t0.01423\t0.00687\t0.01865\t0.01080\t0.02846\t0.00883\t0.01031\t0.00687\t0.00589\t0.00393\t0.00196\t0.00491\t0.00344\t0.00393\t0.00687\n+93\t0.01501\t0.01702\t0.03689\t0.02731\t0.06618\t0.01600\t0.68185\t0.02028\t0.01495\t0.02034\t0.01279\t0.01705\t0.00965\t0.00536\t0.00743\t0.00213\t0.01055\t0.00740\t0.00321\t0.00863\n+94\t0.00891\t0.00648\t0.00838\t0.00791\t0.00943\t0.00596\t0.00593\t0.01183\t0.01779\t0.02504\t0.70181\t0.04805\t0.09243\t0.01312\t0.00748\t0.00200\t0.00300\t0.01157\t0.00693\t0.00596\n+95\t0.00773\t0.00552\t0.00736\t0.00883\t0.00920\t0.00515\t0.00589\t0.00883\t0.01215\t0.01619\t0.03497\t0.77291\t0.04196\t0.01987\t0.00810\t0.00258\t0.00368\t0.01803\t0.00589\t0.00515\n+96\t0.01315\t0.01128\t0.01315\t0.00940\t0.01503\t0.00846\t0.00752\t0.01597\t0.01315\t0.02067\t0.01128\t0.01315\t0.00940\t0.00470\t0.
00470\t0.00094\t0.00470\t0.00376\t0.00376\t0.81583\n+97\t0.01426\t0.01451\t0.01937\t0.01664\t0.01677\t0.04764\t0.01382\t0.01565\t0.01003\t0.01528\t0.00838\t0.01379\t0.00832\t0.01093\t0.02024\t0.00273\t0.73639\t0.00550\t0.00279\t0.00696\n+98\t0.01570\t0.01507\t0.02575\t0.03894\t0.73748\t0.01507\t0.01947\t0.01947\t0.01444\t0.02073\t0.01193\t0.01570\t0.01005\t0.00565\t0.00628\t0.00188\t0.00754\t0.00565\t0.00314\t0.01005\n+99\t0.02399\t0.02158\t0.01871\t0.01513\t0.01965\t0.24313\t0.01212\t0.50536\t0.02621\t0.03251\t0.01357\t0.01407\t0.00999\t0.00729\t0.00616\t0.00182\t0.00834\t0.00522\t0.00541\t0.00974\n+100\t0.00750\t0.00643\t0.00643\t0.00643\t0.00857\t0.00536\t0.00482\t0.00910\t0.01446\t0.01714\t0.06427\t0.06105\t0.73490\t0.01607\t0.00750\t0.00214\t0.00321\t0.01339\t0.00589\t0.00536\n+101\t0.02536\t0.01255\t0.01545\t0.01209\t0.01676\t0.01228\t0.00990\t0.19825\t0.08366\t0.47295\t0.05595\t0.02143\t0.01689\t0.00810\t0.00643\t0.00197\t0.00564\t0.00652\t0.00732\t0.01050\n+102\t0.01980\t0.04080\t0.05751\t0.01220\t0.01625\t0.04709\t0.01024\t0.14781\t0.13568\t0.24600\t0.06686\t0.13103\t0.02095\t0.00976\t0.00657\t0.00204\t0.00578\t0.00810\t0.00631\t0.00921\n+103\t0.02412\t0.01777\t0.01904\t0.01460\t0.01967\t0.01967\t0.01206\t0.71633\t0.02983\t0.03998\t0.01523\t0.01523\t0.01079\t0.00762\t0.00635\t0.00190\t0.00698\t0.00571\t0.00635\t0.01079\n+104\t0.00898\t0.00648\t0.00848\t0.00798\t0.00948\t0.00599\t0.00599\t0.01197\t0.01796\t0.02544\t0.73413\t0.04739\t0.05986\t0.01297\t0.00748\t0.00200\t0.00299\t0.01147\t0.00698\t0.00599\n+105\t0.01281\t0.01107\t0.01383\t0.06016\t0.01698\t0.00905\t0.02996\t0.01609\t0.03517\t0.02003\t0.01323\t0.05154\t0.01172\t0.03363\t0.00611\t0.00136\t0.00515\t0.00499\t0.00387\t0.64326\n+106\t0.01318\t0.09360\t0.65981\t0.01824\t0.05223\t0.01560\t0.02226\t0.01998\t0.01339\t0.01969\t0.01119\t0.01321\t0.00812\t0.00596\t0.00588\t0.00195\t0.00910\t0.00462\t0.00270\t0.00930\n+107\t0.01361\t0.01126\t0.01320\t0.00945\t0.01507\t0.00848\t0.00757\t0.01642\t0.01330\t0.04
218\t0.01169\t0.01341\t0.00958\t0.00479\t0.00475\t0.00097\t0.00472\t0.00384\t0.00388\t0.79183\n+108\t0.01557\t0.01687\t0.03452\t0.02493\t0.03104\t0.01623\t0.61740\t0.06216\t0.07226\t0.02197\t0.01400\t0.01752\t0.01047\t0.00574\t0.00734\t0.00213\t0.01003\t0.00736\t0.00365\t0.00880\n+109\t0.00762\t0.00643\t0.00660\t0.00656\t0.00864\t0.00541\t0.00492\t0.00934\t0.01475\t0.01783\t0.11991\t0.05992\t0.67883\t0.01581\t0.00750\t0.00213\t0.00319\t0.01323\t0.00598\t0.00541\n+110\t0.00750\t0.00643\t0.00643\t0.00643\t0.00857\t0.00536\t0.00482\t0.00910\t0.01446\t0.01714\t0.06427\t0.06105\t0.73490\t0.01607\t0.00750\t0.00214\t0.00321\t0.01339\t0.00589\t0.00536\n+111\t0.01022\t0.00730\t0.01022\t0.01168\t0.01314\t0.00730\t0.01022\t0.01314\t0.01460\t0.01899\t0.03359\t0.07156\t0.03651\t0.01752\t0.00876\t0.00292\t0.00584\t0.69478\t0.00584\t0.00584\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t0\n+\n+# created by:\n+# /home/cegg/simao/soft/augustus-3.2.1/scripts/msa2prfl.pl ./align_prep/EOG090W06A3.fa\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/insecta/prfl/EOG090W0GYE.prfl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/insecta/prfl/EOG090W0GYE.prfl Mon Oct 04 19:38:37 2021 +0000
[
b'@@ -0,0 +1,133 @@\n+[name]\n+unknown\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t1\n+\n+[block]\n+# block no. 0 follows, 37 sequences, length 109\n+# corresponding to MSA columns:\n+# 1-109\n+name=unknown_A\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\tY\tW\tH\tM\tC\tP\n+0\t0.02227\t0.01639\t0.01774\t0.01388\t0.01851\t0.01810\t0.01150\t0.67323\t0.02769\t0.03707\t0.01601\t0.01874\t0.01226\t0.00806\t0.00630\t0.00191\t0.00665\t0.05755\t0.00609\t0.01006\n+1\t0.01614\t0.01295\t0.01387\t0.01234\t0.01590\t0.01476\t0.00964\t0.03233\t0.68224\t0.07953\t0.02486\t0.02269\t0.01842\t0.00825\t0.00622\t0.00206\t0.00487\t0.00686\t0.00633\t0.00974\n+2\t0.01215\t0.01030\t0.01211\t0.00901\t0.01392\t0.00783\t0.00709\t0.01470\t0.01259\t0.01950\t0.01326\t0.09266\t0.01234\t0.00606\t0.00487\t0.00107\t0.00443\t0.00506\t0.00384\t0.73721\n+3\t0.02643\t0.01082\t0.01430\t0.01110\t0.01575\t0.00976\t0.00913\t0.05075\t0.07124\t0.67581\t0.02393\t0.02080\t0.01527\t0.00762\t0.00616\t0.00191\t0.00523\t0.00619\t0.00743\t0.01036\n+4\t0.01125\t0.01047\t0.01752\t0.71354\t0.03969\t0.01296\t0.01616\t0.01511\t0.01220\t0.01564\t0.01258\t0.07316\t0.01056\t0.00709\t0.00625\t0.00207\t0.00782\t0.00633\t0.00294\t0.00666\n+5\t0.01125\t0.01047\t0.01752\t0.71354\t0.03969\t0.01296\t0.01616\t0.01511\t0.01220\t0.01564\t0.01258\t0.07316\t0.01056\t0.00709\t0.00625\t0.00207\t0.00782\t0.00633\t0.00294\t0.00666\n+6\t0.01156\t0.01088\t0.01836\t0.77016\t0.04216\t0.01360\t0.01700\t0.01564\t0.01224\t0.01564\t0.01088\t0.01632\t0.00816\t0.00612\t0.00612\t0.00204\t0.00816\t0.00544\t0.00272\t0.00680\n+7\t0.00895\t0.00571\t0.00763\t0.00870\t0.00938\t0.00527\t0.00593\t0.01011\t0.01215\t0.07029\t0.03303\t0.72437\t0.03860\t0.01831\t0.00769\t0.00244\t0.00367\t0.01657\t0.00582\t0.00538\n+8\t0.00969\t0.00692\t0.00966\t0.01107\t0.01240\t0.00689\t0.00955\t0.01237\t0.01392\t0.01812\t0.03251\t0.12197\t0.03562\t0.01708\t0.00841\t0.00279\t0.00548\t0.65433\t0.00564\t0.00559\
n+9\t0.01156\t0.01088\t0.01836\t0.77016\t0.04216\t0.01360\t0.01700\t0.01564\t0.01224\t0.01564\t0.01088\t0.01632\t0.00816\t0.00612\t0.00612\t0.00204\t0.00816\t0.00544\t0.00272\t0.00680\n+10\t0.01570\t0.73033\t0.03022\t0.01033\t0.01520\t0.02279\t0.01012\t0.01760\t0.01238\t0.01449\t0.01039\t0.06715\t0.01028\t0.00627\t0.00422\t0.00140\t0.00632\t0.00432\t0.00285\t0.00764\n+11\t0.00937\t0.00647\t0.00721\t0.00711\t0.00736\t0.00663\t0.00415\t0.01066\t0.06293\t0.01289\t0.01971\t0.03878\t0.02199\t0.72714\t0.02930\t0.00565\t0.00585\t0.00875\t0.00390\t0.00415\n+12\t0.01575\t0.01474\t0.02436\t0.03583\t0.69259\t0.01487\t0.01825\t0.07139\t0.01504\t0.02138\t0.01175\t0.01512\t0.00975\t0.00559\t0.00606\t0.00182\t0.00723\t0.00546\t0.00326\t0.00975\n+13\t0.01183\t0.01105\t0.01802\t0.71383\t0.04021\t0.01372\t0.01646\t0.01690\t0.06602\t0.01638\t0.01192\t0.01680\t0.00894\t0.00628\t0.00613\t0.00204\t0.00791\t0.00555\t0.00298\t0.00701\n+14\t0.00746\t0.00533\t0.00710\t0.00852\t0.00888\t0.00497\t0.00568\t0.00852\t0.01172\t0.01563\t0.03374\t0.78088\t0.04049\t0.01918\t0.00781\t0.00249\t0.00355\t0.01740\t0.00568\t0.00497\n+15\t0.01445\t0.01651\t0.03612\t0.02580\t0.03199\t0.01548\t0.72446\t0.01961\t0.01445\t0.01961\t0.01238\t0.01651\t0.00929\t0.00516\t0.00722\t0.00206\t0.01032\t0.00722\t0.00310\t0.00826\n+16\t0.01228\t0.03166\t0.75316\t0.01745\t0.02649\t0.01422\t0.02262\t0.01939\t0.01292\t0.01939\t0.01099\t0.01292\t0.00775\t0.00582\t0.00582\t0.00194\t0.00905\t0.00452\t0.00258\t0.00905\n+17\t0.01637\t0.78856\t0.03208\t0.01047\t0.01571\t0.02422\t0.01047\t0.01833\t0.01244\t0.01440\t0.00851\t0.00982\t0.00786\t0.00524\t0.00393\t0.00131\t0.00655\t0.00327\t0.00262\t0.00786\n+18\t0.01269\t0.01088\t0.01269\t0.00907\t0.01451\t0.00816\t0.00725\t0.01541\t0.01269\t0.01995\t0.01088\t0.01269\t0.00907\t0.00453\t0.00453\t0.00091\t0.00453\t0.00363\t0.00363\t0.82229\n+19\t0.01269\t0.01088\t0.01269\t0.00907\t0.01451\t0.00816\t0.00725\t0.01541\t0.01269\t0.01995\t0.01088\t0.01269\t0.00907\t0.00453\t0.00453\t0.00091\t0
.00453\t0.00363\t0.00363\t0.82229\n+20\t0.01523\t0.01315\t0.01384\t0.01246\t0.01592\t0.01523\t0.00969\t0.03253\t0.73563\t0.02561\t0.02491\t0.02284\t0.01869\t0.00830\t0.00623\t0.00208\t0.00484\t0.00692\t0.00623\t0.00969\n+21\t0.82811\t0.01184\t0.00900\t0.00805\t0.01184\t0.01373\t0.00663\t0.01799\t0.01042\t0.02746\t0.00852\t0.00994\t0.00663\t0.00568\t0.00379\t0.00189\t0.00474\t0.00331\t0.00379\t0.00663\n+22\t0.00866\t0.00626\t0.00818\t0.00770\t0.00914\t0.00578\t0.00578\t0.01155\t0.01733\t0.02455\t0.74346\t0.04'..b'0.00723\t0.00207\t0.00310\t0.01292\t0.00568\t0.00517\n+86\t0.01141\t0.00571\t0.00571\t0.00571\t0.00713\t0.00571\t0.00428\t0.01426\t0.01284\t0.02282\t0.01997\t0.02282\t0.01569\t0.00713\t0.00428\t0.00143\t0.00285\t0.00571\t0.81886\t0.00571\n+87\t0.00746\t0.00533\t0.00710\t0.00852\t0.00888\t0.00497\t0.00568\t0.00852\t0.01172\t0.01563\t0.03374\t0.78088\t0.04049\t0.01918\t0.00781\t0.00249\t0.00355\t0.01740\t0.00568\t0.00497\n+88\t0.01637\t0.78856\t0.03208\t0.01047\t0.01571\t0.02422\t0.01047\t0.01833\t0.01244\t0.01440\t0.00851\t0.00982\t0.00786\t0.00524\t0.00393\t0.00131\t0.00655\t0.00327\t0.00262\t0.00786\n+89\t0.00723\t0.00620\t0.00620\t0.00620\t0.00827\t0.00517\t0.00465\t0.00878\t0.01395\t0.01654\t0.06201\t0.05891\t0.74421\t0.01550\t0.00723\t0.00207\t0.00310\t0.01292\t0.00568\t0.00517\n+90\t0.00746\t0.00533\t0.00710\t0.00852\t0.00888\t0.00497\t0.00568\t0.00852\t0.01172\t0.01563\t0.03374\t0.78088\t0.04049\t0.01918\t0.00781\t0.00249\t0.00355\t0.01740\t0.00568\t0.00497\n+91\t0.01445\t0.01651\t0.03612\t0.02580\t0.03199\t0.01548\t0.72446\t0.01961\t0.01445\t0.01961\t0.01238\t0.01651\t0.00929\t0.00516\t0.00722\t0.00206\t0.01032\t0.00722\t0.00310\t0.00826\n+92\t0.02287\t0.02917\t0.01735\t0.01577\t0.01892\t0.76030\t0.01183\t0.02444\t0.01735\t0.01498\t0.00946\t0.01104\t0.00788\t0.00631\t0.00552\t0.00158\t0.01104\t0.00394\t0.00315\t0.00710\n+93\t0.01156\t0.01088\t0.01836\t0.77016\t0.04216\t0.01360\t0.01700\t0.01564\t0.01224\t0.01564\t0.01088\t0.01632\t0.00816\t0.00612\t0.
00612\t0.00204\t0.00816\t0.00544\t0.00272\t0.00680\n+94\t0.01063\t0.00532\t0.00797\t0.00797\t0.00797\t0.00532\t0.00532\t0.00797\t0.00797\t0.01063\t0.01063\t0.01861\t0.01063\t0.02127\t0.02392\t0.82190\t0.00532\t0.00532\t0.00266\t0.00266\n+95\t0.02327\t0.01715\t0.01837\t0.01408\t0.01898\t0.01898\t0.01163\t0.72628\t0.02878\t0.03858\t0.01470\t0.01470\t0.01041\t0.00735\t0.00612\t0.00184\t0.00674\t0.00551\t0.00612\t0.01041\n+96\t0.01269\t0.01088\t0.01269\t0.00907\t0.01451\t0.00816\t0.00725\t0.01541\t0.01269\t0.01995\t0.01088\t0.01269\t0.00907\t0.00453\t0.00453\t0.00091\t0.00453\t0.00363\t0.00363\t0.82229\n+97\t0.01523\t0.01315\t0.01384\t0.01246\t0.01592\t0.01523\t0.00969\t0.03253\t0.73563\t0.02561\t0.02491\t0.02284\t0.01869\t0.00830\t0.00623\t0.00208\t0.00484\t0.00692\t0.00623\t0.00969\n+98\t0.00874\t0.00656\t0.00984\t0.00984\t0.01093\t0.00765\t0.00765\t0.01093\t0.00984\t0.01421\t0.01640\t0.02405\t0.01530\t0.04591\t0.76062\t0.00984\t0.01640\t0.00656\t0.00328\t0.00547\n+99\t0.01637\t0.78856\t0.03208\t0.01047\t0.01571\t0.02422\t0.01047\t0.01833\t0.01244\t0.01440\t0.00851\t0.00982\t0.00786\t0.00524\t0.00393\t0.00131\t0.00655\t0.00327\t0.00262\t0.00786\n+100\t0.00866\t0.00626\t0.00818\t0.00770\t0.00914\t0.00578\t0.00578\t0.01155\t0.01733\t0.02455\t0.74346\t0.04572\t0.05776\t0.01251\t0.00722\t0.00193\t0.00289\t0.01107\t0.00674\t0.00578\n+101\t0.02327\t0.01715\t0.01837\t0.01408\t0.01898\t0.01898\t0.01163\t0.72628\t0.02878\t0.03858\t0.01470\t0.01470\t0.01041\t0.00735\t0.00612\t0.00184\t0.00674\t0.00551\t0.00612\t0.01041\n+102\t0.02746\t0.01042\t0.01421\t0.01089\t0.01563\t0.00900\t0.00900\t0.02983\t0.01752\t0.75093\t0.02415\t0.02083\t0.01515\t0.00758\t0.00616\t0.00189\t0.00521\t0.00616\t0.00758\t0.01042\n+103\t0.00723\t0.00620\t0.00620\t0.00620\t0.00827\t0.00517\t0.00465\t0.00878\t0.01395\t0.01654\t0.06201\t0.05891\t0.74421\t0.01550\t0.00723\t0.00207\t0.00310\t0.01292\t0.00568\t0.00517\n+104\t0.00746\t0.00533\t0.00710\t0.00852\t0.00888\t0.00497\t0.00568\t0.00852\t0.01172\t0.01563
\t0.03374\t0.78088\t0.04049\t0.01918\t0.00781\t0.00249\t0.00355\t0.01740\t0.00568\t0.00497\n+105\t0.01523\t0.01315\t0.01384\t0.01246\t0.01592\t0.01523\t0.00969\t0.03253\t0.73563\t0.02561\t0.02491\t0.02284\t0.01869\t0.00830\t0.00623\t0.00208\t0.00484\t0.00692\t0.00623\t0.00969\n+106\t0.02327\t0.01715\t0.01837\t0.01408\t0.01898\t0.01898\t0.01163\t0.72628\t0.02878\t0.03858\t0.01470\t0.01470\t0.01041\t0.00735\t0.00612\t0.00184\t0.00674\t0.00551\t0.00612\t0.01041\n+107\t0.00723\t0.00620\t0.00620\t0.00620\t0.00827\t0.00517\t0.00465\t0.00878\t0.01395\t0.01654\t0.06201\t0.05891\t0.74421\t0.01550\t0.00723\t0.00207\t0.00310\t0.01292\t0.00568\t0.00517\n+108\t0.01445\t0.01651\t0.03612\t0.02580\t0.03199\t0.01548\t0.72446\t0.01961\t0.01445\t0.01961\t0.01238\t0.01651\t0.00929\t0.00516\t0.00722\t0.00206\t0.01032\t0.00722\t0.00310\t0.00826\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+11\t120\n+\n+# created by:\n+# /home/cegg/simao/soft/augustus-3.2.1/scripts/msa2prfl.pl ./align_prep/EOG090W0GYE.fa\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/insecta/prfl/EOG090W0T3K.prfl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/insecta/prfl/EOG090W0T3K.prfl Mon Oct 04 19:38:37 2021 +0000
[
b'@@ -0,0 +1,429 @@\n+[name]\n+unknown\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t16\n+\n+[block]\n+# block no. 0 follows, 30 sequences, length 21\n+# corresponding to MSA columns:\n+# 93-113\n+name=unknown_A\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\tY\tW\tH\tM\tC\tP\n+0\t0.01057\t0.00773\t0.01034\t0.01163\t0.01327\t0.00750\t0.00987\t0.01344\t0.01601\t0.02067\t0.06875\t0.11927\t0.14101\t0.01901\t0.00933\t0.00302\t0.00572\t0.50008\t0.00649\t0.00630\n+1\t0.01546\t0.06663\t0.01682\t0.01327\t0.01666\t0.04776\t0.06572\t0.02349\t0.22653\t0.06835\t0.14524\t0.05426\t0.08041\t0.04384\t0.04676\t0.00294\t0.00678\t0.04435\t0.00614\t0.00858\n+2\t0.01789\t0.53823\t0.20660\t0.01426\t0.02119\t0.06285\t0.01549\t0.02158\t0.01462\t0.01788\t0.01047\t0.01216\t0.00893\t0.00620\t0.00512\t0.00169\t0.00846\t0.00413\t0.00301\t0.00925\n+3\t0.12683\t0.01280\t0.01355\t0.01162\t0.01527\t0.03360\t0.00908\t0.13641\t0.11546\t0.07711\t0.12786\t0.08985\t0.04344\t0.01130\t0.00812\t0.05341\t0.00558\t0.00834\t0.00613\t0.09425\n+4\t0.14326\t0.01153\t0.03546\t0.01195\t0.01515\t0.01117\t0.05274\t0.04592\t0.11102\t0.02467\t0.09562\t0.20004\t0.11112\t0.01336\t0.00744\t0.00240\t0.00540\t0.05331\t0.00593\t0.04251\n+5\t0.02165\t0.01301\t0.01394\t0.01176\t0.01544\t0.09116\t0.00934\t0.08063\t0.01834\t0.27268\t0.05014\t0.02373\t0.01776\t0.01554\t0.12344\t0.00343\t0.00824\t0.00676\t0.16082\t0.04219\n+6\t0.01092\t0.00790\t0.01079\t0.01213\t0.01380\t0.00777\t0.01047\t0.01401\t0.01627\t0.02115\t0.08035\t0.07615\t0.09660\t0.01884\t0.00944\t0.00308\t0.00600\t0.57143\t0.00651\t0.00639\n+7\t0.01490\t0.01416\t0.02404\t0.44524\t0.32734\t0.01596\t0.02024\t0.01934\t0.01477\t0.01992\t0.01271\t0.01804\t0.01004\t0.00666\t0.00695\t0.00222\t0.00888\t0.00621\t0.00325\t0.00914\n+8\t0.01741\t0.27337\t0.28936\t0.01871\t0.02621\t0.10378\t0.13668\t0.02242\t0.01545\t0.01966\t0.01166\t0.01403\t0.00921\t0.00634\t0.00616\t0.00195\t0.00989\t0.00515\t0.00314\t0.00944
\n+9\t0.00978\t0.00661\t0.00772\t0.00809\t0.00818\t0.00652\t0.00474\t0.01005\t0.01087\t0.01450\t0.02570\t0.20443\t0.03005\t0.58960\t0.02952\t0.00588\t0.00615\t0.01233\t0.00474\t0.00455\n+10\t0.01976\t0.01393\t0.01804\t0.01508\t0.01922\t0.01460\t0.07980\t0.16972\t0.21138\t0.14459\t0.02649\t0.13414\t0.02277\t0.01140\t0.00766\t0.00241\t0.00656\t0.06562\t0.00682\t0.01001\n+11\t0.02375\t0.05431\t0.01867\t0.01574\t0.01930\t0.45877\t0.01197\t0.11245\t0.01988\t0.05951\t0.01404\t0.01807\t0.01203\t0.09670\t0.03288\t0.00273\t0.01088\t0.00560\t0.00438\t0.00832\n+12\t0.00905\t0.00728\t0.00811\t0.00811\t0.01012\t0.00650\t0.00600\t0.01186\t0.04448\t0.02139\t0.22165\t0.14724\t0.43929\t0.01702\t0.00828\t0.00238\t0.00362\t0.01457\t0.00680\t0.00624\n+13\t0.05667\t0.00906\t0.01074\t0.00966\t0.01254\t0.00852\t0.00747\t0.01900\t0.07503\t0.17135\t0.27144\t0.08776\t0.20624\t0.01356\t0.00772\t0.00228\t0.00426\t0.01152\t0.00729\t0.00791\n+14\t0.04127\t0.01698\t0.01775\t0.01504\t0.01907\t0.12772\t0.06970\t0.11033\t0.24965\t0.08074\t0.06339\t0.07501\t0.06234\t0.01012\t0.00723\t0.00224\t0.00714\t0.00849\t0.00621\t0.00958\n+15\t0.08182\t0.01473\t0.01498\t0.01335\t0.01725\t0.03410\t0.01044\t0.06184\t0.51550\t0.04834\t0.05791\t0.02708\t0.05309\t0.00969\t0.00695\t0.00231\t0.00561\t0.00790\t0.00682\t0.01031\n+16\t0.00861\t0.00647\t0.00778\t0.00870\t0.00970\t0.00586\t0.00593\t0.00996\t0.01406\t0.01808\t0.06850\t0.45850\t0.24625\t0.08430\t0.01101\t0.00301\t0.00410\t0.01721\t0.00632\t0.00564\n+17\t0.02657\t0.01690\t0.01962\t0.01639\t0.05856\t0.01788\t0.01271\t0.41742\t0.08333\t0.22637\t0.02048\t0.01947\t0.01409\t0.00842\t0.00700\t0.00213\t0.00706\t0.00661\t0.00725\t0.01176\n+18\t0.05960\t0.01505\t0.02492\t0.26659\t0.45727\t0.01618\t0.01999\t0.02020\t0.01504\t0.02166\t0.01270\t0.01737\t0.01029\t0.00648\t0.00679\t0.00216\t0.00846\t0.00608\t0.00339\t0.00979\n+19\t0.00917\t0.00665\t0.00857\t0.00985\t0.01070\t0.00627\t0.00710\t0.01074\t0.01393\t0.01819\t0.04060\t0.47254\t0.12255\t0.08385\t0.01126\t0.00319\t
0.00470\t0.14815\t0.00627\t0.00572\n+20\t0.05581\t0.01600\t0.01854\t0.01474\t0.01969\t0.01718\t0.04419\t0.31554\t0.18190\t0.16248\t0.02140\t0.02064\t0.01518\t0.00846\t0.00684\t0.00216\t0.00669\t0.00680\t0.05469\t0.01106\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t1\n+\n+[block]\n+# block no. 1 follows, 30 sequences, length 20\n+# corresponding to MSA columns:\n+# 116-135\n+name=unknown_B\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\t'..b'33\t0.01572\t0.00916\t0.09893\t0.05443\t0.01661\t0.06048\t0.08914\t0.29528\t0.05706\t0.01621\t0.00853\t0.00266\t0.00586\t0.17199\t0.04059\t0.00718\n+1\t0.01008\t0.00819\t0.01089\t0.01291\t0.10367\t0.00753\t0.00812\t0.01261\t0.01684\t0.02246\t0.28461\t0.17280\t0.27194\t0.01565\t0.00818\t0.00234\t0.00417\t0.01372\t0.00650\t0.00679\n+2\t0.01697\t0.01675\t0.01923\t0.11577\t0.11874\t0.14866\t0.01441\t0.02029\t0.04384\t0.04284\t0.01635\t0.04987\t0.03525\t0.01258\t0.05954\t0.00298\t0.21250\t0.00690\t0.03830\t0.00823\n+3\t0.00988\t0.00761\t0.01041\t0.10716\t0.01594\t0.00766\t0.00898\t0.01213\t0.01462\t0.01901\t0.07698\t0.32024\t0.13860\t0.05909\t0.01016\t0.00296\t0.00532\t0.16117\t0.00596\t0.00612\n+4\t0.01458\t0.01504\t0.06713\t0.05603\t0.04708\t0.08021\t0.08114\t0.03845\t0.04180\t0.02064\t0.04067\t0.07262\t0.02074\t0.08999\t0.05974\t0.00347\t0.08364\t0.09565\t0.06377\t0.00762\n+5\t0.01213\t0.00727\t0.00944\t0.00982\t0.01117\t0.00674\t0.00703\t0.01376\t0.01473\t0.11171\t0.09387\t0.39223\t0.07980\t0.12326\t0.01241\t0.00325\t0.00481\t0.07357\t0.00654\t0.00645\n+6\t0.01895\t0.19040\t0.13613\t0.01559\t0.02078\t0.13609\t0.04289\t0.12433\t0.05084\t0.02399\t0.06629\t0.01812\t0.01490\t0.00799\t0.00736\t0.00200\t0.05856\t0.00587\t0.04971\t0.00919\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t1\n+\n+[block]\n+# block no. 
11 follows, 30 sequences, length 15\n+# corresponding to MSA columns:\n+# 432-446\n+name=unknown_L\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\tY\tW\tH\tM\tC\tP\n+0\t0.06236\t0.06510\t0.08912\t0.16991\t0.02482\t0.07478\t0.01410\t0.04641\t0.01563\t0.11644\t0.01478\t0.01743\t0.01120\t0.00780\t0.00831\t0.00210\t0.10327\t0.00563\t0.08136\t0.06945\n+1\t0.14285\t0.23985\t0.21370\t0.01521\t0.02151\t0.06006\t0.05814\t0.06360\t0.01556\t0.02266\t0.01380\t0.07797\t0.01226\t0.00785\t0.00593\t0.00202\t0.00820\t0.00604\t0.00382\t0.00898\n+2\t0.07880\t0.07716\t0.01983\t0.01623\t0.04251\t0.14432\t0.02899\t0.04272\t0.07521\t0.09621\t0.01563\t0.04745\t0.01326\t0.01265\t0.06429\t0.00300\t0.20210\t0.00644\t0.00454\t0.00868\n+3\t0.01021\t0.00702\t0.00876\t0.00889\t0.01028\t0.00644\t0.00636\t0.01227\t0.01612\t0.05081\t0.28848\t0.24278\t0.15953\t0.10544\t0.01168\t0.00297\t0.00416\t0.03488\t0.00674\t0.00616\n+4\t0.01544\t0.03846\t0.06976\t0.05110\t0.11266\t0.03479\t0.07755\t0.07899\t0.08991\t0.04165\t0.09492\t0.11646\t0.02418\t0.10979\t0.01117\t0.00287\t0.00724\t0.00932\t0.00518\t0.00856\n+5\t0.00969\t0.00759\t0.00884\t0.00922\t0.01095\t0.00717\t0.00684\t0.01318\t0.07550\t0.02076\t0.14049\t0.24513\t0.29586\t0.04121\t0.00932\t0.00269\t0.00420\t0.07837\t0.00662\t0.00638\n+6\t0.01690\t0.01784\t0.02771\t0.06260\t0.05360\t0.07422\t0.22649\t0.04652\t0.01448\t0.01995\t0.01149\t0.01650\t0.00979\t0.00915\t0.01485\t0.00262\t0.35666\t0.00667\t0.00342\t0.00854\n+7\t0.01238\t0.00894\t0.01140\t0.01072\t0.01252\t0.00986\t0.00819\t0.10557\t0.01468\t0.02065\t0.02318\t0.03131\t0.07093\t0.13342\t0.42455\t0.00794\t0.01322\t0.00815\t0.06564\t0.00674\n+8\t0.01167\t0.00997\t0.01048\t0.01060\t0.02926\t0.07115\t0.00719\t0.01294\t0.01273\t0.01635\t0.02939\t0.06880\t0.15102\t0.31387\t0.14024\t0.00557\t0.04995\t0.00999\t0.03309\t0.00573\n+9\t0.00925\t0.00996\t0.08892\t0.00991\t0.01224\t0.00707\t0.00833\t0.01173\t0.01525\t0.02012\t0.14372\t0.35023\t0.25207\t0.01770\t0
.00834\t0.00252\t0.00449\t0.01551\t0.00625\t0.00640\n+10\t0.01655\t0.01534\t0.07694\t0.01239\t0.01839\t0.01227\t0.01075\t0.05370\t0.16343\t0.06417\t0.01692\t0.01762\t0.01303\t0.00657\t0.00590\t0.00154\t0.00585\t0.00530\t0.00505\t0.47830\n+11\t0.11634\t0.01390\t0.04532\t0.01352\t0.01857\t0.01281\t0.05510\t0.08091\t0.08585\t0.36568\t0.09789\t0.02439\t0.02065\t0.00880\t0.00690\t0.00219\t0.00624\t0.00728\t0.00717\t0.01048\n+12\t0.01105\t0.07400\t0.01185\t0.00947\t0.01187\t0.00952\t0.00741\t0.01565\t0.09949\t0.02356\t0.31299\t0.10422\t0.22936\t0.01467\t0.00787\t0.00229\t0.00417\t0.03677\t0.00673\t0.00706\n+13\t0.01971\t0.07456\t0.14709\t0.01386\t0.01908\t0.07351\t0.01304\t0.16242\t0.06634\t0.11664\t0.08820\t0.02331\t0.03910\t0.00872\t0.00659\t0.00203\t0.00708\t0.00685\t0.10223\t0.00964\n+14\t0.01454\t0.01364\t0.08113\t0.01206\t0.01537\t0.03959\t0.01025\t0.10345\t0.10294\t0.04920\t0.09273\t0.11455\t0.12668\t0.07330\t0.06551\t0.00337\t0.00701\t0.04199\t0.02468\t0.00801\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t46\n+\n+# created by:\n+# /home/cegg/simao/soft/augustus-3.2.1/scripts/msa2prfl.pl ./align_prep/EOG090W0T3K.fa\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/insecta/scores_cutoff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/insecta/scores_cutoff Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,1658 @@\n+EOG090W0HFV\t20.79\n+EOG090W0427\t76.86\n+EOG090W09K7\t70.14\n+EOG090W0B5K\t128.73\n+EOG090W0153\t102.48\n+EOG090W051T\t215.17999999999998\n+EOG090W01WI\t383.17999999999995\n+EOG090W01A3\t180.95\n+EOG090W067A\t151.13\n+EOG090W0IUR\t68.03999999999999\n+EOG090W09BV\t69.41\n+EOG090W0AIA\t45.01\n+EOG090W05D8\t52.35999999999999\n+EOG090W01IP\t193.27\n+EOG090W02C3\t62.79\n+EOG090W0B1Y\t200.97\n+EOG090W01VD\t37.519999999999996\n+EOG090W035W\t263.76\n+EOG090W0DWN\t50.489999999999995\n+EOG090W028U\t132.57999999999998\n+EOG090W02TI\t99.96000000000001\n+EOG090W00PB\t71.53999999999999\n+EOG090W038B\t138.32\n+EOG090W0BUR\t78.89\n+EOG090W0F0L\t27.65\n+EOG090W08IZ\t79.59\n+EOG090W0BEB\t171.71\n+EOG090W0I37\t40.10999999999999\n+EOG090W0CQ9\t196.62999999999997\n+EOG090W04BS\t134.26\n+EOG090W0FCL\t64.96\n+EOG090W0AXJ\t159.88\n+EOG090W0FEP\t79.44999999999999\n+EOG090W0FVI\t120.18999999999998\n+EOG090W0GP3\t64.75\n+EOG090W01NH\t132.65\n+EOG090W0IEE\t90.64999999999999\n+EOG090W0IP7\t102.33999999999999\n+EOG090W038Z\t104.78999999999999\n+EOG090W0B8O\t113.39999999999999\n+EOG090W0KP0\t37.309999999999995\n+EOG090W064P\t142.79999999999998\n+EOG090W0A3V\t166.73999999999998\n+EOG090W0BOZ\t106.18999999999998\n+EOG090W0H6T\t34.089999999999996\n+EOG090W0EDI\t55.36999999999999\n+EOG090W0EFH\t43.89\n+EOG090W04BN\t196.48999999999998\n+EOG090W00SP\t246.04999999999998\n+EOG090W01ZN\t189.64000000000001\n+EOG090W014R\t24.29\n+EOG090W0FJE\t84.41999999999999\n+EOG090W00EP\t16.59\n+EOG090W07MY\t127.67999999999999\n+EOG090W00LR\t281.75\n+EOG090W06QR\t84.84\n+EOG090W06J7\t114.8\n+EOG090W04G6\t264.99\n+EOG090W06IG\t86.66\n+EOG090W036Y\t274.78000000000003\n+EOG090W0CMO\t61.53\n+EOG090W0BFE\t67.34\n+EOG090W0JYN\t62.71999999999999\n+EOG090W054P\t125.64999999999999\n+EOG090W0753\t80.64\n+EOG090W0FKG\t73.42999999999999\n+EOG090W0B6L\t51.66\n+EOG090W0C2I\t135.07999999999998\n+EOG090W057Z\t83.44\n+EOG090W01H1\t66.00999999999999\n+EOG090W07A3\t61.31999999999999\n+EOG090W0D5E\t42.20999999999999
4\n+EOG090W07PK\t54.809999999999995\n+EOG090W0GKW\t93.86999999999999\n+EOG090W0J9P\t43.47\n+EOG090W0A69\t93.61\n+EOG090W0DRQ\t38.15\n+EOG090W04G1\t107.31\n+EOG090W08L6\t41.019999999999996\n+EOG090W02QT\t95.68999999999998\n+EOG090W0K04\t62.92\n+EOG090W02UI\t134.11999999999998\n+EOG090W00MS\t211.54\n+EOG090W0HXZ\t69.86\n+EOG090W09LF\t17.71\n+EOG090W060L\t301.84\n+EOG090W0I0Q\t47.39\n+EOG090W0B0M\t95.33999999999999\n+EOG090W0EY0\t90.64999999999999\n+EOG090W019L\t107.66\n+EOG090W0ALV\t164.29\n+EOG090W0F9J\t138.67\n+EOG090W0BZ2\t67.19999999999999\n+EOG090W0B5T\t160.85999999999999\n+EOG090W0JBN\t66.43\n+EOG090W06CO\t91.41999999999999\n+EOG090W06Y4\t45.35999999999999\n+EOG090W00D0\t558.88\n+EOG090W0N7U\t66.08\n+EOG090W02H5\t136.95\n+EOG090W04DH\t85.61\n+EOG090W01HI\t194.52999999999997\n+EOG090W02JZ\t297.21999999999997\n+EOG090W0ANA\t126.07\n+EOG090W055F\t99.82\n+EOG090W0KMC\t57.68\n+EOG090W063Z\t241.49999999999997\n+EOG090W0BC3\t72.38\n+EOG090W08A5\t68.03999999999999\n+EOG090W0BMW\t135.1\n+EOG090W08CW\t75.46\n+EOG090W04FE\t98.28\n+EOG090W0FH5\t58.38\n+EOG090W00RS\t269.64\n+EOG090W0EG7\t91.21000000000001\n+EOG090W06OD\t170.1\n+EOG090W00PL\t78.05\n+EOG090W0AV1\t163.45\n+EOG090W015K\t58.239999999999995\n+EOG090W07NX\t129.22\n+EOG090W0J8V\t91.69999999999999\n+EOG090W0FQ8\t58.239999999999995\n+EOG090W0JN2\t78.75\n+EOG090W029M\t83.02\n+EOG090W00TM\t273.90999999999997\n+EOG090W046G\t122.14999999999999\n+EOG090W06J5\t163.23999999999998\n+EOG090W0GWR\t32.34\n+EOG090W063H\t165.48\n+EOG090W01MU\t39.48\n+EOG090W0K07\t75.80999999999999\n+EOG090W02IA\t214.68999999999997\n+EOG090W0HTD\t76.78999999999999\n+EOG090W0F78\t69.36999999999999\n+EOG090W0GR5\t118.58\n+EOG090W04O1\t20.72\n+EOG090W0CN5\t108.57\n+EOG090W00X5\t129.36\n+EOG090W09PQ\t73.5\n+EOG090W06OE\t182.91\n+EOG090W0G1I\t35.769999999999996\n+EOG090W0GZA\t79.53\n+EOG090W07X1\t60.48\n+EOG090W08E9\t146.93\n+EOG090W054N\t226.82\n+EOG090W02JO\t52.849999999999994\n+EOG090W0AKG\t103.81\n+EOG090W04DG\t148.47\n+EOG090W00L3\t195.16\n+EOG090W
03O0\t98.56\n+EOG090W0HX7\t36.739999999999995\n+EOG090W08N5\t76.64999999999999\n+EOG090W00BP\t168.21\n+EOG090W0AY7\t48.019999999999996\n+EOG090W051U\t222.67000000000002\n+EOG090W09PJ\t166.18\n+EOG090W03TV\t286.92999999999995\n+EOG090W08A9\t100.44999999999999\n+EOG090W0IBV\t35.629999999999995\n+EOG090W04NQ\t64.05\n+EOG090'..b'92999999999999\n+EOG090W05HI\t39.76\n+EOG090W0DJI\t49.49\n+EOG090W03WV\t73.00999999999999\n+EOG090W0ESV\t140.14\n+EOG090W004H\t39.6\n+EOG090W0BJR\t77.91\n+EOG090W0BPH\t198.37999999999997\n+EOG090W0AH5\t97.86\n+EOG090W0BKY\t96.58\n+EOG090W07XK\t114.38\n+EOG090W0KJ3\t76.58\n+EOG090W0DEY\t74.61999999999999\n+EOG090W0I7M\t15.189999999999998\n+EOG090W05GY\t169.26\n+EOG090W0IF2\t144.41\n+EOG090W09IF\t75.03999999999999\n+EOG090W04XG\t28.49\n+EOG090W06TC\t53.480000000000004\n+EOG090W00ZZ\t305.54999999999995\n+EOG090W05ZP\t55.58\n+EOG090W09AW\t96.46000000000001\n+EOG090W0FFP\t109.76\n+EOG090W0GQZ\t48.51\n+EOG090W0DYP\t29.33\n+EOG090W0GI3\t63.349999999999994\n+EOG090W04OX\t73.36\n+EOG090W0C66\t73.91999999999999\n+EOG090W04IF\t24.29\n+EOG090W04PI\t196.35\n+EOG090W0GPQ\t36.33\n+EOG090W06OY\t207.68999999999997\n+EOG090W0PW0\t15.26\n+EOG090W0BM0\t39.199999999999996\n+EOG090W08QR\t84.91\n+EOG090W00VU\t337.46999999999997\n+EOG090W06PP\t58.169999999999995\n+EOG090W0EIQ\t158.61999999999998\n+EOG090W0KXF\t61.10999999999999\n+EOG090W019B\t225.72\n+EOG090W05XP\t95.61999999999999\n+EOG090W01V1\t162.39999999999998\n+EOG090W0K88\t52.43\n+EOG090W0ITI\t40.10999999999999\n+EOG090W08AN\t158.54999999999998\n+EOG090W0PZH\t39.199999999999996\n+EOG090W0F7U\t63.14\n+EOG090W0CNN\t69.09\n+EOG090W08FZ\t140.35\n+EOG090W0FGQ\t103.38999999999999\n+EOG090W05BJ\t77.77\n+EOG090W0A4R\t40.04\n+EOG090W09QT\t40.10999999999999\n+EOG090W0GDE\t43.05\n+EOG090W050K\t148.72\n+EOG090W0CL8\t70.07\n+EOG090W0JJQ\t77.35\n+EOG090W06W8\t103.72999999999999\n+EOG090W0EPV\t35.559999999999995\n+EOG090W00WM\t140.63\n+EOG090W005S\t210.07\n+EOG090W02UQ\t98.0\n+EOG090W03FA\t114.72999999999999\n+EOG090W02B7\t11.
97\n+EOG090W06DJ\t35.209999999999994\n+EOG090W08FE\t122.63999999999999\n+EOG090W06P2\t125.72999999999999\n+EOG090W0C7S\t146.51999999999998\n+EOG090W0C4Z\t55.660000000000004\n+EOG090W00ZP\t144.33999999999997\n+EOG090W0C7Z\t30.52\n+EOG090W06AN\t199.35999999999999\n+EOG090W0FYR\t77.98\n+EOG090W015U\t27.44\n+EOG090W02LX\t267.67999999999995\n+EOG090W0DZ4\t191.17000000000002\n+EOG090W050Y\t215.67000000000002\n+EOG090W08GU\t50.81999999999999\n+EOG090W09LK\t108.36\n+EOG090W077G\t64.61\n+EOG090W0B8P\t58.51999999999999\n+EOG090W0A73\t39.059999999999995\n+EOG090W0B3U\t47.739999999999995\n+EOG090W0LL3\t81.69\n+EOG090W09R9\t63.06999999999999\n+EOG090W0A58\t69.64999999999999\n+EOG090W06VZ\t445.71999999999997\n+EOG090W080B\t61.88\n+EOG090W0ALP\t115.01\n+EOG090W0EJV\t163.1\n+EOG090W0BI6\t112.76999999999998\n+EOG090W05KO\t149.66\n+EOG090W03K0\t120.11999999999999\n+EOG090W0JFZ\t39.199999999999996\n+EOG090W09RO\t93.1\n+EOG090W00ZV\t161.98\n+EOG090W005V\t363.79\n+EOG090W0F9A\t109.55\n+EOG090W0IKC\t25.2\n+EOG090W04QG\t181.85999999999999\n+EOG090W00U5\t126.17\n+EOG090W0JS6\t59.64\n+EOG090W06X4\t243.24999999999997\n+EOG090W0H7U\t18.759999999999998\n+EOG090W02LH\t101.28999999999999\n+EOG090W06AU\t54.10999999999999\n+EOG090W0L6N\t74.13\n+EOG090W0028\t38.39\n+EOG090W05ZG\t117.24999999999999\n+EOG090W0DSQ\t52.36\n+EOG090W0CIU\t151.13\n+EOG090W09DT\t96.46000000000001\n+EOG090W0883\t125.22999999999999\n+EOG090W08IL\t186.41\n+EOG090W07HX\t141.11999999999998\n+EOG090W0ADL\t42.14\n+EOG090W07E5\t140.69\n+EOG090W0CHN\t36.19\n+EOG090W0F27\t66.08\n+EOG090W05FW\t224.20999999999998\n+EOG090W061C\t309.87\n+EOG090W023I\t166.65\n+EOG090W09Y9\t86.59\n+EOG090W029L\t280.28000000000003\n+EOG090W078A\t137.13\n+EOG090W0C83\t33.39\n+EOG090W015Z\t231.07\n+EOG090W05IA\t174.85999999999999\n+EOG090W06HO\t108.71000000000001\n+EOG090W0E6K\t68.25\n+EOG090W032M\t17.009999999999998\n+EOG090W04ZL\t137.82999999999998\n+EOG090W0A4U\t92.11999999999999\n+EOG090W0G0Z\t34.37\n+EOG090W012F\t376.75\n+EOG090W08ME\t86.66\n+EOG090W09
0H\t203.07\n+EOG090W0C7T\t133.28\n+EOG090W0AUB\t233.79999999999998\n+EOG090W094H\t153.51\n+EOG090W00HE\t202.16\n+EOG090W0HKZ\t63.76999999999999\n+EOG090W02KK\t178.78\n+EOG090W0828\t35.349999999999994\n+EOG090W07PH\t84.84\n+EOG090W01XB\t80.36\n+EOG090W02C5\t70.14\n+EOG090W00WO\t96.25\n+EOG090W0140\t17.849999999999998\n+EOG090W01QT\t99.46999999999998\n+EOG090W0FQ4\t61.669999999999995\n+EOG090W0CAH\t116.61999999999999\n+EOG090W080Z\t53.76\n+EOG090W02AU\t96.03999999999999\n+EOG090W096X\t86.03\n+EOG090W04OJ\t69.36999999999999\n+EOG090W09UY\t66.43\n+EOG090W07CG\t158.41\n+EOG090W0KFZ\t64.75\n+EOG090W0LWB\t66.33\n+EOG090W0F00\t32.269999999999996\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/interpro.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/interpro.tsv Mon Oct 04 19:38:37 2021 +0000
[
@@ -0,0 +1,49 @@
+ENTRY_AC ENTRY_TYPE ENTRY_NAME
+IPR000126 Active_site Serine proteases, V8 family, serine active site
+IPR000138 Active_site Hydroxymethylglutaryl-CoA lyase, active site
+IPR000169 Active_site Cysteine peptidase, cysteine active site
+IPR000180 Active_site Membrane dipeptidase, active site
+IPR000189 Active_site Prokaryotic transglycosylase, active site
+IPR000590 Active_site Hydroxymethylglutaryl-coenzyme A synthase, active site
+IPR001252 Active_site Malate dehydrogenase, active site
+IPR001345 Active_site Phosphoglycerate/bisphosphoglycerate mutase, active site
+IPR001497 Active_site Methylated-DNA-[protein]-cysteine S-methyltransferase, active site
+IPR001555 Active_site Phosphoribosylglycinamide formyltransferase, active site
+IPR001579 Active_site Glycosyl hydrolases family 18 (GH18) active site
+IPR001586 Active_site Beta-lactamase, class-C active site
+IPR001969 Active_site Aspartic peptidase, active site
+IPR002071 Active_site Thermonuclease active site
+IPR002137 Active_site Beta-lactamase, class-D active site
+IPR002168 Active_site Lipase, GDXG, putative histidine active site
+IPR002471 Active_site Peptidase S9, serine active site
+IPR004164 Active_site Coenzyme A transferase active site
+IPR006650 Active_site Adenosine/AMP deaminase active site
+IPR008255 Active_site Pyridine nucleotide-disulphide oxidoreductase, class-II, active site
+IPR008259 Active_site FMN-dependent alpha-hydroxy acid dehydrogenase, active site
+IPR008261 Active_site Iodothyronine deiodinase, active site
+IPR008263 Active_site Glycoside hydrolase, family 16, active site
+IPR008265 Active_site Lipase, GDSL, active site
+IPR008266 Active_site Tyrosine-protein kinase, active site
+IPR008268 Active_site Peptidase S16, active site
+IPR008270 Active_site Glycosyl hydrolases family 25, active site
+IPR008271 Active_site Serine/threonine-protein kinase, active site
+IPR008272 Active_site 4-hydroxybenzoyl-CoA thioesterase, active site
+IPR011767 Active_site Glutaredoxin active site
+IPR012999 Active_site Pyridine nucleotide-disulphide oxidoreductase, class I, active site
+IPR013808 Active_site Transglutaminase, active site
+IPR016129 Active_site Peptidase family C14A, His active site
+IPR016130 Active_site Protein-tyrosine phosphatase, active site
+IPR017440 Active_site ATP-citrate lyase/succinyl-CoA ligase, active site
+IPR017950 Active_site Urease active site
+IPR018040 Active_site Pectinesterase, Tyr active site
+IPR018053 Active_site Glycoside hydrolase, family 32, active site
+IPR018057 Active_site Deoxyribonuclease I, active site
+IPR018085 Active_site Uracil-DNA glycosylase, active site
+IPR018088 Active_site Chalcone/stilbene synthase, active site
+IPR018089 Active_site Orotidine 5'-phosphate decarboxylase, active site
+IPR018114 Active_site Serine proteases, trypsin family, histidine active site
+IPR018117 Active_site DNA methylase, C-5 cytosine-specific, active site
+IPR018120 Active_site Glycoside hydrolase family 1, active site
+IPR018129 Active_site Phosphoenolpyruvate carboxylase, Lys active site
+IPR018148 Active_site Methylglyoxal synthase, active site
+IPR018177 Active_site L-lactate dehydrogenase, active site
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/merops.dmnd
b
Binary file test-data/funannotate_db/merops.dmnd has changed
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/merops.formatted.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/merops.formatted.fa Mon Oct 04 19:38:37 2021 +0000
b
@@ -0,0 +1,49 @@
+>MER0000002 S01A
+IVNGEEAVPGSWPWQVSLQDKTGFHFCGGSLINENWVVTAAHCGVTTSDVVVAGEFDQGS
+SSEKIQKLKIAKVFKNSKYNSLTINNDITLLKLSTAASFSQTVSAVCLPSASDDFAAGTT
+CVTTGWGLTRYTNANTPDRLQQASLPLLSNTNCKKYWGTKIKDAMICAGASGVSSCMGDS
+GGPLVCKKNGAWTLVGIVSWGSSTCSTSTPGVYARVTALVNWVQQTLAAN
+>MER0000004 S01A
+IVNGEDAVPGSWPWQVSLQDSTGFHFCGGSLISEDWVVTAAHCGVTTSDVVVAGEFDQGL
+ETEDTQVLKIGKVFKNPKFSILTVRNDITLLKLATPAQFSETVSAVCLPSADEDFPAGML
+CATTGWGKTKYNALKTPDKLQQATLPIVSNTDCRKYWGSRVTDVMICAGASGVSSCMGDS
+GGPLVCQKNGAWTLAGIVSWGSSTCSTSTPAVYARVTALMPWVQETLAAN
+>MER0000009 S01A
+VVGGEVAKNGSAPYQVSLQVPGWGHNCGGSLLNDRWVLTAAHCLVGHAPGDLMVLVGTNS
+LKEGGELLKVDKLLYHSRYNLPRFHNDIGLVRLEQPVQFSELVQSVEYSEKAVPANATVR
+LTGWGRTSANGPSPTLLQSLNVVTLSNEDCNKKGGDPGYTDVGHLCTLTKTGEGACNGDS
+GGPLVYEGKLVGVVNFGVPCALGYPDGFARVSYYHDWVRTTMANN
+>MER0000012 S01A
+YILTAAHCVSNEDVNHVITPIAAERFTIRAGSNDRFSGGVLVQVAEVIVHEEYGNFLNDV
+ALLRLESPLILSASIQPIDLPTVDTPADVDVVISGWGRIKHQGDLPRYLQYNTLKSITRQ
+QCEELIDFGFEGELCLLHQVDNGACNGDSGGP
+>MER0000013 S01A
+ITNGYPAYEGKVPYIVGLLFSGNGNWWCGGSIIGNTWVLTAAHCTNGASGVTINYGASIR
+TQPQYTHWVGSGDIIQHHHYNSGNLHNDISLIRTPHVDFWSLVNKVELPSYNDRYQDYAG
+WWAVASGWGGTYDGSPLPDWLQSVDVQIISQSDCSRTWSLHDNMICINTDGGKSTCGGDS
+GGPLVTHDGNRLVGVTSFGSAAGCQSGAPAVFSRVTGYLDWIRDNTGIS
+>MER0000015 S01A
+ITNGQDAVMGQFPYQVGLSLNLGNFKSAWCGGSLIGNEWVLTAAHCTDGVKSVTVFLGAT
+YRTEAEVKYTVKPNDILIHPGWNNKTLKNDISLVKIPETAYTALIQPVELPALASSYPSF
+AGDEVIASGWGRISDSASGVTNYLQWARLEVISNAVCARTYGSTITSSNLCVKTPGGVST
+CKGDSGGPLVLASSGVQVGLTSFGSILGCEKGFPAAFTRVTSYLEWINEHTGIS
+>MER0000020 S01A
+IVGGYNCEENSVPYQVSLNSGYHFCGGSLINEQWVVSAGHCYKSRIQVRLGEHNIEVLEG
+NEQFINAAKIIRHPQYDRKTLNNDIMLIKLSSRAVINARVSTISLPTAPPATGTKCLISG
+WGNTASSGADYPDELQCLDAPVLSQAKCEASYPGKITSNMFCVGFLEGGKDSCQGDSGGP
+VVCNGQLQGVVSWGDGCAQKNKPGVYTKVYNYVKWIKNTIAAN
+>MER0000021 S01A
+IVGGYICEENSVPYQVSLNSGYHFCGGSLISEQWVVSAGHCYKSRIQVRLGEHNIEVLEG
+NEQFINAAKIIRHPKYNSRTLDNDILLIKLSSPAVINSRVSAISLPTAPPAAGTESLISG
+WGNTLSSGADYPDELQCLDAPVLSQAECEASYPGKITNNMFCVGFLEGGKDSCQGDSGGP
+VVSNGELQGIVSWGYGCAQKNRPGVYTKVYNYVDWIKDTIAAN
+>MER0000022 S01A
+IVGGYTCEENSLPYQVSLNSGSHFCGGSLISEQWVVSAAHCYKTRIQVRLGEHNIKVLEG
+NEQFINAAKIIRHPKYNRDTLDNDIMLIKLSSPAVINARVSTISLPTAPPAAGTECLISG
+WGNTLSFGADYPDELKCLDAPVLREAECKASCPGKITNSMFCVGFLEGGKDSWKRDSGGP
+VVCNGQLQGVVSWGHGCAWKNRPGVYTKVYNYVDWIKDTIAAN
+>MER0000024 S01A
+IVGGYTCGANTVPYQVSLNSGYHFCGGSLINSQWVVSAAHCYKSGIQVRLGEDNINVVEG
+NEQFISASKSIVHPSYNSNTLNNDIMLIKLKSAASLNSRVASISLPTSCASAGTQCLISG
+WGNTKSSGTSYPDVLKCLKAPILSDSSCKSAYPGQITSNMFCAGYLEGGKDSCQGDSGGP
+VVCSGKLQGIVSWGSGCAQKNKPGVYTKVCNYVSWIKQTIASN
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/ncbi_cleaned_gene_products.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/ncbi_cleaned_gene_products.txt Mon Oct 04 19:38:37 2021 +0000
b
@@ -0,0 +1,54 @@
+#version 1.70
+#Date 06-15-2021
+#Name Description
+1-Oct mitochondrial intermediate peptidase
+1AMINOCYCLOPROPANE1CARBOXYLATE Probable 1-aminocyclopropane-1-carboxylate deaminase
+2ABA Protein phosphatase PP2A 55 kDa regulatory subunit
+2ABD Serine/threonine-protein phosphatase 2A 55 kDa regulatory subunit B delta isoform
+2E4.130 Regulator of nonsense transcripts 1
+2E4.130_0 Regulator of nonsense transcripts 1
+2E4.130_1 Regulator of nonsense transcripts 1
+2MBCD 2-methylacyl-CoA dehydrogenase, mitochondrial
+2METHYLACONITATE Aconitate/2-methylaconitate hydratase
+4CL 4-coumarate--CoA ligase
+4CL1 4-coumarate--CoA ligase 1
+4CL2 4-coumarate--CoA ligase 2
+4CL3 4-coumarate--CoA ligase 3
+4CLL1 4-coumarate--CoA ligase-like 1
+4CLL2 4-coumarate--CoA ligase-like 2
+4CLL3 4-coumarate--CoA ligase-like 3
+4CLL7 4-coumarate--CoA ligase-like 7
+4CLL7_0 4-coumarate--CoA ligase-like 7
+4CLL7_1 4-coumarate--CoA ligase-like 7
+4CLL7_2 4-coumarate--CoA ligase-like 7
+4CLL9 4-coumarate--CoA ligase-like 9
+4COUMARATECOA 4-coumarateCoA ligase-like 6
+4EBP 4E-binding protein THOR
+4EHP Eukaryotic translation initiation factor 4E type 2
+4HYDROXYPHENYLPYRUVATE 4-hydroxyphenylpyruvate dioxygenase
+6-PGD 6-phosphogluconate dehydrogenase, decarboxylating
+6GAL Endo-beta-1 6-galactanase
+6HN3M 6-hydroxynicotinate 3-monooxygenase
+6PGD 6-phosphogluconate dehydrogenase, decarboxylating
+6PGL phosphogluconolactonase
+6PGL4 6-phosphogluconolactonase 4
+20H10.080 NADH-ubiquinone oxidoreductase 21 subunit
+26S 26s proteasome regulatory subunit 6B
+26S_PRC 26S proteasome regulatory complex protein
+40S 40s ribosomal protein SA
+60S 60s acidic ribosomal protein P2
+A1 mating type regulatory protein, silenced copy at HMR locus
+A1CF APOB1 complementation factor
+A2 mating type regulatory protein, silenced copy at HMR locus
+A4GALT Lactosylceramide 4-alpha-galactosyltransferase
+A4GNT Alpha-1,4-N-acetylglucosaminyltransferase
+A4LEA 4-alpha-L-fucosyltransferase
+AAA1 Asc-type amino acid transporter 1
+AAC ADP,ATP carrier protein
+AAC1 ADP/ATP carrier protein AAC1
+AAC2 ADP,ATP carrier protein 2
+AAC3 ADP/ATP carrier protein AAC3
+AACC7 Aminoglycoside N(3)-acetyltransferase VII
+AACS Acetoacetyl-CoA synthetase
+AACS_0 Acetoacetyl-CoA synthetase
+AACS_1 Acetoacetyl-CoA synthetase
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/repeats.dmnd
b
Binary file test-data/funannotate_db/repeats.dmnd has changed
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/trained_species/fly/augustus/fly_exon_probs.pbl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_exon_probs.pbl Mon Oct 04 19:38:37 2021 +0000
[
b'@@ -0,0 +1,10659 @@\n+#exon model parameters\n+# begin of content independent part\n+\n+# Length distributions\n+[LENGTH]\n+# maximal individually stored length probability =\n+3000\n+# slope of smoothing bandwidth =\n+0.3\n+# smoothing minwindowcount =\n+8\n+# length single  initial  internal  terminal\n+# total number of exons of above types\n+       1959           8219          22997           8219\n+# number of exons exceeding length d\n+       27             19            136             55\n+# 1000 P(len=k), k=0,1,..., 3000\n+0\t0\t1.95\t0.0374\t0.249\n+1\t0\t2.18\t0.0414\t0.284\n+2\t0\t2.4\t0.0456\t0.321\n+3\t0\t2.62\t0.0501\t0.359\n+4\t0\t2.84\t0.0548\t0.397\n+5\t0\t3.04\t0.0598\t0.435\n+6\t0\t3.23\t0.065\t0.473\n+7\t0\t3.41\t0.0706\t0.509\n+8\t0\t3.57\t0.0765\t0.543\n+9\t0\t3.71\t0.0828\t0.576\n+10\t0\t3.82\t0.0894\t0.606\n+11\t0\t3.92\t0.0965\t0.633\n+12\t0\t3.99\t0.104\t0.658\n+13\t0\t4.05\t0.112\t0.68\n+14\t0\t4.08\t0.121\t0.699\n+15\t0\t4.1\t0.13\t0.716\n+16\t0\t4.1\t0.139\t0.73\n+17\t0\t4.08\t0.15\t0.743\n+18\t0\t4.06\t0.16\t0.754\n+19\t0\t4.02\t0.172\t0.763\n+20\t0\t3.98\t0.184\t0.771\n+21\t0\t3.94\t0.198\t0.778\n+22\t0\t3.89\t0.212\t0.785\n+23\t0\t3.84\t0.226\t0.791\n+24\t0\t3.79\t0.242\t0.797\n+25\t0\t3.75\t0.259\t0.803\n+26\t0\t3.71\t0.277\t0.809\n+27\t0\t3.67\t0.295\t0.816\n+28\t0\t3.65\t0.315\t0.823\n+29\t0\t3.62\t0.335\t0.831\n+30\t0\t3.61\t0.357\t0.841\n+31\t0\t3.6\t0.379\t0.851\n+32\t0\t3.6\t0.401\t0.862\n+33\t0\t3.61\t0.425\t0.874\n+34\t0\t3.62\t0.449\t0.888\n+35\t0\t3.64\t0.473\t0.902\n+36\t0\t3.67\t0.498\t0.918\n+37\t0\t3.7\t0.524\t0.935\n+38\t0\t3.74\t0.549\t0.953\n+39\t0\t3.77\t0.575\t0.971\n+40\t0\t3.82\t0.601\t0.991\n+41\t0\t3.86\t0.628\t1.01\n+42\t0\t3.91\t0.654\t1.03\n+43\t0\t3.96\t0.68\t1.06\n+44\t0\t4.01\t0.706\t1.08\n+45\t0\t4.06\t0.732\t1.11\n+46\t0\t4.11\t0.759\t1.13\n+47\t0\t4.17\t0.785\t1.16\n+48\t0\t4.22\t0.811\t1.18\n+49\t0\t4.27\t0.838\t1.21\n+50\t0\t4.32\t0.864\t1.23\n+51\t0\t4.37\t0.892\t1.26\n+52\t0\t4.42\t0.92\t1.29\n+
53\t0\t4.47\t0.949\t1.32\n+54\t0\t4.51\t0.979\t1.34\n+55\t0\t4.55\t1.01\t1.37\n+56\t0\t4.58\t1.04\t1.4\n+57\t0\t4.61\t1.08\t1.43\n+58\t0\t4.63\t1.12\t1.45\n+59\t0\t4.65\t1.15\t1.48\n+60\t0\t4.65\t1.2\t1.51\n+61\t0\t4.65\t1.24\t1.53\n+62\t0\t4.65\t1.28\t1.56\n+63\t0\t4.63\t1.33\t1.58\n+64\t0\t4.6\t1.37\t1.61\n+65\t0\t4.57\t1.42\t1.63\n+66\t0\t4.53\t1.47\t1.64\n+67\t0\t4.48\t1.52\t1.66\n+68\t0\t4.43\t1.56\t1.67\n+69\t0\t4.37\t1.61\t1.69\n+70\t0\t4.31\t1.66\t1.7\n+71\t0\t4.24\t1.7\t1.7\n+72\t0\t4.17\t1.74\t1.71\n+73\t0\t4.1\t1.79\t1.72\n+74\t0\t4.03\t1.83\t1.72\n+75\t0\t3.96\t1.86\t1.73\n+76\t0\t3.89\t1.9\t1.73\n+77\t0\t3.83\t1.94\t1.74\n+78\t0\t3.76\t1.97\t1.75\n+79\t0\t3.71\t2.01\t1.76\n+80\t0\t3.65\t2.05\t1.77\n+81\t0\t3.61\t2.08\t1.78\n+82\t0\t3.57\t2.12\t1.8\n+83\t0\t3.53\t2.15\t1.81\n+84\t0\t3.5\t2.19\t1.83\n+85\t0\t3.47\t2.23\t1.84\n+86\t0\t3.45\t2.27\t1.86\n+87\t0\t3.43\t2.31\t1.88\n+88\t0\t3.41\t2.35\t1.89\n+89\t0\t3.4\t2.39\t1.91\n+90\t0\t3.38\t2.44\t1.92\n+91\t0\t3.37\t2.48\t1.93\n+92\t0\t3.36\t2.53\t1.95\n+93\t0\t3.35\t2.57\t1.96\n+94\t0\t3.33\t2.62\t1.97\n+95\t0\t3.31\t2.66\t1.98\n+96\t0\t3.29\t2.71\t2\n+97\t0\t3.27\t2.76\t2.01\n+98\t0\t3.25\t2.81\t2.02\n+99\t0\t3.22\t2.85\t2.04\n+100\t0\t3.2\t2.9\t2.05\n+101\t0\t3.17\t2.95\t2.07\n+102\t0\t3.14\t3\t2.08\n+103\t0\t3.12\t3.05\t2.1\n+104\t0\t3.09\t3.1\t2.11\n+105\t0\t3.07\t3.15\t2.13\n+106\t0\t3.04\t3.2\t2.14\n+107\t0\t3.03\t3.24\t2.16\n+108\t0\t3.01\t3.29\t2.17\n+109\t0\t3\t3.33\t2.18\n+110\t0\t2.99\t3.37\t2.2\n+111\t0\t2.98\t3.41\t2.21\n+112\t0\t2.98\t3.45\t2.22\n+113\t0\t2.97\t3.49\t2.22\n+114\t0\t2.97\t3.52\t2.23\n+115\t0\t2.97\t3.56\t2.24\n+116\t0\t2.97\t3.59\t2.24\n+117\t0\t2.96\t3.62\t2.25\n+118\t0\t2.96\t3.65\t2.25\n+119\t0\t2.95\t3.68\t2.25\n+120\t0\t2.94\t3.71\t2.26\n+121\t0\t2.93\t3.74\t2.26\n+122\t0\t2.91\t3.77\t2.27\n+123\t0\t2.9\t3.8\t2.28\n+124\t0\t2.88\t3.84\t2.29\n+125\t0\t2.86\t3.87\t2.29\n+126\t0\t2.84\t3.9\t2.3\n+127\t0\t2.81\t3.93\t2.32\n+128\t0\t2.79\t3.96\t2.33\n+129\t0\t2.76\t3.99\t2.3
4\n+130\t0\t2.74\t4.01\t2.35\n+131\t0\t2.71\t4.04\t2.36\n+132\t0\t2.68\t4.06\t2.37\n+133\t0\t2.66\t4.08\t2.38\n+134\t0\t2.63\t4.1\t2.39\n+135\t0\t2.61\t4.12\t2.39\n+136\t0\t2.58\t4.14\t2.4\n+137\t0\t2.56\t4.15\t2.41\n+138\t0\t2.53\t4.16\t2.41\n+139\t0\t2.51\t4.17\t2.42\n+140\t0\t2.49\t4.17\t2.43\n+141\t0\t2.47\t4.18\t2.44\n+142\t0\t2.44\t4.18\t2.45\n+143\t0\t2.43\t4.18\t2.46\n+144\t0\t2.41\t4.18\t2.48\n+145\t0\t2.39\t4.18\t2.49\n+146\t0\t2.37\t4.18\t2.51\n+147\t0\t2.35\t4.17\t2.52\n+148\t0\t2.33\t4.17\t2.54\n+149\t0\t2.32\t4.16\t2.56\n+150\t0\t2.3\t4.15\t2.58\n+151\t0\t2.28\t4.15\t2.6\n+152\t0\t2.27\t4.13\t2.62\n+153\t0\t2.25\t4.12\t2.63\n+154\t0\t2.24\t4.1\t2.65\n+155\t0\t2.23\t4.09\t2.66\n+156\t0\t2.21\t4.07\t2.67\n+157\t0\t2.2\t4.04\t2.67\n+158\t0\t2.19\t4.02\t2.68\n+159\t0\t2.18\t4\t2.68\n+160\t0\t2.16\t3.9'..b'25\n+tgcca     \t0.25     \t0.25     \t0.25\n+tgccc     \t0.25     \t0.25     \t0.25\n+tgccg     \t0.25     \t0.25     \t0.25\n+tgcct     \t0.25     \t0.25     \t0.25\n+tgcga     \t0.25     \t0.25     \t0.25\n+tgcgc     \t0.25     \t0.25     \t0.25\n+tgcgg     \t0.25     \t0.25     \t0.25\n+tgcgt     \t0.25     \t0.25     \t0.25\n+tgcta     \t0.25     \t0.25     \t0.25\n+tgctc     \t0.25     \t0.25     \t0.25\n+tgctg     \t0.25     \t0.25     \t0.25\n+tgctt     \t0.25     \t0.25     \t0.25\n+tggaa     \t0.25     \t0.25     \t0.25\n+tggac     \t0.25     \t0.25     \t0.25\n+tggag     \t0.25     \t0.25     \t0.25\n+tggat     \t0.25     \t0.25     \t0.25\n+tggca     \t0.25     \t0.25     \t0.25\n+tggcc     \t0.25     \t0.25     \t0.25\n+tggcg     \t0.25     \t0.25     \t0.25\n+tggct     \t0.25     \t0.25     \t0.25\n+tggga     \t0.25     \t0.25     \t0.25\n+tgggc     \t0.25     \t0.25     \t0.25\n+tgggg     \t0.25     \t0.25     \t0.25\n+tgggt     \t0.25     \t0.25     \t0.25\n+tggta     \t0.25     \t0.25     \t0.25\n+tggtc     \t0.25     \t0.25     \t0.25\n+tggtg     \t0.25     \t0.25     \t0.25\n+tggtt     \t0.25     \t0.25     \t0.25\n+tgtaa     \t0.25    
 \t0.25     \t0.25\n+tgtac     \t0.25     \t0.25     \t0.25\n+tgtag     \t0.25     \t0.25     \t0.25\n+tgtat     \t0.25     \t0.25     \t0.25\n+tgtca     \t0.25     \t0.25     \t0.25\n+tgtcc     \t0.25     \t0.25     \t0.25\n+tgtcg     \t0.25     \t0.25     \t0.25\n+tgtct     \t0.25     \t0.25     \t0.25\n+tgtga     \t0.25     \t0.25     \t0.25\n+tgtgc     \t0.25     \t0.25     \t0.25\n+tgtgg     \t0.25     \t0.25     \t0.25\n+tgtgt     \t0.25     \t0.25     \t0.25\n+tgtta     \t0.25     \t0.25     \t0.25\n+tgttc     \t0.25     \t0.25     \t0.25\n+tgttg     \t0.25     \t0.25     \t0.25\n+tgttt     \t0.25     \t0.25     \t0.25\n+ttaaa     \t0.25     \t0.25     \t0.25\n+ttaac     \t0.25     \t0.25     \t0.25\n+ttaag     \t0.25     \t0.25     \t0.25\n+ttaat     \t0.25     \t0.25     \t0.25\n+ttaca     \t0.25     \t0.25     \t0.25\n+ttacc     \t0.25     \t0.25     \t0.25\n+ttacg     \t0.25     \t0.25     \t0.25\n+ttact     \t0.25     \t0.25     \t0.25\n+ttaga     \t0.25     \t0.25     \t0.25\n+ttagc     \t0.25     \t0.25     \t0.25\n+ttagg     \t0.25     \t0.25     \t0.25\n+ttagt     \t0.25     \t0.25     \t0.25\n+ttata     \t0.25     \t0.25     \t0.25\n+ttatc     \t0.25     \t0.25     \t0.25\n+ttatg     \t0.25     \t0.25     \t0.25\n+ttatt     \t0.25     \t0.25     \t0.25\n+ttcaa     \t0.25     \t0.25     \t0.25\n+ttcac     \t0.25     \t0.25     \t0.25\n+ttcag     \t0.25     \t0.25     \t0.25\n+ttcat     \t0.25     \t0.25     \t0.25\n+ttcca     \t0.25     \t0.25     \t0.25\n+ttccc     \t0.25     \t0.25     \t0.25\n+ttccg     \t0.25     \t0.25     \t0.25\n+ttcct     \t0.25     \t0.25     \t0.25\n+ttcga     \t0.25     \t0.25     \t0.25\n+ttcgc     \t0.25     \t0.25     \t0.25\n+ttcgg     \t0.25     \t0.25     \t0.25\n+ttcgt     \t0.25     \t0.25     \t0.25\n+ttcta     \t0.25     \t0.25     \t0.25\n+ttctc     \t0.25     \t0.25     \t0.25\n+ttctg     \t0.25     \t0.25     \t0.25\n+ttctt     \t0.25     \t0.25     \t0.25\n+ttgaa     \t0.25     \t0.25     \t0.25\n+ttgac     
\t0.25     \t0.25     \t0.25\n+ttgag     \t0.25     \t0.25     \t0.25\n+ttgat     \t0.25     \t0.25     \t0.25\n+ttgca     \t0.25     \t0.25     \t0.25\n+ttgcc     \t0.25     \t0.25     \t0.25\n+ttgcg     \t0.25     \t0.25     \t0.25\n+ttgct     \t0.25     \t0.25     \t0.25\n+ttgga     \t0.25     \t0.25     \t0.25\n+ttggc     \t0.25     \t0.25     \t0.25\n+ttggg     \t0.25     \t0.25     \t0.25\n+ttggt     \t0.25     \t0.25     \t0.25\n+ttgta     \t0.25     \t0.25     \t0.25\n+ttgtc     \t0.25     \t0.25     \t0.25\n+ttgtg     \t0.25     \t0.25     \t0.25\n+ttgtt     \t0.25     \t0.25     \t0.25\n+tttaa     \t0.25     \t0.25     \t0.25\n+tttac     \t0.25     \t0.25     \t0.25\n+tttag     \t0.25     \t0.25     \t0.25\n+tttat     \t0.25     \t0.25     \t0.25\n+tttca     \t0.25     \t0.25     \t0.25\n+tttcc     \t0.25     \t0.25     \t0.25\n+tttcg     \t0.25     \t0.25     \t0.25\n+tttct     \t0.25     \t0.25     \t0.25\n+tttga     \t0.25     \t0.25     \t0.25\n+tttgc     \t0.25     \t0.25     \t0.25\n+tttgg     \t0.25     \t0.25     \t0.25\n+tttgt     \t0.25     \t0.25     \t0.25\n+tttta     \t0.25     \t0.25     \t0.25\n+ttttc     \t0.25     \t0.25     \t0.25\n+ttttg     \t0.25     \t0.25     \t0.25\n+ttttt     \t0.25     \t0.25     \t0.25\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/trained_species/fly/augustus/fly_igenic_probs.pbl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_igenic_probs.pbl Mon Oct 04 19:38:37 2021 +0000
[
b"@@ -0,0 +1,3445 @@\n+[1]\n+# (a,c,g,t)= (0.295, 0.205, 0.205, 0.295)\n+#\n+# Probabilities file for the intergenic region model\n+#\n+\n+# k =\n+4\n+\n+# The P_l's\n+[P_ls]\n+# l=\n+0\n+# Values\n+A\t0.304\n+C\t0.196\n+G\t0.196\n+T\t0.304\n+# l=\n+1\n+# Values\n+AA\t0.112\n+AC\t0.0524\n+AG\t0.0515\n+AT\t0.088\n+CA\t0.0665\n+CC\t0.0404\n+CG\t0.0378\n+CT\t0.0515\n+GA\t0.0524\n+GC\t0.051\n+GG\t0.0404\n+GT\t0.0524\n+TA\t0.0729\n+TC\t0.0525\n+TG\t0.0665\n+TT\t0.112\n+# l=\n+2\n+# Values\n+AAA\t0.0446\n+AAC\t0.0182\n+AAG\t0.0173\n+AAT\t0.0319\n+ACA\t0.0197\n+ACC\t0.00915\n+ACG\t0.00876\n+ACT\t0.0148\n+AGA\t0.0141\n+AGC\t0.0136\n+AGG\t0.00903\n+AGT\t0.0148\n+ATA\t0.0251\n+ATC\t0.0139\n+ATG\t0.0171\n+ATT\t0.0319\n+CAA\t0.0228\n+CAC\t0.0135\n+CAG\t0.013\n+CAT\t0.0171\n+CCA\t0.0146\n+CCC\t0.00889\n+CCG\t0.00783\n+CCT\t0.00903\n+CGA\t0.0118\n+CGC\t0.00949\n+CGG\t0.00783\n+CGT\t0.00876\n+CTA\t0.0105\n+CTC\t0.0106\n+CTG\t0.013\n+CTT\t0.0173\n+GAA\t0.0192\n+GAC\t0.00881\n+GAG\t0.0106\n+GAT\t0.0139\n+GCA\t0.0166\n+GCC\t0.0113\n+GCG\t0.00949\n+GCT\t0.0136\n+GGA\t0.0111\n+GGC\t0.0113\n+GGG\t0.00889\n+GGT\t0.00915\n+GTA\t0.0118\n+GTC\t0.00881\n+GTG\t0.0135\n+GTT\t0.0182\n+TAA\t0.0254\n+TAC\t0.0118\n+TAG\t0.0105\n+TAT\t0.0251\n+TCA\t0.0156\n+TCC\t0.0111\n+TCG\t0.0118\n+TCT\t0.0141\n+TGA\t0.0156\n+TGC\t0.0166\n+TGG\t0.0146\n+TGT\t0.0197\n+TTA\t0.0254\n+TTC\t0.0192\n+TTG\t0.0228\n+TTT\t0.0446\n+# l=\n+3\n+# 
Values\n+AAAA\t0.0172\n+AAAC\t0.00735\n+AAAG\t0.00683\n+AAAT\t0.0133\n+AACA\t0.00675\n+AACC\t0.00305\n+AACG\t0.00312\n+AACT\t0.00531\n+AAGA\t0.00461\n+AAGC\t0.0042\n+AAGG\t0.00313\n+AAGT\t0.00541\n+AATA\t0.00908\n+AATC\t0.00494\n+AATG\t0.00662\n+AATT\t0.0113\n+ACAA\t0.00692\n+ACAC\t0.00428\n+ACAG\t0.00328\n+ACAT\t0.00519\n+ACCA\t0.00329\n+ACCC\t0.00206\n+ACCG\t0.00177\n+ACCT\t0.00202\n+ACGA\t0.00278\n+ACGC\t0.00217\n+ACGG\t0.00174\n+ACGT\t0.00208\n+ACTA\t0.00318\n+ACTC\t0.00281\n+ACTG\t0.00337\n+ACTT\t0.00541\n+AGAA\t0.00511\n+AGAC\t0.00223\n+AGAG\t0.00305\n+AGAT\t0.00366\n+AGCA\t0.00446\n+AGCC\t0.00283\n+AGCG\t0.00259\n+AGCT\t0.00373\n+AGGA\t0.00284\n+AGGC\t0.00226\n+AGGG\t0.0019\n+AGGT\t0.00202\n+AGTA\t0.0032\n+AGTC\t0.00248\n+AGTG\t0.00379\n+AGTT\t0.00531\n+ATAA\t0.00847\n+ATAC\t0.00397\n+ATAG\t0.00317\n+ATAT\t0.00947\n+ATCA\t0.00424\n+ATCC\t0.00279\n+ATCG\t0.00317\n+ATCT\t0.00366\n+ATGA\t0.00422\n+ATGC\t0.00407\n+ATGG\t0.00365\n+ATGT\t0.00518\n+ATTA\t0.00767\n+ATTC\t0.00495\n+ATTG\t0.00602\n+ATTT\t0.0133\n+CAAA\t0.00896\n+CAAC\t0.00433\n+CAAG\t0.00353\n+CAAT\t0.00602\n+CACA\t0.00525\n+CACC\t0.00243\n+CACG\t0.002\n+CACT\t0.00379\n+CAGA\t0.00344\n+CAGC\t0.00407\n+CAGG\t0.00215\n+CAGT\t0.00338\n+CATA\t0.00464\n+CATC\t0.00309\n+CATG\t0.00278\n+CATT\t0.00662\n+CCAA\t0.00486\n+CCAC\t0.00325\n+CCAG\t0.00288\n+CCAT\t0.00365\n+CCCA\t0.00319\n+CCCC\t0.00225\n+CCCG\t0.00154\n+CCCT\t0.0019\n+CCGA\t0.00238\n+CCGC\t0.00225\n+CCGG\t0.00146\n+CCGT\t0.00174\n+CCTA\t0.00173\n+CCTC\t0.00201\n+CCTG\t0.00215\n+CCTT\t0.00313\n+CGAA\t0.00419\n+CGAC\t0.00209\n+CGAG\t0.00231\n+CGAT\t0.00317\n+CGCA\t0.00305\n+CGCC\t0.00236\n+CGCG\t0.00149\n+CGCT\t0.00259\n+CGGA\t0.0022\n+CGGC\t0.00232\n+CGGG\t0.00154\n+CGGT\t0.00178\n+CGTA\t0.00197\n+CGTC\t0.00167\n+CGTG\t0.002\n+CGTT\t0.00312\n+CTAA\t0.00374\n+CTAC\t0.00193\n+CTAG\t0.00168\n+CTAT\t0.00317\n+CTCA\t0.00284\n+CTCC\t0.00236\n+CTCG\t0.00231\n+CTCT\t0.00305\n+CTGA\t0.00304\n+CTGC\t0.00383\n+CTGG\t0.00288\n+CTGT\t0.00328\n+CTTA\t0.00383\n+CTTC
\t0.00316\n+CTTG\t0.00353\n+CTTT\t0.00683\n+GAAA\t0.00815\n+GAAC\t0.00296\n+GAAG\t0.00316\n+GAAT\t0.00495\n+GACA\t0.003\n+GACC\t0.00166\n+GACG\t0.00167\n+GACT\t0.00248\n+GAGA\t0.003\n+GAGC\t0.00274\n+GAGG\t0.00201\n+GAGT\t0.00281\n+GATA\t0.00357\n+GATC\t0.00227\n+GATG\t0.00309\n+GATT\t0.00494\n+GCAA\t0.00554\n+GCAC\t0.00316\n+GCAG\t0.00383\n+GCAT\t0.00407\n+GCCA\t0.00448\n+GCCC\t0.00221\n+GCCG\t0.00232\n+GCCT\t0.00226\n+GCGA\t0.00295\n+GCGC\t0.00212\n+GCGG\t0.00225\n+GCGT\t0.00217\n+GCTA\t0.00261\n+GCTC\t0.00274\n+GCTG\t0.00407\n+GCTT\t0.0042\n+GGAA\t0.00414\n+GGAC\t0.00178\n+GGAG\t0.00236\n+GGAT\t0.00279\n+GGCA\t0.0037\n+GGCC\t0.00239\n+GGCG\t0.00236\n+GGCT\t0.00282\n+GGGA\t0.00237\n+GGGC\t0.00221\n+GGGG\t0.00225\n+GGGT\t0.00206\n+GGTA\t0.00201\n+GGTC\t0.00166\n+GGTG\t0.00243\n+GGTT\t0.00305\n+GTAA\t0.0039\n+GTAC\t0.00205\n+GTAG\t0.00193\n+GTAT\t0.00397\n+GTCA\t0.00271\n+GTCC\t0.00178\n+GTCG\t0.00209\n+GTCT\t0.00223\n+GTGA\t0.00279\n+GTGC\t0.00315\n+GTGG\t0.00325\n+GTGT\t0.00428\n+GTTA\t0.00359\n+GTTC\t0.00296\n+GTTG\t0.00433\n+GTTT\t0.00735\n+TAAA\t0.0103\n+TAAC\t0.00359\n+TAAG\t0.00383\n+TAAT\t0.00768\n+TACA\t0.00467\n+TACC\t0.00201\n+TACG\t0.00197\n+TACT\t0."..b'1188\n+GTGCA\t415919\n+GTGCC\t261766\n+GTGCG\t295620\n+GTGCT\t323411\n+GTGGA\t332993\n+GTGGC\t385960\n+GTGGG\t323106\n+GTGGT\t292234\n+GTGTA\t359272\n+GTGTC\t243286\n+GTGTG\t676471\n+GTGTT\t478535\n+GTTAA\t529553\n+GTTAC\t237009\n+GTTAG\t244930\n+GTTAT\t462774\n+GTTCA\t351705\n+GTTCC\t242639\n+GTTCG\t284705\n+GTTCT\t337027\n+GTTGA\t395847\n+GTTGC\t467863\n+GTTGG\t398583\n+GTTGT\t518738\n+GTTTA\t656320\n+GTTTC\t525254\n+GTTTG\t635106\n+GTTTT\t1204266\n+TAAAA\t1550931\n+TAAAC\t656320\n+TAAAG\t584278\n+TAAAT\t1453686\n+TAACA\t514469\n+TAACC\t257738\n+TAACG\t228098\n+TAACT\t473671\n+TAAGA\t411688\n+TAAGC\t412400\n+TAAGG\t243072\n+TAAGT\t505986\n+TAATA\t887022\n+TAATC\t429688\n+TAATG\t557972\n+TAATT\t1279151\n+TACAA\t638171\n+TACAC\t359272\n+TACAG\t269554\n+TACAT\t651698\n+TACCA\t294521\n+TACCC\t186072\n+TACC
G\t134084\n+TACCT\t210051\n+TACGA\t255187\n+TACGC\t188583\n+TACGG\t144135\n+TACGT\t222807\n+TACTA\t299145\n+TACTC\t243515\n+TACTG\t242385\n+TACTT\t529461\n+TAGAA\t445486\n+TAGAC\t192718\n+TAGAG\t225170\n+TAGAT\t371662\n+TAGCA\t338376\n+TAGCC\t240530\n+TAGCG\t157045\n+TAGCT\t334860\n+TAGGA\t194399\n+TAGGC\t179285\n+TAGGG\t138443\n+TAGGT\t199383\n+TAGTA\t299145\n+TAGTC\t203900\n+TAGTG\t245329\n+TAGTT\t560260\n+TATAA\t905114\n+TATAC\t478377\n+TATAG\t374869\n+TATAT\t1443312\n+TATCA\t432085\n+TATCC\t266505\n+TATCG\t299809\n+TATCT\t467523\n+TATGA\t432243\n+TATGC\t441619\n+TATGG\t339799\n+TATGT\t693461\n+TATTA\t887022\n+TATTC\t522722\n+TATTG\t602062\n+TATTT\t1720154\n+TCAAA\t830733\n+TCAAC\t395847\n+TCAAG\t362165\n+TCAAT\t678967\n+TCACA\t402109\n+TCACC\t212946\n+TCACG\t164762\n+TCACT\t367070\n+TCAGA\t303978\n+TCAGC\t373449\n+TCAGG\t185559\n+TCAGT\t385818\n+TCATA\t432243\n+TCATC\t340433\n+TCATG\t254268\n+TCATT\t707017\n+TCCAA\t464453\n+TCCAC\t332993\n+TCCAG\t285196\n+TCCAT\t425248\n+TCCCA\t332570\n+TCCCC\t246063\n+TCCCG\t164879\n+TCCCT\t229526\n+TCCGA\t253087\n+TCCGC\t252341\n+TCCGG\t163286\n+TCCGT\t233416\n+TCCTA\t194399\n+TCCTC\t267623\n+TCCTG\t279201\n+TCCTT\t425709\n+TCGAA\t507221\n+TCGAC\t242629\n+TCGAG\t284084\n+TCGAT\t460551\n+TCGCA\t375343\n+TCGCC\t294688\n+TCGCG\t173249\n+TCGCT\t370922\n+TCGGA\t253087\n+TCGGC\t265278\n+TCGGG\t190433\n+TCGGT\t269143\n+TCGTA\t255187\n+TCGTC\t227718\n+TCGTG\t218373\n+TCGTT\t441393\n+TCTAA\t393086\n+TCTAC\t220356\n+TCTAG\t208283\n+TCTAT\t413312\n+TCTCA\t305326\n+TCTCC\t255587\n+TCTCG\t248235\n+TCTCT\t424487\n+TCTGA\t303978\n+TCTGC\t389381\n+TCTGG\t307187\n+TCTGT\t414129\n+TCTTA\t411688\n+TCTTC\t375086\n+TCTTG\t371605\n+TCTTT\t735509\n+TGAAA\t979910\n+TGAAC\t351705\n+TGAAG\t365491\n+TGAAT\t675983\n+TGACA\t381622\n+TGACC\t222424\n+TGACG\t180167\n+TGACT\t330719\n+TGAGA\t305326\n+TGAGC\t300824\n+TGAGG\t195709\n+TGAGT\t363535\n+TGATA\t432085\n+TGATC\t244669\n+TGATG\t377889\n+TGATT\t689516\n+TGCAA\t739059\n+TGCAC\t415919\n+TGCAG\t455674\n+TG
CAT\t610206\n+TGCCA\t582755\n+TGCCC\t291844\n+TGCCG\t300708\n+TGCCT\t343894\n+TGCGA\t375343\n+TGCGC\t275852\n+TGCGG\t278984\n+TGCGT\t321845\n+TGCTA\t338376\n+TGCTC\t373175\n+TGCTG\t560268\n+TGCTT\t560387\n+TGGAA\t571498\n+TGGAC\t232424\n+TGGAG\t307866\n+TGGAT\t396414\n+TGGCA\t582755\n+TGGCC\t458889\n+TGGCG\t310066\n+TGGCT\t490513\n+TGGGA\t332570\n+TGGGC\t341654\n+TGGGG\t289650\n+TGGGT\t347866\n+TGGTA\t294521\n+TGGTC\t249199\n+TGGTG\t330244\n+TGGTT\t479937\n+TGTAA\t596325\n+TGTAC\t339181\n+TGTAG\t275425\n+TGTAT\t708010\n+TGTCA\t381622\n+TGTCC\t255633\n+TGTCG\t254663\n+TGTCT\t341705\n+TGTGA\t402109\n+TGTGC\t487257\n+TGTGG\t435188\n+TGTGT\t832228\n+TGTTA\t514469\n+TGTTC\t395511\n+TGTTG\t687960\n+TGTTT\t1174890\n+TTAAA\t1506984\n+TTAAC\t529553\n+TTAAG\t603193\n+TTAAT\t1184370\n+TTACA\t596325\n+TTACC\t289904\n+TTACG\t255155\n+TTACT\t462537\n+TTAGA\t393086\n+TTAGC\t402136\n+TTAGG\t254810\n+TTAGT\t488881\n+TTATA\t905114\n+TTATC\t500716\n+TTATG\t638944\n+TTATT\t1435871\n+TTCAA\t845058\n+TTCAC\t412587\n+TTCAG\t438454\n+TTCAT\t676983\n+TTCCA\t571498\n+TTCCC\t388709\n+TTCCG\t320193\n+TTCCT\t422559\n+TTCGA\t507221\n+TTCGC\t428272\n+TTCGG\t349863\n+TTCGT\t436061\n+TTCTA\t445486\n+TTCTC\t421020\n+TTCTG\t474000\n+TTCTT\t759053\n+TTGAA\t845058\n+TTGAC\t370823\n+TTGAG\t388608\n+TTGAT\t662989\n+TTGCA\t739059\n+TTGCC\t538214\n+TTGCG\t352393\n+TTGCT\t647156\n+TTGGA\t464453\n+TTGGC\t630591\n+TTGGG\t411508\n+TTGGT\t490945\n+TTGTA\t638171\n+TTGTC\t422058\n+TTGTG\t611423\n+TTGTT\t1171948\n+TTTAA\t1506984\n+TTTAC\t638916\n+TTTAG\t573057\n+TTTAT\t1526553\n+TTTCA\t979910\n+TTTCC\t759290\n+TTTCG\t714479\n+TTTCT\t897347\n+TTTGA\t830733\n+TTTGC\t883669\n+TTTGG\t776539\n+TTTGT\t1190742\n+TTTTA\t1550931\n+TTTTC\t1313857\n+TTTTG\t1382023\n+TTTTT\t2802550\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/trained_species/fly/augustus/fly_intron_probs.pbl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_intron_probs.pbl Mon Oct 04 19:38:37 2021 +0000
[
b'@@ -0,0 +1,20382 @@\n+#intron model parameters\n+# begin of content independent part\n+#\n+# ASS probabilities\n+#only nonpseudocount values are shown\n+[ASS]\n+# Size of vector\n+1024\n+# c_ass (ASS count)\n+31180\n+# asspseudocount (added to all possible patterns, no matter if they occur)\n+0.01\n+# Probabilities * 1000\n+aaaaa\t0.321\n+aaaac\t0.289\n+aaaag\t0.0324\n+aaaat\t0.289\n+aaaca\t0.161\n+aaacc\t0.257\n+aaacg\t0.0644\n+aaact\t0.257\n+aaaga\t0.0324\n+aaagc\t0.0644\n+aaagg\t0.0324\n+aaagt\t0.0965\n+aaata\t0.161\n+aaatc\t0.193\n+aaatg\t0.129\n+aaatt\t0.161\n+aacaa\t0.193\n+aacag\t0.129\n+aacat\t0.129\n+aacca\t0.129\n+aaccc\t0.193\n+aaccg\t0.193\n+aacct\t0.129\n+aacga\t0.161\n+aacgc\t0.0644\n+aacgg\t0.0965\n+aacta\t0.0644\n+aactc\t0.0965\n+aactg\t0.129\n+aactt\t0.289\n+aagaa\t0.129\n+aagag\t0.0324\n+aagat\t0.161\n+aagca\t0.0324\n+aagcc\t0.0965\n+aagcg\t0.193\n+aagct\t0.129\n+aagga\t0.0324\n+aaggc\t0.0324\n+aaggg\t0.0965\n+aaggt\t0.0644\n+aagta\t0.0324\n+aagtc\t0.0965\n+aagtg\t0.129\n+aagtt\t0.193\n+aataa\t0.193\n+aatac\t0.353\n+aatag\t0.0644\n+aatat\t0.353\n+aatca\t0.417\n+aatcc\t0.513\n+aatcg\t0.385\n+aatct\t0.321\n+aatga\t0.417\n+aatgc\t0.449\n+aatgg\t0.289\n+aatgt\t0.321\n+aatta\t0.193\n+aattc\t0.417\n+aattg\t0.385\n+aattt\t0.257\n+acaaa\t0.161\n+acaac\t0.193\n+acaag\t0.0965\n+acaat\t0.193\n+acaca\t0.129\n+acacc\t0.257\n+acacg\t0.0324\n+acact\t0.129\n+acagc\t0.161\n+acagg\t0.129\n+acagt\t0.0324\n+acata\t0.0324\n+acatc\t0.161\n+acatg\t0.0965\n+acatt\t0.193\n+accaa\t0.161\n+accac\t0.129\n+accag\t0.0644\n+accat\t0.193\n+accca\t0.0965\n+acccc\t0.0644\n+acccg\t0.193\n+accct\t0.129\n+accga\t0.129\n+accgg\t0.0324\n+accgt\t0.161\n+accta\t0.129\n+acctc\t0.129\n+acctg\t0.193\n+acctt\t0.193\n+acgaa\t0.0324\n+acgac\t0.161\n+acgag\t0.0965\n+acgat\t0.0644\n+acgca\t0.129\n+acgcc\t0.129\n+acgcg\t0.0324\n+acgct\t0.129\n+acgga\t0.161\n+acggc\t0.0644\n+acggt\t0.0644\n+acgtc\t0.129\n+acgtg\t0.0965\n+acgtt\t0.129\n+actaa\t0.129\n+actac\t0.417\n+actat\t0.257\n+actca\t0.257\n+a
ctcc\t0.193\n+actcg\t0.0965\n+actct\t0.321\n+actga\t0.257\n+actgc\t0.385\n+actgg\t0.225\n+actgt\t0.289\n+actta\t0.129\n+acttc\t0.193\n+acttg\t0.0965\n+acttt\t0.289\n+agaaa\t0.513\n+agaac\t0.77\n+agaag\t0.0965\n+agaat\t0.545\n+agaca\t0.353\n+agacc\t0.353\n+agacg\t0.385\n+agact\t0.513\n+agaga\t0.193\n+agagc\t0.385\n+agagg\t0.129\n+agagt\t0.289\n+agata\t0.321\n+agatc\t0.449\n+agatg\t0.417\n+agatt\t0.353\n+agcaa\t0.449\n+agcac\t0.257\n+agcag\t0.0644\n+agcat\t0.642\n+agcca\t0.353\n+agccc\t0.161\n+agccg\t0.353\n+agcct\t0.257\n+agcga\t0.385\n+agcgc\t0.225\n+agcgg\t0.129\n+agcgt\t0.225\n+agcta\t0.161\n+agctc\t0.161\n+agctg\t0.577\n+agctt\t0.417\n+aggaa\t0.225\n+aggac\t0.193\n+aggag\t0.129\n+aggat\t0.353\n+aggca\t0.417\n+aggcc\t0.161\n+aggcg\t0.129\n+aggct\t0.225\n+aggga\t0.129\n+agggc\t0.129\n+agggg\t0.0324\n+agggt\t0.193\n+aggta\t0.161\n+aggtc\t0.321\n+aggtg\t0.193\n+aggtt\t0.257\n+agtaa\t0.609\n+agtac\t0.706\n+agtag\t0.0324\n+agtat\t0.77\n+agtca\t0.866\n+agtcc\t0.577\n+agtcg\t0.385\n+agtct\t0.898\n+agtga\t0.706\n+agtgc\t0.738\n+agtgg\t0.994\n+agtgt\t0.449\n+agtta\t0.577\n+agttc\t0.417\n+agttg\t0.449\n+agttt\t0.802\n+ataaa\t0.193\n+ataac\t0.129\n+ataat\t0.161\n+ataca\t0.0324\n+atacc\t0.0644\n+atacg\t0.0324\n+atact\t0.129\n+atata\t0.0644\n+atatc\t0.0644\n+atatg\t0.129\n+atatt\t0.0644\n+atcaa\t0.225\n+atcac\t0.0965\n+atcat\t0.0644\n+atcca\t0.0965\n+atccc\t0.0965\n+atccg\t0.0965\n+atcct\t0.0965\n+atcga\t0.0965\n+atcgc\t0.129\n+atcgt\t0.0965\n+atcta\t0.129\n+atctc\t0.0644\n+atctg\t0.193\n+atctt\t0.0644\n+atgaa\t0.0644\n+atgac\t0.0965\n+atgat\t0.161\n+atgca\t0.161\n+atgcc\t0.225\n+atgcg\t0.129\n+atgct\t0.321\n+atgga\t0.161\n+atggc\t0.0965\n+atggg\t0.0644\n+atggt\t0.161\n+atgta\t0.0324\n+atgtc\t0.193\n+atgtg\t0.0965\n+atgtt\t0.129\n+attaa\t0.0965\n+attac\t0.225\n+attag\t0.0324\n+attat\t0.0644\n+attca\t0.0965\n+attcc\t0.257\n+attcg\t0.193\n+attct\t0.0644\n+attga\t0.0965\n+attgc\t0.257\n+attgg\t0.0644\n+attgt\t0.161\n+attta\t0.129\n+atttc\t0.289\n+atttg\t0.161\n+atttt\t0.289\n+caa
aa\t6.77\n+caaac\t5.48\n+caaag\t2.47\n+caaat\t5.26\n+caaca\t3.88\n+caacc\t3.85\n+caacg\t3.5\n+caact\t4.2\n+caaga\t1.64\n+caagc\t1.51\n+caagg\t1.31\n+caagt\t1.28\n+caata\t2.5\n+caatc\t3.59\n+caatg\t5.23\n+caatt\t3.27\n+cacaa\t4.91\n+cacac\t2.63\n+cacag\t0.545\n+cacat\t3.21\n+cacca\t2.66\n+caccc\t2.47\n+caccg\t1.41\n+cacct\t2.79\n+cacga\t3.05\n+cacgc\t2.82\n+cacgg\t1.09\n+cacgt\t1.03\n+cacta\t1.83\n+cactc\t2.79\n+cactg\t3.11\n+cactt\t2.85\n+cagaa\t3.27\n+cagac\t1.44\n+cagag\t1.76\n+cagat\t2.44\n+cagca\t2.89\n+cagcc\t2.79\n+cagcg\t2.31\n+cagct\t3.62\n+cagga\t2.76\n+caggc\t1.'..b'855631\n+#\tttcgg\t698837\n+#\tttcgt\t871342\n+#\tttcta\t890087\n+#\tttctc\t841394\n+#\tttctg\t946845\n+#\tttctt\t1516968\n+#\tttgaa\t1688548\n+#\tttgac\t741091\n+#\tttgag\t776440\n+#\tttgat\t1325067\n+#\tttgca\t1476775\n+#\tttgcc\t1075827\n+#\tttgcg\t704154\n+#\tttgct\t1293450\n+#\tttgga\t927788\n+#\tttggc\t1260242\n+#\tttggg\t822345\n+#\tttggt\t980857\n+#\tttgta\t1275361\n+#\tttgtc\t843496\n+#\tttgtg\t1221327\n+#\tttgtt\t2341971\n+#\ttttaa\t3011593\n+#\ttttac\t1276443\n+#\ttttag\t1145019\n+#\ttttat\t3051195\n+#\ttttca\t1957941\n+#\ttttcc\t1517354\n+#\ttttcg\t1427298\n+#\ttttct\t1793145\n+#\ttttga\t1659999\n+#\ttttgc\t1765755\n+#\ttttgg\t1551540\n+#\ttttgt\t2379188\n+#\ttttta\t3099452\n+#\tttttc\t2625305\n+#\tttttg\t2761348\n+#\tttttt\t5600229\n+\n+# motif upstream of acceptor splice site\n+[ASSMOTIF]\n+# width of motif, n=\n+32\n+# order of markov model, k=\n+1\n+# markov chain emission probabilities\n+ 0  0.358\t0.167\t0.126\t0.35\t0.329\t0.218\t0.131\t0.323\t0.326\t0.213\t0.151\t0.31\t0.298\t0.172\t0.17\t0.36\n+ 1  0.358\t0.168\t0.125\t0.35\t0.328\t0.215\t0.13\t0.327\t0.328\t0.212\t0.151\t0.309\t0.299\t0.171\t0.169\t0.36\n+ 2  0.358\t0.168\t0.122\t0.352\t0.327\t0.217\t0.128\t0.329\t0.329\t0.214\t0.148\t0.309\t0.301\t0.169\t0.167\t0.362\n+ 3  0.358\t0.17\t0.119\t0.353\t0.326\t0.216\t0.126\t0.331\t0.328\t0.216\t0.146\t0.31\t0.303\t0.169\t0.165\t0.362\n+ 4  
0.358\t0.172\t0.116\t0.355\t0.324\t0.216\t0.124\t0.336\t0.325\t0.218\t0.145\t0.312\t0.305\t0.17\t0.163\t0.363\n+ 5  0.358\t0.174\t0.11\t0.358\t0.322\t0.215\t0.12\t0.343\t0.324\t0.219\t0.143\t0.314\t0.308\t0.169\t0.159\t0.364\n+ 6  0.358\t0.176\t0.104\t0.361\t0.319\t0.216\t0.117\t0.349\t0.323\t0.221\t0.139\t0.317\t0.313\t0.169\t0.155\t0.363\n+ 7  0.359\t0.178\t0.0971\t0.365\t0.315\t0.216\t0.114\t0.355\t0.322\t0.222\t0.134\t0.322\t0.316\t0.168\t0.151\t0.365\n+ 8  0.362\t0.179\t0.0893\t0.37\t0.31\t0.216\t0.111\t0.363\t0.321\t0.223\t0.129\t0.328\t0.32\t0.168\t0.146\t0.367\n+ 9  0.362\t0.182\t0.0832\t0.372\t0.305\t0.218\t0.106\t0.371\t0.319\t0.225\t0.124\t0.332\t0.323\t0.167\t0.142\t0.369\n+10  0.364\t0.184\t0.0758\t0.376\t0.301\t0.219\t0.103\t0.377\t0.314\t0.228\t0.119\t0.339\t0.325\t0.167\t0.139\t0.37\n+11  0.364\t0.185\t0.0701\t0.38\t0.298\t0.218\t0.102\t0.382\t0.309\t0.229\t0.116\t0.346\t0.324\t0.167\t0.137\t0.372\n+12  0.366\t0.185\t0.0646\t0.385\t0.294\t0.22\t0.101\t0.385\t0.306\t0.228\t0.114\t0.352\t0.32\t0.168\t0.137\t0.375\n+13  0.367\t0.183\t0.0603\t0.389\t0.293\t0.22\t0.102\t0.385\t0.305\t0.228\t0.111\t0.356\t0.314\t0.169\t0.137\t0.38\n+14  0.364\t0.184\t0.0574\t0.394\t0.29\t0.223\t0.104\t0.383\t0.301\t0.228\t0.111\t0.36\t0.305\t0.171\t0.138\t0.386\n+15  0.359\t0.186\t0.0542\t0.4\t0.286\t0.227\t0.105\t0.382\t0.295\t0.23\t0.11\t0.365\t0.292\t0.175\t0.138\t0.395\n+16  0.355\t0.187\t0.0513\t0.407\t0.279\t0.232\t0.106\t0.382\t0.287\t0.234\t0.11\t0.37\t0.278\t0.18\t0.138\t0.404\n+17  0.347\t0.187\t0.05\t0.415\t0.275\t0.236\t0.108\t0.382\t0.277\t0.238\t0.11\t0.376\t0.261\t0.184\t0.14\t0.414\n+18  0.339\t0.187\t0.047\t0.426\t0.268\t0.239\t0.11\t0.382\t0.268\t0.239\t0.11\t0.383\t0.244\t0.189\t0.14\t0.426\n+19  0.329\t0.188\t0.0445\t0.438\t0.262\t0.242\t0.113\t0.383\t0.262\t0.239\t0.11\t0.39\t0.229\t0.194\t0.139\t0.437\n+20  0.32\t0.189\t0.0399\t0.451\t0.252\t0.246\t0.112\t0.389\t0.256\t0.239\t0.107\t0.398\t0.214\t0.2\t0.137\t0.449\n+21  
0.307\t0.192\t0.0368\t0.464\t0.244\t0.253\t0.111\t0.392\t0.245\t0.245\t0.106\t0.404\t0.201\t0.206\t0.133\t0.46\n+22  0.294\t0.198\t0.032\t0.475\t0.235\t0.266\t0.11\t0.389\t0.236\t0.253\t0.103\t0.407\t0.192\t0.218\t0.13\t0.46\n+23  0.286\t0.201\t0.0264\t0.487\t0.233\t0.273\t0.111\t0.383\t0.231\t0.261\t0.0999\t0.408\t0.186\t0.225\t0.127\t0.461\n+24  0.276\t0.207\t0.0212\t0.495\t0.228\t0.282\t0.107\t0.383\t0.225\t0.272\t0.0963\t0.406\t0.179\t0.233\t0.121\t0.466\n+25  0.258\t0.204\t0.0171\t0.521\t0.213\t0.286\t0.103\t0.398\t0.214\t0.271\t0.0937\t0.421\t0.166\t0.236\t0.114\t0.484\n+26  0.255\t0.208\t0.0131\t0.524\t0.207\t0.3\t0.0997\t0.393\t0.214\t0.274\t0.0902\t0.421\t0.148\t0.223\t0.102\t0.527\n+27  0.27\t0.208\t0.0141\t0.508\t0.22\t0.291\t0.116\t0.372\t0.225\t0.275\t0.0979\t0.403\t0.147\t0.217\t0.127\t0.509\n+28  0.269\t0.21\t0.0127\t0.508\t0.219\t0.296\t0.116\t0.369\t0.221\t0.282\t0.096\t0.402\t0.143\t0.217\t0.127\t0.514\n+29  0.267\t0.214\t0.0126\t0.507\t0.221\t0.301\t0.118\t0.36\t0.219\t0.29\t0.0977\t0.393\t0.139\t0.217\t0.128\t0.516\n+30  0.27\t0.214\t0.0126\t0.504\t0.223\t0.302\t0.12\t0.354\t0.224\t0.292\t0.097\t0.387\t0.135\t0.215\t0.13\t0.52\n+31  0.273\t0.209\t0.0136\t0.504\t0.224\t0.299\t0.123\t0.354\t0.23\t0.285\t0.0997\t0.385\t0.126\t0.203\t0.132\t0.539\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/trained_species/fly/augustus/fly_metapars.cfg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_metapars.cfg Mon Oct 04 19:38:37 2021 +0000
b
@@ -0,0 +1,48 @@
+# This file contains the list of meta parameters for the coding regions (CDS) which are subject to optimization. 
+# All other meta parameters are chosen as given in the species parameter file. The order 
+# of the parameters determines the order in the optimization process.
+# Basically, different values for these meta parameters are tried out and the ones
+# giving best performance in a cross-validation on the training set are chosen.
+# For each parameter the range of possible values is specified after the parameter
+# name and at least one white space.
+# 3 cases are possible for the range:
+# - an explicit list is given, e.g. protein "on" "off"
+# - it is an integer range, e.g. window_size "1"-"5"
+# - it is a range of floating point numbers, e.g. pseudocount "0.3"_"1.8"
+#
+# 
+# Mario Stanke, 19.12.2006
+#
+
+/Constant/dss_end "1"-"4"
+/Constant/dss_start "1"-"3"
+/Constant/ass_start "1"-"3"
+/Constant/ass_end "0"-"4"
+/Constant/ass_upwindow_size "1"-"50"
+/IntronModel/d                  "100"-"950"
+/IntronModel/ass_motif_memory "0"-"3"
+/IntronModel/ass_motif_radius "0"-"4"
+/ExonModel/tis_motif_memory "0"-"3"
+/ExonModel/tis_motif_radius "0"-"3"
+/Constant/trans_init_window "0"-"25"
+/Constant/init_coding_len "0"-"18"
+/ExonModel/patpseudocount "0.5"_"5"
+/ExonModel/etpseudocount "0"-"10"
+/ExonModel/etorder "0"-"3"
+/Constant/intterm_coding_len "0"-"13"
+/ExonModel/slope_of_bandwidth "0.05"_"0.6"
+/ExonModel/minwindowcount "1"-"15"
+/IGenicModel/patpseudocount "0.5"_"7"
+/IntronModel/patpseudocount "0.5"_"7"
+/IntronModel/slope_of_bandwidth "0.05"_"0.6"
+/IntronModel/minwindowcount "1"-"8"
+/IntronModel/asspseudocount "0.0005"_"0.03"
+/IntronModel/dsspseudocount "0.0002"_"0.04"
+/IntronModel/dssneighborfactor  "0.0001"_"0.01"
+/ExonModel/minPatSum "100"_"600"
+/Constant/probNinCoding         "0.15"_".25"
+/Constant/decomp_num_steps "1"-"5"
+# comment out any parameters that you do not want to be subject to optimization
+#/IGenicModel/k                  "4" "3" "5"
+#/IntronModel/k                  "4" "3" "5"
+#/ExonModel/k                    "4" "3" "5"
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/trained_species/fly/augustus/fly_metapars.utr.cfg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_metapars.utr.cfg Mon Oct 04 19:38:37 2021 +0000
b
@@ -0,0 +1,30 @@
+# This file contains the list of meta parameters for the Untranslated Regions (UTRs), which are subject to optimization. 
+# All other parameters are chosen as given in the species parameter file. The order 
+# of the parameters determines the order in the optimization process.
+# Basically, different values for these meta parameters are tried out and the ones
+# giving best performance in a cross-validation on the training set are chosen.
+# For each parameter the range of possible values is specified after the parameter
+# name and at least one white space.
+# 3 cases are possible for the range:
+# - an explicit list is given, e.g. protein "on" "off"
+# - it is an integer range, e.g. window_size "1"-"5"
+# - it is a range of floating point numbers, e.g. pseudocount "0.3"_"1.8"
+#
+# 
+# Mario Stanke, 9.5.2008
+#
+
+/UtrModel/prob_polya            "0.0"_"1.0"
+/UtrModel/d_polya_cleavage_min  "6"-"14"
+/UtrModel/d_polya_cleavage_max  "17"-"27"
+/UtrModel/tss_start             "0"-"12"
+/UtrModel/tss_end               "0"-"8"
+/UtrModel/tts_motif_memory      "0"-"2"
+/UtrModel/utr5patternweight     "0.1"_"1.0"
+/UtrModel/utr3patternweight     "0.1"_"1.0"
+/UtrModel/patpseudocount        "1"_"3"
+/UtrModel/tssup_k               "0"-"2"
+/UtrModel/slope_of_bandwidth    "0.2"_"0.4"
+/UtrModel/minwindowcount        "1"-"4"
+#/UtrModel/k                     "2"-"4"
+
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/trained_species/fly/augustus/fly_parameters.cfg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_parameters.cfg Mon Oct 04 19:38:37 2021 +0000
b
@@ -0,0 +1,144 @@
+#
+# parameters for all Drosophila versions
+# 
+# date : 11.8.2009
+#
+
+#
+# Properties for augustus
+#------------------------------------
+/augustus/verbosity 3     # 0-3, 0: only print the necessary
+maxDNAPieceSize    200000 # maximum segment that is predicted in one piece
+stopCodonExcludedFromCDS false # make this 'true' if the CDS includes the stop codon (training and prediction)
+
+# gff output options:
+protein             on    # output predicted protein sequence
+codingseq           off   # output the coding sequence
+cds                 on    # output 'cds' as feature for exons
+start               on    # output start codons (translation start)
+stop                on    # output stop codons  (translation stop)
+introns             on    # output introns
+tss                 on    # output transcription start site
+tts                 on    # output transcription termination site
+print_utr           off   # output 5'UTR and 3'UTR lines in addition to exon lines
+
+checkExAcc          off   # internal parameter for extrinsic accuracy
+
+# alternative transcripts and posterior probabilities
+sample                      100   # the number of sampling iterations
+alternatives-from-sampling  false # output alternative transcripts
+minexonintronprob           0.08  # minimal posterior probability of all (coding) exons
+minmeanexonintronprob       0.4   # minimal geometric mean of the posterior probs of introns and exons
+maxtracks                   -1    # maximum number of reported transcripts per gene (-1: no limit)
+keep_viterbi                true  # set to true if all Viterbi transcripts should be reported
+uniqueCDS                   true  # don't report transcripts that differ only in the UTR
+UTR                         on    # predict untranslated regions
+
+#
+# 
+# The rest of the file contains mainly meta parameters used for training.
+#
+
+# global constants
+# ----------------------------
+
+/Constant/trans_init_window           25
+/Constant/ass_upwindow_size           32
+/Constant/ass_start                   1
+/Constant/ass_end                     4
+/Constant/dss_start                   3
+/Constant/dss_end                     4
+/Constant/init_coding_len       9
+/Constant/intterm_coding_len       0
+/Constant/tss_upwindow_size           45
+/Constant/decomp_num_at               1
+/Constant/decomp_num_gc               1
+/Constant/gc_range_min       0.32   # This range has an effect only when decomp_num_steps>1. 
+/Constant/gc_range_max                0.50   # States the minimal and maximal percentage of c or g
+/Constant/decomp_num_steps            1      # I recommend keeping this to 1 for most species.
+/Constant/min_coding_len              201    # no gene with a coding sequence shorter than this is predicted
+/Constant/probNinCoding               0.23   # divide this by .25 to get a malus for making one masked letter part of the coding sequence
+/Constant/amberprob                   0.34   # Prob(stop codon = tag), if 0 tag is assumed to code for amino acid
+/Constant/ochreprob                   0.41   # Prob(stop codon = taa), if 0 taa is assumed to code for amino acid
+/Constant/opalprob                    0.25   # Prob(stop codon = tga), if 0 tga is assumed to code for amino acid
+/Constant/subopt_transcript_threshold 0.7
+/Constant/almost_identical_maxdiff    10
+
+# type of weighing, one of  1 = equalWeights, 2 = gcContentClasses, 3 = multiNormalKernel
+/BaseCount/weighingType    3
+# file with the weight matrix (only for multiNormalKernel type weighing)
+/BaseCount/weightMatrixFile   fly_weightmatrix.txt # change this to your species if at all necessary
+
+# Properties for IGenicModel
+# ----------------------------
+/IGenicModel/verbosity      0
+/IGenicModel/infile         fly_igenic_probs.pbl   # change this and the other five filenames *_probs.pbl below to your species
+/IGenicModel/outfile        fly_igenic_probs.pbl
+/IGenicModel/patpseudocount 5.0
+/IGenicModel/k              4        # order of the Markov chain for content model, keep equal to /ExonModel/k
+
+# Properties for ExonModel
+# ----------------------------
+/ExonModel/verbosity          3
+/ExonModel/infile             fly_exon_probs.pbl
+/ExonModel/outfile            fly_exon_probs.pbl
+/ExonModel/patpseudocount     5.0
+/ExonModel/minPatSum          350
+/ExonModel/k                  4       # order of the Markov chain for content model
+/ExonModel/etorder       2
+/ExonModel/etpseudocount      3
+/ExonModel/exonlengthD        3000    # beyond this the distribution is geometric
+/ExonModel/maxexonlength      15000
+/ExonModel/slope_of_bandwidth 0.3
+/ExonModel/minwindowcount     8
+/ExonModel/tis_motif_memory   3
+/ExonModel/tis_motif_radius   2

+# Properties for IntronModel
+# ----------------------------
+/IntronModel/verbosity          0
+/IntronModel/infile             fly_intron_probs.pbl
+/IntronModel/outfile            fly_intron_probs.pbl
+/IntronModel/patpseudocount     5.0
+/IntronModel/k                  4     # order of the Markov chain for content model, keep equal to /ExonModel/k
+/IntronModel/slope_of_bandwidth 0.4
+/IntronModel/minwindowcount     3
+/IntronModel/asspseudocount     0.01
+/IntronModel/dsspseudocount     0.01015
+/IntronModel/dssneighborfactor  0.001
+#/IntronModel/splicefile         fly_splicefile.txt # this optional file contains additional windows around splice sites for training, uncomment if you have one
+/IntronModel/sf_with_motif false           # if true the splice file is also used to train the branch point region
+/IntronModel/d                  929  # constraint: this must be larger than 4 + /Constant/dss_end + /Constant/ass_upwindow_size + /Constant/ass_start
+/IntronModel/ass_motif_memory   1
+/IntronModel/ass_motif_radius   4
+
+# Properties for UtrModel
+# ----------------------------
+/UtrModel/verbosity             3
+/UtrModel/infile                fly_utr_probs.pbl
+/UtrModel/outfile               fly_utr_probs.pbl
+/UtrModel/k                     4
+/UtrModel/utr5patternweight     0.3    #0.7625
+/UtrModel/utr3patternweight     0.3   #0.5
+/UtrModel/patpseudocount        1
+/UtrModel/tssup_k               1
+/UtrModel/tssup_patpseudocount  1
+/UtrModel/slope_of_bandwidth    0.25
+/UtrModel/minwindowcount        1
+/UtrModel/exonlengthD           800
+/UtrModel/maxexonlength         1200
+/UtrModel/max3singlelength      2000    # excludes roughly 1%
+/UtrModel/max3termlength        1200    # excludes ~ 0.3%
+/UtrModel/tss_start             8
+/UtrModel/tss_end               5
+/UtrModel/tata_start            2
+/UtrModel/tata_end              10
+/UtrModel/tata_pseudocount      2
+/UtrModel/d_tss_tata_min        26      # minimal distance between start of tata box (if existent) and tss 
+/UtrModel/d_tss_tata_max        37      # maximal distance between start of tata box (if existent) and tss
+/UtrModel/polyasig_consensus    aataaa  # polyadenylation signal training not fully automated yet
+/UtrModel/d_polyasig_cleavage   14      # the transcription end is predicted this many bases after the polyadenylation signal
+/UtrModel/d_polya_cleavage_min  9
+/UtrModel/d_polya_cleavage_max  35
+/UtrModel/prob_polya            0.95
+/UtrModel/tts_motif_memory      1
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/trained_species/fly/augustus/fly_utr_probs.pbl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_utr_probs.pbl Mon Oct 04 19:38:37 2021 +0000
[
b"@@ -0,0 +1,7137 @@\n+# UTR model parameters\n+# begin of content independent part\n+\n+# Length distributions\n+[UTRLENGTH]\n+# maximal individually stored length probability d=\n+800\n+# slope of smoothing bandwidth =\n+0.25\n+# smoothing minwindowcount =\n+1\n+# length 5' sing  5' init  5' int  5' term  3' sing  3' init  3' int  3' term \n+# total number of exons of above types\n+      6188      1995       342      1995      7966       178        69       178\n+# number of exons exceeding length d=800\n+        81        32         6        27       823         7         7        48\n+# 1000 P(len=k), k=0,1,..., 800\n+0\t0.681\t5.07e-15\t4.35e-06\t4.62\t0.00284\t5.75\t3.2e-18\t0\n+1\t1.82\t9.16e-12\t0.000391\t10.5\t0.0358\t10.4\t1.04e-17\t0\n+2\t2.76\t6.1e-09\t0.013\t13\t0.178\t8.35\t3.54e-17\t0\n+3\t3.13\t1.49e-06\t0.158\t12.5\t0.405\t6.03\t1.32e-16\t0\n+4\t3.51\t0.000135\t0.708\t12.6\t0.599\t9.23\t5.19e-16\t2.83e-20\n+5\t3.55\t0.00451\t1.17\t13.8\t0.682\t15.3\t2.13e-15\t2.89e-18\n+6\t2.95\t0.0564\t0.708\t15\t0.512\t20.4\t8.93e-15\t2.29e-16\n+7\t2.5\t0.272\t0.158\t15.4\t0.296\t18.5\t3.83e-14\t1.42e-14\n+8\t2.56\t0.548\t0.013\t15.3\t0.27\t12.9\t1.67e-13\t6.84e-13\n+9\t3.12\t0.566\t0.000391\t14.5\t0.332\t11.6\t7.41e-13\t2.57e-11\n+10\t3.83\t0.407\t4.35e-06\t13.6\t0.267\t11.3\t3.35e-12\t7.5e-10\n+11\t4.27\t0.328\t1.78e-08\t12.2\t0.252\t7.9\t1.54e-11\t1.71e-08\n+12\t4.37\t0.472\t5.42e-11\t9.93\t0.347\t6.71\t7.12e-11\t3.03e-07\n+13\t4.42\t0.528\t8.34e-10\t8.9\t0.549\t8.5\t3.31e-10\t4.18e-06\n+14\t4.28\t0.326\t3.63e-08\t9.1\t0.796\t6.71\t1.53e-09\t4.49e-05\n+15\t4.04\t0.333\t4.68e-06\t9.88\t0.875\t4.29\t7.03e-09\t0.000376\n+16\t4.33\t0.612\t0.000396\t11.4\t0.938\t6.01\t3.21e-08\t0.00245\n+17\t4.51\t0.867\t0.013\t12.8\t1.03\t9.28\t1.46e-07\t0.0124\n+18\t4.07\t1.1\t0.159\t12.8\t1.03\t11\t6.62e-07\t0.0492\n+19\t3.76\t1.22\t0.723\t11.3\t0.904\t11.6\t2.99e-06\t0.152\n+20\t3.96\t1.21\t1.34\t10.7\t0.878\t12.3\t1.32e-05\t0.364\n+21\t4.31\t1.12\t1.51\t11\t1.11\t13.6\t5.6e-05\
t0.68\n+22\t4.45\t1.4\t1.86\t10.3\t1.38\t12.8\t0.000224\t0.989\n+23\t4.75\t2.06\t2.7\t8.37\t1.36\t8.55\t0.000833\t1.12\n+24\t4.76\t2.19\t3.68\t6.7\t1.25\t5.92\t0.00284\t0.994\n+25\t4.68\t2.04\t3.45\t6\t1.2\t6.15\t0.00881\t0.7\n+26\t4.47\t2.06\t3.12\t6.21\t1.14\t7.01\t0.0248\t0.434\n+27\t4.28\t2.35\t3.29\t6.62\t1.1\t7.47\t0.0629\t0.352\n+28\t4.83\t3.68\t3.35\t6.53\t0.986\t7.33\t0.144\t0.514\n+29\t5.98\t4.63\t3.25\t5.68\t0.939\t6.87\t0.296\t0.878\n+30\t6.15\t4.11\t3.2\t4.95\t1.15\t6.45\t0.549\t1.3\n+31\t5.15\t3.31\t3.23\t5.2\t1.46\t6.14\t0.918\t1.57\n+32\t4.9\t2.94\t3.22\t5.98\t1.72\t5.79\t1.39\t1.59\n+33\t5.97\t2.5\t3.03\t6.3\t1.94\t5.34\t1.9\t1.39\n+34\t7.15\t2.36\t2.73\t6.51\t2.02\t4.96\t2.37\t1.11\n+35\t7.24\t2.86\t2.53\t7.51\t2.19\t4.8\t2.73\t0.859\n+36\t6.12\t2.78\t2.52\t7.26\t2.39\t4.86\t2.94\t0.648\n+37\t4.95\t2.05\t2.64\t5.47\t2.29\t5.02\t3.02\t0.466\n+38\t4.34\t1.81\t2.72\t4.59\t2.15\t5.19\t3.05\t0.311\n+39\t4.11\t2.08\t2.69\t4.74\t2.11\t5.32\t3.09\t0.189\n+40\t4.19\t2.33\t2.7\t4.96\t2\t5.39\t3.19\t0.107\n+41\t4.29\t2.47\t2.95\t4.98\t1.85\t5.44\t3.36\t0.0591\n+42\t4.22\t2.58\t3.52\t4.91\t1.87\t5.48\t3.59\t0.0393\n+43\t4.26\t2.73\t4.35\t4.84\t2.07\t5.58\t3.84\t0.0391\n+44\t4.69\t2.92\t5.33\t4.86\t2.23\t5.78\t4.08\t0.0537\n+45\t4.43\t3.1\t6.32\t4.93\t2.3\t6.09\t4.29\t0.0815\n+46\t3.6\t3.22\t7.22\t4.96\t2.25\t6.51\t4.48\t0.123\n+47\t3.46\t3.29\t7.94\t4.92\t2\t6.99\t4.64\t0.18\n+48\t3.87\t3.39\t8.42\t4.86\t2\t7.48\t4.78\t0.254\n+49\t4.21\t3.6\t8.62\t4.87\t2.27\t7.91\t4.91\t0.344\n+50\t4.36\t4.01\t8.54\t4.94\t2.5\t8.23\t5.04\t0.448\n+51\t4.41\t4.55\t8.25\t4.98\t2.62\t8.41\t5.18\t0.562\n+52\t4.46\t5.04\t7.86\t4.89\t2.68\t8.41\t5.31\t0.679\n+53\t4.62\t5.25\t7.47\t4.66\t2.73\t8.23\t5.42\t0.79\n+54\t4.92\t5.08\t7.17\t4.36\t2.8\t7.87\t5.5\t0.887\n+55\t5.31\t4.64\t6.98\t4.12\t2.9\t7.36\t5.54\t0.963\n+56\t5.61\t4.19\t6.89\t4\t3.07\t6.75\t5.53\t1.01\n+57\t5.7\t3.93\t6.85\t4.03\t3.3\t6.11\t5.46\t1.03\n+58\t5.54\t3.92\t6.83\t4.14\t3.53\t5.48\t5.33\t1.03\n+59\t5.29\t4.03\t6
.8\t4.25\t3.69\t4.93\t5.17\t1.01\n+60\t5.13\t4.16\t6.75\t4.3\t3.76\t4.48\t4.97\t0.984\n+61\t5.16\t4.24\t6.66\t4.28\t3.79\t4.14\t4.74\t0.955\n+62\t5.35\t4.23\t6.53\t4.19\t3.8\t3.89\t4.52\t0.934\n+63\t5.63\t4.14\t6.37\t4.05\t3.79\t3.7\t4.29\t0.927\n+64\t5.92\t4\t6.18\t3.88\t3.77\t3.55\t4.1\t0.937\n+65\t6.16\t3.84\t5.96\t3.72\t3.79\t3.4\t3.93\t0.965\n+66\t6.28\t3.71\t5.74\t3.62\t3.87\t3.24\t3.8\t1.01\n+67\t6.2\t3.64\t5.51\t3.59\t3.99\t3.07\t3.72\t1.06\n+68\t5.92\t3.63\t5.3\t3.64\t4.07\t2.88\t3.68\t1.13\n+69\t5.57\t3.68\t5.09\t3.72\t4.04\t2.7\t3.68\t1.19\n+70\t5.32\t3.74\t4.91\t3.79\t3.89\t2.52\t3.73\t1.25\n+71\t5.27\t3.79\t4.73\t3.8\t3.7\t2.35\t3.8\t1.31\n+72\t5."..b'tcgc\t14965\n+#\tttcgg\t11310\n+#\tttcgt\t20239\n+#\tttcta\t28351\n+#\tttctc\t16356\n+#\tttctg\t18660\n+#\tttctt\t32422\n+#\tttgaa\t47499\n+#\tttgac\t14608\n+#\tttgag\t17968\n+#\tttgat\t36555\n+#\tttgca\t31476\n+#\tttgcc\t17850\n+#\tttgcg\t12080\n+#\tttgct\t22771\n+#\tttgga\t20302\n+#\tttggc\t17146\n+#\tttggg\t11267\n+#\tttggt\t16683\n+#\tttgta\t64460\n+#\tttgtc\t18109\n+#\tttgtg\t29654\n+#\tttgtt\t61476\n+#\ttttaa\t101164\n+#\ttttac\t38483\n+#\ttttag\t41333\n+#\ttttat\t89923\n+#\ttttca\t39758\n+#\ttttcc\t27803\n+#\ttttcg\t26520\n+#\ttttct\t36657\n+#\ttttga\t41172\n+#\ttttgc\t30519\n+#\ttttgg\t24062\n+#\ttttgt\t71057\n+#\ttttta\t94640\n+#\tttttc\t46611\n+#\tttttg\t62333\n+#\tttttt\t128813\n+\n+#\n+# The emission probabilities of the tss upwindow\n+#\n+[EMISSION-TSSUPWIN]\n+# size of the emission vector\n+16\n+#tssup_k=\n+1\n+# patpseudo : pseudocount for sequence patterns\n+1\n+aa\t0.326\n+ac\t0.195\n+ag\t0.204\n+at\t0.275\n+ca\t0.306\n+cc\t0.2\n+cg\t0.248\n+ct\t0.245\n+ga\t0.245\n+gc\t0.298\n+gg\t0.193\n+gt\t0.264\n+ta\t0.212\n+tc\t0.219\n+tg\t0.221\n+tt\t0.347\n+\n+# motif around the TSS of TATA-less promoters\n+[TSSMOTIF]\n+# width of motif, n=\n+13\n+# order of markov model, k=\n+0\n+# markov chain emission probabilities\n+ 0  0.268\t0.227\t0.222\t0.283\n+ 1  0.257\t0.23\t0.22\t0.293\n+ 2  
0.279\t0.234\t0.216\t0.27\n+ 3  0.271\t0.238\t0.214\t0.278\n+ 4  0.285\t0.237\t0.216\t0.261\n+ 5  0.263\t0.214\t0.229\t0.295\n+ 6  0.249\t0.223\t0.195\t0.332\n+ 7  0.162\t0.309\t0.241\t0.288\n+ 8  0.406\t0.252\t0.193\t0.15\n+ 9  0.275\t0.199\t0.233\t0.293\n+10  0.275\t0.194\t0.165\t0.365\n+11  0.294\t0.215\t0.182\t0.308\n+12  0.283\t0.207\t0.228\t0.282\n+\n+# motif around the TSS of TATA promoters\n+[TSSMOTIFTATA]\n+# width of motif, n=\n+13\n+# order of markov model, k=\n+0\n+# markov chain emission probabilities\n+ 0  0.293\t0.245\t0.261\t0.201\n+ 1  0.32\t0.188\t0.298\t0.193\n+ 2  0.3\t0.216\t0.248\t0.237\n+ 3  0.301\t0.216\t0.213\t0.271\n+ 4  0.314\t0.254\t0.221\t0.211\n+ 5  0.293\t0.174\t0.264\t0.269\n+ 6  0.235\t0.172\t0.213\t0.38\n+ 7  0.15\t0.422\t0.116\t0.312\n+ 8  0.501\t0.174\t0.208\t0.118\n+ 9  0.256\t0.211\t0.229\t0.304\n+10  0.264\t0.225\t0.113\t0.398\n+11  0.329\t0.222\t0.121\t0.329\n+12  0.312\t0.195\t0.213\t0.28\n+\n+# tata box motif \n+[TATAMOTIF]\n+# width of motif, n=\n+12\n+# order of markov model, k=\n+0\n+# markov chain emission probabilities\n+ 0  0.21\t0.237\t0.381\t0.173\n+ 1  0.19\t0.341\t0.336\t0.133\n+ 2  0.0032\t0.0032\t0.0032\t0.99\n+ 3  0.99\t0.0032\t0.0032\t0.0032\n+ 4  0.0032\t0.0032\t0.0032\t0.99\n+ 5  0.99\t0.0032\t0.0032\t0.0032\n+ 6  0.637\t0.0432\t0.0272\t0.293\n+ 7  0.99\t0.0032\t0.0032\t0.0032\n+ 8  0.602\t0.0592\t0.0576\t0.282\n+ 9  0.37\t0.0976\t0.386\t0.147\n+10  0.202\t0.315\t0.312\t0.171\n+11  0.254\t0.283\t0.278\t0.184\n+\n+# motif after polyA signal\n+[TTSMOTIF]\n+# width of motif, n=\n+14\n+# order of markov model, k=\n+1\n+# markov chain emission probabilities\n+ 0  0.368\t0.167\t0.147\t0.317\t0.414\t0.134\t0.181\t0.27\t0.365\t0.182\t0.115\t0.337\t0.342\t0.126\t0.224\t0.308\n+ 1  0.368\t0.17\t0.139\t0.323\t0.41\t0.145\t0.177\t0.268\t0.338\t0.199\t0.106\t0.358\t0.323\t0.136\t0.21\t0.33\n+ 2  0.397\t0.173\t0.114\t0.316\t0.425\t0.149\t0.173\t0.252\t0.35\t0.19\t0.106\t0.353\t0.335\t0.13\t0.198\t0.337\n+ 3  
0.42\t0.159\t0.102\t0.319\t0.437\t0.152\t0.165\t0.246\t0.359\t0.191\t0.102\t0.348\t0.329\t0.124\t0.176\t0.371\n+ 4  0.452\t0.148\t0.104\t0.296\t0.446\t0.147\t0.159\t0.248\t0.386\t0.187\t0.1\t0.328\t0.337\t0.114\t0.167\t0.382\n+ 5  0.455\t0.141\t0.107\t0.297\t0.435\t0.143\t0.159\t0.263\t0.375\t0.194\t0.0979\t0.333\t0.324\t0.115\t0.162\t0.399\n+ 6  0.453\t0.144\t0.11\t0.292\t0.421\t0.15\t0.156\t0.274\t0.362\t0.2\t0.104\t0.334\t0.323\t0.118\t0.158\t0.4\n+ 7  0.453\t0.145\t0.11\t0.292\t0.422\t0.158\t0.145\t0.275\t0.365\t0.194\t0.109\t0.333\t0.325\t0.121\t0.154\t0.401\n+ 8  0.454\t0.145\t0.109\t0.292\t0.427\t0.165\t0.132\t0.276\t0.372\t0.197\t0.112\t0.319\t0.329\t0.119\t0.158\t0.394\n+ 9  0.454\t0.153\t0.106\t0.288\t0.435\t0.156\t0.131\t0.278\t0.371\t0.197\t0.107\t0.325\t0.331\t0.118\t0.166\t0.385\n+10  0.451\t0.155\t0.104\t0.29\t0.424\t0.154\t0.145\t0.276\t0.361\t0.194\t0.105\t0.34\t0.336\t0.116\t0.169\t0.379\n+11  0.457\t0.157\t0.102\t0.284\t0.427\t0.156\t0.147\t0.271\t0.359\t0.199\t0.105\t0.337\t0.343\t0.117\t0.165\t0.376\n+12  0.461\t0.149\t0.103\t0.287\t0.432\t0.168\t0.145\t0.255\t0.364\t0.21\t0.106\t0.32\t0.348\t0.12\t0.162\t0.37\n+13  0.467\t0.144\t0.101\t0.287\t0.447\t0.171\t0.135\t0.247\t0.364\t0.225\t0.111\t0.3\t0.347\t0.122\t0.162\t0.369\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/trained_species/fly/augustus/fly_weightmatrix.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_weightmatrix.txt Mon Oct 04 19:38:37 2021 +0000
b
@@ -0,0 +1,23 @@
+# 
+# This file contains a matrix used for weighing the training sequences
+# when given an input sequence. Let z = (da, dc, dg, dt) be the vector
+# containing the differences in the relative nucleotide frequencies of
+# two sequences, the input sequence and a training sequence.
+# Then the training sequence has weight proportional to 
+#
+# exp ( - z M z^t)
+#
+# with M being the matrix specified below.
+# If M is nonsingular, then (apart from two normalizing factors) M
+# is the inverse of the covariance matrix of a multinormal
+# distribution - the kernel for the estimation.
+
+
+# this matrix is gc-content only, i.e. 
+# weight = 10 * exp(-200 * (dc^2 + dg^2))
+# in particular weight <= 10
+0      0      0             0
+0      200    0             0
+0      0      200           0
+0      0      0             0
+
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/trained_species/fly/info.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/trained_species/fly/info.json Mon Oct 04 19:38:37 2021 +0000
[
@@ -0,0 +1,1 @@
+{"augustus": [{"version": "AUGUSTUS (3.3.3)", "source": "augustus pre-trained", "date": "2021-07-19", "path": "/tmp/prout/trained_species/fly/augustus"}], "genemark": [{}], "codingquarry": [{}], "snap": [{}], "glimmerhmm": [{}]}
\ No newline at end of file
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/uniprot.dmnd
b
Binary file test-data/funannotate_db/uniprot.dmnd has changed
b
diff -r 000000000000 -r 857f7ac611e1 test-data/funannotate_db/uniprot_sprot.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/funannotate_db/uniprot_sprot.fasta Mon Oct 04 19:38:37 2021 +0000
b
@@ -0,0 +1,83 @@
+>sp|Q6GZX4|001R_FRG3G Putative transcription factor 001R OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-001R PE=4 SV=1
+MAFSAEDVLKEYDRRRRMEALLLSLYYPNDRKLLDYKEWSPPRVQVECPKAPVEWNNPPS
+EKGLIVGHFSGIKYKGEKAQASEVDVNKMCCWVSKFKDAMRRYQGIQTCKIPGKVLSDLD
+AKIKAYNLTVEGVEGFVRYSRVTKQHVAAFLKELRHSKQYENVNLIHYILTDKRVDIQHL
+EKDLVKDFKALVESAHRMRQGHMINVKYILYQLLKKHGHGPDGPDILTVKTGSKGVLYDD
+SFRKIYTDLGWKFTPL
+>sp|Q6GZX3|002L_FRG3G Uncharacterized protein 002L OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-002L PE=4 SV=1
+MSIIGATRLQNDKSDTYSAGPCYAGGCSAFTPRGTCGKDWDLGEQTCASGFCTSQPLCAR
+IKKTQVCGLRYSSKGKDPLVSAEWDSRGAPYVRCTYDADLIDTQAQVDQFVSMFGESPSL
+AERYCMRGVKNTAGELVSRVSSDADPAGGWCRKWYSAHRGPDQDAALGSFCIKNPGAADC
+KCINRASDPVYQKVKTLHAYPDQCWYVPCAADVGELKMGTQRDTPTNCPTQVCQIVFNML
+DDGSVTMDDVKNTINCDFSKYVPPPPPPKPTPPTPPTPPTPPTPPTPPTPPTPRPVHNRK
+VMFFVAGAVLVAILISTVRW
+>sp|Q197F8|002R_IIV3 Uncharacterized protein 002R OS=Invertebrate iridescent virus 3 OX=345201 GN=IIV3-002R PE=4 SV=1
+MASNTVSAQGGSNRPVRDFSNIQDVAQFLLFDPIWNEQPGSIVPWKMNREQALAERYPEL
+QTSEPSEDYSGPVESLELLPLEIKLDIMQYLSWEQISWCKHPWLWTRWYKDNVVRVSAIT
+FEDFQREYAFPEKIQEIHFTDTRAEEIKAILETTPNVTRLVIRRIDDMNYNTHGDLGLDD
+LEFLTHLMVEDACGFTDFWAPSLTHLTIKNLDMHPRWFGPVMDGIKSMQSTLKYLYIFET
+YGVNKPFVQWCTDNIETFYCTNSYRYENVPRPIYVWVLFQEDEWHGYRVEDNKFHRRYMY
+STILHKRDTDWVENNPLKTPAQVEMYKFLLRISQLNRDGTGYESDSDPENEHFDDESFSS
+GEEDSSDEDDPTWAPDSDDSDWETETEEEPSVAARILEKGKLTITNLMKSLGFKPKPKKI
+QSIDRYFCSLDSNYNSEDEDFEYDSDSEDDDSDSEDDC
+>sp|Q197F7|003L_IIV3 Uncharacterized protein 003L OS=Invertebrate iridescent virus 3 OX=345201 GN=IIV3-003L PE=4 SV=1
+MYQAINPCPQSWYGSPQLEREIVCKMSGAPHYPNYYPVHPNALGGAWFDTSLNARSLTTT
+PSLTTCTPPSLAACTPPTSLGMVDSPPHINPPRRIGTLCFDFGSAKSPQRCECVASDRPS
+TTSNTAPDTYRLLITNSKTRKNNYGTCRLEPLTYGI
+>sp|Q6GZX2|003R_FRG3G Uncharacterized protein 3R OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-003R PE=3 SV=1
+MARPLLGKTSSVRRRLESLSACSIFFFLRKFCQKMASLVFLNSPVYQMSNILLTERRQVD
+RAMGGSDDDGVMVVALSPSDFKTVLGSALLAVERDMVHVVPKYLQTPGILHDMLVLLTPI
+FGEALSVDMSGATDVMVQQIATAGFVDVDPLHSSVSWKDNVSCPVALLAVSNAVRTMMGQ
+PCQVTLIIDVGTQNILRDLVNLPVEMSGDLQVMAYTKDPLGKVPAVGVSVFDSGSVQKGD
+AHSVGAPDGLVSFHTHPVSSAVELNYHAGWPSNVDMSSLLTMKNLMHVVVAEEGLWTMAR
+TLSMQRLTKVLTDAEKDVMRAAAFNLFLPLNELRVMGTKDSNNKSLKTYFEVFETFTIGA
+LMKHSGVTPTAFVDRRWLDNTIYHMGFIPWGRDMRFVVEYDLDGTNPFLNTVPTLMSVKR
+KAKIQEMFDNMVSRMVTS
+>sp|Q6GZX1|004R_FRG3G Uncharacterized protein 004R OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-004R PE=4 SV=1
+MNAKYDTDQGVGRMLFLGTIGLAVVVGGLMAYGYYYDGKTPSSGTSFHTASPSFSSRYRY
+>sp|Q197F5|005L_IIV3 Uncharacterized protein 005L OS=Invertebrate iridescent virus 3 OX=345201 GN=IIV3-005L PE=3 SV=1
+MRYTVLIALQGALLLLLLIDDGQGQSPYPYPGMPCNSSRQCGLGTCVHSRCAHCSSDGTL
+CSPEDPTMVWPCCPESSCQLVVGLPSLVNHYNCLPNQCTDSSQCPGGFGCMTRRSKCELC
+KADGEACNSPYLDWRKDKECCSGYCHTEARGLEGVCIDPKKIFCTPKNPWQLAPYPPSYH
+QPTTLRPPTSLYDSWLMSGFLVKSTTAPSTQEEEDDY
+>sp|Q6GZX0|005R_FRG3G Uncharacterized protein 005R OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-005R PE=4 SV=1
+MQNPLPEVMSPEHDKRTTTPMSKEANKFIRELDKKPGDLAVVSDFVKRNTGKRLPIGKRS
+NLYVRICDLSGTIYMGETFILESWEELYLPEPTKMEVLGTLESCCGIPPFPEWIVMVGED
+QCVYAYGDEEILLFAYSVKQLVEEGIQETGISYKYPDDISDVDEEVLQQDEEIQKIRKKT
+REFVDKDAQEFQDFLNSLDASLLS
+>sp|Q91G88|006L_IIV6 Putative KilA-N domain-containing protein 006L OS=Invertebrate iridescent virus 6 OX=176652 GN=IIV6-006L PE=3 SV=1
+MDSLNEVCYEQIKGTFYKGLFGDFPLIVDKKTGCFNATKLCVLGGKRFVDWNKTLRSKKL
+IQYYETRCDIKTESLLYEIKGDNNDEITKQITGTYLPKEFILDIASWISVEFYDKCNNII
+>fcresfdr
+MLARALLLCAVLALSHTANPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYILTHFKGFWNVVNNIPFLRNAIMSYVLTSRSHLIDSPPTYNADYGYKSWEAFSNLSYYTRALPPVPDDCPTPLGVKGKKQLPDSNEIVEKLLLRRKFIPDPQGSNMMFAFFAQHFTHQFFKTDHKRGPAFTNGLGHGVDLNHIYGETLARQRKLRLFKDGKMKYQIIDGEMYPPTVKDTQAEMIYPPQVPEHLRFAVGQEVFGLVPGLMMYATIWLREHNRVCDVLKQEHPEWGDEQLFQTSRLILIGETIKIVIEDYVQHLSGYHFKLKFDPELLFNKQFQYQNRIAAEFNTLYHWHPLLPDTFQIHDQKYNYQQFIYNNSILLEHGITQFVESFTRQIAGRVAGGRNVPPAVQKVSQASIDQSRQMKYQSFNEYRKRFMLKPYESFEELTGEKEMSAELEALYGDIDAVELYPALLVEKPRPDAIFGETMVEVGAPFSLKGLMGNVICSPAYWKPSTFGGEVGFQIINTASIQSLICNNVKGCPFTSFSVPDPELIKTVTINASSSRSGLDDINPTVLLKERSTEL
+>BUSCOaEOG7B0HST
+MAADQAQFQQLLVSLLSTDNEVRKQAEEAYNNLPVESKVTFLLGAIANGQLSEEVRQLAA
+VLLRRLFSSEFLEFYKKLPAEAQAQLKEQILLAVQQEVSEQLRRKVCEVVAEVARNLIDE
+DGNNQWPEFLQFLFQCANSPSPQLKESALRIFTSVPGIFGNQEAQYLDLIKQMLAKSLED
+TEDAEVRLQAVRAVGAFILLHDKEKEIQKHFADLLPALLQVVAESIEKQDDDALLKVLID
+LAEATPKFLRPQLETILELCLKVLSEEDVEDSWRHLALEVLVTLAETAPAMVRKRAEKYI
+VALVPLVLKMMTDLEEDEDWSVADEITEDDNDSNNVVAESALDRLACGLGGKVVLPLVVE
+AIPAMLSSSDWKKRHAALMAISAIGEGCHKQMEALLDQVLDGVLKYLQDPHPRVRYAACN
+AIGQMSTDFAPIFEKKFHDKVIPGLLLLLDDEANPRVQAHAGAALVNFSEDCPKNILTRY
+LDAIMAKLEAILTSKFKELVEKGTKLVLEQVVTTIASVADTAEEEFVAYYDRLMPCLKYI
+IQNANSEELKLLRGKTIECVSLIGLAVGREKFIADASEVMDLLLKTHTEGAELPDDDPQT
+SYLISAWARICKILGKQFEQYLPLVMGPVLRTASLKPEVALLDNEDLEDIEGDVDWQFVS
+LGEQQNFGIRTAGLEDKASACEMLVCYARELKEGFAEYAEEVVRLMVPLLKFYFHDGVRT
+AAAESLPYLLDCAKIKGPQYLEGMWAYICPELLKAIDTEPEKEVLSELLSSLAKCIETLG
+AGCLSEEALKELLRILDKLLKEHFERAEKRLEKRKDEDYDEVVEEELAEEDDEDVYILSK
+VADILHALFATYKEAFLPAFDQVVPHFVKLLEPERPLADRQWALCVFDDVIEFGGPACVK
+>FBpp0306926
+MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAG
+ESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKRFIRNPNYVKANEFYDKMLSSEY
+VSKRYKDLPPPHPGFGADQPPA
+>FBpp0078508
+MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFD
+LKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLGVTVDLINRELYQADSMTLLDPA
+DEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK
+KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPC
+AQVIFDSDPAPAGKNVPAQLEEMSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFI
+NGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGIYYNELETRVRLNKR
+RVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIG
+ETEKTSEDAAVGAQAASGADSPAQVARDRQSRSRSRTRSGSSSGSGSGSGSRASSRSK
+SGSRSGSGSRSRTNSPAGSQKSGSRSRSVSRSRSRSKSGSRSRSRSRSKSGSRSRSGS
+RSGSGSRSPSRSRSGSPSGSGSSSGSASDE
b
diff -r 000000000000 -r 857f7ac611e1 test-data/genome.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome.fa Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,3253 @@\n+>sample\n+GTGGCCGGCTGATTTCACGTCCTAACTTTGGGCTTAACTGGTTCGCCAGTTGACTTTCTTCGCCATCATG\n+TGATGCATTAATTAAACAATAATTACTAATTGACAGTAATTAATAATTGTGGCAAAAAGCGCGACACGTT\n+TTTTCGGCAAACTCCTCGGAAGACCGATTGTTTAAAGACGTAGGGAAAAGTAGTTCCCAAGCATTTTAAA\n+AAGATACCTATGACATGTGACACCTTTAAAGTGCAATACAAGTTTTCATCTCTTTATATCCTTTTACTCC\n+CTAATTTGAATATAAAAGGAATCGCATTGAGAGTATAAAGGCTTTAGTTCTTATCGATAGATAGTTAGTG\n+ATGAAATAAAATTATAACCGTGGTTTTAGTTTGAAATGTTGTAAAGACTTTCTTTAAATTTAACCAAATT\n+TATGTGATAAAATGGATATTCCATAGATAAGACATTTAAGTTAAATGTTTTTATACATCAAAAAGGAAAC\n+ATTGTGCACGCTATCAAATGGTATTCTTAAAATCGAGTCAGTTAGGTAAGTTATTAATTAAATGGTAACT\n+TTTTAACGTGCGTCAAATAATCTAGAAATTCTTCTTCAACTCATCCAAAACATTCTCAACACCACAATAT\n+CTATGCTCAGCGATGACAAATTTCTCCTGATTTCTTAATTTTCTATCTATGCTATGCGATCAATCAACGA\n+ATGTGTGCTAATTTCTTGTGACGATTATTTTGCAAAGTCGTCTCCGCGTTAATATCCGATGTAAATAAAC\n+CTATGAAAATCGCAAAGATCTATTCCTTTGCGCTTAACCTTGTTATTGAACTCCCTCCCGCCCGGATTTT\n+CGCAGCTTCCAACTAAGTGATACCTTTTAAACAAACGCCACAACAAAACAGGTGACAATCATATTTTATC\n+AACAAAAGAAAAGAGAGATAATATCTGCTGCTAATTCAATTTATTGGGCTTTGTGTTTATTTGCATTGGG\n+AATCCGTGGAGCTGATATTGTTTACTTTGGCAATTTCCCAGTCATTATGGTCGCCGTATAATGTGAATGG\n+CCAACAGGAAAATTTCACAGATTCCACTGGTTATTCACTGTTCATGCTGGCGGTTGATCCGCTCCAAATC\n+CCGGATGAGGCGGCGGAAGATCCTTATACCGCTTACTTACGTATTCACTGCTTAACATCTTATCATAGAA\n+TTCGTTAGCTTTCACATAGTTTGGATTCCGAATGAATCTCTTCTGTCGACCTCGCGTTTTTGGCAATTTT\n+CGGAGCTCTTCGCTACTGCTAGTTGATTTAAGGCCTACATCTTTCTGATCCTTAATCATCCTTTTAGGTG\n+ATTCTCCTGCTACCATGGGATCCACCAGTTTCATATATAATGGATCGGGCTCGTTTGCAAGTGGGTTTCC\n+AGTTTCAGAAGACTCTGACGTTCTAAGCGCCAATAAAATGAAAATAACCAAAAGGAAACTAGACATGTTT\n+CGTTACAGACAGATATAGATTGGATATTATTGAAAGAAAATGAAAATAAACAGCGATAATGATCTGTGAC\n+TTATTGGAAATTAGATGGCTTATGGATGATGGGGCGATAAATTCGAACAAACACTGAGAGCATTTTTGGG\n+AGCATTGTGGGAGCTTTTCTATTATCCAGTACTTTATATACATATATCATTTATATACTAATCATTTCTG\n+GTAGCCGTTCGTAATCAGGATCGGATCCTTTTTTACCCGTTAGTCAGCTAGAAGAAACGAAAAATTAAAA\n+TAGTAAAATCTAAAAGTATACAAAAATTCAAATAGTAAAACCAAAAAGTATTAAAAAAAATATCAATCGT\n+TTTTAAACGTTGATTTTTCAGCTTGTGGGGTGATTTATCGCTAACTTGGAAAATGATAATAAAGCATTA
T\n+CCATAATATTAGTTGTGGAAATGAAATTCAAATAGATGTTGTGTTATATACGATGAGGATGTTGCATTTG\n+AGTCCCCGGAAATATAGTATTTTTTTTACCGAAGGTATTATCGTACCGGTCAAGTACGGTCACACTGCCA\n+AGCGCAGATTTGAGGATTTCTAGATTTGGCCTCTTGATGGACTAGAAGCGCTACCAAAACTGGGGCTTGA\n+GTTGAATTACCTGTTGGAAGACACAATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAA\n+AAGCGACCCCAGCGGCAAACGGAGCGCAAGTAAGTGAACAGATCCCTAAACAGACGCCAGATACTCAGAC\n+TGATGTGTACCTTGCAGATCCGAGATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCAT\n+TTGATCTGAAGTTTCTGCAGTACCCCTTCGACAGCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGA\n+GCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAG\n+CTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCTGGAGGAGGAGACTCTGA\n+CGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCGAGTACAT\n+CTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTC\n+AAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCT\n+TCAGCGACACCAAGAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCC\n+TATCTTCCCCGACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCT\n+GCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGA\n+GCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGACGCTGGAGAAACGCCGTACAGACTTCAT\n+CAATGGCGAGCTGTACAAGGAGGAGGAGGAGTACGAGTACAAGATCGCTCGAGAGTACAACTGGAACGTG\n+AAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATCTACTACA\n+ACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAA\n+GCTGGTAAGTATATTTATGCGCATACATCTATAGCGAGCTTTACTTTGTATTATTTCTACCAGGTTGTCA\n+AGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGG\n+CGAGGAGGAGGAGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAG\n+ACGAGCGAGGACGCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCG\n+ATCGACAGTCTCGTTCTCGGAGTCGAACTCGCAGCGGGTCCAGTTCAGGATCTGGATCTGGCTCCGGCTC\n+TCGGGCCAGCAGCCGCTCAAAGTCTGGTTCTCGGTCTGGTAGCGGCTCCAGATCACGCACAAATTCGCCG\n+GCAGGATCCCAGAAATCCGGATCCAGATCGAGATCGGTATCACGTTCCCGATCCCGTTCCAAGTCCGGCT\n+CTCGGTCGCGTTCTAGGTCGAGATCCAAGTCCGGTTCCCGATCACGTTCGGGCTCCAGATCTGGCTCTGG\n+GTCGCGATCGCCCAGCCGGTCTCGC
AGTGGCTCGCCTTCTGGTTCAGGATCCAGCTCTGGAAGCGCCTCA\n+GATGAATGAT'..b'ATCGGTATCACGTTCCCGATCCCGTTCCAAGTCCGGCT\n+CTCGGTCGCGTTCTAGGTCGAGATCCAAGTCCGGTTCCCGATCACGTTCGGGCTCCAGATCTGGCTCTGG\n+GTCGCGATCGCCCAGCCGGTCTCGCAGTGGCTCGCCTTCTGGTTCAGGATCCAGCTCTGGAAGCGCCTCA\n+GATGAATGATTAATTACAAAAAACGGCGTTCATAATAAATAAGTTTATAATCAACCAAGTACATTTGAAA\n+ACTGAACTAACTCGATTTAATATCATTTTCGCCTCAACTCAGCGCTCGGGTTCGTTGCCCAGAATAGTTT\n+TAAATAAAATCGGCAGTTTAACATAATTTATATTAGATGTTGTTGTTGTATTGCAAACAAGTCGGGTCCT\n+AGTCGATTTACACTTGGCTGAGATAAAACAACTAAGATTCAAATAATATCCTCATAAGAAGATGTAATTA\n+AGACGTTTTTCTTAGGGGGTGCTTAGGAATTGATTAGATCGCCTTTGGGGAAGTGCAAACAATGTAAAAT\n+GATATAAAAGGGTATAAATTAAGTGGATATATGCATCTTCGTTCCAACTACGTGGCGTCCATCAAAAAGC\n+GCTGGAAGACTTCGCCATCGGAACTAGGTAGCTCTGTTTGTTGCGGTGAGTAGATTCTCAAGTTCTGGAG\n+TTGCTGCAGCGGAGAGCCATTCCCGCTGAAGTGTACCACCGCAACCGGCTGTAGGGATATGAGCTGTCCC\n+TCCTCCCGCGGCTCCACACCACAGATGCCCAGCTTTTGGCACTGCTCAACGACAATGTCGTCGATTGACT\n+GCGAAAGCAGTGCCTCCTGCTCAGGATCCACAATGGAGCTGCTGTTGATGGCAGCTATTTCGGCGCTCGC\n+TGGTGAAGGCTGAGTATGCGAGTCGTTGCCCTCCAGGAACGCCTTTATCAGCTGCTCCGGTGTCTGGGCC\n+TCCTCGGTGGGACATCGATGGGTCCTTTGACTGACCTTGTACCTGAACGTCTTTTGGCAGAGCTCGCACT\n+TGTAGGGCATCACTCCCGTGTGGATGCGCGTGTGGACAAGGAAAGAGACTCGCTGCCGGAAACACTTGCC\n+TGTGGACGAGTCCGAAATACGAAAAGGTTAGACATGGAGTGACCCGGAAAAGAAGGTATACCTCTCCTTC\n+ATTTAAAGTAAATAGGGCAAATCGCAATGGAGTATGCTCATTTATAAGCTGGCTAACAAAATAAGGGGCG\n+GCTAATTAAGGGGTTTGATCGATACTCACCGCAGACTTCGCACTTGAAGGGCTTTTCGCCGCTGTGAATC\n+CTCTGGTGGTTATGCAGCGTAGACAGTTCCTTGAAGGCGCGTCCACAAACCCCGCAGACATGGGGCTTTA\n+CCTCGCTGTGGTAGAGCAAATGCTTGTGGTACGACTGCTGGAAGGTGAAGGTCCTGGCGCAGATCTCGCA\n+TGTGTACGGCATCTCGCCGGTGTGGAGCCGCTTGTGCTTCTTCAGAAAGTACTTGGTGGTGAAGGACTTG\n+CTGCACACATCGCACTCCCACAGCTTCGGGGTGGCCGTGCCCGACTCCGGCGAACTGGGGGACTGCTGGA\n+GCATGCTCAGCGCCCCACTCAGCGCGTATGGCTGGGATGCGGTGCACGTGGAGTTATTTCCGTTGCCAAG\n+GGCTCCAGGCTCTGTGTGTCGAATGCGATCGCAAATGCTCAGCTTGGGCGTGGCAGTAACTGCACTGGTG\n+GTAGAAGCGGGTGTAGGACTGGGATTAGGATTTGGATTGGGATTGGAGCAGGGCACGCCCATCATGTGCA\n+CTACTTTCAGGTGGATTCGAAGGGAGCCCTTCATTCGGAACGTCTTGGAGCAGAGATGGCACTTGTAGGG\
n+CTCCTGGTCCTGTATAAAGCAATAATCGGAATTTCACTTATATTTATCAATTCATCAATATGCCCTCATG\n+GCCAAATATTCCATTACATTACCGTCTGTCTCTCAGTTTCAAATTTATGCACAAAAATCATTCACTTTCA\n+TTCACTATATCACAAAGTTGCCATGGTTTTAAATTGATCAAAAACAAATTAATATCTATCATATATATAC\n+ATAGTCATATGAACAGTTGAAAAATTAATTGAAAATAATGGGAACGATATACGTACATACATCAGTTGTT\n+TTTAAAATATAAGGGTATATAGATTTCTTTCTTGTTGTTGTTGATTTTAATTACGTCAAACTTTTGTTTC\n+AGATTCAATGTAAATGGTCTAGCTTTTTAAGTATGATTTTTTTTTGCTGCCAGTGAGCATAGAAAAAAAA\n+AATCAAAATCGATATAAGAATATGCGAAAGTGCATTACGAAACTCTTTAGATAATAGCACTTAATATATG\n+TACATAGCCAATAGTTACCGGTTCCTTCTGTTGGGGTTCCTTTTGCTTGGGTTCTCCCTCCGCATTTTCG\n+TGGACTAAGCGGACGTGCATGTCCCTCAGCTCGGTATTCCGGAAACTGAACTCGCAAATGTCGCAGTGGG\n+CGGGCGGGGTGGTCCGCACAGGCGGTGGGGTTGGGACGACGGGCTTGGACCTGGTTCGCTTGGCCCTCCG\n+TTTGGGAGGAGCTGCGGCAAGGAAGCCCCGGGACGAGGCGGGTTGGCCATTCGCTGGACTCTCGTTTCCC\n+TCCTGAGCCATCAGACTTGTGTGCGAGAACAGGTGGATGGTGAGCTTGTCCAGCCCCAGGAAGAGCTCCT\n+TGCAGTTGGCAAAGGGACAGGCCAGTGGGCCGTTGGCCGCCTTGATCAGCCTCTGCTGCAGTGCGTCAAT\n+ACTGCCGAAACTGGGCACCGCGCAGAGCGGACACAGCACCGAGGTGGAACACATTTCGCCAGTGCACTCA\n+ATCGAATCTTATGCAAATGCTTCACCTCCTATTGGGATTATCCTCCTGTTCGGTCTGTGATCATCTATTC\n+AGGAGTCCATTCCCAGACTGCCTAGTCTTTCTGCTTTCAAAATTTTCTAAAAATATCAGCAAGTGAAGAT\n+TTTTGAAAACTTTGGGCCCAGCAATCTGACTTCTCGGCACCGATGCCAGCTAACGAAATAATGAAAAATA\n+ATGAAATGCCCGGCGCGGATCGTCGAATCGTCAAGAAGACTTTCGGAAACACTCGCAGCACCGAAATCCC\n+ATCTCTCGAACAAGGCAGTCTCTTTTCTCCGTGTCTCTGGGTAGCTCATTTCGAAATATAGCTCTGAGCA\n+CGGCTATATACTATATGTATGTAGAATTATTTCTGGCCGATATATGTTGCACTGGCGGCCATATAGCCTT\n+CGTTCTAGTCTTTGTAACGCACGATGCGCAGGAGCAATTCGCTGAGATGACCACATGCGATTTGCGGGAC\n+TTATCTAGAGATCTATCATTATCGCCAGATTGGTTTAATAATTGGCTTTTCCGCCAATATCCAATTGGAA\n+TATGGTTGGTTACTGCAATTGTCGCTCCATTTTTTAAGCACTCCATAAAAAGTAAACACATTAATATGTA\n+CTCTTATTGGAGATTTCTTCTTTCGATTTTAGTTTCGGACCAGTGAAAATCATTCGTTTCATTTTCGTAA\n+ATAAGAACTGAGAAAATATTATTATTATATATATTTCTTTATTAGGAAAATACGAAGATTGAGTATTTCA\n+GATTGAATTAGCATATCCGTCTAAATCTTAATGCTGTAATGAGCTTACTTGAGATCTGATCAAAACCAAT\n+ACAAAACCCACACCAAAGGTGGTAGCTAATATACATATTTTGTGTAATACTTTTGTAGAGTATTTACTAT\n+TCAGCGATTTAAACAAGCAATCGCCTA
GACACACACATTTGTCCGCCTATGTGTATGTGCACCGAGCTAT\n+ACCCCCACTGAATCGCTGTGTGCTATTTTTATGGCCGCGATGCTCTCTTGTTTTGACCCGCTTGGGCAAC\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/genome_masked.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome_masked.fa Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,4554 @@\n+>sample\n+GTGGCCGGCTGATTTCACGTCCTAACTTTGGGCTTAACTGGTTCGCCAGT\n+TGACTTTCTTCGCCATCATGTGATGCATTAATTAAACAATAATTACTAAT\n+TGacagtaattaataattgTGGCAAAAAGCGCGACACGTTTTTTCGGCAA\n+ACTCCTCGGAAGACCGATTGTTTAAAGACGTAGGGAAAAGTAGTTCCCAA\n+GCATTTTAAAAAGATACCTATGACATGTGACACCTTTAAAGTGCAATACA\n+AGTTTTCATCTCTTTATATCCTTTTACTCCCTAATTTGAATATAAAAGGA\n+ATCGCATTGAGAGTATAAAGGCTTTAGTTCTTATCGATagatagttagtG\n+ATGAAATAAAATTATAACCGTGGTTTTAGTTTGAAATGTTGTAAAGACTT\n+TCTTTAAATTTAACCAAATTTATGTGATAAAATGGATATTCCATAGATAA\n+GACATTTAAGTTAAATGTTTTTATACATCAAAAAGGAAACATTGTGCACG\n+CTATCAAATGGTATTCTTAAAATCGAGTCAgttaggtaagttattaatta\n+aATGGTAACTTTTTAACGTGCGTCAAATAATCTAGAAATTCTTCTTCAAC\n+TCATCCAAAACATTCTCAACACCACAATATCTATGCTCAGCGATGACAAA\n+TTTCTCCTGATTTCTTAATTTTCTATCTATGctatgcGATCAATCAACGA\n+ATGTGTGCTAATTTCTTGTGACGATTATTTTGCAAAGTCGTCTCCGCGTT\n+AATATCCGATGTAAATAAACCTATGAAAATCGCAAAGATCTATTCCTTTG\n+CGCTTAACCTTGTTATTGAACTCCCTCCCGCCCGGATTTTCGCAGCTTCC\n+AACTAAGTGATACCTTTTAAACAAACGCCACAACAAAACAGGTGACAATC\n+ATATTTTATCAACAAAAGAAAAGAGAGATAATATCTGCTGCTAATTCAAT\n+TTATTGGGCTTTGTGTTTATTTGCATTGGGAATCCGTGGAGCTGATATTG\n+TTTACTTTGGCAATTTCCCAGTCATTATGGTCGCCGTATAATGTGAATGG\n+CCAACAGGAAAATTTCACAGATTCCACTGGTTATTCACTGTTCATGCTGG\n+CGGTTGATCCGCTCCAAATCCCGGATGAGGCGGCGGAAGATCCTTATACC\n+GCTTACTTACGTATTCACTGCTTAACATCTTATCATAGAATTCGTTAGCT\n+TTCACATAGTTTGGATTCCGAATGAATCTCTTCTGTCGACCTCGCGTTTT\n+TGGCAATTTTCGGAGCTCTTCGCTACTGCTAGTTGATTTAAGGCCTACAT\n+CTTTCTGATCCTTAATCATCCTTTTAGGTGATTCTCCTGCTACCATGGGA\n+TCCACCAGTTTCATATATAATGGATCGGGCTCGTTTGCAAGTGGGTTTCC\n+AGTTTCAGAAGACTCTGACGTTCTAAGCGCCAATAAAATGAAAATAACCA\n+AAAGGAAACTAGACATGTTTCGTTACAGACAGATATAGATTGGATATTAT\n+TGAAAGAAAATgaaaataAACAGCGATAATGATCTGTGACTTATTGGAAA\n+TTAGATGGCTTATGGATGATGGGGCGATAAATTCGAACAAACACTGAGAG\n+CATTTTTGGGAGCATTGTGGGAGCTTTTCTATTATCCAGTACTTTATATA\n+CATATATCATTTATATACTAATCATTTCTGGTAGCCGTTCGTAATCAGGA\n+TCGGATCCTTTTTTACCCGTTAGTCAGCTAGAAGAAACGAAAAATTAAAA\n+TAGTAAAatctaaaagtatacaaaaattcaaatagtaaaaccaaaaagta\n+ttaaaaaaAATATCAATCGTTTTTAAACGTTGATTTTTCAGCTTGTGGGG\n+TGATTT
ATCGCTAACTTGGAAAATGATAATAAAGCATTATCCATAATATT\n+AGTTGTGGAAATGAAATTCAAATAGATGTTGTGTTATATACGATGAGGAT\n+GTTGCATTTGAGTCCCCGGAAATATAGTATTTTTTTTACCGAAGGTATTA\n+TCGTACCGGTCAAGTACGGTCACACTGCCAAGCGCAGATTTGAGGATTTC\n+TAGATTTGGCCTCTTGATGGACTAGAAGCGCTACCAAAACTGGGGCTTGA\n+GTTGAATTACCTGTTGGAAGACACAATGCCACCCACGATCAACAATTCGG\n+CGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAG\n+TAAGTGAACAGATCCCTAAACAGACGCCAGATACTCAGACTGATGTGTAC\n+CTTGCAGATCCGAGATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCG\n+GATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACAGCCACCG\n+CTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACG\n+TGCTGACGGAACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAG\n+CTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACT\n+GCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGC\n+ATTCGAGGACGGTGTCATGGTTGCGCAAATCCGAGTACATCTCCACCGAG\n+CAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGG\n+TTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCG\n+AAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAAGAGCGAA\n+ATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCC\n+TATCTTCCCCGACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATAT\n+TTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAG\n+GAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACA\n+GTTTGTCGCCTACTTCCTGCCCACAGAGCAGACGCTGGAGAAACGCCGTA\n+CAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtac\n+aagATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGG\n+CTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATCTACTACA\n+ACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGC\n+CAGCAACCCAACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCT\n+ATAGCGAGCTTTACTTTGTATTATTTCTACCAGGTTGTCAAGCATCGTCC\n+ATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAG\n+AAGTTCCTGGCGAGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAG\n+CAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGACGCAGCTGT\n+TGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCG\n+ATCGACAGTCTCGTTCTCGGAGTCGAACTCGCAGCGGGTCCAgttcagga\n+tctggatctggctccggctcTCGGGCCAgcagccgctcaaagtctggttc\n+tcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCC\n+AGAAATCCGGatccagatcgagatcggtatcacgttcccgatcccgttcc\n+aagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagtccggt
tcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTT'..b'cgcgttctaggtcgagatccaagtccggttcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTTCTGGTTCAGGATCCAGCTCTGGAAGCGCCTCA\n+GATGAATGATTAATTACAAAAAACGGCGTTCATAATAAATAAGTTTATAA\n+TCAACCAAGTACATTTGAAAACTGAACTAACTCGATTTAATATCATTTTC\n+GCCTCAACTCAGCGCTCGGGTTCGTTGCCCAGAATAGTTTTAAATAAAAT\n+CGGCAGTTTAACATAATTTATATTAGATGTtgttgttgtATTGCAAACAA\n+GTCGGGTCCTAGTCGATTTACACTTGGCTGAGATAAAACAACTAAGATTC\n+AAATAATATCCTCATAAGAAGATGTAATTAAGACGTTTTTCTTAGGGGGT\n+GCTTAGGAATTGATTAGATCGCCTTTGGGGAAGTGCAAACAATGTAAaat\n+gatataaaagggtataaaTTAAGTGGATATATGCATCTTCGTTCCAACTA\n+CGTGGCGTCCATCAAAAAGCGCTGGAAGACTTCGCCATCGGAACTAGGTA\n+GCTCTGTTTGTTGCGGTGAGTAGATTCTCAAGTTCTGGAGTTGCTGCAGC\n+GGAGAGCCATTCCCGCTGAAGTGTACCACCGCAACCGGCTGTAGGGATAT\n+GAGCTGTCCCTCCTCCCGCGGCTCCACACCACAGATGCCCAGCTTTTGGC\n+ACTGCTCAACGACAATGTCGTCGATTGACTGCGAAAGCAGTGCCTCCTGC\n+TCAGGATCCACAATGGAGCTGCTGTTGATGGCAGCTATTTCGGCGCTCGC\n+TGGTGAAGGCTGAGTATGCGAGTCGTTGCCCTCCAGGAACGCCTTTATCA\n+GCTGCTCCGGTGTCTGGGCCTCCTCGGTGGGACATCGATGGGTCCTTTGA\n+CTGACCTTGTACCTGAACGTCTTTTGGCAGAGCTCGCACTTGTAGGGCAT\n+CACTCCCGTGTGGATGCGCGTGTGGACAAGGAAAGAGACTCGCTGCCGGA\n+AACACTTGCCTGTGGACGAGTCCGAAATACGAAAAGGTTAGACATGGAGT\n+GACCCGGAAAAGAAGGTATACCTCTCCTTCATTTAAAGTAAATAGGGCAA\n+ATCGCAATGGAGTATGCTCATTTATAAGCTGGCTAACAAAATAAGGGGCG\n+GCTAATTAAGGGGTTTGATCGATACTCACCGCAGACTTCGCACTTGAAGG\n+GCTTTTCGCCGCTGTGAATCCTCTGGTGGTTATGCAGCGTAGACAGTTCC\n+TTGAAGGCGCGTCCACAAACCCCGCAGACATGGGGCTTTACCTCGCTGTG\n+GTAGAGCAAATGCTTGTGGTACGACTGCTGGAAGGTGAAGGTCCTGGCGC\n+AGATCTCGCATGTGTACGGCATCTCGCCGGTGTGGAGCCGCTTGTGCTTC\n+TTCAGAAAGTACTTGGTGGTGAAGGACTTGCTGCACACATCGCACTCCCA\n+CAGCTTCGGGGTGGCCGTGCCCGACTCCGGCGAACTGGGGGACTGCTGGA\n+GCATGCTCAGCGCCCCACTCAGCGCGTATGGCTGGGATGCGGTGCACGTG\n+GAGTTATTTCCGTTGCCAAGGGCTCCAGGCTCTGTGTGTCGAATGCGATC\n+GCAAATGCTCAGCTTGGGCGTGGCAGTAACTGCACTGGTGGTAGAAGCGG\n+GTGTAGGACtgggattaggatttggattgggattggAGCAGGGCACGCCC\n+ATCATGTGCACTACTTTCAGGTGGATTCGAAGGGAGCCCTTCATTCGGAA\n+CGTCTTGGAGCAGAGATGGCACTT
GTAGGGCTCCTGGTCCTGTATAAAGC\n+AATAATCGGAATTTCACTTATATTTATCAATTCATCAATATGCCCTCATG\n+GCCAAATATTCCATTACATTACCGTCTGTCTCTCAGTTTCAAATTTATGC\n+ACAAAAATCATTCACTTTCATTCACTATATCACAAAGTTGCCATGGTTTT\n+AAATTGATCAAAAACAAATTAATATCTATCATATATaTACATAGTCATAT\n+GAACAGTTGAAAAATTAATTGAAAATAATGGGAACGATATACGTACATAC\n+ATCAGTTGTTTTTAAAATATAAGGGTATATAGATTTCTTTCttgttgttg\n+ttgatTTTAATTACGTCAAACTTTTGTTTCAGATTCAATGTAAATGGTCT\n+AGCTTTTTAAGTATGATTTTTTTTTGCTGCCAGTGAGCATAGAAaaaaaa\n+aatCaaaATCGATATAAGAATATGCGAAAGTGCATTACGAAACTCTTTAG\n+ATAATAGCACTTAATATATGTACATAGCCAATAGTTACCGGTTCCTTCTG\n+TTGGggttccttttgcttgggttcTCCCTCCGCATTTTCGTGGACTAAGC\n+GGACGTGCATGTCCCTCAGCTCGGTATTCCGGAAACTGAACTCGCAAATG\n+TCGCAGTGGGCGGGCGGGGTGGTCCGCACAGGCGGTGGGGTTGGGACGAC\n+GGGCTTGGACCTGGTTCGCTTGGCCCTCCGTTTGGGAGGAGCTGCGGCAA\n+GGAAGCCCCGGGACGAGGCGGGTTGGCCATTCGCTGGACTCTCGTTTCCC\n+TCCTGAGCCATCAGACTTGTGTGCGAGAACAGGTGGATGGTGAGCTTGTC\n+CAGCCCCAGGAAGAGCTCCTTGCAGTTGGCAAAGGGACAGGCCAGTGGGC\n+CGTTGGCCGCCTTGATCAGCCTCTGCTGCAGTGCGTCAATACTGCCGAAA\n+CTGGGCACCGCGCAGAGCGGACACAGCACCGAGGTGGAACACATTTCGCC\n+AGTGCACTCAATCGAATCTTATGCAAATGCTTCACCTCCTATTGGGATTA\n+TCCTCCTGTTCGGTCTGTGATCATCTATTCAGGAGTCCATTCCCAGACTG\n+CCTAGTCTTTCTGCTTTCAAAATTTTCTAAAAATATCAGCAAGTGAAGAT\n+TTTTGAAAACTTTGGGCCCAGCAATCTGACTTCTCGGCACCGATGCCAGC\n+TAACGAAATAATGAAaaataatgaaaTGCCCGGCGCGGATCGTCGAATCG\n+TCAAGAAGACTTTCGGAAACACTCGCAGCACCGAAATCCCATCTCTCGAA\n+CAAGGCAGTCTCTTTTCTCCGTGTCTCTGGGTAGCTCATTTCGAAATATA\n+GCTCTGAGCACGGCTATATACTATATGTATGTAGAATTATTTCTGGCCGA\n+TATATGTTGCACTGGCGGCCATATAGCCTTCGTTCTAGTCTTTGTAACGC\n+ACGATGCGCAGGAGCAATTCGCTGAGATGACCACATGCGATTTGCGGGAC\n+TTATCTAGAGATCTATCATTATCGCCAGATTGGTTTAATAATTGGCTTTT\n+CCGCCAATATCCAATTGGAATATGGTTGGTTACTGCAATTGTCGCTCCAT\n+TTTTTAAGCACTCCATAAAAAGTAAACACATTAATATGTACTCTTATTGG\n+AGATTTCTTCTTTCGATTTTAGTTTCGGACCAGTGAAAATCATTCGTTTC\n+ATTTTCGTAAATAAGAACTGAGAAAATATtattattatATATATTTCTTT\n+ATTAGGAAAATACGAAGATTGAGTATTTCAGATTGAATTAGCATATCCGT\n+CTAAATCTTAATGCTGTAATGAGCTTACTTGAGATCTGATCAAAACCAAT\n+ACaaaacccacacCAAAGGTGGTAGCTAATATACATATTTTGTGTAATAC\n+TTTTGTAGAG
TATTTACTATTCAGCGATTTAAACAAGCAATCGCCTAGAC\n+ACACACATTTGTCCGCCTATGTGTATGTGCACCGAGCTATACCCCCACTG\n+AATCGCTGTGTGCTATTTTTATGGCCGCGATGCTCTCTTGTTTTGACCCG\n+CTTGGGCAAC\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_augustus/Genus_species.cds-transcripts.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_augustus/Genus_species.cds-transcripts.fa Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,421 @@\n+>FUN_000001-T1 FUN_000001\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000002-T1 FUN_000002\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGgatctggatctggctccggctcTCGGGCCAgc\n+agccgctcaaagtctggttctcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCCAGAAATCCGG\n+atccagatcgagatcggtatcacgttcccgatcccgttccaagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagt\n+ccggttcccgatcacgttcgggctccagatctggctct
gggtcgcgatcgCCCAGCCGGTCTCGCAGTGGCTCGCCTTCT\n+GGTTCAGGATCCAGCTCTGGAAGCGCCTCAGATGAATGATTAA\n+>FUN_000003-T1 FUN_000003\n+ATGTATGACCAGGAGCCCTACAAGTGCCATCTCTGCTCCAAGACGTTCCGAATGAAGGGCTCCCTTCGAATCCACCTGAA\n+AGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAATCCCAGTCCTACACCCGCTTCTACCACCA\n+GTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACACAGAGCCTGGAGCCCTTGGCAACGGAAAT\n+AACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGCATGCTCCAGCAGTCCCCCAGTTCGCCGGA\n+GTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTCCTTCACCACCAAGTACTTTCTGAAGAAGC\n+ACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCAGGACCTTCACCTTCCAGCAGTCGTACCAC\n+AAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGTGGACGCGCCTTCAAGGAACTGTCTACGCT\n+GCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAAGTCTGCGGTGAGTATCGATCAAACCCCTTAA\n+TTAGCCGCCCCTTATTTTGTTAG\n+>FUN_000004-T1 FUN_000004\n+ATGATACATTCGGCTAAGACTACCTTGGTGGAGTGGACAGAGGCCATGGAAGACGGAAACAAGGGCTACCAGCTAATCGA\n+AAAGTACTACCTCGATGACCAGCAAAAGGCACGGGAGCTGAACATTAAGCGTCAGCTCTTGCAGGCGGACATCGACAAGC\n+GGCGCAAGCAGGTGGTGCTCCTCTATGACGAGCAAATGACGCTGGAGAAGAATCTGGAGCGAACCGCTTGTCTGTACAGG\n+TCTGCCCACGCGGAACGCCGCCAGATGGTGGAGACGTGGAAGAGCGCCGTGAACCAGATGACCCAGCGAGAGCACGACAT\n+TCAGCGCAGCGAGATAGAGTGTGCGGAGCTGGCCCAGAAAGCCCAGCAGACGGCTCAGACCTACAAGGAGTACGACAACC\n+AGCTAACCGAGGTCATCGAGAACAACCGCCAGGTGGAGCTCGCGATCGAGTCCCTCAACGAGGAAAGCTCCGACATGAAA\n+AACCAAATCCAGATCCTGATTGATGCCACATTACTGAAGGAGCGCGAGATCGACGGACTGCGTCGGGAACTGGAGAACCT\n+TTCGAACCGGGTTCACCTGCAGCGCATGGATAACCGTAGTCAGATGAAAAAGCGCGATGAGAAGGCCAAGGAGCTGGAGA\n+ACTTCGCTTCGGTGATGGAGAAGGTTAACGCTCGGCTGAAGTCTGTACAGAACAAGGCCCTCAATGCGGAGCAGCGGCTG\n+CAAATTCTGGAGGAGATGATGCAGGCGGAGGAGACGGCTCTGCGAAACCTGGACAAGGAGCAGGAGAAGGTCAACGAGAT\n+GTTGTACCGCACCCAGAGGCAGGTCATCGAGCTGCAGGACGAGGAGAAGGTCTTGAAGGTGCAGAACGACTCGCTCAACT\n+CCAACCTGGCAGCGATCAATCGCAATCAGCAGCAGGTCAATAACGAACTCAAGCGGCAGACGGAGATCCATTACAGCCTG\n+TCCTTTAAGTGCTTGGAGGCGGAGCGGCGTCTCGCCGAGATCAAGGGTCTGGCAGATGACCCCGAGGTAGAGGCCACAAA\n+CATGGCCCGGCTTAACACTCTGGAGCAGGAGTATGAAAAGTTGCAACGCCTCATCGCCACCACGGAGGCGCAGAATAAGA\n+A
GCTAAACTATAACATGAACAACCTGGTCGTCCAGTACAACGCCGACGAGAAGGA'..b'TCCTTTG\n+ACAATCTGCCCGCGTTGGAACAACTACTTCTAAGGAACTGTGGGTTGGGCAACATCGAGTGGGAAATCGTGAGGCCAAGA\n+CTCAAATTGTTGCACTATTTGGACTTGGGTGGAGCTCAAAAGACGGGCAACTATGAACATCAGTTGGACGTATCCGCGTT\n+CTCTCCTGAGACAACTACTAACGCTGAAGAGATATCTACTATCCTAGCGAAGAGAGCGATGGCTCCAGAAGTAGTGGGCA\n+CCACTACACTAGGACCTACTACTTCCATAGAAATTTCACCACCATCAACACAAAGTACAACAACACCCAAAGAAGAAAGT\n+ACATCTATGACAGAAACAACGATATTAACAACACCATCGCCAAAATGCGAAGAGGAACTTTGCCAGGATCTAGAGTGCTC\n+CAGAATTACCACCGATACGGTTGCATCTGCAGATCTGGGAAAATCGTCGTGCCAGGATGGTTTACTGGTGGAGATCTGCG\n+AATCGACTTGCACCACTCCCACATTCTTCTGTGTGATATTAGGCGAGAACTTTACCTCCGCATCCAACTGTTGTTCCCAC\n+CATACCATGCGATGTGTGGTCTCTGCACAGGTCTCCTGGTTCGAGGACCACAGCGGACTGGTCATTGGCTTGGGAGTGGG\n+TCTCCTCTTCATCGGCAGCTTCCTCGGCATGCTCATCGTCTTCGGAACCCTGCGCCTGAATCCATCCTGGTTGCGAGGCA\n+ATAAGCGGCGGGAGTCGAATACGATAGGACTGATTCAGGGAAGATTTGAGAAGGACCCGTATGAGCAAGTGGGGTAG\n+>FUN_000016-T1 FUN_000016\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000017-T1 
FUN_000017\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGgatctggatctggctccggctcTCGGGCCAgc\n+agccgctcaaagtctggttctcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCCAGAAATCCGG\n+atccagatcgagatcggtatcacgttcccgatcccgttccaagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagt\n+ccggttcccgatcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGTCTCGCAGTGGCTCGCCTTCT\n+GGTTCAGGATCCAGCTCTGGAAGCGCCTCAGATGAATGATTAA\n+>FUN_000018-T1 
FUN_000018\n+ATGAAGGGCTCCCTTCGAATCCACCTGAAAGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAA\n+TCCCAGTCCTACACCCGCTTCTACCACCAGTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACA\n+CAGAGCCTGGAGCCCTTGGCAACGGAAATAACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGC\n+ATGCTCCAGCAGTCCCCCAGTTCGCCGGAGTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTC\n+CTTCACCACCAAGTACTTTCTGAAGAAGCACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCA\n+GGACCTTCACCTTCCAGCAGTCGTACCACAAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGT\n+GGACGCGCCTTCAAGGAACTGTCTACGCTGCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAAGT\n+CTGCGGCAAGTGTTTCCGGCAGCGAGTCTCTTTCCTTGTCCACACGCGCATCCACACGGGAGTGATGCCCTACAAGTGCG\n+AGCTCTGCCAAAAGACGTTCAGGTACAAGGTCAGTCAAAGGACCCATCGATGTCCCACCGAGGAGGCCCAGACACCGGAG\n+CAGCTGATAAAGGCGTTCCTGGAGGGCAACGACTCGCATACTCAGCCTTCACCAGCGAGCGCCGAAATAGCTGCCATCAA\n+CAGCAGCTCCATTGTGGATCCTGAGCAGGAGGCACTGCTTTCGCAGTCAATCGACGACATTGTCGTTGAGCAGTGCCAAA\n+AGCTGGGCATCTGTGGTGTGGAGCCGCGGGAGGAGGGACAGCTCATATCCCTACAGCCGGTTGCGGTGGTACACTTCAGC\n+GGGAATGGCTCTCCGCTGCAGCAACTCCAGAACTTGAGAATCTACTCACCGCAACAAACAGAGCTACCTAGTTCCGATGG\n+CGAAGTCTTCCAGCGCTTTTTGATGGACGCCACGTAG\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_augustus/Genus_species.discrepency.report.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_augustus/Genus_species.discrepency.report.txt Mon Oct 04 19:38:37 2021 +0000
b
@@ -0,0 +1,139 @@
+Discrepancy Report Results
+
+Summary
+DISC_PROTEIN_NAMES:All proteins have same name "hypothetical protein"
+DISC_SOURCE_QUALS_ASNDISC:taxname (all present, all same)
+DISC_FEATURE_COUNT:gene: 18 present
+DISC_FEATURE_COUNT:CDS: 18 present
+DISC_FEATURE_COUNT:mRNA: 18 present
+DISC_COUNT_NUCLEOTIDES:4 nucleotide Bioseqs are present
+JOINED_FEATURES:32 features have joined locations.
+NO_ANNOTATION:2 bioseqs have no features
+DISC_QUALITY_SCORES:Quality scores are missing on all sequences.
+ONCALLER_COMMENT_PRESENT:4 comment descriptors were found (all same)
+MISSING_GENOMEASSEMBLY_COMMENTS:4 bioseqs are missing GenomeAssembly structured comments
+MOLTYPE_NOT_MRNA:4 molecule types are not set as mRNA.
+TECHNIQUE_NOT_TSA:4 technique are not set as TSA
+MISSING_STRUCTURED_COMMENT:4 sequences do not include structured comments.
+MISSING_PROJECT:22 sequences do not include project.
+DISC_INCONSISTENT_MOLINFO_TECH:Molinfo Technique Report (some missing, all same)
+
+
+Detailed Report
+
+DiscRep_ALL:DISC_PROTEIN_NAMES::All proteins have same name "hypothetical protein"
+
+DiscRep_ALL:DISC_SOURCE_QUALS_ASNDISC::taxname (all present, all same)
+DiscRep_SUB:DISC_SOURCE_QUALS_ASNDISC::4 sources have 'Genus species' for taxname
+DiscRep_ALL:DISC_FEATURE_COUNT::gene: 18 present
+DiscRep_ALL:DISC_FEATURE_COUNT::CDS: 18 present
+DiscRep_ALL:DISC_FEATURE_COUNT::mRNA: 18 present
+DiscRep_ALL:DISC_COUNT_NUCLEOTIDES::4 nucleotide Bioseqs are present
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:JOINED_FEATURES::32 features have joined locations.
+DiscRep_SUB:JOINED_FEATURES::32 features have joined location but no exception
+genome:CDS hypothetical protein (sample:2126-2199, 2258-3224, 3284-3490, 3549-3863) FUN_000002
+genome:mRNA hypothetical protein (sample4:2126-2199, 2258-3224, 3284-3490, 3549-3863) FUN_000017
+genome:CDS hypothetical protein (sample4:2126-2199, 2258-3224, 3284-3490, 3549-3863) FUN_000017
+genome:mRNA hypothetical protein (sample:2126-2199, 2258-3224, 3284-3490, 3549-3863) FUN_000002
+genome:CDS hypothetical protein (sample4:c5494-4930, c4759-4248) FUN_000018
+genome:mRNA hypothetical protein (sample4:c5494-4930, c4759-4248) FUN_000018
+genome:mRNA hypothetical protein (sample:c5802-5797, c5539-4883) FUN_000003
+genome:CDS hypothetical protein (sample:c5802-5797, c5539-4883) FUN_000003
+genome:CDS hypothetical protein (sample:c10557-10549, c10462-8696) FUN_000004
+genome:mRNA hypothetical protein (sample:c10557-10549, c10462-8696) FUN_000004
+genome:mRNA hypothetical protein (sample:c15214-15209, c14648-14247) FUN_000005
+genome:CDS hypothetical protein (sample:c15214-15209, c14648-14247) FUN_000005
+genome:CDS hypothetical protein (sample:c21705-21700, c21515-19533) FUN_000006
+genome:mRNA hypothetical protein (sample:c21705-21700, c21515-19533) FUN_000006
+genome:CDS hypothetical protein (sample:c35679-35675, c35655-35648, c35594-34843) FUN_000007
+genome:mRNA hypothetical protein (sample:c35679-35675, c35655-35648, c35594-34843) FUN_000007
+genome:CDS hypothetical protein (sample:40223-40396, 40659-41234) FUN_000008
+genome:mRNA hypothetical protein (sample:40223-40396, 40659-41234) FUN_000008
+genome:mRNA hypothetical protein (sample:41267-41274, 41437-41444, 41707-42107) FUN_000009
+genome:CDS hypothetical protein (sample:41267-41274, 41437-41444, 41707-42107) FUN_000009
+genome:CDS hypothetical protein (sample:87202-87207, 88054-88320) FUN_000010
+genome:mRNA hypothetical protein (sample:87202-87207, 88054-88320) FUN_000010
+genome:CDS hypothetical protein (sample:94727-94732, 94873-95016, 95449-95583) FUN_000011
+genome:mRNA hypothetical protein (sample:94727-94732, 94873-95016, 95449-95583) FUN_000011
+genome:CDS hypothetical protein (sample:133134-133142, 133209-134539, 134668-135510, 135569-136346) FUN_000012
+genome:mRNA hypothetical protein (sample:133134-133142, 133209-134539, 134668-135510, 135569-136346) FUN_000012
+genome:CDS hypothetical protein (sample:144294-144551, 149012-149244, 149367-149588, 149654-149897, 149952-150112, 150174-150248, 151966-152072, 152314-152429, 152496-152751, 153651-159010, 159150-164491, 167135-168360, 168722-169208, 169350-169416) FUN_000013
+genome:mRNA hypothetical protein (sample:144294-144551, 149012-149244, 149367-149588, 149654-149897, 149952-150112, 150174-150248, 151966-152072, 152314-152429, 152496-152751, 153651-159010, 159150-164491, 167135-168360, 168722-169208, 169350-169416) FUN_000013
+genome:CDS hypothetical protein (sample:192049-192067, 193549-193658, 194041-194455, 194518-194669) FUN_000014
+genome:mRNA hypothetical protein (sample:192049-192067, 193549-193658, 194041-194455, 194518-194669) FUN_000014
+genome:CDS hypothetical protein (sample:c210553-210548, c210474-209044) FUN_000015
+genome:mRNA hypothetical protein (sample:c210553-210548, c210474-209044) FUN_000015
+
+DiscRep_ALL:NO_ANNOTATION::2 bioseqs have no features
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+
+DiscRep_ALL:DISC_QUALITY_SCORES::Quality scores are missing on all sequences.
+
+DiscRep_ALL:ONCALLER_COMMENT_PRESENT::4 comment descriptors were found (all same)
+genome:sample:"Annotated using 1.8.7"
+genome:sample2:"Annotated using 1.8.7"
+genome:sample3:"Annotated using 1.8.7"
+genome:sample4:"Annotated using 1.8.7"
+
+DiscRep_ALL:MISSING_GENOMEASSEMBLY_COMMENTS::4 bioseqs are missing GenomeAssembly structured comments
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:MOLTYPE_NOT_MRNA::4 molecule types are not set as mRNA.
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:TECHNIQUE_NOT_TSA::4 technique are not set as TSA
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:MISSING_STRUCTURED_COMMENT::4 sequences do not include structured comments.
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:MISSING_PROJECT::22 sequences do not include project.
+genome:sample (length 215740)
+genome:ncbi:FUN_000001-T1 (length 124)
+genome:ncbi:FUN_000002-T1 (length 520)
+genome:ncbi:FUN_000003-T1 (length 220)
+genome:ncbi:FUN_000004-T1 (length 591)
+genome:ncbi:FUN_000005-T1 (length 135)
+genome:ncbi:FUN_000006-T1 (length 662)
+genome:ncbi:FUN_000007-T1 (length 254)
+genome:ncbi:FUN_000008-T1 (length 249)
+genome:ncbi:FUN_000009-T1 (length 138)
+genome:ncbi:FUN_000010-T1 (length 90)
+genome:ncbi:FUN_000011-T1 (length 94)
+genome:ncbi:FUN_000012-T1 (length 986)
+genome:ncbi:FUN_000013-T1 (length 4717)
+genome:ncbi:FUN_000014-T1 (length 231)
+genome:ncbi:FUN_000015-T1 (length 478)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+genome:ncbi:FUN_000016-T1 (length 124)
+genome:ncbi:FUN_000017-T1 (length 520)
+genome:ncbi:FUN_000018-T1 (length 358)
+
+DiscRep_ALL:DISC_INCONSISTENT_MOLINFO_TECH::Molinfo Technique Report (some missing, all same)
+DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::technique (all missing)
+DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::4 Molinfos are missing field technique
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_augustus/Genus_species.error.summary.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_augustus/Genus_species.error.summary.txt Mon Oct 04 19:38:37 2021 +0000
b
@@ -0,0 +1,1 @@
+     2 WARNING: SEQ_FEAT.ShortExon
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_augustus/Genus_species.gbk
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_augustus/Genus_species.gbk Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,4258 @@\n+LOCUS       sample                215740 bp    DNA     linear       21-JUL-2021\n+DEFINITION  Genus species.\n+ACCESSION   \n+VERSION\n+KEYWORDS    .\n+SOURCE      Genus species\n+  ORGANISM  Genus species\n+            Unclassified.\n+REFERENCE   1  (bases 1 to 215740)\n+  AUTHORS   Palmer,J.M.\n+  TITLE     Direct Submission\n+  JOURNAL   Submitted (21-JUL-2021) CFMR, USDA Forest Service, 1 Gifford\n+            Pinchot Drive, Madison, WI 53726, USA\n+COMMENT     \'Annotated using 1.8.7\'.\n+FEATURES             Location/Qualifiers\n+     source          1..215740\n+                     /organism="Genus species"\n+                     /mol_type="genomic DNA"\n+     gene            complement(1092..1466)\n+                     /locus_tag="FUN_000001"\n+     mRNA            complement(1092..1466)\n+                     /locus_tag="FUN_000001"\n+                     /product="hypothetical protein"\n+     CDS             complement(1092..1466)\n+                     /locus_tag="FUN_000001"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUN_000001-T1"\n+                     /translation="MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAG\n+                     ESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKRFIRNPNYVKANEFYDKMLSSEY\n+                     VSKRYKDLPPPHPGFGADQPPA"\n+     gene            2126..3863\n+                     /locus_tag="FUN_000002"\n+     mRNA            join(2126..2199,2258..3224,3284..3490,3549..3863)\n+                     /locus_tag="FUN_000002"\n+                     /product="hypothetical protein"\n+     CDS             join(2126..2199,2258..3224,3284..3490,3549..3863)\n+                     /locus_tag="FUN_000002"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUN_000002-T1"\n+                     /translation="MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFD\n+     
                LKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLGVTVDLINRELYQADSMTLLDPA\n+                     DEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK\n+                     KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPC\n+                     AQVIFDSDPAPAGKNVPAQLEEMSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFI\n+                     NGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGIYYNELETRVRLNKR\n+                     RVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIG\n+                     ETEKTSEDAAVGAQAASGADSPAQDLDLAPALGPAAAQSLVLGLVAAPDHAQIRRQDP\n+                     RNPDPDRDRYHVPDPVPSPALGRVLGRDPSPVPDHVRAPDLALGRDRPAGLAVARLLV\n+                     QDPALEAPQMND"\n+     gene            complement(4883..5802)\n+                     /locus_tag="FUN_000003"\n+     mRNA            complement(join(4883..5539,5797..5802))\n+                     /locus_tag="FUN_000003"\n+                     /product="hypothetical protein"\n+     CDS             complement(join(4883..5539,5797..5802))\n+                     /locus_tag="FUN_000003"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUN_000003-T1"\n+                     /translation="MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPN\n+                     PSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGNNSTCTASQPYALSGALSMLQQS\n+                     PSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+                     KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCEVCGEYRSNPLISRPL\n+                     FC"\n+     gene            complement(8696..10557)\n+                     /locus_tag="FUN_000004"\n+     mRNA            complement(join(8696..10462,10549..10557))\n+                     /locus_tag="FUN_000004"\n+                     /product="hypothetical protein"\n+     CDS             complement(join(8696..10462,10549..10557))\n+                     /locus_tag="FUN_000004"\n+                     /codon_start=1\n+                     
/product="hypothetical protein"\n+                     /protein_id="ncbi:FUN_000004-T'..b'1 agcttttggc actgctcaac gacaatgtcg tcgattgact gcgaaagcag tgcctcctgc\n+     4501 tcaggatcca caatggagct gctgttgatg gcagctattt cggcgctcgc tggtgaaggc\n+     4561 tgagtatgcg agtcgttgcc ctccaggaac gcctttatca gctgctccgg tgtctgggcc\n+     4621 tcctcggtgg gacatcgatg ggtcctttga ctgaccttgt acctgaacgt cttttggcag\n+     4681 agctcgcact tgtagggcat cactcccgtg tggatgcgcg tgtggacaag gaaagagact\n+     4741 cgctgccgga aacacttgcc tgtggacgag tccgaaatac gaaaaggtta gacatggagt\n+     4801 gacccggaaa agaaggtata cctctccttc atttaaagta aatagggcaa atcgcaatgg\n+     4861 agtatgctca tttataagct ggctaacaaa ataaggggcg gctaattaag gggtttgatc\n+     4921 gatactcacc gcagacttcg cacttgaagg gcttttcgcc gctgtgaatc ctctggtggt\n+     4981 tatgcagcgt agacagttcc ttgaaggcgc gtccacaaac cccgcagaca tggggcttta\n+     5041 cctcgctgtg gtagagcaaa tgcttgtggt acgactgctg gaaggtgaag gtcctggcgc\n+     5101 agatctcgca tgtgtacggc atctcgccgg tgtggagccg cttgtgcttc ttcagaaagt\n+     5161 acttggtggt gaaggacttg ctgcacacat cgcactccca cagcttcggg gtggccgtgc\n+     5221 ccgactccgg cgaactgggg gactgctgga gcatgctcag cgccccactc agcgcgtatg\n+     5281 gctgggatgc ggtgcacgtg gagttatttc cgttgccaag ggctccaggc tctgtgtgtc\n+     5341 gaatgcgatc gcaaatgctc agcttgggcg tggcagtaac tgcactggtg gtagaagcgg\n+     5401 gtgtaggact gggattagga tttggattgg gattggagca gggcacgccc atcatgtgca\n+     5461 ctactttcag gtggattcga agggagccct tcattcggaa cgtcttggag cagagatggc\n+     5521 acttgtaggg ctcctggtcc tgtataaagc aataatcgga atttcactta tatttatcaa\n+     5581 ttcatcaata tgccctcatg gccaaatatt ccattacatt accgtctgtc tctcagtttc\n+     5641 aaatttatgc acaaaaatca ttcactttca ttcactatat cacaaagttg ccatggtttt\n+     5701 aaattgatca aaaacaaatt aatatctatc atatatatac atagtcatat gaacagttga\n+     5761 aaaattaatt gaaaataatg ggaacgatat acgtacatac atcagttgtt tttaaaatat\n+     5821 aagggtatat agatttcttt cttgttgttg ttgattttaa ttacgtcaaa cttttgtttc\n+     5881 agattcaatg taaatggtct agctttttaa 
gtatgatttt tttttgctgc cagtgagcat\n+     5941 agaaaaaaaa aatcaaaatc gatataagaa tatgcgaaag tgcattacga aactctttag\n+     6001 ataatagcac ttaatatatg tacatagcca atagttaccg gttccttctg ttggggttcc\n+     6061 ttttgcttgg gttctccctc cgcattttcg tggactaagc ggacgtgcat gtccctcagc\n+     6121 tcggtattcc ggaaactgaa ctcgcaaatg tcgcagtggg cgggcggggt ggtccgcaca\n+     6181 ggcggtgggg ttgggacgac gggcttggac ctggttcgct tggccctccg tttgggagga\n+     6241 gctgcggcaa ggaagccccg ggacgaggcg ggttggccat tcgctggact ctcgtttccc\n+     6301 tcctgagcca tcagacttgt gtgcgagaac aggtggatgg tgagcttgtc cagccccagg\n+     6361 aagagctcct tgcagttggc aaagggacag gccagtgggc cgttggccgc cttgatcagc\n+     6421 ctctgctgca gtgcgtcaat actgccgaaa ctgggcaccg cgcagagcgg acacagcacc\n+     6481 gaggtggaac acatttcgcc agtgcactca atcgaatctt atgcaaatgc ttcacctcct\n+     6541 attgggatta tcctcctgtt cggtctgtga tcatctattc aggagtccat tcccagactg\n+     6601 cctagtcttt ctgctttcaa aattttctaa aaatatcagc aagtgaagat ttttgaaaac\n+     6661 tttgggccca gcaatctgac ttctcggcac cgatgccagc taacgaaata atgaaaaata\n+     6721 atgaaatgcc cggcgcggat cgtcgaatcg tcaagaagac tttcggaaac actcgcagca\n+     6781 ccgaaatccc atctctcgaa caaggcagtc tcttttctcc gtgtctctgg gtagctcatt\n+     6841 tcgaaatata gctctgagca cggctatata ctatatgtat gtagaattat ttctggccga\n+     6901 tatatgttgc actggcggcc atatagcctt cgttctagtc tttgtaacgc acgatgcgca\n+     6961 ggagcaattc gctgagatga ccacatgcga tttgcgggac ttatctagag atctatcatt\n+     7021 atcgccagat tggtttaata attggctttt ccgccaatat ccaattggaa tatggttggt\n+     7081 tactgcaatt gtcgctccat tttttaagca ctccataaaa agtaaacaca ttaatatgta\n+     7141 ctcttattgg agatttcttc tttcgatttt agtttcggac cagtgaaaat cattcgtttc\n+     7201 attttcgtaa ataagaactg agaaaatatt attattatat atatttcttt attaggaaaa\n+     7261 tacgaagatt gagtatttca gattgaatta gcatatccgt ctaaatctta atgctgtaat\n+     7321 gagcttactt gagatctgat caaaaccaat acaaaaccca caccaaaggt ggtagctaat\n+     7381 atacatattt tgtgtaatac ttttgtagag tatttactat tcagcgattt aaacaagcaa\n+     7441 
tcgcctagac acacacattt gtccgcctat gtgtatgtgc accgagctat acccccactg\n+     7501 aatcgctgtg tgctattttt atggccgcga tgctctcttg ttttgacccg cttgggcaac\n+//\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_augustus/Genus_species.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_augustus/Genus_species.gff3 Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,151 @@\n+##gff-version 3\n+sample\tfunannotate\tgene\t1092\t1466\t.\t-\t.\tID=FUN_000001;\n+sample\tfunannotate\tmRNA\t1092\t1466\t.\t-\t.\tID=FUN_000001-T1;Parent=FUN_000001;product=hypothetical protein;\n+sample\tfunannotate\texon\t1092\t1466\t.\t-\t.\tID=FUN_000001-T1.exon1;Parent=FUN_000001-T1;\n+sample\tfunannotate\tCDS\t1092\t1466\t.\t-\t0\tID=FUN_000001-T1.cds;Parent=FUN_000001-T1;\n+sample\tfunannotate\tgene\t2126\t3863\t.\t+\t.\tID=FUN_000002;\n+sample\tfunannotate\tmRNA\t2126\t3863\t.\t+\t.\tID=FUN_000002-T1;Parent=FUN_000002;product=hypothetical protein;\n+sample\tfunannotate\texon\t2126\t2199\t.\t+\t.\tID=FUN_000002-T1.exon1;Parent=FUN_000002-T1;\n+sample\tfunannotate\texon\t2258\t3224\t.\t+\t.\tID=FUN_000002-T1.exon2;Parent=FUN_000002-T1;\n+sample\tfunannotate\texon\t3284\t3490\t.\t+\t.\tID=FUN_000002-T1.exon3;Parent=FUN_000002-T1;\n+sample\tfunannotate\texon\t3549\t3863\t.\t+\t.\tID=FUN_000002-T1.exon4;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t2126\t2199\t.\t+\t0\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t2258\t3224\t.\t+\t1\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t3284\t3490\t.\t+\t0\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t3549\t3863\t.\t+\t0\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tgene\t4883\t5802\t.\t-\t.\tID=FUN_000003;\n+sample\tfunannotate\tmRNA\t4883\t5802\t.\t-\t.\tID=FUN_000003-T1;Parent=FUN_000003;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t5797\t5802\t.\t-\t.\tID=FUN_000003-T1.exon1;Parent=FUN_000003-T1;\n+sample\tfunannotate\texon\t4883\t5539\t.\t-\t.\tID=FUN_000003-T1.exon2;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t5797\t5802\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t4883\t5539\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tgene\t8696\t10557\t.\t-\t.\tID=FUN_000004;\n+sample\tfunannotate\tmRNA\t8696\t10557\t.\t-\t.\tID=FUN_000004-T1;Parent=FUN_000004;product=hypothetical protein;\n+sample\tfunannotate\texon\t10549\t10557\t.\t-\t.\tID=FUN_000004-T1.exon1;Parent=FUN_000004-T1;\n+sample\tfunannotate\texon\t8696\t10462\t.\t-\t.\tID=FUN_000004-T1.exon2;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t10549\t10557\t.\t-\t0\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t8696\t10462\t.\t-\t0\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tgene\t14247\t15214\t.\t-\t.\tID=FUN_000005;\n+sample\tfunannotate\tmRNA\t14247\t15214\t.\t-\t.\tID=FUN_000005-T1;Parent=FUN_000005;product=hypothetical protein;\n+sample\tfunannotate\texon\t15209\t15214\t.\t-\t.\tID=FUN_000005-T1.exon1;Parent=FUN_000005-T1;\n+sample\tfunannotate\texon\t14247\t14648\t.\t-\t.\tID=FUN_000005-T1.exon2;Parent=FUN_000005-T1;\n+sample\tfunannotate\tCDS\t15209\t15214\t.\t-\t0\tID=FUN_000005-T1.cds;Parent=FUN_000005-T1;\n+sample\tfunannotate\tCDS\t14247\t14648\t.\t-\t0\tID=FUN_000005-T1.cds;Parent=FUN_000005-T1;\n+sample\tfunannotate\tgene\t19533\t21705\t.\t-\t.\tID=FUN_000006;\n+sample\tfunannotate\tmRNA\t19533\t21705\t.\t-\t.\tID=FUN_000006-T1;Parent=FUN_000006;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t21700\t21705\t.\t-\t.\tID=FUN_000006-T1.exon1;Parent=FUN_000006-T1;\n+sample\tfunannotate\texon\t19533\t21515\t.\t-\t.\tID=FUN_000006-T1.exon2;Parent=FUN_000006-T1;\n+sample\tfunannotate\tCDS\t21700\t21705\t.\t-\t0\tID=FUN_000006-T1.cds;Parent=FUN_000006-T1;\n+sample\tfunannotate\tCDS\t19533\t21515\t.\t-\t0\tID=FUN_000006-T1.cds;Parent=FUN_000006-T1;\n+sample\tfunannotate\tgene\t34843\t35679\t.\t-\t.\tID=FUN_000007;\n+sample\tfunannotate\tmRNA\t34843\t35679\t.\t-\t.\tID=FUN_000007-T1;Parent=FUN_000007;product=hypothetical protein;\n+sample\tfunannotate\texon\t35675\t35679\t.\t-\t.\tID=FUN_000007-T1.exon1;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t35648\t35655\t.\t-\t.\tID=FUN_000007-T1.exon2;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t34843\t35594\t.\t-\t.\tID=FUN_000007-T1.exon3;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t35675\t35679\t.\t-\t0\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t35648\t35655\t.\t-\t1\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t34843\t35594\t.\t-\t2\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tgene\t40223\t41234\t.\t+\t.\tID=FUN_000008;\n+sample\tfunannotate\t'..b'N_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t149952\t150112\t.\t+\t0\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t150174\t150248\t.\t+\t1\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t151966\t152072\t.\t+\t1\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t152314\t152429\t.\t+\t2\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t152496\t152751\t.\t+\t0\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t153651\t159010\t.\t+\t2\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t159150\t164491\t.\t+\t0\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t167135\t168360\t.\t
+\t1\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t168722\t169208\t.\t+\t2\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t169350\t169416\t.\t+\t1\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tgene\t192049\t194669\t.\t+\t.\tID=FUN_000014;\n+sample\tfunannotate\tmRNA\t192049\t194669\t.\t+\t.\tID=FUN_000014-T1;Parent=FUN_000014;product=hypothetical protein;\n+sample\tfunannotate\texon\t192049\t192067\t.\t+\t.\tID=FUN_000014-T1.exon1;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t193549\t193658\t.\t+\t.\tID=FUN_000014-T1.exon2;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t194041\t194455\t.\t+\t.\tID=FUN_000014-T1.exon3;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t194518\t194669\t.\t+\t.\tID=FUN_000014-T1.exon4;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t192049\t192067\t.\t+\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t193549\t193658\t.\t+\t2\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t194041\t194455\t.\t+\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t194518\t194669\t.\t+\t2\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tgene\t209044\t210553\t.\t-\t.\tID=FUN_000015;\n+sample\tfunannotate\tmRNA\t209044\t210553\t.\t-\t.\tID=FUN_000015-T1;Parent=FUN_000015;product=hypothetical protein;\n+sample\tfunannotate\texon\t210548\t210553\t.\t-\t.\tID=FUN_000015-T1.exon1;Parent=FUN_000015-T1;\n+sample\tfunannotate\texon\t209044\t210474\t.\t-\t.\tID=FUN_000015-T1.exon2;Parent=FUN_000015-T1;\n+sample\tfunannotate\tCDS\t210548\t210553\t.\t-\t0\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample\tfunannotate\tCDS\t209044\t210474\t.\t-\t0\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample4\tfunannotate\tgene\t1092\t1466\t.\t-\t.\tID=FUN_000016;\n+sample4\tfunannotate\tmRNA\t1092\t1466\t.\t-\t.\tID=FUN_000016-T1;Parent=FUN_000016;product=hypothetical 
protein;\n+sample4\tfunannotate\texon\t1092\t1466\t.\t-\t.\tID=FUN_000016-T1.exon1;Parent=FUN_000016-T1;\n+sample4\tfunannotate\tCDS\t1092\t1466\t.\t-\t0\tID=FUN_000016-T1.cds;Parent=FUN_000016-T1;\n+sample4\tfunannotate\tgene\t2126\t3863\t.\t+\t.\tID=FUN_000017;\n+sample4\tfunannotate\tmRNA\t2126\t3863\t.\t+\t.\tID=FUN_000017-T1;Parent=FUN_000017;product=hypothetical protein;\n+sample4\tfunannotate\texon\t2126\t2199\t.\t+\t.\tID=FUN_000017-T1.exon1;Parent=FUN_000017-T1;\n+sample4\tfunannotate\texon\t2258\t3224\t.\t+\t.\tID=FUN_000017-T1.exon2;Parent=FUN_000017-T1;\n+sample4\tfunannotate\texon\t3284\t3490\t.\t+\t.\tID=FUN_000017-T1.exon3;Parent=FUN_000017-T1;\n+sample4\tfunannotate\texon\t3549\t3863\t.\t+\t.\tID=FUN_000017-T1.exon4;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tCDS\t2126\t2199\t.\t+\t0\tID=FUN_000017-T1.cds;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tCDS\t2258\t3224\t.\t+\t1\tID=FUN_000017-T1.cds;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tCDS\t3284\t3490\t.\t+\t0\tID=FUN_000017-T1.cds;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tCDS\t3549\t3863\t.\t+\t0\tID=FUN_000017-T1.cds;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tgene\t4248\t5494\t.\t-\t.\tID=FUN_000018;\n+sample4\tfunannotate\tmRNA\t4248\t5494\t.\t-\t.\tID=FUN_000018-T1;Parent=FUN_000018;product=hypothetical protein;\n+sample4\tfunannotate\texon\t4930\t5494\t.\t-\t.\tID=FUN_000018-T1.exon1;Parent=FUN_000018-T1;\n+sample4\tfunannotate\texon\t4248\t4759\t.\t-\t.\tID=FUN_000018-T1.exon2;Parent=FUN_000018-T1;\n+sample4\tfunannotate\tCDS\t4930\t5494\t.\t-\t0\tID=FUN_000018-T1.cds;Parent=FUN_000018-T1;\n+sample4\tfunannotate\tCDS\t4248\t4759\t.\t-\t2\tID=FUN_000018-T1.cds;Parent=FUN_000018-T1;\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_augustus/Genus_species.mrna-transcripts.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_augustus/Genus_species.mrna-transcripts.fa Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,421 @@\n+>FUN_000001-T1 FUN_000001\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000002-T1 FUN_000002\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGgatctggatctggctccggctcTCGGGCCAgc\n+agccgctcaaagtctggttctcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCCAGAAATCCGG\n+atccagatcgagatcggtatcacgttcccgatcccgttccaagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagt\n+ccggttcccgatcacgttcgggctccagatctggctct
gggtcgcgatcgCCCAGCCGGTCTCGCAGTGGCTCGCCTTCT\n+GGTTCAGGATCCAGCTCTGGAAGCGCCTCAGATGAATGATTAA\n+>FUN_000003-T1 FUN_000003\n+ATGTATGACCAGGAGCCCTACAAGTGCCATCTCTGCTCCAAGACGTTCCGAATGAAGGGCTCCCTTCGAATCCACCTGAA\n+AGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAATCCCAGTCCTACACCCGCTTCTACCACCA\n+GTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACACAGAGCCTGGAGCCCTTGGCAACGGAAAT\n+AACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGCATGCTCCAGCAGTCCCCCAGTTCGCCGGA\n+GTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTCCTTCACCACCAAGTACTTTCTGAAGAAGC\n+ACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCAGGACCTTCACCTTCCAGCAGTCGTACCAC\n+AAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGTGGACGCGCCTTCAAGGAACTGTCTACGCT\n+GCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAAGTCTGCGGTGAGTATCGATCAAACCCCTTAA\n+TTAGCCGCCCCTTATTTTGTTAG\n+>FUN_000004-T1 FUN_000004\n+ATGATACATTCGGCTAAGACTACCTTGGTGGAGTGGACAGAGGCCATGGAAGACGGAAACAAGGGCTACCAGCTAATCGA\n+AAAGTACTACCTCGATGACCAGCAAAAGGCACGGGAGCTGAACATTAAGCGTCAGCTCTTGCAGGCGGACATCGACAAGC\n+GGCGCAAGCAGGTGGTGCTCCTCTATGACGAGCAAATGACGCTGGAGAAGAATCTGGAGCGAACCGCTTGTCTGTACAGG\n+TCTGCCCACGCGGAACGCCGCCAGATGGTGGAGACGTGGAAGAGCGCCGTGAACCAGATGACCCAGCGAGAGCACGACAT\n+TCAGCGCAGCGAGATAGAGTGTGCGGAGCTGGCCCAGAAAGCCCAGCAGACGGCTCAGACCTACAAGGAGTACGACAACC\n+AGCTAACCGAGGTCATCGAGAACAACCGCCAGGTGGAGCTCGCGATCGAGTCCCTCAACGAGGAAAGCTCCGACATGAAA\n+AACCAAATCCAGATCCTGATTGATGCCACATTACTGAAGGAGCGCGAGATCGACGGACTGCGTCGGGAACTGGAGAACCT\n+TTCGAACCGGGTTCACCTGCAGCGCATGGATAACCGTAGTCAGATGAAAAAGCGCGATGAGAAGGCCAAGGAGCTGGAGA\n+ACTTCGCTTCGGTGATGGAGAAGGTTAACGCTCGGCTGAAGTCTGTACAGAACAAGGCCCTCAATGCGGAGCAGCGGCTG\n+CAAATTCTGGAGGAGATGATGCAGGCGGAGGAGACGGCTCTGCGAAACCTGGACAAGGAGCAGGAGAAGGTCAACGAGAT\n+GTTGTACCGCACCCAGAGGCAGGTCATCGAGCTGCAGGACGAGGAGAAGGTCTTGAAGGTGCAGAACGACTCGCTCAACT\n+CCAACCTGGCAGCGATCAATCGCAATCAGCAGCAGGTCAATAACGAACTCAAGCGGCAGACGGAGATCCATTACAGCCTG\n+TCCTTTAAGTGCTTGGAGGCGGAGCGGCGTCTCGCCGAGATCAAGGGTCTGGCAGATGACCCCGAGGTAGAGGCCACAAA\n+CATGGCCCGGCTTAACACTCTGGAGCAGGAGTATGAAAAGTTGCAACGCCTCATCGCCACCACGGAGGCGCAGAATAAGA\n+A
GCTAAACTATAACATGAACAACCTGGTCGTCCAGTACAACGCCGACGAGAAGGA'..b'TCCTTTG\n+ACAATCTGCCCGCGTTGGAACAACTACTTCTAAGGAACTGTGGGTTGGGCAACATCGAGTGGGAAATCGTGAGGCCAAGA\n+CTCAAATTGTTGCACTATTTGGACTTGGGTGGAGCTCAAAAGACGGGCAACTATGAACATCAGTTGGACGTATCCGCGTT\n+CTCTCCTGAGACAACTACTAACGCTGAAGAGATATCTACTATCCTAGCGAAGAGAGCGATGGCTCCAGAAGTAGTGGGCA\n+CCACTACACTAGGACCTACTACTTCCATAGAAATTTCACCACCATCAACACAAAGTACAACAACACCCAAAGAAGAAAGT\n+ACATCTATGACAGAAACAACGATATTAACAACACCATCGCCAAAATGCGAAGAGGAACTTTGCCAGGATCTAGAGTGCTC\n+CAGAATTACCACCGATACGGTTGCATCTGCAGATCTGGGAAAATCGTCGTGCCAGGATGGTTTACTGGTGGAGATCTGCG\n+AATCGACTTGCACCACTCCCACATTCTTCTGTGTGATATTAGGCGAGAACTTTACCTCCGCATCCAACTGTTGTTCCCAC\n+CATACCATGCGATGTGTGGTCTCTGCACAGGTCTCCTGGTTCGAGGACCACAGCGGACTGGTCATTGGCTTGGGAGTGGG\n+TCTCCTCTTCATCGGCAGCTTCCTCGGCATGCTCATCGTCTTCGGAACCCTGCGCCTGAATCCATCCTGGTTGCGAGGCA\n+ATAAGCGGCGGGAGTCGAATACGATAGGACTGATTCAGGGAAGATTTGAGAAGGACCCGTATGAGCAAGTGGGGTAG\n+>FUN_000016-T1 FUN_000016\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000017-T1 
FUN_000017\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGgatctggatctggctccggctcTCGGGCCAgc\n+agccgctcaaagtctggttctcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCCAGAAATCCGG\n+atccagatcgagatcggtatcacgttcccgatcccgttccaagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagt\n+ccggttcccgatcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGTCTCGCAGTGGCTCGCCTTCT\n+GGTTCAGGATCCAGCTCTGGAAGCGCCTCAGATGAATGATTAA\n+>FUN_000018-T1 
FUN_000018\n+ATGAAGGGCTCCCTTCGAATCCACCTGAAAGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAA\n+TCCCAGTCCTACACCCGCTTCTACCACCAGTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACA\n+CAGAGCCTGGAGCCCTTGGCAACGGAAATAACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGC\n+ATGCTCCAGCAGTCCCCCAGTTCGCCGGAGTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTC\n+CTTCACCACCAAGTACTTTCTGAAGAAGCACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCA\n+GGACCTTCACCTTCCAGCAGTCGTACCACAAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGT\n+GGACGCGCCTTCAAGGAACTGTCTACGCTGCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAAGT\n+CTGCGGCAAGTGTTTCCGGCAGCGAGTCTCTTTCCTTGTCCACACGCGCATCCACACGGGAGTGATGCCCTACAAGTGCG\n+AGCTCTGCCAAAAGACGTTCAGGTACAAGGTCAGTCAAAGGACCCATCGATGTCCCACCGAGGAGGCCCAGACACCGGAG\n+CAGCTGATAAAGGCGTTCCTGGAGGGCAACGACTCGCATACTCAGCCTTCACCAGCGAGCGCCGAAATAGCTGCCATCAA\n+CAGCAGCTCCATTGTGGATCCTGAGCAGGAGGCACTGCTTTCGCAGTCAATCGACGACATTGTCGTTGAGCAGTGCCAAA\n+AGCTGGGCATCTGTGGTGTGGAGCCGCGGGAGGAGGGACAGCTCATATCCCTACAGCCGGTTGCGGTGGTACACTTCAGC\n+GGGAATGGCTCTCCGCTGCAGCAACTCCAGAACTTGAGAATCTACTCACCGCAACAAACAGAGCTACCTAGTTCCGATGG\n+CGAAGTCTTCCAGCGCTTTTTGATGGACGCCACGTAG\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_augustus/Genus_species.proteins.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_augustus/Genus_species.proteins.fa Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,158 @@\n+>FUN_000001-T1 FUN_000001\n+MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAGESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKR\n+FIRNPNYVKANEFYDKMLSSEYVSKRYKDLPPPHPGFGADQPPA\n+>FUN_000002-T1 FUN_000002\n+MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFDLKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLG\n+VTVDLINRELYQADSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK\n+KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPCAQVIFDSDPAPAGKNVPAQLEE\n+MSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFINGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGI\n+YYNELETRVRLNKRRVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIGETEKTSED\n+AAVGAQAASGADSPAQDLDLAPALGPAAAQSLVLGLVAAPDHAQIRRQDPRNPDPDRDRYHVPDPVPSPALGRVLGRDPS\n+PVPDHVRAPDLALGRDRPAGLAVARLLVQDPALEAPQMND\n+>FUN_000003-T1 FUN_000003\n+MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPNPSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGN\n+NSTCTASQPYALSGALSMLQQSPSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCEVCGEYRSNPLISRPLFC\n+>FUN_000004-T1 FUN_000004\n+MIHSAKTTLVEWTEAMEDGNKGYQLIEKYYLDDQQKARELNIKRQLLQADIDKRRKQVVLLYDEQMTLEKNLERTACLYR\n+SAHAERRQMVETWKSAVNQMTQREHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELAIESLNEESSDMK\n+NQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRDEKAKELENFASVMEKVNARLKSVQNKALNAEQRL\n+QILEEMMQAEETALRNLDKEQEKVNEMLYRTQRQVIELQDEEKVLKVQNDSLNSNLAAINRNQQQVNNELKRQTEIHYSL\n+SFKCLEAERRLAEIKGLADDPEVEATNMARLNTLEQEYEKLQRLIATTEAQNKKLNYNMNNLVVQYNADEKELEMVRFKI\n+KEAQVYCEGTVKRLRQIRYENSELIVDLNMVKMRCSDLEVGIGGCEQGTYDLEQHRLAFRRAIKDRTVELRSQEDVLLLK\n+KKHLNEELSTLRADLGERKKQIEAMKARFELTAQLLGKNEDGSIMTSTQLKVVSAQERQMLADEGDALNKKVLKAEKEVV\n+ALENTLRQFDKSNDNYRKTFRSVDENSKGEL\n+>FUN_000005-T1 FUN_000005\n+MRCVFCGSGSEQQHSRWEIKMLQESCRTDHGFHQDSQAIQYLYEILASYNRDEQRAFLQFVTGSPRLPTGGFKALTPPLT\n+IVRKTLDENQNPNDYLPSVMTCVNYLKLPDYSSREVMRQKLKVAANEGSMSFHLS\n+>FUN_000006-T1 
FUN_000006\n+MSFCDFLRDAALGANSANLSIVAAALSAARDVGGGSDGGGSAGGATPATGASASSVGNTSAVGASSSSNSSAGQAASSNS\n+NNVTATGSGSAPGGGPTSTGTTSGTQHGSGSGAAAAVDSESDDSEVGRLQALLEARGLPPHLFGALGPRVTHILHRTIGN\n+SSSSKANQLLQGLQSHDESQQLQAAIEMCQMLVMGNEDTLAGFPIKQVVPALIQLLRMEHNFDIMNNACRALAYMLEALP\n+RSSGTVVEAVPVFLEKLQVIQCMDVAEQSLSALEILSRRHNKAILQANGISACLTYLDFFSIVAQRAALAIAANCCLNMH\n+PEEFHFVAESLPLLARLLSQQDKKCIESVCSAFCRLVESFQHDGQRLQQIASPDLLKNCQQLLLVTPAILNTGTFTAVVR\n+MLSLMCCSCPDLAISLLRNDIAATLLYLLTGNAEPAAASATHVELISRSPSELYELTCLIGELMPRLPLDGIFAVDSLLD\n+RPTLNTQDQVHWQWRDDRGSWHNYSTIDSRLIEAANQSSEDEISLSTFGRTYTVDFHAMQQINEDTGTTRPVQRRLNHNY\n+VAPMSAGQDLTTTSAGSAAAGGASTSAAAAAASSNNNNNNNNNPPGNSVNLNQVKRRPSLDARIACLKVRNKMFCLFTRY\n+TLKLNNLPNYNVFFMKYFRRHS\n+>FUN_000007-T1 FUN_000007\n+MKIRYCVMILGSRQMHFIVNEDQSSAASPLVWAGITAEEYFPEYRMEAAHPDQEYIVLGVSSANLGRALSVLRGGGVNSC\n+KLKLQRIQFPCISVIASVLTSSSTEAREVVHDVPVTIIPGSDWSAYVVPRVPNSQLALGLPSLRLLKSLIDKLKNISPSL\n+EFQVNVDGELNVIATSEMSTVTSRFQKLLIRTVSGSQQEASCSVDSRKASAFFGALQLPNEELTIGIDREHSIHLQIDVR\n+QDVVLHSILPAVCM\n+>FUN_000008-T1 FUN_000008\n+MCGNPAVGNGTRALILVGGYGTRLRPLTLSTPKPLVEFANKPILLHQLEALVDAGCRQVILAVSYRAEQMEKELKVEAKK\n+LGVELIFSHETEPLGTAGPLALAKTILAASSEPFFVLNSDVICDFPFKQLVQFHCNHGKEGTIVVTKVEEPSKYGVVLYD\n+ENGCIKNFIEKPQEFVSNKINAGIYIFNPSVLDRIEVKPTSIEKEVFPEMTQQQELYAMDLTGFWMDIGQPKDFLTGTTD\n+RITRLLAQS\n+>FUN_000009-T1 FUN_000009\n+MWIVNCMCLYLSSLRQKQSPKLYTGPGVVGNVLVDPTAKIGEGCRIGPNVTIGPDVVIEDGVCIKRSTILKGAIVRSHSW\n+LDSCIVGWRSTVGRWVRIEGITVLGEDVIVKDELYINGGQVLPHKSIAASVPEPQIIM\n+>FUN_000010-T1 FUN_000010\n+MDFIHEAVGTSAAPSANPGPMPLCQPVRLELPFSPRHSFALGHFPFQLCPCLYAVWVQTMGLVLLLLIVFAPFLFRVLFK\n+PCNPKRYLTT\n+>FUN_000011-T1 FUN_000011\n+MKVHGNVDEKSPSHGYDSEGEESSSSSIITGGAQTPPSTRLDGSAGSSSGHHPPSDWYHTTAPSGSAEAMNPLNHFGHHH\n+HHHHLMHPGAATAY\n+>FUN_000012-T1 
FUN_000012\n+MQRGIDSFFKRLPAKAKSAEAENGETPSKAPKRRKAVIISSDEDEVVSPPETKKRKASKTASSEDDVVAATPEPIAKKAR\n+NGQKPALSKLKRHVDPTELFGGETKRVIVPKPKTKAVLEFENEDIDRSLMEVDLDESIKEAAPEKKVHSITRSSPSPKRA\n+KNSSPEPPKPKSTKSKATTPRVKKEKPAADLESSVLTDEERHERKRASAVLYQKYKNRSSCLNPGSKEIPKGSPDCLSGL\n+TFVVTGVLESMEREEAESVIKEYGGKVMTVVGKKLKYLVVGEEAGPKKLAVAEELNIPILSEDGLFDLIREKSGIAKQVK\n+EEKKSPKKEHSSEEKGKKEVKTSRRSSDKKEKEATKLKYGEKHDIAKHKVKEEHTSPKETKDKLNDVPAVTLKVKKEPSS\n+QKEHPPSPRTADLKTLDVVGMAWVDKHKPTSIKEIVGQAGAASNVTKLMNWLSKWYVNHDGNKKPQRPNPWAKNDDGSF'..b'KPSTPTLEKQSPIDLGTSTESYLEPIEERIAKILDRGGARTEDSESSSGGSRKPPRIEK\n+PARANAGKKLSVTRADAGKSGSDRSSQESKSSFDSKGSLSVESRGSFETESSSGSLGAAQRRGELAQKEQQSTWRPFPIE\n+SSNSSSTDDPWHHVETDGGYERYDAQNPLRDSSDSDVKEASPDDQKDASDASYQDELNDFPATFGYPAMTSSLGGIGVNP\n+TDIIGYSTGFTLGRTLSRISERSTASEKSSMEDDVSKASTHSVSMRDESVGSTDHQPSLSSDSRSNTNLAYISDADRRTS\n+AEMPEIPCDSATGDRLSSFGSLNEPKSPTLVTGRFSVTHVDEQQGDDVERHTLMCLSNAGSQDSEDWPLPEIPFDHVPVK\n+PADSLYAMPDLDKPVPKSFCWKASLSFQQSQDSLDWPSPPSSAIGAPIIVENIETYYASEVQSADKVILDEEMAVGPPDV\n+AKVLPYEDTAYLMSAAFDDNDFGNEQLQPDTVSCLSSTLSAASCLSSSLNVSCTTSSTQATARALRKNSSPEVIVAQPTR\n+SPAPRSPLSEDELFSSDDVFMPGTIKVQLSPDAQLRKLSKGSNNSDTSIDDILSGSTTYLEDQTTVRKNYEARLSSGGGG\n+ASCKKCSHSSHSEEETSSLGTDLDGTVRMGGLQQKKCTHSSHSEDTSIGLSISEWSTGTNTVRQYANLSGSDSLSAVSTH\n+SCAKSEKSNQTKSSISSINKSAESLNEQSGGSSFSHKFSGDNGSSDGLRYDMLSNSETDKLSEATSATRSDDTTLTLTEM\n+AHTISEWSTSSSRTLVGVAPGEYLPLKQALSGNKTSLSSPSEEKRCALPQVHRRSGSNGNQARAAQEHADSQTGPETSAA\n+ARKRRSLEMMSKLYQSQEICSESESPFVERLYAHSEKLTERYQSQEFVPLHGGPPASHLASSTTSQIQTQQPQQVRQKPR\n+APQPPTKPKPAVTRPIMQALLNKMKQPGLAEQAAEAAEAEEKKAMIAASAVAAKPPPPPVPTVPPIVTPSDLPGDAVAPP\n+PKPLAKHHSYDDRTLSKTQIREFKTTSKQLRQSSSFHEHMLSKSQQSSQELPMRIDEERDPHSTSSATNTTTTTNTLNSE\n+STEPNSPQMPQRADKLVRCSPYYSSSLSSESPPNQLLQKPPRKTATQLSAGAVAASLKSPPSGNDTDSSLDVRGQEAKMR\n+SRGYRKKRQLPVKRMRANLTAAALLEQAESSECSEGYVPEVDSGSSEYSSCQRDDQYLEFDEELERDQTDDYEDYPQYSG\n+KFESLDMSDNVDEMGFPRYDRLSHITKPMYHQALVMERPNPVQLPAPANHPMPPATGQPVKPARTKKRQFKREDSTAAGT\n+SGHSTAAPQVRPYHGRSYCNPEESEYETRGGGLSDELANSSEDSCSGFGGDAGASGSGTIRRGTTKGAGQDQEQGTGGQA\n+RHVPYPDFLSDYESE
PIEYERYACGLDIRVDPPPKFHDSDELSDQRGEITTPPTRYDLTLGSDKSSSLSRSEAGTYDVIQ\n+AEIQHAKRQELATGVATASHQNGNGNGNGHTLSTQHDIEAEVKKRKWPTEPSYFLAKELLMTERTYKKDLDVLNTTFRQV\n+LSLGDVEQLQPLFELLDSLAQHHNLFLRDIEHRMVQWEGRGGHEAHRIGDVMMKHMAALPIYDEYVQTHLDILHCMNDMY\n+EGDERFRQVYKEFEQQKVCYLPIGELLLKPLNRLLHYQLILERLCDYYGEEHIDYADAMAVHHLLVRSTKGIRSQLPDSA\n+NFVELCELQRDINFEQLVQPHRRLIRQGCLLKHSKRGLQQRMFFLFSDLLLYGSKSPLDQSFRILGHVPVRSLLTENAEH\n+NTFSIFGGQCAITVSAGTTAEKTLWLAELSKAAADIKNRPPNMQLQLTTLKNCSSSEEGLDLFGLSNGNNSSLNSSVNGG\n+GPLTTQQQKLQLQQQQQNRTQPSRSNTALHVCWHRGATVGLGDHLIAAEHQLSGYLLRKFKNSSGWQKLWVVFTSFCLYF\n+YKSYQDEFALASLPLLGYTVGPPGHQDAVQKEFVFKLSFKNHVYFFRAESAHTYNRITCRVDLDLCSARYVQCRSTE\n+>FUN_000014-T1 FUN_000014\n+MVQAETPPYIKQCHRNDPKLVDCFIGAIEHLKPYLANGIPDIQLPSVEPFKMDTLALQLTEGPQGYKITLKNMEAFGASN\n+FKVTSLKLSEGSEPFKAKIVMPKLKIEAKYTSSGVLLILPASGGGDFHANFEGVSADLTGKTSIHAFKGANYLHIDALSL\n+VLDVKDVKMSISGAFNNNRILLEATNLFLRENSQVVLEAMQAQLQKKLASEFGKLANQLLKNVPVEQFYVD\n+>FUN_000015-T1 FUN_000015\n+MASKSFDLVIEEKTKKPERLYQPRRMRWLKYIILPAVFSFALLLILVNVDFSDNSEDSTHLGNDTSLIISGYGFENNTLR\n+RGFFSGGIALHSLVIENCTIVHINDAAFNQESTVNITSLQLINVQLENLTESALEGLQKLQNFTLVNENNHFRPFGFLSA\n+VAESLVSAEIHQSLAAAISYSVCDFLGSRNFPQLKYLDLSGTHLDKSLIKESFDNLPALEQLLLRNCGLGNIEWEIVRPR\n+LKLLHYLDLGGAQKTGNYEHQLDVSAFSPETTTNAEEISTILAKRAMAPEVVGTTTLGPTTSIEISPPSTQSTTTPKEES\n+TSMTETTILTTPSPKCEEELCQDLECSRITTDTVASADLGKSSCQDGLLVEICESTCTTPTFFCVILGENFTSASNCCSH\n+HTMRCVVSAQVSWFEDHSGLVIGLGVGLLFIGSFLGMLIVFGTLRLNPSWLRGNKRRESNTIGLIQGRFEKDPYEQVG\n+>FUN_000016-T1 FUN_000016\n+MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAGESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKR\n+FIRNPNYVKANEFYDKMLSSEYVSKRYKDLPPPHPGFGADQPPA\n+>FUN_000017-T1 
FUN_000017\n+MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFDLKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLG\n+VTVDLINRELYQADSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK\n+KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPCAQVIFDSDPAPAGKNVPAQLEE\n+MSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFINGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGI\n+YYNELETRVRLNKRRVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIGETEKTSED\n+AAVGAQAASGADSPAQDLDLAPALGPAAAQSLVLGLVAAPDHAQIRRQDPRNPDPDRDRYHVPDPVPSPALGRVLGRDPS\n+PVPDHVRAPDLALGRDRPAGLAVARLLVQDPALEAPQMND\n+>FUN_000018-T1 FUN_000018\n+MKGSLRIHLKVVHMMGVPCSNPNPNPNPSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGNNSTCTASQPYALSGALS\n+MLQQSPSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYHKHLLYHSEVKPHVCGVC\n+GRAFKELSTLHNHQRIHSGEKPFKCEVCGKCFRQRVSFLVHTRIHTGVMPYKCELCQKTFRYKVSQRTHRCPTEEAQTPE\n+QLIKAFLEGNDSHTQPSPASAEIAAINSSSIVDPEQEALLSQSIDDIVVEQCQKLGICGVEPREEGQLISLQPVAVVHFS\n+GNGSPLQQLQNLRIYSPQQTELPSSDGEVFQRFLMDAT\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_augustus/Genus_species.scaffolds.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_augustus/Genus_species.scaffolds.fa Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,4554 @@\n+>sample\n+GTGGCCGGCTGATTTCACGTCCTAACTTTGGGCTTAACTGGTTCGCCAGT\n+TGACTTTCTTCGCCATCATGTGATGCATTAATTAAACAATAATTACTAAT\n+TGacagtaattaataattgTGGCAAAAAGCGCGACACGTTTTTTCGGCAA\n+ACTCCTCGGAAGACCGATTGTTTAAAGACGTAGGGAAAAGTAGTTCCCAA\n+GCATTTTAAAAAGATACCTATGACATGTGACACCTTTAAAGTGCAATACA\n+AGTTTTCATCTCTTTATATCCTTTTACTCCCTAATTTGAATATAAAAGGA\n+ATCGCATTGAGAGTATAAAGGCTTTAGTTCTTATCGATagatagttagtG\n+ATGAAATAAAATTATAACCGTGGTTTTAGTTTGAAATGTTGTAAAGACTT\n+TCTTTAAATTTAACCAAATTTATGTGATAAAATGGATATTCCATAGATAA\n+GACATTTAAGTTAAATGTTTTTATACATCAAAAAGGAAACATTGTGCACG\n+CTATCAAATGGTATTCTTAAAATCGAGTCAgttaggtaagttattaatta\n+aATGGTAACTTTTTAACGTGCGTCAAATAATCTAGAAATTCTTCTTCAAC\n+TCATCCAAAACATTCTCAACACCACAATATCTATGCTCAGCGATGACAAA\n+TTTCTCCTGATTTCTTAATTTTCTATCTATGctatgcGATCAATCAACGA\n+ATGTGTGCTAATTTCTTGTGACGATTATTTTGCAAAGTCGTCTCCGCGTT\n+AATATCCGATGTAAATAAACCTATGAAAATCGCAAAGATCTATTCCTTTG\n+CGCTTAACCTTGTTATTGAACTCCCTCCCGCCCGGATTTTCGCAGCTTCC\n+AACTAAGTGATACCTTTTAAACAAACGCCACAACAAAACAGGTGACAATC\n+ATATTTTATCAACAAAAGAAAAGAGAGATAATATCTGCTGCTAATTCAAT\n+TTATTGGGCTTTGTGTTTATTTGCATTGGGAATCCGTGGAGCTGATATTG\n+TTTACTTTGGCAATTTCCCAGTCATTATGGTCGCCGTATAATGTGAATGG\n+CCAACAGGAAAATTTCACAGATTCCACTGGTTATTCACTGTTCATGCTGG\n+CGGTTGATCCGCTCCAAATCCCGGATGAGGCGGCGGAAGATCCTTATACC\n+GCTTACTTACGTATTCACTGCTTAACATCTTATCATAGAATTCGTTAGCT\n+TTCACATAGTTTGGATTCCGAATGAATCTCTTCTGTCGACCTCGCGTTTT\n+TGGCAATTTTCGGAGCTCTTCGCTACTGCTAGTTGATTTAAGGCCTACAT\n+CTTTCTGATCCTTAATCATCCTTTTAGGTGATTCTCCTGCTACCATGGGA\n+TCCACCAGTTTCATATATAATGGATCGGGCTCGTTTGCAAGTGGGTTTCC\n+AGTTTCAGAAGACTCTGACGTTCTAAGCGCCAATAAAATGAAAATAACCA\n+AAAGGAAACTAGACATGTTTCGTTACAGACAGATATAGATTGGATATTAT\n+TGAAAGAAAATgaaaataAACAGCGATAATGATCTGTGACTTATTGGAAA\n+TTAGATGGCTTATGGATGATGGGGCGATAAATTCGAACAAACACTGAGAG\n+CATTTTTGGGAGCATTGTGGGAGCTTTTCTATTATCCAGTACTTTATATA\n+CATATATCATTTATATACTAATCATTTCTGGTAGCCGTTCGTAATCAGGA\n+TCGGATCCTTTTTTACCCGTTAGTCAGCTAGAAGAAACGAAAAATTAAAA\n+TAGTAAAatctaaaagtatacaaaaattcaaatagtaaaaccaaaaagta\n+ttaaaaaaAATATCAATCGTTTTTAAACGTTGATTTTTCAGCTTGTGGGG\n+TGATTT
ATCGCTAACTTGGAAAATGATAATAAAGCATTATCCATAATATT\n+AGTTGTGGAAATGAAATTCAAATAGATGTTGTGTTATATACGATGAGGAT\n+GTTGCATTTGAGTCCCCGGAAATATAGTATTTTTTTTACCGAAGGTATTA\n+TCGTACCGGTCAAGTACGGTCACACTGCCAAGCGCAGATTTGAGGATTTC\n+TAGATTTGGCCTCTTGATGGACTAGAAGCGCTACCAAAACTGGGGCTTGA\n+GTTGAATTACCTGTTGGAAGACACAATGCCACCCACGATCAACAATTCGG\n+CGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAG\n+TAAGTGAACAGATCCCTAAACAGACGCCAGATACTCAGACTGATGTGTAC\n+CTTGCAGATCCGAGATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCG\n+GATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACAGCCACCG\n+CTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACG\n+TGCTGACGGAACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAG\n+CTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACT\n+GCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGC\n+ATTCGAGGACGGTGTCATGGTTGCGCAAATCCGAGTACATCTCCACCGAG\n+CAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGG\n+TTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCG\n+AAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAAGAGCGAA\n+ATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCC\n+TATCTTCCCCGACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATAT\n+TTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAG\n+GAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACA\n+GTTTGTCGCCTACTTCCTGCCCACAGAGCAGACGCTGGAGAAACGCCGTA\n+CAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtac\n+aagATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGG\n+CTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATCTACTACA\n+ACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGC\n+CAGCAACCCAACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCT\n+ATAGCGAGCTTTACTTTGTATTATTTCTACCAGGTTGTCAAGCATCGTCC\n+ATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAG\n+AAGTTCCTGGCGAGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAG\n+CAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGACGCAGCTGT\n+TGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCG\n+ATCGACAGTCTCGTTCTCGGAGTCGAACTCGCAGCGGGTCCAgttcagga\n+tctggatctggctccggctcTCGGGCCAgcagccgctcaaagtctggttc\n+tcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCC\n+AGAAATCCGGatccagatcgagatcggtatcacgttcccgatcccgttcc\n+aagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagtccggt
tcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTT'..b'cgcgttctaggtcgagatccaagtccggttcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTTCTGGTTCAGGATCCAGCTCTGGAAGCGCCTCA\n+GATGAATGATTAATTACAAAAAACGGCGTTCATAATAAATAAGTTTATAA\n+TCAACCAAGTACATTTGAAAACTGAACTAACTCGATTTAATATCATTTTC\n+GCCTCAACTCAGCGCTCGGGTTCGTTGCCCAGAATAGTTTTAAATAAAAT\n+CGGCAGTTTAACATAATTTATATTAGATGTtgttgttgtATTGCAAACAA\n+GTCGGGTCCTAGTCGATTTACACTTGGCTGAGATAAAACAACTAAGATTC\n+AAATAATATCCTCATAAGAAGATGTAATTAAGACGTTTTTCTTAGGGGGT\n+GCTTAGGAATTGATTAGATCGCCTTTGGGGAAGTGCAAACAATGTAAaat\n+gatataaaagggtataaaTTAAGTGGATATATGCATCTTCGTTCCAACTA\n+CGTGGCGTCCATCAAAAAGCGCTGGAAGACTTCGCCATCGGAACTAGGTA\n+GCTCTGTTTGTTGCGGTGAGTAGATTCTCAAGTTCTGGAGTTGCTGCAGC\n+GGAGAGCCATTCCCGCTGAAGTGTACCACCGCAACCGGCTGTAGGGATAT\n+GAGCTGTCCCTCCTCCCGCGGCTCCACACCACAGATGCCCAGCTTTTGGC\n+ACTGCTCAACGACAATGTCGTCGATTGACTGCGAAAGCAGTGCCTCCTGC\n+TCAGGATCCACAATGGAGCTGCTGTTGATGGCAGCTATTTCGGCGCTCGC\n+TGGTGAAGGCTGAGTATGCGAGTCGTTGCCCTCCAGGAACGCCTTTATCA\n+GCTGCTCCGGTGTCTGGGCCTCCTCGGTGGGACATCGATGGGTCCTTTGA\n+CTGACCTTGTACCTGAACGTCTTTTGGCAGAGCTCGCACTTGTAGGGCAT\n+CACTCCCGTGTGGATGCGCGTGTGGACAAGGAAAGAGACTCGCTGCCGGA\n+AACACTTGCCTGTGGACGAGTCCGAAATACGAAAAGGTTAGACATGGAGT\n+GACCCGGAAAAGAAGGTATACCTCTCCTTCATTTAAAGTAAATAGGGCAA\n+ATCGCAATGGAGTATGCTCATTTATAAGCTGGCTAACAAAATAAGGGGCG\n+GCTAATTAAGGGGTTTGATCGATACTCACCGCAGACTTCGCACTTGAAGG\n+GCTTTTCGCCGCTGTGAATCCTCTGGTGGTTATGCAGCGTAGACAGTTCC\n+TTGAAGGCGCGTCCACAAACCCCGCAGACATGGGGCTTTACCTCGCTGTG\n+GTAGAGCAAATGCTTGTGGTACGACTGCTGGAAGGTGAAGGTCCTGGCGC\n+AGATCTCGCATGTGTACGGCATCTCGCCGGTGTGGAGCCGCTTGTGCTTC\n+TTCAGAAAGTACTTGGTGGTGAAGGACTTGCTGCACACATCGCACTCCCA\n+CAGCTTCGGGGTGGCCGTGCCCGACTCCGGCGAACTGGGGGACTGCTGGA\n+GCATGCTCAGCGCCCCACTCAGCGCGTATGGCTGGGATGCGGTGCACGTG\n+GAGTTATTTCCGTTGCCAAGGGCTCCAGGCTCTGTGTGTCGAATGCGATC\n+GCAAATGCTCAGCTTGGGCGTGGCAGTAACTGCACTGGTGGTAGAAGCGG\n+GTGTAGGACtgggattaggatttggattgggattggAGCAGGGCACGCCC\n+ATCATGTGCACTACTTTCAGGTGGATTCGAAGGGAGCCCTTCATTCGGAA\n+CGTCTTGGAGCAGAGATGGCACTT
GTAGGGCTCCTGGTCCTGTATAAAGC\n+AATAATCGGAATTTCACTTATATTTATCAATTCATCAATATGCCCTCATG\n+GCCAAATATTCCATTACATTACCGTCTGTCTCTCAGTTTCAAATTTATGC\n+ACAAAAATCATTCACTTTCATTCACTATATCACAAAGTTGCCATGGTTTT\n+AAATTGATCAAAAACAAATTAATATCTATCATATATaTACATAGTCATAT\n+GAACAGTTGAAAAATTAATTGAAAATAATGGGAACGATATACGTACATAC\n+ATCAGTTGTTTTTAAAATATAAGGGTATATAGATTTCTTTCttgttgttg\n+ttgatTTTAATTACGTCAAACTTTTGTTTCAGATTCAATGTAAATGGTCT\n+AGCTTTTTAAGTATGATTTTTTTTTGCTGCCAGTGAGCATAGAAaaaaaa\n+aatCaaaATCGATATAAGAATATGCGAAAGTGCATTACGAAACTCTTTAG\n+ATAATAGCACTTAATATATGTACATAGCCAATAGTTACCGGTTCCTTCTG\n+TTGGggttccttttgcttgggttcTCCCTCCGCATTTTCGTGGACTAAGC\n+GGACGTGCATGTCCCTCAGCTCGGTATTCCGGAAACTGAACTCGCAAATG\n+TCGCAGTGGGCGGGCGGGGTGGTCCGCACAGGCGGTGGGGTTGGGACGAC\n+GGGCTTGGACCTGGTTCGCTTGGCCCTCCGTTTGGGAGGAGCTGCGGCAA\n+GGAAGCCCCGGGACGAGGCGGGTTGGCCATTCGCTGGACTCTCGTTTCCC\n+TCCTGAGCCATCAGACTTGTGTGCGAGAACAGGTGGATGGTGAGCTTGTC\n+CAGCCCCAGGAAGAGCTCCTTGCAGTTGGCAAAGGGACAGGCCAGTGGGC\n+CGTTGGCCGCCTTGATCAGCCTCTGCTGCAGTGCGTCAATACTGCCGAAA\n+CTGGGCACCGCGCAGAGCGGACACAGCACCGAGGTGGAACACATTTCGCC\n+AGTGCACTCAATCGAATCTTATGCAAATGCTTCACCTCCTATTGGGATTA\n+TCCTCCTGTTCGGTCTGTGATCATCTATTCAGGAGTCCATTCCCAGACTG\n+CCTAGTCTTTCTGCTTTCAAAATTTTCTAAAAATATCAGCAAGTGAAGAT\n+TTTTGAAAACTTTGGGCCCAGCAATCTGACTTCTCGGCACCGATGCCAGC\n+TAACGAAATAATGAAaaataatgaaaTGCCCGGCGCGGATCGTCGAATCG\n+TCAAGAAGACTTTCGGAAACACTCGCAGCACCGAAATCCCATCTCTCGAA\n+CAAGGCAGTCTCTTTTCTCCGTGTCTCTGGGTAGCTCATTTCGAAATATA\n+GCTCTGAGCACGGCTATATACTATATGTATGTAGAATTATTTCTGGCCGA\n+TATATGTTGCACTGGCGGCCATATAGCCTTCGTTCTAGTCTTTGTAACGC\n+ACGATGCGCAGGAGCAATTCGCTGAGATGACCACATGCGATTTGCGGGAC\n+TTATCTAGAGATCTATCATTATCGCCAGATTGGTTTAATAATTGGCTTTT\n+CCGCCAATATCCAATTGGAATATGGTTGGTTACTGCAATTGTCGCTCCAT\n+TTTTTAAGCACTCCATAAAAAGTAAACACATTAATATGTACTCTTATTGG\n+AGATTTCTTCTTTCGATTTTAGTTTCGGACCAGTGAAAATCATTCGTTTC\n+ATTTTCGTAAATAAGAACTGAGAAAATATtattattatATATATTTCTTT\n+ATTAGGAAAATACGAAGATTGAGTATTTCAGATTGAATTAGCATATCCGT\n+CTAAATCTTAATGCTGTAATGAGCTTACTTGAGATCTGATCAAAACCAAT\n+ACaaaacccacacCAAAGGTGGTAGCTAATATACATATTTTGTGTAATAC\n+TTTTGTAGAG
TATTTACTATTCAGCGATTTAAACAAGCAATCGCCTAGAC\n+ACACACATTTGTCCGCCTATGTGTATGTGCACCGAGCTATACCCCCACTG\n+AATCGCTGTGTGCTATTTTTATGGCCGCGATGCTCTCTTGTTTTGACCCG\n+CTTGGGCAAC\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_augustus/Genus_species.stats.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_augustus/Genus_species.stats.json Mon Oct 04 19:38:37 2021 +0000
b
@@ -0,0 +1,119 @@
+{
+    "format": "annotation",
+    "command": "/home/abretaud/miniconda3/envs/__funannotate@1.8.7/bin/funannotate predict --input /tmp/tmpkgev4773/files/c/e/8/dataset_ce85d0fa-1534-47df-8c1e-5f0a5c1b82f0.dat --out output --database /home/abretaud/.planemo/planemo_tmp__fmxm4ll/test-data/funannotate_db --species Genus species --isolate  --strain  --organism other --ploidy 1 --SeqCenter CFMR --SeqAccession 12345 --name FUN_ --numbering 1 --p2g_pident 80 --p2g_prefilter diamond --augustus_species fly --min_training_models 200 --busco_seed_species fly --busco_db insecta --evm-partition-interval 1500 --min_intronlen 10 --max_intronlen 3000 --min_protlen 50 --repeat_filter overlap blast --cpus 1",
+    "organism": "Genus_species",
+    "software": {
+        "name": "funannotate",
+        "version": "1.8.7",
+        "date": "2021-07-21",
+        "resources": {
+            "merops": {
+                "type": "diamond",
+                "version": "12.0",
+                "date": "2017-10-04",
+                "num-records": "5009"
+            },
+            "uniprot": {
+                "type": "diamond",
+                "version": "2021_03",
+                "date": "2021-06-02",
+                "num-records": "565254"
+            },
+            "dbCAN": {
+                "type": "hmmer3",
+                "version": "9.0",
+                "date": "2020-08-04",
+                "num-records": "641"
+            },
+            "pfam": {
+                "type": "hmmer3",
+                "version": "34.0",
+                "date": "2021-03",
+                "num-records": "19179"
+            },
+            "repeats": {
+                "type": "diamond",
+                "version": "1.0",
+                "date": "2021-07-19",
+                "num-records": "11950"
+            },
+            "go": {
+                "type": "text",
+                "version": "2021-07-02",
+                "date": "2021-07-02",
+                "num-records": "47228"
+            },
+            "mibig": {
+                "type": "diamond",
+                "version": "1.4",
+                "date": "2021-07-19",
+                "num-records": "31023"
+            },
+            "interpro": {
+                "type": "xml",
+                "version": "86.0",
+                "date": "2021-06-03",
+                "num-records": "38913"
+            },
+            "busco_outgroups": {
+                "type": "outgroups",
+                "version": "1.0",
+                "date": "2021-07-19",
+                "num-records": "8"
+            },
+            "gene2product": {
+                "type": "text",
+                "version": "1.70",
+                "date": "2021-06-15",
+                "num-records": "34039"
+            }
+        }
+    },
+    "assembly": {
+        "num_contigs": 4,
+        "length": 227430,
+        "mean_length": 56857.5,
+        "N50": 215740,
+        "L50": 1,
+        "N90": 215740,
+        "L90": 1,
+        "GC_content": 42.86
+    },
+    "annotation": {
+        "genes": 18,
+        "common_name": 0,
+        "mRNA": 18,
+        "tRNA": 0,
+        "ncRNA": 0,
+        "rRNA": 0,
+        "avg_gene_length": 2695.06,
+        "transcript-level": {
+            "CDS_transcripts": 18,
+            "CDS_five_utr": 0,
+            "CDS_three_utr": 0,
+            "CDS_no_utr": 18,
+            "CDS_five_three_utr": 0,
+            "CDS_complete": 18,
+            "CDS_no-start": 0,
+            "CDS_no-stop": 0,
+            "CDS_no-start_no-stop": 0,
+            "total_exons": 57,
+            "total_cds_exons": 57,
+            "multiple_exon_transcript": 16,
+            "single_exon_transcript": 2,
+            "avg_exon_length": 558.58,
+            "avg_protein_length": 582.83,
+            "functional": {
+                "go_terms": 0,
+                "interproscan": 0,
+                "eggnog": 0,
+                "pfam": 0,
+                "cazyme": 0,
+                "merops": 0,
+                "busco": 0,
+                "secretion": 0
+            },
+            "pct_exon_overlap_protein_evidence": 17.54
+        }
+    }
+}
\ No newline at end of file
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_augustus/Genus_species.tbl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_augustus/Genus_species.tbl Mon Oct 04 19:38:37 2021 +0000
b
@@ -0,0 +1,282 @@
+>Feature sample
+1 215740 REFERENCE
+ CFMR 12345
+1466 1092 gene
+ locus_tag FUN_000001
+1466 1092 mRNA
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000001-T1_mrna
+ protein_id gnl|ncbi|FUN_000001-T1
+1466 1092 CDS
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000001-T1_mrna
+ protein_id gnl|ncbi|FUN_000001-T1
+2126 3863 gene
+ locus_tag FUN_000002
+2126 2199 mRNA
+2258 3224
+3284 3490
+3549 3863
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000002-T1_mrna
+ protein_id gnl|ncbi|FUN_000002-T1
+2126 2199 CDS
+2258 3224
+3284 3490
+3549 3863
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000002-T1_mrna
+ protein_id gnl|ncbi|FUN_000002-T1
+5802 4883 gene
+ locus_tag FUN_000003
+5802 5797 mRNA
+5539 4883
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000003-T1_mrna
+ protein_id gnl|ncbi|FUN_000003-T1
+5802 5797 CDS
+5539 4883
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000003-T1_mrna
+ protein_id gnl|ncbi|FUN_000003-T1
+10557 8696 gene
+ locus_tag FUN_000004
+10557 10549 mRNA
+10462 8696
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000004-T1_mrna
+ protein_id gnl|ncbi|FUN_000004-T1
+10557 10549 CDS
+10462 8696
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000004-T1_mrna
+ protein_id gnl|ncbi|FUN_000004-T1
+15214 14247 gene
+ locus_tag FUN_000005
+15214 15209 mRNA
+14648 14247
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000005-T1_mrna
+ protein_id gnl|ncbi|FUN_000005-T1
+15214 15209 CDS
+14648 14247
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000005-T1_mrna
+ protein_id gnl|ncbi|FUN_000005-T1
+21705 19533 gene
+ locus_tag FUN_000006
+21705 21700 mRNA
+21515 19533
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000006-T1_mrna
+ protein_id gnl|ncbi|FUN_000006-T1
+21705 21700 CDS
+21515 19533
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000006-T1_mrna
+ protein_id gnl|ncbi|FUN_000006-T1
+35679 34843 gene
+ locus_tag FUN_000007
+35679 35675 mRNA
+35655 35648
+35594 34843
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000007-T1_mrna
+ protein_id gnl|ncbi|FUN_000007-T1
+35679 35675 CDS
+35655 35648
+35594 34843
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000007-T1_mrna
+ protein_id gnl|ncbi|FUN_000007-T1
+40223 41234 gene
+ locus_tag FUN_000008
+40223 40396 mRNA
+40659 41234
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000008-T1_mrna
+ protein_id gnl|ncbi|FUN_000008-T1
+40223 40396 CDS
+40659 41234
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000008-T1_mrna
+ protein_id gnl|ncbi|FUN_000008-T1
+41267 42107 gene
+ locus_tag FUN_000009
+41267 41274 mRNA
+41437 41444
+41707 42107
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000009-T1_mrna
+ protein_id gnl|ncbi|FUN_000009-T1
+41267 41274 CDS
+41437 41444
+41707 42107
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000009-T1_mrna
+ protein_id gnl|ncbi|FUN_000009-T1
+87202 88320 gene
+ locus_tag FUN_000010
+87202 87207 mRNA
+88054 88320
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000010-T1_mrna
+ protein_id gnl|ncbi|FUN_000010-T1
+87202 87207 CDS
+88054 88320
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000010-T1_mrna
+ protein_id gnl|ncbi|FUN_000010-T1
+94727 95583 gene
+ locus_tag FUN_000011
+94727 94732 mRNA
+94873 95016
+95449 95583
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000011-T1_mrna
+ protein_id gnl|ncbi|FUN_000011-T1
+94727 94732 CDS
+94873 95016
+95449 95583
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000011-T1_mrna
+ protein_id gnl|ncbi|FUN_000011-T1
+133134 136346 gene
+ locus_tag FUN_000012
+133134 133142 mRNA
+133209 134539
+134668 135510
+135569 136346
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000012-T1_mrna
+ protein_id gnl|ncbi|FUN_000012-T1
+133134 133142 CDS
+133209 134539
+134668 135510
+135569 136346
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000012-T1_mrna
+ protein_id gnl|ncbi|FUN_000012-T1
+144294 169416 gene
+ locus_tag FUN_000013
+144294 144551 mRNA
+149012 149244
+149367 149588
+149654 149897
+149952 150112
+150174 150248
+151966 152072
+152314 152429
+152496 152751
+153651 159010
+159150 164491
+167135 168360
+168722 169208
+169350 169416
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000013-T1_mrna
+ protein_id gnl|ncbi|FUN_000013-T1
+144294 144551 CDS
+149012 149244
+149367 149588
+149654 149897
+149952 150112
+150174 150248
+151966 152072
+152314 152429
+152496 152751
+153651 159010
+159150 164491
+167135 168360
+168722 169208
+169350 169416
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000013-T1_mrna
+ protein_id gnl|ncbi|FUN_000013-T1
+192049 194669 gene
+ locus_tag FUN_000014
+192049 192067 mRNA
+193549 193658
+194041 194455
+194518 194669
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000014-T1_mrna
+ protein_id gnl|ncbi|FUN_000014-T1
+192049 192067 CDS
+193549 193658
+194041 194455
+194518 194669
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000014-T1_mrna
+ protein_id gnl|ncbi|FUN_000014-T1
+210553 209044 gene
+ locus_tag FUN_000015
+210553 210548 mRNA
+210474 209044
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000015-T1_mrna
+ protein_id gnl|ncbi|FUN_000015-T1
+210553 210548 CDS
+210474 209044
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000015-T1_mrna
+ protein_id gnl|ncbi|FUN_000015-T1
+>Feature sample4
+1 7560 REFERENCE
+ CFMR 12345
+1466 1092 gene
+ locus_tag FUN_000016
+1466 1092 mRNA
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000016-T1_mrna
+ protein_id gnl|ncbi|FUN_000016-T1
+1466 1092 CDS
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000016-T1_mrna
+ protein_id gnl|ncbi|FUN_000016-T1
+2126 3863 gene
+ locus_tag FUN_000017
+2126 2199 mRNA
+2258 3224
+3284 3490
+3549 3863
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000017-T1_mrna
+ protein_id gnl|ncbi|FUN_000017-T1
+2126 2199 CDS
+2258 3224
+3284 3490
+3549 3863
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000017-T1_mrna
+ protein_id gnl|ncbi|FUN_000017-T1
+5494 4248 gene
+ locus_tag FUN_000018
+5494 4930 mRNA
+4759 4248
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000018-T1_mrna
+ protein_id gnl|ncbi|FUN_000018-T1
+5494 4930 CDS
+4759 4248
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000018-T1_mrna
+ protein_id gnl|ncbi|FUN_000018-T1
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_augustus/Genus_species.validation.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_augustus/Genus_species.validation.txt Mon Oct 04 19:38:37 2021 +0000
[
@@ -0,0 +1,2 @@
+WARNING: valid [SEQ_FEAT.ShortExon] Internal coding region exon is too short FEATURE: CDS: hypothetical protein [(lcl|sample:c35679-35675, c35655-35648, c35594-34843)] [lcl|sample: raw, dna len= 215740] -> [gnl|ncbi|FUN_000007-T1]
+WARNING: valid [SEQ_FEAT.ShortExon] Internal coding region exon is too short FEATURE: CDS: hypothetical protein [(lcl|sample:41267-41274, 41437-41444, 41707-42107)] [lcl|sample: raw, dna len= 215740] -> [gnl|ncbi|FUN_000009-T1]
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_augustus/fly.parameters.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_augustus/fly.parameters.json Mon Oct 04 19:38:37 2021 +0000
[
@@ -0,0 +1,1 @@
+{"augustus": [{"version": "AUGUSTUS (3.3.3)", "source": "augustus pre-trained", "date": "2021-07-19", "path": "/tmp/prout/trained_species/fly/augustus"}], "genemark": [{}], "codingquarry": [{}], "snap": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-21", "path": "/tmp/tmpkgev4773/job_working_directory/000/2/working/output/predict_misc/ab_initio_parameters/fly.snap.hmm"}], "glimmerhmm": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-21", "path": "/tmp/tmpkgev4773/job_working_directory/000/2/working/output/predict_misc/ab_initio_parameters/glimmerhmm"}]}
\ No newline at end of file
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_bam/Genus_species.cds-transcripts.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_bam/Genus_species.cds-transcripts.fa Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,413 @@\n+>FUN_000001-T1 FUN_000001\n+TACCCCTTCGACAGCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGA\n+ACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCG\n+ATGAAAAACTGCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGG\n+TTGCGCAAATCCGAGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGG\n+TTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCT\n+TCAGCGACACCAAGAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCC\n+GACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGC\n+CCAGCTGGAGGAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGC\n+CCACAGAGCAGACGCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtac\n+aagATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCG\n+TCAGGACGGCATCTACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCA\n+ACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCTATAG\n+>FUN_000002-T1 FUN_000002\n+ATGTATGACCAGGAGCCCTACAAGTGCCATCTCTGCTCCAAGACGTTCCGAATGAAGGGCTCCCTTCGAATCCACCTGAA\n+AGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAATCCCAGTCCTACACCCGCTTCTACCACCA\n+GTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACACAGAGCCTGGAGCCCTTGGCAACGGAAAT\n+AACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGCATGCTCCAGCAGTCCCCCAGTTCGCCGGA\n+GTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTCCTTCACCACCAAGTACTTTCTGAAGAAGC\n+ACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCAGGACCTTCACCTTCCAGCAGTCGTACCAC\n+AAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGTGGACGCGCCTTCAAGGAACTGTCTACGCT\n+GCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAAGTCTGCGGTGAGTATCGATCAAACCCCTTAA\n+TTAGCCGCCCCTTATTTTGTTAG\n+>FUN_000003-T1 
FUN_000003\n+ATGATACATTCGGCTAAGACTACCTTGGTGGAGTGGACAGAGGCCATGGAAGACGGAAACAAGGGCTACCAGCTAATCGA\n+AAAGTACTACCTCGATGACCAGCAAAAGGCACGGGAGCTGAACATTAAGCGTCAGCTCTTGCAGGCGGACATCGACAAGC\n+GGCGCAAGCAGGTGGTGCTCCTCTATGACGAGCAAATGACGCTGGAGAAGAATCTGGAGCGAACCGCTTGTCTGTACAGG\n+TCTGCCCACGCGGAACGCCGCCAGATGGTGGAGACGTGGAAGAGCGCCGTGAACCAGATGACCCAGCGAGAGCACGACAT\n+TCAGCGCAGCGAGATAGAGTGTGCGGAGCTGGCCCAGAAAGCCCAGCAGACGGCTCAGACCTACAAGGAGTACGACAACC\n+AGCTAACCGAGGTCATCGAGAACAACCGCCAGGTGGAGCTCGCGATCGAGTCCCTCAACGAGGAAAGCTCCGACATGAAA\n+AACCAAATCCAGATCCTGATTGATGCCACATTACTGAAGGAGCGCGAGATCGACGGACTGCGTCGGGAACTGGAGAACCT\n+TTCGAACCGGGTTCACCTGCAGCGCATGGATAACCGTAGTCAGATGAAAAAGCGCGATGAGAAGGCCAAGGAGCTGGAGA\n+ACTTCGCTTCGGTGATGGAGAAGGTTAACGCTCGGCTGAAGTCTGTACAGAACAAGGCCCTCAATGCGGAGCAGCGGCTG\n+CAAATTCTGGAGGAGATGATGCAGGCGGAGGAGACGGCTCTGCGAAACCTGGACAAGGAGCAGGAGAAGGTCAACGAGAT\n+GTTGTACCGCACCCAGAGGCAGGTCATCGAGCTGCAGGACGAGGAGAAGGTCTTGAAGGTGCAGAACGACTCGCTCAACT\n+CCAACCTGGCAGCGATCAATCGCAATCAGCAGCAGGTCAATAACGAACTCAAGCGGCAGACGGAGATCCATTACAGCCTG\n+TCCTTTAAGTGCTTGGAGGCGGAGCGGCGTCTCGCCGAGATCAAGGGTCTGGCAGATGACCCCGAGGTAGAGGCCACAAA\n+CATGGCCCGGCTTAACACTCTGGAGCAGGAGTATGAAAAGTTGCAACGCCTCATCGCCACCACGGAGGCGCAGAATAAGA\n+AGCTAAACTATAACATGAACAACCTGGTCGTCCAGTACAACGCCGACGAGAAGGAGCTCGAAATGGTCAGGTTCAAGATT\n+AAAGAGGCCCAGGTGTACTGCGAGGGCACCGTCAAGAGACTGCGCCAGATTCGATACGAGAATTCCGAGCTCATAGTCGA\n+CCTCAACATGGTCAAGATGCGCTGCAGCGACCTAGAGGTTGGCATCGGAGGCTGCGAGCAGGGCACCTACGACCTAGAGC\n+AGCACCGCCTTGCCTTCCGGCGCGCCATCAAAGATCGCACCGTTGAGCTTCGCAGCCAAGAGGATGTGCTCCTCCTCAAG\n+AAGAAGCATCTTAACGAGGAACTAAGCACCCTGCGAGCCGACCTCGGGGAGCGGAAGAAGCAGATTGAGGCGATGAAGGC\n+GCGCTTTGAACTTACCGCCCAATTGCTAGGCAAAAACGAGGACGGCTCCATCATGACCAGCACACAGCTGAAGGTGGTGA\n+GCGCCCAGGAACGGCAGATGCTGGCCGACGAGGGCGATGCCCTCAACAAAAAGGTTCTCAAGGCTGAGAAGGAAGTGGTC\n+GCCCTGGAAAACACACTGCGCCAGTTCGACAAATCGAACGATAACTACCGAAAAACATTCCGATCTGTGGACGAAAACTC\n+AAAGGGTGAGTTATAA\n+>FUN_000004-T1 
FUN_000004\n+ATGCGTTGCGTTTTCTGTGGATCGGGCAGCGAGCAGCAGCATTCGCGATGGGAAATCAAGATGCTGCAGGAGAGCTGCCG\n+CACTGATCATGGATTCCACCAGGACTCGCAAGCTATCCAATATCTGTATGAGATCCTGGCCTCTTACAACCGCGACGAAC\n+AGCGCGCCTTCTTACAGTTTGTGACTGGATCACCACGCCTTCCGACTGGAGGATTCAAGGCCCTTACGCCACCACTGACT\n+ATTGTACGCAAGACGTTGGATGAGAACCAAAACCCTAACGATTACTTACCATCTGTGATGACCTGTGTCAACTATCTAAA\n+GTTGCCCGACTACTCTAGTCGCGAGGTGATGAGGCAGAAGCTGAAAGTGGCTGCTAACGAAGGCAGCATGTCTTTCCACC\n+TCTCATAA\n+>'..b'GATCCAAGCATTGCGGATGGTGCAGGCCGAGACCCCGCCCTATATTAAACA\n+ATGTCATAGGAACGACCCGAAATTGGTGGACTGCTTTATCGGAGCTATTGAACACCTAAAGCCATATTTGGCCAATGGCA\n+TTCCTGATATTCAGCTGCCCTCTGTGGAGCCCTTTAAGATGGACACCCTTGCCCTGCAGTTAACAGAGGGTCCCCAGGGG\n+TATAAGATCACGCTGAAGAACATGGAGGCCTTCGGGGCCAGCAACTTCAAGGTGACATCCCTGAAACTGAGCGAAGGAAG\n+CGAGCCCTTCAAGGCGAAGATCGTGATGCCCAAGCTAAAGATTGAGGCTAAATACACGAGCTCCGGGGTCCTGCTGATCC\n+TGCCCGCCTCCGGAGGTGGGGACTTCCATGCTAACTTCGAGGGTGTGAGTGCCGATCTCACAGGAAAGACATCCATTCAC\n+GCCTTCAAGGGCGCTAACTACCTCCACATCGATGCTCTCAGCTTGGTTCTGGATGTGAAGGATGTGAAAATGAGCATCTC\n+AGGTGCCTTCAACAACAATCGAATTCTGCTGGAGGCCACCAATCTGTTTCTGCGGGAAAACTCTCAAGTCGTTTTGGAGG\n+CTATGCAGGCTCAATTGCAGAAAAAATTGGCTAGCGAGTTCGGCAAACTCGCCAACCAGCTCCTGAAGAATGTTCCTGTA\n+GAGCAATTCTACGTGGACTAG\n+>FUN_000016-T1 
FUN_000016\n+ATGGCATCCAAGTCCTTCGATTTGGTCATTGAGGAAAAGACAAAGAAACCGGAACGGCTCTACCAACCTCGTCGGATGCG\n+ATGGCTTAAATATATCATCCTGCCGGCAGTCTTCTCCTTTGCGCTTCTGCTGATCCTGGTCAATGTGGACTTCTCCGATA\n+ATAGTGAGGATTCCACGCATCTGGGCAATGACACATCGCTCATTATATCTGGCTATGGATTTGAAAATAACACACTGCGC\n+CGGGGATTTTTCTCCGGAGGAATTGCATTGCATAGCCTTGTCATCGAGAACTGCACCATAGTTCATATCAACGACGCAGC\n+TTTTAATCAGGAAAGCACTGTGAACATAACAAGCCTGCAGCTCATTAATGTGCAATTGGAAAACTTGACTGAATCTGCCC\n+TGGAAGGTCTTCAAAAGCTTCAGAACTTTACTCTAGTCAATGAAAATAATCACTTCAGGCCATTTGGATTCCTCTCAGCT\n+GTGGCTGAATCGTTAGTCAGTGCGGAAATCCACCAGTCACTAGCCGCGGCAATATCGTATTCAGTGTGTGATTTTTTGGG\n+ATCCCGGAATTTCCCACAGTTGAAATATTTAGATCTGAGTGGCACACACTTGGACAAAAGCCTTATTAAAGAGTCCTTTG\n+ACAATCTGCCCGCGTTGGAACAACTACTTCTAAGGAACTGTGGGTTGGGCAACATCGAGTGGGAAATCGTGAGGCCAAGA\n+CTCAAATTGTTGCACTATTTGGACTTGGGTGGAGCTCAAAAGACGGGCAACTATGAACATCAGTTGGACGTATCCGCGTT\n+CTCTCCTGAGACAACTACTAACGCTGAAGAGATATCTACTATCCTAGCGAAGAGAGCGATGGCTCCAGAAGTAGTGGGCA\n+CCACTACACTAGGACCTACTACTTCCATAGAAATTTCACCACCATCAACACAAAGTACAACAACACCCAAAGAAGAAAGT\n+ACATCTATGACAGAAACAACGATATTAACAACACCATCGCCAAAATGCGAAGAGGAACTTTGCCAGGATCTAGAGTGCTC\n+CAGAATTACCACCGATACGGTTGCATCTGCAGATCTGGGAAAATCGTCGTGCCAGGATGGTTTACTGGTGGAGATCTGCG\n+AATCGACTTGCACCACTCCCACATTCTTCTGTGTGATATTAGGCGAGAACTTTACCTCCGCATCCAACTGTTGTTCCCAC\n+CATACCATGCGATGTGTGGTCTCTGCACAGGTCTCCTGGTTCGAGGACCACAGCGGACTGGTCATTGGCTTGGGAGTGGG\n+TCTCCTCTTCATCGGCAGCTTCCTCGGCATGCTCATCGTCTTCGGAACCCTGCGCCTGAATCCATCCTGGTTGCGAGGCA\n+ATAAGCGGCGGGAGTCGAATACGATAGGACTGATTCAGGGAAGATTTGAGAAGGACCCGTATGAGCAAGTGGGGTAG\n+>FUN_000017-T1 FUN_000017\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000018-T1 
FUN_000018\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCGATCGACAGTCTCGTTCTCGGAG\n+TCGAACTCGCAGCGG\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_bam/Genus_species.discrepency.report.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_bam/Genus_species.discrepency.report.txt Mon Oct 04 19:38:37 2021 +0000
b
@@ -0,0 +1,142 @@
+Discrepancy Report Results
+
+Summary
+DISC_PROTEIN_NAMES:All proteins have same name "hypothetical protein"
+DISC_SOURCE_QUALS_ASNDISC:taxname (all present, all same)
+DISC_FEATURE_COUNT:gene: 18 present
+DISC_FEATURE_COUNT:CDS: 18 present
+DISC_FEATURE_COUNT:mRNA: 18 present
+DISC_COUNT_NUCLEOTIDES:4 nucleotide Bioseqs are present
+JOINED_FEATURES:30 features have joined locations.
+NO_ANNOTATION:2 bioseqs have no features
+DISC_QUALITY_SCORES:Quality scores are missing on all sequences.
+FATAL: DISC_BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS:2 features have partial ends that do not abut the end of the sequence or a gap, and cannot be extended by 3 or fewer nucleotides to do so
+ONCALLER_COMMENT_PRESENT:4 comment descriptors were found (all same)
+MISSING_GENOMEASSEMBLY_COMMENTS:4 bioseqs are missing GenomeAssembly structured comments
+MOLTYPE_NOT_MRNA:4 molecule types are not set as mRNA.
+TECHNIQUE_NOT_TSA:4 technique are not set as TSA
+MISSING_STRUCTURED_COMMENT:4 sequences do not include structured comments.
+MISSING_PROJECT:22 sequences do not include project.
+DISC_INCONSISTENT_MOLINFO_TECH:Molinfo Technique Report (some missing, all same)
+
+
+Detailed Report
+
+DiscRep_ALL:DISC_PROTEIN_NAMES::All proteins have same name "hypothetical protein"
+
+DiscRep_ALL:DISC_SOURCE_QUALS_ASNDISC::taxname (all present, all same)
+DiscRep_SUB:DISC_SOURCE_QUALS_ASNDISC::4 sources have 'Genus species' for taxname
+DiscRep_ALL:DISC_FEATURE_COUNT::gene: 18 present
+DiscRep_ALL:DISC_FEATURE_COUNT::CDS: 18 present
+DiscRep_ALL:DISC_FEATURE_COUNT::mRNA: 18 present
+DiscRep_ALL:DISC_COUNT_NUCLEOTIDES::4 nucleotide Bioseqs are present
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:JOINED_FEATURES::30 features have joined locations.
+DiscRep_SUB:JOINED_FEATURES::30 features have joined location but no exception
+genome:mRNA hypothetical protein (sample4:2126-2199, 2258-3224, 3284->3537) FUN_000018
+genome:CDS hypothetical protein (sample4:2126-2199, 2258-3224, 3284->3537) FUN_000018
+genome:mRNA hypothetical protein (sample:c5802-5797, c5539-4883) FUN_000002
+genome:CDS hypothetical protein (sample:c5802-5797, c5539-4883) FUN_000002
+genome:mRNA hypothetical protein (sample:c10557-10549, c10462-8696) FUN_000003
+genome:CDS hypothetical protein (sample:c10557-10549, c10462-8696) FUN_000003
+genome:CDS hypothetical protein (sample:c15214-15209, c14648-14247) FUN_000004
+genome:mRNA hypothetical protein (sample:c15214-15209, c14648-14247) FUN_000004
+genome:mRNA hypothetical protein (sample:c21705-21700, c21515-19533) FUN_000005
+genome:CDS hypothetical protein (sample:c21705-21700, c21515-19533) FUN_000005
+genome:CDS hypothetical protein (sample:40223-40396, 40659-41193, 41707-42080, 43409-43609, 43678-44130) FUN_000007
+genome:mRNA hypothetical protein (sample:40223-40396, 40659-41193, 41707-42080, 43409-43609, 43678-44130) FUN_000007
+genome:CDS hypothetical protein (sample:c47195-46753, c46330-46214, c46157-45527) FUN_000008
+genome:mRNA hypothetical protein (sample:c47195-46753, c46330-46214, c46157-45527) FUN_000008
+genome:CDS hypothetical protein (sample:c79527-79519, c79068-78685) FUN_000009
+genome:mRNA hypothetical protein (sample:c79527-79519, c79068-78685) FUN_000009
+genome:mRNA hypothetical protein (sample:87202-87207, 88054-88320) FUN_000010
+genome:CDS hypothetical protein (sample:87202-87207, 88054-88320) FUN_000010
+genome:CDS hypothetical protein (sample:133587-134504, 134720-135510, 135569-136284, 137516-137862) FUN_000011
+genome:mRNA hypothetical protein (sample:133587-134504, 134720-135510, 135569-136284, 137516-137862) FUN_000011
+genome:CDS hypothetical protein (sample:144294-144551, 149012-149244, 149367-149588, 149654-149897, 149952-150112, 150174-150248, 151966-152072, 152314-152429, 152496-152751, 153296-153630, 153689-155122, 155789-158975, 159190-164495) FUN_000012
+genome:mRNA hypothetical protein (sample:144294-144551, 149012-149244, 149367-149588, 149654-149897, 149952-150112, 150174-150248, 151966-152072, 152314-152429, 152496-152751, 153296-153630, 153689-155122, 155789-158975, 159190-164495) FUN_000012
+genome:CDS hypothetical protein (sample:167121-168360, 168722-169212) FUN_000013
+genome:mRNA hypothetical protein (sample:167121-168360, 168722-169212) FUN_000013
+genome:CDS hypothetical protein (sample:c178916-178873, c177172-176887, c176824-176699) FUN_000014
+genome:mRNA hypothetical protein (sample:c178916-178873, c177172-176887, c176824-176699) FUN_000014
+genome:CDS hypothetical protein (sample:192004-192067, 193549-193658, 194041-194455, 194518-194669) FUN_000015
+genome:mRNA hypothetical protein (sample:192004-192067, 193549-193658, 194041-194455, 194518-194669) FUN_000015
+genome:CDS hypothetical protein (sample:c210553-210548, c210474-209044) FUN_000016
+genome:mRNA hypothetical protein (sample:c210553-210548, c210474-209044) FUN_000016
+
+DiscRep_ALL:NO_ANNOTATION::2 bioseqs have no features
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+
+DiscRep_ALL:DISC_QUALITY_SCORES::Quality scores are missing on all sequences.
+
+FATAL: DiscRep_ALL:DISC_BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS::2 features have partial ends that do not abut the end of the sequence or a gap, and cannot be extended by 3 or fewer nucleotides to do so
+genome:CDS hypothetical protein sample:<2331-3254 FUN_000001
+genome:CDS hypothetical protein (sample4:2126-2199, 2258-3224, 3284->3537) FUN_000018
+
+DiscRep_ALL:ONCALLER_COMMENT_PRESENT::4 comment descriptors were found (all same)
+genome:sample:"Annotated using 1.8.7"
+genome:sample2:"Annotated using 1.8.7"
+genome:sample3:"Annotated using 1.8.7"
+genome:sample4:"Annotated using 1.8.7"
+
+DiscRep_ALL:MISSING_GENOMEASSEMBLY_COMMENTS::4 bioseqs are missing GenomeAssembly structured comments
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:MOLTYPE_NOT_MRNA::4 molecule types are not set as mRNA.
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:TECHNIQUE_NOT_TSA::4 technique are not set as TSA
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:MISSING_STRUCTURED_COMMENT::4 sequences do not include structured comments.
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:MISSING_PROJECT::22 sequences do not include project.
+genome:sample (length 215740)
+genome:ncbi:FUN_000001-T1 (length 307)
+genome:ncbi:FUN_000002-T1 (length 220)
+genome:ncbi:FUN_000003-T1 (length 591)
+genome:ncbi:FUN_000004-T1 (length 135)
+genome:ncbi:FUN_000005-T1 (length 662)
+genome:ncbi:FUN_000006-T1 (length 278)
+genome:ncbi:FUN_000007-T1 (length 578)
+genome:ncbi:FUN_000008-T1 (length 396)
+genome:ncbi:FUN_000009-T1 (length 130)
+genome:ncbi:FUN_000010-T1 (length 90)
+genome:ncbi:FUN_000011-T1 (length 923)
+genome:ncbi:FUN_000012-T1 (length 3977)
+genome:ncbi:FUN_000013-T1 (length 576)
+genome:ncbi:FUN_000014-T1 (length 151)
+genome:ncbi:FUN_000015-T1 (length 246)
+genome:ncbi:FUN_000016-T1 (length 478)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+genome:ncbi:FUN_000017-T1 (length 124)
+genome:ncbi:FUN_000018-T1 (length 432)
+
+DiscRep_ALL:DISC_INCONSISTENT_MOLINFO_TECH::Molinfo Technique Report (some missing, all same)
+DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::technique (all missing)
+DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::4 Molinfos are missing field technique
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_bam/Genus_species.gbk
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_bam/Genus_species.gbk Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,4259 @@\n+LOCUS       sample                215740 bp    DNA     linear       22-JUL-2021\n+DEFINITION  Genus species.\n+ACCESSION   \n+VERSION\n+KEYWORDS    .\n+SOURCE      Genus species\n+  ORGANISM  Genus species\n+            Unclassified.\n+REFERENCE   1  (bases 1 to 215740)\n+  AUTHORS   Palmer,J.M.\n+  TITLE     Direct Submission\n+  JOURNAL   Submitted (22-JUL-2021) CFMR, USDA Forest Service, 1 Gifford\n+            Pinchot Drive, Madison, WI 53726, USA\n+COMMENT     \'Annotated using 1.8.7\'.\n+FEATURES             Location/Qualifiers\n+     source          1..215740\n+                     /organism="Genus species"\n+                     /mol_type="genomic DNA"\n+     gene            <2331..3254\n+                     /locus_tag="FUN_000001"\n+     mRNA            <2331..3254\n+                     /locus_tag="FUN_000001"\n+                     /product="hypothetical protein"\n+     CDS             <2331..3254\n+                     /locus_tag="FUN_000001"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUN_000001-T1"\n+                     /translation="YPFDSHRFVQYNPTSLERNFKYDVLTEHDLGVTVDLINRELYQA\n+                     DSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENI\n+                     EAKVGYNVKKSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFP\n+                     DFTNWKFPCAQVIFDSDPAPAGKNVPAQLEEMSQAMIRGVMDESGEQFVAYFLPTEQT\n+                     LEKRRTDFINGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGIYYNEL\n+                     ETRVRLNKRRVKVGQQPNNTKLVSIFMRIHL"\n+     gene            complement(4883..5802)\n+                     /locus_tag="FUN_000002"\n+     mRNA            complement(join(4883..5539,5797..5802))\n+                     /locus_tag="FUN_000002"\n+                     /product="hypothetical protein"\n+     CDS             complement(join(4883..5539,5797..5802))\n+                     /locus_tag="FUN_000002"\n+                     
/codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUN_000002-T1"\n+                     /translation="MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPN\n+                     PSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGNNSTCTASQPYALSGALSMLQQS\n+                     PSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+                     KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCEVCGEYRSNPLISRPL\n+                     FC"\n+     gene            complement(8696..10557)\n+                     /locus_tag="FUN_000003"\n+     mRNA            complement(join(8696..10462,10549..10557))\n+                     /locus_tag="FUN_000003"\n+                     /product="hypothetical protein"\n+     CDS             complement(join(8696..10462,10549..10557))\n+                     /locus_tag="FUN_000003"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUN_000003-T1"\n+                     /translation="MIHSAKTTLVEWTEAMEDGNKGYQLIEKYYLDDQQKARELNIKR\n+                     QLLQADIDKRRKQVVLLYDEQMTLEKNLERTACLYRSAHAERRQMVETWKSAVNQMTQ\n+                     REHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELAIESLNEESSDMK\n+                     NQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRDEKAKELENFASV\n+                     MEKVNARLKSVQNKALNAEQRLQILEEMMQAEETALRNLDKEQEKVNEMLYRTQRQVI\n+                     ELQDEEKVLKVQNDSLNSNLAAINRNQQQVNNELKRQTEIHYSLSFKCLEAERRLAEI\n+                     KGLADDPEVEATNMARLNTLEQEYEKLQRLIATTEAQNKKLNYNMNNLVVQYNADEKE\n+                     LEMVRFKIKEAQVYCEGTVKRLRQIRYENSELIVDLNMVKMRCSDLEVGIGGCEQGTY\n+                     DLEQHRLAFRRAIKDRTVELRSQEDVLLLKKKHLNEELSTLRADLGERKKQIEAMKAR\n+                     FELTAQLLGKNEDGSIMTSTQLKVVSAQERQMLADEGDALNKKVLKAEKEVVALENTL\n+                     RQFDKSNDNYRKTFRSVDENSKGEL"\n+     gene            complement(14247..15214)\n+                     /locus_tag="FUN_000004"\n+     mRNA            
complement(join(14247..14648,15209..15214))\n+                     /locus_tag="FUN_00'..b'1 agcttttggc actgctcaac gacaatgtcg tcgattgact gcgaaagcag tgcctcctgc\n+     4501 tcaggatcca caatggagct gctgttgatg gcagctattt cggcgctcgc tggtgaaggc\n+     4561 tgagtatgcg agtcgttgcc ctccaggaac gcctttatca gctgctccgg tgtctgggcc\n+     4621 tcctcggtgg gacatcgatg ggtcctttga ctgaccttgt acctgaacgt cttttggcag\n+     4681 agctcgcact tgtagggcat cactcccgtg tggatgcgcg tgtggacaag gaaagagact\n+     4741 cgctgccgga aacacttgcc tgtggacgag tccgaaatac gaaaaggtta gacatggagt\n+     4801 gacccggaaa agaaggtata cctctccttc atttaaagta aatagggcaa atcgcaatgg\n+     4861 agtatgctca tttataagct ggctaacaaa ataaggggcg gctaattaag gggtttgatc\n+     4921 gatactcacc gcagacttcg cacttgaagg gcttttcgcc gctgtgaatc ctctggtggt\n+     4981 tatgcagcgt agacagttcc ttgaaggcgc gtccacaaac cccgcagaca tggggcttta\n+     5041 cctcgctgtg gtagagcaaa tgcttgtggt acgactgctg gaaggtgaag gtcctggcgc\n+     5101 agatctcgca tgtgtacggc atctcgccgg tgtggagccg cttgtgcttc ttcagaaagt\n+     5161 acttggtggt gaaggacttg ctgcacacat cgcactccca cagcttcggg gtggccgtgc\n+     5221 ccgactccgg cgaactgggg gactgctgga gcatgctcag cgccccactc agcgcgtatg\n+     5281 gctgggatgc ggtgcacgtg gagttatttc cgttgccaag ggctccaggc tctgtgtgtc\n+     5341 gaatgcgatc gcaaatgctc agcttgggcg tggcagtaac tgcactggtg gtagaagcgg\n+     5401 gtgtaggact gggattagga tttggattgg gattggagca gggcacgccc atcatgtgca\n+     5461 ctactttcag gtggattcga agggagccct tcattcggaa cgtcttggag cagagatggc\n+     5521 acttgtaggg ctcctggtcc tgtataaagc aataatcgga atttcactta tatttatcaa\n+     5581 ttcatcaata tgccctcatg gccaaatatt ccattacatt accgtctgtc tctcagtttc\n+     5641 aaatttatgc acaaaaatca ttcactttca ttcactatat cacaaagttg ccatggtttt\n+     5701 aaattgatca aaaacaaatt aatatctatc atatatatac atagtcatat gaacagttga\n+     5761 aaaattaatt gaaaataatg ggaacgatat acgtacatac atcagttgtt tttaaaatat\n+     5821 aagggtatat agatttcttt cttgttgttg ttgattttaa ttacgtcaaa cttttgtttc\n+     5881 agattcaatg taaatggtct agctttttaa 
gtatgatttt tttttgctgc cagtgagcat\n+     5941 agaaaaaaaa aatcaaaatc gatataagaa tatgcgaaag tgcattacga aactctttag\n+     6001 ataatagcac ttaatatatg tacatagcca atagttaccg gttccttctg ttggggttcc\n+     6061 ttttgcttgg gttctccctc cgcattttcg tggactaagc ggacgtgcat gtccctcagc\n+     6121 tcggtattcc ggaaactgaa ctcgcaaatg tcgcagtggg cgggcggggt ggtccgcaca\n+     6181 ggcggtgggg ttgggacgac gggcttggac ctggttcgct tggccctccg tttgggagga\n+     6241 gctgcggcaa ggaagccccg ggacgaggcg ggttggccat tcgctggact ctcgtttccc\n+     6301 tcctgagcca tcagacttgt gtgcgagaac aggtggatgg tgagcttgtc cagccccagg\n+     6361 aagagctcct tgcagttggc aaagggacag gccagtgggc cgttggccgc cttgatcagc\n+     6421 ctctgctgca gtgcgtcaat actgccgaaa ctgggcaccg cgcagagcgg acacagcacc\n+     6481 gaggtggaac acatttcgcc agtgcactca atcgaatctt atgcaaatgc ttcacctcct\n+     6541 attgggatta tcctcctgtt cggtctgtga tcatctattc aggagtccat tcccagactg\n+     6601 cctagtcttt ctgctttcaa aattttctaa aaatatcagc aagtgaagat ttttgaaaac\n+     6661 tttgggccca gcaatctgac ttctcggcac cgatgccagc taacgaaata atgaaaaata\n+     6721 atgaaatgcc cggcgcggat cgtcgaatcg tcaagaagac tttcggaaac actcgcagca\n+     6781 ccgaaatccc atctctcgaa caaggcagtc tcttttctcc gtgtctctgg gtagctcatt\n+     6841 tcgaaatata gctctgagca cggctatata ctatatgtat gtagaattat ttctggccga\n+     6901 tatatgttgc actggcggcc atatagcctt cgttctagtc tttgtaacgc acgatgcgca\n+     6961 ggagcaattc gctgagatga ccacatgcga tttgcgggac ttatctagag atctatcatt\n+     7021 atcgccagat tggtttaata attggctttt ccgccaatat ccaattggaa tatggttggt\n+     7081 tactgcaatt gtcgctccat tttttaagca ctccataaaa agtaaacaca ttaatatgta\n+     7141 ctcttattgg agatttcttc tttcgatttt agtttcggac cagtgaaaat cattcgtttc\n+     7201 attttcgtaa ataagaactg agaaaatatt attattatat atatttcttt attaggaaaa\n+     7261 tacgaagatt gagtatttca gattgaatta gcatatccgt ctaaatctta atgctgtaat\n+     7321 gagcttactt gagatctgat caaaaccaat acaaaaccca caccaaaggt ggtagctaat\n+     7381 atacatattt tgtgtaatac ttttgtagag tatttactat tcagcgattt aaacaagcaa\n+     7441 
tcgcctagac acacacattt gtccgcctat gtgtatgtgc accgagctat acccccactg\n+     7501 aatcgctgtg tgctattttt atggccgcga tgctctcttg ttttgacccg cttgggcaac\n+//\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_bam/Genus_species.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_bam/Genus_species.gff3 Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,145 @@\n+##gff-version 3\n+sample\tfunannotate\tgene\t2331\t3254\t.\t+\t.\tID=FUN_000001;\n+sample\tfunannotate\tmRNA\t2331\t3254\t.\t+\t.\tID=FUN_000001-T1;Parent=FUN_000001;product=hypothetical protein;\n+sample\tfunannotate\texon\t2331\t3254\t.\t+\t.\tID=FUN_000001-T1.exon1;Parent=FUN_000001-T1;\n+sample\tfunannotate\tCDS\t2331\t3254\t.\t+\t0\tID=FUN_000001-T1.cds;Parent=FUN_000001-T1;\n+sample\tfunannotate\tgene\t4883\t5802\t.\t-\t.\tID=FUN_000002;\n+sample\tfunannotate\tmRNA\t4883\t5802\t.\t-\t.\tID=FUN_000002-T1;Parent=FUN_000002;product=hypothetical protein;\n+sample\tfunannotate\texon\t5797\t5802\t.\t-\t.\tID=FUN_000002-T1.exon1;Parent=FUN_000002-T1;\n+sample\tfunannotate\texon\t4883\t5539\t.\t-\t.\tID=FUN_000002-T1.exon2;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t5797\t5802\t.\t-\t0\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t4883\t5539\t.\t-\t0\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tgene\t8696\t10557\t.\t-\t.\tID=FUN_000003;\n+sample\tfunannotate\tmRNA\t8696\t10557\t.\t-\t.\tID=FUN_000003-T1;Parent=FUN_000003;product=hypothetical protein;\n+sample\tfunannotate\texon\t10549\t10557\t.\t-\t.\tID=FUN_000003-T1.exon1;Parent=FUN_000003-T1;\n+sample\tfunannotate\texon\t8696\t10462\t.\t-\t.\tID=FUN_000003-T1.exon2;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t10549\t10557\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t8696\t10462\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tgene\t14247\t15214\t.\t-\t.\tID=FUN_000004;\n+sample\tfunannotate\tmRNA\t14247\t15214\t.\t-\t.\tID=FUN_000004-T1;Parent=FUN_000004;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t15209\t15214\t.\t-\t.\tID=FUN_000004-T1.exon1;Parent=FUN_000004-T1;\n+sample\tfunannotate\texon\t14247\t14648\t.\t-\t.\tID=FUN_000004-T1.exon2;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t15209\t15214\t.\t-\t0\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t14247\t14648\t.\t-\t0\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tgene\t19533\t21705\t.\t-\t.\tID=FUN_000005;\n+sample\tfunannotate\tmRNA\t19533\t21705\t.\t-\t.\tID=FUN_000005-T1;Parent=FUN_000005;product=hypothetical protein;\n+sample\tfunannotate\texon\t21700\t21705\t.\t-\t.\tID=FUN_000005-T1.exon1;Parent=FUN_000005-T1;\n+sample\tfunannotate\texon\t19533\t21515\t.\t-\t.\tID=FUN_000005-T1.exon2;Parent=FUN_000005-T1;\n+sample\tfunannotate\tCDS\t21700\t21705\t.\t-\t0\tID=FUN_000005-T1.cds;Parent=FUN_000005-T1;\n+sample\tfunannotate\tCDS\t19533\t21515\t.\t-\t0\tID=FUN_000005-T1.cds;Parent=FUN_000005-T1;\n+sample\tfunannotate\tgene\t34843\t35679\t.\t-\t.\tID=FUN_000006;\n+sample\tfunannotate\tmRNA\t34843\t35679\t.\t-\t.\tID=FUN_000006-T1;Parent=FUN_000006;product=hypothetical protein;\n+sample\tfunannotate\texon\t34843\t35679\t.\t-\t.\tID=FUN_000006-T1.exon1;Parent=FUN_000006-T1;\n+sample\tfunannotate\tCDS\t34843\t35679\t.\t-\t0\tID=FUN_000006-T1.cds;Parent=FUN_000006-T1;\n+sample\tfunannotate\tgene\t40223\t44130\t.\t+\t.\tID=FUN_000007;\n+sample\tfunannotate\tmRNA\t40223\t44130\t.\t+\t.\tID=FUN_000007-T1;Parent=FUN_000007;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t40223\t40396\t.\t+\t.\tID=FUN_000007-T1.exon1;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t40659\t41193\t.\t+\t.\tID=FUN_000007-T1.exon2;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t41707\t42080\t.\t+\t.\tID=FUN_000007-T1.exon3;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t43409\t43609\t.\t+\t.\tID=FUN_000007-T1.exon4;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t43678\t44130\t.\t+\t.\tID=FUN_000007-T1.exon5;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t40223\t40396\t.\t+\t0\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t40659\t41193\t.\t+\t0\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t41707\t42080\t.\t+\t2\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t43409\t43609\t.\t+\t0\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t43678\t44130\t.\t+\t0\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tgene\t45527\t47195\t.\t-\t.\tID=FUN_000008;\n+sample\tfunannotate\tmRNA\t45527\t47195\t.\t-\t.\tID=FUN_000008-T1;Parent=FUN_000008;product=hypothetical protein;\n+sample\tfunannotate\texon\t46753\t47195\t.\t-\t.\tID=FUN_000008-T1.exon1'..b'_000012-T1;\n+sample\tfunannotate\tCDS\t153296\t153630\t.\t+\t2\tID=FUN_000012-T1.cds;Parent=FUN_000012-T1;\n+sample\tfunannotate\tCDS\t153689\t155122\t.\t+\t0\tID=FUN_000012-T1.cds;Parent=FUN_000012-T1;\n+sample\tfunannotate\tCDS\t155789\t158975\t.\t+\t0\tID=FUN_000012-T1.cds;Parent=FUN_000012-T1;\n+sample\tfunannotate\tCDS\t159190\t164495\t.\t+\t2\tID=FUN_000012-T1.cds;Parent=FUN_000012-T1;\n+sample\tfunannotate\tgene\t167121\t169212\t.\t+\t.\tID=FUN_000013;\n+sample\tfunannotate\tmRNA\t167121\t169212\t.\t+\t.\tID=FUN_000013-T1;Parent=FUN_000013;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t167121\t168360\t.\t+\t.\tID=FUN_000013-T1.exon1;Parent=FUN_000013-T1;\n+sample\tfunannotate\texon\t168722\t169212\t.\t+\t.\tID=FUN_000013-T1.exon2;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t167121\t168360\t.\t+\t0\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t168722\t169212\t.\t+\t2\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tgene\t176699\t178916\t.\t-\t.\tID=FUN_000014;\n+sample\tfunannotate\tmRNA\t176699\t178916\t.\t-\t.\tID=FUN_000014-T1;Parent=FUN_000014;product=hypothetical protein;\n+sample\tfunannotate\texon\t178873\t178916\t.\t-\t.\tID=FUN_000014-T1.exon1;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t176887\t177172\t.\t-\t.\tID=FUN_000014-T1.exon2;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t176699\t176824\t.\t-\t.\tID=FUN_000014-T1.exon3;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t178873\t178916\t.\t-\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t176887\t177172\t.\t-\t1\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t176699\t176824\t.\t-\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tgene\t192004\t194669\t.\t+\t.\tID=FUN_000015;\n+sample\tfunannotate\tmRNA\t192004\t194669\t.\t+\t.\tID=FUN_000015-T1;Parent=FUN_000015;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t192004\t192067\t.\t+\t.\tID=FUN_000015-T1.exon1;Parent=FUN_000015-T1;\n+sample\tfunannotate\texon\t193549\t193658\t.\t+\t.\tID=FUN_000015-T1.exon2;Parent=FUN_000015-T1;\n+sample\tfunannotate\texon\t194041\t194455\t.\t+\t.\tID=FUN_000015-T1.exon3;Parent=FUN_000015-T1;\n+sample\tfunannotate\texon\t194518\t194669\t.\t+\t.\tID=FUN_000015-T1.exon4;Parent=FUN_000015-T1;\n+sample\tfunannotate\tCDS\t192004\t192067\t.\t+\t0\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample\tfunannotate\tCDS\t193549\t193658\t.\t+\t2\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample\tfunannotate\tCDS\t194041\t194455\t.\t+\t0\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample\tfunannotate\tCDS\t194518\t194669\t.\t+\t2\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample\tfunannotate\tgene\t209044\t210553\t.\t-\t.\tID=FUN_000016;\n+sample\tfunannotate\tmRNA\t209044\t210553\t.\t-\t.\tID=FUN_000016-T1;Parent=FUN_000016;product=hypothetical protein;\n+sample\tfunannotate\texon\t210548\t210553\t.\t-\t.\tID=FUN_000016-T1.exon1;Parent=FUN_000016-T1;\n+sample\tfunannotate\texon\t209044\t210474\t.\t-\t.\tID=FUN_000016-T1.exon2;Parent=FUN_000016-T1;\n+sample\tfunannotate\tCDS\t210548\t210553\t.\t-\t0\tID=FUN_000016-T1.cds;Parent=FUN_000016-T1;\n+sample\tfunannotate\tCDS\t209044\t210474\t.\t-\t0\tID=FUN_000016-T1.cds;Parent=FUN_000016-T1;\n+sample4\tfunannotate\tgene\t1092\t1466\t.\t-\t.\tID=FUN_000017;\n+sample4\tfunannotate\tmRNA\t1092\t1466\t.\t-\t.\tID=FUN_000017-T1;Parent=FUN_000017;product=hypothetical protein;\n+sample4\tfunannotate\texon\t1092\t1466\t.\t-\t.\tID=FUN_000017-T1.exon1;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tCDS\t1092\t1466\t.\t-\t0\tID=FUN_000017-T1.cds;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tgene\t2126\t3537\t.\t+\t.\tID=FUN_000018;\n+sample4\tfunannotate\tmRNA\t2126\t3537\t.\t+\t.\tID=FUN_000018-T1;Parent=FUN_000018;product=hypothetical 
protein;\n+sample4\tfunannotate\texon\t2126\t2199\t.\t+\t.\tID=FUN_000018-T1.exon1;Parent=FUN_000018-T1;\n+sample4\tfunannotate\texon\t2258\t3224\t.\t+\t.\tID=FUN_000018-T1.exon2;Parent=FUN_000018-T1;\n+sample4\tfunannotate\texon\t3284\t3537\t.\t+\t.\tID=FUN_000018-T1.exon3;Parent=FUN_000018-T1;\n+sample4\tfunannotate\tCDS\t2126\t2199\t.\t+\t0\tID=FUN_000018-T1.cds;Parent=FUN_000018-T1;\n+sample4\tfunannotate\tCDS\t2258\t3224\t.\t+\t1\tID=FUN_000018-T1.cds;Parent=FUN_000018-T1;\n+sample4\tfunannotate\tCDS\t3284\t3537\t.\t+\t0\tID=FUN_000018-T1.cds;Parent=FUN_000018-T1;\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_bam/Genus_species.mrna-transcripts.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_bam/Genus_species.mrna-transcripts.fa Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,413 @@\n+>FUN_000001-T1 FUN_000001\n+TACCCCTTCGACAGCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGA\n+ACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCG\n+ATGAAAAACTGCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGG\n+TTGCGCAAATCCGAGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGG\n+TTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCT\n+TCAGCGACACCAAGAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCC\n+GACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGC\n+CCAGCTGGAGGAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGC\n+CCACAGAGCAGACGCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtac\n+aagATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCG\n+TCAGGACGGCATCTACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCA\n+ACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCTATAG\n+>FUN_000002-T1 FUN_000002\n+ATGTATGACCAGGAGCCCTACAAGTGCCATCTCTGCTCCAAGACGTTCCGAATGAAGGGCTCCCTTCGAATCCACCTGAA\n+AGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAATCCCAGTCCTACACCCGCTTCTACCACCA\n+GTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACACAGAGCCTGGAGCCCTTGGCAACGGAAAT\n+AACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGCATGCTCCAGCAGTCCCCCAGTTCGCCGGA\n+GTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTCCTTCACCACCAAGTACTTTCTGAAGAAGC\n+ACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCAGGACCTTCACCTTCCAGCAGTCGTACCAC\n+AAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGTGGACGCGCCTTCAAGGAACTGTCTACGCT\n+GCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAAGTCTGCGGTGAGTATCGATCAAACCCCTTAA\n+TTAGCCGCCCCTTATTTTGTTAG\n+>FUN_000003-T1 
FUN_000003\n+ATGATACATTCGGCTAAGACTACCTTGGTGGAGTGGACAGAGGCCATGGAAGACGGAAACAAGGGCTACCAGCTAATCGA\n+AAAGTACTACCTCGATGACCAGCAAAAGGCACGGGAGCTGAACATTAAGCGTCAGCTCTTGCAGGCGGACATCGACAAGC\n+GGCGCAAGCAGGTGGTGCTCCTCTATGACGAGCAAATGACGCTGGAGAAGAATCTGGAGCGAACCGCTTGTCTGTACAGG\n+TCTGCCCACGCGGAACGCCGCCAGATGGTGGAGACGTGGAAGAGCGCCGTGAACCAGATGACCCAGCGAGAGCACGACAT\n+TCAGCGCAGCGAGATAGAGTGTGCGGAGCTGGCCCAGAAAGCCCAGCAGACGGCTCAGACCTACAAGGAGTACGACAACC\n+AGCTAACCGAGGTCATCGAGAACAACCGCCAGGTGGAGCTCGCGATCGAGTCCCTCAACGAGGAAAGCTCCGACATGAAA\n+AACCAAATCCAGATCCTGATTGATGCCACATTACTGAAGGAGCGCGAGATCGACGGACTGCGTCGGGAACTGGAGAACCT\n+TTCGAACCGGGTTCACCTGCAGCGCATGGATAACCGTAGTCAGATGAAAAAGCGCGATGAGAAGGCCAAGGAGCTGGAGA\n+ACTTCGCTTCGGTGATGGAGAAGGTTAACGCTCGGCTGAAGTCTGTACAGAACAAGGCCCTCAATGCGGAGCAGCGGCTG\n+CAAATTCTGGAGGAGATGATGCAGGCGGAGGAGACGGCTCTGCGAAACCTGGACAAGGAGCAGGAGAAGGTCAACGAGAT\n+GTTGTACCGCACCCAGAGGCAGGTCATCGAGCTGCAGGACGAGGAGAAGGTCTTGAAGGTGCAGAACGACTCGCTCAACT\n+CCAACCTGGCAGCGATCAATCGCAATCAGCAGCAGGTCAATAACGAACTCAAGCGGCAGACGGAGATCCATTACAGCCTG\n+TCCTTTAAGTGCTTGGAGGCGGAGCGGCGTCTCGCCGAGATCAAGGGTCTGGCAGATGACCCCGAGGTAGAGGCCACAAA\n+CATGGCCCGGCTTAACACTCTGGAGCAGGAGTATGAAAAGTTGCAACGCCTCATCGCCACCACGGAGGCGCAGAATAAGA\n+AGCTAAACTATAACATGAACAACCTGGTCGTCCAGTACAACGCCGACGAGAAGGAGCTCGAAATGGTCAGGTTCAAGATT\n+AAAGAGGCCCAGGTGTACTGCGAGGGCACCGTCAAGAGACTGCGCCAGATTCGATACGAGAATTCCGAGCTCATAGTCGA\n+CCTCAACATGGTCAAGATGCGCTGCAGCGACCTAGAGGTTGGCATCGGAGGCTGCGAGCAGGGCACCTACGACCTAGAGC\n+AGCACCGCCTTGCCTTCCGGCGCGCCATCAAAGATCGCACCGTTGAGCTTCGCAGCCAAGAGGATGTGCTCCTCCTCAAG\n+AAGAAGCATCTTAACGAGGAACTAAGCACCCTGCGAGCCGACCTCGGGGAGCGGAAGAAGCAGATTGAGGCGATGAAGGC\n+GCGCTTTGAACTTACCGCCCAATTGCTAGGCAAAAACGAGGACGGCTCCATCATGACCAGCACACAGCTGAAGGTGGTGA\n+GCGCCCAGGAACGGCAGATGCTGGCCGACGAGGGCGATGCCCTCAACAAAAAGGTTCTCAAGGCTGAGAAGGAAGTGGTC\n+GCCCTGGAAAACACACTGCGCCAGTTCGACAAATCGAACGATAACTACCGAAAAACATTCCGATCTGTGGACGAAAACTC\n+AAAGGGTGAGTTATAA\n+>FUN_000004-T1 
FUN_000004\n+ATGCGTTGCGTTTTCTGTGGATCGGGCAGCGAGCAGCAGCATTCGCGATGGGAAATCAAGATGCTGCAGGAGAGCTGCCG\n+CACTGATCATGGATTCCACCAGGACTCGCAAGCTATCCAATATCTGTATGAGATCCTGGCCTCTTACAACCGCGACGAAC\n+AGCGCGCCTTCTTACAGTTTGTGACTGGATCACCACGCCTTCCGACTGGAGGATTCAAGGCCCTTACGCCACCACTGACT\n+ATTGTACGCAAGACGTTGGATGAGAACCAAAACCCTAACGATTACTTACCATCTGTGATGACCTGTGTCAACTATCTAAA\n+GTTGCCCGACTACTCTAGTCGCGAGGTGATGAGGCAGAAGCTGAAAGTGGCTGCTAACGAAGGCAGCATGTCTTTCCACC\n+TCTCATAA\n+>'..b'GATCCAAGCATTGCGGATGGTGCAGGCCGAGACCCCGCCCTATATTAAACA\n+ATGTCATAGGAACGACCCGAAATTGGTGGACTGCTTTATCGGAGCTATTGAACACCTAAAGCCATATTTGGCCAATGGCA\n+TTCCTGATATTCAGCTGCCCTCTGTGGAGCCCTTTAAGATGGACACCCTTGCCCTGCAGTTAACAGAGGGTCCCCAGGGG\n+TATAAGATCACGCTGAAGAACATGGAGGCCTTCGGGGCCAGCAACTTCAAGGTGACATCCCTGAAACTGAGCGAAGGAAG\n+CGAGCCCTTCAAGGCGAAGATCGTGATGCCCAAGCTAAAGATTGAGGCTAAATACACGAGCTCCGGGGTCCTGCTGATCC\n+TGCCCGCCTCCGGAGGTGGGGACTTCCATGCTAACTTCGAGGGTGTGAGTGCCGATCTCACAGGAAAGACATCCATTCAC\n+GCCTTCAAGGGCGCTAACTACCTCCACATCGATGCTCTCAGCTTGGTTCTGGATGTGAAGGATGTGAAAATGAGCATCTC\n+AGGTGCCTTCAACAACAATCGAATTCTGCTGGAGGCCACCAATCTGTTTCTGCGGGAAAACTCTCAAGTCGTTTTGGAGG\n+CTATGCAGGCTCAATTGCAGAAAAAATTGGCTAGCGAGTTCGGCAAACTCGCCAACCAGCTCCTGAAGAATGTTCCTGTA\n+GAGCAATTCTACGTGGACTAG\n+>FUN_000016-T1 
FUN_000016\n+ATGGCATCCAAGTCCTTCGATTTGGTCATTGAGGAAAAGACAAAGAAACCGGAACGGCTCTACCAACCTCGTCGGATGCG\n+ATGGCTTAAATATATCATCCTGCCGGCAGTCTTCTCCTTTGCGCTTCTGCTGATCCTGGTCAATGTGGACTTCTCCGATA\n+ATAGTGAGGATTCCACGCATCTGGGCAATGACACATCGCTCATTATATCTGGCTATGGATTTGAAAATAACACACTGCGC\n+CGGGGATTTTTCTCCGGAGGAATTGCATTGCATAGCCTTGTCATCGAGAACTGCACCATAGTTCATATCAACGACGCAGC\n+TTTTAATCAGGAAAGCACTGTGAACATAACAAGCCTGCAGCTCATTAATGTGCAATTGGAAAACTTGACTGAATCTGCCC\n+TGGAAGGTCTTCAAAAGCTTCAGAACTTTACTCTAGTCAATGAAAATAATCACTTCAGGCCATTTGGATTCCTCTCAGCT\n+GTGGCTGAATCGTTAGTCAGTGCGGAAATCCACCAGTCACTAGCCGCGGCAATATCGTATTCAGTGTGTGATTTTTTGGG\n+ATCCCGGAATTTCCCACAGTTGAAATATTTAGATCTGAGTGGCACACACTTGGACAAAAGCCTTATTAAAGAGTCCTTTG\n+ACAATCTGCCCGCGTTGGAACAACTACTTCTAAGGAACTGTGGGTTGGGCAACATCGAGTGGGAAATCGTGAGGCCAAGA\n+CTCAAATTGTTGCACTATTTGGACTTGGGTGGAGCTCAAAAGACGGGCAACTATGAACATCAGTTGGACGTATCCGCGTT\n+CTCTCCTGAGACAACTACTAACGCTGAAGAGATATCTACTATCCTAGCGAAGAGAGCGATGGCTCCAGAAGTAGTGGGCA\n+CCACTACACTAGGACCTACTACTTCCATAGAAATTTCACCACCATCAACACAAAGTACAACAACACCCAAAGAAGAAAGT\n+ACATCTATGACAGAAACAACGATATTAACAACACCATCGCCAAAATGCGAAGAGGAACTTTGCCAGGATCTAGAGTGCTC\n+CAGAATTACCACCGATACGGTTGCATCTGCAGATCTGGGAAAATCGTCGTGCCAGGATGGTTTACTGGTGGAGATCTGCG\n+AATCGACTTGCACCACTCCCACATTCTTCTGTGTGATATTAGGCGAGAACTTTACCTCCGCATCCAACTGTTGTTCCCAC\n+CATACCATGCGATGTGTGGTCTCTGCACAGGTCTCCTGGTTCGAGGACCACAGCGGACTGGTCATTGGCTTGGGAGTGGG\n+TCTCCTCTTCATCGGCAGCTTCCTCGGCATGCTCATCGTCTTCGGAACCCTGCGCCTGAATCCATCCTGGTTGCGAGGCA\n+ATAAGCGGCGGGAGTCGAATACGATAGGACTGATTCAGGGAAGATTTGAGAAGGACCCGTATGAGCAAGTGGGGTAG\n+>FUN_000017-T1 FUN_000017\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000018-T1 
FUN_000018\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCGATCGACAGTCTCGTTCTCGGAG\n+TCGAACTCGCAGCGG\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_bam/Genus_species.proteins.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_bam/Genus_species.proteins.fa Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,155 @@\n+>FUN_000001-T1 FUN_000001\n+YPFDSHRFVQYNPTSLERNFKYDVLTEHDLGVTVDLINRELYQADSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSW\n+LRKSEYISTEQTRFQPQNLENIEAKVGYNVKKSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFP\n+DFTNWKFPCAQVIFDSDPAPAGKNVPAQLEEMSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFINGELYKEEEEYEY\n+KIAREYNWNVKTKASKGYEENYFFVMRQDGIYYNELETRVRLNKRRVKVGQQPNNTKLVSIFMRIHL\n+>FUN_000002-T1 FUN_000002\n+MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPNPSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGN\n+NSTCTASQPYALSGALSMLQQSPSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCEVCGEYRSNPLISRPLFC\n+>FUN_000003-T1 FUN_000003\n+MIHSAKTTLVEWTEAMEDGNKGYQLIEKYYLDDQQKARELNIKRQLLQADIDKRRKQVVLLYDEQMTLEKNLERTACLYR\n+SAHAERRQMVETWKSAVNQMTQREHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELAIESLNEESSDMK\n+NQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRDEKAKELENFASVMEKVNARLKSVQNKALNAEQRL\n+QILEEMMQAEETALRNLDKEQEKVNEMLYRTQRQVIELQDEEKVLKVQNDSLNSNLAAINRNQQQVNNELKRQTEIHYSL\n+SFKCLEAERRLAEIKGLADDPEVEATNMARLNTLEQEYEKLQRLIATTEAQNKKLNYNMNNLVVQYNADEKELEMVRFKI\n+KEAQVYCEGTVKRLRQIRYENSELIVDLNMVKMRCSDLEVGIGGCEQGTYDLEQHRLAFRRAIKDRTVELRSQEDVLLLK\n+KKHLNEELSTLRADLGERKKQIEAMKARFELTAQLLGKNEDGSIMTSTQLKVVSAQERQMLADEGDALNKKVLKAEKEVV\n+ALENTLRQFDKSNDNYRKTFRSVDENSKGEL\n+>FUN_000004-T1 FUN_000004\n+MRCVFCGSGSEQQHSRWEIKMLQESCRTDHGFHQDSQAIQYLYEILASYNRDEQRAFLQFVTGSPRLPTGGFKALTPPLT\n+IVRKTLDENQNPNDYLPSVMTCVNYLKLPDYSSREVMRQKLKVAANEGSMSFHLS\n+>FUN_000005-T1 
FUN_000005\n+MSFCDFLRDAALGANSANLSIVAAALSAARDVGGGSDGGGSAGGATPATGASASSVGNTSAVGASSSSNSSAGQAASSNS\n+NNVTATGSGSAPGGGPTSTGTTSGTQHGSGSGAAAAVDSESDDSEVGRLQALLEARGLPPHLFGALGPRVTHILHRTIGN\n+SSSSKANQLLQGLQSHDESQQLQAAIEMCQMLVMGNEDTLAGFPIKQVVPALIQLLRMEHNFDIMNNACRALAYMLEALP\n+RSSGTVVEAVPVFLEKLQVIQCMDVAEQSLSALEILSRRHNKAILQANGISACLTYLDFFSIVAQRAALAIAANCCLNMH\n+PEEFHFVAESLPLLARLLSQQDKKCIESVCSAFCRLVESFQHDGQRLQQIASPDLLKNCQQLLLVTPAILNTGTFTAVVR\n+MLSLMCCSCPDLAISLLRNDIAATLLYLLTGNAEPAAASATHVELISRSPSELYELTCLIGELMPRLPLDGIFAVDSLLD\n+RPTLNTQDQVHWQWRDDRGSWHNYSTIDSRLIEAANQSSEDEISLSTFGRTYTVDFHAMQQINEDTGTTRPVQRRLNHNY\n+VAPMSAGQDLTTTSAGSAAAGGASTSAAAAAASSNNNNNNNNNPPGNSVNLNQVKRRPSLDARIACLKVRNKMFCLFTRY\n+TLKLNNLPNYNVFFMKYFRRHS\n+>FUN_000006-T1 FUN_000006\n+MKFRALMQDPLYMKEFQAIVATLTKLAKDCVMILGSRQMHFIVNEDQSSAASPLVWAGITAEEYFPEYRMEAAHPDQEYI\n+VLGVSSANLGRALSVLRGGGVNSCKLKLQRIQFPCISVIASVLTSSSTEAREVVHDVPVTIIPGSDWSAYVVPRVPNSQL\n+ALGLPSLRLLKSLIDKLKNISPSLEFQVNVDGELNVIATSEMSTVTSRFQKLLIRTVSGSQQEASCSVDSRKASAFFGAL\n+QLPNEELTIGIDREHSIHLQIDVRQDVVLHSILPAVCM\n+>FUN_000007-T1 FUN_000007\n+MCGNPAVGNGTRALILVGGYGTRLRPLTLSTPKPLVEFANKPILLHQLEALVDAGCRQVILAVSYRAEQMEKELKVEAKK\n+LGVELIFSHETEPLGTAGPLALAKTILAASSEPFFVLNSDVICDFPFKQLVQFHCNHGKEGTIVVTKVEEPSKYGVVLYD\n+ENGCIKNFIEKPQEFVSNKINAGIYIFNPSVLDRIEVKPTSIEKEVFPEMTQQQELYAMDLTGFWMDIGQPKDFLTGMCL\n+YLSSLRQKQSPKLYTGPGVVGNVLVDPTAKIGEGCRIGPNVTIGPDVVIEDGVCIKRSTILKGAIVRSHSWLDSCIVGWR\n+STVGRWVRIEGITVLGEDVIVKDELYINGGQVLPHKSIAASLRGAIVQAGQLVILPDEEVFSHVQGVWNLSSDQGNLGSF\n+VVTNIRLVWFADANETFNISLPYLQIESSKYGPALVIQTAETGGGYVLGFRVDPAERLNELFKELSSLHTVYGEHPNFGI\n+QYNANDARRRLEAASEEAAQASQIKVDNFEELDERQEREINTKLNSYLAEGCLGKVPSQGERAPVYCKELGFAMEPIGDG\n+YKLQDLWNVMPTKMETME\n+>FUN_000008-T1 
FUN_000008\n+MKTLSVRLHRGTEFIKDTVHKALVMSAPTPVAPATAPAPKIVDHSLKRKLSGAGGLMGCSSIGSMTSSIAGSSRSHHYAL\n+TSQVASSQVIPLPSQVPTAAFLRTYTVAPTALHRSAAARKRNPSTDSLLMDLCLFKPIRPMPITPIKIHKFRGFEVKKPK\n+FVPAGNPDSEDDEDNDEDGTVRKPKPSNLTLPTISDSAFVPMPYIETTNTAINATTTTNSGSRSRSLNTHTSGSAQAITK\n+PKRRRRAPMLTAKRRRKALDTELTTSADAGTEDKAPAVRKATAARGGSKRSRGESITAPTPAEPIKSPVAIKAPTKRKST\n+SRSEAAKRSRVASVQNDTVLTATSTTSADSIRKAATKRIAANEKVAKRSRGSAALSARPSPPMTRQRARQQISAST\n+>FUN_000009-T1 FUN_000009\n+MVTLRLPWCIRHKPPLCRIGLSHGCECDNSKKMAASSHAPESDRRAQRLRTQSNWNPPDHSALSLGKLVSRKLTPTAVGH\n+WVVGRQRAACACAGGPNADWTDGQPIESSRGCIFQPAPHCHGGRIARHFG\n+>FUN_000010-T1 FUN_000010\n+MDFIHEAVGTSAAPSANPGPMPLCQPVRLELPFSPRHSFALGHFPFQLCPCLYAVWVQTMGLVLLLLIVFAPFLFRVLFK\n+PCNPKRYLTT\n+>FUN_000011-T1 FUN_000011\n+MEVDLDESIKEAAPEKKVHSITRSSPSPKRAKNSSPEPPKPKSTKSKATTPRVKKEKPAADLESSVLTDEERHERKRASA\n+VLYQKYKNRSSCLNPGSKEIPKGSPDCLSGLTFVVTGVLESMEREEAESVIKEYGGKVMTVVGKKLKYLVVGEEAGPKKL\n+AVAEELNIPILSEDGLFDLIREKSGIAKQVKEEKKSPK'..b'YVSVDESHSAASKSPVPGTGGGTEGYPHRVPTIECEEPSIEEDENSSERRHLKVGGQDTNRLSLDRSRSDETGSW\n+MTVECDEFIGSDTSDNEPRTLEPDRNVLETQATLEDANPLEYSNCATPTSDLNILLTPPNASPQIEKSVLETFEKYTGSS\n+DTGKKKNTLDKQSDRSKSSDSWTSGEKDTSPQRQQDWSLSVGKEKSSVEEESSVSCSIARPLGISQDFGKEEARKCQELK\n+QRMLQLEVGKEEITPTPSNEQTPTNEPKILVSKKPSTPTLEKQSPIDLGTSTESYLEPIEERIAKILDRGGARTEDSESS\n+SGGSRKPPRIEKPARANAGKKLSVTRADAGKSGSDRSSQESKSSFDSKGSLSVESRGSFETESSSGSLGAAQRRGELAQK\n+EQQSTWRPFPIESSNSSSTDDPWHHVETDGGYERYDAQNPLRDSSDSDVKEASPDDQKDASDASYQDELNDFPATFGYPA\n+MTSSLGGIGVNPTDIIGYSTGFTLGRTLSRISERSTASEKSSMEDDVSKASTHSVSMRDESVGSTDHQPSLSSDSRSNTN\n+LAYISDADRRTSAEMPEIPCDSATGDRLSSFGSLNEPKSPTLVTGRFSVTHVDEQQGDDVERHTLMCLSNAGSQDSEDWP\n+LPEIPFDHVPVKPADSLYAMPDLDKPVPKSFCWKASLSFQQSQDSLDWPSPPSSAIGAPIIVENIETYYASEVQSADKVI\n+LDEEMAVGPPDVAKVLPYEDTAYLMSAAFDDNDFGNEQLQPDTVSCLSSTLSAASCLSSSLNVSCTTSSTQATARALRKN\n+SSPEVIVAQPTRSPAPRSPLSEDELFSSDDVFMPGTIKVQLSPDAQLRKLSKGSNNSDTSIDDILSGSTTYLEDQTTVRK\n+NYEARLSSGGGGASCKKCSHSSHSEEETSSLGTDLDGTVRMGGLQQKKCTHSSHSEDTSIGLSISEWSTGTNTVRQYANL\n+SGSDSLSAVSTHSCAKSEKSNQTKSSISSINKSAESLNEQSGGSSFSHKFSGDNGSSDGL
RYDMLSNSETDKLSEATSAT\n+RSDDTTLTLTEMAHTISEWSTSSSRTLVGVAPGEYLPLKQALSGNKTSLSSPSEEKRCALPQVHRRSGSNGNQARAAQEH\n+ADSQTGPETSAAARKRRSLEMMSKLYQSQEICSESESPFVERLYAHSEKLTERYQSQEFVPLHGGPPASHLASSTTSQIQ\n+TQQPQQVRQKPRAPQPPTKPKPAVTRPIMQALLNKMKQPGLAEQAAEAAEAEEKKAMIAASAVAAKPPPPPVPTVPPIVT\n+PSDLPGDAVAPPPKPLAKHHSYDDRTLSKTQIREFKTTSKQLRQSSSFHEHMLSKSQQSSQELPMRIDEERDPHSTSSAT\n+NTTTTTNTLNSESTEPNSPQMPQRADKLVRCSPYYSSSLSSESPPNQLLQKPPRKTATQLSAGAVAASLKSPPSGNDTDS\n+SLDVRGQEAKMRSRGYRKKRQLPVKRMRANLTAAALLEQAESSECSEGYVPEVDSGSSEYSSCQRDDQYLEFDEELERDQ\n+TDDYEDYPQYSGKFESLDMSDNVDEMGFPRYDRLSHITKPMYHQALVMERPNPVQLPAPANHPMPPATGQPVKPARTKKR\n+QFKREDSTAAGTSGHSTAAPQVRPYHGRSYCNPEESEYETRGGGLSDELANSSEDSCSGFGGDAGASGSGTIRRGTTKGA\n+GQDQEQGTGGQARHVPYPDFLSDYESEPIEYERYACGLDIRVDPPPKFHDSDELSDQ\n+>FUN_000013-T1 FUN_000013\n+MSLDRRGEITTPPTRYDLTLGSDKSSSLSRSEAGTYDVIQAEIQHAKRQELATGVATASHQNGNGNGNGHTLSTQHDIEA\n+EVKKRKWPTEPSYFLAKELLMTERTYKKDLDVLNTTFRQVLSLGDVEQLQPLFELLDSLAQHHNLFLRDIEHRMVQWEGR\n+GGHEAHRIGDVMMKHMAALPIYDEYVQTHLDILHCMNDMYEGDERFRQVYKEFEQQKVCYLPIGELLLKPLNRLLHYQLI\n+LERLCDYYGEEHIDYADAMAVHHLLVRSTKGIRSQLPDSANFVELCELQRDINFEQLVQPHRRLIRQGCLLKHSKRGLQQ\n+RMFFLFSDLLLYGSKSPLDQSFRILGHVPVRSLLTENAEHNTFSIFGGQCAITVSAGTTAEKTLWLAELSKAAADIKNRP\n+PNMQLQLTTLKNCSSSEEGLDLFGLSNGNNSSLNSSVNGGGPLTTQQQKLQLQQQQQNRTQPSRSNTALHVCWHRGATVG\n+LGDHLIAAEHQLSGYLLRKFKNSSGWQKLWVVFTSFCLYFYKSYQDEFALASLPLLGYTVGPPGHQDAVQKEFVFKLSFK\n+NHVYFFRAESAHTYNR\n+>FUN_000014-T1 FUN_000014\n+MSTPARRRLMRDFKRLQEDPPTGVSGAPTDNNIMIWNAVIFGPHDTPFEDGTFKLTIEFTEEYPNKPPTVRFVSKVFHPN\n+VYADGGICLDILQNRWSPTYDVSAILTSIQSLLSDPNPNSPANSTAAQLYKENRREYEKRVKACVEQSFID\n+>FUN_000015-T1 FUN_000015\n+MNKAVCLVIVIQALRMVQAETPPYIKQCHRNDPKLVDCFIGAIEHLKPYLANGIPDIQLPSVEPFKMDTLALQLTEGPQG\n+YKITLKNMEAFGASNFKVTSLKLSEGSEPFKAKIVMPKLKIEAKYTSSGVLLILPASGGGDFHANFEGVSADLTGKTSIH\n+AFKGANYLHIDALSLVLDVKDVKMSISGAFNNNRILLEATNLFLRENSQVVLEAMQAQLQKKLASEFGKLANQLLKNVPV\n+EQFYVD\n+>FUN_000016-T1 
FUN_000016\n+MASKSFDLVIEEKTKKPERLYQPRRMRWLKYIILPAVFSFALLLILVNVDFSDNSEDSTHLGNDTSLIISGYGFENNTLR\n+RGFFSGGIALHSLVIENCTIVHINDAAFNQESTVNITSLQLINVQLENLTESALEGLQKLQNFTLVNENNHFRPFGFLSA\n+VAESLVSAEIHQSLAAAISYSVCDFLGSRNFPQLKYLDLSGTHLDKSLIKESFDNLPALEQLLLRNCGLGNIEWEIVRPR\n+LKLLHYLDLGGAQKTGNYEHQLDVSAFSPETTTNAEEISTILAKRAMAPEVVGTTTLGPTTSIEISPPSTQSTTTPKEES\n+TSMTETTILTTPSPKCEEELCQDLECSRITTDTVASADLGKSSCQDGLLVEICESTCTTPTFFCVILGENFTSASNCCSH\n+HTMRCVVSAQVSWFEDHSGLVIGLGVGLLFIGSFLGMLIVFGTLRLNPSWLRGNKRRESNTIGLIQGRFEKDPYEQVG\n+>FUN_000017-T1 FUN_000017\n+MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAGESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKR\n+FIRNPNYVKANEFYDKMLSSEYVSKRYKDLPPPHPGFGADQPPA\n+>FUN_000018-T1 FUN_000018\n+MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFDLKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLG\n+VTVDLINRELYQADSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK\n+KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPCAQVIFDSDPAPAGKNVPAQLEE\n+MSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFINGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGI\n+YYNELETRVRLNKRRVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIGETEKTSED\n+AAVGAQAASGADSPAQVARDRQSRSRSRTRS\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_bam/Genus_species.stats.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_bam/Genus_species.stats.json Mon Oct 04 19:38:37 2021 +0000
b
@@ -0,0 +1,120 @@
+{
+    "format": "annotation",
+    "command": "/home/abretaud/miniconda3/envs/__funannotate@1.8.7/bin/funannotate predict --input /tmp/tmp2z22js7e/files/c/c/3/dataset_cc3f66b5-ec9b-4669-93d6-4ddeac0e33c1.dat --out output --database /home/abretaud/.planemo/planemo_tmp_z_14xthq/test-data/funannotate_db --species Genus species --isolate  --strain  --organism other --ploidy 1 --SeqCenter CFMR --SeqAccession 12345 --name FUN_ --numbering 1 --rna_bam /tmp/tmp2z22js7e/files/9/6/5/dataset_965b5091-b838-4f4a-8ec8-9fb84c12cdc5.dat --transcript_evidence /tmp/tmp2z22js7e/files/d/a/e/dataset_daea4ce7-3191-40eb-ad83-b35e9e058d46.dat --p2g_pident 80 --p2g_prefilter diamond --min_training_models 3 --busco_seed_species fly --busco_db insecta --evm-partition-interval 1500 --min_intronlen 10 --max_intronlen 3000 --min_protlen 50 --repeat_filter overlap blast --cpus 1",
+    "organism": "Genus_species",
+    "software": {
+        "name": "funannotate",
+        "version": "1.8.7",
+        "date": "2021-07-22",
+        "resources": {
+            "merops": {
+                "type": "diamond",
+                "version": "12.0",
+                "date": "2017-10-04",
+                "num-records": "5009"
+            },
+            "uniprot": {
+                "type": "diamond",
+                "version": "2021_03",
+                "date": "2021-06-02",
+                "num-records": "565254"
+            },
+            "dbCAN": {
+                "type": "hmmer3",
+                "version": "9.0",
+                "date": "2020-08-04",
+                "num-records": "641"
+            },
+            "pfam": {
+                "type": "hmmer3",
+                "version": "34.0",
+                "date": "2021-03",
+                "num-records": "19179"
+            },
+            "repeats": {
+                "type": "diamond",
+                "version": "1.0",
+                "date": "2021-07-19",
+                "num-records": "11950"
+            },
+            "go": {
+                "type": "text",
+                "version": "2021-07-02",
+                "date": "2021-07-02",
+                "num-records": "47228"
+            },
+            "mibig": {
+                "type": "diamond",
+                "version": "1.4",
+                "date": "2021-07-19",
+                "num-records": "31023"
+            },
+            "interpro": {
+                "type": "xml",
+                "version": "86.0",
+                "date": "2021-06-03",
+                "num-records": "38913"
+            },
+            "busco_outgroups": {
+                "type": "outgroups",
+                "version": "1.0",
+                "date": "2021-07-19",
+                "num-records": "8"
+            },
+            "gene2product": {
+                "type": "text",
+                "version": "1.70",
+                "date": "2021-06-15",
+                "num-records": "34039"
+            }
+        }
+    },
+    "assembly": {
+        "num_contigs": 4,
+        "length": 227430,
+        "mean_length": 56857.5,
+        "N50": 215740,
+        "L50": 1,
+        "N90": 215740,
+        "L90": 1,
+        "GC_content": 42.86
+    },
+    "annotation": {
+        "genes": 18,
+        "common_name": 0,
+        "mRNA": 18,
+        "tRNA": 0,
+        "ncRNA": 0,
+        "rRNA": 0,
+        "avg_gene_length": 2775.33,
+        "transcript-level": {
+            "CDS_transcripts": 18,
+            "CDS_five_utr": 0,
+            "CDS_three_utr": 0,
+            "CDS_no_utr": 18,
+            "CDS_five_three_utr": 0,
+            "CDS_complete": 16,
+            "CDS_no-start": 1,
+            "CDS_no-stop": 1,
+            "CDS_no-start_no-stop": 0,
+            "total_exons": 54,
+            "total_cds_exons": 54,
+            "multiple_exon_transcript": 15,
+            "single_exon_transcript": 3,
+            "avg_exon_length": 563.63,
+            "avg_protein_length": 571.83,
+            "functional": {
+                "go_terms": 0,
+                "interproscan": 0,
+                "eggnog": 0,
+                "pfam": 0,
+                "cazyme": 0,
+                "merops": 0,
+                "busco": 0,
+                "secretion": 0
+            },
+            "pct_exon_overlap_transcript_evidence": 31.48,
+            "pct_exon_overlap_protein_evidence": 9.26
+        }
+    }
+}
\ No newline at end of file
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_bam/Genus_species.tbl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_bam/Genus_species.tbl Mon Oct 04 19:38:37 2021 +0000
b
@@ -0,0 +1,276 @@
+>Feature sample
+1 215740 REFERENCE
+ CFMR 12345
+<2331 3254 gene
+ locus_tag FUN_000001
+<2331 3254 mRNA
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000001-T1_mrna
+ protein_id gnl|ncbi|FUN_000001-T1
+<2331 3254 CDS
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000001-T1_mrna
+ protein_id gnl|ncbi|FUN_000001-T1
+5802 4883 gene
+ locus_tag FUN_000002
+5802 5797 mRNA
+5539 4883
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000002-T1_mrna
+ protein_id gnl|ncbi|FUN_000002-T1
+5802 5797 CDS
+5539 4883
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000002-T1_mrna
+ protein_id gnl|ncbi|FUN_000002-T1
+10557 8696 gene
+ locus_tag FUN_000003
+10557 10549 mRNA
+10462 8696
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000003-T1_mrna
+ protein_id gnl|ncbi|FUN_000003-T1
+10557 10549 CDS
+10462 8696
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000003-T1_mrna
+ protein_id gnl|ncbi|FUN_000003-T1
+15214 14247 gene
+ locus_tag FUN_000004
+15214 15209 mRNA
+14648 14247
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000004-T1_mrna
+ protein_id gnl|ncbi|FUN_000004-T1
+15214 15209 CDS
+14648 14247
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000004-T1_mrna
+ protein_id gnl|ncbi|FUN_000004-T1
+21705 19533 gene
+ locus_tag FUN_000005
+21705 21700 mRNA
+21515 19533
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000005-T1_mrna
+ protein_id gnl|ncbi|FUN_000005-T1
+21705 21700 CDS
+21515 19533
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000005-T1_mrna
+ protein_id gnl|ncbi|FUN_000005-T1
+35679 34843 gene
+ locus_tag FUN_000006
+35679 34843 mRNA
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000006-T1_mrna
+ protein_id gnl|ncbi|FUN_000006-T1
+35679 34843 CDS
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000006-T1_mrna
+ protein_id gnl|ncbi|FUN_000006-T1
+40223 44130 gene
+ locus_tag FUN_000007
+40223 40396 mRNA
+40659 41193
+41707 42080
+43409 43609
+43678 44130
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000007-T1_mrna
+ protein_id gnl|ncbi|FUN_000007-T1
+40223 40396 CDS
+40659 41193
+41707 42080
+43409 43609
+43678 44130
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000007-T1_mrna
+ protein_id gnl|ncbi|FUN_000007-T1
+47195 45527 gene
+ locus_tag FUN_000008
+47195 46753 mRNA
+46330 46214
+46157 45527
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000008-T1_mrna
+ protein_id gnl|ncbi|FUN_000008-T1
+47195 46753 CDS
+46330 46214
+46157 45527
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000008-T1_mrna
+ protein_id gnl|ncbi|FUN_000008-T1
+79527 78685 gene
+ locus_tag FUN_000009
+79527 79519 mRNA
+79068 78685
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000009-T1_mrna
+ protein_id gnl|ncbi|FUN_000009-T1
+79527 79519 CDS
+79068 78685
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000009-T1_mrna
+ protein_id gnl|ncbi|FUN_000009-T1
+87202 88320 gene
+ locus_tag FUN_000010
+87202 87207 mRNA
+88054 88320
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000010-T1_mrna
+ protein_id gnl|ncbi|FUN_000010-T1
+87202 87207 CDS
+88054 88320
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000010-T1_mrna
+ protein_id gnl|ncbi|FUN_000010-T1
+133587 137862 gene
+ locus_tag FUN_000011
+133587 134504 mRNA
+134720 135510
+135569 136284
+137516 137862
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000011-T1_mrna
+ protein_id gnl|ncbi|FUN_000011-T1
+133587 134504 CDS
+134720 135510
+135569 136284
+137516 137862
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000011-T1_mrna
+ protein_id gnl|ncbi|FUN_000011-T1
+144294 164495 gene
+ locus_tag FUN_000012
+144294 144551 mRNA
+149012 149244
+149367 149588
+149654 149897
+149952 150112
+150174 150248
+151966 152072
+152314 152429
+152496 152751
+153296 153630
+153689 155122
+155789 158975
+159190 164495
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000012-T1_mrna
+ protein_id gnl|ncbi|FUN_000012-T1
+144294 144551 CDS
+149012 149244
+149367 149588
+149654 149897
+149952 150112
+150174 150248
+151966 152072
+152314 152429
+152496 152751
+153296 153630
+153689 155122
+155789 158975
+159190 164495
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000012-T1_mrna
+ protein_id gnl|ncbi|FUN_000012-T1
+167121 169212 gene
+ locus_tag FUN_000013
+167121 168360 mRNA
+168722 169212
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000013-T1_mrna
+ protein_id gnl|ncbi|FUN_000013-T1
+167121 168360 CDS
+168722 169212
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000013-T1_mrna
+ protein_id gnl|ncbi|FUN_000013-T1
+178916 176699 gene
+ locus_tag FUN_000014
+178916 178873 mRNA
+177172 176887
+176824 176699
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000014-T1_mrna
+ protein_id gnl|ncbi|FUN_000014-T1
+178916 178873 CDS
+177172 176887
+176824 176699
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000014-T1_mrna
+ protein_id gnl|ncbi|FUN_000014-T1
+192004 194669 gene
+ locus_tag FUN_000015
+192004 192067 mRNA
+193549 193658
+194041 194455
+194518 194669
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000015-T1_mrna
+ protein_id gnl|ncbi|FUN_000015-T1
+192004 192067 CDS
+193549 193658
+194041 194455
+194518 194669
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000015-T1_mrna
+ protein_id gnl|ncbi|FUN_000015-T1
+210553 209044 gene
+ locus_tag FUN_000016
+210553 210548 mRNA
+210474 209044
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000016-T1_mrna
+ protein_id gnl|ncbi|FUN_000016-T1
+210553 210548 CDS
+210474 209044
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000016-T1_mrna
+ protein_id gnl|ncbi|FUN_000016-T1
+>Feature sample4
+1 7560 REFERENCE
+ CFMR 12345
+1466 1092 gene
+ locus_tag FUN_000017
+1466 1092 mRNA
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000017-T1_mrna
+ protein_id gnl|ncbi|FUN_000017-T1
+1466 1092 CDS
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000017-T1_mrna
+ protein_id gnl|ncbi|FUN_000017-T1
+2126 >3537 gene
+ locus_tag FUN_000018
+2126 2199 mRNA
+2258 3224
+3284 >3537
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000018-T1_mrna
+ protein_id gnl|ncbi|FUN_000018-T1
+2126 2199 CDS
+2258 3224
+3284 >3537
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000018-T1_mrna
+ protein_id gnl|ncbi|FUN_000018-T1
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_bam/fly.parameters.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_bam/fly.parameters.json Mon Oct 04 19:38:37 2021 +0000
[
@@ -0,0 +1,1 @@
+{"augustus": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-22", "path": "/tmp/tmp2z22js7e/job_working_directory/000/5/working/output/predict_misc/ab_initio_parameters/augustus/species/genus_species"}], "genemark": [{}], "codingquarry": [{}], "snap": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-22", "path": "/tmp/tmp2z22js7e/job_working_directory/000/5/working/output/predict_misc/ab_initio_parameters/genus_species.snap.hmm"}], "glimmerhmm": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-22", "path": "/tmp/tmp2z22js7e/job_working_directory/000/5/working/output/predict_misc/ab_initio_parameters/glimmerhmm"}]}
\ No newline at end of file
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_scratch/Genus_species.cds-transcripts.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_scratch/Genus_species.cds-transcripts.fa Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,241 @@\n+>FUN_000001-T1 FUN_000001\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000002-T1 FUN_000002\n+ATGCCTCTGTACGGCGTTTCTCCAGCGTCTGCTCTGTGGGCAGGAAGTAGGCGACAAACTGTTCGCCGCTCTCGTCCATC\n+ACACCACGAATCATGGCCTGCGACATCTCCTCCAGCTGGGCGGGCACAGAGTCTCCTCCCGAAGCGACTTCTTGACGTTG\n+TAACCGACCTTGGCCTCGATGTTCTCCAGGTTCTGGGGCTGGAAGCGCGTCTGCTCGGTGGAGATGTACTCGGATTTGCG\n+CAACCATGA\n+>FUN_000003-T1 FUN_000003\n+ATGTATGACCAGGAGCCCTACAAGTGCCATCTCTGCTCCAAGACGTTCCGAATGAAGGGCTCCCTTCGAATCCACCTGAA\n+AGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAATCCCAGTCCTACACCCGCTTCTACCACCA\n+GTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACACAGAGCCTGGAGCCCTTGGCAACGGAAAT\n+AACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGCATGCTCCAGCAGTCCCCCAGTTCGCCGGA\n+GTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTCCTTCACCACCAAGTACTTTCTGAAGAAGC\n+ACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCAGGACCTTCACCTTCCAGCAGTCGTACCAC\n+AAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGTGGACGCGCCTTCAAGGAACTGTCTACGCT\n+GCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAACGAGTCTCTTTCCTTGTCCACACGCGCATCC\n+ACACGGGAGTGATGCCCTACAAGTGCGAGCTCTGCCAAAAGACGTTCAGGTACAAGGTCAGTCAAAGGACCCATCGATGT\n+CCCACCGAGGAGGCCCAGACACCGGAGCAGCTGATAAAGGCGTTCCTGGAGGGCAACGACTCGCATACTCAGCCTTCACC\n+AGCGAGCGCCGAAATAGCTGCCATCAACAGCAGCTCCATTGTGGATCCTGAGCAGGAGGCACTGCTTTCGCAGTCAATCG\n+ACGACATTGTCGTTGAGCAGTGCCAAAAGCTGGGCATCTGTGGTGTGGAGCCGCGGGAGGAGGGACAGCTCATATCCCTA\n+CAGCCGGTTGCGGTGGTACACTTCAGCGGGAATGGCTCTCCGCTGCAGCAACTCCAGAACTTGAGAATCTACTCACCGCA\n+ACAAACAGAGCTACCTAGTTCCGATGGCGAAGTCTTCCAGCGCTTTTTGATGGACGCCACGTAG\n+>FUN_000004-T1 
FUN_000004\n+ATGTCAAACAAGCGCGAGATAGATGAGCTGACTTCCCGCATCAAGTCGGCTAAGACTACCTTGGTGGAGTGGACAGAGGC\n+CATGGAAGACGGAAACAAGGGCTACCAGCTAATCGAAAAGTACTACCTCGATGACCAGCAAAAGGCACGGGAGCTGAACA\n+TTAAGCGTCAGCTCTTGCAGGCGGACATCGACAAGCGGCGCAAGCAGGTGGTGCTCCTCTATGACGAGCAAATGACGCTG\n+GAGAAGAATCTGGAGCGAACCGCTTGTCTGTACAGGTCTGCCCACGCGGAACGCCGCCAGATGGTGGAGACGTGGAAGAG\n+CGCCGTGAACCAGATGACCCAGCGAGAGCACGACATTCAGCGCAGCGAGATAGAGTGTGCGGAGCTGGCCCAGAAAGCCC\n+AGCAGACGGCTCAGACCTACAAGGAGTACGACAACCAGCTAACCGAGGTCATCGAGAACAACCGCCAGGTGGAGCTCGCG\n+ATCGAGTCCCTCAACGAGGAAAGCTCCGACATGAAAAACCAAATCCAGATCCTGATTGATGCCACATTACTGAAGGAGCG\n+CGAGATCGACGGACTGCGTCGGGAACTGGAGAACCTTTCGAACCGGGTTCACCTGCAGCGCATGGATAACCGTAGTCAGA\n+TGAAAAAGCGCGATGAGAAGGCCAAGGAGCTGGAGAACTTCGCTTCGGTGATGGAGAAGGTTAACGCTCGGCTGAAGTCT\n+GTACAGAACAAGGCCCTCAATGCGGAGCAGCGGCTGCAAATTCTGGAGGAGATGATGCAGGCGGAGGAGACGGCTCTGCG\n+AAACCTGGACAAGGAGCAGGAGAAGGTCAACGAGATGTTGTACCGCACCCAGAGGCAGGTCATCGAGCTGCAGGACGAGG\n+AGAAGGTCTTGAAGGTGCAGAACGACTCGCTCAACTCCAACCTGGCAGCGATCAATCGCAATCAGCAGCAGGTCAATAAC\n+GAACTCAAGCGGCAGACGGAGATCCATTACAGCCTGTCCTTTAAGTGCTTGGAGGCGGAGCGGCGTCTCGCCGAGATCAA\n+GGGTCTGGCAGATGACCCCGAGGTAGAGGCCACAAACATGGCCCGGCTTAACACTCTGGAGCAGGAGTATGAAAAGTTGC\n+AACGCCTCATCGCCACCACGGAGGCGCAGAATAAGAAGCTAAACTATAACATGAACAACCTGGTCGTCCAGTACAACGCC\n+GACGAGAAGGAGCTCGAAATGGTCAGGTTCAAGATTAAAGAGGCCCAGGTGTACTGCGAGGGCACCGTCAAGAGACTGCG\n+CCAGATTCGATACGAGAATTCCGAGCTCATAGTCGACCTCAACATGGTCAAGATGCGCTGCAGCGACCTAGAGGTTGGCA\n+TCGGAGGCTGCGAGCAGGGCACCTACGACCTAGAGCAGCACCGCCTTGCCTTCCGGCGCGCCATCAAAGATCGCACCGTT\n+GAGCTTCGCAGCCAAGAGGATGTGCTCCTCCTCAAGAAGAAGCATCTTAACGAGGAACTAAGCACCCTGCGAGCCGACCT\n+CGGGGAGCGGAAGAAGCAGATTGAGGCGATGAAGGCGCGCTTTGAACTTACCGCCCAATTGCTAGGCAAAAACGAGGACG\n+GCTCCATCATGACCAGCACACAGCTGAAGGTGGTGAGCGCCCAGGAACGGCAGATGCTGGCCGACGAGGGCGATGCCCTC\n+AACAAAAAGGTTCTCAAGGCTGAGAAGGAAGTGGTCGCCCTGGAAAACACACTGCGCCAGTTCGACAAATCGAACGATAA\n+CTACCGAAAAACATTCCGATCTGTGGACGAAAACTCAAAGGATCGCGAGCGGGCCGAACTGGAGCTGAAGGAACTTGAGG\n+CGGCCTACTGCCGCGAGCTGGAAAAACTGAAGGTTCTCAGGTGCAAGGCACAGCACTACCAGCAGAAGCACGCAGCCC
AG\n+CGAGCTGAGGAGGAAGACCTGATTTCCAAAATAGAGAAGGCGAAGGCTAGTCGGGCGGAGCACTCTGCAGTTCTGGAGAA\n+GATCGAGCGCGAACTAGATGACCAGCGAATGAAGCTGGACAGGGCCAA'..b'+GAGGGCCTGGACCTATTCGGTCTGAGCAACGGGAACAACAGCAGCCTGaacagcagtgtgaacggcgGCGGTCCGCTGAC\n+TACGCAgcagcagaagttgcagctgcagcaacagcagcaAAACAGGACGCAGCCATCCCGAAGCAATACGGCTCTGCACG\n+TCTGTTGGCATCGTGGCGCCACCGTAGGACTGGGCGATCACCTGATAGCCGCCGAGCACCAGTTGTCCGGTTATCTGCTG\n+CGAAAGTTCAAGAATAGCTCCGGCTGGCAGAAGCTCTGGGTGGTGTTCACGTCCTTTTGTCTGTACTTTTACAAGAGCTA\n+CCAGGACGAGTTCGCACTGGCTAGTCTGCCGCTATTGGGTTACACGGTGGGTCCTCCTGGTCACCAAGATGCCGTTCAGA\n+AGGAGTTCGTCTTCAAGCTTTCCTTTAAGAACCACGTCTACTTCTTCCGCGCGGAAAGTGCGCACACCTACAACAGGTAA\n+>FUN_000013-T1 FUN_000013\n+ATGCAGCCCATCGCCATGGAGATGGGGCAGAATCAACTCGAAGTCAAAGCGTCGGTTTTGGGTGGAAGCCCGCAGCTTGG\n+TGATCTCAAAGTTGGAGGCGCCCAGGATGTTCAGCTTCTTGGCTTTTACCGTGAGCCCGGCGGATCCATCCAGAATGCTC\n+AGATCTCCAATGTAGAGGGGCTCTAG\n+>FUN_000014-T1 FUN_000014\n+ATGGCATCCAAGTCCTTCGATTTGGTCATTGAGGAAAAGACAAAGAAACCGGAACGGCTCTACCAACCTCGTCGGATGCG\n+ATGGCTTAAATATATCATCCTGCCGGCAGTCTTCTCCTTTGCGCTTCTGCTGATCCTGGTCAATGTGGACTTCTCCGATA\n+ATAGTGAGGATTCCACGCATCTGGGCAATGACACATCGCTCATTATATCTGGCTATGGATTTGAAAATAACACACTGCGC\n+CGGGGATTTTTCTCCGGAGGAATTGCATTGCATAGCCTTGTCATCGAGAACTGCACCATAGTTCATATCAACGACGCAGC\n+TTTTAATCAGGAAAGCACTGTGAACATAACAAGCCTGCAGCTCATTAATGTGCAATTGGAAAACTTGACTGAATCTGCCC\n+TGGAAGGTCTTCAAAAGCTTCAGAACTTTACTCTAGTCAATGAAAATAATCACTTCAGGCCATTTGGATTCCTCTCAGCT\n+GTGGCTGAATCGTTAGTCAGTGCGGAAATCCACCAGTCACTAGCCGCGGCAATATCGTATTCAGTGTGTGATTTTTTGGG\n+ATCCCGGAATTTCCCACAGTTGAAATATTTAGATCTGAGTGGCACACACTTGGACAAAAGCCTTATTAAAGAGTCCTTTG\n+ACAATCTGCCCGCGTTGGAACAACTACTTCTAAGGAACTGTGGGTTGGGCAACATCGAGTGGGAAATCGTGAGGCCAAGA\n+CTCAAATTGTTGCACTATTTGGACTTGGGTGGAGCTCAAAAGACGGGCAACTATGAACATCAGTTGGACGTATCCGCGTT\n+CTCTCCTGAGACAACTACTAACGCTGAAGAGATATCTACTATCCTAGCGAAGAGAGCGATGGCTCCAGAAGTAGTGGGCA\n+CCACTACACTAGGACCTACTACTTCCATAGAAATTTCACCACCATCAACACAAAGTACAACAACACCCAAAGAAGAAAGT\n+ACATCTATGACAGAAACAACGATATTAACAACACCATCGCCAAAATGCGAAGAGGAACTTTGCCAGGATCTAGAGTGCTC\n+CAGAATTACCACCGATACGGTTGCATCTGC
AGATCTGGGAAAATCGTCGTGCCAGGATGGTTTACTGGTGGAGATCTGCG\n+AATCGACTTGCACCACTCCCACATTCTTCTGTGTGATATTAGGCGAGAACTTTACCTCCGCATCCAACTGTTGTTCCCAC\n+CATACCATGCGATGTGTGGTCTCTGCACAGGTCTCCTGGTTCGAGGACCACAGCGGACTGGTCATTGGCTTGGGAGTGGG\n+TCTCCTCTTCATCGGCAGCTTCCTCGGCATGCTCATCGTCTTCGGAACCCTGCGCCTGAATCCATCCTGGTTGCGAGGCA\n+ATAAGCGGCGGGAGTCGAATACGATAGGACTGATTCAGGGAAGATTTGAGAAGGACCCGTATGAGCAAGAATTGCAACGT\n+ATACGAGAGCTGTGA\n+>FUN_000015-T1 FUN_000015\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000016-T1 FUN_000016\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGT
TCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCGATCGACAGTCTCGTTCTCGGAG\n+TCGAACTCGCAGCGG\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_scratch/Genus_species.discrepency.report.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_scratch/Genus_species.discrepency.report.txt Mon Oct 04 19:38:37 2021 +0000
b
@@ -0,0 +1,135 @@
+Discrepancy Report Results
+
+Summary
+DISC_PROTEIN_NAMES:All proteins have same name "hypothetical protein"
+DISC_SOURCE_QUALS_ASNDISC:taxname (all present, all same)
+DISC_FEATURE_COUNT:gene: 16 present
+DISC_FEATURE_COUNT:CDS: 16 present
+DISC_FEATURE_COUNT:mRNA: 16 present
+DISC_COUNT_NUCLEOTIDES:4 nucleotide Bioseqs are present
+JOINED_FEATURES:26 features have joined locations.
+NO_ANNOTATION:2 bioseqs have no features
+DISC_QUALITY_SCORES:Quality scores are missing on all sequences.
+FATAL: DISC_BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS:1 features have partial ends that do not abut the end of the sequence or a gap, and cannot be extended by 3 or fewer nucleotides to do so
+ONCALLER_COMMENT_PRESENT:4 comment descriptors were found (all same)
+MISSING_GENOMEASSEMBLY_COMMENTS:4 bioseqs are missing GenomeAssembly structured comments
+MOLTYPE_NOT_MRNA:4 molecule types are not set as mRNA.
+TECHNIQUE_NOT_TSA:4 technique are not set as TSA
+MISSING_STRUCTURED_COMMENT:4 sequences do not include structured comments.
+MISSING_PROJECT:20 sequences do not include project.
+DISC_INCONSISTENT_MOLINFO_TECH:Molinfo Technique Report (some missing, all same)
+
+
+Detailed Report
+
+DiscRep_ALL:DISC_PROTEIN_NAMES::All proteins have same name "hypothetical protein"
+
+DiscRep_ALL:DISC_SOURCE_QUALS_ASNDISC::taxname (all present, all same)
+DiscRep_SUB:DISC_SOURCE_QUALS_ASNDISC::4 sources have 'Genus species' for taxname
+DiscRep_ALL:DISC_FEATURE_COUNT::gene: 16 present
+DiscRep_ALL:DISC_FEATURE_COUNT::CDS: 16 present
+DiscRep_ALL:DISC_FEATURE_COUNT::mRNA: 16 present
+DiscRep_ALL:DISC_COUNT_NUCLEOTIDES::4 nucleotide Bioseqs are present
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:JOINED_FEATURES::26 features have joined locations.
+DiscRep_SUB:JOINED_FEATURES::26 features have joined location but no exception
+genome:CDS hypothetical protein (sample4:2126-2199, 2258-3224, 3284->3537) FUN_000016
+genome:mRNA hypothetical protein (sample4:2126-2199, 2258-3224, 3284->3537) FUN_000016
+genome:mRNA hypothetical protein (sample:c3142-3138, c3004-2883, c2686-2565) FUN_000002
+genome:CDS hypothetical protein (sample:c3142-3138, c3004-2883, c2686-2565) FUN_000002
+genome:mRNA hypothetical protein (sample:c5802-5797, c5539-4937, c4742-4248) FUN_000003
+genome:CDS hypothetical protein (sample:c5802-5797, c5539-4937, c4742-4248) FUN_000003
+genome:CDS hypothetical protein (sample:c10664-10657, c10499-8707, c8385-7691) FUN_000004
+genome:mRNA hypothetical protein (sample:c10664-10657, c10499-8707, c8385-7691) FUN_000004
+genome:mRNA hypothetical protein (sample:c15214-15209, c14648-14247) FUN_000005
+genome:CDS hypothetical protein (sample:c15214-15209, c14648-14247) FUN_000005
+genome:CDS hypothetical protein (sample:15539-15543, 15646-15919, 16485-16619) FUN_000006
+genome:mRNA hypothetical protein (sample:15539-15543, 15646-15919, 16485-16619) FUN_000006
+genome:CDS hypothetical protein (sample:c21705-21700, c21515-19638, c19482-18358) FUN_000007
+genome:mRNA hypothetical protein (sample:c21705-21700, c21515-19638, c19482-18358) FUN_000007
+genome:CDS hypothetical protein (sample:40223-40396, 40659-41193, 41707-42080, 43409-43609, 43678-44130) FUN_000009
+genome:mRNA hypothetical protein (sample:40223-40396, 40659-41193, 41707-42080, 43409-43609, 43678-44130) FUN_000009
+genome:mRNA hypothetical protein (sample:87202-87207, 88054-88320) FUN_000010
+genome:CDS hypothetical protein (sample:87202-87207, 88054-88320) FUN_000010
+genome:CDS hypothetical protein (sample:c106221-106216, c104632-104258, c103947-103696, c103618-103229, c103151-102510) FUN_000011
+genome:mRNA hypothetical protein (sample:c106221-106216, c104632-104258, c103947-103696, c103618-103229, c103151-102510) FUN_000011
+genome:CDS hypothetical protein (sample:167121-168069, 168722-169212) FUN_000012
+genome:mRNA hypothetical protein (sample:167121-168069, 168722-169212) FUN_000012
+genome:CDS hypothetical protein (sample:180262-180267, 180400-180579) FUN_000013
+genome:mRNA hypothetical protein (sample:180262-180267, 180400-180579) FUN_000013
+genome:CDS hypothetical protein (sample:c210553-210548, c210474-209053, c208645-208619) FUN_000014
+genome:mRNA hypothetical protein (sample:c210553-210548, c210474-209053, c208645-208619) FUN_000014
+
+DiscRep_ALL:NO_ANNOTATION::2 bioseqs have no features
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+
+DiscRep_ALL:DISC_QUALITY_SCORES::Quality scores are missing on all sequences.
+
+FATAL: DiscRep_ALL:DISC_BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS::1 features have partial ends that do not abut the end of the sequence or a gap, and cannot be extended by 3 or fewer nucleotides to do so
+genome:CDS hypothetical protein (sample4:2126-2199, 2258-3224, 3284->3537) FUN_000016
+
+DiscRep_ALL:ONCALLER_COMMENT_PRESENT::4 comment descriptors were found (all same)
+genome:sample:"Annotated using 1.8.7"
+genome:sample2:"Annotated using 1.8.7"
+genome:sample3:"Annotated using 1.8.7"
+genome:sample4:"Annotated using 1.8.7"
+
+DiscRep_ALL:MISSING_GENOMEASSEMBLY_COMMENTS::4 bioseqs are missing GenomeAssembly structured comments
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:MOLTYPE_NOT_MRNA::4 molecule types are not set as mRNA.
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:TECHNIQUE_NOT_TSA::4 technique are not set as TSA
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:MISSING_STRUCTURED_COMMENT::4 sequences do not include structured comments.
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
+DiscRep_ALL:MISSING_PROJECT::20 sequences do not include project.
+genome:sample (length 215740)
+genome:ncbi:FUN_000001-T1 (length 124)
+genome:ncbi:FUN_000002-T1 (length 82)
+genome:ncbi:FUN_000003-T1 (length 367)
+genome:ncbi:FUN_000004-T1 (length 831)
+genome:ncbi:FUN_000005-T1 (length 135)
+genome:ncbi:FUN_000006-T1 (length 137)
+genome:ncbi:FUN_000007-T1 (length 1002)
+genome:ncbi:FUN_000008-T1 (length 278)
+genome:ncbi:FUN_000009-T1 (length 578)
+genome:ncbi:FUN_000010-T1 (length 90)
+genome:ncbi:FUN_000011-T1 (length 554)
+genome:ncbi:FUN_000012-T1 (length 479)
+genome:ncbi:FUN_000013-T1 (length 61)
+genome:ncbi:FUN_000014-T1 (length 484)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+genome:ncbi:FUN_000015-T1 (length 124)
+genome:ncbi:FUN_000016-T1 (length 432)
+
+DiscRep_ALL:DISC_INCONSISTENT_MOLINFO_TECH::Molinfo Technique Report (some missing, all same)
+DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::technique (all missing)
+DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::4 Molinfos are missing field technique
+genome:sample (length 215740)
+genome:sample2 (length 2030)
+genome:sample3 (length 2100)
+genome:sample4 (length 7560)
+
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_scratch/Genus_species.gbk
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_scratch/Genus_species.gbk Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,4148 @@\n+LOCUS       sample                215740 bp    DNA     linear       21-JUL-2021\n+DEFINITION  Genus species.\n+ACCESSION   \n+VERSION\n+KEYWORDS    .\n+SOURCE      Genus species\n+  ORGANISM  Genus species\n+            Unclassified.\n+REFERENCE   1  (bases 1 to 215740)\n+  AUTHORS   Palmer,J.M.\n+  TITLE     Direct Submission\n+  JOURNAL   Submitted (21-JUL-2021) CFMR, USDA Forest Service, 1 Gifford\n+            Pinchot Drive, Madison, WI 53726, USA\n+COMMENT     \'Annotated using 1.8.7\'.\n+FEATURES             Location/Qualifiers\n+     source          1..215740\n+                     /organism="Genus species"\n+                     /mol_type="genomic DNA"\n+     gene            complement(1092..1466)\n+                     /locus_tag="FUN_000001"\n+     mRNA            complement(1092..1466)\n+                     /locus_tag="FUN_000001"\n+                     /product="hypothetical protein"\n+     CDS             complement(1092..1466)\n+                     /locus_tag="FUN_000001"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUN_000001-T1"\n+                     /translation="MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAG\n+                     ESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKRFIRNPNYVKANEFYDKMLSSEY\n+                     VSKRYKDLPPPHPGFGADQPPA"\n+     gene            complement(2565..3142)\n+                     /locus_tag="FUN_000002"\n+     mRNA            complement(join(2565..2686,2883..3004,3138..3142))\n+                     /locus_tag="FUN_000002"\n+                     /product="hypothetical protein"\n+     CDS             complement(join(2565..2686,2883..3004,3138..3142))\n+                     /locus_tag="FUN_000002"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUN_000002-T1"\n+                     
/translation="MPLYGVSPASALWAGSRRQTVRRSRPSHHESWPATSPPAGRAQS\n+                     LLPKRLLDVVTDLGLDVLQVLGLEARLLGGDVLGFAQP"\n+     gene            complement(4248..5802)\n+                     /locus_tag="FUN_000003"\n+     mRNA            complement(join(4248..4742,4937..5539,5797..5802))\n+                     /locus_tag="FUN_000003"\n+                     /product="hypothetical protein"\n+     CDS             complement(join(4248..4742,4937..5539,5797..5802))\n+                     /locus_tag="FUN_000003"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUN_000003-T1"\n+                     /translation="MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPN\n+                     PSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGNNSTCTASQPYALSGALSMLQQS\n+                     PSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+                     KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCERVSFLVHTRIHTGVM\n+                     PYKCELCQKTFRYKVSQRTHRCPTEEAQTPEQLIKAFLEGNDSHTQPSPASAEIAAIN\n+                     SSSIVDPEQEALLSQSIDDIVVEQCQKLGICGVEPREEGQLISLQPVAVVHFSGNGSP\n+                     LQQLQNLRIYSPQQTELPSSDGEVFQRFLMDAT"\n+     gene            complement(7691..10664)\n+                     /locus_tag="FUN_000004"\n+     mRNA            complement(join(7691..8385,8707..10499,10657..10664))\n+                     /locus_tag="FUN_000004"\n+                     /product="hypothetical protein"\n+     CDS             complement(join(7691..8385,8707..10499,10657..10664))\n+                     /locus_tag="FUN_000004"\n+                     /codon_start=1\n+                     /product="hypothetical protein"\n+                     /protein_id="ncbi:FUN_000004-T1"\n+                     /translation="MSNKREIDELTSRIKSAKTTLVEWTEAMEDGNKGYQLIEKYYLD\n+                     DQQKARELNIKRQLLQADIDKRRKQVVLLYDEQMTLEKNLERTACLYRSAHAERRQMV\n+                     
ETWKSAVNQMTQREHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELA\n+                     IESLNEESSDMKNQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRD\n+                     EKAKELENFASVMEKVNARLKS'..b'1 agcttttggc actgctcaac gacaatgtcg tcgattgact gcgaaagcag tgcctcctgc\n+     4501 tcaggatcca caatggagct gctgttgatg gcagctattt cggcgctcgc tggtgaaggc\n+     4561 tgagtatgcg agtcgttgcc ctccaggaac gcctttatca gctgctccgg tgtctgggcc\n+     4621 tcctcggtgg gacatcgatg ggtcctttga ctgaccttgt acctgaacgt cttttggcag\n+     4681 agctcgcact tgtagggcat cactcccgtg tggatgcgcg tgtggacaag gaaagagact\n+     4741 cgctgccgga aacacttgcc tgtggacgag tccgaaatac gaaaaggtta gacatggagt\n+     4801 gacccggaaa agaaggtata cctctccttc atttaaagta aatagggcaa atcgcaatgg\n+     4861 agtatgctca tttataagct ggctaacaaa ataaggggcg gctaattaag gggtttgatc\n+     4921 gatactcacc gcagacttcg cacttgaagg gcttttcgcc gctgtgaatc ctctggtggt\n+     4981 tatgcagcgt agacagttcc ttgaaggcgc gtccacaaac cccgcagaca tggggcttta\n+     5041 cctcgctgtg gtagagcaaa tgcttgtggt acgactgctg gaaggtgaag gtcctggcgc\n+     5101 agatctcgca tgtgtacggc atctcgccgg tgtggagccg cttgtgcttc ttcagaaagt\n+     5161 acttggtggt gaaggacttg ctgcacacat cgcactccca cagcttcggg gtggccgtgc\n+     5221 ccgactccgg cgaactgggg gactgctgga gcatgctcag cgccccactc agcgcgtatg\n+     5281 gctgggatgc ggtgcacgtg gagttatttc cgttgccaag ggctccaggc tctgtgtgtc\n+     5341 gaatgcgatc gcaaatgctc agcttgggcg tggcagtaac tgcactggtg gtagaagcgg\n+     5401 gtgtaggact gggattagga tttggattgg gattggagca gggcacgccc atcatgtgca\n+     5461 ctactttcag gtggattcga agggagccct tcattcggaa cgtcttggag cagagatggc\n+     5521 acttgtaggg ctcctggtcc tgtataaagc aataatcgga atttcactta tatttatcaa\n+     5581 ttcatcaata tgccctcatg gccaaatatt ccattacatt accgtctgtc tctcagtttc\n+     5641 aaatttatgc acaaaaatca ttcactttca ttcactatat cacaaagttg ccatggtttt\n+     5701 aaattgatca aaaacaaatt aatatctatc atatatatac atagtcatat gaacagttga\n+     5761 aaaattaatt gaaaataatg ggaacgatat acgtacatac atcagttgtt tttaaaatat\n+     5821 aagggtatat 
agatttcttt cttgttgttg ttgattttaa ttacgtcaaa cttttgtttc\n+     5881 agattcaatg taaatggtct agctttttaa gtatgatttt tttttgctgc cagtgagcat\n+     5941 agaaaaaaaa aatcaaaatc gatataagaa tatgcgaaag tgcattacga aactctttag\n+     6001 ataatagcac ttaatatatg tacatagcca atagttaccg gttccttctg ttggggttcc\n+     6061 ttttgcttgg gttctccctc cgcattttcg tggactaagc ggacgtgcat gtccctcagc\n+     6121 tcggtattcc ggaaactgaa ctcgcaaatg tcgcagtggg cgggcggggt ggtccgcaca\n+     6181 ggcggtgggg ttgggacgac gggcttggac ctggttcgct tggccctccg tttgggagga\n+     6241 gctgcggcaa ggaagccccg ggacgaggcg ggttggccat tcgctggact ctcgtttccc\n+     6301 tcctgagcca tcagacttgt gtgcgagaac aggtggatgg tgagcttgtc cagccccagg\n+     6361 aagagctcct tgcagttggc aaagggacag gccagtgggc cgttggccgc cttgatcagc\n+     6421 ctctgctgca gtgcgtcaat actgccgaaa ctgggcaccg cgcagagcgg acacagcacc\n+     6481 gaggtggaac acatttcgcc agtgcactca atcgaatctt atgcaaatgc ttcacctcct\n+     6541 attgggatta tcctcctgtt cggtctgtga tcatctattc aggagtccat tcccagactg\n+     6601 cctagtcttt ctgctttcaa aattttctaa aaatatcagc aagtgaagat ttttgaaaac\n+     6661 tttgggccca gcaatctgac ttctcggcac cgatgccagc taacgaaata atgaaaaata\n+     6721 atgaaatgcc cggcgcggat cgtcgaatcg tcaagaagac tttcggaaac actcgcagca\n+     6781 ccgaaatccc atctctcgaa caaggcagtc tcttttctcc gtgtctctgg gtagctcatt\n+     6841 tcgaaatata gctctgagca cggctatata ctatatgtat gtagaattat ttctggccga\n+     6901 tatatgttgc actggcggcc atatagcctt cgttctagtc tttgtaacgc acgatgcgca\n+     6961 ggagcaattc gctgagatga ccacatgcga tttgcgggac ttatctagag atctatcatt\n+     7021 atcgccagat tggtttaata attggctttt ccgccaatat ccaattggaa tatggttggt\n+     7081 tactgcaatt gtcgctccat tttttaagca ctccataaaa agtaaacaca ttaatatgta\n+     7141 ctcttattgg agatttcttc tttcgatttt agtttcggac cagtgaaaat cattcgtttc\n+     7201 attttcgtaa ataagaactg agaaaatatt attattatat atatttcttt attaggaaaa\n+     7261 tacgaagatt gagtatttca gattgaatta gcatatccgt ctaaatctta atgctgtaat\n+     7321 gagcttactt gagatctgat caaaaccaat acaaaaccca caccaaaggt 
ggtagctaat\n+     7381 atacatattt tgtgtaatac ttttgtagag tatttactat tcagcgattt aaacaagcaa\n+     7441 tcgcctagac acacacattt gtccgcctat gtgtatgtgc accgagctat acccccactg\n+     7501 aatcgctgtg tgctattttt atggccgcga tgctctcttg ttttgacccg cttgggcaac\n+//\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_scratch/Genus_species.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_scratch/Genus_species.gff3 Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,117 @@\n+##gff-version 3\n+sample\tfunannotate\tgene\t1092\t1466\t.\t-\t.\tID=FUN_000001;\n+sample\tfunannotate\tmRNA\t1092\t1466\t.\t-\t.\tID=FUN_000001-T1;Parent=FUN_000001;product=hypothetical protein;\n+sample\tfunannotate\texon\t1092\t1466\t.\t-\t.\tID=FUN_000001-T1.exon1;Parent=FUN_000001-T1;\n+sample\tfunannotate\tCDS\t1092\t1466\t.\t-\t0\tID=FUN_000001-T1.cds;Parent=FUN_000001-T1;\n+sample\tfunannotate\tgene\t2565\t3142\t.\t-\t.\tID=FUN_000002;\n+sample\tfunannotate\tmRNA\t2565\t3142\t.\t-\t.\tID=FUN_000002-T1;Parent=FUN_000002;product=hypothetical protein;\n+sample\tfunannotate\texon\t3138\t3142\t.\t-\t.\tID=FUN_000002-T1.exon1;Parent=FUN_000002-T1;\n+sample\tfunannotate\texon\t2883\t3004\t.\t-\t.\tID=FUN_000002-T1.exon2;Parent=FUN_000002-T1;\n+sample\tfunannotate\texon\t2565\t2686\t.\t-\t.\tID=FUN_000002-T1.exon3;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t3138\t3142\t.\t-\t0\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t2883\t3004\t.\t-\t1\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t2565\t2686\t.\t-\t2\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tgene\t4248\t5802\t.\t-\t.\tID=FUN_000003;\n+sample\tfunannotate\tmRNA\t4248\t5802\t.\t-\t.\tID=FUN_000003-T1;Parent=FUN_000003;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t5797\t5802\t.\t-\t.\tID=FUN_000003-T1.exon1;Parent=FUN_000003-T1;\n+sample\tfunannotate\texon\t4937\t5539\t.\t-\t.\tID=FUN_000003-T1.exon2;Parent=FUN_000003-T1;\n+sample\tfunannotate\texon\t4248\t4742\t.\t-\t.\tID=FUN_000003-T1.exon3;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t5797\t5802\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t4937\t5539\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t4248\t4742\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tgene\t7691\t10664\t.\t-\t.\tID=FUN_000004;\n+sample\tfunannotate\tmRNA\t7691\t10664\t.\t-\t.\tID=FUN_000004-T1;Parent=FUN_000004;product=hypothetical protein;\n+sample\tfunannotate\texon\t10657\t10664\t.\t-\t.\tID=FUN_000004-T1.exon1;Parent=FUN_000004-T1;\n+sample\tfunannotate\texon\t8707\t10499\t.\t-\t.\tID=FUN_000004-T1.exon2;Parent=FUN_000004-T1;\n+sample\tfunannotate\texon\t7691\t8385\t.\t-\t.\tID=FUN_000004-T1.exon3;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t10657\t10664\t.\t-\t0\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t8707\t10499\t.\t-\t1\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t7691\t8385\t.\t-\t2\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tgene\t14247\t15214\t.\t-\t.\tID=FUN_000005;\n+sample\tfunannotate\tmRNA\t14247\t15214\t.\t-\t.\tID=FUN_000005-T1;Parent=FUN_000005;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t15209\t15214\t.\t-\t.\tID=FUN_000005-T1.exon1;Parent=FUN_000005-T1;\n+sample\tfunannotate\texon\t14247\t14648\t.\t-\t.\tID=FUN_000005-T1.exon2;Parent=FUN_000005-T1;\n+sample\tfunannotate\tCDS\t15209\t15214\t.\t-\t0\tID=FUN_000005-T1.cds;Parent=FUN_000005-T1;\n+sample\tfunannotate\tCDS\t14247\t14648\t.\t-\t0\tID=FUN_000005-T1.cds;Parent=FUN_000005-T1;\n+sample\tfunannotate\tgene\t15539\t16619\t.\t+\t.\tID=FUN_000006;\n+sample\tfunannotate\tmRNA\t15539\t16619\t.\t+\t.\tID=FUN_000006-T1;Parent=FUN_000006;product=hypothetical protein;\n+sample\tfunannotate\texon\t15539\t15543\t.\t+\t.\tID=FUN_000006-T1.exon1;Parent=FUN_000006-T1;\n+sample\tfunannotate\texon\t15646\t15919\t.\t+\t.\tID=FUN_000006-T1.exon2;Parent=FUN_000006-T1;\n+sample\tfunannotate\texon\t16485\t16619\t.\t+\t.\tID=FUN_000006-T1.exon3;Parent=FUN_000006-T1;\n+sample\tfunannotate\tCDS\t15539\t15543\t.\t+\t0\tID=FUN_000006-T1.cds;Parent=FUN_000006-T1;\n+sample\tfunannotate\tCDS\t15646\t15919\t.\t+\t1\tID=FUN_000006-T1.cds;Parent=FUN_000006-T1;\n+sample\tfunannotate\tCDS\t16485\t16619\t.\t+\t0\tID=FUN_000006-T1.cds;Parent=FUN_000006-T1;\n+sample\tfunannotate\tgene\t18358\t21705\t.\t-\t.\tID=FUN_000007;\n+sample\tfunannotate\tmRNA\t18358\t21705\t.\t-\t.\tID=FUN_000007-T1;Parent=FUN_000007;product=hypothetical protein;\n+sample\tfunannotate\texon\t21700\t21705\t.\t-\t.\tID=FUN_000007-T1.exon1;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t19638\t21515\t.\t-\t.\tID=FUN_000007-T1.exon2;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t18358\t19482\t.\t-\t.\tID=FUN_000007-T1.exon3;Parent=FUN_00'..b'N_000010-T1;\n+sample\tfunannotate\tCDS\t87202\t87207\t.\t+\t0\tID=FUN_000010-T1.cds;Parent=FUN_000010-T1;\n+sample\tfunannotate\tCDS\t88054\t88320\t.\t+\t0\tID=FUN_000010-T1.cds;Parent=FUN_000010-T1;\n+sample\tfunannotate\tgene\t102510\t106221\t.\t-\t.\tID=FUN_000011;\n+sample\tfunannotate\tmRNA\t102510\t106221\t.\t-\t.\tID=FUN_000011-T1;Parent=FUN_000011;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t106216\t106221\t.\t-\t.\tID=FUN_000011-T1.exon1;Parent=FUN_000011-T1;\n+sample\tfunannotate\texon\t104258\t104632\t.\t-\t.\tID=FUN_000011-T1.exon2;Parent=FUN_000011-T1;\n+sample\tfunannotate\texon\t103696\t103947\t.\t-\t.\tID=FUN_000011-T1.exon3;Parent=FUN_000011-T1;\n+sample\tfunannotate\texon\t103229\t103618\t.\t-\t.\tID=FUN_000011-T1.exon4;Parent=FUN_000011-T1;\n+sample\tfunannotate\texon\t102510\t103151\t.\t-\t.\tID=FUN_000011-T1.exon5;Parent=FUN_000011-T1;\n+sample\tfunannotate\tCDS\t106216\t106221\t.\t-\t0\tID=FUN_000011-T1.cds;Parent=FUN_000011-T1;\n+sample\tfunannotate\tCDS\t104258\t104632\t.\t-\t0\tID=FUN_000011-T1.cds;Parent=FUN_000011-T1;\n+sample\tfunannotate\tCDS\t103696\t103947\t.\t-\t0\tID=FUN_000011-T1.cds;Parent=FUN_000011-T1;\n+sample\tfunannotate\tCDS\t103229\t103618\t.\t-\t0\tID=FUN_000011-T1.cds;Parent=FUN_000011-T1;\n+sample\tfunannotate\tCDS\t102510\t103151\t.\t-\t0\tID=FUN_000011-T1.cds;Parent=FUN_000011-T1;\n+sample\tfunannotate\tgene\t167121\t169212\t.\t+\t.\tID=FUN_000012;\n+sample\tfunannotate\tmRNA\t167121\t169212\t.\t+\t.\tID=FUN_000012-T1;Parent=FUN_000012;product=hypothetical protein;\n+sample\tfunannotate\texon\t167121\t168069\t.\t+\t.\tID=FUN_000012-T1.exon1;Parent=FUN_000012-T1;\n+sample\tfunannotate\texon\t168722\t169212\t.\t+\t.\tID=FUN_000012-T1.exon2;Parent=FUN_000012-T1;\n+sample\tfunannotate\tCDS\t167121\t168069\t.\t+\t0\tID=FUN_000012-T1.cds;Parent=FUN_000012-T1;\n+sample\tfunannotate\tCDS\t168722\t169212\t.\t+\t2\tID=FUN_000012-T1.cds;Parent=FUN_000012-T1;\n+sample\tfunannotate\tgene\t180262\t180579\t.\t+\t.\tID=FUN_000013;\n+sample\tfunannotate\tmRNA\t180262\t180579\t.\t+\t.\tID=FUN_000013-T1;Parent=FUN_000013;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t180262\t180267\t.\t+\t.\tID=FUN_000013-T1.exon1;Parent=FUN_000013-T1;\n+sample\tfunannotate\texon\t180400\t180579\t.\t+\t.\tID=FUN_000013-T1.exon2;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t180262\t180267\t.\t+\t0\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t180400\t180579\t.\t+\t0\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tgene\t208619\t210553\t.\t-\t.\tID=FUN_000014;\n+sample\tfunannotate\tmRNA\t208619\t210553\t.\t-\t.\tID=FUN_000014-T1;Parent=FUN_000014;product=hypothetical protein;\n+sample\tfunannotate\texon\t210548\t210553\t.\t-\t.\tID=FUN_000014-T1.exon1;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t209053\t210474\t.\t-\t.\tID=FUN_000014-T1.exon2;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t208619\t208645\t.\t-\t.\tID=FUN_000014-T1.exon3;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t210548\t210553\t.\t-\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t209053\t210474\t.\t-\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t208619\t208645\t.\t-\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample4\tfunannotate\tgene\t1092\t1466\t.\t-\t.\tID=FUN_000015;\n+sample4\tfunannotate\tmRNA\t1092\t1466\t.\t-\t.\tID=FUN_000015-T1;Parent=FUN_000015;product=hypothetical protein;\n+sample4\tfunannotate\texon\t1092\t1466\t.\t-\t.\tID=FUN_000015-T1.exon1;Parent=FUN_000015-T1;\n+sample4\tfunannotate\tCDS\t1092\t1466\t.\t-\t0\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample4\tfunannotate\tgene\t2126\t3537\t.\t+\t.\tID=FUN_000016;\n+sample4\tfunannotate\tmRNA\t2126\t3537\t.\t+\t.\tID=FUN_000016-T1;Parent=FUN_000016;product=hypothetical 
protein;\n+sample4\tfunannotate\texon\t2126\t2199\t.\t+\t.\tID=FUN_000016-T1.exon1;Parent=FUN_000016-T1;\n+sample4\tfunannotate\texon\t2258\t3224\t.\t+\t.\tID=FUN_000016-T1.exon2;Parent=FUN_000016-T1;\n+sample4\tfunannotate\texon\t3284\t3537\t.\t+\t.\tID=FUN_000016-T1.exon3;Parent=FUN_000016-T1;\n+sample4\tfunannotate\tCDS\t2126\t2199\t.\t+\t0\tID=FUN_000016-T1.cds;Parent=FUN_000016-T1;\n+sample4\tfunannotate\tCDS\t2258\t3224\t.\t+\t1\tID=FUN_000016-T1.cds;Parent=FUN_000016-T1;\n+sample4\tfunannotate\tCDS\t3284\t3537\t.\t+\t0\tID=FUN_000016-T1.cds;Parent=FUN_000016-T1;\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_scratch/Genus_species.mrna-transcripts.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_scratch/Genus_species.mrna-transcripts.fa Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,241 @@\n+>FUN_000001-T1 FUN_000001\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000002-T1 FUN_000002\n+ATGCCTCTGTACGGCGTTTCTCCAGCGTCTGCTCTGTGGGCAGGAAGTAGGCGACAAACTGTTCGCCGCTCTCGTCCATC\n+ACACCACGAATCATGGCCTGCGACATCTCCTCCAGCTGGGCGGGCACAGAGTCTCCTCCCGAAGCGACTTCTTGACGTTG\n+TAACCGACCTTGGCCTCGATGTTCTCCAGGTTCTGGGGCTGGAAGCGCGTCTGCTCGGTGGAGATGTACTCGGATTTGCG\n+CAACCATGA\n+>FUN_000003-T1 FUN_000003\n+ATGTATGACCAGGAGCCCTACAAGTGCCATCTCTGCTCCAAGACGTTCCGAATGAAGGGCTCCCTTCGAATCCACCTGAA\n+AGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAATCCCAGTCCTACACCCGCTTCTACCACCA\n+GTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACACAGAGCCTGGAGCCCTTGGCAACGGAAAT\n+AACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGCATGCTCCAGCAGTCCCCCAGTTCGCCGGA\n+GTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTCCTTCACCACCAAGTACTTTCTGAAGAAGC\n+ACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCAGGACCTTCACCTTCCAGCAGTCGTACCAC\n+AAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGTGGACGCGCCTTCAAGGAACTGTCTACGCT\n+GCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAACGAGTCTCTTTCCTTGTCCACACGCGCATCC\n+ACACGGGAGTGATGCCCTACAAGTGCGAGCTCTGCCAAAAGACGTTCAGGTACAAGGTCAGTCAAAGGACCCATCGATGT\n+CCCACCGAGGAGGCCCAGACACCGGAGCAGCTGATAAAGGCGTTCCTGGAGGGCAACGACTCGCATACTCAGCCTTCACC\n+AGCGAGCGCCGAAATAGCTGCCATCAACAGCAGCTCCATTGTGGATCCTGAGCAGGAGGCACTGCTTTCGCAGTCAATCG\n+ACGACATTGTCGTTGAGCAGTGCCAAAAGCTGGGCATCTGTGGTGTGGAGCCGCGGGAGGAGGGACAGCTCATATCCCTA\n+CAGCCGGTTGCGGTGGTACACTTCAGCGGGAATGGCTCTCCGCTGCAGCAACTCCAGAACTTGAGAATCTACTCACCGCA\n+ACAAACAGAGCTACCTAGTTCCGATGGCGAAGTCTTCCAGCGCTTTTTGATGGACGCCACGTAG\n+>FUN_000004-T1 
FUN_000004\n+ATGTCAAACAAGCGCGAGATAGATGAGCTGACTTCCCGCATCAAGTCGGCTAAGACTACCTTGGTGGAGTGGACAGAGGC\n+CATGGAAGACGGAAACAAGGGCTACCAGCTAATCGAAAAGTACTACCTCGATGACCAGCAAAAGGCACGGGAGCTGAACA\n+TTAAGCGTCAGCTCTTGCAGGCGGACATCGACAAGCGGCGCAAGCAGGTGGTGCTCCTCTATGACGAGCAAATGACGCTG\n+GAGAAGAATCTGGAGCGAACCGCTTGTCTGTACAGGTCTGCCCACGCGGAACGCCGCCAGATGGTGGAGACGTGGAAGAG\n+CGCCGTGAACCAGATGACCCAGCGAGAGCACGACATTCAGCGCAGCGAGATAGAGTGTGCGGAGCTGGCCCAGAAAGCCC\n+AGCAGACGGCTCAGACCTACAAGGAGTACGACAACCAGCTAACCGAGGTCATCGAGAACAACCGCCAGGTGGAGCTCGCG\n+ATCGAGTCCCTCAACGAGGAAAGCTCCGACATGAAAAACCAAATCCAGATCCTGATTGATGCCACATTACTGAAGGAGCG\n+CGAGATCGACGGACTGCGTCGGGAACTGGAGAACCTTTCGAACCGGGTTCACCTGCAGCGCATGGATAACCGTAGTCAGA\n+TGAAAAAGCGCGATGAGAAGGCCAAGGAGCTGGAGAACTTCGCTTCGGTGATGGAGAAGGTTAACGCTCGGCTGAAGTCT\n+GTACAGAACAAGGCCCTCAATGCGGAGCAGCGGCTGCAAATTCTGGAGGAGATGATGCAGGCGGAGGAGACGGCTCTGCG\n+AAACCTGGACAAGGAGCAGGAGAAGGTCAACGAGATGTTGTACCGCACCCAGAGGCAGGTCATCGAGCTGCAGGACGAGG\n+AGAAGGTCTTGAAGGTGCAGAACGACTCGCTCAACTCCAACCTGGCAGCGATCAATCGCAATCAGCAGCAGGTCAATAAC\n+GAACTCAAGCGGCAGACGGAGATCCATTACAGCCTGTCCTTTAAGTGCTTGGAGGCGGAGCGGCGTCTCGCCGAGATCAA\n+GGGTCTGGCAGATGACCCCGAGGTAGAGGCCACAAACATGGCCCGGCTTAACACTCTGGAGCAGGAGTATGAAAAGTTGC\n+AACGCCTCATCGCCACCACGGAGGCGCAGAATAAGAAGCTAAACTATAACATGAACAACCTGGTCGTCCAGTACAACGCC\n+GACGAGAAGGAGCTCGAAATGGTCAGGTTCAAGATTAAAGAGGCCCAGGTGTACTGCGAGGGCACCGTCAAGAGACTGCG\n+CCAGATTCGATACGAGAATTCCGAGCTCATAGTCGACCTCAACATGGTCAAGATGCGCTGCAGCGACCTAGAGGTTGGCA\n+TCGGAGGCTGCGAGCAGGGCACCTACGACCTAGAGCAGCACCGCCTTGCCTTCCGGCGCGCCATCAAAGATCGCACCGTT\n+GAGCTTCGCAGCCAAGAGGATGTGCTCCTCCTCAAGAAGAAGCATCTTAACGAGGAACTAAGCACCCTGCGAGCCGACCT\n+CGGGGAGCGGAAGAAGCAGATTGAGGCGATGAAGGCGCGCTTTGAACTTACCGCCCAATTGCTAGGCAAAAACGAGGACG\n+GCTCCATCATGACCAGCACACAGCTGAAGGTGGTGAGCGCCCAGGAACGGCAGATGCTGGCCGACGAGGGCGATGCCCTC\n+AACAAAAAGGTTCTCAAGGCTGAGAAGGAAGTGGTCGCCCTGGAAAACACACTGCGCCAGTTCGACAAATCGAACGATAA\n+CTACCGAAAAACATTCCGATCTGTGGACGAAAACTCAAAGGATCGCGAGCGGGCCGAACTGGAGCTGAAGGAACTTGAGG\n+CGGCCTACTGCCGCGAGCTGGAAAAACTGAAGGTTCTCAGGTGCAAGGCACAGCACTACCAGCAGAAGCACGCAGCCC
AG\n+CGAGCTGAGGAGGAAGACCTGATTTCCAAAATAGAGAAGGCGAAGGCTAGTCGGGCGGAGCACTCTGCAGTTCTGGAGAA\n+GATCGAGCGCGAACTAGATGACCAGCGAATGAAGCTGGACAGGGCCAA'..b'+GAGGGCCTGGACCTATTCGGTCTGAGCAACGGGAACAACAGCAGCCTGaacagcagtgtgaacggcgGCGGTCCGCTGAC\n+TACGCAgcagcagaagttgcagctgcagcaacagcagcaAAACAGGACGCAGCCATCCCGAAGCAATACGGCTCTGCACG\n+TCTGTTGGCATCGTGGCGCCACCGTAGGACTGGGCGATCACCTGATAGCCGCCGAGCACCAGTTGTCCGGTTATCTGCTG\n+CGAAAGTTCAAGAATAGCTCCGGCTGGCAGAAGCTCTGGGTGGTGTTCACGTCCTTTTGTCTGTACTTTTACAAGAGCTA\n+CCAGGACGAGTTCGCACTGGCTAGTCTGCCGCTATTGGGTTACACGGTGGGTCCTCCTGGTCACCAAGATGCCGTTCAGA\n+AGGAGTTCGTCTTCAAGCTTTCCTTTAAGAACCACGTCTACTTCTTCCGCGCGGAAAGTGCGCACACCTACAACAGGTAA\n+>FUN_000013-T1 FUN_000013\n+ATGCAGCCCATCGCCATGGAGATGGGGCAGAATCAACTCGAAGTCAAAGCGTCGGTTTTGGGTGGAAGCCCGCAGCTTGG\n+TGATCTCAAAGTTGGAGGCGCCCAGGATGTTCAGCTTCTTGGCTTTTACCGTGAGCCCGGCGGATCCATCCAGAATGCTC\n+AGATCTCCAATGTAGAGGGGCTCTAG\n+>FUN_000014-T1 FUN_000014\n+ATGGCATCCAAGTCCTTCGATTTGGTCATTGAGGAAAAGACAAAGAAACCGGAACGGCTCTACCAACCTCGTCGGATGCG\n+ATGGCTTAAATATATCATCCTGCCGGCAGTCTTCTCCTTTGCGCTTCTGCTGATCCTGGTCAATGTGGACTTCTCCGATA\n+ATAGTGAGGATTCCACGCATCTGGGCAATGACACATCGCTCATTATATCTGGCTATGGATTTGAAAATAACACACTGCGC\n+CGGGGATTTTTCTCCGGAGGAATTGCATTGCATAGCCTTGTCATCGAGAACTGCACCATAGTTCATATCAACGACGCAGC\n+TTTTAATCAGGAAAGCACTGTGAACATAACAAGCCTGCAGCTCATTAATGTGCAATTGGAAAACTTGACTGAATCTGCCC\n+TGGAAGGTCTTCAAAAGCTTCAGAACTTTACTCTAGTCAATGAAAATAATCACTTCAGGCCATTTGGATTCCTCTCAGCT\n+GTGGCTGAATCGTTAGTCAGTGCGGAAATCCACCAGTCACTAGCCGCGGCAATATCGTATTCAGTGTGTGATTTTTTGGG\n+ATCCCGGAATTTCCCACAGTTGAAATATTTAGATCTGAGTGGCACACACTTGGACAAAAGCCTTATTAAAGAGTCCTTTG\n+ACAATCTGCCCGCGTTGGAACAACTACTTCTAAGGAACTGTGGGTTGGGCAACATCGAGTGGGAAATCGTGAGGCCAAGA\n+CTCAAATTGTTGCACTATTTGGACTTGGGTGGAGCTCAAAAGACGGGCAACTATGAACATCAGTTGGACGTATCCGCGTT\n+CTCTCCTGAGACAACTACTAACGCTGAAGAGATATCTACTATCCTAGCGAAGAGAGCGATGGCTCCAGAAGTAGTGGGCA\n+CCACTACACTAGGACCTACTACTTCCATAGAAATTTCACCACCATCAACACAAAGTACAACAACACCCAAAGAAGAAAGT\n+ACATCTATGACAGAAACAACGATATTAACAACACCATCGCCAAAATGCGAAGAGGAACTTTGCCAGGATCTAGAGTGCTC\n+CAGAATTACCACCGATACGGTTGCATCTGC
AGATCTGGGAAAATCGTCGTGCCAGGATGGTTTACTGGTGGAGATCTGCG\n+AATCGACTTGCACCACTCCCACATTCTTCTGTGTGATATTAGGCGAGAACTTTACCTCCGCATCCAACTGTTGTTCCCAC\n+CATACCATGCGATGTGTGGTCTCTGCACAGGTCTCCTGGTTCGAGGACCACAGCGGACTGGTCATTGGCTTGGGAGTGGG\n+TCTCCTCTTCATCGGCAGCTTCCTCGGCATGCTCATCGTCTTCGGAACCCTGCGCCTGAATCCATCCTGGTTGCGAGGCA\n+ATAAGCGGCGGGAGTCGAATACGATAGGACTGATTCAGGGAAGATTTGAGAAGGACCCGTATGAGCAAGAATTGCAACGT\n+ATACGAGAGCTGTGA\n+>FUN_000015-T1 FUN_000015\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000016-T1 FUN_000016\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGT
TCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCGATCGACAGTCTCGTTCTCGGAG\n+TCGAACTCGCAGCGG\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_scratch/Genus_species.proteins.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_scratch/Genus_species.proteins.fa Mon Oct 04 19:38:37 2021 +0000
b
@@ -0,0 +1,96 @@
+>FUN_000001-T1 FUN_000001
+MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAGESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKR
+FIRNPNYVKANEFYDKMLSSEYVSKRYKDLPPPHPGFGADQPPA
+>FUN_000002-T1 FUN_000002
+MPLYGVSPASALWAGSRRQTVRRSRPSHHESWPATSPPAGRAQSLLPKRLLDVVTDLGLDVLQVLGLEARLLGGDVLGFA
+QP
+>FUN_000003-T1 FUN_000003
+MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPNPSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGN
+NSTCTASQPYALSGALSMLQQSPSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH
+KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCERVSFLVHTRIHTGVMPYKCELCQKTFRYKVSQRTHRC
+PTEEAQTPEQLIKAFLEGNDSHTQPSPASAEIAAINSSSIVDPEQEALLSQSIDDIVVEQCQKLGICGVEPREEGQLISL
+QPVAVVHFSGNGSPLQQLQNLRIYSPQQTELPSSDGEVFQRFLMDAT
+>FUN_000004-T1 FUN_000004
+MSNKREIDELTSRIKSAKTTLVEWTEAMEDGNKGYQLIEKYYLDDQQKARELNIKRQLLQADIDKRRKQVVLLYDEQMTL
+EKNLERTACLYRSAHAERRQMVETWKSAVNQMTQREHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELA
+IESLNEESSDMKNQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRDEKAKELENFASVMEKVNARLKS
+VQNKALNAEQRLQILEEMMQAEETALRNLDKEQEKVNEMLYRTQRQVIELQDEEKVLKVQNDSLNSNLAAINRNQQQVNN
+ELKRQTEIHYSLSFKCLEAERRLAEIKGLADDPEVEATNMARLNTLEQEYEKLQRLIATTEAQNKKLNYNMNNLVVQYNA
+DEKELEMVRFKIKEAQVYCEGTVKRLRQIRYENSELIVDLNMVKMRCSDLEVGIGGCEQGTYDLEQHRLAFRRAIKDRTV
+ELRSQEDVLLLKKKHLNEELSTLRADLGERKKQIEAMKARFELTAQLLGKNEDGSIMTSTQLKVVSAQERQMLADEGDAL
+NKKVLKAEKEVVALENTLRQFDKSNDNYRKTFRSVDENSKDRERAELELKELEAAYCRELEKLKVLRCKAQHYQQKHAAQ
+RAEEEDLISKIEKAKASRAEHSAVLEKIERELDDQRMKLDRANREIRTQLREIKARPFSEEYLAQFERDLSLQELEARNT
+KALNMITDLANSDESGTDIIGILLRKGIKLPMHLKRTCSRVSWNSSSSGKSSQGQDTASYLNVKGKKFSCDGASARSSVS
+DMSSLKDDTSSTTSHSGLSIISLELPLPKKK
+>FUN_000005-T1 FUN_000005
+MRCVFCGSGSEQQHSRWEIKMLQESCRTDHGFHQDSQAIQYLYEILASYNRDEQRAFLQFVTGSPRLPTGGFKALTPPLT
+IVRKTLDENQNPNDYLPSVMTCVNYLKLPDYSSREVMRQKLKVAANEGSMSFHLS
+>FUN_000006-T1 FUN_000006
+MLILIGAGARGAVLHRLLASSATQDVRTTARACGRMLVLLCGCCCSGGAAAHKRRSCYCWSVLVRGGCLKSILDVQHSAV
+GLDYVYNGHRVLFASCTVTTSAGRELLRKGFRAEITTGAIPSCHSSVFLRMNLLLDL
+>FUN_000007-T1 FUN_000007
+MSFCDFLRDAALGANSANLSIVAAALSAARDVGGGSDGGGSAGGATPATGASASSVGNTSAVGASSSSNSSAGQAASSNS
+NNVTATGSGSAPGGGPTSTGTTSGTQHGSGSGAAAAVDSESDDSEVGRLQALLEARGLPPHLFGALGPRVTHILHRTIGN
+SSSSKANQLLQGLQSHDESQQLQAAIEMCQMLVMGNEDTLAGFPIKQVVPALIQLLRMEHNFDIMNNACRALAYMLEALP
+RSSGTVVEAVPVFLEKLQVIQCMDVAEQSLSALEILSRRHNKAILQANGISACLTYLDFFSIVAQRAALAIAANCCLNMH
+PEEFHFVAESLPLLARLLSQQDKKCIESVCSAFCRLVESFQHDGQRLQQIASPDLLKNCQQLLLVTPAILNTGTFTAVVR
+MLSLMCCSCPDLAISLLRNDIAATLLYLLTGNAEPAAASATHVELISRSPSELYELTCLIGELMPRLPLDGIFAVDSLLD
+RPTLNTQDQVHWQWRDDRGSWHNYSTIDSRLIEAANQSSEDEISLSTFGRTYTVDFHAMQQINEDTGTTRPVQRRLNHNY
+VAPMSAGQDLTTTSAGSAAAGGASTSAAAAAASSNNNNNNNNNPPGNSVNLNQVKRRPSLDARIACLKEERGLAADFIKH
+IFNVLYEVYSSSAGPNVRYKCLRALLRMVYYATPELLRQVLKYQLVSSHIAGMLGSNDLRIVVGALQMAEILMRQLPDVF
+GTHFRREGVIYQFTQLTDPNNPICANPSPKPLSATATPTANAGGSQSAPASANSLQVNPFFMDSAPGLSSASTTPSSSKH
+QSYSVKSFSHAMNALTASAKGTPSGALDATSSSTTAGGYNYSSSAPSSSSGAPAAYFVTQQGDPRQYVHFQQPAVPAPPP
+QQELLPSGVQQQGQQVPQVIYQPHHQQPAHLVLASTSSGAASSSSSSSSSSSASALQHKMTDMLKRKAPPKRKSQSGGRA
+KSRQEDAAVAPAGSGPGGAPPSSSGSAMHELLSRATSKYIFQ
+>FUN_000008-T1 FUN_000008
+MKFRALMQDPLYMKEFQAIVATLTKLAKDCVMILGSRQMHFIVNEDQSSAASPLVWAGITAEEYFPEYRMEAAHPDQEYI
+VLGVSSANLGRALSVLRGGGVNSCKLKLQRIQFPCISVIASVLTSSSTEAREVVHDVPVTIIPGSDWSAYVVPRVPNSQL
+ALGLPSLRLLKSLIDKLKNISPSLEFQVNVDGELNVIATSEMSTVTSRFQKLLIRTVSGSQQEASCSVDSRKASAFFGAL
+QLPNEELTIGIDREHSIHLQIDVRQDVVLHSILPAVCM
+>FUN_000009-T1 FUN_000009
+MCGNPAVGNGTRALILVGGYGTRLRPLTLSTPKPLVEFANKPILLHQLEALVDAGCRQVILAVSYRAEQMEKELKVEAKK
+LGVELIFSHETEPLGTAGPLALAKTILAASSEPFFVLNSDVICDFPFKQLVQFHCNHGKEGTIVVTKVEEPSKYGVVLYD
+ENGCIKNFIEKPQEFVSNKINAGIYIFNPSVLDRIEVKPTSIEKEVFPEMTQQQELYAMDLTGFWMDIGQPKDFLTGMCL
+YLSSLRQKQSPKLYTGPGVVGNVLVDPTAKIGEGCRIGPNVTIGPDVVIEDGVCIKRSTILKGAIVRSHSWLDSCIVGWR
+STVGRWVRIEGITVLGEDVIVKDELYINGGQVLPHKSIAASLRGAIVQAGQLVILPDEEVFSHVQGVWNLSSDQGNLGSF
+VVTNIRLVWFADANETFNISLPYLQIESSKYGPALVIQTAETGGGYVLGFRVDPAERLNELFKELSSLHTVYGEHPNFGI
+QYNANDARRRLEAASEEAAQASQIKVDNFEELDERQEREINTKLNSYLAEGCLGKVPSQGERAPVYCKELGFAMEPIGDG
+YKLQDLWNVMPTKMETME
+>FUN_000010-T1 FUN_000010
+MDFIHEAVGTSAAPSANPGPMPLCQPVRLELPFSPRHSFALGHFPFQLCPCLYAVWVQTMGLVLLLLIVFAPFLFRVLFK
+PCNPKRYLTT
+>FUN_000011-T1 FUN_000011
+MCHSKDNLHSGNEDGGMPKDTEYISSDHDDSPSWSQQSLLSSDRSKSYSQICSEILEESKERQEKAECAFRVYNINRSKL
+RRSHQQSLSRGPGSGSYGSSMASEYSSKSEAGYQDYDSPSTDPSREHTAEVTFLQLRHRNRAHKEIIFRAAAHAIVIILI
+IIARGVCQRHIKIVEIVPLTSRRGARRATTRNLTSHFAPRKWRRRFSSADQGDRQFKGHDGDCLRSTEKKRSISNEQSPI
+TLRNTNAKDVDIPDCFGSFAMNKHLSVITEDASQHHKDPDEDMIDSQLSNSVLLETYDEGEKYAYSYQYSYKPEICNNNQ
+FVSDESDLKVSSKEGYQMDQEDYVMDKQELVHEGGSDASLSEVAKSKSFLSLKIYDADEALMEIPEDFEGPAIVLDDDAD
+FLDITLTDDEEKIRAKLMAAALTTRKTTSSISPNISLRTRSPIEPSSLSYKPNVIFTRRSEVIKDNYTPRPDDRVALLAE
+KFLQSFSESAPNDYGWKPSKQEVTSAVSISHLFNENGVTRRGGDTPLCGDRQLLSVEFNRKLQRQLKVIVESFQ
+>FUN_000012-T1 FUN_000012
+MSLDRRGEITTPPTRYDLTLGSDKSSSLSRSEAGTYDVIQAEIQHAKRQELATGVATASHQNGNGNGNGHTLSTQHDIEA
+EVKKRKWPTEPSYFLAKELLMTERTYKKDLDVLNTTFRQVLSLGDVEQLQPLFELLDSLAQHHNLFLRDIEHRMVQWEGR
+GGHEAHRIGDVMMKHMAALPIYDEYVQTHLDILHCMNDMYEGDERFRQVYKEFEQQKVCYLPIGELLLKPLNRLLHYQLI
+LERLCDYYGEEHIDYADAMAVHHLLVRSTKGIRSQLPDSANFVELCELQRDINFEQLVQPHRRLIRQGCLLKHSKRGSSE
+EGLDLFGLSNGNNSSLNSSVNGGGPLTTQQQKLQLQQQQQNRTQPSRSNTALHVCWHRGATVGLGDHLIAAEHQLSGYLL
+RKFKNSSGWQKLWVVFTSFCLYFYKSYQDEFALASLPLLGYTVGPPGHQDAVQKEFVFKLSFKNHVYFFRAESAHTYNR
+>FUN_000013-T1 FUN_000013
+MQPIAMEMGQNQLEVKASVLGGSPQLGDLKVGGAQDVQLLGFYREPGGSIQNAQISNVEGL
+>FUN_000014-T1 FUN_000014
+MASKSFDLVIEEKTKKPERLYQPRRMRWLKYIILPAVFSFALLLILVNVDFSDNSEDSTHLGNDTSLIISGYGFENNTLR
+RGFFSGGIALHSLVIENCTIVHINDAAFNQESTVNITSLQLINVQLENLTESALEGLQKLQNFTLVNENNHFRPFGFLSA
+VAESLVSAEIHQSLAAAISYSVCDFLGSRNFPQLKYLDLSGTHLDKSLIKESFDNLPALEQLLLRNCGLGNIEWEIVRPR
+LKLLHYLDLGGAQKTGNYEHQLDVSAFSPETTTNAEEISTILAKRAMAPEVVGTTTLGPTTSIEISPPSTQSTTTPKEES
+TSMTETTILTTPSPKCEEELCQDLECSRITTDTVASADLGKSSCQDGLLVEICESTCTTPTFFCVILGENFTSASNCCSH
+HTMRCVVSAQVSWFEDHSGLVIGLGVGLLFIGSFLGMLIVFGTLRLNPSWLRGNKRRESNTIGLIQGRFEKDPYEQELQR
+IREL
+>FUN_000015-T1 FUN_000015
+MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAGESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKR
+FIRNPNYVKANEFYDKMLSSEYVSKRYKDLPPPHPGFGADQPPA
+>FUN_000016-T1 FUN_000016
+MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFDLKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLG
+VTVDLINRELYQADSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK
+KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPCAQVIFDSDPAPAGKNVPAQLEE
+MSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFINGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGI
+YYNELETRVRLNKRRVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIGETEKTSED
+AAVGAQAASGADSPAQVARDRQSRSRSRTRS
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_scratch/Genus_species.scaffolds.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_scratch/Genus_species.scaffolds.fa Mon Oct 04 19:38:37 2021 +0000
b
b'@@ -0,0 +1,4554 @@\n+>sample\n+GTGGCCGGCTGATTTCACGTCCTAACTTTGGGCTTAACTGGTTCGCCAGT\n+TGACTTTCTTCGCCATCATGTGATGCATTAATTAAACAATAATTACTAAT\n+TGacagtaattaataattgTGGCAAAAAGCGCGACACGTTTTTTCGGCAA\n+ACTCCTCGGAAGACCGATTGTTTAAAGACGTAGGGAAAAGTAGTTCCCAA\n+GCATTTTAAAAAGATACCTATGACATGTGACACCTTTAAAGTGCAATACA\n+AGTTTTCATCTCTTTATATCCTTTTACTCCCTAATTTGAATATAAAAGGA\n+ATCGCATTGAGAGTATAAAGGCTTTAGTTCTTATCGATagatagttagtG\n+ATGAAATAAAATTATAACCGTGGTTTTAGTTTGAAATGTTGTAAAGACTT\n+TCTTTAAATTTAACCAAATTTATGTGATAAAATGGATATTCCATAGATAA\n+GACATTTAAGTTAAATGTTTTTATACATCAAAAAGGAAACATTGTGCACG\n+CTATCAAATGGTATTCTTAAAATCGAGTCAgttaggtaagttattaatta\n+aATGGTAACTTTTTAACGTGCGTCAAATAATCTAGAAATTCTTCTTCAAC\n+TCATCCAAAACATTCTCAACACCACAATATCTATGCTCAGCGATGACAAA\n+TTTCTCCTGATTTCTTAATTTTCTATCTATGctatgcGATCAATCAACGA\n+ATGTGTGCTAATTTCTTGTGACGATTATTTTGCAAAGTCGTCTCCGCGTT\n+AATATCCGATGTAAATAAACCTATGAAAATCGCAAAGATCTATTCCTTTG\n+CGCTTAACCTTGTTATTGAACTCCCTCCCGCCCGGATTTTCGCAGCTTCC\n+AACTAAGTGATACCTTTTAAACAAACGCCACAACAAAACAGGTGACAATC\n+ATATTTTATCAACAAAAGAAAAGAGAGATAATATCTGCTGCTAATTCAAT\n+TTATTGGGCTTTGTGTTTATTTGCATTGGGAATCCGTGGAGCTGATATTG\n+TTTACTTTGGCAATTTCCCAGTCATTATGGTCGCCGTATAATGTGAATGG\n+CCAACAGGAAAATTTCACAGATTCCACTGGTTATTCACTGTTCATGCTGG\n+CGGTTGATCCGCTCCAAATCCCGGATGAGGCGGCGGAAGATCCTTATACC\n+GCTTACTTACGTATTCACTGCTTAACATCTTATCATAGAATTCGTTAGCT\n+TTCACATAGTTTGGATTCCGAATGAATCTCTTCTGTCGACCTCGCGTTTT\n+TGGCAATTTTCGGAGCTCTTCGCTACTGCTAGTTGATTTAAGGCCTACAT\n+CTTTCTGATCCTTAATCATCCTTTTAGGTGATTCTCCTGCTACCATGGGA\n+TCCACCAGTTTCATATATAATGGATCGGGCTCGTTTGCAAGTGGGTTTCC\n+AGTTTCAGAAGACTCTGACGTTCTAAGCGCCAATAAAATGAAAATAACCA\n+AAAGGAAACTAGACATGTTTCGTTACAGACAGATATAGATTGGATATTAT\n+TGAAAGAAAATgaaaataAACAGCGATAATGATCTGTGACTTATTGGAAA\n+TTAGATGGCTTATGGATGATGGGGCGATAAATTCGAACAAACACTGAGAG\n+CATTTTTGGGAGCATTGTGGGAGCTTTTCTATTATCCAGTACTTTATATA\n+CATATATCATTTATATACTAATCATTTCTGGTAGCCGTTCGTAATCAGGA\n+TCGGATCCTTTTTTACCCGTTAGTCAGCTAGAAGAAACGAAAAATTAAAA\n+TAGTAAAatctaaaagtatacaaaaattcaaatagtaaaaccaaaaagta\n+ttaaaaaaAATATCAATCGTTTTTAAACGTTGATTTTTCAGCTTGTGGGG\n+TGATTT
ATCGCTAACTTGGAAAATGATAATAAAGCATTATCCATAATATT\n+AGTTGTGGAAATGAAATTCAAATAGATGTTGTGTTATATACGATGAGGAT\n+GTTGCATTTGAGTCCCCGGAAATATAGTATTTTTTTTACCGAAGGTATTA\n+TCGTACCGGTCAAGTACGGTCACACTGCCAAGCGCAGATTTGAGGATTTC\n+TAGATTTGGCCTCTTGATGGACTAGAAGCGCTACCAAAACTGGGGCTTGA\n+GTTGAATTACCTGTTGGAAGACACAATGCCACCCACGATCAACAATTCGG\n+CGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAG\n+TAAGTGAACAGATCCCTAAACAGACGCCAGATACTCAGACTGATGTGTAC\n+CTTGCAGATCCGAGATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCG\n+GATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACAGCCACCG\n+CTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACG\n+TGCTGACGGAACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAG\n+CTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACT\n+GCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGC\n+ATTCGAGGACGGTGTCATGGTTGCGCAAATCCGAGTACATCTCCACCGAG\n+CAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGG\n+TTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCG\n+AAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAAGAGCGAA\n+ATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCC\n+TATCTTCCCCGACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATAT\n+TTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAG\n+GAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACA\n+GTTTGTCGCCTACTTCCTGCCCACAGAGCAGACGCTGGAGAAACGCCGTA\n+CAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtac\n+aagATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGG\n+CTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATCTACTACA\n+ACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGC\n+CAGCAACCCAACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCT\n+ATAGCGAGCTTTACTTTGTATTATTTCTACCAGGTTGTCAAGCATCGTCC\n+ATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAG\n+AAGTTCCTGGCGAGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAG\n+CAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGACGCAGCTGT\n+TGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCG\n+ATCGACAGTCTCGTTCTCGGAGTCGAACTCGCAGCGGGTCCAgttcagga\n+tctggatctggctccggctcTCGGGCCAgcagccgctcaaagtctggttc\n+tcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCC\n+AGAAATCCGGatccagatcgagatcggtatcacgttcccgatcccgttcc\n+aagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagtccggt
tcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTT'..b'cgcgttctaggtcgagatccaagtccggttcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTTCTGGTTCAGGATCCAGCTCTGGAAGCGCCTCA\n+GATGAATGATTAATTACAAAAAACGGCGTTCATAATAAATAAGTTTATAA\n+TCAACCAAGTACATTTGAAAACTGAACTAACTCGATTTAATATCATTTTC\n+GCCTCAACTCAGCGCTCGGGTTCGTTGCCCAGAATAGTTTTAAATAAAAT\n+CGGCAGTTTAACATAATTTATATTAGATGTtgttgttgtATTGCAAACAA\n+GTCGGGTCCTAGTCGATTTACACTTGGCTGAGATAAAACAACTAAGATTC\n+AAATAATATCCTCATAAGAAGATGTAATTAAGACGTTTTTCTTAGGGGGT\n+GCTTAGGAATTGATTAGATCGCCTTTGGGGAAGTGCAAACAATGTAAaat\n+gatataaaagggtataaaTTAAGTGGATATATGCATCTTCGTTCCAACTA\n+CGTGGCGTCCATCAAAAAGCGCTGGAAGACTTCGCCATCGGAACTAGGTA\n+GCTCTGTTTGTTGCGGTGAGTAGATTCTCAAGTTCTGGAGTTGCTGCAGC\n+GGAGAGCCATTCCCGCTGAAGTGTACCACCGCAACCGGCTGTAGGGATAT\n+GAGCTGTCCCTCCTCCCGCGGCTCCACACCACAGATGCCCAGCTTTTGGC\n+ACTGCTCAACGACAATGTCGTCGATTGACTGCGAAAGCAGTGCCTCCTGC\n+TCAGGATCCACAATGGAGCTGCTGTTGATGGCAGCTATTTCGGCGCTCGC\n+TGGTGAAGGCTGAGTATGCGAGTCGTTGCCCTCCAGGAACGCCTTTATCA\n+GCTGCTCCGGTGTCTGGGCCTCCTCGGTGGGACATCGATGGGTCCTTTGA\n+CTGACCTTGTACCTGAACGTCTTTTGGCAGAGCTCGCACTTGTAGGGCAT\n+CACTCCCGTGTGGATGCGCGTGTGGACAAGGAAAGAGACTCGCTGCCGGA\n+AACACTTGCCTGTGGACGAGTCCGAAATACGAAAAGGTTAGACATGGAGT\n+GACCCGGAAAAGAAGGTATACCTCTCCTTCATTTAAAGTAAATAGGGCAA\n+ATCGCAATGGAGTATGCTCATTTATAAGCTGGCTAACAAAATAAGGGGCG\n+GCTAATTAAGGGGTTTGATCGATACTCACCGCAGACTTCGCACTTGAAGG\n+GCTTTTCGCCGCTGTGAATCCTCTGGTGGTTATGCAGCGTAGACAGTTCC\n+TTGAAGGCGCGTCCACAAACCCCGCAGACATGGGGCTTTACCTCGCTGTG\n+GTAGAGCAAATGCTTGTGGTACGACTGCTGGAAGGTGAAGGTCCTGGCGC\n+AGATCTCGCATGTGTACGGCATCTCGCCGGTGTGGAGCCGCTTGTGCTTC\n+TTCAGAAAGTACTTGGTGGTGAAGGACTTGCTGCACACATCGCACTCCCA\n+CAGCTTCGGGGTGGCCGTGCCCGACTCCGGCGAACTGGGGGACTGCTGGA\n+GCATGCTCAGCGCCCCACTCAGCGCGTATGGCTGGGATGCGGTGCACGTG\n+GAGTTATTTCCGTTGCCAAGGGCTCCAGGCTCTGTGTGTCGAATGCGATC\n+GCAAATGCTCAGCTTGGGCGTGGCAGTAACTGCACTGGTGGTAGAAGCGG\n+GTGTAGGACtgggattaggatttggattgggattggAGCAGGGCACGCCC\n+ATCATGTGCACTACTTTCAGGTGGATTCGAAGGGAGCCCTTCATTCGGAA\n+CGTCTTGGAGCAGAGATGGCACTT
GTAGGGCTCCTGGTCCTGTATAAAGC\n+AATAATCGGAATTTCACTTATATTTATCAATTCATCAATATGCCCTCATG\n+GCCAAATATTCCATTACATTACCGTCTGTCTCTCAGTTTCAAATTTATGC\n+ACAAAAATCATTCACTTTCATTCACTATATCACAAAGTTGCCATGGTTTT\n+AAATTGATCAAAAACAAATTAATATCTATCATATATaTACATAGTCATAT\n+GAACAGTTGAAAAATTAATTGAAAATAATGGGAACGATATACGTACATAC\n+ATCAGTTGTTTTTAAAATATAAGGGTATATAGATTTCTTTCttgttgttg\n+ttgatTTTAATTACGTCAAACTTTTGTTTCAGATTCAATGTAAATGGTCT\n+AGCTTTTTAAGTATGATTTTTTTTTGCTGCCAGTGAGCATAGAAaaaaaa\n+aatCaaaATCGATATAAGAATATGCGAAAGTGCATTACGAAACTCTTTAG\n+ATAATAGCACTTAATATATGTACATAGCCAATAGTTACCGGTTCCTTCTG\n+TTGGggttccttttgcttgggttcTCCCTCCGCATTTTCGTGGACTAAGC\n+GGACGTGCATGTCCCTCAGCTCGGTATTCCGGAAACTGAACTCGCAAATG\n+TCGCAGTGGGCGGGCGGGGTGGTCCGCACAGGCGGTGGGGTTGGGACGAC\n+GGGCTTGGACCTGGTTCGCTTGGCCCTCCGTTTGGGAGGAGCTGCGGCAA\n+GGAAGCCCCGGGACGAGGCGGGTTGGCCATTCGCTGGACTCTCGTTTCCC\n+TCCTGAGCCATCAGACTTGTGTGCGAGAACAGGTGGATGGTGAGCTTGTC\n+CAGCCCCAGGAAGAGCTCCTTGCAGTTGGCAAAGGGACAGGCCAGTGGGC\n+CGTTGGCCGCCTTGATCAGCCTCTGCTGCAGTGCGTCAATACTGCCGAAA\n+CTGGGCACCGCGCAGAGCGGACACAGCACCGAGGTGGAACACATTTCGCC\n+AGTGCACTCAATCGAATCTTATGCAAATGCTTCACCTCCTATTGGGATTA\n+TCCTCCTGTTCGGTCTGTGATCATCTATTCAGGAGTCCATTCCCAGACTG\n+CCTAGTCTTTCTGCTTTCAAAATTTTCTAAAAATATCAGCAAGTGAAGAT\n+TTTTGAAAACTTTGGGCCCAGCAATCTGACTTCTCGGCACCGATGCCAGC\n+TAACGAAATAATGAAaaataatgaaaTGCCCGGCGCGGATCGTCGAATCG\n+TCAAGAAGACTTTCGGAAACACTCGCAGCACCGAAATCCCATCTCTCGAA\n+CAAGGCAGTCTCTTTTCTCCGTGTCTCTGGGTAGCTCATTTCGAAATATA\n+GCTCTGAGCACGGCTATATACTATATGTATGTAGAATTATTTCTGGCCGA\n+TATATGTTGCACTGGCGGCCATATAGCCTTCGTTCTAGTCTTTGTAACGC\n+ACGATGCGCAGGAGCAATTCGCTGAGATGACCACATGCGATTTGCGGGAC\n+TTATCTAGAGATCTATCATTATCGCCAGATTGGTTTAATAATTGGCTTTT\n+CCGCCAATATCCAATTGGAATATGGTTGGTTACTGCAATTGTCGCTCCAT\n+TTTTTAAGCACTCCATAAAAAGTAAACACATTAATATGTACTCTTATTGG\n+AGATTTCTTCTTTCGATTTTAGTTTCGGACCAGTGAAAATCATTCGTTTC\n+ATTTTCGTAAATAAGAACTGAGAAAATATtattattatATATATTTCTTT\n+ATTAGGAAAATACGAAGATTGAGTATTTCAGATTGAATTAGCATATCCGT\n+CTAAATCTTAATGCTGTAATGAGCTTACTTGAGATCTGATCAAAACCAAT\n+ACaaaacccacacCAAAGGTGGTAGCTAATATACATATTTTGTGTAATAC\n+TTTTGTAGAG
TATTTACTATTCAGCGATTTAAACAAGCAATCGCCTAGAC\n+ACACACATTTGTCCGCCTATGTGTATGTGCACCGAGCTATACCCCCACTG\n+AATCGCTGTGTGCTATTTTTATGGCCGCGATGCTCTCTTGTTTTGACCCG\n+CTTGGGCAAC\n'
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_scratch/Genus_species.stats.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_scratch/Genus_species.stats.json Mon Oct 04 19:38:37 2021 +0000
b
@@ -0,0 +1,119 @@
+{
+    "format": "annotation",
+    "command": "/home/abretaud/miniconda3/envs/__funannotate@1.8.7/bin/funannotate predict --input /tmp/tmpm833xrq1/files/e/1/c/dataset_e1c34c74-e579-4cab-b0ed-5ce938ce4e4b.dat --out output --database /home/abretaud/.planemo/planemo_tmp_yntx6ieu/test-data/funannotate_db --species Genus species --isolate  --strain  --organism other --ploidy 1 --SeqCenter CFMR --SeqAccession 12345 --name FUN_ --numbering 1 --p2g_pident 80 --p2g_prefilter diamond --min_training_models 3 --busco_seed_species fly --busco_db insecta --evm-partition-interval 1500 --min_intronlen 10 --max_intronlen 3000 --min_protlen 50 --repeat_filter overlap blast --cpus 1",
+    "organism": "Genus_species",
+    "software": {
+        "name": "funannotate",
+        "version": "1.8.7",
+        "date": "2021-07-21",
+        "resources": {
+            "merops": {
+                "type": "diamond",
+                "version": "12.0",
+                "date": "2017-10-04",
+                "num-records": "5009"
+            },
+            "uniprot": {
+                "type": "diamond",
+                "version": "2021_03",
+                "date": "2021-06-02",
+                "num-records": "565254"
+            },
+            "dbCAN": {
+                "type": "hmmer3",
+                "version": "9.0",
+                "date": "2020-08-04",
+                "num-records": "641"
+            },
+            "pfam": {
+                "type": "hmmer3",
+                "version": "34.0",
+                "date": "2021-03",
+                "num-records": "19179"
+            },
+            "repeats": {
+                "type": "diamond",
+                "version": "1.0",
+                "date": "2021-07-19",
+                "num-records": "11950"
+            },
+            "go": {
+                "type": "text",
+                "version": "2021-07-02",
+                "date": "2021-07-02",
+                "num-records": "47228"
+            },
+            "mibig": {
+                "type": "diamond",
+                "version": "1.4",
+                "date": "2021-07-19",
+                "num-records": "31023"
+            },
+            "interpro": {
+                "type": "xml",
+                "version": "86.0",
+                "date": "2021-06-03",
+                "num-records": "38913"
+            },
+            "busco_outgroups": {
+                "type": "outgroups",
+                "version": "1.0",
+                "date": "2021-07-19",
+                "num-records": "8"
+            },
+            "gene2product": {
+                "type": "text",
+                "version": "1.70",
+                "date": "2021-06-15",
+                "num-records": "34039"
+            }
+        }
+    },
+    "assembly": {
+        "num_contigs": 4,
+        "length": 227430,
+        "mean_length": 56857.5,
+        "N50": 215740,
+        "L50": 1,
+        "N90": 215740,
+        "L90": 1,
+        "GC_content": 42.86
+    },
+    "annotation": {
+        "genes": 16,
+        "common_name": 0,
+        "mRNA": 16,
+        "tRNA": 0,
+        "ncRNA": 0,
+        "rRNA": 0,
+        "avg_gene_length": 1660.69,
+        "transcript-level": {
+            "CDS_transcripts": 16,
+            "CDS_five_utr": 0,
+            "CDS_three_utr": 0,
+            "CDS_no_utr": 16,
+            "CDS_five_three_utr": 0,
+            "CDS_complete": 15,
+            "CDS_no-start": 0,
+            "CDS_no-stop": 1,
+            "CDS_no-start_no-stop": 0,
+            "total_exons": 42,
+            "total_cds_exons": 42,
+            "multiple_exon_transcript": 13,
+            "single_exon_transcript": 3,
+            "avg_exon_length": 402.36,
+            "avg_protein_length": 359.81,
+            "functional": {
+                "go_terms": 0,
+                "interproscan": 0,
+                "eggnog": 0,
+                "pfam": 0,
+                "cazyme": 0,
+                "merops": 0,
+                "busco": 0,
+                "secretion": 0
+            },
+            "pct_exon_overlap_protein_evidence": 11.9
+        }
+    }
+}
\ No newline at end of file
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_scratch/Genus_species.tbl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_scratch/Genus_species.tbl Mon Oct 04 19:38:37 2021 +0000
b
@@ -0,0 +1,234 @@
+>Feature sample
+1 215740 REFERENCE
+ CFMR 12345
+1466 1092 gene
+ locus_tag FUN_000001
+1466 1092 mRNA
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000001-T1_mrna
+ protein_id gnl|ncbi|FUN_000001-T1
+1466 1092 CDS
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000001-T1_mrna
+ protein_id gnl|ncbi|FUN_000001-T1
+3142 2565 gene
+ locus_tag FUN_000002
+3142 3138 mRNA
+3004 2883
+2686 2565
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000002-T1_mrna
+ protein_id gnl|ncbi|FUN_000002-T1
+3142 3138 CDS
+3004 2883
+2686 2565
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000002-T1_mrna
+ protein_id gnl|ncbi|FUN_000002-T1
+5802 4248 gene
+ locus_tag FUN_000003
+5802 5797 mRNA
+5539 4937
+4742 4248
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000003-T1_mrna
+ protein_id gnl|ncbi|FUN_000003-T1
+5802 5797 CDS
+5539 4937
+4742 4248
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000003-T1_mrna
+ protein_id gnl|ncbi|FUN_000003-T1
+10664 7691 gene
+ locus_tag FUN_000004
+10664 10657 mRNA
+10499 8707
+8385 7691
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000004-T1_mrna
+ protein_id gnl|ncbi|FUN_000004-T1
+10664 10657 CDS
+10499 8707
+8385 7691
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000004-T1_mrna
+ protein_id gnl|ncbi|FUN_000004-T1
+15214 14247 gene
+ locus_tag FUN_000005
+15214 15209 mRNA
+14648 14247
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000005-T1_mrna
+ protein_id gnl|ncbi|FUN_000005-T1
+15214 15209 CDS
+14648 14247
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000005-T1_mrna
+ protein_id gnl|ncbi|FUN_000005-T1
+15539 16619 gene
+ locus_tag FUN_000006
+15539 15543 mRNA
+15646 15919
+16485 16619
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000006-T1_mrna
+ protein_id gnl|ncbi|FUN_000006-T1
+15539 15543 CDS
+15646 15919
+16485 16619
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000006-T1_mrna
+ protein_id gnl|ncbi|FUN_000006-T1
+21705 18358 gene
+ locus_tag FUN_000007
+21705 21700 mRNA
+21515 19638
+19482 18358
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000007-T1_mrna
+ protein_id gnl|ncbi|FUN_000007-T1
+21705 21700 CDS
+21515 19638
+19482 18358
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000007-T1_mrna
+ protein_id gnl|ncbi|FUN_000007-T1
+35679 34843 gene
+ locus_tag FUN_000008
+35679 34843 mRNA
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000008-T1_mrna
+ protein_id gnl|ncbi|FUN_000008-T1
+35679 34843 CDS
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000008-T1_mrna
+ protein_id gnl|ncbi|FUN_000008-T1
+40223 44130 gene
+ locus_tag FUN_000009
+40223 40396 mRNA
+40659 41193
+41707 42080
+43409 43609
+43678 44130
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000009-T1_mrna
+ protein_id gnl|ncbi|FUN_000009-T1
+40223 40396 CDS
+40659 41193
+41707 42080
+43409 43609
+43678 44130
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000009-T1_mrna
+ protein_id gnl|ncbi|FUN_000009-T1
+87202 88320 gene
+ locus_tag FUN_000010
+87202 87207 mRNA
+88054 88320
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000010-T1_mrna
+ protein_id gnl|ncbi|FUN_000010-T1
+87202 87207 CDS
+88054 88320
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000010-T1_mrna
+ protein_id gnl|ncbi|FUN_000010-T1
+106221 102510 gene
+ locus_tag FUN_000011
+106221 106216 mRNA
+104632 104258
+103947 103696
+103618 103229
+103151 102510
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000011-T1_mrna
+ protein_id gnl|ncbi|FUN_000011-T1
+106221 106216 CDS
+104632 104258
+103947 103696
+103618 103229
+103151 102510
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000011-T1_mrna
+ protein_id gnl|ncbi|FUN_000011-T1
+167121 169212 gene
+ locus_tag FUN_000012
+167121 168069 mRNA
+168722 169212
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000012-T1_mrna
+ protein_id gnl|ncbi|FUN_000012-T1
+167121 168069 CDS
+168722 169212
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000012-T1_mrna
+ protein_id gnl|ncbi|FUN_000012-T1
+180262 180579 gene
+ locus_tag FUN_000013
+180262 180267 mRNA
+180400 180579
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000013-T1_mrna
+ protein_id gnl|ncbi|FUN_000013-T1
+180262 180267 CDS
+180400 180579
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000013-T1_mrna
+ protein_id gnl|ncbi|FUN_000013-T1
+210553 208619 gene
+ locus_tag FUN_000014
+210553 210548 mRNA
+210474 209053
+208645 208619
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000014-T1_mrna
+ protein_id gnl|ncbi|FUN_000014-T1
+210553 210548 CDS
+210474 209053
+208645 208619
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000014-T1_mrna
+ protein_id gnl|ncbi|FUN_000014-T1
+>Feature sample4
+1 7560 REFERENCE
+ CFMR 12345
+1466 1092 gene
+ locus_tag FUN_000015
+1466 1092 mRNA
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000015-T1_mrna
+ protein_id gnl|ncbi|FUN_000015-T1
+1466 1092 CDS
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000015-T1_mrna
+ protein_id gnl|ncbi|FUN_000015-T1
+2126 >3537 gene
+ locus_tag FUN_000016
+2126 2199 mRNA
+2258 3224
+3284 >3537
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000016-T1_mrna
+ protein_id gnl|ncbi|FUN_000016-T1
+2126 2199 CDS
+2258 3224
+3284 >3537
+ codon_start 1
+ product hypothetical protein
+ transcript_id gnl|ncbi|FUN_000016-T1_mrna
+ protein_id gnl|ncbi|FUN_000016-T1
b
diff -r 000000000000 -r 857f7ac611e1 test-data/predict_scratch/fly.parameters.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predict_scratch/fly.parameters.json Mon Oct 04 19:38:37 2021 +0000
[
@@ -0,0 +1,1 @@
+{"augustus": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-21", "path": "/tmp/tmpm833xrq1/job_working_directory/000/2/working/output/predict_misc/ab_initio_parameters/augustus/species/genus_species"}], "genemark": [{}], "codingquarry": [{}], "snap": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-21", "path": "/tmp/tmpm833xrq1/job_working_directory/000/2/working/output/predict_misc/ab_initio_parameters/genus_species.snap.hmm"}], "glimmerhmm": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-21", "path": "/tmp/tmpm833xrq1/job_working_directory/000/2/working/output/predict_misc/ab_initio_parameters/glimmerhmm"}]}
\ No newline at end of file
b
diff -r 000000000000 -r 857f7ac611e1 tool-data/funannotate.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/funannotate.loc.sample Mon Oct 04 19:38:37 2021 +0000
b
@@ -0,0 +1,8 @@
+# this is a tab separated file describing the location of funannotate databases used for the
+# funannotate annotation tool
+#
+# the columns are:
+# value  description format_version path
+#
+# for example
+# 2021-07-20-120000 Funannotate database 2021-07-20-120000 1.0 /tmp/database/funannotate/funannotate/2021-07-20-120000
b
diff -r 000000000000 -r 857f7ac611e1 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Mon Oct 04 19:38:37 2021 +0000
b
@@ -0,0 +1,6 @@
+<tables>
+    <table name="funannotate" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, description, format_version, path</columns>
+        <file path="tool-data/funannotate.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r 857f7ac611e1 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Mon Oct 04 19:38:37 2021 +0000
b
@@ -0,0 +1,6 @@
+<tables>
+    <table name="funannotate" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, description, format_version, path</columns>
+        <file path="${__HERE__}/test-data/funannotate.loc" />
+    </table>
+</tables>