Next changeset 1:1a59958c1f76 (2021-10-04) |
Commit message:
"planemo upload commit 9613152729099079c7465c3d5d42005ef22ca91e" |
added:
README.md funannotate_predict.xml macros.xml test-data/SRR7458692.bam test-data/cleaned.fa test-data/cleaned_ident.fa test-data/funannotate.loc test-data/funannotate_db/funannotate-db-info.txt test-data/funannotate_db/insecta/ancestral test-data/funannotate_db/insecta/dataset.cfg test-data/funannotate_db/insecta/hmms/EOG090W03A6.hmm test-data/funannotate_db/insecta/hmms/EOG090W06A3.hmm test-data/funannotate_db/insecta/hmms/EOG090W0GYE.hmm test-data/funannotate_db/insecta/hmms/EOG090W0T3K.hmm test-data/funannotate_db/insecta/lengths_cutoff test-data/funannotate_db/insecta/prfl/EOG090W03A6.prfl test-data/funannotate_db/insecta/prfl/EOG090W06A3.prfl test-data/funannotate_db/insecta/prfl/EOG090W0GYE.prfl test-data/funannotate_db/insecta/prfl/EOG090W0T3K.prfl test-data/funannotate_db/insecta/scores_cutoff test-data/funannotate_db/repeats.dmnd test-data/funannotate_db/trained_species/fly/augustus/fly_exon_probs.pbl test-data/funannotate_db/trained_species/fly/augustus/fly_igenic_probs.pbl test-data/funannotate_db/trained_species/fly/augustus/fly_intron_probs.pbl test-data/funannotate_db/trained_species/fly/augustus/fly_metapars.cfg test-data/funannotate_db/trained_species/fly/augustus/fly_metapars.utr.cfg test-data/funannotate_db/trained_species/fly/augustus/fly_parameters.cfg test-data/funannotate_db/trained_species/fly/augustus/fly_utr_probs.pbl test-data/funannotate_db/trained_species/fly/augustus/fly_weightmatrix.txt test-data/funannotate_db/trained_species/fly/info.json test-data/funannotate_db/uniprot_sprot.fasta test-data/genome.fa test-data/genome_masked.fa test-data/predict_augustus/Genus_species.cds-transcripts.fa test-data/predict_augustus/Genus_species.discrepency.report.txt test-data/predict_augustus/Genus_species.error.summary.txt test-data/predict_augustus/Genus_species.gbk test-data/predict_augustus/Genus_species.gff3 test-data/predict_augustus/Genus_species.mrna-transcripts.fa test-data/predict_augustus/Genus_species.proteins.fa 
test-data/predict_augustus/Genus_species.scaffolds.fa test-data/predict_augustus/Genus_species.stats.json test-data/predict_augustus/Genus_species.tbl test-data/predict_augustus/Genus_species.validation.txt test-data/predict_augustus/fly.parameters.json test-data/predict_bam/Genus_species.cds-transcripts.fa test-data/predict_bam/Genus_species.discrepency.report.txt test-data/predict_bam/Genus_species.error.summary.txt test-data/predict_bam/Genus_species.gbk test-data/predict_bam/Genus_species.gff3 test-data/predict_bam/Genus_species.mrna-transcripts.fa test-data/predict_bam/Genus_species.proteins.fa test-data/predict_bam/Genus_species.stats.json test-data/predict_bam/Genus_species.tbl test-data/predict_bam/Genus_species.validation.txt test-data/predict_bam/fly.parameters.json test-data/predict_scratch/Genus_species.cds-transcripts.fa test-data/predict_scratch/Genus_species.discrepency.report.txt test-data/predict_scratch/Genus_species.error.summary.txt test-data/predict_scratch/Genus_species.gbk test-data/predict_scratch/Genus_species.gff3 test-data/predict_scratch/Genus_species.mrna-transcripts.fa test-data/predict_scratch/Genus_species.proteins.fa test-data/predict_scratch/Genus_species.scaffolds.fa test-data/predict_scratch/Genus_species.stats.json test-data/predict_scratch/Genus_species.tbl test-data/predict_scratch/Genus_species.validation.txt test-data/predict_scratch/fly.parameters.json tool-data/funannotate.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
b |
diff -r 000000000000 -r 40b87aef5241 README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Thu Aug 26 06:55:33 2021 +0000 |
b |
@@ -0,0 +1,5 @@ +# Funannotate + +Funannotate can use GeneMark to predict genes, but due to licensing issues, we are not allowed to distribute GeneMark automatically. + +If you want to use it, the Galaxy administrator needs to install GeneMark following the instructions on https://github.com/nextgenusfs/funannotate, and set the `GENEMARK_PATH` variable on the job destination. |
b |
diff -r 000000000000 -r 40b87aef5241 funannotate_predict.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/funannotate_predict.xml Thu Aug 26 06:55:33 2021 +0000 |
[ |
b'@@ -0,0 +1,487 @@\n+<tool id="funannotate_predict" name="Funannotate predict annotation" profile="20.01" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">\n+ <description></description>\n+ <macros>\n+ <import>macros.xml</import>\n+ </macros>\n+ <requirements>\n+ <expand macro="requirements" />\n+ </requirements>\n+ <version_command><![CDATA[funannotate check --show-versions]]></version_command>\n+ <command><![CDATA[\n+#if $genemark.genemark_license:\n+ if [ -z "\\$GENEMARK_PATH" ] ; then echo "GeneMark is not installed on this Galaxy server." >&2 ; exit 1 ; fi &&\n+ if [ ! -f "\\$GENEMARK_PATH/gmes_petap.pl" ] ; then echo "GeneMark is not installed properly on this Galaxy server." >&2 ; exit 1 ; fi &&\n+ ## GeneMark only search for license in ~/.gm_key\n+ cp \'${genemark.genemark_license}\' ~/.gm_key &&\n+#end if\n+\n+#if $uglyTestingHack == "true":\n+ ## funannotate_db contains some hard coded path, need to rewrite one for tests (not in real life when using data manager)\n+ ## Need to copy too as the test_data is read only on CI\n+ cp -r \'${database.fields.path}\' \'./hacked_database\' &&\n+ sed -i.bak \'s|/tmp/prout|\'`pwd`\'/hacked_database|\' \'./hacked_database/trained_species/fly/info.json\' &&\n+#end if\n+\n+funannotate predict\n+--input \'${input}\'\n+--out output\n+\n+#if $uglyTestingHack == "true":\n+ --database `pwd`\'/hacked_database\'\n+#else\n+ --database \'$database.fields.path\'\n+#end if\n+\n+--species \'${organism.species}\'\n+--isolate \'${organism.isolate}\'\n+--strain \'${organism.strain}\'\n+--organism \'${organism.organism}\'\n+--ploidy ${organism.ploidy}\n+--SeqCenter \'${organism.SeqCenter}\'\n+--SeqAccession \'${organism.SeqAccession}\'\n+--name \'${organism.name}\'\n+--numbering ${organism.numbering}\n+\n+#if $parameters:\n+ --parameters \'${parameters}\'\n+#end if\n+\n+#if $evidences.rna_bam:\n+ --rna_bam ${evidences.rna_bam}\n+#end if\n+\n+#set est_list = ""\n+#if len($evidences.transcript_evidence) > 0:\n+ #for $estev in 
$evidences.transcript_evidence:\n+ #if $estev:\n+ #set est_list += " \'" + str($estev) + "\'"\n+ #end if\n+ #end for\n+#end if\n+#if $est_list:\n+ --transcript_evidence $est_list\n+#end if\n+\n+#if $evidences.prot_evidence == \'custom\':\n+ --protein_evidence\n+ #for $protev in $evidences.protein_evidence:\n+ \'${protev}\'\n+ #end for\n+#end if\n+--p2g_pident ${evidences.p2g_pident}\n+--p2g_prefilter ${evidences.p2g_prefilter}\n+\n+#if $augustus.augustus_species != \'none\':\n+ --augustus_species \'${augustus.augustus_species}\'\n+#end if\n+--min_training_models ${augustus.min_training_models}\n+${augustus.optimize_augustus}\n+\n+#if $genemark.genemark_license:\n+ --genemark_mode \'${genemark.genemark_mode}\'\n+ #if $genemark.genemark_mod:\n+ --genemark_mod \'${genemark.genemark_mod}\'\n+ #end if\n+ --soft_mask ${genemark.soft_mask}\n+#end if\n+\n+--busco_seed_species \'${busco.busco_seed_species}\'\n+--busco_db \'${busco.busco_db}\'\n+\n+$evm.repeats2evm\n+#if $evm.evm_partitioning.evm_partition == "yes":\n+--evm-partition-interval ${evm.evm_partitioning.evm_partition_interval}\n+#else:\n+--no-evm-partitions\n+#end if\n+#if $evm.weights:\n+ --weights \'${evm.weights}\'\n+#end if\n+\n+#if $other_predictors.stringtie:\n+ --stringtie \'${other_predictors.stringtie}\'\n+#end if\n+#if $other_predictors.maker_gff:\n+ --maker_gff \'${other_predictors.maker_gff}\'\n+#end if\n+#if $other_predictors.pasa_gff:\n+ --pasa_gff \'${other_predictors.pasa_gff}:${other_predictors.pasa_gff_weight}\'\n+#end if\n+#if $other_predictors.other_gff:\n+ --other_gff \'${other_predictors.other_gff}:${other_predictors.other_gff_weight}\'\n+#end if\n+\n+--min_intronlen ${filtering.min_intronlen}\n+--max_intronlen ${filtering.max_intronlen}\n+--min_protlen ${filtering.min_protlen}\n+${filtering.keep_no_stops}\n+--repeat_filter ${filtering.repeat_filter}\n+\n+--cpus \\${GALAXY_SLOTS:-2}\n+\n+&&\n+\n+mv output/predict_results/*.gbk out.gbk &&\n+mv output/predict_results/*.tbl out.tbl &&\n+mv 
output/predict_results/*.gff3 out.gff3 &&\n+mv output/pre'..b'section>\n+ <section name="evidences">\n+ <param name="rna_bam" value="SRR7458692.bam" />\n+ <param name="transcript_evidence" value="predict_scratch/Genus_species.mrna-transcripts.fa" />\n+ <conditional name="prot_evidence">\n+ <param name="prot_evidence_source" value="custom" />\n+ <param name="protein_evidence" value="predict_scratch/Genus_species.proteins.fa" />\n+ </conditional>\n+ </section>\n+ <section name="augustus">\n+ <param name="min_training_models" value="3" />\n+ </section>\n+ <section name="busco">\n+ <param name="busco_seed_species" value="fly" />\n+ <param name="busco_db" value="insecta" />\n+ </section>\n+ <!-- non deterministic results, so can\'t be more precise here -->\n+ <output name="annot_gbk">\n+ <assert_contents>\n+ <has_text text=" TITLE Direct Submission" />\n+ <has_text text="/locus_tag="FUN_000001"" />\n+ </assert_contents>\n+ </output>\n+ <output name="annot_tbl">\n+ <assert_contents>\n+ <has_text text=">Feature sample" />\n+ <has_text text="gnl|ncbi|FUN_000001-T1_mrna" />\n+ </assert_contents>\n+ </output>\n+ <output name="annot_gff3">\n+ <assert_contents>\n+ <has_text text="##gff-version 3" />\n+ <has_text text="ID=FUN_000001-T1;Parent=FUN_000001;product=hypothetical protein;" />\n+ </assert_contents>\n+ </output>\n+ <output name="fasta_proteins">\n+ <assert_contents>\n+ <has_text text=">FUN_000001-T1 FUN_000001" />\n+ </assert_contents>\n+ </output>\n+ <output name="fasta_transcripts_mrna">\n+ <assert_contents>\n+ <has_text text=">FUN_000001-T1 FUN_000001" />\n+ </assert_contents>\n+ </output>\n+ <output name="fasta_transcripts_cds">\n+ <assert_contents>\n+ <has_text text=">FUN_000001-T1 FUN_000001" />\n+ </assert_contents>\n+ </output>\n+ <assert_stderr>\n+ <has_text text="augustus busco"/>\n+ <has_text text="glimmerhmm busco"/>\n+ <has_text text="snap busco"/>\n+ <has_text text="Running BUSCO to find conserved gene models for training ab-initio predictors"/>\n+ 
<not_has_text text="Skipping CodingQuarry as no --rna_bam passed"/>\n+ <has_text text="Running Augustus gene prediction using genus_species parameters"/>\n+ <has_text text="Training Augustus using BUSCO gene models"/>\n+ <has_text text="Aligning transcript evidence to genome with minimap2"/>\n+ <has_text text="Found 16 alignments, wrote GFF3 and Augustus hints to file"/>\n+ <has_text text="Extracting hints from RNA-seq BAM file using bam2hints"/>\n+ <has_text text="Mapping 13 proteins to genome using diamond and exonerate"/>\n+ <has_text text="Found 4 preliminary alignments --> aligning with exonerate"/>\n+ </assert_stderr>\n+ </test>\n+ </tests>\n+ <help><![CDATA[\n+Funannotate_ predict\n+--------------------\n+\n+Funannotate_ is a pipeline for genome annotation (built specifically for fungi, but will also work with higher eukaryotes).\n+\n+Script takes genome multi-fasta file and a variety of inputs to do a comprehensive whole\n+genome gene prediction. Uses AUGUSTUS, GeneMark, Snap, GlimmerHMM, BUSCO, EVidence Modeler,\n+tbl2asn, tRNAScan-SE, Exonerate, minimap2.\n+\n+.. _Funannotate: http://funannotate.readthedocs.io\n+ ]]></help>\n+ <expand macro="citations" />\n+</tool>\n' |
b |
diff -r 000000000000 -r 40b87aef5241 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Thu Aug 26 06:55:33 2021 +0000 |
b |
b'@@ -0,0 +1,159 @@\n+<?xml version="1.0"?>\n+<macros>\n+ <token name="@TOOL_VERSION@">1.8.9</token>\n+ <token name="@VERSION_SUFFIX@">0</token>\n+\n+ <xml name="requirements">\n+ <requirement type="package" version="@TOOL_VERSION@">funannotate</requirement>\n+ </xml>\n+\n+ <xml name="citations">\n+ <citations>\n+ <citation type="doi">10.5281/zenodo.4054262</citation>\n+ </citations>\n+ </xml>\n+\n+ <xml name="augustus_species">\n+ <!-- list generated from a Funannotate database directory, listing trained_species/* -->\n+ <option value="adorsata">adorsata</option>\n+ <option value="aedes">aedes</option>\n+ <option value="amphimedon">amphimedon</option>\n+ <option value="ancylostoma_ceylanicum">ancylostoma_ceylanicum</option>\n+ <option value="anidulans">anidulans</option>\n+ <option value="arabidopsis">arabidopsis</option>\n+ <option value="aspergillus_fumigatus">aspergillus_fumigatus</option>\n+ <option value="aspergillus_nidulans">aspergillus_nidulans</option>\n+ <option value="aspergillus_oryzae">aspergillus_oryzae</option>\n+ <option value="aspergillus_terreus">aspergillus_terreus</option>\n+ <option value="bombus_impatiens1">bombus_impatiens1</option>\n+ <option value="bombus_terrestris2">bombus_terrestris2</option>\n+ <option value="botrytis_cinerea">botrytis_cinerea</option>\n+ <option value="b_pseudomallei">b_pseudomallei</option>\n+ <option value="brugia">brugia</option>\n+ <option value="cacao">cacao</option>\n+ <option value="caenorhabditis">caenorhabditis</option>\n+ <option value="camponotus_floridanus">camponotus_floridanus</option>\n+ <option value="candida_albicans">candida_albicans</option>\n+ <option value="candida_guilliermondii">candida_guilliermondii</option>\n+ <option value="candida_tropicalis">candida_tropicalis</option>\n+ <option value="c_elegans_trsk">c_elegans_trsk</option>\n+ <option value="chaetomium_globosum">chaetomium_globosum</option>\n+ <option value="chicken">chicken</option>\n+ <option 
value="chiloscyllium">chiloscyllium</option>\n+ <option value="chlamy2011">chlamy2011</option>\n+ <option value="chlamydomonas">chlamydomonas</option>\n+ <option value="chlorella">chlorella</option>\n+ <option value="ciona">ciona</option>\n+ <option value="coccidioides_immitis">coccidioides_immitis</option>\n+ <option value="Conidiobolus_coronatus">Conidiobolus_coronatus</option>\n+ <option value="coprinus">coprinus</option>\n+ <option value="coprinus_cinereus">coprinus_cinereus</option>\n+ <option value="coyote_tobacco">coyote_tobacco</option>\n+ <option value="cryptococcus">cryptococcus</option>\n+ <option value="cryptococcus_neoformans_gattii">cryptococcus_neoformans_gattii</option>\n+ <option value="cryptococcus_neoformans_neoformans_B">cryptococcus_neoformans_neoformans_B</option>\n+ <option value="cryptococcus_neoformans_neoformans_JEC21">cryptococcus_neoformans_neoformans_JEC21</option>\n+ <option value="culex">culex</option>\n+ <option value="debaryomyces_hansenii">debaryomyces_hansenii</option>\n+ <option value="E_coli_K12">E_coli_K12</option>\n+ <option value="elephant_shark">elephant_shark</option>\n+ <option value="encephalitozoon_cuniculi_GB">encephalitozoon_cuniculi_GB</option>\n+ <option value="eremothecium_gossypii">eremothecium_gossypii</option>\n+ <option value="fly">fly</option>\n+ <option value="fly_exp">fly_exp</option>\n+ <option value="fusarium">fusarium</option>\n+ <option value="fusarium_graminearum">fusarium_graminearum</option>\n+ <option value="galdieria">galdieria</option>\n+ <option value="generic">generic</option>\n+ <option value="heliconius_melpomene1">heliconius_melpomene1</option>\n+ <option value="histoplasma">histoplasma</opt'..b'lue="pneumocystis">pneumocystis</option>\n+ <option value="rhincodon">rhincodon</option>\n+ <option value="rhizopus_oryzae">rhizopus_oryzae</option>\n+ <option value="rhodnius">rhodnius</option>\n+ <option value="rice">rice</option>\n+ <option value="saccharomyces">saccharomyces</option>\n+ <option 
value="saccharomyces_cerevisiae_rm11-1a_1">saccharomyces_cerevisiae_rm11-1a_1</option>\n+ <option value="saccharomyces_cerevisiae_S288C">saccharomyces_cerevisiae_S288C</option>\n+ <option value="s_aureus">s_aureus</option>\n+ <option value="schistosoma">schistosoma</option>\n+ <option value="schistosoma2">schistosoma2</option>\n+ <option value="schizosaccharomyces_pombe">schizosaccharomyces_pombe</option>\n+ <option value="scyliorhinus">scyliorhinus</option>\n+ <option value="sealamprey">sealamprey</option>\n+ <option value="s_pneumoniae">s_pneumoniae</option>\n+ <option value="strongylocentrotus_purpuratus">strongylocentrotus_purpuratus</option>\n+ <option value="sulfolobus_solfataricus">sulfolobus_solfataricus</option>\n+ <option value="template_prokaryotic">template_prokaryotic</option>\n+ <option value="tetrahymena">tetrahymena</option>\n+ <option value="thermoanaerobacter_tengcongensis">thermoanaerobacter_tengcongensis</option>\n+ <option value="tomato">tomato</option>\n+ <option value="toxoplasma">toxoplasma</option>\n+ <option value="tribolium2012">tribolium2012</option>\n+ <option value="trichinella">trichinella</option>\n+ <option value="ustilago">ustilago</option>\n+ <option value="ustilago_maydis">ustilago_maydis</option>\n+ <option value="verticillium_albo_atrum1">verticillium_albo_atrum1</option>\n+ <option value="verticillium_longisporum1">verticillium_longisporum1</option>\n+ <option value="volvox">volvox</option>\n+ <option value="wheat">wheat</option>\n+ <option value="Xipophorus_maculatus">Xipophorus_maculatus</option>\n+ <option value="yarrowia_lipolytica">yarrowia_lipolytica</option>\n+ <option value="zebrafish">zebrafish</option>\n+ </xml>\n+\n+ <xml name="busco_species">\n+ <!-- list generated from a Funannotate database directory, with the "funannotate database -show-buscos command" -->\n+ <option value="eukaryota">eukaryota</option>\n+ <option value="metazoa">metazoa</option>\n+ <option value="nematoda">nematoda</option>\n+ <option 
value="arthropoda">arthropoda</option>\n+ <option value="insecta">insecta</option>\n+ <option value="endopterygota">endopterygota</option>\n+ <option value="hymenoptera">hymenoptera</option>\n+ <option value="diptera">diptera</option>\n+ <option value="vertebrata">vertebrata</option>\n+ <option value="actinopterygii">actinopterygii</option>\n+ <option value="tetrapoda">tetrapoda</option>\n+ <option value="aves">aves</option>\n+ <option value="mammalia">mammalia</option>\n+ <option value="euarchontoglires">euarchontoglires</option>\n+ <option value="laurasiatheria">laurasiatheria</option>\n+ <option value="fungi">fungi</option>\n+ <option value="dikarya">dikarya</option>\n+ <option value="ascomycota">ascomycota</option>\n+ <option value="pezizomycotina">pezizomycotina</option>\n+ <option value="eurotiomycetes">eurotiomycetes</option>\n+ <option value="sordariomycetes">sordariomycetes</option>\n+ <option value="saccharomycetes">saccharomycetes</option>\n+ <option value="saccharomycetales">saccharomycetales</option>\n+ <option value="basidiomycota">basidiomycota</option>\n+ <option value="microsporidia">microsporidia</option>\n+ <option value="embryophyta">embryophyta</option>\n+ <option value="protists">protists</option>\n+ <option value="alveolata_stramenophiles">alveolata_stramenophiles</option>\n+ </xml>\n+</macros>\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/SRR7458692.bam |
b |
Binary file test-data/SRR7458692.bam has changed |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/cleaned.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cleaned.fa Thu Aug 26 06:55:33 2021 +0000 |
b |
b'@@ -0,0 +1,2698 @@\n+>sample\n+GTGGCCGGCTGATTTCACGTCCTAACTTTGGGCTTAACTGGTTCGCCAGTTGACTTTCTTCGCCATCATGTGATGCATTA\n+ATTAAACAATAATTACTAATTGACAGTAATTAATAATTGTGGCAAAAAGCGCGACACGTTTTTTCGGCAAACTCCTCGGA\n+AGACCGATTGTTTAAAGACGTAGGGAAAAGTAGTTCCCAAGCATTTTAAAAAGATACCTATGACATGTGACACCTTTAAA\n+GTGCAATACAAGTTTTCATCTCTTTATATCCTTTTACTCCCTAATTTGAATATAAAAGGAATCGCATTGAGAGTATAAAG\n+GCTTTAGTTCTTATCGATAGATAGTTAGTGATGAAATAAAATTATAACCGTGGTTTTAGTTTGAAATGTTGTAAAGACTT\n+TCTTTAAATTTAACCAAATTTATGTGATAAAATGGATATTCCATAGATAAGACATTTAAGTTAAATGTTTTTATACATCA\n+AAAAGGAAACATTGTGCACGCTATCAAATGGTATTCTTAAAATCGAGTCAGTTAGGTAAGTTATTAATTAAATGGTAACT\n+TTTTAACGTGCGTCAAATAATCTAGAAATTCTTCTTCAACTCATCCAAAACATTCTCAACACCACAATATCTATGCTCAG\n+CGATGACAAATTTCTCCTGATTTCTTAATTTTCTATCTATGCTATGCGATCAATCAACGAATGTGTGCTAATTTCTTGTG\n+ACGATTATTTTGCAAAGTCGTCTCCGCGTTAATATCCGATGTAAATAAACCTATGAAAATCGCAAAGATCTATTCCTTTG\n+CGCTTAACCTTGTTATTGAACTCCCTCCCGCCCGGATTTTCGCAGCTTCCAACTAAGTGATACCTTTTAAACAAACGCCA\n+CAACAAAACAGGTGACAATCATATTTTATCAACAAAAGAAAAGAGAGATAATATCTGCTGCTAATTCAATTTATTGGGCT\n+TTGTGTTTATTTGCATTGGGAATCCGTGGAGCTGATATTGTTTACTTTGGCAATTTCCCAGTCATTATGGTCGCCGTATA\n+ATGTGAATGGCCAACAGGAAAATTTCACAGATTCCACTGGTTATTCACTGTTCATGCTGGCGGTTGATCCGCTCCAAATC\n+CCGGATGAGGCGGCGGAAGATCCTTATACCGCTTACTTACGTATTCACTGCTTAACATCTTATCATAGAATTCGTTAGCT\n+TTCACATAGTTTGGATTCCGAATGAATCTCTTCTGTCGACCTCGCGTTTTTGGCAATTTTCGGAGCTCTTCGCTACTGCT\n+AGTTGATTTAAGGCCTACATCTTTCTGATCCTTAATCATCCTTTTAGGTGATTCTCCTGCTACCATGGGATCCACCAGTT\n+TCATATATAATGGATCGGGCTCGTTTGCAAGTGGGTTTCCAGTTTCAGAAGACTCTGACGTTCTAAGCGCCAATAAAATG\n+AAAATAACCAAAAGGAAACTAGACATGTTTCGTTACAGACAGATATAGATTGGATATTATTGAAAGAAAATGAAAATAAA\n+CAGCGATAATGATCTGTGACTTATTGGAAATTAGATGGCTTATGGATGATGGGGCGATAAATTCGAACAAACACTGAGAG\n+CATTTTTGGGAGCATTGTGGGAGCTTTTCTATTATCCAGTACTTTATATACATATATCATTTATATACTAATCATTTCTG\n+GTAGCCGTTCGTAATCAGGATCGGATCCTTTTTTACCCGTTAGTCAGCTAGAAGAAACGAAAAATTAAAATAGTAAAATC\n+TAAAAGTATACAAAAATTCAAATAGTAAAACCAAAAAGTATTAAAAAAAATATCAATCGTTTTTAAACGTTGATTTTTCA\n+GCTTGTGGGGTGATTTATCGCTAACTTGGAAAATGATAATAAAGCATTATCCATAATA
TTAGTTGTGGAAATGAAATTCA\n+AATAGATGTTGTGTTATATACGATGAGGATGTTGCATTTGAGTCCCCGGAAATATAGTATTTTTTTTACCGAAGGTATTA\n+TCGTACCGGTCAAGTACGGTCACACTGCCAAGCGCAGATTTGAGGATTTCTAGATTTGGCCTCTTGATGGACTAGAAGCG\n+CTACCAAAACTGGGGCTTGAGTTGAATTACCTGTTGGAAGACACAATGCCACCCACGATCAACAATTCGGCGGTAAACAG\n+TGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAGTAAGTGAACAGATCCCTAAACAGACGCCAGATACTCAGAC\n+TGATGTGTACCTTGCAGATCCGAGATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAA\n+GTTTCTGCAGTACCCCTTCGACAGCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACG\n+TGCTGACGGAACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTG\n+GACCCCGCCGATGAAAAACTGCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGAC\n+GGTGTCATGGTTGCGCAAATCCGAGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGG\n+CCAAGGTCGGTTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATC\n+GAGAAGACCTTCAGCGACACCAAGAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCC\n+TATCTTCCCCGACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGA\n+ACGTGCCCGCCCAGCTGGAGGAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCC\n+TACTTCCTGCCCACAGAGCAGACGCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGAGGAGGA\n+GTACGAGTACAAGATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCT\n+TCGTGATGCGTCAGGACGGCATCTACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGC\n+CAGCAACCCAACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCTATAGCGAGCTTTACTTTGTATTATTTCTAC\n+CAGGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGG\n+CGAGGAGGAGGAGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGG\n+ACGCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCGATCGACAGTCTCGTTCTCGG\n+AGTCGAACTCGCAGCGGGTCCAGTTCAGGATCTGGATCTGGCTCCGGCTCTCGGGCCAGCAGCCGCTCAAAGTCTGGTTC\n+TCGGTCTGGTAGCGGCTCCAGATCACGCACAAATTCGCCGGCAGGATCCCAGAAATCCGGATCCAGATCGAGATCGGTAT\n+CACGTTCCCGATCCCGTTCCAAGTCCGGCTCTCGGTCGCGTTCTAGGTCGAGATCCAAGTCCGGTTCCCGATCACGTTCG\n+GGCTCCAGATCTGGCTCTGGGTCGCGATCGCCCAGCCGGTCTCGCAGTGGCTCGCCTTCTGGTTCA
GGATCCAGCTCTGG\n+AAGCGCCTCAGATGAATGATTAATTACAAAAAAC'..b'\n+ATCGGCGACTGTCTGTCATTGTATCCTTCTGCATTCCATTCGTATGTCCGTTTGTCTGTTCATTAGTCCGTCCGTTTGTC\n+CGCCCGTAACTCCGTCCCTGTGTCCTTTTTTTCCGTCCGTTTTCCTGATAAATACTTTTTAAGGAATCCAGCTTACCCTT\n+TTGCACTACAGGTAGCGTGAATAAAAATAAAATAAAAGAGCTAATTTTAAATTAAAATAAAAACAAAAACATCTCTTTGT\n+GTTTATATTTTCCCACTGTGCAGCACAAACACCCCTTTTGCCCACTTAAGCTTGCCACGTTTTCCCTTACTTATAACAGA\n+ACTTACAAACATGCGTTTGGTTTTCGTGGGTTGAGTTTGGTGCTCTCCGTTTACTTTTGCGGTTTTGTTCAGCGCTGCAT\n+ATATTTCCATATTAATTCCCCTGATTATGGGGAATCGTCATCGTCTGCGTTCTGTTCCCTGACGGTTTTGCCCAAATCCA\n+AATCCAAATCCAAATCCAAATCCAAATGCGAATGCGAATATCTGAATGCAGACCACAATTCGACGATGATGTTTCTGTTT\n+CAGAATAATCTAAATCGGCATTATTTATGCATTCAGTTCTTGCATTCATCACAACCACTTAGCGGTTCCACTTCAACGAA\n+CCCACAGATACACAATACATACATATTCATTTATGTATGTATGTACATACCGTCGTGTATATAATTAGTATGCATAGAAG\n+ATACATACGTATGCATTTTAATGGACCACAATTCCCTCGGCCAAAAGAGTGCTCCGATAATAAATATTAATTTTAATAAA\n+TGCTGATGCAGGTCAGCTGATTTCAAACGACCCTTTGTGTCACGGGGCAATGACTTTTGCTCAGTATATGTAGTATTCAA\n+TTTTCAATTTTCCGAAAATGGTATTACAATTACAATTTTTTAAATATTAGATCTCAAAAAATTGCTTGGCATACTTAGCA\n+ACATATCTTTAACTCTCAAGAGTCATATACACCCGATTTTGTTTCAAGTCCCCCTGGGACTTAAAAACCGACAATTACGC\n+CCAACTTGCAATCATTTGGAAAAAAAAACACCAGCTACGAACTTATCAAAACTTATATATCAGCAAAAAATAAATGGGTC\n+TCTATGCGCAGTTATCGGCCCACAAAATATATACAAAAGAAATGACATGTCATTAACTCCGGCCAAGGGCGTGCTTTTTG\n+GTGCCATCGCATTTGGGGGTAACTAGTCGCGGGTCGACCCAGAATCACATCTCCAGGAAGTGGATGTCCAGGGCGTGGTG\n+GTCCACATCGACGGGAGGGTGCAGCTGCAGGAACTTGACGTAGGCCAGGAAGGTGGTCCAGCACATGCTGAAGAAGGAGG\n+TAAACACGACCTGGTTTCGCGCCGGAACAAAGGCGAAGTTCACCGTCTGCACGCAGGGCCAATAAATAACGCCCACCTTG\n+TAGGCGTCCAGGAATTTATCGCTGACCTGGGAACGAGAGGGGGCAACGGGGTGGGTGAGTGCAGTGACATAATGCCCCCA\n+GTAGCAGTCGGAAAATGGAAATGGAAAATGCCAGCGCGGAAAAATTGTAATTAAGTGCCGCCGTCCAGCAGCGGTAGGAG\n+ACGTTTAATATTTTATTTATGGCCTGATCTCTTCTCCTCTTTTAAGGGGCGCTTGACTGGGGCGTGGCATTCAGTTAATG\n+TTGTTAATTAAAAGGCAACGCCTGGCTAGAAAAATTATCATCCAGGCACAGACTGTGTAACAAATGTAACATAGAGCACT\n+TTGGCACATTTTCAATTGGCATGAGAACTTCATTTAACTACAAAGACTATCCTTGTGCCATAAAACTTTCTTTTGTGGAT\n+CTATAGAAGTTGAATCGTTTTACAATCCTCAC
ATAAAATATAGAACTTCGATGGCGAACGCATAGCTATTCCGAAAGCGA\n+TCTTGAAACTTGTACTGCCTTCTACCCGCTTCCAATTTGAAACTACTTTTAGGTGAGGGTCGAGGAACTTACGTCTCTAT\n+CGGCCACGCACACAACTCTTTTACGGCCCAAACCTGAGCCTATCTCAACCAAATCGCTCAGAATAGTCGAGCGATATATC\n+TTTGGAGAACGCATGCAAGGCAAATGAGACGACGCACGTTTGACACAAATATATAGCAAAACATTTACATTTATCAAGAT\n+AAATGGTTTCTAATGATATGGAAGTCAGCCATCGAACCGAGTTTGGCCATGGAGCACAGTGCCTGAAAGTATCTCGCAGT\n+ATGGCCGCAGAGCCAACAAATCAATGTTGACAGCTCGAAACGGAGGACCAGACGCAGAAATTGGGAATGGAGGGAGGTGA\n+GCGACAGACAGGTGATATTTATGCATGCTTACATTGCAGCCTCTGTCTCCGCCACAGATGCAGATTCATAGATACAGATA\n+CAAAGATGGGGATACTGCCTCTGAATGTGTCGTGAGAAAATGGATTCCGAAATCAAAAACTACAGACCGAAAACCGATTT\n+CCAAATAAATAACAATGCATACTGGGCACACACTTAGTAATGAGCACATCTGCGAAATGAAAGACCTTACCAGATAACTG\n+TCAACATTTTAAAATCGTTAAAAGTTGATTCAGGATTTGGGGTAGCCGTGCCAACGGTGTGAATGGGCATGAATAATATG\n+ACATATTCCTTTCCCGAGTAATGAAAAATGTTTTCAGCGAATCTATCCACGAATACCGTACATAAAATAGAGACTCTTCT\n+GCTTGTTAGATATCGTTGGCCCCCGACAAAATGTGGTTCTTTGAAATGAAATTTGAAATAAGTTTGTTGTGCTTAGGCCT\n+TAGTGACTTTGGGATGGGAATATACTTCACCTCCCGCTTGGCCTCCGCGTACGAGTTGCCCTCCATCAAGGTCATGAAGA\n+AGAGGAACGAGCTGATGGCCATCGGATCATAAGCGGTCTGCTCGGTGATCGCCTTGCAGAGCGATGACTTAATGTCGGTG\n+CGCGGCCACATAACGCTGGCCAATCTGATCCACACATATATGGTGGGCCCCATAAAGAAGAAGCCGAATAAGCTGAACCT\n+GCAATGGAATTAGCCAAACCATATCGGGTCCAATTAGCAATCGAATCGGGCCAAGACCCGTGCAGATTGCTCATCACCTA\n+AGGCACTTCATCCAGTCGTACGTCCGGAATGTCTTCTTCTCGATCATGGTCTGCTCGATGAGGGAGCCGCAGGGCCAGAG\n+GGTGCCATACGATATCATGCCGCGTAGGACTTTGTATTTGCTTGTAATATTCACCAAGCTACGAAACATTTTCAACGGTC\n+TCTTGTGATGATCTCGTCGAATGTCAGTGATAGTTTCACTTCCACAAAAGTGCTATAAACACACGCTGAAAGATAAATTG\n+TTTTAGATCAATACGGTGGGCTTTACATGGCTGAGTTCGCTTGGGTTAGTTTTATTAGGTGCCATATTTGTCTTGGCGGA\n+TTTCACTTTTGCAAAAAGTTCGGTTTTAACTCATACGACCTAGAAAGTTATCGAGTACTGATTGTCTTGGGTGGCCATTC\n+CAAATTCAACCGCATTATTCTCCCTCGACAGTGCCTAACTTAGCCACAGAATTCCAGCTCAAGACAATTGCAAATAAGTG\n+TCGGCCAAGCACAGCTGGCACTTCTGGACTCTTCTCTGAACTGATGGATGGCTAAGAGGTGGGTTGGTCTTTGGAAATGG\n+AGAGGGGGTGGTGCCCGCCATCCAGTAACACATGTCAAGTATTATTATCGAGCAACTACAAACGAAATACAAACATACAT\n+TTAATTGCCAAGAATGTTCCTAGCCAAACAAATGTCGAAA
TAATGTCGGGCCACGGCCAAAGATTTATCATATTCACTCG\n+ACTGTCATAAAAAGCAATTAGGCCCGGGCGAAAGATACACGACCGGCAGTCCCGAAATGT\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/cleaned_ident.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cleaned_ident.fa Thu Aug 26 06:55:33 2021 +0000 |
b |
b'@@ -0,0 +1,2849 @@\n+>sample\n+GTGGCCGGCTGATTTCACGTCCTAACTTTGGGCTTAACTGGTTCGCCAGTTGACTTTCTTCGCCATCATGTGATGCATTA\n+ATTAAACAATAATTACTAATTGACAGTAATTAATAATTGTGGCAAAAAGCGCGACACGTTTTTTCGGCAAACTCCTCGGA\n+AGACCGATTGTTTAAAGACGTAGGGAAAAGTAGTTCCCAAGCATTTTAAAAAGATACCTATGACATGTGACACCTTTAAA\n+GTGCAATACAAGTTTTCATCTCTTTATATCCTTTTACTCCCTAATTTGAATATAAAAGGAATCGCATTGAGAGTATAAAG\n+GCTTTAGTTCTTATCGATAGATAGTTAGTGATGAAATAAAATTATAACCGTGGTTTTAGTTTGAAATGTTGTAAAGACTT\n+TCTTTAAATTTAACCAAATTTATGTGATAAAATGGATATTCCATAGATAAGACATTTAAGTTAAATGTTTTTATACATCA\n+AAAAGGAAACATTGTGCACGCTATCAAATGGTATTCTTAAAATCGAGTCAGTTAGGTAAGTTATTAATTAAATGGTAACT\n+TTTTAACGTGCGTCAAATAATCTAGAAATTCTTCTTCAACTCATCCAAAACATTCTCAACACCACAATATCTATGCTCAG\n+CGATGACAAATTTCTCCTGATTTCTTAATTTTCTATCTATGCTATGCGATCAATCAACGAATGTGTGCTAATTTCTTGTG\n+ACGATTATTTTGCAAAGTCGTCTCCGCGTTAATATCCGATGTAAATAAACCTATGAAAATCGCAAAGATCTATTCCTTTG\n+CGCTTAACCTTGTTATTGAACTCCCTCCCGCCCGGATTTTCGCAGCTTCCAACTAAGTGATACCTTTTAAACAAACGCCA\n+CAACAAAACAGGTGACAATCATATTTTATCAACAAAAGAAAAGAGAGATAATATCTGCTGCTAATTCAATTTATTGGGCT\n+TTGTGTTTATTTGCATTGGGAATCCGTGGAGCTGATATTGTTTACTTTGGCAATTTCCCAGTCATTATGGTCGCCGTATA\n+ATGTGAATGGCCAACAGGAAAATTTCACAGATTCCACTGGTTATTCACTGTTCATGCTGGCGGTTGATCCGCTCCAAATC\n+CCGGATGAGGCGGCGGAAGATCCTTATACCGCTTACTTACGTATTCACTGCTTAACATCTTATCATAGAATTCGTTAGCT\n+TTCACATAGTTTGGATTCCGAATGAATCTCTTCTGTCGACCTCGCGTTTTTGGCAATTTTCGGAGCTCTTCGCTACTGCT\n+AGTTGATTTAAGGCCTACATCTTTCTGATCCTTAATCATCCTTTTAGGTGATTCTCCTGCTACCATGGGATCCACCAGTT\n+TCATATATAATGGATCGGGCTCGTTTGCAAGTGGGTTTCCAGTTTCAGAAGACTCTGACGTTCTAAGCGCCAATAAAATG\n+AAAATAACCAAAAGGAAACTAGACATGTTTCGTTACAGACAGATATAGATTGGATATTATTGAAAGAAAATGAAAATAAA\n+CAGCGATAATGATCTGTGACTTATTGGAAATTAGATGGCTTATGGATGATGGGGCGATAAATTCGAACAAACACTGAGAG\n+CATTTTTGGGAGCATTGTGGGAGCTTTTCTATTATCCAGTACTTTATATACATATATCATTTATATACTAATCATTTCTG\n+GTAGCCGTTCGTAATCAGGATCGGATCCTTTTTTACCCGTTAGTCAGCTAGAAGAAACGAAAAATTAAAATAGTAAAATC\n+TAAAAGTATACAAAAATTCAAATAGTAAAACCAAAAAGTATTAAAAAAAATATCAATCGTTTTTAAACGTTGATTTTTCA\n+GCTTGTGGGGTGATTTATCGCTAACTTGGAAAATGATAATAAAGCATTATCCATAATA
TTAGTTGTGGAAATGAAATTCA\n+AATAGATGTTGTGTTATATACGATGAGGATGTTGCATTTGAGTCCCCGGAAATATAGTATTTTTTTTACCGAAGGTATTA\n+TCGTACCGGTCAAGTACGGTCACACTGCCAAGCGCAGATTTGAGGATTTCTAGATTTGGCCTCTTGATGGACTAGAAGCG\n+CTACCAAAACTGGGGCTTGAGTTGAATTACCTGTTGGAAGACACAATGCCACCCACGATCAACAATTCGGCGGTAAACAG\n+TGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAGTAAGTGAACAGATCCCTAAACAGACGCCAGATACTCAGAC\n+TGATGTGTACCTTGCAGATCCGAGATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAA\n+GTTTCTGCAGTACCCCTTCGACAGCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACG\n+TGCTGACGGAACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTG\n+GACCCCGCCGATGAAAAACTGCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGAC\n+GGTGTCATGGTTGCGCAAATCCGAGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGG\n+CCAAGGTCGGTTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATC\n+GAGAAGACCTTCAGCGACACCAAGAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCC\n+TATCTTCCCCGACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGA\n+ACGTGCCCGCCCAGCTGGAGGAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCC\n+TACTTCCTGCCCACAGAGCAGACGCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGAGGAGGA\n+GTACGAGTACAAGATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCT\n+TCGTGATGCGTCAGGACGGCATCTACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGC\n+CAGCAACCCAACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCTATAGCGAGCTTTACTTTGTATTATTTCTAC\n+CAGGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGG\n+CGAGGAGGAGGAGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGG\n+ACGCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCGATCGACAGTCTCGTTCTCGG\n+AGTCGAACTCGCAGCGGGTCCAGTTCAGGATCTGGATCTGGCTCCGGCTCTCGGGCCAGCAGCCGCTCAAAGTCTGGTTC\n+TCGGTCTGGTAGCGGCTCCAGATCACGCACAAATTCGCCGGCAGGATCCCAGAAATCCGGATCCAGATCGAGATCGGTAT\n+CACGTTCCCGATCCCGTTCCAAGTCCGGCTCTCGGTCGCGTTCTAGGTCGAGATCCAAGTCCGGTTCCCGATCACGTTCG\n+GGCTCCAGATCTGGCTCTGGGTCGCGATCGCCCAGCCGGTCTCGCAGTGGCTCGCCTTCTGGTTCA
GGATCCAGCTCTGG\n+AAGCGCCTCAGATGAATGATTAATTACAAAAAAC'..b'ATCCAGATCGAGATCGGTAT\n+CACGTTCCCGATCCCGTTCCAAGTCCGGCTCTCGGTCGCGTTCTAGGTCGAGATCCAAGTCCGGTTCCCGATCACGTTCG\n+GGCTCCAGATCTGGCTCTGGGTCGCGATCGCCCAGCCGGTCTCGCAGTGGCTCGCCTTCTGGTTCAGGATCCAGCTCTGG\n+AAGCGCCTCAGATGAATGATTAATTACAAAAAACGGCGTTCATAATAAATAAGTTTATAATCAACCAAGTACATTTGAAA\n+ACTGAACTAACTCGATTTAATATCATTTTCGCCTCAACTCAGCGCTCGGGTTCGTTGCCCAGAATAGTTTTAAATAAAAT\n+CGGCAGTTTAACATAATTTATATTAGATGTTGTTGTTGTATTGCAAACAAGTCGGGTCCTAGTCGATTTACACTTGGCTG\n+AGATAAAACAACTAAGATTCAAATAATATCCTCATAAGAAGATGTAATTAAGACGTTTTTCTTAGGGGGTGCTTAGGAAT\n+TGATTAGATCGCCTTTGGGGAAGTGCAAACAATGTAAAATGATATAAAAGGGTATAAATTAAGTGGATATATGCATCTTC\n+GTTCCAACTACGTGGCGTCCATCAAAAAGCGCTGGAAGACTTCGCCATCGGAACTAGGTAGCTCTGTTTGTTGCGGTGAG\n+TAGATTCTCAAGTTCTGGAGTTGCTGCAGCGGAGAGCCATTCCCGCTGAAGTGTACCACCGCAACCGGCTGTAGGGATAT\n+GAGCTGTCCCTCCTCCCGCGGCTCCACACCACAGATGCCCAGCTTTTGGCACTGCTCAACGACAATGTCGTCGATTGACT\n+GCGAAAGCAGTGCCTCCTGCTCAGGATCCACAATGGAGCTGCTGTTGATGGCAGCTATTTCGGCGCTCGCTGGTGAAGGC\n+TGAGTATGCGAGTCGTTGCCCTCCAGGAACGCCTTTATCAGCTGCTCCGGTGTCTGGGCCTCCTCGGTGGGACATCGATG\n+GGTCCTTTGACTGACCTTGTACCTGAACGTCTTTTGGCAGAGCTCGCACTTGTAGGGCATCACTCCCGTGTGGATGCGCG\n+TGTGGACAAGGAAAGAGACTCGCTGCCGGAAACACTTGCCTGTGGACGAGTCCGAAATACGAAAAGGTTAGACATGGAGT\n+GACCCGGAAAAGAAGGTATACCTCTCCTTCATTTAAAGTAAATAGGGCAAATCGCAATGGAGTATGCTCATTTATAAGCT\n+GGCTAACAAAATAAGGGGCGGCTAATTAAGGGGTTTGATCGATACTCACCGCAGACTTCGCACTTGAAGGGCTTTTCGCC\n+GCTGTGAATCCTCTGGTGGTTATGCAGCGTAGACAGTTCCTTGAAGGCGCGTCCACAAACCCCGCAGACATGGGGCTTTA\n+CCTCGCTGTGGTAGAGCAAATGCTTGTGGTACGACTGCTGGAAGGTGAAGGTCCTGGCGCAGATCTCGCATGTGTACGGC\n+ATCTCGCCGGTGTGGAGCCGCTTGTGCTTCTTCAGAAAGTACTTGGTGGTGAAGGACTTGCTGCACACATCGCACTCCCA\n+CAGCTTCGGGGTGGCCGTGCCCGACTCCGGCGAACTGGGGGACTGCTGGAGCATGCTCAGCGCCCCACTCAGCGCGTATG\n+GCTGGGATGCGGTGCACGTGGAGTTATTTCCGTTGCCAAGGGCTCCAGGCTCTGTGTGTCGAATGCGATCGCAAATGCTC\n+AGCTTGGGCGTGGCAGTAACTGCACTGGTGGTAGAAGCGGGTGTAGGACTGGGATTAGGATTTGGATTGGGATTGGAGCA\n+GGGCACGCCCATCATGTGCACTACTTTCAGGTGGATTCGAAGGGAGCCCTTCATTCGGAACGTCTTGGAGCAGAGATGGC\n+ACTTGTAGGGCT
CCTGGTCCTGTATAAAGCAATAATCGGAATTTCACTTATATTTATCAATTCATCAATATGCCCTCATG\n+GCCAAATATTCCATTACATTACCGTCTGTCTCTCAGTTTCAAATTTATGCACAAAAATCATTCACTTTCATTCACTATAT\n+CACAAAGTTGCCATGGTTTTAAATTGATCAAAAACAAATTAATATCTATCATATATATACATAGTCATATGAACAGTTGA\n+AAAATTAATTGAAAATAATGGGAACGATATACGTACATACATCAGTTGTTTTTAAAATATAAGGGTATATAGATTTCTTT\n+CTTGTTGTTGTTGATTTTAATTACGTCAAACTTTTGTTTCAGATTCAATGTAAATGGTCTAGCTTTTTAAGTATGATTTT\n+TTTTTGCTGCCAGTGAGCATAGAAAAAAAAAATCAAAATCGATATAAGAATATGCGAAAGTGCATTACGAAACTCTTTAG\n+ATAATAGCACTTAATATATGTACATAGCCAATAGTTACCGGTTCCTTCTGTTGGGGTTCCTTTTGCTTGGGTTCTCCCTC\n+CGCATTTTCGTGGACTAAGCGGACGTGCATGTCCCTCAGCTCGGTATTCCGGAAACTGAACTCGCAAATGTCGCAGTGGG\n+CGGGCGGGGTGGTCCGCACAGGCGGTGGGGTTGGGACGACGGGCTTGGACCTGGTTCGCTTGGCCCTCCGTTTGGGAGGA\n+GCTGCGGCAAGGAAGCCCCGGGACGAGGCGGGTTGGCCATTCGCTGGACTCTCGTTTCCCTCCTGAGCCATCAGACTTGT\n+GTGCGAGAACAGGTGGATGGTGAGCTTGTCCAGCCCCAGGAAGAGCTCCTTGCAGTTGGCAAAGGGACAGGCCAGTGGGC\n+CGTTGGCCGCCTTGATCAGCCTCTGCTGCAGTGCGTCAATACTGCCGAAACTGGGCACCGCGCAGAGCGGACACAGCACC\n+GAGGTGGAACACATTTCGCCAGTGCACTCAATCGAATCTTATGCAAATGCTTCACCTCCTATTGGGATTATCCTCCTGTT\n+CGGTCTGTGATCATCTATTCAGGAGTCCATTCCCAGACTGCCTAGTCTTTCTGCTTTCAAAATTTTCTAAAAATATCAGC\n+AAGTGAAGATTTTTGAAAACTTTGGGCCCAGCAATCTGACTTCTCGGCACCGATGCCAGCTAACGAAATAATGAAAAATA\n+ATGAAATGCCCGGCGCGGATCGTCGAATCGTCAAGAAGACTTTCGGAAACACTCGCAGCACCGAAATCCCATCTCTCGAA\n+CAAGGCAGTCTCTTTTCTCCGTGTCTCTGGGTAGCTCATTTCGAAATATAGCTCTGAGCACGGCTATATACTATATGTAT\n+GTAGAATTATTTCTGGCCGATATATGTTGCACTGGCGGCCATATAGCCTTCGTTCTAGTCTTTGTAACGCACGATGCGCA\n+GGAGCAATTCGCTGAGATGACCACATGCGATTTGCGGGACTTATCTAGAGATCTATCATTATCGCCAGATTGGTTTAATA\n+ATTGGCTTTTCCGCCAATATCCAATTGGAATATGGTTGGTTACTGCAATTGTCGCTCCATTTTTTAAGCACTCCATAAAA\n+AGTAAACACATTAATATGTACTCTTATTGGAGATTTCTTCTTTCGATTTTAGTTTCGGACCAGTGAAAATCATTCGTTTC\n+ATTTTCGTAAATAAGAACTGAGAAAATATTATTATTATATATATTTCTTTATTAGGAAAATACGAAGATTGAGTATTTCA\n+GATTGAATTAGCATATCCGTCTAAATCTTAATGCTGTAATGAGCTTACTTGAGATCTGATCAAAACCAATACAAAACCCA\n+CACCAAAGGTGGTAGCTAATATACATATTTTGTGTAATACTTTTGTAGAGTATTTACTATTCAGCGATTTAAACAAGCAA\n+TCGCCTAGACACACACATTT
GTCCGCCTATGTGTATGTGCACCGAGCTATACCCCCACTGAATCGCTGTGTGCTATTTTT\n+ATGGCCGCGATGCTCTCTTGTTTTGACCCGCTTGGGCAAC\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate.loc Thu Aug 26 06:55:33 2021 +0000 |
b |
@@ -0,0 +1,7 @@ +# this is a tab separated file describing the location of funannotate databases used for the +# funannotate annotation tool +# +# the columns are: +# value description format_version path +# +2021-07-20-120000 Funannotate database 2021-07-20-120000 1.0 ${__HERE__}/funannotate_db |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/funannotate-db-info.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/funannotate-db-info.txt Thu Aug 26 06:55:33 2021 +0000 |
b |
@@ -0,0 +1,10 @@ +merops diamond /tmp/prout/merops.dmnd 12.0 2017-10-04 5009 a6dd76907896708f3ca5335f58560356 +uniprot diamond /tmp/prout/uniprot.dmnd 2021_03 2021-06-02 565254 68ed1e475d13bb3d5574c53822d11cd3 +dbCAN hmmer3 /tmp/prout/dbCAN.hmm 9.0 2020-08-04 641 04696dfba1c3bb82ff9b72cfbb3e4a65 +pfam hmmer3 /tmp/prout/Pfam-A.hmm 34.0 2021-03 19179 f83c0d00445257fd9c066ad3e9e10568 +repeats diamond /tmp/prout/repeats.dmnd 1.0 2021-07-19 11950 4e8cafc3eea47ec7ba505bb1e3465d21 +go text /tmp/prout/go.obo 2021-07-02 2021-07-02 47228 f5b79fe1a6d6a67c542e39da5d4661dc +mibig diamond /tmp/prout/mibig.dmnd 1.4 2021-07-19 31023 118f2c11edde36c81bdea030a0228492 +interpro xml /tmp/prout/interpro.xml 86.0 2021-06-03 38913 0d8c575f88f397397b9491520b38db1e +busco_outgroups outgroups /tmp/prout/outgroups 1.0 2021-07-19 8 6795b1d4545850a4226829c7ae8ef058 +gene2product text /tmp/prout/ncbi_cleaned_gene_products.txt 1.70 2021-06-15 34039 e93924259b8294255def54097bdab07b |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/insecta/ancestral --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/insecta/ancestral Thu Aug 26 06:55:33 2021 +0000 |
b |
b'@@ -0,0 +1,10899 @@\n+>EOG090W0028\n+VPLKDNQDVACFLVTKHSWKGKYKRIFSIGTAGITTYNPDKLEVTNKWLYSDVISVAPEF\n+VLTLKKDKKVDSLKFSSEHRAELLTEALKYFAEKPKRYEAYKLHWSDTRLPVVLEVTPAS\n+LDQLDPATNTVLASYAYKDIEGIGGFVIVVGGFSRLHLFEIKKKILESALGIEIKVITLE\n+EFEEQRLGKYSGDEHQTSLSEFTVEKVRHKEPVRRLLCLSETCLLERDPQTYSIVTLRPL\n+SDVFALVRIEYLNGQVRSYLATDRDSLLASLLDGVRASGNRDVHVKIKKTERGKRLGPLV\n+DEEVEALLLKLLQNEVLERFNANVPYSGLLYSVTQDGLFAENKEKLILEALQALVQKELE\n+AQFHALRRLVASKVGFAAFTKLSGFREAIGKKVVKALKRNDEAVTQAAIDLICALMQPMD\n+LDLRQEQLNKSSLLSSKKFLESLLDMWTEHVSKGTGALVVSAMLDLLTFALCVPYSETTD\n+GKQFDALLELVAERGRVLFKLFQHPSLAIVKGAGLVMRAIIEEGVAAKMQELALAEGALP\n+RHLLAALYTRLLTHRQLSRHLVGLWVTAMELLKRILPAGLLAFLESEEKVPEEEKLNVRD\n+NLKLAQDHASKKKVEKHLEALKHWGAKVEKIKERPVVLRKRRERKKSKLFYYKFNKDHAL\n+PNLIWNHKTREELREALENELRAFESDKELAGLVAWNYAEFEVKYQCLADEVKIGDYYLR\n+LLLEKDDSDSLIRKSYELFNDLYHRFLLTTKVELKVLCLQALAIVYGRYYEDIGPFSDTK\n+YIVQLLDRCLDRDRLVLFLKKLILHKRNVKEILDVRILVDLLTLAHLHTSRAEKEWYYNE\n+RKGPVSFKELKELYKKGKITAKTKVWAQGLDGWRSLQQVPQLKWTLVAKGSPVLNESELA\n+ALILDILIKLTEYFPSRAVIRPLPRVKRLLSELACLPHIVQLLLTFDPVLVEKVATLLLE\n+IMKDNPEVSKLYLTGVFYFILLYTGSNVLPIARFLKLTHTKQAFRSDESDIMQRSILGQL\n+LPEAMVSYLENHGAEKFAEIFLGEFDTPEAIWNSEMRRLLIEKIAAHIADFTPRLRSHTR\n+ARYQYLAIPAVRYPQLEKELFCNIFYLRHLCDTAKFPDWPIADPVKLLKDVLEAWKKEVE\n+KKPPAMTVEEAYKELGLDEAAVRKAYYKLAQKYHPDKNPEGRDKFEAVNKAYEFLCSRSS\n+WSGPNPNNIVLILRTQSILFERYSELRPYKYAGYPQLIKTIKLETKDEQLFSKLLAAASE\n+LAYHTVKCSALNAEELRREEGLEVLLEAYSRCVSVLSKSSKEEDQVCLNITRCFAVAAKF\n+EACRDKIVELPQLVKDLVRVLKFKHLAADSELQLQLVKAGVLWSLLLFLFEYDYTLEESG\n+VERSEEENKQEVANKLAKLAVKACAALAGYLEKLLTPYLARKLILKILTSNTENPYLIWD\n+NGTRAELLEFLEEKRFKYSAHKDELKIGEVFIRIYNEQPTFPINPKEFVLDLLEFLKHVV\n+MALEALANVIKNNKGVEIQCIGKFKLLFGLLSIKKAALEVISLVSRNKECVEDIAASEVL\n+VKLLLLLKVLDTLSALLKIVKEALAKGAVLYLLDLFCNSIREAAAELLAKLSADKLSGPK\n+VRLTLSKFLPKLLADALRDSPVQLFESKHENPELIWDDEARKRVNELVVGGVYLRLFVAN\n+PAWTLRKPKEFLSDLLDTVLELLSKLELATTALVALLRAQPALADAVPSLGHIPKLVRQL\n+KSALLVLHQLALSEICVSAISQTECISPLKRDLIAVACETLSRLFDKLVKQALEAELVKY\n+LLELLESRTKAQIVKALKAMSRSGEKVKAILEKSSVWAEYKDQKHDLFISAAGYLTAGPS\n+TSPPPVD\
n+>EOG090W002U\n+MTTDISVVEYDGGNSSSRLFERSRIKAERESVQKKTFQKWVNSHLVRRIGDLLRDGKKLI\n+KLLEVLSGERLPRPTKGKMRIHCLENVDKALQFLREQRVHLENLGSHDIVDGNARLSLGL\n+IWTIILRFQIQDITIEETDNKETKSAKDALLLWCQMKTAGYHNVNVRNFTTSWRDGLAFN\n+AIIHKHRPDLIQFEKLSKSNAIYNLNNAFNVAEDKLGLTKLLDAEDVFVEQPDEKSIITY\n+VVTYYHYFSKLKQETVQGKRIGKVVGIAMENDRMIKEYESLTSDLLKWIEATIEALGDRK\n+FANSLVGVQQQLAQFSNYRTVEKPPKFVEKGNLEVLLFTLQSKMRANNQKPYTPKEGKMI\n+SDINKAWERLEKAEHERELALREELIRQEKLEQLAARFNRKASMRETWLSENQRLVSQDN\n+FGFDLAAVEAAAKKHEAIETDIFAYEERVQAVVAVSQELEAENYHDIERINARKDNVLRL\n+WNYLLELLRARRLRLELSLQLQQNFQEMLYILDSMEELKLRLLTDDYGKHLMGVEDLLQK\n+HSLVEADINVLGERVKAVVQQSQRFLYKPCDPAIIVERVQQLEDAYAELVKLAVERRARL\n+EESRKLWQFYWDMADEENWIKEKEQIVSTADIGHDLTTVNLLLSKHKALENEIQSHEPQL\n+MSVVAVGDELVHFGADRIQERLKEILAKWNHLLDLRRKRLEAVDYHQLFADADDVDIWML\n+DTLRLVSSEDVGRDEANVQSLLKKHKDVTDELKNYALHQQAEELERLASIDSRYKELLEL\n+AKLRKQRLLDALSLYKLLSESDGVEQWIGEKDRMLDTMVPAKDIEDVEILKHRYDGFDKE\n+MNANASRVAVVNQLARQLLHVEHPNSEQIVARQNELNQKWAELREKAEAKRDELNSAHGV\n+QTFYIECRETVSWIEDKKRILQETDSLEMDLTGVMTLQRRLSGMERDLAAIQAKLDSLEK\n+EAEAIHPEEAALIRERIAQIELIWEQLTQMLKERDAKLEEAGDLHRFLRDLDHFQAWLTK\n+TQTDVASEDTPTSLAEAEKLLSQHQSIKEEIDNYTDDYKKMMEYGERLTAEPSTQDDPQY\n+MFLRERLKALKDGWEELHQMWENRQQLLSQSLNLQLFNRDARQAEVLLSQQEHVLAKDET\n+PVNLEQAENLLKRHEAFLTTMEANDDKINSVVQFAERLVDEEHFAADKVKKKAENIEERA\n+NREKAEKLKDQEFLQDLEELSEWVQEKKITAQDETYRSAKTVHSKWTRHQAFEAEIASNK\n+ERLKPELAEIIEPKLKELADQFEELETTTKEKGERLFDANREVLIHQTCDDIDSWLNELE\n+KQIESEDTGSDLASVNILMQKQQLIETQMAVKAKQVEELEKQAEYLQKTVPVKKEKVEER\n+FEKLKAPLLERQRQLEKKKEAFQFRRDVEDEKLWIAEKLPLATSTEYGNSLFNVHVLKKK\n+NQSLKTEIDNHEPRIKAVCNNGQKLIDEGHEDAKEFEKLIEELWKELKDAVEEREKAQQY\n+LFDASEAESWMSEQELYMMVEDRGKDEISAQNLMKKHESLEKAVEDYAETIRQLGETARQ\n+LDQIAVKQSQVDKLYAGLKDLAGERRAKLDEALQLFMLNREVDDLEQWIAEREVVAGSHE\n+LGQDYDHVTLLWERFKEFARDTEAVGSERVAAVNEIADELIAAGHSDSATIAEWKDGLNE\n+AWQDLLELIETRTQMLAASRELHKFFHDCKDVLGRILEKQSDELGRDAGSVSALQRKHQN\n+FLQDLSTLQSQVQQIQEESAKLQASYAGDKAKEITNREAEVVAAWANLQALCDARKAKLA\n+DTGDLFFFNLVRTLLLWLDDVVRQMNTSEKPRDVSGVELLMNNHQSLKAEIDAREDNFSA\n+CISLGKELLARNHYASIKEKLLALTNQRNA
LLKRWEERWENLQLILEVYQFARDAAVAEA\n+WLIAQEPYLLSQELGHTIDEVENLIKKHEAFEKSA'..b'OG090W0MK4\n+DAEQIKSFKDFLLSYNKLSELCFVDCISDFTSREVEEKCALNCLEKYLKMNQRISQRFQE\n+FQLIANENALAAAKK\n+>EOG090W0MLJ\n+QKKLQELDKYKQVQKEYKKAVKQRQQLDGQLNENKVVELDLLKEDNEVYKLIGPVLVKQE\n+LEEAKQNVSKRIEYISKELKRVEDLIASLEKKQEKHRENLEKLQQQLQ\n+>EOG090W0MM4\n+LYEPDYLKPKIPLYDVLNVQIKGYDYAVLESYQKLIHKIAEALDLDVEDSWALPAQELKV\n+QRYKPKSTVVEAEYKLKVYERNVQISDVSSPILLRVLEAALPEGVTLEVEEHEEEKEEKR\n+YVPDKELLDLKQELDEL\n+>EOG090W0MNZ\n+KIEEYETFINDVLKEDLKKLEKKLEKLNEEIAEYVQLKSTIETLDGLKTKVDIGCNFFVQ\n+AKVEDSKILVNIGLGVYLELTLEEALKFIDVRIKLLEKQIEKLRKESAKTKAHIKLVLLA\n+IEELQ\n+>EOG090W0MYQ\n+NPFEKEKKKCILCKLNIEPDYKNVKLLSQFQSPYTGRIYGRHITGLCKKKQEKVEKEILK\n+AQFLKDPKLFDPEKPLRPHK\n+>EOG090W0MZQ\n+PPINQKRLLAFINHFIISTVSFLNKFAKSCEEKLLEFEKKLQKVEASLVILEAKLSSIPE\n+LEEDPEYKKYFKMVQVGVPKEAVKLKMQQEGLDPSLLD\n+>EOG090W0N0N\n+LSKKEKLKKAVKDYGSTVVVFHVGISLISLGALYLLVSSGLDVLLEKLEASTFVVAYAVH\n+KVLAPVRISITLAATPLIVRYLRKIGLLK\n+>EOG090W0N4N\n+MDLSKVKNEKKLELCKLYFGFALLPFLWAVNAVWFFKEAFKKPEYEEQKQIKKYVILSAI\n+GALIWAWIVIFQLKRAEWGELADEISFIIPLG\n+>EOG090W0N5S\n+MKAVTAVCATGASVPAVASGRVKRRRDLENEEIQMYLSKLKDLVPFMPKNRKLSKLEVIQ\n+HVIDYICDLQTALEEHPAAAALARQPLGVLPNTIL\n+>EOG090W0N7H\n+MKLSHETVTIELKNGTQVHGTITGVDVAMNTHLKAVKLTIKNRLETLSIRGNNIRYYILP\n+DSLPLETLLIDDTPKAKAKKK\n+>EOG090W0N7U\n+SSTSQKHREPMGDKPVTDLAGVGEVLGKRLVVLGQYLVLKKDKELFKEWMKDTCSANSKQ\n+SSDCYQCLSDWCEEF\n+>EOG090W0NCE\n+VNKTVSIITDGRNFIGTLKGFDQTINLILDESHERVYSTTQGVEQVVLGLHIIRGDNVAI\n+VGELDDSRLDLSSIRAEPLSSVVH\n+>EOG090W0NFV\n+DPELEAIRAQRLAQLQSQYKGQKAQEEKKREQEEMKNSILSQVLDQSARARLNTLKLGKP\n+EKGKMVENLLIRMAQRGQIKGKLGEKELIKLLESVNQQTTVKFDRRRAALDSDDD\n+>EOG090W0NJA\n+TRVYVGGLTEKVKKEDLEAEFEKYGKLNSVWVAFNPPGFAFIEFENKDEAEKACDNLNGT\n+ELLGSKLRVEISRGRGRKGGRGKRGSRFRSRSPVGR\n+>EOG090W0NJU\n+YLKSWEEFEKAAERLYLQDPLKRYTMKYVHSKGLLVLKLTDNCLQYKTEDLKKIEKFISN\n+LMRHMASKE\n+>EOG090W0NK3\n+VNVPKQRRTFCKKCKVHKLHKVTQYKKSKEGRRRYDRKQQGFGGQTKPIFRKKAKTTKKI\n+VLRLECTECKYRKPLKRCKHFELGGDKKRK\n+>EOG090W0NO8\n+MGKVKCSELRTKDKKELLKQLEELKTELTNLRVAKVTGGAASKLSKIRVVRKAIARVYIV\n+
LHQKQKENLRKNKKYKPLDLRPKKTRALRRALTTLKEIRKRKYAVKA\n+>EOG090W0NRT\n+RKEALSQFIQQIHGRPVVVKLNSGVDYRGVLACLDGYMNIALEQTEEYVNGQLKNKYGDA\n+FIRGNNVLY\n+>EOG090W0NTV\n+EEWLEKEVIGLRVWQLLLLVLSILLSLVILLCCCIRFRIPRTKQEIEADYERKKLTKKFR\n+KRLKKIKNSEMDELDLKKAEAESLE\n+>EOG090W0O4V\n+MPKYYCDYCDTYLTHDSPSVRKTHCQGRKHKDNVKFYYQKWMEEQAQHLIDATTAAFKAG\n+KIASNPFAGVAIPPPGPGLAAPPGMPMMMGPHGPMPPMMMRPLMKPKGPMAPMGPLGALG\n+PVRPPL\n+>EOG090W0O82\n+MLEITCNDRLGKKVRVKCNPDDTIGDLKKLIAAQTGTKIVLKKWYTIFKDHIKLQD\n+>EOG090W0O88\n+AKRTKKVGITGKYGTRYGASLRKMVKKMEITQHSKYTCSFCGKAMKRSVVGIWSCKRCKR\n+TVAGGAWVYSTTAAASVRSAVRRLRE\n+>EOG090W0ODH\n+MEEKLAEYRAKKRREELLEKVKEKLKEVYLLYFLLWATLYIIAIELEFGAVYLVLSALVF\n+IYLNTRTGPKKKGEVSAYSVFNKNCEAIDGTLKAEQFEREIRYG\n+>EOG090W0OM7\n+LGRSRSPSPRRRRKERRDRRRRRSRERRRRSRDRERSLSRSRSRSEERERPVITEADLEG\n+KSPEEQEMLKLMGFCGFDTTKGKKVEGNDVGEVHVILKRKYRQYMNRKGGFNRPLDFV\n+>EOG090W0ORD\n+DEYALVAKGKLKLKSDKKKKKKKKRTKAELAFKMQEKMQKERIKEKASMTHKQRVEEFNR\n+HLDSLTEHFDIPKVSWTK\n+>EOG090W0ORX\n+PREIKEIKDFLLKARRKDAKSKIKKNAENVKFKVRCSRFLYTLVITDKEKAEKLKQSLPP\n+G\n+>EOG090W0OS5\n+KELEKLEEAKLKAKYPEGHSAFLQKRLAKGQKYFDSGDYQMAKQKTGEAIPTPETVPVRK\n+TSIIQP\n+>EOG090W0PDB\n+FAKDSIRLVKRCTKPDREFQKIAIATAIGFCIMGFIGFFVKLIHIPINNIIV\n+>EOG090W0PQO\n+LLLLAVALAAAQLFLAQALEASLAHPAVVENAEAEAQLPEELRNPFYKNPRIAAALAKES\n+WFTNKEMQVIDREAEKIPREKIYKILKNAGLVRRR\n+>EOG090W0PW0\n+EEKELKAGHPPAVKAGGMRITQHKTPSPPKTISGAPVKGNEAVQVFHEKKPPTIQQPRK\n+>EOG090W0PZH\n+KPIDSKREEFRKYLERAGVLDALTKVLVSLYEEPEKPEDALEYLRKNLGLKKELEEAKAE\n+IAELE\n+>EOG090W0Q9X\n+VIGGAVVGLLCAILVVMFIVYRLRKKDEGSYALEPKKRSPNREFYA\n+>EOG090W0QXM\n+PAAPSSTSVGSGSRSPSKQRKTTGSGGMWRFYTDDSPGIKVGPVPVLVMSLLFIASVFML\n+HIWGKYTRS\n+>EOG090W0R2X\n+MKRTKEKVEKEEGEELYSNEITEEMKKFIIEPSYVLCEKLIEGRLSFGGMNPEIEKLMEE\n+EEKDVSDEEMA\n+>EOG090W0RGQ\n+DVLDSWEEIDESEALEKKLKKLVIIKEEDELRSQLVPPEPTVKILKRPEKSSNGESKPKQ\n+PIKTLKQREQEYAEARLRILGEAKSPEENVLRLPRGPDGTKGFNVRR\n+>EOG090W0S6D\n+RVNGSLLKQFIGKKVSILGKVKKKSSNGKSFLKTTDNQKVTVELKEPLDEPLEGWVEVHG\n+VVKSSTISCDEYIEFPEETENFDAEAYNKLLNTVKNPWK\n+>EOG090W0T3K\n+MREFTNIVTTLSKLSKECVLRLTKDKLVFIVPL
VWCELDQKFFSEYNMEGVSNEIYLELS\n+AEMLSRSLSSLKAKSVKIKLTNKQSPCLTVEIELSSESRQVVHDIPVTVIPRKEWSEYEE\n+PSIELPSLKKLRKVVDRMKNLSPSLTISATLKIETDTATVSTHFKNLKVSARVDIKKLSA\n+FLEVICSIEKLIKLELVKLHYFLPAV\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/insecta/dataset.cfg --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/insecta/dataset.cfg Thu Aug 26 06:55:33 2021 +0000 |
b |
@@ -0,0 +1,6 @@ +name=insecta_odb9 +species=fly +domain=eukaryota +creation_date=2016-02-13 +number_of_BUSCOs=1658 +number_of_species=42 |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/insecta/hmms/EOG090W03A6.hmm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/insecta/hmms/EOG090W03A6.hmm Thu Aug 26 06:55:33 2021 +0000 |
[ |
b'@@ -0,0 +1,892 @@\n+HMMER3/f [3.1b1 | May 2013]\n+NAME EOG090W03A6\n+LENG 290\n+ALPH amino\n+RF no\n+MM no\n+CONS yes\n+CS no\n+MAP yes\n+DATE Tue Jul 19 12:14:12 2016\n+NSEQ 41\n+EFFN 0.573059\n+CKSUM 4199501958\n+STATS LOCAL MSV -11.0137 0.70159\n+STATS LOCAL VITERBI -11.8914 0.70159\n+STATS LOCAL FORWARD -5.4517 0.70159\n+HMM A C D E F G H I K L M N P Q R S T V W Y \n+ m->m m->i m->d i->m i->i d->m d->d\n+ COMPO 2.61260 4.34648 2.94292 2.63725 3.00827 2.97272 3.70379 2.92379 2.63311 2.34924 3.71976 3.18648 3.40267 3.05104 2.84850 2.73289 2.87560 2.67151 4.50628 3.28193\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.06535 3.91193 3.14059 0.61958 0.77255 0.00000 *\n+ 1 3.00919 4.52311 4.14821 3.62922 3.09723 4.03596 4.32783 2.27940 3.34231 1.08727 2.86803 3.94138 4.37623 3.72340 3.44120 3.39248 3.26003 2.31137 4.98326 3.74489 1 l - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03123 3.87781 4.60016 0.61958 0.77255 0.51713 0.90691\n+ 2 2.36218 4.33870 3.13011 2.73889 4.02232 3.10957 3.85645 3.40174 2.64759 3.07800 3.92900 3.09783 3.59930 3.07936 2.94839 1.61731 2.43809 2.94535 5.34618 4.08403 2 s - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03123 3.87781 4.60016 0.61958 0.77255 0.51713 0.90691\n+ 3 2.67521 4.27814 3.24960 2.71563 4.05115 3.41144 3.69906 3.32637 1.42497 3.01802 3.94128 3.12968 3.88008 2.88045 2.51411 2.76334 2.94671 2.94958 5.28612 4.02052 3 k - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03123 3.87781 4.60016 0.61958 0.77255 0.51713 0.90691\n+ 4 
2.80665 4.86651 3.06170 2.65102 4.29941 3.41912 3.68135 3.61311 1.29697 3.22463 4.14220 3.08300 3.90692 2.84934 2.39258 2.85149 2.85896 3.30620 5.40498 4.16189 4 k - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.04394 3.87781 3.80347 0.61958 0.77255 0.51713 0.90691\n+ 5 1.15269 4.13250 3.42805 3.19131 4.05298 2.97131 4.22552 3.11818 3.19502 3.02891 4.02623 3.33501 3.71301 3.53350 3.46107 2.45402 2.62633 2.76070 5.51002 4.28558 5 a - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03162 3.86549 4.58784 0.61958 0.77255 0.52796 0.89112\n+ 6 3.14746 4.61180 4.17828 3.81812 3.13308 3.97327 4.47605 2.33676 3.58594 0.88383 3.15824 4.09427 4.40984 3.95594 3.76314 3.54334 3.44203 2.37228 5.00981 3.73151 6 l - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03162 3.86549 4.58784 0.61958 0.77255 0.52796 0.89112\n+ 7 3.23704 4.72886 3.82795 3.55604 2.29363 3.82202 3.61532 3.24723 3.41375 2.70296 3.92299 3.71488 4.29603 3.72147 3'..b'.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03197 3.85485 4.57719 0.61958 0.77255 0.54899 0.86164\n+ 283 2.86715 4.85199 2.89168 2.64635 4.01816 3.35460 3.80443 3.64641 2.46840 3.15049 4.17905 3.10585 3.90794 1.26571 2.75284 2.91574 3.16423 3.37724 5.29340 3.98605 284 q - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03197 3.85485 4.57719 0.61958 0.77255 0.54899 0.86164\n+ 284 2.30662 4.22669 3.19139 3.00653 4.05883 2.95692 4.11678 3.57914 
3.07910 3.30080 4.22848 3.23401 3.69430 3.42983 3.36556 1.11793 2.78547 3.11811 5.43277 4.14231 285 s - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03197 3.85485 4.57719 0.61958 0.77255 0.54899 0.86164\n+ 285 2.97610 4.79637 3.10901 2.86734 3.27278 3.43295 1.18022 3.67628 2.71089 3.17239 4.21410 3.27732 3.98459 3.23340 2.97758 3.04699 3.27893 3.41176 4.73803 3.22416 286 h - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03197 3.85485 4.57719 0.61958 0.77255 0.49247 0.94447\n+ 286 3.02629 4.38931 4.55440 4.02788 3.38969 4.29987 4.75221 1.66570 3.90157 1.73351 3.22011 4.29643 4.60414 4.16932 4.12143 3.65546 3.28153 1.24068 5.32317 4.12520 287 v - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03072 3.89395 4.61629 0.61958 0.77255 0.51483 0.91031\n+ 287 3.12120 4.54522 4.46798 3.93436 3.10940 4.26644 4.61200 2.09702 3.74039 1.03668 2.81140 4.23014 4.55041 4.00262 3.93893 3.62030 3.36876 2.00221 5.11579 3.95042 288 l - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03072 3.89395 4.61629 0.61958 0.77255 0.51483 0.91031\n+ 288 2.79129 4.78468 3.27333 2.77748 4.23368 3.42379 3.72210 3.67372 2.18570 3.22145 4.12764 3.17710 3.22448 2.90441 1.31319 2.86068 3.06244 3.35113 5.34061 4.12949 289 r - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03072 3.89395 4.61629 0.61958 0.77255 0.51483 0.91031\n+ 289 2.87748 5.28398 2.07313 1.23939 4.58665 3.21010 3.73813 
4.06161 2.69686 3.62602 4.49991 2.71592 3.81535 2.91427 3.22357 2.79830 3.16561 3.67987 5.77966 4.34719 290 e - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03072 3.89395 4.61629 0.61958 0.77255 0.51483 0.91031\n+ 290 2.83903 4.94076 3.19058 2.65002 4.36201 3.09854 3.60817 3.74025 1.86467 3.26108 4.11943 3.08356 3.91775 2.75841 1.52389 2.86394 3.05522 3.41435 5.37419 4.15539 291 r - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.02078 3.88401 * 0.61958 0.77255 0.00000 *\n+//\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/insecta/hmms/EOG090W06A3.hmm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/insecta/hmms/EOG090W06A3.hmm Thu Aug 26 06:55:33 2021 +0000 |
[ |
b'@@ -0,0 +1,973 @@\n+HMMER3/f [3.1b1 | May 2013]\n+NAME EOG090W06A3\n+LENG 317\n+ALPH amino\n+RF no\n+MM no\n+CONS yes\n+CS no\n+MAP yes\n+DATE Tue Jul 19 12:14:23 2016\n+NSEQ 42\n+EFFN 0.505005\n+CKSUM 2173114792\n+STATS LOCAL MSV -11.1177 0.70082\n+STATS LOCAL VITERBI -11.8237 0.70082\n+STATS LOCAL FORWARD -5.8436 0.70082\n+HMM A C D E F G H I K L M N P Q R S T V W Y \n+ m->m m->i m->d i->m i->i d->m d->d\n+ COMPO 2.60078 4.20386 3.09081 2.72939 3.29201 2.73750 3.80596 2.60287 2.76439 2.32806 3.62751 3.16496 3.24412 3.25571 3.03713 2.74208 2.91733 2.41359 4.52910 3.56529\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03242 3.84097 4.56332 0.61958 0.77255 0.00000 *\n+ 1 2.88548 4.98203 3.26484 2.66893 4.37797 3.54234 3.56403 3.73430 1.73193 3.23652 4.10452 3.09116 3.93137 2.65462 1.56967 2.90213 2.96175 3.42446 5.34579 4.14426 1 r - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03242 3.84097 4.56332 0.61958 0.77255 0.48576 0.95510\n+ 2 1.06014 4.17714 3.43127 3.22164 4.02163 3.00241 4.24732 3.14248 3.23710 3.02549 4.06118 3.37901 3.73871 3.58127 3.48951 2.52791 2.80311 2.80236 5.45195 4.24880 2 a - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03242 3.84097 4.56332 0.61958 0.77255 0.48576 0.95510\n+ 3 3.05279 4.55050 4.19258 3.77702 3.08275 4.01645 4.45173 2.21046 3.53641 1.02046 3.06577 4.04994 4.42083 3.89443 3.73294 3.46036 3.34301 2.19383 5.01051 3.71888 3 l - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03242 3.84097 4.56332 0.61958 0.77255 0.48576 0.95510\n+ 4 
3.00664 4.43473 4.29003 3.90501 3.35346 4.03179 4.63929 1.11780 3.73250 1.94647 3.29426 4.15480 4.46644 4.08432 3.93271 3.52442 3.30654 1.84634 5.23744 3.97271 4 i - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03242 3.84097 4.56332 0.61958 0.77255 0.48576 0.95510\n+ 5 3.12038 4.59225 4.13872 3.77890 3.12451 3.94175 4.44453 2.32740 3.54694 0.91499 3.15816 4.05737 4.38285 3.92282 3.72797 3.50962 3.41625 2.35646 4.99189 3.70778 5 l - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03242 3.84097 4.56332 0.61958 0.77255 0.48576 0.95510\n+ 6 2.78335 4.33516 4.08134 3.74395 3.52758 3.69083 4.55127 2.05344 3.61123 2.22041 3.49797 3.93870 4.25342 3.97351 3.82669 3.19274 3.14588 1.06562 5.30654 4.03600 6 v - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03242 3.84097 4.56332 0.61958 0.77255 0.48576 0.95510\n+ 7 2.62697 4.40500 3.29425 3.18992 4.33181 0.77941 4.31170 3.92417 3.36686 3.58217 4.55801 3.45867 3.79512 3.69359 3'..b'.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03203 3.85293 4.57528 0.61958 0.77255 0.49328 0.94320\n+ 310 2.30574 4.22577 3.18979 3.00487 4.05694 2.95603 4.11516 3.57701 3.07731 3.29883 4.22670 3.23264 3.69326 3.42820 3.36380 1.12089 2.78445 3.11640 5.43104 4.14042 310 s - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03203 3.85293 4.57528 0.61958 0.77255 0.49328 0.94320\n+ 311 2.79273 4.34096 4.09811 3.76084 3.53604 3.70346 4.56601 2.05196 
3.62832 2.22661 3.50436 3.95375 4.26521 3.98962 3.84267 3.20593 3.15484 1.04991 5.31711 4.04742 311 v - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03203 3.85293 4.57528 0.61958 0.77255 0.49328 0.94320\n+ 312 2.52063 4.46247 3.14399 2.66329 3.61752 3.33464 3.73235 3.11172 2.55088 2.69643 3.68563 3.09303 2.21417 2.90819 2.81318 2.64840 2.75793 2.77252 5.11552 3.82308 312 p - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03203 3.85293 4.57528 0.61958 0.77255 0.49328 0.94320\n+ 313 2.83516 5.28588 2.02146 1.37746 4.57342 3.19313 3.68741 4.04429 2.57588 3.59161 4.44614 2.67503 3.78602 2.85457 3.15755 2.75155 3.11440 3.65636 5.76541 4.31525 313 e - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03203 3.85293 4.57528 0.61958 0.77255 0.49328 0.94320\n+ 314 2.42177 4.37033 3.17444 2.98491 4.08142 3.08409 4.11584 3.50342 3.03789 3.18538 4.19247 3.28514 1.20231 3.42206 3.32349 2.65489 2.93898 3.13658 5.40418 4.19997 314 p - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03203 3.85293 4.57528 0.61958 0.77255 0.49328 0.94320\n+ 315 2.63050 4.69517 2.96852 2.54738 3.99565 3.34796 3.67351 3.38755 2.34178 3.00253 3.70071 3.00925 3.82818 1.82403 2.67315 2.61986 2.77726 3.09208 5.27729 3.96986 315 q - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03203 3.85293 4.57528 0.61958 0.77255 0.49328 0.94320\n+ 316 2.99195 4.39606 4.46052 3.99700 3.37863 4.17130 4.73529 
1.20074 3.83833 1.90438 3.24173 4.24154 4.54901 4.14877 4.05130 3.57249 3.27663 1.64463 5.32209 4.08731 316 i - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03203 3.85293 4.57528 0.61958 0.77255 0.49328 0.94320\n+ 317 3.01592 4.44036 4.30752 3.92275 3.36125 4.04511 4.65464 1.10243 3.75049 1.95152 3.29989 4.17084 4.47839 4.10092 3.94931 3.53870 3.31551 1.84594 5.24789 3.98436 317 i - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.02167 3.84258 * 0.61958 0.77255 0.00000 *\n+//\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/insecta/hmms/EOG090W0GYE.hmm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/insecta/hmms/EOG090W0GYE.hmm Thu Aug 26 06:55:33 2021 +0000 |
[ |
b'@@ -0,0 +1,352 @@\n+HMMER3/f [3.1b1 | May 2013]\n+NAME EOG090W0GYE\n+LENG 110\n+ALPH amino\n+RF no\n+MM no\n+CONS yes\n+CS no\n+MAP yes\n+DATE Tue Jul 19 12:02:11 2016\n+NSEQ 42\n+EFFN 0.417847\n+CKSUM 96359631\n+STATS LOCAL MSV -9.8588 0.71539\n+STATS LOCAL VITERBI -10.5314 0.71539\n+STATS LOCAL FORWARD -4.2136 0.71539\n+HMM A C D E F G H I K L M N P Q R S T V W Y \n+ m->m m->i m->d i->m i->i d->m d->d\n+ COMPO 2.58050 4.26382 2.85365 2.80613 3.15035 2.89189 3.81397 2.73428 2.84682 2.50149 3.63880 3.04623 2.92146 3.23736 2.97958 2.69288 2.75694 2.61051 4.37217 3.44918\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.11208 3.80830 2.47881 0.61958 0.77255 0.00000 *\n+ 1 2.86563 4.43941 3.90030 3.48825 3.15849 3.72815 4.25618 2.32070 3.26187 1.77984 1.67728 3.78149 4.19270 3.65614 3.48266 3.19769 3.17068 2.30899 4.97739 3.72347 1 m - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03621 3.73242 4.45477 0.61958 0.77255 0.51831 0.90517\n+ 2 2.32088 4.22631 3.23062 2.88208 3.75054 3.06494 3.91305 3.12034 2.80280 2.86950 3.46740 3.17206 3.71214 3.20009 3.11255 1.58694 2.72892 2.79566 5.19560 3.87469 2 s - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03541 3.75430 4.47665 0.61958 0.77255 0.52979 0.88850\n+ 3 2.16583 4.14387 3.31164 3.04023 3.91928 2.99832 4.08555 2.96050 2.99484 2.87006 3.88524 3.25429 3.70169 3.37677 3.27365 2.46801 1.56963 2.64620 5.38268 4.14256 3 t - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03541 3.75430 4.47665 0.61958 0.77255 0.52979 0.88850\n+ 4 2.50339 
4.36552 3.18021 2.87748 3.70569 3.17426 3.92862 3.09513 2.80524 2.51701 3.83182 3.22048 1.72512 3.22934 3.10382 2.66637 2.88472 2.83864 5.15020 3.85399 4 p - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03541 3.75430 4.47665 0.61958 0.77255 0.52979 0.88850\n+ 5 1.37966 4.07220 3.30932 3.05735 4.00851 2.90363 4.11036 3.10631 3.06226 3.01691 3.96738 3.22700 3.63731 3.40459 3.34236 2.32349 2.50171 2.72896 5.44141 4.20867 5 a - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03541 3.75430 4.47665 0.61958 0.77255 0.52979 0.88850\n+ 6 2.79738 4.71128 3.35305 2.81859 3.86956 3.45352 3.66728 3.34124 2.12422 2.73535 3.92040 3.19573 3.91394 2.88067 1.48782 2.88950 3.04990 3.10526 5.13081 3.85640 6 r - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03541 3.75430 4.47665 0.61958 0.77255 0.52979 0.88850\n+ 7 2.79738 4.71128 3.35305 2.81859 3.86956 3.45352 3.66728 3.34124 2.12422 2.73535 3.92040 3.19573 3.91394 2.88067 1.4'..b'.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03352 3.80830 4.53064 0.61958 0.77255 0.48576 0.95510\n+ 103 2.28595 4.20469 3.15305 2.96732 4.01306 2.93587 4.07824 3.52766 3.03667 3.25343 4.18597 3.20146 3.66970 3.39139 3.32369 1.19215 2.76154 3.07690 5.39105 4.09660 103 s - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03352 3.80830 4.53064 0.61958 0.77255 0.48576 0.95510\n+ 104 1.11047 4.16262 3.39942 3.18942 3.99027 2.98852 4.21878 3.10639 
3.20405 2.99074 4.03096 3.35463 3.72182 3.55149 3.45809 2.51413 2.78712 2.77202 5.42393 4.21734 104 a - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03352 3.80830 4.53064 0.61958 0.77255 0.48576 0.95510\n+ 105 2.98123 4.41976 4.24147 3.85605 3.33251 3.99469 4.59708 1.16125 3.68281 1.93347 3.27967 4.11045 4.43341 4.03863 3.88681 3.48501 3.28213 1.84863 5.20897 3.94068 105 i - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03352 3.80830 4.53064 0.61958 0.77255 0.48576 0.95510\n+ 106 3.08459 4.56673 4.08567 3.72675 3.11378 3.89922 4.40266 2.31619 3.49506 0.95858 3.15957 4.00826 4.34667 3.87883 3.68103 3.46477 3.38236 2.33656 4.96826 3.67627 106 l - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03352 3.80830 4.53064 0.61958 0.77255 0.48576 0.95510\n+ 107 2.38084 4.22527 3.37365 3.12444 3.91461 3.06621 4.14632 3.03402 3.06612 2.89972 3.95563 3.34312 3.76545 3.46456 3.32701 2.58155 1.28564 2.73588 5.35817 4.13168 107 t - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03352 3.80830 4.53064 0.61958 0.77255 0.48576 0.95510\n+ 108 2.28595 4.20469 3.15305 2.96732 4.01306 2.93587 4.07824 3.52766 3.03667 3.25343 4.18597 3.20146 3.66970 3.39139 3.32369 1.19215 2.76154 3.07690 5.39105 4.09660 108 s - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03352 3.80830 4.53064 0.61958 0.77255 0.48576 0.95510\n+ 109 2.98123 4.41976 4.24147 3.85605 3.33251 3.99469 4.59708 
1.16125 3.68281 1.93347 3.27967 4.11045 4.43341 4.03863 3.88681 3.48501 3.28213 1.84863 5.20897 3.94068 109 i - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03352 3.80830 4.53064 0.61958 0.77255 0.48576 0.95510\n+ 110 2.83281 4.81376 2.86370 2.61910 3.97332 3.32524 3.77627 3.59570 2.44478 3.10430 4.13561 3.07764 3.87883 1.35288 2.72955 2.88405 3.13061 3.32988 5.25759 3.94755 110 q - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.02268 3.79747 * 0.61958 0.77255 0.00000 *\n+//\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/insecta/hmms/EOG090W0T3K.hmm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/insecta/hmms/EOG090W0T3K.hmm Thu Aug 26 06:55:33 2021 +0000 |
[ |
b'@@ -0,0 +1,640 @@\n+HMMER3/f [3.1b1 | May 2013]\n+NAME EOG090W0T3K\n+LENG 206\n+ALPH amino\n+RF no\n+MM no\n+CONS yes\n+CS no\n+MAP yes\n+DATE Tue Jul 19 12:12:21 2016\n+NSEQ 42\n+EFFN 1.961060\n+CKSUM 265104873\n+STATS LOCAL MSV -10.5526 0.70500\n+STATS LOCAL VITERBI -11.3203 0.70500\n+STATS LOCAL FORWARD -5.0917 0.70500\n+HMM A C D E F G H I K L M N P Q R S T V W Y \n+ m->m m->i m->d i->m i->i d->m d->d\n+ COMPO 2.66939 4.05673 3.22993 2.79099 3.15365 3.55719 3.71237 2.54173 2.57440 2.22720 3.33451 3.21043 3.51162 3.12964 3.00991 2.70726 2.80134 2.41473 4.61383 3.55842\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.16208 4.61100 1.96838 0.61958 0.77255 0.00000 *\n+ 1 2.93600 4.38428 4.46198 3.90104 3.40267 3.46509 4.45615 1.93047 3.75500 2.17341 1.33550 4.09987 4.43304 3.98673 3.92700 3.38467 3.18624 2.18032 5.08252 3.90751 1 m - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01722 4.46614 5.18849 0.61958 0.77255 0.71506 0.67170\n+ 2 3.51610 5.57440 4.08521 3.23432 5.16373 4.05621 3.85036 4.44109 1.24790 3.78589 4.67516 3.56930 4.38323 2.97693 1.02813 3.49054 3.62846 4.13070 5.74164 4.65992 2 r - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01722 4.46614 5.18849 0.61958 0.77255 0.71506 0.67170\n+ 3 2.71675 4.18007 2.20863 1.78454 4.48942 3.47153 3.68161 3.95962 2.43737 2.96771 4.22140 2.70561 3.87879 2.38271 2.92830 2.68696 2.95012 3.54764 5.61847 4.21897 3 e - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01722 4.46614 5.18849 0.61958 0.77255 0.71506 0.67170\n+ 4 3.79955 
5.05528 5.24619 4.82928 0.91393 4.84708 4.43636 2.76588 4.65616 1.39209 3.35680 4.77841 5.06654 4.62904 4.66340 4.24935 4.02525 2.91694 4.56452 2.97666 4 f - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01722 4.46614 5.18849 0.61958 0.77255 0.71506 0.67170\n+ 5 2.33881 4.63133 3.04986 2.72039 3.81134 3.58086 3.80889 3.19122 2.69933 2.40831 3.28616 3.19757 3.96766 2.62497 3.12129 2.40087 2.24400 2.60449 5.18249 3.90340 5 t - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01722 4.46614 5.18849 0.61958 0.77255 0.71506 0.67170\n+ 6 2.53534 4.98255 2.88571 2.48098 3.55645 3.22634 3.11506 3.68024 2.49296 3.25497 4.05057 1.83222 3.89751 2.84319 2.96725 2.38521 2.93102 3.33084 5.46784 3.79390 6 n - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01722 4.46614 5.18849 0.61958 0.77255 0.71506 0.67170\n+ 7 3.33815 4.58635 5.28231 4.77386 3.86630 4.81962 5.40260 1.04591 4.67983 2.15439 3.65019 4.93514 5.08026 4.90950 4.'..b'.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01619 4.52743 5.24978 0.61958 0.77255 0.62832 0.76248\n+ 199 3.17292 4.49948 5.07350 4.47897 2.19664 4.41569 4.75303 1.67850 4.30714 1.34256 2.66830 4.55216 4.68996 4.38672 4.33103 3.74557 3.18632 2.02062 5.12680 4.01423 203 l - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01619 4.52743 5.24978 0.61958 0.77255 0.62832 0.76248\n+ 200 2.82721 5.23462 3.00978 2.50556 4.56561 3.55698 1.89081 4.01902 2.34647 
3.51731 4.29242 2.45019 3.96371 2.18070 2.67888 2.52972 3.05315 3.62288 5.65577 4.29113 204 h - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01619 4.52743 5.24978 0.61958 0.77255 0.62832 0.76248\n+ 201 2.79753 3.26426 4.38245 3.80576 2.13306 3.90254 4.09723 2.46777 3.65960 2.40542 3.34950 3.92355 4.27118 3.84123 3.78510 2.46701 3.03179 2.48318 4.61601 1.76843 205 y - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01619 4.52743 5.24978 0.61958 0.77255 0.62832 0.76248\n+ 202 2.72610 3.77026 4.37676 3.78412 1.86105 3.85380 3.70355 1.91248 3.39011 2.25745 3.26975 3.64215 4.21973 3.81329 3.74151 3.15148 2.95801 2.40389 4.70415 2.65981 206 f - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01619 4.52743 5.24978 0.61958 0.77255 0.62832 0.76248\n+ 203 3.15910 4.51063 4.85177 3.56044 3.48065 4.41056 4.77828 1.50858 4.14418 1.18189 3.34602 4.46100 4.70781 4.33071 4.27364 3.73860 3.39432 1.87237 5.27969 4.12987 207 l - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01619 4.52743 5.24978 0.61958 0.77255 0.62832 0.76248\n+ 204 2.54223 4.83441 3.15835 2.33659 4.10387 3.53825 3.79739 3.50371 2.61506 2.95960 3.75295 3.12487 1.86716 2.96533 3.06273 2.60399 2.36144 3.19033 5.39703 4.08002 208 p - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01619 4.52743 5.24978 0.61958 0.77255 0.62832 0.76248\n+ 205 1.87617 5.09761 2.66753 2.36334 4.39589 2.74795 3.18023 3.84868 
2.46564 3.38501 4.15665 2.99165 3.89862 2.67910 2.94944 2.53808 2.60969 3.21059 5.56524 4.18537 209 a - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01619 4.52743 5.24978 0.61958 0.77255 0.62832 0.76248\n+ 206 3.02518 4.38844 4.11409 4.15378 3.60416 4.28092 4.67591 1.38646 4.03415 2.26037 3.36491 4.32477 4.61541 4.25349 4.19108 3.60271 2.71238 1.26673 5.26692 4.07016 210 v - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01092 4.52217 * 0.61958 0.77255 0.00000 *\n+//\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/insecta/lengths_cutoff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/insecta/lengths_cutoff Thu Aug 26 06:55:33 2021 +0000 |
b |
b'@@ -0,0 +1,1658 @@\n+EOG090W0HFV\t0\t26.009834120677418\t151\n+EOG090W0427\t0\t14.787662575205127\t119\n+EOG090W09K7\t0\t48.74476058088982\t218\n+EOG090W0B5K\t0\t8.436399156361812\t117\n+EOG090W0153\t0\t110.0106535860915\t535\n+EOG090W051T\t0\t48.42670503702809\t341\n+EOG090W01WI\t0\t66.27955761081256\t599\n+EOG090W01A3\t0\t147.49150864355283\t636\n+EOG090W067A\t0\t24.752819851328745\t239\n+EOG090W0IUR\t0\t33.027556860826415\t176\n+EOG090W09BV\t0\t59.827044564511475\t249\n+EOG090W0AIA\t0\t62.4605436431282\t296\n+EOG090W05D8\t0\t120.959030700539\t437\n+EOG090W01IP\t0\t203.56155914702887\t709\n+EOG090W02C3\t0\t123.60839779175912\t441\n+EOG090W0B1Y\t0\t14.92967253328818\t221\n+EOG090W01VD\t0\t137.07676494930246\t506\n+EOG090W035W\t0\t58.49371845525309\t413\n+EOG090W0DWN\t0\t41.156486855129046\t178\n+EOG090W028U\t0\t103.86567310127292\t544\n+EOG090W02TI\t0\t98.41080498838295\t449\n+EOG090W00PB\t0\t106.20423261052379\t448\n+EOG090W038B\t0\t77.2556129538091\t369\n+EOG090W0BUR\t0\t61.86700358352731\t260\n+EOG090W0F0L\t0\t46.56222745575028\t243\n+EOG090W08IZ\t0\t62.201306771663276\t286\n+EOG090W0BEB\t0\t24.453706055266927\t238\n+EOG090W0I37\t0\t18.215830819664493\t138\n+EOG090W0CQ9\t0\t22.121097366946984\t209\n+EOG090W04BS\t0\t57.09128334646491\t340\n+EOG090W0FCL\t0\t16.012152397171466\t130\n+EOG090W0AXJ\t0\t28.151048076823233\t222\n+EOG090W0FEP\t0\t12.631376967230036\t126\n+EOG090W0FVI\t0\t15.398685404771472\t141\n+EOG090W0GP3\t0\t19.401812121264605\t132\n+EOG090W01NH\t0\t109.56816301330042\t599\n+EOG090W0IEE\t0\t13.648148270718346\t133\n+EOG090W0IP7\t0\t16.863805057639492\t174\n+EOG090W038Z\t0\t59.028806526983075\t372\n+EOG090W0B8O\t0\t25.131143830790958\t193\n+EOG090W0KP0\t0\t31.31312208007784\t139\n+EOG090W064P\t0\t35.99213522298181\t305\n+EOG090W0A3V\t0\t29.293708718757674\t271\n+EOG090W0BOZ\t0\t11.373911097706742\t158\n+EOG090W0H6T\t0\t40.78248368613364\t174\n+EOG090W0EDI\t0\t25.598027290426895\t161\n+EOG090W0EFH\t0\t32.47988529410085\t158\n+EOG090W04BN\t0\t42.69662
448203931\t429\n+EOG090W00SP\t0\t347.84066824884763\t1158\n+EOG090W01ZN\t0\t35.63683451653604\t248\n+EOG090W014R\t0\t303.558641419558\t908\n+EOG090W0FJE\t0\t11.554345658688966\t128\n+EOG090W00EP\t0\t90.98598088688564\t391\n+EOG090W07MY\t0\t26.394659474607508\t276\n+EOG090W00LR\t0\t47.803732742449085\t428\n+EOG090W06QR\t0\t45.20103500608554\t255\n+EOG090W06J7\t0\t48.950477475341124\t349\n+EOG090W04G6\t0\t20.747524095601406\t271\n+EOG090W06IG\t0\t83.91320663964761\t392\n+EOG090W036Y\t0\t90.1813890269454\t405\n+EOG090W0CMO\t0\t27.111085843428626\t212\n+EOG090W0BFE\t0\t43.83422206895252\t292\n+EOG090W0JYN\t0\t8.6240352353082\t95\n+EOG090W054P\t0\t42.0493667617466\t242\n+EOG090W0753\t0\t88.63736820874865\t365\n+EOG090W0FKG\t0\t36.02492014973623\t238\n+EOG090W0B6L\t0\t37.512128120083524\t178\n+EOG090W0C2I\t0\t39.9957024404497\t248\n+EOG090W057Z\t0\t39.37529750270147\t336\n+EOG090W01H1\t0\t167.89279528460568\t626\n+EOG090W07A3\t0\t38.193049881273545\t207\n+EOG090W0D5E\t0\t36.11275078460196\t174\n+EOG090W07PK\t0\t44.50703007278868\t242\n+EOG090W0GKW\t0\t13.60945220480054\t126\n+EOG090W0J9P\t0\t72.21231118153511\t276\n+EOG090W0A69\t0\t38.463468950032976\t247\n+EOG090W0DRQ\t0\t7.927491529558358\t71\n+EOG090W04G1\t0\t70.1561152864883\t382\n+EOG090W08L6\t0\t34.61950687705208\t168\n+EOG090W02QT\t0\t126.78937306395682\t573\n+EOG090W0K04\t0\t13.698118905107428\t123\n+EOG090W02UI\t0\t75.33983172202097\t569\n+EOG090W00MS\t0\t222.74918630603344\t865\n+EOG090W0HXZ\t0\t14.672974042346928\t111\n+EOG090W09LF\t0\t49.42509730797137\t225\n+EOG090W060L\t0\t48.49481993961427\t407\n+EOG090W0I0Q\t0\t8.898446275006405\t87\n+EOG090W0B0M\t0\t19.378137039507983\t192\n+EOG090W0EY0\t0\t25.633665074147427\t177\n+EOG090W019L\t0\t161.9212108198199\t655\n+EOG090W0ALV\t0\t33.567421363630004\t327\n+EOG090W0F9J\t0\t11.9066247090783\t195\n+EOG090W0BZ2\t0\t13.511577175520067\t123\n+EOG090W0B5T\t0\t12.744268370786148\t181\n+EOG090W0JBN\t0\t8.059235823034468\t94\n+EOG090W06CO\t0\t44.76325256823732\t283\n+EOG090
W06Y4\t0\t58.56965239084716\t304\n+EOG090W00D0\t0\t173.60012223217007\t969\n+EOG090W0N7U\t0\t7.090271589968341\t73\n+EOG090W02H5\t0\t187.43347716555155\t645\n+EOG090W04DH\t0\t97.51184745911475\t425\n+EOG090W01HI\t0\t141.17028909316608\t668\n+EOG090W02JZ\t0\t106.2021542200327\t499\n+EOG090W0ANA\t0\t43.80791670031081\t242\n+EOG090W055F\t0\t25.315135740520443\t269\n+EOG090W0KMC\t0\t20.563083052290207\t149\n+EOG090W063Z\t0\t20.0'..b'656909\t232\n+EOG090W050K\t0\t77.93871889144076\t376\n+EOG090W0CL8\t0\t25.966747028064802\t218\n+EOG090W0JJQ\t0\t13.62856576746243\t117\n+EOG090W06W8\t0\t48.24622893316924\t267\n+EOG090W0EPV\t0\t35.10902433482864\t239\n+EOG090W00WM\t0\t260.8164746511948\t897\n+EOG090W005S\t0\t611.4917595121128\t1630\n+EOG090W02UQ\t0\t51.49464074592311\t285\n+EOG090W03FA\t0\t53.973113494178904\t344\n+EOG090W02B7\t0\t115.3009159526159\t425\n+EOG090W06DJ\t0\t77.792516412288\t338\n+EOG090W08FE\t0\t36.68154855844776\t279\n+EOG090W06P2\t0\t41.670994647017906\t299\n+EOG090W0C7S\t0\t29.611469746269982\t147\n+EOG090W0C4Z\t0\t26.43060073195963\t145\n+EOG090W00ZP\t0\t161.60550467275013\t697\n+EOG090W0C7Z\t0\t37.684920128988004\t192\n+EOG090W06AN\t0\t42.662308774978584\t289\n+EOG090W0FYR\t0\t13.508876473998257\t128\n+EOG090W015U\t0\t42.296492199255404\t276\n+EOG090W02LX\t0\t53.129566825639934\t495\n+EOG090W0DZ4\t0\t9.599076852633498\t186\n+EOG090W050Y\t0\t60.226966189955085\t368\n+EOG090W08GU\t0\t53.02740180911049\t280\n+EOG090W09LK\t0\t91.72851994063471\t372\n+EOG090W077G\t0\t76.76324145120006\t331\n+EOG090W0B8P\t0\t19.196863497747415\t150\n+EOG090W0A73\t0\t27.406826637563558\t161\n+EOG090W0B3U\t0\t43.48966796277244\t264\n+EOG090W0LL3\t0\t13.321975417932757\t120\n+EOG090W09R9\t0\t36.65014937864991\t212\n+EOG090W0A58\t0\t48.47060965162291\t255\n+EOG090W06VZ\t0\t29.419164232825445\t317\n+EOG090W080B\t0\t79.75592849488558\t305\n+EOG090W0ALP\t0\t13.883838648218155\t145\n+EOG090W0EJV\t0\t5.956771852621479\t160\n+EOG090W0BI6\t0\t30.97683170635923\t224\n+EOG090W05KO\t0\t95.43
3274449961\t425\n+EOG090W03K0\t0\t72.48414460771538\t334\n+EOG090W0JFZ\t0\t14.305445143137689\t92\n+EOG090W09RO\t0\t29.48406784028039\t267\n+EOG090W00ZV\t0\t61.02240025870282\t345\n+EOG090W005V\t0\t262.52686163098906\t1059\n+EOG090W0F9A\t0\t10.246423950758501\t105\n+EOG090W0IKC\t0\t14.947429244710978\t76\n+EOG090W04QG\t0\t49.56877927900001\t361\n+EOG090W00U5\t0\t71.63448419048026\t402\n+EOG090W0JS6\t0\t8.421979153901846\t87\n+EOG090W06X4\t0\t32.054695199308284\t297\n+EOG090W0H7U\t0\t24.51177799464656\t186\n+EOG090W02LH\t0\t31.4090320424614\t301\n+EOG090W06AU\t0\t21.750918119113738\t153\n+EOG090W0L6N\t0\t11.596992881882745\t94\n+EOG090W0028\t0\t588.5016762903192\t1601\n+EOG090W05ZG\t0\t33.671801910536935\t269\n+EOG090W0DSQ\t0\t53.83672675323885\t204\n+EOG090W0CIU\t0\t10.5996734768809\t188\n+EOG090W09DT\t0\t42.16152158590845\t238\n+EOG090W0883\t0\t43.82515148221423\t331\n+EOG090W08IL\t0\t31.073523765582504\t244\n+EOG090W07HX\t0\t43.87674109168712\t264\n+EOG090W0ADL\t0\t74.64253566218997\t270\n+EOG090W07E5\t0\t31.219065990205618\t342\n+EOG090W0CHN\t0\t36.32844749725724\t170\n+EOG090W0F27\t0\t28.270070111541965\t150\n+EOG090W05FW\t0\t36.389874917560235\t332\n+EOG090W061C\t0\t33.972962707452396\t322\n+EOG090W023I\t0\t6.011474071541897\t134\n+EOG090W09Y9\t0\t20.77931440362711\t167\n+EOG090W029L\t0\t129.73510535308327\t545\n+EOG090W078A\t0\t54.90487338981139\t401\n+EOG090W0C83\t0\t38.32093295366156\t262\n+EOG090W015Z\t0\t176.6733585172255\t684\n+EOG090W05IA\t0\t51.778721543176026\t345\n+EOG090W06HO\t0\t71.48257830521672\t342\n+EOG090W0E6K\t0\t76.78984701084445\t299\n+EOG090W032M\t0\t41.865258362423724\t238\n+EOG090W04ZL\t0\t64.14590294096341\t349\n+EOG090W0A4U\t0\t17.926850008284003\t178\n+EOG090W0G0Z\t0\t34.8181704466556\t189\n+EOG090W012F\t0\t210.2707396955778\t725\n+EOG090W08ME\t0\t14.38099468500289\t138\n+EOG090W090H\t0\t89.63598147718064\t390\n+EOG090W0C7T\t0\t14.33867361284194\t189\n+EOG090W0AUB\t0\t1.1004433696270324\t217\n+EOG090W094H\t0\t21.30754829636339\t247\n+E
OG090W00HE\t0\t188.33841438573634\t778\n+EOG090W0HKZ\t0\t15.56285848109415\t128\n+EOG090W02KK\t0\t67.63366170127085\t626\n+EOG090W0828\t0\t24.44301723915799\t140\n+EOG090W07PH\t0\t52.049033834231\t266\n+EOG090W01XB\t0\t112.40728428787705\t527\n+EOG090W02C5\t0\t174.99547032465253\t515\n+EOG090W00WO\t0\t160.60058592597883\t618\n+EOG090W0140\t0\t77.3058731633741\t335\n+EOG090W01QT\t0\t54.84769853734253\t240\n+EOG090W0FQ4\t0\t21.524963224401723\t140\n+EOG090W0CAH\t0\t18.306986664749655\t190\n+EOG090W080Z\t0\t42.915151411519396\t247\n+EOG090W02AU\t0\t90.99510841851644\t457\n+EOG090W096X\t0\t37.173612523675914\t248\n+EOG090W04OJ\t0\t60.586185145045505\t253\n+EOG090W09UY\t0\t59.0061956628029\t262\n+EOG090W07CG\t0\t39.79244204205305\t320\n+EOG090W0KFZ\t0\t5.208228204067198\t77\n+EOG090W0LWB\t0\t5.161494516225474\t78\n+EOG090W0F00\t0\t36.17643723361296\t196\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/insecta/prfl/EOG090W03A6.prfl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/insecta/prfl/EOG090W03A6.prfl Thu Aug 26 06:55:33 2021 +0000 |
[ |
b'@@ -0,0 +1,495 @@\n+[name]\n+unknown\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t11\n+\n+[block]\n+# block no. 0 follows, 26 sequences, length 14\n+# corresponding to MSA columns:\n+# 26-39\n+name=unknown_A\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\tY\tW\tH\tM\tC\tP\n+0\t0.01222\t0.00873\t0.01222\t0.01397\t0.01572\t0.00873\t0.01222\t0.01572\t0.01746\t0.02270\t0.04016\t0.08556\t0.04365\t0.02095\t0.01048\t0.00349\t0.00698\t0.63506\t0.00698\t0.00698\n+1\t0.00956\t0.00674\t0.00871\t0.01015\t0.01051\t0.00637\t0.00660\t0.01065\t0.01390\t0.01853\t0.03861\t0.60723\t0.04613\t0.15052\t0.01485\t0.00384\t0.00493\t0.01968\t0.00660\t0.00588\n+2\t0.01493\t0.01410\t0.02384\t0.62468\t0.13806\t0.01701\t0.02136\t0.01991\t0.01545\t0.02011\t0.01359\t0.02003\t0.01037\t0.00747\t0.00757\t0.00249\t0.00996\t0.00674\t0.00342\t0.00891\n+3\t0.01103\t0.00735\t0.00827\t0.00827\t0.00827\t0.00735\t0.00460\t0.01103\t0.01103\t0.01471\t0.02390\t0.04964\t0.02758\t0.73343\t0.03861\t0.00735\t0.00735\t0.01103\t0.00460\t0.00460\n+4\t0.00924\t0.00660\t0.00880\t0.01056\t0.01100\t0.00616\t0.00704\t0.01056\t0.01452\t0.01936\t0.04181\t0.72848\t0.05017\t0.02376\t0.00968\t0.00308\t0.00440\t0.02156\t0.00704\t0.00616\n+5\t0.02748\t0.02058\t0.02200\t0.01718\t0.02301\t0.02289\t0.01409\t0.57682\t0.12187\t0.04563\t0.01992\t0.01958\t0.01429\t0.00927\t0.00761\t0.00232\t0.00803\t0.00707\t0.00761\t0.01278\n+6\t0.15256\t0.01392\t0.02108\t0.55605\t0.07699\t0.01694\t0.01888\t0.02009\t0.01487\t0.02226\t0.01300\t0.01874\t0.00987\t0.00745\t0.00706\t0.00248\t0.00930\t0.00627\t0.00363\t0.00857\n+7\t0.11917\t0.01388\t0.02161\t0.58414\t0.08089\t0.01694\t0.01944\t0.01998\t0.01497\t0.02165\t0.01313\t0.01908\t0.00996\t0.00747\t0.00719\t0.00249\t0.00948\t0.00639\t0.00357\t0.00859\n+8\t0.07699\t0.01719\t0.02511\t0.32900\t0.31130\t0.06966\t0.02051\t0.02206\t0.01635\t0.02270\t0.01345\t0.01851\t0.01073\t0.00722\t0.00727\t0.00236\t0.00960\t0.00640\t0.00367\t0.00993\
n+9\t0.22837\t0.01515\t0.01649\t0.12060\t0.02277\t0.05359\t0.01194\t0.11110\t0.02019\t0.19029\t0.11508\t0.02414\t0.02104\t0.00925\t0.00698\t0.00235\t0.00710\t0.00718\t0.00650\t0.00989\n+10\t0.01881\t0.01720\t0.02326\t0.46321\t0.09186\t0.07067\t0.01933\t0.15694\t0.02017\t0.02577\t0.01441\t0.01919\t0.01082\t0.00786\t0.00752\t0.00241\t0.00992\t0.00662\t0.00434\t0.00968\n+11\t0.05197\t0.02202\t0.02168\t0.07042\t0.02458\t0.14127\t0.01479\t0.34418\t0.08557\t0.03496\t0.01658\t0.01808\t0.01254\t0.00931\t0.00949\t0.00240\t0.09660\t0.00652\t0.00601\t0.01103\n+12\t0.04828\t0.01906\t0.02153\t0.02065\t0.11087\t0.13727\t0.06274\t0.12273\t0.06954\t0.09870\t0.14622\t0.02634\t0.02452\t0.00986\t0.00865\t0.00235\t0.04619\t0.00808\t0.00615\t0.01027\n+13\t0.08658\t0.02046\t0.07459\t0.03874\t0.02561\t0.07075\t0.13650\t0.20915\t0.09128\t0.05154\t0.01799\t0.02037\t0.01366\t0.01401\t0.09115\t0.00355\t0.01079\t0.00712\t0.00554\t0.01060\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t30\n+\n+[block]\n+# block no. 
1 follows, 26 sequences, length 8\n+# corresponding to MSA columns:\n+# 71-78\n+name=unknown_B\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\tY\tW\tH\tM\tC\tP\n+0\t0.02194\t0.06459\t0.02353\t0.16038\t0.11196\t0.02016\t0.01601\t0.31327\t0.06090\t0.03449\t0.06104\t0.02133\t0.01644\t0.00872\t0.00744\t0.00227\t0.00824\t0.00711\t0.00593\t0.03426\n+1\t0.02052\t0.05093\t0.07159\t0.02726\t0.10655\t0.20206\t0.25692\t0.02480\t0.01765\t0.02167\t0.01318\t0.01732\t0.01070\t0.00789\t0.01035\t0.00243\t0.11776\t0.00689\t0.00371\t0.00981\n+2\t0.01568\t0.10472\t0.01891\t0.01780\t0.14957\t0.01459\t0.01164\t0.12310\t0.02011\t0.02557\t0.04608\t0.04525\t0.35102\t0.01341\t0.00792\t0.00233\t0.00617\t0.01110\t0.00599\t0.00904\n+3\t0.10891\t0.01808\t0.02949\t0.10245\t0.33046\t0.01872\t0.14796\t0.10391\t0.01901\t0.02814\t0.01447\t0.01843\t0.01139\t0.00710\t0.00749\t0.00236\t0.00943\t0.00690\t0.00433\t0.01097\n+4\t0.08080\t0.02151\t0.01983\t0.01626\t0.02095\t0.19381\t0.01303\t0.17602\t0.02227\t0.15089\t0.01650\t0.01750\t0.01226\t0.00877\t0.00914\t0.00220\t0.09913\t0.00596\t0.00583\t0.10734\n+5\t0.01885\t0.01934\t0.03543\t0.02641\t0.03208\t0.02036\t0.40079\t0.08844\t0.08944\t0.02629\t0.01628\t0.02034\t0.01261\t0.00841\t0.01174\t0.00268\t0.14739\t0.00825\t0.00453\t0.01032\n+6\t0.01577\t0.01496\t0.02536\t0.49795\t0.25820\t0.01723\t0.02178\t0.02065\t0.01585\t0.02113\t0.01374\t0.01975\t0.01073\t0.00732\t0.00756\t0.00244\t0.00975\t0.00675\t0.00350\t0.0095'..b'1\t0.00639\t0.00895\t0.00256\t0.01279\t0.00895\t0.00384\t0.01023\n+73\t0.01076\t0.00724\t0.00835\t0.00861\t0.00868\t0.00718\t0.00496\t0.01096\t0.01155\t0.01540\t0.02656\t0.15039\t0.03093\t0.62810\t0.03431\t0.00672\t0.00692\t0.01259\t0.00496\t0.00483\n+74\t0.01521\t0.03923\t0.69413\t0.02162\t0.03283\t0.01762\t0.02802\t0.02402\t0.01601\t0.02402\t0.01361\t0.01601\t0.00961\t0.00721\t0.00721\t0.00240\t0.01121\t0.00560\t0.00320\t0.01121\n+75\t0.01432\t0.01348\t0.02275\t0.71520\t0.05224\t0.01685\t0.0210
7\t0.01938\t0.01517\t0.01938\t0.01348\t0.02022\t0.01011\t0.00758\t0.00758\t0.00253\t0.01011\t0.00674\t0.00337\t0.00843\n+76\t0.01474\t0.01390\t0.02349\t0.65337\t0.11086\t0.01696\t0.02127\t0.01974\t0.01536\t0.01988\t0.01355\t0.02009\t0.01029\t0.00751\t0.00758\t0.00250\t0.01001\t0.00674\t0.00341\t0.00876\n+77\t0.01573\t0.01348\t0.01573\t0.01123\t0.01798\t0.01011\t0.00899\t0.01910\t0.01573\t0.02472\t0.01348\t0.01573\t0.01123\t0.00562\t0.00562\t0.00112\t0.00562\t0.00449\t0.00449\t0.77980\n+78\t0.02883\t0.02125\t0.02276\t0.01745\t0.02352\t0.02352\t0.01442\t0.66082\t0.03566\t0.04780\t0.01821\t0.01821\t0.01290\t0.00911\t0.00759\t0.00228\t0.00835\t0.00683\t0.00759\t0.01290\n+79\t0.01790\t0.02046\t0.04476\t0.03197\t0.03964\t0.01918\t0.65857\t0.02430\t0.01790\t0.02430\t0.01535\t0.02046\t0.01151\t0.00639\t0.00895\t0.00256\t0.01279\t0.00895\t0.00384\t0.01023\n+80\t0.01432\t0.01348\t0.02275\t0.71520\t0.05224\t0.01685\t0.02107\t0.01938\t0.01517\t0.01938\t0.01348\t0.02022\t0.01011\t0.00758\t0.00758\t0.00253\t0.01011\t0.00674\t0.00337\t0.00843\n+81\t0.01091\t0.00784\t0.01074\t0.01074\t0.01159\t0.00869\t0.00767\t0.01261\t0.01176\t0.01653\t0.02165\t0.03717\t0.02216\t0.30815\t0.45648\t0.01039\t0.01550\t0.00921\t0.00426\t0.00596\n+82\t0.14579\t0.01317\t0.01664\t0.01297\t0.01867\t0.01202\t0.01071\t0.03479\t0.02040\t0.59380\t0.02705\t0.02381\t0.01721\t0.00904\t0.00719\t0.00235\t0.00637\t0.00711\t0.00869\t0.01221\n+83\t0.01705\t0.01494\t0.02275\t0.58420\t0.04685\t0.01810\t0.01982\t0.13981\t0.01901\t0.02472\t0.01437\t0.01984\t0.01063\t0.00787\t0.00758\t0.00248\t0.00978\t0.00676\t0.00416\t0.00927\n+84\t0.01432\t0.01348\t0.02275\t0.71520\t0.05224\t0.01685\t0.02107\t0.01938\t0.01517\t0.01938\t0.01348\t0.02022\t0.01011\t0.00758\t0.00758\t0.00253\t0.01011\t0.00674\t0.00337\t0.00843\n+85\t0.01790\t0.02046\t0.04476\t0.03197\t0.03964\t0.01918\t0.65857\t0.02430\t0.01790\t0.02430\t0.01535\t0.02046\t0.01151\t0.00639\t0.00895\t0.00256\t0.01279\t0.00895\t0.00384\t0.01023\n+86\t0.02883\t0.02125\t0.02276
\t0.01745\t0.02352\t0.02352\t0.01442\t0.66082\t0.03566\t0.04780\t0.01821\t0.01821\t0.01290\t0.00911\t0.00759\t0.00228\t0.00835\t0.00683\t0.00759\t0.01290\n+87\t0.01659\t0.01659\t0.02323\t0.01991\t0.01991\t0.02323\t0.01659\t0.01825\t0.01162\t0.01825\t0.00996\t0.01659\t0.00996\t0.01328\t0.02489\t0.00332\t0.71955\t0.00664\t0.00332\t0.00830\n+88\t0.01039\t0.00753\t0.00980\t0.00967\t0.01123\t0.00694\t0.00708\t0.01348\t0.02001\t0.02797\t0.53948\t0.18340\t0.09020\t0.01719\t0.00908\t0.00252\t0.00374\t0.01528\t0.00806\t0.00694\n+89\t0.00949\t0.00678\t0.00909\t0.01084\t0.01139\t0.00637\t0.00747\t0.01099\t0.01477\t0.01964\t0.04167\t0.67500\t0.04963\t0.02353\t0.00975\t0.00311\t0.00462\t0.07259\t0.00704\t0.00623\n+90\t0.01432\t0.01348\t0.02275\t0.71520\t0.05224\t0.01685\t0.02107\t0.01938\t0.01517\t0.01938\t0.01348\t0.02022\t0.01011\t0.00758\t0.00758\t0.00253\t0.01011\t0.00674\t0.00337\t0.00843\n+91\t0.01579\t0.11910\t0.61934\t0.02063\t0.03130\t0.01903\t0.02630\t0.02387\t0.01595\t0.02332\t0.01326\t0.01557\t0.00962\t0.00712\t0.00694\t0.00231\t0.01086\t0.00543\t0.00321\t0.01104\n+92\t0.01499\t0.01416\t0.02396\t0.61489\t0.14734\t0.01703\t0.02140\t0.01996\t0.01548\t0.02019\t0.01360\t0.02000\t0.01040\t0.00746\t0.00757\t0.00249\t0.00995\t0.00674\t0.00343\t0.00896\n+93\t0.01755\t0.02345\t0.14323\t0.02872\t0.03641\t0.05046\t0.49780\t0.02386\t0.01761\t0.02374\t0.01621\t0.05431\t0.01303\t0.00744\t0.00862\t0.00253\t0.01217\t0.00886\t0.00390\t0.01012\n+94\t0.01667\t0.01591\t0.02684\t0.34869\t0.37232\t0.01770\t0.02201\t0.02131\t0.01609\t0.02207\t0.01374\t0.01934\t0.01107\t0.00739\t0.00823\t0.00242\t0.03764\t0.00675\t0.00356\t0.01024\n+95\t0.02037\t0.02306\t0.14580\t0.02556\t0.03296\t0.06905\t0.37543\t0.02613\t0.01817\t0.12499\t0.01723\t0.02046\t0.01252\t0.00947\t0.04096\t0.00291\t0.01198\t0.00784\t0.00459\t0.01053\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t774\n+\n+# created by:\n+# /home/cegg/simao/soft/augustus-3.2.1/scripts/msa2prfl.pl ./align_prep/EOG090W03A6.fa\n' 
|
b |
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/insecta/prfl/EOG090W06A3.prfl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/insecta/prfl/EOG090W06A3.prfl Thu Aug 26 06:55:33 2021 +0000 |
[ |
b'@@ -0,0 +1,451 @@\n+[name]\n+unknown\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t78\n+\n+[block]\n+# block no. 0 follows, 35 sequences, length 97\n+# corresponding to MSA columns:\n+# 78-174\n+name=unknown_A\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\tY\tW\tH\tM\tC\tP\n+0\t0.01132\t0.00850\t0.01136\t0.01285\t0.05849\t0.00867\t0.00997\t0.01593\t0.10286\t0.03809\t0.03330\t0.08023\t0.10035\t0.01684\t0.04478\t0.00301\t0.00605\t0.42505\t0.00569\t0.00668\n+1\t0.01291\t0.01226\t0.02066\t0.57532\t0.15656\t0.01442\t0.04607\t0.01778\t0.04826\t0.01763\t0.01217\t0.01705\t0.00931\t0.00630\t0.00638\t0.00208\t0.00823\t0.00579\t0.00307\t0.00775\n+2\t0.02846\t0.01080\t0.01472\t0.01129\t0.01619\t0.00932\t0.00932\t0.03092\t0.01816\t0.74187\t0.02503\t0.02159\t0.01570\t0.00785\t0.00638\t0.00196\t0.00540\t0.00638\t0.00785\t0.01080\n+3\t0.00778\t0.00556\t0.00741\t0.00880\t0.00921\t0.00519\t0.00589\t0.00897\t0.01240\t0.01660\t0.06578\t0.74094\t0.04275\t0.01957\t0.00807\t0.00255\t0.00365\t0.01775\t0.00594\t0.00519\n+4\t0.00750\t0.00643\t0.00643\t0.00643\t0.00857\t0.00536\t0.00482\t0.00910\t0.01446\t0.01714\t0.06427\t0.06105\t0.73490\t0.01607\t0.00750\t0.00214\t0.00321\t0.01339\t0.00589\t0.00536\n+5\t0.00773\t0.00552\t0.00736\t0.00883\t0.00920\t0.00515\t0.00589\t0.00883\t0.01215\t0.01619\t0.03497\t0.77291\t0.04196\t0.01987\t0.00810\t0.00258\t0.00368\t0.01803\t0.00589\t0.00515\n+6\t0.00898\t0.00648\t0.00848\t0.00798\t0.00948\t0.00599\t0.00599\t0.01197\t0.01796\t0.02544\t0.73413\t0.04739\t0.05986\t0.01297\t0.00748\t0.00200\t0.00299\t0.01147\t0.00698\t0.00599\n+7\t0.82186\t0.01227\t0.00932\t0.00834\t0.01227\t0.01423\t0.00687\t0.01865\t0.01080\t0.02846\t0.00883\t0.01031\t0.00687\t0.00589\t0.00393\t0.00196\t0.00491\t0.00344\t0.00393\t0.00687\n+8\t0.82186\t0.01227\t0.00932\t0.00834\t0.01227\t0.01423\t0.00687\t0.01865\t0.01080\t0.02846\t0.00883\t0.01031\t0.00687\t0.00589\t0.00393\t0.00196\t0.00491\t0.00344\t0.00393\t0.00687
\n+9\t0.00907\t0.00677\t0.01003\t0.01003\t0.01111\t0.00784\t0.00773\t0.01122\t0.01015\t0.01461\t0.01714\t0.02574\t0.01621\t0.08357\t0.71641\t0.01000\t0.01646\t0.00692\t0.00342\t0.00557\n+10\t0.82186\t0.01227\t0.00932\t0.00834\t0.01227\t0.01423\t0.00687\t0.01865\t0.01080\t0.02846\t0.00883\t0.01031\t0.00687\t0.00589\t0.00393\t0.00196\t0.00491\t0.00344\t0.00393\t0.00687\n+11\t0.01578\t0.01363\t0.01434\t0.01291\t0.01650\t0.01578\t0.01004\t0.03371\t0.72602\t0.02654\t0.02582\t0.02367\t0.01937\t0.00861\t0.00646\t0.00215\t0.00502\t0.00717\t0.00646\t0.01004\n+12\t0.01198\t0.01128\t0.01903\t0.76180\t0.04369\t0.01409\t0.01762\t0.01621\t0.01268\t0.01621\t0.01128\t0.01691\t0.00846\t0.00634\t0.00634\t0.00211\t0.00846\t0.00564\t0.00282\t0.00705\n+13\t0.00773\t0.00552\t0.00736\t0.00883\t0.00920\t0.00515\t0.00589\t0.00883\t0.01215\t0.01619\t0.03497\t0.77291\t0.04196\t0.01987\t0.00810\t0.00258\t0.00368\t0.01803\t0.00589\t0.00515\n+14\t0.01198\t0.01128\t0.01903\t0.76180\t0.04369\t0.01409\t0.01762\t0.01621\t0.01268\t0.01621\t0.01128\t0.01691\t0.00846\t0.00634\t0.00634\t0.00211\t0.00846\t0.00564\t0.00282\t0.00705\n+15\t0.01315\t0.01128\t0.01315\t0.00940\t0.01503\t0.00846\t0.00752\t0.01597\t0.01315\t0.02067\t0.01128\t0.01315\t0.00940\t0.00470\t0.00470\t0.00094\t0.00470\t0.00376\t0.00376\t0.81583\n+16\t0.00773\t0.00552\t0.00736\t0.00883\t0.00920\t0.00515\t0.00589\t0.00883\t0.01215\t0.01619\t0.03497\t0.77291\t0.04196\t0.01987\t0.00810\t0.00258\t0.00368\t0.01803\t0.00589\t0.00515\n+17\t0.01578\t0.01363\t0.01434\t0.01291\t0.01650\t0.01578\t0.01004\t0.03371\t0.72602\t0.02654\t0.02582\t0.02367\t0.01937\t0.00861\t0.00646\t0.00215\t0.00502\t0.00717\t0.00646\t0.01004\n+18\t0.00773\t0.00552\t0.00736\t0.00883\t0.00920\t0.00515\t0.00589\t0.00883\t0.01215\t0.01619\t0.03497\t0.77291\t0.04196\t0.01987\t0.00810\t0.00258\t0.00368\t0.01803\t0.00589\t0.00515\n+19\t0.02412\t0.01777\t0.01904\t0.01460\t0.01967\t0.01967\t0.01206\t0.71633\t0.02983\t0.03998\t0.01523\t0.01523\t0.01079\t0.00762\t0.00635\t0.00190\t
0.00698\t0.00571\t0.00635\t0.01079\n+20\t0.01276\t0.01176\t0.01807\t0.60873\t0.03813\t0.01444\t0.01607\t0.01979\t0.15849\t0.01832\t0.01425\t0.01829\t0.01069\t0.00681\t0.00637\t0.00212\t0.00775\t0.00595\t0.00356\t0.00766\n+21\t0.01315\t0.01128\t0.01315\t0.00940\t0.01503\t0.00846\t0.00752\t0.01597\t0.01315\t0.02067\t0.01128\t0.01315\t0.00940\t0.00470\t0.00470\t0.00094\t0.00470\t0.00376\t0.00376\t0.81583\n+22\t0.01570\t0.01507\t0.02575\t0.03894\t0.73748\t0.01507\t0.01947\t0.01947\t0.01444\t0.02073\t0.01193\t0.0'..b'0.70975\t0.00996\t0.01636\t0.00694\t0.00342\t0.00556\n+89\t0.00785\t0.00644\t0.00692\t0.00680\t0.00879\t0.00551\t0.00510\t0.00979\t0.01530\t0.01913\t0.22483\t0.05778\t0.57309\t0.01532\t0.00749\t0.00211\t0.00316\t0.01293\t0.00615\t0.00551\n+90\t0.02370\t0.03023\t0.01798\t0.01634\t0.01961\t0.75158\t0.01226\t0.02533\t0.01798\t0.01553\t0.00981\t0.01144\t0.00817\t0.00654\t0.00572\t0.00163\t0.01144\t0.00409\t0.00327\t0.00735\n+91\t0.82186\t0.01227\t0.00932\t0.00834\t0.01227\t0.01423\t0.00687\t0.01865\t0.01080\t0.02846\t0.00883\t0.01031\t0.00687\t0.00589\t0.00393\t0.00196\t0.00491\t0.00344\t0.00393\t0.00687\n+92\t0.82186\t0.01227\t0.00932\t0.00834\t0.01227\t0.01423\t0.00687\t0.01865\t0.01080\t0.02846\t0.00883\t0.01031\t0.00687\t0.00589\t0.00393\t0.00196\t0.00491\t0.00344\t0.00393\t0.00687\n+93\t0.01501\t0.01702\t0.03689\t0.02731\t0.06618\t0.01600\t0.68185\t0.02028\t0.01495\t0.02034\t0.01279\t0.01705\t0.00965\t0.00536\t0.00743\t0.00213\t0.01055\t0.00740\t0.00321\t0.00863\n+94\t0.00891\t0.00648\t0.00838\t0.00791\t0.00943\t0.00596\t0.00593\t0.01183\t0.01779\t0.02504\t0.70181\t0.04805\t0.09243\t0.01312\t0.00748\t0.00200\t0.00300\t0.01157\t0.00693\t0.00596\n+95\t0.00773\t0.00552\t0.00736\t0.00883\t0.00920\t0.00515\t0.00589\t0.00883\t0.01215\t0.01619\t0.03497\t0.77291\t0.04196\t0.01987\t0.00810\t0.00258\t0.00368\t0.01803\t0.00589\t0.00515\n+96\t0.01315\t0.01128\t0.01315\t0.00940\t0.01503\t0.00846\t0.00752\t0.01597\t0.01315\t0.02067\t0.01128\t0.01315\t0.00940\t0.00470\t0.
00470\t0.00094\t0.00470\t0.00376\t0.00376\t0.81583\n+97\t0.01426\t0.01451\t0.01937\t0.01664\t0.01677\t0.04764\t0.01382\t0.01565\t0.01003\t0.01528\t0.00838\t0.01379\t0.00832\t0.01093\t0.02024\t0.00273\t0.73639\t0.00550\t0.00279\t0.00696\n+98\t0.01570\t0.01507\t0.02575\t0.03894\t0.73748\t0.01507\t0.01947\t0.01947\t0.01444\t0.02073\t0.01193\t0.01570\t0.01005\t0.00565\t0.00628\t0.00188\t0.00754\t0.00565\t0.00314\t0.01005\n+99\t0.02399\t0.02158\t0.01871\t0.01513\t0.01965\t0.24313\t0.01212\t0.50536\t0.02621\t0.03251\t0.01357\t0.01407\t0.00999\t0.00729\t0.00616\t0.00182\t0.00834\t0.00522\t0.00541\t0.00974\n+100\t0.00750\t0.00643\t0.00643\t0.00643\t0.00857\t0.00536\t0.00482\t0.00910\t0.01446\t0.01714\t0.06427\t0.06105\t0.73490\t0.01607\t0.00750\t0.00214\t0.00321\t0.01339\t0.00589\t0.00536\n+101\t0.02536\t0.01255\t0.01545\t0.01209\t0.01676\t0.01228\t0.00990\t0.19825\t0.08366\t0.47295\t0.05595\t0.02143\t0.01689\t0.00810\t0.00643\t0.00197\t0.00564\t0.00652\t0.00732\t0.01050\n+102\t0.01980\t0.04080\t0.05751\t0.01220\t0.01625\t0.04709\t0.01024\t0.14781\t0.13568\t0.24600\t0.06686\t0.13103\t0.02095\t0.00976\t0.00657\t0.00204\t0.00578\t0.00810\t0.00631\t0.00921\n+103\t0.02412\t0.01777\t0.01904\t0.01460\t0.01967\t0.01967\t0.01206\t0.71633\t0.02983\t0.03998\t0.01523\t0.01523\t0.01079\t0.00762\t0.00635\t0.00190\t0.00698\t0.00571\t0.00635\t0.01079\n+104\t0.00898\t0.00648\t0.00848\t0.00798\t0.00948\t0.00599\t0.00599\t0.01197\t0.01796\t0.02544\t0.73413\t0.04739\t0.05986\t0.01297\t0.00748\t0.00200\t0.00299\t0.01147\t0.00698\t0.00599\n+105\t0.01281\t0.01107\t0.01383\t0.06016\t0.01698\t0.00905\t0.02996\t0.01609\t0.03517\t0.02003\t0.01323\t0.05154\t0.01172\t0.03363\t0.00611\t0.00136\t0.00515\t0.00499\t0.00387\t0.64326\n+106\t0.01318\t0.09360\t0.65981\t0.01824\t0.05223\t0.01560\t0.02226\t0.01998\t0.01339\t0.01969\t0.01119\t0.01321\t0.00812\t0.00596\t0.00588\t0.00195\t0.00910\t0.00462\t0.00270\t0.00930\n+107\t0.01361\t0.01126\t0.01320\t0.00945\t0.01507\t0.00848\t0.00757\t0.01642\t0.01330\t0.04
218\t0.01169\t0.01341\t0.00958\t0.00479\t0.00475\t0.00097\t0.00472\t0.00384\t0.00388\t0.79183\n+108\t0.01557\t0.01687\t0.03452\t0.02493\t0.03104\t0.01623\t0.61740\t0.06216\t0.07226\t0.02197\t0.01400\t0.01752\t0.01047\t0.00574\t0.00734\t0.00213\t0.01003\t0.00736\t0.00365\t0.00880\n+109\t0.00762\t0.00643\t0.00660\t0.00656\t0.00864\t0.00541\t0.00492\t0.00934\t0.01475\t0.01783\t0.11991\t0.05992\t0.67883\t0.01581\t0.00750\t0.00213\t0.00319\t0.01323\t0.00598\t0.00541\n+110\t0.00750\t0.00643\t0.00643\t0.00643\t0.00857\t0.00536\t0.00482\t0.00910\t0.01446\t0.01714\t0.06427\t0.06105\t0.73490\t0.01607\t0.00750\t0.00214\t0.00321\t0.01339\t0.00589\t0.00536\n+111\t0.01022\t0.00730\t0.01022\t0.01168\t0.01314\t0.00730\t0.01022\t0.01314\t0.01460\t0.01899\t0.03359\t0.07156\t0.03651\t0.01752\t0.00876\t0.00292\t0.00584\t0.69478\t0.00584\t0.00584\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t0\n+\n+# created by:\n+# /home/cegg/simao/soft/augustus-3.2.1/scripts/msa2prfl.pl ./align_prep/EOG090W06A3.fa\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/insecta/prfl/EOG090W0GYE.prfl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/insecta/prfl/EOG090W0GYE.prfl Thu Aug 26 06:55:33 2021 +0000 |
[ |
b'@@ -0,0 +1,133 @@\n+[name]\n+unknown\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t1\n+\n+[block]\n+# block no. 0 follows, 37 sequences, length 109\n+# corresponding to MSA columns:\n+# 1-109\n+name=unknown_A\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\tY\tW\tH\tM\tC\tP\n+0\t0.02227\t0.01639\t0.01774\t0.01388\t0.01851\t0.01810\t0.01150\t0.67323\t0.02769\t0.03707\t0.01601\t0.01874\t0.01226\t0.00806\t0.00630\t0.00191\t0.00665\t0.05755\t0.00609\t0.01006\n+1\t0.01614\t0.01295\t0.01387\t0.01234\t0.01590\t0.01476\t0.00964\t0.03233\t0.68224\t0.07953\t0.02486\t0.02269\t0.01842\t0.00825\t0.00622\t0.00206\t0.00487\t0.00686\t0.00633\t0.00974\n+2\t0.01215\t0.01030\t0.01211\t0.00901\t0.01392\t0.00783\t0.00709\t0.01470\t0.01259\t0.01950\t0.01326\t0.09266\t0.01234\t0.00606\t0.00487\t0.00107\t0.00443\t0.00506\t0.00384\t0.73721\n+3\t0.02643\t0.01082\t0.01430\t0.01110\t0.01575\t0.00976\t0.00913\t0.05075\t0.07124\t0.67581\t0.02393\t0.02080\t0.01527\t0.00762\t0.00616\t0.00191\t0.00523\t0.00619\t0.00743\t0.01036\n+4\t0.01125\t0.01047\t0.01752\t0.71354\t0.03969\t0.01296\t0.01616\t0.01511\t0.01220\t0.01564\t0.01258\t0.07316\t0.01056\t0.00709\t0.00625\t0.00207\t0.00782\t0.00633\t0.00294\t0.00666\n+5\t0.01125\t0.01047\t0.01752\t0.71354\t0.03969\t0.01296\t0.01616\t0.01511\t0.01220\t0.01564\t0.01258\t0.07316\t0.01056\t0.00709\t0.00625\t0.00207\t0.00782\t0.00633\t0.00294\t0.00666\n+6\t0.01156\t0.01088\t0.01836\t0.77016\t0.04216\t0.01360\t0.01700\t0.01564\t0.01224\t0.01564\t0.01088\t0.01632\t0.00816\t0.00612\t0.00612\t0.00204\t0.00816\t0.00544\t0.00272\t0.00680\n+7\t0.00895\t0.00571\t0.00763\t0.00870\t0.00938\t0.00527\t0.00593\t0.01011\t0.01215\t0.07029\t0.03303\t0.72437\t0.03860\t0.01831\t0.00769\t0.00244\t0.00367\t0.01657\t0.00582\t0.00538\n+8\t0.00969\t0.00692\t0.00966\t0.01107\t0.01240\t0.00689\t0.00955\t0.01237\t0.01392\t0.01812\t0.03251\t0.12197\t0.03562\t0.01708\t0.00841\t0.00279\t0.00548\t0.65433\t0.00564\t0.00559\
n+9\t0.01156\t0.01088\t0.01836\t0.77016\t0.04216\t0.01360\t0.01700\t0.01564\t0.01224\t0.01564\t0.01088\t0.01632\t0.00816\t0.00612\t0.00612\t0.00204\t0.00816\t0.00544\t0.00272\t0.00680\n+10\t0.01570\t0.73033\t0.03022\t0.01033\t0.01520\t0.02279\t0.01012\t0.01760\t0.01238\t0.01449\t0.01039\t0.06715\t0.01028\t0.00627\t0.00422\t0.00140\t0.00632\t0.00432\t0.00285\t0.00764\n+11\t0.00937\t0.00647\t0.00721\t0.00711\t0.00736\t0.00663\t0.00415\t0.01066\t0.06293\t0.01289\t0.01971\t0.03878\t0.02199\t0.72714\t0.02930\t0.00565\t0.00585\t0.00875\t0.00390\t0.00415\n+12\t0.01575\t0.01474\t0.02436\t0.03583\t0.69259\t0.01487\t0.01825\t0.07139\t0.01504\t0.02138\t0.01175\t0.01512\t0.00975\t0.00559\t0.00606\t0.00182\t0.00723\t0.00546\t0.00326\t0.00975\n+13\t0.01183\t0.01105\t0.01802\t0.71383\t0.04021\t0.01372\t0.01646\t0.01690\t0.06602\t0.01638\t0.01192\t0.01680\t0.00894\t0.00628\t0.00613\t0.00204\t0.00791\t0.00555\t0.00298\t0.00701\n+14\t0.00746\t0.00533\t0.00710\t0.00852\t0.00888\t0.00497\t0.00568\t0.00852\t0.01172\t0.01563\t0.03374\t0.78088\t0.04049\t0.01918\t0.00781\t0.00249\t0.00355\t0.01740\t0.00568\t0.00497\n+15\t0.01445\t0.01651\t0.03612\t0.02580\t0.03199\t0.01548\t0.72446\t0.01961\t0.01445\t0.01961\t0.01238\t0.01651\t0.00929\t0.00516\t0.00722\t0.00206\t0.01032\t0.00722\t0.00310\t0.00826\n+16\t0.01228\t0.03166\t0.75316\t0.01745\t0.02649\t0.01422\t0.02262\t0.01939\t0.01292\t0.01939\t0.01099\t0.01292\t0.00775\t0.00582\t0.00582\t0.00194\t0.00905\t0.00452\t0.00258\t0.00905\n+17\t0.01637\t0.78856\t0.03208\t0.01047\t0.01571\t0.02422\t0.01047\t0.01833\t0.01244\t0.01440\t0.00851\t0.00982\t0.00786\t0.00524\t0.00393\t0.00131\t0.00655\t0.00327\t0.00262\t0.00786\n+18\t0.01269\t0.01088\t0.01269\t0.00907\t0.01451\t0.00816\t0.00725\t0.01541\t0.01269\t0.01995\t0.01088\t0.01269\t0.00907\t0.00453\t0.00453\t0.00091\t0.00453\t0.00363\t0.00363\t0.82229\n+19\t0.01269\t0.01088\t0.01269\t0.00907\t0.01451\t0.00816\t0.00725\t0.01541\t0.01269\t0.01995\t0.01088\t0.01269\t0.00907\t0.00453\t0.00453\t0.00091\t0
.00453\t0.00363\t0.00363\t0.82229\n+20\t0.01523\t0.01315\t0.01384\t0.01246\t0.01592\t0.01523\t0.00969\t0.03253\t0.73563\t0.02561\t0.02491\t0.02284\t0.01869\t0.00830\t0.00623\t0.00208\t0.00484\t0.00692\t0.00623\t0.00969\n+21\t0.82811\t0.01184\t0.00900\t0.00805\t0.01184\t0.01373\t0.00663\t0.01799\t0.01042\t0.02746\t0.00852\t0.00994\t0.00663\t0.00568\t0.00379\t0.00189\t0.00474\t0.00331\t0.00379\t0.00663\n+22\t0.00866\t0.00626\t0.00818\t0.00770\t0.00914\t0.00578\t0.00578\t0.01155\t0.01733\t0.02455\t0.74346\t0.04'..b'0.00723\t0.00207\t0.00310\t0.01292\t0.00568\t0.00517\n+86\t0.01141\t0.00571\t0.00571\t0.00571\t0.00713\t0.00571\t0.00428\t0.01426\t0.01284\t0.02282\t0.01997\t0.02282\t0.01569\t0.00713\t0.00428\t0.00143\t0.00285\t0.00571\t0.81886\t0.00571\n+87\t0.00746\t0.00533\t0.00710\t0.00852\t0.00888\t0.00497\t0.00568\t0.00852\t0.01172\t0.01563\t0.03374\t0.78088\t0.04049\t0.01918\t0.00781\t0.00249\t0.00355\t0.01740\t0.00568\t0.00497\n+88\t0.01637\t0.78856\t0.03208\t0.01047\t0.01571\t0.02422\t0.01047\t0.01833\t0.01244\t0.01440\t0.00851\t0.00982\t0.00786\t0.00524\t0.00393\t0.00131\t0.00655\t0.00327\t0.00262\t0.00786\n+89\t0.00723\t0.00620\t0.00620\t0.00620\t0.00827\t0.00517\t0.00465\t0.00878\t0.01395\t0.01654\t0.06201\t0.05891\t0.74421\t0.01550\t0.00723\t0.00207\t0.00310\t0.01292\t0.00568\t0.00517\n+90\t0.00746\t0.00533\t0.00710\t0.00852\t0.00888\t0.00497\t0.00568\t0.00852\t0.01172\t0.01563\t0.03374\t0.78088\t0.04049\t0.01918\t0.00781\t0.00249\t0.00355\t0.01740\t0.00568\t0.00497\n+91\t0.01445\t0.01651\t0.03612\t0.02580\t0.03199\t0.01548\t0.72446\t0.01961\t0.01445\t0.01961\t0.01238\t0.01651\t0.00929\t0.00516\t0.00722\t0.00206\t0.01032\t0.00722\t0.00310\t0.00826\n+92\t0.02287\t0.02917\t0.01735\t0.01577\t0.01892\t0.76030\t0.01183\t0.02444\t0.01735\t0.01498\t0.00946\t0.01104\t0.00788\t0.00631\t0.00552\t0.00158\t0.01104\t0.00394\t0.00315\t0.00710\n+93\t0.01156\t0.01088\t0.01836\t0.77016\t0.04216\t0.01360\t0.01700\t0.01564\t0.01224\t0.01564\t0.01088\t0.01632\t0.00816\t0.00612\t0.
00612\t0.00204\t0.00816\t0.00544\t0.00272\t0.00680\n+94\t0.01063\t0.00532\t0.00797\t0.00797\t0.00797\t0.00532\t0.00532\t0.00797\t0.00797\t0.01063\t0.01063\t0.01861\t0.01063\t0.02127\t0.02392\t0.82190\t0.00532\t0.00532\t0.00266\t0.00266\n+95\t0.02327\t0.01715\t0.01837\t0.01408\t0.01898\t0.01898\t0.01163\t0.72628\t0.02878\t0.03858\t0.01470\t0.01470\t0.01041\t0.00735\t0.00612\t0.00184\t0.00674\t0.00551\t0.00612\t0.01041\n+96\t0.01269\t0.01088\t0.01269\t0.00907\t0.01451\t0.00816\t0.00725\t0.01541\t0.01269\t0.01995\t0.01088\t0.01269\t0.00907\t0.00453\t0.00453\t0.00091\t0.00453\t0.00363\t0.00363\t0.82229\n+97\t0.01523\t0.01315\t0.01384\t0.01246\t0.01592\t0.01523\t0.00969\t0.03253\t0.73563\t0.02561\t0.02491\t0.02284\t0.01869\t0.00830\t0.00623\t0.00208\t0.00484\t0.00692\t0.00623\t0.00969\n+98\t0.00874\t0.00656\t0.00984\t0.00984\t0.01093\t0.00765\t0.00765\t0.01093\t0.00984\t0.01421\t0.01640\t0.02405\t0.01530\t0.04591\t0.76062\t0.00984\t0.01640\t0.00656\t0.00328\t0.00547\n+99\t0.01637\t0.78856\t0.03208\t0.01047\t0.01571\t0.02422\t0.01047\t0.01833\t0.01244\t0.01440\t0.00851\t0.00982\t0.00786\t0.00524\t0.00393\t0.00131\t0.00655\t0.00327\t0.00262\t0.00786\n+100\t0.00866\t0.00626\t0.00818\t0.00770\t0.00914\t0.00578\t0.00578\t0.01155\t0.01733\t0.02455\t0.74346\t0.04572\t0.05776\t0.01251\t0.00722\t0.00193\t0.00289\t0.01107\t0.00674\t0.00578\n+101\t0.02327\t0.01715\t0.01837\t0.01408\t0.01898\t0.01898\t0.01163\t0.72628\t0.02878\t0.03858\t0.01470\t0.01470\t0.01041\t0.00735\t0.00612\t0.00184\t0.00674\t0.00551\t0.00612\t0.01041\n+102\t0.02746\t0.01042\t0.01421\t0.01089\t0.01563\t0.00900\t0.00900\t0.02983\t0.01752\t0.75093\t0.02415\t0.02083\t0.01515\t0.00758\t0.00616\t0.00189\t0.00521\t0.00616\t0.00758\t0.01042\n+103\t0.00723\t0.00620\t0.00620\t0.00620\t0.00827\t0.00517\t0.00465\t0.00878\t0.01395\t0.01654\t0.06201\t0.05891\t0.74421\t0.01550\t0.00723\t0.00207\t0.00310\t0.01292\t0.00568\t0.00517\n+104\t0.00746\t0.00533\t0.00710\t0.00852\t0.00888\t0.00497\t0.00568\t0.00852\t0.01172\t0.01563
\t0.03374\t0.78088\t0.04049\t0.01918\t0.00781\t0.00249\t0.00355\t0.01740\t0.00568\t0.00497\n+105\t0.01523\t0.01315\t0.01384\t0.01246\t0.01592\t0.01523\t0.00969\t0.03253\t0.73563\t0.02561\t0.02491\t0.02284\t0.01869\t0.00830\t0.00623\t0.00208\t0.00484\t0.00692\t0.00623\t0.00969\n+106\t0.02327\t0.01715\t0.01837\t0.01408\t0.01898\t0.01898\t0.01163\t0.72628\t0.02878\t0.03858\t0.01470\t0.01470\t0.01041\t0.00735\t0.00612\t0.00184\t0.00674\t0.00551\t0.00612\t0.01041\n+107\t0.00723\t0.00620\t0.00620\t0.00620\t0.00827\t0.00517\t0.00465\t0.00878\t0.01395\t0.01654\t0.06201\t0.05891\t0.74421\t0.01550\t0.00723\t0.00207\t0.00310\t0.01292\t0.00568\t0.00517\n+108\t0.01445\t0.01651\t0.03612\t0.02580\t0.03199\t0.01548\t0.72446\t0.01961\t0.01445\t0.01961\t0.01238\t0.01651\t0.00929\t0.00516\t0.00722\t0.00206\t0.01032\t0.00722\t0.00310\t0.00826\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+11\t120\n+\n+# created by:\n+# /home/cegg/simao/soft/augustus-3.2.1/scripts/msa2prfl.pl ./align_prep/EOG090W0GYE.fa\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/insecta/prfl/EOG090W0T3K.prfl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/insecta/prfl/EOG090W0T3K.prfl Thu Aug 26 06:55:33 2021 +0000 |
[ |
b'@@ -0,0 +1,429 @@\n+[name]\n+unknown\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t16\n+\n+[block]\n+# block no. 0 follows, 30 sequences, length 21\n+# corresponding to MSA columns:\n+# 93-113\n+name=unknown_A\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\tY\tW\tH\tM\tC\tP\n+0\t0.01057\t0.00773\t0.01034\t0.01163\t0.01327\t0.00750\t0.00987\t0.01344\t0.01601\t0.02067\t0.06875\t0.11927\t0.14101\t0.01901\t0.00933\t0.00302\t0.00572\t0.50008\t0.00649\t0.00630\n+1\t0.01546\t0.06663\t0.01682\t0.01327\t0.01666\t0.04776\t0.06572\t0.02349\t0.22653\t0.06835\t0.14524\t0.05426\t0.08041\t0.04384\t0.04676\t0.00294\t0.00678\t0.04435\t0.00614\t0.00858\n+2\t0.01789\t0.53823\t0.20660\t0.01426\t0.02119\t0.06285\t0.01549\t0.02158\t0.01462\t0.01788\t0.01047\t0.01216\t0.00893\t0.00620\t0.00512\t0.00169\t0.00846\t0.00413\t0.00301\t0.00925\n+3\t0.12683\t0.01280\t0.01355\t0.01162\t0.01527\t0.03360\t0.00908\t0.13641\t0.11546\t0.07711\t0.12786\t0.08985\t0.04344\t0.01130\t0.00812\t0.05341\t0.00558\t0.00834\t0.00613\t0.09425\n+4\t0.14326\t0.01153\t0.03546\t0.01195\t0.01515\t0.01117\t0.05274\t0.04592\t0.11102\t0.02467\t0.09562\t0.20004\t0.11112\t0.01336\t0.00744\t0.00240\t0.00540\t0.05331\t0.00593\t0.04251\n+5\t0.02165\t0.01301\t0.01394\t0.01176\t0.01544\t0.09116\t0.00934\t0.08063\t0.01834\t0.27268\t0.05014\t0.02373\t0.01776\t0.01554\t0.12344\t0.00343\t0.00824\t0.00676\t0.16082\t0.04219\n+6\t0.01092\t0.00790\t0.01079\t0.01213\t0.01380\t0.00777\t0.01047\t0.01401\t0.01627\t0.02115\t0.08035\t0.07615\t0.09660\t0.01884\t0.00944\t0.00308\t0.00600\t0.57143\t0.00651\t0.00639\n+7\t0.01490\t0.01416\t0.02404\t0.44524\t0.32734\t0.01596\t0.02024\t0.01934\t0.01477\t0.01992\t0.01271\t0.01804\t0.01004\t0.00666\t0.00695\t0.00222\t0.00888\t0.00621\t0.00325\t0.00914\n+8\t0.01741\t0.27337\t0.28936\t0.01871\t0.02621\t0.10378\t0.13668\t0.02242\t0.01545\t0.01966\t0.01166\t0.01403\t0.00921\t0.00634\t0.00616\t0.00195\t0.00989\t0.00515\t0.00314\t0.00944
\n+9\t0.00978\t0.00661\t0.00772\t0.00809\t0.00818\t0.00652\t0.00474\t0.01005\t0.01087\t0.01450\t0.02570\t0.20443\t0.03005\t0.58960\t0.02952\t0.00588\t0.00615\t0.01233\t0.00474\t0.00455\n+10\t0.01976\t0.01393\t0.01804\t0.01508\t0.01922\t0.01460\t0.07980\t0.16972\t0.21138\t0.14459\t0.02649\t0.13414\t0.02277\t0.01140\t0.00766\t0.00241\t0.00656\t0.06562\t0.00682\t0.01001\n+11\t0.02375\t0.05431\t0.01867\t0.01574\t0.01930\t0.45877\t0.01197\t0.11245\t0.01988\t0.05951\t0.01404\t0.01807\t0.01203\t0.09670\t0.03288\t0.00273\t0.01088\t0.00560\t0.00438\t0.00832\n+12\t0.00905\t0.00728\t0.00811\t0.00811\t0.01012\t0.00650\t0.00600\t0.01186\t0.04448\t0.02139\t0.22165\t0.14724\t0.43929\t0.01702\t0.00828\t0.00238\t0.00362\t0.01457\t0.00680\t0.00624\n+13\t0.05667\t0.00906\t0.01074\t0.00966\t0.01254\t0.00852\t0.00747\t0.01900\t0.07503\t0.17135\t0.27144\t0.08776\t0.20624\t0.01356\t0.00772\t0.00228\t0.00426\t0.01152\t0.00729\t0.00791\n+14\t0.04127\t0.01698\t0.01775\t0.01504\t0.01907\t0.12772\t0.06970\t0.11033\t0.24965\t0.08074\t0.06339\t0.07501\t0.06234\t0.01012\t0.00723\t0.00224\t0.00714\t0.00849\t0.00621\t0.00958\n+15\t0.08182\t0.01473\t0.01498\t0.01335\t0.01725\t0.03410\t0.01044\t0.06184\t0.51550\t0.04834\t0.05791\t0.02708\t0.05309\t0.00969\t0.00695\t0.00231\t0.00561\t0.00790\t0.00682\t0.01031\n+16\t0.00861\t0.00647\t0.00778\t0.00870\t0.00970\t0.00586\t0.00593\t0.00996\t0.01406\t0.01808\t0.06850\t0.45850\t0.24625\t0.08430\t0.01101\t0.00301\t0.00410\t0.01721\t0.00632\t0.00564\n+17\t0.02657\t0.01690\t0.01962\t0.01639\t0.05856\t0.01788\t0.01271\t0.41742\t0.08333\t0.22637\t0.02048\t0.01947\t0.01409\t0.00842\t0.00700\t0.00213\t0.00706\t0.00661\t0.00725\t0.01176\n+18\t0.05960\t0.01505\t0.02492\t0.26659\t0.45727\t0.01618\t0.01999\t0.02020\t0.01504\t0.02166\t0.01270\t0.01737\t0.01029\t0.00648\t0.00679\t0.00216\t0.00846\t0.00608\t0.00339\t0.00979\n+19\t0.00917\t0.00665\t0.00857\t0.00985\t0.01070\t0.00627\t0.00710\t0.01074\t0.01393\t0.01819\t0.04060\t0.47254\t0.12255\t0.08385\t0.01126\t0.00319\t
0.00470\t0.14815\t0.00627\t0.00572\n+20\t0.05581\t0.01600\t0.01854\t0.01474\t0.01969\t0.01718\t0.04419\t0.31554\t0.18190\t0.16248\t0.02140\t0.02064\t0.01518\t0.00846\t0.00684\t0.00216\t0.00669\t0.00680\t0.05469\t0.01106\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t1\n+\n+[block]\n+# block no. 1 follows, 30 sequences, length 20\n+# corresponding to MSA columns:\n+# 116-135\n+name=unknown_B\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\t'..b'33\t0.01572\t0.00916\t0.09893\t0.05443\t0.01661\t0.06048\t0.08914\t0.29528\t0.05706\t0.01621\t0.00853\t0.00266\t0.00586\t0.17199\t0.04059\t0.00718\n+1\t0.01008\t0.00819\t0.01089\t0.01291\t0.10367\t0.00753\t0.00812\t0.01261\t0.01684\t0.02246\t0.28461\t0.17280\t0.27194\t0.01565\t0.00818\t0.00234\t0.00417\t0.01372\t0.00650\t0.00679\n+2\t0.01697\t0.01675\t0.01923\t0.11577\t0.11874\t0.14866\t0.01441\t0.02029\t0.04384\t0.04284\t0.01635\t0.04987\t0.03525\t0.01258\t0.05954\t0.00298\t0.21250\t0.00690\t0.03830\t0.00823\n+3\t0.00988\t0.00761\t0.01041\t0.10716\t0.01594\t0.00766\t0.00898\t0.01213\t0.01462\t0.01901\t0.07698\t0.32024\t0.13860\t0.05909\t0.01016\t0.00296\t0.00532\t0.16117\t0.00596\t0.00612\n+4\t0.01458\t0.01504\t0.06713\t0.05603\t0.04708\t0.08021\t0.08114\t0.03845\t0.04180\t0.02064\t0.04067\t0.07262\t0.02074\t0.08999\t0.05974\t0.00347\t0.08364\t0.09565\t0.06377\t0.00762\n+5\t0.01213\t0.00727\t0.00944\t0.00982\t0.01117\t0.00674\t0.00703\t0.01376\t0.01473\t0.11171\t0.09387\t0.39223\t0.07980\t0.12326\t0.01241\t0.00325\t0.00481\t0.07357\t0.00654\t0.00645\n+6\t0.01895\t0.19040\t0.13613\t0.01559\t0.02078\t0.13609\t0.04289\t0.12433\t0.05084\t0.02399\t0.06629\t0.01812\t0.01490\t0.00799\t0.00736\t0.00200\t0.05856\t0.00587\t0.04971\t0.00919\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t1\n+\n+[block]\n+# block no. 
11 follows, 30 sequences, length 15\n+# corresponding to MSA columns:\n+# 432-446\n+name=unknown_L\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\tY\tW\tH\tM\tC\tP\n+0\t0.06236\t0.06510\t0.08912\t0.16991\t0.02482\t0.07478\t0.01410\t0.04641\t0.01563\t0.11644\t0.01478\t0.01743\t0.01120\t0.00780\t0.00831\t0.00210\t0.10327\t0.00563\t0.08136\t0.06945\n+1\t0.14285\t0.23985\t0.21370\t0.01521\t0.02151\t0.06006\t0.05814\t0.06360\t0.01556\t0.02266\t0.01380\t0.07797\t0.01226\t0.00785\t0.00593\t0.00202\t0.00820\t0.00604\t0.00382\t0.00898\n+2\t0.07880\t0.07716\t0.01983\t0.01623\t0.04251\t0.14432\t0.02899\t0.04272\t0.07521\t0.09621\t0.01563\t0.04745\t0.01326\t0.01265\t0.06429\t0.00300\t0.20210\t0.00644\t0.00454\t0.00868\n+3\t0.01021\t0.00702\t0.00876\t0.00889\t0.01028\t0.00644\t0.00636\t0.01227\t0.01612\t0.05081\t0.28848\t0.24278\t0.15953\t0.10544\t0.01168\t0.00297\t0.00416\t0.03488\t0.00674\t0.00616\n+4\t0.01544\t0.03846\t0.06976\t0.05110\t0.11266\t0.03479\t0.07755\t0.07899\t0.08991\t0.04165\t0.09492\t0.11646\t0.02418\t0.10979\t0.01117\t0.00287\t0.00724\t0.00932\t0.00518\t0.00856\n+5\t0.00969\t0.00759\t0.00884\t0.00922\t0.01095\t0.00717\t0.00684\t0.01318\t0.07550\t0.02076\t0.14049\t0.24513\t0.29586\t0.04121\t0.00932\t0.00269\t0.00420\t0.07837\t0.00662\t0.00638\n+6\t0.01690\t0.01784\t0.02771\t0.06260\t0.05360\t0.07422\t0.22649\t0.04652\t0.01448\t0.01995\t0.01149\t0.01650\t0.00979\t0.00915\t0.01485\t0.00262\t0.35666\t0.00667\t0.00342\t0.00854\n+7\t0.01238\t0.00894\t0.01140\t0.01072\t0.01252\t0.00986\t0.00819\t0.10557\t0.01468\t0.02065\t0.02318\t0.03131\t0.07093\t0.13342\t0.42455\t0.00794\t0.01322\t0.00815\t0.06564\t0.00674\n+8\t0.01167\t0.00997\t0.01048\t0.01060\t0.02926\t0.07115\t0.00719\t0.01294\t0.01273\t0.01635\t0.02939\t0.06880\t0.15102\t0.31387\t0.14024\t0.00557\t0.04995\t0.00999\t0.03309\t0.00573\n+9\t0.00925\t0.00996\t0.08892\t0.00991\t0.01224\t0.00707\t0.00833\t0.01173\t0.01525\t0.02012\t0.14372\t0.35023\t0.25207\t0.01770\t0
.00834\t0.00252\t0.00449\t0.01551\t0.00625\t0.00640\n+10\t0.01655\t0.01534\t0.07694\t0.01239\t0.01839\t0.01227\t0.01075\t0.05370\t0.16343\t0.06417\t0.01692\t0.01762\t0.01303\t0.00657\t0.00590\t0.00154\t0.00585\t0.00530\t0.00505\t0.47830\n+11\t0.11634\t0.01390\t0.04532\t0.01352\t0.01857\t0.01281\t0.05510\t0.08091\t0.08585\t0.36568\t0.09789\t0.02439\t0.02065\t0.00880\t0.00690\t0.00219\t0.00624\t0.00728\t0.00717\t0.01048\n+12\t0.01105\t0.07400\t0.01185\t0.00947\t0.01187\t0.00952\t0.00741\t0.01565\t0.09949\t0.02356\t0.31299\t0.10422\t0.22936\t0.01467\t0.00787\t0.00229\t0.00417\t0.03677\t0.00673\t0.00706\n+13\t0.01971\t0.07456\t0.14709\t0.01386\t0.01908\t0.07351\t0.01304\t0.16242\t0.06634\t0.11664\t0.08820\t0.02331\t0.03910\t0.00872\t0.00659\t0.00203\t0.00708\t0.00685\t0.10223\t0.00964\n+14\t0.01454\t0.01364\t0.08113\t0.01206\t0.01537\t0.03959\t0.01025\t0.10345\t0.10294\t0.04920\t0.09273\t0.11455\t0.12668\t0.07330\t0.06551\t0.00337\t0.00701\t0.04199\t0.02468\t0.00801\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t46\n+\n+# created by:\n+# /home/cegg/simao/soft/augustus-3.2.1/scripts/msa2prfl.pl ./align_prep/EOG090W0T3K.fa\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/insecta/scores_cutoff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/insecta/scores_cutoff Thu Aug 26 06:55:33 2021 +0000 |
b |
b'@@ -0,0 +1,1658 @@\n+EOG090W0HFV\t20.79\n+EOG090W0427\t76.86\n+EOG090W09K7\t70.14\n+EOG090W0B5K\t128.73\n+EOG090W0153\t102.48\n+EOG090W051T\t215.17999999999998\n+EOG090W01WI\t383.17999999999995\n+EOG090W01A3\t180.95\n+EOG090W067A\t151.13\n+EOG090W0IUR\t68.03999999999999\n+EOG090W09BV\t69.41\n+EOG090W0AIA\t45.01\n+EOG090W05D8\t52.35999999999999\n+EOG090W01IP\t193.27\n+EOG090W02C3\t62.79\n+EOG090W0B1Y\t200.97\n+EOG090W01VD\t37.519999999999996\n+EOG090W035W\t263.76\n+EOG090W0DWN\t50.489999999999995\n+EOG090W028U\t132.57999999999998\n+EOG090W02TI\t99.96000000000001\n+EOG090W00PB\t71.53999999999999\n+EOG090W038B\t138.32\n+EOG090W0BUR\t78.89\n+EOG090W0F0L\t27.65\n+EOG090W08IZ\t79.59\n+EOG090W0BEB\t171.71\n+EOG090W0I37\t40.10999999999999\n+EOG090W0CQ9\t196.62999999999997\n+EOG090W04BS\t134.26\n+EOG090W0FCL\t64.96\n+EOG090W0AXJ\t159.88\n+EOG090W0FEP\t79.44999999999999\n+EOG090W0FVI\t120.18999999999998\n+EOG090W0GP3\t64.75\n+EOG090W01NH\t132.65\n+EOG090W0IEE\t90.64999999999999\n+EOG090W0IP7\t102.33999999999999\n+EOG090W038Z\t104.78999999999999\n+EOG090W0B8O\t113.39999999999999\n+EOG090W0KP0\t37.309999999999995\n+EOG090W064P\t142.79999999999998\n+EOG090W0A3V\t166.73999999999998\n+EOG090W0BOZ\t106.18999999999998\n+EOG090W0H6T\t34.089999999999996\n+EOG090W0EDI\t55.36999999999999\n+EOG090W0EFH\t43.89\n+EOG090W04BN\t196.48999999999998\n+EOG090W00SP\t246.04999999999998\n+EOG090W01ZN\t189.64000000000001\n+EOG090W014R\t24.29\n+EOG090W0FJE\t84.41999999999999\n+EOG090W00EP\t16.59\n+EOG090W07MY\t127.67999999999999\n+EOG090W00LR\t281.75\n+EOG090W06QR\t84.84\n+EOG090W06J7\t114.8\n+EOG090W04G6\t264.99\n+EOG090W06IG\t86.66\n+EOG090W036Y\t274.78000000000003\n+EOG090W0CMO\t61.53\n+EOG090W0BFE\t67.34\n+EOG090W0JYN\t62.71999999999999\n+EOG090W054P\t125.64999999999999\n+EOG090W0753\t80.64\n+EOG090W0FKG\t73.42999999999999\n+EOG090W0B6L\t51.66\n+EOG090W0C2I\t135.07999999999998\n+EOG090W057Z\t83.44\n+EOG090W01H1\t66.00999999999999\n+EOG090W07A3\t61.31999999999999\n+EOG090W0D5E\t42.20999999999999
4\n+EOG090W07PK\t54.809999999999995\n+EOG090W0GKW\t93.86999999999999\n+EOG090W0J9P\t43.47\n+EOG090W0A69\t93.61\n+EOG090W0DRQ\t38.15\n+EOG090W04G1\t107.31\n+EOG090W08L6\t41.019999999999996\n+EOG090W02QT\t95.68999999999998\n+EOG090W0K04\t62.92\n+EOG090W02UI\t134.11999999999998\n+EOG090W00MS\t211.54\n+EOG090W0HXZ\t69.86\n+EOG090W09LF\t17.71\n+EOG090W060L\t301.84\n+EOG090W0I0Q\t47.39\n+EOG090W0B0M\t95.33999999999999\n+EOG090W0EY0\t90.64999999999999\n+EOG090W019L\t107.66\n+EOG090W0ALV\t164.29\n+EOG090W0F9J\t138.67\n+EOG090W0BZ2\t67.19999999999999\n+EOG090W0B5T\t160.85999999999999\n+EOG090W0JBN\t66.43\n+EOG090W06CO\t91.41999999999999\n+EOG090W06Y4\t45.35999999999999\n+EOG090W00D0\t558.88\n+EOG090W0N7U\t66.08\n+EOG090W02H5\t136.95\n+EOG090W04DH\t85.61\n+EOG090W01HI\t194.52999999999997\n+EOG090W02JZ\t297.21999999999997\n+EOG090W0ANA\t126.07\n+EOG090W055F\t99.82\n+EOG090W0KMC\t57.68\n+EOG090W063Z\t241.49999999999997\n+EOG090W0BC3\t72.38\n+EOG090W08A5\t68.03999999999999\n+EOG090W0BMW\t135.1\n+EOG090W08CW\t75.46\n+EOG090W04FE\t98.28\n+EOG090W0FH5\t58.38\n+EOG090W00RS\t269.64\n+EOG090W0EG7\t91.21000000000001\n+EOG090W06OD\t170.1\n+EOG090W00PL\t78.05\n+EOG090W0AV1\t163.45\n+EOG090W015K\t58.239999999999995\n+EOG090W07NX\t129.22\n+EOG090W0J8V\t91.69999999999999\n+EOG090W0FQ8\t58.239999999999995\n+EOG090W0JN2\t78.75\n+EOG090W029M\t83.02\n+EOG090W00TM\t273.90999999999997\n+EOG090W046G\t122.14999999999999\n+EOG090W06J5\t163.23999999999998\n+EOG090W0GWR\t32.34\n+EOG090W063H\t165.48\n+EOG090W01MU\t39.48\n+EOG090W0K07\t75.80999999999999\n+EOG090W02IA\t214.68999999999997\n+EOG090W0HTD\t76.78999999999999\n+EOG090W0F78\t69.36999999999999\n+EOG090W0GR5\t118.58\n+EOG090W04O1\t20.72\n+EOG090W0CN5\t108.57\n+EOG090W00X5\t129.36\n+EOG090W09PQ\t73.5\n+EOG090W06OE\t182.91\n+EOG090W0G1I\t35.769999999999996\n+EOG090W0GZA\t79.53\n+EOG090W07X1\t60.48\n+EOG090W08E9\t146.93\n+EOG090W054N\t226.82\n+EOG090W02JO\t52.849999999999994\n+EOG090W0AKG\t103.81\n+EOG090W04DG\t148.47\n+EOG090W00L3\t195.16\n+EOG090W
03O0\t98.56\n+EOG090W0HX7\t36.739999999999995\n+EOG090W08N5\t76.64999999999999\n+EOG090W00BP\t168.21\n+EOG090W0AY7\t48.019999999999996\n+EOG090W051U\t222.67000000000002\n+EOG090W09PJ\t166.18\n+EOG090W03TV\t286.92999999999995\n+EOG090W08A9\t100.44999999999999\n+EOG090W0IBV\t35.629999999999995\n+EOG090W04NQ\t64.05\n+EOG090'..b'92999999999999\n+EOG090W05HI\t39.76\n+EOG090W0DJI\t49.49\n+EOG090W03WV\t73.00999999999999\n+EOG090W0ESV\t140.14\n+EOG090W004H\t39.6\n+EOG090W0BJR\t77.91\n+EOG090W0BPH\t198.37999999999997\n+EOG090W0AH5\t97.86\n+EOG090W0BKY\t96.58\n+EOG090W07XK\t114.38\n+EOG090W0KJ3\t76.58\n+EOG090W0DEY\t74.61999999999999\n+EOG090W0I7M\t15.189999999999998\n+EOG090W05GY\t169.26\n+EOG090W0IF2\t144.41\n+EOG090W09IF\t75.03999999999999\n+EOG090W04XG\t28.49\n+EOG090W06TC\t53.480000000000004\n+EOG090W00ZZ\t305.54999999999995\n+EOG090W05ZP\t55.58\n+EOG090W09AW\t96.46000000000001\n+EOG090W0FFP\t109.76\n+EOG090W0GQZ\t48.51\n+EOG090W0DYP\t29.33\n+EOG090W0GI3\t63.349999999999994\n+EOG090W04OX\t73.36\n+EOG090W0C66\t73.91999999999999\n+EOG090W04IF\t24.29\n+EOG090W04PI\t196.35\n+EOG090W0GPQ\t36.33\n+EOG090W06OY\t207.68999999999997\n+EOG090W0PW0\t15.26\n+EOG090W0BM0\t39.199999999999996\n+EOG090W08QR\t84.91\n+EOG090W00VU\t337.46999999999997\n+EOG090W06PP\t58.169999999999995\n+EOG090W0EIQ\t158.61999999999998\n+EOG090W0KXF\t61.10999999999999\n+EOG090W019B\t225.72\n+EOG090W05XP\t95.61999999999999\n+EOG090W01V1\t162.39999999999998\n+EOG090W0K88\t52.43\n+EOG090W0ITI\t40.10999999999999\n+EOG090W08AN\t158.54999999999998\n+EOG090W0PZH\t39.199999999999996\n+EOG090W0F7U\t63.14\n+EOG090W0CNN\t69.09\n+EOG090W08FZ\t140.35\n+EOG090W0FGQ\t103.38999999999999\n+EOG090W05BJ\t77.77\n+EOG090W0A4R\t40.04\n+EOG090W09QT\t40.10999999999999\n+EOG090W0GDE\t43.05\n+EOG090W050K\t148.72\n+EOG090W0CL8\t70.07\n+EOG090W0JJQ\t77.35\n+EOG090W06W8\t103.72999999999999\n+EOG090W0EPV\t35.559999999999995\n+EOG090W00WM\t140.63\n+EOG090W005S\t210.07\n+EOG090W02UQ\t98.0\n+EOG090W03FA\t114.72999999999999\n+EOG090W02B7\t11.
97\n+EOG090W06DJ\t35.209999999999994\n+EOG090W08FE\t122.63999999999999\n+EOG090W06P2\t125.72999999999999\n+EOG090W0C7S\t146.51999999999998\n+EOG090W0C4Z\t55.660000000000004\n+EOG090W00ZP\t144.33999999999997\n+EOG090W0C7Z\t30.52\n+EOG090W06AN\t199.35999999999999\n+EOG090W0FYR\t77.98\n+EOG090W015U\t27.44\n+EOG090W02LX\t267.67999999999995\n+EOG090W0DZ4\t191.17000000000002\n+EOG090W050Y\t215.67000000000002\n+EOG090W08GU\t50.81999999999999\n+EOG090W09LK\t108.36\n+EOG090W077G\t64.61\n+EOG090W0B8P\t58.51999999999999\n+EOG090W0A73\t39.059999999999995\n+EOG090W0B3U\t47.739999999999995\n+EOG090W0LL3\t81.69\n+EOG090W09R9\t63.06999999999999\n+EOG090W0A58\t69.64999999999999\n+EOG090W06VZ\t445.71999999999997\n+EOG090W080B\t61.88\n+EOG090W0ALP\t115.01\n+EOG090W0EJV\t163.1\n+EOG090W0BI6\t112.76999999999998\n+EOG090W05KO\t149.66\n+EOG090W03K0\t120.11999999999999\n+EOG090W0JFZ\t39.199999999999996\n+EOG090W09RO\t93.1\n+EOG090W00ZV\t161.98\n+EOG090W005V\t363.79\n+EOG090W0F9A\t109.55\n+EOG090W0IKC\t25.2\n+EOG090W04QG\t181.85999999999999\n+EOG090W00U5\t126.17\n+EOG090W0JS6\t59.64\n+EOG090W06X4\t243.24999999999997\n+EOG090W0H7U\t18.759999999999998\n+EOG090W02LH\t101.28999999999999\n+EOG090W06AU\t54.10999999999999\n+EOG090W0L6N\t74.13\n+EOG090W0028\t38.39\n+EOG090W05ZG\t117.24999999999999\n+EOG090W0DSQ\t52.36\n+EOG090W0CIU\t151.13\n+EOG090W09DT\t96.46000000000001\n+EOG090W0883\t125.22999999999999\n+EOG090W08IL\t186.41\n+EOG090W07HX\t141.11999999999998\n+EOG090W0ADL\t42.14\n+EOG090W07E5\t140.69\n+EOG090W0CHN\t36.19\n+EOG090W0F27\t66.08\n+EOG090W05FW\t224.20999999999998\n+EOG090W061C\t309.87\n+EOG090W023I\t166.65\n+EOG090W09Y9\t86.59\n+EOG090W029L\t280.28000000000003\n+EOG090W078A\t137.13\n+EOG090W0C83\t33.39\n+EOG090W015Z\t231.07\n+EOG090W05IA\t174.85999999999999\n+EOG090W06HO\t108.71000000000001\n+EOG090W0E6K\t68.25\n+EOG090W032M\t17.009999999999998\n+EOG090W04ZL\t137.82999999999998\n+EOG090W0A4U\t92.11999999999999\n+EOG090W0G0Z\t34.37\n+EOG090W012F\t376.75\n+EOG090W08ME\t86.66\n+EOG090W09
0H\t203.07\n+EOG090W0C7T\t133.28\n+EOG090W0AUB\t233.79999999999998\n+EOG090W094H\t153.51\n+EOG090W00HE\t202.16\n+EOG090W0HKZ\t63.76999999999999\n+EOG090W02KK\t178.78\n+EOG090W0828\t35.349999999999994\n+EOG090W07PH\t84.84\n+EOG090W01XB\t80.36\n+EOG090W02C5\t70.14\n+EOG090W00WO\t96.25\n+EOG090W0140\t17.849999999999998\n+EOG090W01QT\t99.46999999999998\n+EOG090W0FQ4\t61.669999999999995\n+EOG090W0CAH\t116.61999999999999\n+EOG090W080Z\t53.76\n+EOG090W02AU\t96.03999999999999\n+EOG090W096X\t86.03\n+EOG090W04OJ\t69.36999999999999\n+EOG090W09UY\t66.43\n+EOG090W07CG\t158.41\n+EOG090W0KFZ\t64.75\n+EOG090W0LWB\t66.33\n+EOG090W0F00\t32.269999999999996\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/repeats.dmnd |
b |
Binary file test-data/funannotate_db/repeats.dmnd has changed |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/trained_species/fly/augustus/fly_exon_probs.pbl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_exon_probs.pbl Thu Aug 26 06:55:33 2021 +0000 |
[ |
b'@@ -0,0 +1,10659 @@\n+#exon model parameters\n+# begin of content independent part\n+\n+# Length distributions\n+[LENGTH]\n+# maximal individually stored length probability =\n+3000\n+# slope of smoothing bandwidth =\n+0.3\n+# smoothing minwindowcount =\n+8\n+# length single initial internal terminal\n+# total number of exons of above types\n+ 1959 8219 22997 8219\n+# number of exons exceeding length d\n+ 27 19 136 55\n+# 1000 P(len=k), k=0,1,..., 3000\n+0\t0\t1.95\t0.0374\t0.249\n+1\t0\t2.18\t0.0414\t0.284\n+2\t0\t2.4\t0.0456\t0.321\n+3\t0\t2.62\t0.0501\t0.359\n+4\t0\t2.84\t0.0548\t0.397\n+5\t0\t3.04\t0.0598\t0.435\n+6\t0\t3.23\t0.065\t0.473\n+7\t0\t3.41\t0.0706\t0.509\n+8\t0\t3.57\t0.0765\t0.543\n+9\t0\t3.71\t0.0828\t0.576\n+10\t0\t3.82\t0.0894\t0.606\n+11\t0\t3.92\t0.0965\t0.633\n+12\t0\t3.99\t0.104\t0.658\n+13\t0\t4.05\t0.112\t0.68\n+14\t0\t4.08\t0.121\t0.699\n+15\t0\t4.1\t0.13\t0.716\n+16\t0\t4.1\t0.139\t0.73\n+17\t0\t4.08\t0.15\t0.743\n+18\t0\t4.06\t0.16\t0.754\n+19\t0\t4.02\t0.172\t0.763\n+20\t0\t3.98\t0.184\t0.771\n+21\t0\t3.94\t0.198\t0.778\n+22\t0\t3.89\t0.212\t0.785\n+23\t0\t3.84\t0.226\t0.791\n+24\t0\t3.79\t0.242\t0.797\n+25\t0\t3.75\t0.259\t0.803\n+26\t0\t3.71\t0.277\t0.809\n+27\t0\t3.67\t0.295\t0.816\n+28\t0\t3.65\t0.315\t0.823\n+29\t0\t3.62\t0.335\t0.831\n+30\t0\t3.61\t0.357\t0.841\n+31\t0\t3.6\t0.379\t0.851\n+32\t0\t3.6\t0.401\t0.862\n+33\t0\t3.61\t0.425\t0.874\n+34\t0\t3.62\t0.449\t0.888\n+35\t0\t3.64\t0.473\t0.902\n+36\t0\t3.67\t0.498\t0.918\n+37\t0\t3.7\t0.524\t0.935\n+38\t0\t3.74\t0.549\t0.953\n+39\t0\t3.77\t0.575\t0.971\n+40\t0\t3.82\t0.601\t0.991\n+41\t0\t3.86\t0.628\t1.01\n+42\t0\t3.91\t0.654\t1.03\n+43\t0\t3.96\t0.68\t1.06\n+44\t0\t4.01\t0.706\t1.08\n+45\t0\t4.06\t0.732\t1.11\n+46\t0\t4.11\t0.759\t1.13\n+47\t0\t4.17\t0.785\t1.16\n+48\t0\t4.22\t0.811\t1.18\n+49\t0\t4.27\t0.838\t1.21\n+50\t0\t4.32\t0.864\t1.23\n+51\t0\t4.37\t0.892\t1.26\n+52\t0\t4.42\t0.92\t1.29\n+53\t0\t4.47\t0.949\t1.32\n+54\t0\t4.51\t0.979\t1.34\n+55\t0\t4.55\t1.01\t1.37\n
+56\t0\t4.58\t1.04\t1.4\n+57\t0\t4.61\t1.08\t1.43\n+58\t0\t4.63\t1.12\t1.45\n+59\t0\t4.65\t1.15\t1.48\n+60\t0\t4.65\t1.2\t1.51\n+61\t0\t4.65\t1.24\t1.53\n+62\t0\t4.65\t1.28\t1.56\n+63\t0\t4.63\t1.33\t1.58\n+64\t0\t4.6\t1.37\t1.61\n+65\t0\t4.57\t1.42\t1.63\n+66\t0\t4.53\t1.47\t1.64\n+67\t0\t4.48\t1.52\t1.66\n+68\t0\t4.43\t1.56\t1.67\n+69\t0\t4.37\t1.61\t1.69\n+70\t0\t4.31\t1.66\t1.7\n+71\t0\t4.24\t1.7\t1.7\n+72\t0\t4.17\t1.74\t1.71\n+73\t0\t4.1\t1.79\t1.72\n+74\t0\t4.03\t1.83\t1.72\n+75\t0\t3.96\t1.86\t1.73\n+76\t0\t3.89\t1.9\t1.73\n+77\t0\t3.83\t1.94\t1.74\n+78\t0\t3.76\t1.97\t1.75\n+79\t0\t3.71\t2.01\t1.76\n+80\t0\t3.65\t2.05\t1.77\n+81\t0\t3.61\t2.08\t1.78\n+82\t0\t3.57\t2.12\t1.8\n+83\t0\t3.53\t2.15\t1.81\n+84\t0\t3.5\t2.19\t1.83\n+85\t0\t3.47\t2.23\t1.84\n+86\t0\t3.45\t2.27\t1.86\n+87\t0\t3.43\t2.31\t1.88\n+88\t0\t3.41\t2.35\t1.89\n+89\t0\t3.4\t2.39\t1.91\n+90\t0\t3.38\t2.44\t1.92\n+91\t0\t3.37\t2.48\t1.93\n+92\t0\t3.36\t2.53\t1.95\n+93\t0\t3.35\t2.57\t1.96\n+94\t0\t3.33\t2.62\t1.97\n+95\t0\t3.31\t2.66\t1.98\n+96\t0\t3.29\t2.71\t2\n+97\t0\t3.27\t2.76\t2.01\n+98\t0\t3.25\t2.81\t2.02\n+99\t0\t3.22\t2.85\t2.04\n+100\t0\t3.2\t2.9\t2.05\n+101\t0\t3.17\t2.95\t2.07\n+102\t0\t3.14\t3\t2.08\n+103\t0\t3.12\t3.05\t2.1\n+104\t0\t3.09\t3.1\t2.11\n+105\t0\t3.07\t3.15\t2.13\n+106\t0\t3.04\t3.2\t2.14\n+107\t0\t3.03\t3.24\t2.16\n+108\t0\t3.01\t3.29\t2.17\n+109\t0\t3\t3.33\t2.18\n+110\t0\t2.99\t3.37\t2.2\n+111\t0\t2.98\t3.41\t2.21\n+112\t0\t2.98\t3.45\t2.22\n+113\t0\t2.97\t3.49\t2.22\n+114\t0\t2.97\t3.52\t2.23\n+115\t0\t2.97\t3.56\t2.24\n+116\t0\t2.97\t3.59\t2.24\n+117\t0\t2.96\t3.62\t2.25\n+118\t0\t2.96\t3.65\t2.25\n+119\t0\t2.95\t3.68\t2.25\n+120\t0\t2.94\t3.71\t2.26\n+121\t0\t2.93\t3.74\t2.26\n+122\t0\t2.91\t3.77\t2.27\n+123\t0\t2.9\t3.8\t2.28\n+124\t0\t2.88\t3.84\t2.29\n+125\t0\t2.86\t3.87\t2.29\n+126\t0\t2.84\t3.9\t2.3\n+127\t0\t2.81\t3.93\t2.32\n+128\t0\t2.79\t3.96\t2.33\n+129\t0\t2.76\t3.99\t2.34\n+130\t0\t2.74\t4.01\t2.35\n+131\t0\t2.71\t4.04\t2.36\n+132\t0\t2.68\t4.06\t2
.37\n+133\t0\t2.66\t4.08\t2.38\n+134\t0\t2.63\t4.1\t2.39\n+135\t0\t2.61\t4.12\t2.39\n+136\t0\t2.58\t4.14\t2.4\n+137\t0\t2.56\t4.15\t2.41\n+138\t0\t2.53\t4.16\t2.41\n+139\t0\t2.51\t4.17\t2.42\n+140\t0\t2.49\t4.17\t2.43\n+141\t0\t2.47\t4.18\t2.44\n+142\t0\t2.44\t4.18\t2.45\n+143\t0\t2.43\t4.18\t2.46\n+144\t0\t2.41\t4.18\t2.48\n+145\t0\t2.39\t4.18\t2.49\n+146\t0\t2.37\t4.18\t2.51\n+147\t0\t2.35\t4.17\t2.52\n+148\t0\t2.33\t4.17\t2.54\n+149\t0\t2.32\t4.16\t2.56\n+150\t0\t2.3\t4.15\t2.58\n+151\t0\t2.28\t4.15\t2.6\n+152\t0\t2.27\t4.13\t2.62\n+153\t0\t2.25\t4.12\t2.63\n+154\t0\t2.24\t4.1\t2.65\n+155\t0\t2.23\t4.09\t2.66\n+156\t0\t2.21\t4.07\t2.67\n+157\t0\t2.2\t4.04\t2.67\n+158\t0\t2.19\t4.02\t2.68\n+159\t0\t2.18\t4\t2.68\n+160\t0\t2.16\t3.9'..b'25\n+tgcca \t0.25 \t0.25 \t0.25\n+tgccc \t0.25 \t0.25 \t0.25\n+tgccg \t0.25 \t0.25 \t0.25\n+tgcct \t0.25 \t0.25 \t0.25\n+tgcga \t0.25 \t0.25 \t0.25\n+tgcgc \t0.25 \t0.25 \t0.25\n+tgcgg \t0.25 \t0.25 \t0.25\n+tgcgt \t0.25 \t0.25 \t0.25\n+tgcta \t0.25 \t0.25 \t0.25\n+tgctc \t0.25 \t0.25 \t0.25\n+tgctg \t0.25 \t0.25 \t0.25\n+tgctt \t0.25 \t0.25 \t0.25\n+tggaa \t0.25 \t0.25 \t0.25\n+tggac \t0.25 \t0.25 \t0.25\n+tggag \t0.25 \t0.25 \t0.25\n+tggat \t0.25 \t0.25 \t0.25\n+tggca \t0.25 \t0.25 \t0.25\n+tggcc \t0.25 \t0.25 \t0.25\n+tggcg \t0.25 \t0.25 \t0.25\n+tggct \t0.25 \t0.25 \t0.25\n+tggga \t0.25 \t0.25 \t0.25\n+tgggc \t0.25 \t0.25 \t0.25\n+tgggg \t0.25 \t0.25 \t0.25\n+tgggt \t0.25 \t0.25 \t0.25\n+tggta \t0.25 \t0.25 \t0.25\n+tggtc \t0.25 \t0.25 \t0.25\n+tggtg \t0.25 \t0.25 \t0.25\n+tggtt \t0.25 \t0.25 \t0.25\n+tgtaa \t0.25 \t0.25 \t0.25\n+tgtac \t0.25 \t0.25 \t0.25\n+tgtag \t0.25 \t0.25 \t0.25\n+tgtat \t0.25 \t0.25 \t0.25\n+tgtca \t0.25 \t0.25 \t0.25\n+tgtcc \t0.25 \t0.25 \t0.25\n+tgtcg \t0.25 \t0.25 \t0.25\n+tgtct \t0.25 \t0.25 \t0.25\n+tgtga \t0.25 \t0.25 \t0.25\n+tgtgc \t0.25 \t0.25 \t0.25\n+tgtgg \t0.25 \t0.25 \t0.25\n+tgtgt \t0.25 \t0.25 \t0.25\n+tgtta \t0.25 \t0.25 \t0.25\n+tgttc \t0.25 \t0.25 \t0.25\n+tgttg \t0.25 \t0.25 
\t0.25\n+tgttt \t0.25 \t0.25 \t0.25\n+ttaaa \t0.25 \t0.25 \t0.25\n+ttaac \t0.25 \t0.25 \t0.25\n+ttaag \t0.25 \t0.25 \t0.25\n+ttaat \t0.25 \t0.25 \t0.25\n+ttaca \t0.25 \t0.25 \t0.25\n+ttacc \t0.25 \t0.25 \t0.25\n+ttacg \t0.25 \t0.25 \t0.25\n+ttact \t0.25 \t0.25 \t0.25\n+ttaga \t0.25 \t0.25 \t0.25\n+ttagc \t0.25 \t0.25 \t0.25\n+ttagg \t0.25 \t0.25 \t0.25\n+ttagt \t0.25 \t0.25 \t0.25\n+ttata \t0.25 \t0.25 \t0.25\n+ttatc \t0.25 \t0.25 \t0.25\n+ttatg \t0.25 \t0.25 \t0.25\n+ttatt \t0.25 \t0.25 \t0.25\n+ttcaa \t0.25 \t0.25 \t0.25\n+ttcac \t0.25 \t0.25 \t0.25\n+ttcag \t0.25 \t0.25 \t0.25\n+ttcat \t0.25 \t0.25 \t0.25\n+ttcca \t0.25 \t0.25 \t0.25\n+ttccc \t0.25 \t0.25 \t0.25\n+ttccg \t0.25 \t0.25 \t0.25\n+ttcct \t0.25 \t0.25 \t0.25\n+ttcga \t0.25 \t0.25 \t0.25\n+ttcgc \t0.25 \t0.25 \t0.25\n+ttcgg \t0.25 \t0.25 \t0.25\n+ttcgt \t0.25 \t0.25 \t0.25\n+ttcta \t0.25 \t0.25 \t0.25\n+ttctc \t0.25 \t0.25 \t0.25\n+ttctg \t0.25 \t0.25 \t0.25\n+ttctt \t0.25 \t0.25 \t0.25\n+ttgaa \t0.25 \t0.25 \t0.25\n+ttgac \t0.25 \t0.25 \t0.25\n+ttgag \t0.25 \t0.25 \t0.25\n+ttgat \t0.25 \t0.25 \t0.25\n+ttgca \t0.25 \t0.25 \t0.25\n+ttgcc \t0.25 \t0.25 \t0.25\n+ttgcg \t0.25 \t0.25 \t0.25\n+ttgct \t0.25 \t0.25 \t0.25\n+ttgga \t0.25 \t0.25 \t0.25\n+ttggc \t0.25 \t0.25 \t0.25\n+ttggg \t0.25 \t0.25 \t0.25\n+ttggt \t0.25 \t0.25 \t0.25\n+ttgta \t0.25 \t0.25 \t0.25\n+ttgtc \t0.25 \t0.25 \t0.25\n+ttgtg \t0.25 \t0.25 \t0.25\n+ttgtt \t0.25 \t0.25 \t0.25\n+tttaa \t0.25 \t0.25 \t0.25\n+tttac \t0.25 \t0.25 \t0.25\n+tttag \t0.25 \t0.25 \t0.25\n+tttat \t0.25 \t0.25 \t0.25\n+tttca \t0.25 \t0.25 \t0.25\n+tttcc \t0.25 \t0.25 \t0.25\n+tttcg \t0.25 \t0.25 \t0.25\n+tttct \t0.25 \t0.25 \t0.25\n+tttga \t0.25 \t0.25 \t0.25\n+tttgc \t0.25 \t0.25 \t0.25\n+tttgg \t0.25 \t0.25 \t0.25\n+tttgt \t0.25 \t0.25 \t0.25\n+tttta \t0.25 \t0.25 \t0.25\n+ttttc \t0.25 \t0.25 \t0.25\n+ttttg \t0.25 \t0.25 \t0.25\n+ttttt \t0.25 \t0.25 \t0.25\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/trained_species/fly/augustus/fly_igenic_probs.pbl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_igenic_probs.pbl Thu Aug 26 06:55:33 2021 +0000 |
[ |
b"@@ -0,0 +1,3445 @@\n+[1]\n+# (a,c,g,t)= (0.295, 0.205, 0.205, 0.295)\n+#\n+# Probabilities file for the intergenic region model\n+#\n+\n+# k =\n+4\n+\n+# The P_l's\n+[P_ls]\n+# l=\n+0\n+# Values\n+A\t0.304\n+C\t0.196\n+G\t0.196\n+T\t0.304\n+# l=\n+1\n+# Values\n+AA\t0.112\n+AC\t0.0524\n+AG\t0.0515\n+AT\t0.088\n+CA\t0.0665\n+CC\t0.0404\n+CG\t0.0378\n+CT\t0.0515\n+GA\t0.0524\n+GC\t0.051\n+GG\t0.0404\n+GT\t0.0524\n+TA\t0.0729\n+TC\t0.0525\n+TG\t0.0665\n+TT\t0.112\n+# l=\n+2\n+# Values\n+AAA\t0.0446\n+AAC\t0.0182\n+AAG\t0.0173\n+AAT\t0.0319\n+ACA\t0.0197\n+ACC\t0.00915\n+ACG\t0.00876\n+ACT\t0.0148\n+AGA\t0.0141\n+AGC\t0.0136\n+AGG\t0.00903\n+AGT\t0.0148\n+ATA\t0.0251\n+ATC\t0.0139\n+ATG\t0.0171\n+ATT\t0.0319\n+CAA\t0.0228\n+CAC\t0.0135\n+CAG\t0.013\n+CAT\t0.0171\n+CCA\t0.0146\n+CCC\t0.00889\n+CCG\t0.00783\n+CCT\t0.00903\n+CGA\t0.0118\n+CGC\t0.00949\n+CGG\t0.00783\n+CGT\t0.00876\n+CTA\t0.0105\n+CTC\t0.0106\n+CTG\t0.013\n+CTT\t0.0173\n+GAA\t0.0192\n+GAC\t0.00881\n+GAG\t0.0106\n+GAT\t0.0139\n+GCA\t0.0166\n+GCC\t0.0113\n+GCG\t0.00949\n+GCT\t0.0136\n+GGA\t0.0111\n+GGC\t0.0113\n+GGG\t0.00889\n+GGT\t0.00915\n+GTA\t0.0118\n+GTC\t0.00881\n+GTG\t0.0135\n+GTT\t0.0182\n+TAA\t0.0254\n+TAC\t0.0118\n+TAG\t0.0105\n+TAT\t0.0251\n+TCA\t0.0156\n+TCC\t0.0111\n+TCG\t0.0118\n+TCT\t0.0141\n+TGA\t0.0156\n+TGC\t0.0166\n+TGG\t0.0146\n+TGT\t0.0197\n+TTA\t0.0254\n+TTC\t0.0192\n+TTG\t0.0228\n+TTT\t0.0446\n+# l=\n+3\n+# 
Values\n+AAAA\t0.0172\n+AAAC\t0.00735\n+AAAG\t0.00683\n+AAAT\t0.0133\n+AACA\t0.00675\n+AACC\t0.00305\n+AACG\t0.00312\n+AACT\t0.00531\n+AAGA\t0.00461\n+AAGC\t0.0042\n+AAGG\t0.00313\n+AAGT\t0.00541\n+AATA\t0.00908\n+AATC\t0.00494\n+AATG\t0.00662\n+AATT\t0.0113\n+ACAA\t0.00692\n+ACAC\t0.00428\n+ACAG\t0.00328\n+ACAT\t0.00519\n+ACCA\t0.00329\n+ACCC\t0.00206\n+ACCG\t0.00177\n+ACCT\t0.00202\n+ACGA\t0.00278\n+ACGC\t0.00217\n+ACGG\t0.00174\n+ACGT\t0.00208\n+ACTA\t0.00318\n+ACTC\t0.00281\n+ACTG\t0.00337\n+ACTT\t0.00541\n+AGAA\t0.00511\n+AGAC\t0.00223\n+AGAG\t0.00305\n+AGAT\t0.00366\n+AGCA\t0.00446\n+AGCC\t0.00283\n+AGCG\t0.00259\n+AGCT\t0.00373\n+AGGA\t0.00284\n+AGGC\t0.00226\n+AGGG\t0.0019\n+AGGT\t0.00202\n+AGTA\t0.0032\n+AGTC\t0.00248\n+AGTG\t0.00379\n+AGTT\t0.00531\n+ATAA\t0.00847\n+ATAC\t0.00397\n+ATAG\t0.00317\n+ATAT\t0.00947\n+ATCA\t0.00424\n+ATCC\t0.00279\n+ATCG\t0.00317\n+ATCT\t0.00366\n+ATGA\t0.00422\n+ATGC\t0.00407\n+ATGG\t0.00365\n+ATGT\t0.00518\n+ATTA\t0.00767\n+ATTC\t0.00495\n+ATTG\t0.00602\n+ATTT\t0.0133\n+CAAA\t0.00896\n+CAAC\t0.00433\n+CAAG\t0.00353\n+CAAT\t0.00602\n+CACA\t0.00525\n+CACC\t0.00243\n+CACG\t0.002\n+CACT\t0.00379\n+CAGA\t0.00344\n+CAGC\t0.00407\n+CAGG\t0.00215\n+CAGT\t0.00338\n+CATA\t0.00464\n+CATC\t0.00309\n+CATG\t0.00278\n+CATT\t0.00662\n+CCAA\t0.00486\n+CCAC\t0.00325\n+CCAG\t0.00288\n+CCAT\t0.00365\n+CCCA\t0.00319\n+CCCC\t0.00225\n+CCCG\t0.00154\n+CCCT\t0.0019\n+CCGA\t0.00238\n+CCGC\t0.00225\n+CCGG\t0.00146\n+CCGT\t0.00174\n+CCTA\t0.00173\n+CCTC\t0.00201\n+CCTG\t0.00215\n+CCTT\t0.00313\n+CGAA\t0.00419\n+CGAC\t0.00209\n+CGAG\t0.00231\n+CGAT\t0.00317\n+CGCA\t0.00305\n+CGCC\t0.00236\n+CGCG\t0.00149\n+CGCT\t0.00259\n+CGGA\t0.0022\n+CGGC\t0.00232\n+CGGG\t0.00154\n+CGGT\t0.00178\n+CGTA\t0.00197\n+CGTC\t0.00167\n+CGTG\t0.002\n+CGTT\t0.00312\n+CTAA\t0.00374\n+CTAC\t0.00193\n+CTAG\t0.00168\n+CTAT\t0.00317\n+CTCA\t0.00284\n+CTCC\t0.00236\n+CTCG\t0.00231\n+CTCT\t0.00305\n+CTGA\t0.00304\n+CTGC\t0.00383\n+CTGG\t0.00288\n+CTGT\t0.00328\n+CTTA\t0.00383\n+CTTC
\t0.00316\n+CTTG\t0.00353\n+CTTT\t0.00683\n+GAAA\t0.00815\n+GAAC\t0.00296\n+GAAG\t0.00316\n+GAAT\t0.00495\n+GACA\t0.003\n+GACC\t0.00166\n+GACG\t0.00167\n+GACT\t0.00248\n+GAGA\t0.003\n+GAGC\t0.00274\n+GAGG\t0.00201\n+GAGT\t0.00281\n+GATA\t0.00357\n+GATC\t0.00227\n+GATG\t0.00309\n+GATT\t0.00494\n+GCAA\t0.00554\n+GCAC\t0.00316\n+GCAG\t0.00383\n+GCAT\t0.00407\n+GCCA\t0.00448\n+GCCC\t0.00221\n+GCCG\t0.00232\n+GCCT\t0.00226\n+GCGA\t0.00295\n+GCGC\t0.00212\n+GCGG\t0.00225\n+GCGT\t0.00217\n+GCTA\t0.00261\n+GCTC\t0.00274\n+GCTG\t0.00407\n+GCTT\t0.0042\n+GGAA\t0.00414\n+GGAC\t0.00178\n+GGAG\t0.00236\n+GGAT\t0.00279\n+GGCA\t0.0037\n+GGCC\t0.00239\n+GGCG\t0.00236\n+GGCT\t0.00282\n+GGGA\t0.00237\n+GGGC\t0.00221\n+GGGG\t0.00225\n+GGGT\t0.00206\n+GGTA\t0.00201\n+GGTC\t0.00166\n+GGTG\t0.00243\n+GGTT\t0.00305\n+GTAA\t0.0039\n+GTAC\t0.00205\n+GTAG\t0.00193\n+GTAT\t0.00397\n+GTCA\t0.00271\n+GTCC\t0.00178\n+GTCG\t0.00209\n+GTCT\t0.00223\n+GTGA\t0.00279\n+GTGC\t0.00315\n+GTGG\t0.00325\n+GTGT\t0.00428\n+GTTA\t0.00359\n+GTTC\t0.00296\n+GTTG\t0.00433\n+GTTT\t0.00735\n+TAAA\t0.0103\n+TAAC\t0.00359\n+TAAG\t0.00383\n+TAAT\t0.00768\n+TACA\t0.00467\n+TACC\t0.00201\n+TACG\t0.00197\n+TACT\t0."..b'1188\n+GTGCA\t415919\n+GTGCC\t261766\n+GTGCG\t295620\n+GTGCT\t323411\n+GTGGA\t332993\n+GTGGC\t385960\n+GTGGG\t323106\n+GTGGT\t292234\n+GTGTA\t359272\n+GTGTC\t243286\n+GTGTG\t676471\n+GTGTT\t478535\n+GTTAA\t529553\n+GTTAC\t237009\n+GTTAG\t244930\n+GTTAT\t462774\n+GTTCA\t351705\n+GTTCC\t242639\n+GTTCG\t284705\n+GTTCT\t337027\n+GTTGA\t395847\n+GTTGC\t467863\n+GTTGG\t398583\n+GTTGT\t518738\n+GTTTA\t656320\n+GTTTC\t525254\n+GTTTG\t635106\n+GTTTT\t1204266\n+TAAAA\t1550931\n+TAAAC\t656320\n+TAAAG\t584278\n+TAAAT\t1453686\n+TAACA\t514469\n+TAACC\t257738\n+TAACG\t228098\n+TAACT\t473671\n+TAAGA\t411688\n+TAAGC\t412400\n+TAAGG\t243072\n+TAAGT\t505986\n+TAATA\t887022\n+TAATC\t429688\n+TAATG\t557972\n+TAATT\t1279151\n+TACAA\t638171\n+TACAC\t359272\n+TACAG\t269554\n+TACAT\t651698\n+TACCA\t294521\n+TACCC\t186072\n+TACC
G\t134084\n+TACCT\t210051\n+TACGA\t255187\n+TACGC\t188583\n+TACGG\t144135\n+TACGT\t222807\n+TACTA\t299145\n+TACTC\t243515\n+TACTG\t242385\n+TACTT\t529461\n+TAGAA\t445486\n+TAGAC\t192718\n+TAGAG\t225170\n+TAGAT\t371662\n+TAGCA\t338376\n+TAGCC\t240530\n+TAGCG\t157045\n+TAGCT\t334860\n+TAGGA\t194399\n+TAGGC\t179285\n+TAGGG\t138443\n+TAGGT\t199383\n+TAGTA\t299145\n+TAGTC\t203900\n+TAGTG\t245329\n+TAGTT\t560260\n+TATAA\t905114\n+TATAC\t478377\n+TATAG\t374869\n+TATAT\t1443312\n+TATCA\t432085\n+TATCC\t266505\n+TATCG\t299809\n+TATCT\t467523\n+TATGA\t432243\n+TATGC\t441619\n+TATGG\t339799\n+TATGT\t693461\n+TATTA\t887022\n+TATTC\t522722\n+TATTG\t602062\n+TATTT\t1720154\n+TCAAA\t830733\n+TCAAC\t395847\n+TCAAG\t362165\n+TCAAT\t678967\n+TCACA\t402109\n+TCACC\t212946\n+TCACG\t164762\n+TCACT\t367070\n+TCAGA\t303978\n+TCAGC\t373449\n+TCAGG\t185559\n+TCAGT\t385818\n+TCATA\t432243\n+TCATC\t340433\n+TCATG\t254268\n+TCATT\t707017\n+TCCAA\t464453\n+TCCAC\t332993\n+TCCAG\t285196\n+TCCAT\t425248\n+TCCCA\t332570\n+TCCCC\t246063\n+TCCCG\t164879\n+TCCCT\t229526\n+TCCGA\t253087\n+TCCGC\t252341\n+TCCGG\t163286\n+TCCGT\t233416\n+TCCTA\t194399\n+TCCTC\t267623\n+TCCTG\t279201\n+TCCTT\t425709\n+TCGAA\t507221\n+TCGAC\t242629\n+TCGAG\t284084\n+TCGAT\t460551\n+TCGCA\t375343\n+TCGCC\t294688\n+TCGCG\t173249\n+TCGCT\t370922\n+TCGGA\t253087\n+TCGGC\t265278\n+TCGGG\t190433\n+TCGGT\t269143\n+TCGTA\t255187\n+TCGTC\t227718\n+TCGTG\t218373\n+TCGTT\t441393\n+TCTAA\t393086\n+TCTAC\t220356\n+TCTAG\t208283\n+TCTAT\t413312\n+TCTCA\t305326\n+TCTCC\t255587\n+TCTCG\t248235\n+TCTCT\t424487\n+TCTGA\t303978\n+TCTGC\t389381\n+TCTGG\t307187\n+TCTGT\t414129\n+TCTTA\t411688\n+TCTTC\t375086\n+TCTTG\t371605\n+TCTTT\t735509\n+TGAAA\t979910\n+TGAAC\t351705\n+TGAAG\t365491\n+TGAAT\t675983\n+TGACA\t381622\n+TGACC\t222424\n+TGACG\t180167\n+TGACT\t330719\n+TGAGA\t305326\n+TGAGC\t300824\n+TGAGG\t195709\n+TGAGT\t363535\n+TGATA\t432085\n+TGATC\t244669\n+TGATG\t377889\n+TGATT\t689516\n+TGCAA\t739059\n+TGCAC\t415919\n+TGCAG\t455674\n+TG
CAT\t610206\n+TGCCA\t582755\n+TGCCC\t291844\n+TGCCG\t300708\n+TGCCT\t343894\n+TGCGA\t375343\n+TGCGC\t275852\n+TGCGG\t278984\n+TGCGT\t321845\n+TGCTA\t338376\n+TGCTC\t373175\n+TGCTG\t560268\n+TGCTT\t560387\n+TGGAA\t571498\n+TGGAC\t232424\n+TGGAG\t307866\n+TGGAT\t396414\n+TGGCA\t582755\n+TGGCC\t458889\n+TGGCG\t310066\n+TGGCT\t490513\n+TGGGA\t332570\n+TGGGC\t341654\n+TGGGG\t289650\n+TGGGT\t347866\n+TGGTA\t294521\n+TGGTC\t249199\n+TGGTG\t330244\n+TGGTT\t479937\n+TGTAA\t596325\n+TGTAC\t339181\n+TGTAG\t275425\n+TGTAT\t708010\n+TGTCA\t381622\n+TGTCC\t255633\n+TGTCG\t254663\n+TGTCT\t341705\n+TGTGA\t402109\n+TGTGC\t487257\n+TGTGG\t435188\n+TGTGT\t832228\n+TGTTA\t514469\n+TGTTC\t395511\n+TGTTG\t687960\n+TGTTT\t1174890\n+TTAAA\t1506984\n+TTAAC\t529553\n+TTAAG\t603193\n+TTAAT\t1184370\n+TTACA\t596325\n+TTACC\t289904\n+TTACG\t255155\n+TTACT\t462537\n+TTAGA\t393086\n+TTAGC\t402136\n+TTAGG\t254810\n+TTAGT\t488881\n+TTATA\t905114\n+TTATC\t500716\n+TTATG\t638944\n+TTATT\t1435871\n+TTCAA\t845058\n+TTCAC\t412587\n+TTCAG\t438454\n+TTCAT\t676983\n+TTCCA\t571498\n+TTCCC\t388709\n+TTCCG\t320193\n+TTCCT\t422559\n+TTCGA\t507221\n+TTCGC\t428272\n+TTCGG\t349863\n+TTCGT\t436061\n+TTCTA\t445486\n+TTCTC\t421020\n+TTCTG\t474000\n+TTCTT\t759053\n+TTGAA\t845058\n+TTGAC\t370823\n+TTGAG\t388608\n+TTGAT\t662989\n+TTGCA\t739059\n+TTGCC\t538214\n+TTGCG\t352393\n+TTGCT\t647156\n+TTGGA\t464453\n+TTGGC\t630591\n+TTGGG\t411508\n+TTGGT\t490945\n+TTGTA\t638171\n+TTGTC\t422058\n+TTGTG\t611423\n+TTGTT\t1171948\n+TTTAA\t1506984\n+TTTAC\t638916\n+TTTAG\t573057\n+TTTAT\t1526553\n+TTTCA\t979910\n+TTTCC\t759290\n+TTTCG\t714479\n+TTTCT\t897347\n+TTTGA\t830733\n+TTTGC\t883669\n+TTTGG\t776539\n+TTTGT\t1190742\n+TTTTA\t1550931\n+TTTTC\t1313857\n+TTTTG\t1382023\n+TTTTT\t2802550\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/trained_species/fly/augustus/fly_intron_probs.pbl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_intron_probs.pbl Thu Aug 26 06:55:33 2021 +0000 |
[ |
b'@@ -0,0 +1,20382 @@\n+#intron model parameters\n+# begin of content independent part\n+#\n+# ASS probabilities\n+#only nonpseudocount values are shown\n+[ASS]\n+# Size of vector\n+1024\n+# c_ass (ASS count)\n+31180\n+# asspseudocount (added to all possible patterns, no matter if they occur)\n+0.01\n+# Probabilities * 1000\n+aaaaa\t0.321\n+aaaac\t0.289\n+aaaag\t0.0324\n+aaaat\t0.289\n+aaaca\t0.161\n+aaacc\t0.257\n+aaacg\t0.0644\n+aaact\t0.257\n+aaaga\t0.0324\n+aaagc\t0.0644\n+aaagg\t0.0324\n+aaagt\t0.0965\n+aaata\t0.161\n+aaatc\t0.193\n+aaatg\t0.129\n+aaatt\t0.161\n+aacaa\t0.193\n+aacag\t0.129\n+aacat\t0.129\n+aacca\t0.129\n+aaccc\t0.193\n+aaccg\t0.193\n+aacct\t0.129\n+aacga\t0.161\n+aacgc\t0.0644\n+aacgg\t0.0965\n+aacta\t0.0644\n+aactc\t0.0965\n+aactg\t0.129\n+aactt\t0.289\n+aagaa\t0.129\n+aagag\t0.0324\n+aagat\t0.161\n+aagca\t0.0324\n+aagcc\t0.0965\n+aagcg\t0.193\n+aagct\t0.129\n+aagga\t0.0324\n+aaggc\t0.0324\n+aaggg\t0.0965\n+aaggt\t0.0644\n+aagta\t0.0324\n+aagtc\t0.0965\n+aagtg\t0.129\n+aagtt\t0.193\n+aataa\t0.193\n+aatac\t0.353\n+aatag\t0.0644\n+aatat\t0.353\n+aatca\t0.417\n+aatcc\t0.513\n+aatcg\t0.385\n+aatct\t0.321\n+aatga\t0.417\n+aatgc\t0.449\n+aatgg\t0.289\n+aatgt\t0.321\n+aatta\t0.193\n+aattc\t0.417\n+aattg\t0.385\n+aattt\t0.257\n+acaaa\t0.161\n+acaac\t0.193\n+acaag\t0.0965\n+acaat\t0.193\n+acaca\t0.129\n+acacc\t0.257\n+acacg\t0.0324\n+acact\t0.129\n+acagc\t0.161\n+acagg\t0.129\n+acagt\t0.0324\n+acata\t0.0324\n+acatc\t0.161\n+acatg\t0.0965\n+acatt\t0.193\n+accaa\t0.161\n+accac\t0.129\n+accag\t0.0644\n+accat\t0.193\n+accca\t0.0965\n+acccc\t0.0644\n+acccg\t0.193\n+accct\t0.129\n+accga\t0.129\n+accgg\t0.0324\n+accgt\t0.161\n+accta\t0.129\n+acctc\t0.129\n+acctg\t0.193\n+acctt\t0.193\n+acgaa\t0.0324\n+acgac\t0.161\n+acgag\t0.0965\n+acgat\t0.0644\n+acgca\t0.129\n+acgcc\t0.129\n+acgcg\t0.0324\n+acgct\t0.129\n+acgga\t0.161\n+acggc\t0.0644\n+acggt\t0.0644\n+acgtc\t0.129\n+acgtg\t0.0965\n+acgtt\t0.129\n+actaa\t0.129\n+actac\t0.417\n+actat\t0.257\n+actca\t0.257\n+a
ctcc\t0.193\n+actcg\t0.0965\n+actct\t0.321\n+actga\t0.257\n+actgc\t0.385\n+actgg\t0.225\n+actgt\t0.289\n+actta\t0.129\n+acttc\t0.193\n+acttg\t0.0965\n+acttt\t0.289\n+agaaa\t0.513\n+agaac\t0.77\n+agaag\t0.0965\n+agaat\t0.545\n+agaca\t0.353\n+agacc\t0.353\n+agacg\t0.385\n+agact\t0.513\n+agaga\t0.193\n+agagc\t0.385\n+agagg\t0.129\n+agagt\t0.289\n+agata\t0.321\n+agatc\t0.449\n+agatg\t0.417\n+agatt\t0.353\n+agcaa\t0.449\n+agcac\t0.257\n+agcag\t0.0644\n+agcat\t0.642\n+agcca\t0.353\n+agccc\t0.161\n+agccg\t0.353\n+agcct\t0.257\n+agcga\t0.385\n+agcgc\t0.225\n+agcgg\t0.129\n+agcgt\t0.225\n+agcta\t0.161\n+agctc\t0.161\n+agctg\t0.577\n+agctt\t0.417\n+aggaa\t0.225\n+aggac\t0.193\n+aggag\t0.129\n+aggat\t0.353\n+aggca\t0.417\n+aggcc\t0.161\n+aggcg\t0.129\n+aggct\t0.225\n+aggga\t0.129\n+agggc\t0.129\n+agggg\t0.0324\n+agggt\t0.193\n+aggta\t0.161\n+aggtc\t0.321\n+aggtg\t0.193\n+aggtt\t0.257\n+agtaa\t0.609\n+agtac\t0.706\n+agtag\t0.0324\n+agtat\t0.77\n+agtca\t0.866\n+agtcc\t0.577\n+agtcg\t0.385\n+agtct\t0.898\n+agtga\t0.706\n+agtgc\t0.738\n+agtgg\t0.994\n+agtgt\t0.449\n+agtta\t0.577\n+agttc\t0.417\n+agttg\t0.449\n+agttt\t0.802\n+ataaa\t0.193\n+ataac\t0.129\n+ataat\t0.161\n+ataca\t0.0324\n+atacc\t0.0644\n+atacg\t0.0324\n+atact\t0.129\n+atata\t0.0644\n+atatc\t0.0644\n+atatg\t0.129\n+atatt\t0.0644\n+atcaa\t0.225\n+atcac\t0.0965\n+atcat\t0.0644\n+atcca\t0.0965\n+atccc\t0.0965\n+atccg\t0.0965\n+atcct\t0.0965\n+atcga\t0.0965\n+atcgc\t0.129\n+atcgt\t0.0965\n+atcta\t0.129\n+atctc\t0.0644\n+atctg\t0.193\n+atctt\t0.0644\n+atgaa\t0.0644\n+atgac\t0.0965\n+atgat\t0.161\n+atgca\t0.161\n+atgcc\t0.225\n+atgcg\t0.129\n+atgct\t0.321\n+atgga\t0.161\n+atggc\t0.0965\n+atggg\t0.0644\n+atggt\t0.161\n+atgta\t0.0324\n+atgtc\t0.193\n+atgtg\t0.0965\n+atgtt\t0.129\n+attaa\t0.0965\n+attac\t0.225\n+attag\t0.0324\n+attat\t0.0644\n+attca\t0.0965\n+attcc\t0.257\n+attcg\t0.193\n+attct\t0.0644\n+attga\t0.0965\n+attgc\t0.257\n+attgg\t0.0644\n+attgt\t0.161\n+attta\t0.129\n+atttc\t0.289\n+atttg\t0.161\n+atttt\t0.289\n+caa
aa\t6.77\n+caaac\t5.48\n+caaag\t2.47\n+caaat\t5.26\n+caaca\t3.88\n+caacc\t3.85\n+caacg\t3.5\n+caact\t4.2\n+caaga\t1.64\n+caagc\t1.51\n+caagg\t1.31\n+caagt\t1.28\n+caata\t2.5\n+caatc\t3.59\n+caatg\t5.23\n+caatt\t3.27\n+cacaa\t4.91\n+cacac\t2.63\n+cacag\t0.545\n+cacat\t3.21\n+cacca\t2.66\n+caccc\t2.47\n+caccg\t1.41\n+cacct\t2.79\n+cacga\t3.05\n+cacgc\t2.82\n+cacgg\t1.09\n+cacgt\t1.03\n+cacta\t1.83\n+cactc\t2.79\n+cactg\t3.11\n+cactt\t2.85\n+cagaa\t3.27\n+cagac\t1.44\n+cagag\t1.76\n+cagat\t2.44\n+cagca\t2.89\n+cagcc\t2.79\n+cagcg\t2.31\n+cagct\t3.62\n+cagga\t2.76\n+caggc\t1.'..b'855631\n+#\tttcgg\t698837\n+#\tttcgt\t871342\n+#\tttcta\t890087\n+#\tttctc\t841394\n+#\tttctg\t946845\n+#\tttctt\t1516968\n+#\tttgaa\t1688548\n+#\tttgac\t741091\n+#\tttgag\t776440\n+#\tttgat\t1325067\n+#\tttgca\t1476775\n+#\tttgcc\t1075827\n+#\tttgcg\t704154\n+#\tttgct\t1293450\n+#\tttgga\t927788\n+#\tttggc\t1260242\n+#\tttggg\t822345\n+#\tttggt\t980857\n+#\tttgta\t1275361\n+#\tttgtc\t843496\n+#\tttgtg\t1221327\n+#\tttgtt\t2341971\n+#\ttttaa\t3011593\n+#\ttttac\t1276443\n+#\ttttag\t1145019\n+#\ttttat\t3051195\n+#\ttttca\t1957941\n+#\ttttcc\t1517354\n+#\ttttcg\t1427298\n+#\ttttct\t1793145\n+#\ttttga\t1659999\n+#\ttttgc\t1765755\n+#\ttttgg\t1551540\n+#\ttttgt\t2379188\n+#\ttttta\t3099452\n+#\tttttc\t2625305\n+#\tttttg\t2761348\n+#\tttttt\t5600229\n+\n+# motif upstream of acceptor splice site\n+[ASSMOTIF]\n+# width of motif, n=\n+32\n+# order of markov model, k=\n+1\n+# markov chain emission probabilities\n+ 0 0.358\t0.167\t0.126\t0.35\t0.329\t0.218\t0.131\t0.323\t0.326\t0.213\t0.151\t0.31\t0.298\t0.172\t0.17\t0.36\n+ 1 0.358\t0.168\t0.125\t0.35\t0.328\t0.215\t0.13\t0.327\t0.328\t0.212\t0.151\t0.309\t0.299\t0.171\t0.169\t0.36\n+ 2 0.358\t0.168\t0.122\t0.352\t0.327\t0.217\t0.128\t0.329\t0.329\t0.214\t0.148\t0.309\t0.301\t0.169\t0.167\t0.362\n+ 3 0.358\t0.17\t0.119\t0.353\t0.326\t0.216\t0.126\t0.331\t0.328\t0.216\t0.146\t0.31\t0.303\t0.169\t0.165\t0.362\n+ 4 
0.358\t0.172\t0.116\t0.355\t0.324\t0.216\t0.124\t0.336\t0.325\t0.218\t0.145\t0.312\t0.305\t0.17\t0.163\t0.363\n+ 5 0.358\t0.174\t0.11\t0.358\t0.322\t0.215\t0.12\t0.343\t0.324\t0.219\t0.143\t0.314\t0.308\t0.169\t0.159\t0.364\n+ 6 0.358\t0.176\t0.104\t0.361\t0.319\t0.216\t0.117\t0.349\t0.323\t0.221\t0.139\t0.317\t0.313\t0.169\t0.155\t0.363\n+ 7 0.359\t0.178\t0.0971\t0.365\t0.315\t0.216\t0.114\t0.355\t0.322\t0.222\t0.134\t0.322\t0.316\t0.168\t0.151\t0.365\n+ 8 0.362\t0.179\t0.0893\t0.37\t0.31\t0.216\t0.111\t0.363\t0.321\t0.223\t0.129\t0.328\t0.32\t0.168\t0.146\t0.367\n+ 9 0.362\t0.182\t0.0832\t0.372\t0.305\t0.218\t0.106\t0.371\t0.319\t0.225\t0.124\t0.332\t0.323\t0.167\t0.142\t0.369\n+10 0.364\t0.184\t0.0758\t0.376\t0.301\t0.219\t0.103\t0.377\t0.314\t0.228\t0.119\t0.339\t0.325\t0.167\t0.139\t0.37\n+11 0.364\t0.185\t0.0701\t0.38\t0.298\t0.218\t0.102\t0.382\t0.309\t0.229\t0.116\t0.346\t0.324\t0.167\t0.137\t0.372\n+12 0.366\t0.185\t0.0646\t0.385\t0.294\t0.22\t0.101\t0.385\t0.306\t0.228\t0.114\t0.352\t0.32\t0.168\t0.137\t0.375\n+13 0.367\t0.183\t0.0603\t0.389\t0.293\t0.22\t0.102\t0.385\t0.305\t0.228\t0.111\t0.356\t0.314\t0.169\t0.137\t0.38\n+14 0.364\t0.184\t0.0574\t0.394\t0.29\t0.223\t0.104\t0.383\t0.301\t0.228\t0.111\t0.36\t0.305\t0.171\t0.138\t0.386\n+15 0.359\t0.186\t0.0542\t0.4\t0.286\t0.227\t0.105\t0.382\t0.295\t0.23\t0.11\t0.365\t0.292\t0.175\t0.138\t0.395\n+16 0.355\t0.187\t0.0513\t0.407\t0.279\t0.232\t0.106\t0.382\t0.287\t0.234\t0.11\t0.37\t0.278\t0.18\t0.138\t0.404\n+17 0.347\t0.187\t0.05\t0.415\t0.275\t0.236\t0.108\t0.382\t0.277\t0.238\t0.11\t0.376\t0.261\t0.184\t0.14\t0.414\n+18 0.339\t0.187\t0.047\t0.426\t0.268\t0.239\t0.11\t0.382\t0.268\t0.239\t0.11\t0.383\t0.244\t0.189\t0.14\t0.426\n+19 0.329\t0.188\t0.0445\t0.438\t0.262\t0.242\t0.113\t0.383\t0.262\t0.239\t0.11\t0.39\t0.229\t0.194\t0.139\t0.437\n+20 0.32\t0.189\t0.0399\t0.451\t0.252\t0.246\t0.112\t0.389\t0.256\t0.239\t0.107\t0.398\t0.214\t0.2\t0.137\t0.449\n+21 
0.307\t0.192\t0.0368\t0.464\t0.244\t0.253\t0.111\t0.392\t0.245\t0.245\t0.106\t0.404\t0.201\t0.206\t0.133\t0.46\n+22 0.294\t0.198\t0.032\t0.475\t0.235\t0.266\t0.11\t0.389\t0.236\t0.253\t0.103\t0.407\t0.192\t0.218\t0.13\t0.46\n+23 0.286\t0.201\t0.0264\t0.487\t0.233\t0.273\t0.111\t0.383\t0.231\t0.261\t0.0999\t0.408\t0.186\t0.225\t0.127\t0.461\n+24 0.276\t0.207\t0.0212\t0.495\t0.228\t0.282\t0.107\t0.383\t0.225\t0.272\t0.0963\t0.406\t0.179\t0.233\t0.121\t0.466\n+25 0.258\t0.204\t0.0171\t0.521\t0.213\t0.286\t0.103\t0.398\t0.214\t0.271\t0.0937\t0.421\t0.166\t0.236\t0.114\t0.484\n+26 0.255\t0.208\t0.0131\t0.524\t0.207\t0.3\t0.0997\t0.393\t0.214\t0.274\t0.0902\t0.421\t0.148\t0.223\t0.102\t0.527\n+27 0.27\t0.208\t0.0141\t0.508\t0.22\t0.291\t0.116\t0.372\t0.225\t0.275\t0.0979\t0.403\t0.147\t0.217\t0.127\t0.509\n+28 0.269\t0.21\t0.0127\t0.508\t0.219\t0.296\t0.116\t0.369\t0.221\t0.282\t0.096\t0.402\t0.143\t0.217\t0.127\t0.514\n+29 0.267\t0.214\t0.0126\t0.507\t0.221\t0.301\t0.118\t0.36\t0.219\t0.29\t0.0977\t0.393\t0.139\t0.217\t0.128\t0.516\n+30 0.27\t0.214\t0.0126\t0.504\t0.223\t0.302\t0.12\t0.354\t0.224\t0.292\t0.097\t0.387\t0.135\t0.215\t0.13\t0.52\n+31 0.273\t0.209\t0.0136\t0.504\t0.224\t0.299\t0.123\t0.354\t0.23\t0.285\t0.0997\t0.385\t0.126\t0.203\t0.132\t0.539\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/trained_species/fly/augustus/fly_metapars.cfg --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_metapars.cfg Thu Aug 26 06:55:33 2021 +0000 |
b |
@@ -0,0 +1,48 @@ +# This file contains the list of meta parameters for the coding regions (CDS) which are subject to optimization. +# All other meta parameters are chosen as given in the species parameter file. The order +# of the parameters determines the order in the optimization process. +# Basically, different values for these meta parameters are tried out and the ones +# giving best performance in a cross-validation on the training set are chosen. +# For each parameter the range of possible values is specified after the parameter +# name and at least one white space. +# 3 cases are possible for the range: +# - an explicit list is given, e.g. protein "on" "off" +# - it is an integer range, e.g. window_size "1"-"5" +# - it is a range of floating point numbers, e.g. pseudocount "0.3"_"1.8" +# +# +# Mario Stanke, 19.12.2006 +# + +/Constant/dss_end "1"-"4" +/Constant/dss_start "1"-"3" +/Constant/ass_start "1"-"3" +/Constant/ass_end "0"-"4" +/Constant/ass_upwindow_size "1"-"50" +/IntronModel/d "100"-"950" +/IntronModel/ass_motif_memory "0"-"3" +/IntronModel/ass_motif_radius "0"-"4" +/ExonModel/tis_motif_memory "0"-"3" +/ExonModel/tis_motif_radius "0"-"3" +/Constant/trans_init_window "0"-"25" +/Constant/init_coding_len "0"-"18" +/ExonModel/patpseudocount "0.5"_"5" +/ExonModel/etpseudocount "0"-"10" +/ExonModel/etorder "0"-"3" +/Constant/intterm_coding_len "0"-"13" +/ExonModel/slope_of_bandwidth "0.05"_"0.6" +/ExonModel/minwindowcount "1"-"15" +/IGenicModel/patpseudocount "0.5"_"7" +/IntronModel/patpseudocount "0.5"_"7" +/IntronModel/slope_of_bandwidth "0.05"_"0.6" +/IntronModel/minwindowcount "1"-"8" +/IntronModel/asspseudocount "0.0005"_"0.03" +/IntronModel/dsspseudocount "0.0002"_"0.04" +/IntronModel/dssneighborfactor "0.0001"_"0.01" +/ExonModel/minPatSum "100"_"600" +/Constant/probNinCoding "0.15"_".25" +/Constant/decomp_num_steps "1"-"5" +# comment parameters out that you do not want to be subject of optimization +#/IGenicModel/k "4" "3" "5" +#/IntronModel/k "4" 
"3" "5" +#/ExonModel/k "4" "3" "5" |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/trained_species/fly/augustus/fly_metapars.utr.cfg --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_metapars.utr.cfg Thu Aug 26 06:55:33 2021 +0000 |
b |
@@ -0,0 +1,30 @@ +# This file contains the list of meta parameters for the Untranslated Regions (UTRs), which are subject to optimization. +# All other parameters are chosen as given in the species parameter file. The order +# of the parameters determines the order in the optimization process. +# Basically, different values for these meta parameters are tried out and the ones +# giving best performance in a cross-validation on the training set are chosen. +# For each parameter the range of possible values is specified after the parameter +# name and at least one white space. +# 3 cases are possible for the range: +# - an explicit list is given, e.g. protein "on" "off" +# - it is an integer range, e.g. window_size "1"-"5" +# - it is a range of floating point numbers, e.g. pseudocount "0.3"_"1.8" +# +# +# Mario Stanke, 9.5.2008 +# + +/UtrModel/prob_polya "0.0"_"1.0" +/UtrModel/d_polya_cleavage_min "6"-"14" +/UtrModel/d_polya_cleavage_max "17"-"27" +/UtrModel/tss_start "0"-"12" +/UtrModel/tss_end "0"-"8" +/UtrModel/tts_motif_memory "0"-"2" +/UtrModel/utr5patternweight "0.1"_"1.0" +/UtrModel/utr3patternweight "0.1"_"1.0" +/UtrModel/patpseudocount "1"_"3" +/UtrModel/tssup_k "0"-"2" +/UtrModel/slope_of_bandwidth "0.2"_"0.4" +/UtrModel/minwindowcount "1"-"4" +#/UtrModel/k "2"-"4" + |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/trained_species/fly/augustus/fly_parameters.cfg --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_parameters.cfg Thu Aug 26 06:55:33 2021 +0000 |
b |
@@ -0,0 +1,144 @@ +# +# parameters for all Drosophila versions +# +# date : 11.8.2009 +# + +# +# Properties for augustus +#------------------------------------ +/augustus/verbosity 3 # 0-3, 0: only print the necessary +maxDNAPieceSize 200000 # maximum segment that is predicted in one piece +stopCodonExcludedFromCDS false # make this 'true' if the CDS includes the stop codon (training and prediction) + +# gff output options: +protein on # output predicted protein sequence +codingseq off # output the coding sequence +cds on # output 'cds' as feature for exons +start on # output start codons (translation start) +stop on # output stop codons (translation stop) +introns on # output introns +tss on # output transcription start site +tts on # output transcription termination site +print_utr off # output 5'UTR and 3'UTR lines in addition to exon lines + +checkExAcc off # internal parameter for extrinsic accuracy + +# alternative transcripts and posterior probabilities +sample 100 # the number of sampling iterations +alternatives-from-sampling false # output alternative transcripts +minexonintronprob 0.08 # minimal posterior probability of all (coding) exons +minmeanexonintronprob 0.4 # minimal geometric mean of the posterior probs of introns and exons +maxtracks -1 # maximum number of reported transcripts per gene (-1: no limit) +keep_viterbi true # set to true if all Viterbi transcripts should be reported +uniqueCDS true # don't report transcripts that differ only in the UTR +UTR on # predict untranslated regions + +# +# +# The rest of the file contains mainly meta parameters used for training. 
+# + +# global constants +# ---------------------------- + +/Constant/trans_init_window 25 +/Constant/ass_upwindow_size 32 +/Constant/ass_start 1 +/Constant/ass_end 4 +/Constant/dss_start 3 +/Constant/dss_end 4 +/Constant/init_coding_len 9 +/Constant/intterm_coding_len 0 +/Constant/tss_upwindow_size 45 +/Constant/decomp_num_at 1 +/Constant/decomp_num_gc 1 +/Constant/gc_range_min 0.32 # This range has an effect only when decomp_num_steps>1. +/Constant/gc_range_max 0.50 # States the minimal and maximal percentage of c or g +/Constant/decomp_num_steps 1 # I recommend keeping this to 1 for most species. +/Constant/min_coding_len 201 # no gene with a coding sequence shorter than this is predicted +/Constant/probNinCoding 0.23 # divide this by .25 to get a malus for making one masked letter part of the coding sequence +/Constant/amberprob 0.34 # Prob(stop codon = tag), if 0 tag is assumed to code for amino acid +/Constant/ochreprob 0.41 # Prob(stop codon = taa), if 0 taa is assumed to code for amino acid +/Constant/opalprob 0.25 # Prob(stop codon = tga), if 0 tga is assumed to code for amino acid +/Constant/subopt_transcript_threshold 0.7 +/Constant/almost_identical_maxdiff 10 + +# type of weighing, one of 1 = equalWeights, 2 = gcContentClasses, 3 = multiNormalKernel +/BaseCount/weighingType 3 +# file with the weight matrix (only for multiNormalKernel type weighing) +/BaseCount/weightMatrixFile fly_weightmatrix.txt # change this to your species if at all necessary + +# Properties for IGenicModel +# ---------------------------- +/IGenicModel/verbosity 0 +/IGenicModel/infile fly_igenic_probs.pbl # change this and the other five filenames *_probs.pbl below to your species +/IGenicModel/outfile fly_igenic_probs.pbl +/IGenicModel/patpseudocount 5.0 +/IGenicModel/k 4 # order of the Markov chain for content model, keep equal to /ExonModel/k + +# Properties for ExonModel +# ---------------------------- +/ExonModel/verbosity 3 +/ExonModel/infile fly_exon_probs.pbl 
+/ExonModel/outfile fly_exon_probs.pbl +/ExonModel/patpseudocount 5.0 +/ExonModel/minPatSum 350 +/ExonModel/k 4 # order of the Markov chain for content model +/ExonModel/etorder 2 +/ExonModel/etpseudocount 3 +/ExonModel/exonlengthD 3000 # beyond this the distribution is geometric +/ExonModel/maxexonlength 15000 +/ExonModel/slope_of_bandwidth 0.3 +/ExonModel/minwindowcount 8 +/ExonModel/tis_motif_memory 3 +/ExonModel/tis_motif_radius 2 + +# Properties for IntronModel +# ---------------------------- +/IntronModel/verbosity 0 +/IntronModel/infile fly_intron_probs.pbl +/IntronModel/outfile fly_intron_probs.pbl +/IntronModel/patpseudocount 5.0 +/IntronModel/k 4 # order of the Markov chain for content model, keep equal to /ExonModel/k +/IntronModel/slope_of_bandwidth 0.4 +/IntronModel/minwindowcount 3 +/IntronModel/asspseudocount 0.01 +/IntronModel/dsspseudocount 0.01015 +/IntronModel/dssneighborfactor 0.001 +#/IntronModel/splicefile fly_splicefile.txt # this optional file contains additional windows around splice sites for training, uncomment if you have one +/IntronModel/sf_with_motif false # if true the splice file is also used to train the branch point region +/IntronModel/d 929 # constraint: this must be larger than 4 + /Constant/dss_end + /Constant/ass_upwindow_size + /Constant/ass_start +/IntronModel/ass_motif_memory 1 +/IntronModel/ass_motif_radius 4 + +# Properties for UtrModel +# ---------------------------- +/UtrModel/verbosity 3 +/UtrModel/infile fly_utr_probs.pbl +/UtrModel/outfile fly_utr_probs.pbl +/UtrModel/k 4 +/UtrModel/utr5patternweight 0.3 #0.7625 +/UtrModel/utr3patternweight 0.3 #0.5 +/UtrModel/patpseudocount 1 +/UtrModel/tssup_k 1 +/UtrModel/tssup_patpseudocount 1 +/UtrModel/slope_of_bandwidth 0.25 +/UtrModel/minwindowcount 1 +/UtrModel/exonlengthD 800 +/UtrModel/maxexonlength 1200 +/UtrModel/max3singlelength 2000 # excludes roughly 1% +/UtrModel/max3termlength 1200 # excludes ~ 0.3% +/UtrModel/tss_start 8 +/UtrModel/tss_end 5 +/UtrModel/tata_start 
2 +/UtrModel/tata_end 10 +/UtrModel/tata_pseudocount 2 +/UtrModel/d_tss_tata_min 26 # minimal distance between start of tata box (if existent) and tss +/UtrModel/d_tss_tata_max 37 # maximal distance between start of tata box (if existent) and tss +/UtrModel/polyasig_consensus aataaa # polyadenylation signal training not fully automated yet +/UtrModel/d_polyasig_cleavage 14 # the transcription end is predicted this many bases after the polyadenylation signal +/UtrModel/d_polya_cleavage_min 9 +/UtrModel/d_polya_cleavage_max 35 +/UtrModel/prob_polya 0.95 +/UtrModel/tts_motif_memory 1 |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/trained_species/fly/augustus/fly_utr_probs.pbl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_utr_probs.pbl Thu Aug 26 06:55:33 2021 +0000 |
[ |
b"@@ -0,0 +1,7137 @@\n+# UTR model parameters\n+# begin of content independent part\n+\n+# Length distributions\n+[UTRLENGTH]\n+# maximal individually stored length probability d=\n+800\n+# slope of smoothing bandwidth =\n+0.25\n+# smoothing minwindowcount =\n+1\n+# length 5' sing 5' init 5' int 5' term 3' sing 3' init 3' int 3' term \n+# total number of exons of above types\n+ 6188 1995 342 1995 7966 178 69 178\n+# number of exons exceeding length d=800\n+ 81 32 6 27 823 7 7 48\n+# 1000 P(len=k), k=0,1,..., 800\n+0\t0.681\t5.07e-15\t4.35e-06\t4.62\t0.00284\t5.75\t3.2e-18\t0\n+1\t1.82\t9.16e-12\t0.000391\t10.5\t0.0358\t10.4\t1.04e-17\t0\n+2\t2.76\t6.1e-09\t0.013\t13\t0.178\t8.35\t3.54e-17\t0\n+3\t3.13\t1.49e-06\t0.158\t12.5\t0.405\t6.03\t1.32e-16\t0\n+4\t3.51\t0.000135\t0.708\t12.6\t0.599\t9.23\t5.19e-16\t2.83e-20\n+5\t3.55\t0.00451\t1.17\t13.8\t0.682\t15.3\t2.13e-15\t2.89e-18\n+6\t2.95\t0.0564\t0.708\t15\t0.512\t20.4\t8.93e-15\t2.29e-16\n+7\t2.5\t0.272\t0.158\t15.4\t0.296\t18.5\t3.83e-14\t1.42e-14\n+8\t2.56\t0.548\t0.013\t15.3\t0.27\t12.9\t1.67e-13\t6.84e-13\n+9\t3.12\t0.566\t0.000391\t14.5\t0.332\t11.6\t7.41e-13\t2.57e-11\n+10\t3.83\t0.407\t4.35e-06\t13.6\t0.267\t11.3\t3.35e-12\t7.5e-10\n+11\t4.27\t0.328\t1.78e-08\t12.2\t0.252\t7.9\t1.54e-11\t1.71e-08\n+12\t4.37\t0.472\t5.42e-11\t9.93\t0.347\t6.71\t7.12e-11\t3.03e-07\n+13\t4.42\t0.528\t8.34e-10\t8.9\t0.549\t8.5\t3.31e-10\t4.18e-06\n+14\t4.28\t0.326\t3.63e-08\t9.1\t0.796\t6.71\t1.53e-09\t4.49e-05\n+15\t4.04\t0.333\t4.68e-06\t9.88\t0.875\t4.29\t7.03e-09\t0.000376\n+16\t4.33\t0.612\t0.000396\t11.4\t0.938\t6.01\t3.21e-08\t0.00245\n+17\t4.51\t0.867\t0.013\t12.8\t1.03\t9.28\t1.46e-07\t0.0124\n+18\t4.07\t1.1\t0.159\t12.8\t1.03\t11\t6.62e-07\t0.0492\n+19\t3.76\t1.22\t0.723\t11.3\t0.904\t11.6\t2.99e-06\t0.152\n+20\t3.96\t1.21\t1.34\t10.7\t0.878\t12.3\t1.32e-05\t0.364\n+21\t4.31\t1.12\t1.51\t11\t1.11\t13.6\t5.6e-05\t0.68\n+22\t4.45\t1.4\t1.86\t10.3\t1.38\t12.8\t0.000224\t0.989\n+23\t4.75\t2.06\t2.7\t8.37\t1.36\t8.55\t0.0008
33\t1.12\n+24\t4.76\t2.19\t3.68\t6.7\t1.25\t5.92\t0.00284\t0.994\n+25\t4.68\t2.04\t3.45\t6\t1.2\t6.15\t0.00881\t0.7\n+26\t4.47\t2.06\t3.12\t6.21\t1.14\t7.01\t0.0248\t0.434\n+27\t4.28\t2.35\t3.29\t6.62\t1.1\t7.47\t0.0629\t0.352\n+28\t4.83\t3.68\t3.35\t6.53\t0.986\t7.33\t0.144\t0.514\n+29\t5.98\t4.63\t3.25\t5.68\t0.939\t6.87\t0.296\t0.878\n+30\t6.15\t4.11\t3.2\t4.95\t1.15\t6.45\t0.549\t1.3\n+31\t5.15\t3.31\t3.23\t5.2\t1.46\t6.14\t0.918\t1.57\n+32\t4.9\t2.94\t3.22\t5.98\t1.72\t5.79\t1.39\t1.59\n+33\t5.97\t2.5\t3.03\t6.3\t1.94\t5.34\t1.9\t1.39\n+34\t7.15\t2.36\t2.73\t6.51\t2.02\t4.96\t2.37\t1.11\n+35\t7.24\t2.86\t2.53\t7.51\t2.19\t4.8\t2.73\t0.859\n+36\t6.12\t2.78\t2.52\t7.26\t2.39\t4.86\t2.94\t0.648\n+37\t4.95\t2.05\t2.64\t5.47\t2.29\t5.02\t3.02\t0.466\n+38\t4.34\t1.81\t2.72\t4.59\t2.15\t5.19\t3.05\t0.311\n+39\t4.11\t2.08\t2.69\t4.74\t2.11\t5.32\t3.09\t0.189\n+40\t4.19\t2.33\t2.7\t4.96\t2\t5.39\t3.19\t0.107\n+41\t4.29\t2.47\t2.95\t4.98\t1.85\t5.44\t3.36\t0.0591\n+42\t4.22\t2.58\t3.52\t4.91\t1.87\t5.48\t3.59\t0.0393\n+43\t4.26\t2.73\t4.35\t4.84\t2.07\t5.58\t3.84\t0.0391\n+44\t4.69\t2.92\t5.33\t4.86\t2.23\t5.78\t4.08\t0.0537\n+45\t4.43\t3.1\t6.32\t4.93\t2.3\t6.09\t4.29\t0.0815\n+46\t3.6\t3.22\t7.22\t4.96\t2.25\t6.51\t4.48\t0.123\n+47\t3.46\t3.29\t7.94\t4.92\t2\t6.99\t4.64\t0.18\n+48\t3.87\t3.39\t8.42\t4.86\t2\t7.48\t4.78\t0.254\n+49\t4.21\t3.6\t8.62\t4.87\t2.27\t7.91\t4.91\t0.344\n+50\t4.36\t4.01\t8.54\t4.94\t2.5\t8.23\t5.04\t0.448\n+51\t4.41\t4.55\t8.25\t4.98\t2.62\t8.41\t5.18\t0.562\n+52\t4.46\t5.04\t7.86\t4.89\t2.68\t8.41\t5.31\t0.679\n+53\t4.62\t5.25\t7.47\t4.66\t2.73\t8.23\t5.42\t0.79\n+54\t4.92\t5.08\t7.17\t4.36\t2.8\t7.87\t5.5\t0.887\n+55\t5.31\t4.64\t6.98\t4.12\t2.9\t7.36\t5.54\t0.963\n+56\t5.61\t4.19\t6.89\t4\t3.07\t6.75\t5.53\t1.01\n+57\t5.7\t3.93\t6.85\t4.03\t3.3\t6.11\t5.46\t1.03\n+58\t5.54\t3.92\t6.83\t4.14\t3.53\t5.48\t5.33\t1.03\n+59\t5.29\t4.03\t6.8\t4.25\t3.69\t4.93\t5.17\t1.01\n+60\t5.13\t4.16\t6.75\t4.3\t3.76\t4.48\t4.97\t0.984\n+61\t5.16\t4.24\t6.66\t
4.28\t3.79\t4.14\t4.74\t0.955\n+62\t5.35\t4.23\t6.53\t4.19\t3.8\t3.89\t4.52\t0.934\n+63\t5.63\t4.14\t6.37\t4.05\t3.79\t3.7\t4.29\t0.927\n+64\t5.92\t4\t6.18\t3.88\t3.77\t3.55\t4.1\t0.937\n+65\t6.16\t3.84\t5.96\t3.72\t3.79\t3.4\t3.93\t0.965\n+66\t6.28\t3.71\t5.74\t3.62\t3.87\t3.24\t3.8\t1.01\n+67\t6.2\t3.64\t5.51\t3.59\t3.99\t3.07\t3.72\t1.06\n+68\t5.92\t3.63\t5.3\t3.64\t4.07\t2.88\t3.68\t1.13\n+69\t5.57\t3.68\t5.09\t3.72\t4.04\t2.7\t3.68\t1.19\n+70\t5.32\t3.74\t4.91\t3.79\t3.89\t2.52\t3.73\t1.25\n+71\t5.27\t3.79\t4.73\t3.8\t3.7\t2.35\t3.8\t1.31\n+72\t5."..b'tcgc\t14965\n+#\tttcgg\t11310\n+#\tttcgt\t20239\n+#\tttcta\t28351\n+#\tttctc\t16356\n+#\tttctg\t18660\n+#\tttctt\t32422\n+#\tttgaa\t47499\n+#\tttgac\t14608\n+#\tttgag\t17968\n+#\tttgat\t36555\n+#\tttgca\t31476\n+#\tttgcc\t17850\n+#\tttgcg\t12080\n+#\tttgct\t22771\n+#\tttgga\t20302\n+#\tttggc\t17146\n+#\tttggg\t11267\n+#\tttggt\t16683\n+#\tttgta\t64460\n+#\tttgtc\t18109\n+#\tttgtg\t29654\n+#\tttgtt\t61476\n+#\ttttaa\t101164\n+#\ttttac\t38483\n+#\ttttag\t41333\n+#\ttttat\t89923\n+#\ttttca\t39758\n+#\ttttcc\t27803\n+#\ttttcg\t26520\n+#\ttttct\t36657\n+#\ttttga\t41172\n+#\ttttgc\t30519\n+#\ttttgg\t24062\n+#\ttttgt\t71057\n+#\ttttta\t94640\n+#\tttttc\t46611\n+#\tttttg\t62333\n+#\tttttt\t128813\n+\n+#\n+# The emission probabilities of the tss upwindow\n+#\n+[EMISSION-TSSUPWIN]\n+# size of the emission vector\n+16\n+#tssup_k=\n+1\n+# patpseudo : pseudocount for sequence patterns\n+1\n+aa\t0.326\n+ac\t0.195\n+ag\t0.204\n+at\t0.275\n+ca\t0.306\n+cc\t0.2\n+cg\t0.248\n+ct\t0.245\n+ga\t0.245\n+gc\t0.298\n+gg\t0.193\n+gt\t0.264\n+ta\t0.212\n+tc\t0.219\n+tg\t0.221\n+tt\t0.347\n+\n+# motif around the TSS of TATA-less promoters\n+[TSSMOTIF]\n+# width of motif, n=\n+13\n+# order of markov model, k=\n+0\n+# markov chain emission probabilities\n+ 0 0.268\t0.227\t0.222\t0.283\n+ 1 0.257\t0.23\t0.22\t0.293\n+ 2 0.279\t0.234\t0.216\t0.27\n+ 3 0.271\t0.238\t0.214\t0.278\n+ 4 0.285\t0.237\t0.216\t0.261\n+ 5 0.263\t0.214\t0.229\t0.295\n+ 
6 0.249\t0.223\t0.195\t0.332\n+ 7 0.162\t0.309\t0.241\t0.288\n+ 8 0.406\t0.252\t0.193\t0.15\n+ 9 0.275\t0.199\t0.233\t0.293\n+10 0.275\t0.194\t0.165\t0.365\n+11 0.294\t0.215\t0.182\t0.308\n+12 0.283\t0.207\t0.228\t0.282\n+\n+# motif around the TSS of TATA promoters\n+[TSSMOTIFTATA]\n+# width of motif, n=\n+13\n+# order of markov model, k=\n+0\n+# markov chain emission probabilities\n+ 0 0.293\t0.245\t0.261\t0.201\n+ 1 0.32\t0.188\t0.298\t0.193\n+ 2 0.3\t0.216\t0.248\t0.237\n+ 3 0.301\t0.216\t0.213\t0.271\n+ 4 0.314\t0.254\t0.221\t0.211\n+ 5 0.293\t0.174\t0.264\t0.269\n+ 6 0.235\t0.172\t0.213\t0.38\n+ 7 0.15\t0.422\t0.116\t0.312\n+ 8 0.501\t0.174\t0.208\t0.118\n+ 9 0.256\t0.211\t0.229\t0.304\n+10 0.264\t0.225\t0.113\t0.398\n+11 0.329\t0.222\t0.121\t0.329\n+12 0.312\t0.195\t0.213\t0.28\n+\n+# tata box motif \n+[TATAMOTIF]\n+# width of motif, n=\n+12\n+# order of markov model, k=\n+0\n+# markov chain emission probabilities\n+ 0 0.21\t0.237\t0.381\t0.173\n+ 1 0.19\t0.341\t0.336\t0.133\n+ 2 0.0032\t0.0032\t0.0032\t0.99\n+ 3 0.99\t0.0032\t0.0032\t0.0032\n+ 4 0.0032\t0.0032\t0.0032\t0.99\n+ 5 0.99\t0.0032\t0.0032\t0.0032\n+ 6 0.637\t0.0432\t0.0272\t0.293\n+ 7 0.99\t0.0032\t0.0032\t0.0032\n+ 8 0.602\t0.0592\t0.0576\t0.282\n+ 9 0.37\t0.0976\t0.386\t0.147\n+10 0.202\t0.315\t0.312\t0.171\n+11 0.254\t0.283\t0.278\t0.184\n+\n+# motif after polyA signal\n+[TTSMOTIF]\n+# width of motif, n=\n+14\n+# order of markov model, k=\n+1\n+# markov chain emission probabilities\n+ 0 0.368\t0.167\t0.147\t0.317\t0.414\t0.134\t0.181\t0.27\t0.365\t0.182\t0.115\t0.337\t0.342\t0.126\t0.224\t0.308\n+ 1 0.368\t0.17\t0.139\t0.323\t0.41\t0.145\t0.177\t0.268\t0.338\t0.199\t0.106\t0.358\t0.323\t0.136\t0.21\t0.33\n+ 2 0.397\t0.173\t0.114\t0.316\t0.425\t0.149\t0.173\t0.252\t0.35\t0.19\t0.106\t0.353\t0.335\t0.13\t0.198\t0.337\n+ 3 0.42\t0.159\t0.102\t0.319\t0.437\t0.152\t0.165\t0.246\t0.359\t0.191\t0.102\t0.348\t0.329\t0.124\t0.176\t0.371\n+ 4 
0.452\t0.148\t0.104\t0.296\t0.446\t0.147\t0.159\t0.248\t0.386\t0.187\t0.1\t0.328\t0.337\t0.114\t0.167\t0.382\n+ 5 0.455\t0.141\t0.107\t0.297\t0.435\t0.143\t0.159\t0.263\t0.375\t0.194\t0.0979\t0.333\t0.324\t0.115\t0.162\t0.399\n+ 6 0.453\t0.144\t0.11\t0.292\t0.421\t0.15\t0.156\t0.274\t0.362\t0.2\t0.104\t0.334\t0.323\t0.118\t0.158\t0.4\n+ 7 0.453\t0.145\t0.11\t0.292\t0.422\t0.158\t0.145\t0.275\t0.365\t0.194\t0.109\t0.333\t0.325\t0.121\t0.154\t0.401\n+ 8 0.454\t0.145\t0.109\t0.292\t0.427\t0.165\t0.132\t0.276\t0.372\t0.197\t0.112\t0.319\t0.329\t0.119\t0.158\t0.394\n+ 9 0.454\t0.153\t0.106\t0.288\t0.435\t0.156\t0.131\t0.278\t0.371\t0.197\t0.107\t0.325\t0.331\t0.118\t0.166\t0.385\n+10 0.451\t0.155\t0.104\t0.29\t0.424\t0.154\t0.145\t0.276\t0.361\t0.194\t0.105\t0.34\t0.336\t0.116\t0.169\t0.379\n+11 0.457\t0.157\t0.102\t0.284\t0.427\t0.156\t0.147\t0.271\t0.359\t0.199\t0.105\t0.337\t0.343\t0.117\t0.165\t0.376\n+12 0.461\t0.149\t0.103\t0.287\t0.432\t0.168\t0.145\t0.255\t0.364\t0.21\t0.106\t0.32\t0.348\t0.12\t0.162\t0.37\n+13 0.467\t0.144\t0.101\t0.287\t0.447\t0.171\t0.135\t0.247\t0.364\t0.225\t0.111\t0.3\t0.347\t0.122\t0.162\t0.369\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/trained_species/fly/augustus/fly_weightmatrix.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_weightmatrix.txt Thu Aug 26 06:55:33 2021 +0000 |
b |
@@ -0,0 +1,23 @@ +# +# This file contains a matrix used for weighing the training sequences +# when given an input sequence. Let z = (da, dc, dg, dt) be the vector +# containing the differences in the relative nucleotide frequencies of +# two sequences, the input sequence and a training sequence. +# Then the training sequence has weight proportional to +# +# exp ( - z M z^t) +# +# with M being the matrix specified below. +# If M is nonsingular, then (apart from a two normalizing factors) M +# is the inverse of the covariance matrix of a multinormal +# distribution - the kernel for the estimation. + + +# this matrix is gc-content only, i.e. +# weight = 10 * exp (-200 * (dc + dg))^2) +# in particular weight <= 10 +0 0 0 0 +0 200 0 0 +0 0 200 0 +0 0 0 0 + |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/trained_species/fly/info.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/trained_species/fly/info.json Thu Aug 26 06:55:33 2021 +0000 |
[ |
@@ -0,0 +1,1 @@ +{"augustus": [{"version": "AUGUSTUS (3.3.3)", "source": "augustus pre-trained", "date": "2021-07-19", "path": "/tmp/prout/trained_species/fly/augustus"}], "genemark": [{}], "codingquarry": [{}], "snap": [{}], "glimmerhmm": [{}]} \ No newline at end of file |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/funannotate_db/uniprot_sprot.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/uniprot_sprot.fasta Thu Aug 26 06:55:33 2021 +0000 |
b |
@@ -0,0 +1,83 @@ +>sp|Q6GZX4|001R_FRG3G Putative transcription factor 001R OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-001R PE=4 SV=1 +MAFSAEDVLKEYDRRRRMEALLLSLYYPNDRKLLDYKEWSPPRVQVECPKAPVEWNNPPS +EKGLIVGHFSGIKYKGEKAQASEVDVNKMCCWVSKFKDAMRRYQGIQTCKIPGKVLSDLD +AKIKAYNLTVEGVEGFVRYSRVTKQHVAAFLKELRHSKQYENVNLIHYILTDKRVDIQHL +EKDLVKDFKALVESAHRMRQGHMINVKYILYQLLKKHGHGPDGPDILTVKTGSKGVLYDD +SFRKIYTDLGWKFTPL +>sp|Q6GZX3|002L_FRG3G Uncharacterized protein 002L OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-002L PE=4 SV=1 +MSIIGATRLQNDKSDTYSAGPCYAGGCSAFTPRGTCGKDWDLGEQTCASGFCTSQPLCAR +IKKTQVCGLRYSSKGKDPLVSAEWDSRGAPYVRCTYDADLIDTQAQVDQFVSMFGESPSL +AERYCMRGVKNTAGELVSRVSSDADPAGGWCRKWYSAHRGPDQDAALGSFCIKNPGAADC +KCINRASDPVYQKVKTLHAYPDQCWYVPCAADVGELKMGTQRDTPTNCPTQVCQIVFNML +DDGSVTMDDVKNTINCDFSKYVPPPPPPKPTPPTPPTPPTPPTPPTPPTPPTPRPVHNRK +VMFFVAGAVLVAILISTVRW +>sp|Q197F8|002R_IIV3 Uncharacterized protein 002R OS=Invertebrate iridescent virus 3 OX=345201 GN=IIV3-002R PE=4 SV=1 +MASNTVSAQGGSNRPVRDFSNIQDVAQFLLFDPIWNEQPGSIVPWKMNREQALAERYPEL +QTSEPSEDYSGPVESLELLPLEIKLDIMQYLSWEQISWCKHPWLWTRWYKDNVVRVSAIT +FEDFQREYAFPEKIQEIHFTDTRAEEIKAILETTPNVTRLVIRRIDDMNYNTHGDLGLDD +LEFLTHLMVEDACGFTDFWAPSLTHLTIKNLDMHPRWFGPVMDGIKSMQSTLKYLYIFET +YGVNKPFVQWCTDNIETFYCTNSYRYENVPRPIYVWVLFQEDEWHGYRVEDNKFHRRYMY +STILHKRDTDWVENNPLKTPAQVEMYKFLLRISQLNRDGTGYESDSDPENEHFDDESFSS +GEEDSSDEDDPTWAPDSDDSDWETETEEEPSVAARILEKGKLTITNLMKSLGFKPKPKKI +QSIDRYFCSLDSNYNSEDEDFEYDSDSEDDDSDSEDDC +>sp|Q197F7|003L_IIV3 Uncharacterized protein 003L OS=Invertebrate iridescent virus 3 OX=345201 GN=IIV3-003L PE=4 SV=1 +MYQAINPCPQSWYGSPQLEREIVCKMSGAPHYPNYYPVHPNALGGAWFDTSLNARSLTTT +PSLTTCTPPSLAACTPPTSLGMVDSPPHINPPRRIGTLCFDFGSAKSPQRCECVASDRPS +TTSNTAPDTYRLLITNSKTRKNNYGTCRLEPLTYGI +>sp|Q6GZX2|003R_FRG3G Uncharacterized protein 3R OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-003R PE=3 SV=1 +MARPLLGKTSSVRRRLESLSACSIFFFLRKFCQKMASLVFLNSPVYQMSNILLTERRQVD +RAMGGSDDDGVMVVALSPSDFKTVLGSALLAVERDMVHVVPKYLQTPGILHDMLVLLTPI 
+FGEALSVDMSGATDVMVQQIATAGFVDVDPLHSSVSWKDNVSCPVALLAVSNAVRTMMGQ +PCQVTLIIDVGTQNILRDLVNLPVEMSGDLQVMAYTKDPLGKVPAVGVSVFDSGSVQKGD +AHSVGAPDGLVSFHTHPVSSAVELNYHAGWPSNVDMSSLLTMKNLMHVVVAEEGLWTMAR +TLSMQRLTKVLTDAEKDVMRAAAFNLFLPLNELRVMGTKDSNNKSLKTYFEVFETFTIGA +LMKHSGVTPTAFVDRRWLDNTIYHMGFIPWGRDMRFVVEYDLDGTNPFLNTVPTLMSVKR +KAKIQEMFDNMVSRMVTS +>sp|Q6GZX1|004R_FRG3G Uncharacterized protein 004R OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-004R PE=4 SV=1 +MNAKYDTDQGVGRMLFLGTIGLAVVVGGLMAYGYYYDGKTPSSGTSFHTASPSFSSRYRY +>sp|Q197F5|005L_IIV3 Uncharacterized protein 005L OS=Invertebrate iridescent virus 3 OX=345201 GN=IIV3-005L PE=3 SV=1 +MRYTVLIALQGALLLLLLIDDGQGQSPYPYPGMPCNSSRQCGLGTCVHSRCAHCSSDGTL +CSPEDPTMVWPCCPESSCQLVVGLPSLVNHYNCLPNQCTDSSQCPGGFGCMTRRSKCELC +KADGEACNSPYLDWRKDKECCSGYCHTEARGLEGVCIDPKKIFCTPKNPWQLAPYPPSYH +QPTTLRPPTSLYDSWLMSGFLVKSTTAPSTQEEEDDY +>sp|Q6GZX0|005R_FRG3G Uncharacterized protein 005R OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-005R PE=4 SV=1 +MQNPLPEVMSPEHDKRTTTPMSKEANKFIRELDKKPGDLAVVSDFVKRNTGKRLPIGKRS +NLYVRICDLSGTIYMGETFILESWEELYLPEPTKMEVLGTLESCCGIPPFPEWIVMVGED +QCVYAYGDEEILLFAYSVKQLVEEGIQETGISYKYPDDISDVDEEVLQQDEEIQKIRKKT +REFVDKDAQEFQDFLNSLDASLLS +>sp|Q91G88|006L_IIV6 Putative KilA-N domain-containing protein 006L OS=Invertebrate iridescent virus 6 OX=176652 GN=IIV6-006L PE=3 SV=1 +MDSLNEVCYEQIKGTFYKGLFGDFPLIVDKKTGCFNATKLCVLGGKRFVDWNKTLRSKKL +IQYYETRCDIKTESLLYEIKGDNNDEITKQITGTYLPKEFILDIASWISVEFYDKCNNII +>fcresfdr 
+MLARALLLCAVLALSHTANPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYILTHFKGFWNVVNNIPFLRNAIMSYVLTSRSHLIDSPPTYNADYGYKSWEAFSNLSYYTRALPPVPDDCPTPLGVKGKKQLPDSNEIVEKLLLRRKFIPDPQGSNMMFAFFAQHFTHQFFKTDHKRGPAFTNGLGHGVDLNHIYGETLARQRKLRLFKDGKMKYQIIDGEMYPPTVKDTQAEMIYPPQVPEHLRFAVGQEVFGLVPGLMMYATIWLREHNRVCDVLKQEHPEWGDEQLFQTSRLILIGETIKIVIEDYVQHLSGYHFKLKFDPELLFNKQFQYQNRIAAEFNTLYHWHPLLPDTFQIHDQKYNYQQFIYNNSILLEHGITQFVESFTRQIAGRVAGGRNVPPAVQKVSQASIDQSRQMKYQSFNEYRKRFMLKPYESFEELTGEKEMSAELEALYGDIDAVELYPALLVEKPRPDAIFGETMVEVGAPFSLKGLMGNVICSPAYWKPSTFGGEVGFQIINTASIQSLICNNVKGCPFTSFSVPDPELIKTVTINASSSRSGLDDINPTVLLKERSTEL +>BUSCOaEOG7B0HST +MAADQAQFQQLLVSLLSTDNEVRKQAEEAYNNLPVESKVTFLLGAIANGQLSEEVRQLAA +VLLRRLFSSEFLEFYKKLPAEAQAQLKEQILLAVQQEVSEQLRRKVCEVVAEVARNLIDE +DGNNQWPEFLQFLFQCANSPSPQLKESALRIFTSVPGIFGNQEAQYLDLIKQMLAKSLED +TEDAEVRLQAVRAVGAFILLHDKEKEIQKHFADLLPALLQVVAESIEKQDDDALLKVLID +LAEATPKFLRPQLETILELCLKVLSEEDVEDSWRHLALEVLVTLAETAPAMVRKRAEKYI +VALVPLVLKMMTDLEEDEDWSVADEITEDDNDSNNVVAESALDRLACGLGGKVVLPLVVE +AIPAMLSSSDWKKRHAALMAISAIGEGCHKQMEALLDQVLDGVLKYLQDPHPRVRYAACN +AIGQMSTDFAPIFEKKFHDKVIPGLLLLLDDEANPRVQAHAGAALVNFSEDCPKNILTRY +LDAIMAKLEAILTSKFKELVEKGTKLVLEQVVTTIASVADTAEEEFVAYYDRLMPCLKYI +IQNANSEELKLLRGKTIECVSLIGLAVGREKFIADASEVMDLLLKTHTEGAELPDDDPQT +SYLISAWARICKILGKQFEQYLPLVMGPVLRTASLKPEVALLDNEDLEDIEGDVDWQFVS +LGEQQNFGIRTAGLEDKASACEMLVCYARELKEGFAEYAEEVVRLMVPLLKFYFHDGVRT +AAAESLPYLLDCAKIKGPQYLEGMWAYICPELLKAIDTEPEKEVLSELLSSLAKCIETLG +AGCLSEEALKELLRILDKLLKEHFERAEKRLEKRKDEDYDEVVEEELAEEDDEDVYILSK +VADILHALFATYKEAFLPAFDQVVPHFVKLLEPERPLADRQWALCVFDDVIEFGGPACVK +>FBpp0306926 +MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAG +ESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKRFIRNPNYVKANEFYDKMLSSEY +VSKRYKDLPPPHPGFGADQPPA +>FBpp0078508 +MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFD +LKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLGVTVDLINRELYQADSMTLLDPA +DEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK +KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPC +AQVIFDSDPAPAGKNVPAQLEEMSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFI 
+NGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGIYYNELETRVRLNKR +RVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIG +ETEKTSEDAAVGAQAASGADSPAQVARDRQSRSRSRTRSGSSSGSGSGSGSRASSRSK +SGSRSGSGSRSRTNSPAGSQKSGSRSRSVSRSRSRSKSGSRSRSRSRSKSGSRSRSGS +RSGSGSRSPSRSRSGSPSGSGSSSGSASDE |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/genome.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome.fa Thu Aug 26 06:55:33 2021 +0000 |
b |
b'@@ -0,0 +1,3253 @@\n+>sample\n+GTGGCCGGCTGATTTCACGTCCTAACTTTGGGCTTAACTGGTTCGCCAGTTGACTTTCTTCGCCATCATG\n+TGATGCATTAATTAAACAATAATTACTAATTGACAGTAATTAATAATTGTGGCAAAAAGCGCGACACGTT\n+TTTTCGGCAAACTCCTCGGAAGACCGATTGTTTAAAGACGTAGGGAAAAGTAGTTCCCAAGCATTTTAAA\n+AAGATACCTATGACATGTGACACCTTTAAAGTGCAATACAAGTTTTCATCTCTTTATATCCTTTTACTCC\n+CTAATTTGAATATAAAAGGAATCGCATTGAGAGTATAAAGGCTTTAGTTCTTATCGATAGATAGTTAGTG\n+ATGAAATAAAATTATAACCGTGGTTTTAGTTTGAAATGTTGTAAAGACTTTCTTTAAATTTAACCAAATT\n+TATGTGATAAAATGGATATTCCATAGATAAGACATTTAAGTTAAATGTTTTTATACATCAAAAAGGAAAC\n+ATTGTGCACGCTATCAAATGGTATTCTTAAAATCGAGTCAGTTAGGTAAGTTATTAATTAAATGGTAACT\n+TTTTAACGTGCGTCAAATAATCTAGAAATTCTTCTTCAACTCATCCAAAACATTCTCAACACCACAATAT\n+CTATGCTCAGCGATGACAAATTTCTCCTGATTTCTTAATTTTCTATCTATGCTATGCGATCAATCAACGA\n+ATGTGTGCTAATTTCTTGTGACGATTATTTTGCAAAGTCGTCTCCGCGTTAATATCCGATGTAAATAAAC\n+CTATGAAAATCGCAAAGATCTATTCCTTTGCGCTTAACCTTGTTATTGAACTCCCTCCCGCCCGGATTTT\n+CGCAGCTTCCAACTAAGTGATACCTTTTAAACAAACGCCACAACAAAACAGGTGACAATCATATTTTATC\n+AACAAAAGAAAAGAGAGATAATATCTGCTGCTAATTCAATTTATTGGGCTTTGTGTTTATTTGCATTGGG\n+AATCCGTGGAGCTGATATTGTTTACTTTGGCAATTTCCCAGTCATTATGGTCGCCGTATAATGTGAATGG\n+CCAACAGGAAAATTTCACAGATTCCACTGGTTATTCACTGTTCATGCTGGCGGTTGATCCGCTCCAAATC\n+CCGGATGAGGCGGCGGAAGATCCTTATACCGCTTACTTACGTATTCACTGCTTAACATCTTATCATAGAA\n+TTCGTTAGCTTTCACATAGTTTGGATTCCGAATGAATCTCTTCTGTCGACCTCGCGTTTTTGGCAATTTT\n+CGGAGCTCTTCGCTACTGCTAGTTGATTTAAGGCCTACATCTTTCTGATCCTTAATCATCCTTTTAGGTG\n+ATTCTCCTGCTACCATGGGATCCACCAGTTTCATATATAATGGATCGGGCTCGTTTGCAAGTGGGTTTCC\n+AGTTTCAGAAGACTCTGACGTTCTAAGCGCCAATAAAATGAAAATAACCAAAAGGAAACTAGACATGTTT\n+CGTTACAGACAGATATAGATTGGATATTATTGAAAGAAAATGAAAATAAACAGCGATAATGATCTGTGAC\n+TTATTGGAAATTAGATGGCTTATGGATGATGGGGCGATAAATTCGAACAAACACTGAGAGCATTTTTGGG\n+AGCATTGTGGGAGCTTTTCTATTATCCAGTACTTTATATACATATATCATTTATATACTAATCATTTCTG\n+GTAGCCGTTCGTAATCAGGATCGGATCCTTTTTTACCCGTTAGTCAGCTAGAAGAAACGAAAAATTAAAA\n+TAGTAAAATCTAAAAGTATACAAAAATTCAAATAGTAAAACCAAAAAGTATTAAAAAAAATATCAATCGT\n+TTTTAAACGTTGATTTTTCAGCTTGTGGGGTGATTTATCGCTAACTTGGAAAATGATAATAAAGCATTA
T\n+CCATAATATTAGTTGTGGAAATGAAATTCAAATAGATGTTGTGTTATATACGATGAGGATGTTGCATTTG\n+AGTCCCCGGAAATATAGTATTTTTTTTACCGAAGGTATTATCGTACCGGTCAAGTACGGTCACACTGCCA\n+AGCGCAGATTTGAGGATTTCTAGATTTGGCCTCTTGATGGACTAGAAGCGCTACCAAAACTGGGGCTTGA\n+GTTGAATTACCTGTTGGAAGACACAATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAA\n+AAGCGACCCCAGCGGCAAACGGAGCGCAAGTAAGTGAACAGATCCCTAAACAGACGCCAGATACTCAGAC\n+TGATGTGTACCTTGCAGATCCGAGATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCAT\n+TTGATCTGAAGTTTCTGCAGTACCCCTTCGACAGCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGA\n+GCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAG\n+CTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCTGGAGGAGGAGACTCTGA\n+CGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCGAGTACAT\n+CTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTC\n+AAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCT\n+TCAGCGACACCAAGAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCC\n+TATCTTCCCCGACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCT\n+GCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGA\n+GCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGACGCTGGAGAAACGCCGTACAGACTTCAT\n+CAATGGCGAGCTGTACAAGGAGGAGGAGGAGTACGAGTACAAGATCGCTCGAGAGTACAACTGGAACGTG\n+AAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATCTACTACA\n+ACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAA\n+GCTGGTAAGTATATTTATGCGCATACATCTATAGCGAGCTTTACTTTGTATTATTTCTACCAGGTTGTCA\n+AGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGG\n+CGAGGAGGAGGAGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAG\n+ACGAGCGAGGACGCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCG\n+ATCGACAGTCTCGTTCTCGGAGTCGAACTCGCAGCGGGTCCAGTTCAGGATCTGGATCTGGCTCCGGCTC\n+TCGGGCCAGCAGCCGCTCAAAGTCTGGTTCTCGGTCTGGTAGCGGCTCCAGATCACGCACAAATTCGCCG\n+GCAGGATCCCAGAAATCCGGATCCAGATCGAGATCGGTATCACGTTCCCGATCCCGTTCCAAGTCCGGCT\n+CTCGGTCGCGTTCTAGGTCGAGATCCAAGTCCGGTTCCCGATCACGTTCGGGCTCCAGATCTGGCTCTGG\n+GTCGCGATCGCCCAGCCGGTCTCGC
AGTGGCTCGCCTTCTGGTTCAGGATCCAGCTCTGGAAGCGCCTCA\n+GATGAATGAT'..b'ATCGGTATCACGTTCCCGATCCCGTTCCAAGTCCGGCT\n+CTCGGTCGCGTTCTAGGTCGAGATCCAAGTCCGGTTCCCGATCACGTTCGGGCTCCAGATCTGGCTCTGG\n+GTCGCGATCGCCCAGCCGGTCTCGCAGTGGCTCGCCTTCTGGTTCAGGATCCAGCTCTGGAAGCGCCTCA\n+GATGAATGATTAATTACAAAAAACGGCGTTCATAATAAATAAGTTTATAATCAACCAAGTACATTTGAAA\n+ACTGAACTAACTCGATTTAATATCATTTTCGCCTCAACTCAGCGCTCGGGTTCGTTGCCCAGAATAGTTT\n+TAAATAAAATCGGCAGTTTAACATAATTTATATTAGATGTTGTTGTTGTATTGCAAACAAGTCGGGTCCT\n+AGTCGATTTACACTTGGCTGAGATAAAACAACTAAGATTCAAATAATATCCTCATAAGAAGATGTAATTA\n+AGACGTTTTTCTTAGGGGGTGCTTAGGAATTGATTAGATCGCCTTTGGGGAAGTGCAAACAATGTAAAAT\n+GATATAAAAGGGTATAAATTAAGTGGATATATGCATCTTCGTTCCAACTACGTGGCGTCCATCAAAAAGC\n+GCTGGAAGACTTCGCCATCGGAACTAGGTAGCTCTGTTTGTTGCGGTGAGTAGATTCTCAAGTTCTGGAG\n+TTGCTGCAGCGGAGAGCCATTCCCGCTGAAGTGTACCACCGCAACCGGCTGTAGGGATATGAGCTGTCCC\n+TCCTCCCGCGGCTCCACACCACAGATGCCCAGCTTTTGGCACTGCTCAACGACAATGTCGTCGATTGACT\n+GCGAAAGCAGTGCCTCCTGCTCAGGATCCACAATGGAGCTGCTGTTGATGGCAGCTATTTCGGCGCTCGC\n+TGGTGAAGGCTGAGTATGCGAGTCGTTGCCCTCCAGGAACGCCTTTATCAGCTGCTCCGGTGTCTGGGCC\n+TCCTCGGTGGGACATCGATGGGTCCTTTGACTGACCTTGTACCTGAACGTCTTTTGGCAGAGCTCGCACT\n+TGTAGGGCATCACTCCCGTGTGGATGCGCGTGTGGACAAGGAAAGAGACTCGCTGCCGGAAACACTTGCC\n+TGTGGACGAGTCCGAAATACGAAAAGGTTAGACATGGAGTGACCCGGAAAAGAAGGTATACCTCTCCTTC\n+ATTTAAAGTAAATAGGGCAAATCGCAATGGAGTATGCTCATTTATAAGCTGGCTAACAAAATAAGGGGCG\n+GCTAATTAAGGGGTTTGATCGATACTCACCGCAGACTTCGCACTTGAAGGGCTTTTCGCCGCTGTGAATC\n+CTCTGGTGGTTATGCAGCGTAGACAGTTCCTTGAAGGCGCGTCCACAAACCCCGCAGACATGGGGCTTTA\n+CCTCGCTGTGGTAGAGCAAATGCTTGTGGTACGACTGCTGGAAGGTGAAGGTCCTGGCGCAGATCTCGCA\n+TGTGTACGGCATCTCGCCGGTGTGGAGCCGCTTGTGCTTCTTCAGAAAGTACTTGGTGGTGAAGGACTTG\n+CTGCACACATCGCACTCCCACAGCTTCGGGGTGGCCGTGCCCGACTCCGGCGAACTGGGGGACTGCTGGA\n+GCATGCTCAGCGCCCCACTCAGCGCGTATGGCTGGGATGCGGTGCACGTGGAGTTATTTCCGTTGCCAAG\n+GGCTCCAGGCTCTGTGTGTCGAATGCGATCGCAAATGCTCAGCTTGGGCGTGGCAGTAACTGCACTGGTG\n+GTAGAAGCGGGTGTAGGACTGGGATTAGGATTTGGATTGGGATTGGAGCAGGGCACGCCCATCATGTGCA\n+CTACTTTCAGGTGGATTCGAAGGGAGCCCTTCATTCGGAACGTCTTGGAGCAGAGATGGCACTTGTAGGG\
n+CTCCTGGTCCTGTATAAAGCAATAATCGGAATTTCACTTATATTTATCAATTCATCAATATGCCCTCATG\n+GCCAAATATTCCATTACATTACCGTCTGTCTCTCAGTTTCAAATTTATGCACAAAAATCATTCACTTTCA\n+TTCACTATATCACAAAGTTGCCATGGTTTTAAATTGATCAAAAACAAATTAATATCTATCATATATATAC\n+ATAGTCATATGAACAGTTGAAAAATTAATTGAAAATAATGGGAACGATATACGTACATACATCAGTTGTT\n+TTTAAAATATAAGGGTATATAGATTTCTTTCTTGTTGTTGTTGATTTTAATTACGTCAAACTTTTGTTTC\n+AGATTCAATGTAAATGGTCTAGCTTTTTAAGTATGATTTTTTTTTGCTGCCAGTGAGCATAGAAAAAAAA\n+AATCAAAATCGATATAAGAATATGCGAAAGTGCATTACGAAACTCTTTAGATAATAGCACTTAATATATG\n+TACATAGCCAATAGTTACCGGTTCCTTCTGTTGGGGTTCCTTTTGCTTGGGTTCTCCCTCCGCATTTTCG\n+TGGACTAAGCGGACGTGCATGTCCCTCAGCTCGGTATTCCGGAAACTGAACTCGCAAATGTCGCAGTGGG\n+CGGGCGGGGTGGTCCGCACAGGCGGTGGGGTTGGGACGACGGGCTTGGACCTGGTTCGCTTGGCCCTCCG\n+TTTGGGAGGAGCTGCGGCAAGGAAGCCCCGGGACGAGGCGGGTTGGCCATTCGCTGGACTCTCGTTTCCC\n+TCCTGAGCCATCAGACTTGTGTGCGAGAACAGGTGGATGGTGAGCTTGTCCAGCCCCAGGAAGAGCTCCT\n+TGCAGTTGGCAAAGGGACAGGCCAGTGGGCCGTTGGCCGCCTTGATCAGCCTCTGCTGCAGTGCGTCAAT\n+ACTGCCGAAACTGGGCACCGCGCAGAGCGGACACAGCACCGAGGTGGAACACATTTCGCCAGTGCACTCA\n+ATCGAATCTTATGCAAATGCTTCACCTCCTATTGGGATTATCCTCCTGTTCGGTCTGTGATCATCTATTC\n+AGGAGTCCATTCCCAGACTGCCTAGTCTTTCTGCTTTCAAAATTTTCTAAAAATATCAGCAAGTGAAGAT\n+TTTTGAAAACTTTGGGCCCAGCAATCTGACTTCTCGGCACCGATGCCAGCTAACGAAATAATGAAAAATA\n+ATGAAATGCCCGGCGCGGATCGTCGAATCGTCAAGAAGACTTTCGGAAACACTCGCAGCACCGAAATCCC\n+ATCTCTCGAACAAGGCAGTCTCTTTTCTCCGTGTCTCTGGGTAGCTCATTTCGAAATATAGCTCTGAGCA\n+CGGCTATATACTATATGTATGTAGAATTATTTCTGGCCGATATATGTTGCACTGGCGGCCATATAGCCTT\n+CGTTCTAGTCTTTGTAACGCACGATGCGCAGGAGCAATTCGCTGAGATGACCACATGCGATTTGCGGGAC\n+TTATCTAGAGATCTATCATTATCGCCAGATTGGTTTAATAATTGGCTTTTCCGCCAATATCCAATTGGAA\n+TATGGTTGGTTACTGCAATTGTCGCTCCATTTTTTAAGCACTCCATAAAAAGTAAACACATTAATATGTA\n+CTCTTATTGGAGATTTCTTCTTTCGATTTTAGTTTCGGACCAGTGAAAATCATTCGTTTCATTTTCGTAA\n+ATAAGAACTGAGAAAATATTATTATTATATATATTTCTTTATTAGGAAAATACGAAGATTGAGTATTTCA\n+GATTGAATTAGCATATCCGTCTAAATCTTAATGCTGTAATGAGCTTACTTGAGATCTGATCAAAACCAAT\n+ACAAAACCCACACCAAAGGTGGTAGCTAATATACATATTTTGTGTAATACTTTTGTAGAGTATTTACTAT\n+TCAGCGATTTAAACAAGCAATCGCCTA
GACACACACATTTGTCCGCCTATGTGTATGTGCACCGAGCTAT\n+ACCCCCACTGAATCGCTGTGTGCTATTTTTATGGCCGCGATGCTCTCTTGTTTTGACCCGCTTGGGCAAC\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/genome_masked.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome_masked.fa Thu Aug 26 06:55:33 2021 +0000 |
b |
b'@@ -0,0 +1,4554 @@\n+>sample\n+GTGGCCGGCTGATTTCACGTCCTAACTTTGGGCTTAACTGGTTCGCCAGT\n+TGACTTTCTTCGCCATCATGTGATGCATTAATTAAACAATAATTACTAAT\n+TGacagtaattaataattgTGGCAAAAAGCGCGACACGTTTTTTCGGCAA\n+ACTCCTCGGAAGACCGATTGTTTAAAGACGTAGGGAAAAGTAGTTCCCAA\n+GCATTTTAAAAAGATACCTATGACATGTGACACCTTTAAAGTGCAATACA\n+AGTTTTCATCTCTTTATATCCTTTTACTCCCTAATTTGAATATAAAAGGA\n+ATCGCATTGAGAGTATAAAGGCTTTAGTTCTTATCGATagatagttagtG\n+ATGAAATAAAATTATAACCGTGGTTTTAGTTTGAAATGTTGTAAAGACTT\n+TCTTTAAATTTAACCAAATTTATGTGATAAAATGGATATTCCATAGATAA\n+GACATTTAAGTTAAATGTTTTTATACATCAAAAAGGAAACATTGTGCACG\n+CTATCAAATGGTATTCTTAAAATCGAGTCAgttaggtaagttattaatta\n+aATGGTAACTTTTTAACGTGCGTCAAATAATCTAGAAATTCTTCTTCAAC\n+TCATCCAAAACATTCTCAACACCACAATATCTATGCTCAGCGATGACAAA\n+TTTCTCCTGATTTCTTAATTTTCTATCTATGctatgcGATCAATCAACGA\n+ATGTGTGCTAATTTCTTGTGACGATTATTTTGCAAAGTCGTCTCCGCGTT\n+AATATCCGATGTAAATAAACCTATGAAAATCGCAAAGATCTATTCCTTTG\n+CGCTTAACCTTGTTATTGAACTCCCTCCCGCCCGGATTTTCGCAGCTTCC\n+AACTAAGTGATACCTTTTAAACAAACGCCACAACAAAACAGGTGACAATC\n+ATATTTTATCAACAAAAGAAAAGAGAGATAATATCTGCTGCTAATTCAAT\n+TTATTGGGCTTTGTGTTTATTTGCATTGGGAATCCGTGGAGCTGATATTG\n+TTTACTTTGGCAATTTCCCAGTCATTATGGTCGCCGTATAATGTGAATGG\n+CCAACAGGAAAATTTCACAGATTCCACTGGTTATTCACTGTTCATGCTGG\n+CGGTTGATCCGCTCCAAATCCCGGATGAGGCGGCGGAAGATCCTTATACC\n+GCTTACTTACGTATTCACTGCTTAACATCTTATCATAGAATTCGTTAGCT\n+TTCACATAGTTTGGATTCCGAATGAATCTCTTCTGTCGACCTCGCGTTTT\n+TGGCAATTTTCGGAGCTCTTCGCTACTGCTAGTTGATTTAAGGCCTACAT\n+CTTTCTGATCCTTAATCATCCTTTTAGGTGATTCTCCTGCTACCATGGGA\n+TCCACCAGTTTCATATATAATGGATCGGGCTCGTTTGCAAGTGGGTTTCC\n+AGTTTCAGAAGACTCTGACGTTCTAAGCGCCAATAAAATGAAAATAACCA\n+AAAGGAAACTAGACATGTTTCGTTACAGACAGATATAGATTGGATATTAT\n+TGAAAGAAAATgaaaataAACAGCGATAATGATCTGTGACTTATTGGAAA\n+TTAGATGGCTTATGGATGATGGGGCGATAAATTCGAACAAACACTGAGAG\n+CATTTTTGGGAGCATTGTGGGAGCTTTTCTATTATCCAGTACTTTATATA\n+CATATATCATTTATATACTAATCATTTCTGGTAGCCGTTCGTAATCAGGA\n+TCGGATCCTTTTTTACCCGTTAGTCAGCTAGAAGAAACGAAAAATTAAAA\n+TAGTAAAatctaaaagtatacaaaaattcaaatagtaaaaccaaaaagta\n+ttaaaaaaAATATCAATCGTTTTTAAACGTTGATTTTTCAGCTTGTGGGG\n+TGATTT
ATCGCTAACTTGGAAAATGATAATAAAGCATTATCCATAATATT\n+AGTTGTGGAAATGAAATTCAAATAGATGTTGTGTTATATACGATGAGGAT\n+GTTGCATTTGAGTCCCCGGAAATATAGTATTTTTTTTACCGAAGGTATTA\n+TCGTACCGGTCAAGTACGGTCACACTGCCAAGCGCAGATTTGAGGATTTC\n+TAGATTTGGCCTCTTGATGGACTAGAAGCGCTACCAAAACTGGGGCTTGA\n+GTTGAATTACCTGTTGGAAGACACAATGCCACCCACGATCAACAATTCGG\n+CGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAG\n+TAAGTGAACAGATCCCTAAACAGACGCCAGATACTCAGACTGATGTGTAC\n+CTTGCAGATCCGAGATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCG\n+GATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACAGCCACCG\n+CTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACG\n+TGCTGACGGAACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAG\n+CTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACT\n+GCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGC\n+ATTCGAGGACGGTGTCATGGTTGCGCAAATCCGAGTACATCTCCACCGAG\n+CAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGG\n+TTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCG\n+AAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAAGAGCGAA\n+ATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCC\n+TATCTTCCCCGACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATAT\n+TTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAG\n+GAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACA\n+GTTTGTCGCCTACTTCCTGCCCACAGAGCAGACGCTGGAGAAACGCCGTA\n+CAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtac\n+aagATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGG\n+CTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATCTACTACA\n+ACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGC\n+CAGCAACCCAACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCT\n+ATAGCGAGCTTTACTTTGTATTATTTCTACCAGGTTGTCAAGCATCGTCC\n+ATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAG\n+AAGTTCCTGGCGAGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAG\n+CAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGACGCAGCTGT\n+TGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCG\n+ATCGACAGTCTCGTTCTCGGAGTCGAACTCGCAGCGGGTCCAgttcagga\n+tctggatctggctccggctcTCGGGCCAgcagccgctcaaagtctggttc\n+tcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCC\n+AGAAATCCGGatccagatcgagatcggtatcacgttcccgatcccgttcc\n+aagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagtccggt
tcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTT'..b'cgcgttctaggtcgagatccaagtccggttcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTTCTGGTTCAGGATCCAGCTCTGGAAGCGCCTCA\n+GATGAATGATTAATTACAAAAAACGGCGTTCATAATAAATAAGTTTATAA\n+TCAACCAAGTACATTTGAAAACTGAACTAACTCGATTTAATATCATTTTC\n+GCCTCAACTCAGCGCTCGGGTTCGTTGCCCAGAATAGTTTTAAATAAAAT\n+CGGCAGTTTAACATAATTTATATTAGATGTtgttgttgtATTGCAAACAA\n+GTCGGGTCCTAGTCGATTTACACTTGGCTGAGATAAAACAACTAAGATTC\n+AAATAATATCCTCATAAGAAGATGTAATTAAGACGTTTTTCTTAGGGGGT\n+GCTTAGGAATTGATTAGATCGCCTTTGGGGAAGTGCAAACAATGTAAaat\n+gatataaaagggtataaaTTAAGTGGATATATGCATCTTCGTTCCAACTA\n+CGTGGCGTCCATCAAAAAGCGCTGGAAGACTTCGCCATCGGAACTAGGTA\n+GCTCTGTTTGTTGCGGTGAGTAGATTCTCAAGTTCTGGAGTTGCTGCAGC\n+GGAGAGCCATTCCCGCTGAAGTGTACCACCGCAACCGGCTGTAGGGATAT\n+GAGCTGTCCCTCCTCCCGCGGCTCCACACCACAGATGCCCAGCTTTTGGC\n+ACTGCTCAACGACAATGTCGTCGATTGACTGCGAAAGCAGTGCCTCCTGC\n+TCAGGATCCACAATGGAGCTGCTGTTGATGGCAGCTATTTCGGCGCTCGC\n+TGGTGAAGGCTGAGTATGCGAGTCGTTGCCCTCCAGGAACGCCTTTATCA\n+GCTGCTCCGGTGTCTGGGCCTCCTCGGTGGGACATCGATGGGTCCTTTGA\n+CTGACCTTGTACCTGAACGTCTTTTGGCAGAGCTCGCACTTGTAGGGCAT\n+CACTCCCGTGTGGATGCGCGTGTGGACAAGGAAAGAGACTCGCTGCCGGA\n+AACACTTGCCTGTGGACGAGTCCGAAATACGAAAAGGTTAGACATGGAGT\n+GACCCGGAAAAGAAGGTATACCTCTCCTTCATTTAAAGTAAATAGGGCAA\n+ATCGCAATGGAGTATGCTCATTTATAAGCTGGCTAACAAAATAAGGGGCG\n+GCTAATTAAGGGGTTTGATCGATACTCACCGCAGACTTCGCACTTGAAGG\n+GCTTTTCGCCGCTGTGAATCCTCTGGTGGTTATGCAGCGTAGACAGTTCC\n+TTGAAGGCGCGTCCACAAACCCCGCAGACATGGGGCTTTACCTCGCTGTG\n+GTAGAGCAAATGCTTGTGGTACGACTGCTGGAAGGTGAAGGTCCTGGCGC\n+AGATCTCGCATGTGTACGGCATCTCGCCGGTGTGGAGCCGCTTGTGCTTC\n+TTCAGAAAGTACTTGGTGGTGAAGGACTTGCTGCACACATCGCACTCCCA\n+CAGCTTCGGGGTGGCCGTGCCCGACTCCGGCGAACTGGGGGACTGCTGGA\n+GCATGCTCAGCGCCCCACTCAGCGCGTATGGCTGGGATGCGGTGCACGTG\n+GAGTTATTTCCGTTGCCAAGGGCTCCAGGCTCTGTGTGTCGAATGCGATC\n+GCAAATGCTCAGCTTGGGCGTGGCAGTAACTGCACTGGTGGTAGAAGCGG\n+GTGTAGGACtgggattaggatttggattgggattggAGCAGGGCACGCCC\n+ATCATGTGCACTACTTTCAGGTGGATTCGAAGGGAGCCCTTCATTCGGAA\n+CGTCTTGGAGCAGAGATGGCACTT
GTAGGGCTCCTGGTCCTGTATAAAGC\n+AATAATCGGAATTTCACTTATATTTATCAATTCATCAATATGCCCTCATG\n+GCCAAATATTCCATTACATTACCGTCTGTCTCTCAGTTTCAAATTTATGC\n+ACAAAAATCATTCACTTTCATTCACTATATCACAAAGTTGCCATGGTTTT\n+AAATTGATCAAAAACAAATTAATATCTATCATATATaTACATAGTCATAT\n+GAACAGTTGAAAAATTAATTGAAAATAATGGGAACGATATACGTACATAC\n+ATCAGTTGTTTTTAAAATATAAGGGTATATAGATTTCTTTCttgttgttg\n+ttgatTTTAATTACGTCAAACTTTTGTTTCAGATTCAATGTAAATGGTCT\n+AGCTTTTTAAGTATGATTTTTTTTTGCTGCCAGTGAGCATAGAAaaaaaa\n+aatCaaaATCGATATAAGAATATGCGAAAGTGCATTACGAAACTCTTTAG\n+ATAATAGCACTTAATATATGTACATAGCCAATAGTTACCGGTTCCTTCTG\n+TTGGggttccttttgcttgggttcTCCCTCCGCATTTTCGTGGACTAAGC\n+GGACGTGCATGTCCCTCAGCTCGGTATTCCGGAAACTGAACTCGCAAATG\n+TCGCAGTGGGCGGGCGGGGTGGTCCGCACAGGCGGTGGGGTTGGGACGAC\n+GGGCTTGGACCTGGTTCGCTTGGCCCTCCGTTTGGGAGGAGCTGCGGCAA\n+GGAAGCCCCGGGACGAGGCGGGTTGGCCATTCGCTGGACTCTCGTTTCCC\n+TCCTGAGCCATCAGACTTGTGTGCGAGAACAGGTGGATGGTGAGCTTGTC\n+CAGCCCCAGGAAGAGCTCCTTGCAGTTGGCAAAGGGACAGGCCAGTGGGC\n+CGTTGGCCGCCTTGATCAGCCTCTGCTGCAGTGCGTCAATACTGCCGAAA\n+CTGGGCACCGCGCAGAGCGGACACAGCACCGAGGTGGAACACATTTCGCC\n+AGTGCACTCAATCGAATCTTATGCAAATGCTTCACCTCCTATTGGGATTA\n+TCCTCCTGTTCGGTCTGTGATCATCTATTCAGGAGTCCATTCCCAGACTG\n+CCTAGTCTTTCTGCTTTCAAAATTTTCTAAAAATATCAGCAAGTGAAGAT\n+TTTTGAAAACTTTGGGCCCAGCAATCTGACTTCTCGGCACCGATGCCAGC\n+TAACGAAATAATGAAaaataatgaaaTGCCCGGCGCGGATCGTCGAATCG\n+TCAAGAAGACTTTCGGAAACACTCGCAGCACCGAAATCCCATCTCTCGAA\n+CAAGGCAGTCTCTTTTCTCCGTGTCTCTGGGTAGCTCATTTCGAAATATA\n+GCTCTGAGCACGGCTATATACTATATGTATGTAGAATTATTTCTGGCCGA\n+TATATGTTGCACTGGCGGCCATATAGCCTTCGTTCTAGTCTTTGTAACGC\n+ACGATGCGCAGGAGCAATTCGCTGAGATGACCACATGCGATTTGCGGGAC\n+TTATCTAGAGATCTATCATTATCGCCAGATTGGTTTAATAATTGGCTTTT\n+CCGCCAATATCCAATTGGAATATGGTTGGTTACTGCAATTGTCGCTCCAT\n+TTTTTAAGCACTCCATAAAAAGTAAACACATTAATATGTACTCTTATTGG\n+AGATTTCTTCTTTCGATTTTAGTTTCGGACCAGTGAAAATCATTCGTTTC\n+ATTTTCGTAAATAAGAACTGAGAAAATATtattattatATATATTTCTTT\n+ATTAGGAAAATACGAAGATTGAGTATTTCAGATTGAATTAGCATATCCGT\n+CTAAATCTTAATGCTGTAATGAGCTTACTTGAGATCTGATCAAAACCAAT\n+ACaaaacccacacCAAAGGTGGTAGCTAATATACATATTTTGTGTAATAC\n+TTTTGTAGAG
TATTTACTATTCAGCGATTTAAACAAGCAATCGCCTAGAC\n+ACACACATTTGTCCGCCTATGTGTATGTGCACCGAGCTATACCCCCACTG\n+AATCGCTGTGTGCTATTTTTATGGCCGCGATGCTCTCTTGTTTTGACCCG\n+CTTGGGCAAC\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_augustus/Genus_species.cds-transcripts.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_augustus/Genus_species.cds-transcripts.fa Thu Aug 26 06:55:33 2021 +0000 |
b |
b'@@ -0,0 +1,421 @@\n+>FUN_000001-T1 FUN_000001\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000002-T1 FUN_000002\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGgatctggatctggctccggctcTCGGGCCAgc\n+agccgctcaaagtctggttctcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCCAGAAATCCGG\n+atccagatcgagatcggtatcacgttcccgatcccgttccaagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagt\n+ccggttcccgatcacgttcgggctccagatctggctct
gggtcgcgatcgCCCAGCCGGTCTCGCAGTGGCTCGCCTTCT\n+GGTTCAGGATCCAGCTCTGGAAGCGCCTCAGATGAATGATTAA\n+>FUN_000003-T1 FUN_000003\n+ATGTATGACCAGGAGCCCTACAAGTGCCATCTCTGCTCCAAGACGTTCCGAATGAAGGGCTCCCTTCGAATCCACCTGAA\n+AGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAATCCCAGTCCTACACCCGCTTCTACCACCA\n+GTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACACAGAGCCTGGAGCCCTTGGCAACGGAAAT\n+AACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGCATGCTCCAGCAGTCCCCCAGTTCGCCGGA\n+GTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTCCTTCACCACCAAGTACTTTCTGAAGAAGC\n+ACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCAGGACCTTCACCTTCCAGCAGTCGTACCAC\n+AAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGTGGACGCGCCTTCAAGGAACTGTCTACGCT\n+GCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAAGTCTGCGGTGAGTATCGATCAAACCCCTTAA\n+TTAGCCGCCCCTTATTTTGTTAG\n+>FUN_000004-T1 FUN_000004\n+ATGATACATTCGGCTAAGACTACCTTGGTGGAGTGGACAGAGGCCATGGAAGACGGAAACAAGGGCTACCAGCTAATCGA\n+AAAGTACTACCTCGATGACCAGCAAAAGGCACGGGAGCTGAACATTAAGCGTCAGCTCTTGCAGGCGGACATCGACAAGC\n+GGCGCAAGCAGGTGGTGCTCCTCTATGACGAGCAAATGACGCTGGAGAAGAATCTGGAGCGAACCGCTTGTCTGTACAGG\n+TCTGCCCACGCGGAACGCCGCCAGATGGTGGAGACGTGGAAGAGCGCCGTGAACCAGATGACCCAGCGAGAGCACGACAT\n+TCAGCGCAGCGAGATAGAGTGTGCGGAGCTGGCCCAGAAAGCCCAGCAGACGGCTCAGACCTACAAGGAGTACGACAACC\n+AGCTAACCGAGGTCATCGAGAACAACCGCCAGGTGGAGCTCGCGATCGAGTCCCTCAACGAGGAAAGCTCCGACATGAAA\n+AACCAAATCCAGATCCTGATTGATGCCACATTACTGAAGGAGCGCGAGATCGACGGACTGCGTCGGGAACTGGAGAACCT\n+TTCGAACCGGGTTCACCTGCAGCGCATGGATAACCGTAGTCAGATGAAAAAGCGCGATGAGAAGGCCAAGGAGCTGGAGA\n+ACTTCGCTTCGGTGATGGAGAAGGTTAACGCTCGGCTGAAGTCTGTACAGAACAAGGCCCTCAATGCGGAGCAGCGGCTG\n+CAAATTCTGGAGGAGATGATGCAGGCGGAGGAGACGGCTCTGCGAAACCTGGACAAGGAGCAGGAGAAGGTCAACGAGAT\n+GTTGTACCGCACCCAGAGGCAGGTCATCGAGCTGCAGGACGAGGAGAAGGTCTTGAAGGTGCAGAACGACTCGCTCAACT\n+CCAACCTGGCAGCGATCAATCGCAATCAGCAGCAGGTCAATAACGAACTCAAGCGGCAGACGGAGATCCATTACAGCCTG\n+TCCTTTAAGTGCTTGGAGGCGGAGCGGCGTCTCGCCGAGATCAAGGGTCTGGCAGATGACCCCGAGGTAGAGGCCACAAA\n+CATGGCCCGGCTTAACACTCTGGAGCAGGAGTATGAAAAGTTGCAACGCCTCATCGCCACCACGGAGGCGCAGAATAAGA\n+A
GCTAAACTATAACATGAACAACCTGGTCGTCCAGTACAACGCCGACGAGAAGGA'..b'TCCTTTG\n+ACAATCTGCCCGCGTTGGAACAACTACTTCTAAGGAACTGTGGGTTGGGCAACATCGAGTGGGAAATCGTGAGGCCAAGA\n+CTCAAATTGTTGCACTATTTGGACTTGGGTGGAGCTCAAAAGACGGGCAACTATGAACATCAGTTGGACGTATCCGCGTT\n+CTCTCCTGAGACAACTACTAACGCTGAAGAGATATCTACTATCCTAGCGAAGAGAGCGATGGCTCCAGAAGTAGTGGGCA\n+CCACTACACTAGGACCTACTACTTCCATAGAAATTTCACCACCATCAACACAAAGTACAACAACACCCAAAGAAGAAAGT\n+ACATCTATGACAGAAACAACGATATTAACAACACCATCGCCAAAATGCGAAGAGGAACTTTGCCAGGATCTAGAGTGCTC\n+CAGAATTACCACCGATACGGTTGCATCTGCAGATCTGGGAAAATCGTCGTGCCAGGATGGTTTACTGGTGGAGATCTGCG\n+AATCGACTTGCACCACTCCCACATTCTTCTGTGTGATATTAGGCGAGAACTTTACCTCCGCATCCAACTGTTGTTCCCAC\n+CATACCATGCGATGTGTGGTCTCTGCACAGGTCTCCTGGTTCGAGGACCACAGCGGACTGGTCATTGGCTTGGGAGTGGG\n+TCTCCTCTTCATCGGCAGCTTCCTCGGCATGCTCATCGTCTTCGGAACCCTGCGCCTGAATCCATCCTGGTTGCGAGGCA\n+ATAAGCGGCGGGAGTCGAATACGATAGGACTGATTCAGGGAAGATTTGAGAAGGACCCGTATGAGCAAGTGGGGTAG\n+>FUN_000016-T1 FUN_000016\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000017-T1 
FUN_000017\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGgatctggatctggctccggctcTCGGGCCAgc\n+agccgctcaaagtctggttctcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCCAGAAATCCGG\n+atccagatcgagatcggtatcacgttcccgatcccgttccaagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagt\n+ccggttcccgatcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGTCTCGCAGTGGCTCGCCTTCT\n+GGTTCAGGATCCAGCTCTGGAAGCGCCTCAGATGAATGATTAA\n+>FUN_000018-T1 
FUN_000018\n+ATGAAGGGCTCCCTTCGAATCCACCTGAAAGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAA\n+TCCCAGTCCTACACCCGCTTCTACCACCAGTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACA\n+CAGAGCCTGGAGCCCTTGGCAACGGAAATAACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGC\n+ATGCTCCAGCAGTCCCCCAGTTCGCCGGAGTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTC\n+CTTCACCACCAAGTACTTTCTGAAGAAGCACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCA\n+GGACCTTCACCTTCCAGCAGTCGTACCACAAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGT\n+GGACGCGCCTTCAAGGAACTGTCTACGCTGCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAAGT\n+CTGCGGCAAGTGTTTCCGGCAGCGAGTCTCTTTCCTTGTCCACACGCGCATCCACACGGGAGTGATGCCCTACAAGTGCG\n+AGCTCTGCCAAAAGACGTTCAGGTACAAGGTCAGTCAAAGGACCCATCGATGTCCCACCGAGGAGGCCCAGACACCGGAG\n+CAGCTGATAAAGGCGTTCCTGGAGGGCAACGACTCGCATACTCAGCCTTCACCAGCGAGCGCCGAAATAGCTGCCATCAA\n+CAGCAGCTCCATTGTGGATCCTGAGCAGGAGGCACTGCTTTCGCAGTCAATCGACGACATTGTCGTTGAGCAGTGCCAAA\n+AGCTGGGCATCTGTGGTGTGGAGCCGCGGGAGGAGGGACAGCTCATATCCCTACAGCCGGTTGCGGTGGTACACTTCAGC\n+GGGAATGGCTCTCCGCTGCAGCAACTCCAGAACTTGAGAATCTACTCACCGCAACAAACAGAGCTACCTAGTTCCGATGG\n+CGAAGTCTTCCAGCGCTTTTTGATGGACGCCACGTAG\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_augustus/Genus_species.discrepency.report.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_augustus/Genus_species.discrepency.report.txt Thu Aug 26 06:55:33 2021 +0000 |
b |
@@ -0,0 +1,139 @@ +Discrepancy Report Results + +Summary +DISC_PROTEIN_NAMES:All proteins have same name "hypothetical protein" +DISC_SOURCE_QUALS_ASNDISC:taxname (all present, all same) +DISC_FEATURE_COUNT:gene: 18 present +DISC_FEATURE_COUNT:CDS: 18 present +DISC_FEATURE_COUNT:mRNA: 18 present +DISC_COUNT_NUCLEOTIDES:4 nucleotide Bioseqs are present +JOINED_FEATURES:32 features have joined locations. +NO_ANNOTATION:2 bioseqs have no features +DISC_QUALITY_SCORES:Quality scores are missing on all sequences. +ONCALLER_COMMENT_PRESENT:4 comment descriptors were found (all same) +MISSING_GENOMEASSEMBLY_COMMENTS:4 bioseqs are missing GenomeAssembly structured comments +MOLTYPE_NOT_MRNA:4 molecule types are not set as mRNA. +TECHNIQUE_NOT_TSA:4 technique are not set as TSA +MISSING_STRUCTURED_COMMENT:4 sequences do not include structured comments. +MISSING_PROJECT:22 sequences do not include project. +DISC_INCONSISTENT_MOLINFO_TECH:Molinfo Technique Report (some missing, all same) + + +Detailed Report + +DiscRep_ALL:DISC_PROTEIN_NAMES::All proteins have same name "hypothetical protein" + +DiscRep_ALL:DISC_SOURCE_QUALS_ASNDISC::taxname (all present, all same) +DiscRep_SUB:DISC_SOURCE_QUALS_ASNDISC::4 sources have 'Genus species' for taxname +DiscRep_ALL:DISC_FEATURE_COUNT::gene: 18 present +DiscRep_ALL:DISC_FEATURE_COUNT::CDS: 18 present +DiscRep_ALL:DISC_FEATURE_COUNT::mRNA: 18 present +DiscRep_ALL:DISC_COUNT_NUCLEOTIDES::4 nucleotide Bioseqs are present +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:JOINED_FEATURES::32 features have joined locations. 
+DiscRep_SUB:JOINED_FEATURES::32 features have joined location but no exception +genome:CDS hypothetical protein (sample:2126-2199, 2258-3224, 3284-3490, 3549-3863) FUN_000002 +genome:mRNA hypothetical protein (sample4:2126-2199, 2258-3224, 3284-3490, 3549-3863) FUN_000017 +genome:CDS hypothetical protein (sample4:2126-2199, 2258-3224, 3284-3490, 3549-3863) FUN_000017 +genome:mRNA hypothetical protein (sample:2126-2199, 2258-3224, 3284-3490, 3549-3863) FUN_000002 +genome:CDS hypothetical protein (sample4:c5494-4930, c4759-4248) FUN_000018 +genome:mRNA hypothetical protein (sample4:c5494-4930, c4759-4248) FUN_000018 +genome:mRNA hypothetical protein (sample:c5802-5797, c5539-4883) FUN_000003 +genome:CDS hypothetical protein (sample:c5802-5797, c5539-4883) FUN_000003 +genome:CDS hypothetical protein (sample:c10557-10549, c10462-8696) FUN_000004 +genome:mRNA hypothetical protein (sample:c10557-10549, c10462-8696) FUN_000004 +genome:mRNA hypothetical protein (sample:c15214-15209, c14648-14247) FUN_000005 +genome:CDS hypothetical protein (sample:c15214-15209, c14648-14247) FUN_000005 +genome:CDS hypothetical protein (sample:c21705-21700, c21515-19533) FUN_000006 +genome:mRNA hypothetical protein (sample:c21705-21700, c21515-19533) FUN_000006 +genome:CDS hypothetical protein (sample:c35679-35675, c35655-35648, c35594-34843) FUN_000007 +genome:mRNA hypothetical protein (sample:c35679-35675, c35655-35648, c35594-34843) FUN_000007 +genome:CDS hypothetical protein (sample:40223-40396, 40659-41234) FUN_000008 +genome:mRNA hypothetical protein (sample:40223-40396, 40659-41234) FUN_000008 +genome:mRNA hypothetical protein (sample:41267-41274, 41437-41444, 41707-42107) FUN_000009 +genome:CDS hypothetical protein (sample:41267-41274, 41437-41444, 41707-42107) FUN_000009 +genome:CDS hypothetical protein (sample:87202-87207, 88054-88320) FUN_000010 +genome:mRNA hypothetical protein (sample:87202-87207, 88054-88320) FUN_000010 +genome:CDS hypothetical protein (sample:94727-94732, 
94873-95016, 95449-95583) FUN_000011 +genome:mRNA hypothetical protein (sample:94727-94732, 94873-95016, 95449-95583) FUN_000011 +genome:CDS hypothetical protein (sample:133134-133142, 133209-134539, 134668-135510, 135569-136346) FUN_000012 +genome:mRNA hypothetical protein (sample:133134-133142, 133209-134539, 134668-135510, 135569-136346) FUN_000012 +genome:CDS hypothetical protein (sample:144294-144551, 149012-149244, 149367-149588, 149654-149897, 149952-150112, 150174-150248, 151966-152072, 152314-152429, 152496-152751, 153651-159010, 159150-164491, 167135-168360, 168722-169208, 169350-169416) FUN_000013 +genome:mRNA hypothetical protein (sample:144294-144551, 149012-149244, 149367-149588, 149654-149897, 149952-150112, 150174-150248, 151966-152072, 152314-152429, 152496-152751, 153651-159010, 159150-164491, 167135-168360, 168722-169208, 169350-169416) FUN_000013 +genome:CDS hypothetical protein (sample:192049-192067, 193549-193658, 194041-194455, 194518-194669) FUN_000014 +genome:mRNA hypothetical protein (sample:192049-192067, 193549-193658, 194041-194455, 194518-194669) FUN_000014 +genome:CDS hypothetical protein (sample:c210553-210548, c210474-209044) FUN_000015 +genome:mRNA hypothetical protein (sample:c210553-210548, c210474-209044) FUN_000015 + +DiscRep_ALL:NO_ANNOTATION::2 bioseqs have no features +genome:sample2 (length 2030) +genome:sample3 (length 2100) + +DiscRep_ALL:DISC_QUALITY_SCORES::Quality scores are missing on all sequences. 
+ +DiscRep_ALL:ONCALLER_COMMENT_PRESENT::4 comment descriptors were found (all same) +genome:sample:"Annotated using 1.8.7" +genome:sample2:"Annotated using 1.8.7" +genome:sample3:"Annotated using 1.8.7" +genome:sample4:"Annotated using 1.8.7" + +DiscRep_ALL:MISSING_GENOMEASSEMBLY_COMMENTS::4 bioseqs are missing GenomeAssembly structured comments +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:MOLTYPE_NOT_MRNA::4 molecule types are not set as mRNA. +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:TECHNIQUE_NOT_TSA::4 technique are not set as TSA +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:MISSING_STRUCTURED_COMMENT::4 sequences do not include structured comments. +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:MISSING_PROJECT::22 sequences do not include project. 
+genome:sample (length 215740) +genome:ncbi:FUN_000001-T1 (length 124) +genome:ncbi:FUN_000002-T1 (length 520) +genome:ncbi:FUN_000003-T1 (length 220) +genome:ncbi:FUN_000004-T1 (length 591) +genome:ncbi:FUN_000005-T1 (length 135) +genome:ncbi:FUN_000006-T1 (length 662) +genome:ncbi:FUN_000007-T1 (length 254) +genome:ncbi:FUN_000008-T1 (length 249) +genome:ncbi:FUN_000009-T1 (length 138) +genome:ncbi:FUN_000010-T1 (length 90) +genome:ncbi:FUN_000011-T1 (length 94) +genome:ncbi:FUN_000012-T1 (length 986) +genome:ncbi:FUN_000013-T1 (length 4717) +genome:ncbi:FUN_000014-T1 (length 231) +genome:ncbi:FUN_000015-T1 (length 478) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) +genome:ncbi:FUN_000016-T1 (length 124) +genome:ncbi:FUN_000017-T1 (length 520) +genome:ncbi:FUN_000018-T1 (length 358) + +DiscRep_ALL:DISC_INCONSISTENT_MOLINFO_TECH::Molinfo Technique Report (some missing, all same) +DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::technique (all missing) +DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::4 Molinfos are missing field technique +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_augustus/Genus_species.error.summary.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_augustus/Genus_species.error.summary.txt Thu Aug 26 06:55:33 2021 +0000 |
b |
@@ -0,0 +1,1 @@ + 2 WARNING: SEQ_FEAT.ShortExon |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_augustus/Genus_species.gbk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_augustus/Genus_species.gbk Thu Aug 26 06:55:33 2021 +0000 |
b |
b'@@ -0,0 +1,4258 @@\n+LOCUS sample 215740 bp DNA linear 21-JUL-2021\n+DEFINITION Genus species.\n+ACCESSION \n+VERSION\n+KEYWORDS .\n+SOURCE Genus species\n+ ORGANISM Genus species\n+ Unclassified.\n+REFERENCE 1 (bases 1 to 215740)\n+ AUTHORS Palmer,J.M.\n+ TITLE Direct Submission\n+ JOURNAL Submitted (21-JUL-2021) CFMR, USDA Forest Service, 1 Gifford\n+ Pinchot Drive, Madison, WI 53726, USA\n+COMMENT \'Annotated using 1.8.7\'.\n+FEATURES Location/Qualifiers\n+ source 1..215740\n+ /organism="Genus species"\n+ /mol_type="genomic DNA"\n+ gene complement(1092..1466)\n+ /locus_tag="FUN_000001"\n+ mRNA complement(1092..1466)\n+ /locus_tag="FUN_000001"\n+ /product="hypothetical protein"\n+ CDS complement(1092..1466)\n+ /locus_tag="FUN_000001"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUN_000001-T1"\n+ /translation="MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAG\n+ ESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKRFIRNPNYVKANEFYDKMLSSEY\n+ VSKRYKDLPPPHPGFGADQPPA"\n+ gene 2126..3863\n+ /locus_tag="FUN_000002"\n+ mRNA join(2126..2199,2258..3224,3284..3490,3549..3863)\n+ /locus_tag="FUN_000002"\n+ /product="hypothetical protein"\n+ CDS join(2126..2199,2258..3224,3284..3490,3549..3863)\n+ /locus_tag="FUN_000002"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUN_000002-T1"\n+ /translation="MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFD\n+ LKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLGVTVDLINRELYQADSMTLLDPA\n+ DEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK\n+ KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPC\n+ AQVIFDSDPAPAGKNVPAQLEEMSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFI\n+ NGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGIYYNELETRVRLNKR\n+ RVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIG\n+ ETEKTSEDAAVGAQAASGADSPAQDLDLAPALGPAAAQSLVLGLVAAPDHAQIRRQDP\n+ RNPDPDRDRYHVPDPVPSPALGRVLGRDPSPVPDHVRAPDLALGRDRPAGLAVARLLV\n+ QDPALEAPQMND"\n+ gene complement(4883..5802)\n+ /locus_tag="FUN_000003"\n+ mRNA 
complement(join(4883..5539,5797..5802))\n+ /locus_tag="FUN_000003"\n+ /product="hypothetical protein"\n+ CDS complement(join(4883..5539,5797..5802))\n+ /locus_tag="FUN_000003"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUN_000003-T1"\n+ /translation="MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPN\n+ PSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGNNSTCTASQPYALSGALSMLQQS\n+ PSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+ KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCEVCGEYRSNPLISRPL\n+ FC"\n+ gene complement(8696..10557)\n+ /locus_tag="FUN_000004"\n+ mRNA complement(join(8696..10462,10549..10557))\n+ /locus_tag="FUN_000004"\n+ /product="hypothetical protein"\n+ CDS complement(join(8696..10462,10549..10557))\n+ /locus_tag="FUN_000004"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUN_000004-T'..b'1 agcttttggc actgctcaac gacaatgtcg tcgattgact gcgaaagcag tgcctcctgc\n+ 4501 tcaggatcca caatggagct gctgttgatg gcagctattt cggcgctcgc tggtgaaggc\n+ 4561 tgagtatgcg agtcgttgcc ctccaggaac gcctttatca gctgctccgg tgtctgggcc\n+ 4621 tcctcggtgg gacatcgatg ggtcctttga ctgaccttgt acctgaacgt cttttggcag\n+ 4681 agctcgcact tgtagggcat cactcccgtg tggatgcgcg tgtggacaag gaaagagact\n+ 4741 cgctgccgga aacacttgcc tgtggacgag tccgaaatac gaaaaggtta gacatggagt\n+ 4801 gacccggaaa agaaggtata cctctccttc atttaaagta aatagggcaa atcgcaatgg\n+ 4861 agtatgctca tttataagct ggctaacaaa ataaggggcg gctaattaag gggtttgatc\n+ 4921 gatactcacc gcagacttcg cacttgaagg gcttttcgcc gctgtgaatc ctctggtggt\n+ 4981 tatgcagcgt agacagttcc ttgaaggcgc gtccacaaac cccgcagaca tggggcttta\n+ 5041 cctcgctgtg gtagagcaaa tgcttgtggt acgactgctg gaaggtgaag gtcctggcgc\n+ 5101 agatctcgca tgtgtacggc atctcgccgg tgtggagccg cttgtgcttc ttcagaaagt\n+ 5161 acttggtggt gaaggacttg ctgcacacat cgcactccca cagcttcggg gtggccgtgc\n+ 5221 ccgactccgg cgaactgggg gactgctgga gcatgctcag cgccccactc agcgcgtatg\n+ 5281 gctgggatgc ggtgcacgtg gagttatttc cgttgccaag ggctccaggc tctgtgtgtc\n+ 5341 gaatgcgatc gcaaatgctc 
agcttgggcg tggcagtaac tgcactggtg gtagaagcgg\n+ 5401 gtgtaggact gggattagga tttggattgg gattggagca gggcacgccc atcatgtgca\n+ 5461 ctactttcag gtggattcga agggagccct tcattcggaa cgtcttggag cagagatggc\n+ 5521 acttgtaggg ctcctggtcc tgtataaagc aataatcgga atttcactta tatttatcaa\n+ 5581 ttcatcaata tgccctcatg gccaaatatt ccattacatt accgtctgtc tctcagtttc\n+ 5641 aaatttatgc acaaaaatca ttcactttca ttcactatat cacaaagttg ccatggtttt\n+ 5701 aaattgatca aaaacaaatt aatatctatc atatatatac atagtcatat gaacagttga\n+ 5761 aaaattaatt gaaaataatg ggaacgatat acgtacatac atcagttgtt tttaaaatat\n+ 5821 aagggtatat agatttcttt cttgttgttg ttgattttaa ttacgtcaaa cttttgtttc\n+ 5881 agattcaatg taaatggtct agctttttaa gtatgatttt tttttgctgc cagtgagcat\n+ 5941 agaaaaaaaa aatcaaaatc gatataagaa tatgcgaaag tgcattacga aactctttag\n+ 6001 ataatagcac ttaatatatg tacatagcca atagttaccg gttccttctg ttggggttcc\n+ 6061 ttttgcttgg gttctccctc cgcattttcg tggactaagc ggacgtgcat gtccctcagc\n+ 6121 tcggtattcc ggaaactgaa ctcgcaaatg tcgcagtggg cgggcggggt ggtccgcaca\n+ 6181 ggcggtgggg ttgggacgac gggcttggac ctggttcgct tggccctccg tttgggagga\n+ 6241 gctgcggcaa ggaagccccg ggacgaggcg ggttggccat tcgctggact ctcgtttccc\n+ 6301 tcctgagcca tcagacttgt gtgcgagaac aggtggatgg tgagcttgtc cagccccagg\n+ 6361 aagagctcct tgcagttggc aaagggacag gccagtgggc cgttggccgc cttgatcagc\n+ 6421 ctctgctgca gtgcgtcaat actgccgaaa ctgggcaccg cgcagagcgg acacagcacc\n+ 6481 gaggtggaac acatttcgcc agtgcactca atcgaatctt atgcaaatgc ttcacctcct\n+ 6541 attgggatta tcctcctgtt cggtctgtga tcatctattc aggagtccat tcccagactg\n+ 6601 cctagtcttt ctgctttcaa aattttctaa aaatatcagc aagtgaagat ttttgaaaac\n+ 6661 tttgggccca gcaatctgac ttctcggcac cgatgccagc taacgaaata atgaaaaata\n+ 6721 atgaaatgcc cggcgcggat cgtcgaatcg tcaagaagac tttcggaaac actcgcagca\n+ 6781 ccgaaatccc atctctcgaa caaggcagtc tcttttctcc gtgtctctgg gtagctcatt\n+ 6841 tcgaaatata gctctgagca cggctatata ctatatgtat gtagaattat ttctggccga\n+ 6901 tatatgttgc actggcggcc atatagcctt cgttctagtc tttgtaacgc acgatgcgca\n+ 6961 ggagcaattc gctgagatga 
ccacatgcga tttgcgggac ttatctagag atctatcatt\n+ 7021 atcgccagat tggtttaata attggctttt ccgccaatat ccaattggaa tatggttggt\n+ 7081 tactgcaatt gtcgctccat tttttaagca ctccataaaa agtaaacaca ttaatatgta\n+ 7141 ctcttattgg agatttcttc tttcgatttt agtttcggac cagtgaaaat cattcgtttc\n+ 7201 attttcgtaa ataagaactg agaaaatatt attattatat atatttcttt attaggaaaa\n+ 7261 tacgaagatt gagtatttca gattgaatta gcatatccgt ctaaatctta atgctgtaat\n+ 7321 gagcttactt gagatctgat caaaaccaat acaaaaccca caccaaaggt ggtagctaat\n+ 7381 atacatattt tgtgtaatac ttttgtagag tatttactat tcagcgattt aaacaagcaa\n+ 7441 tcgcctagac acacacattt gtccgcctat gtgtatgtgc accgagctat acccccactg\n+ 7501 aatcgctgtg tgctattttt atggccgcga tgctctcttg ttttgacccg cttgggcaac\n+//\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_augustus/Genus_species.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_augustus/Genus_species.gff3 Thu Aug 26 06:55:33 2021 +0000 |
b |
b'@@ -0,0 +1,151 @@\n+##gff-version 3\n+sample\tfunannotate\tgene\t1092\t1466\t.\t-\t.\tID=FUN_000001;\n+sample\tfunannotate\tmRNA\t1092\t1466\t.\t-\t.\tID=FUN_000001-T1;Parent=FUN_000001;product=hypothetical protein;\n+sample\tfunannotate\texon\t1092\t1466\t.\t-\t.\tID=FUN_000001-T1.exon1;Parent=FUN_000001-T1;\n+sample\tfunannotate\tCDS\t1092\t1466\t.\t-\t0\tID=FUN_000001-T1.cds;Parent=FUN_000001-T1;\n+sample\tfunannotate\tgene\t2126\t3863\t.\t+\t.\tID=FUN_000002;\n+sample\tfunannotate\tmRNA\t2126\t3863\t.\t+\t.\tID=FUN_000002-T1;Parent=FUN_000002;product=hypothetical protein;\n+sample\tfunannotate\texon\t2126\t2199\t.\t+\t.\tID=FUN_000002-T1.exon1;Parent=FUN_000002-T1;\n+sample\tfunannotate\texon\t2258\t3224\t.\t+\t.\tID=FUN_000002-T1.exon2;Parent=FUN_000002-T1;\n+sample\tfunannotate\texon\t3284\t3490\t.\t+\t.\tID=FUN_000002-T1.exon3;Parent=FUN_000002-T1;\n+sample\tfunannotate\texon\t3549\t3863\t.\t+\t.\tID=FUN_000002-T1.exon4;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t2126\t2199\t.\t+\t0\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t2258\t3224\t.\t+\t1\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t3284\t3490\t.\t+\t0\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t3549\t3863\t.\t+\t0\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tgene\t4883\t5802\t.\t-\t.\tID=FUN_000003;\n+sample\tfunannotate\tmRNA\t4883\t5802\t.\t-\t.\tID=FUN_000003-T1;Parent=FUN_000003;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t5797\t5802\t.\t-\t.\tID=FUN_000003-T1.exon1;Parent=FUN_000003-T1;\n+sample\tfunannotate\texon\t4883\t5539\t.\t-\t.\tID=FUN_000003-T1.exon2;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t5797\t5802\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t4883\t5539\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tgene\t8696\t10557\t.\t-\t.\tID=FUN_000004;\n+sample\tfunannotate\tmRNA\t8696\t10557\t.\t-\t.\tID=FUN_000004-T1;Parent=FUN_000004;product=hypothetical protein;\n+sample\tfunannotate\texon\t10549\t10557\t.\t-\t.\tID=FUN_000004-T1.exon1;Parent=FUN_000004-T1;\n+sample\tfunannotate\texon\t8696\t10462\t.\t-\t.\tID=FUN_000004-T1.exon2;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t10549\t10557\t.\t-\t0\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t8696\t10462\t.\t-\t0\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tgene\t14247\t15214\t.\t-\t.\tID=FUN_000005;\n+sample\tfunannotate\tmRNA\t14247\t15214\t.\t-\t.\tID=FUN_000005-T1;Parent=FUN_000005;product=hypothetical protein;\n+sample\tfunannotate\texon\t15209\t15214\t.\t-\t.\tID=FUN_000005-T1.exon1;Parent=FUN_000005-T1;\n+sample\tfunannotate\texon\t14247\t14648\t.\t-\t.\tID=FUN_000005-T1.exon2;Parent=FUN_000005-T1;\n+sample\tfunannotate\tCDS\t15209\t15214\t.\t-\t0\tID=FUN_000005-T1.cds;Parent=FUN_000005-T1;\n+sample\tfunannotate\tCDS\t14247\t14648\t.\t-\t0\tID=FUN_000005-T1.cds;Parent=FUN_000005-T1;\n+sample\tfunannotate\tgene\t19533\t21705\t.\t-\t.\tID=FUN_000006;\n+sample\tfunannotate\tmRNA\t19533\t21705\t.\t-\t.\tID=FUN_000006-T1;Parent=FUN_000006;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t21700\t21705\t.\t-\t.\tID=FUN_000006-T1.exon1;Parent=FUN_000006-T1;\n+sample\tfunannotate\texon\t19533\t21515\t.\t-\t.\tID=FUN_000006-T1.exon2;Parent=FUN_000006-T1;\n+sample\tfunannotate\tCDS\t21700\t21705\t.\t-\t0\tID=FUN_000006-T1.cds;Parent=FUN_000006-T1;\n+sample\tfunannotate\tCDS\t19533\t21515\t.\t-\t0\tID=FUN_000006-T1.cds;Parent=FUN_000006-T1;\n+sample\tfunannotate\tgene\t34843\t35679\t.\t-\t.\tID=FUN_000007;\n+sample\tfunannotate\tmRNA\t34843\t35679\t.\t-\t.\tID=FUN_000007-T1;Parent=FUN_000007;product=hypothetical protein;\n+sample\tfunannotate\texon\t35675\t35679\t.\t-\t.\tID=FUN_000007-T1.exon1;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t35648\t35655\t.\t-\t.\tID=FUN_000007-T1.exon2;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t34843\t35594\t.\t-\t.\tID=FUN_000007-T1.exon3;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t35675\t35679\t.\t-\t0\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t35648\t35655\t.\t-\t1\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t34843\t35594\t.\t-\t2\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tgene\t40223\t41234\t.\t+\t.\tID=FUN_000008;\n+sample\tfunannotate\t'..b'N_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t149952\t150112\t.\t+\t0\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t150174\t150248\t.\t+\t1\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t151966\t152072\t.\t+\t1\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t152314\t152429\t.\t+\t2\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t152496\t152751\t.\t+\t0\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t153651\t159010\t.\t+\t2\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t159150\t164491\t.\t+\t0\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t167135\t168360\t.\t
+\t1\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t168722\t169208\t.\t+\t2\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t169350\t169416\t.\t+\t1\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tgene\t192049\t194669\t.\t+\t.\tID=FUN_000014;\n+sample\tfunannotate\tmRNA\t192049\t194669\t.\t+\t.\tID=FUN_000014-T1;Parent=FUN_000014;product=hypothetical protein;\n+sample\tfunannotate\texon\t192049\t192067\t.\t+\t.\tID=FUN_000014-T1.exon1;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t193549\t193658\t.\t+\t.\tID=FUN_000014-T1.exon2;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t194041\t194455\t.\t+\t.\tID=FUN_000014-T1.exon3;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t194518\t194669\t.\t+\t.\tID=FUN_000014-T1.exon4;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t192049\t192067\t.\t+\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t193549\t193658\t.\t+\t2\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t194041\t194455\t.\t+\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t194518\t194669\t.\t+\t2\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tgene\t209044\t210553\t.\t-\t.\tID=FUN_000015;\n+sample\tfunannotate\tmRNA\t209044\t210553\t.\t-\t.\tID=FUN_000015-T1;Parent=FUN_000015;product=hypothetical protein;\n+sample\tfunannotate\texon\t210548\t210553\t.\t-\t.\tID=FUN_000015-T1.exon1;Parent=FUN_000015-T1;\n+sample\tfunannotate\texon\t209044\t210474\t.\t-\t.\tID=FUN_000015-T1.exon2;Parent=FUN_000015-T1;\n+sample\tfunannotate\tCDS\t210548\t210553\t.\t-\t0\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample\tfunannotate\tCDS\t209044\t210474\t.\t-\t0\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample4\tfunannotate\tgene\t1092\t1466\t.\t-\t.\tID=FUN_000016;\n+sample4\tfunannotate\tmRNA\t1092\t1466\t.\t-\t.\tID=FUN_000016-T1;Parent=FUN_000016;product=hypothetical 
protein;\n+sample4\tfunannotate\texon\t1092\t1466\t.\t-\t.\tID=FUN_000016-T1.exon1;Parent=FUN_000016-T1;\n+sample4\tfunannotate\tCDS\t1092\t1466\t.\t-\t0\tID=FUN_000016-T1.cds;Parent=FUN_000016-T1;\n+sample4\tfunannotate\tgene\t2126\t3863\t.\t+\t.\tID=FUN_000017;\n+sample4\tfunannotate\tmRNA\t2126\t3863\t.\t+\t.\tID=FUN_000017-T1;Parent=FUN_000017;product=hypothetical protein;\n+sample4\tfunannotate\texon\t2126\t2199\t.\t+\t.\tID=FUN_000017-T1.exon1;Parent=FUN_000017-T1;\n+sample4\tfunannotate\texon\t2258\t3224\t.\t+\t.\tID=FUN_000017-T1.exon2;Parent=FUN_000017-T1;\n+sample4\tfunannotate\texon\t3284\t3490\t.\t+\t.\tID=FUN_000017-T1.exon3;Parent=FUN_000017-T1;\n+sample4\tfunannotate\texon\t3549\t3863\t.\t+\t.\tID=FUN_000017-T1.exon4;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tCDS\t2126\t2199\t.\t+\t0\tID=FUN_000017-T1.cds;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tCDS\t2258\t3224\t.\t+\t1\tID=FUN_000017-T1.cds;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tCDS\t3284\t3490\t.\t+\t0\tID=FUN_000017-T1.cds;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tCDS\t3549\t3863\t.\t+\t0\tID=FUN_000017-T1.cds;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tgene\t4248\t5494\t.\t-\t.\tID=FUN_000018;\n+sample4\tfunannotate\tmRNA\t4248\t5494\t.\t-\t.\tID=FUN_000018-T1;Parent=FUN_000018;product=hypothetical protein;\n+sample4\tfunannotate\texon\t4930\t5494\t.\t-\t.\tID=FUN_000018-T1.exon1;Parent=FUN_000018-T1;\n+sample4\tfunannotate\texon\t4248\t4759\t.\t-\t.\tID=FUN_000018-T1.exon2;Parent=FUN_000018-T1;\n+sample4\tfunannotate\tCDS\t4930\t5494\t.\t-\t0\tID=FUN_000018-T1.cds;Parent=FUN_000018-T1;\n+sample4\tfunannotate\tCDS\t4248\t4759\t.\t-\t2\tID=FUN_000018-T1.cds;Parent=FUN_000018-T1;\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_augustus/Genus_species.mrna-transcripts.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_augustus/Genus_species.mrna-transcripts.fa Thu Aug 26 06:55:33 2021 +0000 |
b |
b'@@ -0,0 +1,421 @@\n+>FUN_000001-T1 FUN_000001\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000002-T1 FUN_000002\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGgatctggatctggctccggctcTCGGGCCAgc\n+agccgctcaaagtctggttctcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCCAGAAATCCGG\n+atccagatcgagatcggtatcacgttcccgatcccgttccaagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagt\n+ccggttcccgatcacgttcgggctccagatctggctct
gggtcgcgatcgCCCAGCCGGTCTCGCAGTGGCTCGCCTTCT\n+GGTTCAGGATCCAGCTCTGGAAGCGCCTCAGATGAATGATTAA\n+>FUN_000003-T1 FUN_000003\n+ATGTATGACCAGGAGCCCTACAAGTGCCATCTCTGCTCCAAGACGTTCCGAATGAAGGGCTCCCTTCGAATCCACCTGAA\n+AGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAATCCCAGTCCTACACCCGCTTCTACCACCA\n+GTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACACAGAGCCTGGAGCCCTTGGCAACGGAAAT\n+AACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGCATGCTCCAGCAGTCCCCCAGTTCGCCGGA\n+GTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTCCTTCACCACCAAGTACTTTCTGAAGAAGC\n+ACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCAGGACCTTCACCTTCCAGCAGTCGTACCAC\n+AAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGTGGACGCGCCTTCAAGGAACTGTCTACGCT\n+GCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAAGTCTGCGGTGAGTATCGATCAAACCCCTTAA\n+TTAGCCGCCCCTTATTTTGTTAG\n+>FUN_000004-T1 FUN_000004\n+ATGATACATTCGGCTAAGACTACCTTGGTGGAGTGGACAGAGGCCATGGAAGACGGAAACAAGGGCTACCAGCTAATCGA\n+AAAGTACTACCTCGATGACCAGCAAAAGGCACGGGAGCTGAACATTAAGCGTCAGCTCTTGCAGGCGGACATCGACAAGC\n+GGCGCAAGCAGGTGGTGCTCCTCTATGACGAGCAAATGACGCTGGAGAAGAATCTGGAGCGAACCGCTTGTCTGTACAGG\n+TCTGCCCACGCGGAACGCCGCCAGATGGTGGAGACGTGGAAGAGCGCCGTGAACCAGATGACCCAGCGAGAGCACGACAT\n+TCAGCGCAGCGAGATAGAGTGTGCGGAGCTGGCCCAGAAAGCCCAGCAGACGGCTCAGACCTACAAGGAGTACGACAACC\n+AGCTAACCGAGGTCATCGAGAACAACCGCCAGGTGGAGCTCGCGATCGAGTCCCTCAACGAGGAAAGCTCCGACATGAAA\n+AACCAAATCCAGATCCTGATTGATGCCACATTACTGAAGGAGCGCGAGATCGACGGACTGCGTCGGGAACTGGAGAACCT\n+TTCGAACCGGGTTCACCTGCAGCGCATGGATAACCGTAGTCAGATGAAAAAGCGCGATGAGAAGGCCAAGGAGCTGGAGA\n+ACTTCGCTTCGGTGATGGAGAAGGTTAACGCTCGGCTGAAGTCTGTACAGAACAAGGCCCTCAATGCGGAGCAGCGGCTG\n+CAAATTCTGGAGGAGATGATGCAGGCGGAGGAGACGGCTCTGCGAAACCTGGACAAGGAGCAGGAGAAGGTCAACGAGAT\n+GTTGTACCGCACCCAGAGGCAGGTCATCGAGCTGCAGGACGAGGAGAAGGTCTTGAAGGTGCAGAACGACTCGCTCAACT\n+CCAACCTGGCAGCGATCAATCGCAATCAGCAGCAGGTCAATAACGAACTCAAGCGGCAGACGGAGATCCATTACAGCCTG\n+TCCTTTAAGTGCTTGGAGGCGGAGCGGCGTCTCGCCGAGATCAAGGGTCTGGCAGATGACCCCGAGGTAGAGGCCACAAA\n+CATGGCCCGGCTTAACACTCTGGAGCAGGAGTATGAAAAGTTGCAACGCCTCATCGCCACCACGGAGGCGCAGAATAAGA\n+A
GCTAAACTATAACATGAACAACCTGGTCGTCCAGTACAACGCCGACGAGAAGGA'..b'TCCTTTG\n+ACAATCTGCCCGCGTTGGAACAACTACTTCTAAGGAACTGTGGGTTGGGCAACATCGAGTGGGAAATCGTGAGGCCAAGA\n+CTCAAATTGTTGCACTATTTGGACTTGGGTGGAGCTCAAAAGACGGGCAACTATGAACATCAGTTGGACGTATCCGCGTT\n+CTCTCCTGAGACAACTACTAACGCTGAAGAGATATCTACTATCCTAGCGAAGAGAGCGATGGCTCCAGAAGTAGTGGGCA\n+CCACTACACTAGGACCTACTACTTCCATAGAAATTTCACCACCATCAACACAAAGTACAACAACACCCAAAGAAGAAAGT\n+ACATCTATGACAGAAACAACGATATTAACAACACCATCGCCAAAATGCGAAGAGGAACTTTGCCAGGATCTAGAGTGCTC\n+CAGAATTACCACCGATACGGTTGCATCTGCAGATCTGGGAAAATCGTCGTGCCAGGATGGTTTACTGGTGGAGATCTGCG\n+AATCGACTTGCACCACTCCCACATTCTTCTGTGTGATATTAGGCGAGAACTTTACCTCCGCATCCAACTGTTGTTCCCAC\n+CATACCATGCGATGTGTGGTCTCTGCACAGGTCTCCTGGTTCGAGGACCACAGCGGACTGGTCATTGGCTTGGGAGTGGG\n+TCTCCTCTTCATCGGCAGCTTCCTCGGCATGCTCATCGTCTTCGGAACCCTGCGCCTGAATCCATCCTGGTTGCGAGGCA\n+ATAAGCGGCGGGAGTCGAATACGATAGGACTGATTCAGGGAAGATTTGAGAAGGACCCGTATGAGCAAGTGGGGTAG\n+>FUN_000016-T1 FUN_000016\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000017-T1 
FUN_000017\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGgatctggatctggctccggctcTCGGGCCAgc\n+agccgctcaaagtctggttctcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCCAGAAATCCGG\n+atccagatcgagatcggtatcacgttcccgatcccgttccaagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagt\n+ccggttcccgatcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGTCTCGCAGTGGCTCGCCTTCT\n+GGTTCAGGATCCAGCTCTGGAAGCGCCTCAGATGAATGATTAA\n+>FUN_000018-T1 
FUN_000018\n+ATGAAGGGCTCCCTTCGAATCCACCTGAAAGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAA\n+TCCCAGTCCTACACCCGCTTCTACCACCAGTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACA\n+CAGAGCCTGGAGCCCTTGGCAACGGAAATAACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGC\n+ATGCTCCAGCAGTCCCCCAGTTCGCCGGAGTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTC\n+CTTCACCACCAAGTACTTTCTGAAGAAGCACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCA\n+GGACCTTCACCTTCCAGCAGTCGTACCACAAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGT\n+GGACGCGCCTTCAAGGAACTGTCTACGCTGCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAAGT\n+CTGCGGCAAGTGTTTCCGGCAGCGAGTCTCTTTCCTTGTCCACACGCGCATCCACACGGGAGTGATGCCCTACAAGTGCG\n+AGCTCTGCCAAAAGACGTTCAGGTACAAGGTCAGTCAAAGGACCCATCGATGTCCCACCGAGGAGGCCCAGACACCGGAG\n+CAGCTGATAAAGGCGTTCCTGGAGGGCAACGACTCGCATACTCAGCCTTCACCAGCGAGCGCCGAAATAGCTGCCATCAA\n+CAGCAGCTCCATTGTGGATCCTGAGCAGGAGGCACTGCTTTCGCAGTCAATCGACGACATTGTCGTTGAGCAGTGCCAAA\n+AGCTGGGCATCTGTGGTGTGGAGCCGCGGGAGGAGGGACAGCTCATATCCCTACAGCCGGTTGCGGTGGTACACTTCAGC\n+GGGAATGGCTCTCCGCTGCAGCAACTCCAGAACTTGAGAATCTACTCACCGCAACAAACAGAGCTACCTAGTTCCGATGG\n+CGAAGTCTTCCAGCGCTTTTTGATGGACGCCACGTAG\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_augustus/Genus_species.proteins.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_augustus/Genus_species.proteins.fa Thu Aug 26 06:55:33 2021 +0000 |
b |
b'@@ -0,0 +1,158 @@\n+>FUN_000001-T1 FUN_000001\n+MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAGESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKR\n+FIRNPNYVKANEFYDKMLSSEYVSKRYKDLPPPHPGFGADQPPA\n+>FUN_000002-T1 FUN_000002\n+MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFDLKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLG\n+VTVDLINRELYQADSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK\n+KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPCAQVIFDSDPAPAGKNVPAQLEE\n+MSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFINGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGI\n+YYNELETRVRLNKRRVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIGETEKTSED\n+AAVGAQAASGADSPAQDLDLAPALGPAAAQSLVLGLVAAPDHAQIRRQDPRNPDPDRDRYHVPDPVPSPALGRVLGRDPS\n+PVPDHVRAPDLALGRDRPAGLAVARLLVQDPALEAPQMND\n+>FUN_000003-T1 FUN_000003\n+MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPNPSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGN\n+NSTCTASQPYALSGALSMLQQSPSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCEVCGEYRSNPLISRPLFC\n+>FUN_000004-T1 FUN_000004\n+MIHSAKTTLVEWTEAMEDGNKGYQLIEKYYLDDQQKARELNIKRQLLQADIDKRRKQVVLLYDEQMTLEKNLERTACLYR\n+SAHAERRQMVETWKSAVNQMTQREHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELAIESLNEESSDMK\n+NQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRDEKAKELENFASVMEKVNARLKSVQNKALNAEQRL\n+QILEEMMQAEETALRNLDKEQEKVNEMLYRTQRQVIELQDEEKVLKVQNDSLNSNLAAINRNQQQVNNELKRQTEIHYSL\n+SFKCLEAERRLAEIKGLADDPEVEATNMARLNTLEQEYEKLQRLIATTEAQNKKLNYNMNNLVVQYNADEKELEMVRFKI\n+KEAQVYCEGTVKRLRQIRYENSELIVDLNMVKMRCSDLEVGIGGCEQGTYDLEQHRLAFRRAIKDRTVELRSQEDVLLLK\n+KKHLNEELSTLRADLGERKKQIEAMKARFELTAQLLGKNEDGSIMTSTQLKVVSAQERQMLADEGDALNKKVLKAEKEVV\n+ALENTLRQFDKSNDNYRKTFRSVDENSKGEL\n+>FUN_000005-T1 FUN_000005\n+MRCVFCGSGSEQQHSRWEIKMLQESCRTDHGFHQDSQAIQYLYEILASYNRDEQRAFLQFVTGSPRLPTGGFKALTPPLT\n+IVRKTLDENQNPNDYLPSVMTCVNYLKLPDYSSREVMRQKLKVAANEGSMSFHLS\n+>FUN_000006-T1 
FUN_000006\n+MSFCDFLRDAALGANSANLSIVAAALSAARDVGGGSDGGGSAGGATPATGASASSVGNTSAVGASSSSNSSAGQAASSNS\n+NNVTATGSGSAPGGGPTSTGTTSGTQHGSGSGAAAAVDSESDDSEVGRLQALLEARGLPPHLFGALGPRVTHILHRTIGN\n+SSSSKANQLLQGLQSHDESQQLQAAIEMCQMLVMGNEDTLAGFPIKQVVPALIQLLRMEHNFDIMNNACRALAYMLEALP\n+RSSGTVVEAVPVFLEKLQVIQCMDVAEQSLSALEILSRRHNKAILQANGISACLTYLDFFSIVAQRAALAIAANCCLNMH\n+PEEFHFVAESLPLLARLLSQQDKKCIESVCSAFCRLVESFQHDGQRLQQIASPDLLKNCQQLLLVTPAILNTGTFTAVVR\n+MLSLMCCSCPDLAISLLRNDIAATLLYLLTGNAEPAAASATHVELISRSPSELYELTCLIGELMPRLPLDGIFAVDSLLD\n+RPTLNTQDQVHWQWRDDRGSWHNYSTIDSRLIEAANQSSEDEISLSTFGRTYTVDFHAMQQINEDTGTTRPVQRRLNHNY\n+VAPMSAGQDLTTTSAGSAAAGGASTSAAAAAASSNNNNNNNNNPPGNSVNLNQVKRRPSLDARIACLKVRNKMFCLFTRY\n+TLKLNNLPNYNVFFMKYFRRHS\n+>FUN_000007-T1 FUN_000007\n+MKIRYCVMILGSRQMHFIVNEDQSSAASPLVWAGITAEEYFPEYRMEAAHPDQEYIVLGVSSANLGRALSVLRGGGVNSC\n+KLKLQRIQFPCISVIASVLTSSSTEAREVVHDVPVTIIPGSDWSAYVVPRVPNSQLALGLPSLRLLKSLIDKLKNISPSL\n+EFQVNVDGELNVIATSEMSTVTSRFQKLLIRTVSGSQQEASCSVDSRKASAFFGALQLPNEELTIGIDREHSIHLQIDVR\n+QDVVLHSILPAVCM\n+>FUN_000008-T1 FUN_000008\n+MCGNPAVGNGTRALILVGGYGTRLRPLTLSTPKPLVEFANKPILLHQLEALVDAGCRQVILAVSYRAEQMEKELKVEAKK\n+LGVELIFSHETEPLGTAGPLALAKTILAASSEPFFVLNSDVICDFPFKQLVQFHCNHGKEGTIVVTKVEEPSKYGVVLYD\n+ENGCIKNFIEKPQEFVSNKINAGIYIFNPSVLDRIEVKPTSIEKEVFPEMTQQQELYAMDLTGFWMDIGQPKDFLTGTTD\n+RITRLLAQS\n+>FUN_000009-T1 FUN_000009\n+MWIVNCMCLYLSSLRQKQSPKLYTGPGVVGNVLVDPTAKIGEGCRIGPNVTIGPDVVIEDGVCIKRSTILKGAIVRSHSW\n+LDSCIVGWRSTVGRWVRIEGITVLGEDVIVKDELYINGGQVLPHKSIAASVPEPQIIM\n+>FUN_000010-T1 FUN_000010\n+MDFIHEAVGTSAAPSANPGPMPLCQPVRLELPFSPRHSFALGHFPFQLCPCLYAVWVQTMGLVLLLLIVFAPFLFRVLFK\n+PCNPKRYLTT\n+>FUN_000011-T1 FUN_000011\n+MKVHGNVDEKSPSHGYDSEGEESSSSSIITGGAQTPPSTRLDGSAGSSSGHHPPSDWYHTTAPSGSAEAMNPLNHFGHHH\n+HHHHLMHPGAATAY\n+>FUN_000012-T1 
FUN_000012\n+MQRGIDSFFKRLPAKAKSAEAENGETPSKAPKRRKAVIISSDEDEVVSPPETKKRKASKTASSEDDVVAATPEPIAKKAR\n+NGQKPALSKLKRHVDPTELFGGETKRVIVPKPKTKAVLEFENEDIDRSLMEVDLDESIKEAAPEKKVHSITRSSPSPKRA\n+KNSSPEPPKPKSTKSKATTPRVKKEKPAADLESSVLTDEERHERKRASAVLYQKYKNRSSCLNPGSKEIPKGSPDCLSGL\n+TFVVTGVLESMEREEAESVIKEYGGKVMTVVGKKLKYLVVGEEAGPKKLAVAEELNIPILSEDGLFDLIREKSGIAKQVK\n+EEKKSPKKEHSSEEKGKKEVKTSRRSSDKKEKEATKLKYGEKHDIAKHKVKEEHTSPKETKDKLNDVPAVTLKVKKEPSS\n+QKEHPPSPRTADLKTLDVVGMAWVDKHKPTSIKEIVGQAGAASNVTKLMNWLSKWYVNHDGNKKPQRPNPWAKNDDGSF'..b'KPSTPTLEKQSPIDLGTSTESYLEPIEERIAKILDRGGARTEDSESSSGGSRKPPRIEK\n+PARANAGKKLSVTRADAGKSGSDRSSQESKSSFDSKGSLSVESRGSFETESSSGSLGAAQRRGELAQKEQQSTWRPFPIE\n+SSNSSSTDDPWHHVETDGGYERYDAQNPLRDSSDSDVKEASPDDQKDASDASYQDELNDFPATFGYPAMTSSLGGIGVNP\n+TDIIGYSTGFTLGRTLSRISERSTASEKSSMEDDVSKASTHSVSMRDESVGSTDHQPSLSSDSRSNTNLAYISDADRRTS\n+AEMPEIPCDSATGDRLSSFGSLNEPKSPTLVTGRFSVTHVDEQQGDDVERHTLMCLSNAGSQDSEDWPLPEIPFDHVPVK\n+PADSLYAMPDLDKPVPKSFCWKASLSFQQSQDSLDWPSPPSSAIGAPIIVENIETYYASEVQSADKVILDEEMAVGPPDV\n+AKVLPYEDTAYLMSAAFDDNDFGNEQLQPDTVSCLSSTLSAASCLSSSLNVSCTTSSTQATARALRKNSSPEVIVAQPTR\n+SPAPRSPLSEDELFSSDDVFMPGTIKVQLSPDAQLRKLSKGSNNSDTSIDDILSGSTTYLEDQTTVRKNYEARLSSGGGG\n+ASCKKCSHSSHSEEETSSLGTDLDGTVRMGGLQQKKCTHSSHSEDTSIGLSISEWSTGTNTVRQYANLSGSDSLSAVSTH\n+SCAKSEKSNQTKSSISSINKSAESLNEQSGGSSFSHKFSGDNGSSDGLRYDMLSNSETDKLSEATSATRSDDTTLTLTEM\n+AHTISEWSTSSSRTLVGVAPGEYLPLKQALSGNKTSLSSPSEEKRCALPQVHRRSGSNGNQARAAQEHADSQTGPETSAA\n+ARKRRSLEMMSKLYQSQEICSESESPFVERLYAHSEKLTERYQSQEFVPLHGGPPASHLASSTTSQIQTQQPQQVRQKPR\n+APQPPTKPKPAVTRPIMQALLNKMKQPGLAEQAAEAAEAEEKKAMIAASAVAAKPPPPPVPTVPPIVTPSDLPGDAVAPP\n+PKPLAKHHSYDDRTLSKTQIREFKTTSKQLRQSSSFHEHMLSKSQQSSQELPMRIDEERDPHSTSSATNTTTTTNTLNSE\n+STEPNSPQMPQRADKLVRCSPYYSSSLSSESPPNQLLQKPPRKTATQLSAGAVAASLKSPPSGNDTDSSLDVRGQEAKMR\n+SRGYRKKRQLPVKRMRANLTAAALLEQAESSECSEGYVPEVDSGSSEYSSCQRDDQYLEFDEELERDQTDDYEDYPQYSG\n+KFESLDMSDNVDEMGFPRYDRLSHITKPMYHQALVMERPNPVQLPAPANHPMPPATGQPVKPARTKKRQFKREDSTAAGT\n+SGHSTAAPQVRPYHGRSYCNPEESEYETRGGGLSDELANSSEDSCSGFGGDAGASGSGTIRRGTTKGAGQDQEQGTGGQA\n+RHVPYPDFLSDYESE
PIEYERYACGLDIRVDPPPKFHDSDELSDQRGEITTPPTRYDLTLGSDKSSSLSRSEAGTYDVIQ\n+AEIQHAKRQELATGVATASHQNGNGNGNGHTLSTQHDIEAEVKKRKWPTEPSYFLAKELLMTERTYKKDLDVLNTTFRQV\n+LSLGDVEQLQPLFELLDSLAQHHNLFLRDIEHRMVQWEGRGGHEAHRIGDVMMKHMAALPIYDEYVQTHLDILHCMNDMY\n+EGDERFRQVYKEFEQQKVCYLPIGELLLKPLNRLLHYQLILERLCDYYGEEHIDYADAMAVHHLLVRSTKGIRSQLPDSA\n+NFVELCELQRDINFEQLVQPHRRLIRQGCLLKHSKRGLQQRMFFLFSDLLLYGSKSPLDQSFRILGHVPVRSLLTENAEH\n+NTFSIFGGQCAITVSAGTTAEKTLWLAELSKAAADIKNRPPNMQLQLTTLKNCSSSEEGLDLFGLSNGNNSSLNSSVNGG\n+GPLTTQQQKLQLQQQQQNRTQPSRSNTALHVCWHRGATVGLGDHLIAAEHQLSGYLLRKFKNSSGWQKLWVVFTSFCLYF\n+YKSYQDEFALASLPLLGYTVGPPGHQDAVQKEFVFKLSFKNHVYFFRAESAHTYNRITCRVDLDLCSARYVQCRSTE\n+>FUN_000014-T1 FUN_000014\n+MVQAETPPYIKQCHRNDPKLVDCFIGAIEHLKPYLANGIPDIQLPSVEPFKMDTLALQLTEGPQGYKITLKNMEAFGASN\n+FKVTSLKLSEGSEPFKAKIVMPKLKIEAKYTSSGVLLILPASGGGDFHANFEGVSADLTGKTSIHAFKGANYLHIDALSL\n+VLDVKDVKMSISGAFNNNRILLEATNLFLRENSQVVLEAMQAQLQKKLASEFGKLANQLLKNVPVEQFYVD\n+>FUN_000015-T1 FUN_000015\n+MASKSFDLVIEEKTKKPERLYQPRRMRWLKYIILPAVFSFALLLILVNVDFSDNSEDSTHLGNDTSLIISGYGFENNTLR\n+RGFFSGGIALHSLVIENCTIVHINDAAFNQESTVNITSLQLINVQLENLTESALEGLQKLQNFTLVNENNHFRPFGFLSA\n+VAESLVSAEIHQSLAAAISYSVCDFLGSRNFPQLKYLDLSGTHLDKSLIKESFDNLPALEQLLLRNCGLGNIEWEIVRPR\n+LKLLHYLDLGGAQKTGNYEHQLDVSAFSPETTTNAEEISTILAKRAMAPEVVGTTTLGPTTSIEISPPSTQSTTTPKEES\n+TSMTETTILTTPSPKCEEELCQDLECSRITTDTVASADLGKSSCQDGLLVEICESTCTTPTFFCVILGENFTSASNCCSH\n+HTMRCVVSAQVSWFEDHSGLVIGLGVGLLFIGSFLGMLIVFGTLRLNPSWLRGNKRRESNTIGLIQGRFEKDPYEQVG\n+>FUN_000016-T1 FUN_000016\n+MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAGESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKR\n+FIRNPNYVKANEFYDKMLSSEYVSKRYKDLPPPHPGFGADQPPA\n+>FUN_000017-T1 
FUN_000017\n+MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFDLKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLG\n+VTVDLINRELYQADSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK\n+KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPCAQVIFDSDPAPAGKNVPAQLEE\n+MSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFINGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGI\n+YYNELETRVRLNKRRVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIGETEKTSED\n+AAVGAQAASGADSPAQDLDLAPALGPAAAQSLVLGLVAAPDHAQIRRQDPRNPDPDRDRYHVPDPVPSPALGRVLGRDPS\n+PVPDHVRAPDLALGRDRPAGLAVARLLVQDPALEAPQMND\n+>FUN_000018-T1 FUN_000018\n+MKGSLRIHLKVVHMMGVPCSNPNPNPNPSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGNNSTCTASQPYALSGALS\n+MLQQSPSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYHKHLLYHSEVKPHVCGVC\n+GRAFKELSTLHNHQRIHSGEKPFKCEVCGKCFRQRVSFLVHTRIHTGVMPYKCELCQKTFRYKVSQRTHRCPTEEAQTPE\n+QLIKAFLEGNDSHTQPSPASAEIAAINSSSIVDPEQEALLSQSIDDIVVEQCQKLGICGVEPREEGQLISLQPVAVVHFS\n+GNGSPLQQLQNLRIYSPQQTELPSSDGEVFQRFLMDAT\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_augustus/Genus_species.scaffolds.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_augustus/Genus_species.scaffolds.fa Thu Aug 26 06:55:33 2021 +0000 |
b |
b'@@ -0,0 +1,4554 @@\n+>sample\n+GTGGCCGGCTGATTTCACGTCCTAACTTTGGGCTTAACTGGTTCGCCAGT\n+TGACTTTCTTCGCCATCATGTGATGCATTAATTAAACAATAATTACTAAT\n+TGacagtaattaataattgTGGCAAAAAGCGCGACACGTTTTTTCGGCAA\n+ACTCCTCGGAAGACCGATTGTTTAAAGACGTAGGGAAAAGTAGTTCCCAA\n+GCATTTTAAAAAGATACCTATGACATGTGACACCTTTAAAGTGCAATACA\n+AGTTTTCATCTCTTTATATCCTTTTACTCCCTAATTTGAATATAAAAGGA\n+ATCGCATTGAGAGTATAAAGGCTTTAGTTCTTATCGATagatagttagtG\n+ATGAAATAAAATTATAACCGTGGTTTTAGTTTGAAATGTTGTAAAGACTT\n+TCTTTAAATTTAACCAAATTTATGTGATAAAATGGATATTCCATAGATAA\n+GACATTTAAGTTAAATGTTTTTATACATCAAAAAGGAAACATTGTGCACG\n+CTATCAAATGGTATTCTTAAAATCGAGTCAgttaggtaagttattaatta\n+aATGGTAACTTTTTAACGTGCGTCAAATAATCTAGAAATTCTTCTTCAAC\n+TCATCCAAAACATTCTCAACACCACAATATCTATGCTCAGCGATGACAAA\n+TTTCTCCTGATTTCTTAATTTTCTATCTATGctatgcGATCAATCAACGA\n+ATGTGTGCTAATTTCTTGTGACGATTATTTTGCAAAGTCGTCTCCGCGTT\n+AATATCCGATGTAAATAAACCTATGAAAATCGCAAAGATCTATTCCTTTG\n+CGCTTAACCTTGTTATTGAACTCCCTCCCGCCCGGATTTTCGCAGCTTCC\n+AACTAAGTGATACCTTTTAAACAAACGCCACAACAAAACAGGTGACAATC\n+ATATTTTATCAACAAAAGAAAAGAGAGATAATATCTGCTGCTAATTCAAT\n+TTATTGGGCTTTGTGTTTATTTGCATTGGGAATCCGTGGAGCTGATATTG\n+TTTACTTTGGCAATTTCCCAGTCATTATGGTCGCCGTATAATGTGAATGG\n+CCAACAGGAAAATTTCACAGATTCCACTGGTTATTCACTGTTCATGCTGG\n+CGGTTGATCCGCTCCAAATCCCGGATGAGGCGGCGGAAGATCCTTATACC\n+GCTTACTTACGTATTCACTGCTTAACATCTTATCATAGAATTCGTTAGCT\n+TTCACATAGTTTGGATTCCGAATGAATCTCTTCTGTCGACCTCGCGTTTT\n+TGGCAATTTTCGGAGCTCTTCGCTACTGCTAGTTGATTTAAGGCCTACAT\n+CTTTCTGATCCTTAATCATCCTTTTAGGTGATTCTCCTGCTACCATGGGA\n+TCCACCAGTTTCATATATAATGGATCGGGCTCGTTTGCAAGTGGGTTTCC\n+AGTTTCAGAAGACTCTGACGTTCTAAGCGCCAATAAAATGAAAATAACCA\n+AAAGGAAACTAGACATGTTTCGTTACAGACAGATATAGATTGGATATTAT\n+TGAAAGAAAATgaaaataAACAGCGATAATGATCTGTGACTTATTGGAAA\n+TTAGATGGCTTATGGATGATGGGGCGATAAATTCGAACAAACACTGAGAG\n+CATTTTTGGGAGCATTGTGGGAGCTTTTCTATTATCCAGTACTTTATATA\n+CATATATCATTTATATACTAATCATTTCTGGTAGCCGTTCGTAATCAGGA\n+TCGGATCCTTTTTTACCCGTTAGTCAGCTAGAAGAAACGAAAAATTAAAA\n+TAGTAAAatctaaaagtatacaaaaattcaaatagtaaaaccaaaaagta\n+ttaaaaaaAATATCAATCGTTTTTAAACGTTGATTTTTCAGCTTGTGGGG\n+TGATTT
ATCGCTAACTTGGAAAATGATAATAAAGCATTATCCATAATATT\n+AGTTGTGGAAATGAAATTCAAATAGATGTTGTGTTATATACGATGAGGAT\n+GTTGCATTTGAGTCCCCGGAAATATAGTATTTTTTTTACCGAAGGTATTA\n+TCGTACCGGTCAAGTACGGTCACACTGCCAAGCGCAGATTTGAGGATTTC\n+TAGATTTGGCCTCTTGATGGACTAGAAGCGCTACCAAAACTGGGGCTTGA\n+GTTGAATTACCTGTTGGAAGACACAATGCCACCCACGATCAACAATTCGG\n+CGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAG\n+TAAGTGAACAGATCCCTAAACAGACGCCAGATACTCAGACTGATGTGTAC\n+CTTGCAGATCCGAGATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCG\n+GATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACAGCCACCG\n+CTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACG\n+TGCTGACGGAACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAG\n+CTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACT\n+GCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGC\n+ATTCGAGGACGGTGTCATGGTTGCGCAAATCCGAGTACATCTCCACCGAG\n+CAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGG\n+TTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCG\n+AAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAAGAGCGAA\n+ATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCC\n+TATCTTCCCCGACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATAT\n+TTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAG\n+GAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACA\n+GTTTGTCGCCTACTTCCTGCCCACAGAGCAGACGCTGGAGAAACGCCGTA\n+CAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtac\n+aagATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGG\n+CTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATCTACTACA\n+ACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGC\n+CAGCAACCCAACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCT\n+ATAGCGAGCTTTACTTTGTATTATTTCTACCAGGTTGTCAAGCATCGTCC\n+ATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAG\n+AAGTTCCTGGCGAGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAG\n+CAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGACGCAGCTGT\n+TGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCG\n+ATCGACAGTCTCGTTCTCGGAGTCGAACTCGCAGCGGGTCCAgttcagga\n+tctggatctggctccggctcTCGGGCCAgcagccgctcaaagtctggttc\n+tcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCC\n+AGAAATCCGGatccagatcgagatcggtatcacgttcccgatcccgttcc\n+aagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagtccggt
tcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTT'..b'cgcgttctaggtcgagatccaagtccggttcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTTCTGGTTCAGGATCCAGCTCTGGAAGCGCCTCA\n+GATGAATGATTAATTACAAAAAACGGCGTTCATAATAAATAAGTTTATAA\n+TCAACCAAGTACATTTGAAAACTGAACTAACTCGATTTAATATCATTTTC\n+GCCTCAACTCAGCGCTCGGGTTCGTTGCCCAGAATAGTTTTAAATAAAAT\n+CGGCAGTTTAACATAATTTATATTAGATGTtgttgttgtATTGCAAACAA\n+GTCGGGTCCTAGTCGATTTACACTTGGCTGAGATAAAACAACTAAGATTC\n+AAATAATATCCTCATAAGAAGATGTAATTAAGACGTTTTTCTTAGGGGGT\n+GCTTAGGAATTGATTAGATCGCCTTTGGGGAAGTGCAAACAATGTAAaat\n+gatataaaagggtataaaTTAAGTGGATATATGCATCTTCGTTCCAACTA\n+CGTGGCGTCCATCAAAAAGCGCTGGAAGACTTCGCCATCGGAACTAGGTA\n+GCTCTGTTTGTTGCGGTGAGTAGATTCTCAAGTTCTGGAGTTGCTGCAGC\n+GGAGAGCCATTCCCGCTGAAGTGTACCACCGCAACCGGCTGTAGGGATAT\n+GAGCTGTCCCTCCTCCCGCGGCTCCACACCACAGATGCCCAGCTTTTGGC\n+ACTGCTCAACGACAATGTCGTCGATTGACTGCGAAAGCAGTGCCTCCTGC\n+TCAGGATCCACAATGGAGCTGCTGTTGATGGCAGCTATTTCGGCGCTCGC\n+TGGTGAAGGCTGAGTATGCGAGTCGTTGCCCTCCAGGAACGCCTTTATCA\n+GCTGCTCCGGTGTCTGGGCCTCCTCGGTGGGACATCGATGGGTCCTTTGA\n+CTGACCTTGTACCTGAACGTCTTTTGGCAGAGCTCGCACTTGTAGGGCAT\n+CACTCCCGTGTGGATGCGCGTGTGGACAAGGAAAGAGACTCGCTGCCGGA\n+AACACTTGCCTGTGGACGAGTCCGAAATACGAAAAGGTTAGACATGGAGT\n+GACCCGGAAAAGAAGGTATACCTCTCCTTCATTTAAAGTAAATAGGGCAA\n+ATCGCAATGGAGTATGCTCATTTATAAGCTGGCTAACAAAATAAGGGGCG\n+GCTAATTAAGGGGTTTGATCGATACTCACCGCAGACTTCGCACTTGAAGG\n+GCTTTTCGCCGCTGTGAATCCTCTGGTGGTTATGCAGCGTAGACAGTTCC\n+TTGAAGGCGCGTCCACAAACCCCGCAGACATGGGGCTTTACCTCGCTGTG\n+GTAGAGCAAATGCTTGTGGTACGACTGCTGGAAGGTGAAGGTCCTGGCGC\n+AGATCTCGCATGTGTACGGCATCTCGCCGGTGTGGAGCCGCTTGTGCTTC\n+TTCAGAAAGTACTTGGTGGTGAAGGACTTGCTGCACACATCGCACTCCCA\n+CAGCTTCGGGGTGGCCGTGCCCGACTCCGGCGAACTGGGGGACTGCTGGA\n+GCATGCTCAGCGCCCCACTCAGCGCGTATGGCTGGGATGCGGTGCACGTG\n+GAGTTATTTCCGTTGCCAAGGGCTCCAGGCTCTGTGTGTCGAATGCGATC\n+GCAAATGCTCAGCTTGGGCGTGGCAGTAACTGCACTGGTGGTAGAAGCGG\n+GTGTAGGACtgggattaggatttggattgggattggAGCAGGGCACGCCC\n+ATCATGTGCACTACTTTCAGGTGGATTCGAAGGGAGCCCTTCATTCGGAA\n+CGTCTTGGAGCAGAGATGGCACTT
GTAGGGCTCCTGGTCCTGTATAAAGC\n+AATAATCGGAATTTCACTTATATTTATCAATTCATCAATATGCCCTCATG\n+GCCAAATATTCCATTACATTACCGTCTGTCTCTCAGTTTCAAATTTATGC\n+ACAAAAATCATTCACTTTCATTCACTATATCACAAAGTTGCCATGGTTTT\n+AAATTGATCAAAAACAAATTAATATCTATCATATATaTACATAGTCATAT\n+GAACAGTTGAAAAATTAATTGAAAATAATGGGAACGATATACGTACATAC\n+ATCAGTTGTTTTTAAAATATAAGGGTATATAGATTTCTTTCttgttgttg\n+ttgatTTTAATTACGTCAAACTTTTGTTTCAGATTCAATGTAAATGGTCT\n+AGCTTTTTAAGTATGATTTTTTTTTGCTGCCAGTGAGCATAGAAaaaaaa\n+aatCaaaATCGATATAAGAATATGCGAAAGTGCATTACGAAACTCTTTAG\n+ATAATAGCACTTAATATATGTACATAGCCAATAGTTACCGGTTCCTTCTG\n+TTGGggttccttttgcttgggttcTCCCTCCGCATTTTCGTGGACTAAGC\n+GGACGTGCATGTCCCTCAGCTCGGTATTCCGGAAACTGAACTCGCAAATG\n+TCGCAGTGGGCGGGCGGGGTGGTCCGCACAGGCGGTGGGGTTGGGACGAC\n+GGGCTTGGACCTGGTTCGCTTGGCCCTCCGTTTGGGAGGAGCTGCGGCAA\n+GGAAGCCCCGGGACGAGGCGGGTTGGCCATTCGCTGGACTCTCGTTTCCC\n+TCCTGAGCCATCAGACTTGTGTGCGAGAACAGGTGGATGGTGAGCTTGTC\n+CAGCCCCAGGAAGAGCTCCTTGCAGTTGGCAAAGGGACAGGCCAGTGGGC\n+CGTTGGCCGCCTTGATCAGCCTCTGCTGCAGTGCGTCAATACTGCCGAAA\n+CTGGGCACCGCGCAGAGCGGACACAGCACCGAGGTGGAACACATTTCGCC\n+AGTGCACTCAATCGAATCTTATGCAAATGCTTCACCTCCTATTGGGATTA\n+TCCTCCTGTTCGGTCTGTGATCATCTATTCAGGAGTCCATTCCCAGACTG\n+CCTAGTCTTTCTGCTTTCAAAATTTTCTAAAAATATCAGCAAGTGAAGAT\n+TTTTGAAAACTTTGGGCCCAGCAATCTGACTTCTCGGCACCGATGCCAGC\n+TAACGAAATAATGAAaaataatgaaaTGCCCGGCGCGGATCGTCGAATCG\n+TCAAGAAGACTTTCGGAAACACTCGCAGCACCGAAATCCCATCTCTCGAA\n+CAAGGCAGTCTCTTTTCTCCGTGTCTCTGGGTAGCTCATTTCGAAATATA\n+GCTCTGAGCACGGCTATATACTATATGTATGTAGAATTATTTCTGGCCGA\n+TATATGTTGCACTGGCGGCCATATAGCCTTCGTTCTAGTCTTTGTAACGC\n+ACGATGCGCAGGAGCAATTCGCTGAGATGACCACATGCGATTTGCGGGAC\n+TTATCTAGAGATCTATCATTATCGCCAGATTGGTTTAATAATTGGCTTTT\n+CCGCCAATATCCAATTGGAATATGGTTGGTTACTGCAATTGTCGCTCCAT\n+TTTTTAAGCACTCCATAAAAAGTAAACACATTAATATGTACTCTTATTGG\n+AGATTTCTTCTTTCGATTTTAGTTTCGGACCAGTGAAAATCATTCGTTTC\n+ATTTTCGTAAATAAGAACTGAGAAAATATtattattatATATATTTCTTT\n+ATTAGGAAAATACGAAGATTGAGTATTTCAGATTGAATTAGCATATCCGT\n+CTAAATCTTAATGCTGTAATGAGCTTACTTGAGATCTGATCAAAACCAAT\n+ACaaaacccacacCAAAGGTGGTAGCTAATATACATATTTTGTGTAATAC\n+TTTTGTAGAG
TATTTACTATTCAGCGATTTAAACAAGCAATCGCCTAGAC\n+ACACACATTTGTCCGCCTATGTGTATGTGCACCGAGCTATACCCCCACTG\n+AATCGCTGTGTGCTATTTTTATGGCCGCGATGCTCTCTTGTTTTGACCCG\n+CTTGGGCAAC\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_augustus/Genus_species.stats.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_augustus/Genus_species.stats.json Thu Aug 26 06:55:33 2021 +0000 |
b |
@@ -0,0 +1,119 @@ +{ + "format": "annotation", + "command": "/home/abretaud/miniconda3/envs/__funannotate@1.8.7/bin/funannotate predict --input /tmp/tmpkgev4773/files/c/e/8/dataset_ce85d0fa-1534-47df-8c1e-5f0a5c1b82f0.dat --out output --database /home/abretaud/.planemo/planemo_tmp__fmxm4ll/test-data/funannotate_db --species Genus species --isolate --strain --organism other --ploidy 1 --SeqCenter CFMR --SeqAccession 12345 --name FUN_ --numbering 1 --p2g_pident 80 --p2g_prefilter diamond --augustus_species fly --min_training_models 200 --busco_seed_species fly --busco_db insecta --evm-partition-interval 1500 --min_intronlen 10 --max_intronlen 3000 --min_protlen 50 --repeat_filter overlap blast --cpus 1", + "organism": "Genus_species", + "software": { + "name": "funannotate", + "version": "1.8.7", + "date": "2021-07-21", + "resources": { + "merops": { + "type": "diamond", + "version": "12.0", + "date": "2017-10-04", + "num-records": "5009" + }, + "uniprot": { + "type": "diamond", + "version": "2021_03", + "date": "2021-06-02", + "num-records": "565254" + }, + "dbCAN": { + "type": "hmmer3", + "version": "9.0", + "date": "2020-08-04", + "num-records": "641" + }, + "pfam": { + "type": "hmmer3", + "version": "34.0", + "date": "2021-03", + "num-records": "19179" + }, + "repeats": { + "type": "diamond", + "version": "1.0", + "date": "2021-07-19", + "num-records": "11950" + }, + "go": { + "type": "text", + "version": "2021-07-02", + "date": "2021-07-02", + "num-records": "47228" + }, + "mibig": { + "type": "diamond", + "version": "1.4", + "date": "2021-07-19", + "num-records": "31023" + }, + "interpro": { + "type": "xml", + "version": "86.0", + "date": "2021-06-03", + "num-records": "38913" + }, + "busco_outgroups": { + "type": "outgroups", + "version": "1.0", + "date": "2021-07-19", + "num-records": "8" + }, + "gene2product": { + "type": "text", + "version": "1.70", + "date": "2021-06-15", + "num-records": "34039" + } + } + }, + "assembly": { + "num_contigs": 4, + "length": 
227430, + "mean_length": 56857.5, + "N50": 215740, + "L50": 1, + "N90": 215740, + "L90": 1, + "GC_content": 42.86 + }, + "annotation": { + "genes": 18, + "common_name": 0, + "mRNA": 18, + "tRNA": 0, + "ncRNA": 0, + "rRNA": 0, + "avg_gene_length": 2695.06, + "transcript-level": { + "CDS_transcripts": 18, + "CDS_five_utr": 0, + "CDS_three_utr": 0, + "CDS_no_utr": 18, + "CDS_five_three_utr": 0, + "CDS_complete": 18, + "CDS_no-start": 0, + "CDS_no-stop": 0, + "CDS_no-start_no-stop": 0, + "total_exons": 57, + "total_cds_exons": 57, + "multiple_exon_transcript": 16, + "single_exon_transcript": 2, + "avg_exon_length": 558.58, + "avg_protein_length": 582.83, + "functional": { + "go_terms": 0, + "interproscan": 0, + "eggnog": 0, + "pfam": 0, + "cazyme": 0, + "merops": 0, + "busco": 0, + "secretion": 0 + }, + "pct_exon_overlap_protein_evidence": 17.54 + } + } +} \ No newline at end of file |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_augustus/Genus_species.tbl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_augustus/Genus_species.tbl Thu Aug 26 06:55:33 2021 +0000 |
b |
@@ -0,0 +1,282 @@ +>Feature sample +1 215740 REFERENCE + CFMR 12345 +1466 1092 gene + locus_tag FUN_000001 +1466 1092 mRNA + product hypothetical protein + transcript_id gnl|ncbi|FUN_000001-T1_mrna + protein_id gnl|ncbi|FUN_000001-T1 +1466 1092 CDS + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000001-T1_mrna + protein_id gnl|ncbi|FUN_000001-T1 +2126 3863 gene + locus_tag FUN_000002 +2126 2199 mRNA +2258 3224 +3284 3490 +3549 3863 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000002-T1_mrna + protein_id gnl|ncbi|FUN_000002-T1 +2126 2199 CDS +2258 3224 +3284 3490 +3549 3863 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000002-T1_mrna + protein_id gnl|ncbi|FUN_000002-T1 +5802 4883 gene + locus_tag FUN_000003 +5802 5797 mRNA +5539 4883 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000003-T1_mrna + protein_id gnl|ncbi|FUN_000003-T1 +5802 5797 CDS +5539 4883 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000003-T1_mrna + protein_id gnl|ncbi|FUN_000003-T1 +10557 8696 gene + locus_tag FUN_000004 +10557 10549 mRNA +10462 8696 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000004-T1_mrna + protein_id gnl|ncbi|FUN_000004-T1 +10557 10549 CDS +10462 8696 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000004-T1_mrna + protein_id gnl|ncbi|FUN_000004-T1 +15214 14247 gene + locus_tag FUN_000005 +15214 15209 mRNA +14648 14247 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000005-T1_mrna + protein_id gnl|ncbi|FUN_000005-T1 +15214 15209 CDS +14648 14247 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000005-T1_mrna + protein_id gnl|ncbi|FUN_000005-T1 +21705 19533 gene + locus_tag FUN_000006 +21705 21700 mRNA +21515 19533 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000006-T1_mrna + protein_id gnl|ncbi|FUN_000006-T1 +21705 21700 CDS +21515 19533 + codon_start 1 + product 
hypothetical protein + transcript_id gnl|ncbi|FUN_000006-T1_mrna + protein_id gnl|ncbi|FUN_000006-T1 +35679 34843 gene + locus_tag FUN_000007 +35679 35675 mRNA +35655 35648 +35594 34843 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000007-T1_mrna + protein_id gnl|ncbi|FUN_000007-T1 +35679 35675 CDS +35655 35648 +35594 34843 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000007-T1_mrna + protein_id gnl|ncbi|FUN_000007-T1 +40223 41234 gene + locus_tag FUN_000008 +40223 40396 mRNA +40659 41234 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000008-T1_mrna + protein_id gnl|ncbi|FUN_000008-T1 +40223 40396 CDS +40659 41234 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000008-T1_mrna + protein_id gnl|ncbi|FUN_000008-T1 +41267 42107 gene + locus_tag FUN_000009 +41267 41274 mRNA +41437 41444 +41707 42107 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000009-T1_mrna + protein_id gnl|ncbi|FUN_000009-T1 +41267 41274 CDS +41437 41444 +41707 42107 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000009-T1_mrna + protein_id gnl|ncbi|FUN_000009-T1 +87202 88320 gene + locus_tag FUN_000010 +87202 87207 mRNA +88054 88320 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000010-T1_mrna + protein_id gnl|ncbi|FUN_000010-T1 +87202 87207 CDS +88054 88320 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000010-T1_mrna + protein_id gnl|ncbi|FUN_000010-T1 +94727 95583 gene + locus_tag FUN_000011 +94727 94732 mRNA +94873 95016 +95449 95583 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000011-T1_mrna + protein_id gnl|ncbi|FUN_000011-T1 +94727 94732 CDS +94873 95016 +95449 95583 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000011-T1_mrna + protein_id gnl|ncbi|FUN_000011-T1 +133134 136346 gene + locus_tag FUN_000012 +133134 133142 mRNA +133209 134539 +134668 135510 +135569 136346 + product 
hypothetical protein + transcript_id gnl|ncbi|FUN_000012-T1_mrna + protein_id gnl|ncbi|FUN_000012-T1 +133134 133142 CDS +133209 134539 +134668 135510 +135569 136346 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000012-T1_mrna + protein_id gnl|ncbi|FUN_000012-T1 +144294 169416 gene + locus_tag FUN_000013 +144294 144551 mRNA +149012 149244 +149367 149588 +149654 149897 +149952 150112 +150174 150248 +151966 152072 +152314 152429 +152496 152751 +153651 159010 +159150 164491 +167135 168360 +168722 169208 +169350 169416 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000013-T1_mrna + protein_id gnl|ncbi|FUN_000013-T1 +144294 144551 CDS +149012 149244 +149367 149588 +149654 149897 +149952 150112 +150174 150248 +151966 152072 +152314 152429 +152496 152751 +153651 159010 +159150 164491 +167135 168360 +168722 169208 +169350 169416 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000013-T1_mrna + protein_id gnl|ncbi|FUN_000013-T1 +192049 194669 gene + locus_tag FUN_000014 +192049 192067 mRNA +193549 193658 +194041 194455 +194518 194669 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000014-T1_mrna + protein_id gnl|ncbi|FUN_000014-T1 +192049 192067 CDS +193549 193658 +194041 194455 +194518 194669 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000014-T1_mrna + protein_id gnl|ncbi|FUN_000014-T1 +210553 209044 gene + locus_tag FUN_000015 +210553 210548 mRNA +210474 209044 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000015-T1_mrna + protein_id gnl|ncbi|FUN_000015-T1 +210553 210548 CDS +210474 209044 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000015-T1_mrna + protein_id gnl|ncbi|FUN_000015-T1 +>Feature sample4 +1 7560 REFERENCE + CFMR 12345 +1466 1092 gene + locus_tag FUN_000016 +1466 1092 mRNA + product hypothetical protein + transcript_id gnl|ncbi|FUN_000016-T1_mrna + protein_id gnl|ncbi|FUN_000016-T1 +1466 1092 CDS + 
codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000016-T1_mrna + protein_id gnl|ncbi|FUN_000016-T1 +2126 3863 gene + locus_tag FUN_000017 +2126 2199 mRNA +2258 3224 +3284 3490 +3549 3863 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000017-T1_mrna + protein_id gnl|ncbi|FUN_000017-T1 +2126 2199 CDS +2258 3224 +3284 3490 +3549 3863 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000017-T1_mrna + protein_id gnl|ncbi|FUN_000017-T1 +5494 4248 gene + locus_tag FUN_000018 +5494 4930 mRNA +4759 4248 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000018-T1_mrna + protein_id gnl|ncbi|FUN_000018-T1 +5494 4930 CDS +4759 4248 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000018-T1_mrna + protein_id gnl|ncbi|FUN_000018-T1 |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_augustus/Genus_species.validation.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_augustus/Genus_species.validation.txt Thu Aug 26 06:55:33 2021 +0000 |
[ |
@@ -0,0 +1,2 @@ +WARNING: valid [SEQ_FEAT.ShortExon] Internal coding region exon is too short FEATURE: CDS: hypothetical protein [(lcl|sample:c35679-35675, c35655-35648, c35594-34843)] [lcl|sample: raw, dna len= 215740] -> [gnl|ncbi|FUN_000007-T1] +WARNING: valid [SEQ_FEAT.ShortExon] Internal coding region exon is too short FEATURE: CDS: hypothetical protein [(lcl|sample:41267-41274, 41437-41444, 41707-42107)] [lcl|sample: raw, dna len= 215740] -> [gnl|ncbi|FUN_000009-T1] |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_augustus/fly.parameters.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_augustus/fly.parameters.json Thu Aug 26 06:55:33 2021 +0000 |
[ |
@@ -0,0 +1,1 @@ +{"augustus": [{"version": "AUGUSTUS (3.3.3)", "source": "augustus pre-trained", "date": "2021-07-19", "path": "/tmp/prout/trained_species/fly/augustus"}], "genemark": [{}], "codingquarry": [{}], "snap": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-21", "path": "/tmp/tmpkgev4773/job_working_directory/000/2/working/output/predict_misc/ab_initio_parameters/fly.snap.hmm"}], "glimmerhmm": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-21", "path": "/tmp/tmpkgev4773/job_working_directory/000/2/working/output/predict_misc/ab_initio_parameters/glimmerhmm"}]} \ No newline at end of file |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_bam/Genus_species.cds-transcripts.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_bam/Genus_species.cds-transcripts.fa Thu Aug 26 06:55:33 2021 +0000 |
b |
b'@@ -0,0 +1,413 @@\n+>FUN_000001-T1 FUN_000001\n+TACCCCTTCGACAGCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGA\n+ACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCG\n+ATGAAAAACTGCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGG\n+TTGCGCAAATCCGAGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGG\n+TTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCT\n+TCAGCGACACCAAGAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCC\n+GACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGC\n+CCAGCTGGAGGAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGC\n+CCACAGAGCAGACGCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtac\n+aagATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCG\n+TCAGGACGGCATCTACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCA\n+ACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCTATAG\n+>FUN_000002-T1 FUN_000002\n+ATGTATGACCAGGAGCCCTACAAGTGCCATCTCTGCTCCAAGACGTTCCGAATGAAGGGCTCCCTTCGAATCCACCTGAA\n+AGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAATCCCAGTCCTACACCCGCTTCTACCACCA\n+GTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACACAGAGCCTGGAGCCCTTGGCAACGGAAAT\n+AACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGCATGCTCCAGCAGTCCCCCAGTTCGCCGGA\n+GTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTCCTTCACCACCAAGTACTTTCTGAAGAAGC\n+ACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCAGGACCTTCACCTTCCAGCAGTCGTACCAC\n+AAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGTGGACGCGCCTTCAAGGAACTGTCTACGCT\n+GCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAAGTCTGCGGTGAGTATCGATCAAACCCCTTAA\n+TTAGCCGCCCCTTATTTTGTTAG\n+>FUN_000003-T1 
FUN_000003\n+ATGATACATTCGGCTAAGACTACCTTGGTGGAGTGGACAGAGGCCATGGAAGACGGAAACAAGGGCTACCAGCTAATCGA\n+AAAGTACTACCTCGATGACCAGCAAAAGGCACGGGAGCTGAACATTAAGCGTCAGCTCTTGCAGGCGGACATCGACAAGC\n+GGCGCAAGCAGGTGGTGCTCCTCTATGACGAGCAAATGACGCTGGAGAAGAATCTGGAGCGAACCGCTTGTCTGTACAGG\n+TCTGCCCACGCGGAACGCCGCCAGATGGTGGAGACGTGGAAGAGCGCCGTGAACCAGATGACCCAGCGAGAGCACGACAT\n+TCAGCGCAGCGAGATAGAGTGTGCGGAGCTGGCCCAGAAAGCCCAGCAGACGGCTCAGACCTACAAGGAGTACGACAACC\n+AGCTAACCGAGGTCATCGAGAACAACCGCCAGGTGGAGCTCGCGATCGAGTCCCTCAACGAGGAAAGCTCCGACATGAAA\n+AACCAAATCCAGATCCTGATTGATGCCACATTACTGAAGGAGCGCGAGATCGACGGACTGCGTCGGGAACTGGAGAACCT\n+TTCGAACCGGGTTCACCTGCAGCGCATGGATAACCGTAGTCAGATGAAAAAGCGCGATGAGAAGGCCAAGGAGCTGGAGA\n+ACTTCGCTTCGGTGATGGAGAAGGTTAACGCTCGGCTGAAGTCTGTACAGAACAAGGCCCTCAATGCGGAGCAGCGGCTG\n+CAAATTCTGGAGGAGATGATGCAGGCGGAGGAGACGGCTCTGCGAAACCTGGACAAGGAGCAGGAGAAGGTCAACGAGAT\n+GTTGTACCGCACCCAGAGGCAGGTCATCGAGCTGCAGGACGAGGAGAAGGTCTTGAAGGTGCAGAACGACTCGCTCAACT\n+CCAACCTGGCAGCGATCAATCGCAATCAGCAGCAGGTCAATAACGAACTCAAGCGGCAGACGGAGATCCATTACAGCCTG\n+TCCTTTAAGTGCTTGGAGGCGGAGCGGCGTCTCGCCGAGATCAAGGGTCTGGCAGATGACCCCGAGGTAGAGGCCACAAA\n+CATGGCCCGGCTTAACACTCTGGAGCAGGAGTATGAAAAGTTGCAACGCCTCATCGCCACCACGGAGGCGCAGAATAAGA\n+AGCTAAACTATAACATGAACAACCTGGTCGTCCAGTACAACGCCGACGAGAAGGAGCTCGAAATGGTCAGGTTCAAGATT\n+AAAGAGGCCCAGGTGTACTGCGAGGGCACCGTCAAGAGACTGCGCCAGATTCGATACGAGAATTCCGAGCTCATAGTCGA\n+CCTCAACATGGTCAAGATGCGCTGCAGCGACCTAGAGGTTGGCATCGGAGGCTGCGAGCAGGGCACCTACGACCTAGAGC\n+AGCACCGCCTTGCCTTCCGGCGCGCCATCAAAGATCGCACCGTTGAGCTTCGCAGCCAAGAGGATGTGCTCCTCCTCAAG\n+AAGAAGCATCTTAACGAGGAACTAAGCACCCTGCGAGCCGACCTCGGGGAGCGGAAGAAGCAGATTGAGGCGATGAAGGC\n+GCGCTTTGAACTTACCGCCCAATTGCTAGGCAAAAACGAGGACGGCTCCATCATGACCAGCACACAGCTGAAGGTGGTGA\n+GCGCCCAGGAACGGCAGATGCTGGCCGACGAGGGCGATGCCCTCAACAAAAAGGTTCTCAAGGCTGAGAAGGAAGTGGTC\n+GCCCTGGAAAACACACTGCGCCAGTTCGACAAATCGAACGATAACTACCGAAAAACATTCCGATCTGTGGACGAAAACTC\n+AAAGGGTGAGTTATAA\n+>FUN_000004-T1 
FUN_000004\n+ATGCGTTGCGTTTTCTGTGGATCGGGCAGCGAGCAGCAGCATTCGCGATGGGAAATCAAGATGCTGCAGGAGAGCTGCCG\n+CACTGATCATGGATTCCACCAGGACTCGCAAGCTATCCAATATCTGTATGAGATCCTGGCCTCTTACAACCGCGACGAAC\n+AGCGCGCCTTCTTACAGTTTGTGACTGGATCACCACGCCTTCCGACTGGAGGATTCAAGGCCCTTACGCCACCACTGACT\n+ATTGTACGCAAGACGTTGGATGAGAACCAAAACCCTAACGATTACTTACCATCTGTGATGACCTGTGTCAACTATCTAAA\n+GTTGCCCGACTACTCTAGTCGCGAGGTGATGAGGCAGAAGCTGAAAGTGGCTGCTAACGAAGGCAGCATGTCTTTCCACC\n+TCTCATAA\n+>'..b'GATCCAAGCATTGCGGATGGTGCAGGCCGAGACCCCGCCCTATATTAAACA\n+ATGTCATAGGAACGACCCGAAATTGGTGGACTGCTTTATCGGAGCTATTGAACACCTAAAGCCATATTTGGCCAATGGCA\n+TTCCTGATATTCAGCTGCCCTCTGTGGAGCCCTTTAAGATGGACACCCTTGCCCTGCAGTTAACAGAGGGTCCCCAGGGG\n+TATAAGATCACGCTGAAGAACATGGAGGCCTTCGGGGCCAGCAACTTCAAGGTGACATCCCTGAAACTGAGCGAAGGAAG\n+CGAGCCCTTCAAGGCGAAGATCGTGATGCCCAAGCTAAAGATTGAGGCTAAATACACGAGCTCCGGGGTCCTGCTGATCC\n+TGCCCGCCTCCGGAGGTGGGGACTTCCATGCTAACTTCGAGGGTGTGAGTGCCGATCTCACAGGAAAGACATCCATTCAC\n+GCCTTCAAGGGCGCTAACTACCTCCACATCGATGCTCTCAGCTTGGTTCTGGATGTGAAGGATGTGAAAATGAGCATCTC\n+AGGTGCCTTCAACAACAATCGAATTCTGCTGGAGGCCACCAATCTGTTTCTGCGGGAAAACTCTCAAGTCGTTTTGGAGG\n+CTATGCAGGCTCAATTGCAGAAAAAATTGGCTAGCGAGTTCGGCAAACTCGCCAACCAGCTCCTGAAGAATGTTCCTGTA\n+GAGCAATTCTACGTGGACTAG\n+>FUN_000016-T1 
FUN_000016\n+ATGGCATCCAAGTCCTTCGATTTGGTCATTGAGGAAAAGACAAAGAAACCGGAACGGCTCTACCAACCTCGTCGGATGCG\n+ATGGCTTAAATATATCATCCTGCCGGCAGTCTTCTCCTTTGCGCTTCTGCTGATCCTGGTCAATGTGGACTTCTCCGATA\n+ATAGTGAGGATTCCACGCATCTGGGCAATGACACATCGCTCATTATATCTGGCTATGGATTTGAAAATAACACACTGCGC\n+CGGGGATTTTTCTCCGGAGGAATTGCATTGCATAGCCTTGTCATCGAGAACTGCACCATAGTTCATATCAACGACGCAGC\n+TTTTAATCAGGAAAGCACTGTGAACATAACAAGCCTGCAGCTCATTAATGTGCAATTGGAAAACTTGACTGAATCTGCCC\n+TGGAAGGTCTTCAAAAGCTTCAGAACTTTACTCTAGTCAATGAAAATAATCACTTCAGGCCATTTGGATTCCTCTCAGCT\n+GTGGCTGAATCGTTAGTCAGTGCGGAAATCCACCAGTCACTAGCCGCGGCAATATCGTATTCAGTGTGTGATTTTTTGGG\n+ATCCCGGAATTTCCCACAGTTGAAATATTTAGATCTGAGTGGCACACACTTGGACAAAAGCCTTATTAAAGAGTCCTTTG\n+ACAATCTGCCCGCGTTGGAACAACTACTTCTAAGGAACTGTGGGTTGGGCAACATCGAGTGGGAAATCGTGAGGCCAAGA\n+CTCAAATTGTTGCACTATTTGGACTTGGGTGGAGCTCAAAAGACGGGCAACTATGAACATCAGTTGGACGTATCCGCGTT\n+CTCTCCTGAGACAACTACTAACGCTGAAGAGATATCTACTATCCTAGCGAAGAGAGCGATGGCTCCAGAAGTAGTGGGCA\n+CCACTACACTAGGACCTACTACTTCCATAGAAATTTCACCACCATCAACACAAAGTACAACAACACCCAAAGAAGAAAGT\n+ACATCTATGACAGAAACAACGATATTAACAACACCATCGCCAAAATGCGAAGAGGAACTTTGCCAGGATCTAGAGTGCTC\n+CAGAATTACCACCGATACGGTTGCATCTGCAGATCTGGGAAAATCGTCGTGCCAGGATGGTTTACTGGTGGAGATCTGCG\n+AATCGACTTGCACCACTCCCACATTCTTCTGTGTGATATTAGGCGAGAACTTTACCTCCGCATCCAACTGTTGTTCCCAC\n+CATACCATGCGATGTGTGGTCTCTGCACAGGTCTCCTGGTTCGAGGACCACAGCGGACTGGTCATTGGCTTGGGAGTGGG\n+TCTCCTCTTCATCGGCAGCTTCCTCGGCATGCTCATCGTCTTCGGAACCCTGCGCCTGAATCCATCCTGGTTGCGAGGCA\n+ATAAGCGGCGGGAGTCGAATACGATAGGACTGATTCAGGGAAGATTTGAGAAGGACCCGTATGAGCAAGTGGGGTAG\n+>FUN_000017-T1 FUN_000017\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000018-T1 
FUN_000018\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCGATCGACAGTCTCGTTCTCGGAG\n+TCGAACTCGCAGCGG\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_bam/Genus_species.discrepency.report.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_bam/Genus_species.discrepency.report.txt Thu Aug 26 06:55:33 2021 +0000 |
b |
@@ -0,0 +1,142 @@ +Discrepancy Report Results + +Summary +DISC_PROTEIN_NAMES:All proteins have same name "hypothetical protein" +DISC_SOURCE_QUALS_ASNDISC:taxname (all present, all same) +DISC_FEATURE_COUNT:gene: 18 present +DISC_FEATURE_COUNT:CDS: 18 present +DISC_FEATURE_COUNT:mRNA: 18 present +DISC_COUNT_NUCLEOTIDES:4 nucleotide Bioseqs are present +JOINED_FEATURES:30 features have joined locations. +NO_ANNOTATION:2 bioseqs have no features +DISC_QUALITY_SCORES:Quality scores are missing on all sequences. +FATAL: DISC_BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS:2 features have partial ends that do not abut the end of the sequence or a gap, and cannot be extended by 3 or fewer nucleotides to do so +ONCALLER_COMMENT_PRESENT:4 comment descriptors were found (all same) +MISSING_GENOMEASSEMBLY_COMMENTS:4 bioseqs are missing GenomeAssembly structured comments +MOLTYPE_NOT_MRNA:4 molecule types are not set as mRNA. +TECHNIQUE_NOT_TSA:4 technique are not set as TSA +MISSING_STRUCTURED_COMMENT:4 sequences do not include structured comments. +MISSING_PROJECT:22 sequences do not include project. +DISC_INCONSISTENT_MOLINFO_TECH:Molinfo Technique Report (some missing, all same) + + +Detailed Report + +DiscRep_ALL:DISC_PROTEIN_NAMES::All proteins have same name "hypothetical protein" + +DiscRep_ALL:DISC_SOURCE_QUALS_ASNDISC::taxname (all present, all same) +DiscRep_SUB:DISC_SOURCE_QUALS_ASNDISC::4 sources have 'Genus species' for taxname +DiscRep_ALL:DISC_FEATURE_COUNT::gene: 18 present +DiscRep_ALL:DISC_FEATURE_COUNT::CDS: 18 present +DiscRep_ALL:DISC_FEATURE_COUNT::mRNA: 18 present +DiscRep_ALL:DISC_COUNT_NUCLEOTIDES::4 nucleotide Bioseqs are present +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:JOINED_FEATURES::30 features have joined locations. 
+DiscRep_SUB:JOINED_FEATURES::30 features have joined location but no exception +genome:mRNA hypothetical protein (sample4:2126-2199, 2258-3224, 3284->3537) FUN_000018 +genome:CDS hypothetical protein (sample4:2126-2199, 2258-3224, 3284->3537) FUN_000018 +genome:mRNA hypothetical protein (sample:c5802-5797, c5539-4883) FUN_000002 +genome:CDS hypothetical protein (sample:c5802-5797, c5539-4883) FUN_000002 +genome:mRNA hypothetical protein (sample:c10557-10549, c10462-8696) FUN_000003 +genome:CDS hypothetical protein (sample:c10557-10549, c10462-8696) FUN_000003 +genome:CDS hypothetical protein (sample:c15214-15209, c14648-14247) FUN_000004 +genome:mRNA hypothetical protein (sample:c15214-15209, c14648-14247) FUN_000004 +genome:mRNA hypothetical protein (sample:c21705-21700, c21515-19533) FUN_000005 +genome:CDS hypothetical protein (sample:c21705-21700, c21515-19533) FUN_000005 +genome:CDS hypothetical protein (sample:40223-40396, 40659-41193, 41707-42080, 43409-43609, 43678-44130) FUN_000007 +genome:mRNA hypothetical protein (sample:40223-40396, 40659-41193, 41707-42080, 43409-43609, 43678-44130) FUN_000007 +genome:CDS hypothetical protein (sample:c47195-46753, c46330-46214, c46157-45527) FUN_000008 +genome:mRNA hypothetical protein (sample:c47195-46753, c46330-46214, c46157-45527) FUN_000008 +genome:CDS hypothetical protein (sample:c79527-79519, c79068-78685) FUN_000009 +genome:mRNA hypothetical protein (sample:c79527-79519, c79068-78685) FUN_000009 +genome:mRNA hypothetical protein (sample:87202-87207, 88054-88320) FUN_000010 +genome:CDS hypothetical protein (sample:87202-87207, 88054-88320) FUN_000010 +genome:CDS hypothetical protein (sample:133587-134504, 134720-135510, 135569-136284, 137516-137862) FUN_000011 +genome:mRNA hypothetical protein (sample:133587-134504, 134720-135510, 135569-136284, 137516-137862) FUN_000011 +genome:CDS hypothetical protein (sample:144294-144551, 149012-149244, 149367-149588, 149654-149897, 149952-150112, 150174-150248, 
151966-152072, 152314-152429, 152496-152751, 153296-153630, 153689-155122, 155789-158975, 159190-164495) FUN_000012 +genome:mRNA hypothetical protein (sample:144294-144551, 149012-149244, 149367-149588, 149654-149897, 149952-150112, 150174-150248, 151966-152072, 152314-152429, 152496-152751, 153296-153630, 153689-155122, 155789-158975, 159190-164495) FUN_000012 +genome:CDS hypothetical protein (sample:167121-168360, 168722-169212) FUN_000013 +genome:mRNA hypothetical protein (sample:167121-168360, 168722-169212) FUN_000013 +genome:CDS hypothetical protein (sample:c178916-178873, c177172-176887, c176824-176699) FUN_000014 +genome:mRNA hypothetical protein (sample:c178916-178873, c177172-176887, c176824-176699) FUN_000014 +genome:CDS hypothetical protein (sample:192004-192067, 193549-193658, 194041-194455, 194518-194669) FUN_000015 +genome:mRNA hypothetical protein (sample:192004-192067, 193549-193658, 194041-194455, 194518-194669) FUN_000015 +genome:CDS hypothetical protein (sample:c210553-210548, c210474-209044) FUN_000016 +genome:mRNA hypothetical protein (sample:c210553-210548, c210474-209044) FUN_000016 + +DiscRep_ALL:NO_ANNOTATION::2 bioseqs have no features +genome:sample2 (length 2030) +genome:sample3 (length 2100) + +DiscRep_ALL:DISC_QUALITY_SCORES::Quality scores are missing on all sequences. 
+ +FATAL: DiscRep_ALL:DISC_BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS::2 features have partial ends that do not abut the end of the sequence or a gap, and cannot be extended by 3 or fewer nucleotides to do so +genome:CDS hypothetical protein sample:<2331-3254 FUN_000001 +genome:CDS hypothetical protein (sample4:2126-2199, 2258-3224, 3284->3537) FUN_000018 + +DiscRep_ALL:ONCALLER_COMMENT_PRESENT::4 comment descriptors were found (all same) +genome:sample:"Annotated using 1.8.7" +genome:sample2:"Annotated using 1.8.7" +genome:sample3:"Annotated using 1.8.7" +genome:sample4:"Annotated using 1.8.7" + +DiscRep_ALL:MISSING_GENOMEASSEMBLY_COMMENTS::4 bioseqs are missing GenomeAssembly structured comments +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:MOLTYPE_NOT_MRNA::4 molecule types are not set as mRNA. +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:TECHNIQUE_NOT_TSA::4 technique are not set as TSA +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:MISSING_STRUCTURED_COMMENT::4 sequences do not include structured comments. +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:MISSING_PROJECT::22 sequences do not include project. 
+genome:sample (length 215740) +genome:ncbi:FUN_000001-T1 (length 307) +genome:ncbi:FUN_000002-T1 (length 220) +genome:ncbi:FUN_000003-T1 (length 591) +genome:ncbi:FUN_000004-T1 (length 135) +genome:ncbi:FUN_000005-T1 (length 662) +genome:ncbi:FUN_000006-T1 (length 278) +genome:ncbi:FUN_000007-T1 (length 578) +genome:ncbi:FUN_000008-T1 (length 396) +genome:ncbi:FUN_000009-T1 (length 130) +genome:ncbi:FUN_000010-T1 (length 90) +genome:ncbi:FUN_000011-T1 (length 923) +genome:ncbi:FUN_000012-T1 (length 3977) +genome:ncbi:FUN_000013-T1 (length 576) +genome:ncbi:FUN_000014-T1 (length 151) +genome:ncbi:FUN_000015-T1 (length 246) +genome:ncbi:FUN_000016-T1 (length 478) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) +genome:ncbi:FUN_000017-T1 (length 124) +genome:ncbi:FUN_000018-T1 (length 432) + +DiscRep_ALL:DISC_INCONSISTENT_MOLINFO_TECH::Molinfo Technique Report (some missing, all same) +DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::technique (all missing) +DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::4 Molinfos are missing field technique +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_bam/Genus_species.gbk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_bam/Genus_species.gbk Thu Aug 26 06:55:33 2021 +0000 |
b |
b'@@ -0,0 +1,4259 @@\n+LOCUS sample 215740 bp DNA linear 22-JUL-2021\n+DEFINITION Genus species.\n+ACCESSION \n+VERSION\n+KEYWORDS .\n+SOURCE Genus species\n+ ORGANISM Genus species\n+ Unclassified.\n+REFERENCE 1 (bases 1 to 215740)\n+ AUTHORS Palmer,J.M.\n+ TITLE Direct Submission\n+ JOURNAL Submitted (22-JUL-2021) CFMR, USDA Forest Service, 1 Gifford\n+ Pinchot Drive, Madison, WI 53726, USA\n+COMMENT \'Annotated using 1.8.7\'.\n+FEATURES Location/Qualifiers\n+ source 1..215740\n+ /organism="Genus species"\n+ /mol_type="genomic DNA"\n+ gene <2331..3254\n+ /locus_tag="FUN_000001"\n+ mRNA <2331..3254\n+ /locus_tag="FUN_000001"\n+ /product="hypothetical protein"\n+ CDS <2331..3254\n+ /locus_tag="FUN_000001"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUN_000001-T1"\n+ /translation="YPFDSHRFVQYNPTSLERNFKYDVLTEHDLGVTVDLINRELYQA\n+ DSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENI\n+ EAKVGYNVKKSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFP\n+ DFTNWKFPCAQVIFDSDPAPAGKNVPAQLEEMSQAMIRGVMDESGEQFVAYFLPTEQT\n+ LEKRRTDFINGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGIYYNEL\n+ ETRVRLNKRRVKVGQQPNNTKLVSIFMRIHL"\n+ gene complement(4883..5802)\n+ /locus_tag="FUN_000002"\n+ mRNA complement(join(4883..5539,5797..5802))\n+ /locus_tag="FUN_000002"\n+ /product="hypothetical protein"\n+ CDS complement(join(4883..5539,5797..5802))\n+ /locus_tag="FUN_000002"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUN_000002-T1"\n+ /translation="MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPN\n+ PSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGNNSTCTASQPYALSGALSMLQQS\n+ PSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+ KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCEVCGEYRSNPLISRPL\n+ FC"\n+ gene complement(8696..10557)\n+ /locus_tag="FUN_000003"\n+ mRNA complement(join(8696..10462,10549..10557))\n+ /locus_tag="FUN_000003"\n+ /product="hypothetical protein"\n+ CDS complement(join(8696..10462,10549..10557))\n+ /locus_tag="FUN_000003"\n+ 
/codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUN_000003-T1"\n+ /translation="MIHSAKTTLVEWTEAMEDGNKGYQLIEKYYLDDQQKARELNIKR\n+ QLLQADIDKRRKQVVLLYDEQMTLEKNLERTACLYRSAHAERRQMVETWKSAVNQMTQ\n+ REHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELAIESLNEESSDMK\n+ NQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRDEKAKELENFASV\n+ MEKVNARLKSVQNKALNAEQRLQILEEMMQAEETALRNLDKEQEKVNEMLYRTQRQVI\n+ ELQDEEKVLKVQNDSLNSNLAAINRNQQQVNNELKRQTEIHYSLSFKCLEAERRLAEI\n+ KGLADDPEVEATNMARLNTLEQEYEKLQRLIATTEAQNKKLNYNMNNLVVQYNADEKE\n+ LEMVRFKIKEAQVYCEGTVKRLRQIRYENSELIVDLNMVKMRCSDLEVGIGGCEQGTY\n+ DLEQHRLAFRRAIKDRTVELRSQEDVLLLKKKHLNEELSTLRADLGERKKQIEAMKAR\n+ FELTAQLLGKNEDGSIMTSTQLKVVSAQERQMLADEGDALNKKVLKAEKEVVALENTL\n+ RQFDKSNDNYRKTFRSVDENSKGEL"\n+ gene complement(14247..15214)\n+ /locus_tag="FUN_000004"\n+ mRNA complement(join(14247..14648,15209..15214))\n+ /locus_tag="FUN_00'..b'1 agcttttggc actgctcaac gacaatgtcg tcgattgact gcgaaagcag tgcctcctgc\n+ 4501 tcaggatcca caatggagct gctgttgatg gcagctattt cggcgctcgc tggtgaaggc\n+ 4561 tgagtatgcg agtcgttgcc ctccaggaac gcctttatca gctgctccgg tgtctgggcc\n+ 4621 tcctcggtgg gacatcgatg ggtcctttga ctgaccttgt acctgaacgt cttttggcag\n+ 4681 agctcgcact tgtagggcat cactcccgtg tggatgcgcg tgtggacaag gaaagagact\n+ 4741 cgctgccgga aacacttgcc tgtggacgag tccgaaatac gaaaaggtta gacatggagt\n+ 4801 gacccggaaa agaaggtata cctctccttc atttaaagta aatagggcaa atcgcaatgg\n+ 4861 agtatgctca tttataagct ggctaacaaa ataaggggcg gctaattaag gggtttgatc\n+ 4921 gatactcacc gcagacttcg cacttgaagg gcttttcgcc gctgtgaatc ctctggtggt\n+ 4981 tatgcagcgt agacagttcc ttgaaggcgc gtccacaaac cccgcagaca tggggcttta\n+ 5041 cctcgctgtg gtagagcaaa tgcttgtggt acgactgctg gaaggtgaag gtcctggcgc\n+ 5101 agatctcgca tgtgtacggc atctcgccgg tgtggagccg cttgtgcttc ttcagaaagt\n+ 5161 acttggtggt gaaggacttg ctgcacacat cgcactccca cagcttcggg gtggccgtgc\n+ 5221 ccgactccgg cgaactgggg gactgctgga gcatgctcag cgccccactc agcgcgtatg\n+ 5281 gctgggatgc ggtgcacgtg gagttatttc cgttgccaag ggctccaggc tctgtgtgtc\n+ 5341 gaatgcgatc 
gcaaatgctc agcttgggcg tggcagtaac tgcactggtg gtagaagcgg\n+ 5401 gtgtaggact gggattagga tttggattgg gattggagca gggcacgccc atcatgtgca\n+ 5461 ctactttcag gtggattcga agggagccct tcattcggaa cgtcttggag cagagatggc\n+ 5521 acttgtaggg ctcctggtcc tgtataaagc aataatcgga atttcactta tatttatcaa\n+ 5581 ttcatcaata tgccctcatg gccaaatatt ccattacatt accgtctgtc tctcagtttc\n+ 5641 aaatttatgc acaaaaatca ttcactttca ttcactatat cacaaagttg ccatggtttt\n+ 5701 aaattgatca aaaacaaatt aatatctatc atatatatac atagtcatat gaacagttga\n+ 5761 aaaattaatt gaaaataatg ggaacgatat acgtacatac atcagttgtt tttaaaatat\n+ 5821 aagggtatat agatttcttt cttgttgttg ttgattttaa ttacgtcaaa cttttgtttc\n+ 5881 agattcaatg taaatggtct agctttttaa gtatgatttt tttttgctgc cagtgagcat\n+ 5941 agaaaaaaaa aatcaaaatc gatataagaa tatgcgaaag tgcattacga aactctttag\n+ 6001 ataatagcac ttaatatatg tacatagcca atagttaccg gttccttctg ttggggttcc\n+ 6061 ttttgcttgg gttctccctc cgcattttcg tggactaagc ggacgtgcat gtccctcagc\n+ 6121 tcggtattcc ggaaactgaa ctcgcaaatg tcgcagtggg cgggcggggt ggtccgcaca\n+ 6181 ggcggtgggg ttgggacgac gggcttggac ctggttcgct tggccctccg tttgggagga\n+ 6241 gctgcggcaa ggaagccccg ggacgaggcg ggttggccat tcgctggact ctcgtttccc\n+ 6301 tcctgagcca tcagacttgt gtgcgagaac aggtggatgg tgagcttgtc cagccccagg\n+ 6361 aagagctcct tgcagttggc aaagggacag gccagtgggc cgttggccgc cttgatcagc\n+ 6421 ctctgctgca gtgcgtcaat actgccgaaa ctgggcaccg cgcagagcgg acacagcacc\n+ 6481 gaggtggaac acatttcgcc agtgcactca atcgaatctt atgcaaatgc ttcacctcct\n+ 6541 attgggatta tcctcctgtt cggtctgtga tcatctattc aggagtccat tcccagactg\n+ 6601 cctagtcttt ctgctttcaa aattttctaa aaatatcagc aagtgaagat ttttgaaaac\n+ 6661 tttgggccca gcaatctgac ttctcggcac cgatgccagc taacgaaata atgaaaaata\n+ 6721 atgaaatgcc cggcgcggat cgtcgaatcg tcaagaagac tttcggaaac actcgcagca\n+ 6781 ccgaaatccc atctctcgaa caaggcagtc tcttttctcc gtgtctctgg gtagctcatt\n+ 6841 tcgaaatata gctctgagca cggctatata ctatatgtat gtagaattat ttctggccga\n+ 6901 tatatgttgc actggcggcc atatagcctt cgttctagtc tttgtaacgc acgatgcgca\n+ 6961 ggagcaattc 
gctgagatga ccacatgcga tttgcgggac ttatctagag atctatcatt\n+ 7021 atcgccagat tggtttaata attggctttt ccgccaatat ccaattggaa tatggttggt\n+ 7081 tactgcaatt gtcgctccat tttttaagca ctccataaaa agtaaacaca ttaatatgta\n+ 7141 ctcttattgg agatttcttc tttcgatttt agtttcggac cagtgaaaat cattcgtttc\n+ 7201 attttcgtaa ataagaactg agaaaatatt attattatat atatttcttt attaggaaaa\n+ 7261 tacgaagatt gagtatttca gattgaatta gcatatccgt ctaaatctta atgctgtaat\n+ 7321 gagcttactt gagatctgat caaaaccaat acaaaaccca caccaaaggt ggtagctaat\n+ 7381 atacatattt tgtgtaatac ttttgtagag tatttactat tcagcgattt aaacaagcaa\n+ 7441 tcgcctagac acacacattt gtccgcctat gtgtatgtgc accgagctat acccccactg\n+ 7501 aatcgctgtg tgctattttt atggccgcga tgctctcttg ttttgacccg cttgggcaac\n+//\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_bam/Genus_species.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_bam/Genus_species.gff3 Thu Aug 26 06:55:33 2021 +0000 |
b |
b'@@ -0,0 +1,145 @@\n+##gff-version 3\n+sample\tfunannotate\tgene\t2331\t3254\t.\t+\t.\tID=FUN_000001;\n+sample\tfunannotate\tmRNA\t2331\t3254\t.\t+\t.\tID=FUN_000001-T1;Parent=FUN_000001;product=hypothetical protein;\n+sample\tfunannotate\texon\t2331\t3254\t.\t+\t.\tID=FUN_000001-T1.exon1;Parent=FUN_000001-T1;\n+sample\tfunannotate\tCDS\t2331\t3254\t.\t+\t0\tID=FUN_000001-T1.cds;Parent=FUN_000001-T1;\n+sample\tfunannotate\tgene\t4883\t5802\t.\t-\t.\tID=FUN_000002;\n+sample\tfunannotate\tmRNA\t4883\t5802\t.\t-\t.\tID=FUN_000002-T1;Parent=FUN_000002;product=hypothetical protein;\n+sample\tfunannotate\texon\t5797\t5802\t.\t-\t.\tID=FUN_000002-T1.exon1;Parent=FUN_000002-T1;\n+sample\tfunannotate\texon\t4883\t5539\t.\t-\t.\tID=FUN_000002-T1.exon2;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t5797\t5802\t.\t-\t0\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t4883\t5539\t.\t-\t0\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tgene\t8696\t10557\t.\t-\t.\tID=FUN_000003;\n+sample\tfunannotate\tmRNA\t8696\t10557\t.\t-\t.\tID=FUN_000003-T1;Parent=FUN_000003;product=hypothetical protein;\n+sample\tfunannotate\texon\t10549\t10557\t.\t-\t.\tID=FUN_000003-T1.exon1;Parent=FUN_000003-T1;\n+sample\tfunannotate\texon\t8696\t10462\t.\t-\t.\tID=FUN_000003-T1.exon2;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t10549\t10557\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t8696\t10462\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tgene\t14247\t15214\t.\t-\t.\tID=FUN_000004;\n+sample\tfunannotate\tmRNA\t14247\t15214\t.\t-\t.\tID=FUN_000004-T1;Parent=FUN_000004;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t15209\t15214\t.\t-\t.\tID=FUN_000004-T1.exon1;Parent=FUN_000004-T1;\n+sample\tfunannotate\texon\t14247\t14648\t.\t-\t.\tID=FUN_000004-T1.exon2;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t15209\t15214\t.\t-\t0\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t14247\t14648\t.\t-\t0\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tgene\t19533\t21705\t.\t-\t.\tID=FUN_000005;\n+sample\tfunannotate\tmRNA\t19533\t21705\t.\t-\t.\tID=FUN_000005-T1;Parent=FUN_000005;product=hypothetical protein;\n+sample\tfunannotate\texon\t21700\t21705\t.\t-\t.\tID=FUN_000005-T1.exon1;Parent=FUN_000005-T1;\n+sample\tfunannotate\texon\t19533\t21515\t.\t-\t.\tID=FUN_000005-T1.exon2;Parent=FUN_000005-T1;\n+sample\tfunannotate\tCDS\t21700\t21705\t.\t-\t0\tID=FUN_000005-T1.cds;Parent=FUN_000005-T1;\n+sample\tfunannotate\tCDS\t19533\t21515\t.\t-\t0\tID=FUN_000005-T1.cds;Parent=FUN_000005-T1;\n+sample\tfunannotate\tgene\t34843\t35679\t.\t-\t.\tID=FUN_000006;\n+sample\tfunannotate\tmRNA\t34843\t35679\t.\t-\t.\tID=FUN_000006-T1;Parent=FUN_000006;product=hypothetical protein;\n+sample\tfunannotate\texon\t34843\t35679\t.\t-\t.\tID=FUN_000006-T1.exon1;Parent=FUN_000006-T1;\n+sample\tfunannotate\tCDS\t34843\t35679\t.\t-\t0\tID=FUN_000006-T1.cds;Parent=FUN_000006-T1;\n+sample\tfunannotate\tgene\t40223\t44130\t.\t+\t.\tID=FUN_000007;\n+sample\tfunannotate\tmRNA\t40223\t44130\t.\t+\t.\tID=FUN_000007-T1;Parent=FUN_000007;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t40223\t40396\t.\t+\t.\tID=FUN_000007-T1.exon1;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t40659\t41193\t.\t+\t.\tID=FUN_000007-T1.exon2;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t41707\t42080\t.\t+\t.\tID=FUN_000007-T1.exon3;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t43409\t43609\t.\t+\t.\tID=FUN_000007-T1.exon4;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t43678\t44130\t.\t+\t.\tID=FUN_000007-T1.exon5;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t40223\t40396\t.\t+\t0\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t40659\t41193\t.\t+\t0\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t41707\t42080\t.\t+\t2\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t43409\t43609\t.\t+\t0\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t43678\t44130\t.\t+\t0\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tgene\t45527\t47195\t.\t-\t.\tID=FUN_000008;\n+sample\tfunannotate\tmRNA\t45527\t47195\t.\t-\t.\tID=FUN_000008-T1;Parent=FUN_000008;product=hypothetical protein;\n+sample\tfunannotate\texon\t46753\t47195\t.\t-\t.\tID=FUN_000008-T1.exon1'..b'_000012-T1;\n+sample\tfunannotate\tCDS\t153296\t153630\t.\t+\t2\tID=FUN_000012-T1.cds;Parent=FUN_000012-T1;\n+sample\tfunannotate\tCDS\t153689\t155122\t.\t+\t0\tID=FUN_000012-T1.cds;Parent=FUN_000012-T1;\n+sample\tfunannotate\tCDS\t155789\t158975\t.\t+\t0\tID=FUN_000012-T1.cds;Parent=FUN_000012-T1;\n+sample\tfunannotate\tCDS\t159190\t164495\t.\t+\t2\tID=FUN_000012-T1.cds;Parent=FUN_000012-T1;\n+sample\tfunannotate\tgene\t167121\t169212\t.\t+\t.\tID=FUN_000013;\n+sample\tfunannotate\tmRNA\t167121\t169212\t.\t+\t.\tID=FUN_000013-T1;Parent=FUN_000013;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t167121\t168360\t.\t+\t.\tID=FUN_000013-T1.exon1;Parent=FUN_000013-T1;\n+sample\tfunannotate\texon\t168722\t169212\t.\t+\t.\tID=FUN_000013-T1.exon2;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t167121\t168360\t.\t+\t0\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t168722\t169212\t.\t+\t2\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tgene\t176699\t178916\t.\t-\t.\tID=FUN_000014;\n+sample\tfunannotate\tmRNA\t176699\t178916\t.\t-\t.\tID=FUN_000014-T1;Parent=FUN_000014;product=hypothetical protein;\n+sample\tfunannotate\texon\t178873\t178916\t.\t-\t.\tID=FUN_000014-T1.exon1;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t176887\t177172\t.\t-\t.\tID=FUN_000014-T1.exon2;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t176699\t176824\t.\t-\t.\tID=FUN_000014-T1.exon3;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t178873\t178916\t.\t-\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t176887\t177172\t.\t-\t1\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t176699\t176824\t.\t-\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tgene\t192004\t194669\t.\t+\t.\tID=FUN_000015;\n+sample\tfunannotate\tmRNA\t192004\t194669\t.\t+\t.\tID=FUN_000015-T1;Parent=FUN_000015;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t192004\t192067\t.\t+\t.\tID=FUN_000015-T1.exon1;Parent=FUN_000015-T1;\n+sample\tfunannotate\texon\t193549\t193658\t.\t+\t.\tID=FUN_000015-T1.exon2;Parent=FUN_000015-T1;\n+sample\tfunannotate\texon\t194041\t194455\t.\t+\t.\tID=FUN_000015-T1.exon3;Parent=FUN_000015-T1;\n+sample\tfunannotate\texon\t194518\t194669\t.\t+\t.\tID=FUN_000015-T1.exon4;Parent=FUN_000015-T1;\n+sample\tfunannotate\tCDS\t192004\t192067\t.\t+\t0\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample\tfunannotate\tCDS\t193549\t193658\t.\t+\t2\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample\tfunannotate\tCDS\t194041\t194455\t.\t+\t0\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample\tfunannotate\tCDS\t194518\t194669\t.\t+\t2\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample\tfunannotate\tgene\t209044\t210553\t.\t-\t.\tID=FUN_000016;\n+sample\tfunannotate\tmRNA\t209044\t210553\t.\t-\t.\tID=FUN_000016-T1;Parent=FUN_000016;product=hypothetical protein;\n+sample\tfunannotate\texon\t210548\t210553\t.\t-\t.\tID=FUN_000016-T1.exon1;Parent=FUN_000016-T1;\n+sample\tfunannotate\texon\t209044\t210474\t.\t-\t.\tID=FUN_000016-T1.exon2;Parent=FUN_000016-T1;\n+sample\tfunannotate\tCDS\t210548\t210553\t.\t-\t0\tID=FUN_000016-T1.cds;Parent=FUN_000016-T1;\n+sample\tfunannotate\tCDS\t209044\t210474\t.\t-\t0\tID=FUN_000016-T1.cds;Parent=FUN_000016-T1;\n+sample4\tfunannotate\tgene\t1092\t1466\t.\t-\t.\tID=FUN_000017;\n+sample4\tfunannotate\tmRNA\t1092\t1466\t.\t-\t.\tID=FUN_000017-T1;Parent=FUN_000017;product=hypothetical protein;\n+sample4\tfunannotate\texon\t1092\t1466\t.\t-\t.\tID=FUN_000017-T1.exon1;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tCDS\t1092\t1466\t.\t-\t0\tID=FUN_000017-T1.cds;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tgene\t2126\t3537\t.\t+\t.\tID=FUN_000018;\n+sample4\tfunannotate\tmRNA\t2126\t3537\t.\t+\t.\tID=FUN_000018-T1;Parent=FUN_000018;product=hypothetical 
protein;\n+sample4\tfunannotate\texon\t2126\t2199\t.\t+\t.\tID=FUN_000018-T1.exon1;Parent=FUN_000018-T1;\n+sample4\tfunannotate\texon\t2258\t3224\t.\t+\t.\tID=FUN_000018-T1.exon2;Parent=FUN_000018-T1;\n+sample4\tfunannotate\texon\t3284\t3537\t.\t+\t.\tID=FUN_000018-T1.exon3;Parent=FUN_000018-T1;\n+sample4\tfunannotate\tCDS\t2126\t2199\t.\t+\t0\tID=FUN_000018-T1.cds;Parent=FUN_000018-T1;\n+sample4\tfunannotate\tCDS\t2258\t3224\t.\t+\t1\tID=FUN_000018-T1.cds;Parent=FUN_000018-T1;\n+sample4\tfunannotate\tCDS\t3284\t3537\t.\t+\t0\tID=FUN_000018-T1.cds;Parent=FUN_000018-T1;\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_bam/Genus_species.mrna-transcripts.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_bam/Genus_species.mrna-transcripts.fa Thu Aug 26 06:55:33 2021 +0000 |
b |
b'@@ -0,0 +1,413 @@\n+>FUN_000001-T1 FUN_000001\n+TACCCCTTCGACAGCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGA\n+ACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCG\n+ATGAAAAACTGCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGG\n+TTGCGCAAATCCGAGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGG\n+TTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCT\n+TCAGCGACACCAAGAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCC\n+GACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGC\n+CCAGCTGGAGGAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGC\n+CCACAGAGCAGACGCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtac\n+aagATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCG\n+TCAGGACGGCATCTACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCA\n+ACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCTATAG\n+>FUN_000002-T1 FUN_000002\n+ATGTATGACCAGGAGCCCTACAAGTGCCATCTCTGCTCCAAGACGTTCCGAATGAAGGGCTCCCTTCGAATCCACCTGAA\n+AGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAATCCCAGTCCTACACCCGCTTCTACCACCA\n+GTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACACAGAGCCTGGAGCCCTTGGCAACGGAAAT\n+AACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGCATGCTCCAGCAGTCCCCCAGTTCGCCGGA\n+GTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTCCTTCACCACCAAGTACTTTCTGAAGAAGC\n+ACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCAGGACCTTCACCTTCCAGCAGTCGTACCAC\n+AAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGTGGACGCGCCTTCAAGGAACTGTCTACGCT\n+GCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAAGTCTGCGGTGAGTATCGATCAAACCCCTTAA\n+TTAGCCGCCCCTTATTTTGTTAG\n+>FUN_000003-T1 
FUN_000003\n+ATGATACATTCGGCTAAGACTACCTTGGTGGAGTGGACAGAGGCCATGGAAGACGGAAACAAGGGCTACCAGCTAATCGA\n+AAAGTACTACCTCGATGACCAGCAAAAGGCACGGGAGCTGAACATTAAGCGTCAGCTCTTGCAGGCGGACATCGACAAGC\n+GGCGCAAGCAGGTGGTGCTCCTCTATGACGAGCAAATGACGCTGGAGAAGAATCTGGAGCGAACCGCTTGTCTGTACAGG\n+TCTGCCCACGCGGAACGCCGCCAGATGGTGGAGACGTGGAAGAGCGCCGTGAACCAGATGACCCAGCGAGAGCACGACAT\n+TCAGCGCAGCGAGATAGAGTGTGCGGAGCTGGCCCAGAAAGCCCAGCAGACGGCTCAGACCTACAAGGAGTACGACAACC\n+AGCTAACCGAGGTCATCGAGAACAACCGCCAGGTGGAGCTCGCGATCGAGTCCCTCAACGAGGAAAGCTCCGACATGAAA\n+AACCAAATCCAGATCCTGATTGATGCCACATTACTGAAGGAGCGCGAGATCGACGGACTGCGTCGGGAACTGGAGAACCT\n+TTCGAACCGGGTTCACCTGCAGCGCATGGATAACCGTAGTCAGATGAAAAAGCGCGATGAGAAGGCCAAGGAGCTGGAGA\n+ACTTCGCTTCGGTGATGGAGAAGGTTAACGCTCGGCTGAAGTCTGTACAGAACAAGGCCCTCAATGCGGAGCAGCGGCTG\n+CAAATTCTGGAGGAGATGATGCAGGCGGAGGAGACGGCTCTGCGAAACCTGGACAAGGAGCAGGAGAAGGTCAACGAGAT\n+GTTGTACCGCACCCAGAGGCAGGTCATCGAGCTGCAGGACGAGGAGAAGGTCTTGAAGGTGCAGAACGACTCGCTCAACT\n+CCAACCTGGCAGCGATCAATCGCAATCAGCAGCAGGTCAATAACGAACTCAAGCGGCAGACGGAGATCCATTACAGCCTG\n+TCCTTTAAGTGCTTGGAGGCGGAGCGGCGTCTCGCCGAGATCAAGGGTCTGGCAGATGACCCCGAGGTAGAGGCCACAAA\n+CATGGCCCGGCTTAACACTCTGGAGCAGGAGTATGAAAAGTTGCAACGCCTCATCGCCACCACGGAGGCGCAGAATAAGA\n+AGCTAAACTATAACATGAACAACCTGGTCGTCCAGTACAACGCCGACGAGAAGGAGCTCGAAATGGTCAGGTTCAAGATT\n+AAAGAGGCCCAGGTGTACTGCGAGGGCACCGTCAAGAGACTGCGCCAGATTCGATACGAGAATTCCGAGCTCATAGTCGA\n+CCTCAACATGGTCAAGATGCGCTGCAGCGACCTAGAGGTTGGCATCGGAGGCTGCGAGCAGGGCACCTACGACCTAGAGC\n+AGCACCGCCTTGCCTTCCGGCGCGCCATCAAAGATCGCACCGTTGAGCTTCGCAGCCAAGAGGATGTGCTCCTCCTCAAG\n+AAGAAGCATCTTAACGAGGAACTAAGCACCCTGCGAGCCGACCTCGGGGAGCGGAAGAAGCAGATTGAGGCGATGAAGGC\n+GCGCTTTGAACTTACCGCCCAATTGCTAGGCAAAAACGAGGACGGCTCCATCATGACCAGCACACAGCTGAAGGTGGTGA\n+GCGCCCAGGAACGGCAGATGCTGGCCGACGAGGGCGATGCCCTCAACAAAAAGGTTCTCAAGGCTGAGAAGGAAGTGGTC\n+GCCCTGGAAAACACACTGCGCCAGTTCGACAAATCGAACGATAACTACCGAAAAACATTCCGATCTGTGGACGAAAACTC\n+AAAGGGTGAGTTATAA\n+>FUN_000004-T1 
FUN_000004\n+ATGCGTTGCGTTTTCTGTGGATCGGGCAGCGAGCAGCAGCATTCGCGATGGGAAATCAAGATGCTGCAGGAGAGCTGCCG\n+CACTGATCATGGATTCCACCAGGACTCGCAAGCTATCCAATATCTGTATGAGATCCTGGCCTCTTACAACCGCGACGAAC\n+AGCGCGCCTTCTTACAGTTTGTGACTGGATCACCACGCCTTCCGACTGGAGGATTCAAGGCCCTTACGCCACCACTGACT\n+ATTGTACGCAAGACGTTGGATGAGAACCAAAACCCTAACGATTACTTACCATCTGTGATGACCTGTGTCAACTATCTAAA\n+GTTGCCCGACTACTCTAGTCGCGAGGTGATGAGGCAGAAGCTGAAAGTGGCTGCTAACGAAGGCAGCATGTCTTTCCACC\n+TCTCATAA\n+>'..b'GATCCAAGCATTGCGGATGGTGCAGGCCGAGACCCCGCCCTATATTAAACA\n+ATGTCATAGGAACGACCCGAAATTGGTGGACTGCTTTATCGGAGCTATTGAACACCTAAAGCCATATTTGGCCAATGGCA\n+TTCCTGATATTCAGCTGCCCTCTGTGGAGCCCTTTAAGATGGACACCCTTGCCCTGCAGTTAACAGAGGGTCCCCAGGGG\n+TATAAGATCACGCTGAAGAACATGGAGGCCTTCGGGGCCAGCAACTTCAAGGTGACATCCCTGAAACTGAGCGAAGGAAG\n+CGAGCCCTTCAAGGCGAAGATCGTGATGCCCAAGCTAAAGATTGAGGCTAAATACACGAGCTCCGGGGTCCTGCTGATCC\n+TGCCCGCCTCCGGAGGTGGGGACTTCCATGCTAACTTCGAGGGTGTGAGTGCCGATCTCACAGGAAAGACATCCATTCAC\n+GCCTTCAAGGGCGCTAACTACCTCCACATCGATGCTCTCAGCTTGGTTCTGGATGTGAAGGATGTGAAAATGAGCATCTC\n+AGGTGCCTTCAACAACAATCGAATTCTGCTGGAGGCCACCAATCTGTTTCTGCGGGAAAACTCTCAAGTCGTTTTGGAGG\n+CTATGCAGGCTCAATTGCAGAAAAAATTGGCTAGCGAGTTCGGCAAACTCGCCAACCAGCTCCTGAAGAATGTTCCTGTA\n+GAGCAATTCTACGTGGACTAG\n+>FUN_000016-T1 
FUN_000016\n+ATGGCATCCAAGTCCTTCGATTTGGTCATTGAGGAAAAGACAAAGAAACCGGAACGGCTCTACCAACCTCGTCGGATGCG\n+ATGGCTTAAATATATCATCCTGCCGGCAGTCTTCTCCTTTGCGCTTCTGCTGATCCTGGTCAATGTGGACTTCTCCGATA\n+ATAGTGAGGATTCCACGCATCTGGGCAATGACACATCGCTCATTATATCTGGCTATGGATTTGAAAATAACACACTGCGC\n+CGGGGATTTTTCTCCGGAGGAATTGCATTGCATAGCCTTGTCATCGAGAACTGCACCATAGTTCATATCAACGACGCAGC\n+TTTTAATCAGGAAAGCACTGTGAACATAACAAGCCTGCAGCTCATTAATGTGCAATTGGAAAACTTGACTGAATCTGCCC\n+TGGAAGGTCTTCAAAAGCTTCAGAACTTTACTCTAGTCAATGAAAATAATCACTTCAGGCCATTTGGATTCCTCTCAGCT\n+GTGGCTGAATCGTTAGTCAGTGCGGAAATCCACCAGTCACTAGCCGCGGCAATATCGTATTCAGTGTGTGATTTTTTGGG\n+ATCCCGGAATTTCCCACAGTTGAAATATTTAGATCTGAGTGGCACACACTTGGACAAAAGCCTTATTAAAGAGTCCTTTG\n+ACAATCTGCCCGCGTTGGAACAACTACTTCTAAGGAACTGTGGGTTGGGCAACATCGAGTGGGAAATCGTGAGGCCAAGA\n+CTCAAATTGTTGCACTATTTGGACTTGGGTGGAGCTCAAAAGACGGGCAACTATGAACATCAGTTGGACGTATCCGCGTT\n+CTCTCCTGAGACAACTACTAACGCTGAAGAGATATCTACTATCCTAGCGAAGAGAGCGATGGCTCCAGAAGTAGTGGGCA\n+CCACTACACTAGGACCTACTACTTCCATAGAAATTTCACCACCATCAACACAAAGTACAACAACACCCAAAGAAGAAAGT\n+ACATCTATGACAGAAACAACGATATTAACAACACCATCGCCAAAATGCGAAGAGGAACTTTGCCAGGATCTAGAGTGCTC\n+CAGAATTACCACCGATACGGTTGCATCTGCAGATCTGGGAAAATCGTCGTGCCAGGATGGTTTACTGGTGGAGATCTGCG\n+AATCGACTTGCACCACTCCCACATTCTTCTGTGTGATATTAGGCGAGAACTTTACCTCCGCATCCAACTGTTGTTCCCAC\n+CATACCATGCGATGTGTGGTCTCTGCACAGGTCTCCTGGTTCGAGGACCACAGCGGACTGGTCATTGGCTTGGGAGTGGG\n+TCTCCTCTTCATCGGCAGCTTCCTCGGCATGCTCATCGTCTTCGGAACCCTGCGCCTGAATCCATCCTGGTTGCGAGGCA\n+ATAAGCGGCGGGAGTCGAATACGATAGGACTGATTCAGGGAAGATTTGAGAAGGACCCGTATGAGCAAGTGGGGTAG\n+>FUN_000017-T1 FUN_000017\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000018-T1 
FUN_000018\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCGATCGACAGTCTCGTTCTCGGAG\n+TCGAACTCGCAGCGG\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_bam/Genus_species.proteins.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_bam/Genus_species.proteins.fa Thu Aug 26 06:55:33 2021 +0000 |
b |
b'@@ -0,0 +1,155 @@\n+>FUN_000001-T1 FUN_000001\n+YPFDSHRFVQYNPTSLERNFKYDVLTEHDLGVTVDLINRELYQADSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSW\n+LRKSEYISTEQTRFQPQNLENIEAKVGYNVKKSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFP\n+DFTNWKFPCAQVIFDSDPAPAGKNVPAQLEEMSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFINGELYKEEEEYEY\n+KIAREYNWNVKTKASKGYEENYFFVMRQDGIYYNELETRVRLNKRRVKVGQQPNNTKLVSIFMRIHL\n+>FUN_000002-T1 FUN_000002\n+MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPNPSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGN\n+NSTCTASQPYALSGALSMLQQSPSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCEVCGEYRSNPLISRPLFC\n+>FUN_000003-T1 FUN_000003\n+MIHSAKTTLVEWTEAMEDGNKGYQLIEKYYLDDQQKARELNIKRQLLQADIDKRRKQVVLLYDEQMTLEKNLERTACLYR\n+SAHAERRQMVETWKSAVNQMTQREHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELAIESLNEESSDMK\n+NQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRDEKAKELENFASVMEKVNARLKSVQNKALNAEQRL\n+QILEEMMQAEETALRNLDKEQEKVNEMLYRTQRQVIELQDEEKVLKVQNDSLNSNLAAINRNQQQVNNELKRQTEIHYSL\n+SFKCLEAERRLAEIKGLADDPEVEATNMARLNTLEQEYEKLQRLIATTEAQNKKLNYNMNNLVVQYNADEKELEMVRFKI\n+KEAQVYCEGTVKRLRQIRYENSELIVDLNMVKMRCSDLEVGIGGCEQGTYDLEQHRLAFRRAIKDRTVELRSQEDVLLLK\n+KKHLNEELSTLRADLGERKKQIEAMKARFELTAQLLGKNEDGSIMTSTQLKVVSAQERQMLADEGDALNKKVLKAEKEVV\n+ALENTLRQFDKSNDNYRKTFRSVDENSKGEL\n+>FUN_000004-T1 FUN_000004\n+MRCVFCGSGSEQQHSRWEIKMLQESCRTDHGFHQDSQAIQYLYEILASYNRDEQRAFLQFVTGSPRLPTGGFKALTPPLT\n+IVRKTLDENQNPNDYLPSVMTCVNYLKLPDYSSREVMRQKLKVAANEGSMSFHLS\n+>FUN_000005-T1 
FUN_000005\n+MSFCDFLRDAALGANSANLSIVAAALSAARDVGGGSDGGGSAGGATPATGASASSVGNTSAVGASSSSNSSAGQAASSNS\n+NNVTATGSGSAPGGGPTSTGTTSGTQHGSGSGAAAAVDSESDDSEVGRLQALLEARGLPPHLFGALGPRVTHILHRTIGN\n+SSSSKANQLLQGLQSHDESQQLQAAIEMCQMLVMGNEDTLAGFPIKQVVPALIQLLRMEHNFDIMNNACRALAYMLEALP\n+RSSGTVVEAVPVFLEKLQVIQCMDVAEQSLSALEILSRRHNKAILQANGISACLTYLDFFSIVAQRAALAIAANCCLNMH\n+PEEFHFVAESLPLLARLLSQQDKKCIESVCSAFCRLVESFQHDGQRLQQIASPDLLKNCQQLLLVTPAILNTGTFTAVVR\n+MLSLMCCSCPDLAISLLRNDIAATLLYLLTGNAEPAAASATHVELISRSPSELYELTCLIGELMPRLPLDGIFAVDSLLD\n+RPTLNTQDQVHWQWRDDRGSWHNYSTIDSRLIEAANQSSEDEISLSTFGRTYTVDFHAMQQINEDTGTTRPVQRRLNHNY\n+VAPMSAGQDLTTTSAGSAAAGGASTSAAAAAASSNNNNNNNNNPPGNSVNLNQVKRRPSLDARIACLKVRNKMFCLFTRY\n+TLKLNNLPNYNVFFMKYFRRHS\n+>FUN_000006-T1 FUN_000006\n+MKFRALMQDPLYMKEFQAIVATLTKLAKDCVMILGSRQMHFIVNEDQSSAASPLVWAGITAEEYFPEYRMEAAHPDQEYI\n+VLGVSSANLGRALSVLRGGGVNSCKLKLQRIQFPCISVIASVLTSSSTEAREVVHDVPVTIIPGSDWSAYVVPRVPNSQL\n+ALGLPSLRLLKSLIDKLKNISPSLEFQVNVDGELNVIATSEMSTVTSRFQKLLIRTVSGSQQEASCSVDSRKASAFFGAL\n+QLPNEELTIGIDREHSIHLQIDVRQDVVLHSILPAVCM\n+>FUN_000007-T1 FUN_000007\n+MCGNPAVGNGTRALILVGGYGTRLRPLTLSTPKPLVEFANKPILLHQLEALVDAGCRQVILAVSYRAEQMEKELKVEAKK\n+LGVELIFSHETEPLGTAGPLALAKTILAASSEPFFVLNSDVICDFPFKQLVQFHCNHGKEGTIVVTKVEEPSKYGVVLYD\n+ENGCIKNFIEKPQEFVSNKINAGIYIFNPSVLDRIEVKPTSIEKEVFPEMTQQQELYAMDLTGFWMDIGQPKDFLTGMCL\n+YLSSLRQKQSPKLYTGPGVVGNVLVDPTAKIGEGCRIGPNVTIGPDVVIEDGVCIKRSTILKGAIVRSHSWLDSCIVGWR\n+STVGRWVRIEGITVLGEDVIVKDELYINGGQVLPHKSIAASLRGAIVQAGQLVILPDEEVFSHVQGVWNLSSDQGNLGSF\n+VVTNIRLVWFADANETFNISLPYLQIESSKYGPALVIQTAETGGGYVLGFRVDPAERLNELFKELSSLHTVYGEHPNFGI\n+QYNANDARRRLEAASEEAAQASQIKVDNFEELDERQEREINTKLNSYLAEGCLGKVPSQGERAPVYCKELGFAMEPIGDG\n+YKLQDLWNVMPTKMETME\n+>FUN_000008-T1 
FUN_000008\n+MKTLSVRLHRGTEFIKDTVHKALVMSAPTPVAPATAPAPKIVDHSLKRKLSGAGGLMGCSSIGSMTSSIAGSSRSHHYAL\n+TSQVASSQVIPLPSQVPTAAFLRTYTVAPTALHRSAAARKRNPSTDSLLMDLCLFKPIRPMPITPIKIHKFRGFEVKKPK\n+FVPAGNPDSEDDEDNDEDGTVRKPKPSNLTLPTISDSAFVPMPYIETTNTAINATTTTNSGSRSRSLNTHTSGSAQAITK\n+PKRRRRAPMLTAKRRRKALDTELTTSADAGTEDKAPAVRKATAARGGSKRSRGESITAPTPAEPIKSPVAIKAPTKRKST\n+SRSEAAKRSRVASVQNDTVLTATSTTSADSIRKAATKRIAANEKVAKRSRGSAALSARPSPPMTRQRARQQISAST\n+>FUN_000009-T1 FUN_000009\n+MVTLRLPWCIRHKPPLCRIGLSHGCECDNSKKMAASSHAPESDRRAQRLRTQSNWNPPDHSALSLGKLVSRKLTPTAVGH\n+WVVGRQRAACACAGGPNADWTDGQPIESSRGCIFQPAPHCHGGRIARHFG\n+>FUN_000010-T1 FUN_000010\n+MDFIHEAVGTSAAPSANPGPMPLCQPVRLELPFSPRHSFALGHFPFQLCPCLYAVWVQTMGLVLLLLIVFAPFLFRVLFK\n+PCNPKRYLTT\n+>FUN_000011-T1 FUN_000011\n+MEVDLDESIKEAAPEKKVHSITRSSPSPKRAKNSSPEPPKPKSTKSKATTPRVKKEKPAADLESSVLTDEERHERKRASA\n+VLYQKYKNRSSCLNPGSKEIPKGSPDCLSGLTFVVTGVLESMEREEAESVIKEYGGKVMTVVGKKLKYLVVGEEAGPKKL\n+AVAEELNIPILSEDGLFDLIREKSGIAKQVKEEKKSPK'..b'YVSVDESHSAASKSPVPGTGGGTEGYPHRVPTIECEEPSIEEDENSSERRHLKVGGQDTNRLSLDRSRSDETGSW\n+MTVECDEFIGSDTSDNEPRTLEPDRNVLETQATLEDANPLEYSNCATPTSDLNILLTPPNASPQIEKSVLETFEKYTGSS\n+DTGKKKNTLDKQSDRSKSSDSWTSGEKDTSPQRQQDWSLSVGKEKSSVEEESSVSCSIARPLGISQDFGKEEARKCQELK\n+QRMLQLEVGKEEITPTPSNEQTPTNEPKILVSKKPSTPTLEKQSPIDLGTSTESYLEPIEERIAKILDRGGARTEDSESS\n+SGGSRKPPRIEKPARANAGKKLSVTRADAGKSGSDRSSQESKSSFDSKGSLSVESRGSFETESSSGSLGAAQRRGELAQK\n+EQQSTWRPFPIESSNSSSTDDPWHHVETDGGYERYDAQNPLRDSSDSDVKEASPDDQKDASDASYQDELNDFPATFGYPA\n+MTSSLGGIGVNPTDIIGYSTGFTLGRTLSRISERSTASEKSSMEDDVSKASTHSVSMRDESVGSTDHQPSLSSDSRSNTN\n+LAYISDADRRTSAEMPEIPCDSATGDRLSSFGSLNEPKSPTLVTGRFSVTHVDEQQGDDVERHTLMCLSNAGSQDSEDWP\n+LPEIPFDHVPVKPADSLYAMPDLDKPVPKSFCWKASLSFQQSQDSLDWPSPPSSAIGAPIIVENIETYYASEVQSADKVI\n+LDEEMAVGPPDVAKVLPYEDTAYLMSAAFDDNDFGNEQLQPDTVSCLSSTLSAASCLSSSLNVSCTTSSTQATARALRKN\n+SSPEVIVAQPTRSPAPRSPLSEDELFSSDDVFMPGTIKVQLSPDAQLRKLSKGSNNSDTSIDDILSGSTTYLEDQTTVRK\n+NYEARLSSGGGGASCKKCSHSSHSEEETSSLGTDLDGTVRMGGLQQKKCTHSSHSEDTSIGLSISEWSTGTNTVRQYANL\n+SGSDSLSAVSTHSCAKSEKSNQTKSSISSINKSAESLNEQSGGSSFSHKFSGDNGSSDGL
RYDMLSNSETDKLSEATSAT\n+RSDDTTLTLTEMAHTISEWSTSSSRTLVGVAPGEYLPLKQALSGNKTSLSSPSEEKRCALPQVHRRSGSNGNQARAAQEH\n+ADSQTGPETSAAARKRRSLEMMSKLYQSQEICSESESPFVERLYAHSEKLTERYQSQEFVPLHGGPPASHLASSTTSQIQ\n+TQQPQQVRQKPRAPQPPTKPKPAVTRPIMQALLNKMKQPGLAEQAAEAAEAEEKKAMIAASAVAAKPPPPPVPTVPPIVT\n+PSDLPGDAVAPPPKPLAKHHSYDDRTLSKTQIREFKTTSKQLRQSSSFHEHMLSKSQQSSQELPMRIDEERDPHSTSSAT\n+NTTTTTNTLNSESTEPNSPQMPQRADKLVRCSPYYSSSLSSESPPNQLLQKPPRKTATQLSAGAVAASLKSPPSGNDTDS\n+SLDVRGQEAKMRSRGYRKKRQLPVKRMRANLTAAALLEQAESSECSEGYVPEVDSGSSEYSSCQRDDQYLEFDEELERDQ\n+TDDYEDYPQYSGKFESLDMSDNVDEMGFPRYDRLSHITKPMYHQALVMERPNPVQLPAPANHPMPPATGQPVKPARTKKR\n+QFKREDSTAAGTSGHSTAAPQVRPYHGRSYCNPEESEYETRGGGLSDELANSSEDSCSGFGGDAGASGSGTIRRGTTKGA\n+GQDQEQGTGGQARHVPYPDFLSDYESEPIEYERYACGLDIRVDPPPKFHDSDELSDQ\n+>FUN_000013-T1 FUN_000013\n+MSLDRRGEITTPPTRYDLTLGSDKSSSLSRSEAGTYDVIQAEIQHAKRQELATGVATASHQNGNGNGNGHTLSTQHDIEA\n+EVKKRKWPTEPSYFLAKELLMTERTYKKDLDVLNTTFRQVLSLGDVEQLQPLFELLDSLAQHHNLFLRDIEHRMVQWEGR\n+GGHEAHRIGDVMMKHMAALPIYDEYVQTHLDILHCMNDMYEGDERFRQVYKEFEQQKVCYLPIGELLLKPLNRLLHYQLI\n+LERLCDYYGEEHIDYADAMAVHHLLVRSTKGIRSQLPDSANFVELCELQRDINFEQLVQPHRRLIRQGCLLKHSKRGLQQ\n+RMFFLFSDLLLYGSKSPLDQSFRILGHVPVRSLLTENAEHNTFSIFGGQCAITVSAGTTAEKTLWLAELSKAAADIKNRP\n+PNMQLQLTTLKNCSSSEEGLDLFGLSNGNNSSLNSSVNGGGPLTTQQQKLQLQQQQQNRTQPSRSNTALHVCWHRGATVG\n+LGDHLIAAEHQLSGYLLRKFKNSSGWQKLWVVFTSFCLYFYKSYQDEFALASLPLLGYTVGPPGHQDAVQKEFVFKLSFK\n+NHVYFFRAESAHTYNR\n+>FUN_000014-T1 FUN_000014\n+MSTPARRRLMRDFKRLQEDPPTGVSGAPTDNNIMIWNAVIFGPHDTPFEDGTFKLTIEFTEEYPNKPPTVRFVSKVFHPN\n+VYADGGICLDILQNRWSPTYDVSAILTSIQSLLSDPNPNSPANSTAAQLYKENRREYEKRVKACVEQSFID\n+>FUN_000015-T1 FUN_000015\n+MNKAVCLVIVIQALRMVQAETPPYIKQCHRNDPKLVDCFIGAIEHLKPYLANGIPDIQLPSVEPFKMDTLALQLTEGPQG\n+YKITLKNMEAFGASNFKVTSLKLSEGSEPFKAKIVMPKLKIEAKYTSSGVLLILPASGGGDFHANFEGVSADLTGKTSIH\n+AFKGANYLHIDALSLVLDVKDVKMSISGAFNNNRILLEATNLFLRENSQVVLEAMQAQLQKKLASEFGKLANQLLKNVPV\n+EQFYVD\n+>FUN_000016-T1 
FUN_000016\n+MASKSFDLVIEEKTKKPERLYQPRRMRWLKYIILPAVFSFALLLILVNVDFSDNSEDSTHLGNDTSLIISGYGFENNTLR\n+RGFFSGGIALHSLVIENCTIVHINDAAFNQESTVNITSLQLINVQLENLTESALEGLQKLQNFTLVNENNHFRPFGFLSA\n+VAESLVSAEIHQSLAAAISYSVCDFLGSRNFPQLKYLDLSGTHLDKSLIKESFDNLPALEQLLLRNCGLGNIEWEIVRPR\n+LKLLHYLDLGGAQKTGNYEHQLDVSAFSPETTTNAEEISTILAKRAMAPEVVGTTTLGPTTSIEISPPSTQSTTTPKEES\n+TSMTETTILTTPSPKCEEELCQDLECSRITTDTVASADLGKSSCQDGLLVEICESTCTTPTFFCVILGENFTSASNCCSH\n+HTMRCVVSAQVSWFEDHSGLVIGLGVGLLFIGSFLGMLIVFGTLRLNPSWLRGNKRRESNTIGLIQGRFEKDPYEQVG\n+>FUN_000017-T1 FUN_000017\n+MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAGESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKR\n+FIRNPNYVKANEFYDKMLSSEYVSKRYKDLPPPHPGFGADQPPA\n+>FUN_000018-T1 FUN_000018\n+MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFDLKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLG\n+VTVDLINRELYQADSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK\n+KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPCAQVIFDSDPAPAGKNVPAQLEE\n+MSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFINGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGI\n+YYNELETRVRLNKRRVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIGETEKTSED\n+AAVGAQAASGADSPAQVARDRQSRSRSRTRS\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_bam/Genus_species.stats.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_bam/Genus_species.stats.json Thu Aug 26 06:55:33 2021 +0000 |
b |
@@ -0,0 +1,120 @@ +{ + "format": "annotation", + "command": "/home/abretaud/miniconda3/envs/__funannotate@1.8.7/bin/funannotate predict --input /tmp/tmp2z22js7e/files/c/c/3/dataset_cc3f66b5-ec9b-4669-93d6-4ddeac0e33c1.dat --out output --database /home/abretaud/.planemo/planemo_tmp_z_14xthq/test-data/funannotate_db --species Genus species --isolate --strain --organism other --ploidy 1 --SeqCenter CFMR --SeqAccession 12345 --name FUN_ --numbering 1 --rna_bam /tmp/tmp2z22js7e/files/9/6/5/dataset_965b5091-b838-4f4a-8ec8-9fb84c12cdc5.dat --transcript_evidence /tmp/tmp2z22js7e/files/d/a/e/dataset_daea4ce7-3191-40eb-ad83-b35e9e058d46.dat --p2g_pident 80 --p2g_prefilter diamond --min_training_models 3 --busco_seed_species fly --busco_db insecta --evm-partition-interval 1500 --min_intronlen 10 --max_intronlen 3000 --min_protlen 50 --repeat_filter overlap blast --cpus 1", + "organism": "Genus_species", + "software": { + "name": "funannotate", + "version": "1.8.7", + "date": "2021-07-22", + "resources": { + "merops": { + "type": "diamond", + "version": "12.0", + "date": "2017-10-04", + "num-records": "5009" + }, + "uniprot": { + "type": "diamond", + "version": "2021_03", + "date": "2021-06-02", + "num-records": "565254" + }, + "dbCAN": { + "type": "hmmer3", + "version": "9.0", + "date": "2020-08-04", + "num-records": "641" + }, + "pfam": { + "type": "hmmer3", + "version": "34.0", + "date": "2021-03", + "num-records": "19179" + }, + "repeats": { + "type": "diamond", + "version": "1.0", + "date": "2021-07-19", + "num-records": "11950" + }, + "go": { + "type": "text", + "version": "2021-07-02", + "date": "2021-07-02", + "num-records": "47228" + }, + "mibig": { + "type": "diamond", + "version": "1.4", + "date": "2021-07-19", + "num-records": "31023" + }, + "interpro": { + "type": "xml", + "version": "86.0", + "date": "2021-06-03", + "num-records": "38913" + }, + "busco_outgroups": { + "type": "outgroups", + "version": "1.0", + "date": "2021-07-19", + "num-records": "8" + }, + 
"gene2product": { + "type": "text", + "version": "1.70", + "date": "2021-06-15", + "num-records": "34039" + } + } + }, + "assembly": { + "num_contigs": 4, + "length": 227430, + "mean_length": 56857.5, + "N50": 215740, + "L50": 1, + "N90": 215740, + "L90": 1, + "GC_content": 42.86 + }, + "annotation": { + "genes": 18, + "common_name": 0, + "mRNA": 18, + "tRNA": 0, + "ncRNA": 0, + "rRNA": 0, + "avg_gene_length": 2775.33, + "transcript-level": { + "CDS_transcripts": 18, + "CDS_five_utr": 0, + "CDS_three_utr": 0, + "CDS_no_utr": 18, + "CDS_five_three_utr": 0, + "CDS_complete": 16, + "CDS_no-start": 1, + "CDS_no-stop": 1, + "CDS_no-start_no-stop": 0, + "total_exons": 54, + "total_cds_exons": 54, + "multiple_exon_transcript": 15, + "single_exon_transcript": 3, + "avg_exon_length": 563.63, + "avg_protein_length": 571.83, + "functional": { + "go_terms": 0, + "interproscan": 0, + "eggnog": 0, + "pfam": 0, + "cazyme": 0, + "merops": 0, + "busco": 0, + "secretion": 0 + }, + "pct_exon_overlap_transcript_evidence": 31.48, + "pct_exon_overlap_protein_evidence": 9.26 + } + } +} \ No newline at end of file |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_bam/Genus_species.tbl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_bam/Genus_species.tbl Thu Aug 26 06:55:33 2021 +0000 |
b |
@@ -0,0 +1,276 @@ +>Feature sample +1 215740 REFERENCE + CFMR 12345 +<2331 3254 gene + locus_tag FUN_000001 +<2331 3254 mRNA + product hypothetical protein + transcript_id gnl|ncbi|FUN_000001-T1_mrna + protein_id gnl|ncbi|FUN_000001-T1 +<2331 3254 CDS + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000001-T1_mrna + protein_id gnl|ncbi|FUN_000001-T1 +5802 4883 gene + locus_tag FUN_000002 +5802 5797 mRNA +5539 4883 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000002-T1_mrna + protein_id gnl|ncbi|FUN_000002-T1 +5802 5797 CDS +5539 4883 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000002-T1_mrna + protein_id gnl|ncbi|FUN_000002-T1 +10557 8696 gene + locus_tag FUN_000003 +10557 10549 mRNA +10462 8696 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000003-T1_mrna + protein_id gnl|ncbi|FUN_000003-T1 +10557 10549 CDS +10462 8696 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000003-T1_mrna + protein_id gnl|ncbi|FUN_000003-T1 +15214 14247 gene + locus_tag FUN_000004 +15214 15209 mRNA +14648 14247 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000004-T1_mrna + protein_id gnl|ncbi|FUN_000004-T1 +15214 15209 CDS +14648 14247 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000004-T1_mrna + protein_id gnl|ncbi|FUN_000004-T1 +21705 19533 gene + locus_tag FUN_000005 +21705 21700 mRNA +21515 19533 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000005-T1_mrna + protein_id gnl|ncbi|FUN_000005-T1 +21705 21700 CDS +21515 19533 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000005-T1_mrna + protein_id gnl|ncbi|FUN_000005-T1 +35679 34843 gene + locus_tag FUN_000006 +35679 34843 mRNA + product hypothetical protein + transcript_id gnl|ncbi|FUN_000006-T1_mrna + protein_id gnl|ncbi|FUN_000006-T1 +35679 34843 CDS + codon_start 1 + product hypothetical protein + transcript_id 
gnl|ncbi|FUN_000006-T1_mrna + protein_id gnl|ncbi|FUN_000006-T1 +40223 44130 gene + locus_tag FUN_000007 +40223 40396 mRNA +40659 41193 +41707 42080 +43409 43609 +43678 44130 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000007-T1_mrna + protein_id gnl|ncbi|FUN_000007-T1 +40223 40396 CDS +40659 41193 +41707 42080 +43409 43609 +43678 44130 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000007-T1_mrna + protein_id gnl|ncbi|FUN_000007-T1 +47195 45527 gene + locus_tag FUN_000008 +47195 46753 mRNA +46330 46214 +46157 45527 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000008-T1_mrna + protein_id gnl|ncbi|FUN_000008-T1 +47195 46753 CDS +46330 46214 +46157 45527 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000008-T1_mrna + protein_id gnl|ncbi|FUN_000008-T1 +79527 78685 gene + locus_tag FUN_000009 +79527 79519 mRNA +79068 78685 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000009-T1_mrna + protein_id gnl|ncbi|FUN_000009-T1 +79527 79519 CDS +79068 78685 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000009-T1_mrna + protein_id gnl|ncbi|FUN_000009-T1 +87202 88320 gene + locus_tag FUN_000010 +87202 87207 mRNA +88054 88320 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000010-T1_mrna + protein_id gnl|ncbi|FUN_000010-T1 +87202 87207 CDS +88054 88320 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000010-T1_mrna + protein_id gnl|ncbi|FUN_000010-T1 +133587 137862 gene + locus_tag FUN_000011 +133587 134504 mRNA +134720 135510 +135569 136284 +137516 137862 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000011-T1_mrna + protein_id gnl|ncbi|FUN_000011-T1 +133587 134504 CDS +134720 135510 +135569 136284 +137516 137862 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000011-T1_mrna + protein_id gnl|ncbi|FUN_000011-T1 +144294 164495 gene + locus_tag FUN_000012 +144294 144551 
mRNA +149012 149244 +149367 149588 +149654 149897 +149952 150112 +150174 150248 +151966 152072 +152314 152429 +152496 152751 +153296 153630 +153689 155122 +155789 158975 +159190 164495 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000012-T1_mrna + protein_id gnl|ncbi|FUN_000012-T1 +144294 144551 CDS +149012 149244 +149367 149588 +149654 149897 +149952 150112 +150174 150248 +151966 152072 +152314 152429 +152496 152751 +153296 153630 +153689 155122 +155789 158975 +159190 164495 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000012-T1_mrna + protein_id gnl|ncbi|FUN_000012-T1 +167121 169212 gene + locus_tag FUN_000013 +167121 168360 mRNA +168722 169212 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000013-T1_mrna + protein_id gnl|ncbi|FUN_000013-T1 +167121 168360 CDS +168722 169212 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000013-T1_mrna + protein_id gnl|ncbi|FUN_000013-T1 +178916 176699 gene + locus_tag FUN_000014 +178916 178873 mRNA +177172 176887 +176824 176699 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000014-T1_mrna + protein_id gnl|ncbi|FUN_000014-T1 +178916 178873 CDS +177172 176887 +176824 176699 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000014-T1_mrna + protein_id gnl|ncbi|FUN_000014-T1 +192004 194669 gene + locus_tag FUN_000015 +192004 192067 mRNA +193549 193658 +194041 194455 +194518 194669 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000015-T1_mrna + protein_id gnl|ncbi|FUN_000015-T1 +192004 192067 CDS +193549 193658 +194041 194455 +194518 194669 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000015-T1_mrna + protein_id gnl|ncbi|FUN_000015-T1 +210553 209044 gene + locus_tag FUN_000016 +210553 210548 mRNA +210474 209044 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000016-T1_mrna + protein_id gnl|ncbi|FUN_000016-T1 +210553 210548 CDS +210474 209044 + codon_start 
1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000016-T1_mrna + protein_id gnl|ncbi|FUN_000016-T1 +>Feature sample4 +1 7560 REFERENCE + CFMR 12345 +1466 1092 gene + locus_tag FUN_000017 +1466 1092 mRNA + product hypothetical protein + transcript_id gnl|ncbi|FUN_000017-T1_mrna + protein_id gnl|ncbi|FUN_000017-T1 +1466 1092 CDS + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000017-T1_mrna + protein_id gnl|ncbi|FUN_000017-T1 +2126 >3537 gene + locus_tag FUN_000018 +2126 2199 mRNA +2258 3224 +3284 >3537 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000018-T1_mrna + protein_id gnl|ncbi|FUN_000018-T1 +2126 2199 CDS +2258 3224 +3284 >3537 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000018-T1_mrna + protein_id gnl|ncbi|FUN_000018-T1 |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_bam/fly.parameters.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_bam/fly.parameters.json Thu Aug 26 06:55:33 2021 +0000 |
[ |
@@ -0,0 +1,1 @@ +{"augustus": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-22", "path": "/tmp/tmp2z22js7e/job_working_directory/000/5/working/output/predict_misc/ab_initio_parameters/augustus/species/genus_species"}], "genemark": [{}], "codingquarry": [{}], "snap": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-22", "path": "/tmp/tmp2z22js7e/job_working_directory/000/5/working/output/predict_misc/ab_initio_parameters/genus_species.snap.hmm"}], "glimmerhmm": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-22", "path": "/tmp/tmp2z22js7e/job_working_directory/000/5/working/output/predict_misc/ab_initio_parameters/glimmerhmm"}]} \ No newline at end of file |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_scratch/Genus_species.cds-transcripts.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_scratch/Genus_species.cds-transcripts.fa Thu Aug 26 06:55:33 2021 +0000 |
b |
b'@@ -0,0 +1,241 @@\n+>FUN_000001-T1 FUN_000001\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000002-T1 FUN_000002\n+ATGCCTCTGTACGGCGTTTCTCCAGCGTCTGCTCTGTGGGCAGGAAGTAGGCGACAAACTGTTCGCCGCTCTCGTCCATC\n+ACACCACGAATCATGGCCTGCGACATCTCCTCCAGCTGGGCGGGCACAGAGTCTCCTCCCGAAGCGACTTCTTGACGTTG\n+TAACCGACCTTGGCCTCGATGTTCTCCAGGTTCTGGGGCTGGAAGCGCGTCTGCTCGGTGGAGATGTACTCGGATTTGCG\n+CAACCATGA\n+>FUN_000003-T1 FUN_000003\n+ATGTATGACCAGGAGCCCTACAAGTGCCATCTCTGCTCCAAGACGTTCCGAATGAAGGGCTCCCTTCGAATCCACCTGAA\n+AGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAATCCCAGTCCTACACCCGCTTCTACCACCA\n+GTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACACAGAGCCTGGAGCCCTTGGCAACGGAAAT\n+AACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGCATGCTCCAGCAGTCCCCCAGTTCGCCGGA\n+GTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTCCTTCACCACCAAGTACTTTCTGAAGAAGC\n+ACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCAGGACCTTCACCTTCCAGCAGTCGTACCAC\n+AAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGTGGACGCGCCTTCAAGGAACTGTCTACGCT\n+GCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAACGAGTCTCTTTCCTTGTCCACACGCGCATCC\n+ACACGGGAGTGATGCCCTACAAGTGCGAGCTCTGCCAAAAGACGTTCAGGTACAAGGTCAGTCAAAGGACCCATCGATGT\n+CCCACCGAGGAGGCCCAGACACCGGAGCAGCTGATAAAGGCGTTCCTGGAGGGCAACGACTCGCATACTCAGCCTTCACC\n+AGCGAGCGCCGAAATAGCTGCCATCAACAGCAGCTCCATTGTGGATCCTGAGCAGGAGGCACTGCTTTCGCAGTCAATCG\n+ACGACATTGTCGTTGAGCAGTGCCAAAAGCTGGGCATCTGTGGTGTGGAGCCGCGGGAGGAGGGACAGCTCATATCCCTA\n+CAGCCGGTTGCGGTGGTACACTTCAGCGGGAATGGCTCTCCGCTGCAGCAACTCCAGAACTTGAGAATCTACTCACCGCA\n+ACAAACAGAGCTACCTAGTTCCGATGGCGAAGTCTTCCAGCGCTTTTTGATGGACGCCACGTAG\n+>FUN_000004-T1 
FUN_000004\n+ATGTCAAACAAGCGCGAGATAGATGAGCTGACTTCCCGCATCAAGTCGGCTAAGACTACCTTGGTGGAGTGGACAGAGGC\n+CATGGAAGACGGAAACAAGGGCTACCAGCTAATCGAAAAGTACTACCTCGATGACCAGCAAAAGGCACGGGAGCTGAACA\n+TTAAGCGTCAGCTCTTGCAGGCGGACATCGACAAGCGGCGCAAGCAGGTGGTGCTCCTCTATGACGAGCAAATGACGCTG\n+GAGAAGAATCTGGAGCGAACCGCTTGTCTGTACAGGTCTGCCCACGCGGAACGCCGCCAGATGGTGGAGACGTGGAAGAG\n+CGCCGTGAACCAGATGACCCAGCGAGAGCACGACATTCAGCGCAGCGAGATAGAGTGTGCGGAGCTGGCCCAGAAAGCCC\n+AGCAGACGGCTCAGACCTACAAGGAGTACGACAACCAGCTAACCGAGGTCATCGAGAACAACCGCCAGGTGGAGCTCGCG\n+ATCGAGTCCCTCAACGAGGAAAGCTCCGACATGAAAAACCAAATCCAGATCCTGATTGATGCCACATTACTGAAGGAGCG\n+CGAGATCGACGGACTGCGTCGGGAACTGGAGAACCTTTCGAACCGGGTTCACCTGCAGCGCATGGATAACCGTAGTCAGA\n+TGAAAAAGCGCGATGAGAAGGCCAAGGAGCTGGAGAACTTCGCTTCGGTGATGGAGAAGGTTAACGCTCGGCTGAAGTCT\n+GTACAGAACAAGGCCCTCAATGCGGAGCAGCGGCTGCAAATTCTGGAGGAGATGATGCAGGCGGAGGAGACGGCTCTGCG\n+AAACCTGGACAAGGAGCAGGAGAAGGTCAACGAGATGTTGTACCGCACCCAGAGGCAGGTCATCGAGCTGCAGGACGAGG\n+AGAAGGTCTTGAAGGTGCAGAACGACTCGCTCAACTCCAACCTGGCAGCGATCAATCGCAATCAGCAGCAGGTCAATAAC\n+GAACTCAAGCGGCAGACGGAGATCCATTACAGCCTGTCCTTTAAGTGCTTGGAGGCGGAGCGGCGTCTCGCCGAGATCAA\n+GGGTCTGGCAGATGACCCCGAGGTAGAGGCCACAAACATGGCCCGGCTTAACACTCTGGAGCAGGAGTATGAAAAGTTGC\n+AACGCCTCATCGCCACCACGGAGGCGCAGAATAAGAAGCTAAACTATAACATGAACAACCTGGTCGTCCAGTACAACGCC\n+GACGAGAAGGAGCTCGAAATGGTCAGGTTCAAGATTAAAGAGGCCCAGGTGTACTGCGAGGGCACCGTCAAGAGACTGCG\n+CCAGATTCGATACGAGAATTCCGAGCTCATAGTCGACCTCAACATGGTCAAGATGCGCTGCAGCGACCTAGAGGTTGGCA\n+TCGGAGGCTGCGAGCAGGGCACCTACGACCTAGAGCAGCACCGCCTTGCCTTCCGGCGCGCCATCAAAGATCGCACCGTT\n+GAGCTTCGCAGCCAAGAGGATGTGCTCCTCCTCAAGAAGAAGCATCTTAACGAGGAACTAAGCACCCTGCGAGCCGACCT\n+CGGGGAGCGGAAGAAGCAGATTGAGGCGATGAAGGCGCGCTTTGAACTTACCGCCCAATTGCTAGGCAAAAACGAGGACG\n+GCTCCATCATGACCAGCACACAGCTGAAGGTGGTGAGCGCCCAGGAACGGCAGATGCTGGCCGACGAGGGCGATGCCCTC\n+AACAAAAAGGTTCTCAAGGCTGAGAAGGAAGTGGTCGCCCTGGAAAACACACTGCGCCAGTTCGACAAATCGAACGATAA\n+CTACCGAAAAACATTCCGATCTGTGGACGAAAACTCAAAGGATCGCGAGCGGGCCGAACTGGAGCTGAAGGAACTTGAGG\n+CGGCCTACTGCCGCGAGCTGGAAAAACTGAAGGTTCTCAGGTGCAAGGCACAGCACTACCAGCAGAAGCACGCAGCCC
AG\n+CGAGCTGAGGAGGAAGACCTGATTTCCAAAATAGAGAAGGCGAAGGCTAGTCGGGCGGAGCACTCTGCAGTTCTGGAGAA\n+GATCGAGCGCGAACTAGATGACCAGCGAATGAAGCTGGACAGGGCCAA'..b'+GAGGGCCTGGACCTATTCGGTCTGAGCAACGGGAACAACAGCAGCCTGaacagcagtgtgaacggcgGCGGTCCGCTGAC\n+TACGCAgcagcagaagttgcagctgcagcaacagcagcaAAACAGGACGCAGCCATCCCGAAGCAATACGGCTCTGCACG\n+TCTGTTGGCATCGTGGCGCCACCGTAGGACTGGGCGATCACCTGATAGCCGCCGAGCACCAGTTGTCCGGTTATCTGCTG\n+CGAAAGTTCAAGAATAGCTCCGGCTGGCAGAAGCTCTGGGTGGTGTTCACGTCCTTTTGTCTGTACTTTTACAAGAGCTA\n+CCAGGACGAGTTCGCACTGGCTAGTCTGCCGCTATTGGGTTACACGGTGGGTCCTCCTGGTCACCAAGATGCCGTTCAGA\n+AGGAGTTCGTCTTCAAGCTTTCCTTTAAGAACCACGTCTACTTCTTCCGCGCGGAAAGTGCGCACACCTACAACAGGTAA\n+>FUN_000013-T1 FUN_000013\n+ATGCAGCCCATCGCCATGGAGATGGGGCAGAATCAACTCGAAGTCAAAGCGTCGGTTTTGGGTGGAAGCCCGCAGCTTGG\n+TGATCTCAAAGTTGGAGGCGCCCAGGATGTTCAGCTTCTTGGCTTTTACCGTGAGCCCGGCGGATCCATCCAGAATGCTC\n+AGATCTCCAATGTAGAGGGGCTCTAG\n+>FUN_000014-T1 FUN_000014\n+ATGGCATCCAAGTCCTTCGATTTGGTCATTGAGGAAAAGACAAAGAAACCGGAACGGCTCTACCAACCTCGTCGGATGCG\n+ATGGCTTAAATATATCATCCTGCCGGCAGTCTTCTCCTTTGCGCTTCTGCTGATCCTGGTCAATGTGGACTTCTCCGATA\n+ATAGTGAGGATTCCACGCATCTGGGCAATGACACATCGCTCATTATATCTGGCTATGGATTTGAAAATAACACACTGCGC\n+CGGGGATTTTTCTCCGGAGGAATTGCATTGCATAGCCTTGTCATCGAGAACTGCACCATAGTTCATATCAACGACGCAGC\n+TTTTAATCAGGAAAGCACTGTGAACATAACAAGCCTGCAGCTCATTAATGTGCAATTGGAAAACTTGACTGAATCTGCCC\n+TGGAAGGTCTTCAAAAGCTTCAGAACTTTACTCTAGTCAATGAAAATAATCACTTCAGGCCATTTGGATTCCTCTCAGCT\n+GTGGCTGAATCGTTAGTCAGTGCGGAAATCCACCAGTCACTAGCCGCGGCAATATCGTATTCAGTGTGTGATTTTTTGGG\n+ATCCCGGAATTTCCCACAGTTGAAATATTTAGATCTGAGTGGCACACACTTGGACAAAAGCCTTATTAAAGAGTCCTTTG\n+ACAATCTGCCCGCGTTGGAACAACTACTTCTAAGGAACTGTGGGTTGGGCAACATCGAGTGGGAAATCGTGAGGCCAAGA\n+CTCAAATTGTTGCACTATTTGGACTTGGGTGGAGCTCAAAAGACGGGCAACTATGAACATCAGTTGGACGTATCCGCGTT\n+CTCTCCTGAGACAACTACTAACGCTGAAGAGATATCTACTATCCTAGCGAAGAGAGCGATGGCTCCAGAAGTAGTGGGCA\n+CCACTACACTAGGACCTACTACTTCCATAGAAATTTCACCACCATCAACACAAAGTACAACAACACCCAAAGAAGAAAGT\n+ACATCTATGACAGAAACAACGATATTAACAACACCATCGCCAAAATGCGAAGAGGAACTTTGCCAGGATCTAGAGTGCTC\n+CAGAATTACCACCGATACGGTTGCATCTGC
AGATCTGGGAAAATCGTCGTGCCAGGATGGTTTACTGGTGGAGATCTGCG\n+AATCGACTTGCACCACTCCCACATTCTTCTGTGTGATATTAGGCGAGAACTTTACCTCCGCATCCAACTGTTGTTCCCAC\n+CATACCATGCGATGTGTGGTCTCTGCACAGGTCTCCTGGTTCGAGGACCACAGCGGACTGGTCATTGGCTTGGGAGTGGG\n+TCTCCTCTTCATCGGCAGCTTCCTCGGCATGCTCATCGTCTTCGGAACCCTGCGCCTGAATCCATCCTGGTTGCGAGGCA\n+ATAAGCGGCGGGAGTCGAATACGATAGGACTGATTCAGGGAAGATTTGAGAAGGACCCGTATGAGCAAGAATTGCAACGT\n+ATACGAGAGCTGTGA\n+>FUN_000015-T1 FUN_000015\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000016-T1 FUN_000016\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGT
TCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCGATCGACAGTCTCGTTCTCGGAG\n+TCGAACTCGCAGCGG\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_scratch/Genus_species.discrepency.report.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_scratch/Genus_species.discrepency.report.txt Thu Aug 26 06:55:33 2021 +0000 |
b |
@@ -0,0 +1,135 @@ +Discrepancy Report Results + +Summary +DISC_PROTEIN_NAMES:All proteins have same name "hypothetical protein" +DISC_SOURCE_QUALS_ASNDISC:taxname (all present, all same) +DISC_FEATURE_COUNT:gene: 16 present +DISC_FEATURE_COUNT:CDS: 16 present +DISC_FEATURE_COUNT:mRNA: 16 present +DISC_COUNT_NUCLEOTIDES:4 nucleotide Bioseqs are present +JOINED_FEATURES:26 features have joined locations. +NO_ANNOTATION:2 bioseqs have no features +DISC_QUALITY_SCORES:Quality scores are missing on all sequences. +FATAL: DISC_BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS:1 features have partial ends that do not abut the end of the sequence or a gap, and cannot be extended by 3 or fewer nucleotides to do so +ONCALLER_COMMENT_PRESENT:4 comment descriptors were found (all same) +MISSING_GENOMEASSEMBLY_COMMENTS:4 bioseqs are missing GenomeAssembly structured comments +MOLTYPE_NOT_MRNA:4 molecule types are not set as mRNA. +TECHNIQUE_NOT_TSA:4 technique are not set as TSA +MISSING_STRUCTURED_COMMENT:4 sequences do not include structured comments. +MISSING_PROJECT:20 sequences do not include project. +DISC_INCONSISTENT_MOLINFO_TECH:Molinfo Technique Report (some missing, all same) + + +Detailed Report + +DiscRep_ALL:DISC_PROTEIN_NAMES::All proteins have same name "hypothetical protein" + +DiscRep_ALL:DISC_SOURCE_QUALS_ASNDISC::taxname (all present, all same) +DiscRep_SUB:DISC_SOURCE_QUALS_ASNDISC::4 sources have 'Genus species' for taxname +DiscRep_ALL:DISC_FEATURE_COUNT::gene: 16 present +DiscRep_ALL:DISC_FEATURE_COUNT::CDS: 16 present +DiscRep_ALL:DISC_FEATURE_COUNT::mRNA: 16 present +DiscRep_ALL:DISC_COUNT_NUCLEOTIDES::4 nucleotide Bioseqs are present +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:JOINED_FEATURES::26 features have joined locations. 
+DiscRep_SUB:JOINED_FEATURES::26 features have joined location but no exception +genome:CDS hypothetical protein (sample4:2126-2199, 2258-3224, 3284->3537) FUN_000016 +genome:mRNA hypothetical protein (sample4:2126-2199, 2258-3224, 3284->3537) FUN_000016 +genome:mRNA hypothetical protein (sample:c3142-3138, c3004-2883, c2686-2565) FUN_000002 +genome:CDS hypothetical protein (sample:c3142-3138, c3004-2883, c2686-2565) FUN_000002 +genome:mRNA hypothetical protein (sample:c5802-5797, c5539-4937, c4742-4248) FUN_000003 +genome:CDS hypothetical protein (sample:c5802-5797, c5539-4937, c4742-4248) FUN_000003 +genome:CDS hypothetical protein (sample:c10664-10657, c10499-8707, c8385-7691) FUN_000004 +genome:mRNA hypothetical protein (sample:c10664-10657, c10499-8707, c8385-7691) FUN_000004 +genome:mRNA hypothetical protein (sample:c15214-15209, c14648-14247) FUN_000005 +genome:CDS hypothetical protein (sample:c15214-15209, c14648-14247) FUN_000005 +genome:CDS hypothetical protein (sample:15539-15543, 15646-15919, 16485-16619) FUN_000006 +genome:mRNA hypothetical protein (sample:15539-15543, 15646-15919, 16485-16619) FUN_000006 +genome:CDS hypothetical protein (sample:c21705-21700, c21515-19638, c19482-18358) FUN_000007 +genome:mRNA hypothetical protein (sample:c21705-21700, c21515-19638, c19482-18358) FUN_000007 +genome:CDS hypothetical protein (sample:40223-40396, 40659-41193, 41707-42080, 43409-43609, 43678-44130) FUN_000009 +genome:mRNA hypothetical protein (sample:40223-40396, 40659-41193, 41707-42080, 43409-43609, 43678-44130) FUN_000009 +genome:mRNA hypothetical protein (sample:87202-87207, 88054-88320) FUN_000010 +genome:CDS hypothetical protein (sample:87202-87207, 88054-88320) FUN_000010 +genome:CDS hypothetical protein (sample:c106221-106216, c104632-104258, c103947-103696, c103618-103229, c103151-102510) FUN_000011 +genome:mRNA hypothetical protein (sample:c106221-106216, c104632-104258, c103947-103696, c103618-103229, c103151-102510) FUN_000011 +genome:CDS 
hypothetical protein (sample:167121-168069, 168722-169212) FUN_000012 +genome:mRNA hypothetical protein (sample:167121-168069, 168722-169212) FUN_000012 +genome:CDS hypothetical protein (sample:180262-180267, 180400-180579) FUN_000013 +genome:mRNA hypothetical protein (sample:180262-180267, 180400-180579) FUN_000013 +genome:CDS hypothetical protein (sample:c210553-210548, c210474-209053, c208645-208619) FUN_000014 +genome:mRNA hypothetical protein (sample:c210553-210548, c210474-209053, c208645-208619) FUN_000014 + +DiscRep_ALL:NO_ANNOTATION::2 bioseqs have no features +genome:sample2 (length 2030) +genome:sample3 (length 2100) + +DiscRep_ALL:DISC_QUALITY_SCORES::Quality scores are missing on all sequences. + +FATAL: DiscRep_ALL:DISC_BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS::1 featurepartial ends thands that do not abut the end of the sequence or a gap, and cannot be extended by 3 or fewer nucleotides to do so +genome:CDS hypothetical protein (sample4:2126-2199, 2258-3224, 3284->3537) FUN_000016 + +DiscRep_ALL:ONCALLER_COMMENT_PRESENT::4 comment descriptors were found (all same) +genome:sample:"Annotated using 1.8.7" +genome:sample2:"Annotated using 1.8.7" +genome:sample3:"Annotated using 1.8.7" +genome:sample4:"Annotated using 1.8.7" + +DiscRep_ALL:MISSING_GENOMEASSEMBLY_COMMENTS::4 bioseqs are missing GenomeAssembly structured comments +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:MOLTYPE_NOT_MRNA::4 molecule types are not set as mRNA. +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:TECHNIQUE_NOT_TSA::4 technique are not set as TSA +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:MISSING_STRUCTURED_COMMENT::4 sequences do not include structured comments. 
+genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:MISSING_PROJECT::20 sequences do not include project. +genome:sample (length 215740) +genome:ncbi:FUN_000001-T1 (length 124) +genome:ncbi:FUN_000002-T1 (length 82) +genome:ncbi:FUN_000003-T1 (length 367) +genome:ncbi:FUN_000004-T1 (length 831) +genome:ncbi:FUN_000005-T1 (length 135) +genome:ncbi:FUN_000006-T1 (length 137) +genome:ncbi:FUN_000007-T1 (length 1002) +genome:ncbi:FUN_000008-T1 (length 278) +genome:ncbi:FUN_000009-T1 (length 578) +genome:ncbi:FUN_000010-T1 (length 90) +genome:ncbi:FUN_000011-T1 (length 554) +genome:ncbi:FUN_000012-T1 (length 479) +genome:ncbi:FUN_000013-T1 (length 61) +genome:ncbi:FUN_000014-T1 (length 484) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) +genome:ncbi:FUN_000015-T1 (length 124) +genome:ncbi:FUN_000016-T1 (length 432) + +DiscRep_ALL:DISC_INCONSISTENT_MOLINFO_TECH::Molinfo Technique Report (some missing, all same) +DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::technique (all missing) +DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::4 Molinfos are missing field technique +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_scratch/Genus_species.gbk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_scratch/Genus_species.gbk Thu Aug 26 06:55:33 2021 +0000 |
b |
b'@@ -0,0 +1,4148 @@\n+LOCUS sample 215740 bp DNA linear 21-JUL-2021\n+DEFINITION Genus species.\n+ACCESSION \n+VERSION\n+KEYWORDS .\n+SOURCE Genus species\n+ ORGANISM Genus species\n+ Unclassified.\n+REFERENCE 1 (bases 1 to 215740)\n+ AUTHORS Palmer,J.M.\n+ TITLE Direct Submission\n+ JOURNAL Submitted (21-JUL-2021) CFMR, USDA Forest Service, 1 Gifford\n+ Pinchot Drive, Madison, WI 53726, USA\n+COMMENT \'Annotated using 1.8.7\'.\n+FEATURES Location/Qualifiers\n+ source 1..215740\n+ /organism="Genus species"\n+ /mol_type="genomic DNA"\n+ gene complement(1092..1466)\n+ /locus_tag="FUN_000001"\n+ mRNA complement(1092..1466)\n+ /locus_tag="FUN_000001"\n+ /product="hypothetical protein"\n+ CDS complement(1092..1466)\n+ /locus_tag="FUN_000001"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUN_000001-T1"\n+ /translation="MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAG\n+ ESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKRFIRNPNYVKANEFYDKMLSSEY\n+ VSKRYKDLPPPHPGFGADQPPA"\n+ gene complement(2565..3142)\n+ /locus_tag="FUN_000002"\n+ mRNA complement(join(2565..2686,2883..3004,3138..3142))\n+ /locus_tag="FUN_000002"\n+ /product="hypothetical protein"\n+ CDS complement(join(2565..2686,2883..3004,3138..3142))\n+ /locus_tag="FUN_000002"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUN_000002-T1"\n+ /translation="MPLYGVSPASALWAGSRRQTVRRSRPSHHESWPATSPPAGRAQS\n+ LLPKRLLDVVTDLGLDVLQVLGLEARLLGGDVLGFAQP"\n+ gene complement(4248..5802)\n+ /locus_tag="FUN_000003"\n+ mRNA complement(join(4248..4742,4937..5539,5797..5802))\n+ /locus_tag="FUN_000003"\n+ /product="hypothetical protein"\n+ CDS complement(join(4248..4742,4937..5539,5797..5802))\n+ /locus_tag="FUN_000003"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUN_000003-T1"\n+ /translation="MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPN\n+ PSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGNNSTCTASQPYALSGALSMLQQS\n+ PSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+ 
KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCERVSFLVHTRIHTGVM\n+ PYKCELCQKTFRYKVSQRTHRCPTEEAQTPEQLIKAFLEGNDSHTQPSPASAEIAAIN\n+ SSSIVDPEQEALLSQSIDDIVVEQCQKLGICGVEPREEGQLISLQPVAVVHFSGNGSP\n+ LQQLQNLRIYSPQQTELPSSDGEVFQRFLMDAT"\n+ gene complement(7691..10664)\n+ /locus_tag="FUN_000004"\n+ mRNA complement(join(7691..8385,8707..10499,10657..10664))\n+ /locus_tag="FUN_000004"\n+ /product="hypothetical protein"\n+ CDS complement(join(7691..8385,8707..10499,10657..10664))\n+ /locus_tag="FUN_000004"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUN_000004-T1"\n+ /translation="MSNKREIDELTSRIKSAKTTLVEWTEAMEDGNKGYQLIEKYYLD\n+ DQQKARELNIKRQLLQADIDKRRKQVVLLYDEQMTLEKNLERTACLYRSAHAERRQMV\n+ ETWKSAVNQMTQREHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELA\n+ IESLNEESSDMKNQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRD\n+ EKAKELENFASVMEKVNARLKS'..b'1 agcttttggc actgctcaac gacaatgtcg tcgattgact gcgaaagcag tgcctcctgc\n+ 4501 tcaggatcca caatggagct gctgttgatg gcagctattt cggcgctcgc tggtgaaggc\n+ 4561 tgagtatgcg agtcgttgcc ctccaggaac gcctttatca gctgctccgg tgtctgggcc\n+ 4621 tcctcggtgg gacatcgatg ggtcctttga ctgaccttgt acctgaacgt cttttggcag\n+ 4681 agctcgcact tgtagggcat cactcccgtg tggatgcgcg tgtggacaag gaaagagact\n+ 4741 cgctgccgga aacacttgcc tgtggacgag tccgaaatac gaaaaggtta gacatggagt\n+ 4801 gacccggaaa agaaggtata cctctccttc atttaaagta aatagggcaa atcgcaatgg\n+ 4861 agtatgctca tttataagct ggctaacaaa ataaggggcg gctaattaag gggtttgatc\n+ 4921 gatactcacc gcagacttcg cacttgaagg gcttttcgcc gctgtgaatc ctctggtggt\n+ 4981 tatgcagcgt agacagttcc ttgaaggcgc gtccacaaac cccgcagaca tggggcttta\n+ 5041 cctcgctgtg gtagagcaaa tgcttgtggt acgactgctg gaaggtgaag gtcctggcgc\n+ 5101 agatctcgca tgtgtacggc atctcgccgg tgtggagccg cttgtgcttc ttcagaaagt\n+ 5161 acttggtggt gaaggacttg ctgcacacat cgcactccca cagcttcggg gtggccgtgc\n+ 5221 ccgactccgg cgaactgggg gactgctgga gcatgctcag cgccccactc agcgcgtatg\n+ 5281 gctgggatgc ggtgcacgtg gagttatttc cgttgccaag ggctccaggc tctgtgtgtc\n+ 5341 gaatgcgatc gcaaatgctc 
agcttgggcg tggcagtaac tgcactggtg gtagaagcgg\n+ 5401 gtgtaggact gggattagga tttggattgg gattggagca gggcacgccc atcatgtgca\n+ 5461 ctactttcag gtggattcga agggagccct tcattcggaa cgtcttggag cagagatggc\n+ 5521 acttgtaggg ctcctggtcc tgtataaagc aataatcgga atttcactta tatttatcaa\n+ 5581 ttcatcaata tgccctcatg gccaaatatt ccattacatt accgtctgtc tctcagtttc\n+ 5641 aaatttatgc acaaaaatca ttcactttca ttcactatat cacaaagttg ccatggtttt\n+ 5701 aaattgatca aaaacaaatt aatatctatc atatatatac atagtcatat gaacagttga\n+ 5761 aaaattaatt gaaaataatg ggaacgatat acgtacatac atcagttgtt tttaaaatat\n+ 5821 aagggtatat agatttcttt cttgttgttg ttgattttaa ttacgtcaaa cttttgtttc\n+ 5881 agattcaatg taaatggtct agctttttaa gtatgatttt tttttgctgc cagtgagcat\n+ 5941 agaaaaaaaa aatcaaaatc gatataagaa tatgcgaaag tgcattacga aactctttag\n+ 6001 ataatagcac ttaatatatg tacatagcca atagttaccg gttccttctg ttggggttcc\n+ 6061 ttttgcttgg gttctccctc cgcattttcg tggactaagc ggacgtgcat gtccctcagc\n+ 6121 tcggtattcc ggaaactgaa ctcgcaaatg tcgcagtggg cgggcggggt ggtccgcaca\n+ 6181 ggcggtgggg ttgggacgac gggcttggac ctggttcgct tggccctccg tttgggagga\n+ 6241 gctgcggcaa ggaagccccg ggacgaggcg ggttggccat tcgctggact ctcgtttccc\n+ 6301 tcctgagcca tcagacttgt gtgcgagaac aggtggatgg tgagcttgtc cagccccagg\n+ 6361 aagagctcct tgcagttggc aaagggacag gccagtgggc cgttggccgc cttgatcagc\n+ 6421 ctctgctgca gtgcgtcaat actgccgaaa ctgggcaccg cgcagagcgg acacagcacc\n+ 6481 gaggtggaac acatttcgcc agtgcactca atcgaatctt atgcaaatgc ttcacctcct\n+ 6541 attgggatta tcctcctgtt cggtctgtga tcatctattc aggagtccat tcccagactg\n+ 6601 cctagtcttt ctgctttcaa aattttctaa aaatatcagc aagtgaagat ttttgaaaac\n+ 6661 tttgggccca gcaatctgac ttctcggcac cgatgccagc taacgaaata atgaaaaata\n+ 6721 atgaaatgcc cggcgcggat cgtcgaatcg tcaagaagac tttcggaaac actcgcagca\n+ 6781 ccgaaatccc atctctcgaa caaggcagtc tcttttctcc gtgtctctgg gtagctcatt\n+ 6841 tcgaaatata gctctgagca cggctatata ctatatgtat gtagaattat ttctggccga\n+ 6901 tatatgttgc actggcggcc atatagcctt cgttctagtc tttgtaacgc acgatgcgca\n+ 6961 ggagcaattc gctgagatga 
ccacatgcga tttgcgggac ttatctagag atctatcatt\n+ 7021 atcgccagat tggtttaata attggctttt ccgccaatat ccaattggaa tatggttggt\n+ 7081 tactgcaatt gtcgctccat tttttaagca ctccataaaa agtaaacaca ttaatatgta\n+ 7141 ctcttattgg agatttcttc tttcgatttt agtttcggac cagtgaaaat cattcgtttc\n+ 7201 attttcgtaa ataagaactg agaaaatatt attattatat atatttcttt attaggaaaa\n+ 7261 tacgaagatt gagtatttca gattgaatta gcatatccgt ctaaatctta atgctgtaat\n+ 7321 gagcttactt gagatctgat caaaaccaat acaaaaccca caccaaaggt ggtagctaat\n+ 7381 atacatattt tgtgtaatac ttttgtagag tatttactat tcagcgattt aaacaagcaa\n+ 7441 tcgcctagac acacacattt gtccgcctat gtgtatgtgc accgagctat acccccactg\n+ 7501 aatcgctgtg tgctattttt atggccgcga tgctctcttg ttttgacccg cttgggcaac\n+//\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_scratch/Genus_species.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_scratch/Genus_species.gff3 Thu Aug 26 06:55:33 2021 +0000 |
b |
b'@@ -0,0 +1,117 @@\n+##gff-version 3\n+sample\tfunannotate\tgene\t1092\t1466\t.\t-\t.\tID=FUN_000001;\n+sample\tfunannotate\tmRNA\t1092\t1466\t.\t-\t.\tID=FUN_000001-T1;Parent=FUN_000001;product=hypothetical protein;\n+sample\tfunannotate\texon\t1092\t1466\t.\t-\t.\tID=FUN_000001-T1.exon1;Parent=FUN_000001-T1;\n+sample\tfunannotate\tCDS\t1092\t1466\t.\t-\t0\tID=FUN_000001-T1.cds;Parent=FUN_000001-T1;\n+sample\tfunannotate\tgene\t2565\t3142\t.\t-\t.\tID=FUN_000002;\n+sample\tfunannotate\tmRNA\t2565\t3142\t.\t-\t.\tID=FUN_000002-T1;Parent=FUN_000002;product=hypothetical protein;\n+sample\tfunannotate\texon\t3138\t3142\t.\t-\t.\tID=FUN_000002-T1.exon1;Parent=FUN_000002-T1;\n+sample\tfunannotate\texon\t2883\t3004\t.\t-\t.\tID=FUN_000002-T1.exon2;Parent=FUN_000002-T1;\n+sample\tfunannotate\texon\t2565\t2686\t.\t-\t.\tID=FUN_000002-T1.exon3;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t3138\t3142\t.\t-\t0\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t2883\t3004\t.\t-\t1\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t2565\t2686\t.\t-\t2\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tgene\t4248\t5802\t.\t-\t.\tID=FUN_000003;\n+sample\tfunannotate\tmRNA\t4248\t5802\t.\t-\t.\tID=FUN_000003-T1;Parent=FUN_000003;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t5797\t5802\t.\t-\t.\tID=FUN_000003-T1.exon1;Parent=FUN_000003-T1;\n+sample\tfunannotate\texon\t4937\t5539\t.\t-\t.\tID=FUN_000003-T1.exon2;Parent=FUN_000003-T1;\n+sample\tfunannotate\texon\t4248\t4742\t.\t-\t.\tID=FUN_000003-T1.exon3;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t5797\t5802\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t4937\t5539\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t4248\t4742\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tgene\t7691\t10664\t.\t-\t.\tID=FUN_000004;\n+sample\tfunannotate\tmRNA\t7691\t10664\t.\t-\t.\tID=FUN_000004-T1;Parent=FUN_000004;product=hypothetical protein;\n+sample\tfunannotate\texon\t10657\t10664\t.\t-\t.\tID=FUN_000004-T1.exon1;Parent=FUN_000004-T1;\n+sample\tfunannotate\texon\t8707\t10499\t.\t-\t.\tID=FUN_000004-T1.exon2;Parent=FUN_000004-T1;\n+sample\tfunannotate\texon\t7691\t8385\t.\t-\t.\tID=FUN_000004-T1.exon3;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t10657\t10664\t.\t-\t0\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t8707\t10499\t.\t-\t1\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t7691\t8385\t.\t-\t2\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tgene\t14247\t15214\t.\t-\t.\tID=FUN_000005;\n+sample\tfunannotate\tmRNA\t14247\t15214\t.\t-\t.\tID=FUN_000005-T1;Parent=FUN_000005;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t15209\t15214\t.\t-\t.\tID=FUN_000005-T1.exon1;Parent=FUN_000005-T1;\n+sample\tfunannotate\texon\t14247\t14648\t.\t-\t.\tID=FUN_000005-T1.exon2;Parent=FUN_000005-T1;\n+sample\tfunannotate\tCDS\t15209\t15214\t.\t-\t0\tID=FUN_000005-T1.cds;Parent=FUN_000005-T1;\n+sample\tfunannotate\tCDS\t14247\t14648\t.\t-\t0\tID=FUN_000005-T1.cds;Parent=FUN_000005-T1;\n+sample\tfunannotate\tgene\t15539\t16619\t.\t+\t.\tID=FUN_000006;\n+sample\tfunannotate\tmRNA\t15539\t16619\t.\t+\t.\tID=FUN_000006-T1;Parent=FUN_000006;product=hypothetical protein;\n+sample\tfunannotate\texon\t15539\t15543\t.\t+\t.\tID=FUN_000006-T1.exon1;Parent=FUN_000006-T1;\n+sample\tfunannotate\texon\t15646\t15919\t.\t+\t.\tID=FUN_000006-T1.exon2;Parent=FUN_000006-T1;\n+sample\tfunannotate\texon\t16485\t16619\t.\t+\t.\tID=FUN_000006-T1.exon3;Parent=FUN_000006-T1;\n+sample\tfunannotate\tCDS\t15539\t15543\t.\t+\t0\tID=FUN_000006-T1.cds;Parent=FUN_000006-T1;\n+sample\tfunannotate\tCDS\t15646\t15919\t.\t+\t1\tID=FUN_000006-T1.cds;Parent=FUN_000006-T1;\n+sample\tfunannotate\tCDS\t16485\t16619\t.\t+\t0\tID=FUN_000006-T1.cds;Parent=FUN_000006-T1;\n+sample\tfunannotate\tgene\t18358\t21705\t.\t-\t.\tID=FUN_000007;\n+sample\tfunannotate\tmRNA\t18358\t21705\t.\t-\t.\tID=FUN_000007-T1;Parent=FUN_000007;product=hypothetical protein;\n+sample\tfunannotate\texon\t21700\t21705\t.\t-\t.\tID=FUN_000007-T1.exon1;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t19638\t21515\t.\t-\t.\tID=FUN_000007-T1.exon2;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t18358\t19482\t.\t-\t.\tID=FUN_000007-T1.exon3;Parent=FUN_00'..b'N_000010-T1;\n+sample\tfunannotate\tCDS\t87202\t87207\t.\t+\t0\tID=FUN_000010-T1.cds;Parent=FUN_000010-T1;\n+sample\tfunannotate\tCDS\t88054\t88320\t.\t+\t0\tID=FUN_000010-T1.cds;Parent=FUN_000010-T1;\n+sample\tfunannotate\tgene\t102510\t106221\t.\t-\t.\tID=FUN_000011;\n+sample\tfunannotate\tmRNA\t102510\t106221\t.\t-\t.\tID=FUN_000011-T1;Parent=FUN_000011;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t106216\t106221\t.\t-\t.\tID=FUN_000011-T1.exon1;Parent=FUN_000011-T1;\n+sample\tfunannotate\texon\t104258\t104632\t.\t-\t.\tID=FUN_000011-T1.exon2;Parent=FUN_000011-T1;\n+sample\tfunannotate\texon\t103696\t103947\t.\t-\t.\tID=FUN_000011-T1.exon3;Parent=FUN_000011-T1;\n+sample\tfunannotate\texon\t103229\t103618\t.\t-\t.\tID=FUN_000011-T1.exon4;Parent=FUN_000011-T1;\n+sample\tfunannotate\texon\t102510\t103151\t.\t-\t.\tID=FUN_000011-T1.exon5;Parent=FUN_000011-T1;\n+sample\tfunannotate\tCDS\t106216\t106221\t.\t-\t0\tID=FUN_000011-T1.cds;Parent=FUN_000011-T1;\n+sample\tfunannotate\tCDS\t104258\t104632\t.\t-\t0\tID=FUN_000011-T1.cds;Parent=FUN_000011-T1;\n+sample\tfunannotate\tCDS\t103696\t103947\t.\t-\t0\tID=FUN_000011-T1.cds;Parent=FUN_000011-T1;\n+sample\tfunannotate\tCDS\t103229\t103618\t.\t-\t0\tID=FUN_000011-T1.cds;Parent=FUN_000011-T1;\n+sample\tfunannotate\tCDS\t102510\t103151\t.\t-\t0\tID=FUN_000011-T1.cds;Parent=FUN_000011-T1;\n+sample\tfunannotate\tgene\t167121\t169212\t.\t+\t.\tID=FUN_000012;\n+sample\tfunannotate\tmRNA\t167121\t169212\t.\t+\t.\tID=FUN_000012-T1;Parent=FUN_000012;product=hypothetical protein;\n+sample\tfunannotate\texon\t167121\t168069\t.\t+\t.\tID=FUN_000012-T1.exon1;Parent=FUN_000012-T1;\n+sample\tfunannotate\texon\t168722\t169212\t.\t+\t.\tID=FUN_000012-T1.exon2;Parent=FUN_000012-T1;\n+sample\tfunannotate\tCDS\t167121\t168069\t.\t+\t0\tID=FUN_000012-T1.cds;Parent=FUN_000012-T1;\n+sample\tfunannotate\tCDS\t168722\t169212\t.\t+\t2\tID=FUN_000012-T1.cds;Parent=FUN_000012-T1;\n+sample\tfunannotate\tgene\t180262\t180579\t.\t+\t.\tID=FUN_000013;\n+sample\tfunannotate\tmRNA\t180262\t180579\t.\t+\t.\tID=FUN_000013-T1;Parent=FUN_000013;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t180262\t180267\t.\t+\t.\tID=FUN_000013-T1.exon1;Parent=FUN_000013-T1;\n+sample\tfunannotate\texon\t180400\t180579\t.\t+\t.\tID=FUN_000013-T1.exon2;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t180262\t180267\t.\t+\t0\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t180400\t180579\t.\t+\t0\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tgene\t208619\t210553\t.\t-\t.\tID=FUN_000014;\n+sample\tfunannotate\tmRNA\t208619\t210553\t.\t-\t.\tID=FUN_000014-T1;Parent=FUN_000014;product=hypothetical protein;\n+sample\tfunannotate\texon\t210548\t210553\t.\t-\t.\tID=FUN_000014-T1.exon1;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t209053\t210474\t.\t-\t.\tID=FUN_000014-T1.exon2;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t208619\t208645\t.\t-\t.\tID=FUN_000014-T1.exon3;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t210548\t210553\t.\t-\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t209053\t210474\t.\t-\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t208619\t208645\t.\t-\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample4\tfunannotate\tgene\t1092\t1466\t.\t-\t.\tID=FUN_000015;\n+sample4\tfunannotate\tmRNA\t1092\t1466\t.\t-\t.\tID=FUN_000015-T1;Parent=FUN_000015;product=hypothetical protein;\n+sample4\tfunannotate\texon\t1092\t1466\t.\t-\t.\tID=FUN_000015-T1.exon1;Parent=FUN_000015-T1;\n+sample4\tfunannotate\tCDS\t1092\t1466\t.\t-\t0\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample4\tfunannotate\tgene\t2126\t3537\t.\t+\t.\tID=FUN_000016;\n+sample4\tfunannotate\tmRNA\t2126\t3537\t.\t+\t.\tID=FUN_000016-T1;Parent=FUN_000016;product=hypothetical 
protein;\n+sample4\tfunannotate\texon\t2126\t2199\t.\t+\t.\tID=FUN_000016-T1.exon1;Parent=FUN_000016-T1;\n+sample4\tfunannotate\texon\t2258\t3224\t.\t+\t.\tID=FUN_000016-T1.exon2;Parent=FUN_000016-T1;\n+sample4\tfunannotate\texon\t3284\t3537\t.\t+\t.\tID=FUN_000016-T1.exon3;Parent=FUN_000016-T1;\n+sample4\tfunannotate\tCDS\t2126\t2199\t.\t+\t0\tID=FUN_000016-T1.cds;Parent=FUN_000016-T1;\n+sample4\tfunannotate\tCDS\t2258\t3224\t.\t+\t1\tID=FUN_000016-T1.cds;Parent=FUN_000016-T1;\n+sample4\tfunannotate\tCDS\t3284\t3537\t.\t+\t0\tID=FUN_000016-T1.cds;Parent=FUN_000016-T1;\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_scratch/Genus_species.mrna-transcripts.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_scratch/Genus_species.mrna-transcripts.fa Thu Aug 26 06:55:33 2021 +0000 |
b |
b'@@ -0,0 +1,241 @@\n+>FUN_000001-T1 FUN_000001\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000002-T1 FUN_000002\n+ATGCCTCTGTACGGCGTTTCTCCAGCGTCTGCTCTGTGGGCAGGAAGTAGGCGACAAACTGTTCGCCGCTCTCGTCCATC\n+ACACCACGAATCATGGCCTGCGACATCTCCTCCAGCTGGGCGGGCACAGAGTCTCCTCCCGAAGCGACTTCTTGACGTTG\n+TAACCGACCTTGGCCTCGATGTTCTCCAGGTTCTGGGGCTGGAAGCGCGTCTGCTCGGTGGAGATGTACTCGGATTTGCG\n+CAACCATGA\n+>FUN_000003-T1 FUN_000003\n+ATGTATGACCAGGAGCCCTACAAGTGCCATCTCTGCTCCAAGACGTTCCGAATGAAGGGCTCCCTTCGAATCCACCTGAA\n+AGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAATCCCAGTCCTACACCCGCTTCTACCACCA\n+GTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACACAGAGCCTGGAGCCCTTGGCAACGGAAAT\n+AACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGCATGCTCCAGCAGTCCCCCAGTTCGCCGGA\n+GTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTCCTTCACCACCAAGTACTTTCTGAAGAAGC\n+ACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCAGGACCTTCACCTTCCAGCAGTCGTACCAC\n+AAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGTGGACGCGCCTTCAAGGAACTGTCTACGCT\n+GCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAACGAGTCTCTTTCCTTGTCCACACGCGCATCC\n+ACACGGGAGTGATGCCCTACAAGTGCGAGCTCTGCCAAAAGACGTTCAGGTACAAGGTCAGTCAAAGGACCCATCGATGT\n+CCCACCGAGGAGGCCCAGACACCGGAGCAGCTGATAAAGGCGTTCCTGGAGGGCAACGACTCGCATACTCAGCCTTCACC\n+AGCGAGCGCCGAAATAGCTGCCATCAACAGCAGCTCCATTGTGGATCCTGAGCAGGAGGCACTGCTTTCGCAGTCAATCG\n+ACGACATTGTCGTTGAGCAGTGCCAAAAGCTGGGCATCTGTGGTGTGGAGCCGCGGGAGGAGGGACAGCTCATATCCCTA\n+CAGCCGGTTGCGGTGGTACACTTCAGCGGGAATGGCTCTCCGCTGCAGCAACTCCAGAACTTGAGAATCTACTCACCGCA\n+ACAAACAGAGCTACCTAGTTCCGATGGCGAAGTCTTCCAGCGCTTTTTGATGGACGCCACGTAG\n+>FUN_000004-T1 
FUN_000004\n+ATGTCAAACAAGCGCGAGATAGATGAGCTGACTTCCCGCATCAAGTCGGCTAAGACTACCTTGGTGGAGTGGACAGAGGC\n+CATGGAAGACGGAAACAAGGGCTACCAGCTAATCGAAAAGTACTACCTCGATGACCAGCAAAAGGCACGGGAGCTGAACA\n+TTAAGCGTCAGCTCTTGCAGGCGGACATCGACAAGCGGCGCAAGCAGGTGGTGCTCCTCTATGACGAGCAAATGACGCTG\n+GAGAAGAATCTGGAGCGAACCGCTTGTCTGTACAGGTCTGCCCACGCGGAACGCCGCCAGATGGTGGAGACGTGGAAGAG\n+CGCCGTGAACCAGATGACCCAGCGAGAGCACGACATTCAGCGCAGCGAGATAGAGTGTGCGGAGCTGGCCCAGAAAGCCC\n+AGCAGACGGCTCAGACCTACAAGGAGTACGACAACCAGCTAACCGAGGTCATCGAGAACAACCGCCAGGTGGAGCTCGCG\n+ATCGAGTCCCTCAACGAGGAAAGCTCCGACATGAAAAACCAAATCCAGATCCTGATTGATGCCACATTACTGAAGGAGCG\n+CGAGATCGACGGACTGCGTCGGGAACTGGAGAACCTTTCGAACCGGGTTCACCTGCAGCGCATGGATAACCGTAGTCAGA\n+TGAAAAAGCGCGATGAGAAGGCCAAGGAGCTGGAGAACTTCGCTTCGGTGATGGAGAAGGTTAACGCTCGGCTGAAGTCT\n+GTACAGAACAAGGCCCTCAATGCGGAGCAGCGGCTGCAAATTCTGGAGGAGATGATGCAGGCGGAGGAGACGGCTCTGCG\n+AAACCTGGACAAGGAGCAGGAGAAGGTCAACGAGATGTTGTACCGCACCCAGAGGCAGGTCATCGAGCTGCAGGACGAGG\n+AGAAGGTCTTGAAGGTGCAGAACGACTCGCTCAACTCCAACCTGGCAGCGATCAATCGCAATCAGCAGCAGGTCAATAAC\n+GAACTCAAGCGGCAGACGGAGATCCATTACAGCCTGTCCTTTAAGTGCTTGGAGGCGGAGCGGCGTCTCGCCGAGATCAA\n+GGGTCTGGCAGATGACCCCGAGGTAGAGGCCACAAACATGGCCCGGCTTAACACTCTGGAGCAGGAGTATGAAAAGTTGC\n+AACGCCTCATCGCCACCACGGAGGCGCAGAATAAGAAGCTAAACTATAACATGAACAACCTGGTCGTCCAGTACAACGCC\n+GACGAGAAGGAGCTCGAAATGGTCAGGTTCAAGATTAAAGAGGCCCAGGTGTACTGCGAGGGCACCGTCAAGAGACTGCG\n+CCAGATTCGATACGAGAATTCCGAGCTCATAGTCGACCTCAACATGGTCAAGATGCGCTGCAGCGACCTAGAGGTTGGCA\n+TCGGAGGCTGCGAGCAGGGCACCTACGACCTAGAGCAGCACCGCCTTGCCTTCCGGCGCGCCATCAAAGATCGCACCGTT\n+GAGCTTCGCAGCCAAGAGGATGTGCTCCTCCTCAAGAAGAAGCATCTTAACGAGGAACTAAGCACCCTGCGAGCCGACCT\n+CGGGGAGCGGAAGAAGCAGATTGAGGCGATGAAGGCGCGCTTTGAACTTACCGCCCAATTGCTAGGCAAAAACGAGGACG\n+GCTCCATCATGACCAGCACACAGCTGAAGGTGGTGAGCGCCCAGGAACGGCAGATGCTGGCCGACGAGGGCGATGCCCTC\n+AACAAAAAGGTTCTCAAGGCTGAGAAGGAAGTGGTCGCCCTGGAAAACACACTGCGCCAGTTCGACAAATCGAACGATAA\n+CTACCGAAAAACATTCCGATCTGTGGACGAAAACTCAAAGGATCGCGAGCGGGCCGAACTGGAGCTGAAGGAACTTGAGG\n+CGGCCTACTGCCGCGAGCTGGAAAAACTGAAGGTTCTCAGGTGCAAGGCACAGCACTACCAGCAGAAGCACGCAGCCC
AG\n+CGAGCTGAGGAGGAAGACCTGATTTCCAAAATAGAGAAGGCGAAGGCTAGTCGGGCGGAGCACTCTGCAGTTCTGGAGAA\n+GATCGAGCGCGAACTAGATGACCAGCGAATGAAGCTGGACAGGGCCAA'..b'+GAGGGCCTGGACCTATTCGGTCTGAGCAACGGGAACAACAGCAGCCTGaacagcagtgtgaacggcgGCGGTCCGCTGAC\n+TACGCAgcagcagaagttgcagctgcagcaacagcagcaAAACAGGACGCAGCCATCCCGAAGCAATACGGCTCTGCACG\n+TCTGTTGGCATCGTGGCGCCACCGTAGGACTGGGCGATCACCTGATAGCCGCCGAGCACCAGTTGTCCGGTTATCTGCTG\n+CGAAAGTTCAAGAATAGCTCCGGCTGGCAGAAGCTCTGGGTGGTGTTCACGTCCTTTTGTCTGTACTTTTACAAGAGCTA\n+CCAGGACGAGTTCGCACTGGCTAGTCTGCCGCTATTGGGTTACACGGTGGGTCCTCCTGGTCACCAAGATGCCGTTCAGA\n+AGGAGTTCGTCTTCAAGCTTTCCTTTAAGAACCACGTCTACTTCTTCCGCGCGGAAAGTGCGCACACCTACAACAGGTAA\n+>FUN_000013-T1 FUN_000013\n+ATGCAGCCCATCGCCATGGAGATGGGGCAGAATCAACTCGAAGTCAAAGCGTCGGTTTTGGGTGGAAGCCCGCAGCTTGG\n+TGATCTCAAAGTTGGAGGCGCCCAGGATGTTCAGCTTCTTGGCTTTTACCGTGAGCCCGGCGGATCCATCCAGAATGCTC\n+AGATCTCCAATGTAGAGGGGCTCTAG\n+>FUN_000014-T1 FUN_000014\n+ATGGCATCCAAGTCCTTCGATTTGGTCATTGAGGAAAAGACAAAGAAACCGGAACGGCTCTACCAACCTCGTCGGATGCG\n+ATGGCTTAAATATATCATCCTGCCGGCAGTCTTCTCCTTTGCGCTTCTGCTGATCCTGGTCAATGTGGACTTCTCCGATA\n+ATAGTGAGGATTCCACGCATCTGGGCAATGACACATCGCTCATTATATCTGGCTATGGATTTGAAAATAACACACTGCGC\n+CGGGGATTTTTCTCCGGAGGAATTGCATTGCATAGCCTTGTCATCGAGAACTGCACCATAGTTCATATCAACGACGCAGC\n+TTTTAATCAGGAAAGCACTGTGAACATAACAAGCCTGCAGCTCATTAATGTGCAATTGGAAAACTTGACTGAATCTGCCC\n+TGGAAGGTCTTCAAAAGCTTCAGAACTTTACTCTAGTCAATGAAAATAATCACTTCAGGCCATTTGGATTCCTCTCAGCT\n+GTGGCTGAATCGTTAGTCAGTGCGGAAATCCACCAGTCACTAGCCGCGGCAATATCGTATTCAGTGTGTGATTTTTTGGG\n+ATCCCGGAATTTCCCACAGTTGAAATATTTAGATCTGAGTGGCACACACTTGGACAAAAGCCTTATTAAAGAGTCCTTTG\n+ACAATCTGCCCGCGTTGGAACAACTACTTCTAAGGAACTGTGGGTTGGGCAACATCGAGTGGGAAATCGTGAGGCCAAGA\n+CTCAAATTGTTGCACTATTTGGACTTGGGTGGAGCTCAAAAGACGGGCAACTATGAACATCAGTTGGACGTATCCGCGTT\n+CTCTCCTGAGACAACTACTAACGCTGAAGAGATATCTACTATCCTAGCGAAGAGAGCGATGGCTCCAGAAGTAGTGGGCA\n+CCACTACACTAGGACCTACTACTTCCATAGAAATTTCACCACCATCAACACAAAGTACAACAACACCCAAAGAAGAAAGT\n+ACATCTATGACAGAAACAACGATATTAACAACACCATCGCCAAAATGCGAAGAGGAACTTTGCCAGGATCTAGAGTGCTC\n+CAGAATTACCACCGATACGGTTGCATCTGC
AGATCTGGGAAAATCGTCGTGCCAGGATGGTTTACTGGTGGAGATCTGCG\n+AATCGACTTGCACCACTCCCACATTCTTCTGTGTGATATTAGGCGAGAACTTTACCTCCGCATCCAACTGTTGTTCCCAC\n+CATACCATGCGATGTGTGGTCTCTGCACAGGTCTCCTGGTTCGAGGACCACAGCGGACTGGTCATTGGCTTGGGAGTGGG\n+TCTCCTCTTCATCGGCAGCTTCCTCGGCATGCTCATCGTCTTCGGAACCCTGCGCCTGAATCCATCCTGGTTGCGAGGCA\n+ATAAGCGGCGGGAGTCGAATACGATAGGACTGATTCAGGGAAGATTTGAGAAGGACCCGTATGAGCAAGAATTGCAACGT\n+ATACGAGAGCTGTGA\n+>FUN_000015-T1 FUN_000015\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000016-T1 FUN_000016\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGT
TCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCGATCGACAGTCTCGTTCTCGGAG\n+TCGAACTCGCAGCGG\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_scratch/Genus_species.proteins.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_scratch/Genus_species.proteins.fa Thu Aug 26 06:55:33 2021 +0000 |
b |
@@ -0,0 +1,96 @@ +>FUN_000001-T1 FUN_000001 +MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAGESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKR +FIRNPNYVKANEFYDKMLSSEYVSKRYKDLPPPHPGFGADQPPA +>FUN_000002-T1 FUN_000002 +MPLYGVSPASALWAGSRRQTVRRSRPSHHESWPATSPPAGRAQSLLPKRLLDVVTDLGLDVLQVLGLEARLLGGDVLGFA +QP +>FUN_000003-T1 FUN_000003 +MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPNPSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGN +NSTCTASQPYALSGALSMLQQSPSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH +KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCERVSFLVHTRIHTGVMPYKCELCQKTFRYKVSQRTHRC +PTEEAQTPEQLIKAFLEGNDSHTQPSPASAEIAAINSSSIVDPEQEALLSQSIDDIVVEQCQKLGICGVEPREEGQLISL +QPVAVVHFSGNGSPLQQLQNLRIYSPQQTELPSSDGEVFQRFLMDAT +>FUN_000004-T1 FUN_000004 +MSNKREIDELTSRIKSAKTTLVEWTEAMEDGNKGYQLIEKYYLDDQQKARELNIKRQLLQADIDKRRKQVVLLYDEQMTL +EKNLERTACLYRSAHAERRQMVETWKSAVNQMTQREHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELA +IESLNEESSDMKNQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRDEKAKELENFASVMEKVNARLKS +VQNKALNAEQRLQILEEMMQAEETALRNLDKEQEKVNEMLYRTQRQVIELQDEEKVLKVQNDSLNSNLAAINRNQQQVNN +ELKRQTEIHYSLSFKCLEAERRLAEIKGLADDPEVEATNMARLNTLEQEYEKLQRLIATTEAQNKKLNYNMNNLVVQYNA +DEKELEMVRFKIKEAQVYCEGTVKRLRQIRYENSELIVDLNMVKMRCSDLEVGIGGCEQGTYDLEQHRLAFRRAIKDRTV +ELRSQEDVLLLKKKHLNEELSTLRADLGERKKQIEAMKARFELTAQLLGKNEDGSIMTSTQLKVVSAQERQMLADEGDAL +NKKVLKAEKEVVALENTLRQFDKSNDNYRKTFRSVDENSKDRERAELELKELEAAYCRELEKLKVLRCKAQHYQQKHAAQ +RAEEEDLISKIEKAKASRAEHSAVLEKIERELDDQRMKLDRANREIRTQLREIKARPFSEEYLAQFERDLSLQELEARNT +KALNMITDLANSDESGTDIIGILLRKGIKLPMHLKRTCSRVSWNSSSSGKSSQGQDTASYLNVKGKKFSCDGASARSSVS +DMSSLKDDTSSTTSHSGLSIISLELPLPKKK +>FUN_000005-T1 FUN_000005 +MRCVFCGSGSEQQHSRWEIKMLQESCRTDHGFHQDSQAIQYLYEILASYNRDEQRAFLQFVTGSPRLPTGGFKALTPPLT +IVRKTLDENQNPNDYLPSVMTCVNYLKLPDYSSREVMRQKLKVAANEGSMSFHLS +>FUN_000006-T1 FUN_000006 +MLILIGAGARGAVLHRLLASSATQDVRTTARACGRMLVLLCGCCCSGGAAAHKRRSCYCWSVLVRGGCLKSILDVQHSAV +GLDYVYNGHRVLFASCTVTTSAGRELLRKGFRAEITTGAIPSCHSSVFLRMNLLLDL +>FUN_000007-T1 FUN_000007 
+MSFCDFLRDAALGANSANLSIVAAALSAARDVGGGSDGGGSAGGATPATGASASSVGNTSAVGASSSSNSSAGQAASSNS +NNVTATGSGSAPGGGPTSTGTTSGTQHGSGSGAAAAVDSESDDSEVGRLQALLEARGLPPHLFGALGPRVTHILHRTIGN +SSSSKANQLLQGLQSHDESQQLQAAIEMCQMLVMGNEDTLAGFPIKQVVPALIQLLRMEHNFDIMNNACRALAYMLEALP +RSSGTVVEAVPVFLEKLQVIQCMDVAEQSLSALEILSRRHNKAILQANGISACLTYLDFFSIVAQRAALAIAANCCLNMH +PEEFHFVAESLPLLARLLSQQDKKCIESVCSAFCRLVESFQHDGQRLQQIASPDLLKNCQQLLLVTPAILNTGTFTAVVR +MLSLMCCSCPDLAISLLRNDIAATLLYLLTGNAEPAAASATHVELISRSPSELYELTCLIGELMPRLPLDGIFAVDSLLD +RPTLNTQDQVHWQWRDDRGSWHNYSTIDSRLIEAANQSSEDEISLSTFGRTYTVDFHAMQQINEDTGTTRPVQRRLNHNY +VAPMSAGQDLTTTSAGSAAAGGASTSAAAAAASSNNNNNNNNNPPGNSVNLNQVKRRPSLDARIACLKEERGLAADFIKH +IFNVLYEVYSSSAGPNVRYKCLRALLRMVYYATPELLRQVLKYQLVSSHIAGMLGSNDLRIVVGALQMAEILMRQLPDVF +GTHFRREGVIYQFTQLTDPNNPICANPSPKPLSATATPTANAGGSQSAPASANSLQVNPFFMDSAPGLSSASTTPSSSKH +QSYSVKSFSHAMNALTASAKGTPSGALDATSSSTTAGGYNYSSSAPSSSSGAPAAYFVTQQGDPRQYVHFQQPAVPAPPP +QQELLPSGVQQQGQQVPQVIYQPHHQQPAHLVLASTSSGAASSSSSSSSSSSASALQHKMTDMLKRKAPPKRKSQSGGRA +KSRQEDAAVAPAGSGPGGAPPSSSGSAMHELLSRATSKYIFQ +>FUN_000008-T1 FUN_000008 +MKFRALMQDPLYMKEFQAIVATLTKLAKDCVMILGSRQMHFIVNEDQSSAASPLVWAGITAEEYFPEYRMEAAHPDQEYI +VLGVSSANLGRALSVLRGGGVNSCKLKLQRIQFPCISVIASVLTSSSTEAREVVHDVPVTIIPGSDWSAYVVPRVPNSQL +ALGLPSLRLLKSLIDKLKNISPSLEFQVNVDGELNVIATSEMSTVTSRFQKLLIRTVSGSQQEASCSVDSRKASAFFGAL +QLPNEELTIGIDREHSIHLQIDVRQDVVLHSILPAVCM +>FUN_000009-T1 FUN_000009 +MCGNPAVGNGTRALILVGGYGTRLRPLTLSTPKPLVEFANKPILLHQLEALVDAGCRQVILAVSYRAEQMEKELKVEAKK +LGVELIFSHETEPLGTAGPLALAKTILAASSEPFFVLNSDVICDFPFKQLVQFHCNHGKEGTIVVTKVEEPSKYGVVLYD +ENGCIKNFIEKPQEFVSNKINAGIYIFNPSVLDRIEVKPTSIEKEVFPEMTQQQELYAMDLTGFWMDIGQPKDFLTGMCL +YLSSLRQKQSPKLYTGPGVVGNVLVDPTAKIGEGCRIGPNVTIGPDVVIEDGVCIKRSTILKGAIVRSHSWLDSCIVGWR +STVGRWVRIEGITVLGEDVIVKDELYINGGQVLPHKSIAASLRGAIVQAGQLVILPDEEVFSHVQGVWNLSSDQGNLGSF +VVTNIRLVWFADANETFNISLPYLQIESSKYGPALVIQTAETGGGYVLGFRVDPAERLNELFKELSSLHTVYGEHPNFGI +QYNANDARRRLEAASEEAAQASQIKVDNFEELDERQEREINTKLNSYLAEGCLGKVPSQGERAPVYCKELGFAMEPIGDG +YKLQDLWNVMPTKMETME +>FUN_000010-T1 FUN_000010 
+MDFIHEAVGTSAAPSANPGPMPLCQPVRLELPFSPRHSFALGHFPFQLCPCLYAVWVQTMGLVLLLLIVFAPFLFRVLFK +PCNPKRYLTT +>FUN_000011-T1 FUN_000011 +MCHSKDNLHSGNEDGGMPKDTEYISSDHDDSPSWSQQSLLSSDRSKSYSQICSEILEESKERQEKAECAFRVYNINRSKL +RRSHQQSLSRGPGSGSYGSSMASEYSSKSEAGYQDYDSPSTDPSREHTAEVTFLQLRHRNRAHKEIIFRAAAHAIVIILI +IIARGVCQRHIKIVEIVPLTSRRGARRATTRNLTSHFAPRKWRRRFSSADQGDRQFKGHDGDCLRSTEKKRSISNEQSPI +TLRNTNAKDVDIPDCFGSFAMNKHLSVITEDASQHHKDPDEDMIDSQLSNSVLLETYDEGEKYAYSYQYSYKPEICNNNQ +FVSDESDLKVSSKEGYQMDQEDYVMDKQELVHEGGSDASLSEVAKSKSFLSLKIYDADEALMEIPEDFEGPAIVLDDDAD +FLDITLTDDEEKIRAKLMAAALTTRKTTSSISPNISLRTRSPIEPSSLSYKPNVIFTRRSEVIKDNYTPRPDDRVALLAE +KFLQSFSESAPNDYGWKPSKQEVTSAVSISHLFNENGVTRRGGDTPLCGDRQLLSVEFNRKLQRQLKVIVESFQ +>FUN_000012-T1 FUN_000012 +MSLDRRGEITTPPTRYDLTLGSDKSSSLSRSEAGTYDVIQAEIQHAKRQELATGVATASHQNGNGNGNGHTLSTQHDIEA +EVKKRKWPTEPSYFLAKELLMTERTYKKDLDVLNTTFRQVLSLGDVEQLQPLFELLDSLAQHHNLFLRDIEHRMVQWEGR +GGHEAHRIGDVMMKHMAALPIYDEYVQTHLDILHCMNDMYEGDERFRQVYKEFEQQKVCYLPIGELLLKPLNRLLHYQLI +LERLCDYYGEEHIDYADAMAVHHLLVRSTKGIRSQLPDSANFVELCELQRDINFEQLVQPHRRLIRQGCLLKHSKRGSSE +EGLDLFGLSNGNNSSLNSSVNGGGPLTTQQQKLQLQQQQQNRTQPSRSNTALHVCWHRGATVGLGDHLIAAEHQLSGYLL +RKFKNSSGWQKLWVVFTSFCLYFYKSYQDEFALASLPLLGYTVGPPGHQDAVQKEFVFKLSFKNHVYFFRAESAHTYNR +>FUN_000013-T1 FUN_000013 +MQPIAMEMGQNQLEVKASVLGGSPQLGDLKVGGAQDVQLLGFYREPGGSIQNAQISNVEGL +>FUN_000014-T1 FUN_000014 +MASKSFDLVIEEKTKKPERLYQPRRMRWLKYIILPAVFSFALLLILVNVDFSDNSEDSTHLGNDTSLIISGYGFENNTLR +RGFFSGGIALHSLVIENCTIVHINDAAFNQESTVNITSLQLINVQLENLTESALEGLQKLQNFTLVNENNHFRPFGFLSA +VAESLVSAEIHQSLAAAISYSVCDFLGSRNFPQLKYLDLSGTHLDKSLIKESFDNLPALEQLLLRNCGLGNIEWEIVRPR +LKLLHYLDLGGAQKTGNYEHQLDVSAFSPETTTNAEEISTILAKRAMAPEVVGTTTLGPTTSIEISPPSTQSTTTPKEES +TSMTETTILTTPSPKCEEELCQDLECSRITTDTVASADLGKSSCQDGLLVEICESTCTTPTFFCVILGENFTSASNCCSH +HTMRCVVSAQVSWFEDHSGLVIGLGVGLLFIGSFLGMLIVFGTLRLNPSWLRGNKRRESNTIGLIQGRFEKDPYEQELQR +IREL +>FUN_000015-T1 FUN_000015 +MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAGESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKR +FIRNPNYVKANEFYDKMLSSEYVSKRYKDLPPPHPGFGADQPPA +>FUN_000016-T1 
FUN_000016 +MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFDLKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLG +VTVDLINRELYQADSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK +KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPCAQVIFDSDPAPAGKNVPAQLEE +MSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFINGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGI +YYNELETRVRLNKRRVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIGETEKTSED +AAVGAQAASGADSPAQVARDRQSRSRSRTRS |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_scratch/Genus_species.scaffolds.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_scratch/Genus_species.scaffolds.fa Thu Aug 26 06:55:33 2021 +0000 |
b |
b'@@ -0,0 +1,4554 @@\n+>sample\n+GTGGCCGGCTGATTTCACGTCCTAACTTTGGGCTTAACTGGTTCGCCAGT\n+TGACTTTCTTCGCCATCATGTGATGCATTAATTAAACAATAATTACTAAT\n+TGacagtaattaataattgTGGCAAAAAGCGCGACACGTTTTTTCGGCAA\n+ACTCCTCGGAAGACCGATTGTTTAAAGACGTAGGGAAAAGTAGTTCCCAA\n+GCATTTTAAAAAGATACCTATGACATGTGACACCTTTAAAGTGCAATACA\n+AGTTTTCATCTCTTTATATCCTTTTACTCCCTAATTTGAATATAAAAGGA\n+ATCGCATTGAGAGTATAAAGGCTTTAGTTCTTATCGATagatagttagtG\n+ATGAAATAAAATTATAACCGTGGTTTTAGTTTGAAATGTTGTAAAGACTT\n+TCTTTAAATTTAACCAAATTTATGTGATAAAATGGATATTCCATAGATAA\n+GACATTTAAGTTAAATGTTTTTATACATCAAAAAGGAAACATTGTGCACG\n+CTATCAAATGGTATTCTTAAAATCGAGTCAgttaggtaagttattaatta\n+aATGGTAACTTTTTAACGTGCGTCAAATAATCTAGAAATTCTTCTTCAAC\n+TCATCCAAAACATTCTCAACACCACAATATCTATGCTCAGCGATGACAAA\n+TTTCTCCTGATTTCTTAATTTTCTATCTATGctatgcGATCAATCAACGA\n+ATGTGTGCTAATTTCTTGTGACGATTATTTTGCAAAGTCGTCTCCGCGTT\n+AATATCCGATGTAAATAAACCTATGAAAATCGCAAAGATCTATTCCTTTG\n+CGCTTAACCTTGTTATTGAACTCCCTCCCGCCCGGATTTTCGCAGCTTCC\n+AACTAAGTGATACCTTTTAAACAAACGCCACAACAAAACAGGTGACAATC\n+ATATTTTATCAACAAAAGAAAAGAGAGATAATATCTGCTGCTAATTCAAT\n+TTATTGGGCTTTGTGTTTATTTGCATTGGGAATCCGTGGAGCTGATATTG\n+TTTACTTTGGCAATTTCCCAGTCATTATGGTCGCCGTATAATGTGAATGG\n+CCAACAGGAAAATTTCACAGATTCCACTGGTTATTCACTGTTCATGCTGG\n+CGGTTGATCCGCTCCAAATCCCGGATGAGGCGGCGGAAGATCCTTATACC\n+GCTTACTTACGTATTCACTGCTTAACATCTTATCATAGAATTCGTTAGCT\n+TTCACATAGTTTGGATTCCGAATGAATCTCTTCTGTCGACCTCGCGTTTT\n+TGGCAATTTTCGGAGCTCTTCGCTACTGCTAGTTGATTTAAGGCCTACAT\n+CTTTCTGATCCTTAATCATCCTTTTAGGTGATTCTCCTGCTACCATGGGA\n+TCCACCAGTTTCATATATAATGGATCGGGCTCGTTTGCAAGTGGGTTTCC\n+AGTTTCAGAAGACTCTGACGTTCTAAGCGCCAATAAAATGAAAATAACCA\n+AAAGGAAACTAGACATGTTTCGTTACAGACAGATATAGATTGGATATTAT\n+TGAAAGAAAATgaaaataAACAGCGATAATGATCTGTGACTTATTGGAAA\n+TTAGATGGCTTATGGATGATGGGGCGATAAATTCGAACAAACACTGAGAG\n+CATTTTTGGGAGCATTGTGGGAGCTTTTCTATTATCCAGTACTTTATATA\n+CATATATCATTTATATACTAATCATTTCTGGTAGCCGTTCGTAATCAGGA\n+TCGGATCCTTTTTTACCCGTTAGTCAGCTAGAAGAAACGAAAAATTAAAA\n+TAGTAAAatctaaaagtatacaaaaattcaaatagtaaaaccaaaaagta\n+ttaaaaaaAATATCAATCGTTTTTAAACGTTGATTTTTCAGCTTGTGGGG\n+TGATTT
ATCGCTAACTTGGAAAATGATAATAAAGCATTATCCATAATATT\n+AGTTGTGGAAATGAAATTCAAATAGATGTTGTGTTATATACGATGAGGAT\n+GTTGCATTTGAGTCCCCGGAAATATAGTATTTTTTTTACCGAAGGTATTA\n+TCGTACCGGTCAAGTACGGTCACACTGCCAAGCGCAGATTTGAGGATTTC\n+TAGATTTGGCCTCTTGATGGACTAGAAGCGCTACCAAAACTGGGGCTTGA\n+GTTGAATTACCTGTTGGAAGACACAATGCCACCCACGATCAACAATTCGG\n+CGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAG\n+TAAGTGAACAGATCCCTAAACAGACGCCAGATACTCAGACTGATGTGTAC\n+CTTGCAGATCCGAGATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCG\n+GATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACAGCCACCG\n+CTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACG\n+TGCTGACGGAACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAG\n+CTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACT\n+GCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGC\n+ATTCGAGGACGGTGTCATGGTTGCGCAAATCCGAGTACATCTCCACCGAG\n+CAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGG\n+TTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCG\n+AAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAAGAGCGAA\n+ATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCC\n+TATCTTCCCCGACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATAT\n+TTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAG\n+GAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACA\n+GTTTGTCGCCTACTTCCTGCCCACAGAGCAGACGCTGGAGAAACGCCGTA\n+CAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtac\n+aagATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGG\n+CTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATCTACTACA\n+ACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGC\n+CAGCAACCCAACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCT\n+ATAGCGAGCTTTACTTTGTATTATTTCTACCAGGTTGTCAAGCATCGTCC\n+ATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAG\n+AAGTTCCTGGCGAGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAG\n+CAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGACGCAGCTGT\n+TGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCG\n+ATCGACAGTCTCGTTCTCGGAGTCGAACTCGCAGCGGGTCCAgttcagga\n+tctggatctggctccggctcTCGGGCCAgcagccgctcaaagtctggttc\n+tcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCC\n+AGAAATCCGGatccagatcgagatcggtatcacgttcccgatcccgttcc\n+aagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagtccggt
tcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTT'..b'cgcgttctaggtcgagatccaagtccggttcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTTCTGGTTCAGGATCCAGCTCTGGAAGCGCCTCA\n+GATGAATGATTAATTACAAAAAACGGCGTTCATAATAAATAAGTTTATAA\n+TCAACCAAGTACATTTGAAAACTGAACTAACTCGATTTAATATCATTTTC\n+GCCTCAACTCAGCGCTCGGGTTCGTTGCCCAGAATAGTTTTAAATAAAAT\n+CGGCAGTTTAACATAATTTATATTAGATGTtgttgttgtATTGCAAACAA\n+GTCGGGTCCTAGTCGATTTACACTTGGCTGAGATAAAACAACTAAGATTC\n+AAATAATATCCTCATAAGAAGATGTAATTAAGACGTTTTTCTTAGGGGGT\n+GCTTAGGAATTGATTAGATCGCCTTTGGGGAAGTGCAAACAATGTAAaat\n+gatataaaagggtataaaTTAAGTGGATATATGCATCTTCGTTCCAACTA\n+CGTGGCGTCCATCAAAAAGCGCTGGAAGACTTCGCCATCGGAACTAGGTA\n+GCTCTGTTTGTTGCGGTGAGTAGATTCTCAAGTTCTGGAGTTGCTGCAGC\n+GGAGAGCCATTCCCGCTGAAGTGTACCACCGCAACCGGCTGTAGGGATAT\n+GAGCTGTCCCTCCTCCCGCGGCTCCACACCACAGATGCCCAGCTTTTGGC\n+ACTGCTCAACGACAATGTCGTCGATTGACTGCGAAAGCAGTGCCTCCTGC\n+TCAGGATCCACAATGGAGCTGCTGTTGATGGCAGCTATTTCGGCGCTCGC\n+TGGTGAAGGCTGAGTATGCGAGTCGTTGCCCTCCAGGAACGCCTTTATCA\n+GCTGCTCCGGTGTCTGGGCCTCCTCGGTGGGACATCGATGGGTCCTTTGA\n+CTGACCTTGTACCTGAACGTCTTTTGGCAGAGCTCGCACTTGTAGGGCAT\n+CACTCCCGTGTGGATGCGCGTGTGGACAAGGAAAGAGACTCGCTGCCGGA\n+AACACTTGCCTGTGGACGAGTCCGAAATACGAAAAGGTTAGACATGGAGT\n+GACCCGGAAAAGAAGGTATACCTCTCCTTCATTTAAAGTAAATAGGGCAA\n+ATCGCAATGGAGTATGCTCATTTATAAGCTGGCTAACAAAATAAGGGGCG\n+GCTAATTAAGGGGTTTGATCGATACTCACCGCAGACTTCGCACTTGAAGG\n+GCTTTTCGCCGCTGTGAATCCTCTGGTGGTTATGCAGCGTAGACAGTTCC\n+TTGAAGGCGCGTCCACAAACCCCGCAGACATGGGGCTTTACCTCGCTGTG\n+GTAGAGCAAATGCTTGTGGTACGACTGCTGGAAGGTGAAGGTCCTGGCGC\n+AGATCTCGCATGTGTACGGCATCTCGCCGGTGTGGAGCCGCTTGTGCTTC\n+TTCAGAAAGTACTTGGTGGTGAAGGACTTGCTGCACACATCGCACTCCCA\n+CAGCTTCGGGGTGGCCGTGCCCGACTCCGGCGAACTGGGGGACTGCTGGA\n+GCATGCTCAGCGCCCCACTCAGCGCGTATGGCTGGGATGCGGTGCACGTG\n+GAGTTATTTCCGTTGCCAAGGGCTCCAGGCTCTGTGTGTCGAATGCGATC\n+GCAAATGCTCAGCTTGGGCGTGGCAGTAACTGCACTGGTGGTAGAAGCGG\n+GTGTAGGACtgggattaggatttggattgggattggAGCAGGGCACGCCC\n+ATCATGTGCACTACTTTCAGGTGGATTCGAAGGGAGCCCTTCATTCGGAA\n+CGTCTTGGAGCAGAGATGGCACTT
GTAGGGCTCCTGGTCCTGTATAAAGC\n+AATAATCGGAATTTCACTTATATTTATCAATTCATCAATATGCCCTCATG\n+GCCAAATATTCCATTACATTACCGTCTGTCTCTCAGTTTCAAATTTATGC\n+ACAAAAATCATTCACTTTCATTCACTATATCACAAAGTTGCCATGGTTTT\n+AAATTGATCAAAAACAAATTAATATCTATCATATATaTACATAGTCATAT\n+GAACAGTTGAAAAATTAATTGAAAATAATGGGAACGATATACGTACATAC\n+ATCAGTTGTTTTTAAAATATAAGGGTATATAGATTTCTTTCttgttgttg\n+ttgatTTTAATTACGTCAAACTTTTGTTTCAGATTCAATGTAAATGGTCT\n+AGCTTTTTAAGTATGATTTTTTTTTGCTGCCAGTGAGCATAGAAaaaaaa\n+aatCaaaATCGATATAAGAATATGCGAAAGTGCATTACGAAACTCTTTAG\n+ATAATAGCACTTAATATATGTACATAGCCAATAGTTACCGGTTCCTTCTG\n+TTGGggttccttttgcttgggttcTCCCTCCGCATTTTCGTGGACTAAGC\n+GGACGTGCATGTCCCTCAGCTCGGTATTCCGGAAACTGAACTCGCAAATG\n+TCGCAGTGGGCGGGCGGGGTGGTCCGCACAGGCGGTGGGGTTGGGACGAC\n+GGGCTTGGACCTGGTTCGCTTGGCCCTCCGTTTGGGAGGAGCTGCGGCAA\n+GGAAGCCCCGGGACGAGGCGGGTTGGCCATTCGCTGGACTCTCGTTTCCC\n+TCCTGAGCCATCAGACTTGTGTGCGAGAACAGGTGGATGGTGAGCTTGTC\n+CAGCCCCAGGAAGAGCTCCTTGCAGTTGGCAAAGGGACAGGCCAGTGGGC\n+CGTTGGCCGCCTTGATCAGCCTCTGCTGCAGTGCGTCAATACTGCCGAAA\n+CTGGGCACCGCGCAGAGCGGACACAGCACCGAGGTGGAACACATTTCGCC\n+AGTGCACTCAATCGAATCTTATGCAAATGCTTCACCTCCTATTGGGATTA\n+TCCTCCTGTTCGGTCTGTGATCATCTATTCAGGAGTCCATTCCCAGACTG\n+CCTAGTCTTTCTGCTTTCAAAATTTTCTAAAAATATCAGCAAGTGAAGAT\n+TTTTGAAAACTTTGGGCCCAGCAATCTGACTTCTCGGCACCGATGCCAGC\n+TAACGAAATAATGAAaaataatgaaaTGCCCGGCGCGGATCGTCGAATCG\n+TCAAGAAGACTTTCGGAAACACTCGCAGCACCGAAATCCCATCTCTCGAA\n+CAAGGCAGTCTCTTTTCTCCGTGTCTCTGGGTAGCTCATTTCGAAATATA\n+GCTCTGAGCACGGCTATATACTATATGTATGTAGAATTATTTCTGGCCGA\n+TATATGTTGCACTGGCGGCCATATAGCCTTCGTTCTAGTCTTTGTAACGC\n+ACGATGCGCAGGAGCAATTCGCTGAGATGACCACATGCGATTTGCGGGAC\n+TTATCTAGAGATCTATCATTATCGCCAGATTGGTTTAATAATTGGCTTTT\n+CCGCCAATATCCAATTGGAATATGGTTGGTTACTGCAATTGTCGCTCCAT\n+TTTTTAAGCACTCCATAAAAAGTAAACACATTAATATGTACTCTTATTGG\n+AGATTTCTTCTTTCGATTTTAGTTTCGGACCAGTGAAAATCATTCGTTTC\n+ATTTTCGTAAATAAGAACTGAGAAAATATtattattatATATATTTCTTT\n+ATTAGGAAAATACGAAGATTGAGTATTTCAGATTGAATTAGCATATCCGT\n+CTAAATCTTAATGCTGTAATGAGCTTACTTGAGATCTGATCAAAACCAAT\n+ACaaaacccacacCAAAGGTGGTAGCTAATATACATATTTTGTGTAATAC\n+TTTTGTAGAG
TATTTACTATTCAGCGATTTAAACAAGCAATCGCCTAGAC\n+ACACACATTTGTCCGCCTATGTGTATGTGCACCGAGCTATACCCCCACTG\n+AATCGCTGTGTGCTATTTTTATGGCCGCGATGCTCTCTTGTTTTGACCCG\n+CTTGGGCAAC\n' |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_scratch/Genus_species.stats.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_scratch/Genus_species.stats.json Thu Aug 26 06:55:33 2021 +0000 |
b |
@@ -0,0 +1,119 @@ +{ + "format": "annotation", + "command": "/home/abretaud/miniconda3/envs/__funannotate@1.8.7/bin/funannotate predict --input /tmp/tmpm833xrq1/files/e/1/c/dataset_e1c34c74-e579-4cab-b0ed-5ce938ce4e4b.dat --out output --database /home/abretaud/.planemo/planemo_tmp_yntx6ieu/test-data/funannotate_db --species Genus species --isolate --strain --organism other --ploidy 1 --SeqCenter CFMR --SeqAccession 12345 --name FUN_ --numbering 1 --p2g_pident 80 --p2g_prefilter diamond --min_training_models 3 --busco_seed_species fly --busco_db insecta --evm-partition-interval 1500 --min_intronlen 10 --max_intronlen 3000 --min_protlen 50 --repeat_filter overlap blast --cpus 1", + "organism": "Genus_species", + "software": { + "name": "funannotate", + "version": "1.8.7", + "date": "2021-07-21", + "resources": { + "merops": { + "type": "diamond", + "version": "12.0", + "date": "2017-10-04", + "num-records": "5009" + }, + "uniprot": { + "type": "diamond", + "version": "2021_03", + "date": "2021-06-02", + "num-records": "565254" + }, + "dbCAN": { + "type": "hmmer3", + "version": "9.0", + "date": "2020-08-04", + "num-records": "641" + }, + "pfam": { + "type": "hmmer3", + "version": "34.0", + "date": "2021-03", + "num-records": "19179" + }, + "repeats": { + "type": "diamond", + "version": "1.0", + "date": "2021-07-19", + "num-records": "11950" + }, + "go": { + "type": "text", + "version": "2021-07-02", + "date": "2021-07-02", + "num-records": "47228" + }, + "mibig": { + "type": "diamond", + "version": "1.4", + "date": "2021-07-19", + "num-records": "31023" + }, + "interpro": { + "type": "xml", + "version": "86.0", + "date": "2021-06-03", + "num-records": "38913" + }, + "busco_outgroups": { + "type": "outgroups", + "version": "1.0", + "date": "2021-07-19", + "num-records": "8" + }, + "gene2product": { + "type": "text", + "version": "1.70", + "date": "2021-06-15", + "num-records": "34039" + } + } + }, + "assembly": { + "num_contigs": 4, + "length": 227430, + "mean_length": 
56857.5, + "N50": 215740, + "L50": 1, + "N90": 215740, + "L90": 1, + "GC_content": 42.86 + }, + "annotation": { + "genes": 16, + "common_name": 0, + "mRNA": 16, + "tRNA": 0, + "ncRNA": 0, + "rRNA": 0, + "avg_gene_length": 1660.69, + "transcript-level": { + "CDS_transcripts": 16, + "CDS_five_utr": 0, + "CDS_three_utr": 0, + "CDS_no_utr": 16, + "CDS_five_three_utr": 0, + "CDS_complete": 15, + "CDS_no-start": 0, + "CDS_no-stop": 1, + "CDS_no-start_no-stop": 0, + "total_exons": 42, + "total_cds_exons": 42, + "multiple_exon_transcript": 13, + "single_exon_transcript": 3, + "avg_exon_length": 402.36, + "avg_protein_length": 359.81, + "functional": { + "go_terms": 0, + "interproscan": 0, + "eggnog": 0, + "pfam": 0, + "cazyme": 0, + "merops": 0, + "busco": 0, + "secretion": 0 + }, + "pct_exon_overlap_protein_evidence": 11.9 + } + } +} \ No newline at end of file |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_scratch/Genus_species.tbl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_scratch/Genus_species.tbl Thu Aug 26 06:55:33 2021 +0000 |
b |
@@ -0,0 +1,234 @@ +>Feature sample +1 215740 REFERENCE + CFMR 12345 +1466 1092 gene + locus_tag FUN_000001 +1466 1092 mRNA + product hypothetical protein + transcript_id gnl|ncbi|FUN_000001-T1_mrna + protein_id gnl|ncbi|FUN_000001-T1 +1466 1092 CDS + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000001-T1_mrna + protein_id gnl|ncbi|FUN_000001-T1 +3142 2565 gene + locus_tag FUN_000002 +3142 3138 mRNA +3004 2883 +2686 2565 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000002-T1_mrna + protein_id gnl|ncbi|FUN_000002-T1 +3142 3138 CDS +3004 2883 +2686 2565 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000002-T1_mrna + protein_id gnl|ncbi|FUN_000002-T1 +5802 4248 gene + locus_tag FUN_000003 +5802 5797 mRNA +5539 4937 +4742 4248 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000003-T1_mrna + protein_id gnl|ncbi|FUN_000003-T1 +5802 5797 CDS +5539 4937 +4742 4248 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000003-T1_mrna + protein_id gnl|ncbi|FUN_000003-T1 +10664 7691 gene + locus_tag FUN_000004 +10664 10657 mRNA +10499 8707 +8385 7691 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000004-T1_mrna + protein_id gnl|ncbi|FUN_000004-T1 +10664 10657 CDS +10499 8707 +8385 7691 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000004-T1_mrna + protein_id gnl|ncbi|FUN_000004-T1 +15214 14247 gene + locus_tag FUN_000005 +15214 15209 mRNA +14648 14247 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000005-T1_mrna + protein_id gnl|ncbi|FUN_000005-T1 +15214 15209 CDS +14648 14247 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000005-T1_mrna + protein_id gnl|ncbi|FUN_000005-T1 +15539 16619 gene + locus_tag FUN_000006 +15539 15543 mRNA +15646 15919 +16485 16619 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000006-T1_mrna + protein_id gnl|ncbi|FUN_000006-T1 +15539 15543 CDS +15646 
15919 +16485 16619 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000006-T1_mrna + protein_id gnl|ncbi|FUN_000006-T1 +21705 18358 gene + locus_tag FUN_000007 +21705 21700 mRNA +21515 19638 +19482 18358 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000007-T1_mrna + protein_id gnl|ncbi|FUN_000007-T1 +21705 21700 CDS +21515 19638 +19482 18358 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000007-T1_mrna + protein_id gnl|ncbi|FUN_000007-T1 +35679 34843 gene + locus_tag FUN_000008 +35679 34843 mRNA + product hypothetical protein + transcript_id gnl|ncbi|FUN_000008-T1_mrna + protein_id gnl|ncbi|FUN_000008-T1 +35679 34843 CDS + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000008-T1_mrna + protein_id gnl|ncbi|FUN_000008-T1 +40223 44130 gene + locus_tag FUN_000009 +40223 40396 mRNA +40659 41193 +41707 42080 +43409 43609 +43678 44130 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000009-T1_mrna + protein_id gnl|ncbi|FUN_000009-T1 +40223 40396 CDS +40659 41193 +41707 42080 +43409 43609 +43678 44130 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000009-T1_mrna + protein_id gnl|ncbi|FUN_000009-T1 +87202 88320 gene + locus_tag FUN_000010 +87202 87207 mRNA +88054 88320 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000010-T1_mrna + protein_id gnl|ncbi|FUN_000010-T1 +87202 87207 CDS +88054 88320 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000010-T1_mrna + protein_id gnl|ncbi|FUN_000010-T1 +106221 102510 gene + locus_tag FUN_000011 +106221 106216 mRNA +104632 104258 +103947 103696 +103618 103229 +103151 102510 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000011-T1_mrna + protein_id gnl|ncbi|FUN_000011-T1 +106221 106216 CDS +104632 104258 +103947 103696 +103618 103229 +103151 102510 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000011-T1_mrna + 
protein_id gnl|ncbi|FUN_000011-T1 +167121 169212 gene + locus_tag FUN_000012 +167121 168069 mRNA +168722 169212 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000012-T1_mrna + protein_id gnl|ncbi|FUN_000012-T1 +167121 168069 CDS +168722 169212 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000012-T1_mrna + protein_id gnl|ncbi|FUN_000012-T1 +180262 180579 gene + locus_tag FUN_000013 +180262 180267 mRNA +180400 180579 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000013-T1_mrna + protein_id gnl|ncbi|FUN_000013-T1 +180262 180267 CDS +180400 180579 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000013-T1_mrna + protein_id gnl|ncbi|FUN_000013-T1 +210553 208619 gene + locus_tag FUN_000014 +210553 210548 mRNA +210474 209053 +208645 208619 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000014-T1_mrna + protein_id gnl|ncbi|FUN_000014-T1 +210553 210548 CDS +210474 209053 +208645 208619 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000014-T1_mrna + protein_id gnl|ncbi|FUN_000014-T1 +>Feature sample4 +1 7560 REFERENCE + CFMR 12345 +1466 1092 gene + locus_tag FUN_000015 +1466 1092 mRNA + product hypothetical protein + transcript_id gnl|ncbi|FUN_000015-T1_mrna + protein_id gnl|ncbi|FUN_000015-T1 +1466 1092 CDS + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000015-T1_mrna + protein_id gnl|ncbi|FUN_000015-T1 +2126 >3537 gene + locus_tag FUN_000016 +2126 2199 mRNA +2258 3224 +3284 >3537 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000016-T1_mrna + protein_id gnl|ncbi|FUN_000016-T1 +2126 2199 CDS +2258 3224 +3284 >3537 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000016-T1_mrna + protein_id gnl|ncbi|FUN_000016-T1 |
b |
diff -r 000000000000 -r 40b87aef5241 test-data/predict_scratch/fly.parameters.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_scratch/fly.parameters.json Thu Aug 26 06:55:33 2021 +0000 |
[ |
@@ -0,0 +1,1 @@ +{"augustus": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-21", "path": "/tmp/tmpm833xrq1/job_working_directory/000/2/working/output/predict_misc/ab_initio_parameters/augustus/species/genus_species"}], "genemark": [{}], "codingquarry": [{}], "snap": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-21", "path": "/tmp/tmpm833xrq1/job_working_directory/000/2/working/output/predict_misc/ab_initio_parameters/genus_species.snap.hmm"}], "glimmerhmm": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-21", "path": "/tmp/tmpm833xrq1/job_working_directory/000/2/working/output/predict_misc/ab_initio_parameters/glimmerhmm"}]} \ No newline at end of file |
b |
diff -r 000000000000 -r 40b87aef5241 tool-data/funannotate.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/funannotate.loc.sample Thu Aug 26 06:55:33 2021 +0000 |
b |
@@ -0,0 +1,8 @@ +# this is a tab separated file describing the location of funannotate databases used for the +# funannotate annotation tool +# +# the columns are: +# value description format_version path +# +# for example +# 2021-07-20-120000 Funannotate database 2021-07-20-120000 1.0 /tmp/database/funannotate/funannotate/2021-07-20-120000 |
b |
diff -r 000000000000 -r 40b87aef5241 tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Thu Aug 26 06:55:33 2021 +0000 |
b |
@@ -0,0 +1,6 @@ +<tables> + <table name="funannotate" comment_char="#" allow_duplicate_entries="False"> + <columns>value, description, format_version, path</columns> + <file path="tool-data/funannotate.loc" /> + </table> +</tables> |
b |
diff -r 000000000000 -r 40b87aef5241 tool_data_table_conf.xml.test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Thu Aug 26 06:55:33 2021 +0000 |
b |
@@ -0,0 +1,6 @@ +<tables> + <table name="funannotate" comment_char="#" allow_duplicate_entries="False"> + <columns>value, description, format_version, path</columns> + <file path="${__HERE__}/test-data/funannotate.loc" /> + </table> +</tables> |