Next changeset 1:78407487f758 (2021-11-18) |
Commit message:
"planemo upload commit 87560553f1dbbd3e0ab7d7157fa5a7f32f61dca1" |
added:
README.md funannotate_annotate.xml macros.xml test-data/SRR7458692.bam test-data/cleaned.fa test-data/cleaned_ident.fa test-data/compare/Genus_species.gbk test-data/compare/Other_beast.gbk test-data/compare/Other_species.gbk test-data/compare/Yet_another.gbk test-data/funannotate.loc test-data/funannotate_db/Pfam-A.clans.tsv test-data/funannotate_db/Pfam-A.hmm test-data/funannotate_db/Pfam-A.hmm.h3f test-data/funannotate_db/Pfam-A.hmm.h3i test-data/funannotate_db/Pfam-A.hmm.h3m test-data/funannotate_db/Pfam-A.hmm.h3p test-data/funannotate_db/dbCAN.hmm test-data/funannotate_db/dbCAN.hmm.h3f test-data/funannotate_db/dbCAN.hmm.h3i test-data/funannotate_db/dbCAN.hmm.h3m test-data/funannotate_db/dbCAN.hmm.h3p test-data/funannotate_db/funannotate-db-info.txt test-data/funannotate_db/go.obo test-data/funannotate_db/insecta/ancestral test-data/funannotate_db/insecta/dataset.cfg test-data/funannotate_db/insecta/hmms/EOG090W03A6.hmm test-data/funannotate_db/insecta/hmms/EOG090W06A3.hmm test-data/funannotate_db/insecta/hmms/EOG090W0GYE.hmm test-data/funannotate_db/insecta/hmms/EOG090W0T3K.hmm test-data/funannotate_db/insecta/lengths_cutoff test-data/funannotate_db/insecta/prfl/EOG090W03A6.prfl test-data/funannotate_db/insecta/prfl/EOG090W06A3.prfl test-data/funannotate_db/insecta/prfl/EOG090W0GYE.prfl test-data/funannotate_db/insecta/prfl/EOG090W0T3K.prfl test-data/funannotate_db/insecta/scores_cutoff test-data/funannotate_db/interpro.tsv test-data/funannotate_db/merops.dmnd test-data/funannotate_db/merops.formatted.fa test-data/funannotate_db/ncbi_cleaned_gene_products.txt test-data/funannotate_db/repeats.dmnd test-data/funannotate_db/trained_species/fly/augustus/fly_exon_probs.pbl test-data/funannotate_db/trained_species/fly/augustus/fly_igenic_probs.pbl test-data/funannotate_db/trained_species/fly/augustus/fly_intron_probs.pbl test-data/funannotate_db/trained_species/fly/augustus/fly_metapars.cfg test-data/funannotate_db/trained_species/fly/augustus/fly_metapars.utr.cfg 
test-data/funannotate_db/trained_species/fly/augustus/fly_parameters.cfg test-data/funannotate_db/trained_species/fly/augustus/fly_utr_probs.pbl test-data/funannotate_db/trained_species/fly/augustus/fly_weightmatrix.txt test-data/funannotate_db/trained_species/fly/info.json test-data/funannotate_db/uniprot.dmnd test-data/funannotate_db/uniprot_sprot.fasta test-data/genome.fa test-data/genome_masked.fa test-data/predict_augustus/Genus_species.cds-transcripts.fa test-data/predict_augustus/Genus_species.discrepency.report.txt test-data/predict_augustus/Genus_species.error.summary.txt test-data/predict_augustus/Genus_species.gbk test-data/predict_augustus/Genus_species.gff3 test-data/predict_augustus/Genus_species.mrna-transcripts.fa test-data/predict_augustus/Genus_species.proteins.fa test-data/predict_augustus/Genus_species.scaffolds.fa test-data/predict_augustus/Genus_species.stats.json test-data/predict_augustus/Genus_species.tbl test-data/predict_augustus/Genus_species.validation.txt test-data/predict_augustus/fly.parameters.json test-data/predict_bam/Genus_species.cds-transcripts.fa test-data/predict_bam/Genus_species.discrepency.report.txt test-data/predict_bam/Genus_species.error.summary.txt test-data/predict_bam/Genus_species.gbk test-data/predict_bam/Genus_species.gff3 test-data/predict_bam/Genus_species.mrna-transcripts.fa test-data/predict_bam/Genus_species.proteins.fa test-data/predict_bam/Genus_species.stats.json test-data/predict_bam/Genus_species.tbl test-data/predict_bam/Genus_species.validation.txt test-data/predict_bam/fly.parameters.json test-data/predict_scratch/Genus_species.cds-transcripts.fa test-data/predict_scratch/Genus_species.discrepency.report.txt test-data/predict_scratch/Genus_species.error.summary.txt test-data/predict_scratch/Genus_species.gbk test-data/predict_scratch/Genus_species.gff3 test-data/predict_scratch/Genus_species.mrna-transcripts.fa test-data/predict_scratch/Genus_species.proteins.fa 
test-data/predict_scratch/Genus_species.scaffolds.fa test-data/predict_scratch/Genus_species.stats.json test-data/predict_scratch/Genus_species.tbl test-data/predict_scratch/Genus_species.validation.txt test-data/predict_scratch/fly.parameters.json tool-data/funannotate.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
b |
diff -r 000000000000 -r a5baa4ff168d README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Mon Oct 04 19:39:38 2021 +0000 |
b |
@@ -0,0 +1,5 @@ +# Funannotate + +Funannotate can use GeneMark to predict genes, but due to licensing issues, we are not allowed to distribute GeneMark automatically. + +If you want to use it, the Galaxy administrator needs to install GeneMark following the instructions on https://github.com/nextgenusfs/funannotate, and set the `GENEMARK_PATH` variable on the job destination. |
b |
diff -r 000000000000 -r a5baa4ff168d funannotate_annotate.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/funannotate_annotate.xml Mon Oct 04 19:39:38 2021 +0000 |
[ |
b'@@ -0,0 +1,413 @@\n+<tool id="funannotate_annotate" name="Funannotate functional" profile="20.01" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">\n+ <description>annotation</description>\n+ <macros>\n+ <import>macros.xml</import>\n+ </macros>\n+ <requirements>\n+ <expand macro="requirements" />\n+ </requirements>\n+ <version_command>funannotate check --show-versions</version_command>\n+ <command><![CDATA[\n+\n+#if $uglyTestingHack == "true":\n+ ## funannotate_db contains some hard coded path, need to rewrite one for tests (not in real life when using data manager)\n+ ## Need to copy too as the test_data is read only on CI\n+ cp -r \'${database.fields.path}\' \'./hacked_database\' &&\n+ sed -i.bak \'s|/tmp/prout|\'`pwd`\'/hacked_database|\' \'./hacked_database/trained_species/fly/info.json\' &&\n+#end if\n+\n+funannotate annotate\n+\n+#if $input.input_type == \'gbk\'\n+ --genbank \'${input.genbank}\'\n+#else\n+ --gff \'${input.gff}\'\n+ --fasta \'${input.fasta}\'\n+ --species \'${input.species}\'\n+#end if\n+\n+--out output\n+\n+#if $uglyTestingHack == "true":\n+ --database `pwd`\'/hacked_database\'\n+#else\n+ --database \'$database.fields.path\'\n+#end if\n+\n+#if $sbt:\n+ --sbt \'${sbt}\'\n+#end if\n+\n+#if $annotations:\n+ --annotations \'${annotations}\'\n+#end if\n+\n+#if $eggnog:\n+ --eggnog \'${eggnog}\'\n+#end if\n+\n+#if $antismash:\n+ --antismash \'${antismash}\'\n+#end if\n+\n+#if $iprscan:\n+ --iprscan \'${iprscan}\'\n+#end if\n+\n+#if $phobius:\n+ --phobius \'${phobius}\'\n+#end if\n+\n+--busco_db \'${busco_db}\'\n+\n+--isolate \'${isolate}\'\n+--strain \'${strain}\'\n+\n+#if $rename:\n+ --rename \'${rename}\'\n+#end if\n+#if $fix:\n+ --fix \'${fix}\'\n+#end if\n+#if $remove:\n+ --remove \'${remove}\'\n+#end if\n+\n+--cpus \\${GALAXY_SLOTS:-2}\n+\n+&&\n+\n+mv output/annotate_results/*.gbk out.gbk &&\n+mv output/annotate_results/*.annotations.txt out.annotations.txt &&\n+mv output/annotate_results/*.contigs.fsa out.contigs.fsa &&\n+mv 
output/annotate_results/*.agp out.agp &&\n+mv output/annotate_results/*.tbl out.tbl &&\n+mv output/annotate_results/*.sqn out.sqn &&\n+mv output/annotate_results/*.scaffolds.fa out.scaffolds.fa &&\n+mv output/annotate_results/*.proteins.fa out.proteins.fa &&\n+mv output/annotate_results/*.mrna-transcripts.fa out.mrna-transcripts.fa &&\n+mv output/annotate_results/*.cds-transcripts.fa out.cds-transcripts.fa &&\n+mv output/annotate_results/*.gff3 out.gff3 &&\n+mv output/annotate_results/*.discrepency.report.txt out.discrepency.report.txt &&\n+mv output/annotate_results/*.stats.json out.stats.json\n+ ]]></command>\n+ <inputs>\n+\n+ <conditional name="input">\n+ <param name="input_type" type="select" label="Input format">\n+ <option value="gbk" selected="True">GenBank (from \'Funannotate predict annotation\' tool)</option>\n+ <option value="gff">GFF</option>\n+ </param>\n+ <when value="gbk">\n+ <param argument="--genbank" type="data" format="genbank" label="Genome annotation in genbank format" help="Output from \'Funannotate predict annotation\' tool" />\n+ </when>\n+ <when value="gff">\n+ <param argument="--gff" type="data" format="gff3" label="Genome annotation in gff format" />\n+ <param argument="--fasta" type="data" format="fasta" label="Genome sequence" />\n+ <param argument="--species" type="text" optional="false" label="Name of the species to annotate" help="e.g. 
Genus species">\n+ <validator type="empty_field" />\n+ </param>\n+ </when>\n+ </conditional>\n+\n+\n+\n+ <param name="database" label="Funannotate database" type="select">\n+ <options from_data_table="funannotate">\n+ <column name="value" index="0" />\n+ <column name="name" index="1" />\n+ <column name="path" index="3" />\n+ <filter type="sort_by" column="0" />\n+ <filter type="static_value" column="2" v'..b's species" />\n+ </conditional>\n+ <param name="database" value="2021-07-20-120000" />\n+ <param name="busco_db" value="insecta" />\n+ <param name="outputs" value="gbk,annotations,contigs_fsa,agp,tbl,sqn,scaffolds_fa,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,gff3,discrepency,stats,must_fix,need_curating,new_names_passed" />\n+ <output name="gbk">\n+ <assert_contents>\n+ <has_text text="DEFINITION Genus species." />\n+ </assert_contents>\n+ </output>\n+ <output name="annot">\n+ <assert_contents>\n+ <has_text text="EC_number" />\n+ <has_text text="EOG090W0T3K" />\n+ </assert_contents>\n+ </output>\n+ <output name="contigs_fsa">\n+ <assert_contents>\n+ <has_text text=">contig_1" />\n+ </assert_contents>\n+ </output>\n+ <output name="agp">\n+ <assert_contents>\n+ <has_text text="contig_1" />\n+ </assert_contents>\n+ </output>\n+ <output name="tbl">\n+ <assert_contents>\n+ <has_text text="locus_tag" />\n+ </assert_contents>\n+ </output>\n+ <output name="sqn">\n+ <assert_contents>\n+ <has_text text="Seq-submit" />\n+ </assert_contents>\n+ </output>\n+ <output name="fa_scaffolds">\n+ <assert_contents>\n+ <has_text text=">sample" />\n+ </assert_contents>\n+ </output>\n+ <output name="fa_proteins">\n+ <assert_contents>\n+ <has_text text=">FUN_000001-T1 FUN_000001" />\n+ </assert_contents>\n+ </output>\n+ <output name="fa_transcripts_mrna">\n+ <assert_contents>\n+ <has_text text=">FUN_000001-T1 FUN_000001" />\n+ </assert_contents>\n+ </output>\n+ <output name="fa_transcripts_cds">\n+ <assert_contents>\n+ <has_text text=">FUN_000001-T1 FUN_000001" />\n+ 
</assert_contents>\n+ </output>\n+ <output name="gff3">\n+ <assert_contents>\n+ <has_text text="ID=FUN_000001;" />\n+ </assert_contents>\n+ </output>\n+ <output name="tbl2asn_report">\n+ <assert_contents>\n+ <has_text text="Discrepancy Report Results" />\n+ </assert_contents>\n+ </output>\n+ <output name="stats">\n+ <assert_contents>\n+ <has_text text="avg_gene_length" />\n+ </assert_contents>\n+ </output>\n+ <output name="must_fix">\n+ <assert_contents>\n+ <has_text text="tbl2asn Error" />\n+ </assert_contents>\n+ </output>\n+ <output name="need_curating">\n+ <assert_contents>\n+ <has_text text="Original Description" />\n+ </assert_contents>\n+ </output>\n+ <output name="new_names_passed">\n+ <assert_contents>\n+ <has_text text="Passed Description" />\n+ </assert_contents>\n+ </output>\n+ </test>\n+ </tests>\n+ <help><![CDATA[\n+Funannotate_ annotate\n+---------------------\n+\n+Funannotate_ is a pipeline for genome annotation (built specifically for fungi, but will also work with higher eukaryotes).\n+\n+This script functionally annotates the results from funannotate predict. It pulls\n+annotation from PFAM, InterPro, EggNog, UniProtKB, MEROPS, CAZyme, and GO ontology.\n+\n+.. _Funannotate: http://funannotate.readthedocs.io\n+ ]]></help>\n+ <expand macro="citations" />\n+</tool>\n' |
b |
diff -r 000000000000 -r a5baa4ff168d macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,159 @@\n+<?xml version="1.0"?>\n+<macros>\n+ <token name="@TOOL_VERSION@">1.8.9</token>\n+ <token name="@VERSION_SUFFIX@">1</token>\n+\n+ <xml name="requirements">\n+ <requirement type="package" version="@TOOL_VERSION@">funannotate</requirement>\n+ </xml>\n+\n+ <xml name="citations">\n+ <citations>\n+ <citation type="doi">10.5281/zenodo.4054262</citation>\n+ </citations>\n+ </xml>\n+\n+ <xml name="augustus_species">\n+ <!-- list generated from a Funannotate database directory, listing trained_species/* -->\n+ <option value="adorsata">adorsata</option>\n+ <option value="aedes">aedes</option>\n+ <option value="amphimedon">amphimedon</option>\n+ <option value="ancylostoma_ceylanicum">ancylostoma_ceylanicum</option>\n+ <option value="anidulans">anidulans</option>\n+ <option value="arabidopsis">arabidopsis</option>\n+ <option value="aspergillus_fumigatus">aspergillus_fumigatus</option>\n+ <option value="aspergillus_nidulans">aspergillus_nidulans</option>\n+ <option value="aspergillus_oryzae">aspergillus_oryzae</option>\n+ <option value="aspergillus_terreus">aspergillus_terreus</option>\n+ <option value="bombus_impatiens1">bombus_impatiens1</option>\n+ <option value="bombus_terrestris2">bombus_terrestris2</option>\n+ <option value="botrytis_cinerea">botrytis_cinerea</option>\n+ <option value="b_pseudomallei">b_pseudomallei</option>\n+ <option value="brugia">brugia</option>\n+ <option value="cacao">cacao</option>\n+ <option value="caenorhabditis">caenorhabditis</option>\n+ <option value="camponotus_floridanus">camponotus_floridanus</option>\n+ <option value="candida_albicans">candida_albicans</option>\n+ <option value="candida_guilliermondii">candida_guilliermondii</option>\n+ <option value="candida_tropicalis">candida_tropicalis</option>\n+ <option value="c_elegans_trsk">c_elegans_trsk</option>\n+ <option value="chaetomium_globosum">chaetomium_globosum</option>\n+ <option value="chicken">chicken</option>\n+ <option 
value="chiloscyllium">chiloscyllium</option>\n+ <option value="chlamy2011">chlamy2011</option>\n+ <option value="chlamydomonas">chlamydomonas</option>\n+ <option value="chlorella">chlorella</option>\n+ <option value="ciona">ciona</option>\n+ <option value="coccidioides_immitis">coccidioides_immitis</option>\n+ <option value="Conidiobolus_coronatus">Conidiobolus_coronatus</option>\n+ <option value="coprinus">coprinus</option>\n+ <option value="coprinus_cinereus">coprinus_cinereus</option>\n+ <option value="coyote_tobacco">coyote_tobacco</option>\n+ <option value="cryptococcus">cryptococcus</option>\n+ <option value="cryptococcus_neoformans_gattii">cryptococcus_neoformans_gattii</option>\n+ <option value="cryptococcus_neoformans_neoformans_B">cryptococcus_neoformans_neoformans_B</option>\n+ <option value="cryptococcus_neoformans_neoformans_JEC21">cryptococcus_neoformans_neoformans_JEC21</option>\n+ <option value="culex">culex</option>\n+ <option value="debaryomyces_hansenii">debaryomyces_hansenii</option>\n+ <option value="E_coli_K12">E_coli_K12</option>\n+ <option value="elephant_shark">elephant_shark</option>\n+ <option value="encephalitozoon_cuniculi_GB">encephalitozoon_cuniculi_GB</option>\n+ <option value="eremothecium_gossypii">eremothecium_gossypii</option>\n+ <option value="fly">fly</option>\n+ <option value="fly_exp">fly_exp</option>\n+ <option value="fusarium">fusarium</option>\n+ <option value="fusarium_graminearum">fusarium_graminearum</option>\n+ <option value="galdieria">galdieria</option>\n+ <option value="generic">generic</option>\n+ <option value="heliconius_melpomene1">heliconius_melpomene1</option>\n+ <option value="histoplasma">histoplasma</opt'..b'lue="pneumocystis">pneumocystis</option>\n+ <option value="rhincodon">rhincodon</option>\n+ <option value="rhizopus_oryzae">rhizopus_oryzae</option>\n+ <option value="rhodnius">rhodnius</option>\n+ <option value="rice">rice</option>\n+ <option value="saccharomyces">saccharomyces</option>\n+ <option 
value="saccharomyces_cerevisiae_rm11-1a_1">saccharomyces_cerevisiae_rm11-1a_1</option>\n+ <option value="saccharomyces_cerevisiae_S288C">saccharomyces_cerevisiae_S288C</option>\n+ <option value="s_aureus">s_aureus</option>\n+ <option value="schistosoma">schistosoma</option>\n+ <option value="schistosoma2">schistosoma2</option>\n+ <option value="schizosaccharomyces_pombe">schizosaccharomyces_pombe</option>\n+ <option value="scyliorhinus">scyliorhinus</option>\n+ <option value="sealamprey">sealamprey</option>\n+ <option value="s_pneumoniae">s_pneumoniae</option>\n+ <option value="strongylocentrotus_purpuratus">strongylocentrotus_purpuratus</option>\n+ <option value="sulfolobus_solfataricus">sulfolobus_solfataricus</option>\n+ <option value="template_prokaryotic">template_prokaryotic</option>\n+ <option value="tetrahymena">tetrahymena</option>\n+ <option value="thermoanaerobacter_tengcongensis">thermoanaerobacter_tengcongensis</option>\n+ <option value="tomato">tomato</option>\n+ <option value="toxoplasma">toxoplasma</option>\n+ <option value="tribolium2012">tribolium2012</option>\n+ <option value="trichinella">trichinella</option>\n+ <option value="ustilago">ustilago</option>\n+ <option value="ustilago_maydis">ustilago_maydis</option>\n+ <option value="verticillium_albo_atrum1">verticillium_albo_atrum1</option>\n+ <option value="verticillium_longisporum1">verticillium_longisporum1</option>\n+ <option value="volvox">volvox</option>\n+ <option value="wheat">wheat</option>\n+ <option value="Xipophorus_maculatus">Xipophorus_maculatus</option>\n+ <option value="yarrowia_lipolytica">yarrowia_lipolytica</option>\n+ <option value="zebrafish">zebrafish</option>\n+ </xml>\n+\n+ <xml name="busco_species">\n+ <!-- list generated from a Funannotate database directory, with the "funannotate database -show-buscos command" -->\n+ <option value="eukaryota">eukaryota</option>\n+ <option value="metazoa">metazoa</option>\n+ <option value="nematoda">nematoda</option>\n+ <option 
value="arthropoda">arthropoda</option>\n+ <option value="insecta">insecta</option>\n+ <option value="endopterygota">endopterygota</option>\n+ <option value="hymenoptera">hymenoptera</option>\n+ <option value="diptera">diptera</option>\n+ <option value="vertebrata">vertebrata</option>\n+ <option value="actinopterygii">actinopterygii</option>\n+ <option value="tetrapoda">tetrapoda</option>\n+ <option value="aves">aves</option>\n+ <option value="mammalia">mammalia</option>\n+ <option value="euarchontoglires">euarchontoglires</option>\n+ <option value="laurasiatheria">laurasiatheria</option>\n+ <option value="fungi">fungi</option>\n+ <option value="dikarya">dikarya</option>\n+ <option value="ascomycota">ascomycota</option>\n+ <option value="pezizomycotina">pezizomycotina</option>\n+ <option value="eurotiomycetes">eurotiomycetes</option>\n+ <option value="sordariomycetes">sordariomycetes</option>\n+ <option value="saccharomycetes">saccharomycetes</option>\n+ <option value="saccharomycetales">saccharomycetales</option>\n+ <option value="basidiomycota">basidiomycota</option>\n+ <option value="microsporidia">microsporidia</option>\n+ <option value="embryophyta">embryophyta</option>\n+ <option value="protists">protists</option>\n+ <option value="alveolata_stramenophiles">alveolata_stramenophiles</option>\n+ </xml>\n+</macros>\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/SRR7458692.bam |
b |
Binary file test-data/SRR7458692.bam has changed |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/cleaned.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cleaned.fa Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,2698 @@\n+>sample\n+GTGGCCGGCTGATTTCACGTCCTAACTTTGGGCTTAACTGGTTCGCCAGTTGACTTTCTTCGCCATCATGTGATGCATTA\n+ATTAAACAATAATTACTAATTGACAGTAATTAATAATTGTGGCAAAAAGCGCGACACGTTTTTTCGGCAAACTCCTCGGA\n+AGACCGATTGTTTAAAGACGTAGGGAAAAGTAGTTCCCAAGCATTTTAAAAAGATACCTATGACATGTGACACCTTTAAA\n+GTGCAATACAAGTTTTCATCTCTTTATATCCTTTTACTCCCTAATTTGAATATAAAAGGAATCGCATTGAGAGTATAAAG\n+GCTTTAGTTCTTATCGATAGATAGTTAGTGATGAAATAAAATTATAACCGTGGTTTTAGTTTGAAATGTTGTAAAGACTT\n+TCTTTAAATTTAACCAAATTTATGTGATAAAATGGATATTCCATAGATAAGACATTTAAGTTAAATGTTTTTATACATCA\n+AAAAGGAAACATTGTGCACGCTATCAAATGGTATTCTTAAAATCGAGTCAGTTAGGTAAGTTATTAATTAAATGGTAACT\n+TTTTAACGTGCGTCAAATAATCTAGAAATTCTTCTTCAACTCATCCAAAACATTCTCAACACCACAATATCTATGCTCAG\n+CGATGACAAATTTCTCCTGATTTCTTAATTTTCTATCTATGCTATGCGATCAATCAACGAATGTGTGCTAATTTCTTGTG\n+ACGATTATTTTGCAAAGTCGTCTCCGCGTTAATATCCGATGTAAATAAACCTATGAAAATCGCAAAGATCTATTCCTTTG\n+CGCTTAACCTTGTTATTGAACTCCCTCCCGCCCGGATTTTCGCAGCTTCCAACTAAGTGATACCTTTTAAACAAACGCCA\n+CAACAAAACAGGTGACAATCATATTTTATCAACAAAAGAAAAGAGAGATAATATCTGCTGCTAATTCAATTTATTGGGCT\n+TTGTGTTTATTTGCATTGGGAATCCGTGGAGCTGATATTGTTTACTTTGGCAATTTCCCAGTCATTATGGTCGCCGTATA\n+ATGTGAATGGCCAACAGGAAAATTTCACAGATTCCACTGGTTATTCACTGTTCATGCTGGCGGTTGATCCGCTCCAAATC\n+CCGGATGAGGCGGCGGAAGATCCTTATACCGCTTACTTACGTATTCACTGCTTAACATCTTATCATAGAATTCGTTAGCT\n+TTCACATAGTTTGGATTCCGAATGAATCTCTTCTGTCGACCTCGCGTTTTTGGCAATTTTCGGAGCTCTTCGCTACTGCT\n+AGTTGATTTAAGGCCTACATCTTTCTGATCCTTAATCATCCTTTTAGGTGATTCTCCTGCTACCATGGGATCCACCAGTT\n+TCATATATAATGGATCGGGCTCGTTTGCAAGTGGGTTTCCAGTTTCAGAAGACTCTGACGTTCTAAGCGCCAATAAAATG\n+AAAATAACCAAAAGGAAACTAGACATGTTTCGTTACAGACAGATATAGATTGGATATTATTGAAAGAAAATGAAAATAAA\n+CAGCGATAATGATCTGTGACTTATTGGAAATTAGATGGCTTATGGATGATGGGGCGATAAATTCGAACAAACACTGAGAG\n+CATTTTTGGGAGCATTGTGGGAGCTTTTCTATTATCCAGTACTTTATATACATATATCATTTATATACTAATCATTTCTG\n+GTAGCCGTTCGTAATCAGGATCGGATCCTTTTTTACCCGTTAGTCAGCTAGAAGAAACGAAAAATTAAAATAGTAAAATC\n+TAAAAGTATACAAAAATTCAAATAGTAAAACCAAAAAGTATTAAAAAAAATATCAATCGTTTTTAAACGTTGATTTTTCA\n+GCTTGTGGGGTGATTTATCGCTAACTTGGAAAATGATAATAAAGCATTATCCATAATA
TTAGTTGTGGAAATGAAATTCA\n+AATAGATGTTGTGTTATATACGATGAGGATGTTGCATTTGAGTCCCCGGAAATATAGTATTTTTTTTACCGAAGGTATTA\n+TCGTACCGGTCAAGTACGGTCACACTGCCAAGCGCAGATTTGAGGATTTCTAGATTTGGCCTCTTGATGGACTAGAAGCG\n+CTACCAAAACTGGGGCTTGAGTTGAATTACCTGTTGGAAGACACAATGCCACCCACGATCAACAATTCGGCGGTAAACAG\n+TGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAGTAAGTGAACAGATCCCTAAACAGACGCCAGATACTCAGAC\n+TGATGTGTACCTTGCAGATCCGAGATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAA\n+GTTTCTGCAGTACCCCTTCGACAGCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACG\n+TGCTGACGGAACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTG\n+GACCCCGCCGATGAAAAACTGCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGAC\n+GGTGTCATGGTTGCGCAAATCCGAGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGG\n+CCAAGGTCGGTTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATC\n+GAGAAGACCTTCAGCGACACCAAGAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCC\n+TATCTTCCCCGACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGA\n+ACGTGCCCGCCCAGCTGGAGGAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCC\n+TACTTCCTGCCCACAGAGCAGACGCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGAGGAGGA\n+GTACGAGTACAAGATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCT\n+TCGTGATGCGTCAGGACGGCATCTACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGC\n+CAGCAACCCAACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCTATAGCGAGCTTTACTTTGTATTATTTCTAC\n+CAGGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGG\n+CGAGGAGGAGGAGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGG\n+ACGCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCGATCGACAGTCTCGTTCTCGG\n+AGTCGAACTCGCAGCGGGTCCAGTTCAGGATCTGGATCTGGCTCCGGCTCTCGGGCCAGCAGCCGCTCAAAGTCTGGTTC\n+TCGGTCTGGTAGCGGCTCCAGATCACGCACAAATTCGCCGGCAGGATCCCAGAAATCCGGATCCAGATCGAGATCGGTAT\n+CACGTTCCCGATCCCGTTCCAAGTCCGGCTCTCGGTCGCGTTCTAGGTCGAGATCCAAGTCCGGTTCCCGATCACGTTCG\n+GGCTCCAGATCTGGCTCTGGGTCGCGATCGCCCAGCCGGTCTCGCAGTGGCTCGCCTTCTGGTTCA
GGATCCAGCTCTGG\n+AAGCGCCTCAGATGAATGATTAATTACAAAAAAC'..b'\n+ATCGGCGACTGTCTGTCATTGTATCCTTCTGCATTCCATTCGTATGTCCGTTTGTCTGTTCATTAGTCCGTCCGTTTGTC\n+CGCCCGTAACTCCGTCCCTGTGTCCTTTTTTTCCGTCCGTTTTCCTGATAAATACTTTTTAAGGAATCCAGCTTACCCTT\n+TTGCACTACAGGTAGCGTGAATAAAAATAAAATAAAAGAGCTAATTTTAAATTAAAATAAAAACAAAAACATCTCTTTGT\n+GTTTATATTTTCCCACTGTGCAGCACAAACACCCCTTTTGCCCACTTAAGCTTGCCACGTTTTCCCTTACTTATAACAGA\n+ACTTACAAACATGCGTTTGGTTTTCGTGGGTTGAGTTTGGTGCTCTCCGTTTACTTTTGCGGTTTTGTTCAGCGCTGCAT\n+ATATTTCCATATTAATTCCCCTGATTATGGGGAATCGTCATCGTCTGCGTTCTGTTCCCTGACGGTTTTGCCCAAATCCA\n+AATCCAAATCCAAATCCAAATCCAAATGCGAATGCGAATATCTGAATGCAGACCACAATTCGACGATGATGTTTCTGTTT\n+CAGAATAATCTAAATCGGCATTATTTATGCATTCAGTTCTTGCATTCATCACAACCACTTAGCGGTTCCACTTCAACGAA\n+CCCACAGATACACAATACATACATATTCATTTATGTATGTATGTACATACCGTCGTGTATATAATTAGTATGCATAGAAG\n+ATACATACGTATGCATTTTAATGGACCACAATTCCCTCGGCCAAAAGAGTGCTCCGATAATAAATATTAATTTTAATAAA\n+TGCTGATGCAGGTCAGCTGATTTCAAACGACCCTTTGTGTCACGGGGCAATGACTTTTGCTCAGTATATGTAGTATTCAA\n+TTTTCAATTTTCCGAAAATGGTATTACAATTACAATTTTTTAAATATTAGATCTCAAAAAATTGCTTGGCATACTTAGCA\n+ACATATCTTTAACTCTCAAGAGTCATATACACCCGATTTTGTTTCAAGTCCCCCTGGGACTTAAAAACCGACAATTACGC\n+CCAACTTGCAATCATTTGGAAAAAAAAACACCAGCTACGAACTTATCAAAACTTATATATCAGCAAAAAATAAATGGGTC\n+TCTATGCGCAGTTATCGGCCCACAAAATATATACAAAAGAAATGACATGTCATTAACTCCGGCCAAGGGCGTGCTTTTTG\n+GTGCCATCGCATTTGGGGGTAACTAGTCGCGGGTCGACCCAGAATCACATCTCCAGGAAGTGGATGTCCAGGGCGTGGTG\n+GTCCACATCGACGGGAGGGTGCAGCTGCAGGAACTTGACGTAGGCCAGGAAGGTGGTCCAGCACATGCTGAAGAAGGAGG\n+TAAACACGACCTGGTTTCGCGCCGGAACAAAGGCGAAGTTCACCGTCTGCACGCAGGGCCAATAAATAACGCCCACCTTG\n+TAGGCGTCCAGGAATTTATCGCTGACCTGGGAACGAGAGGGGGCAACGGGGTGGGTGAGTGCAGTGACATAATGCCCCCA\n+GTAGCAGTCGGAAAATGGAAATGGAAAATGCCAGCGCGGAAAAATTGTAATTAAGTGCCGCCGTCCAGCAGCGGTAGGAG\n+ACGTTTAATATTTTATTTATGGCCTGATCTCTTCTCCTCTTTTAAGGGGCGCTTGACTGGGGCGTGGCATTCAGTTAATG\n+TTGTTAATTAAAAGGCAACGCCTGGCTAGAAAAATTATCATCCAGGCACAGACTGTGTAACAAATGTAACATAGAGCACT\n+TTGGCACATTTTCAATTGGCATGAGAACTTCATTTAACTACAAAGACTATCCTTGTGCCATAAAACTTTCTTTTGTGGAT\n+CTATAGAAGTTGAATCGTTTTACAATCCTCAC
ATAAAATATAGAACTTCGATGGCGAACGCATAGCTATTCCGAAAGCGA\n+TCTTGAAACTTGTACTGCCTTCTACCCGCTTCCAATTTGAAACTACTTTTAGGTGAGGGTCGAGGAACTTACGTCTCTAT\n+CGGCCACGCACACAACTCTTTTACGGCCCAAACCTGAGCCTATCTCAACCAAATCGCTCAGAATAGTCGAGCGATATATC\n+TTTGGAGAACGCATGCAAGGCAAATGAGACGACGCACGTTTGACACAAATATATAGCAAAACATTTACATTTATCAAGAT\n+AAATGGTTTCTAATGATATGGAAGTCAGCCATCGAACCGAGTTTGGCCATGGAGCACAGTGCCTGAAAGTATCTCGCAGT\n+ATGGCCGCAGAGCCAACAAATCAATGTTGACAGCTCGAAACGGAGGACCAGACGCAGAAATTGGGAATGGAGGGAGGTGA\n+GCGACAGACAGGTGATATTTATGCATGCTTACATTGCAGCCTCTGTCTCCGCCACAGATGCAGATTCATAGATACAGATA\n+CAAAGATGGGGATACTGCCTCTGAATGTGTCGTGAGAAAATGGATTCCGAAATCAAAAACTACAGACCGAAAACCGATTT\n+CCAAATAAATAACAATGCATACTGGGCACACACTTAGTAATGAGCACATCTGCGAAATGAAAGACCTTACCAGATAACTG\n+TCAACATTTTAAAATCGTTAAAAGTTGATTCAGGATTTGGGGTAGCCGTGCCAACGGTGTGAATGGGCATGAATAATATG\n+ACATATTCCTTTCCCGAGTAATGAAAAATGTTTTCAGCGAATCTATCCACGAATACCGTACATAAAATAGAGACTCTTCT\n+GCTTGTTAGATATCGTTGGCCCCCGACAAAATGTGGTTCTTTGAAATGAAATTTGAAATAAGTTTGTTGTGCTTAGGCCT\n+TAGTGACTTTGGGATGGGAATATACTTCACCTCCCGCTTGGCCTCCGCGTACGAGTTGCCCTCCATCAAGGTCATGAAGA\n+AGAGGAACGAGCTGATGGCCATCGGATCATAAGCGGTCTGCTCGGTGATCGCCTTGCAGAGCGATGACTTAATGTCGGTG\n+CGCGGCCACATAACGCTGGCCAATCTGATCCACACATATATGGTGGGCCCCATAAAGAAGAAGCCGAATAAGCTGAACCT\n+GCAATGGAATTAGCCAAACCATATCGGGTCCAATTAGCAATCGAATCGGGCCAAGACCCGTGCAGATTGCTCATCACCTA\n+AGGCACTTCATCCAGTCGTACGTCCGGAATGTCTTCTTCTCGATCATGGTCTGCTCGATGAGGGAGCCGCAGGGCCAGAG\n+GGTGCCATACGATATCATGCCGCGTAGGACTTTGTATTTGCTTGTAATATTCACCAAGCTACGAAACATTTTCAACGGTC\n+TCTTGTGATGATCTCGTCGAATGTCAGTGATAGTTTCACTTCCACAAAAGTGCTATAAACACACGCTGAAAGATAAATTG\n+TTTTAGATCAATACGGTGGGCTTTACATGGCTGAGTTCGCTTGGGTTAGTTTTATTAGGTGCCATATTTGTCTTGGCGGA\n+TTTCACTTTTGCAAAAAGTTCGGTTTTAACTCATACGACCTAGAAAGTTATCGAGTACTGATTGTCTTGGGTGGCCATTC\n+CAAATTCAACCGCATTATTCTCCCTCGACAGTGCCTAACTTAGCCACAGAATTCCAGCTCAAGACAATTGCAAATAAGTG\n+TCGGCCAAGCACAGCTGGCACTTCTGGACTCTTCTCTGAACTGATGGATGGCTAAGAGGTGGGTTGGTCTTTGGAAATGG\n+AGAGGGGGTGGTGCCCGCCATCCAGTAACACATGTCAAGTATTATTATCGAGCAACTACAAACGAAATACAAACATACAT\n+TTAATTGCCAAGAATGTTCCTAGCCAAACAAATGTCGAAA
TAATGTCGGGCCACGGCCAAAGATTTATCATATTCACTCG\n+ACTGTCATAAAAAGCAATTAGGCCCGGGCGAAAGATACACGACCGGCAGTCCCGAAATGT\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/cleaned_ident.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cleaned_ident.fa Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,2849 @@\n+>sample\n+GTGGCCGGCTGATTTCACGTCCTAACTTTGGGCTTAACTGGTTCGCCAGTTGACTTTCTTCGCCATCATGTGATGCATTA\n+ATTAAACAATAATTACTAATTGACAGTAATTAATAATTGTGGCAAAAAGCGCGACACGTTTTTTCGGCAAACTCCTCGGA\n+AGACCGATTGTTTAAAGACGTAGGGAAAAGTAGTTCCCAAGCATTTTAAAAAGATACCTATGACATGTGACACCTTTAAA\n+GTGCAATACAAGTTTTCATCTCTTTATATCCTTTTACTCCCTAATTTGAATATAAAAGGAATCGCATTGAGAGTATAAAG\n+GCTTTAGTTCTTATCGATAGATAGTTAGTGATGAAATAAAATTATAACCGTGGTTTTAGTTTGAAATGTTGTAAAGACTT\n+TCTTTAAATTTAACCAAATTTATGTGATAAAATGGATATTCCATAGATAAGACATTTAAGTTAAATGTTTTTATACATCA\n+AAAAGGAAACATTGTGCACGCTATCAAATGGTATTCTTAAAATCGAGTCAGTTAGGTAAGTTATTAATTAAATGGTAACT\n+TTTTAACGTGCGTCAAATAATCTAGAAATTCTTCTTCAACTCATCCAAAACATTCTCAACACCACAATATCTATGCTCAG\n+CGATGACAAATTTCTCCTGATTTCTTAATTTTCTATCTATGCTATGCGATCAATCAACGAATGTGTGCTAATTTCTTGTG\n+ACGATTATTTTGCAAAGTCGTCTCCGCGTTAATATCCGATGTAAATAAACCTATGAAAATCGCAAAGATCTATTCCTTTG\n+CGCTTAACCTTGTTATTGAACTCCCTCCCGCCCGGATTTTCGCAGCTTCCAACTAAGTGATACCTTTTAAACAAACGCCA\n+CAACAAAACAGGTGACAATCATATTTTATCAACAAAAGAAAAGAGAGATAATATCTGCTGCTAATTCAATTTATTGGGCT\n+TTGTGTTTATTTGCATTGGGAATCCGTGGAGCTGATATTGTTTACTTTGGCAATTTCCCAGTCATTATGGTCGCCGTATA\n+ATGTGAATGGCCAACAGGAAAATTTCACAGATTCCACTGGTTATTCACTGTTCATGCTGGCGGTTGATCCGCTCCAAATC\n+CCGGATGAGGCGGCGGAAGATCCTTATACCGCTTACTTACGTATTCACTGCTTAACATCTTATCATAGAATTCGTTAGCT\n+TTCACATAGTTTGGATTCCGAATGAATCTCTTCTGTCGACCTCGCGTTTTTGGCAATTTTCGGAGCTCTTCGCTACTGCT\n+AGTTGATTTAAGGCCTACATCTTTCTGATCCTTAATCATCCTTTTAGGTGATTCTCCTGCTACCATGGGATCCACCAGTT\n+TCATATATAATGGATCGGGCTCGTTTGCAAGTGGGTTTCCAGTTTCAGAAGACTCTGACGTTCTAAGCGCCAATAAAATG\n+AAAATAACCAAAAGGAAACTAGACATGTTTCGTTACAGACAGATATAGATTGGATATTATTGAAAGAAAATGAAAATAAA\n+CAGCGATAATGATCTGTGACTTATTGGAAATTAGATGGCTTATGGATGATGGGGCGATAAATTCGAACAAACACTGAGAG\n+CATTTTTGGGAGCATTGTGGGAGCTTTTCTATTATCCAGTACTTTATATACATATATCATTTATATACTAATCATTTCTG\n+GTAGCCGTTCGTAATCAGGATCGGATCCTTTTTTACCCGTTAGTCAGCTAGAAGAAACGAAAAATTAAAATAGTAAAATC\n+TAAAAGTATACAAAAATTCAAATAGTAAAACCAAAAAGTATTAAAAAAAATATCAATCGTTTTTAAACGTTGATTTTTCA\n+GCTTGTGGGGTGATTTATCGCTAACTTGGAAAATGATAATAAAGCATTATCCATAATA
TTAGTTGTGGAAATGAAATTCA\n+AATAGATGTTGTGTTATATACGATGAGGATGTTGCATTTGAGTCCCCGGAAATATAGTATTTTTTTTACCGAAGGTATTA\n+TCGTACCGGTCAAGTACGGTCACACTGCCAAGCGCAGATTTGAGGATTTCTAGATTTGGCCTCTTGATGGACTAGAAGCG\n+CTACCAAAACTGGGGCTTGAGTTGAATTACCTGTTGGAAGACACAATGCCACCCACGATCAACAATTCGGCGGTAAACAG\n+TGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAGTAAGTGAACAGATCCCTAAACAGACGCCAGATACTCAGAC\n+TGATGTGTACCTTGCAGATCCGAGATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAA\n+GTTTCTGCAGTACCCCTTCGACAGCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACG\n+TGCTGACGGAACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTG\n+GACCCCGCCGATGAAAAACTGCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGAC\n+GGTGTCATGGTTGCGCAAATCCGAGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGG\n+CCAAGGTCGGTTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATC\n+GAGAAGACCTTCAGCGACACCAAGAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCC\n+TATCTTCCCCGACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGA\n+ACGTGCCCGCCCAGCTGGAGGAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCC\n+TACTTCCTGCCCACAGAGCAGACGCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGAGGAGGA\n+GTACGAGTACAAGATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCT\n+TCGTGATGCGTCAGGACGGCATCTACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGC\n+CAGCAACCCAACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCTATAGCGAGCTTTACTTTGTATTATTTCTAC\n+CAGGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGG\n+CGAGGAGGAGGAGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGG\n+ACGCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCGATCGACAGTCTCGTTCTCGG\n+AGTCGAACTCGCAGCGGGTCCAGTTCAGGATCTGGATCTGGCTCCGGCTCTCGGGCCAGCAGCCGCTCAAAGTCTGGTTC\n+TCGGTCTGGTAGCGGCTCCAGATCACGCACAAATTCGCCGGCAGGATCCCAGAAATCCGGATCCAGATCGAGATCGGTAT\n+CACGTTCCCGATCCCGTTCCAAGTCCGGCTCTCGGTCGCGTTCTAGGTCGAGATCCAAGTCCGGTTCCCGATCACGTTCG\n+GGCTCCAGATCTGGCTCTGGGTCGCGATCGCCCAGCCGGTCTCGCAGTGGCTCGCCTTCTGGTTCA
GGATCCAGCTCTGG\n+AAGCGCCTCAGATGAATGATTAATTACAAAAAAC'..b'ATCCAGATCGAGATCGGTAT\n+CACGTTCCCGATCCCGTTCCAAGTCCGGCTCTCGGTCGCGTTCTAGGTCGAGATCCAAGTCCGGTTCCCGATCACGTTCG\n+GGCTCCAGATCTGGCTCTGGGTCGCGATCGCCCAGCCGGTCTCGCAGTGGCTCGCCTTCTGGTTCAGGATCCAGCTCTGG\n+AAGCGCCTCAGATGAATGATTAATTACAAAAAACGGCGTTCATAATAAATAAGTTTATAATCAACCAAGTACATTTGAAA\n+ACTGAACTAACTCGATTTAATATCATTTTCGCCTCAACTCAGCGCTCGGGTTCGTTGCCCAGAATAGTTTTAAATAAAAT\n+CGGCAGTTTAACATAATTTATATTAGATGTTGTTGTTGTATTGCAAACAAGTCGGGTCCTAGTCGATTTACACTTGGCTG\n+AGATAAAACAACTAAGATTCAAATAATATCCTCATAAGAAGATGTAATTAAGACGTTTTTCTTAGGGGGTGCTTAGGAAT\n+TGATTAGATCGCCTTTGGGGAAGTGCAAACAATGTAAAATGATATAAAAGGGTATAAATTAAGTGGATATATGCATCTTC\n+GTTCCAACTACGTGGCGTCCATCAAAAAGCGCTGGAAGACTTCGCCATCGGAACTAGGTAGCTCTGTTTGTTGCGGTGAG\n+TAGATTCTCAAGTTCTGGAGTTGCTGCAGCGGAGAGCCATTCCCGCTGAAGTGTACCACCGCAACCGGCTGTAGGGATAT\n+GAGCTGTCCCTCCTCCCGCGGCTCCACACCACAGATGCCCAGCTTTTGGCACTGCTCAACGACAATGTCGTCGATTGACT\n+GCGAAAGCAGTGCCTCCTGCTCAGGATCCACAATGGAGCTGCTGTTGATGGCAGCTATTTCGGCGCTCGCTGGTGAAGGC\n+TGAGTATGCGAGTCGTTGCCCTCCAGGAACGCCTTTATCAGCTGCTCCGGTGTCTGGGCCTCCTCGGTGGGACATCGATG\n+GGTCCTTTGACTGACCTTGTACCTGAACGTCTTTTGGCAGAGCTCGCACTTGTAGGGCATCACTCCCGTGTGGATGCGCG\n+TGTGGACAAGGAAAGAGACTCGCTGCCGGAAACACTTGCCTGTGGACGAGTCCGAAATACGAAAAGGTTAGACATGGAGT\n+GACCCGGAAAAGAAGGTATACCTCTCCTTCATTTAAAGTAAATAGGGCAAATCGCAATGGAGTATGCTCATTTATAAGCT\n+GGCTAACAAAATAAGGGGCGGCTAATTAAGGGGTTTGATCGATACTCACCGCAGACTTCGCACTTGAAGGGCTTTTCGCC\n+GCTGTGAATCCTCTGGTGGTTATGCAGCGTAGACAGTTCCTTGAAGGCGCGTCCACAAACCCCGCAGACATGGGGCTTTA\n+CCTCGCTGTGGTAGAGCAAATGCTTGTGGTACGACTGCTGGAAGGTGAAGGTCCTGGCGCAGATCTCGCATGTGTACGGC\n+ATCTCGCCGGTGTGGAGCCGCTTGTGCTTCTTCAGAAAGTACTTGGTGGTGAAGGACTTGCTGCACACATCGCACTCCCA\n+CAGCTTCGGGGTGGCCGTGCCCGACTCCGGCGAACTGGGGGACTGCTGGAGCATGCTCAGCGCCCCACTCAGCGCGTATG\n+GCTGGGATGCGGTGCACGTGGAGTTATTTCCGTTGCCAAGGGCTCCAGGCTCTGTGTGTCGAATGCGATCGCAAATGCTC\n+AGCTTGGGCGTGGCAGTAACTGCACTGGTGGTAGAAGCGGGTGTAGGACTGGGATTAGGATTTGGATTGGGATTGGAGCA\n+GGGCACGCCCATCATGTGCACTACTTTCAGGTGGATTCGAAGGGAGCCCTTCATTCGGAACGTCTTGGAGCAGAGATGGC\n+ACTTGTAGGGCT
CCTGGTCCTGTATAAAGCAATAATCGGAATTTCACTTATATTTATCAATTCATCAATATGCCCTCATG\n+GCCAAATATTCCATTACATTACCGTCTGTCTCTCAGTTTCAAATTTATGCACAAAAATCATTCACTTTCATTCACTATAT\n+CACAAAGTTGCCATGGTTTTAAATTGATCAAAAACAAATTAATATCTATCATATATATACATAGTCATATGAACAGTTGA\n+AAAATTAATTGAAAATAATGGGAACGATATACGTACATACATCAGTTGTTTTTAAAATATAAGGGTATATAGATTTCTTT\n+CTTGTTGTTGTTGATTTTAATTACGTCAAACTTTTGTTTCAGATTCAATGTAAATGGTCTAGCTTTTTAAGTATGATTTT\n+TTTTTGCTGCCAGTGAGCATAGAAAAAAAAAATCAAAATCGATATAAGAATATGCGAAAGTGCATTACGAAACTCTTTAG\n+ATAATAGCACTTAATATATGTACATAGCCAATAGTTACCGGTTCCTTCTGTTGGGGTTCCTTTTGCTTGGGTTCTCCCTC\n+CGCATTTTCGTGGACTAAGCGGACGTGCATGTCCCTCAGCTCGGTATTCCGGAAACTGAACTCGCAAATGTCGCAGTGGG\n+CGGGCGGGGTGGTCCGCACAGGCGGTGGGGTTGGGACGACGGGCTTGGACCTGGTTCGCTTGGCCCTCCGTTTGGGAGGA\n+GCTGCGGCAAGGAAGCCCCGGGACGAGGCGGGTTGGCCATTCGCTGGACTCTCGTTTCCCTCCTGAGCCATCAGACTTGT\n+GTGCGAGAACAGGTGGATGGTGAGCTTGTCCAGCCCCAGGAAGAGCTCCTTGCAGTTGGCAAAGGGACAGGCCAGTGGGC\n+CGTTGGCCGCCTTGATCAGCCTCTGCTGCAGTGCGTCAATACTGCCGAAACTGGGCACCGCGCAGAGCGGACACAGCACC\n+GAGGTGGAACACATTTCGCCAGTGCACTCAATCGAATCTTATGCAAATGCTTCACCTCCTATTGGGATTATCCTCCTGTT\n+CGGTCTGTGATCATCTATTCAGGAGTCCATTCCCAGACTGCCTAGTCTTTCTGCTTTCAAAATTTTCTAAAAATATCAGC\n+AAGTGAAGATTTTTGAAAACTTTGGGCCCAGCAATCTGACTTCTCGGCACCGATGCCAGCTAACGAAATAATGAAAAATA\n+ATGAAATGCCCGGCGCGGATCGTCGAATCGTCAAGAAGACTTTCGGAAACACTCGCAGCACCGAAATCCCATCTCTCGAA\n+CAAGGCAGTCTCTTTTCTCCGTGTCTCTGGGTAGCTCATTTCGAAATATAGCTCTGAGCACGGCTATATACTATATGTAT\n+GTAGAATTATTTCTGGCCGATATATGTTGCACTGGCGGCCATATAGCCTTCGTTCTAGTCTTTGTAACGCACGATGCGCA\n+GGAGCAATTCGCTGAGATGACCACATGCGATTTGCGGGACTTATCTAGAGATCTATCATTATCGCCAGATTGGTTTAATA\n+ATTGGCTTTTCCGCCAATATCCAATTGGAATATGGTTGGTTACTGCAATTGTCGCTCCATTTTTTAAGCACTCCATAAAA\n+AGTAAACACATTAATATGTACTCTTATTGGAGATTTCTTCTTTCGATTTTAGTTTCGGACCAGTGAAAATCATTCGTTTC\n+ATTTTCGTAAATAAGAACTGAGAAAATATTATTATTATATATATTTCTTTATTAGGAAAATACGAAGATTGAGTATTTCA\n+GATTGAATTAGCATATCCGTCTAAATCTTAATGCTGTAATGAGCTTACTTGAGATCTGATCAAAACCAATACAAAACCCA\n+CACCAAAGGTGGTAGCTAATATACATATTTTGTGTAATACTTTTGTAGAGTATTTACTATTCAGCGATTTAAACAAGCAA\n+TCGCCTAGACACACACATTT
GTCCGCCTATGTGTATGTGCACCGAGCTATACCCCCACTGAATCGCTGTGTGCTATTTTT\n+ATGGCCGCGATGCTCTCTTGTTTTGACCCGCTTGGGCAAC\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/compare/Genus_species.gbk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/compare/Genus_species.gbk Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,4260 @@\n+LOCUS sample 215740 bp DNA linear 28-SEP-2021\n+DEFINITION Genus species.\n+ACCESSION \n+VERSION\n+KEYWORDS .\n+SOURCE Genus species\n+ ORGANISM Genus species\n+ Unclassified.\n+REFERENCE 1 (bases 1 to 215740)\n+ AUTHORS Palmer,J.M.\n+ TITLE Direct Submission\n+ JOURNAL Submitted (28-SEP-2021) CFMR, USDA Forest Service, 1 Gifford\n+ Pinchot Drive, Madison, WI 53726, USA\n+COMMENT \'Annotated using 1.8.9\'.\n+FEATURES Location/Qualifiers\n+ source 1..215740\n+ /organism="Genus species"\n+ /mol_type="genomic DNA"\n+ gene complement(1092..1466)\n+ /locus_tag="FUN_000001"\n+ mRNA complement(1092..1466)\n+ /locus_tag="FUN_000001"\n+ /product="hypothetical protein"\n+ CDS complement(1092..1466)\n+ /locus_tag="FUN_000001"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUN_000001-T1"\n+ /translation="MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAG\n+ ESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKRFIRNPNYVKANEFYDKMLSSEY\n+ VSKRYKDLPPPHPGFGADQPPA"\n+ gene 2126..3863\n+ /locus_tag="FUN_000002"\n+ mRNA join(2126..2199,2258..3224,3284..3490,3549..3863)\n+ /locus_tag="FUN_000002"\n+ /product="hypothetical protein"\n+ CDS join(2126..2199,2258..3224,3284..3490,3549..3863)\n+ /locus_tag="FUN_000002"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUN_000002-T1"\n+ /translation="MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFD\n+ LKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLGVTVDLINRELYQADSMTLLDPA\n+ DEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK\n+ KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPC\n+ AQVIFDSDPAPAGKNVPAQLEEMSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFI\n+ NGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGIYYNELETRVRLNKR\n+ RVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIG\n+ ETEKTSEDAAVGAQAASGADSPAQDLDLAPALGPAAAQSLVLGLVAAPDHAQIRRQDP\n+ RNPDPDRDRYHVPDPVPSPALGRVLGRDPSPVPDHVRAPDLALGRDRPAGLAVARLLV\n+ QDPALEAPQMND"\n+ gene complement(4883..5802)\n+ /locus_tag="FUN_000003"\n+ mRNA 
complement(join(4883..5539,5797..5802))\n+ /locus_tag="FUN_000003"\n+ /product="hypothetical protein"\n+ CDS complement(join(4883..5539,5797..5802))\n+ /locus_tag="FUN_000003"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUN_000003-T1"\n+ /translation="MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPN\n+ PSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGNNSTCTASQPYALSGALSMLQQS\n+ PSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+ KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCEVCGEYRSNPLISRPL\n+ FC"\n+ gene complement(8696..10557)\n+ /locus_tag="FUN_000004"\n+ mRNA complement(join(8696..10462,10549..10557))\n+ /locus_tag="FUN_000004"\n+ /product="hypothetical protein"\n+ CDS complement(join(8696..10462,10549..10557))\n+ /locus_tag="FUN_000004"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUN_000004-T'..b'1 agcttttggc actgctcaac gacaatgtcg tcgattgact gcgaaagcag tgcctcctgc\n+ 4501 tcaggatcca caatggagct gctgttgatg gcagctattt cggcgctcgc tggtgaaggc\n+ 4561 tgagtatgcg agtcgttgcc ctccaggaac gcctttatca gctgctccgg tgtctgggcc\n+ 4621 tcctcggtgg gacatcgatg ggtcctttga ctgaccttgt acctgaacgt cttttggcag\n+ 4681 agctcgcact tgtagggcat cactcccgtg tggatgcgcg tgtggacaag gaaagagact\n+ 4741 cgctgccgga aacacttgcc tgtggacgag tccgaaatac gaaaaggtta gacatggagt\n+ 4801 gacccggaaa agaaggtata cctctccttc atttaaagta aatagggcaa atcgcaatgg\n+ 4861 agtatgctca tttataagct ggctaacaaa ataaggggcg gctaattaag gggtttgatc\n+ 4921 gatactcacc gcagacttcg cacttgaagg gcttttcgcc gctgtgaatc ctctggtggt\n+ 4981 tatgcagcgt agacagttcc ttgaaggcgc gtccacaaac cccgcagaca tggggcttta\n+ 5041 cctcgctgtg gtagagcaaa tgcttgtggt acgactgctg gaaggtgaag gtcctggcgc\n+ 5101 agatctcgca tgtgtacggc atctcgccgg tgtggagccg cttgtgcttc ttcagaaagt\n+ 5161 acttggtggt gaaggacttg ctgcacacat cgcactccca cagcttcggg gtggccgtgc\n+ 5221 ccgactccgg cgaactgggg gactgctgga gcatgctcag cgccccactc agcgcgtatg\n+ 5281 gctgggatgc ggtgcacgtg gagttatttc cgttgccaag ggctccaggc tctgtgtgtc\n+ 5341 gaatgcgatc gcaaatgctc 
agcttgggcg tggcagtaac tgcactggtg gtagaagcgg\n+ 5401 gtgtaggact gggattagga tttggattgg gattggagca gggcacgccc atcatgtgca\n+ 5461 ctactttcag gtggattcga agggagccct tcattcggaa cgtcttggag cagagatggc\n+ 5521 acttgtaggg ctcctggtcc tgtataaagc aataatcgga atttcactta tatttatcaa\n+ 5581 ttcatcaata tgccctcatg gccaaatatt ccattacatt accgtctgtc tctcagtttc\n+ 5641 aaatttatgc acaaaaatca ttcactttca ttcactatat cacaaagttg ccatggtttt\n+ 5701 aaattgatca aaaacaaatt aatatctatc atatatatac atagtcatat gaacagttga\n+ 5761 aaaattaatt gaaaataatg ggaacgatat acgtacatac atcagttgtt tttaaaatat\n+ 5821 aagggtatat agatttcttt cttgttgttg ttgattttaa ttacgtcaaa cttttgtttc\n+ 5881 agattcaatg taaatggtct agctttttaa gtatgatttt tttttgctgc cagtgagcat\n+ 5941 agaaaaaaaa aatcaaaatc gatataagaa tatgcgaaag tgcattacga aactctttag\n+ 6001 ataatagcac ttaatatatg tacatagcca atagttaccg gttccttctg ttggggttcc\n+ 6061 ttttgcttgg gttctccctc cgcattttcg tggactaagc ggacgtgcat gtccctcagc\n+ 6121 tcggtattcc ggaaactgaa ctcgcaaatg tcgcagtggg cgggcggggt ggtccgcaca\n+ 6181 ggcggtgggg ttgggacgac gggcttggac ctggttcgct tggccctccg tttgggagga\n+ 6241 gctgcggcaa ggaagccccg ggacgaggcg ggttggccat tcgctggact ctcgtttccc\n+ 6301 tcctgagcca tcagacttgt gtgcgagaac aggtggatgg tgagcttgtc cagccccagg\n+ 6361 aagagctcct tgcagttggc aaagggacag gccagtgggc cgttggccgc cttgatcagc\n+ 6421 ctctgctgca gtgcgtcaat actgccgaaa ctgggcaccg cgcagagcgg acacagcacc\n+ 6481 gaggtggaac acatttcgcc agtgcactca atcgaatctt atgcaaatgc ttcacctcct\n+ 6541 attgggatta tcctcctgtt cggtctgtga tcatctattc aggagtccat tcccagactg\n+ 6601 cctagtcttt ctgctttcaa aattttctaa aaatatcagc aagtgaagat ttttgaaaac\n+ 6661 tttgggccca gcaatctgac ttctcggcac cgatgccagc taacgaaata atgaaaaata\n+ 6721 atgaaatgcc cggcgcggat cgtcgaatcg tcaagaagac tttcggaaac actcgcagca\n+ 6781 ccgaaatccc atctctcgaa caaggcagtc tcttttctcc gtgtctctgg gtagctcatt\n+ 6841 tcgaaatata gctctgagca cggctatata ctatatgtat gtagaattat ttctggccga\n+ 6901 tatatgttgc actggcggcc atatagcctt cgttctagtc tttgtaacgc acgatgcgca\n+ 6961 ggagcaattc gctgagatga 
ccacatgcga tttgcgggac ttatctagag atctatcatt\n+ 7021 atcgccagat tggtttaata attggctttt ccgccaatat ccaattggaa tatggttggt\n+ 7081 tactgcaatt gtcgctccat tttttaagca ctccataaaa agtaaacaca ttaatatgta\n+ 7141 ctcttattgg agatttcttc tttcgatttt agtttcggac cagtgaaaat cattcgtttc\n+ 7201 attttcgtaa ataagaactg agaaaatatt attattatat atatttcttt attaggaaaa\n+ 7261 tacgaagatt gagtatttca gattgaatta gcatatccgt ctaaatctta atgctgtaat\n+ 7321 gagcttactt gagatctgat caaaaccaat acaaaaccca caccaaaggt ggtagctaat\n+ 7381 atacatattt tgtgtaatac ttttgtagag tatttactat tcagcgattt aaacaagcaa\n+ 7441 tcgcctagac acacacattt gtccgcctat gtgtatgtgc accgagctat acccccactg\n+ 7501 aatcgctgtg tgctattttt atggccgcga tgctctcttg ttttgacccg cttgggcaac\n+//\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/compare/Other_beast.gbk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/compare/Other_beast.gbk Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,4148 @@\n+LOCUS sample 215740 bp DNA linear 21-JUL-2021\n+DEFINITION Other beast.\n+ACCESSION \n+VERSION\n+KEYWORDS .\n+SOURCE Other beast\n+ ORGANISM Other beast\n+ Unclassified.\n+REFERENCE 1 (bases 1 to 215740)\n+ AUTHORS Palmer,J.M.\n+ TITLE Direct Submission\n+ JOURNAL Submitted (21-JUL-2021) CFMR, USDA Forest Service, 1 Gifford\n+ Pinchot Drive, Madison, WI 53726, USA\n+COMMENT \'Annotated using 1.8.7\'.\n+FEATURES Location/Qualifiers\n+ source 1..215740\n+ /organism="Other beast"\n+ /mol_type="genomic DNA"\n+ gene complement(1092..1466)\n+ /locus_tag="FUNC_000001"\n+ mRNA complement(1092..1466)\n+ /locus_tag="FUNC_000001"\n+ /product="hypothetical protein"\n+ CDS complement(1092..1466)\n+ /locus_tag="FUNC_000001"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUNC_000001-T1"\n+ /translation="MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAG\n+ ESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKRFIRNPNYVKANEFYDKMLSSEY\n+ VSKRYKDLPPPHPGFGADQPPA"\n+ gene complement(2565..3142)\n+ /locus_tag="FUNC_000002"\n+ mRNA complement(join(2565..2686,2883..3004,3138..3142))\n+ /locus_tag="FUNC_000002"\n+ /product="hypothetical protein"\n+ CDS complement(join(2565..2686,2883..3004,3138..3142))\n+ /locus_tag="FUNC_000002"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUNC_000002-T1"\n+ /translation="MPLYGVSPASALWAGSRRQTVRRSRPSHHESWPATSPPAGRAQS\n+ LLPKRLLDVVTDLGLDVLQVLGLEARLLGGDVLGFAQP"\n+ gene complement(4248..5802)\n+ /locus_tag="FUNC_000003"\n+ mRNA complement(join(4248..4742,4937..5539,5797..5802))\n+ /locus_tag="FUNC_000003"\n+ /product="hypothetical protein"\n+ CDS complement(join(4248..4742,4937..5539,5797..5802))\n+ /locus_tag="FUNC_000003"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUNC_000003-T1"\n+ /translation="MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPN\n+ PSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGNNSTCTASQPYALSGALSMLQQS\n+ PSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+ 
KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCERVSFLVHTRIHTGVM\n+ PYKCELCQKTFRYKVSQRTHRCPTEEAQTPEQLIKAFLEGNDSHTQPSPASAEIAAIN\n+ SSSIVDPEQEALLSQSIDDIVVEQCQKLGICGVEPREEGQLISLQPVAVVHFSGNGSP\n+ LQQLQNLRIYSPQQTELPSSDGEVFQRFLMDAT"\n+ gene complement(7691..10664)\n+ /locus_tag="FUNC_000004"\n+ mRNA complement(join(7691..8385,8707..10499,10657..10664))\n+ /locus_tag="FUNC_000004"\n+ /product="hypothetical protein"\n+ CDS complement(join(7691..8385,8707..10499,10657..10664))\n+ /locus_tag="FUNC_000004"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUNC_000004-T1"\n+ /translation="MSNKREIDELTSRIKSAKTTLVEWTEAMEDGNKGYQLIEKYYLD\n+ DQQKARELNIKRQLLQADIDKRRKQVVLLYDEQMTLEKNLERTACLYRSAHAERRQMV\n+ ETWKSAVNQMTQREHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELA\n+ IESLNEESSDMKNQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRD\n+ EKAKELENFASVME'..b'1 agcttttggc actgctcaac gacaatgtcg tcgattgact gcgaaagcag tgcctcctgc\n+ 4501 tcaggatcca caatggagct gctgttgatg gcagctattt cggcgctcgc tggtgaaggc\n+ 4561 tgagtatgcg agtcgttgcc ctccaggaac gcctttatca gctgctccgg tgtctgggcc\n+ 4621 tcctcggtgg gacatcgatg ggtcctttga ctgaccttgt acctgaacgt cttttggcag\n+ 4681 agctcgcact tgtagggcat cactcccgtg tggatgcgcg tgtggacaag gaaagagact\n+ 4741 cgctgccgga aacacttgcc tgtggacgag tccgaaatac gaaaaggtta gacatggagt\n+ 4801 gacccggaaa agaaggtata cctctccttc atttaaagta aatagggcaa atcgcaatgg\n+ 4861 agtatgctca tttataagct ggctaacaaa ataaggggcg gctaattaag gggtttgatc\n+ 4921 gatactcacc gcagacttcg cacttgaagg gcttttcgcc gctgtgaatc ctctggtggt\n+ 4981 tatgcagcgt agacagttcc ttgaaggcgc gtccacaaac cccgcagaca tggggcttta\n+ 5041 cctcgctgtg gtagagcaaa tgcttgtggt acgactgctg gaaggtgaag gtcctggcgc\n+ 5101 agatctcgca tgtgtacggc atctcgccgg tgtggagccg cttgtgcttc ttcagaaagt\n+ 5161 acttggtggt gaaggacttg ctgcacacat cgcactccca cagcttcggg gtggccgtgc\n+ 5221 ccgactccgg cgaactgggg gactgctgga gcatgctcag cgccccactc agcgcgtatg\n+ 5281 gctgggatgc ggtgcacgtg gagttatttc cgttgccaag ggctccaggc tctgtgtgtc\n+ 5341 gaatgcgatc gcaaatgctc agcttgggcg 
tggcagtaac tgcactggtg gtagaagcgg\n+ 5401 gtgtaggact gggattagga tttggattgg gattggagca gggcacgccc atcatgtgca\n+ 5461 ctactttcag gtggattcga agggagccct tcattcggaa cgtcttggag cagagatggc\n+ 5521 acttgtaggg ctcctggtcc tgtataaagc aataatcgga atttcactta tatttatcaa\n+ 5581 ttcatcaata tgccctcatg gccaaatatt ccattacatt accgtctgtc tctcagtttc\n+ 5641 aaatttatgc acaaaaatca ttcactttca ttcactatat cacaaagttg ccatggtttt\n+ 5701 aaattgatca aaaacaaatt aatatctatc atatatatac atagtcatat gaacagttga\n+ 5761 aaaattaatt gaaaataatg ggaacgatat acgtacatac atcagttgtt tttaaaatat\n+ 5821 aagggtatat agatttcttt cttgttgttg ttgattttaa ttacgtcaaa cttttgtttc\n+ 5881 agattcaatg taaatggtct agctttttaa gtatgatttt tttttgctgc cagtgagcat\n+ 5941 agaaaaaaaa aatcaaaatc gatataagaa tatgcgaaag tgcattacga aactctttag\n+ 6001 ataatagcac ttaatatatg tacatagcca atagttaccg gttccttctg ttggggttcc\n+ 6061 ttttgcttgg gttctccctc cgcattttcg tggactaagc ggacgtgcat gtccctcagc\n+ 6121 tcggtattcc ggaaactgaa ctcgcaaatg tcgcagtggg cgggcggggt ggtccgcaca\n+ 6181 ggcggtgggg ttgggacgac gggcttggac ctggttcgct tggccctccg tttgggagga\n+ 6241 gctgcggcaa ggaagccccg ggacgaggcg ggttggccat tcgctggact ctcgtttccc\n+ 6301 tcctgagcca tcagacttgt gtgcgagaac aggtggatgg tgagcttgtc cagccccagg\n+ 6361 aagagctcct tgcagttggc aaagggacag gccagtgggc cgttggccgc cttgatcagc\n+ 6421 ctctgctgca gtgcgtcaat actgccgaaa ctgggcaccg cgcagagcgg acacagcacc\n+ 6481 gaggtggaac acatttcgcc agtgcactca atcgaatctt atgcaaatgc ttcacctcct\n+ 6541 attgggatta tcctcctgtt cggtctgtga tcatctattc aggagtccat tcccagactg\n+ 6601 cctagtcttt ctgctttcaa aattttctaa aaatatcagc aagtgaagat ttttgaaaac\n+ 6661 tttgggccca gcaatctgac ttctcggcac cgatgccagc taacgaaata atgaaaaata\n+ 6721 atgaaatgcc cggcgcggat cgtcgaatcg tcaagaagac tttcggaaac actcgcagca\n+ 6781 ccgaaatccc atctctcgaa caaggcagtc tcttttctcc gtgtctctgg gtagctcatt\n+ 6841 tcgaaatata gctctgagca cggctatata ctatatgtat gtagaattat ttctggccga\n+ 6901 tatatgttgc actggcggcc atatagcctt cgttctagtc tttgtaacgc acgatgcgca\n+ 6961 ggagcaattc gctgagatga ccacatgcga 
tttgcgggac ttatctagag atctatcatt\n+ 7021 atcgccagat tggtttaata attggctttt ccgccaatat ccaattggaa tatggttggt\n+ 7081 tactgcaatt gtcgctccat tttttaagca ctccataaaa agtaaacaca ttaatatgta\n+ 7141 ctcttattgg agatttcttc tttcgatttt agtttcggac cagtgaaaat cattcgtttc\n+ 7201 attttcgtaa ataagaactg agaaaatatt attattatat atatttcttt attaggaaaa\n+ 7261 tacgaagatt gagtatttca gattgaatta gcatatccgt ctaaatctta atgctgtaat\n+ 7321 gagcttactt gagatctgat caaaaccaat acaaaaccca caccaaaggt ggtagctaat\n+ 7381 atacatattt tgtgtaatac ttttgtagag tatttactat tcagcgattt aaacaagcaa\n+ 7441 tcgcctagac acacacattt gtccgcctat gtgtatgtgc accgagctat acccccactg\n+ 7501 aatcgctgtg tgctattttt atggccgcga tgctctcttg ttttgacccg cttgggcaac\n+//\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/compare/Other_species.gbk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/compare/Other_species.gbk Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,4259 @@\n+LOCUS sample 215740 bp DNA linear 22-JUL-2021\n+DEFINITION Other species.\n+ACCESSION \n+VERSION\n+KEYWORDS .\n+SOURCE Other species\n+ ORGANISM Other species\n+ Unclassified.\n+REFERENCE 1 (bases 1 to 215740)\n+ AUTHORS Palmer,J.M.\n+ TITLE Direct Submission\n+ JOURNAL Submitted (22-JUL-2021) CFMR, USDA Forest Service, 1 Gifford\n+ Pinchot Drive, Madison, WI 53726, USA\n+COMMENT \'Annotated using 1.8.7\'.\n+FEATURES Location/Qualifiers\n+ source 1..215740\n+ /organism="Other species"\n+ /mol_type="genomic DNA"\n+ gene <2331..3254\n+ /locus_tag="FUNB_000001"\n+ mRNA <2331..3254\n+ /locus_tag="FUNB_000001"\n+ /product="hypothetical protein"\n+ CDS <2331..3254\n+ /locus_tag="FUNB_000001"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUNB_000001-T1"\n+ /translation="YPFDSHRFVQYNPTSLERNFKYDVLTEHDLGVTVDLINRELYQA\n+ DSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENI\n+ EAKVGYNVKKSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFP\n+ DFTNWKFPCAQVIFDSDPAPAGKNVPAQLEEMSQAMIRGVMDESGEQFVAYFLPTEQT\n+ LEKRRTDFINGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGIYYNEL\n+ ETRVRLNKRRVKVGQQPNNTKLVSIFMRIHL"\n+ gene complement(4883..5802)\n+ /locus_tag="FUNB_000002"\n+ mRNA complement(join(4883..5539,5797..5802))\n+ /locus_tag="FUNB_000002"\n+ /product="hypothetical protein"\n+ CDS complement(join(4883..5539,5797..5802))\n+ /locus_tag="FUNB_000002"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUNB_000002-T1"\n+ /translation="MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPN\n+ PSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGNNSTCTASQPYALSGALSMLQQS\n+ PSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+ KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCEVCGEYRSNPLISRPL\n+ FC"\n+ gene complement(8696..10557)\n+ /locus_tag="FUNB_000003"\n+ mRNA complement(join(8696..10462,10549..10557))\n+ /locus_tag="FUNB_000003"\n+ /product="hypothetical protein"\n+ CDS complement(join(8696..10462,10549..10557))\n+ /locus_tag="FUNB_000003"\n+ 
/codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUNB_000003-T1"\n+ /translation="MIHSAKTTLVEWTEAMEDGNKGYQLIEKYYLDDQQKARELNIKR\n+ QLLQADIDKRRKQVVLLYDEQMTLEKNLERTACLYRSAHAERRQMVETWKSAVNQMTQ\n+ REHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELAIESLNEESSDMK\n+ NQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRDEKAKELENFASV\n+ MEKVNARLKSVQNKALNAEQRLQILEEMMQAEETALRNLDKEQEKVNEMLYRTQRQVI\n+ ELQDEEKVLKVQNDSLNSNLAAINRNQQQVNNELKRQTEIHYSLSFKCLEAERRLAEI\n+ KGLADDPEVEATNMARLNTLEQEYEKLQRLIATTEAQNKKLNYNMNNLVVQYNADEKE\n+ LEMVRFKIKEAQVYCEGTVKRLRQIRYENSELIVDLNMVKMRCSDLEVGIGGCEQGTY\n+ DLEQHRLAFRRAIKDRTVELRSQEDVLLLKKKHLNEELSTLRADLGERKKQIEAMKAR\n+ FELTAQLLGKNEDGSIMTSTQLKVVSAQERQMLADEGDALNKKVLKAEKEVVALENTL\n+ RQFDKSNDNYRKTFRSVDENSKGEL"\n+ gene complement(14247..15214)\n+ /locus_tag="FUNB_000004"\n+ mRNA complement(join(14247..14648,15209..15214))\n+ /locu'..b'1 agcttttggc actgctcaac gacaatgtcg tcgattgact gcgaaagcag tgcctcctgc\n+ 4501 tcaggatcca caatggagct gctgttgatg gcagctattt cggcgctcgc tggtgaaggc\n+ 4561 tgagtatgcg agtcgttgcc ctccaggaac gcctttatca gctgctccgg tgtctgggcc\n+ 4621 tcctcggtgg gacatcgatg ggtcctttga ctgaccttgt acctgaacgt cttttggcag\n+ 4681 agctcgcact tgtagggcat cactcccgtg tggatgcgcg tgtggacaag gaaagagact\n+ 4741 cgctgccgga aacacttgcc tgtggacgag tccgaaatac gaaaaggtta gacatggagt\n+ 4801 gacccggaaa agaaggtata cctctccttc atttaaagta aatagggcaa atcgcaatgg\n+ 4861 agtatgctca tttataagct ggctaacaaa ataaggggcg gctaattaag gggtttgatc\n+ 4921 gatactcacc gcagacttcg cacttgaagg gcttttcgcc gctgtgaatc ctctggtggt\n+ 4981 tatgcagcgt agacagttcc ttgaaggcgc gtccacaaac cccgcagaca tggggcttta\n+ 5041 cctcgctgtg gtagagcaaa tgcttgtggt acgactgctg gaaggtgaag gtcctggcgc\n+ 5101 agatctcgca tgtgtacggc atctcgccgg tgtggagccg cttgtgcttc ttcagaaagt\n+ 5161 acttggtggt gaaggacttg ctgcacacat cgcactccca cagcttcggg gtggccgtgc\n+ 5221 ccgactccgg cgaactgggg gactgctgga gcatgctcag cgccccactc agcgcgtatg\n+ 5281 gctgggatgc ggtgcacgtg gagttatttc cgttgccaag ggctccaggc tctgtgtgtc\n+ 5341 gaatgcgatc gcaaatgctc 
agcttgggcg tggcagtaac tgcactggtg gtagaagcgg\n+ 5401 gtgtaggact gggattagga tttggattgg gattggagca gggcacgccc atcatgtgca\n+ 5461 ctactttcag gtggattcga agggagccct tcattcggaa cgtcttggag cagagatggc\n+ 5521 acttgtaggg ctcctggtcc tgtataaagc aataatcgga atttcactta tatttatcaa\n+ 5581 ttcatcaata tgccctcatg gccaaatatt ccattacatt accgtctgtc tctcagtttc\n+ 5641 aaatttatgc acaaaaatca ttcactttca ttcactatat cacaaagttg ccatggtttt\n+ 5701 aaattgatca aaaacaaatt aatatctatc atatatatac atagtcatat gaacagttga\n+ 5761 aaaattaatt gaaaataatg ggaacgatat acgtacatac atcagttgtt tttaaaatat\n+ 5821 aagggtatat agatttcttt cttgttgttg ttgattttaa ttacgtcaaa cttttgtttc\n+ 5881 agattcaatg taaatggtct agctttttaa gtatgatttt tttttgctgc cagtgagcat\n+ 5941 agaaaaaaaa aatcaaaatc gatataagaa tatgcgaaag tgcattacga aactctttag\n+ 6001 ataatagcac ttaatatatg tacatagcca atagttaccg gttccttctg ttggggttcc\n+ 6061 ttttgcttgg gttctccctc cgcattttcg tggactaagc ggacgtgcat gtccctcagc\n+ 6121 tcggtattcc ggaaactgaa ctcgcaaatg tcgcagtggg cgggcggggt ggtccgcaca\n+ 6181 ggcggtgggg ttgggacgac gggcttggac ctggttcgct tggccctccg tttgggagga\n+ 6241 gctgcggcaa ggaagccccg ggacgaggcg ggttggccat tcgctggact ctcgtttccc\n+ 6301 tcctgagcca tcagacttgt gtgcgagaac aggtggatgg tgagcttgtc cagccccagg\n+ 6361 aagagctcct tgcagttggc aaagggacag gccagtgggc cgttggccgc cttgatcagc\n+ 6421 ctctgctgca gtgcgtcaat actgccgaaa ctgggcaccg cgcagagcgg acacagcacc\n+ 6481 gaggtggaac acatttcgcc agtgcactca atcgaatctt atgcaaatgc ttcacctcct\n+ 6541 attgggatta tcctcctgtt cggtctgtga tcatctattc aggagtccat tcccagactg\n+ 6601 cctagtcttt ctgctttcaa aattttctaa aaatatcagc aagtgaagat ttttgaaaac\n+ 6661 tttgggccca gcaatctgac ttctcggcac cgatgccagc taacgaaata atgaaaaata\n+ 6721 atgaaatgcc cggcgcggat cgtcgaatcg tcaagaagac tttcggaaac actcgcagca\n+ 6781 ccgaaatccc atctctcgaa caaggcagtc tcttttctcc gtgtctctgg gtagctcatt\n+ 6841 tcgaaatata gctctgagca cggctatata ctatatgtat gtagaattat ttctggccga\n+ 6901 tatatgttgc actggcggcc atatagcctt cgttctagtc tttgtaacgc acgatgcgca\n+ 6961 ggagcaattc gctgagatga 
ccacatgcga tttgcgggac ttatctagag atctatcatt\n+ 7021 atcgccagat tggtttaata attggctttt ccgccaatat ccaattggaa tatggttggt\n+ 7081 tactgcaatt gtcgctccat tttttaagca ctccataaaa agtaaacaca ttaatatgta\n+ 7141 ctcttattgg agatttcttc tttcgatttt agtttcggac cagtgaaaat cattcgtttc\n+ 7201 attttcgtaa ataagaactg agaaaatatt attattatat atatttcttt attaggaaaa\n+ 7261 tacgaagatt gagtatttca gattgaatta gcatatccgt ctaaatctta atgctgtaat\n+ 7321 gagcttactt gagatctgat caaaaccaat acaaaaccca caccaaaggt ggtagctaat\n+ 7381 atacatattt tgtgtaatac ttttgtagag tatttactat tcagcgattt aaacaagcaa\n+ 7441 tcgcctagac acacacattt gtccgcctat gtgtatgtgc accgagctat acccccactg\n+ 7501 aatcgctgtg tgctattttt atggccgcga tgctctcttg ttttgacccg cttgggcaac\n+//\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/compare/Yet_another.gbk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/compare/Yet_another.gbk Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,4148 @@\n+LOCUS sample 215740 bp DNA linear 21-JUL-2021\n+DEFINITION Yet another.\n+ACCESSION \n+VERSION\n+KEYWORDS .\n+SOURCE Yet another\n+ ORGANISM Yet another\n+ Unclassified.\n+REFERENCE 1 (bases 1 to 215740)\n+ AUTHORS Palmer,J.M.\n+ TITLE Direct Submission\n+ JOURNAL Submitted (21-JUL-2021) CFMR, USDA Forest Service, 1 Gifford\n+ Pinchot Drive, Madison, WI 53726, USA\n+COMMENT \'Annotated using 1.8.7\'.\n+FEATURES Location/Qualifiers\n+ source 1..215740\n+ /organism="Yet another"\n+ /mol_type="genomic DNA"\n+ gene complement(1092..1466)\n+ /locus_tag="FUND_000001"\n+ mRNA complement(1092..1466)\n+ /locus_tag="FUND_000001"\n+ /product="hypothetical protein"\n+ CDS complement(1092..1466)\n+ /locus_tag="FUND_000001"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUND_000001-T1"\n+ /translation="MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAG\n+ ESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKRFIRNPNYVKANEFYDKMLSSEY\n+ VSKRYKDLPPPHPGFGADQPPA"\n+ gene complement(2565..3142)\n+ /locus_tag="FUND_000002"\n+ mRNA complement(join(2565..2686,2883..3004,3138..3142))\n+ /locus_tag="FUND_000002"\n+ /product="hypothetical protein"\n+ CDS complement(join(2565..2686,2883..3004,3138..3142))\n+ /locus_tag="FUND_000002"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUND_000002-T1"\n+ /translation="MPLYGVSPASALWAGSRRQTVRRSRPSHHESWPATSPPAGRAQS\n+ LLPKRLLDVVTDLGLDVLQVLGLEARLLGGDVLGFAQP"\n+ gene complement(4248..5802)\n+ /locus_tag="FUND_000003"\n+ mRNA complement(join(4248..4742,4937..5539,5797..5802))\n+ /locus_tag="FUND_000003"\n+ /product="hypothetical protein"\n+ CDS complement(join(4248..4742,4937..5539,5797..5802))\n+ /locus_tag="FUND_000003"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUND_000003-T1"\n+ /translation="MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPN\n+ PSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGNNSTCTASQPYALSGALSMLQQS\n+ PSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+ 
KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCERVSFLVHTRIHTGVM\n+ PYKCELCQKTFRYKVSQRTHRCPTEEAQTPEQLIKAFLEGNDSHTQPSPASAEIAAIN\n+ SSSIVDPEQEALLSQSIDDIVVEQCQKLGICGVEPREEGQLISLQPVAVVHFSGNGSP\n+ LQQLQNLRIYSPQQTELPSSDGEVFQRFLMDAT"\n+ gene complement(7691..10664)\n+ /locus_tag="FUND_000004"\n+ mRNA complement(join(7691..8385,8707..10499,10657..10664))\n+ /locus_tag="FUND_000004"\n+ /product="hypothetical protein"\n+ CDS complement(join(7691..8385,8707..10499,10657..10664))\n+ /locus_tag="FUND_000004"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUND_000004-T1"\n+ /translation="MSNKREIDELTSRIKSAKTTLVEWTEAMEDGNKGYQLIEKYYLD\n+ DQQKARELNIKRQLLQADIDKRRKQVVLLYDEQMTLEKNLERTACLYRSAHAERRQMV\n+ ETWKSAVNQMTQREHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELA\n+ IESLNEESSDMKNQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRD\n+ EKAKELENFASVME'..b'1 agcttttggc actgctcaac gacaatgtcg tcgattgact gcgaaagcag tgcctcctgc\n+ 4501 tcaggatcca caatggagct gctgttgatg gcagctattt cggcgctcgc tggtgaaggc\n+ 4561 tgagtatgcg agtcgttgcc ctccaggaac gcctttatca gctgctccgg tgtctgggcc\n+ 4621 tcctcggtgg gacatcgatg ggtcctttga ctgaccttgt acctgaacgt cttttggcag\n+ 4681 agctcgcact tgtagggcat cactcccgtg tggatgcgcg tgtggacaag gaaagagact\n+ 4741 cgctgccgga aacacttgcc tgtggacgag tccgaaatac gaaaaggtta gacatggagt\n+ 4801 gacccggaaa agaaggtata cctctccttc atttaaagta aatagggcaa atcgcaatgg\n+ 4861 agtatgctca tttataagct ggctaacaaa ataaggggcg gctaattaag gggtttgatc\n+ 4921 gatactcacc gcagacttcg cacttgaagg gcttttcgcc gctgtgaatc ctctggtggt\n+ 4981 tatgcagcgt agacagttcc ttgaaggcgc gtccacaaac cccgcagaca tggggcttta\n+ 5041 cctcgctgtg gtagagcaaa tgcttgtggt acgactgctg gaaggtgaag gtcctggcgc\n+ 5101 agatctcgca tgtgtacggc atctcgccgg tgtggagccg cttgtgcttc ttcagaaagt\n+ 5161 acttggtggt gaaggacttg ctgcacacat cgcactccca cagcttcggg gtggccgtgc\n+ 5221 ccgactccgg cgaactgggg gactgctgga gcatgctcag cgccccactc agcgcgtatg\n+ 5281 gctgggatgc ggtgcacgtg gagttatttc cgttgccaag ggctccaggc tctgtgtgtc\n+ 5341 gaatgcgatc gcaaatgctc agcttgggcg 
tggcagtaac tgcactggtg gtagaagcgg\n+ 5401 gtgtaggact gggattagga tttggattgg gattggagca gggcacgccc atcatgtgca\n+ 5461 ctactttcag gtggattcga agggagccct tcattcggaa cgtcttggag cagagatggc\n+ 5521 acttgtaggg ctcctggtcc tgtataaagc aataatcgga atttcactta tatttatcaa\n+ 5581 ttcatcaata tgccctcatg gccaaatatt ccattacatt accgtctgtc tctcagtttc\n+ 5641 aaatttatgc acaaaaatca ttcactttca ttcactatat cacaaagttg ccatggtttt\n+ 5701 aaattgatca aaaacaaatt aatatctatc atatatatac atagtcatat gaacagttga\n+ 5761 aaaattaatt gaaaataatg ggaacgatat acgtacatac atcagttgtt tttaaaatat\n+ 5821 aagggtatat agatttcttt cttgttgttg ttgattttaa ttacgtcaaa cttttgtttc\n+ 5881 agattcaatg taaatggtct agctttttaa gtatgatttt tttttgctgc cagtgagcat\n+ 5941 agaaaaaaaa aatcaaaatc gatataagaa tatgcgaaag tgcattacga aactctttag\n+ 6001 ataatagcac ttaatatatg tacatagcca atagttaccg gttccttctg ttggggttcc\n+ 6061 ttttgcttgg gttctccctc cgcattttcg tggactaagc ggacgtgcat gtccctcagc\n+ 6121 tcggtattcc ggaaactgaa ctcgcaaatg tcgcagtggg cgggcggggt ggtccgcaca\n+ 6181 ggcggtgggg ttgggacgac gggcttggac ctggttcgct tggccctccg tttgggagga\n+ 6241 gctgcggcaa ggaagccccg ggacgaggcg ggttggccat tcgctggact ctcgtttccc\n+ 6301 tcctgagcca tcagacttgt gtgcgagaac aggtggatgg tgagcttgtc cagccccagg\n+ 6361 aagagctcct tgcagttggc aaagggacag gccagtgggc cgttggccgc cttgatcagc\n+ 6421 ctctgctgca gtgcgtcaat actgccgaaa ctgggcaccg cgcagagcgg acacagcacc\n+ 6481 gaggtggaac acatttcgcc agtgcactca atcgaatctt atgcaaatgc ttcacctcct\n+ 6541 attgggatta tcctcctgtt cggtctgtga tcatctattc aggagtccat tcccagactg\n+ 6601 cctagtcttt ctgctttcaa aattttctaa aaatatcagc aagtgaagat ttttgaaaac\n+ 6661 tttgggccca gcaatctgac ttctcggcac cgatgccagc taacgaaata atgaaaaata\n+ 6721 atgaaatgcc cggcgcggat cgtcgaatcg tcaagaagac tttcggaaac actcgcagca\n+ 6781 ccgaaatccc atctctcgaa caaggcagtc tcttttctcc gtgtctctgg gtagctcatt\n+ 6841 tcgaaatata gctctgagca cggctatata ctatatgtat gtagaattat ttctggccga\n+ 6901 tatatgttgc actggcggcc atatagcctt cgttctagtc tttgtaacgc acgatgcgca\n+ 6961 ggagcaattc gctgagatga ccacatgcga 
tttgcgggac ttatctagag atctatcatt\n+ 7021 atcgccagat tggtttaata attggctttt ccgccaatat ccaattggaa tatggttggt\n+ 7081 tactgcaatt gtcgctccat tttttaagca ctccataaaa agtaaacaca ttaatatgta\n+ 7141 ctcttattgg agatttcttc tttcgatttt agtttcggac cagtgaaaat cattcgtttc\n+ 7201 attttcgtaa ataagaactg agaaaatatt attattatat atatttcttt attaggaaaa\n+ 7261 tacgaagatt gagtatttca gattgaatta gcatatccgt ctaaatctta atgctgtaat\n+ 7321 gagcttactt gagatctgat caaaaccaat acaaaaccca caccaaaggt ggtagctaat\n+ 7381 atacatattt tgtgtaatac ttttgtagag tatttactat tcagcgattt aaacaagcaa\n+ 7441 tcgcctagac acacacattt gtccgcctat gtgtatgtgc accgagctat acccccactg\n+ 7501 aatcgctgtg tgctattttt atggccgcga tgctctcttg ttttgacccg cttgggcaac\n+//\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate.loc Mon Oct 04 19:39:38 2021 +0000 |
b |
@@ -0,0 +1,7 @@ +# this is a tab separated file describing the location of funannotate databases used for the +# funannotate annotation tool +# +# the columns are: +# value description format_version path +# +2021-07-20-120000 Funannotate database 2021-07-20-120000 1.0 ${__HERE__}/funannotate_db |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/Pfam-A.clans.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/Pfam-A.clans.tsv Mon Oct 04 19:39:38 2021 +0000 |
b |
@@ -0,0 +1,31 @@ +PF00001 CL0192 GPCR_A 7tm_1 7 transmembrane receptor (rhodopsin family) +PF00002 CL0192 GPCR_A 7tm_2 7 transmembrane receptor (Secretin family) +PF00003 CL0192 GPCR_A 7tm_3 7 transmembrane sweet-taste receptor of 3 GCPR +PF00004 CL0023 P-loop_NTPase AAA ATPase family associated with various cellular activities (AAA) +PF00005 CL0023 P-loop_NTPase ABC_tran ABC transporter +PF00006 CL0023 P-loop_NTPase ATP-synt_ab ATP synthase alpha/beta family, nucleotide-binding domain +PF00007 CL0079 Cystine-knot Cys_knot Cystine-knot domain +PF00008 CL0001 EGF EGF EGF-like domain +PF00009 CL0023 P-loop_NTPase GTP_EFTU Elongation factor Tu GTP binding domain +PF00010 HLH Helix-loop-helix DNA-binding domain +PF00011 CL0190 HSP20 HSP20 Hsp20/alpha crystallin family +PF00012 CL0108 Actin_ATPase HSP70 Hsp70 protein +PF00013 CL0007 KH KH_1 KH domain +PF00014 Kunitz_BPTI Kunitz/Bovine pancreatic trypsin inhibitor domain +PF00015 MCPsignal Methyl-accepting chemotaxis protein (MCP) signalling domain +PF00016 RuBisCO_large Ribulose bisphosphate carboxylase large chain, catalytic domain +PF00017 CL0541 SH2-like SH2 SH2 domain +PF00018 CL0010 SH3 SH3_1 SH3 domain +PF00019 CL0079 Cystine-knot TGF_beta Transforming growth factor beta like domain +PF00020 CL0607 TNF_receptor TNFR_c6 TNFR/NGFR cysteine-rich region +PF00021 CL0117 uPAR_Ly6_toxin UPAR_LY6 u-PAR/Ly-6 domain +PF00022 CL0108 Actin_ATPase Actin Actin +PF00023 CL0465 Ank Ank Ankyrin repeat +PF00024 CL0168 PAN PAN_1 PAN domain +PF00025 CL0023 P-loop_NTPase Arf ADP-ribosylation factor family +PF00026 CL0129 Peptidase_AA Asp Eukaryotic aspartyl protease +PF00027 CL0029 Cupin cNMP_binding Cyclic nucleotide-binding domain +PF00028 CL0159 E-set Cadherin Cadherin domain +PF00029 CL0375 Transporter Connexin Connexin +PF00030 CL0333 gCrystallin Crystall Beta/Gamma crystallin +PF00031 CL0121 Cystatin Cystatin Cystatin domain |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/Pfam-A.hmm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/Pfam-A.hmm Mon Oct 04 19:39:38 2021 +0000 |
[ |
b'@@ -0,0 +1,2268 @@\n+HMMER3/f [3.1b2 | February 2015]\n+NAME 1-cysPrx_C\n+ACC PF10417.11\n+DESC C-terminal domain of 1-Cys peroxiredoxin\n+LENG 40\n+ALPH amino\n+RF no\n+MM no\n+CONS yes\n+CS yes\n+MAP yes\n+DATE Wed Feb 24 18:37:46 2021\n+NSEQ 40\n+EFFN 17.426758\n+CKSUM 4086680297\n+GA 21.10 21.10;\n+TC 21.10 21.10;\n+NC 21.00 21.00;\n+BM hmmbuild HMM.ann SEED.ann\n+SM hmmsearch -Z 57096847 -E 1000 --cpu 4 HMM pfamseq\n+STATS LOCAL MSV -7.5463 0.71948\n+STATS LOCAL VITERBI -7.8624 0.71948\n+STATS LOCAL FORWARD -4.3303 0.71948\n+HMM A C D E F G H I K L M N P Q R S T V W Y \n+ m->m m->i m->d i->m i->i d->m d->d\n+ COMPO 2.28046 4.31208 2.83393 2.63913 3.90855 2.69988 3.89812 3.33401 2.56310 2.85023 3.99954 3.22924 2.52123 2.90328 3.31238 2.94055 2.70512 2.59551 3.49266 3.82715\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.00226 6.48754 7.20989 0.61958 0.77255 0.00000 *\n+ 1 0.29666 6.14436 6.78514 6.79783 7.06332 2.55785 7.22049 6.57837 6.66651 6.27638 3.28757 5.91223 5.83978 6.69238 6.58162 2.20136 4.83343 5.59959 8.41086 7.43107 1 A - - H\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.00226 6.48754 7.20989 0.61958 0.77255 0.48576 0.95510\n+ 2 4.59591 5.92009 6.57211 5.96147 1.92899 5.81035 6.10135 2.33093 5.75927 0.69439 2.86149 5.97820 6.07717 5.78793 5.72916 5.13924 4.81708 2.59612 3.18569 3.35842 2 l - - H\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.00226 6.48754 7.20989 0.61958 0.77255 0.48576 0.95510\n+ 3 4.81290 7.05274 3.71696 4.47757 6.60126 5.41623 3.72993 5.92180 2.06538 3.59487 6.10993 4.89014 5.75663 0.42291 2.54802 4.76779 4.95656 5.56452 7.24472 6.08615 3 Q - - H\n+ 2.68618 4.42225 
2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.00226 6.48754 7.20989 0.61958 0.77255 0.48576 0.95510\n+ 4 2.64100 5.28125 5.84007 3.33362 1.80025 5.06896 2.72827 3.71332 5.01717 1.75203 2.65498 5.22280 5.43290 5.15329 5.03455 4.37913 1.80041 2.31249 5.90246 2.63298 4 l - - H\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.00226 6.48754 7.20989 0.61958 0.77255 0.48576 0.95510\n+ 5 1.41804 5.70097 4.57246 4.01393 4.89018 2.99141 3.06166 4.26400 3.98607 3.95402 4.82703 3.11439 5.16775 4.30619 4.38279 2.19082 1.83510 1.66601 6.29409 3.75702 5 a - - H\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.00226 6.48754 7.20989 0.61958 0.77255 0.48576 0.95510\n+ 6 2.40940 6.27006 0.95419 1.88136 5.61364 4.53669 4.72417 5.09838 3.04943 4.57453 3.59812 4.00473 4.93049 3.33824 2.90687 3.72620 2.65150 4.65993 3.45464 5.29359 6 d - - H\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.5847'..b'.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01335 4.71894 5.44129 0.61958 0.77255 0.48576 0.95510\n+ 442 2.29864 4.36166 2.78326 3.38999 2.53742 2.77187 4.12835 1.90087 3.31519 2.56018 3.47681 3.70473 4.21526 3.57901 3.58783 3.10825 3.02383 2.10914 4.97533 3.76638 497 i - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01335 4.71894 5.44129 0.61958 0.77255 0.48576 0.95510\n+ 443 1.63203 4.34013 4.06646 3.49075 2.53230 
3.86648 4.15865 2.76897 3.39829 2.04499 2.63095 2.72733 4.24075 3.64956 3.64097 3.14318 3.03596 2.57834 4.93886 3.73074 498 a - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01335 4.71894 5.44129 0.61958 0.77255 0.48576 0.95510\n+ 444 3.45776 4.79585 5.18920 4.63265 3.52372 4.70258 5.10103 1.90217 4.47095 0.76832 3.34020 4.80189 4.96861 4.60775 4.56613 4.05504 2.50816 2.41399 5.48117 4.38492 499 l - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01335 4.71894 5.44129 0.61958 0.77255 0.48576 0.95510\n+ 445 2.36957 4.45811 5.03009 4.44127 2.41688 4.36160 4.72344 1.58595 4.26807 1.49345 3.37957 4.49747 4.67797 4.39767 4.31200 3.68800 3.35368 1.84206 5.17229 4.01590 500 l - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01335 4.71894 5.44129 0.61958 0.77255 0.48576 0.95510\n+ 446 2.96533 4.42430 4.42687 3.84923 3.41907 4.08405 4.39794 2.61936 3.71705 1.40335 1.95988 2.61322 4.43548 3.93974 3.89559 3.38106 3.19904 2.03697 5.04359 3.86701 501 l - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01335 4.71894 5.44129 0.61958 0.77255 0.48576 0.95510\n+ 447 2.78421 4.21512 4.43112 3.83593 2.56381 3.91375 4.21971 2.14171 3.68350 2.40586 3.33147 3.96427 4.28054 3.87190 2.63279 2.49740 3.01625 2.02770 2.95875 2.66709 502 v - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01335 4.71894 5.44129 0.61958 0.77255 0.48576 0.95510\n+ 448 2.82824 5.21690 3.11861 2.17918 
2.94314 3.60476 3.77516 3.98203 1.59578 3.49916 4.27124 3.09125 3.99567 2.88856 2.31346 2.30812 3.05474 3.59349 5.65674 4.29044 503 k - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01335 4.71894 5.44129 0.61958 0.77255 0.48576 0.95510\n+ 449 3.39753 5.46518 3.96355 3.19882 4.85160 4.06610 3.96064 4.17905 1.03571 1.93276 4.52854 3.58771 4.40326 3.09886 1.88259 3.41029 3.55223 3.89272 5.74795 4.58563 504 k - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.00900 4.71460 * 0.61958 0.77255 0.00000 *\n+//\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/Pfam-A.hmm.h3f |
b |
Binary file test-data/funannotate_db/Pfam-A.hmm.h3f has changed |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/Pfam-A.hmm.h3i |
b |
Binary file test-data/funannotate_db/Pfam-A.hmm.h3i has changed |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/Pfam-A.hmm.h3m |
b |
Binary file test-data/funannotate_db/Pfam-A.hmm.h3m has changed |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/Pfam-A.hmm.h3p |
b |
Binary file test-data/funannotate_db/Pfam-A.hmm.h3p has changed |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/dbCAN.hmm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/dbCAN.hmm Mon Oct 04 19:39:38 2021 +0000 |
[ |
b'@@ -0,0 +1,613 @@\n+HMMER3/b [3.0 | March 2010]\n+NAME CBM10\n+LENG 28\n+ALPH amino\n+RF no\n+CS no\n+MAP yes\n+DATE Thu Apr 21 15:04:19 2011\n+NSEQ 84\n+EFFN 8.697876\n+CKSUM 1939305542\n+STATS LOCAL MSV -7.3395 0.71998\n+STATS LOCAL VITERBI -7.4498 0.71998\n+STATS LOCAL FORWARD -3.9737 0.71998\n+HMM A C D E F G H I K L M N P Q R S T V W Y \n+ m->m m->i m->d i->m i->i d->m d->d\n+ COMPO 3.06033 2.42663 2.85747 2.77283 4.58046 2.24214 4.56942 3.29197 3.49682 3.34028 4.33944 2.39813 3.09771 2.95262 3.60586 2.44839 2.84569 3.24117 2.72301 3.28669\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.59367 5.66823 0.81137 0.61958 0.77255 0.00000 *\n+ 1 2.74340 3.47729 3.44643 2.64352 4.17985 3.79353 4.02282 3.57300 2.87097 3.22136 2.03109 3.38316 4.18747 1.79120 3.30841 2.03216 2.98385 3.28775 5.51287 4.21451 1 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.00925 5.08381 5.80616 0.61958 0.77255 0.72961 0.65797\n+ 2 3.21366 0.33490 5.23672 5.17809 5.69922 3.92522 5.89105 5.22956 5.14397 4.94899 5.73770 3.11988 4.78715 5.32569 5.20447 2.46009 3.79312 4.43668 7.07761 5.95722 2 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.00690 5.37599 6.09834 0.61958 0.77255 0.73167 0.65606\n+ 3 3.46517 6.03524 2.03286 2.67508 5.36194 2.25828 4.36563 4.86858 2.77875 4.33291 5.09631 1.07839 4.52524 2.62719 3.73072 2.75509 3.71412 4.41890 6.46315 5.00841 3 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.04091 5.49877 3.32435 0.61958 0.77255 0.52775 0.89143\n+ 4 2.51597 5.24536 3.21395 3.79383 4.81190 
3.30725 4.89534 4.21717 3.89295 3.90352 4.76570 4.13904 4.73005 4.17210 4.26805 3.51068 1.58681 3.86875 0.87084 4.95297 4 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.00570 5.56595 6.28829 0.61958 0.77255 0.68571 0.70065\n+ 5 1.88947 5.00491 4.00042 3.04471 4.13869 4.17114 3.84034 3.52228 3.40249 3.22374 4.10809 3.86994 4.55537 3.71667 3.77844 2.67165 2.19530 3.28758 3.58338 1.27239 5 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.00552 5.59725 6.31960 0.61958 0.77255 0.60266 0.79264\n+ 6 3.27926 5.74099 2.79040 2.94678 5.08062 1.27592 4.24831 3.41153 3.01359 4.05572 4.81107 3.21498 2.15563 2.32440 3.50951 2.31938 3.51838 4.13470 6.20511 4.79969 6 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.00538 5.62391 6.34625 0.61958 0.77255 0.69965 0.68669\n+ 7 2.98451 5.68149 2.03356 2.71108 5.01893 3.09684 4.15179 4.49988 2.89056 3.98355 4.72438 1.82442 4.35665 2.68802 3.38251 2.74393 1.71463 4.06807 3.52699 3.90983 '..b' 4.13890 203 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01622 4.52558 5.24792 0.61958 0.77255 0.61781 0.77462\n+ 156 3.23629 4.53295 5.17116 4.60062 2.56767 4.55401 4.94436 1.53611 4.45009 1.93006 2.67230 4.68873 4.82232 4.56190 4.49974 3.89871 3.47038 1.24322 5.32187 4.18538 204 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01622 4.52558 5.24792 0.61958 0.77255 0.61781 0.77462\n+ 157 3.35666 5.86974 0.68984 2.46426 4.92782 3.56196 
2.78368 4.75794 3.22196 4.22981 5.12544 3.04989 4.21061 3.34715 3.81641 3.23366 3.66181 4.32892 6.19564 4.65728 205 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01622 4.52558 5.24792 0.61958 0.77255 0.61781 0.77462\n+ 158 2.67298 5.45036 2.13159 2.39143 4.78238 2.86513 3.84529 4.27738 2.69136 3.76387 4.54115 1.53088 3.99325 2.24761 3.21918 2.86652 3.18624 3.84376 5.91068 4.46667 206 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01622 4.52558 5.24792 0.61958 0.77255 0.61781 0.77462\n+ 159 3.47622 4.69232 5.49800 5.03902 4.03274 5.04172 5.78958 1.10648 4.96863 2.46343 3.77576 5.20399 5.28896 5.22948 5.14151 4.47780 3.74214 0.89149 6.13647 4.91519 207 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01622 4.52558 5.24792 0.61958 0.77255 0.61781 0.77462\n+ 160 2.97740 5.38053 3.16559 1.47840 4.78122 3.66390 3.76082 4.20817 1.84043 3.64747 4.43358 3.13556 4.05699 2.52986 2.01786 2.93673 3.18400 3.80690 5.72537 4.41283 208 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01622 4.52558 5.24792 0.61958 0.77255 0.61781 0.77462\n+ 161 3.57702 4.81880 5.48209 4.93951 3.50997 4.99824 5.42819 1.75624 4.81911 0.87415 3.27296 5.12067 5.15674 4.87874 4.88006 4.38284 3.80904 1.60841 5.63134 4.57121 209 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01622 4.52558 5.24792 0.61958 0.77255 0.61781 0.77462\n+ 162 2.76975 4.18299 4.46129 3.86923 2.59055 3.91437 4.21294 
2.01754 3.71166 2.30234 3.27364 3.97872 4.27520 3.88975 3.81544 3.21651 2.25828 2.00412 4.76988 2.10687 210 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.21940 4.52558 1.68107 0.61958 0.77255 0.61781 0.77462\n+ 163 2.75776 4.82375 2.92091 2.81848 4.73679 1.07105 4.24317 4.29296 3.20401 3.89127 4.73515 1.86580 4.00371 3.46690 3.61880 2.84293 3.19250 3.75461 5.98100 4.66726 211 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01339 4.31957 * 0.61958 0.77255 0.00000 *\n+//\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/dbCAN.hmm.h3f |
b |
Binary file test-data/funannotate_db/dbCAN.hmm.h3f has changed |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/dbCAN.hmm.h3i |
b |
Binary file test-data/funannotate_db/dbCAN.hmm.h3i has changed |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/dbCAN.hmm.h3m |
b |
Binary file test-data/funannotate_db/dbCAN.hmm.h3m has changed |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/dbCAN.hmm.h3p |
b |
Binary file test-data/funannotate_db/dbCAN.hmm.h3p has changed |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/funannotate-db-info.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/funannotate-db-info.txt Mon Oct 04 19:39:38 2021 +0000 |
b |
@@ -0,0 +1,10 @@ +merops diamond /tmp/prout/merops.dmnd 12.0 2017-10-04 5009 a6dd76907896708f3ca5335f58560356 +uniprot diamond /tmp/prout/uniprot.dmnd 2021_03 2021-06-02 565254 68ed1e475d13bb3d5574c53822d11cd3 +dbCAN hmmer3 /tmp/prout/dbCAN.hmm 9.0 2020-08-04 641 04696dfba1c3bb82ff9b72cfbb3e4a65 +pfam hmmer3 /tmp/prout/Pfam-A.hmm 34.0 2021-03 19179 f83c0d00445257fd9c066ad3e9e10568 +repeats diamond /tmp/prout/repeats.dmnd 1.0 2021-07-19 11950 4e8cafc3eea47ec7ba505bb1e3465d21 +go text /tmp/prout/go.obo 2021-07-02 2021-07-02 47228 f5b79fe1a6d6a67c542e39da5d4661dc +mibig diamond /tmp/prout/mibig.dmnd 1.4 2021-07-19 31023 118f2c11edde36c81bdea030a0228492 +interpro xml /tmp/prout/interpro.xml 86.0 2021-06-03 38913 0d8c575f88f397397b9491520b38db1e +busco_outgroups outgroups /tmp/prout/outgroups 1.0 2021-07-19 8 6795b1d4545850a4226829c7ae8ef058 +gene2product text /tmp/prout/ncbi_cleaned_gene_products.txt 1.70 2021-06-15 34039 e93924259b8294255def54097bdab07b |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/go.obo --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/go.obo Mon Oct 04 19:39:38 2021 +0000 |
[ |
@@ -0,0 +1,34 @@ +format-version: 1.2 +data-version: releases/2021-08-18 +subsetdef: chebi_ph7_3 "Rhea list of ChEBI terms representing the major species at pH 7.3." +subsetdef: gocheck_do_not_annotate "Term not to be used for direct annotation" +subsetdef: gocheck_do_not_manually_annotate "Term not to be used for direct manual annotation" +subsetdef: goslim_agr "AGR slim" +subsetdef: goslim_aspergillus "Aspergillus GO slim" +subsetdef: goslim_candida "Candida GO slim" +subsetdef: goslim_chembl "ChEMBL protein targets summary" +subsetdef: goslim_drosophila "Drosophila GO slim" +subsetdef: goslim_flybase_ribbon "FlyBase Drosophila GO ribbon slim" +subsetdef: goslim_generic "Generic GO slim" +subsetdef: goslim_metagenomics "Metagenomics GO slim" +subsetdef: goslim_mouse "Mouse GO slim" +subsetdef: goslim_pir "PIR GO slim" +subsetdef: goslim_plant "Plant GO slim" +subsetdef: goslim_pombe "Fission yeast GO slim" +subsetdef: goslim_synapse "synapse GO slim" +subsetdef: goslim_yeast "Yeast GO slim" +synonymtypedef: syngo_official_label "label approved by the SynGO project" +synonymtypedef: systematic_synonym "Systematic synonym" EXACT +default-namespace: gene_ontology +ontology: go +property_value: http://purl.org/dc/elements/1.1/description "The Gene Ontology (GO) provides a framework and set of concepts for describing the functions of gene products from all organisms." xsd:string +property_value: http://purl.org/dc/elements/1.1/title "Gene Ontology" xsd:string +property_value: http://purl.org/dc/terms/license http://creativecommons.org/licenses/by/4.0/ +property_value: owl:versionInfo "2021-08-18" xsd:string + +[Term] +id: GO:0000001 +name: mitochondrion inheritance +namespace: biological_process +def: "The distribution of mitochondria, including the mitochondrial genome, into daughter cells after mitosis or meiosis, mediated by interactions between mitochondria and the cytoskeleton." 
[GOC:mcc, PMID:10873824, PMID:11389764] +synonym: "mitochondrial inheritance" EXACT [] |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/insecta/ancestral --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/insecta/ancestral Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,10899 @@\n+>EOG090W0028\n+VPLKDNQDVACFLVTKHSWKGKYKRIFSIGTAGITTYNPDKLEVTNKWLYSDVISVAPEF\n+VLTLKKDKKVDSLKFSSEHRAELLTEALKYFAEKPKRYEAYKLHWSDTRLPVVLEVTPAS\n+LDQLDPATNTVLASYAYKDIEGIGGFVIVVGGFSRLHLFEIKKKILESALGIEIKVITLE\n+EFEEQRLGKYSGDEHQTSLSEFTVEKVRHKEPVRRLLCLSETCLLERDPQTYSIVTLRPL\n+SDVFALVRIEYLNGQVRSYLATDRDSLLASLLDGVRASGNRDVHVKIKKTERGKRLGPLV\n+DEEVEALLLKLLQNEVLERFNANVPYSGLLYSVTQDGLFAENKEKLILEALQALVQKELE\n+AQFHALRRLVASKVGFAAFTKLSGFREAIGKKVVKALKRNDEAVTQAAIDLICALMQPMD\n+LDLRQEQLNKSSLLSSKKFLESLLDMWTEHVSKGTGALVVSAMLDLLTFALCVPYSETTD\n+GKQFDALLELVAERGRVLFKLFQHPSLAIVKGAGLVMRAIIEEGVAAKMQELALAEGALP\n+RHLLAALYTRLLTHRQLSRHLVGLWVTAMELLKRILPAGLLAFLESEEKVPEEEKLNVRD\n+NLKLAQDHASKKKVEKHLEALKHWGAKVEKIKERPVVLRKRRERKKSKLFYYKFNKDHAL\n+PNLIWNHKTREELREALENELRAFESDKELAGLVAWNYAEFEVKYQCLADEVKIGDYYLR\n+LLLEKDDSDSLIRKSYELFNDLYHRFLLTTKVELKVLCLQALAIVYGRYYEDIGPFSDTK\n+YIVQLLDRCLDRDRLVLFLKKLILHKRNVKEILDVRILVDLLTLAHLHTSRAEKEWYYNE\n+RKGPVSFKELKELYKKGKITAKTKVWAQGLDGWRSLQQVPQLKWTLVAKGSPVLNESELA\n+ALILDILIKLTEYFPSRAVIRPLPRVKRLLSELACLPHIVQLLLTFDPVLVEKVATLLLE\n+IMKDNPEVSKLYLTGVFYFILLYTGSNVLPIARFLKLTHTKQAFRSDESDIMQRSILGQL\n+LPEAMVSYLENHGAEKFAEIFLGEFDTPEAIWNSEMRRLLIEKIAAHIADFTPRLRSHTR\n+ARYQYLAIPAVRYPQLEKELFCNIFYLRHLCDTAKFPDWPIADPVKLLKDVLEAWKKEVE\n+KKPPAMTVEEAYKELGLDEAAVRKAYYKLAQKYHPDKNPEGRDKFEAVNKAYEFLCSRSS\n+WSGPNPNNIVLILRTQSILFERYSELRPYKYAGYPQLIKTIKLETKDEQLFSKLLAAASE\n+LAYHTVKCSALNAEELRREEGLEVLLEAYSRCVSVLSKSSKEEDQVCLNITRCFAVAAKF\n+EACRDKIVELPQLVKDLVRVLKFKHLAADSELQLQLVKAGVLWSLLLFLFEYDYTLEESG\n+VERSEEENKQEVANKLAKLAVKACAALAGYLEKLLTPYLARKLILKILTSNTENPYLIWD\n+NGTRAELLEFLEEKRFKYSAHKDELKIGEVFIRIYNEQPTFPINPKEFVLDLLEFLKHVV\n+MALEALANVIKNNKGVEIQCIGKFKLLFGLLSIKKAALEVISLVSRNKECVEDIAASEVL\n+VKLLLLLKVLDTLSALLKIVKEALAKGAVLYLLDLFCNSIREAAAELLAKLSADKLSGPK\n+VRLTLSKFLPKLLADALRDSPVQLFESKHENPELIWDDEARKRVNELVVGGVYLRLFVAN\n+PAWTLRKPKEFLSDLLDTVLELLSKLELATTALVALLRAQPALADAVPSLGHIPKLVRQL\n+KSALLVLHQLALSEICVSAISQTECISPLKRDLIAVACETLSRLFDKLVKQALEAELVKY\n+LLELLESRTKAQIVKALKAMSRSGEKVKAILEKSSVWAEYKDQKHDLFISAAGYLTAGPS\n+TSPPPVD\
n+>EOG090W002U\n+MTTDISVVEYDGGNSSSRLFERSRIKAERESVQKKTFQKWVNSHLVRRIGDLLRDGKKLI\n+KLLEVLSGERLPRPTKGKMRIHCLENVDKALQFLREQRVHLENLGSHDIVDGNARLSLGL\n+IWTIILRFQIQDITIEETDNKETKSAKDALLLWCQMKTAGYHNVNVRNFTTSWRDGLAFN\n+AIIHKHRPDLIQFEKLSKSNAIYNLNNAFNVAEDKLGLTKLLDAEDVFVEQPDEKSIITY\n+VVTYYHYFSKLKQETVQGKRIGKVVGIAMENDRMIKEYESLTSDLLKWIEATIEALGDRK\n+FANSLVGVQQQLAQFSNYRTVEKPPKFVEKGNLEVLLFTLQSKMRANNQKPYTPKEGKMI\n+SDINKAWERLEKAEHERELALREELIRQEKLEQLAARFNRKASMRETWLSENQRLVSQDN\n+FGFDLAAVEAAAKKHEAIETDIFAYEERVQAVVAVSQELEAENYHDIERINARKDNVLRL\n+WNYLLELLRARRLRLELSLQLQQNFQEMLYILDSMEELKLRLLTDDYGKHLMGVEDLLQK\n+HSLVEADINVLGERVKAVVQQSQRFLYKPCDPAIIVERVQQLEDAYAELVKLAVERRARL\n+EESRKLWQFYWDMADEENWIKEKEQIVSTADIGHDLTTVNLLLSKHKALENEIQSHEPQL\n+MSVVAVGDELVHFGADRIQERLKEILAKWNHLLDLRRKRLEAVDYHQLFADADDVDIWML\n+DTLRLVSSEDVGRDEANVQSLLKKHKDVTDELKNYALHQQAEELERLASIDSRYKELLEL\n+AKLRKQRLLDALSLYKLLSESDGVEQWIGEKDRMLDTMVPAKDIEDVEILKHRYDGFDKE\n+MNANASRVAVVNQLARQLLHVEHPNSEQIVARQNELNQKWAELREKAEAKRDELNSAHGV\n+QTFYIECRETVSWIEDKKRILQETDSLEMDLTGVMTLQRRLSGMERDLAAIQAKLDSLEK\n+EAEAIHPEEAALIRERIAQIELIWEQLTQMLKERDAKLEEAGDLHRFLRDLDHFQAWLTK\n+TQTDVASEDTPTSLAEAEKLLSQHQSIKEEIDNYTDDYKKMMEYGERLTAEPSTQDDPQY\n+MFLRERLKALKDGWEELHQMWENRQQLLSQSLNLQLFNRDARQAEVLLSQQEHVLAKDET\n+PVNLEQAENLLKRHEAFLTTMEANDDKINSVVQFAERLVDEEHFAADKVKKKAENIEERA\n+NREKAEKLKDQEFLQDLEELSEWVQEKKITAQDETYRSAKTVHSKWTRHQAFEAEIASNK\n+ERLKPELAEIIEPKLKELADQFEELETTTKEKGERLFDANREVLIHQTCDDIDSWLNELE\n+KQIESEDTGSDLASVNILMQKQQLIETQMAVKAKQVEELEKQAEYLQKTVPVKKEKVEER\n+FEKLKAPLLERQRQLEKKKEAFQFRRDVEDEKLWIAEKLPLATSTEYGNSLFNVHVLKKK\n+NQSLKTEIDNHEPRIKAVCNNGQKLIDEGHEDAKEFEKLIEELWKELKDAVEEREKAQQY\n+LFDASEAESWMSEQELYMMVEDRGKDEISAQNLMKKHESLEKAVEDYAETIRQLGETARQ\n+LDQIAVKQSQVDKLYAGLKDLAGERRAKLDEALQLFMLNREVDDLEQWIAEREVVAGSHE\n+LGQDYDHVTLLWERFKEFARDTEAVGSERVAAVNEIADELIAAGHSDSATIAEWKDGLNE\n+AWQDLLELIETRTQMLAASRELHKFFHDCKDVLGRILEKQSDELGRDAGSVSALQRKHQN\n+FLQDLSTLQSQVQQIQEESAKLQASYAGDKAKEITNREAEVVAAWANLQALCDARKAKLA\n+DTGDLFFFNLVRTLLLWLDDVVRQMNTSEKPRDVSGVELLMNNHQSLKAEIDAREDNFSA\n+CISLGKELLARNHYASIKEKLLALTNQRNA
LLKRWEERWENLQLILEVYQFARDAAVAEA\n+WLIAQEPYLLSQELGHTIDEVENLIKKHEAFEKSA'..b'OG090W0MK4\n+DAEQIKSFKDFLLSYNKLSELCFVDCISDFTSREVEEKCALNCLEKYLKMNQRISQRFQE\n+FQLIANENALAAAKK\n+>EOG090W0MLJ\n+QKKLQELDKYKQVQKEYKKAVKQRQQLDGQLNENKVVELDLLKEDNEVYKLIGPVLVKQE\n+LEEAKQNVSKRIEYISKELKRVEDLIASLEKKQEKHRENLEKLQQQLQ\n+>EOG090W0MM4\n+LYEPDYLKPKIPLYDVLNVQIKGYDYAVLESYQKLIHKIAEALDLDVEDSWALPAQELKV\n+QRYKPKSTVVEAEYKLKVYERNVQISDVSSPILLRVLEAALPEGVTLEVEEHEEEKEEKR\n+YVPDKELLDLKQELDEL\n+>EOG090W0MNZ\n+KIEEYETFINDVLKEDLKKLEKKLEKLNEEIAEYVQLKSTIETLDGLKTKVDIGCNFFVQ\n+AKVEDSKILVNIGLGVYLELTLEEALKFIDVRIKLLEKQIEKLRKESAKTKAHIKLVLLA\n+IEELQ\n+>EOG090W0MYQ\n+NPFEKEKKKCILCKLNIEPDYKNVKLLSQFQSPYTGRIYGRHITGLCKKKQEKVEKEILK\n+AQFLKDPKLFDPEKPLRPHK\n+>EOG090W0MZQ\n+PPINQKRLLAFINHFIISTVSFLNKFAKSCEEKLLEFEKKLQKVEASLVILEAKLSSIPE\n+LEEDPEYKKYFKMVQVGVPKEAVKLKMQQEGLDPSLLD\n+>EOG090W0N0N\n+LSKKEKLKKAVKDYGSTVVVFHVGISLISLGALYLLVSSGLDVLLEKLEASTFVVAYAVH\n+KVLAPVRISITLAATPLIVRYLRKIGLLK\n+>EOG090W0N4N\n+MDLSKVKNEKKLELCKLYFGFALLPFLWAVNAVWFFKEAFKKPEYEEQKQIKKYVILSAI\n+GALIWAWIVIFQLKRAEWGELADEISFIIPLG\n+>EOG090W0N5S\n+MKAVTAVCATGASVPAVASGRVKRRRDLENEEIQMYLSKLKDLVPFMPKNRKLSKLEVIQ\n+HVIDYICDLQTALEEHPAAAALARQPLGVLPNTIL\n+>EOG090W0N7H\n+MKLSHETVTIELKNGTQVHGTITGVDVAMNTHLKAVKLTIKNRLETLSIRGNNIRYYILP\n+DSLPLETLLIDDTPKAKAKKK\n+>EOG090W0N7U\n+SSTSQKHREPMGDKPVTDLAGVGEVLGKRLVVLGQYLVLKKDKELFKEWMKDTCSANSKQ\n+SSDCYQCLSDWCEEF\n+>EOG090W0NCE\n+VNKTVSIITDGRNFIGTLKGFDQTINLILDESHERVYSTTQGVEQVVLGLHIIRGDNVAI\n+VGELDDSRLDLSSIRAEPLSSVVH\n+>EOG090W0NFV\n+DPELEAIRAQRLAQLQSQYKGQKAQEEKKREQEEMKNSILSQVLDQSARARLNTLKLGKP\n+EKGKMVENLLIRMAQRGQIKGKLGEKELIKLLESVNQQTTVKFDRRRAALDSDDD\n+>EOG090W0NJA\n+TRVYVGGLTEKVKKEDLEAEFEKYGKLNSVWVAFNPPGFAFIEFENKDEAEKACDNLNGT\n+ELLGSKLRVEISRGRGRKGGRGKRGSRFRSRSPVGR\n+>EOG090W0NJU\n+YLKSWEEFEKAAERLYLQDPLKRYTMKYVHSKGLLVLKLTDNCLQYKTEDLKKIEKFISN\n+LMRHMASKE\n+>EOG090W0NK3\n+VNVPKQRRTFCKKCKVHKLHKVTQYKKSKEGRRRYDRKQQGFGGQTKPIFRKKAKTTKKI\n+VLRLECTECKYRKPLKRCKHFELGGDKKRK\n+>EOG090W0NO8\n+MGKVKCSELRTKDKKELLKQLEELKTELTNLRVAKVTGGAASKLSKIRVVRKAIARVYIV\n+
LHQKQKENLRKNKKYKPLDLRPKKTRALRRALTTLKEIRKRKYAVKA\n+>EOG090W0NRT\n+RKEALSQFIQQIHGRPVVVKLNSGVDYRGVLACLDGYMNIALEQTEEYVNGQLKNKYGDA\n+FIRGNNVLY\n+>EOG090W0NTV\n+EEWLEKEVIGLRVWQLLLLVLSILLSLVILLCCCIRFRIPRTKQEIEADYERKKLTKKFR\n+KRLKKIKNSEMDELDLKKAEAESLE\n+>EOG090W0O4V\n+MPKYYCDYCDTYLTHDSPSVRKTHCQGRKHKDNVKFYYQKWMEEQAQHLIDATTAAFKAG\n+KIASNPFAGVAIPPPGPGLAAPPGMPMMMGPHGPMPPMMMRPLMKPKGPMAPMGPLGALG\n+PVRPPL\n+>EOG090W0O82\n+MLEITCNDRLGKKVRVKCNPDDTIGDLKKLIAAQTGTKIVLKKWYTIFKDHIKLQD\n+>EOG090W0O88\n+AKRTKKVGITGKYGTRYGASLRKMVKKMEITQHSKYTCSFCGKAMKRSVVGIWSCKRCKR\n+TVAGGAWVYSTTAAASVRSAVRRLRE\n+>EOG090W0ODH\n+MEEKLAEYRAKKRREELLEKVKEKLKEVYLLYFLLWATLYIIAIELEFGAVYLVLSALVF\n+IYLNTRTGPKKKGEVSAYSVFNKNCEAIDGTLKAEQFEREIRYG\n+>EOG090W0OM7\n+LGRSRSPSPRRRRKERRDRRRRRSRERRRRSRDRERSLSRSRSRSEERERPVITEADLEG\n+KSPEEQEMLKLMGFCGFDTTKGKKVEGNDVGEVHVILKRKYRQYMNRKGGFNRPLDFV\n+>EOG090W0ORD\n+DEYALVAKGKLKLKSDKKKKKKKKRTKAELAFKMQEKMQKERIKEKASMTHKQRVEEFNR\n+HLDSLTEHFDIPKVSWTK\n+>EOG090W0ORX\n+PREIKEIKDFLLKARRKDAKSKIKKNAENVKFKVRCSRFLYTLVITDKEKAEKLKQSLPP\n+G\n+>EOG090W0OS5\n+KELEKLEEAKLKAKYPEGHSAFLQKRLAKGQKYFDSGDYQMAKQKTGEAIPTPETVPVRK\n+TSIIQP\n+>EOG090W0PDB\n+FAKDSIRLVKRCTKPDREFQKIAIATAIGFCIMGFIGFFVKLIHIPINNIIV\n+>EOG090W0PQO\n+LLLLAVALAAAQLFLAQALEASLAHPAVVENAEAEAQLPEELRNPFYKNPRIAAALAKES\n+WFTNKEMQVIDREAEKIPREKIYKILKNAGLVRRR\n+>EOG090W0PW0\n+EEKELKAGHPPAVKAGGMRITQHKTPSPPKTISGAPVKGNEAVQVFHEKKPPTIQQPRK\n+>EOG090W0PZH\n+KPIDSKREEFRKYLERAGVLDALTKVLVSLYEEPEKPEDALEYLRKNLGLKKELEEAKAE\n+IAELE\n+>EOG090W0Q9X\n+VIGGAVVGLLCAILVVMFIVYRLRKKDEGSYALEPKKRSPNREFYA\n+>EOG090W0QXM\n+PAAPSSTSVGSGSRSPSKQRKTTGSGGMWRFYTDDSPGIKVGPVPVLVMSLLFIASVFML\n+HIWGKYTRS\n+>EOG090W0R2X\n+MKRTKEKVEKEEGEELYSNEITEEMKKFIIEPSYVLCEKLIEGRLSFGGMNPEIEKLMEE\n+EEKDVSDEEMA\n+>EOG090W0RGQ\n+DVLDSWEEIDESEALEKKLKKLVIIKEEDELRSQLVPPEPTVKILKRPEKSSNGESKPKQ\n+PIKTLKQREQEYAEARLRILGEAKSPEENVLRLPRGPDGTKGFNVRR\n+>EOG090W0S6D\n+RVNGSLLKQFIGKKVSILGKVKKKSSNGKSFLKTTDNQKVTVELKEPLDEPLEGWVEVHG\n+VVKSSTISCDEYIEFPEETENFDAEAYNKLLNTVKNPWK\n+>EOG090W0T3K\n+MREFTNIVTTLSKLSKECVLRLTKDKLVFIVPL
VWCELDQKFFSEYNMEGVSNEIYLELS\n+AEMLSRSLSSLKAKSVKIKLTNKQSPCLTVEIELSSESRQVVHDIPVTVIPRKEWSEYEE\n+PSIELPSLKKLRKVVDRMKNLSPSLTISATLKIETDTATVSTHFKNLKVSARVDIKKLSA\n+FLEVICSIEKLIKLELVKLHYFLPAV\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/insecta/dataset.cfg --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/insecta/dataset.cfg Mon Oct 04 19:39:38 2021 +0000 |
b |
@@ -0,0 +1,6 @@ +name=insecta_odb9 +species=fly +domain=eukaryota +creation_date=2016-02-13 +number_of_BUSCOs=1658 +number_of_species=42 |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/insecta/hmms/EOG090W03A6.hmm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/insecta/hmms/EOG090W03A6.hmm Mon Oct 04 19:39:38 2021 +0000 |
[ |
b'@@ -0,0 +1,892 @@\n+HMMER3/f [3.1b1 | May 2013]\n+NAME EOG090W03A6\n+LENG 290\n+ALPH amino\n+RF no\n+MM no\n+CONS yes\n+CS no\n+MAP yes\n+DATE Tue Jul 19 12:14:12 2016\n+NSEQ 41\n+EFFN 0.573059\n+CKSUM 4199501958\n+STATS LOCAL MSV -11.0137 0.70159\n+STATS LOCAL VITERBI -11.8914 0.70159\n+STATS LOCAL FORWARD -5.4517 0.70159\n+HMM A C D E F G H I K L M N P Q R S T V W Y \n+ m->m m->i m->d i->m i->i d->m d->d\n+ COMPO 2.61260 4.34648 2.94292 2.63725 3.00827 2.97272 3.70379 2.92379 2.63311 2.34924 3.71976 3.18648 3.40267 3.05104 2.84850 2.73289 2.87560 2.67151 4.50628 3.28193\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.06535 3.91193 3.14059 0.61958 0.77255 0.00000 *\n+ 1 3.00919 4.52311 4.14821 3.62922 3.09723 4.03596 4.32783 2.27940 3.34231 1.08727 2.86803 3.94138 4.37623 3.72340 3.44120 3.39248 3.26003 2.31137 4.98326 3.74489 1 l - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03123 3.87781 4.60016 0.61958 0.77255 0.51713 0.90691\n+ 2 2.36218 4.33870 3.13011 2.73889 4.02232 3.10957 3.85645 3.40174 2.64759 3.07800 3.92900 3.09783 3.59930 3.07936 2.94839 1.61731 2.43809 2.94535 5.34618 4.08403 2 s - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03123 3.87781 4.60016 0.61958 0.77255 0.51713 0.90691\n+ 3 2.67521 4.27814 3.24960 2.71563 4.05115 3.41144 3.69906 3.32637 1.42497 3.01802 3.94128 3.12968 3.88008 2.88045 2.51411 2.76334 2.94671 2.94958 5.28612 4.02052 3 k - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03123 3.87781 4.60016 0.61958 0.77255 0.51713 0.90691\n+ 4 
2.80665 4.86651 3.06170 2.65102 4.29941 3.41912 3.68135 3.61311 1.29697 3.22463 4.14220 3.08300 3.90692 2.84934 2.39258 2.85149 2.85896 3.30620 5.40498 4.16189 4 k - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.04394 3.87781 3.80347 0.61958 0.77255 0.51713 0.90691\n+ 5 1.15269 4.13250 3.42805 3.19131 4.05298 2.97131 4.22552 3.11818 3.19502 3.02891 4.02623 3.33501 3.71301 3.53350 3.46107 2.45402 2.62633 2.76070 5.51002 4.28558 5 a - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03162 3.86549 4.58784 0.61958 0.77255 0.52796 0.89112\n+ 6 3.14746 4.61180 4.17828 3.81812 3.13308 3.97327 4.47605 2.33676 3.58594 0.88383 3.15824 4.09427 4.40984 3.95594 3.76314 3.54334 3.44203 2.37228 5.00981 3.73151 6 l - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03162 3.86549 4.58784 0.61958 0.77255 0.52796 0.89112\n+ 7 3.23704 4.72886 3.82795 3.55604 2.29363 3.82202 3.61532 3.24723 3.41375 2.70296 3.92299 3.71488 4.29603 3.72147 3'..b'.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03197 3.85485 4.57719 0.61958 0.77255 0.54899 0.86164\n+ 283 2.86715 4.85199 2.89168 2.64635 4.01816 3.35460 3.80443 3.64641 2.46840 3.15049 4.17905 3.10585 3.90794 1.26571 2.75284 2.91574 3.16423 3.37724 5.29340 3.98605 284 q - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03197 3.85485 4.57719 0.61958 0.77255 0.54899 0.86164\n+ 284 2.30662 4.22669 3.19139 3.00653 4.05883 2.95692 4.11678 3.57914 
3.07910 3.30080 4.22848 3.23401 3.69430 3.42983 3.36556 1.11793 2.78547 3.11811 5.43277 4.14231 285 s - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03197 3.85485 4.57719 0.61958 0.77255 0.54899 0.86164\n+ 285 2.97610 4.79637 3.10901 2.86734 3.27278 3.43295 1.18022 3.67628 2.71089 3.17239 4.21410 3.27732 3.98459 3.23340 2.97758 3.04699 3.27893 3.41176 4.73803 3.22416 286 h - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03197 3.85485 4.57719 0.61958 0.77255 0.49247 0.94447\n+ 286 3.02629 4.38931 4.55440 4.02788 3.38969 4.29987 4.75221 1.66570 3.90157 1.73351 3.22011 4.29643 4.60414 4.16932 4.12143 3.65546 3.28153 1.24068 5.32317 4.12520 287 v - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03072 3.89395 4.61629 0.61958 0.77255 0.51483 0.91031\n+ 287 3.12120 4.54522 4.46798 3.93436 3.10940 4.26644 4.61200 2.09702 3.74039 1.03668 2.81140 4.23014 4.55041 4.00262 3.93893 3.62030 3.36876 2.00221 5.11579 3.95042 288 l - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03072 3.89395 4.61629 0.61958 0.77255 0.51483 0.91031\n+ 288 2.79129 4.78468 3.27333 2.77748 4.23368 3.42379 3.72210 3.67372 2.18570 3.22145 4.12764 3.17710 3.22448 2.90441 1.31319 2.86068 3.06244 3.35113 5.34061 4.12949 289 r - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03072 3.89395 4.61629 0.61958 0.77255 0.51483 0.91031\n+ 289 2.87748 5.28398 2.07313 1.23939 4.58665 3.21010 3.73813 
4.06161 2.69686 3.62602 4.49991 2.71592 3.81535 2.91427 3.22357 2.79830 3.16561 3.67987 5.77966 4.34719 290 e - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03072 3.89395 4.61629 0.61958 0.77255 0.51483 0.91031\n+ 290 2.83903 4.94076 3.19058 2.65002 4.36201 3.09854 3.60817 3.74025 1.86467 3.26108 4.11943 3.08356 3.91775 2.75841 1.52389 2.86394 3.05522 3.41435 5.37419 4.15539 291 r - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.02078 3.88401 * 0.61958 0.77255 0.00000 *\n+//\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/insecta/hmms/EOG090W06A3.hmm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/insecta/hmms/EOG090W06A3.hmm Mon Oct 04 19:39:38 2021 +0000 |
[ |
b'@@ -0,0 +1,973 @@\n+HMMER3/f [3.1b1 | May 2013]\n+NAME EOG090W06A3\n+LENG 317\n+ALPH amino\n+RF no\n+MM no\n+CONS yes\n+CS no\n+MAP yes\n+DATE Tue Jul 19 12:14:23 2016\n+NSEQ 42\n+EFFN 0.505005\n+CKSUM 2173114792\n+STATS LOCAL MSV -11.1177 0.70082\n+STATS LOCAL VITERBI -11.8237 0.70082\n+STATS LOCAL FORWARD -5.8436 0.70082\n+HMM A C D E F G H I K L M N P Q R S T V W Y \n+ m->m m->i m->d i->m i->i d->m d->d\n+ COMPO 2.60078 4.20386 3.09081 2.72939 3.29201 2.73750 3.80596 2.60287 2.76439 2.32806 3.62751 3.16496 3.24412 3.25571 3.03713 2.74208 2.91733 2.41359 4.52910 3.56529\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03242 3.84097 4.56332 0.61958 0.77255 0.00000 *\n+ 1 2.88548 4.98203 3.26484 2.66893 4.37797 3.54234 3.56403 3.73430 1.73193 3.23652 4.10452 3.09116 3.93137 2.65462 1.56967 2.90213 2.96175 3.42446 5.34579 4.14426 1 r - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03242 3.84097 4.56332 0.61958 0.77255 0.48576 0.95510\n+ 2 1.06014 4.17714 3.43127 3.22164 4.02163 3.00241 4.24732 3.14248 3.23710 3.02549 4.06118 3.37901 3.73871 3.58127 3.48951 2.52791 2.80311 2.80236 5.45195 4.24880 2 a - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03242 3.84097 4.56332 0.61958 0.77255 0.48576 0.95510\n+ 3 3.05279 4.55050 4.19258 3.77702 3.08275 4.01645 4.45173 2.21046 3.53641 1.02046 3.06577 4.04994 4.42083 3.89443 3.73294 3.46036 3.34301 2.19383 5.01051 3.71888 3 l - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03242 3.84097 4.56332 0.61958 0.77255 0.48576 0.95510\n+ 4 
3.00664 4.43473 4.29003 3.90501 3.35346 4.03179 4.63929 1.11780 3.73250 1.94647 3.29426 4.15480 4.46644 4.08432 3.93271 3.52442 3.30654 1.84634 5.23744 3.97271 4 i - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03242 3.84097 4.56332 0.61958 0.77255 0.48576 0.95510\n+ 5 3.12038 4.59225 4.13872 3.77890 3.12451 3.94175 4.44453 2.32740 3.54694 0.91499 3.15816 4.05737 4.38285 3.92282 3.72797 3.50962 3.41625 2.35646 4.99189 3.70778 5 l - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03242 3.84097 4.56332 0.61958 0.77255 0.48576 0.95510\n+ 6 2.78335 4.33516 4.08134 3.74395 3.52758 3.69083 4.55127 2.05344 3.61123 2.22041 3.49797 3.93870 4.25342 3.97351 3.82669 3.19274 3.14588 1.06562 5.30654 4.03600 6 v - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03242 3.84097 4.56332 0.61958 0.77255 0.48576 0.95510\n+ 7 2.62697 4.40500 3.29425 3.18992 4.33181 0.77941 4.31170 3.92417 3.36686 3.58217 4.55801 3.45867 3.79512 3.69359 3'..b'.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03203 3.85293 4.57528 0.61958 0.77255 0.49328 0.94320\n+ 310 2.30574 4.22577 3.18979 3.00487 4.05694 2.95603 4.11516 3.57701 3.07731 3.29883 4.22670 3.23264 3.69326 3.42820 3.36380 1.12089 2.78445 3.11640 5.43104 4.14042 310 s - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03203 3.85293 4.57528 0.61958 0.77255 0.49328 0.94320\n+ 311 2.79273 4.34096 4.09811 3.76084 3.53604 3.70346 4.56601 2.05196 
3.62832 2.22661 3.50436 3.95375 4.26521 3.98962 3.84267 3.20593 3.15484 1.04991 5.31711 4.04742 311 v - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03203 3.85293 4.57528 0.61958 0.77255 0.49328 0.94320\n+ 312 2.52063 4.46247 3.14399 2.66329 3.61752 3.33464 3.73235 3.11172 2.55088 2.69643 3.68563 3.09303 2.21417 2.90819 2.81318 2.64840 2.75793 2.77252 5.11552 3.82308 312 p - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03203 3.85293 4.57528 0.61958 0.77255 0.49328 0.94320\n+ 313 2.83516 5.28588 2.02146 1.37746 4.57342 3.19313 3.68741 4.04429 2.57588 3.59161 4.44614 2.67503 3.78602 2.85457 3.15755 2.75155 3.11440 3.65636 5.76541 4.31525 313 e - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03203 3.85293 4.57528 0.61958 0.77255 0.49328 0.94320\n+ 314 2.42177 4.37033 3.17444 2.98491 4.08142 3.08409 4.11584 3.50342 3.03789 3.18538 4.19247 3.28514 1.20231 3.42206 3.32349 2.65489 2.93898 3.13658 5.40418 4.19997 314 p - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03203 3.85293 4.57528 0.61958 0.77255 0.49328 0.94320\n+ 315 2.63050 4.69517 2.96852 2.54738 3.99565 3.34796 3.67351 3.38755 2.34178 3.00253 3.70071 3.00925 3.82818 1.82403 2.67315 2.61986 2.77726 3.09208 5.27729 3.96986 315 q - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03203 3.85293 4.57528 0.61958 0.77255 0.49328 0.94320\n+ 316 2.99195 4.39606 4.46052 3.99700 3.37863 4.17130 4.73529 
1.20074 3.83833 1.90438 3.24173 4.24154 4.54901 4.14877 4.05130 3.57249 3.27663 1.64463 5.32209 4.08731 316 i - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03203 3.85293 4.57528 0.61958 0.77255 0.49328 0.94320\n+ 317 3.01592 4.44036 4.30752 3.92275 3.36125 4.04511 4.65464 1.10243 3.75049 1.95152 3.29989 4.17084 4.47839 4.10092 3.94931 3.53870 3.31551 1.84594 5.24789 3.98436 317 i - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.02167 3.84258 * 0.61958 0.77255 0.00000 *\n+//\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/insecta/hmms/EOG090W0GYE.hmm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/insecta/hmms/EOG090W0GYE.hmm Mon Oct 04 19:39:38 2021 +0000 |
[ |
b'@@ -0,0 +1,352 @@\n+HMMER3/f [3.1b1 | May 2013]\n+NAME EOG090W0GYE\n+LENG 110\n+ALPH amino\n+RF no\n+MM no\n+CONS yes\n+CS no\n+MAP yes\n+DATE Tue Jul 19 12:02:11 2016\n+NSEQ 42\n+EFFN 0.417847\n+CKSUM 96359631\n+STATS LOCAL MSV -9.8588 0.71539\n+STATS LOCAL VITERBI -10.5314 0.71539\n+STATS LOCAL FORWARD -4.2136 0.71539\n+HMM A C D E F G H I K L M N P Q R S T V W Y \n+ m->m m->i m->d i->m i->i d->m d->d\n+ COMPO 2.58050 4.26382 2.85365 2.80613 3.15035 2.89189 3.81397 2.73428 2.84682 2.50149 3.63880 3.04623 2.92146 3.23736 2.97958 2.69288 2.75694 2.61051 4.37217 3.44918\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.11208 3.80830 2.47881 0.61958 0.77255 0.00000 *\n+ 1 2.86563 4.43941 3.90030 3.48825 3.15849 3.72815 4.25618 2.32070 3.26187 1.77984 1.67728 3.78149 4.19270 3.65614 3.48266 3.19769 3.17068 2.30899 4.97739 3.72347 1 m - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03621 3.73242 4.45477 0.61958 0.77255 0.51831 0.90517\n+ 2 2.32088 4.22631 3.23062 2.88208 3.75054 3.06494 3.91305 3.12034 2.80280 2.86950 3.46740 3.17206 3.71214 3.20009 3.11255 1.58694 2.72892 2.79566 5.19560 3.87469 2 s - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03541 3.75430 4.47665 0.61958 0.77255 0.52979 0.88850\n+ 3 2.16583 4.14387 3.31164 3.04023 3.91928 2.99832 4.08555 2.96050 2.99484 2.87006 3.88524 3.25429 3.70169 3.37677 3.27365 2.46801 1.56963 2.64620 5.38268 4.14256 3 t - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03541 3.75430 4.47665 0.61958 0.77255 0.52979 0.88850\n+ 4 2.50339 
4.36552 3.18021 2.87748 3.70569 3.17426 3.92862 3.09513 2.80524 2.51701 3.83182 3.22048 1.72512 3.22934 3.10382 2.66637 2.88472 2.83864 5.15020 3.85399 4 p - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03541 3.75430 4.47665 0.61958 0.77255 0.52979 0.88850\n+ 5 1.37966 4.07220 3.30932 3.05735 4.00851 2.90363 4.11036 3.10631 3.06226 3.01691 3.96738 3.22700 3.63731 3.40459 3.34236 2.32349 2.50171 2.72896 5.44141 4.20867 5 a - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03541 3.75430 4.47665 0.61958 0.77255 0.52979 0.88850\n+ 6 2.79738 4.71128 3.35305 2.81859 3.86956 3.45352 3.66728 3.34124 2.12422 2.73535 3.92040 3.19573 3.91394 2.88067 1.48782 2.88950 3.04990 3.10526 5.13081 3.85640 6 r - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03541 3.75430 4.47665 0.61958 0.77255 0.52979 0.88850\n+ 7 2.79738 4.71128 3.35305 2.81859 3.86956 3.45352 3.66728 3.34124 2.12422 2.73535 3.92040 3.19573 3.91394 2.88067 1.4'..b'.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03352 3.80830 4.53064 0.61958 0.77255 0.48576 0.95510\n+ 103 2.28595 4.20469 3.15305 2.96732 4.01306 2.93587 4.07824 3.52766 3.03667 3.25343 4.18597 3.20146 3.66970 3.39139 3.32369 1.19215 2.76154 3.07690 5.39105 4.09660 103 s - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03352 3.80830 4.53064 0.61958 0.77255 0.48576 0.95510\n+ 104 1.11047 4.16262 3.39942 3.18942 3.99027 2.98852 4.21878 3.10639 
3.20405 2.99074 4.03096 3.35463 3.72182 3.55149 3.45809 2.51413 2.78712 2.77202 5.42393 4.21734 104 a - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03352 3.80830 4.53064 0.61958 0.77255 0.48576 0.95510\n+ 105 2.98123 4.41976 4.24147 3.85605 3.33251 3.99469 4.59708 1.16125 3.68281 1.93347 3.27967 4.11045 4.43341 4.03863 3.88681 3.48501 3.28213 1.84863 5.20897 3.94068 105 i - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03352 3.80830 4.53064 0.61958 0.77255 0.48576 0.95510\n+ 106 3.08459 4.56673 4.08567 3.72675 3.11378 3.89922 4.40266 2.31619 3.49506 0.95858 3.15957 4.00826 4.34667 3.87883 3.68103 3.46477 3.38236 2.33656 4.96826 3.67627 106 l - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03352 3.80830 4.53064 0.61958 0.77255 0.48576 0.95510\n+ 107 2.38084 4.22527 3.37365 3.12444 3.91461 3.06621 4.14632 3.03402 3.06612 2.89972 3.95563 3.34312 3.76545 3.46456 3.32701 2.58155 1.28564 2.73588 5.35817 4.13168 107 t - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03352 3.80830 4.53064 0.61958 0.77255 0.48576 0.95510\n+ 108 2.28595 4.20469 3.15305 2.96732 4.01306 2.93587 4.07824 3.52766 3.03667 3.25343 4.18597 3.20146 3.66970 3.39139 3.32369 1.19215 2.76154 3.07690 5.39105 4.09660 108 s - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03352 3.80830 4.53064 0.61958 0.77255 0.48576 0.95510\n+ 109 2.98123 4.41976 4.24147 3.85605 3.33251 3.99469 4.59708 
1.16125 3.68281 1.93347 3.27967 4.11045 4.43341 4.03863 3.88681 3.48501 3.28213 1.84863 5.20897 3.94068 109 i - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.03352 3.80830 4.53064 0.61958 0.77255 0.48576 0.95510\n+ 110 2.83281 4.81376 2.86370 2.61910 3.97332 3.32524 3.77627 3.59570 2.44478 3.10430 4.13561 3.07764 3.87883 1.35288 2.72955 2.88405 3.13061 3.32988 5.25759 3.94755 110 q - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.02268 3.79747 * 0.61958 0.77255 0.00000 *\n+//\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/insecta/hmms/EOG090W0T3K.hmm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/insecta/hmms/EOG090W0T3K.hmm Mon Oct 04 19:39:38 2021 +0000 |
[ |
b'@@ -0,0 +1,640 @@\n+HMMER3/f [3.1b1 | May 2013]\n+NAME EOG090W0T3K\n+LENG 206\n+ALPH amino\n+RF no\n+MM no\n+CONS yes\n+CS no\n+MAP yes\n+DATE Tue Jul 19 12:12:21 2016\n+NSEQ 42\n+EFFN 1.961060\n+CKSUM 265104873\n+STATS LOCAL MSV -10.5526 0.70500\n+STATS LOCAL VITERBI -11.3203 0.70500\n+STATS LOCAL FORWARD -5.0917 0.70500\n+HMM A C D E F G H I K L M N P Q R S T V W Y \n+ m->m m->i m->d i->m i->i d->m d->d\n+ COMPO 2.66939 4.05673 3.22993 2.79099 3.15365 3.55719 3.71237 2.54173 2.57440 2.22720 3.33451 3.21043 3.51162 3.12964 3.00991 2.70726 2.80134 2.41473 4.61383 3.55842\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.16208 4.61100 1.96838 0.61958 0.77255 0.00000 *\n+ 1 2.93600 4.38428 4.46198 3.90104 3.40267 3.46509 4.45615 1.93047 3.75500 2.17341 1.33550 4.09987 4.43304 3.98673 3.92700 3.38467 3.18624 2.18032 5.08252 3.90751 1 m - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01722 4.46614 5.18849 0.61958 0.77255 0.71506 0.67170\n+ 2 3.51610 5.57440 4.08521 3.23432 5.16373 4.05621 3.85036 4.44109 1.24790 3.78589 4.67516 3.56930 4.38323 2.97693 1.02813 3.49054 3.62846 4.13070 5.74164 4.65992 2 r - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01722 4.46614 5.18849 0.61958 0.77255 0.71506 0.67170\n+ 3 2.71675 4.18007 2.20863 1.78454 4.48942 3.47153 3.68161 3.95962 2.43737 2.96771 4.22140 2.70561 3.87879 2.38271 2.92830 2.68696 2.95012 3.54764 5.61847 4.21897 3 e - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01722 4.46614 5.18849 0.61958 0.77255 0.71506 0.67170\n+ 4 3.79955 
5.05528 5.24619 4.82928 0.91393 4.84708 4.43636 2.76588 4.65616 1.39209 3.35680 4.77841 5.06654 4.62904 4.66340 4.24935 4.02525 2.91694 4.56452 2.97666 4 f - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01722 4.46614 5.18849 0.61958 0.77255 0.71506 0.67170\n+ 5 2.33881 4.63133 3.04986 2.72039 3.81134 3.58086 3.80889 3.19122 2.69933 2.40831 3.28616 3.19757 3.96766 2.62497 3.12129 2.40087 2.24400 2.60449 5.18249 3.90340 5 t - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01722 4.46614 5.18849 0.61958 0.77255 0.71506 0.67170\n+ 6 2.53534 4.98255 2.88571 2.48098 3.55645 3.22634 3.11506 3.68024 2.49296 3.25497 4.05057 1.83222 3.89751 2.84319 2.96725 2.38521 2.93102 3.33084 5.46784 3.79390 6 n - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01722 4.46614 5.18849 0.61958 0.77255 0.71506 0.67170\n+ 7 3.33815 4.58635 5.28231 4.77386 3.86630 4.81962 5.40260 1.04591 4.67983 2.15439 3.65019 4.93514 5.08026 4.90950 4.'..b'.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01619 4.52743 5.24978 0.61958 0.77255 0.62832 0.76248\n+ 199 3.17292 4.49948 5.07350 4.47897 2.19664 4.41569 4.75303 1.67850 4.30714 1.34256 2.66830 4.55216 4.68996 4.38672 4.33103 3.74557 3.18632 2.02062 5.12680 4.01423 203 l - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01619 4.52743 5.24978 0.61958 0.77255 0.62832 0.76248\n+ 200 2.82721 5.23462 3.00978 2.50556 4.56561 3.55698 1.89081 4.01902 2.34647 
3.51731 4.29242 2.45019 3.96371 2.18070 2.67888 2.52972 3.05315 3.62288 5.65577 4.29113 204 h - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01619 4.52743 5.24978 0.61958 0.77255 0.62832 0.76248\n+ 201 2.79753 3.26426 4.38245 3.80576 2.13306 3.90254 4.09723 2.46777 3.65960 2.40542 3.34950 3.92355 4.27118 3.84123 3.78510 2.46701 3.03179 2.48318 4.61601 1.76843 205 y - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01619 4.52743 5.24978 0.61958 0.77255 0.62832 0.76248\n+ 202 2.72610 3.77026 4.37676 3.78412 1.86105 3.85380 3.70355 1.91248 3.39011 2.25745 3.26975 3.64215 4.21973 3.81329 3.74151 3.15148 2.95801 2.40389 4.70415 2.65981 206 f - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01619 4.52743 5.24978 0.61958 0.77255 0.62832 0.76248\n+ 203 3.15910 4.51063 4.85177 3.56044 3.48065 4.41056 4.77828 1.50858 4.14418 1.18189 3.34602 4.46100 4.70781 4.33071 4.27364 3.73860 3.39432 1.87237 5.27969 4.12987 207 l - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01619 4.52743 5.24978 0.61958 0.77255 0.62832 0.76248\n+ 204 2.54223 4.83441 3.15835 2.33659 4.10387 3.53825 3.79739 3.50371 2.61506 2.95960 3.75295 3.12487 1.86716 2.96533 3.06273 2.60399 2.36144 3.19033 5.39703 4.08002 208 p - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01619 4.52743 5.24978 0.61958 0.77255 0.62832 0.76248\n+ 205 1.87617 5.09761 2.66753 2.36334 4.39589 2.74795 3.18023 3.84868 
2.46564 3.38501 4.15665 2.99165 3.89862 2.67910 2.94944 2.53808 2.60969 3.21059 5.56524 4.18537 209 a - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01619 4.52743 5.24978 0.61958 0.77255 0.62832 0.76248\n+ 206 3.02518 4.38844 4.11409 4.15378 3.60416 4.28092 4.67591 1.38646 4.03415 2.26037 3.36491 4.32477 4.61541 4.25349 4.19108 3.60271 2.71238 1.26673 5.26692 4.07016 210 v - - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.01092 4.52217 * 0.61958 0.77255 0.00000 *\n+//\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/insecta/lengths_cutoff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/insecta/lengths_cutoff Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,1658 @@\n+EOG090W0HFV\t0\t26.009834120677418\t151\n+EOG090W0427\t0\t14.787662575205127\t119\n+EOG090W09K7\t0\t48.74476058088982\t218\n+EOG090W0B5K\t0\t8.436399156361812\t117\n+EOG090W0153\t0\t110.0106535860915\t535\n+EOG090W051T\t0\t48.42670503702809\t341\n+EOG090W01WI\t0\t66.27955761081256\t599\n+EOG090W01A3\t0\t147.49150864355283\t636\n+EOG090W067A\t0\t24.752819851328745\t239\n+EOG090W0IUR\t0\t33.027556860826415\t176\n+EOG090W09BV\t0\t59.827044564511475\t249\n+EOG090W0AIA\t0\t62.4605436431282\t296\n+EOG090W05D8\t0\t120.959030700539\t437\n+EOG090W01IP\t0\t203.56155914702887\t709\n+EOG090W02C3\t0\t123.60839779175912\t441\n+EOG090W0B1Y\t0\t14.92967253328818\t221\n+EOG090W01VD\t0\t137.07676494930246\t506\n+EOG090W035W\t0\t58.49371845525309\t413\n+EOG090W0DWN\t0\t41.156486855129046\t178\n+EOG090W028U\t0\t103.86567310127292\t544\n+EOG090W02TI\t0\t98.41080498838295\t449\n+EOG090W00PB\t0\t106.20423261052379\t448\n+EOG090W038B\t0\t77.2556129538091\t369\n+EOG090W0BUR\t0\t61.86700358352731\t260\n+EOG090W0F0L\t0\t46.56222745575028\t243\n+EOG090W08IZ\t0\t62.201306771663276\t286\n+EOG090W0BEB\t0\t24.453706055266927\t238\n+EOG090W0I37\t0\t18.215830819664493\t138\n+EOG090W0CQ9\t0\t22.121097366946984\t209\n+EOG090W04BS\t0\t57.09128334646491\t340\n+EOG090W0FCL\t0\t16.012152397171466\t130\n+EOG090W0AXJ\t0\t28.151048076823233\t222\n+EOG090W0FEP\t0\t12.631376967230036\t126\n+EOG090W0FVI\t0\t15.398685404771472\t141\n+EOG090W0GP3\t0\t19.401812121264605\t132\n+EOG090W01NH\t0\t109.56816301330042\t599\n+EOG090W0IEE\t0\t13.648148270718346\t133\n+EOG090W0IP7\t0\t16.863805057639492\t174\n+EOG090W038Z\t0\t59.028806526983075\t372\n+EOG090W0B8O\t0\t25.131143830790958\t193\n+EOG090W0KP0\t0\t31.31312208007784\t139\n+EOG090W064P\t0\t35.99213522298181\t305\n+EOG090W0A3V\t0\t29.293708718757674\t271\n+EOG090W0BOZ\t0\t11.373911097706742\t158\n+EOG090W0H6T\t0\t40.78248368613364\t174\n+EOG090W0EDI\t0\t25.598027290426895\t161\n+EOG090W0EFH\t0\t32.47988529410085\t158\n+EOG090W04BN\t0\t42.69662
448203931\t429\n+EOG090W00SP\t0\t347.84066824884763\t1158\n+EOG090W01ZN\t0\t35.63683451653604\t248\n+EOG090W014R\t0\t303.558641419558\t908\n+EOG090W0FJE\t0\t11.554345658688966\t128\n+EOG090W00EP\t0\t90.98598088688564\t391\n+EOG090W07MY\t0\t26.394659474607508\t276\n+EOG090W00LR\t0\t47.803732742449085\t428\n+EOG090W06QR\t0\t45.20103500608554\t255\n+EOG090W06J7\t0\t48.950477475341124\t349\n+EOG090W04G6\t0\t20.747524095601406\t271\n+EOG090W06IG\t0\t83.91320663964761\t392\n+EOG090W036Y\t0\t90.1813890269454\t405\n+EOG090W0CMO\t0\t27.111085843428626\t212\n+EOG090W0BFE\t0\t43.83422206895252\t292\n+EOG090W0JYN\t0\t8.6240352353082\t95\n+EOG090W054P\t0\t42.0493667617466\t242\n+EOG090W0753\t0\t88.63736820874865\t365\n+EOG090W0FKG\t0\t36.02492014973623\t238\n+EOG090W0B6L\t0\t37.512128120083524\t178\n+EOG090W0C2I\t0\t39.9957024404497\t248\n+EOG090W057Z\t0\t39.37529750270147\t336\n+EOG090W01H1\t0\t167.89279528460568\t626\n+EOG090W07A3\t0\t38.193049881273545\t207\n+EOG090W0D5E\t0\t36.11275078460196\t174\n+EOG090W07PK\t0\t44.50703007278868\t242\n+EOG090W0GKW\t0\t13.60945220480054\t126\n+EOG090W0J9P\t0\t72.21231118153511\t276\n+EOG090W0A69\t0\t38.463468950032976\t247\n+EOG090W0DRQ\t0\t7.927491529558358\t71\n+EOG090W04G1\t0\t70.1561152864883\t382\n+EOG090W08L6\t0\t34.61950687705208\t168\n+EOG090W02QT\t0\t126.78937306395682\t573\n+EOG090W0K04\t0\t13.698118905107428\t123\n+EOG090W02UI\t0\t75.33983172202097\t569\n+EOG090W00MS\t0\t222.74918630603344\t865\n+EOG090W0HXZ\t0\t14.672974042346928\t111\n+EOG090W09LF\t0\t49.42509730797137\t225\n+EOG090W060L\t0\t48.49481993961427\t407\n+EOG090W0I0Q\t0\t8.898446275006405\t87\n+EOG090W0B0M\t0\t19.378137039507983\t192\n+EOG090W0EY0\t0\t25.633665074147427\t177\n+EOG090W019L\t0\t161.9212108198199\t655\n+EOG090W0ALV\t0\t33.567421363630004\t327\n+EOG090W0F9J\t0\t11.9066247090783\t195\n+EOG090W0BZ2\t0\t13.511577175520067\t123\n+EOG090W0B5T\t0\t12.744268370786148\t181\n+EOG090W0JBN\t0\t8.059235823034468\t94\n+EOG090W06CO\t0\t44.76325256823732\t283\n+EOG090
W06Y4\t0\t58.56965239084716\t304\n+EOG090W00D0\t0\t173.60012223217007\t969\n+EOG090W0N7U\t0\t7.090271589968341\t73\n+EOG090W02H5\t0\t187.43347716555155\t645\n+EOG090W04DH\t0\t97.51184745911475\t425\n+EOG090W01HI\t0\t141.17028909316608\t668\n+EOG090W02JZ\t0\t106.2021542200327\t499\n+EOG090W0ANA\t0\t43.80791670031081\t242\n+EOG090W055F\t0\t25.315135740520443\t269\n+EOG090W0KMC\t0\t20.563083052290207\t149\n+EOG090W063Z\t0\t20.0'..b'656909\t232\n+EOG090W050K\t0\t77.93871889144076\t376\n+EOG090W0CL8\t0\t25.966747028064802\t218\n+EOG090W0JJQ\t0\t13.62856576746243\t117\n+EOG090W06W8\t0\t48.24622893316924\t267\n+EOG090W0EPV\t0\t35.10902433482864\t239\n+EOG090W00WM\t0\t260.8164746511948\t897\n+EOG090W005S\t0\t611.4917595121128\t1630\n+EOG090W02UQ\t0\t51.49464074592311\t285\n+EOG090W03FA\t0\t53.973113494178904\t344\n+EOG090W02B7\t0\t115.3009159526159\t425\n+EOG090W06DJ\t0\t77.792516412288\t338\n+EOG090W08FE\t0\t36.68154855844776\t279\n+EOG090W06P2\t0\t41.670994647017906\t299\n+EOG090W0C7S\t0\t29.611469746269982\t147\n+EOG090W0C4Z\t0\t26.43060073195963\t145\n+EOG090W00ZP\t0\t161.60550467275013\t697\n+EOG090W0C7Z\t0\t37.684920128988004\t192\n+EOG090W06AN\t0\t42.662308774978584\t289\n+EOG090W0FYR\t0\t13.508876473998257\t128\n+EOG090W015U\t0\t42.296492199255404\t276\n+EOG090W02LX\t0\t53.129566825639934\t495\n+EOG090W0DZ4\t0\t9.599076852633498\t186\n+EOG090W050Y\t0\t60.226966189955085\t368\n+EOG090W08GU\t0\t53.02740180911049\t280\n+EOG090W09LK\t0\t91.72851994063471\t372\n+EOG090W077G\t0\t76.76324145120006\t331\n+EOG090W0B8P\t0\t19.196863497747415\t150\n+EOG090W0A73\t0\t27.406826637563558\t161\n+EOG090W0B3U\t0\t43.48966796277244\t264\n+EOG090W0LL3\t0\t13.321975417932757\t120\n+EOG090W09R9\t0\t36.65014937864991\t212\n+EOG090W0A58\t0\t48.47060965162291\t255\n+EOG090W06VZ\t0\t29.419164232825445\t317\n+EOG090W080B\t0\t79.75592849488558\t305\n+EOG090W0ALP\t0\t13.883838648218155\t145\n+EOG090W0EJV\t0\t5.956771852621479\t160\n+EOG090W0BI6\t0\t30.97683170635923\t224\n+EOG090W05KO\t0\t95.43
3274449961\t425\n+EOG090W03K0\t0\t72.48414460771538\t334\n+EOG090W0JFZ\t0\t14.305445143137689\t92\n+EOG090W09RO\t0\t29.48406784028039\t267\n+EOG090W00ZV\t0\t61.02240025870282\t345\n+EOG090W005V\t0\t262.52686163098906\t1059\n+EOG090W0F9A\t0\t10.246423950758501\t105\n+EOG090W0IKC\t0\t14.947429244710978\t76\n+EOG090W04QG\t0\t49.56877927900001\t361\n+EOG090W00U5\t0\t71.63448419048026\t402\n+EOG090W0JS6\t0\t8.421979153901846\t87\n+EOG090W06X4\t0\t32.054695199308284\t297\n+EOG090W0H7U\t0\t24.51177799464656\t186\n+EOG090W02LH\t0\t31.4090320424614\t301\n+EOG090W06AU\t0\t21.750918119113738\t153\n+EOG090W0L6N\t0\t11.596992881882745\t94\n+EOG090W0028\t0\t588.5016762903192\t1601\n+EOG090W05ZG\t0\t33.671801910536935\t269\n+EOG090W0DSQ\t0\t53.83672675323885\t204\n+EOG090W0CIU\t0\t10.5996734768809\t188\n+EOG090W09DT\t0\t42.16152158590845\t238\n+EOG090W0883\t0\t43.82515148221423\t331\n+EOG090W08IL\t0\t31.073523765582504\t244\n+EOG090W07HX\t0\t43.87674109168712\t264\n+EOG090W0ADL\t0\t74.64253566218997\t270\n+EOG090W07E5\t0\t31.219065990205618\t342\n+EOG090W0CHN\t0\t36.32844749725724\t170\n+EOG090W0F27\t0\t28.270070111541965\t150\n+EOG090W05FW\t0\t36.389874917560235\t332\n+EOG090W061C\t0\t33.972962707452396\t322\n+EOG090W023I\t0\t6.011474071541897\t134\n+EOG090W09Y9\t0\t20.77931440362711\t167\n+EOG090W029L\t0\t129.73510535308327\t545\n+EOG090W078A\t0\t54.90487338981139\t401\n+EOG090W0C83\t0\t38.32093295366156\t262\n+EOG090W015Z\t0\t176.6733585172255\t684\n+EOG090W05IA\t0\t51.778721543176026\t345\n+EOG090W06HO\t0\t71.48257830521672\t342\n+EOG090W0E6K\t0\t76.78984701084445\t299\n+EOG090W032M\t0\t41.865258362423724\t238\n+EOG090W04ZL\t0\t64.14590294096341\t349\n+EOG090W0A4U\t0\t17.926850008284003\t178\n+EOG090W0G0Z\t0\t34.8181704466556\t189\n+EOG090W012F\t0\t210.2707396955778\t725\n+EOG090W08ME\t0\t14.38099468500289\t138\n+EOG090W090H\t0\t89.63598147718064\t390\n+EOG090W0C7T\t0\t14.33867361284194\t189\n+EOG090W0AUB\t0\t1.1004433696270324\t217\n+EOG090W094H\t0\t21.30754829636339\t247\n+E
OG090W00HE\t0\t188.33841438573634\t778\n+EOG090W0HKZ\t0\t15.56285848109415\t128\n+EOG090W02KK\t0\t67.63366170127085\t626\n+EOG090W0828\t0\t24.44301723915799\t140\n+EOG090W07PH\t0\t52.049033834231\t266\n+EOG090W01XB\t0\t112.40728428787705\t527\n+EOG090W02C5\t0\t174.99547032465253\t515\n+EOG090W00WO\t0\t160.60058592597883\t618\n+EOG090W0140\t0\t77.3058731633741\t335\n+EOG090W01QT\t0\t54.84769853734253\t240\n+EOG090W0FQ4\t0\t21.524963224401723\t140\n+EOG090W0CAH\t0\t18.306986664749655\t190\n+EOG090W080Z\t0\t42.915151411519396\t247\n+EOG090W02AU\t0\t90.99510841851644\t457\n+EOG090W096X\t0\t37.173612523675914\t248\n+EOG090W04OJ\t0\t60.586185145045505\t253\n+EOG090W09UY\t0\t59.0061956628029\t262\n+EOG090W07CG\t0\t39.79244204205305\t320\n+EOG090W0KFZ\t0\t5.208228204067198\t77\n+EOG090W0LWB\t0\t5.161494516225474\t78\n+EOG090W0F00\t0\t36.17643723361296\t196\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/insecta/prfl/EOG090W03A6.prfl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/insecta/prfl/EOG090W03A6.prfl Mon Oct 04 19:39:38 2021 +0000 |
[ |
b'@@ -0,0 +1,495 @@\n+[name]\n+unknown\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t11\n+\n+[block]\n+# block no. 0 follows, 26 sequences, length 14\n+# corresponding to MSA columns:\n+# 26-39\n+name=unknown_A\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\tY\tW\tH\tM\tC\tP\n+0\t0.01222\t0.00873\t0.01222\t0.01397\t0.01572\t0.00873\t0.01222\t0.01572\t0.01746\t0.02270\t0.04016\t0.08556\t0.04365\t0.02095\t0.01048\t0.00349\t0.00698\t0.63506\t0.00698\t0.00698\n+1\t0.00956\t0.00674\t0.00871\t0.01015\t0.01051\t0.00637\t0.00660\t0.01065\t0.01390\t0.01853\t0.03861\t0.60723\t0.04613\t0.15052\t0.01485\t0.00384\t0.00493\t0.01968\t0.00660\t0.00588\n+2\t0.01493\t0.01410\t0.02384\t0.62468\t0.13806\t0.01701\t0.02136\t0.01991\t0.01545\t0.02011\t0.01359\t0.02003\t0.01037\t0.00747\t0.00757\t0.00249\t0.00996\t0.00674\t0.00342\t0.00891\n+3\t0.01103\t0.00735\t0.00827\t0.00827\t0.00827\t0.00735\t0.00460\t0.01103\t0.01103\t0.01471\t0.02390\t0.04964\t0.02758\t0.73343\t0.03861\t0.00735\t0.00735\t0.01103\t0.00460\t0.00460\n+4\t0.00924\t0.00660\t0.00880\t0.01056\t0.01100\t0.00616\t0.00704\t0.01056\t0.01452\t0.01936\t0.04181\t0.72848\t0.05017\t0.02376\t0.00968\t0.00308\t0.00440\t0.02156\t0.00704\t0.00616\n+5\t0.02748\t0.02058\t0.02200\t0.01718\t0.02301\t0.02289\t0.01409\t0.57682\t0.12187\t0.04563\t0.01992\t0.01958\t0.01429\t0.00927\t0.00761\t0.00232\t0.00803\t0.00707\t0.00761\t0.01278\n+6\t0.15256\t0.01392\t0.02108\t0.55605\t0.07699\t0.01694\t0.01888\t0.02009\t0.01487\t0.02226\t0.01300\t0.01874\t0.00987\t0.00745\t0.00706\t0.00248\t0.00930\t0.00627\t0.00363\t0.00857\n+7\t0.11917\t0.01388\t0.02161\t0.58414\t0.08089\t0.01694\t0.01944\t0.01998\t0.01497\t0.02165\t0.01313\t0.01908\t0.00996\t0.00747\t0.00719\t0.00249\t0.00948\t0.00639\t0.00357\t0.00859\n+8\t0.07699\t0.01719\t0.02511\t0.32900\t0.31130\t0.06966\t0.02051\t0.02206\t0.01635\t0.02270\t0.01345\t0.01851\t0.01073\t0.00722\t0.00727\t0.00236\t0.00960\t0.00640\t0.00367\t0.00993\
n+9\t0.22837\t0.01515\t0.01649\t0.12060\t0.02277\t0.05359\t0.01194\t0.11110\t0.02019\t0.19029\t0.11508\t0.02414\t0.02104\t0.00925\t0.00698\t0.00235\t0.00710\t0.00718\t0.00650\t0.00989\n+10\t0.01881\t0.01720\t0.02326\t0.46321\t0.09186\t0.07067\t0.01933\t0.15694\t0.02017\t0.02577\t0.01441\t0.01919\t0.01082\t0.00786\t0.00752\t0.00241\t0.00992\t0.00662\t0.00434\t0.00968\n+11\t0.05197\t0.02202\t0.02168\t0.07042\t0.02458\t0.14127\t0.01479\t0.34418\t0.08557\t0.03496\t0.01658\t0.01808\t0.01254\t0.00931\t0.00949\t0.00240\t0.09660\t0.00652\t0.00601\t0.01103\n+12\t0.04828\t0.01906\t0.02153\t0.02065\t0.11087\t0.13727\t0.06274\t0.12273\t0.06954\t0.09870\t0.14622\t0.02634\t0.02452\t0.00986\t0.00865\t0.00235\t0.04619\t0.00808\t0.00615\t0.01027\n+13\t0.08658\t0.02046\t0.07459\t0.03874\t0.02561\t0.07075\t0.13650\t0.20915\t0.09128\t0.05154\t0.01799\t0.02037\t0.01366\t0.01401\t0.09115\t0.00355\t0.01079\t0.00712\t0.00554\t0.01060\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t30\n+\n+[block]\n+# block no. 
1 follows, 26 sequences, length 8\n+# corresponding to MSA columns:\n+# 71-78\n+name=unknown_B\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\tY\tW\tH\tM\tC\tP\n+0\t0.02194\t0.06459\t0.02353\t0.16038\t0.11196\t0.02016\t0.01601\t0.31327\t0.06090\t0.03449\t0.06104\t0.02133\t0.01644\t0.00872\t0.00744\t0.00227\t0.00824\t0.00711\t0.00593\t0.03426\n+1\t0.02052\t0.05093\t0.07159\t0.02726\t0.10655\t0.20206\t0.25692\t0.02480\t0.01765\t0.02167\t0.01318\t0.01732\t0.01070\t0.00789\t0.01035\t0.00243\t0.11776\t0.00689\t0.00371\t0.00981\n+2\t0.01568\t0.10472\t0.01891\t0.01780\t0.14957\t0.01459\t0.01164\t0.12310\t0.02011\t0.02557\t0.04608\t0.04525\t0.35102\t0.01341\t0.00792\t0.00233\t0.00617\t0.01110\t0.00599\t0.00904\n+3\t0.10891\t0.01808\t0.02949\t0.10245\t0.33046\t0.01872\t0.14796\t0.10391\t0.01901\t0.02814\t0.01447\t0.01843\t0.01139\t0.00710\t0.00749\t0.00236\t0.00943\t0.00690\t0.00433\t0.01097\n+4\t0.08080\t0.02151\t0.01983\t0.01626\t0.02095\t0.19381\t0.01303\t0.17602\t0.02227\t0.15089\t0.01650\t0.01750\t0.01226\t0.00877\t0.00914\t0.00220\t0.09913\t0.00596\t0.00583\t0.10734\n+5\t0.01885\t0.01934\t0.03543\t0.02641\t0.03208\t0.02036\t0.40079\t0.08844\t0.08944\t0.02629\t0.01628\t0.02034\t0.01261\t0.00841\t0.01174\t0.00268\t0.14739\t0.00825\t0.00453\t0.01032\n+6\t0.01577\t0.01496\t0.02536\t0.49795\t0.25820\t0.01723\t0.02178\t0.02065\t0.01585\t0.02113\t0.01374\t0.01975\t0.01073\t0.00732\t0.00756\t0.00244\t0.00975\t0.00675\t0.00350\t0.0095'..b'1\t0.00639\t0.00895\t0.00256\t0.01279\t0.00895\t0.00384\t0.01023\n+73\t0.01076\t0.00724\t0.00835\t0.00861\t0.00868\t0.00718\t0.00496\t0.01096\t0.01155\t0.01540\t0.02656\t0.15039\t0.03093\t0.62810\t0.03431\t0.00672\t0.00692\t0.01259\t0.00496\t0.00483\n+74\t0.01521\t0.03923\t0.69413\t0.02162\t0.03283\t0.01762\t0.02802\t0.02402\t0.01601\t0.02402\t0.01361\t0.01601\t0.00961\t0.00721\t0.00721\t0.00240\t0.01121\t0.00560\t0.00320\t0.01121\n+75\t0.01432\t0.01348\t0.02275\t0.71520\t0.05224\t0.01685\t0.0210
7\t0.01938\t0.01517\t0.01938\t0.01348\t0.02022\t0.01011\t0.00758\t0.00758\t0.00253\t0.01011\t0.00674\t0.00337\t0.00843\n+76\t0.01474\t0.01390\t0.02349\t0.65337\t0.11086\t0.01696\t0.02127\t0.01974\t0.01536\t0.01988\t0.01355\t0.02009\t0.01029\t0.00751\t0.00758\t0.00250\t0.01001\t0.00674\t0.00341\t0.00876\n+77\t0.01573\t0.01348\t0.01573\t0.01123\t0.01798\t0.01011\t0.00899\t0.01910\t0.01573\t0.02472\t0.01348\t0.01573\t0.01123\t0.00562\t0.00562\t0.00112\t0.00562\t0.00449\t0.00449\t0.77980\n+78\t0.02883\t0.02125\t0.02276\t0.01745\t0.02352\t0.02352\t0.01442\t0.66082\t0.03566\t0.04780\t0.01821\t0.01821\t0.01290\t0.00911\t0.00759\t0.00228\t0.00835\t0.00683\t0.00759\t0.01290\n+79\t0.01790\t0.02046\t0.04476\t0.03197\t0.03964\t0.01918\t0.65857\t0.02430\t0.01790\t0.02430\t0.01535\t0.02046\t0.01151\t0.00639\t0.00895\t0.00256\t0.01279\t0.00895\t0.00384\t0.01023\n+80\t0.01432\t0.01348\t0.02275\t0.71520\t0.05224\t0.01685\t0.02107\t0.01938\t0.01517\t0.01938\t0.01348\t0.02022\t0.01011\t0.00758\t0.00758\t0.00253\t0.01011\t0.00674\t0.00337\t0.00843\n+81\t0.01091\t0.00784\t0.01074\t0.01074\t0.01159\t0.00869\t0.00767\t0.01261\t0.01176\t0.01653\t0.02165\t0.03717\t0.02216\t0.30815\t0.45648\t0.01039\t0.01550\t0.00921\t0.00426\t0.00596\n+82\t0.14579\t0.01317\t0.01664\t0.01297\t0.01867\t0.01202\t0.01071\t0.03479\t0.02040\t0.59380\t0.02705\t0.02381\t0.01721\t0.00904\t0.00719\t0.00235\t0.00637\t0.00711\t0.00869\t0.01221\n+83\t0.01705\t0.01494\t0.02275\t0.58420\t0.04685\t0.01810\t0.01982\t0.13981\t0.01901\t0.02472\t0.01437\t0.01984\t0.01063\t0.00787\t0.00758\t0.00248\t0.00978\t0.00676\t0.00416\t0.00927\n+84\t0.01432\t0.01348\t0.02275\t0.71520\t0.05224\t0.01685\t0.02107\t0.01938\t0.01517\t0.01938\t0.01348\t0.02022\t0.01011\t0.00758\t0.00758\t0.00253\t0.01011\t0.00674\t0.00337\t0.00843\n+85\t0.01790\t0.02046\t0.04476\t0.03197\t0.03964\t0.01918\t0.65857\t0.02430\t0.01790\t0.02430\t0.01535\t0.02046\t0.01151\t0.00639\t0.00895\t0.00256\t0.01279\t0.00895\t0.00384\t0.01023\n+86\t0.02883\t0.02125\t0.02276
\t0.01745\t0.02352\t0.02352\t0.01442\t0.66082\t0.03566\t0.04780\t0.01821\t0.01821\t0.01290\t0.00911\t0.00759\t0.00228\t0.00835\t0.00683\t0.00759\t0.01290\n+87\t0.01659\t0.01659\t0.02323\t0.01991\t0.01991\t0.02323\t0.01659\t0.01825\t0.01162\t0.01825\t0.00996\t0.01659\t0.00996\t0.01328\t0.02489\t0.00332\t0.71955\t0.00664\t0.00332\t0.00830\n+88\t0.01039\t0.00753\t0.00980\t0.00967\t0.01123\t0.00694\t0.00708\t0.01348\t0.02001\t0.02797\t0.53948\t0.18340\t0.09020\t0.01719\t0.00908\t0.00252\t0.00374\t0.01528\t0.00806\t0.00694\n+89\t0.00949\t0.00678\t0.00909\t0.01084\t0.01139\t0.00637\t0.00747\t0.01099\t0.01477\t0.01964\t0.04167\t0.67500\t0.04963\t0.02353\t0.00975\t0.00311\t0.00462\t0.07259\t0.00704\t0.00623\n+90\t0.01432\t0.01348\t0.02275\t0.71520\t0.05224\t0.01685\t0.02107\t0.01938\t0.01517\t0.01938\t0.01348\t0.02022\t0.01011\t0.00758\t0.00758\t0.00253\t0.01011\t0.00674\t0.00337\t0.00843\n+91\t0.01579\t0.11910\t0.61934\t0.02063\t0.03130\t0.01903\t0.02630\t0.02387\t0.01595\t0.02332\t0.01326\t0.01557\t0.00962\t0.00712\t0.00694\t0.00231\t0.01086\t0.00543\t0.00321\t0.01104\n+92\t0.01499\t0.01416\t0.02396\t0.61489\t0.14734\t0.01703\t0.02140\t0.01996\t0.01548\t0.02019\t0.01360\t0.02000\t0.01040\t0.00746\t0.00757\t0.00249\t0.00995\t0.00674\t0.00343\t0.00896\n+93\t0.01755\t0.02345\t0.14323\t0.02872\t0.03641\t0.05046\t0.49780\t0.02386\t0.01761\t0.02374\t0.01621\t0.05431\t0.01303\t0.00744\t0.00862\t0.00253\t0.01217\t0.00886\t0.00390\t0.01012\n+94\t0.01667\t0.01591\t0.02684\t0.34869\t0.37232\t0.01770\t0.02201\t0.02131\t0.01609\t0.02207\t0.01374\t0.01934\t0.01107\t0.00739\t0.00823\t0.00242\t0.03764\t0.00675\t0.00356\t0.01024\n+95\t0.02037\t0.02306\t0.14580\t0.02556\t0.03296\t0.06905\t0.37543\t0.02613\t0.01817\t0.12499\t0.01723\t0.02046\t0.01252\t0.00947\t0.04096\t0.00291\t0.01198\t0.00784\t0.00459\t0.01053\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t774\n+\n+# created by:\n+# /home/cegg/simao/soft/augustus-3.2.1/scripts/msa2prfl.pl ./align_prep/EOG090W03A6.fa\n' 
|
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/insecta/prfl/EOG090W06A3.prfl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/insecta/prfl/EOG090W06A3.prfl Mon Oct 04 19:39:38 2021 +0000 |
[ |
b'@@ -0,0 +1,451 @@\n+[name]\n+unknown\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t78\n+\n+[block]\n+# block no. 0 follows, 35 sequences, length 97\n+# corresponding to MSA columns:\n+# 78-174\n+name=unknown_A\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\tY\tW\tH\tM\tC\tP\n+0\t0.01132\t0.00850\t0.01136\t0.01285\t0.05849\t0.00867\t0.00997\t0.01593\t0.10286\t0.03809\t0.03330\t0.08023\t0.10035\t0.01684\t0.04478\t0.00301\t0.00605\t0.42505\t0.00569\t0.00668\n+1\t0.01291\t0.01226\t0.02066\t0.57532\t0.15656\t0.01442\t0.04607\t0.01778\t0.04826\t0.01763\t0.01217\t0.01705\t0.00931\t0.00630\t0.00638\t0.00208\t0.00823\t0.00579\t0.00307\t0.00775\n+2\t0.02846\t0.01080\t0.01472\t0.01129\t0.01619\t0.00932\t0.00932\t0.03092\t0.01816\t0.74187\t0.02503\t0.02159\t0.01570\t0.00785\t0.00638\t0.00196\t0.00540\t0.00638\t0.00785\t0.01080\n+3\t0.00778\t0.00556\t0.00741\t0.00880\t0.00921\t0.00519\t0.00589\t0.00897\t0.01240\t0.01660\t0.06578\t0.74094\t0.04275\t0.01957\t0.00807\t0.00255\t0.00365\t0.01775\t0.00594\t0.00519\n+4\t0.00750\t0.00643\t0.00643\t0.00643\t0.00857\t0.00536\t0.00482\t0.00910\t0.01446\t0.01714\t0.06427\t0.06105\t0.73490\t0.01607\t0.00750\t0.00214\t0.00321\t0.01339\t0.00589\t0.00536\n+5\t0.00773\t0.00552\t0.00736\t0.00883\t0.00920\t0.00515\t0.00589\t0.00883\t0.01215\t0.01619\t0.03497\t0.77291\t0.04196\t0.01987\t0.00810\t0.00258\t0.00368\t0.01803\t0.00589\t0.00515\n+6\t0.00898\t0.00648\t0.00848\t0.00798\t0.00948\t0.00599\t0.00599\t0.01197\t0.01796\t0.02544\t0.73413\t0.04739\t0.05986\t0.01297\t0.00748\t0.00200\t0.00299\t0.01147\t0.00698\t0.00599\n+7\t0.82186\t0.01227\t0.00932\t0.00834\t0.01227\t0.01423\t0.00687\t0.01865\t0.01080\t0.02846\t0.00883\t0.01031\t0.00687\t0.00589\t0.00393\t0.00196\t0.00491\t0.00344\t0.00393\t0.00687\n+8\t0.82186\t0.01227\t0.00932\t0.00834\t0.01227\t0.01423\t0.00687\t0.01865\t0.01080\t0.02846\t0.00883\t0.01031\t0.00687\t0.00589\t0.00393\t0.00196\t0.00491\t0.00344\t0.00393\t0.00687
\n+9\t0.00907\t0.00677\t0.01003\t0.01003\t0.01111\t0.00784\t0.00773\t0.01122\t0.01015\t0.01461\t0.01714\t0.02574\t0.01621\t0.08357\t0.71641\t0.01000\t0.01646\t0.00692\t0.00342\t0.00557\n+10\t0.82186\t0.01227\t0.00932\t0.00834\t0.01227\t0.01423\t0.00687\t0.01865\t0.01080\t0.02846\t0.00883\t0.01031\t0.00687\t0.00589\t0.00393\t0.00196\t0.00491\t0.00344\t0.00393\t0.00687\n+11\t0.01578\t0.01363\t0.01434\t0.01291\t0.01650\t0.01578\t0.01004\t0.03371\t0.72602\t0.02654\t0.02582\t0.02367\t0.01937\t0.00861\t0.00646\t0.00215\t0.00502\t0.00717\t0.00646\t0.01004\n+12\t0.01198\t0.01128\t0.01903\t0.76180\t0.04369\t0.01409\t0.01762\t0.01621\t0.01268\t0.01621\t0.01128\t0.01691\t0.00846\t0.00634\t0.00634\t0.00211\t0.00846\t0.00564\t0.00282\t0.00705\n+13\t0.00773\t0.00552\t0.00736\t0.00883\t0.00920\t0.00515\t0.00589\t0.00883\t0.01215\t0.01619\t0.03497\t0.77291\t0.04196\t0.01987\t0.00810\t0.00258\t0.00368\t0.01803\t0.00589\t0.00515\n+14\t0.01198\t0.01128\t0.01903\t0.76180\t0.04369\t0.01409\t0.01762\t0.01621\t0.01268\t0.01621\t0.01128\t0.01691\t0.00846\t0.00634\t0.00634\t0.00211\t0.00846\t0.00564\t0.00282\t0.00705\n+15\t0.01315\t0.01128\t0.01315\t0.00940\t0.01503\t0.00846\t0.00752\t0.01597\t0.01315\t0.02067\t0.01128\t0.01315\t0.00940\t0.00470\t0.00470\t0.00094\t0.00470\t0.00376\t0.00376\t0.81583\n+16\t0.00773\t0.00552\t0.00736\t0.00883\t0.00920\t0.00515\t0.00589\t0.00883\t0.01215\t0.01619\t0.03497\t0.77291\t0.04196\t0.01987\t0.00810\t0.00258\t0.00368\t0.01803\t0.00589\t0.00515\n+17\t0.01578\t0.01363\t0.01434\t0.01291\t0.01650\t0.01578\t0.01004\t0.03371\t0.72602\t0.02654\t0.02582\t0.02367\t0.01937\t0.00861\t0.00646\t0.00215\t0.00502\t0.00717\t0.00646\t0.01004\n+18\t0.00773\t0.00552\t0.00736\t0.00883\t0.00920\t0.00515\t0.00589\t0.00883\t0.01215\t0.01619\t0.03497\t0.77291\t0.04196\t0.01987\t0.00810\t0.00258\t0.00368\t0.01803\t0.00589\t0.00515\n+19\t0.02412\t0.01777\t0.01904\t0.01460\t0.01967\t0.01967\t0.01206\t0.71633\t0.02983\t0.03998\t0.01523\t0.01523\t0.01079\t0.00762\t0.00635\t0.00190\t
0.00698\t0.00571\t0.00635\t0.01079\n+20\t0.01276\t0.01176\t0.01807\t0.60873\t0.03813\t0.01444\t0.01607\t0.01979\t0.15849\t0.01832\t0.01425\t0.01829\t0.01069\t0.00681\t0.00637\t0.00212\t0.00775\t0.00595\t0.00356\t0.00766\n+21\t0.01315\t0.01128\t0.01315\t0.00940\t0.01503\t0.00846\t0.00752\t0.01597\t0.01315\t0.02067\t0.01128\t0.01315\t0.00940\t0.00470\t0.00470\t0.00094\t0.00470\t0.00376\t0.00376\t0.81583\n+22\t0.01570\t0.01507\t0.02575\t0.03894\t0.73748\t0.01507\t0.01947\t0.01947\t0.01444\t0.02073\t0.01193\t0.0'..b'0.70975\t0.00996\t0.01636\t0.00694\t0.00342\t0.00556\n+89\t0.00785\t0.00644\t0.00692\t0.00680\t0.00879\t0.00551\t0.00510\t0.00979\t0.01530\t0.01913\t0.22483\t0.05778\t0.57309\t0.01532\t0.00749\t0.00211\t0.00316\t0.01293\t0.00615\t0.00551\n+90\t0.02370\t0.03023\t0.01798\t0.01634\t0.01961\t0.75158\t0.01226\t0.02533\t0.01798\t0.01553\t0.00981\t0.01144\t0.00817\t0.00654\t0.00572\t0.00163\t0.01144\t0.00409\t0.00327\t0.00735\n+91\t0.82186\t0.01227\t0.00932\t0.00834\t0.01227\t0.01423\t0.00687\t0.01865\t0.01080\t0.02846\t0.00883\t0.01031\t0.00687\t0.00589\t0.00393\t0.00196\t0.00491\t0.00344\t0.00393\t0.00687\n+92\t0.82186\t0.01227\t0.00932\t0.00834\t0.01227\t0.01423\t0.00687\t0.01865\t0.01080\t0.02846\t0.00883\t0.01031\t0.00687\t0.00589\t0.00393\t0.00196\t0.00491\t0.00344\t0.00393\t0.00687\n+93\t0.01501\t0.01702\t0.03689\t0.02731\t0.06618\t0.01600\t0.68185\t0.02028\t0.01495\t0.02034\t0.01279\t0.01705\t0.00965\t0.00536\t0.00743\t0.00213\t0.01055\t0.00740\t0.00321\t0.00863\n+94\t0.00891\t0.00648\t0.00838\t0.00791\t0.00943\t0.00596\t0.00593\t0.01183\t0.01779\t0.02504\t0.70181\t0.04805\t0.09243\t0.01312\t0.00748\t0.00200\t0.00300\t0.01157\t0.00693\t0.00596\n+95\t0.00773\t0.00552\t0.00736\t0.00883\t0.00920\t0.00515\t0.00589\t0.00883\t0.01215\t0.01619\t0.03497\t0.77291\t0.04196\t0.01987\t0.00810\t0.00258\t0.00368\t0.01803\t0.00589\t0.00515\n+96\t0.01315\t0.01128\t0.01315\t0.00940\t0.01503\t0.00846\t0.00752\t0.01597\t0.01315\t0.02067\t0.01128\t0.01315\t0.00940\t0.00470\t0.
00470\t0.00094\t0.00470\t0.00376\t0.00376\t0.81583\n+97\t0.01426\t0.01451\t0.01937\t0.01664\t0.01677\t0.04764\t0.01382\t0.01565\t0.01003\t0.01528\t0.00838\t0.01379\t0.00832\t0.01093\t0.02024\t0.00273\t0.73639\t0.00550\t0.00279\t0.00696\n+98\t0.01570\t0.01507\t0.02575\t0.03894\t0.73748\t0.01507\t0.01947\t0.01947\t0.01444\t0.02073\t0.01193\t0.01570\t0.01005\t0.00565\t0.00628\t0.00188\t0.00754\t0.00565\t0.00314\t0.01005\n+99\t0.02399\t0.02158\t0.01871\t0.01513\t0.01965\t0.24313\t0.01212\t0.50536\t0.02621\t0.03251\t0.01357\t0.01407\t0.00999\t0.00729\t0.00616\t0.00182\t0.00834\t0.00522\t0.00541\t0.00974\n+100\t0.00750\t0.00643\t0.00643\t0.00643\t0.00857\t0.00536\t0.00482\t0.00910\t0.01446\t0.01714\t0.06427\t0.06105\t0.73490\t0.01607\t0.00750\t0.00214\t0.00321\t0.01339\t0.00589\t0.00536\n+101\t0.02536\t0.01255\t0.01545\t0.01209\t0.01676\t0.01228\t0.00990\t0.19825\t0.08366\t0.47295\t0.05595\t0.02143\t0.01689\t0.00810\t0.00643\t0.00197\t0.00564\t0.00652\t0.00732\t0.01050\n+102\t0.01980\t0.04080\t0.05751\t0.01220\t0.01625\t0.04709\t0.01024\t0.14781\t0.13568\t0.24600\t0.06686\t0.13103\t0.02095\t0.00976\t0.00657\t0.00204\t0.00578\t0.00810\t0.00631\t0.00921\n+103\t0.02412\t0.01777\t0.01904\t0.01460\t0.01967\t0.01967\t0.01206\t0.71633\t0.02983\t0.03998\t0.01523\t0.01523\t0.01079\t0.00762\t0.00635\t0.00190\t0.00698\t0.00571\t0.00635\t0.01079\n+104\t0.00898\t0.00648\t0.00848\t0.00798\t0.00948\t0.00599\t0.00599\t0.01197\t0.01796\t0.02544\t0.73413\t0.04739\t0.05986\t0.01297\t0.00748\t0.00200\t0.00299\t0.01147\t0.00698\t0.00599\n+105\t0.01281\t0.01107\t0.01383\t0.06016\t0.01698\t0.00905\t0.02996\t0.01609\t0.03517\t0.02003\t0.01323\t0.05154\t0.01172\t0.03363\t0.00611\t0.00136\t0.00515\t0.00499\t0.00387\t0.64326\n+106\t0.01318\t0.09360\t0.65981\t0.01824\t0.05223\t0.01560\t0.02226\t0.01998\t0.01339\t0.01969\t0.01119\t0.01321\t0.00812\t0.00596\t0.00588\t0.00195\t0.00910\t0.00462\t0.00270\t0.00930\n+107\t0.01361\t0.01126\t0.01320\t0.00945\t0.01507\t0.00848\t0.00757\t0.01642\t0.01330\t0.04
218\t0.01169\t0.01341\t0.00958\t0.00479\t0.00475\t0.00097\t0.00472\t0.00384\t0.00388\t0.79183\n+108\t0.01557\t0.01687\t0.03452\t0.02493\t0.03104\t0.01623\t0.61740\t0.06216\t0.07226\t0.02197\t0.01400\t0.01752\t0.01047\t0.00574\t0.00734\t0.00213\t0.01003\t0.00736\t0.00365\t0.00880\n+109\t0.00762\t0.00643\t0.00660\t0.00656\t0.00864\t0.00541\t0.00492\t0.00934\t0.01475\t0.01783\t0.11991\t0.05992\t0.67883\t0.01581\t0.00750\t0.00213\t0.00319\t0.01323\t0.00598\t0.00541\n+110\t0.00750\t0.00643\t0.00643\t0.00643\t0.00857\t0.00536\t0.00482\t0.00910\t0.01446\t0.01714\t0.06427\t0.06105\t0.73490\t0.01607\t0.00750\t0.00214\t0.00321\t0.01339\t0.00589\t0.00536\n+111\t0.01022\t0.00730\t0.01022\t0.01168\t0.01314\t0.00730\t0.01022\t0.01314\t0.01460\t0.01899\t0.03359\t0.07156\t0.03651\t0.01752\t0.00876\t0.00292\t0.00584\t0.69478\t0.00584\t0.00584\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t0\n+\n+# created by:\n+# /home/cegg/simao/soft/augustus-3.2.1/scripts/msa2prfl.pl ./align_prep/EOG090W06A3.fa\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/insecta/prfl/EOG090W0GYE.prfl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/insecta/prfl/EOG090W0GYE.prfl Mon Oct 04 19:39:38 2021 +0000 |
[ |
b'@@ -0,0 +1,133 @@\n+[name]\n+unknown\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t1\n+\n+[block]\n+# block no. 0 follows, 37 sequences, length 109\n+# corresponding to MSA columns:\n+# 1-109\n+name=unknown_A\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\tY\tW\tH\tM\tC\tP\n+0\t0.02227\t0.01639\t0.01774\t0.01388\t0.01851\t0.01810\t0.01150\t0.67323\t0.02769\t0.03707\t0.01601\t0.01874\t0.01226\t0.00806\t0.00630\t0.00191\t0.00665\t0.05755\t0.00609\t0.01006\n+1\t0.01614\t0.01295\t0.01387\t0.01234\t0.01590\t0.01476\t0.00964\t0.03233\t0.68224\t0.07953\t0.02486\t0.02269\t0.01842\t0.00825\t0.00622\t0.00206\t0.00487\t0.00686\t0.00633\t0.00974\n+2\t0.01215\t0.01030\t0.01211\t0.00901\t0.01392\t0.00783\t0.00709\t0.01470\t0.01259\t0.01950\t0.01326\t0.09266\t0.01234\t0.00606\t0.00487\t0.00107\t0.00443\t0.00506\t0.00384\t0.73721\n+3\t0.02643\t0.01082\t0.01430\t0.01110\t0.01575\t0.00976\t0.00913\t0.05075\t0.07124\t0.67581\t0.02393\t0.02080\t0.01527\t0.00762\t0.00616\t0.00191\t0.00523\t0.00619\t0.00743\t0.01036\n+4\t0.01125\t0.01047\t0.01752\t0.71354\t0.03969\t0.01296\t0.01616\t0.01511\t0.01220\t0.01564\t0.01258\t0.07316\t0.01056\t0.00709\t0.00625\t0.00207\t0.00782\t0.00633\t0.00294\t0.00666\n+5\t0.01125\t0.01047\t0.01752\t0.71354\t0.03969\t0.01296\t0.01616\t0.01511\t0.01220\t0.01564\t0.01258\t0.07316\t0.01056\t0.00709\t0.00625\t0.00207\t0.00782\t0.00633\t0.00294\t0.00666\n+6\t0.01156\t0.01088\t0.01836\t0.77016\t0.04216\t0.01360\t0.01700\t0.01564\t0.01224\t0.01564\t0.01088\t0.01632\t0.00816\t0.00612\t0.00612\t0.00204\t0.00816\t0.00544\t0.00272\t0.00680\n+7\t0.00895\t0.00571\t0.00763\t0.00870\t0.00938\t0.00527\t0.00593\t0.01011\t0.01215\t0.07029\t0.03303\t0.72437\t0.03860\t0.01831\t0.00769\t0.00244\t0.00367\t0.01657\t0.00582\t0.00538\n+8\t0.00969\t0.00692\t0.00966\t0.01107\t0.01240\t0.00689\t0.00955\t0.01237\t0.01392\t0.01812\t0.03251\t0.12197\t0.03562\t0.01708\t0.00841\t0.00279\t0.00548\t0.65433\t0.00564\t0.00559\
n+9\t0.01156\t0.01088\t0.01836\t0.77016\t0.04216\t0.01360\t0.01700\t0.01564\t0.01224\t0.01564\t0.01088\t0.01632\t0.00816\t0.00612\t0.00612\t0.00204\t0.00816\t0.00544\t0.00272\t0.00680\n+10\t0.01570\t0.73033\t0.03022\t0.01033\t0.01520\t0.02279\t0.01012\t0.01760\t0.01238\t0.01449\t0.01039\t0.06715\t0.01028\t0.00627\t0.00422\t0.00140\t0.00632\t0.00432\t0.00285\t0.00764\n+11\t0.00937\t0.00647\t0.00721\t0.00711\t0.00736\t0.00663\t0.00415\t0.01066\t0.06293\t0.01289\t0.01971\t0.03878\t0.02199\t0.72714\t0.02930\t0.00565\t0.00585\t0.00875\t0.00390\t0.00415\n+12\t0.01575\t0.01474\t0.02436\t0.03583\t0.69259\t0.01487\t0.01825\t0.07139\t0.01504\t0.02138\t0.01175\t0.01512\t0.00975\t0.00559\t0.00606\t0.00182\t0.00723\t0.00546\t0.00326\t0.00975\n+13\t0.01183\t0.01105\t0.01802\t0.71383\t0.04021\t0.01372\t0.01646\t0.01690\t0.06602\t0.01638\t0.01192\t0.01680\t0.00894\t0.00628\t0.00613\t0.00204\t0.00791\t0.00555\t0.00298\t0.00701\n+14\t0.00746\t0.00533\t0.00710\t0.00852\t0.00888\t0.00497\t0.00568\t0.00852\t0.01172\t0.01563\t0.03374\t0.78088\t0.04049\t0.01918\t0.00781\t0.00249\t0.00355\t0.01740\t0.00568\t0.00497\n+15\t0.01445\t0.01651\t0.03612\t0.02580\t0.03199\t0.01548\t0.72446\t0.01961\t0.01445\t0.01961\t0.01238\t0.01651\t0.00929\t0.00516\t0.00722\t0.00206\t0.01032\t0.00722\t0.00310\t0.00826\n+16\t0.01228\t0.03166\t0.75316\t0.01745\t0.02649\t0.01422\t0.02262\t0.01939\t0.01292\t0.01939\t0.01099\t0.01292\t0.00775\t0.00582\t0.00582\t0.00194\t0.00905\t0.00452\t0.00258\t0.00905\n+17\t0.01637\t0.78856\t0.03208\t0.01047\t0.01571\t0.02422\t0.01047\t0.01833\t0.01244\t0.01440\t0.00851\t0.00982\t0.00786\t0.00524\t0.00393\t0.00131\t0.00655\t0.00327\t0.00262\t0.00786\n+18\t0.01269\t0.01088\t0.01269\t0.00907\t0.01451\t0.00816\t0.00725\t0.01541\t0.01269\t0.01995\t0.01088\t0.01269\t0.00907\t0.00453\t0.00453\t0.00091\t0.00453\t0.00363\t0.00363\t0.82229\n+19\t0.01269\t0.01088\t0.01269\t0.00907\t0.01451\t0.00816\t0.00725\t0.01541\t0.01269\t0.01995\t0.01088\t0.01269\t0.00907\t0.00453\t0.00453\t0.00091\t0
.00453\t0.00363\t0.00363\t0.82229\n+20\t0.01523\t0.01315\t0.01384\t0.01246\t0.01592\t0.01523\t0.00969\t0.03253\t0.73563\t0.02561\t0.02491\t0.02284\t0.01869\t0.00830\t0.00623\t0.00208\t0.00484\t0.00692\t0.00623\t0.00969\n+21\t0.82811\t0.01184\t0.00900\t0.00805\t0.01184\t0.01373\t0.00663\t0.01799\t0.01042\t0.02746\t0.00852\t0.00994\t0.00663\t0.00568\t0.00379\t0.00189\t0.00474\t0.00331\t0.00379\t0.00663\n+22\t0.00866\t0.00626\t0.00818\t0.00770\t0.00914\t0.00578\t0.00578\t0.01155\t0.01733\t0.02455\t0.74346\t0.04'..b'0.00723\t0.00207\t0.00310\t0.01292\t0.00568\t0.00517\n+86\t0.01141\t0.00571\t0.00571\t0.00571\t0.00713\t0.00571\t0.00428\t0.01426\t0.01284\t0.02282\t0.01997\t0.02282\t0.01569\t0.00713\t0.00428\t0.00143\t0.00285\t0.00571\t0.81886\t0.00571\n+87\t0.00746\t0.00533\t0.00710\t0.00852\t0.00888\t0.00497\t0.00568\t0.00852\t0.01172\t0.01563\t0.03374\t0.78088\t0.04049\t0.01918\t0.00781\t0.00249\t0.00355\t0.01740\t0.00568\t0.00497\n+88\t0.01637\t0.78856\t0.03208\t0.01047\t0.01571\t0.02422\t0.01047\t0.01833\t0.01244\t0.01440\t0.00851\t0.00982\t0.00786\t0.00524\t0.00393\t0.00131\t0.00655\t0.00327\t0.00262\t0.00786\n+89\t0.00723\t0.00620\t0.00620\t0.00620\t0.00827\t0.00517\t0.00465\t0.00878\t0.01395\t0.01654\t0.06201\t0.05891\t0.74421\t0.01550\t0.00723\t0.00207\t0.00310\t0.01292\t0.00568\t0.00517\n+90\t0.00746\t0.00533\t0.00710\t0.00852\t0.00888\t0.00497\t0.00568\t0.00852\t0.01172\t0.01563\t0.03374\t0.78088\t0.04049\t0.01918\t0.00781\t0.00249\t0.00355\t0.01740\t0.00568\t0.00497\n+91\t0.01445\t0.01651\t0.03612\t0.02580\t0.03199\t0.01548\t0.72446\t0.01961\t0.01445\t0.01961\t0.01238\t0.01651\t0.00929\t0.00516\t0.00722\t0.00206\t0.01032\t0.00722\t0.00310\t0.00826\n+92\t0.02287\t0.02917\t0.01735\t0.01577\t0.01892\t0.76030\t0.01183\t0.02444\t0.01735\t0.01498\t0.00946\t0.01104\t0.00788\t0.00631\t0.00552\t0.00158\t0.01104\t0.00394\t0.00315\t0.00710\n+93\t0.01156\t0.01088\t0.01836\t0.77016\t0.04216\t0.01360\t0.01700\t0.01564\t0.01224\t0.01564\t0.01088\t0.01632\t0.00816\t0.00612\t0.
00612\t0.00204\t0.00816\t0.00544\t0.00272\t0.00680\n+94\t0.01063\t0.00532\t0.00797\t0.00797\t0.00797\t0.00532\t0.00532\t0.00797\t0.00797\t0.01063\t0.01063\t0.01861\t0.01063\t0.02127\t0.02392\t0.82190\t0.00532\t0.00532\t0.00266\t0.00266\n+95\t0.02327\t0.01715\t0.01837\t0.01408\t0.01898\t0.01898\t0.01163\t0.72628\t0.02878\t0.03858\t0.01470\t0.01470\t0.01041\t0.00735\t0.00612\t0.00184\t0.00674\t0.00551\t0.00612\t0.01041\n+96\t0.01269\t0.01088\t0.01269\t0.00907\t0.01451\t0.00816\t0.00725\t0.01541\t0.01269\t0.01995\t0.01088\t0.01269\t0.00907\t0.00453\t0.00453\t0.00091\t0.00453\t0.00363\t0.00363\t0.82229\n+97\t0.01523\t0.01315\t0.01384\t0.01246\t0.01592\t0.01523\t0.00969\t0.03253\t0.73563\t0.02561\t0.02491\t0.02284\t0.01869\t0.00830\t0.00623\t0.00208\t0.00484\t0.00692\t0.00623\t0.00969\n+98\t0.00874\t0.00656\t0.00984\t0.00984\t0.01093\t0.00765\t0.00765\t0.01093\t0.00984\t0.01421\t0.01640\t0.02405\t0.01530\t0.04591\t0.76062\t0.00984\t0.01640\t0.00656\t0.00328\t0.00547\n+99\t0.01637\t0.78856\t0.03208\t0.01047\t0.01571\t0.02422\t0.01047\t0.01833\t0.01244\t0.01440\t0.00851\t0.00982\t0.00786\t0.00524\t0.00393\t0.00131\t0.00655\t0.00327\t0.00262\t0.00786\n+100\t0.00866\t0.00626\t0.00818\t0.00770\t0.00914\t0.00578\t0.00578\t0.01155\t0.01733\t0.02455\t0.74346\t0.04572\t0.05776\t0.01251\t0.00722\t0.00193\t0.00289\t0.01107\t0.00674\t0.00578\n+101\t0.02327\t0.01715\t0.01837\t0.01408\t0.01898\t0.01898\t0.01163\t0.72628\t0.02878\t0.03858\t0.01470\t0.01470\t0.01041\t0.00735\t0.00612\t0.00184\t0.00674\t0.00551\t0.00612\t0.01041\n+102\t0.02746\t0.01042\t0.01421\t0.01089\t0.01563\t0.00900\t0.00900\t0.02983\t0.01752\t0.75093\t0.02415\t0.02083\t0.01515\t0.00758\t0.00616\t0.00189\t0.00521\t0.00616\t0.00758\t0.01042\n+103\t0.00723\t0.00620\t0.00620\t0.00620\t0.00827\t0.00517\t0.00465\t0.00878\t0.01395\t0.01654\t0.06201\t0.05891\t0.74421\t0.01550\t0.00723\t0.00207\t0.00310\t0.01292\t0.00568\t0.00517\n+104\t0.00746\t0.00533\t0.00710\t0.00852\t0.00888\t0.00497\t0.00568\t0.00852\t0.01172\t0.01563
\t0.03374\t0.78088\t0.04049\t0.01918\t0.00781\t0.00249\t0.00355\t0.01740\t0.00568\t0.00497\n+105\t0.01523\t0.01315\t0.01384\t0.01246\t0.01592\t0.01523\t0.00969\t0.03253\t0.73563\t0.02561\t0.02491\t0.02284\t0.01869\t0.00830\t0.00623\t0.00208\t0.00484\t0.00692\t0.00623\t0.00969\n+106\t0.02327\t0.01715\t0.01837\t0.01408\t0.01898\t0.01898\t0.01163\t0.72628\t0.02878\t0.03858\t0.01470\t0.01470\t0.01041\t0.00735\t0.00612\t0.00184\t0.00674\t0.00551\t0.00612\t0.01041\n+107\t0.00723\t0.00620\t0.00620\t0.00620\t0.00827\t0.00517\t0.00465\t0.00878\t0.01395\t0.01654\t0.06201\t0.05891\t0.74421\t0.01550\t0.00723\t0.00207\t0.00310\t0.01292\t0.00568\t0.00517\n+108\t0.01445\t0.01651\t0.03612\t0.02580\t0.03199\t0.01548\t0.72446\t0.01961\t0.01445\t0.01961\t0.01238\t0.01651\t0.00929\t0.00516\t0.00722\t0.00206\t0.01032\t0.00722\t0.00310\t0.00826\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+11\t120\n+\n+# created by:\n+# /home/cegg/simao/soft/augustus-3.2.1/scripts/msa2prfl.pl ./align_prep/EOG090W0GYE.fa\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/insecta/prfl/EOG090W0T3K.prfl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/insecta/prfl/EOG090W0T3K.prfl Mon Oct 04 19:39:38 2021 +0000 |
[ |
b'@@ -0,0 +1,429 @@\n+[name]\n+unknown\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t16\n+\n+[block]\n+# block no. 0 follows, 30 sequences, length 21\n+# corresponding to MSA columns:\n+# 93-113\n+name=unknown_A\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\tY\tW\tH\tM\tC\tP\n+0\t0.01057\t0.00773\t0.01034\t0.01163\t0.01327\t0.00750\t0.00987\t0.01344\t0.01601\t0.02067\t0.06875\t0.11927\t0.14101\t0.01901\t0.00933\t0.00302\t0.00572\t0.50008\t0.00649\t0.00630\n+1\t0.01546\t0.06663\t0.01682\t0.01327\t0.01666\t0.04776\t0.06572\t0.02349\t0.22653\t0.06835\t0.14524\t0.05426\t0.08041\t0.04384\t0.04676\t0.00294\t0.00678\t0.04435\t0.00614\t0.00858\n+2\t0.01789\t0.53823\t0.20660\t0.01426\t0.02119\t0.06285\t0.01549\t0.02158\t0.01462\t0.01788\t0.01047\t0.01216\t0.00893\t0.00620\t0.00512\t0.00169\t0.00846\t0.00413\t0.00301\t0.00925\n+3\t0.12683\t0.01280\t0.01355\t0.01162\t0.01527\t0.03360\t0.00908\t0.13641\t0.11546\t0.07711\t0.12786\t0.08985\t0.04344\t0.01130\t0.00812\t0.05341\t0.00558\t0.00834\t0.00613\t0.09425\n+4\t0.14326\t0.01153\t0.03546\t0.01195\t0.01515\t0.01117\t0.05274\t0.04592\t0.11102\t0.02467\t0.09562\t0.20004\t0.11112\t0.01336\t0.00744\t0.00240\t0.00540\t0.05331\t0.00593\t0.04251\n+5\t0.02165\t0.01301\t0.01394\t0.01176\t0.01544\t0.09116\t0.00934\t0.08063\t0.01834\t0.27268\t0.05014\t0.02373\t0.01776\t0.01554\t0.12344\t0.00343\t0.00824\t0.00676\t0.16082\t0.04219\n+6\t0.01092\t0.00790\t0.01079\t0.01213\t0.01380\t0.00777\t0.01047\t0.01401\t0.01627\t0.02115\t0.08035\t0.07615\t0.09660\t0.01884\t0.00944\t0.00308\t0.00600\t0.57143\t0.00651\t0.00639\n+7\t0.01490\t0.01416\t0.02404\t0.44524\t0.32734\t0.01596\t0.02024\t0.01934\t0.01477\t0.01992\t0.01271\t0.01804\t0.01004\t0.00666\t0.00695\t0.00222\t0.00888\t0.00621\t0.00325\t0.00914\n+8\t0.01741\t0.27337\t0.28936\t0.01871\t0.02621\t0.10378\t0.13668\t0.02242\t0.01545\t0.01966\t0.01166\t0.01403\t0.00921\t0.00634\t0.00616\t0.00195\t0.00989\t0.00515\t0.00314\t0.00944
\n+9\t0.00978\t0.00661\t0.00772\t0.00809\t0.00818\t0.00652\t0.00474\t0.01005\t0.01087\t0.01450\t0.02570\t0.20443\t0.03005\t0.58960\t0.02952\t0.00588\t0.00615\t0.01233\t0.00474\t0.00455\n+10\t0.01976\t0.01393\t0.01804\t0.01508\t0.01922\t0.01460\t0.07980\t0.16972\t0.21138\t0.14459\t0.02649\t0.13414\t0.02277\t0.01140\t0.00766\t0.00241\t0.00656\t0.06562\t0.00682\t0.01001\n+11\t0.02375\t0.05431\t0.01867\t0.01574\t0.01930\t0.45877\t0.01197\t0.11245\t0.01988\t0.05951\t0.01404\t0.01807\t0.01203\t0.09670\t0.03288\t0.00273\t0.01088\t0.00560\t0.00438\t0.00832\n+12\t0.00905\t0.00728\t0.00811\t0.00811\t0.01012\t0.00650\t0.00600\t0.01186\t0.04448\t0.02139\t0.22165\t0.14724\t0.43929\t0.01702\t0.00828\t0.00238\t0.00362\t0.01457\t0.00680\t0.00624\n+13\t0.05667\t0.00906\t0.01074\t0.00966\t0.01254\t0.00852\t0.00747\t0.01900\t0.07503\t0.17135\t0.27144\t0.08776\t0.20624\t0.01356\t0.00772\t0.00228\t0.00426\t0.01152\t0.00729\t0.00791\n+14\t0.04127\t0.01698\t0.01775\t0.01504\t0.01907\t0.12772\t0.06970\t0.11033\t0.24965\t0.08074\t0.06339\t0.07501\t0.06234\t0.01012\t0.00723\t0.00224\t0.00714\t0.00849\t0.00621\t0.00958\n+15\t0.08182\t0.01473\t0.01498\t0.01335\t0.01725\t0.03410\t0.01044\t0.06184\t0.51550\t0.04834\t0.05791\t0.02708\t0.05309\t0.00969\t0.00695\t0.00231\t0.00561\t0.00790\t0.00682\t0.01031\n+16\t0.00861\t0.00647\t0.00778\t0.00870\t0.00970\t0.00586\t0.00593\t0.00996\t0.01406\t0.01808\t0.06850\t0.45850\t0.24625\t0.08430\t0.01101\t0.00301\t0.00410\t0.01721\t0.00632\t0.00564\n+17\t0.02657\t0.01690\t0.01962\t0.01639\t0.05856\t0.01788\t0.01271\t0.41742\t0.08333\t0.22637\t0.02048\t0.01947\t0.01409\t0.00842\t0.00700\t0.00213\t0.00706\t0.00661\t0.00725\t0.01176\n+18\t0.05960\t0.01505\t0.02492\t0.26659\t0.45727\t0.01618\t0.01999\t0.02020\t0.01504\t0.02166\t0.01270\t0.01737\t0.01029\t0.00648\t0.00679\t0.00216\t0.00846\t0.00608\t0.00339\t0.00979\n+19\t0.00917\t0.00665\t0.00857\t0.00985\t0.01070\t0.00627\t0.00710\t0.01074\t0.01393\t0.01819\t0.04060\t0.47254\t0.12255\t0.08385\t0.01126\t0.00319\t
0.00470\t0.14815\t0.00627\t0.00572\n+20\t0.05581\t0.01600\t0.01854\t0.01474\t0.01969\t0.01718\t0.04419\t0.31554\t0.18190\t0.16248\t0.02140\t0.02064\t0.01518\t0.00846\t0.00684\t0.00216\t0.00669\t0.00680\t0.05469\t0.01106\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t1\n+\n+[block]\n+# block no. 1 follows, 30 sequences, length 20\n+# corresponding to MSA columns:\n+# 116-135\n+name=unknown_B\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\t'..b'33\t0.01572\t0.00916\t0.09893\t0.05443\t0.01661\t0.06048\t0.08914\t0.29528\t0.05706\t0.01621\t0.00853\t0.00266\t0.00586\t0.17199\t0.04059\t0.00718\n+1\t0.01008\t0.00819\t0.01089\t0.01291\t0.10367\t0.00753\t0.00812\t0.01261\t0.01684\t0.02246\t0.28461\t0.17280\t0.27194\t0.01565\t0.00818\t0.00234\t0.00417\t0.01372\t0.00650\t0.00679\n+2\t0.01697\t0.01675\t0.01923\t0.11577\t0.11874\t0.14866\t0.01441\t0.02029\t0.04384\t0.04284\t0.01635\t0.04987\t0.03525\t0.01258\t0.05954\t0.00298\t0.21250\t0.00690\t0.03830\t0.00823\n+3\t0.00988\t0.00761\t0.01041\t0.10716\t0.01594\t0.00766\t0.00898\t0.01213\t0.01462\t0.01901\t0.07698\t0.32024\t0.13860\t0.05909\t0.01016\t0.00296\t0.00532\t0.16117\t0.00596\t0.00612\n+4\t0.01458\t0.01504\t0.06713\t0.05603\t0.04708\t0.08021\t0.08114\t0.03845\t0.04180\t0.02064\t0.04067\t0.07262\t0.02074\t0.08999\t0.05974\t0.00347\t0.08364\t0.09565\t0.06377\t0.00762\n+5\t0.01213\t0.00727\t0.00944\t0.00982\t0.01117\t0.00674\t0.00703\t0.01376\t0.01473\t0.11171\t0.09387\t0.39223\t0.07980\t0.12326\t0.01241\t0.00325\t0.00481\t0.07357\t0.00654\t0.00645\n+6\t0.01895\t0.19040\t0.13613\t0.01559\t0.02078\t0.13609\t0.04289\t0.12433\t0.05084\t0.02399\t0.06629\t0.01812\t0.01490\t0.00799\t0.00736\t0.00200\t0.05856\t0.00587\t0.04971\t0.00919\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t1\n+\n+[block]\n+# block no. 
11 follows, 30 sequences, length 15\n+# corresponding to MSA columns:\n+# 432-446\n+name=unknown_L\n+#\n+# <colnr> <probs for GDERKNQSTAVLIFYWHMCP>\n+#\tG\tD\tE\tR\tK\tN\tQ\tS\tT\tA\tV\tL\tI\tF\tY\tW\tH\tM\tC\tP\n+0\t0.06236\t0.06510\t0.08912\t0.16991\t0.02482\t0.07478\t0.01410\t0.04641\t0.01563\t0.11644\t0.01478\t0.01743\t0.01120\t0.00780\t0.00831\t0.00210\t0.10327\t0.00563\t0.08136\t0.06945\n+1\t0.14285\t0.23985\t0.21370\t0.01521\t0.02151\t0.06006\t0.05814\t0.06360\t0.01556\t0.02266\t0.01380\t0.07797\t0.01226\t0.00785\t0.00593\t0.00202\t0.00820\t0.00604\t0.00382\t0.00898\n+2\t0.07880\t0.07716\t0.01983\t0.01623\t0.04251\t0.14432\t0.02899\t0.04272\t0.07521\t0.09621\t0.01563\t0.04745\t0.01326\t0.01265\t0.06429\t0.00300\t0.20210\t0.00644\t0.00454\t0.00868\n+3\t0.01021\t0.00702\t0.00876\t0.00889\t0.01028\t0.00644\t0.00636\t0.01227\t0.01612\t0.05081\t0.28848\t0.24278\t0.15953\t0.10544\t0.01168\t0.00297\t0.00416\t0.03488\t0.00674\t0.00616\n+4\t0.01544\t0.03846\t0.06976\t0.05110\t0.11266\t0.03479\t0.07755\t0.07899\t0.08991\t0.04165\t0.09492\t0.11646\t0.02418\t0.10979\t0.01117\t0.00287\t0.00724\t0.00932\t0.00518\t0.00856\n+5\t0.00969\t0.00759\t0.00884\t0.00922\t0.01095\t0.00717\t0.00684\t0.01318\t0.07550\t0.02076\t0.14049\t0.24513\t0.29586\t0.04121\t0.00932\t0.00269\t0.00420\t0.07837\t0.00662\t0.00638\n+6\t0.01690\t0.01784\t0.02771\t0.06260\t0.05360\t0.07422\t0.22649\t0.04652\t0.01448\t0.01995\t0.01149\t0.01650\t0.00979\t0.00915\t0.01485\t0.00262\t0.35666\t0.00667\t0.00342\t0.00854\n+7\t0.01238\t0.00894\t0.01140\t0.01072\t0.01252\t0.00986\t0.00819\t0.10557\t0.01468\t0.02065\t0.02318\t0.03131\t0.07093\t0.13342\t0.42455\t0.00794\t0.01322\t0.00815\t0.06564\t0.00674\n+8\t0.01167\t0.00997\t0.01048\t0.01060\t0.02926\t0.07115\t0.00719\t0.01294\t0.01273\t0.01635\t0.02939\t0.06880\t0.15102\t0.31387\t0.14024\t0.00557\t0.04995\t0.00999\t0.03309\t0.00573\n+9\t0.00925\t0.00996\t0.08892\t0.00991\t0.01224\t0.00707\t0.00833\t0.01173\t0.01525\t0.02012\t0.14372\t0.35023\t0.25207\t0.01770\t0
.00834\t0.00252\t0.00449\t0.01551\t0.00625\t0.00640\n+10\t0.01655\t0.01534\t0.07694\t0.01239\t0.01839\t0.01227\t0.01075\t0.05370\t0.16343\t0.06417\t0.01692\t0.01762\t0.01303\t0.00657\t0.00590\t0.00154\t0.00585\t0.00530\t0.00505\t0.47830\n+11\t0.11634\t0.01390\t0.04532\t0.01352\t0.01857\t0.01281\t0.05510\t0.08091\t0.08585\t0.36568\t0.09789\t0.02439\t0.02065\t0.00880\t0.00690\t0.00219\t0.00624\t0.00728\t0.00717\t0.01048\n+12\t0.01105\t0.07400\t0.01185\t0.00947\t0.01187\t0.00952\t0.00741\t0.01565\t0.09949\t0.02356\t0.31299\t0.10422\t0.22936\t0.01467\t0.00787\t0.00229\t0.00417\t0.03677\t0.00673\t0.00706\n+13\t0.01971\t0.07456\t0.14709\t0.01386\t0.01908\t0.07351\t0.01304\t0.16242\t0.06634\t0.11664\t0.08820\t0.02331\t0.03910\t0.00872\t0.00659\t0.00203\t0.00708\t0.00685\t0.10223\t0.00964\n+14\t0.01454\t0.01364\t0.08113\t0.01206\t0.01537\t0.03959\t0.01025\t0.10345\t0.10294\t0.04920\t0.09273\t0.11455\t0.12668\t0.07330\t0.06551\t0.00337\t0.00701\t0.04199\t0.02468\t0.00801\n+\n+[dist]\n+# distance from previous block\n+# <min> <max>\n+0\t46\n+\n+# created by:\n+# /home/cegg/simao/soft/augustus-3.2.1/scripts/msa2prfl.pl ./align_prep/EOG090W0T3K.fa\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/insecta/scores_cutoff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/insecta/scores_cutoff Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,1658 @@\n+EOG090W0HFV\t20.79\n+EOG090W0427\t76.86\n+EOG090W09K7\t70.14\n+EOG090W0B5K\t128.73\n+EOG090W0153\t102.48\n+EOG090W051T\t215.17999999999998\n+EOG090W01WI\t383.17999999999995\n+EOG090W01A3\t180.95\n+EOG090W067A\t151.13\n+EOG090W0IUR\t68.03999999999999\n+EOG090W09BV\t69.41\n+EOG090W0AIA\t45.01\n+EOG090W05D8\t52.35999999999999\n+EOG090W01IP\t193.27\n+EOG090W02C3\t62.79\n+EOG090W0B1Y\t200.97\n+EOG090W01VD\t37.519999999999996\n+EOG090W035W\t263.76\n+EOG090W0DWN\t50.489999999999995\n+EOG090W028U\t132.57999999999998\n+EOG090W02TI\t99.96000000000001\n+EOG090W00PB\t71.53999999999999\n+EOG090W038B\t138.32\n+EOG090W0BUR\t78.89\n+EOG090W0F0L\t27.65\n+EOG090W08IZ\t79.59\n+EOG090W0BEB\t171.71\n+EOG090W0I37\t40.10999999999999\n+EOG090W0CQ9\t196.62999999999997\n+EOG090W04BS\t134.26\n+EOG090W0FCL\t64.96\n+EOG090W0AXJ\t159.88\n+EOG090W0FEP\t79.44999999999999\n+EOG090W0FVI\t120.18999999999998\n+EOG090W0GP3\t64.75\n+EOG090W01NH\t132.65\n+EOG090W0IEE\t90.64999999999999\n+EOG090W0IP7\t102.33999999999999\n+EOG090W038Z\t104.78999999999999\n+EOG090W0B8O\t113.39999999999999\n+EOG090W0KP0\t37.309999999999995\n+EOG090W064P\t142.79999999999998\n+EOG090W0A3V\t166.73999999999998\n+EOG090W0BOZ\t106.18999999999998\n+EOG090W0H6T\t34.089999999999996\n+EOG090W0EDI\t55.36999999999999\n+EOG090W0EFH\t43.89\n+EOG090W04BN\t196.48999999999998\n+EOG090W00SP\t246.04999999999998\n+EOG090W01ZN\t189.64000000000001\n+EOG090W014R\t24.29\n+EOG090W0FJE\t84.41999999999999\n+EOG090W00EP\t16.59\n+EOG090W07MY\t127.67999999999999\n+EOG090W00LR\t281.75\n+EOG090W06QR\t84.84\n+EOG090W06J7\t114.8\n+EOG090W04G6\t264.99\n+EOG090W06IG\t86.66\n+EOG090W036Y\t274.78000000000003\n+EOG090W0CMO\t61.53\n+EOG090W0BFE\t67.34\n+EOG090W0JYN\t62.71999999999999\n+EOG090W054P\t125.64999999999999\n+EOG090W0753\t80.64\n+EOG090W0FKG\t73.42999999999999\n+EOG090W0B6L\t51.66\n+EOG090W0C2I\t135.07999999999998\n+EOG090W057Z\t83.44\n+EOG090W01H1\t66.00999999999999\n+EOG090W07A3\t61.31999999999999\n+EOG090W0D5E\t42.20999999999999
4\n+EOG090W07PK\t54.809999999999995\n+EOG090W0GKW\t93.86999999999999\n+EOG090W0J9P\t43.47\n+EOG090W0A69\t93.61\n+EOG090W0DRQ\t38.15\n+EOG090W04G1\t107.31\n+EOG090W08L6\t41.019999999999996\n+EOG090W02QT\t95.68999999999998\n+EOG090W0K04\t62.92\n+EOG090W02UI\t134.11999999999998\n+EOG090W00MS\t211.54\n+EOG090W0HXZ\t69.86\n+EOG090W09LF\t17.71\n+EOG090W060L\t301.84\n+EOG090W0I0Q\t47.39\n+EOG090W0B0M\t95.33999999999999\n+EOG090W0EY0\t90.64999999999999\n+EOG090W019L\t107.66\n+EOG090W0ALV\t164.29\n+EOG090W0F9J\t138.67\n+EOG090W0BZ2\t67.19999999999999\n+EOG090W0B5T\t160.85999999999999\n+EOG090W0JBN\t66.43\n+EOG090W06CO\t91.41999999999999\n+EOG090W06Y4\t45.35999999999999\n+EOG090W00D0\t558.88\n+EOG090W0N7U\t66.08\n+EOG090W02H5\t136.95\n+EOG090W04DH\t85.61\n+EOG090W01HI\t194.52999999999997\n+EOG090W02JZ\t297.21999999999997\n+EOG090W0ANA\t126.07\n+EOG090W055F\t99.82\n+EOG090W0KMC\t57.68\n+EOG090W063Z\t241.49999999999997\n+EOG090W0BC3\t72.38\n+EOG090W08A5\t68.03999999999999\n+EOG090W0BMW\t135.1\n+EOG090W08CW\t75.46\n+EOG090W04FE\t98.28\n+EOG090W0FH5\t58.38\n+EOG090W00RS\t269.64\n+EOG090W0EG7\t91.21000000000001\n+EOG090W06OD\t170.1\n+EOG090W00PL\t78.05\n+EOG090W0AV1\t163.45\n+EOG090W015K\t58.239999999999995\n+EOG090W07NX\t129.22\n+EOG090W0J8V\t91.69999999999999\n+EOG090W0FQ8\t58.239999999999995\n+EOG090W0JN2\t78.75\n+EOG090W029M\t83.02\n+EOG090W00TM\t273.90999999999997\n+EOG090W046G\t122.14999999999999\n+EOG090W06J5\t163.23999999999998\n+EOG090W0GWR\t32.34\n+EOG090W063H\t165.48\n+EOG090W01MU\t39.48\n+EOG090W0K07\t75.80999999999999\n+EOG090W02IA\t214.68999999999997\n+EOG090W0HTD\t76.78999999999999\n+EOG090W0F78\t69.36999999999999\n+EOG090W0GR5\t118.58\n+EOG090W04O1\t20.72\n+EOG090W0CN5\t108.57\n+EOG090W00X5\t129.36\n+EOG090W09PQ\t73.5\n+EOG090W06OE\t182.91\n+EOG090W0G1I\t35.769999999999996\n+EOG090W0GZA\t79.53\n+EOG090W07X1\t60.48\n+EOG090W08E9\t146.93\n+EOG090W054N\t226.82\n+EOG090W02JO\t52.849999999999994\n+EOG090W0AKG\t103.81\n+EOG090W04DG\t148.47\n+EOG090W00L3\t195.16\n+EOG090W
03O0\t98.56\n+EOG090W0HX7\t36.739999999999995\n+EOG090W08N5\t76.64999999999999\n+EOG090W00BP\t168.21\n+EOG090W0AY7\t48.019999999999996\n+EOG090W051U\t222.67000000000002\n+EOG090W09PJ\t166.18\n+EOG090W03TV\t286.92999999999995\n+EOG090W08A9\t100.44999999999999\n+EOG090W0IBV\t35.629999999999995\n+EOG090W04NQ\t64.05\n+EOG090'..b'92999999999999\n+EOG090W05HI\t39.76\n+EOG090W0DJI\t49.49\n+EOG090W03WV\t73.00999999999999\n+EOG090W0ESV\t140.14\n+EOG090W004H\t39.6\n+EOG090W0BJR\t77.91\n+EOG090W0BPH\t198.37999999999997\n+EOG090W0AH5\t97.86\n+EOG090W0BKY\t96.58\n+EOG090W07XK\t114.38\n+EOG090W0KJ3\t76.58\n+EOG090W0DEY\t74.61999999999999\n+EOG090W0I7M\t15.189999999999998\n+EOG090W05GY\t169.26\n+EOG090W0IF2\t144.41\n+EOG090W09IF\t75.03999999999999\n+EOG090W04XG\t28.49\n+EOG090W06TC\t53.480000000000004\n+EOG090W00ZZ\t305.54999999999995\n+EOG090W05ZP\t55.58\n+EOG090W09AW\t96.46000000000001\n+EOG090W0FFP\t109.76\n+EOG090W0GQZ\t48.51\n+EOG090W0DYP\t29.33\n+EOG090W0GI3\t63.349999999999994\n+EOG090W04OX\t73.36\n+EOG090W0C66\t73.91999999999999\n+EOG090W04IF\t24.29\n+EOG090W04PI\t196.35\n+EOG090W0GPQ\t36.33\n+EOG090W06OY\t207.68999999999997\n+EOG090W0PW0\t15.26\n+EOG090W0BM0\t39.199999999999996\n+EOG090W08QR\t84.91\n+EOG090W00VU\t337.46999999999997\n+EOG090W06PP\t58.169999999999995\n+EOG090W0EIQ\t158.61999999999998\n+EOG090W0KXF\t61.10999999999999\n+EOG090W019B\t225.72\n+EOG090W05XP\t95.61999999999999\n+EOG090W01V1\t162.39999999999998\n+EOG090W0K88\t52.43\n+EOG090W0ITI\t40.10999999999999\n+EOG090W08AN\t158.54999999999998\n+EOG090W0PZH\t39.199999999999996\n+EOG090W0F7U\t63.14\n+EOG090W0CNN\t69.09\n+EOG090W08FZ\t140.35\n+EOG090W0FGQ\t103.38999999999999\n+EOG090W05BJ\t77.77\n+EOG090W0A4R\t40.04\n+EOG090W09QT\t40.10999999999999\n+EOG090W0GDE\t43.05\n+EOG090W050K\t148.72\n+EOG090W0CL8\t70.07\n+EOG090W0JJQ\t77.35\n+EOG090W06W8\t103.72999999999999\n+EOG090W0EPV\t35.559999999999995\n+EOG090W00WM\t140.63\n+EOG090W005S\t210.07\n+EOG090W02UQ\t98.0\n+EOG090W03FA\t114.72999999999999\n+EOG090W02B7\t11.
97\n+EOG090W06DJ\t35.209999999999994\n+EOG090W08FE\t122.63999999999999\n+EOG090W06P2\t125.72999999999999\n+EOG090W0C7S\t146.51999999999998\n+EOG090W0C4Z\t55.660000000000004\n+EOG090W00ZP\t144.33999999999997\n+EOG090W0C7Z\t30.52\n+EOG090W06AN\t199.35999999999999\n+EOG090W0FYR\t77.98\n+EOG090W015U\t27.44\n+EOG090W02LX\t267.67999999999995\n+EOG090W0DZ4\t191.17000000000002\n+EOG090W050Y\t215.67000000000002\n+EOG090W08GU\t50.81999999999999\n+EOG090W09LK\t108.36\n+EOG090W077G\t64.61\n+EOG090W0B8P\t58.51999999999999\n+EOG090W0A73\t39.059999999999995\n+EOG090W0B3U\t47.739999999999995\n+EOG090W0LL3\t81.69\n+EOG090W09R9\t63.06999999999999\n+EOG090W0A58\t69.64999999999999\n+EOG090W06VZ\t445.71999999999997\n+EOG090W080B\t61.88\n+EOG090W0ALP\t115.01\n+EOG090W0EJV\t163.1\n+EOG090W0BI6\t112.76999999999998\n+EOG090W05KO\t149.66\n+EOG090W03K0\t120.11999999999999\n+EOG090W0JFZ\t39.199999999999996\n+EOG090W09RO\t93.1\n+EOG090W00ZV\t161.98\n+EOG090W005V\t363.79\n+EOG090W0F9A\t109.55\n+EOG090W0IKC\t25.2\n+EOG090W04QG\t181.85999999999999\n+EOG090W00U5\t126.17\n+EOG090W0JS6\t59.64\n+EOG090W06X4\t243.24999999999997\n+EOG090W0H7U\t18.759999999999998\n+EOG090W02LH\t101.28999999999999\n+EOG090W06AU\t54.10999999999999\n+EOG090W0L6N\t74.13\n+EOG090W0028\t38.39\n+EOG090W05ZG\t117.24999999999999\n+EOG090W0DSQ\t52.36\n+EOG090W0CIU\t151.13\n+EOG090W09DT\t96.46000000000001\n+EOG090W0883\t125.22999999999999\n+EOG090W08IL\t186.41\n+EOG090W07HX\t141.11999999999998\n+EOG090W0ADL\t42.14\n+EOG090W07E5\t140.69\n+EOG090W0CHN\t36.19\n+EOG090W0F27\t66.08\n+EOG090W05FW\t224.20999999999998\n+EOG090W061C\t309.87\n+EOG090W023I\t166.65\n+EOG090W09Y9\t86.59\n+EOG090W029L\t280.28000000000003\n+EOG090W078A\t137.13\n+EOG090W0C83\t33.39\n+EOG090W015Z\t231.07\n+EOG090W05IA\t174.85999999999999\n+EOG090W06HO\t108.71000000000001\n+EOG090W0E6K\t68.25\n+EOG090W032M\t17.009999999999998\n+EOG090W04ZL\t137.82999999999998\n+EOG090W0A4U\t92.11999999999999\n+EOG090W0G0Z\t34.37\n+EOG090W012F\t376.75\n+EOG090W08ME\t86.66\n+EOG090W09
0H\t203.07\n+EOG090W0C7T\t133.28\n+EOG090W0AUB\t233.79999999999998\n+EOG090W094H\t153.51\n+EOG090W00HE\t202.16\n+EOG090W0HKZ\t63.76999999999999\n+EOG090W02KK\t178.78\n+EOG090W0828\t35.349999999999994\n+EOG090W07PH\t84.84\n+EOG090W01XB\t80.36\n+EOG090W02C5\t70.14\n+EOG090W00WO\t96.25\n+EOG090W0140\t17.849999999999998\n+EOG090W01QT\t99.46999999999998\n+EOG090W0FQ4\t61.669999999999995\n+EOG090W0CAH\t116.61999999999999\n+EOG090W080Z\t53.76\n+EOG090W02AU\t96.03999999999999\n+EOG090W096X\t86.03\n+EOG090W04OJ\t69.36999999999999\n+EOG090W09UY\t66.43\n+EOG090W07CG\t158.41\n+EOG090W0KFZ\t64.75\n+EOG090W0LWB\t66.33\n+EOG090W0F00\t32.269999999999996\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/interpro.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/interpro.tsv Mon Oct 04 19:39:38 2021 +0000 |
[ |
@@ -0,0 +1,49 @@ +ENTRY_AC ENTRY_TYPE ENTRY_NAME +IPR000126 Active_site Serine proteases, V8 family, serine active site +IPR000138 Active_site Hydroxymethylglutaryl-CoA lyase, active site +IPR000169 Active_site Cysteine peptidase, cysteine active site +IPR000180 Active_site Membrane dipeptidase, active site +IPR000189 Active_site Prokaryotic transglycosylase, active site +IPR000590 Active_site Hydroxymethylglutaryl-coenzyme A synthase, active site +IPR001252 Active_site Malate dehydrogenase, active site +IPR001345 Active_site Phosphoglycerate/bisphosphoglycerate mutase, active site +IPR001497 Active_site Methylated-DNA-[protein]-cysteine S-methyltransferase, active site +IPR001555 Active_site Phosphoribosylglycinamide formyltransferase, active site +IPR001579 Active_site Glycosyl hydrolases family 18 (GH18) active site +IPR001586 Active_site Beta-lactamase, class-C active site +IPR001969 Active_site Aspartic peptidase, active site +IPR002071 Active_site Thermonuclease active site +IPR002137 Active_site Beta-lactamase, class-D active site +IPR002168 Active_site Lipase, GDXG, putative histidine active site +IPR002471 Active_site Peptidase S9, serine active site +IPR004164 Active_site Coenzyme A transferase active site +IPR006650 Active_site Adenosine/AMP deaminase active site +IPR008255 Active_site Pyridine nucleotide-disulphide oxidoreductase, class-II, active site +IPR008259 Active_site FMN-dependent alpha-hydroxy acid dehydrogenase, active site +IPR008261 Active_site Iodothyronine deiodinase, active site +IPR008263 Active_site Glycoside hydrolase, family 16, active site +IPR008265 Active_site Lipase, GDSL, active site +IPR008266 Active_site Tyrosine-protein kinase, active site +IPR008268 Active_site Peptidase S16, active site +IPR008270 Active_site Glycosyl hydrolases family 25, active site +IPR008271 Active_site Serine/threonine-protein kinase, active site +IPR008272 Active_site 4-hydroxybenzoyl-CoA thioesterase, active site +IPR011767 Active_site Glutaredoxin 
active site +IPR012999 Active_site Pyridine nucleotide-disulphide oxidoreductase, class I, active site +IPR013808 Active_site Transglutaminase, active site +IPR016129 Active_site Peptidase family C14A, His active site +IPR016130 Active_site Protein-tyrosine phosphatase, active site +IPR017440 Active_site ATP-citrate lyase/succinyl-CoA ligase, active site +IPR017950 Active_site Urease active site +IPR018040 Active_site Pectinesterase, Tyr active site +IPR018053 Active_site Glycoside hydrolase, family 32, active site +IPR018057 Active_site Deoxyribonuclease I, active site +IPR018085 Active_site Uracil-DNA glycosylase, active site +IPR018088 Active_site Chalcone/stilbene synthase, active site +IPR018089 Active_site Orotidine 5'-phosphate decarboxylase, active site +IPR018114 Active_site Serine proteases, trypsin family, histidine active site +IPR018117 Active_site DNA methylase, C-5 cytosine-specific, active site +IPR018120 Active_site Glycoside hydrolase family 1, active site +IPR018129 Active_site Phosphoenolpyruvate carboxylase, Lys active site +IPR018148 Active_site Methylglyoxal synthase, active site +IPR018177 Active_site L-lactate dehydrogenase, active site |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/merops.dmnd |
b |
Binary file test-data/funannotate_db/merops.dmnd has changed |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/merops.formatted.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/merops.formatted.fa Mon Oct 04 19:39:38 2021 +0000 |
b |
@@ -0,0 +1,49 @@ +>MER0000002 S01A +IVNGEEAVPGSWPWQVSLQDKTGFHFCGGSLINENWVVTAAHCGVTTSDVVVAGEFDQGS +SSEKIQKLKIAKVFKNSKYNSLTINNDITLLKLSTAASFSQTVSAVCLPSASDDFAAGTT +CVTTGWGLTRYTNANTPDRLQQASLPLLSNTNCKKYWGTKIKDAMICAGASGVSSCMGDS +GGPLVCKKNGAWTLVGIVSWGSSTCSTSTPGVYARVTALVNWVQQTLAAN +>MER0000004 S01A +IVNGEDAVPGSWPWQVSLQDSTGFHFCGGSLISEDWVVTAAHCGVTTSDVVVAGEFDQGL +ETEDTQVLKIGKVFKNPKFSILTVRNDITLLKLATPAQFSETVSAVCLPSADEDFPAGML +CATTGWGKTKYNALKTPDKLQQATLPIVSNTDCRKYWGSRVTDVMICAGASGVSSCMGDS +GGPLVCQKNGAWTLAGIVSWGSSTCSTSTPAVYARVTALMPWVQETLAAN +>MER0000009 S01A +VVGGEVAKNGSAPYQVSLQVPGWGHNCGGSLLNDRWVLTAAHCLVGHAPGDLMVLVGTNS +LKEGGELLKVDKLLYHSRYNLPRFHNDIGLVRLEQPVQFSELVQSVEYSEKAVPANATVR +LTGWGRTSANGPSPTLLQSLNVVTLSNEDCNKKGGDPGYTDVGHLCTLTKTGEGACNGDS +GGPLVYEGKLVGVVNFGVPCALGYPDGFARVSYYHDWVRTTMANN +>MER0000012 S01A +YILTAAHCVSNEDVNHVITPIAAERFTIRAGSNDRFSGGVLVQVAEVIVHEEYGNFLNDV +ALLRLESPLILSASIQPIDLPTVDTPADVDVVISGWGRIKHQGDLPRYLQYNTLKSITRQ +QCEELIDFGFEGELCLLHQVDNGACNGDSGGP +>MER0000013 S01A +ITNGYPAYEGKVPYIVGLLFSGNGNWWCGGSIIGNTWVLTAAHCTNGASGVTINYGASIR +TQPQYTHWVGSGDIIQHHHYNSGNLHNDISLIRTPHVDFWSLVNKVELPSYNDRYQDYAG +WWAVASGWGGTYDGSPLPDWLQSVDVQIISQSDCSRTWSLHDNMICINTDGGKSTCGGDS +GGPLVTHDGNRLVGVTSFGSAAGCQSGAPAVFSRVTGYLDWIRDNTGIS +>MER0000015 S01A +ITNGQDAVMGQFPYQVGLSLNLGNFKSAWCGGSLIGNEWVLTAAHCTDGVKSVTVFLGAT +YRTEAEVKYTVKPNDILIHPGWNNKTLKNDISLVKIPETAYTALIQPVELPALASSYPSF +AGDEVIASGWGRISDSASGVTNYLQWARLEVISNAVCARTYGSTITSSNLCVKTPGGVST +CKGDSGGPLVLASSGVQVGLTSFGSILGCEKGFPAAFTRVTSYLEWINEHTGIS +>MER0000020 S01A +IVGGYNCEENSVPYQVSLNSGYHFCGGSLINEQWVVSAGHCYKSRIQVRLGEHNIEVLEG +NEQFINAAKIIRHPQYDRKTLNNDIMLIKLSSRAVINARVSTISLPTAPPATGTKCLISG +WGNTASSGADYPDELQCLDAPVLSQAKCEASYPGKITSNMFCVGFLEGGKDSCQGDSGGP +VVCNGQLQGVVSWGDGCAQKNKPGVYTKVYNYVKWIKNTIAAN +>MER0000021 S01A +IVGGYICEENSVPYQVSLNSGYHFCGGSLISEQWVVSAGHCYKSRIQVRLGEHNIEVLEG +NEQFINAAKIIRHPKYNSRTLDNDILLIKLSSPAVINSRVSAISLPTAPPAAGTESLISG +WGNTLSSGADYPDELQCLDAPVLSQAECEASYPGKITNNMFCVGFLEGGKDSCQGDSGGP +VVSNGELQGIVSWGYGCAQKNRPGVYTKVYNYVDWIKDTIAAN +>MER0000022 S01A 
+IVGGYTCEENSLPYQVSLNSGSHFCGGSLISEQWVVSAAHCYKTRIQVRLGEHNIKVLEG +NEQFINAAKIIRHPKYNRDTLDNDIMLIKLSSPAVINARVSTISLPTAPPAAGTECLISG +WGNTLSFGADYPDELKCLDAPVLREAECKASCPGKITNSMFCVGFLEGGKDSWKRDSGGP +VVCNGQLQGVVSWGHGCAWKNRPGVYTKVYNYVDWIKDTIAAN +>MER0000024 S01A +IVGGYTCGANTVPYQVSLNSGYHFCGGSLINSQWVVSAAHCYKSGIQVRLGEDNINVVEG +NEQFISASKSIVHPSYNSNTLNNDIMLIKLKSAASLNSRVASISLPTSCASAGTQCLISG +WGNTKSSGTSYPDVLKCLKAPILSDSSCKSAYPGQITSNMFCAGYLEGGKDSCQGDSGGP +VVCSGKLQGIVSWGSGCAQKNKPGVYTKVCNYVSWIKQTIASN |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/ncbi_cleaned_gene_products.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/ncbi_cleaned_gene_products.txt Mon Oct 04 19:39:38 2021 +0000 |
b |
@@ -0,0 +1,54 @@ +#version 1.70 +#Date 06-15-2021 +#Name Description +1-Oct mitochondrial intermediate peptidase +1AMINOCYCLOPROPANE1CARBOXYLATE Probable 1-aminocyclopropane-1-carboxylate deaminase +2ABA Protein phosphatase PP2A 55 kDa regulatory subunit +2ABD Serine/threonine-protein phosphatase 2A 55 kDa regulatory subunit B delta isoform +2E4.130 Regulator of nonsense transcripts 1 +2E4.130_0 Regulator of nonsense transcripts 1 +2E4.130_1 Regulator of nonsense transcripts 1 +2MBCD 2-methylacyl-CoA dehydrogenase, mitochondrial +2METHYLACONITATE Aconitate/2-methylaconitate hydratase +4CL 4-coumarate--CoA ligase +4CL1 4-coumarate--CoA ligase 1 +4CL2 4-coumarate--CoA ligase 2 +4CL3 4-coumarate--CoA ligase 3 +4CLL1 4-coumarate--CoA ligase-like 1 +4CLL2 4-coumarate--CoA ligase-like 2 +4CLL3 4-coumarate--CoA ligase-like 3 +4CLL7 4-coumarate--CoA ligase-like 7 +4CLL7_0 4-coumarate--CoA ligase-like 7 +4CLL7_1 4-coumarate--CoA ligase-like 7 +4CLL7_2 4-coumarate--CoA ligase-like 7 +4CLL9 4-coumarate--CoA ligase-like 9 +4COUMARATECOA 4-coumarateCoA ligase-like 6 +4EBP 4E-binding protein THOR +4EHP Eukaryotic translation initiation factor 4E type 2 +4HYDROXYPHENYLPYRUVATE 4-hydroxyphenylpyruvate dioxygenase +6-PGD 6-phosphogluconate dehydrogenase, decarboxylating +6GAL Endo-beta-1 6-galactanase +6HN3M 6-hydroxynicotinate 3-monooxygenase +6PGD 6-phosphogluconate dehydrogenase, decarboxylating +6PGL phosphogluconolactonase +6PGL4 6-phosphogluconolactonase 4 +20H10.080 NADH-ubiquinone oxidoreductase 21 subunit +26S 26s proteasome regulatory subunit 6B +26S_PRC 26S proteasome regulatory complex protein +40S 40s ribosomal protein SA +60S 60s acidic ribosomal protein P2 +A1 mating type regulatory protein, silenced copy at HMR locus +A1CF APOB1 complementation factor +A2 mating type regulatory protein, silenced copy at HMR locus +A4GALT Lactosylceramide 4-alpha-galactosyltransferase +A4GNT Alpha-1,4-N-acetylglucosaminyltransferase +A4LEA 4-alpha-L-fucosyltransferase +AAA1 Asc-type 
amino acid transporter 1 +AAC ADP,ATP carrier protein +AAC1 ADP/ATP carrier protein AAC1 +AAC2 ADP,ATP carrier protein 2 +AAC3 ADP/ATP carrier protein AAC3 +AACC7 Aminoglycoside N(3)-acetyltransferase VII +AACS Acetoacetyl-CoA synthetase +AACS_0 Acetoacetyl-CoA synthetase +AACS_1 Acetoacetyl-CoA synthetase |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/repeats.dmnd |
b |
Binary file test-data/funannotate_db/repeats.dmnd has changed |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/trained_species/fly/augustus/fly_exon_probs.pbl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_exon_probs.pbl Mon Oct 04 19:39:38 2021 +0000 |
[ |
b'@@ -0,0 +1,10659 @@\n+#exon model parameters\n+# begin of content independent part\n+\n+# Length distributions\n+[LENGTH]\n+# maximal individually stored length probability =\n+3000\n+# slope of smoothing bandwidth =\n+0.3\n+# smoothing minwindowcount =\n+8\n+# length single initial internal terminal\n+# total number of exons of above types\n+ 1959 8219 22997 8219\n+# number of exons exceeding length d\n+ 27 19 136 55\n+# 1000 P(len=k), k=0,1,..., 3000\n+0\t0\t1.95\t0.0374\t0.249\n+1\t0\t2.18\t0.0414\t0.284\n+2\t0\t2.4\t0.0456\t0.321\n+3\t0\t2.62\t0.0501\t0.359\n+4\t0\t2.84\t0.0548\t0.397\n+5\t0\t3.04\t0.0598\t0.435\n+6\t0\t3.23\t0.065\t0.473\n+7\t0\t3.41\t0.0706\t0.509\n+8\t0\t3.57\t0.0765\t0.543\n+9\t0\t3.71\t0.0828\t0.576\n+10\t0\t3.82\t0.0894\t0.606\n+11\t0\t3.92\t0.0965\t0.633\n+12\t0\t3.99\t0.104\t0.658\n+13\t0\t4.05\t0.112\t0.68\n+14\t0\t4.08\t0.121\t0.699\n+15\t0\t4.1\t0.13\t0.716\n+16\t0\t4.1\t0.139\t0.73\n+17\t0\t4.08\t0.15\t0.743\n+18\t0\t4.06\t0.16\t0.754\n+19\t0\t4.02\t0.172\t0.763\n+20\t0\t3.98\t0.184\t0.771\n+21\t0\t3.94\t0.198\t0.778\n+22\t0\t3.89\t0.212\t0.785\n+23\t0\t3.84\t0.226\t0.791\n+24\t0\t3.79\t0.242\t0.797\n+25\t0\t3.75\t0.259\t0.803\n+26\t0\t3.71\t0.277\t0.809\n+27\t0\t3.67\t0.295\t0.816\n+28\t0\t3.65\t0.315\t0.823\n+29\t0\t3.62\t0.335\t0.831\n+30\t0\t3.61\t0.357\t0.841\n+31\t0\t3.6\t0.379\t0.851\n+32\t0\t3.6\t0.401\t0.862\n+33\t0\t3.61\t0.425\t0.874\n+34\t0\t3.62\t0.449\t0.888\n+35\t0\t3.64\t0.473\t0.902\n+36\t0\t3.67\t0.498\t0.918\n+37\t0\t3.7\t0.524\t0.935\n+38\t0\t3.74\t0.549\t0.953\n+39\t0\t3.77\t0.575\t0.971\n+40\t0\t3.82\t0.601\t0.991\n+41\t0\t3.86\t0.628\t1.01\n+42\t0\t3.91\t0.654\t1.03\n+43\t0\t3.96\t0.68\t1.06\n+44\t0\t4.01\t0.706\t1.08\n+45\t0\t4.06\t0.732\t1.11\n+46\t0\t4.11\t0.759\t1.13\n+47\t0\t4.17\t0.785\t1.16\n+48\t0\t4.22\t0.811\t1.18\n+49\t0\t4.27\t0.838\t1.21\n+50\t0\t4.32\t0.864\t1.23\n+51\t0\t4.37\t0.892\t1.26\n+52\t0\t4.42\t0.92\t1.29\n+53\t0\t4.47\t0.949\t1.32\n+54\t0\t4.51\t0.979\t1.34\n+55\t0\t4.55\t1.01\t1.37\n
+56\t0\t4.58\t1.04\t1.4\n+57\t0\t4.61\t1.08\t1.43\n+58\t0\t4.63\t1.12\t1.45\n+59\t0\t4.65\t1.15\t1.48\n+60\t0\t4.65\t1.2\t1.51\n+61\t0\t4.65\t1.24\t1.53\n+62\t0\t4.65\t1.28\t1.56\n+63\t0\t4.63\t1.33\t1.58\n+64\t0\t4.6\t1.37\t1.61\n+65\t0\t4.57\t1.42\t1.63\n+66\t0\t4.53\t1.47\t1.64\n+67\t0\t4.48\t1.52\t1.66\n+68\t0\t4.43\t1.56\t1.67\n+69\t0\t4.37\t1.61\t1.69\n+70\t0\t4.31\t1.66\t1.7\n+71\t0\t4.24\t1.7\t1.7\n+72\t0\t4.17\t1.74\t1.71\n+73\t0\t4.1\t1.79\t1.72\n+74\t0\t4.03\t1.83\t1.72\n+75\t0\t3.96\t1.86\t1.73\n+76\t0\t3.89\t1.9\t1.73\n+77\t0\t3.83\t1.94\t1.74\n+78\t0\t3.76\t1.97\t1.75\n+79\t0\t3.71\t2.01\t1.76\n+80\t0\t3.65\t2.05\t1.77\n+81\t0\t3.61\t2.08\t1.78\n+82\t0\t3.57\t2.12\t1.8\n+83\t0\t3.53\t2.15\t1.81\n+84\t0\t3.5\t2.19\t1.83\n+85\t0\t3.47\t2.23\t1.84\n+86\t0\t3.45\t2.27\t1.86\n+87\t0\t3.43\t2.31\t1.88\n+88\t0\t3.41\t2.35\t1.89\n+89\t0\t3.4\t2.39\t1.91\n+90\t0\t3.38\t2.44\t1.92\n+91\t0\t3.37\t2.48\t1.93\n+92\t0\t3.36\t2.53\t1.95\n+93\t0\t3.35\t2.57\t1.96\n+94\t0\t3.33\t2.62\t1.97\n+95\t0\t3.31\t2.66\t1.98\n+96\t0\t3.29\t2.71\t2\n+97\t0\t3.27\t2.76\t2.01\n+98\t0\t3.25\t2.81\t2.02\n+99\t0\t3.22\t2.85\t2.04\n+100\t0\t3.2\t2.9\t2.05\n+101\t0\t3.17\t2.95\t2.07\n+102\t0\t3.14\t3\t2.08\n+103\t0\t3.12\t3.05\t2.1\n+104\t0\t3.09\t3.1\t2.11\n+105\t0\t3.07\t3.15\t2.13\n+106\t0\t3.04\t3.2\t2.14\n+107\t0\t3.03\t3.24\t2.16\n+108\t0\t3.01\t3.29\t2.17\n+109\t0\t3\t3.33\t2.18\n+110\t0\t2.99\t3.37\t2.2\n+111\t0\t2.98\t3.41\t2.21\n+112\t0\t2.98\t3.45\t2.22\n+113\t0\t2.97\t3.49\t2.22\n+114\t0\t2.97\t3.52\t2.23\n+115\t0\t2.97\t3.56\t2.24\n+116\t0\t2.97\t3.59\t2.24\n+117\t0\t2.96\t3.62\t2.25\n+118\t0\t2.96\t3.65\t2.25\n+119\t0\t2.95\t3.68\t2.25\n+120\t0\t2.94\t3.71\t2.26\n+121\t0\t2.93\t3.74\t2.26\n+122\t0\t2.91\t3.77\t2.27\n+123\t0\t2.9\t3.8\t2.28\n+124\t0\t2.88\t3.84\t2.29\n+125\t0\t2.86\t3.87\t2.29\n+126\t0\t2.84\t3.9\t2.3\n+127\t0\t2.81\t3.93\t2.32\n+128\t0\t2.79\t3.96\t2.33\n+129\t0\t2.76\t3.99\t2.34\n+130\t0\t2.74\t4.01\t2.35\n+131\t0\t2.71\t4.04\t2.36\n+132\t0\t2.68\t4.06\t2
.37\n+133\t0\t2.66\t4.08\t2.38\n+134\t0\t2.63\t4.1\t2.39\n+135\t0\t2.61\t4.12\t2.39\n+136\t0\t2.58\t4.14\t2.4\n+137\t0\t2.56\t4.15\t2.41\n+138\t0\t2.53\t4.16\t2.41\n+139\t0\t2.51\t4.17\t2.42\n+140\t0\t2.49\t4.17\t2.43\n+141\t0\t2.47\t4.18\t2.44\n+142\t0\t2.44\t4.18\t2.45\n+143\t0\t2.43\t4.18\t2.46\n+144\t0\t2.41\t4.18\t2.48\n+145\t0\t2.39\t4.18\t2.49\n+146\t0\t2.37\t4.18\t2.51\n+147\t0\t2.35\t4.17\t2.52\n+148\t0\t2.33\t4.17\t2.54\n+149\t0\t2.32\t4.16\t2.56\n+150\t0\t2.3\t4.15\t2.58\n+151\t0\t2.28\t4.15\t2.6\n+152\t0\t2.27\t4.13\t2.62\n+153\t0\t2.25\t4.12\t2.63\n+154\t0\t2.24\t4.1\t2.65\n+155\t0\t2.23\t4.09\t2.66\n+156\t0\t2.21\t4.07\t2.67\n+157\t0\t2.2\t4.04\t2.67\n+158\t0\t2.19\t4.02\t2.68\n+159\t0\t2.18\t4\t2.68\n+160\t0\t2.16\t3.9'..b'25\n+tgcca \t0.25 \t0.25 \t0.25\n+tgccc \t0.25 \t0.25 \t0.25\n+tgccg \t0.25 \t0.25 \t0.25\n+tgcct \t0.25 \t0.25 \t0.25\n+tgcga \t0.25 \t0.25 \t0.25\n+tgcgc \t0.25 \t0.25 \t0.25\n+tgcgg \t0.25 \t0.25 \t0.25\n+tgcgt \t0.25 \t0.25 \t0.25\n+tgcta \t0.25 \t0.25 \t0.25\n+tgctc \t0.25 \t0.25 \t0.25\n+tgctg \t0.25 \t0.25 \t0.25\n+tgctt \t0.25 \t0.25 \t0.25\n+tggaa \t0.25 \t0.25 \t0.25\n+tggac \t0.25 \t0.25 \t0.25\n+tggag \t0.25 \t0.25 \t0.25\n+tggat \t0.25 \t0.25 \t0.25\n+tggca \t0.25 \t0.25 \t0.25\n+tggcc \t0.25 \t0.25 \t0.25\n+tggcg \t0.25 \t0.25 \t0.25\n+tggct \t0.25 \t0.25 \t0.25\n+tggga \t0.25 \t0.25 \t0.25\n+tgggc \t0.25 \t0.25 \t0.25\n+tgggg \t0.25 \t0.25 \t0.25\n+tgggt \t0.25 \t0.25 \t0.25\n+tggta \t0.25 \t0.25 \t0.25\n+tggtc \t0.25 \t0.25 \t0.25\n+tggtg \t0.25 \t0.25 \t0.25\n+tggtt \t0.25 \t0.25 \t0.25\n+tgtaa \t0.25 \t0.25 \t0.25\n+tgtac \t0.25 \t0.25 \t0.25\n+tgtag \t0.25 \t0.25 \t0.25\n+tgtat \t0.25 \t0.25 \t0.25\n+tgtca \t0.25 \t0.25 \t0.25\n+tgtcc \t0.25 \t0.25 \t0.25\n+tgtcg \t0.25 \t0.25 \t0.25\n+tgtct \t0.25 \t0.25 \t0.25\n+tgtga \t0.25 \t0.25 \t0.25\n+tgtgc \t0.25 \t0.25 \t0.25\n+tgtgg \t0.25 \t0.25 \t0.25\n+tgtgt \t0.25 \t0.25 \t0.25\n+tgtta \t0.25 \t0.25 \t0.25\n+tgttc \t0.25 \t0.25 \t0.25\n+tgttg \t0.25 \t0.25 
\t0.25\n+tgttt \t0.25 \t0.25 \t0.25\n+ttaaa \t0.25 \t0.25 \t0.25\n+ttaac \t0.25 \t0.25 \t0.25\n+ttaag \t0.25 \t0.25 \t0.25\n+ttaat \t0.25 \t0.25 \t0.25\n+ttaca \t0.25 \t0.25 \t0.25\n+ttacc \t0.25 \t0.25 \t0.25\n+ttacg \t0.25 \t0.25 \t0.25\n+ttact \t0.25 \t0.25 \t0.25\n+ttaga \t0.25 \t0.25 \t0.25\n+ttagc \t0.25 \t0.25 \t0.25\n+ttagg \t0.25 \t0.25 \t0.25\n+ttagt \t0.25 \t0.25 \t0.25\n+ttata \t0.25 \t0.25 \t0.25\n+ttatc \t0.25 \t0.25 \t0.25\n+ttatg \t0.25 \t0.25 \t0.25\n+ttatt \t0.25 \t0.25 \t0.25\n+ttcaa \t0.25 \t0.25 \t0.25\n+ttcac \t0.25 \t0.25 \t0.25\n+ttcag \t0.25 \t0.25 \t0.25\n+ttcat \t0.25 \t0.25 \t0.25\n+ttcca \t0.25 \t0.25 \t0.25\n+ttccc \t0.25 \t0.25 \t0.25\n+ttccg \t0.25 \t0.25 \t0.25\n+ttcct \t0.25 \t0.25 \t0.25\n+ttcga \t0.25 \t0.25 \t0.25\n+ttcgc \t0.25 \t0.25 \t0.25\n+ttcgg \t0.25 \t0.25 \t0.25\n+ttcgt \t0.25 \t0.25 \t0.25\n+ttcta \t0.25 \t0.25 \t0.25\n+ttctc \t0.25 \t0.25 \t0.25\n+ttctg \t0.25 \t0.25 \t0.25\n+ttctt \t0.25 \t0.25 \t0.25\n+ttgaa \t0.25 \t0.25 \t0.25\n+ttgac \t0.25 \t0.25 \t0.25\n+ttgag \t0.25 \t0.25 \t0.25\n+ttgat \t0.25 \t0.25 \t0.25\n+ttgca \t0.25 \t0.25 \t0.25\n+ttgcc \t0.25 \t0.25 \t0.25\n+ttgcg \t0.25 \t0.25 \t0.25\n+ttgct \t0.25 \t0.25 \t0.25\n+ttgga \t0.25 \t0.25 \t0.25\n+ttggc \t0.25 \t0.25 \t0.25\n+ttggg \t0.25 \t0.25 \t0.25\n+ttggt \t0.25 \t0.25 \t0.25\n+ttgta \t0.25 \t0.25 \t0.25\n+ttgtc \t0.25 \t0.25 \t0.25\n+ttgtg \t0.25 \t0.25 \t0.25\n+ttgtt \t0.25 \t0.25 \t0.25\n+tttaa \t0.25 \t0.25 \t0.25\n+tttac \t0.25 \t0.25 \t0.25\n+tttag \t0.25 \t0.25 \t0.25\n+tttat \t0.25 \t0.25 \t0.25\n+tttca \t0.25 \t0.25 \t0.25\n+tttcc \t0.25 \t0.25 \t0.25\n+tttcg \t0.25 \t0.25 \t0.25\n+tttct \t0.25 \t0.25 \t0.25\n+tttga \t0.25 \t0.25 \t0.25\n+tttgc \t0.25 \t0.25 \t0.25\n+tttgg \t0.25 \t0.25 \t0.25\n+tttgt \t0.25 \t0.25 \t0.25\n+tttta \t0.25 \t0.25 \t0.25\n+ttttc \t0.25 \t0.25 \t0.25\n+ttttg \t0.25 \t0.25 \t0.25\n+ttttt \t0.25 \t0.25 \t0.25\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/trained_species/fly/augustus/fly_igenic_probs.pbl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_igenic_probs.pbl Mon Oct 04 19:39:38 2021 +0000 |
[ |
b"@@ -0,0 +1,3445 @@\n+[1]\n+# (a,c,g,t)= (0.295, 0.205, 0.205, 0.295)\n+#\n+# Probabilities file for the intergenic region model\n+#\n+\n+# k =\n+4\n+\n+# The P_l's\n+[P_ls]\n+# l=\n+0\n+# Values\n+A\t0.304\n+C\t0.196\n+G\t0.196\n+T\t0.304\n+# l=\n+1\n+# Values\n+AA\t0.112\n+AC\t0.0524\n+AG\t0.0515\n+AT\t0.088\n+CA\t0.0665\n+CC\t0.0404\n+CG\t0.0378\n+CT\t0.0515\n+GA\t0.0524\n+GC\t0.051\n+GG\t0.0404\n+GT\t0.0524\n+TA\t0.0729\n+TC\t0.0525\n+TG\t0.0665\n+TT\t0.112\n+# l=\n+2\n+# Values\n+AAA\t0.0446\n+AAC\t0.0182\n+AAG\t0.0173\n+AAT\t0.0319\n+ACA\t0.0197\n+ACC\t0.00915\n+ACG\t0.00876\n+ACT\t0.0148\n+AGA\t0.0141\n+AGC\t0.0136\n+AGG\t0.00903\n+AGT\t0.0148\n+ATA\t0.0251\n+ATC\t0.0139\n+ATG\t0.0171\n+ATT\t0.0319\n+CAA\t0.0228\n+CAC\t0.0135\n+CAG\t0.013\n+CAT\t0.0171\n+CCA\t0.0146\n+CCC\t0.00889\n+CCG\t0.00783\n+CCT\t0.00903\n+CGA\t0.0118\n+CGC\t0.00949\n+CGG\t0.00783\n+CGT\t0.00876\n+CTA\t0.0105\n+CTC\t0.0106\n+CTG\t0.013\n+CTT\t0.0173\n+GAA\t0.0192\n+GAC\t0.00881\n+GAG\t0.0106\n+GAT\t0.0139\n+GCA\t0.0166\n+GCC\t0.0113\n+GCG\t0.00949\n+GCT\t0.0136\n+GGA\t0.0111\n+GGC\t0.0113\n+GGG\t0.00889\n+GGT\t0.00915\n+GTA\t0.0118\n+GTC\t0.00881\n+GTG\t0.0135\n+GTT\t0.0182\n+TAA\t0.0254\n+TAC\t0.0118\n+TAG\t0.0105\n+TAT\t0.0251\n+TCA\t0.0156\n+TCC\t0.0111\n+TCG\t0.0118\n+TCT\t0.0141\n+TGA\t0.0156\n+TGC\t0.0166\n+TGG\t0.0146\n+TGT\t0.0197\n+TTA\t0.0254\n+TTC\t0.0192\n+TTG\t0.0228\n+TTT\t0.0446\n+# l=\n+3\n+# 
Values\n+AAAA\t0.0172\n+AAAC\t0.00735\n+AAAG\t0.00683\n+AAAT\t0.0133\n+AACA\t0.00675\n+AACC\t0.00305\n+AACG\t0.00312\n+AACT\t0.00531\n+AAGA\t0.00461\n+AAGC\t0.0042\n+AAGG\t0.00313\n+AAGT\t0.00541\n+AATA\t0.00908\n+AATC\t0.00494\n+AATG\t0.00662\n+AATT\t0.0113\n+ACAA\t0.00692\n+ACAC\t0.00428\n+ACAG\t0.00328\n+ACAT\t0.00519\n+ACCA\t0.00329\n+ACCC\t0.00206\n+ACCG\t0.00177\n+ACCT\t0.00202\n+ACGA\t0.00278\n+ACGC\t0.00217\n+ACGG\t0.00174\n+ACGT\t0.00208\n+ACTA\t0.00318\n+ACTC\t0.00281\n+ACTG\t0.00337\n+ACTT\t0.00541\n+AGAA\t0.00511\n+AGAC\t0.00223\n+AGAG\t0.00305\n+AGAT\t0.00366\n+AGCA\t0.00446\n+AGCC\t0.00283\n+AGCG\t0.00259\n+AGCT\t0.00373\n+AGGA\t0.00284\n+AGGC\t0.00226\n+AGGG\t0.0019\n+AGGT\t0.00202\n+AGTA\t0.0032\n+AGTC\t0.00248\n+AGTG\t0.00379\n+AGTT\t0.00531\n+ATAA\t0.00847\n+ATAC\t0.00397\n+ATAG\t0.00317\n+ATAT\t0.00947\n+ATCA\t0.00424\n+ATCC\t0.00279\n+ATCG\t0.00317\n+ATCT\t0.00366\n+ATGA\t0.00422\n+ATGC\t0.00407\n+ATGG\t0.00365\n+ATGT\t0.00518\n+ATTA\t0.00767\n+ATTC\t0.00495\n+ATTG\t0.00602\n+ATTT\t0.0133\n+CAAA\t0.00896\n+CAAC\t0.00433\n+CAAG\t0.00353\n+CAAT\t0.00602\n+CACA\t0.00525\n+CACC\t0.00243\n+CACG\t0.002\n+CACT\t0.00379\n+CAGA\t0.00344\n+CAGC\t0.00407\n+CAGG\t0.00215\n+CAGT\t0.00338\n+CATA\t0.00464\n+CATC\t0.00309\n+CATG\t0.00278\n+CATT\t0.00662\n+CCAA\t0.00486\n+CCAC\t0.00325\n+CCAG\t0.00288\n+CCAT\t0.00365\n+CCCA\t0.00319\n+CCCC\t0.00225\n+CCCG\t0.00154\n+CCCT\t0.0019\n+CCGA\t0.00238\n+CCGC\t0.00225\n+CCGG\t0.00146\n+CCGT\t0.00174\n+CCTA\t0.00173\n+CCTC\t0.00201\n+CCTG\t0.00215\n+CCTT\t0.00313\n+CGAA\t0.00419\n+CGAC\t0.00209\n+CGAG\t0.00231\n+CGAT\t0.00317\n+CGCA\t0.00305\n+CGCC\t0.00236\n+CGCG\t0.00149\n+CGCT\t0.00259\n+CGGA\t0.0022\n+CGGC\t0.00232\n+CGGG\t0.00154\n+CGGT\t0.00178\n+CGTA\t0.00197\n+CGTC\t0.00167\n+CGTG\t0.002\n+CGTT\t0.00312\n+CTAA\t0.00374\n+CTAC\t0.00193\n+CTAG\t0.00168\n+CTAT\t0.00317\n+CTCA\t0.00284\n+CTCC\t0.00236\n+CTCG\t0.00231\n+CTCT\t0.00305\n+CTGA\t0.00304\n+CTGC\t0.00383\n+CTGG\t0.00288\n+CTGT\t0.00328\n+CTTA\t0.00383\n+CTTC
\t0.00316\n+CTTG\t0.00353\n+CTTT\t0.00683\n+GAAA\t0.00815\n+GAAC\t0.00296\n+GAAG\t0.00316\n+GAAT\t0.00495\n+GACA\t0.003\n+GACC\t0.00166\n+GACG\t0.00167\n+GACT\t0.00248\n+GAGA\t0.003\n+GAGC\t0.00274\n+GAGG\t0.00201\n+GAGT\t0.00281\n+GATA\t0.00357\n+GATC\t0.00227\n+GATG\t0.00309\n+GATT\t0.00494\n+GCAA\t0.00554\n+GCAC\t0.00316\n+GCAG\t0.00383\n+GCAT\t0.00407\n+GCCA\t0.00448\n+GCCC\t0.00221\n+GCCG\t0.00232\n+GCCT\t0.00226\n+GCGA\t0.00295\n+GCGC\t0.00212\n+GCGG\t0.00225\n+GCGT\t0.00217\n+GCTA\t0.00261\n+GCTC\t0.00274\n+GCTG\t0.00407\n+GCTT\t0.0042\n+GGAA\t0.00414\n+GGAC\t0.00178\n+GGAG\t0.00236\n+GGAT\t0.00279\n+GGCA\t0.0037\n+GGCC\t0.00239\n+GGCG\t0.00236\n+GGCT\t0.00282\n+GGGA\t0.00237\n+GGGC\t0.00221\n+GGGG\t0.00225\n+GGGT\t0.00206\n+GGTA\t0.00201\n+GGTC\t0.00166\n+GGTG\t0.00243\n+GGTT\t0.00305\n+GTAA\t0.0039\n+GTAC\t0.00205\n+GTAG\t0.00193\n+GTAT\t0.00397\n+GTCA\t0.00271\n+GTCC\t0.00178\n+GTCG\t0.00209\n+GTCT\t0.00223\n+GTGA\t0.00279\n+GTGC\t0.00315\n+GTGG\t0.00325\n+GTGT\t0.00428\n+GTTA\t0.00359\n+GTTC\t0.00296\n+GTTG\t0.00433\n+GTTT\t0.00735\n+TAAA\t0.0103\n+TAAC\t0.00359\n+TAAG\t0.00383\n+TAAT\t0.00768\n+TACA\t0.00467\n+TACC\t0.00201\n+TACG\t0.00197\n+TACT\t0."..b'1188\n+GTGCA\t415919\n+GTGCC\t261766\n+GTGCG\t295620\n+GTGCT\t323411\n+GTGGA\t332993\n+GTGGC\t385960\n+GTGGG\t323106\n+GTGGT\t292234\n+GTGTA\t359272\n+GTGTC\t243286\n+GTGTG\t676471\n+GTGTT\t478535\n+GTTAA\t529553\n+GTTAC\t237009\n+GTTAG\t244930\n+GTTAT\t462774\n+GTTCA\t351705\n+GTTCC\t242639\n+GTTCG\t284705\n+GTTCT\t337027\n+GTTGA\t395847\n+GTTGC\t467863\n+GTTGG\t398583\n+GTTGT\t518738\n+GTTTA\t656320\n+GTTTC\t525254\n+GTTTG\t635106\n+GTTTT\t1204266\n+TAAAA\t1550931\n+TAAAC\t656320\n+TAAAG\t584278\n+TAAAT\t1453686\n+TAACA\t514469\n+TAACC\t257738\n+TAACG\t228098\n+TAACT\t473671\n+TAAGA\t411688\n+TAAGC\t412400\n+TAAGG\t243072\n+TAAGT\t505986\n+TAATA\t887022\n+TAATC\t429688\n+TAATG\t557972\n+TAATT\t1279151\n+TACAA\t638171\n+TACAC\t359272\n+TACAG\t269554\n+TACAT\t651698\n+TACCA\t294521\n+TACCC\t186072\n+TACC
G\t134084\n+TACCT\t210051\n+TACGA\t255187\n+TACGC\t188583\n+TACGG\t144135\n+TACGT\t222807\n+TACTA\t299145\n+TACTC\t243515\n+TACTG\t242385\n+TACTT\t529461\n+TAGAA\t445486\n+TAGAC\t192718\n+TAGAG\t225170\n+TAGAT\t371662\n+TAGCA\t338376\n+TAGCC\t240530\n+TAGCG\t157045\n+TAGCT\t334860\n+TAGGA\t194399\n+TAGGC\t179285\n+TAGGG\t138443\n+TAGGT\t199383\n+TAGTA\t299145\n+TAGTC\t203900\n+TAGTG\t245329\n+TAGTT\t560260\n+TATAA\t905114\n+TATAC\t478377\n+TATAG\t374869\n+TATAT\t1443312\n+TATCA\t432085\n+TATCC\t266505\n+TATCG\t299809\n+TATCT\t467523\n+TATGA\t432243\n+TATGC\t441619\n+TATGG\t339799\n+TATGT\t693461\n+TATTA\t887022\n+TATTC\t522722\n+TATTG\t602062\n+TATTT\t1720154\n+TCAAA\t830733\n+TCAAC\t395847\n+TCAAG\t362165\n+TCAAT\t678967\n+TCACA\t402109\n+TCACC\t212946\n+TCACG\t164762\n+TCACT\t367070\n+TCAGA\t303978\n+TCAGC\t373449\n+TCAGG\t185559\n+TCAGT\t385818\n+TCATA\t432243\n+TCATC\t340433\n+TCATG\t254268\n+TCATT\t707017\n+TCCAA\t464453\n+TCCAC\t332993\n+TCCAG\t285196\n+TCCAT\t425248\n+TCCCA\t332570\n+TCCCC\t246063\n+TCCCG\t164879\n+TCCCT\t229526\n+TCCGA\t253087\n+TCCGC\t252341\n+TCCGG\t163286\n+TCCGT\t233416\n+TCCTA\t194399\n+TCCTC\t267623\n+TCCTG\t279201\n+TCCTT\t425709\n+TCGAA\t507221\n+TCGAC\t242629\n+TCGAG\t284084\n+TCGAT\t460551\n+TCGCA\t375343\n+TCGCC\t294688\n+TCGCG\t173249\n+TCGCT\t370922\n+TCGGA\t253087\n+TCGGC\t265278\n+TCGGG\t190433\n+TCGGT\t269143\n+TCGTA\t255187\n+TCGTC\t227718\n+TCGTG\t218373\n+TCGTT\t441393\n+TCTAA\t393086\n+TCTAC\t220356\n+TCTAG\t208283\n+TCTAT\t413312\n+TCTCA\t305326\n+TCTCC\t255587\n+TCTCG\t248235\n+TCTCT\t424487\n+TCTGA\t303978\n+TCTGC\t389381\n+TCTGG\t307187\n+TCTGT\t414129\n+TCTTA\t411688\n+TCTTC\t375086\n+TCTTG\t371605\n+TCTTT\t735509\n+TGAAA\t979910\n+TGAAC\t351705\n+TGAAG\t365491\n+TGAAT\t675983\n+TGACA\t381622\n+TGACC\t222424\n+TGACG\t180167\n+TGACT\t330719\n+TGAGA\t305326\n+TGAGC\t300824\n+TGAGG\t195709\n+TGAGT\t363535\n+TGATA\t432085\n+TGATC\t244669\n+TGATG\t377889\n+TGATT\t689516\n+TGCAA\t739059\n+TGCAC\t415919\n+TGCAG\t455674\n+TG
CAT\t610206\n+TGCCA\t582755\n+TGCCC\t291844\n+TGCCG\t300708\n+TGCCT\t343894\n+TGCGA\t375343\n+TGCGC\t275852\n+TGCGG\t278984\n+TGCGT\t321845\n+TGCTA\t338376\n+TGCTC\t373175\n+TGCTG\t560268\n+TGCTT\t560387\n+TGGAA\t571498\n+TGGAC\t232424\n+TGGAG\t307866\n+TGGAT\t396414\n+TGGCA\t582755\n+TGGCC\t458889\n+TGGCG\t310066\n+TGGCT\t490513\n+TGGGA\t332570\n+TGGGC\t341654\n+TGGGG\t289650\n+TGGGT\t347866\n+TGGTA\t294521\n+TGGTC\t249199\n+TGGTG\t330244\n+TGGTT\t479937\n+TGTAA\t596325\n+TGTAC\t339181\n+TGTAG\t275425\n+TGTAT\t708010\n+TGTCA\t381622\n+TGTCC\t255633\n+TGTCG\t254663\n+TGTCT\t341705\n+TGTGA\t402109\n+TGTGC\t487257\n+TGTGG\t435188\n+TGTGT\t832228\n+TGTTA\t514469\n+TGTTC\t395511\n+TGTTG\t687960\n+TGTTT\t1174890\n+TTAAA\t1506984\n+TTAAC\t529553\n+TTAAG\t603193\n+TTAAT\t1184370\n+TTACA\t596325\n+TTACC\t289904\n+TTACG\t255155\n+TTACT\t462537\n+TTAGA\t393086\n+TTAGC\t402136\n+TTAGG\t254810\n+TTAGT\t488881\n+TTATA\t905114\n+TTATC\t500716\n+TTATG\t638944\n+TTATT\t1435871\n+TTCAA\t845058\n+TTCAC\t412587\n+TTCAG\t438454\n+TTCAT\t676983\n+TTCCA\t571498\n+TTCCC\t388709\n+TTCCG\t320193\n+TTCCT\t422559\n+TTCGA\t507221\n+TTCGC\t428272\n+TTCGG\t349863\n+TTCGT\t436061\n+TTCTA\t445486\n+TTCTC\t421020\n+TTCTG\t474000\n+TTCTT\t759053\n+TTGAA\t845058\n+TTGAC\t370823\n+TTGAG\t388608\n+TTGAT\t662989\n+TTGCA\t739059\n+TTGCC\t538214\n+TTGCG\t352393\n+TTGCT\t647156\n+TTGGA\t464453\n+TTGGC\t630591\n+TTGGG\t411508\n+TTGGT\t490945\n+TTGTA\t638171\n+TTGTC\t422058\n+TTGTG\t611423\n+TTGTT\t1171948\n+TTTAA\t1506984\n+TTTAC\t638916\n+TTTAG\t573057\n+TTTAT\t1526553\n+TTTCA\t979910\n+TTTCC\t759290\n+TTTCG\t714479\n+TTTCT\t897347\n+TTTGA\t830733\n+TTTGC\t883669\n+TTTGG\t776539\n+TTTGT\t1190742\n+TTTTA\t1550931\n+TTTTC\t1313857\n+TTTTG\t1382023\n+TTTTT\t2802550\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/trained_species/fly/augustus/fly_intron_probs.pbl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_intron_probs.pbl Mon Oct 04 19:39:38 2021 +0000 |
[ |
b'@@ -0,0 +1,20382 @@\n+#intron model parameters\n+# begin of content independent part\n+#\n+# ASS probabilities\n+#only nonpseudocount values are shown\n+[ASS]\n+# Size of vector\n+1024\n+# c_ass (ASS count)\n+31180\n+# asspseudocount (added to all possible patterns, no matter if they occur)\n+0.01\n+# Probabilities * 1000\n+aaaaa\t0.321\n+aaaac\t0.289\n+aaaag\t0.0324\n+aaaat\t0.289\n+aaaca\t0.161\n+aaacc\t0.257\n+aaacg\t0.0644\n+aaact\t0.257\n+aaaga\t0.0324\n+aaagc\t0.0644\n+aaagg\t0.0324\n+aaagt\t0.0965\n+aaata\t0.161\n+aaatc\t0.193\n+aaatg\t0.129\n+aaatt\t0.161\n+aacaa\t0.193\n+aacag\t0.129\n+aacat\t0.129\n+aacca\t0.129\n+aaccc\t0.193\n+aaccg\t0.193\n+aacct\t0.129\n+aacga\t0.161\n+aacgc\t0.0644\n+aacgg\t0.0965\n+aacta\t0.0644\n+aactc\t0.0965\n+aactg\t0.129\n+aactt\t0.289\n+aagaa\t0.129\n+aagag\t0.0324\n+aagat\t0.161\n+aagca\t0.0324\n+aagcc\t0.0965\n+aagcg\t0.193\n+aagct\t0.129\n+aagga\t0.0324\n+aaggc\t0.0324\n+aaggg\t0.0965\n+aaggt\t0.0644\n+aagta\t0.0324\n+aagtc\t0.0965\n+aagtg\t0.129\n+aagtt\t0.193\n+aataa\t0.193\n+aatac\t0.353\n+aatag\t0.0644\n+aatat\t0.353\n+aatca\t0.417\n+aatcc\t0.513\n+aatcg\t0.385\n+aatct\t0.321\n+aatga\t0.417\n+aatgc\t0.449\n+aatgg\t0.289\n+aatgt\t0.321\n+aatta\t0.193\n+aattc\t0.417\n+aattg\t0.385\n+aattt\t0.257\n+acaaa\t0.161\n+acaac\t0.193\n+acaag\t0.0965\n+acaat\t0.193\n+acaca\t0.129\n+acacc\t0.257\n+acacg\t0.0324\n+acact\t0.129\n+acagc\t0.161\n+acagg\t0.129\n+acagt\t0.0324\n+acata\t0.0324\n+acatc\t0.161\n+acatg\t0.0965\n+acatt\t0.193\n+accaa\t0.161\n+accac\t0.129\n+accag\t0.0644\n+accat\t0.193\n+accca\t0.0965\n+acccc\t0.0644\n+acccg\t0.193\n+accct\t0.129\n+accga\t0.129\n+accgg\t0.0324\n+accgt\t0.161\n+accta\t0.129\n+acctc\t0.129\n+acctg\t0.193\n+acctt\t0.193\n+acgaa\t0.0324\n+acgac\t0.161\n+acgag\t0.0965\n+acgat\t0.0644\n+acgca\t0.129\n+acgcc\t0.129\n+acgcg\t0.0324\n+acgct\t0.129\n+acgga\t0.161\n+acggc\t0.0644\n+acggt\t0.0644\n+acgtc\t0.129\n+acgtg\t0.0965\n+acgtt\t0.129\n+actaa\t0.129\n+actac\t0.417\n+actat\t0.257\n+actca\t0.257\n+a
ctcc\t0.193\n+actcg\t0.0965\n+actct\t0.321\n+actga\t0.257\n+actgc\t0.385\n+actgg\t0.225\n+actgt\t0.289\n+actta\t0.129\n+acttc\t0.193\n+acttg\t0.0965\n+acttt\t0.289\n+agaaa\t0.513\n+agaac\t0.77\n+agaag\t0.0965\n+agaat\t0.545\n+agaca\t0.353\n+agacc\t0.353\n+agacg\t0.385\n+agact\t0.513\n+agaga\t0.193\n+agagc\t0.385\n+agagg\t0.129\n+agagt\t0.289\n+agata\t0.321\n+agatc\t0.449\n+agatg\t0.417\n+agatt\t0.353\n+agcaa\t0.449\n+agcac\t0.257\n+agcag\t0.0644\n+agcat\t0.642\n+agcca\t0.353\n+agccc\t0.161\n+agccg\t0.353\n+agcct\t0.257\n+agcga\t0.385\n+agcgc\t0.225\n+agcgg\t0.129\n+agcgt\t0.225\n+agcta\t0.161\n+agctc\t0.161\n+agctg\t0.577\n+agctt\t0.417\n+aggaa\t0.225\n+aggac\t0.193\n+aggag\t0.129\n+aggat\t0.353\n+aggca\t0.417\n+aggcc\t0.161\n+aggcg\t0.129\n+aggct\t0.225\n+aggga\t0.129\n+agggc\t0.129\n+agggg\t0.0324\n+agggt\t0.193\n+aggta\t0.161\n+aggtc\t0.321\n+aggtg\t0.193\n+aggtt\t0.257\n+agtaa\t0.609\n+agtac\t0.706\n+agtag\t0.0324\n+agtat\t0.77\n+agtca\t0.866\n+agtcc\t0.577\n+agtcg\t0.385\n+agtct\t0.898\n+agtga\t0.706\n+agtgc\t0.738\n+agtgg\t0.994\n+agtgt\t0.449\n+agtta\t0.577\n+agttc\t0.417\n+agttg\t0.449\n+agttt\t0.802\n+ataaa\t0.193\n+ataac\t0.129\n+ataat\t0.161\n+ataca\t0.0324\n+atacc\t0.0644\n+atacg\t0.0324\n+atact\t0.129\n+atata\t0.0644\n+atatc\t0.0644\n+atatg\t0.129\n+atatt\t0.0644\n+atcaa\t0.225\n+atcac\t0.0965\n+atcat\t0.0644\n+atcca\t0.0965\n+atccc\t0.0965\n+atccg\t0.0965\n+atcct\t0.0965\n+atcga\t0.0965\n+atcgc\t0.129\n+atcgt\t0.0965\n+atcta\t0.129\n+atctc\t0.0644\n+atctg\t0.193\n+atctt\t0.0644\n+atgaa\t0.0644\n+atgac\t0.0965\n+atgat\t0.161\n+atgca\t0.161\n+atgcc\t0.225\n+atgcg\t0.129\n+atgct\t0.321\n+atgga\t0.161\n+atggc\t0.0965\n+atggg\t0.0644\n+atggt\t0.161\n+atgta\t0.0324\n+atgtc\t0.193\n+atgtg\t0.0965\n+atgtt\t0.129\n+attaa\t0.0965\n+attac\t0.225\n+attag\t0.0324\n+attat\t0.0644\n+attca\t0.0965\n+attcc\t0.257\n+attcg\t0.193\n+attct\t0.0644\n+attga\t0.0965\n+attgc\t0.257\n+attgg\t0.0644\n+attgt\t0.161\n+attta\t0.129\n+atttc\t0.289\n+atttg\t0.161\n+atttt\t0.289\n+caa
aa\t6.77\n+caaac\t5.48\n+caaag\t2.47\n+caaat\t5.26\n+caaca\t3.88\n+caacc\t3.85\n+caacg\t3.5\n+caact\t4.2\n+caaga\t1.64\n+caagc\t1.51\n+caagg\t1.31\n+caagt\t1.28\n+caata\t2.5\n+caatc\t3.59\n+caatg\t5.23\n+caatt\t3.27\n+cacaa\t4.91\n+cacac\t2.63\n+cacag\t0.545\n+cacat\t3.21\n+cacca\t2.66\n+caccc\t2.47\n+caccg\t1.41\n+cacct\t2.79\n+cacga\t3.05\n+cacgc\t2.82\n+cacgg\t1.09\n+cacgt\t1.03\n+cacta\t1.83\n+cactc\t2.79\n+cactg\t3.11\n+cactt\t2.85\n+cagaa\t3.27\n+cagac\t1.44\n+cagag\t1.76\n+cagat\t2.44\n+cagca\t2.89\n+cagcc\t2.79\n+cagcg\t2.31\n+cagct\t3.62\n+cagga\t2.76\n+caggc\t1.'..b'855631\n+#\tttcgg\t698837\n+#\tttcgt\t871342\n+#\tttcta\t890087\n+#\tttctc\t841394\n+#\tttctg\t946845\n+#\tttctt\t1516968\n+#\tttgaa\t1688548\n+#\tttgac\t741091\n+#\tttgag\t776440\n+#\tttgat\t1325067\n+#\tttgca\t1476775\n+#\tttgcc\t1075827\n+#\tttgcg\t704154\n+#\tttgct\t1293450\n+#\tttgga\t927788\n+#\tttggc\t1260242\n+#\tttggg\t822345\n+#\tttggt\t980857\n+#\tttgta\t1275361\n+#\tttgtc\t843496\n+#\tttgtg\t1221327\n+#\tttgtt\t2341971\n+#\ttttaa\t3011593\n+#\ttttac\t1276443\n+#\ttttag\t1145019\n+#\ttttat\t3051195\n+#\ttttca\t1957941\n+#\ttttcc\t1517354\n+#\ttttcg\t1427298\n+#\ttttct\t1793145\n+#\ttttga\t1659999\n+#\ttttgc\t1765755\n+#\ttttgg\t1551540\n+#\ttttgt\t2379188\n+#\ttttta\t3099452\n+#\tttttc\t2625305\n+#\tttttg\t2761348\n+#\tttttt\t5600229\n+\n+# motif upstream of acceptor splice site\n+[ASSMOTIF]\n+# width of motif, n=\n+32\n+# order of markov model, k=\n+1\n+# markov chain emission probabilities\n+ 0 0.358\t0.167\t0.126\t0.35\t0.329\t0.218\t0.131\t0.323\t0.326\t0.213\t0.151\t0.31\t0.298\t0.172\t0.17\t0.36\n+ 1 0.358\t0.168\t0.125\t0.35\t0.328\t0.215\t0.13\t0.327\t0.328\t0.212\t0.151\t0.309\t0.299\t0.171\t0.169\t0.36\n+ 2 0.358\t0.168\t0.122\t0.352\t0.327\t0.217\t0.128\t0.329\t0.329\t0.214\t0.148\t0.309\t0.301\t0.169\t0.167\t0.362\n+ 3 0.358\t0.17\t0.119\t0.353\t0.326\t0.216\t0.126\t0.331\t0.328\t0.216\t0.146\t0.31\t0.303\t0.169\t0.165\t0.362\n+ 4 
0.358\t0.172\t0.116\t0.355\t0.324\t0.216\t0.124\t0.336\t0.325\t0.218\t0.145\t0.312\t0.305\t0.17\t0.163\t0.363\n+ 5 0.358\t0.174\t0.11\t0.358\t0.322\t0.215\t0.12\t0.343\t0.324\t0.219\t0.143\t0.314\t0.308\t0.169\t0.159\t0.364\n+ 6 0.358\t0.176\t0.104\t0.361\t0.319\t0.216\t0.117\t0.349\t0.323\t0.221\t0.139\t0.317\t0.313\t0.169\t0.155\t0.363\n+ 7 0.359\t0.178\t0.0971\t0.365\t0.315\t0.216\t0.114\t0.355\t0.322\t0.222\t0.134\t0.322\t0.316\t0.168\t0.151\t0.365\n+ 8 0.362\t0.179\t0.0893\t0.37\t0.31\t0.216\t0.111\t0.363\t0.321\t0.223\t0.129\t0.328\t0.32\t0.168\t0.146\t0.367\n+ 9 0.362\t0.182\t0.0832\t0.372\t0.305\t0.218\t0.106\t0.371\t0.319\t0.225\t0.124\t0.332\t0.323\t0.167\t0.142\t0.369\n+10 0.364\t0.184\t0.0758\t0.376\t0.301\t0.219\t0.103\t0.377\t0.314\t0.228\t0.119\t0.339\t0.325\t0.167\t0.139\t0.37\n+11 0.364\t0.185\t0.0701\t0.38\t0.298\t0.218\t0.102\t0.382\t0.309\t0.229\t0.116\t0.346\t0.324\t0.167\t0.137\t0.372\n+12 0.366\t0.185\t0.0646\t0.385\t0.294\t0.22\t0.101\t0.385\t0.306\t0.228\t0.114\t0.352\t0.32\t0.168\t0.137\t0.375\n+13 0.367\t0.183\t0.0603\t0.389\t0.293\t0.22\t0.102\t0.385\t0.305\t0.228\t0.111\t0.356\t0.314\t0.169\t0.137\t0.38\n+14 0.364\t0.184\t0.0574\t0.394\t0.29\t0.223\t0.104\t0.383\t0.301\t0.228\t0.111\t0.36\t0.305\t0.171\t0.138\t0.386\n+15 0.359\t0.186\t0.0542\t0.4\t0.286\t0.227\t0.105\t0.382\t0.295\t0.23\t0.11\t0.365\t0.292\t0.175\t0.138\t0.395\n+16 0.355\t0.187\t0.0513\t0.407\t0.279\t0.232\t0.106\t0.382\t0.287\t0.234\t0.11\t0.37\t0.278\t0.18\t0.138\t0.404\n+17 0.347\t0.187\t0.05\t0.415\t0.275\t0.236\t0.108\t0.382\t0.277\t0.238\t0.11\t0.376\t0.261\t0.184\t0.14\t0.414\n+18 0.339\t0.187\t0.047\t0.426\t0.268\t0.239\t0.11\t0.382\t0.268\t0.239\t0.11\t0.383\t0.244\t0.189\t0.14\t0.426\n+19 0.329\t0.188\t0.0445\t0.438\t0.262\t0.242\t0.113\t0.383\t0.262\t0.239\t0.11\t0.39\t0.229\t0.194\t0.139\t0.437\n+20 0.32\t0.189\t0.0399\t0.451\t0.252\t0.246\t0.112\t0.389\t0.256\t0.239\t0.107\t0.398\t0.214\t0.2\t0.137\t0.449\n+21 
0.307\t0.192\t0.0368\t0.464\t0.244\t0.253\t0.111\t0.392\t0.245\t0.245\t0.106\t0.404\t0.201\t0.206\t0.133\t0.46\n+22 0.294\t0.198\t0.032\t0.475\t0.235\t0.266\t0.11\t0.389\t0.236\t0.253\t0.103\t0.407\t0.192\t0.218\t0.13\t0.46\n+23 0.286\t0.201\t0.0264\t0.487\t0.233\t0.273\t0.111\t0.383\t0.231\t0.261\t0.0999\t0.408\t0.186\t0.225\t0.127\t0.461\n+24 0.276\t0.207\t0.0212\t0.495\t0.228\t0.282\t0.107\t0.383\t0.225\t0.272\t0.0963\t0.406\t0.179\t0.233\t0.121\t0.466\n+25 0.258\t0.204\t0.0171\t0.521\t0.213\t0.286\t0.103\t0.398\t0.214\t0.271\t0.0937\t0.421\t0.166\t0.236\t0.114\t0.484\n+26 0.255\t0.208\t0.0131\t0.524\t0.207\t0.3\t0.0997\t0.393\t0.214\t0.274\t0.0902\t0.421\t0.148\t0.223\t0.102\t0.527\n+27 0.27\t0.208\t0.0141\t0.508\t0.22\t0.291\t0.116\t0.372\t0.225\t0.275\t0.0979\t0.403\t0.147\t0.217\t0.127\t0.509\n+28 0.269\t0.21\t0.0127\t0.508\t0.219\t0.296\t0.116\t0.369\t0.221\t0.282\t0.096\t0.402\t0.143\t0.217\t0.127\t0.514\n+29 0.267\t0.214\t0.0126\t0.507\t0.221\t0.301\t0.118\t0.36\t0.219\t0.29\t0.0977\t0.393\t0.139\t0.217\t0.128\t0.516\n+30 0.27\t0.214\t0.0126\t0.504\t0.223\t0.302\t0.12\t0.354\t0.224\t0.292\t0.097\t0.387\t0.135\t0.215\t0.13\t0.52\n+31 0.273\t0.209\t0.0136\t0.504\t0.224\t0.299\t0.123\t0.354\t0.23\t0.285\t0.0997\t0.385\t0.126\t0.203\t0.132\t0.539\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/trained_species/fly/augustus/fly_metapars.cfg --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_metapars.cfg Mon Oct 04 19:39:38 2021 +0000 |
b |
@@ -0,0 +1,48 @@ +# This file contains the list of meta parameters for the coding regions (CDS) which are subject to optimization. +# All other meta parameters are chosen as given in the species parameter file. The order +# of the parameters determines the order in the optimization process. +# Basically, different values for these meta parameters are tried out and the ones +# giving best performance in a cross-validation on the training set are chosen. +# For each parameter the range of possible values is specified after the parameter +# name and at least one white space. +# 3 cases are possible for the range: +# - an explicit list is given, e.g. protein "on" "off" +# - it is an integer range, e.g. window_size "1"-"5" +# - it is a range of floating point numbers, e.g. pseudocount "0.3"_"1.8" +# +# +# Mario Stanke, 19.12.2006 +# + +/Constant/dss_end "1"-"4" +/Constant/dss_start "1"-"3" +/Constant/ass_start "1"-"3" +/Constant/ass_end "0"-"4" +/Constant/ass_upwindow_size "1"-"50" +/IntronModel/d "100"-"950" +/IntronModel/ass_motif_memory "0"-"3" +/IntronModel/ass_motif_radius "0"-"4" +/ExonModel/tis_motif_memory "0"-"3" +/ExonModel/tis_motif_radius "0"-"3" +/Constant/trans_init_window "0"-"25" +/Constant/init_coding_len "0"-"18" +/ExonModel/patpseudocount "0.5"_"5" +/ExonModel/etpseudocount "0"-"10" +/ExonModel/etorder "0"-"3" +/Constant/intterm_coding_len "0"-"13" +/ExonModel/slope_of_bandwidth "0.05"_"0.6" +/ExonModel/minwindowcount "1"-"15" +/IGenicModel/patpseudocount "0.5"_"7" +/IntronModel/patpseudocount "0.5"_"7" +/IntronModel/slope_of_bandwidth "0.05"_"0.6" +/IntronModel/minwindowcount "1"-"8" +/IntronModel/asspseudocount "0.0005"_"0.03" +/IntronModel/dsspseudocount "0.0002"_"0.04" +/IntronModel/dssneighborfactor "0.0001"_"0.01" +/ExonModel/minPatSum "100"_"600" +/Constant/probNinCoding "0.15"_".25" +/Constant/decomp_num_steps "1"-"5" +# comment parameters out that you do not want to be subject of optimization +#/IGenicModel/k "4" "3" "5" +#/IntronModel/k "4" 
"3" "5" +#/ExonModel/k "4" "3" "5" |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/trained_species/fly/augustus/fly_metapars.utr.cfg --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_metapars.utr.cfg Mon Oct 04 19:39:38 2021 +0000 |
b |
@@ -0,0 +1,30 @@ +# This file contains the list of meta parameters for the Untranslated Regions (UTRs), which are subject to optimization. +# All other parameters are chosen as given in the species parameter file. The order +# of the parameters determines the order in the optimization process. +# Basically, different values for these meta parameters are tried out and the ones +# giving best performance in a cross-validation on the training set are chosen. +# For each parameter the range of possible values is specified after the parameter +# name and at least one white space. +# 3 cases are possible for the range: +# - an explicit list is given, e.g. protein "on" "off" +# - it is an integer range, e.g. window_size "1"-"5" +# - it is a range of floating point numbers, e.g. pseudocount "0.3"_"1.8" +# +# +# Mario Stanke, 9.5.2008 +# + +/UtrModel/prob_polya "0.0"_"1.0" +/UtrModel/d_polya_cleavage_min "6"-"14" +/UtrModel/d_polya_cleavage_max "17"-"27" +/UtrModel/tss_start "0"-"12" +/UtrModel/tss_end "0"-"8" +/UtrModel/tts_motif_memory "0"-"2" +/UtrModel/utr5patternweight "0.1"_"1.0" +/UtrModel/utr3patternweight "0.1"_"1.0" +/UtrModel/patpseudocount "1"_"3" +/UtrModel/tssup_k "0"-"2" +/UtrModel/slope_of_bandwidth "0.2"_"0.4" +/UtrModel/minwindowcount "1"-"4" +#/UtrModel/k "2"-"4" + |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/trained_species/fly/augustus/fly_parameters.cfg --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_parameters.cfg Mon Oct 04 19:39:38 2021 +0000 |
b |
@@ -0,0 +1,144 @@ +# +# parameters for all Drosophila versions +# +# date : 11.8.2009 +# + +# +# Properties for augustus +#------------------------------------ +/augustus/verbosity 3 # 0-3, 0: only print the necessary +maxDNAPieceSize 200000 # maximum segment that is predicted in one piece +stopCodonExcludedFromCDS false # make this 'true' if the CDS includes the stop codon (training and prediction) + +# gff output options: +protein on # output predicted protein sequence +codingseq off # output the coding sequence +cds on # output 'cds' as feature for exons +start on # output start codons (translation start) +stop on # output stop codons (translation stop) +introns on # output introns +tss on # output transcription start site +tts on # output transcription termination site +print_utr off # output 5'UTR and 3'UTR lines in addition to exon lines + +checkExAcc off # internal parameter for extrinsic accuracy + +# alternative transcripts and posterior probabilities +sample 100 # the number of sampling iterations +alternatives-from-sampling false # output alternative transcripts +minexonintronprob 0.08 # minimal posterior probability of all (coding) exons +minmeanexonintronprob 0.4 # minimal geometric mean of the posterior probs of introns and exons +maxtracks -1 # maximum number of reported transcripts per gene (-1: no limit) +keep_viterbi true # set to true if all Viterbi transcripts should be reported +uniqueCDS true # don't report transcripts that differ only in the UTR +UTR on # predict untranslated regions + +# +# +# The rest of the file contains mainly meta parameters used for training. 
+# + +# global constants +# ---------------------------- + +/Constant/trans_init_window 25 +/Constant/ass_upwindow_size 32 +/Constant/ass_start 1 +/Constant/ass_end 4 +/Constant/dss_start 3 +/Constant/dss_end 4 +/Constant/init_coding_len 9 +/Constant/intterm_coding_len 0 +/Constant/tss_upwindow_size 45 +/Constant/decomp_num_at 1 +/Constant/decomp_num_gc 1 +/Constant/gc_range_min 0.32 # This range has an effect only when decomp_num_steps>1. +/Constant/gc_range_max 0.50 # States the minimal and maximal percentage of c or g +/Constant/decomp_num_steps 1 # I recommend keeping this to 1 for most species. +/Constant/min_coding_len 201 # no gene with a coding sequence shorter than this is predicted +/Constant/probNinCoding 0.23 # divide this by .25 to get a malus for making one masked letter part of the coding sequence +/Constant/amberprob 0.34 # Prob(stop codon = tag), if 0 tag is assumed to code for amino acid +/Constant/ochreprob 0.41 # Prob(stop codon = taa), if 0 taa is assumed to code for amino acid +/Constant/opalprob 0.25 # Prob(stop codon = tga), if 0 tga is assumed to code for amino acid +/Constant/subopt_transcript_threshold 0.7 +/Constant/almost_identical_maxdiff 10 + +# type of weighing, one of 1 = equalWeights, 2 = gcContentClasses, 3 = multiNormalKernel +/BaseCount/weighingType 3 +# file with the weight matrix (only for multiNormalKernel type weighing) +/BaseCount/weightMatrixFile fly_weightmatrix.txt # change this to your species if at all necessary + +# Properties for IGenicModel +# ---------------------------- +/IGenicModel/verbosity 0 +/IGenicModel/infile fly_igenic_probs.pbl # change this and the other five filenames *_probs.pbl below to your species +/IGenicModel/outfile fly_igenic_probs.pbl +/IGenicModel/patpseudocount 5.0 +/IGenicModel/k 4 # order of the Markov chain for content model, keep equal to /ExonModel/k + +# Properties for ExonModel +# ---------------------------- +/ExonModel/verbosity 3 +/ExonModel/infile fly_exon_probs.pbl 
+/ExonModel/outfile fly_exon_probs.pbl +/ExonModel/patpseudocount 5.0 +/ExonModel/minPatSum 350 +/ExonModel/k 4 # order of the Markov chain for content model +/ExonModel/etorder 2 +/ExonModel/etpseudocount 3 +/ExonModel/exonlengthD 3000 # beyond this the distribution is geometric +/ExonModel/maxexonlength 15000 +/ExonModel/slope_of_bandwidth 0.3 +/ExonModel/minwindowcount 8 +/ExonModel/tis_motif_memory 3 +/ExonModel/tis_motif_radius 2 + +# Properties for IntronModel +# ---------------------------- +/IntronModel/verbosity 0 +/IntronModel/infile fly_intron_probs.pbl +/IntronModel/outfile fly_intron_probs.pbl +/IntronModel/patpseudocount 5.0 +/IntronModel/k 4 # order of the Markov chain for content model, keep equal to /ExonModel/k +/IntronModel/slope_of_bandwidth 0.4 +/IntronModel/minwindowcount 3 +/IntronModel/asspseudocount 0.01 +/IntronModel/dsspseudocount 0.01015 +/IntronModel/dssneighborfactor 0.001 +#/IntronModel/splicefile fly_splicefile.txt # this optional file contains additional windows around splice sites for training, uncomment if you have one +/IntronModel/sf_with_motif false # if true the splice file is also used to train the branch point region +/IntronModel/d 929 # constraint: this must be larger than 4 + /Constant/dss_end + /Constant/ass_upwindow_size + /Constant/ass_start +/IntronModel/ass_motif_memory 1 +/IntronModel/ass_motif_radius 4 + +# Properties for UtrModel +# ---------------------------- +/UtrModel/verbosity 3 +/UtrModel/infile fly_utr_probs.pbl +/UtrModel/outfile fly_utr_probs.pbl +/UtrModel/k 4 +/UtrModel/utr5patternweight 0.3 #0.7625 +/UtrModel/utr3patternweight 0.3 #0.5 +/UtrModel/patpseudocount 1 +/UtrModel/tssup_k 1 +/UtrModel/tssup_patpseudocount 1 +/UtrModel/slope_of_bandwidth 0.25 +/UtrModel/minwindowcount 1 +/UtrModel/exonlengthD 800 +/UtrModel/maxexonlength 1200 +/UtrModel/max3singlelength 2000 # excludes roughly 1% +/UtrModel/max3termlength 1200 # excludes ~ 0.3% +/UtrModel/tss_start 8 +/UtrModel/tss_end 5 +/UtrModel/tata_start 
2 +/UtrModel/tata_end 10 +/UtrModel/tata_pseudocount 2 +/UtrModel/d_tss_tata_min 26 # minimal distance between start of tata box (if existent) and tss +/UtrModel/d_tss_tata_max 37 # maximal distance between start of tata box (if existent) and tss +/UtrModel/polyasig_consensus aataaa # polyadenylation signal training not fully automated yet +/UtrModel/d_polyasig_cleavage 14 # the transcription end is predicted this many bases after the polyadenylation signal +/UtrModel/d_polya_cleavage_min 9 +/UtrModel/d_polya_cleavage_max 35 +/UtrModel/prob_polya 0.95 +/UtrModel/tts_motif_memory 1 |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/trained_species/fly/augustus/fly_utr_probs.pbl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_utr_probs.pbl Mon Oct 04 19:39:38 2021 +0000 |
[ |
b"@@ -0,0 +1,7137 @@\n+# UTR model parameters\n+# begin of content independent part\n+\n+# Length distributions\n+[UTRLENGTH]\n+# maximal individually stored length probability d=\n+800\n+# slope of smoothing bandwidth =\n+0.25\n+# smoothing minwindowcount =\n+1\n+# length 5' sing 5' init 5' int 5' term 3' sing 3' init 3' int 3' term \n+# total number of exons of above types\n+ 6188 1995 342 1995 7966 178 69 178\n+# number of exons exceeding length d=800\n+ 81 32 6 27 823 7 7 48\n+# 1000 P(len=k), k=0,1,..., 800\n+0\t0.681\t5.07e-15\t4.35e-06\t4.62\t0.00284\t5.75\t3.2e-18\t0\n+1\t1.82\t9.16e-12\t0.000391\t10.5\t0.0358\t10.4\t1.04e-17\t0\n+2\t2.76\t6.1e-09\t0.013\t13\t0.178\t8.35\t3.54e-17\t0\n+3\t3.13\t1.49e-06\t0.158\t12.5\t0.405\t6.03\t1.32e-16\t0\n+4\t3.51\t0.000135\t0.708\t12.6\t0.599\t9.23\t5.19e-16\t2.83e-20\n+5\t3.55\t0.00451\t1.17\t13.8\t0.682\t15.3\t2.13e-15\t2.89e-18\n+6\t2.95\t0.0564\t0.708\t15\t0.512\t20.4\t8.93e-15\t2.29e-16\n+7\t2.5\t0.272\t0.158\t15.4\t0.296\t18.5\t3.83e-14\t1.42e-14\n+8\t2.56\t0.548\t0.013\t15.3\t0.27\t12.9\t1.67e-13\t6.84e-13\n+9\t3.12\t0.566\t0.000391\t14.5\t0.332\t11.6\t7.41e-13\t2.57e-11\n+10\t3.83\t0.407\t4.35e-06\t13.6\t0.267\t11.3\t3.35e-12\t7.5e-10\n+11\t4.27\t0.328\t1.78e-08\t12.2\t0.252\t7.9\t1.54e-11\t1.71e-08\n+12\t4.37\t0.472\t5.42e-11\t9.93\t0.347\t6.71\t7.12e-11\t3.03e-07\n+13\t4.42\t0.528\t8.34e-10\t8.9\t0.549\t8.5\t3.31e-10\t4.18e-06\n+14\t4.28\t0.326\t3.63e-08\t9.1\t0.796\t6.71\t1.53e-09\t4.49e-05\n+15\t4.04\t0.333\t4.68e-06\t9.88\t0.875\t4.29\t7.03e-09\t0.000376\n+16\t4.33\t0.612\t0.000396\t11.4\t0.938\t6.01\t3.21e-08\t0.00245\n+17\t4.51\t0.867\t0.013\t12.8\t1.03\t9.28\t1.46e-07\t0.0124\n+18\t4.07\t1.1\t0.159\t12.8\t1.03\t11\t6.62e-07\t0.0492\n+19\t3.76\t1.22\t0.723\t11.3\t0.904\t11.6\t2.99e-06\t0.152\n+20\t3.96\t1.21\t1.34\t10.7\t0.878\t12.3\t1.32e-05\t0.364\n+21\t4.31\t1.12\t1.51\t11\t1.11\t13.6\t5.6e-05\t0.68\n+22\t4.45\t1.4\t1.86\t10.3\t1.38\t12.8\t0.000224\t0.989\n+23\t4.75\t2.06\t2.7\t8.37\t1.36\t8.55\t0.0008
33\t1.12\n+24\t4.76\t2.19\t3.68\t6.7\t1.25\t5.92\t0.00284\t0.994\n+25\t4.68\t2.04\t3.45\t6\t1.2\t6.15\t0.00881\t0.7\n+26\t4.47\t2.06\t3.12\t6.21\t1.14\t7.01\t0.0248\t0.434\n+27\t4.28\t2.35\t3.29\t6.62\t1.1\t7.47\t0.0629\t0.352\n+28\t4.83\t3.68\t3.35\t6.53\t0.986\t7.33\t0.144\t0.514\n+29\t5.98\t4.63\t3.25\t5.68\t0.939\t6.87\t0.296\t0.878\n+30\t6.15\t4.11\t3.2\t4.95\t1.15\t6.45\t0.549\t1.3\n+31\t5.15\t3.31\t3.23\t5.2\t1.46\t6.14\t0.918\t1.57\n+32\t4.9\t2.94\t3.22\t5.98\t1.72\t5.79\t1.39\t1.59\n+33\t5.97\t2.5\t3.03\t6.3\t1.94\t5.34\t1.9\t1.39\n+34\t7.15\t2.36\t2.73\t6.51\t2.02\t4.96\t2.37\t1.11\n+35\t7.24\t2.86\t2.53\t7.51\t2.19\t4.8\t2.73\t0.859\n+36\t6.12\t2.78\t2.52\t7.26\t2.39\t4.86\t2.94\t0.648\n+37\t4.95\t2.05\t2.64\t5.47\t2.29\t5.02\t3.02\t0.466\n+38\t4.34\t1.81\t2.72\t4.59\t2.15\t5.19\t3.05\t0.311\n+39\t4.11\t2.08\t2.69\t4.74\t2.11\t5.32\t3.09\t0.189\n+40\t4.19\t2.33\t2.7\t4.96\t2\t5.39\t3.19\t0.107\n+41\t4.29\t2.47\t2.95\t4.98\t1.85\t5.44\t3.36\t0.0591\n+42\t4.22\t2.58\t3.52\t4.91\t1.87\t5.48\t3.59\t0.0393\n+43\t4.26\t2.73\t4.35\t4.84\t2.07\t5.58\t3.84\t0.0391\n+44\t4.69\t2.92\t5.33\t4.86\t2.23\t5.78\t4.08\t0.0537\n+45\t4.43\t3.1\t6.32\t4.93\t2.3\t6.09\t4.29\t0.0815\n+46\t3.6\t3.22\t7.22\t4.96\t2.25\t6.51\t4.48\t0.123\n+47\t3.46\t3.29\t7.94\t4.92\t2\t6.99\t4.64\t0.18\n+48\t3.87\t3.39\t8.42\t4.86\t2\t7.48\t4.78\t0.254\n+49\t4.21\t3.6\t8.62\t4.87\t2.27\t7.91\t4.91\t0.344\n+50\t4.36\t4.01\t8.54\t4.94\t2.5\t8.23\t5.04\t0.448\n+51\t4.41\t4.55\t8.25\t4.98\t2.62\t8.41\t5.18\t0.562\n+52\t4.46\t5.04\t7.86\t4.89\t2.68\t8.41\t5.31\t0.679\n+53\t4.62\t5.25\t7.47\t4.66\t2.73\t8.23\t5.42\t0.79\n+54\t4.92\t5.08\t7.17\t4.36\t2.8\t7.87\t5.5\t0.887\n+55\t5.31\t4.64\t6.98\t4.12\t2.9\t7.36\t5.54\t0.963\n+56\t5.61\t4.19\t6.89\t4\t3.07\t6.75\t5.53\t1.01\n+57\t5.7\t3.93\t6.85\t4.03\t3.3\t6.11\t5.46\t1.03\n+58\t5.54\t3.92\t6.83\t4.14\t3.53\t5.48\t5.33\t1.03\n+59\t5.29\t4.03\t6.8\t4.25\t3.69\t4.93\t5.17\t1.01\n+60\t5.13\t4.16\t6.75\t4.3\t3.76\t4.48\t4.97\t0.984\n+61\t5.16\t4.24\t6.66\t
4.28\t3.79\t4.14\t4.74\t0.955\n+62\t5.35\t4.23\t6.53\t4.19\t3.8\t3.89\t4.52\t0.934\n+63\t5.63\t4.14\t6.37\t4.05\t3.79\t3.7\t4.29\t0.927\n+64\t5.92\t4\t6.18\t3.88\t3.77\t3.55\t4.1\t0.937\n+65\t6.16\t3.84\t5.96\t3.72\t3.79\t3.4\t3.93\t0.965\n+66\t6.28\t3.71\t5.74\t3.62\t3.87\t3.24\t3.8\t1.01\n+67\t6.2\t3.64\t5.51\t3.59\t3.99\t3.07\t3.72\t1.06\n+68\t5.92\t3.63\t5.3\t3.64\t4.07\t2.88\t3.68\t1.13\n+69\t5.57\t3.68\t5.09\t3.72\t4.04\t2.7\t3.68\t1.19\n+70\t5.32\t3.74\t4.91\t3.79\t3.89\t2.52\t3.73\t1.25\n+71\t5.27\t3.79\t4.73\t3.8\t3.7\t2.35\t3.8\t1.31\n+72\t5."..b'tcgc\t14965\n+#\tttcgg\t11310\n+#\tttcgt\t20239\n+#\tttcta\t28351\n+#\tttctc\t16356\n+#\tttctg\t18660\n+#\tttctt\t32422\n+#\tttgaa\t47499\n+#\tttgac\t14608\n+#\tttgag\t17968\n+#\tttgat\t36555\n+#\tttgca\t31476\n+#\tttgcc\t17850\n+#\tttgcg\t12080\n+#\tttgct\t22771\n+#\tttgga\t20302\n+#\tttggc\t17146\n+#\tttggg\t11267\n+#\tttggt\t16683\n+#\tttgta\t64460\n+#\tttgtc\t18109\n+#\tttgtg\t29654\n+#\tttgtt\t61476\n+#\ttttaa\t101164\n+#\ttttac\t38483\n+#\ttttag\t41333\n+#\ttttat\t89923\n+#\ttttca\t39758\n+#\ttttcc\t27803\n+#\ttttcg\t26520\n+#\ttttct\t36657\n+#\ttttga\t41172\n+#\ttttgc\t30519\n+#\ttttgg\t24062\n+#\ttttgt\t71057\n+#\ttttta\t94640\n+#\tttttc\t46611\n+#\tttttg\t62333\n+#\tttttt\t128813\n+\n+#\n+# The emission probabilities of the tss upwindow\n+#\n+[EMISSION-TSSUPWIN]\n+# size of the emission vector\n+16\n+#tssup_k=\n+1\n+# patpseudo : pseudocount for sequence patterns\n+1\n+aa\t0.326\n+ac\t0.195\n+ag\t0.204\n+at\t0.275\n+ca\t0.306\n+cc\t0.2\n+cg\t0.248\n+ct\t0.245\n+ga\t0.245\n+gc\t0.298\n+gg\t0.193\n+gt\t0.264\n+ta\t0.212\n+tc\t0.219\n+tg\t0.221\n+tt\t0.347\n+\n+# motif around the TSS of TATA-less promoters\n+[TSSMOTIF]\n+# width of motif, n=\n+13\n+# order of markov model, k=\n+0\n+# markov chain emission probabilities\n+ 0 0.268\t0.227\t0.222\t0.283\n+ 1 0.257\t0.23\t0.22\t0.293\n+ 2 0.279\t0.234\t0.216\t0.27\n+ 3 0.271\t0.238\t0.214\t0.278\n+ 4 0.285\t0.237\t0.216\t0.261\n+ 5 0.263\t0.214\t0.229\t0.295\n+ 
6 0.249\t0.223\t0.195\t0.332\n+ 7 0.162\t0.309\t0.241\t0.288\n+ 8 0.406\t0.252\t0.193\t0.15\n+ 9 0.275\t0.199\t0.233\t0.293\n+10 0.275\t0.194\t0.165\t0.365\n+11 0.294\t0.215\t0.182\t0.308\n+12 0.283\t0.207\t0.228\t0.282\n+\n+# motif around the TSS of TATA promoters\n+[TSSMOTIFTATA]\n+# width of motif, n=\n+13\n+# order of markov model, k=\n+0\n+# markov chain emission probabilities\n+ 0 0.293\t0.245\t0.261\t0.201\n+ 1 0.32\t0.188\t0.298\t0.193\n+ 2 0.3\t0.216\t0.248\t0.237\n+ 3 0.301\t0.216\t0.213\t0.271\n+ 4 0.314\t0.254\t0.221\t0.211\n+ 5 0.293\t0.174\t0.264\t0.269\n+ 6 0.235\t0.172\t0.213\t0.38\n+ 7 0.15\t0.422\t0.116\t0.312\n+ 8 0.501\t0.174\t0.208\t0.118\n+ 9 0.256\t0.211\t0.229\t0.304\n+10 0.264\t0.225\t0.113\t0.398\n+11 0.329\t0.222\t0.121\t0.329\n+12 0.312\t0.195\t0.213\t0.28\n+\n+# tata box motif \n+[TATAMOTIF]\n+# width of motif, n=\n+12\n+# order of markov model, k=\n+0\n+# markov chain emission probabilities\n+ 0 0.21\t0.237\t0.381\t0.173\n+ 1 0.19\t0.341\t0.336\t0.133\n+ 2 0.0032\t0.0032\t0.0032\t0.99\n+ 3 0.99\t0.0032\t0.0032\t0.0032\n+ 4 0.0032\t0.0032\t0.0032\t0.99\n+ 5 0.99\t0.0032\t0.0032\t0.0032\n+ 6 0.637\t0.0432\t0.0272\t0.293\n+ 7 0.99\t0.0032\t0.0032\t0.0032\n+ 8 0.602\t0.0592\t0.0576\t0.282\n+ 9 0.37\t0.0976\t0.386\t0.147\n+10 0.202\t0.315\t0.312\t0.171\n+11 0.254\t0.283\t0.278\t0.184\n+\n+# motif after polyA signal\n+[TTSMOTIF]\n+# width of motif, n=\n+14\n+# order of markov model, k=\n+1\n+# markov chain emission probabilities\n+ 0 0.368\t0.167\t0.147\t0.317\t0.414\t0.134\t0.181\t0.27\t0.365\t0.182\t0.115\t0.337\t0.342\t0.126\t0.224\t0.308\n+ 1 0.368\t0.17\t0.139\t0.323\t0.41\t0.145\t0.177\t0.268\t0.338\t0.199\t0.106\t0.358\t0.323\t0.136\t0.21\t0.33\n+ 2 0.397\t0.173\t0.114\t0.316\t0.425\t0.149\t0.173\t0.252\t0.35\t0.19\t0.106\t0.353\t0.335\t0.13\t0.198\t0.337\n+ 3 0.42\t0.159\t0.102\t0.319\t0.437\t0.152\t0.165\t0.246\t0.359\t0.191\t0.102\t0.348\t0.329\t0.124\t0.176\t0.371\n+ 4 
0.452\t0.148\t0.104\t0.296\t0.446\t0.147\t0.159\t0.248\t0.386\t0.187\t0.1\t0.328\t0.337\t0.114\t0.167\t0.382\n+ 5 0.455\t0.141\t0.107\t0.297\t0.435\t0.143\t0.159\t0.263\t0.375\t0.194\t0.0979\t0.333\t0.324\t0.115\t0.162\t0.399\n+ 6 0.453\t0.144\t0.11\t0.292\t0.421\t0.15\t0.156\t0.274\t0.362\t0.2\t0.104\t0.334\t0.323\t0.118\t0.158\t0.4\n+ 7 0.453\t0.145\t0.11\t0.292\t0.422\t0.158\t0.145\t0.275\t0.365\t0.194\t0.109\t0.333\t0.325\t0.121\t0.154\t0.401\n+ 8 0.454\t0.145\t0.109\t0.292\t0.427\t0.165\t0.132\t0.276\t0.372\t0.197\t0.112\t0.319\t0.329\t0.119\t0.158\t0.394\n+ 9 0.454\t0.153\t0.106\t0.288\t0.435\t0.156\t0.131\t0.278\t0.371\t0.197\t0.107\t0.325\t0.331\t0.118\t0.166\t0.385\n+10 0.451\t0.155\t0.104\t0.29\t0.424\t0.154\t0.145\t0.276\t0.361\t0.194\t0.105\t0.34\t0.336\t0.116\t0.169\t0.379\n+11 0.457\t0.157\t0.102\t0.284\t0.427\t0.156\t0.147\t0.271\t0.359\t0.199\t0.105\t0.337\t0.343\t0.117\t0.165\t0.376\n+12 0.461\t0.149\t0.103\t0.287\t0.432\t0.168\t0.145\t0.255\t0.364\t0.21\t0.106\t0.32\t0.348\t0.12\t0.162\t0.37\n+13 0.467\t0.144\t0.101\t0.287\t0.447\t0.171\t0.135\t0.247\t0.364\t0.225\t0.111\t0.3\t0.347\t0.122\t0.162\t0.369\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/trained_species/fly/augustus/fly_weightmatrix.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/trained_species/fly/augustus/fly_weightmatrix.txt Mon Oct 04 19:39:38 2021 +0000 |
b |
@@ -0,0 +1,23 @@ +# +# This file contains a matrix used for weighing the training sequences +# when given an input sequence. Let z = (da, dc, dg, dt) be the vector +# containing the differences in the relative nucleotide frequencies of +# two sequences, the input sequence and a training sequence. +# Then the training sequence has weight proportional to +# +# exp ( - z M z^t) +# +# with M being the matrix specified below. +# If M is nonsingular, then (apart from a two normalizing factors) M +# is the inverse of the covariance matrix of a multinormal +# distribution - the kernel for the estimation. + + +# this matrix is gc-content only, i.e. +# weight = 10 * exp (-200 * (dc + dg))^2) +# in particular weight <= 10 +0 0 0 0 +0 200 0 0 +0 0 200 0 +0 0 0 0 + |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/trained_species/fly/info.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/trained_species/fly/info.json Mon Oct 04 19:39:38 2021 +0000 |
[ |
@@ -0,0 +1,1 @@ +{"augustus": [{"version": "AUGUSTUS (3.3.3)", "source": "augustus pre-trained", "date": "2021-07-19", "path": "/tmp/prout/trained_species/fly/augustus"}], "genemark": [{}], "codingquarry": [{}], "snap": [{}], "glimmerhmm": [{}]} \ No newline at end of file |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/uniprot.dmnd |
b |
Binary file test-data/funannotate_db/uniprot.dmnd has changed |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/funannotate_db/uniprot_sprot.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/funannotate_db/uniprot_sprot.fasta Mon Oct 04 19:39:38 2021 +0000 |
b |
@@ -0,0 +1,83 @@ +>sp|Q6GZX4|001R_FRG3G Putative transcription factor 001R OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-001R PE=4 SV=1 +MAFSAEDVLKEYDRRRRMEALLLSLYYPNDRKLLDYKEWSPPRVQVECPKAPVEWNNPPS +EKGLIVGHFSGIKYKGEKAQASEVDVNKMCCWVSKFKDAMRRYQGIQTCKIPGKVLSDLD +AKIKAYNLTVEGVEGFVRYSRVTKQHVAAFLKELRHSKQYENVNLIHYILTDKRVDIQHL +EKDLVKDFKALVESAHRMRQGHMINVKYILYQLLKKHGHGPDGPDILTVKTGSKGVLYDD +SFRKIYTDLGWKFTPL +>sp|Q6GZX3|002L_FRG3G Uncharacterized protein 002L OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-002L PE=4 SV=1 +MSIIGATRLQNDKSDTYSAGPCYAGGCSAFTPRGTCGKDWDLGEQTCASGFCTSQPLCAR +IKKTQVCGLRYSSKGKDPLVSAEWDSRGAPYVRCTYDADLIDTQAQVDQFVSMFGESPSL +AERYCMRGVKNTAGELVSRVSSDADPAGGWCRKWYSAHRGPDQDAALGSFCIKNPGAADC +KCINRASDPVYQKVKTLHAYPDQCWYVPCAADVGELKMGTQRDTPTNCPTQVCQIVFNML +DDGSVTMDDVKNTINCDFSKYVPPPPPPKPTPPTPPTPPTPPTPPTPPTPPTPRPVHNRK +VMFFVAGAVLVAILISTVRW +>sp|Q197F8|002R_IIV3 Uncharacterized protein 002R OS=Invertebrate iridescent virus 3 OX=345201 GN=IIV3-002R PE=4 SV=1 +MASNTVSAQGGSNRPVRDFSNIQDVAQFLLFDPIWNEQPGSIVPWKMNREQALAERYPEL +QTSEPSEDYSGPVESLELLPLEIKLDIMQYLSWEQISWCKHPWLWTRWYKDNVVRVSAIT +FEDFQREYAFPEKIQEIHFTDTRAEEIKAILETTPNVTRLVIRRIDDMNYNTHGDLGLDD +LEFLTHLMVEDACGFTDFWAPSLTHLTIKNLDMHPRWFGPVMDGIKSMQSTLKYLYIFET +YGVNKPFVQWCTDNIETFYCTNSYRYENVPRPIYVWVLFQEDEWHGYRVEDNKFHRRYMY +STILHKRDTDWVENNPLKTPAQVEMYKFLLRISQLNRDGTGYESDSDPENEHFDDESFSS +GEEDSSDEDDPTWAPDSDDSDWETETEEEPSVAARILEKGKLTITNLMKSLGFKPKPKKI +QSIDRYFCSLDSNYNSEDEDFEYDSDSEDDDSDSEDDC +>sp|Q197F7|003L_IIV3 Uncharacterized protein 003L OS=Invertebrate iridescent virus 3 OX=345201 GN=IIV3-003L PE=4 SV=1 +MYQAINPCPQSWYGSPQLEREIVCKMSGAPHYPNYYPVHPNALGGAWFDTSLNARSLTTT +PSLTTCTPPSLAACTPPTSLGMVDSPPHINPPRRIGTLCFDFGSAKSPQRCECVASDRPS +TTSNTAPDTYRLLITNSKTRKNNYGTCRLEPLTYGI +>sp|Q6GZX2|003R_FRG3G Uncharacterized protein 3R OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-003R PE=3 SV=1 +MARPLLGKTSSVRRRLESLSACSIFFFLRKFCQKMASLVFLNSPVYQMSNILLTERRQVD +RAMGGSDDDGVMVVALSPSDFKTVLGSALLAVERDMVHVVPKYLQTPGILHDMLVLLTPI 
+FGEALSVDMSGATDVMVQQIATAGFVDVDPLHSSVSWKDNVSCPVALLAVSNAVRTMMGQ +PCQVTLIIDVGTQNILRDLVNLPVEMSGDLQVMAYTKDPLGKVPAVGVSVFDSGSVQKGD +AHSVGAPDGLVSFHTHPVSSAVELNYHAGWPSNVDMSSLLTMKNLMHVVVAEEGLWTMAR +TLSMQRLTKVLTDAEKDVMRAAAFNLFLPLNELRVMGTKDSNNKSLKTYFEVFETFTIGA +LMKHSGVTPTAFVDRRWLDNTIYHMGFIPWGRDMRFVVEYDLDGTNPFLNTVPTLMSVKR +KAKIQEMFDNMVSRMVTS +>sp|Q6GZX1|004R_FRG3G Uncharacterized protein 004R OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-004R PE=4 SV=1 +MNAKYDTDQGVGRMLFLGTIGLAVVVGGLMAYGYYYDGKTPSSGTSFHTASPSFSSRYRY +>sp|Q197F5|005L_IIV3 Uncharacterized protein 005L OS=Invertebrate iridescent virus 3 OX=345201 GN=IIV3-005L PE=3 SV=1 +MRYTVLIALQGALLLLLLIDDGQGQSPYPYPGMPCNSSRQCGLGTCVHSRCAHCSSDGTL +CSPEDPTMVWPCCPESSCQLVVGLPSLVNHYNCLPNQCTDSSQCPGGFGCMTRRSKCELC +KADGEACNSPYLDWRKDKECCSGYCHTEARGLEGVCIDPKKIFCTPKNPWQLAPYPPSYH +QPTTLRPPTSLYDSWLMSGFLVKSTTAPSTQEEEDDY +>sp|Q6GZX0|005R_FRG3G Uncharacterized protein 005R OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-005R PE=4 SV=1 +MQNPLPEVMSPEHDKRTTTPMSKEANKFIRELDKKPGDLAVVSDFVKRNTGKRLPIGKRS +NLYVRICDLSGTIYMGETFILESWEELYLPEPTKMEVLGTLESCCGIPPFPEWIVMVGED +QCVYAYGDEEILLFAYSVKQLVEEGIQETGISYKYPDDISDVDEEVLQQDEEIQKIRKKT +REFVDKDAQEFQDFLNSLDASLLS +>sp|Q91G88|006L_IIV6 Putative KilA-N domain-containing protein 006L OS=Invertebrate iridescent virus 6 OX=176652 GN=IIV6-006L PE=3 SV=1 +MDSLNEVCYEQIKGTFYKGLFGDFPLIVDKKTGCFNATKLCVLGGKRFVDWNKTLRSKKL +IQYYETRCDIKTESLLYEIKGDNNDEITKQITGTYLPKEFILDIASWISVEFYDKCNNII +>fcresfdr 
+MLARALLLCAVLALSHTANPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYILTHFKGFWNVVNNIPFLRNAIMSYVLTSRSHLIDSPPTYNADYGYKSWEAFSNLSYYTRALPPVPDDCPTPLGVKGKKQLPDSNEIVEKLLLRRKFIPDPQGSNMMFAFFAQHFTHQFFKTDHKRGPAFTNGLGHGVDLNHIYGETLARQRKLRLFKDGKMKYQIIDGEMYPPTVKDTQAEMIYPPQVPEHLRFAVGQEVFGLVPGLMMYATIWLREHNRVCDVLKQEHPEWGDEQLFQTSRLILIGETIKIVIEDYVQHLSGYHFKLKFDPELLFNKQFQYQNRIAAEFNTLYHWHPLLPDTFQIHDQKYNYQQFIYNNSILLEHGITQFVESFTRQIAGRVAGGRNVPPAVQKVSQASIDQSRQMKYQSFNEYRKRFMLKPYESFEELTGEKEMSAELEALYGDIDAVELYPALLVEKPRPDAIFGETMVEVGAPFSLKGLMGNVICSPAYWKPSTFGGEVGFQIINTASIQSLICNNVKGCPFTSFSVPDPELIKTVTINASSSRSGLDDINPTVLLKERSTEL +>BUSCOaEOG7B0HST +MAADQAQFQQLLVSLLSTDNEVRKQAEEAYNNLPVESKVTFLLGAIANGQLSEEVRQLAA +VLLRRLFSSEFLEFYKKLPAEAQAQLKEQILLAVQQEVSEQLRRKVCEVVAEVARNLIDE +DGNNQWPEFLQFLFQCANSPSPQLKESALRIFTSVPGIFGNQEAQYLDLIKQMLAKSLED +TEDAEVRLQAVRAVGAFILLHDKEKEIQKHFADLLPALLQVVAESIEKQDDDALLKVLID +LAEATPKFLRPQLETILELCLKVLSEEDVEDSWRHLALEVLVTLAETAPAMVRKRAEKYI +VALVPLVLKMMTDLEEDEDWSVADEITEDDNDSNNVVAESALDRLACGLGGKVVLPLVVE +AIPAMLSSSDWKKRHAALMAISAIGEGCHKQMEALLDQVLDGVLKYLQDPHPRVRYAACN +AIGQMSTDFAPIFEKKFHDKVIPGLLLLLDDEANPRVQAHAGAALVNFSEDCPKNILTRY +LDAIMAKLEAILTSKFKELVEKGTKLVLEQVVTTIASVADTAEEEFVAYYDRLMPCLKYI +IQNANSEELKLLRGKTIECVSLIGLAVGREKFIADASEVMDLLLKTHTEGAELPDDDPQT +SYLISAWARICKILGKQFEQYLPLVMGPVLRTASLKPEVALLDNEDLEDIEGDVDWQFVS +LGEQQNFGIRTAGLEDKASACEMLVCYARELKEGFAEYAEEVVRLMVPLLKFYFHDGVRT +AAAESLPYLLDCAKIKGPQYLEGMWAYICPELLKAIDTEPEKEVLSELLSSLAKCIETLG +AGCLSEEALKELLRILDKLLKEHFERAEKRLEKRKDEDYDEVVEEELAEEDDEDVYILSK +VADILHALFATYKEAFLPAFDQVVPHFVKLLEPERPLADRQWALCVFDDVIEFGGPACVK +>FBpp0306926 +MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAG +ESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKRFIRNPNYVKANEFYDKMLSSEY +VSKRYKDLPPPHPGFGADQPPA +>FBpp0078508 +MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFD +LKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLGVTVDLINRELYQADSMTLLDPA +DEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK +KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPC +AQVIFDSDPAPAGKNVPAQLEEMSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFI 
+NGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGIYYNELETRVRLNKR +RVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIG +ETEKTSEDAAVGAQAASGADSPAQVARDRQSRSRSRTRSGSSSGSGSGSGSRASSRSK +SGSRSGSGSRSRTNSPAGSQKSGSRSRSVSRSRSRSKSGSRSRSRSRSKSGSRSRSGS +RSGSGSRSPSRSRSGSPSGSGSSSGSASDE |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/genome.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome.fa Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,3253 @@\n+>sample\n+GTGGCCGGCTGATTTCACGTCCTAACTTTGGGCTTAACTGGTTCGCCAGTTGACTTTCTTCGCCATCATG\n+TGATGCATTAATTAAACAATAATTACTAATTGACAGTAATTAATAATTGTGGCAAAAAGCGCGACACGTT\n+TTTTCGGCAAACTCCTCGGAAGACCGATTGTTTAAAGACGTAGGGAAAAGTAGTTCCCAAGCATTTTAAA\n+AAGATACCTATGACATGTGACACCTTTAAAGTGCAATACAAGTTTTCATCTCTTTATATCCTTTTACTCC\n+CTAATTTGAATATAAAAGGAATCGCATTGAGAGTATAAAGGCTTTAGTTCTTATCGATAGATAGTTAGTG\n+ATGAAATAAAATTATAACCGTGGTTTTAGTTTGAAATGTTGTAAAGACTTTCTTTAAATTTAACCAAATT\n+TATGTGATAAAATGGATATTCCATAGATAAGACATTTAAGTTAAATGTTTTTATACATCAAAAAGGAAAC\n+ATTGTGCACGCTATCAAATGGTATTCTTAAAATCGAGTCAGTTAGGTAAGTTATTAATTAAATGGTAACT\n+TTTTAACGTGCGTCAAATAATCTAGAAATTCTTCTTCAACTCATCCAAAACATTCTCAACACCACAATAT\n+CTATGCTCAGCGATGACAAATTTCTCCTGATTTCTTAATTTTCTATCTATGCTATGCGATCAATCAACGA\n+ATGTGTGCTAATTTCTTGTGACGATTATTTTGCAAAGTCGTCTCCGCGTTAATATCCGATGTAAATAAAC\n+CTATGAAAATCGCAAAGATCTATTCCTTTGCGCTTAACCTTGTTATTGAACTCCCTCCCGCCCGGATTTT\n+CGCAGCTTCCAACTAAGTGATACCTTTTAAACAAACGCCACAACAAAACAGGTGACAATCATATTTTATC\n+AACAAAAGAAAAGAGAGATAATATCTGCTGCTAATTCAATTTATTGGGCTTTGTGTTTATTTGCATTGGG\n+AATCCGTGGAGCTGATATTGTTTACTTTGGCAATTTCCCAGTCATTATGGTCGCCGTATAATGTGAATGG\n+CCAACAGGAAAATTTCACAGATTCCACTGGTTATTCACTGTTCATGCTGGCGGTTGATCCGCTCCAAATC\n+CCGGATGAGGCGGCGGAAGATCCTTATACCGCTTACTTACGTATTCACTGCTTAACATCTTATCATAGAA\n+TTCGTTAGCTTTCACATAGTTTGGATTCCGAATGAATCTCTTCTGTCGACCTCGCGTTTTTGGCAATTTT\n+CGGAGCTCTTCGCTACTGCTAGTTGATTTAAGGCCTACATCTTTCTGATCCTTAATCATCCTTTTAGGTG\n+ATTCTCCTGCTACCATGGGATCCACCAGTTTCATATATAATGGATCGGGCTCGTTTGCAAGTGGGTTTCC\n+AGTTTCAGAAGACTCTGACGTTCTAAGCGCCAATAAAATGAAAATAACCAAAAGGAAACTAGACATGTTT\n+CGTTACAGACAGATATAGATTGGATATTATTGAAAGAAAATGAAAATAAACAGCGATAATGATCTGTGAC\n+TTATTGGAAATTAGATGGCTTATGGATGATGGGGCGATAAATTCGAACAAACACTGAGAGCATTTTTGGG\n+AGCATTGTGGGAGCTTTTCTATTATCCAGTACTTTATATACATATATCATTTATATACTAATCATTTCTG\n+GTAGCCGTTCGTAATCAGGATCGGATCCTTTTTTACCCGTTAGTCAGCTAGAAGAAACGAAAAATTAAAA\n+TAGTAAAATCTAAAAGTATACAAAAATTCAAATAGTAAAACCAAAAAGTATTAAAAAAAATATCAATCGT\n+TTTTAAACGTTGATTTTTCAGCTTGTGGGGTGATTTATCGCTAACTTGGAAAATGATAATAAAGCATTA
T\n+CCATAATATTAGTTGTGGAAATGAAATTCAAATAGATGTTGTGTTATATACGATGAGGATGTTGCATTTG\n+AGTCCCCGGAAATATAGTATTTTTTTTACCGAAGGTATTATCGTACCGGTCAAGTACGGTCACACTGCCA\n+AGCGCAGATTTGAGGATTTCTAGATTTGGCCTCTTGATGGACTAGAAGCGCTACCAAAACTGGGGCTTGA\n+GTTGAATTACCTGTTGGAAGACACAATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAA\n+AAGCGACCCCAGCGGCAAACGGAGCGCAAGTAAGTGAACAGATCCCTAAACAGACGCCAGATACTCAGAC\n+TGATGTGTACCTTGCAGATCCGAGATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCAT\n+TTGATCTGAAGTTTCTGCAGTACCCCTTCGACAGCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGA\n+GCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAG\n+CTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCTGGAGGAGGAGACTCTGA\n+CGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCGAGTACAT\n+CTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTC\n+AAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCT\n+TCAGCGACACCAAGAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCC\n+TATCTTCCCCGACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCT\n+GCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGA\n+GCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGACGCTGGAGAAACGCCGTACAGACTTCAT\n+CAATGGCGAGCTGTACAAGGAGGAGGAGGAGTACGAGTACAAGATCGCTCGAGAGTACAACTGGAACGTG\n+AAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATCTACTACA\n+ACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAA\n+GCTGGTAAGTATATTTATGCGCATACATCTATAGCGAGCTTTACTTTGTATTATTTCTACCAGGTTGTCA\n+AGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGG\n+CGAGGAGGAGGAGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAG\n+ACGAGCGAGGACGCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCG\n+ATCGACAGTCTCGTTCTCGGAGTCGAACTCGCAGCGGGTCCAGTTCAGGATCTGGATCTGGCTCCGGCTC\n+TCGGGCCAGCAGCCGCTCAAAGTCTGGTTCTCGGTCTGGTAGCGGCTCCAGATCACGCACAAATTCGCCG\n+GCAGGATCCCAGAAATCCGGATCCAGATCGAGATCGGTATCACGTTCCCGATCCCGTTCCAAGTCCGGCT\n+CTCGGTCGCGTTCTAGGTCGAGATCCAAGTCCGGTTCCCGATCACGTTCGGGCTCCAGATCTGGCTCTGG\n+GTCGCGATCGCCCAGCCGGTCTCGC
AGTGGCTCGCCTTCTGGTTCAGGATCCAGCTCTGGAAGCGCCTCA\n+GATGAATGAT'..b'ATCGGTATCACGTTCCCGATCCCGTTCCAAGTCCGGCT\n+CTCGGTCGCGTTCTAGGTCGAGATCCAAGTCCGGTTCCCGATCACGTTCGGGCTCCAGATCTGGCTCTGG\n+GTCGCGATCGCCCAGCCGGTCTCGCAGTGGCTCGCCTTCTGGTTCAGGATCCAGCTCTGGAAGCGCCTCA\n+GATGAATGATTAATTACAAAAAACGGCGTTCATAATAAATAAGTTTATAATCAACCAAGTACATTTGAAA\n+ACTGAACTAACTCGATTTAATATCATTTTCGCCTCAACTCAGCGCTCGGGTTCGTTGCCCAGAATAGTTT\n+TAAATAAAATCGGCAGTTTAACATAATTTATATTAGATGTTGTTGTTGTATTGCAAACAAGTCGGGTCCT\n+AGTCGATTTACACTTGGCTGAGATAAAACAACTAAGATTCAAATAATATCCTCATAAGAAGATGTAATTA\n+AGACGTTTTTCTTAGGGGGTGCTTAGGAATTGATTAGATCGCCTTTGGGGAAGTGCAAACAATGTAAAAT\n+GATATAAAAGGGTATAAATTAAGTGGATATATGCATCTTCGTTCCAACTACGTGGCGTCCATCAAAAAGC\n+GCTGGAAGACTTCGCCATCGGAACTAGGTAGCTCTGTTTGTTGCGGTGAGTAGATTCTCAAGTTCTGGAG\n+TTGCTGCAGCGGAGAGCCATTCCCGCTGAAGTGTACCACCGCAACCGGCTGTAGGGATATGAGCTGTCCC\n+TCCTCCCGCGGCTCCACACCACAGATGCCCAGCTTTTGGCACTGCTCAACGACAATGTCGTCGATTGACT\n+GCGAAAGCAGTGCCTCCTGCTCAGGATCCACAATGGAGCTGCTGTTGATGGCAGCTATTTCGGCGCTCGC\n+TGGTGAAGGCTGAGTATGCGAGTCGTTGCCCTCCAGGAACGCCTTTATCAGCTGCTCCGGTGTCTGGGCC\n+TCCTCGGTGGGACATCGATGGGTCCTTTGACTGACCTTGTACCTGAACGTCTTTTGGCAGAGCTCGCACT\n+TGTAGGGCATCACTCCCGTGTGGATGCGCGTGTGGACAAGGAAAGAGACTCGCTGCCGGAAACACTTGCC\n+TGTGGACGAGTCCGAAATACGAAAAGGTTAGACATGGAGTGACCCGGAAAAGAAGGTATACCTCTCCTTC\n+ATTTAAAGTAAATAGGGCAAATCGCAATGGAGTATGCTCATTTATAAGCTGGCTAACAAAATAAGGGGCG\n+GCTAATTAAGGGGTTTGATCGATACTCACCGCAGACTTCGCACTTGAAGGGCTTTTCGCCGCTGTGAATC\n+CTCTGGTGGTTATGCAGCGTAGACAGTTCCTTGAAGGCGCGTCCACAAACCCCGCAGACATGGGGCTTTA\n+CCTCGCTGTGGTAGAGCAAATGCTTGTGGTACGACTGCTGGAAGGTGAAGGTCCTGGCGCAGATCTCGCA\n+TGTGTACGGCATCTCGCCGGTGTGGAGCCGCTTGTGCTTCTTCAGAAAGTACTTGGTGGTGAAGGACTTG\n+CTGCACACATCGCACTCCCACAGCTTCGGGGTGGCCGTGCCCGACTCCGGCGAACTGGGGGACTGCTGGA\n+GCATGCTCAGCGCCCCACTCAGCGCGTATGGCTGGGATGCGGTGCACGTGGAGTTATTTCCGTTGCCAAG\n+GGCTCCAGGCTCTGTGTGTCGAATGCGATCGCAAATGCTCAGCTTGGGCGTGGCAGTAACTGCACTGGTG\n+GTAGAAGCGGGTGTAGGACTGGGATTAGGATTTGGATTGGGATTGGAGCAGGGCACGCCCATCATGTGCA\n+CTACTTTCAGGTGGATTCGAAGGGAGCCCTTCATTCGGAACGTCTTGGAGCAGAGATGGCACTTGTAGGG\
n+CTCCTGGTCCTGTATAAAGCAATAATCGGAATTTCACTTATATTTATCAATTCATCAATATGCCCTCATG\n+GCCAAATATTCCATTACATTACCGTCTGTCTCTCAGTTTCAAATTTATGCACAAAAATCATTCACTTTCA\n+TTCACTATATCACAAAGTTGCCATGGTTTTAAATTGATCAAAAACAAATTAATATCTATCATATATATAC\n+ATAGTCATATGAACAGTTGAAAAATTAATTGAAAATAATGGGAACGATATACGTACATACATCAGTTGTT\n+TTTAAAATATAAGGGTATATAGATTTCTTTCTTGTTGTTGTTGATTTTAATTACGTCAAACTTTTGTTTC\n+AGATTCAATGTAAATGGTCTAGCTTTTTAAGTATGATTTTTTTTTGCTGCCAGTGAGCATAGAAAAAAAA\n+AATCAAAATCGATATAAGAATATGCGAAAGTGCATTACGAAACTCTTTAGATAATAGCACTTAATATATG\n+TACATAGCCAATAGTTACCGGTTCCTTCTGTTGGGGTTCCTTTTGCTTGGGTTCTCCCTCCGCATTTTCG\n+TGGACTAAGCGGACGTGCATGTCCCTCAGCTCGGTATTCCGGAAACTGAACTCGCAAATGTCGCAGTGGG\n+CGGGCGGGGTGGTCCGCACAGGCGGTGGGGTTGGGACGACGGGCTTGGACCTGGTTCGCTTGGCCCTCCG\n+TTTGGGAGGAGCTGCGGCAAGGAAGCCCCGGGACGAGGCGGGTTGGCCATTCGCTGGACTCTCGTTTCCC\n+TCCTGAGCCATCAGACTTGTGTGCGAGAACAGGTGGATGGTGAGCTTGTCCAGCCCCAGGAAGAGCTCCT\n+TGCAGTTGGCAAAGGGACAGGCCAGTGGGCCGTTGGCCGCCTTGATCAGCCTCTGCTGCAGTGCGTCAAT\n+ACTGCCGAAACTGGGCACCGCGCAGAGCGGACACAGCACCGAGGTGGAACACATTTCGCCAGTGCACTCA\n+ATCGAATCTTATGCAAATGCTTCACCTCCTATTGGGATTATCCTCCTGTTCGGTCTGTGATCATCTATTC\n+AGGAGTCCATTCCCAGACTGCCTAGTCTTTCTGCTTTCAAAATTTTCTAAAAATATCAGCAAGTGAAGAT\n+TTTTGAAAACTTTGGGCCCAGCAATCTGACTTCTCGGCACCGATGCCAGCTAACGAAATAATGAAAAATA\n+ATGAAATGCCCGGCGCGGATCGTCGAATCGTCAAGAAGACTTTCGGAAACACTCGCAGCACCGAAATCCC\n+ATCTCTCGAACAAGGCAGTCTCTTTTCTCCGTGTCTCTGGGTAGCTCATTTCGAAATATAGCTCTGAGCA\n+CGGCTATATACTATATGTATGTAGAATTATTTCTGGCCGATATATGTTGCACTGGCGGCCATATAGCCTT\n+CGTTCTAGTCTTTGTAACGCACGATGCGCAGGAGCAATTCGCTGAGATGACCACATGCGATTTGCGGGAC\n+TTATCTAGAGATCTATCATTATCGCCAGATTGGTTTAATAATTGGCTTTTCCGCCAATATCCAATTGGAA\n+TATGGTTGGTTACTGCAATTGTCGCTCCATTTTTTAAGCACTCCATAAAAAGTAAACACATTAATATGTA\n+CTCTTATTGGAGATTTCTTCTTTCGATTTTAGTTTCGGACCAGTGAAAATCATTCGTTTCATTTTCGTAA\n+ATAAGAACTGAGAAAATATTATTATTATATATATTTCTTTATTAGGAAAATACGAAGATTGAGTATTTCA\n+GATTGAATTAGCATATCCGTCTAAATCTTAATGCTGTAATGAGCTTACTTGAGATCTGATCAAAACCAAT\n+ACAAAACCCACACCAAAGGTGGTAGCTAATATACATATTTTGTGTAATACTTTTGTAGAGTATTTACTAT\n+TCAGCGATTTAAACAAGCAATCGCCTA
GACACACACATTTGTCCGCCTATGTGTATGTGCACCGAGCTAT\n+ACCCCCACTGAATCGCTGTGTGCTATTTTTATGGCCGCGATGCTCTCTTGTTTTGACCCGCTTGGGCAAC\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/genome_masked.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome_masked.fa Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,4554 @@\n+>sample\n+GTGGCCGGCTGATTTCACGTCCTAACTTTGGGCTTAACTGGTTCGCCAGT\n+TGACTTTCTTCGCCATCATGTGATGCATTAATTAAACAATAATTACTAAT\n+TGacagtaattaataattgTGGCAAAAAGCGCGACACGTTTTTTCGGCAA\n+ACTCCTCGGAAGACCGATTGTTTAAAGACGTAGGGAAAAGTAGTTCCCAA\n+GCATTTTAAAAAGATACCTATGACATGTGACACCTTTAAAGTGCAATACA\n+AGTTTTCATCTCTTTATATCCTTTTACTCCCTAATTTGAATATAAAAGGA\n+ATCGCATTGAGAGTATAAAGGCTTTAGTTCTTATCGATagatagttagtG\n+ATGAAATAAAATTATAACCGTGGTTTTAGTTTGAAATGTTGTAAAGACTT\n+TCTTTAAATTTAACCAAATTTATGTGATAAAATGGATATTCCATAGATAA\n+GACATTTAAGTTAAATGTTTTTATACATCAAAAAGGAAACATTGTGCACG\n+CTATCAAATGGTATTCTTAAAATCGAGTCAgttaggtaagttattaatta\n+aATGGTAACTTTTTAACGTGCGTCAAATAATCTAGAAATTCTTCTTCAAC\n+TCATCCAAAACATTCTCAACACCACAATATCTATGCTCAGCGATGACAAA\n+TTTCTCCTGATTTCTTAATTTTCTATCTATGctatgcGATCAATCAACGA\n+ATGTGTGCTAATTTCTTGTGACGATTATTTTGCAAAGTCGTCTCCGCGTT\n+AATATCCGATGTAAATAAACCTATGAAAATCGCAAAGATCTATTCCTTTG\n+CGCTTAACCTTGTTATTGAACTCCCTCCCGCCCGGATTTTCGCAGCTTCC\n+AACTAAGTGATACCTTTTAAACAAACGCCACAACAAAACAGGTGACAATC\n+ATATTTTATCAACAAAAGAAAAGAGAGATAATATCTGCTGCTAATTCAAT\n+TTATTGGGCTTTGTGTTTATTTGCATTGGGAATCCGTGGAGCTGATATTG\n+TTTACTTTGGCAATTTCCCAGTCATTATGGTCGCCGTATAATGTGAATGG\n+CCAACAGGAAAATTTCACAGATTCCACTGGTTATTCACTGTTCATGCTGG\n+CGGTTGATCCGCTCCAAATCCCGGATGAGGCGGCGGAAGATCCTTATACC\n+GCTTACTTACGTATTCACTGCTTAACATCTTATCATAGAATTCGTTAGCT\n+TTCACATAGTTTGGATTCCGAATGAATCTCTTCTGTCGACCTCGCGTTTT\n+TGGCAATTTTCGGAGCTCTTCGCTACTGCTAGTTGATTTAAGGCCTACAT\n+CTTTCTGATCCTTAATCATCCTTTTAGGTGATTCTCCTGCTACCATGGGA\n+TCCACCAGTTTCATATATAATGGATCGGGCTCGTTTGCAAGTGGGTTTCC\n+AGTTTCAGAAGACTCTGACGTTCTAAGCGCCAATAAAATGAAAATAACCA\n+AAAGGAAACTAGACATGTTTCGTTACAGACAGATATAGATTGGATATTAT\n+TGAAAGAAAATgaaaataAACAGCGATAATGATCTGTGACTTATTGGAAA\n+TTAGATGGCTTATGGATGATGGGGCGATAAATTCGAACAAACACTGAGAG\n+CATTTTTGGGAGCATTGTGGGAGCTTTTCTATTATCCAGTACTTTATATA\n+CATATATCATTTATATACTAATCATTTCTGGTAGCCGTTCGTAATCAGGA\n+TCGGATCCTTTTTTACCCGTTAGTCAGCTAGAAGAAACGAAAAATTAAAA\n+TAGTAAAatctaaaagtatacaaaaattcaaatagtaaaaccaaaaagta\n+ttaaaaaaAATATCAATCGTTTTTAAACGTTGATTTTTCAGCTTGTGGGG\n+TGATTT
ATCGCTAACTTGGAAAATGATAATAAAGCATTATCCATAATATT\n+AGTTGTGGAAATGAAATTCAAATAGATGTTGTGTTATATACGATGAGGAT\n+GTTGCATTTGAGTCCCCGGAAATATAGTATTTTTTTTACCGAAGGTATTA\n+TCGTACCGGTCAAGTACGGTCACACTGCCAAGCGCAGATTTGAGGATTTC\n+TAGATTTGGCCTCTTGATGGACTAGAAGCGCTACCAAAACTGGGGCTTGA\n+GTTGAATTACCTGTTGGAAGACACAATGCCACCCACGATCAACAATTCGG\n+CGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAG\n+TAAGTGAACAGATCCCTAAACAGACGCCAGATACTCAGACTGATGTGTAC\n+CTTGCAGATCCGAGATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCG\n+GATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACAGCCACCG\n+CTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACG\n+TGCTGACGGAACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAG\n+CTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACT\n+GCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGC\n+ATTCGAGGACGGTGTCATGGTTGCGCAAATCCGAGTACATCTCCACCGAG\n+CAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGG\n+TTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCG\n+AAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAAGAGCGAA\n+ATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCC\n+TATCTTCCCCGACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATAT\n+TTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAG\n+GAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACA\n+GTTTGTCGCCTACTTCCTGCCCACAGAGCAGACGCTGGAGAAACGCCGTA\n+CAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtac\n+aagATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGG\n+CTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATCTACTACA\n+ACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGC\n+CAGCAACCCAACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCT\n+ATAGCGAGCTTTACTTTGTATTATTTCTACCAGGTTGTCAAGCATCGTCC\n+ATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAG\n+AAGTTCCTGGCGAGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAG\n+CAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGACGCAGCTGT\n+TGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCG\n+ATCGACAGTCTCGTTCTCGGAGTCGAACTCGCAGCGGGTCCAgttcagga\n+tctggatctggctccggctcTCGGGCCAgcagccgctcaaagtctggttc\n+tcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCC\n+AGAAATCCGGatccagatcgagatcggtatcacgttcccgatcccgttcc\n+aagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagtccggt
tcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTT'..b'cgcgttctaggtcgagatccaagtccggttcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTTCTGGTTCAGGATCCAGCTCTGGAAGCGCCTCA\n+GATGAATGATTAATTACAAAAAACGGCGTTCATAATAAATAAGTTTATAA\n+TCAACCAAGTACATTTGAAAACTGAACTAACTCGATTTAATATCATTTTC\n+GCCTCAACTCAGCGCTCGGGTTCGTTGCCCAGAATAGTTTTAAATAAAAT\n+CGGCAGTTTAACATAATTTATATTAGATGTtgttgttgtATTGCAAACAA\n+GTCGGGTCCTAGTCGATTTACACTTGGCTGAGATAAAACAACTAAGATTC\n+AAATAATATCCTCATAAGAAGATGTAATTAAGACGTTTTTCTTAGGGGGT\n+GCTTAGGAATTGATTAGATCGCCTTTGGGGAAGTGCAAACAATGTAAaat\n+gatataaaagggtataaaTTAAGTGGATATATGCATCTTCGTTCCAACTA\n+CGTGGCGTCCATCAAAAAGCGCTGGAAGACTTCGCCATCGGAACTAGGTA\n+GCTCTGTTTGTTGCGGTGAGTAGATTCTCAAGTTCTGGAGTTGCTGCAGC\n+GGAGAGCCATTCCCGCTGAAGTGTACCACCGCAACCGGCTGTAGGGATAT\n+GAGCTGTCCCTCCTCCCGCGGCTCCACACCACAGATGCCCAGCTTTTGGC\n+ACTGCTCAACGACAATGTCGTCGATTGACTGCGAAAGCAGTGCCTCCTGC\n+TCAGGATCCACAATGGAGCTGCTGTTGATGGCAGCTATTTCGGCGCTCGC\n+TGGTGAAGGCTGAGTATGCGAGTCGTTGCCCTCCAGGAACGCCTTTATCA\n+GCTGCTCCGGTGTCTGGGCCTCCTCGGTGGGACATCGATGGGTCCTTTGA\n+CTGACCTTGTACCTGAACGTCTTTTGGCAGAGCTCGCACTTGTAGGGCAT\n+CACTCCCGTGTGGATGCGCGTGTGGACAAGGAAAGAGACTCGCTGCCGGA\n+AACACTTGCCTGTGGACGAGTCCGAAATACGAAAAGGTTAGACATGGAGT\n+GACCCGGAAAAGAAGGTATACCTCTCCTTCATTTAAAGTAAATAGGGCAA\n+ATCGCAATGGAGTATGCTCATTTATAAGCTGGCTAACAAAATAAGGGGCG\n+GCTAATTAAGGGGTTTGATCGATACTCACCGCAGACTTCGCACTTGAAGG\n+GCTTTTCGCCGCTGTGAATCCTCTGGTGGTTATGCAGCGTAGACAGTTCC\n+TTGAAGGCGCGTCCACAAACCCCGCAGACATGGGGCTTTACCTCGCTGTG\n+GTAGAGCAAATGCTTGTGGTACGACTGCTGGAAGGTGAAGGTCCTGGCGC\n+AGATCTCGCATGTGTACGGCATCTCGCCGGTGTGGAGCCGCTTGTGCTTC\n+TTCAGAAAGTACTTGGTGGTGAAGGACTTGCTGCACACATCGCACTCCCA\n+CAGCTTCGGGGTGGCCGTGCCCGACTCCGGCGAACTGGGGGACTGCTGGA\n+GCATGCTCAGCGCCCCACTCAGCGCGTATGGCTGGGATGCGGTGCACGTG\n+GAGTTATTTCCGTTGCCAAGGGCTCCAGGCTCTGTGTGTCGAATGCGATC\n+GCAAATGCTCAGCTTGGGCGTGGCAGTAACTGCACTGGTGGTAGAAGCGG\n+GTGTAGGACtgggattaggatttggattgggattggAGCAGGGCACGCCC\n+ATCATGTGCACTACTTTCAGGTGGATTCGAAGGGAGCCCTTCATTCGGAA\n+CGTCTTGGAGCAGAGATGGCACTT
GTAGGGCTCCTGGTCCTGTATAAAGC\n+AATAATCGGAATTTCACTTATATTTATCAATTCATCAATATGCCCTCATG\n+GCCAAATATTCCATTACATTACCGTCTGTCTCTCAGTTTCAAATTTATGC\n+ACAAAAATCATTCACTTTCATTCACTATATCACAAAGTTGCCATGGTTTT\n+AAATTGATCAAAAACAAATTAATATCTATCATATATaTACATAGTCATAT\n+GAACAGTTGAAAAATTAATTGAAAATAATGGGAACGATATACGTACATAC\n+ATCAGTTGTTTTTAAAATATAAGGGTATATAGATTTCTTTCttgttgttg\n+ttgatTTTAATTACGTCAAACTTTTGTTTCAGATTCAATGTAAATGGTCT\n+AGCTTTTTAAGTATGATTTTTTTTTGCTGCCAGTGAGCATAGAAaaaaaa\n+aatCaaaATCGATATAAGAATATGCGAAAGTGCATTACGAAACTCTTTAG\n+ATAATAGCACTTAATATATGTACATAGCCAATAGTTACCGGTTCCTTCTG\n+TTGGggttccttttgcttgggttcTCCCTCCGCATTTTCGTGGACTAAGC\n+GGACGTGCATGTCCCTCAGCTCGGTATTCCGGAAACTGAACTCGCAAATG\n+TCGCAGTGGGCGGGCGGGGTGGTCCGCACAGGCGGTGGGGTTGGGACGAC\n+GGGCTTGGACCTGGTTCGCTTGGCCCTCCGTTTGGGAGGAGCTGCGGCAA\n+GGAAGCCCCGGGACGAGGCGGGTTGGCCATTCGCTGGACTCTCGTTTCCC\n+TCCTGAGCCATCAGACTTGTGTGCGAGAACAGGTGGATGGTGAGCTTGTC\n+CAGCCCCAGGAAGAGCTCCTTGCAGTTGGCAAAGGGACAGGCCAGTGGGC\n+CGTTGGCCGCCTTGATCAGCCTCTGCTGCAGTGCGTCAATACTGCCGAAA\n+CTGGGCACCGCGCAGAGCGGACACAGCACCGAGGTGGAACACATTTCGCC\n+AGTGCACTCAATCGAATCTTATGCAAATGCTTCACCTCCTATTGGGATTA\n+TCCTCCTGTTCGGTCTGTGATCATCTATTCAGGAGTCCATTCCCAGACTG\n+CCTAGTCTTTCTGCTTTCAAAATTTTCTAAAAATATCAGCAAGTGAAGAT\n+TTTTGAAAACTTTGGGCCCAGCAATCTGACTTCTCGGCACCGATGCCAGC\n+TAACGAAATAATGAAaaataatgaaaTGCCCGGCGCGGATCGTCGAATCG\n+TCAAGAAGACTTTCGGAAACACTCGCAGCACCGAAATCCCATCTCTCGAA\n+CAAGGCAGTCTCTTTTCTCCGTGTCTCTGGGTAGCTCATTTCGAAATATA\n+GCTCTGAGCACGGCTATATACTATATGTATGTAGAATTATTTCTGGCCGA\n+TATATGTTGCACTGGCGGCCATATAGCCTTCGTTCTAGTCTTTGTAACGC\n+ACGATGCGCAGGAGCAATTCGCTGAGATGACCACATGCGATTTGCGGGAC\n+TTATCTAGAGATCTATCATTATCGCCAGATTGGTTTAATAATTGGCTTTT\n+CCGCCAATATCCAATTGGAATATGGTTGGTTACTGCAATTGTCGCTCCAT\n+TTTTTAAGCACTCCATAAAAAGTAAACACATTAATATGTACTCTTATTGG\n+AGATTTCTTCTTTCGATTTTAGTTTCGGACCAGTGAAAATCATTCGTTTC\n+ATTTTCGTAAATAAGAACTGAGAAAATATtattattatATATATTTCTTT\n+ATTAGGAAAATACGAAGATTGAGTATTTCAGATTGAATTAGCATATCCGT\n+CTAAATCTTAATGCTGTAATGAGCTTACTTGAGATCTGATCAAAACCAAT\n+ACaaaacccacacCAAAGGTGGTAGCTAATATACATATTTTGTGTAATAC\n+TTTTGTAGAG
TATTTACTATTCAGCGATTTAAACAAGCAATCGCCTAGAC\n+ACACACATTTGTCCGCCTATGTGTATGTGCACCGAGCTATACCCCCACTG\n+AATCGCTGTGTGCTATTTTTATGGCCGCGATGCTCTCTTGTTTTGACCCG\n+CTTGGGCAAC\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_augustus/Genus_species.cds-transcripts.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_augustus/Genus_species.cds-transcripts.fa Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,421 @@\n+>FUN_000001-T1 FUN_000001\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000002-T1 FUN_000002\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGgatctggatctggctccggctcTCGGGCCAgc\n+agccgctcaaagtctggttctcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCCAGAAATCCGG\n+atccagatcgagatcggtatcacgttcccgatcccgttccaagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagt\n+ccggttcccgatcacgttcgggctccagatctggctct
gggtcgcgatcgCCCAGCCGGTCTCGCAGTGGCTCGCCTTCT\n+GGTTCAGGATCCAGCTCTGGAAGCGCCTCAGATGAATGATTAA\n+>FUN_000003-T1 FUN_000003\n+ATGTATGACCAGGAGCCCTACAAGTGCCATCTCTGCTCCAAGACGTTCCGAATGAAGGGCTCCCTTCGAATCCACCTGAA\n+AGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAATCCCAGTCCTACACCCGCTTCTACCACCA\n+GTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACACAGAGCCTGGAGCCCTTGGCAACGGAAAT\n+AACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGCATGCTCCAGCAGTCCCCCAGTTCGCCGGA\n+GTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTCCTTCACCACCAAGTACTTTCTGAAGAAGC\n+ACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCAGGACCTTCACCTTCCAGCAGTCGTACCAC\n+AAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGTGGACGCGCCTTCAAGGAACTGTCTACGCT\n+GCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAAGTCTGCGGTGAGTATCGATCAAACCCCTTAA\n+TTAGCCGCCCCTTATTTTGTTAG\n+>FUN_000004-T1 FUN_000004\n+ATGATACATTCGGCTAAGACTACCTTGGTGGAGTGGACAGAGGCCATGGAAGACGGAAACAAGGGCTACCAGCTAATCGA\n+AAAGTACTACCTCGATGACCAGCAAAAGGCACGGGAGCTGAACATTAAGCGTCAGCTCTTGCAGGCGGACATCGACAAGC\n+GGCGCAAGCAGGTGGTGCTCCTCTATGACGAGCAAATGACGCTGGAGAAGAATCTGGAGCGAACCGCTTGTCTGTACAGG\n+TCTGCCCACGCGGAACGCCGCCAGATGGTGGAGACGTGGAAGAGCGCCGTGAACCAGATGACCCAGCGAGAGCACGACAT\n+TCAGCGCAGCGAGATAGAGTGTGCGGAGCTGGCCCAGAAAGCCCAGCAGACGGCTCAGACCTACAAGGAGTACGACAACC\n+AGCTAACCGAGGTCATCGAGAACAACCGCCAGGTGGAGCTCGCGATCGAGTCCCTCAACGAGGAAAGCTCCGACATGAAA\n+AACCAAATCCAGATCCTGATTGATGCCACATTACTGAAGGAGCGCGAGATCGACGGACTGCGTCGGGAACTGGAGAACCT\n+TTCGAACCGGGTTCACCTGCAGCGCATGGATAACCGTAGTCAGATGAAAAAGCGCGATGAGAAGGCCAAGGAGCTGGAGA\n+ACTTCGCTTCGGTGATGGAGAAGGTTAACGCTCGGCTGAAGTCTGTACAGAACAAGGCCCTCAATGCGGAGCAGCGGCTG\n+CAAATTCTGGAGGAGATGATGCAGGCGGAGGAGACGGCTCTGCGAAACCTGGACAAGGAGCAGGAGAAGGTCAACGAGAT\n+GTTGTACCGCACCCAGAGGCAGGTCATCGAGCTGCAGGACGAGGAGAAGGTCTTGAAGGTGCAGAACGACTCGCTCAACT\n+CCAACCTGGCAGCGATCAATCGCAATCAGCAGCAGGTCAATAACGAACTCAAGCGGCAGACGGAGATCCATTACAGCCTG\n+TCCTTTAAGTGCTTGGAGGCGGAGCGGCGTCTCGCCGAGATCAAGGGTCTGGCAGATGACCCCGAGGTAGAGGCCACAAA\n+CATGGCCCGGCTTAACACTCTGGAGCAGGAGTATGAAAAGTTGCAACGCCTCATCGCCACCACGGAGGCGCAGAATAAGA\n+A
GCTAAACTATAACATGAACAACCTGGTCGTCCAGTACAACGCCGACGAGAAGGA'..b'TCCTTTG\n+ACAATCTGCCCGCGTTGGAACAACTACTTCTAAGGAACTGTGGGTTGGGCAACATCGAGTGGGAAATCGTGAGGCCAAGA\n+CTCAAATTGTTGCACTATTTGGACTTGGGTGGAGCTCAAAAGACGGGCAACTATGAACATCAGTTGGACGTATCCGCGTT\n+CTCTCCTGAGACAACTACTAACGCTGAAGAGATATCTACTATCCTAGCGAAGAGAGCGATGGCTCCAGAAGTAGTGGGCA\n+CCACTACACTAGGACCTACTACTTCCATAGAAATTTCACCACCATCAACACAAAGTACAACAACACCCAAAGAAGAAAGT\n+ACATCTATGACAGAAACAACGATATTAACAACACCATCGCCAAAATGCGAAGAGGAACTTTGCCAGGATCTAGAGTGCTC\n+CAGAATTACCACCGATACGGTTGCATCTGCAGATCTGGGAAAATCGTCGTGCCAGGATGGTTTACTGGTGGAGATCTGCG\n+AATCGACTTGCACCACTCCCACATTCTTCTGTGTGATATTAGGCGAGAACTTTACCTCCGCATCCAACTGTTGTTCCCAC\n+CATACCATGCGATGTGTGGTCTCTGCACAGGTCTCCTGGTTCGAGGACCACAGCGGACTGGTCATTGGCTTGGGAGTGGG\n+TCTCCTCTTCATCGGCAGCTTCCTCGGCATGCTCATCGTCTTCGGAACCCTGCGCCTGAATCCATCCTGGTTGCGAGGCA\n+ATAAGCGGCGGGAGTCGAATACGATAGGACTGATTCAGGGAAGATTTGAGAAGGACCCGTATGAGCAAGTGGGGTAG\n+>FUN_000016-T1 FUN_000016\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000017-T1 
FUN_000017\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGgatctggatctggctccggctcTCGGGCCAgc\n+agccgctcaaagtctggttctcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCCAGAAATCCGG\n+atccagatcgagatcggtatcacgttcccgatcccgttccaagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagt\n+ccggttcccgatcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGTCTCGCAGTGGCTCGCCTTCT\n+GGTTCAGGATCCAGCTCTGGAAGCGCCTCAGATGAATGATTAA\n+>FUN_000018-T1 
FUN_000018\n+ATGAAGGGCTCCCTTCGAATCCACCTGAAAGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAA\n+TCCCAGTCCTACACCCGCTTCTACCACCAGTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACA\n+CAGAGCCTGGAGCCCTTGGCAACGGAAATAACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGC\n+ATGCTCCAGCAGTCCCCCAGTTCGCCGGAGTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTC\n+CTTCACCACCAAGTACTTTCTGAAGAAGCACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCA\n+GGACCTTCACCTTCCAGCAGTCGTACCACAAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGT\n+GGACGCGCCTTCAAGGAACTGTCTACGCTGCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAAGT\n+CTGCGGCAAGTGTTTCCGGCAGCGAGTCTCTTTCCTTGTCCACACGCGCATCCACACGGGAGTGATGCCCTACAAGTGCG\n+AGCTCTGCCAAAAGACGTTCAGGTACAAGGTCAGTCAAAGGACCCATCGATGTCCCACCGAGGAGGCCCAGACACCGGAG\n+CAGCTGATAAAGGCGTTCCTGGAGGGCAACGACTCGCATACTCAGCCTTCACCAGCGAGCGCCGAAATAGCTGCCATCAA\n+CAGCAGCTCCATTGTGGATCCTGAGCAGGAGGCACTGCTTTCGCAGTCAATCGACGACATTGTCGTTGAGCAGTGCCAAA\n+AGCTGGGCATCTGTGGTGTGGAGCCGCGGGAGGAGGGACAGCTCATATCCCTACAGCCGGTTGCGGTGGTACACTTCAGC\n+GGGAATGGCTCTCCGCTGCAGCAACTCCAGAACTTGAGAATCTACTCACCGCAACAAACAGAGCTACCTAGTTCCGATGG\n+CGAAGTCTTCCAGCGCTTTTTGATGGACGCCACGTAG\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_augustus/Genus_species.discrepency.report.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_augustus/Genus_species.discrepency.report.txt Mon Oct 04 19:39:38 2021 +0000 |
b |
@@ -0,0 +1,139 @@ +Discrepancy Report Results + +Summary +DISC_PROTEIN_NAMES:All proteins have same name "hypothetical protein" +DISC_SOURCE_QUALS_ASNDISC:taxname (all present, all same) +DISC_FEATURE_COUNT:gene: 18 present +DISC_FEATURE_COUNT:CDS: 18 present +DISC_FEATURE_COUNT:mRNA: 18 present +DISC_COUNT_NUCLEOTIDES:4 nucleotide Bioseqs are present +JOINED_FEATURES:32 features have joined locations. +NO_ANNOTATION:2 bioseqs have no features +DISC_QUALITY_SCORES:Quality scores are missing on all sequences. +ONCALLER_COMMENT_PRESENT:4 comment descriptors were found (all same) +MISSING_GENOMEASSEMBLY_COMMENTS:4 bioseqs are missing GenomeAssembly structured comments +MOLTYPE_NOT_MRNA:4 molecule types are not set as mRNA. +TECHNIQUE_NOT_TSA:4 technique are not set as TSA +MISSING_STRUCTURED_COMMENT:4 sequences do not include structured comments. +MISSING_PROJECT:22 sequences do not include project. +DISC_INCONSISTENT_MOLINFO_TECH:Molinfo Technique Report (some missing, all same) + + +Detailed Report + +DiscRep_ALL:DISC_PROTEIN_NAMES::All proteins have same name "hypothetical protein" + +DiscRep_ALL:DISC_SOURCE_QUALS_ASNDISC::taxname (all present, all same) +DiscRep_SUB:DISC_SOURCE_QUALS_ASNDISC::4 sources have 'Genus species' for taxname +DiscRep_ALL:DISC_FEATURE_COUNT::gene: 18 present +DiscRep_ALL:DISC_FEATURE_COUNT::CDS: 18 present +DiscRep_ALL:DISC_FEATURE_COUNT::mRNA: 18 present +DiscRep_ALL:DISC_COUNT_NUCLEOTIDES::4 nucleotide Bioseqs are present +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:JOINED_FEATURES::32 features have joined locations. 
+DiscRep_SUB:JOINED_FEATURES::32 features have joined location but no exception +genome:CDS hypothetical protein (sample:2126-2199, 2258-3224, 3284-3490, 3549-3863) FUN_000002 +genome:mRNA hypothetical protein (sample4:2126-2199, 2258-3224, 3284-3490, 3549-3863) FUN_000017 +genome:CDS hypothetical protein (sample4:2126-2199, 2258-3224, 3284-3490, 3549-3863) FUN_000017 +genome:mRNA hypothetical protein (sample:2126-2199, 2258-3224, 3284-3490, 3549-3863) FUN_000002 +genome:CDS hypothetical protein (sample4:c5494-4930, c4759-4248) FUN_000018 +genome:mRNA hypothetical protein (sample4:c5494-4930, c4759-4248) FUN_000018 +genome:mRNA hypothetical protein (sample:c5802-5797, c5539-4883) FUN_000003 +genome:CDS hypothetical protein (sample:c5802-5797, c5539-4883) FUN_000003 +genome:CDS hypothetical protein (sample:c10557-10549, c10462-8696) FUN_000004 +genome:mRNA hypothetical protein (sample:c10557-10549, c10462-8696) FUN_000004 +genome:mRNA hypothetical protein (sample:c15214-15209, c14648-14247) FUN_000005 +genome:CDS hypothetical protein (sample:c15214-15209, c14648-14247) FUN_000005 +genome:CDS hypothetical protein (sample:c21705-21700, c21515-19533) FUN_000006 +genome:mRNA hypothetical protein (sample:c21705-21700, c21515-19533) FUN_000006 +genome:CDS hypothetical protein (sample:c35679-35675, c35655-35648, c35594-34843) FUN_000007 +genome:mRNA hypothetical protein (sample:c35679-35675, c35655-35648, c35594-34843) FUN_000007 +genome:CDS hypothetical protein (sample:40223-40396, 40659-41234) FUN_000008 +genome:mRNA hypothetical protein (sample:40223-40396, 40659-41234) FUN_000008 +genome:mRNA hypothetical protein (sample:41267-41274, 41437-41444, 41707-42107) FUN_000009 +genome:CDS hypothetical protein (sample:41267-41274, 41437-41444, 41707-42107) FUN_000009 +genome:CDS hypothetical protein (sample:87202-87207, 88054-88320) FUN_000010 +genome:mRNA hypothetical protein (sample:87202-87207, 88054-88320) FUN_000010 +genome:CDS hypothetical protein (sample:94727-94732, 
94873-95016, 95449-95583) FUN_000011 +genome:mRNA hypothetical protein (sample:94727-94732, 94873-95016, 95449-95583) FUN_000011 +genome:CDS hypothetical protein (sample:133134-133142, 133209-134539, 134668-135510, 135569-136346) FUN_000012 +genome:mRNA hypothetical protein (sample:133134-133142, 133209-134539, 134668-135510, 135569-136346) FUN_000012 +genome:CDS hypothetical protein (sample:144294-144551, 149012-149244, 149367-149588, 149654-149897, 149952-150112, 150174-150248, 151966-152072, 152314-152429, 152496-152751, 153651-159010, 159150-164491, 167135-168360, 168722-169208, 169350-169416) FUN_000013 +genome:mRNA hypothetical protein (sample:144294-144551, 149012-149244, 149367-149588, 149654-149897, 149952-150112, 150174-150248, 151966-152072, 152314-152429, 152496-152751, 153651-159010, 159150-164491, 167135-168360, 168722-169208, 169350-169416) FUN_000013 +genome:CDS hypothetical protein (sample:192049-192067, 193549-193658, 194041-194455, 194518-194669) FUN_000014 +genome:mRNA hypothetical protein (sample:192049-192067, 193549-193658, 194041-194455, 194518-194669) FUN_000014 +genome:CDS hypothetical protein (sample:c210553-210548, c210474-209044) FUN_000015 +genome:mRNA hypothetical protein (sample:c210553-210548, c210474-209044) FUN_000015 + +DiscRep_ALL:NO_ANNOTATION::2 bioseqs have no features +genome:sample2 (length 2030) +genome:sample3 (length 2100) + +DiscRep_ALL:DISC_QUALITY_SCORES::Quality scores are missing on all sequences. 
+ +DiscRep_ALL:ONCALLER_COMMENT_PRESENT::4 comment descriptors were found (all same) +genome:sample:"Annotated using 1.8.7" +genome:sample2:"Annotated using 1.8.7" +genome:sample3:"Annotated using 1.8.7" +genome:sample4:"Annotated using 1.8.7" + +DiscRep_ALL:MISSING_GENOMEASSEMBLY_COMMENTS::4 bioseqs are missing GenomeAssembly structured comments +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:MOLTYPE_NOT_MRNA::4 molecule types are not set as mRNA. +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:TECHNIQUE_NOT_TSA::4 technique are not set as TSA +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:MISSING_STRUCTURED_COMMENT::4 sequences do not include structured comments. +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:MISSING_PROJECT::22 sequences do not include project. 
+genome:sample (length 215740) +genome:ncbi:FUN_000001-T1 (length 124) +genome:ncbi:FUN_000002-T1 (length 520) +genome:ncbi:FUN_000003-T1 (length 220) +genome:ncbi:FUN_000004-T1 (length 591) +genome:ncbi:FUN_000005-T1 (length 135) +genome:ncbi:FUN_000006-T1 (length 662) +genome:ncbi:FUN_000007-T1 (length 254) +genome:ncbi:FUN_000008-T1 (length 249) +genome:ncbi:FUN_000009-T1 (length 138) +genome:ncbi:FUN_000010-T1 (length 90) +genome:ncbi:FUN_000011-T1 (length 94) +genome:ncbi:FUN_000012-T1 (length 986) +genome:ncbi:FUN_000013-T1 (length 4717) +genome:ncbi:FUN_000014-T1 (length 231) +genome:ncbi:FUN_000015-T1 (length 478) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) +genome:ncbi:FUN_000016-T1 (length 124) +genome:ncbi:FUN_000017-T1 (length 520) +genome:ncbi:FUN_000018-T1 (length 358) + +DiscRep_ALL:DISC_INCONSISTENT_MOLINFO_TECH::Molinfo Technique Report (some missing, all same) +DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::technique (all missing) +DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::4 Molinfos are missing field technique +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_augustus/Genus_species.error.summary.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_augustus/Genus_species.error.summary.txt Mon Oct 04 19:39:38 2021 +0000 |
b |
@@ -0,0 +1,1 @@ + 2 WARNING: SEQ_FEAT.ShortExon |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_augustus/Genus_species.gbk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_augustus/Genus_species.gbk Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,4258 @@\n+LOCUS sample 215740 bp DNA linear 21-JUL-2021\n+DEFINITION Genus species.\n+ACCESSION \n+VERSION\n+KEYWORDS .\n+SOURCE Genus species\n+ ORGANISM Genus species\n+ Unclassified.\n+REFERENCE 1 (bases 1 to 215740)\n+ AUTHORS Palmer,J.M.\n+ TITLE Direct Submission\n+ JOURNAL Submitted (21-JUL-2021) CFMR, USDA Forest Service, 1 Gifford\n+ Pinchot Drive, Madison, WI 53726, USA\n+COMMENT \'Annotated using 1.8.7\'.\n+FEATURES Location/Qualifiers\n+ source 1..215740\n+ /organism="Genus species"\n+ /mol_type="genomic DNA"\n+ gene complement(1092..1466)\n+ /locus_tag="FUN_000001"\n+ mRNA complement(1092..1466)\n+ /locus_tag="FUN_000001"\n+ /product="hypothetical protein"\n+ CDS complement(1092..1466)\n+ /locus_tag="FUN_000001"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUN_000001-T1"\n+ /translation="MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAG\n+ ESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKRFIRNPNYVKANEFYDKMLSSEY\n+ VSKRYKDLPPPHPGFGADQPPA"\n+ gene 2126..3863\n+ /locus_tag="FUN_000002"\n+ mRNA join(2126..2199,2258..3224,3284..3490,3549..3863)\n+ /locus_tag="FUN_000002"\n+ /product="hypothetical protein"\n+ CDS join(2126..2199,2258..3224,3284..3490,3549..3863)\n+ /locus_tag="FUN_000002"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUN_000002-T1"\n+ /translation="MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFD\n+ LKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLGVTVDLINRELYQADSMTLLDPA\n+ DEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK\n+ KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPC\n+ AQVIFDSDPAPAGKNVPAQLEEMSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFI\n+ NGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGIYYNELETRVRLNKR\n+ RVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIG\n+ ETEKTSEDAAVGAQAASGADSPAQDLDLAPALGPAAAQSLVLGLVAAPDHAQIRRQDP\n+ RNPDPDRDRYHVPDPVPSPALGRVLGRDPSPVPDHVRAPDLALGRDRPAGLAVARLLV\n+ QDPALEAPQMND"\n+ gene complement(4883..5802)\n+ /locus_tag="FUN_000003"\n+ mRNA 
complement(join(4883..5539,5797..5802))\n+ /locus_tag="FUN_000003"\n+ /product="hypothetical protein"\n+ CDS complement(join(4883..5539,5797..5802))\n+ /locus_tag="FUN_000003"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUN_000003-T1"\n+ /translation="MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPN\n+ PSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGNNSTCTASQPYALSGALSMLQQS\n+ PSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+ KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCEVCGEYRSNPLISRPL\n+ FC"\n+ gene complement(8696..10557)\n+ /locus_tag="FUN_000004"\n+ mRNA complement(join(8696..10462,10549..10557))\n+ /locus_tag="FUN_000004"\n+ /product="hypothetical protein"\n+ CDS complement(join(8696..10462,10549..10557))\n+ /locus_tag="FUN_000004"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUN_000004-T'..b'1 agcttttggc actgctcaac gacaatgtcg tcgattgact gcgaaagcag tgcctcctgc\n+ 4501 tcaggatcca caatggagct gctgttgatg gcagctattt cggcgctcgc tggtgaaggc\n+ 4561 tgagtatgcg agtcgttgcc ctccaggaac gcctttatca gctgctccgg tgtctgggcc\n+ 4621 tcctcggtgg gacatcgatg ggtcctttga ctgaccttgt acctgaacgt cttttggcag\n+ 4681 agctcgcact tgtagggcat cactcccgtg tggatgcgcg tgtggacaag gaaagagact\n+ 4741 cgctgccgga aacacttgcc tgtggacgag tccgaaatac gaaaaggtta gacatggagt\n+ 4801 gacccggaaa agaaggtata cctctccttc atttaaagta aatagggcaa atcgcaatgg\n+ 4861 agtatgctca tttataagct ggctaacaaa ataaggggcg gctaattaag gggtttgatc\n+ 4921 gatactcacc gcagacttcg cacttgaagg gcttttcgcc gctgtgaatc ctctggtggt\n+ 4981 tatgcagcgt agacagttcc ttgaaggcgc gtccacaaac cccgcagaca tggggcttta\n+ 5041 cctcgctgtg gtagagcaaa tgcttgtggt acgactgctg gaaggtgaag gtcctggcgc\n+ 5101 agatctcgca tgtgtacggc atctcgccgg tgtggagccg cttgtgcttc ttcagaaagt\n+ 5161 acttggtggt gaaggacttg ctgcacacat cgcactccca cagcttcggg gtggccgtgc\n+ 5221 ccgactccgg cgaactgggg gactgctgga gcatgctcag cgccccactc agcgcgtatg\n+ 5281 gctgggatgc ggtgcacgtg gagttatttc cgttgccaag ggctccaggc tctgtgtgtc\n+ 5341 gaatgcgatc gcaaatgctc 
agcttgggcg tggcagtaac tgcactggtg gtagaagcgg\n+ 5401 gtgtaggact gggattagga tttggattgg gattggagca gggcacgccc atcatgtgca\n+ 5461 ctactttcag gtggattcga agggagccct tcattcggaa cgtcttggag cagagatggc\n+ 5521 acttgtaggg ctcctggtcc tgtataaagc aataatcgga atttcactta tatttatcaa\n+ 5581 ttcatcaata tgccctcatg gccaaatatt ccattacatt accgtctgtc tctcagtttc\n+ 5641 aaatttatgc acaaaaatca ttcactttca ttcactatat cacaaagttg ccatggtttt\n+ 5701 aaattgatca aaaacaaatt aatatctatc atatatatac atagtcatat gaacagttga\n+ 5761 aaaattaatt gaaaataatg ggaacgatat acgtacatac atcagttgtt tttaaaatat\n+ 5821 aagggtatat agatttcttt cttgttgttg ttgattttaa ttacgtcaaa cttttgtttc\n+ 5881 agattcaatg taaatggtct agctttttaa gtatgatttt tttttgctgc cagtgagcat\n+ 5941 agaaaaaaaa aatcaaaatc gatataagaa tatgcgaaag tgcattacga aactctttag\n+ 6001 ataatagcac ttaatatatg tacatagcca atagttaccg gttccttctg ttggggttcc\n+ 6061 ttttgcttgg gttctccctc cgcattttcg tggactaagc ggacgtgcat gtccctcagc\n+ 6121 tcggtattcc ggaaactgaa ctcgcaaatg tcgcagtggg cgggcggggt ggtccgcaca\n+ 6181 ggcggtgggg ttgggacgac gggcttggac ctggttcgct tggccctccg tttgggagga\n+ 6241 gctgcggcaa ggaagccccg ggacgaggcg ggttggccat tcgctggact ctcgtttccc\n+ 6301 tcctgagcca tcagacttgt gtgcgagaac aggtggatgg tgagcttgtc cagccccagg\n+ 6361 aagagctcct tgcagttggc aaagggacag gccagtgggc cgttggccgc cttgatcagc\n+ 6421 ctctgctgca gtgcgtcaat actgccgaaa ctgggcaccg cgcagagcgg acacagcacc\n+ 6481 gaggtggaac acatttcgcc agtgcactca atcgaatctt atgcaaatgc ttcacctcct\n+ 6541 attgggatta tcctcctgtt cggtctgtga tcatctattc aggagtccat tcccagactg\n+ 6601 cctagtcttt ctgctttcaa aattttctaa aaatatcagc aagtgaagat ttttgaaaac\n+ 6661 tttgggccca gcaatctgac ttctcggcac cgatgccagc taacgaaata atgaaaaata\n+ 6721 atgaaatgcc cggcgcggat cgtcgaatcg tcaagaagac tttcggaaac actcgcagca\n+ 6781 ccgaaatccc atctctcgaa caaggcagtc tcttttctcc gtgtctctgg gtagctcatt\n+ 6841 tcgaaatata gctctgagca cggctatata ctatatgtat gtagaattat ttctggccga\n+ 6901 tatatgttgc actggcggcc atatagcctt cgttctagtc tttgtaacgc acgatgcgca\n+ 6961 ggagcaattc gctgagatga 
ccacatgcga tttgcgggac ttatctagag atctatcatt\n+ 7021 atcgccagat tggtttaata attggctttt ccgccaatat ccaattggaa tatggttggt\n+ 7081 tactgcaatt gtcgctccat tttttaagca ctccataaaa agtaaacaca ttaatatgta\n+ 7141 ctcttattgg agatttcttc tttcgatttt agtttcggac cagtgaaaat cattcgtttc\n+ 7201 attttcgtaa ataagaactg agaaaatatt attattatat atatttcttt attaggaaaa\n+ 7261 tacgaagatt gagtatttca gattgaatta gcatatccgt ctaaatctta atgctgtaat\n+ 7321 gagcttactt gagatctgat caaaaccaat acaaaaccca caccaaaggt ggtagctaat\n+ 7381 atacatattt tgtgtaatac ttttgtagag tatttactat tcagcgattt aaacaagcaa\n+ 7441 tcgcctagac acacacattt gtccgcctat gtgtatgtgc accgagctat acccccactg\n+ 7501 aatcgctgtg tgctattttt atggccgcga tgctctcttg ttttgacccg cttgggcaac\n+//\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_augustus/Genus_species.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_augustus/Genus_species.gff3 Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,151 @@\n+##gff-version 3\n+sample\tfunannotate\tgene\t1092\t1466\t.\t-\t.\tID=FUN_000001;\n+sample\tfunannotate\tmRNA\t1092\t1466\t.\t-\t.\tID=FUN_000001-T1;Parent=FUN_000001;product=hypothetical protein;\n+sample\tfunannotate\texon\t1092\t1466\t.\t-\t.\tID=FUN_000001-T1.exon1;Parent=FUN_000001-T1;\n+sample\tfunannotate\tCDS\t1092\t1466\t.\t-\t0\tID=FUN_000001-T1.cds;Parent=FUN_000001-T1;\n+sample\tfunannotate\tgene\t2126\t3863\t.\t+\t.\tID=FUN_000002;\n+sample\tfunannotate\tmRNA\t2126\t3863\t.\t+\t.\tID=FUN_000002-T1;Parent=FUN_000002;product=hypothetical protein;\n+sample\tfunannotate\texon\t2126\t2199\t.\t+\t.\tID=FUN_000002-T1.exon1;Parent=FUN_000002-T1;\n+sample\tfunannotate\texon\t2258\t3224\t.\t+\t.\tID=FUN_000002-T1.exon2;Parent=FUN_000002-T1;\n+sample\tfunannotate\texon\t3284\t3490\t.\t+\t.\tID=FUN_000002-T1.exon3;Parent=FUN_000002-T1;\n+sample\tfunannotate\texon\t3549\t3863\t.\t+\t.\tID=FUN_000002-T1.exon4;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t2126\t2199\t.\t+\t0\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t2258\t3224\t.\t+\t1\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t3284\t3490\t.\t+\t0\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t3549\t3863\t.\t+\t0\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tgene\t4883\t5802\t.\t-\t.\tID=FUN_000003;\n+sample\tfunannotate\tmRNA\t4883\t5802\t.\t-\t.\tID=FUN_000003-T1;Parent=FUN_000003;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t5797\t5802\t.\t-\t.\tID=FUN_000003-T1.exon1;Parent=FUN_000003-T1;\n+sample\tfunannotate\texon\t4883\t5539\t.\t-\t.\tID=FUN_000003-T1.exon2;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t5797\t5802\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t4883\t5539\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tgene\t8696\t10557\t.\t-\t.\tID=FUN_000004;\n+sample\tfunannotate\tmRNA\t8696\t10557\t.\t-\t.\tID=FUN_000004-T1;Parent=FUN_000004;product=hypothetical protein;\n+sample\tfunannotate\texon\t10549\t10557\t.\t-\t.\tID=FUN_000004-T1.exon1;Parent=FUN_000004-T1;\n+sample\tfunannotate\texon\t8696\t10462\t.\t-\t.\tID=FUN_000004-T1.exon2;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t10549\t10557\t.\t-\t0\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t8696\t10462\t.\t-\t0\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tgene\t14247\t15214\t.\t-\t.\tID=FUN_000005;\n+sample\tfunannotate\tmRNA\t14247\t15214\t.\t-\t.\tID=FUN_000005-T1;Parent=FUN_000005;product=hypothetical protein;\n+sample\tfunannotate\texon\t15209\t15214\t.\t-\t.\tID=FUN_000005-T1.exon1;Parent=FUN_000005-T1;\n+sample\tfunannotate\texon\t14247\t14648\t.\t-\t.\tID=FUN_000005-T1.exon2;Parent=FUN_000005-T1;\n+sample\tfunannotate\tCDS\t15209\t15214\t.\t-\t0\tID=FUN_000005-T1.cds;Parent=FUN_000005-T1;\n+sample\tfunannotate\tCDS\t14247\t14648\t.\t-\t0\tID=FUN_000005-T1.cds;Parent=FUN_000005-T1;\n+sample\tfunannotate\tgene\t19533\t21705\t.\t-\t.\tID=FUN_000006;\n+sample\tfunannotate\tmRNA\t19533\t21705\t.\t-\t.\tID=FUN_000006-T1;Parent=FUN_000006;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t21700\t21705\t.\t-\t.\tID=FUN_000006-T1.exon1;Parent=FUN_000006-T1;\n+sample\tfunannotate\texon\t19533\t21515\t.\t-\t.\tID=FUN_000006-T1.exon2;Parent=FUN_000006-T1;\n+sample\tfunannotate\tCDS\t21700\t21705\t.\t-\t0\tID=FUN_000006-T1.cds;Parent=FUN_000006-T1;\n+sample\tfunannotate\tCDS\t19533\t21515\t.\t-\t0\tID=FUN_000006-T1.cds;Parent=FUN_000006-T1;\n+sample\tfunannotate\tgene\t34843\t35679\t.\t-\t.\tID=FUN_000007;\n+sample\tfunannotate\tmRNA\t34843\t35679\t.\t-\t.\tID=FUN_000007-T1;Parent=FUN_000007;product=hypothetical protein;\n+sample\tfunannotate\texon\t35675\t35679\t.\t-\t.\tID=FUN_000007-T1.exon1;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t35648\t35655\t.\t-\t.\tID=FUN_000007-T1.exon2;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t34843\t35594\t.\t-\t.\tID=FUN_000007-T1.exon3;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t35675\t35679\t.\t-\t0\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t35648\t35655\t.\t-\t1\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t34843\t35594\t.\t-\t2\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tgene\t40223\t41234\t.\t+\t.\tID=FUN_000008;\n+sample\tfunannotate\t'..b'N_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t149952\t150112\t.\t+\t0\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t150174\t150248\t.\t+\t1\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t151966\t152072\t.\t+\t1\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t152314\t152429\t.\t+\t2\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t152496\t152751\t.\t+\t0\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t153651\t159010\t.\t+\t2\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t159150\t164491\t.\t+\t0\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t167135\t168360\t.\t
+\t1\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t168722\t169208\t.\t+\t2\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t169350\t169416\t.\t+\t1\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tgene\t192049\t194669\t.\t+\t.\tID=FUN_000014;\n+sample\tfunannotate\tmRNA\t192049\t194669\t.\t+\t.\tID=FUN_000014-T1;Parent=FUN_000014;product=hypothetical protein;\n+sample\tfunannotate\texon\t192049\t192067\t.\t+\t.\tID=FUN_000014-T1.exon1;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t193549\t193658\t.\t+\t.\tID=FUN_000014-T1.exon2;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t194041\t194455\t.\t+\t.\tID=FUN_000014-T1.exon3;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t194518\t194669\t.\t+\t.\tID=FUN_000014-T1.exon4;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t192049\t192067\t.\t+\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t193549\t193658\t.\t+\t2\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t194041\t194455\t.\t+\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t194518\t194669\t.\t+\t2\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tgene\t209044\t210553\t.\t-\t.\tID=FUN_000015;\n+sample\tfunannotate\tmRNA\t209044\t210553\t.\t-\t.\tID=FUN_000015-T1;Parent=FUN_000015;product=hypothetical protein;\n+sample\tfunannotate\texon\t210548\t210553\t.\t-\t.\tID=FUN_000015-T1.exon1;Parent=FUN_000015-T1;\n+sample\tfunannotate\texon\t209044\t210474\t.\t-\t.\tID=FUN_000015-T1.exon2;Parent=FUN_000015-T1;\n+sample\tfunannotate\tCDS\t210548\t210553\t.\t-\t0\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample\tfunannotate\tCDS\t209044\t210474\t.\t-\t0\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample4\tfunannotate\tgene\t1092\t1466\t.\t-\t.\tID=FUN_000016;\n+sample4\tfunannotate\tmRNA\t1092\t1466\t.\t-\t.\tID=FUN_000016-T1;Parent=FUN_000016;product=hypothetical 
protein;\n+sample4\tfunannotate\texon\t1092\t1466\t.\t-\t.\tID=FUN_000016-T1.exon1;Parent=FUN_000016-T1;\n+sample4\tfunannotate\tCDS\t1092\t1466\t.\t-\t0\tID=FUN_000016-T1.cds;Parent=FUN_000016-T1;\n+sample4\tfunannotate\tgene\t2126\t3863\t.\t+\t.\tID=FUN_000017;\n+sample4\tfunannotate\tmRNA\t2126\t3863\t.\t+\t.\tID=FUN_000017-T1;Parent=FUN_000017;product=hypothetical protein;\n+sample4\tfunannotate\texon\t2126\t2199\t.\t+\t.\tID=FUN_000017-T1.exon1;Parent=FUN_000017-T1;\n+sample4\tfunannotate\texon\t2258\t3224\t.\t+\t.\tID=FUN_000017-T1.exon2;Parent=FUN_000017-T1;\n+sample4\tfunannotate\texon\t3284\t3490\t.\t+\t.\tID=FUN_000017-T1.exon3;Parent=FUN_000017-T1;\n+sample4\tfunannotate\texon\t3549\t3863\t.\t+\t.\tID=FUN_000017-T1.exon4;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tCDS\t2126\t2199\t.\t+\t0\tID=FUN_000017-T1.cds;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tCDS\t2258\t3224\t.\t+\t1\tID=FUN_000017-T1.cds;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tCDS\t3284\t3490\t.\t+\t0\tID=FUN_000017-T1.cds;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tCDS\t3549\t3863\t.\t+\t0\tID=FUN_000017-T1.cds;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tgene\t4248\t5494\t.\t-\t.\tID=FUN_000018;\n+sample4\tfunannotate\tmRNA\t4248\t5494\t.\t-\t.\tID=FUN_000018-T1;Parent=FUN_000018;product=hypothetical protein;\n+sample4\tfunannotate\texon\t4930\t5494\t.\t-\t.\tID=FUN_000018-T1.exon1;Parent=FUN_000018-T1;\n+sample4\tfunannotate\texon\t4248\t4759\t.\t-\t.\tID=FUN_000018-T1.exon2;Parent=FUN_000018-T1;\n+sample4\tfunannotate\tCDS\t4930\t5494\t.\t-\t0\tID=FUN_000018-T1.cds;Parent=FUN_000018-T1;\n+sample4\tfunannotate\tCDS\t4248\t4759\t.\t-\t2\tID=FUN_000018-T1.cds;Parent=FUN_000018-T1;\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_augustus/Genus_species.mrna-transcripts.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_augustus/Genus_species.mrna-transcripts.fa Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,421 @@\n+>FUN_000001-T1 FUN_000001\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000002-T1 FUN_000002\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGgatctggatctggctccggctcTCGGGCCAgc\n+agccgctcaaagtctggttctcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCCAGAAATCCGG\n+atccagatcgagatcggtatcacgttcccgatcccgttccaagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagt\n+ccggttcccgatcacgttcgggctccagatctggctct
gggtcgcgatcgCCCAGCCGGTCTCGCAGTGGCTCGCCTTCT\n+GGTTCAGGATCCAGCTCTGGAAGCGCCTCAGATGAATGATTAA\n+>FUN_000003-T1 FUN_000003\n+ATGTATGACCAGGAGCCCTACAAGTGCCATCTCTGCTCCAAGACGTTCCGAATGAAGGGCTCCCTTCGAATCCACCTGAA\n+AGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAATCCCAGTCCTACACCCGCTTCTACCACCA\n+GTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACACAGAGCCTGGAGCCCTTGGCAACGGAAAT\n+AACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGCATGCTCCAGCAGTCCCCCAGTTCGCCGGA\n+GTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTCCTTCACCACCAAGTACTTTCTGAAGAAGC\n+ACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCAGGACCTTCACCTTCCAGCAGTCGTACCAC\n+AAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGTGGACGCGCCTTCAAGGAACTGTCTACGCT\n+GCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAAGTCTGCGGTGAGTATCGATCAAACCCCTTAA\n+TTAGCCGCCCCTTATTTTGTTAG\n+>FUN_000004-T1 FUN_000004\n+ATGATACATTCGGCTAAGACTACCTTGGTGGAGTGGACAGAGGCCATGGAAGACGGAAACAAGGGCTACCAGCTAATCGA\n+AAAGTACTACCTCGATGACCAGCAAAAGGCACGGGAGCTGAACATTAAGCGTCAGCTCTTGCAGGCGGACATCGACAAGC\n+GGCGCAAGCAGGTGGTGCTCCTCTATGACGAGCAAATGACGCTGGAGAAGAATCTGGAGCGAACCGCTTGTCTGTACAGG\n+TCTGCCCACGCGGAACGCCGCCAGATGGTGGAGACGTGGAAGAGCGCCGTGAACCAGATGACCCAGCGAGAGCACGACAT\n+TCAGCGCAGCGAGATAGAGTGTGCGGAGCTGGCCCAGAAAGCCCAGCAGACGGCTCAGACCTACAAGGAGTACGACAACC\n+AGCTAACCGAGGTCATCGAGAACAACCGCCAGGTGGAGCTCGCGATCGAGTCCCTCAACGAGGAAAGCTCCGACATGAAA\n+AACCAAATCCAGATCCTGATTGATGCCACATTACTGAAGGAGCGCGAGATCGACGGACTGCGTCGGGAACTGGAGAACCT\n+TTCGAACCGGGTTCACCTGCAGCGCATGGATAACCGTAGTCAGATGAAAAAGCGCGATGAGAAGGCCAAGGAGCTGGAGA\n+ACTTCGCTTCGGTGATGGAGAAGGTTAACGCTCGGCTGAAGTCTGTACAGAACAAGGCCCTCAATGCGGAGCAGCGGCTG\n+CAAATTCTGGAGGAGATGATGCAGGCGGAGGAGACGGCTCTGCGAAACCTGGACAAGGAGCAGGAGAAGGTCAACGAGAT\n+GTTGTACCGCACCCAGAGGCAGGTCATCGAGCTGCAGGACGAGGAGAAGGTCTTGAAGGTGCAGAACGACTCGCTCAACT\n+CCAACCTGGCAGCGATCAATCGCAATCAGCAGCAGGTCAATAACGAACTCAAGCGGCAGACGGAGATCCATTACAGCCTG\n+TCCTTTAAGTGCTTGGAGGCGGAGCGGCGTCTCGCCGAGATCAAGGGTCTGGCAGATGACCCCGAGGTAGAGGCCACAAA\n+CATGGCCCGGCTTAACACTCTGGAGCAGGAGTATGAAAAGTTGCAACGCCTCATCGCCACCACGGAGGCGCAGAATAAGA\n+A
GCTAAACTATAACATGAACAACCTGGTCGTCCAGTACAACGCCGACGAGAAGGA'..b'TCCTTTG\n+ACAATCTGCCCGCGTTGGAACAACTACTTCTAAGGAACTGTGGGTTGGGCAACATCGAGTGGGAAATCGTGAGGCCAAGA\n+CTCAAATTGTTGCACTATTTGGACTTGGGTGGAGCTCAAAAGACGGGCAACTATGAACATCAGTTGGACGTATCCGCGTT\n+CTCTCCTGAGACAACTACTAACGCTGAAGAGATATCTACTATCCTAGCGAAGAGAGCGATGGCTCCAGAAGTAGTGGGCA\n+CCACTACACTAGGACCTACTACTTCCATAGAAATTTCACCACCATCAACACAAAGTACAACAACACCCAAAGAAGAAAGT\n+ACATCTATGACAGAAACAACGATATTAACAACACCATCGCCAAAATGCGAAGAGGAACTTTGCCAGGATCTAGAGTGCTC\n+CAGAATTACCACCGATACGGTTGCATCTGCAGATCTGGGAAAATCGTCGTGCCAGGATGGTTTACTGGTGGAGATCTGCG\n+AATCGACTTGCACCACTCCCACATTCTTCTGTGTGATATTAGGCGAGAACTTTACCTCCGCATCCAACTGTTGTTCCCAC\n+CATACCATGCGATGTGTGGTCTCTGCACAGGTCTCCTGGTTCGAGGACCACAGCGGACTGGTCATTGGCTTGGGAGTGGG\n+TCTCCTCTTCATCGGCAGCTTCCTCGGCATGCTCATCGTCTTCGGAACCCTGCGCCTGAATCCATCCTGGTTGCGAGGCA\n+ATAAGCGGCGGGAGTCGAATACGATAGGACTGATTCAGGGAAGATTTGAGAAGGACCCGTATGAGCAAGTGGGGTAG\n+>FUN_000016-T1 FUN_000016\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000017-T1 
FUN_000017\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGgatctggatctggctccggctcTCGGGCCAgc\n+agccgctcaaagtctggttctcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCCAGAAATCCGG\n+atccagatcgagatcggtatcacgttcccgatcccgttccaagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagt\n+ccggttcccgatcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGTCTCGCAGTGGCTCGCCTTCT\n+GGTTCAGGATCCAGCTCTGGAAGCGCCTCAGATGAATGATTAA\n+>FUN_000018-T1 
FUN_000018\n+ATGAAGGGCTCCCTTCGAATCCACCTGAAAGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAA\n+TCCCAGTCCTACACCCGCTTCTACCACCAGTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACA\n+CAGAGCCTGGAGCCCTTGGCAACGGAAATAACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGC\n+ATGCTCCAGCAGTCCCCCAGTTCGCCGGAGTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTC\n+CTTCACCACCAAGTACTTTCTGAAGAAGCACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCA\n+GGACCTTCACCTTCCAGCAGTCGTACCACAAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGT\n+GGACGCGCCTTCAAGGAACTGTCTACGCTGCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAAGT\n+CTGCGGCAAGTGTTTCCGGCAGCGAGTCTCTTTCCTTGTCCACACGCGCATCCACACGGGAGTGATGCCCTACAAGTGCG\n+AGCTCTGCCAAAAGACGTTCAGGTACAAGGTCAGTCAAAGGACCCATCGATGTCCCACCGAGGAGGCCCAGACACCGGAG\n+CAGCTGATAAAGGCGTTCCTGGAGGGCAACGACTCGCATACTCAGCCTTCACCAGCGAGCGCCGAAATAGCTGCCATCAA\n+CAGCAGCTCCATTGTGGATCCTGAGCAGGAGGCACTGCTTTCGCAGTCAATCGACGACATTGTCGTTGAGCAGTGCCAAA\n+AGCTGGGCATCTGTGGTGTGGAGCCGCGGGAGGAGGGACAGCTCATATCCCTACAGCCGGTTGCGGTGGTACACTTCAGC\n+GGGAATGGCTCTCCGCTGCAGCAACTCCAGAACTTGAGAATCTACTCACCGCAACAAACAGAGCTACCTAGTTCCGATGG\n+CGAAGTCTTCCAGCGCTTTTTGATGGACGCCACGTAG\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_augustus/Genus_species.proteins.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_augustus/Genus_species.proteins.fa Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,158 @@\n+>FUN_000001-T1 FUN_000001\n+MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAGESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKR\n+FIRNPNYVKANEFYDKMLSSEYVSKRYKDLPPPHPGFGADQPPA\n+>FUN_000002-T1 FUN_000002\n+MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFDLKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLG\n+VTVDLINRELYQADSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK\n+KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPCAQVIFDSDPAPAGKNVPAQLEE\n+MSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFINGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGI\n+YYNELETRVRLNKRRVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIGETEKTSED\n+AAVGAQAASGADSPAQDLDLAPALGPAAAQSLVLGLVAAPDHAQIRRQDPRNPDPDRDRYHVPDPVPSPALGRVLGRDPS\n+PVPDHVRAPDLALGRDRPAGLAVARLLVQDPALEAPQMND\n+>FUN_000003-T1 FUN_000003\n+MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPNPSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGN\n+NSTCTASQPYALSGALSMLQQSPSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCEVCGEYRSNPLISRPLFC\n+>FUN_000004-T1 FUN_000004\n+MIHSAKTTLVEWTEAMEDGNKGYQLIEKYYLDDQQKARELNIKRQLLQADIDKRRKQVVLLYDEQMTLEKNLERTACLYR\n+SAHAERRQMVETWKSAVNQMTQREHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELAIESLNEESSDMK\n+NQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRDEKAKELENFASVMEKVNARLKSVQNKALNAEQRL\n+QILEEMMQAEETALRNLDKEQEKVNEMLYRTQRQVIELQDEEKVLKVQNDSLNSNLAAINRNQQQVNNELKRQTEIHYSL\n+SFKCLEAERRLAEIKGLADDPEVEATNMARLNTLEQEYEKLQRLIATTEAQNKKLNYNMNNLVVQYNADEKELEMVRFKI\n+KEAQVYCEGTVKRLRQIRYENSELIVDLNMVKMRCSDLEVGIGGCEQGTYDLEQHRLAFRRAIKDRTVELRSQEDVLLLK\n+KKHLNEELSTLRADLGERKKQIEAMKARFELTAQLLGKNEDGSIMTSTQLKVVSAQERQMLADEGDALNKKVLKAEKEVV\n+ALENTLRQFDKSNDNYRKTFRSVDENSKGEL\n+>FUN_000005-T1 FUN_000005\n+MRCVFCGSGSEQQHSRWEIKMLQESCRTDHGFHQDSQAIQYLYEILASYNRDEQRAFLQFVTGSPRLPTGGFKALTPPLT\n+IVRKTLDENQNPNDYLPSVMTCVNYLKLPDYSSREVMRQKLKVAANEGSMSFHLS\n+>FUN_000006-T1 
FUN_000006\n+MSFCDFLRDAALGANSANLSIVAAALSAARDVGGGSDGGGSAGGATPATGASASSVGNTSAVGASSSSNSSAGQAASSNS\n+NNVTATGSGSAPGGGPTSTGTTSGTQHGSGSGAAAAVDSESDDSEVGRLQALLEARGLPPHLFGALGPRVTHILHRTIGN\n+SSSSKANQLLQGLQSHDESQQLQAAIEMCQMLVMGNEDTLAGFPIKQVVPALIQLLRMEHNFDIMNNACRALAYMLEALP\n+RSSGTVVEAVPVFLEKLQVIQCMDVAEQSLSALEILSRRHNKAILQANGISACLTYLDFFSIVAQRAALAIAANCCLNMH\n+PEEFHFVAESLPLLARLLSQQDKKCIESVCSAFCRLVESFQHDGQRLQQIASPDLLKNCQQLLLVTPAILNTGTFTAVVR\n+MLSLMCCSCPDLAISLLRNDIAATLLYLLTGNAEPAAASATHVELISRSPSELYELTCLIGELMPRLPLDGIFAVDSLLD\n+RPTLNTQDQVHWQWRDDRGSWHNYSTIDSRLIEAANQSSEDEISLSTFGRTYTVDFHAMQQINEDTGTTRPVQRRLNHNY\n+VAPMSAGQDLTTTSAGSAAAGGASTSAAAAAASSNNNNNNNNNPPGNSVNLNQVKRRPSLDARIACLKVRNKMFCLFTRY\n+TLKLNNLPNYNVFFMKYFRRHS\n+>FUN_000007-T1 FUN_000007\n+MKIRYCVMILGSRQMHFIVNEDQSSAASPLVWAGITAEEYFPEYRMEAAHPDQEYIVLGVSSANLGRALSVLRGGGVNSC\n+KLKLQRIQFPCISVIASVLTSSSTEAREVVHDVPVTIIPGSDWSAYVVPRVPNSQLALGLPSLRLLKSLIDKLKNISPSL\n+EFQVNVDGELNVIATSEMSTVTSRFQKLLIRTVSGSQQEASCSVDSRKASAFFGALQLPNEELTIGIDREHSIHLQIDVR\n+QDVVLHSILPAVCM\n+>FUN_000008-T1 FUN_000008\n+MCGNPAVGNGTRALILVGGYGTRLRPLTLSTPKPLVEFANKPILLHQLEALVDAGCRQVILAVSYRAEQMEKELKVEAKK\n+LGVELIFSHETEPLGTAGPLALAKTILAASSEPFFVLNSDVICDFPFKQLVQFHCNHGKEGTIVVTKVEEPSKYGVVLYD\n+ENGCIKNFIEKPQEFVSNKINAGIYIFNPSVLDRIEVKPTSIEKEVFPEMTQQQELYAMDLTGFWMDIGQPKDFLTGTTD\n+RITRLLAQS\n+>FUN_000009-T1 FUN_000009\n+MWIVNCMCLYLSSLRQKQSPKLYTGPGVVGNVLVDPTAKIGEGCRIGPNVTIGPDVVIEDGVCIKRSTILKGAIVRSHSW\n+LDSCIVGWRSTVGRWVRIEGITVLGEDVIVKDELYINGGQVLPHKSIAASVPEPQIIM\n+>FUN_000010-T1 FUN_000010\n+MDFIHEAVGTSAAPSANPGPMPLCQPVRLELPFSPRHSFALGHFPFQLCPCLYAVWVQTMGLVLLLLIVFAPFLFRVLFK\n+PCNPKRYLTT\n+>FUN_000011-T1 FUN_000011\n+MKVHGNVDEKSPSHGYDSEGEESSSSSIITGGAQTPPSTRLDGSAGSSSGHHPPSDWYHTTAPSGSAEAMNPLNHFGHHH\n+HHHHLMHPGAATAY\n+>FUN_000012-T1 
FUN_000012\n+MQRGIDSFFKRLPAKAKSAEAENGETPSKAPKRRKAVIISSDEDEVVSPPETKKRKASKTASSEDDVVAATPEPIAKKAR\n+NGQKPALSKLKRHVDPTELFGGETKRVIVPKPKTKAVLEFENEDIDRSLMEVDLDESIKEAAPEKKVHSITRSSPSPKRA\n+KNSSPEPPKPKSTKSKATTPRVKKEKPAADLESSVLTDEERHERKRASAVLYQKYKNRSSCLNPGSKEIPKGSPDCLSGL\n+TFVVTGVLESMEREEAESVIKEYGGKVMTVVGKKLKYLVVGEEAGPKKLAVAEELNIPILSEDGLFDLIREKSGIAKQVK\n+EEKKSPKKEHSSEEKGKKEVKTSRRSSDKKEKEATKLKYGEKHDIAKHKVKEEHTSPKETKDKLNDVPAVTLKVKKEPSS\n+QKEHPPSPRTADLKTLDVVGMAWVDKHKPTSIKEIVGQAGAASNVTKLMNWLSKWYVNHDGNKKPQRPNPWAKNDDGSF'..b'KPSTPTLEKQSPIDLGTSTESYLEPIEERIAKILDRGGARTEDSESSSGGSRKPPRIEK\n+PARANAGKKLSVTRADAGKSGSDRSSQESKSSFDSKGSLSVESRGSFETESSSGSLGAAQRRGELAQKEQQSTWRPFPIE\n+SSNSSSTDDPWHHVETDGGYERYDAQNPLRDSSDSDVKEASPDDQKDASDASYQDELNDFPATFGYPAMTSSLGGIGVNP\n+TDIIGYSTGFTLGRTLSRISERSTASEKSSMEDDVSKASTHSVSMRDESVGSTDHQPSLSSDSRSNTNLAYISDADRRTS\n+AEMPEIPCDSATGDRLSSFGSLNEPKSPTLVTGRFSVTHVDEQQGDDVERHTLMCLSNAGSQDSEDWPLPEIPFDHVPVK\n+PADSLYAMPDLDKPVPKSFCWKASLSFQQSQDSLDWPSPPSSAIGAPIIVENIETYYASEVQSADKVILDEEMAVGPPDV\n+AKVLPYEDTAYLMSAAFDDNDFGNEQLQPDTVSCLSSTLSAASCLSSSLNVSCTTSSTQATARALRKNSSPEVIVAQPTR\n+SPAPRSPLSEDELFSSDDVFMPGTIKVQLSPDAQLRKLSKGSNNSDTSIDDILSGSTTYLEDQTTVRKNYEARLSSGGGG\n+ASCKKCSHSSHSEEETSSLGTDLDGTVRMGGLQQKKCTHSSHSEDTSIGLSISEWSTGTNTVRQYANLSGSDSLSAVSTH\n+SCAKSEKSNQTKSSISSINKSAESLNEQSGGSSFSHKFSGDNGSSDGLRYDMLSNSETDKLSEATSATRSDDTTLTLTEM\n+AHTISEWSTSSSRTLVGVAPGEYLPLKQALSGNKTSLSSPSEEKRCALPQVHRRSGSNGNQARAAQEHADSQTGPETSAA\n+ARKRRSLEMMSKLYQSQEICSESESPFVERLYAHSEKLTERYQSQEFVPLHGGPPASHLASSTTSQIQTQQPQQVRQKPR\n+APQPPTKPKPAVTRPIMQALLNKMKQPGLAEQAAEAAEAEEKKAMIAASAVAAKPPPPPVPTVPPIVTPSDLPGDAVAPP\n+PKPLAKHHSYDDRTLSKTQIREFKTTSKQLRQSSSFHEHMLSKSQQSSQELPMRIDEERDPHSTSSATNTTTTTNTLNSE\n+STEPNSPQMPQRADKLVRCSPYYSSSLSSESPPNQLLQKPPRKTATQLSAGAVAASLKSPPSGNDTDSSLDVRGQEAKMR\n+SRGYRKKRQLPVKRMRANLTAAALLEQAESSECSEGYVPEVDSGSSEYSSCQRDDQYLEFDEELERDQTDDYEDYPQYSG\n+KFESLDMSDNVDEMGFPRYDRLSHITKPMYHQALVMERPNPVQLPAPANHPMPPATGQPVKPARTKKRQFKREDSTAAGT\n+SGHSTAAPQVRPYHGRSYCNPEESEYETRGGGLSDELANSSEDSCSGFGGDAGASGSGTIRRGTTKGAGQDQEQGTGGQA\n+RHVPYPDFLSDYESE
PIEYERYACGLDIRVDPPPKFHDSDELSDQRGEITTPPTRYDLTLGSDKSSSLSRSEAGTYDVIQ\n+AEIQHAKRQELATGVATASHQNGNGNGNGHTLSTQHDIEAEVKKRKWPTEPSYFLAKELLMTERTYKKDLDVLNTTFRQV\n+LSLGDVEQLQPLFELLDSLAQHHNLFLRDIEHRMVQWEGRGGHEAHRIGDVMMKHMAALPIYDEYVQTHLDILHCMNDMY\n+EGDERFRQVYKEFEQQKVCYLPIGELLLKPLNRLLHYQLILERLCDYYGEEHIDYADAMAVHHLLVRSTKGIRSQLPDSA\n+NFVELCELQRDINFEQLVQPHRRLIRQGCLLKHSKRGLQQRMFFLFSDLLLYGSKSPLDQSFRILGHVPVRSLLTENAEH\n+NTFSIFGGQCAITVSAGTTAEKTLWLAELSKAAADIKNRPPNMQLQLTTLKNCSSSEEGLDLFGLSNGNNSSLNSSVNGG\n+GPLTTQQQKLQLQQQQQNRTQPSRSNTALHVCWHRGATVGLGDHLIAAEHQLSGYLLRKFKNSSGWQKLWVVFTSFCLYF\n+YKSYQDEFALASLPLLGYTVGPPGHQDAVQKEFVFKLSFKNHVYFFRAESAHTYNRITCRVDLDLCSARYVQCRSTE\n+>FUN_000014-T1 FUN_000014\n+MVQAETPPYIKQCHRNDPKLVDCFIGAIEHLKPYLANGIPDIQLPSVEPFKMDTLALQLTEGPQGYKITLKNMEAFGASN\n+FKVTSLKLSEGSEPFKAKIVMPKLKIEAKYTSSGVLLILPASGGGDFHANFEGVSADLTGKTSIHAFKGANYLHIDALSL\n+VLDVKDVKMSISGAFNNNRILLEATNLFLRENSQVVLEAMQAQLQKKLASEFGKLANQLLKNVPVEQFYVD\n+>FUN_000015-T1 FUN_000015\n+MASKSFDLVIEEKTKKPERLYQPRRMRWLKYIILPAVFSFALLLILVNVDFSDNSEDSTHLGNDTSLIISGYGFENNTLR\n+RGFFSGGIALHSLVIENCTIVHINDAAFNQESTVNITSLQLINVQLENLTESALEGLQKLQNFTLVNENNHFRPFGFLSA\n+VAESLVSAEIHQSLAAAISYSVCDFLGSRNFPQLKYLDLSGTHLDKSLIKESFDNLPALEQLLLRNCGLGNIEWEIVRPR\n+LKLLHYLDLGGAQKTGNYEHQLDVSAFSPETTTNAEEISTILAKRAMAPEVVGTTTLGPTTSIEISPPSTQSTTTPKEES\n+TSMTETTILTTPSPKCEEELCQDLECSRITTDTVASADLGKSSCQDGLLVEICESTCTTPTFFCVILGENFTSASNCCSH\n+HTMRCVVSAQVSWFEDHSGLVIGLGVGLLFIGSFLGMLIVFGTLRLNPSWLRGNKRRESNTIGLIQGRFEKDPYEQVG\n+>FUN_000016-T1 FUN_000016\n+MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAGESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKR\n+FIRNPNYVKANEFYDKMLSSEYVSKRYKDLPPPHPGFGADQPPA\n+>FUN_000017-T1 
FUN_000017\n+MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFDLKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLG\n+VTVDLINRELYQADSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK\n+KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPCAQVIFDSDPAPAGKNVPAQLEE\n+MSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFINGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGI\n+YYNELETRVRLNKRRVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIGETEKTSED\n+AAVGAQAASGADSPAQDLDLAPALGPAAAQSLVLGLVAAPDHAQIRRQDPRNPDPDRDRYHVPDPVPSPALGRVLGRDPS\n+PVPDHVRAPDLALGRDRPAGLAVARLLVQDPALEAPQMND\n+>FUN_000018-T1 FUN_000018\n+MKGSLRIHLKVVHMMGVPCSNPNPNPNPSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGNNSTCTASQPYALSGALS\n+MLQQSPSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYHKHLLYHSEVKPHVCGVC\n+GRAFKELSTLHNHQRIHSGEKPFKCEVCGKCFRQRVSFLVHTRIHTGVMPYKCELCQKTFRYKVSQRTHRCPTEEAQTPE\n+QLIKAFLEGNDSHTQPSPASAEIAAINSSSIVDPEQEALLSQSIDDIVVEQCQKLGICGVEPREEGQLISLQPVAVVHFS\n+GNGSPLQQLQNLRIYSPQQTELPSSDGEVFQRFLMDAT\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_augustus/Genus_species.scaffolds.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_augustus/Genus_species.scaffolds.fa Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,4554 @@\n+>sample\n+GTGGCCGGCTGATTTCACGTCCTAACTTTGGGCTTAACTGGTTCGCCAGT\n+TGACTTTCTTCGCCATCATGTGATGCATTAATTAAACAATAATTACTAAT\n+TGacagtaattaataattgTGGCAAAAAGCGCGACACGTTTTTTCGGCAA\n+ACTCCTCGGAAGACCGATTGTTTAAAGACGTAGGGAAAAGTAGTTCCCAA\n+GCATTTTAAAAAGATACCTATGACATGTGACACCTTTAAAGTGCAATACA\n+AGTTTTCATCTCTTTATATCCTTTTACTCCCTAATTTGAATATAAAAGGA\n+ATCGCATTGAGAGTATAAAGGCTTTAGTTCTTATCGATagatagttagtG\n+ATGAAATAAAATTATAACCGTGGTTTTAGTTTGAAATGTTGTAAAGACTT\n+TCTTTAAATTTAACCAAATTTATGTGATAAAATGGATATTCCATAGATAA\n+GACATTTAAGTTAAATGTTTTTATACATCAAAAAGGAAACATTGTGCACG\n+CTATCAAATGGTATTCTTAAAATCGAGTCAgttaggtaagttattaatta\n+aATGGTAACTTTTTAACGTGCGTCAAATAATCTAGAAATTCTTCTTCAAC\n+TCATCCAAAACATTCTCAACACCACAATATCTATGCTCAGCGATGACAAA\n+TTTCTCCTGATTTCTTAATTTTCTATCTATGctatgcGATCAATCAACGA\n+ATGTGTGCTAATTTCTTGTGACGATTATTTTGCAAAGTCGTCTCCGCGTT\n+AATATCCGATGTAAATAAACCTATGAAAATCGCAAAGATCTATTCCTTTG\n+CGCTTAACCTTGTTATTGAACTCCCTCCCGCCCGGATTTTCGCAGCTTCC\n+AACTAAGTGATACCTTTTAAACAAACGCCACAACAAAACAGGTGACAATC\n+ATATTTTATCAACAAAAGAAAAGAGAGATAATATCTGCTGCTAATTCAAT\n+TTATTGGGCTTTGTGTTTATTTGCATTGGGAATCCGTGGAGCTGATATTG\n+TTTACTTTGGCAATTTCCCAGTCATTATGGTCGCCGTATAATGTGAATGG\n+CCAACAGGAAAATTTCACAGATTCCACTGGTTATTCACTGTTCATGCTGG\n+CGGTTGATCCGCTCCAAATCCCGGATGAGGCGGCGGAAGATCCTTATACC\n+GCTTACTTACGTATTCACTGCTTAACATCTTATCATAGAATTCGTTAGCT\n+TTCACATAGTTTGGATTCCGAATGAATCTCTTCTGTCGACCTCGCGTTTT\n+TGGCAATTTTCGGAGCTCTTCGCTACTGCTAGTTGATTTAAGGCCTACAT\n+CTTTCTGATCCTTAATCATCCTTTTAGGTGATTCTCCTGCTACCATGGGA\n+TCCACCAGTTTCATATATAATGGATCGGGCTCGTTTGCAAGTGGGTTTCC\n+AGTTTCAGAAGACTCTGACGTTCTAAGCGCCAATAAAATGAAAATAACCA\n+AAAGGAAACTAGACATGTTTCGTTACAGACAGATATAGATTGGATATTAT\n+TGAAAGAAAATgaaaataAACAGCGATAATGATCTGTGACTTATTGGAAA\n+TTAGATGGCTTATGGATGATGGGGCGATAAATTCGAACAAACACTGAGAG\n+CATTTTTGGGAGCATTGTGGGAGCTTTTCTATTATCCAGTACTTTATATA\n+CATATATCATTTATATACTAATCATTTCTGGTAGCCGTTCGTAATCAGGA\n+TCGGATCCTTTTTTACCCGTTAGTCAGCTAGAAGAAACGAAAAATTAAAA\n+TAGTAAAatctaaaagtatacaaaaattcaaatagtaaaaccaaaaagta\n+ttaaaaaaAATATCAATCGTTTTTAAACGTTGATTTTTCAGCTTGTGGGG\n+TGATTT
ATCGCTAACTTGGAAAATGATAATAAAGCATTATCCATAATATT\n+AGTTGTGGAAATGAAATTCAAATAGATGTTGTGTTATATACGATGAGGAT\n+GTTGCATTTGAGTCCCCGGAAATATAGTATTTTTTTTACCGAAGGTATTA\n+TCGTACCGGTCAAGTACGGTCACACTGCCAAGCGCAGATTTGAGGATTTC\n+TAGATTTGGCCTCTTGATGGACTAGAAGCGCTACCAAAACTGGGGCTTGA\n+GTTGAATTACCTGTTGGAAGACACAATGCCACCCACGATCAACAATTCGG\n+CGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAG\n+TAAGTGAACAGATCCCTAAACAGACGCCAGATACTCAGACTGATGTGTAC\n+CTTGCAGATCCGAGATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCG\n+GATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACAGCCACCG\n+CTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACG\n+TGCTGACGGAACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAG\n+CTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACT\n+GCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGC\n+ATTCGAGGACGGTGTCATGGTTGCGCAAATCCGAGTACATCTCCACCGAG\n+CAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGG\n+TTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCG\n+AAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAAGAGCGAA\n+ATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCC\n+TATCTTCCCCGACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATAT\n+TTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAG\n+GAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACA\n+GTTTGTCGCCTACTTCCTGCCCACAGAGCAGACGCTGGAGAAACGCCGTA\n+CAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtac\n+aagATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGG\n+CTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATCTACTACA\n+ACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGC\n+CAGCAACCCAACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCT\n+ATAGCGAGCTTTACTTTGTATTATTTCTACCAGGTTGTCAAGCATCGTCC\n+ATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAG\n+AAGTTCCTGGCGAGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAG\n+CAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGACGCAGCTGT\n+TGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCG\n+ATCGACAGTCTCGTTCTCGGAGTCGAACTCGCAGCGGGTCCAgttcagga\n+tctggatctggctccggctcTCGGGCCAgcagccgctcaaagtctggttc\n+tcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCC\n+AGAAATCCGGatccagatcgagatcggtatcacgttcccgatcccgttcc\n+aagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagtccggt
tcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTT'..b'cgcgttctaggtcgagatccaagtccggttcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTTCTGGTTCAGGATCCAGCTCTGGAAGCGCCTCA\n+GATGAATGATTAATTACAAAAAACGGCGTTCATAATAAATAAGTTTATAA\n+TCAACCAAGTACATTTGAAAACTGAACTAACTCGATTTAATATCATTTTC\n+GCCTCAACTCAGCGCTCGGGTTCGTTGCCCAGAATAGTTTTAAATAAAAT\n+CGGCAGTTTAACATAATTTATATTAGATGTtgttgttgtATTGCAAACAA\n+GTCGGGTCCTAGTCGATTTACACTTGGCTGAGATAAAACAACTAAGATTC\n+AAATAATATCCTCATAAGAAGATGTAATTAAGACGTTTTTCTTAGGGGGT\n+GCTTAGGAATTGATTAGATCGCCTTTGGGGAAGTGCAAACAATGTAAaat\n+gatataaaagggtataaaTTAAGTGGATATATGCATCTTCGTTCCAACTA\n+CGTGGCGTCCATCAAAAAGCGCTGGAAGACTTCGCCATCGGAACTAGGTA\n+GCTCTGTTTGTTGCGGTGAGTAGATTCTCAAGTTCTGGAGTTGCTGCAGC\n+GGAGAGCCATTCCCGCTGAAGTGTACCACCGCAACCGGCTGTAGGGATAT\n+GAGCTGTCCCTCCTCCCGCGGCTCCACACCACAGATGCCCAGCTTTTGGC\n+ACTGCTCAACGACAATGTCGTCGATTGACTGCGAAAGCAGTGCCTCCTGC\n+TCAGGATCCACAATGGAGCTGCTGTTGATGGCAGCTATTTCGGCGCTCGC\n+TGGTGAAGGCTGAGTATGCGAGTCGTTGCCCTCCAGGAACGCCTTTATCA\n+GCTGCTCCGGTGTCTGGGCCTCCTCGGTGGGACATCGATGGGTCCTTTGA\n+CTGACCTTGTACCTGAACGTCTTTTGGCAGAGCTCGCACTTGTAGGGCAT\n+CACTCCCGTGTGGATGCGCGTGTGGACAAGGAAAGAGACTCGCTGCCGGA\n+AACACTTGCCTGTGGACGAGTCCGAAATACGAAAAGGTTAGACATGGAGT\n+GACCCGGAAAAGAAGGTATACCTCTCCTTCATTTAAAGTAAATAGGGCAA\n+ATCGCAATGGAGTATGCTCATTTATAAGCTGGCTAACAAAATAAGGGGCG\n+GCTAATTAAGGGGTTTGATCGATACTCACCGCAGACTTCGCACTTGAAGG\n+GCTTTTCGCCGCTGTGAATCCTCTGGTGGTTATGCAGCGTAGACAGTTCC\n+TTGAAGGCGCGTCCACAAACCCCGCAGACATGGGGCTTTACCTCGCTGTG\n+GTAGAGCAAATGCTTGTGGTACGACTGCTGGAAGGTGAAGGTCCTGGCGC\n+AGATCTCGCATGTGTACGGCATCTCGCCGGTGTGGAGCCGCTTGTGCTTC\n+TTCAGAAAGTACTTGGTGGTGAAGGACTTGCTGCACACATCGCACTCCCA\n+CAGCTTCGGGGTGGCCGTGCCCGACTCCGGCGAACTGGGGGACTGCTGGA\n+GCATGCTCAGCGCCCCACTCAGCGCGTATGGCTGGGATGCGGTGCACGTG\n+GAGTTATTTCCGTTGCCAAGGGCTCCAGGCTCTGTGTGTCGAATGCGATC\n+GCAAATGCTCAGCTTGGGCGTGGCAGTAACTGCACTGGTGGTAGAAGCGG\n+GTGTAGGACtgggattaggatttggattgggattggAGCAGGGCACGCCC\n+ATCATGTGCACTACTTTCAGGTGGATTCGAAGGGAGCCCTTCATTCGGAA\n+CGTCTTGGAGCAGAGATGGCACTT
GTAGGGCTCCTGGTCCTGTATAAAGC\n+AATAATCGGAATTTCACTTATATTTATCAATTCATCAATATGCCCTCATG\n+GCCAAATATTCCATTACATTACCGTCTGTCTCTCAGTTTCAAATTTATGC\n+ACAAAAATCATTCACTTTCATTCACTATATCACAAAGTTGCCATGGTTTT\n+AAATTGATCAAAAACAAATTAATATCTATCATATATaTACATAGTCATAT\n+GAACAGTTGAAAAATTAATTGAAAATAATGGGAACGATATACGTACATAC\n+ATCAGTTGTTTTTAAAATATAAGGGTATATAGATTTCTTTCttgttgttg\n+ttgatTTTAATTACGTCAAACTTTTGTTTCAGATTCAATGTAAATGGTCT\n+AGCTTTTTAAGTATGATTTTTTTTTGCTGCCAGTGAGCATAGAAaaaaaa\n+aatCaaaATCGATATAAGAATATGCGAAAGTGCATTACGAAACTCTTTAG\n+ATAATAGCACTTAATATATGTACATAGCCAATAGTTACCGGTTCCTTCTG\n+TTGGggttccttttgcttgggttcTCCCTCCGCATTTTCGTGGACTAAGC\n+GGACGTGCATGTCCCTCAGCTCGGTATTCCGGAAACTGAACTCGCAAATG\n+TCGCAGTGGGCGGGCGGGGTGGTCCGCACAGGCGGTGGGGTTGGGACGAC\n+GGGCTTGGACCTGGTTCGCTTGGCCCTCCGTTTGGGAGGAGCTGCGGCAA\n+GGAAGCCCCGGGACGAGGCGGGTTGGCCATTCGCTGGACTCTCGTTTCCC\n+TCCTGAGCCATCAGACTTGTGTGCGAGAACAGGTGGATGGTGAGCTTGTC\n+CAGCCCCAGGAAGAGCTCCTTGCAGTTGGCAAAGGGACAGGCCAGTGGGC\n+CGTTGGCCGCCTTGATCAGCCTCTGCTGCAGTGCGTCAATACTGCCGAAA\n+CTGGGCACCGCGCAGAGCGGACACAGCACCGAGGTGGAACACATTTCGCC\n+AGTGCACTCAATCGAATCTTATGCAAATGCTTCACCTCCTATTGGGATTA\n+TCCTCCTGTTCGGTCTGTGATCATCTATTCAGGAGTCCATTCCCAGACTG\n+CCTAGTCTTTCTGCTTTCAAAATTTTCTAAAAATATCAGCAAGTGAAGAT\n+TTTTGAAAACTTTGGGCCCAGCAATCTGACTTCTCGGCACCGATGCCAGC\n+TAACGAAATAATGAAaaataatgaaaTGCCCGGCGCGGATCGTCGAATCG\n+TCAAGAAGACTTTCGGAAACACTCGCAGCACCGAAATCCCATCTCTCGAA\n+CAAGGCAGTCTCTTTTCTCCGTGTCTCTGGGTAGCTCATTTCGAAATATA\n+GCTCTGAGCACGGCTATATACTATATGTATGTAGAATTATTTCTGGCCGA\n+TATATGTTGCACTGGCGGCCATATAGCCTTCGTTCTAGTCTTTGTAACGC\n+ACGATGCGCAGGAGCAATTCGCTGAGATGACCACATGCGATTTGCGGGAC\n+TTATCTAGAGATCTATCATTATCGCCAGATTGGTTTAATAATTGGCTTTT\n+CCGCCAATATCCAATTGGAATATGGTTGGTTACTGCAATTGTCGCTCCAT\n+TTTTTAAGCACTCCATAAAAAGTAAACACATTAATATGTACTCTTATTGG\n+AGATTTCTTCTTTCGATTTTAGTTTCGGACCAGTGAAAATCATTCGTTTC\n+ATTTTCGTAAATAAGAACTGAGAAAATATtattattatATATATTTCTTT\n+ATTAGGAAAATACGAAGATTGAGTATTTCAGATTGAATTAGCATATCCGT\n+CTAAATCTTAATGCTGTAATGAGCTTACTTGAGATCTGATCAAAACCAAT\n+ACaaaacccacacCAAAGGTGGTAGCTAATATACATATTTTGTGTAATAC\n+TTTTGTAGAG
TATTTACTATTCAGCGATTTAAACAAGCAATCGCCTAGAC\n+ACACACATTTGTCCGCCTATGTGTATGTGCACCGAGCTATACCCCCACTG\n+AATCGCTGTGTGCTATTTTTATGGCCGCGATGCTCTCTTGTTTTGACCCG\n+CTTGGGCAAC\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_augustus/Genus_species.stats.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_augustus/Genus_species.stats.json Mon Oct 04 19:39:38 2021 +0000 |
b |
@@ -0,0 +1,119 @@ +{ + "format": "annotation", + "command": "/home/abretaud/miniconda3/envs/__funannotate@1.8.7/bin/funannotate predict --input /tmp/tmpkgev4773/files/c/e/8/dataset_ce85d0fa-1534-47df-8c1e-5f0a5c1b82f0.dat --out output --database /home/abretaud/.planemo/planemo_tmp__fmxm4ll/test-data/funannotate_db --species Genus species --isolate --strain --organism other --ploidy 1 --SeqCenter CFMR --SeqAccession 12345 --name FUN_ --numbering 1 --p2g_pident 80 --p2g_prefilter diamond --augustus_species fly --min_training_models 200 --busco_seed_species fly --busco_db insecta --evm-partition-interval 1500 --min_intronlen 10 --max_intronlen 3000 --min_protlen 50 --repeat_filter overlap blast --cpus 1", + "organism": "Genus_species", + "software": { + "name": "funannotate", + "version": "1.8.7", + "date": "2021-07-21", + "resources": { + "merops": { + "type": "diamond", + "version": "12.0", + "date": "2017-10-04", + "num-records": "5009" + }, + "uniprot": { + "type": "diamond", + "version": "2021_03", + "date": "2021-06-02", + "num-records": "565254" + }, + "dbCAN": { + "type": "hmmer3", + "version": "9.0", + "date": "2020-08-04", + "num-records": "641" + }, + "pfam": { + "type": "hmmer3", + "version": "34.0", + "date": "2021-03", + "num-records": "19179" + }, + "repeats": { + "type": "diamond", + "version": "1.0", + "date": "2021-07-19", + "num-records": "11950" + }, + "go": { + "type": "text", + "version": "2021-07-02", + "date": "2021-07-02", + "num-records": "47228" + }, + "mibig": { + "type": "diamond", + "version": "1.4", + "date": "2021-07-19", + "num-records": "31023" + }, + "interpro": { + "type": "xml", + "version": "86.0", + "date": "2021-06-03", + "num-records": "38913" + }, + "busco_outgroups": { + "type": "outgroups", + "version": "1.0", + "date": "2021-07-19", + "num-records": "8" + }, + "gene2product": { + "type": "text", + "version": "1.70", + "date": "2021-06-15", + "num-records": "34039" + } + } + }, + "assembly": { + "num_contigs": 4, + "length": 
227430, + "mean_length": 56857.5, + "N50": 215740, + "L50": 1, + "N90": 215740, + "L90": 1, + "GC_content": 42.86 + }, + "annotation": { + "genes": 18, + "common_name": 0, + "mRNA": 18, + "tRNA": 0, + "ncRNA": 0, + "rRNA": 0, + "avg_gene_length": 2695.06, + "transcript-level": { + "CDS_transcripts": 18, + "CDS_five_utr": 0, + "CDS_three_utr": 0, + "CDS_no_utr": 18, + "CDS_five_three_utr": 0, + "CDS_complete": 18, + "CDS_no-start": 0, + "CDS_no-stop": 0, + "CDS_no-start_no-stop": 0, + "total_exons": 57, + "total_cds_exons": 57, + "multiple_exon_transcript": 16, + "single_exon_transcript": 2, + "avg_exon_length": 558.58, + "avg_protein_length": 582.83, + "functional": { + "go_terms": 0, + "interproscan": 0, + "eggnog": 0, + "pfam": 0, + "cazyme": 0, + "merops": 0, + "busco": 0, + "secretion": 0 + }, + "pct_exon_overlap_protein_evidence": 17.54 + } + } +} \ No newline at end of file |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_augustus/Genus_species.tbl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_augustus/Genus_species.tbl Mon Oct 04 19:39:38 2021 +0000 |
b |
@@ -0,0 +1,282 @@ +>Feature sample +1 215740 REFERENCE + CFMR 12345 +1466 1092 gene + locus_tag FUN_000001 +1466 1092 mRNA + product hypothetical protein + transcript_id gnl|ncbi|FUN_000001-T1_mrna + protein_id gnl|ncbi|FUN_000001-T1 +1466 1092 CDS + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000001-T1_mrna + protein_id gnl|ncbi|FUN_000001-T1 +2126 3863 gene + locus_tag FUN_000002 +2126 2199 mRNA +2258 3224 +3284 3490 +3549 3863 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000002-T1_mrna + protein_id gnl|ncbi|FUN_000002-T1 +2126 2199 CDS +2258 3224 +3284 3490 +3549 3863 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000002-T1_mrna + protein_id gnl|ncbi|FUN_000002-T1 +5802 4883 gene + locus_tag FUN_000003 +5802 5797 mRNA +5539 4883 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000003-T1_mrna + protein_id gnl|ncbi|FUN_000003-T1 +5802 5797 CDS +5539 4883 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000003-T1_mrna + protein_id gnl|ncbi|FUN_000003-T1 +10557 8696 gene + locus_tag FUN_000004 +10557 10549 mRNA +10462 8696 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000004-T1_mrna + protein_id gnl|ncbi|FUN_000004-T1 +10557 10549 CDS +10462 8696 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000004-T1_mrna + protein_id gnl|ncbi|FUN_000004-T1 +15214 14247 gene + locus_tag FUN_000005 +15214 15209 mRNA +14648 14247 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000005-T1_mrna + protein_id gnl|ncbi|FUN_000005-T1 +15214 15209 CDS +14648 14247 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000005-T1_mrna + protein_id gnl|ncbi|FUN_000005-T1 +21705 19533 gene + locus_tag FUN_000006 +21705 21700 mRNA +21515 19533 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000006-T1_mrna + protein_id gnl|ncbi|FUN_000006-T1 +21705 21700 CDS +21515 19533 + codon_start 1 + product 
hypothetical protein + transcript_id gnl|ncbi|FUN_000006-T1_mrna + protein_id gnl|ncbi|FUN_000006-T1 +35679 34843 gene + locus_tag FUN_000007 +35679 35675 mRNA +35655 35648 +35594 34843 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000007-T1_mrna + protein_id gnl|ncbi|FUN_000007-T1 +35679 35675 CDS +35655 35648 +35594 34843 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000007-T1_mrna + protein_id gnl|ncbi|FUN_000007-T1 +40223 41234 gene + locus_tag FUN_000008 +40223 40396 mRNA +40659 41234 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000008-T1_mrna + protein_id gnl|ncbi|FUN_000008-T1 +40223 40396 CDS +40659 41234 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000008-T1_mrna + protein_id gnl|ncbi|FUN_000008-T1 +41267 42107 gene + locus_tag FUN_000009 +41267 41274 mRNA +41437 41444 +41707 42107 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000009-T1_mrna + protein_id gnl|ncbi|FUN_000009-T1 +41267 41274 CDS +41437 41444 +41707 42107 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000009-T1_mrna + protein_id gnl|ncbi|FUN_000009-T1 +87202 88320 gene + locus_tag FUN_000010 +87202 87207 mRNA +88054 88320 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000010-T1_mrna + protein_id gnl|ncbi|FUN_000010-T1 +87202 87207 CDS +88054 88320 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000010-T1_mrna + protein_id gnl|ncbi|FUN_000010-T1 +94727 95583 gene + locus_tag FUN_000011 +94727 94732 mRNA +94873 95016 +95449 95583 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000011-T1_mrna + protein_id gnl|ncbi|FUN_000011-T1 +94727 94732 CDS +94873 95016 +95449 95583 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000011-T1_mrna + protein_id gnl|ncbi|FUN_000011-T1 +133134 136346 gene + locus_tag FUN_000012 +133134 133142 mRNA +133209 134539 +134668 135510 +135569 136346 + product 
hypothetical protein + transcript_id gnl|ncbi|FUN_000012-T1_mrna + protein_id gnl|ncbi|FUN_000012-T1 +133134 133142 CDS +133209 134539 +134668 135510 +135569 136346 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000012-T1_mrna + protein_id gnl|ncbi|FUN_000012-T1 +144294 169416 gene + locus_tag FUN_000013 +144294 144551 mRNA +149012 149244 +149367 149588 +149654 149897 +149952 150112 +150174 150248 +151966 152072 +152314 152429 +152496 152751 +153651 159010 +159150 164491 +167135 168360 +168722 169208 +169350 169416 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000013-T1_mrna + protein_id gnl|ncbi|FUN_000013-T1 +144294 144551 CDS +149012 149244 +149367 149588 +149654 149897 +149952 150112 +150174 150248 +151966 152072 +152314 152429 +152496 152751 +153651 159010 +159150 164491 +167135 168360 +168722 169208 +169350 169416 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000013-T1_mrna + protein_id gnl|ncbi|FUN_000013-T1 +192049 194669 gene + locus_tag FUN_000014 +192049 192067 mRNA +193549 193658 +194041 194455 +194518 194669 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000014-T1_mrna + protein_id gnl|ncbi|FUN_000014-T1 +192049 192067 CDS +193549 193658 +194041 194455 +194518 194669 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000014-T1_mrna + protein_id gnl|ncbi|FUN_000014-T1 +210553 209044 gene + locus_tag FUN_000015 +210553 210548 mRNA +210474 209044 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000015-T1_mrna + protein_id gnl|ncbi|FUN_000015-T1 +210553 210548 CDS +210474 209044 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000015-T1_mrna + protein_id gnl|ncbi|FUN_000015-T1 +>Feature sample4 +1 7560 REFERENCE + CFMR 12345 +1466 1092 gene + locus_tag FUN_000016 +1466 1092 mRNA + product hypothetical protein + transcript_id gnl|ncbi|FUN_000016-T1_mrna + protein_id gnl|ncbi|FUN_000016-T1 +1466 1092 CDS + 
codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000016-T1_mrna + protein_id gnl|ncbi|FUN_000016-T1 +2126 3863 gene + locus_tag FUN_000017 +2126 2199 mRNA +2258 3224 +3284 3490 +3549 3863 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000017-T1_mrna + protein_id gnl|ncbi|FUN_000017-T1 +2126 2199 CDS +2258 3224 +3284 3490 +3549 3863 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000017-T1_mrna + protein_id gnl|ncbi|FUN_000017-T1 +5494 4248 gene + locus_tag FUN_000018 +5494 4930 mRNA +4759 4248 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000018-T1_mrna + protein_id gnl|ncbi|FUN_000018-T1 +5494 4930 CDS +4759 4248 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000018-T1_mrna + protein_id gnl|ncbi|FUN_000018-T1 |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_augustus/Genus_species.validation.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_augustus/Genus_species.validation.txt Mon Oct 04 19:39:38 2021 +0000 |
[ |
@@ -0,0 +1,2 @@ +WARNING: valid [SEQ_FEAT.ShortExon] Internal coding region exon is too short FEATURE: CDS: hypothetical protein [(lcl|sample:c35679-35675, c35655-35648, c35594-34843)] [lcl|sample: raw, dna len= 215740] -> [gnl|ncbi|FUN_000007-T1] +WARNING: valid [SEQ_FEAT.ShortExon] Internal coding region exon is too short FEATURE: CDS: hypothetical protein [(lcl|sample:41267-41274, 41437-41444, 41707-42107)] [lcl|sample: raw, dna len= 215740] -> [gnl|ncbi|FUN_000009-T1] |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_augustus/fly.parameters.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_augustus/fly.parameters.json Mon Oct 04 19:39:38 2021 +0000 |
[ |
@@ -0,0 +1,1 @@ +{"augustus": [{"version": "AUGUSTUS (3.3.3)", "source": "augustus pre-trained", "date": "2021-07-19", "path": "/tmp/prout/trained_species/fly/augustus"}], "genemark": [{}], "codingquarry": [{}], "snap": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-21", "path": "/tmp/tmpkgev4773/job_working_directory/000/2/working/output/predict_misc/ab_initio_parameters/fly.snap.hmm"}], "glimmerhmm": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-21", "path": "/tmp/tmpkgev4773/job_working_directory/000/2/working/output/predict_misc/ab_initio_parameters/glimmerhmm"}]} \ No newline at end of file |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_bam/Genus_species.cds-transcripts.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_bam/Genus_species.cds-transcripts.fa Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,413 @@\n+>FUN_000001-T1 FUN_000001\n+TACCCCTTCGACAGCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGA\n+ACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCG\n+ATGAAAAACTGCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGG\n+TTGCGCAAATCCGAGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGG\n+TTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCT\n+TCAGCGACACCAAGAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCC\n+GACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGC\n+CCAGCTGGAGGAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGC\n+CCACAGAGCAGACGCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtac\n+aagATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCG\n+TCAGGACGGCATCTACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCA\n+ACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCTATAG\n+>FUN_000002-T1 FUN_000002\n+ATGTATGACCAGGAGCCCTACAAGTGCCATCTCTGCTCCAAGACGTTCCGAATGAAGGGCTCCCTTCGAATCCACCTGAA\n+AGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAATCCCAGTCCTACACCCGCTTCTACCACCA\n+GTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACACAGAGCCTGGAGCCCTTGGCAACGGAAAT\n+AACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGCATGCTCCAGCAGTCCCCCAGTTCGCCGGA\n+GTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTCCTTCACCACCAAGTACTTTCTGAAGAAGC\n+ACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCAGGACCTTCACCTTCCAGCAGTCGTACCAC\n+AAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGTGGACGCGCCTTCAAGGAACTGTCTACGCT\n+GCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAAGTCTGCGGTGAGTATCGATCAAACCCCTTAA\n+TTAGCCGCCCCTTATTTTGTTAG\n+>FUN_000003-T1 
FUN_000003\n+ATGATACATTCGGCTAAGACTACCTTGGTGGAGTGGACAGAGGCCATGGAAGACGGAAACAAGGGCTACCAGCTAATCGA\n+AAAGTACTACCTCGATGACCAGCAAAAGGCACGGGAGCTGAACATTAAGCGTCAGCTCTTGCAGGCGGACATCGACAAGC\n+GGCGCAAGCAGGTGGTGCTCCTCTATGACGAGCAAATGACGCTGGAGAAGAATCTGGAGCGAACCGCTTGTCTGTACAGG\n+TCTGCCCACGCGGAACGCCGCCAGATGGTGGAGACGTGGAAGAGCGCCGTGAACCAGATGACCCAGCGAGAGCACGACAT\n+TCAGCGCAGCGAGATAGAGTGTGCGGAGCTGGCCCAGAAAGCCCAGCAGACGGCTCAGACCTACAAGGAGTACGACAACC\n+AGCTAACCGAGGTCATCGAGAACAACCGCCAGGTGGAGCTCGCGATCGAGTCCCTCAACGAGGAAAGCTCCGACATGAAA\n+AACCAAATCCAGATCCTGATTGATGCCACATTACTGAAGGAGCGCGAGATCGACGGACTGCGTCGGGAACTGGAGAACCT\n+TTCGAACCGGGTTCACCTGCAGCGCATGGATAACCGTAGTCAGATGAAAAAGCGCGATGAGAAGGCCAAGGAGCTGGAGA\n+ACTTCGCTTCGGTGATGGAGAAGGTTAACGCTCGGCTGAAGTCTGTACAGAACAAGGCCCTCAATGCGGAGCAGCGGCTG\n+CAAATTCTGGAGGAGATGATGCAGGCGGAGGAGACGGCTCTGCGAAACCTGGACAAGGAGCAGGAGAAGGTCAACGAGAT\n+GTTGTACCGCACCCAGAGGCAGGTCATCGAGCTGCAGGACGAGGAGAAGGTCTTGAAGGTGCAGAACGACTCGCTCAACT\n+CCAACCTGGCAGCGATCAATCGCAATCAGCAGCAGGTCAATAACGAACTCAAGCGGCAGACGGAGATCCATTACAGCCTG\n+TCCTTTAAGTGCTTGGAGGCGGAGCGGCGTCTCGCCGAGATCAAGGGTCTGGCAGATGACCCCGAGGTAGAGGCCACAAA\n+CATGGCCCGGCTTAACACTCTGGAGCAGGAGTATGAAAAGTTGCAACGCCTCATCGCCACCACGGAGGCGCAGAATAAGA\n+AGCTAAACTATAACATGAACAACCTGGTCGTCCAGTACAACGCCGACGAGAAGGAGCTCGAAATGGTCAGGTTCAAGATT\n+AAAGAGGCCCAGGTGTACTGCGAGGGCACCGTCAAGAGACTGCGCCAGATTCGATACGAGAATTCCGAGCTCATAGTCGA\n+CCTCAACATGGTCAAGATGCGCTGCAGCGACCTAGAGGTTGGCATCGGAGGCTGCGAGCAGGGCACCTACGACCTAGAGC\n+AGCACCGCCTTGCCTTCCGGCGCGCCATCAAAGATCGCACCGTTGAGCTTCGCAGCCAAGAGGATGTGCTCCTCCTCAAG\n+AAGAAGCATCTTAACGAGGAACTAAGCACCCTGCGAGCCGACCTCGGGGAGCGGAAGAAGCAGATTGAGGCGATGAAGGC\n+GCGCTTTGAACTTACCGCCCAATTGCTAGGCAAAAACGAGGACGGCTCCATCATGACCAGCACACAGCTGAAGGTGGTGA\n+GCGCCCAGGAACGGCAGATGCTGGCCGACGAGGGCGATGCCCTCAACAAAAAGGTTCTCAAGGCTGAGAAGGAAGTGGTC\n+GCCCTGGAAAACACACTGCGCCAGTTCGACAAATCGAACGATAACTACCGAAAAACATTCCGATCTGTGGACGAAAACTC\n+AAAGGGTGAGTTATAA\n+>FUN_000004-T1 
FUN_000004\n+ATGCGTTGCGTTTTCTGTGGATCGGGCAGCGAGCAGCAGCATTCGCGATGGGAAATCAAGATGCTGCAGGAGAGCTGCCG\n+CACTGATCATGGATTCCACCAGGACTCGCAAGCTATCCAATATCTGTATGAGATCCTGGCCTCTTACAACCGCGACGAAC\n+AGCGCGCCTTCTTACAGTTTGTGACTGGATCACCACGCCTTCCGACTGGAGGATTCAAGGCCCTTACGCCACCACTGACT\n+ATTGTACGCAAGACGTTGGATGAGAACCAAAACCCTAACGATTACTTACCATCTGTGATGACCTGTGTCAACTATCTAAA\n+GTTGCCCGACTACTCTAGTCGCGAGGTGATGAGGCAGAAGCTGAAAGTGGCTGCTAACGAAGGCAGCATGTCTTTCCACC\n+TCTCATAA\n+>'..b'GATCCAAGCATTGCGGATGGTGCAGGCCGAGACCCCGCCCTATATTAAACA\n+ATGTCATAGGAACGACCCGAAATTGGTGGACTGCTTTATCGGAGCTATTGAACACCTAAAGCCATATTTGGCCAATGGCA\n+TTCCTGATATTCAGCTGCCCTCTGTGGAGCCCTTTAAGATGGACACCCTTGCCCTGCAGTTAACAGAGGGTCCCCAGGGG\n+TATAAGATCACGCTGAAGAACATGGAGGCCTTCGGGGCCAGCAACTTCAAGGTGACATCCCTGAAACTGAGCGAAGGAAG\n+CGAGCCCTTCAAGGCGAAGATCGTGATGCCCAAGCTAAAGATTGAGGCTAAATACACGAGCTCCGGGGTCCTGCTGATCC\n+TGCCCGCCTCCGGAGGTGGGGACTTCCATGCTAACTTCGAGGGTGTGAGTGCCGATCTCACAGGAAAGACATCCATTCAC\n+GCCTTCAAGGGCGCTAACTACCTCCACATCGATGCTCTCAGCTTGGTTCTGGATGTGAAGGATGTGAAAATGAGCATCTC\n+AGGTGCCTTCAACAACAATCGAATTCTGCTGGAGGCCACCAATCTGTTTCTGCGGGAAAACTCTCAAGTCGTTTTGGAGG\n+CTATGCAGGCTCAATTGCAGAAAAAATTGGCTAGCGAGTTCGGCAAACTCGCCAACCAGCTCCTGAAGAATGTTCCTGTA\n+GAGCAATTCTACGTGGACTAG\n+>FUN_000016-T1 
FUN_000016\n+ATGGCATCCAAGTCCTTCGATTTGGTCATTGAGGAAAAGACAAAGAAACCGGAACGGCTCTACCAACCTCGTCGGATGCG\n+ATGGCTTAAATATATCATCCTGCCGGCAGTCTTCTCCTTTGCGCTTCTGCTGATCCTGGTCAATGTGGACTTCTCCGATA\n+ATAGTGAGGATTCCACGCATCTGGGCAATGACACATCGCTCATTATATCTGGCTATGGATTTGAAAATAACACACTGCGC\n+CGGGGATTTTTCTCCGGAGGAATTGCATTGCATAGCCTTGTCATCGAGAACTGCACCATAGTTCATATCAACGACGCAGC\n+TTTTAATCAGGAAAGCACTGTGAACATAACAAGCCTGCAGCTCATTAATGTGCAATTGGAAAACTTGACTGAATCTGCCC\n+TGGAAGGTCTTCAAAAGCTTCAGAACTTTACTCTAGTCAATGAAAATAATCACTTCAGGCCATTTGGATTCCTCTCAGCT\n+GTGGCTGAATCGTTAGTCAGTGCGGAAATCCACCAGTCACTAGCCGCGGCAATATCGTATTCAGTGTGTGATTTTTTGGG\n+ATCCCGGAATTTCCCACAGTTGAAATATTTAGATCTGAGTGGCACACACTTGGACAAAAGCCTTATTAAAGAGTCCTTTG\n+ACAATCTGCCCGCGTTGGAACAACTACTTCTAAGGAACTGTGGGTTGGGCAACATCGAGTGGGAAATCGTGAGGCCAAGA\n+CTCAAATTGTTGCACTATTTGGACTTGGGTGGAGCTCAAAAGACGGGCAACTATGAACATCAGTTGGACGTATCCGCGTT\n+CTCTCCTGAGACAACTACTAACGCTGAAGAGATATCTACTATCCTAGCGAAGAGAGCGATGGCTCCAGAAGTAGTGGGCA\n+CCACTACACTAGGACCTACTACTTCCATAGAAATTTCACCACCATCAACACAAAGTACAACAACACCCAAAGAAGAAAGT\n+ACATCTATGACAGAAACAACGATATTAACAACACCATCGCCAAAATGCGAAGAGGAACTTTGCCAGGATCTAGAGTGCTC\n+CAGAATTACCACCGATACGGTTGCATCTGCAGATCTGGGAAAATCGTCGTGCCAGGATGGTTTACTGGTGGAGATCTGCG\n+AATCGACTTGCACCACTCCCACATTCTTCTGTGTGATATTAGGCGAGAACTTTACCTCCGCATCCAACTGTTGTTCCCAC\n+CATACCATGCGATGTGTGGTCTCTGCACAGGTCTCCTGGTTCGAGGACCACAGCGGACTGGTCATTGGCTTGGGAGTGGG\n+TCTCCTCTTCATCGGCAGCTTCCTCGGCATGCTCATCGTCTTCGGAACCCTGCGCCTGAATCCATCCTGGTTGCGAGGCA\n+ATAAGCGGCGGGAGTCGAATACGATAGGACTGATTCAGGGAAGATTTGAGAAGGACCCGTATGAGCAAGTGGGGTAG\n+>FUN_000017-T1 FUN_000017\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000018-T1 
FUN_000018\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCGATCGACAGTCTCGTTCTCGGAG\n+TCGAACTCGCAGCGG\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_bam/Genus_species.discrepency.report.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_bam/Genus_species.discrepency.report.txt Mon Oct 04 19:39:38 2021 +0000 |
b |
@@ -0,0 +1,142 @@ +Discrepancy Report Results + +Summary +DISC_PROTEIN_NAMES:All proteins have same name "hypothetical protein" +DISC_SOURCE_QUALS_ASNDISC:taxname (all present, all same) +DISC_FEATURE_COUNT:gene: 18 present +DISC_FEATURE_COUNT:CDS: 18 present +DISC_FEATURE_COUNT:mRNA: 18 present +DISC_COUNT_NUCLEOTIDES:4 nucleotide Bioseqs are present +JOINED_FEATURES:30 features have joined locations. +NO_ANNOTATION:2 bioseqs have no features +DISC_QUALITY_SCORES:Quality scores are missing on all sequences. +FATAL: DISC_BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS:2 features have partial ends that do not abut the end of the sequence or a gap, and cannot be extended by 3 or fewer nucleotides to do so +ONCALLER_COMMENT_PRESENT:4 comment descriptors were found (all same) +MISSING_GENOMEASSEMBLY_COMMENTS:4 bioseqs are missing GenomeAssembly structured comments +MOLTYPE_NOT_MRNA:4 molecule types are not set as mRNA. +TECHNIQUE_NOT_TSA:4 technique are not set as TSA +MISSING_STRUCTURED_COMMENT:4 sequences do not include structured comments. +MISSING_PROJECT:22 sequences do not include project. +DISC_INCONSISTENT_MOLINFO_TECH:Molinfo Technique Report (some missing, all same) + + +Detailed Report + +DiscRep_ALL:DISC_PROTEIN_NAMES::All proteins have same name "hypothetical protein" + +DiscRep_ALL:DISC_SOURCE_QUALS_ASNDISC::taxname (all present, all same) +DiscRep_SUB:DISC_SOURCE_QUALS_ASNDISC::4 sources have 'Genus species' for taxname +DiscRep_ALL:DISC_FEATURE_COUNT::gene: 18 present +DiscRep_ALL:DISC_FEATURE_COUNT::CDS: 18 present +DiscRep_ALL:DISC_FEATURE_COUNT::mRNA: 18 present +DiscRep_ALL:DISC_COUNT_NUCLEOTIDES::4 nucleotide Bioseqs are present +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:JOINED_FEATURES::30 features have joined locations. 
+DiscRep_SUB:JOINED_FEATURES::30 features have joined location but no exception +genome:mRNA hypothetical protein (sample4:2126-2199, 2258-3224, 3284->3537) FUN_000018 +genome:CDS hypothetical protein (sample4:2126-2199, 2258-3224, 3284->3537) FUN_000018 +genome:mRNA hypothetical protein (sample:c5802-5797, c5539-4883) FUN_000002 +genome:CDS hypothetical protein (sample:c5802-5797, c5539-4883) FUN_000002 +genome:mRNA hypothetical protein (sample:c10557-10549, c10462-8696) FUN_000003 +genome:CDS hypothetical protein (sample:c10557-10549, c10462-8696) FUN_000003 +genome:CDS hypothetical protein (sample:c15214-15209, c14648-14247) FUN_000004 +genome:mRNA hypothetical protein (sample:c15214-15209, c14648-14247) FUN_000004 +genome:mRNA hypothetical protein (sample:c21705-21700, c21515-19533) FUN_000005 +genome:CDS hypothetical protein (sample:c21705-21700, c21515-19533) FUN_000005 +genome:CDS hypothetical protein (sample:40223-40396, 40659-41193, 41707-42080, 43409-43609, 43678-44130) FUN_000007 +genome:mRNA hypothetical protein (sample:40223-40396, 40659-41193, 41707-42080, 43409-43609, 43678-44130) FUN_000007 +genome:CDS hypothetical protein (sample:c47195-46753, c46330-46214, c46157-45527) FUN_000008 +genome:mRNA hypothetical protein (sample:c47195-46753, c46330-46214, c46157-45527) FUN_000008 +genome:CDS hypothetical protein (sample:c79527-79519, c79068-78685) FUN_000009 +genome:mRNA hypothetical protein (sample:c79527-79519, c79068-78685) FUN_000009 +genome:mRNA hypothetical protein (sample:87202-87207, 88054-88320) FUN_000010 +genome:CDS hypothetical protein (sample:87202-87207, 88054-88320) FUN_000010 +genome:CDS hypothetical protein (sample:133587-134504, 134720-135510, 135569-136284, 137516-137862) FUN_000011 +genome:mRNA hypothetical protein (sample:133587-134504, 134720-135510, 135569-136284, 137516-137862) FUN_000011 +genome:CDS hypothetical protein (sample:144294-144551, 149012-149244, 149367-149588, 149654-149897, 149952-150112, 150174-150248, 
151966-152072, 152314-152429, 152496-152751, 153296-153630, 153689-155122, 155789-158975, 159190-164495) FUN_000012 +genome:mRNA hypothetical protein (sample:144294-144551, 149012-149244, 149367-149588, 149654-149897, 149952-150112, 150174-150248, 151966-152072, 152314-152429, 152496-152751, 153296-153630, 153689-155122, 155789-158975, 159190-164495) FUN_000012 +genome:CDS hypothetical protein (sample:167121-168360, 168722-169212) FUN_000013 +genome:mRNA hypothetical protein (sample:167121-168360, 168722-169212) FUN_000013 +genome:CDS hypothetical protein (sample:c178916-178873, c177172-176887, c176824-176699) FUN_000014 +genome:mRNA hypothetical protein (sample:c178916-178873, c177172-176887, c176824-176699) FUN_000014 +genome:CDS hypothetical protein (sample:192004-192067, 193549-193658, 194041-194455, 194518-194669) FUN_000015 +genome:mRNA hypothetical protein (sample:192004-192067, 193549-193658, 194041-194455, 194518-194669) FUN_000015 +genome:CDS hypothetical protein (sample:c210553-210548, c210474-209044) FUN_000016 +genome:mRNA hypothetical protein (sample:c210553-210548, c210474-209044) FUN_000016 + +DiscRep_ALL:NO_ANNOTATION::2 bioseqs have no features +genome:sample2 (length 2030) +genome:sample3 (length 2100) + +DiscRep_ALL:DISC_QUALITY_SCORES::Quality scores are missing on all sequences. 
+ +FATAL: DiscRep_ALL:DISC_BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS::2 features have partial ends that do not abut the end of the sequence or a gap, and cannot be extended by 3 or fewer nucleotides to do so +genome:CDS hypothetical protein sample:<2331-3254 FUN_000001 +genome:CDS hypothetical protein (sample4:2126-2199, 2258-3224, 3284->3537) FUN_000018 + +DiscRep_ALL:ONCALLER_COMMENT_PRESENT::4 comment descriptors were found (all same) +genome:sample:"Annotated using 1.8.7" +genome:sample2:"Annotated using 1.8.7" +genome:sample3:"Annotated using 1.8.7" +genome:sample4:"Annotated using 1.8.7" + +DiscRep_ALL:MISSING_GENOMEASSEMBLY_COMMENTS::4 bioseqs are missing GenomeAssembly structured comments +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:MOLTYPE_NOT_MRNA::4 molecule types are not set as mRNA. +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:TECHNIQUE_NOT_TSA::4 technique are not set as TSA +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:MISSING_STRUCTURED_COMMENT::4 sequences do not include structured comments. +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:MISSING_PROJECT::22 sequences do not include project. 
+genome:sample (length 215740) +genome:ncbi:FUN_000001-T1 (length 307) +genome:ncbi:FUN_000002-T1 (length 220) +genome:ncbi:FUN_000003-T1 (length 591) +genome:ncbi:FUN_000004-T1 (length 135) +genome:ncbi:FUN_000005-T1 (length 662) +genome:ncbi:FUN_000006-T1 (length 278) +genome:ncbi:FUN_000007-T1 (length 578) +genome:ncbi:FUN_000008-T1 (length 396) +genome:ncbi:FUN_000009-T1 (length 130) +genome:ncbi:FUN_000010-T1 (length 90) +genome:ncbi:FUN_000011-T1 (length 923) +genome:ncbi:FUN_000012-T1 (length 3977) +genome:ncbi:FUN_000013-T1 (length 576) +genome:ncbi:FUN_000014-T1 (length 151) +genome:ncbi:FUN_000015-T1 (length 246) +genome:ncbi:FUN_000016-T1 (length 478) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) +genome:ncbi:FUN_000017-T1 (length 124) +genome:ncbi:FUN_000018-T1 (length 432) + +DiscRep_ALL:DISC_INCONSISTENT_MOLINFO_TECH::Molinfo Technique Report (some missing, all same) +DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::technique (all missing) +DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::4 Molinfos are missing field technique +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_bam/Genus_species.gbk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_bam/Genus_species.gbk Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,4259 @@\n+LOCUS sample 215740 bp DNA linear 22-JUL-2021\n+DEFINITION Genus species.\n+ACCESSION \n+VERSION\n+KEYWORDS .\n+SOURCE Genus species\n+ ORGANISM Genus species\n+ Unclassified.\n+REFERENCE 1 (bases 1 to 215740)\n+ AUTHORS Palmer,J.M.\n+ TITLE Direct Submission\n+ JOURNAL Submitted (22-JUL-2021) CFMR, USDA Forest Service, 1 Gifford\n+ Pinchot Drive, Madison, WI 53726, USA\n+COMMENT \'Annotated using 1.8.7\'.\n+FEATURES Location/Qualifiers\n+ source 1..215740\n+ /organism="Genus species"\n+ /mol_type="genomic DNA"\n+ gene <2331..3254\n+ /locus_tag="FUN_000001"\n+ mRNA <2331..3254\n+ /locus_tag="FUN_000001"\n+ /product="hypothetical protein"\n+ CDS <2331..3254\n+ /locus_tag="FUN_000001"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUN_000001-T1"\n+ /translation="YPFDSHRFVQYNPTSLERNFKYDVLTEHDLGVTVDLINRELYQA\n+ DSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENI\n+ EAKVGYNVKKSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFP\n+ DFTNWKFPCAQVIFDSDPAPAGKNVPAQLEEMSQAMIRGVMDESGEQFVAYFLPTEQT\n+ LEKRRTDFINGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGIYYNEL\n+ ETRVRLNKRRVKVGQQPNNTKLVSIFMRIHL"\n+ gene complement(4883..5802)\n+ /locus_tag="FUN_000002"\n+ mRNA complement(join(4883..5539,5797..5802))\n+ /locus_tag="FUN_000002"\n+ /product="hypothetical protein"\n+ CDS complement(join(4883..5539,5797..5802))\n+ /locus_tag="FUN_000002"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUN_000002-T1"\n+ /translation="MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPN\n+ PSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGNNSTCTASQPYALSGALSMLQQS\n+ PSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+ KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCEVCGEYRSNPLISRPL\n+ FC"\n+ gene complement(8696..10557)\n+ /locus_tag="FUN_000003"\n+ mRNA complement(join(8696..10462,10549..10557))\n+ /locus_tag="FUN_000003"\n+ /product="hypothetical protein"\n+ CDS complement(join(8696..10462,10549..10557))\n+ /locus_tag="FUN_000003"\n+ 
/codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUN_000003-T1"\n+ /translation="MIHSAKTTLVEWTEAMEDGNKGYQLIEKYYLDDQQKARELNIKR\n+ QLLQADIDKRRKQVVLLYDEQMTLEKNLERTACLYRSAHAERRQMVETWKSAVNQMTQ\n+ REHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELAIESLNEESSDMK\n+ NQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRDEKAKELENFASV\n+ MEKVNARLKSVQNKALNAEQRLQILEEMMQAEETALRNLDKEQEKVNEMLYRTQRQVI\n+ ELQDEEKVLKVQNDSLNSNLAAINRNQQQVNNELKRQTEIHYSLSFKCLEAERRLAEI\n+ KGLADDPEVEATNMARLNTLEQEYEKLQRLIATTEAQNKKLNYNMNNLVVQYNADEKE\n+ LEMVRFKIKEAQVYCEGTVKRLRQIRYENSELIVDLNMVKMRCSDLEVGIGGCEQGTY\n+ DLEQHRLAFRRAIKDRTVELRSQEDVLLLKKKHLNEELSTLRADLGERKKQIEAMKAR\n+ FELTAQLLGKNEDGSIMTSTQLKVVSAQERQMLADEGDALNKKVLKAEKEVVALENTL\n+ RQFDKSNDNYRKTFRSVDENSKGEL"\n+ gene complement(14247..15214)\n+ /locus_tag="FUN_000004"\n+ mRNA complement(join(14247..14648,15209..15214))\n+ /locus_tag="FUN_00'..b'1 agcttttggc actgctcaac gacaatgtcg tcgattgact gcgaaagcag tgcctcctgc\n+ 4501 tcaggatcca caatggagct gctgttgatg gcagctattt cggcgctcgc tggtgaaggc\n+ 4561 tgagtatgcg agtcgttgcc ctccaggaac gcctttatca gctgctccgg tgtctgggcc\n+ 4621 tcctcggtgg gacatcgatg ggtcctttga ctgaccttgt acctgaacgt cttttggcag\n+ 4681 agctcgcact tgtagggcat cactcccgtg tggatgcgcg tgtggacaag gaaagagact\n+ 4741 cgctgccgga aacacttgcc tgtggacgag tccgaaatac gaaaaggtta gacatggagt\n+ 4801 gacccggaaa agaaggtata cctctccttc atttaaagta aatagggcaa atcgcaatgg\n+ 4861 agtatgctca tttataagct ggctaacaaa ataaggggcg gctaattaag gggtttgatc\n+ 4921 gatactcacc gcagacttcg cacttgaagg gcttttcgcc gctgtgaatc ctctggtggt\n+ 4981 tatgcagcgt agacagttcc ttgaaggcgc gtccacaaac cccgcagaca tggggcttta\n+ 5041 cctcgctgtg gtagagcaaa tgcttgtggt acgactgctg gaaggtgaag gtcctggcgc\n+ 5101 agatctcgca tgtgtacggc atctcgccgg tgtggagccg cttgtgcttc ttcagaaagt\n+ 5161 acttggtggt gaaggacttg ctgcacacat cgcactccca cagcttcggg gtggccgtgc\n+ 5221 ccgactccgg cgaactgggg gactgctgga gcatgctcag cgccccactc agcgcgtatg\n+ 5281 gctgggatgc ggtgcacgtg gagttatttc cgttgccaag ggctccaggc tctgtgtgtc\n+ 5341 gaatgcgatc 
gcaaatgctc agcttgggcg tggcagtaac tgcactggtg gtagaagcgg\n+ 5401 gtgtaggact gggattagga tttggattgg gattggagca gggcacgccc atcatgtgca\n+ 5461 ctactttcag gtggattcga agggagccct tcattcggaa cgtcttggag cagagatggc\n+ 5521 acttgtaggg ctcctggtcc tgtataaagc aataatcgga atttcactta tatttatcaa\n+ 5581 ttcatcaata tgccctcatg gccaaatatt ccattacatt accgtctgtc tctcagtttc\n+ 5641 aaatttatgc acaaaaatca ttcactttca ttcactatat cacaaagttg ccatggtttt\n+ 5701 aaattgatca aaaacaaatt aatatctatc atatatatac atagtcatat gaacagttga\n+ 5761 aaaattaatt gaaaataatg ggaacgatat acgtacatac atcagttgtt tttaaaatat\n+ 5821 aagggtatat agatttcttt cttgttgttg ttgattttaa ttacgtcaaa cttttgtttc\n+ 5881 agattcaatg taaatggtct agctttttaa gtatgatttt tttttgctgc cagtgagcat\n+ 5941 agaaaaaaaa aatcaaaatc gatataagaa tatgcgaaag tgcattacga aactctttag\n+ 6001 ataatagcac ttaatatatg tacatagcca atagttaccg gttccttctg ttggggttcc\n+ 6061 ttttgcttgg gttctccctc cgcattttcg tggactaagc ggacgtgcat gtccctcagc\n+ 6121 tcggtattcc ggaaactgaa ctcgcaaatg tcgcagtggg cgggcggggt ggtccgcaca\n+ 6181 ggcggtgggg ttgggacgac gggcttggac ctggttcgct tggccctccg tttgggagga\n+ 6241 gctgcggcaa ggaagccccg ggacgaggcg ggttggccat tcgctggact ctcgtttccc\n+ 6301 tcctgagcca tcagacttgt gtgcgagaac aggtggatgg tgagcttgtc cagccccagg\n+ 6361 aagagctcct tgcagttggc aaagggacag gccagtgggc cgttggccgc cttgatcagc\n+ 6421 ctctgctgca gtgcgtcaat actgccgaaa ctgggcaccg cgcagagcgg acacagcacc\n+ 6481 gaggtggaac acatttcgcc agtgcactca atcgaatctt atgcaaatgc ttcacctcct\n+ 6541 attgggatta tcctcctgtt cggtctgtga tcatctattc aggagtccat tcccagactg\n+ 6601 cctagtcttt ctgctttcaa aattttctaa aaatatcagc aagtgaagat ttttgaaaac\n+ 6661 tttgggccca gcaatctgac ttctcggcac cgatgccagc taacgaaata atgaaaaata\n+ 6721 atgaaatgcc cggcgcggat cgtcgaatcg tcaagaagac tttcggaaac actcgcagca\n+ 6781 ccgaaatccc atctctcgaa caaggcagtc tcttttctcc gtgtctctgg gtagctcatt\n+ 6841 tcgaaatata gctctgagca cggctatata ctatatgtat gtagaattat ttctggccga\n+ 6901 tatatgttgc actggcggcc atatagcctt cgttctagtc tttgtaacgc acgatgcgca\n+ 6961 ggagcaattc 
gctgagatga ccacatgcga tttgcgggac ttatctagag atctatcatt\n+ 7021 atcgccagat tggtttaata attggctttt ccgccaatat ccaattggaa tatggttggt\n+ 7081 tactgcaatt gtcgctccat tttttaagca ctccataaaa agtaaacaca ttaatatgta\n+ 7141 ctcttattgg agatttcttc tttcgatttt agtttcggac cagtgaaaat cattcgtttc\n+ 7201 attttcgtaa ataagaactg agaaaatatt attattatat atatttcttt attaggaaaa\n+ 7261 tacgaagatt gagtatttca gattgaatta gcatatccgt ctaaatctta atgctgtaat\n+ 7321 gagcttactt gagatctgat caaaaccaat acaaaaccca caccaaaggt ggtagctaat\n+ 7381 atacatattt tgtgtaatac ttttgtagag tatttactat tcagcgattt aaacaagcaa\n+ 7441 tcgcctagac acacacattt gtccgcctat gtgtatgtgc accgagctat acccccactg\n+ 7501 aatcgctgtg tgctattttt atggccgcga tgctctcttg ttttgacccg cttgggcaac\n+//\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_bam/Genus_species.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_bam/Genus_species.gff3 Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,145 @@\n+##gff-version 3\n+sample\tfunannotate\tgene\t2331\t3254\t.\t+\t.\tID=FUN_000001;\n+sample\tfunannotate\tmRNA\t2331\t3254\t.\t+\t.\tID=FUN_000001-T1;Parent=FUN_000001;product=hypothetical protein;\n+sample\tfunannotate\texon\t2331\t3254\t.\t+\t.\tID=FUN_000001-T1.exon1;Parent=FUN_000001-T1;\n+sample\tfunannotate\tCDS\t2331\t3254\t.\t+\t0\tID=FUN_000001-T1.cds;Parent=FUN_000001-T1;\n+sample\tfunannotate\tgene\t4883\t5802\t.\t-\t.\tID=FUN_000002;\n+sample\tfunannotate\tmRNA\t4883\t5802\t.\t-\t.\tID=FUN_000002-T1;Parent=FUN_000002;product=hypothetical protein;\n+sample\tfunannotate\texon\t5797\t5802\t.\t-\t.\tID=FUN_000002-T1.exon1;Parent=FUN_000002-T1;\n+sample\tfunannotate\texon\t4883\t5539\t.\t-\t.\tID=FUN_000002-T1.exon2;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t5797\t5802\t.\t-\t0\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t4883\t5539\t.\t-\t0\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tgene\t8696\t10557\t.\t-\t.\tID=FUN_000003;\n+sample\tfunannotate\tmRNA\t8696\t10557\t.\t-\t.\tID=FUN_000003-T1;Parent=FUN_000003;product=hypothetical protein;\n+sample\tfunannotate\texon\t10549\t10557\t.\t-\t.\tID=FUN_000003-T1.exon1;Parent=FUN_000003-T1;\n+sample\tfunannotate\texon\t8696\t10462\t.\t-\t.\tID=FUN_000003-T1.exon2;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t10549\t10557\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t8696\t10462\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tgene\t14247\t15214\t.\t-\t.\tID=FUN_000004;\n+sample\tfunannotate\tmRNA\t14247\t15214\t.\t-\t.\tID=FUN_000004-T1;Parent=FUN_000004;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t15209\t15214\t.\t-\t.\tID=FUN_000004-T1.exon1;Parent=FUN_000004-T1;\n+sample\tfunannotate\texon\t14247\t14648\t.\t-\t.\tID=FUN_000004-T1.exon2;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t15209\t15214\t.\t-\t0\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t14247\t14648\t.\t-\t0\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tgene\t19533\t21705\t.\t-\t.\tID=FUN_000005;\n+sample\tfunannotate\tmRNA\t19533\t21705\t.\t-\t.\tID=FUN_000005-T1;Parent=FUN_000005;product=hypothetical protein;\n+sample\tfunannotate\texon\t21700\t21705\t.\t-\t.\tID=FUN_000005-T1.exon1;Parent=FUN_000005-T1;\n+sample\tfunannotate\texon\t19533\t21515\t.\t-\t.\tID=FUN_000005-T1.exon2;Parent=FUN_000005-T1;\n+sample\tfunannotate\tCDS\t21700\t21705\t.\t-\t0\tID=FUN_000005-T1.cds;Parent=FUN_000005-T1;\n+sample\tfunannotate\tCDS\t19533\t21515\t.\t-\t0\tID=FUN_000005-T1.cds;Parent=FUN_000005-T1;\n+sample\tfunannotate\tgene\t34843\t35679\t.\t-\t.\tID=FUN_000006;\n+sample\tfunannotate\tmRNA\t34843\t35679\t.\t-\t.\tID=FUN_000006-T1;Parent=FUN_000006;product=hypothetical protein;\n+sample\tfunannotate\texon\t34843\t35679\t.\t-\t.\tID=FUN_000006-T1.exon1;Parent=FUN_000006-T1;\n+sample\tfunannotate\tCDS\t34843\t35679\t.\t-\t0\tID=FUN_000006-T1.cds;Parent=FUN_000006-T1;\n+sample\tfunannotate\tgene\t40223\t44130\t.\t+\t.\tID=FUN_000007;\n+sample\tfunannotate\tmRNA\t40223\t44130\t.\t+\t.\tID=FUN_000007-T1;Parent=FUN_000007;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t40223\t40396\t.\t+\t.\tID=FUN_000007-T1.exon1;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t40659\t41193\t.\t+\t.\tID=FUN_000007-T1.exon2;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t41707\t42080\t.\t+\t.\tID=FUN_000007-T1.exon3;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t43409\t43609\t.\t+\t.\tID=FUN_000007-T1.exon4;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t43678\t44130\t.\t+\t.\tID=FUN_000007-T1.exon5;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t40223\t40396\t.\t+\t0\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t40659\t41193\t.\t+\t0\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t41707\t42080\t.\t+\t2\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t43409\t43609\t.\t+\t0\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tCDS\t43678\t44130\t.\t+\t0\tID=FUN_000007-T1.cds;Parent=FUN_000007-T1;\n+sample\tfunannotate\tgene\t45527\t47195\t.\t-\t.\tID=FUN_000008;\n+sample\tfunannotate\tmRNA\t45527\t47195\t.\t-\t.\tID=FUN_000008-T1;Parent=FUN_000008;product=hypothetical protein;\n+sample\tfunannotate\texon\t46753\t47195\t.\t-\t.\tID=FUN_000008-T1.exon1'..b'_000012-T1;\n+sample\tfunannotate\tCDS\t153296\t153630\t.\t+\t2\tID=FUN_000012-T1.cds;Parent=FUN_000012-T1;\n+sample\tfunannotate\tCDS\t153689\t155122\t.\t+\t0\tID=FUN_000012-T1.cds;Parent=FUN_000012-T1;\n+sample\tfunannotate\tCDS\t155789\t158975\t.\t+\t0\tID=FUN_000012-T1.cds;Parent=FUN_000012-T1;\n+sample\tfunannotate\tCDS\t159190\t164495\t.\t+\t2\tID=FUN_000012-T1.cds;Parent=FUN_000012-T1;\n+sample\tfunannotate\tgene\t167121\t169212\t.\t+\t.\tID=FUN_000013;\n+sample\tfunannotate\tmRNA\t167121\t169212\t.\t+\t.\tID=FUN_000013-T1;Parent=FUN_000013;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t167121\t168360\t.\t+\t.\tID=FUN_000013-T1.exon1;Parent=FUN_000013-T1;\n+sample\tfunannotate\texon\t168722\t169212\t.\t+\t.\tID=FUN_000013-T1.exon2;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t167121\t168360\t.\t+\t0\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t168722\t169212\t.\t+\t2\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tgene\t176699\t178916\t.\t-\t.\tID=FUN_000014;\n+sample\tfunannotate\tmRNA\t176699\t178916\t.\t-\t.\tID=FUN_000014-T1;Parent=FUN_000014;product=hypothetical protein;\n+sample\tfunannotate\texon\t178873\t178916\t.\t-\t.\tID=FUN_000014-T1.exon1;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t176887\t177172\t.\t-\t.\tID=FUN_000014-T1.exon2;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t176699\t176824\t.\t-\t.\tID=FUN_000014-T1.exon3;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t178873\t178916\t.\t-\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t176887\t177172\t.\t-\t1\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t176699\t176824\t.\t-\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tgene\t192004\t194669\t.\t+\t.\tID=FUN_000015;\n+sample\tfunannotate\tmRNA\t192004\t194669\t.\t+\t.\tID=FUN_000015-T1;Parent=FUN_000015;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t192004\t192067\t.\t+\t.\tID=FUN_000015-T1.exon1;Parent=FUN_000015-T1;\n+sample\tfunannotate\texon\t193549\t193658\t.\t+\t.\tID=FUN_000015-T1.exon2;Parent=FUN_000015-T1;\n+sample\tfunannotate\texon\t194041\t194455\t.\t+\t.\tID=FUN_000015-T1.exon3;Parent=FUN_000015-T1;\n+sample\tfunannotate\texon\t194518\t194669\t.\t+\t.\tID=FUN_000015-T1.exon4;Parent=FUN_000015-T1;\n+sample\tfunannotate\tCDS\t192004\t192067\t.\t+\t0\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample\tfunannotate\tCDS\t193549\t193658\t.\t+\t2\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample\tfunannotate\tCDS\t194041\t194455\t.\t+\t0\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample\tfunannotate\tCDS\t194518\t194669\t.\t+\t2\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample\tfunannotate\tgene\t209044\t210553\t.\t-\t.\tID=FUN_000016;\n+sample\tfunannotate\tmRNA\t209044\t210553\t.\t-\t.\tID=FUN_000016-T1;Parent=FUN_000016;product=hypothetical protein;\n+sample\tfunannotate\texon\t210548\t210553\t.\t-\t.\tID=FUN_000016-T1.exon1;Parent=FUN_000016-T1;\n+sample\tfunannotate\texon\t209044\t210474\t.\t-\t.\tID=FUN_000016-T1.exon2;Parent=FUN_000016-T1;\n+sample\tfunannotate\tCDS\t210548\t210553\t.\t-\t0\tID=FUN_000016-T1.cds;Parent=FUN_000016-T1;\n+sample\tfunannotate\tCDS\t209044\t210474\t.\t-\t0\tID=FUN_000016-T1.cds;Parent=FUN_000016-T1;\n+sample4\tfunannotate\tgene\t1092\t1466\t.\t-\t.\tID=FUN_000017;\n+sample4\tfunannotate\tmRNA\t1092\t1466\t.\t-\t.\tID=FUN_000017-T1;Parent=FUN_000017;product=hypothetical protein;\n+sample4\tfunannotate\texon\t1092\t1466\t.\t-\t.\tID=FUN_000017-T1.exon1;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tCDS\t1092\t1466\t.\t-\t0\tID=FUN_000017-T1.cds;Parent=FUN_000017-T1;\n+sample4\tfunannotate\tgene\t2126\t3537\t.\t+\t.\tID=FUN_000018;\n+sample4\tfunannotate\tmRNA\t2126\t3537\t.\t+\t.\tID=FUN_000018-T1;Parent=FUN_000018;product=hypothetical 
protein;\n+sample4\tfunannotate\texon\t2126\t2199\t.\t+\t.\tID=FUN_000018-T1.exon1;Parent=FUN_000018-T1;\n+sample4\tfunannotate\texon\t2258\t3224\t.\t+\t.\tID=FUN_000018-T1.exon2;Parent=FUN_000018-T1;\n+sample4\tfunannotate\texon\t3284\t3537\t.\t+\t.\tID=FUN_000018-T1.exon3;Parent=FUN_000018-T1;\n+sample4\tfunannotate\tCDS\t2126\t2199\t.\t+\t0\tID=FUN_000018-T1.cds;Parent=FUN_000018-T1;\n+sample4\tfunannotate\tCDS\t2258\t3224\t.\t+\t1\tID=FUN_000018-T1.cds;Parent=FUN_000018-T1;\n+sample4\tfunannotate\tCDS\t3284\t3537\t.\t+\t0\tID=FUN_000018-T1.cds;Parent=FUN_000018-T1;\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_bam/Genus_species.mrna-transcripts.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_bam/Genus_species.mrna-transcripts.fa Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,413 @@\n+>FUN_000001-T1 FUN_000001\n+TACCCCTTCGACAGCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGA\n+ACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCG\n+ATGAAAAACTGCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGG\n+TTGCGCAAATCCGAGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGG\n+TTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCT\n+TCAGCGACACCAAGAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCC\n+GACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGC\n+CCAGCTGGAGGAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGC\n+CCACAGAGCAGACGCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtac\n+aagATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCG\n+TCAGGACGGCATCTACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCA\n+ACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCTATAG\n+>FUN_000002-T1 FUN_000002\n+ATGTATGACCAGGAGCCCTACAAGTGCCATCTCTGCTCCAAGACGTTCCGAATGAAGGGCTCCCTTCGAATCCACCTGAA\n+AGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAATCCCAGTCCTACACCCGCTTCTACCACCA\n+GTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACACAGAGCCTGGAGCCCTTGGCAACGGAAAT\n+AACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGCATGCTCCAGCAGTCCCCCAGTTCGCCGGA\n+GTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTCCTTCACCACCAAGTACTTTCTGAAGAAGC\n+ACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCAGGACCTTCACCTTCCAGCAGTCGTACCAC\n+AAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGTGGACGCGCCTTCAAGGAACTGTCTACGCT\n+GCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAAGTCTGCGGTGAGTATCGATCAAACCCCTTAA\n+TTAGCCGCCCCTTATTTTGTTAG\n+>FUN_000003-T1 
FUN_000003\n+ATGATACATTCGGCTAAGACTACCTTGGTGGAGTGGACAGAGGCCATGGAAGACGGAAACAAGGGCTACCAGCTAATCGA\n+AAAGTACTACCTCGATGACCAGCAAAAGGCACGGGAGCTGAACATTAAGCGTCAGCTCTTGCAGGCGGACATCGACAAGC\n+GGCGCAAGCAGGTGGTGCTCCTCTATGACGAGCAAATGACGCTGGAGAAGAATCTGGAGCGAACCGCTTGTCTGTACAGG\n+TCTGCCCACGCGGAACGCCGCCAGATGGTGGAGACGTGGAAGAGCGCCGTGAACCAGATGACCCAGCGAGAGCACGACAT\n+TCAGCGCAGCGAGATAGAGTGTGCGGAGCTGGCCCAGAAAGCCCAGCAGACGGCTCAGACCTACAAGGAGTACGACAACC\n+AGCTAACCGAGGTCATCGAGAACAACCGCCAGGTGGAGCTCGCGATCGAGTCCCTCAACGAGGAAAGCTCCGACATGAAA\n+AACCAAATCCAGATCCTGATTGATGCCACATTACTGAAGGAGCGCGAGATCGACGGACTGCGTCGGGAACTGGAGAACCT\n+TTCGAACCGGGTTCACCTGCAGCGCATGGATAACCGTAGTCAGATGAAAAAGCGCGATGAGAAGGCCAAGGAGCTGGAGA\n+ACTTCGCTTCGGTGATGGAGAAGGTTAACGCTCGGCTGAAGTCTGTACAGAACAAGGCCCTCAATGCGGAGCAGCGGCTG\n+CAAATTCTGGAGGAGATGATGCAGGCGGAGGAGACGGCTCTGCGAAACCTGGACAAGGAGCAGGAGAAGGTCAACGAGAT\n+GTTGTACCGCACCCAGAGGCAGGTCATCGAGCTGCAGGACGAGGAGAAGGTCTTGAAGGTGCAGAACGACTCGCTCAACT\n+CCAACCTGGCAGCGATCAATCGCAATCAGCAGCAGGTCAATAACGAACTCAAGCGGCAGACGGAGATCCATTACAGCCTG\n+TCCTTTAAGTGCTTGGAGGCGGAGCGGCGTCTCGCCGAGATCAAGGGTCTGGCAGATGACCCCGAGGTAGAGGCCACAAA\n+CATGGCCCGGCTTAACACTCTGGAGCAGGAGTATGAAAAGTTGCAACGCCTCATCGCCACCACGGAGGCGCAGAATAAGA\n+AGCTAAACTATAACATGAACAACCTGGTCGTCCAGTACAACGCCGACGAGAAGGAGCTCGAAATGGTCAGGTTCAAGATT\n+AAAGAGGCCCAGGTGTACTGCGAGGGCACCGTCAAGAGACTGCGCCAGATTCGATACGAGAATTCCGAGCTCATAGTCGA\n+CCTCAACATGGTCAAGATGCGCTGCAGCGACCTAGAGGTTGGCATCGGAGGCTGCGAGCAGGGCACCTACGACCTAGAGC\n+AGCACCGCCTTGCCTTCCGGCGCGCCATCAAAGATCGCACCGTTGAGCTTCGCAGCCAAGAGGATGTGCTCCTCCTCAAG\n+AAGAAGCATCTTAACGAGGAACTAAGCACCCTGCGAGCCGACCTCGGGGAGCGGAAGAAGCAGATTGAGGCGATGAAGGC\n+GCGCTTTGAACTTACCGCCCAATTGCTAGGCAAAAACGAGGACGGCTCCATCATGACCAGCACACAGCTGAAGGTGGTGA\n+GCGCCCAGGAACGGCAGATGCTGGCCGACGAGGGCGATGCCCTCAACAAAAAGGTTCTCAAGGCTGAGAAGGAAGTGGTC\n+GCCCTGGAAAACACACTGCGCCAGTTCGACAAATCGAACGATAACTACCGAAAAACATTCCGATCTGTGGACGAAAACTC\n+AAAGGGTGAGTTATAA\n+>FUN_000004-T1 
FUN_000004\n+ATGCGTTGCGTTTTCTGTGGATCGGGCAGCGAGCAGCAGCATTCGCGATGGGAAATCAAGATGCTGCAGGAGAGCTGCCG\n+CACTGATCATGGATTCCACCAGGACTCGCAAGCTATCCAATATCTGTATGAGATCCTGGCCTCTTACAACCGCGACGAAC\n+AGCGCGCCTTCTTACAGTTTGTGACTGGATCACCACGCCTTCCGACTGGAGGATTCAAGGCCCTTACGCCACCACTGACT\n+ATTGTACGCAAGACGTTGGATGAGAACCAAAACCCTAACGATTACTTACCATCTGTGATGACCTGTGTCAACTATCTAAA\n+GTTGCCCGACTACTCTAGTCGCGAGGTGATGAGGCAGAAGCTGAAAGTGGCTGCTAACGAAGGCAGCATGTCTTTCCACC\n+TCTCATAA\n+>'..b'GATCCAAGCATTGCGGATGGTGCAGGCCGAGACCCCGCCCTATATTAAACA\n+ATGTCATAGGAACGACCCGAAATTGGTGGACTGCTTTATCGGAGCTATTGAACACCTAAAGCCATATTTGGCCAATGGCA\n+TTCCTGATATTCAGCTGCCCTCTGTGGAGCCCTTTAAGATGGACACCCTTGCCCTGCAGTTAACAGAGGGTCCCCAGGGG\n+TATAAGATCACGCTGAAGAACATGGAGGCCTTCGGGGCCAGCAACTTCAAGGTGACATCCCTGAAACTGAGCGAAGGAAG\n+CGAGCCCTTCAAGGCGAAGATCGTGATGCCCAAGCTAAAGATTGAGGCTAAATACACGAGCTCCGGGGTCCTGCTGATCC\n+TGCCCGCCTCCGGAGGTGGGGACTTCCATGCTAACTTCGAGGGTGTGAGTGCCGATCTCACAGGAAAGACATCCATTCAC\n+GCCTTCAAGGGCGCTAACTACCTCCACATCGATGCTCTCAGCTTGGTTCTGGATGTGAAGGATGTGAAAATGAGCATCTC\n+AGGTGCCTTCAACAACAATCGAATTCTGCTGGAGGCCACCAATCTGTTTCTGCGGGAAAACTCTCAAGTCGTTTTGGAGG\n+CTATGCAGGCTCAATTGCAGAAAAAATTGGCTAGCGAGTTCGGCAAACTCGCCAACCAGCTCCTGAAGAATGTTCCTGTA\n+GAGCAATTCTACGTGGACTAG\n+>FUN_000016-T1 
FUN_000016\n+ATGGCATCCAAGTCCTTCGATTTGGTCATTGAGGAAAAGACAAAGAAACCGGAACGGCTCTACCAACCTCGTCGGATGCG\n+ATGGCTTAAATATATCATCCTGCCGGCAGTCTTCTCCTTTGCGCTTCTGCTGATCCTGGTCAATGTGGACTTCTCCGATA\n+ATAGTGAGGATTCCACGCATCTGGGCAATGACACATCGCTCATTATATCTGGCTATGGATTTGAAAATAACACACTGCGC\n+CGGGGATTTTTCTCCGGAGGAATTGCATTGCATAGCCTTGTCATCGAGAACTGCACCATAGTTCATATCAACGACGCAGC\n+TTTTAATCAGGAAAGCACTGTGAACATAACAAGCCTGCAGCTCATTAATGTGCAATTGGAAAACTTGACTGAATCTGCCC\n+TGGAAGGTCTTCAAAAGCTTCAGAACTTTACTCTAGTCAATGAAAATAATCACTTCAGGCCATTTGGATTCCTCTCAGCT\n+GTGGCTGAATCGTTAGTCAGTGCGGAAATCCACCAGTCACTAGCCGCGGCAATATCGTATTCAGTGTGTGATTTTTTGGG\n+ATCCCGGAATTTCCCACAGTTGAAATATTTAGATCTGAGTGGCACACACTTGGACAAAAGCCTTATTAAAGAGTCCTTTG\n+ACAATCTGCCCGCGTTGGAACAACTACTTCTAAGGAACTGTGGGTTGGGCAACATCGAGTGGGAAATCGTGAGGCCAAGA\n+CTCAAATTGTTGCACTATTTGGACTTGGGTGGAGCTCAAAAGACGGGCAACTATGAACATCAGTTGGACGTATCCGCGTT\n+CTCTCCTGAGACAACTACTAACGCTGAAGAGATATCTACTATCCTAGCGAAGAGAGCGATGGCTCCAGAAGTAGTGGGCA\n+CCACTACACTAGGACCTACTACTTCCATAGAAATTTCACCACCATCAACACAAAGTACAACAACACCCAAAGAAGAAAGT\n+ACATCTATGACAGAAACAACGATATTAACAACACCATCGCCAAAATGCGAAGAGGAACTTTGCCAGGATCTAGAGTGCTC\n+CAGAATTACCACCGATACGGTTGCATCTGCAGATCTGGGAAAATCGTCGTGCCAGGATGGTTTACTGGTGGAGATCTGCG\n+AATCGACTTGCACCACTCCCACATTCTTCTGTGTGATATTAGGCGAGAACTTTACCTCCGCATCCAACTGTTGTTCCCAC\n+CATACCATGCGATGTGTGGTCTCTGCACAGGTCTCCTGGTTCGAGGACCACAGCGGACTGGTCATTGGCTTGGGAGTGGG\n+TCTCCTCTTCATCGGCAGCTTCCTCGGCATGCTCATCGTCTTCGGAACCCTGCGCCTGAATCCATCCTGGTTGCGAGGCA\n+ATAAGCGGCGGGAGTCGAATACGATAGGACTGATTCAGGGAAGATTTGAGAAGGACCCGTATGAGCAAGTGGGGTAG\n+>FUN_000017-T1 FUN_000017\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000018-T1 
FUN_000018\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGTTCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCGATCGACAGTCTCGTTCTCGGAG\n+TCGAACTCGCAGCGG\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_bam/Genus_species.proteins.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_bam/Genus_species.proteins.fa Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,155 @@\n+>FUN_000001-T1 FUN_000001\n+YPFDSHRFVQYNPTSLERNFKYDVLTEHDLGVTVDLINRELYQADSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSW\n+LRKSEYISTEQTRFQPQNLENIEAKVGYNVKKSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFP\n+DFTNWKFPCAQVIFDSDPAPAGKNVPAQLEEMSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFINGELYKEEEEYEY\n+KIAREYNWNVKTKASKGYEENYFFVMRQDGIYYNELETRVRLNKRRVKVGQQPNNTKLVSIFMRIHL\n+>FUN_000002-T1 FUN_000002\n+MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPNPSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGN\n+NSTCTASQPYALSGALSMLQQSPSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCEVCGEYRSNPLISRPLFC\n+>FUN_000003-T1 FUN_000003\n+MIHSAKTTLVEWTEAMEDGNKGYQLIEKYYLDDQQKARELNIKRQLLQADIDKRRKQVVLLYDEQMTLEKNLERTACLYR\n+SAHAERRQMVETWKSAVNQMTQREHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELAIESLNEESSDMK\n+NQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRDEKAKELENFASVMEKVNARLKSVQNKALNAEQRL\n+QILEEMMQAEETALRNLDKEQEKVNEMLYRTQRQVIELQDEEKVLKVQNDSLNSNLAAINRNQQQVNNELKRQTEIHYSL\n+SFKCLEAERRLAEIKGLADDPEVEATNMARLNTLEQEYEKLQRLIATTEAQNKKLNYNMNNLVVQYNADEKELEMVRFKI\n+KEAQVYCEGTVKRLRQIRYENSELIVDLNMVKMRCSDLEVGIGGCEQGTYDLEQHRLAFRRAIKDRTVELRSQEDVLLLK\n+KKHLNEELSTLRADLGERKKQIEAMKARFELTAQLLGKNEDGSIMTSTQLKVVSAQERQMLADEGDALNKKVLKAEKEVV\n+ALENTLRQFDKSNDNYRKTFRSVDENSKGEL\n+>FUN_000004-T1 FUN_000004\n+MRCVFCGSGSEQQHSRWEIKMLQESCRTDHGFHQDSQAIQYLYEILASYNRDEQRAFLQFVTGSPRLPTGGFKALTPPLT\n+IVRKTLDENQNPNDYLPSVMTCVNYLKLPDYSSREVMRQKLKVAANEGSMSFHLS\n+>FUN_000005-T1 
FUN_000005\n+MSFCDFLRDAALGANSANLSIVAAALSAARDVGGGSDGGGSAGGATPATGASASSVGNTSAVGASSSSNSSAGQAASSNS\n+NNVTATGSGSAPGGGPTSTGTTSGTQHGSGSGAAAAVDSESDDSEVGRLQALLEARGLPPHLFGALGPRVTHILHRTIGN\n+SSSSKANQLLQGLQSHDESQQLQAAIEMCQMLVMGNEDTLAGFPIKQVVPALIQLLRMEHNFDIMNNACRALAYMLEALP\n+RSSGTVVEAVPVFLEKLQVIQCMDVAEQSLSALEILSRRHNKAILQANGISACLTYLDFFSIVAQRAALAIAANCCLNMH\n+PEEFHFVAESLPLLARLLSQQDKKCIESVCSAFCRLVESFQHDGQRLQQIASPDLLKNCQQLLLVTPAILNTGTFTAVVR\n+MLSLMCCSCPDLAISLLRNDIAATLLYLLTGNAEPAAASATHVELISRSPSELYELTCLIGELMPRLPLDGIFAVDSLLD\n+RPTLNTQDQVHWQWRDDRGSWHNYSTIDSRLIEAANQSSEDEISLSTFGRTYTVDFHAMQQINEDTGTTRPVQRRLNHNY\n+VAPMSAGQDLTTTSAGSAAAGGASTSAAAAAASSNNNNNNNNNPPGNSVNLNQVKRRPSLDARIACLKVRNKMFCLFTRY\n+TLKLNNLPNYNVFFMKYFRRHS\n+>FUN_000006-T1 FUN_000006\n+MKFRALMQDPLYMKEFQAIVATLTKLAKDCVMILGSRQMHFIVNEDQSSAASPLVWAGITAEEYFPEYRMEAAHPDQEYI\n+VLGVSSANLGRALSVLRGGGVNSCKLKLQRIQFPCISVIASVLTSSSTEAREVVHDVPVTIIPGSDWSAYVVPRVPNSQL\n+ALGLPSLRLLKSLIDKLKNISPSLEFQVNVDGELNVIATSEMSTVTSRFQKLLIRTVSGSQQEASCSVDSRKASAFFGAL\n+QLPNEELTIGIDREHSIHLQIDVRQDVVLHSILPAVCM\n+>FUN_000007-T1 FUN_000007\n+MCGNPAVGNGTRALILVGGYGTRLRPLTLSTPKPLVEFANKPILLHQLEALVDAGCRQVILAVSYRAEQMEKELKVEAKK\n+LGVELIFSHETEPLGTAGPLALAKTILAASSEPFFVLNSDVICDFPFKQLVQFHCNHGKEGTIVVTKVEEPSKYGVVLYD\n+ENGCIKNFIEKPQEFVSNKINAGIYIFNPSVLDRIEVKPTSIEKEVFPEMTQQQELYAMDLTGFWMDIGQPKDFLTGMCL\n+YLSSLRQKQSPKLYTGPGVVGNVLVDPTAKIGEGCRIGPNVTIGPDVVIEDGVCIKRSTILKGAIVRSHSWLDSCIVGWR\n+STVGRWVRIEGITVLGEDVIVKDELYINGGQVLPHKSIAASLRGAIVQAGQLVILPDEEVFSHVQGVWNLSSDQGNLGSF\n+VVTNIRLVWFADANETFNISLPYLQIESSKYGPALVIQTAETGGGYVLGFRVDPAERLNELFKELSSLHTVYGEHPNFGI\n+QYNANDARRRLEAASEEAAQASQIKVDNFEELDERQEREINTKLNSYLAEGCLGKVPSQGERAPVYCKELGFAMEPIGDG\n+YKLQDLWNVMPTKMETME\n+>FUN_000008-T1 
FUN_000008\n+MKTLSVRLHRGTEFIKDTVHKALVMSAPTPVAPATAPAPKIVDHSLKRKLSGAGGLMGCSSIGSMTSSIAGSSRSHHYAL\n+TSQVASSQVIPLPSQVPTAAFLRTYTVAPTALHRSAAARKRNPSTDSLLMDLCLFKPIRPMPITPIKIHKFRGFEVKKPK\n+FVPAGNPDSEDDEDNDEDGTVRKPKPSNLTLPTISDSAFVPMPYIETTNTAINATTTTNSGSRSRSLNTHTSGSAQAITK\n+PKRRRRAPMLTAKRRRKALDTELTTSADAGTEDKAPAVRKATAARGGSKRSRGESITAPTPAEPIKSPVAIKAPTKRKST\n+SRSEAAKRSRVASVQNDTVLTATSTTSADSIRKAATKRIAANEKVAKRSRGSAALSARPSPPMTRQRARQQISAST\n+>FUN_000009-T1 FUN_000009\n+MVTLRLPWCIRHKPPLCRIGLSHGCECDNSKKMAASSHAPESDRRAQRLRTQSNWNPPDHSALSLGKLVSRKLTPTAVGH\n+WVVGRQRAACACAGGPNADWTDGQPIESSRGCIFQPAPHCHGGRIARHFG\n+>FUN_000010-T1 FUN_000010\n+MDFIHEAVGTSAAPSANPGPMPLCQPVRLELPFSPRHSFALGHFPFQLCPCLYAVWVQTMGLVLLLLIVFAPFLFRVLFK\n+PCNPKRYLTT\n+>FUN_000011-T1 FUN_000011\n+MEVDLDESIKEAAPEKKVHSITRSSPSPKRAKNSSPEPPKPKSTKSKATTPRVKKEKPAADLESSVLTDEERHERKRASA\n+VLYQKYKNRSSCLNPGSKEIPKGSPDCLSGLTFVVTGVLESMEREEAESVIKEYGGKVMTVVGKKLKYLVVGEEAGPKKL\n+AVAEELNIPILSEDGLFDLIREKSGIAKQVKEEKKSPK'..b'YVSVDESHSAASKSPVPGTGGGTEGYPHRVPTIECEEPSIEEDENSSERRHLKVGGQDTNRLSLDRSRSDETGSW\n+MTVECDEFIGSDTSDNEPRTLEPDRNVLETQATLEDANPLEYSNCATPTSDLNILLTPPNASPQIEKSVLETFEKYTGSS\n+DTGKKKNTLDKQSDRSKSSDSWTSGEKDTSPQRQQDWSLSVGKEKSSVEEESSVSCSIARPLGISQDFGKEEARKCQELK\n+QRMLQLEVGKEEITPTPSNEQTPTNEPKILVSKKPSTPTLEKQSPIDLGTSTESYLEPIEERIAKILDRGGARTEDSESS\n+SGGSRKPPRIEKPARANAGKKLSVTRADAGKSGSDRSSQESKSSFDSKGSLSVESRGSFETESSSGSLGAAQRRGELAQK\n+EQQSTWRPFPIESSNSSSTDDPWHHVETDGGYERYDAQNPLRDSSDSDVKEASPDDQKDASDASYQDELNDFPATFGYPA\n+MTSSLGGIGVNPTDIIGYSTGFTLGRTLSRISERSTASEKSSMEDDVSKASTHSVSMRDESVGSTDHQPSLSSDSRSNTN\n+LAYISDADRRTSAEMPEIPCDSATGDRLSSFGSLNEPKSPTLVTGRFSVTHVDEQQGDDVERHTLMCLSNAGSQDSEDWP\n+LPEIPFDHVPVKPADSLYAMPDLDKPVPKSFCWKASLSFQQSQDSLDWPSPPSSAIGAPIIVENIETYYASEVQSADKVI\n+LDEEMAVGPPDVAKVLPYEDTAYLMSAAFDDNDFGNEQLQPDTVSCLSSTLSAASCLSSSLNVSCTTSSTQATARALRKN\n+SSPEVIVAQPTRSPAPRSPLSEDELFSSDDVFMPGTIKVQLSPDAQLRKLSKGSNNSDTSIDDILSGSTTYLEDQTTVRK\n+NYEARLSSGGGGASCKKCSHSSHSEEETSSLGTDLDGTVRMGGLQQKKCTHSSHSEDTSIGLSISEWSTGTNTVRQYANL\n+SGSDSLSAVSTHSCAKSEKSNQTKSSISSINKSAESLNEQSGGSSFSHKFSGDNGSSDGL
RYDMLSNSETDKLSEATSAT\n+RSDDTTLTLTEMAHTISEWSTSSSRTLVGVAPGEYLPLKQALSGNKTSLSSPSEEKRCALPQVHRRSGSNGNQARAAQEH\n+ADSQTGPETSAAARKRRSLEMMSKLYQSQEICSESESPFVERLYAHSEKLTERYQSQEFVPLHGGPPASHLASSTTSQIQ\n+TQQPQQVRQKPRAPQPPTKPKPAVTRPIMQALLNKMKQPGLAEQAAEAAEAEEKKAMIAASAVAAKPPPPPVPTVPPIVT\n+PSDLPGDAVAPPPKPLAKHHSYDDRTLSKTQIREFKTTSKQLRQSSSFHEHMLSKSQQSSQELPMRIDEERDPHSTSSAT\n+NTTTTTNTLNSESTEPNSPQMPQRADKLVRCSPYYSSSLSSESPPNQLLQKPPRKTATQLSAGAVAASLKSPPSGNDTDS\n+SLDVRGQEAKMRSRGYRKKRQLPVKRMRANLTAAALLEQAESSECSEGYVPEVDSGSSEYSSCQRDDQYLEFDEELERDQ\n+TDDYEDYPQYSGKFESLDMSDNVDEMGFPRYDRLSHITKPMYHQALVMERPNPVQLPAPANHPMPPATGQPVKPARTKKR\n+QFKREDSTAAGTSGHSTAAPQVRPYHGRSYCNPEESEYETRGGGLSDELANSSEDSCSGFGGDAGASGSGTIRRGTTKGA\n+GQDQEQGTGGQARHVPYPDFLSDYESEPIEYERYACGLDIRVDPPPKFHDSDELSDQ\n+>FUN_000013-T1 FUN_000013\n+MSLDRRGEITTPPTRYDLTLGSDKSSSLSRSEAGTYDVIQAEIQHAKRQELATGVATASHQNGNGNGNGHTLSTQHDIEA\n+EVKKRKWPTEPSYFLAKELLMTERTYKKDLDVLNTTFRQVLSLGDVEQLQPLFELLDSLAQHHNLFLRDIEHRMVQWEGR\n+GGHEAHRIGDVMMKHMAALPIYDEYVQTHLDILHCMNDMYEGDERFRQVYKEFEQQKVCYLPIGELLLKPLNRLLHYQLI\n+LERLCDYYGEEHIDYADAMAVHHLLVRSTKGIRSQLPDSANFVELCELQRDINFEQLVQPHRRLIRQGCLLKHSKRGLQQ\n+RMFFLFSDLLLYGSKSPLDQSFRILGHVPVRSLLTENAEHNTFSIFGGQCAITVSAGTTAEKTLWLAELSKAAADIKNRP\n+PNMQLQLTTLKNCSSSEEGLDLFGLSNGNNSSLNSSVNGGGPLTTQQQKLQLQQQQQNRTQPSRSNTALHVCWHRGATVG\n+LGDHLIAAEHQLSGYLLRKFKNSSGWQKLWVVFTSFCLYFYKSYQDEFALASLPLLGYTVGPPGHQDAVQKEFVFKLSFK\n+NHVYFFRAESAHTYNR\n+>FUN_000014-T1 FUN_000014\n+MSTPARRRLMRDFKRLQEDPPTGVSGAPTDNNIMIWNAVIFGPHDTPFEDGTFKLTIEFTEEYPNKPPTVRFVSKVFHPN\n+VYADGGICLDILQNRWSPTYDVSAILTSIQSLLSDPNPNSPANSTAAQLYKENRREYEKRVKACVEQSFID\n+>FUN_000015-T1 FUN_000015\n+MNKAVCLVIVIQALRMVQAETPPYIKQCHRNDPKLVDCFIGAIEHLKPYLANGIPDIQLPSVEPFKMDTLALQLTEGPQG\n+YKITLKNMEAFGASNFKVTSLKLSEGSEPFKAKIVMPKLKIEAKYTSSGVLLILPASGGGDFHANFEGVSADLTGKTSIH\n+AFKGANYLHIDALSLVLDVKDVKMSISGAFNNNRILLEATNLFLRENSQVVLEAMQAQLQKKLASEFGKLANQLLKNVPV\n+EQFYVD\n+>FUN_000016-T1 
FUN_000016\n+MASKSFDLVIEEKTKKPERLYQPRRMRWLKYIILPAVFSFALLLILVNVDFSDNSEDSTHLGNDTSLIISGYGFENNTLR\n+RGFFSGGIALHSLVIENCTIVHINDAAFNQESTVNITSLQLINVQLENLTESALEGLQKLQNFTLVNENNHFRPFGFLSA\n+VAESLVSAEIHQSLAAAISYSVCDFLGSRNFPQLKYLDLSGTHLDKSLIKESFDNLPALEQLLLRNCGLGNIEWEIVRPR\n+LKLLHYLDLGGAQKTGNYEHQLDVSAFSPETTTNAEEISTILAKRAMAPEVVGTTTLGPTTSIEISPPSTQSTTTPKEES\n+TSMTETTILTTPSPKCEEELCQDLECSRITTDTVASADLGKSSCQDGLLVEICESTCTTPTFFCVILGENFTSASNCCSH\n+HTMRCVVSAQVSWFEDHSGLVIGLGVGLLFIGSFLGMLIVFGTLRLNPSWLRGNKRRESNTIGLIQGRFEKDPYEQVG\n+>FUN_000017-T1 FUN_000017\n+MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAGESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKR\n+FIRNPNYVKANEFYDKMLSSEYVSKRYKDLPPPHPGFGADQPPA\n+>FUN_000018-T1 FUN_000018\n+MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFDLKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLG\n+VTVDLINRELYQADSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK\n+KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPCAQVIFDSDPAPAGKNVPAQLEE\n+MSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFINGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGI\n+YYNELETRVRLNKRRVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIGETEKTSED\n+AAVGAQAASGADSPAQVARDRQSRSRSRTRS\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_bam/Genus_species.stats.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_bam/Genus_species.stats.json Mon Oct 04 19:39:38 2021 +0000 |
b |
@@ -0,0 +1,120 @@ +{ + "format": "annotation", + "command": "/home/abretaud/miniconda3/envs/__funannotate@1.8.7/bin/funannotate predict --input /tmp/tmp2z22js7e/files/c/c/3/dataset_cc3f66b5-ec9b-4669-93d6-4ddeac0e33c1.dat --out output --database /home/abretaud/.planemo/planemo_tmp_z_14xthq/test-data/funannotate_db --species Genus species --isolate --strain --organism other --ploidy 1 --SeqCenter CFMR --SeqAccession 12345 --name FUN_ --numbering 1 --rna_bam /tmp/tmp2z22js7e/files/9/6/5/dataset_965b5091-b838-4f4a-8ec8-9fb84c12cdc5.dat --transcript_evidence /tmp/tmp2z22js7e/files/d/a/e/dataset_daea4ce7-3191-40eb-ad83-b35e9e058d46.dat --p2g_pident 80 --p2g_prefilter diamond --min_training_models 3 --busco_seed_species fly --busco_db insecta --evm-partition-interval 1500 --min_intronlen 10 --max_intronlen 3000 --min_protlen 50 --repeat_filter overlap blast --cpus 1", + "organism": "Genus_species", + "software": { + "name": "funannotate", + "version": "1.8.7", + "date": "2021-07-22", + "resources": { + "merops": { + "type": "diamond", + "version": "12.0", + "date": "2017-10-04", + "num-records": "5009" + }, + "uniprot": { + "type": "diamond", + "version": "2021_03", + "date": "2021-06-02", + "num-records": "565254" + }, + "dbCAN": { + "type": "hmmer3", + "version": "9.0", + "date": "2020-08-04", + "num-records": "641" + }, + "pfam": { + "type": "hmmer3", + "version": "34.0", + "date": "2021-03", + "num-records": "19179" + }, + "repeats": { + "type": "diamond", + "version": "1.0", + "date": "2021-07-19", + "num-records": "11950" + }, + "go": { + "type": "text", + "version": "2021-07-02", + "date": "2021-07-02", + "num-records": "47228" + }, + "mibig": { + "type": "diamond", + "version": "1.4", + "date": "2021-07-19", + "num-records": "31023" + }, + "interpro": { + "type": "xml", + "version": "86.0", + "date": "2021-06-03", + "num-records": "38913" + }, + "busco_outgroups": { + "type": "outgroups", + "version": "1.0", + "date": "2021-07-19", + "num-records": "8" + }, + 
"gene2product": { + "type": "text", + "version": "1.70", + "date": "2021-06-15", + "num-records": "34039" + } + } + }, + "assembly": { + "num_contigs": 4, + "length": 227430, + "mean_length": 56857.5, + "N50": 215740, + "L50": 1, + "N90": 215740, + "L90": 1, + "GC_content": 42.86 + }, + "annotation": { + "genes": 18, + "common_name": 0, + "mRNA": 18, + "tRNA": 0, + "ncRNA": 0, + "rRNA": 0, + "avg_gene_length": 2775.33, + "transcript-level": { + "CDS_transcripts": 18, + "CDS_five_utr": 0, + "CDS_three_utr": 0, + "CDS_no_utr": 18, + "CDS_five_three_utr": 0, + "CDS_complete": 16, + "CDS_no-start": 1, + "CDS_no-stop": 1, + "CDS_no-start_no-stop": 0, + "total_exons": 54, + "total_cds_exons": 54, + "multiple_exon_transcript": 15, + "single_exon_transcript": 3, + "avg_exon_length": 563.63, + "avg_protein_length": 571.83, + "functional": { + "go_terms": 0, + "interproscan": 0, + "eggnog": 0, + "pfam": 0, + "cazyme": 0, + "merops": 0, + "busco": 0, + "secretion": 0 + }, + "pct_exon_overlap_transcript_evidence": 31.48, + "pct_exon_overlap_protein_evidence": 9.26 + } + } +} \ No newline at end of file |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_bam/Genus_species.tbl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_bam/Genus_species.tbl Mon Oct 04 19:39:38 2021 +0000 |
b |
@@ -0,0 +1,276 @@ +>Feature sample +1 215740 REFERENCE + CFMR 12345 +<2331 3254 gene + locus_tag FUN_000001 +<2331 3254 mRNA + product hypothetical protein + transcript_id gnl|ncbi|FUN_000001-T1_mrna + protein_id gnl|ncbi|FUN_000001-T1 +<2331 3254 CDS + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000001-T1_mrna + protein_id gnl|ncbi|FUN_000001-T1 +5802 4883 gene + locus_tag FUN_000002 +5802 5797 mRNA +5539 4883 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000002-T1_mrna + protein_id gnl|ncbi|FUN_000002-T1 +5802 5797 CDS +5539 4883 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000002-T1_mrna + protein_id gnl|ncbi|FUN_000002-T1 +10557 8696 gene + locus_tag FUN_000003 +10557 10549 mRNA +10462 8696 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000003-T1_mrna + protein_id gnl|ncbi|FUN_000003-T1 +10557 10549 CDS +10462 8696 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000003-T1_mrna + protein_id gnl|ncbi|FUN_000003-T1 +15214 14247 gene + locus_tag FUN_000004 +15214 15209 mRNA +14648 14247 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000004-T1_mrna + protein_id gnl|ncbi|FUN_000004-T1 +15214 15209 CDS +14648 14247 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000004-T1_mrna + protein_id gnl|ncbi|FUN_000004-T1 +21705 19533 gene + locus_tag FUN_000005 +21705 21700 mRNA +21515 19533 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000005-T1_mrna + protein_id gnl|ncbi|FUN_000005-T1 +21705 21700 CDS +21515 19533 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000005-T1_mrna + protein_id gnl|ncbi|FUN_000005-T1 +35679 34843 gene + locus_tag FUN_000006 +35679 34843 mRNA + product hypothetical protein + transcript_id gnl|ncbi|FUN_000006-T1_mrna + protein_id gnl|ncbi|FUN_000006-T1 +35679 34843 CDS + codon_start 1 + product hypothetical protein + transcript_id 
gnl|ncbi|FUN_000006-T1_mrna + protein_id gnl|ncbi|FUN_000006-T1 +40223 44130 gene + locus_tag FUN_000007 +40223 40396 mRNA +40659 41193 +41707 42080 +43409 43609 +43678 44130 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000007-T1_mrna + protein_id gnl|ncbi|FUN_000007-T1 +40223 40396 CDS +40659 41193 +41707 42080 +43409 43609 +43678 44130 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000007-T1_mrna + protein_id gnl|ncbi|FUN_000007-T1 +47195 45527 gene + locus_tag FUN_000008 +47195 46753 mRNA +46330 46214 +46157 45527 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000008-T1_mrna + protein_id gnl|ncbi|FUN_000008-T1 +47195 46753 CDS +46330 46214 +46157 45527 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000008-T1_mrna + protein_id gnl|ncbi|FUN_000008-T1 +79527 78685 gene + locus_tag FUN_000009 +79527 79519 mRNA +79068 78685 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000009-T1_mrna + protein_id gnl|ncbi|FUN_000009-T1 +79527 79519 CDS +79068 78685 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000009-T1_mrna + protein_id gnl|ncbi|FUN_000009-T1 +87202 88320 gene + locus_tag FUN_000010 +87202 87207 mRNA +88054 88320 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000010-T1_mrna + protein_id gnl|ncbi|FUN_000010-T1 +87202 87207 CDS +88054 88320 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000010-T1_mrna + protein_id gnl|ncbi|FUN_000010-T1 +133587 137862 gene + locus_tag FUN_000011 +133587 134504 mRNA +134720 135510 +135569 136284 +137516 137862 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000011-T1_mrna + protein_id gnl|ncbi|FUN_000011-T1 +133587 134504 CDS +134720 135510 +135569 136284 +137516 137862 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000011-T1_mrna + protein_id gnl|ncbi|FUN_000011-T1 +144294 164495 gene + locus_tag FUN_000012 +144294 144551 
mRNA +149012 149244 +149367 149588 +149654 149897 +149952 150112 +150174 150248 +151966 152072 +152314 152429 +152496 152751 +153296 153630 +153689 155122 +155789 158975 +159190 164495 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000012-T1_mrna + protein_id gnl|ncbi|FUN_000012-T1 +144294 144551 CDS +149012 149244 +149367 149588 +149654 149897 +149952 150112 +150174 150248 +151966 152072 +152314 152429 +152496 152751 +153296 153630 +153689 155122 +155789 158975 +159190 164495 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000012-T1_mrna + protein_id gnl|ncbi|FUN_000012-T1 +167121 169212 gene + locus_tag FUN_000013 +167121 168360 mRNA +168722 169212 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000013-T1_mrna + protein_id gnl|ncbi|FUN_000013-T1 +167121 168360 CDS +168722 169212 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000013-T1_mrna + protein_id gnl|ncbi|FUN_000013-T1 +178916 176699 gene + locus_tag FUN_000014 +178916 178873 mRNA +177172 176887 +176824 176699 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000014-T1_mrna + protein_id gnl|ncbi|FUN_000014-T1 +178916 178873 CDS +177172 176887 +176824 176699 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000014-T1_mrna + protein_id gnl|ncbi|FUN_000014-T1 +192004 194669 gene + locus_tag FUN_000015 +192004 192067 mRNA +193549 193658 +194041 194455 +194518 194669 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000015-T1_mrna + protein_id gnl|ncbi|FUN_000015-T1 +192004 192067 CDS +193549 193658 +194041 194455 +194518 194669 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000015-T1_mrna + protein_id gnl|ncbi|FUN_000015-T1 +210553 209044 gene + locus_tag FUN_000016 +210553 210548 mRNA +210474 209044 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000016-T1_mrna + protein_id gnl|ncbi|FUN_000016-T1 +210553 210548 CDS +210474 209044 + codon_start 
1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000016-T1_mrna + protein_id gnl|ncbi|FUN_000016-T1 +>Feature sample4 +1 7560 REFERENCE + CFMR 12345 +1466 1092 gene + locus_tag FUN_000017 +1466 1092 mRNA + product hypothetical protein + transcript_id gnl|ncbi|FUN_000017-T1_mrna + protein_id gnl|ncbi|FUN_000017-T1 +1466 1092 CDS + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000017-T1_mrna + protein_id gnl|ncbi|FUN_000017-T1 +2126 >3537 gene + locus_tag FUN_000018 +2126 2199 mRNA +2258 3224 +3284 >3537 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000018-T1_mrna + protein_id gnl|ncbi|FUN_000018-T1 +2126 2199 CDS +2258 3224 +3284 >3537 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000018-T1_mrna + protein_id gnl|ncbi|FUN_000018-T1 |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_bam/fly.parameters.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_bam/fly.parameters.json Mon Oct 04 19:39:38 2021 +0000 |
[ |
@@ -0,0 +1,1 @@ +{"augustus": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-22", "path": "/tmp/tmp2z22js7e/job_working_directory/000/5/working/output/predict_misc/ab_initio_parameters/augustus/species/genus_species"}], "genemark": [{}], "codingquarry": [{}], "snap": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-22", "path": "/tmp/tmp2z22js7e/job_working_directory/000/5/working/output/predict_misc/ab_initio_parameters/genus_species.snap.hmm"}], "glimmerhmm": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-22", "path": "/tmp/tmp2z22js7e/job_working_directory/000/5/working/output/predict_misc/ab_initio_parameters/glimmerhmm"}]} \ No newline at end of file |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_scratch/Genus_species.cds-transcripts.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_scratch/Genus_species.cds-transcripts.fa Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,241 @@\n+>FUN_000001-T1 FUN_000001\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000002-T1 FUN_000002\n+ATGCCTCTGTACGGCGTTTCTCCAGCGTCTGCTCTGTGGGCAGGAAGTAGGCGACAAACTGTTCGCCGCTCTCGTCCATC\n+ACACCACGAATCATGGCCTGCGACATCTCCTCCAGCTGGGCGGGCACAGAGTCTCCTCCCGAAGCGACTTCTTGACGTTG\n+TAACCGACCTTGGCCTCGATGTTCTCCAGGTTCTGGGGCTGGAAGCGCGTCTGCTCGGTGGAGATGTACTCGGATTTGCG\n+CAACCATGA\n+>FUN_000003-T1 FUN_000003\n+ATGTATGACCAGGAGCCCTACAAGTGCCATCTCTGCTCCAAGACGTTCCGAATGAAGGGCTCCCTTCGAATCCACCTGAA\n+AGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAATCCCAGTCCTACACCCGCTTCTACCACCA\n+GTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACACAGAGCCTGGAGCCCTTGGCAACGGAAAT\n+AACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGCATGCTCCAGCAGTCCCCCAGTTCGCCGGA\n+GTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTCCTTCACCACCAAGTACTTTCTGAAGAAGC\n+ACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCAGGACCTTCACCTTCCAGCAGTCGTACCAC\n+AAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGTGGACGCGCCTTCAAGGAACTGTCTACGCT\n+GCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAACGAGTCTCTTTCCTTGTCCACACGCGCATCC\n+ACACGGGAGTGATGCCCTACAAGTGCGAGCTCTGCCAAAAGACGTTCAGGTACAAGGTCAGTCAAAGGACCCATCGATGT\n+CCCACCGAGGAGGCCCAGACACCGGAGCAGCTGATAAAGGCGTTCCTGGAGGGCAACGACTCGCATACTCAGCCTTCACC\n+AGCGAGCGCCGAAATAGCTGCCATCAACAGCAGCTCCATTGTGGATCCTGAGCAGGAGGCACTGCTTTCGCAGTCAATCG\n+ACGACATTGTCGTTGAGCAGTGCCAAAAGCTGGGCATCTGTGGTGTGGAGCCGCGGGAGGAGGGACAGCTCATATCCCTA\n+CAGCCGGTTGCGGTGGTACACTTCAGCGGGAATGGCTCTCCGCTGCAGCAACTCCAGAACTTGAGAATCTACTCACCGCA\n+ACAAACAGAGCTACCTAGTTCCGATGGCGAAGTCTTCCAGCGCTTTTTGATGGACGCCACGTAG\n+>FUN_000004-T1 
FUN_000004\n+ATGTCAAACAAGCGCGAGATAGATGAGCTGACTTCCCGCATCAAGTCGGCTAAGACTACCTTGGTGGAGTGGACAGAGGC\n+CATGGAAGACGGAAACAAGGGCTACCAGCTAATCGAAAAGTACTACCTCGATGACCAGCAAAAGGCACGGGAGCTGAACA\n+TTAAGCGTCAGCTCTTGCAGGCGGACATCGACAAGCGGCGCAAGCAGGTGGTGCTCCTCTATGACGAGCAAATGACGCTG\n+GAGAAGAATCTGGAGCGAACCGCTTGTCTGTACAGGTCTGCCCACGCGGAACGCCGCCAGATGGTGGAGACGTGGAAGAG\n+CGCCGTGAACCAGATGACCCAGCGAGAGCACGACATTCAGCGCAGCGAGATAGAGTGTGCGGAGCTGGCCCAGAAAGCCC\n+AGCAGACGGCTCAGACCTACAAGGAGTACGACAACCAGCTAACCGAGGTCATCGAGAACAACCGCCAGGTGGAGCTCGCG\n+ATCGAGTCCCTCAACGAGGAAAGCTCCGACATGAAAAACCAAATCCAGATCCTGATTGATGCCACATTACTGAAGGAGCG\n+CGAGATCGACGGACTGCGTCGGGAACTGGAGAACCTTTCGAACCGGGTTCACCTGCAGCGCATGGATAACCGTAGTCAGA\n+TGAAAAAGCGCGATGAGAAGGCCAAGGAGCTGGAGAACTTCGCTTCGGTGATGGAGAAGGTTAACGCTCGGCTGAAGTCT\n+GTACAGAACAAGGCCCTCAATGCGGAGCAGCGGCTGCAAATTCTGGAGGAGATGATGCAGGCGGAGGAGACGGCTCTGCG\n+AAACCTGGACAAGGAGCAGGAGAAGGTCAACGAGATGTTGTACCGCACCCAGAGGCAGGTCATCGAGCTGCAGGACGAGG\n+AGAAGGTCTTGAAGGTGCAGAACGACTCGCTCAACTCCAACCTGGCAGCGATCAATCGCAATCAGCAGCAGGTCAATAAC\n+GAACTCAAGCGGCAGACGGAGATCCATTACAGCCTGTCCTTTAAGTGCTTGGAGGCGGAGCGGCGTCTCGCCGAGATCAA\n+GGGTCTGGCAGATGACCCCGAGGTAGAGGCCACAAACATGGCCCGGCTTAACACTCTGGAGCAGGAGTATGAAAAGTTGC\n+AACGCCTCATCGCCACCACGGAGGCGCAGAATAAGAAGCTAAACTATAACATGAACAACCTGGTCGTCCAGTACAACGCC\n+GACGAGAAGGAGCTCGAAATGGTCAGGTTCAAGATTAAAGAGGCCCAGGTGTACTGCGAGGGCACCGTCAAGAGACTGCG\n+CCAGATTCGATACGAGAATTCCGAGCTCATAGTCGACCTCAACATGGTCAAGATGCGCTGCAGCGACCTAGAGGTTGGCA\n+TCGGAGGCTGCGAGCAGGGCACCTACGACCTAGAGCAGCACCGCCTTGCCTTCCGGCGCGCCATCAAAGATCGCACCGTT\n+GAGCTTCGCAGCCAAGAGGATGTGCTCCTCCTCAAGAAGAAGCATCTTAACGAGGAACTAAGCACCCTGCGAGCCGACCT\n+CGGGGAGCGGAAGAAGCAGATTGAGGCGATGAAGGCGCGCTTTGAACTTACCGCCCAATTGCTAGGCAAAAACGAGGACG\n+GCTCCATCATGACCAGCACACAGCTGAAGGTGGTGAGCGCCCAGGAACGGCAGATGCTGGCCGACGAGGGCGATGCCCTC\n+AACAAAAAGGTTCTCAAGGCTGAGAAGGAAGTGGTCGCCCTGGAAAACACACTGCGCCAGTTCGACAAATCGAACGATAA\n+CTACCGAAAAACATTCCGATCTGTGGACGAAAACTCAAAGGATCGCGAGCGGGCCGAACTGGAGCTGAAGGAACTTGAGG\n+CGGCCTACTGCCGCGAGCTGGAAAAACTGAAGGTTCTCAGGTGCAAGGCACAGCACTACCAGCAGAAGCACGCAGCCC
AG\n+CGAGCTGAGGAGGAAGACCTGATTTCCAAAATAGAGAAGGCGAAGGCTAGTCGGGCGGAGCACTCTGCAGTTCTGGAGAA\n+GATCGAGCGCGAACTAGATGACCAGCGAATGAAGCTGGACAGGGCCAA'..b'+GAGGGCCTGGACCTATTCGGTCTGAGCAACGGGAACAACAGCAGCCTGaacagcagtgtgaacggcgGCGGTCCGCTGAC\n+TACGCAgcagcagaagttgcagctgcagcaacagcagcaAAACAGGACGCAGCCATCCCGAAGCAATACGGCTCTGCACG\n+TCTGTTGGCATCGTGGCGCCACCGTAGGACTGGGCGATCACCTGATAGCCGCCGAGCACCAGTTGTCCGGTTATCTGCTG\n+CGAAAGTTCAAGAATAGCTCCGGCTGGCAGAAGCTCTGGGTGGTGTTCACGTCCTTTTGTCTGTACTTTTACAAGAGCTA\n+CCAGGACGAGTTCGCACTGGCTAGTCTGCCGCTATTGGGTTACACGGTGGGTCCTCCTGGTCACCAAGATGCCGTTCAGA\n+AGGAGTTCGTCTTCAAGCTTTCCTTTAAGAACCACGTCTACTTCTTCCGCGCGGAAAGTGCGCACACCTACAACAGGTAA\n+>FUN_000013-T1 FUN_000013\n+ATGCAGCCCATCGCCATGGAGATGGGGCAGAATCAACTCGAAGTCAAAGCGTCGGTTTTGGGTGGAAGCCCGCAGCTTGG\n+TGATCTCAAAGTTGGAGGCGCCCAGGATGTTCAGCTTCTTGGCTTTTACCGTGAGCCCGGCGGATCCATCCAGAATGCTC\n+AGATCTCCAATGTAGAGGGGCTCTAG\n+>FUN_000014-T1 FUN_000014\n+ATGGCATCCAAGTCCTTCGATTTGGTCATTGAGGAAAAGACAAAGAAACCGGAACGGCTCTACCAACCTCGTCGGATGCG\n+ATGGCTTAAATATATCATCCTGCCGGCAGTCTTCTCCTTTGCGCTTCTGCTGATCCTGGTCAATGTGGACTTCTCCGATA\n+ATAGTGAGGATTCCACGCATCTGGGCAATGACACATCGCTCATTATATCTGGCTATGGATTTGAAAATAACACACTGCGC\n+CGGGGATTTTTCTCCGGAGGAATTGCATTGCATAGCCTTGTCATCGAGAACTGCACCATAGTTCATATCAACGACGCAGC\n+TTTTAATCAGGAAAGCACTGTGAACATAACAAGCCTGCAGCTCATTAATGTGCAATTGGAAAACTTGACTGAATCTGCCC\n+TGGAAGGTCTTCAAAAGCTTCAGAACTTTACTCTAGTCAATGAAAATAATCACTTCAGGCCATTTGGATTCCTCTCAGCT\n+GTGGCTGAATCGTTAGTCAGTGCGGAAATCCACCAGTCACTAGCCGCGGCAATATCGTATTCAGTGTGTGATTTTTTGGG\n+ATCCCGGAATTTCCCACAGTTGAAATATTTAGATCTGAGTGGCACACACTTGGACAAAAGCCTTATTAAAGAGTCCTTTG\n+ACAATCTGCCCGCGTTGGAACAACTACTTCTAAGGAACTGTGGGTTGGGCAACATCGAGTGGGAAATCGTGAGGCCAAGA\n+CTCAAATTGTTGCACTATTTGGACTTGGGTGGAGCTCAAAAGACGGGCAACTATGAACATCAGTTGGACGTATCCGCGTT\n+CTCTCCTGAGACAACTACTAACGCTGAAGAGATATCTACTATCCTAGCGAAGAGAGCGATGGCTCCAGAAGTAGTGGGCA\n+CCACTACACTAGGACCTACTACTTCCATAGAAATTTCACCACCATCAACACAAAGTACAACAACACCCAAAGAAGAAAGT\n+ACATCTATGACAGAAACAACGATATTAACAACACCATCGCCAAAATGCGAAGAGGAACTTTGCCAGGATCTAGAGTGCTC\n+CAGAATTACCACCGATACGGTTGCATCTGC
AGATCTGGGAAAATCGTCGTGCCAGGATGGTTTACTGGTGGAGATCTGCG\n+AATCGACTTGCACCACTCCCACATTCTTCTGTGTGATATTAGGCGAGAACTTTACCTCCGCATCCAACTGTTGTTCCCAC\n+CATACCATGCGATGTGTGGTCTCTGCACAGGTCTCCTGGTTCGAGGACCACAGCGGACTGGTCATTGGCTTGGGAGTGGG\n+TCTCCTCTTCATCGGCAGCTTCCTCGGCATGCTCATCGTCTTCGGAACCCTGCGCCTGAATCCATCCTGGTTGCGAGGCA\n+ATAAGCGGCGGGAGTCGAATACGATAGGACTGATTCAGGGAAGATTTGAGAAGGACCCGTATGAGCAAGAATTGCAACGT\n+ATACGAGAGCTGTGA\n+>FUN_000015-T1 FUN_000015\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000016-T1 FUN_000016\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGT
TCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCGATCGACAGTCTCGTTCTCGGAG\n+TCGAACTCGCAGCGG\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_scratch/Genus_species.discrepency.report.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_scratch/Genus_species.discrepency.report.txt Mon Oct 04 19:39:38 2021 +0000 |
b |
@@ -0,0 +1,135 @@ +Discrepancy Report Results + +Summary +DISC_PROTEIN_NAMES:All proteins have same name "hypothetical protein" +DISC_SOURCE_QUALS_ASNDISC:taxname (all present, all same) +DISC_FEATURE_COUNT:gene: 16 present +DISC_FEATURE_COUNT:CDS: 16 present +DISC_FEATURE_COUNT:mRNA: 16 present +DISC_COUNT_NUCLEOTIDES:4 nucleotide Bioseqs are present +JOINED_FEATURES:26 features have joined locations. +NO_ANNOTATION:2 bioseqs have no features +DISC_QUALITY_SCORES:Quality scores are missing on all sequences. +FATAL: DISC_BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS:1 features have partial ends that do not abut the end of the sequence or a gap, and cannot be extended by 3 or fewer nucleotides to do so +ONCALLER_COMMENT_PRESENT:4 comment descriptors were found (all same) +MISSING_GENOMEASSEMBLY_COMMENTS:4 bioseqs are missing GenomeAssembly structured comments +MOLTYPE_NOT_MRNA:4 molecule types are not set as mRNA. +TECHNIQUE_NOT_TSA:4 technique are not set as TSA +MISSING_STRUCTURED_COMMENT:4 sequences do not include structured comments. +MISSING_PROJECT:20 sequences do not include project. +DISC_INCONSISTENT_MOLINFO_TECH:Molinfo Technique Report (some missing, all same) + + +Detailed Report + +DiscRep_ALL:DISC_PROTEIN_NAMES::All proteins have same name "hypothetical protein" + +DiscRep_ALL:DISC_SOURCE_QUALS_ASNDISC::taxname (all present, all same) +DiscRep_SUB:DISC_SOURCE_QUALS_ASNDISC::4 sources have 'Genus species' for taxname +DiscRep_ALL:DISC_FEATURE_COUNT::gene: 16 present +DiscRep_ALL:DISC_FEATURE_COUNT::CDS: 16 present +DiscRep_ALL:DISC_FEATURE_COUNT::mRNA: 16 present +DiscRep_ALL:DISC_COUNT_NUCLEOTIDES::4 nucleotide Bioseqs are present +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:JOINED_FEATURES::26 features have joined locations. 
+DiscRep_SUB:JOINED_FEATURES::26 features have joined location but no exception +genome:CDS hypothetical protein (sample4:2126-2199, 2258-3224, 3284->3537) FUN_000016 +genome:mRNA hypothetical protein (sample4:2126-2199, 2258-3224, 3284->3537) FUN_000016 +genome:mRNA hypothetical protein (sample:c3142-3138, c3004-2883, c2686-2565) FUN_000002 +genome:CDS hypothetical protein (sample:c3142-3138, c3004-2883, c2686-2565) FUN_000002 +genome:mRNA hypothetical protein (sample:c5802-5797, c5539-4937, c4742-4248) FUN_000003 +genome:CDS hypothetical protein (sample:c5802-5797, c5539-4937, c4742-4248) FUN_000003 +genome:CDS hypothetical protein (sample:c10664-10657, c10499-8707, c8385-7691) FUN_000004 +genome:mRNA hypothetical protein (sample:c10664-10657, c10499-8707, c8385-7691) FUN_000004 +genome:mRNA hypothetical protein (sample:c15214-15209, c14648-14247) FUN_000005 +genome:CDS hypothetical protein (sample:c15214-15209, c14648-14247) FUN_000005 +genome:CDS hypothetical protein (sample:15539-15543, 15646-15919, 16485-16619) FUN_000006 +genome:mRNA hypothetical protein (sample:15539-15543, 15646-15919, 16485-16619) FUN_000006 +genome:CDS hypothetical protein (sample:c21705-21700, c21515-19638, c19482-18358) FUN_000007 +genome:mRNA hypothetical protein (sample:c21705-21700, c21515-19638, c19482-18358) FUN_000007 +genome:CDS hypothetical protein (sample:40223-40396, 40659-41193, 41707-42080, 43409-43609, 43678-44130) FUN_000009 +genome:mRNA hypothetical protein (sample:40223-40396, 40659-41193, 41707-42080, 43409-43609, 43678-44130) FUN_000009 +genome:mRNA hypothetical protein (sample:87202-87207, 88054-88320) FUN_000010 +genome:CDS hypothetical protein (sample:87202-87207, 88054-88320) FUN_000010 +genome:CDS hypothetical protein (sample:c106221-106216, c104632-104258, c103947-103696, c103618-103229, c103151-102510) FUN_000011 +genome:mRNA hypothetical protein (sample:c106221-106216, c104632-104258, c103947-103696, c103618-103229, c103151-102510) FUN_000011 +genome:CDS 
hypothetical protein (sample:167121-168069, 168722-169212) FUN_000012 +genome:mRNA hypothetical protein (sample:167121-168069, 168722-169212) FUN_000012 +genome:CDS hypothetical protein (sample:180262-180267, 180400-180579) FUN_000013 +genome:mRNA hypothetical protein (sample:180262-180267, 180400-180579) FUN_000013 +genome:CDS hypothetical protein (sample:c210553-210548, c210474-209053, c208645-208619) FUN_000014 +genome:mRNA hypothetical protein (sample:c210553-210548, c210474-209053, c208645-208619) FUN_000014 + +DiscRep_ALL:NO_ANNOTATION::2 bioseqs have no features +genome:sample2 (length 2030) +genome:sample3 (length 2100) + +DiscRep_ALL:DISC_QUALITY_SCORES::Quality scores are missing on all sequences. + +FATAL: DiscRep_ALL:DISC_BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS::1 featurepartial ends thands that do not abut the end of the sequence or a gap, and cannot be extended by 3 or fewer nucleotides to do so +genome:CDS hypothetical protein (sample4:2126-2199, 2258-3224, 3284->3537) FUN_000016 + +DiscRep_ALL:ONCALLER_COMMENT_PRESENT::4 comment descriptors were found (all same) +genome:sample:"Annotated using 1.8.7" +genome:sample2:"Annotated using 1.8.7" +genome:sample3:"Annotated using 1.8.7" +genome:sample4:"Annotated using 1.8.7" + +DiscRep_ALL:MISSING_GENOMEASSEMBLY_COMMENTS::4 bioseqs are missing GenomeAssembly structured comments +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:MOLTYPE_NOT_MRNA::4 molecule types are not set as mRNA. +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:TECHNIQUE_NOT_TSA::4 technique are not set as TSA +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:MISSING_STRUCTURED_COMMENT::4 sequences do not include structured comments. 
+genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + +DiscRep_ALL:MISSING_PROJECT::20 sequences do not include project. +genome:sample (length 215740) +genome:ncbi:FUN_000001-T1 (length 124) +genome:ncbi:FUN_000002-T1 (length 82) +genome:ncbi:FUN_000003-T1 (length 367) +genome:ncbi:FUN_000004-T1 (length 831) +genome:ncbi:FUN_000005-T1 (length 135) +genome:ncbi:FUN_000006-T1 (length 137) +genome:ncbi:FUN_000007-T1 (length 1002) +genome:ncbi:FUN_000008-T1 (length 278) +genome:ncbi:FUN_000009-T1 (length 578) +genome:ncbi:FUN_000010-T1 (length 90) +genome:ncbi:FUN_000011-T1 (length 554) +genome:ncbi:FUN_000012-T1 (length 479) +genome:ncbi:FUN_000013-T1 (length 61) +genome:ncbi:FUN_000014-T1 (length 484) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) +genome:ncbi:FUN_000015-T1 (length 124) +genome:ncbi:FUN_000016-T1 (length 432) + +DiscRep_ALL:DISC_INCONSISTENT_MOLINFO_TECH::Molinfo Technique Report (some missing, all same) +DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::technique (all missing) +DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::4 Molinfos are missing field technique +genome:sample (length 215740) +genome:sample2 (length 2030) +genome:sample3 (length 2100) +genome:sample4 (length 7560) + |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_scratch/Genus_species.gbk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_scratch/Genus_species.gbk Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,4148 @@\n+LOCUS sample 215740 bp DNA linear 21-JUL-2021\n+DEFINITION Genus species.\n+ACCESSION \n+VERSION\n+KEYWORDS .\n+SOURCE Genus species\n+ ORGANISM Genus species\n+ Unclassified.\n+REFERENCE 1 (bases 1 to 215740)\n+ AUTHORS Palmer,J.M.\n+ TITLE Direct Submission\n+ JOURNAL Submitted (21-JUL-2021) CFMR, USDA Forest Service, 1 Gifford\n+ Pinchot Drive, Madison, WI 53726, USA\n+COMMENT \'Annotated using 1.8.7\'.\n+FEATURES Location/Qualifiers\n+ source 1..215740\n+ /organism="Genus species"\n+ /mol_type="genomic DNA"\n+ gene complement(1092..1466)\n+ /locus_tag="FUN_000001"\n+ mRNA complement(1092..1466)\n+ /locus_tag="FUN_000001"\n+ /product="hypothetical protein"\n+ CDS complement(1092..1466)\n+ /locus_tag="FUN_000001"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUN_000001-T1"\n+ /translation="MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAG\n+ ESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKRFIRNPNYVKANEFYDKMLSSEY\n+ VSKRYKDLPPPHPGFGADQPPA"\n+ gene complement(2565..3142)\n+ /locus_tag="FUN_000002"\n+ mRNA complement(join(2565..2686,2883..3004,3138..3142))\n+ /locus_tag="FUN_000002"\n+ /product="hypothetical protein"\n+ CDS complement(join(2565..2686,2883..3004,3138..3142))\n+ /locus_tag="FUN_000002"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUN_000002-T1"\n+ /translation="MPLYGVSPASALWAGSRRQTVRRSRPSHHESWPATSPPAGRAQS\n+ LLPKRLLDVVTDLGLDVLQVLGLEARLLGGDVLGFAQP"\n+ gene complement(4248..5802)\n+ /locus_tag="FUN_000003"\n+ mRNA complement(join(4248..4742,4937..5539,5797..5802))\n+ /locus_tag="FUN_000003"\n+ /product="hypothetical protein"\n+ CDS complement(join(4248..4742,4937..5539,5797..5802))\n+ /locus_tag="FUN_000003"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUN_000003-T1"\n+ /translation="MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPN\n+ PSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGNNSTCTASQPYALSGALSMLQQS\n+ PSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH\n+ 
KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCERVSFLVHTRIHTGVM\n+ PYKCELCQKTFRYKVSQRTHRCPTEEAQTPEQLIKAFLEGNDSHTQPSPASAEIAAIN\n+ SSSIVDPEQEALLSQSIDDIVVEQCQKLGICGVEPREEGQLISLQPVAVVHFSGNGSP\n+ LQQLQNLRIYSPQQTELPSSDGEVFQRFLMDAT"\n+ gene complement(7691..10664)\n+ /locus_tag="FUN_000004"\n+ mRNA complement(join(7691..8385,8707..10499,10657..10664))\n+ /locus_tag="FUN_000004"\n+ /product="hypothetical protein"\n+ CDS complement(join(7691..8385,8707..10499,10657..10664))\n+ /locus_tag="FUN_000004"\n+ /codon_start=1\n+ /product="hypothetical protein"\n+ /protein_id="ncbi:FUN_000004-T1"\n+ /translation="MSNKREIDELTSRIKSAKTTLVEWTEAMEDGNKGYQLIEKYYLD\n+ DQQKARELNIKRQLLQADIDKRRKQVVLLYDEQMTLEKNLERTACLYRSAHAERRQMV\n+ ETWKSAVNQMTQREHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELA\n+ IESLNEESSDMKNQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRD\n+ EKAKELENFASVMEKVNARLKS'..b'1 agcttttggc actgctcaac gacaatgtcg tcgattgact gcgaaagcag tgcctcctgc\n+ 4501 tcaggatcca caatggagct gctgttgatg gcagctattt cggcgctcgc tggtgaaggc\n+ 4561 tgagtatgcg agtcgttgcc ctccaggaac gcctttatca gctgctccgg tgtctgggcc\n+ 4621 tcctcggtgg gacatcgatg ggtcctttga ctgaccttgt acctgaacgt cttttggcag\n+ 4681 agctcgcact tgtagggcat cactcccgtg tggatgcgcg tgtggacaag gaaagagact\n+ 4741 cgctgccgga aacacttgcc tgtggacgag tccgaaatac gaaaaggtta gacatggagt\n+ 4801 gacccggaaa agaaggtata cctctccttc atttaaagta aatagggcaa atcgcaatgg\n+ 4861 agtatgctca tttataagct ggctaacaaa ataaggggcg gctaattaag gggtttgatc\n+ 4921 gatactcacc gcagacttcg cacttgaagg gcttttcgcc gctgtgaatc ctctggtggt\n+ 4981 tatgcagcgt agacagttcc ttgaaggcgc gtccacaaac cccgcagaca tggggcttta\n+ 5041 cctcgctgtg gtagagcaaa tgcttgtggt acgactgctg gaaggtgaag gtcctggcgc\n+ 5101 agatctcgca tgtgtacggc atctcgccgg tgtggagccg cttgtgcttc ttcagaaagt\n+ 5161 acttggtggt gaaggacttg ctgcacacat cgcactccca cagcttcggg gtggccgtgc\n+ 5221 ccgactccgg cgaactgggg gactgctgga gcatgctcag cgccccactc agcgcgtatg\n+ 5281 gctgggatgc ggtgcacgtg gagttatttc cgttgccaag ggctccaggc tctgtgtgtc\n+ 5341 gaatgcgatc gcaaatgctc 
agcttgggcg tggcagtaac tgcactggtg gtagaagcgg\n+ 5401 gtgtaggact gggattagga tttggattgg gattggagca gggcacgccc atcatgtgca\n+ 5461 ctactttcag gtggattcga agggagccct tcattcggaa cgtcttggag cagagatggc\n+ 5521 acttgtaggg ctcctggtcc tgtataaagc aataatcgga atttcactta tatttatcaa\n+ 5581 ttcatcaata tgccctcatg gccaaatatt ccattacatt accgtctgtc tctcagtttc\n+ 5641 aaatttatgc acaaaaatca ttcactttca ttcactatat cacaaagttg ccatggtttt\n+ 5701 aaattgatca aaaacaaatt aatatctatc atatatatac atagtcatat gaacagttga\n+ 5761 aaaattaatt gaaaataatg ggaacgatat acgtacatac atcagttgtt tttaaaatat\n+ 5821 aagggtatat agatttcttt cttgttgttg ttgattttaa ttacgtcaaa cttttgtttc\n+ 5881 agattcaatg taaatggtct agctttttaa gtatgatttt tttttgctgc cagtgagcat\n+ 5941 agaaaaaaaa aatcaaaatc gatataagaa tatgcgaaag tgcattacga aactctttag\n+ 6001 ataatagcac ttaatatatg tacatagcca atagttaccg gttccttctg ttggggttcc\n+ 6061 ttttgcttgg gttctccctc cgcattttcg tggactaagc ggacgtgcat gtccctcagc\n+ 6121 tcggtattcc ggaaactgaa ctcgcaaatg tcgcagtggg cgggcggggt ggtccgcaca\n+ 6181 ggcggtgggg ttgggacgac gggcttggac ctggttcgct tggccctccg tttgggagga\n+ 6241 gctgcggcaa ggaagccccg ggacgaggcg ggttggccat tcgctggact ctcgtttccc\n+ 6301 tcctgagcca tcagacttgt gtgcgagaac aggtggatgg tgagcttgtc cagccccagg\n+ 6361 aagagctcct tgcagttggc aaagggacag gccagtgggc cgttggccgc cttgatcagc\n+ 6421 ctctgctgca gtgcgtcaat actgccgaaa ctgggcaccg cgcagagcgg acacagcacc\n+ 6481 gaggtggaac acatttcgcc agtgcactca atcgaatctt atgcaaatgc ttcacctcct\n+ 6541 attgggatta tcctcctgtt cggtctgtga tcatctattc aggagtccat tcccagactg\n+ 6601 cctagtcttt ctgctttcaa aattttctaa aaatatcagc aagtgaagat ttttgaaaac\n+ 6661 tttgggccca gcaatctgac ttctcggcac cgatgccagc taacgaaata atgaaaaata\n+ 6721 atgaaatgcc cggcgcggat cgtcgaatcg tcaagaagac tttcggaaac actcgcagca\n+ 6781 ccgaaatccc atctctcgaa caaggcagtc tcttttctcc gtgtctctgg gtagctcatt\n+ 6841 tcgaaatata gctctgagca cggctatata ctatatgtat gtagaattat ttctggccga\n+ 6901 tatatgttgc actggcggcc atatagcctt cgttctagtc tttgtaacgc acgatgcgca\n+ 6961 ggagcaattc gctgagatga 
ccacatgcga tttgcgggac ttatctagag atctatcatt\n+ 7021 atcgccagat tggtttaata attggctttt ccgccaatat ccaattggaa tatggttggt\n+ 7081 tactgcaatt gtcgctccat tttttaagca ctccataaaa agtaaacaca ttaatatgta\n+ 7141 ctcttattgg agatttcttc tttcgatttt agtttcggac cagtgaaaat cattcgtttc\n+ 7201 attttcgtaa ataagaactg agaaaatatt attattatat atatttcttt attaggaaaa\n+ 7261 tacgaagatt gagtatttca gattgaatta gcatatccgt ctaaatctta atgctgtaat\n+ 7321 gagcttactt gagatctgat caaaaccaat acaaaaccca caccaaaggt ggtagctaat\n+ 7381 atacatattt tgtgtaatac ttttgtagag tatttactat tcagcgattt aaacaagcaa\n+ 7441 tcgcctagac acacacattt gtccgcctat gtgtatgtgc accgagctat acccccactg\n+ 7501 aatcgctgtg tgctattttt atggccgcga tgctctcttg ttttgacccg cttgggcaac\n+//\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_scratch/Genus_species.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_scratch/Genus_species.gff3 Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,117 @@\n+##gff-version 3\n+sample\tfunannotate\tgene\t1092\t1466\t.\t-\t.\tID=FUN_000001;\n+sample\tfunannotate\tmRNA\t1092\t1466\t.\t-\t.\tID=FUN_000001-T1;Parent=FUN_000001;product=hypothetical protein;\n+sample\tfunannotate\texon\t1092\t1466\t.\t-\t.\tID=FUN_000001-T1.exon1;Parent=FUN_000001-T1;\n+sample\tfunannotate\tCDS\t1092\t1466\t.\t-\t0\tID=FUN_000001-T1.cds;Parent=FUN_000001-T1;\n+sample\tfunannotate\tgene\t2565\t3142\t.\t-\t.\tID=FUN_000002;\n+sample\tfunannotate\tmRNA\t2565\t3142\t.\t-\t.\tID=FUN_000002-T1;Parent=FUN_000002;product=hypothetical protein;\n+sample\tfunannotate\texon\t3138\t3142\t.\t-\t.\tID=FUN_000002-T1.exon1;Parent=FUN_000002-T1;\n+sample\tfunannotate\texon\t2883\t3004\t.\t-\t.\tID=FUN_000002-T1.exon2;Parent=FUN_000002-T1;\n+sample\tfunannotate\texon\t2565\t2686\t.\t-\t.\tID=FUN_000002-T1.exon3;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t3138\t3142\t.\t-\t0\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t2883\t3004\t.\t-\t1\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tCDS\t2565\t2686\t.\t-\t2\tID=FUN_000002-T1.cds;Parent=FUN_000002-T1;\n+sample\tfunannotate\tgene\t4248\t5802\t.\t-\t.\tID=FUN_000003;\n+sample\tfunannotate\tmRNA\t4248\t5802\t.\t-\t.\tID=FUN_000003-T1;Parent=FUN_000003;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t5797\t5802\t.\t-\t.\tID=FUN_000003-T1.exon1;Parent=FUN_000003-T1;\n+sample\tfunannotate\texon\t4937\t5539\t.\t-\t.\tID=FUN_000003-T1.exon2;Parent=FUN_000003-T1;\n+sample\tfunannotate\texon\t4248\t4742\t.\t-\t.\tID=FUN_000003-T1.exon3;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t5797\t5802\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t4937\t5539\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tCDS\t4248\t4742\t.\t-\t0\tID=FUN_000003-T1.cds;Parent=FUN_000003-T1;\n+sample\tfunannotate\tgene\t7691\t10664\t.\t-\t.\tID=FUN_000004;\n+sample\tfunannotate\tmRNA\t7691\t10664\t.\t-\t.\tID=FUN_000004-T1;Parent=FUN_000004;product=hypothetical protein;\n+sample\tfunannotate\texon\t10657\t10664\t.\t-\t.\tID=FUN_000004-T1.exon1;Parent=FUN_000004-T1;\n+sample\tfunannotate\texon\t8707\t10499\t.\t-\t.\tID=FUN_000004-T1.exon2;Parent=FUN_000004-T1;\n+sample\tfunannotate\texon\t7691\t8385\t.\t-\t.\tID=FUN_000004-T1.exon3;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t10657\t10664\t.\t-\t0\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t8707\t10499\t.\t-\t1\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tCDS\t7691\t8385\t.\t-\t2\tID=FUN_000004-T1.cds;Parent=FUN_000004-T1;\n+sample\tfunannotate\tgene\t14247\t15214\t.\t-\t.\tID=FUN_000005;\n+sample\tfunannotate\tmRNA\t14247\t15214\t.\t-\t.\tID=FUN_000005-T1;Parent=FUN_000005;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t15209\t15214\t.\t-\t.\tID=FUN_000005-T1.exon1;Parent=FUN_000005-T1;\n+sample\tfunannotate\texon\t14247\t14648\t.\t-\t.\tID=FUN_000005-T1.exon2;Parent=FUN_000005-T1;\n+sample\tfunannotate\tCDS\t15209\t15214\t.\t-\t0\tID=FUN_000005-T1.cds;Parent=FUN_000005-T1;\n+sample\tfunannotate\tCDS\t14247\t14648\t.\t-\t0\tID=FUN_000005-T1.cds;Parent=FUN_000005-T1;\n+sample\tfunannotate\tgene\t15539\t16619\t.\t+\t.\tID=FUN_000006;\n+sample\tfunannotate\tmRNA\t15539\t16619\t.\t+\t.\tID=FUN_000006-T1;Parent=FUN_000006;product=hypothetical protein;\n+sample\tfunannotate\texon\t15539\t15543\t.\t+\t.\tID=FUN_000006-T1.exon1;Parent=FUN_000006-T1;\n+sample\tfunannotate\texon\t15646\t15919\t.\t+\t.\tID=FUN_000006-T1.exon2;Parent=FUN_000006-T1;\n+sample\tfunannotate\texon\t16485\t16619\t.\t+\t.\tID=FUN_000006-T1.exon3;Parent=FUN_000006-T1;\n+sample\tfunannotate\tCDS\t15539\t15543\t.\t+\t0\tID=FUN_000006-T1.cds;Parent=FUN_000006-T1;\n+sample\tfunannotate\tCDS\t15646\t15919\t.\t+\t1\tID=FUN_000006-T1.cds;Parent=FUN_000006-T1;\n+sample\tfunannotate\tCDS\t16485\t16619\t.\t+\t0\tID=FUN_000006-T1.cds;Parent=FUN_000006-T1;\n+sample\tfunannotate\tgene\t18358\t21705\t.\t-\t.\tID=FUN_000007;\n+sample\tfunannotate\tmRNA\t18358\t21705\t.\t-\t.\tID=FUN_000007-T1;Parent=FUN_000007;product=hypothetical protein;\n+sample\tfunannotate\texon\t21700\t21705\t.\t-\t.\tID=FUN_000007-T1.exon1;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t19638\t21515\t.\t-\t.\tID=FUN_000007-T1.exon2;Parent=FUN_000007-T1;\n+sample\tfunannotate\texon\t18358\t19482\t.\t-\t.\tID=FUN_000007-T1.exon3;Parent=FUN_00'..b'N_000010-T1;\n+sample\tfunannotate\tCDS\t87202\t87207\t.\t+\t0\tID=FUN_000010-T1.cds;Parent=FUN_000010-T1;\n+sample\tfunannotate\tCDS\t88054\t88320\t.\t+\t0\tID=FUN_000010-T1.cds;Parent=FUN_000010-T1;\n+sample\tfunannotate\tgene\t102510\t106221\t.\t-\t.\tID=FUN_000011;\n+sample\tfunannotate\tmRNA\t102510\t106221\t.\t-\t.\tID=FUN_000011-T1;Parent=FUN_000011;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t106216\t106221\t.\t-\t.\tID=FUN_000011-T1.exon1;Parent=FUN_000011-T1;\n+sample\tfunannotate\texon\t104258\t104632\t.\t-\t.\tID=FUN_000011-T1.exon2;Parent=FUN_000011-T1;\n+sample\tfunannotate\texon\t103696\t103947\t.\t-\t.\tID=FUN_000011-T1.exon3;Parent=FUN_000011-T1;\n+sample\tfunannotate\texon\t103229\t103618\t.\t-\t.\tID=FUN_000011-T1.exon4;Parent=FUN_000011-T1;\n+sample\tfunannotate\texon\t102510\t103151\t.\t-\t.\tID=FUN_000011-T1.exon5;Parent=FUN_000011-T1;\n+sample\tfunannotate\tCDS\t106216\t106221\t.\t-\t0\tID=FUN_000011-T1.cds;Parent=FUN_000011-T1;\n+sample\tfunannotate\tCDS\t104258\t104632\t.\t-\t0\tID=FUN_000011-T1.cds;Parent=FUN_000011-T1;\n+sample\tfunannotate\tCDS\t103696\t103947\t.\t-\t0\tID=FUN_000011-T1.cds;Parent=FUN_000011-T1;\n+sample\tfunannotate\tCDS\t103229\t103618\t.\t-\t0\tID=FUN_000011-T1.cds;Parent=FUN_000011-T1;\n+sample\tfunannotate\tCDS\t102510\t103151\t.\t-\t0\tID=FUN_000011-T1.cds;Parent=FUN_000011-T1;\n+sample\tfunannotate\tgene\t167121\t169212\t.\t+\t.\tID=FUN_000012;\n+sample\tfunannotate\tmRNA\t167121\t169212\t.\t+\t.\tID=FUN_000012-T1;Parent=FUN_000012;product=hypothetical protein;\n+sample\tfunannotate\texon\t167121\t168069\t.\t+\t.\tID=FUN_000012-T1.exon1;Parent=FUN_000012-T1;\n+sample\tfunannotate\texon\t168722\t169212\t.\t+\t.\tID=FUN_000012-T1.exon2;Parent=FUN_000012-T1;\n+sample\tfunannotate\tCDS\t167121\t168069\t.\t+\t0\tID=FUN_000012-T1.cds;Parent=FUN_000012-T1;\n+sample\tfunannotate\tCDS\t168722\t169212\t.\t+\t2\tID=FUN_000012-T1.cds;Parent=FUN_000012-T1;\n+sample\tfunannotate\tgene\t180262\t180579\t.\t+\t.\tID=FUN_000013;\n+sample\tfunannotate\tmRNA\t180262\t180579\t.\t+\t.\tID=FUN_000013-T1;Parent=FUN_000013;product=hypothetical 
protein;\n+sample\tfunannotate\texon\t180262\t180267\t.\t+\t.\tID=FUN_000013-T1.exon1;Parent=FUN_000013-T1;\n+sample\tfunannotate\texon\t180400\t180579\t.\t+\t.\tID=FUN_000013-T1.exon2;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t180262\t180267\t.\t+\t0\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tCDS\t180400\t180579\t.\t+\t0\tID=FUN_000013-T1.cds;Parent=FUN_000013-T1;\n+sample\tfunannotate\tgene\t208619\t210553\t.\t-\t.\tID=FUN_000014;\n+sample\tfunannotate\tmRNA\t208619\t210553\t.\t-\t.\tID=FUN_000014-T1;Parent=FUN_000014;product=hypothetical protein;\n+sample\tfunannotate\texon\t210548\t210553\t.\t-\t.\tID=FUN_000014-T1.exon1;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t209053\t210474\t.\t-\t.\tID=FUN_000014-T1.exon2;Parent=FUN_000014-T1;\n+sample\tfunannotate\texon\t208619\t208645\t.\t-\t.\tID=FUN_000014-T1.exon3;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t210548\t210553\t.\t-\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t209053\t210474\t.\t-\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample\tfunannotate\tCDS\t208619\t208645\t.\t-\t0\tID=FUN_000014-T1.cds;Parent=FUN_000014-T1;\n+sample4\tfunannotate\tgene\t1092\t1466\t.\t-\t.\tID=FUN_000015;\n+sample4\tfunannotate\tmRNA\t1092\t1466\t.\t-\t.\tID=FUN_000015-T1;Parent=FUN_000015;product=hypothetical protein;\n+sample4\tfunannotate\texon\t1092\t1466\t.\t-\t.\tID=FUN_000015-T1.exon1;Parent=FUN_000015-T1;\n+sample4\tfunannotate\tCDS\t1092\t1466\t.\t-\t0\tID=FUN_000015-T1.cds;Parent=FUN_000015-T1;\n+sample4\tfunannotate\tgene\t2126\t3537\t.\t+\t.\tID=FUN_000016;\n+sample4\tfunannotate\tmRNA\t2126\t3537\t.\t+\t.\tID=FUN_000016-T1;Parent=FUN_000016;product=hypothetical 
protein;\n+sample4\tfunannotate\texon\t2126\t2199\t.\t+\t.\tID=FUN_000016-T1.exon1;Parent=FUN_000016-T1;\n+sample4\tfunannotate\texon\t2258\t3224\t.\t+\t.\tID=FUN_000016-T1.exon2;Parent=FUN_000016-T1;\n+sample4\tfunannotate\texon\t3284\t3537\t.\t+\t.\tID=FUN_000016-T1.exon3;Parent=FUN_000016-T1;\n+sample4\tfunannotate\tCDS\t2126\t2199\t.\t+\t0\tID=FUN_000016-T1.cds;Parent=FUN_000016-T1;\n+sample4\tfunannotate\tCDS\t2258\t3224\t.\t+\t1\tID=FUN_000016-T1.cds;Parent=FUN_000016-T1;\n+sample4\tfunannotate\tCDS\t3284\t3537\t.\t+\t0\tID=FUN_000016-T1.cds;Parent=FUN_000016-T1;\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_scratch/Genus_species.mrna-transcripts.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_scratch/Genus_species.mrna-transcripts.fa Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,241 @@\n+>FUN_000001-T1 FUN_000001\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000002-T1 FUN_000002\n+ATGCCTCTGTACGGCGTTTCTCCAGCGTCTGCTCTGTGGGCAGGAAGTAGGCGACAAACTGTTCGCCGCTCTCGTCCATC\n+ACACCACGAATCATGGCCTGCGACATCTCCTCCAGCTGGGCGGGCACAGAGTCTCCTCCCGAAGCGACTTCTTGACGTTG\n+TAACCGACCTTGGCCTCGATGTTCTCCAGGTTCTGGGGCTGGAAGCGCGTCTGCTCGGTGGAGATGTACTCGGATTTGCG\n+CAACCATGA\n+>FUN_000003-T1 FUN_000003\n+ATGTATGACCAGGAGCCCTACAAGTGCCATCTCTGCTCCAAGACGTTCCGAATGAAGGGCTCCCTTCGAATCCACCTGAA\n+AGTAGTGCACATGATGGGCGTGCCCTGCTCCAATCCCAATCCAAATCCTAATCCCAGTCCTACACCCGCTTCTACCACCA\n+GTGCAGTTACTGCCACGCCCAAGCTGAGCATTTGCGATCGCATTCGACACACAGAGCCTGGAGCCCTTGGCAACGGAAAT\n+AACTCCACGTGCACCGCATCCCAGCCATACGCGCTGAGTGGGGCGCTGAGCATGCTCCAGCAGTCCCCCAGTTCGCCGGA\n+GTCGGGCACGGCCACCCCGAAGCTGTGGGAGTGCGATGTGTGCAGCAAGTCCTTCACCACCAAGTACTTTCTGAAGAAGC\n+ACAAGCGGCTCCACACCGGCGAGATGCCGTACACATGCGAGATCTGCGCCAGGACCTTCACCTTCCAGCAGTCGTACCAC\n+AAGCATTTGCTCTACCACAGCGAGGTAAAGCCCCATGTCTGCGGGGTTTGTGGACGCGCCTTCAAGGAACTGTCTACGCT\n+GCATAACCACCAGAGGATTCACAGCGGCGAAAAGCCCTTCAAGTGCGAACGAGTCTCTTTCCTTGTCCACACGCGCATCC\n+ACACGGGAGTGATGCCCTACAAGTGCGAGCTCTGCCAAAAGACGTTCAGGTACAAGGTCAGTCAAAGGACCCATCGATGT\n+CCCACCGAGGAGGCCCAGACACCGGAGCAGCTGATAAAGGCGTTCCTGGAGGGCAACGACTCGCATACTCAGCCTTCACC\n+AGCGAGCGCCGAAATAGCTGCCATCAACAGCAGCTCCATTGTGGATCCTGAGCAGGAGGCACTGCTTTCGCAGTCAATCG\n+ACGACATTGTCGTTGAGCAGTGCCAAAAGCTGGGCATCTGTGGTGTGGAGCCGCGGGAGGAGGGACAGCTCATATCCCTA\n+CAGCCGGTTGCGGTGGTACACTTCAGCGGGAATGGCTCTCCGCTGCAGCAACTCCAGAACTTGAGAATCTACTCACCGCA\n+ACAAACAGAGCTACCTAGTTCCGATGGCGAAGTCTTCCAGCGCTTTTTGATGGACGCCACGTAG\n+>FUN_000004-T1 
FUN_000004\n+ATGTCAAACAAGCGCGAGATAGATGAGCTGACTTCCCGCATCAAGTCGGCTAAGACTACCTTGGTGGAGTGGACAGAGGC\n+CATGGAAGACGGAAACAAGGGCTACCAGCTAATCGAAAAGTACTACCTCGATGACCAGCAAAAGGCACGGGAGCTGAACA\n+TTAAGCGTCAGCTCTTGCAGGCGGACATCGACAAGCGGCGCAAGCAGGTGGTGCTCCTCTATGACGAGCAAATGACGCTG\n+GAGAAGAATCTGGAGCGAACCGCTTGTCTGTACAGGTCTGCCCACGCGGAACGCCGCCAGATGGTGGAGACGTGGAAGAG\n+CGCCGTGAACCAGATGACCCAGCGAGAGCACGACATTCAGCGCAGCGAGATAGAGTGTGCGGAGCTGGCCCAGAAAGCCC\n+AGCAGACGGCTCAGACCTACAAGGAGTACGACAACCAGCTAACCGAGGTCATCGAGAACAACCGCCAGGTGGAGCTCGCG\n+ATCGAGTCCCTCAACGAGGAAAGCTCCGACATGAAAAACCAAATCCAGATCCTGATTGATGCCACATTACTGAAGGAGCG\n+CGAGATCGACGGACTGCGTCGGGAACTGGAGAACCTTTCGAACCGGGTTCACCTGCAGCGCATGGATAACCGTAGTCAGA\n+TGAAAAAGCGCGATGAGAAGGCCAAGGAGCTGGAGAACTTCGCTTCGGTGATGGAGAAGGTTAACGCTCGGCTGAAGTCT\n+GTACAGAACAAGGCCCTCAATGCGGAGCAGCGGCTGCAAATTCTGGAGGAGATGATGCAGGCGGAGGAGACGGCTCTGCG\n+AAACCTGGACAAGGAGCAGGAGAAGGTCAACGAGATGTTGTACCGCACCCAGAGGCAGGTCATCGAGCTGCAGGACGAGG\n+AGAAGGTCTTGAAGGTGCAGAACGACTCGCTCAACTCCAACCTGGCAGCGATCAATCGCAATCAGCAGCAGGTCAATAAC\n+GAACTCAAGCGGCAGACGGAGATCCATTACAGCCTGTCCTTTAAGTGCTTGGAGGCGGAGCGGCGTCTCGCCGAGATCAA\n+GGGTCTGGCAGATGACCCCGAGGTAGAGGCCACAAACATGGCCCGGCTTAACACTCTGGAGCAGGAGTATGAAAAGTTGC\n+AACGCCTCATCGCCACCACGGAGGCGCAGAATAAGAAGCTAAACTATAACATGAACAACCTGGTCGTCCAGTACAACGCC\n+GACGAGAAGGAGCTCGAAATGGTCAGGTTCAAGATTAAAGAGGCCCAGGTGTACTGCGAGGGCACCGTCAAGAGACTGCG\n+CCAGATTCGATACGAGAATTCCGAGCTCATAGTCGACCTCAACATGGTCAAGATGCGCTGCAGCGACCTAGAGGTTGGCA\n+TCGGAGGCTGCGAGCAGGGCACCTACGACCTAGAGCAGCACCGCCTTGCCTTCCGGCGCGCCATCAAAGATCGCACCGTT\n+GAGCTTCGCAGCCAAGAGGATGTGCTCCTCCTCAAGAAGAAGCATCTTAACGAGGAACTAAGCACCCTGCGAGCCGACCT\n+CGGGGAGCGGAAGAAGCAGATTGAGGCGATGAAGGCGCGCTTTGAACTTACCGCCCAATTGCTAGGCAAAAACGAGGACG\n+GCTCCATCATGACCAGCACACAGCTGAAGGTGGTGAGCGCCCAGGAACGGCAGATGCTGGCCGACGAGGGCGATGCCCTC\n+AACAAAAAGGTTCTCAAGGCTGAGAAGGAAGTGGTCGCCCTGGAAAACACACTGCGCCAGTTCGACAAATCGAACGATAA\n+CTACCGAAAAACATTCCGATCTGTGGACGAAAACTCAAAGGATCGCGAGCGGGCCGAACTGGAGCTGAAGGAACTTGAGG\n+CGGCCTACTGCCGCGAGCTGGAAAAACTGAAGGTTCTCAGGTGCAAGGCACAGCACTACCAGCAGAAGCACGCAGCCC
AG\n+CGAGCTGAGGAGGAAGACCTGATTTCCAAAATAGAGAAGGCGAAGGCTAGTCGGGCGGAGCACTCTGCAGTTCTGGAGAA\n+GATCGAGCGCGAACTAGATGACCAGCGAATGAAGCTGGACAGGGCCAA'..b'+GAGGGCCTGGACCTATTCGGTCTGAGCAACGGGAACAACAGCAGCCTGaacagcagtgtgaacggcgGCGGTCCGCTGAC\n+TACGCAgcagcagaagttgcagctgcagcaacagcagcaAAACAGGACGCAGCCATCCCGAAGCAATACGGCTCTGCACG\n+TCTGTTGGCATCGTGGCGCCACCGTAGGACTGGGCGATCACCTGATAGCCGCCGAGCACCAGTTGTCCGGTTATCTGCTG\n+CGAAAGTTCAAGAATAGCTCCGGCTGGCAGAAGCTCTGGGTGGTGTTCACGTCCTTTTGTCTGTACTTTTACAAGAGCTA\n+CCAGGACGAGTTCGCACTGGCTAGTCTGCCGCTATTGGGTTACACGGTGGGTCCTCCTGGTCACCAAGATGCCGTTCAGA\n+AGGAGTTCGTCTTCAAGCTTTCCTTTAAGAACCACGTCTACTTCTTCCGCGCGGAAAGTGCGCACACCTACAACAGGTAA\n+>FUN_000013-T1 FUN_000013\n+ATGCAGCCCATCGCCATGGAGATGGGGCAGAATCAACTCGAAGTCAAAGCGTCGGTTTTGGGTGGAAGCCCGCAGCTTGG\n+TGATCTCAAAGTTGGAGGCGCCCAGGATGTTCAGCTTCTTGGCTTTTACCGTGAGCCCGGCGGATCCATCCAGAATGCTC\n+AGATCTCCAATGTAGAGGGGCTCTAG\n+>FUN_000014-T1 FUN_000014\n+ATGGCATCCAAGTCCTTCGATTTGGTCATTGAGGAAAAGACAAAGAAACCGGAACGGCTCTACCAACCTCGTCGGATGCG\n+ATGGCTTAAATATATCATCCTGCCGGCAGTCTTCTCCTTTGCGCTTCTGCTGATCCTGGTCAATGTGGACTTCTCCGATA\n+ATAGTGAGGATTCCACGCATCTGGGCAATGACACATCGCTCATTATATCTGGCTATGGATTTGAAAATAACACACTGCGC\n+CGGGGATTTTTCTCCGGAGGAATTGCATTGCATAGCCTTGTCATCGAGAACTGCACCATAGTTCATATCAACGACGCAGC\n+TTTTAATCAGGAAAGCACTGTGAACATAACAAGCCTGCAGCTCATTAATGTGCAATTGGAAAACTTGACTGAATCTGCCC\n+TGGAAGGTCTTCAAAAGCTTCAGAACTTTACTCTAGTCAATGAAAATAATCACTTCAGGCCATTTGGATTCCTCTCAGCT\n+GTGGCTGAATCGTTAGTCAGTGCGGAAATCCACCAGTCACTAGCCGCGGCAATATCGTATTCAGTGTGTGATTTTTTGGG\n+ATCCCGGAATTTCCCACAGTTGAAATATTTAGATCTGAGTGGCACACACTTGGACAAAAGCCTTATTAAAGAGTCCTTTG\n+ACAATCTGCCCGCGTTGGAACAACTACTTCTAAGGAACTGTGGGTTGGGCAACATCGAGTGGGAAATCGTGAGGCCAAGA\n+CTCAAATTGTTGCACTATTTGGACTTGGGTGGAGCTCAAAAGACGGGCAACTATGAACATCAGTTGGACGTATCCGCGTT\n+CTCTCCTGAGACAACTACTAACGCTGAAGAGATATCTACTATCCTAGCGAAGAGAGCGATGGCTCCAGAAGTAGTGGGCA\n+CCACTACACTAGGACCTACTACTTCCATAGAAATTTCACCACCATCAACACAAAGTACAACAACACCCAAAGAAGAAAGT\n+ACATCTATGACAGAAACAACGATATTAACAACACCATCGCCAAAATGCGAAGAGGAACTTTGCCAGGATCTAGAGTGCTC\n+CAGAATTACCACCGATACGGTTGCATCTGC
AGATCTGGGAAAATCGTCGTGCCAGGATGGTTTACTGGTGGAGATCTGCG\n+AATCGACTTGCACCACTCCCACATTCTTCTGTGTGATATTAGGCGAGAACTTTACCTCCGCATCCAACTGTTGTTCCCAC\n+CATACCATGCGATGTGTGGTCTCTGCACAGGTCTCCTGGTTCGAGGACCACAGCGGACTGGTCATTGGCTTGGGAGTGGG\n+TCTCCTCTTCATCGGCAGCTTCCTCGGCATGCTCATCGTCTTCGGAACCCTGCGCCTGAATCCATCCTGGTTGCGAGGCA\n+ATAAGCGGCGGGAGTCGAATACGATAGGACTGATTCAGGGAAGATTTGAGAAGGACCCGTATGAGCAAGAATTGCAACGT\n+ATACGAGAGCTGTGA\n+>FUN_000015-T1 FUN_000015\n+ATGTCTAGTTTCCTTTTGGTTATTTTCATTTTATTGGCGCTTAGAACGTCAGAGTCTTCTGAAACTGGAAACCCACTTGC\n+AAACGAGCCCGATCCATTATATATGAAACTGGTGGATCCCATGGTAGCAGGAGAATCACCTAAAAGGATGATTAAGGATC\n+AGAAAGATGTAGGCCTTAAATCAACTAGCAGTAGCGAAGAGCTCCGAAAATTGCCAAAAACGCGAGGTCGACAGAAGAGA\n+TTCATTCGGAATCCAAACTATGTGAAAGCTAACGAATTCTATGATAAGATGTTAAGCAGTGAATACGTAAGTAAGCGGTA\n+TAAGGATCTTCCGCCGCCTCATCCGGGATTTGGAGCGGATCAACCGCCAGCATGA\n+>FUN_000016-T1 FUN_000016\n+ATGCCACCCACGATCAACAATTCGGCGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAATCCGA\n+GATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCGGATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACA\n+GCCACCGCTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACGTGCTGACGGAACACGATTTGGGT\n+GTCACGGTGGACCTGATTAACCGGGAGCTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACTGCT\n+GGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGCATTCGAGGACGGTGTCATGGTTGCGCAAATCCG\n+AGTACATCTCCACCGAGCAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGGTTACAACGTCAAG\n+AAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCGAAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAA\n+GAGCGAAATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCCTATCTTCCCCGACTTCACCAACT\n+GGAAGTTCCCGTGCGCCCAGGTCATATTTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAGGAG\n+ATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACAGTTTGTCGCCTACTTCCTGCCCACAGAGCAGAC\n+GCTGGAGAAACGCCGTACAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtacaagATCGCTCGAG\n+AGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGGCTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATC\n+TACTACAACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGCCAGCAACCCAACAACACCAAGCT\n+GGTTGTCAAGCATCGTCCATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAGAAGT
TCCTGGCG\n+AGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAGCAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGAC\n+GCAGCTGTTGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCGATCGACAGTCTCGTTCTCGGAG\n+TCGAACTCGCAGCGG\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_scratch/Genus_species.proteins.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_scratch/Genus_species.proteins.fa Mon Oct 04 19:39:38 2021 +0000 |
b |
@@ -0,0 +1,96 @@ +>FUN_000001-T1 FUN_000001 +MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAGESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKR +FIRNPNYVKANEFYDKMLSSEYVSKRYKDLPPPHPGFGADQPPA +>FUN_000002-T1 FUN_000002 +MPLYGVSPASALWAGSRRQTVRRSRPSHHESWPATSPPAGRAQSLLPKRLLDVVTDLGLDVLQVLGLEARLLGGDVLGFA +QP +>FUN_000003-T1 FUN_000003 +MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPNPSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGN +NSTCTASQPYALSGALSMLQQSPSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH +KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCERVSFLVHTRIHTGVMPYKCELCQKTFRYKVSQRTHRC +PTEEAQTPEQLIKAFLEGNDSHTQPSPASAEIAAINSSSIVDPEQEALLSQSIDDIVVEQCQKLGICGVEPREEGQLISL +QPVAVVHFSGNGSPLQQLQNLRIYSPQQTELPSSDGEVFQRFLMDAT +>FUN_000004-T1 FUN_000004 +MSNKREIDELTSRIKSAKTTLVEWTEAMEDGNKGYQLIEKYYLDDQQKARELNIKRQLLQADIDKRRKQVVLLYDEQMTL +EKNLERTACLYRSAHAERRQMVETWKSAVNQMTQREHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELA +IESLNEESSDMKNQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRDEKAKELENFASVMEKVNARLKS +VQNKALNAEQRLQILEEMMQAEETALRNLDKEQEKVNEMLYRTQRQVIELQDEEKVLKVQNDSLNSNLAAINRNQQQVNN +ELKRQTEIHYSLSFKCLEAERRLAEIKGLADDPEVEATNMARLNTLEQEYEKLQRLIATTEAQNKKLNYNMNNLVVQYNA +DEKELEMVRFKIKEAQVYCEGTVKRLRQIRYENSELIVDLNMVKMRCSDLEVGIGGCEQGTYDLEQHRLAFRRAIKDRTV +ELRSQEDVLLLKKKHLNEELSTLRADLGERKKQIEAMKARFELTAQLLGKNEDGSIMTSTQLKVVSAQERQMLADEGDAL +NKKVLKAEKEVVALENTLRQFDKSNDNYRKTFRSVDENSKDRERAELELKELEAAYCRELEKLKVLRCKAQHYQQKHAAQ +RAEEEDLISKIEKAKASRAEHSAVLEKIERELDDQRMKLDRANREIRTQLREIKARPFSEEYLAQFERDLSLQELEARNT +KALNMITDLANSDESGTDIIGILLRKGIKLPMHLKRTCSRVSWNSSSSGKSSQGQDTASYLNVKGKKFSCDGASARSSVS +DMSSLKDDTSSTTSHSGLSIISLELPLPKKK +>FUN_000005-T1 FUN_000005 +MRCVFCGSGSEQQHSRWEIKMLQESCRTDHGFHQDSQAIQYLYEILASYNRDEQRAFLQFVTGSPRLPTGGFKALTPPLT +IVRKTLDENQNPNDYLPSVMTCVNYLKLPDYSSREVMRQKLKVAANEGSMSFHLS +>FUN_000006-T1 FUN_000006 +MLILIGAGARGAVLHRLLASSATQDVRTTARACGRMLVLLCGCCCSGGAAAHKRRSCYCWSVLVRGGCLKSILDVQHSAV +GLDYVYNGHRVLFASCTVTTSAGRELLRKGFRAEITTGAIPSCHSSVFLRMNLLLDL +>FUN_000007-T1 FUN_000007 
+MSFCDFLRDAALGANSANLSIVAAALSAARDVGGGSDGGGSAGGATPATGASASSVGNTSAVGASSSSNSSAGQAASSNS +NNVTATGSGSAPGGGPTSTGTTSGTQHGSGSGAAAAVDSESDDSEVGRLQALLEARGLPPHLFGALGPRVTHILHRTIGN +SSSSKANQLLQGLQSHDESQQLQAAIEMCQMLVMGNEDTLAGFPIKQVVPALIQLLRMEHNFDIMNNACRALAYMLEALP +RSSGTVVEAVPVFLEKLQVIQCMDVAEQSLSALEILSRRHNKAILQANGISACLTYLDFFSIVAQRAALAIAANCCLNMH +PEEFHFVAESLPLLARLLSQQDKKCIESVCSAFCRLVESFQHDGQRLQQIASPDLLKNCQQLLLVTPAILNTGTFTAVVR +MLSLMCCSCPDLAISLLRNDIAATLLYLLTGNAEPAAASATHVELISRSPSELYELTCLIGELMPRLPLDGIFAVDSLLD +RPTLNTQDQVHWQWRDDRGSWHNYSTIDSRLIEAANQSSEDEISLSTFGRTYTVDFHAMQQINEDTGTTRPVQRRLNHNY +VAPMSAGQDLTTTSAGSAAAGGASTSAAAAAASSNNNNNNNNNPPGNSVNLNQVKRRPSLDARIACLKEERGLAADFIKH +IFNVLYEVYSSSAGPNVRYKCLRALLRMVYYATPELLRQVLKYQLVSSHIAGMLGSNDLRIVVGALQMAEILMRQLPDVF +GTHFRREGVIYQFTQLTDPNNPICANPSPKPLSATATPTANAGGSQSAPASANSLQVNPFFMDSAPGLSSASTTPSSSKH +QSYSVKSFSHAMNALTASAKGTPSGALDATSSSTTAGGYNYSSSAPSSSSGAPAAYFVTQQGDPRQYVHFQQPAVPAPPP +QQELLPSGVQQQGQQVPQVIYQPHHQQPAHLVLASTSSGAASSSSSSSSSSSASALQHKMTDMLKRKAPPKRKSQSGGRA +KSRQEDAAVAPAGSGPGGAPPSSSGSAMHELLSRATSKYIFQ +>FUN_000008-T1 FUN_000008 +MKFRALMQDPLYMKEFQAIVATLTKLAKDCVMILGSRQMHFIVNEDQSSAASPLVWAGITAEEYFPEYRMEAAHPDQEYI +VLGVSSANLGRALSVLRGGGVNSCKLKLQRIQFPCISVIASVLTSSSTEAREVVHDVPVTIIPGSDWSAYVVPRVPNSQL +ALGLPSLRLLKSLIDKLKNISPSLEFQVNVDGELNVIATSEMSTVTSRFQKLLIRTVSGSQQEASCSVDSRKASAFFGAL +QLPNEELTIGIDREHSIHLQIDVRQDVVLHSILPAVCM +>FUN_000009-T1 FUN_000009 +MCGNPAVGNGTRALILVGGYGTRLRPLTLSTPKPLVEFANKPILLHQLEALVDAGCRQVILAVSYRAEQMEKELKVEAKK +LGVELIFSHETEPLGTAGPLALAKTILAASSEPFFVLNSDVICDFPFKQLVQFHCNHGKEGTIVVTKVEEPSKYGVVLYD +ENGCIKNFIEKPQEFVSNKINAGIYIFNPSVLDRIEVKPTSIEKEVFPEMTQQQELYAMDLTGFWMDIGQPKDFLTGMCL +YLSSLRQKQSPKLYTGPGVVGNVLVDPTAKIGEGCRIGPNVTIGPDVVIEDGVCIKRSTILKGAIVRSHSWLDSCIVGWR +STVGRWVRIEGITVLGEDVIVKDELYINGGQVLPHKSIAASLRGAIVQAGQLVILPDEEVFSHVQGVWNLSSDQGNLGSF +VVTNIRLVWFADANETFNISLPYLQIESSKYGPALVIQTAETGGGYVLGFRVDPAERLNELFKELSSLHTVYGEHPNFGI +QYNANDARRRLEAASEEAAQASQIKVDNFEELDERQEREINTKLNSYLAEGCLGKVPSQGERAPVYCKELGFAMEPIGDG +YKLQDLWNVMPTKMETME +>FUN_000010-T1 FUN_000010 
+MDFIHEAVGTSAAPSANPGPMPLCQPVRLELPFSPRHSFALGHFPFQLCPCLYAVWVQTMGLVLLLLIVFAPFLFRVLFK +PCNPKRYLTT +>FUN_000011-T1 FUN_000011 +MCHSKDNLHSGNEDGGMPKDTEYISSDHDDSPSWSQQSLLSSDRSKSYSQICSEILEESKERQEKAECAFRVYNINRSKL +RRSHQQSLSRGPGSGSYGSSMASEYSSKSEAGYQDYDSPSTDPSREHTAEVTFLQLRHRNRAHKEIIFRAAAHAIVIILI +IIARGVCQRHIKIVEIVPLTSRRGARRATTRNLTSHFAPRKWRRRFSSADQGDRQFKGHDGDCLRSTEKKRSISNEQSPI +TLRNTNAKDVDIPDCFGSFAMNKHLSVITEDASQHHKDPDEDMIDSQLSNSVLLETYDEGEKYAYSYQYSYKPEICNNNQ +FVSDESDLKVSSKEGYQMDQEDYVMDKQELVHEGGSDASLSEVAKSKSFLSLKIYDADEALMEIPEDFEGPAIVLDDDAD +FLDITLTDDEEKIRAKLMAAALTTRKTTSSISPNISLRTRSPIEPSSLSYKPNVIFTRRSEVIKDNYTPRPDDRVALLAE +KFLQSFSESAPNDYGWKPSKQEVTSAVSISHLFNENGVTRRGGDTPLCGDRQLLSVEFNRKLQRQLKVIVESFQ +>FUN_000012-T1 FUN_000012 +MSLDRRGEITTPPTRYDLTLGSDKSSSLSRSEAGTYDVIQAEIQHAKRQELATGVATASHQNGNGNGNGHTLSTQHDIEA +EVKKRKWPTEPSYFLAKELLMTERTYKKDLDVLNTTFRQVLSLGDVEQLQPLFELLDSLAQHHNLFLRDIEHRMVQWEGR +GGHEAHRIGDVMMKHMAALPIYDEYVQTHLDILHCMNDMYEGDERFRQVYKEFEQQKVCYLPIGELLLKPLNRLLHYQLI +LERLCDYYGEEHIDYADAMAVHHLLVRSTKGIRSQLPDSANFVELCELQRDINFEQLVQPHRRLIRQGCLLKHSKRGSSE +EGLDLFGLSNGNNSSLNSSVNGGGPLTTQQQKLQLQQQQQNRTQPSRSNTALHVCWHRGATVGLGDHLIAAEHQLSGYLL +RKFKNSSGWQKLWVVFTSFCLYFYKSYQDEFALASLPLLGYTVGPPGHQDAVQKEFVFKLSFKNHVYFFRAESAHTYNR +>FUN_000013-T1 FUN_000013 +MQPIAMEMGQNQLEVKASVLGGSPQLGDLKVGGAQDVQLLGFYREPGGSIQNAQISNVEGL +>FUN_000014-T1 FUN_000014 +MASKSFDLVIEEKTKKPERLYQPRRMRWLKYIILPAVFSFALLLILVNVDFSDNSEDSTHLGNDTSLIISGYGFENNTLR +RGFFSGGIALHSLVIENCTIVHINDAAFNQESTVNITSLQLINVQLENLTESALEGLQKLQNFTLVNENNHFRPFGFLSA +VAESLVSAEIHQSLAAAISYSVCDFLGSRNFPQLKYLDLSGTHLDKSLIKESFDNLPALEQLLLRNCGLGNIEWEIVRPR +LKLLHYLDLGGAQKTGNYEHQLDVSAFSPETTTNAEEISTILAKRAMAPEVVGTTTLGPTTSIEISPPSTQSTTTPKEES +TSMTETTILTTPSPKCEEELCQDLECSRITTDTVASADLGKSSCQDGLLVEICESTCTTPTFFCVILGENFTSASNCCSH +HTMRCVVSAQVSWFEDHSGLVIGLGVGLLFIGSFLGMLIVFGTLRLNPSWLRGNKRRESNTIGLIQGRFEKDPYEQELQR +IREL +>FUN_000015-T1 FUN_000015 +MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAGESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKR +FIRNPNYVKANEFYDKMLSSEYVSKRYKDLPPPHPGFGADQPPA +>FUN_000016-T1 
FUN_000016 +MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFDLKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLG +VTVDLINRELYQADSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK +KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPCAQVIFDSDPAPAGKNVPAQLEE +MSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFINGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGI +YYNELETRVRLNKRRVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIGETEKTSED +AAVGAQAASGADSPAQVARDRQSRSRSRTRS |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_scratch/Genus_species.scaffolds.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_scratch/Genus_species.scaffolds.fa Mon Oct 04 19:39:38 2021 +0000 |
b |
b'@@ -0,0 +1,4554 @@\n+>sample\n+GTGGCCGGCTGATTTCACGTCCTAACTTTGGGCTTAACTGGTTCGCCAGT\n+TGACTTTCTTCGCCATCATGTGATGCATTAATTAAACAATAATTACTAAT\n+TGacagtaattaataattgTGGCAAAAAGCGCGACACGTTTTTTCGGCAA\n+ACTCCTCGGAAGACCGATTGTTTAAAGACGTAGGGAAAAGTAGTTCCCAA\n+GCATTTTAAAAAGATACCTATGACATGTGACACCTTTAAAGTGCAATACA\n+AGTTTTCATCTCTTTATATCCTTTTACTCCCTAATTTGAATATAAAAGGA\n+ATCGCATTGAGAGTATAAAGGCTTTAGTTCTTATCGATagatagttagtG\n+ATGAAATAAAATTATAACCGTGGTTTTAGTTTGAAATGTTGTAAAGACTT\n+TCTTTAAATTTAACCAAATTTATGTGATAAAATGGATATTCCATAGATAA\n+GACATTTAAGTTAAATGTTTTTATACATCAAAAAGGAAACATTGTGCACG\n+CTATCAAATGGTATTCTTAAAATCGAGTCAgttaggtaagttattaatta\n+aATGGTAACTTTTTAACGTGCGTCAAATAATCTAGAAATTCTTCTTCAAC\n+TCATCCAAAACATTCTCAACACCACAATATCTATGCTCAGCGATGACAAA\n+TTTCTCCTGATTTCTTAATTTTCTATCTATGctatgcGATCAATCAACGA\n+ATGTGTGCTAATTTCTTGTGACGATTATTTTGCAAAGTCGTCTCCGCGTT\n+AATATCCGATGTAAATAAACCTATGAAAATCGCAAAGATCTATTCCTTTG\n+CGCTTAACCTTGTTATTGAACTCCCTCCCGCCCGGATTTTCGCAGCTTCC\n+AACTAAGTGATACCTTTTAAACAAACGCCACAACAAAACAGGTGACAATC\n+ATATTTTATCAACAAAAGAAAAGAGAGATAATATCTGCTGCTAATTCAAT\n+TTATTGGGCTTTGTGTTTATTTGCATTGGGAATCCGTGGAGCTGATATTG\n+TTTACTTTGGCAATTTCCCAGTCATTATGGTCGCCGTATAATGTGAATGG\n+CCAACAGGAAAATTTCACAGATTCCACTGGTTATTCACTGTTCATGCTGG\n+CGGTTGATCCGCTCCAAATCCCGGATGAGGCGGCGGAAGATCCTTATACC\n+GCTTACTTACGTATTCACTGCTTAACATCTTATCATAGAATTCGTTAGCT\n+TTCACATAGTTTGGATTCCGAATGAATCTCTTCTGTCGACCTCGCGTTTT\n+TGGCAATTTTCGGAGCTCTTCGCTACTGCTAGTTGATTTAAGGCCTACAT\n+CTTTCTGATCCTTAATCATCCTTTTAGGTGATTCTCCTGCTACCATGGGA\n+TCCACCAGTTTCATATATAATGGATCGGGCTCGTTTGCAAGTGGGTTTCC\n+AGTTTCAGAAGACTCTGACGTTCTAAGCGCCAATAAAATGAAAATAACCA\n+AAAGGAAACTAGACATGTTTCGTTACAGACAGATATAGATTGGATATTAT\n+TGAAAGAAAATgaaaataAACAGCGATAATGATCTGTGACTTATTGGAAA\n+TTAGATGGCTTATGGATGATGGGGCGATAAATTCGAACAAACACTGAGAG\n+CATTTTTGGGAGCATTGTGGGAGCTTTTCTATTATCCAGTACTTTATATA\n+CATATATCATTTATATACTAATCATTTCTGGTAGCCGTTCGTAATCAGGA\n+TCGGATCCTTTTTTACCCGTTAGTCAGCTAGAAGAAACGAAAAATTAAAA\n+TAGTAAAatctaaaagtatacaaaaattcaaatagtaaaaccaaaaagta\n+ttaaaaaaAATATCAATCGTTTTTAAACGTTGATTTTTCAGCTTGTGGGG\n+TGATTT
ATCGCTAACTTGGAAAATGATAATAAAGCATTATCCATAATATT\n+AGTTGTGGAAATGAAATTCAAATAGATGTTGTGTTATATACGATGAGGAT\n+GTTGCATTTGAGTCCCCGGAAATATAGTATTTTTTTTACCGAAGGTATTA\n+TCGTACCGGTCAAGTACGGTCACACTGCCAAGCGCAGATTTGAGGATTTC\n+TAGATTTGGCCTCTTGATGGACTAGAAGCGCTACCAAAACTGGGGCTTGA\n+GTTGAATTACCTGTTGGAAGACACAATGCCACCCACGATCAACAATTCGG\n+CGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAG\n+TAAGTGAACAGATCCCTAAACAGACGCCAGATACTCAGACTGATGTGTAC\n+CTTGCAGATCCGAGATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCG\n+GATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACAGCCACCG\n+CTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACG\n+TGCTGACGGAACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAG\n+CTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACT\n+GCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGC\n+ATTCGAGGACGGTGTCATGGTTGCGCAAATCCGAGTACATCTCCACCGAG\n+CAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGG\n+TTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCG\n+AAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAAGAGCGAA\n+ATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCC\n+TATCTTCCCCGACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATAT\n+TTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAG\n+GAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACA\n+GTTTGTCGCCTACTTCCTGCCCACAGAGCAGACGCTGGAGAAACGCCGTA\n+CAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtac\n+aagATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGG\n+CTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATCTACTACA\n+ACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGC\n+CAGCAACCCAACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCT\n+ATAGCGAGCTTTACTTTGTATTATTTCTACCAGGTTGTCAAGCATCGTCC\n+ATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAG\n+AAGTTCCTGGCGAGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAG\n+CAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGACGCAGCTGT\n+TGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCG\n+ATCGACAGTCTCGTTCTCGGAGTCGAACTCGCAGCGGGTCCAgttcagga\n+tctggatctggctccggctcTCGGGCCAgcagccgctcaaagtctggttc\n+tcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCC\n+AGAAATCCGGatccagatcgagatcggtatcacgttcccgatcccgttcc\n+aagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagtccggt
tcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTT'..b'cgcgttctaggtcgagatccaagtccggttcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTTCTGGTTCAGGATCCAGCTCTGGAAGCGCCTCA\n+GATGAATGATTAATTACAAAAAACGGCGTTCATAATAAATAAGTTTATAA\n+TCAACCAAGTACATTTGAAAACTGAACTAACTCGATTTAATATCATTTTC\n+GCCTCAACTCAGCGCTCGGGTTCGTTGCCCAGAATAGTTTTAAATAAAAT\n+CGGCAGTTTAACATAATTTATATTAGATGTtgttgttgtATTGCAAACAA\n+GTCGGGTCCTAGTCGATTTACACTTGGCTGAGATAAAACAACTAAGATTC\n+AAATAATATCCTCATAAGAAGATGTAATTAAGACGTTTTTCTTAGGGGGT\n+GCTTAGGAATTGATTAGATCGCCTTTGGGGAAGTGCAAACAATGTAAaat\n+gatataaaagggtataaaTTAAGTGGATATATGCATCTTCGTTCCAACTA\n+CGTGGCGTCCATCAAAAAGCGCTGGAAGACTTCGCCATCGGAACTAGGTA\n+GCTCTGTTTGTTGCGGTGAGTAGATTCTCAAGTTCTGGAGTTGCTGCAGC\n+GGAGAGCCATTCCCGCTGAAGTGTACCACCGCAACCGGCTGTAGGGATAT\n+GAGCTGTCCCTCCTCCCGCGGCTCCACACCACAGATGCCCAGCTTTTGGC\n+ACTGCTCAACGACAATGTCGTCGATTGACTGCGAAAGCAGTGCCTCCTGC\n+TCAGGATCCACAATGGAGCTGCTGTTGATGGCAGCTATTTCGGCGCTCGC\n+TGGTGAAGGCTGAGTATGCGAGTCGTTGCCCTCCAGGAACGCCTTTATCA\n+GCTGCTCCGGTGTCTGGGCCTCCTCGGTGGGACATCGATGGGTCCTTTGA\n+CTGACCTTGTACCTGAACGTCTTTTGGCAGAGCTCGCACTTGTAGGGCAT\n+CACTCCCGTGTGGATGCGCGTGTGGACAAGGAAAGAGACTCGCTGCCGGA\n+AACACTTGCCTGTGGACGAGTCCGAAATACGAAAAGGTTAGACATGGAGT\n+GACCCGGAAAAGAAGGTATACCTCTCCTTCATTTAAAGTAAATAGGGCAA\n+ATCGCAATGGAGTATGCTCATTTATAAGCTGGCTAACAAAATAAGGGGCG\n+GCTAATTAAGGGGTTTGATCGATACTCACCGCAGACTTCGCACTTGAAGG\n+GCTTTTCGCCGCTGTGAATCCTCTGGTGGTTATGCAGCGTAGACAGTTCC\n+TTGAAGGCGCGTCCACAAACCCCGCAGACATGGGGCTTTACCTCGCTGTG\n+GTAGAGCAAATGCTTGTGGTACGACTGCTGGAAGGTGAAGGTCCTGGCGC\n+AGATCTCGCATGTGTACGGCATCTCGCCGGTGTGGAGCCGCTTGTGCTTC\n+TTCAGAAAGTACTTGGTGGTGAAGGACTTGCTGCACACATCGCACTCCCA\n+CAGCTTCGGGGTGGCCGTGCCCGACTCCGGCGAACTGGGGGACTGCTGGA\n+GCATGCTCAGCGCCCCACTCAGCGCGTATGGCTGGGATGCGGTGCACGTG\n+GAGTTATTTCCGTTGCCAAGGGCTCCAGGCTCTGTGTGTCGAATGCGATC\n+GCAAATGCTCAGCTTGGGCGTGGCAGTAACTGCACTGGTGGTAGAAGCGG\n+GTGTAGGACtgggattaggatttggattgggattggAGCAGGGCACGCCC\n+ATCATGTGCACTACTTTCAGGTGGATTCGAAGGGAGCCCTTCATTCGGAA\n+CGTCTTGGAGCAGAGATGGCACTT
GTAGGGCTCCTGGTCCTGTATAAAGC\n+AATAATCGGAATTTCACTTATATTTATCAATTCATCAATATGCCCTCATG\n+GCCAAATATTCCATTACATTACCGTCTGTCTCTCAGTTTCAAATTTATGC\n+ACAAAAATCATTCACTTTCATTCACTATATCACAAAGTTGCCATGGTTTT\n+AAATTGATCAAAAACAAATTAATATCTATCATATATaTACATAGTCATAT\n+GAACAGTTGAAAAATTAATTGAAAATAATGGGAACGATATACGTACATAC\n+ATCAGTTGTTTTTAAAATATAAGGGTATATAGATTTCTTTCttgttgttg\n+ttgatTTTAATTACGTCAAACTTTTGTTTCAGATTCAATGTAAATGGTCT\n+AGCTTTTTAAGTATGATTTTTTTTTGCTGCCAGTGAGCATAGAAaaaaaa\n+aatCaaaATCGATATAAGAATATGCGAAAGTGCATTACGAAACTCTTTAG\n+ATAATAGCACTTAATATATGTACATAGCCAATAGTTACCGGTTCCTTCTG\n+TTGGggttccttttgcttgggttcTCCCTCCGCATTTTCGTGGACTAAGC\n+GGACGTGCATGTCCCTCAGCTCGGTATTCCGGAAACTGAACTCGCAAATG\n+TCGCAGTGGGCGGGCGGGGTGGTCCGCACAGGCGGTGGGGTTGGGACGAC\n+GGGCTTGGACCTGGTTCGCTTGGCCCTCCGTTTGGGAGGAGCTGCGGCAA\n+GGAAGCCCCGGGACGAGGCGGGTTGGCCATTCGCTGGACTCTCGTTTCCC\n+TCCTGAGCCATCAGACTTGTGTGCGAGAACAGGTGGATGGTGAGCTTGTC\n+CAGCCCCAGGAAGAGCTCCTTGCAGTTGGCAAAGGGACAGGCCAGTGGGC\n+CGTTGGCCGCCTTGATCAGCCTCTGCTGCAGTGCGTCAATACTGCCGAAA\n+CTGGGCACCGCGCAGAGCGGACACAGCACCGAGGTGGAACACATTTCGCC\n+AGTGCACTCAATCGAATCTTATGCAAATGCTTCACCTCCTATTGGGATTA\n+TCCTCCTGTTCGGTCTGTGATCATCTATTCAGGAGTCCATTCCCAGACTG\n+CCTAGTCTTTCTGCTTTCAAAATTTTCTAAAAATATCAGCAAGTGAAGAT\n+TTTTGAAAACTTTGGGCCCAGCAATCTGACTTCTCGGCACCGATGCCAGC\n+TAACGAAATAATGAAaaataatgaaaTGCCCGGCGCGGATCGTCGAATCG\n+TCAAGAAGACTTTCGGAAACACTCGCAGCACCGAAATCCCATCTCTCGAA\n+CAAGGCAGTCTCTTTTCTCCGTGTCTCTGGGTAGCTCATTTCGAAATATA\n+GCTCTGAGCACGGCTATATACTATATGTATGTAGAATTATTTCTGGCCGA\n+TATATGTTGCACTGGCGGCCATATAGCCTTCGTTCTAGTCTTTGTAACGC\n+ACGATGCGCAGGAGCAATTCGCTGAGATGACCACATGCGATTTGCGGGAC\n+TTATCTAGAGATCTATCATTATCGCCAGATTGGTTTAATAATTGGCTTTT\n+CCGCCAATATCCAATTGGAATATGGTTGGTTACTGCAATTGTCGCTCCAT\n+TTTTTAAGCACTCCATAAAAAGTAAACACATTAATATGTACTCTTATTGG\n+AGATTTCTTCTTTCGATTTTAGTTTCGGACCAGTGAAAATCATTCGTTTC\n+ATTTTCGTAAATAAGAACTGAGAAAATATtattattatATATATTTCTTT\n+ATTAGGAAAATACGAAGATTGAGTATTTCAGATTGAATTAGCATATCCGT\n+CTAAATCTTAATGCTGTAATGAGCTTACTTGAGATCTGATCAAAACCAAT\n+ACaaaacccacacCAAAGGTGGTAGCTAATATACATATTTTGTGTAATAC\n+TTTTGTAGAG
TATTTACTATTCAGCGATTTAAACAAGCAATCGCCTAGAC\n+ACACACATTTGTCCGCCTATGTGTATGTGCACCGAGCTATACCCCCACTG\n+AATCGCTGTGTGCTATTTTTATGGCCGCGATGCTCTCTTGTTTTGACCCG\n+CTTGGGCAAC\n' |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_scratch/Genus_species.stats.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_scratch/Genus_species.stats.json Mon Oct 04 19:39:38 2021 +0000 |
b |
@@ -0,0 +1,119 @@ +{ + "format": "annotation", + "command": "/home/abretaud/miniconda3/envs/__funannotate@1.8.7/bin/funannotate predict --input /tmp/tmpm833xrq1/files/e/1/c/dataset_e1c34c74-e579-4cab-b0ed-5ce938ce4e4b.dat --out output --database /home/abretaud/.planemo/planemo_tmp_yntx6ieu/test-data/funannotate_db --species Genus species --isolate --strain --organism other --ploidy 1 --SeqCenter CFMR --SeqAccession 12345 --name FUN_ --numbering 1 --p2g_pident 80 --p2g_prefilter diamond --min_training_models 3 --busco_seed_species fly --busco_db insecta --evm-partition-interval 1500 --min_intronlen 10 --max_intronlen 3000 --min_protlen 50 --repeat_filter overlap blast --cpus 1", + "organism": "Genus_species", + "software": { + "name": "funannotate", + "version": "1.8.7", + "date": "2021-07-21", + "resources": { + "merops": { + "type": "diamond", + "version": "12.0", + "date": "2017-10-04", + "num-records": "5009" + }, + "uniprot": { + "type": "diamond", + "version": "2021_03", + "date": "2021-06-02", + "num-records": "565254" + }, + "dbCAN": { + "type": "hmmer3", + "version": "9.0", + "date": "2020-08-04", + "num-records": "641" + }, + "pfam": { + "type": "hmmer3", + "version": "34.0", + "date": "2021-03", + "num-records": "19179" + }, + "repeats": { + "type": "diamond", + "version": "1.0", + "date": "2021-07-19", + "num-records": "11950" + }, + "go": { + "type": "text", + "version": "2021-07-02", + "date": "2021-07-02", + "num-records": "47228" + }, + "mibig": { + "type": "diamond", + "version": "1.4", + "date": "2021-07-19", + "num-records": "31023" + }, + "interpro": { + "type": "xml", + "version": "86.0", + "date": "2021-06-03", + "num-records": "38913" + }, + "busco_outgroups": { + "type": "outgroups", + "version": "1.0", + "date": "2021-07-19", + "num-records": "8" + }, + "gene2product": { + "type": "text", + "version": "1.70", + "date": "2021-06-15", + "num-records": "34039" + } + } + }, + "assembly": { + "num_contigs": 4, + "length": 227430, + "mean_length": 
56857.5, + "N50": 215740, + "L50": 1, + "N90": 215740, + "L90": 1, + "GC_content": 42.86 + }, + "annotation": { + "genes": 16, + "common_name": 0, + "mRNA": 16, + "tRNA": 0, + "ncRNA": 0, + "rRNA": 0, + "avg_gene_length": 1660.69, + "transcript-level": { + "CDS_transcripts": 16, + "CDS_five_utr": 0, + "CDS_three_utr": 0, + "CDS_no_utr": 16, + "CDS_five_three_utr": 0, + "CDS_complete": 15, + "CDS_no-start": 0, + "CDS_no-stop": 1, + "CDS_no-start_no-stop": 0, + "total_exons": 42, + "total_cds_exons": 42, + "multiple_exon_transcript": 13, + "single_exon_transcript": 3, + "avg_exon_length": 402.36, + "avg_protein_length": 359.81, + "functional": { + "go_terms": 0, + "interproscan": 0, + "eggnog": 0, + "pfam": 0, + "cazyme": 0, + "merops": 0, + "busco": 0, + "secretion": 0 + }, + "pct_exon_overlap_protein_evidence": 11.9 + } + } +} \ No newline at end of file |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_scratch/Genus_species.tbl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_scratch/Genus_species.tbl Mon Oct 04 19:39:38 2021 +0000 |
b |
@@ -0,0 +1,234 @@ +>Feature sample +1 215740 REFERENCE + CFMR 12345 +1466 1092 gene + locus_tag FUN_000001 +1466 1092 mRNA + product hypothetical protein + transcript_id gnl|ncbi|FUN_000001-T1_mrna + protein_id gnl|ncbi|FUN_000001-T1 +1466 1092 CDS + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000001-T1_mrna + protein_id gnl|ncbi|FUN_000001-T1 +3142 2565 gene + locus_tag FUN_000002 +3142 3138 mRNA +3004 2883 +2686 2565 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000002-T1_mrna + protein_id gnl|ncbi|FUN_000002-T1 +3142 3138 CDS +3004 2883 +2686 2565 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000002-T1_mrna + protein_id gnl|ncbi|FUN_000002-T1 +5802 4248 gene + locus_tag FUN_000003 +5802 5797 mRNA +5539 4937 +4742 4248 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000003-T1_mrna + protein_id gnl|ncbi|FUN_000003-T1 +5802 5797 CDS +5539 4937 +4742 4248 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000003-T1_mrna + protein_id gnl|ncbi|FUN_000003-T1 +10664 7691 gene + locus_tag FUN_000004 +10664 10657 mRNA +10499 8707 +8385 7691 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000004-T1_mrna + protein_id gnl|ncbi|FUN_000004-T1 +10664 10657 CDS +10499 8707 +8385 7691 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000004-T1_mrna + protein_id gnl|ncbi|FUN_000004-T1 +15214 14247 gene + locus_tag FUN_000005 +15214 15209 mRNA +14648 14247 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000005-T1_mrna + protein_id gnl|ncbi|FUN_000005-T1 +15214 15209 CDS +14648 14247 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000005-T1_mrna + protein_id gnl|ncbi|FUN_000005-T1 +15539 16619 gene + locus_tag FUN_000006 +15539 15543 mRNA +15646 15919 +16485 16619 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000006-T1_mrna + protein_id gnl|ncbi|FUN_000006-T1 +15539 15543 CDS +15646 
15919 +16485 16619 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000006-T1_mrna + protein_id gnl|ncbi|FUN_000006-T1 +21705 18358 gene + locus_tag FUN_000007 +21705 21700 mRNA +21515 19638 +19482 18358 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000007-T1_mrna + protein_id gnl|ncbi|FUN_000007-T1 +21705 21700 CDS +21515 19638 +19482 18358 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000007-T1_mrna + protein_id gnl|ncbi|FUN_000007-T1 +35679 34843 gene + locus_tag FUN_000008 +35679 34843 mRNA + product hypothetical protein + transcript_id gnl|ncbi|FUN_000008-T1_mrna + protein_id gnl|ncbi|FUN_000008-T1 +35679 34843 CDS + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000008-T1_mrna + protein_id gnl|ncbi|FUN_000008-T1 +40223 44130 gene + locus_tag FUN_000009 +40223 40396 mRNA +40659 41193 +41707 42080 +43409 43609 +43678 44130 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000009-T1_mrna + protein_id gnl|ncbi|FUN_000009-T1 +40223 40396 CDS +40659 41193 +41707 42080 +43409 43609 +43678 44130 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000009-T1_mrna + protein_id gnl|ncbi|FUN_000009-T1 +87202 88320 gene + locus_tag FUN_000010 +87202 87207 mRNA +88054 88320 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000010-T1_mrna + protein_id gnl|ncbi|FUN_000010-T1 +87202 87207 CDS +88054 88320 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000010-T1_mrna + protein_id gnl|ncbi|FUN_000010-T1 +106221 102510 gene + locus_tag FUN_000011 +106221 106216 mRNA +104632 104258 +103947 103696 +103618 103229 +103151 102510 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000011-T1_mrna + protein_id gnl|ncbi|FUN_000011-T1 +106221 106216 CDS +104632 104258 +103947 103696 +103618 103229 +103151 102510 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000011-T1_mrna + 
protein_id gnl|ncbi|FUN_000011-T1 +167121 169212 gene + locus_tag FUN_000012 +167121 168069 mRNA +168722 169212 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000012-T1_mrna + protein_id gnl|ncbi|FUN_000012-T1 +167121 168069 CDS +168722 169212 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000012-T1_mrna + protein_id gnl|ncbi|FUN_000012-T1 +180262 180579 gene + locus_tag FUN_000013 +180262 180267 mRNA +180400 180579 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000013-T1_mrna + protein_id gnl|ncbi|FUN_000013-T1 +180262 180267 CDS +180400 180579 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000013-T1_mrna + protein_id gnl|ncbi|FUN_000013-T1 +210553 208619 gene + locus_tag FUN_000014 +210553 210548 mRNA +210474 209053 +208645 208619 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000014-T1_mrna + protein_id gnl|ncbi|FUN_000014-T1 +210553 210548 CDS +210474 209053 +208645 208619 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000014-T1_mrna + protein_id gnl|ncbi|FUN_000014-T1 +>Feature sample4 +1 7560 REFERENCE + CFMR 12345 +1466 1092 gene + locus_tag FUN_000015 +1466 1092 mRNA + product hypothetical protein + transcript_id gnl|ncbi|FUN_000015-T1_mrna + protein_id gnl|ncbi|FUN_000015-T1 +1466 1092 CDS + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000015-T1_mrna + protein_id gnl|ncbi|FUN_000015-T1 +2126 >3537 gene + locus_tag FUN_000016 +2126 2199 mRNA +2258 3224 +3284 >3537 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000016-T1_mrna + protein_id gnl|ncbi|FUN_000016-T1 +2126 2199 CDS +2258 3224 +3284 >3537 + codon_start 1 + product hypothetical protein + transcript_id gnl|ncbi|FUN_000016-T1_mrna + protein_id gnl|ncbi|FUN_000016-T1 |
b |
diff -r 000000000000 -r a5baa4ff168d test-data/predict_scratch/fly.parameters.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/predict_scratch/fly.parameters.json Mon Oct 04 19:39:38 2021 +0000 |
[ |
@@ -0,0 +1,1 @@ +{"augustus": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-21", "path": "/tmp/tmpm833xrq1/job_working_directory/000/2/working/output/predict_misc/ab_initio_parameters/augustus/species/genus_species"}], "genemark": [{}], "codingquarry": [{}], "snap": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-21", "path": "/tmp/tmpm833xrq1/job_working_directory/000/2/working/output/predict_misc/ab_initio_parameters/genus_species.snap.hmm"}], "glimmerhmm": [{"version": "funannotate v1.8.7", "source": "BUCSCO insecta", "date": "2021-07-21", "path": "/tmp/tmpm833xrq1/job_working_directory/000/2/working/output/predict_misc/ab_initio_parameters/glimmerhmm"}]} \ No newline at end of file |
b |
diff -r 000000000000 -r a5baa4ff168d tool-data/funannotate.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/funannotate.loc.sample Mon Oct 04 19:39:38 2021 +0000 |
b |
@@ -0,0 +1,8 @@ +# this is a tab separated file describing the location of funannotate databases used for the +# funannotate annotation tool +# +# the columns are: +# value description format_version path +# +# for example +# 2021-07-20-120000 Funannotate database 2021-07-20-120000 1.0 /tmp/database/funannotate/funannotate/2021-07-20-120000 |
b |
diff -r 000000000000 -r a5baa4ff168d tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Mon Oct 04 19:39:38 2021 +0000 |
b |
@@ -0,0 +1,6 @@ +<tables> + <table name="funannotate" comment_char="#" allow_duplicate_entries="False"> + <columns>value, description, format_version, path</columns> + <file path="tool-data/funannotate.loc" /> + </table> +</tables> |
b |
diff -r 000000000000 -r a5baa4ff168d tool_data_table_conf.xml.test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Mon Oct 04 19:39:38 2021 +0000 |
b |
@@ -0,0 +1,6 @@ +<tables> + <table name="funannotate" comment_char="#" allow_duplicate_entries="False"> + <columns>value, description, format_version, path</columns> + <file path="${__HERE__}/test-data/funannotate.loc" /> + </table> +</tables> |