Repository 'braker3'
hg clone https://toolshed.g2.bx.psu.edu/repos/genouest/braker3

Changeset 0:3c0865d1172f (2023-09-06)
Next changeset 1:bd103884a09c (2023-10-10)
Commit message:
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/braker commit d6e73c0417506d93c905b4aeb77d92350b662fbb
added:
README.rst
braker3.xml
job_conf_braker3.xml
macros.xml
test-data/SRR7458692.bam
test-data/genemark_license.gm_key
test-data/genome_masked.fa
test-data/gm_key_64
test-data/out_bam/braker.gtf
test-data/out_genome/braker.gff3
test-data/out_genome/braker.gtf
test-data/out_prot/braker.gtf
test-data/proteins.fa
b
diff -r 000000000000 -r 3c0865d1172f README.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst Wed Sep 06 09:50:28 2023 +0000
b
@@ -0,0 +1,41 @@
+Braker3
+=======
+
+This tool is not in IUC because the licensing restrictions on GeneMark and
+ProtHint make it impossible to test it using CI.
+
+GeneMark and ProtHint
+---------------------
+
+Braker requires GeneMark to predict genes, but due to licensing issues, we
+are not allowed to distribute GeneMark automatically.
+
+Braker also requires ProtHint to use protein sequences as hints to predict
+genes, but, again, due to licensing issues, we are not allowed to distribute
+ProtHint automatically.
+
+To use Braker3, the Galaxy administrator needs to install
+GeneMark, and set the ``GENEMARK_PATH`` variable on the job destination.
+
+The only version of GeneMark that works with Braker3 must be downloaded from
+http://topaz.gatech.edu/GeneMark/etp.for_braker.tar.gz
+This archive also contains ProtHint and various other tools in the specific versions needed by Braker3.
+
+Extract the archive, and set the ``GENEMARK_PATH`` variable to point to the extracted ``bin``
+directory.
+
+Also set the ``PROTHINT_PATH`` variable on the job destination, pointing to the extracted
+``bin/gmes/ProtHint/bin/`` directory.
+
+Running tests
+-------------
+
+Tests require working GeneMark and ProtHint installations, which means
+both ``GENEMARK_PATH`` and ``PROTHINT_PATH`` must be set in ``job_conf_braker3.xml``.
+
+You should then use the ``--job_config_file job_conf_braker3.xml``
+option for planemo commands.
+
+You should also copy a valid GeneMark license (from
+http://topaz.gatech.edu/GeneMark/license_download.cgi) to
+``test-data/gm_key_64``.
\ No newline at end of file
b
diff -r 000000000000 -r 3c0865d1172f braker3.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/braker3.xml Wed Sep 06 09:50:28 2023 +0000
[
b'@@ -0,0 +1,203 @@\n+<tool id="braker3" name="BRAKER3" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">\n+    <description>genome annotation</description>\n+    <macros>\n+        <import>macros.xml</import>\n+    </macros>\n+    <requirements>\n+        <expand macro="requirements" />\n+    </requirements>\n+    <version_command><![CDATA[braker.pl --version; $GENEMARK_PATH/gmes/gmes_petap.pl | grep version]]></version_command>\n+    <command><![CDATA[\n+if [ -z "\\$GENEMARK_PATH" ] ; then echo "GeneMark is not installed on this Galaxy server." >&2 ; exit 1 ; fi &&\n+if [ ! -f "\\$GENEMARK_PATH/gmes/gmes_petap.pl" ] ; then echo "GeneMark is not installed properly on this Galaxy server." >&2 ; exit 1 ; fi &&\n+\n+## This specific GeneMark version has some tools bundled with it, forced to use it.\n+## I feel dirty. Pardon me.\n+\n+export PATH="\\$GENEMARK_PATH/../tools/:\\$PATH" &&\n+\n+## GeneMark only search for license in ~/.gm_key\n+cp \'${genemark_license}\' ~/.gm_key &&\n+\n+braker.pl\n+--genome \'${genome}\'\n+cd \n+$softmasking\n+\n+#if $evidences.bam:\n+    --bam ${evidences.bam}\n+#end if\n+\n+#if $evidences.prot_seq:\n+    --prot_seq ${evidences.prot_seq}\n+#end if\n+\n+## No hints, use esmode\n+#if not $evidences.bam and not $evidences.prot_seq\n+    --esmode\n+#end if\n+\n+#if $output_format == \'gff3\'\n+    --gff3\n+#end if\n+\n+$genemark.fungus\n+\n+$augustus.crf\n+--rounds $augustus.rounds\n+$augustus.AUGUSTUS_ab_initio\n+$augustus.keepCrf\n+\n+$advanced.UTR\n+\n+$advanced.filterOutShort\n+\n+#if $advanced.eval:\n+    --eval ${advanced.eval}\n+#end if\n+\n+#if $advanced.eval_pseudo:\n+    --eval_pseudo ${advanced.eval_pseudo}\n+#end if\n+\n+#if $species:\n+    --species \'$species\'\n+#end if\n+\n+$advanced.alternatives_from_evidence\n+\n+#if $dev.splice_sites:\n+    --splice_sites \'$dev.splice_sites\'\n+#end if\n+\n+#if $dev.min_contig:\n+    --min_contig $dev.min_contig\n+#end if\n+\n+--gc_probability 
$dev.gc_probability\n+--downsampling_lambda $dev.downsampling_lambda\n+\n+#if $dev.gm_max_intergenic:\n+--gm_max_intergenic $dev.gm_max_intergenic\n+#end if\n+\n+--threads  \\${GALAXY_SLOTS:-2}\n+]]></command>\n+    <inputs>\n+        <param name="genemark_license" type="data" format="txt" label="GeneMark license file" help="Braker uses GeneMark, which is not a free software, to use it download and unzip a license from http://topaz.gatech.edu/GeneMark/license_download.cgi (ES/ET/EP version). GeneMark needs to be installed manually by Galaxy administrators." />\n+\n+        <param argument="--genome" type="data" format="fasta" label="Assembly to annotate" help="The assembly should preferably be soft-masked (with RepeatMasker for example)" />\n+\n+        <param argument="--softmasking" type="boolean" checked="true" truevalue="" falsevalue="--softmasking_off" label="Genome sequence is soft-masked" />\n+\n+        <param argument="--species" type="text" label="Species name" optional="true" help="Using Sp_1, if no species is assigned"/>\n+\n+        <section name="evidences" expanded="true" title="Evidences">\n+            <param argument="--bam" type="data" format="bam" optional="true" label="RNA-seq mapped to genome to train Augustus/GeneMark" />\n+            <param argument="--prot_seq" type="data" format="fasta" optional="true" label="Proteins to map to genome" />\n+        </section>\n+\n+        <section name="genemark" expanded="true" title="GeneMark">\n+            <param argument="--fungus" type="boolean" checked="false" truevalue="--fungus" falsevalue="" label="Fungal genome" help="GeneMark-EX option, run algorithm with branch point model (most useful for fungal genomes)" />\n+        </section>\n+\n+        <section name="augustus" expanded="true" title="Augustus">\n+            <param argument="--crf" type="boolean" checked="false" truevalue="--crf" falsevalue="" label="Use CRF training for Augustus" help="Alternate training method (Conditional Random 
Field)" />\n+            <param argument="--rounds" type="integer" value="5" label="Number of optimization rounds used in optimize_augustus.pl" />\n+         '..b'ty" type="float" min="0" max="1" value="0.001" label="Probablity for donor splice site pattern GC for gene prediction with GeneMark-EX"/>\n+            <param argument="--gm_max_intergenic" type="integer" optional="true" label="Maximum allowed size of intergenic regions in GeneMark-EX" help="If not set, the value is automatically determined by GeneMark-EX"/>\n+            <param argument="--downsampling_lambda" type="integer" min="0" value="2" label="Lambda parameter of the Poisson distribution" help="for downsampling of training gene structures according to their number of introns distribution"/>\n+        </section>\n+\n+        <param name="output_format" type="select" label="Output format">\n+            <option value="gtf" selected="true">GTF</option>\n+            <option value="gff3">GFF3</option>\n+        </param>\n+    </inputs>\n+\n+    <outputs>\n+        <data name=\'output_gtf\' format=\'gtf\' label="GTF Annotation" from_work_dir="braker/braker.gtf">\n+            <filter>output_format == \'gtf\'</filter>\n+        </data>\n+        <data name=\'output_gff\' format=\'gff3\' label="GFF Annotation" from_work_dir="braker/braker.gff3">\n+            <filter>output_format == \'gff3\'</filter>\n+        </data>\n+    </outputs>\n+    \n+    <tests>\n+        <test expect_failure="true">\n+            <param name="genemark_license" value="gm_key_64"/>\n+            <param name="genome" value="genome_masked.fa"/>\n+            <section name="evidences">\n+                <param name="bam" value="SRR7458692.bam"/>\n+            </section>\n+            <param name="output_format" value="gtf" />\n+        </test>\n+    </tests>\n+\n+    <!-- <test expect_num_outputs="1">\n+            <param name="genemark_license" value="gm_key_64" />\n+            <param name="genome" value="genome_masked.fa" />\n+  
          <section name="augustus">\n+                <param name="rounds" value="2" />\n+            </section>\n+            <section name="evidences">\n+                <param name="bam" value="SRR7458692.bam" />\n+            </section>\n+            <param name="output_format" value="gtf" />\n+            <output name="output_gtf" file="out_genome/braker.gtf" sort="true"/>\n+        </test>\n+\n+        <test expect_num_outputs="1">\n+            <param name="genemark_license" value="gm_key_64" />\n+            <param name="genome" value="genome_masked.fa" />\n+            <section name="augustus">\n+                <param name="rounds" value="2" />\n+            </section>\n+            <section name="evidences">\n+                <param name="bam" value="SRR7458692.bam" />\n+                <param name="prot_seq" value="proteins.fa" />\n+            </section>\n+            <param name="output_format" value="gff3" />\n+            <output name="output_gff" file="out_genome/braker.gff3"/>\n+        </test>\n+\n+        <test expect_num_outputs="1">\n+            <param name="genemark_license" value="gm_key_64" />\n+            <param name="genome" value="genome_masked.fa" />\n+            <section name="augustus">\n+                <param name="rounds" value="2" />\n+            </section>\n+            <section name="evidences">\n+                <param name="prot_seq" value="proteins.fa" />\n+            </section>\n+            <param name="output_format" value="gff3" />\n+            <output name="output_gff" file="out_genome/braker.gff3"/>\n+        </test> -->\n+\n+    \n+    <help><![CDATA[\n+\n+Braker3_ allows for fully automated training of the gene prediction tools GeneMark-EX and AUGUSTUS from RNA-Seq and/or protein homology information, and that integrates the extrinsic evidence from RNA-Seq and protein homology information into the prediction.\n+\n+In contrast to other available methods that rely on protein homology information, BRAKER3 reaches 
high gene prediction accuracy even in the absence of the annotation of very closely related species and in the absence of RNA-Seq data.\n+\n+.. _Braker3: https://github.com/Gaius-Augustus/BRAKER\n+    ]]></help>\n+    <expand macro="citations" />\n+</tool>\n'
b
diff -r 000000000000 -r 3c0865d1172f job_conf_braker3.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/job_conf_braker3.xml Wed Sep 06 09:50:28 2023 +0000
b
@@ -0,0 +1,20 @@
+<!-- Job configuration for running the Braker3 tool tests with planemo. Adapt the GENEMARK_PATH and PROTHINT_PATH values below to your own install of GeneMark and ProtHint, then run planemo with the job_config_file option (see README.rst). -->
+<job_conf>
+    <plugins>
+        <plugin id="planemo_runner" type="runner" load="galaxy.jobs.runners.local:LocalJobRunner" workers="4"/> <!-- single local job runner with 4 workers -->
+    </plugins>
+    <handlers> <!-- intentionally empty: no handlers are declared -->
+    </handlers>
+    <destinations default="planemo_dest"> <!-- planemo_dest is the default destination; only tools listed under <tools> go elsewhere -->
+        <destination id="planemo_dest" runner="planemo_runner">
+            <env id="GENEMARK_PATH">/home/abretaud/softs/etp.for_braker/bin/</env> <!-- example value: the "bin" directory of the extracted etp.for_braker archive; replace with your own path -->
+            <env id="PROTHINT_PATH">/home/abretaud/softs/etp.for_braker/bin/gmes/ProtHint/bin/</env> <!-- example value: the ProtHint "bin" directory inside that same archive; replace with your own path -->
+        </destination>
+        <destination id="upload_dest" runner="planemo_runner"> <!-- separate destination used for uploads, without the GeneMark/ProtHint variables -->
+            <param id="docker_enabled">false</param> <!-- Docker is explicitly disabled for this destination -->
+        </destination>
+    </destinations>
+    <tools>
+        <tool id="upload1" destination="upload_dest" /> <!-- route the upload1 tool to upload_dest -->
+    </tools>
+</job_conf>
b
diff -r 000000000000 -r 3c0865d1172f macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Wed Sep 06 09:50:28 2023 +0000
b
@@ -0,0 +1,131 @@
+<?xml version="1.0"?>
+<macros> <!-- Shared tokens and macros imported by braker3.xml -->
+    <token name="@TOOL_VERSION@">3.0.3</token> <!-- Braker3 version; also used as the version of the braker3 package requirement below -->
+    <token name="@VERSION_SUFFIX@">0</token> <!-- wrapper revision; braker3.xml appends it to the tool version after "+galaxy" -->
+
+    <xml name="requirements"> <!-- expanded inside the <requirements> element of braker3.xml -->
+        <requirement type="package" version="@TOOL_VERSION@">braker3</requirement>
+    </xml>
+
+    <xml name="citations"> <!-- expanded at the end of braker3.xml; each entry is a DOI -->
+        <citations>
+            <citation type="doi">10.1093/nargab/lqaa108</citation>
+            <citation type="doi">10.1007/978-1-4939-9173-0_5</citation>
+            <citation type="doi">10.1093/bioinformatics/btv661</citation>
+            <citation type="doi">10.1093/bioinformatics/btn013</citation>
+            <citation type="doi">10.1186/1471-2105-7-62</citation>
+        </citations>
+    </xml>
+
+    <xml name="augustus_species"> <!-- NOTE(review): no expand of this macro appears in the visible part of braker3.xml (species is a free-text parameter there); confirm it is still used -->
+        <!-- Species option list (value and label are identical), generated from a Funannotate database directory by listing trained_species/* -->
+        <option value="adorsata">adorsata</option>
+        <option value="aedes">aedes</option>
+        <option value="amphimedon">amphimedon</option>
+        <option value="ancylostoma_ceylanicum">ancylostoma_ceylanicum</option>
+        <option value="anidulans">anidulans</option>
+        <option value="arabidopsis">arabidopsis</option>
+        <option value="aspergillus_fumigatus">aspergillus_fumigatus</option>
+        <option value="aspergillus_nidulans">aspergillus_nidulans</option>
+        <option value="aspergillus_oryzae">aspergillus_oryzae</option>
+        <option value="aspergillus_terreus">aspergillus_terreus</option>
+        <option value="bombus_impatiens1">bombus_impatiens1</option>
+        <option value="bombus_terrestris2">bombus_terrestris2</option>
+        <option value="botrytis_cinerea">botrytis_cinerea</option>
+        <option value="b_pseudomallei">b_pseudomallei</option>
+        <option value="brugia">brugia</option>
+        <option value="cacao">cacao</option>
+        <option value="caenorhabditis">caenorhabditis</option>
+        <option value="camponotus_floridanus">camponotus_floridanus</option>
+        <option value="candida_albicans">candida_albicans</option>
+        <option value="candida_guilliermondii">candida_guilliermondii</option>
+        <option value="candida_tropicalis">candida_tropicalis</option>
+        <option value="c_elegans_trsk">c_elegans_trsk</option>
+        <option value="chaetomium_globosum">chaetomium_globosum</option>
+        <option value="chicken">chicken</option>
+        <option value="chiloscyllium">chiloscyllium</option>
+        <option value="chlamy2011">chlamy2011</option>
+        <option value="chlamydomonas">chlamydomonas</option>
+        <option value="chlorella">chlorella</option>
+        <option value="ciona">ciona</option>
+        <option value="coccidioides_immitis">coccidioides_immitis</option>
+        <option value="Conidiobolus_coronatus">Conidiobolus_coronatus</option>
+        <option value="coprinus">coprinus</option>
+        <option value="coprinus_cinereus">coprinus_cinereus</option>
+        <option value="coyote_tobacco">coyote_tobacco</option>
+        <option value="cryptococcus">cryptococcus</option>
+        <option value="cryptococcus_neoformans_gattii">cryptococcus_neoformans_gattii</option>
+        <option value="cryptococcus_neoformans_neoformans_B">cryptococcus_neoformans_neoformans_B</option>
+        <option value="cryptococcus_neoformans_neoformans_JEC21">cryptococcus_neoformans_neoformans_JEC21</option>
+        <option value="culex">culex</option>
+        <option value="debaryomyces_hansenii">debaryomyces_hansenii</option>
+        <option value="E_coli_K12">E_coli_K12</option>
+        <option value="elephant_shark">elephant_shark</option>
+        <option value="encephalitozoon_cuniculi_GB">encephalitozoon_cuniculi_GB</option>
+        <option value="eremothecium_gossypii">eremothecium_gossypii</option>
+        <option value="fly">fly</option>
+        <option value="fly_exp">fly_exp</option>
+        <option value="fusarium">fusarium</option>
+        <option value="fusarium_graminearum">fusarium_graminearum</option>
+        <option value="galdieria">galdieria</option>
+        <option value="generic">generic</option>
+        <option value="heliconius_melpomene1">heliconius_melpomene1</option>
+        <option value="histoplasma">histoplasma</option>
+        <option value="histoplasma_capsulatum">histoplasma_capsulatum</option>
+        <option value="honeybee1">honeybee1</option>
+        <option value="human">human</option>
+        <option value="japaneselamprey">japaneselamprey</option>
+        <option value="kluyveromyces_lactis">kluyveromyces_lactis</option>
+        <option value="laccaria_bicolor">laccaria_bicolor</option>
+        <option value="leishmania_tarentolae">leishmania_tarentolae</option>
+        <option value="lodderomyces_elongisporus">lodderomyces_elongisporus</option>
+        <option value="magnaporthe_grisea">magnaporthe_grisea</option>
+        <option value="maize">maize</option>
+        <option value="maize5">maize5</option>
+        <option value="mnemiopsis_leidyi">mnemiopsis_leidyi</option>
+        <option value="nasonia">nasonia</option>
+        <option value="nematostella_vectensis">nematostella_vectensis</option>
+        <option value="neurospora">neurospora</option>
+        <option value="neurospora_crassa">neurospora_crassa</option>
+        <option value="parasteatoda">parasteatoda</option>
+        <option value="pchrysosporium">pchrysosporium</option>
+        <option value="pea_aphid">pea_aphid</option>
+        <option value="pfalciparum">pfalciparum</option>
+        <option value="phanerochaete_chrysosporium">phanerochaete_chrysosporium</option>
+        <option value="pichia_stipitis">pichia_stipitis</option>
+        <option value="pisaster">pisaster</option>
+        <option value="pneumocystis">pneumocystis</option>
+        <option value="rhincodon">rhincodon</option>
+        <option value="rhizopus_oryzae">rhizopus_oryzae</option>
+        <option value="rhodnius">rhodnius</option>
+        <option value="rice">rice</option>
+        <option value="saccharomyces">saccharomyces</option>
+        <option value="saccharomyces_cerevisiae_rm11-1a_1">saccharomyces_cerevisiae_rm11-1a_1</option>
+        <option value="saccharomyces_cerevisiae_S288C">saccharomyces_cerevisiae_S288C</option>
+        <option value="s_aureus">s_aureus</option>
+        <option value="schistosoma">schistosoma</option>
+        <option value="schistosoma2">schistosoma2</option>
+        <option value="schizosaccharomyces_pombe">schizosaccharomyces_pombe</option>
+        <option value="scyliorhinus">scyliorhinus</option>
+        <option value="sealamprey">sealamprey</option>
+        <option value="s_pneumoniae">s_pneumoniae</option>
+        <option value="strongylocentrotus_purpuratus">strongylocentrotus_purpuratus</option>
+        <option value="sulfolobus_solfataricus">sulfolobus_solfataricus</option>
+        <option value="template_prokaryotic">template_prokaryotic</option>
+        <option value="tetrahymena">tetrahymena</option>
+        <option value="thermoanaerobacter_tengcongensis">thermoanaerobacter_tengcongensis</option>
+        <option value="tomato">tomato</option>
+        <option value="toxoplasma">toxoplasma</option>
+        <option value="tribolium2012">tribolium2012</option>
+        <option value="trichinella">trichinella</option>
+        <option value="ustilago">ustilago</option>
+        <option value="ustilago_maydis">ustilago_maydis</option>
+        <option value="verticillium_albo_atrum1">verticillium_albo_atrum1</option>
+        <option value="verticillium_longisporum1">verticillium_longisporum1</option>
+        <option value="volvox">volvox</option>
+        <option value="wheat">wheat</option>
+        <option value="Xipophorus_maculatus">Xipophorus_maculatus</option>
+        <option value="yarrowia_lipolytica">yarrowia_lipolytica</option>
+        <option value="zebrafish">zebrafish</option>
+    </xml>
+</macros>
b
diff -r 000000000000 -r 3c0865d1172f test-data/SRR7458692.bam
b
Binary file test-data/SRR7458692.bam has changed
b
diff -r 000000000000 -r 3c0865d1172f test-data/genome_masked.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome_masked.fa Wed Sep 06 09:50:28 2023 +0000
b
b'@@ -0,0 +1,14651 @@\n+>sample\n+GTGGCCGGCTGATTTCACGTCCTAACTTTGGGCTTAACTGGTTCGCCAGT\n+TGACTTTCTTCGCCATCATGTGATGCATTAATTAAACAATAATTACTAAT\n+TGacagtaattaataattgTGGCAAAAAGCGCGACACGTTTTTTCGGCAA\n+ACTCCTCGGAAGACCGATTGTTTAAAGACGTAGGGAAAAGTAGTTCCCAA\n+GCATTTTAAAAAGATACCTATGACATGTGACACCTTTAAAGTGCAATACA\n+AGTTTTCATCTCTTTATATCCTTTTACTCCCTAATTTGAATATAAAAGGA\n+ATCGCATTGAGAGTATAAAGGCTTTAGTTCTTATCGATagatagttagtG\n+ATGAAATAAAATTATAACCGTGGTTTTAGTTTGAAATGTTGTAAAGACTT\n+TCTTTAAATTTAACCAAATTTATGTGATAAAATGGATATTCCATAGATAA\n+GACATTTAAGTTAAATGTTTTTATACATCAAAAAGGAAACATTGTGCACG\n+CTATCAAATGGTATTCTTAAAATCGAGTCAgttaggtaagttattaatta\n+aATGGTAACTTTTTAACGTGCGTCAAATAATCTAGAAATTCTTCTTCAAC\n+TCATCCAAAACATTCTCAACACCACAATATCTATGCTCAGCGATGACAAA\n+TTTCTCCTGATTTCTTAATTTTCTATCTATGctatgcGATCAATCAACGA\n+ATGTGTGCTAATTTCTTGTGACGATTATTTTGCAAAGTCGTCTCCGCGTT\n+AATATCCGATGTAAATAAACCTATGAAAATCGCAAAGATCTATTCCTTTG\n+CGCTTAACCTTGTTATTGAACTCCCTCCCGCCCGGATTTTCGCAGCTTCC\n+AACTAAGTGATACCTTTTAAACAAACGCCACAACAAAACAGGTGACAATC\n+ATATTTTATCAACAAAAGAAAAGAGAGATAATATCTGCTGCTAATTCAAT\n+TTATTGGGCTTTGTGTTTATTTGCATTGGGAATCCGTGGAGCTGATATTG\n+TTTACTTTGGCAATTTCCCAGTCATTATGGTCGCCGTATAATGTGAATGG\n+CCAACAGGAAAATTTCACAGATTCCACTGGTTATTCACTGTTCATGCTGG\n+CGGTTGATCCGCTCCAAATCCCGGATGAGGCGGCGGAAGATCCTTATACC\n+GCTTACTTACGTATTCACTGCTTAACATCTTATCATAGAATTCGTTAGCT\n+TTCACATAGTTTGGATTCCGAATGAATCTCTTCTGTCGACCTCGCGTTTT\n+TGGCAATTTTCGGAGCTCTTCGCTACTGCTAGTTGATTTAAGGCCTACAT\n+CTTTCTGATCCTTAATCATCCTTTTAGGTGATTCTCCTGCTACCATGGGA\n+TCCACCAGTTTCATATATAATGGATCGGGCTCGTTTGCAAGTGGGTTTCC\n+AGTTTCAGAAGACTCTGACGTTCTAAGCGCCAATAAAATGAAAATAACCA\n+AAAGGAAACTAGACATGTTTCGTTACAGACAGATATAGATTGGATATTAT\n+TGAAAGAAAATgaaaataAACAGCGATAATGATCTGTGACTTATTGGAAA\n+TTAGATGGCTTATGGATGATGGGGCGATAAATTCGAACAAACACTGAGAG\n+CATTTTTGGGAGCATTGTGGGAGCTTTTCTATTATCCAGTACTTTATATA\n+CATATATCATTTATATACTAATCATTTCTGGTAGCCGTTCGTAATCAGGA\n+TCGGATCCTTTTTTACCCGTTAGTCAGCTAGAAGAAACGAAAAATTAAAA\n+TAGTAAAatctaaaagtatacaaaaattcaaatagtaaaaccaaaaagta\n+ttaaaaaaAATATCAATCGTTTTTAAACGTTGATTTTTCAGCTTGTGGGG\n+TGATT
TATCGCTAACTTGGAAAATGATAATAAAGCATTATCCATAATATT\n+AGTTGTGGAAATGAAATTCAAATAGATGTTGTGTTATATACGATGAGGAT\n+GTTGCATTTGAGTCCCCGGAAATATAGTATTTTTTTTACCGAAGGTATTA\n+TCGTACCGGTCAAGTACGGTCACACTGCCAAGCGCAGATTTGAGGATTTC\n+TAGATTTGGCCTCTTGATGGACTAGAAGCGCTACCAAAACTGGGGCTTGA\n+GTTGAATTACCTGTTGGAAGACACAATGCCACCCACGATCAACAATTCGG\n+CGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAG\n+TAAGTGAACAGATCCCTAAACAGACGCCAGATACTCAGACTGATGTGTAC\n+CTTGCAGATCCGAGATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCG\n+GATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACAGCCACCG\n+CTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACG\n+TGCTGACGGAACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAG\n+CTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACT\n+GCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGC\n+ATTCGAGGACGGTGTCATGGTTGCGCAAATCCGAGTACATCTCCACCGAG\n+CAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGG\n+TTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCG\n+AAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAAGAGCGAA\n+ATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCC\n+TATCTTCCCCGACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATAT\n+TTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAG\n+GAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACA\n+GTTTGTCGCCTACTTCCTGCCCACAGAGCAGACGCTGGAGAAACGCCGTA\n+CAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtac\n+aagATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGG\n+CTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATCTACTACA\n+ACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGC\n+CAGCAACCCAACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCT\n+ATAGCGAGCTTTACTTTGTATTATTTCTACCAGGTTGTCAAGCATCGTCC\n+ATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAG\n+AAGTTCCTGGCGAGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAG\n+CAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGACGCAGCTGT\n+TGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCG\n+ATCGACAGTCTCGTTCTCGGAGTCGAACTCGCAGCGGGTCCAgttcagga\n+tctggatctggctccggctcTCGGGCCAgcagccgctcaaagtctggttc\n+tcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCC\n+AGAAATCCGGatccagatcgagatcggtatcacgttcccgatcccgttcc\n+aagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagtccgg
ttcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCT'..b'cgcgttctaggtcgagatccaagtccggttcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTTCTGGTTCAGGATCCAGCTCTGGAAGCGCCTCA\n+GATGAATGATTAATTACAAAAAACGGCGTTCATAATAAATAAGTTTATAA\n+TCAACCAAGTACATTTGAAAACTGAACTAACTCGATTTAATATCATTTTC\n+GCCTCAACTCAGCGCTCGGGTTCGTTGCCCAGAATAGTTTTAAATAAAAT\n+CGGCAGTTTAACATAATTTATATTAGATGTtgttgttgtATTGCAAACAA\n+GTCGGGTCCTAGTCGATTTACACTTGGCTGAGATAAAACAACTAAGATTC\n+AAATAATATCCTCATAAGAAGATGTAATTAAGACGTTTTTCTTAGGGGGT\n+GCTTAGGAATTGATTAGATCGCCTTTGGGGAAGTGCAAACAATGTAAaat\n+gatataaaagggtataaaTTAAGTGGATATATGCATCTTCGTTCCAACTA\n+CGTGGCGTCCATCAAAAAGCGCTGGAAGACTTCGCCATCGGAACTAGGTA\n+GCTCTGTTTGTTGCGGTGAGTAGATTCTCAAGTTCTGGAGTTGCTGCAGC\n+GGAGAGCCATTCCCGCTGAAGTGTACCACCGCAACCGGCTGTAGGGATAT\n+GAGCTGTCCCTCCTCCCGCGGCTCCACACCACAGATGCCCAGCTTTTGGC\n+ACTGCTCAACGACAATGTCGTCGATTGACTGCGAAAGCAGTGCCTCCTGC\n+TCAGGATCCACAATGGAGCTGCTGTTGATGGCAGCTATTTCGGCGCTCGC\n+TGGTGAAGGCTGAGTATGCGAGTCGTTGCCCTCCAGGAACGCCTTTATCA\n+GCTGCTCCGGTGTCTGGGCCTCCTCGGTGGGACATCGATGGGTCCTTTGA\n+CTGACCTTGTACCTGAACGTCTTTTGGCAGAGCTCGCACTTGTAGGGCAT\n+CACTCCCGTGTGGATGCGCGTGTGGACAAGGAAAGAGACTCGCTGCCGGA\n+AACACTTGCCTGTGGACGAGTCCGAAATACGAAAAGGTTAGACATGGAGT\n+GACCCGGAAAAGAAGGTATACCTCTCCTTCATTTAAAGTAAATAGGGCAA\n+ATCGCAATGGAGTATGCTCATTTATAAGCTGGCTAACAAAATAAGGGGCG\n+GCTAATTAAGGGGTTTGATCGATACTCACCGCAGACTTCGCACTTGAAGG\n+GCTTTTCGCCGCTGTGAATCCTCTGGTGGTTATGCAGCGTAGACAGTTCC\n+TTGAAGGCGCGTCCACAAACCCCGCAGACATGGGGCTTTACCTCGCTGTG\n+GTAGAGCAAATGCTTGTGGTACGACTGCTGGAAGGTGAAGGTCCTGGCGC\n+AGATCTCGCATGTGTACGGCATCTCGCCGGTGTGGAGCCGCTTGTGCTTC\n+TTCAGAAAGTACTTGGTGGTGAAGGACTTGCTGCACACATCGCACTCCCA\n+CAGCTTCGGGGTGGCCGTGCCCGACTCCGGCGAACTGGGGGACTGCTGGA\n+GCATGCTCAGCGCCCCACTCAGCGCGTATGGCTGGGATGCGGTGCACGTG\n+GAGTTATTTCCGTTGCCAAGGGCTCCAGGCTCTGTGTGTCGAATGCGATC\n+GCAAATGCTCAGCTTGGGCGTGGCAGTAACTGCACTGGTGGTAGAAGCGG\n+GTGTAGGACtgggattaggatttggattgggattggAGCAGGGCACGCCC\n+ATCATGTGCACTACTTTCAGGTGGATTCGAAGGGAGCCCTTCATTCGGAA\n+CGTCTTGGAGCAGAGATGGCACTT
GTAGGGCTCCTGGTCCTGTATAAAGC\n+AATAATCGGAATTTCACTTATATTTATCAATTCATCAATATGCCCTCATG\n+GCCAAATATTCCATTACATTACCGTCTGTCTCTCAGTTTCAAATTTATGC\n+ACAAAAATCATTCACTTTCATTCACTATATCACAAAGTTGCCATGGTTTT\n+AAATTGATCAAAAACAAATTAATATCTATCATATATaTACATAGTCATAT\n+GAACAGTTGAAAAATTAATTGAAAATAATGGGAACGATATACGTACATAC\n+ATCAGTTGTTTTTAAAATATAAGGGTATATAGATTTCTTTCttgttgttg\n+ttgatTTTAATTACGTCAAACTTTTGTTTCAGATTCAATGTAAATGGTCT\n+AGCTTTTTAAGTATGATTTTTTTTTGCTGCCAGTGAGCATAGAAaaaaaa\n+aatCaaaATCGATATAAGAATATGCGAAAGTGCATTACGAAACTCTTTAG\n+ATAATAGCACTTAATATATGTACATAGCCAATAGTTACCGGTTCCTTCTG\n+TTGGggttccttttgcttgggttcTCCCTCCGCATTTTCGTGGACTAAGC\n+GGACGTGCATGTCCCTCAGCTCGGTATTCCGGAAACTGAACTCGCAAATG\n+TCGCAGTGGGCGGGCGGGGTGGTCCGCACAGGCGGTGGGGTTGGGACGAC\n+GGGCTTGGACCTGGTTCGCTTGGCCCTCCGTTTGGGAGGAGCTGCGGCAA\n+GGAAGCCCCGGGACGAGGCGGGTTGGCCATTCGCTGGACTCTCGTTTCCC\n+TCCTGAGCCATCAGACTTGTGTGCGAGAACAGGTGGATGGTGAGCTTGTC\n+CAGCCCCAGGAAGAGCTCCTTGCAGTTGGCAAAGGGACAGGCCAGTGGGC\n+CGTTGGCCGCCTTGATCAGCCTCTGCTGCAGTGCGTCAATACTGCCGAAA\n+CTGGGCACCGCGCAGAGCGGACACAGCACCGAGGTGGAACACATTTCGCC\n+AGTGCACTCAATCGAATCTTATGCAAATGCTTCACCTCCTATTGGGATTA\n+TCCTCCTGTTCGGTCTGTGATCATCTATTCAGGAGTCCATTCCCAGACTG\n+CCTAGTCTTTCTGCTTTCAAAATTTTCTAAAAATATCAGCAAGTGAAGAT\n+TTTTGAAAACTTTGGGCCCAGCAATCTGACTTCTCGGCACCGATGCCAGC\n+TAACGAAATAATGAAaaataatgaaaTGCCCGGCGCGGATCGTCGAATCG\n+TCAAGAAGACTTTCGGAAACACTCGCAGCACCGAAATCCCATCTCTCGAA\n+CAAGGCAGTCTCTTTTCTCCGTGTCTCTGGGTAGCTCATTTCGAAATATA\n+GCTCTGAGCACGGCTATATACTATATGTATGTAGAATTATTTCTGGCCGA\n+TATATGTTGCACTGGCGGCCATATAGCCTTCGTTCTAGTCTTTGTAACGC\n+ACGATGCGCAGGAGCAATTCGCTGAGATGACCACATGCGATTTGCGGGAC\n+TTATCTAGAGATCTATCATTATCGCCAGATTGGTTTAATAATTGGCTTTT\n+CCGCCAATATCCAATTGGAATATGGTTGGTTACTGCAATTGTCGCTCCAT\n+TTTTTAAGCACTCCATAAAAAGTAAACACATTAATATGTACTCTTATTGG\n+AGATTTCTTCTTTCGATTTTAGTTTCGGACCAGTGAAAATCATTCGTTTC\n+ATTTTCGTAAATAAGAACTGAGAAAATATtattattatATATATTTCTTT\n+ATTAGGAAAATACGAAGATTGAGTATTTCAGATTGAATTAGCATATCCGT\n+CTAAATCTTAATGCTGTAATGAGCTTACTTGAGATCTGATCAAAACCAAT\n+ACaaaacccacacCAAAGGTGGTAGCTAATATACATATTTTGTGTAATAC\n+TTTTGTAGAG
TATTTACTATTCAGCGATTTAAACAAGCAATCGCCTAGAC\n+ACACACATTTGTCCGCCTATGTGTATGTGCACCGAGCTATACCCCCACTG\n+AATCGCTGTGTGCTATTTTTATGGCCGCGATGCTCTCTTGTTTTGACCCG\n+CTTGGGCAAC\n'
b
diff -r 000000000000 -r 3c0865d1172f test-data/out_bam/braker.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_bam/braker.gtf Wed Sep 06 09:50:28 2023 +0000
b
b'@@ -0,0 +1,465 @@\n+sample\tAUGUSTUS\tstart_codon\t79526\t79528\t.\t+\t0\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t79526\t80474\t0.4\t+\t0\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\texon\t79526\t80474\t.\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tintron\t80475\t87860\t0.49\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t87861\t88125\t0.39\t+\t2\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\texon\t87861\t88125\t.\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tintron\t88126\t92148\t0.19\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t92149\t92252\t0.17\t+\t1\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\texon\t92149\t92252\t.\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tintron\t92253\t94562\t0.19\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t94563\t94732\t0.96\t+\t2\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\texon\t94563\t94732\t.\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tintron\t94733\t94872\t0.91\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t94873\t95333\t0.79\t+\t0\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\texon\t94873\t95333\t.\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tintron\t95334\t95584\t0.51\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t95585\t95762\t0.35\t+\t1\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tgene\t79526\t95762\t0.01\t+\t.\tg14\n+sample\tAUGUSTUS\ttranscript\t79526\t95762\t0.01\t+\t.\tg14.t1\n+sample\tAUGUSTUS\texon\t95585\t95762\t.\t+\t.\ttranscript_id "file_1_g14.t1"; 
gene_id "file_1_g14";\n+sample\tAUGUSTUS\tstop_codon\t95760\t95762\t.\t+\t0\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample4\tAUGUSTUS\tstop_codon\t4248\t4250\t.\t-\t0\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tCDS\t4248\t4738\t0.42\t-\t2\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\texon\t4248\t4738\t.\t-\t.\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tintron\t4739\t4929\t0.42\t-\t.\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tCDS\t4930\t5539\t0.37\t-\t0\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\texon\t4930\t5539\t.\t-\t.\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tintron\t5540\t6038\t0.29\t-\t.\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tCDS\t6039\t6494\t0.47\t-\t0\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tgene\t4248\t6494\t0.12\t-\t.\tg28\n+sample4\tAUGUSTUS\ttranscript\t4248\t6494\t0.12\t-\t.\tg28.t1\n+sample4\tAUGUSTUS\texon\t6039\t6494\t.\t-\t.\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tstart_codon\t6492\t6494\t.\t-\t0\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample\tAUGUSTUS\tstop_codon\t209044\t209046\t.\t-\t0\ttranscript_id "file_1_g24.t1"; gene_id "file_1_g24";\n+sample\tAUGUSTUS\tCDS\t209044\t210483\t0.38\t-\t0\ttranscript_id "file_1_g24.t1"; gene_id "file_1_g24";\n+sample\tAUGUSTUS\texon\t209044\t210483\t.\t-\t.\ttranscript_id "file_1_g24.t1"; gene_id "file_1_g24";\n+sample\tAUGUSTUS\tstart_codon\t210481\t210483\t.\t-\t0\ttranscript_id "file_1_g24.t1"; gene_id "file_1_g24";\n+sample\tAUGUSTUS\tgene\t209044\t210483\t0.38\t-\t.\tg24\n+sample\tAUGUSTUS\ttranscript\t209044\t210483\t0.38\t-\t.\tg24.t1\n+sample\tAUGUSTUS\tstart_codon\t2126\t2128\t.\t+\t0\ttranscript_id "file_1_g2.t1"; gene_id 
"file_1_g2";\n+sample\tAUGUSTUS\tCDS\t2126\t2199\t1\t+\t0\ttranscript_id "file_1_g2.t1"; gene_id "file_1_g2";\n+sample\tAUGUSTUS\texon\t2126\t2199\t.\t+\t.\ttranscript_id "file_1_g2.t1"; gene_id "file_1_g2";\n+sample\tAUGUSTUS\tintron\t2200\t2257\t1\t+\t.\ttranscript_id "file_1_g2.t1"; gene_id "file_1_g2";\n+sample\tAUGUSTUS\tCDS\t2258\t3224\t0.49\t+\t1\ttranscript_id "file_1_g2.t1"; gene_id "file_1_g2";\n+sample\tAUGUSTUS\texon\t2258\t3224\t.\t+\t.\ttranscript_id "fi'..b'e_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tCDS\t102521\t103151\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t"; cds_type "file_2_Internal"; count "file_2_4_5";\n+sample\tGeneMark.hmm3\tintron\t103152\t103222\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tCDS\t103223\t103812\t.\t-\t2\tgene_id "file_2_13_g"; transcript_id "file_2_13_t"; cds_type "file_2_Internal"; count "file_2_3_5";\n+sample\tGeneMark.hmm3\tintron\t103813\t111230\t.\t-\t1\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tCDS\t111231\t111342\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t"; cds_type "file_2_Internal"; count "file_2_2_5";\n+sample\tGeneMark.hmm3\tintron\t111343\t113595\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tgene\t97476\t113919\t.\t-\t.\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tmRNA\t97476\t113919\t.\t-\t.\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tCDS\t113596\t113919\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t"; cds_type "file_2_Initial"; count "file_2_1_5";\n+sample\tGeneMark.hmm3\tstart_codon\t113917\t113919\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample4\tAUGUSTUS\tstop_codon\t1092\t1094\t.\t-\t0\ttranscript_id "file_1_g26.t1"; gene_id "file_1_g26";\n+sample4\tAUGUSTUS\tCDS\t1092\t1364\t0.76\t-\t0\ttranscript_id "file_1_g26.t1"; gene_id 
"file_1_g26";\n+sample4\tAUGUSTUS\texon\t1092\t1364\t.\t-\t.\ttranscript_id "file_1_g26.t1"; gene_id "file_1_g26";\n+sample4\tAUGUSTUS\tstart_codon\t1362\t1364\t.\t-\t0\ttranscript_id "file_1_g26.t1"; gene_id "file_1_g26";\n+sample4\tAUGUSTUS\tgene\t1092\t1364\t0.76\t-\t.\tg26\n+sample4\tAUGUSTUS\ttranscript\t1092\t1364\t0.76\t-\t.\tg26.t1\n+sample\tAUGUSTUS\tstop_codon\t34843\t34845\t.\t-\t0\ttranscript_id "file_1_g7.t1"; gene_id "file_1_g7";\n+sample\tAUGUSTUS\tCDS\t34843\t35679\t0.35\t-\t0\ttranscript_id "file_1_g7.t1"; gene_id "file_1_g7";\n+sample\tAUGUSTUS\texon\t34843\t35679\t.\t-\t.\ttranscript_id "file_1_g7.t1"; gene_id "file_1_g7";\n+sample\tAUGUSTUS\tstart_codon\t35677\t35679\t.\t-\t0\ttranscript_id "file_1_g7.t1"; gene_id "file_1_g7";\n+sample\tAUGUSTUS\tgene\t34843\t35679\t0.35\t-\t.\tg7\n+sample\tAUGUSTUS\ttranscript\t34843\t35679\t0.35\t-\t.\tg7.t1\n+sample\tAUGUSTUS\tstop_codon\t40166\t40168\t.\t-\t0\ttranscript_id "file_1_g8.t1"; gene_id "file_1_g8";\n+sample\tAUGUSTUS\tCDS\t40166\t40531\t0.99\t-\t0\ttranscript_id "file_1_g8.t1"; gene_id "file_1_g8";\n+sample\tAUGUSTUS\texon\t40166\t40531\t.\t-\t.\ttranscript_id "file_1_g8.t1"; gene_id "file_1_g8";\n+sample\tAUGUSTUS\tstart_codon\t40529\t40531\t.\t-\t0\ttranscript_id "file_1_g8.t1"; gene_id "file_1_g8";\n+sample\tAUGUSTUS\tgene\t40166\t40531\t0.99\t-\t.\tg8\n+sample\tAUGUSTUS\ttranscript\t40166\t40531\t0.99\t-\t.\tg8.t1\n+sample4\tAUGUSTUS\tstart_codon\t2126\t2128\t.\t+\t0\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tCDS\t2126\t2199\t1\t+\t0\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\texon\t2126\t2199\t.\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tintron\t2200\t2257\t1\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tCDS\t2258\t3224\t0.45\t+\t1\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\texon\t2258\t3224\t.\t+\t.\ttranscript_id 
"file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tintron\t3225\t3283\t0.33\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tCDS\t3284\t3512\t0.14\t+\t0\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\texon\t3284\t3512\t.\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tintron\t3513\t3807\t0.22\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tCDS\t3808\t3863\t0.59\t+\t2\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tgene\t2126\t3863\t0.07\t+\t.\tg27\n+sample4\tAUGUSTUS\ttranscript\t2126\t3863\t0.07\t+\t.\tg27.t1\n+sample4\tAUGUSTUS\texon\t3808\t3863\t.\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tstop_codon\t3861\t3863\t.\t+\t0\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n'
b
diff -r 000000000000 -r 3c0865d1172f test-data/out_genome/braker.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_genome/braker.gff3 Wed Sep 06 09:50:28 2023 +0000
b
b'@@ -0,0 +1,280 @@\n+sample\tAUGUSTUS\tstart_codon\t40692\t40694\t.\t+\t0\tID=file_1_g9.t1.start1;Parent=file_1_g9.t1;\n+sample\tAUGUSTUS\tCDS\t40692\t41193\t0.54\t+\t0\tID=file_1_g9.t1.CDS1;Parent=file_1_g9.t1;\n+sample\tAUGUSTUS\texon\t40692\t41193\t.\t+\t.\tID=file_1_g9.t1.exon1;Parent=file_1_g9.t1;\n+sample\tAUGUSTUS\tintron\t41194\t41706\t0.54\t+\t.\tID=file_1_g9.t1.intron1;Parent=file_1_g9.t1;\n+sample\tAUGUSTUS\tCDS\t41707\t42103\t0.64\t+\t2\tID=file_1_g9.t1.CDS2;Parent=file_1_g9.t1;\n+sample\tAUGUSTUS\texon\t41707\t42103\t.\t+\t.\tID=file_1_g9.t1.exon2;Parent=file_1_g9.t1;\n+sample\tAUGUSTUS\tintron\t42104\t43371\t0.21\t+\t.\tID=file_1_g9.t1.intron2;Parent=file_1_g9.t1;\n+sample\tAUGUSTUS\tCDS\t43372\t43609\t0.14\t+\t1\tID=file_1_g9.t1.CDS3;Parent=file_1_g9.t1;\n+sample\tAUGUSTUS\texon\t43372\t43609\t.\t+\t.\tID=file_1_g9.t1.exon3;Parent=file_1_g9.t1;\n+sample\tAUGUSTUS\tintron\t43610\t43677\t0.46\t+\t.\tID=file_1_g9.t1.intron3;Parent=file_1_g9.t1;\n+sample\tAUGUSTUS\tCDS\t43678\t44130\t0.46\t+\t0\tID=file_1_g9.t1.CDS4;Parent=file_1_g9.t1;\n+sample\tAUGUSTUS\texon\t43678\t44130\t.\t+\t.\tID=file_1_g9.t1.exon4;Parent=file_1_g9.t1;\n+sample\tAUGUSTUS\tstop_codon\t44128\t44130\t.\t+\t0\tID=file_1_g9.t1.stop1;Parent=file_1_g9.t1;\n+sample4\tAUGUSTUS\tstop_codon\t1092\t1094\t.\t-\t0\tID=file_1_g26.t1.stop1;Parent=file_1_g26.t1;\n+sample4\tAUGUSTUS\tCDS\t1092\t1364\t0.76\t-\t0\tID=file_1_g26.t1.CDS1;Parent=file_1_g26.t1;\n+sample4\tAUGUSTUS\texon\t1092\t1364\t.\t-\t.\tID=file_1_g26.t1.exon1;Parent=file_1_g26.t1;\n+sample4\tAUGUSTUS\tstart_codon\t1362\t1364\t.\t-\t0\tID=file_1_g26.t1.start1;Parent=file_1_g26.t1;\n+sample\tAUGUSTUS\tstart_codon\t167121\t167123\t.\t+\t0\tID=file_1_g21.t1.start1;Parent=file_1_g21.t1;\n+sample\tAUGUSTUS\tCDS\t167121\t168360\t1\t+\t0\tID=file_1_g21.t1.CDS1;Parent=file_1_g21.t1;\n+sample\tAUGUSTUS\texon\t167121\t168360\t.\t+\t.\tID=file_1_g21.t1.exon1;Parent=file_1_g21.t1;\n+sample\tAUGUSTUS\tintron\t168361\t168721\t1\t+\t.\tID=file_1_g
21.t1.intron1;Parent=file_1_g21.t1;\n+sample\tAUGUSTUS\tCDS\t168722\t169167\t1\t+\t2\tID=file_1_g21.t1.CDS2;Parent=file_1_g21.t1;\n+sample\tAUGUSTUS\texon\t168722\t169167\t.\t+\t.\tID=file_1_g21.t1.exon2;Parent=file_1_g21.t1;\n+sample\tAUGUSTUS\tintron\t169168\t172584\t0.87\t+\t.\tID=file_1_g21.t1.intron2;Parent=file_1_g21.t1;\n+sample\tAUGUSTUS\tCDS\t172585\t172957\t0.81\t+\t0\tID=file_1_g21.t1.CDS3;Parent=file_1_g21.t1;\n+sample\tAUGUSTUS\texon\t172585\t172957\t.\t+\t.\tID=file_1_g21.t1.exon3;Parent=file_1_g21.t1;\n+sample\tAUGUSTUS\tintron\t172958\t180019\t0.41\t+\t.\tID=file_1_g21.t1.intron3;Parent=file_1_g21.t1;\n+sample\tAUGUSTUS\tCDS\t180020\t180579\t0.57\t+\t2\tID=file_1_g21.t1.CDS4;Parent=file_1_g21.t1;\n+sample\tAUGUSTUS\texon\t180020\t180579\t.\t+\t.\tID=file_1_g21.t1.exon4;Parent=file_1_g21.t1;\n+sample\tAUGUSTUS\tstop_codon\t180577\t180579\t.\t+\t0\tID=file_1_g21.t1.stop1;Parent=file_1_g21.t1;\n+sample\tAUGUSTUS\tstart_codon\t119778\t119780\t.\t+\t0\tID=file_1_g18.t1.start1;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\tCDS\t119778\t120378\t1\t+\t0\tID=file_1_g18.t1.CDS1;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\texon\t119778\t120378\t.\t+\t.\tID=file_1_g18.t1.exon1;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\tintron\t120379\t123327\t0.98\t+\t.\tID=file_1_g18.t1.intron1;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\tCDS\t123328\t123512\t0.99\t+\t2\tID=file_1_g18.t1.CDS2;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\texon\t123328\t123512\t.\t+\t.\tID=file_1_g18.t1.exon2;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\tintron\t123513\t133208\t0.5\t+\t.\tID=file_1_g18.t1.intron2;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\tCDS\t133209\t134539\t1\t+\t0\tID=file_1_g18.t1.CDS3;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\texon\t133209\t134539\t.\t+\t.\tID=file_1_g18.t1.exon3;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\tintron\t134540\t134667\t1\t+\t.\tID=file_1_g18.t1.intron3;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\tCDS\t134668\t135510\t1\t+\t1\tID=file_1_g18.t1.CDS4;Parent=file_1_g18.t1;\n+sa
mple\tAUGUSTUS\texon\t134668\t135510\t.\t+\t.\tID=file_1_g18.t1.exon4;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\tintron\t135511\t135568\t1\t+\t.\tID=file_1_g18.t1.intron4;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\tCDS\t135569\t136346\t0.99\t+\t1\tID=file_1_g18.t1.CDS5;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\texon\t135569\t136346\t.\t+\t.\tID=file_1_g18.t1.exon5;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\tstop'..b't_codon\t115274\t115276\t.\t+\t0\tID=file_1_g16.t1.start1;Parent=file_1_g16.t1;\n+sample\tAUGUSTUS\tCDS\t115274\t116488\t0.87\t+\t0\tID=file_1_g16.t1.CDS1;Parent=file_1_g16.t1;\n+sample\tAUGUSTUS\texon\t115274\t116488\t.\t+\t.\tID=file_1_g16.t1.exon1;Parent=file_1_g16.t1;\n+sample\tAUGUSTUS\tstop_codon\t116486\t116488\t.\t+\t0\tID=file_1_g16.t1.stop1;Parent=file_1_g16.t1;\n+sample\tAUGUSTUS\tstop_codon\t209044\t209046\t.\t-\t0\tID=file_1_g24.t1.stop1;Parent=file_1_g24.t1;\n+sample\tAUGUSTUS\tCDS\t209044\t210483\t0.38\t-\t0\tID=file_1_g24.t1.CDS1;Parent=file_1_g24.t1;\n+sample\tAUGUSTUS\texon\t209044\t210483\t.\t-\t.\tID=file_1_g24.t1.exon1;Parent=file_1_g24.t1;\n+sample\tAUGUSTUS\tstart_codon\t210481\t210483\t.\t-\t0\tID=file_1_g24.t1.start1;Parent=file_1_g24.t1;\n+sample\tAUGUSTUS\tstop_codon\t100542\t100544\t.\t-\t0\tID=file_1_g15.t1.stop1;Parent=file_1_g15.t1;\n+sample\tAUGUSTUS\tCDS\t100542\t100756\t0.3\t-\t2\tID=file_1_g15.t1.CDS1;Parent=file_1_g15.t1;\n+sample\tAUGUSTUS\texon\t100542\t100756\t.\t-\t.\tID=file_1_g15.t1.exon1;Parent=file_1_g15.t1;\n+sample\tAUGUSTUS\tintron\t100757\t102520\t0.32\t-\t.\tID=file_1_g15.t1.intron1;Parent=file_1_g15.t1;\n+sample\tAUGUSTUS\tCDS\t102521\t103133\t0.26\t-\t0\tID=file_1_g15.t1.CDS2;Parent=file_1_g15.t1;\n+sample\tAUGUSTUS\texon\t102521\t103133\t.\t-\t.\tID=file_1_g15.t1.exon2;Parent=file_1_g15.t1;\n+sample\tAUGUSTUS\tintron\t103134\t103264\t0.25\t-\t.\tID=file_1_g15.t1.intron2;Parent=file_1_g15.t1;\n+sample\tAUGUSTUS\tCDS\t103265\t104819\t0.09\t-\t1\tID=file_1_g15.t1.CDS3;Parent=file_1_g15.t1;\n+sample\tAUGUSTUS\tex
on\t103265\t104819\t.\t-\t.\tID=file_1_g15.t1.exon3;Parent=file_1_g15.t1;\n+sample\tAUGUSTUS\tintron\t104820\t108228\t0.15\t-\t.\tID=file_1_g15.t1.intron3;Parent=file_1_g15.t1;\n+sample\tAUGUSTUS\tCDS\t108229\t108299\t0.42\t-\t0\tID=file_1_g15.t1.CDS4;Parent=file_1_g15.t1;\n+sample\tAUGUSTUS\texon\t108229\t108299\t.\t-\t.\tID=file_1_g15.t1.exon4;Parent=file_1_g15.t1;\n+sample\tAUGUSTUS\tstart_codon\t108297\t108299\t.\t-\t0\tID=file_1_g15.t1.start1;Parent=file_1_g15.t1;\n+sample\tAUGUSTUS\tstop_codon\t214566\t214568\t.\t-\t0\tID=file_1_g25.t1.stop1;Parent=file_1_g25.t1;\n+sample\tAUGUSTUS\tCDS\t214566\t214811\t0.19\t-\t0\tID=file_1_g25.t1.CDS1;Parent=file_1_g25.t1;\n+sample\tAUGUSTUS\texon\t214566\t214811\t.\t-\t.\tID=file_1_g25.t1.exon1;Parent=file_1_g25.t1;\n+sample\tAUGUSTUS\tstart_codon\t214809\t214811\t.\t-\t0\tID=file_1_g25.t1.start1;Parent=file_1_g25.t1;\n+sample\tAUGUSTUS\tstart_codon\t58640\t58642\t.\t+\t0\tID=file_1_g12.t1.start1;Parent=file_1_g12.t1;\n+sample\tAUGUSTUS\tCDS\t58640\t58677\t0.01\t+\t0\tID=file_1_g12.t1.CDS1;Parent=file_1_g12.t1;\n+sample\tAUGUSTUS\texon\t58640\t58677\t.\t+\t.\tID=file_1_g12.t1.exon1;Parent=file_1_g12.t1;\n+sample\tAUGUSTUS\tintron\t58678\t59091\t0.01\t+\t.\tID=file_1_g12.t1.intron1;Parent=file_1_g12.t1;\n+sample\tAUGUSTUS\tCDS\t59092\t59204\t0.01\t+\t1\tID=file_1_g12.t1.CDS2;Parent=file_1_g12.t1;\n+sample\tAUGUSTUS\texon\t59092\t59204\t.\t+\t.\tID=file_1_g12.t1.exon2;Parent=file_1_g12.t1;\n+sample\tAUGUSTUS\tintron\t59205\t69536\t0.01\t+\t.\tID=file_1_g12.t1.intron2;Parent=file_1_g12.t1;\n+sample\tAUGUSTUS\tCDS\t69537\t69745\t0.01\t+\t2\tID=file_1_g12.t1.CDS3;Parent=file_1_g12.t1;\n+sample\tAUGUSTUS\texon\t69537\t69745\t.\t+\t.\tID=file_1_g12.t1.exon3;Parent=file_1_g12.t1;\n+sample\tAUGUSTUS\tstop_codon\t69743\t69745\t.\t+\t0\tID=file_1_g12.t1.stop1;Parent=file_1_g12.t1;\n+sample\tAUGUSTUS\tstop_codon\t45527\t45529\t.\t-\t0\tID=file_1_g10.t1.stop1;Parent=file_1_g10.t1;\n+sample\tAUGUSTUS\tCDS\t45527\t46157\t0.51\t-\t1\tID=fi
le_1_g10.t1.CDS1;Parent=file_1_g10.t1;\n+sample\tAUGUSTUS\texon\t45527\t46157\t.\t-\t.\tID=file_1_g10.t1.exon1;Parent=file_1_g10.t1;\n+sample\tAUGUSTUS\tintron\t46158\t46752\t0.42\t-\t.\tID=file_1_g10.t1.intron1;Parent=file_1_g10.t1;\n+sample\tAUGUSTUS\tCDS\t46753\t47379\t0.43\t-\t1\tID=file_1_g10.t1.CDS2;Parent=file_1_g10.t1;\n+sample\tAUGUSTUS\texon\t46753\t47379\t.\t-\t.\tID=file_1_g10.t1.exon2;Parent=file_1_g10.t1;\n+sample\tAUGUSTUS\tintron\t47380\t49616\t0.44\t-\t.\tID=file_1_g10.t1.intron2;Parent=file_1_g10.t1;\n+sample\tAUGUSTUS\tCDS\t49617\t49792\t0.45\t-\t0\tID=file_1_g10.t1.CDS3;Parent=file_1_g10.t1;\n+sample\tAUGUSTUS\texon\t49617\t49792\t.\t-\t.\tID=file_1_g10.t1.exon3;Parent=file_1_g10.t1;\n+sample\tAUGUSTUS\tstart_codon\t49790\t49792\t.\t-\t0\tID=file_1_g10.t1.start1;Parent=file_1_g10.t1;\n'
b
diff -r 000000000000 -r 3c0865d1172f test-data/out_genome/braker.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_genome/braker.gtf Wed Sep 06 09:50:28 2023 +0000
b
b'@@ -0,0 +1,1484 @@\n+sample\tAUGUSTUS\tgene\t2126\t3863\t.\t+\t.\tg1\n+sample\tAUGUSTUS\ttranscript\t2126\t3863\t0.33\t+\t.\tg1.t1\n+sample\tAUGUSTUS\tstart_codon\t2126\t2128\t.\t+\t0\ttranscript_id "g1.t1"; gene_id "g1";\n+sample\tAUGUSTUS\tCDS\t2126\t2199\t1\t+\t0\ttranscript_id "g1.t1"; gene_id "g1";\n+sample\tAUGUSTUS\texon\t2126\t2199\t.\t+\t.\ttranscript_id "g1.t1"; gene_id "g1";\n+sample\tAUGUSTUS\tintron\t2200\t2257\t1\t+\t.\ttranscript_id "g1.t1"; gene_id "g1";\n+sample\tAUGUSTUS\tCDS\t2258\t3224\t1\t+\t1\ttranscript_id "g1.t1"; gene_id "g1";\n+sample\tAUGUSTUS\texon\t2258\t3224\t.\t+\t.\ttranscript_id "g1.t1"; gene_id "g1";\n+sample\tAUGUSTUS\tintron\t3225\t3283\t1\t+\t.\ttranscript_id "g1.t1"; gene_id "g1";\n+sample\tAUGUSTUS\tCDS\t3284\t3490\t0.62\t+\t0\ttranscript_id "g1.t1"; gene_id "g1";\n+sample\tAUGUSTUS\texon\t3284\t3490\t.\t+\t.\ttranscript_id "g1.t1"; gene_id "g1";\n+sample\tAUGUSTUS\tintron\t3491\t3548\t0.53\t+\t.\ttranscript_id "g1.t1"; gene_id "g1";\n+sample\tAUGUSTUS\tCDS\t3549\t3603\t0.44\t+\t0\ttranscript_id "g1.t1"; gene_id "g1";\n+sample\tAUGUSTUS\texon\t3549\t3603\t.\t+\t.\ttranscript_id "g1.t1"; gene_id "g1";\n+sample\tAUGUSTUS\tintron\t3604\t3768\t0.43\t+\t.\ttranscript_id "g1.t1"; gene_id "g1";\n+sample\tAUGUSTUS\tCDS\t3769\t3863\t0.42\t+\t2\ttranscript_id "g1.t1"; gene_id "g1";\n+sample\tAUGUSTUS\texon\t3769\t3863\t.\t+\t.\ttranscript_id "g1.t1"; gene_id "g1";\n+sample\tAUGUSTUS\tstop_codon\t3861\t3863\t.\t+\t0\ttranscript_id "g1.t1"; gene_id "g1";\n+sample\tAUGUSTUS\tgene\t4248\t6494\t.\t-\t.\tg2\n+sample\tAUGUSTUS\ttranscript\t4248\t6494\t1\t-\t.\tg2.t1\n+sample\tAUGUSTUS\tstop_codon\t4248\t4250\t.\t-\t0\ttranscript_id "g2.t1"; gene_id "g2";\n+sample\tAUGUSTUS\tCDS\t4248\t4759\t1\t-\t2\ttranscript_id "g2.t1"; gene_id "g2";\n+sample\tAUGUSTUS\texon\t4248\t4759\t.\t-\t.\ttranscript_id "g2.t1"; gene_id "g2";\n+sample\tAUGUSTUS\tintron\t4760\t4929\t1\t-\t.\ttranscript_id "g2.t1"; gene_id 
"g2";\n+sample\tAUGUSTUS\tCDS\t4930\t5539\t1\t-\t0\ttranscript_id "g2.t1"; gene_id "g2";\n+sample\tAUGUSTUS\texon\t4930\t5539\t.\t-\t.\ttranscript_id "g2.t1"; gene_id "g2";\n+sample\tAUGUSTUS\tintron\t5540\t6038\t1\t-\t.\ttranscript_id "g2.t1"; gene_id "g2";\n+sample\tAUGUSTUS\tCDS\t6039\t6494\t1\t-\t0\ttranscript_id "g2.t1"; gene_id "g2";\n+sample\tAUGUSTUS\texon\t6039\t6494\t.\t-\t.\ttranscript_id "g2.t1"; gene_id "g2";\n+sample\tAUGUSTUS\tstart_codon\t6492\t6494\t.\t-\t0\ttranscript_id "g2.t1"; gene_id "g2";\n+sample\tAUGUSTUS\tgene\t7691\t11104\t.\t-\t.\tg3\n+sample\tAUGUSTUS\ttranscript\t7691\t11104\t0.91\t-\t.\tg3.t1\n+sample\tAUGUSTUS\tstop_codon\t7691\t7693\t.\t-\t0\ttranscript_id "g3.t1"; gene_id "g3";\n+sample\tAUGUSTUS\tCDS\t7691\t8385\t0.99\t-\t2\ttranscript_id "g3.t1"; gene_id "g3";\n+sample\tAUGUSTUS\texon\t7691\t8385\t.\t-\t.\ttranscript_id "g3.t1"; gene_id "g3";\n+sample\tAUGUSTUS\tintron\t8386\t8706\t0.98\t-\t.\ttranscript_id "g3.t1"; gene_id "g3";\n+sample\tAUGUSTUS\tCDS\t8707\t10515\t0.95\t-\t2\ttranscript_id "g3.t1"; gene_id "g3";\n+sample\tAUGUSTUS\texon\t8707\t10515\t.\t-\t.\ttranscript_id "g3.t1"; gene_id "g3";\n+sample\tAUGUSTUS\tintron\t10516\t10646\t0.96\t-\t.\ttranscript_id "g3.t1"; gene_id "g3";\n+sample\tAUGUSTUS\tCDS\t10647\t10794\t1\t-\t0\ttranscript_id "g3.t1"; gene_id "g3";\n+sample\tAUGUSTUS\texon\t10647\t10794\t.\t-\t.\ttranscript_id "g3.t1"; gene_id "g3";\n+sample\tAUGUSTUS\tintron\t10795\t10852\t1\t-\t.\ttranscript_id "g3.t1"; gene_id "g3";\n+sample\tAUGUSTUS\tCDS\t10853\t11104\t0.97\t-\t0\ttranscript_id "g3.t1"; gene_id "g3";\n+sample\tAUGUSTUS\texon\t10853\t11104\t.\t-\t.\ttranscript_id "g3.t1"; gene_id "g3";\n+sample\tAUGUSTUS\tstart_codon\t11102\t11104\t.\t-\t0\ttranscript_id "g3.t1"; gene_id "g3";\n+sample\tAUGUSTUS\tgene\t14247\t28143\t.\t-\t.\tg4\n+sample\tAUGUSTUS\ttranscript\t14247\t28143\t0.88\t-\t.\tg4.t1\n+sample\tAUGUSTUS\tstop_codon\t14247\t14249\t.\t-\t0\ttranscript_id "g4.t1"; gene_id 
"g4";\n+sample\tAUGUSTUS\tCDS\t14247\t14707\t1\t-\t2\ttranscript_id "g4.t1"; gene_id "g4";\n+sample\tAUGUSTUS\texon\t14247\t14707\t.\t-\t.\ttranscript_id "g4.t1"; gene_id "g4";\n+sample\tAUGUSTUS\tintron\t14708\t14793\t1\t-\t.\ttranscript_id "g4.t1"; gene_id "g4";\n+sample\tAUGUSTUS\tCDS\t14794\t14995\t1\t-\t0\ttranscript_id "g4.t1"; gene_id "g4";\n+sample\tAUGUSTUS\texon\t14794\t14995\t.\t-\t.\ttranscript_id "g4.t1"; gene_id "g4";\n+sample\tAUGUSTUS\tintron\t14996\t15100\t1\t-\t.\ttranscript_id "g4.t1"; gene_id "g4";\n+sample\tAUGUSTUS\tCDS\t15101\t15289\t1\t-\t0'..b'; gene_id "g95";\n+sample\tAUGUSTUS\tCDS\t716390\t716663\t1\t-\t1\ttranscript_id "g95.t1"; gene_id "g95";\n+sample\tAUGUSTUS\texon\t716390\t716663\t.\t-\t.\ttranscript_id "g95.t1"; gene_id "g95";\n+sample\tAUGUSTUS\tintron\t716664\t716945\t1\t-\t.\ttranscript_id "g95.t1"; gene_id "g95";\n+sample\tAUGUSTUS\tCDS\t716946\t717057\t1\t-\t2\ttranscript_id "g95.t1"; gene_id "g95";\n+sample\tAUGUSTUS\texon\t716946\t717057\t.\t-\t.\ttranscript_id "g95.t1"; gene_id "g95";\n+sample\tAUGUSTUS\tintron\t717058\t717121\t1\t-\t.\ttranscript_id "g95.t1"; gene_id "g95";\n+sample\tAUGUSTUS\tCDS\t717122\t717574\t1\t-\t2\ttranscript_id "g95.t1"; gene_id "g95";\n+sample\tAUGUSTUS\texon\t717122\t717574\t.\t-\t.\ttranscript_id "g95.t1"; gene_id "g95";\n+sample\tAUGUSTUS\tintron\t717575\t717636\t1\t-\t.\ttranscript_id "g95.t1"; gene_id "g95";\n+sample\tAUGUSTUS\tCDS\t717637\t717859\t1\t-\t0\ttranscript_id "g95.t1"; gene_id "g95";\n+sample\tAUGUSTUS\texon\t717637\t717859\t.\t-\t.\ttranscript_id "g95.t1"; gene_id "g95";\n+sample\tAUGUSTUS\tintron\t717860\t717963\t1\t-\t.\ttranscript_id "g95.t1"; gene_id "g95";\n+sample\tAUGUSTUS\tCDS\t717964\t719015\t1\t-\t2\ttranscript_id "g95.t1"; gene_id "g95";\n+sample\tAUGUSTUS\texon\t717964\t719015\t.\t-\t.\ttranscript_id "g95.t1"; gene_id "g95";\n+sample\tAUGUSTUS\tintron\t719016\t719073\t1\t-\t.\ttranscript_id "g95.t1"; gene_id 
"g95";\n+sample\tAUGUSTUS\tCDS\t719074\t719307\t1\t-\t2\ttranscript_id "g95.t1"; gene_id "g95";\n+sample\tAUGUSTUS\texon\t719074\t719307\t.\t-\t.\ttranscript_id "g95.t1"; gene_id "g95";\n+sample\tAUGUSTUS\tintron\t719308\t719524\t1\t-\t.\ttranscript_id "g95.t1"; gene_id "g95";\n+sample\tAUGUSTUS\tCDS\t719525\t719804\t0.65\t-\t0\ttranscript_id "g95.t1"; gene_id "g95";\n+sample\tAUGUSTUS\texon\t719525\t719804\t.\t-\t.\ttranscript_id "g95.t1"; gene_id "g95";\n+sample\tAUGUSTUS\tstart_codon\t719802\t719804\t.\t-\t0\ttranscript_id "g95.t1"; gene_id "g95";\n+sample4\tAUGUSTUS\tgene\t2126\t3863\t.\t+\t.\tg96\n+sample4\tAUGUSTUS\ttranscript\t2126\t3863\t0.37\t+\t.\tg96.t1\n+sample4\tAUGUSTUS\tstart_codon\t2126\t2128\t.\t+\t0\ttranscript_id "g96.t1"; gene_id "g96";\n+sample4\tAUGUSTUS\tCDS\t2126\t2199\t1\t+\t0\ttranscript_id "g96.t1"; gene_id "g96";\n+sample4\tAUGUSTUS\texon\t2126\t2199\t.\t+\t.\ttranscript_id "g96.t1"; gene_id "g96";\n+sample4\tAUGUSTUS\tintron\t2200\t2257\t1\t+\t.\ttranscript_id "g96.t1"; gene_id "g96";\n+sample4\tAUGUSTUS\tCDS\t2258\t3224\t1\t+\t1\ttranscript_id "g96.t1"; gene_id "g96";\n+sample4\tAUGUSTUS\texon\t2258\t3224\t.\t+\t.\ttranscript_id "g96.t1"; gene_id "g96";\n+sample4\tAUGUSTUS\tintron\t3225\t3283\t1\t+\t.\ttranscript_id "g96.t1"; gene_id "g96";\n+sample4\tAUGUSTUS\tCDS\t3284\t3490\t0.63\t+\t0\ttranscript_id "g96.t1"; gene_id "g96";\n+sample4\tAUGUSTUS\texon\t3284\t3490\t.\t+\t.\ttranscript_id "g96.t1"; gene_id "g96";\n+sample4\tAUGUSTUS\tintron\t3491\t3548\t0.59\t+\t.\ttranscript_id "g96.t1"; gene_id "g96";\n+sample4\tAUGUSTUS\tCDS\t3549\t3603\t0.54\t+\t0\ttranscript_id "g96.t1"; gene_id "g96";\n+sample4\tAUGUSTUS\texon\t3549\t3603\t.\t+\t.\ttranscript_id "g96.t1"; gene_id "g96";\n+sample4\tAUGUSTUS\tintron\t3604\t3768\t0.52\t+\t.\ttranscript_id "g96.t1"; gene_id "g96";\n+sample4\tAUGUSTUS\tCDS\t3769\t3863\t0.44\t+\t2\ttranscript_id "g96.t1"; gene_id "g96";\n+sample4\tAUGUSTUS\texon\t3769\t3863\t.\t+\t.\ttranscript_id "g96.t1"; gene_id 
"g96";\n+sample4\tAUGUSTUS\tstop_codon\t3861\t3863\t.\t+\t0\ttranscript_id "g96.t1"; gene_id "g96";\n+sample4\tAUGUSTUS\tgene\t4248\t6494\t.\t-\t.\tg97\n+sample4\tAUGUSTUS\ttranscript\t4248\t6494\t1\t-\t.\tg97.t1\n+sample4\tAUGUSTUS\tstop_codon\t4248\t4250\t.\t-\t0\ttranscript_id "g97.t1"; gene_id "g97";\n+sample4\tAUGUSTUS\tCDS\t4248\t4759\t1\t-\t2\ttranscript_id "g97.t1"; gene_id "g97";\n+sample4\tAUGUSTUS\texon\t4248\t4759\t.\t-\t.\ttranscript_id "g97.t1"; gene_id "g97";\n+sample4\tAUGUSTUS\tintron\t4760\t4929\t1\t-\t.\ttranscript_id "g97.t1"; gene_id "g97";\n+sample4\tAUGUSTUS\tCDS\t4930\t5539\t1\t-\t0\ttranscript_id "g97.t1"; gene_id "g97";\n+sample4\tAUGUSTUS\texon\t4930\t5539\t.\t-\t.\ttranscript_id "g97.t1"; gene_id "g97";\n+sample4\tAUGUSTUS\tintron\t5540\t6038\t1\t-\t.\ttranscript_id "g97.t1"; gene_id "g97";\n+sample4\tAUGUSTUS\tCDS\t6039\t6494\t1\t-\t0\ttranscript_id "g97.t1"; gene_id "g97";\n+sample4\tAUGUSTUS\texon\t6039\t6494\t.\t-\t.\ttranscript_id "g97.t1"; gene_id "g97";\n+sample4\tAUGUSTUS\tstart_codon\t6492\t6494\t.\t-\t0\ttranscript_id "g97.t1"; gene_id "g97";\n'
b
diff -r 000000000000 -r 3c0865d1172f test-data/out_prot/braker.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_prot/braker.gtf Wed Sep 06 09:50:28 2023 +0000
b
b'@@ -0,0 +1,465 @@\n+sample\tAUGUSTUS\tstart_codon\t79526\t79528\t.\t+\t0\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t79526\t80474\t0.4\t+\t0\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\texon\t79526\t80474\t.\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tintron\t80475\t87860\t0.49\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t87861\t88125\t0.39\t+\t2\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\texon\t87861\t88125\t.\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tintron\t88126\t92148\t0.19\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t92149\t92252\t0.17\t+\t1\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\texon\t92149\t92252\t.\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tintron\t92253\t94562\t0.19\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t94563\t94732\t0.96\t+\t2\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\texon\t94563\t94732\t.\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tintron\t94733\t94872\t0.91\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t94873\t95333\t0.79\t+\t0\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\texon\t94873\t95333\t.\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tintron\t95334\t95584\t0.51\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t95585\t95762\t0.35\t+\t1\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tgene\t79526\t95762\t0.01\t+\t.\tg14\n+sample\tAUGUSTUS\ttranscript\t79526\t95762\t0.01\t+\t.\tg14.t1\n+sample\tAUGUSTUS\texon\t95585\t95762\t.\t+\t.\ttranscript_id "file_1_g14.t1"; 
gene_id "file_1_g14";\n+sample\tAUGUSTUS\tstop_codon\t95760\t95762\t.\t+\t0\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample4\tAUGUSTUS\tstop_codon\t4248\t4250\t.\t-\t0\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tCDS\t4248\t4738\t0.42\t-\t2\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\texon\t4248\t4738\t.\t-\t.\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tintron\t4739\t4929\t0.42\t-\t.\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tCDS\t4930\t5539\t0.37\t-\t0\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\texon\t4930\t5539\t.\t-\t.\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tintron\t5540\t6038\t0.29\t-\t.\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tCDS\t6039\t6494\t0.47\t-\t0\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tgene\t4248\t6494\t0.12\t-\t.\tg28\n+sample4\tAUGUSTUS\ttranscript\t4248\t6494\t0.12\t-\t.\tg28.t1\n+sample4\tAUGUSTUS\texon\t6039\t6494\t.\t-\t.\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tstart_codon\t6492\t6494\t.\t-\t0\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample\tAUGUSTUS\tstop_codon\t209044\t209046\t.\t-\t0\ttranscript_id "file_1_g24.t1"; gene_id "file_1_g24";\n+sample\tAUGUSTUS\tCDS\t209044\t210483\t0.38\t-\t0\ttranscript_id "file_1_g24.t1"; gene_id "file_1_g24";\n+sample\tAUGUSTUS\texon\t209044\t210483\t.\t-\t.\ttranscript_id "file_1_g24.t1"; gene_id "file_1_g24";\n+sample\tAUGUSTUS\tstart_codon\t210481\t210483\t.\t-\t0\ttranscript_id "file_1_g24.t1"; gene_id "file_1_g24";\n+sample\tAUGUSTUS\tgene\t209044\t210483\t0.38\t-\t.\tg24\n+sample\tAUGUSTUS\ttranscript\t209044\t210483\t0.38\t-\t.\tg24.t1\n+sample\tAUGUSTUS\tstart_codon\t2126\t2128\t.\t+\t0\ttranscript_id "file_1_g2.t1"; gene_id 
"file_1_g2";\n+sample\tAUGUSTUS\tCDS\t2126\t2199\t1\t+\t0\ttranscript_id "file_1_g2.t1"; gene_id "file_1_g2";\n+sample\tAUGUSTUS\texon\t2126\t2199\t.\t+\t.\ttranscript_id "file_1_g2.t1"; gene_id "file_1_g2";\n+sample\tAUGUSTUS\tintron\t2200\t2257\t1\t+\t.\ttranscript_id "file_1_g2.t1"; gene_id "file_1_g2";\n+sample\tAUGUSTUS\tCDS\t2258\t3224\t0.49\t+\t1\ttranscript_id "file_1_g2.t1"; gene_id "file_1_g2";\n+sample\tAUGUSTUS\texon\t2258\t3224\t.\t+\t.\ttranscript_id "fi'..b'e_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tCDS\t102521\t103151\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t"; cds_type "file_2_Internal"; count "file_2_4_5";\n+sample\tGeneMark.hmm3\tintron\t103152\t103222\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tCDS\t103223\t103812\t.\t-\t2\tgene_id "file_2_13_g"; transcript_id "file_2_13_t"; cds_type "file_2_Internal"; count "file_2_3_5";\n+sample\tGeneMark.hmm3\tintron\t103813\t111230\t.\t-\t1\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tCDS\t111231\t111342\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t"; cds_type "file_2_Internal"; count "file_2_2_5";\n+sample\tGeneMark.hmm3\tintron\t111343\t113595\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tgene\t97476\t113919\t.\t-\t.\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tmRNA\t97476\t113919\t.\t-\t.\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tCDS\t113596\t113919\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t"; cds_type "file_2_Initial"; count "file_2_1_5";\n+sample\tGeneMark.hmm3\tstart_codon\t113917\t113919\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample4\tAUGUSTUS\tstop_codon\t1092\t1094\t.\t-\t0\ttranscript_id "file_1_g26.t1"; gene_id "file_1_g26";\n+sample4\tAUGUSTUS\tCDS\t1092\t1364\t0.76\t-\t0\ttranscript_id "file_1_g26.t1"; gene_id 
"file_1_g26";\n+sample4\tAUGUSTUS\texon\t1092\t1364\t.\t-\t.\ttranscript_id "file_1_g26.t1"; gene_id "file_1_g26";\n+sample4\tAUGUSTUS\tstart_codon\t1362\t1364\t.\t-\t0\ttranscript_id "file_1_g26.t1"; gene_id "file_1_g26";\n+sample4\tAUGUSTUS\tgene\t1092\t1364\t0.76\t-\t.\tg26\n+sample4\tAUGUSTUS\ttranscript\t1092\t1364\t0.76\t-\t.\tg26.t1\n+sample\tAUGUSTUS\tstop_codon\t34843\t34845\t.\t-\t0\ttranscript_id "file_1_g7.t1"; gene_id "file_1_g7";\n+sample\tAUGUSTUS\tCDS\t34843\t35679\t0.35\t-\t0\ttranscript_id "file_1_g7.t1"; gene_id "file_1_g7";\n+sample\tAUGUSTUS\texon\t34843\t35679\t.\t-\t.\ttranscript_id "file_1_g7.t1"; gene_id "file_1_g7";\n+sample\tAUGUSTUS\tstart_codon\t35677\t35679\t.\t-\t0\ttranscript_id "file_1_g7.t1"; gene_id "file_1_g7";\n+sample\tAUGUSTUS\tgene\t34843\t35679\t0.35\t-\t.\tg7\n+sample\tAUGUSTUS\ttranscript\t34843\t35679\t0.35\t-\t.\tg7.t1\n+sample\tAUGUSTUS\tstop_codon\t40166\t40168\t.\t-\t0\ttranscript_id "file_1_g8.t1"; gene_id "file_1_g8";\n+sample\tAUGUSTUS\tCDS\t40166\t40531\t0.99\t-\t0\ttranscript_id "file_1_g8.t1"; gene_id "file_1_g8";\n+sample\tAUGUSTUS\texon\t40166\t40531\t.\t-\t.\ttranscript_id "file_1_g8.t1"; gene_id "file_1_g8";\n+sample\tAUGUSTUS\tstart_codon\t40529\t40531\t.\t-\t0\ttranscript_id "file_1_g8.t1"; gene_id "file_1_g8";\n+sample\tAUGUSTUS\tgene\t40166\t40531\t0.99\t-\t.\tg8\n+sample\tAUGUSTUS\ttranscript\t40166\t40531\t0.99\t-\t.\tg8.t1\n+sample4\tAUGUSTUS\tstart_codon\t2126\t2128\t.\t+\t0\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tCDS\t2126\t2199\t1\t+\t0\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\texon\t2126\t2199\t.\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tintron\t2200\t2257\t1\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tCDS\t2258\t3224\t0.45\t+\t1\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\texon\t2258\t3224\t.\t+\t.\ttranscript_id 
"file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tintron\t3225\t3283\t0.33\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tCDS\t3284\t3512\t0.14\t+\t0\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\texon\t3284\t3512\t.\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tintron\t3513\t3807\t0.22\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tCDS\t3808\t3863\t0.59\t+\t2\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tgene\t2126\t3863\t0.07\t+\t.\tg27\n+sample4\tAUGUSTUS\ttranscript\t2126\t3863\t0.07\t+\t.\tg27.t1\n+sample4\tAUGUSTUS\texon\t3808\t3863\t.\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tstop_codon\t3861\t3863\t.\t+\t0\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n'
b
diff -r 000000000000 -r 3c0865d1172f test-data/proteins.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/proteins.fa Wed Sep 06 09:50:28 2023 +0000
b
@@ -0,0 +1,96 @@
+>FUN_000001-T1 FUN_000001
+MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAGESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKR
+FIRNPNYVKANEFYDKMLSSEYVSKRYKDLPPPHPGFGADQPPA
+>FUN_000002-T1 FUN_000002
+MPLYGVSPASALWAGSRRQTVRRSRPSHHESWPATSPPAGRAQSLLPKRLLDVVTDLGLDVLQVLGLEARLLGGDVLGFA
+QP
+>FUN_000003-T1 FUN_000003
+MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPNPSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGN
+NSTCTASQPYALSGALSMLQQSPSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH
+KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCERVSFLVHTRIHTGVMPYKCELCQKTFRYKVSQRTHRC
+PTEEAQTPEQLIKAFLEGNDSHTQPSPASAEIAAINSSSIVDPEQEALLSQSIDDIVVEQCQKLGICGVEPREEGQLISL
+QPVAVVHFSGNGSPLQQLQNLRIYSPQQTELPSSDGEVFQRFLMDAT
+>FUN_000004-T1 FUN_000004
+MSNKREIDELTSRIKSAKTTLVEWTEAMEDGNKGYQLIEKYYLDDQQKARELNIKRQLLQADIDKRRKQVVLLYDEQMTL
+EKNLERTACLYRSAHAERRQMVETWKSAVNQMTQREHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELA
+IESLNEESSDMKNQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRDEKAKELENFASVMEKVNARLKS
+VQNKALNAEQRLQILEEMMQAEETALRNLDKEQEKVNEMLYRTQRQVIELQDEEKVLKVQNDSLNSNLAAINRNQQQVNN
+ELKRQTEIHYSLSFKCLEAERRLAEIKGLADDPEVEATNMARLNTLEQEYEKLQRLIATTEAQNKKLNYNMNNLVVQYNA
+DEKELEMVRFKIKEAQVYCEGTVKRLRQIRYENSELIVDLNMVKMRCSDLEVGIGGCEQGTYDLEQHRLAFRRAIKDRTV
+ELRSQEDVLLLKKKHLNEELSTLRADLGERKKQIEAMKARFELTAQLLGKNEDGSIMTSTQLKVVSAQERQMLADEGDAL
+NKKVLKAEKEVVALENTLRQFDKSNDNYRKTFRSVDENSKDRERAELELKELEAAYCRELEKLKVLRCKAQHYQQKHAAQ
+RAEEEDLISKIEKAKASRAEHSAVLEKIERELDDQRMKLDRANREIRTQLREIKARPFSEEYLAQFERDLSLQELEARNT
+KALNMITDLANSDESGTDIIGILLRKGIKLPMHLKRTCSRVSWNSSSSGKSSQGQDTASYLNVKGKKFSCDGASARSSVS
+DMSSLKDDTSSTTSHSGLSIISLELPLPKKK
+>FUN_000005-T1 FUN_000005
+MRCVFCGSGSEQQHSRWEIKMLQESCRTDHGFHQDSQAIQYLYEILASYNRDEQRAFLQFVTGSPRLPTGGFKALTPPLT
+IVRKTLDENQNPNDYLPSVMTCVNYLKLPDYSSREVMRQKLKVAANEGSMSFHLS
+>FUN_000006-T1 FUN_000006
+MLILIGAGARGAVLHRLLASSATQDVRTTARACGRMLVLLCGCCCSGGAAAHKRRSCYCWSVLVRGGCLKSILDVQHSAV
+GLDYVYNGHRVLFASCTVTTSAGRELLRKGFRAEITTGAIPSCHSSVFLRMNLLLDL
+>FUN_000007-T1 FUN_000007
+MSFCDFLRDAALGANSANLSIVAAALSAARDVGGGSDGGGSAGGATPATGASASSVGNTSAVGASSSSNSSAGQAASSNS
+NNVTATGSGSAPGGGPTSTGTTSGTQHGSGSGAAAAVDSESDDSEVGRLQALLEARGLPPHLFGALGPRVTHILHRTIGN
+SSSSKANQLLQGLQSHDESQQLQAAIEMCQMLVMGNEDTLAGFPIKQVVPALIQLLRMEHNFDIMNNACRALAYMLEALP
+RSSGTVVEAVPVFLEKLQVIQCMDVAEQSLSALEILSRRHNKAILQANGISACLTYLDFFSIVAQRAALAIAANCCLNMH
+PEEFHFVAESLPLLARLLSQQDKKCIESVCSAFCRLVESFQHDGQRLQQIASPDLLKNCQQLLLVTPAILNTGTFTAVVR
+MLSLMCCSCPDLAISLLRNDIAATLLYLLTGNAEPAAASATHVELISRSPSELYELTCLIGELMPRLPLDGIFAVDSLLD
+RPTLNTQDQVHWQWRDDRGSWHNYSTIDSRLIEAANQSSEDEISLSTFGRTYTVDFHAMQQINEDTGTTRPVQRRLNHNY
+VAPMSAGQDLTTTSAGSAAAGGASTSAAAAAASSNNNNNNNNNPPGNSVNLNQVKRRPSLDARIACLKEERGLAADFIKH
+IFNVLYEVYSSSAGPNVRYKCLRALLRMVYYATPELLRQVLKYQLVSSHIAGMLGSNDLRIVVGALQMAEILMRQLPDVF
+GTHFRREGVIYQFTQLTDPNNPICANPSPKPLSATATPTANAGGSQSAPASANSLQVNPFFMDSAPGLSSASTTPSSSKH
+QSYSVKSFSHAMNALTASAKGTPSGALDATSSSTTAGGYNYSSSAPSSSSGAPAAYFVTQQGDPRQYVHFQQPAVPAPPP
+QQELLPSGVQQQGQQVPQVIYQPHHQQPAHLVLASTSSGAASSSSSSSSSSSASALQHKMTDMLKRKAPPKRKSQSGGRA
+KSRQEDAAVAPAGSGPGGAPPSSSGSAMHELLSRATSKYIFQ
+>FUN_000008-T1 FUN_000008
+MKFRALMQDPLYMKEFQAIVATLTKLAKDCVMILGSRQMHFIVNEDQSSAASPLVWAGITAEEYFPEYRMEAAHPDQEYI
+VLGVSSANLGRALSVLRGGGVNSCKLKLQRIQFPCISVIASVLTSSSTEAREVVHDVPVTIIPGSDWSAYVVPRVPNSQL
+ALGLPSLRLLKSLIDKLKNISPSLEFQVNVDGELNVIATSEMSTVTSRFQKLLIRTVSGSQQEASCSVDSRKASAFFGAL
+QLPNEELTIGIDREHSIHLQIDVRQDVVLHSILPAVCM
+>FUN_000009-T1 FUN_000009
+MCGNPAVGNGTRALILVGGYGTRLRPLTLSTPKPLVEFANKPILLHQLEALVDAGCRQVILAVSYRAEQMEKELKVEAKK
+LGVELIFSHETEPLGTAGPLALAKTILAASSEPFFVLNSDVICDFPFKQLVQFHCNHGKEGTIVVTKVEEPSKYGVVLYD
+ENGCIKNFIEKPQEFVSNKINAGIYIFNPSVLDRIEVKPTSIEKEVFPEMTQQQELYAMDLTGFWMDIGQPKDFLTGMCL
+YLSSLRQKQSPKLYTGPGVVGNVLVDPTAKIGEGCRIGPNVTIGPDVVIEDGVCIKRSTILKGAIVRSHSWLDSCIVGWR
+STVGRWVRIEGITVLGEDVIVKDELYINGGQVLPHKSIAASLRGAIVQAGQLVILPDEEVFSHVQGVWNLSSDQGNLGSF
+VVTNIRLVWFADANETFNISLPYLQIESSKYGPALVIQTAETGGGYVLGFRVDPAERLNELFKELSSLHTVYGEHPNFGI
+QYNANDARRRLEAASEEAAQASQIKVDNFEELDERQEREINTKLNSYLAEGCLGKVPSQGERAPVYCKELGFAMEPIGDG
+YKLQDLWNVMPTKMETME
+>FUN_000010-T1 FUN_000010
+MDFIHEAVGTSAAPSANPGPMPLCQPVRLELPFSPRHSFALGHFPFQLCPCLYAVWVQTMGLVLLLLIVFAPFLFRVLFK
+PCNPKRYLTT
+>FUN_000011-T1 FUN_000011
+MCHSKDNLHSGNEDGGMPKDTEYISSDHDDSPSWSQQSLLSSDRSKSYSQICSEILEESKERQEKAECAFRVYNINRSKL
+RRSHQQSLSRGPGSGSYGSSMASEYSSKSEAGYQDYDSPSTDPSREHTAEVTFLQLRHRNRAHKEIIFRAAAHAIVIILI
+IIARGVCQRHIKIVEIVPLTSRRGARRATTRNLTSHFAPRKWRRRFSSADQGDRQFKGHDGDCLRSTEKKRSISNEQSPI
+TLRNTNAKDVDIPDCFGSFAMNKHLSVITEDASQHHKDPDEDMIDSQLSNSVLLETYDEGEKYAYSYQYSYKPEICNNNQ
+FVSDESDLKVSSKEGYQMDQEDYVMDKQELVHEGGSDASLSEVAKSKSFLSLKIYDADEALMEIPEDFEGPAIVLDDDAD
+FLDITLTDDEEKIRAKLMAAALTTRKTTSSISPNISLRTRSPIEPSSLSYKPNVIFTRRSEVIKDNYTPRPDDRVALLAE
+KFLQSFSESAPNDYGWKPSKQEVTSAVSISHLFNENGVTRRGGDTPLCGDRQLLSVEFNRKLQRQLKVIVESFQ
+>FUN_000012-T1 FUN_000012
+MSLDRRGEITTPPTRYDLTLGSDKSSSLSRSEAGTYDVIQAEIQHAKRQELATGVATASHQNGNGNGNGHTLSTQHDIEA
+EVKKRKWPTEPSYFLAKELLMTERTYKKDLDVLNTTFRQVLSLGDVEQLQPLFELLDSLAQHHNLFLRDIEHRMVQWEGR
+GGHEAHRIGDVMMKHMAALPIYDEYVQTHLDILHCMNDMYEGDERFRQVYKEFEQQKVCYLPIGELLLKPLNRLLHYQLI
+LERLCDYYGEEHIDYADAMAVHHLLVRSTKGIRSQLPDSANFVELCELQRDINFEQLVQPHRRLIRQGCLLKHSKRGSSE
+EGLDLFGLSNGNNSSLNSSVNGGGPLTTQQQKLQLQQQQQNRTQPSRSNTALHVCWHRGATVGLGDHLIAAEHQLSGYLL
+RKFKNSSGWQKLWVVFTSFCLYFYKSYQDEFALASLPLLGYTVGPPGHQDAVQKEFVFKLSFKNHVYFFRAESAHTYNR
+>FUN_000013-T1 FUN_000013
+MQPIAMEMGQNQLEVKASVLGGSPQLGDLKVGGAQDVQLLGFYREPGGSIQNAQISNVEGL
+>FUN_000014-T1 FUN_000014
+MASKSFDLVIEEKTKKPERLYQPRRMRWLKYIILPAVFSFALLLILVNVDFSDNSEDSTHLGNDTSLIISGYGFENNTLR
+RGFFSGGIALHSLVIENCTIVHINDAAFNQESTVNITSLQLINVQLENLTESALEGLQKLQNFTLVNENNHFRPFGFLSA
+VAESLVSAEIHQSLAAAISYSVCDFLGSRNFPQLKYLDLSGTHLDKSLIKESFDNLPALEQLLLRNCGLGNIEWEIVRPR
+LKLLHYLDLGGAQKTGNYEHQLDVSAFSPETTTNAEEISTILAKRAMAPEVVGTTTLGPTTSIEISPPSTQSTTTPKEES
+TSMTETTILTTPSPKCEEELCQDLECSRITTDTVASADLGKSSCQDGLLVEICESTCTTPTFFCVILGENFTSASNCCSH
+HTMRCVVSAQVSWFEDHSGLVIGLGVGLLFIGSFLGMLIVFGTLRLNPSWLRGNKRRESNTIGLIQGRFEKDPYEQELQR
+IREL
+>FUN_000015-T1 FUN_000015
+MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAGESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKR
+FIRNPNYVKANEFYDKMLSSEYVSKRYKDLPPPHPGFGADQPPA
+>FUN_000016-T1 FUN_000016
+MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFDLKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLG
+VTVDLINRELYQADSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK
+KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPCAQVIFDSDPAPAGKNVPAQLEE
+MSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFINGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGI
+YYNELETRVRLNKRRVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIGETEKTSED
+AAVGAQAASGADSPAQVARDRQSRSRSRTRS