Repository 'braker'
hg clone https://toolshed.g2.bx.psu.edu/repos/genouest/braker

Changeset 0:e50ea151f221 (2021-10-05)
Next changeset 1:ca7c12566894 (2021-10-21)
Commit message:
"planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/braker commit aa4a7f87d556b5af582d1ab8fe9a986922a59a1d"
added:
README.md
braker.xml
macros.xml
test-data/SRR7458692.bam
test-data/genemark_license.gm_key
test-data/genome_masked.fa
test-data/out_bam/braker.gtf
test-data/out_genome/braker.gff3
test-data/out_genome/braker.gtf
test-data/out_prot/braker.gtf
test-data/proteins.fa
b
diff -r 000000000000 -r e50ea151f221 README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md Tue Oct 05 12:35:42 2021 +0000
b
@@ -0,0 +1,21 @@
+# Braker
+
+This tool is not in IUC because of licensing issues with GeneMark and ProtHint, which make it impossible to test using CI.
+
+## GeneMark
+
+Braker can use GeneMark to predict genes, but due to licensing issues, we are not allowed to distribute GeneMark automatically.
+
+If you want to use it, the Galaxy administrator needs to install GeneMark, and set the `GENEMARK_PATH` variable on the job destination.
+
+## ProtHint
+
+Braker can use ProtHint to use protein sequences as hints to predict genes, but due to licensing issues, we are not allowed to distribute ProtHint automatically.
+
+If you want to use it, the Galaxy administrator needs to install ProtHint, and set the `PROTHINT_PATH` variable on the job destination.
+
+## Running tests
+
+Tests require working GeneMark and ProtHint installations, which means both GENEMARK_PATH and PROTHINT_PATH must be set.
+
+You should also copy a valid GeneMark license (from http://topaz.gatech.edu/GeneMark/license_download.cgi) to test-data/genemark_license.gm_key
b
diff -r 000000000000 -r e50ea151f221 braker.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/braker.xml Tue Oct 05 12:35:42 2021 +0000
[
@@ -0,0 +1,144 @@
+<tool id="braker" name="Braker genome annotation" profile="20.01" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <expand macro="requirements" />
+    </requirements>
+    <version_command><![CDATA[braker.pl --version; $GENEMARK_PATH/gmes_petap.pl | grep version]]></version_command>
+    <command><![CDATA[
+if [ -z "\$GENEMARK_PATH" ] ; then echo "GeneMark is not installed on this Galaxy server." >&2 ; exit 1 ; fi &&
+if [ ! -f "\$GENEMARK_PATH/gmes_petap.pl" ] ; then echo "GeneMark is not installed properly on this Galaxy server." >&2 ; exit 1 ; fi &&
+## GeneMark only searches for its license in ~/.gm_key
+cp '${genemark_license}' ~/.gm_key &&
+## Run BRAKER; the outputs section collects braker/braker.gtf or braker/braker.gff3 via from_work_dir
+braker.pl
+--genome '${genome}'
+
+$softmasking
+## Dataset paths are single-quoted, like the genome and license paths above
+#if $evidences.bam:
+    --bam '${evidences.bam}'
+#end if
+
+#if $evidences.prot_seq:
+    --prot_seq '${evidences.prot_seq}'
+#end if
+
+## No hints, use esmode
+#if not $evidences.bam and not $evidences.prot_seq
+    --esmode
+#end if
+
+#if $output_format == 'gff3'
+    --gff3
+#end if
+
+$genemark.fungus
+
+$augustus.crf
+--rounds $augustus.rounds
+
+$advanced.UTR
+
+$advanced.filterOutShort
+
+#if $advanced.eval:
+    --eval '${advanced.eval}'
+#end if
+
+#if $advanced.eval_pseudo:
+    --eval_pseudo '${advanced.eval_pseudo}'
+#end if
+
+--cores \${GALAXY_SLOTS:-2}
+    ]]></command>
+    <inputs>
+        <param name="genemark_license" type="data" format="txt" label="GeneMark license file" help="Braker uses GeneMark, which is not free software. To use it, download and unzip a license from http://topaz.gatech.edu/GeneMark/license_download.cgi (ES/ET/EP version). GeneMark needs to be installed manually by Galaxy administrators." />
+
+        <param argument="--genome" type="data" format="fasta" label="Assembly to annotate" help="The assembly should be soft-masked (with RepeatMasker for example)" />
+
+        <param argument="--softmasking" type="boolean" checked="false" truevalue="--softmasking" falsevalue="" label="Genome sequence is soft-masked" />
+        <!-- Both evidences are optional: when neither is set, the command template adds the esmode flag -->
+        <section name="evidences" expanded="true" title="Evidences">
+            <param argument="--bam" type="data" format="bam" optional="true" label="RNA-seq mapped to genome to train Augustus/GeneMark" />
+            <param argument="--prot_seq" type="data" format="fasta" optional="true" label="Proteins to map to genome" />
+        </section>
+
+        <section name="genemark" expanded="true" title="GeneMark">
+            <param argument="--fungus" type="boolean" checked="false" truevalue="--fungus" falsevalue="" label="Fungal genome" help="GeneMark-EX option, run algorithm with branch point model (most useful for fungal genomes)" />
+        </section>
+
+        <section name="augustus" expanded="true" title="Augustus">
+            <param argument="--crf" type="boolean" checked="false" truevalue="--crf" falsevalue="" label="Use CRF training for Augustus" help="Alternate training method (Conditional Random Field)" />
+            <param argument="--rounds" type="integer" value="5" label="Number of optimization rounds used in optimize_augustus.pl" />
+        </section>
+
+        <section name="advanced" expanded="false" title="Advanced">
+            <param argument="--UTR" type="boolean" checked="false" truevalue="--UTR=on" falsevalue="" label="Train AUGUSTUS UTR parameters and predict genes with UTRs" help="Experimental, requires RNASeq data (bam) and a softmasked genome" />
+            <param argument="--filterOutShort" type="boolean" checked="false" truevalue="--filterOutShort" falsevalue="" label="Filter out too short training genes predicted by GeneMark-EX" />
+            <param argument="--eval" type="data" format="gtf" optional="true" label="Reference set to evaluate predictions" help="using evaluation scripts from GaTech" />
+            <param argument="--eval_pseudo" type="data" format="gff3" optional="true" label="File with pseudogenes that will be excluded from accuracy evaluation" />
+        </section>
+        <!-- Drives both the gff3 flag in the command template and the filters on the two outputs -->
+        <param name="output_format" type="select" label="Output format">
+            <option value="gtf" selected="true">GTF</option>
+            <option value="gff3">GFF3</option>
+        </param>
+    </inputs>
+    <outputs> <!-- the two filters are mutually exclusive, so only one dataset is kept per run -->
+        <data format="gtf" name="output_gtf" from_work_dir="braker/braker.gtf" label="${tool.name} on ${on_string}: annotation">
+            <filter>output_format == 'gtf'</filter>
+        </data>
+        <data format="gff3" name="output_gff" from_work_dir="braker/braker.gff3" label="${tool.name} on ${on_string}: annotation">
+            <filter>output_format == 'gff3'</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- The next 2 tests are disabled for CI (they should pass locally) -->
+        <!--test>
+            <param name="genome" value="genome_masked.fa" />
+            <param name="softmasking" value="true" />
+            <param name="genemark_license" value="genemark_license.gm_key" />
+            <output name="output_gtf" file="out_genome/braker.gtf" sort="true" />
+        </test>
+        <test>
+            <param name="genome" value="genome_masked.fa" />
+            <param name="softmasking" value="true" />
+            <param name="genemark_license" value="genemark_license.gm_key" />
+            <param name="output_format" value="gff3" />
+            <output name="output_gff" file="out_genome/braker.gff3" sort="true" />
+        </test-->
+        <!-- following 2 tests throw an error because test dataset is too small  -->
+        <!--test>
+            <param name="genome" value="genome_masked.fa" />
+            <param name="softmasking" value="true" />
+            <param name="genemark_license" value="genemark_license.gm_key" />
+            <section name="evidences">
+                <param name="bam" value="SRR7458692.bam" />
+            </section>
+            <output name="output_gtf" file="out_bam/braker.gtf" sort="true" />
+        </test>
+        <test>
+            <param name="genome" value="genome_masked.fa" />
+            <param name="softmasking" value="true" />
+            <param name="genemark_license" value="genemark_license.gm_key" />
+            <section name="evidences">
+                <param name="prot_seq" value="proteins.fa" />
+            </section>
+            <output name="output_gtf" file="out_prot/braker.gtf" sort="true" />
+        </test-->
+    </tests>
+    <help><![CDATA[
+Braker_
+-------
+
+Braker_ allows for fully automated training of the gene prediction tools GeneMark-EX and AUGUSTUS from RNA-Seq and/or protein homology information, and integrates the extrinsic evidence from RNA-Seq and protein homology information into the prediction.
+
+In contrast to other available methods that rely on protein homology information, BRAKER reaches high gene prediction accuracy even in the absence of the annotation of very closely related species and in the absence of RNA-Seq data.
+
+.. _Braker: https://github.com/Gaius-Augustus/BRAKER
+    ]]></help>
+    <expand macro="citations" />
+</tool>
b
diff -r 000000000000 -r e50ea151f221 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Tue Oct 05 12:35:42 2021 +0000
b
@@ -0,0 +1,131 @@
+<?xml version="1.0"?>
+<macros>
+    <token name="@TOOL_VERSION@">2.1.6</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+
+    <xml name="requirements"> <!-- expanded inside the requirements element of braker.xml -->
+        <requirement type="package" version="@TOOL_VERSION@">braker2</requirement> <!-- version follows the @TOOL_VERSION@ token defined above -->
+    </xml>
+
+    <xml name="citations"> <!-- DOI citations; expanded at the end of braker.xml -->
+        <citations>
+            <citation type="doi">10.1093/nargab/lqaa108</citation>
+            <citation type="doi">10.1007/978-1-4939-9173-0_5</citation>
+            <citation type="doi">10.1093/bioinformatics/btv661</citation>
+            <citation type="doi">10.1093/bioinformatics/btn013</citation>
+            <citation type="doi">10.1186/1471-2105-7-62</citation>
+        </citations>
+    </xml>
+
+    <xml name="augustus_species">
+        <!-- Species options (value equals display text), generated by listing trained_species/* in a Funannotate database directory. NOTE(review): braker.xml does not expand this macro; confirm it is still needed. -->
+        <option value="adorsata">adorsata</option>
+        <option value="aedes">aedes</option>
+        <option value="amphimedon">amphimedon</option>
+        <option value="ancylostoma_ceylanicum">ancylostoma_ceylanicum</option>
+        <option value="anidulans">anidulans</option>
+        <option value="arabidopsis">arabidopsis</option>
+        <option value="aspergillus_fumigatus">aspergillus_fumigatus</option>
+        <option value="aspergillus_nidulans">aspergillus_nidulans</option>
+        <option value="aspergillus_oryzae">aspergillus_oryzae</option>
+        <option value="aspergillus_terreus">aspergillus_terreus</option>
+        <option value="bombus_impatiens1">bombus_impatiens1</option>
+        <option value="bombus_terrestris2">bombus_terrestris2</option>
+        <option value="botrytis_cinerea">botrytis_cinerea</option>
+        <option value="b_pseudomallei">b_pseudomallei</option>
+        <option value="brugia">brugia</option>
+        <option value="cacao">cacao</option>
+        <option value="caenorhabditis">caenorhabditis</option>
+        <option value="camponotus_floridanus">camponotus_floridanus</option>
+        <option value="candida_albicans">candida_albicans</option>
+        <option value="candida_guilliermondii">candida_guilliermondii</option>
+        <option value="candida_tropicalis">candida_tropicalis</option>
+        <option value="c_elegans_trsk">c_elegans_trsk</option>
+        <option value="chaetomium_globosum">chaetomium_globosum</option>
+        <option value="chicken">chicken</option>
+        <option value="chiloscyllium">chiloscyllium</option>
+        <option value="chlamy2011">chlamy2011</option>
+        <option value="chlamydomonas">chlamydomonas</option>
+        <option value="chlorella">chlorella</option>
+        <option value="ciona">ciona</option>
+        <option value="coccidioides_immitis">coccidioides_immitis</option>
+        <option value="Conidiobolus_coronatus">Conidiobolus_coronatus</option>
+        <option value="coprinus">coprinus</option>
+        <option value="coprinus_cinereus">coprinus_cinereus</option>
+        <option value="coyote_tobacco">coyote_tobacco</option>
+        <option value="cryptococcus">cryptococcus</option>
+        <option value="cryptococcus_neoformans_gattii">cryptococcus_neoformans_gattii</option>
+        <option value="cryptococcus_neoformans_neoformans_B">cryptococcus_neoformans_neoformans_B</option>
+        <option value="cryptococcus_neoformans_neoformans_JEC21">cryptococcus_neoformans_neoformans_JEC21</option>
+        <option value="culex">culex</option>
+        <option value="debaryomyces_hansenii">debaryomyces_hansenii</option>
+        <option value="E_coli_K12">E_coli_K12</option>
+        <option value="elephant_shark">elephant_shark</option>
+        <option value="encephalitozoon_cuniculi_GB">encephalitozoon_cuniculi_GB</option>
+        <option value="eremothecium_gossypii">eremothecium_gossypii</option>
+        <option value="fly">fly</option>
+        <option value="fly_exp">fly_exp</option>
+        <option value="fusarium">fusarium</option>
+        <option value="fusarium_graminearum">fusarium_graminearum</option>
+        <option value="galdieria">galdieria</option>
+        <option value="generic">generic</option>
+        <option value="heliconius_melpomene1">heliconius_melpomene1</option>
+        <option value="histoplasma">histoplasma</option>
+        <option value="histoplasma_capsulatum">histoplasma_capsulatum</option>
+        <option value="honeybee1">honeybee1</option>
+        <option value="human">human</option>
+        <option value="japaneselamprey">japaneselamprey</option>
+        <option value="kluyveromyces_lactis">kluyveromyces_lactis</option>
+        <option value="laccaria_bicolor">laccaria_bicolor</option>
+        <option value="leishmania_tarentolae">leishmania_tarentolae</option>
+        <option value="lodderomyces_elongisporus">lodderomyces_elongisporus</option>
+        <option value="magnaporthe_grisea">magnaporthe_grisea</option>
+        <option value="maize">maize</option>
+        <option value="maize5">maize5</option>
+        <option value="mnemiopsis_leidyi">mnemiopsis_leidyi</option>
+        <option value="nasonia">nasonia</option>
+        <option value="nematostella_vectensis">nematostella_vectensis</option>
+        <option value="neurospora">neurospora</option>
+        <option value="neurospora_crassa">neurospora_crassa</option>
+        <option value="parasteatoda">parasteatoda</option>
+        <option value="pchrysosporium">pchrysosporium</option>
+        <option value="pea_aphid">pea_aphid</option>
+        <option value="pfalciparum">pfalciparum</option>
+        <option value="phanerochaete_chrysosporium">phanerochaete_chrysosporium</option>
+        <option value="pichia_stipitis">pichia_stipitis</option>
+        <option value="pisaster">pisaster</option>
+        <option value="pneumocystis">pneumocystis</option>
+        <option value="rhincodon">rhincodon</option>
+        <option value="rhizopus_oryzae">rhizopus_oryzae</option>
+        <option value="rhodnius">rhodnius</option>
+        <option value="rice">rice</option>
+        <option value="saccharomyces">saccharomyces</option>
+        <option value="saccharomyces_cerevisiae_rm11-1a_1">saccharomyces_cerevisiae_rm11-1a_1</option>
+        <option value="saccharomyces_cerevisiae_S288C">saccharomyces_cerevisiae_S288C</option>
+        <option value="s_aureus">s_aureus</option>
+        <option value="schistosoma">schistosoma</option>
+        <option value="schistosoma2">schistosoma2</option>
+        <option value="schizosaccharomyces_pombe">schizosaccharomyces_pombe</option>
+        <option value="scyliorhinus">scyliorhinus</option>
+        <option value="sealamprey">sealamprey</option>
+        <option value="s_pneumoniae">s_pneumoniae</option>
+        <option value="strongylocentrotus_purpuratus">strongylocentrotus_purpuratus</option>
+        <option value="sulfolobus_solfataricus">sulfolobus_solfataricus</option>
+        <option value="template_prokaryotic">template_prokaryotic</option>
+        <option value="tetrahymena">tetrahymena</option>
+        <option value="thermoanaerobacter_tengcongensis">thermoanaerobacter_tengcongensis</option>
+        <option value="tomato">tomato</option>
+        <option value="toxoplasma">toxoplasma</option>
+        <option value="tribolium2012">tribolium2012</option>
+        <option value="trichinella">trichinella</option>
+        <option value="ustilago">ustilago</option>
+        <option value="ustilago_maydis">ustilago_maydis</option>
+        <option value="verticillium_albo_atrum1">verticillium_albo_atrum1</option>
+        <option value="verticillium_longisporum1">verticillium_longisporum1</option>
+        <option value="volvox">volvox</option>
+        <option value="wheat">wheat</option>
+        <option value="Xipophorus_maculatus">Xipophorus_maculatus</option>
+        <option value="yarrowia_lipolytica">yarrowia_lipolytica</option>
+        <option value="zebrafish">zebrafish</option>
+    </xml>
+</macros>
b
diff -r 000000000000 -r e50ea151f221 test-data/SRR7458692.bam
b
Binary file test-data/SRR7458692.bam has changed
b
diff -r 000000000000 -r e50ea151f221 test-data/genome_masked.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome_masked.fa Tue Oct 05 12:35:42 2021 +0000
b
b'@@ -0,0 +1,4554 @@\n+>sample\n+GTGGCCGGCTGATTTCACGTCCTAACTTTGGGCTTAACTGGTTCGCCAGT\n+TGACTTTCTTCGCCATCATGTGATGCATTAATTAAACAATAATTACTAAT\n+TGacagtaattaataattgTGGCAAAAAGCGCGACACGTTTTTTCGGCAA\n+ACTCCTCGGAAGACCGATTGTTTAAAGACGTAGGGAAAAGTAGTTCCCAA\n+GCATTTTAAAAAGATACCTATGACATGTGACACCTTTAAAGTGCAATACA\n+AGTTTTCATCTCTTTATATCCTTTTACTCCCTAATTTGAATATAAAAGGA\n+ATCGCATTGAGAGTATAAAGGCTTTAGTTCTTATCGATagatagttagtG\n+ATGAAATAAAATTATAACCGTGGTTTTAGTTTGAAATGTTGTAAAGACTT\n+TCTTTAAATTTAACCAAATTTATGTGATAAAATGGATATTCCATAGATAA\n+GACATTTAAGTTAAATGTTTTTATACATCAAAAAGGAAACATTGTGCACG\n+CTATCAAATGGTATTCTTAAAATCGAGTCAgttaggtaagttattaatta\n+aATGGTAACTTTTTAACGTGCGTCAAATAATCTAGAAATTCTTCTTCAAC\n+TCATCCAAAACATTCTCAACACCACAATATCTATGCTCAGCGATGACAAA\n+TTTCTCCTGATTTCTTAATTTTCTATCTATGctatgcGATCAATCAACGA\n+ATGTGTGCTAATTTCTTGTGACGATTATTTTGCAAAGTCGTCTCCGCGTT\n+AATATCCGATGTAAATAAACCTATGAAAATCGCAAAGATCTATTCCTTTG\n+CGCTTAACCTTGTTATTGAACTCCCTCCCGCCCGGATTTTCGCAGCTTCC\n+AACTAAGTGATACCTTTTAAACAAACGCCACAACAAAACAGGTGACAATC\n+ATATTTTATCAACAAAAGAAAAGAGAGATAATATCTGCTGCTAATTCAAT\n+TTATTGGGCTTTGTGTTTATTTGCATTGGGAATCCGTGGAGCTGATATTG\n+TTTACTTTGGCAATTTCCCAGTCATTATGGTCGCCGTATAATGTGAATGG\n+CCAACAGGAAAATTTCACAGATTCCACTGGTTATTCACTGTTCATGCTGG\n+CGGTTGATCCGCTCCAAATCCCGGATGAGGCGGCGGAAGATCCTTATACC\n+GCTTACTTACGTATTCACTGCTTAACATCTTATCATAGAATTCGTTAGCT\n+TTCACATAGTTTGGATTCCGAATGAATCTCTTCTGTCGACCTCGCGTTTT\n+TGGCAATTTTCGGAGCTCTTCGCTACTGCTAGTTGATTTAAGGCCTACAT\n+CTTTCTGATCCTTAATCATCCTTTTAGGTGATTCTCCTGCTACCATGGGA\n+TCCACCAGTTTCATATATAATGGATCGGGCTCGTTTGCAAGTGGGTTTCC\n+AGTTTCAGAAGACTCTGACGTTCTAAGCGCCAATAAAATGAAAATAACCA\n+AAAGGAAACTAGACATGTTTCGTTACAGACAGATATAGATTGGATATTAT\n+TGAAAGAAAATgaaaataAACAGCGATAATGATCTGTGACTTATTGGAAA\n+TTAGATGGCTTATGGATGATGGGGCGATAAATTCGAACAAACACTGAGAG\n+CATTTTTGGGAGCATTGTGGGAGCTTTTCTATTATCCAGTACTTTATATA\n+CATATATCATTTATATACTAATCATTTCTGGTAGCCGTTCGTAATCAGGA\n+TCGGATCCTTTTTTACCCGTTAGTCAGCTAGAAGAAACGAAAAATTAAAA\n+TAGTAAAatctaaaagtatacaaaaattcaaatagtaaaaccaaaaagta\n+ttaaaaaaAATATCAATCGTTTTTAAACGTTGATTTTTCAGCTTGTGGGG\n+TGATTT
ATCGCTAACTTGGAAAATGATAATAAAGCATTATCCATAATATT\n+AGTTGTGGAAATGAAATTCAAATAGATGTTGTGTTATATACGATGAGGAT\n+GTTGCATTTGAGTCCCCGGAAATATAGTATTTTTTTTACCGAAGGTATTA\n+TCGTACCGGTCAAGTACGGTCACACTGCCAAGCGCAGATTTGAGGATTTC\n+TAGATTTGGCCTCTTGATGGACTAGAAGCGCTACCAAAACTGGGGCTTGA\n+GTTGAATTACCTGTTGGAAGACACAATGCCACCCACGATCAACAATTCGG\n+CGGTAAACAGTGCCGCCGAAAAGCGACCCCAGCGGCAAACGGAGCGCAAG\n+TAAGTGAACAGATCCCTAAACAGACGCCAGATACTCAGACTGATGTGTAC\n+CTTGCAGATCCGAGATCATTTGCCGCGTGAAGTATGGAAACAACCTGCCG\n+GATATACCATTTGATCTGAAGTTTCTGCAGTACCCCTTCGACAGCCACCG\n+CTTCGTGCAGTACAACCCAACGTCGCTAGAGCGTAACTTCAAGTATGACG\n+TGCTGACGGAACACGATTTGGGTGTCACGGTGGACCTGATTAACCGGGAG\n+CTCTATCAGGCCGACTCCATGACGCTGCTGGACCCCGCCGATGAAAAACT\n+GCTGGAGGAGGAGACTCTGACGCCCACAGACTCTGTGCGTTCGCGCCAGC\n+ATTCGAGGACGGTGTCATGGTTGCGCAAATCCGAGTACATCTCCACCGAG\n+CAGACGCGCTTCCAGCCCCAGAACCTGGAGAACATCGAGGCCAAGGTCGG\n+TTACAACGTCAAGAAGTCGCTTCGGGAGGAGACTCTCTACCTGGACCGCG\n+AAGCCCAGATCAAAGCCATCGAGAAGACCTTCAGCGACACCAAGAGCGAA\n+ATTACCAAGCACTATTCCAAGCCCAATGTGGTGCCAGTGGAGGTACTGCC\n+TATCTTCCCCGACTTCACCAACTGGAAGTTCCCGTGCGCCCAGGTCATAT\n+TTGACAGTGATCCCGCTCCTGCGGGCAAGAACGTGCCCGCCCAGCTGGAG\n+GAGATGTCGCAGGCCATGATTCGTGGTGTGATGGACGAGAGCGGCGAACA\n+GTTTGTCGCCTACTTCCTGCCCACAGAGCAGACGCTGGAGAAACGCCGTA\n+CAGACTTCATCAATGGCGAGCTGTACAAGGAGGaggaggagtacgagtac\n+aagATCGCTCGAGAGTACAACTGGAACGTGAAGACCAAAGCTTCCAAGGG\n+CTACGAAGAAAACTACTTCTTCGTGATGCGTCAGGACGGCATCTACTACA\n+ACGAGCTAGAAACCCGTGTGCGCCTTAACAAGCGTCGCGTTAAGGTTGGC\n+CAGCAACCCAACAACACCAAGCTGGTAAGTATATTTATGCGCATACATCT\n+ATAGCGAGCTTTACTTTGTATTATTTCTACCAGGTTGTCAAGCATCGTCC\n+ATTGGACAGCATGGAGCATCGTATGCAGCGCTATCGCGAGCGCCAGCTAG\n+AAGTTCCTGGCGAGGaggaggaGATCGTGGAAGAAGTGAGGGAAGAGGAG\n+CAAATGCAAATCATTGGCGAGACGGAGAAGACGAGCGAGGACGCAGCTGT\n+TGGCGCACAGGCAGCATCTGGAGCGGACTCACCCGCCCAGGTAGCCCGCG\n+ATCGACAGTCTCGTTCTCGGAGTCGAACTCGCAGCGGGTCCAgttcagga\n+tctggatctggctccggctcTCGGGCCAgcagccgctcaaagtctggttc\n+tcggtctggtagcggctcCAGATCACGCACAAATTCGCCGGCAGGATCCC\n+AGAAATCCGGatccagatcgagatcggtatcacgttcccgatcccgttcc\n+aagtcCGGCTCTCGGtcgcgttctaggtcgagatccaagtccggt
tcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTT'..b'cgcgttctaggtcgagatccaagtccggttcccg\n+atcacgttcgggctccagatctggctctgggtcgcgatcgCCCAGCCGGT\n+CTCGCAGTGGCTCGCCTTCTGGTTCAGGATCCAGCTCTGGAAGCGCCTCA\n+GATGAATGATTAATTACAAAAAACGGCGTTCATAATAAATAAGTTTATAA\n+TCAACCAAGTACATTTGAAAACTGAACTAACTCGATTTAATATCATTTTC\n+GCCTCAACTCAGCGCTCGGGTTCGTTGCCCAGAATAGTTTTAAATAAAAT\n+CGGCAGTTTAACATAATTTATATTAGATGTtgttgttgtATTGCAAACAA\n+GTCGGGTCCTAGTCGATTTACACTTGGCTGAGATAAAACAACTAAGATTC\n+AAATAATATCCTCATAAGAAGATGTAATTAAGACGTTTTTCTTAGGGGGT\n+GCTTAGGAATTGATTAGATCGCCTTTGGGGAAGTGCAAACAATGTAAaat\n+gatataaaagggtataaaTTAAGTGGATATATGCATCTTCGTTCCAACTA\n+CGTGGCGTCCATCAAAAAGCGCTGGAAGACTTCGCCATCGGAACTAGGTA\n+GCTCTGTTTGTTGCGGTGAGTAGATTCTCAAGTTCTGGAGTTGCTGCAGC\n+GGAGAGCCATTCCCGCTGAAGTGTACCACCGCAACCGGCTGTAGGGATAT\n+GAGCTGTCCCTCCTCCCGCGGCTCCACACCACAGATGCCCAGCTTTTGGC\n+ACTGCTCAACGACAATGTCGTCGATTGACTGCGAAAGCAGTGCCTCCTGC\n+TCAGGATCCACAATGGAGCTGCTGTTGATGGCAGCTATTTCGGCGCTCGC\n+TGGTGAAGGCTGAGTATGCGAGTCGTTGCCCTCCAGGAACGCCTTTATCA\n+GCTGCTCCGGTGTCTGGGCCTCCTCGGTGGGACATCGATGGGTCCTTTGA\n+CTGACCTTGTACCTGAACGTCTTTTGGCAGAGCTCGCACTTGTAGGGCAT\n+CACTCCCGTGTGGATGCGCGTGTGGACAAGGAAAGAGACTCGCTGCCGGA\n+AACACTTGCCTGTGGACGAGTCCGAAATACGAAAAGGTTAGACATGGAGT\n+GACCCGGAAAAGAAGGTATACCTCTCCTTCATTTAAAGTAAATAGGGCAA\n+ATCGCAATGGAGTATGCTCATTTATAAGCTGGCTAACAAAATAAGGGGCG\n+GCTAATTAAGGGGTTTGATCGATACTCACCGCAGACTTCGCACTTGAAGG\n+GCTTTTCGCCGCTGTGAATCCTCTGGTGGTTATGCAGCGTAGACAGTTCC\n+TTGAAGGCGCGTCCACAAACCCCGCAGACATGGGGCTTTACCTCGCTGTG\n+GTAGAGCAAATGCTTGTGGTACGACTGCTGGAAGGTGAAGGTCCTGGCGC\n+AGATCTCGCATGTGTACGGCATCTCGCCGGTGTGGAGCCGCTTGTGCTTC\n+TTCAGAAAGTACTTGGTGGTGAAGGACTTGCTGCACACATCGCACTCCCA\n+CAGCTTCGGGGTGGCCGTGCCCGACTCCGGCGAACTGGGGGACTGCTGGA\n+GCATGCTCAGCGCCCCACTCAGCGCGTATGGCTGGGATGCGGTGCACGTG\n+GAGTTATTTCCGTTGCCAAGGGCTCCAGGCTCTGTGTGTCGAATGCGATC\n+GCAAATGCTCAGCTTGGGCGTGGCAGTAACTGCACTGGTGGTAGAAGCGG\n+GTGTAGGACtgggattaggatttggattgggattggAGCAGGGCACGCCC\n+ATCATGTGCACTACTTTCAGGTGGATTCGAAGGGAGCCCTTCATTCGGAA\n+CGTCTTGGAGCAGAGATGGCACTT
GTAGGGCTCCTGGTCCTGTATAAAGC\n+AATAATCGGAATTTCACTTATATTTATCAATTCATCAATATGCCCTCATG\n+GCCAAATATTCCATTACATTACCGTCTGTCTCTCAGTTTCAAATTTATGC\n+ACAAAAATCATTCACTTTCATTCACTATATCACAAAGTTGCCATGGTTTT\n+AAATTGATCAAAAACAAATTAATATCTATCATATATaTACATAGTCATAT\n+GAACAGTTGAAAAATTAATTGAAAATAATGGGAACGATATACGTACATAC\n+ATCAGTTGTTTTTAAAATATAAGGGTATATAGATTTCTTTCttgttgttg\n+ttgatTTTAATTACGTCAAACTTTTGTTTCAGATTCAATGTAAATGGTCT\n+AGCTTTTTAAGTATGATTTTTTTTTGCTGCCAGTGAGCATAGAAaaaaaa\n+aatCaaaATCGATATAAGAATATGCGAAAGTGCATTACGAAACTCTTTAG\n+ATAATAGCACTTAATATATGTACATAGCCAATAGTTACCGGTTCCTTCTG\n+TTGGggttccttttgcttgggttcTCCCTCCGCATTTTCGTGGACTAAGC\n+GGACGTGCATGTCCCTCAGCTCGGTATTCCGGAAACTGAACTCGCAAATG\n+TCGCAGTGGGCGGGCGGGGTGGTCCGCACAGGCGGTGGGGTTGGGACGAC\n+GGGCTTGGACCTGGTTCGCTTGGCCCTCCGTTTGGGAGGAGCTGCGGCAA\n+GGAAGCCCCGGGACGAGGCGGGTTGGCCATTCGCTGGACTCTCGTTTCCC\n+TCCTGAGCCATCAGACTTGTGTGCGAGAACAGGTGGATGGTGAGCTTGTC\n+CAGCCCCAGGAAGAGCTCCTTGCAGTTGGCAAAGGGACAGGCCAGTGGGC\n+CGTTGGCCGCCTTGATCAGCCTCTGCTGCAGTGCGTCAATACTGCCGAAA\n+CTGGGCACCGCGCAGAGCGGACACAGCACCGAGGTGGAACACATTTCGCC\n+AGTGCACTCAATCGAATCTTATGCAAATGCTTCACCTCCTATTGGGATTA\n+TCCTCCTGTTCGGTCTGTGATCATCTATTCAGGAGTCCATTCCCAGACTG\n+CCTAGTCTTTCTGCTTTCAAAATTTTCTAAAAATATCAGCAAGTGAAGAT\n+TTTTGAAAACTTTGGGCCCAGCAATCTGACTTCTCGGCACCGATGCCAGC\n+TAACGAAATAATGAAaaataatgaaaTGCCCGGCGCGGATCGTCGAATCG\n+TCAAGAAGACTTTCGGAAACACTCGCAGCACCGAAATCCCATCTCTCGAA\n+CAAGGCAGTCTCTTTTCTCCGTGTCTCTGGGTAGCTCATTTCGAAATATA\n+GCTCTGAGCACGGCTATATACTATATGTATGTAGAATTATTTCTGGCCGA\n+TATATGTTGCACTGGCGGCCATATAGCCTTCGTTCTAGTCTTTGTAACGC\n+ACGATGCGCAGGAGCAATTCGCTGAGATGACCACATGCGATTTGCGGGAC\n+TTATCTAGAGATCTATCATTATCGCCAGATTGGTTTAATAATTGGCTTTT\n+CCGCCAATATCCAATTGGAATATGGTTGGTTACTGCAATTGTCGCTCCAT\n+TTTTTAAGCACTCCATAAAAAGTAAACACATTAATATGTACTCTTATTGG\n+AGATTTCTTCTTTCGATTTTAGTTTCGGACCAGTGAAAATCATTCGTTTC\n+ATTTTCGTAAATAAGAACTGAGAAAATATtattattatATATATTTCTTT\n+ATTAGGAAAATACGAAGATTGAGTATTTCAGATTGAATTAGCATATCCGT\n+CTAAATCTTAATGCTGTAATGAGCTTACTTGAGATCTGATCAAAACCAAT\n+ACaaaacccacacCAAAGGTGGTAGCTAATATACATATTTTGTGTAATAC\n+TTTTGTAGAG
TATTTACTATTCAGCGATTTAAACAAGCAATCGCCTAGAC\n+ACACACATTTGTCCGCCTATGTGTATGTGCACCGAGCTATACCCCCACTG\n+AATCGCTGTGTGCTATTTTTATGGCCGCGATGCTCTCTTGTTTTGACCCG\n+CTTGGGCAAC\n'
b
diff -r 000000000000 -r e50ea151f221 test-data/out_bam/braker.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_bam/braker.gtf Tue Oct 05 12:35:42 2021 +0000
b
b'@@ -0,0 +1,465 @@\n+sample\tAUGUSTUS\tstart_codon\t79526\t79528\t.\t+\t0\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t79526\t80474\t0.4\t+\t0\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\texon\t79526\t80474\t.\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tintron\t80475\t87860\t0.49\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t87861\t88125\t0.39\t+\t2\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\texon\t87861\t88125\t.\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tintron\t88126\t92148\t0.19\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t92149\t92252\t0.17\t+\t1\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\texon\t92149\t92252\t.\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tintron\t92253\t94562\t0.19\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t94563\t94732\t0.96\t+\t2\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\texon\t94563\t94732\t.\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tintron\t94733\t94872\t0.91\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t94873\t95333\t0.79\t+\t0\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\texon\t94873\t95333\t.\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tintron\t95334\t95584\t0.51\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t95585\t95762\t0.35\t+\t1\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tgene\t79526\t95762\t0.01\t+\t.\tg14\n+sample\tAUGUSTUS\ttranscript\t79526\t95762\t0.01\t+\t.\tg14.t1\n+sample\tAUGUSTUS\texon\t95585\t95762\t.\t+\t.\ttranscript_id "file_1_g14.t1"; 
gene_id "file_1_g14";\n+sample\tAUGUSTUS\tstop_codon\t95760\t95762\t.\t+\t0\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample4\tAUGUSTUS\tstop_codon\t4248\t4250\t.\t-\t0\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tCDS\t4248\t4738\t0.42\t-\t2\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\texon\t4248\t4738\t.\t-\t.\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tintron\t4739\t4929\t0.42\t-\t.\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tCDS\t4930\t5539\t0.37\t-\t0\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\texon\t4930\t5539\t.\t-\t.\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tintron\t5540\t6038\t0.29\t-\t.\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tCDS\t6039\t6494\t0.47\t-\t0\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tgene\t4248\t6494\t0.12\t-\t.\tg28\n+sample4\tAUGUSTUS\ttranscript\t4248\t6494\t0.12\t-\t.\tg28.t1\n+sample4\tAUGUSTUS\texon\t6039\t6494\t.\t-\t.\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tstart_codon\t6492\t6494\t.\t-\t0\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample\tAUGUSTUS\tstop_codon\t209044\t209046\t.\t-\t0\ttranscript_id "file_1_g24.t1"; gene_id "file_1_g24";\n+sample\tAUGUSTUS\tCDS\t209044\t210483\t0.38\t-\t0\ttranscript_id "file_1_g24.t1"; gene_id "file_1_g24";\n+sample\tAUGUSTUS\texon\t209044\t210483\t.\t-\t.\ttranscript_id "file_1_g24.t1"; gene_id "file_1_g24";\n+sample\tAUGUSTUS\tstart_codon\t210481\t210483\t.\t-\t0\ttranscript_id "file_1_g24.t1"; gene_id "file_1_g24";\n+sample\tAUGUSTUS\tgene\t209044\t210483\t0.38\t-\t.\tg24\n+sample\tAUGUSTUS\ttranscript\t209044\t210483\t0.38\t-\t.\tg24.t1\n+sample\tAUGUSTUS\tstart_codon\t2126\t2128\t.\t+\t0\ttranscript_id "file_1_g2.t1"; gene_id 
"file_1_g2";\n+sample\tAUGUSTUS\tCDS\t2126\t2199\t1\t+\t0\ttranscript_id "file_1_g2.t1"; gene_id "file_1_g2";\n+sample\tAUGUSTUS\texon\t2126\t2199\t.\t+\t.\ttranscript_id "file_1_g2.t1"; gene_id "file_1_g2";\n+sample\tAUGUSTUS\tintron\t2200\t2257\t1\t+\t.\ttranscript_id "file_1_g2.t1"; gene_id "file_1_g2";\n+sample\tAUGUSTUS\tCDS\t2258\t3224\t0.49\t+\t1\ttranscript_id "file_1_g2.t1"; gene_id "file_1_g2";\n+sample\tAUGUSTUS\texon\t2258\t3224\t.\t+\t.\ttranscript_id "fi'..b'e_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tCDS\t102521\t103151\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t"; cds_type "file_2_Internal"; count "file_2_4_5";\n+sample\tGeneMark.hmm3\tintron\t103152\t103222\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tCDS\t103223\t103812\t.\t-\t2\tgene_id "file_2_13_g"; transcript_id "file_2_13_t"; cds_type "file_2_Internal"; count "file_2_3_5";\n+sample\tGeneMark.hmm3\tintron\t103813\t111230\t.\t-\t1\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tCDS\t111231\t111342\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t"; cds_type "file_2_Internal"; count "file_2_2_5";\n+sample\tGeneMark.hmm3\tintron\t111343\t113595\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tgene\t97476\t113919\t.\t-\t.\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tmRNA\t97476\t113919\t.\t-\t.\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tCDS\t113596\t113919\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t"; cds_type "file_2_Initial"; count "file_2_1_5";\n+sample\tGeneMark.hmm3\tstart_codon\t113917\t113919\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample4\tAUGUSTUS\tstop_codon\t1092\t1094\t.\t-\t0\ttranscript_id "file_1_g26.t1"; gene_id "file_1_g26";\n+sample4\tAUGUSTUS\tCDS\t1092\t1364\t0.76\t-\t0\ttranscript_id "file_1_g26.t1"; gene_id 
"file_1_g26";\n+sample4\tAUGUSTUS\texon\t1092\t1364\t.\t-\t.\ttranscript_id "file_1_g26.t1"; gene_id "file_1_g26";\n+sample4\tAUGUSTUS\tstart_codon\t1362\t1364\t.\t-\t0\ttranscript_id "file_1_g26.t1"; gene_id "file_1_g26";\n+sample4\tAUGUSTUS\tgene\t1092\t1364\t0.76\t-\t.\tg26\n+sample4\tAUGUSTUS\ttranscript\t1092\t1364\t0.76\t-\t.\tg26.t1\n+sample\tAUGUSTUS\tstop_codon\t34843\t34845\t.\t-\t0\ttranscript_id "file_1_g7.t1"; gene_id "file_1_g7";\n+sample\tAUGUSTUS\tCDS\t34843\t35679\t0.35\t-\t0\ttranscript_id "file_1_g7.t1"; gene_id "file_1_g7";\n+sample\tAUGUSTUS\texon\t34843\t35679\t.\t-\t.\ttranscript_id "file_1_g7.t1"; gene_id "file_1_g7";\n+sample\tAUGUSTUS\tstart_codon\t35677\t35679\t.\t-\t0\ttranscript_id "file_1_g7.t1"; gene_id "file_1_g7";\n+sample\tAUGUSTUS\tgene\t34843\t35679\t0.35\t-\t.\tg7\n+sample\tAUGUSTUS\ttranscript\t34843\t35679\t0.35\t-\t.\tg7.t1\n+sample\tAUGUSTUS\tstop_codon\t40166\t40168\t.\t-\t0\ttranscript_id "file_1_g8.t1"; gene_id "file_1_g8";\n+sample\tAUGUSTUS\tCDS\t40166\t40531\t0.99\t-\t0\ttranscript_id "file_1_g8.t1"; gene_id "file_1_g8";\n+sample\tAUGUSTUS\texon\t40166\t40531\t.\t-\t.\ttranscript_id "file_1_g8.t1"; gene_id "file_1_g8";\n+sample\tAUGUSTUS\tstart_codon\t40529\t40531\t.\t-\t0\ttranscript_id "file_1_g8.t1"; gene_id "file_1_g8";\n+sample\tAUGUSTUS\tgene\t40166\t40531\t0.99\t-\t.\tg8\n+sample\tAUGUSTUS\ttranscript\t40166\t40531\t0.99\t-\t.\tg8.t1\n+sample4\tAUGUSTUS\tstart_codon\t2126\t2128\t.\t+\t0\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tCDS\t2126\t2199\t1\t+\t0\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\texon\t2126\t2199\t.\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tintron\t2200\t2257\t1\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tCDS\t2258\t3224\t0.45\t+\t1\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\texon\t2258\t3224\t.\t+\t.\ttranscript_id 
"file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tintron\t3225\t3283\t0.33\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tCDS\t3284\t3512\t0.14\t+\t0\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\texon\t3284\t3512\t.\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tintron\t3513\t3807\t0.22\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tCDS\t3808\t3863\t0.59\t+\t2\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tgene\t2126\t3863\t0.07\t+\t.\tg27\n+sample4\tAUGUSTUS\ttranscript\t2126\t3863\t0.07\t+\t.\tg27.t1\n+sample4\tAUGUSTUS\texon\t3808\t3863\t.\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tstop_codon\t3861\t3863\t.\t+\t0\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n'
b
diff -r 000000000000 -r e50ea151f221 test-data/out_genome/braker.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_genome/braker.gff3 Tue Oct 05 12:35:42 2021 +0000
b
b'@@ -0,0 +1,280 @@\n+sample\tAUGUSTUS\tstart_codon\t40692\t40694\t.\t+\t0\tID=file_1_g9.t1.start1;Parent=file_1_g9.t1;\n+sample\tAUGUSTUS\tCDS\t40692\t41193\t0.54\t+\t0\tID=file_1_g9.t1.CDS1;Parent=file_1_g9.t1;\n+sample\tAUGUSTUS\texon\t40692\t41193\t.\t+\t.\tID=file_1_g9.t1.exon1;Parent=file_1_g9.t1;\n+sample\tAUGUSTUS\tintron\t41194\t41706\t0.54\t+\t.\tID=file_1_g9.t1.intron1;Parent=file_1_g9.t1;\n+sample\tAUGUSTUS\tCDS\t41707\t42103\t0.64\t+\t2\tID=file_1_g9.t1.CDS2;Parent=file_1_g9.t1;\n+sample\tAUGUSTUS\texon\t41707\t42103\t.\t+\t.\tID=file_1_g9.t1.exon2;Parent=file_1_g9.t1;\n+sample\tAUGUSTUS\tintron\t42104\t43371\t0.21\t+\t.\tID=file_1_g9.t1.intron2;Parent=file_1_g9.t1;\n+sample\tAUGUSTUS\tCDS\t43372\t43609\t0.14\t+\t1\tID=file_1_g9.t1.CDS3;Parent=file_1_g9.t1;\n+sample\tAUGUSTUS\texon\t43372\t43609\t.\t+\t.\tID=file_1_g9.t1.exon3;Parent=file_1_g9.t1;\n+sample\tAUGUSTUS\tintron\t43610\t43677\t0.46\t+\t.\tID=file_1_g9.t1.intron3;Parent=file_1_g9.t1;\n+sample\tAUGUSTUS\tCDS\t43678\t44130\t0.46\t+\t0\tID=file_1_g9.t1.CDS4;Parent=file_1_g9.t1;\n+sample\tAUGUSTUS\texon\t43678\t44130\t.\t+\t.\tID=file_1_g9.t1.exon4;Parent=file_1_g9.t1;\n+sample\tAUGUSTUS\tstop_codon\t44128\t44130\t.\t+\t0\tID=file_1_g9.t1.stop1;Parent=file_1_g9.t1;\n+sample4\tAUGUSTUS\tstop_codon\t1092\t1094\t.\t-\t0\tID=file_1_g26.t1.stop1;Parent=file_1_g26.t1;\n+sample4\tAUGUSTUS\tCDS\t1092\t1364\t0.76\t-\t0\tID=file_1_g26.t1.CDS1;Parent=file_1_g26.t1;\n+sample4\tAUGUSTUS\texon\t1092\t1364\t.\t-\t.\tID=file_1_g26.t1.exon1;Parent=file_1_g26.t1;\n+sample4\tAUGUSTUS\tstart_codon\t1362\t1364\t.\t-\t0\tID=file_1_g26.t1.start1;Parent=file_1_g26.t1;\n+sample\tAUGUSTUS\tstart_codon\t167121\t167123\t.\t+\t0\tID=file_1_g21.t1.start1;Parent=file_1_g21.t1;\n+sample\tAUGUSTUS\tCDS\t167121\t168360\t1\t+\t0\tID=file_1_g21.t1.CDS1;Parent=file_1_g21.t1;\n+sample\tAUGUSTUS\texon\t167121\t168360\t.\t+\t.\tID=file_1_g21.t1.exon1;Parent=file_1_g21.t1;\n+sample\tAUGUSTUS\tintron\t168361\t168721\t1\t+\t.\tID=file_1_g
21.t1.intron1;Parent=file_1_g21.t1;\n+sample\tAUGUSTUS\tCDS\t168722\t169167\t1\t+\t2\tID=file_1_g21.t1.CDS2;Parent=file_1_g21.t1;\n+sample\tAUGUSTUS\texon\t168722\t169167\t.\t+\t.\tID=file_1_g21.t1.exon2;Parent=file_1_g21.t1;\n+sample\tAUGUSTUS\tintron\t169168\t172584\t0.87\t+\t.\tID=file_1_g21.t1.intron2;Parent=file_1_g21.t1;\n+sample\tAUGUSTUS\tCDS\t172585\t172957\t0.81\t+\t0\tID=file_1_g21.t1.CDS3;Parent=file_1_g21.t1;\n+sample\tAUGUSTUS\texon\t172585\t172957\t.\t+\t.\tID=file_1_g21.t1.exon3;Parent=file_1_g21.t1;\n+sample\tAUGUSTUS\tintron\t172958\t180019\t0.41\t+\t.\tID=file_1_g21.t1.intron3;Parent=file_1_g21.t1;\n+sample\tAUGUSTUS\tCDS\t180020\t180579\t0.57\t+\t2\tID=file_1_g21.t1.CDS4;Parent=file_1_g21.t1;\n+sample\tAUGUSTUS\texon\t180020\t180579\t.\t+\t.\tID=file_1_g21.t1.exon4;Parent=file_1_g21.t1;\n+sample\tAUGUSTUS\tstop_codon\t180577\t180579\t.\t+\t0\tID=file_1_g21.t1.stop1;Parent=file_1_g21.t1;\n+sample\tAUGUSTUS\tstart_codon\t119778\t119780\t.\t+\t0\tID=file_1_g18.t1.start1;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\tCDS\t119778\t120378\t1\t+\t0\tID=file_1_g18.t1.CDS1;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\texon\t119778\t120378\t.\t+\t.\tID=file_1_g18.t1.exon1;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\tintron\t120379\t123327\t0.98\t+\t.\tID=file_1_g18.t1.intron1;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\tCDS\t123328\t123512\t0.99\t+\t2\tID=file_1_g18.t1.CDS2;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\texon\t123328\t123512\t.\t+\t.\tID=file_1_g18.t1.exon2;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\tintron\t123513\t133208\t0.5\t+\t.\tID=file_1_g18.t1.intron2;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\tCDS\t133209\t134539\t1\t+\t0\tID=file_1_g18.t1.CDS3;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\texon\t133209\t134539\t.\t+\t.\tID=file_1_g18.t1.exon3;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\tintron\t134540\t134667\t1\t+\t.\tID=file_1_g18.t1.intron3;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\tCDS\t134668\t135510\t1\t+\t1\tID=file_1_g18.t1.CDS4;Parent=file_1_g18.t1;\n+sa
mple\tAUGUSTUS\texon\t134668\t135510\t.\t+\t.\tID=file_1_g18.t1.exon4;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\tintron\t135511\t135568\t1\t+\t.\tID=file_1_g18.t1.intron4;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\tCDS\t135569\t136346\t0.99\t+\t1\tID=file_1_g18.t1.CDS5;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\texon\t135569\t136346\t.\t+\t.\tID=file_1_g18.t1.exon5;Parent=file_1_g18.t1;\n+sample\tAUGUSTUS\tstop'..b't_codon\t115274\t115276\t.\t+\t0\tID=file_1_g16.t1.start1;Parent=file_1_g16.t1;\n+sample\tAUGUSTUS\tCDS\t115274\t116488\t0.87\t+\t0\tID=file_1_g16.t1.CDS1;Parent=file_1_g16.t1;\n+sample\tAUGUSTUS\texon\t115274\t116488\t.\t+\t.\tID=file_1_g16.t1.exon1;Parent=file_1_g16.t1;\n+sample\tAUGUSTUS\tstop_codon\t116486\t116488\t.\t+\t0\tID=file_1_g16.t1.stop1;Parent=file_1_g16.t1;\n+sample\tAUGUSTUS\tstop_codon\t209044\t209046\t.\t-\t0\tID=file_1_g24.t1.stop1;Parent=file_1_g24.t1;\n+sample\tAUGUSTUS\tCDS\t209044\t210483\t0.38\t-\t0\tID=file_1_g24.t1.CDS1;Parent=file_1_g24.t1;\n+sample\tAUGUSTUS\texon\t209044\t210483\t.\t-\t.\tID=file_1_g24.t1.exon1;Parent=file_1_g24.t1;\n+sample\tAUGUSTUS\tstart_codon\t210481\t210483\t.\t-\t0\tID=file_1_g24.t1.start1;Parent=file_1_g24.t1;\n+sample\tAUGUSTUS\tstop_codon\t100542\t100544\t.\t-\t0\tID=file_1_g15.t1.stop1;Parent=file_1_g15.t1;\n+sample\tAUGUSTUS\tCDS\t100542\t100756\t0.3\t-\t2\tID=file_1_g15.t1.CDS1;Parent=file_1_g15.t1;\n+sample\tAUGUSTUS\texon\t100542\t100756\t.\t-\t.\tID=file_1_g15.t1.exon1;Parent=file_1_g15.t1;\n+sample\tAUGUSTUS\tintron\t100757\t102520\t0.32\t-\t.\tID=file_1_g15.t1.intron1;Parent=file_1_g15.t1;\n+sample\tAUGUSTUS\tCDS\t102521\t103133\t0.26\t-\t0\tID=file_1_g15.t1.CDS2;Parent=file_1_g15.t1;\n+sample\tAUGUSTUS\texon\t102521\t103133\t.\t-\t.\tID=file_1_g15.t1.exon2;Parent=file_1_g15.t1;\n+sample\tAUGUSTUS\tintron\t103134\t103264\t0.25\t-\t.\tID=file_1_g15.t1.intron2;Parent=file_1_g15.t1;\n+sample\tAUGUSTUS\tCDS\t103265\t104819\t0.09\t-\t1\tID=file_1_g15.t1.CDS3;Parent=file_1_g15.t1;\n+sample\tAUGUSTUS\tex
on\t103265\t104819\t.\t-\t.\tID=file_1_g15.t1.exon3;Parent=file_1_g15.t1;\n+sample\tAUGUSTUS\tintron\t104820\t108228\t0.15\t-\t.\tID=file_1_g15.t1.intron3;Parent=file_1_g15.t1;\n+sample\tAUGUSTUS\tCDS\t108229\t108299\t0.42\t-\t0\tID=file_1_g15.t1.CDS4;Parent=file_1_g15.t1;\n+sample\tAUGUSTUS\texon\t108229\t108299\t.\t-\t.\tID=file_1_g15.t1.exon4;Parent=file_1_g15.t1;\n+sample\tAUGUSTUS\tstart_codon\t108297\t108299\t.\t-\t0\tID=file_1_g15.t1.start1;Parent=file_1_g15.t1;\n+sample\tAUGUSTUS\tstop_codon\t214566\t214568\t.\t-\t0\tID=file_1_g25.t1.stop1;Parent=file_1_g25.t1;\n+sample\tAUGUSTUS\tCDS\t214566\t214811\t0.19\t-\t0\tID=file_1_g25.t1.CDS1;Parent=file_1_g25.t1;\n+sample\tAUGUSTUS\texon\t214566\t214811\t.\t-\t.\tID=file_1_g25.t1.exon1;Parent=file_1_g25.t1;\n+sample\tAUGUSTUS\tstart_codon\t214809\t214811\t.\t-\t0\tID=file_1_g25.t1.start1;Parent=file_1_g25.t1;\n+sample\tAUGUSTUS\tstart_codon\t58640\t58642\t.\t+\t0\tID=file_1_g12.t1.start1;Parent=file_1_g12.t1;\n+sample\tAUGUSTUS\tCDS\t58640\t58677\t0.01\t+\t0\tID=file_1_g12.t1.CDS1;Parent=file_1_g12.t1;\n+sample\tAUGUSTUS\texon\t58640\t58677\t.\t+\t.\tID=file_1_g12.t1.exon1;Parent=file_1_g12.t1;\n+sample\tAUGUSTUS\tintron\t58678\t59091\t0.01\t+\t.\tID=file_1_g12.t1.intron1;Parent=file_1_g12.t1;\n+sample\tAUGUSTUS\tCDS\t59092\t59204\t0.01\t+\t1\tID=file_1_g12.t1.CDS2;Parent=file_1_g12.t1;\n+sample\tAUGUSTUS\texon\t59092\t59204\t.\t+\t.\tID=file_1_g12.t1.exon2;Parent=file_1_g12.t1;\n+sample\tAUGUSTUS\tintron\t59205\t69536\t0.01\t+\t.\tID=file_1_g12.t1.intron2;Parent=file_1_g12.t1;\n+sample\tAUGUSTUS\tCDS\t69537\t69745\t0.01\t+\t2\tID=file_1_g12.t1.CDS3;Parent=file_1_g12.t1;\n+sample\tAUGUSTUS\texon\t69537\t69745\t.\t+\t.\tID=file_1_g12.t1.exon3;Parent=file_1_g12.t1;\n+sample\tAUGUSTUS\tstop_codon\t69743\t69745\t.\t+\t0\tID=file_1_g12.t1.stop1;Parent=file_1_g12.t1;\n+sample\tAUGUSTUS\tstop_codon\t45527\t45529\t.\t-\t0\tID=file_1_g10.t1.stop1;Parent=file_1_g10.t1;\n+sample\tAUGUSTUS\tCDS\t45527\t46157\t0.51\t-\t1\tID=fi
le_1_g10.t1.CDS1;Parent=file_1_g10.t1;\n+sample\tAUGUSTUS\texon\t45527\t46157\t.\t-\t.\tID=file_1_g10.t1.exon1;Parent=file_1_g10.t1;\n+sample\tAUGUSTUS\tintron\t46158\t46752\t0.42\t-\t.\tID=file_1_g10.t1.intron1;Parent=file_1_g10.t1;\n+sample\tAUGUSTUS\tCDS\t46753\t47379\t0.43\t-\t1\tID=file_1_g10.t1.CDS2;Parent=file_1_g10.t1;\n+sample\tAUGUSTUS\texon\t46753\t47379\t.\t-\t.\tID=file_1_g10.t1.exon2;Parent=file_1_g10.t1;\n+sample\tAUGUSTUS\tintron\t47380\t49616\t0.44\t-\t.\tID=file_1_g10.t1.intron2;Parent=file_1_g10.t1;\n+sample\tAUGUSTUS\tCDS\t49617\t49792\t0.45\t-\t0\tID=file_1_g10.t1.CDS3;Parent=file_1_g10.t1;\n+sample\tAUGUSTUS\texon\t49617\t49792\t.\t-\t.\tID=file_1_g10.t1.exon3;Parent=file_1_g10.t1;\n+sample\tAUGUSTUS\tstart_codon\t49790\t49792\t.\t-\t0\tID=file_1_g10.t1.start1;Parent=file_1_g10.t1;\n'
b
diff -r 000000000000 -r e50ea151f221 test-data/out_genome/braker.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_genome/braker.gtf Tue Oct 05 12:35:42 2021 +0000
b
b'@@ -0,0 +1,465 @@\n+sample\tAUGUSTUS\tstart_codon\t79526\t79528\t.\t+\t0\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t79526\t80474\t0.4\t+\t0\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\texon\t79526\t80474\t.\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tintron\t80475\t87860\t0.49\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t87861\t88125\t0.39\t+\t2\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\texon\t87861\t88125\t.\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tintron\t88126\t92148\t0.19\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t92149\t92252\t0.17\t+\t1\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\texon\t92149\t92252\t.\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tintron\t92253\t94562\t0.19\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t94563\t94732\t0.96\t+\t2\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\texon\t94563\t94732\t.\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tintron\t94733\t94872\t0.91\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t94873\t95333\t0.79\t+\t0\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\texon\t94873\t95333\t.\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tintron\t95334\t95584\t0.51\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t95585\t95762\t0.35\t+\t1\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tgene\t79526\t95762\t0.01\t+\t.\tg14\n+sample\tAUGUSTUS\ttranscript\t79526\t95762\t0.01\t+\t.\tg14.t1\n+sample\tAUGUSTUS\texon\t95585\t95762\t.\t+\t.\ttranscript_id "file_1_g14.t1"; 
gene_id "file_1_g14";\n+sample\tAUGUSTUS\tstop_codon\t95760\t95762\t.\t+\t0\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample4\tAUGUSTUS\tstop_codon\t4248\t4250\t.\t-\t0\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tCDS\t4248\t4738\t0.42\t-\t2\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\texon\t4248\t4738\t.\t-\t.\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tintron\t4739\t4929\t0.42\t-\t.\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tCDS\t4930\t5539\t0.37\t-\t0\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\texon\t4930\t5539\t.\t-\t.\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tintron\t5540\t6038\t0.29\t-\t.\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tCDS\t6039\t6494\t0.47\t-\t0\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tgene\t4248\t6494\t0.12\t-\t.\tg28\n+sample4\tAUGUSTUS\ttranscript\t4248\t6494\t0.12\t-\t.\tg28.t1\n+sample4\tAUGUSTUS\texon\t6039\t6494\t.\t-\t.\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tstart_codon\t6492\t6494\t.\t-\t0\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample\tAUGUSTUS\tstop_codon\t209044\t209046\t.\t-\t0\ttranscript_id "file_1_g24.t1"; gene_id "file_1_g24";\n+sample\tAUGUSTUS\tCDS\t209044\t210483\t0.38\t-\t0\ttranscript_id "file_1_g24.t1"; gene_id "file_1_g24";\n+sample\tAUGUSTUS\texon\t209044\t210483\t.\t-\t.\ttranscript_id "file_1_g24.t1"; gene_id "file_1_g24";\n+sample\tAUGUSTUS\tstart_codon\t210481\t210483\t.\t-\t0\ttranscript_id "file_1_g24.t1"; gene_id "file_1_g24";\n+sample\tAUGUSTUS\tgene\t209044\t210483\t0.38\t-\t.\tg24\n+sample\tAUGUSTUS\ttranscript\t209044\t210483\t0.38\t-\t.\tg24.t1\n+sample\tAUGUSTUS\tstart_codon\t2126\t2128\t.\t+\t0\ttranscript_id "file_1_g2.t1"; gene_id 
"file_1_g2";\n+sample\tAUGUSTUS\tCDS\t2126\t2199\t1\t+\t0\ttranscript_id "file_1_g2.t1"; gene_id "file_1_g2";\n+sample\tAUGUSTUS\texon\t2126\t2199\t.\t+\t.\ttranscript_id "file_1_g2.t1"; gene_id "file_1_g2";\n+sample\tAUGUSTUS\tintron\t2200\t2257\t1\t+\t.\ttranscript_id "file_1_g2.t1"; gene_id "file_1_g2";\n+sample\tAUGUSTUS\tCDS\t2258\t3224\t0.49\t+\t1\ttranscript_id "file_1_g2.t1"; gene_id "file_1_g2";\n+sample\tAUGUSTUS\texon\t2258\t3224\t.\t+\t.\ttranscript_id "fi'..b'e_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tCDS\t102521\t103151\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t"; cds_type "file_2_Internal"; count "file_2_4_5";\n+sample\tGeneMark.hmm3\tintron\t103152\t103222\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tCDS\t103223\t103812\t.\t-\t2\tgene_id "file_2_13_g"; transcript_id "file_2_13_t"; cds_type "file_2_Internal"; count "file_2_3_5";\n+sample\tGeneMark.hmm3\tintron\t103813\t111230\t.\t-\t1\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tCDS\t111231\t111342\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t"; cds_type "file_2_Internal"; count "file_2_2_5";\n+sample\tGeneMark.hmm3\tintron\t111343\t113595\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tgene\t97476\t113919\t.\t-\t.\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tmRNA\t97476\t113919\t.\t-\t.\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tCDS\t113596\t113919\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t"; cds_type "file_2_Initial"; count "file_2_1_5";\n+sample\tGeneMark.hmm3\tstart_codon\t113917\t113919\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample4\tAUGUSTUS\tstop_codon\t1092\t1094\t.\t-\t0\ttranscript_id "file_1_g26.t1"; gene_id "file_1_g26";\n+sample4\tAUGUSTUS\tCDS\t1092\t1364\t0.76\t-\t0\ttranscript_id "file_1_g26.t1"; gene_id 
"file_1_g26";\n+sample4\tAUGUSTUS\texon\t1092\t1364\t.\t-\t.\ttranscript_id "file_1_g26.t1"; gene_id "file_1_g26";\n+sample4\tAUGUSTUS\tstart_codon\t1362\t1364\t.\t-\t0\ttranscript_id "file_1_g26.t1"; gene_id "file_1_g26";\n+sample4\tAUGUSTUS\tgene\t1092\t1364\t0.76\t-\t.\tg26\n+sample4\tAUGUSTUS\ttranscript\t1092\t1364\t0.76\t-\t.\tg26.t1\n+sample\tAUGUSTUS\tstop_codon\t34843\t34845\t.\t-\t0\ttranscript_id "file_1_g7.t1"; gene_id "file_1_g7";\n+sample\tAUGUSTUS\tCDS\t34843\t35679\t0.35\t-\t0\ttranscript_id "file_1_g7.t1"; gene_id "file_1_g7";\n+sample\tAUGUSTUS\texon\t34843\t35679\t.\t-\t.\ttranscript_id "file_1_g7.t1"; gene_id "file_1_g7";\n+sample\tAUGUSTUS\tstart_codon\t35677\t35679\t.\t-\t0\ttranscript_id "file_1_g7.t1"; gene_id "file_1_g7";\n+sample\tAUGUSTUS\tgene\t34843\t35679\t0.35\t-\t.\tg7\n+sample\tAUGUSTUS\ttranscript\t34843\t35679\t0.35\t-\t.\tg7.t1\n+sample\tAUGUSTUS\tstop_codon\t40166\t40168\t.\t-\t0\ttranscript_id "file_1_g8.t1"; gene_id "file_1_g8";\n+sample\tAUGUSTUS\tCDS\t40166\t40531\t0.99\t-\t0\ttranscript_id "file_1_g8.t1"; gene_id "file_1_g8";\n+sample\tAUGUSTUS\texon\t40166\t40531\t.\t-\t.\ttranscript_id "file_1_g8.t1"; gene_id "file_1_g8";\n+sample\tAUGUSTUS\tstart_codon\t40529\t40531\t.\t-\t0\ttranscript_id "file_1_g8.t1"; gene_id "file_1_g8";\n+sample\tAUGUSTUS\tgene\t40166\t40531\t0.99\t-\t.\tg8\n+sample\tAUGUSTUS\ttranscript\t40166\t40531\t0.99\t-\t.\tg8.t1\n+sample4\tAUGUSTUS\tstart_codon\t2126\t2128\t.\t+\t0\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tCDS\t2126\t2199\t1\t+\t0\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\texon\t2126\t2199\t.\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tintron\t2200\t2257\t1\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tCDS\t2258\t3224\t0.45\t+\t1\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\texon\t2258\t3224\t.\t+\t.\ttranscript_id 
"file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tintron\t3225\t3283\t0.33\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tCDS\t3284\t3512\t0.14\t+\t0\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\texon\t3284\t3512\t.\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tintron\t3513\t3807\t0.22\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tCDS\t3808\t3863\t0.59\t+\t2\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tgene\t2126\t3863\t0.07\t+\t.\tg27\n+sample4\tAUGUSTUS\ttranscript\t2126\t3863\t0.07\t+\t.\tg27.t1\n+sample4\tAUGUSTUS\texon\t3808\t3863\t.\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tstop_codon\t3861\t3863\t.\t+\t0\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n'
b
diff -r 000000000000 -r e50ea151f221 test-data/out_prot/braker.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_prot/braker.gtf Tue Oct 05 12:35:42 2021 +0000
b
b'@@ -0,0 +1,465 @@\n+sample\tAUGUSTUS\tstart_codon\t79526\t79528\t.\t+\t0\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t79526\t80474\t0.4\t+\t0\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\texon\t79526\t80474\t.\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tintron\t80475\t87860\t0.49\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t87861\t88125\t0.39\t+\t2\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\texon\t87861\t88125\t.\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tintron\t88126\t92148\t0.19\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t92149\t92252\t0.17\t+\t1\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\texon\t92149\t92252\t.\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tintron\t92253\t94562\t0.19\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t94563\t94732\t0.96\t+\t2\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\texon\t94563\t94732\t.\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tintron\t94733\t94872\t0.91\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t94873\t95333\t0.79\t+\t0\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\texon\t94873\t95333\t.\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tintron\t95334\t95584\t0.51\t+\t.\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tCDS\t95585\t95762\t0.35\t+\t1\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample\tAUGUSTUS\tgene\t79526\t95762\t0.01\t+\t.\tg14\n+sample\tAUGUSTUS\ttranscript\t79526\t95762\t0.01\t+\t.\tg14.t1\n+sample\tAUGUSTUS\texon\t95585\t95762\t.\t+\t.\ttranscript_id "file_1_g14.t1"; 
gene_id "file_1_g14";\n+sample\tAUGUSTUS\tstop_codon\t95760\t95762\t.\t+\t0\ttranscript_id "file_1_g14.t1"; gene_id "file_1_g14";\n+sample4\tAUGUSTUS\tstop_codon\t4248\t4250\t.\t-\t0\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tCDS\t4248\t4738\t0.42\t-\t2\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\texon\t4248\t4738\t.\t-\t.\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tintron\t4739\t4929\t0.42\t-\t.\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tCDS\t4930\t5539\t0.37\t-\t0\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\texon\t4930\t5539\t.\t-\t.\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tintron\t5540\t6038\t0.29\t-\t.\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tCDS\t6039\t6494\t0.47\t-\t0\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tgene\t4248\t6494\t0.12\t-\t.\tg28\n+sample4\tAUGUSTUS\ttranscript\t4248\t6494\t0.12\t-\t.\tg28.t1\n+sample4\tAUGUSTUS\texon\t6039\t6494\t.\t-\t.\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample4\tAUGUSTUS\tstart_codon\t6492\t6494\t.\t-\t0\ttranscript_id "file_1_g28.t1"; gene_id "file_1_g28";\n+sample\tAUGUSTUS\tstop_codon\t209044\t209046\t.\t-\t0\ttranscript_id "file_1_g24.t1"; gene_id "file_1_g24";\n+sample\tAUGUSTUS\tCDS\t209044\t210483\t0.38\t-\t0\ttranscript_id "file_1_g24.t1"; gene_id "file_1_g24";\n+sample\tAUGUSTUS\texon\t209044\t210483\t.\t-\t.\ttranscript_id "file_1_g24.t1"; gene_id "file_1_g24";\n+sample\tAUGUSTUS\tstart_codon\t210481\t210483\t.\t-\t0\ttranscript_id "file_1_g24.t1"; gene_id "file_1_g24";\n+sample\tAUGUSTUS\tgene\t209044\t210483\t0.38\t-\t.\tg24\n+sample\tAUGUSTUS\ttranscript\t209044\t210483\t0.38\t-\t.\tg24.t1\n+sample\tAUGUSTUS\tstart_codon\t2126\t2128\t.\t+\t0\ttranscript_id "file_1_g2.t1"; gene_id 
"file_1_g2";\n+sample\tAUGUSTUS\tCDS\t2126\t2199\t1\t+\t0\ttranscript_id "file_1_g2.t1"; gene_id "file_1_g2";\n+sample\tAUGUSTUS\texon\t2126\t2199\t.\t+\t.\ttranscript_id "file_1_g2.t1"; gene_id "file_1_g2";\n+sample\tAUGUSTUS\tintron\t2200\t2257\t1\t+\t.\ttranscript_id "file_1_g2.t1"; gene_id "file_1_g2";\n+sample\tAUGUSTUS\tCDS\t2258\t3224\t0.49\t+\t1\ttranscript_id "file_1_g2.t1"; gene_id "file_1_g2";\n+sample\tAUGUSTUS\texon\t2258\t3224\t.\t+\t.\ttranscript_id "fi'..b'e_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tCDS\t102521\t103151\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t"; cds_type "file_2_Internal"; count "file_2_4_5";\n+sample\tGeneMark.hmm3\tintron\t103152\t103222\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tCDS\t103223\t103812\t.\t-\t2\tgene_id "file_2_13_g"; transcript_id "file_2_13_t"; cds_type "file_2_Internal"; count "file_2_3_5";\n+sample\tGeneMark.hmm3\tintron\t103813\t111230\t.\t-\t1\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tCDS\t111231\t111342\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t"; cds_type "file_2_Internal"; count "file_2_2_5";\n+sample\tGeneMark.hmm3\tintron\t111343\t113595\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tgene\t97476\t113919\t.\t-\t.\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tmRNA\t97476\t113919\t.\t-\t.\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample\tGeneMark.hmm3\tCDS\t113596\t113919\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t"; cds_type "file_2_Initial"; count "file_2_1_5";\n+sample\tGeneMark.hmm3\tstart_codon\t113917\t113919\t.\t-\t0\tgene_id "file_2_13_g"; transcript_id "file_2_13_t";\n+sample4\tAUGUSTUS\tstop_codon\t1092\t1094\t.\t-\t0\ttranscript_id "file_1_g26.t1"; gene_id "file_1_g26";\n+sample4\tAUGUSTUS\tCDS\t1092\t1364\t0.76\t-\t0\ttranscript_id "file_1_g26.t1"; gene_id 
"file_1_g26";\n+sample4\tAUGUSTUS\texon\t1092\t1364\t.\t-\t.\ttranscript_id "file_1_g26.t1"; gene_id "file_1_g26";\n+sample4\tAUGUSTUS\tstart_codon\t1362\t1364\t.\t-\t0\ttranscript_id "file_1_g26.t1"; gene_id "file_1_g26";\n+sample4\tAUGUSTUS\tgene\t1092\t1364\t0.76\t-\t.\tg26\n+sample4\tAUGUSTUS\ttranscript\t1092\t1364\t0.76\t-\t.\tg26.t1\n+sample\tAUGUSTUS\tstop_codon\t34843\t34845\t.\t-\t0\ttranscript_id "file_1_g7.t1"; gene_id "file_1_g7";\n+sample\tAUGUSTUS\tCDS\t34843\t35679\t0.35\t-\t0\ttranscript_id "file_1_g7.t1"; gene_id "file_1_g7";\n+sample\tAUGUSTUS\texon\t34843\t35679\t.\t-\t.\ttranscript_id "file_1_g7.t1"; gene_id "file_1_g7";\n+sample\tAUGUSTUS\tstart_codon\t35677\t35679\t.\t-\t0\ttranscript_id "file_1_g7.t1"; gene_id "file_1_g7";\n+sample\tAUGUSTUS\tgene\t34843\t35679\t0.35\t-\t.\tg7\n+sample\tAUGUSTUS\ttranscript\t34843\t35679\t0.35\t-\t.\tg7.t1\n+sample\tAUGUSTUS\tstop_codon\t40166\t40168\t.\t-\t0\ttranscript_id "file_1_g8.t1"; gene_id "file_1_g8";\n+sample\tAUGUSTUS\tCDS\t40166\t40531\t0.99\t-\t0\ttranscript_id "file_1_g8.t1"; gene_id "file_1_g8";\n+sample\tAUGUSTUS\texon\t40166\t40531\t.\t-\t.\ttranscript_id "file_1_g8.t1"; gene_id "file_1_g8";\n+sample\tAUGUSTUS\tstart_codon\t40529\t40531\t.\t-\t0\ttranscript_id "file_1_g8.t1"; gene_id "file_1_g8";\n+sample\tAUGUSTUS\tgene\t40166\t40531\t0.99\t-\t.\tg8\n+sample\tAUGUSTUS\ttranscript\t40166\t40531\t0.99\t-\t.\tg8.t1\n+sample4\tAUGUSTUS\tstart_codon\t2126\t2128\t.\t+\t0\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tCDS\t2126\t2199\t1\t+\t0\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\texon\t2126\t2199\t.\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tintron\t2200\t2257\t1\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tCDS\t2258\t3224\t0.45\t+\t1\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\texon\t2258\t3224\t.\t+\t.\ttranscript_id 
"file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tintron\t3225\t3283\t0.33\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tCDS\t3284\t3512\t0.14\t+\t0\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\texon\t3284\t3512\t.\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tintron\t3513\t3807\t0.22\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tCDS\t3808\t3863\t0.59\t+\t2\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tgene\t2126\t3863\t0.07\t+\t.\tg27\n+sample4\tAUGUSTUS\ttranscript\t2126\t3863\t0.07\t+\t.\tg27.t1\n+sample4\tAUGUSTUS\texon\t3808\t3863\t.\t+\t.\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n+sample4\tAUGUSTUS\tstop_codon\t3861\t3863\t.\t+\t0\ttranscript_id "file_1_g27.t1"; gene_id "file_1_g27";\n'
b
diff -r 000000000000 -r e50ea151f221 test-data/proteins.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/proteins.fa Tue Oct 05 12:35:42 2021 +0000
b
@@ -0,0 +1,96 @@
+>FUN_000001-T1 FUN_000001
+MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAGESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKR
+FIRNPNYVKANEFYDKMLSSEYVSKRYKDLPPPHPGFGADQPPA
+>FUN_000002-T1 FUN_000002
+MPLYGVSPASALWAGSRRQTVRRSRPSHHESWPATSPPAGRAQSLLPKRLLDVVTDLGLDVLQVLGLEARLLGGDVLGFA
+QP
+>FUN_000003-T1 FUN_000003
+MYDQEPYKCHLCSKTFRMKGSLRIHLKVVHMMGVPCSNPNPNPNPSPTPASTTSAVTATPKLSICDRIRHTEPGALGNGN
+NSTCTASQPYALSGALSMLQQSPSSPESGTATPKLWECDVCSKSFTTKYFLKKHKRLHTGEMPYTCEICARTFTFQQSYH
+KHLLYHSEVKPHVCGVCGRAFKELSTLHNHQRIHSGEKPFKCERVSFLVHTRIHTGVMPYKCELCQKTFRYKVSQRTHRC
+PTEEAQTPEQLIKAFLEGNDSHTQPSPASAEIAAINSSSIVDPEQEALLSQSIDDIVVEQCQKLGICGVEPREEGQLISL
+QPVAVVHFSGNGSPLQQLQNLRIYSPQQTELPSSDGEVFQRFLMDAT
+>FUN_000004-T1 FUN_000004
+MSNKREIDELTSRIKSAKTTLVEWTEAMEDGNKGYQLIEKYYLDDQQKARELNIKRQLLQADIDKRRKQVVLLYDEQMTL
+EKNLERTACLYRSAHAERRQMVETWKSAVNQMTQREHDIQRSEIECAELAQKAQQTAQTYKEYDNQLTEVIENNRQVELA
+IESLNEESSDMKNQIQILIDATLLKEREIDGLRRELENLSNRVHLQRMDNRSQMKKRDEKAKELENFASVMEKVNARLKS
+VQNKALNAEQRLQILEEMMQAEETALRNLDKEQEKVNEMLYRTQRQVIELQDEEKVLKVQNDSLNSNLAAINRNQQQVNN
+ELKRQTEIHYSLSFKCLEAERRLAEIKGLADDPEVEATNMARLNTLEQEYEKLQRLIATTEAQNKKLNYNMNNLVVQYNA
+DEKELEMVRFKIKEAQVYCEGTVKRLRQIRYENSELIVDLNMVKMRCSDLEVGIGGCEQGTYDLEQHRLAFRRAIKDRTV
+ELRSQEDVLLLKKKHLNEELSTLRADLGERKKQIEAMKARFELTAQLLGKNEDGSIMTSTQLKVVSAQERQMLADEGDAL
+NKKVLKAEKEVVALENTLRQFDKSNDNYRKTFRSVDENSKDRERAELELKELEAAYCRELEKLKVLRCKAQHYQQKHAAQ
+RAEEEDLISKIEKAKASRAEHSAVLEKIERELDDQRMKLDRANREIRTQLREIKARPFSEEYLAQFERDLSLQELEARNT
+KALNMITDLANSDESGTDIIGILLRKGIKLPMHLKRTCSRVSWNSSSSGKSSQGQDTASYLNVKGKKFSCDGASARSSVS
+DMSSLKDDTSSTTSHSGLSIISLELPLPKKK
+>FUN_000005-T1 FUN_000005
+MRCVFCGSGSEQQHSRWEIKMLQESCRTDHGFHQDSQAIQYLYEILASYNRDEQRAFLQFVTGSPRLPTGGFKALTPPLT
+IVRKTLDENQNPNDYLPSVMTCVNYLKLPDYSSREVMRQKLKVAANEGSMSFHLS
+>FUN_000006-T1 FUN_000006
+MLILIGAGARGAVLHRLLASSATQDVRTTARACGRMLVLLCGCCCSGGAAAHKRRSCYCWSVLVRGGCLKSILDVQHSAV
+GLDYVYNGHRVLFASCTVTTSAGRELLRKGFRAEITTGAIPSCHSSVFLRMNLLLDL
+>FUN_000007-T1 FUN_000007
+MSFCDFLRDAALGANSANLSIVAAALSAARDVGGGSDGGGSAGGATPATGASASSVGNTSAVGASSSSNSSAGQAASSNS
+NNVTATGSGSAPGGGPTSTGTTSGTQHGSGSGAAAAVDSESDDSEVGRLQALLEARGLPPHLFGALGPRVTHILHRTIGN
+SSSSKANQLLQGLQSHDESQQLQAAIEMCQMLVMGNEDTLAGFPIKQVVPALIQLLRMEHNFDIMNNACRALAYMLEALP
+RSSGTVVEAVPVFLEKLQVIQCMDVAEQSLSALEILSRRHNKAILQANGISACLTYLDFFSIVAQRAALAIAANCCLNMH
+PEEFHFVAESLPLLARLLSQQDKKCIESVCSAFCRLVESFQHDGQRLQQIASPDLLKNCQQLLLVTPAILNTGTFTAVVR
+MLSLMCCSCPDLAISLLRNDIAATLLYLLTGNAEPAAASATHVELISRSPSELYELTCLIGELMPRLPLDGIFAVDSLLD
+RPTLNTQDQVHWQWRDDRGSWHNYSTIDSRLIEAANQSSEDEISLSTFGRTYTVDFHAMQQINEDTGTTRPVQRRLNHNY
+VAPMSAGQDLTTTSAGSAAAGGASTSAAAAAASSNNNNNNNNNPPGNSVNLNQVKRRPSLDARIACLKEERGLAADFIKH
+IFNVLYEVYSSSAGPNVRYKCLRALLRMVYYATPELLRQVLKYQLVSSHIAGMLGSNDLRIVVGALQMAEILMRQLPDVF
+GTHFRREGVIYQFTQLTDPNNPICANPSPKPLSATATPTANAGGSQSAPASANSLQVNPFFMDSAPGLSSASTTPSSSKH
+QSYSVKSFSHAMNALTASAKGTPSGALDATSSSTTAGGYNYSSSAPSSSSGAPAAYFVTQQGDPRQYVHFQQPAVPAPPP
+QQELLPSGVQQQGQQVPQVIYQPHHQQPAHLVLASTSSGAASSSSSSSSSSSASALQHKMTDMLKRKAPPKRKSQSGGRA
+KSRQEDAAVAPAGSGPGGAPPSSSGSAMHELLSRATSKYIFQ
+>FUN_000008-T1 FUN_000008
+MKFRALMQDPLYMKEFQAIVATLTKLAKDCVMILGSRQMHFIVNEDQSSAASPLVWAGITAEEYFPEYRMEAAHPDQEYI
+VLGVSSANLGRALSVLRGGGVNSCKLKLQRIQFPCISVIASVLTSSSTEAREVVHDVPVTIIPGSDWSAYVVPRVPNSQL
+ALGLPSLRLLKSLIDKLKNISPSLEFQVNVDGELNVIATSEMSTVTSRFQKLLIRTVSGSQQEASCSVDSRKASAFFGAL
+QLPNEELTIGIDREHSIHLQIDVRQDVVLHSILPAVCM
+>FUN_000009-T1 FUN_000009
+MCGNPAVGNGTRALILVGGYGTRLRPLTLSTPKPLVEFANKPILLHQLEALVDAGCRQVILAVSYRAEQMEKELKVEAKK
+LGVELIFSHETEPLGTAGPLALAKTILAASSEPFFVLNSDVICDFPFKQLVQFHCNHGKEGTIVVTKVEEPSKYGVVLYD
+ENGCIKNFIEKPQEFVSNKINAGIYIFNPSVLDRIEVKPTSIEKEVFPEMTQQQELYAMDLTGFWMDIGQPKDFLTGMCL
+YLSSLRQKQSPKLYTGPGVVGNVLVDPTAKIGEGCRIGPNVTIGPDVVIEDGVCIKRSTILKGAIVRSHSWLDSCIVGWR
+STVGRWVRIEGITVLGEDVIVKDELYINGGQVLPHKSIAASLRGAIVQAGQLVILPDEEVFSHVQGVWNLSSDQGNLGSF
+VVTNIRLVWFADANETFNISLPYLQIESSKYGPALVIQTAETGGGYVLGFRVDPAERLNELFKELSSLHTVYGEHPNFGI
+QYNANDARRRLEAASEEAAQASQIKVDNFEELDERQEREINTKLNSYLAEGCLGKVPSQGERAPVYCKELGFAMEPIGDG
+YKLQDLWNVMPTKMETME
+>FUN_000010-T1 FUN_000010
+MDFIHEAVGTSAAPSANPGPMPLCQPVRLELPFSPRHSFALGHFPFQLCPCLYAVWVQTMGLVLLLLIVFAPFLFRVLFK
+PCNPKRYLTT
+>FUN_000011-T1 FUN_000011
+MCHSKDNLHSGNEDGGMPKDTEYISSDHDDSPSWSQQSLLSSDRSKSYSQICSEILEESKERQEKAECAFRVYNINRSKL
+RRSHQQSLSRGPGSGSYGSSMASEYSSKSEAGYQDYDSPSTDPSREHTAEVTFLQLRHRNRAHKEIIFRAAAHAIVIILI
+IIARGVCQRHIKIVEIVPLTSRRGARRATTRNLTSHFAPRKWRRRFSSADQGDRQFKGHDGDCLRSTEKKRSISNEQSPI
+TLRNTNAKDVDIPDCFGSFAMNKHLSVITEDASQHHKDPDEDMIDSQLSNSVLLETYDEGEKYAYSYQYSYKPEICNNNQ
+FVSDESDLKVSSKEGYQMDQEDYVMDKQELVHEGGSDASLSEVAKSKSFLSLKIYDADEALMEIPEDFEGPAIVLDDDAD
+FLDITLTDDEEKIRAKLMAAALTTRKTTSSISPNISLRTRSPIEPSSLSYKPNVIFTRRSEVIKDNYTPRPDDRVALLAE
+KFLQSFSESAPNDYGWKPSKQEVTSAVSISHLFNENGVTRRGGDTPLCGDRQLLSVEFNRKLQRQLKVIVESFQ
+>FUN_000012-T1 FUN_000012
+MSLDRRGEITTPPTRYDLTLGSDKSSSLSRSEAGTYDVIQAEIQHAKRQELATGVATASHQNGNGNGNGHTLSTQHDIEA
+EVKKRKWPTEPSYFLAKELLMTERTYKKDLDVLNTTFRQVLSLGDVEQLQPLFELLDSLAQHHNLFLRDIEHRMVQWEGR
+GGHEAHRIGDVMMKHMAALPIYDEYVQTHLDILHCMNDMYEGDERFRQVYKEFEQQKVCYLPIGELLLKPLNRLLHYQLI
+LERLCDYYGEEHIDYADAMAVHHLLVRSTKGIRSQLPDSANFVELCELQRDINFEQLVQPHRRLIRQGCLLKHSKRGSSE
+EGLDLFGLSNGNNSSLNSSVNGGGPLTTQQQKLQLQQQQQNRTQPSRSNTALHVCWHRGATVGLGDHLIAAEHQLSGYLL
+RKFKNSSGWQKLWVVFTSFCLYFYKSYQDEFALASLPLLGYTVGPPGHQDAVQKEFVFKLSFKNHVYFFRAESAHTYNR
+>FUN_000013-T1 FUN_000013
+MQPIAMEMGQNQLEVKASVLGGSPQLGDLKVGGAQDVQLLGFYREPGGSIQNAQISNVEGL
+>FUN_000014-T1 FUN_000014
+MASKSFDLVIEEKTKKPERLYQPRRMRWLKYIILPAVFSFALLLILVNVDFSDNSEDSTHLGNDTSLIISGYGFENNTLR
+RGFFSGGIALHSLVIENCTIVHINDAAFNQESTVNITSLQLINVQLENLTESALEGLQKLQNFTLVNENNHFRPFGFLSA
+VAESLVSAEIHQSLAAAISYSVCDFLGSRNFPQLKYLDLSGTHLDKSLIKESFDNLPALEQLLLRNCGLGNIEWEIVRPR
+LKLLHYLDLGGAQKTGNYEHQLDVSAFSPETTTNAEEISTILAKRAMAPEVVGTTTLGPTTSIEISPPSTQSTTTPKEES
+TSMTETTILTTPSPKCEEELCQDLECSRITTDTVASADLGKSSCQDGLLVEICESTCTTPTFFCVILGENFTSASNCCSH
+HTMRCVVSAQVSWFEDHSGLVIGLGVGLLFIGSFLGMLIVFGTLRLNPSWLRGNKRRESNTIGLIQGRFEKDPYEQELQR
+IREL
+>FUN_000015-T1 FUN_000015
+MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAGESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKR
+FIRNPNYVKANEFYDKMLSSEYVSKRYKDLPPPHPGFGADQPPA
+>FUN_000016-T1 FUN_000016
+MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFDLKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLG
+VTVDLINRELYQADSMTLLDPADEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK
+KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPCAQVIFDSDPAPAGKNVPAQLEE
+MSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFINGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGI
+YYNELETRVRLNKRRVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIGETEKTSED
+AAVGAQAASGADSPAQVARDRQSRSRSRTRS