Mercurial > repos > iuc > maker
changeset 1:73a79dec987b draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/maker commit a1535cdf1f6fe06a9b11110c0c9627eef732d398
author | iuc |
---|---|
date | Sun, 01 Jul 2018 16:12:18 -0400 |
parents | 16e44ec438c4 |
children | d3a2072d8745 |
files | maker.xml test-data/annot.gff3 test-data/annot_mapped.gff3 test-data/annot_repbase.gff3 test-data/annot_reuse.gff3 test-data/evidences.gff3 test-data/evidences_reuse.gff3 test-data/fake_repbase.embl |
diffstat | 8 files changed, 249 insertions(+), 157 deletions(-) [+] |
line wrap: on
line diff
--- a/maker.xml Thu Oct 19 15:58:39 2017 -0400 +++ b/maker.xml Sun Jul 01 16:12:18 2018 -0400 @@ -1,11 +1,19 @@ <?xml version="1.0"?> -<tool id="maker" name="Maker" profile="16.04" version="@VERSION@"> +<tool id="maker" name="Maker" profile="16.04" version="@VERSION@.1"> <description>genome annotation pipeline</description> <macros> <import>macros.xml</import> </macros> <expand macro="requirements"/> <command><![CDATA[ + RM_LIB_PATH=\$(dirname \$(which RepeatMasker))/../share/RepeatMasker/Libraries && + mkdir lib && + export REPEATMASKER_LIB_DIR=\$(pwd)/lib && + for file in \$(ls \$RM_LIB_PATH) ; do ln -s \$RM_LIB_PATH/\$file lib/\$file ; done && + #if $repeat_masking.repeat_source.source_type == "repbase": + cp '${repeat_masking.repeat_source.repbase_file}' 'lib/${repeat_masking.repeat_source.repbase_file_name}' && + #end if + maker -CTL && @@ -104,28 +112,41 @@ #end if #-----Repeat Masking (leave values blank to skip repeat masking) -#if $repeat_masking.repeatmasker.do_rm == 'simple' -model_org=simple # select a model organism for RepBase masking in RepeatMasker +#if $repeat_masking.repeat_source.source_type == 'repbase' + +#if $repeat_masking.repeat_source.species_source.species_from_list == 'yes' +model_org=${repeat_masking.repeat_source.species_source.species_list} +#else +model_org=${repeat_masking.repeat_source.species_source.species_name} +#end if + rmlib= # provide an organism specific repeat library in fasta format for RepeatMasker -#else if $repeat_masking.repeatmasker.do_rm == 'lib' +softmask=${repeat_masking.repeat_source.softmask} # use soft-masking rather than hard-masking in BLAST (i.e. 
seg and dust filtering) + +#else if $repeat_masking.repeat_source.source_type == 'library' model_org= # select a model organism for RepBase masking in RepeatMasker -rmlib=${repeat_masking.repeatmasker.rmlib} # provide an organism specific repeat library in fasta format for RepeatMasker +rmlib=${repeat_masking.repeat_source.repeatmasker.rmlib} # provide an organism specific repeat library in fasta format for RepeatMasker + +#if $repeat_masking.repeat_source.repeat_protein +repeat_protein=${repeat_masking.repeat_source.repeat_protein} # provide a fasta file of transposable element proteins for RepeatRunner +#else +repeat_protein= # provide a fasta file of transposable element proteins for RepeatRunner +#end if +#if $repeat_masking.repeat_source.rm_gff +rm_gff=${repeat_masking.repeat_source.rm_gff} # pre-identified repeat elements from an external GFF3 file +#else +rm_gff= # pre-identified repeat elements from an external GFF3 file +#end if + +softmask=${repeat_masking.repeat_source.softmask} # use soft-masking rather than hard-masking in BLAST (i.e. seg and dust filtering) + #else model_org= # select a model organism for RepBase masking in RepeatMasker rmlib= # provide an organism specific repeat library in fasta format for RepeatMasker -#end if -#if $repeat_masking.repeat_protein -repeat_protein=${repeat_masking.repeat_protein} # provide a fasta file of transposable element proteins for RepeatRunner -#else -repeat_protein= # provide a fasta file of transposable element proteins for RepeatRunner +softmask=0 # use soft-masking rather than hard-masking in BLAST (i.e. 
seg and dust filtering) #end if -#if $repeat_masking.rm_gff -rm_gff=${repeat_masking.rm_gff} # pre-identified repeat elements from an external GFF3 file -#else -rm_gff= # pre-identified repeat elements from an external GFF3 file -#end if + prok_rm=0 # forces MAKER to repeatmask prokaryotes (no reason to change this), 1 = yes, 0 = no -softmask=${repeat_masking.softmask} # use soft-masking rather than hard-masking in BLAST (i.e. seg and dust filtering) #-----Gene Prediction #if $abinitio_gene_prediction.snaphmm @@ -359,32 +380,69 @@ </section> <section name="repeat_masking" title="Repeat masking" expanded="True"> - <conditional name="repeatmasker"> - <param name="do_rm" type="select" label="Enable repeat masking with RepeatMasker"> + <conditional name="repeat_source"> + <param label="Repeat library source" name="source_type" type="select"> + <option selected="true" value="repbase">RepBase</option> + <option value="library">Custom library of repeats</option> + <option value="no">Disable repeat masking (not recommended)</option> + </param> + <when value="repbase"> + <param name="repbase_file" type="data" format="embl" label="RepBase (RMRBSeqs.embl) file" /> + <param name="repbase_file_name" type="hidden" value="RMRBSeqs.embl"/> <!-- This is an ugly hack to allow testing with a fake repbase --> + <conditional name="species_source"> + <param label="Select species name from a list?" name="species_from_list" type="select"> + <option value="yes" selected="true">Yes</option> <option value="no">No</option> - <option value="simple" selected="true">Yes, run RepeatMasker with default simple models</option> - <option value="lib">Yes, use an organism specific repeat library (fasta)</option> - </param> - <!-- full repbase cannot be redistributed (for licensing reasons, see https://hpc.nih.gov/apps/repbase_license.html), - using only the default maker base - model_org is ignored and replaced by 'simple' if the full RepBase is not available. 
- model_org=simple means maker will search using the simple models shipped by default - Installing RepBase requires to replace files in the RepeatMasker installation dir - --> - <when value="no"/> - <when value="simple"/> - <when value="lib"> - <param name="rmlib" type="data" format="fasta" label="Organism specific repeat library for RepeatMasker (fasta)"/> - </when> + </param> + <when value="yes"> + <param name="species_list" type="select" label="Species"> + <option value="anopheles" selected="true">anopheles</option> + <option value="arabidopsis">arabidopsis</option> + <option value="artiodactyl">artiodactyl</option> + <option value="aspergillus">aspergillus</option> + <option value="carnivore">carnivore</option> + <option value="cat">cat</option> + <option value="chicken">chicken</option> + <option value="ciona intestinalis">ciona intestinalis</option> + <option value="ciona savignyi">ciona savignyi</option> + <option value="cow">cow</option> + <option value="danio">danio</option> + <option value="diatoaea">diatomea</option> + <option value="dog">dog</option> + <option value="drosophila">drosophila</option> + <option value="elegans">elegans</option> + <option value="fugu">fugu</option> + <option value="fungi" selected="true">fungi</option> + <option value="human">human</option> + <option value="maize">maize</option> + <option value="mammal">mammal</option> + <option value="mouse">mouse</option> + <option value="pig">pig</option> + <option value="rat">rat</option> + <option value="rice">rice</option> + <option value="rodentia">rodentia</option> + <option value="ruminantia">ruminantia</option> + <option value="wheat">wheat</option> + </param> + </when> + <when value="no"> + <param name="species_name" type="text" value="homo sapiens" label="Repeat source species" help="Source species (or clade name) used to select repeats from RepBase" /> + </when> + </conditional> + <param name="softmask" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Use 
soft-masking rather than hard-masking in BLAST" help="i.e. seg and dust filtering"/> + </when> + <when value="library"> + <param name="repeat_protein" type="data" format="fasta" label="Transposable element protein sequences for RepeatRunner" optional="True" /> + <param name="rm_gff" type="data" format="fasta" label="Pre-identified repeat elements from an external GFF file" optional="True" /> + <param name="softmask" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Use soft-masking rather than hard-masking in BLAST" help="i.e. seg and dust filtering"/> + </when> + <when value="no"/> </conditional> - <param name="repeat_protein" type="data" format="fasta" label="Transposable element protein sequences for RepeatRunner (fasta)" help="Leave empty to skip" optional="True"/> - <param name="rm_gff" type="data" format="gff" label="Pre-identified repeat elements from an external GFF file" help="Leave empty to skip" optional="True"/> - <param name="softmask" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Use soft-masking rather than hard-masking in BLAST" help="i.e. 
seg and dust filtering"/> </section> <section name="gene_prediction" title="Other predictions" expanded="True"> <param name="pred_gff" type="data" format="gff" label="Predictions from an external GFF3 file" optional="True"/> - <param name="model_gff" type="data" format="gff" label="Annotated gene models an external GFF3 file" help="annotation pass-through" optional="True"/> + <param name="model_gff" type="data" format="gff" label="Annotated gene models from an external GFF3 file" help="annotation pass-through" optional="True"/> <param name="trna" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Find tRNAs with tRNAscan"/> <param name="snoscan_rrna" type="data" format="fasta" label="rRNA file to have Snoscan find snoRNAs" optional="True"/> </section> @@ -428,6 +486,7 @@ <param name="genome" value="genome.fasta"/> <param name="est_evidences|est" value="est.fasta"/> <param name="est_evidences|est2genome" value="1"/> + <param name="repeat_masking|repeat_source|source_type" value="no"/> <output name="output_gff" file="annot.gff3"/> <output name="output_evidences" file="evidences.gff3" compare="sim_size"/> </test> @@ -436,6 +495,7 @@ <param name="organism_type" value="prokaryotic"/> <param name="est_evidences|est" value="est.fasta"/> <param name="est_evidences|est2genome" value="1"/> + <param name="repeat_masking|repeat_source|source_type" value="no"/> <output name="output_gff" file="annot_proc.gff3"/> <output name="output_evidences" file="evidences_proc.gff3" compare="sim_size"/> </test> @@ -445,6 +505,7 @@ <param name="reannotation|maker_gff" value="evidences.gff3"/> <param name="reannotation|est_pass" value="true"/> <param name="est_evidences|est2genome" value="1"/> + <param name="repeat_masking|repeat_source|source_type" value="no"/> <output name="output_gff" file="annot_reuse.gff3"/> <output name="output_evidences" file="evidences_reuse.gff3" compare="sim_size"/> </test> @@ -455,6 +516,7 @@ <param 
name="abinitio_gene_prediction|aug_prediction|augustus_species" value="human"/> <param name="est_evidences|est" value="est.fasta"/> <param name="est_evidences|est2genome" value="1"/> + <param name="repeat_masking|repeat_source|source_type" value="no"/> <output name="output_gff" file="annot_human.gff3" compare="sim_size"/> <output name="output_evidences" file="evidences_human.gff3" compare="sim_size"/> </test> @@ -465,6 +527,7 @@ <param name="abinitio_gene_prediction|aug_prediction|augustus_model" value="local.tar.gz" ftype="augustus"/> <param name="est_evidences|est" value="est.fasta"/> <param name="est_evidences|est2genome" value="1"/> + <param name="repeat_masking|repeat_source|source_type" value="no"/> <output name="output_gff" file="annot_model.gff3" compare="sim_size"/> <output name="output_evidences" file="evidences_model.gff3" compare="sim_size"/> </test> @@ -472,10 +535,21 @@ <param name="genome" value="genome.fasta"/> <param name="est_evidences|est" value="est.fasta"/> <param name="est_evidences|est2genome" value="1"/> - <param name="repeat_masking|repeatmasker|do_rm" value="no"/> + <param name="repeat_masking|repeat_source|source_type" value="no"/> <output name="output_gff" file="annot_norm.gff3"/> <output name="output_evidences" file="evidences_norm.gff3" compare="sim_size"/> </test> + <test> + <param name="genome" value="genome.fasta"/> + <param name="est_evidences|est" value="est.fasta"/> + <param name="est_evidences|est2genome" value="1"/> + <param name="repeat_masking|repeat_source|source_type" value="repbase"/> + <param name="repeat_masking|repeat_source|repbase_file" value="fake_repbase.embl" /> + <param name="repeat_masking|repeat_source|repbase_file_name" value="fake.embl" /> + <param name="repeat_masking|repeat_source|species_list" value="anopheles" /> + <output name="output_gff" file="annot_repbase.gff3"/> + <output name="output_evidences" file="evidences_norm.gff3" compare="sim_size"/> + </test> </tests> <help><![CDATA[ MAKER is a portable and 
easily configurable genome annotation pipeline. Its purpose is to allow smaller eukaryotic and prokaryotic genome projects to independently annotate their genomes and to create genome databases. MAKER identifies repeats, aligns ESTs and proteins to a genome, produces ab-initio gene predictions and automatically synthesizes these data into gene annotations having evidence-based quality values. MAKER is also easily trainable: outputs of preliminary runs can be used to automatically retrain its gene prediction algorithm, producing higher quality gene-models on subsequent runs. MAKER's inputs are minimal and its outputs can be directly loaded into a GMOD database. They can also be viewed in the Apollo genome browser; this feature of MAKER provides an easy means to annotate, view and edit individual contigs and BACs without the overhead of a database. MAKER should prove especially useful for emerging model organism projects with minimal bioinformatics expertise and computer resources.
--- a/test-data/annot.gff3 Thu Oct 19 15:58:39 2017 -0400 +++ b/test-data/annot.gff3 Sun Jul 01 16:12:18 2018 -0400 @@ -17,7 +17,6 @@ HS08198 maker CDS 1587 1688 . + 2 ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 HS08198 maker CDS 1772 1848 . + 2 ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 ### -### HS04636 maker gene 1813 6903 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0;Name=maker-HS04636-exonerate_est2genome-gene-0.0 HS04636 maker mRNA 1813 6903 8728 + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1;Parent=maker-HS04636-exonerate_est2genome-gene-0.0;Name=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1;_AED=0.00;_eAED=0.00;_QI=49|1|1|1|0|0|9|0|572 HS04636 maker exon 1813 1934 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:0;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 @@ -40,4 +39,3 @@ HS04636 maker CDS 5860 6007 . + 0 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 HS04636 maker CDS 6494 6903 . + 2 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 ### -###
--- a/test-data/annot_mapped.gff3 Thu Oct 19 15:58:39 2017 -0400 +++ b/test-data/annot_mapped.gff3 Sun Jul 01 16:12:18 2018 -0400 @@ -17,7 +17,6 @@ HS08198 maker CDS 1587 1688 . + 2 ID=YES000002-RA:cds;Parent=YES000002-RA; HS08198 maker CDS 1772 1848 . + 2 ID=YES000002-RA:cds;Parent=YES000002-RA; ### -### HS04636 maker gene 1813 6903 . + . ID=YES000001;Name=YES000001;Alias=maker-HS04636-exonerate_est2genome-gene-0.0; HS04636 maker mRNA 1813 6903 8728 + . ID=YES000001-RA;Parent=YES000001;Name=YES000001-RA;Alias=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1;_AED=0.00;_QI=49|1|1|1|0|0|9|0|572;_eAED=0.00; HS04636 maker exon 1813 1934 . + . ID=YES000001-RA:exon:0;Parent=YES000001-RA; @@ -40,4 +39,3 @@ HS04636 maker CDS 5860 6007 . + 0 ID=YES000001-RA:cds;Parent=YES000001-RA; HS04636 maker CDS 6494 6903 . + 2 ID=YES000001-RA:cds;Parent=YES000001-RA; ### -###
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annot_repbase.gff3 Sun Jul 01 16:12:18 2018 -0400 @@ -0,0 +1,43 @@ +##gff-version 3 +HS08198 maker gene 352 1848 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0;Name=maker-HS08198-exonerate_est2genome-gene-0.0 +HS08198 maker mRNA 352 1848 2869 + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1;Parent=maker-HS08198-exonerate_est2genome-gene-0.0;Name=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1;_AED=0.00;_eAED=0.00;_QI=70|1|1|1|0|0|7|0|192 +HS08198 maker exon 352 397 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:exon:9;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker exon 421 582 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:exon:10;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker exon 812 894 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:exon:11;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker exon 1053 1123 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:exon:12;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker exon 1208 1315 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:exon:13;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker exon 1587 1688 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:exon:14;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker exon 1772 1848 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:exon:15;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker five_prime_UTR 352 397 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:five_prime_utr;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker five_prime_UTR 421 444 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:five_prime_utr;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker CDS 445 582 . 
+ 0 ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker CDS 812 894 . + 0 ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker CDS 1053 1123 . + 1 ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker CDS 1208 1315 . + 2 ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker CDS 1587 1688 . + 2 ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker CDS 1772 1848 . + 2 ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +### +### +HS04636 maker gene 1813 6903 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0;Name=maker-HS04636-exonerate_est2genome-gene-0.0 +HS04636 maker mRNA 1813 6903 8728 + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1;Parent=maker-HS04636-exonerate_est2genome-gene-0.0;Name=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1;_AED=0.00;_eAED=0.00;_QI=49|1|1|1|0|0|9|0|572 +HS04636 maker exon 1813 1934 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:0;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker exon 2055 2198 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:1;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker exon 2852 2995 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:2;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker exon 3426 3607 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:3;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker exon 4340 4423 . + . 
ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:4;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker exon 4543 4789 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:5;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker exon 5072 5358 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:6;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker exon 5860 6007 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:7;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker exon 6494 6903 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:8;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker five_prime_UTR 1813 1861 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:five_prime_utr;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker CDS 1862 1934 . + 0 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker CDS 2055 2198 . + 2 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker CDS 2852 2995 . + 2 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker CDS 3426 3607 . + 2 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker CDS 4340 4423 . + 0 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker CDS 4543 4789 . + 0 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker CDS 5072 5358 . + 2 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker CDS 5860 6007 . 
+ 0 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker CDS 6494 6903 . + 2 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +### +###
--- a/test-data/annot_reuse.gff3 Thu Oct 19 15:58:39 2017 -0400 +++ b/test-data/annot_reuse.gff3 Sun Jul 01 16:12:18 2018 -0400 @@ -17,7 +17,6 @@ HS08198 maker CDS 1587 1688 . + 2 ID=maker-HS08198-est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS08198-est2genome-gene-0.0-mRNA-1 HS08198 maker CDS 1772 1848 . + 2 ID=maker-HS08198-est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS08198-est2genome-gene-0.0-mRNA-1 ### -### HS04636 maker gene 1813 6903 . + . ID=maker-HS04636-est2genome-gene-0.0;Name=maker-HS04636-est2genome-gene-0.0;score=8728 HS04636 maker mRNA 1813 6903 8728 + . ID=maker-HS04636-est2genome-gene-0.0-mRNA-1;Parent=maker-HS04636-est2genome-gene-0.0;Name=maker-HS04636-est2genome-gene-0.0-mRNA-1;_AED=0.00;_eAED=0.00;_QI=49|1|1|1|0|0|9|0|572 HS04636 maker exon 1813 1934 . + . ID=maker-HS04636-est2genome-gene-0.0-mRNA-1:exon:0;Parent=maker-HS04636-est2genome-gene-0.0-mRNA-1 @@ -40,4 +39,3 @@ HS04636 maker CDS 5860 6007 . + 0 ID=maker-HS04636-est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-est2genome-gene-0.0-mRNA-1 HS04636 maker CDS 6494 6903 . + 2 ID=maker-HS04636-est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-est2genome-gene-0.0-mRNA-1 ### -###
--- a/test-data/evidences.gff3 Thu Oct 19 15:58:39 2017 -0400 +++ b/test-data/evidences.gff3 Sun Jul 01 16:12:18 2018 -0400 @@ -1,62 +1,42 @@ ##gff-version 3 HS08198 . contig 1 2344 . . . ID=HS08198;Name=HS08198 ### -HS08198 repeatmasker match 578 651 13 + . ID=HS08198:hit:10:1.3.0.0;Name=species:%28AGAAGGT%29n|genus:Simple_repeat;Target=species:%28AGAAGGT%29n|genus:Simple_repeat 1 75 + -HS08198 repeatmasker match_part 578 651 13 + . ID=HS08198:hsp:27:1.3.0.0;Parent=HS08198:hit:10:1.3.0.0;Target=species:%2528AGAAGGT%2529n|genus:Simple_repeat 1 75 + -### -HS08198 blastn expressed_sequence_match 444 1848 140 + . ID=HS08198:hit:11:3.2.0.0;Name=HS08198.g2 -HS08198 blastn match_part 444 583 140 + . ID=HS08198:hsp:28:3.2.0.0;Parent=HS08198:hit:11:3.2.0.0;Target=HS08198.g2 79 218 +;Gap=M140 -HS08198 blastn match_part 445 523 79 + . ID=HS08198:hsp:29:3.2.0.0;Parent=HS08198:hit:11:3.2.0.0;Target=HS08198.g2 1 79 +;Gap=M79 -HS08198 blastn match_part 808 895 88 + . ID=HS08198:hsp:30:3.2.0.0;Parent=HS08198:hit:11:3.2.0.0;Target=HS08198.g2 214 301 +;Gap=M88 -HS08198 blastn match_part 1053 1123 71 + . ID=HS08198:hsp:31:3.2.0.0;Parent=HS08198:hit:11:3.2.0.0;Target=HS08198.g2 301 371 +;Gap=M71 -HS08198 blastn match_part 1206 1315 110 + . ID=HS08198:hsp:32:3.2.0.0;Parent=HS08198:hit:11:3.2.0.0;Target=HS08198.g2 370 479 +;Gap=M110 -HS08198 blastn match_part 1586 1689 104 + . ID=HS08198:hsp:33:3.2.0.0;Parent=HS08198:hit:11:3.2.0.0;Target=HS08198.g2 479 582 +;Gap=M104 -HS08198 blastn match_part 1771 1848 78 + . ID=HS08198:hsp:34:3.2.0.0;Parent=HS08198:hit:11:3.2.0.0;Target=HS08198.g2 581 658 +;Gap=M78 -HS08198 est2genome expressed_sequence_match 352 1848 2869 + . ID=HS08198:hit:12:3.2.0.0;Name=HS08198.g2 -HS08198 est2genome match_part 352 397 2869 + . ID=HS08198:hsp:35:3.2.0.0;Parent=HS08198:hit:12:3.2.0.0;Target=HS08198.g2 3 52 +;Gap=M18 D1 M2 I4 M10 I1 M15 -HS08198 est2genome match_part 421 582 2869 + . 
ID=HS08198:hsp:36:3.2.0.0;Parent=HS08198:hit:12:3.2.0.0;Target=HS08198.g2 53 217 +;Gap=M4 I2 M3 I3 M5 D1 M9 D1 M139 -HS08198 est2genome match_part 812 894 2869 + . ID=HS08198:hsp:37:3.2.0.0;Parent=HS08198:hit:12:3.2.0.0;Target=HS08198.g2 218 300 +;Gap=M83 -HS08198 est2genome match_part 1053 1123 2869 + . ID=HS08198:hsp:38:3.2.0.0;Parent=HS08198:hit:12:3.2.0.0;Target=HS08198.g2 301 371 +;Gap=M71 -HS08198 est2genome match_part 1208 1315 2869 + . ID=HS08198:hsp:39:3.2.0.0;Parent=HS08198:hit:12:3.2.0.0;Target=HS08198.g2 372 479 +;Gap=M108 -HS08198 est2genome match_part 1587 1688 2869 + . ID=HS08198:hsp:40:3.2.0.0;Parent=HS08198:hit:12:3.2.0.0;Target=HS08198.g2 480 581 +;Gap=M102 -HS08198 est2genome match_part 1772 1848 2869 + . ID=HS08198:hsp:41:3.2.0.0;Parent=HS08198:hit:12:3.2.0.0;Target=HS08198.g2 582 658 +;Gap=M77 +HS08198 blastn expressed_sequence_match 444 1848 140 + . ID=HS08198:hit:2:3.2.0.0;Name=HS08198.g2 +HS08198 blastn match_part 444 583 140 + . ID=HS08198:hsp:19:3.2.0.0;Parent=HS08198:hit:2:3.2.0.0;Target=HS08198.g2 79 218 +;Gap=M140 +HS08198 blastn match_part 445 523 79 + . ID=HS08198:hsp:20:3.2.0.0;Parent=HS08198:hit:2:3.2.0.0;Target=HS08198.g2 1 79 +;Gap=M79 +HS08198 blastn match_part 808 895 88 + . ID=HS08198:hsp:21:3.2.0.0;Parent=HS08198:hit:2:3.2.0.0;Target=HS08198.g2 214 301 +;Gap=M88 +HS08198 blastn match_part 1053 1123 71 + . ID=HS08198:hsp:22:3.2.0.0;Parent=HS08198:hit:2:3.2.0.0;Target=HS08198.g2 301 371 +;Gap=M71 +HS08198 blastn match_part 1206 1315 110 + . ID=HS08198:hsp:23:3.2.0.0;Parent=HS08198:hit:2:3.2.0.0;Target=HS08198.g2 370 479 +;Gap=M110 +HS08198 blastn match_part 1586 1689 104 + . ID=HS08198:hsp:24:3.2.0.0;Parent=HS08198:hit:2:3.2.0.0;Target=HS08198.g2 479 582 +;Gap=M104 +HS08198 blastn match_part 1771 1848 78 + . ID=HS08198:hsp:25:3.2.0.0;Parent=HS08198:hit:2:3.2.0.0;Target=HS08198.g2 581 658 +;Gap=M78 +HS08198 est2genome expressed_sequence_match 352 1848 2869 + . 
ID=HS08198:hit:3:3.2.0.0;Name=HS08198.g2 +HS08198 est2genome match_part 352 397 2869 + . ID=HS08198:hsp:26:3.2.0.0;Parent=HS08198:hit:3:3.2.0.0;Target=HS08198.g2 3 52 +;Gap=M18 D1 M2 I4 M10 I1 M15 +HS08198 est2genome match_part 421 582 2869 + . ID=HS08198:hsp:27:3.2.0.0;Parent=HS08198:hit:3:3.2.0.0;Target=HS08198.g2 53 217 +;Gap=M4 I2 M3 I3 M5 D1 M9 D1 M139 +HS08198 est2genome match_part 812 894 2869 + . ID=HS08198:hsp:28:3.2.0.0;Parent=HS08198:hit:3:3.2.0.0;Target=HS08198.g2 218 300 +;Gap=M83 +HS08198 est2genome match_part 1053 1123 2869 + . ID=HS08198:hsp:29:3.2.0.0;Parent=HS08198:hit:3:3.2.0.0;Target=HS08198.g2 301 371 +;Gap=M71 +HS08198 est2genome match_part 1208 1315 2869 + . ID=HS08198:hsp:30:3.2.0.0;Parent=HS08198:hit:3:3.2.0.0;Target=HS08198.g2 372 479 +;Gap=M108 +HS08198 est2genome match_part 1587 1688 2869 + . ID=HS08198:hsp:31:3.2.0.0;Parent=HS08198:hit:3:3.2.0.0;Target=HS08198.g2 480 581 +;Gap=M102 +HS08198 est2genome match_part 1772 1848 2869 + . ID=HS08198:hsp:32:3.2.0.0;Parent=HS08198:hit:3:3.2.0.0;Target=HS08198.g2 582 658 +;Gap=M77 HS04636 . contig 1 9453 . . . ID=HS04636;Name=HS04636 ### -HS04636 repeatmasker match 9419 9435 16 + . ID=HS04636:hit:0:1.3.0.0;Name=species:%28A%29n|genus:Simple_repeat;Target=species:%28A%29n|genus:Simple_repeat 1 17 + -HS04636 repeatmasker match_part 9419 9435 16 + . ID=HS04636:hsp:0:1.3.0.0;Parent=HS04636:hit:0:1.3.0.0;Target=species:%2528A%2529n|genus:Simple_repeat 1 17 + -HS04636 repeatmasker match 939 980 13 + . ID=HS04636:hit:1:1.3.0.0;Name=species:%28CCGCG%29n|genus:Simple_repeat;Target=species:%28CCGCG%29n|genus:Simple_repeat 1 42 + -HS04636 repeatmasker match_part 939 980 13 + . ID=HS04636:hsp:1:1.3.0.0;Parent=HS04636:hit:1:1.3.0.0;Target=species:%2528CCGCG%2529n|genus:Simple_repeat 1 42 + -HS04636 repeatmasker match 3791 3839 22 + . ID=HS04636:hit:2:1.3.0.0;Name=species:%28AC%29n|genus:Simple_repeat;Target=species:%28AC%29n|genus:Simple_repeat 1 49 + -HS04636 repeatmasker match_part 3791 3839 22 + . 
ID=HS04636:hsp:2:1.3.0.0;Parent=HS04636:hit:2:1.3.0.0;Target=species:%2528AC%2529n|genus:Simple_repeat 1 49 + -HS04636 repeatmasker match 6918 6977 17 + . ID=HS04636:hit:3:1.3.0.0;Name=species:%28TATT%29n|genus:Simple_repeat;Target=species:%28TATT%29n|genus:Simple_repeat 1 54 + -HS04636 repeatmasker match_part 6918 6977 17 + . ID=HS04636:hsp:3:1.3.0.0;Parent=HS04636:hit:3:1.3.0.0;Target=species:%2528TATT%2529n|genus:Simple_repeat 1 54 + -HS04636 repeatmasker match 6028 6072 21 + . ID=HS04636:hit:4:1.3.0.0;Name=species:%28TTGT%29n|genus:Simple_repeat;Target=species:%28TTGT%29n|genus:Simple_repeat 1 46 + -HS04636 repeatmasker match_part 6028 6072 21 + . ID=HS04636:hsp:4:1.3.0.0;Parent=HS04636:hit:4:1.3.0.0;Target=species:%2528TTGT%2529n|genus:Simple_repeat 1 46 + -HS04636 repeatmasker match 4518 4541 17 + . ID=HS04636:hit:5:1.3.0.0;Name=species:%28TTTA%29n|genus:Simple_repeat;Target=species:%28TTTA%29n|genus:Simple_repeat 1 24 + -HS04636 repeatmasker match_part 4518 4541 17 + . ID=HS04636:hsp:5:1.3.0.0;Parent=HS04636:hit:5:1.3.0.0;Target=species:%2528TTTA%2529n|genus:Simple_repeat 1 24 + -HS04636 repeatmasker match 3852 3888 12 + . ID=HS04636:hit:6:1.3.0.0;Name=species:%28TTGTAAT%29n|genus:Simple_repeat;Target=species:%28TTGTAAT%29n|genus:Simple_repeat 1 37 + -HS04636 repeatmasker match_part 3852 3888 12 + . ID=HS04636:hsp:6:1.3.0.0;Parent=HS04636:hit:6:1.3.0.0;Target=species:%2528TTGTAAT%2529n|genus:Simple_repeat 1 37 + -HS04636 repeatmasker match 3840 3847 17 + . ID=HS04636:hit:7:1.3.0.0;Name=species:%28ATAC%29n|genus:Simple_repeat;Target=species:%28ATAC%29n|genus:Simple_repeat 3 57 + -HS04636 repeatmasker match_part 3840 3847 17 + . ID=HS04636:hsp:7:1.3.0.0;Parent=HS04636:hit:7:1.3.0.0;Target=species:%2528ATAC%2529n|genus:Simple_repeat 3 57 + -### -HS04636 blastn expressed_sequence_match 1815 6903 120 + . ID=HS04636:hit:8:3.2.0.0;Name=HS04636.g1 -HS04636 blastn match_part 1815 1934 120 + . 
ID=HS04636:hsp:8:3.2.0.0;Parent=HS04636:hit:8:3.2.0.0;Target=HS04636.g1 129 248 +;Gap=M120 -HS04636 blastn match_part 1815 1845 31 + . ID=HS04636:hsp:9:3.2.0.0;Parent=HS04636:hit:8:3.2.0.0;Target=HS04636.g1 50 80 +;Gap=M31 -HS04636 blastn match_part 2055 2198 144 + . ID=HS04636:hsp:10:3.2.0.0;Parent=HS04636:hit:8:3.2.0.0;Target=HS04636.g1 249 392 +;Gap=M144 -HS04636 blastn match_part 2852 2997 146 + . ID=HS04636:hsp:11:3.2.0.0;Parent=HS04636:hit:8:3.2.0.0;Target=HS04636.g1 393 538 +;Gap=M146 -HS04636 blastn match_part 3424 3609 186 + . ID=HS04636:hsp:12:3.2.0.0;Parent=HS04636:hit:8:3.2.0.0;Target=HS04636.g1 535 720 +;Gap=M186 -HS04636 blastn match_part 4339 4423 85 + . ID=HS04636:hsp:13:3.2.0.0;Parent=HS04636:hit:8:3.2.0.0;Target=HS04636.g1 718 802 +;Gap=M85 -HS04636 blastn match_part 4541 4790 250 + . ID=HS04636:hsp:14:3.2.0.0;Parent=HS04636:hit:8:3.2.0.0;Target=HS04636.g1 801 1050 +;Gap=M250 -HS04636 blastn match_part 5068 5360 293 + . ID=HS04636:hsp:15:3.2.0.0;Parent=HS04636:hit:8:3.2.0.0;Target=HS04636.g1 1046 1338 +;Gap=M293 -HS04636 blastn match_part 5859 6008 150 + . ID=HS04636:hsp:16:3.2.0.0;Parent=HS04636:hit:8:3.2.0.0;Target=HS04636.g1 1336 1485 +;Gap=M150 -HS04636 blastn match_part 6490 6903 414 + . ID=HS04636:hsp:17:3.2.0.0;Parent=HS04636:hit:8:3.2.0.0;Target=HS04636.g1 1481 1894 +;Gap=M414 -HS04636 est2genome expressed_sequence_match 1813 6903 8728 + . ID=HS04636:hit:9:3.2.0.0;Name=HS04636.g1 -HS04636 est2genome match_part 1813 1934 8728 + . ID=HS04636:hsp:18:3.2.0.0;Parent=HS04636:hit:9:3.2.0.0;Target=HS04636.g1 127 248 +;Gap=M122 -HS04636 est2genome match_part 2055 2198 8728 + . ID=HS04636:hsp:19:3.2.0.0;Parent=HS04636:hit:9:3.2.0.0;Target=HS04636.g1 249 392 +;Gap=M144 -HS04636 est2genome match_part 2852 2995 8728 + . ID=HS04636:hsp:20:3.2.0.0;Parent=HS04636:hit:9:3.2.0.0;Target=HS04636.g1 393 536 +;Gap=M144 -HS04636 est2genome match_part 3426 3607 8728 + . 
ID=HS04636:hsp:21:3.2.0.0;Parent=HS04636:hit:9:3.2.0.0;Target=HS04636.g1 537 718 +;Gap=M182 -HS04636 est2genome match_part 4340 4423 8728 + . ID=HS04636:hsp:22:3.2.0.0;Parent=HS04636:hit:9:3.2.0.0;Target=HS04636.g1 719 802 +;Gap=M84 -HS04636 est2genome match_part 4543 4789 8728 + . ID=HS04636:hsp:23:3.2.0.0;Parent=HS04636:hit:9:3.2.0.0;Target=HS04636.g1 803 1049 +;Gap=M247 -HS04636 est2genome match_part 5072 5358 8728 + . ID=HS04636:hsp:24:3.2.0.0;Parent=HS04636:hit:9:3.2.0.0;Target=HS04636.g1 1050 1336 +;Gap=M287 -HS04636 est2genome match_part 5860 6007 8728 + . ID=HS04636:hsp:25:3.2.0.0;Parent=HS04636:hit:9:3.2.0.0;Target=HS04636.g1 1337 1484 +;Gap=M148 -HS04636 est2genome match_part 6494 6903 8728 + . ID=HS04636:hsp:26:3.2.0.0;Parent=HS04636:hit:9:3.2.0.0;Target=HS04636.g1 1485 1894 +;Gap=M410 +HS04636 blastn expressed_sequence_match 1815 6903 120 + . ID=HS04636:hit:0:3.2.0.0;Name=HS04636.g1 +HS04636 blastn match_part 1815 1934 120 + . ID=HS04636:hsp:0:3.2.0.0;Parent=HS04636:hit:0:3.2.0.0;Target=HS04636.g1 129 248 +;Gap=M120 +HS04636 blastn match_part 1815 1845 31 + . ID=HS04636:hsp:1:3.2.0.0;Parent=HS04636:hit:0:3.2.0.0;Target=HS04636.g1 50 80 +;Gap=M31 +HS04636 blastn match_part 2055 2198 144 + . ID=HS04636:hsp:2:3.2.0.0;Parent=HS04636:hit:0:3.2.0.0;Target=HS04636.g1 249 392 +;Gap=M144 +HS04636 blastn match_part 2852 2997 146 + . ID=HS04636:hsp:3:3.2.0.0;Parent=HS04636:hit:0:3.2.0.0;Target=HS04636.g1 393 538 +;Gap=M146 +HS04636 blastn match_part 3424 3609 186 + . ID=HS04636:hsp:4:3.2.0.0;Parent=HS04636:hit:0:3.2.0.0;Target=HS04636.g1 535 720 +;Gap=M186 +HS04636 blastn match_part 4339 4423 85 + . ID=HS04636:hsp:5:3.2.0.0;Parent=HS04636:hit:0:3.2.0.0;Target=HS04636.g1 718 802 +;Gap=M85 +HS04636 blastn match_part 4542 4790 249 + . ID=HS04636:hsp:6:3.2.0.0;Parent=HS04636:hit:0:3.2.0.0;Target=HS04636.g1 802 1050 +;Gap=M249 +HS04636 blastn match_part 5068 5360 293 + . 
ID=HS04636:hsp:7:3.2.0.0;Parent=HS04636:hit:0:3.2.0.0;Target=HS04636.g1 1046 1338 +;Gap=M293 +HS04636 blastn match_part 5859 6008 150 + . ID=HS04636:hsp:8:3.2.0.0;Parent=HS04636:hit:0:3.2.0.0;Target=HS04636.g1 1336 1485 +;Gap=M150 +HS04636 blastn match_part 6490 6903 414 + . ID=HS04636:hsp:9:3.2.0.0;Parent=HS04636:hit:0:3.2.0.0;Target=HS04636.g1 1481 1894 +;Gap=M414 +HS04636 est2genome expressed_sequence_match 1813 6903 8728 + . ID=HS04636:hit:1:3.2.0.0;Name=HS04636.g1 +HS04636 est2genome match_part 1813 1934 8728 + . ID=HS04636:hsp:10:3.2.0.0;Parent=HS04636:hit:1:3.2.0.0;Target=HS04636.g1 127 248 +;Gap=M122 +HS04636 est2genome match_part 2055 2198 8728 + . ID=HS04636:hsp:11:3.2.0.0;Parent=HS04636:hit:1:3.2.0.0;Target=HS04636.g1 249 392 +;Gap=M144 +HS04636 est2genome match_part 2852 2995 8728 + . ID=HS04636:hsp:12:3.2.0.0;Parent=HS04636:hit:1:3.2.0.0;Target=HS04636.g1 393 536 +;Gap=M144 +HS04636 est2genome match_part 3426 3607 8728 + . ID=HS04636:hsp:13:3.2.0.0;Parent=HS04636:hit:1:3.2.0.0;Target=HS04636.g1 537 718 +;Gap=M182 +HS04636 est2genome match_part 4340 4423 8728 + . ID=HS04636:hsp:14:3.2.0.0;Parent=HS04636:hit:1:3.2.0.0;Target=HS04636.g1 719 802 +;Gap=M84 +HS04636 est2genome match_part 4543 4789 8728 + . ID=HS04636:hsp:15:3.2.0.0;Parent=HS04636:hit:1:3.2.0.0;Target=HS04636.g1 803 1049 +;Gap=M247 +HS04636 est2genome match_part 5072 5358 8728 + . ID=HS04636:hsp:16:3.2.0.0;Parent=HS04636:hit:1:3.2.0.0;Target=HS04636.g1 1050 1336 +;Gap=M287 +HS04636 est2genome match_part 5860 6007 8728 + . ID=HS04636:hsp:17:3.2.0.0;Parent=HS04636:hit:1:3.2.0.0;Target=HS04636.g1 1337 1484 +;Gap=M148 +HS04636 est2genome match_part 6494 6903 8728 + . ID=HS04636:hsp:18:3.2.0.0;Parent=HS04636:hit:1:3.2.0.0;Target=HS04636.g1 1485 1894 +;Gap=M410
--- a/test-data/evidences_reuse.gff3 Thu Oct 19 15:58:39 2017 -0400 +++ b/test-data/evidences_reuse.gff3 Sun Jul 01 16:12:18 2018 -0400 @@ -1,62 +1,42 @@ ##gff-version 3 HS08198 . contig 1 2344 . . . ID=HS08198;Name=HS08198 ### -HS08198 repeatmasker match 578 651 13 + . ID=HS08198:hit:10:1.3.0.0;Name=species:%28AGAAGGT%29n|genus:Simple_repeat;Target=species:%28AGAAGGT%29n|genus:Simple_repeat 1 75 + -HS08198 repeatmasker match_part 578 651 13 + . ID=HS08198:hsp:27:1.3.0.0;Parent=HS08198:hit:10:1.3.0.0;Target=species:%2528AGAAGGT%2529n|genus:Simple_repeat 1 75 + -### -HS08198 blastn expressed_sequence_match 444 1848 140 + . ID=HS08198:hit:11:3.12.0.0;Name=HS08198.g2;score=140 -HS08198 blastn match_part 444 583 140 + . ID=HS08198:hsp:28:3.12.0.0;Parent=HS08198:hit:11:3.12.0.0;Target=HS08198.g2 79 218 +;Gap=M140 -HS08198 blastn match_part 445 523 79 + . ID=HS08198:hsp:29:3.12.0.0;Parent=HS08198:hit:11:3.12.0.0;Target=HS08198.g2 1 79 +;Gap=M79 -HS08198 blastn match_part 808 895 88 + . ID=HS08198:hsp:30:3.12.0.0;Parent=HS08198:hit:11:3.12.0.0;Target=HS08198.g2 214 301 +;Gap=M88 -HS08198 blastn match_part 1053 1123 71 + . ID=HS08198:hsp:31:3.12.0.0;Parent=HS08198:hit:11:3.12.0.0;Target=HS08198.g2 301 371 +;Gap=M71 -HS08198 blastn match_part 1206 1315 110 + . ID=HS08198:hsp:32:3.12.0.0;Parent=HS08198:hit:11:3.12.0.0;Target=HS08198.g2 370 479 +;Gap=M110 -HS08198 blastn match_part 1586 1689 104 + . ID=HS08198:hsp:33:3.12.0.0;Parent=HS08198:hit:11:3.12.0.0;Target=HS08198.g2 479 582 +;Gap=M104 -HS08198 blastn match_part 1771 1848 78 + . ID=HS08198:hsp:34:3.12.0.0;Parent=HS08198:hit:11:3.12.0.0;Target=HS08198.g2 581 658 +;Gap=M78 -HS08198 est2genome expressed_sequence_match 352 1848 2869 + . ID=HS08198:hit:12:3.12.0.0;Name=HS08198.g2;score=2869 -HS08198 est2genome match_part 352 397 2869 + . ID=HS08198:hsp:35:3.12.0.0;Parent=HS08198:hit:12:3.12.0.0;Target=HS08198.g2 3 52 +;Gap=M46 -HS08198 est2genome match_part 421 582 2869 + . 
ID=HS08198:hsp:36:3.12.0.0;Parent=HS08198:hit:12:3.12.0.0;Target=HS08198.g2 53 217 +;Gap=M162 -HS08198 est2genome match_part 812 894 2869 + . ID=HS08198:hsp:37:3.12.0.0;Parent=HS08198:hit:12:3.12.0.0;Target=HS08198.g2 218 300 +;Gap=M83 -HS08198 est2genome match_part 1053 1123 2869 + . ID=HS08198:hsp:38:3.12.0.0;Parent=HS08198:hit:12:3.12.0.0;Target=HS08198.g2 301 371 +;Gap=M71 -HS08198 est2genome match_part 1208 1315 2869 + . ID=HS08198:hsp:39:3.12.0.0;Parent=HS08198:hit:12:3.12.0.0;Target=HS08198.g2 372 479 +;Gap=M108 -HS08198 est2genome match_part 1587 1688 2869 + . ID=HS08198:hsp:40:3.12.0.0;Parent=HS08198:hit:12:3.12.0.0;Target=HS08198.g2 480 581 +;Gap=M102 -HS08198 est2genome match_part 1772 1848 2869 + . ID=HS08198:hsp:41:3.12.0.0;Parent=HS08198:hit:12:3.12.0.0;Target=HS08198.g2 582 658 +;Gap=M77 +HS08198 blastn expressed_sequence_match 444 1848 140 + . ID=HS08198:hit:2:3.12.0.0;Name=HS08198.g2;score=140 +HS08198 blastn match_part 444 583 140 + . ID=HS08198:hsp:19:3.12.0.0;Parent=HS08198:hit:2:3.12.0.0;Target=HS08198.g2 79 218 +;Gap=M140 +HS08198 blastn match_part 445 523 79 + . ID=HS08198:hsp:20:3.12.0.0;Parent=HS08198:hit:2:3.12.0.0;Target=HS08198.g2 1 79 +;Gap=M79 +HS08198 blastn match_part 808 895 88 + . ID=HS08198:hsp:21:3.12.0.0;Parent=HS08198:hit:2:3.12.0.0;Target=HS08198.g2 214 301 +;Gap=M88 +HS08198 blastn match_part 1053 1123 71 + . ID=HS08198:hsp:22:3.12.0.0;Parent=HS08198:hit:2:3.12.0.0;Target=HS08198.g2 301 371 +;Gap=M71 +HS08198 blastn match_part 1206 1315 110 + . ID=HS08198:hsp:23:3.12.0.0;Parent=HS08198:hit:2:3.12.0.0;Target=HS08198.g2 370 479 +;Gap=M110 +HS08198 blastn match_part 1586 1689 104 + . ID=HS08198:hsp:24:3.12.0.0;Parent=HS08198:hit:2:3.12.0.0;Target=HS08198.g2 479 582 +;Gap=M104 +HS08198 blastn match_part 1771 1848 78 + . ID=HS08198:hsp:25:3.12.0.0;Parent=HS08198:hit:2:3.12.0.0;Target=HS08198.g2 581 658 +;Gap=M78 +HS08198 est2genome expressed_sequence_match 352 1848 2869 + . 
ID=HS08198:hit:3:3.12.0.0;Name=HS08198.g2;score=2869 +HS08198 est2genome match_part 352 397 2869 + . ID=HS08198:hsp:26:3.12.0.0;Parent=HS08198:hit:3:3.12.0.0;Target=HS08198.g2 3 52 +;Gap=M46 +HS08198 est2genome match_part 421 582 2869 + . ID=HS08198:hsp:27:3.12.0.0;Parent=HS08198:hit:3:3.12.0.0;Target=HS08198.g2 53 217 +;Gap=M162 +HS08198 est2genome match_part 812 894 2869 + . ID=HS08198:hsp:28:3.12.0.0;Parent=HS08198:hit:3:3.12.0.0;Target=HS08198.g2 218 300 +;Gap=M83 +HS08198 est2genome match_part 1053 1123 2869 + . ID=HS08198:hsp:29:3.12.0.0;Parent=HS08198:hit:3:3.12.0.0;Target=HS08198.g2 301 371 +;Gap=M71 +HS08198 est2genome match_part 1208 1315 2869 + . ID=HS08198:hsp:30:3.12.0.0;Parent=HS08198:hit:3:3.12.0.0;Target=HS08198.g2 372 479 +;Gap=M108 +HS08198 est2genome match_part 1587 1688 2869 + . ID=HS08198:hsp:31:3.12.0.0;Parent=HS08198:hit:3:3.12.0.0;Target=HS08198.g2 480 581 +;Gap=M102 +HS08198 est2genome match_part 1772 1848 2869 + . ID=HS08198:hsp:32:3.12.0.0;Parent=HS08198:hit:3:3.12.0.0;Target=HS08198.g2 582 658 +;Gap=M77 HS04636 . contig 1 9453 . . . ID=HS04636;Name=HS04636 ### -HS04636 repeatmasker match 3852 3888 12 + . ID=HS04636:hit:0:1.3.0.0;Name=species:%28TTGTAAT%29n|genus:Simple_repeat;Target=species:%28TTGTAAT%29n|genus:Simple_repeat 1 37 + -HS04636 repeatmasker match_part 3852 3888 12 + . ID=HS04636:hsp:0:1.3.0.0;Parent=HS04636:hit:0:1.3.0.0;Target=species:%2528TTGTAAT%2529n|genus:Simple_repeat 1 37 + -HS04636 repeatmasker match 6028 6072 21 + . ID=HS04636:hit:1:1.3.0.0;Name=species:%28TTGT%29n|genus:Simple_repeat;Target=species:%28TTGT%29n|genus:Simple_repeat 1 46 + -HS04636 repeatmasker match_part 6028 6072 21 + . ID=HS04636:hsp:1:1.3.0.0;Parent=HS04636:hit:1:1.3.0.0;Target=species:%2528TTGT%2529n|genus:Simple_repeat 1 46 + -HS04636 repeatmasker match 3840 3847 17 + . ID=HS04636:hit:2:1.3.0.0;Name=species:%28ATAC%29n|genus:Simple_repeat;Target=species:%28ATAC%29n|genus:Simple_repeat 3 57 + -HS04636 repeatmasker match_part 3840 3847 17 + . 
ID=HS04636:hsp:2:1.3.0.0;Parent=HS04636:hit:2:1.3.0.0;Target=species:%2528ATAC%2529n|genus:Simple_repeat 3 57 + -HS04636 repeatmasker match 3791 3839 22 + . ID=HS04636:hit:3:1.3.0.0;Name=species:%28AC%29n|genus:Simple_repeat;Target=species:%28AC%29n|genus:Simple_repeat 1 49 + -HS04636 repeatmasker match_part 3791 3839 22 + . ID=HS04636:hsp:3:1.3.0.0;Parent=HS04636:hit:3:1.3.0.0;Target=species:%2528AC%2529n|genus:Simple_repeat 1 49 + -HS04636 repeatmasker match 9419 9435 16 + . ID=HS04636:hit:4:1.3.0.0;Name=species:%28A%29n|genus:Simple_repeat;Target=species:%28A%29n|genus:Simple_repeat 1 17 + -HS04636 repeatmasker match_part 9419 9435 16 + . ID=HS04636:hsp:4:1.3.0.0;Parent=HS04636:hit:4:1.3.0.0;Target=species:%2528A%2529n|genus:Simple_repeat 1 17 + -HS04636 repeatmasker match 939 980 13 + . ID=HS04636:hit:5:1.3.0.0;Name=species:%28CCGCG%29n|genus:Simple_repeat;Target=species:%28CCGCG%29n|genus:Simple_repeat 1 42 + -HS04636 repeatmasker match_part 939 980 13 + . ID=HS04636:hsp:5:1.3.0.0;Parent=HS04636:hit:5:1.3.0.0;Target=species:%2528CCGCG%2529n|genus:Simple_repeat 1 42 + -HS04636 repeatmasker match 6918 6977 17 + . ID=HS04636:hit:6:1.3.0.0;Name=species:%28TATT%29n|genus:Simple_repeat;Target=species:%28TATT%29n|genus:Simple_repeat 1 54 + -HS04636 repeatmasker match_part 6918 6977 17 + . ID=HS04636:hsp:6:1.3.0.0;Parent=HS04636:hit:6:1.3.0.0;Target=species:%2528TATT%2529n|genus:Simple_repeat 1 54 + -HS04636 repeatmasker match 4518 4541 17 + . ID=HS04636:hit:7:1.3.0.0;Name=species:%28TTTA%29n|genus:Simple_repeat;Target=species:%28TTTA%29n|genus:Simple_repeat 1 24 + -HS04636 repeatmasker match_part 4518 4541 17 + . ID=HS04636:hsp:7:1.3.0.0;Parent=HS04636:hit:7:1.3.0.0;Target=species:%2528TTTA%2529n|genus:Simple_repeat 1 24 + -### -HS04636 blastn expressed_sequence_match 1815 6903 120 + . ID=HS04636:hit:8:3.12.0.0;Name=HS04636.g1;score=120 -HS04636 blastn match_part 1815 1934 120 + . 
ID=HS04636:hsp:8:3.12.0.0;Parent=HS04636:hit:8:3.12.0.0;Target=HS04636.g1 129 248 +;Gap=M120 -HS04636 blastn match_part 1815 1845 31 + . ID=HS04636:hsp:9:3.12.0.0;Parent=HS04636:hit:8:3.12.0.0;Target=HS04636.g1 50 80 +;Gap=M31 -HS04636 blastn match_part 2055 2198 144 + . ID=HS04636:hsp:10:3.12.0.0;Parent=HS04636:hit:8:3.12.0.0;Target=HS04636.g1 249 392 +;Gap=M144 -HS04636 blastn match_part 2852 2997 146 + . ID=HS04636:hsp:11:3.12.0.0;Parent=HS04636:hit:8:3.12.0.0;Target=HS04636.g1 393 538 +;Gap=M146 -HS04636 blastn match_part 3424 3609 186 + . ID=HS04636:hsp:12:3.12.0.0;Parent=HS04636:hit:8:3.12.0.0;Target=HS04636.g1 535 720 +;Gap=M186 -HS04636 blastn match_part 4339 4423 85 + . ID=HS04636:hsp:13:3.12.0.0;Parent=HS04636:hit:8:3.12.0.0;Target=HS04636.g1 718 802 +;Gap=M85 -HS04636 blastn match_part 4541 4790 250 + . ID=HS04636:hsp:14:3.12.0.0;Parent=HS04636:hit:8:3.12.0.0;Target=HS04636.g1 801 1050 +;Gap=M250 -HS04636 blastn match_part 5068 5360 293 + . ID=HS04636:hsp:15:3.12.0.0;Parent=HS04636:hit:8:3.12.0.0;Target=HS04636.g1 1046 1338 +;Gap=M293 -HS04636 blastn match_part 5859 6008 150 + . ID=HS04636:hsp:16:3.12.0.0;Parent=HS04636:hit:8:3.12.0.0;Target=HS04636.g1 1336 1485 +;Gap=M150 -HS04636 blastn match_part 6490 6903 414 + . ID=HS04636:hsp:17:3.12.0.0;Parent=HS04636:hit:8:3.12.0.0;Target=HS04636.g1 1481 1894 +;Gap=M414 -HS04636 est2genome expressed_sequence_match 1813 6903 8728 + . ID=HS04636:hit:9:3.12.0.0;Name=HS04636.g1;score=8728 -HS04636 est2genome match_part 1813 1934 8728 + . ID=HS04636:hsp:18:3.12.0.0;Parent=HS04636:hit:9:3.12.0.0;Target=HS04636.g1 127 248 +;Gap=M122 -HS04636 est2genome match_part 2055 2198 8728 + . ID=HS04636:hsp:19:3.12.0.0;Parent=HS04636:hit:9:3.12.0.0;Target=HS04636.g1 249 392 +;Gap=M144 -HS04636 est2genome match_part 2852 2995 8728 + . ID=HS04636:hsp:20:3.12.0.0;Parent=HS04636:hit:9:3.12.0.0;Target=HS04636.g1 393 536 +;Gap=M144 -HS04636 est2genome match_part 3426 3607 8728 + . 
ID=HS04636:hsp:21:3.12.0.0;Parent=HS04636:hit:9:3.12.0.0;Target=HS04636.g1 537 718 +;Gap=M182 -HS04636 est2genome match_part 4340 4423 8728 + . ID=HS04636:hsp:22:3.12.0.0;Parent=HS04636:hit:9:3.12.0.0;Target=HS04636.g1 719 802 +;Gap=M84 -HS04636 est2genome match_part 4543 4789 8728 + . ID=HS04636:hsp:23:3.12.0.0;Parent=HS04636:hit:9:3.12.0.0;Target=HS04636.g1 803 1049 +;Gap=M247 -HS04636 est2genome match_part 5072 5358 8728 + . ID=HS04636:hsp:24:3.12.0.0;Parent=HS04636:hit:9:3.12.0.0;Target=HS04636.g1 1050 1336 +;Gap=M287 -HS04636 est2genome match_part 5860 6007 8728 + . ID=HS04636:hsp:25:3.12.0.0;Parent=HS04636:hit:9:3.12.0.0;Target=HS04636.g1 1337 1484 +;Gap=M148 -HS04636 est2genome match_part 6494 6903 8728 + . ID=HS04636:hsp:26:3.12.0.0;Parent=HS04636:hit:9:3.12.0.0;Target=HS04636.g1 1485 1894 +;Gap=M410 +HS04636 blastn expressed_sequence_match 1815 6903 120 + . ID=HS04636:hit:0:3.12.0.0;Name=HS04636.g1;score=120 +HS04636 blastn match_part 1815 1934 120 + . ID=HS04636:hsp:0:3.12.0.0;Parent=HS04636:hit:0:3.12.0.0;Target=HS04636.g1 129 248 +;Gap=M120 +HS04636 blastn match_part 1815 1845 31 + . ID=HS04636:hsp:1:3.12.0.0;Parent=HS04636:hit:0:3.12.0.0;Target=HS04636.g1 50 80 +;Gap=M31 +HS04636 blastn match_part 2055 2198 144 + . ID=HS04636:hsp:2:3.12.0.0;Parent=HS04636:hit:0:3.12.0.0;Target=HS04636.g1 249 392 +;Gap=M144 +HS04636 blastn match_part 2852 2997 146 + . ID=HS04636:hsp:3:3.12.0.0;Parent=HS04636:hit:0:3.12.0.0;Target=HS04636.g1 393 538 +;Gap=M146 +HS04636 blastn match_part 3424 3609 186 + . ID=HS04636:hsp:4:3.12.0.0;Parent=HS04636:hit:0:3.12.0.0;Target=HS04636.g1 535 720 +;Gap=M186 +HS04636 blastn match_part 4339 4423 85 + . ID=HS04636:hsp:5:3.12.0.0;Parent=HS04636:hit:0:3.12.0.0;Target=HS04636.g1 718 802 +;Gap=M85 +HS04636 blastn match_part 4541 4790 250 + . ID=HS04636:hsp:6:3.12.0.0;Parent=HS04636:hit:0:3.12.0.0;Target=HS04636.g1 801 1050 +;Gap=M250 +HS04636 blastn match_part 5068 5360 293 + . 
ID=HS04636:hsp:7:3.12.0.0;Parent=HS04636:hit:0:3.12.0.0;Target=HS04636.g1 1046 1338 +;Gap=M293 +HS04636 blastn match_part 5859 6008 150 + . ID=HS04636:hsp:8:3.12.0.0;Parent=HS04636:hit:0:3.12.0.0;Target=HS04636.g1 1336 1485 +;Gap=M150 +HS04636 blastn match_part 6490 6903 414 + . ID=HS04636:hsp:9:3.12.0.0;Parent=HS04636:hit:0:3.12.0.0;Target=HS04636.g1 1481 1894 +;Gap=M414 +HS04636 est2genome expressed_sequence_match 1813 6903 8728 + . ID=HS04636:hit:1:3.12.0.0;Name=HS04636.g1;score=8728 +HS04636 est2genome match_part 1813 1934 8728 + . ID=HS04636:hsp:10:3.12.0.0;Parent=HS04636:hit:1:3.12.0.0;Target=HS04636.g1 127 248 +;Gap=M122 +HS04636 est2genome match_part 2055 2198 8728 + . ID=HS04636:hsp:11:3.12.0.0;Parent=HS04636:hit:1:3.12.0.0;Target=HS04636.g1 249 392 +;Gap=M144 +HS04636 est2genome match_part 2852 2995 8728 + . ID=HS04636:hsp:12:3.12.0.0;Parent=HS04636:hit:1:3.12.0.0;Target=HS04636.g1 393 536 +;Gap=M144 +HS04636 est2genome match_part 3426 3607 8728 + . ID=HS04636:hsp:13:3.12.0.0;Parent=HS04636:hit:1:3.12.0.0;Target=HS04636.g1 537 718 +;Gap=M182 +HS04636 est2genome match_part 4340 4423 8728 + . ID=HS04636:hsp:14:3.12.0.0;Parent=HS04636:hit:1:3.12.0.0;Target=HS04636.g1 719 802 +;Gap=M84 +HS04636 est2genome match_part 4543 4789 8728 + . ID=HS04636:hsp:15:3.12.0.0;Parent=HS04636:hit:1:3.12.0.0;Target=HS04636.g1 803 1049 +;Gap=M247 +HS04636 est2genome match_part 5072 5358 8728 + . ID=HS04636:hsp:16:3.12.0.0;Parent=HS04636:hit:1:3.12.0.0;Target=HS04636.g1 1050 1336 +;Gap=M287 +HS04636 est2genome match_part 5860 6007 8728 + . ID=HS04636:hsp:17:3.12.0.0;Parent=HS04636:hit:1:3.12.0.0;Target=HS04636.g1 1337 1484 +;Gap=M148 +HS04636 est2genome match_part 6494 6903 8728 + . ID=HS04636:hsp:18:3.12.0.0;Parent=HS04636:hit:1:3.12.0.0;Target=HS04636.g1 1485 1894 +;Gap=M410
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fake_repbase.embl Sun Jul 01 16:12:18 2018 -0400 @@ -0,0 +1,21 @@ +CC Fake repbase-like embl file, using data from DfamConsensus.embl* +CC **************************************************************** +XX +ID ACROBAT1 repeatmasker; DNA; ???; 768 BP. +CC consensus - See RepBase for additional annotations. +XX +SQ Sequence 768 BP; 178 A; 194 C; 215 G; 181 T; 0 other; + ggtgatgctg ccaacttact gatttagtgt atgatggtgt ttttgaggtg ctccagtggc 60 + ttctgtttct atcagctgtc cctcctgttc agctactgac ggggtggtgc gtaacggcaa 120 + aagcaccgcc ggacatcagc gctatctctg ctctcactgc cgtaaaacat ggcaactgca 180 + gttcacttac accgcttctc aacccggtac gcaccagaaa atcattgata tggccatgaa 240 + tggcgttgga tgccgggcaa cagcccgcat tatgggcgtt ggcctcaaca cgattttacg 300 + tcacttaaaa aactcaggcc gcagtcggta acctcgcgca tacagccggg cagtgacgtc 360 + atcgtctgcg cggaaatgga cgaacagtgg ggctatgtcg gggctaaatc gcgccagcgc 420 + tggctgtttt acgcgtatga cagtctccgg aagacggttg ttgcgcacgt attcggtgaa 480 + cgcactatgg cgacgctggg gcgtcttatg agcctgctgt caccctttga cgtggtgata 540 + tggatgacgg atggctggcc gctgtatgaa tcccgcctga agggaaagct gcacgtaatc 600 + agcaagcgat atacgcagcg aattgagcgg cataacctga atctgaggca gcacctggca 660 + cggctgggac ggaagtcgct gtcgttctca aaatcggtgg agctgcatga caaagtcatc 720 + gggcattatc tgaacataaa acactatcaa taagttggag tcattacc 768 +//