Previous changeset 2:5673e72241aa (2013-09-17) Next changeset 4:04f5c3d7448e (2018-05-04) |
Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/repeat_masker commit 8dacb4321122d92df0983e0794cf23572be03224 |
added:
repeatmasker.xml test-data/repeats.fasta test-data/small.fasta test-data/small.fasta.align test-data/small.fasta.cat test-data/small.fasta.gff test-data/small.fasta.log test-data/small.fasta.masked test-data/small.fasta.poly test-data/small.fasta.stats |
removed:
RepeatMasker.xml readme.rst |
b |
diff -r 5673e72241aa -r bdfc22c1c3e3 RepeatMasker.xml --- a/RepeatMasker.xml Tue Sep 17 03:23:49 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,307 +0,0 @@\n-<tool id="repeatmasker_wrapper" name="RepeatMasker" version="0.1.2">\n- <description>Masks different kind of repeats</description>\n- <requirements>\n- <requirement type="binary">RepeatMasker</requirement>\n- </requirements>\n- <command>\n-## The command is a Cheetah template which allows some Python based syntax.\n-## Lines starting hash hash are comments. Galaxy will turn newlines into spaces\n-\n-## create temp directory\n-#import tempfile, os\n-#set $dirname = os.path.abspath( tempfile.mkdtemp() )\n-#set $input_filename = os.path.split( str($query) )[-1]\n-#set $output_basename = os.path.join( $dirname, $input_filename )\n-\n-\n-RepeatMasker \n--parallel 8\n-\n-$nolow\n-$noint\n-$norna\n-\n-#if str($species)!="all":\n- $species\n-#end if\n-\n-\n--dir $dirname\n-\n-#if $adv_opts.adv_opts_selector=="advanced":\n-\n- #if str($adv_opts.gc)!="0":\n- -gc $adv_opts.gc\n- #end if\n-\n- $adv_opts.gccalc\n-\n- #set $output_files_list = str($adv_opts.output_files).split(\',\')\n- #if "gff" in $output_files_list:\n- -gff\n- #end if\n- #if "html" in $output_files_list:\n- -html\n- #end if\n-\n- $adv_opts.slow_search\n- $adv_opts.quick_search\n- $adv_opts.rush_search\n- $adv_opts.only_alus\n- $adv_opts.is_only\n-\n-#else:\n- ## Set defaults\n- -gff\n-\n-## End of advanced options:\n-#end if\n-\n-$query\n-\n-2>&1;\n-\n-## Copy the output files to galaxy\n-## AgR: if there are no repeats, the output files may not exist.\n-## This causes the job to fail, so touch files to ensure they exist.\n-#if $adv_opts.adv_opts_selector=="advanced":\n-\n- #if "summary" in $output_files_list:\n- ## Write out the summary file (default)\n- #set $summary_file = $output_basename + \'.tbl\'\n- touch $summary_file;\n- cp $summary_file $output_summary;\n- #end if\n-\n- #if "gff" in $output_files_list:\n- ## Write out the gff file (default)\n- #set $gff_file = $output_basename + \'.out.gff\'\n- touch $gff_file;\n- cp $gff_file $output_gff;\n- #end if\n-\n- #if "html" in $output_files_list:\n- ## Write out the html file\n- #set $html_file = $output_basename + \'.out.html\'\n- touch $html_file;\n- cp $html_file $output_html;\n- #end if\n-\n-#else:\n-\n- ## Write out the summary file (default)\n- #set $summary_file = $output_basename + \'.tbl\'\n- touch $summary_file;\n- cp $summary_file $output_summary;\n-\n- ## Write out the gff file (default)\n- #set $gff_file = $output_basename + \'.out.gff\'\n- touch $gff_file;\n- cp $gff_file $output_gff;\n-\n-\n-## End of advanced options:\n-#end if\n-\n-## Write out mask sequence file\n-#set $mask_sequence_file = $output_basename + \'.masked\'\n-touch $mask_sequence_file;\n-cp $mask_sequence_file $output_mask;\n-\n-## Write out standard file (default)\n-## The default \'.out\' file from RepeatMasker has a 3-line header and spaces rather\n-## than tabs. Remove the header and replace the whitespaces with tab\n-#set $standard_file = $output_basename + \'.out\'\n-tail -n +4 $standard_file | tr -s \' \' \'\\t\' > $output_std;\n-\n-## Delete all temporary files\n-rm $dirname -r\n-\n- </command>\n- <inputs>\n- <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> \n-\n- <param name="nolow" type="boolean" label="No low complexity DNA" truevalue="-nolow" falsevalue="" checked="false" help="Does not mask low_complexity DNA or simple repeats."/>\n- <param name="noint" type="boolean" label="No interspersed repeats" truevalue="-noint" falsevalue="" checked="false" help="Only masks low complex/simple repeats (no interspersed repeats)."/>\n-\n- <param name="norna" type="boolean" label="No small RNA genes" truevalue="-norna" falsevalue="" checked="false" help="Does not mask small RNA (pseudo) genes."/>\n-\n- <!--\n- Specify the species or clade of the input sequence. The species name\n- must be a valid NCBI Taxonomy Database species name and be contained\n- '..b' 1\n- 12204 10.0 2.4 1.8 HSU08988 6782 7714 \\(21529) C TIGGER1 DNA/MER2_type 2418 1493 \\(0) 2\n- 279 3.0 0.0 0.0 HSU08988 7719 7751 \\(21492) + (TTTTA)n Simple_repeat 1 33 \\(0) 3\n- 1765 13.4 6.5 1.8 HSU08988 7752 8022 \\(21221) C AluSx SINE/Alu 289 1 \\(23) 4\n- 12204 10.0 2.4 1.8 HSU08988 8023 8694 \\(20549) C TIGGER1 DNA/MER2_type 1493 827 \\(925) 5\n- 1984 11.1 0.3 0.7 HSU08988 8695 9000 \\(20243) C AluSg SINE/Alu 305 1 \\(5) 6\n- 12204 10.0 2.4 1.8 HSU08988 9001 9695 \\(19548) C TIGGER1 DNA/MER2_type 827 2 \\(1591) 7\n- 711 21.2 1.4 0.0 HSU08988 9696 9816 \\(19427) C MER7A DNA/MER2_type 122 2 \\(224) 8\n-======== ========= ========= ========= ========== =========== ========= ========= ============ =============== =================== ================ ============== ======= ==\n-\n-This is a sequence in which a Tigger1 DNA transposon has integrated into a MER7 DNA transposon copy.\n-Subsequently two Alus integrated in the Tigger1 sequence. The simple repeat is derived from the\n-poly A of the Alu element. The first line is interpreted like this:\n-\n-:Table description:\n-\n-1. **1306** = Smith-Waterman score of the match, usually complexity adjusted\n- The SW scores are not always directly comparable. Sometimes\n- the complexity adjustment has been turned off, and a variety of\n- scoring-matrices are used.\n-\n-#. **15.6** = % substitutions in matching region compared to the consensus\n-#. **6.2** = % of bases opposite a gap in the query sequence (deleted bp)\n-#. **0.0** = % of bases opposite a gap in the repeat consensus (inserted bp)\n-#. **HSU08988** = name of query sequence\n-#. **6563** = starting position of match in query sequence\n-#. **7714** = ending position of match in query sequence\n-#. **(22462)** = no. of bases in query sequence past the ending position of match\n-#. **C** = match is with the Complement of the consensus sequence in the database\n-#. **MER7A** = name of the matching interspersed repeat\n-#. **DNA/MER2_type** = the class of the repeat, in this case a DNA transposon fossil of the MER2 group (see below for list and references)\n-#. **2418** = starting position of match in database sequence (using top-strand numbering)\n-#. **1465** = ending position of match in database sequence\n-#. **(0)** = no. of bases in (complement of) the repeat consensus sequence prior to beginning of the match (so 0 means that the match extended all the way to the end of the repeat consensus sequence)\n-#. **1** = Identifier\n-\n-An asterisk (\\*) in the final column (no example shown) indicates that there is\n-a higher-scoring match whose domain partly (<80%) includes the domain of this match. \n-\n-Note that the SW score and divergence numbers for the three Tigger1 lines are identical.\n-This is because the information is derived from a single alignment (the Alus were deleted\n-from the query before the alignment with the Tigger element was performed).\n-The program makes educated guesses about many fragments if they are derived from\n-the same element (e.g. it knows that the MER7A fragments represent one insert).\n-In a next version I can identify each element with a unique ID, if interest exists\n-(this could help to represent repeats cleaner in graphic displays). \n-\n-\n--------\n-\n-**References**\n-\n-Smit, AFA, Hubley, R and Green, P. RepeatMasker Open-3.0.\n-\n-http://www.repeatmasker.org/\n-\n- </help>\n-</tool>\n' |
b |
diff -r 5673e72241aa -r bdfc22c1c3e3 readme.rst --- a/readme.rst Tue Sep 17 03:23:49 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,63 +0,0 @@ -=============================== -Galaxy wrapper for RepeatMasker -=============================== - -This wrapper is copyright 2013 by Björn Grüning. - -This is a wrapper for the command line tool of RepeatMasker from the Institute for Systems Biology. -http://www.repeatmasker.org/ - - -Smit, AFA, Hubley, R & Green, P. RepeatMasker Open-3.0. -1996-2010 <http://www.repeatmasker.org>. - - -Additional Information: -Using RepeatMasker to identify repetitive elements in genomic sequences. -http://www.ncbi.nlm.nih.gov/pubmed/19274634 - -============ -Installation -============ - -To install RepeatMasker, please use the following instructions: - -http://www.repeatmasker.org/RMDownload.html - -To install the wrapper copy the file RepeatMasker.xml in the galaxy tools -folder and modify the tools_conf.xml file to make the tool available to Galaxy. -Add a line like the following: - -Add the tool definition to your tool_conf.xml file under Galaxy root. - <tool file="RepeatMasker/RepeatMasker.xml" /> - -======= -History -======= - -- v1.1: Initial public release -- v0.1.1: patch from Simon Guest, to create empty files if no repeat is found -- v0.1.2: remove trailing semicolon, redirect all output to stdout - -=============================== -Wrapper Licence (MIT/BSD style) -=============================== - -Permission to use, copy, modify, and distribute this software and its -documentation with or without modifications and for any purpose and -without fee is hereby granted, provided that any copyright notices -appear in all copies and that both those copyright notices and this -permission notice appear in supporting documentation, and that the -names of the contributors or copyright holders not be used in -advertising or publicity pertaining to distribution of the software -without specific prior permission. - -THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL -WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE -CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT -OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE -OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE -OR PERFORMANCE OF THIS SOFTWARE. - |
b |
diff -r 5673e72241aa -r bdfc22c1c3e3 repeatmasker.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repeatmasker.xml Wed May 02 20:18:11 2018 -0400 |
[ |
b'@@ -0,0 +1,230 @@\n+<tool id="repeatmasker_wrapper" name="RepeatMasker" version="4.0.7" profile="17.01">\n+ <description>RepeatMasker</description>\n+\n+ <requirements>\n+ <requirement type="package" version="4.0.7">repeatmasker</requirement>\n+ </requirements>\n+\n+ <command detect_errors="exit_code"><![CDATA[\n+ RM_LIB_PATH=\\$(dirname \\$(which RepeatMasker))/../share/RepeatMasker/Libraries &&\n+ mkdir lib &&\n+ export REPEATMASKER_LIB_DIR=\\$(pwd)/lib &&\n+ for file in \\$(ls \\$RM_LIB_PATH) ; do ln -s \\$RM_LIB_PATH/\\$file lib/\\$file ; done &&\n+ #if $repeat_source.source_type == "repbase":\n+ cp \'${repeat_source.repbase_file}\' lib/RMRBSeqs.embl &&\n+ #end if\n+ ln -s \'${input_fasta}\' rm_input.fasta &&\n+ RepeatMasker -dir \\$(pwd)\n+ #if $repeat_source.source_type == "library":\n+ -lib \'${repeat_source.repeat_lib}\'\n+ -cutoff \'${repeat_source.cutoff}\'\n+ #else if $repeat_source.source_type == "repbase":\n+ #if $repeat_source.species_source.species_from_list == \'yes\':\n+ $repeat_source.species_source.species_list\n+ #else\n+ -species \'${repeat_source.species_source.species_name}\'\n+ #end if\n+ #end if\n+ -parallel \\${GALAXY_SLOTS:-1}\n+ \'${gff}\'\n+ \'${ignore_n_stretches}\'\n+ \'${advanced.is_only}\'\n+ \'${advanced.is_clip}\'\n+ \'${advanced.no_is}\'\n+ \'${advanced.rodspec}\'\n+ \'${advanced.primspec}\'\n+ \'${advanced.nolow}\'\n+ \'${advanced.noint}\'\n+ \'${advanced.norna}\'\n+ \'${advanced.alu}\'\n+ \'${advanced.div}\'\n+ \'${advanced.search_speed}\'\n+ \'${advanced.frag}\'\n+ \'${advanced.maxsize}\'\n+ #if $advanced.gc is not None:\n+ \'${advanced.gc}\'\n+ #end if\n+ \'${advanced.gccalc}\'\n+ \'${advanced.nocut}\'\n+ \'${advanced.keep_alignments}\'\n+ \'${advanced.invert_alignments}\'\n+ \'${advanced.xout}\'\n+ \'${advanced.xsmall}\'\n+ \'${advanced.poly}\'\n+ rm_input.fasta &&\n+ #if $advanced.is_only != \'-is_only\':\n+ mv rm_input.fasta.masked \'${output_masked_genome}\' &&\n+ sed -r \'s/^ *// ; s/ *$//; s/\\+ //; s/ +/\\t/g ; 1,2c SW score\\t% div.\\t% del.\\t% ins.\\tquery sequence\\tpos in query: begin\\tend\\t(left)\\trepeat\\tclass/family\\tpos in repeat: begin\\tend\\t(left)\\tID\' rm_input.fasta.out >\'${output_log}\' &&\n+ mv rm_input.fasta.tbl \'${output_table}\' &&\n+ #if $gff == \'-gff\':\n+ mv rm_input.fasta.out.gff \'${output_gff}\' &&\n+ #end if\n+ #if $advanced.keep_alignments == \'-ali\':\n+ mv rm_input.fasta.align \'${output_alignment}\' &&\n+ #end if\n+ #if $advanced.poly == \'-poly\':\n+ sed -r \'s/^ *// ; s/ *$//; s/\\+ //; s/ +/\\t/g\' rm_input.fasta.polyout >\'${output_polymorphic}\' &&\n+ #end if\n+ #end if\n+ mv rm_input.fasta.cat \'${output_repeat_catalog}\'\n+ ]]>\n+ </command>\n+\n+ <inputs>\n+ <param name="input_fasta" type="data" format="fasta" label="Genomic DNA" />\n+ <conditional name="repeat_source">\n+ <param label="Repeat library source" name="source_type" type="select">\n+ <option selected="true" value="repbase">RepBase</option>\n+ <option value="library">Custom library of repeats</option>\n+ </param>\n+ <when value="repbase">\n+ <param name="repbase_file" type="data" format="embl" label="RepBase (RMRBSeqs.embl) file" />\n+ <conditional name="species_source">\n+ <param label="Select species name from a list?" name="species_from_list" type="select">\n+ <option value="yes" selected="true">Yes</option>\n+ <option value="no">No</option>\n+ </param>\n+ <when value="yes">\n+ <param name="species_list" type="select" label="Species">\n+ <option value="-species anopheles" selected="true">anopheles</option>\n+ <option value="-species arabidopsis">arabidopsis</option>\n+ <option value="-species artiodactyl">artiodactyl</option>\n+ <option value="-species aspergillus">aspergillus</option>\n+ <option value="-species carnivore">carnivore</option>\n+ '..b'nce, not the query sequence" />\n+ <param type="boolean" argument="-xsmall" truevalue="-xsmall" falsevalue="" checked="false" label="Output repetitive regions as lowercase, non-repetitive regions as uppercase" />\n+ <param type="boolean" argument="-poly" truevalue="-poly" falsevalue="" checked="false" label="Output list of potentially polymorphic microsatellites" />\n+ </section>\n+ </inputs>\n+ <outputs>\n+ <data name="output_masked_genome" format="fasta" label="RepeatMasker masked sequence on ${on_string}">\n+ <filter>not advanced[\'is_only\']</filter>\n+ </data>\n+ <data name="output_log" format="tabular" label="RepeatMasker output log on ${on_string}">\n+ <filter>not advanced[\'is_only\']</filter>\n+ </data>\n+ <data name="output_table" format="txt" label="RepeatMasker repeat statistics on ${on_string}">\n+ <filter>not advanced[\'is_only\']</filter>\n+ </data>\n+ <data name="output_repeat_catalog" format="txt" label="RepeatMasker repeat catalogue on ${on_string}" />\n+ <data name="output_alignment" format="txt" label="RepeatMasker alignment on ${on_string}">\n+ <filter>not advanced[\'is_only\'] and advanced[\'keep_alignments\']</filter>\n+ </data>\n+ <data name="output_polymorphic" format="tabular" label="RepeatMasker possible polymorphic repeats on ${on_string}">\n+ <filter>not advanced[\'is_only\'] and advanced[\'poly\']</filter>\n+ </data>\n+ <data name="output_gff" format="gff" label="RepeatMasker repeat annotation on ${on_string}">\n+ <filter>not advanced[\'is_only\'] and gff is True</filter>\n+ </data>\n+ </outputs>\n+ <tests>\n+ <test expect_num_outputs="4">\n+ <param name="input_fasta" value="small.fasta" ftype="fasta" />\n+ <param name="source_type" value="library" />\n+ <param name="repeat_lib" value="repeats.fasta" ftype="fasta" />\n+ <output name="output_masked_genome" file="small.fasta.masked" />\n+ <output name="output_table" file="small.fasta.stats" lines_diff="2" />\n+ <output name="output_repeat_catalog" file="small.fasta.cat" />\n+ <output name="output_log" file="small.fasta.log" />\n+ </test>\n+ <test expect_num_outputs="7">\n+ <param name="input_fasta" value="small.fasta" ftype="fasta" />\n+ <param name="source_type" value="library" />\n+ <param name="gff" value="-gff" />\n+ <!-- <param name="show" value="yes" /> -->\n+ <param name="keep_alignments" value="-ali" />\n+ <param name="poly" value="-poly" />\n+ <param name="repeat_lib" value="repeats.fasta" ftype="fasta" />\n+ <output name="output_masked_genome" file="small.fasta.masked" />\n+ <output name="output_table" file="small.fasta.stats" lines_diff="4" />\n+ <output name="output_repeat_catalog" file="small.fasta.cat" />\n+ <output name="output_log" file="small.fasta.log" />\n+ <output name="output_alignment" file="small.fasta.align" />\n+ <output name="output_polymorphic" file="small.fasta.poly" />\n+ <output name="output_gff" file="small.fasta.gff" lines_diff="4" />\n+ </test>\n+ </tests>\n+ <help><![CDATA[\n+RepeatMasker is a program that screens DNA for interspersed repeats and low\n+complexity DNA sequences. The database of repeats to screen for can be\n+provided as a FASTA file or downloaded from RepBase_. If the RepBase option is\n+chosen the RepBaseRepeatMaskerEdition file should be downloaded and\n+unpacked, and the enclosed EMBL format file (\'RMRBSeqs.embl\') should\n+be uploaded to Galaxy for use with this tool.\n+\n+Further documentation is available on the RepeatMasker homepage_.\n+\n+.. _RepBase: http://www.girinst.org/repbase/\n+.. _homepage: http://www.repeatmasker.org/webrepeatmaskerhelp.html\n+ ]]>\n+ </help>\n+ <citations>\n+ <citation type="bibtex">\n+ @misc{RepeatMasker,\n+ title = {RepeatMasker Open-4.0},\n+ howpublished = {\\url{http://www.repeatmasker.org}},\n+ author = {Smit, AFA and Hubley, R and Green, P.},\n+ year = {2013-2015}}\n+ </citation>\n+ </citations>\n+</tool>\n' |
b |
diff -r 5673e72241aa -r bdfc22c1c3e3 test-data/repeats.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/repeats.fasta Wed May 02 20:18:11 2018 -0400 |
b |
@@ -0,0 +1,64 @@ +>Asian_seabass_ONSAT_SB_Concensus_Lenght_170_bp +CGAAAAATTTAATAATTTAGGGGTCTTGAGCATGGGCGTGGTAAAATGCCCTCGGTAGCG +CCACCTACATTTTTAAACGGAACAGCCCCTCAAGCCCGTTGCGCCTAAAAATCTGAAAAT +CTGCACACATATGTAACATCCCATGACGCACCAAAAAGTCTCTTGGAGCCA +>Asian_seabass_MOSAT_SB_T_34a_satellite_DNA +TTTGTGACATCACTACATAGTTTGTTGAAAACGTAC +>Asian_seabass_MOSAT_SB_T_34b_satellite_DNA +TTTGTGACATCACACATAGTTGTGGGTCAGTAC +>Sat_38 +AAAAAATGTCATAGTATAGTATGGCGTCAAAAAACATG +>Asian_seabass_Sat_217_Consensus_Length_217_bp +AGTAAACAAGCATTATGGTTGAAACCATAATTTCCTGTCGGGAGAGCCTTTCCCTCTTTT +GTGCACTGTATGCAATCCCAGAGTGTGAATAAGCGCTTTTCCAGCGTTTTGAGGCTTATT +CAGCTCAGAATGGCTTAATACTGCACTATCTGACCAGGACAAAGACAAACTAAGAACTCA +GCCACACGGACATGAAAGTTGTTTTACTTACAATATT +>Asian_seabass_Sat_217_Consensus_Length_427_bp +CCAGCGTTTTGAGGCTTATTCAGCTCAGAATGGCTTAATACTGCACTATCTGACCAGGAC +AAGACAAACTAAGAACTCAACACACTGACATGAAAGTTTCTATATTTCTATTAAAATAAA +CAAACATTATGGTTGAAACCATAATTTCATTTCGGGAGAGCCTTTCCCTCTTTGTGGCAC +TGTATGTAATCTGAAGTGTGAATAACGCTTTTCCCGCGTTTGAGGCTTATTCAGCTCAGA +ATGGCTTAATACTGCACTATCTGACCAGGACAAGACAAACTAAGAACTCAGCCACACGGA +CATGAAAGTTGTTTACTTTACAATATTAGTAAACAAGCATTATGGTTGAAACCATAATTT +CCTGTCGGGAAGAGCTTCCCTCTTTTTGTGCACTGTATGCAATCCCAGAGTGTGAATAAG +CGCTTTT +>Asian_seabass_Sat_LM_Consensus_Length_453_bp +CCCAACAAAATAGCACTTACTGCTTTCCAAATTCAAGTCTGTGCCTGTGTTAGGGTAATG +TTGGTGATAAATTGATGTACTTAGCTTATCCTGTGAAAATGGTGTAAAATGGCAGTCTCC +ATGTTTGTTGAAGAAACTCACAACCAAGACAACCCCAAAGAGAAGCACTTATGAAGACAG +AAATCATGAGTGGCTCTTATTGAAGTGATCAGAGTTGGACTCCATTAAGAAAATGGCTTT +CAATGTAGTCAGTTGCTTTGTCACTTAAAACTGCCCCAACAAGTAGCACTTACTGCAATT +TGGTAGTCAAAGTCAATGCTCAAGTTAAGGGTAATAGTAGTGACATAATAGAAAAATCTC +TAAATATATTTGCTGGCAGCTTTAATATACAGAATGAGTGCCATGATGAATTCTTGATAC +AGAACAGGGACTTCCAAAATCAGCCAACACTAA +>Asian_seabass_Sat_LM_Consensus_Length_218_bp +CCCAACAAAATAGCACTTACTGCTTTCCAAATTCAAGTCTGTGCCTGTGTTAGGGTAATG +TTGGTGATAAATTGATGTACTTAGCTTATCCTGTGAAAATGGTGTAAAATGGCAGTCTCC +TGTTTGTTGAAGAAACTCACAACCAAGACAACCCCAAAGAGAAGCACTTATGAAGACAGA +AATCATGAGTGGCTCTTATTGAAGTGATCAGAGTTGG +>Asian_seabass_Sat_LM Consensus_Length_150_bp +CTCACAACCAAGACAACCCCAAAGAGAAGCACTTATGAAGACAGAAATCATGAGTGGCTC +TTATTGAAGTGATCAGAGTTGGACTCCATTAAGAAAATGGCTTTCAATGTAGTCAGTTGC +TTTGTCACTTAAAACTGCCCCAACAAGTAGCA +>Asian_seabass_Sat_Unk_Consensus_Length_341_bp +GTTTGGGTAAAAATATTGTCTATTTACGAGCTATCCTCTATATATTTTTGATCTGATTAA +TAAATACCTCTAGCCCACAAACTGTGGTATTTTGCTATGTGGCAGTTCACCCGAAAGTCC +AGTAGCATTTATCGCATTTTCTAAAGATAGTCAGTGCCTGAAAGTTTGAGGCAGATAAAC +AAATTGTTCAAGTAAGAACTATATCTTTCTTATGATTTTACCGCAATCATACAGGTTGTT +TCTTGTGGTCTGCTGGGCATTGTATCCCTTTGTTGTATGGATTTTTCCTTTCTTTAATGA +TCTCCTCCCTGGAGTTTGTAATCCCTGTTTGTAGTGGAATT +>Asian_seabass_Sat_Unk_Consensus_Length_789_bp +CAAAAAAATGGAAAAAAAAAAAGTGGCTCATTTGAAGTGAATCAGAGTTGGGTAACATTG +TCCTTAATTACAGCTATGCTTATATACTTGATCTGATAATAATACTCTAGCCACAATCTG +TGGGTATTCTGCTATTTGCAGTCCACCCAAAGTCATAGCATTTAATCCATTCTAGGAAGA +TACGTCAGTGCTGAGTTGAGCCAATAACACCAAATTGTCATAAGAACTATTATTTTCTTT +ATGATTTTTACGCATCAGTACAGGTGTTTTCTTGTGGTTTCTGCTGCATGTATCCCTGTT +GTAAGTGGATTCTCCTTTGCTTATGATCTCCTCCGCTGCGTTGTATCCTGTTTGTAGTGG +ATTTCCTTGCACTGATTCTGCCCGCTCTTAATACTGAATGAACGCCCTCCCACCAGTAGT +ACTGCCACCTTTGTTTTTTCACAAAGTGTTCAATGGCCTTGATAATGACTTGGTAACTAC +ATCACCACTTTTTTGTACGTAATCAACAGACAATCACCCATTAAATTCATGCGGCCATTA +GGCAGCTTGATGAGCTGGACATTTGACCTCCCCAATGAATCTTTGCTGATGGGAAACTTA +TTGCCACCATAAAGTGAACCAGCATTGCTATGTCCATGCTGTAATTTGAAAACCCAATAC +AAAGAAGCCCTGCATATATACCTCCCAAATTGGCTTCTGAGACCCGGGCGTAATCACTGC +ATGGCCATAGATAAACATTGAAATCTTCTGTTGCAGTCAGTTTGCTTTTTGTCAACTTTA +AAAACTGCC |
b |
diff -r 5673e72241aa -r bdfc22c1c3e3 test-data/small.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/small.fasta Wed May 02 20:18:11 2018 -0400 |
b |
b'@@ -0,0 +1,238 @@\n+>scaffold_1\n+TGCTTTTCTGAGATTTATGTCAGCTCCCCATTGGGAGATACAGGAGAATCCAATATATTC\n+CTGTCTCGCTTCTTTTATATTTATCCTTTCTACACCTGACATCCTCAGTGATTGAAGTGA\n+CAAAAAGTGGAGCGCACGCAGTGGTCATTACCCTCCAATGGTACTTCTAATTAAGGAAAG\n+ATTTTTTGCATTCACTGAGCAAAACACTTATTTGCATGAATGGAAAATCATAAATGAGGG\n+GTCCATTAAACAAATTTCCTAATTGTTTGCTTTTTTTTCTTCCTACTTCACCATCCCTCT\n+AAAGCTATTACTCTCTATTCTACCCCATTATCGAGTGCACAAACACCAGCCCCAAATTGT\n+CTTATCTCTGCTTTGATAAATGATATTTTTTTCTCTTATAATTGTGTTTCTCTCTAGATG\n+TGCGGTGTCCATCAAGACACTGGATGTCACGTGGTCACTCCTGTGAAGAGCGAACTGTGT\n+GGAACCCGAAGTACTGTGTGGTTGCTGACTGTCAGATGCTGCTTCTGAATGAGGAGGAGG\n+TGGTGAGAGCAGACAGCACAATCTCTTGTCTGGCATGTTCCTCTGTGTCACCATTGTCTC\n+TCTCATCTGCTCGTGTGTGTGTGTGTGTGTGTCCTTGTCAATGTCATTACACAATTTATG\n+TTGGTGCACTGCATGTCAGAGTCTGAACTTTGTTGGACATCTACTTTCTGTCACCTGATA\n+CCTCATACTTTACACTCTAATCCCTTTTTCCTTTCTAACTGCTGCTGTGTTAAGCTGTCA\n+TATTAATATACTAAATAATGATAATATTAATTCTAATAATGATAGTGAATGGAGATTCAC\n+AATGAAGAAACACAAGGTCCAGACTGAACCCCTGTGCTGTGTGAAGAGTTCACTCTACAT\n+TTACATTCATCCCACATCAGAGACCAGCAATAGCCAGGTTAAGCAGAGCAAGGCTGAGTC\n+TGGCTCTTTGCCTCTGTTGTTTTGATTAGTGTCACACTGGATTATGCTAGTTTGTGTGAG\n+ATTAGAGGTCCTGTTGTTACGAGCAAATCAGCAGTCAGCAGGGTAATCTAAATTACATTG\n+TTCTGTCGGCAATATCCTCTCCTCGCTCAACAGCTTTGTGTGAGCTCATATTTAAAGCCC\n+CCACAGTAGTGCAGTGCATAAAATCATGCAGATACAGATCAGAAGCTTCAGGTAATGTTT\n+ACATCAAACATCAGGGGGAAAATGTGATCTCAGTGCCTCTGACCGTGGTGTGATTGTTGG\n+TGCCGGACAGGCTGGTTTGAGTATTTCTAAAACTGCTGATCTCCTGGGATTTTCACACAC\n+GGCATTCTCTAGAGTTTACTCAGAACGGTGTGGAAAGCAAAAAAACATCCCGCGAGCTGC\n+AATCCTGCGGATGGAAACACCTTGTTGATGGGATATGTCAGAGGAATGGCCAGGCTGGAT\n+CAATCTGACAGAAAGGCGGTAACTCAGATAAGCACTCTCTACAACTGTGGTGAGCAGAAA\n+AGCGTTCTCAGAATGCACAACACGTTCAACCTTGAGGAAGATGTTGCTACAGCAGCAGAT\n+GAAGACCACGTCTGGTTCCACTCCTGTCAGCCAAGAACATCTCAGACTGCACAGGACACC\n+AAAACAAAAACACATCCTGTTTTCTACTGGTGGTAGAGTCACAATTTGGCAATAAGATAA\n+ATCCATGGACCCAACTTGCCTTGTGTCAATAGACCAAGCTGCTGGTGGTCTTGGGGAATG\n+TTTTCTTTACACTTTCACACTATCGGCCTATATAAAATCCTATTATAAACTGCTGTACAG\n+TGCATTGGGTTTGATAAGCCCTCTAAGTCTGTATATCATGATCTCATTGATACCTGCGGC\n+AATACACCCCCACAAACGCAGCTCCCTGCATTTTAAGTATTATAGAGAGTAATTTCACTG\n+TCCATGTCTTTTTTGATCATAAAGCAGGTCTAGGTGCTGTATACAACACTGTATTGAAAT\n+GCTGAAGCAGGTCTAGGTGCTGTATACAACACTGTATTGAAATGCTGAAGCCAATATATC\n+AATCTATCTGTCTATATTTTATGAAATATCAGAGCATTTTAGTCCAAATTTGTCTGTTTT\n+GTTCATTGCAATGCTGACTGCTTAACATTTTCCTCAGTGGTGGCCATCACCACAGCATTT\n+ACTGGTGAGTTTTACCAAGCTACAGTGGGCCACAATGAGTTACCTGTTATTGGCCTGGCT\n+CTATGGCATGCAGAAAACAGTCAGCCAATCAGAGGACAGACTCAGAGACAGACACAAAGT\n+GCCCTGTTCTTGTTAGAGCAGAGATAAATGAGACAGAGGAGATGTAAAACTATACTGAGA\n+TGGTTTTTTGGTTCTTAAAACCACAAATATATTGTATTACGGATATCAAAACTTCAAATA\n+AAACCAGTAAAGTGTAAAACATGGGACCTCTAAATGAAGGGCTGTTTTGTTTCGTAGTTC\n+TAGATTATGTCACTATGTGGCCTCGGTCTCTGTCTGCGTAAAGCGGCTACATGTAGAATT\n+TGACCCACTTTGGTGCCCACATATGGTAACTAAAACACTACAGACAGTATGCACTCCAAC\n+CCTATATAATTTATGTCCTTCAGTGAAGCCTACAAAATGTGCAGCATGATTTGCTATGAT\n+TAATACCTAGAATTATATTAAATTTTAGAAGTTTAGAGAGATGGCTTATACCTTTTTCTG\n+AGCATGGAGTCATCTCATGTAATTTGTCACATACCTTTGGCCAGCTTAAACCTGGTCATT\n+CCAGTGAGAGCTGCTCACCCAAATGTTTCTGTATGCACCTCCTGCTGTGGGTGCCAATAG\n+CTCATGCAAGTTGGTGTCTTTCAACTAAGGCCACTATGTGAGGACCATCAGCAGCAAATT\n+AAATTAGAAGTGCCTTGGAGTTTCTCAGGCCCTCTTCTATGATCATTAAGGTCTCAGTGA\n+CAGAACAGTGCCCCTTTGAGGGACTATGGCATCTTGAGCACTTAAGTTGGCCGAATGTTT\n+GATCTCAAATATGAGTGCACTGTTTCAAAGTCACAAAAGCCTCTGGACAAGGCATAGTTA\n+CTGGGCTTGGTTAACCTTTTTTTCATACCGGCGTGACTGCTGGAACTTGGAACATGTAGC\n+CTAAGTTTCCCTTTGTCTGCAATTACTGAGGTATGCCATGTTGAAATAGAACAATAATTG\n+CAGGGAGGAATTATTCCAATCGACCCATCGGCAGGCTTTCCAATACAGTGCTTTTTGTTT\n+CACAGCAACAAAGAACAAGAGCTTTTAAATGCATCCATCTGGACCCTGACTATGAAAGGT\n+CATTGGTAACGCTGCTGGATTTCTGTTATGTCAGCAAACTGCAGGTCAAAGCGATGATAC\n+AGTCTTATAATGGCTTTCTCCAAGTGAAGGTGGGATAACATGGTTTAAACAAGCTCAGTT\n+AAATGGATCAGAGCATCAACTGCTGTGAACAAGCAATCACACGGTCAAGGTCAAACATTT\n+ACACTTCTGAGAGATCTGGAGAGTATGTCATGGCAGTATTGCTTTTCAGTTATTTCAGTA\n+ACTGATTACGCCTTGTACTTAGTCTCAGAATTAGTGATTTGATTCAAAGTGTTTTATATA\n+TGTATATTTCAAAGCAAAAATACAGCTTAGGTAAAGCTCCTTAGACTCATGCAGCCTAAT\n+TTACTGTAGACATTCCTCCATGTACAGTACTGTGCAAATGTTTTAGGCAGTTTTAGGCAC\n+TAAAGGTGAACTGAGGATGCAGTCCCACGATTAATTTTTATTCATCAGTTAACCTCATGT\n+GAAGTGTAGTAAACAGAAAAAACCTAAATCAGATCAGTATTTCTTATGACCCCCCTCCCC\n'..b'TCATTGGCGTAGGCCCCACCTGCTCTTGTC\n+CTTGTGTGACACAGGCTCTAAATAAGCAGCATGATGAATAAAAATGACACTGAGATGAAT\n+AGGAATCCACAGCAAATCAGAGAGCACTCTCATCTCGTCTCATCCTCCGACTGAATAGCG\n+AGCGGCTGCCTCCTTTATTCTTTTGAACTCTTCGCGGTTTTGGCACAAACATGCCAGAGA\n+CAGAGAGCGCTGTTACGCTCCCACCAGAGCAACTACGCTTAATTTTACTGAGGTGAAAGT\n+TTTTTTTTTTTTTCTGGATAAAGCTTTGGAAAAGTTCTCAACTGTTGCTTCTTTAAAGAT\n+GCTGGGGCATATTCTGCCCTATTTTCCAGTTTTTTACATCTCCCATAAATATTTTCACAA\n+GCTGTAACATTTTAGACAGGATTAAATTGAACTAGATAACTGACCAAAGGCTGCTAAATT\n+ATTTTTATGTGCCTTGTAGCACAAATACTGTCTCACATATATTTTAAGTATCTTAATATT\n+CCCTCTGATGTTTAATTTAACAAAGCTGTCCTTCCCTCCATCTCTCTCCTCATAGTTTTT\n+CATTACAATTAGCTCTCATTAGATAGAATTGTATTTGTTGTCTTTGTGCCAGTCACTCCA\n+GTCTATTTTGCCAGCACACAACTAAATAATTGTTTTGCCTTGTCAGTGTCTTGTTTTACA\n+GTTTTAATGATGAGCCCAAAACATGTCAAATATGAGGAACTATAACTTATCAAGAAAGTG\n+GGGTAGTTGAGATAAAACTGTTTCCGAGGTGGAGGTTGGCTGAGCTCCATTTGGCCACAA\n+AATGTAGCTGAAAGGGCAGAGAAACCCACTTTAATGGAGTACAGGTTGCATATGAGCTGG\n+TAGGAAAGATATAGAATCATTATCAGCTGATAATCTAACAGTAGCAGTCAGTGTAGATGC\n+TGTGCTAACGCAAAGTTGTGAAACTACTGTCTGTAAGCAACTCATCCAATTGTTATTGCT\n+GCCAAGCTTAAGTATTTTAACAGCTTTTAGGTGTTGTTAGCACAGTACTCGTTTGGATCC\n+GGCCATGACAAGAAATCTTTATCGGCCACTAATTTAATTAATCATCTTTTTTCAAGCAAA\n+AATCACAAACTTTCTTTGGTTCCACTTTTTCTCATTGTAGATTAAATATCTTTTGGGTTT\n+TGGCACAGGCTGGACAAAAAAACCTCTGAGACGCTGTGATAAGAATTTATTTTCACATTT\n+TTTTTACTTTTCAGGGACTACACAATTATTTGGTGATGAAAATAATATTTTGTTGCAGCC\n+CAAATGTTAACTTGCCACTCAAGCTGTCAATTCAAGAAGGCAAAAGCAACAAATACTGCT\n+TTGCCGAGCTGAAATGAGAGTAGAAACCATGCCCGGGCCAGCTCAGCCTGGTCAGGTTTT\n+TGAGTCCATCTATTGTTAACATTCAGGTTGCAGCACAAGGGAAGTTCCTAAATTCTTCTG\n+GCATGTTAATGTTTTCCAAACTGATGTTACCAAGTCCTCGTTATGATGAGAAAAAAATAT\n+GCTGAGAGTGAAATTGATCAAAGTGGAAACATTAAGCACTGCCACAGTCCCCTCCACCCA\n+TCTTTCTCCCTCTCTCTCCACCGCAACGTGGAAACTGCTCCTCTCTCTCTCTCCCTCTCC\n+CTCTCCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCCTCCCTCCCA\n+TTCTGTGCTCCGGTATACTCGCTCTCTCACTCTCTCCCTCTCTCCCCACTCTCCAGAGAG\n+GGGTTCAGTCAGACAGATGTAACACAGCAGTAGAAGCCTGAGCTGAGCTGGCAGGCTGCG\n+GAGGCCAGACCAGAGCCAGCAGCTCTGACTGGAGAGAACAGCGAAGAGGTGACGGCAGCA\n+GCGGCAGCACAGGGGATAAAACAGTAGTTGTAGTAGCAGTTTCAGTAGTTGTACTTCAGT\n+TACAGAGCTGACTTTACCTCAGGATATGGGTGTGTTGATCTGGGCGCTCATCACCCTGTG\n+AATTTAACATGGACCCTGACACCAGCACCCATCCAGAGACACAACATGGTGAGTGAGATT\n+TGAAGGAGGAAAAGATTAGAAACAATGAGAGTGATACTGTGGGAAAAGTTGAGGAGCGTG\n+TTAGTGAACAAGGGAGTCAGGTCACATGGTTTTCCCGTTAGGGTTGCTATTATGACTAGG\n+CTCTCCATTTGGCTCTAAAGCTGTTGACTAGGGCCTCTGTGGTCAAATGAATAGGACTTC\n+AGTCTGTTCTGTTGAGTAGGGGGTGAAGGGGTGAGGGGTGTTACATGGCAATGTGATGAA\n+ATCAACTGCCTGCTTAAGACATTTTGCTCTAATGACCCTCTTGTCATTACTTTAACAGGG\n+TTGTAAAGTTTTTTTTGCTTTTTTGTAGCTAAGAAAGTTGAGAATTGTTTTGTCTGAACT\n+CTCTCTGGGATTTGTCTTGTCGGTTTTTGGCTGGTTTTTGGTGTGAGCTTCACAGAGCAG\n+GAAGCTCTCCTCCTGCTCTTTGAATGGAATAATTAACTGTGAGGGCCGTGGGTGTACCAT\n+TTCTGCTCAAAAGCAGCAAGATAATTTGATGGTTATGTGAATCTGTATATTTGTATGCTG\n+TGTAGTGGTGCTGCATGTGGGTGTTTGTGTTAGAGACACAGAGGGGCACCAAAGAATGAA\n+AGTGGGAGACAGAGGGAACGAGAAGGGGAGAGACCGAGAGAGAAGGACTTATGTACACAA\n+ATAAATCCAGGGGGATCTAGACTGCACTGTAGGCCAGTGCGAATGCTCACTCTTTCTCTT\n+CCCCCTCATTTTCCGCTGCCTCTTTTTTTTCCTCCACTGCTCCAAATGTTGGAGCCCAAA\n+CTAGATAGTTAAAAGGCAGAAAAAAATGGTGTGTTATTAACTGGGCCAGAAAAGACGATG\n+TTTTGTCTTCATGGCCGATGGGAAGGACTCTGCTGTGCTGCTGATGAAGAATGTGGTTAA\n+CAGAATGAGTCAACAGAGAACATATTTCCAAGAGACTGTAGTTTCTCTTCGCCAGCGTCT\n+AAGCCACATTGCTTTATGCACTGGGCTCTCTGCCATGACGGCGAAGGAGAGACAAAGAGA\n+GGGGAAAGAGAATCGCTAAAGATAAAACTCTGTTTTTATCCATCTCCTGAGACACAAAAC\n+TTCTGTTAGCAATCTAACCAAATAAGCGAGAACATTAAGCTTCTTCTCACCTTCCCAAAA\n+TAAAATGTCAACTTCATGCAGTAATACTCACTCTCTTCTCTCAACCAGTCAGCCAGCCTC\n+TTTCAGTGCATACAGCTTGAGGAGATCCCTTCTAAAGGTCCAATATAAATAGAAAAGTGG\n+GAGTAGAAAGGGCAATAATCTGATATCATCTGATTACATTCACACCTCAGGCTTGCACGC\n+TACAGGAAGAGTTCACGCTTCCCAAGGCTTGCAGGCATCACTTTTCACTCATTCTCAGAT\n+AGACACACAAACACATGCACAGAAATATCATTGCTGTTGTTTGCAACAAATCCTGGAGTA\n+GACTACTTTGCATTGAATTTCTATATGCATGTGTTTTCATATATTACACATTGCTTTTTG\n+TACTTTGCATATAAAGTAGATGCTGATCTGCTATCTGCATATATAGTAGCTGCAGATAGC\n+ACTGTAACTACATCTACATATTTCTTGTTTGTATCTACATCAAGCAGATAGTTGCTGATG\n+CTGGACAACCTCTTCCGATATTCATTATTTTGGAAAAAAAAATATCAATGGCATTCCTAC\n+CTTACAACTTAAATTAAAAGTGGTCACTTGAGTAACTGCAGAACATGTGTTCACCCAATA\n+GTTTTTTTTAGAGAGTGTCACTCAAAGTATTCCTCCTTACAAGCACCCATGCCTAAATCA\n' |
b |
diff -r 5673e72241aa -r bdfc22c1c3e3 test-data/small.fasta.align --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/small.fasta.align Wed May 02 20:18:11 2018 -0400 |
b |
@@ -0,0 +1,104 @@ +18 0.00 0.00 0.00 scaffold_1 613 632 (13519) (GT)n#Simple_repeat 1 20 (0) m_b1s252i0 1 + + scaffold_1 613 GTGTGTGTGTGTGTGTGTGT 632 + + (GT)n#Simple_ 1 GTGTGTGTGTGTGTGTGTGT 20 + +Matrix = Unknown +Transitions / transversions = 1.00 (0/0) +Gap_init rate = 0.00 (0 / 19), avg. gap size = 0.0 (0 / 0) + + +16 18.30 2.22 2.22 scaffold_1 780 824 (13327) (ATAATA)n#Simple_repeat 1 45 (0) m_b1s252i1 2 + + scaffold_1 780 ATATTAAT-ATACTAAATAATGATAATATTAATTCTAATAATGATA 824 + v - v - i v vv i + (ATAATA)n#Sim 1 ATAATAATAATAAT-AATAATAATAATAATAATAATAATAATAATA 45 + +Matrix = Unknown +Transitions / transversions = 0.40 (2/5) +Gap_init rate = 0.05 (2 / 44), avg. gap size = 1.00 (2 / 2) + + +12 23.88 4.55 0.00 scaffold_1 2231 2274 (11877) (CAGA)n#Simple_repeat 1 46 (0) m_b1s252i2 3 + + scaffold_1 2231 CAGA-AAACAGTCAGCCAATCAGA-GGACAGACTCAGAGACAGACA 2274 + - i v v iv -i vv v + (CAGA)n#Simpl 1 CAGACAGACAGACAGACAGACAGACAGACAGACAGACAGACAGACA 46 + +Matrix = Unknown +Transitions / transversions = 0.50 (3/6) +Gap_init rate = 0.05 (2 / 43), avg. gap size = 1.00 (2 / 2) + + +15 18.42 10.20 0.00 scaffold_1 4853 4901 (9250) (TC)n#Simple_repeat 1 54 (0) m_b1s252i3 4 + + scaffold_1 4853 TCTATTTTTCTCTCTCTCTCT-TCGGTCAGTTTCTCT-TCT-TCTC-CTC 4898 + v i i - vv vv i - - - + (TC)n#Simple_ 1 TCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC 50 + + scaffold_1 4899 -CTC 4901 + - + (TC)n#Simple_ 51 TCTC 54 + +Matrix = Unknown +Transitions / transversions = 0.60 (3/5) +Gap_init rate = 0.10 (5 / 48), avg. gap size = 1.00 (5 / 5) + + +13 19.07 1.82 7.69 scaffold_1 6230 6284 (7867) (TAATTAA)n#Simple_repeat 1 52 (0) m_b1s252i4 5 + + scaffold_1 6230 TAATGAGT-ATTACTAATTAATAATTGGATGAGTTTATGAATTCATCATT 6278 + v i - v -i - i v - v v + (TAATTAA)n#Si 1 TAATTAATAATTAATAATTAATAATT-AAT-AATTAAT-AATTAATAATT 47 + + scaffold_1 6279 AAATAA 6284 + - + (TAATTAA)n#Si 48 -AATAA 52 + +Matrix = Unknown +Transitions / transversions = 0.60 (3/5) +Gap_init rate = 0.09 (5 / 54), avg. gap size = 1.00 (5 / 5) + + +15 28.31 0.00 3.51 scaffold_1 6548 6606 (7545) (GACA)n#Simple_repeat 1 57 (0) m_b1s252i5 6 + + scaffold_1 6548 GACAGAGAGATAAACAGACAGACATGGAAACAGAGGGACATAGAAAACAA 6597 + v i i viv i vi v -v i - + (GACA)n#Simpl 1 GACAGACAGACAGACAGACAGACAGACAGACAGACAGACAGA-CAGAC-A 48 + + scaffold_1 6598 GACAGAGAG 6606 + v + (GACA)n#Simpl 49 GACAGACAG 57 + +Matrix = Unknown +Transitions / transversions = 0.86 (6/7) +Gap_init rate = 0.03 (2 / 58), avg. gap size = 1.00 (2 / 2) + + +67 2.94 1.43 0.00 scaffold_1 11981 12050 (2170) (CT)n#Simple_repeat 1 71 (0) c_b1s251i0 7 + + scaffold_1 11981 CTCTCTCTCTCTCCCTCTCCCTCTC-CTCTCTCTCTCTCTCTCTCTCTCT 12029 + i i - + (CT)n#Simple_ 1 CTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCT 50 + + scaffold_1 12030 CTCTCTCTCTCTCTCTCTCTC 12050 + + (CT)n#Simple_ 51 CTCTCTCTCTCTCTCTCTCTC 71 + +Matrix = Unknown +Transitions / transversions = 1.00 (2/0) +Gap_init rate = 0.01 (1 / 69), avg. gap size = 1.00 (1 / 1) + + +19 15.37 2.78 0.00 scaffold_1 12078 12113 (2107) (CT)n#Simple_repeat 1 37 (0) m_b1s252i6 8 + + scaffold_1 12078 CTCGCTCTCTCACTCTCTCCCTCTCTC-CCCACTCTC 12113 + v v i - i v + (CT)n#Simple_ 1 CTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC 37 + +Matrix = Unknown +Transitions / transversions = 0.67 (2/3) +Gap_init rate = 0.03 (1 / 35), avg. gap size = 1.00 (1 / 1) + + |
b |
diff -r 5673e72241aa -r bdfc22c1c3e3 test-data/small.fasta.cat --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/small.fasta.cat Wed May 02 20:18:11 2018 -0400 |
b |
@@ -0,0 +1,103 @@ +18 0.00 0.00 0.00 scaffold_1 613 632 (13519) (GT)n#Simple_repeat 1 20 (0) m_b1s252i0 + + scaffold_1 613 GTGTGTGTGTGTGTGTGTGT 632 + + (GT)n#Simple_ 1 GTGTGTGTGTGTGTGTGTGT 20 + +Matrix = Unknown +Transitions / transversions = 1.00 (0/0) +Gap_init rate = 0.00 (0 / 19), avg. gap size = 0.0 (0 / 0) + +16 18.30 2.22 2.22 scaffold_1 780 824 (13327) (ATAATA)n#Simple_repeat 1 45 (0) m_b1s252i1 + + scaffold_1 780 ATATTAAT-ATACTAAATAATGATAATATTAATTCTAATAATGATA 824 + v - v - i v vv i + (ATAATA)n#Sim 1 ATAATAATAATAAT-AATAATAATAATAATAATAATAATAATAATA 45 + +Matrix = Unknown +Transitions / transversions = 0.40 (2/5) +Gap_init rate = 0.05 (2 / 44), avg. gap size = 1.00 (2 / 2) + +12 23.88 4.55 0.00 scaffold_1 2231 2274 (11877) (CAGA)n#Simple_repeat 1 46 (0) m_b1s252i2 + + scaffold_1 2231 CAGA-AAACAGTCAGCCAATCAGA-GGACAGACTCAGAGACAGACA 2274 + - i v v iv -i vv v + (CAGA)n#Simpl 1 CAGACAGACAGACAGACAGACAGACAGACAGACAGACAGACAGACA 46 + +Matrix = Unknown +Transitions / transversions = 0.50 (3/6) +Gap_init rate = 0.05 (2 / 43), avg. gap size = 1.00 (2 / 2) + +15 18.42 10.20 0.00 scaffold_1 4853 4901 (9250) (TC)n#Simple_repeat 1 54 (0) m_b1s252i3 + + scaffold_1 4853 TCTATTTTTCTCTCTCTCTCT-TCGGTCAGTTTCTCT-TCT-TCTC-CTC 4898 + v i i - vv vv i - - - + (TC)n#Simple_ 1 TCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC 50 + + scaffold_1 4899 -CTC 4901 + - + (TC)n#Simple_ 51 TCTC 54 + +Matrix = Unknown +Transitions / transversions = 0.60 (3/5) +Gap_init rate = 0.10 (5 / 48), avg. gap size = 1.00 (5 / 5) + +13 19.07 1.82 7.69 scaffold_1 6230 6284 (7867) (TAATTAA)n#Simple_repeat 1 52 (0) m_b1s252i4 + + scaffold_1 6230 TAATGAGT-ATTACTAATTAATAATTGGATGAGTTTATGAATTCATCATT 6278 + v i - v -i - i v - v v + (TAATTAA)n#Si 1 TAATTAATAATTAATAATTAATAATT-AAT-AATTAAT-AATTAATAATT 47 + + scaffold_1 6279 AAATAA 6284 + - + (TAATTAA)n#Si 48 -AATAA 52 + +Matrix = Unknown +Transitions / transversions = 0.60 (3/5) +Gap_init rate = 0.09 (5 / 54), avg. gap size = 1.00 (5 / 5) + +15 28.31 0.00 3.51 scaffold_1 6548 6606 (7545) (GACA)n#Simple_repeat 1 57 (0) m_b1s252i5 + + scaffold_1 6548 GACAGAGAGATAAACAGACAGACATGGAAACAGAGGGACATAGAAAACAA 6597 + v i i viv i vi v -v i - + (GACA)n#Simpl 1 GACAGACAGACAGACAGACAGACAGACAGACAGACAGACAGA-CAGAC-A 48 + + scaffold_1 6598 GACAGAGAG 6606 + v + (GACA)n#Simpl 49 GACAGACAG 57 + +Matrix = Unknown +Transitions / transversions = 0.86 (6/7) +Gap_init rate = 0.03 (2 / 58), avg. gap size = 1.00 (2 / 2) + +67 2.94 1.43 0.00 scaffold_1 11981 12050 (2170) (CT)n#Simple_repeat 1 71 (0) c_b1s251i0 + + scaffold_1 11981 CTCTCTCTCTCTCCCTCTCCCTCTC-CTCTCTCTCTCTCTCTCTCTCTCT 12029 + i i - + (CT)n#Simple_ 1 CTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCT 50 + + scaffold_1 12030 CTCTCTCTCTCTCTCTCTCTC 12050 + + (CT)n#Simple_ 51 CTCTCTCTCTCTCTCTCTCTC 71 + +Matrix = Unknown +Transitions / transversions = 1.00 (2/0) +Gap_init rate = 0.01 (1 / 69), avg. gap size = 1.00 (1 / 1) + +19 15.37 2.78 0.00 scaffold_1 12078 12113 (2107) (CT)n#Simple_repeat 1 37 (0) m_b1s252i6 + + scaffold_1 12078 CTCGCTCTCTCACTCTCTCCCTCTCTC-CCCACTCTC 12113 + v v i - i v + (CT)n#Simple_ 1 CTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC 37 + +Matrix = Unknown +Transitions / transversions = 0.67 (2/3) +Gap_init rate = 0.03 (1 / 35), avg. gap size = 1.00 (1 / 1) + +## Total Sequences: 1 +## Total Length: 14220 +## Total NonMask ( excluding >20bp runs of N/X bases ): 14220 +## Total NonSub ( excluding all non ACGT bases ):14220 +RepeatMasker version open-4.0.7 , default mode +run with rmblastn version 2.2.27+ +RepeatMasker Combined Database: Dfam_Consensus-20170127 |
b |
diff -r 5673e72241aa -r bdfc22c1c3e3 test-data/small.fasta.gff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/small.fasta.gff Wed May 02 20:18:11 2018 -0400 |
b |
@@ -0,0 +1,11 @@ +##gff-version 2 +##date 2018-04-21 +##sequence-region dataset_12.dat +scaffold_1 RepeatMasker similarity 613 632 0.0 + . Target "Motif:(GT)n" 1 20 +scaffold_1 RepeatMasker similarity 780 824 18.3 + . Target "Motif:(ATAATA)n" 1 45 +scaffold_1 RepeatMasker similarity 2231 2274 23.9 + . Target "Motif:(CAGA)n" 1 46 +scaffold_1 RepeatMasker similarity 4853 4901 18.4 + . Target "Motif:(TC)n" 1 54 +scaffold_1 RepeatMasker similarity 6230 6284 19.1 + . Target "Motif:(TAATTAA)n" 1 52 +scaffold_1 RepeatMasker similarity 6548 6606 28.3 + . Target "Motif:(GACA)n" 1 57 +scaffold_1 RepeatMasker similarity 11981 12050 2.9 + . Target "Motif:(CT)n" 1 71 +scaffold_1 RepeatMasker similarity 12078 12113 15.4 + . Target "Motif:(CT)n" 1 37 |
b |
diff -r 5673e72241aa -r bdfc22c1c3e3 test-data/small.fasta.log --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/small.fasta.log Wed May 02 20:18:11 2018 -0400 |
b |
@@ -0,0 +1,10 @@ +SW score % div. % del. % ins. query sequence pos in query: begin end (left) repeat class/family pos in repeat: begin end (left) ID + +18 0.0 0.0 0.0 scaffold_1 613 632 (13588) (GT)n Simple_repeat 1 20 (0) 1 +16 18.3 2.2 2.2 scaffold_1 780 824 (13396) (ATAATA)n Simple_repeat 1 45 (0) 2 +12 23.9 4.5 0.0 scaffold_1 2231 2274 (11946) (CAGA)n Simple_repeat 1 46 (0) 3 +15 18.4 10.2 0.0 scaffold_1 4853 4901 (9319) (TC)n Simple_repeat 1 54 (0) 4 +13 19.1 1.8 7.7 scaffold_1 6230 6284 (7936) (TAATTAA)n Simple_repeat 1 52 (0) 5 +15 28.3 0.0 3.5 scaffold_1 6548 6606 (7614) (GACA)n Simple_repeat 1 57 (0) 6 +67 2.9 1.4 0.0 scaffold_1 11981 12050 (2170) (CT)n Simple_repeat 1 71 (0) 7 +19 15.4 2.8 0.0 scaffold_1 12078 12113 (2107) (CT)n Simple_repeat 1 37 (0) 8 |
b |
diff -r 5673e72241aa -r bdfc22c1c3e3 test-data/small.fasta.masked --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/small.fasta.masked Wed May 02 20:18:11 2018 -0400 |
b |
b'@@ -0,0 +1,286 @@\n+>scaffold_1\n+TGCTTTTCTGAGATTTATGTCAGCTCCCCATTGGGAGATACAGGAGAATC\n+CAATATATTCCTGTCTCGCTTCTTTTATATTTATCCTTTCTACACCTGAC\n+ATCCTCAGTGATTGAAGTGACAAAAAGTGGAGCGCACGCAGTGGTCATTA\n+CCCTCCAATGGTACTTCTAATTAAGGAAAGATTTTTTGCATTCACTGAGC\n+AAAACACTTATTTGCATGAATGGAAAATCATAAATGAGGGGTCCATTAAA\n+CAAATTTCCTAATTGTTTGCTTTTTTTTCTTCCTACTTCACCATCCCTCT\n+AAAGCTATTACTCTCTATTCTACCCCATTATCGAGTGCACAAACACCAGC\n+CCCAAATTGTCTTATCTCTGCTTTGATAAATGATATTTTTTTCTCTTATA\n+ATTGTGTTTCTCTCTAGATGTGCGGTGTCCATCAAGACACTGGATGTCAC\n+GTGGTCACTCCTGTGAAGAGCGAACTGTGTGGAACCCGAAGTACTGTGTG\n+GTTGCTGACTGTCAGATGCTGCTTCTGAATGAGGAGGAGGTGGTGAGAGC\n+AGACAGCACAATCTCTTGTCTGGCATGTTCCTCTGTGTCACCATTGTCTC\n+TCTCATCTGCTCNNNNNNNNNNNNNNNNNNNNCCTTGTCAATGTCATTAC\n+ACAATTTATGTTGGTGCACTGCATGTCAGAGTCTGAACTTTGTTGGACAT\n+CTACTTTCTGTCACCTGATACCTCATACTTTACACTCTAATCCCTTTTTC\n+CTTTCTAACTGCTGCTGTGTTAAGCTGTCNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNGTGAATGGAGATTCACAATGAAGAAA\n+CACAAGGTCCAGACTGAACCCCTGTGCTGTGTGAAGAGTTCACTCTACAT\n+TTACATTCATCCCACATCAGAGACCAGCAATAGCCAGGTTAAGCAGAGCA\n+AGGCTGAGTCTGGCTCTTTGCCTCTGTTGTTTTGATTAGTGTCACACTGG\n+ATTATGCTAGTTTGTGTGAGATTAGAGGTCCTGTTGTTACGAGCAAATCA\n+GCAGTCAGCAGGGTAATCTAAATTACATTGTTCTGTCGGCAATATCCTCT\n+CCTCGCTCAACAGCTTTGTGTGAGCTCATATTTAAAGCCCCCACAGTAGT\n+GCAGTGCATAAAATCATGCAGATACAGATCAGAAGCTTCAGGTAATGTTT\n+ACATCAAACATCAGGGGGAAAATGTGATCTCAGTGCCTCTGACCGTGGTG\n+TGATTGTTGGTGCCGGACAGGCTGGTTTGAGTATTTCTAAAACTGCTGAT\n+CTCCTGGGATTTTCACACACGGCATTCTCTAGAGTTTACTCAGAACGGTG\n+TGGAAAGCAAAAAAACATCCCGCGAGCTGCAATCCTGCGGATGGAAACAC\n+CTTGTTGATGGGATATGTCAGAGGAATGGCCAGGCTGGATCAATCTGACA\n+GAAAGGCGGTAACTCAGATAAGCACTCTCTACAACTGTGGTGAGCAGAAA\n+AGCGTTCTCAGAATGCACAACACGTTCAACCTTGAGGAAGATGTTGCTAC\n+AGCAGCAGATGAAGACCACGTCTGGTTCCACTCCTGTCAGCCAAGAACAT\n+CTCAGACTGCACAGGACACCAAAACAAAAACACATCCTGTTTTCTACTGG\n+TGGTAGAGTCACAATTTGGCAATAAGATAAATCCATGGACCCAACTTGCC\n+TTGTGTCAATAGACCAAGCTGCTGGTGGTCTTGGGGAATGTTTTCTTTAC\n+ACTTTCACACTATCGGCCTATATAAAATCCTATTATAAACTGCTGTACAG\n+TGCATTGGGTTTGATAAGCCCTCTAAGTCTGTATATCATGATCTCATTGA\n+TACCTGCGGCAATACACCCCCACAAACGCAGCTCCCTGCATTTTAAGTAT\n+TATAGAGAGTAATTTCACTGTCCATGTCTTTTTTGATCATAAAGCAGGTC\n+TAGGTGCTGTATACAACACTGTATTGAAATGCTGAAGCAGGTCTAGGTGC\n+TGTATACAACACTGTATTGAAATGCTGAAGCCAATATATCAATCTATCTG\n+TCTATATTTTATGAAATATCAGAGCATTTTAGTCCAAATTTGTCTGTTTT\n+GTTCATTGCAATGCTGACTGCTTAACATTTTCCTCAGTGGTGGCCATCAC\n+CACAGCATTTACTGGTGAGTTTTACCAAGCTACAGTGGGCCACAATGAGT\n+TACCTGTTATTGGCCTGGCTCTATGGCATGNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNCAAAGTGCCCTGTTCTTGTTAGAGCA\n+GAGATAAATGAGACAGAGGAGATGTAAAACTATACTGAGATGGTTTTTTG\n+GTTCTTAAAACCACAAATATATTGTATTACGGATATCAAAACTTCAAATA\n+AAACCAGTAAAGTGTAAAACATGGGACCTCTAAATGAAGGGCTGTTTTGT\n+TTCGTAGTTCTAGATTATGTCACTATGTGGCCTCGGTCTCTGTCTGCGTA\n+AAGCGGCTACATGTAGAATTTGACCCACTTTGGTGCCCACATATGGTAAC\n+TAAAACACTACAGACAGTATGCACTCCAACCCTATATAATTTATGTCCTT\n+CAGTGAAGCCTACAAAATGTGCAGCATGATTTGCTATGATTAATACCTAG\n+AATTATATTAAATTTTAGAAGTTTAGAGAGATGGCTTATACCTTTTTCTG\n+AGCATGGAGTCATCTCATGTAATTTGTCACATACCTTTGGCCAGCTTAAA\n+CCTGGTCATTCCAGTGAGAGCTGCTCACCCAAATGTTTCTGTATGCACCT\n+CCTGCTGTGGGTGCCAATAGCTCATGCAAGTTGGTGTCTTTCAACTAAGG\n+CCACTATGTGAGGACCATCAGCAGCAAATTAAATTAGAAGTGCCTTGGAG\n+TTTCTCAGGCCCTCTTCTATGATCATTAAGGTCTCAGTGACAGAACAGTG\n+CCCCTTTGAGGGACTATGGCATCTTGAGCACTTAAGTTGGCCGAATGTTT\n+GATCTCAAATATGAGTGCACTGTTTCAAAGTCACAAAAGCCTCTGGACAA\n+GGCATAGTTACTGGGCTTGGTTAACCTTTTTTTCATACCGGCGTGACTGC\n+TGGAACTTGGAACATGTAGCCTAAGTTTCCCTTTGTCTGCAATTACTGAG\n+GTATGCCATGTTGAAATAGAACAATAATTGCAGGGAGGAATTATTCCAAT\n+CGACCCATCGGCAGGCTTTCCAATACAGTGCTTTTTGTTTCACAGCAACA\n+AAGAACAAGAGCTTTTAAATGCATCCATCTGGACCCTGACTATGAAAGGT\n+CATTGGTAACGCTGCTGGATTTCTGTTATGTCAGCAAACTGCAGGTCAAA\n+GCGATGATACAGTCTTATAATGGCTTTCTCCAAGTGAAGGTGGGATAACA\n+TGGTTTAAACAAGCTCAGTTAAATGGATCAGAGCATCAACTGCTGTGAAC\n+AAGCAATCACACGGTCAAGGTCAAACATTTACACTTCTGAGAGATCTGGA\n+GAGTATGTCATGGCAGTATTGCTTTTCAGTTATTTCAGTAACTGATTACG\n+CCTTGTACTTAGTCTCAGAATTAGTGATTTGATTCAAAGTGTTTTATATA\n+TGTATATTTCAAAGCAAAAATACAGCTTAGGTAAAGCTCCTTAGACTCAT\n+GCAGCCTAATTTACTGTAGACATTCCTCCATGTACAGTACTGTGCAAATG\n+TTTTAGGCAGTTTTAGGCACTAAAGGTGAACTGAGGATGCAGTCCCACGA\n+TTAATTTTTATTCATCAGTTAACCTCATGTGAAGTGTAGTAAACAGAAAA\n+AACCTAAATCAGATC'..b'TGTCCTTGTGTGACACAGGCTCTA\n+AATAAGCAGCATGATGAATAAAAATGACACTGAGATGAATAGGAATCCAC\n+AGCAAATCAGAGAGCACTCTCATCTCGTCTCATCCTCCGACTGAATAGCG\n+AGCGGCTGCCTCCTTTATTCTTTTGAACTCTTCGCGGTTTTGGCACAAAC\n+ATGCCAGAGACAGAGAGCGCTGTTACGCTCCCACCAGAGCAACTACGCTT\n+AATTTTACTGAGGTGAAAGTTTTTTTTTTTTTTCTGGATAAAGCTTTGGA\n+AAAGTTCTCAACTGTTGCTTCTTTAAAGATGCTGGGGCATATTCTGCCCT\n+ATTTTCCAGTTTTTTACATCTCCCATAAATATTTTCACAAGCTGTAACAT\n+TTTAGACAGGATTAAATTGAACTAGATAACTGACCAAAGGCTGCTAAATT\n+ATTTTTATGTGCCTTGTAGCACAAATACTGTCTCACATATATTTTAAGTA\n+TCTTAATATTCCCTCTGATGTTTAATTTAACAAAGCTGTCCTTCCCTCCA\n+TCTCTCTCCTCATAGTTTTTCATTACAATTAGCTCTCATTAGATAGAATT\n+GTATTTGTTGTCTTTGTGCCAGTCACTCCAGTCTATTTTGCCAGCACACA\n+ACTAAATAATTGTTTTGCCTTGTCAGTGTCTTGTTTTACAGTTTTAATGA\n+TGAGCCCAAAACATGTCAAATATGAGGAACTATAACTTATCAAGAAAGTG\n+GGGTAGTTGAGATAAAACTGTTTCCGAGGTGGAGGTTGGCTGAGCTCCAT\n+TTGGCCACAAAATGTAGCTGAAAGGGCAGAGAAACCCACTTTAATGGAGT\n+ACAGGTTGCATATGAGCTGGTAGGAAAGATATAGAATCATTATCAGCTGA\n+TAATCTAACAGTAGCAGTCAGTGTAGATGCTGTGCTAACGCAAAGTTGTG\n+AAACTACTGTCTGTAAGCAACTCATCCAATTGTTATTGCTGCCAAGCTTA\n+AGTATTTTAACAGCTTTTAGGTGTTGTTAGCACAGTACTCGTTTGGATCC\n+GGCCATGACAAGAAATCTTTATCGGCCACTAATTTAATTAATCATCTTTT\n+TTCAAGCAAAAATCACAAACTTTCTTTGGTTCCACTTTTTCTCATTGTAG\n+ATTAAATATCTTTTGGGTTTTGGCACAGGCTGGACAAAAAAACCTCTGAG\n+ACGCTGTGATAAGAATTTATTTTCACATTTTTTTTACTTTTCAGGGACTA\n+CACAATTATTTGGTGATGAAAATAATATTTTGTTGCAGCCCAAATGTTAA\n+CTTGCCACTCAAGCTGTCAATTCAAGAAGGCAAAAGCAACAAATACTGCT\n+TTGCCGAGCTGAAATGAGAGTAGAAACCATGCCCGGGCCAGCTCAGCCTG\n+GTCAGGTTTTTGAGTCCATCTATTGTTAACATTCAGGTTGCAGCACAAGG\n+GAAGTTCCTAAATTCTTCTGGCATGTTAATGTTTTCCAAACTGATGTTAC\n+CAAGTCCTCGTTATGATGAGAAAAAAATATGCTGAGAGTGAAATTGATCA\n+AAGTGGAAACATTAAGCACTGCCACAGTCCCCTCCACCCATCTTTCTCCC\n+TCTCTCTCCACCGCAACGTGGAAACTGCTCNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+CTCCCTCCCATTCTGTGCTCCGGTATANNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNCAGAGAGGGGTTCAGTCAGACAGATGTAACACAGCAG\n+TAGAAGCCTGAGCTGAGCTGGCAGGCTGCGGAGGCCAGACCAGAGCCAGC\n+AGCTCTGACTGGAGAGAACAGCGAAGAGGTGACGGCAGCAGCGGCAGCAC\n+AGGGGATAAAACAGTAGTTGTAGTAGCAGTTTCAGTAGTTGTACTTCAGT\n+TACAGAGCTGACTTTACCTCAGGATATGGGTGTGTTGATCTGGGCGCTCA\n+TCACCCTGTGAATTTAACATGGACCCTGACACCAGCACCCATCCAGAGAC\n+ACAACATGGTGAGTGAGATTTGAAGGAGGAAAAGATTAGAAACAATGAGA\n+GTGATACTGTGGGAAAAGTTGAGGAGCGTGTTAGTGAACAAGGGAGTCAG\n+GTCACATGGTTTTCCCGTTAGGGTTGCTATTATGACTAGGCTCTCCATTT\n+GGCTCTAAAGCTGTTGACTAGGGCCTCTGTGGTCAAATGAATAGGACTTC\n+AGTCTGTTCTGTTGAGTAGGGGGTGAAGGGGTGAGGGGTGTTACATGGCA\n+ATGTGATGAAATCAACTGCCTGCTTAAGACATTTTGCTCTAATGACCCTC\n+TTGTCATTACTTTAACAGGGTTGTAAAGTTTTTTTTGCTTTTTTGTAGCT\n+AAGAAAGTTGAGAATTGTTTTGTCTGAACTCTCTCTGGGATTTGTCTTGT\n+CGGTTTTTGGCTGGTTTTTGGTGTGAGCTTCACAGAGCAGGAAGCTCTCC\n+TCCTGCTCTTTGAATGGAATAATTAACTGTGAGGGCCGTGGGTGTACCAT\n+TTCTGCTCAAAAGCAGCAAGATAATTTGATGGTTATGTGAATCTGTATAT\n+TTGTATGCTGTGTAGTGGTGCTGCATGTGGGTGTTTGTGTTAGAGACACA\n+GAGGGGCACCAAAGAATGAAAGTGGGAGACAGAGGGAACGAGAAGGGGAG\n+AGACCGAGAGAGAAGGACTTATGTACACAAATAAATCCAGGGGGATCTAG\n+ACTGCACTGTAGGCCAGTGCGAATGCTCACTCTTTCTCTTCCCCCTCATT\n+TTCCGCTGCCTCTTTTTTTTCCTCCACTGCTCCAAATGTTGGAGCCCAAA\n+CTAGATAGTTAAAAGGCAGAAAAAAATGGTGTGTTATTAACTGGGCCAGA\n+AAAGACGATGTTTTGTCTTCATGGCCGATGGGAAGGACTCTGCTGTGCTG\n+CTGATGAAGAATGTGGTTAACAGAATGAGTCAACAGAGAACATATTTCCA\n+AGAGACTGTAGTTTCTCTTCGCCAGCGTCTAAGCCACATTGCTTTATGCA\n+CTGGGCTCTCTGCCATGACGGCGAAGGAGAGACAAAGAGAGGGGAAAGAG\n+AATCGCTAAAGATAAAACTCTGTTTTTATCCATCTCCTGAGACACAAAAC\n+TTCTGTTAGCAATCTAACCAAATAAGCGAGAACATTAAGCTTCTTCTCAC\n+CTTCCCAAAATAAAATGTCAACTTCATGCAGTAATACTCACTCTCTTCTC\n+TCAACCAGTCAGCCAGCCTCTTTCAGTGCATACAGCTTGAGGAGATCCCT\n+TCTAAAGGTCCAATATAAATAGAAAAGTGGGAGTAGAAAGGGCAATAATC\n+TGATATCATCTGATTACATTCACACCTCAGGCTTGCACGCTACAGGAAGA\n+GTTCACGCTTCCCAAGGCTTGCAGGCATCACTTTTCACTCATTCTCAGAT\n+AGACACACAAACACATGCACAGAAATATCATTGCTGTTGTTTGCAACAAA\n+TCCTGGAGTAGACTACTTTGCATTGAATTTCTATATGCATGTGTTTTCAT\n+ATATTACACATTGCTTTTTGTACTTTGCATATAAAGTAGATGCTGATCTG\n+CTATCTGCATATATAGTAGCTGCAGATAGCACTGTAACTACATCTACATA\n+TTTCTTGTTTGTATCTACATCAAGCAGATAGTTGCTGATGCTGGACAACC\n+TCTTCCGATATTCATTATTTTGGAAAAAAAAATATCAATGGCATTCCTAC\n+CTTACAACTTAAATTAAAAGTGGTCACTTGAGTAACTGCAGAACATGTGT\n+TCACCCAATAGTTTTTTTTAGAGAGTGTCACTCAAAGTATTCCTCCTTAC\n+AAGCACCCATGCCTAAATCA\n' |
b |
diff -r 5673e72241aa -r bdfc22c1c3e3 test-data/small.fasta.poly --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/small.fasta.poly Wed May 02 20:18:11 2018 -0400 |
b |
@@ -0,0 +1,2 @@ +18 0.0 0.0 0.0 scaffold_1 613 632 (13588) (GT)n Simple_repeat 1 20 (0) +67 2.9 1.4 0.0 scaffold_1 11981 12050 (2170) (CT)n Simple_repeat 1 71 (0) |
b |
diff -r 5673e72241aa -r bdfc22c1c3e3 test-data/small.fasta.stats --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/small.fasta.stats Wed May 02 20:18:11 2018 -0400 |
b |
@@ -0,0 +1,51 @@ +================================================== +file name: dataset_12.dat +sequences: 1 +total length: 14220 bp (14220 bp excl N/X-runs) +GC level: 39.94 % +bases masked: 378 bp ( 2.66 %) +================================================== + number of length percentage + elements* occupied of sequence +-------------------------------------------------- +SINEs: 0 0 bp 0.00 % + ALUs 0 0 bp 0.00 % + MIRs 0 0 bp 0.00 % + +LINEs: 0 0 bp 0.00 % + LINE1 0 0 bp 0.00 % + LINE2 0 0 bp 0.00 % + L3/CR1 0 0 bp 0.00 % + +LTR elements: 0 0 bp 0.00 % + ERVL 0 0 bp 0.00 % + ERVL-MaLRs 0 0 bp 0.00 % + ERV_classI 0 0 bp 0.00 % + ERV_classII 0 0 bp 0.00 % + +DNA elements: 0 0 bp 0.00 % + hAT-Charlie 0 0 bp 0.00 % + TcMar-Tigger 0 0 bp 0.00 % + +Unclassified: 0 0 bp 0.00 % + +Total interspersed repeats: 0 bp 0.00 % + + +Small RNA: 0 0 bp 0.00 % + +Satellites: 0 0 bp 0.00 % +Simple repeats: 8 378 bp 2.66 % +Low complexity: 0 0 bp 0.00 % +================================================== + +* most repeats fragmented by insertions or deletions + have been counted as one element + + +The query species was assumed to be homo +RepeatMasker Combined Database: Dfam_Consensus-20170127 + +run with rmblastn version 2.2.27+ +The query was compared to unclassified sequences in ".../dataset_2.dat" + |