# HG changeset patch
# User bgruening
# Date 1372149221 14400
# Node ID d4a2c739da3fce9945416b7bce427c9ad9521dc1
Initial release under a consistent username. Fixes for stdout and the trailing semicolon.
diff -r 000000000000 -r d4a2c739da3f RepeatMasker.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/RepeatMasker.xml Tue Jun 25 04:33:41 2013 -0400
@@ -0,0 +1,306 @@
+
+ Masks different kinds of repeats
+
+## The command is a Cheetah template, which allows some Python-based syntax.
+## Lines starting with two hashes are comments. Galaxy will turn newlines into spaces.
+
+## Create a temporary directory; it is passed to RepeatMasker via -dir and used to build the output file paths
+#import tempfile, os
+#set $dirname = os.path.abspath( tempfile.mkdtemp() )
+#set $input_filename = os.path.split( str($query) )[-1]
+#set $output_basename = os.path.join( $dirname, $input_filename )
+
+
+RepeatMasker
+-parallel 8
+
+$nolow
+$noint
+$norna
+
+#if str($species)!="all":
+ $species
+#end if
+
+
+-dir $dirname
+
+#if $adv_opts.adv_opts_selector=="advanced":
+
+ #if str($adv_opts.gc)!="0":
+ -gc $adv_opts.gc
+ #end if
+
+ $adv_opts.gccalc
+
+ #set $output_files_list = str($adv_opts.output_files).split(',')
+ #if "gff" in $output_files_list:
+ -gff
+ #end if
+ #if "html" in $output_files_list:
+ -html
+ #end if
+
+ $adv_opts.slow_search
+ $adv_opts.quick_search
+ $adv_opts.rush_search
+ $adv_opts.only_alus
+ $adv_opts.is_only
+
+#else:
+ ## Non-advanced mode: always pass -gff so the GFF file copied later is produced
+ -gff
+
+## End of the advanced / non-advanced option handling
+#end if
+
+$query
+
+2>&1;
+
+## Copy the output files to Galaxy. Newlines become spaces, so every command must end with ';'.
+## AgR: if there are no repeats, the output files may not exist.
+## This causes the job to fail, so touch files to ensure they exist.
+#if $adv_opts.adv_opts_selector=="advanced":
+
+ #if "summary" in $output_files_list:
+ ## Write out the summary file (default)
+ #set $summary_file = $output_basename + '.tbl'
+ touch $summary_file;
+ cp $summary_file $output_summary;
+ #end if
+
+ #if "gff" in $output_files_list:
+ ## Write out the gff file (default)
+ #set $gff_file = $output_basename + '.out.gff'
+ touch $gff_file;
+ cp $gff_file $output_gff;
+ #end if
+
+ #if "html" in $output_files_list:
+ ## Write out the html file
+ #set $html_file = $output_basename + '.out.html'
+ touch $html_file;
+ cp $html_file $output_html;
+ #end if
+
+#else:
+
+ ## Write out the summary file (default)
+ #set $summary_file = $output_basename + '.tbl'
+ touch $summary_file;
+ cp $summary_file $output_summary;
+
+ ## Write out the gff file (default)
+ #set $gff_file = $output_basename + '.out.gff'
+ touch $gff_file;
+ cp $gff_file $output_gff;
+
+
+## End of advanced options:
+#end if
+
+## Write out mask sequence file
+#set $mask_sequence_file = $output_basename + '.masked'
+touch $mask_sequence_file;
+cp $mask_sequence_file $output_mask;
+
+## Write out standard file (default)
+## The default '.out' file from RepeatMasker has a 3-line header and spaces rather
+## than tabs. Remove the header and replace the whitespaces with tab
+#set $standard_file = $output_basename + '.out'
+tail -n +4 $standard_file | tr -s ' ' '\t' > $output_std;
+
+## Delete the temporary directory (option before operand for portability; last command, so no trailing ';')
+rm -r $dirname
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ (adv_opts['adv_opts_selector'] == 'advanced' and 'mask' in adv_opts['output_files'])
+
+
+
+ (
+ (adv_opts['adv_opts_selector'] == 'advanced' and 'summary' in adv_opts['output_files'])
+ or
+ (adv_opts['adv_opts_selector'] == 'basic')
+ )
+
+
+
+ (adv_opts['adv_opts_selector'] == 'advanced' and 'html' in adv_opts['output_files'])
+
+
+
+ (adv_opts['adv_opts_selector'] == 'advanced' and 'gff' in adv_opts['output_files'])
+
+
+
+
+ RepeatMasker
+
+
+
+.. class:: warningmark
+
+**What it does**
+
+RepeatMasker is a program that screens DNA sequences for *interspersed repeats*
+and *low complexity* DNA sequences. The output of the program is a detailed
+annotation of the repeats that are present in the query sequence as well as a
+modified version of the query sequence in which all the annotated repeats have
+been masked (default: replaced by Ns).
+
+-----
+
+**How to read the results**
+
+
+
+The annotation file contains the cross_match output lines. It lists all best matches
+(above a set minimum score) between the query sequence and any of the sequences in
+the repeat database or with low complexity DNA. The term "best matches" reflects
+that a match is not shown if its domain is over 80% contained within the domain
+of a higher scoring match, where the "domain" of a match is the region in
+the query sequence that is defined by the alignment start and stop. These domains
+have been masked in the returned masked sequence file. In the output, matches are
+ordered by query name, and for each query by position of the start of the alignment.
+
+Example:
+
+======== ========= ========= ========= ========== =========== ========= ========= ============ =============== =================== ================ ============== ======= ==
+SW score perc div. perc del. perc ins. query seq. q-pos begin q-pos end (left)    w complement matching repeat repeat class/family repeat-pos begin repeat-pos end (left)  ID
+======== ========= ========= ========= ========== =========== ========= ========= ============ =============== =================== ================ ============== ======= ==
+1306     15.6      6.2       0.0       HSU08988   6563        6781      \(22462)  C            MER7A           DNA/MER2_type       336              103            \(0)    1
+12204    10.0      2.4       1.8       HSU08988   6782        7714      \(21529)  C            TIGGER1         DNA/MER2_type       2418             1493           \(0)    2
+279      3.0       0.0       0.0       HSU08988   7719        7751      \(21492)  +            (TTTTA)n        Simple_repeat       1                33             \(0)    3
+1765     13.4      6.5       1.8       HSU08988   7752        8022      \(21221)  C            AluSx           SINE/Alu            289              1              \(23)   4
+12204    10.0      2.4       1.8       HSU08988   8023        8694      \(20549)  C            TIGGER1         DNA/MER2_type       1493             827            \(925)  5
+1984     11.1      0.3       0.7       HSU08988   8695        9000      \(20243)  C            AluSg           SINE/Alu            305              1              \(5)    6
+12204    10.0      2.4       1.8       HSU08988   9001        9695      \(19548)  C            TIGGER1         DNA/MER2_type       827              2              \(1591) 7
+711      21.2      1.4       0.0       HSU08988   9696        9816      \(19427)  C            MER7A           DNA/MER2_type       122              2              \(224)  8
+======== ========= ========= ========= ========== =========== ========= ========= ============ =============== =================== ================ ============== ======= ==
+
+This is a sequence in which a Tigger1 DNA transposon has integrated into a MER7 DNA transposon copy.
+Subsequently two Alus integrated in the Tigger1 sequence. The simple repeat is derived from the
+poly A of the Alu element. The first line is interpreted like this:
+
+:Table description:
+
+1. **1306** = Smith-Waterman score of the match, usually complexity adjusted.
+   The SW scores are not always directly comparable. Sometimes
+   the complexity adjustment has been turned off, and a variety of
+   scoring matrices are used.
+
+#. **15.6** = % substitutions in matching region compared to the consensus
+#. **6.2** = % of bases opposite a gap in the query sequence (deleted bp)
+#. **0.0** = % of bases opposite a gap in the repeat consensus (inserted bp)
+#. **HSU08988** = name of query sequence
+#. **6563** = starting position of match in query sequence
+#. **6781** = ending position of match in query sequence
+#. **(22462)** = no. of bases in query sequence past the ending position of match
+#. **C** = match is with the Complement of the consensus sequence in the database
+#. **MER7A** = name of the matching interspersed repeat
+#. **DNA/MER2_type** = the class of the repeat, in this case a DNA transposon fossil of the MER2 group (see below for list and references)
+#. **336** = starting position of match in database sequence (using top-strand numbering)
+#. **103** = ending position of match in database sequence
+#. **(0)** = no. of bases in (complement of) the repeat consensus sequence prior to beginning of the match (so 0 means that the match extended all the way to the end of the repeat consensus sequence)
+#. **1** = Identifier
+
+An asterisk (\*) in the final column (no example shown) indicates that there is
+a higher-scoring match whose domain partly (<80%) includes the domain of this match.
+
+Note that the SW score and divergence numbers for the three Tigger1 lines are identical.
+This is because the information is derived from a single alignment (the Alus were deleted
+from the query before the alignment with the Tigger element was performed).
+The program makes educated guesses about many fragments if they are derived from
+the same element (e.g. it knows that the MER7A fragments represent one insert).
+In a next version I can identify each element with a unique ID, if interest exists
+(this could help to represent repeats cleaner in graphic displays).
+
+
+-------
+
+**References**
+
+Smit, AFA, Hubley, R and Green, P. RepeatMasker Open-3.0.
+
+http://www.repeatmasker.org/
+
+
+
diff -r 000000000000 -r d4a2c739da3f readme.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.rst Tue Jun 25 04:33:41 2013 -0400
@@ -0,0 +1,63 @@
+===============================
+Galaxy wrapper for RepeatMasker
+===============================
+
+This wrapper is copyright 2013 by Björn Grüning.
+
+This is a wrapper for the command line tool of RepeatMasker from the Institute for Systems Biology.
+http://www.repeatmasker.org/
+
+
+Smit, AFA, Hubley, R & Green, P. RepeatMasker Open-3.0.
+1996-2010 <http://www.repeatmasker.org>.
+
+
+Additional Information:
+Using RepeatMasker to identify repetitive elements in genomic sequences.
+http://www.ncbi.nlm.nih.gov/pubmed/19274634
+
+============
+Installation
+============
+
+To install RepeatMasker, please use the following instructions:
+
+http://www.repeatmasker.org/RMDownload.html
+
+To install the wrapper, copy the file RepeatMasker.xml into the Galaxy tools
+folder and modify the tool_conf.xml file to make the tool available to Galaxy.
+Add a line like the following:
+
+Add the tool definition to your tool_conf.xml file under Galaxy root.
+
+
+=======
+History
+=======
+
+- v0.1: Initial public release
+- v0.1.1: patch from Simon Guest, to create empty files if no repeat is found
+- v0.1.2: remove trailing semicolon, redirect all output to stdout
+
+===============================
+Wrapper Licence (MIT/BSD style)
+===============================
+
+Permission to use, copy, modify, and distribute this software and its
+documentation with or without modifications and for any purpose and
+without fee is hereby granted, provided that any copyright notices
+appear in all copies and that both those copyright notices and this
+permission notice appear in supporting documentation, and that the
+names of the contributors or copyright holders not be used in
+advertising or publicity pertaining to distribution of the software
+without specific prior permission.
+
+THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
+WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
+OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
+OR PERFORMANCE OF THIS SOFTWARE.
+