Galaxy |

Changeset 0:d4a2c739da3f (2013-06-25)

Next changeset 1:880265000696 (2013-06-25)

Commit message:
Initial release under a consistent username. Fixes for stdout and the trailing semicolon.

added:
RepeatMasker.xml
readme.rst

diff -r 000000000000 -r d4a2c739da3f RepeatMasker.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/RepeatMasker.xml Tue Jun 25 04:33:41 2013 -0400

[

b'@@ -0,0 +1,306 @@\n+<tool id="repeatmasker_wrapper" name="RepeatMasker" version="0.1.2">\n+ <description>Masks different kind of repeats</description>\n+ <command>\n+## The command is a Cheetah template which allows some Python based syntax.\n+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces\n+\n+## create temp directory\n+#import tempfile, os\n+#set $dirname = os.path.abspath( tempfile.mkdtemp() )\n+#set $input_filename = os.path.split( str($query) )[-1]\n+#set $output_basename = os.path.join( $dirname, $input_filename )\n+\n+\n+RepeatMasker \n+-parallel 8\n+\n+$nolow\n+$noint\n+$norna\n+\n+#if str($species)!="all":\n+ $species\n+#end if\n+\n+\n+-dir $dirname\n+\n+#if $adv_opts.adv_opts_selector=="advanced":\n+\n+ #if str($adv_opts.gc)!="0":\n+ -gc $adv_opts.gc\n+ #end if\n+\n+ $adv_opts.gccalc\n+\n+ #set $output_files_list = str($adv_opts.output_files).split(\',\')\n+ #if "gff" in $output_files_list:\n+ -gff\n+ #end if\n+ #if "html" in $output_files_list:\n+ -html\n+ #end if\n+\n+ $adv_opts.slow_search\n+ $adv_opts.quick_search\n+ $adv_opts.rush_search\n+ $adv_opts.only_alus\n+ $adv_opts.is_only\n+\n+#else:\n+ ## Set defaults\n+ -gff\n+\n+## End of advanced options:\n+#end if\n+\n+$query\n+\n+2>&1;\n+\n+## Copy the output files to galaxy\n+## AgR: if there are no repeats, the output files may not exist.\n+## This causes the job to fail, so touch files to ensure they exist.\n+#if $adv_opts.adv_opts_selector=="advanced":\n+\n+ #if "summary" in $output_files_list:\n+ ## Write out the summary file (default)\n+ #set $summary_file = $output_basename + \'.tbl\'\n+ touch $summary_file\n+ cp $summary_file $output_summary;\n+ #end if\n+\n+ #if "gff" in $output_files_list:\n+ ## Write out the gff file (default)\n+ #set $gff_file = $output_basename + \'.out.gff\'\n+ touch $gff_file\n+ cp $gff_file $output_gff;\n+ #end if\n+\n+ #if "html" in $output_files_list:\n+ ## Write out the html file\n+ #set $html_file = $output_basename + 
\'.out.html\'\n+ touch $html_file\n+ cp $html_file $output_html;\n+ #end if\n+\n+#else:\n+\n+ ## Write out the summary file (default)\n+ #set $summary_file = $output_basename + \'.tbl\'\n+ touch $summary_file\n+ cp $summary_file $output_summary;\n+\n+ ## Write out the gff file (default)\n+ #set $gff_file = $output_basename + \'.out.gff\'\n+ touch $gff_file\n+ cp $gff_file $output_gff;\n+\n+\n+## End of advanced options:\n+#end if\n+\n+## Write out mask sequence file\n+#set $mask_sequence_file = $output_basename + \'.masked\'\n+touch $mask_sequence_file\n+cp $mask_sequence_file $output_mask;\n+\n+## Write out standard file (default)\n+## The default \'.out\' file from RepeatMasker has a 3-line header and spaces rather\n+## than tabs. Remove the header and replace the whitespaces with tab\n+#set $standard_file = $output_basename + \'.out\'\n+tail -n +4 $standard_file | tr -s \' \' \'\\t\' > $output_std;\n+\n+## Delete all temporary files\n+rm $dirname -r\n+\n+ </command>\n+ <inputs>\n+ <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> \n+\n+ <param name="nolow" type="boolean" label="No low complexity DNA" truevalue="-nolow" falsevalue="" checked="false" help="Does not mask low_complexity DNA or simple repeats."/>\n+ <param name="noint" type="boolean" label="No interspersed repeats" truevalue="-noint" falsevalue="" checked="false" help="Only masks low complex/simple repeats (no interspersed repeats)."/>\n+\n+ <param name="norna" type="boolean" label="No small RNA genes" truevalue="-norna" falsevalue="" checked="false" help="Does not mask small RNA (pseudo) genes."/>\n+\n+ \n+ <pa'..b' 1\n+ 12204 10.0 2.4 1.8 HSU08988 6782 7714 \\(21529) C TIGGER1 DNA/MER2_type 2418 1493 \\(0) 2\n+ 279 3.0 0.0 0.0 HSU08988 7719 7751 \\(21492) + (TTTTA)n Simple_repeat 1 33 \\(0) 3\n+ 1765 13.4 6.5 1.8 HSU08988 7752 8022 \\(21221) C AluSx SINE/Alu 289 1 \\(23) 4\n+ 12204 10.0 2.4 1.8 HSU08988 8023 8694 \\(20549) C TIGGER1 DNA/MER2_type 1493 827 \\(925) 
5\n+ 1984 11.1 0.3 0.7 HSU08988 8695 9000 \\(20243) C AluSg SINE/Alu 305 1 \\(5) 6\n+ 12204 10.0 2.4 1.8 HSU08988 9001 9695 \\(19548) C TIGGER1 DNA/MER2_type 827 2 \\(1591) 7\n+ 711 21.2 1.4 0.0 HSU08988 9696 9816 \\(19427) C MER7A DNA/MER2_type 122 2 \\(224) 8\n+======== ========= ========= ========= ========== =========== ========= ========= ============ =============== =================== ================ ============== ======= ==\n+\n+This is a sequence in which a Tigger1 DNA transposon has integrated into a MER7 DNA transposon copy.\n+Subsequently two Alus integrated in the Tigger1 sequence. The simple repeat is derived from the\n+poly A of the Alu element. The first line is interpreted like this:\n+\n+:Table description:\n+\n+1. **1306** = Smith-Waterman score of the match, usually complexity adjusted\n+ The SW scores are not always directly comparable. Sometimes\n+ the complexity adjustment has been turned off, and a variety of\n+ scoring-matrices are used.\n+\n+#. **15.6** = % substitutions in matching region compared to the consensus\n+#. **6.2** = % of bases opposite a gap in the query sequence (deleted bp)\n+#. **0.0** = % of bases opposite a gap in the repeat consensus (inserted bp)\n+#. **HSU08988** = name of query sequence\n+#. **6563** = starting position of match in query sequence\n+#. **7714** = ending position of match in query sequence\n+#. **(22462)** = no. of bases in query sequence past the ending position of match\n+#. **C** = match is with the Complement of the consensus sequence in the database\n+#. **MER7A** = name of the matching interspersed repeat\n+#. **DNA/MER2_type** = the class of the repeat, in this case a DNA transposon fossil of the MER2 group (see below for list and references)\n+#. **2418** = starting position of match in database sequence (using top-strand numbering)\n+#. **1465** = ending position of match in database sequence\n+#. **(0)** = no. 
of bases in (complement of) the repeat consensus sequence prior to beginning of the match (so 0 means that the match extended all the way to the end of the repeat consensus sequence)\n+#. **1** = Identifier\n+\n+An asterisk (\\*) in the final column (no example shown) indicates that there is\n+a higher-scoring match whose domain partly (<80%) includes the domain of this match. \n+\n+Note that the SW score and divergence numbers for the three Tigger1 lines are identical.\n+This is because the information is derived from a single alignment (the Alus were deleted\n+from the query before the alignment with the Tigger element was performed).\n+The program makes educated guesses about many fragments if they are derived from\n+the same element (e.g. it knows that the MER7A fragments represent one insert).\n+In a next version I can identify each element with a unique ID, if interest exists\n+(this could help to represent repeats cleaner in graphic displays). \n+\n+\n+-------\n+\n+**References**\n+\n+Smit, AFA, Hubley, R and Green, P. RepeatMasker Open-3.0.\n+\n+http://www.repeatmasker.org/\n+\n+ </help>\n+</tool>\n'

diff -r 000000000000 -r d4a2c739da3f readme.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.rst Tue Jun 25 04:33:41 2013 -0400

@@ -0,0 +1,63 @@
+===============================
+Galaxy wrapper for RepeatMasker
+===============================
+
+This wrapper is copyright 2013 by Björn Grüning.
+
+This is a wrapper for the command line tool of RepeatMasker from the Institute for Systems Biology.
+http://www.repeatmasker.org/
+
+
+Smit, AFA, Hubley, R & Green, P. RepeatMasker Open-3.0.
+1996-2010 <http://www.repeatmasker.org>.
+
+
+Additional Information:
+Using RepeatMasker to identify repetitive elements in genomic sequences.
+http://www.ncbi.nlm.nih.gov/pubmed/19274634
+
+============
+Installation
+============
+
+To install RepeatMasker, please use the following instructions:
+
+http://www.repeatmasker.org/RMDownload.html
+
+To install the wrapper, copy the file RepeatMasker.xml into the Galaxy tools
+folder and modify the tool_conf.xml file to make the tool available to Galaxy.
+Add the tool definition to your tool_conf.xml file under the Galaxy root,
+using a line like the following::
+
+ <tool file="RepeatMasker/RepeatMasker.xml" />
+
+=======
+History
+=======
+
+- v0.1: Initial public release
+- v0.1.1: patch from Simon Guest, to create empty files if no repeat is found
+- v0.1.2: remove trailing semicolon, redirect all output to stdout
+
+===============================
+Wrapper Licence (MIT/BSD style)
+===============================
+
+Permission to use, copy, modify, and distribute this software and its
+documentation with or without modifications and for any purpose and
+without fee is hereby granted, provided that any copyright notices
+appear in all copies and that both those copyright notices and this
+permission notice appear in supporting documentation, and that the
+names of the contributors or copyright holders not be used in
+advertising or publicity pertaining to distribution of the software
+without specific prior permission.
+
+THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
+WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
+OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
+OR PERFORMANCE OF THIS SOFTWARE.
+