Next changeset 1:880265000696 (2013-06-25) |
Commit message:
Initial release under a consistent username. Fixes for stdout and the trailing semicolon. |
added:
RepeatMasker.xml readme.rst |
b |
diff -r 000000000000 -r d4a2c739da3f RepeatMasker.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/RepeatMasker.xml Tue Jun 25 04:33:41 2013 -0400 |
[ |
b'@@ -0,0 +1,306 @@\n+<tool id="repeatmasker_wrapper" name="RepeatMasker" version="0.1.2">\n+ <description>Masks different kind of repeats</description>\n+ <command>\n+## The command is a Cheetah template which allows some Python based syntax.\n+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces\n+\n+## create temp directory\n+#import tempfile, os\n+#set $dirname = os.path.abspath( tempfile.mkdtemp() )\n+#set $input_filename = os.path.split( str($query) )[-1]\n+#set $output_basename = os.path.join( $dirname, $input_filename )\n+\n+\n+RepeatMasker \n+-parallel 8\n+\n+$nolow\n+$noint\n+$norna\n+\n+#if str($species)!="all":\n+ $species\n+#end if\n+\n+\n+-dir $dirname\n+\n+#if $adv_opts.adv_opts_selector=="advanced":\n+\n+ #if str($adv_opts.gc)!="0":\n+ -gc $adv_opts.gc\n+ #end if\n+\n+ $adv_opts.gccalc\n+\n+ #set $output_files_list = str($adv_opts.output_files).split(\',\')\n+ #if "gff" in $output_files_list:\n+ -gff\n+ #end if\n+ #if "html" in $output_files_list:\n+ -html\n+ #end if\n+\n+ $adv_opts.slow_search\n+ $adv_opts.quick_search\n+ $adv_opts.rush_search\n+ $adv_opts.only_alus\n+ $adv_opts.is_only\n+\n+#else:\n+ ## Set defaults\n+ -gff\n+\n+## End of advanced options:\n+#end if\n+\n+$query\n+\n+2>&1;\n+\n+## Copy the output files to galaxy\n+## AgR: if there are no repeats, the output files may not exist.\n+## This causes the job to fail, so touch files to ensure they exist.\n+#if $adv_opts.adv_opts_selector=="advanced":\n+\n+ #if "summary" in $output_files_list:\n+ ## Write out the summary file (default)\n+ #set $summary_file = $output_basename + \'.tbl\'\n+ touch $summary_file\n+ cp $summary_file $output_summary;\n+ #end if\n+\n+ #if "gff" in $output_files_list:\n+ ## Write out the gff file (default)\n+ #set $gff_file = $output_basename + \'.out.gff\'\n+ touch $gff_file\n+ cp $gff_file $output_gff;\n+ #end if\n+\n+ #if "html" in $output_files_list:\n+ ## Write out the html file\n+ #set $html_file = $output_basename + 
\'.out.html\'\n+ touch $html_file\n+ cp $html_file $output_html;\n+ #end if\n+\n+#else:\n+\n+ ## Write out the summary file (default)\n+ #set $summary_file = $output_basename + \'.tbl\'\n+ touch $summary_file\n+ cp $summary_file $output_summary;\n+\n+ ## Write out the gff file (default)\n+ #set $gff_file = $output_basename + \'.out.gff\'\n+ touch $gff_file\n+ cp $gff_file $output_gff;\n+\n+\n+## End of advanced options:\n+#end if\n+\n+## Write out mask sequence file\n+#set $mask_sequence_file = $output_basename + \'.masked\'\n+touch $mask_sequence_file\n+cp $mask_sequence_file $output_mask;\n+\n+## Write out standard file (default)\n+## The default \'.out\' file from RepeatMasker has a 3-line header and spaces rather\n+## than tabs. Remove the header and replace the whitespaces with tab\n+#set $standard_file = $output_basename + \'.out\'\n+tail -n +4 $standard_file | tr -s \' \' \'\\t\' > $output_std;\n+\n+## Delete all temporary files\n+rm $dirname -r\n+\n+ </command>\n+ <inputs>\n+ <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> \n+\n+ <param name="nolow" type="boolean" label="No low complexity DNA" truevalue="-nolow" falsevalue="" checked="false" help="Does not mask low_complexity DNA or simple repeats."/>\n+ <param name="noint" type="boolean" label="No interspersed repeats" truevalue="-noint" falsevalue="" checked="false" help="Only masks low complex/simple repeats (no interspersed repeats)."/>\n+\n+ <param name="norna" type="boolean" label="No small RNA genes" truevalue="-norna" falsevalue="" checked="false" help="Does not mask small RNA (pseudo) genes."/>\n+\n+ <!--\n+ Specify the species or clade of the input sequence. The species name\n+ must be a valid NCBI Taxonomy Database species name and be contained\n+ in the RepeatMasker repeat database. 
The following collection is not complete.\n+ -->\n+ <pa'..b' 1\n+ 12204 10.0 2.4 1.8 HSU08988 6782 7714 \\(21529) C TIGGER1 DNA/MER2_type 2418 1493 \\(0) 2\n+ 279 3.0 0.0 0.0 HSU08988 7719 7751 \\(21492) + (TTTTA)n Simple_repeat 1 33 \\(0) 3\n+ 1765 13.4 6.5 1.8 HSU08988 7752 8022 \\(21221) C AluSx SINE/Alu 289 1 \\(23) 4\n+ 12204 10.0 2.4 1.8 HSU08988 8023 8694 \\(20549) C TIGGER1 DNA/MER2_type 1493 827 \\(925) 5\n+ 1984 11.1 0.3 0.7 HSU08988 8695 9000 \\(20243) C AluSg SINE/Alu 305 1 \\(5) 6\n+ 12204 10.0 2.4 1.8 HSU08988 9001 9695 \\(19548) C TIGGER1 DNA/MER2_type 827 2 \\(1591) 7\n+ 711 21.2 1.4 0.0 HSU08988 9696 9816 \\(19427) C MER7A DNA/MER2_type 122 2 \\(224) 8\n+======== ========= ========= ========= ========== =========== ========= ========= ============ =============== =================== ================ ============== ======= ==\n+\n+This is a sequence in which a Tigger1 DNA transposon has integrated into a MER7 DNA transposon copy.\n+Subsequently two Alus integrated in the Tigger1 sequence. The simple repeat is derived from the\n+poly A of the Alu element. The first line is interpreted like this:\n+\n+:Table description:\n+\n+1. **1306** = Smith-Waterman score of the match, usually complexity adjusted\n+ The SW scores are not always directly comparable. Sometimes\n+ the complexity adjustment has been turned off, and a variety of\n+ scoring-matrices are used.\n+\n+#. **15.6** = % substitutions in matching region compared to the consensus\n+#. **6.2** = % of bases opposite a gap in the query sequence (deleted bp)\n+#. **0.0** = % of bases opposite a gap in the repeat consensus (inserted bp)\n+#. **HSU08988** = name of query sequence\n+#. **6563** = starting position of match in query sequence\n+#. **7714** = ending position of match in query sequence\n+#. **(22462)** = no. of bases in query sequence past the ending position of match\n+#. **C** = match is with the Complement of the consensus sequence in the database\n+#. 
**MER7A** = name of the matching interspersed repeat\n+#. **DNA/MER2_type** = the class of the repeat, in this case a DNA transposon fossil of the MER2 group (see below for list and references)\n+#. **2418** = starting position of match in database sequence (using top-strand numbering)\n+#. **1465** = ending position of match in database sequence\n+#. **(0)** = no. of bases in (complement of) the repeat consensus sequence prior to beginning of the match (so 0 means that the match extended all the way to the end of the repeat consensus sequence)\n+#. **1** = Identifier\n+\n+An asterisk (\\*) in the final column (no example shown) indicates that there is\n+a higher-scoring match whose domain partly (<80%) includes the domain of this match. \n+\n+Note that the SW score and divergence numbers for the three Tigger1 lines are identical.\n+This is because the information is derived from a single alignment (the Alus were deleted\n+from the query before the alignment with the Tigger element was performed).\n+The program makes educated guesses about many fragments if they are derived from\n+the same element (e.g. it knows that the MER7A fragments represent one insert).\n+In a next version I can identify each element with a unique ID, if interest exists\n+(this could help to represent repeats cleaner in graphic displays). \n+\n+\n+-------\n+\n+**References**\n+\n+Smit, AFA, Hubley, R and Green, P. RepeatMasker Open-3.0.\n+\n+http://www.repeatmasker.org/\n+\n+ </help>\n+</tool>\n' |
b |
diff -r 000000000000 -r d4a2c739da3f readme.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/readme.rst Tue Jun 25 04:33:41 2013 -0400 |
b |
@@ -0,0 +1,63 @@ +=============================== +Galaxy wrapper for RepeatMasker +=============================== + +This wrapper is copyright 2013 by Björn Grüning. + +This is a wrapper for the command line tool of RepeatMasker from the Institute for Systems Biology. +http://www.repeatmasker.org/ + + +Smit, AFA, Hubley, R & Green, P. RepeatMasker Open-3.0. +1996-2010 <http://www.repeatmasker.org>. + + +Additional Information: +Using RepeatMasker to identify repetitive elements in genomic sequences. +http://www.ncbi.nlm.nih.gov/pubmed/19274634 + +============ +Installation +============ + +To install RepeatMasker, please use the following instructions: + +http://www.repeatmasker.org/RMDownload.html + +To install the wrapper, copy the file RepeatMasker.xml into the Galaxy tools +folder and modify the tool_conf.xml file to make the tool available to Galaxy. +Add a line like the following: + +Add the tool definition to your tool_conf.xml file under Galaxy root. + <tool file="RepeatMasker/RepeatMasker.xml" /> + +======= +History +======= + +- v1.1: Initial public release +- v0.1.1: patch from Simon Guest, to create empty files if no repeat is found +- v0.1.2: remove trailing semicolon, redirect all output to stdout + +=============================== +Wrapper Licence (MIT/BSD style) +=============================== + +Permission to use, copy, modify, and distribute this software and its +documentation with or without modifications and for any purpose and +without fee is hereby granted, provided that any copyright notices +appear in all copies and that both those copyright notices and this +permission notice appear in supporting documentation, and that the +names of the contributors or copyright holders not be used in +advertising or publicity pertaining to distribution of the software +without specific prior permission. 
+ +THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL +WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT +OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +OR PERFORMANCE OF THIS SOFTWARE. + |