diff repeatmasker.xml @ 3:bdfc22c1c3e3 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/repeat_masker commit 8dacb4321122d92df0983e0794cf23572be03224
author iuc
date Wed, 02 May 2018 20:18:11 -0400
parents
children 04f5c3d7448e
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repeatmasker.xml	Wed May 02 20:18:11 2018 -0400
@@ -0,0 +1,230 @@
+<tool id="repeatmasker_wrapper" name="RepeatMasker" version="4.0.7" profile="17.01">
+  <description>RepeatMasker</description>
+
+  <requirements>
+    <requirement type="package" version="4.0.7">repeatmasker</requirement>
+  </requirements>
+
+  <command detect_errors="exit_code"><![CDATA[
+    RM_LIB_PATH=\$(dirname \$(which RepeatMasker))/../share/RepeatMasker/Libraries &&
+    mkdir lib &&
+    export REPEATMASKER_LIB_DIR=\$(pwd)/lib &&
+      for file in \$(ls \$RM_LIB_PATH) ; do  ln -s \$RM_LIB_PATH/\$file lib/\$file ; done &&
+    #if $repeat_source.source_type == "repbase":
+      cp '${repeat_source.repbase_file}' lib/RMRBSeqs.embl &&
+    #end if
+    ln -s '${input_fasta}' rm_input.fasta &&
+    RepeatMasker -dir \$(pwd)
+    #if $repeat_source.source_type == "library":
+      -lib '${repeat_source.repeat_lib}'
+      -cutoff '${repeat_source.cutoff}'
+    #else if $repeat_source.source_type == "repbase":
+      #if $repeat_source.species_source.species_from_list == 'yes':
+        $repeat_source.species_source.species_list
+      #else
+        -species '${repeat_source.species_source.species_name}'
+      #end if
+    #end if
+    -parallel \${GALAXY_SLOTS:-1}
+    '${gff}'
+    '${ignore_n_stretches}'
+    '${advanced.is_only}'
+    '${advanced.is_clip}'
+    '${advanced.no_is}'
+    '${advanced.rodspec}'
+    '${advanced.primspec}'
+    '${advanced.nolow}'
+    '${advanced.noint}'
+    '${advanced.norna}'
+    '${advanced.alu}'
+    '${advanced.div}'
+    '${advanced.search_speed}'
+    '${advanced.frag}'
+    '${advanced.maxsize}'
+    #if $advanced.gc is not None:
+      '${advanced.gc}'
+    #end if
+    '${advanced.gccalc}'
+    '${advanced.nocut}'
+    '${advanced.keep_alignments}'
+    '${advanced.invert_alignments}'
+    '${advanced.xout}'
+    '${advanced.xsmall}'
+    '${advanced.poly}'
+    rm_input.fasta &&
+    #if $advanced.is_only != '-is_only':
+      mv rm_input.fasta.masked '${output_masked_genome}' &&
+      sed -r 's/^ *// ; s/ *$//; s/\+ //; s/ +/\t/g ;  1,2c SW score\t% div.\t% del.\t% ins.\tquery sequence\tpos in  query: begin\tend\t(left)\trepeat\tclass/family\tpos in repeat: begin\tend\t(left)\tID' rm_input.fasta.out >'${output_log}' &&
+      mv rm_input.fasta.tbl '${output_table}' &&
+      #if $gff == '-gff':
+        mv rm_input.fasta.out.gff '${output_gff}' &&
+      #end if
+      #if $advanced.keep_alignments == '-ali':
+        mv rm_input.fasta.align '${output_alignment}' &&
+      #end if
+      #if $advanced.poly == '-poly':
+        sed -r 's/^ *// ; s/ *$//; s/\+ //; s/ +/\t/g' rm_input.fasta.polyout >'${output_polymorphic}' &&
+      #end if
+    #end if
+    mv rm_input.fasta.cat '${output_repeat_catalog}'
+    ]]>
+  </command>
+
+  <inputs>
+    <param name="input_fasta" type="data" format="fasta" label="Genomic DNA" />
+    <conditional name="repeat_source">
+      <param label="Repeat library source" name="source_type" type="select">
+        <option selected="true" value="repbase">RepBase</option>
+        <option value="library">Custom library of repeats</option>
+      </param>
+      <when value="repbase">
+        <param name="repbase_file" type="data" format="embl" label="RepBase (RMRBSeqs.embl) file" />
+        <conditional name="species_source">
+          <param label="Select species name from a list?" name="species_from_list" type="select">
+            <option value="yes" selected="true">Yes</option>
+            <option value="no">No</option>
+          </param>
+          <when value="yes">
+            <param name="species_list" type="select" label="Species">
+              <option value="-species anopheles" selected="true">anopheles</option>
+              <option value="-species arabidopsis">arabidopsis</option>
+              <option value="-species artiodactyl">artiodactyl</option>
+              <option value="-species aspergillus">aspergillus</option>
+              <option value="-species carnivore">carnivore</option>
+              <option value="-species cat">cat</option>
+              <option value="-species chicken">chicken</option>
+              <option value="-species 'ciona intestinalis'">ciona intestinalis</option>
+              <option value="-species 'ciona savignyi'">ciona savignyi</option>
+              <option value="-species cow">cow</option>
+              <option value="-species danio">danio</option>
+              <option value="-species diatoaea">diatomea</option>
+              <option value="-species dog">dog</option>
+              <option value="-species drosophila">drosophila</option>
+              <option value="-species elegans">elegans</option>
+              <option value="-species fugu">fugu</option>
+              <option value="-species fungi" selected="true">fungi</option>
+              <option value="-species human">human</option>
+              <option value="-species maize">maize</option>
+              <option value="-species mammal">mammal</option>
+              <option value="-species mouse">mouse</option>
+              <option value="-species pig">pig</option>
+              <option value="-species rat">rat</option>
+              <option value="-species rice">rice</option>
+              <option value="-species rodentia">rodentia</option>
+              <option value="-species ruminantia">ruminantia</option>
+              <option value="-species wheat">wheat</option>
+            </param>
+          </when>
+          <when value="no">
+            <param name="species_name" type="text" value="homo sapiens" label="Repeat source species" help="Source species (or clade name) used to select repeats from RepBase" />
+          </when>
+        </conditional>
+      </when>
+      <when value="library">
+        <param name="repeat_lib" type="data" format="fasta" label="Custom library of repeats" />
+        <param name="cutoff" type="integer" argument="-cutoff" value="225" label="Cutoff score for masking repeats" />
+      </when>
+    </conditional>
+    <param type="boolean" argument="-gff" truevalue="-gff" falsevalue="" label="Output annotation of repeats in GFF format" checked="false" />
+    <param name="ignore_n_stretches" type="boolean" argument="-excln" falsevalue="" label="Ignore stretches of Ns when computing statistics" checked="true" help="Scaffolds are sometimes joined with stretches of 25 or more Ns. This option ignores them when calculating repeat statistics" />
+    <section name="advanced" title="Advanced options" expanded="false">
+      <param argument="-is_only" type="boolean" truevalue="-is_only" falsevalue="" checked="false" label="Only clip E coli insertion elements" />
+      <param argument="-is_clip" type="boolean" truevalue="-is_clip" falsevalue="" checked="false" label="Clip IS elements before analysis" help="Normally RepeatMasker will report on IS element, with this option selected it will clip them before analysis" />
+      <param argument="-no_is" type="boolean" truevalue="-no_is" falsevalue="" checked="false" label="Skip bacterial insertion element check" />
+      <param argument="-rodspec" type="boolean" truevalue="-rodspec" falsevalue="" checked="false" label="Only check for rodent specific repeats" help="If this option is select a check for rodent specific repeats is done instead of a full RepeatMasker run" />
+      <param argument="-primspec" type="boolean" truevalue="-primspec" falsevalue="" checked="false" label="Only check for primate specific repeats" help="If this option is select a check for primate specific repeats is done instead of a full RepeatMasker run" />
+      <param argument="-nolow" type="boolean" truevalue="-nolow" falsevalue="" checked="false" label="No low complexity masking" help="Skip masking of simple tandem repeats and low complexity regions." />
+      <param argument="-noint" type="boolean" truevalue="-noint" falsevalue="" checked="false" label="No interspersed repeat masking" help="Only mask simple repeats, skip masking of interspersed repeats." />
+      <param argument="-norna" type="boolean" truevalue="-norna" falsevalue="" checked="false" label="No repeat-like-RNA masking" help="Skip masking of small pol III transcribed RNA (these are masked by default because they resemble SINEs)" />
+      <param argument="-alu" type="boolean" truevalue="-alu" falsevalue="" checked="false" label="Limit masking to (primate) Alu repeats" />
+      <param argument="-div" type="boolean" truevalue="-div" falsevalue="" checked="false" label="Limit masking to less diverged (younger) repeats" />
+      <param type="select" name="search_speed" label="Search speed vs sensitiviy trade-off">
+        <option value="">Default</option>
+        <option value="-q">Quick (5-10% less sensitive, 3-4 times speedup)</option>
+        <option value="-qq">Rush (10% less sensitive)</option>
+        <option value="-s">Slow (0-5% more sensitive, 2.5 times slowdown)</option>
+      </param>
+      <param type="integer" argument="-frag" value="40000" label="Maximum contiguous sequence searched" help="Maximum length of sequencing that is search without fragmenting" />
+      <param type="integer" argument="-maxsize" value="4000000" label="Maximum length for IS or repeat clipped sequences" />
+      <param type="integer" argument="-gc" optional="True" label="Select matrices for this GC%" help="Valid values are a percentage or -1 to choose the default" />
+      <param type="boolean" argument="-gccalc" truevalue="-gcccalc" falsevalue="" checked="false" label="Calculate GC % for all sequences" help="By default RepeatMasker skips calculating GC % for small sequences" />
+      <param type="boolean" argument="-nocut" truevalue="-nocut" falsevalue="" checked="false" label="Skips cutting of repeats" />
+      <param name="xout" type="boolean" argument="-x" truevalue="-x" falsevalue="" checked="false" label="Mask with X instead of N characters" />
+      <param name="keep_alignments" type="boolean" argument="-ali" truevalue="-ali" falsevalue="" checked="false" label="Output alignments file" />
+      <param name="invert_alignments" type="boolean" argument="-inv" truevalue="-inv" falsevalue="" checked="false" label="Invert alignments in alignment file" help="Show alignments in the orientation of the repeat sequence, not the query sequence" />
+      <param type="boolean" argument="-xsmall" truevalue="-xsmall" falsevalue="" checked="false" label="Output repetitive regions as lowercase, non-repetitive regions as uppercase" />
+      <param type="boolean" argument="-poly" truevalue="-poly" falsevalue="" checked="false" label="Output list of potentially polymorphic microsatellites" />
+    </section>
+  </inputs>
+  <outputs>
+    <data name="output_masked_genome" format="fasta" label="RepeatMasker masked sequence on ${on_string}">
+      <filter>not advanced['is_only']</filter>
+    </data>
+    <data name="output_log" format="tabular" label="RepeatMasker output log on ${on_string}">
+      <filter>not advanced['is_only']</filter>
+    </data>
+    <data name="output_table" format="txt" label="RepeatMasker repeat statistics on ${on_string}">
+      <filter>not advanced['is_only']</filter>
+    </data>
+    <data name="output_repeat_catalog" format="txt" label="RepeatMasker repeat catalogue on ${on_string}" />
+    <data name="output_alignment" format="txt" label="RepeatMasker alignment on ${on_string}">
+      <filter>not advanced['is_only'] and advanced['keep_alignments']</filter>
+    </data>
+    <data name="output_polymorphic" format="tabular" label="RepeatMasker possible polymorphic repeats on ${on_string}">
+      <filter>not advanced['is_only'] and advanced['poly']</filter>
+    </data>
+    <data name="output_gff" format="gff" label="RepeatMasker repeat annotation on ${on_string}">
+      <filter>not advanced['is_only'] and gff is True</filter>
+    </data>
+  </outputs>
+  <tests>
+    <test expect_num_outputs="4">
+      <param name="input_fasta" value="small.fasta" ftype="fasta" />
+      <param name="source_type" value="library" />
+      <param name="repeat_lib" value="repeats.fasta" ftype="fasta" />
+      <output name="output_masked_genome" file="small.fasta.masked" />
+      <output name="output_table" file="small.fasta.stats" lines_diff="2" />
+      <output name="output_repeat_catalog" file="small.fasta.cat" />
+      <output name="output_log" file="small.fasta.log" />
+    </test>
+    <test expect_num_outputs="7">
+      <param name="input_fasta" value="small.fasta" ftype="fasta" />
+      <param name="source_type" value="library" />
+      <param name="gff" value="-gff" />
+      <!-- <param name="show" value="yes" /> -->
+      <param name="keep_alignments" value="-ali" />
+      <param name="poly" value="-poly" />
+      <param name="repeat_lib" value="repeats.fasta" ftype="fasta" />
+      <output name="output_masked_genome" file="small.fasta.masked" />
+      <output name="output_table" file="small.fasta.stats" lines_diff="4" />
+      <output name="output_repeat_catalog" file="small.fasta.cat" />
+      <output name="output_log" file="small.fasta.log" />
+      <output name="output_alignment" file="small.fasta.align" />
+      <output name="output_polymorphic" file="small.fasta.poly" />
+      <output name="output_gff" file="small.fasta.gff" lines_diff="4" />
+    </test>
+  </tests>
+  <help><![CDATA[
+RepeatMasker is a program that screens DNA for interspersed repeats and low
+complexity DNA sequences. The database of repeats to screen for can be
+provided as a FASTA file or downloaded from RepBase_. If the RepBase option is
+chosen the RepBaseRepeatMaskerEdition file should be downloaded and
+unpacked, and the enclosed EMBL format file ('RMRBSeqs.embl') should
+be uploaded to Galaxy for use with this tool.
+
+Further documentation is available on the RepeatMasker homepage_.
+
+.. _RepBase: http://www.girinst.org/repbase/
+.. _homepage: http://www.repeatmasker.org/webrepeatmaskerhelp.html
+    ]]>
+  </help>
+  <citations>
+    <citation type="bibtex">
+      @misc{RepeatMasker,
+        title = {RepeatMasker Open-4.0},
+        howpublished = {\url{http://www.repeatmasker.org}},
+        author = {Smit, AFA and Hubley, R and Green, P.},
+        year = {2013-2015}}
+    </citation>
+  </citations>
+</tool>