comparison repeatmasker.xml @ 3:bdfc22c1c3e3 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/repeat_masker commit 8dacb4321122d92df0983e0794cf23572be03224
author iuc
date Wed, 02 May 2018 20:18:11 -0400
parents
children 04f5c3d7448e
comparison
equal deleted inserted replaced
2:5673e72241aa 3:bdfc22c1c3e3
1 <tool id="repeatmasker_wrapper" name="RepeatMasker" version="4.0.7" profile="17.01">
2 <description>RepeatMasker</description>
3
4 <requirements>
<!-- Package that provides the RepeatMasker executable; the command section locates the bundled Libraries directory relative to that executable. -->
5 <requirement type="package" version="4.0.7">repeatmasker</requirement>
6 </requirements>
7
8 <command detect_errors="exit_code"><![CDATA[
## Link the library files shipped with RepeatMasker into ./lib and export that
## directory as REPEATMASKER_LIB_DIR, so a user-supplied RepBase file can be
## copied in next to them.
9 RM_LIB_PATH=\$(dirname \$(which RepeatMasker))/../share/RepeatMasker/Libraries &&
10 mkdir lib &&
11 export REPEATMASKER_LIB_DIR=\$(pwd)/lib &&
12 for file in \$(ls \$RM_LIB_PATH) ; do ln -s \$RM_LIB_PATH/\$file lib/\$file ; done &&
13 #if $repeat_source.source_type == "repbase":
14 cp '${repeat_source.repbase_file}' lib/RMRBSeqs.embl &&
15 #end if
16 ln -s '${input_fasta}' rm_input.fasta &&
17 RepeatMasker -dir \$(pwd)
18 #if $repeat_source.source_type == "library":
19 -lib '${repeat_source.repeat_lib}'
20 -cutoff '${repeat_source.cutoff}'
21 #else if $repeat_source.source_type == "repbase":
22 #if $repeat_source.species_source.species_from_list == 'yes':
23 $repeat_source.species_source.species_list
24 #else
25 -species '${repeat_source.species_source.species_name}'
26 #end if
27 #end if
28 -parallel \${GALAXY_SLOTS:-1}
## Boolean and select parameters expand to either a flag or an empty string.
## They are left unquoted so that an unset option vanishes from the command
## line instead of being passed as an empty '' positional argument.
29 $gff
## Emit the flag from the boolean state rather than from the parameter's
## truevalue, so the correct spelling is used whatever the param declares.
#if $ignore_n_stretches:
30 -excln
#end if
31 $advanced.is_only
32 $advanced.is_clip
33 $advanced.no_is
34 $advanced.rodspec
35 $advanced.primspec
36 $advanced.nolow
37 $advanced.noint
38 $advanced.norna
39 $advanced.alu
40 $advanced.div
41 $advanced.search_speed
## Integer options must be preceded by their flag name; a bare number would be
## treated as a positional argument.
42 -frag '${advanced.frag}'
43 -maxsize '${advanced.maxsize}'
## The parameter wrapper itself is never None, so test its string value to
## detect an unset optional integer.
44 #if str($advanced.gc) != '':
45 -gc '${advanced.gc}'
46 #end if
#if $advanced.gccalc:
47 -gccalc
#end if
48 $advanced.nocut
49 $advanced.keep_alignments
50 $advanced.invert_alignments
51 $advanced.xout
52 $advanced.xsmall
53 $advanced.poly
54 rm_input.fasta &&
## Collect result files. Everything except the repeat catalogue is skipped
## when only IS elements are clipped (-is_only).
55 #if $advanced.is_only != '-is_only':
56 mv rm_input.fasta.masked '${output_masked_genome}' &&
## Convert the fixed-width .out report to tab-separated columns and replace
## its first two lines with a single header row.
57 sed -r 's/^ *// ; s/ *$//; s/\+ //; s/ +/\t/g ; 1,2c SW score\t% div.\t% del.\t% ins.\tquery sequence\tpos in query: begin\tend\t(left)\trepeat\tclass/family\tpos in repeat: begin\tend\t(left)\tID' rm_input.fasta.out >'${output_log}' &&
58 mv rm_input.fasta.tbl '${output_table}' &&
59 #if $gff == '-gff':
60 mv rm_input.fasta.out.gff '${output_gff}' &&
61 #end if
62 #if $advanced.keep_alignments == '-ali':
63 mv rm_input.fasta.align '${output_alignment}' &&
64 #end if
65 #if $advanced.poly == '-poly':
66 sed -r 's/^ *// ; s/ *$//; s/\+ //; s/ +/\t/g' rm_input.fasta.polyout >'${output_polymorphic}' &&
67 #end if
68 #end if
69 mv rm_input.fasta.cat '${output_repeat_catalog}'
70 ]]>
71 </command>
72
73 <inputs>
74 <param name="input_fasta" type="data" format="fasta" label="Genomic DNA" />
75 <conditional name="repeat_source">
<!-- Selects where repeats come from: a user-supplied RepBase EMBL file combined with a species/clade choice, or a custom FASTA library with a score cutoff. -->
76 <param label="Repeat library source" name="source_type" type="select">
77 <option selected="true" value="repbase">RepBase</option>
78 <option value="library">Custom library of repeats</option>
79 </param>
80 <when value="repbase">
81 <param name="repbase_file" type="data" format="embl" label="RepBase (RMRBSeqs.embl) file" />
82 <conditional name="species_source">
83 <param label="Select species name from a list?" name="species_from_list" type="select">
84 <option value="yes" selected="true">Yes</option>
85 <option value="no">No</option>
86 </param>
87 <when value="yes">
<!-- Each option value is the complete "-species ..." argument because the command inserts it verbatim. This is a single-choice select, so exactly one option is marked as selected. -->
88 <param name="species_list" type="select" label="Species">
89 <option value="-species anopheles" selected="true">anopheles</option>
90 <option value="-species arabidopsis">arabidopsis</option>
91 <option value="-species artiodactyl">artiodactyl</option>
92 <option value="-species aspergillus">aspergillus</option>
93 <option value="-species carnivore">carnivore</option>
94 <option value="-species cat">cat</option>
95 <option value="-species chicken">chicken</option>
96 <option value="-species 'ciona intestinalis'">ciona intestinalis</option>
97 <option value="-species 'ciona savignyi'">ciona savignyi</option>
98 <option value="-species cow">cow</option>
99 <option value="-species danio">danio</option>
<!-- NOTE(review): the value "diatoaea" differs from its label "diatomea"; confirm which spelling RepeatMasker accepts before changing either. -->
100 <option value="-species diatoaea">diatomea</option>
101 <option value="-species dog">dog</option>
102 <option value="-species drosophila">drosophila</option>
103 <option value="-species elegans">elegans</option>
104 <option value="-species fugu">fugu</option>
105 <option value="-species fungi">fungi</option>
106 <option value="-species human">human</option>
107 <option value="-species maize">maize</option>
108 <option value="-species mammal">mammal</option>
109 <option value="-species mouse">mouse</option>
110 <option value="-species pig">pig</option>
111 <option value="-species rat">rat</option>
112 <option value="-species rice">rice</option>
113 <option value="-species rodentia">rodentia</option>
114 <option value="-species ruminantia">ruminantia</option>
115 <option value="-species wheat">wheat</option>
116 </param>
117 </when>
118 <when value="no">
119 <param name="species_name" type="text" value="homo sapiens" label="Repeat source species" help="Source species (or clade name) used to select repeats from RepBase" />
120 </when>
121 </conditional>
122 </when>
123 <when value="library">
124 <param name="repeat_lib" type="data" format="fasta" label="Custom library of repeats" />
125 <param name="cutoff" type="integer" argument="-cutoff" value="225" label="Cutoff score for masking repeats" />
126 </when>
127 </conditional>
<!-- Top-level switches. Each boolean expands to its flag when checked and to an empty string otherwise, so both truevalue and falsevalue are stated explicitly. -->
128 <param type="boolean" argument="-gff" truevalue="-gff" falsevalue="" label="Output annotation of repeats in GFF format" checked="false" />
129 <param name="ignore_n_stretches" type="boolean" argument="-excln" truevalue="-excln" falsevalue="" label="Ignore stretches of Ns when computing statistics" checked="true" help="Scaffolds are sometimes joined with stretches of 25 or more Ns. This option ignores them when calculating repeat statistics" />
130 <section name="advanced" title="Advanced options" expanded="false">
<!-- Optional RepeatMasker switches. Boolean params expand to their flag when checked and to an empty string otherwise; the command template inserts the values of this section after the main options. -->
131 <param argument="-is_only" type="boolean" truevalue="-is_only" falsevalue="" checked="false" label="Only clip E coli insertion elements" />
132 <param argument="-is_clip" type="boolean" truevalue="-is_clip" falsevalue="" checked="false" label="Clip IS elements before analysis" help="Normally RepeatMasker will report on IS element, with this option selected it will clip them before analysis" />
133 <param argument="-no_is" type="boolean" truevalue="-no_is" falsevalue="" checked="false" label="Skip bacterial insertion element check" />
134 <param argument="-rodspec" type="boolean" truevalue="-rodspec" falsevalue="" checked="false" label="Only check for rodent specific repeats" help="If this option is selected a check for rodent specific repeats is done instead of a full RepeatMasker run" />
135 <param argument="-primspec" type="boolean" truevalue="-primspec" falsevalue="" checked="false" label="Only check for primate specific repeats" help="If this option is selected a check for primate specific repeats is done instead of a full RepeatMasker run" />
136 <param argument="-nolow" type="boolean" truevalue="-nolow" falsevalue="" checked="false" label="No low complexity masking" help="Skip masking of simple tandem repeats and low complexity regions." />
137 <param argument="-noint" type="boolean" truevalue="-noint" falsevalue="" checked="false" label="No interspersed repeat masking" help="Only mask simple repeats, skip masking of interspersed repeats." />
138 <param argument="-norna" type="boolean" truevalue="-norna" falsevalue="" checked="false" label="No repeat-like-RNA masking" help="Skip masking of small pol III transcribed RNA (these are masked by default because they resemble SINEs)" />
139 <param argument="-alu" type="boolean" truevalue="-alu" falsevalue="" checked="false" label="Limit masking to (primate) Alu repeats" />
<!-- NOTE(review): RepeatMasker's -div presumably expects a numeric divergence threshold; confirm that exposing it as a bare boolean flag is intended. -->
140 <param argument="-div" type="boolean" truevalue="-div" falsevalue="" checked="false" label="Limit masking to less diverged (younger) repeats" />
141 <param type="select" name="search_speed" label="Search speed vs sensitivity trade-off">
142 <option value="">Default</option>
143 <option value="-q">Quick (5-10% less sensitive, 3-4 times speedup)</option>
144 <option value="-qq">Rush (10% less sensitive)</option>
145 <option value="-s">Slow (0-5% more sensitive, 2.5 times slowdown)</option>
146 </param>
147 <param type="integer" argument="-frag" value="40000" label="Maximum contiguous sequence searched" help="Maximum length of sequence that is searched without fragmenting" />
148 <param type="integer" argument="-maxsize" value="4000000" label="Maximum length for IS or repeat clipped sequences" />
149 <param type="integer" argument="-gc" optional="True" label="Select matrices for this GC%" help="Valid values are a percentage or -1 to choose the default" />
<!-- truevalue must spell the flag exactly as RepeatMasker expects it (-gccalc). -->
150 <param type="boolean" argument="-gccalc" truevalue="-gccalc" falsevalue="" checked="false" label="Calculate GC % for all sequences" help="By default RepeatMasker skips calculating GC % for small sequences" />
151 <param type="boolean" argument="-nocut" truevalue="-nocut" falsevalue="" checked="false" label="Skips cutting of repeats" />
152 <param name="xout" type="boolean" argument="-x" truevalue="-x" falsevalue="" checked="false" label="Mask with X instead of N characters" />
153 <param name="keep_alignments" type="boolean" argument="-ali" truevalue="-ali" falsevalue="" checked="false" label="Output alignments file" />
154 <param name="invert_alignments" type="boolean" argument="-inv" truevalue="-inv" falsevalue="" checked="false" label="Invert alignments in alignment file" help="Show alignments in the orientation of the repeat sequence, not the query sequence" />
155 <param type="boolean" argument="-xsmall" truevalue="-xsmall" falsevalue="" checked="false" label="Output repetitive regions as lowercase, non-repetitive regions as uppercase" />
156 <param type="boolean" argument="-poly" truevalue="-poly" falsevalue="" checked="false" label="Output list of potentially polymorphic microsatellites" />
157 </section>
158 </inputs>
159 <outputs>
<!-- Only the repeat catalogue is produced unconditionally. Every other dataset is dropped when -is_only is selected, and the alignment, polymorphic and GFF datasets additionally require their own option to be switched on. -->
160 <data format="fasta" name="output_masked_genome" label="RepeatMasker masked sequence on ${on_string}">
161 <filter>not advanced['is_only']</filter>
162 </data>
163 <data format="tabular" name="output_log" label="RepeatMasker output log on ${on_string}">
164 <filter>not advanced['is_only']</filter>
165 </data>
166 <data format="txt" name="output_table" label="RepeatMasker repeat statistics on ${on_string}">
167 <filter>not advanced['is_only']</filter>
168 </data>
169 <data format="txt" name="output_repeat_catalog" label="RepeatMasker repeat catalogue on ${on_string}" />
170 <data format="txt" name="output_alignment" label="RepeatMasker alignment on ${on_string}">
171 <filter>not advanced['is_only'] and advanced['keep_alignments']</filter>
172 </data>
173 <data format="tabular" name="output_polymorphic" label="RepeatMasker possible polymorphic repeats on ${on_string}">
174 <filter>not advanced['is_only'] and advanced['poly']</filter>
175 </data>
176 <data format="gff" name="output_gff" label="RepeatMasker repeat annotation on ${on_string}">
177 <filter>not advanced['is_only'] and gff is True</filter>
178 </data>
179 </outputs>
180 <tests>
<!-- Custom-library run with default options: the masked sequence, log, statistics table and repeat catalogue are expected. -->
181 <test expect_num_outputs="4">
182 <param ftype="fasta" name="input_fasta" value="small.fasta" />
183 <param name="source_type" value="library" />
184 <param ftype="fasta" name="repeat_lib" value="repeats.fasta" />
185 <output file="small.fasta.masked" name="output_masked_genome" />
186 <output file="small.fasta.stats" name="output_table" lines_diff="2" />
187 <output file="small.fasta.cat" name="output_repeat_catalog" />
188 <output file="small.fasta.log" name="output_log" />
189 </test>
<!-- Same input with GFF, alignment and polymorphic-repeat outputs switched on, giving seven datasets in total. -->
190 <test expect_num_outputs="7">
191 <param ftype="fasta" name="input_fasta" value="small.fasta" />
192 <param name="source_type" value="library" />
193 <param name="gff" value="-gff" />
194 <!-- <param name="show" value="yes" /> -->
195 <param name="keep_alignments" value="-ali" />
196 <param name="poly" value="-poly" />
197 <param ftype="fasta" name="repeat_lib" value="repeats.fasta" />
198 <output file="small.fasta.masked" name="output_masked_genome" />
199 <output file="small.fasta.stats" name="output_table" lines_diff="4" />
200 <output file="small.fasta.cat" name="output_repeat_catalog" />
201 <output file="small.fasta.log" name="output_log" />
202 <output file="small.fasta.align" name="output_alignment" />
203 <output file="small.fasta.poly" name="output_polymorphic" />
204 <output file="small.fasta.gff" name="output_gff" lines_diff="4" />
205 </test>
206 </tests>
207 <help><![CDATA[
208 RepeatMasker is a program that screens DNA for interspersed repeats and low
209 complexity DNA sequences. The database of repeats to screen for can be
210 provided as a FASTA file or downloaded from RepBase_. If the RepBase option is
211 chosen, the RepBaseRepeatMaskerEdition file should be downloaded and
212 unpacked, and the enclosed EMBL format file ('RMRBSeqs.embl') should
213 be uploaded to Galaxy for use with this tool.
214
215 Further documentation is available on the RepeatMasker homepage_.
216
217 .. _RepBase: http://www.girinst.org/repbase/
218 .. _homepage: http://www.repeatmasker.org/webrepeatmaskerhelp.html
219 ]]>
220 </help>
221 <citations>
<!-- BibTeX entry citing the RepeatMasker software itself (no DOI is given, so it is recorded as a misc entry with the project URL). -->
222 <citation type="bibtex">
223 @misc{RepeatMasker,
224 title = {RepeatMasker Open-4.0},
225 howpublished = {\url{http://www.repeatmasker.org}},
226 author = {Smit, AFA and Hubley, R and Green, P.},
227 year = {2013-2015}}
228 </citation>
229 </citations>
230 </tool>