comparison repeatmasker.xml.orig @ 14:7563ea7a922d draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/repeatmasker commit 7a5f368a5859e659aa36d0358bb96ca12574e2cc
author iuc
date Mon, 24 Apr 2023 10:29:31 +0000
parents
children
comparison
equal deleted inserted replaced
13:3f987772e283 14:7563ea7a922d
2 <tool id="repeatmasker_wrapper" name="RepeatMasker" version="@TOOL_VERSION@+@GALAXY_TOOL_VERSION@" profile="20.01">
3 <description>screen DNA sequences for interspersed repeats and low complexity regions</description>
4 <macros> <!-- macros.xml is the only import, so the macros expanded in this file and the @...@ version tokens are expected to be defined there -->
5 <import>macros.xml</import>
6 </macros>
7 <expand macro='xrefs'/>
8 <expand macro='edam_ontology' />
9 <expand macro='requirements' />
10 <version_command>repeatmasker --version</version_command>
11 <command detect_errors="exit_code"><![CDATA[
24 RM_PATH=\$(which RepeatMasker) &&
25 if [ -z "\$RM_PATH" ] ; then echo "Failed to find RepeatMasker in PATH (\$PATH)" >&2 ; exit 1 ; fi &&
26
27 RM_LIB_PATH=\$(dirname \$RM_PATH)/../share/RepeatMasker/Libraries &&
28 #if $repeat_source.source_type == "dfam_up":
29 mkdir lib/ &&
30 ln -s '${repeat_source.dfam_lib}' lib/RepeatMaskerLib.h5 &&
31 RM_LIB_PATH=\$(pwd)/lib &&
32 #end if
33
34 ln -s '${input_fasta}' rm_input.fasta &&
35
36 RepeatMasker -dir \$(pwd)
37 -libdir \$RM_LIB_PATH
38 #if $repeat_source.source_type == "library":
39 -lib '${repeat_source.repeat_lib}'
40 -cutoff '${repeat_source.cutoff}'
41 #else if $repeat_source.source_type == "dfam":
42 #if $repeat_source.species_source.species_from_list == 'yes':
43 -species $repeat_source.species_source.species_list
44 #else
45 -species '${repeat_source.species_source.species_name}'
46 #end if
47 #else if $repeat_source.source_type == "dfam_up":
48 -species '${repeat_source.species_name}'
49 #end if
50 -parallel \${GALAXY_SLOTS:-1}
51 ${gff}
52 ${excln}
53 ${advanced.is_only}
54 ${advanced.is_clip}
55 ${advanced.no_is}
56 ${advanced.rodspec}
57 ${advanced.primspec}
58 ${advanced.nolow}
59 ${advanced.noint}
60 ${advanced.norna}
61 ${advanced.alu}
62 ${advanced.div}
63 ${advanced.search_speed}
64 -frag ${advanced.frag}
65 ## -maxsize ${advanced.maxsize}
66 #if str($advanced.gc):
67 -gc ${advanced.gc}
68 #end if
69 ${advanced.gccalc}
70 ${advanced.nocut}
71 ${advanced.keep_alignments}
72 ${advanced.invert_alignments}
73 ${advanced.xout}
74 ${advanced.xsmall}
75 ${advanced.poly}
76 rm_input.fasta &&
77 #if $advanced.is_only != '-is_only':
78 mv rm_input.fasta.masked '${output_masked_genome}' &&
79 sed -E 's/^ *// ; s/ *$//; s/\+ //; s/ +/\t/g ; 1,2c SW score\t% div.\t% del.\t% ins.\tquery sequence\tpos in query: begin\tend\t(left)\trepeat\tclass/family\tpos in repeat: begin\tend\t(left)\tID' rm_input.fasta.out >'${output_log}' &&
80 mv rm_input.fasta.tbl '${output_table}' &&
81 #if $gff == '-gff':
82 mv rm_input.fasta.out.gff '${output_gff}' &&
83 #end if
84 #if $advanced.keep_alignments == '-ali':
85 mv rm_input.fasta.align '${output_alignment}' &&
86 #end if
87 #if $advanced.poly == '-poly':
88 sed -E 's/^ *// ; s/ *$//; s/\+ //; s/ +/\t/g' rm_input.fasta.polyout >'${output_polymorphic}' &&
89 #end if
90 #end if
91 if [ -f 'rm_input.fasta.cat.gz' ]; then
92 zcat 'rm_input.fasta.cat.gz' > '${output_repeat_catalog}';
93 else
94 mv rm_input.fasta.cat '${output_repeat_catalog}';
95 fi
96 ]]>
97 </command>
98
99 <inputs>
100 <param name="input_fasta" type="data" format="fasta" label="Genomic DNA" />
101 <conditional name="repeat_source">
102 <param label="Repeat library source" name="source_type" type="select" help="To use RepBase, choose 'Custom library of repeats' and select a fasta version of this non-free database.">
103 <option selected="true" value="dfam">DFam (curated only, bundled with RepeatMasker)</option>
104 <option value="dfam_up">DFam (full/specific version)</option>
105 <option value="library">Custom library of repeats</option>
106 </param>
107 <when value="dfam">
108 <conditional name="species_source">
109 <param label="Select species name from a list?" name="species_from_list" type="select">
110 <option value="yes" selected="true">Yes</option>
111 <option value="no">No</option>
112 </param>
113 <when value="yes">
114 <param name="species_list" type="select" label="Species">
115 <option value="human" selected="true">Human (Homo sapiens)</option>
116 <option value="rodent">Rodent (Order Rodentia)</option>
117 <option value="mouse">Mouse (Mus musculus)</option>
118 <option value="rattus">Rat (Rattus sp.)</option>
119 <option value="danio">Danio (zebra fish)</option>
120 <option value="drosophila">Fruit fly (Drosophila melanogaster)</option>
121 <option value="elegans">Caenorhabditis elegans (nematode)</option>
122 </param>
123 </when>
124 <when value="no">
125 <param name="species_name" type="text" value="human" label="Repeat source species" help="Source species (or clade name) used to select repeats from DFam" />
126 </when>
127 </conditional>
128 </when>
129 <when value="dfam_up">
130 <param name="dfam_lib" type="data" format="h5" label="DFam library" help="The full DFam library can be downloaded from https://www.dfam.org/releases/current/families/Dfam.h5.gz" />
131 <param name="species_name" type="text" value="human" label="Repeat source species" help="Source species (or clade name) used to select repeats from DFam" />
132 </when>
133 <when value="library">
134 <param name="repeat_lib" type="data" format="fasta" label="Custom library of repeats" />
135 <param name="cutoff" type="integer" argument="-cutoff" value="225" label="Cutoff score for masking repeats" />
136 </when>
137 </conditional>
138 <param type="boolean" argument="-gff" truevalue="-gff" falsevalue="" label="Output annotation of repeats in GFF format" checked="false" />
139 <param argument="-excln" type="boolean" truevalue="-excln" falsevalue="" label="Ignore stretches of Ns when computing statistics" checked="true" help="Scaffolds are sometimes joined with stretches of 25 or more Ns. This option ignores them when calculating repeat statistics" />
140 <section name="advanced" title="Advanced options" expanded="false">
141 <param argument="-is_only" type="boolean" truevalue="-is_only" falsevalue="" checked="false" label="Only clip E coli insertion elements" />
142 <param argument="-is_clip" type="boolean" truevalue="-is_clip" falsevalue="" checked="false" label="Clip IS elements before analysis" help="Normally RepeatMasker will report on IS element, with this option selected it will clip them before analysis" />
143 <param argument="-no_is" type="boolean" truevalue="-no_is" falsevalue="" checked="false" label="Skip bacterial insertion element check" />
144 <param argument="-rodspec" type="boolean" truevalue="-rodspec" falsevalue="" checked="false" label="Only check for rodent specific repeats" help="If this option is selected, a check for rodent specific repeats is done instead of a full RepeatMasker run" />
145 <param argument="-primspec" type="boolean" truevalue="-primspec" falsevalue="" checked="false" label="Only check for primate specific repeats" help="If this option is selected, a check for primate specific repeats is done instead of a full RepeatMasker run" />
146 <param argument="-nolow" type="boolean" truevalue="-nolow" falsevalue="" checked="false" label="No low complexity masking" help="Skip masking of simple tandem repeats and low complexity regions." />
147 <param argument="-noint" type="boolean" truevalue="-noint" falsevalue="" checked="false" label="No interspersed repeat masking" help="Only mask simple repeats, skip masking of interspersed repeats." />
148 <param argument="-norna" type="boolean" truevalue="-norna" falsevalue="" checked="false" label="No repeat-like-RNA masking" help="Skip masking of small pol III transcribed RNA (these are masked by default because they resemble SINEs)" />
149 <param argument="-alu" type="boolean" truevalue="-alu" falsevalue="" checked="false" label="Limit masking to (primate) Alu repeats" />
150 <param argument="-div" type="boolean" truevalue="-div" falsevalue="" checked="false" label="Limit masking to less diverged (younger) repeats" />
151 <param type="select" name="search_speed" label="Search speed vs sensitivity trade-off">
152 <option value="">Default</option>
153 <option value="-q">Quick (5-10% less sensitive, 3-4 times speedup)</option>
154 <option value="-qq">Rush (10% less sensitive)</option>
155 <option value="-s">Slow (0-5% more sensitive, 2.5 times slowdown)</option>
156 </param>
157 <param type="integer" argument="-frag" value="40000" label="Maximum contiguous sequence searched" help="Maximum length of sequence that is searched without fragmenting" />
158 <!-- -maxsize option is in the help, but not in the code of repeatmasker-->
159 <!--param type="integer" argument="-maxsize" value="4000000" label="Maximum length for IS or repeat clipped sequences" /-->
160 <param type="integer" argument="-gc" optional="true" label="Select matrices for this GC%" help="Valid values are a percentage or -1 to choose the default" />
161 <param type="boolean" argument="-gccalc" truevalue="-gccalc" falsevalue="" checked="false" label="Calculate GC % for all sequences" help="By default RepeatMasker skips calculating GC % for small sequences" />
162 <param type="boolean" argument="-nocut" truevalue="-nocut" falsevalue="" checked="false" label="Skips cutting of repeats" />
163 <param name="xout" type="boolean" argument="-x" truevalue="-x" falsevalue="" checked="false" label="Mask with X instead of N characters" />
164 <param name="keep_alignments" type="boolean" argument="-ali" truevalue="-ali" falsevalue="" checked="false" label="Output alignments file" />
165 <param name="invert_alignments" type="boolean" argument="-inv" truevalue="-inv" falsevalue="" checked="false" label="Invert alignments in alignment file" help="Show alignments in the orientation of the repeat sequence, not the query sequence" />
166 <param type="boolean" argument="-xsmall" truevalue="-xsmall" falsevalue="" checked="false" label="Output repetitive regions as lowercase, non-repetitive regions as uppercase" />
167 <param type="boolean" argument="-poly" truevalue="-poly" falsevalue="" checked="false" label="Output list of potentially polymorphic microsatellites" />
168 </section>
169 </inputs>
170 <outputs>
171 <data name="output_masked_genome" format="fasta" label="RepeatMasker masked sequence on ${on_string}">
172 <filter>not advanced['is_only']</filter>
173 </data>
174 <data name="output_log" format="tabular" label="RepeatMasker output log on ${on_string}">
175 <filter>not advanced['is_only']</filter>
176 </data>
177 <data name="output_table" format="txt" label="RepeatMasker repeat statistics on ${on_string}">
178 <filter>not advanced['is_only']</filter>
179 </data>
180 <data name="output_repeat_catalog" format="txt" label="RepeatMasker repeat catalogue on ${on_string}" />
181 <data name="output_alignment" format="txt" label="RepeatMasker alignment on ${on_string}">
182 <filter>not advanced['is_only'] and advanced['keep_alignments']</filter>
183 </data>
184 <data name="output_polymorphic" format="tabular" label="RepeatMasker possible polymorphic repeats on ${on_string}">
185 <filter>not advanced['is_only'] and advanced['poly']</filter>
186 </data>
187 <data name="output_gff" format="gff" label="RepeatMasker repeat annotation on ${on_string}">
188 <filter>not advanced['is_only'] and gff is True</filter>
189 </data>
190 </outputs>
191 <tests>
192 <test expect_num_outputs="4">
193 <param name="input_fasta" value="small.fasta" ftype="fasta" />
194 <param name="source_type" value="library" />
195 <param name="repeat_lib" value="repeats.fasta" ftype="fasta" />
196 <output name="output_masked_genome" file="small.fasta.masked" />
197 <output name="output_table" file="small.fasta.stats" lines_diff="6" />
198 <output name="output_repeat_catalog" file="small.fasta.cat" lines_diff="2" />
199 <output name="output_log" file="small.fasta.log" lines_diff="2"/>
200 </test>
201 <test expect_num_outputs="7">
202 <param name="input_fasta" value="small.fasta" ftype="fasta" />
203 <param name="source_type" value="library" />
204 <param name="gff" value="-gff" />
205 <param name="keep_alignments" value="-ali" />
206 <param name="poly" value="-poly" />
207 <param name="repeat_lib" value="repeats.fasta" ftype="fasta" />
208 <output name="output_masked_genome" file="small.fasta.masked" />
209 <output name="output_table" file="small.fasta.stats" lines_diff="6" />
210 <output name="output_repeat_catalog" file="small.fasta.cat" lines_diff="2" />
211 <output name="output_log" file="small.fasta.log" lines_diff="2"/>
212 <output name="output_alignment" file="small.fasta.align" />
213 <output name="output_polymorphic" file="small.fasta.poly" />
214 <output name="output_gff" file="small.fasta.gff" lines_diff="4" />
215 </test>
216 <test expect_num_outputs="4">
217 <param name="input_fasta" value="small.fasta" ftype="fasta" />
218 <param name="source_type" value="dfam" />
219 <param name="species_list" value="human" />
220 <output name="output_masked_genome" file="small_dfam.fasta.masked" />
221 <output name="output_table" file="small_dfam.fasta.stats" lines_diff="2" />
222 <output name="output_repeat_catalog" file="small_dfam.fasta.cat" lines_diff="2" />
223 <output name="output_log" file="small_dfam.fasta.log" lines_diff="2"/>
224 </test>
225 <test expect_num_outputs="4">
226 <param name="input_fasta" value="small.fasta" ftype="fasta" />
227 <param name="source_type" value="dfam_up" />
228 <param name="dfam_lib" value="Dfam_partial_test.h5" ftype="h5" />
229 <param name="species_name" value="rodent" />
230 <output name="output_masked_genome" file="small_dfam_up.fasta.masked" />
231 <output name="output_table" file="small_dfam_up.fasta.stats" lines_diff="2" />
232 <output name="output_repeat_catalog" file="small_dfam_up.fasta.cat" lines_diff="2" />
233 <output name="output_log" file="small_dfam_up.fasta.log" lines_diff="2"/>
234 </test>
235 <test expect_num_outputs="4">
236 <param name="input_fasta" value="small.fasta" ftype="fasta" />
237 <param name="source_type" value="dfam" />
238 <param name="species_list" value="rattus" />
239 <output name="output_masked_genome" file="small_dfam_rattus.fasta.masked" />
240 <output name="output_table" file="small_dfam_rattus.fasta.stats" lines_diff="2" />
241 <output name="output_repeat_catalog" file="small_dfam_rattus.fasta.cat" lines_diff="2" />
242 <output name="output_log" file="small_dfam_rattus.fasta.log" lines_diff="2"/>
243 </test>
244 </tests>
245 <help><![CDATA[
246 RepeatMasker is a program that screens DNA for interspersed repeats and low
247 complexity DNA sequences. The database of repeats to screen for can be
248 provided as a FASTA file or downloaded from RepBase_. If the RepBase option is
249 chosen the RepBaseRepeatMaskerEdition file should be downloaded and
250 unpacked, and the enclosed EMBL format file ('RMRBSeqs.embl') should
251 be uploaded to Galaxy for use with this tool.
252
253 Further documentation is available on the RepeatMasker homepage_.
254
255 .. _RepBase: http://www.girinst.org/repbase/
256 .. _homepage: http://www.repeatmasker.org/webrepeatmaskerhelp.html
257 ]]>
258 </help>
259 <expand macro="citations" />
260 </tool>