comparison rnazWindow.xml @ 0:fdfe3dcf8fc4 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_team/rnaz commit d261ddb93500e1ea309845fa3989c87c6312583d-dirty
author bgruening
date Wed, 30 Jan 2019 04:12:58 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:fdfe3dcf8fc4
1 <tool id="rnaz_window" name="RNAz windows" version="2.1">
2 <requirements> <!-- Software package the tool needs at run time. -->
3 <requirement version="2.1" type="package">rnaz</requirement>
4 </requirements>
5 <!-- Calls rnazWindow.pl on '$input' with the option values chosen in the form and redirects its standard output to '$output'; "--no-reference" is appended only when the noref checkbox is set. --><command detect_errors="exit_code"><![CDATA[
6 rnazWindow.pl
7 --window $window
8 --slide $slide
9 --max-length $maxlength
10 --max-gap $maxgap
11 --max-masked $maxmask
12 --min-id $minid
13 --min-seqs $minseqs
14 --max-seqs $maxseqs
15 --num-samples $numsamples
16 --min-length $minlength
17 --opt-id $optid
18 --$forward_or_reverse
19 #if $noref:
20 $noref
21 #end if
22 '$input'
23 > '$output'
24 ]]></command>
25 <inputs> <!-- One form parameter per rnazWindow.pl option referenced in the command template, plus the input alignment dataset. -->
26 <param format="txt" name="input" type="data" label="Input Alignment File" />
27 <param argument="--window" name="window" type="integer" value="120" min="80" label="Window size, default 120" />
28 <param argument="--slide" name="slide" type="integer" value="120" min="1" label="Window step size, default 120" />
29 <param argument="--max-length" name="maxlength" type="integer" value="120" label="Min size of block before slicing, default is window size 120" />
30 <param argument="--max-gap" name="maxgap" type="float" value="0.25" label="Maximum fraction of gaps, default 0.25" />
31 <param argument="--max-masked" name="maxmask" type="float" value="0.1" label="Maximum fraction of masked letters in sequence, default 0.1" />
32 <param argument="--min-id" name="minid" type="integer" value="50"
33 label="Discard alignment windows with an overall mean pairwise identity smaller than X%. (Default: 50)" />
34 <param argument="--min-seqs" name="minseqs" type="integer" value="2"
35 label="Minimum number of sequences in an alignment. Discard any windows with less than N sequences (Default:2)" />
36 <param argument="--max-seqs" name="maxseqs" type="integer" value="6"
37 label="Maximum number of sequences in an alignment. If a window contains more than N sequences, a subset of exactly N sequences is used (Default:6)" />
38 <param argument="--num-samples" name="numsamples" type="integer" value="1"
39 label="Number of different subsets of sequences that is sampled if there are more sequences in the alignment than --max-seqs (Default: 1)" />
40 <param argument="--min-length" name="minlength" type="integer" value="0"
41 label="Minimum number of columns of an alignment slice. After removing sequences from the alignment, all-gap columns are removed. If the resulting alignment has fewer than N columns, the complete alignment is discarded." />
42 <param argument="--opt-id" name="optid" type="integer" value="80"
43 label="If the number of sequences has to be reduced (see --max-seqs) a subset of sequences is chosen which is optimized for this value of mean pairwise identity. (In percent, default: 80)" />
44 <param name="forward_or_reverse" type="select" label="Scored strand">
45 <option value="forward">Score forward strand (-f)</option>
46 <option value="reverse">Score reverse strand (-r)</option>
47 <option value="both-strands" selected="true">Score both strands (-b)</option>
48 </param>
49 <param argument="--no-reference" name="noref" type="boolean" checked="false" truevalue="--no-reference" falsevalue=""
50 label="By default the first sequence is interpreted as reference sequence. This means, for example, that if the reference sequence is removed during filtering steps the complete alignment is discarded. Also, if there are too many sequences in the alignment, the reference sequence is never removed when choosing an appropriate subset. Having a reference sequence is crucial if you are doing screens of genomic regions. For some other applications it might not be necessary and in such cases you can change the default behaviour by setting this option." />
51 </inputs>
52
53 <outputs> <!-- Single dataset that receives the redirected standard output of the command. -->
54 <data format="txt" name="output"/>
55 </outputs>
56
57 <tests> <!-- Two cases; each sets only the input file (all other parameters keep their defaults) and names the expected output file. -->
58 <test>
59 <param value="unknown.aln" name="input"/>
60 <output file="unknown.aln.window" name="output"/>
61 </test>
62 <test>
63 <param value="tRNA.maf" name="input"/>
64 <output file="tRNA.maf.window" name="output"/>
65 </test>
66 </tests>
67
68 <help>
69 <![CDATA[
70
71 RNAz cannot score alignments longer than 400
72 columns. In practice, it is generally advisable that you score
73 long alignments, say more than 200 columns, in shorter, overlapping
74 windows. For general purpose screens we recommend a window
75 size of 120. This window size appears large enough to detect
76 local secondary structures within long ncRNAs and, on the
77 other hand, small enough to find short secondary structures
78 without losing the signal in a much too long window.
79
80 Usage: rnazWindow.pl [options] [file]
81 Options:
82 -w, --window=N Size of the window (Default: 120)
83
84 -s, --slide=N Step size (Default: 120)
85
86 -m, --max-length Slice only alignments longer than N columns. This
87 means blocks longer than the window size given by --window but shorter
88 than N are kept intact and not sliced. Per default this length is set
89 to the window size given by --window (or 120 by default).
90
91 --max-gap=X Maximum fraction of gaps. If a reference sequence is used
92 (i.e. "--no-reference" is not set), each sequence is compared to the
93 reference sequence and if in the pairwise comparison the fraction of
94 columns with gaps is higher than X the sequence is discarded. If no
95 reference sequence is used, all sequences with a fraction of gaps
96 higher than X are discarded. (Default: 0.25)
97
98 --max-masked=X Maximum fraction of masked (=lowercase letters) in a
99 sequence. All sequences with a fraction of more than X lowercase
100 letters are discarded. This is usually used for excluding repeat
101 sequences marked by RepeatMasker but any other information can be
102 encoded by using lowercase letters. (Default: 0.1)
103
104 --min-id=X Discard alignment windows with an overall mean pairwise
105 identity smaller than X%. (Default: 50)
106
107 --min-seqs=N Minimum number of sequences in an alignment. Discard any
108 windows with less than N sequences (Default:2).
109
110 --max-seqs=N Maximum number of sequences in an alignment. If the
111 number of sequences in a window is higher than N, a subset of
112 sequences is used with exactly N sequences. The greedy algorithm of
113 the program "rnazSelectSeqs.pl" is used which optimizes for a user
114 specified mean pairwise identity (see "--opt-id"). (Default: 6)
115
116 --num-samples=N Number of different subsets of sequences that is
117 sampled if there are more sequences in the alignment than
118 "--max-seqs". (Default: 1)
119
120 --min-length=N Minimum number of columns of an alignment slice. After
121 removing sequences from the alignment, all-gap columns are
122 removed. If the resulting alignment has fewer than N columns, the
123 complete alignment is discarded.
124
125 --opt-id=X If the number of sequences has to be reduced (see
126 "--max-seqs") a subset of sequences is chosen which is optimized for
127 this value of mean pairwise identity. (In percent, default: 80)
128
129 --max-id=X From pairs of sequences with a pairwise identity higher
130 than X%, one sequence is removed (Default: 99, i.e. only almost
131 identical sequences are removed). NOT IMPLEMENTED
132
133 --forward --reverse --both-strands Output forward, reverse complement
134 or both of the sequences in the windows. Please note: "RNAz" has the
135 same options, so if you use "rnazWindow.pl" for an RNAz screen, we
136 recommend to set the option directly in "RNAz" and leave the default
137 here. (Default: --forward)
138
139 --no-reference By default the first sequence is interpreted as
140 reference sequence. This means, for example, that if the reference
141 sequence is removed during filtering steps the complete alignment is
142 discarded. Also, if there are too many sequences in the alignment, the
143 reference sequence is never removed when choosing an appropriate
144 subset. Having a reference sequence is crucial if you are doing
145 screens of genomic regions. For some other applications it might not
146 be necessary and in such cases you can change the default behaviour by
147 setting this option.
148
149 ]]>
150 </help>
151 <citations> <!-- Reference to cite for this tool, identified by DOI. -->
152 <citation type="doi">10.1142/9789814295291_0009</citation>
153 </citations>
154 </tool>