comparison rnacode.xml @ 0:2c51e264432a draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/rnacode commit 3f891a4e86b4b127815dc72a4292c232cda79293
author rnateam
date Fri, 19 Jun 2015 11:13:11 -0400
parents
children 7a84c6c1c4e0
comparison
equal deleted inserted replaced
-1:000000000000 0:2c51e264432a
1 <tool id="rbc_rnacode" name="RNAcode" version="0.3.0">
2 <description>Analyze the protein coding potential in MSA</description>
3 <requirements> <!-- software packages that must be available before the tool runs -->
4 <requirement version="0.3" type="package">rnacode</requirement>
5 </requirements>
6 <stdio> <!-- every non-zero exit status, positive or negative, is reported as a fatal error -->
7 <exit_code level="fatal" range="1:" description="Error occurred. Please check Tool Standard Error"/>
8 <exit_code level="fatal" range=":-1" description="Error occurred. Please check Tool Standard Error"/>
9 </stdio>
10 <version_command>RNAcode --version</version_command>
11 <command>
12 <![CDATA[
13 RNAcode
14 ## The select value itself is the output format flag (--tabular or --gtf).
15 $outputFormat
16 ## Optional numeric options: the flag is emitted only when the value is truthy (set and non-zero).
17 #if $cutoff
18 --cutoff $cutoff
19 #end if
20
21 #if $num_samples
22 --num-samples $num_samples
23 #end if
24 ## Boolean parameters expand to their truevalue flag when checked and to nothing otherwise.
25 $stop_early
26 $best_region
27 $best_only
28
29 #if $cond_generateEPS.generateEPS == 'create'
30 --eps
31 #if $cond_generateEPS.eps_cutoff
32 --eps-cutoff $cond_generateEPS.eps_cutoff
33 #end if
34 #end if
35 ## 'pars' is declared inside cond_scoringParameters, so it has to be addressed through that conditional.
36 #if $cond_scoringParameters.scoringParameters == 'custom'
37 --pars '$cond_scoringParameters.pars'
38 #end if
39 ## Dataset paths are single-quoted so that unusual characters cannot split the argument.
40 '$alignment'
41 ## Write to whichever output dataset matches the selected format (see the output filters).
42 #if $outputFormat.value == '--tabular'
43 --outfile '$outFileDefault'
44 #elif $outputFormat.value == '--gtf'
45 --outfile '$outFileGTF'
46 #end if
47
48 ]]>
49 </command>
50 <inputs> <!-- argument attributes mirror the hyphenated RNAcode option names used in the command template -->
51 <param name="alignment" type="data" format="clustal,maf" label="Multiple Alignment" help="Alignment needs to be formatted in ClustalW or MAF format"/>
52 <param argument="--cutoff" name="cutoff" type="float" optional="true" value="1.0" label="Cutoff" help="Show only regions that have a p-value below the given number. By default all hits are shown."/>
53 <param argument="--num-samples" name="num_samples" type="integer" optional="true" value="100" label="Number of samples" help="Number of random alignments that are sampled to calculate the p-value. RNAcode estimates the significance of a coding prediction by sampling a given number of random alignments. Default is 100 which gives reasonably stable p-values that are useful for assessing the relevance of a prediction."/>
54 <param argument="--stop-early" name="stop_early" type="boolean" truevalue="--stop-early" falsevalue="" checked="false" label="Stop early" help="Setting this option stops the sampling process as soon as it is clear that the best hit will not fall below the given p-value cutoff. For example, assume a p-value cutoff of 0.05 (see --cutoff) and a sample size of 1000 is given (see --num-samples). As soon as 50 random samples score better than the original alignment, the process is stopped and all hits in the original alignment are reported as p>0.05 (or by convention as 1.0 in gtf and tabular output)."/>
55 <param argument="--best-region" name="best_region" type="boolean" truevalue="--best-region" falsevalue="" checked="false" label="Show only best non-overlapping hits" help="By default all positive scoring segments are shown in the output if they fall below the given p-value cutoff. If two hits overlap (different frame or different strand) and --best-region is given only the hit with the highest score is shown. Strong coding regions often lead to statistically significant signals also in other frames. These hits are suppressed by this option and only the correct reading frame is reported."/>
56 <param argument="--best-only" name="best_only" type="boolean" truevalue="--best-only" falsevalue="" checked="false" label="Show only best hit" help="This option shows only the one single best hit for each alignment."/>
57 <conditional name="cond_scoringParameters">
58 <param name="scoringParameters" type="select" label="Scoring parameters" help="">
59 <option value="default" selected="true">Default</option>
60 <option value="custom">Custom</option>
61 </param>
62 <when value="default"/>
63 <when value="custom">
64 <param argument="--pars" name="pars" type="text" label="Scoring parameters as comma separated string:'DELTA,OMEGA,omega,stop_penalty'" help="See the appendix of the Paper for an explanation for the meaning of these parameters. Default: '-10.0,-4.0,-2.0,-8.0'"/>
65 </when>
66 </conditional>
67 <conditional name="cond_generateEPS">
68 <param name="generateEPS" type="select" label="Create colored plots in EPS format" help="The generated plots are resolution independent vector graphics that can be included in any graphics software. For each high scoring segment below a given cutoff (see --eps-cutoff) a file named hss-N.eps is created (N is the running number of the high scoring segment)">
69 <option value="create" selected="true">Create Plots</option>
70 <option value="nocreate">Do not generate EPS plots</option>
71 </param>
72 <when value="create">
73 <param argument="--eps-cutoff" name="eps_cutoff" type="float" optional="true" value="0.05" label="Create plots only for high scoring segments with p better than:" help=""/>
74 </when>
75 <when value="nocreate"/>
76 </conditional>
77 <!-- The selected option value is inserted verbatim into the command line as the format flag. -->
78 <param name="outputFormat" type="select" label="Output format">
79 <option value="--tabular" selected="true">Default</option>
80 <option value="--gtf">GTF genome annotation file</option>
81 </param>
82 </inputs>
83 <outputs> <!-- only the result dataset whose filter matches outputFormat is kept; the command template passes it via the outfile option -->
84 <data name="outFileDefault" format="tabular" label="${tool.name} on ${on_string}">
85 <filter>outputFormat == '--tabular'</filter>
86 </data>
87 <data name="outFileGTF" format="gtf" label="${tool.name} on ${on_string}">
88 <filter>outputFormat == '--gtf'</filter>
89 </data>
90 <collection name="output_eps" type="list" label="Plots for ${tool.name} on ${on_string}">
91 <filter>cond_generateEPS['generateEPS'] == "create"</filter>
92 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.eps" directory="eps" ext="eps" visible="false"/>
93 </collection>
94 </outputs>
95 <tests>
96 <test>
97 <param value="coding.aln" name="alignment"/>
98 <param value="nocreate" name="generateEPS"/>
99 <param value="--tabular" name="outputFormat"/>
100 <output file="rnacode_result1.tabular" name="outFileDefault" ftype="tabular" compare="sim_size"/>
101 <!-- compared by size only: RNAcode samples randomly, so result files differ between runs; stricter tests are still to be written -->
102 </test>
103 <test>
104 <param value="coding.maf" name="alignment"/>
105 <param value="nocreate" name="generateEPS"/>
106 <param value="--gtf" name="outputFormat"/>
107 <output file="rnacode_result2.gtf" name="outFileGTF" ftype="gtf" compare="sim_size"/>
108 <!-- compared by size only: RNAcode samples randomly, so result files differ between runs; stricter tests are still to be written -->
109 </test>
110 </tests>
111 <help>
112 <![CDATA[
113 **RNAcode**
114 Predicts protein coding regions in an alignment of homologous
115 nucleotide sequences. The prediction is based on evolutionary
116 signatures typical for protein genes, i.e. the presence of
117 synonymous/conservative nucleotide mutations, conservation of the
118 reading frame and absence of stop codons.
119
120 RNAcode does not rely on any species specific sequence characteristics
121 whatsoever and does not use any machine learning techniques. The only
122 input required for RNAcode is a multiple sequence alignment either in
123 MAF or Clustal W format. RNAcode reports local regions of unusually high
124 coding potential together with an associated p-value.
125
126 **Input alignment**
127
128 The input alignment needs to be formatted in ClustalW format or MAF
129 format (http://genome.ucsc.edu/FAQ/FAQformat#format5). The latter
130 format allows genomic coordinates to be included, which can be used to
131 produce annotation files.
132
133 Important: RNAcode uses the first sequence as reference sequence,
134 i.e. all results and reported coding regions apply to this reference
135 sequence.
136
137 Currently the alignment has to contain at least 3 sequences. Gaps
138 have to be given as dash ('-'). Unspecified letters given as 'N' are
139 allowed and treated neutrally during all calculations. No difference is
140 made between uppercase or lowercase input, i.e. 'softly'-repeat masked
141 sequences which use lowercase letters for masked regions are treated
142 the same way as unmasked sequences.
143
144 **Output format**
145
146 In the default output each prediction is reported on one line with 10 fields.
147
148 1. HSS id
149 Unique running number for each high scoring segment
150 predicted in one RNAcode call
151
152 2. Frame
153 The reading frame phasing relative to the starting
154 nucleotide position in the reference sequence. +1 means
155 that the first nucleotide in the reference sequence is in
156 the same frame as the predicted coding region. Negative
157 frames indicate that the predicted regions are on the
158 reverse complement strand.
159
160 3. Length
161 The length of the predicted region in amino acids
162
163 4. From 5. To
164 The position of the first/last amino acid in the translated
165 nucleotide sequence of the reference sequence starting
166 with 1.
167
168 6. Name
169 The name of the reference sequence as given in the input alignment.
170
171 7. Start 8. End
172 The nucleotide position in the reference sequence of the
173 predicted coding region. If no genomic coordinates are given
174 (if you provide a CLUSTAL W alignment as input) the first nucleotide position in
175 the reference sequence is set to 1, otherwise the positions are the
176 1-based genomic coordinates as given in the input MAF file.
177
178 9. Score
179 The coding potential score. High scores indicate high coding potential.
180
181 10. P
182 The p-value associated with the score. This is the probability
183 that a random alignment with same properties contains an equally good
184 or better hit.
185 ]]>
186 </help>
187 <citations> <!-- publication to cite for this tool, referenced by DOI -->
188 <citation type="doi">10.1261/rna.2536111</citation>
189 </citations>
190 </tool>