comparison CDS_search.xml @ 0:eb95bf7f90ae draft

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
author abims-sbr
date Fri, 01 Feb 2019 10:26:37 -0500
parents
children c79bdda8abfb
comparison
equal deleted inserted replaced
-1:000000000000 0:eb95bf7f90ae
1 <tool name="CDS_search" id="cds_search" version="2.1.2">
2
3 <description>
4 ORF and CDS search
5 </description>
6
7 <macros> <!-- Pulls in the macro definitions stored in macros.xml. -->
8 <import>macros.xml</import>
9 </macros>
10
11 <requirements> <!-- No dependency is listed inline; the list comes from expanding the python_required macro (presumably defined in macros.xml). -->
12 <expand macro="python_required" />
13 </requirements>
14
15 <command><![CDATA[
16 #for $input in $inputs
17 ln -s '$input' '$input.element_identifier' &&
18 echo '$input.element_identifier' >> list_files &&
19 #end for
20 ## Each input is linked under its element identifier and recorded in list_files; every step is chained with && so a failed link or script stops the job.
21 ln -s '$__tool_directory__/scripts/dico.py' . &&
22 ## First script: reads list_files and creates the log from its standard output.
23 python '$__tool_directory__/scripts/S01_find_orf_on_multiple_alignment.py'
24 '$__tool_directory__/scripts/code_universel_modified.txt'
25 $length.min_length_seq
26 $nb_species_keep
27 list_files
28 > '$log' &&
29 ## Second script: receives the methionine flag and appends its standard output to the log.
30 python '$__tool_directory__/scripts/S02_remove_too_short_bit_or_whole_sequence.py'
31 $nb_species_keep
32 $methionine
33 $length.min_length_seq
34 $length.min_length_subseq
35 >> '$log' &&
36 ## Third script: appends its standard output to the log.
37 python '$__tool_directory__/scripts/S03_remove_site_with_not_enough_species_represented.py'
38 $nb_species_keep
39 $length.min_length_nuc
40 >> '$log'
41 ]]></command>
42
43 <inputs> <!-- Parameters of the tool; nb_species_keep is passed to all three scripts of the command. -->
44 <param name="inputs" type="data" format="fasta" multiple="true" label="Input files" help="Only a fasta file with nucleic align sequences" />
45 <!-- <param name="code_file" type="data" format="txt" label="Choose your file containing the universal code (codons and their amino acids)" /> -->
46 <!-- The code_file parameter above is disabled: the command passes the bundled scripts/code_universel_modified.txt to the S01 script instead. -->
47 <param name="nb_species_keep" type="integer" value="10" min="2" label="Minimal number of species in each locus" help="If you want to remove all the indels the maximum number of species is required" />
48 <!-- methionine is forwarded as the second argument of the S02 script and is also tested by the output_CDS filters; the tests drive it with "oui" / "non". -->
49 <param name="methionine" type="boolean" checked="true" truevalue="oui" falsevalue="non" label="Do you want to consider the Methionine in the search of CDS? " />
50 <!-- length section: min_length_seq goes to S01 and S02, min_length_subseq to S02, min_length_nuc to S03. -->
51 <section name="length" title="Do you want to choose the minimum length of the CDS?">
52 <param name="min_length_seq" type="integer" value="50" min="0" label="Minimal length of the CDS, in proteic" help="By default it's 50" />
53 <param name="min_length_subseq" type="integer" value="15" min="0" label="Minimal length of the subsequence, in proteic between two series of indels" help="By default it's 15" />
54 <param name="min_length_nuc" type="integer" value="50" min="0" label="Minimal length of the CDS, in nucleic without the indel" help="By default it's 50" />
55 </section>
56 <!-- The three selects below never reach the command line; they are only read by the filter expressions of the output collections. -->
57 <param name="out_BESTORF" type="select" label="Do you want the outputs (dataset collection list) containing files with the BEST ORF? ">
58 <option value="no">No</option>
59 <option value="aa">Yes, with the proteic format</option>
60 <option value="nuc">Yes, with the nucleic format</option>
61 <option value="both">Yes, with the proteic and nucleic format</option>
62 </param>
63
64 <param name="out_CDS" type="select" label="Do you want the outputs (dataset collection list) containing files with CDS? ">
65 <option value="no">No</option>
66 <option value="aa">Yes, with the proteic format</option>
67 <option value="nuc">Yes, with the nucleic format</option>
68 <option value="both">Yes, with the proteic and nucleic format</option>
69 </param>
70
71 <param name="out_CDS_filter" type="select" label="Do you want the outputs (dataset collection list) containing files with CDS without indel? ">
72 <option value="no">No</option>
73 <option value="aa">Yes, with the proteic format</option>
74 <option value="nuc">Yes, with the nucleic format</option>
75 <option value="both">Yes, with the proteic and nucleic format</option>
76 </param>
77 </inputs>
78
79 <outputs> <!-- One log dataset (standard output of the three scripts) plus up to eight list collections, each gated by a filter on the out_* selects. -->
80 <data format="txt" name="log" label="ORF_Search" />
81 <collection name="output_BESTORF_aa" type="list" label="ORF_Search_Best_ORF_aa">
82 <filter>out_BESTORF in ["aa","both"]</filter>
83 <discover_datasets pattern="__name_and_ext__" directory="04_BEST_ORF_aa" />
84 </collection>
85
86 <collection name="output_BESTORF_nuc" type="list" label="ORF_Search_Best_ORF_nuc">
87 <filter>out_BESTORF in ["nuc","both"]</filter>
88 <discover_datasets pattern="__name_and_ext__" directory="04_BEST_ORF_nuc" />
89 </collection>
90 <!-- Plain CDS collections (05_CDS_* directories): kept only when the methionine option is off. -->
91 <collection name="output_CDS_aa" type="list" label="ORF_Search_CDS_aa">
92 <filter>out_CDS in ["aa","both"] and not methionine</filter>
93 <discover_datasets pattern="__name_and_ext__" directory="05_CDS_aa" />
94 </collection>
95
96 <collection name="output_CDS_nuc" type="list" label="ORF_Search_CDS_nuc">
97 <filter>out_CDS in ["nuc","both"] and not methionine</filter>
98 <discover_datasets pattern="__name_and_ext__" directory="05_CDS_nuc" />
99 </collection>
100 <!-- Methionine variants (06_CDS_with_M_* directories): same out_CDS test as above, kept only when the methionine option is on. -->
101 <collection name="output_CDS_M_aa" type="list" label="ORF_Search_CDS_with_M_aa">
102 <filter>out_CDS in ["aa","both"] and methionine</filter>
103 <discover_datasets pattern="__name_and_ext__" directory="06_CDS_with_M_aa" />
104 </collection>
105
106 <collection name="output_CDS_M_nuc" type="list" label="ORF_Search_CDS_with_M_nuc">
107 <filter>out_CDS in ["nuc","both"] and methionine</filter>
108 <discover_datasets pattern="__name_and_ext__" directory="06_CDS_with_M_nuc" />
109 </collection>
110 <!-- Collections read from the 08_CDS_*_MINIMUM_MISSING_SEQUENCES directories, selected by out_CDS_filter. -->
111 <collection name="output_filter_aa" type="list" label="ORF_Search_CDS_without_indel_aa">
112 <filter>out_CDS_filter in ["aa","both"]</filter>
113 <discover_datasets pattern="__name_and_ext__" directory="08_CDS_aa_MINIMUM_MISSING_SEQUENCES" />
114 </collection>
115
116 <collection name="output_filter_nuc" type="list" label="ORF_Search_CDS_without_indel_nuc">
117 <filter>out_CDS_filter in ["nuc","both"]</filter>
118 <discover_datasets pattern="__name_and_ext__" directory="08_CDS_nuc_MINIMUM_MISSING_SEQUENCES" />
119 </collection>
120 </outputs>
121
122 <tests>
123 <!-- Test 1: methionine off ("non"), nb_species_keep 3; asserts the best ORF, plain CDS and without-indel collections. -->
124 <test>
125 <param name="inputs" ftype="fasta" value="inputs/orthogroup_1_with_4_sequences.fasta,inputs/orthogroup_6_with_4_sequences.fasta,inputs/orthogroup_7_with_3_sequences.fasta,inputs/orthogroup_8_with_4_sequences.fasta,inputs/orthogroup_12_with_5_sequences.fasta,inputs/orthogroup_14_with_4_sequences.fasta" />
126 <param name="nb_species_keep" value="3" />
127 <param name="methionine" value="non" />
128 <section name="length">
129 <param name="min_length_seq" value="50" />
130 <param name="min_length_subseq" value="15" />
131 <param name="min_length_nuc" value="50" />
132 </section>
133 <param name="out_BESTORF" value="both" />
134 <param name="out_CDS" value="both" />
135 <param name="out_CDS_filter" value="both" />
136 <output_collection name="output_BESTORF_aa" type="list" count="2">
137 <element name="orthogroup_1_with_3_species" value="outputs_ORF_Search_04_Best_ORF_aa/test1/orthogroup_1_with_3_species.fasta" />
138 <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_04_Best_ORF_aa/test1/orthogroup_7_with_3_species.fasta" />
139 </output_collection>
140 <output_collection name="output_BESTORF_nuc" type="list" count="2">
141 <element name="orthogroup_1_with_3_species" value="outputs_ORF_Search_04_Best_ORF_nuc/test1/orthogroup_1_with_3_species.fasta" />
142 <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_04_Best_ORF_nuc/test1/orthogroup_7_with_3_species.fasta" />
143 </output_collection>
144 <output_collection name="output_CDS_aa" type="list" count="2">
145 <element name="orthogroup_1_with_3_species" value="outputs_ORF_Search_05_CDS_aa/test1/orthogroup_1_with_3_species.fasta" />
146 <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_05_CDS_aa/test1/orthogroup_7_with_3_species.fasta" />
147 </output_collection>
148 <output_collection name="output_CDS_nuc" type="list" count="2">
149 <element name="orthogroup_1_with_3_species" value="outputs_ORF_Search_05_CDS_nuc/test1/orthogroup_1_with_3_species.fasta" />
150 <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_05_CDS_nuc/test1/orthogroup_7_with_3_species.fasta" />
151 </output_collection>
152 <output_collection name="output_filter_aa" type="list" count="1">
153 <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_08_CDS_without_indel_aa/test1/orthogroup_7_with_3_species.fasta" />
154 </output_collection>
155 <output_collection name="output_filter_nuc" type="list" count="1">
156 <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_08_CDS_without_indel_nuc/test1/orthogroup_7_with_3_species.fasta" />
157 </output_collection>
158 </test>
159 <!-- Test 2: methionine on ("oui"), nb_species_keep 2; asserts the best ORF and without-indel collections. NOTE(review): with out_CDS set to "both" the output_CDS_M_aa / output_CDS_M_nuc filters also pass, yet no output_collection assertion covers them here. -->
160 <test>
161 <param name="inputs" ftype="fasta" value="inputs/orthogroup_1_with_4_sequences.fasta,inputs/orthogroup_6_with_4_sequences.fasta,inputs/orthogroup_7_with_3_sequences.fasta,inputs/orthogroup_8_with_4_sequences.fasta,inputs/orthogroup_12_with_5_sequences.fasta,inputs/orthogroup_14_with_4_sequences.fasta" />
162 <param name="nb_species_keep" value="2" />
163 <param name="methionine" value="oui" />
164 <section name="length">
165 <param name="min_length_seq" value="50" />
166 <param name="min_length_subseq" value="15" />
167 <param name="min_length_nuc" value="50" />
168 </section>
169 <param name="out_BESTORF" value="both" />
170 <param name="out_CDS" value="both" />
171 <param name="out_CDS_filter" value="both" />
172 <output_collection name="output_BESTORF_aa" type="list" count="4">
173 <element name="orthogroup_1_with_3_species" value="outputs_ORF_Search_04_Best_ORF_aa/test2/orthogroup_1_with_3_species.fasta" />
174 <element name="orthogroup_6_with_2_species" value="outputs_ORF_Search_04_Best_ORF_aa/test2/orthogroup_6_with_2_species.fasta" />
175 <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_04_Best_ORF_aa/test2/orthogroup_7_with_3_species.fasta" />
176 <element name="orthogroup_14_with_2_species" value="outputs_ORF_Search_04_Best_ORF_aa/test2/orthogroup_14_with_2_species.fasta" />
177 </output_collection>
178 <output_collection name="output_BESTORF_nuc" type="list" count="4">
179 <element name="orthogroup_1_with_3_species" value="outputs_ORF_Search_04_Best_ORF_nuc/test2/orthogroup_1_with_3_species.fasta" />
180 <element name="orthogroup_6_with_2_species" value="outputs_ORF_Search_04_Best_ORF_nuc/test2/orthogroup_6_with_2_species.fasta" />
181 <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_04_Best_ORF_nuc/test2/orthogroup_7_with_3_species.fasta" />
182 <element name="orthogroup_14_with_2_species" value="outputs_ORF_Search_04_Best_ORF_nuc/test2/orthogroup_14_with_2_species.fasta" />
183 </output_collection>
184 <output_collection name="output_filter_aa" type="list" count="1">
185 <element name="orthogroup_14_with_2_species" value="outputs_ORF_Search_08_CDS_without_indel_aa/test2/orthogroup_14_with_2_species.fasta" />
186 </output_collection>
187 <output_collection name="output_filter_nuc" type="list" count="1">
188 <element name="orthogroup_14_with_2_species" value="outputs_ORF_Search_08_CDS_without_indel_nuc/test2/orthogroup_14_with_2_species.fasta" />
189 </output_collection>
190 </test>
191
192 </tests>
193 <help>
194
195 @HELP_AUTHORS@
196
197 <![CDATA[
198
199 **Description**
200
201 This tool takes files containing aligned nucleic sequences and searches for the ORF and the CDS.
202
203 --------
204
205 **Inputs**
206
207 Input files : (multiple) fasta files with nucleic aligned sequences.
208
209 --------
210
211 **Parameters**
212
213 - methionine : choose to consider the methionine in the search of CDS.
214 yes/no.
215
216 - 'Minimal number of species in each locus'
217 Default : 10 (integer).
218
219 - 'min_length_seq' :
220 minimal length of the sequence (in amino acids).
221 when the removal of the indel is done, the minimal length equals : previous length - 20.
222 for example if you choose 50 for the minimal length, the actual length equals 30.
223 Default : 50 (integer).
224
225 - 'min_length_subseq' :
226 minimal length of the subsequence (in amino acids).
227 subsequence means the part of the original sequence between 2 sets of indels.
228 an indel set is composed of more than 2 indels; otherwise it is considered as an unknown amino acid.
229 Default : 15 (integer).
230
231 - 'min_length_nuc' :
232 Minimal length of the sequence in the nucleic format, without indels.
233 Default : 50 (integer).
234
235 - other parameters allowing you to choose which outputs you want :
236 - outputs with best ORFs.
237 - outputs with CDS, with or without indels.
238 - in proteic or nucleic format.
239
240 --------
241
242 **Outputs**
243
244 - ORF_Search
245 the log file (mainly statistics about the tool).
246
247 - ORF_Search_Best_ORF_aa
248 the output with the best ORF in the proteic format.
249
250 - ORF_Search_Best_ORF_nuc
251 the output with the best ORF in the nucleic format.
252
253 - ORF_Search_CDS_aa
254 the output with the CDS (regardless of the Methionine) in the proteic format.
255
256 - ORF_Search_CDS_nuc
257 the output with the CDS (regardless of the Methionine) in the nucleic format.
258
259 - ORF_Search_CDS_with_M_aa
260 the output with the CDS (considering the Methionine) in proteic format.
261 the rule : they must have a methionine before the minimal length of the sequence.
262 for example before the 30 last amino acid.
263
264 - ORF_Search_CDS_with_M_nuc
265 the output with the CDS (considering the Methionine) in nucleic format.
266 the rule : they must have a methionine before the minimal length of the sequence.
267 for example before the 30 last amino acid.
268
269 - ORF_Search_CDS_without_indel_aa
270 is the output with the CDS without indel in proteic format.
271 considering the Methionine or not : according to the option chosen.
272
273 - ORF_Search_CDS_without_indel_nuc
274 is the output with the CDS without indel in nucleic format.
275 considering the Methionine or not : according to the option chosen.
276
277 ---------
278
279 **The AdaptSearch Pipeline**
280
281 .. image:: adaptsearch_picture_helps.png
282
283 ---------
284
285 Changelog
286 ---------
287
288 **Version 2.0 - 05/07/2017**
289
290 - NEW: Replace the zip between tools by Dataset Collection
291
292 **Version 1.0 - 13/04/2017**
293
294 - Added functional test with planemo
295 - planemo test with conda dependency for python
296 - Scripts renamed + symlinks to the directory 'scripts'
297
298 ]]>
299
300 </help>
301
302 <citations>
303
304 </citations>
305
306 </tool>