Mercurial > repos > iuc > raxml
comparison raxml.py @ 0:6805e85573b8 draft
planemo upload for repository https://github.com/stamatak/standard-RAxML commit 174be06d7c7e7789df16ea5d5068f20b21257a2f
author | iuc |
---|---|
date | Mon, 14 Nov 2016 14:03:39 -0500 |
parents | |
children | ba29b5e2a4be |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:6805e85573b8 |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 Runs RAxML on a sequence file. | |
4 For use with RAxML version 8.2.4 | |
5 """ | |
6 import fnmatch | |
7 import glob | |
8 import optparse | |
9 import os | |
10 import subprocess | |
11 import sys | |
12 | |
13 | |
def stop_err(msg):
    """Write *msg* to stderr and terminate the process with a failure status.

    The message is followed by a newline.  The process exits with status 1
    so that callers relying on the exit code (rather than on stderr content)
    also see the failure; a bare ``sys.exit()`` would report success.
    """
    sys.stderr.write("%s\n" % msg)
    sys.exit(1)
17 | |
18 | |
def getint(name):
    """Return the integer that follows ``RUN.`` or ``PARTITION.`` in *name*.

    Used as a sort key so that per-run and per-partition output files are
    ordered numerically (``...RUN.10`` after ``...RUN.9``) instead of in the
    arbitrary order returned by ``glob``.

    The ``RUN.`` marker is tried first; ``PARTITION.`` is only consulted when
    the name contains no ``RUN.`` suffix.  Returns ``None`` when neither
    marker is followed by any text.  Raises ``ValueError`` when the text
    after the marker is not a plain integer.
    """
    for marker in ('RUN.', 'PARTITION.'):
        # str.partition yields ('', '', '')-style padding when the marker is
        # absent, so an empty tail means "marker not present / nothing after".
        suffix = name.partition(marker)[2]
        if suffix != '':
            return int(suffix)
    return None
24 | |
25 | |
def _build_option_parser():
    """Return the optparse parser describing every option of the wrapper.

    The comment above each option names the RAxML flag it is mapped to.
    """
    usage = "usage: %prog -T <threads> -s <input> -n <output> -m <model> [optional arguments]"

    # Parse the primary wrapper's command line options
    parser = optparse.OptionParser(usage=usage)
    # raxml binary name, hardcoded in the xml file
    parser.add_option("--binary", action="store", type="string", dest="binary", help="Command to run")
    # (-a)
    parser.add_option("--weightfile", action="store", type="string", dest="weightfile", help="Column weight file")
    # (-A)
    parser.add_option("--secondary_structure_model", action="store", type="string", dest="secondary_structure_model", help="Secondary structure model")
    # (-b)
    parser.add_option("--bootseed", action="store", type="int", dest="bootseed", help="Bootstrap random number seed")
    # (-c)
    parser.add_option("--numofcats", action="store", type="int", dest="numofcats", help="Number of distinct rate categories")
    # (-d)
    parser.add_option("--search_complete_random_tree", action="store_true", dest="search_complete_random_tree", help="Search with a complete random starting tree")
    # (-D)
    parser.add_option("--ml_search_convergence", action="store_true", dest="ml_search_convergence", help="ML search convergence criterion")
    # (-e)
    parser.add_option("--model_opt_precision", action="store", type="float", dest="model_opt_precision", help="Model Optimization Precision (-e)")
    # (-E)
    parser.add_option("--excludefile", action="store", type="string", dest="excludefile", help="Exclude File Name")
    # (-f)
    parser.add_option("--search_algorithm", action="store", type="string", dest="search_algorithm", help="Search Algorithm")
    # (-F)
    parser.add_option("--save_memory_cat_model", action="store_true", dest="save_memory_cat_model", help="Save memory under CAT and GTRGAMMA models")
    # (-g)
    parser.add_option("--groupingfile", action="store", type="string", dest="groupingfile", help="Grouping File Name")
    # (-G)
    parser.add_option("--enable_evol_heuristics", action="store_true", dest="enable_evol_heuristics", help="Enable evol algo heuristics")
    # (-i)
    parser.add_option("--initial_rearrangement_setting", action="store", type="int", dest="initial_rearrangement_setting", help="Initial Rearrangement Setting")
    # (-I)
    parser.add_option("--posterior_bootstopping_analysis", action="store", type="string", dest="posterior_bootstopping_analysis", help="Posterior bootstopping analysis")
    # (-J)
    parser.add_option("--majority_rule_consensus", action="store", type="string", dest="majority_rule_consensus", help="Majority rule consensus")
    # (-k)
    parser.add_option("--print_branch_lengths", action="store_true", dest="print_branch_lengths", help="Print branch lengths")
    # (-K)
    parser.add_option("--multistate_sub_model", action="store", type="string", dest="multistate_sub_model", help="Multistate substitution model")
    # (-m)
    parser.add_option("--model_type", action="store", type="string", dest="model_type", help="Model Type")
    parser.add_option("--base_model", action="store", type="string", dest="base_model", help="Base Model")
    parser.add_option("--aa_empirical_freq", action="store_true", dest="aa_empirical_freq", help="Use AA Empirical base frequencies")
    parser.add_option("--aa_search_matrix", action="store", type="string", dest="aa_search_matrix", help="AA Search Matrix")
    # (-n)
    parser.add_option("--name", action="store", type="string", dest="name", help="Run Name")
    # (-N/#)
    parser.add_option("--number_of_runs", action="store", type="int", dest="number_of_runs", help="Number of alternative runs")
    parser.add_option("--number_of_runs_bootstop", action="store", type="string", dest="number_of_runs_bootstop", help="Number of alternative runs based on the bootstop criteria")
    # (-M)
    parser.add_option("--estimate_individual_branch_lengths", action="store_true", dest="estimate_individual_branch_lengths", help="Estimate individual branch lengths")
    # (-o)
    parser.add_option("--outgroup_name", action="store", type="string", dest="outgroup_name", help="Outgroup Name")
    # (-O)
    parser.add_option("--disable_undetermined_seq_check", action="store_true", dest="disable_undetermined_seq_check", help="Disable undetermined sequence check")
    # (-p)
    parser.add_option("--random_seed", action="store", type="int", dest="random_seed", help="Random Number Seed")
    # (-P)
    parser.add_option("--external_protein_model", action="store", type="string", dest="external_protein_model", help="External Protein Model")
    # (-q)
    parser.add_option("--multiple_model", action="store", type="string", dest="multiple_model", help="Multiple Model File")
    # (-r)
    parser.add_option("--constraint_file", action="store", type="string", dest="constraint_file", help="Constraint File")
    # (-R)
    parser.add_option("--bin_model_parameter_file", action="store", type="string", dest="bin_model_parameter_file", help="Binary model parameter file")
    # (-s)
    parser.add_option("--source", action="store", type="string", dest="source", help="Input file")
    # (-S)
    parser.add_option("--secondary_structure_file", action="store", type="string", dest="secondary_structure_file", help="Secondary structure file")
    # (-t)
    parser.add_option("--starting_tree", action="store", type="string", dest="starting_tree", help="Starting Tree")
    # (-T)
    parser.add_option("--threads", action="store", type="int", dest="threads", help="Number of threads to use")
    # (-u)
    parser.add_option("--use_median_approximation", action="store_true", dest="use_median_approximation", help="Use median approximation")
    # (-U)
    parser.add_option("--save_memory_gappy_alignments", action="store_true", dest="save_memory_gappy_alignments", help="Save memory in large gapped alignments")
    # (-V)
    parser.add_option("--disable_rate_heterogeneity", action="store_true", dest="disable_rate_heterogeneity", help="Disable rate heterogeneity")
    # (-W)
    parser.add_option("--sliding_window_size", action="store", type="string", dest="sliding_window_size", help="Sliding window size")
    # (-x)
    parser.add_option("--rapid_bootstrap_random_seed", action="store", type="int", dest="rapid_bootstrap_random_seed", help="Rapid Bootstrap Random Seed")
    # (-y)
    parser.add_option("--parsimony_starting_tree_only", action="store_true", dest="parsimony_starting_tree_only", help="Generate a parsimony starting tree only")
    # (-z)
    parser.add_option("--file_multiple_trees", action="store", type="string", dest="file_multiple_trees", help="Multiple Trees File")
    return parser


def _concatenate(filenames, pattern, destination, empty_message=None):
    """Concatenate the files in *filenames* matching *pattern* into *destination*.

    Each copied file is preceded by one line holding its own name.  The
    destination is always (re)created; when nothing matches it is left empty
    unless *empty_message* is given, in which case that text is written.
    """
    matching = [name for name in filenames if fnmatch.fnmatch(name, pattern)]
    with open(destination, 'w') as outfile:
        if not matching and empty_message is not None:
            outfile.write(empty_message)
        for filename in matching:
            outfile.write("%s\n" % filename)
            with open(filename, 'r') as infile:
                for line in infile:
                    outfile.write(line)


def __main__():
    """Translate the wrapper's options into a RAxML command line and run it.

    Steps, in order:

    1. Parse ``sys.argv`` and build the RAxML command.  The run name is
       always ``galaxy`` so output file names are predictable.
    2. Run the command through the shell, echoing its stdout/stderr.
    3. For multiple non-bootstrap runs, merge the per-run log, parsimony
       tree and result files into single ``RAxML_*.galaxy`` files.
    4. When a multiple-model file was given, merge the per-partition
       best-tree and result files.
    5. Append a dump of the parsed options to ``RAxML_info.galaxy``.

    All output files are read from and written to the current directory.
    """
    parser = _build_option_parser()
    options, _ = parser.parse_args()

    # Without a binary there is nothing to run; fail with a usage error
    # instead of a TypeError when the command string is joined.
    if not options.binary:
        parser.error("--binary is required")

    cmd = []

    # Required parameters
    cmd.append(options.binary)
    # Threads: -T is only passed for more than one thread; the option is
    # None when not supplied, which must not be compared with an int.
    if options.threads and options.threads > 1:
        cmd.append("-T %d" % options.threads)
    # Source
    cmd.append("-s %s" % options.source)
    # The run name is hardcoded to "galaxy" (--name is accepted but ignored)
    # to simplify the output part of the wrapper.
    cmd.append("-n galaxy")
    # Model
    base_model = options.base_model
    if options.model_type == 'aminoacid':
        model = "-m %s%s" % (base_model, options.aa_search_matrix)
        if options.aa_empirical_freq:
            model = "-m %s%s%s" % (base_model, options.aa_search_matrix, 'F')
        # (-P)
        if options.external_protein_model:
            cmd.append("-P %s" % options.external_protein_model)
    else:
        model = "-m %s" % base_model
    cmd.append(model)
    # (-c) Compare the bare model name: `model` already carries the "-m "
    # prefix and therefore never equals "GTRCAT".
    if base_model == "GTRCAT" and options.numofcats:
        cmd.append("-c %d" % options.numofcats)

    # Optional parameters
    if options.number_of_runs_bootstop:
        cmd.append("-N %s" % options.number_of_runs_bootstop)
    if options.number_of_runs:
        cmd.append("-N %d" % options.number_of_runs)
    # (-a)
    if options.weightfile:
        cmd.append("-a %s" % options.weightfile)
    # (-A)
    if options.secondary_structure_model:
        cmd.append("-A %s" % options.secondary_structure_model)
    # (-b)
    if options.bootseed:
        cmd.append("-b %d" % options.bootseed)
    # -C - doesn't work in pthreads version, skipped
    if options.search_complete_random_tree:
        cmd.append("-d")
    if options.ml_search_convergence:
        cmd.append("-D")
    if options.model_opt_precision:
        cmd.append("-e %f" % options.model_opt_precision)
    if options.excludefile:
        cmd.append("-E %s" % options.excludefile)
    if options.search_algorithm:
        cmd.append("-f %s" % options.search_algorithm)
    if options.save_memory_cat_model:
        cmd.append("-F")
    if options.groupingfile:
        cmd.append("-g %s" % options.groupingfile)
    if options.enable_evol_heuristics:
        # NOTE(review): the option is a boolean flag, so this always emits
        # "-G 1.000000"; -G presumably expects a user-chosen threshold.
        # Left as is because changing the option type would alter the CLI.
        cmd.append("-G %f" % options.enable_evol_heuristics)
    if options.initial_rearrangement_setting:
        cmd.append("-i %s" % options.initial_rearrangement_setting)
    if options.posterior_bootstopping_analysis:
        cmd.append("-I %s" % options.posterior_bootstopping_analysis)
    if options.majority_rule_consensus:
        cmd.append("-J %s" % options.majority_rule_consensus)
    if options.print_branch_lengths:
        cmd.append("-k")
    if options.multistate_sub_model:
        cmd.append("-K %s" % options.multistate_sub_model)
    if options.estimate_individual_branch_lengths:
        cmd.append("-M")
    if options.outgroup_name:
        cmd.append("-o %s" % options.outgroup_name)
    if options.disable_undetermined_seq_check:
        cmd.append("-O")
    if options.random_seed:
        cmd.append("-p %d" % options.random_seed)
    if options.multiple_model:
        cmd.append("-q %s" % options.multiple_model)
    if options.constraint_file:
        cmd.append("-r %s" % options.constraint_file)
    if options.bin_model_parameter_file:
        bin_model_parameter_file_name = "RAxML_binaryModelParameters.galaxy"
        os.symlink(options.bin_model_parameter_file, bin_model_parameter_file_name)
        # Needs testing. Is the hardcoded name or the real path needed?
        cmd.append("-R %s" % options.bin_model_parameter_file)
    if options.secondary_structure_file:
        cmd.append("-S %s" % options.secondary_structure_file)
    if options.starting_tree:
        cmd.append("-t %s" % options.starting_tree)
    if options.use_median_approximation:
        cmd.append("-u")
    if options.save_memory_gappy_alignments:
        cmd.append("-U")
    if options.disable_rate_heterogeneity:
        cmd.append("-V")
    if options.sliding_window_size:
        # The option is parsed as a string, so it must not be formatted
        # with %d (that raised TypeError whenever the option was given).
        cmd.append("-W %s" % options.sliding_window_size)
    if options.rapid_bootstrap_random_seed:
        cmd.append("-x %d" % options.rapid_bootstrap_random_seed)
    if options.parsimony_starting_tree_only:
        cmd.append("-y")
    if options.file_multiple_trees:
        cmd.append("-z %s" % options.file_multiple_trees)

    # Same bytes as the former Python 2 print statements, but valid on
    # both Python 2 and Python 3.
    sys.stdout.write("cmd list:  %s \n\n" % (cmd,))

    full_cmd = " ".join(cmd)
    sys.stdout.write("Command string: %s\n" % full_cmd)

    # NOTE(review): the command is a shell string assembled from option
    # values without quoting; paths containing spaces or shell
    # metacharacters will be misinterpreted.
    try:
        # universal_newlines=True yields text rather than bytes on Python 3,
        # so the output can be written straight to sys.stdout/sys.stderr.
        proc = subprocess.Popen(args=full_cmd, shell=True, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, universal_newlines=True)
    except Exception as err:
        sys.stderr.write("Error invoking command: \n%s\n\n%s\n" % (cmd, err))
        sys.exit(1)
    stdout, stderr = proc.communicate()
    return_code = proc.returncode
    if return_code:
        # NOTE(review): the failure is reported on stderr only; the wrapper
        # still runs the post-processing below and exits with status 0.
        sys.stdout.write(stdout)
        sys.stderr.write(stderr)
        sys.stderr.write("Return error code %i from command:\n" % return_code)
        sys.stderr.write("%s\n" % cmd)
    else:
        sys.stdout.write(stdout)
        sys.stdout.write(stderr)

    # Multiple runs - concatenate (only for non-bootstrap analyses)
    if options.number_of_runs and not options.bootseed and not options.rapid_bootstrap_random_seed:
        runfiles = glob.glob('RAxML*RUN*')
        runfiles.sort(key=getint)
        # Logs
        _concatenate(runfiles, 'RAxML_log.galaxy.RUN.*', 'RAxML_log.galaxy')
        # Parsimony Trees
        _concatenate(runfiles, 'RAxML_parsimonyTree.galaxy.RUN.*', 'RAxML_parsimonyTree.galaxy')
        # Results
        _concatenate(runfiles, 'RAxML_result.galaxy.RUN.*', 'RAxML_result.galaxy')

    # Multiple Model Partition Files
    if options.multiple_model:
        no_partitions = "No partition files were produced.\n"
        # Best Tree Partitions
        pattern = 'RAxML_bestTree.galaxy.PARTITION.*'
        files = glob.glob(pattern)
        files.sort(key=getint)
        _concatenate(files, pattern, 'RAxML_bestTreePartitions.galaxy', no_partitions)
        # Result Partitions
        pattern = 'RAxML_result.galaxy.PARTITION.*'
        files = glob.glob(pattern)
        files.sort(key=getint)
        _concatenate(files, pattern, 'RAxML_resultPartitions.galaxy', no_partitions)

    # DEBUG options: append the parsed options to the RAxML info file
    with open('RAxML_info.galaxy', 'a') as infof:
        infof.write('\nOM: CLI options DEBUG START:\n')
        infof.write(repr(options))
        infof.write('\nOM: CLI options DEBUG END\n')


if __name__ == "__main__":
    __main__()