comparison raxml.py @ 0:6805e85573b8 draft

planemo upload for repository https://github.com/stamatak/standard-RAxML commit 174be06d7c7e7789df16ea5d5068f20b21257a2f
author iuc
date Mon, 14 Nov 2016 14:03:39 -0500
parents
children ba29b5e2a4be
comparison
equal deleted inserted replaced
-1:000000000000 0:6805e85573b8
1 #!/usr/bin/env python
2 """
3 Runs RAxML on a sequence file.
4 For use with RAxML version 8.2.4
5 """
6 import fnmatch
7 import glob
8 import optparse
9 import os
10 import subprocess
11 import sys
12
13
def stop_err(msg):
    """Write *msg* to stderr and terminate the script with a failure status.

    Args:
        msg: Text to report; a trailing newline is appended.

    Raises:
        SystemExit: always, with exit code 1 so that callers inspecting the
            process status see the failure (a bare ``sys.exit()`` would
            report success).
    """
    sys.stderr.write("%s\n" % msg)
    sys.exit(1)
17
18
def getint(name):
    """Return the numeric suffix of a RAxML per-run or per-partition file name.

    Used as a sort key so that e.g. ``...RUN.10`` sorts after ``...RUN.2``.
    The text following the first ``RUN.`` marker is used when present;
    otherwise the text following the first ``PARTITION.`` marker is used.

    Args:
        name: File name such as ``RAxML_log.galaxy.RUN.3`` or
            ``RAxML_result.galaxy.PARTITION.0``.

    Returns:
        The suffix as an int, or -1 when the name carries no such suffix.
        An integer is returned (rather than None) so that keys stay mutually
        comparable on every Python version.

    Raises:
        ValueError: if the text after the marker is not an integer.
    """
    for marker in ('RUN.', 'PARTITION.'):
        suffix = name.partition(marker)[2]
        if suffix != '':
            return int(suffix)
    return -1
24
25
def _concatenate_outputs(filenames, pattern, destination):
    """Copy every file in *filenames* matching *pattern* into *destination*.

    Each copied file is preceded by a line containing its own file name.
    *destination* is created (or truncated) even when nothing matches.
    """
    with open(destination, 'w') as outfile:
        for filename in filenames:
            if not fnmatch.fnmatch(filename, pattern):
                continue
            outfile.write("%s\n" % filename)
            with open(filename, 'r') as infile:
                for line in infile:
                    outfile.write(line)


def _merge_partition_files(pattern, destination):
    """Merge the per-partition files matching *pattern* into *destination*.

    When no file matches, *destination* receives a one-line notice instead.
    """
    files = glob.glob(pattern)
    if files:
        files.sort(key=getint)
        _concatenate_outputs(files, pattern, destination)
    else:
        with open(destination, 'w') as outfile:
            outfile.write("No partition files were produced.\n")


def __main__():
    """Translate the wrapper's long options into a RAxML command and run it.

    Steps:
      1. Parse the command line (one long option per RAxML flag).
      2. Build the RAxML command; the run name is always ``galaxy`` so the
         output file names are predictable.
      3. Run the command through the shell and relay its output.
      4. Concatenate the per-run and per-partition output files into single
         files.
      5. Append the parsed options to ``RAxML_info.galaxy`` for debugging.

    Exits with status 1 if the command cannot be started, and with RAxML's
    own return code (after post-processing) if RAxML reports a failure.
    """
    usage = "usage: %prog -T <threads> -s <input> -n <output> -m <model> [optional arguments]"

    # Parse the primary wrapper's command line options
    parser = optparse.OptionParser(usage=usage)
    # raxml binary name, hardcoded in the xml file
    parser.add_option("--binary", action="store", type="string", dest="binary", help="Command to run")
    # (-a)
    parser.add_option("--weightfile", action="store", type="string", dest="weightfile", help="Column weight file")
    # (-A)
    parser.add_option("--secondary_structure_model", action="store", type="string", dest="secondary_structure_model", help="Secondary structure model")
    # (-b)
    parser.add_option("--bootseed", action="store", type="int", dest="bootseed", help="Bootstrap random number seed")
    # (-c)
    parser.add_option("--numofcats", action="store", type="int", dest="numofcats", help="Number of distinct rate categories")
    # (-d)
    parser.add_option("--search_complete_random_tree", action="store_true", dest="search_complete_random_tree", help="Search with a complete random starting tree")
    # (-D)
    parser.add_option("--ml_search_convergence", action="store_true", dest="ml_search_convergence", help="ML search onvergence criterion")
    # (-e)
    parser.add_option("--model_opt_precision", action="store", type="float", dest="model_opt_precision", help="Model Optimization Precision (-e)")
    # (-E)
    parser.add_option("--excludefile", action="store", type="string", dest="excludefile", help="Exclude File Name")
    # (-f)
    parser.add_option("--search_algorithm", action="store", type="string", dest="search_algorithm", help="Search Algorithm")
    # (-F)
    parser.add_option("--save_memory_cat_model", action="store_true", dest="save_memory_cat_model", help="Save memory under CAT and GTRGAMMA models")
    # (-g)
    parser.add_option("--groupingfile", action="store", type="string", dest="groupingfile", help="Grouping File Name")
    # (-G)
    parser.add_option("--enable_evol_heuristics", action="store_true", dest="enable_evol_heuristics", help="Enable evol algo heuristics")
    # (-i)
    parser.add_option("--initial_rearrangement_setting", action="store", type="int", dest="initial_rearrangement_setting", help="Initial Rearrangement Setting")
    # (-I)
    parser.add_option("--posterior_bootstopping_analysis", action="store", type="string", dest="posterior_bootstopping_analysis", help="Posterior bootstopping analysis")
    # (-J)
    parser.add_option("--majority_rule_consensus", action="store", type="string", dest="majority_rule_consensus", help="Majority rule consensus")
    # (-k)
    parser.add_option("--print_branch_lengths", action="store_true", dest="print_branch_lengths", help="Print branch lengths")
    # (-K)
    parser.add_option("--multistate_sub_model", action="store", type="string", dest="multistate_sub_model", help="Multistate substitution model")
    # (-m)
    parser.add_option("--model_type", action="store", type="string", dest="model_type", help="Model Type")
    parser.add_option("--base_model", action="store", type="string", dest="base_model", help="Base Model")
    parser.add_option("--aa_empirical_freq", action="store_true", dest="aa_empirical_freq", help="Use AA Empirical base frequences")
    parser.add_option("--aa_search_matrix", action="store", type="string", dest="aa_search_matrix", help="AA Search Matrix")
    # (-n)
    parser.add_option("--name", action="store", type="string", dest="name", help="Run Name")
    # (-N/#)
    parser.add_option("--number_of_runs", action="store", type="int", dest="number_of_runs", help="Number of alternative runs")
    parser.add_option("--number_of_runs_bootstop", action="store", type="string", dest="number_of_runs_bootstop", help="Number of alternative runs based on the bootstop criteria")
    # (-M)
    parser.add_option("--estimate_individual_branch_lengths", action="store_true", dest="estimate_individual_branch_lengths", help="Estimate individual branch lengths")
    # (-o)
    parser.add_option("--outgroup_name", action="store", type="string", dest="outgroup_name", help="Outgroup Name")
    # (-O)
    parser.add_option("--disable_undetermined_seq_check", action="store_true", dest="disable_undetermined_seq_check", help="Disable undetermined sequence check")
    # (-p)
    parser.add_option("--random_seed", action="store", type="int", dest="random_seed", help="Random Number Seed")
    # (-P)
    parser.add_option("--external_protein_model", action="store", type="string", dest="external_protein_model", help="External Protein Model")
    # (-q)
    parser.add_option("--multiple_model", action="store", type="string", dest="multiple_model", help="Multiple Model File")
    # (-r)
    parser.add_option("--constraint_file", action="store", type="string", dest="constraint_file", help="Constraint File")
    # (-R)
    parser.add_option("--bin_model_parameter_file", action="store", type="string", dest="bin_model_parameter_file", help="Binary model parameter file")
    # (-s)
    parser.add_option("--source", action="store", type="string", dest="source", help="Input file")
    # (-S)
    parser.add_option("--secondary_structure_file", action="store", type="string", dest="secondary_structure_file", help="Secondary structure file")
    # (-t)
    parser.add_option("--starting_tree", action="store", type="string", dest="starting_tree", help="Starting Tree")
    # (-T)
    parser.add_option("--threads", action="store", type="int", dest="threads", help="Number of threads to use")
    # (-u)
    parser.add_option("--use_median_approximation", action="store_true", dest="use_median_approximation", help="Use median approximation")
    # (-U)
    parser.add_option("--save_memory_gappy_alignments", action="store_true", dest="save_memory_gappy_alignments", help="Save memory in large gapped alignments")
    # (-V)
    parser.add_option("--disable_rate_heterogeneity", action="store_true", dest="disable_rate_heterogeneity", help="Disable rate heterogeneity")
    # (-W)
    parser.add_option("--sliding_window_size", action="store", type="string", dest="sliding_window_size", help="Sliding window size")
    # (-x)
    parser.add_option("--rapid_bootstrap_random_seed", action="store", type="int", dest="rapid_bootstrap_random_seed", help="Rapid Boostrap Random Seed")
    # (-y)
    parser.add_option("--parsimony_starting_tree_only", action="store_true", dest="parsimony_starting_tree_only", help="Generate a parsimony starting tree only")
    # (-z)
    parser.add_option("--file_multiple_trees", action="store", type="string", dest="file_multiple_trees", help="Multiple Trees File")

    (options, args) = parser.parse_args()
    if not options.binary:
        # Without a binary there is nothing to run; fail with a usage error
        # instead of a TypeError when the command string is joined.
        parser.error("--binary is required")

    # Each entry is a complete "flag [value]" fragment; the fragments are
    # joined with spaces into one shell command further down.
    cmd = []

    # Required parameters
    cmd.append(options.binary)
    # Threads (the option may be absent, in which case it is None)
    if options.threads and options.threads > 1:
        cmd.append("-T %d" % options.threads)
    # Source
    cmd.append("-s %s" % options.source)
    # Hardcode to "galaxy" first to simplify the output part of the wrapper;
    # --name is accepted but deliberately not forwarded.
    cmd.append("-n galaxy")
    # Model
    if options.model_type == 'aminoacid':
        model_name = "%s%s" % (options.base_model, options.aa_search_matrix)
        if options.aa_empirical_freq:
            model_name += 'F'
        # (-P)
        if options.external_protein_model:
            cmd.append("-P %s" % options.external_protein_model)
    else:
        model_name = options.base_model
    cmd.append("-m %s" % model_name)
    # (-c) Compare the bare model name: the "-m ..." fragment itself can
    # never equal "GTRCAT".
    if model_name == "GTRCAT" and options.numofcats:
        cmd.append("-c %d" % options.numofcats)

    # Optional parameters
    if options.number_of_runs_bootstop:
        cmd.append("-N %s" % options.number_of_runs_bootstop)
    if options.number_of_runs:
        cmd.append("-N %d" % options.number_of_runs)
    # (-a)
    if options.weightfile:
        cmd.append("-a %s" % options.weightfile)
    # (-A)
    if options.secondary_structure_model:
        cmd.append("-A %s" % options.secondary_structure_model)
    # (-b)
    if options.bootseed:
        cmd.append("-b %d" % options.bootseed)
    # -C - doesn't work in pthreads version, skipped
    if options.search_complete_random_tree:
        cmd.append("-d")
    if options.ml_search_convergence:
        cmd.append("-D")
    if options.model_opt_precision:
        cmd.append("-e %f" % options.model_opt_precision)
    if options.excludefile:
        cmd.append("-E %s" % options.excludefile)
    if options.search_algorithm:
        cmd.append("-f %s" % options.search_algorithm)
    if options.save_memory_cat_model:
        cmd.append("-F")
    if options.groupingfile:
        cmd.append("-g %s" % options.groupingfile)
    if options.enable_evol_heuristics:
        # NOTE(review): the option is a store_true flag, so this always
        # yields "-G 1.000000". Confirm whether a user-supplied float
        # threshold was intended before changing the option type.
        cmd.append("-G %f" % options.enable_evol_heuristics)
    if options.initial_rearrangement_setting:
        cmd.append("-i %s" % options.initial_rearrangement_setting)
    if options.posterior_bootstopping_analysis:
        cmd.append("-I %s" % options.posterior_bootstopping_analysis)
    if options.majority_rule_consensus:
        cmd.append("-J %s" % options.majority_rule_consensus)
    if options.print_branch_lengths:
        cmd.append("-k")
    if options.multistate_sub_model:
        cmd.append("-K %s" % options.multistate_sub_model)
    if options.estimate_individual_branch_lengths:
        cmd.append("-M")
    if options.outgroup_name:
        cmd.append("-o %s" % options.outgroup_name)
    if options.disable_undetermined_seq_check:
        cmd.append("-O")
    if options.random_seed:
        cmd.append("-p %d" % options.random_seed)
    if options.multiple_model:
        cmd.append("-q %s" % options.multiple_model)
    if options.constraint_file:
        cmd.append("-r %s" % options.constraint_file)
    if options.bin_model_parameter_file:
        bin_model_parameter_file_name = "RAxML_binaryModelParameters.galaxy"
        os.symlink(options.bin_model_parameter_file, bin_model_parameter_file_name)
        # Needs testing. Is the hardcoded name or the real path needed?
        cmd.append("-R %s" % options.bin_model_parameter_file)
    if options.secondary_structure_file:
        cmd.append("-S %s" % options.secondary_structure_file)
    if options.starting_tree:
        cmd.append("-t %s" % options.starting_tree)
    if options.use_median_approximation:
        cmd.append("-u")
    if options.save_memory_gappy_alignments:
        cmd.append("-U")
    if options.disable_rate_heterogeneity:
        cmd.append("-V")
    if options.sliding_window_size:
        # The option is parsed as a string, so it must not be formatted
        # with %d.
        cmd.append("-W %s" % options.sliding_window_size)
    if options.rapid_bootstrap_random_seed:
        cmd.append("-x %d" % options.rapid_bootstrap_random_seed)
    if options.parsimony_starting_tree_only:
        cmd.append("-y")
    if options.file_multiple_trees:
        cmd.append("-z %s" % options.file_multiple_trees)

    print("cmd list:  %s \n" % cmd)

    full_cmd = " ".join(cmd)
    print("Command string: %s" % full_cmd)

    # NOTE(review): the command is a plain string run with shell=True, so
    # option values containing spaces or shell metacharacters are not
    # escaped. The caller is trusted to supply safe values.
    try:
        # universal_newlines=True makes the pipes return text so the output
        # can be written straight to sys.stdout / sys.stderr.
        proc = subprocess.Popen(args=full_cmd, shell=True, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, universal_newlines=True)
    except Exception as err:
        sys.stderr.write("Error invoking command: \n%s\n\n%s\n" % (cmd, err))
        sys.exit(1)
    stdout, stderr = proc.communicate()
    return_code = proc.returncode
    if return_code:
        sys.stdout.write(stdout)
        sys.stderr.write(stderr)
        sys.stderr.write("Return error code %i from command:\n" % return_code)
        sys.stderr.write("%s\n" % cmd)
    else:
        # On success RAxML's stderr is relayed on stdout, not stderr.
        sys.stdout.write(stdout)
        sys.stdout.write(stderr)

    # Multiple runs - concatenate the per-run files, but only when neither
    # bootstrap seed (-b / -x) was given.
    if options.number_of_runs and not options.bootseed and not options.rapid_bootstrap_random_seed:
        runfiles = glob.glob('RAxML*RUN*')
        runfiles.sort(key=getint)
        # Logs
        _concatenate_outputs(runfiles, 'RAxML_log.galaxy.RUN.*', 'RAxML_log.galaxy')
        # Parsimony Trees
        _concatenate_outputs(runfiles, 'RAxML_parsimonyTree.galaxy.RUN.*', 'RAxML_parsimonyTree.galaxy')
        # Results
        _concatenate_outputs(runfiles, 'RAxML_result.galaxy.RUN.*', 'RAxML_result.galaxy')

    # Multiple Model Partition Files
    if options.multiple_model:
        # Best Tree Partitions
        _merge_partition_files('RAxML_bestTree.galaxy.PARTITION.*', 'RAxML_bestTreePartitions.galaxy')
        # Result Partitions
        _merge_partition_files('RAxML_result.galaxy.PARTITION.*', 'RAxML_resultPartitions.galaxy')

    # DEBUG options
    with open('RAxML_info.galaxy', 'a') as infof:
        infof.write('\nOM: CLI options DEBUG START:\n')
        infof.write(options.__repr__())
        infof.write('\nOM: CLI options DEBUG END\n')

    # Propagate RAxML's failure status so the caller does not see exit code 0
    # for a failed run.
    if return_code:
        sys.exit(return_code)
368
# Script entry point: run the wrapper only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    __main__()