Mercurial > repos > iuc > raxml
diff raxml.py @ 1:ba29b5e2a4be draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/raxml commit b23553a3d29d50e05d8b37a5c5780e3ffc937069
author | iuc |
---|---|
date | Tue, 27 Jun 2017 16:27:39 -0400 |
parents | 6805e85573b8 |
children | a4b71be30c3c |
line wrap: on
line diff
--- a/raxml.py Mon Nov 14 14:03:39 2016 -0500 +++ b/raxml.py Tue Jun 27 16:27:39 2017 -0400 @@ -6,14 +6,6 @@ import fnmatch import glob import optparse -import os -import subprocess -import sys - - -def stop_err(msg): - sys.stderr.write("%s\n" % msg) - sys.exit() def getint(name): @@ -24,347 +16,91 @@ def __main__(): - usage = "usage: %prog -T <threads> -s <input> -n <output> -m <model> [optional arguments]" - # Parse the primary wrapper's command line options - parser = optparse.OptionParser(usage=usage) - # raxml binary name, hardcoded in the xml file - parser.add_option("--binary", action="store", type="string", dest="binary", help="Command to run") - # (-a) - parser.add_option("--weightfile", action="store", type="string", dest="weightfile", help="Column weight file") - # (-A) - parser.add_option("--secondary_structure_model", action="store", type="string", dest="secondary_structure_model", help="Secondary structure model") + parser = optparse.OptionParser() # (-b) - parser.add_option("--bootseed", action="store", type="int", dest="bootseed", help="Bootstrap random number seed") - # (-c) - parser.add_option("--numofcats", action="store", type="int", dest="numofcats", help="Number of distinct rate categories") - # (-d) - parser.add_option("--search_complete_random_tree", action="store_true", dest="search_complete_random_tree", help="Search with a complete random starting tree") - # (-D) - parser.add_option("--ml_search_convergence", action="store_true", dest="ml_search_convergence", help="ML search onvergence criterion") - # (-e) - parser.add_option("--model_opt_precision", action="store", type="float", dest="model_opt_precision", help="Model Optimization Precision (-e)") - # (-E) - parser.add_option("--excludefile", action="store", type="string", dest="excludefile", help="Exclude File Name") - # (-f) - parser.add_option("--search_algorithm", action="store", type="string", dest="search_algorithm", help="Search Algorithm") - # (-F) - parser.add_option("--save_memory_cat_model", action="store_true", dest="save_memory_cat_model", help="Save memory under CAT and GTRGAMMA models") - # (-g) - parser.add_option("--groupingfile", action="store", type="string", dest="groupingfile", help="Grouping File Name") - # (-G) - parser.add_option("--enable_evol_heuristics", action="store_true", dest="enable_evol_heuristics", help="Enable evol algo heuristics") - # (-i) - parser.add_option("--initial_rearrangement_setting", action="store", type="int", dest="initial_rearrangement_setting", help="Initial Rearrangement Setting") - # (-I) - parser.add_option("--posterior_bootstopping_analysis", action="store", type="string", dest="posterior_bootstopping_analysis", help="Posterior bootstopping analysis") - # (-J) - parser.add_option("--majority_rule_consensus", action="store", type="string", dest="majority_rule_consensus", help="Majority rule consensus") - # (-k) - parser.add_option("--print_branch_lengths", action="store_true", dest="print_branch_lengths", help="Print branch lengths") - # (-K) - parser.add_option("--multistate_sub_model", action="store", type="string", dest="multistate_sub_model", help="Multistate substitution model") - # (-m) - parser.add_option("--model_type", action="store", type="string", dest="model_type", help="Model Type") - parser.add_option("--base_model", action="store", type="string", dest="base_model", help="Base Model") - parser.add_option("--aa_empirical_freq", action="store_true", dest="aa_empirical_freq", help="Use AA Empirical base frequences") - parser.add_option("--aa_search_matrix", action="store", type="string", dest="aa_search_matrix", help="AA Search Matrix") - # (-n) - parser.add_option("--name", action="store", type="string", dest="name", help="Run Name") + parser.add_option("--bootseed", action="store", type="int", dest="bootseed", help="Random number for non-parametric bootstrapping") # (-N/#) - parser.add_option("--number_of_runs", action="store", type="int", dest="number_of_runs", help="Number of alternative runs") - parser.add_option("--number_of_runs_bootstop", action="store", type="string", dest="number_of_runs_bootstop", help="Number of alternative runs based on the bootstop criteria") - # (-M) - parser.add_option("--estimate_individual_branch_lengths", action="store_true", dest="estimate_individual_branch_lengths", help="Estimate individual branch lengths") - # (-o) - parser.add_option("--outgroup_name", action="store", type="string", dest="outgroup_name", help="Outgroup Name") - # (-O) - parser.add_option("--disable_undetermined_seq_check", action="store_true", dest="disable_undetermined_seq_check", help="Disable undetermined sequence check") - # (-p) - parser.add_option("--random_seed", action="store", type="int", dest="random_seed", help="Random Number Seed") - # (-P) - parser.add_option("--external_protein_model", action="store", type="string", dest="external_protein_model", help="External Protein Model") + parser.add_option("--number_of_runs", action="store", type="int", dest="number_of_runs", default=1, help="Number of alternative runs") # (-q) parser.add_option("--multiple_model", action="store", type="string", dest="multiple_model", help="Multiple Model File") - # (-r) - parser.add_option("--constraint_file", action="store", type="string", dest="constraint_file", help="Constraint File") - # (-R) - parser.add_option("--bin_model_parameter_file", action="store", type="string", dest="bin_model_parameter_file", help="Constraint File") - # (-s) - parser.add_option("--source", action="store", type="string", dest="source", help="Input file") - # (-S) - parser.add_option("--secondary_structure_file", action="store", type="string", dest="secondary_structure_file", help="Secondary structure file") - # (-t) - parser.add_option("--starting_tree", action="store", type="string", dest="starting_tree", help="Starting Tree") - # (-T) - parser.add_option("--threads", action="store", type="int", dest="threads", help="Number of threads to use") - # (-u) - parser.add_option("--use_median_approximation", action="store_true", dest="use_median_approximation", help="Use median approximation") - # (-U) - parser.add_option("--save_memory_gappy_alignments", action="store_true", dest="save_memory_gappy_alignments", help="Save memory in large gapped alignments") - # (-V) - parser.add_option("--disable_rate_heterogeneity", action="store_true", dest="disable_rate_heterogeneity", help="Disable rate heterogeneity") - # (-W) - parser.add_option("--sliding_window_size", action="store", type="string", dest="sliding_window_size", help="Sliding window size") # (-x) parser.add_option("--rapid_bootstrap_random_seed", action="store", type="int", dest="rapid_bootstrap_random_seed", help="Rapid Boostrap Random Seed") - # (-y) - parser.add_option("--parsimony_starting_tree_only", action="store_true", dest="parsimony_starting_tree_only", help="Generate a parsimony starting tree only") - # (-z) - parser.add_option("--file_multiple_trees", action="store", type="string", dest="file_multiple_trees", help="Multiple Trees File") (options, args) = parser.parse_args() - cmd = [] - - # Required parameters - binary = options.binary - cmd.append(binary) - # Threads - if options.threads > 1: - threads = "-T %d" % options.threads - cmd.append(threads) - # Source - source = "-s %s" % options.source - cmd.append(source) - # Hardcode to "galaxy" first to simplify the output part of the wrapper - # name = "-n %s" % options.name - name = "-n galaxy" - cmd.append(name) - # Model - model_type = options.model_type - base_model = options.base_model - aa_search_matrix = options.aa_search_matrix - aa_empirical_freq = options.aa_empirical_freq - if model_type == 'aminoacid': - model = "-m %s%s" % (base_model, aa_search_matrix) - if aa_empirical_freq: - model = "-m %s%s%s" % (base_model, aa_search_matrix, 'F') - # (-P) - if options.external_protein_model: - external_protein_model = "-P %s" % options.external_protein_model - cmd.append(external_protein_model) - else: - model = "-m %s" % base_model - cmd.append(model) - if model == "GTRCAT": - # (-c) - if options.numofcats: - numofcats = "-c %d" % options.numofcats - cmd.append(numofcats) - # Optional parameters - if options.number_of_runs_bootstop: - number_of_runs_bootstop = "-N %s" % options.number_of_runs_bootstop - cmd.append(number_of_runs_bootstop) - else: - number_of_runs_bootstop = '' - if options.number_of_runs: - number_of_runs_opt = "-N %d" % options.number_of_runs - cmd.append(number_of_runs_opt) - else: - number_of_runs_opt = 0 - # (-a) - if options.weightfile: - weightfile = "-a %s" % options.weightfile - cmd.append(weightfile) - # (-A) - if options.secondary_structure_model: - secondary_structure_model = "-A %s" % options.secondary_structure_model - cmd.append(secondary_structure_model ) - # (-b) - if options.bootseed: - bootseed = "-b %d" % options.bootseed - cmd.append(bootseed) - else: - bootseed = 0 - # -C - doesn't work in pthreads version, skipped - if options.search_complete_random_tree: - cmd.append("-d") - if options.ml_search_convergence: - cmd.append("-D" ) - if options.model_opt_precision: - model_opt_precision = "-e %f" % options.model_opt_precision - cmd.append(model_opt_precision) - if options.excludefile: - excludefile = "-E %s" % options.excludefile - cmd.append(excludefile) - if options.search_algorithm: - search_algorithm = "-f %s" % options.search_algorithm - cmd.append(search_algorithm) - if options.save_memory_cat_model: - cmd.append("-F") - if options.groupingfile: - groupingfile = "-g %s" % options.groupingfile - cmd.append(groupingfile) - if options.enable_evol_heuristics: - enable_evol_heuristics = "-G %f" % options.enable_evol_heuristics - cmd.append(enable_evol_heuristics ) - if options.initial_rearrangement_setting: - initial_rearrangement_setting = "-i %s" % options.initial_rearrangement_setting - cmd.append(initial_rearrangement_setting) - if options.posterior_bootstopping_analysis: - posterior_bootstopping_analysis = "-I %s" % options.posterior_bootstopping_analysis - cmd.append(posterior_bootstopping_analysis) - if options.majority_rule_consensus: - majority_rule_consensus = "-J %s" % options.majority_rule_consensus - cmd.append(majority_rule_consensus) - if options.print_branch_lengths: - cmd.append("-k") - if options.multistate_sub_model: - multistate_sub_model = "-K %s" % options.multistate_sub_model - cmd.append(multistate_sub_model) - if options.estimate_individual_branch_lengths: - cmd.append("-M") - if options.outgroup_name: - outgroup_name = "-o %s" % options.outgroup_name - cmd.append(outgroup_name) - if options.disable_undetermined_seq_check: - cmd.append("-O") - if options.random_seed: - random_seed = "-p %d" % options.random_seed - cmd.append(random_seed) - multiple_model = None - if options.multiple_model: - multiple_model = "-q %s" % options.multiple_model - cmd.append(multiple_model) - if options.constraint_file: - constraint_file = "-r %s" % options.constraint_file - cmd.append(constraint_file) - if options.bin_model_parameter_file: - bin_model_parameter_file_name = "RAxML_binaryModelParameters.galaxy" - os.symlink(options.bin_model_parameter_file, bin_model_parameter_file_name ) - bin_model_parameter_file = "-R %s" % options.bin_model_parameter_file - # Needs testing. Is the hardcoded name or the real path needed? - cmd.append(bin_model_parameter_file) - if options.secondary_structure_file: - secondary_structure_file = "-S %s" % options.secondary_structure_file - cmd.append(secondary_structure_file) - if options.starting_tree: - starting_tree = "-t %s" % options.starting_tree - cmd.append(starting_tree) - if options.use_median_approximation: - cmd.append("-u") - if options.save_memory_gappy_alignments: - cmd.append("-U") - if options.disable_rate_heterogeneity: - cmd.append("-V") - if options.sliding_window_size: - sliding_window_size = "-W %d" % options.sliding_window_size - cmd.append(sliding_window_size) - if options.rapid_bootstrap_random_seed: - rapid_bootstrap_random_seed = "-x %d" % options.rapid_bootstrap_random_seed - cmd.append(rapid_bootstrap_random_seed) - else: - rapid_bootstrap_random_seed = 0 - if options.parsimony_starting_tree_only: - cmd.append("-y") - if options.file_multiple_trees: - file_multiple_trees = "-z %s" % options.file_multiple_trees - cmd.append(file_multiple_trees) - - print "cmd list: ", cmd, "\n" - - full_cmd = " ".join(cmd) - print "Command string: %s" % full_cmd - - try: - proc = subprocess.Popen(args=full_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - except Exception as err: - sys.stderr.write("Error invoking command: \n%s\n\n%s\n" % (cmd, err)) - sys.exit(1) - stdout, stderr = proc.communicate() - return_code = proc.returncode - if return_code: - sys.stdout.write(stdout) - sys.stderr.write(stderr) - sys.stderr.write("Return error code %i from command:\n" % return_code) - sys.stderr.write("%s\n" % cmd) - else: - sys.stdout.write(stdout) - sys.stdout.write(stderr) # Multiple runs - concatenate - if number_of_runs_opt > 0: - if (bootseed == 0) and (rapid_bootstrap_random_seed == 0 ): + if options.number_of_runs > 1: + if options.bootseed is None and options.rapid_bootstrap_random_seed is None: runfiles = glob.glob('RAxML*RUN*') runfiles.sort(key=getint) - # Logs - outfile = open('RAxML_log.galaxy', 'w') - for filename in runfiles: - if fnmatch.fnmatch(filename, 'RAxML_log.galaxy.RUN.*'): - infile = open(filename, 'r') - filename_line = "%s\n" % filename - outfile.write(filename_line) - for line in infile: - outfile.write(line) - infile.close() - outfile.close() - # Parsimony Trees - outfile = open('RAxML_parsimonyTree.galaxy', 'w') - for filename in runfiles: - if fnmatch.fnmatch(filename, 'RAxML_parsimonyTree.galaxy.RUN.*'): - infile = open(filename, 'r') - filename_line = "%s\n" % filename - outfile.write(filename_line) - for line in infile: - outfile.write(line) - infile.close() - outfile.close() - # Results - outfile = open('RAxML_result.galaxy', 'w') - for filename in runfiles: - if fnmatch.fnmatch(filename, 'RAxML_result.galaxy.RUN.*'): - infile = open(filename, 'r') - filename_line = "%s\n" % filename - outfile.write(filename_line) - for line in infile: - outfile.write(line) - infile.close() - outfile.close() + # Logs + with open('RAxML_log.galaxy', 'w') as outfile: + for filename in runfiles: + if fnmatch.fnmatch(filename, 'RAxML_log.galaxy.RUN.*'): + with open(filename, 'r') as infile: + filename_line = "%s\n" % filename + outfile.write(filename_line) + for line in infile: + outfile.write(line) + # Parsimony Trees + with open('RAxML_parsimonyTree.galaxy', 'w') as outfile: + for filename in runfiles: + if fnmatch.fnmatch(filename, 'RAxML_parsimonyTree.galaxy.RUN.*'): + with open(filename, 'r') as infile: + filename_line = "%s\n" % filename + outfile.write(filename_line) + for line in infile: + outfile.write(line) + # Results + with open('RAxML_result.galaxy', 'w') as outfile: + for filename in runfiles: + if fnmatch.fnmatch(filename, 'RAxML_result.galaxy.RUN.*'): + with open(filename, 'r') as infile: + filename_line = "%s\n" % filename + outfile.write(filename_line) + for line in infile: + outfile.write(line) # Multiple Model Partition Files - if multiple_model: + if options.multiple_model: files = glob.glob('RAxML_bestTree.galaxy.PARTITION.*') if len(files) > 0: files.sort(key=getint) - outfile = open('RAxML_bestTreePartitions.galaxy', 'w') # Best Tree Partitions - for filename in files: - if fnmatch.fnmatch(filename, 'RAxML_bestTree.galaxy.PARTITION.*'): - infile = open(filename, 'r') - filename_line = "%s\n" % filename - outfile.write(filename_line) - for line in infile: - outfile.write(line) - infile.close() - outfile.close() + with open('RAxML_bestTreePartitions.galaxy', 'w') as outfile: + for filename in files: + if fnmatch.fnmatch(filename, 'RAxML_bestTree.galaxy.PARTITION.*'): + with open(filename, 'r') as infile: + filename_line = "%s\n" % filename + outfile.write(filename_line) + for line in infile: + outfile.write(line) else: - outfile = open('RAxML_bestTreePartitions.galaxy', 'w') - outfile.write("No partition files were produced.\n") - outfile.close() + with open('RAxML_bestTreePartitions.galaxy', 'w') as outfile: + outfile.write("No partition files were produced.\n") # Result Partitions files = glob.glob('RAxML_result.galaxy.PARTITION.*') if len(files) > 0: files.sort(key=getint) - outfile = open('RAxML_resultPartitions.galaxy', 'w') - for filename in files: - if fnmatch.fnmatch(filename, 'RAxML_result.galaxy.PARTITION.*'): - infile = open(filename, 'r') - filename_line = "%s\n" % filename - outfile.write(filename_line) - for line in infile: - outfile.write(line) - infile.close() - outfile.close() + with open('RAxML_resultPartitions.galaxy', 'w') as outfile: + for filename in files: + if fnmatch.fnmatch(filename, 'RAxML_result.galaxy.PARTITION.*'): + with open(filename, 'r') as infile: + filename_line = "%s\n" % filename + outfile.write(filename_line) + for line in infile: + outfile.write(line) else: - outfile = open('RAxML_resultPartitions.galaxy', 'w') - outfile.write("No partition files were produced.\n") - outfile.close() + with open('RAxML_resultPartitions.galaxy', 'w') as outfile: + outfile.write("No partition files were produced.\n") # DEBUG options - infof = open('RAxML_info.galaxy', 'a') - infof.write('\nOM: CLI options DEBUG START:\n') - infof.write(options.__repr__()) - infof.write('\nOM: CLI options DEBUG END\n') + with open('RAxML_info.galaxy', 'a') as infof: + infof.write('\nOM: CLI options DEBUG START:\n') + infof.write(options.__repr__()) + infof.write('\nOM: CLI options DEBUG END\n') + if __name__ == "__main__": __main__()