Mercurial > repos > tiagoantao > raxml
view raxml.py @ 0:66bcdd1ac9ca draft
planemo upload commit 2c71c5e5604b9c05648b883fddbb163a8e03c807
author | tiagoantao |
---|---|
date | Tue, 03 Nov 2015 16:27:41 -0500 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python """ Runs RAxML on a sequence file. For use with RAxML version 8.2.4 """ import os, shutil, subprocess, sys, optparse, fnmatch, glob def stop_err(msg): sys.stderr.write("%s\n" % msg) sys.exit() def getint(name): basename = name.partition('RUN.') if basename[2] != '': num = basename[2] return int(num) def __main__(): usage = "usage: %prog -T <threads> -s <input> -n <output> -m <model> [optional arguments]" # Parse the primary wrapper's command line options parser = optparse.OptionParser(usage = usage) # raxml binary name, hardcoded in the xml file parser.add_option("--binary", action="store", type="string", dest="binary", help="Command to run") # (-a) parser.add_option("--weightfile", action="store", type="string", dest="weightfile", help="Column weight file") # (-A) parser.add_option("--secondary_structure_model", action="store", type="string", dest="secondary_structure_model", help="Secondary structure model") # (-b) parser.add_option("--bootseed", action="store", type="int", dest="bootseed", help="Bootstrap random number seed") # (-c) parser.add_option("--numofcats", action="store", type="int", dest="numofcats", help="Number of distinct rate categories") # (-d) parser.add_option("--search_complete_random_tree", action="store_true", dest="search_complete_random_tree", help="Search with a complete random starting tree") # (-D) parser.add_option("--ml_search_convergence", action="store_true", dest="ml_search_convergence", help="ML search onvergence criterion") # (-e) parser.add_option("--model_opt_precision", action="store", type="float", dest="model_opt_precision", help="Model Optimization Precision (-e)") # (-E) parser.add_option("--excludefile", action="store", type="string", dest="excludefile", help="Exclude File Name") # (-f) parser.add_option("--search_algorithm", action="store", type="string", dest="search_algorithm", help="Search Algorithm") # (-F) parser.add_option("--save_memory_cat_model", action="store_true", dest="save_memory_cat_model", help="Save memory under CAT and GTRGAMMA models") # (-g) parser.add_option("--groupingfile", action="store", type="string", dest="groupingfile", help="Grouping File Name") # (-G) parser.add_option("--enable_evol_heuristics", action="store_true", dest="enable_evol_heuristics", help="Enable evol algo heuristics") # (-i) parser.add_option("--initial_rearrangement_setting", action="store", type="int", dest="initial_rearrangement_setting", help="Initial Rearrangement Setting") # (-I) parser.add_option("--posterior_bootstopping_analysis", action="store", type="string", dest="posterior_bootstopping_analysis", help="Posterior bootstopping analysis") # (-J) parser.add_option("--majority_rule_consensus", action="store", type="string", dest="majority_rule_consensus", help="Majority rule consensus") # (-k) parser.add_option("--print_branch_lengths", action="store_true", dest="print_branch_lengths", help="Print branch lengths") # (-K) parser.add_option("--multistate_sub_model", action="store", type="string", dest="multistate_sub_model", help="Multistate substitution model") # (-m) parser.add_option("--model_type", action="store", type="string", dest="model_type", help="Model Type") parser.add_option("--base_model", action="store", type="string", dest="base_model", help="Base Model") parser.add_option("--aa_empirical_freq", action="store_true", dest="aa_empirical_freq", help="Use AA Empirical base frequences") parser.add_option("--aa_search_matrix", action="store", type="string", dest="aa_search_matrix", help="AA Search Matrix") # (-n) parser.add_option("--name", action="store", type="string", dest="name", help="Run Name") # (-N/#) parser.add_option("--number_of_runs", action="store", type="int", dest="number_of_runs", help="Number of alternative runs") parser.add_option("--number_of_runs_bootstop", action="store", type="string", dest="number_of_runs_bootstop", help="Number of alternative runs based on the bootstop criteria") # (-M) parser.add_option("--estimate_individual_branch_lengths", action="store_true", dest="estimate_individual_branch_lengths", help="Estimate individual branch lengths") # (-o) parser.add_option("--outgroup_name", action="store", type="string", dest="outgroup_name", help="Outgroup Name") # (-O) parser.add_option("--disable_undetermined_seq_check", action="store_true", dest="disable_undetermined_seq_check", help="Disable undetermined sequence check") # (-p) parser.add_option("--random_seed", action="store", type="int", dest="random_seed", help="Random Number Seed") # (-P) parser.add_option("--external_protein_model", action="store", type="string", dest="external_protein_model", help="External Protein Model") # (-q) parser.add_option("--multiple_model", action="store", type="string", dest="multiple_model", help="Multiple Model File") # (-r) parser.add_option("--constraint_file", action="store", type="string", dest="constraint_file", help="Constraint File") # (-R) parser.add_option("--bin_model_parameter_file", action="store", type="string", dest="bin_model_parameter_file", help="Constraint File") # (-s) parser.add_option("--source", action="store", type="string", dest="source", help="Input file") # (-S) parser.add_option("--secondary_structure_file", action="store", type="string", dest="secondary_structure_file", help="Secondary structure file") # (-t) parser.add_option("--starting_tree", action="store", type="string", dest="starting_tree", help="Starting Tree") # (-T) parser.add_option("--threads", action="store", type="int", dest="threads", help="Number of threads to use") # (-u) parser.add_option("--use_median_approximation", action="store_true", dest="use_median_approximation", help="Use median approximation") # (-U) parser.add_option("--save_memory_gappy_alignments", action="store_true", dest="save_memory_gappy_alignments", help="Save memory in large gapped alignments") # (-V) parser.add_option("--disable_rate_heterogeneity", action="store_true", dest="disable_rate_heterogeneity", help="Disable rate heterogeneity") # (-W) parser.add_option("--sliding_window_size", action="store", type="string", dest="sliding_window_size", help="Sliding window size") # (-x) parser.add_option("--rapid_bootstrap_random_seed", action="store", type="int", dest="rapid_bootstrap_random_seed", help="Rapid Boostrap Random Seed") # (-y) parser.add_option("--parsimony_starting_tree_only", action="store_true", dest="parsimony_starting_tree_only", help="Generate a parsimony starting tree only") # (-z) parser.add_option("--file_multiple_trees", action="store", type="string", dest="file_multiple_trees", help="Multiple Trees File") (options, args) = parser.parse_args() cmd = [] # Required parameters binary = options.binary cmd.append(binary) # Threads threads = "-T %d" % options.threads cmd.append(threads) # Source source = "-s %s" % options.source cmd.append(source) #Hardcode to "galaxy" first to simplify the output part of the wrapper #name = "-n %s" % options.name name = "-n galaxy" cmd.append(name) ## Model model_type = options.model_type base_model = options.base_model aa_search_matrix = options.aa_search_matrix aa_empirical_freq = options.aa_empirical_freq if model_type == 'aminoacid': model = "-m %s%s" % (base_model, aa_search_matrix) if aa_empirical_freq: model = "-m %s%s%s" % (base_model, aa_search_matrix, 'F') # (-P) if options.external_protein_model: external_protein_model = "-P %s" % options.external_protein_model cmd.append(external_protein_model) else: model = "-m %s" % base_model cmd.append(model) if model == "GTRCAT": # (-c) if options.numofcats: numofcats = "-c %d" % options.numofcats cmd.append(numofcats) # Optional parameters if options.number_of_runs_bootstop: number_of_runs_bootstop = "-N %s" % options.number_of_runs_bootstop cmd.append(number_of_runs_bootstop) else: number_of_runs_bootstop = '' if options.number_of_runs: number_of_runs_opt = "-N %d" % options.number_of_runs cmd.append(number_of_runs_opt) else: number_of_runs_opt = 0 # (-a) if options.weightfile: weightfile = "-a %s" % options.weightfile cmd.append(weightfile) # (-A) if options.secondary_structure_model: secondary_structure_model = "-A %s" % options.secondary_structure_model cmd.append(secondary_structure_model ) # (-b) if options.bootseed: bootseed = "-b %d" % options.bootseed cmd.append(bootseed) else: bootseed = 0 # -C - doesn't work in pthreads version, skipped if options.search_complete_random_tree: cmd.append("-d") if options.ml_search_convergence: cmd.append("-D" ) if options.model_opt_precision: model_opt_precision = "-e %f" % options.model_opt_precision cmd.append(model_opt_precision) if options.excludefile: excludefile = "-E %s" % options.excludefile cmd.append(excludefile) if options.search_algorithm: search_algorithm = "-f %s" % options.search_algorithm cmd.append(search_algorithm) if options.save_memory_cat_model: cmd.append("-F") if options.groupingfile: groupingfile = "-g %s" % options.groupingfile cmd.append(groupingfile) if options.enable_evol_heuristics: enable_evol_heuristics = "-G %f" % options.enable_evol_heuristics cmd.append(enable_evol_heuristics ) if options.initial_rearrangement_setting: initial_rearrangement_setting = "-i %s" % options.initial_rearrangement_setting cmd.append(initial_rearrangement_setting) if options.posterior_bootstopping_analysis: posterior_bootstopping_analysis = "-I %s" % options.posterior_bootstopping_analysis cmd.append(posterior_bootstopping_analysis) if options.majority_rule_consensus: majority_rule_consensus = "-J %s" % options.majority_rule_consensus cmd.append(majority_rule_consensus) if options.print_branch_lengths: cmd.append("-k") if options.multistate_sub_model: multistate_sub_model = "-K %s" % options.multistate_sub_model cmd.append(multistate_sub_model) if options.estimate_individual_branch_lengths: cmd.append("-M") if options.outgroup_name: outgroup_name = "-o %s" % options.outgroup_name cmd.append(outgroup_name) if options.disable_undetermined_seq_check: cmd.append("-O") if options.random_seed: random_seed = "-p %d" % options.random_seed cmd.append(random_seed) multiple_model = None if options.multiple_model: multiple_model = "-q %s" % options.multiple_model cmd.append(multiple_model) if options.constraint_file: constraint_file = "-r %s" % options.constraint_file cmd.append(constraint_file) if options.bin_model_parameter_file: bin_model_parameter_file_name = "RAxML_binaryModelParameters.galaxy" os.symlink(options.bin_model_parameter_file, bin_model_parameter_file_name ) bin_model_parameter_file = "-R %s" % options.bin_model_parameter_file #Needs testing. Is the hardcoded name or the real path needed? cmd.append(bin_model_parameter_file) if options.secondary_structure_file: secondary_structure_file = "-S %s" % options.secondary_structure_file cmd.append(secondary_structure_file) if options.starting_tree: starting_tree = "-t %s" % options.starting_tree cmd.append(starting_tree) if options.use_median_approximation: cmd.append("-u") if options.save_memory_gappy_alignments: cmd.append("-U") if options.disable_rate_heterogeneity: cmd.append("-V") if options.sliding_window_size: sliding_window_size = "-W %d" % options.sliding_window_size cmd.append(sliding_window_size) if options.rapid_bootstrap_random_seed: rapid_bootstrap_random_seed = "-x %d" % options.rapid_bootstrap_random_seed cmd.append(rapid_bootstrap_random_seed) else: rapid_bootstrap_random_seed = 0 if options.parsimony_starting_tree_only: cmd.append("-y") if options.file_multiple_trees: file_multiple_trees = "-z %s" % options.file_multiple_trees cmd.append(file_multiple_trees) print "cmd list: ", cmd, "\n" full_cmd = " ".join(cmd) print "Command string: %s" % full_cmd try: proc = subprocess.Popen(args=full_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) except Exception, err: sys.stderr.write("Error invoking command: \n%s\n\n%s\n" % (cmd, err)) sys.exit(1) stdout, stderr = proc.communicate() return_code = proc.returncode if return_code: sys.stdout.write(stdout) sys.stderr.write(stderr) sys.stderr.write("Return error code %i from command:\n" % return_code) sys.stderr.write("%s\n" % cmd) else: sys.stdout.write(stdout) sys.stdout.write(stderr) #Multiple runs - concatenate if number_of_runs_opt > 0: if (bootseed == 0) and (rapid_bootstrap_random_seed == 0 ): runfiles = glob.glob('RAxML*RUN*') runfiles.sort(key=getint) # Logs outfile = open('RAxML_log.galaxy','w') for filename in runfiles: if fnmatch.fnmatch(filename, 'RAxML_log.galaxy.RUN.*'): infile = open(filename, 'r') filename_line = "%s\n" % filename outfile.write(filename_line) for line in infile: outfile.write(line) infile.close() outfile.close() # Parsimony Trees outfile = open('RAxML_parsimonyTree.galaxy','w') for filename in runfiles: if fnmatch.fnmatch(filename, 'RAxML_parsimonyTree.galaxy.RUN.*'): infile = open(filename, 'r') filename_line = "%s\n" % filename outfile.write(filename_line) for line in infile: outfile.write(line) infile.close() outfile.close() # Results outfile = open('RAxML_result.galaxy','w') for filename in runfiles: if fnmatch.fnmatch(filename, 'RAxML_result.galaxy.RUN.*'): infile = open(filename, 'r') filename_line = "%s\n" % filename outfile.write(filename_line) for line in infile: outfile.write(line) infile.close() outfile.close() # Multiple Model Partition Files if multiple_model: files = glob.glob('RAxML_bestTree.galaxy.PARTITION.*') if len(files) > 0: files.sort(key=getint) outfile = open('RAxML_bestTreePartitions.galaxy','w') # Best Tree Partitions for filename in files: if fnmatch.fnmatch(filename, 'RAxML_bestTree.galaxy.PARTITION.*'): infile = open(filename, 'r') filename_line = "%s\n" % filename outfile.write(filename_line) for line in infile: outfile.write(line) infile.close() outfile.close() else: outfile = open('RAxML_bestTreePartitions.galaxy','w') outfile.write("No partition files were produced.\n") outfile.close() # Result Partitions files = glob.glob('RAxML_result.galaxy.PARTITION.*') if len(files) > 0: files.sort(key=getint) outfile = open('RAxML_resultPartitions.galaxy','w') for filename in files: if fnmatch.fnmatch(filename, 'RAxML_result.galaxy.PARTITION.*'): infile = open(filename, 'r') filename_line = "%s\n" % filename outfile.write(filename_line) for line in infile: outfile.write(line) infile.close() outfile.close() else: outfile = open('RAxML_resultPartitions.galaxy','w') outfile.write("No partition files were produced.\n") outfile.close() # DEBUG options infof = open('RAxML_info.galaxy','a') infof.write('\nOM: CLI options DEBUG START:\n') infof.write(options.__repr__()) infof.write('\nOM: CLI options DEBUG END\n') if __name__=="__main__": __main__()