Mercurial > repos > malex > beast
changeset 1:677344fb75c1 draft
Update the wrapper to handle output log and tree files better by parsing the .xml file for names and then falling back to globbing if necessary.
author | malex |
---|---|
date | Thu, 26 Apr 2012 11:08:35 -0400 |
parents | 3ffe0202fa38 |
children | 1914e55fee82 |
files | beast/beast.py beast/beast.xml |
diffstat | 2 files changed, 59 insertions(+), 9 deletions(-) [+] |
line wrap: on
line diff
--- a/beast/beast.py Tue Apr 17 10:52:00 2012 -0400 +++ b/beast/beast.py Thu Apr 26 11:08:35 2012 -0400 @@ -14,11 +14,23 @@ A variable number of '.tree' files depending on the XML input """ import os, shutil, subprocess, sys, optparse, glob, string +from xml.dom.minidom import parse, Node def stop_err(msg): sys.stderr.write("%s\n" % msg) sys.exit() +def parseFnames(nodelist): + filenames = [] + for node in nodelist: + if node.hasAttributes(): + fname = node.getAttribute('fileName') + if fname != "": + filenames.append(fname) + else: + pass + return filenames + def __main__(): usage = "usage: %prog inputXML" parser = optparse.OptionParser(usage = usage) @@ -93,9 +105,41 @@ else: sys.stdout.write(stdout) sys.stdout.write(stderr) +#2012-04-24 - 2nd approach, parse the .xml file: + xml_file = os.path.abspath(inputxml) + if not os.path.exists(inputxml): + sys.stderr.write("Cannot find the input XML file for parsing.\n") + dom = parse(inputxml) + xml_logs = dom.getElementsByTagName('log') + xml_trees = dom.getElementsByTagName('logTree') + logfiles_orig = parseFnames(xml_logs) + treefiles_orig = parseFnames(xml_trees) try: - for logfile in glob.glob('*.log'): - shutil.copyfile(os.path.basename(logfile), 'beast.log') + if len(logfiles_orig) == 0: + logfiles_orig = glob.glob("*.log*") + if len(logfiles_orig) == 0: + logfiles_orig.append('Error_no_log') + dummy_file = open('Error_no_log','w') + dummy_file.write("BEAST run has not produced a log or it's named in such a way that I can't locate it. Configure BEAST to produce .log files without spaces in their names and rerun the analysis.\n") + dummy_file.close() + logfiles = [] + if os.path.isdir(newfilepath): + for filename in logfiles_orig: + if os.path.isfile(filename): + name = string.replace(os.path.splitext(filename)[0], "_", "-") + filestring = "primary_%s_%s_visible_nexus" % (treefile_id, name) + newpath = os.path.join(newfilepath,filestring) + logfiles.append(newpath) +# else: +# sys.stderr.write("Can't find the log file to rename.\n") + logfiles[0] = logs + for i in range(len(logfiles_orig)): + src = logfiles_orig[i] + dst = logfiles[i] + if os.path.exists(src): + shutil.copy(src, dst) +# else: +# print "File '%s' can't be found.\n" % src except Exception, err: sys.stderr.write("Error copying log file: \n%s\n" % err) try: @@ -106,12 +150,15 @@ except Exception, err: sys.stderr.write("Error copying mcmc.operators file: \n%s\n" % err) try: - treefiles_orig = glob.glob('*.trees') if len(treefiles_orig) == 0: - treefiles_orig.append('Error_no_tree') - dummy_file = open('Error_no_tree','w') - dummy_file.write("BEAST run has not produced an output tree\n") - dummy_file.close() + print "No tree files found by the xml file parser.\n" + treefiles_orig = glob.glob("*.trees*") +# print "Original tree files from the directory:\n\t%s" % " ".join(treefiles_orig) + if len(treefiles_orig) == 0: + treefiles_orig.append('Error_no_tree') + dummy_file = open('Error_no_tree','w') + dummy_file.write("BEAST run has not produced an output tree or it's named in such a way that I can't locate it. Configure BEAST to produce .tree files without spaces in their names and rerun the analysis.\n") + dummy_file.close() treefiles = [] if os.path.isdir(newfilepath): for filename in treefiles_orig: @@ -122,7 +169,10 @@ treefiles.append(newpath) treefiles[0] = trees for i in range(len(treefiles_orig)): - shutil.move(treefiles_orig[i], treefiles[i]) + src = treefiles_orig[i] + dst = treefiles[i] + if os.path.exists(src): + shutil.copy(src, dst) except Exception, err: sys.stderr.write("Error copying trees file(s): \n%s\n" % err) if __name__=="__main__": __main__()
--- a/beast/beast.xml Tue Apr 17 10:52:00 2012 -0400 +++ b/beast/beast.xml Thu Apr 26 11:08:35 2012 -0400 @@ -22,7 +22,7 @@ <help> .. class:: warningmark -The input dataset needs to be in BEAST XML format. +The input dataset needs to be in BEAST XML format. The names of the log output files configured in the xml file should have the '.log' extension and the trees file(s) should have the '.tree' extension for the best presentation. If the random seed is not chosen "12345" will be used.