# HG changeset patch # User damion # Date 1426876039 14400 # Node ID 671667722d3d7fd4369d98b6200e07bbc56cebc0 # Parent d1c88b118a3f854bf59e1ac8fed2351e5e423013 fix: ffptree taxonomy name file convert () to _ diff -r d1c88b118a3f -r 671667722d3d README.md --- a/README.md Fri Mar 13 20:59:28 2015 -0400 +++ b/README.md Fri Mar 20 14:27:19 2015 -0400 @@ -35,6 +35,7 @@ The command line ffpjsd can hang if one provides an l-mer length greater than the length of file content. One must identify its process id ("ps aux | grep ffpjsd") and kill it ("kill [process id]"). Finally, it is possible for the ffptree program to generate a tree where some of the branch distances are negative. See https://www.biostars.org/p/45597/ + ------- **References** diff -r d1c88b118a3f -r 671667722d3d ffp_phylogeny.py --- a/ffp_phylogeny.py Fri Mar 13 20:59:28 2015 -0400 +++ b/ffp_phylogeny.py Fri Mar 20 14:27:19 2015 -0400 @@ -8,7 +8,7 @@ import shlex, subprocess from string import maketrans -VERSION_NUMBER = "0.1.00" +VERSION_NUMBER = "0.1.03" class MyParser(optparse.OptionParser): """ @@ -40,16 +40,16 @@ """ # Take off prefix/suffix whitespace/comma : taxonomy = filenames.strip().strip(',').split(',') - translations = maketrans(' .- ','____') names=[] ptr = 0 for file in filepaths: - # First, convert space, period to underscore in file names. ffprwn IS VERY SENSITIVE ABOUT THIS. - # Also trim labels to 50 characters. Turns out ffpjsd is kneecapping a taxonomy label to 10 characters if it is greater than 50 chars. - taxonomyitem = taxonomy[ptr].strip().translate(translations)[:50] - # print taxonomyitem - if not type in 'text' and multiple: + # Trim labels to 50 characters max. ffpjsd kneecaps a taxonomy label to 10 characters if it is greater than 50 chars. + taxonomyitem = taxonomy[ptr].strip()[:50] #.translate(translations) + # Convert non-alphanumeric characters to underscore in taxonomy names. ffprwn IS VERY SENSITIVE ABOUT THIS. + taxonomyitem = re.sub('[^0-9a-zA-Z]+', '_', taxonomyitem) + + if (not type in 'text') and multiple: #Must read each fasta file, looking for all lines beginning ">" with open(file) as fastafile: lineptr = 0 @@ -61,6 +61,7 @@ names.append(name) lineptr += 1 else: + names.append(taxonomyitem) ptr += 1 @@ -178,7 +179,7 @@ # error code signal for that process, i.e. so that retcode returns a code. retcode = processes[ptr-1].poll() stderrdata = processes[ptr-1].stderr.read() - #Issue with ffptree is it outputs ----....---- on stderr + #Issue with ffptree is it outputs ---- ... ---- on stderr even when ok. if retcode or (len(stderrdata) > 0 and substantive.search(stderrdata)): stop_err(stderrdata) diff -r d1c88b118a3f -r 671667722d3d ffp_phylogeny.xml --- a/ffp_phylogeny.xml Fri Mar 13 20:59:28 2015 -0400 +++ b/ffp_phylogeny.xml Fri Mar 20 14:27:19 2015 -0400 @@ -1,4 +1,4 @@ - + An alignment free comparison tool for phylogenetic analysis and text comparison ffp-phylogeny