changeset 2:671667722d3d draft

fix: ffptree taxonomy names (taken from file names): convert "()" and other non-alphanumeric characters to "_"
author damion
date Fri, 20 Mar 2015 14:27:19 -0400
parents d1c88b118a3f
children 79a4a86981d3
files README.md ffp_phylogeny.py ffp_phylogeny.xml
diffstat 3 files changed, 11 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/README.md	Fri Mar 13 20:59:28 2015 -0400
+++ b/README.md	Fri Mar 20 14:27:19 2015 -0400
@@ -35,6 +35,7 @@
 The command line ffpjsd can hang if one provides an l-mer length greater than the length of file content.  One must identify its process id ("ps aux | grep ffpjsd") and kill it ("kill [process id]").
 
 Finally, it is possible for the ffptree program to generate a tree where some of the branch distances are negative. See https://www.biostars.org/p/45597/
+
 -------
 **References**
  
--- a/ffp_phylogeny.py	Fri Mar 13 20:59:28 2015 -0400
+++ b/ffp_phylogeny.py	Fri Mar 20 14:27:19 2015 -0400
@@ -8,7 +8,7 @@
 import shlex, subprocess
 from string import maketrans
 
-VERSION_NUMBER = "0.1.00"
+VERSION_NUMBER = "0.1.03"
 
 class MyParser(optparse.OptionParser):
 	"""
@@ -40,16 +40,16 @@
 	"""
 	# Take off prefix/suffix whitespace/comma :
 	taxonomy = filenames.strip().strip(',').split(',')
-	translations = maketrans(' .-	','____')
 	names=[]
 	ptr = 0
 
 	for file in filepaths:
-		# First, convert space, period to underscore in file names.	  ffprwn IS VERY SENSITIVE ABOUT THIS.
-		# Also trim labels to 50 characters.  Turns out ffpjsd is kneecapping a taxonomy label to 10 characters if it is greater than 50 chars.
-		taxonomyitem = taxonomy[ptr].strip().translate(translations)[:50]
-		# print taxonomyitem
-		if not type in 'text' and multiple:
+		# Trim labels to 50 characters max.  ffpjsd kneecaps a taxonomy label to 10 characters if it is greater than 50 chars.
+		taxonomyitem = taxonomy[ptr].strip()[:50] #.translate(translations)
+		# Convert non-alphanumeric characters to underscore in taxonomy names.  ffprwn IS VERY SENSITIVE ABOUT THIS.
+		taxonomyitem = re.sub('[^0-9a-zA-Z]+', '_', taxonomyitem)
+
+		if (not type in 'text') and multiple:
 			#Must read each fasta file, looking for all lines beginning ">"
 			with open(file) as fastafile:
 				lineptr = 0
@@ -61,6 +61,7 @@
 						names.append(name)
 						lineptr += 1
 		else:
+
 			names.append(taxonomyitem)
 		
 		ptr += 1
@@ -178,7 +179,7 @@
 			# error code signal for that process, i.e. so that retcode returns a code.
 			retcode = processes[ptr-1].poll()
 			stderrdata = processes[ptr-1].stderr.read()
-			#Issue with ffptree is it outputs ----....---- on stderr
+			#Issue with ffptree is it outputs ---- ... ---- on stderr even when ok.
 			if retcode or (len(stderrdata) > 0 and substantive.search(stderrdata)):
 				stop_err(stderrdata)			
 
--- a/ffp_phylogeny.xml	Fri Mar 13 20:59:28 2015 -0400
+++ b/ffp_phylogeny.xml	Fri Mar 20 14:27:19 2015 -0400
@@ -1,4 +1,4 @@
-<tool id="ffp_phylogeny" name="Feature Frequency Profile Phylogeny" version="0.1.02">
+<tool id="ffp_phylogeny" name="Feature Frequency Profile Phylogeny" version="0.1.03">
 	<description>An alignment free comparison tool for phylogenetic analysis and text comparison</description>
 	<requirements>
 		<requirement type="package" version="0.3.19_d4382db015acec0e5cc43d6c1ac80ae12cb7e6b3">ffp-phylogeny</requirement>