#Mercurial > repos > george-weingart > micropita

"""
Author: Timothy Tickle
Description: Class to call circlader and create dendrograms.
"""

#####################################################################################
#Copyright (C) <2012>
#
#Permission is hereby granted, free of charge, to any person obtaining a copy of
#this software and associated documentation files (the "Software"), to deal in the
#Software without restriction, including without limitation the rights to use, copy,
#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
#and to permit persons to whom the Software is furnished to do so, subject to
#the following conditions:
#
#The above copyright notice and this permission notice shall be included in all copies
#or substantial portions of the Software.
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#####################################################################################

__author__ = "Timothy Tickle"
__copyright__ = "Copyright 2012"
__credits__ = ["Timothy Tickle"]
__license__ = "MIT"
__maintainer__ = "Timothy Tickle"
__email__ = "ttickle@sph.harvard.edu"
__status__ = "Development"

#External libraries
from AbundanceTable import AbundanceTable
from CommandLine import CommandLine
from ConstantsBreadCrumbs import ConstantsBreadCrumbs
from ConstantsFiguresBreadCrumbs import ConstantsFiguresBreadCrumbs
import math
import numpy as np
import os
import re
import scipy.stats
from ValidateData import ValidateData
#import scipy.stats.stats as stats

class Cladogram:
  """
  This class manages creating the input files for Circlader and calling Circlader.
  """

  #Path of the Circlader script to run (set by generate)
  circladerScript=None

  #Constants
  #Keys of the per-circle dictionaries built by addCircle
  c_sTaxa="Taxa"
  c_sCircle="Circle"
  c_sBorder="Border"
  c_sShape="Shape"
  c_sAlpha="Alpha"
  c_sForced="Forced"

  #Numpy array (structured array) holding data
  #Should be SampleID, Sample Abundances/Data (samples = columns).....
  npaAbundance = None
  #List of sample names
  lsSampleNames = None
  #Name of output image
  strImageName = "Cladogram.png"
  #Name of the ID column in npaAbundance
  strSampleID = "ID"
  #Name given to unclassified clades (used by the clade size filter)
  strUnclassified = "unclassified"

  #Minimum size of clade (terminal node count for clade)
  iMinCladeSize = 1
  #Levels of ancestry used by the clade size filter
  #(setFilterByCladeSize documents these as NOT 0 based, the root is level 1)
  iCladeLevelToMeasure = 1
  iCladeLevelToReduce = 1
  #Delimiter between the levels of a feature's lineage
  cFeatureDelimiter = "|"

  #Flags
  #Turns on (True) or off (False) abundance-based filtering
  fAbundanceFilter = False
  #Turns on (True) or off (False) clade size-based filtering
  fCladeSizeFilter = False
  #Indicate if the following files were made
  fSizeFileMade=False
  fCircleFileMade=False
  fColorFileMade=False
  fTickFileMade=False
  fHighlightFileMade=False

  #Circlader files
  #Default paths; presumably replaced by manageFilePaths, which is not shown here
  strTreeFilePath="_Taxa.txt"
  strCircleFilePath = "_Circle.txt"
  strColorFilePath="_Color.txt"
  strTickFilePath="_Tick.txt"
  strHighLightFilePath="_HighLight.txt"
  strSizeFilePath="_Size.txt"
  strStyleFilePath=""

  #Thresholds
  #Controls the showing of taxa (abundance filter)
  #Percentile used as the per-sample abundance threshold
  c_dPercentileCutOff = 90.0
  #Percentage of samples in which a taxon must reach that threshold
  c_dPercentageAbovePercentile = 1.0

  #Minimum average abundance score when using log scale
  c_dMinLogSize = 0.0000000001
  #Constant used to magnify the size difference in the taxa (log only)
  c_dLogScale = 1000000
  #Additional scaling applied after the log10 (see setCircleScale)
  c_dCircleScale = 3

  #Data for circular files
  #Used to change IDs to proper labels
  dictConvertIDs = None
  #Labels to be relabeled
  dictRelabels = None
  #Colors
  dictColors = None
  #Elements that are forced to be highlighted
  dictForcedHighLights = None
  #Ticks
  llsTicks = None
  #Forced root of the tree, discarding data as needed.
  strRoot = None
  #Holds circle data as a list of dictionaries
  #One dictionary per circle
  ldictCircleData = None

  def __init__(self):
    self.dictForcedHighLights = dict()

  #Happy Path Tested
  def addHighLights(self, dictClades,fOverwrite):
    """
    Register elements that must be highlighted.
    An element registered here is not removed by filtering and, when it exists
    in the tree, is highlighted with the named color. The color name should be
    one supplied through the set Color Data method.
    {strName1:strColorName1,strName2:strColorName2,...}
    Nothing is changed when either argument fails validation.

    :param dictClades: Names of elements, if found in the tree which should be highlighted
    :type: dictClades Dictionary of element name (string) and element color (string)
    :param fOverwrite: If an element is already registered, replace its color with the one given here.
    :type: fOverwrite boolean (True == overwrite color)
    """
    if not ValidateData.funcIsValidDictionary(dictClades):
      return
    if not ValidateData.funcIsValidBoolean(fOverwrite):
      return

    for strName in dictClades:
      #New names are always stored, known names only when overwriting
      if fOverwrite or (strName not in self.dictForcedHighLights):
        self.dictForcedHighLights[strName] = dictClades[strName]

  #Not tested
  def getHighLights(self):
    return self.dictForcedHighLights

  #Not tested
  def forceRoot(self, strRoot):
    """
    This method allows one to root the tree at a certain level and value
    Only taxa that contain this value in their ancestry will be plotted
    The root will be the value given, any previous heirachy will be ignored
    This will remove highlighted data if indicated to do so

    :params strRoot: Where to root the tree
    :type: strRoot String
    """
    self.strRoot = strRoot

  def generate(self, strImageName, strStyleFile, sTaxaFileName, strCircladerScript = ConstantsBreadCrumbs.c_strCircladerScript, iTerminalCladeLevel = 10, sColorFileName=None, sTickFileName=None, sHighlightFileName=None, sSizeFileName=None, sCircleFileName=None):
    """
    Generate a cladogram with Circlader from the abundance data set on this object.
    Filters the features, writes the Circlader input files and runs the script.

    :param strImageName: File name to save the output cladogram image
    :type: strImageName File name (string)
    :param strStyleFile: File path indicating the style file to use
    :type: strStyleFile File path (string)
    :param sTaxaFileName: File path indicating the taxa file to use
    :type: sTaxaFileName File path (string)
    :param strCircladerScript: File path to the Circlader script
    :type: String
    :param iTerminalCladeLevel: Clade level to use as terminal in plotting
    :type: iTerminalCladeLevel integer starting with 1
    :param sColorFileName: File path indicating the color file to use
    :type: sColorFileName File path (string)
    :param sTickFileName: File path indicating the tick file to use
    :type: sTickFileName File path (string)
    :param sHighlightFileName: File path indicating the highlight file to use
    :type: sHighlightFileName File path (string)
    :param sSizeFileName: File path indicating the size file to use
    :type: sSizeFileName File path (string)
    :param sCircleFileName: File path of circlader circle file.
    :type: String
    :return: False when the data was not set or an input file could not be created.
      Nothing (None) is returned once the Circlader command has been run.
    """

    #Identity test, "== None" on a numpy array is an elementwise comparison
    if self.npaAbundance is None:
      print("Cladogram::generate. The data was not set so an image could not be generated")
      return False

    #Set script
    self.circladerScript = strCircladerScript

    #Set output file name
    self.strImageName = strImageName

    #Check files exist and remove files which will be written
    self.manageFilePaths(sTaxaFileName, strStyleFile, sColorFileName, sTickFileName, sHighlightFileName, sSizeFileName, sCircleFileName)

    #Get IDs
    lsIDs = list(self.npaAbundance[self.strSampleID])

    #Generate a dictionary to convert the ids to correct format
    #Fix unclassified names
    #Make numeric labels as indicated
    self.dictConvertIDs = self.generateLabels(lsIDs)

    #Remove taxa lower than the display clade level
    lsIDs = [sFeature for sFeature in lsIDs
             if len(sFeature.split(self.cFeatureDelimiter)) <= iTerminalCladeLevel]

    #Filter by abundance
    if self.fAbundanceFilter:
      lsIDs = self.filterByAbundance(lsIDs)

    #Update to the correct root
    lsIDs = self.updateToRoot(lsIDs)

    #Set highlights and relabels to root for consistency
    if self.strRoot is not None:
      def funcRootKeys(dictData):
        #Re-key the dictionary on the rooted names.
        #A key for which updateToRoot gives nothing back is dropped.
        dictRooted = dict()
        for sKey in dictData.keys():
          lsUpdatedKey = self.updateToRoot([sKey])
          if len(lsUpdatedKey) > 0:
            dictRooted[lsUpdatedKey[0]] = dictData[sKey]
        return dictRooted

      if self.dictForcedHighLights is not None:
        self.dictForcedHighLights = funcRootKeys(self.dictForcedHighLights)
      if self.dictRelabels is not None:
        self.dictRelabels = funcRootKeys(self.dictRelabels)

    #Filter by clade size Should be the last filter.
    #It is not a strong filter but cleans up images
    if self.fCladeSizeFilter:
      lsIDs = self.filterByCladeSize(lsIDs)

    #Add in forced highlighting
    if self.dictForcedHighLights:
      lsIDs.extend(self.dictForcedHighLights.keys())
    lsIDs = list(set(lsIDs))

    #Add in forced circle data (there may be no circles at all)
    if self.ldictCircleData is not None:
      for dictCircleData in self.ldictCircleData:
        if dictCircleData[self.c_sForced]:
          lsTaxa = dictCircleData[self.c_sTaxa]
          datAlpha = dictCircleData[self.c_sAlpha]
          #Alpha is either one value for all taxa or a list with one value per taxon
          if isinstance(datAlpha, list):
            lsAlphas = datAlpha
          else:
            lsAlphas = [datAlpha] * len(lsTaxa)
          #Taxa marked with the '0.0' alpha are not forced into the tree
          lsIDs.extend([sTaxa for sTaxa, datTaxaAlpha in zip(lsTaxa, lsAlphas) if not datTaxaAlpha == '0.0'])
      lsIDs = list(set(lsIDs))

    #Create circle files (needs to be after any filtering because it has a forcing option).
    #Without circle data there is no circle file and that is not an error.
    if self.ldictCircleData is None:
      self.fCircleFileMade = False
    elif not self.createCircleFile(lsIDs):
      return False

    #Generate / Write Tree file
    if not self.createTreeFile(lsIDs):
      return False

    #Generate / Write Highlight file
    if not self.createHighlightFile(lsIDs):
      return False

    #Generate / write color file
    if self.dictColors is not None:
      lsColorData = [ConstantsBreadCrumbs.c_cTab.join([sColorKey,self.dictColors[sColorKey]]) for sColorKey in self.dictColors]
      self.writeToFile(self.strColorFilePath, ConstantsBreadCrumbs.c_strEndline.join(lsColorData), False)
      self.fColorFileMade=True

    #Generate / write tick file
    if self.llsTicks is not None:
      lsTickData = [ConstantsBreadCrumbs.c_cTab.join(lsTicks) for lsTicks in self.llsTicks]
      self.writeToFile(self.strTickFilePath, ConstantsBreadCrumbs.c_strEndline.join(lsTickData), False)
      self.fTickFileMade=True

    #Generate / Write size data
    if not self.createSizeFile(lsIDs):
      return False

    #Call commandline, optional files are only passed when they were made
    lsCommand = [self.circladerScript, self.strTreeFilePath, self.strImageName, "--style_file", self.strStyleFilePath, "--tree_format", "tabular"]
    if self.fSizeFileMade:
      lsCommand.extend(["--size_file", self.strSizeFilePath])
    if self.fColorFileMade:
      lsCommand.extend(["--color_file", self.strColorFilePath])
    if self.fTickFileMade:
      lsCommand.extend(["--tick_file", self.strTickFilePath])
    if self.fHighlightFileMade:
      lsCommand.extend(["--highlight_file", self.strHighLightFilePath])
    if self.fCircleFileMade:
      lsCommand.extend(["--circle_file", self.strCircleFilePath])
    #NOTE(review): the result of the command is not returned, so success gives None - confirm callers do not test the return value for truth
    CommandLine().runCommandLine(lsCommand)

  #Happy path tested
  def setColorData(self, dictColors):
    """
    Store the color definitions used when writing the color and highlight files.
    The dictionary maps a color name (string) to its color (string RGB)
    {strName1:Color,strName2:Color...}
    Color data should be a string in the RGB format 0-255,0-255,0-255
    The given dictionary is kept (not copied) and the background color entry
    is added to it when it is missing. Invalid input is ignored.

    :param dictColors: Color Name and RGB specification
    :type: dictColors Dictionary of strings
    """
    if not ValidateData.funcIsValidDictionary(dictColors):
      return

    self.dictColors = dictColors
    sBackgroundName = ConstantsFiguresBreadCrumbs.c_strBackgroundColorName
    if sBackgroundName not in self.dictColors:
      self.dictColors[sBackgroundName] = ConstantsFiguresBreadCrumbs.c_strBackgroundColor

  #Not tested
  def setAbundanceData(self, abtbAbundanceTable):
    """
    Take the data to plot from an abundance table.
    Reads a copy of the abundances, the name of the ID metadata and the sample names.

    :param abtbAbundanceTable: AbundanceTable to set
    :type: AbundanceTable
    """
    #Abundance data as returned by the table's copy method
    npaAbundanceCopy = abtbAbundanceTable.funcGetAbundanceCopy()
    self.npaAbundance = npaAbundanceCopy
    #Name of the column holding the feature IDs
    sIDMetadataName = abtbAbundanceTable.funcGetIDMetadataName()
    self.strSampleID = sIDMetadataName
    #Names of the samples
    lsTableSampleNames = abtbAbundanceTable.funcGetSampleNames()
    self.lsSampleNames = lsTableSampleNames

  #Not tested
  def setFilterByAbundance(self, fAbundanceFilter, dPercentileCutOff = 90.0,  dPercentageAbovePercentile = 1.0):
    """
    Switch filtering by abundance on and off.
    fAbundanceFilter == True indicates filtering is on

    :param fAbundanceFilter: Switch to turn on (true) and off (false) abundance-based filtering
    :type: fAbundanceFilter boolean
    :param dPercentileCutOff: Percentage between 100.0 to 0.0.
    :type: double
    :param dPercentageAbovePercentile: Percentage between 100.0 to 1.0.
    :type: double
    """
    self.fAbundanceFilter = fAbundanceFilter
    self.c_dPercentileCutOff = dPercentileCutOff
    self.c_dPercentageAbovePercentile = dPercentageAbovePercentile

  #Not Tested
  def setCircleScale(self, iScale):
    """
    Is a scale used to increase or decrease node sizes in the the cladogram to make more visible
    iScale default is 3

    :param iScale: Integer to increase the relative sizes of nodes
    :type: iScale integer
    """
    self.c_dCircleScale = iScale

  #Not tested
  def setFeatureDelimiter(self, cDelimiter):
    """
    Set the delimiter used to parse the consensus lineages of features.

    :param cDelimiter: The delimiter used to parse the consensus lineage of features.
    :type: Character
    """
    if cDelimiter:
      self.cFeatureDelimiter = cDelimiter

  #Not tested
  def setFilterByCladeSize(self, fCladeSizeFilter, iCladeLevelToMeasure = 3, iCladeLevelToReduce = 1, iMinimumCladeSize = 5, cFeatureDelimiter = None, strUnclassified="unclassified"):
    """
    Switch filtering by clade size on and off.
    fCladeSizeFilter == True indicates filtering is on
    NOT 0 based.

    :param fCladeSizeFilter: Switch to turn on (true) and off (false) clade size-based filtering
    :type: fCladeSizeFilter boolean
    :param iCladeLevelToMeasure: The level of the concensus lineage that is measure or counted. Should be greater than iCladeLevelToReduce (Root is 1)
    :type: iCladeLevelToMeasure int
    :param iCladeLevelToReduce: The level of the concensus lineage that is reduced if the measured level are not the correct count (Root is 1)
    :type: iCladeLevelToReduce int
    :param iMinimumCladeSize: Minimum count of the measured clade for the clade to be kept
    :type: iMinimumCladeSize int
    :param cFeatureDelimiter: One may set the feature delimiter if needed.
    :type: Character
    :param strUnclassified: String indicating unclassifed features
    :type: String
    """
    self.fCladeSizeFilter = fCladeSizeFilter
    if iCladeLevelToMeasure > 0:
        self.iCladeLevelToMeasure = iCladeLevelToMeasure
    if iCladeLevelToReduce > 0:
        self.iCladeLevelToReduce = iCladeLevelToReduce
    if iMinimumCladeSize > 0:
        self.iMinCladeSize = iMinimumCladeSize
    if cFeatureDelimiter:
        self.cFeatureDelimiter = cFeatureDelimiter
    if strUnclassified:
        self.strUnclassified = strUnclassified

  #Not tested
  def setTicks(self, llsTicks):
    """
    This methods allows tick information to be specified.
    Need to generate a list of lists each having a tick level (number starting at 0 as a string), and tick name
    #Lowest numbers are closest to the center of the tree
    [[#,Name1],[#,Name2]...]

    :param llsTicks: Level # and Name of level
    :type: llsTicks List of lists of strings
    """
    self.llsTicks = llsTicks

  #Happy Path tested with createCircleFile
  def addCircle(self, lsTaxa, strCircle, dBorder=0.0, strShape="R", dAlpha=1.0, fForced=False):
    """
    This methods allows one to add a circle to the outside of the cladogram.

    :param lsTaxa: Taxa to highlight with this circle
    :type: lsTaxa List of strings (taxa names)
    :param strCircle: Circle the elements will be in, indicates color and circle level.
    :type: strCircle String circle
    :param dBorder: Border size for the circle element border (between 0.0 and 1.0)
      can also be a list of dBorders.  If list, position must match lsTaxa.
    :type: dBorder Float of border size (or list of floats).
    :param strShape: String Indicator of shape or method to determine shape.
      Can also be a list of shapes.  If list, position must match lsTaxa.
    :type: strShape String to indicate the shape (may also be a list of strings).
        Default value is square.
        Valid shapes are R(Square), v(inward pointing triangle), ^(outward pointing triangle)
    :param dAlpha: The transparency of the circle element (between 0.0[clear] and 1.0[solid]).
      Can also be a list of floats.  If list, position must match lsTaxa.
    :type: dAlpha Float to indicate the transparency of the shape (may also be a list of strings).
    :param fForced: Forces item in the features in the circle to be displayed in the cladogram no matter thier passing filters.
    :type: Boolean
    """
    if(self.ldictCircleData == None):
      self.ldictCircleData = list()
    dictCircleData = dict()
    dictCircleData[self.c_sTaxa]=lsTaxa
    dictCircleData[self.c_sCircle]=strCircle
    dictCircleData[self.c_sBorder]=dBorder
    dictCircleData[self.c_sShape]=strShape
    dictCircleData[self.c_sAlpha]=dAlpha
    dictCircleData[self.c_sForced]=fForced

    self.ldictCircleData.append(dictCircleData)
    return True

  #Happy Path tested with AddCircle
  def createCircleFile(self, lsIDs):
    """
    Write circle data to file.
    Each taxon named by a circle gets one line: its "." delimited name followed
    by one tab separated entry (circle:alpha!shape#border) per circle naming it.
    Circles are also added to the ticks when ticks are set and the circle is
    not yet among them.

    :param lsIDs: Ids to include in the circle file.
      Taxa of forced circles (alpha not "0.0") are appended to this list in place.
    :type: lsIDs List of strings
    :return: True when the circle file was written. False when there is no
      circle data, no circle file path, a per-taxon list whose length differs
      from the taxa list, or nothing to write.
    """
    #No circle data at all, nothing to write
    if self.ldictCircleData is None:
      self.fCircleFileMade=False
      return False
    if self.strCircleFilePath is None:
      print("Error, there is no circle file specified to write to.")
      return False

    #Holds circle data {Taxaname:line for the output file, grown circle by circle}
    dictCircleDataMethods = dict()
    #IDs already in lsIDs, built on first use so forced taxa are appended only once
    setKnownIDs = None

    for dictCircleData in self.ldictCircleData:
      lsTaxa = dictCircleData[self.c_sTaxa]
      #Circle name
      sCircleMethod = dictCircleData[self.c_sCircle]
      #Shape/s, border/s and alpha/s for taxa, each one value or one value per taxon
      datShape = dictCircleData[self.c_sShape]
      datBorder = dictCircleData[self.c_sBorder]
      datAlpha = dictCircleData[self.c_sAlpha]
      fShapeIsList = isinstance(datShape, list)
      fBorderIsList = isinstance(datBorder, list)
      fAlphaIsList = isinstance(datAlpha, list)

      #Check to make sure the lengths of the lists match up
      for sPlural, sSingular, datValues, fIsList in (("Shapes","Shape",datShape,fShapeIsList),
                                                     ("Border sizes","Border",datBorder,fBorderIsList),
                                                     ("Alpha sizes","Alpha",datAlpha,fAlphaIsList)):
        if fIsList and not len(datValues) == len(lsTaxa):
          print("".join(["Error, ",sPlural," were given as an list not of the size of the taxa list. ",sSingular," list length: ",str(len(datValues)),". Taxa list length: ",str(len(lsTaxa)),"."]))
          return False

      #Update taxa to root if needed
      #When any of the other data is a list it has to be edited
      #along with the taxa so positions keep matching
      if not (fShapeIsList or fBorderIsList or fAlphaIsList):
        lsTaxa = self.updateToRoot(dictCircleData[self.c_sTaxa])
      else:
        lsUpdatedTaxa = list()
        #Lists are rebuilt, single values are used for all taxa and kept as they are
        datUpdatedShapes = list() if fShapeIsList else datShape
        datUpdatedBorders = list() if fBorderIsList else datBorder
        datUpdatedAlphas = list() if fAlphaIsList else datAlpha

        #If a taxa is kept, keep associated list information
        for iTaxaIndex, sTaxa in enumerate(lsTaxa):
          lsRootedTaxa = self.updateToRoot([sTaxa])
          if len(lsRootedTaxa)==1:
            lsUpdatedTaxa.append(lsRootedTaxa[0])
            if fShapeIsList:
              datUpdatedShapes.append(datShape[iTaxaIndex])
            if fBorderIsList:
              datUpdatedBorders.append(datBorder[iTaxaIndex])
            if fAlphaIsList:
              datUpdatedAlphas.append(datAlpha[iTaxaIndex])

        #Reset data to rooted data
        lsTaxa = lsUpdatedTaxa
        datShape = datUpdatedShapes
        datBorder = datUpdatedBorders
        datAlpha = datUpdatedAlphas

      #QC passes so we will add the circle to the figure and the ticks.
      #If there are ticks and if the circle is not already in the ticks.
      if self.llsTicks is not None:
        fFound = False
        iHighestNumber = -1
        for tick in self.llsTicks:
          #Look for name
          if tick[1] == sCircleMethod:
            fFound = True
          #Find highest count
          if int(tick[0]) > iHighestNumber:
            iHighestNumber = int(tick[0])
        if not fFound:
          self.llsTicks.append([str(iHighestNumber+1),sCircleMethod])

      #If the circle is forced, add its taxa to the caller's lsIDs.
      #A single alpha applies to every taxon of the circle.
      if dictCircleData[self.c_sForced]:
        if setKnownIDs is None:
          setKnownIDs = set(lsIDs)
        for iTaxaIndex, sTaxa in enumerate(lsTaxa):
          datTaxaAlpha = datAlpha[iTaxaIndex] if fAlphaIsList else datAlpha
          if (not datTaxaAlpha == "0.0") and (sTaxa not in setKnownIDs):
            setKnownIDs.add(sTaxa)
            lsIDs.append(sTaxa)

      #A repeated taxon uses the values at its first position in the circle
      dictFirstIndex = dict()
      for iTaxaIndex, sTaxa in enumerate(lsTaxa):
        dictFirstIndex.setdefault(sTaxa, iTaxaIndex)

      #NOTE(review): only taxa named by a circle get a line. IDs in lsIDs that are in no
      #circle get no transparent placeholder entry - confirm this is what Circlader expects.
      for sTaxa in lsTaxa:
        #Start the line of a new taxon with its name reset to . delimited
        if sTaxa not in dictCircleDataMethods:
          dictCircleDataMethods[sTaxa] = ".".join([sElement for sElement in sTaxa.split("|") if sElement])

        iTaxaIndex = dictFirstIndex[sTaxa]
        sBorder = str(datBorder[iTaxaIndex]) if fBorderIsList else str(datBorder)
        sShape = datShape[iTaxaIndex] if fShapeIsList else datShape
        sAlpha = str(datAlpha[iTaxaIndex]) if fAlphaIsList else str(datAlpha)
        dictCircleDataMethods[sTaxa] = dictCircleDataMethods[sTaxa]+"".join([ConstantsBreadCrumbs.c_cTab,sCircleMethod,":",sAlpha,"!",sShape,"#",sBorder])

    if len(dictCircleDataMethods)>0:
      sCircleContent = ConstantsBreadCrumbs.c_strEndline.join(dictCircleDataMethods.values())
      self.writeToFile(self.strCircleFilePath, sCircleContent, False)
      self.fCircleFileMade=True
      return True

    self.fCircleFileMade=False
    return False

  #Happy Path tested
  def createHighlightFile(self, lsIDs):
    """
    Write highlight data to file.
    Only IDs registered as forced highlights are written. Each line holds the
    "." delimited taxon, its name (or its relabel when one is set), the
    relabel and the color in Circlader format.

    :param lsIDs: Ids to include in the highlight file
    :type: lsIDs List of strings
    :return: Always True. The file is only written when there is highlight data.
    """
    #Any of these may still be unset (None)
    dictHighLights = self.dictForcedHighLights or dict()
    dictColors = self.dictColors or dict()
    dictRelabels = self.dictRelabels or dict()

    lsHighLightData = list()
    for sID in lsIDs:
      if sID not in dictHighLights:
        continue

      #Rename taxa to be consistent with the . delimit format
      asNameElements = [sElement for sElement in sID.split("|") if sElement]
      if not asNameElements:
        #Nothing to name
        continue
      sCurTaxaName = asNameElements[-1]
      #An unclassified clade is named after its parent as well
      if (len(asNameElements)>1) and (sCurTaxaName=="unclassified"):
        sCurTaxaName = ".".join([asNameElements[-2],sCurTaxaName])
      sCurTaxa = ".".join(asNameElements)

      #Get color, left empty when the color name is not defined
      sCurColor = ""
      sColorKey = dictHighLights[sID]
      if sColorKey in dictColors:
        sCurColor = self.formatRGB(dictColors[sColorKey])

      #Get label, a relabel replaces the taxon name
      sCurLabel = dictRelabels.get(sID, "")
      if sCurLabel == "":
        lsHighLightData.append(ConstantsBreadCrumbs.c_cTab.join([sCurTaxa,sCurTaxaName,sCurLabel,sCurColor]))
      else:
        lsHighLightData.append(ConstantsBreadCrumbs.c_cTab.join([sCurTaxa,sCurLabel,sCurLabel,sCurColor]))

    if len(lsHighLightData)>0:
      self.writeToFile(self.strHighLightFilePath, ConstantsBreadCrumbs.c_strEndline.join(lsHighLightData), False)
      self.fHighlightFileMade=True
    return True

  #Happy path tested
  def createSizeFile(self, lsIDs):
    """
    Write size data to file.
    The size of a taxon is log10((average abundance * log scale) + 1) times
    the circle scale, with the average bounded below by the minimum log size.

    :param lsIDs: Ids to include in the size file
    :type: lsIDs List of strings
    :return: Always True. The file is only written when there is size data.
    """
    if self.npaAbundance is not None:
      dMinimumValue = (self.c_dMinLogSize*self.c_dLogScale)+1
      #Set for quick membership tests, one is made per row of data
      setIDs = set(lsIDs)
      lsWriteData = list()
      for rowData in self.npaAbundance:
        strCurrentId = rowData[0]
        #Reset to root if needed to match current data
        if self.strRoot is not None:
          lsRootedId = self.updateToRoot([strCurrentId])
          if len(lsRootedId) == 0:
            #Nothing was given back for this id, so it cannot be among the ids
            continue
          strCurrentId = lsRootedId[0]
        if strCurrentId in setIDs:
          #First element of the row is the id, the rest are the measurements
          dAverage = np.average(list(rowData)[1:])
          dSize = max([dMinimumValue,(dAverage*self.c_dLogScale)+1])
          lsWriteData.append(".".join(strCurrentId.split("|"))+ConstantsBreadCrumbs.c_cTab+str(math.log10(dSize)*self.c_dCircleScale))
      if len(lsWriteData)>0:
        self.writeToFile(self.strSizeFilePath, ConstantsBreadCrumbs.c_strEndline.join(lsWriteData), False)
        self.fSizeFileMade=True
    return True

  #Happy path tested 1
  def createTreeFile(self, lsIDs):
    """
    Write tree data to file. The tree file defines the internal cladogram and all its points.
    Every id is written as a "." delimited path, preceded by each of its
    ancestors. A path is written once, in the order it is first met.

    :param lsIDs: Ids to include in the tree file as well as their ancestors
    :type: lsIDs List of strings
    :return: Always True. The file is only written when there is tree data.
    """
    lsFullTree = list()
    #Paths already in the tree
    setSeenPaths = set()
    for sID in lsIDs:
      #An id without any element gives no node
      lsIDElements = [sElement for sElement in sID.split("|") if sElement]
      #Each prefix of the lineage is a node, from the first element to the full id
      for iEndLevel in range(1,len(lsIDElements)+1):
        sNodePath = ".".join(lsIDElements[0:iEndLevel])
        if sNodePath not in setSeenPaths:
          setSeenPaths.add(sNodePath)
          lsFullTree.append(sNodePath)

    if len(lsFullTree)>0:
      self.writeToFile(self.strTreeFilePath, ConstantsBreadCrumbs.c_strEndline.join(lsFullTree), False)
    return True

  #Happy Path tested
  def filterByAbundance(self, lsIDs):
    """
    Filter by abundance. Specifically this version requires elements of
    the tree to have a certain percentage of a certain percentile in samples.

    :param lsIDs: Ids to filter
    :type: lsIDs List of strings
    """
    #list of ids to return that survived the filtering
    retls = list()
    if not self.npaAbundance is None:
      #Hold the cuttoff score (threshold) for the percentile of interest {SampleName(string):score(double)}
      dictPercentiles = dict()
      for index in xrange(1,len(self.npaAbundance.dtype.names)):
        dScore = scipy.stats.scoreatpercentile(self.npaAbundance[self.npaAbundance.dtype.names[index]],self.c_dPercentileCutOff)
        dictPercentiles[self.npaAbundance.dtype.names[index]] = dScore

      #Sample count (Ignore sample id [position 0] which is not a name)
      dSampleCount = float(len(self.npaAbundance.dtype.names[1:]))

      #Check each taxa
      for rowTaxaData in self.npaAbundance:
        sCurTaxaName = rowTaxaData[0]
        #Only look at the IDs given
        if(sCurTaxaName in lsIDs):
          dCountAbovePercentile = 0.0
          ldAbundanceMeasures = list(rowTaxaData)[1:]
          #Check to see if the abundance score meets the threshold and count if it does
          for iScoreIndex in xrange(0,len(ldAbundanceMeasures)):
            if(ldAbundanceMeasures[iScoreIndex] >= dictPercentiles[self.lsSampleNames[iScoreIndex]]):
              dCountAbovePercentile = dCountAbovePercentile + 1.0
          dPercentOverPercentile = dCountAbovePercentile / dSampleCount
          if(dPercentOverPercentile >= (self.c_dPercentageAbovePercentile/100.0)):
            retls.append(sCurTaxaName)
    return retls

  #Happy Path Tested
  def filterByCladeSize(self, lsIDs):
    """
    Filter by the count of individuals in the clade.
    Terminal nodes are counted per clade at the level iCladeLevelToReduce. An
    id is kept when its clade has at least iMinCladeSize counted terminal
    nodes, or when its lineage is too short to belong to such a clade.

    :param lsIDs: Ids to filter
    :type: lsIDs List of strings
    :return: Ids that survived the filtering, in the order given
    """
    #First get terminal nodes
    lsTerminalNodes = AbundanceTable.funcGetTerminalNodesFromList(lsIDs,self.cFeatureDelimiter)

    #Count of terminal nodes per clade {clade lineage:count}
    cladeCounts = dict()
    for sTerminalNode in lsTerminalNodes:
      lsLineage = sTerminalNode.split(self.cFeatureDelimiter)
      iLineageCount = len(lsLineage)
      #If the lineage is shorter than the reduced clade level then no need to filter it
      if iLineageCount < self.iCladeLevelToReduce:
        continue
      #Count lineages reaching the measuring clade level.
      #Shorter ones are only counted if the last element is unclassified.
      if (iLineageCount >= self.iCladeLevelToMeasure) or (lsLineage[-1] == self.strUnclassified):
        sLineage = self.cFeatureDelimiter.join(lsLineage[0:self.iCladeLevelToReduce])
        cladeCounts[sLineage] = cladeCounts.get(sLineage,0) + 1

    #Go through the IDs and reduce as needed using the clade counts
    retls = list()
    for sID in lsIDs:
      lsID = sID.split(self.cFeatureDelimiter)
      iIDCount = len(lsID)

      #Too short to filter (judged on this id, not on the last terminal node)
      if iIDCount < self.iCladeLevelToReduce:
        retls.append(sID)
      #Check to see if the clade which is being reduced made the cut
      elif (iIDCount >= self.iCladeLevelToMeasure) or (lsID[-1] == self.strUnclassified):
        sClade = self.cFeatureDelimiter.join(lsID[0:self.iCladeLevelToReduce])
        #A clade without any counted terminal node has a size of 0
        if cladeCounts.get(sClade,0) >= self.iMinCladeSize:
          retls.append(sID)

    return retls

  #Happy path tested
  def formatRGB(self, sColor):
    """
    Takes a string that is of the format 0-255,0-255,0-255 and converts it to the
    color format of circlader _c_[0-1,0-1,0-1]

    Empty elements (for example from a doubled or trailing comma) are ignored.
    If sColor is None or does not hold exactly 3 non-empty elements the
    default "_c_[1,1,1]" is returned. Each channel is divided by 255.0; the
    values are not range checked.

    :param sColor: String RGB format
    :type: sColor String
    :return: Color in the circlader format
    :type: String
    :raises ValueError: If one of the 3 elements can not be read as an integer
    """
    sCircladerColor = "_c_[1,1,1]"
    if sColor is None:
      return sCircladerColor

    #Build a real list; len() of a filter() result fails where filter() is lazy
    lsColorElements = [sElement for sElement in sColor.split(",") if sElement]
    if len(lsColorElements) == 3:
      ldChannels = [int(sElement)/255.0 for sElement in lsColorElements]
      sCircladerColor = "".join(["_c_[",",".join([str(dChannel) for dChannel in ldChannels]),"]"])
    return sCircladerColor

  #Happy path tested
  def generateLabels(self, lsIDs):
    """
    Labels for visualization.
    Changes unclassified to one_level_higher.unclassified and enables numeric labeling / relabeling.
    Will only rename, will not add the label. The key must exist for the value to be used in replacing.

    :param lsIDs: Ids to include in the labels file
    :type: lsIDs List of strings
    """
    dictRet = dict()
    for sID in lsIDs:
        lsIDElements = filter(None,re.split("\|",sID))
        iIDElementsCount = len(lsIDElements)
        sLabel = lsIDElements[iIDElementsCount-1]
        #Fix unclassified
        if((sLabel == "unclassified") and (iIDElementsCount > 1)):
            sLabel = ".".join([lsIDElements[iIDElementsCount-2],sLabel])
        #Change to relabels if given
        if(self.dictRelabels is not None):
            if(sLabel in self.dictRelabels):
                sLabel = self.dictRelabels[sLabel]
        #Store lable
        dictRet[sID] = sLabel
    return dictRet

  #Happy path tested
  def manageFilePaths(self, sTaxaFileName, strStyleFile, sColorFileName=None, sTickFileName=None, sHighlightFileName=None, sSizeFileName=None, sCircleFileName=None):
    """
    This method sets the naming to the files generated that Circlader acts on.
    These files include the tree, color, highlight, tick, circle, and size files.
    Checks to make sure the file path to the syle file provided is an existing file.
    Deletes any existing files with these generated names (except for the style files).

    :param sStyleFile: File path indicating the style file to use
    :type: String
    :param strTaxaFile: File path indicating the taxa file to use
    :type: String
    :param sColorFile: File path indicating the color file to use
    :type: String
    :param sTickFile: File path indicating the tick file to use
    :type: String
    :param sHighlightFile: File path indicating the highlight file to use
    :type: String
    :param sSizeFile: File path indicating the size file to use
    :type: String
    :param sCircleFileName: File path for circle files
    :type: String
    :return boolean: True indicates success, false indicates error
    """
    #Do not remove the style file, it is static
    if strStyleFile is None:
      print("Error, style file is None")
      return(False)
    if not os.path.exists(strStyleFile):
      print("Error, no style file found.")
      return(False)
    else:
      self.strStyleFilePath = strStyleFile

    #Set output files and remove if needed
    self.strTreeFilePath = sTaxaFileName
    self.strColorFilePath = sColorFileName
    self.strTickFilePath = sTickFileName
    self.strHighLightFilePath = sHighlightFileName
    self.strSizeFilePath = sSizeFileName
    self.strCircleFilePath = sCircleFileName
    for sFile in [self.strTreeFilePath,self.strColorFilePath,self.strTickFilePath,
                  self.strHighLightFilePath,self.strSizeFilePath,self.strCircleFilePath]:
      if not sFile is None:
        if(os.path.exists(sFile)):
          os.remove(sFile)
    return True

  #Not tested
  def relabelIDs(self, dictLabels):
    """
    Allows the relabeling of ids. Can be used to make numeric labeling of ids or renaming

    :param dictLabels: Should label (key) (after unclassified is modified) and new label (value)
    :type: dictLabels Dictionary of string (key:label to replace) string (value:new label to use in replacing)
    """
    self.dictRelabels = dictLabels

  #Happy path tested
  def updateToRoot(self, lsIDs):
    """
    Updates the clade to the root given. The clade must contain the root and the level of the
    root in the clade will be rest to it's first level, ignoring the previous levels of the clade.

    :param lsIDs: List of Clades that will be reset to the root specified by setRoot
    :type: lsIDs List of strings. Each string representing a clade.
    """

    if(self.strRoot is None):
      return lsIDs
    #Force root tree if indicated to do so
    lsRootedIDs = list()
    for sID in lsIDs:
      sIDElements = filter(None,re.split("\|",sID))
      if(self.strRoot in sIDElements):
        iRootIndex = sIDElements.index(self.strRoot)
        #If multiple levels of the clade exist after the new root merge them.
        if(len(sIDElements)>iRootIndex+2):
          lsRootedIDs.append("|".join(sIDElements[iRootIndex+1:]))
        #If only one level of the clade exists after the new root, return it.
        elif(len(sIDElements)>iRootIndex+1):
          lsRootedIDs.append(sIDElements[iRootIndex+1])
    return(lsRootedIDs)

  #Testing: Used extensively in other tests
  def writeToFile(self, strFileName, strDataToWrite, fAppend):
    """
    Helper function which writes a string to a file, either adding to the end
    of the file or replacing its contents.

    :param strFileName: File to write to
    :type: strFileName File path (string)
    :param strDataToWrite: Data to write to file
    :type: strDataToWrite String
    :param fAppend: Indicates if an append should occur (True == Append, otherwise the file is overwritten)
    :type: fAppend boolean
    """

    #'a' adds to the end of the file, 'w' starts the file over
    cMode = 'a' if fAppend else 'w'
    with open(strFileName,cMode) as hndlOut:
      hndlOut.write(strDataToWrite)
author	george-weingart
date	Tue, 13 May 2014 21:58:57 -0400
parents
children