annotate src/breadcrumbs/src/Cladogram.py @ 0:2f4f6f08c8c4 draft

Uploaded
author george-weingart
date Tue, 13 May 2014 21:58:57 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
1 """
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
2 Author: Timothy Tickle
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
3 Description: Class to call circlader and create dendrograms.
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
4 """
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
5
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
6 #####################################################################################
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
7 #Copyright (C) <2012>
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
8 #
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
9 #Permission is hereby granted, free of charge, to any person obtaining a copy of
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
10 #this software and associated documentation files (the "Software"), to deal in the
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
11 #Software without restriction, including without limitation the rights to use, copy,
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
12 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
13 #and to permit persons to whom the Software is furnished to do so, subject to
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
14 #the following conditions:
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
15 #
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
16 #The above copyright notice and this permission notice shall be included in all copies
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
17 #or substantial portions of the Software.
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
18 #
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
19 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
20 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
21 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
22 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
23 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
24 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
25 #####################################################################################
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
26
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
27 __author__ = "Timothy Tickle"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
28 __copyright__ = "Copyright 2012"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
29 __credits__ = ["Timothy Tickle"]
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
30 __license__ = "MIT"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
31 __maintainer__ = "Timothy Tickle"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
32 __email__ = "ttickle@sph.harvard.edu"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
33 __status__ = "Development"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
34
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
35 #External libraries
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
36 from AbundanceTable import AbundanceTable
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
37 from CommandLine import CommandLine
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
38 from ConstantsBreadCrumbs import ConstantsBreadCrumbs
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
39 from ConstantsFiguresBreadCrumbs import ConstantsFiguresBreadCrumbs
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
40 import math
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
41 import numpy as np
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
42 import os
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
43 import re
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
44 import scipy.stats
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
45 from ValidateData import ValidateData
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
46 #import scipy.stats.stats as stats
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
47
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
48 class Cladogram:
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
49 """
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
50 This class manages creating files for Circlader and calling Circlader.
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
51 """
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
52
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
53 #Script name
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
54 circladerScript=None
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
55
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
56 #Constants
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
57 c_sTaxa="Taxa"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
58 c_sCircle="Circle"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
59 c_sBorder="Border"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
60 c_sShape="Shape"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
61 c_sAlpha="Alpha"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
62 c_sForced="Forced"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
63
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
64 #Numpy array (structured array) holding data
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
65 #Should be SampleID, Sample Abundances/Data (samples = columns).....
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
66 npaAbundance = None
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
67 #List of sample names
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
68 lsSampleNames = None
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
69 #Name of output image
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
70 strImageName = "Cladogram.png"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
71 #String used to call the sample id column
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
72 strSampleID = "ID"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
73 strUnclassified = "unclassified"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
74
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
75 #Minimum size of clade (terminal node count for clade)
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
76 iMinCladeSize = 1
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
77 #Level of ancestry to filter at (starts with 0 and based on the input file)
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
78 iCladeLevelToMeasure = 1
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
79 iCladeLevelToReduce = 1
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
80 cFeatureDelimiter = "|"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
81
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
82 #Flags
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
83 #Turns on (True) or off (False) abundance-based filtering
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
84 fAbundanceFilter = False
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
85 #Turns on (True) or off (False) clade size-based filtering
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
86 fCladeSizeFilter = False
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
87 #Indicate if the following files were made
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
88 fSizeFileMade=False
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
89 fCircleFileMade=False
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
90 fColorFileMade=False
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
91 fTickFileMade=False
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
92 fHighlightFileMade=False
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
93
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
94 #Circlader files
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
95 strTreeFilePath="_Taxa.txt"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
96 strCircleFilePath = "_Circle.txt"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
97 strColorFilePath="_Color.txt"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
98 strTickFilePath="_Tick.txt"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
99 strHighLightFilePath="_HighLight.txt"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
100 strSizeFilePath="_Size.txt"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
101 strStyleFilePath=""
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
102
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
103 #Thresholds
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
104 #Controls the showing of taxa
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
105 c_dPercentileCutOff = 90.0
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
106 c_dPercentageAbovePercentile = 1.0
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
107
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
108 #Minimum average abundance score when using log scale
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
109 c_dMinLogSize = 0.0000000001
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
110 #Constant used to magnify the size difference in the taxa (log only)
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
111 c_dLogScale = 1000000
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
112 #After log10, an additional scaling adjustment (use this)
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
113 c_dCircleScale = 3
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
114
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
115 #Data for circular files
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
116 #Used to change IDs to proper labels
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
117 dictConvertIDs = None
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
118 #Labels to be relabeled
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
119 dictRelabels = None
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
120 #Colors
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
121 dictColors = None
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
122 #Elements that are forced to be highlighted
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
123 dictForcedHighLights = None
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
124 #Ticks
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
125 llsTicks = None
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
126 #Forced root of the tree, discarding data as needed.
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
127 strRoot = None
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
128 #Holds circle data as a list of dictionaries
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
129 #One dictionary per circle
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
130 ldictCircleData = None
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
131
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def __init__(self):
    """
    Set up a new Cladogram.

    Gives the instance its own, initially empty, dictionary of forced
    highlights (the class-level default for this attribute is None).
    """
    self.dictForcedHighLights = {}
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
134
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
135 #Happy Path Tested
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def addHighLights(self, dictClades, fOverwrite):
    """
    Register elements that must be highlighted.

    Elements registered here are added back to the plotted IDs by generate()
    after its filters have run. Each element is associated with a color name:
    {strName1:strColorName1,strName2:strColorName2,...}
    The color names are expected to be supplied through setColorData.

    If either argument fails validation the call does nothing.

    :param dictClades: Names of elements, if found in the tree which should be highlighted
    :type: dictClades Dictionary of element name (string) and element color (string)
    :param fOverwrite: If an element is already registered, replace its color with the one given here.
    :type: fOverwrite boolean (True == overwrite color)
    """
    #Invalid input is silently ignored
    if not ValidateData.funcIsValidDictionary(dictClades):
        return
    if not ValidateData.funcIsValidBoolean(fOverwrite):
        return

    for strClade, strColor in dictClades.items():
        #New elements are always stored; known elements only when overwriting
        if fOverwrite or strClade not in self.dictForcedHighLights:
            self.dictForcedHighLights[strClade] = strColor
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
157
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
158 #Not tested
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def getHighLights(self):
    """
    Give access to the forced highlights.

    :return: The dictionary stored in dictForcedHighLights (the object itself, not a copy).
    """
    dictHighLights = self.dictForcedHighLights
    return dictHighLights
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
161
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
162 #Not tested
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def forceRoot(self, strRoot):
    """
    Store the value at which the tree should be rooted.

    This method only records the value. It is used later by generate(),
    which re-roots the forced highlights and the relabels whenever a root
    has been set.
    Intended effect (as described by the original author): only taxa that
    contain this value in their ancestry are plotted, the given value becomes
    the root and any hierarchy above it is ignored.

    :param strRoot: Where to root the tree
    :type: strRoot String
    """
    self.strRoot = strRoot
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
174
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def generate(self, strImageName, strStyleFile, sTaxaFileName, strCircladerScript = ConstantsBreadCrumbs.c_strCircladerScript, iTerminalCladeLevel = 10, sColorFileName=None, sTickFileName=None, sHighlightFileName=None, sSizeFileName=None, sCircleFileName=None):
    """
    Generate a cladogram image by writing the Circlader input files and then calling Circlader.

    The IDs are read from the abundance data held by the instance (npaAbundance).
    They are reduced by clade level, optionally filtered by abundance and clade size,
    and then extended with the forced highlights and the forced circle taxa before
    the files are written.

    :param strImageName: File name to save the output cladogram image
    :type: strImageName File name (string)
    :param strStyleFile: File path indicating the style file to use
    :type: strStyleFile File path (string)
    :param sTaxaFileName: File path indicating the taxa file to use
    :type: sTaxaFileName File path (string)
    :param strCircladerScript: File path to the Circlader script
    :type: strCircladerScript String
    :param iTerminalCladeLevel: Clade level to use as terminal in plotting
    :type: iTerminalCladeLevel integer starting with 1
    :param sColorFileName: File path indicating the color file to use
    :type: sColorFileName File path (string)
    :param sTickFileName: File path indicating the tick file to use
    :type: sTickFileName File path (string)
    :param sHighlightFileName: File path indicating the highlight file to use
    :type: sHighlightFileName File path (string)
    :param sSizeFileName: File path indicating the size file to use
    :type: sSizeFileName File path (string)
    :param sCircleFileName: File path of circlader circle file.
    :type: sCircleFileName String
    :return: False if the abundance data was not set or if the circle, tree,
             highlight or size file could not be created. Otherwise nothing
             is returned (None) once Circlader has been called.
    """

    #Identity test on purpose: "== None" on a numpy array is evaluated
    #element by element and cannot be used as a condition.
    if self.npaAbundance is None:
        print("Cladogram::generate. The data was not set so an image could not be generated")
        return False

    #Set script
    self.circladerScript = strCircladerScript

    #Set output file name
    self.strImageName = strImageName

    #Hand the file names to manageFilePaths
    #(original note: checks files exist and removes files which will be written)
    self.manageFilePaths(sTaxaFileName, strStyleFile, sColorFileName, sTickFileName, sHighlightFileName, sSizeFileName, sCircleFileName)

    #Get IDs
    lsIDs = list(self.npaAbundance[self.strSampleID])

    #Generate a dictionary to convert the ids to correct format
    #(original note: fixes unclassified names and makes numeric labels as indicated)
    self.dictConvertIDs = self.generateLabels(lsIDs)

    #Remove taxa lower than the display clade level
    lsIDs = [sFeature for sFeature in lsIDs
             if len(sFeature.split(self.cFeatureDelimiter)) <= iTerminalCladeLevel]

    #Filter by abundance
    if self.fAbundanceFilter:
        lsIDs = self.filterByAbundance(lsIDs)

    #Update to the correct root
    lsIDs = self.updateToRoot(lsIDs)

    if self.strRoot is not None:
        #Set highlights to root for consistency
        #A dictionary that is not set (None) is left as it is.
        if self.dictForcedHighLights is not None:
            dictRootedHighLights = dict()
            for sKey in self.dictForcedHighLights:
                strUpdatedKey = self.updateToRoot([sKey])
                dictRootedHighLights[strUpdatedKey[0]] = self.dictForcedHighLights[sKey]
            self.dictForcedHighLights = dictRootedHighLights

        #Set relabels to root for consistency
        if self.dictRelabels is not None:
            dictRootedLabels = dict()
            for sKey in self.dictRelabels:
                strUpdatedKey = self.updateToRoot([sKey])
                dictRootedLabels[strUpdatedKey[0]] = self.dictRelabels[sKey]
            self.dictRelabels = dictRootedLabels

    #Filter by clade size Should be the last filter.
    #It is not a strong filter but cleans up images
    if self.fCladeSizeFilter:
        lsIDs = self.filterByCladeSize(lsIDs)

    #Add in forced highlighting (nothing to add when no highlights are set)
    lsIDs.extend((self.dictForcedHighLights or {}).keys())
    lsIDs = list(set(lsIDs))

    #Add in forced circle data
    #ldictCircleData is None until circle data has been supplied
    for dictCircleData in (self.ldictCircleData or []):
        if dictCircleData[self.c_sForced]:
            lsTaxa = dictCircleData[self.c_sTaxa]
            lsAlpha = dictCircleData[self.c_sAlpha]
            #Only taxa whose alpha entry is not '0.0' are forced in
            lsIDs.extend([lsTaxa[iIndex] for iIndex, sAlpha in enumerate(lsAlpha) if sAlpha != '0.0'])
    lsIDs = list(set(lsIDs))

    #Create circle files (needs to be after any filtering because it has a forcing option).
    if not self.createCircleFile(lsIDs):
        return False

    #Generate / Write Tree file
    if not self.createTreeFile(lsIDs):
        return False

    #Generate / Write Highlight file
    if not self.createHighlightFile(lsIDs):
        return False

    #Generate / write color file
    if self.dictColors is not None:
        lsColorData = [ConstantsBreadCrumbs.c_cTab.join([sColorKey, self.dictColors[sColorKey]]) for sColorKey in self.dictColors]
        self.writeToFile(self.strColorFilePath, ConstantsBreadCrumbs.c_strEndline.join(lsColorData), False)
        self.fColorFileMade = True

    #Generate / write tick file
    if self.llsTicks is not None:
        lsTickData = [ConstantsBreadCrumbs.c_cTab.join(lsTicks) for lsTicks in self.llsTicks]
        self.writeToFile(self.strTickFilePath, ConstantsBreadCrumbs.c_strEndline.join(lsTickData), False)
        self.fTickFileMade = True

    #Generate / Write size data
    if not self.createSizeFile(lsIDs):
        return False

    #Call commandline
    lsCommand = [self.circladerScript, self.strTreeFilePath, self.strImageName, "--style_file", self.strStyleFilePath, "--tree_format", "tabular"]
    #Optional files are only passed on when they were made (order is kept)
    ltpleOptionalFiles = (
        (self.fSizeFileMade, "--size_file", self.strSizeFilePath),
        (self.fColorFileMade, "--color_file", self.strColorFilePath),
        (self.fTickFileMade, "--tick_file", self.strTickFilePath),
        (self.fHighlightFileMade, "--highlight_file", self.strHighLightFilePath),
        (self.fCircleFileMade, "--circle_file", self.strCircleFilePath),
    )
    for fFileMade, sOption, sFilePath in ltpleOptionalFiles:
        if fFileMade:
            lsCommand.extend([sOption, sFilePath])
    CommandLine().runCommandLine(lsCommand)
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
315
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
316 #Happy path tested
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def setColorData(self, dictColors):
    """
    Register the colors to use in the cladogram.

    The dictionary maps a name (string) to a color (string in RGB format 0-255,0-255,0-255):
    {strName1:Color,strName2:Color...}
    The name is a string that references what needs to be this color.

    Input that does not pass ValidateData.funcIsValidDictionary is ignored.
    The given dictionary itself is stored (not a copy); if it has no entry for the
    background color name, the default background color is added to it.

    :param dictColors: Color name and RGB specification
    :type: dictColors Dictionary of strings
    """
    #Nothing is stored for invalid input
    if not ValidateData.funcIsValidDictionary(dictColors):
        return

    self.dictColors = dictColors

    #Make sure a background color is always available
    strBackgroundName = ConstantsFiguresBreadCrumbs.c_strBackgroundColorName
    if strBackgroundName not in self.dictColors:
        self.dictColors[strBackgroundName] = ConstantsFiguresBreadCrumbs.c_strBackgroundColor
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
332
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
333 #Not tested
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def setAbundanceData(self, abtbAbundanceTable):
    """
    Take the data the Cladogram will plot from an abundance table.

    Stores the results of the table's funcGetAbundanceCopy(), funcGetIDMetadataName()
    and funcGetSampleNames() calls on this object.

    :param abtbAbundanceTable: AbundanceTable to read from
    :type: abtbAbundanceTable AbundanceTable
    """
    abtbSource = abtbAbundanceTable

    #Abundance measurements
    self.npaAbundance = abtbSource.funcGetAbundanceCopy()
    #Name of the metadata holding the ids
    self.strSampleID = abtbSource.funcGetIDMetadataName()
    #Names of the samples
    self.lsSampleNames = abtbSource.funcGetSampleNames()
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
344
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
345 #Not tested
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def setFilterByAbundance(self, fAbundanceFilter, dPercentileCutOff = 90.0, dPercentageAbovePercentile = 1.0):
    """
    Turn abundance-based filtering on or off and set its thresholds.
    The values are stored as given; no range checking is performed here.

    :param fAbundanceFilter: True switches abundance-based filtering on, False switches it off
    :type: fAbundanceFilter boolean
    :param dPercentileCutOff: Percentage between 100.0 to 0.0.
    :type: dPercentileCutOff double
    :param dPercentageAbovePercentile: Percentage between 100.0 to 1.0.
    :type: dPercentageAbovePercentile double
    """
    self.fAbundanceFilter, self.c_dPercentileCutOff, self.c_dPercentageAbovePercentile = (
        fAbundanceFilter, dPercentileCutOff, dPercentageAbovePercentile)
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
361
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
362 #Not Tested
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def setCircleScale(self, iScale):
    """
    Set the scale used to increase or decrease node sizes in the cladogram
    so that they are more visible. The documented default for iScale is 3.

    :param iScale: Integer to increase the relative sizes of nodes
    :type: iScale integer
    """
    #Stored as given, no validation
    self.c_dCircleScale = iScale
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
372
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
373 #Not tested
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def setFeatureDelimiter(self, cDelimiter):
    """
    Set the delimiter used to parse the consensus lineages of features.
    An empty or None delimiter is ignored and the current delimiter is kept.

    :param cDelimiter: The delimiter used to parse the consensus lineage of features.
    :type: cDelimiter Character
    """
    #Keep the current delimiter when nothing usable is given
    if not cDelimiter:
        return
    self.cFeatureDelimiter = cDelimiter
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
383
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
384 #Not tested
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def setFilterByCladeSize(self, fCladeSizeFilter, iCladeLevelToMeasure = 3, iCladeLevelToReduce = 1, iMinimumCladeSize = 5, cFeatureDelimiter = None, strUnclassified="unclassified"):
    """
    Turn clade size-based filtering on or off and configure it.
    Clade levels are NOT 0 based (Root is 1).

    The switch is always stored. Each of the other settings is only stored when it is
    usable (numbers greater than 0, non-empty strings); otherwise the current value is kept.

    :param fCladeSizeFilter: True switches clade size-based filtering on, False switches it off
    :type: fCladeSizeFilter boolean
    :param iCladeLevelToMeasure: The level of the consensus lineage that is measured or counted. Should be greater than iCladeLevelToReduce (Root is 1)
    :type: iCladeLevelToMeasure int
    :param iCladeLevelToReduce: The level of the consensus lineage that is reduced if the measured level does not have the correct count (Root is 1)
    :type: iCladeLevelToReduce int
    :param iMinimumCladeSize: Minimum count of the measured clade for the clade to be kept
    :type: iMinimumCladeSize int
    :param cFeatureDelimiter: One may set the feature delimiter if needed.
    :type: cFeatureDelimiter Character
    :param strUnclassified: String indicating unclassified features
    :type: strUnclassified String
    """
    self.fCladeSizeFilter = fCladeSizeFilter

    #Numeric settings are only taken when positive
    for strAttribute, iValue in (("iCladeLevelToMeasure", iCladeLevelToMeasure),
                                 ("iCladeLevelToReduce", iCladeLevelToReduce),
                                 ("iMinCladeSize", iMinimumCladeSize)):
        if iValue > 0:
            setattr(self, strAttribute, iValue)

    #Text settings are only taken when not empty
    for strAttribute, strValue in (("cFeatureDelimiter", cFeatureDelimiter),
                                   ("strUnclassified", strUnclassified)):
        if strValue:
            setattr(self, strAttribute, strValue)
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
415
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
416 #Not tested
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def setTicks(self, llsTicks):
    """
    Specify the tick information for the cladogram.

    Expects a list of lists, each holding a tick level (number starting at 0, as a string)
    and a tick name: [[#,Name1],[#,Name2]...]
    The lowest numbers are closest to the center of the tree.
    The list is stored as given (not copied).

    :param llsTicks: Level # and Name of level
    :type: llsTicks List of lists of strings
    """
    self.llsTicks = llsTicks
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
428
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
429 #Happy Path tested with createCircleFile
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def addCircle(self, lsTaxa, strCircle, dBorder=0.0, strShape="R", dAlpha=1.0, fForced=False):
    """
    Add a circle to the outside of the cladogram.

    The settings are recorded as one dictionary appended to self.ldictCircleData
    (the list is created on first use). The values are stored as given, without
    validation; list lengths are checked later when the circle file is written.

    :param lsTaxa: Taxa to highlight with this circle
    :type: lsTaxa List of strings (taxa names)
    :param strCircle: Circle the elements will be in, indicates color and circle level.
    :type: strCircle String circle
    :param dBorder: Border size for the circle element border (between 0.0 and 1.0).
        Can also be a list of border sizes. If list, position must match lsTaxa.
    :type: dBorder Float of border size (or list of floats).
    :param strShape: String indicator of shape or method to determine shape.
        Can also be a list of shapes. If list, position must match lsTaxa.
        Default value is square.
        Valid shapes are R(Square), v(inward pointing triangle), ^(outward pointing triangle)
    :type: strShape String to indicate the shape (may also be a list of strings).
    :param dAlpha: The transparency of the circle element (between 0.0[clear] and 1.0[solid]).
        Can also be a list of floats. If list, position must match lsTaxa.
    :type: dAlpha Float to indicate the transparency of the shape (may also be a list of floats).
    :param fForced: Forces the features in the circle to be displayed in the cladogram whether or not they pass the filters.
    :type: fForced Boolean
    :return: Always True
    :type: Boolean
    """
    #Create the circle store on first use
    if self.ldictCircleData is None:
        self.ldictCircleData = []

    self.ldictCircleData.append({
        self.c_sTaxa: lsTaxa,
        self.c_sCircle: strCircle,
        self.c_sBorder: dBorder,
        self.c_sShape: strShape,
        self.c_sAlpha: dAlpha,
        self.c_sForced: fForced,
    })
    return True
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
464
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
465 #Happy Path tested with AddCircle
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def createCircleFile(self, lsIDs):
    """
    Write circle data to file.

    Every circle registered in self.ldictCircleData is processed: its taxa are passed
    through self.updateToRoot, the circle name is added to self.llsTicks (when ticks
    are set and the name is not there yet) and one tab-delimited entry per taxon is
    built in the form <circle>:<alpha>!<shape>#<border>. Taxa names are written "." delimited.

    Side effects: taxa of forced circles are appended to the given lsIDs list (in place,
    without adding duplicates) unless their alpha is "0.0"; self.llsTicks may grow;
    self.fCircleFileMade records if the file was written.

    :param lsIDs: Ids to include in the circle file
    :type: lsIDs List of strings
    :return: True if the circle file was written. False if there is no circle data,
        no circle file path, a per-taxon list does not match the taxa list in length,
        or no taxa remained to write.
    :type: Boolean
    """
    #Without circle data there is nothing to write
    if self.ldictCircleData is None:
        self.fCircleFileMade = False
        return False

    if self.strCircleFilePath is None:
        print("Error, there is no circle file specified to write to.")
        return False

    #Holds circle data {Taxaname:string updates correctly for output to file}
    dictCircleDataMethods = dict()

    for dictCircleData in self.ldictCircleData:
        lsTaxa = dictCircleData[self.c_sTaxa]
        #Shape/s, border/s and alpha/s for taxa, each is either one value for all taxa or a list
        datShape = dictCircleData[self.c_sShape]
        fShapeIsList = isinstance(datShape, list)
        datBorder = dictCircleData[self.c_sBorder]
        fBorderIsList = isinstance(datBorder, list)
        datAlpha = dictCircleData[self.c_sAlpha]
        fAlphaIsList = isinstance(datAlpha, list)
        #Circle name
        sCircleMethod = dictCircleData[self.c_sCircle]

        #Check to make sure the lengths of the lists match up
        for fIsList, datValues, sError in (
                (fShapeIsList, datShape, "Error, Shapes were given as an list not of the size of the taxa list. Shape list length: "),
                (fBorderIsList, datBorder, "Error, Border sizes were given as an list not of the size of the taxa list. Border list length: "),
                (fAlphaIsList, datAlpha, "Error, Alpha sizes were given as an list not of the size of the taxa list. Alpha list length: ")):
            if fIsList and len(datValues) != len(lsTaxa):
                print("".join([sError, str(len(datValues)), ". Taxa list length: ", str(len(lsTaxa)), "."]))
                return False

        #Update taxa to root if needed
        if not (fShapeIsList or fBorderIsList or fAlphaIsList):
            lsTaxa = self.updateToRoot(lsTaxa)
        else:
            #Taxa are rooted one at a time so that the per-taxon lists can be
            #kept aligned with the taxa that remain afterwards.
            #Data that is not a list is left alone, it is used for all taxa.
            lsUpdatedTaxa = []
            datUpdatedShapes = [] if fShapeIsList else datShape
            datUpdatedBorders = [] if fBorderIsList else datBorder
            datUpdatedAlphas = [] if fAlphaIsList else datAlpha

            for iTaxaIndex, sTaxa in enumerate(lsTaxa):
                lsRootedTaxa = self.updateToRoot([sTaxa])
                #A taxon is only kept when rooting returns exactly one name
                if len(lsRootedTaxa) == 1:
                    lsUpdatedTaxa.append(lsRootedTaxa[0])
                    if fShapeIsList:
                        datUpdatedShapes.append(datShape[iTaxaIndex])
                    if fBorderIsList:
                        datUpdatedBorders.append(datBorder[iTaxaIndex])
                    if fAlphaIsList:
                        datUpdatedAlphas.append(datAlpha[iTaxaIndex])

            #Reset data to rooted data
            lsTaxa = lsUpdatedTaxa
            datShape = datUpdatedShapes
            datBorder = datUpdatedBorders
            datAlpha = datUpdatedAlphas

        #QC passes so we will add the circle to the figure and the ticks.
        #If there are ticks and if the circle is not already in the ticks.
        if self.llsTicks is not None:
            fFound = False
            iHighestNumber = -1
            for tick in self.llsTicks:
                #Look for name
                if tick[1] == sCircleMethod:
                    fFound = True
                #Find highest count
                iHighestNumber = max(iHighestNumber, int(tick[0]))
            if not fFound:
                self.llsTicks.append([str(iHighestNumber + 1), sCircleMethod])

        #If the circle is forced, add the taxa to the lsIDs.
        #The caller's list is updated in place so every forced circle is seen by the caller.
        if dictCircleData[self.c_sForced]:
            setKnownIDs = set(lsIDs)
            for iTaxaIndex, sTaxa in enumerate(lsTaxa):
                datTaxaAlpha = datAlpha[iTaxaIndex] if fAlphaIsList else datAlpha
                #Fully transparent elements are not forced (matches both "0.0" and 0.0)
                if str(datTaxaAlpha) == "0.0" or sTaxa in setKnownIDs:
                    continue
                lsIDs.append(sTaxa)
                setKnownIDs.add(sTaxa)

        #For all taxa of the circle
        for iTaxaIndex, sTaxa in enumerate(lsTaxa):
            #Start the entry of a taxon with its name reset to . delimited
            if sTaxa not in dictCircleDataMethods:
                dictCircleDataMethods[sTaxa] = ".".join([sElement for sElement in sTaxa.split("|") if sElement])

            #Use the value at the taxon's position or the value shared by all taxa
            sBorder = str(datBorder[iTaxaIndex] if fBorderIsList else datBorder)
            sShape = datShape[iTaxaIndex] if fShapeIsList else datShape
            sAlpha = str(datAlpha[iTaxaIndex] if fAlphaIsList else datAlpha)
            dictCircleDataMethods[sTaxa] = dictCircleDataMethods[sTaxa] + "".join([ConstantsBreadCrumbs.c_cTab, sCircleMethod, ":", sAlpha, "!", sShape, "#", sBorder])

    #Write one line per taxon
    if dictCircleDataMethods:
        sCircleContent = ConstantsBreadCrumbs.c_strEndline.join(dictCircleDataMethods.values())
        self.writeToFile(self.strCircleFilePath, sCircleContent, False)
        self.fCircleFileMade = True
        return True

    self.fCircleFileMade = False
    return False
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
627
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
628 #Happy Path tested
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def createHighlightFile(self, lsIDs):
    """
    Write highlight data to file.

    One line is collected per highlighted id. Each line joins, with
    ConstantsBreadCrumbs.c_cTab, the following four fields:
    the '.'-delimited clade, the name to display (the relabel when one is
    given, otherwise the last clade level), the relabel (may be empty) and
    the circlader colour (may be empty).

    :param lsIDs: Ids to include in the highlight file
    :type: lsIDs List of strings ('|' delimited clade names)
    :return boolean: True. self.fHighlightFileMade is set to True only when a file was written.
    """
    lsHighLightData = []
    for sID in lsIDs:
        # Split the clade into its levels, dropping empty levels. A list
        # comprehension is used instead of filter() so the result can be
        # indexed on Python 3 as well as Python 2.
        lsNameElements = [sElement for sElement in sID.split("|") if sElement]
        if not lsNameElements:
            # An id without any level has nothing to name (this used to
            # raise an IndexError).
            continue

        # NOTE(review): the indentation of the original was lost; this
        # assumes only ids listed in dictForcedHighLights are written.
        # Confirm against the upstream file.
        if sID not in self.dictForcedHighLights:
            continue

        # Rename taxa to be consistent with the '.' delimited format;
        # "unclassified" is prefixed with its parent level when it has one.
        sCurTaxaName = lsNameElements[-1]
        if sCurTaxaName == "unclassified" and len(lsNameElements) > 1:
            sCurTaxaName = ".".join([lsNameElements[-2], sCurTaxaName])
        sCurTaxa = ".".join(lsNameElements)

        # Get color (stays empty when the colour key is unknown)
        sCurColor = ""
        sColorKey = self.dictForcedHighLights[sID]
        if sColorKey in self.dictColors:
            sCurColor = self.formatRGB(self.dictColors[sColorKey])

        # Get label (looked up by the full id)
        sCurLabel = ""
        if self.dictRelabels is not None and sID in self.dictRelabels:
            sCurLabel = self.dictRelabels[sID]

        sDisplayName = sCurTaxaName if sCurLabel == "" else sCurLabel
        lsHighLightData.append(ConstantsBreadCrumbs.c_cTab.join([sCurTaxa, sDisplayName, sCurLabel, sCurColor]))

    if lsHighLightData:
        self.writeToFile(self.strHighLightFilePath, ConstantsBreadCrumbs.c_strEndline.join(lsHighLightData), False)
        self.fHighlightFileMade = True
    return True
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
668
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
669 #Happy path tested
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def createSizeFile(self, lsIDs):
    """
    Write size data to file.

    For every abundance row whose id (re-rooted when self.strRoot is set)
    is in lsIDs, the written size is
    log10(max(minimum, average * c_dLogScale + 1)) * c_dCircleScale,
    where minimum is c_dMinLogSize * c_dLogScale + 1 and the average is
    taken over every value of the row after the first (the id).

    :param lsIDs: Ids to include in the size file
    :type: lsIDs List of strings
    :return boolean: True. self.fSizeFileMade is set to True only when a file was written.
    """
    # NOTE(review): the indentation of the original was lost; this assumes
    # True is returned whether or not abundance data exists.
    if self.npaAbundance is None:
        return True

    dFloorSize = (self.c_dMinLogSize * self.c_dLogScale) + 1
    lsSizeLines = []
    for rowAbundance in self.npaAbundance:
        sRowId = rowAbundance[0]
        # Reset to root if needed to match current data
        if self.strRoot is not None:
            lsRooted = self.updateToRoot([sRowId])
            # When re-rooting yields nothing the empty list itself is kept,
            # so the membership test below does not match any string id.
            sRowId = lsRooted[0] if len(lsRooted) > 0 else lsRooted
        if sRowId not in lsIDs:
            continue
        dMean = np.average(list(rowAbundance)[1:])
        dScaled = max([dFloorSize, (dMean * self.c_dLogScale) + 1])
        sCladeName = ".".join(re.split(r"\|", sRowId))
        lsSizeLines.append(sCladeName + ConstantsBreadCrumbs.c_cTab + str(math.log10(dScaled) * self.c_dCircleScale))

    if lsSizeLines:
        self.writeToFile(self.strSizeFilePath, ConstantsBreadCrumbs.c_strEndline.join(lsSizeLines), False)
        self.fSizeFileMade = True
    return True
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
695
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
696 #Happy path tested 1
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def createTreeFile(self, lsIDs):
    """
    Write tree data to file. The tree file defines the internal cladogram and all its points.

    Every id contributes itself and each of its ancestors as '.'-delimited
    node paths (for "a|b|c": "a", "a.b" and "a.b.c"). Each node is written
    once, in order of first appearance.

    :param lsIDs: Ids to include in the tree file as well as their ancestors
    :type: lsIDs List of strings ('|' delimited clade names)
    :return boolean: True
    """
    lsFullTree = []
    # Mirrors lsFullTree for constant-time "already added" checks.
    setSeenNodes = set()
    for sID in lsIDs:
        # List comprehension instead of filter() so the levels can be sliced
        # on Python 3 too; ids without any level simply add no node.
        lsIDElements = [sElement for sElement in sID.split("|") if sElement]
        # Each prefix of the lineage is a node of the tree.
        for iEndLevel in range(1, len(lsIDElements) + 1):
            sNodePath = ".".join(lsIDElements[:iEndLevel])
            if sNodePath not in setSeenNodes:
                setSeenNodes.add(sNodePath)
                lsFullTree.append(sNodePath)

    if lsFullTree:
        self.writeToFile(self.strTreeFilePath, ConstantsBreadCrumbs.c_strEndline.join(lsFullTree), False)
    return True
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
721
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
722 #Happy Path tested
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def filterByAbundance(self, lsIDs):
    """
    Filter by abundance. Specifically this version requires elements of
    the tree to have a certain percentage of a certain percentile in samples.

    For every sample column of self.npaAbundance (every named field after
    the first) the score at percentile self.c_dPercentileCutOff is
    computed. An id survives when the fraction of its measurements that are
    at or above their sample's score is at least
    self.c_dPercentageAbovePercentile / 100.

    :param lsIDs: Ids to filter
    :type: lsIDs List of strings
    :return list: Ids that survived the filtering, in abundance-table order.
        Empty when there is no abundance data or no sample column.
    """
    #list of ids to return that survived the filtering
    retls = []
    if self.npaAbundance is None:
        return retls

    # Sample columns (position 0 holds the ids and is not a sample)
    lsSampleColumns = self.npaAbundance.dtype.names[1:]
    if not lsSampleColumns:
        # Without samples nothing can pass; also avoids dividing by zero.
        return retls

    # Cutoff score (threshold) per sample, kept in column order so it lines
    # up with the measurements of a row without a separate name lookup.
    ldThresholds = [scipy.stats.scoreatpercentile(self.npaAbundance[sSample], self.c_dPercentileCutOff)
                    for sSample in lsSampleColumns]
    dSampleCount = float(len(lsSampleColumns))
    dRequiredFraction = self.c_dPercentageAbovePercentile / 100.0
    setIDs = set(lsIDs)

    #Check each taxa
    for rowTaxaData in self.npaAbundance:
        sCurTaxaName = rowTaxaData[0]
        #Only look at the IDs given
        if sCurTaxaName not in setIDs:
            continue
        ldAbundanceMeasures = list(rowTaxaData)[1:]
        #Count the measurements that meet their sample's threshold
        iCountAbovePercentile = sum(1 for dMeasure, dThreshold in zip(ldAbundanceMeasures, ldThresholds)
                                    if dMeasure >= dThreshold)
        if (iCountAbovePercentile / dSampleCount) >= dRequiredFraction:
            retls.append(sCurTaxaName)
    return retls
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
758
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
759 #Happy Path Tested
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def filterByCladeSize(self, lsIDs):
    """
    Filter by the count of individuals in the clade.

    Terminal nodes are counted per clade, where the clade is the first
    self.iCladeLevelToReduce levels of the lineage. A terminal node is
    counted when its lineage has at least self.iCladeLevelToReduce levels
    and either has at least self.iCladeLevelToMeasure levels or ends in
    self.strUnclassified.

    Ids shorter than self.iCladeLevelToReduce are always kept. Ids meeting
    the counting rule above are kept when their clade has at least
    self.iMinCladeSize counted terminal nodes. All other ids are dropped.

    :param lsIDs: Ids to filter
    :type: lsIDs List of strings (levels delimited by self.cFeatureDelimiter)
    :return list: Ids that survived the filtering, in input order
    """
    def funcIsMeasured(lsLevels):
        # Long enough to be reduced AND (long enough to be measured OR unclassified)
        iLevels = len(lsLevels)
        return (iLevels >= self.iCladeLevelToReduce) and (
            (iLevels >= self.iCladeLevelToMeasure) or (lsLevels[-1] == self.strUnclassified))

    #First get terminal nodes
    lsTerminalNodes = AbundanceTable.funcGetTerminalNodesFromList(lsIDs, self.cFeatureDelimiter)

    #Count up clades: for each terminal node count the clade at the reduce level
    cladeCounts = dict()
    for sTerminalNode in lsTerminalNodes:
        lsLineage = sTerminalNode.split(self.cFeatureDelimiter)
        if funcIsMeasured(lsLineage):
            sLineage = self.cFeatureDelimiter.join(lsLineage[0:self.iCladeLevelToReduce])
            cladeCounts[sLineage] = cladeCounts.get(sLineage, 0) + 1

    #Go through the IDs and reduce as needed using the clade counts
    retls = list()
    for sID in lsIDs:
        lsID = sID.split(self.cFeatureDelimiter)

        #Too short to filter. The length of THIS id decides; the original
        #tested the length left over from the last terminal node instead.
        if len(lsID) < self.iCladeLevelToReduce:
            retls.append(sID)
        #Check to see if the clade which is being reduced made the cut
        elif funcIsMeasured(lsID):
            sClade = self.cFeatureDelimiter.join(lsID[0:self.iCladeLevelToReduce])
            #A clade that was never counted has size 0 rather than raising KeyError
            if cladeCounts.get(sClade, 0) >= self.iMinCladeSize:
                retls.append(sID)

    return retls
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
803
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
804 #Happy path tested
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def formatRGB(self, sColor):
    """
    Takes a string that is of the format 0-255,0-255,0-255 and converts it to the
    color format of circlader _c_[0-1,0-1,0-1]

    Empty components (for example from doubled or trailing commas) are
    ignored. When sColor is None, or does not hold exactly three
    components, the default "_c_[1,1,1]" is returned.

    :param sColor: String RGB format
    :type: sColor String
    :return string: Color in circlader format
    :raises ValueError: When a component is not an integer
    """
    sCircladerColor = "_c_[1,1,1]"
    if sColor is not None:
        # List comprehension instead of filter() so len() works on Python 3.
        lsChannels = [sChannel for sChannel in sColor.split(",") if sChannel]
        if len(lsChannels) == 3:
            # Scale each 0-255 channel to 0-1
            sCircladerColor = "".join(["_c_[", ",".join(str(int(sChannel) / 255.0) for sChannel in lsChannels), "]"])
    return sCircladerColor
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
822
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
823 #Happy path tested
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def generateLabels(self, lsIDs):
    """
    Labels for visualization.
    Changes unclassified to one_level_higher.unclassified and enables numeric labeling / relabeling.
    Will only rename, will not add the label. The key must exist for the value to be used in replacing.

    The label of an id is its last clade level. Relabeling is looked up in
    self.dictRelabels by that label (after unclassified is modified), not
    by the full id. An id without any clade level is labeled with itself.

    :param lsIDs: Ids to include in the labels file
    :type: lsIDs List of strings ('|' delimited clade names)
    :return dictionary: {id: label} for every id given
    """
    dictRet = dict()
    for sID in lsIDs:
        # List comprehension instead of filter() so the levels can be
        # indexed on Python 3 too.
        lsIDElements = [sElement for sElement in sID.split("|") if sElement]
        if lsIDElements:
            sLabel = lsIDElements[-1]
            #Fix unclassified
            if sLabel == "unclassified" and len(lsIDElements) > 1:
                sLabel = ".".join([lsIDElements[-2], sLabel])
        else:
            # No level to take a label from (this used to raise IndexError)
            sLabel = sID
        #Change to relabels if given
        if self.dictRelabels is not None and sLabel in self.dictRelabels:
            sLabel = self.dictRelabels[sLabel]
        #Store label
        dictRet[sID] = sLabel
    return dictRet
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
848
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
849 #Happy path tested
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def manageFilePaths(self, sTaxaFileName, strStyleFile, sColorFileName=None, sTickFileName=None, sHighlightFileName=None, sSizeFileName=None, sCircleFileName=None):
    """
    This method sets the naming to the files generated that Circlader acts on.
    These files include the tree, color, highlight, tick, circle, and size files.
    Checks to make sure the path to the style file provided exists.
    Deletes any existing files with these generated names (except for the style file).

    :param sTaxaFileName: File path for the tree (taxa) file
    :type: String
    :param strStyleFile: File path indicating the style file to use
    :type: String
    :param sColorFileName: File path for the color file
    :type: String
    :param sTickFileName: File path for the tick file
    :type: String
    :param sHighlightFileName: File path for the highlight file
    :type: String
    :param sSizeFileName: File path for the size file
    :type: String
    :param sCircleFileName: File path for circle files
    :type: String
    :return boolean: True indicates success, false indicates error
    """
    #The style file is static: it is required and never removed
    if strStyleFile is None:
        print("Error, style file is None")
        return False
    if not os.path.exists(strStyleFile):
        print("Error, no style file found.")
        return False
    self.strStyleFilePath = strStyleFile

    #Record the output file paths
    self.strTreeFilePath = sTaxaFileName
    self.strColorFilePath = sColorFileName
    self.strTickFilePath = sTickFileName
    self.strHighLightFilePath = sHighlightFileName
    self.strSizeFilePath = sSizeFileName
    self.strCircleFilePath = sCircleFileName

    #Remove output left over from an earlier run
    lsGeneratedFiles = [self.strTreeFilePath, self.strColorFilePath, self.strTickFilePath,
                        self.strHighLightFilePath, self.strSizeFilePath, self.strCircleFilePath]
    for sGeneratedFile in lsGeneratedFiles:
        if sGeneratedFile is not None and os.path.exists(sGeneratedFile):
            os.remove(sGeneratedFile)
    return True
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
896
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
897 #Not tested
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def relabelIDs(self, dictLabels):
    """
    Allows the relabeling of ids. Can be used to make numeric labeling of ids or renaming.

    The dictionary is stored as given (not copied) in self.dictRelabels.

    :param dictLabels: Label to replace (key) (after unclassified is modified) and new label (value)
    :type: dictLabels Dictionary of string (key:label to replace) string (value:new label to use in replacing)
    """
    self.dictRelabels = dictLabels
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
906
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
907 #Happy path tested
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def updateToRoot(self, lsIDs):
    """
    Updates the clade to the root given. The clade must contain the root and the level of the
    root in the clade will be reset to its first level, ignoring the previous levels of the clade.

    Only the levels below the root are kept: with root "b", "a|b|c|d"
    becomes "c|d". Clades that do not contain the root, or that end at the
    root, are dropped. When self.strRoot is None the given list is
    returned unchanged.

    :param lsIDs: List of Clades that will be reset to the root specified by setRoot
    :type: lsIDs List of strings. Each string representing a clade.
    :return list: Re-rooted clades ('|' delimited)
    """
    if self.strRoot is None:
        return lsIDs

    #Force root tree if indicated to do so
    lsRootedIDs = list()
    for sID in lsIDs:
        # List comprehension instead of filter() so .index() and slicing
        # work on Python 3 too.
        lsIDElements = [sElement for sElement in sID.split("|") if sElement]
        if self.strRoot in lsIDElements:
            lsBelowRoot = lsIDElements[lsIDElements.index(self.strRoot) + 1:]
            #One or many levels after the root are merged the same way;
            #joining a single level yields that level itself.
            if lsBelowRoot:
                lsRootedIDs.append("|".join(lsBelowRoot))
    return lsRootedIDs
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
932
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
933 #Testing: Used extensively in other tests
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def writeToFile(self, strFileName, strDataToWrite, fAppend):
    """
    Helper function that writes a string to a file.

    The file is opened in mode 'a' when fAppend is true and in mode 'w'
    (replacing any existing content) otherwise.

    :param strFileName: File to write to
    :type: strFileName File path (string)
    :param strDataToWrite: Data to write to file
    :type: strDataToWrite String
    :param fAppend: Indicates if an append should occur (True == Append)
    :type: fAppend boolean
    """
    with open(strFileName, 'a' if fAppend else 'w') as hndlOutput:
        hndlOutput.write(strDataToWrite)