Mercurial > repos > george-weingart > maaslin
comparison src/MaaslinToGraphlanAnnotation.py @ 0:e0b5980139d9
maaslin
| author | george-weingart |
|---|---|
| date | Tue, 13 May 2014 22:00:40 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:e0b5980139d9 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 ##################################################################################### | |
| 3 #Copyright (C) <2012> | |
| 4 # | |
| 5 #Permission is hereby granted, free of charge, to any person obtaining a copy of | |
| 6 #this software and associated documentation files (the "Software"), to deal in the | |
| 7 #Software without restriction, including without limitation the rights to use, copy, | |
| 8 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
| 9 #and to permit persons to whom the Software is furnished to do so, subject to | |
| 10 #the following conditions: | |
| 11 # | |
| 12 #The above copyright notice and this permission notice shall be included in all copies | |
| 13 #or substantial portions of the Software. | |
| 14 # | |
| 15 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, | |
| 16 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A | |
| 17 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
| 18 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION | |
| 19 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |
| 20 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
| 21 # | |
| 22 # This file is a component of the MaAsLin (Multivariate Associations Using Linear Models), | |
| 23 # authored by the Huttenhower lab at the Harvard School of Public Health | |
| 24 # (contact Timothy Tickle, ttickle@hsph.harvard.edu). | |
| 25 ##################################################################################### | |
| 26 | |
| 27 __author__ = "Timothy Tickle" | |
| 28 __copyright__ = "Copyright 2012" | |
| 29 __credits__ = ["Timothy Tickle"] | |
| 30 __license__ = "" | |
| 31 __version__ = "" | |
| 32 __maintainer__ = "Timothy Tickle" | |
| 33 __email__ = "ttickle@sph.harvard.edu" | |
| 34 __status__ = "Development" | |
| 35 | |
| 36 import argparse | |
| 37 import csv | |
| 38 import math | |
| 39 from operator import itemgetter | |
| 40 import re | |
| 41 import string | |
| 42 import sys | |
| 43 | |
| 44 #def funcGetColor(fNumeric,fMax): | |
| 45 # if fNumeric>0: | |
| 46 # return("#"+str(int(99*fNumeric/fMax)).zfill(2)+"0000") | |
| 47 # if fNumeric<0: | |
| 48 # return("#00"+str(int(99*abs(fNumeric/fMax))).zfill(2)+"00") | |
| 49 # return("#000000") | |
| 50 | |
| 51 def funcGetColor(fNumeric): | |
| 52 if fNumeric>0: | |
| 53 return sRingPositiveColor | |
| 54 else: | |
| 55 return sRingNegativeColor | |
| 56 | |
| 57 def funcGetAlpha(fNumeric,fMax): | |
| 58 return max(abs(fNumeric/fMax),dMinAlpha) | |
| 59 | |
| 60 #Constants | |
| 61 sAnnotation = "annotation" | |
| 62 sAnnotationColor = "annotation_background_color" | |
| 63 sClass = "class" | |
| 64 sRingAlpha = "ring_alpha" | |
| 65 dMinAlpha = .075 | |
| 66 sRingColor = "ring_color" | |
| 67 sRingHeight = "ring_height" | |
| 68 #sRingHeightMin = 0.5 | |
| 69 sStandardizedRingHeight = "1.01" | |
| 70 sRingLabel = "ring_label" | |
| 71 sRingLabelSizeWord = "ring_label_font_size" | |
| 72 sRingLabelSize = 10 | |
| 73 sRingLineColor = "#999999" | |
| 74 sRingPositiveWord = "Positive" | |
| 75 sRingPositiveColor = "#990000" | |
| 76 sRingNegativeWord = "Negative" | |
| 77 sRingNegativeColor = "#009900" | |
| 78 sRingLineColorWord = "ring_separator_color" | |
| 79 sRingLineThickness = "0.5" | |
| 80 sRingLineThicknessWord = "ring_internal_separator_thickness" | |
| 81 sCladeMarkerColor = "clade_marker_color" | |
| 82 sCladeMarkerSize = "clade_marker_size" | |
| 83 sHighlightedMarkerSize = "10" | |
| 84 c_dMinDoubleValue = 0.00000000001 | |
| 85 | |
| 86 #Set up arguments reader | |
| 87 argp = argparse.ArgumentParser( prog = "MaaslinToGraphlanAnnotation.py", | |
| 88 description = """Converts summary files to graphlan annotation files.""" ) | |
| 89 | |
| 90 #### Read in information | |
| 91 #Arguments | |
| 92 argp.add_argument("strInputSummary", metavar = "SummaryFile", type = argparse.FileType("r"), help ="Input summary file produced by maaslin") | |
| 93 argp.add_argument("strInputCore", metavar = "CoreFile", type = argparse.FileType("r"), help ="Core file produced by Graphlan from the maaslin pcl") | |
| 94 argp.add_argument("strInputHeader", metavar = "HeaderFile", type = argparse.FileType("r"), help ="Input header file to append to the generated annotation file.") | |
| 95 argp.add_argument("strOutputAnnotation", metavar = "AnnotationFile", type = argparse.FileType("w"), help ="Output annotation file for graphlan") | |
| 96 | |
| 97 args = argp.parse_args( ) | |
| 98 | |
| 99 #Read in the summary file and transform to class based descriptions | |
| 100 csvSum = open(args.strInputSummary,'r') if isinstance(args.strInputSummary, str) else args.strInputSummary | |
| 101 fSum = csv.reader(csvSum, delimiter="\t") | |
| 102 #Skip header (until i do this a better way) | |
| 103 fSum.next() | |
| 104 | |
| 105 #Extract associations (Metadata,taxon,coef,qvalue) | |
| 106 lsAssociations = [[sLine[1],sLine[2],sLine[4],sLine[7]] for sLine in fSum] | |
| 107 csvSum.close() | |
| 108 | |
| 109 #### Read in default graphlan settings provided by maaslin | |
| 110 #Read in the annotation header file | |
| 111 csvHdr = open(args.strInputHeader,'r') if isinstance(args.strInputHeader, str) else args.strInputHeader | |
| 112 fHdr = csv.reader(csvHdr, delimiter="\t") | |
| 113 | |
| 114 #Begin writting the output | |
| 115 #Output annotation file | |
| 116 csvAnn = open(args.strOutputAnnotation,'w') if isinstance(args.strOutputAnnotation, str) else args.strOutputAnnotation | |
| 117 fAnn = csv.writer(csvAnn, delimiter="\t") | |
| 118 fAnn.writerows(fHdr) | |
| 119 csvHdr.close() | |
| 120 | |
| 121 #If no associatiosn were found | |
| 122 if(len(lsAssociations)==0): | |
| 123 csvAnn.close() | |
| 124 | |
| 125 else: | |
| 126 #### Fix name formats | |
| 127 #Manipulate names to graphlan complient names (clades seperated by .) | |
| 128 lsAssociations = sorted(lsAssociations, key=itemgetter(1)) | |
| 129 lsAssociations = [[sBug[0]]+[re.sub("^[A-Za-z]__","",sBug[1])]+sBug[2:] for sBug in lsAssociations] | |
| 130 lsAssociations = [[sBug[0]]+[re.sub("\|*[A-Za-z]__|\|",".",sBug[1])]+sBug[2:] for sBug in lsAssociations] | |
| 131 | |
| 132 #If this is an OTU, append the number and the genus level together for a more descriptive termal name | |
| 133 lsAssociationsModForOTU = [] | |
| 134 for sBug in lsAssociations: | |
| 135 lsBug = sBug[1].split(".") | |
| 136 if(len(lsBug))> 1: | |
| 137 if(lsBug[-1].isdigit()): | |
| 138 lsBug[-2]=lsBug[-2]+"_"+lsBug[-1] | |
| 139 lsBug = lsBug[0:-1] | |
| 140 lsAssociationsModForOTU.append([sBug[0]]+[".".join(lsBug)]+sBug[2:]) | |
| 141 else: | |
| 142 lsAssociationsModForOTU.append([sBug[0]]+[lsBug[0]]+sBug[2:]) | |
| 143 | |
| 144 #Extract just class info | |
| 145 #lsClassData = [[sLine[2],sClass,sLine[1]] for sLine in fSum] | |
| 146 | |
| 147 ### Make rings | |
| 148 #Setup rings | |
| 149 dictRings = dict([[enumData[1],enumData[0]] for enumData in enumerate(set([lsData[0] for lsData in lsAssociationsModForOTU]))]) | |
| 150 | |
| 151 #Ring graphlan setting: rings represent a metadata that associates with a feature | |
| 152 #Rings have a line to help differetiate them | |
| 153 lsRingSettings = [[sRingLabel,lsPair[1],lsPair[0]] for lsPair in dictRings.items()] | |
| 154 lsRingLineColors = [[sRingLineColorWord,lsPair[1],sRingLineColor] for lsPair in dictRings.items()] | |
| 155 lsRingLineThick = [[sRingLineThicknessWord,lsPair[1],sRingLineThickness] for lsPair in dictRings.items()] | |
| 156 lsRingLineLabelSize = [[sRingLabelSizeWord,lsPair[1], sRingLabelSize] for lsPair in dictRings.items()] | |
| 157 | |
| 158 #Create coloring for rings color represents the directionality of the relationship | |
| 159 dMaxCoef = max([abs(float(sAssociation[2])) for sAssociation in lsAssociationsModForOTU]) | |
| 160 lsRingColors = [[lsAssociation[1], sRingColor, dictRings[lsAssociation[0]], funcGetColor(float(lsAssociation[2]))] for lsAssociation in lsAssociationsModForOTU] | |
| 161 lsRingAlpha = [[lsAssociation[1], sRingAlpha, dictRings[lsAssociation[0]], funcGetAlpha(float(lsAssociation[2]), dMaxCoef)] for lsAssociation in lsAssociationsModForOTU] | |
| 162 | |
| 163 #Create height for rings representing the log tranformed q-value? | |
| 164 dMaxQValue = max([-1*math.log(max(float(sAssociation[3]), c_dMinDoubleValue)) for sAssociation in lsAssociationsModForOTU]) | |
| 165 #lsRingHeights = [[lsAssociation[1], sRingHeight, dictRings[lsAssociation[0]], ((-1*math.log(max(float(lsAssociation[3]), c_dMinDoubleValue)))/dMaxQValue)+sRingHeightMin] for lsAssociation in lsAssociationsModForOTU] | |
| 166 lsRingHeights = [[lsAssociation[1], sRingHeight, dictRings[lsAssociation[0]], sStandardizedRingHeight] for lsAssociation in lsAssociationsModForOTU] | |
| 167 | |
| 168 #### Marker | |
| 169 # Marker colors (mainly to make legend | |
| 170 lsMarkerColors = [[lsAssociation[1], sCladeMarkerColor, funcGetColor(float(lsAssociation[2]))] for lsAssociation in lsAssociationsModForOTU] | |
| 171 lsMarkerSizes = [[lsAssociation[1], sCladeMarkerSize, sHighlightedMarkerSize] for lsAssociation in lsAssociationsModForOTU] | |
| 172 | |
| 173 #### Make internal highlights | |
| 174 #Highlight the associated clades | |
| 175 lsUniqueAssociatedTaxa = sorted(list(set([lsAssociation[1] for lsAssociation in lsAssociationsModForOTU]))) | |
| 176 | |
| 177 lsHighlights = [] | |
| 178 sABCPrefix = "" | |
| 179 sListABC = string.ascii_lowercase | |
| 180 iListABCIndex = 0 | |
| 181 for lsHighlight in lsUniqueAssociatedTaxa: | |
| 182 lsTaxa = lsHighlight.split(".") | |
| 183 sLabel = sABCPrefix+sListABC[iListABCIndex]+":"+lsTaxa[-1] if len(lsTaxa) > 2 else lsTaxa[-1] | |
| 184 lsHighlights.append([lsHighlight, sAnnotation, sLabel]) | |
| 185 iListABCIndex = iListABCIndex + 1 | |
| 186 if iListABCIndex > 25: | |
| 187 iListABCIndex = 0 | |
| 188 sABCPrefix = sABCPrefix + sListABC[len(sABCPrefix)] | |
| 189 | |
| 190 #Read in the core file | |
| 191 csvCore = open(args.strInputCore,'r') if isinstance(args.strInputCore, str) else args.strInputCore | |
| 192 fSum = csv.reader(csvCore, delimiter="\t") | |
| 193 | |
| 194 #Add in all phylum just incase they were not already included here | |
| 195 lsAddSecondLevel = list(set([sUnique[0].split(".")[1] for sUnique in fSum if len(sUnique[0].split(".")) > 1])) | |
| 196 lsHighlights.extend([[sSecondLevel, sAnnotation, sSecondLevel] for sSecondLevel in lsAddSecondLevel]) | |
| 197 lsHighlightColor = [[lsHighlight[0], sAnnotationColor,"b"] for lsHighlight in lsHighlights] | |
| 198 | |
| 199 #### Write the remaining output annotation file | |
| 200 fAnn.writerows(lsRingSettings) | |
| 201 fAnn.writerows(lsRingLineColors) | |
| 202 fAnn.writerows(lsRingColors) | |
| 203 fAnn.writerows(lsRingAlpha) | |
| 204 fAnn.writerows(lsRingLineThick) | |
| 205 fAnn.writerows(lsRingLineLabelSize) | |
| 206 fAnn.writerows(lsRingHeights) | |
| 207 fAnn.writerows(lsMarkerColors) | |
| 208 fAnn.writerows(lsMarkerSizes) | |
| 209 fAnn.writerows([[sRingPositiveWord, sCladeMarkerColor, sRingPositiveColor]]) | |
| 210 fAnn.writerows([[sRingNegativeWord, sCladeMarkerColor, sRingNegativeColor]]) | |
| 211 fAnn.writerows(lsHighlights) | |
| 212 fAnn.writerows(lsHighlightColor) | |
| 213 csvAnn.close() |
