| 
0
 | 
     1 #!/usr/bin/env python
 | 
| 
 | 
     2 #####################################################################################
 | 
| 
 | 
     3 #Copyright (C) <2012>
 | 
| 
 | 
     4 #
 | 
| 
 | 
     5 #Permission is hereby granted, free of charge, to any person obtaining a copy of
 | 
| 
 | 
     6 #this software and associated documentation files (the "Software"), to deal in the
 | 
| 
 | 
     7 #Software without restriction, including without limitation the rights to use, copy,
 | 
| 
 | 
     8 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
 | 
| 
 | 
     9 #and to permit persons to whom the Software is furnished to do so, subject to
 | 
| 
 | 
    10 #the following conditions:
 | 
| 
 | 
    11 #
 | 
| 
 | 
    12 #The above copyright notice and this permission notice shall be included in all copies
 | 
| 
 | 
    13 #or substantial portions of the Software.
 | 
| 
 | 
    14 #
 | 
| 
 | 
    15 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
 | 
| 
 | 
    16 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
 | 
| 
 | 
    17 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 | 
| 
 | 
    18 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 | 
| 
 | 
    19 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 | 
| 
 | 
    20 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 | 
| 
 | 
    21 #
 | 
| 
 | 
    22 # This file is a component of the MaAsLin (Multivariate Associations Using Linear Models), 
 | 
| 
 | 
    23 # authored by the Huttenhower lab at the Harvard School of Public Health
 | 
| 
 | 
    24 # (contact Timothy Tickle, ttickle@hsph.harvard.edu).
 | 
| 
 | 
    25 #####################################################################################
 | 
| 
 | 
    26 
 | 
| 
 | 
    27 __author__ = "Timothy Tickle"
 | 
| 
 | 
    28 __copyright__ = "Copyright 2012"
 | 
| 
 | 
    29 __credits__ = ["Timothy Tickle"]
 | 
| 
 | 
    30 __license__ = ""
 | 
| 
 | 
    31 __version__ = ""
 | 
| 
 | 
    32 __maintainer__ = "Timothy Tickle"
 | 
| 
 | 
    33 __email__ = "ttickle@sph.harvard.edu"
 | 
| 
 | 
    34 __status__ = "Development"
 | 
| 
 | 
    35 
 | 
| 
 | 
    36 import argparse
 | 
| 
 | 
    37 import csv
 | 
| 
 | 
    38 import math
 | 
| 
 | 
    39 from operator import itemgetter
 | 
| 
 | 
    40 import re
 | 
| 
 | 
    41 import string
 | 
| 
 | 
    42 import sys
 | 
| 
 | 
    43 
 | 
| 
 | 
    44 #def funcGetColor(fNumeric,fMax):
 | 
| 
 | 
    45 #  if fNumeric>0:
 | 
| 
 | 
    46 #    return("#"+str(int(99*fNumeric/fMax)).zfill(2)+"0000")
 | 
| 
 | 
    47 #  if fNumeric<0:
 | 
| 
 | 
    48 #    return("#00"+str(int(99*abs(fNumeric/fMax))).zfill(2)+"00")
 | 
| 
 | 
    49 #  return("#000000")
 | 
| 
 | 
    50 
 | 
| 
 | 
    51 def funcGetColor(fNumeric):
 | 
| 
 | 
    52   if fNumeric>0:
 | 
| 
 | 
    53     return sRingPositiveColor
 | 
| 
 | 
    54   else:
 | 
| 
 | 
    55     return sRingNegativeColor
 | 
| 
 | 
    56 
 | 
| 
 | 
    57 def funcGetAlpha(fNumeric,fMax):
 | 
| 
 | 
    58   return max(abs(fNumeric/fMax),dMinAlpha)
 | 
| 
 | 
    59 
 | 
| 
 | 
    60 #Constants
 | 
| 
 | 
    61 sAnnotation = "annotation"
 | 
| 
 | 
    62 sAnnotationColor = "annotation_background_color"
 | 
| 
 | 
    63 sClass = "class"
 | 
| 
 | 
    64 sRingAlpha = "ring_alpha"
 | 
| 
 | 
    65 dMinAlpha = .075
 | 
| 
 | 
    66 sRingColor = "ring_color"
 | 
| 
 | 
    67 sRingHeight = "ring_height"
 | 
| 
 | 
    68 #sRingHeightMin = 0.5
 | 
| 
 | 
    69 sStandardizedRingHeight = "1.01"
 | 
| 
 | 
    70 sRingLabel = "ring_label"
 | 
| 
 | 
    71 sRingLabelSizeWord = "ring_label_font_size"
 | 
| 
 | 
    72 sRingLabelSize = 10
 | 
| 
 | 
    73 sRingLineColor = "#999999"
 | 
| 
 | 
    74 sRingPositiveWord = "Positive"
 | 
| 
 | 
    75 sRingPositiveColor = "#990000"
 | 
| 
 | 
    76 sRingNegativeWord = "Negative"
 | 
| 
 | 
    77 sRingNegativeColor = "#009900"
 | 
| 
 | 
    78 sRingLineColorWord = "ring_separator_color"
 | 
| 
 | 
    79 sRingLineThickness = "0.5"
 | 
| 
 | 
    80 sRingLineThicknessWord = "ring_internal_separator_thickness"
 | 
| 
 | 
    81 sCladeMarkerColor = "clade_marker_color"
 | 
| 
 | 
    82 sCladeMarkerSize = "clade_marker_size"
 | 
| 
 | 
    83 sHighlightedMarkerSize = "10"
 | 
| 
 | 
    84 c_dMinDoubleValue = 0.00000000001
 | 
| 
 | 
    85 
 | 
| 
 | 
    86 #Set up arguments reader
 | 
| 
 | 
    87 argp = argparse.ArgumentParser( prog = "MaaslinToGraphlanAnnotation.py",
 | 
| 
 | 
    88     description = """Converts summary files to graphlan annotation files.""" )
 | 
| 
 | 
    89 
 | 
| 
 | 
    90 #### Read in information
 | 
| 
 | 
    91 #Arguments
 | 
| 
 | 
    92 argp.add_argument("strInputSummary", metavar = "SummaryFile", type = argparse.FileType("r"), help ="Input summary file produced by maaslin")
 | 
| 
 | 
    93 argp.add_argument("strInputCore", metavar = "CoreFile", type = argparse.FileType("r"), help ="Core file produced by Graphlan from the maaslin pcl")
 | 
| 
 | 
    94 argp.add_argument("strInputHeader", metavar = "HeaderFile", type = argparse.FileType("r"), help ="Input header file to append to the generated annotation file.")
 | 
| 
 | 
    95 argp.add_argument("strOutputAnnotation", metavar = "AnnotationFile", type = argparse.FileType("w"), help ="Output annotation file for graphlan")
 | 
| 
 | 
    96 
 | 
| 
 | 
    97 args = argp.parse_args( )
 | 
| 
 | 
    98 
 | 
| 
 | 
    99 #Read in the summary file and transform to class based descriptions
 | 
| 
 | 
   100 csvSum = open(args.strInputSummary,'r') if isinstance(args.strInputSummary, str) else args.strInputSummary
 | 
| 
 | 
   101 fSum = csv.reader(csvSum, delimiter="\t")
 | 
| 
 | 
   102 #Skip header (until i do this a better way)
 | 
| 
 | 
   103 fSum.next()
 | 
| 
 | 
   104 
 | 
| 
 | 
   105 #Extract associations (Metadata,taxon,coef,qvalue)
 | 
| 
 | 
   106 lsAssociations = [[sLine[1],sLine[2],sLine[4],sLine[7]] for sLine in fSum]
 | 
| 
 | 
   107 csvSum.close()
 | 
| 
 | 
   108 
 | 
| 
 | 
   109 #### Read in default graphlan settings provided by maaslin
 | 
| 
 | 
   110 #Read in the annotation header file
 | 
| 
 | 
   111 csvHdr = open(args.strInputHeader,'r') if isinstance(args.strInputHeader, str) else args.strInputHeader
 | 
| 
 | 
   112 fHdr = csv.reader(csvHdr, delimiter="\t")
 | 
| 
 | 
   113 
 | 
| 
 | 
   114 #Begin writting the output
 | 
| 
 | 
   115 #Output annotation file
 | 
| 
 | 
   116 csvAnn = open(args.strOutputAnnotation,'w') if isinstance(args.strOutputAnnotation, str) else args.strOutputAnnotation
 | 
| 
 | 
   117 fAnn = csv.writer(csvAnn, delimiter="\t")
 | 
| 
 | 
   118 fAnn.writerows(fHdr)
 | 
| 
 | 
   119 csvHdr.close()
 | 
| 
 | 
   120 
 | 
| 
 | 
   121 #If no associatiosn were found
 | 
| 
 | 
   122 if(len(lsAssociations)==0):
 | 
| 
 | 
   123   csvAnn.close()
 | 
| 
 | 
   124 
 | 
| 
 | 
   125 else:
 | 
| 
 | 
   126   #### Fix name formats
 | 
| 
 | 
   127   #Manipulate names to graphlan complient names (clades seperated by .)
 | 
| 
 | 
   128   lsAssociations = sorted(lsAssociations, key=itemgetter(1))
 | 
| 
 | 
   129   lsAssociations = [[sBug[0]]+[re.sub("^[A-Za-z]__","",sBug[1])]+sBug[2:] for sBug in lsAssociations]
 | 
| 
 | 
   130   lsAssociations = [[sBug[0]]+[re.sub("\|*[A-Za-z]__|\|",".",sBug[1])]+sBug[2:] for sBug in lsAssociations]
 | 
| 
 | 
   131 
 | 
| 
 | 
   132   #If this is an OTU, append the number and the genus level together for a more descriptive termal name
 | 
| 
 | 
   133   lsAssociationsModForOTU = []
 | 
| 
 | 
   134   for sBug in lsAssociations:
 | 
| 
 | 
   135     lsBug = sBug[1].split(".")
 | 
| 
 | 
   136     if(len(lsBug))> 1:
 | 
| 
 | 
   137       if(lsBug[-1].isdigit()):
 | 
| 
 | 
   138         lsBug[-2]=lsBug[-2]+"_"+lsBug[-1]
 | 
| 
 | 
   139         lsBug = lsBug[0:-1]
 | 
| 
 | 
   140       lsAssociationsModForOTU.append([sBug[0]]+[".".join(lsBug)]+sBug[2:])
 | 
| 
 | 
   141     else:
 | 
| 
 | 
   142       lsAssociationsModForOTU.append([sBug[0]]+[lsBug[0]]+sBug[2:])
 | 
| 
 | 
   143 
 | 
| 
 | 
   144   #Extract just class info
 | 
| 
 | 
   145   #lsClassData = [[sLine[2],sClass,sLine[1]] for sLine in fSum]
 | 
| 
 | 
   146 
 | 
| 
 | 
   147   ### Make rings
 | 
| 
 | 
   148   #Setup rings
 | 
| 
 | 
   149   dictRings = dict([[enumData[1],enumData[0]] for enumData in enumerate(set([lsData[0] for lsData in lsAssociationsModForOTU]))])
 | 
| 
 | 
   150 
 | 
| 
 | 
   151   #Ring graphlan setting: rings represent a metadata that associates with a feature
 | 
| 
 | 
   152   #Rings have a line to help differetiate them
 | 
| 
 | 
   153   lsRingSettings = [[sRingLabel,lsPair[1],lsPair[0]] for lsPair in dictRings.items()]
 | 
| 
 | 
   154   lsRingLineColors = [[sRingLineColorWord,lsPair[1],sRingLineColor] for lsPair in dictRings.items()]
 | 
| 
 | 
   155   lsRingLineThick = [[sRingLineThicknessWord,lsPair[1],sRingLineThickness] for lsPair in dictRings.items()]
 | 
| 
 | 
   156   lsRingLineLabelSize = [[sRingLabelSizeWord,lsPair[1], sRingLabelSize] for lsPair in dictRings.items()]
 | 
| 
 | 
   157 
 | 
| 
 | 
   158   #Create coloring for rings color represents the directionality of the relationship
 | 
| 
 | 
   159   dMaxCoef = max([abs(float(sAssociation[2])) for sAssociation in lsAssociationsModForOTU])
 | 
| 
 | 
   160   lsRingColors = [[lsAssociation[1], sRingColor, dictRings[lsAssociation[0]], funcGetColor(float(lsAssociation[2]))] for lsAssociation in lsAssociationsModForOTU]
 | 
| 
 | 
   161   lsRingAlpha = [[lsAssociation[1], sRingAlpha, dictRings[lsAssociation[0]], funcGetAlpha(float(lsAssociation[2]), dMaxCoef)] for lsAssociation in lsAssociationsModForOTU]
 | 
| 
 | 
   162 
 | 
| 
 | 
   163   #Create height for rings representing the log tranformed q-value?
 | 
| 
 | 
   164   dMaxQValue = max([-1*math.log(max(float(sAssociation[3]), c_dMinDoubleValue)) for sAssociation in lsAssociationsModForOTU])
 | 
| 
 | 
   165   #lsRingHeights = [[lsAssociation[1], sRingHeight, dictRings[lsAssociation[0]], ((-1*math.log(max(float(lsAssociation[3]), c_dMinDoubleValue)))/dMaxQValue)+sRingHeightMin] for lsAssociation in lsAssociationsModForOTU]
 | 
| 
 | 
   166   lsRingHeights = [[lsAssociation[1], sRingHeight, dictRings[lsAssociation[0]], sStandardizedRingHeight] for lsAssociation in lsAssociationsModForOTU]
 | 
| 
 | 
   167 
 | 
| 
 | 
   168   #### Marker
 | 
| 
 | 
   169   # Marker colors (mainly to make legend
 | 
| 
 | 
   170   lsMarkerColors = [[lsAssociation[1], sCladeMarkerColor, funcGetColor(float(lsAssociation[2]))] for lsAssociation in lsAssociationsModForOTU]
 | 
| 
 | 
   171   lsMarkerSizes = [[lsAssociation[1], sCladeMarkerSize, sHighlightedMarkerSize] for lsAssociation in lsAssociationsModForOTU]
 | 
| 
 | 
   172 
 | 
| 
 | 
   173   #### Make internal highlights
 | 
| 
 | 
   174   #Highlight the associated clades
 | 
| 
 | 
   175   lsUniqueAssociatedTaxa = sorted(list(set([lsAssociation[1] for lsAssociation in lsAssociationsModForOTU])))
 | 
| 
 | 
   176 
 | 
| 
 | 
   177   lsHighlights = []
 | 
| 
 | 
   178   sABCPrefix = ""
 | 
| 
 | 
   179   sListABC = string.ascii_lowercase
 | 
| 
 | 
   180   iListABCIndex = 0
 | 
| 
 | 
   181   for lsHighlight in lsUniqueAssociatedTaxa:
 | 
| 
 | 
   182     lsTaxa = lsHighlight.split(".")
 | 
| 
 | 
   183     sLabel = sABCPrefix+sListABC[iListABCIndex]+":"+lsTaxa[-1] if len(lsTaxa) > 2 else lsTaxa[-1]
 | 
| 
 | 
   184     lsHighlights.append([lsHighlight, sAnnotation, sLabel])
 | 
| 
 | 
   185     iListABCIndex = iListABCIndex + 1
 | 
| 
 | 
   186     if iListABCIndex > 25:
 | 
| 
 | 
   187       iListABCIndex = 0
 | 
| 
 | 
   188       sABCPrefix = sABCPrefix + sListABC[len(sABCPrefix)]
 | 
| 
 | 
   189 
 | 
| 
 | 
   190   #Read in the core file
 | 
| 
 | 
   191   csvCore = open(args.strInputCore,'r') if isinstance(args.strInputCore, str) else args.strInputCore
 | 
| 
 | 
   192   fSum = csv.reader(csvCore, delimiter="\t")
 | 
| 
 | 
   193 
 | 
| 
 | 
   194   #Add in all phylum just incase they were not already included here
 | 
| 
 | 
   195   lsAddSecondLevel = list(set([sUnique[0].split(".")[1] for sUnique in fSum if len(sUnique[0].split(".")) > 1]))
 | 
| 
 | 
   196   lsHighlights.extend([[sSecondLevel, sAnnotation, sSecondLevel] for sSecondLevel in lsAddSecondLevel])
 | 
| 
 | 
   197   lsHighlightColor = [[lsHighlight[0], sAnnotationColor,"b"] for lsHighlight in lsHighlights]
 | 
| 
 | 
   198 
 | 
| 
 | 
   199   #### Write the remaining output annotation file
 | 
| 
 | 
   200   fAnn.writerows(lsRingSettings)
 | 
| 
 | 
   201   fAnn.writerows(lsRingLineColors)
 | 
| 
 | 
   202   fAnn.writerows(lsRingColors)
 | 
| 
 | 
   203   fAnn.writerows(lsRingAlpha)
 | 
| 
 | 
   204   fAnn.writerows(lsRingLineThick)
 | 
| 
 | 
   205   fAnn.writerows(lsRingLineLabelSize)
 | 
| 
 | 
   206   fAnn.writerows(lsRingHeights)
 | 
| 
 | 
   207   fAnn.writerows(lsMarkerColors)
 | 
| 
 | 
   208   fAnn.writerows(lsMarkerSizes)
 | 
| 
 | 
   209   fAnn.writerows([[sRingPositiveWord, sCladeMarkerColor, sRingPositiveColor]])
 | 
| 
 | 
   210   fAnn.writerows([[sRingNegativeWord, sCladeMarkerColor, sRingNegativeColor]])
 | 
| 
 | 
   211   fAnn.writerows(lsHighlights)
 | 
| 
 | 
   212   fAnn.writerows(lsHighlightColor)
 | 
| 
 | 
   213   csvAnn.close()
 |