annotate src/breadcrumbs/scripts/scriptPcoa.py @ 3:b4cf8c75305b draft default tip

Pointing to the right root directory
author george-weingart
date Tue, 30 Aug 2016 13:10:34 -0400
parents 2f4f6f08c8c4
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
1 #!/usr/bin/env python
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
2 """
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
3 Author: Timothy Tickle
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
4 Description: Make PCoA of an abundance file
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
5 """
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
6
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
7 __author__ = "Timothy Tickle"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
8 __copyright__ = "Copyright 2012"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
9 __credits__ = ["Timothy Tickle"]
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
10 __license__ = ""
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
11 __version__ = ""
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
12 __maintainer__ = "Timothy Tickle"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
13 __email__ = "ttickle@sph.harvard.edu"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
14 __status__ = "Development"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
15
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
16 import sys
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
17 import argparse
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
18 from src.AbundanceTable import AbundanceTable
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
19 from src.Metric import Metric
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
20 import csv
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
21 import os
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
22 from src.PCoA import PCoA
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
23
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
# Command-line interface for the script.
argp = argparse.ArgumentParser(
    prog="scriptPcoa.py",
    description="""PCoAs an abundance file given a metadata.\nExample:python scriptPcoa.py -i TID -l STSite""",
)

# Options describing how the abundance table is laid out.
argp.add_argument(
    "-i", "--id",
    dest="sIDName",
    default="ID",
    help="Abundance Table ID",
)
argp.add_argument(
    "-l", "--meta",
    dest="sLastMetadataName",
    help="Last metadata name",
)
argp.add_argument(
    "-d", "--fDelim",
    dest="cFileDelimiter",
    action="store",
    default="\t",
    help="File delimiter, default tab",
)
argp.add_argument(
    "-f", "--featureDelim",
    dest="cFeatureNameDelimiter",
    action="store",
    metavar="Feature Name Delimiter",
    default="|",
    help="Feature delimiter",
)

# Optional preprocessing switches (both off unless the flag is given).
argp.add_argument(
    "-n", "--doNorm",
    dest="fDoNormData",
    action="store_true",
    default=False,
    help="Flag to turn on normalization",
)
argp.add_argument(
    "-s", "--doSum",
    dest="fDoSumData",
    action="store_true",
    default=False,
    help="Flag to turn on summation",
)

# Plotting, metric selection and output locations.
argp.add_argument(
    "-p", "--paint",
    dest="sLabel",
    metavar="Label",
    default=None,
    help="Label to paint in the PCoA",
)
argp.add_argument(
    "-m", "--metric",
    dest="strMetric",
    metavar="distance",
    default=PCoA.c_BRAY_CURTIS,
    help=("Distance metric to use. Pick from braycurtis, canberra, chebyshev, "
          "cityblock, correlation, cosine, euclidean, hamming, spearman, "
          "sqeuclidean, unifrac_unweighted, unifrac_weighted"),
)
argp.add_argument(
    "-o", "--outputFile",
    dest="strOutFile",
    metavar="outputFile",
    default=None,
    help="Specify the path for the output figure.",
)
argp.add_argument(
    "-D", "--DistanceMatrix",
    dest="strFileDistanceMatrix",
    metavar="strFileDistanceMatrix",
    default=None,
    help=("Specify the path for outputing the distance matrix (if interested). "
          "Default this will not output."),
)
argp.add_argument(
    "-C", "--CoordinatesMatrix",
    dest="strFileCoordinatesMatrix",
    metavar="strFileCoordinatesMatrix",
    default=None,
    help=("Specify the path for outputing the x,y coordinates matrix (Dim 1 and 2). "
          "Default this will not output."),
)

# Inputs that only matter for the UniFrac metrics.
argp.add_argument(
    "-t", "--unifracTree",
    dest="istrmTree",
    metavar="UnifracTreeFile",
    default=None,
    help="Optional file only needed for UniFrac calculations.",
)
argp.add_argument(
    "-e", "--unifracEnv",
    dest="istrmEnvr",
    metavar="UnifracEnvFile",
    default=None,
    help="Optional file only needed for UniFrac calculations.",
)
argp.add_argument(
    "-c", "--unifracColor",
    dest="fileUnifracColor",
    metavar="UnifracColorFile",
    default=None,
    help=("A text file indicating the groupings of metadata to color. "
          "Each line in the file is a group to color. "
          "An example file line would be 'GroupName:ID,ID,ID,ID'"),
)

# Positional abundance file; nargs="?" makes it optional.
argp.add_argument(
    "strFileAbund",
    metavar="Abundance file",
    nargs="?",
    help="Input data file",
)

args = argp.parse_args()
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
52
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
# Read in the abundance table. The positional file is optional, so
# abndTable stays None when no file was given on the command line.
abndTable = None
if args.strFileAbund:
    abndTable = AbundanceTable.funcMakeFromFile(
        args.strFileAbund,
        cDelimiter=args.cFileDelimiter,
        sMetadataID=args.sIDName,
        sLastMetadata=args.sLastMetadataName,
        cFeatureNameDelimiter=args.cFeatureNameDelimiter)

    # Sum clades if requested (-s/--doSum). Kept inside the file guard so a
    # missing table is never dereferenced.
    if args.fDoSumData:
        abndTable.funcSumClades()

    # Normalize if requested (-n/--doNorm); applied after any summation.
    if args.fDoNormData:
        abndTable.funcNormalize()
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
69
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
# Choose the metadata to paint: only the label given with -p/--paint, or
# every metadata key of the abundance table when no label was requested.
# Stays None when no abundance table was loaded.
lsKeys = None
if abndTable:
    if args.sLabel:
        lsKeys = [args.sLabel]
    else:
        # Materialize as a list so the keys can be indexed and re-iterated
        # regardless of what kind of view keys() returns.
        lsKeys = list(abndTable.funcGetMetadataCopy().keys())

# Derive a default figure name when -o/--outputFile was not given: the base
# name of the abundance file, or of the UniFrac environment file when there is
# no abundance file, with "-pcoa.pdf" appended.
if not args.strOutFile:
    strNameSource = args.strFileAbund or args.istrmEnvr
    if not strNameSource:
        # Without any input or output path there is nothing to name the
        # figure after; report a usage error instead of failing inside
        # os.path.basename(None).
        argp.error("Cannot derive an output file name: give an abundance file, "
                   "a UniFrac environment file (-e) or an output file (-o).")
    args.strOutFile = os.path.splitext(os.path.basename(strNameSource))[0] + "-pcoa.pdf"

# (root, extension) of the output path; per-plot suffixes are inserted between
# the two pieces later in the script.
lsFilePieces = os.path.splitext(args.strOutFile)
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
82
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
# Create the PCoA object and compute the ordination.
pcoa = PCoA()

# Abundance data is loaded only for the non-UniFrac metrics, and only when an
# abundance table is available.
fIsUnifracMetric = args.strMetric in [Metric.c_strUnifracUnweighted,
                                      Metric.c_strUnifracWeighted]
if not fIsUnifracMetric and abndTable:
    pcoa.loadData(abndTable, True)

# strDistanceMatrixFile is optional: when args.strFileDistanceMatrix is not
# None it forces the distance measures to be printed to that path.
pcoa.run(
    tempDistanceMetric=args.strMetric,
    iDims=2,
    strDistanceMatrixFile=args.strFileDistanceMatrix,
    istrmTree=args.istrmTree,
    istrmEnvr=args.istrmEnvr,
)
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
90
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
# Write the dimension 1 and 2 coordinates to a CSV file when a path was given
# with -C/--CoordinatesMatrix. Each row is the sample ID followed by that
# sample's coordinates.
if args.strFileCoordinatesMatrix:
    lsIds = pcoa.funcGetIDs()
    mtrxCoordinates = pcoa.funcGetCoordinates()
    # The context manager closes (and therefore flushes) the file even if
    # writing a row raises; the original never closed the handle.
    with open(args.strFileCoordinatesMatrix, 'w') as hndlCoordinates:
        csvrCoordinates = csv.writer(hndlCoordinates)
        csvrCoordinates.writerow(["ID", "Dimension_1", "Dimension_2"])
        for iRow in range(mtrxCoordinates.shape[0]):
            # Fall back to an empty ID when the PCoA object holds no IDs.
            strId = lsIds[iRow] if lsIds else ""
            csvrCoordinates.writerow([strId] + mtrxCoordinates[iRow].tolist())
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
100
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
# Paint metadata: one figure per selected metadata key. The key is inserted
# between the root and the extension of the output path so every figure gets
# its own file.
if lsKeys:
    for strKey in lsKeys:
        pcoa.plotList(
            lsLabelList=abndTable.funcGetMetadata(strKey),
            strOutputFileName=lsFilePieces[0] + "-" + strKey + lsFilePieces[1],
            iSize=20,
            dAlpha=1.0,
            charForceColor=None,
            charForceShape=None,
            fInvert=False,
            iDim1=1,
            iDim2=2,
        )
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
115
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
# UniFrac metrics: colour the samples by the groups listed in the optional
# colour file (-c/--unifracColor) and write one figure named after the metric.
if args.strMetric in [Metric.c_strUnifracUnweighted, Metric.c_strUnifracWeighted]:

    # Label for every sample that is not assigned to a group.
    c_sNotGiven = "Not_specified"

    lsIds = pcoa.funcGetIDs()
    lsGroupLabels = [c_sNotGiven for _ in lsIds]

    if args.fileUnifracColor:

        # Read the colour file and build an ID -> group lookup. Each non-empty
        # line has the form 'GroupName:ID,ID,ID'; the first CSV field holds
        # the group name and the first ID separated by ':'. A first field
        # without exactly one ':' raises ValueError on unpacking.
        dictConvertIDToGroup = {}
        # The context manager closes the file once parsing is done; the
        # original left the handle open.
        with open(args.fileUnifracColor) as hndlColor:
            for lsLine in csv.reader(hndlColor):
                if lsLine:
                    sGroupID, sFirstID = lsLine[0].split(":")
                    for sID in [sFirstID] + lsLine[1:]:
                        dictConvertIDToGroup[sID] = sGroupID

        # IDs missing from the colour file keep the "not given" label.
        lsGroupLabels = [dictConvertIDToGroup.get(sID, c_sNotGiven) for sID in lsIds]

    pcoa.plotList(
        lsLabelList=lsGroupLabels,
        strOutputFileName=lsFilePieces[0] + "-" + args.strMetric + lsFilePieces[1],
        iSize=20,
        dAlpha=1.0,
        charForceColor=None,
        charForceShape=None,
        fInvert=False,
        iDim1=1,
        iDim2=2,
    )