comparison src/breadcrumbs/scripts/scriptPlotFeature.py @ 0:2f4f6f08c8c4 draft

Uploaded
author george-weingart
date Tue, 13 May 2014 21:58:57 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2f4f6f08c8c4
1 #!/usr/bin/env python
2 """
3 Author: Timothy Tickle
4 Description: Plots features
5 """
6
7 __author__ = "Timothy Tickle"
8 __copyright__ = "Copyright 2012"
9 __credits__ = ["Timothy Tickle"]
10 __license__ = ""
11 __version__ = ""
12 __maintainer__ = "Timothy Tickle"
13 __email__ = "ttickle@sph.harvard.edu"
14 __status__ = "Development"
15
16 import sys
17 import argparse
18 import csv
19 import os
20 from src.BoxPlot import BoxPlot
21 from src.Histogram import Histogram
22 from src.ScatterPlot import ScatterPlot
23
def funcPlotBoxPlot(lxVariable1,lxVariable2,fOneIsNumeric):
    """
    Draw a box plot of one variable's values grouped by the other variable.

    The variable flagged as numeric supplies the plotted values; the other
    one supplies the group label of each observation. Output path, titles
    and colors are read from the module-level 'args' namespace.

    :param lxVariable1: First variable, one entry per observation.
    :param lxVariable2: Second variable, one entry per observation.
    :param fOneIsNumeric: True when lxVariable1 holds the values and
        lxVariable2 the labels; False for the reverse assignment.
    """
    if fOneIsNumeric:
        lxValues, lxGroupIDs = lxVariable1, lxVariable2
    else:
        lxValues, lxGroupIDs = lxVariable2, lxVariable1

    # Bucket every value under the label found at the same position
    dictBuckets = {}
    for iPosition, xValue in enumerate(lxValues):
        dictBuckets.setdefault(lxGroupIDs[iPosition], []).append(xValue)

    # Labels and grouped values are both derived from the same dict so they stay aligned
    lsGroupNames = dictBuckets.keys()
    llGroupedValues = [dictBuckets[sName] for sName in lsGroupNames]

    BoxPlot.funcPlot(ly=llGroupedValues, lsLabels=lsGroupNames, strOutputFigurePath=args.strOutputFile, strTitle=args.strTitle, strXTitle=args.strX, strYTitle=args.strY, strColor=args.strColor, fJitter=True, fInvert=args.fColor, fInvertY=args.fAxis)
38
39
# Set up arguments reader
# prog holds only the script name; the usage example lives in the epilog so
# that the "usage:" line argparse generates stays well formed.
argp = argparse.ArgumentParser( prog = "scriptPlotFeature.py",
    description = "Plot one or two features from an abundance table: a histogram for one numeric feature, a scatter plot for two numeric features, or a box plot for one numeric and one non-numeric feature.",
    epilog = "Example: python scriptPlotFeature.py Input.pcl valuesID groupID" )

# Specify output if needed
argp.add_argument("-o","--output", dest="strOutputFile", action="store", default=None, help="Output file name.")

# Text annotation
argp.add_argument("-t","--title", dest="strTitle", action="store", default=None, help="Text for the title.")
argp.add_argument("-x","--xaxis", dest="strX", action="store", default=None, help="Text for the x-axis.")
argp.add_argument("-y","--yaxis", dest="strY", action="store", default=None, help="Text for the y-axis.")

# Color options
argp.add_argument("-c","--color", dest="strColor", action="store", default="#83C8F9", help="Fill color as a Hex number (including the #).")
argp.add_argument("-r","--invertcolor", dest="fColor", action="store_true", default=False, help="Flag to invert the background to black.")

# Axis adjustments
argp.add_argument("-s","--invertyaxis", dest="fAxis", action="store_true", default=False, help="Flag to invert the y axis.")

# Required
argp.add_argument("strFileAbund", help ="Input data file")
# Only the first two features given are used by the script
argp.add_argument("strFeatures", nargs = "+", help="Features to plot (from one to two metadata).")

args = argp.parse_args( )
64
# Rows read for each requested feature, and flags recording whether each row is numeric
lxVariable1 = None
lxVariable2 = None
fOneIsNumeric = False
fTwoIsNumeric = False

# The first feature is always present; the second one is optional
strFeatureOneID = args.strFeatures[0]
strFeatureTwoID = args.strFeatures[1] if len(args.strFeatures) >= 2 else None

# Feature IDs that take part in the default output name and the default title
lsFeatureIDs = [strFeatureOneID]
if strFeatureTwoID:
    lsFeatureIDs.append(strFeatureTwoID)

# If the output file is not specified, derive it from the input file name and the feature IDs
if not args.strOutputFile:
    strFileRoot = os.path.splitext(args.strFileAbund)[0]
    args.strOutputFile = "-".join([strFileRoot] + lsFeatureIDs + ["plotfeature.pdf"])

# If the title is not specified, derive it from the feature IDs
if not args.strTitle:
    args.strTitle = " vs ".join(lsFeatureIDs)
87
# Default the axis labels to the feature IDs
if args.strX is None:
    args.strX = strFeatureOneID

if args.strY is None:
    args.strY = strFeatureTwoID

# Get values and groupings
# A path is opened (and closed) here; anything else is treated as an already
# open file-like object and is left open for its owner to close.
# NOTE(review): the 'rU' mode is kept for the Python 2 interpreter this script
# targets; Python 3.11+ rejects it.
fOpenedHere = isinstance(args.strFileAbund,str)
hndlInput = open(args.strFileAbund, 'rU') if fOpenedHere else args.strFileAbund
try:
    csvReader = csv.reader(hndlInput, delimiter="\t")
    for lsLine in csvReader:
        # Blank lines are returned as empty lists and have no ID to compare
        if not lsLine:
            continue
        if lsLine[0] == strFeatureOneID:
            lxVariable1 = lsLine[1:]
        if (strFeatureTwoID is not None) and (lsLine[0] == strFeatureTwoID):
            lxVariable2 = lsLine[1:]
finally:
    if fOpenedHere:
        hndlInput.close()
103
# Remove NAs
# An observation is dropped from both variables when it is NA in either one,
# so the two lists stay aligned by index. Variables that were not found (None)
# are skipped here so the missing-feature message can be reported later.
liNAs = set()
for lxValues in (lxVariable1, lxVariable2):
    if lxValues is not None:
        liNAs.update(i for i,x in enumerate(lxValues) if x.lower() == "na")

if lxVariable1 is not None:
    lxVariable1 = [x for i,x in enumerate(lxVariable1) if i not in liNAs]

if lxVariable2 is not None:
    lxVariable2 = [x for i,x in enumerate(lxVariable2) if i not in liNAs]
111
def _funcToNumeric(lxValues):
    """
    Try to convert a list of strings to floats.

    :param lxValues: List of values to convert; may be None or empty.
    :return: Tuple (values, fIsNumeric). When every entry converts, values is
        the list of floats and fIsNumeric is True. Otherwise the input is
        returned unchanged with fIsNumeric False; this includes None and
        empty input, which have nothing to convert.
    """
    if not lxValues:
        return lxValues, False
    try:
        return [float(xItem) for xItem in lxValues], True
    except ValueError:
        # At least one entry is not a number, so keep the original strings
        return lxValues, False

# Type variables
lxVariable1, fOneIsNumeric = _funcToNumeric(lxVariable1)
lxVariable2, fTwoIsNumeric = _funcToNumeric(lxVariable2)
128
# Report missing features, otherwise pick the plot from how many variables are numeric.
# print is always called with a single parenthesised string so the statements
# parse and behave the same on Python 2 and Python 3.
if lxVariable1 is None:
    print("scriptPlotFeature:: Sorry, could not find the feature "+ strFeatureOneID +" in the file "+args.strFileAbund+" .")
elif (lxVariable2 is None) and (strFeatureTwoID is not None):
    print("scriptPlotFeature:: Sorry, could not find the feature "+ strFeatureTwoID +" in the file "+args.strFileAbund+" .")
elif lxVariable2 is not None:
    # Two variables: none numeric is an error, one gives a box plot, two give a scatter plot
    iNumericCount = sum([fOneIsNumeric, fTwoIsNumeric])
    if iNumericCount == 0:
        print("scriptPlotFeature:: Error, If plotting 2 variables, atleast 1 should be numeric.")
    elif iNumericCount == 1:
        funcPlotBoxPlot(lxVariable1,lxVariable2, fOneIsNumeric=fOneIsNumeric)
    else:
        ScatterPlot.funcPlot(lxVariable1, lxVariable2, args.strOutputFile, strTitle=args.strTitle, strXTitle=args.strX, strYTitle=args.strY, strColor=args.strColor, fInvert=args.fColor)
elif fOneIsNumeric:
    # One numeric variable: histogram
    Histogram.funcPlot(lxVariable1, args.strOutputFile, strTitle=args.strTitle, strXTitle=args.strX, strYTitle="Frequency", strColor=args.strColor, fInvert=args.fColor)
else:
    print("Sorry currently histograms are support for only numeric data.")