diff src/breadcrumbs/scripts/scriptPlotFeature.py @ 0:2f4f6f08c8c4 draft

Uploaded
author george-weingart
date Tue, 13 May 2014 21:58:57 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/scripts/scriptPlotFeature.py	Tue May 13 21:58:57 2014 -0400
@@ -0,0 +1,146 @@
+#!/usr/bin/env python
+"""
+Author: Timothy Tickle
+Description: Plots features
+"""
+
+__author__ = "Timothy Tickle"
+__copyright__ = "Copyright 2012"
+__credits__ = ["Timothy Tickle"]
+__license__ = ""
+__version__ = ""
+__maintainer__ = "Timothy Tickle"
+__email__ = "ttickle@sph.harvard.edu"
+__status__ = "Development"
+
+import sys
+import argparse
+import csv
+import os
+from src.BoxPlot import BoxPlot
+from src.Histogram import Histogram
+from src.ScatterPlot import ScatterPlot
+
+def funcPlotBoxPlot(lxVariable1,lxVariable2,fOneIsNumeric):
+
+  ly,lsLabels = [lxVariable1,lxVariable2] if fOneIsNumeric else [lxVariable2,lxVariable1]
+
+  # Group data
+  dictGroups = {}
+  for iIndex in xrange(len(ly)):
+    lsList = dictGroups.get(lsLabels[iIndex],[])
+    lsList.append(ly[iIndex])
+    dictGroups.setdefault(lsLabels[iIndex],lsList)
+  ly = [dictGroups[sKey] for sKey in dictGroups.keys()]
+  lsLabels = dictGroups.keys()
+
+  BoxPlot.funcPlot(ly=ly, lsLabels=lsLabels, strOutputFigurePath=args.strOutputFile, strTitle=args.strTitle, strXTitle=args.strX, strYTitle=args.strY, strColor=args.strColor, fJitter=True, fInvert=args.fColor, fInvertY=args.fAxis)
+
+
+#Set up arguments reader
+argp = argparse.ArgumentParser( prog = "scriptBoxPlot.py\nExample: python scriptBoxPlot.py Input.pcl valuesID groupID",
+    description = "Make a box plot from an abundance table.")
+
+#Sepecify output if needed
+argp.add_argument("-o","--output", dest="strOutputFile", action="store", default=None, help="Output file name.")
+
+# Text annotation
+argp.add_argument("-t","--title", dest="strTitle", action="store", default=None, help="Test for the title.")
+argp.add_argument("-x","--xaxis", dest="strX", action="store", default=None, help="Text for the x-axis.")
+argp.add_argument("-y","--yaxis", dest="strY", action="store", default=None, help="Text for the y-axis.")
+
+# Color options
+argp.add_argument("-c","--color", dest="strColor", action="store", default="#83C8F9", help="Fill color as a Hex number (including the #).")
+argp.add_argument("-r","--invertcolor", dest="fColor", action="store_true", default=False, help="Flag to invert the background to black.")
+
+# Axis adjustments
+argp.add_argument("-s","--invertyaxis", dest="fAxis", action="store_true", default=False, help="Flag to invert the y axis.")
+
+# Required
+argp.add_argument("strFileAbund", help ="Input data file")
+argp.add_argument("strFeatures", nargs = "+", help="Features to plot (from one to two metadata).")
+
+args = argp.parse_args( )
+
+#Holds the data
+lxVariable1 = None
+lxVariable2 = None
+fOneIsNumeric = False
+fTwoIsNumeric = False
+
+strFeatureOneID = args.strFeatures[0]
+strFeatureTwoID = None if len(args.strFeatures)<2 else args.strFeatures[1]
+
+# If the output file is not specified, make it up
+if not args.strOutputFile:
+  lsPieces = os.path.splitext(args.strFileAbund)
+  args.strOutputFile = [lsPieces[0],strFeatureOneID]
+  if strFeatureTwoID:
+    args.strOutputFile = args.strOutputFile+[strFeatureTwoID]
+  args.strOutputFile = "-".join(args.strOutputFile+["plotfeature.pdf"])
+
+if not args.strTitle:
+  args.strTitle = [strFeatureOneID]
+  if strFeatureTwoID:
+    args.strTitle = args.strTitle+[strFeatureTwoID]
+  args.strTitle = " vs ".join(args.strTitle)
+
+csvReader = csv.reader(open(args.strFileAbund, 'rU') if isinstance(args.strFileAbund,str) else args.strFileAbund, delimiter="\t")
+
+if args.strX is None:
+  args.strX = strFeatureOneID
+
+if args.strY is None:
+  args.strY = strFeatureTwoID
+
+# Get values and groupings
+for lsLine in csvReader:
+  if lsLine[0] == strFeatureOneID:
+    lxVariable1 = lsLine[1:]
+  if not strFeatureTwoID is None:
+    if lsLine[0] == strFeatureTwoID:
+      lxVariable2 = lsLine[1:]
+
+# Remove NAs
+liNAs = [i for i,x in enumerate(lxVariable1) if x.lower() == "na"]
+liNAs = set([i for i,x in enumerate(lxVariable1) if x.lower() == "na"]+liNAs)
+lxVariable1 = [x for i,x in enumerate(lxVariable1) if not i in liNAs]
+
+if not lxVariable2 is None:
+  lxVariable2 = [x for i,x in enumerate(lxVariable2) if not i in liNAs]
+
+# Type variables
+if not lxVariable1 is None:
+  try:
+    float(lxVariable1[0])
+    lxVariable1 = [float(xItem) for xItem in lxVariable1]
+    fOneIsNumeric = True
+  except ValueError:
+    pass
+
+if not lxVariable2 is None:
+  try:
+    float(lxVariable2[0])
+    lxVariable2 = [float(xItem) for xItem in lxVariable2]
+    fTwoIsNumeric = True
+  except ValueError:
+    pass
+
+if lxVariable1 is None:
+  print("scriptPlotFeature:: Sorry, could not find the feature "+ strFeatureOneID +" in the file "+args.strFileAbund+" .")
+elif( (lxVariable2 is None) and (not strFeatureTwoID is None) ):
+  print("scriptPlotFeature:: Sorry, could not find the feature "+ strFeatureTwoID +" in the file "+args.strFileAbund+" .")
+else:
+  # Plot as needed
+  if((not lxVariable1 is None ) and (not lxVariable2 is None)):
+    if(sum([fOneIsNumeric, fTwoIsNumeric])==0):
+      print "scriptPlotFeature:: Error, If plotting 2 variables, atleast 1 should be numeric."
+    elif(sum([fOneIsNumeric, fTwoIsNumeric])==1):
+      funcPlotBoxPlot(lxVariable1,lxVariable2, fOneIsNumeric=fOneIsNumeric)
+    elif(sum([fOneIsNumeric, fTwoIsNumeric])==2):
+      ScatterPlot.funcPlot(lxVariable1, lxVariable2, args.strOutputFile, strTitle=args.strTitle, strXTitle=args.strX, strYTitle=args.strY, strColor=args.strColor, fInvert=args.fColor)
+  elif(not lxVariable1 is None ):
+    if fOneIsNumeric:
+      Histogram.funcPlot(lxVariable1, args.strOutputFile, strTitle=args.strTitle, strXTitle=args.strX, strYTitle="Frequency", strColor=args.strColor, fInvert=args.fColor)
+    else:
+      print "Sorry currently histograms are support for only numeric data."