cpo_prediction: cpo_galaxy_tree.py comparison

comparison cpo_galaxy_tree.py @ 18:596bf8a792de draft

planemo upload

author	jjjjia
date	Tue, 28 Aug 2018 15:15:09 -0400
parents	a14b12a71a53
children	e5a7da2239af

comparison

equal deleted inserted replaced

-:ed3b291693fc
+:596bf8a792de
 import ete3 as e #conda ete3 3.1.1**** >requires pyqt5
 #parses some parameters
 parser = optparse.OptionParser("Usage: %prog [options] arg1 arg2 ...")
-parser.add_option("-t", "--tree", dest="treePath", type="string", default="./pipelineTest/tree.txt", help="identifier of the isolate")
+parser.add_option("-t", "--tree", dest="treePath", type="string", default="./pipelineTest/tree.txt", help="absolute file path to phylip tree")
-parser.add_option("-d", "--distance", dest="distancePath", type="string", default="./pipelineTest/distance.tab", help="absolute file path forward read (R1)")
+parser.add_option("-d", "--distance", dest="distancePath", type="string", default="./pipelineTest/distance.tab", help="absolute file path to distance matrix")
-parser.add_option("-m", "--metadata", dest="metadataPath", type="string", default="./pipelineTest/metadata.tsv",help="absolute file path to reverse read (R2)")
+parser.add_option("-m", "--metadata", dest="metadataPath", type="string", default="./pipelineTest/metadata.tsv",help="absolute file path to metadata file")
+parser.add_option("-o", "--output_file", dest="outputFile", type="string", default="tree.png", help="Output graphics file. Use ending 'png', 'pdf' or 'svg' to specify file format.")
+# sensitive data adder
+parser.add_option("-p", "--sensitive_data", dest="sensitivePath", type="string", default="", help="Spreadsheet (CSV) with sensitive metadata")
+parser.add_option("-c", "--sensitive_cols", dest="sensitiveCols", type="string", default="", help="CSV list of column names from sensitive metadata spreadsheet to use as labels on dendrogram")
+parser.add_option("-b", "--bcid_column", dest="bcidCol", type="string", default="BCID", help="Column name of BCID in sensitive metadata file")
+parser.add_option("-n", "--missing_value", dest="naValue", type="string", default="NA", help="Value to write for missing data.")
 (options,args) = parser.parse_args()
 treePath = str(options.treePath).lstrip().rstrip()
 distancePath = str(options.distancePath).lstrip().rstrip()
 metadataPath = str(options.metadataPath).lstrip().rstrip()
+sensitivePath = str(options.sensitivePath).lstrip().rstrip()
+sensitiveCols = str(options.sensitiveCols).lstrip().rstrip()
+outputFile = str(options.outputFile).lstrip().rstrip()
+bcidCol = str( str(options.bcidCol).lstrip().rstrip() )
+naValue = str( str(options.naValue).lstrip().rstrip() )
 #region result objects
 #define some objects to store values from results
 #//TODO this is not the proper way of get/set private object variables. every value has manually assigned defaults intead of specified in init(). Also, use property(def getVar, def setVar).
+class SensitiveMetadata(object):
+def __init__(self):
+x = pandas.read_csv( sensitivePath )
+col_names = [ s for s in sensitiveCols.split(',')] # convert to 0 offset
+if not bcidCol in col_names:
+col_names.append( bcidCol )
+all_cols = [ str(col) for col in x.columns ]
+col_idxs = [ all_cols.index(col) for col in col_names ]
+self.sensitive_data = x.iloc[:, col_idxs]
+def get_columns(self):
+cols = [ str(x) for x in self.sensitive_data.columns ]
+return cols
+def get_value( self, bcid, column_name ): # might be nice to get them all in single call via an input list of bcids ... for later
+bcids= list( self.sensitive_data.loc[:, bcidCol ] ) # get the list of all BCIDs  in sensitive metadata
+if not bcid in bcids:
+return naValue
+else:
+row_idx = bcids.index( bcid )                        # lookup the row for this BCID
+return self.sensitive_data.loc[ row_idx, column_name ]  # return the one value based on the column (col_idx) and this row
 class workflowResult(object):
 def __init__(self):
 self.new = False
 self.ID	= "?"
 self.ExpectedSpecies = "?"
 self.MLSTSpecies = "?"
 self.SequenceType = "?"
 self.MLSTScheme = "?"
 self.CarbapenemResistanceGenes ="?"
+self.plasmidBestMatch ="?"
 self.OtherAMRGenes="?"
 self.TotalPlasmids = -1
 self.plasmids = []
 self.DefinitelyPlasmidContigs ="?"
 self.LikelyPlasmidContigs="?"
 _results.SequenceType = str(r.loc[r.index[i], 'Sequence Type'])
 _results.MLSTScheme = (str(r.loc[r.index[i], 'MLST Scheme']))
 _results.CarbapenemResistanceGenes = (str(r.loc[r.index[i], 'Carbapenem Resistance Genes']))
 _results.OtherAMRGenes = (str(r.loc[r.index[i], 'Other AMR Genes']))
 _results.TotalPlasmids = int(r.loc[r.index[i], 'Total Plasmids'])
+_results.plasmidBestMatch = str(r.loc[r.index[i], 'Plasmid Best Match'])
 for j in range(0,_results.TotalPlasmids):
 _plasmid = plasmidObj()
 _plasmid.PlasmidsID =(((str(r.loc[r.index[i], 'Plasmids ID'])).split(";"))[j])
 _plasmid.Num_Contigs = (((str(r.loc[r.index[i], 'Num_Contigs'])).split(";"))[j])
 _plasmid.PlasmidLength	= (((str(r.loc[r.index[i], 'Plasmid Length'])).split(";"))[j])
 return _worflowResult
 #endregion
 def Main():
+if len(sensitivePath)>0:
+sensitive_meta_data = SensitiveMetadata()
 metadata = ParseWorkflowResults(metadataPath)
 distance = read(distancePath)
 treeFile = "".join(read(treePath))
 distanceDict = {} #store the distance matrix as rowname:list<string>
 #plasmidIncs = sorted(plasmidIncs)
 for n in t.traverse(): #loop through the nodes of a tree
 if (n.is_leaf() and n.name == "Reference"):
 #if its the reference branch, populate the faces with column headers
 index = 0
+if len(sensitivePath)>0: #sensitive metadat @ chris
+for sensitive_data_column in sensitive_meta_data.get_columns():
+(t&"Reference").add_face(addFace(sensitive_data_column), index, "aligned")
+index = index + 1
 (t&"Reference").add_face(addFace("SampleID"), index, "aligned")
 index = index + 1
 (t&"Reference").add_face(addFace("New?"), index, "aligned")
 index = index + 1
 for i in range(len(plasmidIncs)): #this loop adds the columns (aka the incs) to the reference node
 (t&"Reference").add_face(addFace("MLSTScheme"), index, "aligned")
 index = index + 1
 (t&"Reference").add_face(addFace("Sequence Type"), index, "aligned")
 index = index + 1
 (t&"Reference").add_face(addFace("Carbapenamases"), index, "aligned")
+index = index + 1
+(t&"Reference").add_face(addFace("Plasmid Best Match"), index, "aligned")
 index = index + 1
 for i in range(len(distanceDict[list(distanceDict.keys())[0]])): #this loop adds the distance matrix
 (t&"Reference").add_face(addFace(distanceDict[list(distanceDict.keys())[0]][i]), index + i, "aligned")
 index = index + len(distanceDict[list(distanceDict.keys())[0]])
 elif (n.is_leaf() and not n.name == "Reference"):
 #not reference branches, populate with metadata
 index = 0
+if len(sensitivePath)>0: #sensitive metadata @ chris
+# pushing in sensitive data
+for sensitive_data_column in sensitive_meta_data.get_columns():
+# tree uses bcids like BC18A021A_S12
+# while sens meta-data uses BC18A021A
+# trim the "_S.*" if present
+bcid = str(mData.ID)
+if bcid.find( "_S" ) != -1:
+bcid = bcid[ 0:bcid.find( "_S" ) ]
+sens_col_val = sensitive_meta_data.get_value(bcid=bcid, column_name=sensitive_data_column )
+n.add_face(addFace(sens_col_val), index, "aligned")
+index = index + 1
 if (n.name.replace(".fa","") in metadata.keys()):
 mData = metadata[n.name.replace(".fa","")]
 else:
 mData = metadata["na"]
 n.add_face(addFace(mData.ID), index, "aligned")
 index = index + 1
 n.add_face(addFace(mData.SequenceType), index, "aligned")
 index = index + 1
 n.add_face(addFace(mData.CarbapenemResistanceGenes), index, "aligned")
 index = index + 1
+n.add_face(addFace(mData.plasmidBestMatch), index, "aligned")
+index = index + 1
 for i in range(len(distanceDict[list(distanceDict.keys())[0]])): #this loop adds distance matrix
 if (n.name in distanceDict): #make sure the column is in the distance matrice
 n.add_face(addFace(list(distanceDict[n.name])[i]), index + i, "aligned")
-t.render("./tree.pdf", w=5000,units="mm", tree_style=ts) #save it as a png. or an phyloxml
+t.render(outputFile, w=5000,units="mm", tree_style=ts) #save it as a png, pdf, svg or an phyloxml
 #endregion
 #endregion

Mercurial > repos > jjjjia > cpo_prediction

comparison cpo_galaxy_tree.py @ 18:596bf8a792de draft