annotate cpo_galaxy_tree.py @ 7:4d2777aa99db draft

planemo upload
author jjjjia
date Thu, 23 Aug 2018 18:35:54 -0400
parents cabceaa239e4
children 93c25036d3b9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
1 #!/home/jjjjia/.conda/envs/py36/bin/python
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
2
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
3 #$ -S /home/jjjjia/.conda/envs/py36/bin/python
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
4 #$ -V # Pass environment variables to the job
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
5 #$ -N CPO_pipeline # Replace with a more specific job name
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
6 #$ -wd /home/jjjjia/testCases # Use the current working dir
7
4d2777aa99db planemo upload
jjjjia
parents: 6
diff changeset
7 #$ -pe smp 1 # Parallel Environment (how many cores)
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
8 #$ -l h_vmem=11G # Memory (RAM) allocation *per core*
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
9 #$ -e ./logs/$JOB_ID.err
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
10 #$ -o ./logs/$JOB_ID.log
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
11 #$ -m ea
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
12 #$ -M bja20@sfu.ca
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
13
7
4d2777aa99db planemo upload
jjjjia
parents: 6
diff changeset
14 # >python cpo_galaxy_tree.py -t /path/to/tree.ph -d /path/to/distance/matrix -m /path/to/metadata
4d2777aa99db planemo upload
jjjjia
parents: 6
diff changeset
15
4d2777aa99db planemo upload
jjjjia
parents: 6
diff changeset
16 # <requirements>
4d2777aa99db planemo upload
jjjjia
parents: 6
diff changeset
17 # <requirement type="package" version="0.23.4">pandas</requirement>
4d2777aa99db planemo upload
jjjjia
parents: 6
diff changeset
18 # <requirement type="package" version="3.6">python</requirement>
4d2777aa99db planemo upload
jjjjia
parents: 6
diff changeset
19 # <requirement type="package" version="3.1.1">ete3</requirement>
4d2777aa99db planemo upload
jjjjia
parents: 6
diff changeset
20 # <requirement type="package" version="5.9.3">pyqt</requirement>
4d2777aa99db planemo upload
jjjjia
parents: 6
diff changeset
21 # </requirements>
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
22
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
23 import subprocess
7
4d2777aa99db planemo upload
jjjjia
parents: 6
diff changeset
24 import pandas #conda pandas
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
25 import optparse
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
26 import os
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
27 import datetime
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
28 import sys
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
29 import time
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
30 import urllib.request
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
31 import gzip
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
32 import collections
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
33 import json
7
4d2777aa99db planemo upload
jjjjia
parents: 6
diff changeset
34 import numpy #conda numpy
4d2777aa99db planemo upload
jjjjia
parents: 6
diff changeset
35 import ete3 as e #conda ete3 3.1.1**** >requires pyqt5
6
cabceaa239e4 planemo upload
jjjjia
parents: 1
diff changeset
36
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
37
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
# Parse the command-line parameters. The three resulting module-level paths
# (treePath, distancePath, metadataPath) are read by Main().
parser = optparse.OptionParser("Usage: %prog [options] arg1 arg2 ...")
parser.add_option("-t", "--tree", dest="treePath", type="string", default="./pipelineTest/tree.txt", help="path to the phylogenetic tree file")
parser.add_option("-d", "--distance", dest="distancePath", type="string", default="./pipelineTest/distance.tab", help="path to the tab-separated distance matrix")
parser.add_option("-m", "--metadata", dest="metadataPath", type="string", default="./pipelineTest/metadata.tsv", help="path to the tab-separated workflow results (metadata) table")
(options, args) = parser.parse_args()
# Strip stray whitespace that may surround the paths on the command line.
treePath = str(options.treePath).strip()
distancePath = str(options.distancePath).strip()
metadataPath = str(options.metadataPath).strip()
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
47
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
48
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
49 #region result objects
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
50 #define some objects to store values from results
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
51 #//TODO this is not the proper way to get/set private object variables. Every value has a manually assigned default instead of being specified in init(). Also, use property(def getVar, def setVar).
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
class workflowResult(object):
    """Plain record holding one row of the workflow results table.

    Every attribute starts at an empty default and is filled in by
    ParseWorkflowResults().
    """

    def __init__(self):
        self.new = False  # True when the row's 'new' column reads "new"
        self.ID = ""
        self.ExpectedSpecies = ""
        self.MLSTSpecies = ""
        self.SequenceType = ""
        self.MLSTScheme = ""
        self.CarbapenemResistanceGenes = ""
        self.OtherAMRGenes = ""
        self.TotalPlasmids = 0
        self.plasmids = []  # list of plasmidObj, one per plasmid
        self.DefinitelyPlasmidContigs = ""
        self.LikelyPlasmidContigs = ""
        self.row = ""  # the whole source row re-joined with tabs

    def __repr__(self):
        return "workflowResult(ID={!r}, new={!r}, TotalPlasmids={!r})".format(
            self.ID, self.new, self.TotalPlasmids)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
class plasmidObj(object):
    """Plain record describing a single plasmid of a sample.

    ParseWorkflowResults() overwrites every attribute with the string found
    in the matching ';'-separated column of the results table.
    """

    def __init__(self):
        self.PlasmidsID = 0
        self.Num_Contigs = 0
        self.PlasmidLength = 0
        self.PlasmidRepType = ""
        self.PlasmidMobility = ""
        self.NearestReference = ""

    def __repr__(self):
        return "plasmidObj(PlasmidsID={!r}, PlasmidRepType={!r}, PlasmidMobility={!r})".format(
            self.PlasmidsID, self.PlasmidRepType, self.PlasmidMobility)
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
75
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
76 #endregion
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
77
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
78 #region useful functions
7
4d2777aa99db planemo upload
jjjjia
parents: 6
diff changeset
def read(path): #read in a text file to a list
    """Return the lines of the text file at *path* as a list of strings.

    Only the trailing newline character(s) of each line are removed; other
    whitespace is kept. The file is closed before returning.
    """
    with open(path) as handle:
        return [line.rstrip('\n') for line in handle]
7
4d2777aa99db planemo upload
jjjjia
parents: 6
diff changeset
def execute(command): #subprocess.popen call bash command
    """Run *command*, echo its output live, and return the captured output.

    *command* is passed to subprocess.Popen with shell=False, so it should be
    an argument list. stderr is merged into stdout. The child runs in the
    current working directory.

    Returns the combined stdout/stderr text of the child.
    Raises subprocess.CalledProcessError (carrying the captured output) when
    the child exits with a non-zero status.
    """
    captured = []
    with subprocess.Popen(command, shell=False, universal_newlines=True,
                          stdout=subprocess.PIPE, stderr=subprocess.STDOUT) as process:
        # Stream each line as it arrives, and keep it: once the pipe has been
        # drained there is nothing left for communicate() to return.
        for nextline in process.stdout:
            captured.append(nextline)
            sys.stdout.write(nextline)
            sys.stdout.flush()
        exitCode = process.wait()

    output = "".join(captured)
    if exitCode != 0:
        raise subprocess.CalledProcessError(exitCode, command, output=output)
    return output
7
4d2777aa99db planemo upload
jjjjia
parents: 6
diff changeset
def httpGetFile(url, filepath=""): #download a file from the web
    """Download *url* with urllib.request.urlretrieve.

    With no *filepath* (empty string or None) the result of urlretrieve is
    returned unchanged, i.e. a (local_filename, headers) tuple. Otherwise the
    content is saved to *filepath* and True is returned.
    """
    if not filepath:
        return urllib.request.urlretrieve(url)
    urllib.request.urlretrieve(url, filepath)
    return True
7
4d2777aa99db planemo upload
jjjjia
parents: 6
diff changeset
def gunzip(inputpath="", outputpath=""): #gunzip
    """Decompress the gzip file at *inputpath*.

    With no *outputpath* (empty string or None) the decompressed bytes are
    returned. Otherwise they are written to *outputpath* and True is returned.
    The input is fully decompressed before the output file is opened, so a
    corrupt archive does not leave a partial output file behind.
    """
    with gzip.open(inputpath, 'rb') as f:
        gzContent = f.read()
    if not outputpath:
        return gzContent
    with open(outputpath, 'wb') as out:
        out.write(gzContent)
    return True
7
4d2777aa99db planemo upload
jjjjia
parents: 6
diff changeset
def addFace(name): #function to add a facet to a tree
    """Build and return an ete3 TextFace showing *name*.

    The face uses font size 10 with tight text and a margin of 5 on the
    border, left and right. The caller attaches it to a tree node; this
    function does not modify any tree itself.
    """
    face = e.faces.TextFace(name, fsize=10, tight_text=True)
    face.border.margin = 5
    face.margin_right = 5
    face.margin_left = 5
    return face
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
123 #endregion
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
124
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
125 #region functions to parse result files
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
def ParseWorkflowResults(pathToResult):
    """Parse the tab-separated workflow results table into result objects.

    The first line of the file is the header. Missing values are replaced by
    empty strings. For every row a workflowResult is built from the columns
    'new', 'ID', 'Expected Species', 'MLST Species', 'Sequence Type',
    'MLST Scheme', 'Carbapenem Resistance Genes', 'Other AMR Genes',
    'Total Plasmids', 'Definitely Plasmid Contigs' and 'Likely Plasmid Contigs'.
    When 'Total Plasmids' is positive, the ';'-separated plasmid columns are
    split and one plasmidObj per plasmid is appended to the result.

    Returns a dict mapping each row's ID (as a string) to its workflowResult.
    Raises KeyError for a missing column, ValueError when 'Total Plasmids' is
    not an integer, and IndexError when a plasmid column holds fewer
    ';'-separated entries than 'Total Plasmids'.
    """
    # plasmidObj attribute -> column holding its ';'-separated values
    plasmidColumns = (
        ('PlasmidsID', 'Plasmids ID'),
        ('Num_Contigs', 'Num_Contigs'),
        ('PlasmidLength', 'Plasmid Length'),
        ('PlasmidRepType', 'Plasmid RepType'),
        ('PlasmidMobility', 'Plasmid Mobility'),
        ('NearestReference', 'Nearest Reference'),
    )

    _worflowResult = {}
    r = pandas.read_csv(pathToResult, delimiter='\t', header=0)
    r = r.fillna('')
    for i in range(len(r.index)):
        label = r.index[i]
        _results = workflowResult()
        _results.new = str(r.loc[label, 'new']).lower() == "new"
        _results.ID = str(r.loc[label, 'ID'])
        _results.ExpectedSpecies = str(r.loc[label, 'Expected Species'])
        _results.MLSTSpecies = str(r.loc[label, 'MLST Species'])
        _results.SequenceType = str(r.loc[label, 'Sequence Type'])
        _results.MLSTScheme = str(r.loc[label, 'MLST Scheme'])
        _results.CarbapenemResistanceGenes = str(r.loc[label, 'Carbapenem Resistance Genes'])
        _results.OtherAMRGenes = str(r.loc[label, 'Other AMR Genes'])
        _results.TotalPlasmids = int(r.loc[label, 'Total Plasmids'])

        # The plasmid columns are only touched for rows that have plasmids,
        # so rows without plasmids do not require those columns to exist.
        if _results.TotalPlasmids > 0:
            # Split each column once per row rather than once per plasmid.
            splitColumns = [(attr, str(r.loc[label, column]).split(";"))
                            for attr, column in plasmidColumns]
            for j in range(_results.TotalPlasmids):
                _plasmid = plasmidObj()
                for attr, values in splitColumns:
                    setattr(_plasmid, attr, values[j])
                _results.plasmids.append(_plasmid)

        _results.DefinitelyPlasmidContigs = str(r.loc[label, 'Definitely Plasmid Contigs'])
        _results.LikelyPlasmidContigs = str(r.loc[label, 'Likely Plasmid Contigs'])
        # DataFrame.ix was removed from pandas; .iloc is the positional form.
        _results.row = "\t".join(str(x) for x in r.iloc[i].tolist())
        _worflowResult[_results.ID] = _results
    return _worflowResult
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
158
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
159 #endregion
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
160
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
def _presenceBox(color):
    """Return a 30x30 ete3 RectFace filled and outlined with *color*."""
    face = e.RectFace(30, 30, color, color)
    face.border.margin = 5
    face.margin_right = 5
    face.margin_left = 5
    face.vt_align = 1
    face.ht_align = 1
    return face


def Main():
    """Render the annotated tree to ./tree.png.

    Reads the metadata table, distance matrix and tree from the module-level
    metadataPath, distancePath and treePath. The tree is rooted on the leaf
    named "Reference", whose aligned faces serve as column headers. Every
    other leaf gets, in order: its sample ID, a green box when the sample is
    new, a black box under each plasmid Inc type it carries, three metadata
    text columns, and its row of the distance matrix.
    """
    metadata = ParseWorkflowResults(metadataPath)
    distance = read(distancePath)

    distanceDict = {} #store the distance matrix as rowname:list<string>
    for line in distance:
        temp = line.split("\t")
        distanceDict[temp[0]] = temp[1:]
    # The first row of the matrix supplies the column headers.
    distanceHeader = distance[0].split("\t")[1:] if distance else []

    #region create box tree
    treeFile = "".join(read(treePath))
    t = e.Tree(treeFile)
    t.set_outgroup(t&"Reference")

    #set the tree style
    ts = e.TreeStyle()
    ts.show_leaf_name = False
    ts.show_branch_length = True
    ts.scale = 2000 #pixel per branch length unit
    ts.branch_vertical_margin = 15 #pixel between branches
    style2 = e.NodeStyle()
    style2["fgcolor"] = "#000000"
    style2["shape"] = "circle"
    style2["vt_line_color"] = "#0000aa"
    style2["hz_line_color"] = "#0000aa"
    style2["vt_line_width"] = 2
    style2["hz_line_width"] = 2
    style2["vt_line_type"] = 0 # 0 solid, 1 dashed, 2 dotted
    style2["hz_line_type"] = 0
    for n in t.traverse():
        n.set_style(style2)

    #find the plasmid origins: Inc type -> list of sample IDs carrying it
    plasmidIncs = {}
    for key in metadata:
        for plasmid in metadata[key].plasmids:
            for inc in plasmid.PlasmidRepType.split(","):
                if "inc" in inc.lower():
                    samples = plasmidIncs.setdefault(inc, [])
                    if metadata[key].ID not in samples:
                        samples.append(metadata[key].ID)
    # Fix the column order once so headers and presence boxes line up.
    incNames = list(plasmidIncs)

    for n in t.traverse(): #loop through the nodes of a tree
        if not n.is_leaf():
            continue
        if n.name == "Reference":
            #if its the reference branch, populate the faces with column headers
            # NOTE(review): the "MLSTScheme" header sits above values taken
            # from mData.MLSTSpecies on the sample rows - confirm which one
            # is intended.
            headers = (["SampleID", "New?"] + incNames +
                       ["MLSTScheme", "Sequence Type", "Carbapenamases"] +
                       distanceHeader)
            for column, title in enumerate(headers):
                (t&"Reference").add_face(addFace(title), column, "aligned")
        else:
            #not reference branches, populate with metadata
            sampleID = n.name.replace(".fa","")
            mData = metadata[sampleID]
            index = 0
            n.add_face(addFace(mData.ID), index, "aligned")
            index = index + 1
            if mData.new: #new column
                n.add_face(_presenceBox("green"), index, "aligned")
            # The "New?" column is consumed whether or not a box was drawn,
            # otherwise the later columns would not align with the headers.
            index = index + 1
            for offset, inc in enumerate(incNames): #presence/absence per Inc type
                if sampleID in plasmidIncs[inc]:
                    n.add_face(_presenceBox("black"), index + offset, "aligned")
            index = index + len(incNames)
            n.add_face(addFace(mData.MLSTSpecies), index, "aligned")
            index = index + 1
            n.add_face(addFace(mData.SequenceType), index, "aligned")
            index = index + 1
            n.add_face(addFace(mData.CarbapenemResistanceGenes), index, "aligned")
            index = index + 1
            # The matrix row is looked up by the unmodified leaf name.
            distanceRow = distanceDict[n.name]
            for offset in range(len(distanceHeader)): #this loop adds distance matrix
                n.add_face(addFace(distanceRow[offset]), index + offset, "aligned")

    t.render("./tree.png", w=5000,units="mm", tree_style=ts) #save it as a png. or an phyloxml
    #endregion
1
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
328
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
329 #endregion
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
330 #endregion
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
331
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
332
fea89c4d5227 Uploaded
jjjjia
parents:
diff changeset
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    start = time.time()#time the analysis

    #analysis time
    Main()

    end = time.time()
    print("Finished!\nThe analysis used: " + str(end-start) + " seconds")