16
|
1 """
|
|
2 * Copyright 2018 University of Liverpool
|
|
3 * Author: John Heap, Computational Biology Facility, UoL
|
|
4 * Based on original scripts of Sara Silva Pereira, Institute of Infection and Global Health, UoL
|
|
5 *
|
|
6 * Licensed under the Apache License, Version 2.0 (the "License");
|
|
7 * you may not use this file except in compliance with the License.
|
|
8 * You may obtain a copy of the License at
|
|
9 *
|
|
10 * http://www.apache.org/licenses/LICENSE-2.0
|
|
11 *
|
|
12 * Unless required by applicable law or agreed to in writing, software
|
|
13 * distributed under the License is distributed on an "AS IS" BASIS,
|
|
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15 * See the License for the specific language governing permissions and
|
|
16 * limitations under the License.
|
|
17 *
|
|
18 """
|
6
|
19
|
16
|
20 import subprocess
|
|
21 import re
|
|
22 import os
|
|
23 import sys
|
|
24 import shutil
|
|
25 import pandas as pd
|
|
26 import numpy as np
|
|
27 import matplotlib as mpl
|
|
28 mpl.use('Agg')
|
|
29 import matplotlib.pyplot as plt
|
|
30 from matplotlib.mlab import PCA
|
|
31 import seaborn as sns
|
6
|
32
|
16
|
33 # some globals for convenience
|
6
|
34
|
16
|
35 pList = ['P1', 'P2', 'P3', 'P4', 'P5', 'P6', 'P7', 'P8', 'P9', 'P10', 'P11', 'P12', 'P13', 'P14', 'P15']
|
|
36
|
|
37 quietString = "" #" >>"+os.path.dirname(os.path.realpath(__file__))+"/log/Vap_log.txt 2>&1"
|
|
38
|
|
39 def assembleWithVelvet(name, kmers, inslen, covcut, fastq1name,fastq2name):
|
|
40 #argString = "velveth " + name + "_k65 65 -shortPaired -fastq " + name + "_R1.fastq " + name + "_R2.fastq"
|
|
41 argString = "velveth " + name + "_k"+ kmers+" "+ kmers + " -shortPaired -fastq " + fastq1name+" "+fastq2name+quietString
|
|
42 print(argString)
|
|
43 returncode = subprocess.call(argString, shell=True)
|
|
44 if returncode != 0:
|
|
45 return "Error in velveth"
|
|
46 argString = "velvetg " + name + "_k"+kmers+" -exp_cov auto -ins_length "+inslen+" -cov_cutoff "+covcut+" -clean yes -ins_length_sd 50 -min_pair_count 20"+quietString
|
|
47 #argString = "velvetg " + name + "_k65 -exp_cov auto -ins_length 400 -cov_cutoff 5 -clean yes -ins_length_sd 50 -min_pair_count 20"+quietString
|
|
48 print(argString)
|
|
49 returncode = subprocess.call(argString, shell = True)
|
|
50 if returncode != 0:
|
|
51 return "Error in velvetg"
|
|
52 shutil.copyfile(name + "_k"+kmers+"//contigs.fa",name + ".fa") # my $namechange = "mv ".$input."_k65/contigs.fa ".$input.".fa";
|
|
53 return "ok"
|
|
54
|
|
55 def contigTranslation(name):
|
|
56 argString = "transeq " + name + ".fa " + name + "_6frame.fas -frame=6 " #+quietString
|
|
57 print(argString)
|
|
58 returncode = subprocess.call(argString, shell=True)
|
6
|
59
|
|
60
|
16
|
61 def HMMerMotifSearch(name):
|
|
62 motifs = ['1', '2a', '2b', '3', '4a', '4b', '4c', '5', '6', '7', '8a', '8b', '9a', '9b',
|
|
63 '9c', '10a', '10b', '11a', '11b', '12', '13a', '13b', '13c', '13d', '14', '15a', '15b', '15c']
|
|
64 lineCounts = []
|
|
65 compoundList = []
|
|
66 dir_path = os.path.dirname(os.path.realpath(__file__))
|
|
67 phylopath = dir_path + "/data/Motifs/Phylotype"
|
|
68 for m in motifs:
|
|
69 argString = "hmmsearch " + phylopath + m + ".hmm " + name + "_6frame.fas > Phy" + m + ".out" # +quietString
|
|
70 # argString = "hmmsearch "+phylopath + m + ".hmm " + dir_path+"/data/Test_6frame.fas > Phy" + m + ".out"
|
|
71 #print(argString)
|
|
72 subprocess.call(argString, shell=True)
|
|
73
|
|
74 hmmResult = open("Phy" + m + ".out", 'r')
|
|
75 tempout = open(dir_path + "/data/" + "Phy" + m + ".txt", 'w')
|
|
76 #regex = r"NODE_[0-9]{1,7}_length_[0-9]{1,7}_cov_[0-9]{1,10}.[0-9]{1,7}_[0-9]{1,2}"
|
|
77 n = 0
|
|
78 outList = []
|
|
79 for l in range(0,14):
|
|
80 hmmResult.readline() #hacky? miss out the first 14 lines. data we want starts on line 15
|
|
81
|
|
82
|
|
83 for line in hmmResult:
|
|
84 if re.search(r"inclusion", line):
|
|
85 #print("inclusion threshold reached")
|
|
86 break
|
|
87 if len(line) <= 1:
|
|
88 #print("end of data")
|
|
89 break
|
|
90 m = line[60:-1]
|
|
91 #print(m)
|
|
92 #tempout.write(m.group() + "\n")
|
|
93 outList.append("" + m + "\n")
|
|
94 n += 1
|
|
95 compoundList.append(outList)
|
|
96 lineCounts.append(n)
|
|
97 hmmResult.close()
|
|
98
|
|
99
|
|
100 print(lineCounts)
|
|
101 motifGroups = [['1'], ['2a', '2b'], ['3'], ['4a', '4b', '4c'], ['5'], ['6'], ['7'], ['8a', '8b'], ['9a', '9b',
|
|
102 '9c'],
|
|
103 ['10a', '10b'], ['11a', '11b'], ['12'], ['13a', '13b', '13c', '13d'], ['14'], ['15a', '15b', '15c']]
|
|
104 concatGroups = [1, 2, 1, 3, 1, 1, 1, 2, 3, 2, 2, 1, 4, 1, 3]
|
|
105 countList = []
|
|
106 countIndex = 0
|
|
107 totalCount = 0
|
|
108
|
|
109 for c in concatGroups:
|
|
110 a = []
|
|
111 for n in range(0, c):
|
|
112 a = a + compoundList.pop(0)
|
|
113 t = set(a)
|
|
114 countList.append(len(t))
|
|
115 totalCount += len(t)
|
|
116 countList.append(totalCount)
|
|
117 #print(countList)
|
|
118 #print("--------")
|
|
119 return countList
|
6
|
120
|
|
121
|
|
122
|
16
|
123 def relativeFrequencyTable(countList, name, htmlresource):
|
|
124 relFreqList = []
|
|
125 c = float(countList[15])
|
|
126 if c == 0:
|
|
127 return [0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0]
|
|
128 for i in range(0, 15):
|
|
129 relFreqList.append(countList[i] / c)
|
6
|
130
|
16
|
131 data = {'Phylotype': pList, 'Relative Frequency': relFreqList}
|
|
132 relFreq_df = pd.DataFrame(data)
|
|
133 j_fname = htmlresource+"/" + name + "_relative_frequency.csv"
|
|
134 relFreq_df.to_csv(j_fname)
|
|
135 return relFreqList # 0-14 = p1-p15 counts [15] = total counts
|
6
|
136
|
|
137
|
|
138
|
|
139
|
16
|
140 def getDeviationFromMean(frequencyList, name, htmlresource):
|
|
141 devList = []
|
|
142 dir_path = os.path.dirname(os.path.realpath(__file__))
|
|
143 j_fname = dir_path + "/data/congodata.csv"
|
|
144 #j_fname = r"data/congodata.csv"
|
|
145 congo_df = pd.read_csv(j_fname) # we get the means from congo_df
|
|
146 for p in range(0, 15):
|
|
147 m = congo_df[pList[p]].mean()
|
|
148 dev = -(m - frequencyList[p])
|
|
149 devList.append(dev)
|
6
|
150
|
16
|
151 data = {'Phylotype': pList, 'Deviation from Mean': devList}
|
|
152 dev_df = pd.DataFrame(data)
|
|
153 j_fname = htmlresource+"/" + name + "_deviation_from_mean.csv"
|
|
154 dev_df.to_csv(j_fname)
|
|
155 return devList
|
6
|
156
|
|
157
|
16
|
158 def relativeFrequencyHeatMap(name, freqList, pdf, htmlresource):
|
|
159 localFreqList = freqList[:]
|
|
160 localFreqList.insert(0, name)
|
|
161 dir_path = os.path.dirname(os.path.realpath(__file__))
|
|
162 j_fname = dir_path+"/data/congodata.csv"
|
|
163 #print(dir_path)
|
|
164 congo_df = pd.read_csv(j_fname)
|
|
165 congo_df.drop('Colour', axis=1, inplace=True)
|
|
166 congo_df.loc[congo_df.index.max() + 1] = localFreqList
|
|
167 ysize = len(congo_df) * 20 / 97.0 # make vertical size equivlanet 20' is ok for 97.
|
6
|
168
|
16
|
169 congo_df.set_index('Strain', inplace=True)
|
6
|
170
|
16
|
171 cg = sns.clustermap(congo_df, method='ward', cmap = "RdBu_r", col_cluster=False, yticklabels = congo_df.index.values,figsize = (10,ysize))
|
|
172 plt.setp(cg.ax_heatmap.yaxis.get_ticklabels(), rotation=0, fontsize=8) # get y labels printed horizontally
|
|
173 ax=cg.ax_heatmap
|
|
174 title = "Variant Antigen Profiles of $\itTrypanosoma$ $\itcongolense$ estimated as the phylotype proportion across the\nsample cohort. "
|
|
175 title += "Dendrogram reflects the relationships amongst the VSG repertoires of each strain. "
|
|
176 title += "Strains\nwere isolated from multiple African countries as described in Silva Pereira et al. (2018)."
|
|
177 title += "\nData was produced with the 'Variant Antigen Profiler' (Silva Pereira et al., 2019)."
|
6
|
178
|
16
|
179 #title = "Variant Antigen Profiles of Trypanosoma congolense estimated as the phylotype proportion across the sample cohort. Dendrogram reflects the relationships amongst the VSG repertoires of each strain. Strains were isolated from multiple African countries as described in Silva Pereira et al. (2018). Data was produced with the 'Variant Antigen Profiler' (Silva Pereira and Jackson, 2018)."
|
|
180 #ax.set_title(title, ha = "center", va = "bottom",wrap = "True")
|
|
181 #title = "Where is this!"
|
|
182 ax.text(-0.15,-0.05, title,va = "top",wrap = "True", transform = ax.transAxes )
|
6
|
183
|
|
184
|
|
185
|
|
186
|
16
|
187 # cg.dendrogram_col.linkage # linkage matrix for columns
|
|
188 # cg.dendrogram_row.linkage # linkage matrix for rows
|
|
189 #plt.savefig(r"results/" + name + "_heatmap.png")
|
|
190 plt.savefig(htmlresource+"/heatmap.png",bbox_inches='tight')
|
|
191 if pdf == 'PDF_Yes':
|
|
192 plt.savefig(htmlresource+"/heatmap.pdf", bbox_inches='tight')
|
|
193 #shutil.copyfile("heatmap.pdf",heatmapfn) #
|
|
194 #plt.show()
|
6
|
195
|
16
|
196 def deviationFromMeanHeatMap(name,devList, pdf, htmlresource):
|
|
197 localDevList = devList[:]
|
|
198 localDevList.insert(0, name)
|
|
199 dir_path = os.path.dirname(os.path.realpath(__file__))
|
|
200 j_fname = dir_path+ "/data/congodata_deviationfromthemean.csv"
|
|
201 #j_fname = r"data/congodata_deviationfromthemean.csv"
|
|
202 congo_df = pd.read_csv(j_fname)
|
|
203 congo_df.drop('Colour', axis=1, inplace=True)
|
|
204 congo_df.loc[congo_df.index.max() + 1] = localDevList
|
|
205 ysize = len(congo_df) * 20 / 97.0 # make vertical size equivlanet 20' is ok for 97.
|
|
206 congo_df.set_index('Strain', inplace=True)
|
|
207 cg = sns.clustermap(congo_df, method='ward',cmap = "RdBu_r", col_cluster=False, yticklabels = congo_df.index.values,figsize = (10,ysize))
|
|
208 plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=8) # get y labels printed horizontally
|
|
209 ax = cg.ax_heatmap
|
|
210 title = "Variant Antigen Profiles of $\itTrypanosoma$ $\itcongolense$ expressed as the deviation from the mean phylotypes "
|
|
211 title +="\nproportions of the sample cohort. Dendrogram reflects the relationships amongst the VSG repertoires of "
|
|
212 title +="each \nstrain. Strains were isolated from multiple African countries as described in Silva Pereira et al. (2018)."
|
|
213 title +="\nData was produced with the 'Variant Antigen Profiler' (Silva Pereira et al., 2019)."
|
|
214 #ax.set_title(title,ha = "center", va = "bottom",wrap = "True")
|
|
215 ax.text(-0.2, -0.05, title, va="top", transform=ax.transAxes, wrap="True")
|
|
216 plt.savefig(htmlresource+"/dheatmap.png",bbox_inches='tight')
|
|
217 if pdf == 'PDF_Yes':
|
|
218 plt.savefig(htmlresource+"/dheatmap.pdf", bbox_inches='tight')
|
|
219 #shutil.copyfile("dheatmap.pdf",dhmapfn)
|
|
220 #plt.show()
|
6
|
221
|
|
222
|
16
|
223 def plotPCA(name, freqList, pdf, htmlresource):
|
|
224 localFreqList = freqList[:]
|
|
225 localFreqList.insert(0, name)
|
|
226 localFreqList.append(name)
|
|
227 dir_path = os.path.dirname(os.path.realpath(__file__))
|
|
228 j_fname = dir_path + "/data/congodata.csv"
|
|
229 #j_fname = r"data/congodata.csv"
|
|
230 congo_df = pd.read_csv(j_fname)
|
|
231 congo_df.loc[congo_df.index.max() + 1] = localFreqList
|
|
232 # print(congo_df.tail(2))
|
|
233 myColours = congo_df['Colour']
|
|
234 myCountries = congo_df.drop_duplicates('Colour')['Colour'].tolist()
|
|
235 # print(myCountries)
|
|
236 congo_df.drop('Colour', axis=1, inplace=True)
|
|
237 congo_df.set_index('Strain', inplace=True)
|
|
238 dataArray = congo_df.as_matrix()
|
|
239 pcaResult = PCA(dataArray)
|
|
240 # pcaResult.center(0)
|
|
241 # can't seem to find a simple way of prooducing a decent legend.
|
|
242 # going to seperate items in to different countires.
|
|
243 compoundList = []
|
|
244 for i in myCountries:
|
|
245 compoundList.append([])
|
6
|
246
|
16
|
247 i = 0
|
|
248 for item in pcaResult.Y:
|
|
249 col = myCountries.index(myColours[i])
|
|
250 compoundList[col].append(-item[0])
|
|
251 compoundList[col].append(item[1])
|
|
252 i = i + 1
|
|
253 colormap = plt.cm.tab20 # nipy_spectral, Set1,Paired
|
|
254 cols = [colormap(i) for i in np.linspace(0, 1, 20)]
|
|
255 fig, ax = plt.subplots(figsize=(9, 6))
|
|
256 #plt.figure(num=1,figsize=(12, 6))
|
|
257 i = 0
|
|
258 for d in myCountries:
|
|
259 a = compoundList[i]
|
|
260 b = a[::2]
|
|
261 c = a[1::2]
|
|
262 ax.scatter(b, c, color=cols[i], label=myCountries[i])
|
|
263 i = i + 1
|
|
264 leg = ax.legend( bbox_to_anchor=(1.02,1.02), loc = "upper left") #move legend out of plot
|
|
265 title = "Principal Component Analysis of the Variant Antigen Profiles of $\itTrypanosoma$ $\itcongolense$. " \
|
|
266 "The plot reflects the\nrelationships amongst the VSG repertoires of each strain. Strains are color-coded " \
|
|
267 "by location of collection according\nto key. Strains were isolated from multiple African countries as described in Silva Pereira et al. (2018)."
|
|
268 title +="\nData was produced with the 'Variant Antigen Profiler' (Silva Pereira et al., 2019)."
|
|
269 #plt.title(title, ha = "center", va = "bottom",wrap = "True")
|
|
270 tx = ax.text(-0.1, -0.07, title, va="top", transform=ax.transAxes, wrap="True")
|
|
271 #fig.add_axes([0,0.05,1.05,1.05])
|
|
272 #fig.tight_layout(rect=[0, 0.03, 1, 0.95])
|
|
273 fig.subplots_adjust(bottom = 0.3)
|
6
|
274
|
16
|
275 fig.savefig(htmlresource+"/vapPCA.png", bbox_extra_artists=(leg,tx), bbox_inches='tight')
|
|
276 #fig.savefig(htmlresource+"/vapPCA.png", bbox_extra_artists=(leg,))
|
|
277 if pdf == 'PDF_Yes':
|
|
278 fig.savefig(htmlresource+"/vapPCA.pdf",bbox_extra_artists=(leg,tx), bbox_inches='tight')
|
|
279 #shutil.copyfile("vapPCA.pdf",PCAfn) # my $namechange = "mv ".$input."_k65/contigs.fa ".$input.".fa";
|
|
280 #plt.show()
|
|
281
|
|
282 def createHTML(name,htmlfn,freqList,devList):
|
|
283 #assumes imgs are heatmap.png, dheatmap.png, vapPCA.png and already in htmlresource
|
|
284 htmlString = r"<html><title>T.congolense VAP</title><body><div style='text-align:center'><h2><i>Trypanosoma congolense</i> Variant Antigen Profile</h2><h3>"
|
|
285 htmlString += name
|
|
286 htmlString += r"<br/>Genomic Analysis</h3>"
|
|
287 htmlString += "<p style = 'margin-left:23%; margin-right:23%'>Table Legend: Variant Antigen Profiles of <i>Trypanosoma congolense</i> estimated as the phylotype proportion and as the deviation from the mean across the sample cohort.<br>" \
|
|
288 "Data was produced with the 'Variant Antigen Profiler' (Silva Pereira et al., 2019).</p>"
|
|
289 htmlString += r"<style> table, th, tr, td {border: 1px solid black; border-collapse: collapse;}</style>"
|
|
290
|
|
291 htmlString += r"<table style='width:50%;margin-left:25%;text-align:center'><tr><th>Phylotype</th><th>Relative Frequency</th><th>Deviation from Mean</th></tr>"
|
|
292 tabString = ""
|
|
293 # flush out table with correct values
|
|
294 for i in range(0, 15):
|
|
295 f= format(freqList[i],'.4f')
|
|
296 d= format(devList[i],'.4f')
|
|
297 tabString += "<tr><td>phy" + str(i + 1) + "</td><td>" + f + "</td><td>" + d + "</td></tr>"
|
|
298 #tabString += "<tr><td>phy" + str(i + 1) + "</td><td>" + str(freqList[i]) + "</td><td>" + str(devList[i]) + "</td></tr>"
|
|
299 htmlString += tabString + "</table><br><br><br><br><br>"
|
6
|
300
|
16
|
301 htmlString += r"<h3>The Variation Heat Map and Dendrogram</h3><p>The absolute phylotype variation in the sample compared to model dataset.</p>"
|
|
302 imgString = r"<img src = 'heatmap.png' alt='Variation Heatmap' style='max-width:100%'><br><br>"
|
|
303 htmlString += imgString
|
|
304
|
|
305 htmlString += r"<br><br><br><br><h3>The Deviation Heat Map and Dendrogram</h3><p>The phylotype variation expressed as the deviation from your sample mean compared to the model dataset</p>"
|
|
306 imgString = r"<img src = 'dheatmap.png' alt='Deviation Heatmap' style='max-width:100%'><br><br>"
|
|
307 htmlString += imgString
|
|
308
|
|
309 htmlString += r"<br><br><br><br><h3>The Variation PCA plot</h3><p>PCA analysis corresponding to absolute variation. Colour coded according to location</p>"
|
|
310 imgString = r"<img src = 'vapPCA.png' alt='PCA Analysis' style='max-width:100%'><br><br>"
|
|
311 htmlString += imgString + r"</div></body></html>"
|
|
312
|
|
313 with open(htmlfn, "w") as htmlfile:
|
|
314 htmlfile.write(htmlString)
|
|
315
|
|
316
|
|
317 def assemble(args,dict):
|
|
318 #argdict = {'name': 2, 'pdfexport': 3, 'kmers': 4, 'inslen': 5, 'covcut': 6, 'forward': 7, 'reverse': 8, 'html_file': 9,'html_resource': 10}
|
|
319 assembleWithVelvet(args[dict['name']],args[dict['kmers']], args[dict['inslen']],args[dict['covcut']], args[dict['forward']],args[dict['reverse']])
|
|
320 contigTranslation(args[dict['name']])
|
|
321 myCountList = HMMerMotifSearch(args[dict['name']])
|
|
322 myFreqList = relativeFrequencyTable(myCountList, args[dict['name']],args[dict['html_resource']]) # saves out inputname_relative_frequncy.csv
|
|
323 # myFreqList = [0.111670020120724, 0.103621730382294, 0.0784708249496982, 0.0110663983903421,
|
|
324 # 0.0543259557344064, 0.0563380281690141, 0.0734406438631791, 0.0160965794768612,
|
|
325 # 0.0110663983903421, 0.028169014084507, 0.126760563380282, 0.0583501006036217, 0.062374245472837,
|
|
326 # 0.0372233400402414, 0.17102615694165]
|
|
327
|
|
328
|
|
329 myDevList = getDeviationFromMean(myFreqList, args[dict['name']], args[dict['html_resource']]) # saves out inputname_deviation_from_mean.csv
|
|
330 relativeFrequencyHeatMap(args[dict['name']], myFreqList,args[dict['pdfexport']], args[dict['html_resource']])
|
|
331 deviationFromMeanHeatMap(args[dict['name']], myDevList,args[dict['pdfexport']], args[dict['html_resource']])
|
|
332 plotPCA(args[dict['name']], myFreqList,args[dict['pdfexport']], args[dict['html_resource']])
|
|
333 createHTML(args[dict['name']], args[dict['html_file']], myFreqList, myDevList) # assumes imgs are heatmap.png, dheatmap.png, vapPCA.png and already in htmlresource
|
|
334
|
|
335 def contigs(args,dict):
|
|
336 #argdict = {'name': 2, 'pdfexport': 3, 'contigs': 4, 'html_file': 5, 'html_resource': 6}
|
|
337
|
|
338 shutil.copyfile(args[dict['contigs']], args[dict['name']]+".fa")
|
6
|
339
|
16
|
340
|
|
341
|
|
342 contigTranslation(args[dict['name']])
|
|
343 myCountList = HMMerMotifSearch(args[dict['name']])
|
|
344 myFreqList = relativeFrequencyTable(myCountList, args[dict['name']],
|
|
345 args[dict['html_resource']]) # saves out inputname_relative_frequncy.csv
|
|
346 # myFreqList = [0.111670020120724, 0.103621730382294, 0.0784708249496982, 0.0110663983903421,
|
|
347 # 0.0543259557344064, 0.0563380281690141, 0.0734406438631791, 0.0160965794768612,
|
|
348 # 0.0110663983903421, 0.028169014084507, 0.126760563380282, 0.0583501006036217, 0.062374245472837,
|
|
349 # 0.0372233400402414, 0.17102615694165]
|
|
350
|
|
351
|
|
352 myDevList = getDeviationFromMean(myFreqList, args[dict['name']],
|
|
353 args[dict['html_resource']]) # saves out inputname_deviation_from_mean.csv
|
|
354 relativeFrequencyHeatMap(args[dict['name']], myFreqList, args[dict['pdfexport']], args[dict['html_resource']])
|
|
355 deviationFromMeanHeatMap(args[dict['name']], myDevList, args[dict['pdfexport']], args[dict['html_resource']])
|
|
356 plotPCA(args[dict['name']], myFreqList, args[dict['pdfexport']], args[dict['html_resource']])
|
|
357 createHTML(args[dict['name']], args[dict['html_file']], myFreqList,
|
|
358 myDevList) # assumes imgs are heatmap.png, dheatmap.png, vapPCA.png and already in htmlresource
|
6
|
359
|
|
360
|
16
|
361 def genomicProcess(inputname, exportpdf, forwardFN, reverseFN, htmlfile, htmlresource):
|
|
362 assembleWithVelvet(inputname,forwardFN,reverseFN)
|
|
363 contigTranslation(inputname)
|
|
364 myCountList = HMMerMotifSearch(inputname)
|
|
365 myFreqList = relativeFrequencyTable(myCountList, inputname, htmlresource) # saves out inputname_relative_frequncy.csv
|
|
366 #myFreqList = [0.111670020120724, 0.103621730382294, 0.0784708249496982, 0.0110663983903421,
|
|
367 # 0.0543259557344064, 0.0563380281690141, 0.0734406438631791, 0.0160965794768612,
|
|
368 # 0.0110663983903421, 0.028169014084507, 0.126760563380282, 0.0583501006036217, 0.062374245472837,
|
|
369 # 0.0372233400402414, 0.17102615694165]
|
|
370
|
|
371
|
|
372 myDevList = getDeviationFromMean(myFreqList, inputname,htmlresource) # saves out inputname_deviation_from_mean.csv
|
|
373
|
|
374 relativeFrequencyHeatMap(inputname, myFreqList, exportpdf, htmlresource)
|
|
375 deviationFromMeanHeatMap(inputname, myDevList, exportpdf, htmlresource)
|
|
376 plotPCA(inputname, myFreqList, exportpdf, htmlresource)
|
|
377 createHTML(inputname, htmlfile, myFreqList,myDevList) # assumes imgs are heatmap.png, dheatmap.png, vapPCA.png and already in htmlresource
|
|
378 return
|
6
|
379
|
|
380
|
|
381
|
16
|
382 if __name__ == "__main__":
|
|
383 #contigTranslation('Tcongo')
|
|
384 #contigTranslation('Test')
|
|
385 #newHMMerMotifSearch('Test')
|
|
386 #HMMerMotifSearch('Tcongo')
|
|
387 #sys.exit()
|
6
|
388
|
|
389
|
16
|
390 myFreqList = [0.111670020120724, 0.103621730382294, 0.0784708249496982, 0.0110663983903421,
|
|
391 0.0543259557344064, 0.0563380281690141, 0.0734406438631791, 0.0160965794768612,
|
|
392 0.0110663983903421, 0.028169014084507, 0.126760563380282, 0.0583501006036217, 0.062374245472837,
|
|
393 0.0372233400402414, 0.17102615694165]
|
|
394 myDevList = [0.000790026,0.0073109,-0.001151769,-0.004502933,-0.013687421,-0.016159773,0.021689891,
|
|
395 0.007863809,-0.003133585,-0.001111709,-0.01313879,0.0036997,-0.00935284,0.005640693,0.015243802]
|
|
396
|
|
397 relativeFrequencyHeatMap('test', myFreqList, "PDF_Yes","results")
|
|
398 deviationFromMeanHeatMap('test', myDevList, "PDF_Yes","results")
|
|
399 plotPCA('test',myFreqList,"PDF_Yes","results")
|
|
400
|
|
401 createHTML('test',"results/test.html", myFreqList, myDevList)
|
|
402 #contigTranslation("Test")
|
|
403 #myCountList = HMMerMotifSearch("Test")
|
6
|
404
|
|
405
|
16
|
406 sys.exit()
|