annotate Tryp_G.py @ 30:6669fd407dc9 draft default tip

Uploaded
author johnheap
date Fri, 07 Jun 2019 11:07:05 -0400
parents c4e87b277576
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
16
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
1 """
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
2 * Copyright 2018 University of Liverpool
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
3 * Author: John Heap, Computational Biology Facility, UoL
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
4 * Based on original scripts of Sara Silva Pereira, Institute of Infection and Global Health, UoL
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
5 *
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
6 * Licensed under the Apache License, Version 2.0 (the "License");
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
7 * you may not use this file except in compliance with the License.
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
8 * You may obtain a copy of the License at
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
9 *
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
10 * http://www.apache.org/licenses/LICENSE-2.0
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
11 *
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
12 * Unless required by applicable law or agreed to in writing, software
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
13 * distributed under the License is distributed on an "AS IS" BASIS,
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
15 * See the License for the specific language governing permissions and
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
16 * limitations under the License.
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
17 *
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
18 """
6
e91e41380946 Uploaded
johnheap
parents: 3
diff changeset
19
16
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
20 import subprocess
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
21 import re
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
22 import os
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
23 import sys
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
24 import shutil
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
25 import pandas as pd
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
26 import numpy as np
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
27 import matplotlib as mpl
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
28 mpl.use('Agg')
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
29 import matplotlib.pyplot as plt
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
30 from matplotlib.mlab import PCA
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
31 import seaborn as sns
6
e91e41380946 Uploaded
johnheap
parents: 3
diff changeset
32
16
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
# Some globals for convenience.

# Labels of the 15 phylotypes. They are used as column names when reading the
# reference CSV in getDeviationFromMean and as the 'Phylotype' column of the
# CSV tables written by this module.
pList = [
    'P1', 'P2', 'P3', 'P4', 'P5',
    'P6', 'P7', 'P8', 'P9', 'P10',
    'P11', 'P12', 'P13', 'P14', 'P15',
]

# Suffix appended to the velveth/velvetg shell commands. It is empty by
# default; the commented-out value would append the tools' output to a log file.
quietString = ""  # " >>"+os.path.dirname(os.path.realpath(__file__))+"/log/Vap_log.txt 2>&1"
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
38
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
def assembleWithVelvet(name, kmers, inslen, covcut, fastq1name, fastq2name):
    """Assemble paired-end reads with velveth followed by velvetg.

    The assembly is written to the directory ``<name>_k<kmers>`` and its
    ``contigs.fa`` is copied to ``<name>.fa`` when both tools succeed.

    Parameters:
        name: prefix used for the velvet directory and the output FASTA.
        kmers: k-mer length handed to velveth (string or number).
        inslen: value for velvetg ``-ins_length`` (string or number).
        covcut: value for velvetg ``-cov_cutoff`` (string or number).
        fastq1name, fastq2name: the two FASTQ files given to velveth.

    Returns:
        "ok" on success, otherwise "Error in velveth" or "Error in velvetg"
        for the first tool that exits with a non-zero status.
    """
    # Accept numeric settings as well as strings; the commands are built by
    # string concatenation.
    kmers = str(kmers)
    inslen = str(inslen)
    covcut = str(covcut)
    workdir = name + "_k" + kmers

    # NOTE(review): the commands are run through the shell so that the
    # module-level quietString redirection keeps working. The arguments are
    # not quoted, so names containing spaces or shell metacharacters will
    # break (or alter) the command.
    commands = [
        ("velveth",
         "velveth " + workdir + " " + kmers + " -shortPaired -fastq "
         + fastq1name + " " + fastq2name + quietString),
        ("velvetg",
         "velvetg " + workdir + " -exp_cov auto -ins_length " + inslen
         + " -cov_cutoff " + covcut
         + " -clean yes -ins_length_sd 50 -min_pair_count 20" + quietString),
    ]
    for tool, argString in commands:
        print(argString)
        if subprocess.call(argString, shell=True) != 0:
            return "Error in " + tool

    # Equivalent of: mv <name>_k<kmers>/contigs.fa <name>.fa (but as a copy).
    shutil.copyfile(os.path.join(workdir, "contigs.fa"), name + ".fa")
    return "ok"
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
54
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
def contigTranslation(name):
    """Translate the contigs in ``<name>.fa`` with transeq.

    transeq is invoked with ``-frame=6`` and writes ``<name>_6frame.fas``.

    Returns:
        The exit status of the transeq command (0 means success). The
        original code computed this value but discarded it, so a failed
        translation could not be detected by callers.
    """
    argString = "transeq " + name + ".fa " + name + "_6frame.fas -frame=6 "
    print(argString)
    return subprocess.call(argString, shell=True)
6
e91e41380946 Uploaded
johnheap
parents: 3
diff changeset
59
e91e41380946 Uploaded
johnheap
parents: 3
diff changeset
60
16
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
def _readHmmHits(path):
    """Return the hit labels listed at the top of an hmmsearch report.

    The first 14 lines of the report are skipped. Every following line
    contributes its text from column 60 onward, until a line containing
    "inclusion" or an empty line is reached.
    NOTE(review): column 60 is presumably where hmmsearch prints the sequence
    name in its score table - confirm against the hmmsearch version in use.
    """
    hits = []
    with open(path, 'r') as hmmResult:
        # Hacky: the data we want starts on line 15 of the report.
        for _ in range(14):
            hmmResult.readline()
        for line in hmmResult:
            if "inclusion" in line:
                # Inclusion threshold reached.
                break
            if len(line) <= 1:
                # Blank line: end of the table.
                break
            # Strip only the newline, so a final line without one is not
            # truncated by a character.
            hits.append(line[60:].rstrip("\n"))
    return hits


def HMMerMotifSearch(name):
    """Search ``<name>_6frame.fas`` with every phylotype HMM and count hits.

    For each motif, hmmsearch is run with the profile
    ``data/Motifs/Phylotype<motif>.hmm`` (relative to this file) and its
    report is written to ``Phy<motif>.out`` in the current directory. Motifs
    belonging to the same phylotype are pooled and duplicate hits within a
    phylotype are counted once.

    Parameters:
        name: prefix of the six-frame translation file to search.

    Returns:
        A list of 16 integers: the number of distinct hits for phylotypes
        1..15, followed by their sum.

    Raises:
        OSError: if the hmmsearch executable cannot be started.
    """
    # One inner list per phylotype; each entry is a motif file suffix.
    motifGroups = [['1'], ['2a', '2b'], ['3'], ['4a', '4b', '4c'], ['5'], ['6'], ['7'],
                   ['8a', '8b'], ['9a', '9b', '9c'], ['10a', '10b'], ['11a', '11b'], ['12'],
                   ['13a', '13b', '13c', '13d'], ['14'], ['15a', '15b', '15c']]
    dir_path = os.path.dirname(os.path.realpath(__file__))
    phylopath = dir_path + "/data/Motifs/Phylotype"
    sixFrameFile = name + "_6frame.fas"

    lineCounts = []   # hits per individual motif, printed for information
    countList = []    # distinct hits per phylotype
    for group in motifGroups:
        groupHits = set()
        for motif in group:
            outName = "Phy" + motif + ".out"
            # Run without a shell: the report is captured through stdout
            # instead of a ">" redirection, so unusual characters in the
            # file names cannot alter the command.
            with open(outName, 'w') as hmmOut:
                subprocess.call(["hmmsearch", phylopath + motif + ".hmm", sixFrameFile],
                                stdout=hmmOut)

            # NOTE(review): the original opened this file for writing and
            # never wrote to or closed it. The (empty) file is still created
            # in case something else expects it, but the handle is closed.
            open(dir_path + "/data/" + "Phy" + motif + ".txt", 'w').close()

            hits = _readHmmHits(outName)
            lineCounts.append(len(hits))
            groupHits.update(hits)
        countList.append(len(groupHits))

    print(lineCounts)
    countList.append(sum(countList))
    return countList
6
e91e41380946 Uploaded
johnheap
parents: 3
diff changeset
120
e91e41380946 Uploaded
johnheap
parents: 3
diff changeset
121
e91e41380946 Uploaded
johnheap
parents: 3
diff changeset
122
16
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
def relativeFrequencyTable(countList, name, htmlresource):
    """Convert phylotype hit counts to relative frequencies and save them.

    Parameters:
        countList: 16 numbers - indices 0-14 are the counts for P1-P15 and
            index 15 is the total count.
        name: sample name, used as the prefix of the CSV file name.
        htmlresource: directory in which the CSV is written.

    Returns:
        A list of 15 relative frequencies (count / total). When the total is
        zero a list of 15 zeros is returned instead.

    Side effects:
        Writes ``<htmlresource>/<name>_relative_frequency.csv``, except when
        the total is zero.
    """
    total = float(countList[15])
    if total == 0:
        # NOTE(review): no CSV is written on this path - confirm that
        # downstream steps do not expect the file to exist.
        return [0] * 15

    relFreqList = [count / total for count in countList[:15]]

    data = {'Phylotype': pList, 'Relative Frequency': relFreqList}
    relFreq_df = pd.DataFrame(data)
    relFreq_df.to_csv(htmlresource + "/" + name + "_relative_frequency.csv")
    return relFreqList
6
e91e41380946 Uploaded
johnheap
parents: 3
diff changeset
136
e91e41380946 Uploaded
johnheap
parents: 3
diff changeset
137
e91e41380946 Uploaded
johnheap
parents: 3
diff changeset
138
e91e41380946 Uploaded
johnheap
parents: 3
diff changeset
139
16
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
def getDeviationFromMean(frequencyList, name, htmlresource):
    """Compute each phylotype's deviation from the reference cohort mean.

    The reference means are the column means of ``data/congodata.csv``
    (located next to this file); the columns are looked up by the names in
    the module-level ``pList``.

    Parameters:
        frequencyList: relative frequencies of the sample, one per entry of
            ``pList`` and in the same order.
        name: sample name, used as the prefix of the CSV file name.
        htmlresource: directory in which the CSV is written.

    Returns:
        A list with ``frequency - mean`` for every phylotype.

    Side effects:
        Writes ``<htmlresource>/<name>_deviation_from_mean.csv``.
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    referenceFile = dir_path + "/data/congodata.csv"
    congo_df = pd.read_csv(referenceFile)  # we get the means from congo_df

    devList = []
    for p, phylotype in enumerate(pList):
        mean = congo_df[phylotype].mean()
        devList.append(-(mean - frequencyList[p]))

    data = {'Phylotype': pList, 'Deviation from Mean': devList}
    dev_df = pd.DataFrame(data)
    dev_df.to_csv(htmlresource + "/" + name + "_deviation_from_mean.csv")
    return devList
6
e91e41380946 Uploaded
johnheap
parents: 3
diff changeset
156
e91e41380946 Uploaded
johnheap
parents: 3
diff changeset
157
16
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
def relativeFrequencyHeatMap(name, freqList, pdf, htmlresource):
    """Draw the clustered heat map of relative frequencies, sample included.

    The sample is appended as an extra row to the reference table read from
    ``data/congodata.csv`` (next to this file), the 'Colour' column is
    dropped, and the rows are clustered with seaborn (Ward linkage).

    Parameters:
        name: sample name; becomes the 'Strain' label of the added row.
        freqList: list of relative frequencies of the sample, one per
            phylotype column of the reference table.
        pdf: when equal to 'PDF_Yes' a PDF copy is saved as well.
        htmlresource: directory receiving ``heatmap.png`` (and
            ``heatmap.pdf``).
    """
    # Row for the sample: strain name followed by its frequencies.
    localFreqList = [name] + list(freqList)

    dir_path = os.path.dirname(os.path.realpath(__file__))
    congo_df = pd.read_csv(dir_path + "/data/congodata.csv")
    congo_df.drop('Colour', axis=1, inplace=True)
    congo_df.loc[congo_df.index.max() + 1] = localFreqList
    # Scale the figure height with the number of rows: 20 inches suits 97 rows.
    ysize = len(congo_df) * 20 / 97.0
    congo_df.set_index('Strain', inplace=True)

    cg = sns.clustermap(congo_df, method='ward', cmap="RdBu_r", col_cluster=False,
                        yticklabels=congo_df.index.values, figsize=(10, ysize))
    # Get the y labels printed horizontally.
    plt.setp(cg.ax_heatmap.yaxis.get_ticklabels(), rotation=0, fontsize=8)
    ax = cg.ax_heatmap

    # The backslashes are escaped explicitly; "\i" is not a valid Python
    # escape sequence, and the resulting text is unchanged.
    title = ("Variant Antigen Profiles of $\\itTrypanosoma$ $\\itcongolense$ estimated as the "
             "phylotype proportion across the\nsample cohort. "
             "Dendrogram reflects the relationships amongst the VSG repertoires of each strain. "
             "Strains\nwere isolated from multiple African countries as described in "
             "Silva Pereira et al. (2018)."
             "\nData was produced with the 'Variant Antigen Profiler' (Silva Pereira et al., 2019).")
    # The caption is placed below the heat map, in axes coordinates.
    ax.text(-0.15, -0.05, title, va="top", wrap=True, transform=ax.transAxes)

    fig = ax.figure
    fig.savefig(htmlresource + "/heatmap.png", bbox_inches='tight')
    if pdf == 'PDF_Yes':
        fig.savefig(htmlresource + "/heatmap.pdf", bbox_inches='tight')
    # Release the figure so that repeated plotting does not accumulate them.
    plt.close(fig)
6
e91e41380946 Uploaded
johnheap
parents: 3
diff changeset
195
16
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
def deviationFromMeanHeatMap(name, devList, pdf, htmlresource):
    """Draw the clustered heat map of deviations from the mean, sample included.

    The sample is appended as an extra row to the reference table read from
    ``data/congodata_deviationfromthemean.csv`` (next to this file), the
    'Colour' column is dropped, and the rows are clustered with seaborn
    (Ward linkage).

    Parameters:
        name: sample name; becomes the 'Strain' label of the added row.
        devList: list of deviations of the sample, one per phylotype column
            of the reference table.
        pdf: when equal to 'PDF_Yes' a PDF copy is saved as well.
        htmlresource: directory receiving ``dheatmap.png`` (and
            ``dheatmap.pdf``).
    """
    # Row for the sample: strain name followed by its deviations.
    localDevList = [name] + list(devList)

    dir_path = os.path.dirname(os.path.realpath(__file__))
    congo_df = pd.read_csv(dir_path + "/data/congodata_deviationfromthemean.csv")
    congo_df.drop('Colour', axis=1, inplace=True)
    congo_df.loc[congo_df.index.max() + 1] = localDevList
    # Scale the figure height with the number of rows: 20 inches suits 97 rows.
    ysize = len(congo_df) * 20 / 97.0
    congo_df.set_index('Strain', inplace=True)

    cg = sns.clustermap(congo_df, method='ward', cmap="RdBu_r", col_cluster=False,
                        yticklabels=congo_df.index.values, figsize=(10, ysize))
    # Get the y labels printed horizontally.
    plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=8)
    ax = cg.ax_heatmap

    # The backslashes are escaped explicitly; "\i" is not a valid Python
    # escape sequence, and the resulting text is unchanged.
    title = ("Variant Antigen Profiles of $\\itTrypanosoma$ $\\itcongolense$ expressed as the "
             "deviation from the mean phylotypes "
             "\nproportions of the sample cohort. Dendrogram reflects the relationships amongst "
             "the VSG repertoires of "
             "each \nstrain. Strains were isolated from multiple African countries as described in "
             "Silva Pereira et al. (2018)."
             "\nData was produced with the 'Variant Antigen Profiler' (Silva Pereira et al., 2019).")
    # The caption is placed below the heat map, in axes coordinates.
    ax.text(-0.2, -0.05, title, va="top", transform=ax.transAxes, wrap=True)

    fig = ax.figure
    fig.savefig(htmlresource + "/dheatmap.png", bbox_inches='tight')
    if pdf == 'PDF_Yes':
        fig.savefig(htmlresource + "/dheatmap.pdf", bbox_inches='tight')
    # Release the figure so that repeated plotting does not accumulate them.
    plt.close(fig)
6
e91e41380946 Uploaded
johnheap
parents: 3
diff changeset
221
e91e41380946 Uploaded
johnheap
parents: 3
diff changeset
222
16
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
def plotPCA(name, freqList, pdf, htmlresource):
    """Plot the first two principal components of the cohort plus the sample.

    The sample is appended to the reference table read from
    ``data/congodata.csv`` (next to this file) with its own name as both the
    'Strain' and the 'Colour' value, so it forms its own legend entry. Points
    are grouped and coloured by the 'Colour' column.

    Parameters:
        name: sample name.
        freqList: list of relative frequencies of the sample, one per
            phylotype column of the reference table.
        pdf: when equal to 'PDF_Yes' a PDF copy is saved as well.
        htmlresource: directory receiving ``vapPCA.png`` (and ``vapPCA.pdf``).
    """
    # Row for the sample: Strain, the frequencies, then Colour.
    localFreqList = [name] + list(freqList) + [name]

    dir_path = os.path.dirname(os.path.realpath(__file__))
    congo_df = pd.read_csv(dir_path + "/data/congodata.csv")
    congo_df.loc[congo_df.index.max() + 1] = localFreqList

    # Group label of every row, and the distinct labels in order of appearance.
    myColours = congo_df['Colour'].tolist()
    myCountries = congo_df.drop_duplicates('Colour')['Colour'].tolist()

    congo_df.drop('Colour', axis=1, inplace=True)
    congo_df.set_index('Strain', inplace=True)
    # DataFrame.as_matrix() no longer exists in current pandas; .values
    # returns the same array.
    dataArray = congo_df.values
    # NOTE(review): PCA comes from matplotlib.mlab, which recent Matplotlib
    # releases no longer provide - this needs a replacement before upgrading.
    pcaResult = PCA(dataArray)

    # There is no simple way of producing a decent legend from one scatter
    # call, so the points are separated per group and plotted group by group.
    pointsByCountry = dict((country, ([], [])) for country in myCountries)
    for colour, item in zip(myColours, pcaResult.Y):
        xs, ys = pointsByCountry[colour]
        xs.append(-item[0])  # first component is mirrored, as in the original plot
        ys.append(item[1])

    colormap = plt.cm.tab20  # alternatives: nipy_spectral, Set1, Paired
    cols = [colormap(value) for value in np.linspace(0, 1, 20)]
    fig, ax = plt.subplots(figsize=(9, 6))
    for idx, country in enumerate(myCountries):
        xs, ys = pointsByCountry[country]
        # Cycle through the 20 colours instead of failing on a 21st group.
        ax.scatter(xs, ys, color=cols[idx % len(cols)], label=country)

    # Move the legend out of the plot.
    leg = ax.legend(bbox_to_anchor=(1.02, 1.02), loc="upper left")

    # The backslashes are escaped explicitly; "\i" is not a valid Python
    # escape sequence, and the resulting text is unchanged.
    title = ("Principal Component Analysis of the Variant Antigen Profiles of "
             "$\\itTrypanosoma$ $\\itcongolense$. "
             "The plot reflects the\nrelationships amongst the VSG repertoires of each strain. "
             "Strains are color-coded "
             "by location of collection according\nto key. Strains were isolated from multiple "
             "African countries as described in Silva Pereira et al. (2018)."
             "\nData was produced with the 'Variant Antigen Profiler' (Silva Pereira et al., 2019).")
    tx = ax.text(-0.1, -0.07, title, va="top", transform=ax.transAxes, wrap=True)
    fig.subplots_adjust(bottom=0.3)

    # The legend and caption lie outside the axes, so they are passed
    # explicitly to be included in the tight bounding box.
    fig.savefig(htmlresource + "/vapPCA.png", bbox_extra_artists=(leg, tx), bbox_inches='tight')
    if pdf == 'PDF_Yes':
        fig.savefig(htmlresource + "/vapPCA.pdf", bbox_extra_artists=(leg, tx), bbox_inches='tight')
    # Release the figure so that repeated plotting does not accumulate them.
    plt.close(fig)
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
281
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
def createHTML(name, htmlfn, freqList, devList):
    """Write the HTML report for the genomic analysis.

    The page shows a table of relative frequency and deviation from the mean
    for the 15 phylotypes, followed by the three images ``heatmap.png``,
    ``dheatmap.png`` and ``vapPCA.png``, which are referenced by relative
    path and assumed to be already present in the report's resource
    directory.

    Parameters:
        name: sample name shown in the heading. It is HTML-escaped before
            being inserted.
        htmlfn: path of the HTML file to write.
        freqList: relative frequencies; the first 15 entries are used.
        devList: deviations from the mean; the first 15 entries are used.
    """
    # The sample name is user-supplied text, not markup.
    safeName = name.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

    # Fill the table with one row per phylotype, values to 4 decimal places.
    rows = []
    for i in range(15):
        f = format(freqList[i], '.4f')
        d = format(devList[i], '.4f')
        rows.append("<tr><td>phy" + str(i + 1) + "</td><td>" + f + "</td><td>" + d + "</td></tr>")

    parts = [
        r"<html><title>T.congolense VAP</title><body><div style='text-align:center'><h2><i>Trypanosoma congolense</i> Variant Antigen Profile</h2><h3>",
        safeName,
        r"<br/>Genomic Analysis</h3>",
        "<p style = 'margin-left:23%; margin-right:23%'>Table Legend: Variant Antigen Profiles of <i>Trypanosoma congolense</i> estimated as the phylotype proportion and as the deviation from the mean across the sample cohort.<br>"
        "Data was produced with the 'Variant Antigen Profiler' (Silva Pereira et al., 2019).</p>",
        r"<style> table, th, tr, td {border: 1px solid black; border-collapse: collapse;}</style>",
        r"<table style='width:50%;margin-left:25%;text-align:center'><tr><th>Phylotype</th><th>Relative Frequency</th><th>Deviation from Mean</th></tr>",
        "".join(rows),
        "</table><br><br><br><br><br>",
        r"<h3>The Variation Heat Map and Dendrogram</h3><p>The absolute phylotype variation in the sample compared to model dataset.</p>",
        r"<img src = 'heatmap.png' alt='Variation Heatmap' style='max-width:100%'><br><br>",
        r"<br><br><br><br><h3>The Deviation Heat Map and Dendrogram</h3><p>The phylotype variation expressed as the deviation from your sample mean compared to the model dataset</p>",
        r"<img src = 'dheatmap.png' alt='Deviation Heatmap' style='max-width:100%'><br><br>",
        r"<br><br><br><br><h3>The Variation PCA plot</h3><p>PCA analysis corresponding to absolute variation. Colour coded according to location</p>",
        r"<img src = 'vapPCA.png' alt='PCA Analysis' style='max-width:100%'><br><br>",
        r"</div></body></html>",
    ]

    with open(htmlfn, "w") as htmlfile:
        htmlfile.write("".join(parts))
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
315
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
316
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
def assemble(args,dict):
    """Run the whole pipeline starting from paired read files.

    Steps, in order: Velvet assembly, contig translation, HMMer motif
    search, relative-frequency table, deviation-from-mean table, the two
    heat maps, the PCA plot and finally the HTML report.

    args -- indexable collection of argument values (presumably the
            command-line argument list; confirm against the caller).
    dict -- maps an argument name to its index in ``args``.  Keys read here:
            'name', 'kmers', 'inslen', 'covcut', 'forward', 'reverse',
            'html_resource', 'pdfexport', 'html_file'.
            Example layout from the original author:
            {'name': 2, 'pdfexport': 3, 'kmers': 4, 'inslen': 5, 'covcut': 6,
             'forward': 7, 'reverse': 8, 'html_file': 9, 'html_resource': 10}
            (The parameter name shadows the builtin; it is kept so that
            existing callers are unaffected.)
    """
    def arg(key):
        # Resolve a named argument through the name -> index mapping.
        return args[dict[key]]

    sample = arg('name')
    assembleWithVelvet(sample, arg('kmers'), arg('inslen'), arg('covcut'),
                       arg('forward'), arg('reverse'))
    contigTranslation(sample)
    motif_counts = HMMerMotifSearch(sample)

    # Values are looked up only when first needed, so a missing key surfaces
    # at the same stage of the pipeline as it always did.
    resource_dir = arg('html_resource')
    # original note: saves out inputname_relative_frequncy.csv
    frequencies = relativeFrequencyTable(motif_counts, sample, resource_dir)
    # original note: saves out inputname_deviation_from_mean.csv
    deviations = getDeviationFromMean(frequencies, sample, resource_dir)

    pdf_flag = arg('pdfexport')
    relativeFrequencyHeatMap(sample, frequencies, pdf_flag, resource_dir)
    deviationFromMeanHeatMap(sample, deviations, pdf_flag, resource_dir)
    plotPCA(sample, frequencies, pdf_flag, resource_dir)

    # createHTML assumes imgs are heatmap.png, dheatmap.png, vapPCA.png and
    # already in htmlresource.
    createHTML(sample, arg('html_file'), frequencies, deviations)
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
334
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
def contigs(args,dict):
    """Run the pipeline starting from an already assembled contigs file.

    The supplied contigs file is first copied to ``<name>.fa``; after that
    the steps are: contig translation, HMMer motif search,
    relative-frequency table, deviation-from-mean table, the two heat maps,
    the PCA plot and the HTML report.

    args -- indexable collection of argument values (presumably the
            command-line argument list; confirm against the caller).
    dict -- maps an argument name to its index in ``args``.  Keys read here:
            'contigs', 'name', 'html_resource', 'pdfexport', 'html_file'.
            Example layout from the original author:
            {'name': 2, 'pdfexport': 3, 'contigs': 4, 'html_file': 5,
             'html_resource': 6}
            (The parameter name shadows the builtin; it is kept so that
            existing callers are unaffected.)
    """
    def arg(key):
        # Resolve a named argument through the name -> index mapping.
        return args[dict[key]]

    # Copy the user's contigs to "<name>.fa"; the later steps are only given
    # the sample name.
    contigs_path = arg('contigs')
    sample = arg('name')
    shutil.copyfile(contigs_path, sample + ".fa")

    contigTranslation(sample)
    motif_counts = HMMerMotifSearch(sample)

    # Values are looked up only when first needed, so a missing key surfaces
    # at the same stage of the pipeline as it always did.
    resource_dir = arg('html_resource')
    # original note: saves out inputname_relative_frequncy.csv
    frequencies = relativeFrequencyTable(motif_counts, sample, resource_dir)
    # original note: saves out inputname_deviation_from_mean.csv
    deviations = getDeviationFromMean(frequencies, sample, resource_dir)

    pdf_flag = arg('pdfexport')
    relativeFrequencyHeatMap(sample, frequencies, pdf_flag, resource_dir)
    deviationFromMeanHeatMap(sample, deviations, pdf_flag, resource_dir)
    plotPCA(sample, frequencies, pdf_flag, resource_dir)

    # createHTML assumes imgs are heatmap.png, dheatmap.png, vapPCA.png and
    # already in htmlresource.
    createHTML(sample, arg('html_file'), frequencies, deviations)
6
e91e41380946 Uploaded
johnheap
parents: 3
diff changeset
359
e91e41380946 Uploaded
johnheap
parents: 3
diff changeset
360
16
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
def genomicProcess(inputname, exportpdf, forwardFN, reverseFN, htmlfile, htmlresource):
    """Run the pipeline from paired read files using explicit parameters.

    inputname    -- sample name handed to every pipeline step.
    exportpdf    -- PDF export flag forwarded to the plotting functions.
    forwardFN    -- forward reads file, forwarded to assembleWithVelvet.
    reverseFN    -- reverse reads file, forwarded to assembleWithVelvet.
    htmlfile     -- path of the HTML report handed to createHTML.
    htmlresource -- resource directory forwarded to the table and plot steps.

    Returns None.
    """
    # NOTE(review): assemble() in this file calls assembleWithVelvet with six
    # arguments (name, kmers, inslen, covcut, forward, reverse); this call
    # passes only three.  Confirm against assembleWithVelvet's signature.
    assembleWithVelvet(inputname, forwardFN, reverseFN)
    contigTranslation(inputname)
    motif_counts = HMMerMotifSearch(inputname)

    # original note: saves out inputname_relative_frequncy.csv
    frequencies = relativeFrequencyTable(motif_counts, inputname, htmlresource)
    # original note: saves out inputname_deviation_from_mean.csv
    deviations = getDeviationFromMean(frequencies, inputname, htmlresource)

    relativeFrequencyHeatMap(inputname, frequencies, exportpdf, htmlresource)
    deviationFromMeanHeatMap(inputname, deviations, exportpdf, htmlresource)
    plotPCA(inputname, frequencies, exportpdf, htmlresource)

    # createHTML assumes imgs are heatmap.png, dheatmap.png, vapPCA.png and
    # already in htmlresource.
    createHTML(inputname, htmlfile, frequencies, deviations)
6
e91e41380946 Uploaded
johnheap
parents: 3
diff changeset
379
e91e41380946 Uploaded
johnheap
parents: 3
diff changeset
380
e91e41380946 Uploaded
johnheap
parents: 3
diff changeset
381
16
c4e87b277576 Uploaded
johnheap
parents: 6
diff changeset
if __name__ == "__main__":
    # Stand-alone run with canned data: skips assembly, translation and the
    # motif search, and goes straight to the plots and the HTML report using
    # fixed frequency/deviation values (one entry per phylotype, 15 in all).
    myFreqList = [
        0.111670020120724,
        0.103621730382294,
        0.0784708249496982,
        0.0110663983903421,
        0.0543259557344064,
        0.0563380281690141,
        0.0734406438631791,
        0.0160965794768612,
        0.0110663983903421,
        0.028169014084507,
        0.126760563380282,
        0.0583501006036217,
        0.062374245472837,
        0.0372233400402414,
        0.17102615694165,
    ]
    myDevList = [
        0.000790026,
        0.0073109,
        -0.001151769,
        -0.004502933,
        -0.013687421,
        -0.016159773,
        0.021689891,
        0.007863809,
        -0.003133585,
        -0.001111709,
        -0.01313879,
        0.0036997,
        -0.00935284,
        0.005640693,
        0.015243802,
    ]

    # Draw the three figures for sample 'test' with "results" as the
    # resource directory.
    relativeFrequencyHeatMap('test', myFreqList, "PDF_Yes", "results")
    deviationFromMeanHeatMap('test', myDevList, "PDF_Yes", "results")
    plotPCA('test', myFreqList, "PDF_Yes", "results")

    # Build the report page from the same canned values.
    createHTML('test', "results/test.html", myFreqList, myDevList)

    sys.exit()