annotate Tryp_G.py @ 10:320bdfa4d927 draft default tip

Deleted selected files
author johnheap
date Wed, 04 Jul 2018 11:37:20 -0400
parents cfb25df43776
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
1 """
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
2 * Copyright 2018 University of Liverpool
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
3 * Author: John Heap, Computational Biology Facility, UoL
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
4 * Based on original scripts of Sara Silva Pereira, Institute of Infection and Global Health, UoL
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
5 *
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
6 * Licensed under the Apache License, Version 2.0 (the "License");
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
7 * you may not use this file except in compliance with the License.
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
8 * You may obtain a copy of the License at
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
9 *
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
10 * http://www.apache.org/licenses/LICENSE-2.0
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
11 *
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
12 * Unless required by applicable law or agreed to in writing, software
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
13 * distributed under the License is distributed on an "AS IS" BASIS,
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
15 * See the License for the specific language governing permissions and
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
16 * limitations under the License.
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
17 *
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
18 """
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
19
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
20 import subprocess
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
21 import re
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
22 import os
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
23 import sys
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
24 import shutil
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
25 import pandas as pd
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
26 import numpy as np
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
27 import matplotlib as mpl
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
28 mpl.use('Agg')
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
29 import matplotlib.pyplot as plt
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
30 from matplotlib.mlab import PCA
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
31 import seaborn as sns
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
32
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
33 # some globals for convenience
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
34
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
# Column labels of the fifteen phylotypes, in order P1..P15.
pList = "P1 P2 P3 P4 P5 P6 P7 P8 P9 P10 P11 P12 P13 P14 P15".split()

# Suffix appended to some external command lines.  It is empty, so the tools'
# output is shown.  To send that output to a log file instead, use a shell
# redirection such as:
#   " >>" + os.path.dirname(os.path.realpath(__file__)) + "/log/Vap_log.txt 2>&1"
quietString = ""
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
38
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
def assembleWithVelvet(name, kmers, inslen, covcut, fastq1name,fastq2name):
    """Assemble paired-end reads with Velvet and copy the contigs to <name>.fa.

    Runs ``velveth`` and then ``velvetg`` through the shell, using
    ``<name>_k<kmers>`` as the Velvet working directory, and finally copies
    ``contigs.fa`` from that directory to ``<name>.fa``.

    Parameters:
        name: prefix of the working directory and of the output FASTA file.
        kmers: k-mer length handed to velveth (string or number).
        inslen: value for velvetg ``-ins_length`` (string or number).
        covcut: value for velvetg ``-cov_cutoff`` (string or number).
        fastq1name: first FASTQ file of the read pair.
        fastq2name: second FASTQ file of the read pair.

    Returns:
        "ok" on success, otherwise "Error in velveth" or "Error in velvetg"
        when the corresponding command exits with a non-zero status.
    """
    try:
        from shlex import quote  # Python 3
    except ImportError:
        from pipes import quote  # Python 2

    # Callers pass these as strings; coerce so plain numbers work as well.
    kmers = str(kmers)
    inslen = str(inslen)
    covcut = str(covcut)
    workdir = name + "_k" + kmers

    # Every user-supplied value is shell-quoted so that names containing
    # spaces or shell metacharacters stay single arguments; ordinary names
    # are left unchanged by quote().  quietString is appended unquoted
    # because it is meant to carry a shell redirection.
    argString = ("velveth " + quote(workdir) + " " + quote(kmers)
                 + " -shortPaired -fastq " + quote(fastq1name) + " "
                 + quote(fastq2name) + quietString)
    print(argString)
    if subprocess.call(argString, shell=True) != 0:
        return "Error in velveth"

    argString = ("velvetg " + quote(workdir) + " -exp_cov auto -ins_length "
                 + quote(inslen) + " -cov_cutoff " + quote(covcut)
                 + " -clean yes -ins_length_sd 50 -min_pair_count 20"
                 + quietString)
    print(argString)
    if subprocess.call(argString, shell=True) != 0:
        return "Error in velvetg"

    # Publish the assembly next to the working directory under the sample name.
    shutil.copyfile(os.path.join(workdir, "contigs.fa"), name + ".fa")
    return "ok"
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
54
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
def contigTranslation(name):
    """Translate <name>.fa into <name>_6frame.fas with ``transeq -frame=6``.

    The command is run through the shell.  A non-zero exit status does not
    abort the pipeline (the function always returns None, as before), but it
    is now reported on stdout instead of being silently discarded.

    Parameters:
        name: prefix shared by the input ``.fa`` and output ``_6frame.fas``.
    """
    try:
        from shlex import quote  # Python 3
    except ImportError:
        from pipes import quote  # Python 2

    # File names are shell-quoted so unusual characters cannot split or
    # alter the command; ordinary names are left unchanged by quote().
    argString = ("transeq " + quote(name + ".fa") + " "
                 + quote(name + "_6frame.fas") + " -frame=6 ")
    print(argString)
    returncode = subprocess.call(argString, shell=True)
    if returncode != 0:
        # Reported on stdout (like the command echo above) rather than raised,
        # so callers that ignore the result keep working.
        print("Warning: transeq exited with status " + str(returncode))
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
64
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
65
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
def HMMerMotifSearch(name):
    """Count hmmsearch hits per phylotype in ``<name>_6frame.fas``.

    For each of the 28 motif profiles ``data/Motifs/Phylotype<motif>.hmm``
    (located relative to this script) ``hmmsearch`` is run through the shell
    and its report is redirected to ``Phy<motif>.out`` in the current
    directory.  The first 14 lines of each report are skipped; the following
    lines are read until one contains "inclusion" or is blank.  From every
    such line the text from column 60 onwards is kept as the hit identifier
    (presumably the sequence name column of the hmmsearch table - verify
    against the hmmsearch version in use).  Hits of the motifs that belong to
    the same phylotype are pooled and duplicates are counted once.

    Parameters:
        name: prefix of the ``<name>_6frame.fas`` file to search.

    Returns:
        A list of 16 integers: the unique hit counts of phylotypes 1-15
        followed by the sum of those counts.
    """
    motifs = ['1', '2a', '2b', '3', '4a', '4b', '4c', '5', '6', '7', '8a', '8b', '9a', '9b',
              '9c', '10a', '10b', '11a', '11b', '12', '13a', '13b', '13c', '13d', '14', '15a', '15b', '15c']
    # Number of consecutive entries of `motifs` that make up phylotypes 1..15.
    concatGroups = [1, 2, 1, 3, 1, 1, 1, 2, 3, 2, 2, 1, 4, 1, 3]

    dir_path = os.path.dirname(os.path.realpath(__file__))
    phylopath = dir_path + "/data/Motifs/Phylotype"

    lineCounts = []
    compoundList = []
    for motif in motifs:
        reportName = "Phy" + motif + ".out"
        argString = "hmmsearch " + phylopath + motif + ".hmm " + name + "_6frame.fas > " + reportName
        subprocess.call(argString, shell=True)

        # Earlier code opened data/Phy<motif>.txt for writing and never closed
        # it.  The file is still created/truncated (in case something relies on
        # its presence) but the handle is released immediately.
        open(dir_path + "/data/" + "Phy" + motif + ".txt", 'w').close()

        outList = []
        with open(reportName, 'r') as hmmResult:
            # The data we want starts on line 15 of the report.
            for _ in range(14):
                hmmResult.readline()
            for line in hmmResult:
                # Stop at the inclusion-threshold marker or at the blank line
                # that ends the table.
                if "inclusion" in line or len(line) <= 1:
                    break
                outList.append(line[60:-1] + "\n")
        compoundList.append(outList)
        lineCounts.append(len(outList))

    print(lineCounts)

    # Pool the hit lists motif group by motif group and count unique hits.
    countList = []
    start = 0
    for groupSize in concatGroups:
        pooled = set()
        for hits in compoundList[start:start + groupSize]:
            pooled.update(hits)
        countList.append(len(pooled))
        start += groupSize
    countList.append(sum(countList))  # grand total goes in the last slot
    return countList
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
125
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
126 """
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
127 def HMMerMotifSearch(name):
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
128 motifs = ['1', '2a', '2b', '3', '4a', '4b', '4c', '5', '6', '7', '8a', '8b', '9a', '9b',
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
129 '9c', '10a', '10b', '11a', '11b', '12', '13a', '13b', '13c', '13d', '14', '15a', '15b', '15c']
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
130 lineCounts = []
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
131 compoundList = []
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
132 dir_path = os.path.dirname(os.path.realpath(__file__))
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
133 phylopath = dir_path+"/data/Motifs/Phylotype"
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
134 for m in motifs:
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
135 argString = "hmmsearch "+phylopath + m + ".hmm " + name + "_6frame.fas > Phy" + m + ".out" #+quietString
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
136 #argString = "hmmsearch "+phylopath + m + ".hmm " + dir_path+"/data/Test_6frame.fas > Phy" + m + ".out"
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
137 print(argString)
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
138 subprocess.call(argString, shell=True)
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
139
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
140 hmmResult = open("Phy" + m + ".out", 'r')
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
141 tempout = open(dir_path+"/data/"+"Phy" + m + ".txt", 'w')
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
142 regex = r"NODE_[0-9]{1,7}_length_[0-9]{1,7}_cov_[0-9]{1,10}.[0-9]{1,7}_[0-9]{1,2}"
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
143 n = 0
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
144 outList = []
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
145 for line in hmmResult:
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
146 m = re.search(regex, line)
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
147 if m:
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
148 tempout.write(m.group() + "\n")
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
149 outList.append(""+m.group()+"\n")
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
150 n += 1
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
151 if re.search(r"inclusion", line):
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
152 print("inclusion threshold reached")
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
153 break
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
154 compoundList.append(outList)
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
155 lineCounts.append(n)
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
156 hmmResult.close()
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
157 #tempout.close()
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
158 print(lineCounts)
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
159 motifGroups = [['1'], ['2a', '2b'], ['3'], ['4a', '4b', '4c'], ['5'], ['6'], ['7'], ['8a', '8b'], ['9a', '9b',
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
160 '9c'],
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
161 ['10a', '10b'], ['11a', '11b'], ['12'], ['13a', '13b', '13c', '13d'], ['14'], ['15a', '15b', '15c']]
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
162 concatGroups = [1, 2, 1, 3, 1, 1, 1, 2, 3, 2, 2, 1, 4, 1, 3]
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
163 countList = []
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
164 countIndex = 0
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
165 totalCount = 0
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
166
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
167 for c in concatGroups:
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
168 a = []
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
169 for n in range(0, c):
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
170 a = a + compoundList.pop(0)
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
171 t = set(a)
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
172 countList.append(len(t))
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
173 totalCount += len(t)
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
174 countList.append(totalCount)
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
175 print(countList)
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
176 print("--------")
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
177 return countList
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
178 """
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
179
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
180
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
181
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
def relativeFrequencyTable(countList, name, htmlresource):
    """Convert phylotype counts to proportions and save them as CSV.

    countList carries the counts of P1-P15 in positions 0-14 and the total
    in position 15.  Every count is divided by that total; the proportions
    are written to <htmlresource>/<name>_relative_frequency.csv and returned
    as a list of 15 values.  If the total is zero, fifteen zeros are returned
    and no file is written.
    """
    total = float(countList[15])
    if total == 0:
        return [0] * 15

    proportions = [countList[idx] / total for idx in range(15)]
    table = pd.DataFrame({'Phylotype': pList, 'Relative Frequency': proportions})
    table.to_csv(htmlresource + "/" + name + "_relative_frequency.csv")
    return proportions
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
195
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
196
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
197
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
198
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
def getDeviationFromMean(frequencyList, name, htmlresource):
    """Compute how far each phylotype proportion lies from the cohort mean.

    The means of columns P1-P15 are taken from data/congodata.csv beside this
    script.  For each phylotype the sample value minus that mean is stored;
    the table is written to <htmlresource>/<name>_deviation_from_mean.csv and
    the 15 deviations are returned as a list.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    cohort = pd.read_csv(here + "/data/congodata.csv")  # source of the means

    deviations = [
        -(cohort[pList[idx]].mean() - frequencyList[idx])
        for idx in range(15)
    ]

    table = pd.DataFrame({'Phylotype': pList, 'Deviation from Mean': deviations})
    table.to_csv(htmlresource + "/" + name + "_deviation_from_mean.csv")
    return deviations
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
215
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
216
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
def relativeFrequencyHeatMap(name, freqList, pdf, htmlresource):
    """Draw the clustered heat map of phylotype proportions with this sample.

    The cohort table is read from data/congodata.csv beside this script, its
    'Colour' column is dropped and the sample is appended as a new row.  Rows
    are clustered with Ward linkage (columns are not clustered) and the
    figure is saved as <htmlresource>/heatmap.png.

    Parameters:
        name: label of the sample; used as its 'Strain' value.
        freqList: the sample's values for the remaining columns, in file
            order (assumed to be P1-P15 - confirm against congodata.csv).
            The list is not modified.
        pdf: when equal to 'PDF_Yes', heatmap.pdf is written as well.
        htmlresource: directory that receives the image files.
    """
    sampleRow = [name] + list(freqList)
    dir_path = os.path.dirname(os.path.realpath(__file__))
    congo_df = pd.read_csv(dir_path + "/data/congodata.csv")
    congo_df.drop('Colour', axis=1, inplace=True)
    congo_df.loc[congo_df.index.max() + 1] = sampleRow
    congo_df.set_index('Strain', inplace=True)

    cg = sns.clustermap(congo_df, method='ward', cmap="RdBu_r", col_cluster=False,
                        yticklabels=congo_df.index.values)
    # Print the y labels horizontally.
    plt.setp(cg.ax_heatmap.yaxis.get_ticklabels(), rotation=0, fontsize=8)
    ax = cg.ax_heatmap

    # "\\it" keeps the backslash that mathtext needs without relying on the
    # invalid "\i" escape sequence; the resulting text is unchanged.
    title = ("Variant Antigen Profiles of $\\itTrypanosoma$ $\\itcongolense$ estimated as the phylotype proportion across the\nsample cohort. "
             "Dendrogram reflects the relationships amongst the VSG repertoires of each strain. "
             "Strains\nwere isolated from multiple African countries as described in Silva Pereira et al. (2018)."
             "\nData was produced with the 'Variant Antigen Profiler' (Silva Pereira and Jackson, 2018).")

    # The caption is placed below the heat map, in axes coordinates.
    ax.text(-0.15, -0.05, title, va="top", wrap=True, transform=ax.transAxes)

    plt.savefig(htmlresource + "/heatmap.png", bbox_inches='tight')
    if pdf == 'PDF_Yes':
        plt.savefig(htmlresource + "/heatmap.pdf", bbox_inches='tight')
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
252
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
def deviationFromMeanHeatMap(name,devList, pdf, htmlresource):
    """Draw the clustered heat map of deviations from the cohort mean.

    The cohort table is read from data/congodata_deviationfromthemean.csv
    beside this script, its 'Colour' column is dropped and the sample is
    appended as a new row.  Rows are clustered with Ward linkage (columns are
    not clustered) and the figure is saved as <htmlresource>/dheatmap.png.

    Parameters:
        name: label of the sample; used as its 'Strain' value.
        devList: the sample's values for the remaining columns, in file
            order (assumed to be P1-P15 - confirm against the CSV).  The list
            is not modified.
        pdf: when equal to 'PDF_Yes', dheatmap.pdf is written as well.
        htmlresource: directory that receives the image files.
    """
    sampleRow = [name] + list(devList)
    dir_path = os.path.dirname(os.path.realpath(__file__))
    congo_df = pd.read_csv(dir_path + "/data/congodata_deviationfromthemean.csv")
    congo_df.drop('Colour', axis=1, inplace=True)
    congo_df.loc[congo_df.index.max() + 1] = sampleRow
    congo_df.set_index('Strain', inplace=True)

    cg = sns.clustermap(congo_df, method='ward', cmap="RdBu_r", col_cluster=False,
                        yticklabels=congo_df.index.values)
    # Print the y labels horizontally.
    plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=8)
    ax = cg.ax_heatmap

    # "\\it" keeps the backslash that mathtext needs without relying on the
    # invalid "\i" escape sequence; the resulting text is unchanged.
    title = ("Variant Antigen Profiles of $\\itTrypanosoma$ $\\itcongolense$ expressed as the deviation from the mean phylotypes "
             "\nproportions of the sample cohort. Dendrogram reflects the relationships amongst the VSG repertoires of "
             "each \nstrain. Strains were isolated from multiple African countries as described in Silva Pereira et al. (2018)."
             "\nData was produced with the 'Variant Antigen Profiler' (Silva Pereira and Jackson, 2018).")

    # The caption is placed below the heat map, in axes coordinates.
    ax.text(-0.2, -0.05, title, va="top", transform=ax.transAxes, wrap=True)

    plt.savefig(htmlresource + "/dheatmap.png", bbox_inches='tight')
    if pdf == 'PDF_Yes':
        plt.savefig(htmlresource + "/dheatmap.pdf", bbox_inches='tight')
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
277
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
278
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
def plotPCA(name, freqList, pdf, htmlresource):
    """Plot the first two principal components of the cohort plus this sample.

    The cohort table is read from data/congodata.csv beside this script and
    the sample is appended as a new row whose first and last values are both
    ``name`` (assumes the file's columns are Strain, the phylotype columns,
    then Colour - confirm against congodata.csv).  PCA is computed on the
    table without the 'Strain' and 'Colour' columns.  Points are grouped by
    their 'Colour' value, one legend entry per distinct value, and the x
    coordinate is the negated first component.  The figure is saved as
    <htmlresource>/vapPCA.png.

    Parameters:
        name: label of the sample.
        freqList: the sample's phylotype values (not modified).
        pdf: when equal to 'PDF_Yes', vapPCA.pdf is written as well.
        htmlresource: directory that receives the image files.
    """
    sampleRow = [name] + list(freqList) + [name]
    dir_path = os.path.dirname(os.path.realpath(__file__))
    congo_df = pd.read_csv(dir_path + "/data/congodata.csv")
    congo_df.loc[congo_df.index.max() + 1] = sampleRow

    myColours = congo_df['Colour'].tolist()  # group label of every row, in row order
    myCountries = congo_df.drop_duplicates('Colour')['Colour'].tolist()  # distinct labels
    congo_df.drop('Colour', axis=1, inplace=True)
    congo_df.set_index('Strain', inplace=True)

    # DataFrame.as_matrix() was removed in pandas 1.0; .values returns the
    # same array and exists in old and new pandas alike.
    dataArray = congo_df.values
    # NOTE: PCA is the module-level import from matplotlib.mlab, which newer
    # matplotlib releases no longer provide.
    pcaResult = PCA(dataArray)

    # Collect the projected points of each group so that every group becomes
    # a single scatter call and therefore a single legend entry.
    xs = [[] for _ in myCountries]
    ys = [[] for _ in myCountries]
    for colour, item in zip(myColours, pcaResult.Y):
        group = myCountries.index(colour)
        xs[group].append(-item[0])  # first component is mirrored
        ys[group].append(item[1])

    cols = ['r', 'g', 'b', 'c', 'm', 'y', 'grey', 'k']

    fig, ax = plt.subplots(figsize=(9, 6))
    for i, country in enumerate(myCountries):
        # Cycle through the palette so more groups than colours cannot raise
        # an IndexError.
        ax.scatter(xs[i], ys[i], color=cols[i % len(cols)], label=country)
    leg = ax.legend(bbox_to_anchor=(1.02, 1.02), loc="upper left")  # move legend out of plot

    # "\\it" keeps the backslash that mathtext needs without relying on the
    # invalid "\i" escape sequence; the resulting text is unchanged.
    title = ("Principal Component Analysis of the Variant Antigen Profiles of $\\itTrypanosoma$ $\\itcongolense$. "
             "The plot reflects the\nrelationships amongst the VSG repertoires of each strain. Strains are color-coded "
             "by location of collection according\nto key. Strains were isolated from multiple African countries as described in Silva Pereira et al. (2018)."
             "\nData was produced with the 'Variant Antigen Profiler' (Silva Pereira and Jackson, 2018).")
    tx = ax.text(-0.1, -0.07, title, va="top", transform=ax.transAxes, wrap=True)
    fig.subplots_adjust(bottom=0.3)  # leave room for the caption

    fig.savefig(htmlresource + "/vapPCA.png", bbox_extra_artists=(leg, tx), bbox_inches='tight')
    if pdf == 'PDF_Yes':
        fig.savefig(htmlresource + "/vapPCA.pdf", bbox_extra_artists=(leg, tx), bbox_inches='tight')
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
337
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
def createHTML(name,htmlfn,freqList,devList):
    """Write the HTML report for one sample to ``htmlfn``.

    The page contains a heading with the sample name, a 15-row table
    (phy1..phy15) of relative frequency and deviation from the mean, each
    formatted to four decimal places, and three <img> tags.  The images are
    referenced by bare file name (heatmap.png, dheatmap.png, vapPCA.png), so
    they are assumed to already be in the HTML resource directory.

    name     -- sample name shown in the heading; HTML-escaped before use so
                that characters such as '<' or '&' cannot break the page.
    htmlfn   -- path of the HTML file to write (opened in "w" mode).
    freqList -- relative frequencies; elements 0..14 are read.
    devList  -- deviations from the mean; elements 0..14 are read.

    Raises IndexError if either list has fewer than 15 elements.
    """
    # Local import so the block works on both Python 3 (html.escape) and
    # Python 2 (cgi.escape) without touching the file-level imports.
    try:
        from html import escape
    except ImportError:
        from cgi import escape

    parts = [
        r"<html><title>T.congolense VAP</title><body><div style='text-align:center'><h2><i>Trypanosoma congolense</i> Variant Antigen Profile</h2><h3>",
        escape(name, True),
        r"<br/>Genomic Analysis</h3>",
        "<p style = 'margin-left:23%; margin-right:23%'>Table Legend: Variant Antigen Profiles of <i>Trypanosoma congolense</i> estimated as the phylotype proportion and as the deviation from the mean across the sample cohort.<br>"
        "Data was produced with the 'Variant Antigen Profiler' (Silva Pereira and Jackson, 2018).</p>",
        r"<style> table, th, tr, td {border: 1px solid black; border-collapse: collapse;}</style>",
        r"<table style='width:50%;margin-left:25%;text-align:center'><tr><th>Phylotype</th><th>Relative Frequency</th><th>Deviation from Mean</th></tr>",
    ]

    # One table row per phylotype; indexing (rather than zip) keeps the
    # IndexError on short input lists.
    for i in range(15):
        f = format(freqList[i], '.4f')
        d = format(devList[i], '.4f')
        parts.append("<tr><td>phy" + str(i + 1) + "</td><td>" + f + "</td><td>" + d + "</td></tr>")
    parts.append("</table><br><br><br><br><br>")

    parts.append(r"<h3>The Variation Heat Map and Dendrogram</h3><p>The absolute phylotype variation in the sample compared to model dataset.</p>")
    parts.append(r"<img src = 'heatmap.png' alt='Variation Heatmap' style='max-width:100%'><br><br>")

    parts.append(r"<br><br><br><br><h3>The Deviation Heat Map and Dendrogram</h3><p>The phylotype variation expressed as the deviation from your sample mean compared to the model dataset</p>")
    parts.append(r"<img src = 'dheatmap.png' alt='Deviation Heatmap' style='max-width:100%'><br><br>")

    parts.append(r"<br><br><br><br><h3>The Variation PCA plot</h3><p>PCA analysis corresponding to absolute variation. Colour coded according to location</p>")
    parts.append(r"<img src = 'vapPCA.png' alt='PCA Analysis' style='max-width:100%'><br><br>")
    parts.append(r"</div></body></html>")

    with open(htmlfn, "w") as htmlfile:
        htmlfile.write("".join(parts))
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
371
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
372
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
def assemble(args,dict):
    """Run assembly followed by the analysis and reporting steps.

    Calls, in order: assembleWithVelvet, contigTranslation, HMMerMotifSearch,
    relativeFrequencyTable, getDeviationFromMean, relativeFrequencyHeatMap,
    deviationFromMeanHeatMap, plotPCA and createHTML.

    args -- indexable collection of argument values.
    dict -- maps an option name to its index in ``args``.  Keys read here:
            'name', 'kmers', 'inslen', 'covcut', 'forward', 'reverse',
            'html_resource', 'pdfexport', 'html_file'.
            Example layout from the original note:
            {'name': 2, 'pdfexport': 3, 'kmers': 4, 'inslen': 5, 'covcut': 6,
             'forward': 7, 'reverse': 8, 'html_file': 9, 'html_resource': 10}
            (The parameter name shadows the builtin; kept for compatibility.)
    """
    def opt(key):
        # Resolve an option name to its value in args.
        return args[dict[key]]

    sample = opt('name')

    assembleWithVelvet(sample, opt('kmers'), opt('inslen'), opt('covcut'),
                       opt('forward'), opt('reverse'))
    contigTranslation(sample)
    counts = HMMerMotifSearch(sample)

    # Original note: saves out inputname_relative_frequncy.csv
    freqs = relativeFrequencyTable(counts, sample, opt('html_resource'))
    # Original note: saves out inputname_deviation_from_mean.csv
    devs = getDeviationFromMean(freqs, sample, opt('html_resource'))

    relativeFrequencyHeatMap(sample, freqs, opt('pdfexport'), opt('html_resource'))
    deviationFromMeanHeatMap(sample, devs, opt('pdfexport'), opt('html_resource'))
    plotPCA(sample, freqs, opt('pdfexport'), opt('html_resource'))

    # createHTML assumes heatmap.png, dheatmap.png and vapPCA.png are already
    # in the html resource directory.
    createHTML(sample, opt('html_file'), freqs, devs)
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
390
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
def contigs(args,dict):
    """Run the analysis and reporting steps on an existing contigs file.

    Copies the supplied contigs file to "<name>.fa" and then calls, in order:
    contigTranslation, HMMerMotifSearch, relativeFrequencyTable,
    getDeviationFromMean, relativeFrequencyHeatMap, deviationFromMeanHeatMap,
    plotPCA and createHTML.

    args -- indexable collection of argument values.
    dict -- maps an option name to its index in ``args``.  Keys read here:
            'contigs', 'name', 'html_resource', 'pdfexport', 'html_file'.
            Example layout from the original note:
            {'name': 2, 'pdfexport': 3, 'contigs': 4, 'html_file': 5,
             'html_resource': 6}
            (The parameter name shadows the builtin; kept for compatibility.)
    """
    def opt(key):
        # Resolve an option name to its value in args.
        return args[dict[key]]

    contigs_path = opt('contigs')
    sample = opt('name')
    shutil.copyfile(contigs_path, sample + ".fa")

    contigTranslation(sample)
    counts = HMMerMotifSearch(sample)

    # Original note: saves out inputname_relative_frequncy.csv
    freqs = relativeFrequencyTable(counts, sample, opt('html_resource'))
    # Original note: saves out inputname_deviation_from_mean.csv
    devs = getDeviationFromMean(freqs, sample, opt('html_resource'))

    relativeFrequencyHeatMap(sample, freqs, opt('pdfexport'), opt('html_resource'))
    deviationFromMeanHeatMap(sample, devs, opt('pdfexport'), opt('html_resource'))
    plotPCA(sample, freqs, opt('pdfexport'), opt('html_resource'))

    # createHTML assumes heatmap.png, dheatmap.png and vapPCA.png are already
    # in the html resource directory.
    createHTML(sample, opt('html_file'), freqs, devs)
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
415
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
416
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
def genomicProcess(inputname, exportpdf, forwardFN, reverseFN, htmlfile, htmlresource):
    """Run assembly, analysis and reporting from explicit arguments.

    Calls, in order: assembleWithVelvet, contigTranslation, HMMerMotifSearch,
    relativeFrequencyTable, getDeviationFromMean, relativeFrequencyHeatMap,
    deviationFromMeanHeatMap, plotPCA and createHTML.  Returns None.

    inputname    -- sample name passed to every step.
    exportpdf    -- PDF-export flag forwarded to the three plotting functions.
    forwardFN    -- forward reads file, forwarded to assembleWithVelvet.
    reverseFN    -- reverse reads file, forwarded to assembleWithVelvet.
    htmlfile     -- path of the HTML report written by createHTML.
    htmlresource -- resource directory forwarded to the table/plot steps.
    """
    # NOTE(review): assemble() calls assembleWithVelvet with six arguments
    # (name, kmers, inslen, covcut, forward, reverse); this call passes three.
    # Confirm against assembleWithVelvet's signature that this still works.
    assembleWithVelvet(inputname,forwardFN,reverseFN)
    contigTranslation(inputname)
    counts = HMMerMotifSearch(inputname)

    # Original note: saves out inputname_relative_frequncy.csv
    freqs = relativeFrequencyTable(counts, inputname, htmlresource)
    # Original note: saves out inputname_deviation_from_mean.csv
    devs = getDeviationFromMean(freqs, inputname, htmlresource)

    relativeFrequencyHeatMap(inputname, freqs, exportpdf, htmlresource)
    deviationFromMeanHeatMap(inputname, devs, exportpdf, htmlresource)
    plotPCA(inputname, freqs, exportpdf, htmlresource)

    # createHTML assumes heatmap.png, dheatmap.png and vapPCA.png are already
    # in htmlresource.
    createHTML(inputname, htmlfile, freqs, devs)
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
435
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
436
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
437
cfb25df43776 Uploaded
johnheap
parents:
diff changeset
if __name__ == "__main__":
    # Smoke run: draws both heat maps and the PCA plot and writes the HTML
    # report from the hard-coded sample values below, then exits.  This block
    # does not read any command-line arguments.
    sample = 'test'
    pdf_flag = "PDF_Yes"
    out_dir = "results"

    myFreqList = [
        0.111670020120724, 0.103621730382294, 0.0784708249496982,
        0.0110663983903421, 0.0543259557344064, 0.0563380281690141,
        0.0734406438631791, 0.0160965794768612, 0.0110663983903421,
        0.028169014084507, 0.126760563380282, 0.0583501006036217,
        0.062374245472837, 0.0372233400402414, 0.17102615694165,
    ]
    myDevList = [
        0.000790026, 0.0073109, -0.001151769, -0.004502933, -0.013687421,
        -0.016159773, 0.021689891, 0.007863809, -0.003133585, -0.001111709,
        -0.01313879, 0.0036997, -0.00935284, 0.005640693, 0.015243802,
    ]

    relativeFrequencyHeatMap(sample, myFreqList, pdf_flag, out_dir)
    deviationFromMeanHeatMap(sample, myDevList, pdf_flag, out_dir)
    plotPCA(sample, myFreqList, pdf_flag, out_dir)

    createHTML(sample, out_dir + "/" + sample + ".html", myFreqList, myDevList)

    sys.exit()