Mercurial > repos > martasampaio > phagepromoter
annotate phagepromoter.py @ 37:ff92440d89c4 draft default tip
Uploaded
author | martasampaio |
---|---|
date | Fri, 22 Jan 2021 10:08:35 +0000 |
parents | a555e95b2066 |
children |
rev | line source |
---|---|
29
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
1 # -*- coding: utf-8 -*- |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
2 """ |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
3 Created on Thu Jul 19 13:45:05 2018 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
4 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
5 @author: Marta |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
6 """ |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
7 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
8 from Bio import SeqIO |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
9 import numpy as np |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
10 import pandas as pd |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
11 from auxiliar import free_energy,freq_base |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
12 from Bio.Seq import Seq |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
13 from Bio.SeqRecord import SeqRecord |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
14 from Bio.Alphabet import IUPAC |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
15 from auxiliar import get_bacteria, get_families, get_max_pssm, get_scores, get_lifecycle |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
16 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
17 #division of the test genome in sequences of 65 bp |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
def get_testseqs65(form,fic,both=False):
    """Split the record in a sequence file into overlapping 65 bp windows.

    The file is parsed with ``SeqIO.read``, so it is expected to hold a
    single record. Windows are 65 bp long and start every 20 bp.

    Parameters
    ----------
    form : str
        Sequence file format name handed to ``SeqIO.read``.
    fic : str or file handle
        Path or handle of the sequence file, handed to ``SeqIO.read``.
    both : bool, optional
        When True, the reverse complement is windowed as well and its rows
        are appended after the forward-strand rows.

    Returns
    -------
    pandas.DataFrame
        One row per window with columns ``strand`` (1 or -1), ``iniprom``,
        ``endprom`` and ``seq``. Rows are labelled ``"<record name>:<n>"``,
        where ``n`` counts from 1 and keeps increasing across both strands.
        For the reverse strand, ``iniprom``/``endprom`` are expressed as
        offsets on the forward sequence (``size - end``, ``size - start``).
    """
    window = 65
    step = 20
    rec = SeqIO.read(fic,form)
    genome = rec.seq
    size = len(genome)

    # Inclusive upper bound: a window that ends exactly on the last base is
    # a full 65 bp slice and must be kept (the previous "j < len(genome)"
    # test dropped it). The range is empty when the sequence is shorter
    # than one window.
    starts = range(0, size - window + 1, step)

    ALL = [[1, i, i + window, genome[i:i + window]] for i in starts]
    if both:
        comp = genome.reverse_complement()
        # Slice the reverse complement, but report where the window lies
        # on the forward sequence.
        ALL += [[-1, size - (i + window), size - i, comp[i:i + window]]
                for i in starts]

    # Labels are 1-based and continue from the forward into the reverse rows.
    indexes = [rec.name + ":" + str(n) for n in range(1, len(ALL) + 1)]
    df = pd.DataFrame(ALL, index=indexes, columns=['strand','iniprom','endprom','seq'])
    return df
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
47 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
48 #calculate the scores of all sequences (similar to get_posScores and get_negScores) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
49 def get_testScores(loc,test): |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
50 scores = [] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
51 posis = [] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
52 sizes = [] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
53 dic = {} |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
54 for ind,row in test.iterrows(): |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
55 _,window = ind.split(':') |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
56 strand = row['strand'] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
57 ini = row['iniprom'] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
58 end = row['endprom'] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
59 seq = row['seq'] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
60 pos = [ini,end,strand] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
61 dic[window] = pos |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
62 s = seq |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
63 score10_6,pos10_6 = get_scores(os.path.join(loc,'pssm10_6.txt'), s) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
64 maxi10_6 = get_max_pssm(os.path.join(loc,'pssm10_6.txt')) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
65 score10_8,pos10_8 = get_scores(os.path.join(loc,'pssm10_8.txt'), s) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
66 maxi10_8 = get_max_pssm(os.path.join(loc,'pssm10_8.txt')) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
67 scores23,pos23 = get_scores(os.path.join(loc,'pssm_23.txt'), s) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
68 maxi23 = get_max_pssm(os.path.join(loc,'pssm_23.txt')) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
69 scores21,pos21 = get_scores(os.path.join(loc,'pssm_21.txt'), s) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
70 maxi21 = get_max_pssm(os.path.join(loc,'pssm_21.txt')) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
71 scores27,pos27 = get_scores(os.path.join(loc,'pssm_27.txt'), s) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
72 maxi27 = get_max_pssm(os.path.join(loc,'pssm_27.txt')) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
73 scores32,pos32 = get_scores(os.path.join(loc,'pssm_32.txt'), s) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
74 maxi32 = get_max_pssm(os.path.join(loc,'pssm_32.txt')) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
75 score23 = max(scores23) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
76 score21 = max(scores21) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
77 score27 = max(scores27) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
78 score32 = max(scores32) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
79 maxiphage = max(score23,score21,score27,score32) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
80 if maxiphage == score23: phage_max = score23*maxi23 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
81 elif maxiphage == score21: phage_max = score21*maxi21 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
82 elif maxiphage == score27: phage_max = score27*maxi27 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
83 elif maxiphage == score32: phage_max = score32*maxi32 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
84 score35_6,pos35_6 = get_scores(os.path.join(loc,'pssm35_6.txt'), s) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
85 maxi35_6 = get_max_pssm(os.path.join(loc,'pssm35_6.txt')) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
86 score35_9,pos35_9 = get_scores(os.path.join(loc,'pssm35_9.txt'), s) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
87 maxi35_9 = get_max_pssm(os.path.join(loc,'pssm35_9.txt')) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
88 score35_t4,pos35_t4 = get_scores(os.path.join(loc,'pssm35_t4.txt'), s) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
89 maxi35_t4 = get_max_pssm(os.path.join(loc,'pssm35_t4.txt')) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
90 score35_cbb,pos35_cbb = get_scores(os.path.join(loc,'pssm35_cbb.txt'), s) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
91 maxi35_cbb = get_max_pssm(os.path.join(loc,'pssm35_cbb.txt')) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
92 score35_lb,pos35_lb = get_scores(os.path.join(loc,'pssm35_lb.txt'),s) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
93 maxi35_lb = get_max_pssm(os.path.join(loc,'pssm35_lb.txt')) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
94 score35_mu, pos35_mu = get_scores(os.path.join(loc,'pssm35_mu.txt'),s) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
95 maxi35_mu = get_max_pssm(os.path.join(loc,'pssm35_mu.txt')) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
96 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
97 dists6 = [] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
98 score6 = [] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
99 for p in pos10_6: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
100 for a in range(14,22): |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
101 d = p-a-6 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
102 if d >= 0: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
103 s10 = score10_6[p] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
104 s35_6 = score35_6[d] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
105 score6.append([s35_6,s10]) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
106 dists6.append([d,p]) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
107 dists9 = [] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
108 score9 = [] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
109 for p in pos10_6: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
110 for a in range(11,14): |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
111 d = p-a-9 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
112 if d >= 0: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
113 s10 = score10_6[p] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
114 s35_9 = score35_9[d] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
115 score9.append([s35_9,s10]) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
116 dists9.append([d,p]) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
117 distst4 = [] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
118 scoret4 = [] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
119 distscbb = [] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
120 scorecbb = [] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
121 for p in pos10_6: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
122 for a in range(16,18): |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
123 d = p-a-7 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
124 if d >= 0: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
125 s10 = score10_6[p] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
126 s35_t4 = score35_t4[d] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
127 s35_cbb = score35_cbb[d] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
128 scoret4.append([s35_t4,s10]) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
129 distst4.append([d,p]) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
130 scorecbb.append([s35_cbb,s10]) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
131 distscbb.append([d,p]) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
132 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
133 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
134 distsmu = [] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
135 scoremu = [] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
136 for p in pos10_6: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
137 d = p-16-14 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
138 if d >= 0: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
139 s10 = score10_6[p] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
140 s35_mu = score35_mu[d] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
141 scoremu.append([s35_mu,s10]) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
142 distsmu.append([d,p]) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
143 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
144 distslb = [] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
145 scorelb = [] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
146 for p in pos10_6: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
147 d = p-13-14 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
148 if d >= 0: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
149 s10 = score10_6[p] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
150 s35_lb = score35_lb[d] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
151 scorelb.append([s35_lb,s10]) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
152 distslb.append([d,p]) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
153 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
154 soma6 = [sum(x) for x in score6] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
155 soma9 = [sum(x) for x in score9] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
156 somat4 = [sum(x) for x in scoret4] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
157 somacbb = [sum(x) for x in scorecbb] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
158 somamu = [sum(x) for x in scoremu] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
159 somalb = [sum(x) for x in scorelb] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
160 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
161 maxi6 = max(soma6) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
162 maxi9 = max(soma9) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
163 maxit4 = max(somat4) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
164 maxicbb = max(somacbb) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
165 maximu = max(somamu) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
166 maxilb = max(somalb) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
167 maxi_elems = max(maxi6,maxi9,maxit4,maxicbb,maxilb,maximu) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
168 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
169 if maxi_elems == maxilb: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
170 indmax = somalb.index(maxilb) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
171 sc35 = scorelb[indmax][0] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
172 sc10 = scorelb[indmax][1] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
173 score_elems = [sc35,sc10] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
174 posel = distslb[indmax] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
175 size35 = 14 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
176 elems_maxi = sc35*maxi35_lb+sc10*maxi10_6 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
177 elif maxi_elems == maximu: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
178 indmax = somamu.index(maximu) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
179 sc35 = scoremu[indmax][0] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
180 sc10 = scoremu[indmax][1] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
181 score_elems = [sc35,sc10] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
182 posel = distsmu[indmax] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
183 size35 = 14 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
184 elems_maxi = sc35*maxi35_mu+sc10*maxi10_6 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
185 elif maxi_elems == maxi9: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
186 indmax = soma9.index(maxi9) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
187 sc35 = score9[indmax][0] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
188 sc10 = score9[indmax][1] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
189 score_elems = [sc35,sc10] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
190 posel = dists9[indmax] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
191 size35 = 9 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
192 elems_maxi = sc35*maxi35_9+sc10*maxi10_6 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
193 elif maxi_elems == maxit4: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
194 indmax = somat4.index(maxit4) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
195 sc35 = scoret4[indmax][0] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
196 sc10 = scoret4[indmax][1] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
197 score_elems = [sc35,sc10] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
198 posel = distst4[indmax] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
199 size35 = 7 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
200 elems_maxi = sc35*maxi35_t4+sc10*maxi10_6 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
201 elif maxi_elems == maxicbb: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
202 indmax = somacbb.index(maxicbb) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
203 sc35 = scorecbb[indmax][0] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
204 sc10 = scorecbb[indmax][1] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
205 score_elems = [sc35,sc10] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
206 posel = distscbb[indmax] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
207 size35 = 7 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
208 elems_maxi = sc35*maxi35_cbb+sc10*maxi10_6 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
209 else: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
210 indmax = soma6.index(maxi6) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
211 sc35 = score6[indmax][0] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
212 sc10 = score6[indmax][1] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
213 score_elems = [sc35,sc10] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
214 posel = dists6[indmax] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
215 size35 = 6 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
216 elems_maxi = sc35*maxi35_6+sc10*maxi10_6 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
217 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
218 if score23 == maxiphage: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
219 phage_score = score23 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
220 posiphage = scores23.index(score23) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
221 sizephage = 23 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
222 elif score21 == maxiphage: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
223 phage_score = score21 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
224 posiphage = scores21.index(score21) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
225 sizephage = 21 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
226 elif score27 == maxiphage: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
227 phage_score = score27 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
228 posiphage = scores27.index(score27) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
229 sizephage = 27 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
230 else: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
231 phage_score = score32 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
232 posiphage = scores32.index(score32) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
233 sizephage = 32 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
234 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
235 if elems_maxi >= max(score10_8)*maxi10_8: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
236 i = posel[1] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
237 ext = s[i-3:i-1] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
238 if ext == 'TG': tg = 1 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
239 else: tg = 0 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
240 if elems_maxi > phage_max: host = 1 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
241 else: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
242 host = 0 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
243 tg = 0 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
244 sc = max(score10_8) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
245 end35 = posel[0]+size35 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
246 dist = posel[1]-end35 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
247 scores.append([host, score_elems[1],sc,score_elems[0],phage_score,tg,dist,str(seq)]) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
248 posis.append([posel[1],posel[0],posiphage]) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
249 sizes.append([6,size35,sizephage]) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
250 else: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
251 host = 1 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
252 sc = max(score10_8) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
253 i = score10_8.index(sc) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
254 ext = s[i-3:i-1] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
255 if ext == 'TG': tg = 1 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
256 else: tg = 0 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
257 if max(score10_8)*maxi10_8 > phage_max: host = 1 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
258 else: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
259 host = 0 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
260 tg = 0 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
261 end35 = posel[0]+size35 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
262 dist = posel[1]-end35 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
263 scores.append([host,score_elems[1],sc,score_elems[0],phage_score,tg,dist,str(seq)]) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
264 posis.append([i,posel[0],posiphage]) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
265 sizes.append([8,size35,sizephage]) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
266 score = pd.DataFrame(scores, index=test.index, columns=['host','score10','score10_8','score35','score_phage','tg','dist','seq']) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
267 posis = pd.DataFrame(posis, index=test.index, columns=['pos10','pos35','posphage']) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
268 sizes = pd.DataFrame(sizes, index=test.index, columns=['size10','size35','size_phage']) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
269 df_all = pd.concat([score,posis,sizes],axis=1) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
270 return df_all,dic |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
271 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
def create_dftest(scores_test,dic_window,family,bacteria,lifecycle):
    """Build the feature table for the model and the matching report table.

    Parameters
    ----------
    scores_test : pandas.DataFrame
        One row per candidate window.  The index labels must have the form
        ``'<something>:<window>'`` and the columns read here are ``seq``,
        ``score10``, ``score10_8``, ``score35``, ``score_phage``, ``size10``,
        ``size35``, ``size_phage``, ``pos10``, ``pos35``, ``posphage``,
        ``tg``, ``host`` and ``dist``.
    dic_window : dict
        Maps the ``<window>`` part of each index label to a sequence whose
        item 2 is the strand and whose items 0 / 1 are the coordinates used
        as origin for strand ``1`` / any other strand respectively.
    family, bacteria, lifecycle : str
        Phage family, host bacterium and life cycle; each is one-hot
        (or binary) encoded.  Unrecognised values encode as all zeros
        (``lifecycle`` other than ``'virulent'`` encodes as ``1``).

    Returns
    -------
    (df_test, df_INFO) : tuple of pandas.DataFrame
        ``df_test`` holds the numeric features, ``df_INFO`` the same values
        preceded by the promoter position string and promoter sequence.
        Both share the index of ``scores_test``.
    """
    # Encodings that depend only on the call arguments.
    Podo = int(family == 'Podoviridae')
    Sipho = int(family == 'Siphoviridae')
    Myo = int(family == 'Myoviridae')
    tp = 0 if lifecycle == 'virulent' else 1
    # NOTE(review): the vector below is ordered Bacillus, E. coli,
    # Klebsiella, Pectobacterium, Cronobacter, but the column labels given
    # to both DataFrames at the end of this function use a different
    # order/naming.  Values are kept positionally identical to the
    # original; confirm against the order used to train the model.
    host_genera = ('Bacillus', 'Escherichia coli', 'Klebsiella',
                   'Pectobacterium', 'Cronobacter')
    bac = [int(bacteria == genus) for genus in host_genera]

    feature_rows = []
    info_rows = []
    for label, rec in scores_test.iterrows():
        _prefix, window = label.split(':')
        bounds = dic_window[window]
        strand = bounds[2]
        ini = bounds[0] if strand == 1 else bounds[1]

        seqprom = rec['seq']
        score10 = rec['score10']
        score10_8 = rec['score10_8']
        score35 = rec['score35']
        scorephage = rec['score_phage']
        size10 = rec['size10']
        size35 = rec['size35']
        sizephage = rec['size_phage']
        ini10 = rec['pos10']
        tg = rec['tg']
        host = rec['host']
        ini35 = rec['pos35']
        dist = rec['dist']
        iniphage = rec['posphage']
        end10 = ini10 + size10
        endphage = iniphage + sizephage

        # Pick the reported span inside the window: the phage motif when the
        # row is flagged host == 0, otherwise -35..-10 for a 6-bp -10
        # element, or just the -10 element.
        if host == 0:
            start, stop = iniphage, endphage
        elif size10 == 6:
            start, stop = ini35, end10
        else:
            start, stop = ini10, end10
        new_seq = seqprom[start:stop]

        # Translate the window-relative span to 1-based coordinates; for
        # strands other than 1 the offsets are counted backwards from ``ini``.
        if strand == 1:
            new_pos = '(' + str(ini + start + 1) + '..' + str(ini + stop) + ')'
        else:
            new_pos = ('complement(' + str(ini - stop + 1) + '..'
                       + str(ini - start) + ')')

        # One-hot size indicators.  Any -35 size other than 6, 7 or 9 is
        # counted as the 14 category; phage sizes other than 23/21/32 give
        # all zeros.
        size10_6 = int(size10 == 6)
        size35_6 = int(size35 == 6)
        size35_7 = int(size35 == 7)
        size35_9 = int(size35 == 9)
        size35_14 = int(not (size35_6 or size35_7 or size35_9))
        sizephage_23 = int(sizephage == 23)
        sizephage_21 = int(sizephage == 21)
        sizephage_32 = int(sizephage == 32)

        fe = free_energy(str(seqprom))
        AT = freq_base(str(seqprom))

        feature_rows.append(
            [score10, score10_8, score35, dist, scorephage, fe, AT, host,
             size10_6, size35_6, size35_7, size35_9, size35_14,
             sizephage_23, sizephage_21, sizephage_32,
             tg, Podo, Sipho, Myo, tp] + bac)
        info_rows.append(
            [new_pos, str(new_seq), host, size10_6, score10, score10_8,
             size35_6, size35_7, size35_9, size35_14, score35, dist,
             sizephage_23, sizephage_21, sizephage_32, scorephage,
             tg, Podo, Sipho, Myo, tp, fe, AT] + bac)

    feature_columns = ['score10', 'score10_8','score35', 'dist35_10',
                       'scorephage','fe', 'freqAT',
                       'host','size10',
                       'size35_6', 'size35_7','size35_9','size35_14',
                       'sizephage_23',
                       'sizephage_21', 'sizephage_32',
                       'TG', 'Podo', 'Sipho', 'Myo', 'tp',
                       'Bacillus', 'EColi',
                       'Pectobacterium','Klebsiella', 'Cronobacter']
    info_columns = ['Positions','Promoter Sequence','host','size10',
                    'score10', 'score10_8',
                    'size35_6', 'size35_7','size35_9','size35_14',
                    'score35', 'dist35_10','sizephage_23',
                    'sizephage_21', 'sizephage_32',
                    'scorephage', 'TG', 'Podo', 'Sipho', 'Myo', 'tp','fe', 'freqAT',
                    'EColi', 'Salmonella',
                    'Pectobacterium','Cronobacter', 'Streptococcus']
    df_test = pd.DataFrame(feature_rows, index=scores_test.index,
                           columns=feature_columns)
    df_INFO = pd.DataFrame(info_rows, index=scores_test.index,
                           columns=info_columns)
    return df_test,df_INFO
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
431 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
432 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
def get_predictions(scaler_file,model_file,test,df_testinfo,threshold):
    """Score every window with the pickled model and keep the positives.

    Parameters
    ----------
    scaler_file, model_file : str
        Paths to pickled objects.  The scaler must expose ``transform`` and
        the model ``predict_proba``.
    test : pandas.DataFrame
        Feature table; only its first seven columns are passed through the
        scaler, the remaining columns are used unchanged.
    df_testinfo : pandas.DataFrame
        Table sharing the index of ``test`` with at least the columns
        ``'Positions'``, ``'Promoter Sequence'`` and ``'host'``.
    threshold : float or str
        Windows whose second ``predict_proba`` column is strictly greater
        than this value are kept.

    Returns
    -------
    pandas.DataFrame, None or str
        A DataFrame with columns ``Positions``, ``Promoter Sequence``,
        ``Type`` (``'phage'`` when ``host == 0`` else ``'host'``) and
        ``Scores`` (rounded to 3 decimals) for the retained windows;
        ``None`` when no window passes the threshold; or the string
        ``'The threshold value is not a float'`` when ``threshold`` cannot
        be converted with ``float()``.
    """
    import pickle

    # SECURITY NOTE: unpickling executes arbitrary code; only load scaler
    # and model files that come from a trusted source.
    # Context managers make sure both file handles are closed.
    with open(scaler_file, 'rb') as handle:
        scaler = pickle.load(handle)
    with open(model_file, 'rb') as handle:
        model = pickle.load(handle)

    feat_scaled = pd.DataFrame(scaler.transform(test.iloc[:,:7]),
                               index=test.index, columns=test.columns[:7])
    TEST_scaled = pd.concat([feat_scaled,test.iloc[:,7:]],axis=1)

    # Second probability column of every row, taken in one slice instead
    # of appending row by row.
    scores = np.asarray(model.predict_proba(TEST_scaled))
    pos_scores = scores[:, 1].astype(float)

    # Keep the try body limited to the conversion it is meant to guard.
    try:
        cutoff = float(threshold)
    except ValueError:
        return 'The threshold value is not a float'

    positive_indexes = np.nonzero(pos_scores > cutoff)[0]
    if len(positive_indexes) == 0:
        return None

    positive_windows = TEST_scaled.index[positive_indexes]
    # Explicit copy so the column assignments below never write into (or
    # warn about) a view of ``df_testinfo``.
    INFO = df_testinfo.loc[positive_windows,['Positions','Promoter Sequence']].copy()
    INFO['Type'] = ['phage' if flag == 0 else 'host'
                    for flag in df_testinfo.loc[positive_windows,'host'].tolist()]
    INFO['Scores'] = np.around(pos_scores[positive_indexes],decimals=3)
    INFO.index = positive_windows
    return INFO
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
457 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
458 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
459 def get_finaldf(test,rec): |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
460 new_df = test.groupby(['Positions']) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
461 groups = list(new_df.groups.keys()) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
462 for i in range(len(groups)-1): |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
463 for j in range(i, len(groups)): |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
464 if 'complement' in groups[i]: inii = int(groups[i][11:].split('..')[0]) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
465 else: inii = int(groups[i][1:].split('..')[0]) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
466 if 'complement' in groups[j]: inij = int(groups[j][11:].split('..')[0]) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
467 else: inij = int(groups[j][1:].split('..')[0]) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
468 if inij < inii: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
469 temp = groups[i] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
470 groups[i] = groups[j] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
471 groups[j] = temp |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
472 new_inds = [] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
473 for g in groups: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
474 inds = new_df.groups[g] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
475 if len(inds) == 1: new_inds.append(inds[0]) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
476 else: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
477 maxi = max(new_df.get_group(g)['Scores']) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
478 i = new_df.groups[g][new_df.get_group(g)['Scores']==maxi][0] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
479 new_inds.append(i) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
480 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
481 output = test.loc[new_inds,:] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
482 strands = [] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
483 new_pos = [] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
484 old_pos = output['Positions'].tolist() |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
485 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
486 from Bio.SeqFeature import SeqFeature, FeatureLocation |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
487 feats = rec.features |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
488 for ind, row in output.iterrows(): |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
489 pos = row['Positions'] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
490 if 'complement' in pos: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
491 strands.append('-') |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
492 new_pos.append(pos[10:]) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
493 ini,end= pos[11:-1].split('..') |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
494 new_loc = FeatureLocation(int(ini),int(end),strand=-1) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
495 else: |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
496 strands.append('+') |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
497 new_pos.append(pos) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
498 ini,end= pos[1:-1].split('..') |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
499 new_loc = FeatureLocation(int(ini),int(end),strand=1) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
500 feat = SeqFeature(new_loc, type='regulatory',qualifiers={'regulatory_class':['promoter'], 'note=':['predicted by PhagePromoter']}) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
501 feats.append(feat) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
502 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
503 output.insert(loc=0, column='Strand', value=strands) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
504 output['Positions'] = new_pos |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
505 output.to_html('output.html',index=False, justify='center') |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
506 recs = [] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
507 i = 0 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
508 for ind,row in output.iterrows(): |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
509 s = Seq(row['Promoter Sequence']) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
510 posis = old_pos[i] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
511 typ = row['Type'] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
512 score = row['Scores'] |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
513 sq = SeqRecord(seq=s, id=ind, description=typ+' '+posis+' score='+str(score)) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
514 recs.append(sq) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
515 i += 1 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
516 SeqIO.write(recs, 'output.fasta','fasta') |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
517 new_rec = rec |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
518 new_rec.seq.alphabet = IUPAC.IUPACAmbiguousDNA() |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
519 new_feats = sorted(feats, key=lambda x: x.location.start) |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
520 new_rec.features = new_feats |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
521 SeqIO.write(new_rec,'output.gb','genbank') |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
522 |
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
if __name__ == "__main__":
    # Command-line entry point.
    #
    # Usage:
    #   phagepromoter.py <gen_format> <genome_file> <both> <threshold>
    #                    <family> <host> <phage_type> <model>
    #
    # Example values that were previously kept in a disabled string literal:
    #   gen_format = 'genbank'
    #   genome_file = 'test-data/NC_015264.gb'
    #   both = False
    #   threshold = '0.50'
    #   family = 'Podoviridae'
    #   host = 'Pseudomonas'
    #   phage_type = 'virulent'
    #   model = 'SVM2400'   (or 'ANN1600')

    import sys
    import os

    # Directory of this script; the scaler/model files are looked up here.
    __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))

    # Fail with a readable usage message instead of a bare IndexError
    # traceback when arguments are missing.
    if len(sys.argv) < 9:
        sys.exit('Usage: ' + sys.argv[0] + ' <gen_format> <genome_file> <both> '
                 '<threshold> <family> <host> <phage_type> <model>')

    # All values are kept as the raw command-line strings and forwarded
    # unchanged to the helper functions.
    (gen_format, genome_file, both, threshold,
     family, host, phage_type, model) = sys.argv[1:9]

    rec = SeqIO.read(genome_file, gen_format)
    test_windows = get_testseqs65(gen_format, genome_file, both)
    try:
        score_test, dic_window = get_testScores(__location__, test_windows)
    except IndexError:
        # An IndexError from the scoring step is reported as an invalid
        # nucleotide in the input sequence.
        print('Error. Input sequence can only have A,C,G or T')
    else:
        df_test, df_testinfo = create_dftest(score_test, dic_window, family, host, phage_type)

        # Pick the scaler/model pair and the feature table for the requested
        # model; any value other than 'ANN1600' falls back to the 2400 files.
        if model == 'ANN1600':
            scaler_file = os.path.join(__location__, 'scaler1600.sav')
            model_file = os.path.join(__location__, 'model1600.sav')
            features = df_test
        else:
            scaler_file = os.path.join(__location__, 'scaler2400.sav')
            model_file = os.path.join(__location__, 'model2400.sav')
            # The 2400 model uses only this subset of the feature columns.
            new_df_test = df_test.iloc[:, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 13, 14, 16, 17, 19, 20, 22, 24, 25]]
            features = new_df_test

        preds = get_predictions(scaler_file, model_file, features, df_testinfo, threshold)
        if preds is None:
            print('There is no sequence with a score value higher or equal to the threshold '+str(threshold))
        elif isinstance(preds, str):
            # A string result is a message to show the user, not a table.
            print(preds)
        else:
            output = get_finaldf(preds, rec)
a555e95b2066
Update phagepromoter.py -> pickle instead joblib
martasampaio
parents:
diff
changeset
|
570 |