annotate Tryp_G.py @ 0:36cb22bd911d draft

planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
author johnheap
date Wed, 04 Jul 2018 16:39:13 -0400
parents
children 4432e4183ebd
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
1 """
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
2 * Copyright 2018 University of Liverpool
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
3 * Author: John Heap, Computational Biology Facility, UoL
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
4 * Based on original scripts of Sara Silva Pereira, Institute of Infection and Global Health, UoL
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
5 *
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
6 * Licensed under the Apache License, Version 2.0 (the "License");
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
7 * you may not use this file except in compliance with the License.
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
8 * You may obtain a copy of the License at
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
9 *
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
10 * http://www.apache.org/licenses/LICENSE-2.0
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
11 *
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
12 * Unless required by applicable law or agreed to in writing, software
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
13 * distributed under the License is distributed on an "AS IS" BASIS,
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
15 * See the License for the specific language governing permissions and
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
16 * limitations under the License.
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
17 *
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
18 """
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
19
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
20 import subprocess
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
21 import re
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
22 import os
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
23 import sys
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
24 import shutil
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
25 import pandas as pd
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
26 import numpy as np
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
27 import matplotlib as mpl
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
28 mpl.use('Agg')
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
29 import matplotlib.pyplot as plt
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
30 from matplotlib.mlab import PCA
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
31 import seaborn as sns
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
32
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
# Module-level settings shared by the functions in this file.

# Column labels 'P1' .. 'P15', one label per phylotype.
pList = [
    'P1', 'P2', 'P3', 'P4', 'P5',
    'P6', 'P7', 'P8', 'P9', 'P10',
    'P11', 'P12', 'P13', 'P14', 'P15',
]

# Text appended to the velvet shell command lines.  Empty by default; it was
# previously a shell redirection into <script dir>/log/Vap_log.txt
# (" >>" + <script dir> + "/log/Vap_log.txt 2>&1") used to silence the tools.
quietString = ""
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
38
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
def assembleWithVelvet(name, kmers, inslen, covcut, fastq1name, fastq2name):
    """Assemble paired reads with velveth and velvetg, then copy the contigs to ``<name>.fa``.

    Args:
        name: Prefix for the velvet work directory (``<name>_k<kmers>``) and
            for the resulting ``<name>.fa`` file.
        kmers: k-mer length handed to velveth (string or number).
        inslen: Value for velvetg ``-ins_length`` (string or number).
        covcut: Value for velvetg ``-cov_cutoff`` (string or number).
        fastq1name: Path of the first FASTQ file.
        fastq2name: Path of the second FASTQ file.

    Returns:
        ``"ok"`` on success, ``"Error in velveth"`` or ``"Error in velvetg"``
        when the corresponding program exits with a non-zero status.

    Raises:
        Whatever ``shutil.copyfile`` raises if the contigs file cannot be
        copied; that step is not converted into an error string.
    """
    # Function-scope import so this block needs no change to the file's
    # import section; Python 2 keeps quote() in the pipes module.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    # Accept numbers as well as strings for the numeric options.
    kmers = str(kmers)
    inslen = str(inslen)
    covcut = str(covcut)
    workDir = str(name) + "_k" + kmers

    def runLogged(args):
        # Quote each argument so paths with spaces or shell metacharacters
        # reach the program intact.  quietString is appended unquoted on
        # purpose: when set it is a shell redirection, which is also why the
        # command still goes through the shell.
        argString = " ".join(quote(arg) for arg in args) + quietString
        print(argString)
        return subprocess.call(argString, shell=True)

    velvethArgs = ["velveth", workDir, kmers, "-shortPaired", "-fastq",
                   str(fastq1name), str(fastq2name)]
    if runLogged(velvethArgs) != 0:
        return "Error in velveth"

    velvetgArgs = ["velvetg", workDir,
                   "-exp_cov", "auto",
                   "-ins_length", inslen,
                   "-cov_cutoff", covcut,
                   "-clean", "yes",
                   "-ins_length_sd", "50",
                   "-min_pair_count", "20"]
    if runLogged(velvetgArgs) != 0:
        return "Error in velvetg"

    # Publish the assembled contigs next to the inputs as <name>.fa.
    shutil.copyfile(os.path.join(workDir, "contigs.fa"), str(name) + ".fa")
    return "ok"
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
54
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
def contigTranslation(name):
    """Translate ``<name>.fa`` in all six reading frames with EMBOSS transeq.

    The command line is printed and then run through the shell; transeq is
    asked to write ``<name>_6frame.fas``.

    Args:
        name: Path prefix shared by the input ``.fa`` and output
            ``_6frame.fas`` files.

    Returns:
        None.  The exit status of transeq is not inspected.
    """
    # Function-scope import so this block needs no change to the file's
    # import section; Python 2 keeps quote() in the pipes module.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    # Quote the file names so spaces or shell metacharacters in `name`
    # cannot split or extend the command.
    argString = " ".join([
        "transeq",
        quote(str(name) + ".fa"),
        quote(str(name) + "_6frame.fas"),
        "-frame=6",
    ])
    print(argString)
    # NOTE(review): the exit status is ignored, as it was before this change
    # (the original check was commented out); a failing transeq only shows up
    # later as missing or empty output.
    subprocess.call(argString, shell=True)
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
64
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
65
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
def _readHmmsearchHits(outPath):
    """Return the hit labels found in the result table of one hmmsearch report."""
    hits = []
    with open(outPath, 'r') as hmmResult:
        # The rows of interest start on line 15 of the report; skip the
        # 14 lines in front of them.
        for _ in range(14):
            hmmResult.readline()
        for line in hmmResult:
            # Stop at the inclusion-threshold marker or at the first empty
            # line, whichever comes first.
            if "inclusion" in line or len(line) <= 1:
                break
            # Everything from column 60 onwards labels the hit (presumably
            # the sequence name and description -- verify against the
            # hmmsearch output format).  Only the line terminator is
            # stripped, so a last line without a newline keeps its final
            # character.
            hits.append(line[60:].rstrip("\n"))
    return hits


def HMMerMotifSearch(name):
    """Run hmmsearch for every phylotype motif and count distinct hits per phylotype.

    For each motif, ``<script dir>/data/Motifs/Phylotype<motif>.hmm`` is
    searched against ``<name>_6frame.fas`` and the report is written to
    ``Phy<motif>.out`` in the current directory.  Hits of the motifs that
    belong to one phylotype (e.g. 2a and 2b) are pooled and duplicates are
    counted once.  The per-motif hit counts are printed.

    Args:
        name: Path prefix of the six-frame translation ``<name>_6frame.fas``.

    Returns:
        A list of 16 ints: the distinct hit counts of phylotypes 1-15
        followed by their sum.
    """
    # Function-scope import so this block needs no change to the file's
    # import section; Python 2 keeps quote() in the pipes module.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    # Motif profiles grouped by the phylotype they belong to, in order.
    motifGroups = [['1'], ['2a', '2b'], ['3'], ['4a', '4b', '4c'], ['5'], ['6'], ['7'],
                   ['8a', '8b'], ['9a', '9b', '9c'], ['10a', '10b'], ['11a', '11b'], ['12'],
                   ['13a', '13b', '13c', '13d'], ['14'], ['15a', '15b', '15c']]
    dir_path = os.path.dirname(os.path.realpath(__file__))
    phylopath = dir_path + "/data/Motifs/Phylotype"

    lineCounts = []   # hits per motif, in motif order (printed for the log)
    countList = []    # distinct hits per phylotype
    for group in motifGroups:
        groupHits = set()
        for motif in group:
            outName = "Phy" + motif + ".out"
            # File names are quoted; the redirection needs the shell.
            argString = " ".join([
                "hmmsearch",
                quote(phylopath + motif + ".hmm"),
                quote(str(name) + "_6frame.fas"),
                ">",
                quote(outName),
            ])
            # NOTE(review): the exit status is ignored, as before; a failed
            # search simply yields no hits for this motif.
            subprocess.call(argString, shell=True)

            # The previous code opened this file for writing and never wrote
            # to it or closed it.  Keep the create/truncate side effect in
            # case something else looks at the file, but release the handle.
            open(dir_path + "/data/" + "Phy" + motif + ".txt", 'w').close()

            hits = _readHmmsearchHits(outName)
            lineCounts.append(len(hits))
            groupHits.update(hits)
        countList.append(len(groupHits))

    print(lineCounts)
    # Last entry is the total over all phylotypes.
    countList.append(sum(countList))
    return countList
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
125
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
126 """
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
127 def HMMerMotifSearch(name):
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
128 motifs = ['1', '2a', '2b', '3', '4a', '4b', '4c', '5', '6', '7', '8a', '8b', '9a', '9b',
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
129 '9c', '10a', '10b', '11a', '11b', '12', '13a', '13b', '13c', '13d', '14', '15a', '15b', '15c']
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
130 lineCounts = []
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
131 compoundList = []
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
132 dir_path = os.path.dirname(os.path.realpath(__file__))
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
133 phylopath = dir_path+"/data/Motifs/Phylotype"
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
134 for m in motifs:
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
135 argString = "hmmsearch "+phylopath + m + ".hmm " + name + "_6frame.fas > Phy" + m + ".out" #+quietString
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
136 #argString = "hmmsearch "+phylopath + m + ".hmm " + dir_path+"/data/Test_6frame.fas > Phy" + m + ".out"
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
137 print(argString)
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
138 subprocess.call(argString, shell=True)
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
139
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
140 hmmResult = open("Phy" + m + ".out", 'r')
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
141 tempout = open(dir_path+"/data/"+"Phy" + m + ".txt", 'w')
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
142 regex = r"NODE_[0-9]{1,7}_length_[0-9]{1,7}_cov_[0-9]{1,10}.[0-9]{1,7}_[0-9]{1,2}"
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
143 n = 0
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
144 outList = []
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
145 for line in hmmResult:
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
146 m = re.search(regex, line)
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
147 if m:
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
148 tempout.write(m.group() + "\n")
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
149 outList.append(""+m.group()+"\n")
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
150 n += 1
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
151 if re.search(r"inclusion", line):
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
152 print("inclusion threshold reached")
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
153 break
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
154 compoundList.append(outList)
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
155 lineCounts.append(n)
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
156 hmmResult.close()
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
157 #tempout.close()
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
158 print(lineCounts)
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
159 motifGroups = [['1'], ['2a', '2b'], ['3'], ['4a', '4b', '4c'], ['5'], ['6'], ['7'], ['8a', '8b'], ['9a', '9b',
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
160 '9c'],
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
161 ['10a', '10b'], ['11a', '11b'], ['12'], ['13a', '13b', '13c', '13d'], ['14'], ['15a', '15b', '15c']]
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
162 concatGroups = [1, 2, 1, 3, 1, 1, 1, 2, 3, 2, 2, 1, 4, 1, 3]
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
163 countList = []
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
164 countIndex = 0
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
165 totalCount = 0
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
166
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
167 for c in concatGroups:
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
168 a = []
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
169 for n in range(0, c):
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
170 a = a + compoundList.pop(0)
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
171 t = set(a)
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
172 countList.append(len(t))
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
173 totalCount += len(t)
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
174 countList.append(totalCount)
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
175 print(countList)
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
176 print("--------")
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
177 return countList
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
178 """
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
179
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
180
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
181
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
def relativeFrequencyTable(countList, name, htmlresource):
    """Turn phylotype hit counts into relative frequencies and save them as CSV.

    Args:
        countList: Sequence whose entries 0-14 are the counts for P1-P15 and
            whose entry 15 is the total count.
        name: Sample name used as the prefix of the CSV file name.
        htmlresource: Directory that receives
            ``<name>_relative_frequency.csv``.

    Returns:
        A list of 15 values, ``countList[i] / total`` for i in 0..14.  When
        the total is zero a list of 15 zeros is returned and no CSV file is
        written.
    """
    total = float(countList[15])
    if total == 0:
        return [0] * 15

    relFreqList = [countList[idx] / total for idx in range(15)]

    csvPath = "/".join([htmlresource, name + "_relative_frequency.csv"])
    table = pd.DataFrame({'Phylotype': pList, 'Relative Frequency': relFreqList})
    table.to_csv(csvPath)
    return relFreqList
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
195
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
196
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
197
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
198
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
def getDeviationFromMean(frequencyList, name, htmlresource):
    """Compare each phylotype frequency with the reference mean and save the result as CSV.

    The reference means are the column means of ``data/congodata.csv``
    located next to this script; the columns are looked up by the labels in
    ``pList``.

    Args:
        frequencyList: Sequence with the 15 relative frequencies for P1-P15.
        name: Sample name used as the prefix of the CSV file name.
        htmlresource: Directory that receives
            ``<name>_deviation_from_mean.csv``.

    Returns:
        A list of 15 values, the sample frequency minus the reference mean
        for each phylotype.
    """
    scriptDir = os.path.dirname(os.path.realpath(__file__))
    congo_df = pd.read_csv(scriptDir + "/data/congodata.csv")  # source of the reference means

    devList = [
        -(congo_df[pList[idx]].mean() - frequencyList[idx])
        for idx in range(15)
    ]

    outPath = "/".join([htmlresource, name + "_deviation_from_mean.csv"])
    table = pd.DataFrame({'Phylotype': pList, 'Deviation from Mean': devList})
    table.to_csv(outPath)
    return devList
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
215
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
216
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
def relativeFrequencyHeatMap(name, freqList, pdf, htmlresource):
    """Save a clustered heat map of the reference table plus this sample.

    ``data/congodata.csv`` (next to this script) is loaded, its 'Colour'
    column is dropped, and one extra row made of *name* followed by the
    values of *freqList* is appended before the table is indexed by 'Strain'
    and passed to ``sns.clustermap`` (Ward linkage, rows clustered only).

    The figure is written to ``<htmlresource>/heatmap.png`` and, when *pdf*
    equals ``'PDF_Yes'``, also to ``<htmlresource>/heatmap.pdf``.

    :param name: sample name; becomes the first value of the appended row
    :param freqList: sample values; must supply one value per remaining
        column after *name* (the caller's list is not modified)
    :param pdf: ``'PDF_Yes'`` to additionally export a PDF
    :param htmlresource: output directory for the image files
    """
    # Build the new row without touching the caller's list.
    sample_row = [name] + list(freqList)

    dir_path = os.path.dirname(os.path.realpath(__file__))
    congo_df = pd.read_csv(dir_path + "/data/congodata.csv")
    congo_df.drop('Colour', axis=1, inplace=True)
    congo_df.loc[congo_df.index.max() + 1] = sample_row
    congo_df.set_index('Strain', inplace=True)

    cg = sns.clustermap(congo_df, method='ward', cmap="RdBu_r", col_cluster=False,
                        yticklabels=congo_df.index.values)
    # Print the y labels horizontally.
    plt.setp(cg.ax_heatmap.yaxis.get_ticklabels(), rotation=0, fontsize=8)
    ax = cg.ax_heatmap

    # "\\it" yields the two characters backslash + "it" for mathtext; the
    # previous "\i" was an invalid escape sequence in a normal string literal.
    title = (
        "Variant Antigen Profiles of $\\itTrypanosoma$ $\\itcongolense$ estimated as the phylotype proportion across the\nsample cohort. "
        "Dendrogram reflects the relationships amongst the VSG repertoires of each strain. "
        "Strains\nwere isolated from multiple African countries as described in Silva Pereira et al. (2018)."
        "\nData was produced with the 'Variant Antigen Profiler' (Silva Pereira and Jackson, 2018)."
    )
    # The caption is placed below the heat map in axes coordinates.
    ax.text(-0.15, -0.05, title, va="top", wrap=True, transform=ax.transAxes)

    plt.savefig(htmlresource + "/heatmap.png", bbox_inches='tight')
    if pdf == 'PDF_Yes':
        plt.savefig(htmlresource + "/heatmap.pdf", bbox_inches='tight')
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
252
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
def deviationFromMeanHeatMap(name,devList, pdf, htmlresource):
    """Save a clustered heat map of the reference deviations plus this sample.

    ``data/congodata_deviationfromthemean.csv`` (next to this script) is
    loaded, its 'Colour' column is dropped, and one extra row made of *name*
    followed by the values of *devList* is appended before the table is
    indexed by 'Strain' and passed to ``sns.clustermap`` (Ward linkage, rows
    clustered only).

    The figure is written to ``<htmlresource>/dheatmap.png`` and, when *pdf*
    equals ``'PDF_Yes'``, also to ``<htmlresource>/dheatmap.pdf``.

    :param name: sample name; becomes the first value of the appended row
    :param devList: sample deviations; must supply one value per remaining
        column after *name* (the caller's list is not modified)
    :param pdf: ``'PDF_Yes'`` to additionally export a PDF
    :param htmlresource: output directory for the image files
    """
    # Build the new row without touching the caller's list.
    sample_row = [name] + list(devList)

    dir_path = os.path.dirname(os.path.realpath(__file__))
    congo_df = pd.read_csv(dir_path + "/data/congodata_deviationfromthemean.csv")
    congo_df.drop('Colour', axis=1, inplace=True)
    congo_df.loc[congo_df.index.max() + 1] = sample_row
    congo_df.set_index('Strain', inplace=True)

    cg = sns.clustermap(congo_df, method='ward', cmap="RdBu_r", col_cluster=False,
                        yticklabels=congo_df.index.values)
    # Print the y labels horizontally.
    plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=8)
    ax = cg.ax_heatmap

    # "\\it" yields the two characters backslash + "it" for mathtext; the
    # previous "\i" was an invalid escape sequence in a normal string literal.
    title = (
        "Variant Antigen Profiles of $\\itTrypanosoma$ $\\itcongolense$ expressed as the deviation from the mean phylotypes "
        "\nproportions of the sample cohort. Dendrogram reflects the relationships amongst the VSG repertoires of "
        "each \nstrain. Strains were isolated from multiple African countries as described in Silva Pereira et al. (2018)."
        "\nData was produced with the 'Variant Antigen Profiler' (Silva Pereira and Jackson, 2018)."
    )
    # The caption is placed below the heat map in axes coordinates.
    ax.text(-0.2, -0.05, title, va="top", transform=ax.transAxes, wrap=True)

    plt.savefig(htmlresource + "/dheatmap.png", bbox_inches='tight')
    if pdf == 'PDF_Yes':
        plt.savefig(htmlresource + "/dheatmap.pdf", bbox_inches='tight')
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
277
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
278
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
def plotPCA(name, freqList, pdf, htmlresource):
    """Save a PCA scatter plot of the reference table plus this sample.

    ``data/congodata.csv`` (next to this script) is loaded and one extra row
    is appended consisting of *name*, the values of *freqList*, and *name*
    again.  The 'Colour' column is then used to group the rows for the
    legend; because the sample row carries *name* in its last position it
    presumably lands in 'Colour' and gets its own legend entry -- confirm
    against the column order of the CSV.

    The remaining numeric table is passed to ``PCA`` and the first two
    components of ``pcaResult.Y`` are plotted (the first one negated).

    The figure is written to ``<htmlresource>/vapPCA.png`` and, when *pdf*
    equals ``'PDF_Yes'``, also to ``<htmlresource>/vapPCA.pdf``.

    :param name: sample name
    :param freqList: sample values (the caller's list is not modified)
    :param pdf: ``'PDF_Yes'`` to additionally export a PDF
    :param htmlresource: output directory for the image files
    """
    # Build the new row without touching the caller's list.
    sample_row = [name] + list(freqList) + [name]

    dir_path = os.path.dirname(os.path.realpath(__file__))
    congo_df = pd.read_csv(dir_path + "/data/congodata.csv")
    congo_df.loc[congo_df.index.max() + 1] = sample_row

    myColours = congo_df['Colour']
    # Distinct 'Colour' values in order of first appearance: one legend group each.
    myCountries = congo_df.drop_duplicates('Colour')['Colour'].tolist()
    congo_df.drop('Colour', axis=1, inplace=True)
    congo_df.set_index('Strain', inplace=True)

    # .values replaces DataFrame.as_matrix(), which newer pandas no longer provides.
    dataArray = congo_df.values
    pcaResult = PCA(dataArray)

    # Collect the projected points per legend group so that each group can be
    # drawn with its own scatter call (and therefore its own legend entry).
    group_of = {country: idx for idx, country in enumerate(myCountries)}
    xs = [[] for _ in myCountries]
    ys = [[] for _ in myCountries]
    for colour, point in zip(myColours.tolist(), pcaResult.Y):
        group = group_of[colour]
        xs[group].append(-point[0])
        ys[group].append(point[1])

    cols = ['r', 'g', 'b', 'c', 'm', 'y', 'grey', 'k']

    fig, ax = plt.subplots(figsize=(9, 6))
    for idx, country in enumerate(myCountries):
        # Cycle through the palette so more than len(cols) groups cannot
        # raise an IndexError.
        ax.scatter(xs[idx], ys[idx], color=cols[idx % len(cols)], label=country)

    # Move the legend out of the plot area.
    leg = ax.legend(bbox_to_anchor=(1.02, 1.02), loc="upper left")

    # "\\it" yields the two characters backslash + "it" for mathtext; the
    # previous "\i" was an invalid escape sequence in a normal string literal.
    title = (
        "Principal Component Analysis of the Variant Antigen Profiles of $\\itTrypanosoma$ $\\itcongolense$. "
        "The plot reflects the\nrelationships amongst the VSG repertoires of each strain. Strains are color-coded "
        "by location of collection according\nto key. Strains were isolated from multiple African countries as described in Silva Pereira et al. (2018)."
        "\nData was produced with the 'Variant Antigen Profiler' (Silva Pereira and Jackson, 2018)."
    )
    tx = ax.text(-0.1, -0.07, title, va="top", transform=ax.transAxes, wrap=True)
    fig.subplots_adjust(bottom=0.3)

    # Legend and caption sit outside the axes; pass them explicitly so the
    # tight bounding box includes them.
    fig.savefig(htmlresource + "/vapPCA.png", bbox_extra_artists=(leg, tx), bbox_inches='tight')
    if pdf == 'PDF_Yes':
        fig.savefig(htmlresource + "/vapPCA.pdf", bbox_extra_artists=(leg, tx), bbox_inches='tight')
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
337
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
def createHTML(name,htmlfn,freqList,devList):
    """Write the HTML report for one sample to *htmlfn*.

    The page contains a heading with *name*, a 15-row table (phy1..phy15)
    listing ``freqList[i]`` and ``devList[i]`` formatted to four decimals,
    and three images referenced by relative file name: heatmap.png,
    dheatmap.png and vapPCA.png.  Those images are not created here.

    Note that *name* is inserted into the markup verbatim (no HTML escaping).

    :param name: sample name shown in the heading
    :param htmlfn: path of the HTML file to write (overwritten)
    :param freqList: sequence with at least 15 numeric relative frequencies
    :param devList: sequence with at least 15 numeric deviations from the mean
    """
    # One table row per phylotype, values rendered with four decimals.
    row_template = "<tr><td>phy{0}</td><td>{1}</td><td>{2}</td></tr>"
    table_rows = [
        row_template.format(idx + 1, format(freqList[idx], '.4f'), format(devList[idx], '.4f'))
        for idx in range(15)
    ]

    page_parts = [
        # Page header and sample heading.
        r"<html><title>T.congolense VAP</title><body><div style='text-align:center'><h2><i>Trypanosoma congolense</i> Variant Antigen Profile</h2><h3>",
        name,
        r"<br/>Genomic Analysis</h3>",
        # Table legend.
        "<p style = 'margin-left:23%; margin-right:23%'>Table Legend: Variant Antigen Profiles of <i>Trypanosoma congolense</i> estimated as the phylotype proportion and as the deviation from the mean across the sample cohort.<br>",
        "Data was produced with the 'Variant Antigen Profiler' (Silva Pereira and Jackson, 2018).</p>",
        r"<style> table, th, tr, td {border: 1px solid black; border-collapse: collapse;}</style>",
        # Frequency / deviation table.
        r"<table style='width:50%;margin-left:25%;text-align:center'><tr><th>Phylotype</th><th>Relative Frequency</th><th>Deviation from Mean</th></tr>",
    ]
    page_parts.extend(table_rows)
    page_parts.extend([
        "</table><br><br><br><br><br>",
        # Relative-frequency heat map.
        r"<h3>The Variation Heat Map and Dendrogram</h3><p>The absolute phylotype variation in the sample compared to model dataset.</p>",
        r"<img src = 'heatmap.png' alt='Variation Heatmap' style='max-width:100%'><br><br>",
        # Deviation heat map.
        r"<br><br><br><br><h3>The Deviation Heat Map and Dendrogram</h3><p>The phylotype variation expressed as the deviation from your sample mean compared to the model dataset</p>",
        r"<img src = 'dheatmap.png' alt='Deviation Heatmap' style='max-width:100%'><br><br>",
        # PCA plot and page footer.
        r"<br><br><br><br><h3>The Variation PCA plot</h3><p>PCA analysis corresponding to absolute variation. Colour coded according to location</p>",
        r"<img src = 'vapPCA.png' alt='PCA Analysis' style='max-width:100%'><br><br>",
        r"</div></body></html>",
    ])
    htmlString = "".join(page_parts)

    with open(htmlfn, "w") as htmlfile:
        htmlfile.write(htmlString)
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
371
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
372
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
def assemble(args,dict):
    """Run the complete workflow starting from paired reads.

    *dict* maps option names to positions in *args*; every option is read as
    ``args[dict[<option>]]``.  The steps are, in order: Velvet assembly,
    contig translation, HMMer motif search, relative-frequency table,
    deviation from the mean, the two heat maps, the PCA plot and finally the
    HTML report.

    :param args: indexable collection of option values
    :param dict: mapping with the keys 'name', 'pdfexport', 'kmers',
        'inslen', 'covcut', 'forward', 'reverse', 'html_file' and
        'html_resource'
    """
    # Example layout of the mapping:
    # argdict = {'name': 2, 'pdfexport': 3, 'kmers': 4, 'inslen': 5, 'covcut': 6, 'forward': 7, 'reverse': 8, 'html_file': 9,'html_resource': 10}
    def option(key):
        return args[dict[key]]

    sample = option('name')
    assembleWithVelvet(sample, option('kmers'), option('inslen'), option('covcut'),
                       option('forward'), option('reverse'))
    contigTranslation(sample)
    motif_counts = HMMerMotifSearch(sample)

    resource_dir = option('html_resource')
    # Also saves out the relative frequency CSV for the sample.
    frequencies = relativeFrequencyTable(motif_counts, sample, resource_dir)
    # Also saves out the deviation-from-mean CSV for the sample.
    deviations = getDeviationFromMean(frequencies, sample, resource_dir)

    pdf_choice = option('pdfexport')
    relativeFrequencyHeatMap(sample, frequencies, pdf_choice, resource_dir)
    deviationFromMeanHeatMap(sample, deviations, pdf_choice, resource_dir)
    plotPCA(sample, frequencies, pdf_choice, resource_dir)

    # The report assumes heatmap.png, dheatmap.png and vapPCA.png are already in the resource directory.
    createHTML(sample, option('html_file'), frequencies, deviations)
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
390
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
def contigs(args,dict):
    """Run the motif-frequency pipeline on an already assembled contigs file.

    ``args`` is an indexable sequence of argument values and ``dict`` maps a
    field name to its position in ``args``.  The original author's note gives
    the expected layout as
    ``{'name': 2, 'pdfexport': 3, 'contigs': 4, 'html_file': 5, 'html_resource': 6}``.

    The supplied contigs file is copied to ``<name>.fa`` and then translated,
    searched for motifs, tabulated, plotted and summarised in an HTML report
    by the helper functions called below.
    """

    def field(key):
        # Resolve a named argument through the position map.
        return args[dict[key]]

    # Copy the supplied contigs to <name>.fa before the downstream helpers run.
    contigs_path = field('contigs')
    name = field('name')
    shutil.copyfile(contigs_path, name + ".fa")

    contigTranslation(name)
    motif_counts = HMMerMotifSearch(name)

    # Per the original notes, this helper saves inputname_relative_frequncy.csv.
    resource_dir = field('html_resource')
    frequencies = relativeFrequencyTable(motif_counts, name, resource_dir)

    # Per the original notes, this helper saves inputname_deviation_from_mean.csv.
    deviations = getDeviationFromMean(frequencies, name, resource_dir)

    pdf_flag = field('pdfexport')
    relativeFrequencyHeatMap(name, frequencies, pdf_flag, resource_dir)
    deviationFromMeanHeatMap(name, deviations, pdf_flag, resource_dir)
    plotPCA(name, frequencies, pdf_flag, resource_dir)

    # Original note: assumes imgs are heatmap.png, dheatmap.png, vapPCA.png
    # and are already in the html resource directory.
    createHTML(name, field('html_file'), frequencies, deviations)
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
415
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
416
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
def genomicProcess(inputname, exportpdf, forwardFN, reverseFN, htmlfile, htmlresource):
    """Assemble paired reads, then run the motif-frequency pipeline on the result.

    Parameters are passed straight through to the helper functions:
    ``inputname`` is handed to every stage, ``forwardFN`` / ``reverseFN`` go
    to the Velvet assembly step, ``exportpdf`` and ``htmlresource`` go to the
    plotting helpers, and ``htmlfile`` goes to the HTML report writer.

    Returns None.
    """
    assembleWithVelvet(inputname, forwardFN, reverseFN)
    contigTranslation(inputname)
    motif_counts = HMMerMotifSearch(inputname)

    # Per the original notes, this helper saves inputname_relative_frequncy.csv.
    frequencies = relativeFrequencyTable(motif_counts, inputname, htmlresource)

    # Per the original notes, this helper saves inputname_deviation_from_mean.csv.
    deviations = getDeviationFromMean(frequencies, inputname, htmlresource)

    relativeFrequencyHeatMap(inputname, frequencies, exportpdf, htmlresource)
    deviationFromMeanHeatMap(inputname, deviations, exportpdf, htmlresource)
    plotPCA(inputname, frequencies, exportpdf, htmlresource)

    # Original note: assumes imgs are heatmap.png, dheatmap.png, vapPCA.png
    # and are already in the html resource directory.
    createHTML(inputname, htmlfile, frequencies, deviations)
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
435
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
436
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
437
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
if __name__ == "__main__":
    # Developer smoke test: feeds hard-coded sample values through the
    # plotting and HTML-report helpers, using 'test' as the sample name and
    # "results" as the output directory argument.
    myFreqList = [
        0.111670020120724,
        0.103621730382294,
        0.0784708249496982,
        0.0110663983903421,
        0.0543259557344064,
        0.0563380281690141,
        0.0734406438631791,
        0.0160965794768612,
        0.0110663983903421,
        0.028169014084507,
        0.126760563380282,
        0.0583501006036217,
        0.062374245472837,
        0.0372233400402414,
        0.17102615694165,
    ]
    myDevList = [
        0.000790026,
        0.0073109,
        -0.001151769,
        -0.004502933,
        -0.013687421,
        -0.016159773,
        0.021689891,
        0.007863809,
        -0.003133585,
        -0.001111709,
        -0.01313879,
        0.0036997,
        -0.00935284,
        0.005640693,
        0.015243802,
    ]

    relativeFrequencyHeatMap('test', myFreqList, "PDF_Yes", "results")
    deviationFromMeanHeatMap('test', myDevList, "PDF_Yes", "results")
    plotPCA('test', myFreqList, "PDF_Yes", "results")

    createHTML('test', "results/test.html", myFreqList, myDevList)

    sys.exit()