annotate Tryp_T.py @ 5:7f3cfd8d114c draft

Uploaded file as planemo borked
author johnheap
date Mon, 03 Jun 2019 14:04:16 -0400
parents 8f6469ffef85
children 25226d33642e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
1 """
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
2 * Copyright 2018 University of Liverpool
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
3 * Author: John Heap, Computational Biology Facility, UoL
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
4 * Based on original scripts of Sara Silva Pereira, Institute of Infection and Global Health, UoL
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
5 *
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
6 * Licensed under the Apache License, Version 2.0 (the "License");
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
7 * you may not use this file except in compliance with the License.
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
8 * You may obtain a copy of the License at
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
9 *
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
10 * http://www.apache.org/licenses/LICENSE-2.0
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
11 *
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
12 * Unless required by applicable law or agreed to in writing, software
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
13 * distributed under the License is distributed on an "AS IS" BASIS,
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
15 * See the License for the specific language governing permissions and
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
16 * limitations under the License.
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
17 *
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
18 """
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
19
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
20
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
21 import subprocess
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
22 import pandas as pd
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
23 import re
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
24 import os
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
25 import sys
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
26 import matplotlib as mpl
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
27 mpl.use('Agg')
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
28 import matplotlib.pyplot as plt
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
29
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
# Labels of the fifteen phylotypes, in the order used for table rows.
pList = list(map('P{}'.format, range(1, 16)))

# Suffix appended verbatim to the external command lines built in this module.
# Left empty so tool output is not redirected; a shell redirection such as
# ">> Vap_log.txt 2>&1" can be placed here instead.
quietString = ""
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
def transcriptMapping(inputname, strain, forwardFN,reverseFN):
    """Map paired-end reads onto the strain reference with bowtie2.

    The alignment is written to ``<inputname>.sam``.

    Args:
        inputname: Output prefix; ".sam" is appended to it.
        strain: "IL3000" selects the IL3000 reference; any other value
            (including "Tc148") selects the Tc148 reference.
        forwardFN: Path of the reads file passed to bowtie2 as ``-1``.
        reverseFN: Path of the reads file passed to bowtie2 as ``-2``.

    Returns:
        The exit status of the bowtie2 command (0 on success).
    """
    try:
        from shlex import quote
    except ImportError:  # Python 2 keeps quote() in the pipes module
        from pipes import quote

    # Reference data lives in data/Reference next to this script.
    dir_path = os.path.dirname(os.path.realpath(__file__))
    reference = "IL3000" if strain == "IL3000" else "Tc148"
    refName = dir_path + "/data/Reference/" + reference

    # Quote every path so that spaces or shell metacharacters in a file name
    # cannot split or alter the command. quietString is appended unquoted
    # because it may carry a shell redirection.
    argString = " ".join([
        "bowtie2",
        "-x", quote(refName),
        "-1", quote(forwardFN),
        "-2", quote(reverseFN),
        "-S", quote(inputname + ".sam"),
    ]) + quietString
    returncode = subprocess.call(argString, shell=True)
    if returncode != 0:
        # Report the failure instead of silently dropping the exit status.
        sys.stderr.write("bowtie2 exited with status %d\n" % returncode)
    return returncode
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
45
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
def processSamFiles(inputname):
    """Convert ``<inputname>.sam`` to BAM, then sort and index it.

    Runs three samtools commands in sequence, producing
    ``<inputname>.bam``, ``<inputname>.sorted`` and
    ``<inputname>.sorted.bai`` under the current working directory.
    Each step consumes the previous step's output, so processing stops at
    the first command that fails.

    Args:
        inputname: Prefix of the SAM file; also the prefix of the outputs.

    Returns:
        0 if every step succeeded, otherwise the exit status of the first
        failing samtools command.
    """
    try:
        from shlex import quote
    except ImportError:  # Python 2 keeps quote() in the pipes module
        from pipes import quote

    # Outputs are addressed through the working directory; the SAM input is
    # read via the prefix exactly as given.
    samName = os.getcwd() + "/" + inputname
    bamFile = quote(samName + ".bam")
    sortedFile = quote(samName + ".sorted")

    commands = [
        "samtools view -bS " + quote(inputname + ".sam") + " > " + bamFile,
        "samtools sort " + bamFile + " -o " + sortedFile,
        "samtools index " + sortedFile + " " + quote(samName + ".sorted.bai"),
    ]
    for argString in commands:
        # quietString is appended unquoted: it may carry a shell redirection.
        returncode = subprocess.call(argString + quietString, shell=True)
        if returncode != 0:
            sys.stderr.write("samtools step failed with status %d: %s\n"
                             % (returncode, argString))
            return returncode
    return 0
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
69
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
70
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
71
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
72
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
def transcriptAbundance(inputname, strain):
    """Run cufflinks on ``<inputname>.sorted`` using the strain annotation.

    cufflinks is invoked with the fixed options ``-q -u -p 8`` and writes
    its results into the directory ``<inputname>.cuff``.

    Args:
        inputname: Prefix of the sorted alignment and of the output directory.
        strain: "IL3000" selects ``IL3000.gtf``; any other value (including
            "Tc148") selects ``ORFAnnotation.gtf``.

    Returns:
        The exit status of the cufflinks command (0 on success).
    """
    try:
        from shlex import quote
    except ImportError:  # Python 2 keeps quote() in the pipes module
        from pipes import quote

    # Annotation files live in data/Reference next to this script.
    dir_path = os.path.dirname(os.path.realpath(__file__))
    annotation = "IL3000.gtf" if strain == "IL3000" else "ORFAnnotation.gtf"
    refName = dir_path + "/data/Reference/" + annotation

    # Paths are quoted so unusual characters cannot break the shell command;
    # quietString is appended unquoted because it may carry a redirection.
    argString = " ".join([
        "cufflinks", "-q",
        "-G", quote(refName),
        "-o", quote(inputname + ".cuff"),
        "-u", "-p", "8",
        quote(inputname + ".sorted"),
    ]) + quietString
    returncode = subprocess.call(argString, shell=True)
    if returncode != 0:
        # Report the failure instead of silently dropping the exit status.
        sys.stderr.write("cufflinks exited with status %d\n" % returncode)
    return returncode
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
84
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
85
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
def convertToFasta(inputName, strain):
    """Write the protein records of all expressed genes to a FASTA file.

    Reads ``<inputName>.cuff/genes.fpkm_tracking``, keeps the rows whose
    FPKM is greater than zero, and copies every record of the strain's
    protein FASTA whose header (the text after ">", trailing whitespace
    removed) equals one of the kept gene ids into ``<inputName>_6frame.fas``.
    That file is the input of the later hmmsearch step. Replaces the awk
    script of the original pipeline.

    Side effects: writes the filtered table to ``cuffTest.csv`` in the
    current directory and prints a
    ``<expressed genes>:<records written> from <records in reference>`` summary.

    Args:
        inputName: Prefix of the cufflinks output directory and of the
            FASTA file to create.
        strain: "IL3000" selects ``IL3000_prot.fasta``; any other value
            (including "Tc148") selects ``148_prot.fasta``.

    Returns:
        The cufflinks table restricted to rows with FPKM > 0.
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    # Unknown strains fall back to the Tc148 proteins, matching the default
    # used by the other pipeline steps. The previous fallback pointed at the
    # GTF annotation, which holds no FASTA records and gave an empty output.
    proteins = "IL3000_prot.fasta" if strain == "IL3000" else "148_prot.fasta"
    refName = dir_path + "/data/Reference/" + proteins

    cuff_df = pd.read_csv(inputName + ".cuff/genes.fpkm_tracking", sep='\t')
    cuff_df = cuff_df[(cuff_df['FPKM'] > 0)]
    cuff_df.to_csv("cuffTest.csv")
    gene_id_List = cuff_df['gene_id'].tolist()
    # A set gives constant-time membership tests for each FASTA header.
    expressed = set(gene_id_List)

    written = 0   # records copied to the output
    total = 0     # records seen in the reference
    keep = False  # True while inside a record that is being copied
    with open(inputName + "_6frame.fas", 'w') as outfile:
        with open(refName, 'r') as ref:
            for line in ref:
                if line.startswith('>'):
                    total += 1
                    keep = line[1:].rstrip() in expressed
                    if keep:
                        written += 1
                # Header and sequence lines of a selected record are copied
                # verbatim; everything else is skipped.
                if keep:
                    outfile.write(line)
    print(str(len(gene_id_List)) + ":" + str(written) + " from " + str(total))
    return cuff_df
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
136
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
def HMMerMotifSearch(name, strain, cuff_df):
    """Search the expressed proteins for each motif and tally phylotypes.

    Runs ``hmmsearch`` once per motif model against ``<name>_6frame.fas``,
    writing each result to ``Phy<motif>.out`` in the current directory.
    Gene ids are collected from each result up to and including the first
    line containing the word "inclusion". The per-motif hits are then
    merged into 15 phylotype groups; each group reports its number of
    distinct genes and the summed FPKM of those genes.

    Args:
        name: Prefix of the FASTA file to search.
        strain: "IL3000" selects the ``TcIL3000_<n>_<n>`` gene-id pattern;
            any other value (including "Tc148") selects ``Tc148<digits>``.
        cuff_df: Table with ``gene_id`` and ``FPKM`` columns. Every gene
            found by hmmsearch must be present in it, otherwise the FPKM
            lookup raises ``IndexError``.

    Returns:
        A ``(countList, weightList)`` tuple. Each list has 16 entries: one
        per phylotype followed by the overall total.
    """
    try:
        from shlex import quote
    except ImportError:  # Python 2 keeps quote() in the pipes module
        from pipes import quote

    motifs = ['1', '2a', '2b', '3', '4a', '4b', '4c', '5', '6', '7', '8a', '8b', '9a', '9b',
              '9c', '10a', '10b', '11a', '11b', '12', '13a', '13b', '13c', '13d', '14', '15a', '15b', '15c']
    # Number of consecutive motif models that make up each phylotype; the
    # sizes add up to len(motifs).
    concatGroups = [1, 2, 1, 3, 1, 1, 1, 2, 3, 2, 2, 1, 4, 1, 3]

    dir_path = os.path.dirname(os.path.realpath(__file__))
    phylopath = dir_path + "/data/Motifs/Phylotype"

    # The pattern depends only on the strain, so compile it once.
    if strain == "IL3000":
        gene_re = re.compile(r"TcIL3000_[0-9]{1,4}_[0-9]{1,5}")
    else:
        gene_re = re.compile(r"Tc148[0-9]{1,8}")

    compoundList = []  # one list of gene ids per motif, in motif order
    for motif in motifs:
        outName = "Phy" + motif + ".out"
        argString = ("hmmsearch " + quote(phylopath + motif + ".hmm") + " "
                     + quote(name + "_6frame.fas") + " > " + quote(outName))
        print(argString)
        returncode = subprocess.call(argString, shell=True)
        if returncode != 0:
            # Keep going (the result file still exists via the redirect),
            # but make the failure visible.
            sys.stderr.write("hmmsearch exited with status %d for motif %s\n"
                             % (returncode, motif))

        hits = []
        with open(outName, 'r') as hmmResult:
            for line in hmmResult:
                found = gene_re.search(line)
                if found:
                    hits.append(found.group())
                # Stop at the "inclusion" marker line; hits listed after it
                # are not counted.
                if "inclusion" in line:
                    print("inclusion threshold reached")
                    break
        compoundList.append(hits)

    countList = []
    weightList = []
    totalCount = 0
    totalWeight = 0
    start = 0
    for size in concatGroups:
        # Merge the hits of this phylotype's motifs, counting each gene once.
        genes = set()
        for hits in compoundList[start:start + size]:
            genes.update(hits)
        start += size

        # Sum in sorted order so floating-point rounding is reproducible
        # from run to run (set iteration order is not).
        groupWeight = 0
        for gene in sorted(genes):
            groupWeight = groupWeight + cuff_df.loc[cuff_df['gene_id'] == gene, 'FPKM'].iloc[0]

        countList.append(len(genes))
        weightList.append(groupWeight)
        totalCount += len(genes)
        totalWeight += groupWeight

    # The final entry of each list is the total over all phylotypes.
    countList.append(totalCount)
    weightList.append(totalWeight)
    return countList, weightList
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
199
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
def relativeFrequencyTable(countList, name, htmlresource):
    """Convert phylotype counts to relative frequencies and save them as CSV.

    Each phylotype count is divided by the total stored after the last
    phylotype entry of ``countList``. The table is written to
    ``<htmlresource>/<name>_t_relative_frequency.csv``.

    Args:
        countList: One count per phylotype (in ``pList`` order) followed by
            the total count.
        name: Sample name used as the CSV file name prefix.
        htmlresource: Directory that receives the CSV file.

    Returns:
        A list with one relative frequency per phylotype. The total is not
        included. All frequencies are 0.0 when the total count is zero.
    """
    phylotypes = len(pList)
    total = float(countList[phylotypes])
    if total:
        relFreqList = [countList[i] / total for i in range(phylotypes)]
    else:
        # No hits at all: report zero frequencies instead of dividing by zero.
        relFreqList = [0.0] * phylotypes

    relFreq_df = pd.DataFrame({'Phylotype': pList, 'Relative Frequency': relFreqList})
    j_fname = htmlresource + "/" + name + "_t_relative_frequency.csv"
    relFreq_df.to_csv(j_fname)
    return relFreqList
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
211
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
212
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
def weightedFrequencyTable(countList, name, htmlresource):
    """Convert weighted phylotype totals to frequencies and save them as CSV.

    countList is indexed 0-14 for the fifteen phylotypes (p1-p15) and carries
    the overall total at index 15.  Each of the fifteen values is divided by
    that total, the result is written next to the labels in pList (a name
    defined elsewhere in this file) to
    "<htmlresource>/<name>_t_weighted_frequency.csv", and the list of fifteen
    frequencies is returned.

    A total of zero yields fifteen 0.0 entries instead of a division error.
    """
    total = float(countList[15])
    if total == 0:
        # Nothing was accumulated: report an empty profile rather than dividing by zero.
        relFreqList = [0.0] * 15
    else:
        relFreqList = [countList[i] / total for i in range(15)]

    relFreq_df = pd.DataFrame({'Phylotype': pList, 'Weighted Frequency': relFreqList})
    j_fname = htmlresource + "/" + name + "_t_weighted_frequency.csv"
    relFreq_df.to_csv(j_fname)
    return relFreqList  # fifteen weighted frequencies, index 0-14 = p1-p15
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
224
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
225
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
226
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
def createStackedBar(name,freqList,strain,pdf,html_resource):
    """Plot the sample's phylotype frequencies beside a reference genome VAP.

    Two stacked bars are drawn with pyplot: the bar at x=0 stacks the first
    fifteen values of freqList, the bar at x=1 stacks the built-in reference
    profile selected by strain ("Tc148" or "IL3000").  The figure is saved as
    "stackedbar.png" inside html_resource and, when pdf == 'PDF_Yes', also as
    "stackedbar.pdf".

    Raises:
        ValueError: if strain is not one of the two supported names.  The
            check happens before anything is drawn, so an unsupported strain
            no longer fails part-way through plotting with an unbound name.
    """
    # x-axis label and fifteen phylotype proportions (p1-p15) per reference strain.
    reference_profiles = {
        "Tc148": ('Tc148\nGenome VAP',
                  [0.072, 0.032, 0.032, 0.004, 0.007,
                   0.005, 0.202, 0.004, 0.006, 0.014,
                   0.130, 0.133, 0.054, 0.039, 0.265]),
        "IL3000": ('IL3000\nGenome VAP',
                   [0.073, 0.040, 0.049, 0.018, 0.060,
                    0.055, 0.054, 0.025, 0.012, 0.060,
                    0.142, 0.100, 0.061, 0.078, 0.172]),
    }
    if strain not in reference_profiles:
        raise ValueError("Unsupported strain %r; expected one of %s"
                         % (strain, sorted(reference_profiles)))
    VAPname, VAPtable = reference_profiles[strain]

    # plt.get_cmap is the stable accessor; plt.cm.get_cmap is deprecated and
    # absent from recent matplotlib releases.
    cmap = plt.get_cmap('tab20')
    palette = [cmap(i) for i in range(cmap.N)]

    width = 0.35  # the width of the bars
    plots = []
    fpos = 0  # running top of the sample stack
    vpos = 0  # running top of the reference stack
    for p in range(15):
        plt.bar(0, freqList[p], width, color=palette[p], bottom=fpos)
        fpos += freqList[p]

        segment = plt.bar(1, VAPtable[p], width, color=palette[p], bottom=vpos)
        vpos += VAPtable[p]

        # Only the reference segments are kept as legend handles; the sample
        # segments share the same colours.
        plots.append(segment)

    plt.xticks([0, 1], [name, VAPname])
    # Handles are reversed so the legend reads top-down like the stack: p15 ... p1.
    plt.legend(plots[::-1], ['p' + str(n) for n in range(15, 0, -1)])

    # Backslashes are escaped explicitly: "\i" is not a valid Python escape.
    title = "Figure Legend: The transcriptomic Variant Antigen Profile of $\\itTrypanosoma$ $\\itcongolense$ estimated as phylotype " \
            "proportion adjusted for transcript abundance and the reference genomic Variant Antigen Profile. " \
            "\nData was produced with the 'Variant Antigen Profiler' (Silva Pereira and Jackson, 2018)."
    plt.text(-0.3, -0.15, title, va="top", wrap=True)
    plt.tight_layout(pad=1.5)
    plt.subplots_adjust(bottom=0.3, top=0.99, left=0.125, right=0.9, hspace=0.2, wspace=0.2)

    plt.savefig(html_resource + "/stackedbar.png")
    if pdf == 'PDF_Yes':
        plt.savefig(html_resource + "/stackedbar.pdf")
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
275 #plt.show()
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
276
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
277
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
def createHTML(name,htmlfn,htmlresource,freqList,weightList):
    """Write the HTML report for one transcriptomic sample to htmlfn.

    The page contains a heading with the sample name, a table with one row per
    phylotype (phy1-phy15) showing freqList[i] and weightList[i] to four
    decimal places, and an <img> that points at 'stackedbar.png' by relative
    path, so that image is expected to sit alongside the report.

    name is HTML-escaped before being inserted so characters such as '<' or
    '&' cannot break the markup.  htmlresource is accepted for interface
    compatibility but is not used here.
    """
    import html  # local import: only needed for escaping the sample name

    # One table row per phylotype, values fixed to four decimals.
    tabString = "".join(
        "<tr><td>phy" + str(i + 1) + "</td><td>" + format(freqList[i], '.4f')
        + "</td><td>" + format(weightList[i], '.4f') + "</td></tr>"
        for i in range(15)
    )

    pieces = [
        r"<html><title>T.congolense VAP</title><body><div style='text-align:center'><h2><i>Trypanosoma congolense</i> Variant Antigen Profile</h2><h3>",
        html.escape(name),
        # The heading used to be followed by a stray </p> with no opening tag.
        r"<br>Transcriptomic Analysis</h3>",
        "<p style = 'margin-left:20%; margin-right:20%'>Table Legend: Variant Antigen Profiles of a transcriptome of <i>Trypanosoma congolense</i> estimated as phylotype proportion. "
        "Weighted frequency refers to the phylotype proportion based transcript abundance. "
        "Data was produced with the 'Variant Antigen Profiler' (Silva Pereira and Jackson, 2018).</p> ",
        r"<style> table, th, tr, td {border: 1px solid black; border-collapse: collapse;}</style>",
        r"<table style='width:50%;margin-left:25%;text-align:center'><tr><th>Phylotype</th><th>Relative Frequency</th><th>Weighted Frequency</th></tr>",
        tabString,
        "</table><br><br><br><br><br>",
        r"<p> <h3>Stacked Bar chart of Phylotype Frequency</h3> The 'weighted' relative frequency of each phylotype alongside the VAP of selected strain.</p>",
        r"<img src = 'stackedbar.png' alt='Stacked bar chart of phylotype variation' style='max-width:100%'><br><br>",
        # Close the elements opened at the top; previously the document was left open.
        r"</div></body></html>",
    ]

    with open(htmlfn, "w") as htmlfile:
        htmlfile.write("".join(pieces))
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
310
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
311 #argdict = {'name':2, 'pdfexport': 3, 'strain': 4, 'forward': 5, 'reverse': 6, 'html_file': 7, 'html_resource': 8}
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
def transcriptomicProcess(args,dict):
    """Run the transcriptomic pipeline end to end for one sample.

    args is an indexable collection of argument values and dict maps the
    option names used below ('name', 'strain', 'forward', 'reverse',
    'pdfexport', 'html_file', 'html_resource') to positions in args.
    The steps run in order: read mapping, SAM processing, abundance
    estimation, FASTA conversion, motif search, the two frequency tables,
    the stacked bar chart and finally the HTML report.
    """
    def option(key):
        # Resolve an option name to its value through the position map.
        return args[dict[key]]

    sample = option('name')
    strain = option('strain')

    transcriptMapping(sample, strain, option('forward'), option('reverse'))  # uses bowtie
    processSamFiles(sample)  # uses samtools
    transcriptAbundance(sample, strain)  # uses cufflinks -> ?.cuff/*.*
    cuff_df = convertToFasta(sample, strain)
    countList, weightList = HMMerMotifSearch(sample, strain, cuff_df)

    relFreqList = relativeFrequencyTable(countList, sample, option('html_resource'))
    relWeightList = weightedFrequencyTable(weightList, sample, option('html_resource'))

    createStackedBar(sample, relWeightList, strain, option('pdfexport'), option('html_resource'))
    createHTML(sample, option('html_file'), option('html_resource'), relFreqList, relWeightList)
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
322
36cb22bd911d planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
johnheap
parents:
diff changeset
if __name__ == "__main__":
    # Stand-alone run: skip the mapping/abundance/search stages and feed
    # fixed frequency lists straight into the chart and report writers.
    relFreqList = [
        0.111842105, 0.059210526, 0.026315789,
        0.013157895, 0.006578947, 0.013157895,
        0.032894737, 0.019736842, 0.039473684,
        0.046052632, 0.217105263, 0.065789474,
        0.151315789, 0.059210526, 0.138157895,
    ]

    relWeightList = [
        0.07532571, 0.05900545, 0.009601452,
        0.042357532, 0.01236219, 0.001675663,
        0.04109726, 0.097464248, 0.057491666,
        0.05826875, 0.279457473, 0.070004772,
        0.065329007, 0.085361298, 0.045197529,
    ]

    createStackedBar(
        name='T_Test',
        freqList=relWeightList,
        strain='Tc148',
        pdf='PDF_Yes',
        html_resource='results',
    )
    createHTML(
        name="t_test",
        htmlfn="results/t_test.html",
        htmlresource="results",
        freqList=relFreqList,
        weightList=relWeightList,
    )