19
|
1 """
|
|
2 * Galaxy Version
|
|
3
|
|
4 * Copyright 2019 University of Liverpool
|
|
5 * Author John Heap, Computational Biology Facility, UoL
|
|
6 * Based on original scripts of Sara Silva Silva Pereira, Institute of Infection and Global Health, UoL
|
|
7 *
|
|
8 * Licensed under the Apache License, Version 2.0 (the "License");
|
|
9 * you may not use this file except in compliance with the License.
|
|
10 * You may obtain a copy of the License at
|
|
11 *
|
|
12 * http://www.apache.org/licenses/LICENSE-2.0
|
|
13 *
|
|
14 * Unless required by applicable law or agreed to in writing, software
|
|
15 * distributed under the License is distributed on an "AS IS" BASIS,
|
|
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
17 * See the License for the specific language governing permissions and
|
|
18 * limitations under the License.
|
|
19 *
|
|
20 """
|
|
21
|
|
22
|
|
23 import subprocess
|
|
24 import pandas as pd
|
|
25 import re
|
|
26 import os
|
|
27 import sys
|
|
28 import shutil
|
|
29 # import matplotlib as mpl
|
|
30 # mpl.use('Agg')
|
|
31 import matplotlib.pyplot as plt
|
|
32 import numpy as np
|
|
33
|
|
34
|
|
35
|
|
36
|
|
37 # copies the user provided Fasta file to data/reference/file/file.fasta
|
|
def uploadUserReferenceFastq(refFastq):
    """Copy the user's reference file beside this script and index it with bowtie2.

    The file is copied to ``<script dir>/data/Reference/<ref>/<basename>``,
    where ``<ref>`` is the basename without its extension, and
    ``bowtie2-build`` is then run on the copy using the index prefix
    ``<script dir>/data/Reference/<ref>/<ref>``.

    Args:
        refFastq: path of the reference sequence file supplied by the user.

    Raises:
        OSError: if the copy fails or ``bowtie2-build`` cannot be started.
    """
    refBase = os.path.basename(refFastq)
    ref = os.path.splitext(refBase)[0]  # 'mydata/test.fasta' -> 'test'
    scriptDir = os.path.dirname(os.path.realpath(__file__))
    refDir = os.path.join(scriptDir, "data", "Reference", ref)
    # makedirs also creates a missing 'data/Reference' parent, which a plain
    # mkdir would fail on, and tolerates the directory already existing.
    os.makedirs(refDir, exist_ok=True)
    refCopy = os.path.join(refDir, refBase)
    shutil.copy(refFastq, refCopy)
    print("Building the bowtie2 reference files.")
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters from being split or interpreted.
    subprocess.call(["bowtie2-build", refCopy, os.path.join(refDir, ref)])
    return
|
|
51
|
|
def transcriptMapping(inputname, refFastq, forwardFN, reverseFN):
    """Map paired-end reads against the user's reference with bowtie2.

    The bowtie2 index prefix is derived from ``refFastq`` as
    ``<script dir>/data/Reference/<ref>/<ref>``. Alignments are written to
    ``<inputname>.sam``.

    Args:
        inputname: sample name; used as the stem of the output SAM file.
        refFastq: path of the reference file (only its basename is used).
        forwardFN: file passed to bowtie2 as ``-1``.
        reverseFN: file passed to bowtie2 as ``-2``.

    Raises:
        OSError: if ``bowtie2`` cannot be started.
    """
    ref = os.path.splitext(os.path.basename(refFastq))[0]
    scriptDir = os.path.dirname(os.path.realpath(__file__))
    indexPrefix = os.path.join(scriptDir, "data", "Reference", ref, ref)
    command = [
        "bowtie2",
        "-x", indexPrefix,
        "-1", forwardFN,
        "-2", reverseFN,
        "-S", inputname + ".sam",
    ]
    print(" ".join(command))
    # Run without a shell so file names are passed to bowtie2 verbatim.
    subprocess.call(command)
    return
|
|
64
|
|
65
|
|
66
|
|
def processSamFiles(inputname):
    """Convert ``<inputname>.sam`` to BAM, then sort and index it with samtools.

    Produces ``<cwd>/<inputname>.bam``, ``<cwd>/<inputname>.sorted`` and
    ``<cwd>/<inputname>.sorted.bai``. The exit codes of the samtools calls
    are not checked.

    Args:
        inputname: sample name; ``<inputname>.sam`` is read relative to the
            current working directory.

    Raises:
        OSError: if the BAM file cannot be opened or ``samtools`` cannot be
            started.
    """
    samName = os.path.join(os.getcwd(), inputname)
    bamFile = samName + ".bam"
    sortedFile = samName + ".sorted"

    viewCommand = ["samtools", "view", "-bS", inputname + ".sam"]
    print(" ".join(viewCommand) + " > " + bamFile)
    # The redirection is done here rather than by a shell.
    with open(bamFile, "wb") as bamOut:
        subprocess.call(viewCommand, stdout=bamOut)

    sortCommand = ["samtools", "sort", bamFile, "-o", sortedFile]
    print("argstring = " + " ".join(sortCommand))
    subprocess.call(sortCommand)

    indexCommand = ["samtools", "index", sortedFile, sortedFile + ".bai"]
    print("argstring = " + " ".join(indexCommand))
    subprocess.call(indexCommand)
    return
|
|
82
|
|
83 # we will not have the .gtf file so call cufflinks without -G option
|
|
def transcriptAbundance(inputname, threads=8):
    """Run cufflinks on ``<inputname>.sorted`` and delete the intermediate files.

    Cufflinks is called without ``-G`` because no ``.gtf`` annotation is
    available; its output goes to the directory ``<inputname>.cuff``.
    Afterwards ``<inputname>.sorted``, ``<inputname>.sorted.bai`` and
    ``<inputname>.bam`` are removed.

    Args:
        inputname: sample name used as the stem of all file names.
        threads: value passed to cufflinks ``-p`` (default 8).

    Raises:
        OSError: if ``cufflinks`` cannot be started or one of the
            intermediate files cannot be removed.
    """
    command = [
        "cufflinks",
        "-o", inputname + ".cuff",
        "-u",
        "-p", str(threads),
        inputname + ".sorted",
    ]
    subprocess.call(command)
    for suffix in (".sorted", ".sorted.bai", ".bam"):
        os.remove(inputname + suffix)
    return
|
|
91
|
|
def transcriptsForBlast(name, refFastq):
    """Write the reference sequences that cufflinks reported to ``<name>_for_blast.fa``.

    Transcript ids are taken from the ``locus`` column of
    ``<cwd>/<name>.cuff/genes.fpkm_tracking`` (the text before the first
    ``:``). Each id is matched exactly against the first whitespace-delimited
    token of a header line in
    ``<script dir>/data/Reference/<ref>/<basename of refFastq>``. Matching
    records are written in sorted id order; ids with no record in the
    reference are skipped.

    Args:
        name: sample name (stem of the cufflinks directory and output file).
        refFastq: path of the reference file (only its basename is used).
    """
    refBase = os.path.basename(refFastq)
    ref = os.path.splitext(refBase)[0]  # 'mydata/test.fasta' -> 'test'
    scriptDir = os.path.dirname(os.path.realpath(__file__))
    refPath = os.path.join(scriptDir, "data", "Reference", ref, refBase)

    trackingFile = os.path.join(os.getcwd(), name + ".cuff", "genes.fpkm_tracking")
    track_df = pd.read_csv(trackingFile, sep="\t")
    # partition() keeps the whole value when there is no ':'; slicing with the
    # result of find() would silently drop the last character in that case.
    wantedIds = {str(locus).partition(":")[0] for locus in track_df["locus"]}

    with open(refPath, "r") as myRef:
        refData = myRef.read()

    # Index the records by sequence id so that an id which is a prefix of
    # another (e.g. 'c1' and 'c10') cannot select the wrong record.
    records = {}
    for chunk in refData.split(">")[1:]:
        headerTokens = chunk.split("\n", 1)[0].split()
        if headerTokens:
            records.setdefault(headerTokens[0], chunk.rstrip("\n") + "\n")

    with open(name + "_for_blast.fa", "w") as outfile:
        for trans_id in sorted(wantedIds):
            record = records.get(trans_id)
            if record is not None:
                outfile.write(">" + record)
|
|
121
|
|
def blastContigs(test_name, html_resource, database):
    """BLAST the selected transcripts and keep the best hit per transcript.

    Runs ``blastx`` on ``<test_name>_for_blast.fa`` with CSV output
    (``-outfmt 10``) into ``<test_name>_blast.txt``. Hits are kept when
    ``pident >= 70``, ``length > 100`` and ``evalue <= 0.001``; for every
    query the hit with the smallest e-value is retained. The result is
    written to ``<test_name>_transcript.csv`` and returned.

    Args:
        test_name: sample name (stem of the input and output files).
        html_resource: unused; kept for interface compatibility.
        database: BLAST database passed to ``-db``.

    Returns:
        A DataFrame with columns qaccver, saccver, pident, evalue, bitscore
        sorted by qaccver, or the string ``"Error in blastall"`` when blastx
        could not be run or exited with a non-zero status.
    """
    allColumns = ['qaccver', 'saccver', 'pident', 'length', 'mismatch', 'gapopen',
                  'qstart', 'qend', 'sstart', 'send', 'evalue', 'bitscore']
    keptColumns = ['qaccver', 'saccver', 'pident', 'evalue', 'bitscore']
    blastOutput = test_name + "_blast.txt"

    command = [
        "blastx",
        "-db", database,
        "-query", test_name + "_for_blast.fa",
        "-outfmt", "10",
        "-out", blastOutput,
    ]
    print(" ".join(command))
    try:
        returncode = subprocess.call(command)
    except OSError:
        # blastx is not installed / not executable: report it the same way
        # as a failed run.
        returncode = -1
    if returncode != 0:
        return "Error in blastall"

    try:
        # -outfmt 10 has no header line; without header=None the first hit
        # would be consumed as column names and lost.
        blast_df = pd.read_csv(blastOutput, header=None, names=allColumns)
    except pd.errors.EmptyDataError:
        blast_df = pd.DataFrame(columns=allColumns)

    if blast_df.empty:
        b_df = pd.DataFrame(columns=keptColumns)
    else:
        goodHits = blast_df[(blast_df['pident'] >= 70)
                            & (blast_df['length'] > 100)
                            & (blast_df['evalue'] <= 0.001)][keptColumns]
        # A transcript may hit several phylotypes: keep the hit with the
        # smallest e-value. The stable sort leaves ties in BLAST's own order.
        b_df = (goodHits.sort_values(by=['qaccver', 'evalue'], kind='mergesort')
                .drop_duplicates(subset='qaccver', keep='first')
                .reset_index(drop=True))

    b_df.to_csv(test_name + '_transcript.csv')
    return b_df
|
|
151
|
|
152
|
|
def createMultiHTML(tdict, composite_df):
    """Write the multi-sample HTML report to ``tdict['html_file']``.

    The page holds a table with one row per row of ``composite_df`` and one
    column per sample, followed by an ``<img>`` that references
    ``<name>_phylotypes.png``.

    Args:
        tdict: dict providing 'name' (sample name shown on the page and used
            in the image file name) and 'html_file' (output path).
        composite_df: DataFrame with a 'Phylotype' column; every column after
            the first is treated as one sample's weights and printed with
            four decimals.
    """
    from html import escape  # local import: not among the module-level imports

    sampleName = escape(str(tdict['name']))
    sampleLabels = composite_df.columns.tolist()[1:]

    parts = [
        "<html><head><title>T.vivax VAP (Transcriptomic Pathway)</title>",
        "<style> table, th, tr, td {border: 1px solid black; border-collapse: collapse;}</style></head>",
        "<body><div style='text-align:center'><h2><i>Trypanosoma vivax</i> Variant Antigen Profile</h2><h3>",
        "Sample name: " + sampleName,
        "<br>Transcriptomic Analysis</h3>",
        "<p style = 'margin-left:20%; margin-right:20%'>Legend: "
        "Variant Antigen Profile of a <i>Trypanosoma vivax</i> transcriptomes. "
        "Weighted Frequency reflects Phylotype abundance and is expressed as "
        "phylotype frequencies adjusted for the combined transcript abundance. "
        "Data was produced with VAPPER-Variant Antigen Profiler "
        "(Silva Pereira et al., 2019).</p> ",
    ]

    # Header row: opened and closed inside the table.
    headerCells = "".join("<th>" + escape(str(label)) + "</th>" for label in sampleLabels)
    parts.append("<table style='width:50%;margin-left:25%;text-align:center'>"
                 "<tr><th>Phylotype</th>" + headerCells + "</tr>\n")

    # Plain lists give positional access, so the DataFrame's index labels
    # do not matter.
    phylotypes = composite_df['Phylotype'].tolist()
    weightColumns = [composite_df[label].tolist() for label in sampleLabels]
    for rowNum, phylotype in enumerate(phylotypes):
        cells = "".join("<td>" + format(column[rowNum], '.4f') + "</td>"
                        for column in weightColumns)
        parts.append("<tr><td>" + str(phylotype) + "</td>" + cells + "</tr>\n")

    parts.append("</table><br><br><br><br><br>")
    parts.append("<h3>Weighted Relative Frequencies of Detected Phylotypes.</h3>")
    parts.append("<img src = '" + sampleName + "_phylotypes.png' "
                 "alt='Bar chart of phylotype variation' style='max-width:100%'><br><br>")
    parts.append("</div></body></html>")

    with open(tdict['html_file'], "w") as htmlfile:
        htmlfile.write("".join(parts))
|
|
194
|
|
195
|
|
def createHTML(tdict, sum_df):
    """Write the single-sample HTML report.

    The page is written to ``tdict['html_resource'] + '/' + tdict['html_file']``
    and holds a table with one row per row of ``sum_df`` (phylotype, FPKM
    and FPKM divided by the column total, both with two decimals), followed
    by an ``<img>`` that references ``<name>_phylotypes.png``.

    Args:
        tdict: dict providing 'name', 'html_resource' and 'html_file'.
        sum_df: DataFrame with 'Phylotype' and 'FPKM' columns.
    """
    from html import escape  # local import: not among the module-level imports

    sampleName = escape(str(tdict['name']))
    parts = [
        "<html><head><title>T.vivax VAP (Transcriptomic Pathway)</title>",
        "<style> table, th, tr, td {border: 1px solid black; border-collapse: collapse;}</style></head>",
        "<body><div style='text-align:center'><h2><i>Trypanosoma vivax</i> Variant Antigen Profile</h2><h3>",
        "Sample name: " + sampleName,
        "<br>Transcriptomic Analysis</h3>",
        "<p style = 'margin-left:20%; margin-right:20%'>Legend: "
        "Variant Antigen Profile of a <i>Trypanosoma vivax</i> transcriptome. "
        "Weighted Frequency reflects Phylotype abundance and is expressed as "
        "phylotype frequencies adjusted for the combined transcript abundance. "
        "Data was produced with VAPPER-Variant Antigen Profiler "
        "(Silva Pereira et al., 2019).</p> ",
        "<table style='width:50%;table-layout: auto; margin-left:25%;text-align:center'>"
        "<tr><th>Phylotype</th><th>Combined FPKM</th><th>Weighted Frequency</th></tr>",
    ]

    total = sum_df['FPKM'].sum()
    # zip() walks the columns by position, so the DataFrame's index labels
    # do not matter.
    for phylotype, fpkm in zip(sum_df['Phylotype'], sum_df['FPKM']):
        weight = fpkm / total if total != 0 else 0.0
        parts.append("<tr><td>" + str(phylotype) + "</td><td>" + format(fpkm, '.2f')
                     + "</td><td>" + format(weight, '.2f') + "</td></tr>")

    parts.append("</table><br><br><br><br><br>")
    parts.append("<h3>Weighted Relative Frequencies of Detected Phylotypes.</h3>")
    parts.append("<img src = '" + sampleName + "_phylotypes.png' "
                 "alt='Bar chart of phylotype variation' style='max-width:100%'><br><br>")
    parts.append("</div></body></html>")

    with open(tdict['html_resource'] + '/' + tdict['html_file'], "w") as htmlfile:
        htmlfile.write("".join(parts))
|
|
233
|
|
234
|
|
235
|
|
def getPhyloNumber(sac):
    """Return the integer that follows the first character of ``sac``.

    The number runs from the second character up to the first underscore,
    or to the end of the string when there is no underscore
    (e.g. ``'P12_abc'`` -> 12, ``'P7'`` -> 7).

    Raises:
        ValueError: if that part of the string is not an integer.
    """
    # partition() copes with a missing '_'; slicing with find()'s -1 would
    # silently drop the last character instead.
    return int(sac[1:].partition('_')[0])
|
|
239
|
|
def combineFPMK(tdict):
    """Sum the cufflinks FPKM values of the BLAST-assigned transcripts.

    Reads ``<name>.cuff/genes.fpkm_tracking`` and ``<name>_transcript.csv``
    (``name`` is ``tdict['name']``), attaches to every transcript in the CSV
    the FPKM of the first tracking row with the same locus id, and sums the
    FPKM per ``saccver`` and per phylotype number. Both tables are also
    written to ``FPKM_sum.csv`` and ``FPKM_sum2.csv`` in the working
    directory.

    Args:
        tdict: dict providing 'name'.

    Returns:
        ``(FPKMsum_df, FPKMsum2_df)``: FPKM summed per saccver (columns
        saccver, FPKM, Phylotype; sorted by Phylotype) and FPKM summed per
        Phylotype.
    """
    scriptDir = os.path.dirname(os.path.realpath(__file__))

    def locate(relativePath):
        # The earlier pipeline steps write their results to the working
        # directory; fall back to the script directory, which is where this
        # function used to look.
        inWorkDir = os.path.join(os.getcwd(), relativePath)
        if os.path.exists(inWorkDir):
            return inWorkDir
        return os.path.join(scriptDir, relativePath)

    fpkm_df = pd.read_csv(locate(tdict['name'] + '.cuff/genes.fpkm_tracking'), sep='\t')
    reducedBlast_df = pd.read_csv(locate(tdict['name'] + '_transcript.csv'))

    # First FPKM seen per locus id (the text before ':'), remembering which
    # ids occur more than once so the warning can still be printed.
    fpkmByLocus = {}
    repeatedLoci = set()
    for locus, fpkm in zip(fpkm_df['locus'], fpkm_df['FPKM']):
        locusId = str(locus).partition(':')[0]
        if locusId in fpkmByLocus:
            repeatedLoci.add(locusId)
        else:
            fpkmByLocus[locusId] = fpkm

    rows = []
    for q, sv in zip(reducedBlast_df['qaccver'], reducedBlast_df['saccver']):
        key = str(q)
        if key not in fpkmByLocus:
            # No abundance estimate for this transcript: skip it instead of
            # failing with an IndexError.
            print('WARNING NO FPKM FOR ' + key)
            continue
        if key in repeatedLoci:
            print('WARNING MULTIPLE FPKM')
        rows.append({'qaccver': q, 'saccver': sv, 'FPKM': fpkmByLocus[key]})
    new_df = pd.DataFrame(rows, columns=['qaccver', 'saccver', 'FPKM'])

    FPKMsum_df = new_df.groupby('saccver')['FPKM'].sum().reset_index()
    FPKMsum_df['Phylotype'] = [getPhyloNumber(sv) for sv in FPKMsum_df['saccver']]
    FPKMsum_df = FPKMsum_df.sort_values(by=['Phylotype'], kind='mergesort')
    FPKMsum_df = FPKMsum_df.reset_index(drop=True)
    FPKMsum_df.to_csv('FPKM_sum.csv')

    # Several saccver entries can belong to the same phylotype.
    FPKMsum2_df = FPKMsum_df.groupby('Phylotype')['FPKM'].sum().reset_index()
    FPKMsum2_df = FPKMsum2_df.sort_values(by=['Phylotype'])
    FPKMsum2_df.to_csv('FPKM_sum2.csv')
    return FPKMsum_df, FPKMsum2_df
|
|
283
|
|
284
|
|
285
|
|
def normalisef(f, max):
    """Return ``f / max``, or 0.0 when ``max`` is zero.

    The zero guard covers a sample whose FPKM values are all zero, where a
    plain division would raise or produce NaN.
    """
    # 'max' shadows the builtin but is part of the signature callers may use.
    if max == 0:
        return 0.0
    return f / max
|
|
288
|
|
def getComposite_sum2(nameList, sum2_dfs):
    """Merge per-sample phylotype tables into one table of weighted frequencies.

    Each frame in ``sum2_dfs`` gets a new column ``'w'`` (its FPKM divided by
    the frame's FPKM total; 0.0 when the total is zero). The result has a
    ``'Phylotype'`` column holding the sorted union of all phylotypes and one
    column per sample, named from ``nameList``, holding that sample's weight
    or 0 where the sample lacks the phylotype.

    Args:
        nameList: column name for each sample, parallel to ``sum2_dfs``.
        sum2_dfs: list of DataFrames with 'Phylotype' and 'FPKM' columns;
            they are modified in place (column 'w' is added).

    Returns:
        The composite DataFrame.
    """
    weightMaps = []
    allPhylotypes = set()
    for sample_df in sum2_dfs:
        total = sample_df['FPKM'].sum()
        sample_df['w'] = sample_df['FPKM'] / total if total != 0 else 0.0
        weights = {}
        for phylotype, weight in zip(sample_df['Phylotype'], sample_df['w']):
            # If a phylotype is listed twice, the first entry wins.
            weights.setdefault(phylotype, weight)
        weightMaps.append(weights)
        allPhylotypes.update(weights)

    phyList = sorted(allPhylotypes)
    composite_sum2_df = pd.DataFrame(phyList, columns=['Phylotype'])
    for i, weights in enumerate(weightMaps):
        composite_sum2_df[nameList[i]] = [weights.get(p, 0) for p in phyList]
    return composite_sum2_df
|
|
317
|
|
318
|
|
def doMultiBarChart(tdict, composite_df):
    """Draw a grouped horizontal bar chart of phylotype weights for several samples.

    Every row of ``composite_df`` (one phylotype) gets a group of bars, one
    per sample column. The figure is saved as ``<name>_phylotypes.png`` in
    ``tdict['html_resource']`` and, when ``tdict['pdf'] == 'PDF_Yes'``, also
    as ``<name>phylotypes.pdf``.

    Args:
        tdict: dict providing 'name', 'html_resource' and 'pdf'.
        composite_df: DataFrame with a 'Phylotype' column; every column after
            the first is treated as one sample's weights.
    """
    labelList = composite_df.columns.tolist()
    sampnum = len(labelList) - 1

    # pyplot.get_cmap is available in all Matplotlib versions, whereas
    # matplotlib.cm.get_cmap has been removed from recent releases.
    cmap = plt.get_cmap('tab10')
    palette = [cmap(i) for i in range(cmap.N)]
    # Raw strings: '\i' is not a valid Python escape sequence.
    title = (r"Legend: Variant Antigen Profile of a $\itTrypanosoma$ $\itvivax$ transcriptomes. "
             "Phylotype abundance is expressed as phylotype frequencies adjusted "
             "for combined transcript abundance. "
             "Data was produced with VAPPER-Variant Antigen Profiler (Silva Pereira et al., 2019).")

    # Each phylotype occupies sampnum bars plus one bar-width of spacing.
    width = 0.6
    groupStep = width * (sampnum + 1)
    axisEnd = len(composite_df) * groupStep
    ind = np.arange(width * sampnum / 2, axisEnd, groupStep)
    ysize = len(composite_df) * 0.4

    fig, ax = plt.subplots(figsize=(10, ysize))

    for s in range(1, len(labelList)):
        # Wrap around the palette so more samples than colours cannot raise
        # an IndexError.
        ax.barh(ind, composite_df[labelList[s]], width,
                color=palette[s % len(palette)], label=labelList[s])
        ind = ind + width

    ax.set(yticks=np.arange(width * (sampnum + 2) / 2, axisEnd, groupStep),
           yticklabels=composite_df['Phylotype'])
    ax.legend()

    ax.set_ylabel('Phylotype')
    ax.invert_yaxis()  # labels read top-to-bottom
    ax.set_xlabel('Weighted Phylotype Frequency')

    plt.subplots_adjust(bottom=0.1, top=0.92, left=0.15, right=0.9)
    ax.set_title(title, x=0, wrap=True, ha='left')

    # os.path.join supplies the separator when html_resource has no trailing
    # slash, so the image lands inside the resource directory.
    plt.savefig(os.path.join(tdict['html_resource'], tdict['name'] + "_phylotypes.png"))
    if tdict['pdf'] == 'PDF_Yes':
        plt.savefig(os.path.join(tdict['html_resource'], tdict['name'] + "phylotypes.pdf"))
    plt.show()
    plt.close(fig)  # release the figure
|
|
364
|
|
365
|
|
366
|
|
def doBarChart(tdict, sum2_df):
    """Draw a horizontal bar chart of weighted phylotype frequencies for one sample.

    Adds two columns to ``sum2_df`` in place: ``'Normalised'`` (FPKM divided
    by the largest FPKM) and ``'Weighted'`` (FPKM divided by the FPKM total);
    only ``'Weighted'`` is plotted. The figure is saved as
    ``<html_resource>/<name>_phylotypes.png`` and, when
    ``tdict['pdf'] == 'PDF_Yes'``, also as
    ``<html_resource>/<name>phylotypes.pdf``.

    Args:
        tdict: dict providing 'name', 'html_resource' and 'pdf'.
        sum2_df: DataFrame with 'Phylotype' and 'FPKM' columns.
    """
    # pyplot.get_cmap is available in all Matplotlib versions, whereas
    # matplotlib.cm.get_cmap has been removed from recent releases.
    cmap = plt.get_cmap('tab20')
    palette = [cmap(i) for i in range(cmap.N)]
    # Raw strings: '\i' is not a valid Python escape sequence.
    title = (r"Legend: Variant Antigen Profile of a $\itTrypanosoma$ $\itvivax$ transcriptome. "
             "Phylotype abundance is expressed as phylotype frequencies adjusted "
             "for combined transcript abundance. "
             "Data was produced with VAPPER-Variant Antigen Profiler (Silva Pereira et al., 2019).")

    maxFPKM = sum2_df['FPKM'].max()
    total = sum2_df['FPKM'].sum()
    # Column arithmetic instead of a per-row apply; a zero divisor gives 0.0
    # rather than NaN.
    sum2_df['Normalised'] = sum2_df['FPKM'] / maxFPKM if maxFPKM != 0 else 0.0
    sum2_df['Weighted'] = sum2_df['FPKM'] / total if total != 0 else 0.0

    # String labels make barh treat the phylotypes as categories.
    phList = [str(p) for p in sum2_df['Phylotype']]
    fList = sum2_df['Weighted']
    ysize = len(phList) * 0.3
    fig, ax = plt.subplots(figsize=(10, ysize))

    ax.barh(phList, fList, color=palette)
    ax.set_ylabel('Phylotype')
    ax.invert_yaxis()  # labels read top-to-bottom
    ax.set_xlabel('Weighted Phylotype Frequency')

    plt.subplots_adjust(bottom=0.1, top=0.9, left=0.15, right=0.9)
    ax.set_title(title, x=0, wrap=True, ha='left')

    plt.savefig(tdict['html_resource'] + '/' + tdict['name'] + "_phylotypes.png")
    if tdict['pdf'] == 'PDF_Yes':
        plt.savefig(tdict['html_resource'] + '/' + tdict['name'] + "phylotypes.pdf")
    plt.close(fig)  # release the figure
|
|
404
|
|
405 # argdict = {'name':2, 'pdfexport': 3, 'refFastq': 4, 'forward': 5, 'reverse': 6, 'html_file': 7, 'html_resource': 8}
|
|
406
|
|
def transcriptomicProcess(args, argdict):
    """Run the whole transcriptomic pathway for one sample.

    Steps: index the reference (bowtie2-build), map the reads (bowtie2),
    convert/sort/index the alignments (samtools), estimate abundance
    (cufflinks), extract the expressed transcripts, BLAST them against the
    bundled phylotype database, combine the FPKM values, and produce the bar
    chart and the HTML report.

    Args:
        args: sequence of argument values (e.g. the command line).
        argdict: maps 'name', 'pdfexport', 'refFastq', 'forward', 'reverse',
            'html_file' and 'html_resource' to their positions in ``args``.

    Raises:
        RuntimeError: if the BLAST step reports an error.
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    tdict = {
        'name': args[argdict['name']],
        'refFastq': args[argdict['refFastq']],
        'forward': args[argdict['forward']],
        'reverse': args[argdict['reverse']],
        'vivax_trans_database': dir_path + '/data/vivax/Database/Phylotype_typeseqs.fas',
        'pdf': args[argdict['pdfexport']],
        'html_file': args[argdict['html_file']],
        'html_resource': args[argdict['html_resource']],
    }

    uploadUserReferenceFastq(tdict['refFastq'])
    transcriptMapping(tdict['name'], tdict['refFastq'], tdict['forward'], tdict['reverse'])  # bowtie2
    processSamFiles(tdict['name'])  # samtools
    transcriptAbundance(tdict['name'])  # cufflinks -> <name>.cuff/
    transcriptsForBlast(tdict['name'], tdict['refFastq'])  # creates <name>_for_blast.fa
    blastResult = blastContigs(tdict['name'], tdict['html_resource'], tdict['vivax_trans_database'])
    if isinstance(blastResult, str):
        # blastContigs signals failure by returning a message instead of a
        # DataFrame; stop here rather than continue with missing or stale
        # results.
        raise RuntimeError(blastResult)
    sum_df, sum2_df = combineFPMK(tdict)
    doBarChart(tdict, sum2_df)
    createHTML(tdict, sum_df)
|
|
429
|
|
430
|
|
if __name__ == "__main__":
    # Running this file directly does nothing. sys.exit() is used because the
    # bare exit() helper is injected by the 'site' module and is not
    # guaranteed to exist (e.g. under 'python -S').
    sys.exit()
|
|
433
|
|
434
|