comparison Tryp_V_T.py @ 21:1b5bf8383973 draft

Uploaded
author johnheap
date Tue, 04 Jun 2019 01:29:12 -0400
parents fe79425b1fa4
children c0a6a170163e
comparison
equal deleted inserted replaced
20:26ec953069b3 21:1b5bf8383973
81 return #we have saved out the relevent name.bam, name.sorted and name.sorted.bai files 81 return #we have saved out the relevent name.bam, name.sorted and name.sorted.bai files
82 82
83 # we will not have the .gtf file so call cufflinks without -G option 83 # we will not have the .gtf file so call cufflinks without -G option
84 def transcriptAbundance(inputname): 84 def transcriptAbundance(inputname):
85 argString = "cufflinks -o "+inputname+".cuff -u -p 8 "+inputname+".sorted" 85 argString = "cufflinks -o "+inputname+".cuff -u -p 8 "+inputname+".sorted"
86 subprocess.call(argString, shell = True) 86 subprocess.call(argString, shell=True)
87 os.remove(inputname+".sorted") #remove name.sorted 87 os.remove(inputname+".sorted") #remove name.sorted
88 os.remove(inputname+".sorted.bai") 88 os.remove(inputname+".sorted.bai")
89 os.remove(inputname+".bam") 89 os.remove(inputname+".bam")
90 return 90 return
91 91
94 refBase = os.path.basename(refFastq) 94 refBase = os.path.basename(refFastq)
95 ref = os.path.splitext(refBase)[0] # 'mydata/test.fasta' -> 'test' 95 ref = os.path.splitext(refBase)[0] # 'mydata/test.fasta' -> 'test'
96 dir_path = os.path.dirname(os.path.realpath(__file__)) # directory of this file 96 dir_path = os.path.dirname(os.path.realpath(__file__)) # directory of this file
97 refPath = dir_path + "/data/Reference/" + ref + "/" + refBase # eg refPath = data/Reference/Trinity/Trinity.fasta 97 refPath = dir_path + "/data/Reference/" + ref + "/" + refBase # eg refPath = data/Reference/Trinity/Trinity.fasta
98 # used for dirty # refPath = 'Trinity.fasta' # dirty one 98 # used for dirty # refPath = 'Trinity.fasta' # dirty one
99 track_df = pd.read_csv(name+'.cuff/genes.fpkm_tracking', sep='\t') 99 track_df = pd.read_csv(dir_path+'/' + name + '.cuff/genes.fpkm_tracking', sep='\t')
100 names = track_df['locus'] 100 names = track_df['locus']
101 # print(len(names)) 101 # print(len(names))
102 # print(names[:5]) 102 # print(names[:5])
103 103
104 nlist = [] 104 nlist = []
232 def getPhyloNumber(sac): 232 def getPhyloNumber(sac):
233 i = sac.find('_') 233 i = sac.find('_')
234 return int(sac[1:i]) 234 return int(sac[1:i])
235 235
236 def combineFPMK(tdict): 236 def combineFPMK(tdict):
237 fpkm_df = pd.read_csv(tdict['name']+'.cuff/genes.fpkm_tracking', sep='\t') 237 dir_path = os.path.dirname(os.path.realpath(__file__))+'/'
238
239 fpkm_df = pd.read_csv(dir_path++tdict['name']+'.cuff/genes.fpkm_tracking', sep='\t')
238 240
239 #fpkm_df = pd.read_csv('genes.fpkm_tracking',sep='\t') 241 #fpkm_df = pd.read_csv('genes.fpkm_tracking',sep='\t')
240 #print(fpkm_df.head()) 242 #print(fpkm_df.head())
241 fpkm_df['locus'] = fpkm_df['locus'].apply(lambda names: names[:names.find(':')]) 243 fpkm_df['locus'] = fpkm_df['locus'].apply(lambda names: names[:names.find(':')])
242 #print(fpkm_df.head()) 244 #print(fpkm_df.head())
243 reducedBlast_df = pd.read_csv(tdict['name']+'_transcript.csv') 245
246 reducedBlast_df = pd.read_csv(dir_path + tdict['name']+'_transcript.csv')
244 # reducedBlast_df = pd.read_csv('TrinityVT_transcript.csv') 247 # reducedBlast_df = pd.read_csv('TrinityVT_transcript.csv')
245 saccverSet = set(reducedBlast_df['saccver']) 248 saccverSet = set(reducedBlast_df['saccver'])
246 saccverList = list(saccverSet) 249 saccverList = list(saccverSet)
247 saccverList.sort() 250 saccverList.sort()
248 # print(saccverList[:5]) 251 # print(saccverList[:5])
396 pass 399 pass
397 400
398 # argdict = {'name':2, 'pdfexport': 3, 'refFastq': 4, 'forward': 5, 'reverse': 6, 'html_file': 7, 'html_resource': 8} 401 # argdict = {'name':2, 'pdfexport': 3, 'refFastq': 4, 'forward': 5, 'reverse': 6, 'html_file': 7, 'html_resource': 8}
399 402
400 def transcriptomicProcess(args,argdict): 403 def transcriptomicProcess(args,argdict):
404 dir_path = os.path.dirname(os.path.realpath(__file__))
401 tdict = {} 405 tdict = {}
402 tdict['name'] = args[argdict['name']] 406 tdict['name'] = args[argdict['name']]
403 tdict['refFastq'] = args[argdict['refFastq']] 407 tdict['refFastq'] = args[argdict['refFastq']]
404 tdict['forward'] = args[argdict['forward']] 408 tdict['forward'] = args[argdict['forward']]
405 tdict['reverse'] = args[argdict['reverse']] 409 tdict['reverse'] = args[argdict['reverse']]
406 tdict['vivax_trans_database'] = 'data/vivax/Database/Phylotype_typeseqs.fas' 410 dir_path = os.path.dirname(os.path.realpath(__file__))
411 tdict['vivax_trans_database'] = dir_path+'/data/vivax/Database/Phylotype_typeseqs.fas'
407 tdict['pdf'] = args[argdict['pdfexport']] 412 tdict['pdf'] = args[argdict['pdfexport']]
408 tdict['html_file'] = args[argdict['html_file']] 413 tdict['html_file'] = args[argdict['html_file']]
409 tdict['html_resource'] = args[argdict['html_resource']] 414 tdict['html_resource'] = args[argdict['html_resource']]
410 415
411 uploadUserReferenceFastq(tdict['refFastq']) 416 uploadUserReferenceFastq(tdict['refFastq'])
412 transcriptMapping(tdict['name'], tdict['refFastq'], tdict['forward'], tdict['reverse']) #uses bowtie 417 transcriptMapping(tdict['name'], tdict['refFastq'], tdict['forward'], tdict['reverse']) #uses bowtie
413 processSamFiles(tdict['name']) #uses samtools 418 processSamFiles(tdict['name']) #uses samtools
414 transcriptAbundance(tdict['name']) #uses cufflinks -> ?.cuff/*.* 419 transcriptAbundance(tdict['name']) #uses cufflinks -> ?.cuff/*.*
415 transcriptsForBlast(tdict['name'], tdict['refFastq']) #creates name+4blast.fa 420 transcriptsForBlast(tdict['name'], tdict['refFastq']) #creates name+4blast.fa
416 blastContigs(tdict['name'], tdict['html_resource'], 'data/vivax/Database/Phylotype_typeseqs.fas') 421 blastContigs(tdict['name'], tdict['html_resource'], tdict['vivax_trans_database'])
417 sum_df, sum2_df = combineFPMK(tdict) 422 sum_df, sum2_df = combineFPMK(tdict)
418 doBarChart(tdict, sum2_df) 423 doBarChart(tdict, sum2_df)
419 createHTML(tdict, sum_df) 424 createHTML(tdict, sum_df)
420 425
421 426