Mercurial > repos > johnheap > vapper
comparison Tryp_V_T.py @ 21:1b5bf8383973 draft
Uploaded
author | johnheap |
---|---|
date | Tue, 04 Jun 2019 01:29:12 -0400 |
parents | fe79425b1fa4 |
children | c0a6a170163e |
comparison
equal
deleted
inserted
replaced
20:26ec953069b3 | 21:1b5bf8383973 |
---|---|
81 return #we have saved out the relevant name.bam, name.sorted and name.sorted.bai files | 81 return #we have saved out the relevant name.bam, name.sorted and name.sorted.bai files |
82 | 82 |
83 # we will not have the .gtf file so call cufflinks without -G option | 83 # we will not have the .gtf file so call cufflinks without -G option |
84 def transcriptAbundance(inputname): | 84 def transcriptAbundance(inputname): |
85 argString = "cufflinks -o "+inputname+".cuff -u -p 8 "+inputname+".sorted" | 85 argString = "cufflinks -o "+inputname+".cuff -u -p 8 "+inputname+".sorted" |
86 subprocess.call(argString, shell = True) | 86 subprocess.call(argString, shell=True) |
87 os.remove(inputname+".sorted") #remove name.sorted | 87 os.remove(inputname+".sorted") #remove name.sorted |
88 os.remove(inputname+".sorted.bai") | 88 os.remove(inputname+".sorted.bai") |
89 os.remove(inputname+".bam") | 89 os.remove(inputname+".bam") |
90 return | 90 return |
91 | 91 |
94 refBase = os.path.basename(refFastq) | 94 refBase = os.path.basename(refFastq) |
95 ref = os.path.splitext(refBase)[0] # 'mydata/test.fasta' -> 'test' | 95 ref = os.path.splitext(refBase)[0] # 'mydata/test.fasta' -> 'test' |
96 dir_path = os.path.dirname(os.path.realpath(__file__)) # directory of this file | 96 dir_path = os.path.dirname(os.path.realpath(__file__)) # directory of this file |
97 refPath = dir_path + "/data/Reference/" + ref + "/" + refBase # eg refPath = data/Reference/Trinity/Trinity.fasta | 97 refPath = dir_path + "/data/Reference/" + ref + "/" + refBase # eg refPath = data/Reference/Trinity/Trinity.fasta |
98 # used for dirty # refPath = 'Trinity.fasta' # dirty one | 98 # used for dirty # refPath = 'Trinity.fasta' # dirty one |
99 track_df = pd.read_csv(name+'.cuff/genes.fpkm_tracking', sep='\t') | 99 track_df = pd.read_csv(dir_path+'/' + name + '.cuff/genes.fpkm_tracking', sep='\t') |
100 names = track_df['locus'] | 100 names = track_df['locus'] |
101 # print(len(names)) | 101 # print(len(names)) |
102 # print(names[:5]) | 102 # print(names[:5]) |
103 | 103 |
104 nlist = [] | 104 nlist = [] |
232 def getPhyloNumber(sac): | 232 def getPhyloNumber(sac): |
233 i = sac.find('_') | 233 i = sac.find('_') |
234 return int(sac[1:i]) | 234 return int(sac[1:i]) |
235 | 235 |
236 def combineFPMK(tdict): | 236 def combineFPMK(tdict): |
237 fpkm_df = pd.read_csv(tdict['name']+'.cuff/genes.fpkm_tracking', sep='\t') | 237 dir_path = os.path.dirname(os.path.realpath(__file__))+'/' |
238 | |
239 fpkm_df = pd.read_csv(dir_path++tdict['name']+'.cuff/genes.fpkm_tracking', sep='\t') | |
238 | 240 |
239 #fpkm_df = pd.read_csv('genes.fpkm_tracking',sep='\t') | 241 #fpkm_df = pd.read_csv('genes.fpkm_tracking',sep='\t') |
240 #print(fpkm_df.head()) | 242 #print(fpkm_df.head()) |
241 fpkm_df['locus'] = fpkm_df['locus'].apply(lambda names: names[:names.find(':')]) | 243 fpkm_df['locus'] = fpkm_df['locus'].apply(lambda names: names[:names.find(':')]) |
242 #print(fpkm_df.head()) | 244 #print(fpkm_df.head()) |
243 reducedBlast_df = pd.read_csv(tdict['name']+'_transcript.csv') | 245 |
246 reducedBlast_df = pd.read_csv(dir_path + tdict['name']+'_transcript.csv') | |
244 # reducedBlast_df = pd.read_csv('TrinityVT_transcript.csv') | 247 # reducedBlast_df = pd.read_csv('TrinityVT_transcript.csv') |
245 saccverSet = set(reducedBlast_df['saccver']) | 248 saccverSet = set(reducedBlast_df['saccver']) |
246 saccverList = list(saccverSet) | 249 saccverList = list(saccverSet) |
247 saccverList.sort() | 250 saccverList.sort() |
248 # print(saccverList[:5]) | 251 # print(saccverList[:5]) |
396 pass | 399 pass |
397 | 400 |
398 # argdict = {'name':2, 'pdfexport': 3, 'refFastq': 4, 'forward': 5, 'reverse': 6, 'html_file': 7, 'html_resource': 8} | 401 # argdict = {'name':2, 'pdfexport': 3, 'refFastq': 4, 'forward': 5, 'reverse': 6, 'html_file': 7, 'html_resource': 8} |
399 | 402 |
400 def transcriptomicProcess(args,argdict): | 403 def transcriptomicProcess(args,argdict): |
404 dir_path = os.path.dirname(os.path.realpath(__file__)) | |
401 tdict = {} | 405 tdict = {} |
402 tdict['name'] = args[argdict['name']] | 406 tdict['name'] = args[argdict['name']] |
403 tdict['refFastq'] = args[argdict['refFastq']] | 407 tdict['refFastq'] = args[argdict['refFastq']] |
404 tdict['forward'] = args[argdict['forward']] | 408 tdict['forward'] = args[argdict['forward']] |
405 tdict['reverse'] = args[argdict['reverse']] | 409 tdict['reverse'] = args[argdict['reverse']] |
406 tdict['vivax_trans_database'] = 'data/vivax/Database/Phylotype_typeseqs.fas' | 410 dir_path = os.path.dirname(os.path.realpath(__file__)) |
411 tdict['vivax_trans_database'] = dir_path+'/data/vivax/Database/Phylotype_typeseqs.fas' | |
407 tdict['pdf'] = args[argdict['pdfexport']] | 412 tdict['pdf'] = args[argdict['pdfexport']] |
408 tdict['html_file'] = args[argdict['html_file']] | 413 tdict['html_file'] = args[argdict['html_file']] |
409 tdict['html_resource'] = args[argdict['html_resource']] | 414 tdict['html_resource'] = args[argdict['html_resource']] |
410 | 415 |
411 uploadUserReferenceFastq(tdict['refFastq']) | 416 uploadUserReferenceFastq(tdict['refFastq']) |
412 transcriptMapping(tdict['name'], tdict['refFastq'], tdict['forward'], tdict['reverse']) #uses bowtie | 417 transcriptMapping(tdict['name'], tdict['refFastq'], tdict['forward'], tdict['reverse']) #uses bowtie |
413 processSamFiles(tdict['name']) #uses samtools | 418 processSamFiles(tdict['name']) #uses samtools |
414 transcriptAbundance(tdict['name']) #uses cufflinks -> ?.cuff/*.* | 419 transcriptAbundance(tdict['name']) #uses cufflinks -> ?.cuff/*.* |
415 transcriptsForBlast(tdict['name'], tdict['refFastq']) #creates name+4blast.fa | 420 transcriptsForBlast(tdict['name'], tdict['refFastq']) #creates name+4blast.fa |
416 blastContigs(tdict['name'], tdict['html_resource'], 'data/vivax/Database/Phylotype_typeseqs.fas') | 421 blastContigs(tdict['name'], tdict['html_resource'], tdict['vivax_trans_database']) |
417 sum_df, sum2_df = combineFPMK(tdict) | 422 sum_df, sum2_df = combineFPMK(tdict) |
418 doBarChart(tdict, sum2_df) | 423 doBarChart(tdict, sum2_df) |
419 createHTML(tdict, sum_df) | 424 createHTML(tdict, sum_df) |
420 | 425 |
421 | 426 |