comparison smRtools.py @ 8:be0c6b6466cc draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit 97b40d7a593cef6c3303f7baba781a84d242e454
author mvdbeek
date Mon, 19 Sep 2016 06:16:21 -0400
parents ac7d8e55bb67
children
comparison
equal deleted inserted replaced
7:c9e267cb84c0 8:be0c6b6466cc
140 else: 140 else:
141 self.instanceDict[gene].addread (polarity, offset+1, size) # to correct to 1-based coordinates of SmRNAwindow 141 self.instanceDict[gene].addread (polarity, offset+1, size) # to correct to 1-based coordinates of SmRNAwindow
142 self.alignedReads += 1 142 self.alignedReads += 1
143 F.close() 143 F.close()
144 return self.instanceDict 144 return self.instanceDict
145 # elif self.alignmentFileFormat == "sam":
146 # F = open (self.alignmentFile, "r")
147 # dict = {"0":"+", "16":"-"}
148 # for line in F:
149 # if line[0]=='@':
150 # continue
151 # fields = line.split()
152 # if fields[2] == "*": continue
153 # polarity = dict[fields[1]]
154 # gene = fields[2]
155 # offset = int(fields[3])
156 # size = len (fields[9])
157 # if self.size_inf:
158 # if (size>=self.size_inf and size<= self.size_sup):
159 # self.instanceDict[gene].addread (polarity, offset, size)
160 # self.alignedReads += 1
161 # else:
162 # self.instanceDict[gene].addread (polarity, offset, size)
163 # self.alignedReads += 1
164 # F.close()
165 elif self.alignmentFileFormat == "bam" or self.alignmentFileFormat == "sam": 145 elif self.alignmentFileFormat == "bam" or self.alignmentFileFormat == "sam":
166 import pysam 146 import pysam
167 samfile = pysam.Samfile(self.alignmentFile) 147 samfile = pysam.Samfile(self.alignmentFile)
168 for read in samfile: 148 for read in samfile:
169 if read.tid == -1: 149 if read.tid == -1:
182 else: 162 else:
183 self.instanceDict[gene].addread (polarity, offset+1, size) # to correct to 1-based coordinates of SmRNAwindow 163 self.instanceDict[gene].addread (polarity, offset+1, size) # to correct to 1-based coordinates of SmRNAwindow
184 self.alignedReads += 1 164 self.alignedReads += 1
185 return self.instanceDict 165 return self.instanceDict
186 166
187 # def size_histogram (self):
188 # size_dict={}
189 # size_dict['F']= defaultdict (int)
190 # size_dict['R']= defaultdict (int)
191 # size_dict['both'] = defaultdict (int)
192 # for item in self.instanceDict:
193 # buffer_dict_F = self.instanceDict[item].size_histogram()['F']
194 # buffer_dict_R = self.instanceDict[item].size_histogram()['R']
195 # for size in buffer_dict_F:
196 # size_dict['F'][size] += buffer_dict_F[size]
197 # for size in buffer_dict_R:
198 # size_dict['R'][size] -= buffer_dict_R[size]
199 # allSizeKeys = list (set (size_dict['F'].keys() + size_dict['R'].keys() ) )
200 # for size in allSizeKeys:
201 # size_dict['both'][size] = size_dict['F'][size] + size_dict['R'][size]
202 # return size_dict
203 def size_histogram (self): # in HandleSmRNAwindows 167 def size_histogram (self): # in HandleSmRNAwindows
204 '''refactored on 7-9-2014 to debug size_histogram tool''' 168 '''refactored on 7-9-2014 to debug size_histogram tool'''
205 size_dict={} 169 size_dict={}
206 size_dict['F']= defaultdict (float) 170 size_dict['F']= defaultdict (float)
207 size_dict['R']= defaultdict (float) 171 size_dict['R']= defaultdict (float)
359 for size in self.readDict[offset]: 323 for size in self.readDict[offset]:
360 dicsize[size] = dicsize.get(size, 0) + 1 324 dicsize[size] = dicsize.get(size, 0) + 1
361 for offset in range (min(dicsize.keys()), max(dicsize.keys())+1): 325 for offset in range (min(dicsize.keys()), max(dicsize.keys())+1):
362 dicsize[size] = dicsize.get(size, 0) # to fill offsets with null values 326 dicsize[size] = dicsize.get(size, 0) # to fill offsets with null values
363 return dicsize 327 return dicsize
364 328
365 # def size_histogram(self):
366 # norm=self.norm
367 # hist_dict={}
368 # hist_dict['F']={}
369 # hist_dict['R']={}
370 # for offset in self.readDict:
371 # for size in self.readDict[offset]:
372 # if offset < 0:
373 # hist_dict['R'][size] = hist_dict['R'].get(size, 0) - 1*norm
374 # else:
375 # hist_dict['F'][size] = hist_dict['F'].get(size, 0) + 1*norm
376 # ## patch to avoid missing graphs when parsed by R-lattice. 27-08-2014. Test and validate !
377 # if not (hist_dict['F']) and (not hist_dict['R']):
378 # hist_dict['F'][21] = 0
379 # hist_dict['R'][21] = 0
380 # ##
381 # return hist_dict
382 329
383 def size_histogram(self, minquery=None, maxquery=None): # in SmRNAwindow 330 def size_histogram(self, minquery=None, maxquery=None): # in SmRNAwindow
384 '''refactored on 7-9-2014 to debug size_histogram tool''' 331 '''refactored on 7-9-2014 to debug size_histogram tool'''
385 norm=self.norm 332 norm=self.norm
386 size_dict={} 333 size_dict={}
478 return "%s | ." % (freqDic["Tfor"] / forward_sum * 100) 425 return "%s | ." % (freqDic["Tfor"] / forward_sum * 100)
479 elif forward_sum == 0: 426 elif forward_sum == 0:
480 return ". | %s" % (freqDic["Trev"] / reverse_sum * 100) 427 return ". | %s" % (freqDic["Trev"] / reverse_sum * 100)
481 else: 428 else:
482 return "%s | %s" % (freqDic["Tfor"] / forward_sum * 100, freqDic["Trev"] / reverse_sum * 100) 429 return "%s | %s" % (freqDic["Tfor"] / forward_sum * 100, freqDic["Trev"] / reverse_sum * 100)
483
484 430
485 def readplot (self): 431 def readplot (self):
486 norm=self.norm 432 norm=self.norm
487 readmap = {} 433 readmap = {}
488 for offset in self.readDict.keys(): 434 for offset in self.readDict.keys():