Mercurial > repos > drosofff > msp_sr_readmap_and_size_histograms
comparison smRtools.py @ 8:be0c6b6466cc draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit 97b40d7a593cef6c3303f7baba781a84d242e454
| author | mvdbeek |
|---|---|
| date | Mon, 19 Sep 2016 06:16:21 -0400 |
| parents | ac7d8e55bb67 |
| children |
comparison
equal
deleted
inserted
replaced
| 7:c9e267cb84c0 | 8:be0c6b6466cc |
|---|---|
| 140 else: | 140 else: |
| 141 self.instanceDict[gene].addread (polarity, offset+1, size) # to correct to 1-based coordinates of SmRNAwindow | 141 self.instanceDict[gene].addread (polarity, offset+1, size) # to correct to 1-based coordinates of SmRNAwindow |
| 142 self.alignedReads += 1 | 142 self.alignedReads += 1 |
| 143 F.close() | 143 F.close() |
| 144 return self.instanceDict | 144 return self.instanceDict |
| 145 # elif self.alignmentFileFormat == "sam": | |
| 146 # F = open (self.alignmentFile, "r") | |
| 147 # dict = {"0":"+", "16":"-"} | |
| 148 # for line in F: | |
| 149 # if line[0]=='@': | |
| 150 # continue | |
| 151 # fields = line.split() | |
| 152 # if fields[2] == "*": continue | |
| 153 # polarity = dict[fields[1]] | |
| 154 # gene = fields[2] | |
| 155 # offset = int(fields[3]) | |
| 156 # size = len (fields[9]) | |
| 157 # if self.size_inf: | |
| 158 # if (size>=self.size_inf and size<= self.size_sup): | |
| 159 # self.instanceDict[gene].addread (polarity, offset, size) | |
| 160 # self.alignedReads += 1 | |
| 161 # else: | |
| 162 # self.instanceDict[gene].addread (polarity, offset, size) | |
| 163 # self.alignedReads += 1 | |
| 164 # F.close() | |
| 165 elif self.alignmentFileFormat == "bam" or self.alignmentFileFormat == "sam": | 145 elif self.alignmentFileFormat == "bam" or self.alignmentFileFormat == "sam": |
| 166 import pysam | 146 import pysam |
| 167 samfile = pysam.Samfile(self.alignmentFile) | 147 samfile = pysam.Samfile(self.alignmentFile) |
| 168 for read in samfile: | 148 for read in samfile: |
| 169 if read.tid == -1: | 149 if read.tid == -1: |
| 182 else: | 162 else: |
| 183 self.instanceDict[gene].addread (polarity, offset+1, size) # to correct to 1-based coordinates of SmRNAwindow | 163 self.instanceDict[gene].addread (polarity, offset+1, size) # to correct to 1-based coordinates of SmRNAwindow |
| 184 self.alignedReads += 1 | 164 self.alignedReads += 1 |
| 185 return self.instanceDict | 165 return self.instanceDict |
| 186 | 166 |
| 187 # def size_histogram (self): | |
| 188 # size_dict={} | |
| 189 # size_dict['F']= defaultdict (int) | |
| 190 # size_dict['R']= defaultdict (int) | |
| 191 # size_dict['both'] = defaultdict (int) | |
| 192 # for item in self.instanceDict: | |
| 193 # buffer_dict_F = self.instanceDict[item].size_histogram()['F'] | |
| 194 # buffer_dict_R = self.instanceDict[item].size_histogram()['R'] | |
| 195 # for size in buffer_dict_F: | |
| 196 # size_dict['F'][size] += buffer_dict_F[size] | |
| 197 # for size in buffer_dict_R: | |
| 198 # size_dict['R'][size] -= buffer_dict_R[size] | |
| 199 # allSizeKeys = list (set (size_dict['F'].keys() + size_dict['R'].keys() ) ) | |
| 200 # for size in allSizeKeys: | |
| 201 # size_dict['both'][size] = size_dict['F'][size] + size_dict['R'][size] | |
| 202 # return size_dict | |
| 203 def size_histogram (self): # in HandleSmRNAwindows | 167 def size_histogram (self): # in HandleSmRNAwindows |
| 204 '''refactored on 7-9-2014 to debug size_histogram tool''' | 168 '''refactored on 7-9-2014 to debug size_histogram tool''' |
| 205 size_dict={} | 169 size_dict={} |
| 206 size_dict['F']= defaultdict (float) | 170 size_dict['F']= defaultdict (float) |
| 207 size_dict['R']= defaultdict (float) | 171 size_dict['R']= defaultdict (float) |
| 359 for size in self.readDict[offset]: | 323 for size in self.readDict[offset]: |
| 360 dicsize[size] = dicsize.get(size, 0) + 1 | 324 dicsize[size] = dicsize.get(size, 0) + 1 |
| 361 for offset in range (min(dicsize.keys()), max(dicsize.keys())+1): | 325 for offset in range (min(dicsize.keys()), max(dicsize.keys())+1): |
| 362 dicsize[size] = dicsize.get(size, 0) # to fill offsets with null values | 326 dicsize[size] = dicsize.get(size, 0) # to fill offsets with null values |
| 363 return dicsize | 327 return dicsize |
| 364 | 328 |
| 365 # def size_histogram(self): | |
| 366 # norm=self.norm | |
| 367 # hist_dict={} | |
| 368 # hist_dict['F']={} | |
| 369 # hist_dict['R']={} | |
| 370 # for offset in self.readDict: | |
| 371 # for size in self.readDict[offset]: | |
| 372 # if offset < 0: | |
| 373 # hist_dict['R'][size] = hist_dict['R'].get(size, 0) - 1*norm | |
| 374 # else: | |
| 375 # hist_dict['F'][size] = hist_dict['F'].get(size, 0) + 1*norm | |
| 376 # ## patch to avoid missing graphs when parsed by R-lattice. 27-08-2014. Test and validate ! | |
| 377 # if not (hist_dict['F']) and (not hist_dict['R']): | |
| 378 # hist_dict['F'][21] = 0 | |
| 379 # hist_dict['R'][21] = 0 | |
| 380 # ## | |
| 381 # return hist_dict | |
| 382 | 329 |
| 383 def size_histogram(self, minquery=None, maxquery=None): # in SmRNAwindow | 330 def size_histogram(self, minquery=None, maxquery=None): # in SmRNAwindow |
| 384 '''refactored on 7-9-2014 to debug size_histogram tool''' | 331 '''refactored on 7-9-2014 to debug size_histogram tool''' |
| 385 norm=self.norm | 332 norm=self.norm |
| 386 size_dict={} | 333 size_dict={} |
| 478 return "%s | ." % (freqDic["Tfor"] / forward_sum * 100) | 425 return "%s | ." % (freqDic["Tfor"] / forward_sum * 100) |
| 479 elif forward_sum == 0: | 426 elif forward_sum == 0: |
| 480 return ". | %s" % (freqDic["Trev"] / reverse_sum * 100) | 427 return ". | %s" % (freqDic["Trev"] / reverse_sum * 100) |
| 481 else: | 428 else: |
| 482 return "%s | %s" % (freqDic["Tfor"] / forward_sum * 100, freqDic["Trev"] / reverse_sum * 100) | 429 return "%s | %s" % (freqDic["Tfor"] / forward_sum * 100, freqDic["Trev"] / reverse_sum * 100) |
| 483 | |
| 484 | 430 |
| 485 def readplot (self): | 431 def readplot (self): |
| 486 norm=self.norm | 432 norm=self.norm |
| 487 readmap = {} | 433 readmap = {} |
| 488 for offset in self.readDict.keys(): | 434 for offset in self.readDict.keys(): |
