# HG changeset patch # User artbio # Date 1505053639 14400 # Node ID 4da23f009c9ebe2b8fd5db1198103e20b1df08ea # Parent a7fd042087644b4678acc2f824d94671980e7d4e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6c727f4b2288c9b2517b28addf1eed6409d682a4 diff -r a7fd04208764 -r 4da23f009c9e overlapping_reads.py --- a/overlapping_reads.py Sat Sep 09 11:57:39 2017 -0400 +++ b/overlapping_reads.py Sun Sep 10 10:27:19 2017 -0400 @@ -85,6 +85,32 @@ list(set(all_query_positions[chrom]))) return all_query_positions + def countpairs(self, uppers, lowers): + query_range = self.query_range + target_range = self.target_range + uppers = [seq for seq in uppers if (len(seq) in query_range or len(seq) in target_range)] + uppers_expanded = [] + for seq in uppers: + expand = [seq for i in range(self.readdic[seq])] + uppers_expanded.extend(expand) + uppers = uppers_expanded + lowers = [seq for seq in lowers if (len(seq) in query_range or len(seq) in target_range)] + lowers_expanded = [] + for seq in lowers: + expand = [seq for i in range(self.readdic[seq])] + lowers_expanded.extend(expand) + lowers = lowers_expanded + paired = [] + for upread in uppers: + for downread in lowers: + if (len(upread) in query_range and len(downread) in + target_range) or (len(upread) in target_range and + len(downread) in query_range): + paired.append(upread) + lowers.remove(downread) + break + return len(paired) + def pairing(self): F = open(self.output, 'w') query_range = self.query_range @@ -92,11 +118,16 @@ overlap = self.overlap stringresult = [] header_template = '>%s|coord=%s|strand %s|size=%s|nreads=%s\n%s\n' + total_pairs = 0 + print ('Chromosome\tNbre of pairs') for chrom in sorted(self.chromosomes): + number_pairs = 0 for pos in self.all_query_positions[chrom]: stringbuffer = [] uppers = self.alignement_dic[chrom, pos, 'F'] lowers = self.alignement_dic[chrom, pos+overlap-1, 'R'] + number_pairs += self.countpairs(uppers, lowers) + total_pairs += number_pairs if uppers and lowers: for upread in uppers: for downread in lowers: @@ -114,6 +145,8 @@ len(downread), self.readdic[downread], self.revcomp(downread))) stringresult.extend(sorted(set(stringbuffer))) + print('%s\t%s' % (chrom, number_pairs)) + print('Total nbre of pairs that can be simultaneously formed\t%s' % total_pairs) F.write(''.join(stringresult)) def revcomp(self, sequence): diff -r a7fd04208764 -r 4da23f009c9e overlapping_reads.xml --- a/overlapping_reads.xml Sat Sep 09 11:57:39 2017 -0400 +++ b/overlapping_reads.xml Sun Sep 10 10:27:19 2017 -0400 @@ -1,4 +1,4 @@ - + pysam