comparison overlapping_reads.py @ 4:20d28cfdeefe draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit cfdc08418887bfe4a35588cd78d0a2b6ffa6e19e
author artbio
date Fri, 08 Sep 2017 04:44:22 -0400
parents 4d9682bd3a6b
children a7fd04208764
comparison
equal deleted inserted replaced
3:4d9682bd3a6b 4:20d28cfdeefe
49 all_query_positions = defaultdict(list) 49 all_query_positions = defaultdict(list)
50 for chrom in self.chromosomes: 50 for chrom in self.chromosomes:
51 for read in bam_object.fetch(chrom): 51 for read in bam_object.fetch(chrom):
52 if not read.is_reverse: 52 if not read.is_reverse:
53 all_query_positions[chrom].append( 53 all_query_positions[chrom].append(
54 read.get_reference_positions(full_length=True)[0]) 54 read.reference_start)
55 else: 55 else:
56 all_query_positions[chrom].append( 56 all_query_positions[chrom].append(
57 read.get_reference_positions(full_length=True)[-1]) 57 read.reference_end)
58 all_query_positions[chrom] = sorted( 58 all_query_positions[chrom] = sorted(
59 list(set(all_query_positions[chrom]))) 59 list(set(all_query_positions[chrom])))
60 return all_query_positions 60 return all_query_positions
61 61
62 def direct_pairing(self, minquery, maxquery, mintarget, maxtarget, 62 def direct_pairing(self, minquery, maxquery, mintarget, maxtarget,
75 start=pos, end=pos+overlap-1) 75 start=pos, end=pos+overlap-1)
76 iterreads_4 = self.bam_object.fetch(chrom, 76 iterreads_4 = self.bam_object.fetch(chrom,
77 start=pos, end=pos+overlap-1) 77 start=pos, end=pos+overlap-1)
78 # 1 78 # 1
79 for queryread in iterreads_1: 79 for queryread in iterreads_1:
80 if queryread.get_reference_positions( 80 if queryread.reference_start == pos and \
81 full_length=True)[0] == pos and \
82 queryread.query_alignment_length in query_range \ 81 queryread.query_alignment_length in query_range \
83 and not queryread.is_reverse: 82 and not queryread.is_reverse:
84 for targetread in iterreads_2: 83 for targetread in iterreads_2:
85 if (targetread. 84 if (targetread.
86 get_reference_positions(full_length=True)[-1] 85 get_reference_positions()[-1]
87 == queryread.get_reference_positions( 86 == queryread.get_reference_positions(
88 full_length=True)[overlap-1] and 87 )[overlap-1] and
89 targetread.query_alignment_length in 88 targetread.query_alignment_length in
90 target_range and targetread.is_reverse): 89 target_range and targetread.is_reverse):
91 targetreadseq = self.revcomp( 90 targetreadseq = self.revcomp(
92 targetread.query_sequence) 91 targetread.query_sequence)
93 stringresult.append( 92 stringresult.append(
94 '>%s|%s|%s|%s|n=%s\n%s\n' % 93 '>%s|%s|%s|%s|n=%s\n%s\n' %
95 (chrom, queryread.get_reference_positions( 94 (chrom, queryread.reference_start+1,
96 full_length=True)[0]+1,
97 'F', queryread.query_alignment_length, 95 'F', queryread.query_alignment_length,
98 self.readdic[queryread.query_sequence], 96 self.readdic[queryread.query_sequence],
99 queryread.query_sequence)) 97 queryread.query_sequence))
100 stringresult.append( 98 stringresult.append(
101 '>%s|%s|%s|%s|n=%s\n%s\n' % 99 '>%s|%s|%s|%s|n=%s\n%s\n' %
102 (chrom, targetread.get_reference_positions( 100 (chrom, targetread.reference_start+1,
103 full_length=True)[0]+1,
104 'R', targetread.query_alignment_length, 101 'R', targetread.query_alignment_length,
105 self.readdic[targetread.query_sequence], 102 self.readdic[targetread.query_sequence],
106 targetreadseq)) 103 targetreadseq))
107 # 2 104 # 2
108 for queryread in iterreads_3: 105 for queryread in iterreads_3:
109 if queryread.get_reference_positions( 106 if queryread.reference_end-1 == pos+overlap-1 and \
110 full_length=True)[-1] == pos+overlap-1 and \
111 queryread.query_alignment_length in query_range \ 107 queryread.query_alignment_length in query_range \
112 and queryread.is_reverse: 108 and queryread.is_reverse:
113 for targetread in iterreads_4: 109 for targetread in iterreads_4:
114 if (targetread. 110 if (targetread.
115 get_reference_positions(full_length=True)[0] 111 reference_start
116 == pos and targetread.query_alignment_length 112 == pos and targetread.query_alignment_length
117 in target_range and not 113 in target_range and not
118 targetread.is_reverse): 114 targetread.is_reverse):
119 queryreadseq = self.revcomp( 115 queryreadseq = self.revcomp(
120 queryread.query_sequence) 116 queryread.query_sequence)
121 targetreadseq = targetread.query_sequence 117 targetreadseq = targetread.query_sequence
122 stringresult.append( 118 stringresult.append(
123 '>%s|%s|%s|%s|n=%s\n%s\n' % 119 '>%s|%s|%s|%s|n=%s\n%s\n' %
124 (chrom, queryread.get_reference_positions( 120 (chrom, queryread.reference_start+1, 'R',
125 full_length=True)[0]+1, 'R',
126 queryread.query_alignment_length, 121 queryread.query_alignment_length,
127 self.readdic[queryread.query_sequence], 122 self.readdic[queryread.query_sequence],
128 queryreadseq)) 123 queryreadseq))
129 stringresult.append( 124 stringresult.append(
130 '>%s|%s|%s|%s|n=%s\n%s\n' % 125 '>%s|%s|%s|%s|n=%s\n%s\n' %
131 (chrom, targetread.get_reference_positions( 126 (chrom, targetread.reference_start+1,
132 full_length=True)[0]+1,
133 'F', targetread.query_alignment_length, 127 'F', targetread.query_alignment_length,
134 self.readdic[targetread.query_sequence], 128 self.readdic[targetread.query_sequence],
135 targetreadseq)) 129 targetreadseq))
136 stringresult = sorted(set(stringresult), 130 stringresult = sorted(set(stringresult),
137 key=lambda x: stringresult.index(x)) 131 key=lambda x: stringresult.index(x))