annotate overlapping_reads.py @ 3:4d9682bd3a6b draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 96ed5824190aff281cc3aa47dc60fc66aac41db3
author artbio
date Sat, 02 Sep 2017 06:35:15 -0400
parents 6f1378738798
children 20d28cfdeefe
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
1 import argparse
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
2 from collections import defaultdict
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
3
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
4 import pysam
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
5
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
6
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
def Parser(argv=None):
    """Build the command-line interface and return the parsed arguments.

    Args:
        argv: optional list of argument strings. When None (the default)
            argparse falls back to the process command line, which is what
            the original zero-argument call did.

    Returns:
        argparse.Namespace with the attributes ``input``, ``minquery``,
        ``maxquery``, ``mintarget``, ``maxtarget``, ``overlap`` and
        ``output``. No option is declared required and none has a default,
        so an option that is omitted comes back as None.
    """
    the_parser = argparse.ArgumentParser()
    the_parser.add_argument(
        '--input', action="store", type=str, help="bam alignment file")
    the_parser.add_argument(
        '--minquery', type=int,
        help="Minimum readsize of query reads (nt) - must be an integer")
    the_parser.add_argument(
        '--maxquery', type=int,
        help="Maximum readsize of query reads (nt) - must be an integer")
    the_parser.add_argument(
        '--mintarget', type=int,
        help="Minimum readsize of target reads (nt) - must be an integer")
    the_parser.add_argument(
        '--maxtarget', type=int,
        help="Maximum readsize of target reads (nt) - must be an integer")
    the_parser.add_argument(
        '--overlap', type=int,
        help="Overlap analyzed (nt) - must be an integer")
    the_parser.add_argument(
        '--output', action="store", type=str,
        help="Pairable sequences")
    args = the_parser.parse_args(argv)
    return args
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
31
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
32
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
33 class Map:
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
34
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
def __init__(self, bam_file):
    """Open the BAM file and pre-compute the lookup tables used for pairing.

    Args:
        bam_file: path handed to ``pysam.AlignmentFile`` and opened in
            binary read mode ('rb').

    Attributes set:
        bam_object: the opened ``pysam.AlignmentFile``.
        chromosomes: dict pairing each entry of ``bam_object.references``
            with the matching entry of ``bam_object.lengths``.
        all_query_positions: result of ``self.query_positions``.
        readdic: result of ``self.make_readdic``.
    """
    self.bam_object = pysam.AlignmentFile(bam_file, 'rb')
    self.chromosomes = dict(zip(self.bam_object.references,
                                self.bam_object.lengths))
    # query_positions() iterates self.chromosomes, so it has to run after
    # the mapping above has been built.
    self.all_query_positions = self.query_positions(self.bam_object)
    self.readdic = self.make_readdic(self.bam_object)
1
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
41
3
4d9682bd3a6b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 96ed5824190aff281cc3aa47dc60fc66aac41db3
artbio
parents: 1
diff changeset
def make_readdic(self, bam_object):
    """Count how many fetched alignments carry each query sequence.

    Args:
        bam_object: object whose ``fetch()`` (called without a region)
            yields reads exposing a ``query_sequence`` attribute.

    Returns:
        collections.defaultdict(int) mapping ``read.query_sequence`` to the
        number of reads yielded with that sequence. Looking up a sequence
        that was never seen returns 0.
    """
    readdic = defaultdict(int)
    for read in bam_object.fetch():
        # The key is the sequence exactly as the read object reports it;
        # no strand normalisation is applied here.
        readdic[read.query_sequence] += 1
    return readdic
4d9682bd3a6b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 96ed5824190aff281cc3aa47dc60fc66aac41db3
artbio
parents: 1
diff changeset
47
4d9682bd3a6b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 96ed5824190aff281cc3aa47dc60fc66aac41db3
artbio
parents: 1
diff changeset
def query_positions(self, bam_object):
    """Collect, per chromosome, the sorted unique anchor positions of reads.

    The anchor of a read is the first entry of
    ``read.get_reference_positions(full_length=True)`` when the read is not
    reverse, and the last entry when it is reverse.

    Args:
        bam_object: object whose ``fetch(chrom)`` yields the reads of one
            chromosome; each read exposes ``is_reverse`` and
            ``get_reference_positions``.

    Returns:
        collections.defaultdict(list) with one key per entry of
        ``self.chromosomes``; each value is an ascending list of distinct
        anchor positions (an empty list when no read was yielded).
    """
    all_query_positions = defaultdict(list)
    for chrom in self.chromosomes:
        # A set drops duplicates while reading, so heavily covered
        # positions are stored only once.
        anchors = set()
        for read in bam_object.fetch(chrom):
            positions = read.get_reference_positions(full_length=True)
            anchors.add(positions[-1] if read.is_reverse else positions[0])
        all_query_positions[chrom] = sorted(anchors)
    return all_query_positions
1
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
61
3
4d9682bd3a6b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 96ed5824190aff281cc3aa47dc60fc66aac41db3
artbio
parents: 1
diff changeset
def direct_pairing(self, minquery, maxquery, mintarget, maxtarget,
                   file, overlap=10):
    """Write every query/target read pair found at the anchor positions.

    For each chromosome (in sorted name order) and each position ``pos`` in
    ``self.all_query_positions[chrom]`` two searches are run over the reads
    fetched for the window ``start=pos, end=pos+overlap-1``:

    1. forward query reads whose first reference position is ``pos``,
       paired with reverse target reads whose last reference position
       equals the query's reference position at index ``overlap-1``;
    2. reverse query reads whose last reference position is
       ``pos+overlap-1``, paired with forward target reads whose first
       reference position is ``pos``.

    Each pair contributes two records, query first, formatted as
    ``>chrom|first_position+1|F or R|aligned_length|n=count`` followed by
    the sequence on its own line. ``count`` comes from ``self.readdic``
    keyed by the read's ``query_sequence``; reverse reads are written as
    ``self.revcomp(query_sequence)``. Identical records are written once,
    in order of first appearance.

    Args:
        minquery, maxquery: inclusive bounds on the query
            ``query_alignment_length``.
        mintarget, maxtarget: inclusive bounds on the target
            ``query_alignment_length``.
        file: path of the output file; it is (over)written.
        overlap: number of overlapping positions between the paired reads.

    Returns:
        None. The records are written to ``file``.
    """
    query_range = range(minquery, maxquery + 1)
    target_range = range(mintarget, maxtarget + 1)
    record_template = '>%s|%s|%s|%s|n=%s\n%s\n'
    stringresult = []
    seen = set()

    def emit(record):
        # Order-preserving de-duplication in O(1) per record.
        if record not in seen:
            seen.add(record)
            stringresult.append(record)

    for chrom in sorted(self.chromosomes):
        for pos in self.all_query_positions[chrom]:
            window_end = pos + overlap - 1
            # Fetch the window once and keep the reads in a list. The
            # previous code created four iterators up front; each target
            # iterator was exhausted by the first qualifying query read,
            # so later query reads at the same position found no targets.
            # NOTE(review): pysam also documents that iterators obtained
            # from one AlignmentFile share the file position unless
            # multiple_iterators=True - a list sidesteps that too.
            reads = list(self.bam_object.fetch(
                chrom, start=pos, end=window_end))
            # 1: forward query, reverse target
            for queryread in reads:
                qpos = queryread.get_reference_positions(full_length=True)
                if (qpos[0] == pos and
                        queryread.query_alignment_length in query_range and
                        not queryread.is_reverse):
                    query_anchor = qpos[overlap - 1]
                    for targetread in reads:
                        tpos = targetread.get_reference_positions(
                            full_length=True)
                        if (tpos[-1] == query_anchor and
                                targetread.query_alignment_length in
                                target_range and targetread.is_reverse):
                            targetreadseq = self.revcomp(
                                targetread.query_sequence)
                            emit(record_template %
                                 (chrom, qpos[0] + 1,
                                  'F', queryread.query_alignment_length,
                                  self.readdic[queryread.query_sequence],
                                  queryread.query_sequence))
                            emit(record_template %
                                 (chrom, tpos[0] + 1,
                                  'R', targetread.query_alignment_length,
                                  self.readdic[targetread.query_sequence],
                                  targetreadseq))
            # 2: reverse query, forward target
            for queryread in reads:
                qpos = queryread.get_reference_positions(full_length=True)
                if (qpos[-1] == window_end and
                        queryread.query_alignment_length in query_range and
                        queryread.is_reverse):
                    queryreadseq = self.revcomp(queryread.query_sequence)
                    for targetread in reads:
                        tpos = targetread.get_reference_positions(
                            full_length=True)
                        if (tpos[0] == pos and
                                targetread.query_alignment_length in
                                target_range and
                                not targetread.is_reverse):
                            targetreadseq = targetread.query_sequence
                            emit(record_template %
                                 (chrom, qpos[0] + 1, 'R',
                                  queryread.query_alignment_length,
                                  self.readdic[queryread.query_sequence],
                                  queryreadseq))
                            emit(record_template %
                                 (chrom, tpos[0] + 1,
                                  'F', targetread.query_alignment_length,
                                  self.readdic[targetread.query_sequence],
                                  targetreadseq))
    # The context manager guarantees the handle is flushed and closed.
    with open(file, 'w') as handle:
        handle.write(''.join(stringresult))
1
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
139
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
def revcomp(self, sequence):
    """Return the reverse complement of a nucleotide sequence.

    The sequence is reversed and every symbol is replaced by its
    complement. Besides A, C, G, T and N, the IUPAC ambiguity codes
    (R, Y, S, W, K, M, B, D, H, V) are supported, and lowercase input
    yields lowercase output.

    Args:
        sequence: nucleotide string to reverse-complement.

    Returns:
        The reverse-complemented string (empty for empty input).

    Raises:
        KeyError: if the sequence contains a symbol that is not a
            supported nucleotide code.
    """
    # The two strings are aligned position by position:
    # bases[i] pairs with partners[i].
    bases = "ACGTRYSWKMBDHVN"
    partners = "TGCAYRSWMKVHDBN"
    antidict = dict(zip(bases, partners))
    # Mirror the table in lowercase so the case of the input is preserved.
    antidict.update(zip(bases.lower(), partners.lower()))
    revseq = sequence[::-1]
    # Strict lookup on purpose: an unknown symbol fails loudly instead of
    # being silently copied into the output.
    return "".join([antidict[base] for base in revseq])
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
144
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
145
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
def main(input, minquery, maxquery, mintarget, maxtarget, output, overlap=10):
    """Build a Map from *input* and run its direct pairing step.

    All size bounds, the output destination and the overlap value are
    forwarded unchanged, in this order, to ``Map.direct_pairing``.

    Args:
        input: passed as-is to the ``Map`` constructor
            (presumably the alignment file path; confirm against Map).
        minquery: lower query bound forwarded to ``direct_pairing``.
        maxquery: upper query bound forwarded to ``direct_pairing``.
        mintarget: lower target bound forwarded to ``direct_pairing``.
        maxtarget: upper target bound forwarded to ``direct_pairing``.
        output: output destination forwarded to ``direct_pairing``.
        overlap: overlap value forwarded to ``direct_pairing``
            (defaults to 10).
    """
    pairing_args = (minquery, maxquery, mintarget, maxtarget, output, overlap)
    Map(input).direct_pairing(*pairing_args)
1
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
150
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
151
6f1378738798 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
artbio
parents:
diff changeset
if __name__ == "__main__":
    # Script entry point: read the command-line options, then hand each
    # one to main() by name so the mapping stays explicit.
    args = Parser()
    main(
        input=args.input,
        minquery=args.minquery,
        maxquery=args.maxquery,
        mintarget=args.mintarget,
        maxtarget=args.maxtarget,
        output=args.output,
        overlap=args.overlap,
    )