Mercurial > repos > abims-sbr > pairwise
annotate scripts/S05_find_rbh.py @ 0:90b57ab0bd1d draft default tip
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
author | abims-sbr |
---|---|
date | Fri, 01 Feb 2019 10:23:16 -0500 |
parents | |
children |
rev | line source |
---|---|
0
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
1 #!/usr/bin/env python |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
2 # coding: utf-8 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
3 # Author : Victor Mataigne |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
4 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
5 import argparse, pickle, itertools |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
6 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
def _read_two_line_fasta(path):
    """Load a FASTA file laid out as alternating header / sequence lines.

    Args:
        path: Path of the FASTA file to read.

    Returns:
        A dict mapping each header (leading '>' and newline removed) to the
        sequence found on the line that follows it.

    Raises:
        ValueError: If the last header has no sequence line after it.
    """
    records = {}
    header = None
    with open(path, 'r') as handle:
        for line in handle:
            if header is None:
                # Blank lines where a header is expected (typically a
                # trailing newline at end of file) are ignored rather than
                # being mistaken for a record.
                if not line.strip():
                    continue
                header = line.strip('>\n')
            else:
                records[header] = line.strip('\n')
                header = None
    if header is not None:
        raise ValueError(
            "Header '{}' in '{}' has no sequence line".format(header, path))
    return records


def main():
    """Write the reciprocal best hits (RBH) of two BLAST runs to a FASTA file.

    Command-line arguments:
        besthits_file1: Two-line FASTA file holding the sequences named by
            the keys of the first best-hit dictionary.
        besthits_file2: Two-line FASTA file holding the sequences named by
            the keys of the second best-hit dictionary.

    The best-hit dictionaries themselves are unpickled from the files
    'dict_best_hits_from_blast_1' and 'dict_best_hits_from_blast_2' in the
    current working directory. A pair (a, b) is a reciprocal best hit when
    the first dictionary maps a to b and the second maps b back to a.

    The output is written to 'RBH_<x>_<y>_dna.fasta', where <x> and <y> are
    the 5th and 6th '_'-separated fields of besthits_file1. Each pair is
    written as four lines: header and sequence from file 1, then header and
    sequence from file 2. Pairs are written in sorted order so that the
    output is reproducible from run to run.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('besthits_file1', help='')
    parser.add_argument('besthits_file2', help='')
    args = parser.parse_args()

    # The output name is derived from the input name; fail early with a
    # readable message instead of an IndexError after all the work is done.
    name_fields = args.besthits_file1.split('_')
    if len(name_fields) < 6:
        parser.error(
            "besthits_file1 must contain at least six '_'-separated fields "
            "to build the output name, got '{}'".format(args.besthits_file1))
    suffix = name_fields[4] + '_' + name_fields[5]
    out_name = 'RBH_{}_dna.fasta'.format(suffix)

    # Open dict of best hits.
    # NOTE(security): pickle.load executes arbitrary code from the file; these
    # files are presumably produced by an earlier pipeline step -- never
    # point this script at pickles from an untrusted source.
    # Binary mode is required for pickle data on Python 3 and harmless on 2.
    with open('dict_best_hits_from_blast_1', 'rb') as handle:
        best_hit_dict_q = pickle.load(handle)
    with open('dict_best_hits_from_blast_2', 'rb') as handle:
        best_hit_dict_db = pickle.load(handle)

    best_h1 = _read_two_line_fasta(args.besthits_file1)
    best_h2 = _read_two_line_fasta(args.besthits_file2)

    # Find RBH: look each forward hit up directly in the second dictionary.
    # Inverting the second dictionary instead would keep only one key per
    # value and lose true reciprocal pairs whenever several of its entries
    # share the same best hit.
    rbh = sorted(
        (query, hit)
        for query, hit in best_hit_dict_q.items()
        if best_hit_dict_db.get(hit) == query
    )

    with open(out_name, 'w') as output:
        for query, hit in rbh:
            output.write('>' + query + '\n')
            output.write(best_h1[query] + '\n')
            output.write('>' + hit + '\n')
            output.write(best_h2[hit] + '\n')
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
51 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()