Mercurial > repos > abims-sbr > pairwise
diff scripts/S05_find_rbh.py @ 0:90b57ab0bd1d draft default tip
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
| author | abims-sbr | 
|---|---|
| date | Fri, 01 Feb 2019 10:23:16 -0500 | 
| parents | |
| children | 
line wrap: on
 line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/S05_find_rbh.py Fri Feb 01 10:23:16 2019 -0500 @@ -0,0 +1,53 @@ +#!/usr/bin/env python +# coding: utf-8 +# Author : Victor Mataigne + +import argparse, pickle, itertools + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('besthits_file1', help='') + parser.add_argument('besthits_file2', help='') + args = parser.parse_args() + + # Open dict of best hits + file_best_hit_dict_q = open('dict_best_hits_from_blast_1') + file_best_hit_dict_db = open('dict_best_hits_from_blast_2') + best_hit_dict_q = pickle.load(file_best_hit_dict_q) + best_hit_dict_db = pickle.load(file_best_hit_dict_db) + file_best_hit_dict_q.close() + file_best_hit_dict_db.close() + + best_h1 = {} + with open(args.besthits_file1, 'r') as bh1 : + for h, s in itertools.izip_longest(*[bh1]*2): + header = h.strip('>\n') + sequence = s.strip('\n') + best_h1[header] = sequence + + best_h2 = {} + with open(args.besthits_file2, 'r') as bh2 : + for h, s in itertools.izip_longest(*[bh2]*2): + header = h.strip('>\n') + sequence = s.strip('\n') + best_h2[header] = sequence + + # Find RBH: + reverse_best_hit_dict_db = dict((v,k) for k,v in best_hit_dict_db.iteritems()) + + rbh = set(best_hit_dict_q.items()).intersection(set(reverse_best_hit_dict_db.items())) + + s = args.besthits_file1.split('_') + suffix = s[4] + '_' + s[5] + out_name = 'RBH_{}_dna.fasta'.format(suffix) + output = open(out_name, 'w') + + for pairwise_couple in rbh : + output.write('>'+pairwise_couple[0]+'\n') + output.write(best_h1[pairwise_couple[0]]+'\n') + output.write('>'+pairwise_couple[1]+'\n') + output.write(best_h2[pairwise_couple[1]]+'\n') + output.close() + +if __name__ == "__main__": + main() \ No newline at end of file
