Mercurial > repos > abims-sbr > pairwise
annotate scripts/S01_run_first_blast.py @ 0:90b57ab0bd1d draft default tip
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
author | abims-sbr |
---|---|
date | Fri, 01 Feb 2019 10:23:16 -0500 |
parents | |
children |
rev | line source |
---|---|
0
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
1 #!/usr/bin/env python |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
2 # coding: utf-8 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
3 # Author : Victor Mataigne |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
4 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
5 import itertools, argparse, os |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
6 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
7 """ |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
8 IMPROVEMENTS : |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
9 - Maybe a bit of code factoring |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
10 - See if it is possible to avoid building the same db several times |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
11 """ |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
12 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
13 # The script (and S03_run_second_blast.py as well) must be launched with the python '-W ignore' option if tested with planemo |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
14 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
15 def main(): |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
16 parser = argparse.ArgumentParser() |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
17 parser.add_argument('files', help='fasta files separated by commas') |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
18 parser.add_argument('evalue', help='evalue for blast') |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
19 parser.add_argument('method', choices=['tblastx', 'diamond'], help='alignment tool (tblastx or diamond)') |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
20 args = parser.parse_args() |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
21 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
22 in_files = args.files.split(',') |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
23 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
24 if args.method == 'diamond': |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
25 in_files_translated = [] |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
26 from Bio.Seq import Seq |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
27 from Bio.Alphabet import IUPAC |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
28 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
29 # From every sequence, make three sequences (translations in the three reading frames) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
30 print 'Translating every sequence in all reading frames ...' |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
31 for file in in_files: |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
32 name = 'translated_%s' %file |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
33 in_files_translated.append(name) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
34 translated_file = open(name, 'w') |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
35 with open(file, 'r') as file: |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
36 for name, seq in itertools.izip_longest(*[file]*2): |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
37 s = Seq(seq.strip('\n').upper(), IUPAC.ambiguous_dna) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
38 translated_file.write(name.strip('\n')+'_orf_1\n') |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
39 translated_file.write(s.translate()._data+'\n') |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
40 translated_file.write(name.strip('\n')+'_orf_2\n') |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
41 translated_file.write(s[1:].translate()._data+'\n') |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
42 translated_file.write(name.strip('\n')+'_orf_3\n') |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
43 translated_file.write(s[2:].translate()._data+'\n') |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
44 translated_file.close() |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
45 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
46 # Make the list of all pairwise combinations |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
47 list_pairwise = itertools.combinations(in_files_translated, 2) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
48 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
49 elif args.method == 'tblastx': |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
50 list_pairwise = itertools.combinations(in_files, 2) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
51 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
52 else : |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
53 print 'Mispecified alignment tool' |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
54 exit() |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
55 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
56 os.mkdir('outputs_RBH_dna') |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
57 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
58 # Main loop |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
59 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
60 if args.method == 'diamond': |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
61 for pairwise in list_pairwise: |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
62 print "Pair of species:" |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
63 print pairwise |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
64 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
65 sp1, sp2 = pairwise[0].split('_')[1], pairwise[1].split('_')[1] #rename 'translated_Xx_transcriptom.fasta' |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
66 sub_directory_name = sp1 + '_' + sp2 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
67 os.mkdir('./blast_%s' %sub_directory_name) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
68 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
69 print 'Running first blast with Diamond ...' |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
70 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
71 # Run diamond |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
72 os.system('diamond makedb --in %s -d %s >> log_diamond.log' %(pairwise[1], sp2)) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
73 os.system('diamond blastp -q %s -d %s --max-target-seqs 1 -o matches_blast1_%s -e %s --more-sensitive >> log_diamond.log' %(pairwise[0], sp2, sub_directory_name, args.evalue)) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
74 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
75 # tabular output : |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
76 # qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
77 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
78 a = pairwise[1].replace('translated_', '') |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
79 b = pairwise[0].replace('translated_', '') |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
80 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
81 # There is a chance to have no hits returned |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
82 if os.path.getsize('matches_blast1_%s' %sub_directory_name) == 0: |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
83 print 'No hits found. Processing next species pair ...' |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
84 else : |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
85 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
86 # Record only one best_hit per transcript (best of the 6 orfs) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
87 os.system('python S02_04_keep_one_hit_from_blast.py matches_blast1_%s %s %s %s %s %s' %(sub_directory_name, a, b, sub_directory_name, '1', args.method)) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
88 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
89 # 2d blast with only best hits as db |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
90 print 'Running second blast with Diamond ... ' |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
91 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
92 os.system('python -W ignore S03_run_second_blast.py best_hits_db_blast1_%s %s %s %s %s' %(sub_directory_name, pairwise[0], sub_directory_name, args.evalue, args.method)) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
93 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
94 # Record only one best_hit per transcript (best of the 6 orfs) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
95 os.system('python S02_04_keep_one_hit_from_blast.py matches_blast2_%s %s %s %s %s %s' %(sub_directory_name, b, a, sub_directory_name, '2', args.method)) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
96 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
97 # Find Reciprocal Best Hits |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
98 name1 = 'best_hits_q_blast1_{}'.format(sub_directory_name) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
99 name2 = 'best_hits_q_blast2_{}'.format(sub_directory_name) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
100 os.system('python S05_find_rbh.py %s %s ' %(name1, name2)) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
101 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
102 os.system('mv log_diamond.log ./blast_%s' %sub_directory_name) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
103 os.system('rm -f *.dmnd') |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
104 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
105 # Those files exist only if hits were found during the first blast |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
106 if os.path.getsize('matches_blast1_%s' %sub_directory_name) != 0: |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
107 os.system('mv *best_hits* ./blast_%s' %sub_directory_name) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
108 os.system('mv RBH* outputs_RBH_dna') |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
109 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
110 os.system('mv matches_blast* ./blast_%s' %(sub_directory_name)) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
111 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
112 os.mkdir('translated_seqs') |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
113 os.system('mv translated*.fasta ./translated_seqs') |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
114 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
115 elif args.method == 'tblastx': |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
116 for pairwise in list_pairwise: |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
117 print "Pair of species:" |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
118 print pairwise |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
119 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
120 sp1, sp2 = pairwise[0].split('_')[0], pairwise[1].split('_')[0] |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
121 sub_directory_name = sp1 + '_' + sp2 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
122 os.mkdir('./blast_%s' %sub_directory_name) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
123 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
124 print 'Running first tblastx ...' |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
125 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
126 # Run tblastx (formatdb + blastall) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
127 os.system('formatdb -i %s -p F -o T >> log_tblastx.log' %(pairwise[1])) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
128 os.system('blastall -p tblastx -d %s -i %s -o matches_blast1_%s -T F -e %s -F "mS" -b1 -v1 -K 1 -m 8 >> log_tblastx.log' %(pairwise[1], pairwise[0], sub_directory_name, args.evalue)) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
129 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
130 # tabular output : |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
131 # qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
132 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
133 # There is a chance to have no hits returned |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
134 if os.path.getsize('matches_blast1_%s' %sub_directory_name) == 0: |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
135 print 'No hits found. Processing next species pair ...' |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
136 else: |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
137 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
138 # Record only one best_hit per transcript (best of the 6 orfs) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
139 os.system('python S02_04_keep_one_hit_from_blast.py matches_blast1_%s %s %s %s %s %s' %(sub_directory_name, pairwise[1], pairwise[0], sub_directory_name, '1', args.method)) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
140 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
141 # 2d blast with only best hits as db |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
142 print 'Running second blast with Diamond ... ' |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
143 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
144 os.system('python S03_run_second_blast.py best_hits_db_blast1_%s %s %s %s %s' %(sub_directory_name, pairwise[0], sub_directory_name, args.evalue, args.method)) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
145 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
146 # Record only one best_hit per transcript (best of the 6 orfs) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
147 os.system('python S02_04_keep_one_hit_from_blast.py matches_blast2_%s %s %s %s %s %s' %(sub_directory_name, pairwise[0], pairwise[1], sub_directory_name, '2', args.method)) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
148 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
149 # Find Reciprocal Best Hits |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
150 name1 = 'best_hits_q_blast1_{}'.format(sub_directory_name) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
151 name2 = 'best_hits_q_blast2_{}'.format(sub_directory_name) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
152 os.system('python S05_find_rbh.py %s %s ' %(name1, name2)) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
153 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
154 os.system('mv log_tblastx.log ./blast_%s' %sub_directory_name) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
155 os.system('rm -f *.nhr') |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
156 os.system('rm -f *.nin') |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
157 os.system('rm -f *.nsd') |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
158 os.system('rm -f *.nsi') |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
159 os.system('rm -f *.nsq') |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
160 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
161 # Those files exist only if hits were found during the first blast |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
162 if os.path.getsize('matches_blast1_%s' %sub_directory_name) != 0: |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
163 os.system('mv *best_hits* ./blast_%s' %sub_directory_name) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
164 os.system('mv RBH* outputs_RBH_dna') |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
165 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
166 os.system('mv matches_blast* ./blast_%s' %(sub_directory_name)) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
167 #os.system('mv matches_blast2_%s ./blast_%s' %(sub_directory_name, sub_directory_name)) |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
168 |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
169 if __name__ == "__main__": |
90b57ab0bd1d
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
170 main() |