annotate blast_unmatched.py @ 5:d0c2a559fe1b draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/blast_unmatched commit 4a20557bdd1306b701fd5bc61f9d0aceb24da0d5
author artbio
date Wed, 11 Oct 2023 11:12:34 +0000
parents caa54ff096c8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f3b63b59a1ea planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff changeset
1 #!/usr/bin/env python3
f3b63b59a1ea planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff changeset
2
f3b63b59a1ea planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff changeset
3 import optparse
2
dfcdac284538 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 830e10a94c2afc178f4078609842cd93808df1b4
artbio
parents: 1
diff changeset
4 import re
0
f3b63b59a1ea planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff changeset
5
3
fffdb903f2d1 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 1db05fc1c4849528c99b149d482eb34d3a80f22e
artbio
parents: 2
diff changeset
6
0
f3b63b59a1ea planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff changeset
def parse_options(argv=None):
    """
    Parse the options given to the script

    argv: optional list of command line arguments, without the program
          name; when None, optparse falls back to sys.argv[1:]

    Returns the optparse values object carrying the fasta_file, blast_file
    and output_file attributes.

    Exits through parser.error (usage message on stderr, SystemExit) when a
    positional argument is given or when one of the three options is
    missing.
    """
    parser = optparse.OptionParser(description='Get unmatched blast queries')
    parser.add_option('-f', '--fasta', dest='fasta_file',
                      help='Query fasta file used during blast')
    parser.add_option('-b', '--blast', dest='blast_file',
                      help='Blast tabular output (queries in 1rst column)')
    parser.add_option('-o', '--output', dest='output_file',
                      help='Output file name')
    (options, args) = parser.parse_args(argv)
    if args:
        parser.error('Wrong number of arguments')
    # All three paths are opened later on; report a missing one here with a
    # usage message instead of failing later with a TypeError in open(None).
    missing = [flag for flag, value in (('--fasta', options.fasta_file),
                                        ('--blast', options.blast_file),
                                        ('--output', options.output_file))
               if value is None]
    if missing:
        parser.error('Missing required option(s): ' + ', '.join(missing))
    return options
f3b63b59a1ea planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff changeset
22
3
fffdb903f2d1 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 1db05fc1c4849528c99b149d482eb34d3a80f22e
artbio
parents: 2
diff changeset
23
0
f3b63b59a1ea planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff changeset
def get_matched(blast_file):
    """
    Get a dictionary of all the queries that got a match

    blast_file: path to a tab-separated blast output whose first column
                holds the query id

    Returns a dict keyed by query id (every value is 1); it is only meant
    to be used for membership tests.
    """
    matched = dict()
    with open(blast_file, 'r') as infile:
        for line in infile:
            # Drop the line terminator before splitting: on a line without
            # any tab the first field would otherwise keep its "\n" and
            # never compare equal to a fasta header.
            line = line.rstrip('\r\n')
            if not line:
                continue  # a blank line carries no query id
            query_id = line.split("\t", 1)[0]
            matched[query_id] = 1
    return matched
f3b63b59a1ea planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff changeset
35
3
fffdb903f2d1 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 1db05fc1c4849528c99b149d482eb34d3a80f22e
artbio
parents: 2
diff changeset
36
0
f3b63b59a1ea planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff changeset
def _trim_trailing_symbol(name, pattern):
    """Return name without its last character when pattern matches it"""
    if pattern.match(name) is not None:
        return name[:-1]
    return name


def get_unmatched(output_file, fasta_file, matched):
    """
    Compare matched queries to the query fasta file and write the unmatched
    records to the output

    output_file: path of the fasta file to write (created or overwritten)
    fasta_file: path of the query fasta file used during blast
    matched: container of matched query ids, only used with "in"

    A record (header line plus the lines up to the next header) is written
    when neither its whole header nor the first whitespace-delimited word
    of its header is found in matched. Lines located before the first
    header are never written.
    """
    # A name of at least two characters ending with a non-word character
    # loses that last character before the lookup.
    # NOTE(review): kept from the previous implementation, whose comment
    # read "qid are 100chars long in blast"; presumably this mirrors how
    # the ids show up in the blast output - confirm.
    end = re.compile(r".+\W$")
    # Both files are handled by the with statement so that the output is
    # flushed and closed even when reading the fasta file fails.
    with open(output_file, 'w') as output_file_handle, \
            open(fasta_file, 'r') as infile:
        unmatched = False
        for line in infile:
            if line.startswith('>'):
                header = line[1:].rstrip()
                full_id = _trim_trailing_symbol(header, end)
                # Headers may carry a description after the id; look up
                # the first word as well so such records are not reported
                # as unmatched.
                words = header.split(None, 1)
                if words:
                    short_id = _trim_trailing_symbol(words[0], end)
                else:
                    short_id = full_id
                unmatched = (full_id not in matched
                             and short_id not in matched)
            if unmatched:
                output_file_handle.write(line)
f3b63b59a1ea planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff changeset
58
3
fffdb903f2d1 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 1db05fc1c4849528c99b149d482eb34d3a80f22e
artbio
parents: 2
diff changeset
59
0
f3b63b59a1ea planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff changeset
def __main__():
    """
    Run the tool: read the command line options, collect the matched
    queries from the blast file, then write the unmatched fasta records
    """
    settings = parse_options()
    matched_queries = get_matched(settings.blast_file)
    get_unmatched(settings.output_file, settings.fasta_file, matched_queries)
f3b63b59a1ea planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff changeset
64
3
fffdb903f2d1 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 1db05fc1c4849528c99b149d482eb34d3a80f22e
artbio
parents: 2
diff changeset
65
0
f3b63b59a1ea planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff changeset
# Run the tool only when the file is executed as a script, not on import.
if __name__ == "__main__":
    __main__()