Mercurial > repos > artbio > blast_unmatched
annotate blast_unmatched.py @ 4:caa54ff096c8 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 3dcf4e7314dd6a4fefcd721ac58c8130dd1da2a1"
author | artbio |
---|---|
date | Wed, 10 Mar 2021 19:10:06 +0000 |
parents | fffdb903f2d1 |
children |
rev | line source |
---|---|
0
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
2 |
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
3 import optparse |
2
dfcdac284538
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 830e10a94c2afc178f4078609842cd93808df1b4
artbio
parents:
1
diff
changeset
|
4 import re |
0
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
5 |
3
fffdb903f2d1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 1db05fc1c4849528c99b149d482eb34d3a80f22e
artbio
parents:
2
diff
changeset
|
6 |
0
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
def parse_options():
    """
    Parse the options given to the script.

    Reads the command line with optparse and returns the parsed option
    values, which expose ``fasta_file``, ``blast_file`` and ``output_file``.

    Exits through ``parser.error`` (usage message on stderr, exit status 2)
    when positional arguments are supplied or when one of the three options
    is missing.
    """
    parser = optparse.OptionParser(description='Get unmatched blast queries')
    parser.add_option('-f', '--fasta', dest='fasta_file',
                      help='Query fasta file used during blast')
    parser.add_option('-b', '--blast', dest='blast_file',
                      help='Blast tabular output (queries in 1rst column)')
    parser.add_option('-o', '--output', dest='output_file',
                      help='Output file name')
    (options, args) = parser.parse_args()
    if args:
        parser.error('Wrong number of arguments')
    # The rest of the script opens all three paths, so an omitted option
    # would otherwise surface later as an obscure error from open(None).
    required = (('--fasta', 'fasta_file'),
                ('--blast', 'blast_file'),
                ('--output', 'output_file'))
    missing = [flag for flag, dest in required
               if getattr(options, dest) is None]
    if missing:
        parser.error('Missing required option(s): ' + ', '.join(missing))
    return options
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
22 |
3
fffdb903f2d1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 1db05fc1c4849528c99b149d482eb34d3a80f22e
artbio
parents:
2
diff
changeset
|
23 |
0
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
def get_matched(blast_file):
    """
    Get a dictionary of all the queries that got a match.

    Reads the tab-separated file at ``blast_file`` and collects the first
    column of every non-empty line.

    Returns a dict mapping each query id to 1; only the keys carry
    information, the dict is used for membership tests.
    """
    matched = dict()
    with open(blast_file, 'r') as infile:
        for line in infile:
            # Drop the line ending first: on a line without any tab the
            # first field would otherwise keep its trailing newline.
            line = line.rstrip('\r\n')
            if not line:
                # Blank lines carry no query id.
                continue
            query_id = line.split("\t", 1)[0]
            matched[query_id] = 1
    return matched
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
35 |
3
fffdb903f2d1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 1db05fc1c4849528c99b149d482eb34d3a80f22e
artbio
parents:
2
diff
changeset
|
36 |
0
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
def get_unmatched(output_file, fasta_file, matched):
    """
    Compare matched queries to the query fasta file and write the unmatched
    records to the output.

    output_file: path of the file to write (created or truncated).
    fasta_file: path of the query fasta file to read.
    matched: container of matched query ids, tested with ``in``.

    A record is a header line starting with '>' plus every following line
    up to the next header. A record is copied verbatim to ``output_file``
    when its header (without the leading '>' and trailing whitespace, and
    without one trailing non-word character if present) is not in
    ``matched``. Lines before the first header are dropped.
    """
    end = re.compile(r".+\W$")
    # Both handles are managed by the with-statement so the output file is
    # closed even if reading the fasta file raises.
    with open(output_file, 'w') as output_file_handle, \
            open(fasta_file, 'r') as infile:
        unmatched = False
        for line in infile:
            if line.startswith('>'):
                subline = line[1:].rstrip()  # qid are 100chars long in blast
                if end.match(subline) is not None:
                    # Header ends with a non-word character: drop it
                    # before looking the id up.
                    subline = subline[:-1]
                unmatched = subline not in matched
            if unmatched:
                # Header or sequence line of a record that had no match.
                output_file_handle.write(line)
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
58 |
3
fffdb903f2d1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 1db05fc1c4849528c99b149d482eb34d3a80f22e
artbio
parents:
2
diff
changeset
|
59 |
0
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
def __main__():
    """
    Script entry point.

    Parses the command-line options, collects the query ids found in the
    blast file, then writes the fasta records whose id is not among them
    to the output file.
    """
    options = parse_options()
    matched_queries = get_matched(options.blast_file)
    get_unmatched(
        options.output_file,
        options.fasta_file,
        matched_queries,
    )
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
64 |
3
fffdb903f2d1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 1db05fc1c4849528c99b149d482eb34d3a80f22e
artbio
parents:
2
diff
changeset
|
65 |
0
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
# Run the tool only when the file is executed as a script. The previous
# ``if __main__():`` form called the entry point unconditionally, so merely
# importing the module parsed sys.argv and processed files.
if __name__ == '__main__':
    __main__()