Mercurial > repos > cpt > cpt_protein_blast_grouping
comparison protein_blast_grouping.py @ 1:f2a7dffab581 draft
planemo upload commit 6dde4ec93f27f36a10017393063dcf0568f1d405
author | cpt |
---|---|
date | Thu, 08 Aug 2024 02:46:54 +0000 |
parents | 7abe5f471364 |
children | c5e0e05ce58a |
comparison legend (row states): equal | deleted | inserted | replaced
0:7abe5f471364 | 1:f2a7dffab581 |
---|---|
1 import argparse | 1 import argparse |
2 import re | 2 import re |
3 import sys | |
3 | 4 |
4 | 5 |
5 class BlastProteinResultParser: | 6 class BlastProteinResultParser: |
6 def __init__(self, blast_file): | 7 def __init__(self, blast_file): |
7 self.blast_file = blast_file | 8 self.blast_file = blast_file |
35 return len(item[1][key]) | 36 return len(item[1][key]) |
36 | 37 |
37 sorted_results = sorted(self.results.items(), key=sort_key, reverse=True) | 38 sorted_results = sorted(self.results.items(), key=sort_key, reverse=True) |
38 return sorted_results[:num_hits] | 39 return sorted_results[:num_hits] |
39 | 40 |
40 def print_results(self, num_hits, sort_key="unique_queries"): | 41 def print_results( |
42 self, num_hits, sort_key="unique_queries", output_file=sys.stdout | |
43 ): | |
41 top_hits = self.get_top_hits(num_hits, sort_key) | 44 top_hits = self.get_top_hits(num_hits, sort_key) |
42 print(f"# Top {num_hits} Hits") | 45 print(f"# Top {num_hits} Hits") |
43 print( | 46 print( |
44 "{:<50} {:<25} {:<25}".format( | 47 "{:<50} {:<25} {:<25}".format( |
45 "# Name", "Unique Query Matches", "Unique Subject Hits" | 48 "# Name", "Unique Query Matches", "Unique Subject Hits" |
47 ) | 50 ) |
48 for organism, data in top_hits: | 51 for organism, data in top_hits: |
49 print( | 52 print( |
50 "{:<50} {:<25} {:<25}".format( | 53 "{:<50} {:<25} {:<25}".format( |
51 organism, len(data["unique_queries"]), len(data["unique_hits"]) | 54 organism, len(data["unique_queries"]), len(data["unique_hits"]) |
52 ) | 55 ), |
56 file=output_file, | |
53 ) | 57 ) |
54 | 58 |
55 | 59 |
56 def main(): | 60 def main(): |
57 parser = argparse.ArgumentParser( | 61 parser = argparse.ArgumentParser( |
60 parser.add_argument("blast", type=argparse.FileType("r"), help="Blast Results") | 64 parser.add_argument("blast", type=argparse.FileType("r"), help="Blast Results") |
61 parser.add_argument( | 65 parser.add_argument( |
62 "--hits", type=int, default=5, help="Number of top hits to display" | 66 "--hits", type=int, default=5, help="Number of top hits to display" |
63 ) | 67 ) |
64 parser.add_argument( | 68 parser.add_argument( |
69 "--output", | |
70 type=argparse.FileType("w"), | |
71 default="-", | |
72 help="Output file (default: stdout)", | |
73 ) | |
74 parser.add_argument( | |
65 "--sort", | 75 "--sort", |
66 choices=["unique_queries", "unique_hits"], | 76 choices=["unique_queries", "unique_hits"], |
67 default="unique_queries", | 77 default="unique_queries", |
68 help="Sort results by 'unique_queries' (default) or 'unique_hits'", | 78 help="Sort results by 'unique_queries' (default) or 'unique_hits'", |
69 ) | 79 ) |
70 args = parser.parse_args() | 80 args = parser.parse_args() |
71 | 81 |
72 blast_parser = BlastProteinResultParser(args.blast) | 82 blast_parser = BlastProteinResultParser(args.blast) |
73 blast_parser.parse_blast() | 83 blast_parser.parse_blast() |
74 blast_parser.print_results(args.hits, args.sort) | 84 blast_parser.print_results(args.hits, args.sort, args.output) |
85 | |
86 args.output.close() | |
75 | 87 |
76 | 88 |
77 if __name__ == "__main__": | 89 if __name__ == "__main__": |
78 main() | 90 main() |