comparison protein_blast_grouping.py @ 1:f2a7dffab581 draft

planemo upload commit 6dde4ec93f27f36a10017393063dcf0568f1d405
author cpt
date Thu, 08 Aug 2024 02:46:54 +0000
parents 7abe5f471364
children c5e0e05ce58a
comparison
equal deleted inserted replaced
0:7abe5f471364 1:f2a7dffab581
1 import argparse 1 import argparse
2 import re 2 import re
3 import sys
3 4
4 5
5 class BlastProteinResultParser: 6 class BlastProteinResultParser:
6 def __init__(self, blast_file): 7 def __init__(self, blast_file):
7 self.blast_file = blast_file 8 self.blast_file = blast_file
35 return len(item[1][key]) 36 return len(item[1][key])
36 37
37 sorted_results = sorted(self.results.items(), key=sort_key, reverse=True) 38 sorted_results = sorted(self.results.items(), key=sort_key, reverse=True)
38 return sorted_results[:num_hits] 39 return sorted_results[:num_hits]
39 40
40 def print_results(self, num_hits, sort_key="unique_queries"): 41 def print_results(
42 self, num_hits, sort_key="unique_queries", output_file=sys.stdout
43 ):
41 top_hits = self.get_top_hits(num_hits, sort_key) 44 top_hits = self.get_top_hits(num_hits, sort_key)
42 print(f"# Top {num_hits} Hits") 45 print(f"# Top {num_hits} Hits")
43 print( 46 print(
44 "{:<50} {:<25} {:<25}".format( 47 "{:<50} {:<25} {:<25}".format(
45 "# Name", "Unique Query Matches", "Unique Subject Hits" 48 "# Name", "Unique Query Matches", "Unique Subject Hits"
47 ) 50 )
48 for organism, data in top_hits: 51 for organism, data in top_hits:
49 print( 52 print(
50 "{:<50} {:<25} {:<25}".format( 53 "{:<50} {:<25} {:<25}".format(
51 organism, len(data["unique_queries"]), len(data["unique_hits"]) 54 organism, len(data["unique_queries"]), len(data["unique_hits"])
52 ) 55 ),
56 file=output_file,
53 ) 57 )
54 58
55 59
56 def main(): 60 def main():
57 parser = argparse.ArgumentParser( 61 parser = argparse.ArgumentParser(
60 parser.add_argument("blast", type=argparse.FileType("r"), help="Blast Results") 64 parser.add_argument("blast", type=argparse.FileType("r"), help="Blast Results")
61 parser.add_argument( 65 parser.add_argument(
62 "--hits", type=int, default=5, help="Number of top hits to display" 66 "--hits", type=int, default=5, help="Number of top hits to display"
63 ) 67 )
64 parser.add_argument( 68 parser.add_argument(
69 "--output",
70 type=argparse.FileType("w"),
71 default="-",
72 help="Output file (default: stdout)",
73 )
74 parser.add_argument(
65 "--sort", 75 "--sort",
66 choices=["unique_queries", "unique_hits"], 76 choices=["unique_queries", "unique_hits"],
67 default="unique_queries", 77 default="unique_queries",
68 help="Sort results by 'unique_queries' (default) or 'unique_hits'", 78 help="Sort results by 'unique_queries' (default) or 'unique_hits'",
69 ) 79 )
70 args = parser.parse_args() 80 args = parser.parse_args()
71 81
72 blast_parser = BlastProteinResultParser(args.blast) 82 blast_parser = BlastProteinResultParser(args.blast)
73 blast_parser.parse_blast() 83 blast_parser.parse_blast()
74 blast_parser.print_results(args.hits, args.sort) 84 blast_parser.print_results(args.hits, args.sort, args.output)
85
86 args.output.close()
75 87
76 88
77 if __name__ == "__main__": 89 if __name__ == "__main__":
78 main() 90 main()