Mercurial > repos > cpt > cpt_protein_blast_grouping
comparison protein_blast_grouping.py @ 1:f2a7dffab581 draft
planemo upload commit 6dde4ec93f27f36a10017393063dcf0568f1d405
| author | cpt |
|---|---|
| date | Thu, 08 Aug 2024 02:46:54 +0000 |
| parents | 7abe5f471364 |
| children | c5e0e05ce58a |
comparison
equal
deleted
inserted
replaced
| 0:7abe5f471364 | 1:f2a7dffab581 |
|---|---|
| 1 import argparse | 1 import argparse |
| 2 import re | 2 import re |
| | 3 import sys |
| 3 | 4 |
| 4 | 5 |
| 5 class BlastProteinResultParser: | 6 class BlastProteinResultParser: |
| 6 def __init__(self, blast_file): | 7 def __init__(self, blast_file): |
| 7 self.blast_file = blast_file | 8 self.blast_file = blast_file |
| 35 return len(item[1][key]) | 36 return len(item[1][key]) |
| 36 | 37 |
| 37 sorted_results = sorted(self.results.items(), key=sort_key, reverse=True) | 38 sorted_results = sorted(self.results.items(), key=sort_key, reverse=True) |
| 38 return sorted_results[:num_hits] | 39 return sorted_results[:num_hits] |
| 39 | 40 |
| 40 def print_results(self, num_hits, sort_key="unique_queries"): | 41 def print_results( |
| | 42 self, num_hits, sort_key="unique_queries", output_file=sys.stdout |
| | 43 ): |
| 41 top_hits = self.get_top_hits(num_hits, sort_key) | 44 top_hits = self.get_top_hits(num_hits, sort_key) |
| 42 print(f"# Top {num_hits} Hits") | 45 print(f"# Top {num_hits} Hits") |
| 43 print( | 46 print( |
| 44 "{:<50} {:<25} {:<25}".format( | 47 "{:<50} {:<25} {:<25}".format( |
| 45 "# Name", "Unique Query Matches", "Unique Subject Hits" | 48 "# Name", "Unique Query Matches", "Unique Subject Hits" |
| 47 ) | 50 ) |
| 48 for organism, data in top_hits: | 51 for organism, data in top_hits: |
| 49 print( | 52 print( |
| 50 "{:<50} {:<25} {:<25}".format( | 53 "{:<50} {:<25} {:<25}".format( |
| 51 organism, len(data["unique_queries"]), len(data["unique_hits"]) | 54 organism, len(data["unique_queries"]), len(data["unique_hits"]) |
| 52 ) | 55 ), |
| | 56 file=output_file, |
| 53 ) | 57 ) |
| 54 | 58 |
| 55 | 59 |
| 56 def main(): | 60 def main(): |
| 57 parser = argparse.ArgumentParser( | 61 parser = argparse.ArgumentParser( |
| 60 parser.add_argument("blast", type=argparse.FileType("r"), help="Blast Results") | 64 parser.add_argument("blast", type=argparse.FileType("r"), help="Blast Results") |
| 61 parser.add_argument( | 65 parser.add_argument( |
| 62 "--hits", type=int, default=5, help="Number of top hits to display" | 66 "--hits", type=int, default=5, help="Number of top hits to display" |
| 63 ) | 67 ) |
| 64 parser.add_argument( | 68 parser.add_argument( |
| | 69 "--output", |
| | 70 type=argparse.FileType("w"), |
| | 71 default="-", |
| | 72 help="Output file (default: stdout)", |
| | 73 ) |
| | 74 parser.add_argument( |
| 65 "--sort", | 75 "--sort", |
| 66 choices=["unique_queries", "unique_hits"], | 76 choices=["unique_queries", "unique_hits"], |
| 67 default="unique_queries", | 77 default="unique_queries", |
| 68 help="Sort results by 'unique_queries' (default) or 'unique_hits'", | 78 help="Sort results by 'unique_queries' (default) or 'unique_hits'", |
| 69 ) | 79 ) |
| 70 args = parser.parse_args() | 80 args = parser.parse_args() |
| 71 | 81 |
| 72 blast_parser = BlastProteinResultParser(args.blast) | 82 blast_parser = BlastProteinResultParser(args.blast) |
| 73 blast_parser.parse_blast() | 83 blast_parser.parse_blast() |
| 74 blast_parser.print_results(args.hits, args.sort) | 84 blast_parser.print_results(args.hits, args.sort, args.output) |
| | 85 |
| | 86 args.output.close() |
| 75 | 87 |
| 76 | 88 |
| 77 if __name__ == "__main__": | 89 if __name__ == "__main__": |
| 78 main() | 90 main() |
