# HG changeset patch # User cpt # Date 1723085214 0 # Node ID f2a7dffab5818718925a22487d208af5abfa4559 # Parent 7abe5f4713643f8f73c97286320de1653af04ade planemo upload commit 6dde4ec93f27f36a10017393063dcf0568f1d405 diff -r 7abe5f471364 -r f2a7dffab581 protein_blast_grouping.py --- a/protein_blast_grouping.py Wed Jul 24 01:37:37 2024 +0000 +++ b/protein_blast_grouping.py Thu Aug 08 02:46:54 2024 +0000 @@ -1,5 +1,6 @@ import argparse import re +import sys class BlastProteinResultParser: @@ -37,7 +38,9 @@ sorted_results = sorted(self.results.items(), key=sort_key, reverse=True) return sorted_results[:num_hits] - def print_results(self, num_hits, sort_key="unique_queries"): + def print_results( + self, num_hits, sort_key="unique_queries", output_file=sys.stdout + ): top_hits = self.get_top_hits(num_hits, sort_key) print(f"# Top {num_hits} Hits") print( @@ -49,7 +52,8 @@ print( "{:<50} {:<25} {:<25}".format( organism, len(data["unique_queries"]), len(data["unique_hits"]) - ) + ), + file=output_file, ) @@ -62,6 +66,12 @@ "--hits", type=int, default=5, help="Number of top hits to display" ) parser.add_argument( + "--output", + type=argparse.FileType("w"), + default="-", + help="Output file (default: stdout)", + ) + parser.add_argument( "--sort", choices=["unique_queries", "unique_hits"], default="unique_queries", @@ -71,7 +81,9 @@ blast_parser = BlastProteinResultParser(args.blast) blast_parser.parse_blast() - blast_parser.print_results(args.hits, args.sort) + blast_parser.print_results(args.hits, args.sort, args.output) + + args.output.close() if __name__ == "__main__": diff -r 7abe5f471364 -r f2a7dffab581 protein_blast_grouping.xml --- a/protein_blast_grouping.xml Wed Jul 24 01:37:37 2024 +0000 +++ b/protein_blast_grouping.xml Thu Aug 08 02:46:54 2024 +0000 @@ -6,11 +6,11 @@ '$grouping_output' + --output '$grouping_output' ]]> @@ -45,13 +45,14 @@ - -**What it does** -* Reads a tab-delimited BLAST output file. -* Extracts organism names from the subject titles (text in square brackets). -* Counts unique query proteins that matched each organism and unique hit proteins from each organism. -* Sorts and displays results based on either unique queries or unique hits. -* The output is a formatted table showing the top N organisms with the most matches. +