# HG changeset patch
# User cpt
# Date 1723085214 0
# Node ID f2a7dffab5818718925a22487d208af5abfa4559
# Parent 7abe5f4713643f8f73c97286320de1653af04ade
planemo upload commit 6dde4ec93f27f36a10017393063dcf0568f1d405
diff -r 7abe5f471364 -r f2a7dffab581 protein_blast_grouping.py
--- a/protein_blast_grouping.py Wed Jul 24 01:37:37 2024 +0000
+++ b/protein_blast_grouping.py Thu Aug 08 02:46:54 2024 +0000
@@ -1,5 +1,6 @@
import argparse
import re
+import sys
class BlastProteinResultParser:
@@ -37,7 +38,9 @@
sorted_results = sorted(self.results.items(), key=sort_key, reverse=True)
return sorted_results[:num_hits]
- def print_results(self, num_hits, sort_key="unique_queries"):
+ def print_results(
+ self, num_hits, sort_key="unique_queries", output_file=sys.stdout
+ ):
top_hits = self.get_top_hits(num_hits, sort_key)
print(f"# Top {num_hits} Hits")
print(
@@ -49,7 +52,8 @@
print(
"{:<50} {:<25} {:<25}".format(
organism, len(data["unique_queries"]), len(data["unique_hits"])
- )
+ ),
+ file=output_file,
)
@@ -62,6 +66,12 @@
"--hits", type=int, default=5, help="Number of top hits to display"
)
parser.add_argument(
+ "--output",
+ type=argparse.FileType("w"),
+ default="-",
+ help="Output file (default: stdout)",
+ )
+ parser.add_argument(
"--sort",
choices=["unique_queries", "unique_hits"],
default="unique_queries",
@@ -71,7 +81,9 @@
blast_parser = BlastProteinResultParser(args.blast)
blast_parser.parse_blast()
- blast_parser.print_results(args.hits, args.sort)
+ blast_parser.print_results(args.hits, args.sort, args.output)
+
+ args.output.close()
if __name__ == "__main__":
diff -r 7abe5f471364 -r f2a7dffab581 protein_blast_grouping.xml
--- a/protein_blast_grouping.xml Wed Jul 24 01:37:37 2024 +0000
+++ b/protein_blast_grouping.xml Thu Aug 08 02:46:54 2024 +0000
@@ -6,11 +6,11 @@
'$grouping_output'
+ --output '$grouping_output'
]]>
@@ -45,13 +45,14 @@
-
-**What it does**
-* Reads a tab-delimited BLAST output file.
-* Extracts organism names from the subject titles (text in square brackets).
-* Counts unique query proteins that matched each organism and unique hit proteins from each organism.
-* Sorts and displays results based on either unique queries or unique hits.
-* The output is a formatted table showing the top N organisms with the most matches.
+