Mercurial > repos > cpt > cpt_protein_blast_grouping

--- a/protein_blast_grouping.py	Wed Jul 24 01:37:37 2024 +0000
+++ b/protein_blast_grouping.py	Thu Aug 08 02:46:54 2024 +0000
@@ -1,5 +1,6 @@
 import argparse
 import re
+import sys


 class BlastProteinResultParser:
@@ -37,7 +38,9 @@
         sorted_results = sorted(self.results.items(), key=sort_key, reverse=True)
         return sorted_results[:num_hits]

-    def print_results(self, num_hits, sort_key="unique_queries"):
+    def print_results(
+        self, num_hits, sort_key="unique_queries", output_file=sys.stdout
+    ):
         top_hits = self.get_top_hits(num_hits, sort_key)
         print(f"# Top {num_hits} Hits")
         print(
@@ -49,7 +52,8 @@
             print(
                 "{:<50} {:<25} {:<25}".format(
                     organism, len(data["unique_queries"]), len(data["unique_hits"])
-                )
+                ),
+                file=output_file,
             )


@@ -62,6 +66,12 @@
         "--hits", type=int, default=5, help="Number of top hits to display"
     )
     parser.add_argument(
+        "--output",
+        type=argparse.FileType("w"),
+        default="-",
+        help="Output file (default: stdout)",
+    )
+    parser.add_argument(
         "--sort",
         choices=["unique_queries", "unique_hits"],
         default="unique_queries",
@@ -71,7 +81,9 @@

     blast_parser = BlastProteinResultParser(args.blast)
     blast_parser.parse_blast()
-    blast_parser.print_results(args.hits, args.sort)
+    blast_parser.print_results(args.hits, args.sort, args.output)
+
+    args.output.close()


 if __name__ == "__main__":
--- a/protein_blast_grouping.xml	Wed Jul 24 01:37:37 2024 +0000
+++ b/protein_blast_grouping.xml	Thu Aug 08 02:46:54 2024 +0000
@@ -6,11 +6,11 @@
   <expand macro="requirements"/>
   <command detect_errors="aggressive">
     <![CDATA[
-      '$__tool_directory__/protein_blast_grouping.py'
+      $__tool_directory__/protein_blast_grouping.py
       '${blast_in.blast}'
       --hits '$hits'
       --sort '$sort.sortType'
-      > '$grouping_output'
+      --output '$grouping_output'
     ]]>
   </command>
   <inputs>
@@ -45,13 +45,14 @@
       <output name="grouping_output" file="outfile.txt" lines_diff="1"/>
     </test>
   </tests>
-  <help>
-**What it does**
-* Reads a tab-delimited BLAST output file.
-* Extracts organism names from the subject titles (text in square brackets).
-* Counts unique query proteins that matched each organism and unique hit proteins from each organism.
-* Sorts and displays results based on either unique queries or unique hits.
-* The output is a formatted table showing the top N organisms with the most matches.
+  <help><![CDATA[
+  **What it does**
+  * Reads a tab-delimited BLAST output file.
+  * Extracts organism names from the subject titles (text in square brackets).
+  * Counts unique query proteins that matched each organism and unique hit proteins from each organism.
+  * Sorts and displays results based on either unique queries or unique hits.
+  * The output is a formatted table showing the top N organisms with the most matches.
+  ]]>
 </help>
   <expand macro="citations-2020"/>
 </tool>