Mercurial > repos > cpt > cpt_protein_blast_grouping
changeset 1:f2a7dffab581 draft
planemo upload commit 6dde4ec93f27f36a10017393063dcf0568f1d405
author | cpt |
---|---|
date | Thu, 08 Aug 2024 02:46:54 +0000 |
parents | 7abe5f471364 |
children | 8e34cd137d3e |
files | protein_blast_grouping.py protein_blast_grouping.xml |
diffstat | 2 files changed, 25 insertions(+), 12 deletions(-) [+] |
line wrap: on
line diff
--- a/protein_blast_grouping.py Wed Jul 24 01:37:37 2024 +0000 +++ b/protein_blast_grouping.py Thu Aug 08 02:46:54 2024 +0000 @@ -1,5 +1,6 @@ import argparse import re +import sys class BlastProteinResultParser: @@ -37,7 +38,9 @@ sorted_results = sorted(self.results.items(), key=sort_key, reverse=True) return sorted_results[:num_hits] - def print_results(self, num_hits, sort_key="unique_queries"): + def print_results( + self, num_hits, sort_key="unique_queries", output_file=sys.stdout + ): top_hits = self.get_top_hits(num_hits, sort_key) print(f"# Top {num_hits} Hits") print( @@ -49,7 +52,8 @@ print( "{:<50} {:<25} {:<25}".format( organism, len(data["unique_queries"]), len(data["unique_hits"]) - ) + ), + file=output_file, ) @@ -62,6 +66,12 @@ "--hits", type=int, default=5, help="Number of top hits to display" ) parser.add_argument( + "--output", + type=argparse.FileType("w"), + default="-", + help="Output file (default: stdout)", + ) + parser.add_argument( "--sort", choices=["unique_queries", "unique_hits"], default="unique_queries", @@ -71,7 +81,9 @@ blast_parser = BlastProteinResultParser(args.blast) blast_parser.parse_blast() - blast_parser.print_results(args.hits, args.sort) + blast_parser.print_results(args.hits, args.sort, args.output) + + args.output.close() if __name__ == "__main__":
--- a/protein_blast_grouping.xml Wed Jul 24 01:37:37 2024 +0000 +++ b/protein_blast_grouping.xml Thu Aug 08 02:46:54 2024 +0000 @@ -6,11 +6,11 @@ <expand macro="requirements"/> <command detect_errors="aggressive"> <![CDATA[ - '$__tool_directory__/protein_blast_grouping.py' + $__tool_directory__/protein_blast_grouping.py '${blast_in.blast}' --hits '$hits' --sort '$sort.sortType' - > '$grouping_output' + --output '$grouping_output' ]]> </command> <inputs> @@ -45,13 +45,14 @@ <output name="grouping_output" file="outfile.txt" lines_diff="1"/> </test> </tests> - <help> -**What it does** -* Reads a tab-delimited BLAST output file. -* Extracts organism names from the subject titles (text in square brackets). -* Counts unique query proteins that matched each organism and unique hit proteins from each organism. -* Sorts and displays results based on either unique queries or unique hits. -* The output is a formatted table showing the top N organisms with the most matches. + <help><![CDATA[ + **What it does** + * Reads a tab-delimited BLAST output file. + * Extracts organism names from the subject titles (text in square brackets). + * Counts unique query proteins that matched each organism and unique hit proteins from each organism. + * Sorts and displays results based on either unique queries or unique hits. + * The output is a formatted table showing the top N organisms with the most matches. + ]]> </help> <expand macro="citations-2020"/> </tool>