Repository 'cpt_protein_blast_grouping'
hg clone https://toolshed.g2.bx.psu.edu/repos/cpt/cpt_protein_blast_grouping

Changeset 0:7abe5f471364 (2024-07-24)
Next changeset 1:f2a7dffab581 (2024-08-08)
Commit message:
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
added:
macros.xml
protein_blast_grouping.py
protein_blast_grouping.xml
test-data/infile.txt
test-data/outfile.txt
b
diff -r 000000000000 -r 7abe5f471364 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Wed Jul 24 01:37:37 2024 +0000
b
@@ -0,0 +1,170 @@
+<macros>
+    <!-- Shared macros and Cheetah tokens; tool wrappers pull them in with <import>macros.xml</import>. -->
+
+    <!-- Conda packages for the tools. The <yield/> lets an expanding tool append more requirements. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="3.9">python</requirement>
+            <requirement type="package" version="1.81">biopython</requirement>
+            <requirement type="package" version="1.2.2">cpt_gffparser</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <token name="@WRAPPER_VERSION@">2.4.0</token>
+    <xml name="citation/gepard">
+        <citation type="doi">10.1093/bioinformatics/btm039</citation>
+    </xml>
+
+    <!-- XMFA alignment input: the token is the command-line fragment, the macro is the matching param. -->
+    <token name="@XMFA_INPUT@">
+ '$xmfa'
+ </token>
+    <xml name="xmfa_input" token_formats="xmfa">
+        <param type="data" format="@FORMATS@" name="xmfa" label="XMFA MSA"/>
+    </xml>
+    <token name="@XMFA_FA_INPUT@">
+ '$sequences'
+ </token>
+    <xml name="xmfa_fa_input">
+        <param type="data" format="fasta" name="sequences" label="Sequences in alignment" help="These sequences should be the SAME DATASET that was used in the progressiveMauve run. Failing that, they should be provided in the same order as in original progressiveMauve run"/>
+    </xml>
+
+    <!-- Reference genome chooser: a FASTA from the history or an entry of the all_fasta data table. -->
+    <xml name="genome_selector">
+        <conditional name="reference_genome">
+            <param name="reference_genome_source" type="select" label="Reference Genome">
+                <option value="history" selected="true">From History</option>
+                <option value="cached">Locally Cached</option>
+            </param>
+            <when value="cached">
+                <param name="fasta_indexes" type="select" label="Source FASTA Sequence">
+                    <options from_data_table="all_fasta"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
+            </when>
+        </conditional>
+    </xml>
+    <xml name="gff3_input">
+        <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
+    </xml>
+    <xml name="input/gff3+fasta">
+        <expand macro="gff3_input"/>
+        <expand macro="genome_selector"/>
+    </xml>
+
+    <!-- Citation macros. The "citations*" variants wrap their entries in <citations> and accept extra ones through <yield/>. -->
+    <xml name="citation/mijalisrasche">
+        <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+        <citation type="bibtex">@unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-crr">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Ross},
+ title = {CPT Galaxy Tools},
+ year = {2020-},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-2020">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+                        </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-2020-AJC-solo">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+                        </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-clm">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Maughmer},
+ title = {CPT Galaxy Tools},
+ year = {2017-2020},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="sl-citations-clm">
+        <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Maughmer},
+ title = {CPT Galaxy Tools},
+ year = {2017-2020},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+        <yield/>
+    </xml>
+
+    <!-- Command-line fragments for the gff3_input and genome_selector params defined above. -->
+    <token name="@INPUT_GFF@">
+     '$gff3_data'
+ </token>
+    <token name="@INPUT_FASTA@">
+    #if str($reference_genome.reference_genome_source) == 'cached':
+            '${reference_genome.fasta_indexes.fields.path}'
+    #else if str($reference_genome.reference_genome_source) == 'history':
+            genomeref.fa
+    #end if
+ </token>
+    <!-- Symlinks the history FASTA to genomeref.fa, the name @INPUT_FASTA@ and @GENOME_SELECTOR@ emit for the history case. -->
+    <token name="@GENOME_SELECTOR_PRE@">
+    #if str($reference_genome.reference_genome_source) == 'history':
+            ln -s '$reference_genome.genome_fasta' genomeref.fa;
+    #end if
+ </token>
+    <!-- Expands to the same text as @INPUT_FASTA@. -->
+    <token name="@GENOME_SELECTOR@">
+    #if str($reference_genome.reference_genome_source) == 'cached':
+            '${reference_genome.fasta_indexes.fields.path}'
+    #else if str($reference_genome.reference_genome_source) == 'history':
+            genomeref.fa
+    #end if
+ </token>
+</macros>
b
diff -r 000000000000 -r 7abe5f471364 protein_blast_grouping.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/protein_blast_grouping.py Wed Jul 24 01:37:37 2024 +0000
[
@@ -0,0 +1,78 @@
+import argparse
+import re
+
+
+class BlastProteinResultParser:
+    """Group tab-separated BLAST hits by the organism named in the subject title.
+
+    ``results`` maps an organism name to a dict holding two sets:
+    ``unique_queries`` (query IDs with at least one hit in that organism) and
+    ``unique_hits`` (subject IDs from that organism matched by any query).
+    """
+
+    def __init__(self, blast_file):
+        """Store the input and start with an empty result table.
+
+        :param blast_file: iterable of text lines (e.g. an open file); each
+            usable row has at least three tab-separated columns: query ID,
+            subject ID and subject title(s).
+        """
+        self.blast_file = blast_file
+        self.results = {}
+
+    def parse_blast(self):
+        """Read every line of ``blast_file`` and fill ``results``.
+
+        Rows with fewer than three columns (blank lines, header or comment
+        lines without tabs) are skipped instead of raising ``IndexError``.
+        """
+        for line in self.blast_file:
+            parts = line.strip().split("\t")
+            if len(parts) < 3:
+                # Not a data row: nothing to read a subject title from.
+                continue
+            query_id, subject_id = parts[0], parts[1]
+            # Several subject titles may share one row, joined with "<>".
+            for title in parts[2].split("<>"):
+                organism = self.extract_organism(title)
+                if not organism:
+                    continue
+                entry = self.results.setdefault(
+                    organism, {"unique_queries": set(), "unique_hits": set()}
+                )
+                # "unique queries" == query proteins that had at LEAST one match in the organism
+                entry["unique_queries"].add(query_id)
+                # "unique hits" == unique proteins from each organism that were matched by ANY of the queries
+                entry["unique_hits"].add(subject_id)
+
+    @staticmethod
+    def extract_organism(title):
+        """Return the text inside the first ``[...]`` pair of ``title``, or ``None`` if there is none."""
+        match = re.search(r"\[(.*?)\]", title)
+        return match.group(1) if match else None
+
+    def get_top_hits(self, num_hits, key="unique_queries"):
+        """Return the ``num_hits`` largest ``(organism, data)`` pairs.
+
+        :param num_hits: maximum number of pairs to return.
+        :param key: which set's size to rank by, ``"unique_queries"`` or
+            ``"unique_hits"``.
+        :return: list of ``(organism, data)`` tuples, largest first; ties keep
+            the order in which the organisms were first seen.
+        """
+
+        def sort_key(item):
+            return len(item[1][key])
+
+        sorted_results = sorted(self.results.items(), key=sort_key, reverse=True)
+        return sorted_results[:num_hits]
+
+    def print_results(self, num_hits, sort_key="unique_queries"):
+        """Print a fixed-width table of the top organisms to standard output.
+
+        :param num_hits: number of organisms to list (also echoed in the header).
+        :param sort_key: ranking column, passed on to :meth:`get_top_hits`.
+        """
+        top_hits = self.get_top_hits(num_hits, sort_key)
+        print(f"# Top {num_hits} Hits")
+        print(
+            "{:<50} {:<25} {:<25}".format(
+                "# Name", "Unique Query Matches", "Unique Subject Hits"
+            )
+        )
+        for organism, data in top_hits:
+            print(
+                "{:<50} {:<25} {:<25}".format(
+                    organism, len(data["unique_queries"]), len(data["unique_hits"])
+                )
+            )
+
+
+def main():
+    """Command-line entry point: parse the arguments, group the hits, print the table."""
+    parser = argparse.ArgumentParser(
+        description="Parse BLAST results and group by 'top hits' to an organism"
+    )
+    parser.add_argument("blast", type=argparse.FileType("r"), help="Blast Results")
+    parser.add_argument(
+        "--hits", type=int, default=5, help="Number of top hits to display"
+    )
+    parser.add_argument(
+        "--sort",
+        choices=["unique_queries", "unique_hits"],
+        default="unique_queries",
+        help="Sort results by 'unique_queries' (default) or 'unique_hits'",
+    )
+    args = parser.parse_args()
+
+    # argparse.FileType opens the file while parsing the arguments; close it
+    # as soon as all rows have been read.
+    with args.blast:
+        blast_parser = BlastProteinResultParser(args.blast)
+        blast_parser.parse_blast()
+    blast_parser.print_results(args.hits, args.sort)
+
+
+if __name__ == "__main__":
+    main()
b
diff -r 000000000000 -r 7abe5f471364 protein_blast_grouping.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/protein_blast_grouping.xml Wed Jul 24 01:37:37 2024 +0000
[
@@ -0,0 +1,57 @@
+<tool id="edu.tamu.cpt.blast.protein_grouping" name="Protein Blast Grouping" version="0.0.1">
+  <description>Based on a BLASTp result</description>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <expand macro="requirements"/>
+  <!-- The script has no shebang line, so it is run through the python interpreter from the requirements. -->
+  <command detect_errors="aggressive">
+    <![CDATA[
+      python '$__tool_directory__/protein_blast_grouping.py'
+      '${blast_in.blast}'
+      --hits '$hits'
+      --sort '$sort.sortType'
+      > '$grouping_output'
+    ]]>
+  </command>
+  <inputs>
+    <conditional name="blast_in">
+      <param name="blastType" type="select" label="BLASTp Input Type">
+        <option value="TSV">Blast Tabular</option>
+      </param>
+      <when value="TSV">
+        <param label="BLASTp Results" name="blast" type="data" format="tsv,tabular"/>
+      </when>
+    </conditional>
+    <param label="Number of results to return" name="hits" type="integer" value="5" min="1" max="30"/>
+    <conditional name="sort">
+      <param name="sortType" type="select" label="Sort by">
+        <option value="unique_queries" selected="true">Unique Queries</option>
+        <option value="unique_hits">Unique Hits</option>
+      </param>
+      <when value="unique_queries"/>
+      <when value="unique_hits"/>
+    </conditional>
+  </inputs>
+  <outputs>
+    <data format="tabular" name="grouping_output" label="Top BlastP Hits"/>
+  </outputs>
+  <tests>
+    <test>
+      <conditional name="blast_in">
+        <param name="blastType" value="TSV"/>
+        <param name="blast" value="infile.txt"/>
+      </conditional>
+      <param name="hits" value="20"/>
+      <output name="grouping_output" file="outfile.txt" lines_diff="1"/>
+    </test>
+  </tests>
+  <help>
+**What it does**
+
+* Reads a tab-delimited BLAST output file.
+* Extracts organism names from the subject titles (text in square brackets).
+* Counts unique query proteins that matched each organism and unique hit proteins from each organism.
+* Sorts and displays results based on either unique queries or unique hits.
+* The output is a formatted table showing the top N organisms with the most matches.
+
+**Input**
+
+* Column 1 is read as the query ID, column 2 as the subject ID and column 3 as the subject title(s).
+* Several subject titles in column 3 are expected to be separated by ``&lt;&gt;``.
+</help>
+  <expand macro="citations-2020"/>
+</tool>
b
diff -r 000000000000 -r 7abe5f471364 test-data/infile.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/infile.txt Wed Jul 24 01:37:37 2024 +0000
[
b'@@ -0,0 +1,7414 @@\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tUNY41722.1\tUNY41722.1 capsid scaffolding protein [Burkholderia phage Milagro]\tUNY41722.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tUNY41662.1\tUNY41662.1 capsid scaffolding protein [Burkholderia phage Musica]\tUNY41662.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tADP02333.1\tADP02333.1 gp40 [Burkholderia phage KL3]\tADP02333.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tUNY41778.1\tUNY41778.1 capsid scaffolding protein [Burkholderia phage Menos]\tUNY41778.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tUNY41833.1\tUNY41833.1 capsid scaffolding protein [Burkholderia phage Momento]\tUNY41833.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tUKM53765.1\tUKM53765.1 capsid scaffolding protein [Burkholderia phage PhiBP82.2]\tUKM53765.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tUPI15557.1\tUPI15557.1 capsid scaffolding protein [Burkholderia phage PhiBP82.3]\tUPI15557.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tAFV51397.1\tAFV51397.1 O capsid scaffolding protein [Burkholderia phage phiX216]\tAFV51397.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tAIP84294.1\tAIP84294.1 phage capsid scaffolding (GPO) serine peptidase family protein [Burkholderia phage BEK]\tAIP84294.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tAAZ72645.1\tAAZ72645.1 phage capsid scaffolding protein [Burkholderia phage phiE52237]\tAAZ72645.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tABO60716.1\tABO60716.1 gp3, phage capsid scaffolding protein (GPO) [Burkholderia phage phiE202]\tABO60716.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tABO60773.1\tABO60773.1 gp3, phage capsid scaffolding protein (GPO) [Burkholderia phage phiE12-2]\tABO60773.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tWWY66145.1\tWWY66145.1 head scaffolding protein [Burkholderia phage vB_HM387]\tWWY66145.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tQPI18543.1\tQPI18543.1 capsid scaffolding protein [Burkholderia phage 
phiE094]\tQPI18543.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tQWY84948.1\tQWY84948.1 capsid scaffolding protein [Burkholderia phage PK23]\tQWY84948.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tQBP27622.1\tQBP27622.1 capsid scaffolding protein [Klebsiella phage ST13-OXA48phi12.1]\tQBP27622.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tADX32417.1\tADX32417.1 capsid scaffolding protein [Erwinia phage ENT90]\tADX32417.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tAQT27281.1\tAQT27281.1 capsid scaffolding protein [Salmonella phage SEN8]\tAQT27281.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tUNA05911.1\tUNA05911.1 capsid scaffolding protein [Yersinia phage vB_YenM_56.17]\tUNA05911.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tUNA05864.1\tUNA05864.1 capsid scaffolding protein [Yersinia phage vB_YenM_06.16-2]\tUNA05864.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tBCB23213.1\tBCB23213.1 capsid scaffolding protein (O) [Burkholderia phage FLC5]\tBCB23213.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tAKA61129.1\tAKA61129.1 putative capsid scaffolding protein [Burkholderia phage AP3]\tAKA61129.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tUNA05966.1\tUNA05966.1 capsid scaffolding protein [Yersinia phage vB_YenM_201.16]\tUNA05966.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tBDD79918.1\tBDD79918.1 hypothetical protein [Burkholderia phage FLC10]\tBDD79918.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tADQ92392.1\tADQ92392.1 capsid scaffolding protein [Salmonella phage RE2010]\tADQ92392.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tQBP08044.1\tQBP08044.1 capsid scaffolding protein [Klebsiella phage ST437-OXA245phi4.1]\tQBP08044.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tQBP27772.1\tQBP27772.1 capsid scaffolding protein [Klebsiella phage ST512-KPC3phi13.6]\tQBP27772.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tQBP27926.1\tQBP27926.1 capsid scaffolding protein [Klebsiella phage ST258-KPC3phi16.1]\tQBP27926.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tUSM11610.1\tUSM11610.1 capsid 
scaffolding protein [Burkholderia phage Carl1]\tUSM11610.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab\tUKL54246.1\tUKL54246.1 capsid scaffolding protein [Yersinia phage vB_YenM_31.17]\tUKL54246.1\t0\n+99974bdd-2c83-421a-b7a3-dab7d44153ab'..b'8-4a77-aaca-8235df42b2c5\tWMT83780.1\tWMT83780.1 large terminase subunit [Pseudoalteromonas phage proACA1-A]\tWMT83780.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tWMT83728.1\tWMT83728.1 large terminase subunit [Pseudoalteromonas phage ACA2]\tWMT83728.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tWMT83676.1\tWMT83676.1 large terminase subunit [Pseudoalteromonas phage ACA1]\tWMT83676.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tABG73174.1\tABG73174.1 putative terminase large subunit [Aeromonas phage phiO18P]\tABG73174.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tUYE91963.1\tUYE91963.1 terminase [Aeromonas phage A051]\tUYE91963.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tWGL39674.1\tWGL39674.1 terminase large subunit [Aeromonas phage P05B]\tWGL39674.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tAAZ93654.1\tAAZ93654.1 unknown [Pasteurella phage F108]\tAAZ93654.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tAAB09201.1\tAAB09201.1 hypothetical protein [Haemophilus phage HP1]\tAAB09201.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tAAK37798.1\tAAK37798.1 orf16 [Haemophilus phage HP2]\tAAK37798.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tQEI25522.1\tQEI25522.1 terminase, ATPase subunit [Salmonella phage SW3]\tQEI25522.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tQEI25484.1\tQEI25484.1 terminase ATPase subunit [Salmonella phage SW5]\tQEI25484.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tQEI25431.1\tQEI25431.1 terminase, ATPase [Salmonella phage SI7]\tQEI25431.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tWBY65432.1\tWBY65432.1 hypothetical protein FP3_000001 [Pasteurella phage vB_PmuM_CFP3]\tWBY65432.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tQEH60873.1\tQEH60873.1 major coat protein, partial [Escherichia phage 
MoI-2019a]\tQEH60873.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tAAA32202.1\tAAA32202.1 CP12, partial [Eganvirus ev186]\tAAA32202.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tURC10016.1\tURC10016.1 terminase, ATPase subunit [Escherichia phage vB_EcoM-473R3]\tURC10016.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tAAG50261.1\tAAG50261.1 probable terminase, partial [Phage GMSE-1]\tAAG50261.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tABG73173.1\tABG73173.1 truncated terminase ATPase subunit [Aeromonas phage phiO18P]\tABG73173.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tUNA05908.1\tUNA05908.1 terminase, ATPase subunit [Yersinia phage vB_YenM_56.17]\tUNA05908.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tAGG36518.1\tAGG36518.1 phage terminase, ATPase subunit [Escherichia phage P2]\tAGG36518.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tUNA05963.1\tUNA05963.1 terminase, ATPase subunit [Yersinia phage vB_YenM_201.16]\tUNA05963.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tAAC34147.1\tAAC34147.1 W protein [Escherichia phage 186]\tAAC34147.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tASD51186.1\tASD51186.1 terminase, ATPase subunit [Erwinia phage EtG]\tASD51186.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tAAN08364.1\tAAN08364.1 gp2 [Salmonella phage PSP3]\tAAN08364.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tQEI25341.1\tQEI25341.1 terminase, ATPase subunit [Salmonella phage SI22]\tQEI25341.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tQEI25386.1\tQEI25386.1 terminase, ATPase subunit [Salmonella phage SW9]\tQEI25386.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tBDX35531.1\tBDX35531.1 terminase [Thermus phage MN1]\tBDX35531.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tAWY03244.1\tAWY03244.1 terminase large subunit [Pasteurella phage AFS-2018a]\tAWY03244.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tAAO61393.1\tAAO61393.1 putative large subunit terminase TerL [Halophage HF1]\tAAO61393.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tAAL55022.1\tAAL55022.1 putative large subunit 
terminase TerL [Halorubrum phage HF2]\tAAL55022.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tQIR31049.1\tQIR31049.1 large subunit terminase TerL [Halorubrum virus Hardycor2]\tQIR31049.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tALF02062.1\tALF02062.1 terminase large subunit [Thiobacimonas phage vB_ThpS-P1]\tALF02062.1\t0\n+d7715b15-7a78-4a77-aaca-8235df42b2c5\tAGR47349.1\tAGR47349.1 terminase [Brevibacillus phage Emery]\tAGR47349.1\t0\n'
b
diff -r 000000000000 -r 7abe5f471364 test-data/outfile.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outfile.txt Wed Jul 24 01:37:37 2024 +0000
b
@@ -0,0 +1,22 @@
+# Top 20 Hits
+# Name                                             Unique Query Matches      Unique Subject Hits      
+Burkholderia phage Milagro                         47                        48                       
+Burkholderia phage Momento                         41                        45                       
+Burkholderia phage Musica                          39                        42                       
+Burkholderia phage Menos                           39                        40                       
+Burkholderia phage KL3                             38                        39                       
+Burkholderia phage PhiBP82.2                       35                        35                       
+Burkholderia phage PhiBP82.3                       34                        34                       
+Burkholderia phage phiE202                         34                        34                       
+Burkholderia phage phiE094                         34                        34                       
+Burkholderia phage phiX216                         33                        33                       
+Burkholderia phage phiE52237                       33                        33                       
+Burkholderia phage AP3                             33                        33                       
+Burkholderia phage Carl1                           33                        34                       
+Burkholderia phage Mana                            33                        34                       
+Burkholderia phage vB_HM387                        32                        32                       
+Burkholderia phage BEK                             31                        31                       
+Burkholderia phage KS5                             31                        32                       
+Ralstonia phage RsoM1USA                           28                        28                       
+Ralstonia phage RSA1                               28                        29                       
+Burkholderia phage PK23                            26                        27