Mercurial > repos > cpt > cpt_protein_blast_grouping
diff protein_blast_grouping.xml @ 0:7abe5f471364 draft
planemo upload commit 7ebbd0df0aea9e58c4df58b61d6da385ee0ebb49
author | cpt |
---|---|
date | Wed, 24 Jul 2024 01:37:37 +0000 |
parents | |
children | f2a7dffab581 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/protein_blast_grouping.xml Wed Jul 24 01:37:37 2024 +0000
@@ -0,0 +1,65 @@
+<tool id="edu.tamu.cpt.blast.protein_grouping" name="Protein Blast Grouping" version="0.0.1">
+    <!-- Galaxy wrapper for protein_blast_grouping.py: takes tabular BLASTp results and, per the help text, reports the organisms with the most matches. -->
+    <description>Based on a BLASTp result</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <!-- The "requirements" and "citations-2020" macros are expected to come from macros.xml (not shown here). -->
+    <expand macro="requirements"/>
+    <!-- Runs the script located in the tool directory; its stdout is redirected into the output dataset. Every substituted value is single-quoted for the shell. -->
+    <command detect_errors="aggressive">
+        <![CDATA[
+            '$__tool_directory__/protein_blast_grouping.py'
+            '${blast_in.blast}'
+            --hits '$hits'
+            --sort '$sort.sortType'
+            > '$grouping_output'
+        ]]>
+    </command>
+    <inputs>
+        <!-- Input format selector; tabular BLAST output is the only choice offered. -->
+        <conditional name="blast_in">
+            <param name="blastType" type="select" label="BLASTp Input Type">
+                <option value="TSV">Blast Tabular</option>
+            </param>
+            <when value="TSV">
+                <param label="BLASTp Results" name="blast" type="data" format="tsv,tabular"/>
+            </when>
+        </conditional>
+        <!-- Forwarded to the script's "hits" option; restricted to the range 1 through 30, default 5. -->
+        <param label="Number of results to return" name="hits" type="integer" value="5" min="1" max="30"/>
+        <!-- Forwarded to the script's "sort" option; neither choice exposes further parameters. -->
+        <conditional name="sort">
+            <param name="sortType" type="select" label="Sort by">
+                <option value="unique_queries" selected="true">Unique Queries</option>
+                <option value="unique_hits">Unique Hits</option>
+            </param>
+            <when value="unique_queries"/>
+            <when value="unique_hits"/>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="grouping_output" label="Top BlastP Hits"/>
+    </outputs>
+    <tests>
+        <!-- Uses 20 hits and leaves the sort at its default (unique_queries); output is compared with outfile.txt using lines_diff=1. -->
+        <test>
+            <conditional name="blast_in">
+                <param name="blastType" value="TSV"/>
+                <param name="blast" value="infile.txt"/>
+            </conditional>
+            <param name="hits" value="20"/>
+            <output name="grouping_output" file="outfile.txt" lines_diff="1"/>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+* Reads a tab-delimited BLAST output file.
+* Extracts organism names from the subject titles (text in square brackets).
+* Counts unique query proteins that matched each organism and unique hit proteins from each organism.
+* Sorts and displays results based on either unique queries or unique hits.
+* The output is a formatted table showing the top N organisms with the most matches.
+</help>
+    <expand macro="citations-2020"/>
+</tool>