comparison tools/blast_rbh/blast_rbh.xml @ 5:8f4500f6f2aa draft

Refactored to use more than one Python file (internal change only).
author peterjc
date Tue, 06 Dec 2022 15:53:36 +0000
parents d8d9a9069586
children b2f91cbed8d9
comparison
equal deleted inserted replaced
4:d8d9a9069586 5:8f4500f6f2aa
1 <tool id="blast_reciprocal_best_hits" name="BLAST Reciprocal Best Hits (RBH)" version="0.1.11"> 1 <tool id="blast_reciprocal_best_hits" name="BLAST Reciprocal Best Hits (RBH)" version="0.2.0">
2 <description>from two FASTA files</description> 2 <description>from two FASTA files</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="1.67">biopython</requirement> 4 <requirement type="package" version="1.67">biopython</requirement>
5 <requirement type="package" version="2.5.0">blast</requirement> 5 <requirement type="package" version="2.5.0">blast</requirement>
6 </requirements> 6 </requirements>
7 <stdio> 7 <version_command>
8 <!-- Anything other than zero is an error --> 8 python $__tool_directory__/blast_rbh.py --version
9 <exit_code range="1:" />
10 <exit_code range=":-1" />
11 </stdio>
12 <version_command interpreter="python">
13 blast_rbh.py --version
14 </version_command> 9 </version_command>
15 <command interpreter="python"> 10 <command detect_errors="aggressive">
16 blast_rbh.py "$fasta_a" "$fasta_b" 11 python $__tool_directory__/blast_rbh.py '$fasta_a' '$fasta_b'
17 -a $seq.dbtype 12 -a $seq.dbtype
18 #if $seq.dbtype=="nucl" 13 #if $seq.dbtype=="nucl"
19 -t $seq.nucl_type 14 -t $seq.nucl_type
20 #else 15 #else
21 -t $seq.prot_type 16 -t $seq.prot_type
22 #end if 17 #end if
23 $make_nr 18 $make_nr
24 -i $identity 19 -i $identity
25 -c $q_cover 20 -c $q_cover
26 -o "$output" 21 -o '$output'
27 </command> 22 </command>
28 <inputs> 23 <inputs>
29 <!-- Galaxy does not have sub-types for protein vs nucleotide FASTA --> 24 <!-- Galaxy does not have sub-types for protein vs nucleotide FASTA -->
30 <param name="fasta_a" type="data" format="fasta" 25 <param name="fasta_a" type="data" format="fasta"
31 label="Genes/proteins from species A" 26 label="Genes/proteins from species A"
32 help="FASTA file, one sequence per gene/protein." /> 27 help="FASTA file, one sequence per gene/protein."/>
33 <param name="fasta_b" type="data" format="fasta" 28 <param name="fasta_b" type="data" format="fasta"
34 label="Genes/proteins from species B" 29 label="Genes/proteins from species B"
35 help="FASTA file, one sequence per gene/protein." /> 30 help="FASTA file, one sequence per gene/protein."/>
36 <conditional name="seq"> 31 <conditional name="seq">
37 <param name="dbtype" type="select" label="Molecule type of FASTA inputs"> 32 <param name="dbtype" type="select" label="Molecule type of FASTA inputs">
38 <option value="prot">protein</option> 33 <option value="prot">protein</option>
39 <option value="nucl">nucleotide</option> 34 <option value="nucl">nucleotide</option>
40 </param> 35 </param>
53 <option value="dc-megablast">dc-megablast - Discontiguous megablast used to find more distant (e.g., interspecies) sequences</option> 48 <option value="dc-megablast">dc-megablast - Discontiguous megablast used to find more distant (e.g., interspecies) sequences</option>
54 <option value="tblastx">tblastx - TBLASTX program using translated query against translated database (protein level matches)</option> 49 <option value="tblastx">tblastx - TBLASTX program using translated query against translated database (protein level matches)</option>
55 </param> 50 </param>
56 </when> 51 </when>
57 </conditional> 52 </conditional>
58 <param name="identity" type="float" value="70" min="0" max="100" 53 <param name="identity" type="float" value="70" min="0" max="100"
59 label="Minimum percentage identity for BLAST matches" 54 label="Minimum percentage identity for BLAST matches"
60 help="Default is 70%, use 0 for no filtering." /> 55 help="Default is 70%, use 0 for no filtering." />
61 <param name="q_cover" type="float" value="50" min="0" max="100" 56 <param name="q_cover" type="float" value="50" min="0" max="100"
62 label="Minimum percentage query coverage for BLAST matches" 57 label="Minimum percentage query coverage for BLAST matches"
63 help="Default is 50%, use 0 for no filtering." /> 58 help="Default is 50%, use 0 for no filtering." />
64 <param name="make_nr" type="boolean" checked="false" truevalue="--nr" falsevalue="" 59 <param name="make_nr" type="boolean" checked="false" truevalue="--nr" falsevalue=""
65 label="Process input FASTA files to collapse identical sequences" 60 label="Process input FASTA files to collapse identical sequences"
66 help="i.e. First make the input non-redundant" /> 61 help="i.e. First make the input non-redundant" />
67 </inputs> 62 </inputs>
68 <outputs> 63 <outputs>
69 <data name="output" format="tabular" label="BLAST RBH: $fasta_a.name vs $fasta_b.name" /> 64 <data name="output" format="tabular" label="BLAST RBH: $fasta_a.name vs $fasta_b.name" />
70 </outputs> 65 </outputs>
104 <param name="nucl_type" value="megablast"/> 99 <param name="nucl_type" value="megablast"/>
105 <param name="identity" value="92.5"/> 100 <param name="identity" value="92.5"/>
106 <param name="q_cover" value="86"/> 101 <param name="q_cover" value="86"/>
107 <output name="output" file="rbh_none.tabular" ftype="tabular"/> 102 <output name="output" file="rbh_none.tabular" ftype="tabular"/>
108 </test> 103 </test>
109 <!-- push the coverage over the 86% level --> 104 <!-- push the coverage over the 86% level -->
110 <test> 105 <test>
111 <param name="fasta_a" value="rhodopsin_nucs.fasta" ftype="fasta"/> 106 <param name="fasta_a" value="rhodopsin_nucs.fasta" ftype="fasta"/>
112 <param name="fasta_b" value="three_human_mRNA.fasta" ftype="fasta"/> 107 <param name="fasta_b" value="three_human_mRNA.fasta" ftype="fasta"/>
113 <param name="dbtype" value="nucl"/> 108 <param name="dbtype" value="nucl"/>
114 <param name="nucl_type" value="megablast"/> 109 <param name="nucl_type" value="megablast"/>
132 <param name="nucl_type" value="blastn"/> 127 <param name="nucl_type" value="blastn"/>
133 <param name="identity" value="0.0"/> 128 <param name="identity" value="0.0"/>
134 <param name="q_cover" value="0.0"/> 129 <param name="q_cover" value="0.0"/>
135 <output name="output" file="rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular" ftype="tabular"/> 130 <output name="output" file="rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular" ftype="tabular"/>
136 </test> 131 </test>
137 <!-- this pair of examples test tied best hits --> 132 <!-- this pair of examples test tied best hits -->
138 <test> 133 <test>
139 <param name="fasta_a" value="k12_ten_proteins.fasta" ftype="fasta"/> 134 <param name="fasta_a" value="k12_ten_proteins.fasta" ftype="fasta"/>
140 <param name="fasta_b" value="k12_edited_proteins.fasta" ftype="fasta"/> 135 <param name="fasta_b" value="k12_edited_proteins.fasta" ftype="fasta"/>
141 <param name="dbtype" value="prot"/> 136 <param name="dbtype" value="prot"/>
142 <param name="nucl_type" value="blastp"/> 137 <param name="nucl_type" value="blastp"/>
218 coverage threshold or similar. See: 213 coverage threshold or similar. See:
219 214
220 Punta and Ofran (2008) The Rough Guide to In Silico Function Prediction, 215 Punta and Ofran (2008) The Rough Guide to In Silico Function Prediction,
221 or How To Use Sequence and Structure Information To Predict Protein 216 or How To Use Sequence and Structure Information To Predict Protein
222 Function. PLoS Comput Biol 4(10): e1000160. 217 Function. PLoS Comput Biol 4(10): e1000160.
223 http://dx.doi.org/10.1371/journal.pcbi.1000160 218 https://doi.org/10.1371/journal.pcbi.1000160
224 219
225 The defaults are to require 70% sequence identity over the aligned region 220 The defaults are to require 70% sequence identity over the aligned region
226 (using ``pident`` in the BLAST+ tabular output), and that the HSP alignment 221 (using ``pident`` in the BLAST+ tabular output), and that the HSP alignment
227 covers at least 50% of the query sequence (using ``qcovhsp`` in the BLAST+ 222 covers at least 50% of the query sequence (using ``qcovhsp`` in the BLAST+
228 tabular output). 223 tabular output).
233 Please cite: 228 Please cite:
234 229
235 P.J.A. Cock, J.M. Chilton, B. Gruening, J.E. Johnson, N. Soranzo (2015). 230 P.J.A. Cock, J.M. Chilton, B. Gruening, J.E. Johnson, N. Soranzo (2015).
236 NCBI BLAST+ integrated into Galaxy. 231 NCBI BLAST+ integrated into Galaxy.
237 *GigaScience* 4:39 232 *GigaScience* 4:39
238 http://dx.doi.org/10.1186/s13742-015-0080-7 233 https://doi.org/10.1186/s13742-015-0080-7
239 234
240 Christiam Camacho et al. (2009). 235 Christiam Camacho et al. (2009).
241 BLAST+: architecture and applications. 236 BLAST+: architecture and applications.
242 *BMC Bioinformatics* 15;10:421. 237 *BMC Bioinformatics* 15;10:421.
243 http://dx.doi.org/10.1186/1471-2105-10-421 238 https://doi.org/10.1186/1471-2105-10-421
244 239
245 This wrapper is available to install into other Galaxy Instances via the Galaxy 240 This wrapper is available to install into other Galaxy Instances via the Galaxy
246 Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/blast_rbh 241 Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/blast_rbh
247 </help> 242 </help>
248 <citations> 243 <citations>