Mercurial > repos > peterjc > blast_rbh
comparison tools/blast_rbh/blast_rbh.xml @ 5:8f4500f6f2aa draft
Refactored to use more than one Python file (internal change only).
author | peterjc |
---|---|
date | Tue, 06 Dec 2022 15:53:36 +0000 |
parents | d8d9a9069586 |
children | b2f91cbed8d9 |
comparison
equal
deleted
inserted
replaced
4:d8d9a9069586 | 5:8f4500f6f2aa |
---|---|
1 <tool id="blast_reciprocal_best_hits" name="BLAST Reciprocal Best Hits (RBH)" version="0.1.11"> | 1 <tool id="blast_reciprocal_best_hits" name="BLAST Reciprocal Best Hits (RBH)" version="0.2.0"> |
2 <description>from two FASTA files</description> | 2 <description>from two FASTA files</description> |
3 <requirements> | 3 <requirements> |
4 <requirement type="package" version="1.67">biopython</requirement> | 4 <requirement type="package" version="1.67">biopython</requirement> |
5 <requirement type="package" version="2.5.0">blast</requirement> | 5 <requirement type="package" version="2.5.0">blast</requirement> |
6 </requirements> | 6 </requirements> |
7 <stdio> | 7 <version_command> |
8 <!-- Anything other than zero is an error --> | 8 python $__tool_directory__/blast_rbh.py --version |
9 <exit_code range="1:" /> | |
10 <exit_code range=":-1" /> | |
11 </stdio> | |
12 <version_command interpreter="python"> | |
13 blast_rbh.py --version | |
14 </version_command> | 9 </version_command> |
15 <command interpreter="python"> | 10 <command detect_errors="aggressive"> |
16 blast_rbh.py "$fasta_a" "$fasta_b" | 11 python $__tool_directory__/blast_rbh.py '$fasta_a' '$fasta_b' |
17 -a $seq.dbtype | 12 -a $seq.dbtype |
18 #if $seq.dbtype=="nucl" | 13 #if $seq.dbtype=="nucl" |
19 -t $seq.nucl_type | 14 -t $seq.nucl_type |
20 #else | 15 #else |
21 -t $seq.prot_type | 16 -t $seq.prot_type |
22 #end if | 17 #end if |
23 $make_nr | 18 $make_nr |
24 -i $identity | 19 -i $identity |
25 -c $q_cover | 20 -c $q_cover |
26 -o "$output" | 21 -o '$output' |
27 </command> | 22 </command> |
28 <inputs> | 23 <inputs> |
29 <!-- Galaxy does not have sub-types for protein vs nucleotide FASTA --> | 24 <!-- Galaxy does not have sub-types for protein vs nucleotide FASTA --> |
30 <param name="fasta_a" type="data" format="fasta" | 25 <param name="fasta_a" type="data" format="fasta" |
31 label="Genes/proteins from species A" | 26 label="Genes/proteins from species A" |
32 help="FASTA file, one sequence per gene/protein." /> | 27 help="FASTA file, one sequence per gene/protein."/> |
33 <param name="fasta_b" type="data" format="fasta" | 28 <param name="fasta_b" type="data" format="fasta" |
34 label="Genes/proteins from species B" | 29 label="Genes/proteins from species B" |
35 help="FASTA file, one sequence per gene/protein." /> | 30 help="FASTA file, one sequence per gene/protein."/> |
36 <conditional name="seq"> | 31 <conditional name="seq"> |
37 <param name="dbtype" type="select" label="Molecule type of FASTA inputs"> | 32 <param name="dbtype" type="select" label="Molecule type of FASTA inputs"> |
38 <option value="prot">protein</option> | 33 <option value="prot">protein</option> |
39 <option value="nucl">nucleotide</option> | 34 <option value="nucl">nucleotide</option> |
40 </param> | 35 </param> |
53 <option value="dc-megablast">dc-megablast - Discontiguous megablast used to find more distant (e.g., interspecies) sequences</option> | 48 <option value="dc-megablast">dc-megablast - Discontiguous megablast used to find more distant (e.g., interspecies) sequences</option> |
54 <option value="tblastx">tblastx - TBLASTX program using translated query against translated database (protein level matches)</option> | 49 <option value="tblastx">tblastx - TBLASTX program using translated query against translated database (protein level matches)</option> |
55 </param> | 50 </param> |
56 </when> | 51 </when> |
57 </conditional> | 52 </conditional> |
58 <param name="identity" type="float" value="70" min="0" max="100" | 53 <param name="identity" type="float" value="70" min="0" max="100" |
59 label="Minimum percentage identity for BLAST matches" | 54 label="Minimum percentage identity for BLAST matches" |
60 help="Default is 70%, use 0 for no filtering." /> | 55 help="Default is 70%, use 0 for no filtering." /> |
61 <param name="q_cover" type="float" value="50" min="0" max="100" | 56 <param name="q_cover" type="float" value="50" min="0" max="100" |
62 label="Minimum percentage query coverage for BLAST matches" | 57 label="Minimum percentage query coverage for BLAST matches" |
63 help="Default is 50%, use 0 for no filtering." /> | 58 help="Default is 50%, use 0 for no filtering." /> |
64 <param name="make_nr" type="boolean" checked="false" truevalue="--nr" falsevalue="" | 59 <param name="make_nr" type="boolean" checked="false" truevalue="--nr" falsevalue="" |
65 label="Process input FASTA files to collapse identical sequences" | 60 label="Process input FASTA files to collapse identical sequences" |
66 help="i.e. First make the input non-redundant" /> | 61 help="i.e. First make the input non-redundant" /> |
67 </inputs> | 62 </inputs> |
68 <outputs> | 63 <outputs> |
69 <data name="output" format="tabular" label="BLAST RBH: $fasta_a.name vs $fasta_b.name" /> | 64 <data name="output" format="tabular" label="BLAST RBH: $fasta_a.name vs $fasta_b.name" /> |
70 </outputs> | 65 </outputs> |
104 <param name="nucl_type" value="megablast"/> | 99 <param name="nucl_type" value="megablast"/> |
105 <param name="identity" value="92.5"/> | 100 <param name="identity" value="92.5"/> |
106 <param name="q_cover" value="86"/> | 101 <param name="q_cover" value="86"/> |
107 <output name="output" file="rbh_none.tabular" ftype="tabular"/> | 102 <output name="output" file="rbh_none.tabular" ftype="tabular"/> |
108 </test> | 103 </test> |
109 <!-- push the coverage over the 86% level --> | 104 <!-- push the coverage over the 86% level --> |
110 <test> | 105 <test> |
111 <param name="fasta_a" value="rhodopsin_nucs.fasta" ftype="fasta"/> | 106 <param name="fasta_a" value="rhodopsin_nucs.fasta" ftype="fasta"/> |
112 <param name="fasta_b" value="three_human_mRNA.fasta" ftype="fasta"/> | 107 <param name="fasta_b" value="three_human_mRNA.fasta" ftype="fasta"/> |
113 <param name="dbtype" value="nucl"/> | 108 <param name="dbtype" value="nucl"/> |
114 <param name="nucl_type" value="megablast"/> | 109 <param name="nucl_type" value="megablast"/> |
132 <param name="nucl_type" value="blastn"/> | 127 <param name="nucl_type" value="blastn"/> |
133 <param name="identity" value="0.0"/> | 128 <param name="identity" value="0.0"/> |
134 <param name="q_cover" value="0.0"/> | 129 <param name="q_cover" value="0.0"/> |
135 <output name="output" file="rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular" ftype="tabular"/> | 130 <output name="output" file="rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular" ftype="tabular"/> |
136 </test> | 131 </test> |
137 <!-- this pair of examples test tied best hits --> | 132 <!-- this pair of examples test tied best hits --> |
138 <test> | 133 <test> |
139 <param name="fasta_a" value="k12_ten_proteins.fasta" ftype="fasta"/> | 134 <param name="fasta_a" value="k12_ten_proteins.fasta" ftype="fasta"/> |
140 <param name="fasta_b" value="k12_edited_proteins.fasta" ftype="fasta"/> | 135 <param name="fasta_b" value="k12_edited_proteins.fasta" ftype="fasta"/> |
141 <param name="dbtype" value="prot"/> | 136 <param name="dbtype" value="prot"/> |
142 <param name="nucl_type" value="blastp"/> | 137 <param name="nucl_type" value="blastp"/> |
218 coverage threshold or similar. See: | 213 coverage threshold or similar. See: |
219 | 214 |
220 Punta and Ofran (2008) The Rough Guide to In Silico Function Prediction, | 215 Punta and Ofran (2008) The Rough Guide to In Silico Function Prediction, |
221 or How To Use Sequence and Structure Information To Predict Protein | 216 or How To Use Sequence and Structure Information To Predict Protein |
222 Function. PLoS Comput Biol 4(10): e1000160. | 217 Function. PLoS Comput Biol 4(10): e1000160. |
223 http://dx.doi.org/10.1371/journal.pcbi.1000160 | 218 https://doi.org/10.1371/journal.pcbi.1000160 |
224 | 219 |
225 The defaults are to require 70% sequence identity over the aligned region | 220 The defaults are to require 70% sequence identity over the aligned region |
226 (using ``pident`` in the BLAST+ tabular output), and that the HSP alignment | 221 (using ``pident`` in the BLAST+ tabular output), and that the HSP alignment |
227 covers at least 50% of the query sequence (using ``qcovhsp`` in the BLAST+ | 222 covers at least 50% of the query sequence (using ``qcovhsp`` in the BLAST+ |
228 tabular output). | 223 tabular output). |
233 Please cite: | 228 Please cite: |
234 | 229 |
235 P.J.A. Cock, J.M. Chilton, B. Gruening, J.E. Johnson, N. Soranzo (2015). | 230 P.J.A. Cock, J.M. Chilton, B. Gruening, J.E. Johnson, N. Soranzo (2015). |
236 NCBI BLAST+ integrated into Galaxy. | 231 NCBI BLAST+ integrated into Galaxy. |
237 *GigaScience* 4:39 | 232 *GigaScience* 4:39 |
238 http://dx.doi.org/10.1186/s13742-015-0080-7 | 233 https://doi.org/10.1186/s13742-015-0080-7 |
239 | 234 |
240 Christiam Camacho et al. (2009). | 235 Christiam Camacho et al. (2009). |
241 BLAST+: architecture and applications. | 236 BLAST+: architecture and applications. |
242 *BMC Bioinformatics* 15;10:421. | 237 *BMC Bioinformatics* 15;10:421. |
243 http://dx.doi.org/10.1186/1471-2105-10-421 | 238 https://doi.org/10.1186/1471-2105-10-421 |
244 | 239 |
245 This wrapper is available to install into other Galaxy Instances via the Galaxy | 240 This wrapper is available to install into other Galaxy Instances via the Galaxy |
246 Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/blast_rbh | 241 Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/blast_rbh |
247 </help> | 242 </help> |
248 <citations> | 243 <citations> |