Mercurial > repos > iuc > hmmer_hmmsearch
changeset 8:3bb58751f4ee draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hmmer3 commit 061757dd7b3bfe66b7738fd54bd6c5e135d9afe8
author | iuc |
---|---|
date | Mon, 06 Nov 2023 20:22:49 +0000 |
parents | d753d9169482 |
children | df7a52791be9 |
files | hmmsearch.xml macros.xml test-data/cut_ga_test.out |
diffstat | 3 files changed, 207 insertions(+), 66 deletions(-) [+] |
line wrap: on
line diff
--- a/hmmsearch.xml Wed Jul 21 14:14:52 2021 +0000 +++ b/hmmsearch.xml Mon Nov 06 20:22:49 2023 +0000 @@ -1,9 +1,10 @@ <?xml version="1.0"?> -<tool id="hmmer_hmmsearch" name="hmmsearch" version="@TOOL_VERSION@+galaxy0"> +<tool id="hmmer_hmmsearch" name="hmmsearch" version="@TOOL_VERSION@+galaxy1"> <description>search profile(s) against a sequence database</description> - <macros> + <macros> <import>macros.xml</import> </macros> + <expand macro="bio_tools"/> <expand macro="requirements"/> <expand macro="stdio"/> <command><![CDATA[ @@ -12,7 +13,7 @@ @OFORMAT_WITH_OPTS@ @THRESHOLDS@ -@CUT@ +@THRESHOLDS_DOM@ @ACCEL_HEUR@ @ADV_OPTS@ @CPU@ @@ -27,8 +28,7 @@ <!-- todo use Galaxy features like data libraries/data tables/??? --> <param name="seqdb" type="data" format="fasta" label="Sequence database to search against"/> <expand macro="oformat_with_opts_dom_pfam"/> - <expand macro="thresholds_xml"/> - <expand macro="cut"/> + <expand macro="thresholds_cut_dom_xml"/> <expand macro="accel_heur_xml"/> <expand macro="adv_opts"/> <expand macro="seed"/> @@ -65,6 +65,41 @@ <expand macro="assert_out" tool="hmmsearch"/> </output> </test> + <test expect_num_outputs="1"><!-- test with additional evalue threshold options set --> + <param name="hmmfile" value="fn3.hmm"/> + <param name="seqdb" value="uniprot_matches.fasta"/> + <conditional name="repopt"> + <param name="incE" value="0.00001"/> + <param name="incdomE" value="0.0001"/> + </conditional> + <expand macro="oformat_test" /> + <param name="oformat" value=""/> + <expand macro="seed_test" /> + <output name="output"> + <expand macro="assert_out" tool="hmmsearch"/> + </output> + <assert_command> + <has_text text="-E 10"/> + <has_text text="--incE 1e-05"/> + <has_text text="--domE 10"/> + <has_text text="--incdomE 0.0001"/> + </assert_command> + </test> + <test expect_num_outputs="1"><!-- test with cut_ga set --> + <param name="hmmfile" value="fn3.hmm"/> + <param name="seqdb" value="uniprot_matches.fasta"/> + <conditional name="repopt"> + <param name="repopt_sel" value="--cut_ga"/> + </conditional> + <expand macro="oformat_test" /> + <param name="oformat" value=""/> + <expand macro="seed_test" /> + <output name="output" file="cut_ga_test.out" lines_diff="10"> + <expand macro="assert_out" tool="hmmsearch"/> + </output> + </test> + + </tests> <help><![CDATA[ @HELP_PRE@
--- a/macros.xml Wed Jul 21 14:14:52 2021 +0000 +++ b/macros.xml Mon Nov 06 20:22:49 2023 +0000 @@ -6,6 +6,11 @@ <yield/> </requirements> </xml> + <xml name="bio_tools"> + <xrefs> + <xref type="bio.tools">hmmer3</xref> + </xrefs> + </xml> <token name="@TOOL_VERSION@">3.3.2</token> <xml name="stdio"> <stdio> @@ -17,69 +22,135 @@ <regex match="Exception:"/> </stdio> </xml> + + <!-- command line for thresholds_* --> <token name="@THRESHOLDS@"> --E $E ---domE $domE - -#if str($T): - -T $T -#end if - -#if str($domT): - --domT $domT +#if $repopt.repopt_sel == "evalue" + -E $repopt.E + #if str($repopt.incE) != "" + --incE $repopt.incE + #end if +#elif $repopt.repopt_sel == "score" + -T $repopt.T + #if str($repopt.incT) != "" + --incT $repopt.incT + #end if +#else + $repopt.repopt_sel #end if - -#if str($incE): - --incE $incE -#end if - -#if str($incdomE): - --incdomE $incdomE -#end if - -#if str($incT): - --incT $incT -#end if - -#if str($incdomT): - --incdomT $incdomT + </token> + <!-- additional command line thresholds*dom* --> + <token name="@THRESHOLDS_DOM@"> +#if $repopt.repopt_sel == "evalue" + --domE $repopt.domE + #if str($repopt.incdomE) != "" + --incdomE $repopt.incdomE + #end if +#elif $repopt.repopt_sel == "score" + --domT $repopt.domT + #if str($repopt.incdomT) != "" + --incdomT $repopt.incdomT + #end if #end if </token> + <xml name="thresholds_xml"> - <!-- Options controlling reporting thresholds --> - <param argument="-E" type="float" min="0" value="10.0" label="report sequences <= this E-Value threshold in output" /> - <param argument="--domE" type="float" min="0" value="10.0" label="report domains <= this E-Value threshold in output" /> - <param argument="-T" type="float" optional="true" label="report sequences >= this score threshold in output" /> - <param argument="--domT" type="float" optional="true" label="report domains >= this score threshold in output" /> <!-- Options controlling inclusion (significance) thresholds --> - <param argument="--incE" type="float" optional="true" label="consider sequences <= this E-Value threshold as significant" /> + <conditional name="repopt"> + <param name="repopt_sel" type="select" label="Threshold option"> + <option value="evalue" selected="true">E-value (reporting threshold)</option> + <option value="score">Score (reporting threshold)</option> + <yield name="additional_options"/> + </param> + <when value="evalue"> + <param argument="-E" type="float" min="0" value="10" label="E-value threshold" help="Report sequences <= this E-Value threshold in output. Default: 10.0" /> + <param argument="--incE" type="float" optional="true" label="consider sequences <= this E-Value threshold as significant" /> + <yield name="edom_params"/> + </when> + <when value="score"> + <param argument="-T" type="float" value="" label="Score Threshold" help="Report sequences >= this score threshold in output. This option is incompatible with -E, --cut_ga,--cut_nc" /> + <param argument="--incT" type="float" optional="true" label="consider sequences >= this score threshold as significant" /> + <yield name="tdom_params"/> + </when> + <yield name="additional_whens"/> + </conditional> + </xml> + + <xml name="thresholds_dom_xml"> + <expand macro="thresholds_xml"> + <token name="edom_params"> + <expand macro="dome_thresholds"/> + </token> + <token name="tdom_params"> + <expand macro="domt_thresholds"/> + </token> + </expand> + </xml> + + <xml name="thresholds_cut_xml"> + <expand macro="thresholds_xml"> + <token name="additional_options"> + <expand macro="model_specific_options"/> + </token> + <token name="additional_whens"> + <expand macro="model_specific_whens"/> + </token> + </expand> + </xml> + + <xml name="thresholds_cut_dom_xml"> + <expand macro="thresholds_xml"> + <token name="edom_params"> + <expand macro="dome_thresholds"/> + </token> + <token name="tdom_params"> + <expand macro="domt_thresholds"/> + </token> + <token name="additional_options"> + <expand macro="model_specific_options"/> + </token> + <token name="additional_whens"> + <expand macro="model_specific_whens"/> + </token> + </expand> + </xml> + + <xml name="model_specific_options"> + <option value="--cut_ga">Use profile's GA gathering cutoffs (model specific threshold)</option> + <option value="--cut_nc">Use profile's NC gathering cutoffs (model specific threshold)</option> + <option value="--cut_tc">Use profile's TC gathering cutoffs (model specific threshold)</option> + </xml> + + <xml name="model_specific_whens"> + <when value="--cut_ga"/> + <when value="--cut_nc"/> + <when value="--cut_tc"/> + </xml> + + <xml name="dome_thresholds"> + <param argument="--domE" type="float" min="0" value="10" label="Domains E-value threshold" help="Report domains <= this E-Value threshold in output. Default: 10.0. This option is incompatible with --domT, --cut_ga" /> <param argument="--incdomE" type="float" optional="true" label="consider domains <= this E-Value threshold as significant" /> - <param argument="--incT" type="float" optional="true" label="consider sequences >= this score threshold as significant" /> + </xml> + + <xml name="domt_thresholds"> + <param argument="--domT" type="float" value="" label="Domains Score Threshold" help="Report domains >= this score threshold in output. This option is incompatible with --domE, --cut_ga" /> <param argument="--incdomT" type="float" optional="true" label="consider domains >= this score threshold as significant" /> </xml> - <token name="@THRESHOLDS_NODOM@"> --E $E -#if str($T): - -T $T -#end if - -#if str($incE): - --incE $incE -#end if - -#if str($incT): - --incT $incT -#end if - </token> <xml name="thresholds_nodom"> <!-- Options controlling reporting thresholds --> - <param argument="-E" type="float" min="0" value="10.0" label="report sequences <= this E-Value threshold in output" /> - <param argument="-T" type="float" optional="true" label="report sequences >= this score threshold in output" /> + <param argument="-E" type="float" min="0" optional="true" label="E-Value Threshold" help="Report sequences <= this E-Value threshold in output. Default: 10.0. This option is incompatible with option -T,--cut_ga, --cut_nc" /> + <param argument="-T" type="float" optional="true" label="Score Threshold" help="Report sequences >= this score threshold in output. This option is incompatible with -E, --cut_ga,--cut_nc" /> <!-- Options controlling inclusion (significance) thresholds --> <param argument="--incE" type="float" optional="true" label="consider sequences <= this E-Value threshold as significant" /> <param argument="--incT" type="float" optional="true" label="consider sequences >= this score threshold as significant" /> </xml> + <xml name="cut"> + <param argument="--cut_ga" type="boolean" truevalue="--cut_ga" falsevalue="" label="use profile's GA gathering cutoffs to set all thresholding" help="This option is incompatible with options -E,-T,--domE,--domT" /> + <param argument="--cut_nc" type="boolean" truevalue="--cut_nc" falsevalue="" label="use profile's NC gathering cutoffs to set all thresholding" help="This option is incompatible with options -E,-T,--domE,--domT" /> + <param argument="--cut_tc" type="boolean" truevalue="--cut_tc" falsevalue="" label="use profile's TC gathering cutoffs to set all thresholding" /> + </xml> + <token name="@ACCEL_HEUR@"> $max --F1 $F1 @@ -115,7 +186,7 @@ <param argument="--Eft" type="float" min="0" max="1" value="0.04" label="tail mass for Forward exponential tail tau fit" /> </xml> <token name="@OFORMAT_WITH_OPTS@"> -#if $oformat: +#if $oformat: #for o in str($oformat).split(','): --$o '$getVar($o, 'MISSING_OUTPUT'+$o)' #end for @@ -144,7 +215,7 @@ <expand macro="oformat_with_opts_dom"> <option value="pfamtblout" selected="true">Table of hits and domains in Pfam format (--pfamtblout)</option> </expand> - </xml> + </xml> <xml name="oformat_with_opts_dfam_alisc"> <!-- Options directing output --> @@ -192,14 +263,14 @@ <has_line_matching expression="# @TOOL@.*"/> <has_line_matching expression="\[ok\]"/> </assert_contents> - </xml> + </xml> <xml name="assert_tblout" token_tool=""> <assert_contents> <has_line_matching expression="# Program: @TOOL@"/> <has_line_matching expression="# \[ok\]"/> </assert_contents> - </xml> + </xml> <xml name="oformat_test"> <param name="notextw" value="true" /> @@ -342,16 +413,6 @@ </when> </conditional> </xml> - <token name="@CUT@"> -$cut_ga -$cut_nc -$cut_tc - </token> - <xml name="cut"> - <param argument="--cut_ga" type="boolean" truevalue="--cut_ga" falsevalue="" label="use profile's GA gathering cutoffs to set all thresholding" /> - <param argument="--cut_nc" type="boolean" truevalue="--cut_nc" falsevalue="" label="use profile's NC gathering cutoffs to set all thresholding" /> - <param argument="--cut_tc" type="boolean" truevalue="--cut_tc" falsevalue="" label="use profile's TC gathering cutoffs to set all thresholding" /> - </xml> <token name="@MCSS@"> --$mcs.model_construction_strategy_select
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cut_ga_test.out Mon Nov 06 20:22:49 2023 +0000 @@ -0,0 +1,45 @@ +# hmmsearch :: search profile(s) against a sequence database +# HMMER 3.3.2 (Nov 2020); http://hmmer.org/ +# Copyright (C) 2020 Howard Hughes Medical Institute. +# Freely distributed under the BSD open source license. +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +# query HMM file: /tmp/saskia/tmpug0qcpsy/files/9/9/b/dataset_99bcd3c6-6d16-4e09-9878-7117f928bb24.dat +# target sequence database: /tmp/saskia/tmpug0qcpsy/files/f/9/9/dataset_f99d0a82-fcb5-4323-b2d4-8b218045c72e.dat +# max ASCII text line length: unlimited +# model-specific thresholding: GA cutoffs +# Vit filter P threshold: <= 0.001 +# Fwd filter P threshold: <= 1e-05 +# random number seed set to: 4 +# number of worker threads: 0 +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +Query: fn3 [M=86] +Accession: PF00041.13 +Description: Fibronectin type III domain +Scores for complete sequences (score includes all domains): + --- full sequence --- --- best 1 domain --- -#dom- + E-value score bias E-value score bias exp N Sequence Description + ------- ------ ----- ------- ------ ----- ---- -- -------- ----------- + + [No hits detected that satisfy reporting thresholds] + + +Domain annotation for each sequence (and alignments): + + [No targets detected that satisfy reporting thresholds] + + +Internal pipeline statistics summary: +------------------------------------- +Query model(s): 1 (86 nodes) +Target sequences: 2 (301 residues searched) +Passed MSV filter: 0 (0); expected 0.0 (0.02) +Passed bias filter: 0 (0); expected 0.0 (0.02) +Passed Vit filter: 0 (0); expected 0.0 (0.001) +Passed Fwd filter: 0 (0); expected 0.0 (1e-05) +Initial search space (Z): 2 [actual number of targets] +Domain search space (domZ): 0 [number of targets reported over threshold] +# CPU time: 0.00u 0.00s 00:00:00.00 Elapsed: 00:00:00.00 +# Mc/sec: 144.45 +// +[ok]