changeset 8:3bb58751f4ee draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hmmer3 commit 061757dd7b3bfe66b7738fd54bd6c5e135d9afe8
author iuc
date Mon, 06 Nov 2023 20:22:49 +0000
parents d753d9169482
children df7a52791be9
files hmmsearch.xml macros.xml test-data/cut_ga_test.out
diffstat 3 files changed, 207 insertions(+), 66 deletions(-) [+]
line wrap: on
line diff
--- a/hmmsearch.xml	Wed Jul 21 14:14:52 2021 +0000
+++ b/hmmsearch.xml	Mon Nov 06 20:22:49 2023 +0000
@@ -1,9 +1,10 @@
 <?xml version="1.0"?>
-<tool id="hmmer_hmmsearch" name="hmmsearch" version="@TOOL_VERSION@+galaxy0">
+<tool id="hmmer_hmmsearch" name="hmmsearch" version="@TOOL_VERSION@+galaxy1">
   <description>search profile(s) against a sequence database</description>
-  <macros>
+    <macros>
     <import>macros.xml</import>
   </macros>
+  <expand macro="bio_tools"/>
   <expand macro="requirements"/>
   <expand macro="stdio"/>
   <command><![CDATA[
@@ -12,7 +13,7 @@
 
 @OFORMAT_WITH_OPTS@
 @THRESHOLDS@
-@CUT@
+@THRESHOLDS_DOM@
 @ACCEL_HEUR@
 @ADV_OPTS@
 @CPU@
@@ -27,8 +28,7 @@
     <!-- todo use Galaxy features like data libraries/data tables/??? -->
     <param name="seqdb" type="data" format="fasta" label="Sequence database to search against"/>
     <expand macro="oformat_with_opts_dom_pfam"/>
-    <expand macro="thresholds_xml"/>
-    <expand macro="cut"/>
+    <expand macro="thresholds_cut_dom_xml"/>
     <expand macro="accel_heur_xml"/>
     <expand macro="adv_opts"/>
     <expand macro="seed"/>
@@ -65,6 +65,41 @@
           <expand macro="assert_out" tool="hmmsearch"/>
       </output>
     </test>
+    <test expect_num_outputs="1"><!-- test with additional E-value inclusion thresholds (incE, incdomE) set -->
+      <param name="hmmfile" value="fn3.hmm"/>
+      <param name="seqdb" value="uniprot_matches.fasta"/>
+      <conditional name="repopt">
+        <param name="incE" value="0.00001"/>
+        <param name="incdomE" value="0.0001"/>
+      </conditional>
+      <expand macro="oformat_test" />
+      <param name="oformat" value=""/>
+      <expand macro="seed_test" />
+      <output name="output">
+          <expand macro="assert_out" tool="hmmsearch"/>
+      </output>
+      <assert_command>
+        <has_text text="-E 10"/>
+        <has_text text="--incE 1e-05"/>
+        <has_text text="--domE 10"/>
+        <has_text text="--incdomE 0.0001"/>
+      </assert_command>
+    </test>
+    <test expect_num_outputs="1"><!-- test with cut_ga set -->
+      <param name="hmmfile" value="fn3.hmm"/>
+      <param name="seqdb" value="uniprot_matches.fasta"/>
+      <conditional name="repopt">
+        <param name="repopt_sel" value="--cut_ga"/>
+      </conditional>
+      <expand macro="oformat_test" />
+      <param name="oformat" value=""/>
+      <expand macro="seed_test" />
+      <output name="output" file="cut_ga_test.out" lines_diff="10">
+          <expand macro="assert_out" tool="hmmsearch"/>
+      </output>
+    </test>
+
+
   </tests>
   <help><![CDATA[
 @HELP_PRE@
--- a/macros.xml	Wed Jul 21 14:14:52 2021 +0000
+++ b/macros.xml	Mon Nov 06 20:22:49 2023 +0000
@@ -6,6 +6,11 @@
       <yield/>
     </requirements>
   </xml>
+  <xml name="bio_tools">
+      <xrefs>
+          <xref type="bio.tools">hmmer3</xref>
+      </xrefs>
+  </xml>
   <token name="@TOOL_VERSION@">3.3.2</token>
   <xml name="stdio">
     <stdio>
@@ -17,69 +22,135 @@
       <regex match="Exception:"/>
     </stdio>
   </xml>
+
+  <!-- command line for thresholds_* -->
   <token name="@THRESHOLDS@">
--E $E
---domE $domE
-
-#if str($T):
-    -T $T
-#end if
-
-#if str($domT):
-    --domT $domT
+#if $repopt.repopt_sel == "evalue"
+  -E $repopt.E
+  #if str($repopt.incE) != ""
+    --incE $repopt.incE
+  #end if
+#elif $repopt.repopt_sel == "score"
+  -T $repopt.T
+  #if str($repopt.incT) != ""
+    --incT $repopt.incT
+  #end if
+#else
+  $repopt.repopt_sel
 #end if
-
-#if str($incE):
-    --incE $incE
-#end if
-
-#if str($incdomE):
-    --incdomE $incdomE
-#end if
-
-#if str($incT):
-    --incT $incT
-#end if
-
-#if str($incdomT):
-    --incdomT $incdomT
+  </token>
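+  <!-- command line for the per-domain thresholds (domE/domT, incdomE/incdomT) of the thresholds_*dom* macros; emits nothing for the model-specific cutoff choices -->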
+  <token name="@THRESHOLDS_DOM@">
+#if $repopt.repopt_sel == "evalue"
+  --domE $repopt.domE
+  #if str($repopt.incdomE) != ""
+    --incdomE $repopt.incdomE
+  #end if
+#elif $repopt.repopt_sel == "score"
+  --domT $repopt.domT
+  #if str($repopt.incdomT) != ""
+    --incdomT $repopt.incdomT
+  #end if
 #end if
   </token>
+
   <xml name="thresholds_xml">
-    <!-- Options controlling reporting thresholds -->
-    <param argument="-E" type="float" min="0" value="10.0" label="report sequences &lt;= this E-Value threshold in output" />
-    <param argument="--domE" type="float" min="0" value="10.0" label="report domains &lt;= this E-Value threshold in output" />
-    <param argument="-T" type="float" optional="true" label="report sequences &gt;= this score threshold in output" />
-    <param argument="--domT" type="float" optional="true" label="report domains &gt;= this score threshold in output" />
     <!-- Options controlling inclusion (significance) thresholds -->
-    <param argument="--incE" type="float" optional="true" label="consider sequences &lt;= this E-Value threshold as significant" />
+    <conditional name="repopt">
+      <param name="repopt_sel" type="select" label="Threshold option">
+        <option value="evalue" selected="true">E-value (reporting threshold)</option>
+        <option value="score">Score (reporting threshold)</option>
+        <yield name="additional_options"/>
+      </param>
+      <when value="evalue">
+        <param argument="-E" type="float" min="0" value="10" label="E-value threshold" help="Report sequences &lt;= this E-Value threshold in output. Default: 10.0" />
+        <param argument="--incE" type="float" optional="true" label="consider sequences &lt;= this E-Value threshold as significant" />
+        <yield name="edom_params"/>
+      </when>
+      <when value="score">
+        <param argument="-T" type="float" value="" label="Score Threshold" help="Report sequences &gt;= this score threshold in output. This option is incompatible with -E, --cut_ga,--cut_nc" />
+        <param argument="--incT" type="float" optional="true" label="consider sequences &gt;= this score threshold as significant" />
+        <yield name="tdom_params"/>
+      </when>
+      <yield name="additional_whens"/>
+    </conditional>
+  </xml>
+
+  <xml name="thresholds_dom_xml">
+    <expand macro="thresholds_xml">
+      <token name="edom_params">
+        <expand macro="dome_thresholds"/>
+      </token>
+      <token name="tdom_params">
+        <expand macro="domt_thresholds"/>
+      </token>
+    </expand>
+  </xml>
+
+  <xml name="thresholds_cut_xml">
+    <expand macro="thresholds_xml">
+      <token name="additional_options">
+        <expand macro="model_specific_options"/>
+      </token>
+      <token name="additional_whens">
+        <expand macro="model_specific_whens"/>
+      </token>
+    </expand>
+  </xml>
+  
+  <xml name="thresholds_cut_dom_xml">
+    <expand macro="thresholds_xml">
+      <token name="edom_params">
+        <expand macro="dome_thresholds"/>
+      </token>
+      <token name="tdom_params">
+        <expand macro="domt_thresholds"/>
+      </token>
+      <token name="additional_options">
+        <expand macro="model_specific_options"/>
+      </token>
+      <token name="additional_whens">
+        <expand macro="model_specific_whens"/>
+      </token>
+    </expand>
+  </xml>
+
+  <xml name="model_specific_options"><!-- extra repopt_sel choices; @THRESHOLDS@ emits the selected value verbatim as the command-line flag -->
+    <option value="--cut_ga">Use profile's GA gathering cutoffs (model specific threshold)</option>
+    <option value="--cut_nc">Use profile's NC noise cutoffs (model specific threshold)</option>
+    <option value="--cut_tc">Use profile's TC trusted cutoffs (model specific threshold)</option>
+  </xml>
+
+  <xml name="model_specific_whens">
+    <when value="--cut_ga"/>
+    <when value="--cut_nc"/>
+    <when value="--cut_tc"/>
+  </xml>
+
+  <xml name="dome_thresholds">
+    <param argument="--domE" type="float" min="0" value="10" label="Domains E-value threshold" help="Report domains &lt;= this E-Value threshold in output. Default: 10.0. This option is incompatible with --domT, --cut_ga" />
     <param argument="--incdomE" type="float" optional="true" label="consider domains &lt;= this E-Value threshold as significant" />
-    <param argument="--incT" type="float" optional="true" label="consider sequences &gt;= this score threshold as significant" />
+  </xml>
+
+  <xml name="domt_thresholds">
+    <param argument="--domT" type="float" value="" label="Domains Score Threshold" help="Report domains &gt;= this score threshold in output. This option is incompatible with --domE, --cut_ga" />
     <param argument="--incdomT" type="float" optional="true" label="consider domains &gt;= this score threshold as significant" />
   </xml>
-  <token name="@THRESHOLDS_NODOM@">
--E $E
 
-#if str($T):
-    -T $T
-#end if
-
-#if str($incE):
-    --incE $incE
-#end if
-
-#if str($incT):
-    --incT $incT
-#end if
-  </token>
   <xml name="thresholds_nodom">
     <!-- Options controlling reporting thresholds -->
-    <param argument="-E" type="float" min="0" value="10.0" label="report sequences &lt;= this E-Value threshold in output" />
-    <param argument="-T" type="float" optional="true" label="report sequences &gt;= this score threshold in output" />
+    <param argument="-E" type="float" min="0" optional="true" label="E-Value Threshold" help="Report sequences &lt;= this E-Value threshold in output. Default: 10.0. This option is incompatible with option -T,--cut_ga, --cut_nc"  />
+    <param argument="-T" type="float" optional="true" label="Score Threshold" help="Report sequences &gt;= this score threshold in output. This option is incompatible with -E, --cut_ga,--cut_nc"  />
     <!-- Options controlling inclusion (significance) thresholds -->
     <param argument="--incE" type="float" optional="true" label="consider sequences &lt;= this E-Value threshold as significant" />
     <param argument="--incT" type="float" optional="true" label="consider sequences &gt;= this score threshold as significant" />
   </xml>
+  <xml name="cut"><!-- model-specific cutoff flags as independent booleans; each truevalue is the flag itself, each falsevalue is empty -->
+    <param argument="--cut_ga" type="boolean" truevalue="--cut_ga" falsevalue="" label="use profile's GA gathering cutoffs to set all thresholding" help="This option is incompatible with options -E,-T,--domE,--domT" />
+    <param argument="--cut_nc" type="boolean" truevalue="--cut_nc" falsevalue="" label="use profile's NC noise cutoffs to set all thresholding" help="This option is incompatible with options -E,-T,--domE,--domT" />
+    <param argument="--cut_tc" type="boolean" truevalue="--cut_tc" falsevalue="" label="use profile's TC trusted cutoffs to set all thresholding" help="This option is incompatible with options -E,-T,--domE,--domT" />
+  </xml>
+
   <token name="@ACCEL_HEUR@">
 $max
 --F1 $F1
@@ -115,7 +186,7 @@
     <param argument="--Eft" type="float" min="0" max="1" value="0.04" label="tail mass for Forward exponential tail tau fit" />
   </xml>
   <token name="@OFORMAT_WITH_OPTS@">
-#if $oformat:      
+#if $oformat:
     #for o in str($oformat).split(','):
         --$o '$getVar($o, 'MISSING_OUTPUT'+$o)'
     #end for
@@ -144,7 +215,7 @@
     <expand macro="oformat_with_opts_dom">
       <option value="pfamtblout" selected="true">Table of hits and domains in Pfam format (--pfamtblout)</option>
     </expand>
-  </xml>  
+  </xml>
 
   <xml name="oformat_with_opts_dfam_alisc">
     <!-- Options directing output -->
@@ -192,14 +263,14 @@
       <has_line_matching expression="# @TOOL@.*"/>
       <has_line_matching expression="\[ok\]"/>
     </assert_contents>
-  </xml> 
+  </xml>
 
   <xml name="assert_tblout" token_tool="">
     <assert_contents>
       <has_line_matching expression="# Program:         @TOOL@"/>
       <has_line_matching expression="# \[ok\]"/>
     </assert_contents>
-  </xml> 
+  </xml>
 
   <xml name="oformat_test">
       <param name="notextw" value="true" />
@@ -342,16 +413,6 @@
       </when>
     </conditional>
   </xml>
-  <token name="@CUT@">
-$cut_ga
-$cut_nc
-$cut_tc
-  </token>
-  <xml name="cut">
-    <param argument="--cut_ga" type="boolean" truevalue="--cut_ga" falsevalue="" label="use profile's GA gathering cutoffs to set all thresholding" />
-    <param argument="--cut_nc" type="boolean" truevalue="--cut_nc" falsevalue="" label="use profile's NC gathering cutoffs to set all thresholding" />
-    <param argument="--cut_tc" type="boolean" truevalue="--cut_tc" falsevalue="" label="use profile's TC gathering cutoffs to set all thresholding" />
-  </xml>
   <token name="@MCSS@">
 --$mcs.model_construction_strategy_select
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cut_ga_test.out	Mon Nov 06 20:22:49 2023 +0000
@@ -0,0 +1,45 @@
+# hmmsearch :: search profile(s) against a sequence database
+# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
+# Copyright (C) 2020 Howard Hughes Medical Institute.
+# Freely distributed under the BSD open source license.
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# query HMM file:                  /tmp/saskia/tmpug0qcpsy/files/9/9/b/dataset_99bcd3c6-6d16-4e09-9878-7117f928bb24.dat
+# target sequence database:        /tmp/saskia/tmpug0qcpsy/files/f/9/9/dataset_f99d0a82-fcb5-4323-b2d4-8b218045c72e.dat
+# max ASCII text line length:      unlimited
+# model-specific thresholding:     GA cutoffs
+# Vit filter P threshold:       <= 0.001
+# Fwd filter P threshold:       <= 1e-05
+# random number seed set to:       4
+# number of worker threads:        0
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+Query:       fn3  [M=86]
+Accession:   PF00041.13
+Description: Fibronectin type III domain
+Scores for complete sequences (score includes all domains):
+   --- full sequence ---   --- best 1 domain ---    -#dom-
+    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
+    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------
+
+   [No hits detected that satisfy reporting thresholds]
+
+
+Domain annotation for each sequence (and alignments):
+
+   [No targets detected that satisfy reporting thresholds]
+
+
+Internal pipeline statistics summary:
+-------------------------------------
+Query model(s):                            1  (86 nodes)
+Target sequences:                          2  (301 residues searched)
+Passed MSV filter:                         0  (0); expected 0.0 (0.02)
+Passed bias filter:                        0  (0); expected 0.0 (0.02)
+Passed Vit filter:                         0  (0); expected 0.0 (0.001)
+Passed Fwd filter:                         0  (0); expected 0.0 (1e-05)
+Initial search space (Z):                  2  [actual number of targets]
+Domain search space  (domZ):               0  [number of targets reported over threshold]
+# CPU time: 0.00u 0.00s 00:00:00.00 Elapsed: 00:00:00.00
+# Mc/sec: 144.45
+//
+[ok]