diff cmsearch.xml @ 2:fac157e22e1b draft

Uploaded
author bgruening
date Fri, 13 Feb 2015 03:10:51 -0500
parents 55bb96edfc07
children 2c2c5e5e495b
line wrap: on
line diff
--- a/cmsearch.xml	Thu Apr 24 15:02:05 2014 -0400
+++ b/cmsearch.xml	Fri Feb 13 03:10:51 2015 -0500
@@ -1,21 +1,22 @@
 <tool id="infernal_cmsearch" name="Search covariance model(s)" version="1.1.0.2">
     <description>against a sequence database (cmsearch)</description>
-    <parallelism method="multi" split_inputs="seqdb" split_mode="to_size" split_size="100" shared_inputs="" merge_outputs="outfile,multiple_alignment_output"></parallelism>
+    <parallelism method="multi" split_inputs="seqdb" split_mode="to_size" split_size="500" shared_inputs="" merge_outputs="outfile,multiple_alignment_output"></parallelism>
     <requirements>
         <requirement type="package">infernal</requirement>
         <requirement type="package" version="1.1">infernal</requirement>
-        <requirement type="package" version="8.21">gnu_coreutils</requirement>
+        <requirement type="package" version="8.22">gnu_coreutils</requirement>
     </requirements>
     <command>
+<![CDATA[
         ## a temp file is needed, because the standard tabular output from infernal is not usefull in Galaxy
         ## it will be converted to a tab delimited file and piped to Galaxy
         temp_tabular_output=\$(mktemp);
 
-        cmsearch 
+        cmsearch
             ## Infernal Options
             --cpu "\${GALAXY_SLOTS:-12}"
             -o /dev/null
-            --tformat $seqdb.ext ##target format: fasta, embl, genbank, ddbj, stockholm, pfam, a2m, afa, clustal, and phylip 
+            --tformat $seqdb.ext ##target format: fasta, embl, genbank, ddbj, stockholm, pfam, a2m, afa, clustal, and phylip
             $bottomonly
             $toponly
             $cyk
@@ -52,13 +53,14 @@
 
             ## sequence file
             $seqdb
-            2>&#38;1
+            2>&1
             ;
 
             ## 1. replace all lines starting # (comment lines)
             ## 2. replace the first 18 spaces with tabs, 18th field is a free text field (can contain spaces)
             sed -e 's/#.*$//' -e '/^$/d' -e 's/\s\+/\t/g' -e 's/\t/ /18g' \$temp_tabular_output > $outfile
 
+]]>
     </command>
         <inputs>
 
@@ -83,32 +85,32 @@
                 </when>
             </conditional>
 
-            <param name="g" truevalue="-g" falsevalue="" checked="False" type="boolean" 
+            <param name="g" truevalue="-g" falsevalue="" checked="False" type="boolean"
                 label="Turn on the glocal alignment algorithm" help="... global with respect to the query model and local with respect to the target database."/>
 
-            <param name="bottomonly" truevalue="--bottomonly" falsevalue="" checked="False" type="boolean" 
+            <param name="bottomonly" truevalue="--bottomonly" falsevalue="" checked="False" type="boolean"
                 label="Only search the bottom (Crick) strand of target sequences" help="in the sequence database"/>
-            <param name="toponly" truevalue="--toponly" falsevalue="" checked="False" type="boolean" 
+            <param name="toponly" truevalue="--toponly" falsevalue="" checked="False" type="boolean"
                 label="Only search the top (Watson) strand of target sequences" help="in the sequence database"/>
 
-            <param name="cyk" truevalue="--cyk" falsevalue="" checked="False" type="boolean" 
+            <param name="cyk" truevalue="--cyk" falsevalue="" checked="False" type="boolean"
                 label="Use the CYK algorithm, not Inside, to determine the final score of all hits" help=""/>
-            <param name="--acyk" truevalue="--cyk" falsevalue="" checked="False" type="boolean" 
+            <param name="acyk" truevalue="--acyk" falsevalue="" checked="False" type="boolean"
                 label="Use the CYK algorithm to align hits" help="By default, the Durbin/Holmes optimal accuracy algorithm is used, which finds the alignment that maximizes the expected accuracy of all aligned residues."/>
 
-            <param name="notrunc" truevalue="--notrunc" falsevalue="" checked="False" type="boolean" 
+            <param name="notrunc" truevalue="--notrunc" falsevalue="" checked="False" type="boolean"
                 label="Turn off truncated hit detection" help=""/>
 
             <!-- accelleration pipeline -->
 
-            <param name="max" truevalue="--max" falsevalue="" checked="False" type="boolean" 
+            <param name="max" truevalue="--max" falsevalue="" checked="False" type="boolean"
                 label="Turn off all filters, and run non-banded Inside on every full-length target sequence" help="This
                 increases sensitivity somewhat, at an extremely large cost in speed."/>
 
-            <param name="nohmm" truevalue="--nohmm" falsevalue="" checked="False" type="boolean" 
+            <param name="nohmm" truevalue="--nohmm" falsevalue="" checked="False" type="boolean"
                 label="Turn off all HMM filter stages " help=""/>
 
-            <param name="mid" truevalue="--mid" falsevalue="" checked="False" type="boolean" 
+            <param name="mid" truevalue="--mid" falsevalue="" checked="False" type="boolean"
                 label="Turn off the HMM SSV and Viterbi filter stages" help=""/>
 
 
@@ -182,7 +184,7 @@
                 </when>
             </conditional>
 
-            <param name="A" truevalue="-A" falsevalue="" checked="False" type="boolean" 
+            <param name="A" truevalue="-A" falsevalue="" checked="False" type="boolean"
                 label="Save a multiple alignment of all significant hits" help="... those satisfying inclusion thresholds"/>
 
         </inputs>
@@ -195,13 +197,14 @@
 
     </outputs>
     <help>
+<![CDATA[
 
 
 **What it does**
 
 Infernal is used to search sequence databases for homologs of structural RNA sequences, and to make
 sequence- and structure-based RNA sequence alignments. Infernal needs a profile from a structurally
-annotated multiple sequence alignment of an RNA family with a position-specific scoring system for substitutions, 
+annotated multiple sequence alignment of an RNA family with a position-specific scoring system for substitutions,
 insertions, and deletions. Positions in the profile that are basepaired in the consensus secondary
 structure of the alignment are modeled as dependent on one another, allowing Infernal’s scoring system to
 consider the secondary structure, in addition to the primary sequence, of the family being modeled. Infernal
@@ -209,7 +212,7 @@
 grammar (SCFG) (Lari and Young, 1990).
 
 Compared to other alignment and database search tools based only on sequence comparison, Infernal
-aims to be significantly more accurate and more able to detect remote homologs because it models sequence 
+aims to be significantly more accurate and more able to detect remote homologs because it models sequence
 and structure.
 
 
@@ -225,8 +228,8 @@
 (7) mdl to (model coord): The end of the alignment of this hit with respect to the profile (CM or HMM), numbered 1..N for a profile of N consensus positions.
 (8) seq from (ali coord): The start of the alignment of this hit with respect to the sequence, numbered 1..L for a sequence of L residues.
 (9) seq to (ali coord): The end of the alignment of this hit with respect to the sequence, numbered 1..L for a sequence of L residues.
-(10) strand: The strand on which the hit occurs on the sequence. ’+’ if the hit is on the top (Watson) strand, ’-’ if the hit is on the bottom (Crick) strand. If on the top strand, the “seq from” value will be less than or equal to the “seq to” value, else it will be greater than or equal to it. 
-(11) trunc: Indicates if this is predicted to be a truncated CM hit or not. This will be “no” if it is a CM hit that is not predicted to be truncated by the end of the sequence, “5’ ” or “3’ ” if the hit is predicted to have one or more 5’ or 3’ residues missing due to a artificial truncation of the sequence, or “5’&amp;3”’ if the hit is predicted to have one or more 5’ residues missing and one or more 3’ residues missing. If the hit is an HMM hit, this will always be ’-’.
+(10) strand: The strand on which the hit occurs on the sequence. ’+’ if the hit is on the top (Watson) strand, ’-’ if the hit is on the bottom (Crick) strand. If on the top strand, the “seq from” value will be less than or equal to the “seq to” value, else it will be greater than or equal to it.
+(11) trunc: Indicates if this is predicted to be a truncated CM hit or not. This will be “no” if it is a CM hit that is not predicted to be truncated by the end of the sequence, “5’ ” or “3’ ” if the hit is predicted to have one or more 5’ or 3’ residues missing due to an artificial truncation of the sequence, or “5’&3’” if the hit is predicted to have one or more 5’ residues missing and one or more 3’ residues missing. If the hit is an HMM hit, this will always be ’-’.
 (12) pass: Indicates what “pass” of the pipeline the hit was detected on. This is probably only useful for testing and debugging. Non-truncated hits are found on the first pass, truncated hits are found on successive passes.
 (13) gc: Fraction of G and C nucleotides in the hit.
 (14) bias: The biased-composition correction: the bit score difference contributed by the null3 model for CM hits, or the null2 model for HMM hits. High bias scores may be a red flag for a false positive. It is difficult to correct for all possible ways in which a nonrandom but nonhomologous biological sequences can appear to be similar, such as short-period tandem repeats, so there are cases where the bias correction is not strong enough (creating false positives).
@@ -244,11 +247,12 @@
 How do I cite Infernal?
 -----------------------
 
-The recommended citation for using Infernal 1.1 is E. P. Nawrocki and S. R. Eddy, Infernal 1.1: 100-fold faster RNA homology searches , Bioinformatics 29:2933-2935 (2013). 
+The recommended citation for using Infernal 1.1 is E. P. Nawrocki and S. R. Eddy, Infernal 1.1: 100-fold faster RNA homology searches, Bioinformatics 29:2933-2935 (2013).
 
 **Galaxy Wrapper Author**::
 
     *  Bjoern Gruening, University of Freiburg
 
+]]>
     </help>
 </tool>