Galaxy |

Changeset 1:55bb96edfc07 (2014-04-24)

Previous changeset 0:652f9d550531 (2014-04-22) Next changeset 2:fac157e22e1b (2015-02-13)

Commit message:
Uploaded

modified:
cmsearch.xml

added:
cmalign.xml

diff -r 652f9d550531 -r 55bb96edfc07 cmalign.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cmalign.xml Thu Apr 24 15:02:05 2014 -0400

b'@@ -0,0 +1,374 @@\n+<tool id="infernal_cmsearch" name="Search covariance model(s)" version="1.1.0.2">\n+ <description>against a sequence database (cmsearch)</description>\n+ <parallelism method="multi" split_inputs="seqdb" split_mode="to_size" split_size="100" shared_inputs="" merge_outputs="outfile,multiple_alignment_output"></parallelism>\n+ <requirements>\n+ <requirement type="package">infernal</requirement>\n+ <requirement type="package" version="1.1">infernal</requirement>\n+ <requirement type="package" version="8.21">gnu_coreutils</requirement>\n+ </requirements>\n+ <command>\n+ ## a temp file is needed, because the standard tabular output from infernal is not usefull in Galaxy\n+ ## it will be converted to a tab delimited file and piped to Galaxy\n+ temp_tabular_output=\\$(mktemp);\n+\n+ cmsearch \n+ ## Infernal Options\n+ --cpu "\\${GALAXY_SLOTS:-12}"\n+ -o /dev/null\n+ --tformat $seqdb.ext ##target format: fasta, embl, genbank, ddbj, stockholm, pfam, a2m, afa, clustal, and phylip \n+ $bottomonly\n+ $toponly\n+ $cyk\n+ $notrunc\n+ $max\n+ $nohmm\n+ $mid\n+ ##$bitscore_thresholds\n+ --tblout \\$temp_tabular_output\n+ $g\n+ #if $A:\n+ $A $multiple_alignment_output\n+ #end if\n+\n+ #if $inclusion_thresholds_opts.inclusion_thresholds_selector == "--incE":\n+ --incE $inclusion_thresholds_opts.incE\n+ #elif $inclusion_thresholds_opts.inclusion_thresholds_selector == "--incT":\n+ --incT $inclusion_thresholds_opts.incT\n+ #end if\n+\n+ #if $reporting_thresholds_opts.reporting_thresholds_selector == "-E":\n+ -E $reporting_thresholds_opts.E\n+ #elif $reporting_thresholds_opts.reporting_thresholds_selector == "-T":\n+ -T $reporting_thresholds_opts.T\n+ #end if\n+\n+ ## CM file from the history or stored as database on disc\n+\n+ #if $cm_opts.cm_opts_selector == "db":\n+ $cm_opts.database.fields.path\n+ #else:\n+ $cm_opts.cmfile\n+ #end if\n+\n+ ## sequence file\n+ $seqdb\n+ 2>&1\n+ ;\n+\n+ ## 1. replace all lines starting # (comment lines)\n+ ## 2. 
replace the first 18 spaces with tabs, 18th field is a free text field (can contain spaces)\n+ sed -e \'s/#.*$//\' -e \'/^$/d\' -e \'s/ /\\t/g\' -e \'s/\\t/ /18g\' \\$temp_tabular_output > $outfile\n+\n+ </command>\n+ <inputs>\n+\n+ <param name="seqdb" type="data" format="fasta" label="Sequence database"/>\n+ <conditional name="cm_opts">\n+ <param name="cm_opts_selector" type="select" label="Subject covariance models">\n+ <option value="db" selected="True">Locally installed covariance models</option>\n+ <option value="histdb">Covariance model from your history</option>\n+ </param>\n+ <when value="db">\n+ <param name="database" type="select" label="Covariance models">\n+ <options from_file="infernal.loc">\n+ <column name="value" index="0"/>\n+ <column name="name" index="1"/>\n+ <column name="path" index="2"/>\n+ </options>\n+ </param>\n+ </when>\n+ <when value="histdb">\n+ <param name="cmfile" type="data" format="txt" label="Covariance models file from the history."/>\n+ </when>\n+ </conditional>\n+\n+ <param name="g" truevalue="-g" falsevalue="" checked="False" type="boolean" \n+ label="Turn on the glocal alignment algorithm" help="... global with respect to the query model and local with resp'..b'e of N consensus positions.\n+(7) mdl to (model coord): The end of the alignment of this hit with respect to the profile (CM or HMM), numbered 1..N for a profile of N consensus positions.\n+(8) seq from (ali coord): The start of the alignment of this hit with respect to the sequence, numbered 1..L for a sequence of L residues.\n+(9) seq to (ali coord): The end of the alignment of this hit with respect to the sequence, numbered 1..L for a sequence of L residues.\n+(10) strand: The strand on which the hit occurs on the sequence. \xe2\x80\x99+\xe2\x80\x99 if the hit is on the top (Watson) strand, \xe2\x80\x99-\xe2\x80\x99 if the hit is on the bottom (Crick) strand. 
If on the top strand, the \xe2\x80\x9cseq from\xe2\x80\x9d value will be less than or equal to the \xe2\x80\x9cseq to\xe2\x80\x9d value, else it will be greater than or equal to it. \n+(11) trunc: Indicates if this is predicted to be a truncated CM hit or not. This will be \xe2\x80\x9cno\xe2\x80\x9d if it is a CM hit that is not predicted to be truncated by the end of the sequence, \xe2\x80\x9c5\xe2\x80\x99 \xe2\x80\x9d or \xe2\x80\x9c3\xe2\x80\x99 \xe2\x80\x9d if the hit is predicted to have one or more 5\xe2\x80\x99 or 3\xe2\x80\x99 residues missing due to an artificial truncation of the sequence, or \xe2\x80\x9c5\xe2\x80\x99&3\xe2\x80\x99\xe2\x80\x9d if the hit is predicted to have one or more 5\xe2\x80\x99 residues missing and one or more 3\xe2\x80\x99 residues missing. If the hit is an HMM hit, this will always be \xe2\x80\x99-\xe2\x80\x99.\n+(12) pass: Indicates what \xe2\x80\x9cpass\xe2\x80\x9d of the pipeline the hit was detected on. This is probably only useful for testing and debugging. Non-truncated hits are found on the first pass, truncated hits are found on successive passes.\n+(13) gc: Fraction of G and C nucleotides in the hit.\n+(14) bias: The biased-composition correction: the bit score difference contributed by the null3 model for CM hits, or the null2 model for HMM hits. High bias scores may be a red flag for a false positive. It is difficult to correct for all possible ways in which nonrandom but nonhomologous biological sequences can appear to be similar, such as short-period tandem repeats, so there are cases where the bias correction is not strong enough (creating false positives).\n+(15) score: The score (in bits) for this target/query comparison. It includes the biased-composition correction (the \xe2\x80\x9cnull3\xe2\x80\x9d model for CM hits, or the \xe2\x80\x9cnull2\xe2\x80\x9d model for HMM hits).\n+(16) E-value: The expectation value (statistical significance) of the target. This is a per query E-value; i.e. 
calculated as the expected number of false positives achieving this comparison\xe2\x80\x99s score for a single query against the search space Z. For cmsearch Z is defined as the total number of nucleotides in the target dataset multiplied by 2 because both strands are searched. For cmscan Z is the total number of nucleotides in the query sequence multiplied by 2 because both strands are searched and multiplied by the number of models in the target database. If you search with multiple queries and if you want to control the overall false positive rate of that search rather than the false positive rate per query, you will want to multiply this per-query E-value by how many queries you\xe2\x80\x99re doing.\n+(17) inc: Indicates whether or not this hit achieves the inclusion threshold: \xe2\x80\x99!\xe2\x80\x99 if it does, \xe2\x80\x99?\xe2\x80\x99 if it does not (and rather only achieves the reporting threshold). By default, the inclusion threshold is an E-value of 0.01 and the reporting threshold is an E-value of 10.0, but these can be changed with command line options as described in the manual pages.\n+(18) description of target: The remainder of the line is the target\xe2\x80\x99s description line, as free text.\n+\n+\n+For further questions please refer to the Infernal Userguide_.\n+\n+.. _Userguide: http://selab.janelia.org/software/infernal/Userguide.pdf\n+\n+\n+How do I cite Infernal?\n+-----------------------\n+\n+The recommended citation for using Infernal 1.1 is E. P. Nawrocki and S. R. Eddy, Infernal 1.1: 100-fold faster RNA homology searches, Bioinformatics 29:2933-2935 (2013). \n+\n+**Galaxy Wrapper Author**::\n+\n+ * Bjoern Gruening, University of Freiburg\n+\n+ </help>\n+</tool>\n'

diff -r 652f9d550531 -r 55bb96edfc07 cmsearch.xml
--- a/cmsearch.xml Tue Apr 22 13:56:22 2014 -0400
+++ b/cmsearch.xml Thu Apr 24 15:02:05 2014 -0400

@@ -57,7 +57,7 @@

             ## 1. replace all lines starting # (comment lines)
             ## 2. replace the first 18 spaces with tabs, 18th field is a free text field (can contain spaces)
-            sed -e 's/#.*$//' -e '/^$/d' -e 's/ /\t/g' -e 's/\t/ /18g' \$temp_tabular_output > $outfile
+            sed -e 's/#.*$//' -e '/^$/d' -e 's/\s\+/\t/g' -e 's/\t/ /18g' \$temp_tabular_output > $outfile

     </command>
         <inputs>