Mercurial > repos > guru-ananda > rhmm
changeset 0:e090cf6dd3f5 draft
Imported from capsule None
author | devteam |
---|---|
date | Thu, 22 Jan 2015 10:40:18 -0500 |
parents | |
children | 38f5cd46ffd3 |
files | hmm/hmm.xml hmm/r_wrapper.sh hmm/tool_dependencies.xml |
diffstat | 3 files changed, 184 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!--
  hmm/hmm.xml (file added by this changeset)

  Galaxy tool definition "hmm_1". It renders the R script in the "script_file"
  configfile from the user's parameters and runs it through r_wrapper.sh.
  The script fits a Hidden Markov Model with RHmm's HMMFit and then decodes the
  hidden state sequence with viterbi.

  Outputs:
    out_file1  text capture of the fitted model as printed by R
    out_file2  the input table with the decoded state appended as a last column
-->
<tool id="hmm_1" name="Fit HMM " version="1.0.0">
    <description>on numeric data</description>
    <!-- The leading backslash keeps R_SCRIPT_PATH from being expanded by the template engine. -->
    <command interpreter="bash">\$R_SCRIPT_PATH/r_wrapper.sh $script_file</command>

    <inputs>
        <param name="input" type="data" format="tabular" label="Dataset"/>
        <param name="var_cols" label="Select columns containing observations " type="data_column" data_ref="input" numerical="True" multiple="true" >
            <validator type="no_options" message="Please select at least one column."/>
        </param>
        <param name="samp_col" label="Select column containing sample numbers " type="data_column" data_ref="input" numerical="True" multiple="false" >
            <validator type="no_options" message="Please select a column."/>
        </param>
        <param name="header" type="select" label="Treat first line as header? ">
            <option value="yes" selected="true">Yes</option>
            <option value="no">No</option>
        </param>
        <param name="nStates" size="10" type="integer" value="2" label="Number of hidden states " />
        <conditional name="disChoice">
            <param name="dis" type="select" label="Distribution">
                <option value="NORMAL" selected="true">Normal</option>
                <option value="DISCRETE">Discrete</option>
                <option value="MIXTURE">Mixture</option>
            </param>
            <when value="NORMAL" />
            <when value="DISCRETE" />
            <when value="MIXTURE">
                <param name="nMixt" size="10" type="integer" value="2" label="Number of mixtures of normal distributions " />
            </when>
        </conditional>
        <!--
        <conditional name="asymptChoice">
            <param name="asymptCov" type="select" label="Compute asymptotic covariance matrix? ">
                <option value="FALSE" selected="true">No</option>
                <option value="TRUE">Yes</option>
            </param>
            <when value="FALSE" />
            <when value="TRUE">
                <param name="asymptMethod" type="select" label="Method for computing asymptotic covariance matrix ">
                    <option value="nlme" selected="true">nlme</option>
                    <option value="optim">optim</option>
                </param>
            </when>
        </conditional>
        -->
    </inputs>

    <configfiles>
        <configfile name="script_file">
            ## Setup R error handling to go to stderr
            options( show.error.messages=F,
                     error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) },
                     warn = -1 )
            suppressPackageStartupMessages(library('RHmm'))

            #if str($header) == "yes"
                inp = read.table( "${input.file_name}", header=T )
            #else
                inp = read.table( "${input.file_name}", header=F )
            #end if

            ## Distinct sample identifiers, in order of first appearance.
            samp_numbers = unique(inp[, ${samp_col}])

            ## A single sample is passed to HMMFit directly; several samples are
            ## passed as a list with one element per sample.
            if (length(samp_numbers) == 1){
                samp_list = inp[,c(${var_cols})]
            } else {
                samp_list=list()
                ## seq_along yields an empty loop for empty input, unlike 1:length(x).
                for (i in seq_along(samp_numbers)) {
                    samp_list[[i]] = inp[(inp[,${samp_col}] == samp_numbers[i]),c(${var_cols})]
                }
            }

            nStates = ${nStates}
            dis = "$disChoice['dis']"
            nMixt = 0

            #if str($disChoice['dis']) == "MIXTURE"
                nMixt = ${disChoice.nMixt}
            #end if

            ## The asymptotic covariance option is disabled in the inputs above, so
            ## asymptCov is pinned to "FALSE" and the TRUE branches below do not run.
            ##asymptCov = $asymptChoice['asymptCov']
            asymptCov = "FALSE"
            asymptMethod = "nlme"

            ##if (asymptCov == "TRUE") {
            ##    asymptMethod = "${asymptChoice.asymptMethod}"
            ##}

            #if str($disChoice['dis']) == "MIXTURE"
                if (asymptCov == "TRUE") {
                    myfit = HMMFit(samp_list, nStates=nStates, dis=dis, nMixt=nMixt, asymptCov=asymptCov, asymptMethod=asymptMethod)
                } else {
                    myfit = HMMFit(samp_list, nStates=nStates, dis=dis, nMixt=nMixt)
                }
            #else
                if (asymptCov == "TRUE") {
                    myfit = HMMFit(samp_list, nStates=nStates, dis=dis, asymptCov=asymptCov, asymptMethod=asymptMethod)
                } else {
                    myfit = HMMFit(samp_list, nStates=nStates, dis=dis)
                }
            #end if

            myfittxt=capture.output(myfit)
            cat(myfittxt,file="${out_file1}",sep="\n")

            ## Decode the hidden states and store them in the ORIGINAL row order.
            ## Simply concatenating the per-sample Viterbi paths only lines up with
            ## the input rows when every sample occupies one contiguous run of rows;
            ## writing each path back to that sample's own row indices is correct
            ## for interleaved samples as well.
            state_col = rep(NA, nrow(inp))
            if (length(samp_numbers) == 1){
                state_col = unlist(viterbi(myfit, samp_list)["states"])
            } else {
                for (i in seq_along(samp_numbers)) {
                    samp_rows = which(inp[,${samp_col}] == samp_numbers[i])
                    state_col[samp_rows] = unlist(viterbi(myfit, samp_list[[i]])["states"])
                }
            }
            inp_stateSol=cbind(inp,state_col)
            write.table(inp_stateSol,file="${out_file2}",sep="\t",row.names=F,col.names=F,quote=F)

        </configfile>
    </configfiles>

    <outputs>
        <data format="txt" name="out_file1" />
        <data format="input" name="out_file2" />
    </outputs>

    <!-- NOTE(review): the script loads the 'RHmm' R library, but only R itself is
         declared below; presumably RHmm must already be available to that R
         installation. TODO confirm. -->
    <requirements>
        <requirement type="set_environment">R_SCRIPT_PATH</requirement>
        <requirement type="package" version="2.15.0">R</requirement>
    </requirements>

<help>

.. class:: infomark

**What it does**

This tool uses the 'HMMFit' and 'viterbi' functions from 'RHmm' library from R statistical package to fit an Hidden Markov Model using Baum-Welch algorithm, and calculate the optimal hidden states sequence using Viterbi's algorithm.

It returns two outputs - one containing summary statistics for HMMFit, and the other containing state numbers appended as a new column to the input data.

*Ollivier TARAMASCO and Sebastian Bauer (2010). RHmm: Hidden Markov Models simulations and estimations. R package version 1.4.4. http://CRAN.R-project.org/package=RHmm.*

-----

.. class:: warningmark

**Note**

The tool fails if any of the observation columns contain non-numeric data.


</help>
</tool>
#!/bin/sh
# hmm/r_wrapper.sh (file added by this changeset)

### Run R providing the R script in $1 as standard input and passing
### the remaining arguments on the command line
###
### Usage: r_wrapper.sh R_SCRIPT [R_ARGS...]
### Exits with status 1 and a message on stderr if R is not on PATH, if no
### script path is given, or if the script file does not exist.

# Write a message to stderr and exit with status 1.
# Uses the POSIX "name()" form: the "function" keyword is not available in
# every /bin/sh, and the shebang above asks for sh.
fail()
{
    echo "$@" >&2
    exit 1
}

# Ensure R executable is found ("command -v" is the POSIX way to look up a command)
command -v R > /dev/null 2>&1 || fail "'R' is required by this tool but was not found on path"

# Without this check an empty path would pass "test -f" (a lone word is a
# non-empty string, hence true) and then fail confusingly at the redirect.
[ "$#" -ge 1 ] || fail "usage: r_wrapper.sh R_SCRIPT [R_ARGS...]"

# Extract first argument
infile=$1; shift

# Ensure the file exists (quoted so paths containing whitespace work)
test -f "$infile" || fail "R input file '$infile' does not exist"

# Invoke R passing file named by first argument to stdin; "$@" forwards each
# remaining argument as its own word instead of re-splitting on whitespace
R --vanilla --slave "$@" < "$infile"
<?xml version="1.0"?>
<!-- hmm/tool_dependencies.xml (file added by this changeset) -->
<tool_dependency>

    <!-- Defines R_SCRIPT_PATH, which the command line in hmm/hmm.xml uses to
         locate r_wrapper.sh. The value is the literal token
         $REPOSITORY_INSTALL_DIR; presumably it is substituted with the
         repository's install location at installation time. TODO confirm. -->
    <set_environment version="1.0">
        <environment_variable name="R_SCRIPT_PATH" action="set_to">$REPOSITORY_INSTALL_DIR</environment_variable>
    </set_environment>

    <!-- R 2.15.0, resolved from the devteam-owned package_r_2_15_0 repository
         pinned at changeset 6c34eaa82fed. -->
    <package name="R" version="2.15.0">
        <repository
            toolshed="https://toolshed.g2.bx.psu.edu"
            owner="devteam"
            name="package_r_2_15_0"
            changeset_revision="6c34eaa82fed" />
    </package>

</tool_dependency>