diff install_amplicon_analysis.sh @ 3:3ab198df8f3f draft

planemo upload for repository https://github.com/pjbriggs/Amplicon_analysis-galaxy commit 15390f18b91d838880d952eb2714f689bbd8a042
author pjbriggs
date Thu, 18 Oct 2018 09:18:04 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/install_amplicon_analysis.sh	Thu Oct 18 09:18:04 2018 -0400
@@ -0,0 +1,425 @@
+#!/bin/sh -e
+#
+# Prototype script to setup a conda environment with the
+# dependencies needed for the Amplicon_analysis_pipeline
+# script
+#
+# Handle command line
+usage()
+{
+    echo "Usage: $(basename $0) [DIR]"
+    echo ""
+    echo "Installs the Amplicon_analysis_pipeline package plus"
+    echo "dependencies in directory DIR (or current directory "
+    echo "if DIR not supplied)"
+}
+if [ ! -z "$1" ] ; then
+    # Check if help was requested
+    case "$1" in
+	--help|-h)
+	    usage
+	    exit 0
+	    ;;
+    esac
+    # Assume it's the installation directory
+    cd $1
+fi
+# Versions
+PIPELINE_VERSION=1.2.3
+RDP_CLASSIFIER_VERSION=2.2
+# Directories
+TOP_DIR=$(pwd)/Amplicon_analysis-${PIPELINE_VERSION}
+BIN_DIR=${TOP_DIR}/bin
+CONDA_DIR=${TOP_DIR}/conda
+CONDA_BIN=${CONDA_DIR}/bin
+CONDA_LIB=${CONDA_DIR}/lib
+CONDA=${CONDA_BIN}/conda
+ENV_NAME="amplicon_analysis_pipeline@${PIPELINE_VERSION}"
+ENV_DIR=${CONDA_DIR}/envs/$ENV_NAME
+#
+# Functions
+#
+# Report failure and terminate script
+fail()
+{
+    echo ""
+    echo ERROR $@ >&2
+    echo ""
+    echo "$(basename $0): installation failed"
+    exit 1
+}
+#
+# Rewrite the shebangs in the installed conda scripts
+# to remove the full path to conda 'bin' directory
+rewrite_conda_shebangs()
+{
+    pattern="s,^#!${CONDA_BIN}/,#!/usr/bin/env ,g"
+    find ${CONDA_BIN} -type f -exec sed -i "$pattern" {} \;
+}
+#
+# Install conda
+install_conda()
+{
+    echo "++++++++++++++++"
+    echo "Installing conda"
+    echo "++++++++++++++++"
+    if [ -e ${CONDA_DIR} ] ; then
+	echo "*** $CONDA_DIR already exists ***" >&2
+	return
+    fi
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh
+    bash ./Miniconda2-latest-Linux-x86_64.sh -b -p ${CONDA_DIR}
+    echo Installed conda in ${CONDA_DIR}
+    # Update the installation files
+    # This is to avoid problems when the length the installation
+    # directory path exceeds the limit for the shebang statement
+    # in the conda files
+    echo ""
+    echo -n "Rewriting conda shebangs..."
+    rewrite_conda_shebangs
+    echo "ok"
+    echo -n "Adding conda bin to PATH..."
+    PATH=${CONDA_BIN}:$PATH
+    echo "ok"
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# Create conda environment
+install_conda_packages()
+{
+    echo "+++++++++++++++++++++++++"
+    echo "Installing conda packages"
+    echo "+++++++++++++++++++++++++"
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    cat >environment.yml <<EOF
+name: ${ENV_NAME}
+channels:
+  - defaults
+  - conda-forge
+  - bioconda
+dependencies:
+  - python=2.7
+  - cutadapt=1.11
+  - sickle-trim=1.33
+  - bioawk=1.0
+  - pandaseq=2.8.1
+  - spades=3.5.0
+  - fastqc=0.11.3
+  - qiime=1.8.0
+  - blast-legacy=2.2.26
+  - fasta-splitter=0.2.4
+  - rdp_classifier=$RDP_CLASSIFIER_VERSION
+  - vsearch=1.1.3
+  # Need to explicitly specify libgfortran
+  # version (otherwise get version incompatible
+  # with numpy=1.7.1)
+  - libgfortran=1.0
+  # Compilers needed to build R
+  - gcc_linux-64
+  - gxx_linux-64
+  - gfortran_linux-64
+EOF
+    ${CONDA} env create --name "${ENV_NAME}" -f environment.yml
+    echo Created conda environment in ${ENV_DIR}
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# Install all the non-conda dependencies in a single
+# function (invokes separate functions for each package)
+install_non_conda_packages()
+{
+    echo "+++++++++++++++++++++++++++++"
+    echo "Installing non-conda packages"
+    echo "+++++++++++++++++++++++++++++"
+    # Temporary working directory
+    local wd=$(mktemp -d)
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    # Amplicon analysis pipeline
+    echo -n "Installing Amplicon_analysis_pipeline..."
+    if [ -e ${BIN_DIR}/Amplicon_analysis_pipeline.sh ] ; then
+	echo "already installed"
+    else
+	install_amplicon_analysis_pipeline
+	echo "ok"
+    fi
+    # ChimeraSlayer
+    echo -n "Installing ChimeraSlayer..."
+    if [ -e ${BIN_DIR}/ChimeraSlayer.pl ] ; then
+	echo "already installed"
+    else
+	install_chimeraslayer
+	echo "ok"
+    fi
+    # Uclust
+    echo -n "Installing uclust for QIIME/pyNAST..."
+    if [ -e ${BIN_DIR}/uclust ] ; then
+	echo "already installed"
+    else
+	install_uclust
+	echo "ok"
+    fi
+    # R 3.2.1"
+    echo -n "Checking for R 3.2.1..."
+    if [ -e ${BIN_DIR}/R ] ; then
+	echo "R already installed"
+    else
+	echo "not found"
+	install_R_3_2_1
+    fi
+}
+#
+# Amplicon analyis pipeline
+install_amplicon_analysis_pipeline()
+{
+    local wd=$(mktemp -d)
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q https://github.com/MTutino/Amplicon_analysis/archive/v${PIPELINE_VERSION}.tar.gz
+    tar zxf v${PIPELINE_VERSION}.tar.gz
+    cd Amplicon_analysis-${PIPELINE_VERSION}
+    INSTALL_DIR=${TOP_DIR}/share/amplicon_analysis_pipeline-${PIPELINE_VERSION}
+    mkdir -p $INSTALL_DIR
+    ln -s $INSTALL_DIR ${TOP_DIR}/share/amplicon_analysis_pipeline
+    for f in *.sh ; do
+	/bin/cp $f $INSTALL_DIR
+    done
+    /bin/cp -r uc2otutab $INSTALL_DIR
+    mkdir -p ${BIN_DIR}
+    cat >${BIN_DIR}/Amplicon_analysis_pipeline.sh <<EOF
+#!/usr/bin/env bash
+#
+# Point to Qiime config
+export QIIME_CONFIG_FP=${TOP_DIR}/qiime/qiime_config
+# Set up the RDP jar file
+export RDP_JAR_PATH=${TOP_DIR}/share/rdp_classifier/rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar
+# Put the scripts onto the PATH
+export PATH=${BIN_DIR}:${INSTALL_DIR}:\$PATH
+# Activate the conda environment
+export PATH=${CONDA_BIN}:\$PATH
+source ${CONDA_BIN}/activate ${ENV_NAME}
+# Execute the driver script with the supplied arguments
+$INSTALL_DIR/Amplicon_analysis_pipeline.sh \$@
+exit \$?
+EOF
+    chmod 0755 ${BIN_DIR}/Amplicon_analysis_pipeline.sh
+    cat >${BIN_DIR}/install_reference_data.sh <<EOF
+#!/usr/bin/env bash -e
+#
+function usage() {
+  echo "Usage: \$(basename \$0) DIR"
+}
+if [ -z "\$1" ] ; then
+  usage
+  exit 0
+elif [ "\$1" == "--help" ] || [ "\$1" == "-h" ] ; then
+  usage
+  echo ""
+  echo "Install reference data into DIR"
+  exit 0
+fi
+echo "=========================================="
+echo "Installing Amplicon analysis pipeline data"
+echo "=========================================="
+if [ ! -e "\$1" ] ; then
+    echo "Making directory \$1"
+    mkdir -p \$1
+fi
+cd \$1
+DATA_DIR=\$(pwd)
+echo "Installing reference data under \$DATA_DIR"
+$INSTALL_DIR/References.sh
+echo ""
+echo "Use '-r \$DATA_DIR' when running Amplicon_analysis_pipeline.sh"
+echo "to use the reference data from this directory"
+echo ""
+echo "\$(basename \$0): finished"
+EOF
+    chmod 0755 ${BIN_DIR}/install_reference_data.sh
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# ChimeraSlayer
+install_chimeraslayer()
+{
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q https://sourceforge.net/projects/microbiomeutil/files/__OLD_VERSIONS/microbiomeutil_2010-04-29.tar.gz
+    tar zxf microbiomeutil_2010-04-29.tar.gz
+    cd microbiomeutil_2010-04-29
+    INSTALL_DIR=${TOP_DIR}/share/microbiome_chimeraslayer-2010-04-29
+    mkdir -p $INSTALL_DIR
+    ln -s $INSTALL_DIR ${TOP_DIR}/share/microbiome_chimeraslayer
+    /bin/cp -r ChimeraSlayer $INSTALL_DIR
+    cat >${BIN_DIR}/ChimeraSlayer.pl <<EOF
+#!/usr/bin/env bash
+export PATH=$INSTALL_DIR:\$PATH
+$INSTALL_DIR/ChimeraSlayer/ChimeraSlayer.pl $@
+EOF
+    chmod 0755 ${INSTALL_DIR}/ChimeraSlayer/ChimeraSlayer.pl
+    chmod 0755 ${BIN_DIR}/ChimeraSlayer.pl
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# uclust required for QIIME/pyNAST
+# License only allows this version to be used with those two packages
+# See: http://drive5.com/uclust/downloads1_2_22q.html
+install_uclust()
+{
+    local wd=$(mktemp -d)
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q http://drive5.com/uclust/uclustq1.2.22_i86linux64
+    INSTALL_DIR=${TOP_DIR}/share/uclust-1.2.22
+    mkdir -p $INSTALL_DIR
+    ln -s $INSTALL_DIR ${TOP_DIR}/share/uclust
+    /bin/mv uclustq1.2.22_i86linux64 ${INSTALL_DIR}/uclust
+    chmod 0755 ${INSTALL_DIR}/uclust
+    ln -s  ${INSTALL_DIR}/uclust ${BIN_DIR}
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# R 3.2.1
+# Can't use version from conda due to dependency conflicts
+install_R_3_2_1()
+{
+    . ${CONDA_BIN}/activate ${ENV_NAME}
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    echo -n "Fetching R 3.2.1 source code..."
+    wget -q http://cran.r-project.org/src/base/R-3/R-3.2.1.tar.gz
+    echo "ok"
+    INSTALL_DIR=${TOP_DIR}
+    mkdir -p $INSTALL_DIR
+    echo -n "Unpacking source code..."
+    tar xzf R-3.2.1.tar.gz >INSTALL.log 2>&1
+    echo "ok"
+    cd R-3.2.1
+    echo -n "Running configure..."
+    ./configure --prefix=$INSTALL_DIR --with-x=no --with-readline=no >>INSTALL.log 2>&1
+    echo "ok"
+    echo -n "Running make..."
+    make >>INSTALL.log 2>&1
+    echo "ok"
+    echo -n "Running make install..."
+    make install >>INSTALL.log 2>&1
+    echo "ok"
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+    . ${CONDA_BIN}/deactivate
+}
+setup_pipeline_environment()
+{
+    echo "+++++++++++++++++++++++++++++++"
+    echo "Setting up pipeline environment"
+    echo "+++++++++++++++++++++++++++++++"
+    # vsearch113
+    echo -n "Setting up vsearch113..."
+    if [ -e ${BIN_DIR}/vsearch113 ] ; then
+	echo "already exists"
+    elif [ ! -e ${ENV_DIR}/bin/vsearch ] ; then
+	echo "failed"
+	fail "vsearch not found"
+    else
+	ln -s ${ENV_DIR}/bin/vsearch ${BIN_DIR}/vsearch113
+	echo "ok"
+    fi
+    # fasta_splitter.pl
+    echo -n "Setting up fasta_splitter.pl..."
+    if [ -e ${BIN_DIR}/fasta-splitter.pl ] ; then
+	echo "already exists"
+    elif [ ! -e ${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl ] ; then
+	echo "failed"
+	fail "fasta-splitter.pl not found"
+    else
+	ln -s ${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl ${BIN_DIR}/fasta-splitter.pl
+	echo "ok"
+    fi
+    # rdp_classifier.jar
+    local rdp_classifier_jar=rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar
+    echo -n "Setting up rdp_classifier.jar..."
+    if [ -e ${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar} ] ; then
+	echo "already exists"
+    elif [ ! -e ${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar ] ; then
+	echo "failed"
+	fail "rdp_classifier.jar not found"
+    else
+	mkdir -p ${TOP_DIR}/share/rdp_classifier
+	ln -s ${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar ${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar}
+	echo "ok"	
+    fi
+    # qiime_config
+    echo -n "Setting up qiime_config..."
+    if [ -e ${TOP_DIR}/qiime/qiime_config ] ; then
+	echo "already exists"
+    else
+	mkdir -p ${TOP_DIR}/qiime
+	cat >${TOP_DIR}/qiime/qiime_config <<EOF-qiime-config
+qiime_scripts_dir	${ENV_DIR}/bin
+EOF-qiime-config
+	echo "ok"
+    fi
+}
+#
+# Remove the compilers from the conda environment
+# Not sure if this step is necessary
+remove_conda_compilers()
+{
+    echo "+++++++++++++++++++++++++++++++++++++++++"
+    echo "Removing compilers from conda environment"
+    echo "+++++++++++++++++++++++++++++++++++++++++"
+    ${CONDA} remove -y -n ${ENV_NAME} gcc_linux-64 gxx_linux-64 gfortran_linux-64
+}
+#
+# Top level script does the installation
+echo "======================================="
+echo "Amplicon_analysis_pipeline installation"
+echo "======================================="
+echo "Installing into ${TOP_DIR}"
+if [ -e ${TOP_DIR} ] ; then
+    fail "Directory already exists"
+fi
+mkdir -p ${TOP_DIR}
+install_conda
+install_conda_packages
+install_non_conda_packages
+setup_pipeline_environment
+remove_conda_compilers
+echo "===================================="
+echo "Amplicon_analysis_pipeline installed"
+echo "===================================="
+echo ""
+echo "Install reference data using:"
+echo ""
+echo "\$ ${BIN_DIR}/install_reference_data.sh DIR"
+echo ""
+echo "Run pipeline scripts using:"
+echo ""
+echo "\$ ${BIN_DIR}/Amplicon_analysis_pipeline.sh ..."
+echo ""
+echo "(or add ${BIN_DIR} to your PATH)"
+echo ""
+echo "$(basename $0): finished"
+##
+#