Mercurial > repos > pjbriggs > amplicon_analysis_pipeline
view install_amplicon_analysis.sh @ 4:86a12d75ebe4 draft default tip
planemo upload for repository https://github.com/pjbriggs/Amplicon_analysis-galaxy commit 7be61b7ed35ca3deaad68d2eae384c8cd365bcb8
author | pjbriggs |
---|---|
date | Fri, 20 Dec 2019 06:59:49 -0500 |
parents | 3ab198df8f3f |
children |
line wrap: on
line source
#!/bin/sh
#
# Prototype script to setup a conda environment with the
# dependencies needed for the Amplicon_analysis_pipeline
# script
#
# Usage: install_amplicon_analysis.sh [DIR]
#
# Creates Amplicon_analysis-<PIPELINE_VERSION> under DIR (or under the
# current directory if DIR is not supplied) containing a private
# Miniconda install, a conda environment with the pipeline
# dependencies, the non-conda tools (pipeline scripts, ChimeraSlayer,
# uclust, R 3.2.1) and wrapper scripts in its 'bin' subdirectory.
#
# Abort on the first failing command. This is set here rather than on
# the shebang line so it also applies when run as 'sh <script>'.
set -e
#
# Handle command line
usage()
{
    echo "Usage: $(basename "$0") [DIR]"
    echo ""
    echo "Installs the Amplicon_analysis_pipeline package plus"
    echo "dependencies in directory DIR (or current directory "
    echo "if DIR not supplied)"
}
if [ -n "${1:-}" ] ; then
    # Check if help was requested
    case "$1" in
        --help|-h)
            usage
            exit 0
            ;;
    esac
    # Assume it's the installation directory
    cd -- "$1"
fi
# Versions
PIPELINE_VERSION=1.2.3
RDP_CLASSIFIER_VERSION=2.2
# Directories
TOP_DIR="$(pwd)/Amplicon_analysis-${PIPELINE_VERSION}"
BIN_DIR="${TOP_DIR}/bin"
CONDA_DIR="${TOP_DIR}/conda"
CONDA_BIN="${CONDA_DIR}/bin"
CONDA_LIB="${CONDA_DIR}/lib"
CONDA="${CONDA_BIN}/conda"
ENV_NAME="amplicon_analysis_pipeline@${PIPELINE_VERSION}"
ENV_DIR="${CONDA_DIR}/envs/${ENV_NAME}"
#
# Functions
#
# Report failure and terminate script
# Arguments: the error message (all arguments are joined with spaces)
# All output goes to stderr; exits with status 1
fail()
{
    {
        echo ""
        echo "ERROR $*"
        echo ""
        echo "$(basename "$0"): installation failed"
    } >&2
    exit 1
}
#
# Rewrite the shebangs in the installed conda scripts
# to remove the full path to conda 'bin' directory
rewrite_conda_shebangs()
{
    local pattern
    pattern="s,^#!${CONDA_BIN}/,#!/usr/bin/env ,g"
    find "${CONDA_BIN}" -type f -exec sed -i "$pattern" {} \;
}
#
# Install conda
# Downloads the Miniconda2 installer into a temporary directory and
# installs it into CONDA_DIR, then prepends CONDA_BIN to PATH.
# Does nothing (beyond a warning) if CONDA_DIR already exists.
install_conda()
{
    echo "++++++++++++++++"
    echo "Installing conda"
    echo "++++++++++++++++"
    if [ -e "${CONDA_DIR}" ] ; then
        echo "*** $CONDA_DIR already exists ***" >&2
        return
    fi
    # Declare and assign separately so that a failing command
    # substitution is not masked by 'local'
    local cwd wd
    cwd=$(pwd)
    wd=$(mktemp -d)
    cd "$wd"
    wget -q https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh
    bash ./Miniconda2-latest-Linux-x86_64.sh -b -p "${CONDA_DIR}"
    echo "Installed conda in ${CONDA_DIR}"
    # Update the installation files
    # This is to avoid problems when the length of the installation
    # directory path exceeds the limit for the shebang statement
    # in the conda files
    echo ""
    printf '%s' "Rewriting conda shebangs..."
    rewrite_conda_shebangs
    echo "ok"
    printf '%s' "Adding conda bin to PATH..."
    PATH="${CONDA_BIN}:$PATH"
    echo "ok"
    cd "$cwd"
    # Remove the whole working directory in one step (also removes
    # any dotfiles); ':?' guards against an empty variable
    rm -rf -- "${wd:?}"
}
#
# Create conda environment
# Writes an environment.yml into a temporary directory and creates
# the conda environment ENV_NAME from it.
install_conda_packages()
{
    echo "+++++++++++++++++++++++++"
    echo "Installing conda packages"
    echo "+++++++++++++++++++++++++"
    local cwd wd
    cwd=$(pwd)
    wd=$(mktemp -d)
    cd "$wd"
    cat >environment.yml <<EOF
name: ${ENV_NAME}
channels:
  - defaults
  - conda-forge
  - bioconda
dependencies:
  - python=2.7
  - cutadapt=1.11
  - sickle-trim=1.33
  - bioawk=1.0
  - pandaseq=2.8.1
  - spades=3.5.0
  - fastqc=0.11.3
  - qiime=1.8.0
  - blast-legacy=2.2.26
  - fasta-splitter=0.2.4
  - rdp_classifier=$RDP_CLASSIFIER_VERSION
  - vsearch=1.1.3
  # Need to explicitly specify libgfortran
  # version (otherwise get version incompatible
  # with numpy=1.7.1)
  - libgfortran=1.0
  # Compilers needed to build R
  - gcc_linux-64
  - gxx_linux-64
  - gfortran_linux-64
EOF
    "${CONDA}" env create --name "${ENV_NAME}" -f environment.yml
    echo "Created conda environment in ${ENV_DIR}"
    cd "$cwd"
    rm -rf -- "${wd:?}"
}
#
# Install all the non-conda dependencies in a single
# function (invokes separate functions for each package)
# Each package is skipped if its entry in BIN_DIR already exists.
install_non_conda_packages()
{
    echo "+++++++++++++++++++++++++++++"
    echo "Installing non-conda packages"
    echo "+++++++++++++++++++++++++++++"
    # Temporary working directory
    local cwd wd
    cwd=$(pwd)
    wd=$(mktemp -d)
    cd "$wd"
    # Amplicon analysis pipeline
    printf '%s' "Installing Amplicon_analysis_pipeline..."
    if [ -e "${BIN_DIR}/Amplicon_analysis_pipeline.sh" ] ; then
        echo "already installed"
    else
        install_amplicon_analysis_pipeline
        echo "ok"
    fi
    # ChimeraSlayer
    printf '%s' "Installing ChimeraSlayer..."
    if [ -e "${BIN_DIR}/ChimeraSlayer.pl" ] ; then
        echo "already installed"
    else
        install_chimeraslayer
        echo "ok"
    fi
    # Uclust
    printf '%s' "Installing uclust for QIIME/pyNAST..."
    if [ -e "${BIN_DIR}/uclust" ] ; then
        echo "already installed"
    else
        install_uclust
        echo "ok"
    fi
    # R 3.2.1
    printf '%s' "Checking for R 3.2.1..."
    if [ -e "${BIN_DIR}/R" ] ; then
        echo "R already installed"
    else
        echo "not found"
        install_R_3_2_1
    fi
    # Leave the working directory and clean it up
    cd "$cwd"
    rm -rf -- "${wd:?}"
}
#
# Amplicon analysis pipeline
# Downloads the pipeline release tarball, copies its *.sh scripts and
# the 'uc2otutab' directory under TOP_DIR/share, and generates two
# wrapper scripts in BIN_DIR: Amplicon_analysis_pipeline.sh and
# install_reference_data.sh
install_amplicon_analysis_pipeline()
{
    local cwd wd install_dir f
    cwd=$(pwd)
    wd=$(mktemp -d)
    cd "$wd"
    wget -q "https://github.com/MTutino/Amplicon_analysis/archive/v${PIPELINE_VERSION}.tar.gz"
    tar zxf "v${PIPELINE_VERSION}.tar.gz"
    cd "Amplicon_analysis-${PIPELINE_VERSION}"
    install_dir="${TOP_DIR}/share/amplicon_analysis_pipeline-${PIPELINE_VERSION}"
    mkdir -p "$install_dir"
    ln -s "$install_dir" "${TOP_DIR}/share/amplicon_analysis_pipeline"
    for f in *.sh ; do
        /bin/cp "$f" "$install_dir"
    done
    /bin/cp -r uc2otutab "$install_dir"
    mkdir -p "${BIN_DIR}"
    # In the wrappers below unescaped variables are expanded now (at
    # install time); anything escaped with a backslash is written
    # literally and evaluated when the wrapper itself runs
    cat >"${BIN_DIR}/Amplicon_analysis_pipeline.sh" <<EOF
#!/usr/bin/env bash
#
# Point to Qiime config
export QIIME_CONFIG_FP="${TOP_DIR}/qiime/qiime_config"
# Set up the RDP jar file
export RDP_JAR_PATH="${TOP_DIR}/share/rdp_classifier/rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar"
# Put the scripts onto the PATH
export PATH="${BIN_DIR}:${install_dir}:\$PATH"
# Activate the conda environment
export PATH="${CONDA_BIN}:\$PATH"
source "${CONDA_BIN}/activate" "${ENV_NAME}"
# Execute the driver script with the supplied arguments
"${install_dir}/Amplicon_analysis_pipeline.sh" "\$@"
exit \$?
EOF
    chmod 0755 "${BIN_DIR}/Amplicon_analysis_pipeline.sh"
    # 'set -e' is used in the generated script instead of putting
    # '-e' on the shebang: on Linux everything after the interpreter
    # path is handed to 'env' as a single argument ("bash -e")
    cat >"${BIN_DIR}/install_reference_data.sh" <<EOF
#!/usr/bin/env bash
set -e
#
function usage() {
  echo "Usage: \$(basename "\$0") DIR"
}
if [ -z "\$1" ] ; then
  usage
  exit 0
elif [ "\$1" == "--help" ] || [ "\$1" == "-h" ] ; then
  usage
  echo ""
  echo "Install reference data into DIR"
  exit 0
fi
echo "=========================================="
echo "Installing Amplicon analysis pipeline data"
echo "=========================================="
if [ ! -e "\$1" ] ; then
    echo "Making directory \$1"
    mkdir -p "\$1"
fi
cd "\$1"
DATA_DIR=\$(pwd)
echo "Installing reference data under \$DATA_DIR"
"${install_dir}/References.sh"
echo ""
echo "Use '-r \$DATA_DIR' when running Amplicon_analysis_pipeline.sh"
echo "to use the reference data from this directory"
echo ""
echo "\$(basename "\$0"): finished"
EOF
    chmod 0755 "${BIN_DIR}/install_reference_data.sh"
    cd "$cwd"
    rm -rf -- "${wd:?}"
}
#
# ChimeraSlayer
# Downloads the microbiomeutil 2010-04-29 tarball, copies its
# ChimeraSlayer directory under TOP_DIR/share and generates a wrapper
# script BIN_DIR/ChimeraSlayer.pl
install_chimeraslayer()
{
    local cwd wd install_dir
    cwd=$(pwd)
    wd=$(mktemp -d)
    cd "$wd"
    wget -q https://sourceforge.net/projects/microbiomeutil/files/__OLD_VERSIONS/microbiomeutil_2010-04-29.tar.gz
    tar zxf microbiomeutil_2010-04-29.tar.gz
    cd microbiomeutil_2010-04-29
    install_dir="${TOP_DIR}/share/microbiome_chimeraslayer-2010-04-29"
    mkdir -p "$install_dir"
    ln -s "$install_dir" "${TOP_DIR}/share/microbiome_chimeraslayer"
    /bin/cp -r ChimeraSlayer "$install_dir"
    mkdir -p "${BIN_DIR}"
    # "\$@" must be escaped so that it is written literally into the
    # wrapper; unescaped it would be expanded here, at install time,
    # and the wrapper would never pass its arguments on
    cat >"${BIN_DIR}/ChimeraSlayer.pl" <<EOF
#!/usr/bin/env bash
export PATH="${install_dir}:\$PATH"
"${install_dir}/ChimeraSlayer/ChimeraSlayer.pl" "\$@"
EOF
    chmod 0755 "${install_dir}/ChimeraSlayer/ChimeraSlayer.pl"
    chmod 0755 "${BIN_DIR}/ChimeraSlayer.pl"
    cd "$cwd"
    rm -rf -- "${wd:?}"
}
#
# uclust required for QIIME/pyNAST
# License only allows this version to be used with those two packages
# See: http://drive5.com/uclust/downloads1_2_22q.html
install_uclust()
{
    local cwd wd install_dir
    cwd=$(pwd)
    wd=$(mktemp -d)
    cd "$wd"
    wget -q http://drive5.com/uclust/uclustq1.2.22_i86linux64
    install_dir="${TOP_DIR}/share/uclust-1.2.22"
    mkdir -p "$install_dir"
    ln -s "$install_dir" "${TOP_DIR}/share/uclust"
    /bin/mv uclustq1.2.22_i86linux64 "${install_dir}/uclust"
    chmod 0755 "${install_dir}/uclust"
    mkdir -p "${BIN_DIR}"
    ln -s "${install_dir}/uclust" "${BIN_DIR}"
    cd "$cwd"
    rm -rf -- "${wd:?}"
}
#
# R 3.2.1
# Can't use version from conda due to dependency conflicts
# Builds R from source with the conda environment activated and
# installs it with TOP_DIR as the prefix.
# NOTE(review): arguments given to '.' are not passed to the sourced
# file by every /bin/sh implementation - confirm that the environment
# is really activated on the target systems
install_R_3_2_1()
{
    . "${CONDA_BIN}/activate" "${ENV_NAME}"
    local cwd wd install_dir
    cwd=$(pwd)
    wd=$(mktemp -d)
    cd "$wd"
    printf '%s' "Fetching R 3.2.1 source code..."
    wget -q http://cran.r-project.org/src/base/R-3/R-3.2.1.tar.gz
    echo "ok"
    install_dir="${TOP_DIR}"
    mkdir -p "$install_dir"
    printf '%s' "Unpacking source code..."
    tar xzf R-3.2.1.tar.gz >INSTALL.log 2>&1
    echo "ok"
    cd R-3.2.1
    printf '%s' "Running configure..."
    ./configure --prefix="$install_dir" --with-x=no --with-readline=no >>INSTALL.log 2>&1
    echo "ok"
    printf '%s' "Running make..."
    make >>INSTALL.log 2>&1
    echo "ok"
    printf '%s' "Running make install..."
    make install >>INSTALL.log 2>&1
    echo "ok"
    cd "$cwd"
    rm -rf -- "${wd:?}"
    . "${CONDA_BIN}/deactivate"
}
#
# Link tools from the conda environment to the names and locations
# that the wrapper scripts point to, and write the qiime config file.
# Each item is skipped if it already exists; a missing source file in
# the conda environment is fatal.
setup_pipeline_environment()
{
    echo "+++++++++++++++++++++++++++++++"
    echo "Setting up pipeline environment"
    echo "+++++++++++++++++++++++++++++++"
    # vsearch113
    printf '%s' "Setting up vsearch113..."
    if [ -e "${BIN_DIR}/vsearch113" ] ; then
        echo "already exists"
    elif [ ! -e "${ENV_DIR}/bin/vsearch" ] ; then
        echo "failed"
        fail "vsearch not found"
    else
        ln -s "${ENV_DIR}/bin/vsearch" "${BIN_DIR}/vsearch113"
        echo "ok"
    fi
    # fasta_splitter.pl
    printf '%s' "Setting up fasta_splitter.pl..."
    if [ -e "${BIN_DIR}/fasta-splitter.pl" ] ; then
        echo "already exists"
    elif [ ! -e "${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl" ] ; then
        echo "failed"
        fail "fasta-splitter.pl not found"
    else
        ln -s "${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl" "${BIN_DIR}/fasta-splitter.pl"
        echo "ok"
    fi
    # rdp_classifier.jar
    local rdp_classifier_jar
    rdp_classifier_jar="rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar"
    printf '%s' "Setting up rdp_classifier.jar..."
    if [ -e "${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar}" ] ; then
        echo "already exists"
    elif [ ! -e "${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar" ] ; then
        echo "failed"
        fail "rdp_classifier.jar not found"
    else
        mkdir -p "${TOP_DIR}/share/rdp_classifier"
        ln -s "${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar" "${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar}"
        echo "ok"
    fi
    # qiime_config
    printf '%s' "Setting up qiime_config..."
    if [ -e "${TOP_DIR}/qiime/qiime_config" ] ; then
        echo "already exists"
    else
        mkdir -p "${TOP_DIR}/qiime"
        cat >"${TOP_DIR}/qiime/qiime_config" <<EOF-qiime-config
qiime_scripts_dir ${ENV_DIR}/bin
EOF-qiime-config
        echo "ok"
    fi
}
#
# Remove the compilers from the conda environment
# Not sure if this step is necessary
remove_conda_compilers()
{
    echo "+++++++++++++++++++++++++++++++++++++++++"
    echo "Removing compilers from conda environment"
    echo "+++++++++++++++++++++++++++++++++++++++++"
    "${CONDA}" remove -y -n "${ENV_NAME}" gcc_linux-64 gxx_linux-64 gfortran_linux-64
}
#
# Top level script does the installation
echo "======================================="
echo "Amplicon_analysis_pipeline installation"
echo "======================================="
echo "Installing into ${TOP_DIR}"
if [ -e "${TOP_DIR}" ] ; then
    fail "Directory already exists"
fi
mkdir -p "${TOP_DIR}"
install_conda
install_conda_packages
install_non_conda_packages
setup_pipeline_environment
remove_conda_compilers
echo "===================================="
echo "Amplicon_analysis_pipeline installed"
echo "===================================="
echo ""
echo "Install reference data using:"
echo ""
echo "\$ ${BIN_DIR}/install_reference_data.sh DIR"
echo ""
echo "Run pipeline scripts using:"
echo ""
echo "\$ ${BIN_DIR}/Amplicon_analysis_pipeline.sh ..."
echo ""
echo "(or add ${BIN_DIR} to your PATH)"
echo ""
echo "$(basename "$0"): finished"
##
#