comparison install_amplicon_analysis.sh @ 3:3ab198df8f3f draft

planemo upload for repository https://github.com/pjbriggs/Amplicon_analysis-galaxy commit 15390f18b91d838880d952eb2714f689bbd8a042
author pjbriggs
date Thu, 18 Oct 2018 09:18:04 -0400
parents
children
comparison
equal deleted inserted replaced
2:43d6f81bc667 3:3ab198df8f3f
#!/bin/sh -e
#
# Prototype script to set up a conda environment with the
# dependencies needed for the Amplicon_analysis_pipeline
# script
#
# Handle command line
#
# usage: print a short help message on stdout.
# The program name is taken from $0 (quoted, so a script path
# containing whitespace does not break basename).
usage()
{
    echo "Usage: $(basename "$0") [DIR]"
    echo ""
    echo "Installs the Amplicon_analysis_pipeline package plus"
    echo "dependencies in directory DIR (or current directory "
    echo "if DIR not supplied)"
}
# Process the (single, optional) command line argument:
#   --help | -h   print the usage message and exit 0
#   anything else is taken to be the installation directory, which
#                 becomes the current directory for the rest of the script
if [ -n "$1" ] ; then
    # Check if help was requested
    case "$1" in
        --help|-h)
            usage
            exit 0
            ;;
    esac
    # Assume it's the installation directory
    # (quoted so that directories with whitespace in the name work)
    cd "$1" || {
        echo "$(basename "$0"): cannot change to directory '$1'" >&2
        exit 1
    }
fi
# Versions
PIPELINE_VERSION=1.2.3
RDP_CLASSIFIER_VERSION=2.2
# Directories
# Everything is installed beneath TOP_DIR, which is created in the
# directory that is current when these assignments are evaluated
TOP_DIR="$(pwd)/Amplicon_analysis-${PIPELINE_VERSION}"
BIN_DIR="${TOP_DIR}/bin"
CONDA_DIR="${TOP_DIR}/conda"
CONDA_BIN="${CONDA_DIR}/bin"
CONDA_LIB="${CONDA_DIR}/lib"
CONDA="${CONDA_BIN}/conda"
# Name and location of the conda environment used by the pipeline
ENV_NAME="amplicon_analysis_pipeline@${PIPELINE_VERSION}"
ENV_DIR="${CONDA_DIR}/envs/${ENV_NAME}"
39 #
40 # Functions
41 #
# Report failure and terminate script
#
# Arguments: the words of the error message
# Outputs:   "ERROR <message>" on stderr; the surrounding blank lines
#            and the "installation failed" trailer go to stdout
# Exits the script with status 1 (never returns)
fail()
{
    echo ""
    # Quoted so the message is printed verbatim (no globbing and no
    # collapsing of whitespace)
    echo "ERROR $*" >&2
    echo ""
    echo "$(basename "$0"): installation failed"
    exit 1
}
51 #
# Rewrite the shebangs in the installed conda scripts
# to remove the full path to conda 'bin' directory
#
# Globals: CONDA_BIN (read) - path of the conda 'bin' directory
# Every regular file found under CONDA_BIN is edited in place: a line
# starting with '#!<CONDA_BIN>/' has that prefix replaced with
# '#!/usr/bin/env '.
rewrite_conda_shebangs()
{
    # Escape the characters that are special in a sed basic regular
    # expression (plus the ',' used as the delimiter) so that the
    # directory name is matched literally
    local conda_bin_re
    conda_bin_re=$(printf '%s\n' "${CONDA_BIN}" | sed 's/[][\.*^$,]/\\&/g')
    local pattern="s,^#!${conda_bin_re}/,#!/usr/bin/env ,"
    find "${CONDA_BIN}" -type f -exec sed -i "$pattern" {} +
}
59 #
# Install conda
#
# Downloads the Miniconda2 installer into a temporary directory, runs
# it in batch mode (-b) with CONDA_DIR as the prefix (-p), rewrites the
# shebangs of the installed scripts and prepends CONDA_BIN to PATH.
# If CONDA_DIR already exists a message is written to stderr and
# nothing else is done.
# Globals: CONDA_DIR, CONDA_BIN (read); PATH (written)
install_conda()
{
    echo "++++++++++++++++"
    echo "Installing conda"
    echo "++++++++++++++++"
    if [ -e "${CONDA_DIR}" ] ; then
        echo "*** $CONDA_DIR already exists ***" >&2
        return
    fi
    # Declare and assign separately so that a failing command
    # substitution is not masked by 'local'
    local cwd wd
    cwd=$(pwd)
    wd=$(mktemp -d)
    cd "$wd"
    wget -q https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh
    bash ./Miniconda2-latest-Linux-x86_64.sh -b -p "${CONDA_DIR}"
    echo "Installed conda in ${CONDA_DIR}"
    # Update the installation files
    # This is to avoid problems when the length of the installation
    # directory path exceeds the limit for the shebang statement
    # in the conda files
    echo ""
    echo -n "Rewriting conda shebangs..."
    rewrite_conda_shebangs
    echo "ok"
    echo -n "Adding conda bin to PATH..."
    PATH="${CONDA_BIN}:$PATH"
    echo "ok"
    cd "$cwd"
    # Remove the whole working directory (including any dotfiles)
    rm -rf "${wd:?}"
}
91 #
# Create conda environment
#
# Writes an environment.yml file describing the pipeline dependencies
# into a temporary directory and runs 'conda env create' on it.
# Globals: CONDA, ENV_NAME, ENV_DIR, RDP_CLASSIFIER_VERSION (read)
install_conda_packages()
{
    echo "+++++++++++++++++++++++++"
    echo "Installing conda packages"
    echo "+++++++++++++++++++++++++"
    local cwd wd
    cwd=$(pwd)
    wd=$(mktemp -d)
    cd "$wd"
    # Unquoted heredoc delimiter: ${ENV_NAME} and
    # $RDP_CLASSIFIER_VERSION are substituted when the file is written
    cat >environment.yml <<EOF
name: ${ENV_NAME}
channels:
  - defaults
  - conda-forge
  - bioconda
dependencies:
  - python=2.7
  - cutadapt=1.11
  - sickle-trim=1.33
  - bioawk=1.0
  - pandaseq=2.8.1
  - spades=3.5.0
  - fastqc=0.11.3
  - qiime=1.8.0
  - blast-legacy=2.2.26
  - fasta-splitter=0.2.4
  - rdp_classifier=$RDP_CLASSIFIER_VERSION
  - vsearch=1.1.3
  # Need to explicitly specify libgfortran
  # version (otherwise get version incompatible
  # with numpy=1.7.1)
  - libgfortran=1.0
  # Compilers needed to build R
  - gcc_linux-64
  - gxx_linux-64
  - gfortran_linux-64
EOF
    "${CONDA}" env create --name "${ENV_NAME}" -f environment.yml
    echo "Created conda environment in ${ENV_DIR}"
    cd "$cwd"
    # Remove the whole working directory (including any dotfiles)
    rm -rf "${wd:?}"
}
135 #
# Install all the non-conda dependencies in a single
# function (invokes separate functions for each package)
#
# Each package is skipped when its entry point already exists in
# BIN_DIR. A single temporary working directory is used while the
# installers run; it is removed and the original working directory is
# restored before returning.
# Globals: BIN_DIR (read)
install_non_conda_packages()
{
    echo "+++++++++++++++++++++++++++++"
    echo "Installing non-conda packages"
    echo "+++++++++++++++++++++++++++++"
    # Temporary working directory (created once only, so that no
    # stray directory is left behind)
    local cwd wd
    cwd=$(pwd)
    wd=$(mktemp -d)
    cd "$wd"
    # Amplicon analysis pipeline
    echo -n "Installing Amplicon_analysis_pipeline..."
    if [ -e "${BIN_DIR}/Amplicon_analysis_pipeline.sh" ] ; then
        echo "already installed"
    else
        install_amplicon_analysis_pipeline
        echo "ok"
    fi
    # ChimeraSlayer
    echo -n "Installing ChimeraSlayer..."
    if [ -e "${BIN_DIR}/ChimeraSlayer.pl" ] ; then
        echo "already installed"
    else
        install_chimeraslayer
        echo "ok"
    fi
    # Uclust
    echo -n "Installing uclust for QIIME/pyNAST..."
    if [ -e "${BIN_DIR}/uclust" ] ; then
        echo "already installed"
    else
        install_uclust
        echo "ok"
    fi
    # R 3.2.1
    echo -n "Checking for R 3.2.1..."
    if [ -e "${BIN_DIR}/R" ] ; then
        echo "R already installed"
    else
        echo "not found"
        install_R_3_2_1
    fi
    # Leave things as we found them
    cd "$cwd"
    rm -rf "${wd:?}"
}
181 #
# Amplicon analysis pipeline
#
# Downloads the pipeline release tarball, copies its *.sh scripts and
# the 'uc2otutab' directory into
# TOP_DIR/share/amplicon_analysis_pipeline-<version> (also reachable
# via the unversioned symlink TOP_DIR/share/amplicon_analysis_pipeline)
# and generates two scripts in BIN_DIR:
#   Amplicon_analysis_pipeline.sh  wrapper that sets up the environment
#                                  and runs the real driver script
#   install_reference_data.sh      runs References.sh in a given directory
# Globals: PIPELINE_VERSION, RDP_CLASSIFIER_VERSION, TOP_DIR, BIN_DIR,
#          CONDA_BIN, ENV_NAME (read); INSTALL_DIR (written)
install_amplicon_analysis_pipeline()
{
    local cwd wd
    cwd=$(pwd)
    wd=$(mktemp -d)
    cd "$wd"
    wget -q "https://github.com/MTutino/Amplicon_analysis/archive/v${PIPELINE_VERSION}.tar.gz"
    tar zxf "v${PIPELINE_VERSION}.tar.gz"
    cd "Amplicon_analysis-${PIPELINE_VERSION}"
    INSTALL_DIR="${TOP_DIR}/share/amplicon_analysis_pipeline-${PIPELINE_VERSION}"
    mkdir -p "$INSTALL_DIR"
    ln -s "$INSTALL_DIR" "${TOP_DIR}/share/amplicon_analysis_pipeline"
    for f in *.sh ; do
        /bin/cp "$f" "$INSTALL_DIR"
    done
    /bin/cp -r uc2otutab "$INSTALL_DIR"
    mkdir -p "${BIN_DIR}"
    # In the heredocs below unescaped variables are substituted now (at
    # install time); variables written as \$... end up in the generated
    # script and are evaluated when that script runs
    cat >"${BIN_DIR}/Amplicon_analysis_pipeline.sh" <<EOF
#!/usr/bin/env bash
#
# Point to Qiime config
export QIIME_CONFIG_FP="${TOP_DIR}/qiime/qiime_config"
# Set up the RDP jar file
export RDP_JAR_PATH="${TOP_DIR}/share/rdp_classifier/rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar"
# Put the scripts onto the PATH
export PATH="${BIN_DIR}:${INSTALL_DIR}:\$PATH"
# Activate the conda environment
export PATH="${CONDA_BIN}:\$PATH"
source "${CONDA_BIN}/activate" "${ENV_NAME}"
# Execute the driver script with the supplied arguments
"$INSTALL_DIR/Amplicon_analysis_pipeline.sh" "\$@"
exit \$?
EOF
    chmod 0755 "${BIN_DIR}/Amplicon_analysis_pipeline.sh"
    # NB options cannot be given on an '/usr/bin/env' shebang line (on
    # Linux everything after the interpreter is passed as one argument,
    # so 'bash -e' would be looked up as a program name); 'set -e' is
    # used in the generated script instead
    cat >"${BIN_DIR}/install_reference_data.sh" <<EOF
#!/usr/bin/env bash
set -e
#
function usage() {
  echo "Usage: \$(basename "\$0") DIR"
}
if [ -z "\$1" ] ; then
  usage
  exit 0
elif [ "\$1" == "--help" ] || [ "\$1" == "-h" ] ; then
  usage
  echo ""
  echo "Install reference data into DIR"
  exit 0
fi
echo "=========================================="
echo "Installing Amplicon analysis pipeline data"
echo "=========================================="
if [ ! -e "\$1" ] ; then
    echo "Making directory \$1"
    mkdir -p "\$1"
fi
cd "\$1"
DATA_DIR=\$(pwd)
echo "Installing reference data under \$DATA_DIR"
"$INSTALL_DIR/References.sh"
echo ""
echo "Use '-r \$DATA_DIR' when running Amplicon_analysis_pipeline.sh"
echo "to use the reference data from this directory"
echo ""
echo "\$(basename "\$0"): finished"
EOF
    chmod 0755 "${BIN_DIR}/install_reference_data.sh"
    cd "$cwd"
    # Remove the whole working directory (including any dotfiles)
    rm -rf "${wd:?}"
}
254 #
# ChimeraSlayer
#
# Downloads the microbiomeutil 2010-04-29 tarball, copies its
# ChimeraSlayer directory into
# TOP_DIR/share/microbiome_chimeraslayer-2010-04-29 (also reachable via
# the unversioned symlink TOP_DIR/share/microbiome_chimeraslayer) and
# writes a wrapper script BIN_DIR/ChimeraSlayer.pl.
# Globals: TOP_DIR, BIN_DIR (read); INSTALL_DIR (written)
install_chimeraslayer()
{
    local cwd wd
    cwd=$(pwd)
    wd=$(mktemp -d)
    cd "$wd"
    wget -q https://sourceforge.net/projects/microbiomeutil/files/__OLD_VERSIONS/microbiomeutil_2010-04-29.tar.gz
    tar zxf microbiomeutil_2010-04-29.tar.gz
    cd microbiomeutil_2010-04-29
    INSTALL_DIR="${TOP_DIR}/share/microbiome_chimeraslayer-2010-04-29"
    mkdir -p "$INSTALL_DIR"
    ln -s "$INSTALL_DIR" "${TOP_DIR}/share/microbiome_chimeraslayer"
    /bin/cp -r ChimeraSlayer "$INSTALL_DIR"
    # Don't rely on another installer having created BIN_DIR already
    mkdir -p "${BIN_DIR}"
    # \$PATH and \$@ are escaped so that they are evaluated when the
    # wrapper runs; an unescaped \$@ would be replaced here, at install
    # time, by this function's own (empty) argument list and the
    # wrapper would never forward its arguments
    cat >"${BIN_DIR}/ChimeraSlayer.pl" <<EOF
#!/usr/bin/env bash
export PATH="$INSTALL_DIR:\$PATH"
"$INSTALL_DIR/ChimeraSlayer/ChimeraSlayer.pl" "\$@"
EOF
    chmod 0755 "${INSTALL_DIR}/ChimeraSlayer/ChimeraSlayer.pl"
    chmod 0755 "${BIN_DIR}/ChimeraSlayer.pl"
    cd "$cwd"
    # Remove the whole working directory (including any dotfiles)
    rm -rf "${wd:?}"
}
279 #
# uclust required for QIIME/pyNAST
# License only allows this version to be used with those two packages
# See: http://drive5.com/uclust/downloads1_2_22q.html
#
# Downloads the uclust 1.2.22 binary, installs it as
# TOP_DIR/share/uclust-1.2.22/uclust (also reachable via the
# unversioned symlink TOP_DIR/share/uclust) and links it into BIN_DIR.
# Globals: TOP_DIR, BIN_DIR (read); INSTALL_DIR (written)
install_uclust()
{
    local cwd wd
    cwd=$(pwd)
    wd=$(mktemp -d)
    cd "$wd"
    wget -q http://drive5.com/uclust/uclustq1.2.22_i86linux64
    INSTALL_DIR="${TOP_DIR}/share/uclust-1.2.22"
    mkdir -p "$INSTALL_DIR"
    ln -s "$INSTALL_DIR" "${TOP_DIR}/share/uclust"
    /bin/mv uclustq1.2.22_i86linux64 "${INSTALL_DIR}/uclust"
    chmod 0755 "${INSTALL_DIR}/uclust"
    # Make sure BIN_DIR is a directory, otherwise 'ln' would create
    # a link called BIN_DIR rather than BIN_DIR/uclust
    mkdir -p "${BIN_DIR}"
    ln -s "${INSTALL_DIR}/uclust" "${BIN_DIR}/uclust"
    cd "$cwd"
    # Remove the whole working directory (including any dotfiles)
    rm -rf "${wd:?}"
}
300 #
# R 3.2.1
# Can't use version from conda due to dependency conflicts
#
# Activates the conda environment, downloads the R 3.2.1 source code
# into a temporary directory, then configures, builds and installs it
# with TOP_DIR as the prefix before deactivating the environment again.
# The output of every build step is collected in one INSTALL.log file
# in the temporary directory; if a step fails the directory is kept and
# the location of the log is reported via fail().
# Globals: CONDA_BIN, ENV_NAME, TOP_DIR (read); INSTALL_DIR (written)
install_R_3_2_1()
{
    . "${CONDA_BIN}/activate" "${ENV_NAME}"
    local cwd wd log
    cwd=$(pwd)
    wd=$(mktemp -d)
    log="${wd}/INSTALL.log"
    cd "$wd"
    echo -n "Fetching R 3.2.1 source code..."
    wget -q http://cran.r-project.org/src/base/R-3/R-3.2.1.tar.gz
    echo "ok"
    INSTALL_DIR="${TOP_DIR}"
    mkdir -p "$INSTALL_DIR"
    echo -n "Unpacking source code..."
    tar xzf R-3.2.1.tar.gz >"$log" 2>&1 \
        || fail "unpacking R source failed (see $log)"
    echo "ok"
    cd R-3.2.1
    echo -n "Running configure..."
    ./configure --prefix="$INSTALL_DIR" --with-x=no --with-readline=no >>"$log" 2>&1 \
        || fail "R configure failed (see $log)"
    echo "ok"
    echo -n "Running make..."
    make >>"$log" 2>&1 \
        || fail "R make failed (see $log)"
    echo "ok"
    echo -n "Running make install..."
    make install >>"$log" 2>&1 \
        || fail "R make install failed (see $log)"
    echo "ok"
    cd "$cwd"
    # Remove the whole working directory (including the log)
    rm -rf "${wd:?}"
    . "${CONDA_BIN}/deactivate"
}
#
# Set up the links and configuration files that the pipeline expects
#
# Creates (unless they already exist):
#   BIN_DIR/vsearch113         -> ENV_DIR/bin/vsearch
#   BIN_DIR/fasta-splitter.pl  -> ENV_DIR/share/fasta-splitter/fasta-splitter.pl
#   TOP_DIR/share/rdp_classifier/rdp_classifier-<version>.jar
#                              -> ENV_DIR/share/rdp_classifier/rdp_classifier.jar
#   TOP_DIR/qiime/qiime_config (sets qiime_scripts_dir to ENV_DIR/bin)
# Calls fail() if one of the link targets is missing from the conda
# environment.
# Globals: BIN_DIR, ENV_DIR, TOP_DIR, RDP_CLASSIFIER_VERSION (read)
setup_pipeline_environment()
{
    echo "+++++++++++++++++++++++++++++++"
    echo "Setting up pipeline environment"
    echo "+++++++++++++++++++++++++++++++"
    # Make sure there is somewhere to put the links
    mkdir -p "${BIN_DIR}"
    # vsearch113
    echo -n "Setting up vsearch113..."
    if [ -e "${BIN_DIR}/vsearch113" ] ; then
        echo "already exists"
    elif [ ! -e "${ENV_DIR}/bin/vsearch" ] ; then
        echo "failed"
        fail "vsearch not found"
    else
        ln -s "${ENV_DIR}/bin/vsearch" "${BIN_DIR}/vsearch113"
        echo "ok"
    fi
    # fasta_splitter.pl
    echo -n "Setting up fasta_splitter.pl..."
    if [ -e "${BIN_DIR}/fasta-splitter.pl" ] ; then
        echo "already exists"
    elif [ ! -e "${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl" ] ; then
        echo "failed"
        fail "fasta-splitter.pl not found"
    else
        ln -s "${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl" "${BIN_DIR}/fasta-splitter.pl"
        echo "ok"
    fi
    # rdp_classifier.jar
    local rdp_classifier_jar="rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar"
    echo -n "Setting up rdp_classifier.jar..."
    if [ -e "${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar}" ] ; then
        echo "already exists"
    elif [ ! -e "${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar" ] ; then
        echo "failed"
        fail "rdp_classifier.jar not found"
    else
        mkdir -p "${TOP_DIR}/share/rdp_classifier"
        ln -s "${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar" "${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar}"
        echo "ok"
    fi
    # qiime_config
    echo -n "Setting up qiime_config..."
    if [ -e "${TOP_DIR}/qiime/qiime_config" ] ; then
        echo "already exists"
    else
        mkdir -p "${TOP_DIR}/qiime"
        cat >"${TOP_DIR}/qiime/qiime_config" <<EOF-qiime-config
qiime_scripts_dir	${ENV_DIR}/bin
EOF-qiime-config
        echo "ok"
    fi
}
384 #
# Remove the compilers from the conda environment
# Not sure if this step is necessary
#
# Runs 'conda remove' non-interactively (-y) on the three compiler
# packages that were added to the environment for building R.
# Globals: CONDA, ENV_NAME (read)
remove_conda_compilers()
{
    echo "+++++++++++++++++++++++++++++++++++++++++"
    echo "Removing compilers from conda environment"
    echo "+++++++++++++++++++++++++++++++++++++++++"
    "${CONDA}" remove -y -n "${ENV_NAME}" gcc_linux-64 gxx_linux-64 gfortran_linux-64
}
394 #
# Top level script does the installation
#
# Refuses to run if TOP_DIR already exists; otherwise creates it and
# runs each installation stage in turn, then prints instructions for
# installing the reference data and running the pipeline.
echo "======================================="
echo "Amplicon_analysis_pipeline installation"
echo "======================================="
echo "Installing into ${TOP_DIR}"
if [ -e "${TOP_DIR}" ] ; then
    fail "Directory already exists"
fi
mkdir -p "${TOP_DIR}"
install_conda
install_conda_packages
install_non_conda_packages
setup_pipeline_environment
remove_conda_compilers
echo "===================================="
echo "Amplicon_analysis_pipeline installed"
echo "===================================="
echo ""
echo "Install reference data using:"
echo ""
echo "\$ ${BIN_DIR}/install_reference_data.sh DIR"
echo ""
echo "Run pipeline scripts using:"
echo ""
echo "\$ ${BIN_DIR}/Amplicon_analysis_pipeline.sh ..."
echo ""
echo "(or add ${BIN_DIR} to your PATH)"
echo ""
echo "$(basename "$0"): finished"
424 ##
425 #