Mercurial > repos > pjbriggs > amplicon_analysis_pipeline
comparison install_amplicon_analysis.sh @ 3:3ab198df8f3f draft
planemo upload for repository https://github.com/pjbriggs/Amplicon_analysis-galaxy commit 15390f18b91d838880d952eb2714f689bbd8a042
author | pjbriggs |
---|---|
date | Thu, 18 Oct 2018 09:18:04 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:43d6f81bc667 | 3:3ab198df8f3f |
---|---|
1 #!/bin/sh -e | |
2 # | |
3 # Prototype script to setup a conda environment with the | |
4 # dependencies needed for the Amplicon_analysis_pipeline | |
5 # script | |
6 # | |
7 # Handle command line | |
# Print a short usage summary to stdout.
# The synopsis line uses the name the script was invoked as ($0).
usage()
{
    # $0 is quoted so that a script path containing whitespace
    # is still reduced to its base name correctly
    echo "Usage: $(basename -- "$0") [DIR]"
    echo ""
    echo "Installs the Amplicon_analysis_pipeline package plus"
    echo "dependencies in directory DIR (or current directory "
    echo "if DIR not supplied)"
}
if [ -n "$1" ] ; then
    # Check if help was requested
    case "$1" in
        --help|-h)
            usage
            exit 0
            ;;
    esac
    # Anything else is taken to be the installation directory.
    # Quoted so that directory names containing whitespace work.
    cd "$1"
fi
# Versions of the components being installed
PIPELINE_VERSION="1.2.3"
RDP_CLASSIFIER_VERSION="2.2"
# Installation layout: everything goes under a versioned
# top-level directory created in the current directory
TOP_DIR="$(pwd)/Amplicon_analysis-${PIPELINE_VERSION}"
BIN_DIR="${TOP_DIR}/bin"
# Private conda installation and its subdirectories
CONDA_DIR="${TOP_DIR}/conda"
CONDA_BIN="${CONDA_DIR}/bin"
CONDA_LIB="${CONDA_DIR}/lib"
CONDA="${CONDA_BIN}/conda"
# Name and location of the conda environment used by the pipeline
ENV_NAME="amplicon_analysis_pipeline@${PIPELINE_VERSION}"
ENV_DIR="${CONDA_DIR}/envs/${ENV_NAME}"
39 # | |
40 # Functions | |
41 # | |
# Report failure and terminate the script.
# Arguments: $@ - words of the error message
# Outputs:   "ERROR <message>" on stderr; two blank lines and an
#            "<script>: installation failed" line on stdout
# Exits with status 1 (never returns).
fail()
{
    echo ""
    # "$*" keeps the message intact: an unquoted $@ would collapse
    # runs of whitespace and expand glob characters in the message
    echo "ERROR $*" >&2
    echo ""
    echo "$(basename -- "$0"): installation failed"
    exit 1
}
51 # | |
# Rewrite the shebangs in the installed conda scripts so that
# '#!<CONDA_BIN>/<prog>' becomes '#!/usr/bin/env <prog>', i.e.
# remove the full path to the conda 'bin' directory.
# Globals: CONDA_BIN (read)
# Every regular file found under CONDA_BIN is edited in place.
rewrite_conda_shebangs()
{
    local conda_bin_re
    # Escape the characters that are special in a sed regex, plus
    # the ',' used as the s-command delimiter, so that the
    # directory name is matched literally
    conda_bin_re=$(printf '%s' "${CONDA_BIN}" | sed 's/[][\.*^$,]/\\&/g')
    # '{} +' hands many files to each sed invocation instead of
    # starting one sed process per file
    find "${CONDA_BIN}" -type f \
        -exec sed -i "s,^#!${conda_bin_re}/,#!/usr/bin/env ,g" {} +
}
59 # | |
# Install a private Miniconda2 into CONDA_DIR.
# Globals: CONDA_DIR, CONDA_BIN (read); PATH (CONDA_BIN prepended)
# If CONDA_DIR already exists a notice is written to stderr and
# the function returns without installing anything.
install_conda()
{
    echo "++++++++++++++++"
    echo "Installing conda"
    echo "++++++++++++++++"
    # Quoted: an unquoted path containing whitespace makes the
    # test itself fail, which would fall through to a reinstall
    if [ -e "${CONDA_DIR}" ] ; then
        echo "*** $CONDA_DIR already exists ***" >&2
        return
    fi
    # Declared separately from the assignments so that a failing
    # pwd/mktemp is not masked by the exit status of 'local'
    local cwd wd
    cwd=$(pwd)
    wd=$(mktemp -d)
    cd "$wd"
    wget -q https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh
    bash ./Miniconda2-latest-Linux-x86_64.sh -b -p "${CONDA_DIR}"
    echo "Installed conda in ${CONDA_DIR}"
    # Update the installation files
    # This is to avoid problems when the length of the installation
    # directory path exceeds the limit for the shebang statement
    # in the conda files
    echo ""
    echo -n "Rewriting conda shebangs..."
    rewrite_conda_shebangs
    echo "ok"
    echo -n "Adding conda bin to PATH..."
    PATH="${CONDA_BIN}:$PATH"
    echo "ok"
    cd "$cwd"
    # Remove the scratch directory (including any dotfiles);
    # ':?' guards against an empty variable
    rm -rf -- "${wd:?}"
}
91 # | |
# Create the conda environment holding the pipeline dependencies.
# Globals: CONDA, ENV_NAME, ENV_DIR, RDP_CLASSIFIER_VERSION (read)
# Writes an environment.yml into a scratch directory and passes
# it to 'conda env create'; the scratch directory is removed
# afterwards and the working directory is restored.
install_conda_packages()
{
    echo "+++++++++++++++++++++++++"
    echo "Installing conda packages"
    echo "+++++++++++++++++++++++++"
    # Declared separately from the assignments so that a failing
    # pwd/mktemp is not masked by the exit status of 'local'
    local cwd wd
    cwd=$(pwd)
    wd=$(mktemp -d)
    cd "$wd"
    cat >environment.yml <<EOF
name: ${ENV_NAME}
channels:
  - defaults
  - conda-forge
  - bioconda
dependencies:
  - python=2.7
  - cutadapt=1.11
  - sickle-trim=1.33
  - bioawk=1.0
  - pandaseq=2.8.1
  - spades=3.5.0
  - fastqc=0.11.3
  - qiime=1.8.0
  - blast-legacy=2.2.26
  - fasta-splitter=0.2.4
  - rdp_classifier=$RDP_CLASSIFIER_VERSION
  - vsearch=1.1.3
  # Need to explicitly specify libgfortran
  # version (otherwise get version incompatible
  # with numpy=1.7.1)
  - libgfortran=1.0
  # Compilers needed to build R
  - gcc_linux-64
  - gxx_linux-64
  - gfortran_linux-64
EOF
    "${CONDA}" env create --name "${ENV_NAME}" -f environment.yml
    echo "Created conda environment in ${ENV_DIR}"
    cd "$cwd"
    # Remove the scratch directory; ':?' guards against an
    # empty variable
    rm -rf -- "${wd:?}"
}
135 # | |
# Install all the non-conda dependencies in a single
# function (invokes separate functions for each package).
# Globals: BIN_DIR (read)
# Each package is skipped when its entry point already exists
# in BIN_DIR. The function works in a scratch directory, which
# is removed before returning; the caller's working directory
# is restored.
install_non_conda_packages()
{
    echo "+++++++++++++++++++++++++++++"
    echo "Installing non-conda packages"
    echo "+++++++++++++++++++++++++++++"
    # Temporary working directory (created exactly once, so no
    # stray directory is left behind)
    local cwd wd
    cwd=$(pwd)
    wd=$(mktemp -d)
    cd "$wd"
    # Amplicon analysis pipeline
    echo -n "Installing Amplicon_analysis_pipeline..."
    if [ -e "${BIN_DIR}/Amplicon_analysis_pipeline.sh" ] ; then
        echo "already installed"
    else
        install_amplicon_analysis_pipeline
        echo "ok"
    fi
    # ChimeraSlayer
    echo -n "Installing ChimeraSlayer..."
    if [ -e "${BIN_DIR}/ChimeraSlayer.pl" ] ; then
        echo "already installed"
    else
        install_chimeraslayer
        echo "ok"
    fi
    # Uclust
    echo -n "Installing uclust for QIIME/pyNAST..."
    if [ -e "${BIN_DIR}/uclust" ] ; then
        echo "already installed"
    else
        install_uclust
        echo "ok"
    fi
    # R 3.2.1
    echo -n "Checking for R 3.2.1..."
    if [ -e "${BIN_DIR}/R" ] ; then
        echo "R already installed"
    else
        echo "not found"
        install_R_3_2_1
    fi
    # Go back to where we started and remove the scratch directory
    cd "$cwd"
    rm -rf -- "${wd:?}"
}
181 # | |
# Amplicon analysis pipeline
# Downloads the release tarball for PIPELINE_VERSION, copies its
# *.sh scripts and the uc2otutab directory under TOP_DIR/share,
# and generates two wrapper scripts in BIN_DIR:
#   Amplicon_analysis_pipeline.sh - sets up the environment and
#                                   runs the pipeline driver
#   install_reference_data.sh     - runs References.sh in a
#                                   user-supplied directory
# Globals: PIPELINE_VERSION, RDP_CLASSIFIER_VERSION, TOP_DIR,
#          BIN_DIR, CONDA_BIN, ENV_NAME (read); INSTALL_DIR (set)
install_amplicon_analysis_pipeline()
{
    # Scratch directory is created exactly once so that nothing
    # is left behind after the cleanup at the end
    local cwd wd f
    cwd=$(pwd)
    wd=$(mktemp -d)
    cd "$wd"
    wget -q "https://github.com/MTutino/Amplicon_analysis/archive/v${PIPELINE_VERSION}.tar.gz"
    tar zxf "v${PIPELINE_VERSION}.tar.gz"
    cd "Amplicon_analysis-${PIPELINE_VERSION}"
    INSTALL_DIR="${TOP_DIR}/share/amplicon_analysis_pipeline-${PIPELINE_VERSION}"
    mkdir -p "$INSTALL_DIR"
    ln -s "$INSTALL_DIR" "${TOP_DIR}/share/amplicon_analysis_pipeline"
    for f in *.sh ; do
        /bin/cp "$f" "$INSTALL_DIR"
    done
    /bin/cp -r uc2otutab "$INSTALL_DIR"
    mkdir -p "${BIN_DIR}"
    # Wrapper for the pipeline driver. Unescaped variables are
    # expanded now (at install time); \$-escaped ones are left
    # for the wrapper to expand when it runs.
    cat >"${BIN_DIR}/Amplicon_analysis_pipeline.sh" <<EOF
#!/usr/bin/env bash
#
# Point to Qiime config
export QIIME_CONFIG_FP="${TOP_DIR}/qiime/qiime_config"
# Set up the RDP jar file
export RDP_JAR_PATH="${TOP_DIR}/share/rdp_classifier/rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar"
# Put the scripts onto the PATH
export PATH="${BIN_DIR}:${INSTALL_DIR}:\$PATH"
# Activate the conda environment
export PATH="${CONDA_BIN}:\$PATH"
source "${CONDA_BIN}/activate" "${ENV_NAME}"
# Execute the driver script with the supplied arguments
"$INSTALL_DIR/Amplicon_analysis_pipeline.sh" "\$@"
exit \$?
EOF
    chmod 0755 "${BIN_DIR}/Amplicon_analysis_pipeline.sh"
    # Wrapper for installing the reference data. 'set -e' is used
    # rather than '#!/usr/bin/env bash -e': on Linux everything
    # after the interpreter path is passed as a single argument,
    # so env would look for a program literally named 'bash -e'.
    cat >"${BIN_DIR}/install_reference_data.sh" <<EOF
#!/usr/bin/env bash
set -e
#
function usage() {
  echo "Usage: \$(basename "\$0") DIR"
}
if [ -z "\$1" ] ; then
  usage
  exit 0
elif [ "\$1" == "--help" ] || [ "\$1" == "-h" ] ; then
  usage
  echo ""
  echo "Install reference data into DIR"
  exit 0
fi
echo "=========================================="
echo "Installing Amplicon analysis pipeline data"
echo "=========================================="
if [ ! -e "\$1" ] ; then
    echo "Making directory \$1"
    mkdir -p "\$1"
fi
cd "\$1"
DATA_DIR=\$(pwd)
echo "Installing reference data under \$DATA_DIR"
"$INSTALL_DIR/References.sh"
echo ""
echo "Use '-r \$DATA_DIR' when running Amplicon_analysis_pipeline.sh"
echo "to use the reference data from this directory"
echo ""
echo "\$(basename "\$0"): finished"
EOF
    chmod 0755 "${BIN_DIR}/install_reference_data.sh"
    cd "$cwd"
    rm -rf -- "${wd:?}"
}
254 # | |
# ChimeraSlayer
# Downloads the microbiomeutil 2010-04-29 tarball, copies its
# ChimeraSlayer directory under TOP_DIR/share and generates a
# BIN_DIR/ChimeraSlayer.pl wrapper that runs the real script.
# Globals: TOP_DIR, BIN_DIR (read); INSTALL_DIR (set)
install_chimeraslayer()
{
    local cwd wd
    cwd=$(pwd)
    wd=$(mktemp -d)
    cd "$wd"
    wget -q https://sourceforge.net/projects/microbiomeutil/files/__OLD_VERSIONS/microbiomeutil_2010-04-29.tar.gz
    tar zxf microbiomeutil_2010-04-29.tar.gz
    cd microbiomeutil_2010-04-29
    INSTALL_DIR="${TOP_DIR}/share/microbiome_chimeraslayer-2010-04-29"
    mkdir -p "$INSTALL_DIR"
    ln -s "$INSTALL_DIR" "${TOP_DIR}/share/microbiome_chimeraslayer"
    /bin/cp -r ChimeraSlayer "$INSTALL_DIR"
    # Make sure the wrapper's directory exists even when this is
    # the first package to be installed
    mkdir -p "${BIN_DIR}"
    # "\$@" is escaped so that it is expanded when the wrapper
    # runs; unescaped it would be replaced at install time by
    # this function's own (empty) argument list, and the wrapper
    # would never forward its arguments to ChimeraSlayer.pl
    cat >"${BIN_DIR}/ChimeraSlayer.pl" <<EOF
#!/usr/bin/env bash
export PATH="$INSTALL_DIR:\$PATH"
"$INSTALL_DIR/ChimeraSlayer/ChimeraSlayer.pl" "\$@"
EOF
    chmod 0755 "${INSTALL_DIR}/ChimeraSlayer/ChimeraSlayer.pl"
    chmod 0755 "${BIN_DIR}/ChimeraSlayer.pl"
    cd "$cwd"
    rm -rf -- "${wd:?}"
}
279 # | |
# uclust required for QIIME/pyNAST
# License only allows this version to be used with those two packages
# See: http://drive5.com/uclust/downloads1_2_22q.html
# Downloads the uclust 1.2.22 binary into TOP_DIR/share/uclust-1.2.22
# and links it into BIN_DIR.
# Globals: TOP_DIR, BIN_DIR (read); INSTALL_DIR (set)
install_uclust()
{
    # Scratch directory is created exactly once so that nothing
    # is left behind after the cleanup at the end
    local cwd wd
    cwd=$(pwd)
    wd=$(mktemp -d)
    cd "$wd"
    wget -q http://drive5.com/uclust/uclustq1.2.22_i86linux64
    INSTALL_DIR="${TOP_DIR}/share/uclust-1.2.22"
    mkdir -p "$INSTALL_DIR"
    ln -s "$INSTALL_DIR" "${TOP_DIR}/share/uclust"
    /bin/mv uclustq1.2.22_i86linux64 "${INSTALL_DIR}/uclust"
    chmod 0755 "${INSTALL_DIR}/uclust"
    # BIN_DIR must exist as a directory, otherwise 'ln -s' would
    # create a link named after it instead of a link inside it
    mkdir -p "${BIN_DIR}"
    ln -s "${INSTALL_DIR}/uclust" "${BIN_DIR}"
    cd "$cwd"
    rm -rf -- "${wd:?}"
}
300 # | |
# R 3.2.1
# Can't use version from conda due to dependency conflicts
# Builds R from source with the conda environment activated and
# installs it with TOP_DIR as the prefix.
# Globals: CONDA_BIN, ENV_NAME, TOP_DIR (read); INSTALL_DIR (set)
# All build output goes to a single log file in the scratch
# directory; if a build step fails the scratch directory is kept
# and the log location is reported via fail().
install_R_3_2_1()
{
    . "${CONDA_BIN}/activate" "${ENV_NAME}"
    local cwd wd log
    cwd=$(pwd)
    wd=$(mktemp -d)
    # Absolute path, so that every step appends to the same file
    # regardless of the current directory
    log="${wd}/INSTALL.log"
    cd "$wd"
    echo -n "Fetching R 3.2.1 source code..."
    wget -q http://cran.r-project.org/src/base/R-3/R-3.2.1.tar.gz
    echo "ok"
    INSTALL_DIR="${TOP_DIR}"
    mkdir -p "$INSTALL_DIR"
    echo -n "Unpacking source code..."
    tar xzf R-3.2.1.tar.gz >"$log" 2>&1 \
        || fail "unpacking R source failed (see $log)"
    echo "ok"
    cd R-3.2.1
    echo -n "Running configure..."
    ./configure --prefix="$INSTALL_DIR" --with-x=no --with-readline=no >>"$log" 2>&1 \
        || fail "R configure failed (see $log)"
    echo "ok"
    echo -n "Running make..."
    make >>"$log" 2>&1 \
        || fail "R make failed (see $log)"
    echo "ok"
    echo -n "Running make install..."
    make install >>"$log" 2>&1 \
        || fail "R make install failed (see $log)"
    echo "ok"
    cd "$cwd"
    rm -rf -- "${wd:?}"
    . "${CONDA_BIN}/deactivate"
}
# Link the tools provided by the conda environment to the names
# and locations used by the wrapper scripts, and write the QIIME
# config file.
# Globals: BIN_DIR, ENV_DIR, TOP_DIR, RDP_CLASSIFIER_VERSION (read)
# Each item is skipped when its target already exists; fail() is
# called when the file to be linked is missing from ENV_DIR.
setup_pipeline_environment()
{
    echo "+++++++++++++++++++++++++++++++"
    echo "Setting up pipeline environment"
    echo "+++++++++++++++++++++++++++++++"
    # The links below are created inside BIN_DIR
    mkdir -p "${BIN_DIR}"
    # vsearch113
    echo -n "Setting up vsearch113..."
    if [ -e "${BIN_DIR}/vsearch113" ] ; then
        echo "already exists"
    elif [ ! -e "${ENV_DIR}/bin/vsearch" ] ; then
        echo "failed"
        fail "vsearch not found"
    else
        ln -s "${ENV_DIR}/bin/vsearch" "${BIN_DIR}/vsearch113"
        echo "ok"
    fi
    # fasta_splitter.pl
    echo -n "Setting up fasta_splitter.pl..."
    if [ -e "${BIN_DIR}/fasta-splitter.pl" ] ; then
        echo "already exists"
    elif [ ! -e "${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl" ] ; then
        echo "failed"
        fail "fasta-splitter.pl not found"
    else
        ln -s "${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl" "${BIN_DIR}/fasta-splitter.pl"
        echo "ok"
    fi
    # rdp_classifier.jar
    # The link carries the version in its name
    local rdp_classifier_jar="rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar"
    echo -n "Setting up rdp_classifier.jar..."
    if [ -e "${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar}" ] ; then
        echo "already exists"
    elif [ ! -e "${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar" ] ; then
        echo "failed"
        fail "rdp_classifier.jar not found"
    else
        mkdir -p "${TOP_DIR}/share/rdp_classifier"
        ln -s "${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar" "${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar}"
        echo "ok"
    fi
    # qiime_config
    echo -n "Setting up qiime_config..."
    if [ -e "${TOP_DIR}/qiime/qiime_config" ] ; then
        echo "already exists"
    else
        mkdir -p "${TOP_DIR}/qiime"
        cat >"${TOP_DIR}/qiime/qiime_config" <<EOF-qiime-config
qiime_scripts_dir ${ENV_DIR}/bin
EOF-qiime-config
        echo "ok"
    fi
}
384 # | |
# Remove the compilers from the conda environment
# Not sure if this step is necessary
# Globals: CONDA, ENV_NAME (read)
remove_conda_compilers()
{
    echo "+++++++++++++++++++++++++++++++++++++++++"
    echo "Removing compilers from conda environment"
    echo "+++++++++++++++++++++++++++++++++++++++++"
    # Quoted so that an installation path containing whitespace
    # still resolves to the conda executable
    "${CONDA}" remove -y -n "${ENV_NAME}" gcc_linux-64 gxx_linux-64 gfortran_linux-64
}
394 # | |
# Top level script does the installation
# Refuses to run if TOP_DIR already exists, then runs each
# installation stage in order and prints how to use the result.
echo "======================================="
echo "Amplicon_analysis_pipeline installation"
echo "======================================="
echo "Installing into ${TOP_DIR}"
# Quoted so that an installation path containing whitespace is
# tested and created as a single directory
if [ -e "${TOP_DIR}" ] ; then
    fail "Directory already exists"
fi
mkdir -p "${TOP_DIR}"
install_conda
install_conda_packages
install_non_conda_packages
setup_pipeline_environment
remove_conda_compilers
echo "===================================="
echo "Amplicon_analysis_pipeline installed"
echo "===================================="
echo ""
echo "Install reference data using:"
echo ""
echo "\$ ${BIN_DIR}/install_reference_data.sh DIR"
echo ""
echo "Run pipeline scripts using:"
echo ""
echo "\$ ${BIN_DIR}/Amplicon_analysis_pipeline.sh ..."
echo ""
echo "(or add ${BIN_DIR} to your PATH)"
echo ""
echo "$(basename -- "$0"): finished"
424 ## | |
425 # |