changeset   4:86a12d75ebe4 (draft, default, tip)
planemo upload for repository https://github.com/pjbriggs/Amplicon_analysis-galaxy commit 7be61b7ed35ca3deaad68d2eae384c8cd365bcb8

author      pjbriggs
date        Fri, 20 Dec 2019 06:59:49 -0500 (2019-12-20)
parents     3ab198df8f3f
children    (none)
files       README.rst amplicon_analysis_pipeline.py amplicon_analysis_pipeline.xml install_amplicon_analysis-1.3.5.sh install_amplicon_analysis-1.3.6.sh outputs.txt tool_dependencies.xml updating-to-pipeline-1.3-DADA2.txt
diffstat    8 files changed, 1058 insertions(+), 96 deletions(-)
--- a/README.rst	Thu Oct 18 09:18:04 2018 -0400
+++ b/README.rst	Fri Dec 20 06:59:49 2019 -0500
@@ -174,6 +174,7 @@
 ========== ======================================================================
 Version    Changes
 ---------- ----------------------------------------------------------------------
+1.3.5.0    Updated to Amplicon_Analysis_Pipeline version 1.3.5.
 1.2.3.0    Updated to Amplicon_Analysis_Pipeline version 1.2.3; install
            dependencies via tool_dependencies.xml.
 1.2.2.0    Updated to Amplicon_Analysis_Pipeline version 1.2.2 (removes
--- a/amplicon_analysis_pipeline.py	Thu Oct 18 09:18:04 2018 -0400
+++ b/amplicon_analysis_pipeline.py	Fri Dec 20 06:59:49 2019 -0500
@@ -117,9 +117,9 @@
     p.add_argument("-L",dest="minimum_length")
     p.add_argument("-l",dest="sliding_window_length")
     p.add_argument("-P",dest="pipeline",
-                   choices=["vsearch","uparse","qiime"],
-                   type=str.lower,
-                   default="vsearch")
+                   choices=["Vsearch","DADA2"],
+                   type=str,
+                   default="Vsearch")
     p.add_argument("-S",dest="use_silva",action="store_true")
     p.add_argument("-H",dest="use_homd",action="store_true")
     p.add_argument("-r",dest="reference_data_path")
@@ -155,12 +155,15 @@
         sample_names.append(sample_name)
 
     # Reference database
-    if args.use_silva:
+    if args.pipeline == "Vsearch":
+        if args.use_silva:
+            ref_database = "silva"
+        elif args.use_homd:
+            ref_database = "homd"
+        else:
+            ref_database = "gg"
+    elif args.pipeline == "DADA2":
         ref_database = "silva"
-    elif args.use_homd:
-        ref_database = "homd"
-    else:
-        ref_database = "gg"
 
     # Construct the pipeline command
     print "Amplicon analysis: constructing pipeline command"
@@ -180,10 +183,11 @@
     if args.reference_data_path:
         pipeline.add_args("-r",args.reference_data_path)
     pipeline.add_args("-P",args.pipeline)
-    if ref_database == "silva":
-        pipeline.add_args("-S")
-    elif ref_database == "homd":
-        pipeline.add_args("-H")
+    if args.pipeline == "Vsearch":
+        if ref_database == "silva":
+            pipeline.add_args("-S")
+        elif ref_database == "homd":
+            pipeline.add_args("-H")
 
     # Echo the pipeline command to stdout
     print "Running %s" % pipeline
@@ -277,6 +281,9 @@
 """)
     # Look for raw and trimmed FastQC output for each sample
     for sample_name in sample_names:
+        # Replace underscores with hyphens in sample names
+        sample_name = sample_name.replace('_','-')
+        # Write HTML file with links to the FastQC boxplots
         fastqc_dir = os.path.join(sample_name,"FastQC")
         quality_boxplots.write("<h2>%s</h2>" % sample_name)
         for d in ("Raw","cutdapt_sickle/Q%s" % phred_score):
@@ -306,13 +313,41 @@
 </html>
 """)
 
+    # Handle DADA2 error rate plot PDFs
+    if args.pipeline == "DADA2":
+        print("Amplicon analysis: collecting error rate plots")
+        error_rate_plots_dir = os.path.abspath(
+            os.path.join("DADA2_OTU_tables",
+                         "Error_rate_plots"))
+        error_rate_plot_pdfs = [os.path.basename(pdf)
+                                for pdf in
+                                sorted(glob.glob(
+                                    os.path.join(error_rate_plots_dir,"*.pdf")))]
+        error_rate_plots_html = os.path.join(error_rate_plots_dir,
+                                             "error_rate_plots.html")
+        with open(error_rate_plots_html,"w") as error_rate_plots_out:
+            error_rate_plots_out.write("""<html>
+<head>
+<title>Amplicon analysis pipeline: DADA2 Error Rate Plots</title>
+</head>
+<body>
+<h1>Amplicon analysis pipeline: DADA2 Error Rate Plots</h1>
+""")
+            error_rate_plots_out.write("<ul>\n")
+            for pdf in error_rate_plot_pdfs:
+                error_rate_plots_out.write("<li>%s</li>\n" % ahref(pdf))
+            error_rate_plots_out.write("</ul>\n")
+            error_rate_plots_out.write("""</body>
+</html>
+""")
+
     # Handle additional output when categories file was supplied
     if args.categories_file is not None:
         # Alpha diversity boxplots
         print "Amplicon analysis: indexing alpha diversity boxplots"
         boxplots_dir = os.path.abspath(
             os.path.join("RESULTS",
-                         "%s_%s" % (args.pipeline.title(),
+                         "%s_%s" % (args.pipeline,
                                     ref_database),
                          "Alpha_diversity",
                          "Alpha_diversity_boxplot",
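The reference database selection introduced above reduces to a small pure function. The following condensed sketch is illustrative only (it is not part of the changeset): the Vsearch pipeline honours the -S/-H flags, while DADA2 always uses Silva:

    # Condensed summary of the selection logic in the diff above;
    # illustrative only, not code from the committed wrapper.
    def select_ref_database(pipeline, use_silva=False, use_homd=False):
        if pipeline == "Vsearch":
            if use_silva:
                return "silva"
            elif use_homd:
                return "homd"
            return "gg"
        elif pipeline == "DADA2":
            # DADA2 ignores -S/-H and always uses the Silva data
            return "silva"
        raise ValueError("unknown pipeline: %s" % pipeline)

    assert select_ref_database("Vsearch") == "gg"
    assert select_ref_database("Vsearch", use_homd=True) == "homd"
    assert select_ref_database("DADA2", use_homd=True) == "silva"

Note that the pipeline name is now matched case-sensitively ("Vsearch", "DADA2"), since the argument parser no longer lower-cases the -P value.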
--- a/amplicon_analysis_pipeline.xml	Thu Oct 18 09:18:04 2018 -0400
+++ b/amplicon_analysis_pipeline.xml	Fri Dec 20 06:59:49 2019 -0500
@@ -1,19 +1,28 @@
-<tool id="amplicon_analysis_pipeline" name="Amplicon Analysis Pipeline" version="1.2.3.0">
+<tool id="amplicon_analysis_pipeline" name="Amplicon Analysis Pipeline" version="1.3.6.0">
   <description>analyse 16S rRNA data from Illumina Miseq paired-end reads</description>
   <requirements>
-    <requirement type="package" version="1.2.3">amplicon_analysis_pipeline</requirement>
+    <requirement type="package" version="1.3.6">amplicon_analysis_pipeline</requirement>
   </requirements>
   <stdio>
     <exit_code range="1:" />
   </stdio>
   <command><![CDATA[
+
+  ## Convenience variable for pipeline name
+  #set $pipeline_name = $pipeline.pipeline_name
+
   ## Set the reference database name
-  #if $reference_database == "-S"
-  #set reference_database_name = "silva"
-  #else if $reference_database == "-H"
-  #set reference_database_name = "homd"
+  #if str( $pipeline_name ) == "DADA2"
+  #set reference_database_name = "silva"
   #else
-  #set reference_database_name = "gg"
+  #set reference_database = $pipeline.reference_database
+  #if $reference_database == "-S"
+  #set reference_database_name = "silva"
+  #else if $reference_database == "-H"
+  #set reference_database_name = "homd"
+  #else
+  #set reference_database_name = "gg"
+  #end if
   #end if
 
   ## Run the amplicon analysis pipeline wrapper
@@ -37,9 +46,9 @@
   #if str( $minimum_length ) != ""
   -L $minimum_length
   #end if
-  -P $pipeline
-  -r \$AMPLICON_ANALYSIS_REF_DATA_PATH
-  #if str( $reference_database ) != ""
+  -P $pipeline_name
+  -r \${AMPLICON_ANALYSIS_REF_DATA_PATH-ReferenceData}
+  #if str( $pipeline_name ) != "DADA2"
   ${reference_database}
   #end if
   #if str($categories_file_in) != 'None'
@@ -60,48 +69,60 @@
 
   ## Collect outputs
   cp Metatable_log/Metatable_mod.txt "${metatable_mod}" &&
-  cp ${pipeline}_OTU_tables/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_tax_OTU_table.biom "${tax_otu_table_biom_file}" &&
-  cp ${pipeline}_OTU_tables/otus.tre "${otus_tre_file}" &&
-  cp RESULTS/${pipeline}_${reference_database_name}/OTUs_count.txt "${otus_count_file}" &&
-  cp RESULTS/${pipeline}_${reference_database_name}/table_summary.txt "${table_summary_file}" &&
-  cp Multiplexed_files/${pipeline}_pipeline/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta "${dereplicated_nonchimera_otus_fasta}" &&
-  cp QUALITY_CONTROL/Reads_count.txt "$read_counts_out" &&
+  #if str( $pipeline_name ) == "Vsearch"
+  ## Vsearch-specific
+  cp ${pipeline_name}_OTU_tables/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_tax_OTU_table.biom "${tax_otu_table_biom_file}" &&
+  cp Multiplexed_files/${pipeline_name}_pipeline/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta "${dereplicated_nonchimera_otus_fasta}" &&
+  cp QUALITY_CONTROL/Reads_count.txt "$read_counts_out" &&
+  #else
+  ## DADA2-specific
+  cp ${pipeline_name}_OTU_tables/DADA2_tax_OTU_table.biom "${tax_otu_table_biom_file}" &&
+  cp ${pipeline_name}_OTU_tables/seqs.fa "${dereplicated_nonchimera_otus_fasta}" &&
+  #end if
+  cp ${pipeline_name}_OTU_tables/otus.tre "${otus_tre_file}" &&
+  cp RESULTS/${pipeline_name}_${reference_database_name}/OTUs_count.txt "${otus_count_file}" &&
+  cp RESULTS/${pipeline_name}_${reference_database_name}/table_summary.txt "${table_summary_file}" &&
   cp fastqc_quality_boxplots.html "${fastqc_quality_boxplots_html}" &&
 
-  ## HTML outputs
+  ## OTU table heatmap
+  cp RESULTS/${pipeline_name}_${reference_database_name}/Heatmap.pdf "${heatmap_otu_table_pdf}" &&
 
-  ## OTU table
-  mkdir $heatmap_otu_table_html.files_path &&
-  cp -r RESULTS/${pipeline}_${reference_database_name}/Heatmap/js $heatmap_otu_table_html.files_path &&
-  cp RESULTS/${pipeline}_${reference_database_name}/Heatmap/otu_table.html "${heatmap_otu_table_html}" &&
+  ## HTML outputs
 
   ## Phylum genus barcharts
   mkdir $phylum_genus_dist_barcharts_html.files_path &&
-  cp -r RESULTS/${pipeline}_${reference_database_name}/phylum_genus_charts/charts $phylum_genus_dist_barcharts_html.files_path &&
-  cp -r RESULTS/${pipeline}_${reference_database_name}/phylum_genus_charts/raw_data $phylum_genus_dist_barcharts_html.files_path &&
-  cp RESULTS/${pipeline}_${reference_database_name}/phylum_genus_charts/bar_charts.html "${phylum_genus_dist_barcharts_html}" &&
+  cp -r RESULTS/${pipeline_name}_${reference_database_name}/phylum_genus_charts/charts $phylum_genus_dist_barcharts_html.files_path &&
+  cp -r RESULTS/${pipeline_name}_${reference_database_name}/phylum_genus_charts/raw_data $phylum_genus_dist_barcharts_html.files_path &&
+  cp RESULTS/${pipeline_name}_${reference_database_name}/phylum_genus_charts/bar_charts.html "${phylum_genus_dist_barcharts_html}" &&
 
   ## Beta diversity weighted 2d plots
   mkdir $beta_div_even_weighted_2d_plots.files_path &&
-  cp -r RESULTS/${pipeline}_${reference_database_name}/beta_div_even/weighted_2d_plot/* $beta_div_even_weighted_2d_plots.files_path &&
-  cp RESULTS/${pipeline}_${reference_database_name}/beta_div_even/weighted_2d_plot/weighted_unifrac_pc_2D_PCoA_plots.html "${beta_div_even_weighted_2d_plots}" &&
+  cp -r RESULTS/${pipeline_name}_${reference_database_name}/beta_div_even/weighted_2d_plot/* $beta_div_even_weighted_2d_plots.files_path &&
+  cp RESULTS/${pipeline_name}_${reference_database_name}/beta_div_even/weighted_2d_plot/weighted_unifrac_pc_2D_PCoA_plots.html "${beta_div_even_weighted_2d_plots}" &&
 
   ## Beta diversity unweighted 2d plots
   mkdir $beta_div_even_unweighted_2d_plots.files_path &&
-  cp -r RESULTS/${pipeline}_${reference_database_name}/beta_div_even/unweighted_2d_plot/* $beta_div_even_unweighted_2d_plots.files_path &&
-  cp RESULTS/${pipeline}_${reference_database_name}/beta_div_even/unweighted_2d_plot/unweighted_unifrac_pc_2D_PCoA_plots.html "${beta_div_even_unweighted_2d_plots}" &&
+  cp -r RESULTS/${pipeline_name}_${reference_database_name}/beta_div_even/unweighted_2d_plot/* $beta_div_even_unweighted_2d_plots.files_path &&
+  cp RESULTS/${pipeline_name}_${reference_database_name}/beta_div_even/unweighted_2d_plot/unweighted_unifrac_pc_2D_PCoA_plots.html "${beta_div_even_unweighted_2d_plots}" &&
 
   ## Alpha diversity rarefaction plots
   mkdir $alpha_div_rarefaction_plots.files_path &&
-  cp RESULTS/${pipeline}_${reference_database_name}/Alpha_diversity/rarefaction_curves/rarefaction_plots.html $alpha_div_rarefaction_plots &&
-  cp -r RESULTS/${pipeline}_${reference_database_name}/Alpha_diversity/rarefaction_curves/average_plots $alpha_div_rarefaction_plots.files_path &&
+  cp RESULTS/${pipeline_name}_${reference_database_name}/Alpha_diversity/rarefaction_curves/rarefaction_plots.html $alpha_div_rarefaction_plots &&
+  cp -r RESULTS/${pipeline_name}_${reference_database_name}/Alpha_diversity/rarefaction_curves/average_plots $alpha_div_rarefaction_plots.files_path &&
+
+  ## DADA2 error rate plots
+  #if str($pipeline_name) == "DADA2"
+  mkdir $dada2_error_rate_plots.files_path &&
+  cp DADA2_OTU_tables/Error_rate_plots/error_rate_plots.html $dada2_error_rate_plots &&
+  cp -r DADA2_OTU_tables/Error_rate_plots/*.pdf $dada2_error_rate_plots.files_path &&
+  #end if
 
   ## Categories data
   #if str($categories_file_in) != 'None'
   ## Alpha diversity boxplots
   mkdir $alpha_div_boxplots.files_path &&
   cp alpha_diversity_boxplots.html "$alpha_div_boxplots" &&
-  cp RESULTS/${pipeline}_${reference_database_name}/Alpha_diversity/Alpha_diversity_boxplot/Categories_shannon/*.pdf $alpha_div_boxplots.files_path &&
+  cp RESULTS/${pipeline_name}_${reference_database_name}/Alpha_diversity/Alpha_diversity_boxplot/Categories_shannon/*.pdf $alpha_div_boxplots.files_path &&
   #end if
 
   ## Pipeline outputs (log files etc)
@@ -161,55 +182,63 @@
     <param type="integer" name="sliding_window_length" value="10"
            label="Minimum length in bp to retain a read after trimming"
            help="Supplied to Sickle; default is 10 (-l)" />
-    <param type="select" name="pipeline"
-           label="Pipeline to use for analysis">
-      <option value="Vsearch" selected="true" >Vsearch</option>
-      <!--
-      Remove the QIIME and Uparse options for now
-      <option value="QIIME">QIIME</option>
-      <option value="Uparse">Uparse</option>
-      -->
-    </param>
-    <param type="select" name="reference_database"
-           label="Reference database">
-      <option value="" selected="true">GreenGenes</option>
-      <option value="-S">Silva</option>
-      <option value="-H">Human Oral Microbiome Database (HOMD)</option>
-    </param>
+    <conditional name="pipeline">
+      <param type="select" name="pipeline_name"
+             label="Pipeline to use for analysis">
+        <option value="Vsearch" selected="true" >Vsearch</option>
+        <option value="DADA2">DADA2</option>
+      </param>
+      <when value="Vsearch">
+        <param type="select" name="reference_database"
+               label="Reference database">
+          <option value="" selected="true">GreenGenes</option>
+          <option value="-S">Silva</option>
+          <option value="-H">Human Oral Microbiome Database (HOMD)</option>
+        </param>
+      </when>
+      <when value="DADA2">
+      </when>
+    </conditional>
   </inputs>
   <outputs>
     <data format="tabular" name="metatable_mod"
           label="${tool.name}:${title} Metatable_mod.txt" />
     <data format="tabular" name="read_counts_out"
-          label="${tool.name} (${pipeline}):${title} read counts" />
+          label="${tool.name} (${pipeline.pipeline_name}):${title} read counts">
+      <filter>pipeline['pipeline_name'] == 'Vsearch'</filter>
+    </data>
    <data format="biom" name="tax_otu_table_biom_file"
-          label="${tool.name} (${pipeline}):${title} tax OTU table (biom format)" />
+          label="${tool.name} (${pipeline.pipeline_name}):${title} tax OTU table (biom format)" />
    <data format="tabular" name="otus_tre_file"
-          label="${tool.name} (${pipeline}):${title} otus.tre" />
+          label="${tool.name} (${pipeline.pipeline_name}):${title} otus.tre" />
    <data format="html" name="phylum_genus_dist_barcharts_html"
-          label="${tool.name} (${pipeline}):${title} phylum genus dist barcharts HTML" />
+          label="${tool.name} (${pipeline.pipeline_name}):${title} phylum genus dist barcharts HTML" />
    <data format="tabular" name="otus_count_file"
-          label="${tool.name} (${pipeline}):${title} OTUs count file" />
+          label="${tool.name} (${pipeline.pipeline_name}):${title} OTUs count file" />
    <data format="tabular" name="table_summary_file"
-          label="${tool.name} (${pipeline}):${title} table summary file" />
+          label="${tool.name} (${pipeline.pipeline_name}):${title} table summary file" />
    <data format="fasta" name="dereplicated_nonchimera_otus_fasta"
-          label="${tool.name} (${pipeline}):${title} multiplexed linearized dereplicated mc2 repset nonchimeras OTUs FASTA" />
+          label="${tool.name} (${pipeline.pipeline_name}):${title} multiplexed linearized dereplicated mc2 repset nonchimeras OTUs FASTA" />
    <data format="html" name="fastqc_quality_boxplots_html"
-          label="${tool.name} (${pipeline}):${title} FastQC per-base quality boxplots HTML" />
-    <data format="html" name="heatmap_otu_table_html"
-          label="${tool.name} (${pipeline}):${title} heatmap OTU table HTML" />
+          label="${tool.name} (${pipeline.pipeline_name}):${title} FastQC per-base quality boxplots HTML" />
+    <data format="pdf" name="heatmap_otu_table_pdf"
+          label="${tool.name} (${pipeline.pipeline_name}):${title} heatmap OTU table PDF" />
    <data format="html" name="beta_div_even_weighted_2d_plots"
-          label="${tool.name} (${pipeline}):${title} beta diversity weighted 2D plots HTML" />
+          label="${tool.name} (${pipeline.pipeline_name}):${title} beta diversity weighted 2D plots HTML" />
    <data format="html" name="beta_div_even_unweighted_2d_plots"
-          label="${tool.name} (${pipeline}):${title} beta diversity unweighted 2D plots HTML" />
+          label="${tool.name} (${pipeline.pipeline_name}):${title} beta diversity unweighted 2D plots HTML" />
    <data format="html" name="alpha_div_rarefaction_plots"
-          label="${tool.name} (${pipeline}):${title} alpha diversity rarefaction plots HTML" />
+          label="${tool.name} (${pipeline.pipeline_name}):${title} alpha diversity rarefaction plots HTML" />
+    <data format="html" name="dada2_error_rate_plots"
+          label="${tool.name} (${pipeline.pipeline_name}):${title} DADA2 error rate plots">
+      <filter>pipeline['pipeline_name'] == 'DADA2'</filter>
+    </data>
    <data format="html" name="alpha_div_boxplots"
-          label="${tool.name} (${pipeline}):${title} alpha diversity boxplots">
+          label="${tool.name} (${pipeline.pipeline_name}):${title} alpha diversity boxplots">
      <filter>categories_file_in is not None</filter>
    </data>
    <data format="html" name="log_files"
-          label="${tool.name} (${pipeline}):${title} log files" />
+          label="${tool.name} (${pipeline.pipeline_name}):${title} log files" />
  </outputs>
  <tests>
  </tests>
@@ -395,12 +424,11 @@
    380 (used for V3-V4 16S sequencing; expected length ~440bp)
 
  * **Pipeline to use for analysis** Choose the pipeline to use for OTU
-   clustering and chimera removal. The Galaxy tool currently supports
-   ``Vsearch`` only. ``Uparse`` and ``QIIME`` are planned to be added
-   shortly (the tools are already available for the stand-alone pipeline).
+   clustering and chimera removal. The Galaxy tool supports the ``Vsearch``
+   and ``DADA2`` pipelines.
 
- * **Reference database** Choose between ``GreenGenes`` and ``Silva``
-   databases for taxa assignment.
+ * **Reference database** Choose between ``GreenGenes``, ``Silva`` or
+   ``HOMD`` (Human Oral Microbiome Database) for taxa assignment.
 
 Click on **Execute** to start the analysis.
 
@@ -408,30 +436,31 @@
 **********
 
 Results are entirely generated using QIIME scripts. The results will
-appear in the History panel when the analysis is completed
+appear in the History panel when the analysis is completed.
 
- * **Vsearch_tax_OTU_table (biom format)** The OTU table in BIOM format
-   (http://biom-format.org/)
+The following outputs are captured:
 
- * **Vsearch_OTUs.tree** Phylogenetic tree constructed using
-   ``make_phylogeny.py`` (fasttree) QIIME script
-   (http://qiime.org/scripts/make_phylogeny.html)
+ * **Vsearch_tax_OTU_table.biom|DADA2_tax_OTU_table.biom (biom format)**
+   The OTU table in BIOM format (http://biom-format.org/)
 
- * **Vsearch_phylum_genus_dist_barcharts_HTML** HTML file with bar
-   charts at Phylum, Genus and Species level
+ * **otus.tre** Phylogenetic tree constructed using ``make_phylogeny.py``
+   (fasttree) QIIME script (http://qiime.org/scripts/make_phylogeny.html)
+
+ * **Phylum_genus_dist_barcharts_HTML** HTML file with bar charts at
+   Phylum, Genus and Species level
   (http://qiime.org/scripts/summarize_taxa.html and
   http://qiime.org/scripts/plot_taxa_summary.html)
 
- * **Vsearch_OTUs_count_file** Summary of OTU counts per sample
+ * **OTUs_count_file** Summary of OTU counts per sample
   (http://biom-format.org/documentation/summarizing_biom_tables.html)
 
- * **Vsearch_table_summary_file** Summary of sequences counts per sample
+ * **Table_summary_file** Summary of sequences counts per sample
   (http://biom-format.org/documentation/summarizing_biom_tables.html)
 
- * **Vsearch_multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta**
-   Fasta file with OTU sequences
+ * **multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta|seqs.fa**
+   Fasta file with OTU sequences (Vsearch|DADA2)
 
- * **Vsearch_heatmap_OTU_table_HTML** Interactive OTU heatmap
+ * **Heatmap_PDF** OTU heatmap in PDF format
   (http://qiime.org/1.8.0/scripts/make_otu_heatmap_html.html )
 
  * **Vsearch_beta_diversity_weighted_2D_plots_HTML** PCoA plots in HTML
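Because the ``<command>`` block now branches on the selected pipeline, the pipeline-specific files that feed the shared Galaxy outputs can be summarised as below. This is a hypothetical reference table for readers, not code that appears anywhere in the tool:

    # Hypothetical lookup summarising the cp commands in the <command>
    # block above; this dict is not part of the Galaxy tool itself.
    PIPELINE_SOURCES = {
        "Vsearch": {
            "tax_otu_table_biom_file":
                "Vsearch_OTU_tables/multiplexed_linearized_dereplicated_"
                "mc2_repset_nonchimeras_tax_OTU_table.biom",
            "dereplicated_nonchimera_otus_fasta":
                "Multiplexed_files/Vsearch_pipeline/multiplexed_linearized_"
                "dereplicated_mc2_repset_nonchimeras_OTUs.fasta",
            "read_counts_out": "QUALITY_CONTROL/Reads_count.txt",
        },
        "DADA2": {
            "tax_otu_table_biom_file":
                "DADA2_OTU_tables/DADA2_tax_OTU_table.biom",
            "dereplicated_nonchimera_otus_fasta":
                "DADA2_OTU_tables/seqs.fa",
        },
    }

The remaining outputs (otus.tre, OTUs_count.txt, table_summary.txt, the heatmap PDF and the diversity plots) are collected from the same relative paths for both pipelines, with only the ${pipeline_name}_${reference_database_name} prefix differing.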
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/install_amplicon_analysis-1.3.5.sh	Fri Dec 20 06:59:49 2019 -0500
@@ -0,0 +1,399 @@
+#!/bin/sh -e
+#
+# Prototype script to set up a conda environment with the
+# dependencies needed for the Amplicon_analysis_pipeline
+# script
+#
+# Handle command line
+usage()
+{
+    echo "Usage: $(basename $0) [DIR]"
+    echo ""
+    echo "Installs the Amplicon_analysis_pipeline package plus"
+    echo "dependencies in directory DIR (or current directory"
+    echo "if DIR not supplied)"
+}
+if [ ! -z "$1" ] ; then
+    # Check if help was requested
+    case "$1" in
+        --help|-h)
+            usage
+            exit 0
+            ;;
+    esac
+    # Assume it's the installation directory
+    cd $1
+fi
+# Versions
+PIPELINE_VERSION=1.3.5
+CONDA_REQUIRED_VERSION=4.6.14
+RDP_CLASSIFIER_VERSION=2.2
+# Directories
+TOP_DIR=$(pwd)/Amplicon_analysis-${PIPELINE_VERSION}
+BIN_DIR=${TOP_DIR}/bin
+CONDA_DIR=${TOP_DIR}/conda
+CONDA_BIN=${CONDA_DIR}/bin
+CONDA_LIB=${CONDA_DIR}/lib
+CONDA=${CONDA_BIN}/conda
+ENV_NAME="amplicon_analysis_pipeline@${PIPELINE_VERSION}"
+ENV_DIR=${CONDA_DIR}/envs/$ENV_NAME
+#
+# Functions
+#
+# Report failure and terminate script
+fail()
+{
+    echo ""
+    echo ERROR $@ >&2
+    echo ""
+    echo "$(basename $0): installation failed"
+    exit 1
+}
+#
+# Rewrite the shebangs in the installed conda scripts
+# to remove the full path to conda 'bin' directory
+rewrite_conda_shebangs()
+{
+    pattern="s,^#!${CONDA_BIN}/,#!/usr/bin/env ,g"
+    find ${CONDA_BIN} -type f -exec sed -i "$pattern" {} \;
+}
+#
+# Reset conda version if required
+reset_conda_version()
+{
+    CONDA_VERSION="$(${CONDA_BIN}/conda -V 2>&1 | head -n 1 | cut -d' ' -f2)"
+    echo conda version: ${CONDA_VERSION}
+    if [ "${CONDA_VERSION}" != "${CONDA_REQUIRED_VERSION}" ] ; then
+        echo "Resetting conda to last known working version $CONDA_REQUIRED_VERSION"
+        ${CONDA_BIN}/conda config --set allow_conda_downgrades true
+        ${CONDA_BIN}/conda install -y conda=${CONDA_REQUIRED_VERSION}
+    else
+        echo "conda version ok"
+    fi
+}
+#
+# Install conda
+install_conda()
+{
+    echo "++++++++++++++++"
+    echo "Installing conda"
+    echo "++++++++++++++++"
+    if [ -e ${CONDA_DIR} ] ; then
+        echo "*** $CONDA_DIR already exists ***" >&2
+        return
+    fi
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh
+    bash ./Miniconda2-latest-Linux-x86_64.sh -b -p ${CONDA_DIR}
+    echo Installed conda in ${CONDA_DIR}
+    echo -n "Adding conda bin to PATH..."
+    export PATH=${CONDA_BIN}:$PATH
+    echo "ok"
+    # Reset the conda version to a known working version
+    # (to avoid problems observed with e.g. conda 4.7.10)
+    echo ""
+    reset_conda_version
+    # Update the installation files
+    # This is to avoid problems when the length of the installation
+    # directory path exceeds the limit for the shebang statement
+    # in the conda files
+    echo ""
+    echo -n "Rewriting conda shebangs..."
+    rewrite_conda_shebangs
+    echo "ok"
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# Create conda environment
+install_conda_packages()
+{
+    echo "+++++++++++++++++++++++++"
+    echo "Installing conda packages"
+    echo "+++++++++++++++++++++++++"
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    cat >environment.yml <<EOF
+name: ${ENV_NAME}
+channels:
+  - defaults
+  - conda-forge
+  - bioconda
+dependencies:
+  - python=2.7
+  - cutadapt=1.8
+  - sickle-trim=1.33
+  - bioawk=1.0
+  - pandaseq=2.8.1
+  - spades=3.10.1
+  - fastqc=0.11.3
+  - qiime=1.9.1
+  - blast-legacy=2.2.26
+  - fasta-splitter=0.2.6
+  - rdp_classifier=$RDP_CLASSIFIER_VERSION
+  - vsearch=2.10.4
+  - r=3.5.1
+  - r-tidyverse=1.2.1
+  - bioconductor-dada2=1.8
+  - bioconductor-biomformat=1.8.0
+EOF
+    ${CONDA} env create --name "${ENV_NAME}" -f environment.yml
+    if [ $? -ne 0 ] ; then
+        fail "Non-zero exit status from 'conda env create'"
+    elif [ ! -e "${ENV_DIR}" ] ; then
+        fail "Failed to create conda environment: ${ENV_DIR} not found"
+    fi
+    echo Created conda environment in ${ENV_DIR}
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+    #
+    # Patch qiime 1.9.1 tools to switch deprecated 'axisbg'
+    # matplotlib property to 'facecolor':
+    # https://matplotlib.org/api/prev_api_changes/api_changes_2.0.0.html
+    echo ""
+    for exe in make_2d_plots.py plot_taxa_summary.py ; do
+        echo -n "Patching ${exe}..."
+        find ${CONDA_DIR} -type f -name "$exe" -exec sed -i 's/axisbg=/facecolor=/g' {} \;
+        echo "done"
+    done
+    #
+    # Patch qiime 1.9.1 tools to switch deprecated 'set_axis_bgcolor'
+    # method call to 'set_facecolor':
+    # https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.set_axis_bgcolor.html
+    for exe in make_rarefaction_plots.py ; do
+        echo -n "Patching ${exe}..."
+        find ${CONDA_DIR} -type f -name "$exe" -exec sed -i 's/set_axis_bgcolor/set_facecolor/g' {} \;
+        echo "done"
+    done
+}
+#
+# Install all the non-conda dependencies in a single
+# function (invokes separate functions for each package)
+install_non_conda_packages()
+{
+    echo "+++++++++++++++++++++++++++++"
+    echo "Installing non-conda packages"
+    echo "+++++++++++++++++++++++++++++"
+    # Temporary working directory
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    # Amplicon analysis pipeline
+    echo -n "Installing Amplicon_analysis_pipeline..."
+    if [ -e ${BIN_DIR}/Amplicon_analysis_pipeline.sh ] ; then
+        echo "already installed"
+    else
+        install_amplicon_analysis_pipeline
+        echo "ok"
+    fi
+    # ChimeraSlayer
+    echo -n "Installing ChimeraSlayer..."
+    if [ -e ${BIN_DIR}/ChimeraSlayer.pl ] ; then
+        echo "already installed"
+    else
+        install_chimeraslayer
+        echo "ok"
+    fi
+    # Uclust
+    # This no longer seems to be available for download from
+    # drive5.com so don't download
+    echo "WARNING uclust not available: skipping installation"
+}
+#
+# Amplicon analysis pipeline
+install_amplicon_analysis_pipeline()
+{
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q https://github.com/MTutino/Amplicon_analysis/archive/${PIPELINE_VERSION}.tar.gz
+    tar zxf ${PIPELINE_VERSION}.tar.gz
+    cd Amplicon_analysis-${PIPELINE_VERSION}
+    INSTALL_DIR=${TOP_DIR}/share/amplicon_analysis_pipeline-${PIPELINE_VERSION}
+    mkdir -p $INSTALL_DIR
+    ln -s $INSTALL_DIR ${TOP_DIR}/share/amplicon_analysis_pipeline
+    for f in *.sh *.R ; do
+        /bin/cp $f $INSTALL_DIR
+    done
+    /bin/cp -r uc2otutab $INSTALL_DIR
+    mkdir -p ${BIN_DIR}
+    cat >${BIN_DIR}/Amplicon_analysis_pipeline.sh <<EOF
+#!/usr/bin/env bash
+#
+# Point to Qiime config
+export QIIME_CONFIG_FP=${TOP_DIR}/qiime/qiime_config
+# Set up the RDP jar file
+export RDP_JAR_PATH=${TOP_DIR}/share/rdp_classifier/rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar
+# Set the Matplotlib backend
+export MPLBACKEND="agg"
+# Put the scripts onto the PATH
+export PATH=${BIN_DIR}:${INSTALL_DIR}:\$PATH
+# Activate the conda environment
+export PATH=${CONDA_BIN}:\$PATH
+source ${CONDA_BIN}/activate ${ENV_NAME}
+# Execute the driver script with the supplied arguments
+$INSTALL_DIR/Amplicon_analysis_pipeline.sh \$@
+exit \$?
+EOF
+    chmod 0755 ${BIN_DIR}/Amplicon_analysis_pipeline.sh
+    cat >${BIN_DIR}/install_reference_data.sh <<EOF
+#!/usr/bin/env bash -e
+#
+function usage() {
+    echo "Usage: \$(basename \$0) DIR"
+}
+if [ -z "\$1" ] ; then
+    usage
+    exit 0
+elif [ "\$1" == "--help" ] || [ "\$1" == "-h" ] ; then
+    usage
+    echo ""
+    echo "Install reference data into DIR"
+    exit 0
+fi
+echo "=========================================="
+echo "Installing Amplicon analysis pipeline data"
+echo "=========================================="
+if [ ! -e "\$1" ] ; then
+    echo "Making directory \$1"
+    mkdir -p \$1
+fi
+cd \$1
+DATA_DIR=\$(pwd)
+echo "Installing reference data under \$DATA_DIR"
+$INSTALL_DIR/References.sh
+echo ""
+echo "Use '-r \$DATA_DIR' when running Amplicon_analysis_pipeline.sh"
+echo "to use the reference data from this directory"
+echo ""
+echo "\$(basename \$0): finished"
+EOF
+    chmod 0755 ${BIN_DIR}/install_reference_data.sh
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# ChimeraSlayer
+install_chimeraslayer()
+{
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q https://sourceforge.net/projects/microbiomeutil/files/__OLD_VERSIONS/microbiomeutil_2010-04-29.tar.gz
+    tar zxf microbiomeutil_2010-04-29.tar.gz
+    cd microbiomeutil_2010-04-29
+    INSTALL_DIR=${TOP_DIR}/share/microbiome_chimeraslayer-2010-04-29
+    mkdir -p $INSTALL_DIR
+    ln -s $INSTALL_DIR ${TOP_DIR}/share/microbiome_chimeraslayer
+    /bin/cp -r ChimeraSlayer $INSTALL_DIR
+    cat >${BIN_DIR}/ChimeraSlayer.pl <<EOF
+#!/usr/bin/env bash
+export PATH=$INSTALL_DIR:\$PATH
+$INSTALL_DIR/ChimeraSlayer/ChimeraSlayer.pl \$@
+EOF
+    chmod 0755 ${INSTALL_DIR}/ChimeraSlayer/ChimeraSlayer.pl
+    chmod 0755 ${BIN_DIR}/ChimeraSlayer.pl
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# uclust required for QIIME/pyNAST
+# License only allows this version to be used with those two packages
+# See: http://drive5.com/uclust/downloads1_2_22q.html
+install_uclust()
+{
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q http://drive5.com/uclust/uclustq1.2.22_i86linux64
+    INSTALL_DIR=${TOP_DIR}/share/uclust-1.2.22
+    mkdir -p $INSTALL_DIR
+    ln -s $INSTALL_DIR ${TOP_DIR}/share/uclust
+    /bin/mv uclustq1.2.22_i86linux64 ${INSTALL_DIR}/uclust
+    chmod 0755 ${INSTALL_DIR}/uclust
+    ln -s ${INSTALL_DIR}/uclust ${BIN_DIR}
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+setup_pipeline_environment()
+{
+    echo "+++++++++++++++++++++++++++++++"
+    echo "Setting up pipeline environment"
+    echo "+++++++++++++++++++++++++++++++"
+    # fasta_splitter.pl
+    echo -n "Setting up fasta_splitter.pl..."
+    if [ -e ${BIN_DIR}/fasta-splitter.pl ] ; then
+        echo "already exists"
+    elif [ ! -e ${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl ] ; then
+        echo "failed"
+        fail "fasta-splitter.pl not found"
+    else
+        ln -s ${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl ${BIN_DIR}/fasta-splitter.pl
+        echo "ok"
+    fi
+    # rdp_classifier.jar
+    local rdp_classifier_jar=rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar
+    echo -n "Setting up rdp_classifier.jar..."
+    if [ -e ${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar} ] ; then
+        echo "already exists"
+    elif [ ! -e ${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar ] ; then
+        echo "failed"
+        fail "rdp_classifier.jar not found"
+    else
+        mkdir -p ${TOP_DIR}/share/rdp_classifier
+        ln -s ${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar ${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar}
+        echo "ok"
+    fi
+    # qiime_config
+    echo -n "Setting up qiime_config..."
+    if [ -e ${TOP_DIR}/qiime/qiime_config ] ; then
+        echo "already exists"
+    else
+        mkdir -p ${TOP_DIR}/qiime
+        cat >${TOP_DIR}/qiime/qiime_config <<EOF-qiime-config
+qiime_scripts_dir	${ENV_DIR}/bin
+EOF-qiime-config
+        echo "ok"
+    fi
+}
+#
+# Top level script does the installation
+echo "======================================="
+echo "Amplicon_analysis_pipeline installation"
+echo "======================================="
+echo "Installing into ${TOP_DIR}"
+if [ -e ${TOP_DIR} ] ; then
+    fail "Directory already exists"
+fi
+mkdir -p ${TOP_DIR}
+install_conda
+install_conda_packages
+install_non_conda_packages
+setup_pipeline_environment
+echo "===================================="
+echo "Amplicon_analysis_pipeline installed"
+echo "===================================="
+echo ""
+echo "Install reference data using:"
+echo ""
+echo "\$ ${BIN_DIR}/install_reference_data.sh DIR"
+echo ""
+echo "Run pipeline scripts using:"
+echo ""
+echo "\$ ${BIN_DIR}/Amplicon_analysis_pipeline.sh ..."
+echo ""
+echo "(or add ${BIN_DIR} to your PATH)"
+echo ""
+echo "$(basename $0): finished"
+##
+#
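The shebang rewriting above (rewrite_conda_shebangs) works around the kernel's limit on shebang line length when the conda installation path is long. In Python terms the sed substitution is roughly equivalent to the following sketch, where conda_bin is an example path rather than anything the installer defines:

    # Rough Python equivalent of rewrite_conda_shebangs() above; this
    # handles the shebang on the first line of one file, whereas the
    # sed version is applied to every file under the conda bin directory.
    def rewrite_shebang(path, conda_bin):
        with open(path) as fp:
            text = fp.read()
        prefix = "#!%s/" % conda_bin  # e.g. "#!/opt/Amplicon_analysis-1.3.5/conda/bin/"
        if text.startswith(prefix):
            with open(path, "w") as fp:
                # "#!/long/path/conda/bin/python" -> "#!/usr/bin/env python"
                fp.write("#!/usr/bin/env " + text[len(prefix):])

Relying on "#!/usr/bin/env ..." means the generated wrappers only need the conda bin directory on the PATH, which the Amplicon_analysis_pipeline.sh wrapper arranges before activating the environment.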
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/install_amplicon_analysis-1.3.6.sh	Fri Dec 20 06:59:49 2019 -0500
@@ -0,0 +1,399 @@
+#!/bin/sh -e
+#
+# Prototype script to set up a conda environment with the
+# dependencies needed for the Amplicon_analysis_pipeline
+# script
+#
+# Handle command line
+usage()
+{
+    echo "Usage: $(basename $0) [DIR]"
+    echo ""
+    echo "Installs the Amplicon_analysis_pipeline package plus"
+    echo "dependencies in directory DIR (or current directory"
+    echo "if DIR not supplied)"
+}
+if [ ! -z "$1" ] ; then
+    # Check if help was requested
+    case "$1" in
+        --help|-h)
+            usage
+            exit 0
+            ;;
+    esac
+    # Assume it's the installation directory
+    cd $1
+fi
+# Versions
+PIPELINE_VERSION=1.3.6
+CONDA_REQUIRED_VERSION=4.6.14
+RDP_CLASSIFIER_VERSION=2.2
+# Directories
+TOP_DIR=$(pwd)/Amplicon_analysis-${PIPELINE_VERSION}
+BIN_DIR=${TOP_DIR}/bin
+CONDA_DIR=${TOP_DIR}/conda
+CONDA_BIN=${CONDA_DIR}/bin
+CONDA_LIB=${CONDA_DIR}/lib
+CONDA=${CONDA_BIN}/conda
+ENV_NAME="amplicon_analysis_pipeline@${PIPELINE_VERSION}"
+ENV_DIR=${CONDA_DIR}/envs/$ENV_NAME
+#
+# Functions
+#
+# Report failure and terminate script
+fail()
+{
+    echo ""
+    echo ERROR $@ >&2
+    echo ""
+    echo "$(basename $0): installation failed"
+    exit 1
+}
+#
+# Rewrite the shebangs in the installed conda scripts
+# to remove the full path to conda 'bin' directory
+rewrite_conda_shebangs()
+{
+    pattern="s,^#!${CONDA_BIN}/,#!/usr/bin/env ,g"
+    find ${CONDA_BIN} -type f -exec sed -i "$pattern" {} \;
+}
+#
+# Reset conda version if required
+reset_conda_version()
+{
+    CONDA_VERSION="$(${CONDA_BIN}/conda -V 2>&1 | head -n 1 | cut -d' ' -f2)"
+    echo conda version: ${CONDA_VERSION}
+    if [ "${CONDA_VERSION}" != "${CONDA_REQUIRED_VERSION}" ] ; then
+        echo "Resetting conda to last known working version $CONDA_REQUIRED_VERSION"
+        ${CONDA_BIN}/conda config --set allow_conda_downgrades true
+        ${CONDA_BIN}/conda install -y conda=${CONDA_REQUIRED_VERSION}
+    else
+        echo "conda version ok"
+    fi
+}
+#
+# Install conda
+install_conda()
+{
+    echo "++++++++++++++++"
+    echo "Installing conda"
+    echo "++++++++++++++++"
+    if [ -e ${CONDA_DIR} ] ; then
+        echo "*** $CONDA_DIR already exists ***" >&2
+        return
+    fi
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh
+    bash ./Miniconda2-latest-Linux-x86_64.sh -b -p ${CONDA_DIR}
+    echo Installed conda in ${CONDA_DIR}
+    echo -n "Adding conda bin to PATH..."
+    export PATH=${CONDA_BIN}:$PATH
+    echo "ok"
+    # Reset the conda version to a known working version
+    # (to avoid problems observed with e.g. conda 4.7.10)
+    echo ""
+    reset_conda_version
+    # Update the installation files
+    # This is to avoid problems when the length of the installation
+    # directory path exceeds the limit for the shebang statement
+    # in the conda files
+    echo ""
+    echo -n "Rewriting conda shebangs..."
+    rewrite_conda_shebangs
+    echo "ok"
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# Create conda environment
+install_conda_packages()
+{
+    echo "+++++++++++++++++++++++++"
+    echo "Installing conda packages"
+    echo "+++++++++++++++++++++++++"
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    cat >environment.yml <<EOF
+name: ${ENV_NAME}
+channels:
+  - defaults
+  - conda-forge
+  - bioconda
+dependencies:
+  - python=2.7
+  - cutadapt=1.8
+  - sickle-trim=1.33
+  - bioawk=1.0
+  - pandaseq=2.8.1
+  - spades=3.10.1
+  - fastqc=0.11.3
+  - qiime=1.9.1
+  - blast-legacy=2.2.26
+  - fasta-splitter=0.2.6
+  - rdp_classifier=$RDP_CLASSIFIER_VERSION
+  - vsearch=2.10.4
+  - r=3.5.1
+  - r-tidyverse=1.2.1
+  - bioconductor-dada2=1.8
+  - bioconductor-biomformat=1.8.0
+EOF
+    ${CONDA} env create --name "${ENV_NAME}" -f environment.yml
+    if [ $? -ne 0 ] ; then
+        fail "Non-zero exit status from 'conda env create'"
+    elif [ ! -e "${ENV_DIR}" ] ; then
+        fail "Failed to create conda environment: ${ENV_DIR} not found"
+    fi
+    echo Created conda environment in ${ENV_DIR}
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+    #
+    # Patch qiime 1.9.1 tools to switch deprecated 'axisbg'
+    # matplotlib property to 'facecolor':
+    # https://matplotlib.org/api/prev_api_changes/api_changes_2.0.0.html
+    echo ""
+    for exe in make_2d_plots.py plot_taxa_summary.py ; do
+        echo -n "Patching ${exe}..."
+        find ${CONDA_DIR} -type f -name "$exe" -exec sed -i 's/axisbg=/facecolor=/g' {} \;
+        echo "done"
+    done
+    #
+    # Patch qiime 1.9.1 tools to switch deprecated 'set_axis_bgcolor'
+    # method call to 'set_facecolor':
+    # https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.set_axis_bgcolor.html
+    for exe in make_rarefaction_plots.py ; do
+        echo -n "Patching ${exe}..."
+        find ${CONDA_DIR} -type f -name "$exe" -exec sed -i 's/set_axis_bgcolor/set_facecolor/g' {} \;
+        echo "done"
+    done
+}
+#
+# Install all the non-conda dependencies in a single
+# function (invokes separate functions for each package)
+install_non_conda_packages()
+{
+    echo "+++++++++++++++++++++++++++++"
+    echo "Installing non-conda packages"
+    echo "+++++++++++++++++++++++++++++"
+    # Temporary working directory
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    # Amplicon analysis pipeline
+    echo -n "Installing Amplicon_analysis_pipeline..."
+    if [ -e ${BIN_DIR}/Amplicon_analysis_pipeline.sh ] ; then
+        echo "already installed"
+    else
+        install_amplicon_analysis_pipeline
+        echo "ok"
+    fi
+    # ChimeraSlayer
+    echo -n "Installing ChimeraSlayer..."
+    if [ -e ${BIN_DIR}/ChimeraSlayer.pl ] ; then
+        echo "already installed"
+    else
+        install_chimeraslayer
+        echo "ok"
+    fi
+    # Uclust
+    # This no longer seems to be available for download from
+    # drive5.com so don't download
+    echo "WARNING uclust not available: skipping installation"
+}
+#
+# Amplicon analysis pipeline
+install_amplicon_analysis_pipeline()
+{
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q https://github.com/MTutino/Amplicon_analysis/archive/${PIPELINE_VERSION}.tar.gz
+    tar zxf ${PIPELINE_VERSION}.tar.gz
+    cd Amplicon_analysis-${PIPELINE_VERSION}
+    INSTALL_DIR=${TOP_DIR}/share/amplicon_analysis_pipeline-${PIPELINE_VERSION}
+    mkdir -p $INSTALL_DIR
+    ln -s $INSTALL_DIR ${TOP_DIR}/share/amplicon_analysis_pipeline
+    for f in *.sh *.R ; do
+        /bin/cp $f $INSTALL_DIR
+    done
+    /bin/cp -r uc2otutab $INSTALL_DIR
+    mkdir -p ${BIN_DIR}
+    cat >${BIN_DIR}/Amplicon_analysis_pipeline.sh <<EOF
+#!/usr/bin/env bash
+#
+# Point to Qiime config
+export QIIME_CONFIG_FP=${TOP_DIR}/qiime/qiime_config
+# Set up the RDP jar file
+export RDP_JAR_PATH=${TOP_DIR}/share/rdp_classifier/rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar
+# Set the Matplotlib backend
+export MPLBACKEND="agg"
+# Put the scripts onto the PATH
+export PATH=${BIN_DIR}:${INSTALL_DIR}:\$PATH
+# Activate the conda environment
+export PATH=${CONDA_BIN}:\$PATH
+source ${CONDA_BIN}/activate ${ENV_NAME}
+# Execute the driver script with the supplied arguments
+$INSTALL_DIR/Amplicon_analysis_pipeline.sh \$@
+exit \$?
+EOF
+    chmod 0755 ${BIN_DIR}/Amplicon_analysis_pipeline.sh
+    cat >${BIN_DIR}/install_reference_data.sh <<EOF
+#!/usr/bin/env bash -e
+#
+function usage() {
+    echo "Usage: \$(basename \$0) DIR"
+}
+if [ -z "\$1" ] ; then
+    usage
+    exit 0
+elif [ "\$1" == "--help" ] || [ "\$1" == "-h" ] ; then
+    usage
+    echo ""
+    echo "Install reference data into DIR"
+    exit 0
+fi
+echo "=========================================="
+echo "Installing Amplicon analysis pipeline data"
+echo "=========================================="
+if [ ! -e "\$1" ] ; then
+    echo "Making directory \$1"
+    mkdir -p \$1
+fi
+cd \$1
+DATA_DIR=\$(pwd)
+echo "Installing reference data under \$DATA_DIR"
+$INSTALL_DIR/References.sh
+echo ""
+echo "Use '-r \$DATA_DIR' when running Amplicon_analysis_pipeline.sh"
+echo "to use the reference data from this directory"
+echo ""
+echo "\$(basename \$0): finished"
+EOF
+    chmod 0755 ${BIN_DIR}/install_reference_data.sh
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# ChimeraSlayer
+install_chimeraslayer()
+{
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q https://sourceforge.net/projects/microbiomeutil/files/__OLD_VERSIONS/microbiomeutil_2010-04-29.tar.gz
+    tar zxf microbiomeutil_2010-04-29.tar.gz
+    cd microbiomeutil_2010-04-29
+    INSTALL_DIR=${TOP_DIR}/share/microbiome_chimeraslayer-2010-04-29
+    mkdir -p $INSTALL_DIR
+    ln -s $INSTALL_DIR ${TOP_DIR}/share/microbiome_chimeraslayer
+    /bin/cp -r ChimeraSlayer $INSTALL_DIR
+    cat >${BIN_DIR}/ChimeraSlayer.pl <<EOF
+#!/usr/bin/env bash
+export PATH=$INSTALL_DIR:\$PATH
+$INSTALL_DIR/ChimeraSlayer/ChimeraSlayer.pl \$@
+EOF
+    chmod 0755 ${INSTALL_DIR}/ChimeraSlayer/ChimeraSlayer.pl
+    chmod 0755 ${BIN_DIR}/ChimeraSlayer.pl
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# uclust required for QIIME/pyNAST
+# License only allows this version to be used with those two packages
+# See: http://drive5.com/uclust/downloads1_2_22q.html
+install_uclust()
+{
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q http://drive5.com/uclust/uclustq1.2.22_i86linux64
+    INSTALL_DIR=${TOP_DIR}/share/uclust-1.2.22
+    mkdir -p $INSTALL_DIR
+    ln -s $INSTALL_DIR ${TOP_DIR}/share/uclust
+    /bin/mv uclustq1.2.22_i86linux64 ${INSTALL_DIR}/uclust
+    chmod 0755 ${INSTALL_DIR}/uclust
+    ln -s ${INSTALL_DIR}/uclust ${BIN_DIR}
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+setup_pipeline_environment()
+{
+    echo "+++++++++++++++++++++++++++++++"
+    echo "Setting up pipeline environment"
+    echo "+++++++++++++++++++++++++++++++"
+    # fasta_splitter.pl
+    echo -n "Setting up fasta_splitter.pl..."
+    if [ -e ${BIN_DIR}/fasta-splitter.pl ] ; then
+        echo "already exists"
+    elif [ ! -e ${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl ] ; then
+        echo "failed"
+        fail "fasta-splitter.pl not found"
+    else
+        ln -s ${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl ${BIN_DIR}/fasta-splitter.pl
+        echo "ok"
+    fi
+    # rdp_classifier.jar
+    local rdp_classifier_jar=rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar
+    echo -n "Setting up rdp_classifier.jar..."
+    if [ -e ${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar} ] ; then
+        echo "already exists"
+    elif [ ! -e ${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar ] ; then
+        echo "failed"
+        fail "rdp_classifier.jar not found"
+    else
+        mkdir -p ${TOP_DIR}/share/rdp_classifier
+        ln -s ${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar ${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar}
+        echo "ok"
+    fi
+    # qiime_config
+    echo -n "Setting up qiime_config..."
+    if [ -e ${TOP_DIR}/qiime/qiime_config ] ; then
+        echo "already exists"
+    else
+        mkdir -p ${TOP_DIR}/qiime
+        cat >${TOP_DIR}/qiime/qiime_config <<EOF-qiime-config
+qiime_scripts_dir	${ENV_DIR}/bin
+EOF-qiime-config
+        echo "ok"
+    fi
+}
+#
+# Top level script does the installation
+echo "======================================="
+echo "Amplicon_analysis_pipeline installation"
+echo "======================================="
+echo "Installing into ${TOP_DIR}"
+if [ -e ${TOP_DIR} ] ; then
+    fail "Directory already exists"
+fi
+mkdir -p ${TOP_DIR}
+install_conda
+install_conda_packages
+install_non_conda_packages
+setup_pipeline_environment
+echo "===================================="
+echo "Amplicon_analysis_pipeline installed"
+echo "===================================="
+echo ""
+echo "Install reference data using:"
+echo ""
+echo "\$ ${BIN_DIR}/install_reference_data.sh DIR"
+echo ""
+echo "Run pipeline scripts using:"
+echo ""
+echo "\$ ${BIN_DIR}/Amplicon_analysis_pipeline.sh ..."
+echo ""
+echo "(or add ${BIN_DIR} to your PATH)"
+echo ""
+echo "$(basename $0): finished"
+##
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/outputs.txt	Fri Dec 20 06:59:49 2019 -0500
@@ -0,0 +1,41 @@
+ok.. Metatable_log/Metatable_mod.txt
+ok.. Vsearch_OTU_tables/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_tax_OTU_table.biom
+ok.. Vsearch_OTU_tables/otus.tre
+ok.. RESULTS/Vsearch_gg/OTUs_count.txt
+ok.. RESULTS/Vsearch_gg/table_summary.txt
+ok.. Multiplexed_files/Vsearch_pipeline/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta
+ok.. QUALITY_CONTROL/Reads_count.txt
+ok.. fastqc_quality_boxplots.html -> generated by the Python wrapper
+NO.. RESULTS/Vsearch_gg/Heatmap/js -> RESULTS/Vsearch_gg/Heatmap.pdf
+NO.. RESULTS/Vsearch_gg/Heatmap/otu_table.html -> MISSING
+ok.. RESULTS/Vsearch_gg/phylum_genus_charts/charts/
+ok.. RESULTS/Vsearch_gg/phylum_genus_charts/raw_data/
+ok.. RESULTS/Vsearch_gg/phylum_genus_charts/bar_charts.html
+ok.. RESULTS/Vsearch_gg/beta_div_even/weighted_2d_plot/*
+ok.. RESULTS/Vsearch_gg/beta_div_even/weighted_2d_plot/weighted_unifrac_pc_2D_PCoA_plots.html
+ok.. RESULTS/Vsearch_gg/beta_div_even/unweighted_2d_plot/*
+ok.. RESULTS/Vsearch_gg/beta_div_even/unweighted_2d_plot/unweighted_unifrac_pc_2D_PCoA_plots.html
+ok.. RESULTS/Vsearch_gg/Alpha_diversity/rarefaction_curves/rarefaction_plots.html
+ok.. RESULTS/Vsearch_gg/Alpha_diversity/rarefaction_curves/average_plots
+ok.. RESULTS/Vsearch_gg/Alpha_diversity/Alpha_diversity_boxplot/Categories_shannon/*.pdf
+
+??.. Metatable_log/Metatable_mod.txt
+NO.. DADA2_OTU_tables/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_tax_OTU_table.biom
+ok.. DADA2_OTU_tables/otus.tre
+ok.. RESULTS/DADA2_silva/OTUs_count.txt
+ok.. RESULTS/DADA2_silva/table_summary.txt
+ok.. Multiplexed_files/DADA2_pipeline/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta --> DADA2_OTU_tables/seqs.fa
+NO.. QUALITY_CONTROL/Reads_count.txt -> Vsearch only
+ok.. fastqc_quality_boxplots.html -> generated by the Python wrapper
+NO.. RESULTS/DADA2_silva/Heatmap/js -> RESULTS/DADA2_silva/Heatmap.pdf
+NO.. RESULTS/DADA2_silva/Heatmap/otu_table.html
+ok.. RESULTS/DADA2_silva/phylum_genus_charts/charts/
+ok.. RESULTS/DADA2_silva/phylum_genus_charts/raw_data/
+ok.. RESULTS/DADA2_silva/phylum_genus_charts/bar_charts.html
+ok.. RESULTS/DADA2_silva/beta_div_even/weighted_2d_plot/*
+ok.. RESULTS/DADA2_silva/beta_div_even/weighted_2d_plot/weighted_unifrac_pc_2D_PCoA_plots.html
+ok.. RESULTS/DADA2_silva/beta_div_even/unweighted_2d_plot/*
+ok.. RESULTS/DADA2_silva/beta_div_even/unweighted_2d_plot/unweighted_unifrac_pc_2D_PCoA_plots.html
+ok.. RESULTS/DADA2_silva/Alpha_diversity/rarefaction_curves/rarefaction_plots.html
+ok.. RESULTS/DADA2_silva/Alpha_diversity/rarefaction_curves/average_plots
+ok.. RESULTS/DADA2_silva/Alpha_diversity/Alpha_diversity_boxplot/Categories_shannon/*.pdf -> missing? (didn't include categories?)
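The checklist above could be turned into an automated post-run check along the following lines. This is a hypothetical helper, not part of the changeset; the expected paths are taken from the listing (some entries contain wildcards, hence glob):

    # Hypothetical post-run check derived from the outputs.txt checklist.
    import glob
    import os

    EXPECTED = {
        "Vsearch": ["Metatable_log/Metatable_mod.txt",
                    "Vsearch_OTU_tables/otus.tre",
                    "QUALITY_CONTROL/Reads_count.txt",
                    "RESULTS/Vsearch_gg/Heatmap.pdf"],
        "DADA2": ["Metatable_log/Metatable_mod.txt",
                  "DADA2_OTU_tables/otus.tre",
                  "DADA2_OTU_tables/seqs.fa",
                  "RESULTS/DADA2_silva/Heatmap.pdf"],
    }

    def missing_outputs(pipeline, run_dir="."):
        # Return the expected outputs that no file matches
        return [p for p in EXPECTED[pipeline]
                if not glob.glob(os.path.join(run_dir, p))]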
--- a/tool_dependencies.xml	Thu Oct 18 09:18:04 2018 -0400
+++ b/tool_dependencies.xml	Fri Dec 20 06:59:49 2019 -0500
@@ -1,14 +1,14 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="amplicon_analysis_pipeline" version="1.2.3">
+    <package name="amplicon_analysis_pipeline" version="1.3.6">
         <install version="1.0">
             <actions>
-                <action type="download_file">https://raw.githubusercontent.com/pjbriggs/Amplicon_analysis-galaxy/master/install_amplicon_analysis.sh</action>
+                <action type="download_file">https://raw.githubusercontent.com/pjbriggs/Amplicon_analysis-galaxy/master/install_amplicon_analysis-1.3.6.sh</action>
                 <action type="shell_command">
-                    sh ./install_amplicon_analysis.sh $INSTALL_DIR
+                    sh ./install_amplicon_analysis-1.3.6.sh $INSTALL_DIR
                 </action>
                 <action type="set_environment">
-                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/Amplicon_analysis-1.2.3/bin</environment_variable>
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/Amplicon_analysis-1.3.6/bin</environment_variable>
                 </action>
             </actions>
         </install>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/updating-to-pipeline-1.3-DADA2.txt	Fri Dec 20 06:59:49 2019 -0500
@@ -0,0 +1,58 @@
+Notes on updating Galaxy tool to pipeline 1.3 (DADA2)
+=====================================================
+
+Where stuff is:
+
+* projects/Amplicon_analysis-galaxy: git repo for Galaxy tool (these
+  developments are in the 'update-to-Amplicon_analysis_pipeline-1.3'
+  branch, PR #50:
+  https://github.com/pjbriggs/Amplicon_analysis-galaxy/pull/50)
+
+* scratchpad/test_Amplicon_analysis_pipeline_DADA2: directory for
+  running/testing the updates
+
+So far:
+
+* Updated the installer for pipeline version 1.3.2
+
+* Have been trying to run the pipeline manually outside of Galaxy
+  on popov & CSF3:
+  -- DADA2 works on popov (can't remember if it works on CSF3)
+  -- Vsearch pipeline fails on popov and CSF3 (but errors are
+     different)
+
+* Mauro is looking at fixing the errors while I carry on trying
+  to update the Galaxy tool
+
+Random notes from my notebook:
+
+p44:
+
+* DADA2 uses NSLOTS environment variable from the local environment
+  (so can get number of cores on cluster; if NSLOTS not set then
+  gets number of cores on local machine)
+
+* DADA2 has new outputs:
+  -- DADA2_OTU_tables/Error_rate_plots/ <-- need to capture all
+     PDFs from this folder
+
+pp78-79:
+
+* Galaxy wrapper could check that 'Run' column is in supplied
+  metatable file (if it's not present then pipeline will fail
+  now)
+
+* DADA2 has its own reference database
+
+* DADA2 produces same outputs as Vsearch (with name changed from
+  "Vsearch_*" to "DADA2_*"), plus extras:
+  -- Vsearch_OTUs.tre -> otus.tre
+  -- Vsearch_multiplexed_linearised_dereplicated_mc2_repset_nonchimeras_OTUS.fasta -> seqs.fa
+  -- There might be issues with the heatmap
+
+p83: notes on progress...
+
+p95:
+
+* Confirms heatmap is now e.g. RESULTS/Vsearch_silva/Heatmap.pdf
+  (instead of HTML output)
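The 'Run' column check suggested on pp78-79 could look something like this in the Python wrapper. This is a sketch only: it assumes the metatable header is a tab-separated first line, which is not confirmed by these notes:

    # Possible wrapper-side validation for the metatable file;
    # the tab-separated header assumption is a guess.
    def metatable_has_run_column(metatable_file):
        with open(metatable_file) as fp:
            header = fp.readline().rstrip("\n").split("\t")
        return "Run" in header

Similarly, the NSLOTS behaviour noted on p44 corresponds to reading the environment with a local fallback, e.g. int(os.environ.get("NSLOTS", multiprocessing.cpu_count())) in Python terms, although DADA2 itself does this on the R side.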