Repository 'amplicon_analysis_pipeline'
hg clone https://toolshed.g2.bx.psu.edu/repos/pjbriggs/amplicon_analysis_pipeline

Changeset 4:86a12d75ebe4 (2019-12-20)
Previous changeset 3:3ab198df8f3f (2018-10-18)
Commit message:
planemo upload for repository https://github.com/pjbriggs/Amplicon_analysis-galaxy commit 7be61b7ed35ca3deaad68d2eae384c8cd365bcb8
modified:
README.rst
amplicon_analysis_pipeline.py
amplicon_analysis_pipeline.xml
tool_dependencies.xml
added:
install_amplicon_analysis-1.3.5.sh
install_amplicon_analysis-1.3.6.sh
outputs.txt
updating-to-pipeline-1.3-DADA2.txt
diff -r 3ab198df8f3f -r 86a12d75ebe4 README.rst
--- a/README.rst Thu Oct 18 09:18:04 2018 -0400
+++ b/README.rst Fri Dec 20 06:59:49 2019 -0500
@@ -174,6 +174,7 @@
 ========== ======================================================================
 Version    Changes
 ---------- ----------------------------------------------------------------------
+1.3.5.0    Updated to Amplicon_Analysis_Pipeline version 1.3.5.
 1.2.3.0    Updated to Amplicon_Analysis_Pipeline version 1.2.3; install
            dependencies via tool_dependencies.xml.
 1.2.2.0    Updated to Amplicon_Analysis_Pipeline version 1.2.2 (removes
diff -r 3ab198df8f3f -r 86a12d75ebe4 amplicon_analysis_pipeline.py
--- a/amplicon_analysis_pipeline.py Thu Oct 18 09:18:04 2018 -0400
+++ b/amplicon_analysis_pipeline.py Fri Dec 20 06:59:49 2019 -0500
@@ -117,9 +117,9 @@
     p.add_argument("-L",dest="minimum_length")
     p.add_argument("-l",dest="sliding_window_length")
     p.add_argument("-P",dest="pipeline",
-                   choices=["vsearch","uparse","qiime"],
-                   type=str.lower,
-                   default="vsearch")
+                   choices=["Vsearch","DADA2"],
+                   type=str,
+                   default="Vsearch")
     p.add_argument("-S",dest="use_silva",action="store_true")
     p.add_argument("-H",dest="use_homd",action="store_true")
     p.add_argument("-r",dest="reference_data_path")
@@ -155,12 +155,15 @@
             sample_names.append(sample_name)
 
     # Reference database
-    if args.use_silva:
+    if args.pipeline == "Vsearch":
+        if args.use_silva:
+            ref_database = "silva"
+        elif args.use_homd:
+            ref_database = "homd"
+        else:
+            ref_database = "gg"
+    elif args.pipeline == "DADA2":
         ref_database = "silva"
-    elif args.use_homd:
-        ref_database = "homd"
-    else:
-        ref_database = "gg"
 
     # Construct the pipeline command
     print "Amplicon analysis: constructing pipeline command"
@@ -180,10 +183,11 @@
     if args.reference_data_path:
         pipeline.add_args("-r",args.reference_data_path)
     pipeline.add_args("-P",args.pipeline)
-    if ref_database == "silva":
-        pipeline.add_args("-S")
-    elif ref_database == "homd":
-        pipeline.add_args("-H")
+    if args.pipeline == "Vsearch":
+        if ref_database == "silva":
+            pipeline.add_args("-S")
+        elif ref_database == "homd":
+            pipeline.add_args("-H")
 
     # Echo the pipeline command to stdout
     print "Running %s" % pipeline
@@ -277,6 +281,9 @@
 """)
         # Look for raw and trimmed FastQC output for each sample
         for sample_name in sample_names:
+            # Replace underscores with hyphens in sample names
+            sample_name = sample_name.replace('_','-')
+            # Write HTML file with links to the FastQC boxplots
             fastqc_dir = os.path.join(sample_name,"FastQC")
             quality_boxplots.write("<h2>%s</h2>" % sample_name)
             for d in ("Raw","cutdapt_sickle/Q%s" % phred_score):
@@ -306,13 +313,41 @@
 </html>
 """)
 
+    # Handle DADA2 error rate plot PDFs
+    if args.pipeline == "DADA2":
+        print("Amplicon analysis: collecting error rate plots")
+        error_rate_plots_dir = os.path.abspath(
+            os.path.join("DADA2_OTU_tables",
+                         "Error_rate_plots"))
+        error_rate_plot_pdfs = [os.path.basename(pdf)
+                                for pdf in
+                                sorted(glob.glob(
+                                    os.path.join(error_rate_plots_dir,"*.pdf")))]
+        error_rate_plots_html = os.path.join(error_rate_plots_dir,
+                                             "error_rate_plots.html")
+        with open(error_rate_plots_html,"w") as error_rate_plots_out:
+            error_rate_plots_out.write("""<html>
+<head>
+<title>Amplicon analysis pipeline: DADA2 Error Rate Plots</title>
+</head>
+<body>
+<h1>Amplicon analysis pipeline: DADA2 Error Rate Plots</h1>
+""")
+            error_rate_plots_out.write("<ul>\n")
+            for pdf in error_rate_plot_pdfs:
+                error_rate_plots_out.write("<li>%s</li>\n" % ahref(pdf))
+            error_rate_plots_out.write("<ul>\n")
+            error_rate_plots_out.write("""</body>
+</html>
+""")
+
     # Handle additional output when categories file was supplied
     if args.categories_file is not None:
         # Alpha diversity boxplots
         print "Amplicon analysis: indexing alpha diversity boxplots"
         boxplots_dir = os.path.abspath(
             os.path.join("RESULTS",
-                         "%s_%s" % (args.pipeline.title(),
+                         "%s_%s" % (args.pipeline,
                                     ref_database),
                          "Alpha_diversity",
                          "Alpha_diversity_boxplot",
diff -r 3ab198df8f3f -r 86a12d75ebe4 amplicon_analysis_pipeline.xml
--- a/amplicon_analysis_pipeline.xml Thu Oct 18 09:18:04 2018 -0400
+++ b/amplicon_analysis_pipeline.xml Fri Dec 20 06:59:49 2019 -0500
@@ -1,19 +1,28 @@
-<tool id="amplicon_analysis_pipeline" name="Amplicon Analysis Pipeline" version="1.2.3.0">
+<tool id="amplicon_analysis_pipeline" name="Amplicon Analysis Pipeline" version="1.3.6.0">
   <description>analyse 16S rRNA data from Illumina Miseq paired-end reads</description>
   <requirements>
-    <requirement type="package" version="1.2.3">amplicon_analysis_pipeline</requirement>
+    <requirement type="package" version="1.3.6">amplicon_analysis_pipeline</requirement>
   </requirements>
   <stdio>
     <exit_code range="1:" />
   </stdio>
   <command><![CDATA[
+
+  ## Convenience variable for pipeline name
+  #set $pipeline_name = $pipeline.pipeline_name
+
   ## Set the reference database name
-  #if $reference_database == "-S"
-    #set reference_database_name = "silva"
-  #else if $reference_database == "-H"
-    #set reference_database_name = "homd"
+  #if str( $pipeline_name ) == "DADA2"
+     #set reference_database_name = "silva"
   #else
-    #set reference_database_name = "gg"
+     #set reference_database = $pipeline.reference_database
+     #if $reference_database == "-S"
+        #set reference_database_name = "silva"
+     #else if $reference_database == "-H"
+        #set reference_database_name = "homd"
+     #else
+        #set reference_database_name = "gg"
+     #end if
   #end if
 
   ## Run the amplicon analysis pipeline wrapper
@@ -37,9 +46,9 @@
   #if str( $minimum_length ) != ""
   -L $minimum_length
   #end if
-  -P $pipeline
-  -r \$AMPLICON_ANALYSIS_REF_DATA_PATH
-  #if str( $reference_database ) != ""
+  -P $pipeline_name
+  -r \${AMPLICON_ANALYSIS_REF_DATA_PATH-ReferenceData}
+  #if str( $pipeline_name ) != "DADA2"
    ${reference_database}
  #end if
  #if str($categories_file_in) != 'None'
@@ -60,48 +69,60 @@
 
   ## Collect outputs
   cp Metatable_log/Metatable_mod.txt "${metatable_mod}" &&
-  cp ${pipeline}_OTU_tables/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_tax_OTU_table.biom "${tax_otu_table_biom_file}" &&
-  cp ${pipeline}_OTU_tables/otus.tre "${otus_tre_file}" &&
-  cp RESULTS/${pipeline}_${reference_database_name}/OTUs_count.txt "${otus_count_file}" &&
-  cp RESULTS/${pipeline}_${reference_database_name}/table_summary.txt "${table_summary_file}" &&
-  cp Multiplexed_files/${pipeline}_pipeline/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta "${dereplicated_nonchimera_otus_fasta}" &&
-  cp QUALITY_CONTROL/Reads_count.txt "$read_counts_out" &&
+  #if str( $pipeline_name ) == "Vsearch"
+    ## Vsearch-specific
+    cp ${pipeline_name}_OTU_tables/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_tax_OTU_table.biom "${tax_otu_table_biom_file}" &&
+    cp Multiplexed_files/${pipeline_name}_pipeline/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta "${dereplicated_nonchimera_otus_fasta}" &&
+    cp QUALITY_CONTROL/Reads_count.txt "$read_counts_out" &&
+  #else
+    ## DADA2-specific
+    cp ${pipeline_name}_OTU_tables/DADA2_tax_OTU_table.biom "${tax_otu_table_biom_file}" &&
+    cp ${pipeline_name}_OTU_tables/seqs.fa "${dereplicated_nonchimera_otus_fasta}" &&
+  #end if
+  cp ${pipeline_name}_OTU_tables/otus.tre "${otus_tre_file}" &&
+  cp RESULTS/${pipeline_name}_${reference_database_name}/OTUs_count.txt "${otus_count_file}" &&
+  cp RESULTS/${pipeline_name}_${reference_database_name}/table_summary.txt "${table_summary_file}" &&
  cp fastqc_quality_boxplots.html "${fastqc_quality_boxplots_html}" &&
 
-  ## HTML outputs
+  ## OTU table heatmap
+  cp RESULTS/${pipeline_name}_${reference_database_name}/Heatmap.pdf "${heatmap_otu_table_pdf}" &&
 
-  ## OTU table
-  mkdir $heatmap_otu_table_html.files_path &&
-  cp -r RESULTS/${pipeline}_${reference_database_name}/Heatmap/js $heatmap_otu_table_html.files_path &&
-  cp RESULTS/${pipeline}_${reference_database_name}/Heatmap/otu_table.html "${heatmap_otu_table_html}" &&
+  ## HTML outputs
 
   ## Phylum genus barcharts
   mkdir $phylum_genus_dist_barcharts_html.files_p
[...]
="${tool.name} (${pipeline}):${title} beta diversity unweighted 2D plots HTML" />
+	  label="${tool.name} (${pipeline.pipeline_name}):${title} beta diversity unweighted 2D plots HTML" />
     <data format="html" name="alpha_div_rarefaction_plots"
-	  label="${tool.name} (${pipeline}):${title} alpha diversity rarefaction plots HTML" />
+	  label="${tool.name} (${pipeline.pipeline_name}):${title} alpha diversity rarefaction plots HTML" />
+    <data format="html" name="dada2_error_rate_plots"
+	  label="${tool.name} (${pipeline.pipeline_name}):${title} DADA2 error rate plots">
+      <filter>pipeline['pipeline_name'] == 'DADA2'</filter>
+    </data>
     <data format="html" name="alpha_div_boxplots"
-	  label="${tool.name} (${pipeline}):${title} alpha diversity boxplots">
+	  label="${tool.name} (${pipeline.pipeline_name}):${title} alpha diversity boxplots">
       <filter>categories_file_in is not None</filter>
     </data>
     <data format="html" name="log_files"
-	  label="${tool.name} (${pipeline}):${title} log files" />
+	  label="${tool.name} (${pipeline.pipeline_name}):${title} log files" />
   </outputs>
  <tests>
  </tests>
@@ -395,12 +424,11 @@
    380 (used for V3-V4 16S sequencing; expected length ~440bp)
 
  * **Pipeline to use for analysis** Choose the pipeline to use for OTU
-   clustering and chimera removal. The Galaxy tool currently supports
-   ``Vsearch`` only. ``Uparse`` and ``QIIME`` are planned to be added
-   shortly (the tools are already available for the stand-alone pipeline).
+   clustering and chimera removal. The Galaxy tool supports the ``Vsearch``
+   and ``DADA2`` pipelines.
 
- * **Reference database** Choose between ``GreenGenes`` and ``Silva``
-   databases for taxa assignment.
+ * **Reference database** Choose between ``GreenGenes``, ``Silva`` or
+   ``HOMD`` (Human Oral Microbiome Database) for taxa assignment.
 
 Click on **Execute** to start the analysis.
 
@@ -408,30 +436,31 @@
 **********
 
 Results are entirely generated using QIIME scripts. The results will 
-appear in the History panel when the analysis is completed
+appear in the History panel when the analysis is completed.
 
- * **Vsearch_tax_OTU_table (biom format)** The OTU table in BIOM format
-   (http://biom-format.org/)
+The following outputs are captured:
 
- * **Vsearch_OTUs.tree** Phylogenetic tree constructed using
-   ``make_phylogeny.py`` (fasttree) QIIME script
-   (http://qiime.org/scripts/make_phylogeny.html)
+ * **Vsearch_tax_OTU_table.biom|DADA2_tax_OTU_table.biom (biom format)**
+   The OTU table in BIOM format (http://biom-format.org/)
 
- * **Vsearch_phylum_genus_dist_barcharts_HTML** HTML file with bar
-   charts at Phylum, Genus and Species level
+ * **otus.tre** Phylogenetic tree constructed using ``make_phylogeny.py``
+   (fasttree) QIIME script (http://qiime.org/scripts/make_phylogeny.html)
+
+ * **Phylum_genus_dist_barcharts_HTML** HTML file with bar charts at
+   Phylum, Genus and Species level
    (http://qiime.org/scripts/summarize_taxa.html and
    http://qiime.org/scripts/plot_taxa_summary.html)
 
- * **Vsearch_OTUs_count_file** Summary of OTU counts per sample
+ * **OTUs_count_file** Summary of OTU counts per sample
   (http://biom-format.org/documentation/summarizing_biom_tables.html)
 
- * **Vsearch_table_summary_file** Summary of sequences counts per sample
+ * **Table_summary_file** Summary of sequences counts per sample
   (http://biom-format.org/documentation/summarizing_biom_tables.html)
 
- * **Vsearch_multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta**
-   Fasta file with OTU sequences
+ * **multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta|seqs.fa**
+   Fasta file with OTU sequences (Vsearch|DADA2)
 
- * **Vsearch_heatmap_OTU_table_HTML** Interactive OTU heatmap
+ * **Heatmap_PDF** OTU heatmap in PDF format
   (http://qiime.org/1.8.0/scripts/make_otu_heatmap_html.html )
 
  * **Vsearch_beta_diversity_weighted_2D_plots_HTML** PCoA plots in HTML
diff -r 3ab198df8f3f -r 86a12d75ebe4 install_amplicon_analysis-1.3.5.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/install_amplicon_analysis-1.3.5.sh Fri Dec 20 06:59:49 2019 -0500
@@ -0,0 +1,399 @@
+#!/bin/sh -e
+#
+# Prototype script to setup a conda environment with the
+# dependencies needed for the Amplicon_analysis_pipeline
+# script
+#
+# Handle command line
+usage()
+{
+    echo "Usage: $(basename $0) [DIR]"
+    echo ""
+    echo "Installs the Amplicon_analysis_pipeline package plus"
+    echo "dependencies in directory DIR (or current directory "
+    echo "if DIR not supplied)"
+}
+if [ ! -z "$1" ] ; then
+    # Check if help was requested
+    case "$1" in
+	--help|-h)
+	    usage
+	    exit 0
+	    ;;
+    esac
+    # Assume it's the installation directory
+    cd $1
+fi
+# Versions
+PIPELINE_VERSION=1.3.5
+CONDA_REQUIRED_VERSION=4.6.14
+RDP_CLASSIFIER_VERSION=2.2
+# Directories
+TOP_DIR=$(pwd)/Amplicon_analysis-${PIPELINE_VERSION}
+BIN_DIR=${TOP_DIR}/bin
+CONDA_DIR=${TOP_DIR}/conda
+CONDA_BIN=${CONDA_DIR}/bin
+CONDA_LIB=${CONDA_DIR}/lib
+CONDA=${CONDA_BIN}/conda
+ENV_NAME="amplicon_analysis_pipeline@${PIPELINE_VERSION}"
+ENV_DIR=${CONDA_DIR}/envs/$ENV_NAME
+#
+# Functions
+#
+# Report failure and terminate script
+fail()
+{
+    echo ""
+    echo ERROR $@ >&2
+    echo ""
+    echo "$(basename $0): installation failed"
+    exit 1
+}
+#
+# Rewrite the shebangs in the installed conda scripts
+# to remove the full path to conda 'bin' directory
+rewrite_conda_shebangs()
+{
+    pattern="s,^#!${CONDA_BIN}/,#!/usr/bin/env ,g"
+    find ${CONDA_BIN} -type f -exec sed -i "$pattern" {} \;
+}
+#
+# Reset conda version if required
+reset_conda_version()
+{
+    CONDA_VERSION="$(${CONDA_BIN}/conda -V 2>&1 | head -n 1 | cut -d' ' -f2)"
+    echo conda version: ${CONDA_VERSION}
+    if [ "${CONDA_VERSION}" != "${CONDA_REQUIRED_VERSION}" ] ; then
+	echo "Resetting conda to last known working version $CONDA_REQUIRED_VERSION"
+	${CONDA_BIN}/conda config --set allow_conda_downgrades true
+	${CONDA_BIN}/conda install -y conda=${CONDA_REQUIRED_VERSION}
+    else
+	echo "conda version ok"
+    fi
+}
+#
+# Install conda
+install_conda()
+{
+    echo "++++++++++++++++"
+    echo "Installing conda"
+    echo "++++++++++++++++"
+    if [ -e ${CONDA_DIR} ] ; then
+	echo "*** $CONDA_DIR already exists ***" >&2
+	return
+    fi
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh
+    bash ./Miniconda2-latest-Linux-x86_64.sh -b -p ${CONDA_DIR}
+    echo Installed conda in ${CONDA_DIR}
+    echo -n "Adding conda bin to PATH..."
+    export PATH=${CONDA_BIN}:$PATH
+    echo "ok"
+    # Reset the conda version to a known working version
+    # (to avoid problems observed with e.g. conda 4.7.10)
+    echo ""
+    reset_conda_version
+    # Update the installation files
+    # This is to avoid problems when the length the installation
+    # directory path exceeds the limit for the shebang statement
+    # in the conda files
+    echo ""
+    echo -n "Rewriting conda shebangs..."
+    rewrite_conda_shebangs
+    echo "ok"
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# Create conda environment
+install_conda_packages()
+{
+    echo "+++++++++++++++++++++++++"
+    echo "Installing conda packages"
+    echo "+++++++++++++++++++++++++"
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    cat >environment.yml <<EOF
+name: ${ENV_NAME}
+channels:
+  - defaults
+  - conda-forge
+  - bioconda
+dependencies:
+  - python=2.7
+  - cutadapt=1.8
+  - sickle-trim=1.33
+  - bioawk=1.0
+  - pandaseq=2.8.1
+  - spades=3.10.1
+  - fastqc=0.11.3
+  - qiime=1.9.1
+  - blast-legacy=2.2.26
+  - fasta-splitter=0.2.6
+  - rdp_classifier=$RDP_CLASSIFIER_VERSION
+  - vsearch=2.10.4
+  - r=3.5.1
+  - r-tidyverse=1.2.1
+  - bioconductor-dada2=1.8
+  - bioconductor-biomformat=1.8.0
+EOF
+    ${CONDA} env create --name "${ENV_NAME}" -f environment.yml
+    if [ $? -ne 0 ] ; then
+	fail "Non-zero exit status from 'conda env create'"
+    elif [ ! -e "${ENV_DIR}" ] ; then
+	fail "Failed to create conda environment: ${ENV_DIR} not found"
+    
[...]
-r \$DATA_DIR' when running Amplicon_analysis_pipeline.sh"
+echo "to use the reference data from this directory"
+echo ""
+echo "\$(basename \$0): finished"
+EOF
+    chmod 0755 ${BIN_DIR}/install_reference_data.sh
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# ChimeraSlayer
+install_chimeraslayer()
+{
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q https://sourceforge.net/projects/microbiomeutil/files/__OLD_VERSIONS/microbiomeutil_2010-04-29.tar.gz
+    tar zxf microbiomeutil_2010-04-29.tar.gz
+    cd microbiomeutil_2010-04-29
+    INSTALL_DIR=${TOP_DIR}/share/microbiome_chimeraslayer-2010-04-29
+    mkdir -p $INSTALL_DIR
+    ln -s $INSTALL_DIR ${TOP_DIR}/share/microbiome_chimeraslayer
+    /bin/cp -r ChimeraSlayer $INSTALL_DIR
+    cat >${BIN_DIR}/ChimeraSlayer.pl <<EOF
+#!/usr/bin/env bash
+export PATH=$INSTALL_DIR:\$PATH
+$INSTALL_DIR/ChimeraSlayer/ChimeraSlayer.pl $@
+EOF
+    chmod 0755 ${INSTALL_DIR}/ChimeraSlayer/ChimeraSlayer.pl
+    chmod 0755 ${BIN_DIR}/ChimeraSlayer.pl
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# uclust required for QIIME/pyNAST
+# License only allows this version to be used with those two packages
+# See: http://drive5.com/uclust/downloads1_2_22q.html
+install_uclust()
+{
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q http://drive5.com/uclust/uclustq1.2.22_i86linux64
+    INSTALL_DIR=${TOP_DIR}/share/uclust-1.2.22
+    mkdir -p $INSTALL_DIR
+    ln -s $INSTALL_DIR ${TOP_DIR}/share/uclust
+    /bin/mv uclustq1.2.22_i86linux64 ${INSTALL_DIR}/uclust
+    chmod 0755 ${INSTALL_DIR}/uclust
+    ln -s  ${INSTALL_DIR}/uclust ${BIN_DIR}
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+setup_pipeline_environment()
+{
+    echo "+++++++++++++++++++++++++++++++"
+    echo "Setting up pipeline environment"
+    echo "+++++++++++++++++++++++++++++++"
+    # fasta_splitter.pl
+    echo -n "Setting up fasta_splitter.pl..."
+    if [ -e ${BIN_DIR}/fasta-splitter.pl ] ; then
+	echo "already exists"
+    elif [ ! -e ${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl ] ; then
+	echo "failed"
+	fail "fasta-splitter.pl not found"
+    else
+	ln -s ${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl ${BIN_DIR}/fasta-splitter.pl
+	echo "ok"
+    fi
+    # rdp_classifier.jar
+    local rdp_classifier_jar=rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar
+    echo -n "Setting up rdp_classifier.jar..."
+    if [ -e ${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar} ] ; then
+	echo "already exists"
+    elif [ ! -e ${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar ] ; then
+	echo "failed"
+	fail "rdp_classifier.jar not found"
+    else
+	mkdir -p ${TOP_DIR}/share/rdp_classifier
+	ln -s ${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar ${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar}
+	echo "ok"
+    fi
+    # qiime_config
+    echo -n "Setting up qiime_config..."
+    if [ -e ${TOP_DIR}/qiime/qiime_config ] ; then
+	echo "already exists"
+    else
+	mkdir -p ${TOP_DIR}/qiime
+	cat >${TOP_DIR}/qiime/qiime_config <<EOF-qiime-config
+qiime_scripts_dir	${ENV_DIR}/bin
+EOF-qiime-config
+	echo "ok"
+    fi
+}
+#
+# Top level script does the installation
+echo "======================================="
+echo "Amplicon_analysis_pipeline installation"
+echo "======================================="
+echo "Installing into ${TOP_DIR}"
+if [ -e ${TOP_DIR} ] ; then
+    fail "Directory already exists"
+fi
+mkdir -p ${TOP_DIR}
+install_conda
+install_conda_packages
+install_non_conda_packages
+setup_pipeline_environment
+echo "===================================="
+echo "Amplicon_analysis_pipeline installed"
+echo "===================================="
+echo ""
+echo "Install reference data using:"
+echo ""
+echo "\$ ${BIN_DIR}/install_reference_data.sh DIR"
+echo ""
+echo "Run pipeline scripts using:"
+echo ""
+echo "\$ ${BIN_DIR}/Amplicon_analysis_pipeline.sh ..."
+echo ""
+echo "(or add ${BIN_DIR} to your PATH)"
+echo ""
+echo "$(basename $0): finished"
+##
+#
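
One step in the installer worth calling out: rewrite_conda_shebangs() works around the kernel's length limit on shebang lines by pointing installed scripts at /usr/bin/env instead of the (potentially very long) absolute conda path. A rough Python equivalent of that sed invocation, for illustration only:

    import os
    import re

    def rewrite_conda_shebangs(conda_bin):
        # Replace "#!<conda_bin>/<interp>" with "#!/usr/bin/env <interp>",
        # mirroring the sed pattern in the install script (illustrative)
        shebang = re.compile(r"^#!" + re.escape(conda_bin) + "/")
        for name in os.listdir(conda_bin):
            path = os.path.join(conda_bin, name)
            if not os.path.isfile(path):
                continue
            try:
                with open(path) as fp:
                    text = fp.read()
            except (IOError, UnicodeDecodeError):
                continue  # skip unreadable or binary files
            new_text = shebang.sub("#!/usr/bin/env ", text)
            if new_text != text:
                with open(path, "w") as fp:
                    fp.write(new_text)
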
diff -r 3ab198df8f3f -r 86a12d75ebe4 install_amplicon_analysis-1.3.6.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/install_amplicon_analysis-1.3.6.sh Fri Dec 20 06:59:49 2019 -0500
@@ -0,0 +1,399 @@
+#!/bin/sh -e
+#
+# Prototype script to setup a conda environment with the
+# dependencies needed for the Amplicon_analysis_pipeline
+# script
+#
+# Handle command line
+usage()
+{
+    echo "Usage: $(basename $0) [DIR]"
+    echo ""
+    echo "Installs the Amplicon_analysis_pipeline package plus"
+    echo "dependencies in directory DIR (or current directory "
+    echo "if DIR not supplied)"
+}
+if [ ! -z "$1" ] ; then
+    # Check if help was requested
+    case "$1" in
+	--help|-h)
+	    usage
+	    exit 0
+	    ;;
+    esac
+    # Assume it's the installation directory
+    cd $1
+fi
+# Versions
+PIPELINE_VERSION=1.3.6
+CONDA_REQUIRED_VERSION=4.6.14
+RDP_CLASSIFIER_VERSION=2.2
+# Directories
+TOP_DIR=$(pwd)/Amplicon_analysis-${PIPELINE_VERSION}
+BIN_DIR=${TOP_DIR}/bin
+CONDA_DIR=${TOP_DIR}/conda
+CONDA_BIN=${CONDA_DIR}/bin
+CONDA_LIB=${CONDA_DIR}/lib
+CONDA=${CONDA_BIN}/conda
+ENV_NAME="amplicon_analysis_pipeline@${PIPELINE_VERSION}"
+ENV_DIR=${CONDA_DIR}/envs/$ENV_NAME
+#
+# Functions
+#
+# Report failure and terminate script
+fail()
+{
+    echo ""
+    echo ERROR $@ >&2
+    echo ""
+    echo "$(basename $0): installation failed"
+    exit 1
+}
+#
+# Rewrite the shebangs in the installed conda scripts
+# to remove the full path to conda 'bin' directory
+rewrite_conda_shebangs()
+{
+    pattern="s,^#!${CONDA_BIN}/,#!/usr/bin/env ,g"
+    find ${CONDA_BIN} -type f -exec sed -i "$pattern" {} \;
+}
+#
+# Reset conda version if required
+reset_conda_version()
+{
+    CONDA_VERSION="$(${CONDA_BIN}/conda -V 2>&1 | head -n 1 | cut -d' ' -f2)"
+    echo conda version: ${CONDA_VERSION}
+    if [ "${CONDA_VERSION}" != "${CONDA_REQUIRED_VERSION}" ] ; then
+	echo "Resetting conda to last known working version $CONDA_REQUIRED_VERSION"
+	${CONDA_BIN}/conda config --set allow_conda_downgrades true
+	${CONDA_BIN}/conda install -y conda=${CONDA_REQUIRED_VERSION}
+    else
+	echo "conda version ok"
+    fi
+}
+#
+# Install conda
+install_conda()
+{
+    echo "++++++++++++++++"
+    echo "Installing conda"
+    echo "++++++++++++++++"
+    if [ -e ${CONDA_DIR} ] ; then
+	echo "*** $CONDA_DIR already exists ***" >&2
+	return
+    fi
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh
+    bash ./Miniconda2-latest-Linux-x86_64.sh -b -p ${CONDA_DIR}
+    echo Installed conda in ${CONDA_DIR}
+    echo -n "Adding conda bin to PATH..."
+    export PATH=${CONDA_BIN}:$PATH
+    echo "ok"
+    # Reset the conda version to a known working version
+    # (to avoid problems observed with e.g. conda 4.7.10)
+    echo ""
+    reset_conda_version
+    # Update the installation files
+    # This is to avoid problems when the length the installation
+    # directory path exceeds the limit for the shebang statement
+    # in the conda files
+    echo ""
+    echo -n "Rewriting conda shebangs..."
+    rewrite_conda_shebangs
+    echo "ok"
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# Create conda environment
+install_conda_packages()
+{
+    echo "+++++++++++++++++++++++++"
+    echo "Installing conda packages"
+    echo "+++++++++++++++++++++++++"
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    cat >environment.yml <<EOF
+name: ${ENV_NAME}
+channels:
+  - defaults
+  - conda-forge
+  - bioconda
+dependencies:
+  - python=2.7
+  - cutadapt=1.8
+  - sickle-trim=1.33
+  - bioawk=1.0
+  - pandaseq=2.8.1
+  - spades=3.10.1
+  - fastqc=0.11.3
+  - qiime=1.9.1
+  - blast-legacy=2.2.26
+  - fasta-splitter=0.2.6
+  - rdp_classifier=$RDP_CLASSIFIER_VERSION
+  - vsearch=2.10.4
+  - r=3.5.1
+  - r-tidyverse=1.2.1
+  - bioconductor-dada2=1.8
+  - bioconductor-biomformat=1.8.0
+EOF
+    ${CONDA} env create --name "${ENV_NAME}" -f environment.yml
+    if [ $? -ne 0 ] ; then
+	fail "Non-zero exit status from 'conda env create'"
+    elif [ ! -e "${ENV_DIR}" ] ; then
+	fail "Failed to create conda environment: ${ENV_DIR} not found"
+    
[...]
'-r \$DATA_DIR' when running Amplicon_analysis_pipeline.sh"
+echo "to use the reference data from this directory"
+echo ""
+echo "\$(basename \$0): finished"
+EOF
+    chmod 0755 ${BIN_DIR}/install_reference_data.sh
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# ChimeraSlayer
+install_chimeraslayer()
+{
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q https://sourceforge.net/projects/microbiomeutil/files/__OLD_VERSIONS/microbiomeutil_2010-04-29.tar.gz
+    tar zxf microbiomeutil_2010-04-29.tar.gz
+    cd microbiomeutil_2010-04-29
+    INSTALL_DIR=${TOP_DIR}/share/microbiome_chimeraslayer-2010-04-29
+    mkdir -p $INSTALL_DIR
+    ln -s $INSTALL_DIR ${TOP_DIR}/share/microbiome_chimeraslayer
+    /bin/cp -r ChimeraSlayer $INSTALL_DIR
+    cat >${BIN_DIR}/ChimeraSlayer.pl <<EOF
+#!/usr/bin/env bash
+export PATH=$INSTALL_DIR:\$PATH
+$INSTALL_DIR/ChimeraSlayer/ChimeraSlayer.pl $@
+EOF
+    chmod 0755 ${INSTALL_DIR}/ChimeraSlayer/ChimeraSlayer.pl
+    chmod 0755 ${BIN_DIR}/ChimeraSlayer.pl
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# uclust required for QIIME/pyNAST
+# License only allows this version to be used with those two packages
+# See: http://drive5.com/uclust/downloads1_2_22q.html
+install_uclust()
+{
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q http://drive5.com/uclust/uclustq1.2.22_i86linux64
+    INSTALL_DIR=${TOP_DIR}/share/uclust-1.2.22
+    mkdir -p $INSTALL_DIR
+    ln -s $INSTALL_DIR ${TOP_DIR}/share/uclust
+    /bin/mv uclustq1.2.22_i86linux64 ${INSTALL_DIR}/uclust
+    chmod 0755 ${INSTALL_DIR}/uclust
+    ln -s  ${INSTALL_DIR}/uclust ${BIN_DIR}
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+setup_pipeline_environment()
+{
+    echo "+++++++++++++++++++++++++++++++"
+    echo "Setting up pipeline environment"
+    echo "+++++++++++++++++++++++++++++++"
+    # fasta_splitter.pl
+    echo -n "Setting up fasta_splitter.pl..."
+    if [ -e ${BIN_DIR}/fasta-splitter.pl ] ; then
+	echo "already exists"
+    elif [ ! -e ${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl ] ; then
+	echo "failed"
+	fail "fasta-splitter.pl not found"
+    else
+	ln -s ${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl ${BIN_DIR}/fasta-splitter.pl
+	echo "ok"
+    fi
+    # rdp_classifier.jar
+    local rdp_classifier_jar=rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar
+    echo -n "Setting up rdp_classifier.jar..."
+    if [ -e ${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar} ] ; then
+	echo "already exists"
+    elif [ ! -e ${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar ] ; then
+	echo "failed"
+	fail "rdp_classifier.jar not found"
+    else
+	mkdir -p ${TOP_DIR}/share/rdp_classifier
+	ln -s ${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar ${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar}
+	echo "ok"
+    fi
+    # qiime_config
+    echo -n "Setting up qiime_config..."
+    if [ -e ${TOP_DIR}/qiime/qiime_config ] ; then
+	echo "already exists"
+    else
+	mkdir -p ${TOP_DIR}/qiime
+	cat >${TOP_DIR}/qiime/qiime_config <<EOF-qiime-config
+qiime_scripts_dir	${ENV_DIR}/bin
+EOF-qiime-config
+	echo "ok"
+    fi
+}
+#
+# Top level script does the installation
+echo "======================================="
+echo "Amplicon_analysis_pipeline installation"
+echo "======================================="
+echo "Installing into ${TOP_DIR}"
+if [ -e ${TOP_DIR} ] ; then
+    fail "Directory already exists"
+fi
+mkdir -p ${TOP_DIR}
+install_conda
+install_conda_packages
+install_non_conda_packages
+setup_pipeline_environment
+echo "===================================="
+echo "Amplicon_analysis_pipeline installed"
+echo "===================================="
+echo ""
+echo "Install reference data using:"
+echo ""
+echo "\$ ${BIN_DIR}/install_reference_data.sh DIR"
+echo ""
+echo "Run pipeline scripts using:"
+echo ""
+echo "\$ ${BIN_DIR}/Amplicon_analysis_pipeline.sh ..."
+echo ""
+echo "(or add ${BIN_DIR} to your PATH)"
+echo ""
+echo "$(basename $0): finished"
+##
+#
diff -r 3ab198df8f3f -r 86a12d75ebe4 outputs.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/outputs.txt Fri Dec 20 06:59:49 2019 -0500
@@ -0,0 +1,41 @@
+ok.. Metatable_log/Metatable_mod.txt
+ok.. Vsearch_OTU_tables/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_tax_OTU_table.biom
+ok.. Vsearch_OTU_tables/otus.tre
+ok.. RESULTS/Vsearch_gg/OTUs_count.txt
+ok.. RESULTS/Vsearch_gg/table_summary.txt
+ok.. Multiplexed_files/Vsearch_pipeline/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta
+ok.. QUALITY_CONTROL/Reads_count.txt
+ok.. fastqc_quality_boxplots.html -> generated by the Python wrapper
+NO.. RESULTS/Vsearch_gg/Heatmap/js -> RESULTS/Vsearch_gg/Heatmap.pdf
+NO.. RESULTS/Vsearch_gg/Heatmap/otu_table.html -> MISSING
+ok.. RESULTS/Vsearch_gg/phylum_genus_charts/charts/
+ok.. RESULTS/Vsearch_gg/phylum_genus_charts/raw_data/
+ok.. RESULTS/Vsearch_gg/phylum_genus_charts/bar_charts.html
+ok.. RESULTS/Vsearch_gg/beta_div_even/weighted_2d_plot/*
+ok.. RESULTS/Vsearch_gg/beta_div_even/weighted_2d_plot/weighted_unifrac_pc_2D_PCoA_plots.html
+ok.. RESULTS/Vsearch_gg/beta_div_even/unweighted_2d_plot/*
+ok.. RESULTS/Vsearch_gg/beta_div_even/unweighted_2d_plot/unweighted_unifrac_pc_2D_PCoA_plots.html
+ok.. RESULTS/Vsearch_gg/Alpha_diversity/rarefaction_curves/rarefaction_plots.html
+ok.. RESULTS/Vsearch_gg/Alpha_diversity/rarefaction_curves/average_plots
+ok.. RESULTS/Vsearch_gg/Alpha_diversity/Alpha_diversity_boxplot/Categories_shannon/*.pdf
+
+??.. Metatable_log/Metatable_mod.txt
+NO.. DADA2_OTU_tables/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_tax_OTU_table.biom
+ok.. DADA2_OTU_tables/otus.tre
+ok.. RESULTS/DADA2_silva/OTUs_count.txt
+ok.. RESULTS/DADA2_silva/table_summary.txt
+ok.. Multiplexed_files/DADA2_pipeline/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta --> DADA2_OTU_tables/seqs.fa
+NO.. QUALITY_CONTROL/Reads_count.txt -> Vsearch only
+ok.. fastqc_quality_boxplots.html -> generated by the Python wrapper
+NO.. RESULTS/DADA2_silva/Heatmap/js -> RESULTS/DADA2_silva/Heatmap.pdf
+NO.. RESULTS/DADA2_silva/Heatmap/otu_table.html
+ok.. RESULTS/DADA2_silva/phylum_genus_charts/charts/
+ok.. RESULTS/DADA2_silva/phylum_genus_charts/raw_data/
+ok.. RESULTS/DADA2_silva/phylum_genus_charts/bar_charts.html
+ok.. RESULTS/DADA2_silva/beta_div_even/weighted_2d_plot/*
+ok.. RESULTS/DADA2_silva/beta_div_even/weighted_2d_plot/weighted_unifrac_pc_2D_PCoA_plots.html
+ok.. RESULTS/DADA2_silva/beta_div_even/unweighted_2d_plot/*
+ok.. RESULTS/DADA2_silva/beta_div_even/unweighted_2d_plot/unweighted_unifrac_pc_2D_PCoA_plots.html
+ok.. RESULTS/DADA2_silva/Alpha_diversity/rarefaction_curves/rarefaction_plots.html
+ok.. RESULTS/DADA2_silva/Alpha_diversity/rarefaction_curves/average_plots
+ok.. RESULTS/DADA2_silva/Alpha_diversity/Alpha_diversity_boxplot/Categories_shannon/*.pdf -> missing? (didn't include categories?)
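
The checklist above is easy to automate; a small sketch that reproduces the ok../NO.. report for a list of expected outputs (check_outputs is a hypothetical helper, not part of the commit):

    import glob
    import os

    def check_outputs(expected_paths):
        # Print an "ok.."/"NO.." line per expected output,
        # in the style of the checklist above
        for path in expected_paths:
            if "*" in path:
                found = bool(glob.glob(path))
            else:
                found = os.path.exists(path)
            print("%s %s" % ("ok.." if found else "NO..", path))

    check_outputs([
        "Metatable_log/Metatable_mod.txt",
        "DADA2_OTU_tables/otus.tre",
        "RESULTS/DADA2_silva/OTUs_count.txt",
    ])
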
diff -r 3ab198df8f3f -r 86a12d75ebe4 tool_dependencies.xml
--- a/tool_dependencies.xml Thu Oct 18 09:18:04 2018 -0400
+++ b/tool_dependencies.xml Fri Dec 20 06:59:49 2019 -0500
@@ -1,14 +1,14 @@
 <?xml version="1.0"?>
 <tool_dependency>
-  <package name="amplicon_analysis_pipeline" version="1.2.3">
+  <package name="amplicon_analysis_pipeline" version="1.3.6">
     <install version="1.0">
       <actions>
- <action type="download_file">https://raw.githubusercontent.com/pjbriggs/Amplicon_analysis-galaxy/master/install_amplicon_analysis.sh</action>
+ <action type="download_file">https://raw.githubusercontent.com/pjbriggs/Amplicon_analysis-galaxy/master/install_amplicon_analysis-1.3.6.sh</action>
  <action type="shell_command">
-   sh ./install_amplicon_analysis.sh $INSTALL_DIR
+   sh ./install_amplicon_analysis-1.3.6.sh $INSTALL_DIR
  </action>
  <action type="set_environment">
-     <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/Amplicon_analysis-1.2.3/bin</environment_variable>
+     <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/Amplicon_analysis-1.3.6/bin</environment_variable>
  </action>
       </actions>
     </install>
diff -r 3ab198df8f3f -r 86a12d75ebe4 updating-to-pipeline-1.3-DADA2.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/updating-to-pipeline-1.3-DADA2.txt Fri Dec 20 06:59:49 2019 -0500
@@ -0,0 +1,58 @@
+Notes on updating Galaxy tool to pipeline 1.3 (DADA2)
+=====================================================
+
+Where stuff is:
+
+* projects/Amplicon_analysis-galaxy: git repo for Galaxy tool (these
+  developments are in the 'update-to-Amplicon_analysis_pipeline-1.3'
+  branch, PR #50:
+  https://github.com/pjbriggs/Amplicon_analysis-galaxy/pull/50)
+
+* scratchpad/test_Amplicon_analysis_pipeline_DADA2: directory for
+  running/testing the updates
+
+So far:
+
+* Updated the installer for pipeline version 1.3.2
+
+* Have been trying to run the pipeline manually outside of Galaxy
+  on popov & CSF3:
+  -- DADA2 works on popov (can't remember if it works on CSF3)
+  -- Vsearch pipeline fails on popov and CSF3 (but errors are
+     different)
+
+* Mauro is looking at fixing the errors while I carry on trying
+  to update the Galaxy tool
+
+Random notes from my notebook:
+
+p44:
+
+* DADA2 uses the NSLOTS environment variable from the local environment
+  (so it can pick up the number of cores allocated on the cluster; if
+  NSLOTS is not set it falls back to the number of cores on the local
+  machine)
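
A minimal sketch of the NSLOTS fallback described in this note (illustrative; in the pipeline the detection happens on the DADA2/R side):

    import multiprocessing
    import os

    def get_n_cores():
        # Use NSLOTS from the environment if set (e.g. on the cluster),
        # otherwise fall back to the local machine's core count
        try:
            return int(os.environ["NSLOTS"])
        except (KeyError, ValueError):
            return multiprocessing.cpu_count()
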
+
+* DADA2 has new outputs:
+  -- DADA2_OTU_tables/Error_rate_plots/ <-- need to capture all
+     PDFs from this folder
+
+pp78-79:
+
+* Galaxy wrapper could check that the 'Run' column is present in the
+  supplied metatable file (if it is missing then the pipeline now
+  fails)
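
The suggested 'Run' column check could be as simple as the following (hypothetical sketch; assumes a tab-delimited metatable with a header row):

    def check_metatable_has_run_column(metatable_file):
        # Fail early if the supplied metatable lacks the 'Run' column,
        # which the 1.3 pipeline now requires (hypothetical check)
        with open(metatable_file) as fp:
            header = fp.readline().rstrip("\n").split("\t")
        if "Run" not in header:
            raise ValueError("%s: missing 'Run' column" % metatable_file)
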
+
+* DADA2 has its own reference database
+
+* DADA2 produces the same outputs as Vsearch (with names changed from
+  "Vsearch_*" to "DADA2_*"), plus some extras/renames:
+  -- Vsearch_OTUs.tre -> otus.tre
+  -- Vsearch_multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta -> seqs.fa
+  -- There might be issues with the heatmap
+
+p83: notes on progress...
+
+p95:
+
+* Confirms heatmap is now e.g. RESULTS/Vsearch_silva/Heatmap.pdf
+  (instead of HTML output)