Mercurial repository: pjbriggs/amplicon_analysis_pipeline
changeset 4:86a12d75ebe4 (draft, branch: default, tag: tip)
planemo upload for repository https://github.com/pjbriggs/Amplicon_analysis-galaxy commit 7be61b7ed35ca3deaad68d2eae384c8cd365bcb8
author    pjbriggs
date      Fri, 20 Dec 2019 06:59:49 -0500
parents   3ab198df8f3f
children  (none)
files     README.rst amplicon_analysis_pipeline.py amplicon_analysis_pipeline.xml install_amplicon_analysis-1.3.5.sh install_amplicon_analysis-1.3.6.sh outputs.txt tool_dependencies.xml updating-to-pipeline-1.3-DADA2.txt
diffstat  8 files changed, 1058 insertions(+), 96 deletions(-)
--- a/README.rst	Thu Oct 18 09:18:04 2018 -0400
+++ b/README.rst	Fri Dec 20 06:59:49 2019 -0500
@@ -174,6 +174,7 @@
 ========== ======================================================================
 Version    Changes
 ---------- ----------------------------------------------------------------------
+1.3.5.0    Updated to Amplicon_Analysis_Pipeline version 1.3.5.
 1.2.3.0    Updated to Amplicon_Analysis_Pipeline version 1.2.3; install
            dependencies via tool_dependencies.xml.
 1.2.2.0    Updated to Amplicon_Analysis_Pipeline version 1.2.2 (removes
--- a/amplicon_analysis_pipeline.py	Thu Oct 18 09:18:04 2018 -0400
+++ b/amplicon_analysis_pipeline.py	Fri Dec 20 06:59:49 2019 -0500
@@ -117,9 +117,9 @@
     p.add_argument("-L",dest="minimum_length")
     p.add_argument("-l",dest="sliding_window_length")
     p.add_argument("-P",dest="pipeline",
-                   choices=["vsearch","uparse","qiime"],
-                   type=str.lower,
-                   default="vsearch")
+                   choices=["Vsearch","DADA2"],
+                   type=str,
+                   default="Vsearch")
     p.add_argument("-S",dest="use_silva",action="store_true")
     p.add_argument("-H",dest="use_homd",action="store_true")
     p.add_argument("-r",dest="reference_data_path")
@@ -155,12 +155,15 @@
         sample_names.append(sample_name)
 
     # Reference database
-    if args.use_silva:
+    if args.pipeline == "Vsearch":
+        if args.use_silva:
+            ref_database = "silva"
+        elif args.use_homd:
+            ref_database = "homd"
+        else:
+            ref_database = "gg"
+    elif args.pipeline == "DADA2":
         ref_database = "silva"
-    elif args.use_homd:
-        ref_database = "homd"
-    else:
-        ref_database = "gg"
 
     # Construct the pipeline command
     print "Amplicon analysis: constructing pipeline command"
@@ -180,10 +183,11 @@
     if args.reference_data_path:
         pipeline.add_args("-r",args.reference_data_path)
     pipeline.add_args("-P",args.pipeline)
-    if ref_database == "silva":
-        pipeline.add_args("-S")
-    elif ref_database == "homd":
-        pipeline.add_args("-H")
+    if args.pipeline == "Vsearch":
+        if ref_database == "silva":
+            pipeline.add_args("-S")
+        elif ref_database == "homd":
+            pipeline.add_args("-H")
 
     # Echo the pipeline command to stdout
     print "Running %s" % pipeline
@@ -277,6 +281,9 @@
     """)
         # Look for raw and trimmed FastQC output for each sample
         for sample_name in sample_names:
+            # Replace underscores with hyphens in sample names
+            sample_name = sample_name.replace('_','-')
+            # Write HTML file with links to the FastQC boxplots
            fastqc_dir = os.path.join(sample_name,"FastQC")
            quality_boxplots.write("<h2>%s</h2>" % sample_name)
            for d in ("Raw","cutdapt_sickle/Q%s" % phred_score):
@@ -306,13 +313,41 @@
 </html>
 """)
 
+    # Handle DADA2 error rate plot PDFs
+    if args.pipeline == "DADA2":
+        print("Amplicon analysis: collecting error rate plots")
+        error_rate_plots_dir = os.path.abspath(
+            os.path.join("DADA2_OTU_tables",
+                         "Error_rate_plots"))
+        error_rate_plot_pdfs = [os.path.basename(pdf)
+                                for pdf in
+                                sorted(glob.glob(
+                                    os.path.join(error_rate_plots_dir,"*.pdf")))]
+        error_rate_plots_html = os.path.join(error_rate_plots_dir,
+                                             "error_rate_plots.html")
+        with open(error_rate_plots_html,"w") as error_rate_plots_out:
+            error_rate_plots_out.write("""<html>
+<head>
+<title>Amplicon analysis pipeline: DADA2 Error Rate Plots</title>
+</head>
+<body>
+<h1>Amplicon analysis pipeline: DADA2 Error Rate Plots</h1>
+""")
+            error_rate_plots_out.write("<ul>\n")
+            for pdf in error_rate_plot_pdfs:
+                error_rate_plots_out.write("<li>%s</li>\n" % ahref(pdf))
+            error_rate_plots_out.write("</ul>\n")
+            error_rate_plots_out.write("""</body>
+</html>
+""")
+
     # Handle additional output when categories file was supplied
     if args.categories_file is not None:
         # Alpha diversity boxplots
         print "Amplicon analysis: indexing alpha diversity boxplots"
         boxplots_dir = os.path.abspath(
             os.path.join("RESULTS",
-                         "%s_%s" % (args.pipeline.title(),
+                         "%s_%s" % (args.pipeline,
                                     ref_database),
                          "Alpha_diversity",
                          "Alpha_diversity_boxplot",
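The branching added above is the crux of this change: the -S/-H reference database flags now only apply to the Vsearch pipeline, while DADA2 is hard-wired to Silva. As a minimal sketch, the mapping reduces to a small pure function (the function name and the ValueError are illustrative, not part of the wrapper)::

    from __future__ import print_function

    def select_reference_database(pipeline, use_silva=False, use_homd=False):
        """Mirror the wrapper's -P/-S/-H logic.

        Vsearch honours the -S (Silva) and -H (HOMD) flags and falls
        back to GreenGenes; DADA2 always uses its own Silva database.
        """
        if pipeline == "Vsearch":
            if use_silva:
                return "silva"
            elif use_homd:
                return "homd"
            return "gg"
        elif pipeline == "DADA2":
            return "silva"
        raise ValueError("Unknown pipeline: %s" % pipeline)

    print(select_reference_database("Vsearch", use_homd=True))  # homd
    print(select_reference_database("DADA2"))                   # silva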
--- a/amplicon_analysis_pipeline.xml	Thu Oct 18 09:18:04 2018 -0400
+++ b/amplicon_analysis_pipeline.xml	Fri Dec 20 06:59:49 2019 -0500
@@ -1,19 +1,28 @@
-<tool id="amplicon_analysis_pipeline" name="Amplicon Analysis Pipeline" version="1.2.3.0">
+<tool id="amplicon_analysis_pipeline" name="Amplicon Analysis Pipeline" version="1.3.6.0">
   <description>analyse 16S rRNA data from Illumina Miseq paired-end reads</description>
   <requirements>
-    <requirement type="package" version="1.2.3">amplicon_analysis_pipeline</requirement>
+    <requirement type="package" version="1.3.6">amplicon_analysis_pipeline</requirement>
   </requirements>
   <stdio>
     <exit_code range="1:" />
   </stdio>
   <command><![CDATA[
+
+  ## Convenience variable for pipeline name
+  #set $pipeline_name = $pipeline.pipeline_name
+
   ## Set the reference database name
-  #if $reference_database == "-S"
-    #set reference_database_name = "silva"
-  #else if $reference_database == "-H"
-    #set reference_database_name = "homd"
+  #if str( $pipeline_name ) == "DADA2"
+    #set reference_database_name = "silva"
   #else
-    #set reference_database_name = "gg"
+    #set reference_database = $pipeline.reference_database
+    #if $reference_database == "-S"
+      #set reference_database_name = "silva"
+    #else if $reference_database == "-H"
+      #set reference_database_name = "homd"
+    #else
+      #set reference_database_name = "gg"
+    #end if
   #end if
 
   ## Run the amplicon analysis pipeline wrapper
@@ -37,9 +46,9 @@
   #if str( $minimum_length ) != ""
   -L $minimum_length
   #end if
-  -P $pipeline
-  -r \$AMPLICON_ANALYSIS_REF_DATA_PATH
-  #if str( $reference_database ) != ""
+  -P $pipeline_name
+  -r \${AMPLICON_ANALYSIS_REF_DATA_PATH-ReferenceData}
+  #if str( $pipeline_name ) != "DADA2"
   ${reference_database}
   #end if
   #if str($categories_file_in) != 'None'
@@ -60,48 +69,60 @@
   ## Collect outputs
   cp Metatable_log/Metatable_mod.txt "${metatable_mod}" &&
-  cp ${pipeline}_OTU_tables/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_tax_OTU_table.biom "${tax_otu_table_biom_file}" &&
-  cp ${pipeline}_OTU_tables/otus.tre "${otus_tre_file}" &&
-  cp RESULTS/${pipeline}_${reference_database_name}/OTUs_count.txt "${otus_count_file}" &&
-  cp RESULTS/${pipeline}_${reference_database_name}/table_summary.txt "${table_summary_file}" &&
-  cp Multiplexed_files/${pipeline}_pipeline/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta "${dereplicated_nonchimera_otus_fasta}" &&
-  cp QUALITY_CONTROL/Reads_count.txt "$read_counts_out" &&
+  #if str( $pipeline_name ) == "Vsearch"
+    ## Vsearch-specific
+    cp ${pipeline_name}_OTU_tables/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_tax_OTU_table.biom "${tax_otu_table_biom_file}" &&
+    cp Multiplexed_files/${pipeline_name}_pipeline/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta "${dereplicated_nonchimera_otus_fasta}" &&
+    cp QUALITY_CONTROL/Reads_count.txt "$read_counts_out" &&
+  #else
+    ## DADA2-specific
+    cp ${pipeline_name}_OTU_tables/DADA2_tax_OTU_table.biom "${tax_otu_table_biom_file}" &&
+    cp ${pipeline_name}_OTU_tables/seqs.fa "${dereplicated_nonchimera_otus_fasta}" &&
+  #end if
+  cp ${pipeline_name}_OTU_tables/otus.tre "${otus_tre_file}" &&
+  cp RESULTS/${pipeline_name}_${reference_database_name}/OTUs_count.txt "${otus_count_file}" &&
+  cp RESULTS/${pipeline_name}_${reference_database_name}/table_summary.txt "${table_summary_file}" &&
   cp fastqc_quality_boxplots.html "${fastqc_quality_boxplots_html}" &&
 
-  ## HTML outputs
+  ## OTU table heatmap
+  cp RESULTS/${pipeline_name}_${reference_database_name}/Heatmap.pdf "${heatmap_otu_table_pdf}" &&
 
-  ## OTU table
-  mkdir $heatmap_otu_table_html.files_path &&
-  cp -r RESULTS/${pipeline}_${reference_database_name}/Heatmap/js $heatmap_otu_table_html.files_path &&
-  cp RESULTS/${pipeline}_${reference_database_name}/Heatmap/otu_table.html "${heatmap_otu_table_html}" &&
+  ## HTML outputs
 
   ## Phylum genus barcharts
   mkdir $phylum_genus_dist_barcharts_html.files_path &&
-  cp -r RESULTS/${pipeline}_${reference_database_name}/phylum_genus_charts/charts $phylum_genus_dist_barcharts_html.files_path &&
-  cp -r RESULTS/${pipeline}_${reference_database_name}/phylum_genus_charts/raw_data $phylum_genus_dist_barcharts_html.files_path &&
-  cp RESULTS/${pipeline}_${reference_database_name}/phylum_genus_charts/bar_charts.html "${phylum_genus_dist_barcharts_html}" &&
+  cp -r RESULTS/${pipeline_name}_${reference_database_name}/phylum_genus_charts/charts $phylum_genus_dist_barcharts_html.files_path &&
+  cp -r RESULTS/${pipeline_name}_${reference_database_name}/phylum_genus_charts/raw_data $phylum_genus_dist_barcharts_html.files_path &&
+  cp RESULTS/${pipeline_name}_${reference_database_name}/phylum_genus_charts/bar_charts.html "${phylum_genus_dist_barcharts_html}" &&
 
   ## Beta diversity weighted 2d plots
   mkdir $beta_div_even_weighted_2d_plots.files_path &&
-  cp -r RESULTS/${pipeline}_${reference_database_name}/beta_div_even/weighted_2d_plot/* $beta_div_even_weighted_2d_plots.files_path &&
-  cp RESULTS/${pipeline}_${reference_database_name}/beta_div_even/weighted_2d_plot/weighted_unifrac_pc_2D_PCoA_plots.html "${beta_div_even_weighted_2d_plots}" &&
+  cp -r RESULTS/${pipeline_name}_${reference_database_name}/beta_div_even/weighted_2d_plot/* $beta_div_even_weighted_2d_plots.files_path &&
+  cp RESULTS/${pipeline_name}_${reference_database_name}/beta_div_even/weighted_2d_plot/weighted_unifrac_pc_2D_PCoA_plots.html "${beta_div_even_weighted_2d_plots}" &&
 
   ## Beta diversity unweighted 2d plots
   mkdir $beta_div_even_unweighted_2d_plots.files_path &&
-  cp -r RESULTS/${pipeline}_${reference_database_name}/beta_div_even/unweighted_2d_plot/* $beta_div_even_unweighted_2d_plots.files_path &&
-  cp RESULTS/${pipeline}_${reference_database_name}/beta_div_even/unweighted_2d_plot/unweighted_unifrac_pc_2D_PCoA_plots.html "${beta_div_even_unweighted_2d_plots}" &&
+  cp -r RESULTS/${pipeline_name}_${reference_database_name}/beta_div_even/unweighted_2d_plot/* $beta_div_even_unweighted_2d_plots.files_path &&
+  cp RESULTS/${pipeline_name}_${reference_database_name}/beta_div_even/unweighted_2d_plot/unweighted_unifrac_pc_2D_PCoA_plots.html "${beta_div_even_unweighted_2d_plots}" &&
 
   ## Alpha diversity rarefaction plots
   mkdir $alpha_div_rarefaction_plots.files_path &&
-  cp RESULTS/${pipeline}_${reference_database_name}/Alpha_diversity/rarefaction_curves/rarefaction_plots.html $alpha_div_rarefaction_plots &&
-  cp -r RESULTS/${pipeline}_${reference_database_name}/Alpha_diversity/rarefaction_curves/average_plots $alpha_div_rarefaction_plots.files_path &&
+  cp RESULTS/${pipeline_name}_${reference_database_name}/Alpha_diversity/rarefaction_curves/rarefaction_plots.html $alpha_div_rarefaction_plots &&
+  cp -r RESULTS/${pipeline_name}_${reference_database_name}/Alpha_diversity/rarefaction_curves/average_plots $alpha_div_rarefaction_plots.files_path &&
+
+  ## DADA2 error rate plots
+  #if str($pipeline_name) == "DADA2"
+    mkdir $dada2_error_rate_plots.files_path &&
+    cp DADA2_OTU_tables/Error_rate_plots/error_rate_plots.html $dada2_error_rate_plots &&
+    cp -r DADA2_OTU_tables/Error_rate_plots/*.pdf $dada2_error_rate_plots.files_path &&
+  #end if
 
   ## Categories data
   #if str($categories_file_in) != 'None'
   ## Alpha diversity boxplots
   mkdir $alpha_div_boxplots.files_path &&
   cp alpha_diversity_boxplots.html "$alpha_div_boxplots" &&
-  cp RESULTS/${pipeline}_${reference_database_name}/Alpha_diversity/Alpha_diversity_boxplot/Categories_shannon/*.pdf $alpha_div_boxplots.files_path &&
+  cp RESULTS/${pipeline_name}_${reference_database_name}/Alpha_diversity/Alpha_diversity_boxplot/Categories_shannon/*.pdf $alpha_div_boxplots.files_path &&
   #end if
 
   ## Pipeline outputs (log files etc)
@@ -161,55 +182,63 @@
     <param type="integer" name="sliding_window_length" value="10"
           label="Minimum length in bp to retain a read after trimming"
           help="Supplied to Sickle; default is 10 (-l)" />
-    <param type="select" name="pipeline"
-          label="Pipeline to use for analysis">
-      <option value="Vsearch" selected="true" >Vsearch</option>
-      <!--
-      Remove the QIIME and Uparse options for now
-      <option value="QIIME">QIIME</option>
-      <option value="Uparse">Uparse</option>
-      -->
-    </param>
-    <param type="select" name="reference_database"
-          label="Reference database">
-      <option value="" selected="true">GreenGenes</option>
-      <option value="-S">Silva</option>
-      <option value="-H">Human Oral Microbiome Database (HOMD)</option>
-    </param>
+    <conditional name="pipeline">
+      <param type="select" name="pipeline_name"
+            label="Pipeline to use for analysis">
+        <option value="Vsearch" selected="true" >Vsearch</option>
+        <option value="DADA2">DADA2</option>
+      </param>
+      <when value="Vsearch">
+        <param type="select" name="reference_database"
+              label="Reference database">
+          <option value="" selected="true">GreenGenes</option>
+          <option value="-S">Silva</option>
+          <option value="-H">Human Oral Microbiome Database (HOMD)</option>
+        </param>
+      </when>
+      <when value="DADA2">
+      </when>
+    </conditional>
   </inputs>
   <outputs>
     <data format="tabular" name="metatable_mod"
          label="${tool.name}:${title} Metatable_mod.txt" />
     <data format="tabular" name="read_counts_out"
-         label="${tool.name} (${pipeline}):${title} read counts" />
+         label="${tool.name} (${pipeline.pipeline_name}):${title} read counts">
+      <filter>pipeline['pipeline_name'] == 'Vsearch'</filter>
+    </data>
     <data format="biom" name="tax_otu_table_biom_file"
-         label="${tool.name} (${pipeline}):${title} tax OTU table (biom format)" />
+         label="${tool.name} (${pipeline.pipeline_name}):${title} tax OTU table (biom format)" />
    <data format="tabular" name="otus_tre_file"
-         label="${tool.name} (${pipeline}):${title} otus.tre" />
+         label="${tool.name} (${pipeline.pipeline_name}):${title} otus.tre" />
    <data format="html" name="phylum_genus_dist_barcharts_html"
-         label="${tool.name} (${pipeline}):${title} phylum genus dist barcharts HTML" />
+         label="${tool.name} (${pipeline.pipeline_name}):${title} phylum genus dist barcharts HTML" />
    <data format="tabular" name="otus_count_file"
-         label="${tool.name} (${pipeline}):${title} OTUs count file" />
+         label="${tool.name} (${pipeline.pipeline_name}):${title} OTUs count file" />
    <data format="tabular" name="table_summary_file"
-         label="${tool.name} (${pipeline}):${title} table summary file" />
+         label="${tool.name} (${pipeline.pipeline_name}):${title} table summary file" />
    <data format="fasta" name="dereplicated_nonchimera_otus_fasta"
-         label="${tool.name} (${pipeline}):${title} multiplexed linearized dereplicated mc2 repset nonchimeras OTUs FASTA" />
+         label="${tool.name} (${pipeline.pipeline_name}):${title} multiplexed linearized dereplicated mc2 repset nonchimeras OTUs FASTA" />
    <data format="html" name="fastqc_quality_boxplots_html"
-         label="${tool.name} (${pipeline}):${title} FastQC per-base quality boxplots HTML" />
-    <data format="html" name="heatmap_otu_table_html"
-         label="${tool.name} (${pipeline}):${title} heatmap OTU table HTML" />
+         label="${tool.name} (${pipeline.pipeline_name}):${title} FastQC per-base quality boxplots HTML" />
+    <data format="pdf" name="heatmap_otu_table_pdf"
+         label="${tool.name} (${pipeline.pipeline_name}):${title} heatmap OTU table PDF" />
    <data format="html" name="beta_div_even_weighted_2d_plots"
-         label="${tool.name} (${pipeline}):${title} beta diversity weighted 2D plots HTML" />
+         label="${tool.name} (${pipeline.pipeline_name}):${title} beta diversity weighted 2D plots HTML" />
    <data format="html" name="beta_div_even_unweighted_2d_plots"
-         label="${tool.name} (${pipeline}):${title} beta diversity unweighted 2D plots HTML" />
+         label="${tool.name} (${pipeline.pipeline_name}):${title} beta diversity unweighted 2D plots HTML" />
    <data format="html" name="alpha_div_rarefaction_plots"
-         label="${tool.name} (${pipeline}):${title} alpha diversity rarefaction plots HTML" />
+         label="${tool.name} (${pipeline.pipeline_name}):${title} alpha diversity rarefaction plots HTML" />
+    <data format="html" name="dada2_error_rate_plots"
+         label="${tool.name} (${pipeline.pipeline_name}):${title} DADA2 error rate plots">
+      <filter>pipeline['pipeline_name'] == 'DADA2'</filter>
+    </data>
    <data format="html" name="alpha_div_boxplots"
-         label="${tool.name} (${pipeline}):${title} alpha diversity boxplots">
+         label="${tool.name} (${pipeline.pipeline_name}):${title} alpha diversity boxplots">
      <filter>categories_file_in is not None</filter>
    </data>
    <data format="html" name="log_files"
-         label="${tool.name} (${pipeline}):${title} log files" />
+         label="${tool.name} (${pipeline.pipeline_name}):${title} log files" />
  </outputs>
  <tests>
  </tests>
@@ -395,12 +424,11 @@
   380 (used for V3-V4 16S sequencing; expected length ~440bp)
 
 * **Pipeline to use for analysis** Choose the pipeline to use for OTU
-  clustering and chimera removal. The Galaxy tool currently supports
-  ``Vsearch`` only. ``Uparse`` and ``QIIME`` are planned to be added
-  shortly (the tools are already available for the stand-alone pipeline).
+  clustering and chimera removal. The Galaxy tool supports the ``Vsearch``
+  and ``DADA2`` pipelines.
 
-* **Reference database** Choose between ``GreenGenes`` and ``Silva``
-  databases for taxa assignment.
+* **Reference database** Choose between ``GreenGenes``, ``Silva`` or
+  ``HOMD`` (Human Oral Microbiome Database) for taxa assignment.
 
 Click on **Execute** to start the analysis.
 
@@ -408,30 +436,31 @@
 **********
 
 Results are entirely generated using QIIME scripts. The results will
-appear in the History panel when the analysis is completed
+appear in the History panel when the analysis is completed.
 
-* **Vsearch_tax_OTU_table (biom format)** The OTU table in BIOM format
-  (http://biom-format.org/)
+The following outputs are captured:
 
-* **Vsearch_OTUs.tree** Phylogenetic tree constructed using
-  ``make_phylogeny.py`` (fasttree) QIIME script
-  (http://qiime.org/scripts/make_phylogeny.html)
+* **Vsearch_tax_OTU_table.biom|DADA2_tax_OTU_table.biom (biom format)**
+  The OTU table in BIOM format (http://biom-format.org/)
 
-* **Vsearch_phylum_genus_dist_barcharts_HTML** HTML file with bar
-  charts at Phylum, Genus and Species level
+* **otus.tre** Phylogenetic tree constructed using ``make_phylogeny.py``
+  (fasttree) QIIME script (http://qiime.org/scripts/make_phylogeny.html)
+
+* **Phylum_genus_dist_barcharts_HTML** HTML file with bar charts at
+  Phylum, Genus and Species level
   (http://qiime.org/scripts/summarize_taxa.html and
   http://qiime.org/scripts/plot_taxa_summary.html)
 
-* **Vsearch_OTUs_count_file** Summary of OTU counts per sample
+* **OTUs_count_file** Summary of OTU counts per sample
   (http://biom-format.org/documentation/summarizing_biom_tables.html)
 
-* **Vsearch_table_summary_file** Summary of sequences counts per sample
+* **Table_summary_file** Summary of sequence counts per sample
  (http://biom-format.org/documentation/summarizing_biom_tables.html)
 
-* **Vsearch_multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta**
-  Fasta file with OTU sequences
+* **multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta|seqs.fa**
+  Fasta file with OTU sequences (Vsearch|DADA2)
 
-* **Vsearch_heatmap_OTU_table_HTML** Interactive OTU heatmap
+* **Heatmap_PDF** OTU heatmap in PDF format
  (http://qiime.org/1.8.0/scripts/make_otu_heatmap_html.html)
 
 * **Vsearch_beta_diversity_weighted_2D_plots_HTML** PCoA plots in HTML
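The ``<filter>`` tags added to the outputs above are Python expressions that Galaxy evaluates against the tool's parameter values, with a conditional contributing a nested dict keyed by its name. A minimal sketch of those semantics only (the ``params`` dict and ``keep_output`` helper are illustrative, not Galaxy's actual evaluation code)::

    from __future__ import print_function

    # Parameter state as the filter sees it: the 'pipeline' conditional
    # becomes a nested dict keyed by the conditional's name.
    params = {"pipeline": {"pipeline_name": "DADA2"}}

    def keep_output(filter_expr, params):
        """Keep a <data> output only if its <filter> expression is true."""
        return eval(filter_expr, {}, params)

    print(keep_output("pipeline['pipeline_name'] == 'Vsearch'", params))  # False: read_counts_out dropped
    print(keep_output("pipeline['pipeline_name'] == 'DADA2'", params))    # True: dada2_error_rate_plots kept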
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/install_amplicon_analysis-1.3.5.sh	Fri Dec 20 06:59:49 2019 -0500
@@ -0,0 +1,398 @@
+#!/bin/sh -e
+#
+# Prototype script to set up a conda environment with the
+# dependencies needed for the Amplicon_analysis_pipeline
+# script
+#
+# Handle command line
+usage()
+{
+    echo "Usage: $(basename $0) [DIR]"
+    echo ""
+    echo "Installs the Amplicon_analysis_pipeline package plus"
+    echo "dependencies in directory DIR (or current directory "
+    echo "if DIR not supplied)"
+}
+if [ ! -z "$1" ] ; then
+    # Check if help was requested
+    case "$1" in
+        --help|-h)
+            usage
+            exit 0
+            ;;
+    esac
+    # Assume it's the installation directory
+    cd $1
+fi
+# Versions
+PIPELINE_VERSION=1.3.5
+CONDA_REQUIRED_VERSION=4.6.14
+RDP_CLASSIFIER_VERSION=2.2
+# Directories
+TOP_DIR=$(pwd)/Amplicon_analysis-${PIPELINE_VERSION}
+BIN_DIR=${TOP_DIR}/bin
+CONDA_DIR=${TOP_DIR}/conda
+CONDA_BIN=${CONDA_DIR}/bin
+CONDA_LIB=${CONDA_DIR}/lib
+CONDA=${CONDA_BIN}/conda
+ENV_NAME="amplicon_analysis_pipeline@${PIPELINE_VERSION}"
+ENV_DIR=${CONDA_DIR}/envs/$ENV_NAME
+#
+# Functions
+#
+# Report failure and terminate script
+fail()
+{
+    echo ""
+    echo ERROR $@ >&2
+    echo ""
+    echo "$(basename $0): installation failed"
+    exit 1
+}
+#
+# Rewrite the shebangs in the installed conda scripts
+# to remove the full path to conda 'bin' directory
+rewrite_conda_shebangs()
+{
+    pattern="s,^#!${CONDA_BIN}/,#!/usr/bin/env ,g"
+    find ${CONDA_BIN} -type f -exec sed -i "$pattern" {} \;
+}
+#
+# Reset conda version if required
+reset_conda_version()
+{
+    CONDA_VERSION="$(${CONDA_BIN}/conda -V 2>&1 | head -n 1 | cut -d' ' -f2)"
+    echo conda version: ${CONDA_VERSION}
+    if [ "${CONDA_VERSION}" != "${CONDA_REQUIRED_VERSION}" ] ; then
+        echo "Resetting conda to last known working version $CONDA_REQUIRED_VERSION"
+        ${CONDA_BIN}/conda config --set allow_conda_downgrades true
+        ${CONDA_BIN}/conda install -y conda=${CONDA_REQUIRED_VERSION}
+    else
+        echo "conda version ok"
+    fi
+}
+#
+# Install conda
+install_conda()
+{
+    echo "++++++++++++++++"
+    echo "Installing conda"
+    echo "++++++++++++++++"
+    if [ -e ${CONDA_DIR} ] ; then
+        echo "*** $CONDA_DIR already exists ***" >&2
+        return
+    fi
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh
+    bash ./Miniconda2-latest-Linux-x86_64.sh -b -p ${CONDA_DIR}
+    echo Installed conda in ${CONDA_DIR}
+    echo -n "Adding conda bin to PATH..."
+    export PATH=${CONDA_BIN}:$PATH
+    echo "ok"
+    # Reset the conda version to a known working version
+    # (to avoid problems observed with e.g. conda 4.7.10)
+    echo ""
+    reset_conda_version
+    # Update the installation files
+    # This is to avoid problems when the length of the installation
+    # directory path exceeds the limit for the shebang statement
+    # in the conda files
+    echo ""
+    echo -n "Rewriting conda shebangs..."
+    rewrite_conda_shebangs
+    echo "ok"
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# Create conda environment
+install_conda_packages()
+{
+    echo "+++++++++++++++++++++++++"
+    echo "Installing conda packages"
+    echo "+++++++++++++++++++++++++"
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    cat >environment.yml <<EOF
+name: ${ENV_NAME}
+channels:
+  - defaults
+  - conda-forge
+  - bioconda
+dependencies:
+  - python=2.7
+  - cutadapt=1.8
+  - sickle-trim=1.33
+  - bioawk=1.0
+  - pandaseq=2.8.1
+  - spades=3.10.1
+  - fastqc=0.11.3
+  - qiime=1.9.1
+  - blast-legacy=2.2.26
+  - fasta-splitter=0.2.6
+  - rdp_classifier=$RDP_CLASSIFIER_VERSION
+  - vsearch=2.10.4
+  - r=3.5.1
+  - r-tidyverse=1.2.1
+  - bioconductor-dada2=1.8
+  - bioconductor-biomformat=1.8.0
+EOF
+    ${CONDA} env create --name "${ENV_NAME}" -f environment.yml
+    if [ $? -ne 0 ] ; then
+        fail "Non-zero exit status from 'conda env create'"
+    elif [ ! -e "${ENV_DIR}" ] ; then
+        fail "Failed to create conda environment: ${ENV_DIR} not found"
+    fi
+    echo Created conda environment in ${ENV_DIR}
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+    #
+    # Patch qiime 1.9.1 tools to switch deprecated 'axisbg'
+    # matplotlib property to 'facecolor':
+    # https://matplotlib.org/api/prev_api_changes/api_changes_2.0.0.html
+    echo ""
+    for exe in make_2d_plots.py plot_taxa_summary.py ; do
+        echo -n "Patching ${exe}..."
+        find ${CONDA_DIR} -type f -name "$exe" -exec sed -i 's/axisbg=/facecolor=/g' {} \;
+        echo "done"
+    done
+    #
+    # Patch qiime 1.9.1 tools to switch deprecated 'set_axis_bgcolor'
+    # method call to 'set_facecolor':
+    # https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.set_axis_bgcolor.html
+    for exe in make_rarefaction_plots.py ; do
+        echo -n "Patching ${exe}..."
+        find ${CONDA_DIR} -type f -name "$exe" -exec sed -i 's/set_axis_bgcolor/set_facecolor/g' {} \;
+        echo "done"
+    done
+}
+#
+# Install all the non-conda dependencies in a single
+# function (invokes separate functions for each package)
+install_non_conda_packages()
+{
+    echo "+++++++++++++++++++++++++++++"
+    echo "Installing non-conda packages"
+    echo "+++++++++++++++++++++++++++++"
+    # Temporary working directory
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    # Amplicon analysis pipeline
+    echo -n "Installing Amplicon_analysis_pipeline..."
+    if [ -e ${BIN_DIR}/Amplicon_analysis_pipeline.sh ] ; then
+        echo "already installed"
+    else
+        install_amplicon_analysis_pipeline
+        echo "ok"
+    fi
+    # ChimeraSlayer
+    echo -n "Installing ChimeraSlayer..."
+    if [ -e ${BIN_DIR}/ChimeraSlayer.pl ] ; then
+        echo "already installed"
+    else
+        install_chimeraslayer
+        echo "ok"
+    fi
+    # Uclust
+    # This no longer seems to be available for download from
+    # drive5.com so don't download
+    echo "WARNING uclust not available: skipping installation"
+}
+#
+# Amplicon analysis pipeline
+install_amplicon_analysis_pipeline()
+{
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q https://github.com/MTutino/Amplicon_analysis/archive/${PIPELINE_VERSION}.tar.gz
+    tar zxf ${PIPELINE_VERSION}.tar.gz
+    cd Amplicon_analysis-${PIPELINE_VERSION}
+    INSTALL_DIR=${TOP_DIR}/share/amplicon_analysis_pipeline-${PIPELINE_VERSION}
+    mkdir -p $INSTALL_DIR
+    ln -s $INSTALL_DIR ${TOP_DIR}/share/amplicon_analysis_pipeline
+    for f in *.sh *.R ; do
+        /bin/cp $f $INSTALL_DIR
+    done
+    /bin/cp -r uc2otutab $INSTALL_DIR
+    mkdir -p ${BIN_DIR}
+    cat >${BIN_DIR}/Amplicon_analysis_pipeline.sh <<EOF
+#!/usr/bin/env bash
+#
+# Point to Qiime config
+export QIIME_CONFIG_FP=${TOP_DIR}/qiime/qiime_config
+# Set up the RDP jar file
+export RDP_JAR_PATH=${TOP_DIR}/share/rdp_classifier/rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar
+# Set the Matplotlib backend
+export MPLBACKEND="agg"
+# Put the scripts onto the PATH
+export PATH=${BIN_DIR}:${INSTALL_DIR}:\$PATH
+# Activate the conda environment
+export PATH=${CONDA_BIN}:\$PATH
+source ${CONDA_BIN}/activate ${ENV_NAME}
+# Execute the driver script with the supplied arguments
+$INSTALL_DIR/Amplicon_analysis_pipeline.sh \$@
+exit \$?
+EOF
+    chmod 0755 ${BIN_DIR}/Amplicon_analysis_pipeline.sh
+    cat >${BIN_DIR}/install_reference_data.sh <<EOF
+#!/usr/bin/env bash
+set -e
+#
+function usage() {
+    echo "Usage: \$(basename \$0) DIR"
+}
+if [ -z "\$1" ] ; then
+    usage
+    exit 0
+elif [ "\$1" == "--help" ] || [ "\$1" == "-h" ] ; then
+    usage
+    echo ""
+    echo "Install reference data into DIR"
+    exit 0
+fi
+echo "=========================================="
+echo "Installing Amplicon analysis pipeline data"
+echo "=========================================="
+if [ ! -e "\$1" ] ; then
+    echo "Making directory \$1"
+    mkdir -p \$1
+fi
+cd \$1
+DATA_DIR=\$(pwd)
+echo "Installing reference data under \$DATA_DIR"
+$INSTALL_DIR/References.sh
+echo ""
+echo "Use '-r \$DATA_DIR' when running Amplicon_analysis_pipeline.sh"
+echo "to use the reference data from this directory"
+echo ""
+echo "\$(basename \$0): finished"
+EOF
+    chmod 0755 ${BIN_DIR}/install_reference_data.sh
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# ChimeraSlayer
+install_chimeraslayer()
+{
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q https://sourceforge.net/projects/microbiomeutil/files/__OLD_VERSIONS/microbiomeutil_2010-04-29.tar.gz
+    tar zxf microbiomeutil_2010-04-29.tar.gz
+    cd microbiomeutil_2010-04-29
+    INSTALL_DIR=${TOP_DIR}/share/microbiome_chimeraslayer-2010-04-29
+    mkdir -p $INSTALL_DIR
+    ln -s $INSTALL_DIR ${TOP_DIR}/share/microbiome_chimeraslayer
+    /bin/cp -r ChimeraSlayer $INSTALL_DIR
+    cat >${BIN_DIR}/ChimeraSlayer.pl <<EOF
+#!/usr/bin/env bash
+export PATH=$INSTALL_DIR:\$PATH
+$INSTALL_DIR/ChimeraSlayer/ChimeraSlayer.pl \$@
+EOF
+    chmod 0755 ${INSTALL_DIR}/ChimeraSlayer/ChimeraSlayer.pl
+    chmod 0755 ${BIN_DIR}/ChimeraSlayer.pl
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# uclust required for QIIME/pyNAST
+# License only allows this version to be used with those two packages
+# See: http://drive5.com/uclust/downloads1_2_22q.html
+install_uclust()
+{
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q http://drive5.com/uclust/uclustq1.2.22_i86linux64
+    INSTALL_DIR=${TOP_DIR}/share/uclust-1.2.22
+    mkdir -p $INSTALL_DIR
+    ln -s $INSTALL_DIR ${TOP_DIR}/share/uclust
+    /bin/mv uclustq1.2.22_i86linux64 ${INSTALL_DIR}/uclust
+    chmod 0755 ${INSTALL_DIR}/uclust
+    ln -s ${INSTALL_DIR}/uclust ${BIN_DIR}
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+setup_pipeline_environment()
+{
+    echo "+++++++++++++++++++++++++++++++"
+    echo "Setting up pipeline environment"
+    echo "+++++++++++++++++++++++++++++++"
+    # fasta-splitter.pl
+    echo -n "Setting up fasta-splitter.pl..."
+    if [ -e ${BIN_DIR}/fasta-splitter.pl ] ; then
+        echo "already exists"
+    elif [ ! -e ${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl ] ; then
+        echo "failed"
+        fail "fasta-splitter.pl not found"
+    else
+        ln -s ${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl ${BIN_DIR}/fasta-splitter.pl
+        echo "ok"
+    fi
+    # rdp_classifier.jar
+    local rdp_classifier_jar=rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar
+    echo -n "Setting up rdp_classifier.jar..."
+    if [ -e ${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar} ] ; then
+        echo "already exists"
+    elif [ ! -e ${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar ] ; then
+        echo "failed"
+        fail "rdp_classifier.jar not found"
+    else
+        mkdir -p ${TOP_DIR}/share/rdp_classifier
+        ln -s ${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar ${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar}
+        echo "ok"
+    fi
+    # qiime_config
+    echo -n "Setting up qiime_config..."
+    if [ -e ${TOP_DIR}/qiime/qiime_config ] ; then
+        echo "already exists"
+    else
+        mkdir -p ${TOP_DIR}/qiime
+        cat >${TOP_DIR}/qiime/qiime_config <<EOF-qiime-config
+qiime_scripts_dir	${ENV_DIR}/bin
+EOF-qiime-config
+        echo "ok"
+    fi
+}
+#
+# Top level script does the installation
+echo "======================================="
+echo "Amplicon_analysis_pipeline installation"
+echo "======================================="
+echo "Installing into ${TOP_DIR}"
+if [ -e ${TOP_DIR} ] ; then
+    fail "Directory already exists"
+fi
+mkdir -p ${TOP_DIR}
+install_conda
+install_conda_packages
+install_non_conda_packages
+setup_pipeline_environment
+echo "===================================="
+echo "Amplicon_analysis_pipeline installed"
+echo "===================================="
+echo ""
+echo "Install reference data using:"
+echo ""
+echo "\$ ${BIN_DIR}/install_reference_data.sh DIR"
+echo ""
+echo "Run pipeline scripts using:"
+echo ""
+echo "\$ ${BIN_DIR}/Amplicon_analysis_pipeline.sh ..."
+echo ""
+echo "(or add ${BIN_DIR} to your PATH)"
+echo ""
+echo "$(basename $0): finished"
+##
+#
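The rewrite_conda_shebangs step above works around the kernel's limit on shebang line length when the install path is long. A rough Python equivalent of that sed one-liner, for illustration only (the function name and example path are assumptions)::

    from __future__ import print_function
    import os

    def rewrite_shebangs(conda_bin):
        """Replace '#!<conda_bin>/python'-style shebangs with
        '#!/usr/bin/env python', mirroring the installer's sed command."""
        prefix = "#!" + conda_bin.rstrip("/") + "/"
        for name in os.listdir(conda_bin):
            path = os.path.join(conda_bin, name)
            if not os.path.isfile(path):
                continue
            try:
                with open(path) as f:
                    lines = f.readlines()
            except (IOError, UnicodeDecodeError):
                continue  # skip unreadable/binary files
            if lines and lines[0].startswith(prefix):
                interpreter = lines[0][len(prefix):].strip()
                lines[0] = "#!/usr/bin/env %s\n" % interpreter
                with open(path, "w") as f:
                    f.writelines(lines)

    # e.g. rewrite_shebangs("/scratch/Amplicon_analysis-1.3.5/conda/bin")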
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/install_amplicon_analysis-1.3.6.sh	Fri Dec 20 06:59:49 2019 -0500
@@ -0,0 +1,398 @@
+#!/bin/sh -e
+#
+# Prototype script to set up a conda environment with the
+# dependencies needed for the Amplicon_analysis_pipeline
+# script
+#
+# Handle command line
+usage()
+{
+    echo "Usage: $(basename $0) [DIR]"
+    echo ""
+    echo "Installs the Amplicon_analysis_pipeline package plus"
+    echo "dependencies in directory DIR (or current directory "
+    echo "if DIR not supplied)"
+}
+if [ ! -z "$1" ] ; then
+    # Check if help was requested
+    case "$1" in
+        --help|-h)
+            usage
+            exit 0
+            ;;
+    esac
+    # Assume it's the installation directory
+    cd $1
+fi
+# Versions
+PIPELINE_VERSION=1.3.6
+CONDA_REQUIRED_VERSION=4.6.14
+RDP_CLASSIFIER_VERSION=2.2
+# Directories
+TOP_DIR=$(pwd)/Amplicon_analysis-${PIPELINE_VERSION}
+BIN_DIR=${TOP_DIR}/bin
+CONDA_DIR=${TOP_DIR}/conda
+CONDA_BIN=${CONDA_DIR}/bin
+CONDA_LIB=${CONDA_DIR}/lib
+CONDA=${CONDA_BIN}/conda
+ENV_NAME="amplicon_analysis_pipeline@${PIPELINE_VERSION}"
+ENV_DIR=${CONDA_DIR}/envs/$ENV_NAME
+#
+# Functions
+#
+# Report failure and terminate script
+fail()
+{
+    echo ""
+    echo ERROR $@ >&2
+    echo ""
+    echo "$(basename $0): installation failed"
+    exit 1
+}
+#
+# Rewrite the shebangs in the installed conda scripts
+# to remove the full path to conda 'bin' directory
+rewrite_conda_shebangs()
+{
+    pattern="s,^#!${CONDA_BIN}/,#!/usr/bin/env ,g"
+    find ${CONDA_BIN} -type f -exec sed -i "$pattern" {} \;
+}
+#
+# Reset conda version if required
+reset_conda_version()
+{
+    CONDA_VERSION="$(${CONDA_BIN}/conda -V 2>&1 | head -n 1 | cut -d' ' -f2)"
+    echo conda version: ${CONDA_VERSION}
+    if [ "${CONDA_VERSION}" != "${CONDA_REQUIRED_VERSION}" ] ; then
+        echo "Resetting conda to last known working version $CONDA_REQUIRED_VERSION"
+        ${CONDA_BIN}/conda config --set allow_conda_downgrades true
+        ${CONDA_BIN}/conda install -y conda=${CONDA_REQUIRED_VERSION}
+    else
+        echo "conda version ok"
+    fi
+}
+#
+# Install conda
+install_conda()
+{
+    echo "++++++++++++++++"
+    echo "Installing conda"
+    echo "++++++++++++++++"
+    if [ -e ${CONDA_DIR} ] ; then
+        echo "*** $CONDA_DIR already exists ***" >&2
+        return
+    fi
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh
+    bash ./Miniconda2-latest-Linux-x86_64.sh -b -p ${CONDA_DIR}
+    echo Installed conda in ${CONDA_DIR}
+    echo -n "Adding conda bin to PATH..."
+    export PATH=${CONDA_BIN}:$PATH
+    echo "ok"
+    # Reset the conda version to a known working version
+    # (to avoid problems observed with e.g. conda 4.7.10)
+    echo ""
+    reset_conda_version
+    # Update the installation files
+    # This is to avoid problems when the length of the installation
+    # directory path exceeds the limit for the shebang statement
+    # in the conda files
+    echo ""
+    echo -n "Rewriting conda shebangs..."
+    rewrite_conda_shebangs
+    echo "ok"
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# Create conda environment
+install_conda_packages()
+{
+    echo "+++++++++++++++++++++++++"
+    echo "Installing conda packages"
+    echo "+++++++++++++++++++++++++"
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    cat >environment.yml <<EOF
+name: ${ENV_NAME}
+channels:
+  - defaults
+  - conda-forge
+  - bioconda
+dependencies:
+  - python=2.7
+  - cutadapt=1.8
+  - sickle-trim=1.33
+  - bioawk=1.0
+  - pandaseq=2.8.1
+  - spades=3.10.1
+  - fastqc=0.11.3
+  - qiime=1.9.1
+  - blast-legacy=2.2.26
+  - fasta-splitter=0.2.6
+  - rdp_classifier=$RDP_CLASSIFIER_VERSION
+  - vsearch=2.10.4
+  - r=3.5.1
+  - r-tidyverse=1.2.1
+  - bioconductor-dada2=1.8
+  - bioconductor-biomformat=1.8.0
+EOF
+    ${CONDA} env create --name "${ENV_NAME}" -f environment.yml
+    if [ $? -ne 0 ] ; then
+        fail "Non-zero exit status from 'conda env create'"
+    elif [ ! -e "${ENV_DIR}" ] ; then
+        fail "Failed to create conda environment: ${ENV_DIR} not found"
+    fi
+    echo Created conda environment in ${ENV_DIR}
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+    #
+    # Patch qiime 1.9.1 tools to switch deprecated 'axisbg'
+    # matplotlib property to 'facecolor':
+    # https://matplotlib.org/api/prev_api_changes/api_changes_2.0.0.html
+    echo ""
+    for exe in make_2d_plots.py plot_taxa_summary.py ; do
+        echo -n "Patching ${exe}..."
+        find ${CONDA_DIR} -type f -name "$exe" -exec sed -i 's/axisbg=/facecolor=/g' {} \;
+        echo "done"
+    done
+    #
+    # Patch qiime 1.9.1 tools to switch deprecated 'set_axis_bgcolor'
+    # method call to 'set_facecolor':
+    # https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.set_axis_bgcolor.html
+    for exe in make_rarefaction_plots.py ; do
+        echo -n "Patching ${exe}..."
+        find ${CONDA_DIR} -type f -name "$exe" -exec sed -i 's/set_axis_bgcolor/set_facecolor/g' {} \;
+        echo "done"
+    done
+}
+#
+# Install all the non-conda dependencies in a single
+# function (invokes separate functions for each package)
+install_non_conda_packages()
+{
+    echo "+++++++++++++++++++++++++++++"
+    echo "Installing non-conda packages"
+    echo "+++++++++++++++++++++++++++++"
+    # Temporary working directory
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    # Amplicon analysis pipeline
+    echo -n "Installing Amplicon_analysis_pipeline..."
+    if [ -e ${BIN_DIR}/Amplicon_analysis_pipeline.sh ] ; then
+        echo "already installed"
+    else
+        install_amplicon_analysis_pipeline
+        echo "ok"
+    fi
+    # ChimeraSlayer
+    echo -n "Installing ChimeraSlayer..."
+    if [ -e ${BIN_DIR}/ChimeraSlayer.pl ] ; then
+        echo "already installed"
+    else
+        install_chimeraslayer
+        echo "ok"
+    fi
+    # Uclust
+    # This no longer seems to be available for download from
+    # drive5.com so don't download
+    echo "WARNING uclust not available: skipping installation"
+}
+#
+# Amplicon analysis pipeline
+install_amplicon_analysis_pipeline()
+{
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q https://github.com/MTutino/Amplicon_analysis/archive/${PIPELINE_VERSION}.tar.gz
+    tar zxf ${PIPELINE_VERSION}.tar.gz
+    cd Amplicon_analysis-${PIPELINE_VERSION}
+    INSTALL_DIR=${TOP_DIR}/share/amplicon_analysis_pipeline-${PIPELINE_VERSION}
+    mkdir -p $INSTALL_DIR
+    ln -s $INSTALL_DIR ${TOP_DIR}/share/amplicon_analysis_pipeline
+    for f in *.sh *.R ; do
+        /bin/cp $f $INSTALL_DIR
+    done
+    /bin/cp -r uc2otutab $INSTALL_DIR
+    mkdir -p ${BIN_DIR}
+    cat >${BIN_DIR}/Amplicon_analysis_pipeline.sh <<EOF
+#!/usr/bin/env bash
+#
+# Point to Qiime config
+export QIIME_CONFIG_FP=${TOP_DIR}/qiime/qiime_config
+# Set up the RDP jar file
+export RDP_JAR_PATH=${TOP_DIR}/share/rdp_classifier/rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar
+# Set the Matplotlib backend
+export MPLBACKEND="agg"
+# Put the scripts onto the PATH
+export PATH=${BIN_DIR}:${INSTALL_DIR}:\$PATH
+# Activate the conda environment
+export PATH=${CONDA_BIN}:\$PATH
+source ${CONDA_BIN}/activate ${ENV_NAME}
+# Execute the driver script with the supplied arguments
+$INSTALL_DIR/Amplicon_analysis_pipeline.sh \$@
+exit \$?
+EOF
+    chmod 0755 ${BIN_DIR}/Amplicon_analysis_pipeline.sh
+    cat >${BIN_DIR}/install_reference_data.sh <<EOF
+#!/usr/bin/env bash
+set -e
+#
+function usage() {
+    echo "Usage: \$(basename \$0) DIR"
+}
+if [ -z "\$1" ] ; then
+    usage
+    exit 0
+elif [ "\$1" == "--help" ] || [ "\$1" == "-h" ] ; then
+    usage
+    echo ""
+    echo "Install reference data into DIR"
+    exit 0
+fi
+echo "=========================================="
+echo "Installing Amplicon analysis pipeline data"
+echo "=========================================="
+if [ ! -e "\$1" ] ; then
+    echo "Making directory \$1"
+    mkdir -p \$1
+fi
+cd \$1
+DATA_DIR=\$(pwd)
+echo "Installing reference data under \$DATA_DIR"
+$INSTALL_DIR/References.sh
+echo ""
+echo "Use '-r \$DATA_DIR' when running Amplicon_analysis_pipeline.sh"
+echo "to use the reference data from this directory"
+echo ""
+echo "\$(basename \$0): finished"
+EOF
+    chmod 0755 ${BIN_DIR}/install_reference_data.sh
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# ChimeraSlayer
+install_chimeraslayer()
+{
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q https://sourceforge.net/projects/microbiomeutil/files/__OLD_VERSIONS/microbiomeutil_2010-04-29.tar.gz
+    tar zxf microbiomeutil_2010-04-29.tar.gz
+    cd microbiomeutil_2010-04-29
+    INSTALL_DIR=${TOP_DIR}/share/microbiome_chimeraslayer-2010-04-29
+    mkdir -p $INSTALL_DIR
+    ln -s $INSTALL_DIR ${TOP_DIR}/share/microbiome_chimeraslayer
+    /bin/cp -r ChimeraSlayer $INSTALL_DIR
+    cat >${BIN_DIR}/ChimeraSlayer.pl <<EOF
+#!/usr/bin/env bash
+export PATH=$INSTALL_DIR:\$PATH
+$INSTALL_DIR/ChimeraSlayer/ChimeraSlayer.pl \$@
+EOF
+    chmod 0755 ${INSTALL_DIR}/ChimeraSlayer/ChimeraSlayer.pl
+    chmod 0755 ${BIN_DIR}/ChimeraSlayer.pl
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# uclust required for QIIME/pyNAST
+# License only allows this version to be used with those two packages
+# See: http://drive5.com/uclust/downloads1_2_22q.html
+install_uclust()
+{
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q http://drive5.com/uclust/uclustq1.2.22_i86linux64
+    INSTALL_DIR=${TOP_DIR}/share/uclust-1.2.22
+    mkdir -p $INSTALL_DIR
+    ln -s $INSTALL_DIR ${TOP_DIR}/share/uclust
+    /bin/mv uclustq1.2.22_i86linux64 ${INSTALL_DIR}/uclust
+    chmod 0755 ${INSTALL_DIR}/uclust
+    ln -s ${INSTALL_DIR}/uclust ${BIN_DIR}
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+setup_pipeline_environment()
+{
+    echo "+++++++++++++++++++++++++++++++"
+    echo "Setting up pipeline environment"
+    echo "+++++++++++++++++++++++++++++++"
+    # fasta-splitter.pl
+    echo -n "Setting up fasta-splitter.pl..."
+    if [ -e ${BIN_DIR}/fasta-splitter.pl ] ; then
+        echo "already exists"
+    elif [ ! -e ${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl ] ; then
+        echo "failed"
+        fail "fasta-splitter.pl not found"
+    else
+        ln -s ${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl ${BIN_DIR}/fasta-splitter.pl
+        echo "ok"
+    fi
+    # rdp_classifier.jar
+    local rdp_classifier_jar=rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar
+    echo -n "Setting up rdp_classifier.jar..."
+    if [ -e ${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar} ] ; then
+        echo "already exists"
+    elif [ ! -e ${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar ] ; then
+        echo "failed"
+        fail "rdp_classifier.jar not found"
+    else
+        mkdir -p ${TOP_DIR}/share/rdp_classifier
+        ln -s ${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar ${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar}
+        echo "ok"
+    fi
+    # qiime_config
+    echo -n "Setting up qiime_config..."
+    if [ -e ${TOP_DIR}/qiime/qiime_config ] ; then
+        echo "already exists"
+    else
+        mkdir -p ${TOP_DIR}/qiime
+        cat >${TOP_DIR}/qiime/qiime_config <<EOF-qiime-config
+qiime_scripts_dir	${ENV_DIR}/bin
+EOF-qiime-config
+        echo "ok"
+    fi
+}
+#
+# Top level script does the installation
+echo "======================================="
+echo "Amplicon_analysis_pipeline installation"
+echo "======================================="
+echo "Installing into ${TOP_DIR}"
+if [ -e ${TOP_DIR} ] ; then
+    fail "Directory already exists"
+fi
+mkdir -p ${TOP_DIR}
+install_conda
+install_conda_packages
+install_non_conda_packages
+setup_pipeline_environment
+echo "===================================="
+echo "Amplicon_analysis_pipeline installed"
+echo "===================================="
+echo ""
+echo "Install reference data using:"
+echo ""
+echo "\$ ${BIN_DIR}/install_reference_data.sh DIR"
+echo ""
+echo "Run pipeline scripts using:"
+echo ""
+echo "\$ ${BIN_DIR}/Amplicon_analysis_pipeline.sh ..."
+echo ""
+echo "(or add ${BIN_DIR} to your PATH)"
+echo ""
+echo "$(basename $0): finished"
+##
+#
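Both installers pin conda to a known working version by parsing the output of conda -V and downgrading if it differs. A small sketch of just that check in Python (the pinned value comes from the scripts; the helper name is illustrative)::

    from __future__ import print_function
    import subprocess

    CONDA_REQUIRED_VERSION = "4.6.14"

    def conda_needs_downgrade(conda_exe="conda"):
        """Return True if the installed conda version differs from the
        pinned version, mirroring the installers' reset_conda_version()."""
        out = subprocess.check_output([conda_exe, "-V"],
                                      stderr=subprocess.STDOUT)
        # Output looks like "conda 4.7.10"
        version = out.decode().split()[1]
        return version != CONDA_REQUIRED_VERSION

    # if conda_needs_downgrade("/path/to/conda/bin/conda"):
    #     ...run "conda install -y conda=4.6.14" as the scripts do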
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/outputs.txt	Fri Dec 20 06:59:49 2019 -0500
@@ -0,0 +1,41 @@
+ok.. Metatable_log/Metatable_mod.txt
+ok.. Vsearch_OTU_tables/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_tax_OTU_table.biom
+ok.. Vsearch_OTU_tables/otus.tre
+ok.. RESULTS/Vsearch_gg/OTUs_count.txt
+ok.. RESULTS/Vsearch_gg/table_summary.txt
+ok.. Multiplexed_files/Vsearch_pipeline/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta
+ok.. QUALITY_CONTROL/Reads_count.txt
+ok.. fastqc_quality_boxplots.html -> generated by the Python wrapper
+NO.. RESULTS/Vsearch_gg/Heatmap/js -> RESULTS/Vsearch_gg/Heatmap.pdf
+NO.. RESULTS/Vsearch_gg/Heatmap/otu_table.html -> MISSING
+ok.. RESULTS/Vsearch_gg/phylum_genus_charts/charts/
+ok.. RESULTS/Vsearch_gg/phylum_genus_charts/raw_data/
+ok.. RESULTS/Vsearch_gg/phylum_genus_charts/bar_charts.html
+ok.. RESULTS/Vsearch_gg/beta_div_even/weighted_2d_plot/*
+ok.. RESULTS/Vsearch_gg/beta_div_even/weighted_2d_plot/weighted_unifrac_pc_2D_PCoA_plots.html
+ok.. RESULTS/Vsearch_gg/beta_div_even/unweighted_2d_plot/*
+ok.. RESULTS/Vsearch_gg/beta_div_even/unweighted_2d_plot/unweighted_unifrac_pc_2D_PCoA_plots.html
+ok.. RESULTS/Vsearch_gg/Alpha_diversity/rarefaction_curves/rarefaction_plots.html
+ok.. RESULTS/Vsearch_gg/Alpha_diversity/rarefaction_curves/average_plots
+ok.. RESULTS/Vsearch_gg/Alpha_diversity/Alpha_diversity_boxplot/Categories_shannon/*.pdf
+
+??.. Metatable_log/Metatable_mod.txt
+NO.. DADA2_OTU_tables/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_tax_OTU_table.biom
+ok.. DADA2_OTU_tables/otus.tre
+ok.. RESULTS/DADA2_silva/OTUs_count.txt
+ok.. RESULTS/DADA2_silva/table_summary.txt
+ok.. Multiplexed_files/DADA2_pipeline/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta --> DADA2_OTU_tables/seqs.fa
+NO.. QUALITY_CONTROL/Reads_count.txt -> Vsearch only
+ok.. fastqc_quality_boxplots.html -> generated by the Python wrapper
+NO.. RESULTS/DADA2_silva/Heatmap/js -> RESULTS/DADA2_silva/Heatmap.pdf
+NO.. RESULTS/DADA2_silva/Heatmap/otu_table.html
+ok.. RESULTS/DADA2_silva/phylum_genus_charts/charts/
+ok.. RESULTS/DADA2_silva/phylum_genus_charts/raw_data/
+ok.. RESULTS/DADA2_silva/phylum_genus_charts/bar_charts.html
+ok.. RESULTS/DADA2_silva/beta_div_even/weighted_2d_plot/*
+ok.. RESULTS/DADA2_silva/beta_div_even/weighted_2d_plot/weighted_unifrac_pc_2D_PCoA_plots.html
+ok.. RESULTS/DADA2_silva/beta_div_even/unweighted_2d_plot/*
+ok.. RESULTS/DADA2_silva/beta_div_even/unweighted_2d_plot/unweighted_unifrac_pc_2D_PCoA_plots.html
+ok.. RESULTS/DADA2_silva/Alpha_diversity/rarefaction_curves/rarefaction_plots.html
+ok.. RESULTS/DADA2_silva/Alpha_diversity/rarefaction_curves/average_plots
+ok.. RESULTS/DADA2_silva/Alpha_diversity/Alpha_diversity_boxplot/Categories_shannon/*.pdf -> missing? (didn't include categories?)
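outputs.txt reads as a manual checklist of which expected outputs exist for a Vsearch run versus a DADA2 run. A small sketch that automates the same ok../NO.. report (the expected-path lists are abridged from the file; the function name is illustrative)::

    from __future__ import print_function
    import os

    EXPECTED = {
        "Vsearch": [
            "Metatable_log/Metatable_mod.txt",
            "Vsearch_OTU_tables/otus.tre",
            "RESULTS/Vsearch_gg/OTUs_count.txt",
            "QUALITY_CONTROL/Reads_count.txt",
        ],
        "DADA2": [
            "Metatable_log/Metatable_mod.txt",
            "DADA2_OTU_tables/otus.tre",
            "DADA2_OTU_tables/seqs.fa",
            "RESULTS/DADA2_silva/OTUs_count.txt",
        ],
    }

    def check_outputs(pipeline, top_dir="."):
        """Print an 'ok../NO..' line per expected output, like outputs.txt."""
        for rel_path in EXPECTED[pipeline]:
            exists = os.path.exists(os.path.join(top_dir, rel_path))
            print("%s %s" % ("ok.." if exists else "NO..", rel_path))

    check_outputs("DADA2")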
--- a/tool_dependencies.xml	Thu Oct 18 09:18:04 2018 -0400
+++ b/tool_dependencies.xml	Fri Dec 20 06:59:49 2019 -0500
@@ -1,14 +1,14 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="amplicon_analysis_pipeline" version="1.2.3">
+    <package name="amplicon_analysis_pipeline" version="1.3.6">
         <install version="1.0">
             <actions>
-                <action type="download_file">https://raw.githubusercontent.com/pjbriggs/Amplicon_analysis-galaxy/master/install_amplicon_analysis.sh</action>
+                <action type="download_file">https://raw.githubusercontent.com/pjbriggs/Amplicon_analysis-galaxy/master/install_amplicon_analysis-1.3.6.sh</action>
                 <action type="shell_command">
-                    sh ./install_amplicon_analysis.sh $INSTALL_DIR
+                    sh ./install_amplicon_analysis-1.3.6.sh $INSTALL_DIR
                 </action>
                 <action type="set_environment">
-                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/Amplicon_analysis-1.2.3/bin</environment_variable>
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/Amplicon_analysis-1.3.6/bin</environment_variable>
                 </action>
             </actions>
         </install>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/updating-to-pipeline-1.3-DADA2.txt	Fri Dec 20 06:59:49 2019 -0500
@@ -0,0 +1,58 @@
+Notes on updating Galaxy tool to pipeline 1.3 (DADA2)
+=====================================================
+
+Where stuff is:
+
+* projects/Amplicon_analysis-galaxy: git repo for Galaxy tool (these
+  developments are in the 'update-to-Amplicon_analysis_pipeline-1.3'
+  branch, PR #50:
+  https://github.com/pjbriggs/Amplicon_analysis-galaxy/pull/50)
+
+* scratchpad/test_Amplicon_analysis_pipeline_DADA2: directory for
+  running/testing the updates
+
+So far:
+
+* Updated the installer for pipeline version 1.3.2
+
+* Have been trying to run the pipeline manually outside of Galaxy
+  on popov & CSF3:
+  -- DADA2 works on popov (can't remember if it works on CSF3)
+  -- Vsearch pipeline fails on popov and CSF3 (but the errors are
+     different)
+
+* Mauro is looking at fixing the errors while I carry on trying
+  to update the Galaxy tool
+
+Random notes from my notebook:
+
+p44:
+
+* DADA2 uses the NSLOTS environment variable from the local environment
+  (so it can get the number of cores on the cluster; if NSLOTS is not
+  set then it gets the number of cores on the local machine)
+
+* DADA2 has new outputs:
+  -- DADA2_OTU_tables/Error_rate_plots/ <-- need to capture all
+     PDFs from this folder
+
+pp78-79:
+
+* Galaxy wrapper could check that the 'Run' column is in the supplied
+  metatable file (if it's not present then the pipeline will now fail);
+  see the sketch after these notes
+
+* DADA2 has its own reference database
+
+* DADA2 produces the same outputs as Vsearch (with names changed from
+  "Vsearch_*" to "DADA2_*"), plus extras:
+  -- Vsearch_OTUs.tre -> otus.tre
+  -- Vsearch_multiplexed_linearised_dereplicated_mc2_repset_nonchimeras_OTUS.fasta -> seqs.fa
+  -- There might be issues with the heatmap
+
+p83: notes on progress...
+
+p95:
+
+* Confirms heatmap is now e.g. RESULTS/Vsearch_silva/Heatmap.pdf
+  (instead of HTML output)
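The notes above suggest two checks the wrapper could make but does not yet: pre-validating the metatable's 'Run' column, and DADA2's NSLOTS-with-fallback core count. A minimal sketch of both (hypothetical helpers, not part of the current wrapper)::

    from __future__ import print_function
    import multiprocessing
    import os

    def check_metatable_has_run_column(metatable_file):
        """Fail early if the tab-delimited metatable lacks a 'Run'
        column, which pipeline 1.3 requires (hypothetical pre-flight
        check suggested in the notes)."""
        with open(metatable_file) as f:
            header = f.readline().rstrip("\n").split("\t")
        if "Run" not in header:
            raise ValueError("%s: no 'Run' column in metatable header"
                             % metatable_file)

    def get_n_slots():
        """Number of cores, as DADA2 is described to do: use NSLOTS
        from the environment (e.g. on the cluster), else fall back to
        the local machine's core count."""
        return int(os.environ.get("NSLOTS", multiprocessing.cpu_count()))

    print("Running with %d slots" % get_n_slots())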