Previous changeset: 3:3ab198df8f3f (2018-10-18)

Commit message:
    planemo upload for repository https://github.com/pjbriggs/Amplicon_analysis-galaxy commit 7be61b7ed35ca3deaad68d2eae384c8cd365bcb8

modified:
    README.rst amplicon_analysis_pipeline.py amplicon_analysis_pipeline.xml tool_dependencies.xml

added:
    install_amplicon_analysis-1.3.5.sh install_amplicon_analysis-1.3.6.sh outputs.txt updating-to-pipeline-1.3-DADA2.txt
diff -r 3ab198df8f3f -r 86a12d75ebe4 README.rst
--- a/README.rst	Thu Oct 18 09:18:04 2018 -0400
+++ b/README.rst	Fri Dec 20 06:59:49 2019 -0500
@@ -174,6 +174,7 @@
 ========== ======================================================================
 Version    Changes
 ---------- ----------------------------------------------------------------------
+1.3.5.0    Updated to Amplicon_Analysis_Pipeline version 1.3.5.
 1.2.3.0    Updated to Amplicon_Analysis_Pipeline version 1.2.3; install
            dependencies via tool_dependencies.xml.
 1.2.2.0    Updated to Amplicon_Analysis_Pipeline version 1.2.2 (removes
diff -r 3ab198df8f3f -r 86a12d75ebe4 amplicon_analysis_pipeline.py
--- a/amplicon_analysis_pipeline.py	Thu Oct 18 09:18:04 2018 -0400
+++ b/amplicon_analysis_pipeline.py	Fri Dec 20 06:59:49 2019 -0500
@@ -117,9 +117,9 @@
     p.add_argument("-L",dest="minimum_length")
     p.add_argument("-l",dest="sliding_window_length")
     p.add_argument("-P",dest="pipeline",
-                   choices=["vsearch","uparse","qiime"],
-                   type=str.lower,
-                   default="vsearch")
+                   choices=["Vsearch","DADA2"],
+                   type=str,
+                   default="Vsearch")
     p.add_argument("-S",dest="use_silva",action="store_true")
     p.add_argument("-H",dest="use_homd",action="store_true")
     p.add_argument("-r",dest="reference_data_path")
@@ -155,12 +155,15 @@
         sample_names.append(sample_name)
 
     # Reference database
-    if args.use_silva:
+    if args.pipeline == "Vsearch":
+        if args.use_silva:
+            ref_database = "silva"
+        elif args.use_homd:
+            ref_database = "homd"
+        else:
+            ref_database = "gg"
+    elif args.pipeline == "DADA2":
         ref_database = "silva"
-    elif args.use_homd:
-        ref_database = "homd"
-    else:
-        ref_database = "gg"
 
     # Construct the pipeline command
     print "Amplicon analysis: constructing pipeline command"
@@ -180,10 +183,11 @@
     if args.reference_data_path:
         pipeline.add_args("-r",args.reference_data_path)
     pipeline.add_args("-P",args.pipeline)
-    if ref_database == "silva":
-        pipeline.add_args("-S")
-    elif ref_database == "homd":
-        pipeline.add_args("-H")
+    if args.pipeline == "Vsearch":
+        if ref_database == "silva":
+            pipeline.add_args("-S")
+        elif ref_database == "homd":
+            pipeline.add_args("-H")
 
     # Echo the pipeline command to stdout
     print "Running %s" % pipeline
@@ -277,6 +281,9 @@
     """)
     # Look for raw and trimmed FastQC output for each sample
     for sample_name in sample_names:
+        # Replace underscores with hyphens in sample names
+        sample_name = sample_name.replace('_','-')
+        # Write HTML file with links to the FastQC boxplots
         fastqc_dir = os.path.join(sample_name,"FastQC")
         quality_boxplots.write("<h2>%s</h2>" % sample_name)
         for d in ("Raw","cutdapt_sickle/Q%s" % phred_score):
@@ -306,13 +313,41 @@
 </html>
 """)
 
+    # Handle DADA2 error rate plot PDFs
+    if args.pipeline == "DADA2":
+        print("Amplicon analysis: collecting error rate plots")
+        error_rate_plots_dir = os.path.abspath(
+            os.path.join("DADA2_OTU_tables",
+                         "Error_rate_plots"))
+        error_rate_plot_pdfs = [os.path.basename(pdf)
+                                for pdf in
+                                sorted(glob.glob(
+                                    os.path.join(error_rate_plots_dir,"*.pdf")))]
+        error_rate_plots_html = os.path.join(error_rate_plots_dir,
+                                             "error_rate_plots.html")
+        with open(error_rate_plots_html,"w") as error_rate_plots_out:
+            error_rate_plots_out.write("""<html>
+<head>
+<title>Amplicon analysis pipeline: DADA2 Error Rate Plots</title>
+<head>
+<body>
+<h1>Amplicon analysis pipeline: DADA2 Error Rate Plots</h1>
+""")
+            error_rate_plots_out.write("<ul>\n")
+            for pdf in error_rate_plot_pdfs:
+                error_rate_plots_out.write("<li>%s</li>\n" % ahref(pdf))
+            error_rate_plots_out.write("<ul>\n")
+            error_rate_plots_out.write("""</body>
+</html>
+""")
+
     # Handle additional output when categories file was supplied
     if args.categories_file is not None:
         # Alpha diversity boxplots
         print "Amplicon analysis: indexing alpha diversity boxplots"
         boxplots_dir = os.path.abspath(
             os.path.join("RESULTS",
-                         "%s_%s" % (args.pipeline.title(),
+                         "%s_%s" % (args.pipeline,
                                     ref_database),
                          "Alpha_diversity",
                          "Alpha_diversity_boxplot",
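The heart of this change is the pipeline-aware reference database selection:
Vsearch keeps honouring the -S (Silva) and -H (HOMD) flags with GreenGenes as
the default, while DADA2 always uses Silva. A minimal standalone sketch of
that logic (the function name and the ValueError are illustrative framing,
not part of the wrapper):

    # Condensed form of the selection logic added in the hunk above
    def select_ref_database(pipeline, use_silva=False, use_homd=False):
        if pipeline == "Vsearch":
            if use_silva:
                return "silva"
            elif use_homd:
                return "homd"
            return "gg"     # GreenGenes is the Vsearch default
        elif pipeline == "DADA2":
            return "silva"  # DADA2 ignores -S/-H and always uses Silva
        # NB the wrapper itself simply leaves ref_database unset here
        raise ValueError("unknown pipeline: %s" % pipeline)

    assert select_ref_database("Vsearch") == "gg"
    assert select_ref_database("Vsearch", use_homd=True) == "homd"
    assert select_ref_database("DADA2", use_homd=True) == "silva"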
diff -r 3ab198df8f3f -r 86a12d75ebe4 amplicon_analysis_pipeline.xml
--- a/amplicon_analysis_pipeline.xml	Thu Oct 18 09:18:04 2018 -0400
+++ b/amplicon_analysis_pipeline.xml	Fri Dec 20 06:59:49 2019 -0500
@@ -1,19 +1,28 @@
-<tool id="amplicon_analysis_pipeline" name="Amplicon Analysis Pipeline" version="1.2.3.0">
+<tool id="amplicon_analysis_pipeline" name="Amplicon Analysis Pipeline" version="1.3.6.0">
   <description>analyse 16S rRNA data from Illumina Miseq paired-end reads</description>
   <requirements>
-    <requirement type="package" version="1.2.3">amplicon_analysis_pipeline</requirement>
+    <requirement type="package" version="1.3.6">amplicon_analysis_pipeline</requirement>
   </requirements>
   <stdio>
     <exit_code range="1:" />
   </stdio>
   <command><![CDATA[
+
+  ## Convenience variable for pipeline name
+  #set $pipeline_name = $pipeline.pipeline_name
+
   ## Set the reference database name
-  #if $reference_database == "-S"
-  #set reference_database_name = "silva"
-  #else if $reference_database == "-H"
-  #set reference_database_name = "homd"
+  #if str( $pipeline_name ) == "DADA2"
+  #set reference_database_name = "silva"
   #else
-  #set reference_database_name = "gg"
+  #set reference_database = $pipeline.reference_database
+  #if $reference_database == "-S"
+  #set reference_database_name = "silva"
+  #else if $reference_database == "-H"
+  #set reference_database_name = "homd"
+  #else
+  #set reference_database_name = "gg"
+  #end if
   #end if
 
   ## Run the amplicon analysis pipeline wrapper
@@ -37,9 +46,9 @@
   #if str( $minimum_length ) != ""
   -L $minimum_length
   #end if
-  -P $pipeline
-  -r \$AMPLICON_ANALYSIS_REF_DATA_PATH
-  #if str( $reference_database ) != ""
+  -P $pipeline_name
+  -r \${AMPLICON_ANALYSIS_REF_DATA_PATH-ReferenceData}
+  #if str( $pipeline_name ) != "DADA2"
   ${reference_database}
   #end if
   #if str($categories_file_in) != 'None'
@@ -60,48 +69,60 @@
 
   ## Collect outputs
   cp Metatable_log/Metatable_mod.txt "${metatable_mod}" &&
-  cp ${pipeline}_OTU_tables/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_tax_OTU_table.biom "${tax_otu_table_biom_file}" &&
-  cp ${pipeline}_OTU_tables/otus.tre "${otus_tre_file}" &&
-  cp RESULTS/${pipeline}_${reference_database_name}/OTUs_count.txt "${otus_count_file}" &&
-  cp RESULTS/${pipeline}_${reference_database_name}/table_summary.txt "${table_summary_file}" &&
-  cp Multiplexed_files/${pipeline}_pipeline/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta "${dereplicated_nonchimera_otus_fasta}" &&
-  cp QUALITY_CONTROL/Reads_count.txt "$read_counts_out" &&
+  #if str( $pipeline_name ) == "Vsearch"
+  ## Vsearch-specific
+  cp ${pipeline_name}_OTU_tables/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_tax_OTU_table.biom "${tax_otu_table_biom_file}" &&
+  cp Multiplexed_files/${pipeline_name}_pipeline/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta "${dereplicated_nonchimera_otus_fasta}" &&
+  cp QUALITY_CONTROL/Reads_count.txt "$read_counts_out" &&
+  #else
+  ## DADA2-specific
+  cp ${pipeline_name}_OTU_tables/DADA2_tax_OTU_table.biom "${tax_otu_table_biom_file}" &&
+  cp ${pipeline_name}_OTU_tables/seqs.fa "${dereplicated_nonchimera_otus_fasta}" &&
+  #end if
+  cp ${pipeline_name}_OTU_tables/otus.tre "${otus_tre_file}" &&
+  cp RESULTS/${pipeline_name}_${reference_database_name}/OTUs_count.txt "${otus_count_file}" &&
+  cp RESULTS/${pipeline_name}_${reference_database_name}/table_summary.txt "${table_summary_file}" &&
   cp fastqc_quality_boxplots.html "${fastqc_quality_boxplots_html}" &&
 
-  ## HTML outputs
+  ## OTU table heatmap
+  cp RESULTS/${pipeline_name}_${reference_database_name}/Heatmap.pdf "${heatmap_otu_table_pdf}" &&
 
-  ## OTU table
-  mkdir $heatmap_otu_table_html.files_path &&
-  cp -r RESULTS/${pipeline}_${reference_database_name}/Heatmap/js $heatmap_otu_table_html.files_path &&
-  cp RESULTS/${pipeline}_${reference_database_name}/Heatmap/otu_table.html "${heatmap_otu_table_html}" &&
+  ## HTML outputs
 
   ## Phylum genus barcharts
   mkdir $phylum_genus_dist_barcharts_html.files_p

[...]

-      label="${tool.name} (${pipeline}):${title} beta diversity unweighted 2D plots HTML" />
+      label="${tool.name} (${pipeline.pipeline_name}):${title} beta diversity unweighted 2D plots HTML" />
     <data format="html" name="alpha_div_rarefaction_plots"
-      label="${tool.name} (${pipeline}):${title} alpha diversity rarefaction plots HTML" />
+      label="${tool.name} (${pipeline.pipeline_name}):${title} alpha diversity rarefaction plots HTML" />
+    <data format="html" name="dada2_error_rate_plots"
+      label="${tool.name} (${pipeline.pipeline_name}):${title} DADA2 error rate plots">
+      <filter>pipeline['pipeline_name'] == 'DADA2'</filter>
+    </data>
     <data format="html" name="alpha_div_boxplots"
-      label="${tool.name} (${pipeline}):${title} alpha diversity boxplots">
+      label="${tool.name} (${pipeline.pipeline_name}):${title} alpha diversity boxplots">
       <filter>categories_file_in is not None</filter>
     </data>
     <data format="html" name="log_files"
-      label="${tool.name} (${pipeline}):${title} log files" />
+      label="${tool.name} (${pipeline.pipeline_name}):${title} log files" />
   </outputs>
   <tests>
   </tests>
@@ -395,12 +424,11 @@
    380 (used for V3-V4 16S sequencing; expected length ~440bp)
 
 * **Pipeline to use for analysis** Choose the pipeline to use for OTU
-  clustering and chimera removal. The Galaxy tool currently supports
-  ``Vsearch`` only. ``Uparse`` and ``QIIME`` are planned to be added
-  shortly (the tools are already available for the stand-alone pipeline).
+  clustering and chimera removal. The Galaxy tool supports the ``Vsearch``
+  and ``DADA2`` pipelines.
 
-* **Reference database** Choose between ``GreenGenes`` and ``Silva``
-  databases for taxa assignment.
+* **Reference database** Choose between ``GreenGenes``, ``Silva`` or
+  ``HOMD`` (Human Oral Microbiome Database) for taxa assignment.
 
 Click on **Execute** to start the analysis.
 
@@ -408,30 +436,31 @@
 **********
 
 Results are entirely generated using QIIME scripts. The results will
-appear in the History panel when the analysis is completed
+appear in the History panel when the analysis is completed.
 
-* **Vsearch_tax_OTU_table (biom format)** The OTU table in BIOM format
-  (http://biom-format.org/)
+The following outputs are captured:
 
-* **Vsearch_OTUs.tree** Phylogenetic tree constructed using
-  ``make_phylogeny.py`` (fasttree) QIIME script
-  (http://qiime.org/scripts/make_phylogeny.html)
+* **Vsearch_tax_OTU_table.biom|DADA2_tax_OTU_table.biom (biom format)**
+  The OTU table in BIOM format (http://biom-format.org/)
 
-* **Vsearch_phylum_genus_dist_barcharts_HTML** HTML file with bar
-  charts at Phylum, Genus and Species level
+* **otus.tre** Phylogenetic tree constructed using ``make_phylogeny.py``
+  (fasttree) QIIME script (http://qiime.org/scripts/make_phylogeny.html)
+
+* **Phylum_genus_dist_barcharts_HTML** HTML file with bar charts at
+  Phylum, Genus and Species level
   (http://qiime.org/scripts/summarize_taxa.html and
   http://qiime.org/scripts/plot_taxa_summary.html)
 
-* **Vsearch_OTUs_count_file** Summary of OTU counts per sample
+* **OTUs_count_file** Summary of OTU counts per sample
   (http://biom-format.org/documentation/summarizing_biom_tables.html)
 
-* **Vsearch_table_summary_file** Summary of sequences counts per sample
+* **Table_summary_file** Summary of sequences counts per sample
  (http://biom-format.org/documentation/summarizing_biom_tables.html)
 
-* **Vsearch_multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta**
-  Fasta file with OTU sequences
+* **multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta|seqs.fa**
+  Fasta file with OTU sequences (Vsearch|DADA2)
 
-* **Vsearch_heatmap_OTU_table_HTML** Interactive OTU heatmap
+* **Heatmap_PDF** OTU heatmap in PDF format
  (http://qiime.org/1.8.0/scripts/make_otu_heatmap_html.html )
 
 * **Vsearch_beta_diversity_weighted_2D_plots_HTML** PCoA plots in HTML
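An easily missed change in the <command> block is "-r \$AMPLICON_ANALYSIS_REF_DATA_PATH"
becoming "-r \${AMPLICON_ANALYSIS_REF_DATA_PATH-ReferenceData}". This is POSIX
shell ${VAR-default} expansion: when the environment variable is unset, the
literal "ReferenceData" is substituted, so the pipeline still receives a usable
-r argument. The same lookup expressed in Python, purely for illustration:

    import os

    # Equivalent of the shell expansion ${AMPLICON_ANALYSIS_REF_DATA_PATH-ReferenceData}:
    # fall back to the relative directory name "ReferenceData" when the
    # environment variable is not set
    ref_data_path = os.environ.get("AMPLICON_ANALYSIS_REF_DATA_PATH",
                                   "ReferenceData")
    print("Reference data path: %s" % ref_data_path)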
diff -r 3ab198df8f3f -r 86a12d75ebe4 install_amplicon_analysis-1.3.5.sh
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/install_amplicon_analysis-1.3.5.sh	Fri Dec 20 06:59:49 2019 -0500
@@ -0,0 +1,399 @@
+#!/bin/sh -e
+#
+# Prototype script to setup a conda environment with the
+# dependencies needed for the Amplicon_analysis_pipeline
+# script
+#
+# Handle command line
+usage()
+{
+    echo "Usage: $(basename $0) [DIR]"
+    echo ""
+    echo "Installs the Amplicon_analysis_pipeline package plus"
+    echo "dependencies in directory DIR (or current directory "
+    echo "if DIR not supplied)"
+}
+if [ ! -z "$1" ] ; then
+    # Check if help was requested
+    case "$1" in
+        --help|-h)
+            usage
+            exit 0
+            ;;
+    esac
+    # Assume it's the installation directory
+    cd $1
+fi
+# Versions
+PIPELINE_VERSION=1.3.5
+CONDA_REQUIRED_VERSION=4.6.14
+RDP_CLASSIFIER_VERSION=2.2
+# Directories
+TOP_DIR=$(pwd)/Amplicon_analysis-${PIPELINE_VERSION}
+BIN_DIR=${TOP_DIR}/bin
+CONDA_DIR=${TOP_DIR}/conda
+CONDA_BIN=${CONDA_DIR}/bin
+CONDA_LIB=${CONDA_DIR}/lib
+CONDA=${CONDA_BIN}/conda
+ENV_NAME="amplicon_analysis_pipeline@${PIPELINE_VERSION}"
+ENV_DIR=${CONDA_DIR}/envs/$ENV_NAME
+#
+# Functions
+#
+# Report failure and terminate script
+fail()
+{
+    echo ""
+    echo ERROR $@ >&2
+    echo ""
+    echo "$(basename $0): installation failed"
+    exit 1
+}
+#
+# Rewrite the shebangs in the installed conda scripts
+# to remove the full path to conda 'bin' directory
+rewrite_conda_shebangs()
+{
+    pattern="s,^#!${CONDA_BIN}/,#!/usr/bin/env ,g"
+    find ${CONDA_BIN} -type f -exec sed -i "$pattern" {} \;
+}
+#
+# Reset conda version if required
+reset_conda_version()
+{
+    CONDA_VERSION="$(${CONDA_BIN}/conda -V 2>&1 | head -n 1 | cut -d' ' -f2)"
+    echo conda version: ${CONDA_VERSION}
+    if [ "${CONDA_VERSION}" != "${CONDA_REQUIRED_VERSION}" ] ; then
+        echo "Resetting conda to last known working version $CONDA_REQUIRED_VERSION"
+        ${CONDA_BIN}/conda config --set allow_conda_downgrades true
+        ${CONDA_BIN}/conda install -y conda=${CONDA_REQUIRED_VERSION}
+    else
+        echo "conda version ok"
+    fi
+}
+#
+# Install conda
+install_conda()
+{
+    echo "++++++++++++++++"
+    echo "Installing conda"
+    echo "++++++++++++++++"
+    if [ -e ${CONDA_DIR} ] ; then
+        echo "*** $CONDA_DIR already exists ***" >&2
+        return
+    fi
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh
+    bash ./Miniconda2-latest-Linux-x86_64.sh -b -p ${CONDA_DIR}
+    echo Installed conda in ${CONDA_DIR}
+    echo -n "Adding conda bin to PATH..."
+    export PATH=${CONDA_BIN}:$PATH
+    echo "ok"
+    # Reset the conda version to a known working version
+    # (to avoid problems observed with e.g. conda 4.7.10)
+    echo ""
+    reset_conda_version
+    # Update the installation files
+    # This is to avoid problems when the length the installation
+    # directory path exceeds the limit for the shebang statement
+    # in the conda files
+    echo ""
+    echo -n "Rewriting conda shebangs..."
+    rewrite_conda_shebangs
+    echo "ok"
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# Create conda environment
+install_conda_packages()
+{
+    echo "+++++++++++++++++++++++++"
+    echo "Installing conda packages"
+    echo "+++++++++++++++++++++++++"
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    cat >environment.yml <<EOF
+name: ${ENV_NAME}
+channels:
+  - defaults
+  - conda-forge
+  - bioconda
+dependencies:
+  - python=2.7
+  - cutadapt=1.8
+  - sickle-trim=1.33
+  - bioawk=1.0
+  - pandaseq=2.8.1
+  - spades=3.10.1
+  - fastqc=0.11.3
+  - qiime=1.9.1
+  - blast-legacy=2.2.26
+  - fasta-splitter=0.2.6
+  - rdp_classifier=$RDP_CLASSIFIER_VERSION
+  - vsearch=2.10.4
+  - r=3.5.1
+  - r-tidyverse=1.2.1
+  - bioconductor-dada2=1.8
+  - bioconductor-biomformat=1.8.0
+EOF
+    ${CONDA} env create --name "${ENV_NAME}" -f environment.yml
+    if [ $? -ne 0 ] ; then
+        fail "Non-zero exit status from 'conda env create'"
+    elif [ ! -e "${ENV_DIR}" ] ; then
+        fail "Failed to create conda environment: ${ENV_DIR} not found"

[...]

-r \$DATA_DIR' when running Amplicon_analysis_pipeline.sh"
+echo "to use the reference data from this directory"
+echo ""
+echo "\$(basename \$0): finished"
+EOF
+    chmod 0755 ${BIN_DIR}/install_reference_data.sh
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# ChimeraSlayer
+install_chimeraslayer()
+{
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q https://sourceforge.net/projects/microbiomeutil/files/__OLD_VERSIONS/microbiomeutil_2010-04-29.tar.gz
+    tar zxf microbiomeutil_2010-04-29.tar.gz
+    cd microbiomeutil_2010-04-29
+    INSTALL_DIR=${TOP_DIR}/share/microbiome_chimeraslayer-2010-04-29
+    mkdir -p $INSTALL_DIR
+    ln -s $INSTALL_DIR ${TOP_DIR}/share/microbiome_chimeraslayer
+    /bin/cp -r ChimeraSlayer $INSTALL_DIR
+    cat >${BIN_DIR}/ChimeraSlayer.pl <<EOF
+#!/usr/bin/env bash
+export PATH=$INSTALL_DIR:\$PATH
+$INSTALL_DIR/ChimeraSlayer/ChimeraSlayer.pl $@
+EOF
+    chmod 0755 ${INSTALL_DIR}/ChimeraSlayer/ChimeraSlayer.pl
+    chmod 0755 ${BIN_DIR}/ChimeraSlayer.pl
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# uclust required for QIIME/pyNAST
+# License only allows this version to be used with those two packages
+# See: http://drive5.com/uclust/downloads1_2_22q.html
+install_uclust()
+{
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q http://drive5.com/uclust/uclustq1.2.22_i86linux64
+    INSTALL_DIR=${TOP_DIR}/share/uclust-1.2.22
+    mkdir -p $INSTALL_DIR
+    ln -s $INSTALL_DIR ${TOP_DIR}/share/uclust
+    /bin/mv uclustq1.2.22_i86linux64 ${INSTALL_DIR}/uclust
+    chmod 0755 ${INSTALL_DIR}/uclust
+    ln -s ${INSTALL_DIR}/uclust ${BIN_DIR}
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+setup_pipeline_environment()
+{
+    echo "+++++++++++++++++++++++++++++++"
+    echo "Setting up pipeline environment"
+    echo "+++++++++++++++++++++++++++++++"
+    # fasta_splitter.pl
+    echo -n "Setting up fasta_splitter.pl..."
+    if [ -e ${BIN_DIR}/fasta-splitter.pl ] ; then
+        echo "already exists"
+    elif [ ! -e ${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl ] ; then
+        echo "failed"
+        fail "fasta-splitter.pl not found"
+    else
+        ln -s ${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl ${BIN_DIR}/fasta-splitter.pl
+        echo "ok"
+    fi
+    # rdp_classifier.jar
+    local rdp_classifier_jar=rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar
+    echo -n "Setting up rdp_classifier.jar..."
+    if [ -e ${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar} ] ; then
+        echo "already exists"
+    elif [ ! -e ${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar ] ; then
+        echo "failed"
+        fail "rdp_classifier.jar not found"
+    else
+        mkdir -p ${TOP_DIR}/share/rdp_classifier
+        ln -s ${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar ${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar}
+        echo "ok"
+    fi
+    # qiime_config
+    echo -n "Setting up qiime_config..."
+    if [ -e ${TOP_DIR}/qiime/qiime_config ] ; then
+        echo "already exists"
+    else
+        mkdir -p ${TOP_DIR}/qiime
+        cat >${TOP_DIR}/qiime/qiime_config <<EOF-qiime-config
+qiime_scripts_dir	${ENV_DIR}/bin
+EOF-qiime-config
+        echo "ok"
+    fi
+}
+#
+# Top level script does the installation
+echo "======================================="
+echo "Amplicon_analysis_pipeline installation"
+echo "======================================="
+echo "Installing into ${TOP_DIR}"
+if [ -e ${TOP_DIR} ] ; then
+    fail "Directory already exists"
+fi
+mkdir -p ${TOP_DIR}
+install_conda
+install_conda_packages
+install_non_conda_packages
+setup_pipeline_environment
+echo "===================================="
+echo "Amplicon_analysis_pipeline installed"
+echo "===================================="
+echo ""
+echo "Install reference data using:"
+echo ""
+echo "\$ ${BIN_DIR}/install_reference_data.sh DIR"
+echo ""
+echo "Run pipeline scripts using:"
+echo ""
+echo "\$ ${BIN_DIR}/Amplicon_analysis_pipeline.sh ..."
+echo ""
+echo "(or add ${BIN_DIR} to your PATH)"
+echo ""
+echo "$(basename $0): finished"
+##
+#
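A note on the rewrite_conda_shebangs function above: Linux truncates shebang
lines (typically at 127 characters), so a deeply nested installation path can
break every conda entry-point script; rewriting "#!<long path>/bin/python" to
"#!/usr/bin/env python" sidesteps the limit. A rough Python equivalent of the
sed one-liner, assuming the scripts are small enough to read whole
(illustrative only, not part of the installer):

    import os
    import re

    def rewrite_shebangs(conda_bin):
        # Mirrors: find ${CONDA_BIN} -type f -exec \
        #     sed -i "s,^#!${CONDA_BIN}/,#!/usr/bin/env ,g" {} \;
        pattern = re.compile(r"^#!%s/" % re.escape(conda_bin))
        for name in os.listdir(conda_bin):
            path = os.path.join(conda_bin, name)
            if not os.path.isfile(path):
                continue
            try:
                with open(path) as fp:
                    content = fp.read()
            except (IOError, UnicodeDecodeError):
                continue  # skip unreadable/binary files
            updated = pattern.sub("#!/usr/bin/env ", content, count=1)
            if updated != content:
                with open(path, "w") as fp:
                    fp.write(updated)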
diff -r 3ab198df8f3f -r 86a12d75ebe4 install_amplicon_analysis-1.3.6.sh
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/install_amplicon_analysis-1.3.6.sh	Fri Dec 20 06:59:49 2019 -0500
@@ -0,0 +1,399 @@
+#!/bin/sh -e
+#
+# Prototype script to setup a conda environment with the
+# dependencies needed for the Amplicon_analysis_pipeline
+# script
+#
+# Handle command line
+usage()
+{
+    echo "Usage: $(basename $0) [DIR]"
+    echo ""
+    echo "Installs the Amplicon_analysis_pipeline package plus"
+    echo "dependencies in directory DIR (or current directory "
+    echo "if DIR not supplied)"
+}
+if [ ! -z "$1" ] ; then
+    # Check if help was requested
+    case "$1" in
+        --help|-h)
+            usage
+            exit 0
+            ;;
+    esac
+    # Assume it's the installation directory
+    cd $1
+fi
+# Versions
+PIPELINE_VERSION=1.3.6
+CONDA_REQUIRED_VERSION=4.6.14
+RDP_CLASSIFIER_VERSION=2.2
+# Directories
+TOP_DIR=$(pwd)/Amplicon_analysis-${PIPELINE_VERSION}
+BIN_DIR=${TOP_DIR}/bin
+CONDA_DIR=${TOP_DIR}/conda
+CONDA_BIN=${CONDA_DIR}/bin
+CONDA_LIB=${CONDA_DIR}/lib
+CONDA=${CONDA_BIN}/conda
+ENV_NAME="amplicon_analysis_pipeline@${PIPELINE_VERSION}"
+ENV_DIR=${CONDA_DIR}/envs/$ENV_NAME
+#
+# Functions
+#
+# Report failure and terminate script
+fail()
+{
+    echo ""
+    echo ERROR $@ >&2
+    echo ""
+    echo "$(basename $0): installation failed"
+    exit 1
+}
+#
+# Rewrite the shebangs in the installed conda scripts
+# to remove the full path to conda 'bin' directory
+rewrite_conda_shebangs()
+{
+    pattern="s,^#!${CONDA_BIN}/,#!/usr/bin/env ,g"
+    find ${CONDA_BIN} -type f -exec sed -i "$pattern" {} \;
+}
+#
+# Reset conda version if required
+reset_conda_version()
+{
+    CONDA_VERSION="$(${CONDA_BIN}/conda -V 2>&1 | head -n 1 | cut -d' ' -f2)"
+    echo conda version: ${CONDA_VERSION}
+    if [ "${CONDA_VERSION}" != "${CONDA_REQUIRED_VERSION}" ] ; then
+        echo "Resetting conda to last known working version $CONDA_REQUIRED_VERSION"
+        ${CONDA_BIN}/conda config --set allow_conda_downgrades true
+        ${CONDA_BIN}/conda install -y conda=${CONDA_REQUIRED_VERSION}
+    else
+        echo "conda version ok"
+    fi
+}
+#
+# Install conda
+install_conda()
+{
+    echo "++++++++++++++++"
+    echo "Installing conda"
+    echo "++++++++++++++++"
+    if [ -e ${CONDA_DIR} ] ; then
+        echo "*** $CONDA_DIR already exists ***" >&2
+        return
+    fi
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh
+    bash ./Miniconda2-latest-Linux-x86_64.sh -b -p ${CONDA_DIR}
+    echo Installed conda in ${CONDA_DIR}
+    echo -n "Adding conda bin to PATH..."
+    export PATH=${CONDA_BIN}:$PATH
+    echo "ok"
+    # Reset the conda version to a known working version
+    # (to avoid problems observed with e.g. conda 4.7.10)
+    echo ""
+    reset_conda_version
+    # Update the installation files
+    # This is to avoid problems when the length the installation
+    # directory path exceeds the limit for the shebang statement
+    # in the conda files
+    echo ""
+    echo -n "Rewriting conda shebangs..."
+    rewrite_conda_shebangs
+    echo "ok"
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# Create conda environment
+install_conda_packages()
+{
+    echo "+++++++++++++++++++++++++"
+    echo "Installing conda packages"
+    echo "+++++++++++++++++++++++++"
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    cat >environment.yml <<EOF
+name: ${ENV_NAME}
+channels:
+  - defaults
+  - conda-forge
+  - bioconda
+dependencies:
+  - python=2.7
+  - cutadapt=1.8
+  - sickle-trim=1.33
+  - bioawk=1.0
+  - pandaseq=2.8.1
+  - spades=3.10.1
+  - fastqc=0.11.3
+  - qiime=1.9.1
+  - blast-legacy=2.2.26
+  - fasta-splitter=0.2.6
+  - rdp_classifier=$RDP_CLASSIFIER_VERSION
+  - vsearch=2.10.4
+  - r=3.5.1
+  - r-tidyverse=1.2.1
+  - bioconductor-dada2=1.8
+  - bioconductor-biomformat=1.8.0
+EOF
+    ${CONDA} env create --name "${ENV_NAME}" -f environment.yml
+    if [ $? -ne 0 ] ; then
+        fail "Non-zero exit status from 'conda env create'"
+    elif [ ! -e "${ENV_DIR}" ] ; then
+        fail "Failed to create conda environment: ${ENV_DIR} not found"

[...]

'-r \$DATA_DIR' when running Amplicon_analysis_pipeline.sh"
+echo "to use the reference data from this directory"
+echo ""
+echo "\$(basename \$0): finished"
+EOF
+    chmod 0755 ${BIN_DIR}/install_reference_data.sh
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# ChimeraSlayer
+install_chimeraslayer()
+{
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q https://sourceforge.net/projects/microbiomeutil/files/__OLD_VERSIONS/microbiomeutil_2010-04-29.tar.gz
+    tar zxf microbiomeutil_2010-04-29.tar.gz
+    cd microbiomeutil_2010-04-29
+    INSTALL_DIR=${TOP_DIR}/share/microbiome_chimeraslayer-2010-04-29
+    mkdir -p $INSTALL_DIR
+    ln -s $INSTALL_DIR ${TOP_DIR}/share/microbiome_chimeraslayer
+    /bin/cp -r ChimeraSlayer $INSTALL_DIR
+    cat >${BIN_DIR}/ChimeraSlayer.pl <<EOF
+#!/usr/bin/env bash
+export PATH=$INSTALL_DIR:\$PATH
+$INSTALL_DIR/ChimeraSlayer/ChimeraSlayer.pl $@
+EOF
+    chmod 0755 ${INSTALL_DIR}/ChimeraSlayer/ChimeraSlayer.pl
+    chmod 0755 ${BIN_DIR}/ChimeraSlayer.pl
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+#
+# uclust required for QIIME/pyNAST
+# License only allows this version to be used with those two packages
+# See: http://drive5.com/uclust/downloads1_2_22q.html
+install_uclust()
+{
+    local cwd=$(pwd)
+    local wd=$(mktemp -d)
+    cd $wd
+    wget -q http://drive5.com/uclust/uclustq1.2.22_i86linux64
+    INSTALL_DIR=${TOP_DIR}/share/uclust-1.2.22
+    mkdir -p $INSTALL_DIR
+    ln -s $INSTALL_DIR ${TOP_DIR}/share/uclust
+    /bin/mv uclustq1.2.22_i86linux64 ${INSTALL_DIR}/uclust
+    chmod 0755 ${INSTALL_DIR}/uclust
+    ln -s ${INSTALL_DIR}/uclust ${BIN_DIR}
+    cd $cwd
+    rm -rf $wd/*
+    rmdir $wd
+}
+setup_pipeline_environment()
+{
+    echo "+++++++++++++++++++++++++++++++"
+    echo "Setting up pipeline environment"
+    echo "+++++++++++++++++++++++++++++++"
+    # fasta_splitter.pl
+    echo -n "Setting up fasta_splitter.pl..."
+    if [ -e ${BIN_DIR}/fasta-splitter.pl ] ; then
+        echo "already exists"
+    elif [ ! -e ${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl ] ; then
+        echo "failed"
+        fail "fasta-splitter.pl not found"
+    else
+        ln -s ${ENV_DIR}/share/fasta-splitter/fasta-splitter.pl ${BIN_DIR}/fasta-splitter.pl
+        echo "ok"
+    fi
+    # rdp_classifier.jar
+    local rdp_classifier_jar=rdp_classifier-${RDP_CLASSIFIER_VERSION}.jar
+    echo -n "Setting up rdp_classifier.jar..."
+    if [ -e ${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar} ] ; then
+        echo "already exists"
+    elif [ ! -e ${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar ] ; then
+        echo "failed"
+        fail "rdp_classifier.jar not found"
+    else
+        mkdir -p ${TOP_DIR}/share/rdp_classifier
+        ln -s ${ENV_DIR}/share/rdp_classifier/rdp_classifier.jar ${TOP_DIR}/share/rdp_classifier/${rdp_classifier_jar}
+        echo "ok"
+    fi
+    # qiime_config
+    echo -n "Setting up qiime_config..."
+    if [ -e ${TOP_DIR}/qiime/qiime_config ] ; then
+        echo "already exists"
+    else
+        mkdir -p ${TOP_DIR}/qiime
+        cat >${TOP_DIR}/qiime/qiime_config <<EOF-qiime-config
+qiime_scripts_dir	${ENV_DIR}/bin
+EOF-qiime-config
+        echo "ok"
+    fi
+}
+#
+# Top level script does the installation
+echo "======================================="
+echo "Amplicon_analysis_pipeline installation"
+echo "======================================="
+echo "Installing into ${TOP_DIR}"
+if [ -e ${TOP_DIR} ] ; then
+    fail "Directory already exists"
+fi
+mkdir -p ${TOP_DIR}
+install_conda
+install_conda_packages
+install_non_conda_packages
+setup_pipeline_environment
+echo "===================================="
+echo "Amplicon_analysis_pipeline installed"
+echo "===================================="
+echo ""
+echo "Install reference data using:"
+echo ""
+echo "\$ ${BIN_DIR}/install_reference_data.sh DIR"
+echo ""
+echo "Run pipeline scripts using:"
+echo ""
+echo "\$ ${BIN_DIR}/Amplicon_analysis_pipeline.sh ..."
+echo ""
+echo "(or add ${BIN_DIR} to your PATH)"
+echo ""
+echo "$(basename $0): finished"
+##
+#
diff -r 3ab198df8f3f -r 86a12d75ebe4 outputs.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/outputs.txt	Fri Dec 20 06:59:49 2019 -0500
@@ -0,0 +1,41 @@
+ok.. Metatable_log/Metatable_mod.txt
+ok.. Vsearch_OTU_tables/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_tax_OTU_table.biom
+ok.. Vsearch_OTU_tables/otus.tre
+ok.. RESULTS/Vsearch_gg/OTUs_count.txt
+ok.. RESULTS/Vsearch_gg/table_summary.txt
+ok.. Multiplexed_files/Vsearch_pipeline/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta
+ok.. QUALITY_CONTROL/Reads_count.txt
+ok.. fastqc_quality_boxplots.html -> generated by the Python wrapper
+NO.. RESULTS/Vsearch_gg/Heatmap/js -> RESULTS/Vsearch_gg/Heatmap.pdf
+NO.. RESULTS/Vsearch_gg/Heatmap/otu_table.html -> MISSING
+ok.. RESULTS/Vsearch_gg/phylum_genus_charts/charts/
+ok.. RESULTS/Vsearch_gg/phylum_genus_charts/raw_data/
+ok.. RESULTS/Vsearch_gg/phylum_genus_charts/bar_charts.html
+ok.. RESULTS/Vsearch_gg/beta_div_even/weighted_2d_plot/*
+ok.. RESULTS/Vsearch_gg/beta_div_even/weighted_2d_plot/weighted_unifrac_pc_2D_PCoA_plots.html
+ok.. RESULTS/Vsearch_gg/beta_div_even/unweighted_2d_plot/*
+ok.. RESULTS/Vsearch_gg/beta_div_even/unweighted_2d_plot/unweighted_unifrac_pc_2D_PCoA_plots.html
+ok.. RESULTS/Vsearch_gg/Alpha_diversity/rarefaction_curves/rarefaction_plots.html
+ok.. RESULTS/Vsearch_gg/Alpha_diversity/rarefaction_curves/average_plots
+ok.. RESULTS/Vsearch_gg/Alpha_diversity/Alpha_diversity_boxplot/Categories_shannon/*.pdf
+
+??.. Metatable_log/Metatable_mod.txt
+NO.. DADA2_OTU_tables/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_tax_OTU_table.biom
+ok.. DADA2_OTU_tables/otus.tre
+ok.. RESULTS/DADA2_silva/OTUs_count.txt
+ok.. RESULTS/DADA2_silva/table_summary.txt
+ok.. Multiplexed_files/DADA2_pipeline/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta --> DADA2_OTU_tables/seqs.fa
+NO.. QUALITY_CONTROL/Reads_count.txt -> Vsearch only
+ok.. fastqc_quality_boxplots.html -> generated by the Python wrapper
+NO.. RESULTS/DADA2_silva/Heatmap/js -> RESULTS/DADA2_silva/Heatmap.pdf
+NO.. RESULTS/DADA2_silva/Heatmap/otu_table.html
+ok.. RESULTS/DADA2_silva/phylum_genus_charts/charts/
+ok.. RESULTS/DADA2_silva/phylum_genus_charts/raw_data/
+ok.. RESULTS/DADA2_silva/phylum_genus_charts/bar_charts.html
+ok.. RESULTS/DADA2_silva/beta_div_even/weighted_2d_plot/*
+ok.. RESULTS/DADA2_silva/beta_div_even/weighted_2d_plot/weighted_unifrac_pc_2D_PCoA_plots.html
+ok.. RESULTS/DADA2_silva/beta_div_even/unweighted_2d_plot/*
+ok.. RESULTS/DADA2_silva/beta_div_even/unweighted_2d_plot/unweighted_unifrac_pc_2D_PCoA_plots.html
+ok.. RESULTS/DADA2_silva/Alpha_diversity/rarefaction_curves/rarefaction_plots.html
+ok.. RESULTS/DADA2_silva/Alpha_diversity/rarefaction_curves/average_plots
+ok.. RESULTS/DADA2_silva/Alpha_diversity/Alpha_diversity_boxplot/Categories_shannon/*.pdf -> missing? (didn't include categories?)
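outputs.txt reads as a manual checklist of which expected outputs each
pipeline run actually produced ("ok.." = found, "NO.." = missing). A check
like this is easy to script; a sketch using a few of the DADA2 paths from the
list above (wildcard entries go through glob; assumes it is run from the
pipeline working directory):

    import glob

    expected = [
        "DADA2_OTU_tables/otus.tre",
        "DADA2_OTU_tables/seqs.fa",
        "RESULTS/DADA2_silva/OTUs_count.txt",
        "RESULTS/DADA2_silva/table_summary.txt",
        "RESULTS/DADA2_silva/beta_div_even/weighted_2d_plot/*",
    ]
    for path in expected:
        status = "ok.." if glob.glob(path) else "NO.."
        print("%s %s" % (status, path))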
diff -r 3ab198df8f3f -r 86a12d75ebe4 tool_dependencies.xml
--- a/tool_dependencies.xml	Thu Oct 18 09:18:04 2018 -0400
+++ b/tool_dependencies.xml	Fri Dec 20 06:59:49 2019 -0500
@@ -1,14 +1,14 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="amplicon_analysis_pipeline" version="1.2.3">
+    <package name="amplicon_analysis_pipeline" version="1.3.6">
         <install version="1.0">
             <actions>
-                <action type="download_file">https://raw.githubusercontent.com/pjbriggs/Amplicon_analysis-galaxy/master/install_amplicon_analysis.sh</action>
+                <action type="download_file">https://raw.githubusercontent.com/pjbriggs/Amplicon_analysis-galaxy/master/install_amplicon_analysis-1.3.6.sh</action>
                 <action type="shell_command">
-                    sh ./install_amplicon_analysis.sh $INSTALL_DIR
+                    sh ./install_amplicon_analysis-1.3.6.sh $INSTALL_DIR
                 </action>
                 <action type="set_environment">
-                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/Amplicon_analysis-1.2.3/bin</environment_variable>
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/Amplicon_analysis-1.3.6/bin</environment_variable>
                </action>
            </actions>
        </install>
diff -r 3ab198df8f3f -r 86a12d75ebe4 updating-to-pipeline-1.3-DADA2.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/updating-to-pipeline-1.3-DADA2.txt	Fri Dec 20 06:59:49 2019 -0500
@@ -0,0 +1,58 @@
+Notes on updating Galaxy tool to pipeline 1.3 (DADA2)
+=====================================================
+
+Where stuff is:
+
+* projects/Amplicon_analysis-galaxy: git repo for Galaxy tool (these
+  developments are in the 'update-to-Amplicon_analysis_pipeline-1.3'
+  branch, PR #50:
+  https://github.com/pjbriggs/Amplicon_analysis-galaxy/pull/50)
+
+* scratchpad/test_Amplicon_analysis_pipeline_DADA2: directory for
+  running/testing the updates
+
+So far:
+
+* Updated the installer for pipeline version 1.3.2
+
+* Have been trying to run the pipeline manually outside of Galaxy
+  on popov & CSF3:
+  -- DADA2 works on popov (can't remember if it works on CSF3)
+  -- Vsearch pipeline fails on popov and CSF3 (but errors are
+     different)
+
+* Mauro is looking at fixing the errors while I carry on trying
+  to update the Galaxy tool
+
+Random notes from my notebook:
+
+p44:
+
+* DADA2 uses NSLOTS environment variable from the local environment
+  (so can get number of cores on cluster; if NSLOTS not set then
+  gets number of cores on local machine)
+
+* DADA2 has new outputs:
+  -- DADA2_OTU_tables/Error_rate_plots/ <-- need to capture all
+     PDFs from this folder
+
+pp78-79:
+
+* Galaxy wrapper could check that 'Run' column is in supplied
+  metatable file (if it's not present then pipeline will fail
+  now)
+
+* DADA2 has its own reference database
+
+* DADA2 produces same outputs as Vsearch (with name changed from
+  "Vsearch_*" to "DADA2_*", plus extras:
+  -- Vsearch_OTUs.tre -> otus.tre
+  -- Vsearch_multiplexed_linearised_dereplicated_mc2_repset_nonchimeras_OTUS.fasta -> seqs.fa
+  -- There might be issues with the heatmap
+
+p83: notes on progress...
+
+p95:
+
+* Confirms heatmap is now e.g. RESULTS/Vsearch_silva/Heatmap.pdf
+  (instead of HTML output)
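On the p44 NSLOTS note: this is the usual Grid Engine pattern of taking the
scheduler-allocated slot count when present and falling back to the local
core count otherwise. Sketched in Python (DADA2 does this on the R side; the
snippet just illustrates the lookup):

    import multiprocessing
    import os

    # Use the scheduler-provided slot count (e.g. set by SGE on the cluster)
    # if available, otherwise the number of cores on the local machine
    n_threads = int(os.environ.get("NSLOTS", multiprocessing.cpu_count()))
    print("Using %d thread(s)" % n_threads)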