comparison amplicon_analysis_pipeline.xml @ 4:86a12d75ebe4 draft default tip

planemo upload for repository https://github.com/pjbriggs/Amplicon_analysis-galaxy commit 7be61b7ed35ca3deaad68d2eae384c8cd365bcb8
author pjbriggs
date Fri, 20 Dec 2019 06:59:49 -0500
parents 3ab198df8f3f
children
comparison
equal deleted inserted replaced
3:3ab198df8f3f 4:86a12d75ebe4
1 <tool id="amplicon_analysis_pipeline" name="Amplicon Analysis Pipeline" version="1.2.3.0"> 1 <tool id="amplicon_analysis_pipeline" name="Amplicon Analysis Pipeline" version="1.3.6.0">
2 <description>analyse 16S rRNA data from Illumina Miseq paired-end reads</description> 2 <description>analyse 16S rRNA data from Illumina Miseq paired-end reads</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="1.2.3">amplicon_analysis_pipeline</requirement> 4 <requirement type="package" version="1.3.6">amplicon_analysis_pipeline</requirement>
5 </requirements> 5 </requirements>
6 <stdio> 6 <stdio>
7 <exit_code range="1:" /> 7 <exit_code range="1:" />
8 </stdio> 8 </stdio>
9 <command><![CDATA[ 9 <command><![CDATA[
10
11 ## Convenience variable for pipeline name
12 #set $pipeline_name = $pipeline.pipeline_name
13
10 ## Set the reference database name 14 ## Set the reference database name
11 #if $reference_database == "-S" 15 #if str( $pipeline_name ) == "DADA2"
12 #set reference_database_name = "silva" 16 #set reference_database_name = "silva"
13 #else if $reference_database == "-H"
14 #set reference_database_name = "homd"
15 #else 17 #else
16 #set reference_database_name = "gg" 18 #set reference_database = $pipeline.reference_database
19 #if $reference_database == "-S"
20 #set reference_database_name = "silva"
21 #else if $reference_database == "-H"
22 #set reference_database_name = "homd"
23 #else
24 #set reference_database_name = "gg"
25 #end if
17 #end if 26 #end if
18 27
19 ## Run the amplicon analysis pipeline wrapper 28 ## Run the amplicon analysis pipeline wrapper
20 python $__tool_directory__/amplicon_analysis_pipeline.py 29 python $__tool_directory__/amplicon_analysis_pipeline.py
21 ## Set options 30 ## Set options
35 -O $minimum_overlap 44 -O $minimum_overlap
36 #end if 45 #end if
37 #if str( $minimum_length ) != "" 46 #if str( $minimum_length ) != ""
38 -L $minimum_length 47 -L $minimum_length
39 #end if 48 #end if
40 -P $pipeline 49 -P $pipeline_name
41 -r \$AMPLICON_ANALYSIS_REF_DATA_PATH 50 -r \${AMPLICON_ANALYSIS_REF_DATA_PATH-ReferenceData}
42 #if str( $reference_database ) != "" 51 #if str( $pipeline_name ) != "DADA2"
43 ${reference_database} 52 ${reference_database}
44 #end if 53 #end if
45 #if str($categories_file_in) != 'None' 54 #if str($categories_file_in) != 'None'
46 -c "${categories_file_in}" 55 -c "${categories_file_in}"
47 #end if 56 #end if
58 #end for 67 #end for
59 && 68 &&
60 69
61 ## Collect outputs 70 ## Collect outputs
62 cp Metatable_log/Metatable_mod.txt "${metatable_mod}" && 71 cp Metatable_log/Metatable_mod.txt "${metatable_mod}" &&
63 cp ${pipeline}_OTU_tables/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_tax_OTU_table.biom "${tax_otu_table_biom_file}" && 72 #if str( $pipeline_name ) == "Vsearch"
64 cp ${pipeline}_OTU_tables/otus.tre "${otus_tre_file}" && 73 ## Vsearch-specific
65 cp RESULTS/${pipeline}_${reference_database_name}/OTUs_count.txt "${otus_count_file}" && 74 cp ${pipeline_name}_OTU_tables/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_tax_OTU_table.biom "${tax_otu_table_biom_file}" &&
66 cp RESULTS/${pipeline}_${reference_database_name}/table_summary.txt "${table_summary_file}" && 75 cp Multiplexed_files/${pipeline_name}_pipeline/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta "${dereplicated_nonchimera_otus_fasta}" &&
67 cp Multiplexed_files/${pipeline}_pipeline/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta "${dereplicated_nonchimera_otus_fasta}" && 76 cp QUALITY_CONTROL/Reads_count.txt "$read_counts_out" &&
68 cp QUALITY_CONTROL/Reads_count.txt "$read_counts_out" && 77 #else
78 ## DADA2-specific
79 cp ${pipeline_name}_OTU_tables/DADA2_tax_OTU_table.biom "${tax_otu_table_biom_file}" &&
80 cp ${pipeline_name}_OTU_tables/seqs.fa "${dereplicated_nonchimera_otus_fasta}" &&
81 #end if
82 cp ${pipeline_name}_OTU_tables/otus.tre "${otus_tre_file}" &&
83 cp RESULTS/${pipeline_name}_${reference_database_name}/OTUs_count.txt "${otus_count_file}" &&
84 cp RESULTS/${pipeline_name}_${reference_database_name}/table_summary.txt "${table_summary_file}" &&
69 cp fastqc_quality_boxplots.html "${fastqc_quality_boxplots_html}" && 85 cp fastqc_quality_boxplots.html "${fastqc_quality_boxplots_html}" &&
70 86
87 ## OTU table heatmap
88 cp RESULTS/${pipeline_name}_${reference_database_name}/Heatmap.pdf "${heatmap_otu_table_pdf}" &&
89
71 ## HTML outputs 90 ## HTML outputs
72
73 ## OTU table
74 mkdir $heatmap_otu_table_html.files_path &&
75 cp -r RESULTS/${pipeline}_${reference_database_name}/Heatmap/js $heatmap_otu_table_html.files_path &&
76 cp RESULTS/${pipeline}_${reference_database_name}/Heatmap/otu_table.html "${heatmap_otu_table_html}" &&
77 91
78 ## Phylum genus barcharts 92 ## Phylum genus barcharts
79 mkdir $phylum_genus_dist_barcharts_html.files_path && 93 mkdir $phylum_genus_dist_barcharts_html.files_path &&
80 cp -r RESULTS/${pipeline}_${reference_database_name}/phylum_genus_charts/charts $phylum_genus_dist_barcharts_html.files_path && 94 cp -r RESULTS/${pipeline_name}_${reference_database_name}/phylum_genus_charts/charts $phylum_genus_dist_barcharts_html.files_path &&
81 cp -r RESULTS/${pipeline}_${reference_database_name}/phylum_genus_charts/raw_data $phylum_genus_dist_barcharts_html.files_path && 95 cp -r RESULTS/${pipeline_name}_${reference_database_name}/phylum_genus_charts/raw_data $phylum_genus_dist_barcharts_html.files_path &&
82 cp RESULTS/${pipeline}_${reference_database_name}/phylum_genus_charts/bar_charts.html "${phylum_genus_dist_barcharts_html}" && 96 cp RESULTS/${pipeline_name}_${reference_database_name}/phylum_genus_charts/bar_charts.html "${phylum_genus_dist_barcharts_html}" &&
83 97
84 ## Beta diversity weighted 2d plots 98 ## Beta diversity weighted 2d plots
85 mkdir $beta_div_even_weighted_2d_plots.files_path && 99 mkdir $beta_div_even_weighted_2d_plots.files_path &&
86 cp -r RESULTS/${pipeline}_${reference_database_name}/beta_div_even/weighted_2d_plot/* $beta_div_even_weighted_2d_plots.files_path && 100 cp -r RESULTS/${pipeline_name}_${reference_database_name}/beta_div_even/weighted_2d_plot/* $beta_div_even_weighted_2d_plots.files_path &&
87 cp RESULTS/${pipeline}_${reference_database_name}/beta_div_even/weighted_2d_plot/weighted_unifrac_pc_2D_PCoA_plots.html "${beta_div_even_weighted_2d_plots}" && 101 cp RESULTS/${pipeline_name}_${reference_database_name}/beta_div_even/weighted_2d_plot/weighted_unifrac_pc_2D_PCoA_plots.html "${beta_div_even_weighted_2d_plots}" &&
88 102
89 ## Beta diversity unweighted 2d plots 103 ## Beta diversity unweighted 2d plots
90 mkdir $beta_div_even_unweighted_2d_plots.files_path && 104 mkdir $beta_div_even_unweighted_2d_plots.files_path &&
91 cp -r RESULTS/${pipeline}_${reference_database_name}/beta_div_even/unweighted_2d_plot/* $beta_div_even_unweighted_2d_plots.files_path && 105 cp -r RESULTS/${pipeline_name}_${reference_database_name}/beta_div_even/unweighted_2d_plot/* $beta_div_even_unweighted_2d_plots.files_path &&
92 cp RESULTS/${pipeline}_${reference_database_name}/beta_div_even/unweighted_2d_plot/unweighted_unifrac_pc_2D_PCoA_plots.html "${beta_div_even_unweighted_2d_plots}" && 106 cp RESULTS/${pipeline_name}_${reference_database_name}/beta_div_even/unweighted_2d_plot/unweighted_unifrac_pc_2D_PCoA_plots.html "${beta_div_even_unweighted_2d_plots}" &&
93 107
94 ## Alpha diversity rarefaction plots 108 ## Alpha diversity rarefaction plots
95 mkdir $alpha_div_rarefaction_plots.files_path && 109 mkdir $alpha_div_rarefaction_plots.files_path &&
96 cp RESULTS/${pipeline}_${reference_database_name}/Alpha_diversity/rarefaction_curves/rarefaction_plots.html $alpha_div_rarefaction_plots && 110 cp RESULTS/${pipeline_name}_${reference_database_name}/Alpha_diversity/rarefaction_curves/rarefaction_plots.html $alpha_div_rarefaction_plots &&
97 cp -r RESULTS/${pipeline}_${reference_database_name}/Alpha_diversity/rarefaction_curves/average_plots $alpha_div_rarefaction_plots.files_path && 111 cp -r RESULTS/${pipeline_name}_${reference_database_name}/Alpha_diversity/rarefaction_curves/average_plots $alpha_div_rarefaction_plots.files_path &&
112
113 ## DADA2 error rate plots
114 #if str($pipeline_name) == "DADA2"
115 mkdir $dada2_error_rate_plots.files_path &&
116 cp DADA2_OTU_tables/Error_rate_plots/error_rate_plots.html $dada2_error_rate_plots &&
117 cp -r DADA2_OTU_tables/Error_rate_plots/*.pdf $dada2_error_rate_plots.files_path &&
118 #end if
98 119
99 ## Categories data 120 ## Categories data
100 #if str($categories_file_in) != 'None' 121 #if str($categories_file_in) != 'None'
101 ## Alpha diversity boxplots 122 ## Alpha diversity boxplots
102 mkdir $alpha_div_boxplots.files_path && 123 mkdir $alpha_div_boxplots.files_path &&
103 cp alpha_diversity_boxplots.html "$alpha_div_boxplots" && 124 cp alpha_diversity_boxplots.html "$alpha_div_boxplots" &&
104 cp RESULTS/${pipeline}_${reference_database_name}/Alpha_diversity/Alpha_diversity_boxplot/Categories_shannon/*.pdf $alpha_div_boxplots.files_path && 125 cp RESULTS/${pipeline_name}_${reference_database_name}/Alpha_diversity/Alpha_diversity_boxplot/Categories_shannon/*.pdf $alpha_div_boxplots.files_path &&
105 #end if 126 #end if
106 127
107 ## Pipeline outputs (log files etc) 128 ## Pipeline outputs (log files etc)
108 mkdir $log_files.files_path && 129 mkdir $log_files.files_path &&
109 cp Amplicon_analysis_pipeline.log $log_files.files_path && 130 cp Amplicon_analysis_pipeline.log $log_files.files_path &&
159 label="Minimum length in bp to keep sequence after overlapping" 180 label="Minimum length in bp to keep sequence after overlapping"
160 help="Default is 200 (-L)" /> 181 help="Default is 200 (-L)" />
161 <param type="integer" name="sliding_window_length" value="10" 182 <param type="integer" name="sliding_window_length" value="10"
162 label="Minimum length in bp to retain a read after trimming" 183 label="Minimum length in bp to retain a read after trimming"
163 help="Supplied to Sickle; default is 10 (-l)" /> 184 help="Supplied to Sickle; default is 10 (-l)" />
164 <param type="select" name="pipeline" 185 <conditional name="pipeline">
165 label="Pipeline to use for analysis"> 186 <param type="select" name="pipeline_name"
166 <option value="Vsearch" selected="true" >Vsearch</option> 187 label="Pipeline to use for analysis">
167 <!-- 188 <option value="Vsearch" selected="true" >Vsearch</option>
168 Remove the QIIME and Uparse options for now 189 <option value="DADA2">DADA2</option>
169 <option value="QIIME">QIIME</option> 190 </param>
170 <option value="Uparse">Uparse</option> 191 <when value="Vsearch">
171 --> 192 <param type="select" name="reference_database"
172 </param> 193 label="Reference database">
173 <param type="select" name="reference_database" 194 <option value="" selected="true">GreenGenes</option>
174 label="Reference database"> 195 <option value="-S">Silva</option>
175 <option value="" selected="true">GreenGenes</option> 196 <option value="-H">Human Oral Microbiome Database (HOMD)</option>
176 <option value="-S">Silva</option> 197 </param>
177 <option value="-H">Human Oral Microbiome Database (HOMD)</option> 198 </when>
178 </param> 199 <when value="DADA2">
200 </when>
201 </conditional>
179 </inputs> 202 </inputs>
180 <outputs> 203 <outputs>
181 <data format="tabular" name="metatable_mod" 204 <data format="tabular" name="metatable_mod"
182 label="${tool.name}:${title} Metatable_mod.txt" /> 205 label="${tool.name}:${title} Metatable_mod.txt" />
183 <data format="tabular" name="read_counts_out" 206 <data format="tabular" name="read_counts_out"
184 label="${tool.name} (${pipeline}):${title} read counts" /> 207 label="${tool.name} (${pipeline.pipeline_name}):${title} read counts">
208 <filter>pipeline['pipeline_name'] == 'Vsearch'</filter>
209 </data>
185 <data format="biom" name="tax_otu_table_biom_file" 210 <data format="biom" name="tax_otu_table_biom_file"
186 label="${tool.name} (${pipeline}):${title} tax OTU table (biom format)" /> 211 label="${tool.name} (${pipeline.pipeline_name}):${title} tax OTU table (biom format)" />
187 <data format="tabular" name="otus_tre_file" 212 <data format="tabular" name="otus_tre_file"
188 label="${tool.name} (${pipeline}):${title} otus.tre" /> 213 label="${tool.name} (${pipeline.pipeline_name}):${title} otus.tre" />
189 <data format="html" name="phylum_genus_dist_barcharts_html" 214 <data format="html" name="phylum_genus_dist_barcharts_html"
190 label="${tool.name} (${pipeline}):${title} phylum genus dist barcharts HTML" /> 215 label="${tool.name} (${pipeline.pipeline_name}):${title} phylum genus dist barcharts HTML" />
191 <data format="tabular" name="otus_count_file" 216 <data format="tabular" name="otus_count_file"
192 label="${tool.name} (${pipeline}):${title} OTUs count file" /> 217 label="${tool.name} (${pipeline.pipeline_name}):${title} OTUs count file" />
193 <data format="tabular" name="table_summary_file" 218 <data format="tabular" name="table_summary_file"
194 label="${tool.name} (${pipeline}):${title} table summary file" /> 219 label="${tool.name} (${pipeline.pipeline_name}):${title} table summary file" />
195 <data format="fasta" name="dereplicated_nonchimera_otus_fasta" 220 <data format="fasta" name="dereplicated_nonchimera_otus_fasta"
196 label="${tool.name} (${pipeline}):${title} multiplexed linearized dereplicated mc2 repset nonchimeras OTUs FASTA" /> 221 label="${tool.name} (${pipeline.pipeline_name}):${title} multiplexed linearized dereplicated mc2 repset nonchimeras OTUs FASTA" />
197 <data format="html" name="fastqc_quality_boxplots_html" 222 <data format="html" name="fastqc_quality_boxplots_html"
198 label="${tool.name} (${pipeline}):${title} FastQC per-base quality boxplots HTML" /> 223 label="${tool.name} (${pipeline.pipeline_name}):${title} FastQC per-base quality boxplots HTML" />
199 <data format="html" name="heatmap_otu_table_html" 224 <data format="pdf" name="heatmap_otu_table_pdf"
200 label="${tool.name} (${pipeline}):${title} heatmap OTU table HTML" /> 225 label="${tool.name} (${pipeline.pipeline_name}):${title} heatmap OTU table PDF" />
201 <data format="html" name="beta_div_even_weighted_2d_plots" 226 <data format="html" name="beta_div_even_weighted_2d_plots"
202 label="${tool.name} (${pipeline}):${title} beta diversity weighted 2D plots HTML" /> 227 label="${tool.name} (${pipeline.pipeline_name}):${title} beta diversity weighted 2D plots HTML" />
203 <data format="html" name="beta_div_even_unweighted_2d_plots" 228 <data format="html" name="beta_div_even_unweighted_2d_plots"
204 label="${tool.name} (${pipeline}):${title} beta diversity unweighted 2D plots HTML" /> 229 label="${tool.name} (${pipeline.pipeline_name}):${title} beta diversity unweighted 2D plots HTML" />
205 <data format="html" name="alpha_div_rarefaction_plots" 230 <data format="html" name="alpha_div_rarefaction_plots"
206 label="${tool.name} (${pipeline}):${title} alpha diversity rarefaction plots HTML" /> 231 label="${tool.name} (${pipeline.pipeline_name}):${title} alpha diversity rarefaction plots HTML" />
232 <data format="html" name="dada2_error_rate_plots"
233 label="${tool.name} (${pipeline.pipeline_name}):${title} DADA2 error rate plots">
234 <filter>pipeline['pipeline_name'] == 'DADA2'</filter>
235 </data>
207 <data format="html" name="alpha_div_boxplots" 236 <data format="html" name="alpha_div_boxplots"
208 label="${tool.name} (${pipeline}):${title} alpha diversity boxplots"> 237 label="${tool.name} (${pipeline.pipeline_name}):${title} alpha diversity boxplots">
209 <filter>categories_file_in is not None</filter> 238 <filter>categories_file_in is not None</filter>
210 </data> 239 </data>
211 <data format="html" name="log_files" 240 <data format="html" name="log_files"
212 label="${tool.name} (${pipeline}):${title} log files" /> 241 label="${tool.name} (${pipeline.pipeline_name}):${title} log files" />
213 </outputs> 242 </outputs>
214 <tests> 243 <tests>
215 </tests> 244 </tests>
216 <help><![CDATA[ 245 <help><![CDATA[
217 246
393 minimum sequence length used by Pandaseq to keep a sequence after the 422 minimum sequence length used by Pandaseq to keep a sequence after the
394 overlapping. This depends on the expected amplicon length. Default is 423 overlapping. This depends on the expected amplicon length. Default is
395 380 (used for V3-V4 16S sequencing; expected length ~440bp) 424 380 (used for V3-V4 16S sequencing; expected length ~440bp)
396 425
397 * **Pipeline to use for analysis** Choose the pipeline to use for OTU 426 * **Pipeline to use for analysis** Choose the pipeline to use for OTU
398 clustering and chimera removal. The Galaxy tool currently supports 427 clustering and chimera removal. The Galaxy tool supports the ``Vsearch``
399 ``Vsearch`` only. ``Uparse`` and ``QIIME`` are planned to be added 428 and ``DADA2`` pipelines.
400 shortly (the tools are already available for the stand-alone pipeline). 429
401 430 * **Reference database** Choose between ``GreenGenes``, ``Silva`` or
402 * **Reference database** Choose between ``GreenGenes`` and ``Silva`` 431 ``HOMD`` (Human Oral Microbiome Database) for taxa assignment.
403 databases for taxa assignment.
404 432
405 Click on **Execute** to start the analysis. 433 Click on **Execute** to start the analysis.
406 434
407 5. Results 435 5. Results
408 ********** 436 **********
409 437
410 Results are entirely generated using QIIME scripts. The results will 438 Results are entirely generated using QIIME scripts. The results will
411 appear in the History panel when the analysis is completed 439 appear in the History panel when the analysis is completed.
412 440
413 * **Vsearch_tax_OTU_table (biom format)** The OTU table in BIOM format 441 The following outputs are captured:
414 (http://biom-format.org/) 442
415 443 * **Vsearch_tax_OTU_table.biom|DADA2_tax_OTU_table.biom (biom format)**
416 * **Vsearch_OTUs.tree** Phylogenetic tree constructed using 444 The OTU table in BIOM format (http://biom-format.org/)
417 ``make_phylogeny.py`` (fasttree) QIIME script 445
418 (http://qiime.org/scripts/make_phylogeny.html) 446 * **otus.tre** Phylogenetic tree constructed using ``make_phylogeny.py``
419 447 (fasttree) QIIME script (http://qiime.org/scripts/make_phylogeny.html)
420 * **Vsearch_phylum_genus_dist_barcharts_HTML** HTML file with bar 448
421 charts at Phylum, Genus and Species level 449 * **Phylum_genus_dist_barcharts_HTML** HTML file with bar charts at
450 Phylum, Genus and Species level
422 (http://qiime.org/scripts/summarize_taxa.html and 451 (http://qiime.org/scripts/summarize_taxa.html and
423 http://qiime.org/scripts/plot_taxa_summary.html) 452 http://qiime.org/scripts/plot_taxa_summary.html)
424 453
425 * **Vsearch_OTUs_count_file** Summary of OTU counts per sample 454 * **OTUs_count_file** Summary of OTU counts per sample
426 (http://biom-format.org/documentation/summarizing_biom_tables.html) 455 (http://biom-format.org/documentation/summarizing_biom_tables.html)
427 456
428 * **Vsearch_table_summary_file** Summary of sequences counts per sample 457 * **Table_summary_file** Summary of sequences counts per sample
429 (http://biom-format.org/documentation/summarizing_biom_tables.html) 458 (http://biom-format.org/documentation/summarizing_biom_tables.html)
430 459
431 * **Vsearch_multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta** 460 * **multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta|seqs.fa**
432 Fasta file with OTU sequences 461 Fasta file with OTU sequences (Vsearch|DADA2)
433 462
434 * **Vsearch_heatmap_OTU_table_HTML** Interactive OTU heatmap 463 * **Heatmap_PDF** OTU heatmap in PDF format
435 (http://qiime.org/1.8.0/scripts/make_otu_heatmap_html.html ) 464 (http://qiime.org/1.8.0/scripts/make_otu_heatmap_html.html )
436 465
437 * **Vsearch_beta_diversity_weighted_2D_plots_HTML** PCoA plots in HTML 466 * **Vsearch_beta_diversity_weighted_2D_plots_HTML** PCoA plots in HTML
438 format using weighted Unifrac distance measure. Samples are grouped 467 format using weighted Unifrac distance measure. Samples are grouped
439 by the column names present in the Metatable file. The samples are 468 by the column names present in the Metatable file. The samples are