comparison amplicon_analysis_pipeline.py @ 4:86a12d75ebe4 draft default tip

planemo upload for repository https://github.com/pjbriggs/Amplicon_analysis-galaxy commit 7be61b7ed35ca3deaad68d2eae384c8cd365bcb8
author pjbriggs
date Fri, 20 Dec 2019 06:59:49 -0500
parents 3ab198df8f3f
children
comparison
equal deleted inserted replaced
3:3ab198df8f3f 4:86a12d75ebe4
115 p.add_argument("-q",dest="trimming_threshold") 115 p.add_argument("-q",dest="trimming_threshold")
116 p.add_argument("-O",dest="minimum_overlap") 116 p.add_argument("-O",dest="minimum_overlap")
117 p.add_argument("-L",dest="minimum_length") 117 p.add_argument("-L",dest="minimum_length")
118 p.add_argument("-l",dest="sliding_window_length") 118 p.add_argument("-l",dest="sliding_window_length")
119 p.add_argument("-P",dest="pipeline", 119 p.add_argument("-P",dest="pipeline",
120 choices=["vsearch","uparse","qiime"], 120 choices=["Vsearch","DADA2"],
121 type=str.lower, 121 type=str,
122 default="vsearch") 122 default="Vsearch")
123 p.add_argument("-S",dest="use_silva",action="store_true") 123 p.add_argument("-S",dest="use_silva",action="store_true")
124 p.add_argument("-H",dest="use_homd",action="store_true") 124 p.add_argument("-H",dest="use_homd",action="store_true")
125 p.add_argument("-r",dest="reference_data_path") 125 p.add_argument("-r",dest="reference_data_path")
126 p.add_argument("-c",dest="categories_file") 126 p.add_argument("-c",dest="categories_file")
127 args = p.parse_args() 127 args = p.parse_args()
153 final_name.write("%s\n" % '\t'.join((r1,sample_name))) 153 final_name.write("%s\n" % '\t'.join((r1,sample_name)))
154 final_name.write("%s\n" % '\t'.join((r2,sample_name))) 154 final_name.write("%s\n" % '\t'.join((r2,sample_name)))
155 sample_names.append(sample_name) 155 sample_names.append(sample_name)
156 156
157 # Reference database 157 # Reference database
158 if args.use_silva: 158 if args.pipeline == "Vsearch":
159 if args.use_silva:
160 ref_database = "silva"
161 elif args.use_homd:
162 ref_database = "homd"
163 else:
164 ref_database = "gg"
165 elif args.pipeline == "DADA2":
159 ref_database = "silva" 166 ref_database = "silva"
160 elif args.use_homd:
161 ref_database = "homd"
162 else:
163 ref_database = "gg"
164 167
165 # Construct the pipeline command 168 # Construct the pipeline command
166 print "Amplicon analysis: constructing pipeline command" 169 print "Amplicon analysis: constructing pipeline command"
167 pipeline = PipelineCmd("Amplicon_analysis_pipeline.sh") 170 pipeline = PipelineCmd("Amplicon_analysis_pipeline.sh")
168 if args.forward_pcr_primer: 171 if args.forward_pcr_primer:
178 if args.sliding_window_length: 181 if args.sliding_window_length:
179 pipeline.add_args("-l",args.sliding_window_length) 182 pipeline.add_args("-l",args.sliding_window_length)
180 if args.reference_data_path: 183 if args.reference_data_path:
181 pipeline.add_args("-r",args.reference_data_path) 184 pipeline.add_args("-r",args.reference_data_path)
182 pipeline.add_args("-P",args.pipeline) 185 pipeline.add_args("-P",args.pipeline)
183 if ref_database == "silva": 186 if args.pipeline == "Vsearch":
184 pipeline.add_args("-S") 187 if ref_database == "silva":
185 elif ref_database == "homd": 188 pipeline.add_args("-S")
186 pipeline.add_args("-H") 189 elif ref_database == "homd":
190 pipeline.add_args("-H")
187 191
188 # Echo the pipeline command to stdout 192 # Echo the pipeline command to stdout
189 print "Running %s" % pipeline 193 print "Running %s" % pipeline
190 194
191 # Run the pipeline 195 # Run the pipeline
275 <body> 279 <body>
276 <h1>Amplicon analysis pipeline: Per-base Quality Boxplots (FastQC)</h1> 280 <h1>Amplicon analysis pipeline: Per-base Quality Boxplots (FastQC)</h1>
277 """) 281 """)
278 # Look for raw and trimmed FastQC output for each sample 282 # Look for raw and trimmed FastQC output for each sample
279 for sample_name in sample_names: 283 for sample_name in sample_names:
284 # Replace underscores with hyphens in sample names
285 sample_name = sample_name.replace('_','-')
286 # Write HTML file with links to the FastQC boxplots
280 fastqc_dir = os.path.join(sample_name,"FastQC") 287 fastqc_dir = os.path.join(sample_name,"FastQC")
281 quality_boxplots.write("<h2>%s</h2>" % sample_name) 288 quality_boxplots.write("<h2>%s</h2>" % sample_name)
282 for d in ("Raw","cutdapt_sickle/Q%s" % phred_score): 289 for d in ("Raw","cutdapt_sickle/Q%s" % phred_score):
283 quality_boxplots.write("<h3>%s</h3>" % d) 290 quality_boxplots.write("<h3>%s</h3>" % d)
284 fastqc_html_files = glob.glob( 291 fastqc_html_files = glob.glob(
304 boxplot)) 311 boxplot))
305 quality_boxplots.write("""</body> 312 quality_boxplots.write("""</body>
306 </html> 313 </html>
307 """) 314 """)
308 315
316 # Handle DADA2 error rate plot PDFs
317 if args.pipeline == "DADA2":
318 print("Amplicon analysis: collecting error rate plots")
319 error_rate_plots_dir = os.path.abspath(
320 os.path.join("DADA2_OTU_tables",
321 "Error_rate_plots"))
322 error_rate_plot_pdfs = [os.path.basename(pdf)
323 for pdf in
324 sorted(glob.glob(
325 os.path.join(error_rate_plots_dir,"*.pdf")))]
326 error_rate_plots_html = os.path.join(error_rate_plots_dir,
327 "error_rate_plots.html")
328 with open(error_rate_plots_html,"w") as error_rate_plots_out:
329 error_rate_plots_out.write("""<html>
330 <head>
331 <title>Amplicon analysis pipeline: DADA2 Error Rate Plots</title>
332 <head>
333 <body>
334 <h1>Amplicon analysis pipeline: DADA2 Error Rate Plots</h1>
335 """)
336 error_rate_plots_out.write("<ul>\n")
337 for pdf in error_rate_plot_pdfs:
338 error_rate_plots_out.write("<li>%s</li>\n" % ahref(pdf))
339 error_rate_plots_out.write("<ul>\n")
340 error_rate_plots_out.write("""</body>
341 </html>
342 """)
343
309 # Handle additional output when categories file was supplied 344 # Handle additional output when categories file was supplied
310 if args.categories_file is not None: 345 if args.categories_file is not None:
311 # Alpha diversity boxplots 346 # Alpha diversity boxplots
312 print "Amplicon analysis: indexing alpha diversity boxplots" 347 print "Amplicon analysis: indexing alpha diversity boxplots"
313 boxplots_dir = os.path.abspath( 348 boxplots_dir = os.path.abspath(
314 os.path.join("RESULTS", 349 os.path.join("RESULTS",
315 "%s_%s" % (args.pipeline.title(), 350 "%s_%s" % (args.pipeline,
316 ref_database), 351 ref_database),
317 "Alpha_diversity", 352 "Alpha_diversity",
318 "Alpha_diversity_boxplot", 353 "Alpha_diversity_boxplot",
319 "Categories_shannon")) 354 "Categories_shannon"))
320 print "Amplicon analysis: gathering PDFs from %s" % boxplots_dir 355 print "Amplicon analysis: gathering PDFs from %s" % boxplots_dir