Mercurial > repos > pjbriggs > macs21
annotate macs21_wrapper.py @ 5:3c435705aca5 draft default tip
New version 2.1.2-galaxy1 (updates UCSC dependencies)
| author | pjbriggs |
|---|---|
| date | Tue, 27 Jun 2023 07:54:55 +0000 |
| parents | 00d73c812399 |
| children |
| rev | line source |
|---|---|
| 0 | 1 #!/bin/env python |
| 2 # | |
| 3 # Galaxy wrapper to run MACS 2.1 | |
| 4 # | |
| 5 # Completely rewritten from the original macs2 wrapped by Ziru Zhou | |
| 6 # taken from http://toolshed.g2.bx.psu.edu/view/modencode-dcc/macs2 | |
| 7 | |
| 8 import sys | |
| 9 import os | |
| 10 import subprocess | |
| 11 import tempfile | |
| 12 import shutil | |
| 13 | |
| 14 def move_file(working_dir,name,destination): | |
| 15 """Move a file 'name' from 'working_dir' to 'destination' | |
| 16 | |
| 17 """ | |
| 18 if destination is None: | |
| 19 # Nothing to do | |
| 20 return | |
| 21 source = os.path.join(working_dir,name) | |
| 22 if os.path.exists(source): | |
| 23 shutil.move(source,destination) | |
| 24 | |
| 25 def convert_xls_to_interval(xls_file,interval_file,header=None): | |
| 26 """Convert MACS XLS file to interval | |
| 27 | |
| 28 From the MACS readme: "Coordinates in XLS is 1-based which is different with | |
| 29 BED format." | |
| 30 | |
| 31 However this function no longer performs any coordinate conversions, it | |
| 32 simply ensures that any blank or non-data lines are commented out | |
| 33 | |
| 34 """ | |
| 35 fp = open(interval_file,'wb') | |
| 36 if header: | |
| 37 fp.write('#%s\n' % header) | |
| 38 for line in open(xls_file): | |
| 39 # Keep all existing comment lines | |
| 40 if line.startswith('#'): | |
| 41 fp.write(line) | |
| 42 else: | |
| 43 # Split line into fields and test to see if | |
| 44 # the 'start' field is actually an integer | |
| 45 fields = line.split('\t') | |
| 46 if len(fields) > 1: | |
| 47 try: | |
| 48 int(fields[1]) | |
| 49 except ValueError: | |
| 50 # Integer conversion failed so comment out | |
| 51 # "bad" line instead | |
| 52 fields[0] = "#%s" % fields[0] | |
| 53 fp.write( '\t'.join( fields ) ) | |
| 54 fp.close() | |
| 55 | |
| 56 def make_bigwig_from_bedgraph(bedgraph_file,bigwig_file, | |
| 57 chrom_sizes,working_dir=None): | |
| 58 """Make bigWig file from a bedGraph | |
| 59 | |
| 60 The protocol is: | |
| 61 | |
| 62 $ fetchChromSizes.sh mm9 > mm9.chrom.sizes | |
| 63 $ bedClip treat.bedgraph mm9.chrom.sizes treat.clipped | |
|
2
00d73c812399
Version 2.1.0-6: add sorting step in bigWig generation, and explicitly terminate tool on error from MACS2.
pjbriggs
parents:
0
diff
changeset
|
64 $ bedSort treat.clipped treat.clipped.sorted |
|
00d73c812399
Version 2.1.0-6: add sorting step in bigWig generation, and explicitly terminate tool on error from MACS2.
pjbriggs
parents:
0
diff
changeset
|
65 $ bedGraphToBigWig treat.clipped.sorted mm9.chrom.sizes treat.bw |
| 0 | 66 |
| 67 Get the binaries from | |
| 68 http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/ | |
| 69 | |
| 70 We skip the fetchChromSizes step if the 'chrom_sizes' | |
| 71 argument supplied a valid file with the chromosome sizes | |
| 72 for the genome build in question. | |
| 73 | |
| 74 """ | |
| 75 print "Generating bigWig from bedGraph..." | |
| 76 # Check for chromosome sizes | |
| 77 if not os.path.exists(chrom_sizes): | |
| 78 # Determine genome build | |
| 79 chrom_sizes = os.path.basename(chrom_sizes) | |
| 80 genome_build = chrom_sizes.split('.')[0] | |
| 81 if genome_build == '?': | |
| 82 # No genome build set | |
| 83 sys.stderr.write("ERROR genome build not set, cannot get sizes for '?'\n") | |
| 84 sys.stderr.write("Assign a genome build to your input dataset and rerun\n") | |
| 85 sys.exit(1) | |
| 86 print "Missing chrom sizes file, attempting to fetch for '%s'" % genome_build | |
| 87 # Run fetchChromSizes | |
| 88 chrom_sizes = os.path.join(working_dir,chrom_sizes) | |
| 89 stderr_file = os.path.join(working_dir,"fetchChromSizes.stderr") | |
| 90 cmd = "fetchChromSizes %s" % genome_build | |
| 91 print "Running %s" % cmd | |
| 92 proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir, | |
| 93 stdout=open(chrom_sizes,'wb'), | |
| 94 stderr=open(stderr_file,'wb')) | |
| 95 proc.wait() | |
| 96 # Copy stderr from fetchChromSizes for information only | |
| 97 for line in open(stderr_file,'r'): | |
| 98 print line.strip() | |
| 99 os.remove(stderr_file) | |
| 100 # Check that the sizes file was downloaded | |
| 101 if not os.path.exists(chrom_sizes): | |
| 102 sys.stderr.write("Failed to download chrom sizes for '%s'\n" % genome_build) | |
| 103 sys.exit(1) | |
| 104 # Run bedClip | |
| 105 treat_clipped = "%s.clipped" % os.path.basename(bedgraph_file) | |
| 106 cmd = "bedClip %s %s %s" % (bedgraph_file,chrom_sizes,treat_clipped) | |
| 107 print "Running %s" % cmd | |
| 108 proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir) | |
| 109 proc.wait() | |
| 110 # Check that clipped file exists | |
| 111 treat_clipped = os.path.join(working_dir,treat_clipped) | |
| 112 if not os.path.exists(treat_clipped): | |
| 113 sys.stderr.write("Failed to create clipped bed file\n") | |
| 114 sys.exit(1) | |
|
2
00d73c812399
Version 2.1.0-6: add sorting step in bigWig generation, and explicitly terminate tool on error from MACS2.
pjbriggs
parents:
0
diff
changeset
|
115 # Run bedSort |
|
00d73c812399
Version 2.1.0-6: add sorting step in bigWig generation, and explicitly terminate tool on error from MACS2.
pjbriggs
parents:
0
diff
changeset
|
116 treat_clipped_sorted = "%s.sorted" % os.path.basename(treat_clipped) |
|
00d73c812399
Version 2.1.0-6: add sorting step in bigWig generation, and explicitly terminate tool on error from MACS2.
pjbriggs
parents:
0
diff
changeset
|
117 cmd = "bedSort %s %s" % (treat_clipped,treat_clipped_sorted) |
|
00d73c812399
Version 2.1.0-6: add sorting step in bigWig generation, and explicitly terminate tool on error from MACS2.
pjbriggs
parents:
0
diff
changeset
|
118 print "Running %s" % cmd |
|
00d73c812399
Version 2.1.0-6: add sorting step in bigWig generation, and explicitly terminate tool on error from MACS2.
pjbriggs
parents:
0
diff
changeset
|
119 proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir) |
|
00d73c812399
Version 2.1.0-6: add sorting step in bigWig generation, and explicitly terminate tool on error from MACS2.
pjbriggs
parents:
0
diff
changeset
|
120 proc.wait() |
|
00d73c812399
Version 2.1.0-6: add sorting step in bigWig generation, and explicitly terminate tool on error from MACS2.
pjbriggs
parents:
0
diff
changeset
|
121 # Check that sorted file exists |
|
00d73c812399
Version 2.1.0-6: add sorting step in bigWig generation, and explicitly terminate tool on error from MACS2.
pjbriggs
parents:
0
diff
changeset
|
122 treat_clipped_sorted = os.path.join(working_dir,treat_clipped_sorted) |
|
00d73c812399
Version 2.1.0-6: add sorting step in bigWig generation, and explicitly terminate tool on error from MACS2.
pjbriggs
parents:
0
diff
changeset
|
123 if not os.path.exists(treat_clipped_sorted): |
|
00d73c812399
Version 2.1.0-6: add sorting step in bigWig generation, and explicitly terminate tool on error from MACS2.
pjbriggs
parents:
0
diff
changeset
|
124 sys.stderr.write("Failed to create sorted clipped bed file\n") |
|
00d73c812399
Version 2.1.0-6: add sorting step in bigWig generation, and explicitly terminate tool on error from MACS2.
pjbriggs
parents:
0
diff
changeset
|
125 sys.exit(1) |
| 0 | 126 # Run bedGraphToBigWig |
|
2
00d73c812399
Version 2.1.0-6: add sorting step in bigWig generation, and explicitly terminate tool on error from MACS2.
pjbriggs
parents:
0
diff
changeset
|
127 cmd = "bedGraphToBigWig %s %s %s" % (treat_clipped_sorted, |
|
00d73c812399
Version 2.1.0-6: add sorting step in bigWig generation, and explicitly terminate tool on error from MACS2.
pjbriggs
parents:
0
diff
changeset
|
128 chrom_sizes, |
| 0 | 129 bigwig_file) |
| 130 print "Running %s" % cmd | |
| 131 proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir) | |
| 132 proc.wait() | |
| 133 # Clean up temporary chrom length file | |
| 134 if os.path.dirname(chrom_sizes) == working_dir: | |
| 135 print "Removing temporary chrom sizes file" | |
| 136 os.remove(chrom_sizes) | |
| 137 | |
| 138 if __name__ == "__main__": | |
| 139 | |
| 140 # Echo the command line | |
| 141 print ' '.join(sys.argv) | |
| 142 | |
| 143 # Initialise output files - values are set by reading from | |
| 144 # the command line supplied by the Galaxy wrapper | |
| 145 output_extra_html = None | |
| 146 output_extra_path = None | |
| 147 output_broadpeaks = None | |
| 148 output_gappedpeaks = None | |
| 149 output_narrowpeaks = None | |
| 150 output_treat_pileup = None | |
| 151 output_lambda_bedgraph = None | |
| 152 output_bigwig = None | |
| 153 output_xls_to_interval_peaks_file = None | |
| 154 output_peaks = None | |
| 155 output_bdgcmp = None | |
| 156 | |
| 157 # Other initialisations | |
| 158 chrom_sizes_file = None | |
| 159 | |
| 160 # Build the MACS 2.1 command line | |
| 161 # Initial arguments are always the same: command & input ChIP-seq file name | |
| 162 cmdline = ["macs2 %s -t %s" % (sys.argv[1],sys.argv[2])] | |
| 163 | |
| 164 # Process remaining args | |
| 165 for arg in sys.argv[3:]: | |
| 166 if arg.startswith('--format='): | |
| 167 # Convert format to uppercase | |
| 168 format_ = arg.split('=')[1].upper() | |
| 169 cmdline.append("--format=%s" % format_) | |
| 170 elif arg.startswith('--name='): | |
| 171 # Replace whitespace in name with underscores | |
| 172 experiment_name = '_'.join(arg.split('=')[1].split()) | |
| 173 cmdline.append("--name=%s" % experiment_name) | |
| 174 elif arg.startswith('--length='): | |
| 175 # Extract chromosome size file | |
| 176 chrom_sizes_file = arg.split('=')[1] | |
| 177 elif arg.startswith('--output-'): | |
| 178 # Handle destinations for output files | |
| 179 arg0,filen = arg.split('=') | |
| 180 if arg0 == '--output-summits': | |
| 181 output_summits = filen | |
| 182 elif arg0 == '--output-extra-files': | |
| 183 output_extra_html = filen | |
| 184 elif arg0 == '--output-extra-files-path': | |
| 185 output_extra_path = filen | |
| 186 elif arg0 == '--output-broadpeaks': | |
| 187 output_broadpeaks = filen | |
| 188 elif arg0 == '--output-gappedpeaks': | |
| 189 output_gappedpeaks = filen | |
| 190 elif arg0 == '--output-narrowpeaks': | |
| 191 output_narrowpeaks = filen | |
| 192 elif arg0 == '--output-pileup': | |
| 193 output_treat_pileup = filen | |
| 194 elif arg0 == '--output-lambda-bedgraph': | |
| 195 output_lambda_bedgraph = filen | |
| 196 elif arg0 == '--output-bigwig': | |
| 197 output_bigwig = filen | |
| 198 elif arg0 == '--output-xls-to-interval': | |
| 199 output_xls_to_interval_peaks_file = filen | |
| 200 elif arg0 == '--output-peaks': | |
| 201 output_peaks = filen | |
| 202 else: | |
| 203 # Pass remaining args directly to MACS | |
| 204 # command line | |
| 205 cmdline.append(arg) | |
| 206 | |
| 207 cmdline = ' '.join(cmdline) | |
| 208 print "Generated command line:\n%s" % cmdline | |
| 209 | |
| 210 # Execute MACS2 | |
| 211 # | |
| 212 # Make a working directory | |
| 213 working_dir = tempfile.mkdtemp() | |
| 214 # | |
| 215 # Collect stderr in a file for reporting later | |
| 216 stderr_filen = tempfile.NamedTemporaryFile().name | |
| 217 # | |
| 218 # Run MACS2 | |
| 219 proc = subprocess.Popen(args=cmdline,shell=True,cwd=working_dir, | |
| 220 stderr=open(stderr_filen,'wb')) | |
| 221 proc.wait() | |
|
2
00d73c812399
Version 2.1.0-6: add sorting step in bigWig generation, and explicitly terminate tool on error from MACS2.
pjbriggs
parents:
0
diff
changeset
|
222 exit_code = proc.returncode |
|
00d73c812399
Version 2.1.0-6: add sorting step in bigWig generation, and explicitly terminate tool on error from MACS2.
pjbriggs
parents:
0
diff
changeset
|
223 if exit_code != 0: |
|
00d73c812399
Version 2.1.0-6: add sorting step in bigWig generation, and explicitly terminate tool on error from MACS2.
pjbriggs
parents:
0
diff
changeset
|
224 sys.stderr.write(open(stderr_filen,'rb').read()) |
|
00d73c812399
Version 2.1.0-6: add sorting step in bigWig generation, and explicitly terminate tool on error from MACS2.
pjbriggs
parents:
0
diff
changeset
|
225 sys.exit(exit_code) |
| 0 | 226 |
| 227 # Run R script to create PDF from model script | |
| 228 if os.path.exists(os.path.join(working_dir,"%s_model.r" % experiment_name)): | |
| 229 cmdline = 'R --vanilla --slave < "%s_model.r" > "%s_model.r.log"' % \ | |
| 230 (experiment_name, experiment_name) | |
| 231 proc = subprocess.Popen(args=cmdline,shell=True,cwd=working_dir) | |
| 232 proc.wait() | |
| 233 | |
| 234 # Convert XLS to interval, if requested | |
| 235 if output_xls_to_interval_peaks_file is not None: | |
| 236 peaks_xls_file = os.path.join(working_dir,'%s_peaks.xls' % experiment_name ) | |
| 237 if os.path.exists(peaks_xls_file): | |
| 238 convert_xls_to_interval(peaks_xls_file,output_xls_to_interval_peaks_file, | |
| 239 header='peaks file') | |
| 240 | |
| 241 # Create bigWig from bedGraph, if requested | |
| 242 if output_bigwig is not None: | |
| 243 treat_bedgraph_file = os.path.join(working_dir,'%s_treat_pileup.bdg' % experiment_name) | |
| 244 if os.path.exists(treat_bedgraph_file): | |
| 245 make_bigwig_from_bedgraph(treat_bedgraph_file,output_bigwig, | |
| 246 chrom_sizes_file,working_dir) | |
| 247 | |
| 248 # Move MACS2 output files from working dir to their final destinations | |
| 249 move_file(working_dir,"%s_summits.bed" % experiment_name,output_summits) | |
| 250 move_file(working_dir,"%s_peaks.xls" % experiment_name,output_peaks) | |
| 251 move_file(working_dir,"%s_peaks.narrowPeak" % experiment_name,output_narrowpeaks) | |
| 252 move_file(working_dir,"%s_peaks.broadPeak" % experiment_name,output_broadpeaks) | |
| 253 move_file(working_dir,"%s_peaks.gappedPeak" % experiment_name,output_gappedpeaks) | |
| 254 move_file(working_dir,"%s_treat_pileup.bdg" % experiment_name,output_treat_pileup) | |
| 255 move_file(working_dir,"%s_control_lambda.bdg" % experiment_name,output_lambda_bedgraph) | |
| 256 move_file(working_dir,"bdgcmp_out.bdg",output_bdgcmp) | |
| 257 | |
| 258 # Move remaining file to the 'extra files' path and link from the HTML | |
| 259 # file to allow user to access them from within Galaxy | |
| 260 html_file = open(output_extra_html,'wb') | |
| 261 html_file.write('<html><head><title>Additional output created by MACS (%s)</title></head><body><h3>Additional Files:</h3><p><ul>\n' % experiment_name) | |
| 262 # Make the 'extra files' directory | |
| 263 os.mkdir(output_extra_path) | |
| 264 # Move the files | |
| 265 for filen in sorted(os.listdir(working_dir)): | |
| 266 shutil.move(os.path.join(working_dir,filen), | |
| 267 os.path.join(output_extra_path,filen)) | |
| 268 html_file.write( '<li><a href="%s">%s</a></li>\n' % (filen,filen)) | |
| 269 # All files moved, close out HTML | |
| 270 html_file.write( '</ul></p>\n' ) | |
| 271 # Append any stderr output | |
| 272 html_file.write('<h3>Messages from MACS:</h3>\n<p><pre>%s</pre></p>\n' % | |
| 273 open(stderr_filen,'rb').read()) | |
| 274 html_file.write('</body></html>\n') | |
| 275 html_file.close() | |
| 276 | |
| 277 # Clean up the working directory and files | |
| 278 os.unlink(stderr_filen) | |
| 279 os.rmdir(working_dir) |
