0
|
1 #!/bin/env python
|
|
2 #
|
|
3 # Galaxy wrapper to run MACS 2.1
|
|
4 #
|
|
5 # Completely rewritten from the original macs2 wrapped by Ziru Zhou
|
|
6 # taken from http://toolshed.g2.bx.psu.edu/view/modencode-dcc/macs2
|
|
7
|
|
8 import sys
|
|
9 import os
|
|
10 import subprocess
|
|
11 import tempfile
|
|
12 import shutil
|
|
13
|
|
14 def move_file(working_dir,name,destination):
|
|
15 """Move a file 'name' from 'working_dir' to 'destination'
|
|
16
|
|
17 """
|
|
18 if destination is None:
|
|
19 # Nothing to do
|
|
20 return
|
|
21 source = os.path.join(working_dir,name)
|
|
22 if os.path.exists(source):
|
|
23 shutil.move(source,destination)
|
|
24
|
|
25 def convert_xls_to_interval(xls_file,interval_file,header=None):
|
|
26 """Convert MACS XLS file to interval
|
|
27
|
|
28 From the MACS readme: "Coordinates in XLS is 1-based which is different with
|
|
29 BED format."
|
|
30
|
|
31 However this function no longer performs any coordinate conversions, it
|
|
32 simply ensures that any blank or non-data lines are commented out
|
|
33
|
|
34 """
|
|
35 fp = open(interval_file,'wb')
|
|
36 if header:
|
|
37 fp.write('#%s\n' % header)
|
|
38 for line in open(xls_file):
|
|
39 # Keep all existing comment lines
|
|
40 if line.startswith('#'):
|
|
41 fp.write(line)
|
|
42 else:
|
|
43 # Split line into fields and test to see if
|
|
44 # the 'start' field is actually an integer
|
|
45 fields = line.split('\t')
|
|
46 if len(fields) > 1:
|
|
47 try:
|
|
48 int(fields[1])
|
|
49 except ValueError:
|
|
50 # Integer conversion failed so comment out
|
|
51 # "bad" line instead
|
|
52 fields[0] = "#%s" % fields[0]
|
|
53 fp.write( '\t'.join( fields ) )
|
|
54 fp.close()
|
|
55
|
|
56 def make_bigwig_from_bedgraph(bedgraph_file,bigwig_file,
|
|
57 chrom_sizes,working_dir=None):
|
|
58 """Make bigWig file from a bedGraph
|
|
59
|
|
60 The protocol is:
|
|
61
|
|
62 $ fetchChromSizes.sh mm9 > mm9.chrom.sizes
|
|
63 $ bedClip treat.bedgraph mm9.chrom.sizes treat.clipped
|
|
64 $ bedGraphToBigWig treat.clipped mm9.chrom.sizes treat.bw
|
|
65
|
|
66 Get the binaries from
|
|
67 http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/
|
|
68
|
|
69 We skip the fetchChromSizes step if the 'chrom_sizes'
|
|
70 argument supplied a valid file with the chromosome sizes
|
|
71 for the genome build in question.
|
|
72
|
|
73 """
|
|
74 print "Generating bigWig from bedGraph..."
|
|
75 # Check for chromosome sizes
|
|
76 if not os.path.exists(chrom_sizes):
|
|
77 # Determine genome build
|
|
78 chrom_sizes = os.path.basename(chrom_sizes)
|
|
79 genome_build = chrom_sizes.split('.')[0]
|
|
80 if genome_build == '?':
|
|
81 # No genome build set
|
|
82 sys.stderr.write("ERROR genome build not set, cannot get sizes for '?'\n")
|
|
83 sys.stderr.write("Assign a genome build to your input dataset and rerun\n")
|
|
84 sys.exit(1)
|
|
85 print "Missing chrom sizes file, attempting to fetch for '%s'" % genome_build
|
|
86 # Run fetchChromSizes
|
|
87 chrom_sizes = os.path.join(working_dir,chrom_sizes)
|
|
88 stderr_file = os.path.join(working_dir,"fetchChromSizes.stderr")
|
|
89 cmd = "fetchChromSizes %s" % genome_build
|
|
90 print "Running %s" % cmd
|
|
91 proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir,
|
|
92 stdout=open(chrom_sizes,'wb'),
|
|
93 stderr=open(stderr_file,'wb'))
|
|
94 proc.wait()
|
|
95 # Copy stderr from fetchChromSizes for information only
|
|
96 for line in open(stderr_file,'r'):
|
|
97 print line.strip()
|
|
98 os.remove(stderr_file)
|
|
99 # Check that the sizes file was downloaded
|
|
100 if not os.path.exists(chrom_sizes):
|
|
101 sys.stderr.write("Failed to download chrom sizes for '%s'\n" % genome_build)
|
|
102 sys.exit(1)
|
|
103 # Run bedClip
|
|
104 treat_clipped = "%s.clipped" % os.path.basename(bedgraph_file)
|
|
105 cmd = "bedClip %s %s %s" % (bedgraph_file,chrom_sizes,treat_clipped)
|
|
106 print "Running %s" % cmd
|
|
107 proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir)
|
|
108 proc.wait()
|
|
109 # Check that clipped file exists
|
|
110 treat_clipped = os.path.join(working_dir,treat_clipped)
|
|
111 if not os.path.exists(treat_clipped):
|
|
112 sys.stderr.write("Failed to create clipped bed file\n")
|
|
113 sys.exit(1)
|
|
114 # Run bedGraphToBigWig
|
|
115 cmd = "bedGraphToBigWig %s %s %s" % (treat_clipped,chrom_sizes,
|
|
116 bigwig_file)
|
|
117 print "Running %s" % cmd
|
|
118 proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir)
|
|
119 proc.wait()
|
|
120 # Clean up temporary chrom length file
|
|
121 if os.path.dirname(chrom_sizes) == working_dir:
|
|
122 print "Removing temporary chrom sizes file"
|
|
123 os.remove(chrom_sizes)
|
|
124
|
|
125 if __name__ == "__main__":
|
|
126
|
|
127 # Echo the command line
|
|
128 print ' '.join(sys.argv)
|
|
129
|
|
130 # Initialise output files - values are set by reading from
|
|
131 # the command line supplied by the Galaxy wrapper
|
|
132 output_extra_html = None
|
|
133 output_extra_path = None
|
|
134 output_broadpeaks = None
|
|
135 output_gappedpeaks = None
|
|
136 output_narrowpeaks = None
|
|
137 output_treat_pileup = None
|
|
138 output_lambda_bedgraph = None
|
|
139 output_bigwig = None
|
|
140 output_xls_to_interval_peaks_file = None
|
|
141 output_peaks = None
|
|
142 output_bdgcmp = None
|
|
143
|
|
144 # Other initialisations
|
|
145 chrom_sizes_file = None
|
|
146
|
|
147 # Build the MACS 2.1 command line
|
|
148 # Initial arguments are always the same: command & input ChIP-seq file name
|
|
149 cmdline = ["macs2 %s -t %s" % (sys.argv[1],sys.argv[2])]
|
|
150
|
|
151 # Process remaining args
|
|
152 for arg in sys.argv[3:]:
|
|
153 if arg.startswith('--format='):
|
|
154 # Convert format to uppercase
|
|
155 format_ = arg.split('=')[1].upper()
|
|
156 cmdline.append("--format=%s" % format_)
|
|
157 elif arg.startswith('--name='):
|
|
158 # Replace whitespace in name with underscores
|
|
159 experiment_name = '_'.join(arg.split('=')[1].split())
|
|
160 cmdline.append("--name=%s" % experiment_name)
|
|
161 elif arg.startswith('--length='):
|
|
162 # Extract chromosome size file
|
|
163 chrom_sizes_file = arg.split('=')[1]
|
|
164 elif arg.startswith('--output-'):
|
|
165 # Handle destinations for output files
|
|
166 arg0,filen = arg.split('=')
|
|
167 if arg0 == '--output-summits':
|
|
168 output_summits = filen
|
|
169 elif arg0 == '--output-extra-files':
|
|
170 output_extra_html = filen
|
|
171 elif arg0 == '--output-extra-files-path':
|
|
172 output_extra_path = filen
|
|
173 elif arg0 == '--output-broadpeaks':
|
|
174 output_broadpeaks = filen
|
|
175 elif arg0 == '--output-gappedpeaks':
|
|
176 output_gappedpeaks = filen
|
|
177 elif arg0 == '--output-narrowpeaks':
|
|
178 output_narrowpeaks = filen
|
|
179 elif arg0 == '--output-pileup':
|
|
180 output_treat_pileup = filen
|
|
181 elif arg0 == '--output-lambda-bedgraph':
|
|
182 output_lambda_bedgraph = filen
|
|
183 elif arg0 == '--output-bigwig':
|
|
184 output_bigwig = filen
|
|
185 elif arg0 == '--output-xls-to-interval':
|
|
186 output_xls_to_interval_peaks_file = filen
|
|
187 elif arg0 == '--output-peaks':
|
|
188 output_peaks = filen
|
|
189 else:
|
|
190 # Pass remaining args directly to MACS
|
|
191 # command line
|
|
192 cmdline.append(arg)
|
|
193
|
|
194 cmdline = ' '.join(cmdline)
|
|
195 print "Generated command line:\n%s" % cmdline
|
|
196
|
|
197 # Execute MACS2
|
|
198 #
|
|
199 # Make a working directory
|
|
200 working_dir = tempfile.mkdtemp()
|
|
201 #
|
|
202 # Collect stderr in a file for reporting later
|
|
203 stderr_filen = tempfile.NamedTemporaryFile().name
|
|
204 #
|
|
205 # Run MACS2
|
|
206 proc = subprocess.Popen(args=cmdline,shell=True,cwd=working_dir,
|
|
207 stderr=open(stderr_filen,'wb'))
|
|
208 proc.wait()
|
|
209
|
|
210 # Run R script to create PDF from model script
|
|
211 if os.path.exists(os.path.join(working_dir,"%s_model.r" % experiment_name)):
|
|
212 cmdline = 'R --vanilla --slave < "%s_model.r" > "%s_model.r.log"' % \
|
|
213 (experiment_name, experiment_name)
|
|
214 proc = subprocess.Popen(args=cmdline,shell=True,cwd=working_dir)
|
|
215 proc.wait()
|
|
216
|
|
217 # Convert XLS to interval, if requested
|
|
218 if output_xls_to_interval_peaks_file is not None:
|
|
219 peaks_xls_file = os.path.join(working_dir,'%s_peaks.xls' % experiment_name )
|
|
220 if os.path.exists(peaks_xls_file):
|
|
221 convert_xls_to_interval(peaks_xls_file,output_xls_to_interval_peaks_file,
|
|
222 header='peaks file')
|
|
223
|
|
224 # Create bigWig from bedGraph, if requested
|
|
225 if output_bigwig is not None:
|
|
226 treat_bedgraph_file = os.path.join(working_dir,'%s_treat_pileup.bdg' % experiment_name)
|
|
227 if os.path.exists(treat_bedgraph_file):
|
|
228 make_bigwig_from_bedgraph(treat_bedgraph_file,output_bigwig,
|
|
229 chrom_sizes_file,working_dir)
|
|
230
|
|
231 # Move MACS2 output files from working dir to their final destinations
|
|
232 move_file(working_dir,"%s_summits.bed" % experiment_name,output_summits)
|
|
233 move_file(working_dir,"%s_peaks.xls" % experiment_name,output_peaks)
|
|
234 move_file(working_dir,"%s_peaks.narrowPeak" % experiment_name,output_narrowpeaks)
|
|
235 move_file(working_dir,"%s_peaks.broadPeak" % experiment_name,output_broadpeaks)
|
|
236 move_file(working_dir,"%s_peaks.gappedPeak" % experiment_name,output_gappedpeaks)
|
|
237 move_file(working_dir,"%s_treat_pileup.bdg" % experiment_name,output_treat_pileup)
|
|
238 move_file(working_dir,"%s_control_lambda.bdg" % experiment_name,output_lambda_bedgraph)
|
|
239 move_file(working_dir,"bdgcmp_out.bdg",output_bdgcmp)
|
|
240
|
|
241 # Move remaining file to the 'extra files' path and link from the HTML
|
|
242 # file to allow user to access them from within Galaxy
|
|
243 html_file = open(output_extra_html,'wb')
|
|
244 html_file.write('<html><head><title>Additional output created by MACS (%s)</title></head><body><h3>Additional Files:</h3><p><ul>\n' % experiment_name)
|
|
245 # Make the 'extra files' directory
|
|
246 os.mkdir(output_extra_path)
|
|
247 # Move the files
|
|
248 for filen in sorted(os.listdir(working_dir)):
|
|
249 shutil.move(os.path.join(working_dir,filen),
|
|
250 os.path.join(output_extra_path,filen))
|
|
251 html_file.write( '<li><a href="%s">%s</a></li>\n' % (filen,filen))
|
|
252 # All files moved, close out HTML
|
|
253 html_file.write( '</ul></p>\n' )
|
|
254 # Append any stderr output
|
|
255 html_file.write('<h3>Messages from MACS:</h3>\n<p><pre>%s</pre></p>\n' %
|
|
256 open(stderr_filen,'rb').read())
|
|
257 html_file.write('</body></html>\n')
|
|
258 html_file.close()
|
|
259
|
|
260 # Clean up the working directory and files
|
|
261 os.unlink(stderr_filen)
|
|
262 os.rmdir(working_dir)
|