# HG changeset patch
# User pjbriggs
# Date 1435666578 14400
# Node ID 06cb587a5e87ee70e0de5668c8071efb1766ee54
Uploaded initial version 2.1.0-4.
diff -r 000000000000 -r 06cb587a5e87 README.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst Tue Jun 30 08:16:18 2015 -0400
@@ -0,0 +1,99 @@
+MACS21: Model-based Analysis of ChIP-Seq (MACS 2.1.0) peak calling
+==================================================================
+
+Galaxy tool wrapper for the peak calling function of MACS 2.1.0. MACS has been
+developed by Tao Lui
+https://github.com/taoliu/MACS/
+
+The reference for MACS is:
+
+- Zhang Y, Liu T, Meyer CA, Eeckhoute J, Johnson DS, Bernstein BE, Nusbaum C, Myers
+ RM, Brown M, Li W, Liu XS. Model-based analysis of ChIP-Seq (MACS). Genome Biol.
+ 2008;9(9):R137
+
+Automated installation
+======================
+
+Installation via the Galaxy Tool Shed will take of installing the tool wrapper and
+the MACS 2.1.0 program.
+
+Manual Installation
+===================
+
+There are two files to install:
+
+- ``macs21_wrapper.xml`` (the Galaxy tool definition)
+- ``macs21_wrapper.py.sh`` (the Python script wrapper)
+
+The suggested location is in a ``tools/macs21/`` folder. You will then
+need to modify the ``tools_conf.xml`` file to tell Galaxy to offer the tool
+by adding the line:
+
+
+
+You will also need to install MACS 2.1.0 and its dependencies:
+
+- https://pypi.python.org/pypi/MACS2
+
+and ensure that it's on your Galaxy user's ``PATH`` when running the tool.
+
+If you want to run the functional tests, copy the sample test files under
+sample test files under Galaxy's ``test-data/`` directory. Then:
+
+ ./run_tests.sh -id macs2_wrapper
+
+
+History
+=======
+
+This tool was originally based on the ``modencode-dcc`` MACS2 tool developed
+by Ziru Zhou (ziruzhou@gmail.com), specifically the ``16:14f378e35191``
+revision of the tool available via
+
+- http://toolshed.g2.bx.psu.edu/view/modencode-dcc/macs2
+
+This version has been substantially modified both to adapt it to MACS 2.1.0, and
+to re-implement the internal workings of the tool to conform with current
+practices in invoking commands from Galaxy, and to add new functionality.
+
+========== ======================================================================
+Version Changes
+---------- ----------------------------------------------------------------------
+2.1.0-4 - Remove 'bdgcmp' functionality.
+2.1.0-3 - Add tool tests
+2.1.0-2 - Add option to create bigWig file from bedGraphs; fix bug with -B
+ option; make --mfold defaults consistent.
+2.1.0-1 - Initial version
+========== ======================================================================
+
+
+Developers
+==========
+
+This tool is developed on the following GitHub repository:
+https://github.com/fls-bioinformatics-core/galaxy-tools/tree/master/macs21
+
+For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball I use
+the ``package_macs21_wrapper.sh`` script.
+
+
+Licence (MIT)
+=============
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff -r 000000000000 -r 06cb587a5e87 macs21_wrapper.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macs21_wrapper.py Tue Jun 30 08:16:18 2015 -0400
@@ -0,0 +1,262 @@
+#!/bin/env python
+#
+# Galaxy wrapper to run MACS 2.1
+#
+# Completely rewritten from the original macs2 wrapped by Ziru Zhou
+# taken from http://toolshed.g2.bx.psu.edu/view/modencode-dcc/macs2
+
+import sys
+import os
+import subprocess
+import tempfile
+import shutil
+
+def move_file(working_dir,name,destination):
+ """Move a file 'name' from 'working_dir' to 'destination'
+
+ """
+ if destination is None:
+ # Nothing to do
+ return
+ source = os.path.join(working_dir,name)
+ if os.path.exists(source):
+ shutil.move(source,destination)
+
+def convert_xls_to_interval(xls_file,interval_file,header=None):
+ """Convert MACS XLS file to interval
+
+ From the MACS readme: "Coordinates in XLS is 1-based which is different with
+ BED format."
+
+ However this function no longer performs any coordinate conversions, it
+ simply ensures that any blank or non-data lines are commented out
+
+ """
+ fp = open(interval_file,'wb')
+ if header:
+ fp.write('#%s\n' % header)
+ for line in open(xls_file):
+ # Keep all existing comment lines
+ if line.startswith('#'):
+ fp.write(line)
+ else:
+ # Split line into fields and test to see if
+ # the 'start' field is actually an integer
+ fields = line.split('\t')
+ if len(fields) > 1:
+ try:
+ int(fields[1])
+ except ValueError:
+ # Integer conversion failed so comment out
+ # "bad" line instead
+ fields[0] = "#%s" % fields[0]
+ fp.write( '\t'.join( fields ) )
+ fp.close()
+
+def make_bigwig_from_bedgraph(bedgraph_file,bigwig_file,
+ chrom_sizes,working_dir=None):
+ """Make bigWig file from a bedGraph
+
+ The protocol is:
+
+ $ fetchChromSizes.sh mm9 > mm9.chrom.sizes
+ $ bedClip treat.bedgraph mm9.chrom.sizes treat.clipped
+ $ bedGraphToBigWig treat.clipped mm9.chrom.sizes treat.bw
+
+ Get the binaries from
+ http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/
+
+ We skip the fetchChromSizes step if the 'chrom_sizes'
+ argument supplied a valid file with the chromosome sizes
+ for the genome build in question.
+
+ """
+ print "Generating bigWig from bedGraph..."
+ # Check for chromosome sizes
+ if not os.path.exists(chrom_sizes):
+ # Determine genome build
+ chrom_sizes = os.path.basename(chrom_sizes)
+ genome_build = chrom_sizes.split('.')[0]
+ if genome_build == '?':
+ # No genome build set
+ sys.stderr.write("ERROR genome build not set, cannot get sizes for '?'\n")
+ sys.stderr.write("Assign a genome build to your input dataset and rerun\n")
+ sys.exit(1)
+ print "Missing chrom sizes file, attempting to fetch for '%s'" % genome_build
+ # Run fetchChromSizes
+ chrom_sizes = os.path.join(working_dir,chrom_sizes)
+ stderr_file = os.path.join(working_dir,"fetchChromSizes.stderr")
+ cmd = "fetchChromSizes %s" % genome_build
+ print "Running %s" % cmd
+ proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir,
+ stdout=open(chrom_sizes,'wb'),
+ stderr=open(stderr_file,'wb'))
+ proc.wait()
+ # Copy stderr from fetchChromSizes for information only
+ for line in open(stderr_file,'r'):
+ print line.strip()
+ os.remove(stderr_file)
+ # Check that the sizes file was downloaded
+ if not os.path.exists(chrom_sizes):
+ sys.stderr.write("Failed to download chrom sizes for '%s'\n" % genome_build)
+ sys.exit(1)
+ # Run bedClip
+ treat_clipped = "%s.clipped" % os.path.basename(bedgraph_file)
+ cmd = "bedClip %s %s %s" % (bedgraph_file,chrom_sizes,treat_clipped)
+ print "Running %s" % cmd
+ proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir)
+ proc.wait()
+ # Check that clipped file exists
+ treat_clipped = os.path.join(working_dir,treat_clipped)
+ if not os.path.exists(treat_clipped):
+ sys.stderr.write("Failed to create clipped bed file\n")
+ sys.exit(1)
+ # Run bedGraphToBigWig
+ cmd = "bedGraphToBigWig %s %s %s" % (treat_clipped,chrom_sizes,
+ bigwig_file)
+ print "Running %s" % cmd
+ proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir)
+ proc.wait()
+ # Clean up temporary chrom length file
+ if os.path.dirname(chrom_sizes) == working_dir:
+ print "Removing temporary chrom sizes file"
+ os.remove(chrom_sizes)
+
+if __name__ == "__main__":
+
+ # Echo the command line
+ print ' '.join(sys.argv)
+
+ # Initialise output files - values are set by reading from
+ # the command line supplied by the Galaxy wrapper
+ output_extra_html = None
+ output_extra_path = None
+ output_broadpeaks = None
+ output_gappedpeaks = None
+ output_narrowpeaks = None
+ output_treat_pileup = None
+ output_lambda_bedgraph = None
+ output_bigwig = None
+ output_xls_to_interval_peaks_file = None
+ output_peaks = None
+ output_bdgcmp = None
+
+ # Other initialisations
+ chrom_sizes_file = None
+
+ # Build the MACS 2.1 command line
+ # Initial arguments are always the same: command & input ChIP-seq file name
+ cmdline = ["macs2 %s -t %s" % (sys.argv[1],sys.argv[2])]
+
+ # Process remaining args
+ for arg in sys.argv[3:]:
+ if arg.startswith('--format='):
+ # Convert format to uppercase
+ format_ = arg.split('=')[1].upper()
+ cmdline.append("--format=%s" % format_)
+ elif arg.startswith('--name='):
+ # Replace whitespace in name with underscores
+ experiment_name = '_'.join(arg.split('=')[1].split())
+ cmdline.append("--name=%s" % experiment_name)
+ elif arg.startswith('--length='):
+ # Extract chromosome size file
+ chrom_sizes_file = arg.split('=')[1]
+ elif arg.startswith('--output-'):
+ # Handle destinations for output files
+ arg0,filen = arg.split('=')
+ if arg0 == '--output-summits':
+ output_summits = filen
+ elif arg0 == '--output-extra-files':
+ output_extra_html = filen
+ elif arg0 == '--output-extra-files-path':
+ output_extra_path = filen
+ elif arg0 == '--output-broadpeaks':
+ output_broadpeaks = filen
+ elif arg0 == '--output-gappedpeaks':
+ output_gappedpeaks = filen
+ elif arg0 == '--output-narrowpeaks':
+ output_narrowpeaks = filen
+ elif arg0 == '--output-pileup':
+ output_treat_pileup = filen
+ elif arg0 == '--output-lambda-bedgraph':
+ output_lambda_bedgraph = filen
+ elif arg0 == '--output-bigwig':
+ output_bigwig = filen
+ elif arg0 == '--output-xls-to-interval':
+ output_xls_to_interval_peaks_file = filen
+ elif arg0 == '--output-peaks':
+ output_peaks = filen
+ else:
+ # Pass remaining args directly to MACS
+ # command line
+ cmdline.append(arg)
+
+ cmdline = ' '.join(cmdline)
+ print "Generated command line:\n%s" % cmdline
+
+ # Execute MACS2
+ #
+ # Make a working directory
+ working_dir = tempfile.mkdtemp()
+ #
+ # Collect stderr in a file for reporting later
+ stderr_filen = tempfile.NamedTemporaryFile().name
+ #
+ # Run MACS2
+ proc = subprocess.Popen(args=cmdline,shell=True,cwd=working_dir,
+ stderr=open(stderr_filen,'wb'))
+ proc.wait()
+
+ # Run R script to create PDF from model script
+ if os.path.exists(os.path.join(working_dir,"%s_model.r" % experiment_name)):
+ cmdline = 'R --vanilla --slave < "%s_model.r" > "%s_model.r.log"' % \
+ (experiment_name, experiment_name)
+ proc = subprocess.Popen(args=cmdline,shell=True,cwd=working_dir)
+ proc.wait()
+
+ # Convert XLS to interval, if requested
+ if output_xls_to_interval_peaks_file is not None:
+ peaks_xls_file = os.path.join(working_dir,'%s_peaks.xls' % experiment_name )
+ if os.path.exists(peaks_xls_file):
+ convert_xls_to_interval(peaks_xls_file,output_xls_to_interval_peaks_file,
+ header='peaks file')
+
+ # Create bigWig from bedGraph, if requested
+ if output_bigwig is not None:
+ treat_bedgraph_file = os.path.join(working_dir,'%s_treat_pileup.bdg' % experiment_name)
+ if os.path.exists(treat_bedgraph_file):
+ make_bigwig_from_bedgraph(treat_bedgraph_file,output_bigwig,
+ chrom_sizes_file,working_dir)
+
+ # Move MACS2 output files from working dir to their final destinations
+ move_file(working_dir,"%s_summits.bed" % experiment_name,output_summits)
+ move_file(working_dir,"%s_peaks.xls" % experiment_name,output_peaks)
+ move_file(working_dir,"%s_peaks.narrowPeak" % experiment_name,output_narrowpeaks)
+ move_file(working_dir,"%s_peaks.broadPeak" % experiment_name,output_broadpeaks)
+ move_file(working_dir,"%s_peaks.gappedPeak" % experiment_name,output_gappedpeaks)
+ move_file(working_dir,"%s_treat_pileup.bdg" % experiment_name,output_treat_pileup)
+ move_file(working_dir,"%s_control_lambda.bdg" % experiment_name,output_lambda_bedgraph)
+ move_file(working_dir,"bdgcmp_out.bdg",output_bdgcmp)
+
+ # Move remaining file to the 'extra files' path and link from the HTML
+ # file to allow user to access them from within Galaxy
+ html_file = open(output_extra_html,'wb')
+ html_file.write('
Additional output created by MACS (%s)
Additional Files:
\n' % experiment_name)
+ # Make the 'extra files' directory
+ os.mkdir(output_extra_path)
+ # Move the files
+ for filen in sorted(os.listdir(working_dir)):
+ shutil.move(os.path.join(working_dir,filen),
+ os.path.join(output_extra_path,filen))
+ html_file.write( '