# HG changeset patch # User stemcellcommons # Date 1400602238 14400 # Node ID c05f607d116c73f66a38b13f3ab8890ff89fec02 # Parent 642c0da30ca61f45da48c9bd533a7cbf0d034825 Replace simplejson with json. Add option to call broad peaks. diff -r 642c0da30ca6 -r c05f607d116c macs2_wrapper.py --- a/macs2_wrapper.py Thu Oct 17 12:47:49 2013 -0400 +++ b/macs2_wrapper.py Tue May 20 12:10:38 2014 -0400 @@ -1,12 +1,9 @@ -#purpose: macs2 python wrapper -#author: Ziru Zhou -#date: November, 2012 +# macs2 python wrapper +# based on http://toolshed.g2.bx.psu.edu/view/modencode-dcc/macs2 import sys, subprocess, tempfile, shutil, glob, os, os.path, gzip from galaxy import eggs -import pkg_resources -pkg_resources.require( "simplejson" ) -import simplejson +import json CHUNK_SIZE = 1024 @@ -38,12 +35,12 @@ #keep all existing comment lines if line.startswith( '#' ): out.write( line ) - #added for macs2 since there is an extra newline + #added for macs2 since there is an extra newline elif line.startswith( '\n' ): out.write( line ) elif not wrote_header: out.write( '#%s' % line ) - print line + print line wrote_header = True else: fields = line.split( '\t' ) @@ -57,8 +54,8 @@ #========================================================================================== def main(): #take in options file and output file names - options = simplejson.load( open( sys.argv[1] ) ) - outputs = simplejson.load( open( sys.argv[2] ) ) + options = json.load( open( sys.argv[1] ) ) + outputs = json.load( open( sys.argv[2] ) ) #================================================================================= #parse options and execute macs2 @@ -71,26 +68,29 @@ #================================================================================= if (options['command'] == "callpeak"): - output_bed = outputs['output_bed_file'] - output_extra_html = outputs['output_extra_file'] - output_extra_path = outputs['output_extra_file_path'] - output_peaks = outputs['output_peaks_file'] - output_narrowpeaks = outputs['output_narrowpeaks_file'] - output_xls_to_interval_peaks_file = outputs['output_xls_to_interval_peaks_file'] - output_xls_to_interval_negative_peaks_file = outputs['output_xls_to_interval_negative_peaks_file'] + output_bed = outputs['output_bed_file'] + output_extra_html = outputs['output_extra_file'] + output_extra_path = outputs['output_extra_file_path'] + output_peaks = outputs['output_peaks_file'] + output_narrowpeaks = outputs['output_narrowpeaks_file'] + output_xls_to_interval_peaks_file = outputs['output_xls_to_interval_peaks_file'] + output_xls_to_interval_negative_peaks_file = outputs['output_xls_to_interval_negative_peaks_file'] - if 'pvalue' in options: - cmdline = "%s --format='%s' --name='%s' --gsize='%s' --bw='%s' --pvalue='%s' --mfold %s %s %s %s" % ( cmdline, options['format'], experiment_name, options['gsize'], options['bw'], options['pvalue'], options['mfoldlo'], options['mfoldhi'], options['nolambda'], options['bdg'] ) - elif 'qvalue' in options: - cmdline = "%s --format='%s' --name='%s' --gsize='%s' --bw='%s' --qvalue='%s' --mfold %s %s %s %s" % ( cmdline, options['format'], experiment_name, options['gsize'], options['bw'], options['qvalue'], options['mfoldlo'], options['mfoldhi'], options['nolambda'], options['bdg'] ) - - if 'nomodel' in options: - cmdline = "%s --nomodel --shiftsize='%s'" % ( cmdline, options['nomodel'] ) + if 'pvalue' in options: + cmdline = "%s --format='%s' --name='%s' --gsize='%s' --bw='%s' --pvalue='%s' --mfold %s %s %s %s" % ( cmdline, options['format'], experiment_name, options['gsize'], options['bw'], options['pvalue'], options['mfoldlo'], options['mfoldhi'], options['nolambda'], options['bdg'] ) + elif 'qvalue' in options: + cmdline = "%s --format='%s' --name='%s' --gsize='%s' --bw='%s' --qvalue='%s' --mfold %s %s %s %s" % ( cmdline, options['format'], experiment_name, options['gsize'], options['bw'], options['qvalue'], options['mfoldlo'], options['mfoldhi'], options['nolambda'], options['bdg'] ) + + if 'broad_cutoff' in options: + cmdline += " --broad --broad-cutoff=%s" % (options['broad_cutoff']) + + if 'nomodel' in options: + cmdline = "%s --nomodel --shiftsize='%s'" % ( cmdline, options['nomodel'] ) #================================================================================= if (options['command'] == "bdgcmp"): - output_bdgcmp = outputs['output_bdgcmp_file'] + output_bdgcmp = outputs['output_bdgcmp_file'] - cmdline = "%s -m %s -p %s -o bdgcmp_out.bdg" % ( cmdline, options['m'], options['pseudocount'] ) + cmdline = "%s -m %s -p %s -o bdgcmp_out.bdg" % ( cmdline, options['m'], options['pseudocount'] ) #================================================================================= tmp_dir = tempfile.mkdtemp() #macs makes very messy output, need to contain it into a temp dir, then provide to user @@ -115,59 +115,59 @@ #================================================================================= #move files generated by callpeak command if (options['command'] == "callpeak"): - #run R to create pdf from model script - if os.path.exists( os.path.join( tmp_dir, "%s_model.r" % experiment_name ) ): - cmdline = 'R --vanilla --slave < "%s_model.r" > "%s_model.r.log"' % ( experiment_name, experiment_name ) - proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir ) - proc.wait() + #run R to create pdf from model script + if os.path.exists( os.path.join( tmp_dir, "%s_model.r" % experiment_name ) ): + cmdline = 'R --vanilla --slave < "%s_model.r" > "%s_model.r.log"' % ( experiment_name, experiment_name ) + proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir ) + proc.wait() - #move bed out to proper output file - created_bed_name = os.path.join( tmp_dir, "%s_peaks.bed" % experiment_name ) - if os.path.exists( created_bed_name ): - shutil.move( created_bed_name, output_bed ) + #move bed out to proper output file + created_bed_name = os.path.join( tmp_dir, "%s_peaks.bed" % experiment_name ) + if os.path.exists( created_bed_name ): + shutil.move( created_bed_name, output_bed ) - #OICR peak_xls file - created_peak_xls_file = os.path.join( tmp_dir, "%s_peaks.xls" % experiment_name ) - if os.path.exists( created_peak_xls_file ): - # shutil.copy( created_peak_xls_file, os.path.join ( "/mnt/galaxyData/tmp/", "%s_peaks.xls" % ( os.path.basename(output_extra_path) ))) - shutil.copyfile( created_peak_xls_file, output_peaks ) + #OICR peak_xls file + created_peak_xls_file = os.path.join( tmp_dir, "%s_peaks.xls" % experiment_name ) + if os.path.exists( created_peak_xls_file ): + # shutil.copy( created_peak_xls_file, os.path.join ( "/mnt/galaxyData/tmp/", "%s_peaks.xls" % ( os.path.basename(output_extra_path) ))) + shutil.copyfile( created_peak_xls_file, output_peaks ) - #peaks.encodepeaks (narrowpeaks) file - created_narrowpeak_file = os.path.join (tmp_dir, "%s_peaks.encodePeak" % experiment_name ) - if os.path.exists( created_narrowpeak_file ): - shutil.move (created_narrowpeak_file, output_narrowpeaks ) + #peaks.encodepeaks (narrowpeaks) file + created_narrowpeak_file = os.path.join (tmp_dir, "%s_peaks.encodePeak" % experiment_name ) + if os.path.exists( created_narrowpeak_file ): + shutil.move (created_narrowpeak_file, output_narrowpeaks ) - #parse xls files to interval files as needed - #if 'xls_to_interval' in options: - if (options['xls_to_interval'] == "True"): - create_peak_xls_file = os.path.join( tmp_dir, '%s_peaks.xls' % experiment_name ) - if os.path.exists( create_peak_xls_file ): - xls_to_interval( create_peak_xls_file, output_xls_to_interval_peaks_file, header = 'peaks file' ) - create_peak_xls_file = os.path.join( tmp_dir, '%s_negative_peaks.xls' % experiment_name ) - if os.path.exists( create_peak_xls_file ): - print "negative file exists" - xls_to_interval( create_peak_xls_file, output_xls_to_interval_negative_peaks_file, header = 'negative peaks file' ) + #parse xls files to interval files as needed + #if 'xls_to_interval' in options: + if (options['xls_to_interval'] == "True"): + create_peak_xls_file = os.path.join( tmp_dir, '%s_peaks.xls' % experiment_name ) + if os.path.exists( create_peak_xls_file ): + xls_to_interval( create_peak_xls_file, output_xls_to_interval_peaks_file, header = 'peaks file' ) + create_peak_xls_file = os.path.join( tmp_dir, '%s_negative_peaks.xls' % experiment_name ) + if os.path.exists( create_peak_xls_file ): + print "negative file exists" + xls_to_interval( create_peak_xls_file, output_xls_to_interval_negative_peaks_file, header = 'negative peaks file' ) - #move all remaining files to extra files path of html file output to allow user download - out_html = open( output_extra_html, 'wb' ) - out_html.write( '
%s\n' % open( stderr_name, 'rb' ).read() ) - out_html.write( '\n' ) - out_html.close() + #move all remaining files to extra files path of html file output to allow user download + out_html = open( output_extra_html, 'wb' ) + out_html.write( '
%s\n' % open( stderr_name, 'rb' ).read() ) + out_html.write( '\n' ) + out_html.close() #================================================================================= #move files generated by bdgcmp command if (options['command'] == "bdgcmp"): - created_bdgcmp_file = os.path.join (tmp_dir, "bdgcmp_out.bdg" ) - if os.path.exists( created_bdgcmp_file ): - shutil.move (created_bdgcmp_file, output_bdgcmp ) - + created_bdgcmp_file = os.path.join (tmp_dir, "bdgcmp_out.bdg" ) + if os.path.exists( created_bdgcmp_file ): + shutil.move (created_bdgcmp_file, output_bdgcmp ) + #================================================================================= #cleanup #================================================================================= diff -r 642c0da30ca6 -r c05f607d116c macs2_wrapper.xml --- a/macs2_wrapper.xml Thu Oct 17 12:47:49 2013 -0400 +++ b/macs2_wrapper.xml Tue May 20 12:10:38 2014 -0400 @@ -1,6 +1,7 @@