# HG changeset patch # User stemcellcommons # Date 1400602238 14400 # Node ID c05f607d116c73f66a38b13f3ab8890ff89fec02 # Parent 642c0da30ca61f45da48c9bd533a7cbf0d034825 Replace simplejson with json. Add option to call broad peaks. diff -r 642c0da30ca6 -r c05f607d116c macs2_wrapper.py --- a/macs2_wrapper.py Thu Oct 17 12:47:49 2013 -0400 +++ b/macs2_wrapper.py Tue May 20 12:10:38 2014 -0400 @@ -1,12 +1,9 @@ -#purpose: macs2 python wrapper -#author: Ziru Zhou -#date: November, 2012 +# macs2 python wrapper +# based on http://toolshed.g2.bx.psu.edu/view/modencode-dcc/macs2 import sys, subprocess, tempfile, shutil, glob, os, os.path, gzip from galaxy import eggs -import pkg_resources -pkg_resources.require( "simplejson" ) -import simplejson +import json CHUNK_SIZE = 1024 @@ -38,12 +35,12 @@ #keep all existing comment lines if line.startswith( '#' ): out.write( line ) - #added for macs2 since there is an extra newline + #added for macs2 since there is an extra newline elif line.startswith( '\n' ): out.write( line ) elif not wrote_header: out.write( '#%s' % line ) - print line + print line wrote_header = True else: fields = line.split( '\t' ) @@ -57,8 +54,8 @@ #========================================================================================== def main(): #take in options file and output file names - options = simplejson.load( open( sys.argv[1] ) ) - outputs = simplejson.load( open( sys.argv[2] ) ) + options = json.load( open( sys.argv[1] ) ) + outputs = json.load( open( sys.argv[2] ) ) #================================================================================= #parse options and execute macs2 @@ -71,26 +68,29 @@ #================================================================================= if (options['command'] == "callpeak"): - output_bed = outputs['output_bed_file'] - output_extra_html = outputs['output_extra_file'] - output_extra_path = outputs['output_extra_file_path'] - output_peaks = outputs['output_peaks_file'] - output_narrowpeaks = outputs['output_narrowpeaks_file'] - output_xls_to_interval_peaks_file = outputs['output_xls_to_interval_peaks_file'] - output_xls_to_interval_negative_peaks_file = outputs['output_xls_to_interval_negative_peaks_file'] + output_bed = outputs['output_bed_file'] + output_extra_html = outputs['output_extra_file'] + output_extra_path = outputs['output_extra_file_path'] + output_peaks = outputs['output_peaks_file'] + output_narrowpeaks = outputs['output_narrowpeaks_file'] + output_xls_to_interval_peaks_file = outputs['output_xls_to_interval_peaks_file'] + output_xls_to_interval_negative_peaks_file = outputs['output_xls_to_interval_negative_peaks_file'] - if 'pvalue' in options: - cmdline = "%s --format='%s' --name='%s' --gsize='%s' --bw='%s' --pvalue='%s' --mfold %s %s %s %s" % ( cmdline, options['format'], experiment_name, options['gsize'], options['bw'], options['pvalue'], options['mfoldlo'], options['mfoldhi'], options['nolambda'], options['bdg'] ) - elif 'qvalue' in options: - cmdline = "%s --format='%s' --name='%s' --gsize='%s' --bw='%s' --qvalue='%s' --mfold %s %s %s %s" % ( cmdline, options['format'], experiment_name, options['gsize'], options['bw'], options['qvalue'], options['mfoldlo'], options['mfoldhi'], options['nolambda'], options['bdg'] ) - - if 'nomodel' in options: - cmdline = "%s --nomodel --shiftsize='%s'" % ( cmdline, options['nomodel'] ) + if 'pvalue' in options: + cmdline = "%s --format='%s' --name='%s' --gsize='%s' --bw='%s' --pvalue='%s' --mfold %s %s %s %s" % ( cmdline, options['format'], experiment_name, options['gsize'], options['bw'], options['pvalue'], options['mfoldlo'], options['mfoldhi'], options['nolambda'], options['bdg'] ) + elif 'qvalue' in options: + cmdline = "%s --format='%s' --name='%s' --gsize='%s' --bw='%s' --qvalue='%s' --mfold %s %s %s %s" % ( cmdline, options['format'], experiment_name, options['gsize'], options['bw'], options['qvalue'], options['mfoldlo'], options['mfoldhi'], options['nolambda'], options['bdg'] ) + + if 'broad_cutoff' in options: + cmdline += " --broad --broad-cutoff=%s" % (options['broad_cutoff']) + + if 'nomodel' in options: + cmdline = "%s --nomodel --shiftsize='%s'" % ( cmdline, options['nomodel'] ) #================================================================================= if (options['command'] == "bdgcmp"): - output_bdgcmp = outputs['output_bdgcmp_file'] + output_bdgcmp = outputs['output_bdgcmp_file'] - cmdline = "%s -m %s -p %s -o bdgcmp_out.bdg" % ( cmdline, options['m'], options['pseudocount'] ) + cmdline = "%s -m %s -p %s -o bdgcmp_out.bdg" % ( cmdline, options['m'], options['pseudocount'] ) #================================================================================= tmp_dir = tempfile.mkdtemp() #macs makes very messy output, need to contain it into a temp dir, then provide to user @@ -115,59 +115,59 @@ #================================================================================= #move files generated by callpeak command if (options['command'] == "callpeak"): - #run R to create pdf from model script - if os.path.exists( os.path.join( tmp_dir, "%s_model.r" % experiment_name ) ): - cmdline = 'R --vanilla --slave < "%s_model.r" > "%s_model.r.log"' % ( experiment_name, experiment_name ) - proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir ) - proc.wait() + #run R to create pdf from model script + if os.path.exists( os.path.join( tmp_dir, "%s_model.r" % experiment_name ) ): + cmdline = 'R --vanilla --slave < "%s_model.r" > "%s_model.r.log"' % ( experiment_name, experiment_name ) + proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir ) + proc.wait() - #move bed out to proper output file - created_bed_name = os.path.join( tmp_dir, "%s_peaks.bed" % experiment_name ) - if os.path.exists( created_bed_name ): - shutil.move( created_bed_name, output_bed ) + #move bed out to proper output file + created_bed_name = os.path.join( tmp_dir, "%s_peaks.bed" % experiment_name ) + if os.path.exists( created_bed_name ): + shutil.move( created_bed_name, output_bed ) - #OICR peak_xls file - created_peak_xls_file = os.path.join( tmp_dir, "%s_peaks.xls" % experiment_name ) - if os.path.exists( created_peak_xls_file ): - # shutil.copy( created_peak_xls_file, os.path.join ( "/mnt/galaxyData/tmp/", "%s_peaks.xls" % ( os.path.basename(output_extra_path) ))) - shutil.copyfile( created_peak_xls_file, output_peaks ) + #OICR peak_xls file + created_peak_xls_file = os.path.join( tmp_dir, "%s_peaks.xls" % experiment_name ) + if os.path.exists( created_peak_xls_file ): + # shutil.copy( created_peak_xls_file, os.path.join ( "/mnt/galaxyData/tmp/", "%s_peaks.xls" % ( os.path.basename(output_extra_path) ))) + shutil.copyfile( created_peak_xls_file, output_peaks ) - #peaks.encodepeaks (narrowpeaks) file - created_narrowpeak_file = os.path.join (tmp_dir, "%s_peaks.encodePeak" % experiment_name ) - if os.path.exists( created_narrowpeak_file ): - shutil.move (created_narrowpeak_file, output_narrowpeaks ) + #peaks.encodepeaks (narrowpeaks) file + created_narrowpeak_file = os.path.join (tmp_dir, "%s_peaks.encodePeak" % experiment_name ) + if os.path.exists( created_narrowpeak_file ): + shutil.move (created_narrowpeak_file, output_narrowpeaks ) - #parse xls files to interval files as needed - #if 'xls_to_interval' in options: - if (options['xls_to_interval'] == "True"): - create_peak_xls_file = os.path.join( tmp_dir, '%s_peaks.xls' % experiment_name ) - if os.path.exists( create_peak_xls_file ): - xls_to_interval( create_peak_xls_file, output_xls_to_interval_peaks_file, header = 'peaks file' ) - create_peak_xls_file = os.path.join( tmp_dir, '%s_negative_peaks.xls' % experiment_name ) - if os.path.exists( create_peak_xls_file ): - print "negative file exists" - xls_to_interval( create_peak_xls_file, output_xls_to_interval_negative_peaks_file, header = 'negative peaks file' ) + #parse xls files to interval files as needed + #if 'xls_to_interval' in options: + if (options['xls_to_interval'] == "True"): + create_peak_xls_file = os.path.join( tmp_dir, '%s_peaks.xls' % experiment_name ) + if os.path.exists( create_peak_xls_file ): + xls_to_interval( create_peak_xls_file, output_xls_to_interval_peaks_file, header = 'peaks file' ) + create_peak_xls_file = os.path.join( tmp_dir, '%s_negative_peaks.xls' % experiment_name ) + if os.path.exists( create_peak_xls_file ): + print "negative file exists" + xls_to_interval( create_peak_xls_file, output_xls_to_interval_negative_peaks_file, header = 'negative peaks file' ) - #move all remaining files to extra files path of html file output to allow user download - out_html = open( output_extra_html, 'wb' ) - out_html.write( 'Additional output created by MACS (%s)

Additional Files:

\n' ) - out_html.write( '

Messages from MACS:

\n

%s

\n' % open( stderr_name, 'rb' ).read() ) - out_html.write( '\n' ) - out_html.close() + #move all remaining files to extra files path of html file output to allow user download + out_html = open( output_extra_html, 'wb' ) + out_html.write( 'Additional output created by MACS (%s)

Additional Files:

\n' ) + out_html.write( '

Messages from MACS:

\n

%s

\n' % open( stderr_name, 'rb' ).read() ) + out_html.write( '\n' ) + out_html.close() #================================================================================= #move files generated by bdgcmp command if (options['command'] == "bdgcmp"): - created_bdgcmp_file = os.path.join (tmp_dir, "bdgcmp_out.bdg" ) - if os.path.exists( created_bdgcmp_file ): - shutil.move (created_bdgcmp_file, output_bdgcmp ) - + created_bdgcmp_file = os.path.join (tmp_dir, "bdgcmp_out.bdg" ) + if os.path.exists( created_bdgcmp_file ): + shutil.move (created_bdgcmp_file, output_bdgcmp ) + #================================================================================= #cleanup #================================================================================= diff -r 642c0da30ca6 -r c05f607d116c macs2_wrapper.xml --- a/macs2_wrapper.xml Thu Oct 17 12:47:49 2013 -0400 +++ b/macs2_wrapper.xml Tue May 20 12:10:38 2014 -0400 @@ -1,6 +1,7 @@ Model-based Analysis of ChIP-Seq macs2_wrapper.py $options_file $outputs_file + macs2 --version @@ -8,53 +9,63 @@ - - - - + + + + - + - - - - + + + + - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + - - - - - - - - + + + + + + + + + + + + + + + + + + - + @@ -67,18 +78,18 @@ - + - - - - - - + + + + + + - + @@ -114,28 +125,28 @@ <% -import simplejson +import json %> ##======================================================================================= #set $__outputs = { 'command':str( $major_command.major_command_selector ) } #if str( $major_command.major_command_selector ) == 'callpeak': - #set $__outputs['output_bed_file'] = str( $output_bed_file ) - #set $__outputs['output_extra_file'] = str( $output_extra_files ) - #set $__outputs['output_extra_file_path'] = str( $output_extra_files.files_path ) - #set $__outputs['output_peaks_file'] = str( $output_peaks_file ) - #set $__outputs['output_narrowpeaks_file'] = str( $output_narrowpeaks_file ) - #set $__outputs['output_xls_to_interval_peaks_file'] = str( $output_xls_to_interval_peaks_file ) - #set $__outputs['output_xls_to_interval_negative_peaks_file'] = str( $output_xls_to_interval_negative_peaks_file ) + #set $__outputs['output_bed_file'] = str( $output_bed_file ) + #set $__outputs['output_extra_file'] = str( $output_extra_files ) + #set $__outputs['output_extra_file_path'] = str( $output_extra_files.files_path ) + #set $__outputs['output_peaks_file'] = str( $output_peaks_file ) + #set $__outputs['output_narrowpeaks_file'] = str( $output_narrowpeaks_file ) + #set $__outputs['output_xls_to_interval_peaks_file'] = str( $output_xls_to_interval_peaks_file ) + #set $__outputs['output_xls_to_interval_negative_peaks_file'] = str( $output_xls_to_interval_negative_peaks_file ) #end if ##======================================================================================= #if str( $major_command.major_command_selector ) == 'bdgcmp': - #set $__outputs['output_bdgcmp_file'] = str( $output_bdgcmp_file ) + #set $__outputs['output_bdgcmp_file'] = str( $output_bdgcmp_file ) #end if -${ simplejson.dumps( __outputs ) } +${ json.dumps( __outputs ) } <% -import simplejson +import json %> ##======================================================================================= #set $__options = { 'experiment_name':str( $experiment_name ) } @@ -146,57 +157,60 @@ ##control/input files #set $__options['input_control'] = [] #if str( $major_command.input_control_file1 ) != 'None': - #set $_hole = __options['input_control'].append( str( $major_command.input_control_file1 ) ) + #set $_hole = __options['input_control'].append( str( $major_command.input_control_file1 ) ) #end if #if str( $major_command.major_command_selector ) == 'callpeak': - #set $__options['command'] = str( "callpeak" ) - #set $__options['gsize'] = int( $major_command.gsize ) - #set $__options['bw'] = str( $major_command.bw ) - #set $__options['bdg'] = str( $major_command.bdg ) - #set $__options['xls_to_interval'] = str( $major_command.xls_to_interval ) - - ##advanced options - #if str( $major_command.advanced_options.advanced_options_selector ) == 'on': - #set $__options['mfoldlo'] = int( $major_command.advanced_options.mfoldlo ) - #set $__options['mfoldhi'] = int( $major_command.advanced_options.mfoldhi ) - #set $__options['nolambda'] = str( $major_command.advanced_options.nolambda ) - #else: - #set $__options['mfoldlo'] = int( "10" ) - #set $__options['mfoldhi'] = int( "30" ) - #set $__options['nolambda'] = str( "" ) - #end if + #set $__options['command'] = str( "callpeak" ) + #set $__options['gsize'] = int( $major_command.gsize ) + #set $__options['bw'] = str( $major_command.bw ) + #set $__options['bdg'] = str( $major_command.bdg ) + #set $__options['xls_to_interval'] = str( $major_command.xls_to_interval ) - ##enable xls file options - ##if str( $major_command.xls_to_interval ) == 'create': - ##set $__options['xls_to_interval'] = { 'peaks_file': str( $output_xls_to_interval_peaks_file ), 'negative_peaks_file': str( $output_xls_to_interval_negative_peaks_file ) } - ##end if - - ##pq value select options - #if str( $major_command.pq_options.pq_options_selector ) == 'qvalue': - #set $__options['qvalue'] = str( $major_command.pq_options.qvalue ) - #else: - #set $__options['pvalue'] = str( $major_command.pq_options.pvalue ) - #end if - - ##model options - #if str( $major_command.nomodel_type.nomodel_type_selector ) == 'nomodel': - #set $__options['nomodel'] = str( $major_command.nomodel_type.shiftsize ) - #end if + ##advanced options + #if str( $major_command.advanced_options.advanced_options_selector ) == 'on': + #set $__options['mfoldlo'] = int( $major_command.advanced_options.mfoldlo ) + #set $__options['mfoldhi'] = int( $major_command.advanced_options.mfoldhi ) + #set $__options['nolambda'] = str( $major_command.advanced_options.nolambda ) + #if str($major_command.advanced_options.broad_options.broad_options_selector) == 'broad' + #set $__options['broad_cutoff'] = str($major_command.advanced_options.broad_options.broad_cutoff) + #end if + #else: + #set $__options['mfoldlo'] = int( "10" ) + #set $__options['mfoldhi'] = int( "30" ) + #set $__options['nolambda'] = str( "" ) + #end if + + ##enable xls file options + ##if str( $major_command.xls_to_interval ) == 'create': + ##set $__options['xls_to_interval'] = { 'peaks_file': str( $output_xls_to_interval_peaks_file ), 'negative_peaks_file': str( $output_xls_to_interval_negative_peaks_file ) } + ##end if + + ##pq value select options + #if str( $major_command.pq_options.pq_options_selector ) == 'qvalue': + #set $__options['qvalue'] = str( $major_command.pq_options.qvalue ) + #else: + #set $__options['pvalue'] = str( $major_command.pq_options.pvalue ) + #end if + + ##model options + #if str( $major_command.nomodel_type.nomodel_type_selector ) == 'nomodel': + #set $__options['nomodel'] = str( $major_command.nomodel_type.shiftsize ) + #end if #end if ##======================================================================================= #if str( $major_command.major_command_selector ) == 'bdgcmp': - #set $__options['command'] = str( "bdgcmp" ) - #set $__options['pseudocount'] = float( str( $major_command.pseudocount ) ) - #set $__options['m'] = str( $major_command.bdgcmp_options.bdgcmp_options_selector ) + #set $__options['command'] = str( "bdgcmp" ) + #set $__options['pseudocount'] = float( str( $major_command.pseudocount ) ) + #set $__options['m'] = str( $major_command.bdgcmp_options.bdgcmp_options_selector ) #end if ##======================================================================================= -${ simplejson.dumps( __options ) } +${ json.dumps( __options ) } - + **What it does**