# HG changeset patch # User ryo-tas # Date 1329118633 18000 # Node ID 845215ebd13237794d6a67e5afb5d959230b8726 # Parent c1eeccec29d15ca4f8167fde4e38e6dcd2aa153e Deleted selected files diff -r c1eeccec29d1 -r 845215ebd132 macs14/macs14_wrapper.py --- a/macs14/macs14_wrapper.py Mon Feb 13 02:34:11 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,132 +0,0 @@ -import sys, subprocess, tempfile, shutil, glob, os, os.path, gzip -from galaxy import eggs -import pkg_resources -pkg_resources.require( "simplejson" ) -import simplejson - -CHUNK_SIZE = 1024 - -def gunzip_cat_glob_path( glob_path, target_filename, delete = False ): - out = open( target_filename, 'wb' ) - for filename in glob.glob( glob_path ): - fh = gzip.open( filename, 'rb' ) - while True: - data = fh.read( CHUNK_SIZE ) - if data: - out.write( data ) - else: - break - fh.close() - if delete: - os.unlink( filename ) - out.close() - -def xls_to_interval( xls_file, interval_file, header = None ): - out = open( interval_file, 'wb' ) - if header: - out.write( '#%s\n' % header ) - wrote_header = False - #From macs readme: Coordinates in XLS is 1-based which is different with BED format. - for line in open( xls_file ): - #keep all existing comment lines - if line.startswith( '#' ): - out.write( line ) - elif not wrote_header: - out.write( '#%s' % line ) - wrote_header = True - else: - fields = line.split( '\t' ) - if len( fields ) > 1: - fields[1] = str( int( fields[1] ) - 1 ) - out.write( '\t'.join( fields ) ) - out.close() - -def main(): - options = simplejson.load( open( sys.argv[1] ) ) - output_bed = sys.argv[2] - output_extra_html = sys.argv[3] - output_extra_path = sys.argv[4] - - experiment_name = '_'.join( options['experiment_name'].split() ) #save experiment name here, it will be used by macs for filenames (gzip of wig files will fail with spaces - macs doesn't properly escape them)..need to replace all whitespace, split makes this easier - cmdline = "macs14 -t %s" % ",".join( options['input_chipseq'] ) - if options['input_control']: - cmdline = "%s -c %s" % ( cmdline, ",".join( options['input_control'] ) ) - cmdline = "%s --format='%s' --name='%s' --gsize='%s' --tsize='%s' --bw='%s' --pvalue='%s' --mfold='%s' %s --slocal='%s' --llocal='%s' %s" % ( cmdline, options['format'], experiment_name, options['gsize'], options['tsize'], options['bw'], options['pvalue'], options['mfold'], options['nolambda'], options['slocal'], options['llocal'], options['futurefdr'] ) - if 'wig' in options: - cmdline = "%s --wig --space='%s'" % ( cmdline, options['wig']['space'] ) - if 'nomodel' in options: - cmdline = "%s --nomodel --shiftsize='%s'" % ( cmdline, options['nomodel'] ) - if 'diag' in options: - cmdline = "%s --diag --fe-min='%s' --fe-max='%s' --fe-step='%s'" % ( cmdline, options['diag']['fe-min'], options['diag']['fe-max'], options['diag']['fe-step'] ) - - cmdline_macs14 = cmdline - tmp_dir = tempfile.mkdtemp() #macs makes very messy output, need to contain it into a temp dir, then provide to user - stderr_name = tempfile.NamedTemporaryFile().name # redirect stderr here, macs provides lots of info via stderr, make it into a report - proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir, stderr=open( stderr_name, 'wb' ) ) - proc.wait() - #We don't want to set tool run to error state if only warnings or info, e.g. mfold could be decreased to improve model, but let user view macs log - #Do not terminate if error code, allow dataset (e.g. log) creation and cleanup - if proc.returncode: - stderr_f = open( stderr_name ) - while True: - chunk = stderr_f.read( CHUNK_SIZE ) - if not chunk: - stderr_f.close() - break - sys.stderr.write( chunk ) - - #run R to create pdf from model script - if os.path.exists( os.path.join( tmp_dir, "%s_model.r" % experiment_name ) ): - cmdline = 'R --vanilla --slave < "%s_model.r" > "%s_model.r.log"' % ( experiment_name, experiment_name ) - proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir ) - proc.wait() - - - #move bed out to proper output file - created_bed_name = os.path.join( tmp_dir, "%s_peaks.bed" % experiment_name ) - if os.path.exists( created_bed_name ): - shutil.move( created_bed_name, output_bed ) - - #parse xls files to interval files as needed - if options['xls_to_interval']: - create_peak_xls_file = os.path.join( tmp_dir, '%s_peaks.xls' % experiment_name ) - if os.path.exists( create_peak_xls_file ): - xls_to_interval( create_peak_xls_file, options['xls_to_interval']['peaks_file'], header = 'peaks file' ) - create_peak_xls_file = os.path.join( tmp_dir, '%s_negative_peaks.xls' % experiment_name ) - if os.path.exists( create_peak_xls_file ): - xls_to_interval( create_peak_xls_file, options['xls_to_interval']['negative_peaks_file'], header = 'negative peaks file' ) - - #merge and move wig files as needed, delete gz'd files and remove emptied dirs - if 'wig' in options: - wig_base_dir = os.path.join( tmp_dir, "%s_MACS_wiggle" % experiment_name ) - if os.path.exists( wig_base_dir ): - #treatment - treatment_dir = os.path.join( wig_base_dir, "treat" ) - if os.path.exists( treatment_dir ): - gunzip_cat_glob_path( os.path.join( treatment_dir, "*.wig.gz" ), options['wig']['output_treatment_file'], delete = True ) - os.rmdir( treatment_dir ) - #control - if options['input_control']: - control_dir = os.path.join( wig_base_dir, "control" ) - if os.path.exists( control_dir ): - gunzip_cat_glob_path( os.path.join( control_dir, "*.wig.gz" ), options['wig']['output_control_file'], delete = True ) - os.rmdir( control_dir ) - os.rmdir( wig_base_dir ) - - #move all remaining files to extra files path of html file output to allow user download - out_html = open( output_extra_html, 'wb' ) - out_html.write( 'Additional output created by MACS (%s)

Additional Files:

\n' ) - out_html.write( '

CMD Executed:

\n

%s

\n' % ( cmdline_macs14 ) ) - out_html.write( '

Messages from MACS:

\n

%s

\n' % open( stderr_name, 'rb' ).read() ) - out_html.write( '\n' ) - out_html.close() - - os.unlink( stderr_name ) - os.rmdir( tmp_dir ) - -if __name__ == "__main__": main() diff -r c1eeccec29d1 -r 845215ebd132 macs14/macs14_wrapper.xml --- a/macs14/macs14_wrapper.xml Mon Feb 13 02:34:11 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,238 +0,0 @@ - - Model-based Analysis of ChIP-Seq (1.4.1) - macs14_wrapper.py $options_file $output_bed_file $output_extra_files $output_extra_files.files_path - - macs14 - macs14 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - xls_to_interval is True - - - xls_to_interval is True - input_type['input_control_file1'] is not None - - - wig_type['wig_type_selector']=='wig' - - - wig_type['wig_type_selector'] == 'wig' - input_type['input_control_file1'] is not None - - - - - <% -import simplejson -%> -#set $__options = { 'experiment_name':str( $experiment_name ), 'gsize':int( float( str( $gsize ) ) ), 'tsize':str( $tsize ), 'bw':str( $bw ), 'pvalue':str( $pvalue ), 'mfold':str( $mfold ), 'nolambda':str( $nolambda ), 'slocal': str( $slocal ), 'llocal': str( $llocal ), 'futurefdr':str( $futurefdr ) } -#if str( $xls_to_interval ) == 'create': -#set $__options['xls_to_interval'] = { 'peaks_file': str( $output_xls_to_interval_peaks_file ), 'negative_peaks_file': str( $output_xls_to_interval_negative_peaks_file ) } -#else: -#set $__options['xls_to_interval'] = False -#end if -##treatment/tag input files and format -#set $__options['input_chipseq'] = [ str( $input_type['input_chipseq_file1'] ) ] -#if $input_type['input_type_selector'] == 'paired_end': -#set $_hole = __options['input_chipseq'].append( str( $input_type['input_chipseq_file2'] ) ) -#set $__options['format'] = 'ELANDMULTIPET' -#else: -#set $__options['format'] = $input_type['input_chipseq_file1'].extension.upper() -#end if -##control/input files -#set $__options['input_control'] = [] -#if str( $input_type['input_control_file1'] ) != 'None': -#set $_hole = __options['input_control'].append( str( $input_type['input_control_file1'] ) ) -#end if -#if $input_type['input_type_selector'] == 'paired_end' and str( $input_type['input_control_file2'] ) != 'None': -#set $_hole = __options['input_control'].append( str( $input_type['input_control_file2'] ) ) -#end if -##wig options -#if $wig_type['wig_type_selector'] == 'wig': -#set $__options['wig'] = {} -#set $__options['wig']['space'] = str( $wig_type['space'] ) -#set $__options['wig']['output_treatment_file'] = str( $output_treatment_wig_file ) -#if $input_type['input_control_file1'] is not None: -#set $__options['wig']['output_control_file'] = str( $output_control_wig_file ) -#end if -#end if -##model options -#if $nomodel_type['nomodel_type_selector'] == 'nomodel': -#set $__options['nomodel'] = str( $nomodel_type['shiftsize'] ) -#end if -##diag options -#if $diag_type['diag_type_selector'] == 'diag': -#set $__options['diag'] = { 'fe-min':str( $diag_type['fe-min'] ), 'fe-max':str( $diag_type['fe-max'] ), 'fe-step':str( $diag_type['fe-step'] ) } -#end if -${ simplejson.dumps( __options ) } - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -**What it does** - -This tool allows ChIP-seq peak calling using MACS. - -Depending upon selected options, 2 to 6 history items will be created; the first output will be a standard BED file and the last will be an HTML report containing links to download additional files generated by MACS. Up to two each of wig and interval files can be optionally created; the interval files are parsed from the xls output. - -View the original MACS documentation: http://liulab.dfci.harvard.edu/MACS/00README.html. - -