Previous changeset 0:642c0da30ca6 (2013-10-17) |
Commit message:
Replace simplejson with json. Add option to call broad peaks. |
modified:
macs2_wrapper.py macs2_wrapper.xml |
b |
diff -r 642c0da30ca6 -r c05f607d116c macs2_wrapper.py --- a/macs2_wrapper.py Thu Oct 17 12:47:49 2013 -0400 +++ b/macs2_wrapper.py Tue May 20 12:10:38 2014 -0400 |
[ |
b'@@ -1,12 +1,9 @@\n-#purpose: macs2 python wrapper\n-#author: Ziru Zhou\n-#date: November, 2012\n+# macs2 python wrapper\n+# based on http://toolshed.g2.bx.psu.edu/view/modencode-dcc/macs2\n \n import sys, subprocess, tempfile, shutil, glob, os, os.path, gzip\n from galaxy import eggs\n-import pkg_resources\n-pkg_resources.require( "simplejson" )\n-import simplejson\n+import json\n \n CHUNK_SIZE = 1024\n \n@@ -38,12 +35,12 @@\n #keep all existing comment lines\n if line.startswith( \'#\' ):\n out.write( line )\n-\t#added for macs2 since there is an extra newline \n+ #added for macs2 since there is an extra newline \n elif line.startswith( \'\\n\' ):\n out.write( line )\n elif not wrote_header:\n out.write( \'#%s\' % line )\n-\t print line\n+ print line\n wrote_header = True\n else:\n fields = line.split( \'\\t\' )\n@@ -57,8 +54,8 @@\n #==========================================================================================\n def main():\n #take in options file and output file names\n- options = simplejson.load( open( sys.argv[1] ) )\n- outputs = simplejson.load( open( sys.argv[2] ) )\n+ options = json.load( open( sys.argv[1] ) )\n+ outputs = json.load( open( sys.argv[2] ) )\n \n #=================================================================================\n #parse options and execute macs2\n@@ -71,26 +68,29 @@\n \n #=================================================================================\n if (options[\'command\'] == "callpeak"):\n- \toutput_bed = outputs[\'output_bed_file\']\n- \toutput_extra_html = outputs[\'output_extra_file\']\n- \toutput_extra_path = outputs[\'output_extra_file_path\']\n- \toutput_peaks = outputs[\'output_peaks_file\']\n- \toutput_narrowpeaks = outputs[\'output_narrowpeaks_file\'] \n-\toutput_xls_to_interval_peaks_file = outputs[\'output_xls_to_interval_peaks_file\']\n-\toutput_xls_to_interval_negative_peaks_file = outputs[\'output_xls_to_interval_negative_peaks_file\']\n+ output_bed = outputs[\'output_bed_file\']\n+ output_extra_html = outputs[\'output_extra_file\']\n+ output_extra_path = outputs[\'output_extra_file_path\']\n+ output_peaks = outputs[\'output_peaks_file\']\n+ output_narrowpeaks = outputs[\'output_narrowpeaks_file\']\n+ output_xls_to_interval_peaks_file = outputs[\'output_xls_to_interval_peaks_file\']\n+ output_xls_to_interval_negative_peaks_file = outputs[\'output_xls_to_interval_negative_peaks_file\']\n \n-\tif \'pvalue\' in options:\n- \t\tcmdline = "%s --format=\'%s\' --name=\'%s\' --gsize=\'%s\' --bw=\'%s\' --pvalue=\'%s\' --mfold %s %s %s %s" % ( cmdline, options[\'format\'], experiment_name, options[\'gsize\'], options[\'bw\'], options[\'pvalue\'], options[\'mfoldlo\'], options[\'mfoldhi\'], options[\'nolambda\'], options[\'bdg\'] )\n-\telif \'qvalue\' in options:\n- \t\tcmdline = "%s --format=\'%s\' --name=\'%s\' --gsize=\'%s\' --bw=\'%s\' --qvalue=\'%s\' --mfold %s %s %s %s" % ( cmdline, options[\'format\'], experiment_name, options[\'gsize\'], options[\'bw\'], options[\'qvalue\'], options[\'mfoldlo\'], options[\'mfoldhi\'], options[\'nolambda\'], options[\'bdg\'] )\n-\t\t\n-\tif \'nomodel\' in options:\n- \tcmdline = "%s --nomodel --shiftsize=\'%s\'" % ( cmdline, options[\'nomodel\'] )\n+ if \'pvalue\' in options:\n+ cmdline = "%s --format=\'%s\' --name=\'%s\' --gsize=\'%s\' --bw=\'%s\' --pvalue=\'%s\' --mfold %s %s %s %s" % ( cmdline, options[\'format\'], experiment_name, options[\'gsize\'], options[\'bw\'], options[\'pvalue\'], options[\'mfoldlo\'], options[\'mfoldhi\'], options[\'nolambda\'], options[\'bdg\'] )\n+ elif \'qvalue\' in options:\n+ cmdline = "%s --format=\'%s\' --name=\'%s\' --gsize=\'%s\' --bw=\'%s\' --qvalue=\'%s\' --mfold %s %s %s %s" % ( cmdline, options[\'format\'], experiment_name, options[\'gsize\'], options[\'bw\'], options[\'qvalue\'], options[\'mfoldlo\'], options[\'mfoldhi\'], options[\'nolambda\'], options[\'bdg\'] )\n+\n+ if \'broad_cutoff\' in options:\n+ cmdline += " --broad --broad-cutoff=%s" % (options[\'broad_cutoff\'])\n+\n+ if \'nomode'..b'reate_peak_xls_file ):\n- \t\txls_to_interval( create_peak_xls_file, output_xls_to_interval_peaks_file, header = \'peaks file\' )\n- \tcreate_peak_xls_file = os.path.join( tmp_dir, \'%s_negative_peaks.xls\' % experiment_name )\n- \tif os.path.exists( create_peak_xls_file ):\n-\t\t\tprint "negative file exists"\n- \t\txls_to_interval( create_peak_xls_file, output_xls_to_interval_negative_peaks_file, header = \'negative peaks file\' )\n+ #parse xls files to interval files as needed\n+ #if \'xls_to_interval\' in options:\n+ if (options[\'xls_to_interval\'] == "True"):\n+ create_peak_xls_file = os.path.join( tmp_dir, \'%s_peaks.xls\' % experiment_name )\n+ if os.path.exists( create_peak_xls_file ):\n+ xls_to_interval( create_peak_xls_file, output_xls_to_interval_peaks_file, header = \'peaks file\' )\n+ create_peak_xls_file = os.path.join( tmp_dir, \'%s_negative_peaks.xls\' % experiment_name )\n+ if os.path.exists( create_peak_xls_file ):\n+ print "negative file exists"\n+ xls_to_interval( create_peak_xls_file, output_xls_to_interval_negative_peaks_file, header = \'negative peaks file\' )\n \n- \t#move all remaining files to extra files path of html file output to allow user download\n- \tout_html = open( output_extra_html, \'wb\' )\n- \tout_html.write( \'<html><head><title>Additional output created by MACS (%s)</title></head><body><h3>Additional Files:</h3><p><ul>\\n\' % experiment_name )\n- \tos.mkdir( output_extra_path )\n- \tfor filename in sorted( os.listdir( tmp_dir ) ):\n- \t\tshutil.move( os.path.join( tmp_dir, filename ), os.path.join( output_extra_path, filename ) )\n- \tout_html.write( \'<li><a href="%s">%s</a></li>\\n\' % ( filename, filename ) )\n-\t\t#out_html.write( \'<li><a href="%s">%s</a>peakxls %s SomethingDifferent tmp_dir %s path %s exp_name %s</li>\\n\' % ( created_peak_xls_file, filename, filename, tmp_dir, output_extra_path, experiment_name ) )\n- \tout_html.write( \'</ul></p>\\n\' )\n- \tout_html.write( \'<h3>Messages from MACS:</h3>\\n<p><pre>%s</pre></p>\\n\' % open( stderr_name, \'rb\' ).read() )\n- \tout_html.write( \'</body></html>\\n\' )\n- \tout_html.close()\n+ #move all remaining files to extra files path of html file output to allow user download\n+ out_html = open( output_extra_html, \'wb\' )\n+ out_html.write( \'<html><head><title>Additional output created by MACS (%s)</title></head><body><h3>Additional Files:</h3><p><ul>\\n\' % experiment_name )\n+ os.mkdir( output_extra_path )\n+ for filename in sorted( os.listdir( tmp_dir ) ):\n+ shutil.move( os.path.join( tmp_dir, filename ), os.path.join( output_extra_path, filename ) )\n+ out_html.write( \'<li><a href="%s">%s</a></li>\\n\' % ( filename, filename ) )\n+ #out_html.write( \'<li><a href="%s">%s</a>peakxls %s SomethingDifferent tmp_dir %s path %s exp_name %s</li>\\n\' % ( created_peak_xls_file, filename, filename, tmp_dir, output_extra_path, experiment_name ) )\n+ out_html.write( \'</ul></p>\\n\' )\n+ out_html.write( \'<h3>Messages from MACS:</h3>\\n<p><pre>%s</pre></p>\\n\' % open( stderr_name, \'rb\' ).read() )\n+ out_html.write( \'</body></html>\\n\' )\n+ out_html.close()\n \n #=================================================================================\n #move files generated by bdgcmp command\n if (options[\'command\'] == "bdgcmp"):\n- \tcreated_bdgcmp_file = os.path.join (tmp_dir, "bdgcmp_out.bdg" )\n- \tif os.path.exists( created_bdgcmp_file ):\n-\t\tshutil.move (created_bdgcmp_file, output_bdgcmp )\n- \n+ created_bdgcmp_file = os.path.join (tmp_dir, "bdgcmp_out.bdg" )\n+ if os.path.exists( created_bdgcmp_file ):\n+ shutil.move (created_bdgcmp_file, output_bdgcmp )\n+\n #================================================================================= \n #cleanup\n #================================================================================= \n' |
b |
diff -r 642c0da30ca6 -r c05f607d116c macs2_wrapper.xml --- a/macs2_wrapper.xml Thu Oct 17 12:47:49 2013 -0400 +++ b/macs2_wrapper.xml Tue May 20 12:10:38 2014 -0400 |
[ |
b'@@ -1,6 +1,7 @@\n <tool id="macs2_peakcalling" name="MACS2" version="2.0.10">\n <description>Model-based Analysis of ChIP-Seq</description>\n <command interpreter="python">macs2_wrapper.py $options_file $outputs_file</command>\n+ <version_command>macs2 --version</version_command>\n <inputs>\n <!--experiment name and option of selecting paired or single end will always be present-->\n <param name="experiment_name" type="text" value="MACS2 in Galaxy" size="50" label="Experiment Name"/>\n@@ -8,53 +9,63 @@\n <!--select one of the 7 major commands offered by macs2-->\n <conditional name="major_command">\n <param name="major_command_selector" type="select" label="Select action to be performed">\n-\t<option value="callpeak">Peak Calling</option>\n-\t<!--<option value="filterdup">filterdup</option>\n-\t<option value="randsample">randsample</option>-->\n-\t<option value="bdgcmp">Compare .bdg Files</option>\n-\t<!--<option value="bdgdiff">bdgdiff</option>\n-\t<option value="bdgpeakcall">bdgpeakcall</option>\n-\t<option value="bdgbroadcall">bdgbroadcall</option>-->\n+ <option value="callpeak">Peak Calling</option>\n+ <!--<option value="filterdup">filterdup</option>\n+ <option value="randsample">randsample</option>-->\n+ <option value="bdgcmp">Compare .bdg Files</option>\n+ <!--<option value="bdgdiff">bdgdiff</option>\n+ <option value="bdgpeakcall">bdgpeakcall</option>\n+ <option value="bdgbroadcall">bdgbroadcall</option>-->\n </param>\n <!--callpeak option of macs2-->\n <when value="callpeak">\n-\t<!--may need to add a few more formats at later time-->\n+ <!--may need to add a few more formats at later time-->\n <param name="input_chipseq_file1" type="data" format="bam" label="ChIP-Seq Tag File" />\n <param name="input_control_file1" type="data" format="bam" optional="True" label="ChIP-Seq Control File" />\n-\t<param name="gsize" type="float" label="Effective genome size" value="2.7e+9" help="Human: 3.3e+9, Mouse: 3.0e+9, Fly: 1.9e+8, Worm: 1.3e+8 (--gsize)"/>\n-\t<param name="bw" type="integer" label="Band width" value="300" help="(--bw)"/>\n-\t<param name="xls_to_interval" label="Parse xls files into into distinct interval files" type="boolean" truevalue="True" falsevalue="False" checked="False"/>\n-\t<param name="bdg" label="Save fragment pileup, control lambda, -log10pvalue/qvalue in bedGraph" type="boolean" truevalue="-B" falsevalue="" checked="False" help="files located in html report"/>\n+ <param name="gsize" type="float" label="Effective genome size" value="2.7e+9" help="Human: 3.3e+9, Mouse: 3.0e+9, Fly: 1.9e+8, Worm: 1.3e+8 (--gsize)"/>\n+ <param name="bw" type="integer" label="Band width" value="300" help="(--bw)"/>\n+ <param name="xls_to_interval" label="Parse xls files into into distinct interval files" type="boolean" truevalue="True" falsevalue="False" checked="False"/>\n+ <param name="bdg" label="Save fragment pileup, control lambda, -log10pvalue/qvalue in bedGraph" type="boolean" truevalue="-B" falsevalue="" checked="False" help="files located in html report"/>\n \n-\t<conditional name="pq_options">\n-\t <param name="pq_options_selector" type="select" label="Select p-value or q-value" help="default uses q-value">\n-\t <option value="qvalue">q-value</option>\n-\t <option value="pvalue">p-value</option>\n-\t </param>\n-\t <when value="pvalue">\n-\t <param name="pvalue" type="float" label="p-value cutoff for peak detection" value="1e-2" help="default: 1e-2 (--pvalue)"/>\n-\t </when>\n-\t <when value="qvalue">\n-\t <param name="qvalue" type="float" label="q-value cutoff for peak detection" value="5e-2" help="default: 5e-2 (--qvalue)"/>\n-\t </when> \n-\t</conditional>\n-\t<conditional name="advanced_options">\n-\t <param name="advanced_options_selector" type="select" label="Display advanced options">\n-\t <option value="off">Hide</option>\n-\t <option value="on">Display</option>\n-\t </param>\n-\t <when value="on">\n+ <conditional name'..b'xls_to_interval )\n-\n-\t##advanced options\n-\t#if str( $major_command.advanced_options.advanced_options_selector ) == \'on\':\n-\t\t#set $__options[\'mfoldlo\'] = int( $major_command.advanced_options.mfoldlo )\n-\t\t#set $__options[\'mfoldhi\'] = int( $major_command.advanced_options.mfoldhi )\n-\t\t#set $__options[\'nolambda\'] = str( $major_command.advanced_options.nolambda )\n-\t#else:\n-\t\t#set $__options[\'mfoldlo\'] = int( "10" )\n-\t\t#set $__options[\'mfoldhi\'] = int( "30" )\n-\t\t#set $__options[\'nolambda\'] = str( "" )\t\t\n-\t#end if\n+ #set $__options[\'command\'] = str( "callpeak" )\n+ #set $__options[\'gsize\'] = int( $major_command.gsize )\n+ #set $__options[\'bw\'] = str( $major_command.bw )\n+ #set $__options[\'bdg\'] = str( $major_command.bdg )\n+ #set $__options[\'xls_to_interval\'] = str( $major_command.xls_to_interval )\n \n-\t##enable xls file options\n-\t##if str( $major_command.xls_to_interval ) == \'create\':\n-\t\t##set $__options[\'xls_to_interval\'] = { \'peaks_file\': str( $output_xls_to_interval_peaks_file ), \'negative_peaks_file\': str( $output_xls_to_interval_negative_peaks_file ) }\n-\t##end if\n-\t\n-\t##pq value select options\n-\t#if str( $major_command.pq_options.pq_options_selector ) == \'qvalue\':\n-\t\t#set $__options[\'qvalue\'] = str( $major_command.pq_options.qvalue )\n-\t#else:\n-\t\t#set $__options[\'pvalue\'] = str( $major_command.pq_options.pvalue )\n-\t#end if\n-\t\n-\t##model options\n-\t#if str( $major_command.nomodel_type.nomodel_type_selector ) == \'nomodel\':\n-\t\t#set $__options[\'nomodel\'] = str( $major_command.nomodel_type.shiftsize )\n-\t#end if\n+ ##advanced options\n+ #if str( $major_command.advanced_options.advanced_options_selector ) == \'on\':\n+ #set $__options[\'mfoldlo\'] = int( $major_command.advanced_options.mfoldlo )\n+ #set $__options[\'mfoldhi\'] = int( $major_command.advanced_options.mfoldhi )\n+ #set $__options[\'nolambda\'] = str( $major_command.advanced_options.nolambda )\n+ #if str($major_command.advanced_options.broad_options.broad_options_selector) == \'broad\'\n+ #set $__options[\'broad_cutoff\'] = str($major_command.advanced_options.broad_options.broad_cutoff)\n+ #end if\n+ #else:\n+ #set $__options[\'mfoldlo\'] = int( "10" )\n+ #set $__options[\'mfoldhi\'] = int( "30" )\n+ #set $__options[\'nolambda\'] = str( "" )\n+ #end if\n+\n+ ##enable xls file options\n+ ##if str( $major_command.xls_to_interval ) == \'create\':\n+ ##set $__options[\'xls_to_interval\'] = { \'peaks_file\': str( $output_xls_to_interval_peaks_file ), \'negative_peaks_file\': str( $output_xls_to_interval_negative_peaks_file ) }\n+ ##end if\n+ \n+ ##pq value select options\n+ #if str( $major_command.pq_options.pq_options_selector ) == \'qvalue\':\n+ #set $__options[\'qvalue\'] = str( $major_command.pq_options.qvalue )\n+ #else:\n+ #set $__options[\'pvalue\'] = str( $major_command.pq_options.pvalue )\n+ #end if\n+ \n+ ##model options\n+ #if str( $major_command.nomodel_type.nomodel_type_selector ) == \'nomodel\':\n+ #set $__options[\'nomodel\'] = str( $major_command.nomodel_type.shiftsize )\n+ #end if\n #end if\n ##=======================================================================================\n #if str( $major_command.major_command_selector ) == \'bdgcmp\':\n-\t#set $__options[\'command\'] = str( "bdgcmp" )\n-\t#set $__options[\'pseudocount\'] = float( str( $major_command.pseudocount ) )\n-\t#set $__options[\'m\'] = str( $major_command.bdgcmp_options.bdgcmp_options_selector )\n+ #set $__options[\'command\'] = str( "bdgcmp" )\n+ #set $__options[\'pseudocount\'] = float( str( $major_command.pseudocount ) )\n+ #set $__options[\'m\'] = str( $major_command.bdgcmp_options.bdgcmp_options_selector )\n #end if\n ##=======================================================================================\n \n-${ simplejson.dumps( __options ) }\n+${ json.dumps( __options ) }\n </configfile>\n </configfiles>\n <tests>\n-\t<!--none yet for macs2-->\n+ <!--none yet for macs2-->\n </tests>\n <help>\n **What it does**\n' |