comparison sicer_wrapper.py @ 3:5c2cc3b58c7d draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/sicer commit 0cbb1b33c232da498a31902aa5afcdc97971a74b"
author devteam
date Wed, 28 Oct 2020 23:36:25 +0000
parents 82a8234e03f2
children
comparison
equal deleted inserted replaced
2:74c9214cc8e6 3:5c2cc3b58c7d
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 #Dan Blankenberg 2 # Dan Blankenberg
3 3
4 """ 4 """
5 A wrapper script for running SICER (spatial clustering approach for the identification of ChIP-enriched regions) region caller. 5 A wrapper script for running SICER (spatial clustering approach for the identification of ChIP-enriched regions) region caller.
6 """ 6 """
7 7
8 import sys, optparse, os, tempfile, subprocess, shutil 8 import optparse
9 import os
10 import shutil
11 import subprocess
12 import sys
13 import tempfile
9 14
10 CHUNK_SIZE = 2**20 #1mb 15 CHUNK_SIZE = 2**20 # 1mb
11 16
12 #HACK! FIXME: allow using all specified builds, would currently require hacking SICER's "GenomeData.py" on the fly. 17 # HACK! FIXME: allow using all specified builds, would currently require hacking SICER's "GenomeData.py" on the fly.
13 VALID_BUILDS = [ 'mm8', 'mm9', 'hg18', 'hg19', 'dm2', 'dm3', 'sacCer1', 'pombe', 'rn4', 'tair8' ] 18 VALID_BUILDS = ['mm8', 'mm9', 'hg18', 'hg19', 'dm2', 'dm3', 'sacCer1', 'pombe', 'rn4', 'tair8']
14
def cleanup_before_exit(tmp_dir):
    """Remove the scratch directory tree, if one was given and it still exists.

    Does nothing when *tmp_dir* is empty/None or the path is already gone.
    """
    if not tmp_dir:
        return
    if not os.path.exists(tmp_dir):
        return
    shutil.rmtree(tmp_dir)
24
25
def open_file_from_option(filename, mode='rb'):
    """Open *filename* with *mode*, or return None when no filename was supplied."""
    return open(filename, mode=mode) if filename else None
24 30
def add_one_to_file_column(filename, column, split_char="\t", startswith_skip=None):
    """Rewrite *filename* with the integer in field *column* incremented by one.

    Lines are split on *split_char*. A line is copied through unchanged when
    it starts with *startswith_skip* (if given) or has too few fields to
    contain *column*. Every modified line is written back terminated by a
    single newline character.

    The result is built in a temporary file which then replaces *filename*.

    Raises:
        ValueError: if the selected field of a processed line is not an integer.
    """
    # Text mode on both sides: the lines read from `filename` are str, so the
    # temporary file must accept str too (a binary-mode file rejects them on
    # Python 3).
    tmp_out = tempfile.NamedTemporaryFile(mode='w', delete=False)
    tmp_path = tmp_out.name
    try:
        with tmp_out:
            with open(filename) as fh:
                for line in fh:
                    if startswith_skip and line.startswith(startswith_skip):
                        tmp_out.write(line)
                        continue
                    fields = line.rstrip('\n\r').split(split_char)
                    if len(fields) <= column:
                        tmp_out.write(line)
                        continue
                    fields[column] = str(int(fields[column]) + 1)
                    tmp_out.write("%s\n" % (split_char.join(fields)))
        # Only replace the original once the temporary copy is complete and closed.
        shutil.move(tmp_path, filename)
    except Exception:
        # delete=False means nobody else will remove the temporary file for us.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise
49 48
def _binary_stream(stream):
    """Return the bytes-accepting side of a standard stream.

    On Python 3 that is ``stream.buffer``; on Python 2 the stream itself.
    """
    return getattr(stream, 'buffer', stream)


def _collect_output(tmp_dir, produced_name, destination, fix_off_by_one=False):
    """Move one SICER result file out of *tmp_dir*, optionally incrementing its third column."""
    shutil.move(os.path.join(tmp_dir, produced_name), destination)
    if fix_off_by_one:
        add_one_to_file_column(destination, 2)


def __main__():
    """Run SICER (or SICER-rb when no control is given) and collect its outputs.

    Parses the command line, links the input BED file(s) into a fresh
    temporary directory, runs the SICER shell script there, relays whatever
    the script wrote to stderr, and moves the produced files to the paths
    given by the ``--*_output_file`` options. The temporary directory is
    removed before returning or raising.

    Raises:
        ValueError: if the dbkey is not one of VALID_BUILDS or the error
            cut off is not numeric.
        Exception: if the SICER script exits with a non-zero return code.
    """
    parser = optparse.OptionParser()
    parser.add_option('', '--stdout', dest='stdout', action='store', type="string", default=None, help='If specified, the output of stdout will be written to this file.')
    parser.add_option('', '--fix_off_by_one_errors', dest='fix_off_by_one_errors', action='store_true', default=False, help='If specified, fix off-by-one errors in output files')
    # inputs
    parser.add_option('-b', '--bed_file', dest='bed_file', action='store', type="string", default=None, help='Input ChIP BED file.')
    parser.add_option('-c', '--control_file', dest='control_file', action='store', type="string", default=None, help='Input control BED file.')
    parser.add_option('-d', '--dbkey', dest='dbkey', action='store', type="string", default=None, help='Input dbkey.')
    parser.add_option('-r', '--redundancy_threshold', dest='redundancy_threshold', action='store', type="int", default=1, help='Redundancy Threshold: The number of copies of identical reads allowed in a library.')
    parser.add_option('-w', '--window_size', dest='window_size', action='store', type="int", default=200, help='Window size: resolution of SICER algorithm. For histone modifications, one can use 200 bp')
    parser.add_option('-f', '--fragment_size', dest='fragment_size', action='store', type="int", default=150, help='Fragment size: is for determination of the amount of shift from the beginning of a read to the center of the DNA fragment represented by the read. FRAGMENT_SIZE=150 means the shift is 75.')
    parser.add_option('-e', '--effective_genome_fraction', dest='effective_genome_fraction', action='store', type="float", default=0.74, help='Effective genome fraction: Effective Genome as fraction of the genome size. It depends on read length.')
    parser.add_option('-g', '--gap_size', dest='gap_size', action='store', type="int", default=600, help='Gap size: needs to be multiples of window size. Namely if the window size is 200, the gap size should be 0, 200, 400, 600, ... .')
    parser.add_option('-o', '--error_cut_off', dest='error_cut_off', action='store', type="string", default="0.1", help='Error Cut off: FDR or E-value')  # read as string to construct names properly
    # outputs
    parser.add_option('', '--redundancy_removed_test_bed_output_file', dest='redundancy_removed_test_bed_output_file', action='store', type="string", default=None, help='test-1-removed.bed: redundancy_removed test bed file')
    parser.add_option('', '--redundancy_removed_control_bed_output_file', dest='redundancy_removed_control_bed_output_file', action='store', type="string", default=None, help='control-1-removed.bed: redundancy_removed control bed file')
    parser.add_option('', '--summary_graph_output_file', dest='summary_graph_output_file', action='store', type="string", default=None, help='test-W200.graph: summary graph file for test-1-removed.bed with window size 200, in bedGraph format.')
    parser.add_option('', '--test_normalized_wig_output_file', dest='test_normalized_wig_output_file', action='store', type="string", default=None, help='test-W200-normalized.wig: the above file normalized by library size per million and converted into wig format. This file can be uploaded to the UCSC genome browser')
    parser.add_option('', '--score_island_output_file', dest='score_island_output_file', action='store', type="string", default=None, help='test-W200-G600.scoreisland: an intermediate file for debugging usage.')
    parser.add_option('', '--islands_summary_output_file', dest='islands_summary_output_file', action='store', type="string", default=None, help='test-W200-G600-islands-summary: summary of all candidate islands with their statistical significance.')
    parser.add_option('', '--significant_islands_summary_output_file', dest='significant_islands_summary_output_file', action='store', type="string", default=None, help='test-W200-G600-islands-summary-FDR.01: summary file of significant islands with requirement of FDR=0.01.')
    parser.add_option('', '--significant_islands_output_file', dest='significant_islands_output_file', action='store', type="string", default=None, help='test-W200-G600-FDR.01-island.bed: delineation of significant islands in "chrom start end read-count-from-redundancy_removed-test.bed" format')
    parser.add_option('', '--island_filtered_output_file', dest='island_filtered_output_file', action='store', type="string", default=None, help='test-W200-G600-FDR.01-islandfiltered.bed: library of raw redundancy_removed reads on significant islands.')
    parser.add_option('', '--island_filtered_normalized_wig_output_file', dest='island_filtered_normalized_wig_output_file', action='store', type="string", default=None, help='test-W200-G600-FDR.01-islandfiltered-normalized.wig: wig file for the island-filtered redundancy_removed reads.')
    (options, args) = parser.parse_args()

    # check if valid build (an explicit raise: `assert` disappears under -O)
    if options.dbkey not in VALID_BUILDS:
        raise ValueError("The specified build ('%s') is not available for this tool." % options.dbkey)

    # error_cut_off stays a string so output file names can be rebuilt exactly,
    # but it is interpolated into a shell command line below, so insist that
    # it is numeric.
    try:
        float(options.error_cut_off)
    except ValueError:
        raise ValueError("The specified error cut off ('%s') is not a number." % options.error_cut_off)

    has_control = options.control_file is not None
    fix = options.fix_off_by_one_errors

    # everything will occur in this temp directory
    tmp_dir = tempfile.mkdtemp()
    stdout = None
    try:
        # link input files into tmp_dir and build command line
        bed_base_filename = 'input_bed_file'
        bed_filename = '%s.bed' % bed_base_filename
        os.symlink(options.bed_file, os.path.join(tmp_dir, bed_filename))
        cmd = '%s "%s" "%s"' % ("SICER.sh" if has_control else "SICER-rb.sh", tmp_dir, bed_filename)
        if has_control:
            control_base_filename = 'input_control_file'
            control_filename = '%s.bed' % control_base_filename
            os.symlink(options.control_file, os.path.join(tmp_dir, control_filename))
            cmd = '%s "%s"' % (cmd, control_filename)
        cmd = '%s "%s" "%s" "%i" "%i" "%i" "%f" "%i" "%s"' % (cmd, tmp_dir, options.dbkey, options.redundancy_threshold, options.window_size, options.fragment_size, options.effective_genome_fraction, options.gap_size, options.error_cut_off)

        # set up stdout and stderr output options
        stdout = open_file_from_option(options.stdout, mode='wb')
        with tempfile.NamedTemporaryFile(dir=tmp_dir) as stderr:
            # NOTE(review): the command is a shell string; every interpolated
            # value is a constant, a number, a validated build name or the
            # mkdtemp() path.
            return_code = subprocess.call(args=cmd, stdout=stdout, stderr=stderr, shell=True, cwd=tmp_dir)

            # Relay what SICER wrote to stderr: to our stderr on failure,
            # otherwise alongside its regular output. All targets are binary,
            # hence the bytes literal.
            if return_code:
                stderr_target = _binary_stream(sys.stderr)
            elif stdout is not None:
                stderr_target = stdout
            else:
                # no --stdout file was requested; fall back to our own stdout
                stderr_target = _binary_stream(sys.stdout)
            stderr_target.write(b"\nAdditionally, these warnings were reported:\n")
            stderr.flush()
            stderr.seek(0)
            shutil.copyfileobj(stderr, stderr_target, CHUNK_SIZE)
            stderr_target.flush()

        if return_code:
            raise Exception("Error running: %s" % cmd)

        # move files to where they belong
        window_prefix = '%s-W%i' % (bed_base_filename, options.window_size)
        island_prefix = '%s-G%i' % (window_prefix, options.gap_size)
        cut_off = options.error_cut_off
        _collect_output(tmp_dir, '%s-%i-removed.bed' % (bed_base_filename, options.redundancy_threshold), options.redundancy_removed_test_bed_output_file)
        _collect_output(tmp_dir, '%s.graph' % window_prefix, options.summary_graph_output_file, fix)
        _collect_output(tmp_dir, '%s-normalized.wig' % window_prefix, options.test_normalized_wig_output_file)
        if has_control:
            _collect_output(tmp_dir, '%s-%i-removed.bed' % (control_base_filename, options.redundancy_threshold), options.redundancy_removed_control_bed_output_file)
            _collect_output(tmp_dir, '%s.scoreisland' % island_prefix, options.score_island_output_file, fix)
            _collect_output(tmp_dir, '%s-islands-summary' % island_prefix, options.islands_summary_output_file, fix)
            _collect_output(tmp_dir, '%s-islands-summary-FDR%s' % (island_prefix, cut_off), options.significant_islands_summary_output_file, fix)
            _collect_output(tmp_dir, '%s-FDR%s-island.bed' % (island_prefix, cut_off), options.significant_islands_output_file, fix)
            _collect_output(tmp_dir, '%s-FDR%s-islandfiltered.bed' % (island_prefix, cut_off), options.island_filtered_output_file)
            _collect_output(tmp_dir, '%s-FDR%s-islandfiltered-normalized.wig' % (island_prefix, cut_off), options.island_filtered_normalized_wig_output_file)
        else:
            _collect_output(tmp_dir, '%s-E%s.scoreisland' % (island_prefix, cut_off), options.score_island_output_file, fix)
            _collect_output(tmp_dir, '%s-E%s-islandfiltered.bed' % (island_prefix, cut_off), options.island_filtered_output_file)
            _collect_output(tmp_dir, '%s-E%s-islandfiltered-normalized.wig' % (island_prefix, cut_off), options.island_filtered_normalized_wig_output_file)
    finally:
        if stdout is not None:
            stdout.close()
        cleanup_before_exit(tmp_dir)


if __name__ == "__main__":
    __main__()