Mercurial > repos > iuc > gatk2
comparison gatk2_wrapper.py @ 4:f244b8209eb8 draft
bug fix release
author | iuc |
---|---|
date | Mon, 25 Aug 2014 17:43:11 -0400 |
parents | 340633249b3d |
children | 35c00763cb5c |
comparison
equal
deleted
inserted
replaced
3:2553f84b8174 | 4:f244b8209eb8 |
---|---|
5 A wrapper script for running the GenomeAnalysisTK.jar commands. | 5 A wrapper script for running the GenomeAnalysisTK.jar commands. |
6 """ | 6 """ |
7 | 7 |
8 import sys, optparse, os, tempfile, subprocess, shutil | 8 import sys, optparse, os, tempfile, subprocess, shutil |
9 from binascii import unhexlify | 9 from binascii import unhexlify |
10 from string import Template | |
11 | 10 |
12 GALAXY_EXT_TO_GATK_EXT = { 'gatk_interval':'intervals', 'bam_index':'bam.bai', 'gatk_dbsnp':'dbSNP', 'picard_interval_list':'interval_list' } #items not listed here will use the galaxy extension as-is | 11 GALAXY_EXT_TO_GATK_EXT = { 'gatk_interval':'intervals', 'bam_index':'bam.bai', 'gatk_dbsnp':'dbSNP', 'picard_interval_list':'interval_list' } #items not listed here will use the galaxy extension as-is |
13 GALAXY_EXT_TO_GATK_FILE_TYPE = GALAXY_EXT_TO_GATK_EXT #for now, these are the same, but could be different if needed | 12 GALAXY_EXT_TO_GATK_FILE_TYPE = GALAXY_EXT_TO_GATK_EXT #for now, these are the same, but could be different if needed |
14 DEFAULT_GATK_PREFIX = "gatk_file" | 13 DEFAULT_GATK_PREFIX = "gatk_file" |
15 CHUNK_SIZE = 2**20 #1mb | 14 CHUNK_SIZE = 2**20 #1mb |
17 | 16 |
18 def cleanup_before_exit( tmp_dir ): | 17 def cleanup_before_exit( tmp_dir ): |
19 if tmp_dir and os.path.exists( tmp_dir ): | 18 if tmp_dir and os.path.exists( tmp_dir ): |
20 shutil.rmtree( tmp_dir ) | 19 shutil.rmtree( tmp_dir ) |
21 | 20 |
21 | |
22 def gatk_filename_from_galaxy( galaxy_filename, galaxy_ext, target_dir = None, prefix = None ): | 22 def gatk_filename_from_galaxy( galaxy_filename, galaxy_ext, target_dir = None, prefix = None ): |
23 suffix = GALAXY_EXT_TO_GATK_EXT.get( galaxy_ext, galaxy_ext ) | 23 suffix = GALAXY_EXT_TO_GATK_EXT.get( galaxy_ext, galaxy_ext ) |
24 if prefix is None: | 24 if prefix is None: |
25 prefix = DEFAULT_GATK_PREFIX | 25 prefix = DEFAULT_GATK_PREFIX |
26 if target_dir is None: | 26 if target_dir is None: |
27 target_dir = os.getcwd() | 27 target_dir = os.getcwd() |
28 gatk_filename = os.path.join( target_dir, "%s.%s" % ( prefix, suffix ) ) | 28 gatk_filename = os.path.join( target_dir, "%s.%s" % ( prefix, suffix ) ) |
29 os.symlink( galaxy_filename, gatk_filename ) | 29 os.symlink( galaxy_filename, gatk_filename ) |
30 return gatk_filename | 30 return gatk_filename |
31 | 31 |
32 | |
32 def gatk_filetype_argument_substitution( argument, galaxy_ext ): | 33 def gatk_filetype_argument_substitution( argument, galaxy_ext ): |
33 return argument % dict( file_type = GALAXY_EXT_TO_GATK_FILE_TYPE.get( galaxy_ext, galaxy_ext ) ) | 34 return argument % dict( file_type = GALAXY_EXT_TO_GATK_FILE_TYPE.get( galaxy_ext, galaxy_ext ) ) |
35 | |
34 | 36 |
35 def open_file_from_option( filename, mode = 'rb' ): | 37 def open_file_from_option( filename, mode = 'rb' ): |
36 if filename: | 38 if filename: |
37 return open( filename, mode = mode ) | 39 return open( filename, mode = mode ) |
38 return None | 40 return None |
41 | |
39 | 42 |
40 def html_report_from_directory( html_out, dir ): | 43 def html_report_from_directory( html_out, dir ): |
41 html_out.write( '<html>\n<head>\n<title>Galaxy - GATK Output</title>\n</head>\n<body>\n<p/>\n<ul>\n' ) | 44 html_out.write( '<html>\n<head>\n<title>Galaxy - GATK Output</title>\n</head>\n<body>\n<p/>\n<ul>\n' ) |
42 for fname in sorted( os.listdir( dir ) ): | 45 for fname in sorted( os.listdir( dir ) ): |
43 html_out.write( '<li><a href="%s">%s</a></li>\n' % ( fname, fname ) ) | 46 html_out.write( '<li><a href="%s">%s</a></li>\n' % ( fname, fname ) ) |
44 html_out.write( '</ul>\n</body>\n</html>\n' ) | 47 html_out.write( '</ul>\n</body>\n</html>\n' ) |
45 | 48 |
46 def index_bam_files( bam_filenames, tmp_dir ): | 49 |
50 def index_bam_files( bam_filenames ): | |
47 for bam_filename in bam_filenames: | 51 for bam_filename in bam_filenames: |
48 bam_index_filename = "%s.bai" % bam_filename | 52 bam_index_filename = "%s.bai" % bam_filename |
49 if not os.path.exists( bam_index_filename ): | 53 if not os.path.exists( bam_index_filename ): |
50 #need to index this bam file | 54 #need to index this bam file |
51 stderr_name = tempfile.NamedTemporaryFile( prefix = "bam_index_stderr" ).name | 55 stderr_name = tempfile.NamedTemporaryFile( prefix = "bam_index_stderr" ).name |
52 command = 'samtools index %s %s' % ( bam_filename, bam_index_filename ) | 56 command = 'samtools index %s %s' % ( bam_filename, bam_index_filename ) |
53 proc = subprocess.Popen( args=command, shell=True, stderr=open( stderr_name, 'wb' ) ) | 57 try: |
54 return_code = proc.wait() | 58 subprocess.check_call( args=command, shell=True, stderr=open( stderr_name, 'wb' ) ) |
55 if return_code: | 59 except: |
56 for line in open( stderr_name ): | 60 for line in open( stderr_name ): |
57 print >> sys.stderr, line | 61 print >> sys.stderr, line |
58 os.unlink( stderr_name ) #clean up | |
59 cleanup_before_exit( tmp_dir ) | |
60 raise Exception( "Error indexing BAM file" ) | 62 raise Exception( "Error indexing BAM file" ) |
61 os.unlink( stderr_name ) #clean up | 63 finally: |
64 os.unlink( stderr_name ) | |
62 | 65 |
63 def __main__(): | 66 def __main__(): |
64 #Parse Command Line | 67 #Parse Command Line |
65 parser = optparse.OptionParser() | 68 parser = optparse.OptionParser() |
66 parser.add_option( '-p', '--pass_through', dest='pass_through_options', action='append', type="string", help='These options are passed through directly to GATK, without any modification.' ) | 69 parser.add_option( '-p', '--pass_through', dest='pass_through_options', action='append', type="string", help='These options are passed through directly to GATK, without any modification.' ) |
72 parser.add_option( '', '--stderr', dest='stderr', action='store', type="string", default=None, help='If specified, the output of stderr will be written to this file.' ) | 75 parser.add_option( '', '--stderr', dest='stderr', action='store', type="string", default=None, help='If specified, the output of stderr will be written to this file.' ) |
73 parser.add_option( '', '--html_report_from_directory', dest='html_report_from_directory', action='append', type="string", nargs=2, help='"Target HTML File" "Directory"') | 76 parser.add_option( '', '--html_report_from_directory', dest='html_report_from_directory', action='append', type="string", nargs=2, help='"Target HTML File" "Directory"') |
74 parser.add_option( '-e', '--phone_home', dest='phone_home', action='store', type="string", default='STANDARD', help='What kind of GATK run report should we generate(NO_ET|STANDARD|STDOUT)' ) | 77 parser.add_option( '-e', '--phone_home', dest='phone_home', action='store', type="string", default='STANDARD', help='What kind of GATK run report should we generate(NO_ET|STANDARD|STDOUT)' ) |
75 parser.add_option( '-K', '--gatk_key', dest='gatk_key', action='store', type="string", default=None, help='What kind of GATK run report should we generate(NO_ET|STANDARD|STDOUT)' ) | 78 parser.add_option( '-K', '--gatk_key', dest='gatk_key', action='store', type="string", default=None, help='What kind of GATK run report should we generate(NO_ET|STANDARD|STDOUT)' ) |
76 (options, args) = parser.parse_args() | 79 (options, args) = parser.parse_args() |
77 | 80 |
78 tmp_dir = tempfile.mkdtemp( prefix='tmp-gatk-' ) | |
79 if options.pass_through_options: | 81 if options.pass_through_options: |
80 cmd = ' '.join( options.pass_through_options ) | 82 cmd = ' '.join( options.pass_through_options ) |
81 else: | 83 else: |
82 cmd = '' | 84 cmd = '' |
83 if options.pass_through_options_encoded: | 85 if options.pass_through_options_encoded: |
85 if options.max_jvm_heap is not None: | 87 if options.max_jvm_heap is not None: |
86 cmd = cmd.replace( 'java ', 'java -Xmx%s ' % ( options.max_jvm_heap ), 1 ) | 88 cmd = cmd.replace( 'java ', 'java -Xmx%s ' % ( options.max_jvm_heap ), 1 ) |
87 elif options.max_jvm_heap_fraction is not None: | 89 elif options.max_jvm_heap_fraction is not None: |
88 cmd = cmd.replace( 'java ', 'java -XX:DefaultMaxRAMFraction=%s -XX:+UseParallelGC ' % ( options.max_jvm_heap_fraction ), 1 ) | 90 cmd = cmd.replace( 'java ', 'java -XX:DefaultMaxRAMFraction=%s -XX:+UseParallelGC ' % ( options.max_jvm_heap_fraction ), 1 ) |
89 bam_filenames = [] | 91 bam_filenames = [] |
90 if options.datasets: | 92 tmp_dir = tempfile.mkdtemp( prefix='tmp-gatk-' ) |
91 for ( dataset_arg, filename, galaxy_ext, prefix ) in options.datasets: | 93 try: |
92 gatk_filename = gatk_filename_from_galaxy( filename, galaxy_ext, target_dir = tmp_dir, prefix = prefix ) | 94 if options.datasets: |
93 if dataset_arg: | 95 for ( dataset_arg, filename, galaxy_ext, prefix ) in options.datasets: |
94 cmd = '%s %s "%s"' % ( cmd, gatk_filetype_argument_substitution( dataset_arg, galaxy_ext ), gatk_filename ) | 96 gatk_filename = gatk_filename_from_galaxy( filename, galaxy_ext, target_dir = tmp_dir, prefix = prefix ) |
95 if galaxy_ext == "bam": | 97 if dataset_arg: |
96 bam_filenames.append( gatk_filename ) | 98 cmd = '%s %s "%s"' % ( cmd, gatk_filetype_argument_substitution( dataset_arg, galaxy_ext ), gatk_filename ) |
97 index_bam_files( bam_filenames, tmp_dir ) | 99 if galaxy_ext == "bam": |
98 #set up stdout and stderr output options | 100 bam_filenames.append( gatk_filename ) |
99 stdout = open_file_from_option( options.stdout, mode = 'wb' ) | 101 if galaxy_ext == 'fasta': |
100 stderr = open_file_from_option( options.stderr, mode = 'wb' ) | 102 subprocess.check_call( 'samtools faidx "%s"' % gatk_filename, shell=True ) |
101 #if no stderr file is specified, we'll use our own | 103 subprocess.check_call( 'java -jar %s R=%s O=%s QUIET=true' % ( os.path.join(os.environ['JAVA_JAR_PATH'], 'CreateSequenceDictionary.jar'), gatk_filename, os.path.splitext(gatk_filename)[0] + '.dict' ), shell=True ) |
102 if stderr is None: | 104 index_bam_files( bam_filenames ) |
103 stderr = tempfile.NamedTemporaryFile( prefix="gatk-stderr-", dir=tmp_dir ) | 105 #set up stdout and stderr output options |
104 | 106 stdout = open_file_from_option( options.stdout, mode = 'wb' ) |
105 proc = subprocess.Popen( args=cmd, stdout=stdout, stderr=stderr, shell=True, cwd=tmp_dir ) | 107 stderr = open_file_from_option( options.stderr, mode = 'wb' ) |
106 return_code = proc.wait() | 108 #if no stderr file is specified, we'll use our own |
107 | 109 if stderr is None: |
108 if return_code: | 110 stderr = tempfile.NamedTemporaryFile( prefix="gatk-stderr-", dir=tmp_dir ) |
109 stderr_target = sys.stderr | 111 |
110 else: | 112 proc = subprocess.Popen( args=cmd, stdout=stdout, stderr=stderr, shell=True, cwd=tmp_dir ) |
111 stderr_target = sys.stdout | 113 return_code = proc.wait() |
112 stderr.flush() | 114 |
113 stderr.seek(0) | 115 if return_code: |
114 while True: | 116 stderr_target = sys.stderr |
115 chunk = stderr.read( CHUNK_SIZE ) | |
116 if chunk: | |
117 stderr_target.write( chunk ) | |
118 else: | 117 else: |
119 break | 118 stderr_target = sys.stdout |
120 stderr.close() | 119 stderr.flush() |
120 stderr.seek(0) | |
121 while True: | |
122 chunk = stderr.read( CHUNK_SIZE ) | |
123 if chunk: | |
124 stderr_target.write( chunk ) | |
125 else: | |
126 break | |
127 stderr.close() | |
128 finally: | |
129 cleanup_before_exit( tmp_dir ) | |
130 | |
121 #generate html reports | 131 #generate html reports |
122 if options.html_report_from_directory: | 132 if options.html_report_from_directory: |
123 for ( html_filename, html_dir ) in options.html_report_from_directory: | 133 for ( html_filename, html_dir ) in options.html_report_from_directory: |
124 html_report_from_directory( open( html_filename, 'wb' ), html_dir ) | 134 html_report_from_directory( open( html_filename, 'wb' ), html_dir ) |
125 | |
126 cleanup_before_exit( tmp_dir ) | |
127 | 135 |
128 if __name__=="__main__": __main__() | 136 |
137 if __name__ == "__main__": | |
138 __main__() |