| 0 | 1 #!/usr/bin/env python | 
|  | 2 #Dan Blankenberg | 
|  | 3 | 
|  | 4 """ | 
|  | 5 A wrapper script for configuring and running the Strelka workflow on a tumor/normal BAM pair. | 
|  | 6 """ | 
|  | 7 | 
|  | 8 from __future__ import print_function | 
|  | 9 import sys, argparse, os, tempfile, subprocess, shutil | 
|  | 10 from binascii import unhexlify | 
|  | 11 from string import Template | 
|  | 12 from galaxy import eggs | 
|  | 13 #import pkg_resources; pkg_resources.require( "bx-python" ) | 
|  | 14 | 
|  | 15 #GALAXY_EXT_TO_GATK_EXT = { 'gatk_interval':'intervals', 'bam_index':'bam.bai', 'gatk_dbsnp':'dbSNP', 'picard_interval_list':'interval_list' } #items not listed here will use the galaxy extension as-is | 
|  | 16 #GALAXY_EXT_TO_GATK_FILE_TYPE = GALAXY_EXT_TO_GATK_EXT #for now, these are the same, but could be different if needed | 
|  | 17 #DEFAULT_GATK_PREFIX = "gatk_file" | 
|  | 18 #CHUNK_SIZE = 2**20 #1mb | 
|  | 19 # | 
|  | 20 # | 
def cleanup_before_exit( tmp_dir ):
    """Delete the temporary directory tree, when there is one to delete.

    tmp_dir -- path of the directory to remove. A falsy value (None, "")
               or a path that does not exist turns the call into a no-op.
    """
    if not tmp_dir:
        return
    if not os.path.exists( tmp_dir ):
        return
    shutil.rmtree( tmp_dir )
|  | 24 | 
def _create_config(args, config_path):
    """Write the user-supplied options to an ini file.

    args        -- mapping of option name -> value (this script passes
                   ``vars()`` of the parsed command line).
    config_path -- path of the ini file to create; an existing file is
                   overwritten.

    The file starts with a ``[user]`` section header followed by one
    ``name=value`` line per option. Options whose value is None are
    skipped, and so are tumorBam, normalBam, refFile and configFile, which
    are not written to the config file.
    """
    excluded = ("tumorBam", "normalBam", "refFile", "configFile")
    # "with" guarantees the file is closed even if a write fails part-way.
    with open(config_path, "w") as conf_file:
        conf_file.write("[user]\n")
        for option in args:
            value = args[option]
            if option in excluded or value is None:
                continue
            conf_file.write("%s=%s\n" % (option, value))
|  | 32 | 
def my_Popen(cmd, prefix_for_stderr_name, tmp_dir, msg_error):
    """Run a shell command and abort the job if it fails.

    cmd                    -- command line, executed through the shell.
    prefix_for_stderr_name -- prefix of the temporary file that captures
                              the command's stderr.
    tmp_dir                -- job temporary directory, removed through
                              cleanup_before_exit() when the command fails.
    msg_error              -- message of the Exception raised on failure.

    Returns None when the command exits with status 0. On a non-zero exit
    status the captured stderr is replayed on sys.stderr, tmp_dir is
    cleaned up and Exception(msg_error) is raised. The stderr capture file
    is always deleted.

    NOTE: cmd goes through the shell unescaped (shell=True); callers must
    only interpolate trusted paths into it.
    """
    # mkstemp creates the file and keeps it until we delete it ourselves;
    # taking only ".name" from a NamedTemporaryFile lets the file disappear
    # as soon as that object is collected, leaving a reusable (racy) name.
    fd, stderr_name = tempfile.mkstemp( prefix=prefix_for_stderr_name )
    try:
        with os.fdopen( fd, 'wb' ) as stderr_file:
            return_code = subprocess.call( cmd, shell=True, stderr=stderr_file )
        if return_code:
            with open( stderr_name ) as captured:
                for line in captured:
                    # lines keep their own newline; write() avoids doubling it
                    sys.stderr.write( line )
    finally:
        os.unlink( stderr_name ) #clean up
    if return_code:
        cleanup_before_exit( tmp_dir )
        raise Exception( msg_error )
|  | 45 | 
def index_bam_files( bam_filenames, tmp_dir ):
    """Create the ``<bam>.bai`` index of every BAM file that lacks one.

    bam_filenames -- iterable of BAM file paths.
    tmp_dir       -- job temporary directory, handed to my_Popen() so it
                     can be cleaned up if indexing fails.

    For each file a status line is printed; ``samtools index`` is run only
    when the ``.bai`` file does not exist yet. my_Popen() raises Exception
    when samtools fails.
    """
    for bam_filename in bam_filenames:
        bam_index_filename = "%s.bai" % bam_filename
        index_exists = os.path.exists( bam_index_filename )
        print("bam_filename is: %s bam_index_filename is: %s test is: %s" % ( bam_filename, bam_index_filename, index_exists ))
        if not index_exists:
            #need to index this bam file
            command = 'samtools index %s %s' % ( bam_filename, bam_index_filename )
            my_Popen( command, "bam_index_stderr", tmp_dir, "Error during indexation of bam file :%s" % bam_filename )
|  | 54 | 
def index_fasta_files( fasta_filenames, tmp_dir ):
    """Create the ``<fasta>.fai`` index of every FASTA file that lacks one.

    fasta_filenames -- iterable of FASTA file paths.
    tmp_dir         -- job temporary directory, handed to my_Popen() so it
                       can be cleaned up if indexing fails.

    For each file a status line is printed; ``samtools faidx`` is run only
    when the ``.fai`` file does not exist yet. my_Popen() raises Exception
    when samtools fails.
    """
    for fasta_filename in fasta_filenames:
        fasta_index_filename = "%s.fai" % fasta_filename
        index_exists = os.path.exists( fasta_index_filename )
        print("fasta_filename is: %s fasta_index_filename is: %s test is: %s" % ( fasta_filename, fasta_index_filename, index_exists ))
        if not index_exists:
            #need to index this fasta file
            # "samtools faidx" takes only the FASTA path to build the index
            # (written next to it as <fasta>.fai); any further positional
            # argument is interpreted as a region to extract, so the index
            # file name must not be passed.
            command = 'samtools faidx %s' % fasta_filename
            my_Popen( command, "fasta_index_stderr", tmp_dir, "Error during indexation of fasta file :%s" % fasta_filename )
|  | 63 | 
def __main__():
    """Configure and run the Strelka workflow for one tumor/normal pair.

    Steps, in order:
      1. parse the command line;
      2. check that the Strelka configuration script exists and that the
         analysis directory does not exist yet;
      3. index the BAM and reference FASTA files with samtools when their
         index is missing;
      4. build ``tmp/config.ini``: generated from the command-line options
         ("Custom"), copied from the sample shipped with Strelka
         ("Default"), or copied from the path given in --configFile;
      5. run the configuration script, then ``make -C <analysis_dir>``.

    Exits through sys.exit() when a pre-flight check fails; my_Popen()
    raises Exception when an external command fails.
    """
    # NOTE: installation-specific location of the Strelka build.
    root_dir = "/home/galaxyusr/data/galaxy_dist/tools/strelka2"
    job_dir = os.getcwd()
    analysis_dir = job_dir + "/StrelkaAnalysis"
    config_script = root_dir + "/configureStrelkaWorkflow.pl"
    tmp_dir = "tmp"  #tempfile.mkdtemp( prefix='tmp-strelkaAnalysis-' )
    config_ini = "%s/config.ini" % (tmp_dir)

    print("root_dir: " + root_dir + "\njob_dir :" + job_dir + "\nanalysis_dir :" + analysis_dir + "\nconfig_script :" + config_script + "\ntmp_dir :" + tmp_dir + "\nconfig_ini :" +  config_ini)

    #manage parsing
    # The four inputs below are dereferenced unconditionally further down,
    # so a missing one is reported as a usage error instead of a TypeError.
    parser = argparse.ArgumentParser()
    parser.add_argument( '-t', '--tumorBam', help='path to the tumor bam file', required = True )
    parser.add_argument( '-n', '--normalBam', help='path to the normal bam file', required = True )
    parser.add_argument( '-r', '--refFile', help='path to the reference fasta file', required = True )
    parser.add_argument( '-c', '--configFile', help='"Custom" (build the config from the options below), "Default" (use the sample config shipped with strelka) or the path to an existing config file', required = True )
    # Every option below is optional and only used with "--configFile Custom",
    # where it is written as "name=value" to the [user] section of config.ini.
    strelka_options = [
        'depthFilterMultiple',
        'snvMaxFilteredBasecallFrac',
        'snvMaxSpanningDeletionFrac',
        'indelMaxRefRepeat',
        'indelMaxWindowFilteredBasecallFrac',
        'indelMaxIntHpolLength',
        'ssnvPrior',
        'sindelPrior',
        'ssnvNoise',
        'sindelNoise',
        'ssnvNoiseStrandBiasFrac',
        'minTier1Mapq',
        'minTier2Mapq',
        'ssnvQuality_LowerBound',
        'sindelQuality_LowerBound',
        'isWriteRealignedBam',
        'binSize',
        'extraStrelkaArguments',
        'isSkipDepthFilters',
        'maxInputDepth',
    ]
    for option in strelka_options:
        parser.add_argument( '--' + option, help='value of the "%s" entry of a Custom config file' % option, required = False )
    args = parser.parse_args()

    #verifying everything's ok
    if not os.path.isfile(config_script):
        sys.exit("ERROR: The strelka workflow must be built prior to running. See installation instructions in '" + root_dir + "/README'")
    print("configuring...", file=sys.stdout)
    if os.path.exists(analysis_dir):
        sys.exit("'" + analysis_dir + "' already exist, if you are executing this tool from galaxy it should not happen")

    # config.ini is written inside tmp_dir, so the directory has to exist
    if not os.path.isdir(tmp_dir):
        os.makedirs(tmp_dir)

    # creating index if needed
    os.environ['PATH'] = root_dir + "/opt/samtools:" + os.environ.get('PATH', '')
    bam_filenames = [ args.tumorBam, args.normalBam ]
    index_bam_files( bam_filenames, tmp_dir )
    fasta_files = [ args.refFile ]
    index_fasta_files( fasta_files, tmp_dir )

    #creating config file if needed
    if args.configFile == "Custom":
        _create_config(vars(args), config_ini)
    elif args.configFile == "Default":
        cmdbash = "cp %s %s" % (root_dir + "/strelka_config.sample", config_ini)
        my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of default config file, maybe it was removed")
    else:
        if not os.path.exists(args.configFile):
            # stop here: copying a missing file could only fail later with
            # a less helpful message
            cleanup_before_exit( tmp_dir )
            sys.exit("The path to your configuration File seems to be wrong, use another one or custom option")
        cmdbash = "cp %s %s" % (args.configFile, config_ini)
        my_Popen(cmdbash, "copy_custom_file_err", tmp_dir, "Error during the copy of your configuration file")

    #configuration of workflow
    cmd = "%s --tumor=%s --normal=%s --ref=%s --config=%s --output-dir=%s" % (config_script, args.tumorBam, args.normalBam, args.refFile, config_ini, analysis_dir)
    print( "**** Starting configuration.")
    print( "**** Configuration cmd: '" + cmd + "'")
    my_Popen( cmd, "configuration_stderr", tmp_dir, "Error during configuration !")
    print("completed configuration")

    #run the workflow !
    cmd = "make -C " + analysis_dir
    print("**** starting workflow.")
    print("**** workflow cmd: '" + cmd + "'")
    my_Popen( cmd, "workflow_stderr", tmp_dir, "Error during workflow execution !")
    print("**** completed workflow execution")
|  | 149 | 
|  | 150 | 
|  | 151 | 
|  | 152 | 
|  | 153 | 
|  | 154 | 
|  | 155 | 
|  | 156 | 
|  | 157 | 
|  | 158 | 
|  | 159 | 
|  | 160 | 
|  | 161 | 
|  | 162 | 
|  | 163 #bam_filenames = [] | 
|  | 164 #    if options.datasets: | 
|  | 165 #        for ( dataset_arg, filename, galaxy_ext, prefix ) in options.datasets: | 
|  | 166 #            gatk_filename = filename_from_galaxy( filename, galaxy_ext, target_dir = tmp_dir, prefix = prefix )#return the link to the dataset that has been created in the function | 
|  | 167 #            if dataset_arg: | 
|  | 168 #                cmd = '%s %s "%s"' % ( cmd, gatk_filetype_argument_substitution( dataset_arg, galaxy_ext ), gatk_filename ) | 
|  | 169 #            if galaxy_ext == "bam": | 
|  | 170 #                bam_filenames.append( gatk_filename ) | 
|  | 171 #    #set up stdout and stderr output options | 
|  | 172 #    stdout = open_file_from_option( options.stdout, mode = 'wb' ) | 
|  | 173 #    stderr = open_file_from_option( options.stderr, mode = 'wb' ) | 
|  | 174 #    #if no stderr file is specified, we'll use our own | 
|  | 175 #    if stderr is None: | 
|  | 176 #        stderr = tempfile.NamedTemporaryFile( prefix="strelka-stderr-", dir=tmp_dir ) | 
|  | 177 # | 
|  | 178 #    proc = subprocess.Popen( args=cmd, stdout=stdout, stderr=stderr, shell=True, cwd=tmp_dir ) | 
|  | 179 #    return_code = proc.wait() | 
|  | 180 # | 
|  | 181 #    if return_code: | 
|  | 182 #        stderr_target = sys.stderr | 
|  | 183 #    else: | 
|  | 184 #        stderr_target = sys.stdout | 
|  | 185 #    stderr.flush() | 
|  | 186 #    stderr.seek(0) | 
|  | 187 #    while True: | 
|  | 188 #        chunk = stderr.read( CHUNK_SIZE ) | 
|  | 189 #        if chunk: | 
|  | 190 #            stderr_target.write( chunk ) | 
|  | 191 #        else: | 
|  | 192 #            break | 
|  | 193 #    stderr.close() | 
|  | 194 #    #generate html reports | 
|  | 195 #    if options.html_report_from_directory: | 
|  | 196 #        for ( html_filename, html_dir ) in options.html_report_from_directory: | 
|  | 197 #            html_report_from_directory( open( html_filename, 'wb' ), html_dir ) | 
|  | 198 # | 
|  | 199 #    cleanup_before_exit( tmp_dir ) | 
|  | 200 | 
# Run the wrapper only when this file is executed as a script.
if __name__ == "__main__":
    __main__()