annotate hint_wrapper.py @ 7:9920c489a5c3 draft

Uploaded
author sjung
date Wed, 24 May 2017 00:36:46 -0400
parents 43e4ba4796c1
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
43e4ba4796c1 Uploaded
sjung
parents:
diff changeset
1 #!/usr/bin/python
43e4ba4796c1 Uploaded
sjung
parents:
diff changeset
2
43e4ba4796c1 Uploaded
sjung
parents:
diff changeset
3 import optparse, os, shutil, sys, tempfile, glob, shlex, vcf, pysam, tarfile
43e4ba4796c1 Uploaded
sjung
parents:
diff changeset
4 from subprocess import *
43e4ba4796c1 Uploaded
sjung
parents:
diff changeset
5 import subprocess
43e4ba4796c1 Uploaded
sjung
parents:
diff changeset
6
43e4ba4796c1 Uploaded
sjung
parents:
diff changeset
# Number of bytes moved per read when the captured tool stderr is forwarded.
CHUNK_SIZE = 2**20  # 1 MiB
43e4ba4796c1 Uploaded
sjung
parents:
diff changeset
8
43e4ba4796c1 Uploaded
sjung
parents:
diff changeset
def cleanup_before_exit(tmp_dir):
    """Recursively delete *tmp_dir* if it is set and exists.

    A falsy ``tmp_dir`` (``None`` or an empty string) and a path that does
    not exist are both ignored, so the function is safe to call more than
    once on the same directory.
    """
    if tmp_dir and os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)
43e4ba4796c1 Uploaded
sjung
parents:
diff changeset
12
43e4ba4796c1 Uploaded
sjung
parents:
diff changeset
def __main__():
    """Run ``rgt-hint`` on one reads file and one regions file.

    Command-line options (all four are required):
        --input    reads file; symlinked to <out-dir>/input/sample.bam and
                   indexed with ``pysam.index``.
        --region   regions file; symlinked to <out-dir>/input/regions.bed.
        --output   destination that <out-dir>/output/DU_K562_HINT.bed is
                   copied to once the tool has run.
        --out-dir  working directory. It is created when missing and is
                   removed, together with everything in it, before this
                   function returns -- also when a step fails.

    The tool's stderr is forwarded to ``sys.stderr`` when it exits with a
    non-zero status and to ``sys.stdout`` otherwise; its stdout is captured
    and discarded.
    """
    parser = optparse.OptionParser()
    parser.add_option('', '--input', dest="inputF", action='store', type="string", default=None, help='')
    parser.add_option('', '--region', dest="regionF", action='store', type="string", default=None, help='')
    parser.add_option('', '--output', dest='outputF', action='store', type="string", default=None, help='')
    parser.add_option('', '--out-dir', dest='output_dir', action='store', type="string", default=None, help='If specified, the output directory for extra files.')

    (options, args) = parser.parse_args()

    # Fail early with a usage message instead of a TypeError deep inside
    # os.path / shutil (or after the tool has already been run).
    required = (
        ('--input', options.inputF),
        ('--region', options.regionF),
        ('--output', options.outputF),
        ('--out-dir', options.output_dir),
    )
    missing = [flag for flag, value in required if not value]
    if missing:
        parser.error("missing required option(s): %s" % ", ".join(missing))

    # Absolute paths are needed because (a) a relative symlink target is
    # resolved against the directory holding the link, and (b) the tool is
    # started with cwd set to the working directory.
    work_dir = os.path.abspath(options.output_dir)
    region_path = os.path.abspath(options.regionF)
    reads_path = os.path.abspath(options.inputF)

    try:
        if not os.path.exists(work_dir):
            os.mkdir(work_dir)
        input_dir = "%s/input" % work_dir
        output_dir = "%s/output" % work_dir
        config_dir = "%s/config" % work_dir

        # Check every sub-directory on its own: testing only output_dir
        # fails as soon as just one of the three already exists.
        for directory in (input_dir, output_dir, config_dir):
            if not os.path.exists(directory):
                os.mkdir(directory)

        # region input file
        linked_bed_name = "%s/regions.bed" % input_dir
        os.symlink(region_path, linked_bed_name)
        linked_bam_name = "%s/sample.bam" % input_dir
        os.symlink(reads_path, linked_bam_name)
        pysam.index(linked_bam_name)

        # Tab-separated experimental matrix handed to rgt-hint.
        config_path = "%s/input.txt" % config_dir
        with open(config_path, 'w') as input_config:
            input_config.write("name\ttype\tfile\tdata\tgroup\n")
            input_config.write("HS2\tregions\t%s\tHS\tDU_K562_HINT\n" % linked_bed_name)
            input_config.write("DNase\treads\t%s\tDNASE\tDU_K562_HINT\n" % linked_bam_name)

        # Argument list + shell=False: paths containing spaces or shell
        # metacharacters are passed through untouched.
        hint_cmd = ["rgt-hint", "--output-location", "%s/" % output_dir, config_path]
        print("hint cmd:%s" % " ".join(hint_cmd))

        with tempfile.NamedTemporaryFile(prefix="hint-stdout-", dir=work_dir) as stdout:
            with tempfile.NamedTemporaryFile(prefix="hint-stderr-", dir=work_dir) as stderr:
                return_code = subprocess.call(hint_cmd, stdout=stdout, stderr=stderr, shell=False, cwd=work_dir)

                stderr_target = sys.stderr if return_code else sys.stdout
                # Push out anything already buffered (the "hint cmd" line)
                # so the forwarded bytes appear after it.
                stderr_target.flush()
                # The temp file is binary; Python 3 text streams expose
                # their byte stream as .buffer, Python 2 streams take
                # bytes directly.
                sink = getattr(stderr_target, "buffer", stderr_target)
                # The child wrote through the shared descriptor, so rewind
                # before reading its output back.
                stderr.seek(0)
                shutil.copyfileobj(stderr, sink, CHUNK_SIZE)
                sink.flush()

        # copy files to final output locations
        shutil.copy('%s/DU_K562_HINT.bed' % output_dir, options.outputF)
    finally:
        cleanup_before_exit(work_dir)
43e4ba4796c1 Uploaded
sjung
parents:
diff changeset
70
43e4ba4796c1 Uploaded
sjung
parents:
diff changeset
# Run the wrapper only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()
43e4ba4796c1 Uploaded
sjung
parents:
diff changeset
72