annotate SNV/snvmix.py @ 5:a4975ec34575

Uploaded
author ryanmorin
date Mon, 17 Oct 2011 14:57:09 -0400
parents 74f5ea818cea
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1 #!/usr/bin/env python
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
2
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
3 """
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
4 Runs the SNVMix2 binary on a bam input file with various options.
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
5
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
6 usage: %prog [options]
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
7 -i, --input1=i: bam file
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
8 -o, --output1=o: Output SNVMix (raw)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
9 -d, --dbkey=d: dbkey of user-supplied file
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
10 -x, --indexDir=x: index directory
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
11 -t, --type=t: analysis type (e.g. mb|m|b|M|Mb|MB|SNVMix1)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
12 -q, --base=q: base qual threshold
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
13 -Q, --map=Q: map qual threshold
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
14 -l, --pos=l: position file
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
15 -f, --full=f: Full mode (output scores for every position)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
16 -R, --keep_dups: Retain reads flagged as duplicates (not recommended!)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
17 -c, --keep_chastity: Retain reads that failed the chastity filter
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
18 """
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
19
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
20 import os, shutil, subprocess, sys, tempfile
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
21 from galaxy import eggs
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
22 import pkg_resources; pkg_resources.require( "bx-python" )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
23 from bx.cookbook import doc_optparse
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
24
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
25
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
26
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
def stop_err( msg ):
    """
    Report a fatal error and terminate the script.

    msg is written to stderr followed by a newline, then SystemExit is raised
    with status 1 so the failure is also visible in the process exit code.
    """
    # wrap msg in a 1-tuple so a tuple-valued msg is formatted, not unpacked
    sys.stderr.write( '%s\n' % ( msg, ) )
    # a bare sys.exit() would report success (status 0) despite the error
    sys.exit( 1 )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
30
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
def check_seq_file( dbkey, GALAXY_DATA_INDEX_DIR ):
    """
    Look up the reference sequence path registered for dbkey.

    Reads the file sam_fa_indices.loc inside GALAXY_DATA_INDEX_DIR. Only
    tab-separated lines that start with 'index' and have at least three fields
    are considered. The third field of the first such line whose second field
    equals dbkey is returned with surrounding whitespace stripped.

    Returns '' when no line matches. Errors from opening the file propagate.
    """
    seqFile = '%s/sam_fa_indices.loc' % GALAXY_DATA_INDEX_DIR
    seqPath = ''
    loc = open( seqFile )
    # try/finally (not "with") closes the handle on every path while keeping
    # the syntax usable on the old interpreters this script mentions (2.4)
    try:
        for line in loc:
            line = line.rstrip( '\r\n' )
            # a line starting with 'index' is necessarily non-empty and not a
            # '#' comment, so this one test covers all three conditions
            if not line.startswith( 'index' ):
                continue
            fields = line.split( '\t' )
            if len( fields ) >= 3 and fields[1] == dbkey:
                seqPath = fields[2].strip()
                break
    finally:
        loc.close()
    return seqPath
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
44
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
def _read_in_chunks( path, buffsize=1048576 ):
    """
    Return the contents of the file at path, read buffsize bytes at a time.

    Reading stops at end of file. If accumulating the text raises
    OverflowError, whatever was read up to that point is returned.
    """
    contents = ''
    handle = open( path, 'rb' )
    try:
        try:
            while True:
                chunk = handle.read( buffsize )
                # an empty read is the end-of-file signal; testing the length
                # of the accumulated text instead would loop forever when the
                # file size is an exact multiple of buffsize
                if not chunk:
                    break
                contents += chunk
        except OverflowError:
            pass
    finally:
        handle.close()
    return contents

def __main__():
    """
    Run SNVMix2 on the BAM file named by the command-line options.

    Options are parsed from the module docstring. The reference sequence for
    options.dbkey is looked up with check_seq_file and must have a ".fai" file
    next to it. SNVMix2 is run inside a private temporary directory with its
    stderr captured to a file there. A non-zero return code, or any other
    exception, is reported through stop_err together with the captured
    stderr / exception text. The temporary directory is always removed.
    Finally the output file is checked to be non-empty.
    """
    #Parse Command Line
    options, args = doc_optparse.parse( __doc__ )
    seqPath = check_seq_file( options.dbkey, options.indexDir )

    #make temp dir
    tmpDir = tempfile.mkdtemp()

    try:
        # have to nest try-except in try-finally to handle 2.4
        try:
            if not os.path.exists( "%s.fai" % seqPath ):
                raise Exception( "No sequences are available for '%s', request them by reporting this error." % options.dbkey )

            #prepare basic SNVMix2 command
            # built as an argument list and run without a shell, so paths
            # containing spaces or shell metacharacters are passed verbatim
            cmd = [ 'SNVMix2', '-p', 'b',
                    '-i', str( options.input1 ),
                    '-r', str( seqPath ),
                    '-o', str( options.output1 ),
                    '-q', str( options.base ),
                    '-Q', str( options.map ),
                    '-t', str( options.type ) ]

            # an omitted option (None) is treated like the "none" placeholder
            if options.pos is not None and options.pos != "none":
                if not os.path.isfile( options.pos ):
                    raise Exception( "position file doesn't exist" )
                cmd.extend( [ '-l', options.pos ] )
                # full mode is only requested together with a position file
                if options.full == "yes":
                    cmd.append( '-f' )

            # NOTE(review): the usage text declares -c and -R without "=value";
            # confirm doc_optparse really yields the string "yes" for them
            if options.keep_chastity == "yes":
                cmd.append( '-c' )
            if options.keep_dups == "yes":
                cmd.append( '-R' )

            #perform SNVMix2 command
            # mkstemp hands back an already-open, uniquely named file
            fd, tmp = tempfile.mkstemp( dir=tmpDir )
            tmp_stderr = os.fdopen( fd, 'wb' )
            try:
                proc = subprocess.Popen( args=cmd, cwd=tmpDir, stderr=tmp_stderr.fileno() )
                returncode = proc.wait()
            finally:
                # close even when Popen itself fails (e.g. binary not found)
                tmp_stderr.close()

            #did it succeed?
            if returncode != 0:
                # get stderr, allowing for case where it's very large
                raise Exception( _read_in_chunks( tmp ) )
        except Exception:
            # sys.exc_info() instead of "except Exception, e" so the syntax
            # parses on every interpreter from 2.4 onwards
            stop_err( 'Error running SNVMix2 tool\n' + str( sys.exc_info()[1] ) )
    finally:
        #clean up temp files
        if os.path.exists( tmpDir ):
            shutil.rmtree( tmpDir )

    # check that there are results in the output file
    # (a missing file is reported the same way as an empty one)
    if os.path.exists( options.output1 ) and os.path.getsize( options.output1 ) > 0:
        sys.stdout.write( 'wrote SNVMix output' )
    else:
        stop_err( 'The output file is empty. Your input file may have had no matches, or there may be an error with your input file or settings.' )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
108
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()