annotate SNV/snp_filters.py @ 5:a4975ec34575

Uploaded
author ryanmorin
date Mon, 17 Oct 2011 14:57:09 -0400
parents 74f5ea818cea
children 361d6506850a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1 #!/usr/bin/env python
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
2
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
3 """
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
4 Filters raw SNV calls against known SNPs, then joins the novel calls to a codon lookup file.
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
5
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
6 usage: %prog [options]
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
7 -i, --input=i: raw snp call file chr:pos
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
8 -o, --output1=o: novel snp calls in file
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
9 -c, --output2=c: filtered novel SNPs associated with codons
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
10 -K, --known_snps=k: known SNPs for filtering (sorted chr:pos file)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
11 -C, --codon=C: codon lookup file (sorted chr:pos)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
12
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
13 """
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
14
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
15 #my $cmd7 = "sort -S 2000M -k 1 $snps | join -a 1 - $known | grep -v dbS | grep -v Vent | grep -v Yor | grep -v Wats | sort -S 2000 -k 1 > $out";
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
16 #my $cmd8 = "join $codon $snps\_novel.txt > $snps\_novel." . $base . "codon";
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
17
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
18 import os, shutil, subprocess, sys, tempfile
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
19 from galaxy import eggs
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
20 import pkg_resources; pkg_resources.require( "bx-python" )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
21 from bx.cookbook import doc_optparse
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
22
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
def stop_err( msg ):
    """
    Report a fatal error and terminate the script.

    Writes msg, followed by a newline, to standard error and then exits
    with status 1 so that callers inspecting the exit code see a failure
    (a bare sys.exit() would report success).

    msg -- text of the error message.

    Raises SystemExit; this function never returns.
    """
    sys.stderr.write( '%s\n' % msg )
    sys.exit( 1 )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
26
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
def _run_shell_command( cmd, work_dir ):
    """
    Run cmd through the shell with work_dir as its working directory.

    Returns a ( returncode, stderr ) tuple, where stderr is the text the
    command wrote to its standard error stream.
    """
    # stderr goes to a temporary file rather than a pipe so that a command
    # producing a lot of error output cannot block on a full pipe buffer.
    tmp_stderr = tempfile.TemporaryFile( dir=work_dir )
    try:
        proc = subprocess.Popen( args=cmd, shell=True, cwd=work_dir, stderr=tmp_stderr )
        returncode = proc.wait()
        # Rewind and read everything in a single call; looping on read()
        # until the accumulated text is empty would never terminate once
        # anything had been written.
        tmp_stderr.seek( 0 )
        stderr = tmp_stderr.read()
    finally:
        tmp_stderr.close()
    if not isinstance( stderr, str ):
        # Python 3 returns bytes from a binary-mode file.
        stderr = stderr.decode( 'utf-8', 'replace' )
    return returncode, stderr


def __main__():
    """
    Filter raw SNV calls against known SNPs and annotate the novel ones.

    Step 1 sorts the input, joins it against the known-SNP file, drops every
    line matching dbS, Vent, Yor or Wats, and writes the sorted remainder to
    output1.  Step 2 joins the codon lookup file with output1 and writes the
    result to output2.  Options are parsed from the module docstring.

    Any failure (missing option, command error, empty output file) is
    reported through stop_err, which terminates the script.
    """
    try:
        from shlex import quote as shell_quote
    except ImportError:
        # Python 2: shlex.quote does not exist yet.
        from pipes import quote as shell_quote

    #Parse Command Line
    options, args = doc_optparse.parse( __doc__ )
    required = (
        ( '--input', options.input ),
        ( '--output1', options.output1 ),
        ( '--output2', options.output2 ),
        ( '--known_snps', options.known_snps ),
        ( '--codon', options.codon ),
    )
    missing = [ name for name, value in required if not value ]
    if missing:
        stop_err( 'Error, required arguments not provided: %s' % ', '.join( missing ) )

    tmpDir = tempfile.mkdtemp()
    try:
        #prepare basic filter_snvmix command
        filter_cmd = "sort -S 2G -k 1 %s | join -a 1 - %s | grep -v dbS | grep -v Vent | grep -v Yor | grep -v Wats | sort -S 2G -k 1 > %s"
        # Paths are quoted because the command line is interpreted by the shell.
        filter_cmd = filter_cmd % ( shell_quote( options.input ),
                                    shell_quote( options.known_snps ),
                                    shell_quote( options.output1 ) )
        try:
            returncode, stderr = _run_shell_command( filter_cmd, tmpDir )
        except ( OSError, IOError ) as e:
            stop_err( 'Error running filter command\n' + str( e ) )
        if returncode != 0:
            stop_err( 'Error running filter command\n' + stderr )

        # check that there are results in the output file
        if os.path.getsize( options.output1 ) > 0:
            sys.stdout.write( 'wrote output1' )
        else:
            stop_err( 'The output file is empty. All SNVs might have been known or there may be an error with your input file or settings.' )

        codon_cmd = "join %s %s > %s"
        codon_cmd = codon_cmd % ( shell_quote( options.codon ),
                                  shell_quote( options.output1 ),
                                  shell_quote( options.output2 ) )
        try:
            returncode, stderr = _run_shell_command( codon_cmd, tmpDir )
        except ( OSError, IOError ) as e:
            stop_err( 'Error running codon command\n' + str( e ) )
        if returncode != 0:
            stop_err( 'Error running codon command\n' + stderr )

        # check that there are results in the second output file
        if os.path.getsize( options.output2 ) > 0:
            sys.stdout.write( 'wrote output2' )
        else:
            stop_err( 'The output file is empty. All SNVs might have been intronic or intergenic or there may be an error with your input file or settings.' )
    finally:
        # Runs on the stop_err (SystemExit) paths too, so the scratch
        # directory is always removed.
        shutil.rmtree( tmpDir, ignore_errors=True )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
94
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
95
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
96
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
# Run the tool only when this file is executed as a script.
if __name__ == "__main__":
    __main__()