annotate bwa_0_7_5/bwa_0_7_5.py @ 4:22449c3a0b7f draft default tip

debug when use 'history' input fasta as genome reference
author yufei-luo
date Tue, 10 Dec 2013 08:48:23 -0500
parents 8409cff2d740
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
1 #!/usr/bin/env python
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
2 ## yufei.luo@gustave.roussy 22/07/2013
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
3
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
4 """
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
5 Runs BWA on single-end or paired-end data.
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
6 Produces a SAM file containing the mappings.
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
7 Works with BWA version 0.7.5.
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
8 NOTICE: In this wrapper, we only use 'mem' for mapping step.
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
9
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
10 usage: bwa_0_7_5.py [args]
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
11
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
12 See below for args
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
13 """
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
14
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
15 import optparse, os, shutil, subprocess, sys, tempfile
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
16 import argparse
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
17
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
18 def stop_err( msg ):
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
19 sys.stderr.write( '%s\n' % msg )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
20 sys.exit()
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
21
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
22 def check_is_double_encoded( fastq ):
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
23 # check that first read is bases, not one base followed by numbers
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
24 bases = [ 'A', 'C', 'G', 'T', 'a', 'c', 'g', 't', 'N' ]
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
25 nums = [ '0', '1', '2', '3' ]
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
26 for line in file( fastq, 'rb'):
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
27 if not line.strip() or line.startswith( '@' ):
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
28 continue
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
29 if len( [ b for b in line.strip() if b in nums ] ) > 0:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
30 return False
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
31 elif line.strip()[0] in bases and len( [ b for b in line.strip() if b in bases ] ) == len( line.strip() ):
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
32 return True
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
33 else:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
34 raise Exception, 'First line in first read does not appear to be a valid FASTQ read in either base-space or color-space'
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
35 raise Exception, 'There is no non-comment and non-blank line in your FASTQ file'
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
36
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
37 def __main__():
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
38
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
39 descr = "bwa_0_7_5.py: version 1.0. Map the reads(long length) against the genome reference with BWA MEM. \n"
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
40 descr += "Usage: BWA mem -t thread -R groupInfo refSequence read.R1.fastq (read.R2.fastq) > out.sam"
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
41 parser = argparse.ArgumentParser(description=descr)
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
42 parser.add_argument( '-t', '--threads', default=1, help='The number of threads to use [1]' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
43 parser.add_argument( '--color-space', default=False, help='If the input files are SOLiD format' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
44 parser.add_argument( '--ref', help='The reference genome to use or index' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
45 parser.add_argument( '-f', '--fastq', help='The (forward) fastq file to use for the mapping' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
46 parser.add_argument( '-F', '--rfastq', help='The reverse fastq file to use for mapping if paired-end data' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
47 parser.add_argument( '-u', '--output', help='The file to save the output (SAM format)' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
48 parser.add_argument( '-g', '--genAlignType', help='The type of pairing (single or paired)' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
49 parser.add_argument( '--params', help='Parameter setting to use (pre_set or full)' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
50 parser.add_argument( '-s', '--fileSource', help='Whether to use a previously indexed reference sequence or one form history (indexed or history)' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
51 parser.add_argument( '-D', '--dbkey', help='Dbkey for reference genome' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
52
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
53 parser.add_argument( '-k', '--minEditDistSeed', default=19, type=int, help='Minimum edit distance to the seed [19]' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
54 parser.add_argument( '-w', '--bandWidth', default=100, type=int, help='Band width for banded alignment [100]' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
55 parser.add_argument( '-d', '--offDiagonal', default=100, type=int, help='off-diagonal X-dropoff [100]' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
56 parser.add_argument( '-r', '--internalSeeds', default=1.5, type=float, help='look for internal seeds inside a seed longer than {-k} * FLOAT [1.5]' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
57 parser.add_argument( '-c', '--seedsOccurrence', default=10000, type=int, help='skip seeds with more than INT occurrences [10000]' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
58 parser.add_argument( '-S', '--mateRescue', default=False, help='skip mate rescue' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
59 parser.add_argument( '-P', '--skipPairing', default=False, help='skpe pairing, mate rescue performed unless -S also in use' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
60 parser.add_argument( '-A', '--seqMatch', default=1, type=int, help='score of a sequence match' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
61 parser.add_argument( '-B', '--mismatch', default=4,type=int, help='penalty for a mismatch' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
62 parser.add_argument( '-O', '--gapOpen', default=6, type=int, help='gap open penalty' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
63 parser.add_argument( '-E', '--gapExtension', default=None, help='gap extension penalty; a gap of size k cost {-O} + {-E}*k [1]' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
64 parser.add_argument( '-L', '--clipping', default=5, type=int, help='penalty for clipping [5]' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
65 parser.add_argument( '-U', '--unpairedReadpair', default=17, type=int, help='penalty for an unpaired read pair [17]' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
66 parser.add_argument( '-p', '--interPairEnd', default=False, help='first query file consists of interleaved paired-end sequences' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
67 parser.add_argument( '--rgid', help='Read group identifier' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
68 parser.add_argument( '--rgsm', help='Sample' )
4
22449c3a0b7f debug when use 'history' input fasta as genome reference
yufei-luo
parents: 3
diff changeset
69 parser.add_argument( '--rgpl', choices=[ 'CAPILLARY', 'LS454', 'ILLUMINA', 'SOLID', 'HELICOS', 'IONTORRENT', 'PACBIO' ], help='Platform/technology used to produce the reads' )
0
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
70 parser.add_argument( '--rglb', help='Library name' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
71 parser.add_argument( '--rgpu', help='Platform unit (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD)' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
72 parser.add_argument( '--rgcn', help='Sequencing center that produced the read' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
73 parser.add_argument( '--rgds', help='Description' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
74 parser.add_argument( '--rgdt', help='Date that run was produced (ISO8601 format date or date/time, like YYYY-MM-DD)' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
75 parser.add_argument( '--rgfo', help='Flow order' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
76 parser.add_argument( '--rgks', help='The array of nucleotide bases that correspond to the key sequence of each read' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
77 parser.add_argument( '--rgpg', help='Programs used for processing the read group' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
78 parser.add_argument( '--rgpi', help='Predicted median insert size' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
79 parser.add_argument( '-T', '--minScore', default=30, type=int, help='minimum score to output [30]' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
80 parser.add_argument( '-M', '--mark', default=False, help='mark shorter split hits as secondary (for Picard/GATK compatibility)' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
81 args = parser.parse_args()
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
82
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
83
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
84 # output version # of tool
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
85 try:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
86 tmp = tempfile.NamedTemporaryFile().name
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
87 tmp_stdout = open( tmp, 'wb' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
88 proc = subprocess.Popen( args='bwa 2>&1', shell=True, stdout=tmp_stdout )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
89 tmp_stdout.close()
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
90 returncode = proc.wait()
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
91 stdout = None
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
92 for line in open( tmp_stdout.name, 'rb' ):
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
93 if line.lower().find( 'version' ) >= 0:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
94 stdout = line.strip()
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
95 break
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
96 if stdout:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
97 sys.stdout.write( 'BWA %s\n' % stdout )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
98 else:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
99 raise Exception
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
100 except:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
101 sys.stdout.write( 'Could not determine BWA version\n' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
102
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
103 # check for color space fastq that's not double-encoded and exit if appropriate
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
104 # if args.color_space:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
105 # if not check_is_double_encoded( args.fastq ):
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
106 # stop_err( 'Your file must be double-encoded (it must be converted from "numbers" to "bases"). See the help section for details' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
107 # if args.genAlignType == 'paired':
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
108 # if not check_is_double_encoded( args.rfastq ):
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
109 # stop_err( 'Your reverse reads file must also be double-encoded (it must be converted from "numbers" to "bases"). See the help section for details' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
110
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
111 fastq = args.fastq
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
112 if args.rfastq:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
113 rfastq = args.rfastq
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
114
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
115 # set color space variable
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
116 # if args.color_space:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
117 # color_space = '-c'
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
118 # else:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
119 # color_space = ''
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
120
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
121 # make temp directory for placement of indices
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
122 tmp_index_dir = tempfile.mkdtemp()
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
123 tmp_dir = tempfile.mkdtemp()
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
124 # index if necessary
4
22449c3a0b7f debug when use 'history' input fasta as genome reference
yufei-luo
parents: 3
diff changeset
125 # if args.fileSource == 'history' and not args.do_not_build_index:
22449c3a0b7f debug when use 'history' input fasta as genome reference
yufei-luo
parents: 3
diff changeset
126 if args.fileSource == 'history' :
0
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
127 ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
128 ref_file_name = ref_file.name
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
129 ref_file.close()
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
130 os.symlink( args.ref, ref_file_name )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
131 # determine which indexing algorithm to use, based on size
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
132 try:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
133 size = os.stat( args.ref ).st_size
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
134 if size <= 2**30:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
135 indexingAlg = 'is'
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
136 else:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
137 indexingAlg = 'bwtsw'
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
138 except:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
139 indexingAlg = 'is'
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
140 #indexing_cmds = '%s -a %s' % ( color_space, indexingAlg )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
141 indexing_cmds = '-a %s' % indexingAlg
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
142 cmd1 = 'bwa index %s %s' % ( indexing_cmds, ref_file_name )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
143 try:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
144 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
145 tmp_stderr = open( tmp, 'wb' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
146 proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
147 returncode = proc.wait()
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
148 tmp_stderr.close()
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
149 # get stderr, allowing for case where it's very large
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
150 tmp_stderr = open( tmp, 'rb' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
151 stderr = ''
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
152 buffsize = 1048576
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
153 try:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
154 while True:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
155 stderr += tmp_stderr.read( buffsize )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
156 if not stderr or len( stderr ) % buffsize != 0:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
157 break
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
158 except OverflowError:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
159 pass
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
160 tmp_stderr.close()
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
161 if returncode != 0:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
162 raise Exception, stderr
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
163 except Exception, e:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
164 # clean up temp dirs
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
165 if os.path.exists( tmp_index_dir ):
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
166 shutil.rmtree( tmp_index_dir )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
167 if os.path.exists( tmp_dir ):
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
168 shutil.rmtree( tmp_dir )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
169 stop_err( 'Error indexing reference sequence. ' + str( e ) )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
170 else:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
171 ref_file_name = args.ref
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
172 # if args.illumina13qual:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
173 # illumina_quals = "-I"
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
174 # else:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
175 # illumina_quals = ""
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
176
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
177 # set up aligning and generate aligning command args
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
178 start_cmds = '-t %s ' % args.threads
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
179 if args.params == 'pre_set':
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
180 # aligning_cmds = '-t %s %s %s' % ( args.threads, color_space, illumina_quals )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
181 #start_cmds = '-t %s ' % args.threads
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
182 end_cmds = ' '
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
183 print start_cmds, end_cmds
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
184
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
185 else:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
186 end_cmds = '-k %s -w %s -d %s -r %s -c %s -A %s -B %s -O %s -L %s -U %s -T %s ' % (args.minEditDistSeed, args.bandWidth, args.offDiagonal, args.internalSeeds, args.seedsOccurrence, args.seqMatch, args.mismatch, args.gapOpen, args.clipping, args.unpairedReadpair, args.minScore)
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
187 if args.mateRescue:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
188 end_cmds += '-S '
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
189 if args.skipPairing:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
190 end_cmds += '-P '
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
191 else:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
192 if args.skipPairing:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
193 print "Option Error and will not be considered, you should also choose 'skip mate rescue -S' option! "
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
194 if args.gapExtension != None:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
195 end_cmds += '-E %s ' % args.gapExtension
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
196
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
197 if args.rgid:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
198 if not args.rglb or not args.rgpl or not args.rgsm or not args.rglb:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
199 stop_err( 'If you want to specify read groups, you must include the ID, LB, PL, and SM tags.' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
200 # readGroup = '@RG\tID:%s\tLB:%s\tPL:%s\tSM:%s' % ( args.rgid, args.rglb, args.rgpl, args.rgsm )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
201 readGroup = '@RG\tID:%s\tLB:%s\tPL:%s\tSM:%s' % ( args.rgid, args.rglb, args.rgpl, args.rgsm )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
202 if args.rgpu:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
203 readGroup += '\tPU:%s' % args.rgpu
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
204 if args.rgcn:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
205 readGroup += '\tCN:%s' % args.rgcn
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
206 if args.rgds:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
207 readGroup += '\tDS:%s' % args.rgds
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
208 if args.rgdt:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
209 readGroup += '\tDT:%s' % args.rgdt
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
210 if args.rgfo:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
211 readGroup += '\tFO:%s' % args.rgfo
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
212 if args.rgks:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
213 readGroup += '\tKS:%s' % args.rgks
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
214 if args.rgpg:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
215 readGroup += '\tPG:%s' % args.rgpg
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
216 if args.rgpi:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
217 readGroup += '\tPI:%s' % args.rgpi
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
218 end_cmds += ' -R "%s" ' % readGroup
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
219
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
220 if args.interPairEnd:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
221 end_cmds += '-p %s ' % args.interPairEnd
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
222 if args.mark:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
223 end_cmds += '-M '
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
224
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
225
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
226 if args.genAlignType == 'paired':
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
227 cmd = 'bwa mem %s %s %s %s %s > %s' % ( start_cmds, ref_file_name, fastq, rfastq, end_cmds, args.output )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
228 else:
3
8409cff2d740 corrected the bug
yufei-luo
parents: 0
diff changeset
229 cmd = 'bwa mem %s %s %s %s > %s' % ( start_cmds, ref_file_name, fastq, end_cmds, args.output )
0
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
230
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
231 # perform alignments
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
232 buffsize = 1048576
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
233 try:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
234 # need to nest try-except in try-finally to handle 2.4
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
235 try:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
236 try:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
237 tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
238 tmp_stderr = open( tmp, 'wb' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
239 print "The cmd is %s" % cmd
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
240 proc = subprocess.Popen( args=cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
241 returncode = proc.wait()
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
242 tmp_stderr.close()
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
243 # get stderr, allowing for case where it's very large
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
244 tmp_stderr = open( tmp, 'rb' )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
245 stderr = ''
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
246 try:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
247 while True:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
248 stderr += tmp_stderr.read( buffsize )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
249 if not stderr or len( stderr ) % buffsize != 0:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
250 break
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
251 except OverflowError:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
252 pass
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
253 tmp_stderr.close()
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
254 if returncode != 0:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
255 raise Exception, stderr
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
256 except Exception, e:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
257 raise Exception, 'Error generating alignments. ' + str( e )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
258
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
259 # check that there are results in the output file
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
260 if os.path.getsize( args.output ) > 0:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
261 sys.stdout.write( 'BWA run on %s-end data' % args.genAlignType )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
262 else:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
263 raise Exception, 'The output file is empty. You may simply have no matches, or there may be an error with your input file or settings.'
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
264 except Exception, e:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
265 stop_err( 'The alignment failed.\n' + str( e ) )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
266 finally:
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
267 # clean up temp dir
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
268 if os.path.exists( tmp_index_dir ):
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
269 shutil.rmtree( tmp_index_dir )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
270 if os.path.exists( tmp_dir ):
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
271 shutil.rmtree( tmp_dir )
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
272
839e36b39c3f Uploaded
yufei-luo
parents:
diff changeset
273 if __name__=="__main__": __main__()