annotate bowtie_wrapper.py @ 1:86d20727b5a9 draft default tip

Added README file
author "Shantanu Pavgi <pavgi@uab.edu>"
date Mon, 26 Aug 2013 16:02:58 -0500
parents b8d21c7bb4e4
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
1 #!/usr/bin/env python
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
2
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
3 """
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
4 Runs Bowtie on single-end or paired-end data.
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
5 For use with Bowtie v. 0.12.7
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
6
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
7 usage: bowtie_wrapper.py [options]
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
8 -t, --threads=t: The number of threads to run
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
9 -o, --output=o: The output file
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
10 --output_unmapped_reads=: File name for unmapped reads (single-end)
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
11 --output_unmapped_reads_l=: File name for unmapped reads (left, paired-end)
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
12 --output_unmapped_reads_r=: File name for unmapped reads (right, paired-end)
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
13 --output_suppressed_reads=: File name for suppressed reads because of max setting (single-end)
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
14 --output_suppressed_reads_l=: File name for suppressed reads because of max setting (left, paired-end)
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
15 --output_suppressed_reads_r=: File name for suppressed reads because of max setting (right, paired-end)
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
16 -i, --input1=i: The (forward or single-end) reads file in Sanger FASTQ format
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
17 -I, --input2=I: The reverse reads file in Sanger FASTQ format
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
18 -4, --dataType=4: The type of data (SOLiD or Solexa)
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
19 -2, --paired=2: Whether the data is single- or paired-end
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
20 -g, --genomeSource=g: The type of reference provided
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
21 -r, --ref=r: The reference genome to use or index
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
22 -s, --skip=s: Skip the first n reads
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
23 -a, --alignLimit=a: Only align the first n reads
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
24 -T, --trimH=T: Trim n bases from high-quality (left) end of each read before alignment
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
25 -L, --trimL=L: Trim n bases from low-quality (right) end of each read before alignment
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
26 -m, --mismatchSeed=m: Maximum number of mismatches permitted in the seed
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
27 -M, --mismatchQual=M: Maximum permitted total of quality values at mismatched read positions
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
28 -l, --seedLen=l: Seed length
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
29 -n, --rounding=n: Whether or not to round to the nearest 10, saturating at 30
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
30 -P, --maqSoapAlign=P: Choose MAQ- or SOAP-like alignment policy
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
31 -w, --tryHard=w: Whether or not to try as hard as possible to find valid alignments when they exist
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
32 -v, --valAlign=v: Report up to n valid alignments per read
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
33 -V, --allValAligns=V: Whether or not to report all valid alignments per read
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
34 -G, --suppressAlign=G: Suppress all alignments for a read if more than n reportable alignments exist
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
35 -b, --best=b: Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
36 -B, --maxBacktracks=B: Maximum number of backtracks permitted when aligning a read
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
37 -R, --strata=R: Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
38 -j, --minInsert=j: Minimum insert size for valid paired-end alignments
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
39 -J, --maxInsert=J: Maximum insert size for valid paired-end alignments
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
40 -O, --mateOrient=O: The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
41 -A, --maxAlignAttempt=A: Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
42 -f, --forwardAlign=f: Whether or not to attempt to align the forward reference strand
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
43 -E, --reverseAlign=E: Whether or not to attempt to align the reverse-complement reference strand
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
44 -F, --offrate=F: Override the offrate of the index to n
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
45 -8, --snpphred=8: SNP penalty on Phred scale
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
46 -6, --snpfrac=6: Fraction of sites expected to be SNP sites
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
47 -7, --keepends=7: Keep extreme-end nucleotides and qualities
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
48 -S, --seed=S: Seed for pseudo-random number generator
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
49 -C, --params=C: Whether to use default or specified parameters
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
50 -u, --iautoB=u: Automatic or specified behavior
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
51 -K, --ipacked=K: Whether or not to use a packed representation for DNA strings
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
52 -Q, --ibmax=Q: Maximum number of suffixes allowed in a block
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
53 -Y, --ibmaxdivn=Y: Maximum number of suffixes allowed in a block as a fraction of the length of the reference
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
54 -D, --idcv=D: The period for the difference-cover sample
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
55 -U, --inodc=U: Whether or not to disable the use of the difference-cover sample
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
56 -y, --inoref=y: Whether or not to build the part of the reference index used only in paired-end alignment
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
57 -z, --ioffrate=z: How many rows get marked during annotation of some or all of the Burrows-Wheeler rows
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
58 -W, --iftab=W: The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
59 -X, --intoa=X: Whether or not to convert Ns in the reference sequence to As
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
60 -N, --iendian=N: Endianness to use when serializing integers to the index file
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
61 -Z, --iseed=Z: Seed for the pseudorandom number generator
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
62 -c, --icutoff=c: Number of first bases of the reference sequence to index
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
63 -x, --indexSettings=x: Whether or not indexing options are to be set
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
64 -H, --suppressHeader=H: Suppress header
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
65 --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is'
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
66 """
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
67
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
68 import optparse, os, shutil, subprocess, sys, tempfile
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
69
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
# Quality-score encoding flags: support more than just Sanger-encoded FASTQ variants.
# DEFAULT_ASCII_ENCODING is the fallback used when the input format is not in the map.
DEFAULT_ASCII_ENCODING = '--phred33-quals'
# Maps a Galaxy input format name to the matching quality-encoding argument.
GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG = {
    'fastqsanger': DEFAULT_ASCII_ENCODING,
    'fastqillumina': '--phred64-quals',
    'fastqsolexa': '--solexa-quals',
}
#FIXME: Integer quality scores are supported only when the '--integer-quals' argument is specified to bowtie; this is not currently able to be set in the tool/wrapper/config
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
74
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
def stop_err( msg ):
    """Report a fatal error and terminate the script.

    Writes ``msg`` followed by a newline to stderr, then raises
    ``SystemExit`` with status 1 so that the calling process sees a
    failing exit code (a bare ``sys.exit()`` would exit with status 0,
    i.e. report success despite the error).

    msg: the error text to emit; formatted with ``%s``.
    """
    sys.stderr.write( '%s\n' % msg )
    # Non-zero status: this function is only called on failure paths.
    sys.exit( 1 )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
78
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
79 def __main__():
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
80 #Parse Command Line
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
81 parser = optparse.OptionParser()
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
82 parser.add_option( '-t', '--threads', dest='threads', help='The number of threads to run' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
83 parser.add_option( '-o', '--output', dest='output', help='The output file' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
84 parser.add_option( '', '--output_unmapped_reads', dest='output_unmapped_reads', help='File name for unmapped reads (single-end)' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
85 parser.add_option( '', '--output_unmapped_reads_l', dest='output_unmapped_reads_l', help='File name for unmapped reads (left, paired-end)' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
86 parser.add_option( '', '--output_unmapped_reads_r', dest='output_unmapped_reads_r', help='File name for unmapped reads (right, paired-end)' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
87 parser.add_option( '', '--output_suppressed_reads', dest='output_suppressed_reads', help='File name for suppressed reads because of max setting (single-end)' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
88 parser.add_option( '', '--output_suppressed_reads_l', dest='output_suppressed_reads_l', help='File name for suppressed reads because of max setting (left, paired-end)' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
89 parser.add_option( '', '--output_suppressed_reads_r', dest='output_suppressed_reads_r', help='File name for suppressed reads because of max setting (right, paired-end)' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
90 parser.add_option( '-4', '--dataType', dest='dataType', help='The type of data (SOLiD or Solexa)' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
91 parser.add_option( '-i', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
92 parser.add_option( '-I', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
93 parser.add_option( '-2', '--paired', dest='paired', help='Whether the data is single- or paired-end' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
94 parser.add_option( '-g', '--genomeSource', dest='genomeSource', help='The type of reference provided' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
95 parser.add_option( '-r', '--ref', dest='ref', help='The reference genome to use or index' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
96 parser.add_option( '-s', '--skip', dest='skip', help='Skip the first n reads' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
97 parser.add_option( '-a', '--alignLimit', dest='alignLimit', help='Only align the first n reads' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
98 parser.add_option( '-T', '--trimH', dest='trimH', help='Trim n bases from high-quality (left) end of each read before alignment' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
99 parser.add_option( '-L', '--trimL', dest='trimL', help='Trim n bases from low-quality (right) end of each read before alignment' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
100 parser.add_option( '-m', '--mismatchSeed', dest='mismatchSeed', help='Maximum number of mismatches permitted in the seed' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
101 parser.add_option( '-M', '--mismatchQual', dest='mismatchQual', help='Maximum permitted total of quality values at mismatched read positions' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
102 parser.add_option( '-l', '--seedLen', dest='seedLen', help='Seed length' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
103 parser.add_option( '-n', '--rounding', dest='rounding', help='Whether or not to round to the nearest 10 and saturating at 30' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
104 parser.add_option( '-P', '--maqSoapAlign', dest='maqSoapAlign', help='Choose MAQ- or SOAP-like alignment policy' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
105 parser.add_option( '-w', '--tryHard', dest='tryHard', help='Whether or not to try as hard as possible to find valid alignments when they exist' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
106 parser.add_option( '-v', '--valAlign', dest='valAlign', help='Report up to n valid arguments per read' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
107 parser.add_option( '-V', '--allValAligns', dest='allValAligns', help='Whether or not to report all valid alignments per read' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
108 parser.add_option( '-G', '--suppressAlign', dest='suppressAlign', help='Suppress all alignments for a read if more than n reportable alignments exist' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
109 parser.add_option( '-b', '--best', dest='best', help="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions" )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
110 parser.add_option( '-B', '--maxBacktracks', dest='maxBacktracks', help='Maximum number of backtracks permitted when aligning a read' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
111 parser.add_option( '-R', '--strata', dest='strata', help='Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
112 parser.add_option( '-j', '--minInsert', dest='minInsert', help='Minimum insert size for valid paired-end alignments' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
113 parser.add_option( '-J', '--maxInsert', dest='maxInsert', help='Maximum insert size for valid paired-end alignments' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
114 parser.add_option( '-O', '--mateOrient', dest='mateOrient', help='The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
115 parser.add_option( '-A', '--maxAlignAttempt', dest='maxAlignAttempt', help='Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
116 parser.add_option( '-f', '--forwardAlign', dest='forwardAlign', help='Whether or not to attempt to align the forward reference strand' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
117 parser.add_option( '-E', '--reverseAlign', dest='reverseAlign', help='Whether or not to attempt to align the reverse-complement reference strand' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
118 parser.add_option( '-F', '--offrate', dest='offrate', help='Override the offrate of the index to n' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
119 parser.add_option( '-S', '--seed', dest='seed', help='Seed for pseudo-random number generator' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
120 parser.add_option( '-8', '--snpphred', dest='snpphred', help='SNP penalty on Phred scale' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
121 parser.add_option( '-6', '--snpfrac', dest='snpfrac', help='Fraction of sites expected to be SNP sites' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
122 parser.add_option( '-7', '--keepends', dest='keepends', help='Keep extreme-end nucleotides and qualities' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
123 parser.add_option( '-C', '--params', dest='params', help='Whether to use default or specified parameters' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
124 parser.add_option( '-u', '--iautoB', dest='iautoB', help='Automatic or specified behavior' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
125 parser.add_option( '-K', '--ipacked', dest='ipacked', help='Whether or not to use a packed representation for DNA strings' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
126 parser.add_option( '-Q', '--ibmax', dest='ibmax', help='Maximum number of suffixes allowed in a block' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
127 parser.add_option( '-Y', '--ibmaxdivn', dest='ibmaxdivn', help='Maximum number of suffixes allowed in a block as a fraction of the length of the reference' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
128 parser.add_option( '-D', '--idcv', dest='idcv', help='The period for the difference-cover sample' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
129 parser.add_option( '-U', '--inodc', dest='inodc', help='Whether or not to disable the use of the difference-cover sample' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
130 parser.add_option( '-y', '--inoref', dest='inoref', help='Whether or not to build the part of the reference index used only in paired-end alignment' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
131 parser.add_option( '-z', '--ioffrate', dest='ioffrate', help='How many rows get marked during annotation of some or all of the Burrows-Wheeler rows' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
132 parser.add_option( '-W', '--iftab', dest='iftab', help='The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
133 parser.add_option( '-X', '--intoa', dest='intoa', help='Whether or not to convert Ns in the reference sequence to As' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
134 parser.add_option( '-N', '--iendian', dest='iendian', help='Endianness to use when serializing integers to the index file' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
135 parser.add_option( '-Z', '--iseed', dest='iseed', help='Seed for the pseudorandom number generator' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
136 parser.add_option( '-c', '--icutoff', dest='icutoff', help='Number of first bases of the reference sequence to index' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
137 parser.add_option( '-x', '--indexSettings', dest='index_settings', help='Whether or not indexing options are to be set' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
138 parser.add_option( '-H', '--suppressHeader', dest='suppressHeader', help='Suppress header' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
139 parser.add_option( '--galaxy_input_format', dest='galaxy_input_format', default="fastqsanger", help='galaxy input format' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
140 parser.add_option( '--do_not_build_index', dest='do_not_build_index', action="store_true", default=False, help='Flag to specify that provided file is already indexed, use as is' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
141 (options, args) = parser.parse_args()
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
142 stdout = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
143
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
144 # make temp directory for placement of indices and copy reference file there if necessary
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
145 tmp_index_dir = tempfile.mkdtemp()
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
146 # get type of data (solid or solexa)
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
147 if options.dataType == 'solid':
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
148 colorspace = '-C'
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
149 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
150 colorspace = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
151 # index if necessary
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
152 if options.genomeSource == 'history' and not options.do_not_build_index:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
153 # set up commands
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
154 if options.index_settings =='indexPreSet':
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
155 indexing_cmds = '%s' % colorspace
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
156 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
157 try:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
158 if options.iautoB and options.iautoB == 'set':
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
159 iautoB = '--noauto'
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
160 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
161 iautoB = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
162 if options. ipacked and options.ipacked == 'packed':
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
163 ipacked = '--packed'
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
164 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
165 ipacked = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
166 if options.ibmax and int( options.ibmax ) >= 1:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
167 ibmax = '--bmax %s' % options.ibmax
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
168 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
169 ibmax = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
170 if options.ibmaxdivn and int( options.ibmaxdivn ) >= 0:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
171 ibmaxdivn = '--bmaxdivn %s' % options.ibmaxdivn
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
172 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
173 ibmaxdivn = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
174 if options.idcv and int( options.idcv ) > 0:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
175 idcv = '--dcv %s' % options.idcv
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
176 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
177 idcv = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
178 if options.inodc and options.inodc == 'nodc':
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
179 inodc = '--nodc'
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
180 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
181 inodc = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
182 if options.inoref and options.inoref == 'noref':
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
183 inoref = '--noref'
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
184 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
185 inoref = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
186 if options.iftab and int( options.iftab ) >= 0:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
187 iftab = '--ftabchars %s' % options.iftab
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
188 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
189 iftab = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
190 if options.intoa and options.intoa == 'yes':
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
191 intoa = '--ntoa'
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
192 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
193 intoa = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
194 if options.iendian and options.iendian == 'big':
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
195 iendian = '--big'
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
196 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
197 iendian = '--little'
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
198 if options.iseed and int( options.iseed ) > 0:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
199 iseed = '--seed %s' % options.iseed
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
200 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
201 iseed = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
202 if options.icutoff and int( options.icutoff ) > 0:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
203 icutoff = '--cutoff %s' % options.icutoff
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
204 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
205 icutoff = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
206 indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s %s' % \
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
207 ( iautoB, ipacked, ibmax, ibmaxdivn, idcv, inodc,
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
208 inoref, options.ioffrate, iftab, intoa, iendian,
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
209 iseed, icutoff, colorspace )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
210 except ValueError, e:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
211 # clean up temp dir
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
212 if os.path.exists( tmp_index_dir ):
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
213 shutil.rmtree( tmp_index_dir )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
214 stop_err( "Something is wrong with the indexing parameters and the indexing and alignment could not be run. Make sure you don't have any non-numeric values where they should be numeric.\n" + str( e ) )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
215 ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
216 ref_file_name = ref_file.name
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
217 ref_file.close()
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
218 os.symlink( options.ref, ref_file_name )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
219 cmd1 = 'bowtie-build %s -f %s %s' % ( indexing_cmds, ref_file_name, ref_file_name )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
220 try:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
221 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
222 tmp_stderr = open( tmp, 'wb' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
223 proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
224 returncode = proc.wait()
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
225 tmp_stderr.close()
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
226 # get stderr, allowing for case where it's very large
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
227 tmp_stderr = open( tmp, 'rb' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
228 stderr = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
229 buffsize = 1048576
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
230 try:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
231 while True:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
232 stderr += tmp_stderr.read( buffsize )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
233 if not stderr or len( stderr ) % buffsize != 0:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
234 break
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
235 except OverflowError:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
236 pass
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
237 tmp_stderr.close()
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
238 if returncode != 0:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
239 raise Exception, stderr
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
240 except Exception, e:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
241 # clean up temp dir
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
242 if os.path.exists( tmp_index_dir ):
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
243 shutil.rmtree( tmp_index_dir )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
244 stop_err( 'Error indexing reference sequence\n' + str( e ) )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
245 stdout += 'File indexed. '
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
246 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
247 ref_file_name = options.ref
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
248 # set up aligning and generate aligning command options
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
249 # automatically set threads in both cases
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
250 tmp_suppressed_file_name = None
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
251 tmp_unmapped_file_name = None
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
252 if options.suppressHeader == 'true':
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
253 suppressHeader = '--sam-nohead'
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
254 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
255 suppressHeader = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
256 if options.maxInsert and int( options.maxInsert ) > 0:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
257 maxInsert = '-X %s' % options.maxInsert
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
258 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
259 maxInsert = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
260 if options.mateOrient:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
261 mateOrient = '--%s' % options.mateOrient
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
262 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
263 mateOrient = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
264 quality_score_encoding = GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG.get( options.galaxy_input_format, DEFAULT_ASCII_ENCODING )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
265 if options.params == 'preSet':
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
266 aligning_cmds = '-q %s %s -p %s -S %s %s %s ' % \
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
267 ( maxInsert, mateOrient, options.threads, suppressHeader, colorspace, quality_score_encoding )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
268 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
269 try:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
270 if options.skip and int( options.skip ) > 0:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
271 skip = '-s %s' % options.skip
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
272 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
273 skip = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
274 if options.alignLimit and int( options.alignLimit ) >= 0:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
275 alignLimit = '-u %s' % options.alignLimit
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
276 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
277 alignLimit = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
278 if options.trimH and int( options.trimH ) > 0:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
279 trimH = '-5 %s' % options.trimH
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
280 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
281 trimH = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
282 if options.trimL and int( options.trimL ) > 0:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
283 trimL = '-3 %s' % options.trimL
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
284 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
285 trimL = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
286 if options.maqSoapAlign != '-1' and int( options.maqSoapAlign ) >= 0:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
287 maqSoapAlign = '-v %s' % options.maqSoapAlign
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
288 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
289 maqSoapAlign = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
290 if options.mismatchSeed and (options.mismatchSeed == '0' or options.mismatchSeed == '1' \
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
291 or options.mismatchSeed == '2' or options.mismatchSeed == '3'):
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
292 mismatchSeed = '-n %s' % options.mismatchSeed
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
293 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
294 mismatchSeed = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
295 if options.mismatchQual and int( options.mismatchQual ) >= 0:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
296 mismatchQual = '-e %s' % options.mismatchQual
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
297 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
298 mismatchQual = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
299 if options.seedLen and int( options.seedLen ) >= 5:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
300 seedLen = '-l %s' % options.seedLen
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
301 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
302 seedLen = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
303 if options.rounding == 'noRound':
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
304 rounding = '--nomaqround'
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
305 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
306 rounding = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
307 if options.minInsert and int( options.minInsert ) > 0:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
308 minInsert = '-I %s' % options.minInsert
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
309 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
310 minInsert = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
311 if options.maxAlignAttempt and int( options.maxAlignAttempt ) >= 0:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
312 maxAlignAttempt = '--pairtries %s' % options.maxAlignAttempt
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
313 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
314 maxAlignAttempt = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
315 if options.forwardAlign == 'noForward':
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
316 forwardAlign = '--nofw'
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
317 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
318 forwardAlign = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
319 if options.reverseAlign == 'noReverse':
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
320 reverseAlign = '--norc'
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
321 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
322 reverseAlign = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
323 if options.maxBacktracks and int( options.maxBacktracks ) > 0 and \
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
324 ( options.mismatchSeed == '2' or options.mismatchSeed == '3' ):
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
325 maxBacktracks = '--maxbts %s' % options.maxBacktracks
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
326 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
327 maxBacktracks = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
328 if options.tryHard == 'doTryHard':
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
329 tryHard = '-y'
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
330 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
331 tryHard = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
332 if options.valAlign and int( options.valAlign ) >= 0:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
333 valAlign = '-k %s' % options.valAlign
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
334 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
335 valAlign = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
336 if options.allValAligns == 'doAllValAligns':
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
337 allValAligns = '-a'
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
338 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
339 allValAligns = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
340 if options.suppressAlign and int( options.suppressAlign ) >= 0:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
341 suppressAlign = '-m %s' % options.suppressAlign
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
342 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
343 suppressAlign = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
344 if options.best == 'doBest':
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
345 best = '--best'
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
346 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
347 best = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
348 if options.strata == 'doStrata':
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
349 strata = '--strata'
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
350 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
351 strata = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
352 if options.offrate and int( options.offrate ) >= 0:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
353 offrate = '-o %s' % options.offrate
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
354 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
355 offrate = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
356 if options.seed and int( options.seed ) >= 0:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
357 seed = '--seed %s' % options.seed
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
358 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
359 seed = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
360 if options.paired == 'paired':
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
361 if options.output_unmapped_reads_l and options.output_unmapped_reads_r:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
362 tmp_unmapped_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
363 tmp_unmapped_file_name = tmp_unmapped_file.name
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
364 tmp_unmapped_file.close()
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
365 output_unmapped_reads = '--un %s' % tmp_unmapped_file_name
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
366 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
367 output_unmapped_reads = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
368 if options.output_suppressed_reads:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
369 tmp_suppressed_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
370 tmp_suppressed_file_name = tmp_suppressed_file.name
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
371 tmp_suppressed_file.close()
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
372 output_suppressed_reads = '--max %s' % tmp_suppressed_file_name
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
373 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
374 output_suppressed_reads = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
375 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
376 if options.output_unmapped_reads:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
377 output_unmapped_reads = '--un %s' % options.output_unmapped_reads
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
378 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
379 output_unmapped_reads = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
380 if options.output_suppressed_reads:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
381 output_suppressed_reads = '--max %s' % options.output_suppressed_reads
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
382 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
383 output_suppressed_reads = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
384 snpfrac = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
385 if options.snpphred and int( options.snpphred ) >= 0:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
386 snpphred = '--snpphred %s' % options.snpphred
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
387 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
388 snpphred = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
389 if options.snpfrac and float( options.snpfrac ) >= 0:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
390 snpfrac = '--snpfrac %s' % options.snpfrac
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
391 if options.keepends and options.keepends == 'doKeepends':
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
392 keepends = '--col-keepends'
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
393 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
394 keepends = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
395 aligning_cmds = '-q %s %s -p %s -S %s %s %s %s %s %s %s %s %s %s %s %s ' \
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
396 '%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s ' % \
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
397 ( maxInsert, mateOrient, options.threads, suppressHeader,
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
398 colorspace, skip, alignLimit, trimH, trimL, maqSoapAlign,
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
399 mismatchSeed, mismatchQual, seedLen, rounding, minInsert,
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
400 maxAlignAttempt, forwardAlign, reverseAlign, maxBacktracks,
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
401 tryHard, valAlign, allValAligns, suppressAlign, best,
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
402 strata, offrate, seed, snpphred, snpfrac, keepends,
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
403 output_unmapped_reads, output_suppressed_reads,
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
404 quality_score_encoding )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
405 except ValueError, e:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
406 # clean up temp dir
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
407 if os.path.exists( tmp_index_dir ):
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
408 shutil.rmtree( tmp_index_dir )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
409 stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
410 try:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
411 # have to nest try-except in try-finally to handle 2.4
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
412 try:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
413 # prepare actual mapping commands
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
414 if options.paired == 'paired':
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
415 cmd2 = 'bowtie %s %s -1 %s -2 %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.input2, options.output )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
416 else:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
417 cmd2 = 'bowtie %s %s %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.output )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
418 # align
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
419 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
420 tmp_stderr = open( tmp, 'wb' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
421 proc = subprocess.Popen( args=cmd2, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
422 returncode = proc.wait()
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
423 tmp_stderr.close()
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
424 # get stderr, allowing for case where it's very large
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
425 tmp_stderr = open( tmp, 'rb' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
426 stderr = ''
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
427 buffsize = 1048576
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
428 try:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
429 while True:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
430 stderr += tmp_stderr.read( buffsize )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
431 if not stderr or len( stderr ) % buffsize != 0:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
432 break
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
433 except OverflowError:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
434 pass
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
435 tmp_stderr.close()
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
436 if returncode != 0:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
437 raise Exception, stderr
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
438 # get suppressed and unmapped reads output files in place if appropriate
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
439 if options.paired == 'paired' and tmp_suppressed_file_name and \
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
440 options.output_suppressed_reads_l and options.output_suppressed_reads_r:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
441 try:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
442 left = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
443 right = tmp_suppressed_file_name.replace( '.fastq', '_2.fastq' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
444 shutil.move( left, options.output_suppressed_reads_l )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
445 shutil.move( right, options.output_suppressed_reads_r )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
446 except Exception, e:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
447 sys.stdout.write( 'Error producing the suppressed output file.\n' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
448 if options.paired == 'paired' and tmp_unmapped_file_name and \
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
449 options.output_unmapped_reads_l and options.output_unmapped_reads_r:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
450 try:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
451 left = tmp_unmapped_file_name.replace( '.fastq', '_1.fastq' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
452 right = tmp_unmapped_file_name.replace( '.fastq', '_2.fastq' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
453 shutil.move( left, options.output_unmapped_reads_l )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
454 shutil.move( right, options.output_unmapped_reads_r )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
455 except Exception, e:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
456 sys.stdout.write( 'Error producing the unmapped output file.\n' )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
457 # check that there are results in the output file
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
458 if os.path.getsize( options.output ) == 0:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
459 raise Exception, 'The output file is empty, there may be an error with your input file or settings.'
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
460 except Exception, e:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
461 stop_err( 'Error aligning sequence. ' + str( e ) )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
462 finally:
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
463 # clean up temp dir
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
464 if os.path.exists( tmp_index_dir ):
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
465 shutil.rmtree( tmp_index_dir )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
466 stdout += 'Sequence file aligned.\n'
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
467 sys.stdout.write( stdout )
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
468
b8d21c7bb4e4 Uploaded
shantanu
parents:
diff changeset
469 if __name__=="__main__": __main__()